Example #1
def logOverallPerformance(classifier, features_test, labels_test):

    performanceObject = getOverallPerformance(classifier, features_test,
                                              labels_test)

    print("ACCURACY SCORE:")
    cPrint(performanceObject["accuracy_score"], "green")
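getOverallPerformance and cPrint are project helpers that are not shown on this page. As a rough idea of what the helper might return, here is a minimal sketch built on scikit-learn's accuracy_score; the dictionary key mirrors the one accessed above, but the project's actual implementation may compute additional metrics.

from sklearn.metrics import accuracy_score

def getOverallPerformance(classifier, features_test, labels_test):
    # Predict on the held-out features and compare against the ground-truth labels.
    predicted_labels = classifier.predict(features_test)
    return {"accuracy_score": accuracy_score(labels_test, predicted_labels)}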
Example #2
def createModel(data):

    #DATA INPUT - GAME DATA & USER DATA
    profile_response = data["user_profile_json"]
    games_response = data["games_json"]
    # FEATURE SELECTION - SELECT FEATURES TO BE USED BY ADDING THEM TO THE features_to_use LIST
    #features_to_use = ["appid","name","genres", "publishers", "developers", "hasMultiplayerSupport", "hasSingleplayerSupport", "hasOnlineMultiplayerSupport"]
    features_to_use = ["appid","name","genres"]

    logWithTitle("USED-FEATURES")
    logCommaSeparatedList(features_to_use)
    cPrint(str(len(features_to_use)) + " Features in Use", "green")

    transformed_games = list(map(transformDBResultForTraining, games_response))

    labels = labelByPredefinedLists(transformed_games, boring_games, interesting_games)
    features = createFeaturesDataFrame(transformed_games, features_to_use)
    #features = transformSimpleCategoricalDataForTraining(features, ["genres","tags"])
    #features = transformComplexCategoricalFeatureForTraining(features, "genres")
    features = transformComplexCategoricalFeatureForTraining(features, "genres")


    #logWithTitle("All-LABELS", labels)
    #logWithTitle("ALL-FEATURES", features)
    #printSeperator()

    #print(len(features))

    features_train_indexes, features_test_indexes, labels_train, labels_test = \
        model_selection.train_test_split(features.index, labels, test_size=0.30)
    features_train_named = features.iloc[features_train_indexes]
    features_test_named = features.iloc[features_test_indexes]
    features_train = features_train_named.drop(["appid","name"], axis=1)
    features_test = features_test_named.drop(["appid","name"], axis=1)

    logWithTitle("DATA SHAPES")
    print(features_train.shape, labels_train.shape, " : ", features_test.shape, labels_test.shape)

    #logWithTitle("Train-Features", features_train)
    #logWithTitle("Test-Features", features_test)
    #logWithTitle("Train-Labels", labels_train)
    #logWithTitle("Test-Labels", labels_test)

    # for game in features_test:
    #     cPrint(game)

    classifier = svm.SVC(kernel="rbf", gamma=1000, C=100.)

    classifier.fit(features_train, labels_train)

    #logWithTitle("PREDICTED LABELS", classifier.predict(features_test))
    #logWithTitle("GROUNDTRUTH LABELS", labels_test)

    logOverallPerformance(classifier, features_test, labels_test)
    # save_classifier("./results/model/SVC.pickle", classifier)

    visualizeClassificationReport(classifier, features_train, labels_train, features_test, labels_test)

    predicted_labels = classifier.predict(features_test)

    logNameOfPredictedGames(predicted_labels, features_test_named)
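The feature-engineering helpers used above (createFeaturesDataFrame, transformComplexCategoricalFeatureForTraining, and friends) are defined elsewhere in the project. Purely as an illustration, a minimal sketch of transformComplexCategoricalFeatureForTraining, assuming each row stores the selected feature (here "genres") as a list of strings, could one-hot encode it with pandas like this:

import pandas as pd

def transformComplexCategoricalFeatureForTraining(features, feature_name):
    # Multi-label one-hot encoding: each genre in the list becomes its own 0/1 column,
    # so the SVC receives purely numeric input.
    dummies = features[feature_name].str.join("|").str.get_dummies(sep="|")
    dummies = dummies.add_prefix(feature_name + "_")
    return pd.concat([features.drop(columns=[feature_name]), dummies], axis=1)

The real helper may encode the column differently; the commented-out transformSimpleCategoricalDataForTraining call suggests the project experimented with several encodings.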
Example #3
def getNameOfPredictedGames(predicted_labels, features_named):

    interesting_games = []
    boring_games = []
    unknown_games = []
    for index, prediction in enumerate(predicted_labels):
        if (prediction == "interesting"):
            interesting_games.append(features_named.iloc[index]["name"])
        elif (prediction == "boring"):
            boring_games.append(features_named.iloc[index]["name"])
        else:
            unknown_games.append(features_named.iloc[index]["name"])
            cPrint(str(features_named.iloc[index]["name"]) +
                   " has no label. investigate further",
                   color="yellow")
    return {
        "interesting_games": interesting_games,
        "boring_games": boring_games,
        "unknown_games": unknown_games
    }
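A hypothetical usage sketch, reusing the predicted_labels and features_test_named names from Example #2: the returned dictionary groups game names by predicted class and can be inspected or logged afterwards.

games_by_prediction = getNameOfPredictedGames(predicted_labels, features_test_named)
for name in games_by_prediction["interesting_games"]:
    cPrint(name, color="green")
print(str(len(games_by_prediction["boring_games"])) + " games predicted as boring")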
Example #4
def logNameOfPredictedGames(predicted_labels, features_named):

    for index, prediction in enumerate(predicted_labels):
        if (prediction == "interesting"):
            cPrint(features_named.iloc[index]["name"], color="green")
        elif (prediction == "boring"):
            cPrint(features_named.iloc[index]["name"], color="red")
        else:
            cPrint(str(prediction) + " can't be found in any list",
                   color="yellow")
Example #5
def logCrossValidatedOverallPerformance(classifier, features, labels):
    def precisionInterestingScorer(labels_test, predicted_labels):
        return classification_report(labels_test,
                                     predicted_labels,
                                     output_dict=True)["1"]["precision"]

    def precisionBoringScorer(labels_test, predicted_labels):
        return classification_report(labels_test,
                                     predicted_labels,
                                     output_dict=True)["0"]["precision"]

    def recallInterestingScorer(labels_test, predicted_labels):
        return classification_report(labels_test,
                                     predicted_labels,
                                     output_dict=True)["1"]["recall"]

    def recallBoringScorer(labels_test, predicted_labels):
        return classification_report(labels_test,
                                     predicted_labels,
                                     output_dict=True)["0"]["recall"]

    scoring = {
        'accuracy': 'accuracy',
        'precision_interesting': make_scorer(precisionInterestingScorer,
                                             greater_is_better=True),
        'precision_boring': make_scorer(precisionBoringScorer,
                                        greater_is_better=True),
        'recall_interesting': make_scorer(recallInterestingScorer,
                                          greater_is_better=True),
        'recall_boring': make_scorer(recallBoringScorer,
                                     greater_is_better=True)
    }
    folds = 10
    scores = cross_validate(classifier,
                            features,
                            list(map(convertLabelToNumber, labels)),
                            scoring=scoring,
                            cv=folds,
                            return_train_score=False,
                            return_estimator=False)
    logWithTitle("CROSS VALIDATED METRICS")

    print("INTERESTING RECALL")
    average_recall_interesting = calculateAverage(
        scores["test_recall_interesting"])
    cPrint(round(average_recall_interesting, 2), color="red")

    print("INTERESTING PRECISION")
    average_precision_interesting = calculateAverage(
        scores["test_precision_interesting"])
    cPrint(round(average_precision_interesting, 2), color="green")
    #print(scores["test_precision_interesting"])
    #print(numpy.var(scores["test_precision_interesting"]))

    print("BORING RECALL")
    average_recall_boring = calculateAverage(scores["test_recall_boring"])
    cPrint(round(average_recall_boring, 2), color="red")

    print("BORING PRECISION")
    average_precision_boring = calculateAverage(
        scores["test_precision_boring"])
    cPrint(round(average_precision_boring, 2), color="green")

    print("ACCURACY")
    average_precision_boring = calculateAverage(scores["test_accuracy"])
    cPrint(round(average_precision_boring, 2))
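convertLabelToNumber and calculateAverage are project helpers not shown on this page. For the custom scorers above to find the "1" and "0" keys in classification_report's output, the labels passed to cross_validate must already be numeric; a minimal sketch consistent with that, assuming "interesting" maps to 1 and "boring" to 0, could look like this:

import numpy

def convertLabelToNumber(label):
    # Assumed mapping: "interesting" -> 1, everything else (i.e. "boring") -> 0,
    # matching the "1"/"0" keys used by the scorers above.
    return 1 if label == "interesting" else 0

def calculateAverage(values):
    # Mean of the per-fold scores returned by cross_validate.
    return float(numpy.mean(values))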