# Print the overall held-out accuracy of an already-trained classifier.
def logOverallPerformance(classifier, features_test, labels_test):
    performanceObject = getOverallPerformance(classifier, features_test, labels_test)
    print("ACCURACY SCORE:")
    cPrint(performanceObject["accuracy_score"], "green")
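# getOverallPerformance is defined elsewhere in this module. For reference, a minimal
# sketch of the shape logOverallPerformance relies on, assuming the helper simply wraps
# sklearn.metrics.accuracy_score (hypothetical reconstruction, not the actual code):
#
#   from sklearn.metrics import accuracy_score
#
#   def getOverallPerformance(classifier, features_test, labels_test):
#       predicted_labels = classifier.predict(features_test)
#       return {"accuracy_score": accuracy_score(labels_test, predicted_labels)}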
def createModel(data):
    # DATA INPUT - GAME DATA & USER DATA
    profile_response = data["user_profile_json"]
    games_response = data["games_json"]

    # FEATURE SELECTION - SELECT FEATURES TO BE USED BY ADDING THEM TO THE features_to_use LIST
    #features_to_use = ["appid", "name", "genres", "publishers", "developers", "hasMultiplayerSupport", "hasSingleplayerSupport", "hasOnlineMultiplayerSupport"]
    features_to_use = ["appid", "name", "genres"]
    logWithTitle("USED-FEATURES")
    logCommaSeparatedList(features_to_use)
    cPrint(str(len(features_to_use)) + " Features in Use", "green")

    # LABELING & FEATURE ENGINEERING
    transformed_games = list(map(transformDBResultForTraining, games_response))
    labels = labelByPredefinedLists(transformed_games, boring_games, interesting_games)
    features = createFeaturesDataFrame(transformed_games, features_to_use)
    #features = transformSimpleCategoricalDataForTraining(features, ["genres", "tags"])
    features = transformComplexCategoricalFeatureForTraining(features, "genres")

    #logWithTitle("All-LABELS", labels)
    #logWithTitle("ALL-FEATURES", features)
    #printSeperator()
    #print(len(features))

    # TRAIN/TEST SPLIT - split on the index so the named frames stay aligned with the labels
    features_train_indexes, features_test_indexes, labels_train, labels_test = model_selection.train_test_split(
        features.index, labels, test_size=0.30)
    features_train_named = features.iloc[features_train_indexes]
    features_test_named = features.iloc[features_test_indexes]
    # Drop the identifier columns before training; they carry no predictive signal.
    features_train = features_train_named.drop(["appid", "name"], axis=1)
    features_test = features_test_named.drop(["appid", "name"], axis=1)

    logWithTitle("DATA SHAPES")
    print(features_train.shape, labels_train.shape, " : ", features_test.shape, labels_test.shape)
    #logWithTitle("Train-Features", features_train)
    #logWithTitle("Test-Features", features_test)
    #logWithTitle("Train-Labels", labels_train)
    #logWithTitle("Test-Labels", labels_test)
    #for game in features_test:
    #    cPrint(game)

    # TRAINING - fit an RBF-kernel support vector classifier
    classifier = svm.SVC(kernel="rbf", gamma=1000, C=100.)
    classifier.fit(features_train, labels_train)

    # EVALUATION
    #logWithTitle("PREDICTED LABELS", classifier.predict(features_test))
    #logWithTitle("GROUNDTRUTH LABELS", labels_test)
    logOverallPerformance(classifier, features_test, labels_test)
    # save_classifier("./results/model/SVC.pickle", classifier)
    visualizeClassificationReport(classifier, features_train, labels_train, features_test, labels_test)

    predicted_labels = classifier.predict(features_test)
    logNameOfPredictedGames(predicted_labels, features_test_named)
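# Example call for createModel, as a hypothetical usage sketch: the fetch helpers and
# steam_id are placeholders, but the two key names match what createModel reads above.
#
#   data = {
#       "user_profile_json": fetchUserProfile(steam_id),  # hypothetical helper
#       "games_json": fetchOwnedGames(steam_id),          # hypothetical helper
#   }
#   createModel(data)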
def getNameOfPredictedGames(predicted_labels, features_named):
    interesting_games = []
    boring_games = []
    unknown_games = []
    for index, prediction in enumerate(predicted_labels):
        if prediction == "interesting":
            interesting_games.append(features_named.iloc[index]["name"])
        elif prediction == "boring":
            boring_games.append(features_named.iloc[index]["name"])
        else:
            unknown_games.append(features_named.iloc[index]["name"])
            cPrint(str(features_named.iloc[index]["name"]) + " has no label. investigate further", color="yellow")
    return {
        "interesting_games": interesting_games,
        "boring_games": boring_games,
        "unknown_games": unknown_games
    }
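# Hypothetical usage sketch: the returned dict makes the three buckets easy to unpack,
# reusing classifier and features_test_named as produced inside createModel above.
#
#   buckets = getNameOfPredictedGames(classifier.predict(features_test), features_test_named)
#   print(buckets["interesting_games"])
#   print(buckets["boring_games"])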
def logNameOfPredictedGames(predicted_labels, features_named):
    for index, prediction in enumerate(predicted_labels):
        if prediction == "interesting":
            cPrint(features_named.iloc[index]["name"], color="green")
        elif prediction == "boring":
            cPrint(features_named.iloc[index]["name"], color="red")
        else:
            cPrint(str(prediction) + " can't be found in any list", color="yellow")
def logCrossValidatedOverallPerformance(classifier, features, labels):
    # classification_report keys are the string form of the numeric labels, so these
    # scorers assume convertLabelToNumber maps "interesting" -> 1 and "boring" -> 0.
    def precisionInterestingScorer(labels_test, predicted_labels):
        return classification_report(labels_test, predicted_labels, output_dict=True)["1"]["precision"]

    def precisionBoringScorer(labels_test, predicted_labels):
        return classification_report(labels_test, predicted_labels, output_dict=True)["0"]["precision"]

    def recallInterestingScorer(labels_test, predicted_labels):
        return classification_report(labels_test, predicted_labels, output_dict=True)["1"]["recall"]

    def recallBoringScorer(labels_test, predicted_labels):
        return classification_report(labels_test, predicted_labels, output_dict=True)["0"]["recall"]

    scoring = {
        'accuracy': 'accuracy',
        'precision_interesting': make_scorer(precisionInterestingScorer, greater_is_better=True),
        'precision_boring': make_scorer(precisionBoringScorer, greater_is_better=True),
        'recall_interesting': make_scorer(recallInterestingScorer, greater_is_better=True),
        'recall_boring': make_scorer(recallBoringScorer, greater_is_better=True)
    }

    folds = 10
    scores = cross_validate(classifier, features, list(map(convertLabelToNumber, labels)),
                            scoring=scoring, cv=folds,
                            return_train_score=False, return_estimator=False)

    logWithTitle("CROSS VALIDATED METRICS")
    print("INTERESTING RECALL")
    average_recall_interesting = calculateAverage(scores["test_recall_interesting"])
    cPrint(round(average_recall_interesting, 2), color="red")
    print("INTERESTING PRECISION")
    average_precision_interesting = calculateAverage(scores["test_precision_interesting"])
    cPrint(round(average_precision_interesting, 2), color="green")
    #print(scores["test_precision_interesting"])
    #print(numpy.var(scores["test_precision_interesting"]))
    print("BORING RECALL")
    average_recall_boring = calculateAverage(scores["test_recall_boring"])
    cPrint(round(average_recall_boring, 2), color="red")
    print("BORING PRECISION")
    average_precision_boring = calculateAverage(scores["test_precision_boring"])
    cPrint(round(average_precision_boring, 2), color="green")
    print("ACCURACY")
    average_accuracy = calculateAverage(scores["test_accuracy"])
    cPrint(round(average_accuracy, 2))
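# Hypothetical usage sketch: cross_validate clones and fits the estimator once per fold,
# so an untrained classifier can be passed in directly. features/labels here would be the
# full (not train/test-split) data from createModel, with the identifier columns dropped.
#
#   classifier = svm.SVC(kernel="rbf", gamma=1000, C=100.)
#   logCrossValidatedOverallPerformance(
#       classifier, features.drop(["appid", "name"], axis=1), labels)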