def get_model(classifier_name, training_data, target_data, model_settings=None):
    """
    Train and return the classifier selected by ``classifier_name``.

    Model creation and training live in their own file, models.py; this
    function only picks the matching constructor and forwards the data.

    Arguments:
        classifier_name: one of "linreg", "svm", "cart", "knn", "lda",
            "nb" or "lr" (compared case-sensitively)
        training_data: the data the training is performed on
        target_data: the targets for supervised learning
        model_settings: dict of settings for the classifier; forwarded
            only to the "svm", "knn" and "lda" constructors

    Returns:
        model: the trained model, or None when ``classifier_name`` matches
            none of the names above
    """
    if classifier_name == "linreg":
        return models.linreg(training_data, target_data)
    if classifier_name == "svm":
        return models.svm(training_data, target_data, model_settings)
    if classifier_name == "cart":
        return models.cart(training_data, target_data)
    if classifier_name == "knn":
        return models.knn(training_data, target_data, model_settings)
    if classifier_name == "lda":
        return models.lda(training_data, target_data, model_settings)
    if classifier_name == "nb":
        return models.nb(training_data, target_data)
    if classifier_name == "lr":
        return models.lr(training_data, target_data)
    # Unknown name: callers receive None rather than an exception.
    return None
def main(argv):
    """Run every classifier on the same training data and print each result.

    The parameter-file path is read from ``sys.argv[1]`` and handed to
    ``get_train_data``; each classifier function is then called with the
    resulting ``(X, y)`` and its return value is printed as an accuracy.

    Arguments:
        argv: accepted for the caller's convenience but not read here; the
            path always comes from ``sys.argv[1]``.
    """
    param_fpath = sys.argv[1]

    # prepare training data
    X, y = get_train_data(param_fpath)  # X: feature list  y: label list

    # (banner name, result label, classifier function), in execution order.
    runs = (
        ("Logistic Regression", "LR", lr),
        ("KNN", "KNN", knn),
        ("GNB", "GNB", gnb),
        ("MNB", "MNB", mnb),
        ("BNB", "BNB", bnb),
        ("Decision Tree", "Decision Tree", dtree),
        ("SVM", "SVM", svm),
    )

    # train
    for banner, label, classifier in runs:
        # Single-argument print(...) behaves the same under Python 2 and 3,
        # unlike the print statement, which is a syntax error on Python 3.
        print('\nBegin Running ' + banner + '...')
        accuracy = classifier(X, y)
        print(label + " Accuracy: " + str(accuracy))
def findPartitions():
    """Main routine: search for the best opening/midgame/endgame split.

    Reads four command-line arguments: ``sys.argv[1]``/``sys.argv[2]`` feed
    ``Parser.read_uci``/``Parser.parseStockfish`` for the training games and
    ``sys.argv[3]``/``sys.argv[4]`` do the same for the validation games.

    Every split of the game into tenths (opening 0-9, midgame 0..9-opening,
    endgame the remainder) is written to the module globals OPENING, MIDGAME
    and ENDGAME; features are rebuilt with ``set1``, KNN predicts the white
    and black labels, and the mean of the two ``meanError`` values is
    compared with the best seen so far. Each new best is printed and written
    to ``o``.

    NOTE(review): ``o`` is not defined in this function; presumably it is a
    module-level file handle opened elsewhere. It is closed here once the
    search finishes.
    """
    global ENDGAME, MIDGAME, OPENING

    uci_parser = Parser()
    print("PARSING")
    train_scored = uci_parser.parseStockfish(
        uci_parser.read_uci(sys.argv[1]), sys.argv[2])
    valid_scored = uci_parser.parseStockfish(
        uci_parser.read_uci(sys.argv[3]), sys.argv[4])

    # Labels do not depend on the split, so they are built once up front.
    train_y_w = getLabels(train_scored, WHITE)
    train_y_b = getLabels(train_scored, BLACK)
    valid_y_w = getLabels(valid_scored, WHITE)
    valid_y_b = getLabels(valid_scored, BLACK)

    best_error = 10000
    for opening_tenths in range(10):
        for midgame_tenths in range(10 - opening_tenths):
            endgame_tenths = 10 - opening_tenths - midgame_tenths
            OPENING = float(opening_tenths) / 10
            MIDGAME = float(midgame_tenths) / 10
            ENDGAME = float(endgame_tenths) / 10
            print(OPENING, MIDGAME, ENDGAME)

            # Features are rebuilt on every pass, after the globals change.
            train_x = set1(train_scored)
            valid_x = set1(valid_scored)

            # KNN returns the predicted labels for the validation games.
            pred_w = knn(train_x, train_y_w, valid_x)
            pred_b = knn(train_x, train_y_b, valid_x)
            err_w = meanError(valid_y_w, pred_w)
            err_b = meanError(valid_y_b, pred_b)
            err_mean = (err_b + err_w) / 2  # mean

            st = "Got score of %s with %s %s %s" % (
                err_mean, opening_tenths, midgame_tenths, endgame_tenths)
            if err_mean < best_error:
                best_error = err_mean
                print(st)
                o.write(st + "\n")
    o.close()
def main():
    """Train KNN on the training games and report the validation error.

    Command-line arguments (read from ``sys.argv``):
        1, 2: passed to ``Parser.read_uci`` / ``Parser.parseStockfish`` to
              load the training games
        3, 4: the same pair for the validation games

    Features come from ``set2`` and labels from ``getLabels`` for WHITE and
    BLACK separately. The white, black and mean ("total") ``meanError``
    values are printed; nothing is returned.
    """
    # PARSING TIME: two groups, training data and validation data
    parser = Parser()
    print("PARSING")
    games = parser.read_uci(sys.argv[1])
    scored = parser.parseStockfish(games, sys.argv[2])
    validationgames = parser.read_uci(sys.argv[3])
    validationscored = parser.parseStockfish(validationgames, sys.argv[4])

    print("Building Features/Labels")
    training_features = set2(scored)
    training_labels_w = getLabels(scored, WHITE)
    training_labels_b = getLabels(scored, BLACK)
    validation_features = set2(validationscored)
    validation_labels_w = getLabels(validationscored, WHITE)
    validation_labels_b = getLabels(validationscored, BLACK)

    # The model run here is knn, so the progress message says so (it used
    # to announce "LR" while calling knn).
    print("Running KNN")
    y_w = knn(training_features, training_labels_w, validation_features)
    y_b = knn(training_features, training_labels_b, validation_features)

    print("Calculating Error")
    error_w = meanError(validation_labels_w, y_w)
    error_b = meanError(validation_labels_b, y_b)
    error_t = (error_b + error_w) / 2  # mean
    print(str(error_w) + " White error")
    print(str(error_b) + " Black error")
    print(str(error_t) + " Total error")
def runModels(testdata, testlabel):
    """Run each classifier from ``models`` and print the mean of its result.

    Every function is called as ``models.<name>(testdata, testlabel)`` and
    the ``.mean()`` of the returned object is printed next to a label.
    Nothing is returned.
    """
    # (printed label, function name in `models`), in execution order.
    runs = (
        ("knn: ", "knn"),
        ("svm: ", "svm"),
        ("decision tree: ", "decisionTree"),
        ("Gaussian NB: ", "naiveBayes"),
        ("random forest: ", "randomForest"),
        ("nearest centroid: ", "nearestCentroid"),
        ("extra tree: ", "extraTree"),
        ("extra trees: ", "extraTrees"),
    )
    for label, function_name in runs:
        # Look the function up only when its turn comes, so earlier models
        # still run and print even if a later name is missing.
        scores = getattr(models, function_name)(testdata, testlabel)
        print(label, scores.mean())
import models
import visualize

# Driver script: every call below runs at import time, top to bottom.
# None of the calls takes arguments or has its return value used here, so
# each step presumably works through state held inside its own module.

# Find some interests through figures: plot0 .. plot3 are called in order.
visualize.plot0()
visualize.plot1()
visualize.plot2()
visualize.plot3()

# Start mining: init() runs before any of the model steps.
models.init()

# Naive Bayes
models.bayes()

# K-nn model
models.knn()

# Decision tree
models.decision_tree()

# Random forest
models.random_forest()

# Visual separator on stdout before the comparison step.
print("\n")
models.compare()

# Random test
models.final_test()
def _overnight_log_failure(target, exc):
    """Report a swallowed error on stderr so a failed step is not silent."""
    sys.stderr.write("overnight: %s: %r\n" % (target, exc))


def _overnight_error(predict, train_x, val_x, labels, *extra):
    """Return the mean of the white and black validation errors for one model.

    ``labels`` is ``(train_white, train_black, validation_white,
    validation_black)``; ``extra`` is forwarded to ``predict`` unchanged.
    """
    train_w, train_b, val_w, val_b = labels
    y_w = predict(train_x, train_w, val_x, *extra)
    y_b = predict(train_x, train_b, val_x, *extra)
    error_w = meanError(val_w, y_w)
    error_b = meanError(val_b, y_b)
    return (error_b + error_w) / 2  # mean


def _overnight_partition_sweep(predict, path, scored, validationscored, labels):
    """Try every opening/midgame/endgame split and log each new best to ``path``."""
    global OPENING, MIDGAME, ENDGAME
    with open(path, "w") as out:
        try:
            minscore = 10000
            for i in range(10):
                for j in range(10 - i):
                    OPENING = float(i) / 10
                    MIDGAME = float(j) / 10
                    ENDGAME = float(10 - i - j) / 10
                    print(OPENING, MIDGAME, ENDGAME)
                    # Features are rebuilt after the globals are reassigned.
                    training_features = set2(scored)
                    validation_features = set2(validationscored)
                    error_t = _overnight_error(
                        predict, training_features, validation_features, labels)
                    st = ("Got score of " + str(error_t) + " with "
                          + str(i) + " " + str(j) + " " + str(10 - i - j))
                    if error_t < minscore:
                        minscore = error_t
                        print(st)
                        out.write(st + "\n")
        except Exception as exc:
            # Best effort: a failing model abandons its own sweep only, so
            # the remaining experiments still run.
            _overnight_log_failure(path, exc)


def _overnight_overfit_sweep(predict, path, values, train_x, val_x, labels,
                             keep_going):
    """Log the mean error of ``predict`` for each parameter value to ``path``.

    With ``keep_going`` true a failing value is skipped; otherwise the first
    failure ends the sweep.
    """
    with open(path, "w") as out:
        for value in values:
            try:
                error_t = _overnight_error(
                    predict, train_x, val_x, labels, value)
                out.write("Got score of " + str(error_t)
                          + " with " + str(value) + "\n")
            except Exception as exc:
                _overnight_log_failure("%s (value %s)" % (path, value), exc)
                if not keep_going:
                    break


def overnight():
    """Run the long batch of experiments and write the results to files.

    Reads the training games from ``sys.argv[1]``/``sys.argv[2]`` and the
    validation games from ``sys.argv[3]``/``sys.argv[4]``, then:

    1. Sweeps the OPENING/MIDGAME/ENDGAME split for knn, lr, tree and forest,
       writing each new best to "knnpartition", "lrpartition",
       "treepartition" and "forestpartition".
    2. Fixes the split at 0.4/0.3/0.3 and, for parameter values 0..99, logs
       the error of knnO, treeO and forestO to "knnoverfit", "treeoverfit"
       and "forestoverfit".
    3. Repeats that for the values 500, 1000, 3000 and 4500 in
       "knnoverfit2", "forestoverfit2" and "treeoverfit2".
    4. Loops forever over odd values from 103 upwards, logging knn, tree and
       forest errors to "lastoverfits".

    Step 4 never ends by itself: stop it with Ctrl-C (KeyboardInterrupt is no
    longer swallowed), which also closes the output file. Errors raised by a
    model are reported on stderr and the run continues.
    """
    global OPENING, MIDGAME, ENDGAME

    parser = Parser()
    games = parser.read_uci(sys.argv[1])
    scored = parser.parseStockfish(games, sys.argv[2])
    validationgames = parser.read_uci(sys.argv[3])
    validationscored = parser.parseStockfish(validationgames, sys.argv[4])
    labels = (
        getLabels(scored, WHITE),
        getLabels(scored, BLACK),
        getLabels(validationscored, WHITE),
        getLabels(validationscored, BLACK),
    )

    # opening/endgame/midgame for each model
    for predict, path in ((knn, "knnpartition"),
                          (lr, "lrpartition"),
                          (tree, "treepartition"),
                          (forest, "forestpartition")):
        _overnight_partition_sweep(
            predict, path, scored, validationscored, labels)

    # Fixed split for all of the remaining experiments.
    OPENING = 0.4
    MIDGAME = 0.3
    ENDGAME = 0.3
    training_features = set2(scored)
    validation_features = set2(validationscored)

    for predict, path in ((knnO, "knnoverfit"),
                          (treeO, "treeoverfit"),
                          (forestO, "forestoverfit")):
        _overnight_overfit_sweep(
            predict, path, range(100),
            training_features, validation_features, labels, True)

    for predict, path in ((knnO, "knnoverfit2"),
                          (forestO, "forestoverfit2"),
                          (treeO, "treeoverfit2")):
        _overnight_overfit_sweep(
            predict, path, (500, 1000, 3000, 4500),
            training_features, validation_features, labels, False)

    i = 101
    with open("lastoverfits", "w") as out:
        while True:
            i += 2
            try:
                # The " tree" entry now really runs treeO; it used to call
                # forestO a second time.
                for predict, tag in ((knnO, " knn"),
                                     (treeO, " tree"),
                                     (forestO, " forest")):
                    error_t = _overnight_error(
                        predict, training_features, validation_features,
                        labels, i)
                    out.write("Got score of " + str(error_t)
                              + " with " + str(i) + tag + "\n")
            except Exception as exc:
                _overnight_log_failure("lastoverfits (value %s)" % i, exc)
            # The loop only ends when the process is interrupted or killed,
            # so push results to disk as they are produced.
            out.flush()
def _padded_move_rows(games, longest):
    """Return one fixed-width row of UCI moves per game, padded with zeros."""
    # The width is longest + 1 move pairs: that is what the previous padding
    # loop produced for every non-empty game, so feature vectors keep their
    # size. Padding by width (not by a leftover loop index) also gives games
    # without turns a well-formed all-zero row.
    width = 2 * (longest + 1)
    rows = []
    for game in games:
        row = []
        for turn in game.turns:
            row.append(turn.moves[0].uci)
            row.append(turn.moves[1].uci)
        row.extend([0] * (width - len(row)))
        rows.append(row)
    return rows


def knnMoves():
    """Run KNN using only the moves of each game as features.

    Training games are loaded from ``sys.argv[1]``/``sys.argv[2]`` and
    validation games from ``sys.argv[3]``/``sys.argv[4]`` via ``Parser``.
    Each game becomes a row holding the ``uci`` value of the two moves of
    every turn, zero-padded to a common width derived from the longest game
    in either set. The white, black and mean ("total") ``meanError`` values
    are printed; the function returns None.
    """
    # PARSING TIME: two groups, training data and validation data
    parser = Parser()
    print("PARSING")
    games = parser.read_uci(sys.argv[1])
    scored = parser.parseStockfish(games, sys.argv[2])
    validationgames = parser.read_uci(sys.argv[3])
    validationscored = parser.parseStockfish(validationgames, sys.argv[4])

    print("Building Features/Labels")
    # Longest game across both sets, so every row has the same width.
    longest = max([0]
                  + [len(game.turns) for game in scored]
                  + [len(game.turns) for game in validationscored])

    training_features = _padded_move_rows(scored, longest)
    training_labels_w = getLabels(scored, WHITE)
    training_labels_b = getLabels(scored, BLACK)
    validation_features = _padded_move_rows(validationscored, longest)
    validation_labels_w = getLabels(validationscored, WHITE)
    validation_labels_b = getLabels(validationscored, BLACK)

    print("Running KNN")
    y_w = knn(training_features, training_labels_w, validation_features)
    y_b = knn(training_features, training_labels_b, validation_features)

    print("Calculating Error")
    error_w = meanError(validation_labels_w, y_w)
    error_b = meanError(validation_labels_b, y_b)
    error_t = (error_b + error_w) / 2  # mean
    print(str(error_w) + " White error")
    print(str(error_b) + " Black error")
    print(str(error_t) + " Total error")
def _move_and_stat_rows(games, longest):
    """Return one row per game: zero-padded UCI moves, then summary features."""
    # The move section is longest + 1 pairs wide: that is what the previous
    # padding loop produced for every non-empty game, so feature vectors
    # keep their size. Padding by width (not by a leftover loop index) also
    # gives games without turns a well-formed move section.
    width = 2 * (longest + 1)
    rows = []
    for game in games:
        row = []
        for turn in game.turns:
            row.append(turn.moves[0].uci)
            row.append(turn.moves[1].uci)
        row.extend([0] * (width - len(row)))

        row.append(numturns(game))  # number of turns in the game
        row.append(averagescore(game))  # average score
        row.append(numlosingturns(game))  # % of game where white is ahead
        row.append(averageopening(game))  # average score in opening
        row.append(scoreopening(game))  # score at end of opening
        row.append(averagemidgame(game))  # average score in midgame
        row.append(scoremidgame(game))  # score at/near end of midgame
        row.append(averageendgame(game))  # average score in endgame
        row.append(lastscore(game))  # score of last white move at end of the game
        row.append(totalscore(game))  # gets total change in score
        row.append(averagescorechange(game))  # average amount the score changes per move
        row.append(minimumscore(game))  # minimum score in the game
        row.append(maximumscore(game))  # maximum score in the game
        row.append(scoreswitch(game))  # counts number of times who is ahead changes
        rows.append(row)
    return rows


def knnMovesAll():
    """Run KNN using the moves of each game plus the summary features.

    Training games are loaded from ``sys.argv[1]``/``sys.argv[2]`` and
    validation games from ``sys.argv[3]``/``sys.argv[4]`` via ``Parser``.
    Each game becomes a row holding the ``uci`` value of the two moves of
    every turn, zero-padded to a common width derived from the longest game
    in either set, followed by fourteen per-game summary values. The white,
    black and mean ("total") ``meanError`` values are printed; the function
    returns None.
    """
    # PARSING TIME: two groups, training data and validation data
    parser = Parser()
    print("PARSING")
    games = parser.read_uci(sys.argv[1])
    scored = parser.parseStockfish(games, sys.argv[2])
    validationgames = parser.read_uci(sys.argv[3])
    validationscored = parser.parseStockfish(validationgames, sys.argv[4])

    print("Building Features/Labels")
    # Longest game across both sets, so every row has the same width.
    longest = max([0]
                  + [len(game.turns) for game in scored]
                  + [len(game.turns) for game in validationscored])

    training_features = _move_and_stat_rows(scored, longest)
    training_labels_w = getLabels(scored, WHITE)
    training_labels_b = getLabels(scored, BLACK)
    validation_features = _move_and_stat_rows(validationscored, longest)
    validation_labels_w = getLabels(validationscored, WHITE)
    validation_labels_b = getLabels(validationscored, BLACK)

    print("Running KNN")
    y_w = knn(training_features, training_labels_w, validation_features)
    y_b = knn(training_features, training_labels_b, validation_features)

    print("Calculating Error")
    error_w = meanError(validation_labels_w, y_w)
    error_b = meanError(validation_labels_b, y_b)
    error_t = (error_b + error_w) / 2  # mean
    print(str(error_w) + " White error")
    print(str(error_b) + " Black error")
    print(str(error_t) + " Total error")
# Walk the 'mpd.slice.*.json' files under data_dir, ordered by alphanum_key,
# accumulating playlists into array_df and calling knn on every pass.
# NOTE(review): counter, array_df, spotify, knn_classifier and tracks_df are
# all defined outside this fragment; data_dir is assigned in the cell below,
# so this loop presumably relies on that cell having been run first.
for file_path in sorted(data_dir.glob('mpd.slice.*.json'), key=alphanum_key):
    # if True:
    # Stop once counter has gone past 5 (it is incremented once per file).
    if counter > 5:
        break
    # file_path = data_dir / 'mpd.slice.0-999.json'
    # print("fullpath -> ", file_path)
    array_df += get_playlists_from_file(file_path, spotify)
    # setup_model(array_df)
    # sequential_regressor = sequential_model(sequential_regressor, array_df)
    # array_df = get_playlists_from_file(file_path, spotify)
    # # setup_model(array_df)
    # regressor = random_forest(regressor, array_df)
    # print(type(regressor))
    # af_df = audio_features_df_knn(file_path, spotify)
    # NOTE(review): the arguments do not depend on file_path, so the same
    # call is repeated for every file - confirm whether that is intended.
    model = knn(knn_classifier, tracks_df)
    # print('predicted track -> ', get_track_info(spotify, track_pred[0]))
    # track_name = get_track_info(spotify, track_pred[0])
    # print("PREDICTED TRACK NAME -> ", track_name["name"])
    # track_artist = get_track_info(spotify, track_pred[0])
    # print("PREDICTED ARTIST NAME -> ", track_artist["artists"][0]["name"])
    counter += 1
# sequential_regressor = sequential_model(sequential_regressor, array_df)

# %%
# New cell: locate the data directory and list its entries.
data_dir = file_dir / 'data'
filenames = os.listdir(data_dir)