def Train(folder, k_fold=5, quantization=24, maxOrder=20, time_representation=False,
          zero_padding=True, long_term_only=False, short_term_only=False):

    if folder[-1] == "/":
        folder = folder[:-1]

    model_name = "models/" + folder[folder.rfind("/") + 1:] + "_quantization_" + \
                 str(quantization) + "_maxOrder_" + str(maxOrder) + ".model"

    if os.path.isfile(model_name):
        print("There is already a model saved for these data, would you like to train again? (y/N)\n")
        rep = input("")
        while rep not in ["y", "Y", "n", "N", "", "\n"]:
            rep = input("We did not understand, please type again (y/N).")
        if rep.lower() != "y":
            return

    L = idyom.idyom(maxOrder=maxOrder)
    M = data.data(quantization=quantization)
    M.parse(folder)
    L.train(M)
    L.save(model_name)
def Train(folder, jump=False):
    L = idyom.idyom(jump=jump, maxOrder=100)
    M = data.data(quantization=24)
    M.parse(folder)
    L.train(M)
    L.save("models/jump_" + str(jump) + ".model")
def cross_validation(folder, k_fold=10, maxOrder=20, quantization=24, time_representation=False,
                     zero_padding=True, long_term_only=False, short_term_only=False):
    """
    Cross-validate a folder using k-fold splits; returns the mean note
    likelihood of each held-out file.
    """
    np.random.seed(0)
    Likelihoods = []

    files = []
    for filename in glob(folder + '/**', recursive=True):
        if filename[filename.rfind("."):] in [".mid", ".midi"]:
            files.append(filename)

    np.random.shuffle(files)

    if int(k_fold) == -1:
        k_fold = len(files)
    if int(k_fold) > len(files):
        raise ValueError("Cannot process with k_fold greater than the number of files. "
                         "Please use the -k option to specify a smaller k for cross validation.")

    # From here on, k_fold holds the size of one fold, not the number of folds.
    k_fold = len(files) // int(k_fold)

    validationFiles = []
    for i in tqdm(range(math.ceil(len(files) / k_fold))):
        trainData = files[:i * k_fold] + files[(i + 1) * k_fold:]
        evalData = files[i * k_fold:(i + 1) * k_fold]

        # Our IDyOM
        L = idyom.idyom(maxOrder=maxOrder)
        M = data.data(quantization=quantization)
        M.addFiles(trainData)

        L.train(M)

        for file in evalData:
            tmp = L.getLikelihoodfromFile(file, long_term_only=long_term_only,
                                          short_term_only=short_term_only)
            # Replace NaN likelihoods (undefined predictions) with a small floor value.
            for j in range(len(tmp)):
                if tmp[j] != tmp[j]:
                    tmp[j] = 1 / 30

            Likelihoods.append(np.mean(tmp))
            filename = file[file.rfind("/") + 1:file.rfind(".")]
            validationFiles.append(filename)

    return Likelihoods, validationFiles
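# Example usage, as a minimal sketch ("dataset/bach/" is a hypothetical path,
# not one shipped with the project):
#
#   likelihoods, names = cross_validation("dataset/bach/", k_fold=5)
#   print("mean likelihood over held-out files:", np.mean(likelihoods))
#
# Each entry of `likelihoods` is the mean note likelihood of one held-out file,
# so the list can be compared across corpora or model settings.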
def Train(folder, k_fold=5, quantization=24, maxOrder=20, time_representation=False,
          zero_padding=True, long_term_only=False, short_term_only=False, viewPoints="both"):
    """
    Train a model with the passed parameters and then save it to the hard drive.
    """
    if folder[-1] == "/":
        folder = folder[:-1]

    if viewPoints == "pitch":
        viewPoints_o = ["pitch"]
    elif viewPoints == "length":
        viewPoints_o = ["length"]
    elif viewPoints == "both":
        viewPoints_o = ["pitch", "length"]
    else:
        raise ValueError("We do not know these viewpoints ...")

    model_name = "models/" + folder[folder.rfind("/") + 1:] + "_quantization_" + \
                 str(quantization) + "_maxOrder_" + str(maxOrder) + \
                 "_viewpoints_" + str(viewPoints) + ".model"

    if os.path.isfile(model_name):
        print("There is already a model saved for these data, would you like to train again? (y/N)\n")
        rep = input("")
        while rep not in ["y", "Y", "n", "N", "", "\n"]:
            rep = input("We did not understand, please type again (y/N).")
        if rep.lower() != "y":
            return

    # We only precompute entropies if we need to combine the short- and long-term models.
    preComputeEntropies = not (long_term_only or short_term_only)

    L = idyom.idyom(maxOrder=maxOrder, viewPoints=viewPoints_o)
    M = data.data(quantization=quantization)
    M.parse(folder, augment=True)
    L.train(M)
    L.save(model_name)
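# Example usage, as a minimal sketch ("dataset/bach/" is a hypothetical path):
#
#   Train("dataset/bach/", quantization=24, maxOrder=20, viewPoints="both")
#
# This writes models/bach_quantization_24_maxOrder_20_viewpoints_both.model,
# which the evaluation functions below expect to find under models/.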
def LikelihoodOverFolder(folder, jump=False, zero_padding=True):
    if folder[-1] != "/":
        folder += "/"

    L = idyom.idyom(jump=jump)

    if os.path.isfile("models/jump_" + str(jump) + ".model"):
        print("We load saved model.")
        L.load("models/jump_" + str(jump) + ".model")
    else:
        print("No saved model found, please train before.")
        return

    S, files = L.getSurprisefromFolder(folder)

    data = {}
    for i in range(len(S)):
        name = files[i][files[i].rfind("/") + 1:files[i].rfind(".")]
        data[name] = np.array(S[i])

    if not os.path.exists(folder + "surprises"):
        os.makedirs(folder + "surprises")

    sio.savemat(folder + 'surprises/surpriseSignal_jump_' + str(jump) + '.mat', data)
    pickle.dump(data, open(folder + 'surprises/surpriseSignal_jump_' + str(jump) + '.pickle', "wb"))

    print()
    print("Data have been successfully saved in:",
          folder + 'surprises/surpriseSignal_jump_' + str(jump) + '.mat')
    print("Including a .mat for MATLAB use and a .pickle for Python use.")
    print()

    if not SERVER:
        for i in range(len(S)):
            plt.title(files[i])
            plt.plot(S[i])
            plt.show()
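# Example usage, as a minimal sketch ("dataset/eval_corpus/" is a hypothetical
# path; a model trained with the same jump setting must already exist under models/):
#
#   LikelihoodOverFolder("dataset/eval_corpus/", jump=False)
#
# The surprise signals are written next to the data, under
# dataset/eval_corpus/surprises/, as both .mat and .pickle files.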
def cross_validation(folder, k_fold=10, maxOrder=20, quantization=24, jump=False):
    """
    Cross-validate a folder using k-fold splits.
    """
    np.random.seed(0)
    Likelihoods = []

    files = []
    for filename in glob(folder + '/**', recursive=True):
        if filename[filename.rfind("."):] in [".mid", ".midi"]:
            files.append(filename)

    np.random.shuffle(files)

    # From here on, k_fold holds the size of one fold, not the number of folds.
    k_fold = len(files) // int(k_fold)

    validationFiles = []
    for i in range(len(files) // k_fold):
        trainData = files[:i * k_fold] + files[(i + 1) * k_fold:]
        evalData = files[i * k_fold:(i + 1) * k_fold]

        # Our IDyOM
        L = idyom.idyom(maxOrder=maxOrder, jump=jump)
        M = data.data(quantization=quantization)
        M.addFiles(trainData)

        L.train(M)

        for file in evalData:
            Likelihoods.append(np.mean(L.getLikelihoodfromFile(file)))
            validationFiles.append(file)

    return Likelihoods, validationFiles
def SurpriseOverFolder(folderTrain, folder, k_fold=5, quantization=24, maxOrder=20,
                       time_representation=False, zero_padding=True,
                       long_term_only=False, short_term_only=False):
    L = idyom.idyom()

    if folderTrain[-1] == "/":
        folderTrain = folderTrain[:-1]
    if folder[-1] != "/":
        folder += "/"

    name_train = folderTrain[folderTrain[:-1].rfind("/") + 1:] + "/"
    name = folder[folder[:-1].rfind("/") + 1:]

    os.makedirs("out/" + name + "surprises/" + name_train + "data/", exist_ok=True)
    os.makedirs("out/" + name + "surprises/" + name_train + "figs/", exist_ok=True)

    model_name = "models/" + folderTrain[folderTrain.rfind("/") + 1:] + \
                 "_quantization_" + str(quantization) + "_maxOrder_" + str(maxOrder) + ".model"

    if os.path.isfile(model_name):
        print("We load saved model.")
        L.load(model_name)
    else:
        print("No saved model found, please train before.")
        print(model_name)
        quit()

    S, files = L.getSurprisefromFolder(folder, time_representation=time_representation,
                                       long_term_only=long_term_only,
                                       short_term_only=short_term_only)

    data = {}
    for i in range(len(S)):
        name_tmp = files[i][files[i].rfind("/") + 1:files[i].rfind(".")]
        data[name_tmp] = np.array(S[i]).tolist()

    more_info = ""
    if long_term_only:
        more_info += "_longTermOnly"
    if short_term_only:
        more_info += "_shortTermOnly"
    more_info += "_quantization_" + str(quantization) + "_maxOrder_" + str(maxOrder)

    out_base = "out/" + name + "surprises/" + name_train + "data/" + \
               folderTrain[folderTrain.rfind("/") + 1:] + more_info

    sio.savemat(out_base + '.mat', data)
    pickle.dump(data, open(out_base + '.pickle', "wb"))

    print()
    print("Data have been successfully saved in:", out_base + '.mat')
    print("Including a .mat for MATLAB use and a .pickle for Python use.")
    print()

    os.makedirs("out/" + name + "surprises/" + name_train + "figs/" + more_info[1:],
                exist_ok=True)

    for i in range(len(S)):
        plt.title(files[i])
        plt.plot(S[i])
        plt.savefig("out/" + name + "surprises/" + name_train + "figs/" + more_info[1:] + "/" +
                    files[i][files[i].rfind("/") + 1:files[i].rfind(".")] + '.eps')
        if not SERVER:
            plt.show()
        else:
            plt.close()
import sys
sys.path.append('../')

from idyom import longTermModel
from idyom import data
from idyom import score
from idyom import idyom

import numpy as np
import matplotlib.pyplot as plt

# Train a small model on the example corpus, then sample and generate from it.
L = idyom.idyom(maxOrder=20, jump=False, maxDepth=10)
M = data.data(quantization=6)

#M.parse("../dataset/")
#M.parse("../datasetprout/")
M.parse("../examples/dataBaseTest")

L.train(M)

L.sample([{"pitch": 74, "length": 24}])

s = L.generate(20)
print(s.getData())
s.plot()
s.writeToMidi("exGen.mid")
def cross_validation(folder, k_fold=10, maxOrder=20, quantization=24, time_representation=False,
                     zero_padding=True, long_term_only=False, short_term_only=False,
                     viewPoints="both"):
    """
    Cross-validate a single folder using k-fold splits.
    """
    if viewPoints == "pitch":
        viewPoints_o = ["pitch"]
    elif viewPoints == "length":
        viewPoints_o = ["length"]
    elif viewPoints == "both":
        viewPoints_o = ["pitch", "length"]
    else:
        raise ValueError("We do not know these viewpoints ...")

    np.random.seed(0)
    ICs = []
    Entropies = []

    files = []
    for filename in glob(folder + '/**', recursive=True):
        if filename[filename.rfind("."):] in [".mid", ".midi"]:
            files.append(filename)

    np.random.shuffle(files)

    if int(k_fold) == -1:
        k_fold = len(files)
    if int(k_fold) > len(files):
        raise ValueError("Cannot process with k_fold greater than the number of files. "
                         "Please use the -k option to specify a smaller k for cross validation.")

    # From here on, k_fold holds the size of one fold, not the number of folds.
    k_fold = len(files) // int(k_fold)

    validationFiles = []
    for i in tqdm(range(math.ceil(len(files) / k_fold))):
        trainData = files[:i * k_fold] + files[(i + 1) * k_fold:]
        evalData = files[i * k_fold:(i + 1) * k_fold]

        L = idyom.idyom(maxOrder=maxOrder, viewPoints=viewPoints_o)
        M = data.data(quantization=quantization)
        M.addFiles(trainData)

        L.train(M)

        for file in evalData:
            IC, E = L.getSurprisefromFile(file, long_term_only=long_term_only,
                                          short_term_only=short_term_only,
                                          time_representation=time_representation,
                                          zero_padding=zero_padding)
            ICs.append(IC)
            Entropies.append(E)
            filename = file[file.rfind("/") + 1:file.rfind(".")]
            # MATLAB variable names cannot contain dashes.
            filename = filename.replace("-", "_")
            validationFiles.append(filename)

    return ICs, Entropies, validationFiles
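# Example usage, as a minimal sketch ("dataset/bach/" is a hypothetical path):
#
#   ICs, Entropies, names = cross_validation("dataset/bach/", k_fold=5,
#                                            maxOrder=20, viewPoints="both")
#   for name, IC in zip(names, ICs):
#       print(name, np.mean(IC))
#
# Each IC entry is a vector over time for one held-out file. Passing k_fold=-1
# makes one fold per file, i.e. leave-one-out cross-validation.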
def SilentNotesOverFolder(folderTrain, folder, threshold=0.3, k_fold=5, quantization=24,
                          maxOrder=20, time_representation=False, zero_padding=True,
                          long_term_only=False, short_term_only=False, viewPoints="both"):
    """
    Identify, for each silence, whether a note was expected. These notes are
    identified using the threshold passed as a parameter.
    """
    L = idyom.idyom()

    if folderTrain[-1] == "/":
        folderTrain = folderTrain[:-1]
    if folder[-1] != "/":
        folder += "/"

    name_train = folderTrain[folderTrain[:-1].rfind("/") + 1:] + "/"
    name = folder[folder[:-1].rfind("/") + 1:]

    os.makedirs("out/" + name + "missing_notes/" + name_train + "data/", exist_ok=True)
    os.makedirs("out/" + name + "missing_notes/" + name_train + "figs/", exist_ok=True)

    model_name = "models/" + folderTrain[folderTrain.rfind("/") + 1:] + \
                 "_quantization_" + str(quantization) + "_maxOrder_" + str(maxOrder) + \
                 "_viewpoints_" + str(viewPoints) + ".model"

    if os.path.isfile(model_name):
        print("We load saved model.")
        L.load(model_name)
    else:
        print("No saved model found, please train before.")
        print(model_name)
        quit()

    S, files = L.getDistributionsfromFolder(folder, threshold,
                                            time_representation=time_representation,
                                            long_term_only=long_term_only,
                                            short_term_only=short_term_only)

    data = {}
    for i in range(len(S)):
        name_tmp = files[i][files[i].rfind("/") + 1:files[i].rfind(".")]
        # MATLAB variable names cannot contain dashes.
        name_tmp = name_tmp.replace("-", "_")
        data[name_tmp] = np.array(S[i]).tolist()

    more_info = ""
    if long_term_only:
        more_info += "_longTermOnly"
    if short_term_only:
        more_info += "_shortTermOnly"
    more_info += "_quantization_" + str(quantization) + "_maxOrder_" + str(maxOrder) + \
                 "_viewpoints_" + str(viewPoints)

    out_base = "out/" + name + "missing_notes/" + name_train + "data/" + \
               folderTrain[folderTrain.rfind("/") + 1:] + more_info

    sio.savemat(out_base + '.mat', data)
    pickle.dump(data, open(out_base + '.pickle', "wb"))

    print()
    print("Data have been successfully saved in:", out_base + '.mat')
    print("Including a .mat for MATLAB use and a .pickle for Python use.")
    print()

    os.makedirs("out/" + name + "missing_notes/" + name_train + "figs/" + more_info[1:],
                exist_ok=True)

    for i in range(len(files)):
        plt.plot(S[i][0])
        plt.plot(S[i][1])
        plt.legend(["Actual Notes", "Missing Notes"])
        plt.title("Piece: " + files[i])
        plt.savefig("out/" + name + "missing_notes/" + name_train + "figs/" + more_info[1:] +
                    "/" + files[i][files[i].rfind("/") + 1:files[i].rfind(".")] + '.eps')
        if not SERVER:
            plt.show()
        else:
            plt.close()
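# Example usage, as a minimal sketch (both folder paths are hypothetical; a model
# trained on folderTrain with matching parameters must already exist under models/):
#
#   SilentNotesOverFolder("dataset/train_corpus/", "dataset/eval_corpus/",
#                         threshold=0.3, viewPoints="both")
#
# For each piece, the saved figure overlays the actual notes and the notes the
# model expected during silences, as selected by the threshold.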
def SurpriseOverFolder(folderTrain, folder, k_fold=5, quantization=24, maxOrder=20,
                       time_representation=False, zero_padding=True,
                       long_term_only=False, short_term_only=False, viewPoints="both"):
    """
    Compute the Information Content over a folder.
    """
    L = idyom.idyom()

    if folderTrain[-1] == "/":
        folderTrain = folderTrain[:-1]
    if folder[-1] != "/":
        folder += "/"

    name_train = folderTrain[folderTrain[:-1].rfind("/") + 1:] + "/"
    name = folder[folder[:-1].rfind("/") + 1:]

    os.makedirs("out/" + name + "surprises/" + name_train + "data/", exist_ok=True)
    os.makedirs("out/" + name + "surprises/" + name_train + "figs/", exist_ok=True)

    model_name = "models/" + folderTrain[folderTrain.rfind("/") + 1:] + \
                 "_quantization_" + str(quantization) + "_maxOrder_" + str(maxOrder) + \
                 "_viewpoints_" + str(viewPoints) + ".model"

    if os.path.isfile(model_name):
        print("We load saved model.")
        L.load(model_name)
    else:
        print("No saved model found, please train before.")
        print(model_name)
        quit()

    S, E, files = L.getSurprisefromFolder(folder, time_representation=time_representation,
                                          long_term_only=long_term_only,
                                          short_term_only=short_term_only)

    data = {}
    for i in range(len(S)):
        name_tmp = files[i][files[i].rfind("/") + 1:files[i].rfind(".")]
        # MATLAB variable names cannot contain dashes.
        name_tmp = name_tmp.replace("-", "_")
        data[name_tmp] = [np.array(S[i]).tolist(), np.array(E[i]).tolist()]

    data["info"] = ("Each variable corresponds to a song. For each song, the first "
                    "dimension is the Information Content and the second is the Relative "
                    "Entropy. Both are vectors over the time dimension.")

    more_info = ""
    if long_term_only:
        more_info += "_longTermOnly"
    if short_term_only:
        more_info += "_shortTermOnly"
    more_info += "_quantization_" + str(quantization) + "_maxOrder_" + str(maxOrder) + \
                 "_viewpoints_" + str(viewPoints)

    out_base = "out/" + name + "surprises/" + name_train + "data/" + \
               folderTrain[folderTrain.rfind("/") + 1:] + more_info

    sio.savemat(out_base + '.mat', data)
    pickle.dump(data, open(out_base + '.pickle', "wb"))

    print()
    print("Data have been successfully saved in:", out_base + '.mat')
    print("Including a .mat for MATLAB use and a .pickle for Python use.")
    print()

    os.makedirs("out/" + name + "surprises/" + name_train + "figs/" + more_info[1:],
                exist_ok=True)
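# Example usage, as a minimal sketch (both folder paths are hypothetical; a model
# trained on folderTrain with matching parameters must already exist under models/):
#
#   SurpriseOverFolder("dataset/train_corpus/", "dataset/eval_corpus/")
#
# Each variable in the saved .mat/.pickle holds, for one song, the Information
# Content and the Relative Entropy as two time-aligned vectors (see the "info"
# key written alongside the data).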
def Train_by_piece(folder, nb_pieces=20, quantization=24, maxOrder=20, time_representation=False,
                   zero_padding=True, long_term_only=False, short_term_only=False,
                   viewPoints="both", europa_init=True):
    """
    Train and evaluate a model piece by piece. This shows the evolution of the
    generalization error over the course of training and allows one, for
    instance, to identify the amount of data needed to converge.
    """
    if folder[-1] == "/":
        folder = folder[:-1]

    # Temporary folder holding the held-out evaluation pieces.
    name_temp_file = ".tmp_test_folder_" + folder[folder.rfind("/") + 1:] + \
                     "_" + str(np.random.randint(100, 999))

    if viewPoints == "pitch":
        viewPoints_o = ["pitch"]
    elif viewPoints == "length":
        viewPoints_o = ["length"]
    elif viewPoints == "both":
        viewPoints_o = ["pitch", "length"]
    else:
        raise ValueError("We do not know this viewpoint ...")

    L = idyom.idyom(maxOrder=maxOrder, viewPoints=viewPoints_o, evolutive=True)

    files = glob(folder + '/**.mid', recursive=True) + glob(folder + '/**.midi', recursive=True)
    random.shuffle(files)

    train = files[:-nb_pieces]
    test = files[-nb_pieces:]

    if europa_init:
        europe_files = glob('dataset/mixed2/**.mid', recursive=True) + \
                       glob('dataset/mixed2/**.midi', recursive=True)
        train = europe_files[:100] + train

    if os.path.exists(name_temp_file):
        if os.path.isdir(name_temp_file):
            rmtree(name_temp_file)
        else:
            os.remove(name_temp_file)
    os.mkdir(name_temp_file)

    for file in test:
        copyfile(file, name_temp_file + file[file.rfind("/"):])

    note_counter = []
    dicos = []
    matrix = np.zeros((len(train), nb_pieces))

    print("___ Starting Training ___")

    k = 0
    for file in tqdm(train):
        try:
            M = data.data(quantization=quantization)
            M.parseFile(file)
            L.train(M, preComputeEntropies=False)

            S, E, files = L.getSurprisefromFolder(name_temp_file,
                                                  time_representation=time_representation,
                                                  long_term_only=long_term_only,
                                                  short_term_only=short_term_only)

            note_counter.append(len(M.viewPointRepresentation["pitch"][0]))

            dico = {}
            for i in range(len(files)):
                dico[files[i]] = S[i]
            dicos.append(dico)

            tmp = []
            for s in S:
                tmp.append(np.mean(s))
            matrix[k, :] = tmp
            k += 1
        except (FileNotFoundError, RuntimeError, ValueError):
            print(file + " skipped.")

    # Turn per-piece note counts into a cumulative count over training.
    for i in range(1, len(note_counter)):
        note_counter[i] += note_counter[i - 1]

    saving = {}
    saving['matrix'] = matrix
    saving['note_counter'] = note_counter
    saving['dicos'] = dicos

    out_folder = "out/" + folder[folder.rfind("/") + 1:] + "/evolution/"
    if not os.path.exists(out_folder):
        os.makedirs(out_folder)

    out_base = out_folder + folder[folder.rfind("/") + 1:]
    pickle.dump(saving, open(out_base + '.pickle', "wb"))
    sio.savemat(out_base + '.mat', saving)

    print()
    print("Data saved at " + out_base + '.pickle')
    print("Including a .mat for MATLAB use and a .pickle for Python use.")
    print()

    if not SERVER:
        plt.errorbar(note_counter, np.mean(matrix, 1),
                     yerr=np.std(matrix, 1) / np.sqrt(nb_pieces))
        plt.title("Evolution of the mean IC over Learning (" +
                  folder[folder.rfind("/") + 1:] + ")")
        plt.ylabel("Mean IC (generalization error)")
        plt.xlabel("Learning (in notes)")
        plt.show()

    rmtree(name_temp_file)
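# Example usage, as a minimal sketch ("dataset/bach/" is a hypothetical path;
# europa_init=True additionally expects MIDI files under dataset/mixed2/):
#
#   Train_by_piece("dataset/bach/", nb_pieces=20, viewPoints="both",
#                  europa_init=False)
#
# nb_pieces files are held out for evaluation; the remaining files are fed to the
# model one at a time, so the saved matrix traces the generalization error as a
# function of the number of training notes.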