def preprocess(self):
    # Initialize variables
    files = []
    self.wrongs_idx = [[]]
    i = 0
    converter = cv.Converter()

    # Get all files in the directory
    path = os.path.join(utils.getRoot(), self.directory)
    for file in os.listdir(path):
        files.append(utils.getPath(self.directory, file))

    # Convert all files retrieved
    for file in files:
        base = file.split('/')[-1].split('.')[0]
        ext = file.split('/')[-1].split('.')[-1]
        outname = "".join([base, ".txt"])
        if ext == "xml":
            converter.reset()
            self.datas.append([])
            (score_info, _, notes) = utils.xml_parser(file)
            converter.setKappa(score_info.divisions)
            for note in notes:
                matrix = converter.convert_note(note)
                for elem in matrix:
                    self.datas[i].append(elem)
            # Read the companion .txt file listing wrong-note indexes
            with open(utils.getPath(self.directory, outname), 'r') as txt_file:
                for line in txt_file:
                    self.wrongs_idx[i].append(int(line.rstrip('\n')))
            self.wrongs_idx.append([])
            i += 1
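# For reference, the companion .txt file read above is expected to hold one
# wrong-note index per line, e.g. a hypothetical song.txt next to song.xml:
#
#   3
#   17
#   42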
def sort(arg):
    """
    Outdated.
    Check whether an xml file is correct:
    - has more than 30 notes;
    - has at least 8 different notes;
    - notes are between the 1st and 7th octave.
    If the file is not correct, it will be deleted.
    More information in the utils module.

    Parameters
    ----------
    arg: string
        File to be sorted. If it is a directory, all files will be sorted.

    Returns
    -------
    """
    if os.path.isdir(arg):
        for file in os.listdir(arg):
            temp = utils.getPath(arg, file)
            if os.path.isfile(temp):
                utils.xml_sort(temp)
    elif os.path.isfile(arg):
        utils.xml_sort(arg)
    else:
        print "It is not a file or a directory."
        sys.exit(2)
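# A minimal sketch of the checks documented above (>30 notes, at least 8
# distinct notes, pitches within octaves 1-7). This is an assumption about
# what utils.xml_sort verifies, not its actual implementation; the
# note.octave attribute is assumed to exist on parsed notes.
def is_valid_score(xml_file):
    (_, _, notes) = utils.xml_parser(xml_file)
    if len(notes) <= 30:
        return False
    if len(set(note.step for note in notes)) < 8:
        return False
    return all(1 <= note.octave <= 7 for note in notes)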
def unzip(arg):
    """
    Outdated.
    Transform an n2c file into an xml file.
    More information in the extract module.

    Parameters
    ----------
    arg: string
        File to be uncompressed. If it is a directory, all files will be
        uncompressed.

    Returns
    -------
    """
    if os.path.isdir(arg):
        for file in os.listdir(arg):
            temp = utils.getPath(arg, file)
            if os.path.isfile(temp):
                extract.from_n2c_to_xml(temp)
    elif os.path.isfile(arg):
        extract.from_n2c_to_xml(arg)
    else:
        print "It is not a file or a directory."
        sys.exit(2)
def preprocess(self):
    # Initialize variables
    files = []
    num_file = 0
    converter = cv.Converter()
    self.wrongs_idx = [[]]

    # Get all files in the directory
    path = os.path.join(utils.getRoot(), self.directory)
    for file in os.listdir(path):
        files.append(utils.getPath(self.directory, file))

    # Convert all files retrieved
    for data in files:
        converter.reset()
        self.datas.append([])
        (score_info, _, notes) = utils.xml_parser(data)
        converter.setKappa(score_info.divisions)
        num_note = 0
        for note in notes:
            matrix = converter.convert_note(note)
            for elem in matrix:
                new_note, has_changed = self.random_note(elem)
                self.datas[num_file].append(new_note)
                if has_changed:
                    self.wrongs_idx[num_file].append(num_note)
                num_note += 1
        self.wrongs_idx.append([])
        num_file += 1
def getMels(audioPath, id, category):
    print("Generating mels for " + audioPath)
    featuresArray = []
    for i in range(0, SOUND_SAMPLE_LENGTH, HAMMING_STRIDE):
        if i + HAMMING_SIZE <= SOUND_SAMPLE_LENGTH - 1:
            y, sr = librosa.load(audioPath,
                                 offset=i / 1000.0,
                                 duration=HAMMING_SIZE / 1000.0)
            # Make a mel-scaled power (energy-squared) spectrogram
            S = librosa.feature.melspectrogram(y, sr=sr, n_mels=128)
            # Convert to log scale (dB), using the peak power as reference
            log_S = librosa.logamplitude(S, ref_power=np.max)
            mfcc = librosa.feature.mfcc(S=log_S, sr=sr, n_mfcc=13)
            # featuresArray.append(mfcc)
            featuresArray.append(S)
            if len(featuresArray) == 599:
                break
    localPath = "mels/{0}/{1}.pp".format(category, id)
    ppFilePath = utils.resolvePath(utils.getPath(), localPath)
    with open(ppFilePath, 'wb') as f:
        f.write(pickle.dumps(featuresArray))
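# A minimal read-back sketch mirroring the write above (the .pp file holds a
# pickled list of mel spectrogram windows; id and category are placeholders):
def loadMels(id, category):
    localPath = "mels/{0}/{1}.pp".format(category, id)
    ppFilePath = utils.resolvePath(utils.getPath(), localPath)
    with open(ppFilePath, 'rb') as f:
        return pickle.loads(f.read())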
def random(arg):
    """
    Outdated. Now, false notes are dynamically generated in the batch
    (see Dataset module).
    Create xml file(s) with some random notes. Txt file(s) are also
    created, listing all random note indexes. In txt files, all numbers
    are comma-separated.
    More information in the rng module.

    Parameters
    ----------
    arg: string
        File to be note-randomized. If it is a directory, all files will
        be note-randomized.

    Returns
    -------
    """
    if os.path.isdir(arg):
        for file in os.listdir(arg):
            temp = utils.getPath(arg, file)
            if os.path.isfile(temp):
                rng.randomly_modify_file(temp)
    elif os.path.isfile(arg):
        rng.randomly_modify_file(arg)
    else:
        print "It is not a file or a directory."
        sys.exit(2)
def main(verbose=False):
    try:
        # Initialize
        stats_authors = dict()

        # Get current location
        path = utils.getPath()
        params = utils.loadJson(path + "/docs/parameters.json")

        # Get selenium driver
        driver = sf.getDriver(path)

        # Iterate over tags
        for tagName in params["tags"]:
            print("--------------- Starting tag " + tagName + " ---------------")

            # Get tag URL
            driver.get('https://www.tiktok.com/tag/' + tagName)

            # Scroll in tags
            login_form, driver = sf.scrollPage(driver, scope="tag", maxNScrolls=50)

            # Get author names
            authors_list = sf.get_authors(login_form, driver)
            print("--------------- Found ", len(authors_list), "users: ---------------")

            # Extract statistics from each author
            stats_authors = sf.get_stats_author(driver, authors_list, params,
                                                stats_authors, useTikster=True)

        # Compute metrics for each author
        metrics_author = sf.compute_metrics(stats_authors)
        #print(metrics_author)
    except Exception as e:
        traceback.print_exc()
    finally:
        # Check whether driver has been initialized yet
        try:
            driver
        except NameError:
            driver = None
        # Always close drivers
        if driver is not None:
            driver.close()
            driver.quit()
            print("Driver closed")
def convert(id, category, ext):
    localPath = "data/{0}/{1}".format(category, id)
    fullPath = utils.resolvePath(utils.getPath(), localPath)
    # -loglevel goes before the input so ffmpeg does not ignore it as a
    # trailing option
    subprocess.call([
        "ffmpeg", "-loglevel", "panic",
        "-i", fullPath + ".ogg",
        fullPath + ".wav"
    ])
    #subprocess.call(["rm", "-rf", fullPath + ".ogg"])
    #print("Converted {0}".format(id))
    return fullPath + ".wav"
def preprocess_harmony(self):
    """
    Same as above, but the process takes harmony into account.
    """
    # Initialize variables
    files = []
    num_file = 0

    # Get all melody files ("_m" suffix) in the directory
    path = os.path.join(utils.getRoot(), self.directory)
    for file in os.listdir(path):
        if file.split('/')[-1].split('.')[0][-2:] == "_m":
            files.append(utils.getPath(self.directory, file))

    # Convert all files retrieved
    for data in files:
        self.converter.reset()
        self.datas.append([])
        (score_info, _, notes) = utils.xml_parser(data)

        # Retrieve the harmony file matching the data file
        path = "".join(
            [data.split('/')[-1].split('.')[0][:-2], "_h", ".xml"])
        data_h = utils.getPath(self.directory, path)
        if not os.path.isfile(data_h):
            continue

        # Convert harmony file
        harmonies = utils.xml_harmony_parser(data_h)

        # Append harmony notes (xmlnote)
        notes = [notes]
        for harmony in harmonies:
            notes.append(harmony)
        #print len(notes)

        self.converter.setKappa(score_info.divisions)
        self.datas[num_file] = self.converter.convert_with_harmony(
            notes, self.datas, num_file)
        num_file += 1

    # Keep only scores with more than 10 converted notes
    self.datas = [data for data in self.datas if len(data) > 10]
def __str__(self):
    """
    Display the note as defined in xml_note.xml.
    """
    if self.step == REST:
        xml = "xml_rest.xml"
    else:
        xml = "xml_note.xml"
    path = utils.getPath(XML_DIR, xml)
    accidental = FLAT if self.alter == -1 else SHARP
    _dict = self.__dict__
    _dict['accident'] = accidental
    self.setType(220.0)
    with open(path, "r") as template_file:
        return Template(template_file.read()).substitute(_dict)
def preprocess(self):
    """
    Convert all files in self.directory into the right format for
    feeding the neural network (array format). All converted files are
    stored in self.datas.

    Parameters
    ----------

    Returns
    -------
    """
    # Initialize variables
    files = []
    num_file = 0
    self.wrongs_idx = [[]]

    # Get all files in the directory
    path = os.path.join(utils.getRoot(), self.directory)
    for file in os.listdir(path):
        files.append(utils.getPath(self.directory, file))

    # Convert all files retrieved
    for data in files:
        self.converter.reset()
        self.datas.append([])
        (score_info, _, notes) = utils.xml_parser(data)
        self.converter.setKappa(score_info.divisions)
        num_note = 0
        for note in notes:
            try:
                matrix = self.converter.convert_note(note)
                for elem in matrix:
                    self.datas[num_file].append(elem)
                # Old preprocessing; random notes are now handled in next_batch()
                #new_note, has_changed = rng.random_note(elem, self.epsilon)
                #self.datas[num_file].append(new_note)
                #if has_changed:
                #    self.wrongs_idx[num_file].append(num_note)
                #num_note += 1
            except AssertionError as err:
                print(err)
                print(data)
                sys.exit(2)
        self.wrongs_idx.append([])
        num_file += 1
def saveModel(model, epoch):
    # print(next(iter(model.state_dict().items())))
    # print(next(iter(model.named_parameters())))
    """The two statements above show that state_dict does not contain the
    grad status of the params, but named_parameters does. So we save the
    grad status separately. If we call model.load_state_dict, all grads
    are reset to True by default. This is not what we want, as we wish to
    train the backend and frontend separately, i.e. we wish to freeze some
    layers by setting requires_grad = False.
    """
    grad_states = {}
    for param, tensor in model.named_parameters():
        grad_states[param] = tensor.requires_grad
    state = {
        "state_dict": model.state_dict(),
        "grad_states": grad_states,
    }
    file = os.path.join(getPath(epoch, model.stage),
                        getUniqueName(epoch, model.stage))
    torch.save(state, file)
    print(f"Model saved as Epoch{epoch}_{model.stage[-1]}.pt")
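# A minimal companion sketch for restoring a checkpoint written by saveModel,
# re-applying the saved requires_grad flags that load_state_dict would
# otherwise reset to True. It assumes getPath/getUniqueName resolve to the
# same file at load time as at save time.
def loadModel(model, epoch):
    file = os.path.join(getPath(epoch, model.stage),
                        getUniqueName(epoch, model.stage))
    state = torch.load(file)
    model.load_state_dict(state["state_dict"])
    # Restore per-parameter grad status so frozen layers stay frozen
    for param, tensor in model.named_parameters():
        tensor.requires_grad = state["grad_states"][param]
    return model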
def on_press(self, event):
    value = self.text_ctrl.GetValue()
    if not value:
        print("You didn't enter anything!")
    else:
        self.text_ctrl.Hide()
        png = wx.Image('img/whatever.png', wx.BITMAP_TYPE_ANY).ConvertToBitmap()
        wx.StaticBitmap(self, -1, png, (0, 0),
                        (png.GetWidth(), png.GetHeight()))
        if os.path.exists("result.json"):
            os.remove("result.json")
        wordlist = utils.extract(value)
        words = ",".join(wordlist)
        path = utils.getPath()
        utils.crawl(words)
        output = utils.process()
        utils.writelist(output, path)
        png = wx.Image('img/finish.png', wx.BITMAP_TYPE_ANY).ConvertToBitmap()
        wx.StaticBitmap(self, -1, png, (0, 0),
                        (png.GetWidth(), png.GetHeight()))
def zip(arg, with_txt_file=False):
    """
    Outdated.
    Compress an xml file to an n2c file (hexadecimal file).
    More information in the compress module.

    Parameters
    ----------
    arg: string
        File to be compressed. If it is a directory, all files will be
        compressed.
    with_txt_file: bool
        If False, no txt file is required (see compress.from_xml_to_n2c).
        If True, a txt file with wrong note indexes (comma-separated) is
        required. The txt file must have the same basename as arg's
        basename (see compress.from_txt_to_n2c).
    """
    if os.path.isdir(arg):
        for file in os.listdir(arg):
            temp = utils.getPath(arg, file)
            if os.path.isfile(temp):
                if with_txt_file:
                    basename_temp, _ = os.path.splitext(temp)
                    temp_txt = "".join([basename_temp, '.txt'])
                    compress.from_txt_to_n2c(temp, temp_txt)
                else:
                    compress.from_xml_to_n2c(temp)
    elif os.path.isfile(arg):
        if with_txt_file:
            basename_temp, _ = os.path.splitext(arg)
            temp_txt = "".join([basename_temp, '.txt'])
            compress.from_txt_to_n2c(arg, temp_txt)
        else:
            compress.from_xml_to_n2c(arg)
    else:
        print "It is not a file or a directory.", arg
        sys.exit(2)
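# Usage sketch (the directory name is a placeholder): compress every xml file
# in "scores/", expecting each foo.xml to sit next to a foo.txt holding the
# comma-separated wrong-note indexes.
#
#   zip("scores", with_txt_file=True)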
def melody_len(arg):
    """
    Count the average number of notes in the dataset.

    Parameters
    ----------
    arg: string
        Directory with all files to take into account.
    """
    mean = []
    if os.path.isdir(arg):
        for file in os.listdir(arg):
            temp = utils.getPath(arg, file)
            if os.path.isfile(temp):
                if temp.split('.')[0][-2:] == '_m':
                    mean.append(utils.xml_melody_length(temp))
    elif os.path.isfile(arg):
        mean.append(utils.xml_melody_length(arg))
    else:
        print "File or directory not found."
        sys.exit(2)
    print "Mean :", np.mean(mean)
    print "Std :", np.std(mean)
    print "Sum :", sum(mean)
def __str__(self):
    path = utils.getPath(XML_DIR, "xml_harmony.xml")
    self.setType(220.0)
    with open(path, "r") as template_file:
        return Template(template_file.read()).substitute(self.__dict__)
        if name not in duplicates:
            counter = 1
        else:
            counter = duplicates[name]
        newName = name + str(counter)
        while newName in disallowed or newName in allowed:
            # newName cannot be a duplicate or another disallowed sequence
            counter += 1
            newName = name + str(counter)
        duplicates[name] = counter + 1
        if dry_run:
            print(iD, name + ' => ' + '[' + newName + ']')
        else:
            updateName(s, iD, newName)
    c.close()
    s.close()
    conn.commit()
    conn.close()


if __name__ == "__main__":
    parser = createParser()
    args = parser.parse_args()
    DRY_RUN = True if args.dry_run else False
    resolveDisallowedUsers(getPath(), DRY_RUN)
def returnDisallowedUsers(path):
    """
    Returns all users with disallowed usernames as a list.

    Args:
    - path: String of Path to Database file

    Returns:
    - [row]: List of Tuples in form ({id}, {username})
    """
    conn = connectDB(path)
    c = conn.cursor()
    query = '''
        SELECT USERS.id, USERS.username
        FROM USERS
        JOIN DISALLOWED_USERNAMES
        WHERE USERS.username = DISALLOWED_USERNAMES.invalid_username
        ORDER BY USERS.id
    '''
    c.execute(query)
    row = c.fetchall()  # List of Tuples ({id}, {username})
    c.close()
    conn.close()
    return row


if __name__ == "__main__":
    returnDisallowedUsers(getPath())
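# connectDB is not shown here; a minimal sketch, assuming the database file
# is SQLite:
import sqlite3

def connectDB(path):
    """Open the SQLite database at path and return the connection."""
    return sqlite3.connect(path)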
                # check if the new name would cause another collision
                counter += 1
                newName = name + str(counter)
            if dry_run:
                print(iD, name + " => " + "[" + newName + "]")
            else:
                updateName(s, iD, newName)
            duplicates[name] = counter + 1
        else:
            # we have seen this duplicate for the first time
            if dry_run:
                print(iD, name + " => " + "[" + name + "]")
            duplicates[name] += 1
    conn.commit()  # commit changes/updates to DB
    s.close()
    c.close()
    conn.close()


if __name__ == "__main__":
    parser = createParser()
    args = parser.parse_args()
    DRY_RUN = True if args.dry_run else False
    resolveUsernameCollisions(getPath(), DRY_RUN)
def main():
    # # Probabilistic model training
    #
    # # Note bigram
    # note_bigram_list = []
    # for file in os.listdir('./training_set'):
    #     file_name = './training_set/' + file
    #     (_, _, note_list) = xml_parser(file_name)
    #     note_list = [note2Label(note) for note in note_list]
    #     note_bigram_file = [[note_list[i+1], note_list[i]]
    #                         for i in range(len(note_list)-1)]
    #     note_bigram_list = note_bigram_list + note_bigram_file
    #
    # note_bigram_list = allModulationBigram(note_bigram_list)
    #
    # bigram = SubModel("bigram", note_bigram_list)
    # bigram.printSubModel()
    # bigram.writeSaveFile()

    # Probabilistic model construction with data save
    bigram_path = utils.getPath(PROBA_DIR, config.model['bigram'])
    bigram = proba.SubModel("bigram", bigram_path)
    melody_path = utils.getPath(PROBA_DIR, config.model['melody'])
    melody_chord = proba.SubModel("melody_chord", melody_path)
    proba_model = proba.Model([bigram, melody_chord], [0.2, 0.8])

    # Oracle construction
    file_input = config.oracle['oracle']
    if len(sys.argv) >= 3:
        file_input = str(sys.argv[1])
    file_path = utils.getPath(OMNIBOOK, file_input)
    (score_info, chord_list, note_list) = utils.xml_parser(file_path)

    # Duration change
    #for note in note_list:
    #    note.duration = score_info.divisions / 2

    note_labels = []
    for note in note_list:
        # We choose the integer representing pitch relative to octave as label.
        note_labels.append(note.toLabel())
    pythie = oracle.FactorOracle(note_labels, note_list)

    # Improvisation parameters
    improv_length = config.improvisation['length']
    continuity_factor_min = config.improvisation['continuity_factor']
    taboo_list_length = config.improvisation['taboo_length']
    context_length_min = config.improvisation['context_length']

    # Scenario
    #scenario_file = raw_input("Scenario file: ")
    scenario_file = config.oracle['scenario']
    if len(sys.argv) >= 3:
        scenario_file = str(sys.argv[2])
    scenario_name = utils.getPath(OMNIBOOK, scenario_file)
    (_, chord_list, _) = utils.xml_parser(scenario_name)
    for chord in chord_list:
        #chord.duration = chord.duration * score_info.divisions
        chord.timestamp = chord.timestamp * score_info.divisions
    scenario = [[chord.toLabel(), chord.timestamp] for chord in chord_list]

    # Improvisation
    #improv = pythie.classicPath(improv_length, continuity_factor_min,
    #                            taboo_list_length, context_length_min)
    improv = pythie.informedPath(improv_length, continuity_factor_min,
                                 taboo_list_length, context_length_min,
                                 proba_model, scenario)
    #for i in range(len(improv)):
    #    print str(improv[i])

    # OpenMusic output
    utils.back2xml(score_info, improv)
    utils.improv2OM(score_info, improv)
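# A hedged sketch of the config fields main() reads above; the key names come
# from the code, the values are placeholders:
#
#   config.model = {'bigram': 'bigram.save', 'melody': 'melody_chord.save'}
#   config.oracle = {'oracle': 'input.xml', 'scenario': 'scenario.xml'}
#   config.improvisation = {'length': 64, 'continuity_factor': 4,
#                           'taboo_length': 8, 'context_length': 2}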
def downloadFile(id, category, url):
    localPath = "data/{0}/{1}.ogg".format(category, id)
    fullPath = utils.resolvePath(utils.getPath(), localPath)
    subprocess.call(["wget", url, "-O", fullPath, "--quiet"])
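# A dependency-free alternative sketch using the standard library instead of
# shelling out to wget (assumes Python 3; error handling omitted):
from urllib.request import urlretrieve

def downloadFileStdlib(id, category, url):
    localPath = "data/{0}/{1}.ogg".format(category, id)
    fullPath = utils.resolvePath(utils.getPath(), localPath)
    urlretrieve(url, fullPath)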
def __init__(self, logFile='f1'):
    self.logFile = logFile
    self.logConfigFile = getPath()['LOG_CONFIG_FILE']
    self.setLogger()
        result_df = pd.DataFrame(data={'y': predY})
        joined_df = pd.DataFrame(testFid).join(result_df)
        # re-sort by fid because the data was split randomly
        joined_df.to_csv(os.path.join(
            'result', 'xgb_result%d_%dfold.csv' % (iter, args.xgb_nfold)),
            index=False)
        logger.info(
            '----------------------------------------------------------------------\n\n\n'
        )


if __name__ == '__main__':
    # setting file path
    path = getPath()

    # setting logger
    logger = Logger().getLogger()
    logger.info(
        '======================== Execution of train.py ========================'
    )

    # setting args
    parser = argparse.ArgumentParser()
    parser.add_argument('--sample_num',
                        type=int,
                        default=10,
                        help='the number of trying sampling')
    parser.add_argument('--valid_ratio',
                        type=float,
                        default=0.1,
                        help='the ratio of data to be selected as validation')
def run():
    """
    Function to run/build the neural network.
    Logs (for tensorboard) are stored in src/logs/
    A config file is created. It consists of how long it took,
    hyperparameters, and so on (see the end of the function).
    Parameters can be changed, see above.

    Parameters
    ----------

    Returns
    -------
    """
    # Build the model
    network = model.Model(
        n_inputs=FLAGS.note_size,
        n_outputs=FLAGS.output_size,
        n_hidden=FLAGS.n_hidden,
        n_step=FLAGS.future_size + FLAGS.past_size + 1,
        num_layers=FLAGS.num_layers,
        lr=FLAGS.learning_rate,
        momentum=FLAGS.momentum,
        target_note=FLAGS.past_size - 1,
    )

    # Initialize session and Tensorboard
    index = 1
    while os.path.isdir(
            os.path.join(utils.getPath(FLAGS.logs_dir, FLAGS.train_dir),
                         str(index))):
        index += 1
    session = tf.Session()
    session.run(tf.global_variables_initializer())

    # Training Summary
    tf.summary.scalar("loss", network.ferror)
    tf.summary.scalar("Accuracy", network.faccuracy)
    summary_op = tf.summary.merge_all()
    writer_dir = os.path.join(utils.getPath(FLAGS.logs_dir, FLAGS.train_dir),
                              str(index))
    os.makedirs(writer_dir)
    writer = tf.summary.FileWriter(writer_dir, graph=tf.get_default_graph())

    # Validation Summary
    summary_valid_op = tf.summary.merge_all()
    writer_dir = os.path.join(utils.getPath(FLAGS.logs_dir, FLAGS.valid_dir),
                              str(index))
    os.makedirs(writer_dir)
    writer_valid = tf.summary.FileWriter(writer_dir,
                                         graph=tf.get_default_graph())

    # Testing Summary
    summary_test_op = tf.summary.merge_all()
    writer_dir = os.path.join(utils.getPath(FLAGS.logs_dir, FLAGS.test_wdir),
                              str(index))
    os.makedirs(writer_dir)
    writer_test = tf.summary.FileWriter(writer_dir,
                                        graph=tf.get_default_graph())

    # Saving
    writer_dir = os.path.join(utils.getRoot(), FLAGS.save_dir, str(index))
    os.makedirs(writer_dir)

    parameters = {
        "directory": FLAGS.validation_dir,
        "batch_size": FLAGS.batch_size,
        "past": FLAGS.past_size,
        "future": FLAGS.future_size,
        "note_size": FLAGS.note_size,
        "output_size": FLAGS.output_size,
        "epsilon": FLAGS.epsilon_min,
        "meaning": True
    }

    # Pre-processing Dataset for Validation
    #validset = dataset.ValidSet(**parameters)
    validset = ValidationSet(**parameters)
    valid_x, valid_y = validset.next_batch(206)
    #valid_x_p, valid_y_p = validset.next_batch(40*FLAGS.batch_size, for_future=False)
    # Eval batch
    eval_x, eval_y = validset.eval_batch(206, for_future=True)
    tmp_x, tmp_y = validset.eval_batch(FLAGS.batch_size, for_future=False)
    del validset
    print "[INFO] Validation Dataset Created"

    # Pre-processing Dataset for Testing
    parameters["directory"] = FLAGS.test_dir
    #testset = dataset.TestSet(**parameters)
    testset = TestingSet(**parameters)
    test_x, test_y = testset.next_batch(206)
    #test_x_p, test_y_p = testset.next_batch(40*FLAGS.batch_size, for_future=False)
    del testset
    print "[INFO] Test Dataset Created"

    # Pre-processing Dataset for Training
    parameters["directory"] = FLAGS.training_dir
    parameters["epsilon"] = FLAGS.epsilon_max
    #sets = dataset.TrainSet(**parameters)
    trainset = TrainingSet(**parameters)
    print "[INFO] Training Dataset Created"
    batch_x, batch_y = trainset.next_batch()

    #---
    # It's high time we fed our model!
    #---
    start = time.time()
    for step in xrange(FLAGS.max_step):
        for i in xrange(FLAGS.max_iter):
            # Feed the model
            summary, _ = network.feed(session, summary_op, batch_x, batch_y)
            # Write log
            writer.add_summary(summary, step * FLAGS.max_iter + i)
            batch_x, batch_y = trainset.next_transpositions_batch()

        # Validation
        summary_valid, _ = network.eval(session, summary_valid_op, valid_x,
                                        valid_y)
        # Write Log
        writer_valid.add_summary(summary_valid, step * FLAGS.max_iter)

        # Test the model
        summary_test, _ = network.eval(session, summary_test_op, test_x,
                                       test_y)
        # Write Log
        writer_test.add_summary(summary_test, step * FLAGS.max_iter)

        # Display progress
        print("Iterations : {}".format(step))
        if (step + 1) % 10 == 0:
            trainset.epsilon = trainset.epsilon - 2 if trainset.epsilon > 1 else 1
            saver = tf.train.Saver()
            #saver.save(session, utils.getPath(writer_dir, FLAGS.save + str(step*FLAGS.max_iter)))

    """for step in xrange(FLAGS.max_step, 2*FLAGS.max_step):
        # Get next batch
        batch_x, batch_y = trainset.next_batch(for_future=False)
        # Feed the model
        summary, _ = network.feed_past(session, summary_op, batch_x, batch_y)
        # Write log
        writer.add_summary(summary, step)
        # Validation
        summary_valid, _ = network.eval_past(session, summary_valid_op,
                                             valid_x_p, valid_y_p)
        # Write Log
        writer_valid.add_summary(summary_valid, step)
        # Test the model
        summary_test, _ = network.eval_past(session, summary_test_op,
                                            test_x_p, test_y_p)
        # Write Log
        writer_test.add_summary(summary_test, step)
        # Display accuracy and loss every 200 steps
        #if step % FLAGS.display_time == 0:
        #    print("Iterations : {}".format(step))"""

    # Save the model
    #writer_dir = os.path.join(utils.getRoot(), FLAGS.save_dir, str(index))
    #os.makedirs(writer_dir)
    saver = tf.train.Saver()
    saver.save(session, utils.getPath(writer_dir, FLAGS.save))
    end = time.time()

    predicted, real = network.test(session, eval_x, eval_y)
    dict_acc = utils.metrics(utils.perf_measure(predicted, real))
    print "On evaluation dataset"
    for x in dict_acc:
        print x

    predicted, real = network.test(session, valid_x, valid_y)
    dict_acc = utils.metrics(utils.perf_measure(predicted, real))
    print "On validation dataset"
    for x in dict_acc:
        print x

    # Write parameters and hyper-parameters into a config file
    with open(
            os.path.join(utils.getRoot(), FLAGS.config_dir,
                         "".join([str(index), ".txt"])), "wb") as config_file:
        config_file.write("Time : {}\n".format(end - start))
        config_file.write("Step : {}\n".format(FLAGS.max_step))
        config_file.write("LR : {}\n".format(FLAGS.learning_rate))
        config_file.write("Momentum : {}\n".format(FLAGS.momentum))
        config_file.write("Past Size : {}\n".format(FLAGS.past_size))
        config_file.write("Future Size : {}\n".format(FLAGS.future_size))
        config_file.write("Batch Size : {}\n".format(FLAGS.batch_size))
        config_file.write("Hidden Size : {}\n".format(FLAGS.n_hidden))
        config_file.write("M-epsilon : {}\n".format(FLAGS.epsilon_max))
        config_file.write("m-Epsilon : {}\n".format(FLAGS.epsilon_min))
        config_file.write("Rhythm Size : {}\n".format(cv.TOTAL_SIZE))
        config_file.write("Iter Size : {}\n".format(FLAGS.max_iter))
        config_file.write("Step Size : {}\n".format(FLAGS.max_step))
        config_file.write("Hasard : {}\n".format(0.5))