def no_valid_tracks():
    """Return the list of track folders flagged as "no valid".

    The result is the concatenation of two lists of paths, both rooted at
    ``config.database_root()``:

    1. a hand-curated set of folders (marked "Too good" by the original
       author), and
    2. one path per line of ``<data_root>/few_instrument_files.txt``.

    The pretraining counterpart (``few_instrument_files_pretraining.txt``)
    is not read by this function.

    Returns:
        list of str: curated folders first, then the folders listed in the
        text file, in file order.
    """
    # Too good
    curated_relative_paths = (
        "hand_picked_Spotify/40",
        "hand_picked_Spotify/45",
        "imslp/21",
        "imslp/43",
        "imslp/20",
        "imslp/44",
        "imslp/22",
        "imslp/12",
        "imslp/14",
        "imslp/62",
        "imslp/68",
        "imslp/39",
        "imslp/15",
        "imslp/26",
        "imslp/71",
        "imslp/3",
        "imslp/78",
        "imslp/11",
        "imslp/86",
        "imslp/16",
        "imslp/25",
        "imslp/56",
        "imslp/77",
        "imslp/5",
        "imslp/23",
        "imslp/45",
        "imslp/50",
        "imslp/64",
        "debug/1",
        "debug/2",
    )
    curated_folders = [
        os.path.join(config.database_root(), relative_path)
        for relative_path in curated_relative_paths
    ]

    # Each line of the listing is a path relative to the database root;
    # only the trailing newline is removed.
    listing_path = config.data_root() + "/few_instrument_files.txt"
    with open(listing_path, 'r') as listing:
        few_instrument_folders = [
            os.path.join(config.database_root(), entry.rstrip("\n"))
            for entry in listing
        ]

    return curated_folders + few_instrument_folders
def load_matrix_NO_PROCESSING(block_folder, embedded_piano_bool, mask_orch_bool):
    """Load the ``.npy`` arrays stored for one data block, as-is.

    All files are read from
    ``<config.data_root()>/<config.data_name()>/<block_folder>/``:

    * ``pr_piano.npy``          (always)
    * ``pr_piano_embedded.npy`` (only if ``embedded_piano_bool``)
    * ``pr_orch.npy``           (always)
    * ``duration_piano.npy``    (always)
    * ``pr_mask_orch.npy``      (only if ``mask_orch_bool``)

    The file names are joined onto the block folder explicitly. Deriving
    them with ``re.sub('piano', ...)`` on the full path would also rewrite
    any directory component that happens to contain "piano".

    Args:
        block_folder: folder of the block, relative to the dataset folder.
        embedded_piano_bool: whether to load the embedded piano array.
        mask_orch_bool: whether to load the orchestra mask array.

    Returns:
        tuple: ``(pr_piano_transformed, pr_piano_embedded,
        pr_orch_transformed, duration_piano, mask_orch)``. The two optional
        entries are ``None`` when their flag is falsy.
    """
    folder = os.path.join(config.data_root(), config.data_name(), block_folder)

    def _load(filename):
        # Load one array stored in this block's folder.
        return np.load(os.path.join(folder, filename))

    # Files are loaded in a fixed order so a missing file is reported
    # at the same point regardless of the flags.
    pr_piano_transformed = _load('pr_piano.npy')
    pr_piano_embedded = _load('pr_piano_embedded.npy') if embedded_piano_bool else None
    pr_orch_transformed = _load('pr_orch.npy')
    duration_piano = _load('duration_piano.npy')
    mask_orch = _load('pr_mask_orch.npy') if mask_orch_bool else None

    return pr_piano_transformed, pr_piano_embedded, pr_orch_transformed, duration_piano, mask_orch
def generate_mean_ordering(self, sess, feed_dict, orch_t):
    """Generate orchestra units under several random orderings and average them.

    Every array in ``feed_dict`` is tiled ``self.num_ordering`` times along
    axis 0 so that all orderings are evaluated in a single ``sess.run`` per
    step. At step ``d`` each ordering reveals one more unit: the unit is
    marked in the mask and its value is sampled from a Bernoulli
    distribution whose parameter is the model's prediction for that unit.

    Note that ``feed_dict`` is modified in place (values are replaced by
    their tiled versions, and the ``self.orch_pred`` / ``self.mask_input``
    entries are set at every step).

    Args:
        sess: session object exposing ``run(fetches, feed_dict)``.
        feed_dict: dict of placeholder -> array; must contain
            ``self.piano_t_ph``.
        orch_t: 2-D array of shape ``(batch_size, orch_dim)``.

    Returns:
        tuple: ``(loss_batch_mean, preds_mean_over_ordering)`` where the
        first item is the mean of the losses collected at every step and
        the second is a ``(batch_size, orch_dim)`` array holding, for each
        batch item, the mean of its sampled predictions over the orderings.
    """
    batch_size, orch_dim = orch_t.shape
    loss_batch_list = []

    # Generate the orderings in parallel -> duplicate the matrices along batch dim
    piano_t_OLD = feed_dict[self.piano_t_ph]
    # Iterate over a snapshot: entries are reassigned inside the loop, and
    # ``items()`` (unlike ``iteritems()``) exists on both Python 2 and 3.
    for k, v in list(feed_dict.items()):
        new_v = np.concatenate([v for _ in range(self.num_ordering)], axis=0)
        # DEBUG, can be removed later.
        # The condition must not be wrapped in a list: a non-empty list is
        # always truthy, which would make the assertion a no-op.
        assert np.all(new_v[:batch_size] == v), "problem when duplicating v"
        feed_dict[k] = new_v

    # Also duplicate orch_t
    new_orch_t = np.concatenate([orch_t for _ in range(self.num_ordering)], axis=0)
    # Start with an orchestra prediction and mask equal to zero
    orch_pred = np.zeros_like(new_orch_t)
    # Mask
    mask = np.zeros_like(orch_t)

    pitch_orch = np.load(
        os.path.join(config.data_root(), config.data_name(), "pitch_orch.npy"))
    instru_orch = np.load(
        os.path.join(config.data_root(), config.data_name(), "instru_orch.npy"))
    pitch_piano = np.load(
        os.path.join(config.data_root(), config.data_name(), "pitch_piano.npy"))

    # List forbidden intervals (presumably semitone and tritone, both directions).
    # Defined once, outside the loops, so it exists even when the piano
    # frame contains no active note.
    list_intervals = [1, 6]
    list_intervals = list_intervals + [-e for e in list_intervals]

    # Initialization harmonic rules
    ind_notes_piano = np.where(piano_t_OLD == 1)
    for tt, y in zip(ind_notes_piano[0], ind_notes_piano[1]):
        piano_note = pitch_piano[y]
        this_instru = instru_orch[y]
        for interval in list_intervals:
            # Bounds are checked BEFORE indexing so that an out-of-range
            # neighbour is skipped instead of raising IndexError.
            if (y + interval < orch_dim) and (y + interval >= 0) and (
                    this_instru == instru_orch[y + interval]):
                # NOTE(review): the offset is a constant +1 rather than
                # ``interval`` -- confirm this is intended.
                mask[tt, pitch_orch == (piano_note + 1)] = 1

    # Have to do a second pass, because if a semi-tone or tritone was in the original chord, we masked it
    # But building the mask does not take long, so it's not a big problem ~ 50ms
    for tt, y in zip(ind_notes_piano[0], ind_notes_piano[1]):
        piano_note = pitch_piano[y]
        this_instru = instru_orch[y]
        mask[tt, pitch_orch == piano_note] = 0

    # Expand these matrices (and your horizon)
    pitch_orch = np.concatenate(
        [pitch_orch for _ in range(self.num_ordering)], axis=0)
    pitch_piano = np.concatenate(
        [pitch_piano for _ in range(self.num_ordering)], axis=0)
    instru_orch = np.concatenate(
        [instru_orch for _ in range(self.num_ordering)], axis=0)
    mask = np.concatenate([mask for _ in range(self.num_ordering)], axis=0)

    orderings = []
    for ordering_ind in range(self.num_ordering):
        # This ordering. ``random.shuffle`` needs a mutable sequence, so
        # materialise the range as a list (required on Python 3).
        ordering = list(range(orch_dim))
        random.shuffle(ordering)
        orderings.append(ordering)

    # Loop over the length of the orderings
    for d in range(orch_dim):
        # Generate step
        feed_dict[self.orch_pred] = orch_pred
        feed_dict[self.mask_input] = mask

        loss_batch, preds_batch = sess.run([self.loss_val, self.preds], feed_dict)
        loss_batch_list.append(loss_batch)

        # Update matrices
        for ordering_ind in range(self.num_ordering):
            batch_begin = batch_size * ordering_ind
            batch_end = batch_size * (ordering_ind + 1)
            mask[batch_begin:batch_end, orderings[ordering_ind][d]] = 1

            ##################################################
            # Do we sample or not ??????
            note_predicted = orderings[ordering_ind][d]
            is_note_on = np.random.binomial(
                1, preds_batch[batch_begin:batch_end, note_predicted])
            orch_pred[batch_begin:batch_end, note_predicted] = is_note_on

            # Update the mask by banning, rule based, units in the orchestra
            # NOTE(review): ``y``, ``tt``, ``this_instru`` and ``piano_note``
            # are leftovers from the initialisation loops above. They do not
            # depend on ``batch_ind`` or ``note_predicted`` and are undefined
            # when the piano frame has no active note. ``pitch_orch`` has
            # also been tiled by this point, so the boolean index may no
            # longer match the mask's second axis -- TODO confirm the
            # intended rule before relying on this branch.
            for batch_ind in range(batch_size):
                if is_note_on[batch_ind]:
                    for interval in list_intervals:
                        if (y + interval < orch_dim) and (y + interval >= 0) and (
                                this_instru == instru_orch[y + interval]):
                            mask[tt, pitch_orch == (piano_note + 1)] = 1
            ##################################################

    # Mean over the different generations (Comb filter output)
    preds_mean_over_ordering = np.zeros((batch_size, orch_dim))
    # Rows belonging to batch item 0 in each ordering; shifted by one per item.
    ind_orderings = np.asarray(
        [e * batch_size for e in range(self.num_ordering)])
    for ind_batch in range(batch_size):
        preds_mean_over_ordering[ind_batch, :] = np.mean(
            orch_pred[ind_orderings, :], axis=0)
        ind_orderings += 1

    # Original author's remark (translated from French): "this is nonsense".
    loss_batch_mean = np.mean(loss_batch_list)
    return loss_batch_mean, preds_mean_over_ordering
] subset_B = [ database_orchestration + "/bouliane", database_orchestration + "/hand_picked_Spotify", database_orchestration + "/imslp" ] subset_C = [ database_arrangement + "/OpenMusicScores", database_arrangement + "/Kunstderfuge", database_arrangement + "/Musicalion", database_arrangement + "/Mutopia" ] ############################## data_folder = config.data_root() + '/Data' if DEBUG: data_folder += '_DEBUG' if binary_piano: data_folder += '_bp' if binary_orch: data_folder += '_bo' data_folder += '_tempGran' + str(quantization) if ERASE: if os.path.isdir(data_folder): shutil.rmtree(data_folder) os.makedirs(data_folder) ff = open(data_folder + '/binary_piano', 'wb') ff.close()
def main():
    """Run the hyper-parameter search for the configured model.

    Steps:

    1. Import the model named by ``config.model()`` and create its result
       folder ``<result_root>/<data_name>/<Model.name()>``.
    2. Build the script parameters: values from ``config.parameters``, the
       binarisation flags (presence of the marker files ``binary_piano`` /
       ``binary_orch`` in the dataset folder), the model name, and the
       content of the dataset's ``metadata.pkl``.
    3. Configure logging to ``<scratch_space>/log`` (overwritten at each
       run) and to the console, then log every parameter.
    4. Either run each predefined configuration (``DEFINED_CONFIG`` truthy;
       an existing folder for a configuration is erased first), or sample
       random configurations from the model's hyper-parameter space until
       ``parameters["max_hyperparam_configs"]`` folders exist.

    Returns:
        None
    """
    model_name = config.model()
    Model = import_model.import_model(model_name)

    # DATABASE
    DATABASE = config.data_name()
    DATABASE_PATH = config.data_root() + "/" + DATABASE

    # RESULTS
    result_folder = config.result_root() + '/' + DATABASE + '/' + Model.name()
    if not os.path.isdir(result_folder):
        os.makedirs(result_folder)

    # Parameters
    parameters = config.parameters(result_folder)
    # The dataset folder contains empty marker files when binarisation was applied
    parameters["binarize_piano"] = os.path.isfile(DATABASE_PATH + '/binary_piano')
    parameters["binarize_orch"] = os.path.isfile(DATABASE_PATH + '/binary_orch')
    parameters["model_name"] = model_name

    # Load the database metadata and add them to the script parameters to keep a record of the data processing pipeline.
    # The file is opened in a ``with`` block so the handle is always closed.
    # NOTE: unpickling executes arbitrary code; only load metadata files you trust.
    with open(DATABASE_PATH + '/metadata.pkl', 'rb') as metadata_file:
        parameters.update(pkl.load(metadata_file))

    ############################################################
    # Logging
    ############################################################
    # log file
    log_file_path = config.scratch_space() + '/log'
    # set up logging to file
    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s %(levelname)-8s %(message)s',
                        datefmt='%m-%d %H:%M',
                        filename=log_file_path,
                        filemode='w')
    # define a Handler which writes INFO messages or higher to the sys.stderr
    console = logging.StreamHandler()
    console.setLevel(logging.INFO)
    # set a format which is simpler for console use
    formatter = logging.Formatter('%(levelname)-8s %(message)s')
    # tell the handler to use this format
    console.setFormatter(formatter)
    # add the handler to the root logger
    logging.getLogger('').addHandler(console)

    # Now, we can log to the root logger, or any other logger. First the root...
    logging.info('#' * 60)
    logging.info('#' * 60)
    logging.info('#' * 60)
    logging.info('* L * O * P *')
    logging.info('** Model : ' + Model.name())
    for k, v in parameters.items():
        logging.info('** ' + k + ' : ' + str(v))
    logging.info('#' * 60)
    logging.info('#' * 60)

    ############################################################
    # Hyper parameter space
    ############################################################
    # Two cases :
    # 1/ Random search
    model_parameters_space = Model.get_hp_space()
    # 2/ Defined configurations
    configs = config.import_configs()

    ############################################################
    # Grid search loop
    ############################################################
    # Organisation :
    # Each config is a folder with a random ID (integer)
    # In each of these folders there is :
    #    - a config.pkl file with the hyper-parameter space
    #    - a result.txt file with the result
    # The result.csv file containing id;result is created from the directory, rebuilt from time to time
    if DEFINED_CONFIG:
        for config_id, model_parameters in configs.items():
            config_folder = parameters['result_folder'] + '/' + config_id
            # Start from a clean folder for this configuration
            if os.path.isdir(config_folder):
                shutil.rmtree(config_folder)
            os.mkdir(config_folder)
            config_loop(Model, config_folder, model_parameters, parameters,
                        DATABASE_PATH)
    else:
        # Already tested configs
        list_config_folders = glob.glob(result_folder + '/*')
        number_hp_config = max(
            0,
            parameters["max_hyperparam_configs"] - len(list_config_folders))
        for hp_config in range(number_hp_config):
            # Give a random ID and create folder: draw until the ID is unused
            while True:
                ID_config = str(random.randint(0, 2**25))
                config_folder = parameters['result_folder'] + '/' + ID_config
                if config_folder not in list_config_folders:
                    break
            os.mkdir(config_folder)

            # Sample model parameters from hyperparam space
            model_parameters = hyperopt.pyll.stochastic.sample(
                model_parameters_space)

            config_loop(Model, config_folder, model_parameters, parameters,
                        DATABASE_PATH)

            # Update folder list
            list_config_folders.append(config_folder)
    return
folder_paths = [os.path.join(DATABASE_PATH, e) for e in folder_paths] folder_paths_pretraining = build_filepaths_list( DATABASE_PATH_PRETRAINING, DATABASE_NAMES_PRETRAINING) folder_paths_pretraining = [ os.path.join(DATABASE_PATH_PRETRAINING, e) for e in folder_paths_pretraining ] rotten_files = [] for track in (folder_paths): num_instru = list_tracks(track) if num_instru < MIN_INSTRU: rotten_files.append(track) with open(config.data_root() + "/few_instrument_files.txt", 'w') as ff: for rotten_file in rotten_files: # Get only the last part split_filename = re.split('/', rotten_file) ff.write(split_filename[-2] + '/' + split_filename[-1] + '\n') rotten_files_pretraining = [] for track in (folder_paths_pretraining): num_instru = list_tracks(track) if num_instru < MIN_INSTRU: rotten_files_pretraining.append(track) with open(config.data_root() + "/few_instrument_files_pretraining.txt", 'w') as ff: for rotten_file in rotten_files_pretraining: # Get only the last part split_filename = re.split('/', rotten_file)