def update_instru_mapping(folder_path, instru_mapping, quantization):
    """Update ``instru_mapping`` with the instruments of one score folder.

    The folder is read with ``build_data_aux.process_folder`` when it holds two
    music files (a piano score already exists) or with
    ``build_data_aux_no_piano.process_folder_NP`` when it holds a single one.
    The piano and then the orchestra pianorolls are passed to
    ``build_data_aux.instru_pitch_range``, which modifies the mapping from
    instrument to indices in pianorolls and pitch bounds.

    Reads the module-level settings ``binary_piano``, ``binary_orch`` and
    ``temporal_granularity``.

    Args:
        folder_path: Folder containing the ``.mid`` / ``.xml`` files.
        instru_mapping: Mapping to update.
        quantization: Forwarded unchanged to the folder reader.

    Returns:
        The updated mapping. ``instru_mapping`` itself is returned when
        ``folder_path`` is not a directory, when the single-file reader
        raised, or when the reader reported ``duration`` as ``None``.

    Raises:
        Exception: If the folder holds neither one nor two music files.
    """
    if not os.path.isdir(folder_path):
        return instru_mapping

    # Is there an original piano score or do we have to create it ?
    # The count is the larger of the number of .mid and of .xml files.
    num_music_file = max(len(glob.glob(folder_path + '/*.mid')),
                         len(glob.glob(folder_path + '/*.xml')))
    if num_music_file == 2:
        is_piano = True
    elif num_music_file == 1:
        is_piano = False
    else:
        raise Exception(
            "Expected 1 or 2 music files (.mid or .xml) in " + folder_path +
            ", found " + str(num_music_file))

    # Read the pianorolls
    if is_piano:
        pr_piano, _, _, instru_piano, _, pr_orch, _, _, instru_orch, _, duration = \
            build_data_aux.process_folder(
                folder_path, quantization, binary_piano, binary_orch,
                temporal_granularity, gapopen=3, gapextend=1)
    else:
        try:
            pr_piano, _, _, instru_piano, _, pr_orch, _, _, instru_orch, _, duration = \
                build_data_aux_no_piano.process_folder_NP(
                    folder_path, quantization, binary_piano, binary_orch,
                    temporal_granularity)
        except Exception:
            # Best effort: an unreadable folder is skipped, but interpreter
            # level signals (KeyboardInterrupt, SystemExit) still propagate.
            duration = None
            logging.warning("Could not read file in " + folder_path)

    if duration is None:
        # Files that could not be aligned
        return instru_mapping

    # The result is re-bound explicitly (a = f(a)) even though the callee
    # may also update the mapping in place; this keeps the data flow visible.
    instru_mapping = build_data_aux.instru_pitch_range(
        instrumentation=instru_piano,
        pr=pr_piano,
        instru_mapping=instru_mapping,
    )
    instru_mapping = build_data_aux.instru_pitch_range(
        instrumentation=instru_orch,
        pr=pr_orch,
        instru_mapping=instru_mapping,
    )

    return instru_mapping
def check_orchestration_alignment(path_db, subfolder_names, temporal_granularity, quantization, unit_type, gapopen, gapextend):
    """Dump aligned piano/orchestra pianorolls of a database for inspection.

    For every folder found under ``path_db/<sub_db>`` the piano and orchestra
    scores are read with ``build_data_aux.process_folder``, merged per
    instrument, and written under
    ``DEBUG/Grid_search_database_alignment/<params>/<sub_db>_<folder>``:
    one call to ``visualize_mat`` on the side-by-side matrix and three MIDI
    files (``0.mid`` piano, ``1.mid`` orchestra, ``both_aligned.mid``).
    Folders whose output directory already exists are skipped.

    Reads the module-level settings ``binary_piano`` and ``binary_orch``.

    Args:
        path_db: Root folder of the database.
        subfolder_names: Names of the sub-databases to scan inside ``path_db``.
        temporal_granularity: Forwarded to ``process_folder``; when equal to
            ``"event_level"`` the MIDI files are written with quantization 1.
        quantization: Forwarded to ``process_folder``; also used when writing
            MIDI unless ``temporal_granularity`` is ``"event_level"``.
        unit_type: Only used to build the output directory name.
        gapopen: Forwarded to ``process_folder``; part of the directory name.
        gapextend: Forwarded to ``process_folder``; part of the directory name.

    Returns:
        None. All results are written to disk.
    """
    output_dir = 'DEBUG/' + 'Grid_search_database_alignment/' + '_'.join([
        str(quantization),
        temporal_granularity,
        unit_type,
        str(gapopen),
        str(gapextend),
    ])

    if temporal_granularity == "event_level":
        quantization_write = 1
    else:
        quantization_write = quantization

    for sub_db in subfolder_names:
        # Single-argument print(...) behaves identically on Python 2 and 3.
        print('#' * 30)
        print(sub_db)
        sub_db_path = path_db + '/' + sub_db
        if not os.path.isdir(sub_db_path):
            continue

        for folder_name in os.listdir(sub_db_path):
            print('# ' + sub_db + ' : ' + folder_name)
            folder_path = sub_db_path + '/' + folder_name
            if not os.path.isdir(folder_path):
                continue

            # Skip already computed folders
            save_folder_name = output_dir + '/' + sub_db + '_' + folder_name
            if os.path.isdir(save_folder_name):
                continue

            pr_piano_no_map, _, _, _, _, pr_orchestra_no_map, _, _, instru_orch, _, duration = \
                build_data_aux.process_folder(
                    folder_path, quantization, binary_piano, binary_orch,
                    temporal_granularity, gapopen, gapextend)

            # Apply the mapping: every piano track is merged into a single
            # 'Piano' pianoroll with an element-wise maximum.
            pr_piano = {}
            for track_pr in pr_piano_no_map.values():
                if 'Piano' in pr_piano:
                    pr_piano['Piano'] = np.maximum(pr_piano['Piano'], track_pr)
                else:
                    pr_piano['Piano'] = track_pr

            pr_orchestra = {}
            for track_name, track_pr in pr_orchestra_no_map.items():
                # Unmix instruments: one track may map to several instrument
                # names. Trailing NUL characters are stripped from the key.
                mixed_name = instru_orch[track_name.rstrip('\x00')]
                for instru_name in build_data_aux.unmixed_instru(mixed_name):
                    if instru_name in pr_orchestra:
                        pr_orchestra[instru_name] = np.maximum(
                            pr_orchestra[instru_name], track_pr)
                    else:
                        pr_orchestra[instru_name] = track_pr

            # Sum all instruments, then lay piano and orchestra side by side
            # with a 30-column block of zeros between them.
            piano_aligned = sum_along_instru_dim(pr_piano)
            orchestra_aligned = sum_along_instru_dim(pr_orchestra)
            separator = np.zeros((duration, 30), dtype=np.int16)
            side_by_side = np.concatenate(
                (piano_aligned, separator, orchestra_aligned), axis=1)

            if not os.path.isdir(save_folder_name):
                os.makedirs(save_folder_name)

            visualize_mat(side_by_side, save_folder_name, 'aligned')

            write_midi(pr=pr_piano, quantization=quantization_write,
                       write_path=save_folder_name + '/0.mid', tempo=80)
            write_midi(pr=pr_orchestra, quantization=quantization_write,
                       write_path=save_folder_name + '/1.mid', tempo=80)
            write_midi(pr={'Piano': piano_aligned, 'Violin': orchestra_aligned},
                       quantization=quantization_write,
                       write_path=save_folder_name + '/both_aligned.mid',
                       tempo=80)
def build_split_matrices(folder_paths, destination_folder, chunk_size, instru_mapping, N_piano, N_orchestra, embedding_model, binary_piano, binary_orch):
    """Convert score folders into chunked ``.npy`` matrices on disk.

    Each readable folder is turned into a piano matrix, an orchestra matrix
    and a piano embedding, then cut into consecutive chunks of at most
    ``chunk_size`` rows. Every chunk gets its own folder
    ``<destination_folder>/<file_counter>_<split_counter>`` containing
    ``pr_piano.npy``, ``pr_piano_embedded.npy``, ``pr_orch.npy``,
    ``duration_piano.npy`` and ``duration_orch.npy``.

    Reads the module-level settings ``quantization``, ``temporal_granularity``
    and ``cuda_gpu``.

    NOTE(review): another ``build_split_matrices`` with a longer signature is
    defined later in this file; at import time that later definition replaces
    this one.

    Args:
        folder_paths: Iterable of folder paths; trailing whitespace (such as a
            newline) is stripped from each. Paths that are not directories are
            skipped.
        destination_folder: Existing folder in which chunk folders are created.
        chunk_size: Maximum number of rows per chunk.
        instru_mapping: Instrument mapping; ``instru_mapping['Piano']`` must
            provide ``pitch_min`` and ``pitch_max``.
        N_piano: Number of columns of the piano matrix.
        N_orchestra: Number of columns of the orchestra matrix.
        embedding_model: Callable applied as ``embedding_model(batch, 0)`` to
            float tensors of shape ``(batch, 1, 128)``.
        binary_piano: Forwarded to the folder reader.
        binary_orch: Forwarded to the folder reader.

    Returns:
        ``(train_and_valid_files, train_only_files)``: two dicts mapping a
        folder path to the list of chunk folders created for it. A folder is
        listed in ``train_only_files`` when it is in
        ``avoid_tracks.no_valid_tracks()``.

    Raises:
        Exception: If a folder holds neither one nor two music files.
        OSError: If a chunk folder already exists (``os.mkdir``).
    """
    file_counter = 0
    train_only_files = {}
    train_and_valid_files = {}

    for folder_path in folder_paths:
        ###############################
        # Read file
        folder_path = folder_path.rstrip()
        logging.info(" : " + folder_path)
        if not os.path.isdir(folder_path):
            continue

        # Decide once per folder which registry receives its splits.
        if folder_path in avoid_tracks.no_valid_tracks():
            split_registry = train_only_files
        else:
            split_registry = train_and_valid_files
        split_registry[folder_path] = []

        # Is there an original piano score or do we have to create it ?
        num_music_file = max(len(glob.glob(folder_path + '/*.mid')),
                             len(glob.glob(folder_path + '/*.xml')))
        if num_music_file == 2:
            is_piano = True
        elif num_music_file == 1:
            is_piano = False
        else:
            raise Exception(
                "Expected 1 or 2 music files (.mid or .xml) in " +
                folder_path + ", found " + str(num_music_file))

        # Get pr, warped and duration
        if is_piano:
            new_pr_piano, _, new_duration_piano, _, _, new_pr_orchestra, _, new_duration_orch, new_instru_orchestra, _, duration = \
                build_data_aux.process_folder(
                    folder_path, quantization, binary_piano, binary_orch,
                    temporal_granularity, gapopen=3, gapextend=1)
        else:
            try:
                new_pr_piano, _, new_duration_piano, _, _, new_pr_orchestra, _, new_duration_orch, new_instru_orchestra, _, duration = \
                    build_data_aux_no_piano.process_folder_NP(
                        folder_path, quantization, binary_piano, binary_orch,
                        temporal_granularity)
            except Exception:
                # Best effort: skip unreadable folders, but let
                # KeyboardInterrupt / SystemExit propagate.
                logging.warning("Could not read file in " + folder_path)
                continue

        # Skip files that could not be matched
        if new_pr_piano is None:
            # It's definitely not a match...
            # Check for the files : are they really a piano score and its
            # orchestration ?
            with open('log_build_db.txt', 'a') as f:
                f.write(folder_path + '\n')
            continue

        pr_orch = build_data_aux.cast_small_pr_into_big_pr(
            new_pr_orchestra, new_instru_orchestra, 0, duration,
            instru_mapping, np.zeros((duration, N_orchestra)))
        pr_piano = build_data_aux.cast_small_pr_into_big_pr(
            new_pr_piano, {}, 0, duration, instru_mapping,
            np.zeros((duration, N_piano)))
        ###############################

        ###############################
        # Embed piano
        pitch_min = instru_mapping['Piano']['pitch_min']
        pitch_max = instru_mapping['Piano']['pitch_max']
        piano_embedded = []
        len_piano = len(pr_piano)
        batch_size = 500  # forced to batch for memory issues
        for start_batch_index in range(0, len_piano, batch_size):
            end_batch_index = min(start_batch_index + batch_size, len_piano)
            this_batch_size = end_batch_index - start_batch_index
            # Embeddings accept size 128 samples: the piano range is placed
            # at its pitch position inside a zero-filled 128-wide frame.
            piano_resize_emb = np.zeros((this_batch_size, 1, 128))
            piano_resize_emb[:, 0, pitch_min:pitch_max] = \
                pr_piano[start_batch_index:end_batch_index]
            piano_resize_emb_TT = torch.tensor(piano_resize_emb)
            if cuda_gpu:
                piano_resize_emb_TT = piano_resize_emb_TT.cuda()
            piano_embedded_TT = embedding_model(piano_resize_emb_TT.float(), 0)
            if cuda_gpu:
                piano_embedded_TT = piano_embedded_TT.cpu()
            piano_embedded.append(piano_embedded_TT.numpy())
        piano_embedded = np.concatenate(piano_embedded)
        ###############################

        ###############################
        # Split
        last_index = pr_piano.shape[0]
        for split_counter, start_index in enumerate(
                range(0, last_index, chunk_size)):
            this_split_folder = destination_folder + '/' + \
                str(file_counter) + '_' + str(split_counter)
            os.mkdir(this_split_folder)
            end_index = min(start_index + chunk_size, last_index)

            np.save(this_split_folder + '/pr_piano.npy',
                    pr_piano[start_index:end_index].astype(np.float32))
            np.save(this_split_folder + '/pr_piano_embedded.npy',
                    piano_embedded[start_index:end_index].astype(np.float32))
            np.save(this_split_folder + '/pr_orch.npy',
                    pr_orch[start_index:end_index].astype(np.float32))

            # NOTE(review): durations are stored as int8; values above 127
            # would not fit -- confirm the expected range.
            np.save(this_split_folder + '/duration_piano.npy',
                    np.asarray(new_duration_piano[start_index:end_index],
                               dtype=np.int8))
            np.save(this_split_folder + '/duration_orch.npy',
                    np.asarray(new_duration_orch[start_index:end_index],
                               dtype=np.int8))

            # Keep track of splits
            split_registry[folder_path].append(this_split_folder)

        # One counter value per successfully processed folder.
        file_counter += 1
        ###############################

    return train_and_valid_files, train_only_files
def build_split_matrices(folder_paths, destination_folder, chunk_size, instru_mapping, N_piano, N_orchestra, embedding_model, binary_piano, binary_orch, build_embedding, max_number_note_played):
    """Convert score folders into chunked ``.npy`` matrices on disk.

    Each readable folder is turned into a piano matrix and an orchestra
    matrix (plus a piano embedding when ``build_embedding`` is true), then cut
    into consecutive chunks of at most ``chunk_size`` rows. Every chunk gets
    its own folder ``<destination_folder>/<file_counter>_<split_counter>``
    containing ``pr_piano.npy``, ``pr_orch.npy``, ``duration_piano.npy``,
    ``duration_orch.npy`` and, optionally, ``pr_piano_embedded.npy``.

    Source folders and chunk folders are recorded by their last two path
    components (``parent/name``).

    Reads the module-level settings ``quantization``, ``temporal_granularity``
    and ``cuda_gpu``.

    Args:
        folder_paths: Iterable of folder paths; trailing whitespace (such as a
            newline) is stripped from each. Paths that are not directories are
            skipped.
        destination_folder: Existing folder in which chunk folders are created.
        chunk_size: Maximum number of rows per chunk.
        instru_mapping: Instrument mapping; when embedding,
            ``instru_mapping['Piano']`` must provide ``pitch_min`` and
            ``pitch_max``.
        N_piano: Number of columns of the piano matrix.
        N_orchestra: Number of columns of the orchestra matrix.
        embedding_model: Callable applied as ``embedding_model(batch, 0)`` to
            float tensors of shape ``(batch, 1, 128)``; unused when
            ``build_embedding`` is false.
        binary_piano: Forwarded to the folder reader.
        binary_orch: Forwarded to the folder reader.
        build_embedding: Whether to compute and save the piano embedding.
        max_number_note_played: Running maximum of the per-row sum of the
            orchestra matrix, as known before this call.

    Returns:
        ``(train_and_valid_files, train_only_files, max_number_note_played)``.
        The two dicts map a relative source folder to the list of relative
        chunk folders created for it; a folder goes to ``train_only_files``
        when it is in ``avoid_tracks.no_valid_tracks()``. The last item is
        the updated running maximum.

    Raises:
        Exception: If a folder holds neither one nor two music files.
        OSError: If a chunk folder already exists (``os.mkdir``).
    """
    file_counter = 0
    train_only_files = {}
    train_and_valid_files = {}

    for folder_path in folder_paths:
        ###############################
        # Read file
        folder_path = folder_path.rstrip()
        logging.info(" : " + folder_path)
        if not os.path.isdir(folder_path):
            continue

        folder_path_split = folder_path.split("/")
        folder_path_relative = \
            folder_path_split[-2] + "/" + folder_path_split[-1]

        # Decide once per folder which registry receives its splits.
        if folder_path_relative in avoid_tracks.no_valid_tracks():
            split_registry = train_only_files
        else:
            split_registry = train_and_valid_files
        split_registry[folder_path_relative] = []

        # Is there an original piano score or do we have to create it ?
        num_music_file = max(len(glob.glob(folder_path + '/*.mid')),
                             len(glob.glob(folder_path + '/*.xml')))
        if num_music_file == 2:
            is_piano = True
        elif num_music_file == 1:
            is_piano = False
        else:
            raise Exception(
                "Expected 1 or 2 music files (.mid or .xml) in " +
                folder_path + ", found " + str(num_music_file))

        # Get pr, warped and duration
        # NOTE(review): the two readers are unpacked with different tuple
        # lengths (9 vs 10 values), exactly as in the original code --
        # confirm against the readers' current return signatures.
        if is_piano:
            new_pr_piano, _, new_duration_piano, _, new_pr_orchestra, _, new_duration_orch, _, duration = \
                build_data_aux.process_folder(
                    folder_path, quantization, binary_piano, binary_orch,
                    temporal_granularity, gapopen=3, gapextend=1,
                    align_bool=True)
        else:
            try:
                new_pr_piano, _, new_duration_piano, _, _, new_pr_orchestra, _, new_duration_orch, _, duration = \
                    build_data_aux_no_piano.process_folder_NP(
                        folder_path, quantization, binary_piano, binary_orch,
                        temporal_granularity)
            except Exception:
                # Best effort: skip unreadable folders, but let
                # KeyboardInterrupt / SystemExit propagate.
                logging.warning("Could not read file in " + folder_path)
                continue

        # Skip files that could not be matched
        if new_pr_piano is None:
            # It's definitely not a match...
            # Check for the files : are they really a piano score and its
            # orchestration ?
            with open('log_build_db.txt', 'a') as f:
                f.write(folder_path + '\n')
            continue

        pr_orch = build_data_aux.cast_small_pr_into_big_pr(
            new_pr_orchestra, 0, duration, instru_mapping,
            np.zeros((duration, N_orchestra)))
        pr_piano = build_data_aux.cast_small_pr_into_big_pr(
            new_pr_piano, 0, duration, instru_mapping,
            np.zeros((duration, N_piano)))
        ###############################

        ###############################
        # Embed piano
        if build_embedding:
            pitch_min = instru_mapping['Piano']['pitch_min']
            pitch_max = instru_mapping['Piano']['pitch_max']
            piano_embedded = []
            len_piano = len(pr_piano)
            batch_size = 500  # forced to batch for memory issues
            for start_batch_index in range(0, len_piano, batch_size):
                end_batch_index = min(start_batch_index + batch_size,
                                      len_piano)
                this_batch_size = end_batch_index - start_batch_index
                # Embeddings accept size 128 samples: the piano range is
                # placed at its pitch position inside a zero-filled frame.
                piano_resize_emb = np.zeros((this_batch_size, 1, 128))
                piano_resize_emb[:, 0, pitch_min:pitch_max] = \
                    pr_piano[start_batch_index:end_batch_index]
                piano_resize_emb_TT = torch.tensor(piano_resize_emb)
                if cuda_gpu:
                    piano_resize_emb_TT = piano_resize_emb_TT.cuda()
                piano_embedded_TT = embedding_model(
                    piano_resize_emb_TT.float(), 0)
                if cuda_gpu:
                    piano_embedded_TT = piano_embedded_TT.cpu()
                piano_embedded.append(piano_embedded_TT.numpy())
            piano_embedded = np.concatenate(piano_embedded)
        ###############################

        ##############################
        # Update the max number of notes played in the orchestral score
        this_max_num_notes = int(np.max(np.sum(pr_orch, axis=1)))
        max_number_note_played = max(max_number_note_played,
                                     this_max_num_notes)
        ##############################

        # Explicit emptiness checks: plain truthiness raises ValueError on
        # NumPy arrays holding more than one element.
        has_duration_piano = (new_duration_piano is not None
                              and len(new_duration_piano) > 0)
        has_duration_orch = (new_duration_orch is not None
                             and len(new_duration_orch) > 0)

        ###############################
        # Split
        last_index = pr_piano.shape[0]
        for split_counter, start_index in enumerate(
                range(0, last_index, chunk_size)):
            this_split_folder = destination_folder + '/' + \
                str(file_counter) + '_' + str(split_counter)
            os.mkdir(this_split_folder)
            end_index = min(start_index + chunk_size, last_index)
            # The final chunk may be shorter than chunk_size.
            this_chunk_length = end_index - start_index

            np.save(this_split_folder + '/pr_piano.npy',
                    pr_piano[start_index:end_index].astype(np.float32))

            if build_embedding:
                np.save(
                    this_split_folder + '/pr_piano_embedded.npy',
                    piano_embedded[start_index:end_index].astype(np.float32))

            np.save(this_split_folder + '/pr_orch.npy',
                    pr_orch[start_index:end_index].astype(np.float32))

            # Without duration information every frame gets duration 1; the
            # fallback has the same length as the chunk it belongs to.
            # NOTE(review): durations are stored as int8; values above 127
            # would not fit -- confirm the expected range.
            if has_duration_piano:
                section = new_duration_piano[start_index:end_index]
            else:
                section = np.ones((this_chunk_length, ))
            np.save(this_split_folder + '/duration_piano.npy',
                    np.asarray(section, dtype=np.int8))

            if has_duration_orch:
                section = new_duration_orch[start_index:end_index]
            else:
                section = np.ones((this_chunk_length, ))
            np.save(this_split_folder + '/duration_orch.npy',
                    np.asarray(section, dtype=np.int8))

            # Keep track of splits
            split_path = this_split_folder.split("/")
            this_split_folder_relative = split_path[-2] + "/" + split_path[-1]
            split_registry[folder_path_relative].append(
                this_split_folder_relative)

        # One counter value per successfully processed folder.
        file_counter += 1
        ###############################

    return train_and_valid_files, train_only_files, max_number_note_played