def process_folder(folder_path, quantization, temporal_granularity):
    """Read the two pianorolls of a folder and align them in time.

    The two tracks are read with
    ``build_data_aux.get_instru_and_pr_from_folder_path``, optionally warped
    to their event indices, aligned with ``needleman_chord_wrapper`` and
    finally restricted to the time indices kept in both traces.

    Parameters:
        folder_path: folder holding the two tracks.
        quantization: forwarded unchanged to the reader.
        temporal_granularity: when equal to ``'event_level'`` both
            pianorolls are warped with ``warp_pr_aux`` before alignment;
            any other value leaves them as read.

    Returns:
        ``(pr0_aligned, instru0, name0, pr1_aligned, instru1, name1,
        duration)``, or a list of seven ``None`` when the aligned duration
        is zero.
    """
    # Read both tracks; the third and seventh returned values are not used here
    (pr0, instru0, _, name0,
     pr1, instru1, _, name1) = build_data_aux.get_instru_and_pr_from_folder_path(
        folder_path, quantization)

    # Optional switch to an event-level time axis
    if temporal_granularity == 'event_level':
        pr0 = warp_pr_aux(pr0, get_event_ind_dict(pr0))
        pr1 = warp_pr_aux(pr1, get_event_ind_dict(pr1))

    # Needleman-Wunsch alignment on the pianorolls summed over instruments.
    # Each trace is a binary list in which a 0 marks an inserted gap.
    flat_pr0 = sum_along_instru_dim(pr0)
    flat_pr1 = sum_along_instru_dim(pr1)
    trace_0, trace_1, _, _, _ = needleman_chord_wrapper(flat_pr0, flat_pr1)

    # Warp each pianoroll along its own trace
    assert len(trace_0) == len(trace_1), "size mismatch"
    pr0_warp = warp_dictionnary_trace(pr0, trace_0)
    pr1_warp = warp_dictionnary_trace(pr1, trace_1)

    # A time index survives only if neither trace has a gap there
    trace_prod = [gap_0 * gap_1 for gap_0, gap_1 in zip(trace_0, trace_1)]
    duration = sum(trace_prod)
    if duration == 0:
        # Nothing in common between the two tracks
        return [None] * 7

    pr0_aligned = remove_zero_in_trace(pr0_warp, trace_prod)
    pr1_aligned = remove_zero_in_trace(pr1_warp, trace_prod)
    return pr0_aligned, instru0, name0, pr1_aligned, instru1, name1, duration
def process_folder(folder_path, quantization, unit_type, temporal_granularity, gapopen=3, gapextend=1):
    """Read, convert, align and label the two tracks stored in a folder.

    Parameters:
        folder_path: folder holding the two tracks.
        quantization: forwarded to ``get_instru_and_pr_from_folder_path``.
        unit_type: forwarded to ``Unit_type.from_rawpr_to_type`` and to
            ``align_tracks``.
        temporal_granularity: when equal to ``'event_level'`` the event
            indices of each pianoroll are extracted and the pianoroll is
            warped on them; otherwise both event variables stay ``None``.
        gapopen: forwarded to ``align_tracks``.
        gapextend: forwarded to ``align_tracks``.

    Returns:
        ``(pr_piano, event_piano, instru_piano, name_piano, pr_orch,
        event_orch, instru_orch, name_orch, duration)`` as produced by
        ``discriminate_between_piano_and_orchestra``.
    """
    # Read both tracks; the third and seventh returned values are not used here
    (pr0, instru0, _, name0,
     pr1, instru1, _, name1) = get_instru_and_pr_from_folder_path(folder_path, quantization)

    # Convert the raw pianorolls to the requested unit type
    pr0 = Unit_type.from_rawpr_to_type(pr0, unit_type)
    pr1 = Unit_type.from_rawpr_to_type(pr1, unit_type)

    # Event indices are only defined at event-level granularity
    event_0 = None
    event_1 = None
    if temporal_granularity == 'event_level':
        event_0 = get_event_ind_dict(pr0)
        event_1 = get_event_ind_dict(pr1)
        pr0 = warp_pr_aux(pr0, event_0)
        pr1 = warp_pr_aux(pr1, event_1)

    # Align the two tracks
    (pr0_aligned, trace_0,
     pr1_aligned, trace_1,
     trace_prod, duration) = align_tracks(pr0, pr1, unit_type, gapopen, gapextend)

    # Bring the event lists in line with the alignment
    event0_aligned = clean_event(event_0, trace_0, trace_prod)
    event1_aligned = clean_event(event_1, trace_1, trace_prod)

    # Decide which of the two tracks is the piano and which is the orchestra
    (pr_piano, event_piano, instru_piano, name_piano,
     pr_orch, event_orch, instru_orch, name_orch,
     duration) = discriminate_between_piano_and_orchestra(
        pr0_aligned, event0_aligned, instru0, name0,
        pr1_aligned, event1_aligned, instru1, name1,
        duration)

    piano_part = (pr_piano, event_piano, instru_piano, name_piano)
    orch_part = (pr_orch, event_orch, instru_orch, name_orch)
    return piano_part + orch_part + (duration,)
def get_dim_matrix(index_files_dict, meta_info_path='temp.p', quantization=12, temporal_granularity='frame_level', logging=None):
    """Compute the sizes needed to allocate the data matrices and pickle them.

    For every set of ``index_files_dict``, each folder listed in the set's
    index files is read, its two pianorolls are aligned, and the aligned
    duration is accumulated. The instrument mapping (pitch bounds, then index
    bounds) is built along the way from all the tracks encountered.

    Parameters:
        index_files_dict: dict mapping a set identifier to an iterable of
            index-file paths; each index file lists one folder path per line.
        meta_info_path: path of the pickle file the result is written to.
        quantization: forwarded to the track reader and stored in the result.
        temporal_granularity: when equal to ``'event_level'`` the pianorolls
            are warped on their event indices before being aligned.
        logging: object exposing ``info()``. When ``None``, a standard
            library logger named after this module is used.

    Returns:
        None. A dict with the keys ``'instru_mapping'``, ``'quantization'``,
        ``'T'`` (aligned duration per set identifier) and ``'N_orchestra'``
        is pickled to ``meta_info_path``.

    Side effects:
        Folders that cannot be read are appended to a file named ``log`` in
        the current working directory and then skipped.
    """
    if logging is None:
        # The parameter shadows the standard module, hence the aliased import
        import logging as std_logging
        logging = std_logging.getLogger(__name__)

    # Get instrument names
    instrument_list_from_dico = build_dico().keys()
    # instru_mapping = {'piano': {'pitch_min': 24, 'pitch_max':117, 'ind_min': 0, 'ind_max': 92},
    #                   'harp' ... }
    instru_mapping = {}
    T_dict = {}  # indexed by set_identifier

    for set_identifier, index_files in index_files_dict.items():
        logging.info("##########")
        logging.info(set_identifier)
        # Total aligned duration of the tracks of this set
        T = 0
        for index_file in index_files:
            # Read the csv file indexing the database
            with open(index_file, 'rb') as index_handle:
                for folder_path in index_handle:
                    folder_path = folder_path.rstrip()
                    logging.info(folder_path)
                    if not os.path.isdir(folder_path):
                        continue

                    # Read pr
                    try:
                        pr0, instru0, _, _, pr1, instru1, _, _ = build_data_aux.get_instru_and_pr_from_folder_path(folder_path, quantization)
                    except Exception:
                        # Append, so that earlier bad folders are not erased,
                        # and use a dedicated name so that the index-file
                        # handle is not rebound.
                        with open('log', 'ab') as log_file:
                            log_file.write('Bad file' + folder_path + '\n')
                        continue

                    # Temporal granularity
                    if temporal_granularity == 'event_level':
                        new_event_0 = get_event_ind_dict(pr0)
                        pr0 = warp_pr_aux(pr0, new_event_0)
                        new_event_1 = get_event_ind_dict(pr1)
                        pr1 = warp_pr_aux(pr1, new_event_1)

                    # Get T: only the time indices kept in both traces count
                    trace_0, trace_1, _, _, _ = needleman_chord_wrapper(sum_along_instru_dim(pr0), sum_along_instru_dim(pr1))
                    T += sum(e1 * e2 for e1, e2 in zip(trace_0, trace_1))

                    # Modify the mapping from instrument to indices in
                    # pianorolls and pitch bounds. The mapping is reassigned
                    # from the return value to make the update explicit.
                    instru_mapping = build_data_aux.instru_pitch_range(instrumentation=instru0,
                                                                       pr=pr0,
                                                                       instru_mapping=instru_mapping,
                                                                       instrument_list_from_dico=instrument_list_from_dico)
                    instru_mapping = build_data_aux.instru_pitch_range(instrumentation=instru1,
                                                                       pr=pr1,
                                                                       instru_mapping=instru_mapping,
                                                                       instrument_list_from_dico=instrument_list_from_dico)

                    # Delete prs
                    del pr0, pr1, instru0, instru1
        T_dict[set_identifier] = T

    # Build the index_min and index_max in the instru_mapping dictionary
    counter = 0
    for k, v in instru_mapping.items():
        if k == 'piano':
            # The piano has its own index range, which does not consume
            # orchestra indices
            v['index_min'] = 0
            v['index_max'] = v['pitch_max'] - v['pitch_min']
            continue
        v['index_min'] = counter
        counter = counter + v['pitch_max'] - v['pitch_min']
        v['index_max'] = counter

    # Gather the meta information and write it to disk
    temp = {}
    temp['instru_mapping'] = instru_mapping
    temp['quantization'] = quantization
    temp['T'] = T_dict
    temp['N_orchestra'] = counter
    with open(meta_info_path, 'wb') as meta_file:
        pickle.dump(temp, meta_file)
    return
# orch = sum_along_instru_dim(pr)[start_ind:end_ind] # orch[np.nonzero(orch)] = 1 # orch[0, 30] = 1 # orch[0, 92] = 1 ####################################################### ####################################################### pianofile = Read_midi(piano_path, quantization) pr_piano = pianofile.read_file() ####################################################### ####################################################### # Event level representation event_piano = get_event_ind_dict(pr_piano) event_orch = get_event_ind_dict(pr_orch) pr_piano = warp_pr_aux(pr_piano, event_piano) pr_orch = warp_pr_aux(pr_orch, event_orch) ######################################################## def align_tracks(pr0, pr1, unit_type, gapopen, gapextend): # Get trace from needleman_wunsch algorithm # First extract binary representation, whatever unit_type is pr0_binary = Unit_type.from_type_to_binary(pr0, unit_type) pr1_binary = Unit_type.from_type_to_binary(pr1, unit_type) pr0_trace = sum_along_instru_dim(pr0_binary) pr1_trace = sum_along_instru_dim(pr1_binary) # Traces are computed from binaries matrices # Traces are binary lists, 0 meaning a gap is inserted trace_0, trace_1, this_sum_score, this_nbId, this_nbDiffs = needleman_chord_wrapper(pr0_trace, pr1_trace, gapopen, gapextend)