def calculate_piano_solo_prob(args):
    """Calculate the piano solo probability of all downloaded mp3s, and append
    the probability to the meta csv file.

    For every row of the similarity csv the expected mp3 file name is built
    from the surname, firstname, music and youtube_id columns. If the file
    exists it is loaded, scored by the piano solo detector, and the mean
    probability, audio name and duration (in seconds) are recorded. Rows
    without an mp3 get empty strings in the three new columns.

    Args:
        args: object with the attributes
            workspace: str, directory that holds the csv files
            mp3s_dir: str, directory that holds the downloaded mp3 files
            mini_data: bool, if true the csv file names get the
                'minidata_' prefix

    Returns:
        None
    """

    # Arguments & parameters
    workspace = args.workspace
    mp3s_dir = args.mp3s_dir
    mini_data = args.mini_data
    sample_rate = piano_detection_model.SR

    prefix = 'minidata_' if mini_data else ''

    # Paths
    similarity_csv_path = os.path.join(
        workspace,
        '{}full_music_pieces_youtube_similarity.csv'.format(prefix))

    piano_prediction_path = os.path.join(
        workspace,
        '{}full_music_pieces_youtube_similarity_pianosoloprob.csv'.format(
            prefix))

    # Meta info
    meta_dict = read_csv_to_meta_dict(similarity_csv_path)

    meta_dict['piano_solo_prob'] = []
    meta_dict['audio_name'] = []
    meta_dict['audio_duration'] = []

    piano_solo_detector = piano_detection_model.PianoSoloDetector()

    for n in range(len(meta_dict['surname'])):
        # '/' is replaced in the file name only, never in the directory part.
        mp3_name = '{}, {}, {}, {}.mp3'.format(
            meta_dict['surname'][n], meta_dict['firstname'][n],
            meta_dict['music'][n], meta_dict['youtube_id'][n]
        ).replace('/', '_')
        mp3_path = os.path.join(mp3s_dir, mp3_name)

        if not os.path.exists(mp3_path):
            # Keep all columns the same length for rows without audio.
            meta_dict['piano_solo_prob'].append('')
            meta_dict['audio_name'].append('')
            meta_dict['audio_duration'].append('')
            continue

        (audio, _) = librosa.core.load(mp3_path, sr=sample_rate, mono=True)

        try:
            probs = piano_solo_detector.predict(audio)
            prob = np.mean(probs)
        except Exception as error:
            # Best effort: a failed prediction is scored 0, but the reason is
            # reported instead of being silently swallowed. Catching
            # Exception (not a bare except) lets Ctrl-C abort the run.
            print('Piano solo prediction failed for {}: {!r}'.format(
                mp3_path, error))
            prob = 0

        print(n, mp3_path, prob)
        meta_dict['audio_name'].append(get_filename(mp3_path))
        meta_dict['piano_solo_prob'].append(prob)
        meta_dict['audio_duration'].append(len(audio) / sample_rate)

    write_meta_dict_to_csv(meta_dict, piano_prediction_path)
    print('Write out to {}'.format(piano_prediction_path))
def create_subset200_piano_solo_eval_csv(args):
    r"""Select 200 pieces from GiantMIDI-Piano to evaluate the music piece
    accuracy.

    Rows whose 'giant_midi_piano' flag equals 1 are candidates. Candidates are
    sampled at a regular stride so that at most 200 rows are kept, and the
    result is written to
    'subset_csvs_for_evaluation/subset200_piano_solo_eval.csv' together with
    the original row index ('index_in_csv') and two blank columns
    ('meta_correct', 'sequenced').

    Args:
        args: object with the attribute
            workspace: str, directory that holds the split csv file

    Returns:
        None
    """

    # arguments & parameters
    workspace = args.workspace
    eval_num = 200

    # paths
    csv_path = os.path.join(
        workspace,
        'full_music_pieces_youtube_similarity_pianosoloprob_split.csv')

    output_path = os.path.join(
        'subset_csvs_for_evaluation', 'subset200_piano_solo_eval.csv')
    os.makedirs(os.path.dirname(output_path), exist_ok=True)

    meta_dict = read_csv_to_meta_dict(csv_path)

    audios_num = len(meta_dict['surname'])

    indexes = [
        n for n in range(audios_num)
        if meta_dict['giant_midi_piano'][n] != ''
        and int(meta_dict['giant_midi_piano'][n]) == 1]

    # With fewer than eval_num candidates the integer division gives 0, and a
    # slice step of 0 raises ValueError; fall back to taking every candidate.
    skip_num = max(1, len(indexes) // eval_num)
    eval_indexes = indexes[0::skip_num][0:eval_num]

    # May be smaller than eval_num when there are not enough candidates.
    selected_num = len(eval_indexes)

    new_meta_dict = {key: [] for key in meta_dict.keys()}
    new_meta_dict['index_in_csv'] = []

    for index in eval_indexes:
        for key in meta_dict.keys():
            new_meta_dict[key].append(meta_dict[key][index])

        new_meta_dict['index_in_csv'].append(index)

    # Blank columns, sized to the rows actually selected so every column has
    # the same length.
    new_meta_dict['meta_correct'] = [''] * selected_num
    new_meta_dict['sequenced'] = [''] * selected_num

    write_meta_dict_to_csv(new_meta_dict, output_path)
    print('Write out to {}'.format(output_path))
def create_subset200_eval_csv(args):
    r"""Select 200 files from 60,724 downloaded files to evaluate the
    precision, recall of piano solo detection.

    Rows whose 'similarity' value is greater than 0.6 are candidates.
    Candidates are sampled at a regular stride so that at most 200 rows are
    kept, and the result is written to
    'subset_csvs_for_evaluation/subset200_eval.csv' together with the original
    row index ('index_in_csv') and three blank columns ('piano_solo',
    'electronic_piano', 'sequenced').

    Args:
        args: object with the attribute
            workspace: str, directory that holds the split csv file

    Returns:
        None
    """

    # arguments & parameters
    workspace = args.workspace
    eval_num = 200

    # paths
    csv_path = os.path.join(
        workspace,
        'full_music_pieces_youtube_similarity_pianosoloprob_split.csv')

    output_path = os.path.join(
        'subset_csvs_for_evaluation', 'subset200_eval.csv')
    os.makedirs(os.path.dirname(output_path), exist_ok=True)

    meta_dict = read_csv_to_meta_dict(csv_path)

    audios_num = len(meta_dict['surname'])

    indexes = [
        n for n in range(audios_num)
        if float(meta_dict['similarity'][n]) > 0.6]

    # With fewer than eval_num candidates the integer division gives 0, and a
    # slice step of 0 raises ValueError; fall back to taking every candidate.
    skip_num = max(1, len(indexes) // eval_num)
    eval_indexes = indexes[0::skip_num][0:eval_num]

    # May be smaller than eval_num when there are not enough candidates.
    selected_num = len(eval_indexes)

    new_meta_dict = {key: [] for key in meta_dict.keys()}
    new_meta_dict['index_in_csv'] = []

    for index in eval_indexes:
        for key in meta_dict.keys():
            new_meta_dict[key].append(meta_dict[key][index])

        new_meta_dict['index_in_csv'].append(index)

    # Blank columns, sized to the rows actually selected so every column has
    # the same length.
    new_meta_dict['piano_solo'] = [''] * selected_num
    new_meta_dict['electronic_piano'] = [''] * selected_num
    new_meta_dict['sequenced'] = [''] * selected_num

    write_meta_dict_to_csv(new_meta_dict, output_path)
    print('Write out to {}'.format(output_path))
def create_piano_split(args):
    """Add a 'split' column to the piano solo probability csv file.

    Rows with piano_solo_prob >= 0.5 are labelled in a repeating cycle of ten:
    one 'validation', one 'test', then eight 'train' (1:1:8). All other rows,
    including rows with an empty probability, are labelled 'none'.

    NOTE(review): a function with the same name is defined again later in this
    file; if both are at module level the later definition replaces this one.

    Args:
        args: object with the attribute
            workspace: str, directory that holds the csv files

    Returns:
        None
    """

    # Arguments & parameters
    workspace = args.workspace

    # Paths
    piano_prediction_path = os.path.join(
        workspace,
        'full_music_pieces_youtube_similarity_pianosoloprob.csv')

    split_path = os.path.join(
        workspace,
        'full_music_pieces_youtube_similarity_pianosoloprob_split.csv')

    # Meta info to be downloaded
    meta_dict = read_csv_to_meta_dict(piano_prediction_path)

    surnames = meta_dict['surname']
    firstnames = meta_dict['firstname']

    splits = []
    i = 0   # Position inside the current cycle of ten piano solo rows

    for n in range(len(surnames)):
        prob = meta_dict['piano_solo_prob'][n]

        # The probability is an empty string for rows without a downloaded
        # mp3; float('') would raise ValueError, so treat it as "not piano".
        if prob != '' and float(prob) >= 0.5:
            if i == 0:
                splits.append('validation')
            elif i == 1:
                splits.append('test')
            else:
                splits.append('train')
            i += 1
        else:
            splits.append('none')

        # Reset i if moved to next composer. The composer is identified by
        # surname AND firstname, so composers sharing a surname are told apart.
        if n > 0:
            previous_name = '{}, {}'.format(surnames[n - 1], firstnames[n - 1])
            current_name = '{}, {}'.format(surnames[n], firstnames[n])

            if previous_name != current_name:
                i = 0

        # Start a new 1:1:8 cycle after ten piano solo rows.
        if i == 10:
            i = 0

    meta_dict['split'] = splits

    write_meta_dict_to_csv(meta_dict, split_path)
    print('Write csv to {}'.format(split_path))
def create_piano_split(args):
    """Add 'giant_midi_piano', 'split', and 'surname_in_youtube_title' flags
    to csv file and write out the csv file. The ratio of validation, test,
    train subsets are 1:1:8.

    For each row:
        - empty piano_solo_prob: all three flags are empty strings.
        - piano_solo_prob >= 0.5: giant_midi_piano is 1 and split follows a
          repeating cycle of ten (one 'validation', one 'test', eight
          'train').
        - otherwise: giant_midi_piano is 0 and split is an empty string.
        - surname_in_youtube_title is 1 if the surname is a substring of the
          youtube title, else 0 (only for rows with a probability).

    Args:
        args: object with the attribute
            workspace: str, directory that holds the csv files

    Returns:
        None
    """

    # arguments & parameters
    workspace = args.workspace

    # Paths
    piano_prediction_path = os.path.join(
        workspace,
        'full_music_pieces_youtube_similarity_pianosoloprob.csv')

    split_path = os.path.join(
        workspace,
        'full_music_pieces_youtube_similarity_pianosoloprob_split.csv')

    # Meta info to be downloaded
    meta_dict = read_csv_to_meta_dict(piano_prediction_path)

    surnames = meta_dict['surname']
    firstnames = meta_dict['firstname']

    giant_midi_pianos = []
    splits = []
    surname_in_youtube_titles = []
    i = 0   # Position inside the current cycle of ten piano solo rows

    # Add 'giant_midi_piano', 'split', and 'surname_in_youtube_title' flags to
    # .csv file.
    for n in range(len(surnames)):
        if meta_dict['piano_solo_prob'][n] == "":
            giant_midi_pianos.append("")
            splits.append("")
            surname_in_youtube_titles.append("")
        else:
            if float(meta_dict['piano_solo_prob'][n]) >= 0.5:
                giant_midi_pianos.append(1)

                if i == 0:
                    splits.append('validation')
                elif i == 1:
                    splits.append('test')
                else:
                    splits.append('train')
                i += 1
            else:
                giant_midi_pianos.append(0)
                splits.append("")

            if surnames[n] in meta_dict['youtube_title'][n]:
                surname_in_youtube_titles.append(1)
            else:
                surname_in_youtube_titles.append(0)

        # Reset i if moved to next composer. The composer is identified by
        # surname AND firstname, so composers sharing a surname are told apart.
        # NOTE(review): the reset runs after row n has been labelled, so the
        # first row of a new composer still uses the previous counter value --
        # confirm whether that ordering is intended.
        if n > 0:
            previous_name = '{}, {}'.format(surnames[n - 1], firstnames[n - 1])
            current_name = '{}, {}'.format(surnames[n], firstnames[n])

            if previous_name != current_name:
                i = 0

        # Start a new 1:1:8 cycle after ten piano solo rows.
        if i == 10:
            i = 0

    meta_dict['giant_midi_piano'] = giant_midi_pianos
    meta_dict['split'] = splits
    meta_dict['surname_in_youtube_title'] = surname_in_youtube_titles

    write_meta_dict_to_csv(meta_dict, split_path)
    print('Write csv to {}'.format(split_path))