# Match each valid MIDI entry against the MSD entries using the precomputed
# 'tpaa' hash sequences, then save the collected results for this split.

# Precomputed hash sequences (and metadata) for every MSD entry
msd_hash_dir = os.path.join(RESULTS_PATH, 'tpaa_msd_hash_sequences')
msd_data = experiment_utils.load_precomputed_data(msd_list, msd_hash_dir)
# Drop entries whose hash sequence is not strictly longer than the minimum
msd_data = [entry for entry in msd_data
            if len(entry['hash_sequence']) > MIN_SEQUENCE_LENGTH]

# Valid MIDI-MSD match pairs for this split; SCORE_THRESHOLD and the aligned
# h5 directory are forwarded to get_valid_matches
pairs_file = os.path.join(RESULTS_PATH, '{}_pairs.csv'.format(SPLIT))
aligned_h5_dir = os.path.join(RESULTS_PATH, 'clean_midi_aligned', 'h5')
midi_msd_mapping = experiment_utils.get_valid_matches(
    pairs_file, SCORE_THRESHOLD, aligned_h5_dir)

# Precomputed MIDI data plus, per MIDI key, the value handed to the matcher
# as its third argument
midi_hash_dir = os.path.join(RESULTS_PATH, 'tpaa_clean_midi_hash_sequences')
midi_datas, midi_index_mapping = experiment_utils.load_valid_midi_datas(
    midi_msd_mapping, msd_data, midi_list, midi_hash_dir)

# Queue one match_sequence call per MIDI key and run them in parallel
delayed_match = joblib.delayed(experiment_utils.match_sequence)
match_jobs = (
    delayed_match(midi_datas[md5], msd_data, midi_index_mapping[md5],
                  GULLY, PENALTY)
    for md5 in midi_datas)
results = joblib.Parallel(n_jobs=11, verbose=51)(match_jobs)

# Create the match results output directory if it doesn't exist yet
output_path = os.path.join(RESULTS_PATH, 'tpaa_match_results')
if not os.path.exists(output_path):
    os.makedirs(output_path)
# Save the list of all matching results for this split in one .h5 file
results_file = os.path.join(output_path, '{}_results.h5'.format(SPLIT))
deepdish.io.save(results_file, results)
# Match each valid MIDI entry against the MSD entries using the precomputed
# 'dhs_piano' hash sequences.

# Precomputed hash sequences (and metadata) for every MSD entry
msd_hash_dir = os.path.join(RESULTS_PATH, 'dhs_piano_msd_hash_sequences')
msd_data = experiment_utils.load_precomputed_data(msd_list, msd_hash_dir)
# Drop entries whose hash sequence is not strictly longer than the minimum
msd_data = [entry for entry in msd_data
            if len(entry['hash_sequence']) > MIN_SEQUENCE_LENGTH]

# Valid MIDI-MSD match pairs for this split; SCORE_THRESHOLD and the aligned
# h5 directory are forwarded to get_valid_matches
pairs_file = os.path.join(RESULTS_PATH, '{}_pairs.csv'.format(SPLIT))
aligned_h5_dir = os.path.join(RESULTS_PATH, 'clean_midi_aligned', 'h5')
midi_msd_mapping = experiment_utils.get_valid_matches(
    pairs_file, SCORE_THRESHOLD, aligned_h5_dir)

# Precomputed MIDI data plus, per MIDI key, the value handed to the matcher
# as its third argument
midi_hash_dir = os.path.join(
    RESULTS_PATH, 'dhs_piano_clean_midi_hash_sequences')
midi_datas, midi_index_mapping = experiment_utils.load_valid_midi_datas(
    midi_msd_mapping, msd_data, midi_list, midi_hash_dir)

# Release names that are not used below, to conserve memory before the
# parallel matching starts (deleted left to right)
del msd_index, midi_index, msd_list, midi_list, midi_msd_mapping

# Queue one match_sequence call per MIDI key and run them in parallel
delayed_match = joblib.delayed(experiment_utils.match_sequence)
match_jobs = (
    delayed_match(midi_datas[md5], msd_data, midi_index_mapping[md5],
                  GULLY, PENALTY)
    for md5 in midi_datas)
results = joblib.Parallel(n_jobs=8, verbose=51)(match_jobs)
os.path.join(DATA_PATH, 'clean_midi', 'index')) with midi_index.searcher() as searcher: midi_list = list(searcher.documents()) # Load in hash sequences (and metadata) for all MSD entries msd_data = experiment_utils.load_precomputed_data( msd_list, os.path.join(RESULTS_PATH, 'tdftm_msd_embeddings')) # Get a list of valid MIDI-MSD match pairs midi_msd_mapping = experiment_utils.get_valid_matches( os.path.join(RESULTS_PATH, '{}_pairs.csv'.format(SPLIT)), SCORE_THRESHOLD, os.path.join(RESULTS_PATH, 'clean_midi_aligned', 'h5')) midi_datas, midi_index_mapping = experiment_utils.load_valid_midi_datas( midi_msd_mapping, msd_data, midi_list, os.path.join(RESULTS_PATH, 'tdftm_clean_midi_embeddings')) # Run match_one_midi for each MIDI data and MSD index list results = [experiment_utils.match_embedding( midi_datas[md5], msd_data, midi_index_mapping[md5]) for md5 in midi_datas] # Create DHS match results output path if it doesn't exist output_path = os.path.join(RESULTS_PATH, 'tdftm_match_results') if not os.path.exists(output_path): os.makedirs(output_path) # Save list of all matching results results_file = os.path.join(output_path, '{}_results.h5'.format(SPLIT)) deepdish.io.save(results_file, results)
os.path.join(DATA_PATH, 'clean_midi', 'index')) with midi_index.searcher() as searcher: midi_list = list(searcher.documents()) # Load in hash sequences (and metadata) for all MSD entries msd_data = experiment_utils.load_precomputed_data( msd_list, os.path.join(RESULTS_PATH, 'stats_msd_embeddings')) # Get a list of valid MIDI-MSD match pairs midi_msd_mapping = experiment_utils.get_valid_matches( os.path.join(RESULTS_PATH, '{}_pairs.csv'.format(SPLIT)), SCORE_THRESHOLD, os.path.join(RESULTS_PATH, 'clean_midi_aligned', 'h5')) midi_datas, midi_index_mapping = experiment_utils.load_valid_midi_datas( midi_msd_mapping, msd_data, midi_list, os.path.join(RESULTS_PATH, 'stats_clean_midi_embeddings')) # Run match_one_midi for each MIDI data and MSD index list results = [ experiment_utils.match_embedding( midi_datas[md5], msd_data, midi_index_mapping[md5]) for md5 in midi_datas] # Create DHS match results output path if it doesn't exist output_path = os.path.join(RESULTS_PATH, 'stats_match_results') if not os.path.exists(output_path): os.makedirs(output_path) # Save list of all matching results results_file = os.path.join(output_path, '{}_results.h5'.format(SPLIT)) deepdish.io.save(results_file, results)