def process(midi_path: str, counter: AtomicCounter) -> Optional[dict]:
    """
    Processes the MIDI file at the given path and increments the counter.

    The method calls extract_drums and get_bass_drums_on_beat, and writes
    the resulting drum file only if the bass drum ratio reaches the
    threshold.

    :param midi_path: the MIDI file path to process
    :param counter: the counter to increment
    :return: the dictionary containing the MIDI path, the PrettyMIDI
          instance and the ratio of bass drum on beat, or None if the
          track is below the threshold or cannot be processed (the
          exception is printed)
    """
    try:
        pm_drums = extract_drums(midi_path)
        bass_drums_on_beat = get_bass_drums_on_beat(pm_drums)
        # Fix: the original raised an Exception for this *expected* case
        # and then filtered it back out in the handler by matching on the
        # message string ("Not on beat"), which is fragile control flow.
        # A plain conditional return preserves the observable behavior:
        # no file written, no message printed, None returned.
        if bass_drums_on_beat < args.bass_drums_on_beat_threshold:
            return None
        midi_filename = os.path.basename(midi_path)
        pm_drums.write(
            os.path.join(args.path_output_dir, f"{midi_filename}.mid"))
        return {
            "midi_path": midi_path,
            "pm_drums": pm_drums,
            "bass_drums_on_beat": bass_drums_on_beat
        }
    except Exception as e:
        print(f"Exception during processing of {midi_path}: {e}")
    finally:
        # Incremented on every path so progress counting stays accurate.
        counter.increment()
def process(msd_id: str, counter: AtomicCounter) -> Optional[dict]:
    """
    Processes the given MSD id and increments the counter.

    The method calls get_tags and extract_pianos, then writes the
    resulting MIDI files to disk.

    :param msd_id: the MSD id to process
    :param counter: the counter to increment
    :return: the dictionary containing the MSD id, the PrettyMIDI pianos
          and the matching tags, or None if no tag matches or the file
          cannot be processed (the exception is printed)
    """
    try:
        h5_path = msd_id_to_h5(msd_id, args.path_dataset_dir)
        with tables.open_file(h5_path) as h5:
            song_tags = get_tags(h5)
            matching_tags = [tag for tag in song_tags if tag in TAGS]
            # Guard clause: skip tracks without any matching tag.
            if not matching_tags:
                return
            pm_pianos = extract_pianos(msd_id)
            for index, pm_piano in enumerate(pm_pianos):
                output_path = os.path.join(args.path_output_dir,
                                           f"{msd_id}_{index}.mid")
                pm_piano.write(output_path)
            return {"msd_id": msd_id,
                    "pm_pianos": pm_pianos,
                    "tags": matching_tags}
    except Exception as e:
        print(f"Exception during processing of {msd_id}: {e}")
    finally:
        counter.increment()
def process(msd_id: str, counter: AtomicCounter) -> Optional[dict]:
    """
    Placeholder processor for the given MSD id; increments the counter.

    :param msd_id: the MSD id to process
    :param counter: the counter to increment
    :return: currently always None — the processing body is not yet
          implemented (see TODO below)
    """
    try:
        # TODO
        pass
    except Exception as e:
        print(f"Exception during processing of {msd_id}: {e}")
    finally:
        # Incremented on every path so progress counting stays accurate.
        counter.increment()
def app(msd_ids: List[str]):
    """
    Runs the piano extraction over the given MSD ids.

    Cleans the output directory, processes every id in a process pool,
    keeps the non-empty results, prints summary statistics and plots a
    histogram of the extracted piano lengths.

    :param msd_ids: the MSD ids to process
    """
    start = timeit.default_timer()

    # Cleanup the output directory
    shutil.rmtree(args.path_output_dir, ignore_errors=True)

    # Starts the threads
    with Pool(args.pool_size) as pool:
        manager = Manager()
        counter = AtomicCounter(manager, len(msd_ids))
        print("START")
        results = pool.starmap(process, zip(msd_ids, cycle([counter])))
        results = [result for result in results if result]
        print("END")
        results_percentage = len(results) / len(msd_ids) * 100
        print(f"Number of tracks: {len(MSD_SCORE_MATCHES)}, "
              f"number of tracks in sample: {len(msd_ids)}, "
              f"number of results: {len(results)} "
              f"({results_percentage:.2f}%)")

        # Creates an histogram for the piano lengths
        pm_pianos_list = [result["pm_pianos"] for result in results]
        pm_piano_lengths = [pm_piano.get_end_time()
                            for pm_pianos in pm_pianos_list
                            for pm_piano in pm_pianos]
        # (dead commented-out plt.figure call removed)
        plt.hist(pm_piano_lengths, bins=100, color="darkmagenta")
        plt.title('Piano lengths')
        # Fix: a histogram's y axis is the bin count; the binned value
        # (length in seconds) belongs on the x axis.
        plt.xlabel('length (sec)')
        plt.ylabel('count')
        plt.show()

        stop = timeit.default_timer()
        print("Time: ", stop - start)
def app(msd_ids: List[str]):
    """
    Runs the artist-count analysis over the given MSD ids.

    Processes every id in a process pool, keeps the non-empty results,
    prints summary statistics and plots a bar chart of the 25 most
    common artists.

    :param msd_ids: the MSD ids to process
    """
    start = timeit.default_timer()

    # Starts the threads
    with Pool(args.pool_size) as pool:
        manager = Manager()
        counter = AtomicCounter(manager, len(msd_ids))
        print("START")
        raw_results = pool.starmap(process, zip(msd_ids, cycle([counter])))
        results = [r for r in raw_results if r]
        print("END")
        results_percentage = len(results) / len(msd_ids) * 100
        print(f"Number of tracks: {len(MSD_SCORE_MATCHES)}, "
              f"number of tracks in sample: {len(msd_ids)}, "
              f"number of results: {len(results)} "
              f"({results_percentage:.2f}%)")

        # Creates a bar chart for the most common artists
        artist_counts = Counter(r["artist"] for r in results)
        most_common_artists = artist_counts.most_common(25)
        print(f"Most common artists: {most_common_artists}")
        plot_bars(most_common_artists, "Artist song count")

        stop = timeit.default_timer()
        print("Time: ", stop - start)
def app(msd_ids: List[str]):
    """
    Runs the artist-count analysis over the given MSD ids.

    Processes every id in a process pool, keeps the non-empty results,
    prints summary statistics and draws a bar chart of the 25 most
    common artists with matplotlib.

    :param msd_ids: the MSD ids to process
    """
    start = timeit.default_timer()

    # Starts the threads
    with Pool(args.pool_size) as pool:
        manager = Manager()
        counter = AtomicCounter(manager, len(msd_ids))
        print("START")
        raw_results = pool.starmap(process, zip(msd_ids, cycle([counter])))
        results = [r for r in raw_results if r]
        print("END")
        results_percentage = len(results) / len(msd_ids) * 100
        print(f"Number of tracks: {len(MSD_SCORE_MATCHES)}, "
              f"number of tracks in sample: {len(msd_ids)}, "
              f"number of results: {len(results)} "
              f"({results_percentage:.2f}%)")

        # Creates a bar chart for the most common artists
        most_common_artists = Counter(
            r["artist"] for r in results).most_common(25)
        print(f"Most common artists: {most_common_artists}")
        labels = [artist for artist, _ in most_common_artists]
        counts = [count for _, count in most_common_artists]
        bar_colors = [c.name for c in colors if c.name != "lavender"]
        plt.figure(num=None, figsize=(10, 8), dpi=500)
        plt.bar(labels, counts, color=bar_colors)
        plt.title("Artist song count")
        plt.xticks(rotation=30, horizontalalignment="right")
        plt.ylabel("count")
        plt.show()

        stop = timeit.default_timer()
        print("Time: ", stop - start)
def process(msd_id: str, counter: AtomicCounter) -> Optional[dict]:
    """
    Processes the given MSD id and increments the counter.

    The method finds and returns the artist name stored in the matching
    HDF5 metadata file.

    :param msd_id: the MSD id to process
    :param counter: the counter to increment
    :return: the dictionary containing the MSD id and the artist, or None
          if the file cannot be processed (the exception is printed)
    """
    try:
        h5_path = msd_id_to_h5(msd_id, args.path_dataset_dir)
        with tables.open_file(h5_path) as h5:
            raw_artist = h5.root.metadata.songs.cols.artist_name[0]
            artist = raw_artist.decode("utf-8")
            return {"msd_id": msd_id, "artist": artist}
    except Exception as e:
        print(f"Exception during processing of {msd_id}: {e}")
    finally:
        counter.increment()
def process(msd_id: str, counter: AtomicCounter) -> Optional[dict]:
    """
    Processes the given MSD id and increments the counter.

    The method calls the get_instrument_classes method.

    :param msd_id: the MSD id to process
    :param counter: the counter to increment
    :return: the dictionary containing the MSD id and the classes, or None
          if the file cannot be processed (the exception is printed)
    """
    try:
        h5_path = msd_id_to_h5(msd_id, args.path_dataset_dir)
        # NOTE(review): the h5 handle is opened but never read here —
        # presumably opening it just verifies that the matching HDF5 file
        # exists before extracting classes; confirm against callers.
        with tables.open_file(h5_path) as h5:
            classes = get_instrument_classes(msd_id)
            return {"msd_id": msd_id, "classes": classes}
    except Exception as e:
        print(f"Exception during processing of {msd_id}: {e}")
    finally:
        counter.increment()
def process(msd_id: str, counter: AtomicCounter) -> Optional[dict]:
    """
    Processes the given MSD id and increments the counter.

    The method calls the extract_drums method and writes the resulting
    MIDI file to disk.

    :param msd_id: the MSD id to process
    :param counter: the counter to increment
    :return: the dictionary containing the MSD id and the PrettyMIDI
          drums, or None if the file cannot be processed (the exception
          is printed)
    """
    try:
        h5_path = msd_id_to_h5(msd_id, args.path_dataset_dir)
        # NOTE(review): the h5 handle is opened but never read here —
        # presumably opening it just verifies that the matching HDF5 file
        # exists before extracting drums; confirm against callers.
        with tables.open_file(h5_path) as h5:
            pm_drums = extract_drums(msd_id)
            output_path = os.path.join(args.path_output_dir,
                                       f"{msd_id}.mid")
            pm_drums.write(output_path)
            return {"msd_id": msd_id, "pm_drums": pm_drums}
    except Exception as e:
        print(f"Exception during processing of {msd_id}: {e}")
    finally:
        counter.increment()
def app(msd_ids: List[str]):
    """
    Runs the tag-count analysis over the given MSD ids.

    Processes every id in a process pool, keeps the non-empty results,
    prints summary statistics and plots a bar chart of the most common
    matched tag combinations.

    :param msd_ids: the MSD ids to process
    """
    start = timeit.default_timer()

    # Starts the threads
    with Pool(args.pool_size) as pool:
        manager = Manager()
        counter = AtomicCounter(manager, len(msd_ids))
        print("START")
        results = pool.starmap(process, zip(msd_ids, cycle([counter])))
        results = [result for result in results if result]
        print("END")
        results_percentage = len(results) / len(msd_ids) * 100
        print(f"Number of tracks: {len(MSD_SCORE_MATCHES)}, "
              f"number of tracks in sample: {len(msd_ids)}, "
              f"number of results: {len(results)} "
              f"({results_percentage:.2f}%)")

        # Finds which tags matches and count the results.
        # Fix: the original also accumulated a `unique_tags` set that was
        # never read — removed as an unused local.
        tags = []
        for result in results:
            result_tags = result["tags"]
            matching_tags = [tag for tag in result_tags if tag in TAGS]
            if matching_tags:
                # Tags that occur together are counted as one combined key.
                tags.append("+".join(matching_tags))
        match_percentage = len(tags) / len(results) * 100
        print(f"Number of results: {len(results)}, "
              f"number of matched tags: {len(tags)} "
              f"({match_percentage:.2f}%)")

        # Creates a bar chart for the most common tags.
        # (Plotting is delegated to plot_bars; the superseded inline
        # matplotlib code was dead, commented-out and has been removed.)
        most_common_tags = Counter(tags).most_common()
        plot_bars(most_common_tags, "Tags count for " + ",".join(TAGS))

        stop = timeit.default_timer()
        print("Time: ", stop - start)
def app(msd_ids: List[str]):
    """
    Runs the piano extraction and tag analysis over the given MSD ids.

    Cleans the output directory, processes every id in a process pool,
    keeps the non-empty results, prints summary statistics, plots a
    histogram of the piano lengths and a bar chart of the tag counts.

    :param msd_ids: the MSD ids to process
    """
    start = timeit.default_timer()

    # Cleanup the output directory
    shutil.rmtree(args.path_output_dir, ignore_errors=True)

    # Starts the threads
    with Pool(args.pool_size) as pool:
        manager = Manager()
        counter = AtomicCounter(manager, len(msd_ids))
        print("START")
        results = pool.starmap(process, zip(msd_ids, cycle([counter])))
        results = [result for result in results if result]
        print("END")
        results_percentage = len(results) / len(msd_ids) * 100
        print(f"Number of tracks: {len(MSD_SCORE_MATCHES)}, "
              f"number of tracks in sample: {len(msd_ids)}, "
              f"number of results: {len(results)} "
              f"({results_percentage:.2f}%)")

        # Creates an histogram for the piano lengths
        pm_pianos_list = [result["pm_pianos"] for result in results]
        pm_piano_lengths = [pm_piano.get_end_time()
                            for pm_pianos in pm_pianos_list
                            for pm_piano in pm_pianos]
        plt.figure(num=None, figsize=(10, 8), dpi=500)
        plt.hist(pm_piano_lengths, bins=100, color="darkmagenta")
        plt.title('Piano lengths')
        # Fix: a histogram's y axis is the bin count; the binned value
        # (length in seconds) belongs on the x axis.
        plt.xlabel('length (sec)')
        plt.ylabel('count')
        plt.show()

        # Creates a bar chart for the tags
        tags_list = [result["tags"] for result in results]
        tags = [tag for result_tags in tags_list for tag in result_tags]
        most_common_tags = Counter(tags).most_common()
        plt.figure(num=None, figsize=(10, 8), dpi=500)
        plt.bar([tag for tag, _ in most_common_tags],
                [count for _, count in most_common_tags],
                color=[color.name for color in colors
                       if color.name != "lavender"])
        plt.title("Tags count for " + ",".join(TAGS))
        plt.xticks(rotation=30, horizontalalignment="right")
        plt.ylabel("count")
        plt.show()

        stop = timeit.default_timer()
        print("Time: ", stop - start)
def app(midi_paths: List[str]):
    """
    Runs the drum extraction over the given MIDI paths.

    Cleans the output directory, processes every path in a process pool,
    keeps the non-empty results, prints summary statistics and plots
    histograms of the drum lengths and of the bass-drum-on-beat values.

    :param midi_paths: the MIDI file paths to process
    """
    start = timeit.default_timer()

    # Cleanup the output directory
    shutil.rmtree(args.path_output_dir, ignore_errors=True)

    # Starts the threads
    with Pool(args.pool_size) as pool:
        manager = Manager()
        counter = AtomicCounter(manager, len(midi_paths), 1000)
        print("START")
        results = pool.starmap(process, zip(midi_paths, cycle([counter])))
        results = [result for result in results if result]
        print("END")
        results_percentage = len(results) / len(midi_paths) * 100
        print(f"Number of tracks: {len(MIDI_PATHS)}, "
              f"number of tracks in sample: {len(midi_paths)}, "
              f"number of results: {len(results)} "
              f"({results_percentage:.2f}%)")

        # Creates an histogram for the drum lengths
        pm_drums = [result["pm_drums"] for result in results]
        pm_drums_lengths = [pm.get_end_time() for pm in pm_drums]
        plt.figure(num=None, figsize=(10, 8), dpi=500)
        plt.hist(pm_drums_lengths, bins=100, color="darkmagenta")
        plt.title('Drums lengths')
        # Fix: a histogram's y axis is the bin count; the binned value
        # (length in seconds) belongs on the x axis.
        plt.xlabel('length (sec)')
        plt.ylabel('count')
        plt.show()

        # Creates an histogram for the bass drums on beat
        bass_drums_on_beat = [result["bass_drums_on_beat"]
                              for result in results]
        plt.figure(num=None, figsize=(10, 8), dpi=500)
        plt.hist(bass_drums_on_beat, bins=100, color="darkmagenta")
        plt.title('Bass drums on beat')
        # Binned value on x; presumably a ratio produced by
        # get_bass_drums_on_beat — TODO confirm the range/units.
        plt.xlabel('bass drums on beat')
        plt.ylabel('count')
        plt.show()

        stop = timeit.default_timer()
        print("Time: ", stop - start)