def main():
    """Detect track borders for one hard-coded mix and plot them.

    Reads the feature CSV, the cue sheet and the pickled
    ``(self_sim, factor)`` pair for ``name`` from ``./data``, runs
    ``tools.detect_track_borders`` on them, draws the spectrum, overlays
    vertical lines at the detected borders (colour ``'k'``, y-range 0-24)
    and at the cue-sheet ``INDEX`` values (colour ``'y'``, y-range 24-48),
    hands the detected borders to ``export_cue`` and shows the figure.
    """
    # NOTE(review): the trailing space ends up in every derived file name;
    # confirm it matches the files on disk before "fixing" it.
    name = 'Aly & Fila - Future Sound of Egypt 338 '
    print(name)

    csv_file = os.path.join('.', 'data', name + experiment.plugin_suffix + '.csv')
    cue_file = os.path.join('.', 'data', name + '.cue')
    sim_file = os.path.join('.', 'data', name + experiment.plugin_suffix + '.sim')

    timestamps, data = experiment.read_csv(csv_file)
    info = experiment.read_cue(cue_file)

    # The first/last cue titles decide whether the mix has an intro/outro.
    has_intro = 'intro' in info[0]['TITLE'].strip().lower()
    has_outro = 'outro' in info[-1]['TITLE'].strip().lower()

    # NOTE: unpickling can execute arbitrary code; only load .sim files
    # that were produced locally by this project.
    with open(sim_file, 'rb') as f:
        self_sim, factor = cPickle.load(f)

    # The feature matrix is passed in its original orientation; it is only
    # transposed afterwards for drawing.
    filtered = tools.detect_track_borders(
        data, timestamps[-1], len(info),
        self_sim=self_sim, factor=factor,
        has_intro=has_intro, has_outro=has_outro)

    data = data.transpose((1, 0))
    draw_spectrum(data, timestamps, 1)

    # Detected borders and cue-sheet borders are drawn in separate y-ranges
    # so they can be compared visually.
    detected_times = [datetime.datetime.fromtimestamp(x) for x in filtered]
    vlines(detected_times, 0, 24, color='k',
           linewidths=[1] * len(filtered), alpha=1.0)
    cue_times = [datetime.datetime.fromtimestamp(x['INDEX']) for x in info]
    vlines(cue_times, 24, 48, color='y',
           linewidths=[1] * len(info), alpha=1.0)

    tracks = [(x['PERFORMER'], x['TITLE']) for x in info]
    export_cue(name, tracks, filtered)

    show()
def main():
    """Plot detected track borders against the cue sheet for one fixed mix.

    Loads the feature CSV, the cue sheet and the pickled
    ``(self_sim, factor)`` pair for ``name`` from ``./data``, calls
    ``tools.detect_track_borders``, draws the spectrum, marks the detected
    borders (colour ``'k'``, y-range 0-24) and the cue-sheet ``INDEX``
    values (colour ``'y'``, y-range 24-48) with vertical lines, passes the
    detected borders to ``export_cue`` and finally shows the figure.
    """
    # NOTE(review): the trailing space becomes part of every derived file
    # name; confirm it matches the files on disk before changing it.
    name = 'Aly & Fila - Future Sound of Egypt 338 '
    print(name)

    csv_file = os.path.join('.', 'data', name + experiment.plugin_suffix + '.csv')
    cue_file = os.path.join('.', 'data', name + '.cue')
    sim_file = os.path.join('.', 'data', name + experiment.plugin_suffix + '.sim')

    timestamps, data = experiment.read_csv(csv_file)
    info = experiment.read_cue(cue_file)

    # Intro/outro flags come from the titles of the first and last cue entry.
    has_intro = 'intro' in info[0]['TITLE'].strip().lower()
    has_outro = 'outro' in info[-1]['TITLE'].strip().lower()

    # NOTE: unpickling can execute arbitrary code; only load .sim files
    # that were produced locally by this project.
    with open(sim_file, 'rb') as f:
        self_sim, factor = cPickle.load(f)

    # Detection runs on the untransposed feature matrix; the transpose
    # below is only for drawing.
    filtered = tools.detect_track_borders(
        data, timestamps[-1], len(info),
        self_sim=self_sim, factor=factor,
        has_intro=has_intro, has_outro=has_outro)

    data = data.transpose((1, 0))
    draw_spectrum(data, timestamps, 1)

    # Two separate y-ranges keep detected and reference borders apart.
    detected_times = [datetime.datetime.fromtimestamp(x) for x in filtered]
    vlines(detected_times, 0, 24, color='k',
           linewidths=[1] * len(filtered), alpha=1.0)
    cue_times = [datetime.datetime.fromtimestamp(x['INDEX']) for x in info]
    vlines(cue_times, 24, 48, color='y',
           linewidths=[1] * len(info), alpha=1.0)

    tracks = [(x['PERFORMER'], x['TITLE']) for x in info]
    export_cue(name, tracks, filtered)

    show()
def main():
    """Evaluate track-border detection on every mp3 in ``./temp_data``.

    For each ``.mp3`` with a matching, non-empty ``.cue`` sheet the features
    are loaded from the ``.npz`` cache, or read from the ``.csv`` (running
    ``extract_features`` first when neither file exists). Track borders are
    then detected with ``tools.detect_track_borders`` and compared with the
    cue-sheet ``INDEX`` values. One metrics line per file is printed and
    written to ``logs/test.log``; all collected results are finally passed
    to ``print_results``.
    """
    data_dir = os.path.join(".", "temp_data")
    mp3_files = [
        f for f in os.listdir(data_dir)
        if os.path.isfile(os.path.join(data_dir, f)) and f.endswith(".mp3")
    ]
    result = []
    nc = tools.NoveltyCalculator()

    with open(os.path.join("logs", "test.log"), "wb") as log:
        for mp3 in mp3_files:
            name = mp3[:-4]  # strip the ".mp3" extension

            cue = os.path.join(data_dir, name + ".cue")
            if not os.path.isfile(cue):
                print("No cue file for %s, skipping" % mp3)
                continue
            info = read_cue(cue)
            if not info:
                continue

            # Intro/outro flags come from the first and last cue titles.
            has_intro = "intro" in info[0]["TITLE"].strip().lower()
            has_outro = "outro" in info[-1]["TITLE"].strip().lower()

            csv_file = os.path.join(data_dir, name + plugin_suffix + ".csv")
            npz_file = os.path.join(data_dir, name + plugin_suffix + ".npz")
            self_sim_file = os.path.join(data_dir, name + plugin_suffix + ".sim.npz")

            if not os.path.isfile(csv_file) and not os.path.isfile(npz_file):
                extract_features(os.path.join(data_dir, mp3))

            # Reset per file: without this, a file whose features could not
            # be produced would raise on the first iteration or silently
            # reuse the previous file's features on later ones.
            timestamps = data = None
            if os.path.isfile(npz_file):
                saved = numpy.load(npz_file)
                timestamps = saved["timestamps"]
                data = saved["data"]
            elif os.path.isfile(csv_file):
                timestamps, data = read_csv(csv_file)
                replace_csv(timestamps, data, csv_file, npz_file)
            if timestamps is None or data is None:
                print("No features for %s, skipping" % mp3)
                continue

            # Use the cached self-similarity data when present; otherwise
            # pass the cache path through as ``sim_file``.
            detect_kwargs = {"has_intro": has_intro, "has_outro": has_outro}
            if os.path.isfile(self_sim_file):
                saved = numpy.load(self_sim_file)
                detect_kwargs["self_sim"] = saved["self_sim"]
                detect_kwargs["factor"] = saved["factor"]
            else:
                detect_kwargs["sim_file"] = self_sim_file
            borders = tools.detect_track_borders(
                data, timestamps[-1], len(info), nc, **detect_kwargs
            )

            if len(borders) < len(info):
                print("%s %s" % (len(borders), len(info)))

            # Reuse the last detected border as the final boundary of the
            # expected segmentation.
            true_borders = [c["INDEX"] for c in info]
            true_borders.append(borders[-1])

            expected_intervals, expected_labels = create_labeled_intervals(true_borders)
            actual_intervals, actual_labels = create_labeled_intervals(borders)
            validate_structure(
                expected_intervals, expected_labels, actual_intervals, actual_labels
            )

            precision, recall, f_measure = pairwise(
                expected_intervals, expected_labels,
                actual_intervals, actual_labels,
                frame_size=1,
            )
            actual_to_expected, expected_to_actual = deviation(
                expected_intervals, actual_intervals
            )
            avg_diff, max_diff = get_diff(info, borders)

            result.append(
                {
                    "name": mp3,
                    "info": info,
                    "borders": borders,
                    "avg_diff": avg_diff,
                    "max_diff": max_diff,
                }
            )

            print(
                "%s\t%.3f\t%.3f\t\t%.4f\t%.4f\t%.4f\t\t%.4f\t%.4f"
                % (
                    mp3, avg_diff, max_diff,
                    precision, recall, f_measure,
                    actual_to_expected, expected_to_actual,
                )
            )
            log.write(
                "%s\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%s\r\n"
                % (
                    mp3, avg_diff, max_diff,
                    precision, recall, f_measure,
                    actual_to_expected, expected_to_actual,
                    borders,
                )
            )

        # Needs the log file still open, so it stays inside the ``with``.
        print_results(sorted(result, key=lambda x: x["name"]), log)
def main():
    """Evaluate track-border detection on every mp3 in ``./temp_data``.

    Each ``.mp3`` with a matching, non-empty ``.cue`` sheet is processed as
    follows: load its features from the ``.npz`` cache or from the ``.csv``
    (running ``extract_features`` first when neither file exists), detect
    track borders with ``tools.detect_track_borders``, and compare them
    with the cue-sheet ``INDEX`` values. One metrics line per file is
    printed and written to ``logs/test.log``; all collected results are
    finally passed to ``print_results``.
    """
    data_dir = os.path.join('.', 'temp_data')
    mp3_files = [
        f for f in os.listdir(data_dir)
        if os.path.isfile(os.path.join(data_dir, f)) and f.endswith('.mp3')
    ]
    result = []
    nc = tools.NoveltyCalculator()

    with open(os.path.join('logs', 'test.log'), 'wb') as log:
        for mp3 in mp3_files:
            name = mp3[:-4]  # strip the ".mp3" extension

            cue = os.path.join(data_dir, name + '.cue')
            if not os.path.isfile(cue):
                print('No cue file for %s, skipping' % mp3)
                continue
            info = read_cue(cue)
            if not info:
                continue

            # Intro/outro flags come from the first and last cue titles.
            has_intro = 'intro' in info[0]['TITLE'].strip().lower()
            has_outro = 'outro' in info[-1]['TITLE'].strip().lower()

            csv_file = os.path.join(data_dir, name + plugin_suffix + '.csv')
            npz_file = os.path.join(data_dir, name + plugin_suffix + '.npz')
            self_sim_file = os.path.join(data_dir,
                                         name + plugin_suffix + '.sim.npz')

            if not os.path.isfile(csv_file) and not os.path.isfile(npz_file):
                extract_features(os.path.join(data_dir, mp3))

            # Reset per file: without this, a file whose features could not
            # be produced would raise on the first iteration or silently
            # reuse the previous file's features on later ones.
            timestamps = data = None
            if os.path.isfile(npz_file):
                saved = numpy.load(npz_file)
                timestamps = saved['timestamps']
                data = saved['data']
            elif os.path.isfile(csv_file):
                timestamps, data = read_csv(csv_file)
                replace_csv(timestamps, data, csv_file, npz_file)
            if timestamps is None or data is None:
                print('No features for %s, skipping' % mp3)
                continue

            # Use the cached self-similarity data when present; otherwise
            # pass the cache path through as ``sim_file``.
            detect_kwargs = {'has_intro': has_intro, 'has_outro': has_outro}
            if os.path.isfile(self_sim_file):
                saved = numpy.load(self_sim_file)
                detect_kwargs['self_sim'] = saved['self_sim']
                detect_kwargs['factor'] = saved['factor']
            else:
                detect_kwargs['sim_file'] = self_sim_file
            borders = tools.detect_track_borders(
                data, timestamps[-1], len(info), nc, **detect_kwargs)

            if len(borders) < len(info):
                print('%s %s' % (len(borders), len(info)))

            # Reuse the last detected border as the final boundary of the
            # expected segmentation.
            true_borders = [c['INDEX'] for c in info]
            true_borders.append(borders[-1])

            expected_intervals, expected_labels = create_labeled_intervals(
                true_borders)
            actual_intervals, actual_labels = create_labeled_intervals(
                borders)
            validate_structure(expected_intervals, expected_labels,
                               actual_intervals, actual_labels)

            precision, recall, f_measure = pairwise(
                expected_intervals, expected_labels,
                actual_intervals, actual_labels,
                frame_size=1)
            actual_to_expected, expected_to_actual = deviation(
                expected_intervals, actual_intervals)
            avg_diff, max_diff = get_diff(info, borders)

            result.append({
                'name': mp3,
                'info': info,
                'borders': borders,
                'avg_diff': avg_diff,
                'max_diff': max_diff,
            })

            print('%s\t%.3f\t%.3f\t\t%.4f\t%.4f\t%.4f\t\t%.4f\t%.4f' %
                  (mp3, avg_diff, max_diff, precision, recall, f_measure,
                   actual_to_expected, expected_to_actual))
            log.write(
                '%s\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%s\r\n' %
                (mp3, avg_diff, max_diff, precision, recall, f_measure,
                 actual_to_expected, expected_to_actual, borders))

        # Needs the log file still open, so it stays inside the ``with``.
        print_results(sorted(result, key=lambda x: x['name']), log)