Ejemplo n.º 1
0
def main():
    """Detect, plot and export the track borders of one hard-coded mix.

    Reads the feature CSV, the cue sheet and the pickled
    ``(self_sim, factor)`` pair stored under ``./data`` for the mix named
    below, runs ``tools.detect_track_borders`` on them, draws the detected
    borders (black, y 0..24) next to the cue-sheet borders (yellow,
    y 24..48) on top of the spectrum plot, writes a cue sheet through
    ``export_cue`` and finally shows the figure.
    """
    name = 'Aly & Fila - Future Sound of Egypt 338 '
    print(name)

    # All inputs live in ./data and share the mix name as their stem.
    data_dir = os.path.join('.', 'data')
    feature_stem = name + experiment.plugin_suffix
    csv_file = os.path.join(data_dir, feature_stem + '.csv')
    cue_file = os.path.join(data_dir, name + '.cue')
    sim_file = os.path.join(data_dir, feature_stem + '.sim')

    timestamps, data = experiment.read_csv(csv_file)
    info = experiment.read_cue(cue_file)

    # An intro/outro is recognised purely by the title of the first/last
    # cue entry.
    first_title = info[0]['TITLE'].strip().lower()
    last_title = info[-1]['TITLE'].strip().lower()
    has_intro = 'intro' in first_title
    has_outro = 'outro' in last_title

    # NOTE(review): unpickling executes whatever the file contains, so only
    # feed this .sim files you produced yourself.
    with open(sim_file, 'rb') as sim_handle:
        self_sim, factor = cPickle.load(sim_handle)

    # NOTE(review): both results below are never read again; the statements
    # are kept only so the sequence of calls stays exactly as it was.
    _initial_borders, _novelty = tools.init_borders2(self_sim)
    _sec_per_row = timestamps[-1] / self_sim.shape[0]

    total_length = timestamps[-1]
    track_count = len(info)
    filtered = tools.detect_track_borders(data, total_length, track_count,
                                          self_sim=self_sim, factor=factor,
                                          has_intro=has_intro,
                                          has_outro=has_outro)

    # draw_spectrum receives the feature matrix with its two axes swapped.
    data = data.transpose((1, 0))
    draw_spectrum(data, timestamps, 1)

    # Detected borders: black lines in the lower band of the plot.
    detected_marks = [datetime.datetime.fromtimestamp(border)
                      for border in filtered]
    vlines(detected_marks, 0, 24, color='k',
           linewidths=[1] * len(filtered), alpha=1.0)

    # Borders taken from the cue sheet: yellow lines in the upper band.
    reference_marks = [datetime.datetime.fromtimestamp(entry['INDEX'])
                       for entry in info]
    vlines(reference_marks, 24, 48, color='y',
           linewidths=[1] * len(info), alpha=1.0)
    # savefig('fig.pdf', bbox_inches='tight')

    tracks = [(entry['PERFORMER'], entry['TITLE']) for entry in info]
    export_cue(name, tracks, filtered)

    show()
Ejemplo n.º 2
0
def main():
    """Run border detection on one hard-coded mix and visualise the result.

    Inputs are taken from ``./data``: the feature CSV, the cue sheet and a
    pickled ``(self_sim, factor)`` tuple, all named after the mix. The
    borders returned by ``tools.detect_track_borders`` are drawn in black
    (y 0..24) and the cue-sheet borders in yellow (y 24..48) over the
    spectrum plot; a cue sheet is then written with ``export_cue`` and the
    figure is shown.
    """
    name = 'Aly & Fila - Future Sound of Egypt 338 '
    print(name)

    # Build the three input paths from a common directory and stem.
    base_dir = os.path.join('.', 'data')
    stem_with_suffix = name + experiment.plugin_suffix
    csv_file = os.path.join(base_dir, stem_with_suffix + '.csv')
    cue_file = os.path.join(base_dir, name + '.cue')
    sim_file = os.path.join(base_dir, stem_with_suffix + '.sim')

    timestamps, data = experiment.read_csv(csv_file)
    info = experiment.read_cue(cue_file)

    # Intro/outro flags come from the titles of the first and last entries.
    opening_title = info[0]['TITLE'].strip().lower()
    closing_title = info[-1]['TITLE'].strip().lower()
    has_intro = 'intro' in opening_title
    has_outro = 'outro' in closing_title

    # NOTE(review): cPickle.load runs arbitrary code embedded in the file;
    # use only with trusted .sim files.
    with open(sim_file, 'rb') as pickled:
        loaded = cPickle.load(pickled)
    self_sim, factor = loaded

    # NOTE(review): neither value computed here is used afterwards; the two
    # statements are retained so the call sequence is unchanged.
    _rough_borders, _novelty_curve = tools.init_borders2(self_sim)
    _seconds_per_row = timestamps[-1] / self_sim.shape[0]

    detection_options = {
        'self_sim': self_sim,
        'factor': factor,
        'has_intro': has_intro,
        'has_outro': has_outro,
    }
    filtered = tools.detect_track_borders(data, timestamps[-1], len(info),
                                          **detection_options)

    # The spectrum plot gets the feature matrix with its axes swapped.
    data = data.transpose((1, 0))
    draw_spectrum(data, timestamps, 1)

    # Lower band, black: borders found by the detector.
    found = [datetime.datetime.fromtimestamp(moment) for moment in filtered]
    vlines(found, 0, 24, color='k', linewidths=[1] * len(filtered),
           alpha=1.0)

    # Upper band, yellow: borders listed in the cue sheet.
    expected = [datetime.datetime.fromtimestamp(track['INDEX'])
                for track in info]
    vlines(expected, 24, 48, color='y', linewidths=[1] * len(info),
           alpha=1.0)
    # savefig('fig.pdf', bbox_inches='tight')

    tracks = [(track['PERFORMER'], track['TITLE']) for track in info]
    export_cue(name, tracks, filtered)

    show()
Ejemplo n.º 3
0
def main():
    """Evaluate track-border detection on every mix found in ``./temp_data``.

    For each ``*.mp3`` that has a non-empty cue sheet next to it, the
    features are loaded (extracting them first when neither the ``.npz`` nor
    the ``.csv`` exists), borders are detected with
    ``tools.detect_track_borders`` and compared with the cue-sheet borders.
    One line of metrics per mix is printed and appended to
    ``logs/test.log``; the collected results are finally handed to
    ``print_results`` sorted by file name.

    Mixes without a cue sheet, or without loadable features, are skipped
    with a message.
    """
    data_dir = os.path.join(".", "temp_data")
    mp3_files = [f for f in os.listdir(data_dir) if os.path.isfile(os.path.join(data_dir, f)) and f.endswith(".mp3")]
    result = []
    nc = tools.NoveltyCalculator()
    with open(os.path.join("logs", "test.log"), "wb") as log:
        for mp3 in mp3_files:
            name = mp3[:-4]  # drop the ".mp3" extension
            cue = os.path.join(data_dir, name + ".cue")
            if not os.path.isfile(cue):
                print("No cue file for %s, skipping" % mp3)
                continue
            info = read_cue(cue)
            if not info:
                continue

            # An intro/outro is recognised by the first/last track title.
            has_intro = "intro" in info[0]["TITLE"].strip().lower()
            has_outro = "outro" in info[-1]["TITLE"].strip().lower()

            csv_file = os.path.join(data_dir, name + plugin_suffix + ".csv")
            npz_file = os.path.join(data_dir, name + plugin_suffix + ".npz")
            self_sim_file = os.path.join(data_dir, name + plugin_suffix + ".sim.npz")
            if not os.path.isfile(csv_file) and not os.path.isfile(npz_file):
                extract_features(os.path.join(data_dir, mp3))

            # Reset per file: without this, a mix whose features are missing
            # would either hit a NameError (first file) or be silently scored
            # against the features of the previous mix.
            timestamps = None
            data = None
            if os.path.isfile(npz_file):
                saved = numpy.load(npz_file)
                timestamps = saved["timestamps"]
                data = saved["data"]
            elif os.path.isfile(csv_file):
                (timestamps, data) = read_csv(csv_file)
                replace_csv(timestamps, data, csv_file, npz_file)
            if timestamps is None or data is None:
                print("No features for %s, skipping" % mp3)
                continue

            # Reuse a stored self-similarity matrix when there is one;
            # otherwise pass the path on (presumably so that
            # detect_track_borders can store the matrix there - confirm).
            border_kwargs = {"has_intro": has_intro, "has_outro": has_outro}
            if os.path.isfile(self_sim_file):
                saved = numpy.load(self_sim_file)
                border_kwargs["self_sim"] = saved["self_sim"]
                border_kwargs["factor"] = saved["factor"]
            else:
                border_kwargs["sim_file"] = self_sim_file
            borders = tools.detect_track_borders(data, timestamps[-1], len(info), nc, **border_kwargs)
            if len(borders) < len(info):
                print("%d %d" % (len(borders), len(info)))

            # Close the last expected interval with the last detected border
            # so both border lists end at the same point.
            true_borders = [c["INDEX"] for c in info]
            true_borders.append(borders[-1])
            expected_intervals, expected_labels = create_labeled_intervals(true_borders)
            actual_intervals, actual_labels = create_labeled_intervals(borders)
            validate_structure(expected_intervals, expected_labels, actual_intervals, actual_labels)
            precision, recall, f_measure = pairwise(
                expected_intervals, expected_labels, actual_intervals, actual_labels, frame_size=1
            )
            actual_to_expected, expected_to_actual = deviation(expected_intervals, actual_intervals)
            avg_diff, max_diff = get_diff(info, borders)
            result.append(
                {"name": mp3, "info": info, "borders": borders, "avg_diff": avg_diff, "max_diff": max_diff}
            )

            metrics = (
                mp3,
                avg_diff,
                max_diff,
                precision,
                recall,
                f_measure,
                actual_to_expected,
                expected_to_actual,
            )
            print("%s\t%.3f\t%.3f\t\t%.4f\t%.4f\t%.4f\t\t%.4f\t%.4f" % metrics)
            log.write("%s\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%s\r\n" % (metrics + (borders,)))
        print_results(sorted(result, key=lambda x: x["name"]), log)
Ejemplo n.º 4
0
def main():
    """Score track-border detection for all mixes stored in ``./temp_data``.

    Every ``*.mp3`` with a non-empty cue sheet of the same name is
    processed: its features are loaded (and extracted first when neither
    the ``.npz`` nor the ``.csv`` file exists), ``tools.detect_track_borders``
    is run and the outcome is compared with the borders from the cue sheet.
    The metrics of each mix are printed and written to ``logs/test.log``;
    at the end ``print_results`` receives all results sorted by file name.

    A mix is skipped with a message when its cue sheet is missing or when no
    features could be loaded for it.
    """
    data_dir = os.path.join('.', 'temp_data')
    mp3_files = [
        f for f in os.listdir(data_dir)
        if os.path.isfile(os.path.join(data_dir, f)) and f.endswith('.mp3')
    ]
    result = []
    nc = tools.NoveltyCalculator()
    with open(os.path.join('logs', 'test.log'), 'wb') as log:
        for mp3 in mp3_files:
            name = mp3[:-4]  # strip the '.mp3' extension
            cue = os.path.join(data_dir, name + '.cue')
            if not os.path.isfile(cue):
                print('No cue file for %s, skipping' % mp3)
                continue
            info = read_cue(cue)
            if not info:
                continue

            # Intro/outro flags are derived from the first/last track title.
            has_intro = 'intro' in info[0]['TITLE'].strip().lower()
            has_outro = 'outro' in info[-1]['TITLE'].strip().lower()

            feature_stem = os.path.join(data_dir, name + plugin_suffix)
            csv_file = feature_stem + '.csv'
            npz_file = feature_stem + '.npz'
            self_sim_file = feature_stem + '.sim.npz'
            if not (os.path.isfile(csv_file) or os.path.isfile(npz_file)):
                extract_features(os.path.join(data_dir, mp3))

            # Start from a clean slate for every file. Otherwise a mix with
            # no feature file raises NameError on the first iteration, or is
            # evaluated against the previous mix's features later on.
            timestamps = None
            data = None
            if os.path.isfile(npz_file):
                saved = numpy.load(npz_file)
                timestamps = saved['timestamps']
                data = saved['data']
            elif os.path.isfile(csv_file):
                (timestamps, data) = read_csv(csv_file)
                replace_csv(timestamps, data, csv_file, npz_file)
            if timestamps is None or data is None:
                print('No features for %s, skipping' % mp3)
                continue

            # Prefer a stored self-similarity matrix; otherwise hand over
            # the path (presumably detect_track_borders saves the matrix
            # there - confirm against tools).
            border_kwargs = {'has_intro': has_intro, 'has_outro': has_outro}
            if os.path.isfile(self_sim_file):
                saved = numpy.load(self_sim_file)
                border_kwargs['self_sim'] = saved['self_sim']
                border_kwargs['factor'] = saved['factor']
            else:
                border_kwargs['sim_file'] = self_sim_file
            borders = tools.detect_track_borders(
                data, timestamps[-1], len(info), nc, **border_kwargs)
            if len(borders) < len(info):
                print('%d %d' % (len(borders), len(info)))

            # The expected borders get the last detected border appended so
            # that both lists end at the same point.
            true_borders = [c['INDEX'] for c in info]
            true_borders.append(borders[-1])
            expected_intervals, expected_labels = create_labeled_intervals(
                true_borders)
            actual_intervals, actual_labels = create_labeled_intervals(
                borders)
            validate_structure(expected_intervals, expected_labels,
                               actual_intervals, actual_labels)
            precision, recall, f_measure = pairwise(
                expected_intervals, expected_labels,
                actual_intervals, actual_labels, frame_size=1)
            actual_to_expected, expected_to_actual = deviation(
                expected_intervals, actual_intervals)
            avg_diff, max_diff = get_diff(info, borders)
            result.append({
                'name': mp3,
                'info': info,
                'borders': borders,
                'avg_diff': avg_diff,
                'max_diff': max_diff
            })

            metrics = (mp3, avg_diff, max_diff, precision, recall, f_measure,
                       actual_to_expected, expected_to_actual)
            print('%s\t%.3f\t%.3f\t\t%.4f\t%.4f\t%.4f\t\t%.4f\t%.4f' % metrics)
            log.write('%s\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%s\r\n' %
                      (metrics + (borders,)))
        print_results(sorted(result, key=lambda x: x['name']), log)