def posterior_stash_to_jams(stash, penalty_values, output_directory,
                            vocab, model_params):
    """Decode a stash of posteriors to JAMS and write to disk.

    Parameters
    ----------
    stash : biggie.Stash
        Posteriors to decode.
    penalty_values : array_like
        Collection of penalty values with which to run Viterbi.
    output_directory : str
        Base path to write out JAMS files; each collection will be written as
        {output_directory}/{penalty_values[i]}.jamset
    vocab : dl4mir.chords.lexicon.Vocab
        Map from posterior indices to string labels.
    model_params : dict
        Metadata to associate with the annotation.
    """
    # Sweep over the given penalty values.
    for penalty in penalty_values:
        print "[{0}] \tStarting p = {1}".format(time.asctime(), penalty)
        results = decode_stash_parallel(stash, penalty, vocab, NUM_CPUS)

        output_file = os.path.join(
            output_directory, "{0}.jamset".format(penalty))

        jamset = dict()
        for key, annot in results.iteritems():
            annot.sandbox.update(timestamp=time.asctime(), **model_params)
            jam = pyjams.JAMS(chord=[annot])
            jam.sandbox.track_id = key
            jamset[key] = jam

        futils.create_directory(output_directory)
        util.save_jamset(jamset, output_file)
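
A minimal usage sketch for the function above. The stash path, penalty grid, and model metadata are hypothetical placeholders; the vocabulary follows the lex.Strict(157) usage that appears in later examples, and the import path matches the docstring.

import biggie
import dl4mir.chords.lexicon as lex

# Hypothetical inputs: a stash of chord posteriors and a small penalty sweep.
posteriors = biggie.Stash("models/chords/posteriors.hdf5")
penalties = [-40.0, -20.0, -10.0, -5.0]  # example Viterbi penalty values
vocab = lex.Strict(157)                  # strict 157-class chord vocabulary
model_params = dict(model_name="cqt-dnn", fold="0")

posterior_stash_to_jams(posteriors, penalties, "estimations/chords",
                        vocab, model_params)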
Example 3
def main(stash_file, input_key, transform_file,
         param_file, output_file, verbose=True):
    transform = optimus.load(transform_file, param_file)
    stash = biggie.Stash(stash_file)
    futil.create_directory(os.path.split(output_file)[0])
    output = biggie.Stash(output_file)
    util.process_stash(stash, transform, output, input_key, verbose=verbose)
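
A sketch of how this entry point might be invoked directly; every path and the field name "cqt" are illustrative placeholders, not values prescribed by the source.

main(stash_file="features/train.hdf5",
     input_key="cqt",
     transform_file="models/transform.json",
     param_file="models/transform.npz",
     output_file="outputs/train_transformed.hdf5",
     verbose=True)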
Example 4
def main(args):
    """Main routine for importing data."""
    futils.create_directory(path.split(args.output_file)[0])
    if args.verbose:
        print "[%s] Creating: %s" % (time.asctime(), args.output_file)
    stash = biggie.Stash(args.output_file)
    populate_stash(futils.load_textlist(args.key_list), args.cqt_directory,
                   args.lab_directory, stash, np.float32)
Example 6
def main(args):
    stash = biggie.Stash(args.input_file)
    futil.create_directory(path.split(args.output_file)[0])

    stats = dict()
    vocab = lex.Strict(157)

    stats['prior'] = D.class_prior_v157(stash, vocab).tolist()

    with open(args.output_file, 'w') as fp:
        json.dump(stats, fp)
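
The JSON written here is consumed at training time; the read side below mirrors the prior-loading step in the training example near the end of this listing (Example 26), with a hypothetical file path.

import json
import numpy as np

# Load the class prior written above and invert it, as done when scaling
# the 'prior' node of the training graph.
with open("stats/train.json") as fp:
    prior = np.array(json.load(fp)['prior'], dtype=float)
inverse_prior = 1.0 / prior.reshape(1, -1)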
Example 8
def main(args):
    ref_jamset = jams_util.load_jamset(args.ref_jamset)
    jamset_files = futil.load_textlist(args.jamset_textlist)

    pool = Parallel(n_jobs=args.num_cpus)
    fx = delayed(score_one)
    results = pool(fx(ref_jamset, f, args.min_support) for f in jamset_files)

    results = {f: r for f, r in zip(jamset_files, results)}
    output_dir = os.path.split(args.output_file)[0]
    futil.create_directory(output_dir)

    with open(args.output_file, 'w') as fp:
        json.dump(results, fp, indent=2)
Example 10
def main(args):
    """Main routine for importing data."""
    data_splits = json.load(open(args.split_file))

    output_file_fmt = path.join(args.output_directory, FILE_FMT)
    for fold in data_splits:
        for split in data_splits[fold]:
            output_file = output_file_fmt % (fold, split)
            futils.create_directory(path.split(output_file)[0])
            if args.verbose:
                print "[%s] Creating: %s" % (time.asctime(), output_file)
            stash = biggie.Stash(output_file)
            populate_stash(data_splits[fold][split], args.cqt_directory,
                           args.jams_directory, stash, np.float32)
Example 11
def main(args):
    fpath = os.path.join(args.data_directory, "{0}.hdf5")
    train = biggie.Stash(fpath.format('train'), cache=True)
    valid = biggie.Stash(fpath.format('valid'), cache=True)
    test = biggie.Stash(fpath.format('test'), cache=True)
    results = classify(train, valid, test,
                       num_train=50000, num_valid=10000, num_test=25000)

    for k in 'train', 'valid', 'test':
        print("{0}: {1:.4}".format(k, results['{0}_score'.format(k)]))

    output_dir = os.path.split(args.stats_file)[0]
    futil.create_directory(output_dir)
    with open(args.stats_file, 'w') as fp:
        json.dump(results, fp, indent=2)
Example 12
def main(textlist, dim0, dim1, output_directory, param_file, num_cpus=-1):
    """Apply Local Contrast Normalization to a collection of files.

    Parameters
    ----------
    textlist : str
        A text list of npz filepaths.
    dim0 : int
        First dimension of the filter kernel (time).
    dim1 : int
        Second dimension of the filter kernel (frequency).
    output_directory : str
        Directory to save output arrays.
    param_file : str
        Filename (relative to the output directory) for saving the parameters used.
    num_cpus : int, default=-1
        Number of CPUs over which to parallelize computations.
    """
    # Set the kernel globally.
    PARAMS[KERNEL] = create_kernel(dim0, dim1)

    output_dir = futil.create_directory(output_directory)
    with open(os.path.join(output_dir, param_file), "w") as fp:
        json.dump({"dim0": dim0, "dim1": dim1}, fp, indent=2)

    pool = Parallel(n_jobs=num_cpus)
    dlcn = delayed(apply_lcn)
    iterargs = futil.map_path_file_to_dir(textlist, output_dir, EXT)
    return pool(dlcn(x) for x in iterargs)
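
A hedged invocation sketch for the routine above; the text list, kernel dimensions, and output locations are illustrative placeholders.

# Hypothetical call: a 15 x 37 (time x frequency) LCN kernel applied to the
# npz files listed in cqt_files.txt, with the kernel parameters saved alongside.
main(textlist="filelists/cqt_files.txt",
     dim0=15,
     dim1=37,
     output_directory="features/cqt_lcn",
     param_file="lcn_params.json",
     num_cpus=-1)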
Example 13
def main(args):
    sim_margin = -RADIUS * args.margin
    trainer, predictor, zerofilter = models.iX_c3f2_oY(20, 3, 'xlarge')
    time_dim = trainer.inputs['cqt'].shape[2]

    if args.init_param_file:
        print("Loading parameters: {0}".format(args.init_param_file))
        trainer.load_param_values(args.init_param_file)

    print("Opening {0}".format(args.training_file))
    stash = biggie.Stash(args.training_file, cache=True)
    stream = S.minibatch(
        D.create_pairwise_stream(stash, time_dim,
                                 working_size=100, threshold=0.05),
        batch_size=BATCH_SIZE)

    stream = D.batch_filter(
        stream, zerofilter, threshold=2.0**-16, min_batch=1,
        max_consecutive_skips=100, sim_margin=sim_margin, diff_margin=RADIUS)

    print("Starting '{0}'".format(args.trial_name))
    driver = optimus.Driver(
        graph=trainer,
        name=args.trial_name,
        output_directory=futil.create_directory(args.output_directory))

    hyperparams = dict(
        learning_rate=LEARNING_RATE,
        sim_margin=sim_margin, diff_margin=RADIUS)

    predictor_file = path.join(driver.output_directory, args.predictor_file)
    optimus.save(predictor, def_file=predictor_file)

    driver.fit(stream, hyperparams=hyperparams, **DRIVER_ARGS)
Example 14
def main(args):
    predictor = pca_lda_graph(20, args.n_components, 3)
    input_shape = list(predictor.inputs['cqt'].shape)
    time_dim = input_shape[2]
    input_shape[0] = args.num_points

    print("Opening {0}".format(args.training_file))
    stash = biggie.Stash(args.training_file, cache=True)
    stream = D.create_labeled_stream(stash,
                                     time_dim,
                                     working_size=1000,
                                     threshold=0.05)

    print("Starting '{0}'".format(args.trial_name))
    data, labels = np.zeros(input_shape), []
    for idx, x in enumerate(stream):
        data[idx, ...] = x.cqt
        labels.append(x.label)
        if len(labels) == args.num_points:
            break
        elif (len(labels) % PRINT_FREQ) == 0:
            print("[{0}] {1:5} / {2:5}"
                  "".format(time.asctime(), len(labels), args.num_points))

    predictor.param_values = fit_params(data, labels, args.n_components, 3)
    output_directory = futil.create_directory(args.output_directory)
    predictor_file = path.join(output_directory, args.predictor_file)
    param_file = predictor_file.replace(".json", ".npz")
    optimus.save(predictor, def_file=predictor_file, param_file=param_file)
Example 15
def main(args):
    filepaths = glob.glob(
        path.join(args.audio_directory, "*.%s" % args.ext.strip(".")))
    if path.exists(args.output_file):
        result = json.load(open(args.output_file))
        print "File exists: Found %d results" % len(result)
    else:
        futil.create_directory(path.split(args.output_file)[0])
        result = dict()

    result = fetch_data(
        filepaths, result=result, overwrite=False,
        checkpoint_file=args.output_file)

    with open(args.output_file, 'w') as fp:
        json.dump(result, fp, indent=2)
Example 18
def main(args):
    """Main routine for importing data."""
    partitions = json.load(open(args.split_file))

    output_file_fmt = path.join(args.output_directory, FILE_FMT)
    for set_name, subset in partitions.items():
        for fold_idx, splits in subset.items():
            for split, keys in splits.items():
                output_file = output_file_fmt.format(
                    subset=set_name, fold_idx=fold_idx, split=split)
                futil.create_directory(path.split(output_file)[0])
                if args.verbose:
                    print("[{0}] Creating: {1}"
                          "".format(time.asctime(), output_file))
                stash = biggie.Stash(output_file)
                populate_stash(keys, args.cqt_directory, stash, np.float32)
                stash.close()
Example 19
def main(textlist, output_directory, cqt_params=None, num_cpus=-1):
    if cqt_params:
        DEFAULT_PARAMS.update(json.load(open(cqt_params)))

    output_dir = futil.create_directory(output_directory)
    pool = Parallel(n_jobs=num_cpus)
    dcqt = delayed(audio_file_to_cqt)
    iterargs = futil.map_path_file_to_dir(textlist, output_dir, EXT)
    return pool(dcqt(x) for x in iterargs)
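
As above, a hypothetical direct invocation; the optional cqt_params JSON simply overrides entries in DEFAULT_PARAMS before the parallel CQT pass.

main(textlist="filelists/audio_files.txt",
     output_directory="features/cqt",
     cqt_params="configs/cqt_params.json",  # optional; merged into DEFAULT_PARAMS
     num_cpus=8)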
Example 22
def main(args):
    param_files = futils.load_textlist(args.param_textlist)
    param_files.sort()
    param_files = param_files[args.start_index::args.stride]

    transform = optimus.load(args.transform_file)
    stash = biggie.Stash(args.validation_file, cache=True)
    output_dir = futils.create_directory(args.output_dir)

    for fidx, param_file in enumerate(param_files):
        transform.load_param_values(param_file)
        output_file = params_to_output_file(param_file, output_dir)
        futils.create_directory(os.path.split(output_file)[0])
        if os.path.exists(output_file):
            os.remove(output_file)

        output = biggie.Stash(output_file)
        util.process_stash(stash, transform, output,
                           args.field, verbose=args.verbose)
Example 24
def main(args):
    metadata = dict()
    if args.annotation_metadata:
        metadata.update(json.load(open(args.annotation_metadata)))

    jamset = dict()
    for key, lab_files in json.load(open(args.annotation_set)).items():
        jamset[key] = pyjams.JAMS()
        for f in [lab_files]:
            intervals, labels = mir_eval.io.load_labeled_intervals(str(f))
            annot = jamset[key].chord.create_annotation()
            pyjams.util.fill_range_annotation_data(
                intervals[:, 0], intervals[:, 1], labels, annot)

            annot.annotation_metadata.update(**metadata.get(key, {}))
            annot.sandbox.source_file = f
            annot.sandbox.timestamp = time.asctime()

    futil.create_directory(os.path.split(args.output_file)[0])
    util.save_jamset(jamset, args.output_file)
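
This import routine reads labeled-interval (.lab) files through mir_eval; the sketch below shows the expected row format and the parsed result, with an illustrative file path.

import mir_eval

# A .lab file holds whitespace-delimited (start, end, label) rows, e.g.:
#   0.000   2.612   N
#   2.612  11.459   B:maj
#   11.459 12.921   E:maj
intervals, labels = mir_eval.io.load_labeled_intervals("annotations/track01.lab")
# intervals: an (n, 2) array of [start, end] times in seconds; labels: list of strings.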
Example 25
def main(args):
    ref_jamset = jams_util.load_jamset(args.ref_jamset)
    est_jamset = jams_util.load_jamset(args.est_jamset)
    keys = est_jamset.keys()
    keys.sort()

    ref_annots = [ref_jamset[k].chord[0] for k in keys]
    est_annots = [est_jamset[k].chord[0] for k in keys]

    scores, supports = EVAL.score_annotations(ref_annots, est_annots, METRICS)
    results = dict(metrics=METRICS,
                   score_annotations=(scores.tolist(), supports.tolist()))
    scores_macro = scores.mean(axis=0)
    scalar = supports.sum(axis=0)
    scalar[scalar == 0] = 1.0
    scores_micro = (supports * scores).sum(axis=0) / scalar

    print tabulate.tabulate(
        [['macro'] + scores_macro.tolist(), ['micro'] + scores_micro.tolist()],
        headers=[''] + METRICS)

    label_counts = EVAL.reduce_annotations(ref_annots, est_annots, METRICS)

    mac_aves = []
    for m in METRICS:
        (labels, scores, support) = EVAL.macro_average(label_counts[m], True,
                                                       0.0)
        mac_aves.append([labels, scores.tolist(), support.tolist()])

    results.update(macro_average=mac_aves)

    output_dir = os.path.split(args.output_file)[0]
    futil.create_directory(output_dir)

    with open(args.output_file, 'w') as fp:
        json.dump(results, fp, indent=2)
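
For reference, a tiny worked sketch of the macro versus support-weighted (micro) averaging computed above, using made-up scores and supports for two tracks and a single metric.

import numpy as np

scores = np.array([[0.80], [0.60]])      # hypothetical per-track scores
supports = np.array([[100.0], [300.0]])  # hypothetical supports (e.g., durations)

macro = scores.mean(axis=0)                                      # [0.70]: tracks weighted equally
micro = (supports * scores).sum(axis=0) / supports.sum(axis=0)   # [0.65]: weighted by support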
Example 26
def main(args):
    arch_key = args.arch_size
    if args.dropout:
        arch_key += '_dropout'

    trainer, predictor = models.MODELS[arch_key]()
    time_dim = trainer.inputs['data'].shape[2]

    if args.init_param_file:
        print "Loading parameters: %s" % args.init_param_file
        trainer.load_param_values(args.init_param_file)

    print "Opening %s" % args.training_file
    stash = biggie.Stash(args.training_file, cache=True)
    stream = D.create_chord_index_stream(stash,
                                         time_dim,
                                         max_pitch_shift=0,
                                         lexicon=VOCAB)

    # Load prior
    stat_file = "%s.json" % path.splitext(args.training_file)[0]
    prior = np.array(json.load(open(stat_file))['prior'], dtype=float)
    trainer.nodes['prior'].weight.value = 1.0 / prior.reshape(1, -1)

    stream = S.minibatch(stream, batch_size=BATCH_SIZE)

    print "Starting '%s'" % args.trial_name
    driver = optimus.Driver(graph=trainer,
                            name=args.trial_name,
                            output_directory=futil.create_directory(
                                args.output_directory))

    hyperparams = dict(learning_rate=LEARNING_RATE)
    if args.dropout:
        hyperparams.update(dropout=args.dropout)

    predictor_file = path.join(driver.output_directory, args.predictor_file)
    optimus.save(predictor, def_file=predictor_file)

    driver.fit(stream, hyperparams=hyperparams, **DRIVER_ARGS)