def posterior_stash_to_jams(stash, penalty_values, output_directory, label_map,
                            model_params):
    """Decode a stash of posteriors to JAMS and write to disk.

    Parameters
    ----------
    stash : biggie.Stash
        Posteriors to decode.
    penalty_values : array_like
        Collection of penalty values with which to run Viterbi.
    output_directory : str
        Base path to write out JAMS files; each collection will be written
        as {output_directory}/{penalty_values[i]}.jamset
    label_map : callable object
        Map from frets to string labels.
    model_params : dict
        Metadata to associate with the annotation.
    """
    futils.create_directory(output_directory)
    # Sweep over the given penalty values.
    for penalty in penalty_values:
        print "[{0}] \tStarting p = {1}".format(time.asctime(), penalty)
        results = decode_stash_parallel(stash, penalty, label_map, NUM_CPUS)
        output_file = os.path.join(output_directory,
                                   "{0}.jamset".format(penalty))
        jamset = dict()
        for key, annot in results.iteritems():
            annot.sandbox.update(timestamp=time.asctime(), **model_params)
            jam = pyjams.JAMS(chord=[annot])
            jam.sandbox.track_id = key
            jamset[key] = jam
        util.save_jamset(jamset, output_file)

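# Usage sketch for the decoder above. The stash path, penalty grid, and
# `chord_label_map` are illustrative assumptions, not names defined in this
# codebase.
def example_decode_sweep():
    stash = biggie.Stash("features/posteriors.hdf5")  # hypothetical path
    penalties = [-40.0, -20.0, -10.0, -5.0]           # hypothetical grid
    posterior_stash_to_jams(stash, penalties, "outputs/jams",
                            label_map=chord_label_map,  # assumed callable
                            model_params=dict(model="demo"))
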
def main(args):
    chromas = biggie.File(args.chroma_file)
    templates = generate_templates(157)
    futil.create_directory(os.path.split(args.posterior_file)[0])
    posteriors = biggie.File(args.posterior_file)
    for idx, key in enumerate(chromas.keys()):
        posteriors.add(key, classify_chroma(chromas.get(key), templates))
        print "[%s] %12d / %12d: %s" % (time.asctime(), idx,
                                        len(chromas), key)
    posteriors.close()

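# `classify_chroma` and `generate_templates` are defined elsewhere; a minimal
# sketch of what template matching here could look like, assuming `chroma` is
# a (num_frames, 12) array and `templates` is (157, 12), scoring frames by
# cosine similarity and normalizing to a posterior-like matrix:
import numpy as np

def classify_chroma_sketch(chroma, templates):
    def unit_norm(x):
        norms = np.sqrt(np.sum(x ** 2, axis=1, keepdims=True))
        return x / np.maximum(norms, 1e-8)
    scores = np.dot(unit_norm(chroma), unit_norm(templates).T)
    # Softmax over classes so each frame sums to one.
    scores = np.exp(scores - scores.max(axis=1, keepdims=True))
    return scores / scores.sum(axis=1, keepdims=True)
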
def main(args):
    stash = biggie.Stash(args.input_file)
    futils.create_directory(path.split(args.output_file)[0])
    stash_out = biggie.Stash(args.output_file)
    total_count = len(stash)
    # Avoid shadowing the `args` namespace with the pooling arguments.
    pool_args = ['cqt', [2, 4, 8, 16, 32, 64, 128], 0, 'mean']
    for idx, key in enumerate(stash.keys()):
        stash_out.add(key, pool_entity(stash.get(key), *pool_args))
        print "[%s] %12d / %12d: %s" % (time.asctime(), idx,
                                        total_count, key)
    stash_out.close()

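# `pool_entity` is defined elsewhere; a plausible sketch of the call above,
# assuming its arguments are (entity, field name, pooling widths in frames,
# frame axis, statistic) and that it pools the named field over several block
# sizes. The dict-of-arrays return form is an assumption for illustration.
import numpy as np

def pool_field_sketch(data, widths, pool_func='mean'):
    reduce_fn = dict(mean=np.mean, max=np.max, median=np.median)[pool_func]
    pooled = dict()
    for w in widths:
        # Trim to a multiple of w frames, then reduce every w-frame block.
        num_blocks = len(data) // w
        blocks = data[:num_blocks * w].reshape(num_blocks, w, -1)
        pooled[w] = reduce_fn(blocks, axis=1)
    return pooled
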
def main(args):
    if not os.path.exists(args.estimation_file):
        print "File does not exist: %s" % args.estimation_file
        return
    vocab = lex.Strict(157)
    stats, confusions = compute_scores(
        json.load(open(args.estimation_file)), lexicon=vocab)
    res_str = stats_to_string(stats) + confusions_to_str(confusions, vocab)
    futil.create_directory(os.path.split(args.stats_file)[0])
    with open(args.stats_file, 'w') as fp:
        fp.write(res_str)
    print "\n%s\n%s" % (args.estimation_file, res_str)

def main(args):
    """Main routine for importing data."""
    data_splits = json.load(open(args.split_file))
    output_file_fmt = path.join(args.output_directory, FILE_FMT)
    for fold in data_splits:
        for split in data_splits[fold]:
            output_file = output_file_fmt % (fold, split)
            futils.create_directory(path.split(output_file)[0])
            if args.verbose:
                print "[%s] Creating: %s" % (time.asctime(), output_file)
            stash = biggie.Stash(output_file)
            populate_stash(data_splits[fold][split], args.cqt_directory,
                           args.lab_directory, stash, np.float32)
            # Flush each stash to disk, as the other routines here do.
            stash.close()

def main(args):
    dset = biggie.Stash(args.input_file)
    futils.create_directory(path.split(args.output_file)[0])
    dout = biggie.Stash(args.output_file)
    beat_times = json.load(open(args.beat_times))
    total_count = len(dset)
    for idx, key in enumerate(dset.keys()):
        boundaries = subdivide_boundaries(beat_times[key], args.subdivide)
        dout.add(key, beat_sync(dset.get(key), boundaries,
                                pool_func=args.pool_func))
        print "[%s] %12d / %12d: %s" % (time.asctime(), idx,
                                        total_count, key)
    dout.close()

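# `subdivide_boundaries` is defined elsewhere; a minimal sketch, assuming it
# linearly interpolates `num_subdivisions` evenly spaced points between each
# pair of consecutive beat times:
import numpy as np

def subdivide_boundaries_sketch(beat_times, num_subdivisions):
    beat_times = np.asarray(beat_times, dtype=float)
    boundaries = []
    for start, end in zip(beat_times[:-1], beat_times[1:]):
        # endpoint=False keeps each beat's own onset exactly once.
        boundaries.extend(np.linspace(start, end, num_subdivisions,
                                      endpoint=False))
    boundaries.append(beat_times[-1])
    return np.asarray(boundaries)
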
def main(args):
    in_stash = biggie.Stash(args.data_file)
    futils.create_directory(os.path.split(args.output_file)[0])
    if os.path.exists(args.output_file):
        os.remove(args.output_file)
    out_stash = biggie.Stash(args.output_file)
    total_count = len(in_stash)
    for idx, key in enumerate(in_stash.keys()):
        out_stash.add(key, wrap_entity(in_stash.get(key), args.length,
                                       args.stride))
        print "[%s] %12d / %12d: %s" % (time.asctime(), idx,
                                        total_count, key)
    out_stash.close()

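# `wrap_entity` is defined elsewhere; a hedged sketch of one plausible
# length/stride behavior, assuming the observation is tiled out to `length`
# frames by stepping through it with the given stride (names illustrative):
import numpy as np

def wrap_array_sketch(data, length, stride):
    # Repeat the frame axis until at least length * stride frames exist,
    # then subsample every stride-th frame and trim to `length`.
    num_tiles = int(np.ceil(float(length * stride) / len(data)))
    tiled = np.concatenate([data] * num_tiles, axis=0)
    return tiled[::stride][:length]
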
def main(args):
    estimations = json.load(open(args.estimation_set))
    references = json.load(open(args.reference_set))
    results = dict()
    for key in estimations:
        est_data = estimations[key]
        ref_data = references.get(key, None)
        if ref_data is not None:
            # mir_eval.chord.evaluate expects the reference intervals and
            # labels first, and ndarray intervals (JSON decodes to lists).
            results[key] = mir_eval.chord.evaluate(
                np.asarray(ref_data['intervals']), ref_data['labels'],
                np.asarray(est_data['intervals']), est_data['labels'])
    futil.create_directory(os.path.split(args.output_file)[0])
    with open(args.output_file, 'w') as fp:
        json.dump(results, fp, indent=2)

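# The estimation and reference sets are JSON maps from track keys to
# mir_eval-style annotations; an illustrative entry (values are made up):
example_entry = {
    "TRACK001": {
        "intervals": [[0.0, 1.5], [1.5, 3.0]],  # [start, end] in seconds
        "labels": ["C:maj", "G:maj"],
    }
}
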
def main(args):
    estimations = json.load(open(args.estimation_set))
    references = json.load(open(args.reference_set))
    results = dict()
    for key in estimations:
        est_data = estimations[key]
        ref_data = references.get(key, None)
        if ref_data is not None:
            accumulate_estimations(est_data['intervals'], est_data['labels'],
                                   ref_data['intervals'], ref_data['labels'],
                                   results, transpose=args.transpose)
    futil.create_directory(os.path.split(args.output_file)[0])
    with open(args.output_file, 'w') as fp:
        json.dump(results, fp, indent=2)

def main(args):
    filepaths = glob.glob(
        path.join(args.audio_directory, "*.%s" % args.ext.strip(".")))
    if path.exists(args.output_file):
        result = json.load(open(args.output_file))
        print "File exists: Found %d results" % len(result)
    else:
        futil.create_directory(path.split(args.output_file)[0])
        result = dict()
    result = fetch_data(filepaths, result=result, overwrite=False,
                        checkpoint_file=args.output_file)
    with open(args.output_file, 'w') as fp:
        json.dump(result, fp, indent=2)

def main(args):
    """Main routine for staging parallelization."""
    output_dir = futils.create_directory(args.output_directory)
    pool = Pool(processes=NUM_CPUS)
    pool.map_async(func=run,
                   iterable=futils.map_path_file_to_dir(args.textlist_file,
                                                        output_dir, EXT))
    pool.close()
    pool.join()

def main(args):
    audio_dir = futil.create_directory(
        os.path.join(args.output_directory, "audio"))
    lab_dir = futil.create_directory(
        os.path.join(args.output_directory, "labs"))
    drum_set = json.load(open(args.drum_set))
    # JSON object keys are strings; restore the integer program numbers.
    instrument_set = dict(
        [(int(k), v) for k, v in json.load(open(args.instrument_set)).items()])
    noise_files = glob.glob(os.path.join(args.noise_dir, "*.wav"))
    duration = 180
    samplerate = 44100.0
    for idx in range(args.num_files):
        render_polypitch(idx, drum_set, instrument_set, noise_files,
                         duration, audio_dir, lab_dir, samplerate)

def main(args):
    """Main routine for staging parallelization."""
    files = F.load_textlist(args.file_list)
    temp_dir = F.create_directory("tmpdir")
    temp_fmt = os.path.join(temp_dir, "deleteme-%d.txt")
    file_lists = []
    for n in range(args.num_cpus):
        file_lists.append(temp_fmt % n)
        # Deal files round-robin so each worker gets an even share.
        F.dump_textlist(files[n::args.num_cpus], file_lists[-1])

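# The routine above only stages the per-worker file lists; a hedged sketch of
# how they might then be fanned out to worker processes (`worker_script` is
# an assumption, not a script defined in this codebase):
import subprocess

def launch_workers_sketch(file_lists, worker_script):
    # One subprocess per staged file list; wait for all to finish.
    procs = [subprocess.Popen(["python", worker_script, flist])
             for flist in file_lists]
    return [p.wait() for p in procs]
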
def main(args):
    results_files = glob.glob(os.path.join(args.results_dir, "*%s" % TMC_EXT))
    predictions = dict()
    for idx, result_file in enumerate(results_files):
        key = futil.filebase(result_file.replace(TMC_EXT, ''))
        lab_file = os.path.join(args.labs_dir, "%s.lab" % key)
        try:
            predictions[key] = align_estimation_to_reference(result_file,
                                                             lab_file)
        except IndexError:
            print "Index error: %s" % result_file
        print "[%s] %12d / %12d: %s" % (time.asctime(), idx,
                                        len(results_files), key)
    futil.create_directory(os.path.split(args.output_file)[0])
    with open(args.output_file, 'w') as fp:
        json.dump(predictions, fp, indent=2)

def main(args):
    estimations = json.load(open(args.estimation_set))
    references = [json.load(open(f)) for f in args.reference_set]
    results = dict()
    for key in estimations:
        est_data = estimations[key]
        results[key] = dict()
        for ridx, ref in enumerate(references):
            ref_data = ref.get(key, None)
            if ref_data is not None:
                # Keep one score set per reference; mir_eval expects the
                # reference intervals and labels first, as ndarrays.
                results[key][ridx] = mir_eval.chord.evaluate(
                    np.asarray(ref_data['intervals']), ref_data['labels'],
                    np.asarray(est_data['intervals']), est_data['labels'])
    futil.create_directory(os.path.split(args.output_file)[0])
    with open(args.output_file, 'w') as fp:
        json.dump(results, fp, indent=2)

def main(args):
    dset = biggie.Stash(args.input_file)
    labseg = json.load(open(args.labseg))
    out_dir = futils.create_directory(args.output_directory)
    total_count = len(dset)
    for idx, key in enumerate(dset.keys()):
        out_file = path.join(out_dir, "%s.%s" % (key, FILE_EXT))
        mdict = entity_to_mdict(dset.get(key), labseg[key])
        MLAB.savemat(out_file, mdict=mdict)
        print "[%s] %12d / %12d: %s" % (time.asctime(), idx,
                                        total_count, key)

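# `MLAB` is presumably an alias for `scipy.io`, whose `savemat` writes MATLAB
# .mat files; a minimal round-trip sketch with an illustrative payload:
import numpy as np
import scipy.io as MLAB

def savemat_roundtrip_sketch(out_file):
    MLAB.savemat(out_file, mdict=dict(data=np.arange(6).reshape(2, 3),
                                      labels=np.array(['C:maj', 'G:maj'])))
    return MLAB.loadmat(out_file)['data']
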
def main(args):
    output_dir = futil.create_directory(args.output_directory)
    # JSON object keys are strings; restore the integer program numbers.
    instrument_set = dict(
        [(int(k), v) for k, v in json.load(open(args.instrument_set)).items()])
    midi_files = glob.glob(os.path.join(args.midi_directory, "*.mid"))
    pool = Pool(processes=NUM_CPUS)
    pool.map_async(func=process_one,
                   iterable=spool_args(midi_files, output_dir, instrument_set))
    pool.close()
    pool.join()

def main(args):
    if not os.path.exists(args.posterior_file):
        print "File does not exist: %s" % args.posterior_file
        return
    dset = biggie.Stash(args.posterior_file)
    stats = json.load(open(args.validation_file))
    penalty = float(stats['best_config']['penalty'])
    estimations = dict()
    for idx, key in enumerate(dset.keys()):
        estimations[key] = estimate_classes(dset.get(key), util.viterbi,
                                            penalty=penalty)
        print "[%s] %12d / %12d: %s" % (time.asctime(), idx, len(dset), key)
    futil.create_directory(os.path.split(args.estimation_file)[0])
    with open(args.estimation_file, 'w') as fp:
        json.dump(estimations, fp, indent=2)

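# `util.viterbi` is defined elsewhere; a minimal sketch of Viterbi decoding
# with a flat transition model and a self-transition penalty, the quantity
# the validation sweep above tunes (all names here are illustrative):
import numpy as np

def viterbi_sketch(posterior, penalty):
    """Decode a (num_frames, num_classes) posterior into a class path.

    `penalty` is added in log-space to every class *change*, so more
    negative values yield smoother paths.
    """
    log_post = np.log(np.maximum(posterior, 1e-10))
    num_frames, num_classes = log_post.shape
    # Cost of switching classes between frames: penalty off the diagonal.
    switch = penalty * (1 - np.eye(num_classes))
    scores = log_post[0].copy()
    back = np.zeros((num_frames, num_classes), dtype=int)
    for t in range(1, num_frames):
        trans = scores[:, np.newaxis] + switch
        back[t] = trans.argmax(axis=0)
        scores = trans.max(axis=0) + log_post[t]
    # Trace the best path backwards from the best final state.
    path = [int(scores.argmax())]
    for t in range(num_frames - 1, 0, -1):
        path.append(int(back[t][path[-1]]))
    return np.array(path[::-1])
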
def main(args):
    arch_key = args.arch_size
    if args.dropout:
        arch_key += '_dropout'
    trainer, predictor, classifier = models.MODELS[arch_key]()
    time_dim = trainer.inputs['data'].shape[2]

    if args.init_param_file:
        print "Loading parameters: %s" % args.init_param_file
        trainer.load_param_values(args.init_param_file)

    print "Opening %s" % args.training_file
    stash = biggie.Stash(args.training_file, cache=True)
    stream = D.create_chord_index_stream(stash, time_dim, max_pitch_shift=12,
                                         lexicon=VOCAB)

    if "weighted" in arch_key:
        stat_file = "%s.json" % path.splitext(args.training_file)[0]
        prior = np.array(json.load(open(stat_file))['prior'], dtype=float)
        # Up-weight rare classes: invert the prior, then standardize.
        scalars = prior.max() / prior.flatten()
        scalars = (scalars - scalars.mean()) / scalars.std()
        stream = weighted_stream(stream, scalars + 1)

    stream = S.minibatch(stream, batch_size=BATCH_SIZE)

    print "Starting '%s'" % args.trial_name
    driver = optimus.Driver(
        graph=trainer,
        name=args.trial_name,
        output_directory=futil.create_directory(args.output_directory))

    hyperparams = dict(learning_rate=LEARNING_RATE)
    if args.dropout:
        hyperparams.update(dropout=args.dropout)

    predictor_file = path.join(driver.output_directory, args.predictor_file)
    optimus.save(predictor, def_file=predictor_file)
    classifier_file = path.join(driver.output_directory, args.classifier_file)
    optimus.save(classifier, def_file=classifier_file)

    driver.fit(stream, hyperparams=hyperparams, **DRIVER_ARGS)

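# A small worked example of the class-weighting arithmetic above, with an
# illustrative three-class prior (the values are made up):
import numpy as np

def weighting_example():
    prior = np.array([0.6, 0.3, 0.1])   # illustrative class prior
    scalars = prior.max() / prior       # rare classes get larger weights
    # -> [1.0, 2.0, 6.0]
    scalars = (scalars - scalars.mean()) / scalars.std()
    # Standardized, then shifted by +1 so the mean weight is exactly 1.
    return scalars + 1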