def main(args):
    """Convert paired RockCorpus .lab files into per-track JAMS files.

    For each track, the two human annotations (`*_dt.lab` from dtemperley,
    `*_tdc.lab` from tdeclerqc) are loaded as chord annotations into a single
    JAMS object, which is then saved to `args.output_directory`.

    Parameters
    ----------
    args : argparse.Namespace
        Expects `annotation_metadata` (optional JSON path), `lab_directory`,
        and `output_directory` attributes.
    """
    # NOTE(review): `metadata` is loaded but never attached to the JAMS
    # objects below — presumably intended for annotation_metadata fields.
    metadata = dict()
    if args.annotation_metadata:
        metadata.update(json.load(open(args.annotation_metadata)))

    # glob returns files in arbitrary filesystem order; sort both lists so
    # zip() pairs the dt/tdc files belonging to the same track.
    labs_dt = sorted(glob.glob(os.path.join(args.lab_directory, "*_dt.lab")))
    labs_tdc = sorted(glob.glob(os.path.join(args.lab_directory, "*_tdc.lab")))
    jfmt = os.path.join(args.output_directory, "%s.jams")

    for dt, tdc in zip(labs_dt, labs_tdc):
        jam = pyjams.JAMS()
        annot_dt = _append_chord_annotation(jam, dt)
        annot_tdc = _append_chord_annotation(jam, tdc)
        annot_tdc.sandbox.key = "human/RockCorpus/tdeclerqc"
        annot_dt.sandbox.key = "human/RockCorpus/dtemperley"
        jam.sandbox.local_key = futil.filebase(dt).replace("_dt", '')
        pyjams.save(jam, jfmt % jam.sandbox.local_key)


def _append_chord_annotation(jam, lab_file):
    """Load a labeled-interval .lab file into a new chord annotation on `jam`."""
    intervals, labels = mir_eval.io.load_labeled_intervals(lab_file)
    annot = jam.chord.create_annotation()
    pyjams.util.fill_range_annotation_data(
        intervals[:, 0], intervals[:, 1], labels, annot)
    return annot
def main(args):
    """Split a textlist into stratified train/valid/test folds per subset.

    For each subset configuration, keys are filtered by their leading
    `_`-delimited token, split into `args.num_folds` folds, and each fold's
    training set is further carved into train/valid by `args.valid_ratio`.
    The resulting mapping is written to `args.output_file` as JSON.

    Parameters
    ----------
    args : argparse.Namespace
        Expects `textlist_file`, `subset_file`, `num_folds`, `valid_ratio`,
        and `output_file` attributes.
    """
    files = futil.load_textlist(args.textlist_file)
    keys = np.array([futil.filebase(f) for f in files])
    subsets = json.load(open(args.subset_file))
    folds = dict()
    # `iteritems()` is Python-2-only; `items()` is equivalent here and portable.
    for config_name, key_set in subsets.items():
        filt_keys = np.array([k for k in keys if k.split('_')[0] in key_set])
        splitter = KFold(n=len(filt_keys), n_folds=args.num_folds, shuffle=True)
        folds[config_name] = dict()
        for fold_idx, data_idxs in enumerate(splitter):
            train_keys, test_keys = [filt_keys[idxs] for idxs in data_idxs]
            # Shuffle the training keys, then hold out the first
            # `valid_ratio` fraction for validation.
            num_train = len(train_keys)
            train_idx = np.random.permutation(num_train)
            valid_count = int(args.valid_ratio * num_train)
            valid_keys = train_keys[train_idx[:valid_count]]
            train_keys = train_keys[train_idx[valid_count:]]
            # NOTE: integer fold indices become strings once serialized to JSON.
            folds[config_name][fold_idx] = dict(
                train=train_keys.tolist(),
                valid=valid_keys.tolist(),
                test=test_keys.tolist())
    with open(args.output_file, 'w') as fp:
        json.dump(folds, fp, indent=2)
def fetch_data(filepaths, result=None, overwrite=False, checkpoint_file=''):
    """Query the EchoNest API for each audio file, collecting track metadata.

    Parameters
    ----------
    filepaths : list
        Collection of audio files on disk to query against the EchoNest API.
    result : dict, or None
        Dictionary to add info; will create if None.
    overwrite : bool, default=False
        If False, will skip any keys contained in `result`.
    checkpoint_file : str, or None
        Path to write results as they are accumulated; ignored if empty.

    Returns
    -------
    result : dict
        Map of filebases to metadata.
    """
    throttle = Throttle()
    throttle.touch()
    if result is None:
        result = dict()
    filepaths = set(filepaths)
    while filepaths:
        fpath = filepaths.pop()
        key = futil.filebase(fpath)
        # Print statements parenthesized for Python 2/3 compatibility; output
        # is identical under both interpreters for a single argument.
        if key in result and not overwrite:
            # Already fetched and not overwriting; log progress and move on.
            print("[%s] %4d: '%s'" % (time.asctime(), len(filepaths), key))
            continue
        try:
            print("[%s] %4d: '%s'" % (time.asctime(), len(filepaths), key))
            track = T.track_from_filename(fpath)
            if track:
                result[key] = extract_info(track)
            if checkpoint_file:
                # Checkpoint the full result map after each successful fetch.
                with open(checkpoint_file, 'w') as fp:
                    json.dump(result, fp, indent=2)
            throttle.wait()
        except T.util.EchoNestAPIError as err:
            if err.http_status == 429:
                # Rate limited: back off and requeue this file.
                print("You got rate limited braah ... hang on.")
                throttle.wait(10)
                filepaths.add(fpath)
            elif err.http_status >= 500:
                # Server-side failure: drop this key and keep going.
                print("Server error; moving on, dropping key: %s" % key)
        except socket.error as err:
            # Transient network failure: back off and requeue.
            print("Socket Error %s" % err)
            filepaths.add(fpath)
            throttle.wait(10)
    return result
def fetch_data(filepaths, result=None, overwrite=False, checkpoint_file=''):
    """Fetch EchoNest metadata for a collection of audio files.

    Parameters
    ----------
    filepaths : list
        Audio files on disk to query against the EchoNest API.
    result : dict, or None
        Accumulator for metadata; created when None.
    overwrite : bool, default=False
        When False, keys already present in `result` are skipped.
    checkpoint_file : str, or None
        Where to dump intermediate results; ignored when empty.

    Returns
    -------
    result : dict
        Map of filebases to metadata.
    """
    throttle = Throttle()
    throttle.touch()
    result = dict() if result is None else result
    pending = set(filepaths)
    while pending:
        fpath = pending.pop()
        key = futil.filebase(fpath)
        # Progress line is emitted once per item, whether skipped or fetched.
        print("[%s] %4d: '%s'" % (time.asctime(), len(pending), key))
        if key in result and not overwrite:
            continue
        try:
            track = T.track_from_filename(fpath)
            if track:
                result[key] = extract_info(track)
            if checkpoint_file:
                with open(checkpoint_file, 'w') as fp:
                    json.dump(result, fp, indent=2)
            throttle.wait()
        except T.util.EchoNestAPIError as err:
            if err.http_status == 429:
                # Throttled by the API: pause, then requeue this file.
                print("You got rate limited braah ... hang on.")
                throttle.wait(10)
                pending.add(fpath)
            elif err.http_status >= 500:
                # Server-side error: abandon this key and continue.
                print("Server error; moving on, dropping key: %s" % key)
        except socket.error as err:
            # Network hiccup: requeue after a cool-down.
            print("Socket Error %s" % err)
            pending.add(fpath)
            throttle.wait(10)
    return result
def main(textlist, num_folds, valid_ratio, output_file):
    """Stratify a textlist into a number of disjoint partitions.

    Parameters
    ----------
    textlist : str
        Path to a textlist file.
    num_folds : int
        Number of splits for the data.
    valid_ratio : scalar, in [0, 1.0)
        Ratio of the training data held out for validation.
    output_file : str
        File to save the output splits as JSON.
    """
    files = futils.load_textlist(textlist)
    keys = [futils.filebase(f) for f in files]
    folds = util.stratify(keys, num_folds, valid_ratio)
    with open(output_file, 'w') as fp:
        json.dump(folds, fp, indent=2)
def main(textlist, num_folds, valid_ratio, output_file):
    """Stratify a textlist into a number of disjoint partitions.

    Parameters
    ----------
    textlist : str
        Path to a textlist file.
    num_folds : int
        Number of splits for the data.
    valid_ratio : scalar, in [0, 1.0)
        Ratio of the training data held out for validation.
    output_file : str
        File to save the output splits as JSON.
    """
    files = futils.load_textlist(textlist)
    keys = [futils.filebase(f) for f in files]
    folds = util.stratify(keys, num_folds, valid_ratio)
    with open(output_file, 'w') as fp:
        json.dump(folds, fp, indent=2)
def create_entity(npz_file, dtype=np.float32):
    """Build a biggie.Entity from the given npz archive.

    Parameters
    ----------
    npz_file : str
        Path to a 'npz' archive, containing at least a value for 'cqt'.
    dtype : type
        Data type to which the cqt array is cast.

    Returns
    -------
    entity : biggie.Entity
        Populated entity, with the following fields:
        {cqt, time_points, icode, note_number, fcode}.
    """
    # The filebase encodes '<icode>_<note_number>_<fcode>'.
    parts = futil.filebase(npz_file).split('_')
    icode, note_number, fcode = (np.array(part) for part in parts)
    entity = biggie.Entity(icode=icode, note_number=note_number,
                           fcode=fcode, **np.load(npz_file))
    entity.cqt = entity.cqt.astype(dtype)
    return entity
def params_to_output_file(param_file, output_dir):
    """Map a parameter file to its corresponding HDF5 path in `output_dir`."""
    basename = "{0}.hdf5".format(futils.filebase(param_file))
    return os.path.join(output_dir, basename)
def file_to_instrument_code(filename):
    """Return the instrument code, i.e. the first token of the filebase."""
    tokens = re.split('[\W_]+', futil.filebase(filename))
    return tokens[0]
def file_to_note_number(filename):
    """Extract a note number from a filename containing a note name.

    Parameters
    ----------
    filename : str
        File whose basename may contain a note name, e.g. 'C#4'.

    Returns
    -------
    note_number : int, or None
        Result of `note_name_to_number`, or None if no note name is found.
    """
    matches = re.findall('.*([A-G]#?[0-9])', futil.filebase(filename))
    if len(matches) == 0:
        return None
    # The regex group can only contain [A-G], '#', and a digit, so the
    # original `.strip(".wav")` (which strips a *character set*, not a
    # suffix) was a misleading no-op and has been removed.
    return note_name_to_number(matches[0])
def has_pitch(f):
    """Return True if the filebase contains a note name, e.g. 'A#3'."""
    # `False if x is None else True` is an anti-idiom; test the match directly.
    return re.match('.*([A-G]#?[0-9])', futil.filebase(f)) is not None
def file_to_instrument_code(filename):
    """Return the instrument code encoded as the filebase's leading token."""
    base = futil.filebase(filename)
    return re.split('[\W_]+', base)[0]
def file_to_note_number(filename):
    """Extract a note number from a filename containing a note name.

    Parameters
    ----------
    filename : str
        File whose basename may contain a note name, e.g. 'C#4'.

    Returns
    -------
    note_number : int, or None
        Result of `note_name_to_number`, or None if no note name is found.
    """
    matches = re.findall('.*([A-G]#?[0-9])', futil.filebase(filename))
    if len(matches) == 0:
        return None
    # The regex group can only contain [A-G], '#', and a digit, so the
    # original `.strip(".wav")` (which strips a *character set*, not a
    # suffix) was a misleading no-op and has been removed.
    return note_name_to_number(matches[0])
def has_pitch(f):
    """Return True if the filebase contains a note name, e.g. 'A#3'."""
    # `False if x is None else True` is an anti-idiom; test the match directly.
    return re.match('.*([A-G]#?[0-9])', futil.filebase(f)) is not None