Ejemplo n.º 1
0
def main(args):
    """Pair Temperley (*_dt.lab) and de Clercq (*_tdc.lab) chord annotations
    and write one JAMS file per track.

    Parameters
    ----------
    args : argparse.Namespace
        Expects `annotation_metadata` (optional JSON path), `lab_directory`,
        and `output_directory`.
    """
    metadata = dict()
    if args.annotation_metadata:
        # Context manager closes the metadata file instead of leaking it.
        with open(args.annotation_metadata) as fp:
            metadata.update(json.load(fp))
    # NOTE(review): `metadata` is collected but never used below — confirm
    # whether it was meant to be attached to each JAMS object.

    # Sort both listings so dt/tdc files pair up by basename; raw glob order
    # is filesystem-dependent and could mis-align the two annotators.
    labs_dt = sorted(glob.glob(os.path.join(args.lab_directory, "*_dt.lab")))
    labs_tdc = sorted(glob.glob(os.path.join(args.lab_directory, "*_tdc.lab")))
    jfmt = os.path.join(args.output_directory, "%s.jams")

    for dt, tdc in zip(labs_dt, labs_tdc):
        jam = pyjams.JAMS()
        # Temperley annotation.
        intervals, labels = mir_eval.io.load_labeled_intervals(dt)
        annot_dt = jam.chord.create_annotation()
        pyjams.util.fill_range_annotation_data(
            intervals[:, 0], intervals[:, 1], labels, annot_dt)
        # de Clercq annotation.
        annot_tdc = jam.chord.create_annotation()
        intervals, labels = mir_eval.io.load_labeled_intervals(tdc)
        pyjams.util.fill_range_annotation_data(
            intervals[:, 0], intervals[:, 1], labels, annot_tdc)
        annot_tdc.sandbox.key = "human/RockCorpus/tdeclerqc"
        annot_dt.sandbox.key = "human/RockCorpus/dtemperley"
        # Track key is the shared basename with the annotator suffix removed.
        jam.sandbox.local_key = futil.filebase(dt).replace("_dt", '')
        pyjams.save(jam, jfmt % jam.sandbox.local_key)
Ejemplo n.º 2
0
def main(args):
    """Split a textlist into per-subset train/valid/test folds, saved as JSON.

    Parameters
    ----------
    args : argparse.Namespace
        Expects `textlist_file`, `subset_file`, `num_folds`, `valid_ratio`,
        and `output_file`.
    """
    files = futil.load_textlist(args.textlist_file)
    keys = np.array([futil.filebase(f) for f in files])
    # Context manager closes the subset file instead of leaking the handle.
    with open(args.subset_file) as fp:
        subsets = json.load(fp)

    folds = dict()
    # `items()` works on both Python 2 and 3; `iteritems()` is Py2-only.
    for config_name, key_set in subsets.items():
        # Keys look like "<prefix>_..."; filter on the prefix.
        filt_keys = np.array([k for k in keys if k.split('_')[0] in key_set])
        splitter = KFold(n=len(filt_keys),
                         n_folds=args.num_folds,
                         shuffle=True)
        folds[config_name] = dict()
        for fold_idx, data_idxs in enumerate(splitter):
            # data_idxs is a (train_indices, test_indices) pair.
            train_keys, test_keys = [filt_keys[idxs] for idxs in data_idxs]
            # Carve a validation set out of the shuffled training keys.
            num_train = len(train_keys)
            train_idx = np.random.permutation(num_train)
            valid_count = int(args.valid_ratio * num_train)
            valid_keys = train_keys[train_idx[:valid_count]]
            train_keys = train_keys[train_idx[valid_count:]]
            folds[config_name][fold_idx] = dict(train=train_keys.tolist(),
                                                valid=valid_keys.tolist(),
                                                test=test_keys.tolist())

    with open(args.output_file, 'w') as fp:
        json.dump(folds, fp, indent=2)
Ejemplo n.º 3
0
def fetch_data(filepaths, result=None, overwrite=False, checkpoint_file=''):
    """Query the EchoNest API for each audio file and accumulate metadata.

    Parameters
    ----------
    filepaths : list
        Collection of audio files on disk to query against the EchoNest API.
    result : dict, or None
        Dictionary to add info; will create if None.
    overwrite : bool, default=False
        If False, will skip any keys contained in `result`.
    checkpoint_file : str, or None
        Path to write results as they are accumulated; ignored if empty.

    Returns
    -------
    result : dict
        Map of filebases to metadata.
    """
    # Throttle paces the API requests; touch() starts its clock now.
    throttle = Throttle()
    throttle.touch()
    if result is None:
        result = dict()

    # A set lets failed paths be re-queued via add() without duplicates.
    filepaths = set(filepaths)
    while filepaths:
        fpath = filepaths.pop()
        key = futil.filebase(fpath)
        # If we've already got data and we're not overwriting, move on.
        if key in result and not overwrite:
            print "[%s] %4d: '%s'" % (time.asctime(), len(filepaths), key)
            continue
        try:
            # Otherwise, let's make some requests.
            print "[%s] %4d: '%s'" % (time.asctime(), len(filepaths), key)
            track = T.track_from_filename(fpath)
            if track:
                result[key] = extract_info(track)
            # Persist progress after every successful fetch so a crash
            # doesn't lose the whole run.
            if checkpoint_file:
                with open(checkpoint_file, 'w') as fp:
                    json.dump(result, fp, indent=2)
            throttle.wait()
        except T.util.EchoNestAPIError as err:
            # HTTP 429: rate limited — back off and re-queue this path.
            if err.http_status == 429:
                print "You got rate limited braah ... hang on."
                throttle.wait(10)
                filepaths.add(fpath)
            # 5xx: server-side failure — drop this key and keep going.
            elif err.http_status >= 500:
                print "Server error; moving on, dropping key: %s" % key
        except socket.error as err:
            # Transient network error — re-queue and back off.
            print "Socket Error %s" % err
            filepaths.add(fpath)
            throttle.wait(10)
    return result
Ejemplo n.º 4
0
def fetch_data(filepaths, result=None, overwrite=False, checkpoint_file=''):
    """Query the EchoNest API for each audio file and accumulate metadata.

    Parameters
    ----------
    filepaths : list
        Collection of audio files on disk to query against the EchoNest API.
    result : dict, or None
        Dictionary to add info; will create if None.
    overwrite : bool, default=False
        If False, will skip any keys contained in `result`.
    checkpoint_file : str, or None
        Path to write results as they are accumulated; ignored if empty.

    Returns
    -------
    result : dict
        Map of filebases to metadata.
    """
    # Throttle paces the API requests; touch() starts its clock now.
    throttle = Throttle()
    throttle.touch()
    if result is None:
        result = dict()

    # A set lets failed paths be re-queued via add() without duplicates.
    filepaths = set(filepaths)
    while filepaths:
        fpath = filepaths.pop()
        key = futil.filebase(fpath)
        # If we've already got data and we're not overwriting, move on.
        if key in result and not overwrite:
            print "[%s] %4d: '%s'" % (time.asctime(), len(filepaths), key)
            continue
        try:
            # Otherwise, let's make some requests.
            print "[%s] %4d: '%s'" % (time.asctime(), len(filepaths), key)
            track = T.track_from_filename(fpath)
            if track:
                result[key] = extract_info(track)
            # Persist progress after every successful fetch so a crash
            # doesn't lose the whole run.
            if checkpoint_file:
                with open(checkpoint_file, 'w') as fp:
                    json.dump(result, fp, indent=2)
            throttle.wait()
        except T.util.EchoNestAPIError as err:
            # HTTP 429: rate limited — back off and re-queue this path.
            if err.http_status == 429:
                print "You got rate limited braah ... hang on."
                throttle.wait(10)
                filepaths.add(fpath)
            # 5xx: server-side failure — drop this key and keep going.
            elif err.http_status >= 500:
                print "Server error; moving on, dropping key: %s" % key
        except socket.error as err:
            # Transient network error — re-queue and back off.
            print "Socket Error %s" % err
            filepaths.add(fpath)
            throttle.wait(10)
    return result
Ejemplo n.º 5
0
def main(textlist, num_folds, valid_ratio, output_file):
    """Stratify a textlist into a number of disjoint partitions.

    Parameters
    ----------
    textlist : str
        Path to a textlist file.
    num_folds : int
        Number of splits for the data.
    valid_ratio : scalar, in [0, 1.0)
        Ratio of the training data for validation.
    output_file : str
        File to save the output splits as JSON.
    """
    files = futils.load_textlist(textlist)
    keys = [futils.filebase(f) for f in files]
    folds = util.stratify(keys, num_folds, valid_ratio)
    with open(output_file, 'w') as fp:
        json.dump(folds, fp, indent=2)
Ejemplo n.º 6
0
def main(textlist, num_folds, valid_ratio, output_file):
    """Stratify a textlist into a number of disjoint partitions.

    Parameters
    ----------
    textlist : str
        Path to a textlist file.
    num_folds : int
        Number of splits for the data.
    valid_ratio : scalar, in [0, 1.0)
        Ratio of the training data for validation.
    output_file : str
        File to save the output splits as JSON.
    """
    files = futils.load_textlist(textlist)
    keys = [futils.filebase(f) for f in files]
    folds = util.stratify(keys, num_folds, valid_ratio)
    with open(output_file, 'w') as fp:
        json.dump(folds, fp, indent=2)
Ejemplo n.º 7
0
def create_entity(npz_file, dtype=np.float32):
    """Build a biggie entity from the given npz archive.

    Parameters
    ----------
    npz_file: str
        Path to a 'npz' archive, containing at least a value for 'cqt'.
    dtype: type
        Data type for the cqt array.

    Returns
    -------
    entity: biggie.Entity
        Populated entity, with the following fields:
            {cqt, time_points, icode, note_number, fcode}.
    """
    # Basenames follow "<icode>_<note_number>_<fcode>"; wrap each token as
    # an array to match the entity field conventions.
    tokens = futil.filebase(npz_file).split('_')
    icode, note_number, fcode = (np.array(tok) for tok in tokens)
    entity = biggie.Entity(icode=icode, note_number=note_number,
                           fcode=fcode, **np.load(npz_file))
    # Cast the CQT to the requested precision.
    entity.cqt = entity.cqt.astype(dtype)
    return entity
Ejemplo n.º 8
0
def params_to_output_file(param_file, output_dir):
    """Map a parameter file to its corresponding HDF5 path in `output_dir`."""
    basename = "{0}.hdf5".format(futils.filebase(param_file))
    return os.path.join(output_dir, basename)
Ejemplo n.º 9
0
def file_to_instrument_code(filename):
    """Return the instrument code: the first token of the file's basename,
    splitting on runs of non-word characters or underscores."""
    # Raw string avoids the invalid '\W' escape warning in modern Pythons.
    return re.split(r'[\W_]+', futil.filebase(filename))[0]
Ejemplo n.º 10
0
def file_to_note_number(filename):
    """Extract a note name (e.g. 'C#4') from the file's basename and convert
    it to a note number; returns None if no note name is found."""
    # Raw string for the regex; the capture group matches exactly a note
    # letter, optional sharp, and octave digit.
    matches = re.findall(r'.*([A-G]#?[0-9])', futil.filebase(filename))
    if not matches:
        return None
    # The original `.strip(".wav")` was a no-op: the group can only contain
    # [A-G], '#', and a digit, none of which are in {'.', 'w', 'a', 'v'}.
    return note_name_to_number(matches[0])
Ejemplo n.º 11
0
def has_pitch(f):
    """Return True if the file's basename contains a note name like 'A#3'."""
    # `is not None` already yields a bool; the `False if ... else True`
    # construct was redundant.
    return re.match(r'.*([A-G]#?[0-9])', futil.filebase(f)) is not None
Ejemplo n.º 12
0
def file_to_instrument_code(filename):
    """Return the instrument code: the first token of the file's basename,
    splitting on runs of non-word characters or underscores."""
    # Raw string avoids the invalid '\W' escape warning in modern Pythons.
    return re.split(r'[\W_]+', futil.filebase(filename))[0]
Ejemplo n.º 13
0
def file_to_note_number(filename):
    """Extract a note name (e.g. 'C#4') from the file's basename and convert
    it to a note number; returns None if no note name is found."""
    # Raw string for the regex; the capture group matches exactly a note
    # letter, optional sharp, and octave digit.
    matches = re.findall(r'.*([A-G]#?[0-9])', futil.filebase(filename))
    if not matches:
        return None
    # The original `.strip(".wav")` was a no-op: the group can only contain
    # [A-G], '#', and a digit, none of which are in {'.', 'w', 'a', 'v'}.
    return note_name_to_number(matches[0])
Ejemplo n.º 14
0
def has_pitch(f):
    """Return True if the file's basename contains a note name like 'A#3'."""
    # `is not None` already yields a bool; the `False if ... else True`
    # construct was redundant.
    return re.match(r'.*([A-G]#?[0-9])', futil.filebase(f)) is not None