コード例 #1
0
def parse(filename):
    """Decode a filename matching the UIowa format.

    The base name is split on '.'. The first field is taken as the
    instrument; the remaining fields are scanned, in order, for the first
    one made up entirely of dynamic characters and the first one that looks
    like a note name (or a run of note names).

    Parameters
    ----------
    filename : str
        A filename conforming to the UIowa MIS conventions.

    Returns
    -------
    instrument : str
        Instrument name (the first '.'-delimited field).

    dynamic : str, or None
        Dynamic marker, e.g. 'ff' or 'mp'; None if no field qualifies.

    notevalue : str, or None
        Note name(s) for the file, e.g. 'C4' or 'C4B4'; None if no field
        qualifies.
    """
    filename = utils.filebase(filename)
    parameters = [x.strip() for x in filename.split('.')]
    instrument = parameters.pop(0)

    # Match whole fields rather than searching the raw string: an unanchored
    # search picks up prefixes of unrelated fields (e.g. the 'p' of 'pizz'
    # or the 'b' of 'brass').
    # A note field starts with a pitch letter A-G, followed by any mix of
    # pitch letters, accidentals ('b', '#') and octave digits.
    note_pattern = re.compile(r"^[A-G][A-Gb#0-9]*$")
    # A dynamic field consists only of the letters f, p and m.
    dynamic_pattern = re.compile(r"^[fpm]+$")

    notevalue = next(
        (field for field in parameters if note_pattern.match(field)), None)
    dynamic = next(
        (field for field in parameters if dynamic_pattern.match(field)), None)
    return instrument, dynamic, notevalue
コード例 #2
0
ファイル: uiowa.py プロジェクト: ejhumphrey/minst-dataset
def parse(filename):
    """Decode a filename matching the UIowa format.

    The base name is split on '.'. The first field is taken as the
    instrument; the remaining fields are scanned, in order, for the first
    one made up entirely of dynamic characters and the first one that looks
    like a note name (or a run of note names).

    Parameters
    ----------
    filename : str
        A filename conforming to the UIowa MIS conventions.

    Returns
    -------
    instrument : str
        Instrument name (the first '.'-delimited field).

    dynamic : str, or None
        Dynamic marker, e.g. 'ff' or 'mp'; None if no field qualifies.

    notevalue : str, or None
        Note name(s) for the file, e.g. 'C4' or 'C4B4'; None if no field
        qualifies.
    """
    filename = utils.filebase(filename)
    parameters = [x.strip() for x in filename.split('.')]
    instrument = parameters.pop(0)

    # Match whole fields rather than searching the raw string: an unanchored
    # search picks up prefixes of unrelated fields (e.g. the 'p' of 'pizz'
    # or the 'b' of 'brass').
    # A note field starts with a pitch letter A-G, followed by any mix of
    # pitch letters, accidentals ('b', '#') and octave digits.
    note_pattern = re.compile(r"^[A-G][A-Gb#0-9]*$")
    # A dynamic field consists only of the letters f, p and m.
    dynamic_pattern = re.compile(r"^[fpm]+$")

    notevalue = next(
        (field for field in parameters if note_pattern.match(field)), None)
    dynamic = next(
        (field for field in parameters if dynamic_pattern.match(field)), None)
    return instrument, dynamic, notevalue
コード例 #3
0
def parse(filename):
    """Split a phil file path into its five '_'-delimited fields.

    Parameters
    ----------
    filename : str
        Full path to the audio file; only its base name (as returned by
        `utils.filebase`) is inspected.

    Returns
    -------
    parts : tuple, len=5
        (instrument, note, duration, dynamic, articulation), in the order
        the fields appear in the base name.

    Raises
    ------
    ValueError
        If the base name does not split into exactly five fields on '_'.
    """
    stem = utils.filebase(filename)
    fields = stem.split('_')
    # Unpacking enforces the five-field layout.
    instrument, note, duration, dynamic, articulation = fields
    return instrument, note, duration, dynamic, articulation
コード例 #4
0
def parse(filename):
    """Break a phil file path down into its codes/parameters.

    Parameters
    ----------
    filename : str
        Full path to the audio file. Only the base name obtained from
        `utils.filebase` is used.

    Returns
    -------
    parts : tuple, len=5
        The '_'-separated fields of the base name, in order:
        (instrument, note, duration, dynamic, articulation).

    Raises
    ------
    ValueError
        If splitting the base name on '_' does not give exactly five
        fields.
    """
    pieces = utils.filebase(filename).split('_')
    # Tuple unpacking doubles as a check on the number of fields.
    instrument, note, duration, dynamic, articulation = pieces
    return instrument, note, duration, dynamic, articulation
コード例 #5
0
def parse(filename):
    """Convert a good-sounds path to codes/parameters.

    Everything up to and including the first occurrence of
    `EXPECTED_ROOT_DIR` is discarded (when present); the remainder is split
    on '/' into four components.

    Parameters
    ----------
    filename : str
        Full path to the audio file.

    Returns
    -------
    parts : tuple, len=4
        (instrument, pack, take, note_id), where `note_id` is the last path
        component passed through `utils.filebase`.

    Raises
    ------
    ValueError
        If the remaining path does not have exactly four '/'-separated
        components.
    """
    root_at = filename.find(EXPECTED_ROOT_DIR)
    if root_at != -1:
        # Keep only what follows the dataset root.
        filename = filename[root_at + len(EXPECTED_ROOT_DIR):]
    components = filename.strip('/').split('/')
    instrument, pack, take, leaf = components
    return instrument, pack, take, utils.filebase(leaf)
コード例 #6
0
def collect(base_dir, fext="*.flac", onset_dir=ONSET_DIR):
    """Convert a base directory of RWC files to a pandas dataframe.

    Parameters
    ----------
    base_dir : str
        Full path to the base RWC directory. Audio files are looked up two
        directory levels below it.

    fext : str
        Glob pattern for the audio file names.

    onset_dir : str
        Directory handed to `utils.find_onset_file_from_uid` when looking
        up the onsets file for each audio file.

    Returns
    -------
    pandas.DataFrame
        Indexed by:
            id : value of `utils.generate_id` for NAME and the file's base
                name
        With the following columns:
            audio_file : full path to original audio file.
            dataset : dataset it is from
            instrument : instrument label.
            dynamic : dynamic tag
            onsets_file : result of the onset-file lookup for this id
    """
    logger.info("Scanning {} for audio files.".format(base_dir))

    pattern = os.path.join(base_dir, "*/*/{}".format(fext))
    uids, rows = [], []
    for path in glob.glob(pattern):
        # The style code is parsed but not stored in the dataframe.
        instrument, _, dynamic = parse(path)
        # TODO: Other datasets hash the filepath relative to `base_dir`; we
        # should really do the same here, but care must be taken to keep the
        # onsets sync'ed.
        uid = utils.generate_id(NAME, utils.filebase(path))
        onsets_path = utils.find_onset_file_from_uid(uid, onset_dir)
        uids.append(uid)
        rows.append({
            "audio_file": path,
            "dataset": NAME,
            "instrument": instrument,
            "dynamic": dynamic,
            "onsets_file": onsets_path,
        })

    logger.info("Found {} files from RWC.".format(len(rows)))
    return pd.DataFrame(rows, index=uids)
コード例 #7
0
ファイル: rwc.py プロジェクト: ejhumphrey/minst-dataset
def collect(base_dir, fext="*.flac", onset_dir=ONSET_DIR):
    """Build a pandas dataframe describing the RWC files under a directory.

    Parameters
    ----------
    base_dir : str
        Full path to the base RWC directory; files matching `fext` are
        searched for two directory levels below it.

    fext : str
        Glob pattern that audio file names must match.

    onset_dir : str
        Directory passed to `utils.find_onset_file_from_uid` to locate
        each file's onsets.

    Returns
    -------
    pandas.DataFrame
        Indexed by:
            id : identifier from `utils.generate_id`, computed from NAME
                and the file's base name
        With the following columns:
            audio_file : full path to original audio file.
            dataset : dataset it is from
            instrument : instrument label.
            dynamic : dynamic tag
            onsets_file : onsets file found for this id
    """
    logger.info("Scanning {} for audio files.".format(base_dir))

    search_glob = os.path.join(base_dir, "*/*/{}".format(fext))
    index, entries = [], []
    for audio_path in glob.glob(search_glob):
        # Only the instrument and dynamic codes end up in the record.
        parsed = parse(audio_path)
        instrument_label, _unused_style, dynamic_tag = parsed
        # TODO: Other datasets hash the filepath relative to `base_dir`; we
        # should really do the same here, but care must be taken to keep the
        # onsets sync'ed.
        file_id = utils.generate_id(NAME, utils.filebase(audio_path))
        onsets_file = utils.find_onset_file_from_uid(file_id, onset_dir)
        index.append(file_id)
        entry = dict(audio_file=audio_path, dataset=NAME)
        entry["instrument"] = instrument_label
        entry["dynamic"] = dynamic_tag
        entry["onsets_file"] = onsets_file
        entries.append(entry)

    logger.info("Found {} files from RWC.".format(len(entries)))
    return pd.DataFrame(entries, index=index)
コード例 #8
0
def parse(filename):
    """Extract the good-sounds codes/parameters from a file path.

    When `EXPECTED_ROOT_DIR` occurs in the path, everything up to and
    including its first occurrence is dropped before the path is split
    on '/'.

    Parameters
    ----------
    filename : str
        Full path to the audio file.

    Returns
    -------
    parts : tuple, len=4
        (instrument, pack, take, note_id); `note_id` is the final path
        component after `utils.filebase` has been applied to it.

    Raises
    ------
    ValueError
        If the path, once trimmed, does not consist of exactly four
        '/'-separated components.
    """
    offset = filename.find(EXPECTED_ROOT_DIR)
    relative = filename
    if offset >= 0:
        # Skip past the dataset root itself.
        relative = filename[offset + len(EXPECTED_ROOT_DIR):]
    instrument, pack, take, last_part = relative.strip('/').split('/')
    note_id = utils.filebase(last_part)
    return instrument, pack, take, note_id
コード例 #9
0
ファイル: rwc.py プロジェクト: faroit/minst-dataset
def collect(base_dir, fext="*.flac"):
    """Convert a base directory of RWC files to a pandas dataframe.

    Parameters
    ----------
    base_dir : str
        Full path to the base RWC directory. Audio files are looked up two
        directory levels below it.

    fext : str
        Glob pattern for the audio file names.

    Returns
    -------
    pandas.DataFrame
        Indexed by:
            id : value of `utils.generate_id` for NAME and the file's base
                name
        With the following columns:
            audio_file : full path to original audio file.
            dataset : dataset it is from
            instrument : instrument label.
            dynamic : dynamic tag
    """
    logger.info("Scanning {} for audio files.".format(base_dir))

    pattern = os.path.join(base_dir, "*/*/{}".format(fext))
    uids, rows = [], []
    for path in glob.glob(pattern):
        # The style code is parsed but not stored in the dataframe.
        instrument, _, dynamic = parse(path)
        uids.append(utils.generate_id(NAME, utils.filebase(path)))
        rows.append({
            "audio_file": path,
            "dataset": NAME,
            "instrument": instrument,
            "dynamic": dynamic,
        })

    logger.info("Found {} files from RWC.".format(len(rows)))

    return pd.DataFrame(rows, index=uids)
コード例 #10
0
def parse(filename):
    """Extract the RWC codes embedded in a file name.

    The codes are read from fixed character positions of the base name
    returned by `utils.filebase`.

    Parameters
    ----------
    filename : str
        Full path or basename. If full path, gets the basename.

    Returns
    -------
    instrument_name : result of `instrument_code_to_name` for characters
        3-4 of the base name.
    style_code : str
        Characters 5-6 of the base name.
    dynamic_code : str
        Character 7 of the base name.

    Raises
    ------
    IndexError
        If the base name has fewer than eight characters.
    """
    stem = utils.filebase(filename)
    # Translate the two-character instrument code into its name.
    instrument_name = instrument_code_to_name(stem[3:5])
    return instrument_name, stem[5:7], stem[7]
コード例 #11
0
ファイル: rwc.py プロジェクト: ejhumphrey/minst-dataset
def parse(filename):
    """Take an RWC path and return the codes extracted from its file name.

    Each code sits at a fixed offset within the base name obtained from
    `utils.filebase`.

    Parameters
    ----------
    filename : str
        Full path or basename. If full path, gets the basename.

    Returns
    -------
    instrument_name : value returned by `instrument_code_to_name` for the
        two-character instrument code (offsets 3-4).
    style_code : str
        Two characters at offsets 5-6.
    dynamic_code : str
        Single character at offset 7.

    Raises
    ------
    IndexError
        If the base name is shorter than eight characters.
    """
    base = utils.filebase(filename)
    code = base[3:5]
    # Resolve the instrument code to a name before reading the other codes.
    instrument_name = instrument_code_to_name(code)
    style_code, dynamic_code = base[5:7], base[7]
    return instrument_name, style_code, dynamic_code