def parse(filename):
    """Decode a filename matching the UIowa format.

    The base name (as returned by ``utils.filebase``) is treated as a
    sequence of '.'-separated fields. The first field is the instrument;
    the dynamic marker and the note name(s) are each taken from the first
    *complete* field made up only of the corresponding characters.

    Parameters
    ----------
    filename : str
        A filename conforming to the UIowa MIS conventions.

    Returns
    -------
    instrument : str
        Instrument name, i.e. the text before the first '.', stripped of
        surrounding whitespace.
    dynamic : str, or None
        Dynamic marker (a run of 'f', 'p' and 'm' characters such as "ff"
        or "mf"), or None if no field matches.
    notevalue : str, or None
        Note name(s) for the file (letters A-G, 'b', '#' and octave digits
        0-8, e.g. "Bb1B1"), or None if no field matches.
    """
    filename = utils.filebase(filename)
    instrument = filename.split('.')[0].strip()

    # A note field must be preceded by '.' and followed by '.' or the end
    # of the name. Anchoring both sides keeps a field that merely starts
    # with a note letter from being reported as a note.
    note_match = re.search(r"(?<=\.)[A-Gb#0-8]+(?=\.|$)", filename)
    notevalue = note_match.group(0) if note_match else None

    # Same anchoring for dynamics; without the trailing anchor the leading
    # 'p' of a field such as "pizz" would be mistaken for a dynamic marker.
    dynamic_match = re.search(r"(?<=\.)[fpm]+(?=\.|$)", filename)
    dynamic = dynamic_match.group(0) if dynamic_match else None

    return instrument, dynamic, notevalue
def parse(filename):
    """Split a phil file path into its underscore-delimited fields.

    Parameters
    ----------
    filename : str
        Full path to the audio file. Only the base name, as returned by
        ``utils.filebase``, is inspected.

    Returns
    -------
    parts : tuple, len=5
        ``(instrument, note, duration, dynamic, articulation)``, in the
        order the fields appear in the base name.

    Raises
    ------
    ValueError
        If the base name does not split into exactly five fields on '_'.
    """
    fields = utils.filebase(filename).split('_')
    # Unpacking enforces the five-field layout.
    instrument, note, duration, dynamic, articulation = fields
    return instrument, note, duration, dynamic, articulation
def parse(filename):
    """Split a good-sounds path into its directory-level codes.

    Parameters
    ----------
    filename : str
        Full path to the audio file. If it contains ``EXPECTED_ROOT_DIR``,
        everything up to and including its first occurrence is discarded
        before the path is split.

    Returns
    -------
    parts : tuple, len=4
        ``(instrument, pack, take, note_id)``, where ``note_id`` is the
        last path component passed through ``utils.filebase``.

    Raises
    ------
    ValueError
        If the remaining path does not have exactly four '/'-separated
        components.
    """
    root_pos = filename.find(EXPECTED_ROOT_DIR)
    if root_pos != -1:
        # Keep only the portion that follows the dataset root.
        filename = filename[root_pos + len(EXPECTED_ROOT_DIR):]

    instrument, pack, take, note_file = filename.strip('/').split('/')
    return instrument, pack, take, utils.filebase(note_file)
def collect(base_dir, fext="*.flac", onset_dir=ONSET_DIR):
    """Convert a base directory of RWC files to a pandas dataframe.

    Parameters
    ----------
    base_dir : str
        Full path to the base RWC directory. Audio files are searched for
        exactly two directory levels below it.
    fext : str
        Glob pattern for the audio file names.
    onset_dir : str
        Directory handed to ``utils.find_onset_file_from_uid`` when looking
        up the onset file for each audio file.

    Returns
    -------
    pandas.DataFrame
        Indexed by the uid that ``utils.generate_id`` produces from the
        dataset name and the file's base name.

        With the following columns:
            audio_file : full path to original audio file.
            dataset : dataset it is from
            instrument : instrument label.
            dynamic : dynamic tag
            onsets_file : result of the onset-file lookup for the uid.
    """
    logger.info("Scanning {} for audio files.".format(base_dir))

    pattern = os.path.join(base_dir, "*/*/{}".format(fext))
    uids = []
    rows = []
    for path in glob.glob(pattern):
        # The style code is parsed but not stored in the dataframe.
        instrument, _style, dynamic = parse(path)

        # TODO: Other datasets hash the filepath relative to `base_dir`; we
        # should really do the same here, but care must be taken to keep the
        # onsets sync'ed.
        uid = utils.generate_id(NAME, utils.filebase(path))
        onsets_file = utils.find_onset_file_from_uid(uid, onset_dir)

        uids.append(uid)
        rows.append({
            "audio_file": path,
            "dataset": NAME,
            "instrument": instrument,
            "dynamic": dynamic,
            "onsets_file": onsets_file,
        })

    logger.info("Found {} files from RWC.".format(len(rows)))
    return pd.DataFrame(rows, index=uids)
def parse(filename):
    """Break a good-sounds path into the codes encoded by its directories.

    Parameters
    ----------
    filename : str
        Full path to the audio file. When ``EXPECTED_ROOT_DIR`` occurs in
        the path, only the text after its first occurrence is considered.

    Returns
    -------
    parts : tuple, len=4
        ``(instrument, pack, take, note_id)``; ``note_id`` is the final
        path component after ``utils.filebase`` has been applied to it.

    Raises
    ------
    ValueError
        If the path, once trimmed, does not consist of exactly four
        '/'-separated components.
    """
    try:
        root_start = filename.index(EXPECTED_ROOT_DIR)
    except ValueError:
        # Root directory not present: parse the path as given.
        relative = filename
    else:
        relative = filename[root_start + len(EXPECTED_ROOT_DIR):]

    components = relative.strip('/').split('/')
    instrument, pack, take, note_file = components
    note_id = utils.filebase(note_file)
    return instrument, pack, take, note_id
def collect(base_dir, fext="*.flac"):
    """Convert a base directory of RWC files to a pandas dataframe.

    Parameters
    ----------
    base_dir : str
        Full path to the base RWC directory. Audio files are searched for
        exactly two directory levels below it.
    fext : str
        Glob pattern for the audio file names.

    Returns
    -------
    pandas.DataFrame
        Indexed by the uid that ``utils.generate_id`` produces from the
        dataset name and the file's base name.

        With the following columns:
            audio_file : full path to original audio file.
            dataset : dataset it is from
            instrument : instrument label.
            dynamic : dynamic tag
    """
    logger.info("Scanning {} for audio files.".format(base_dir))

    pattern = os.path.join(base_dir, "*/*/{}".format(fext))
    uids = []
    rows = []
    for path in glob.glob(pattern):
        # The style code is parsed but not stored in the dataframe.
        instrument, _style, dynamic = parse(path)
        uids.append(utils.generate_id(NAME, utils.filebase(path)))
        rows.append({
            "audio_file": path,
            "dataset": NAME,
            "instrument": instrument,
            "dynamic": dynamic,
        })

    logger.info("Found {} files from RWC.".format(len(rows)))
    return pd.DataFrame(rows, index=uids)
def parse(filename):
    """Extract the codes stored at fixed positions in an RWC file name.

    Parameters
    ----------
    filename : str
        Full path or basename; it is reduced with ``utils.filebase`` before
        any characters are read.

    Returns
    -------
    instrument_name : whatever ``instrument_code_to_name`` returns for the
        two-character instrument code (characters 3-4 of the base name).
    style_code : str
        Characters 5-6 of the base name.
    dynamic_code : str
        Character 7 of the base name.

    Raises
    ------
    IndexError
        If the base name is shorter than eight characters.
    """
    stem = utils.filebase(filename)

    # Resolve the instrument first, then read the remaining fixed-width codes.
    instrument_name = instrument_code_to_name(stem[3:5])
    style_code, dynamic_code = stem[5:7], stem[7]
    return instrument_name, style_code, dynamic_code