Exemple #1
0
def save_example_to_tfrecord(df,
                             output_path,
                             audio_format,
                             root_dir,
                             tag_path,
                             sample_rate=16000,
                             num_mels=96,
                             multitag=False,
                             verbose=False):
    ''' Creates and saves a TFRecord file.

    Every row of df is turned into one tf.Example (audio + track id + encoded tags)
    and appended to the TFRecord file. Tracks without tags are skipped; tracks whose
    audio file cannot be loaded are collected and listed once the loop has finished.

    Parameters
    ----------
    df: DataFrame
        A pandas DataFrame with exactly two columns: the track id first and the
        (relative) audio path second. If the columns are exactly "track_id" and
        "npz_path", the path is loaded with np.load and must provide the entries
        'array' and 'sr'; otherwise (e.g. "track_id" and "mp3_path") the path is
        loaded with librosa.
        Note: the index of df is reset in place.

    output_path: str
        The path or filename to save TFRecord file as.
        If not a path, the current folder will be used with output_path as filename.

    audio_format: {'waveform', 'log-mel-spectrogram'}
        If 'log-mel-spectrogram', audio will be converted to that format; otherwise, it will default to raw waveform.
        (The value is forwarded unchanged to process_array.)

    root_dir: str
        The root directory to where the .npz files (or the .mp3 files) are stored.
        It is joined with the path found in each row of df.

    tag_path: str
        The path to the lastfm_clean.db database.
        If multitag is given, this is instead the directory that the multitag filenames are joined to.

    sample_rate: int
        The sample rate to use when serializing the audio.

    num_mels: int
        The number of mels in the mel-spectrogram.

    multitag: list or False
        If falsy, a single tags database (tag_path) is used.
        If a list of filenames, one database per filename is opened and the tags of each track are
        encoded once per database and stacked into a single array. All databases must contain the
        same number of tags. (According to the original documentation the resulting feature names
        will be 'tags-0', 'tags-1' etc.; this is decided by get_example.)

    verbose: bool
        If True, print progress.

    Returns
    -------
    None

    Raises
    ------
    AssertionError
        If multitag is given and the databases do not all have the same number of tags.
    '''

    # the input layout cannot change between rows, so it is decided only once
    from_npz = set(df.columns) == {'track_id', 'npz_path'}

    with tf.io.TFRecordWriter(output_path) as writer:
        if not multitag:
            fm = LastFm(tag_path)
            n_tags = len(fm.get_tag_nums())
        else:
            fm = [LastFm(os.path.join(tag_path, name)) for name in multitag]
            tag_counts = [len(db.get_tag_nums()) for db in fm]
            assert all(
                count == tag_counts[0] for count in
                tag_counts), 'all databases need to have the same number of tags'
            n_tags = tag_counts[0]  # cast back to int

        # tracks whose audio could not be loaded (reported after the loop)
        exceptions = []

        # NOTE: this mutates the DataFrame passed in by the caller
        df.reset_index(drop=True, inplace=True)

        if verbose:
            progbar = Progbar(
                len(df))  # create an instance of the progress bar

        for _, cols in df.iterrows():
            if verbose:
                progbar.add(1)  # update progress bar

            # unpack cols (relies on df having exactly two columns: id, path)
            tid, path = cols

            # encode tags
            if not multitag:
                encoded_tags = get_encoded_tags(fm, tid, n_tags)
            else:
                encoded_tags = np.array(
                    [get_encoded_tags(db, tid, n_tags) for db in fm]
                )  # convert to ndarray to ensure consistency with one-dimensional case

            # skip tracks which dont have any "clean" tags
            if encoded_tags.size == 0:
                if verbose:
                    print("{} has no tags. Skipping...".format(tid))
                continue

            path = os.path.join(root_dir, path)

            if from_npz:
                # get the unsampled array from the .npz file; the archive is closed
                # as soon as the two entries have been read into memory
                with np.load(path) as npz_file:
                    array = npz_file['array']
                    sr = npz_file['sr']
            else:
                # get the unsampled array from the original .mp3 file
                try:
                    array, sr = librosa.core.load(path, sr=None)
                except Exception:
                    # deliberately broad: one unreadable file should not abort the
                    # whole conversion, but Ctrl-C / SystemExit must still propagate
                    exceptions.append({
                        'path': path,
                        'tid': tid,
                        'encoded_tags': encoded_tags
                    })
                    continue

            # resample audio array into 'sample_rate' and convert into 'audio_format'
            processed_array = process_array(array,
                                            audio_format,
                                            sr_in=sr,
                                            sr_out=sample_rate,
                                            num_mels=num_mels)

            # load the tf.Example
            example = get_example(processed_array, tid, encoded_tags)

            # save the tf.Example into a .tfrecord file
            writer.write(example.SerializeToString())

        # print exceptions (the list can only be non-empty for non-.npz input)
        if exceptions:
            print('Could not process the following tracks:')
            for position, failed in enumerate(exceptions):
                print(" {:3d}. {} {}".format(
                    position, failed["tid"], failed["path"]))