def save_example_to_tfrecord(df,
                             output_path,
                             audio_format,
                             root_dir,
                             tag_path,
                             sample_rate=16000,
                             num_mels=96,
                             multitag=False,
                             verbose=False):
    ''' Creates and saves a TFRecord file.

    Parameters
    ----------
    df: DataFrame
        A pandas DataFrame with exactly two columns, in this order: the track
        id and a path relative to root_dir. If the columns are exactly
        "track_id" and "npz_path", each path is loaded as a .npz file holding
        the entries 'array' and 'sr'; otherwise each path is decoded as an
        audio file with librosa. NOTE: the index of df is reset in place.

    output_path: str
        The path or filename to save TFRecord file as.
        If not a path, the current folder will be used with output_path as filename.

    audio_format: {'waveform', 'log-mel-spectrogram'}
        If 'log-mel-spectrogram', audio will be converted to that format;
        otherwise, it will default to raw waveform.

    root_dir: str
        The root directory to where the .npz files (or the .mp3 files) are stored.

    tag_path: str
        The path to the lastfm_clean.db database. When multitag is given, this
        is instead the directory that the multitag filenames are joined onto.

    sample_rate: int
        The sample rate to use when serializing the audio.

    num_mels: int
        The number of mels in the mel-spectrogram.

    multitag: list or False
        If a non-empty list of database filenames, encode multiple tags at the
        same time, one row per database (feature names will be 'tags-0',
        'tags-1' etc.). All databases need to have the same number of tags.

    verbose: bool
        If True, print progress.

    Returns
    -------
    None
        Tracks whose encoded tags are empty are skipped. Audio files that
        cannot be decoded are skipped as well and listed on stdout at the end.
    '''

    # open the tag database(s) before creating the output file, so that a
    # failure here does not leave an empty .tfrecord file behind
    if not multitag:
        fm = LastFm(tag_path)
        n_tags = len(fm.get_tag_nums())
    else:
        fm = [LastFm(os.path.join(tag_path, filename)) for filename in multitag]
        tag_counts = [len(db.get_tag_nums()) for db in fm]
        assert all(
            x == tag_counts[0]
            for x in tag_counts), 'all databases need to have the same number of tags'
        n_tags = tag_counts[0]  # cast back to int

    # the column names decide how the audio is read; this does not change
    # between rows, so work it out once instead of on every iteration
    from_npz = set(df.columns) == {'track_id', 'npz_path'}

    # initialize
    exceptions = []
    df.reset_index(drop=True, inplace=True)

    with tf.io.TFRecordWriter(output_path) as writer:
        if verbose:
            progbar = Progbar(len(df))  # create an instance of the progress bar

        for _, (tid, path) in df.iterrows():
            if verbose:
                progbar.add(1)  # update progress bar

            # encode tags
            if not multitag:
                encoded_tags = get_encoded_tags(fm, tid, n_tags)
            else:
                # convert to ndarray to ensure consistency with one-dimensional case
                encoded_tags = np.array(
                    [get_encoded_tags(db, tid, n_tags) for db in fm])

            # skip tracks which dont have any "clean" tags
            if encoded_tags.size == 0:
                if verbose:
                    print("{} has no tags. Skipping...".format(tid))
                continue

            path = os.path.join(root_dir, path)

            if from_npz:
                # get the unsampled array from the .npz file; the context
                # manager closes the underlying file handle once both entries
                # have been read into memory
                with np.load(path) as npz:
                    array, sr = npz['array'], npz['sr']
            else:
                # get the unsampled array from the original .mp3 file
                try:
                    array, sr = librosa.core.load(path, sr=None)
                except Exception:
                    # remember the failure and carry on with the next track;
                    # KeyboardInterrupt / SystemExit are deliberately not caught
                    exceptions.append({
                        'path': path,
                        'tid': tid,
                        'encoded_tags': encoded_tags
                    })
                    continue

            # resample audio array into 'sample_rate' and convert into 'audio_format'
            processed_array = process_array(array,
                                            audio_format,
                                            sr_in=sr,
                                            sr_out=sample_rate,
                                            num_mels=num_mels)

            # load the tf.Example
            example = get_example(processed_array, tid, encoded_tags)

            # save the tf.Example into a .tfrecord file
            writer.write(example.SerializeToString())

    # print exceptions (only the audio-decoding branch ever records any)
    if exceptions:
        print('Could not process the following tracks:')
        for i, exception in enumerate(exceptions):
            print(" {:3d}. {} {}".format(i, exception["tid"],
                                         exception["path"]))