def extract_features_from_meta(self, csv_audio, feature_dir, subpart_data=None):
    """Extract log mel spectrogram features for every file listed in a metadata csv.

    Args:
        csv_audio : str, file containing names, durations and labels :
            (name, start, end, label, label_index)
            the associated wav_filename is Yname_start_end.wav
        feature_dir: str, the path to the directory where the features are
        subpart_data: int, number of files to extract features from the csv.

    Returns:
        pandas.DataFrame: the metadata restricted to files whose audio was
        found on disk, with a fresh integer index.
    """
    t1 = time.time()
    df_meta = self.get_df_from_meta(csv_audio, subpart_data)
    LOG.info("{} Total file number: {}".format(
        csv_audio, len(df_meta.filename.unique())))

    # The audio directory depends only on the csv, so resolve it once
    # instead of once per file (loop-invariant hoist).
    wav_dir = self.get_audio_dir_path_from_meta(csv_audio)
    for ind, wav_name in enumerate(df_meta.filename.unique()):
        if ind % 500 == 0:
            LOG.debug(ind)
        wav_path = os.path.join(wav_dir, wav_name)
        out_filename = os.path.join(feature_dir, name_only(wav_name) + ".npy")
        if not os.path.exists(out_filename):
            if not os.path.isfile(wav_path):
                # Audio listed in the csv is missing on disk: report it and
                # drop its metadata rows so callers never see orphan entries.
                LOG.error(
                    "File %s is in the csv file but the audio file is missing, "
                    "feature not extracted!" % wav_path)
                df_meta = df_meta.drop(
                    df_meta[df_meta.filename == wav_name].index)
            else:
                (audio, _) = read_audio(wav_path, cfg.sample_rate)
                if audio.shape[0] == 0:
                    # Empty/unreadable audio: log (not print) for consistency
                    # with the other error paths, and skip the file.
                    LOG.error("File %s is corrupted!" % wav_path)
                else:
                    mel_spec = self.calculate_mel_spec(
                        audio, log_feature=self.save_log_feature)
                    np.save(out_filename, mel_spec)
    LOG.debug("compute features time: %s" % (time.time() - t1))
    return df_meta.reset_index(drop=True)
def extract_features_from_meta(self, csv_audio, subpart_data=None, training=False):
    """Extract log mel spectrogram features, optionally with augmentations.

    Args:
        csv_audio : str, file containing names, durations and labels :
            (name, start, end, label, label_index)
            the associated wav_filename is Yname_start_end.wav
        subpart_data: int, number of files to extract features from the csv.
        training: bool, when True the (currently commented-out) waveform
            augmentations are applied in addition to the original signal.

    Returns:
        pandas.DataFrame: one row per kept (metadata row, augmentation) pair,
        with a "feature_filename" column naming the saved .npy file.
    """
    t1 = time.time()
    df_meta = self.get_df_from_meta(csv_audio, subpart_data)
    df_all = list()       # per-file metadata frames, concatenated at the end
    feature_fns = list()  # one feature filename per metadata row, kept aligned
    LOG.info('Extracting/loading features')
    LOG.info("{} Total file number: {}".format(
        csv_audio, len(df_meta.filename.unique())))

    # (tag, waveform transform) pairs; None means "no augmentation".
    augmentation_funcs = [
        ('orig', None),  # original signal
    ]
    if training:
        augmentation_funcs += [
            # ('lpf4k', partial(lpf, wc=4000, fs=cfg.sample_rate)),
            # ('lpf8k', partial(lpf, wc=8000, fs=cfg.sample_rate)),
            # ('lpf16k', partial(lpf, wc=16000, fs=cfg.sample_rate)),
            # ('ps-6', partial(pitch_shift, sr=cfg.sample_rate, n_steps=-6)),
            # ('ps-3', partial(pitch_shift, sr=cfg.sample_rate, n_steps=-3)),
            # ('ps+3', partial(pitch_shift, sr=cfg.sample_rate, n_steps=3)),
            # ('ps+6', partial(pitch_shift, sr=cfg.sample_rate, n_steps=6)),
            # ('ts1.25', partial(time_stretch, rate=1.25)),
            # ('ts1.5', partial(time_stretch, rate=1.5)),
            # ('amp0.5', partial(amplitude_scale, coeff=0.5)),
            # ('amp0.75', partial(amplitude_scale, coeff=0.75)),
            # ('hp0.25', partial(hp_reweight, lam=0.25)),
            # ('hp0.75', partial(hp_reweight, lam=0.75))
        ]

    wav_fns = df_meta.filename.unique()
    # The audio directory depends only on the csv, so resolve it once
    # instead of once per file (loop-invariant hoist).
    wav_dir = self.get_audio_dir_path_from_meta(csv_audio)
    for ind, wav_name in tqdm(enumerate(wav_fns), total=len(wav_fns)):
        if ind % 500 == 0:
            LOG.debug(ind)
        # verify the audio file is present
        wav_path = os.path.join(wav_dir, wav_name)
        if not os.path.isfile(wav_path):
            LOG.error(
                "File %s is in the csv file but the feature is not extracted!"
                % wav_path)
            # df_meta = df_meta.drop(df_meta[df_meta.filename == wav_name].index)
            continue
        # defer loading audio until the need for feature extraction is verified
        audio = None
        # perform all augmentations (including no augmentation)
        for name, func in augmentation_funcs:
            if name == 'orig':
                out_filename = os.path.splitext(wav_name)[0] + ".npy"
            else:
                out_filename = os.path.splitext(
                    wav_name)[0] + '_' + name + ".npy"
            out_path = os.path.join(self.feature_dir, out_filename)
            if not os.path.exists(out_path):
                if audio is None:
                    (audio, _) = read_audio(wav_path, cfg.sample_rate)
                if audio.shape[0] == 0:
                    # Fix: skip the corrupted file entirely. The original code
                    # fell through and still ran feature extraction on the
                    # empty signal, and it removed only ONE feature_fns entry
                    # even when len(meta) > 1 had been appended, which
                    # desynchronized feature_fns from df_all.
                    print("File %s is corrupted!" % wav_path)
                    break
                # perform any augmentation, extract features, save features
                if func is not None:
                    mel_spec = self.calculate_mel_spec(func(audio))
                else:
                    mel_spec = self.calculate_mel_spec(audio)
                np.save(out_path, mel_spec)
            # Record metadata only once the feature file is known to exist.
            meta = df_meta.loc[df_meta.filename == wav_name]
            df_all.append(meta)
            # For synthetic data with time annotation of events, the meta df
            # has several rows per wav file, so the feature filename must be
            # repeated len(meta) times to stay aligned row-for-row.
            feature_fns += [out_filename] * len(meta)
    LOG.debug("compute features time: %s" % (time.time() - t1))

    # form the final DataFrame of meta data for features from original and augmented audio
    df_all = pd.concat(df_all).reset_index(drop=True)
    df_all['feature_filename'] = feature_fns
    return df_all
def extract_features_from_meta_segment(self, csv_audio, feature_dir, subpart_data=None, fixed_segment=None):
    """Extract log mel spectrogram features, but the csv needs to be strongly labeled.

    One feature file is produced per labeled event segment of each wav file,
    and the resulting feature metadata is cached to a csv whose name encodes
    the parameters; a later call with the same parameters reloads the cache.

    Args:
        csv_audio : str, file containing names, durations and labels :
            (name, start, end, label, label_index)
            the associated wav_filename is Yname_start_end.wav
        feature_dir: str, the path of the features directory.
        subpart_data: int, number of files to extract features from the csv.
        fixed_segment: float, in seconds, the size of the kept segment.
            If >audio length, the audio length is kept.
            If segment is True, and >label, it takes the surrounding
            (allow creating weak labels).

    Returns:
        pandas.DataFrame: feature metadata with columns
        (raw_filename, filename, event_labels), either freshly computed or
        reloaded from the cached csv.
    """
    t1 = time.time()
    df_meta = self.get_df_from_meta(csv_audio, subpart_data)
    self.get_classes(df_meta)
    LOG.info("{} Total file number: {}".format(
        csv_audio, len(df_meta.filename.unique())))

    # Build the cache-csv suffix so it uniquely encodes the parameters used.
    ext_name = "_segment_"
    if subpart_data:
        ext_name += str(subpart_data)
    if fixed_segment is not None:
        LOG.debug(
            f" durations before: "
            f"{df_meta.groupby('event_label').apply(lambda x: (x.offset - x.onset).mean())}"
        )
        ext_name += f"fix{fixed_segment}"
        # Truncate/pad each event to the fixed duration (mutates onset/offset).
        df_meta = self.trunc_pad_segment(df_meta, fixed_segment)
        LOG.debug(
            f" durations after: "
            f"{df_meta.groupby('event_label').apply(lambda x: (x.offset - x.onset).mean())}"
        )
    meta_base, meta_ext = os.path.splitext(csv_audio.split("/")[-1])
    csv_features = os.path.join(self.metadata_dir,
                                meta_base + ext_name + meta_ext)

    wav_dir = self.get_audio_dir_path_from_meta(csv_audio)
    df_features = pd.DataFrame()
    path_exists = os.path.exists(csv_features)
    if not path_exists:
        # Loop in all the filenames
        for ind, wav_name in enumerate(df_meta.filename.unique()):
            if ind % 500 == 0:
                LOG.debug(ind)
            wav_path = os.path.join(wav_dir, wav_name)
            if not os.path.isfile(wav_path):
                LOG.error(
                    "File %s is in the csv file but the feature is not extracted, deleting...!"
                    % wav_path)
                df_meta = df_meta.drop(
                    df_meta[df_meta.filename == wav_name].index)
            else:
                try:
                    # Read the duration from the header only; avoids decoding
                    # the whole file just to validate it.
                    audio_len_sec = soundfile.info(wav_path).duration
                except Exception as e:
                    print("File %s is corrupted, not added to df!" % wav_path)
                    print(e)
                    continue
                if audio_len_sec == 0:
                    print("File %s is corrupted, not added to df!" % wav_path)
                else:
                    files_exist = True
                    # How many features we can compute from this file ?
                    sub_df = df_meta[df_meta.filename == wav_name]
                    cnt_max = len(sub_df)
                    if cnt_max == 0:
                        # NOTE(review): `break` aborts the whole file loop;
                        # `continue` looks intended. In practice wav_name comes
                        # from df_meta.filename.unique(), so cnt_max >= 1 and
                        # this branch should be unreachable — confirm.
                        break
                    base_wav_name = name_only(wav_name)
                    ext_featname = "_seg"
                    if fixed_segment:
                        ext_featname += f"fix{fixed_segment}"
                        files_exist = False  # We should always recompute because of the randomness of onset offset
                    # Check if files already exist
                    out_filenames = [
                        base_wav_name + ext_featname + str(cnt) + ".npy"
                        for cnt in range(cnt_max)
                    ]
                    for fname in out_filenames:
                        fpath = os.path.join(feature_dir, fname)
                        if not os.path.exists(fpath):
                            files_exist = False
                            break
                    add_item = {
                        "raw_filename": [],
                        "filename": [],
                        "event_labels": []
                    }
                    # One feature per labeled event row of this wav file;
                    # rows with NaN event_label are skipped entirely.
                    for ii, (i, row) in enumerate(sub_df.iterrows()):
                        if not pd.isna(row.event_label):
                            # First segment keeps the bare name, later ones
                            # get the row index appended.
                            if ii > 0:
                                extnb = str(ii)
                            else:
                                extnb = ""
                            out_filename = os.path.join(
                                feature_dir, name_only(wav_name))
                            out_filename += ext_featname + extnb + ".npy"
                            if not files_exist:
                                # Onset/offset are in seconds; convert using the
                                # file's native samplerate before reading.
                                sr = soundfile.info(wav_path).samplerate
                                (audio, _) = read_audio(wav_path,
                                                        cfg.sample_rate,
                                                        start=int(row.onset * sr),
                                                        stop=int(row.offset * sr))
                                mel_spec = self.calculate_mel_spec(
                                    audio, log_feature=self.save_log_feature)
                                if fixed_segment:
                                    # Pad/truncate to a fixed number of frames.
                                    pad_trunc_length = int(
                                        fixed_segment * cfg.sample_rate // cfg.hop_length)
                                    mel_spec = pad_trunc_seq(
                                        mel_spec, pad_trunc_length)
                                np.save(out_filename, mel_spec)
                            add_item["raw_filename"].append(wav_name)
                            add_item["filename"].append(out_filename)
                            add_item["event_labels"].append(
                                row["event_label"])
                    # NOTE(review): DataFrame.append is deprecated and removed
                    # in pandas >= 2.0; migrate to pd.concat when touching this.
                    df_features = df_features.append(
                        pd.DataFrame(add_item), ignore_index=True)
        df_features.to_csv(csv_features,
                           sep="\t",
                           header=True,
                           index=False)
        df_features = pd.read_csv(
            csv_features, sep="\t")  # Otherwise event_labels is "" and not NaN
    else:
        df_features = self.get_df_from_meta(
            csv_features)  # No subpart data because should be in the name
    LOG.debug("compute features time: %s" % (time.time() - t1))
    return df_features
def extract_features_from_meta_frames(self, csv_audio, feature_dir, frames_in_sec, subpart_data=None):
    """Extract log mel spectrogram features cut into fixed-length subsegments.

    Results are cached in a csv whose name encodes the frame count (and the
    subpart size); when that csv already exists it is reloaded instead of
    recomputing features.

    Args:
        csv_audio : str, file containing names, durations and labels :
            (name, start, end, label, label_index)
            the associated wav_filename is Yname_start_end.wav
        feature_dir: str, the directory where the features are or will be created
        frames_in_sec: float, number of seconds per subsegment.
        subpart_data: int, number of files to extract features from the csv.

    Returns:
        pandas.DataFrame: per-subsegment feature metadata, freshly computed
        or reloaded from the cached csv.
    """
    frames = int(frames_in_sec * cfg.sample_rate / cfg.hop_length)
    t1 = time.time()
    df_meta = pd.read_csv(csv_audio, header=0, sep="\t")
    LOG.info("{} Total file number: {}".format(
        csv_audio, len(df_meta.filename.unique())))

    # Csv to store the features
    ext_name = "_" + str(frames)
    if subpart_data is not None and subpart_data < len(
            df_meta.filename.unique()):
        ext_name += "_sub" + str(subpart_data)
        df_meta = self.get_subpart_data(df_meta, subpart_data)
    self.get_classes(df_meta)
    meta_base, meta_ext = os.path.splitext(csv_audio.split("/")[-1])
    csv_features = os.path.join(self.metadata_dir,
                                meta_base + ext_name + meta_ext)

    wav_dir = self.get_audio_dir_path_from_meta(csv_audio)
    df_features = pd.DataFrame()
    if not os.path.exists(csv_features):
        LOG.debug("Creating new feature df")

        def segment_filenames(base_wav_name, cnt_max):
            # One .npy per subsegment: <base>fr<frames>_<start>-<end>.npy
            # (was duplicated inline twice in the original).
            return [
                base_wav_name + "fr" + str(frames) + "_" + str(cnt * frames)
                + "-" + str((cnt + 1) * frames) + ".npy"
                for cnt in range(cnt_max)
            ]

        parts = []  # per-file label frames, concatenated once at the end
        cnt_new_features = 0
        # Loop in all the filenames
        for ind, wav_name in enumerate(df_meta.filename.unique()):
            wav_path = os.path.join(wav_dir, wav_name)
            if not os.path.isfile(wav_path):
                LOG.error(
                    "File %s is in the csv file but the feature is not extracted, deleting...!"
                    % wav_path)
                df_meta = df_meta.drop(
                    df_meta[df_meta.filename == wav_name].index)
                continue
            try:
                # Header-only read to validate the file cheaply.
                audio_len_sec = soundfile.info(wav_path).duration
            except Exception as e:
                print("File %s is corrupted, not added to df!" % wav_path)
                print(e)
                continue
            if audio_len_sec == 0:
                print("File %s is corrupted, not added to df!" % wav_path)
                continue
            # How many features we can compute from this file ?
            cnt_max = min(int(audio_len_sec // frames_in_sec),
                          int(cfg.max_len_seconds // frames_in_sec))
            if cnt_max == 0:
                cnt_max = 1  # keep at least one (short) segment per file
            base_wav_name = os.path.join(feature_dir, name_only(wav_name))
            # Check if files already exist
            out_filenames = segment_filenames(base_wav_name, cnt_max)
            files_exist = all(os.path.exists(fname) for fname in out_filenames)
            if not files_exist:
                if cnt_new_features % 500 == 0:
                    LOG.debug(f"new features, {cnt_new_features}")
                cnt_new_features += 1
                # get_features computes and saves the segments; it returns the
                # actual segment count, which may differ from our estimate, so
                # the filename list is rebuilt. The audio return is unused.
                _, cnt_max = self.get_features(wav_path, feature_dir, frames)
                out_filenames = segment_filenames(base_wav_name, cnt_max)
            # features label to add to the dataframe
            add_item = self.get_labels(ind, df_meta, wav_name, frames,
                                       out_filenames)
            parts.append(pd.DataFrame(add_item))
        if parts:
            # pd.concat replaces DataFrame.append (deprecated, removed in
            # pandas 2.0) and avoids re-copying the frame on every file.
            df_features = pd.concat(parts, ignore_index=True)
        LOG.info(csv_features)
        df_features.to_csv(csv_features,
                           sep="\t",
                           header=True,
                           index=False)
        df_features = pd.read_csv(
            csv_features, sep="\t")  # Otherwise event_labels is "" and not NaN
    else:
        df_features = self.get_df_from_meta(
            csv_features)  # No subpart data because should be in the name
    LOG.debug("compute features time: %s" % (time.time() - t1))
    return df_features