Exemple #1
0
    def extract_features_from_meta(self,
                                   csv_audio,
                                   feature_dir,
                                   subpart_data=None):
        """Extract mel spectrogram features for the audio files listed in a csv.

        For every unique filename of the metadata, the feature is computed with
        ``self.calculate_mel_spec`` and saved to
        ``<feature_dir>/<name_only(filename)>.npy``, unless that file already
        exists. When the feature does not exist and cannot be produced (audio
        file missing on disk, or audio read back empty), the rows of that file
        are removed from the returned metadata.

        Args:
            csv_audio : str, file containing names, durations and labels : (name, start, end, label, label_index)
                the associated wav_filename is Yname_start_end.wav
            feature_dir: str, the path to the directory where the features are
            subpart_data: int, number of files to extract features from the csv.

        Returns:
            The metadata obtained from ``self.get_df_from_meta``, without the
            rows of the files that have no feature, and with its index reset.
        """
        t1 = time.time()
        df_meta = self.get_df_from_meta(csv_audio, subpart_data)
        wav_names = df_meta.filename.unique()
        LOG.info("{} Total file number: {}".format(csv_audio, len(wav_names)))

        # The audio directory only depends on the csv: resolve it once.
        wav_dir = self.get_audio_dir_path_from_meta(csv_audio)

        for ind, wav_name in enumerate(wav_names):
            if ind % 500 == 0:
                LOG.debug(ind)

            wav_path = os.path.join(wav_dir, wav_name)
            out_filename = os.path.join(feature_dir,
                                        name_only(wav_name) + ".npy")

            # Feature already on disk: nothing to compute for this file.
            if os.path.exists(out_filename):
                continue

            if not os.path.isfile(wav_path):
                LOG.error(
                    "File %s is in the csv file but the feature is not extracted!"
                    % wav_path)
                df_meta = df_meta.drop(
                    df_meta[df_meta.filename == wav_name].index)
                continue

            (audio, _) = read_audio(wav_path, cfg.sample_rate)
            if audio.shape[0] == 0:
                LOG.error("File %s is corrupted!" % wav_path)
                # No feature is saved for an empty signal, so the metadata must
                # not reference this file either (same as a missing file).
                df_meta = df_meta.drop(
                    df_meta[df_meta.filename == wav_name].index)
            else:
                mel_spec = self.calculate_mel_spec(
                    audio, log_feature=self.save_log_feature)
                np.save(out_filename, mel_spec)

            # Cumulative time since the start of the extraction.
            LOG.debug("compute features time: %s" % (time.time() - t1))

        return df_meta.reset_index(drop=True)
    def extract_features_from_meta(self,
                                   csv_audio,
                                   subpart_data=None,
                                   training=False):
        """Extract (or reuse) mel spectrogram features, optionally with augmentations.

        For every audio file of the metadata and every entry of the
        augmentation list, the feature is computed with
        ``self.calculate_mel_spec`` and saved in ``self.feature_dir`` unless the
        ``.npy`` file already exists. The metadata rows of the file are added to
        the result once per available feature, together with the feature
        filename.

        Args:
            csv_audio : str, file containing names, durations and labels : (name, start, end, label, label_index)
                the associated wav_filename is Yname_start_end.wav
            subpart_data: int, number of files to extract features from the csv.
            training: bool, when True the augmentations listed in the function
                are applied in addition to the original signal. All of them are
                currently commented out, so the flag has no effect for now.

        Returns:
            pd.DataFrame, the metadata rows of every file that has a feature
            (repeated for each augmentation), with an extra
            ``feature_filename`` column holding the feature file name relative
            to ``self.feature_dir``. Files that are missing, or whose audio is
            empty and have no feature on disk, are left out.
        """
        t1 = time.time()
        df_meta = self.get_df_from_meta(csv_audio, subpart_data)
        df_all = []
        feature_fns = []
        wav_fns = df_meta.filename.unique()
        LOG.info('Extracting/loading features')
        LOG.info("{} Total file number: {}".format(csv_audio, len(wav_fns)))

        augmentation_funcs = [
            ('orig', None),  # original signal
        ]

        if training:
            # Available augmentations, kept disabled; uncomment to enable.
            augmentation_funcs += [
                # ('lpf4k', partial(lpf, wc=4000, fs=cfg.sample_rate)),
                # ('lpf8k', partial(lpf, wc=8000, fs=cfg.sample_rate)),
                # ('lpf16k', partial(lpf, wc=16000, fs=cfg.sample_rate)),
                # ('ps-6', partial(pitch_shift, sr=cfg.sample_rate, n_steps=-6)),
                # ('ps-3', partial(pitch_shift, sr=cfg.sample_rate, n_steps=-3)),
                # ('ps+3', partial(pitch_shift, sr=cfg.sample_rate, n_steps=3)),
                # ('ps+6', partial(pitch_shift, sr=cfg.sample_rate, n_steps=6)),
                # ('ts1.25', partial(time_stretch, rate=1.25)),
                # ('ts1.5', partial(time_stretch, rate=1.5)),
                # ('amp0.5', partial(amplitude_scale, coeff=0.5)),
                # ('amp0.75', partial(amplitude_scale, coeff=0.75)),
                # ('hp0.25', partial(hp_reweight, lam=0.25)),
                # ('hp0.75', partial(hp_reweight, lam=0.75))
            ]

        # The audio directory only depends on the csv: resolve it once.
        wav_dir = self.get_audio_dir_path_from_meta(csv_audio)

        for ind, wav_name in tqdm(enumerate(wav_fns), total=len(wav_fns)):
            if ind % 500 == 0:
                LOG.debug(ind)

            # verify the audio file is present
            wav_path = os.path.join(wav_dir, wav_name)
            if not os.path.isfile(wav_path):
                LOG.error(
                    "File %s is in the csv file but the feature is not extracted!"
                    % wav_path)
                continue

            # For synthetic data with time annotation of events, the meta df
            # has several rows per wav file; they all share the same feature.
            meta = df_meta.loc[df_meta.filename == wav_name]
            base_name = os.path.splitext(wav_name)[0]

            # defer loading audio until the need for feature extraction is verified
            audio = None

            # perform all augmentations (including no augmentation)
            for name, func in augmentation_funcs:
                if name == 'orig':
                    out_filename = base_name + ".npy"
                else:
                    out_filename = base_name + '_' + name + ".npy"
                out_path = os.path.join(self.feature_dir, out_filename)

                if not os.path.exists(out_path):
                    if audio is None:
                        (audio, _) = read_audio(wav_path, cfg.sample_rate)
                        if audio.shape[0] == 0:
                            LOG.error("File %s is corrupted!" % wav_path)
                    if audio.shape[0] == 0:
                        # Nothing can be extracted from an empty signal: do not
                        # save a feature and do not register any metadata.
                        continue

                    # perform any augmentation, extract features, save features
                    signal = audio if func is None else func(audio)
                    mel_spec = self.calculate_mel_spec(signal)
                    np.save(out_path, mel_spec)

                    LOG.debug("compute features time: %s" %
                              (time.time() - t1))

                # Register the metadata only once the feature is known to
                # exist, with exactly one feature filename per metadata row so
                # both lists always stay aligned.
                df_all.append(meta)
                feature_fns.extend([out_filename] * len(meta))

        if not df_all:
            # pd.concat raises on an empty list: return an empty frame that
            # still carries the expected columns.
            LOG.error("No feature available for %s" % csv_audio)
            df_empty = df_meta.iloc[0:0].copy().reset_index(drop=True)
            df_empty['feature_filename'] = feature_fns
            return df_empty

        # form the final DataFrame of meta data for features from original and augmented audio
        df_all = pd.concat(df_all).reset_index(drop=True)
        df_all['feature_filename'] = feature_fns

        return df_all
Exemple #3
0
    def extract_features_from_meta_segment(self,
                                           csv_audio,
                                           feature_dir,
                                           subpart_data=None,
                                           fixed_segment=None):
        """Extract one mel spectrogram per labeled event; the csv needs to be strongly labeled.

        The result is cached in a csv stored in ``self.metadata_dir`` (its name
        is derived from ``csv_audio``, ``subpart_data`` and ``fixed_segment``).
        When that csv already exists it is simply loaded; otherwise every row
        with an event label produces a feature file
        ``<feature_dir>/<name>_seg[fix<fixed_segment>][<row position>].npy``
        (no position suffix for the first row of a file).

        Args:
            csv_audio : str, file containing names, durations and labels : (name, start, end, label, label_index)
                the associated wav_filename is Yname_start_end.wav
            feature_dir: str, the path of the features directory.
            subpart_data: int, number of files to extract features from the csv.
            fixed_segment: float, in seconds, the size of the kept segment. If >audio length, the audio length is kept.
                If segment is True, and >label, it takes the surrounding (allow creating weak labels).

        Returns:
            pd.DataFrame with the columns ``raw_filename`` (audio file name),
            ``filename`` (path of the feature file) and ``event_labels``.
        """
        t1 = time.time()
        df_meta = self.get_df_from_meta(csv_audio, subpart_data)
        self.get_classes(df_meta)
        LOG.info("{} Total file number: {}".format(
            csv_audio, len(df_meta.filename.unique())))

        ext_name = "_segment_"
        if subpart_data:
            ext_name += str(subpart_data)

        if fixed_segment is not None:
            LOG.debug(
                f" durations before: "
                f"{df_meta.groupby('event_label').apply(lambda x: (x.offset - x.onset).mean())}"
            )
            ext_name += f"fix{fixed_segment}"
            df_meta = self.trunc_pad_segment(df_meta, fixed_segment)
            LOG.debug(
                f" durations after: "
                f"{df_meta.groupby('event_label').apply(lambda x: (x.offset - x.onset).mean())}"
            )

        meta_base, meta_ext = os.path.splitext(os.path.basename(csv_audio))
        csv_features = os.path.join(self.metadata_dir,
                                    meta_base + ext_name + meta_ext)

        wav_dir = self.get_audio_dir_path_from_meta(csv_audio)

        if os.path.exists(csv_features):
            # No subpart data because should be in the name
            df_features = self.get_df_from_meta(csv_features)
            LOG.debug("compute features time: %s" % (time.time() - t1))
            return df_features

        ext_featname = "_seg"
        if fixed_segment:
            ext_featname += f"fix{fixed_segment}"

        # Columns of the feature csv, filled row by row and turned into a
        # DataFrame once at the end (no repeated DataFrame.append).
        collected = {"raw_filename": [], "filename": [], "event_labels": []}

        # Loop in all the filenames
        for ind, wav_name in enumerate(df_meta.filename.unique()):
            if ind % 500 == 0:
                LOG.debug(ind)

            wav_path = os.path.join(wav_dir, wav_name)
            if not os.path.isfile(wav_path):
                LOG.error(
                    "File %s is in the csv file but the feature is not extracted, deleting...!"
                    % wav_path)
                df_meta = df_meta.drop(
                    df_meta[df_meta.filename == wav_name].index)
                continue

            try:
                info = soundfile.info(wav_path)
                audio_len_sec = info.duration
            except Exception as e:
                LOG.error("File %s is corrupted, not added to df!" % wav_path)
                LOG.error(e)
                continue
            if audio_len_sec == 0:
                LOG.error("File %s is corrupted, not added to df!" % wav_path)
                continue

            # One feature per labeled row. The numeric suffix is the position
            # of the row among ALL rows of the file (unlabeled rows included),
            # and the first row has no suffix.
            sub_df = df_meta[df_meta.filename == wav_name]
            base_out = os.path.join(feature_dir, name_only(wav_name))
            targets = []
            for pos, (_, row) in enumerate(sub_df.iterrows()):
                if pd.isna(row.event_label):
                    continue
                suffix = str(pos) if pos > 0 else ""
                targets.append(
                    (row, base_out + ext_featname + suffix + ".npy"))

            # Reuse the features only when every file that would be written is
            # already there. With a fixed segment we should always recompute
            # because of the randomness of onset offset.
            files_exist = (not fixed_segment) and all(
                os.path.exists(path) for _, path in targets)

            for row, out_filename in targets:
                if not files_exist:
                    # onset/offset are in seconds, read_audio expects samples
                    # of the file on disk.
                    sr = info.samplerate
                    (audio, _) = read_audio(wav_path,
                                            cfg.sample_rate,
                                            start=int(row.onset * sr),
                                            stop=int(row.offset * sr))
                    mel_spec = self.calculate_mel_spec(
                        audio, log_feature=self.save_log_feature)
                    if fixed_segment:
                        pad_trunc_length = int(
                            fixed_segment * cfg.sample_rate // cfg.hop_length)
                        mel_spec = pad_trunc_seq(mel_spec, pad_trunc_length)
                    np.save(out_filename, mel_spec)

                collected["raw_filename"].append(wav_name)
                collected["filename"].append(out_filename)
                collected["event_labels"].append(row["event_label"])

        df_features = pd.DataFrame(collected)
        df_features.to_csv(csv_features,
                           sep="\t",
                           header=True,
                           index=False)
        # Otherwise event_labels is "" and not NaN
        df_features = pd.read_csv(csv_features, sep="\t")

        LOG.debug("compute features time: %s" % (time.time() - t1))
        return df_features
Exemple #4
0
    def extract_features_from_meta_frames(self,
                                          csv_audio,
                                          feature_dir,
                                          frames_in_sec,
                                          subpart_data=None):
        """Extract log mel spectrogram features split in fixed-size subsegments.

        The result is cached in a csv stored in ``self.metadata_dir`` (its name
        is derived from ``csv_audio``, the number of frames and
        ``subpart_data``). When that csv already exists it is simply loaded;
        otherwise the features of each audio file are computed with
        ``self.get_features`` when at least one expected subsegment file is
        missing, and the labels are gathered with ``self.get_labels``.

        Args:
            csv_audio : str, file containing names, durations and labels : (name, start, end, label, label_index)
                the associated wav_filename is Yname_start_end.wav
            feature_dir: str, the directory where the features are or will be created
            frames_in_sec: number, duration in seconds of a subsegment. It is
                converted to a number of frames with
                ``cfg.sample_rate / cfg.hop_length``.
            subpart_data: int, number of files to extract features from the csv.

        Returns:
            pd.DataFrame, the content of the feature csv.
        """
        frames = int(frames_in_sec * cfg.sample_rate / cfg.hop_length)
        t1 = time.time()
        df_meta = pd.read_csv(csv_audio, header=0, sep="\t")
        LOG.info("{} Total file number: {}".format(
            csv_audio, len(df_meta.filename.unique())))

        # Csv to store the features
        ext_name = "_" + str(frames)
        if subpart_data is not None and subpart_data < len(
                df_meta.filename.unique()):
            ext_name += "_sub" + str(subpart_data)
            df_meta = self.get_subpart_data(df_meta, subpart_data)

        self.get_classes(df_meta)

        meta_base, meta_ext = os.path.splitext(os.path.basename(csv_audio))
        csv_features = os.path.join(self.metadata_dir,
                                    meta_base + ext_name + meta_ext)

        wav_dir = self.get_audio_dir_path_from_meta(csv_audio)

        if os.path.exists(csv_features):
            # No subpart data because should be in the name
            df_features = self.get_df_from_meta(csv_features)
            LOG.debug("compute features time: %s" % (time.time() - t1))
            return df_features

        LOG.debug("Creating new feature df")

        def subsegment_paths(prefix, count):
            # Feature file of each subsegment: <prefix>fr<frames>_<start>-<end>.npy
            return [
                f"{prefix}fr{frames}_{cnt * frames}-{(cnt + 1) * frames}.npy"
                for cnt in range(count)
            ]

        # One DataFrame per audio file, concatenated once at the end
        # (DataFrame.append is quadratic and no longer exists in pandas 2).
        feature_frames = []

        # Loop in all the filenames
        cnt_new_features = 0
        for ind, wav_name in enumerate(df_meta.filename.unique()):
            wav_path = os.path.join(wav_dir, wav_name)
            if not os.path.isfile(wav_path):
                LOG.error(
                    "File %s is in the csv file but the feature is not extracted, deleting...!"
                    % wav_path)
                df_meta = df_meta.drop(
                    df_meta[df_meta.filename == wav_name].index)
                continue

            try:
                audio_len_sec = soundfile.info(wav_path).duration
            except Exception as e:
                LOG.error("File %s is corrupted, not added to df!" % wav_path)
                LOG.error(e)
                continue
            if audio_len_sec == 0:
                LOG.error("File %s is corrupted, not added to df!" % wav_path)
                continue

            # How many features we can compute from this file ?
            cnt_max = min(int(audio_len_sec // frames_in_sec),
                          int(cfg.max_len_seconds // frames_in_sec))
            if cnt_max == 0:
                # Audio shorter than one subsegment: still expect one file.
                cnt_max = 1

            base_wav_name = os.path.join(feature_dir, name_only(wav_name))
            out_filenames = subsegment_paths(base_wav_name, cnt_max)

            # Recompute as soon as one expected file is missing
            if not all(os.path.exists(fname) for fname in out_filenames):
                if cnt_new_features % 500 == 0:
                    LOG.debug(f"new features, {cnt_new_features}")
                cnt_new_features += 1
                # get_features reports how many subsegments it produced, which
                # replaces the estimate made from the audio duration.
                _, cnt_max = self.get_features(wav_path, feature_dir, frames)
                out_filenames = subsegment_paths(base_wav_name, cnt_max)

            # features label to add to the dataframe
            add_item = self.get_labels(ind, df_meta, wav_name, frames,
                                       out_filenames)
            feature_frames.append(pd.DataFrame(add_item))

        if feature_frames:
            df_features = pd.concat(feature_frames, ignore_index=True)
        else:
            df_features = pd.DataFrame()

        LOG.info(csv_features)
        df_features.to_csv(csv_features,
                           sep="\t",
                           header=True,
                           index=False)
        # Otherwise event_labels is "" and not NaN
        df_features = pd.read_csv(csv_features, sep="\t")

        LOG.debug("compute features time: %s" % (time.time() - t1))
        return df_features