コード例 #1
0
def get_variables(args):
    """Resolve the evaluation inputs from parsed command-line arguments.

    Args:
        args: object exposing the attributes ``model_path``,
            ``groundtruth_tsv``, ``median_window``, ``meta_gt`` and
            ``groundtruth_audio_dir``. The last two may be None, in which
            case they are derived from ``groundtruth_tsv``.

    Returns:
        tuple: ``(model_pth, median_win, gt_audio_pth, groundtruth,
        meta_dur_df)`` where ``groundtruth`` is the tab-separated ground-truth
        file loaded with pandas and ``meta_dur_df`` is either the durations
        file loaded with pandas or the value returned by
        ``generate_tsv_wav_durations``.
    """
    model_pth = args.model_path
    median_win = args.median_window
    gt_fname, ext = osp.splitext(args.groundtruth_tsv)

    # Default durations file sits next to the ground truth: <name>_durations<ext>
    meta_gt = args.meta_gt
    if meta_gt is None:
        meta_gt = gt_fname + "_durations" + ext

    gt_audio_pth = args.groundtruth_audio_dir
    if gt_audio_pth is None:
        gt_audio_pth = meta_path_to_audio_dir(gt_fname)
        # Kept from the original ("useful because of the data format"): when
        # the derived path mentions "validation", its parent directory is used.
        if "validation" in gt_audio_pth:
            gt_audio_pth = osp.dirname(gt_audio_pth)

    groundtruth = pd.read_csv(args.groundtruth_tsv, sep="\t")

    meta_dur_df = None
    if osp.exists(meta_gt):
        try:
            meta_dur_df = pd.read_csv(meta_gt, sep="\t")
        except pd.errors.EmptyDataError:
            # A zero-byte file (no header at all) cannot be parsed; treat it
            # like an empty table so that it gets regenerated below.
            meta_dur_df = None

    # Missing, unparsable-empty, or header-only file: (re)generate durations.
    if meta_dur_df is None or len(meta_dur_df) == 0:
        meta_dur_df = generate_tsv_wav_durations(gt_audio_pth, meta_gt)

    return model_pth, median_win, gt_audio_pth, groundtruth, meta_dur_df
コード例 #2
0
ファイル: Desed.py プロジェクト: skarbs001/dcase20_task4
    def initialize_and_get_df(self,
                              tsv_path,
                              audio_dir=None,
                              audio_dir_ss=None,
                              pattern_ss=None,
                              ext_ss_feature_file="_ss",
                              nb_files=None,
                              download=True):
        """Prepare output folders, load metadata, extract features and save them.

        Args:
            tsv_path: str, path of the metadata tsv file.
            audio_dir: str, optional, directory holding the audio files. When
                None it is derived with ``meta_path_to_audio_dir(tsv_path)``.
            audio_dir_ss: str, optional, directory of the separated sources.
                When given, it is also the directory used to name the output
                sub-folder.
            pattern_ss: str, optional, pattern forwarded to the metadata and
                feature helpers. Defaults to "_events" when ``audio_dir_ss``
                is given and no pattern is provided.
            ext_ss_feature_file: str, suffix inserted before the extension of
                the saved features tsv when ``audio_dir_ss`` is given.
            nb_files: int, optional, forwarded to ``get_df_from_meta``; when
                set, it also prefixes the name of the saved features tsv.
            download: bool, whether to call ``self.download`` on the unique
                filenames of the metadata before extracting features.

        Returns:
            The features dataframe returned by ``extract_features_from_df``.

        Raises:
            IndexError: if the extracted features dataframe is empty.
        """
        use_sources = audio_dir_ss is not None

        # Resolve defaults
        if use_sources and pattern_ss is None:
            pattern_ss = "_events"
        if audio_dir is None:
            audio_dir = meta_path_to_audio_dir(tsv_path)

        # Output sub-folder = last two components of the reference directory,
        # so that different datasets do not end up sharing the same paths.
        fdir = audio_dir_ss if use_sources else audio_dir
        if fdir.endswith(osp.sep):
            fdir = fdir[:-1]
        path_parts = fdir.split(osp.sep)
        subdir = osp.sep.join(path_parts[-2:])
        meta_feat_dir = osp.join(self.meta_feat_dir, subdir)
        feature_dir = osp.join(self.feature_dir, subdir)
        for out_dir in (meta_feat_dir, feature_dir):
            os.makedirs(out_dir, exist_ok=True)

        df_meta = self.get_df_from_meta(tsv_path, nb_files, pattern_ss=pattern_ss)
        logger.info(
            f"{tsv_path} Total file number: {len(df_meta.filename.unique())}")

        # Each filename is passed only once to the downloader
        if download:
            self.download(df_meta.filename.drop_duplicates(), audio_dir)

        # Name of the tsv that will hold the features metadata
        fname, ext = osp.splitext(osp.basename(tsv_path))
        ext_tsv_feature = ext_ss_feature_file if use_sources else ""
        feat_fname = fname + ext_tsv_feature + ext
        if nb_files is not None:
            feat_fname = f"{nb_files}_{feat_fname}"
        features_tsv = osp.join(meta_feat_dir, feat_fname)

        t = time.time()
        logger.info(f"Getting features ...")
        df_features = self.extract_features_from_df(
            df_meta,
            audio_dir,
            feature_dir,
            audio_dir_ss,
            pattern_ss,
            ext_ss_feature_file,
        )

        # Nothing extracted: fail before writing anything to disk
        if len(df_features) == 0:
            raise IndexError(f"Empty features DataFrames {features_tsv}")

        df_features.to_csv(features_tsv, sep="\t", index=False)
        logger.info(
            f"features created/retrieved in {time.time() - t:.2f}s, metadata: {features_tsv}"
        )
        return df_features