コード例 #1
0
    def __init__(
        self,
        root: str,
        split: str,
        source_language: str,
        target_language: Optional[str] = None,
        version: int = 2,
    ) -> None:
        """Build the sample list by joining Common Voice and CoVoST TSVs.

        Args:
            root: Directory that must already contain ``validated.tsv``;
                audio clips are looked up under ``<root>/clips``.
            split: Split name; must be in ``self.SPLITS``.
            source_language: Source language code (must not be ``None``).
            target_language: Target language code, or ``None`` when no
                translation is requested.
            version: CoVoST version; must be in ``self.VERSIONS``.
        """
        assert version in self.VERSIONS and split in self.SPLITS
        assert source_language is not None
        self.no_translation = target_language is None
        if self.no_translation:
            # Hack here so that we can get "split" column from CoVoST TSV.
            # Note that we use CoVoST train split for ASR which is an extension
            # to Common Voice train split.
            target_language = "de" if source_language == "en" else "en"
        else:
            assert "en" in {source_language, target_language}
            if source_language == "en":
                assert target_language in self.EN_XX_LANGUAGES[version]
            else:
                assert source_language in self.XX_EN_LANGUAGES[version]

        self.root: Path = Path(root)

        cv_tsv_path = self.root / "validated.tsv"
        assert cv_tsv_path.is_file()

        covost_url = self.COVOST_URL_TEMPLATE.format(
            src_lang=source_language, tgt_lang=target_language
        )
        archive_name = Path(covost_url).name
        covost_archive = self.root / archive_name
        if not covost_archive.is_file():
            download_url(covost_url, self.root.as_posix(), hash_value=None)
        # The archive is (re-)extracted on every construction.
        extract_archive(covost_archive.as_posix())

        cv_tsv = load_df_from_tsv(cv_tsv_path)
        covost_tsv = load_df_from_tsv(
            self.root / archive_name.replace(".tar.gz", "")
        )
        df = pd.merge(
            left=cv_tsv[["path", "sentence", "client_id"]],
            right=covost_tsv[["path", "translation", "split"]],
            how="inner",
            on="path",
        )

        # "train" additionally includes the rows tagged "train_covost".
        in_split = df["split"] == split
        if split == "train":
            in_split = in_split | (df["split"] == f"{split}_covost")
        df = df[in_split]

        rows_by_index = df.to_dict(orient="index")
        self.data = []
        for index in sorted(rows_by_index):
            entry = rows_by_index[index]
            try:
                clip = self.root / "clips" / entry["path"]
                _ = torchaudio.info(clip.as_posix())
                self.data.append(entry)
            except RuntimeError:
                # Entries whose clip cannot be inspected are dropped silently.
                pass
コード例 #2
0
    def __init__(self,
                 root: str,
                 url: str = URL,
                 folder_in_archive: str = FOLDER_IN_ARCHIVE,
                 download: bool = False) -> None:
        """Locate (optionally download) the dataset and load its metadata.

        Args:
            root: Directory holding the archive and the extracted data.
            url: URL of the dataset archive.
            folder_in_archive: Sub-folder, relative to the extracted
                top-level directory, that ``self._path`` points to.
            download: When True, fetch and extract the archive if
                ``self._path`` does not exist yet.
        """
        archive_name = os.path.basename(url)
        archive = os.path.join(root, archive_name)

        # Top-level directory name: the archive file name up to the archive
        # extension.
        dataset_dir = archive_name.split(self._ext_archive)[0]
        relative_data_dir = os.path.join(dataset_dir, folder_in_archive)

        self._path = os.path.join(root, relative_data_dir)
        self._metadata_path = os.path.join(root, dataset_dir, 'metadata.csv')

        if download and not os.path.isdir(self._path):
            if not os.path.isfile(archive):
                download_url(url, root, hash_value=_CHECKSUMS.get(url, None))
            extract_archive(archive)

        with open(self._metadata_path, "r", newline='') as metadata:
            rows = unicode_csv_reader(metadata,
                                      delimiter="|",
                                      quoting=csv.QUOTE_NONE)
            self._walker = list(rows)
コード例 #3
0
    def __init__(self,
                 root: Union[str, Path],
                 url: str = "http://svn.code.sf.net/p/cmusphinx/code/trunk/cmudict/cmudict-0.7b",
                 url_symbols: str = "http://svn.code.sf.net/p/cmusphinx/code/trunk/cmudict/cmudict-0.7b.symbols",
                 download: bool = False,
                 exclude_punctuations: bool = True) -> None:
        """Load the CMU pronouncing dictionary and its symbol list from ``root``.

        Args:
            root: Directory containing (or receiving) the dictionary files.
                It is created when it does not exist.
            url: URL of the dictionary file; its base name is the file
                read from ``root``.
            url_symbols: URL of the symbols file; its base name is the file
                read from ``root``.
            download: When True, download both files into ``root``.
            exclude_punctuations: Forwarded to ``_parse_dictionary``.

        Raises:
            RuntimeError: If ``download`` is True and ``root`` is not a
                directory.
        """
        self.exclude_punctuations = exclude_punctuations

        root = Path(root)
        if not os.path.isdir(root):
            os.mkdir(root)

        if download:
            if not os.path.isdir(root):
                # The original built this exception without raising it, so
                # the check had no effect.
                raise RuntimeError("The argument `root` must be a path to directory, "
                                   f"but '{root}' is passed in instead.")
            checksum = _CHECKSUMS.get(url, None)
            download_url(url, root, hash_value=checksum, hash_type="md5")
            checksum = _CHECKSUMS.get(url_symbols, None)
            download_url(url_symbols, root, hash_value=checksum, hash_type="md5")

        self._root_path = root
        basename = os.path.basename(url)
        basename_symbols = os.path.basename(url_symbols)

        with open(os.path.join(self._root_path, basename_symbols), "r") as text:
            self._symbols = [line.strip() for line in text.readlines()]

        # The dictionary file is decoded as latin-1.
        with open(os.path.join(self._root_path, basename), "r", encoding='latin-1') as text:
            self._dictionary = _parse_dictionary(text.readlines(),
                                                 exclude_punctuations=self.exclude_punctuations)
コード例 #4
0
ファイル: yesno.py プロジェクト: fagan2888/audio
    def __init__(self,
                 root: Union[str, Path],
                 url: str = URL,
                 folder_in_archive: str = FOLDER_IN_ARCHIVE,
                 download: bool = False) -> None:
        """Locate (optionally download) the dataset and index its audio files.

        Args:
            root: Directory holding the archive and the extracted data;
                may be a ``str`` or a ``Path``.
            url: URL of the dataset archive.
            folder_in_archive: Folder under ``root`` that holds the audio.
            download: When True, fetch and extract the archive if the data
                folder does not exist yet.

        Raises:
            RuntimeError: If the data folder is still missing afterwards.
        """
        # Normalise a possible Path object to its string form.
        root = os.fspath(root)

        archive = os.path.join(root, os.path.basename(url))
        self._path = os.path.join(root, folder_in_archive)

        if download and not os.path.isdir(self._path):
            if not os.path.isfile(archive):
                download_url(url,
                             root,
                             hash_value=_CHECKSUMS.get(url, None),
                             hash_type="md5")
            extract_archive(archive)

        if not os.path.isdir(self._path):
            raise RuntimeError(
                "Dataset not found. Please use `download=True` to download it."
            )

        # Sample identifiers are the sorted file stems of the audio files.
        stems = [str(p.stem) for p in Path(self._path).glob('*' + self._ext_audio)]
        stems.sort()
        self._walker = stems
コード例 #5
0
def get_metadata(out_root, subset):
    """Return the annotation rows of the unlabelled data that belong to ``subset``.

    The annotation TSV is downloaded into ``out_root`` when it is not already
    there. Each returned row is a 4-tuple of column values taken from the
    TSV: (session/event id, segment id, start, end).

    Args:
        out_root: ``Path`` of the directory where the TSV is stored.
        subset: Subset name used to select the file and filter the rows.
    """
    def predicate(id_):
        is_plenary = id_.find("PLENARY") != -1
        if subset in {"10k", "10k_sd"}:
            # The first 8 characters are compared as a YYYYMMDD integer.
            return is_plenary and 20190101 <= int(id_[:8]) < 20200801
        if subset in {"100k"}:
            return is_plenary
        if subset in LANGUAGES:
            return is_plenary and id_.endswith(subset)
        if subset in LANGUAGES_V2:
            return id_.endswith(subset.split("_")[0])
        return True

    is_sd = subset == "10k_sd"
    filename = "unlabelled_sd" if is_sd else "unlabelled_v2"
    url = f"{DOWNLOAD_BASE_URL}/annotations/{filename}.tsv.gz"
    tsv_name = Path(url).name
    tsv_path = out_root / tsv_name
    if not tsv_path.exists():
        download_url(url, out_root.as_posix(), tsv_name)

    # The two annotation files differ in delimiter and column names.
    if subset == '10k_sd':
        delimiter = "|"
        columns = ("session_id", "id_", "start_time", "end_time")
    else:
        delimiter = "\t"
        columns = ("event_id", "segment_no", "start", "end")
    id_column = columns[0]

    with gzip.open(tsv_path, mode="rt") as f:
        rows = [tuple(r[c] for c in columns)
                for r in csv.DictReader(f, delimiter=delimiter)
                if predicate(r[id_column])]
    return rows
コード例 #6
0
    def __init__(self, root, tsv=TSV, url=URL, download=False):
        """Locate (optionally download) the corpus and load one of its TSV files.

        Args:
            root: Directory holding the archive and the TSV file.
            tsv: Name of the TSV file, relative to ``root``.
            url: Either a full archive URL or one of the language names
                listed below, which is expanded to the matching archive URL.
            download: When True, fetch and extract the archive if ``root``
                is not an existing directory.
        """
        languages = {
            "tatar": "tt",
            "english": "en",
            "german": "de",
            "french": "fr",
            "welsh": "cy",
            "breton": "br",
            "chuvash": "cv",
            "turkish": "tr",
            "kyrgyz": "ky",
            "irish": "ga-IE",
            "kabyle": "kab",
            "catalan": "ca",
            "taiwanese": "zh-TW",
            "slovenian": "sl",
            "italian": "it",
            "dutch": "nl",
            "hakha chin": "cnh",
            "esperanto": "eo",
            "estonian": "et",
            "persian": "fa",
            "basque": "eu",
            "spanish": "es",
            "chinese": "zh-CN",
            "mongolian": "mn",
            "sakha": "sah",
            "dhivehi": "dv",
            "kinyarwanda": "rw",
            "swedish": "sv-SE",
            "russian": "ru",
        }

        # Membership test: the original used `is`, which compared the string
        # to the dict by identity and so never expanded a language name.
        if url in languages:
            ext_archive = ".tar.gz"
            language = languages[url]

            base_url = (
                "https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4"
                + ".s3.amazonaws.com/cv-corpus-3/"
            )
            url = base_url + language + ext_archive

        archive = os.path.basename(url)
        archive = os.path.join(root, archive)
        self._path = root

        if download:
            if not os.path.isdir(self._path):
                if not os.path.isfile(archive):
                    download_url(url, root)
                extract_archive(archive)

        self._tsv = os.path.join(root, tsv)

        # Use a distinct handle name so the `tsv` parameter is not rebound.
        with open(self._tsv, "r") as tsv_file:
            walker = unicode_csv_reader(tsv_file, delimiter="\t")
            # First row is the header; the remaining rows are the samples.
            self._header = next(walker)
            self._walker = list(walker)
コード例 #7
0
    def __init__(
            self, root, params, url=URL, download=False):
        """Locate the dataset, build missing derived features, load metadata.

        Args:
            root: Directory holding the archive and the extracted data.
            params: Passed to ``precompute_spectrograms`` when the ``mels``
                folder is missing.
            url: URL of the dataset archive.
            download: When True, fetch and extract the archive if the
                ``wavs`` folder does not exist yet.
        """
        archive_name = os.path.basename(url)
        archive = os.path.join(root, archive_name)

        # Extracted top-level folder: archive name up to the archive extension.
        base_folder = os.path.join(root, archive_name.split(self._ext_archive)[0])

        self._wav_path = os.path.join(base_folder, 'wavs')
        self._mel_path = os.path.join(base_folder, 'mels')
        self._char_path = os.path.join(base_folder, 'chars')
        self._phone_path = os.path.join(base_folder, 'phones')
        self._metadata_path = os.path.join(base_folder, 'metadata.csv')

        if download and not os.path.isdir(self._wav_path):
            if not os.path.isfile(archive):
                download_url(url, root)
            extract_archive(archive)

        if not os.path.isdir(self._mel_path):
            precompute_spectrograms(base_folder, params)

        # Both the character and the phone folders must exist; otherwise
        # they are (re)generated together.
        if not (os.path.isdir(self._char_path) and os.path.isdir(self._phone_path)):
            precompute_char_phone(base_folder)

        with open(self._metadata_path, "r") as metadata:
            rows = unicode_csv_reader(metadata, delimiter="|", quoting=csv.QUOTE_NONE)
            self._walker = list(rows)
コード例 #8
0
ファイル: speechcommands.py プロジェクト: tuxzz/audio
    def __init__(self,
                 root,
                 url=URL,
                 folder_in_archive=FOLDER_IN_ARCHIVE,
                 download=False):
        """Locate (optionally download) the dataset and index its ``.wav`` files.

        Args:
            root: Directory holding the archive and the extracted data.
            url: Either a full archive URL or one of the version names
                ``"speech_commands_v0.01"`` / ``"speech_commands_v0.02"``.
            folder_in_archive: Folder under ``root`` that contains the
                per-version data directory.
            download: When True, fetch and extract the archive if the data
                directory does not exist yet.
        """
        known_versions = ("speech_commands_v0.01", "speech_commands_v0.02")
        if url in known_versions:
            base_url = "https://storage.googleapis.com/download.tensorflow.org/data/"
            ext_archive = ".tar.gz"

            url = os.path.join(base_url, url + ext_archive)

        archive_name = os.path.basename(url)
        archive = os.path.join(root, archive_name)

        # Strip up to two trailing dot-separated components (e.g. ".tar.gz").
        version_dir = archive_name.rsplit(".", 2)[0]
        self._path = os.path.join(root, os.path.join(folder_in_archive, version_dir))

        if download and not os.path.isdir(self._path):
            if not os.path.isfile(archive):
                download_url(url, root)
            extract_archive(archive, self._path)

        candidates = walk_files(self._path, suffix=".wav", prefix=True)
        # Keep paths containing HASH_DIVIDER and not containing EXCEPT_FOLDER.
        self._walker = [
            w for w in candidates
            if HASH_DIVIDER in w and EXCEPT_FOLDER not in w
        ]
コード例 #9
0
ファイル: download_audios.py プロジェクト: ag027592/voxpopuli
def download(args):
    """Download and extract the raw audio tarballs selected by ``args.subset``.

    Tarballs are fetched into ``<args.root>/raw_audios``, extracted, and then
    deleted.

    Args:
        args: Object with ``subset`` (a language code from ``LANGUAGES`` or
            one of ``"100k"``, ``"10k"``, ``"asr"``) and ``root`` attributes.
    """
    if args.subset in LANGUAGES:
        languages, years = [args.subset], YEARS
    else:
        languages_by_subset = {
            "100k": LANGUAGES, "10k": LANGUAGES, "asr": ["original"]
        }
        years_by_subset = {
            "100k": YEARS, "10k": [2019, 2020], "asr": YEARS
        }
        languages = languages_by_subset.get(args.subset, None)
        years = years_by_subset.get(args.subset, None)

    # One tarball per (language, year) pair.
    url_list = [
        f"{DOWNLOAD_BASE_URL}/audios/{lang}_{year}.tar"
        for lang in languages
        for year in years
    ]

    out_root = Path(args.root) / "raw_audios"
    out_root.mkdir(exist_ok=True, parents=True)
    print(f"{len(url_list)} files to download...")
    for url in tqdm(url_list):
        tar_name = Path(url).name
        tar_path = out_root / tar_name
        download_url(url, out_root, tar_name)
        extract_archive(tar_path.as_posix())
        # The tarball is removed once extracted.
        os.remove(tar_path)
コード例 #10
0
    def __init__(self,
                 root: str,
                 release: str = "release1",
                 subset: str = None,
                 download: bool = False,
                 audio_ext=".sph") -> None:
        """Locate (optionally download) a TEDLIUM release and index its segments.

        Args:
            root: Directory holding the archive and the extracted data.
            release: Key into ``_RELEASE_CONFIGS``.
            subset: Subset name; falls back to the release's configured
                ``"subset"`` when falsy.
            download: When True, fetch and extract the archive if the data
                directory does not exist yet.
            audio_ext: Audio file extension stored as ``self._ext_audio``.

        Raises:
            RuntimeError: If ``release`` or ``subset`` is not supported.
        """
        self._ext_audio = audio_ext
        if release not in _RELEASE_CONFIGS.keys():
            raise RuntimeError(
                "The release {} does not match any of the supported tedlium releases{} "
                .format(
                    release,
                    _RELEASE_CONFIGS.keys(),
                ))
        config = _RELEASE_CONFIGS[release]
        folder_in_archive = config["folder_in_archive"]
        url = config["url"]
        subset = subset or config["subset"]

        if subset not in config["supported_subsets"]:
            raise RuntimeError(
                "The subset {} does not match any of the supported tedlium subsets{} "
                .format(
                    subset,
                    config["supported_subsets"],
                ))

        archive = os.path.join(root, os.path.basename(url))

        self._path = os.path.join(root, folder_in_archive, config["data_path"])
        if subset in ["train", "dev", "test"]:
            self._path = os.path.join(self._path, subset)

        if download and not os.path.isdir(self._path):
            if not os.path.isfile(archive):
                download_url(url, root, hash_value=config["checksum"])
            extract_archive(archive)

        # One (talk id, line number) entry per line of every ".stm" file.
        self._filelist = []
        stm_dir = os.path.join(self._path, "stm")
        for stm_name in sorted(os.listdir(stm_dir)):
            if not stm_name.endswith(".stm"):
                continue
            with open(os.path.join(self._path, "stm", stm_name)) as f:
                n_lines = len(f.readlines())
            talk_id = stm_name.replace(".stm", "")
            self._filelist.extend((talk_id, line) for line in range(n_lines))

        # Path of the phoneme dictionary; the dictionary itself starts unset.
        self._dict_path = os.path.join(root, folder_in_archive, config["dict"])
        self._phoneme_dict = None
コード例 #11
0
def get(args):
    """Segment target-language audio and write the speech-to-speech manifest.

    Downloads the source ASR and target S2S annotation TSVs when missing,
    collects the segments to cut from each target audio file, runs
    ``_segment`` over them via ``multiprocess_run`` and writes
    ``s2s[_ref].tsv`` into the output directory.

    Args:
        args: Object with ``source_lang``, ``target_lang``, ``root`` and
            ``use_annotated_target`` attributes.
    """
    src_lang, tgt_lang = args.source_lang, args.target_lang
    if args.use_annotated_target:
        assert tgt_lang in S2S_TGT_LANGUAGES_WITH_HUMAN_TRANSCRIPTION
    data_root = Path(args.root)
    in_root = data_root / "raw_audios" / tgt_lang
    asr_root = data_root / "transcribed_data" / src_lang
    out_root = asr_root / tgt_lang
    out_root.mkdir(exist_ok=True, parents=True)

    # Source-side metadata: "<session_id>-<id_>" -> (text, speaker id).
    url = f"{DOWNLOAD_BASE_URL}/annotations/asr/asr_{src_lang}.tsv.gz"
    tsv_name = Path(url).name
    tsv_path = asr_root / tsv_name
    if not tsv_path.exists():
        download_url(url, asr_root.as_posix(), tsv_name)
    with gzip.open(tsv_path, "rt") as f:
        src_rows = list(csv.DictReader(f, delimiter="|"))
    src_metadata = {
        "{}-{}".format(row["session_id"], row["id_"]):
        (row["original_text"], row["speaker_id"])
        for row in src_rows
    }

    # Target-side metadata.
    ref_sfx = "_ref" if args.use_annotated_target else ""
    url = f"{DOWNLOAD_BASE_URL}/annotations/s2s/s2s_{tgt_lang}{ref_sfx}.tsv.gz"
    tsv_name = Path(url).name
    tsv_path = out_root / tsv_name
    if not tsv_path.exists():
        download_url(url, out_root.as_posix(), tsv_name)
    with gzip.open(tsv_path, "rt") as f:
        tgt_metadata = list(csv.DictReader(f, delimiter="\t"))

    # Group segments by the input audio file they are cut from.
    segments_by_input = defaultdict(list)
    manifest = []
    print("Loading manifest...")
    for row in tqdm(tgt_metadata):
        src_id = row["id"]
        event_id, row_src_lang, utt_id = parse_src_id(src_id)
        if row_src_lang != src_lang:
            continue
        # The first four characters of the event id name the per-year folder.
        year = event_id[:4]
        in_path = in_root / year / f"{event_id}_{tgt_lang}.ogg"
        cur_out_root = out_root / year
        cur_out_root.mkdir(exist_ok=True, parents=True)
        tgt_id = f"{event_id}-{tgt_lang}_{utt_id}"
        out_path = cur_out_root / f"{tgt_id}.ogg"
        segment = (out_path.as_posix(), float(row["start_time"]),
                   float(row["end_time"]))
        segments_by_input[in_path.as_posix()].append(segment)
        src_text, src_speaker_id = src_metadata[src_id]
        tgt_text = row["tgt_text"] if args.use_annotated_target else ""
        manifest.append((src_id, src_text, src_speaker_id, tgt_id, tgt_text))
    items = list(segments_by_input.items())

    print(f"Segmenting {len(items):,} files...")
    multiprocess_run(items, _segment)

    # Write the manifest as a tab-separated file with a header row.
    header = ["src_id", "src_text", "src_speaker_id", "tgt_id", "tgt_text"]
    with open(out_root / f"s2s{ref_sfx}.tsv", "w") as f_o:
        f_o.write("\t".join(header) + "\n")
        for cols in manifest:
            f_o.write("\t".join(cols) + "\n")
コード例 #12
0
ファイル: cmuarctic.py プロジェクト: zkneupper/audio
    def __init__(self,
                 root: Union[str, Path],
                 url: str = URL,
                 folder_in_archive: str = FOLDER_IN_ARCHIVE,
                 download: bool = False) -> None:
        """Locate (optionally download) one CMU Arctic voice and load its text file.

        Args:
            root: Base directory; the data lives in
                ``<root>/<folder_in_archive>``, which is created if missing.
            url: Either a full archive URL or one of the short voice
                identifiers listed below.
            folder_in_archive: Folder under ``root`` for archives and data.
            download: When True, fetch and extract the archive if the voice
                directory does not exist yet.
        """
        voice_ids = [
            "aew",
            "ahw",
            "aup",
            "awb",
            "axb",
            "bdl",
            "clb",
            "eey",
            "fem",
            "gka",
            "jmk",
            "ksp",
            "ljm",
            "lnh",
            "rms",
            "rxr",
            "slp",
            "slt"
        ]
        if url in voice_ids:
            # Expand the short identifier to the full archive URL.
            ext_archive = ".tar.bz2"
            base_url = "http://www.festvox.org/cmu_arctic/packed/"
            archive_stem = "cmu_us_" + url + "_arctic"

            url = os.path.join(base_url, archive_stem + ext_archive)

        # os.fspath yields the string form in case a Path object is passed.
        root = os.path.join(os.fspath(root), folder_in_archive)
        if not os.path.isdir(root):
            os.mkdir(root)

        archive_name = os.path.basename(url)
        archive = os.path.join(root, archive_name)

        # The voice directory is the archive name up to its first dot.
        self._path = os.path.join(root, archive_name.split(".")[0])

        if download and not os.path.isdir(self._path):
            if not os.path.isfile(archive):
                download_url(url, root, hash_value=_CHECKSUMS.get(url, None), hash_type="md5")
            extract_archive(archive)

        self._text = os.path.join(self._path, self._folder_text, self._file_text)

        with open(self._text, "r") as text:
            rows = csv.reader(text, delimiter="\n")
            self._walker = list(rows)
コード例 #13
0
def sample_data():
    """Download and extract the LapsBM CI archive; return the extracted folder.

    The archive is stored in, and extracted into, the folder returned by
    ``get_default_cache_folder()``.
    """
    cache_dir = get_default_cache_folder()
    archive_url = "https://github.com/scart97/lapsbm-backup/archive/refs/tags/lapsbm-ci.tar.gz"
    download_url(archive_url, download_folder=cache_dir, resume=True)

    archive_path = cache_dir / "lapsbm-backup-lapsbm-ci.tar.gz"
    extract_archive(archive_path, cache_dir)
    return cache_dir / "lapsbm-backup-lapsbm-ci"
コード例 #14
0
ファイル: esc.py プロジェクト: leocances/Deep-Co-Training
    def download(self) -> None:
        """Download and extract the dataset unless the integrity check passes."""
        if self.check_integrity(self.target_directory):
            print("Dataset already downloaded and verified.")
            return

        # The archive is expected at <root>/<FOLDER_IN_ARCHIVE>.zip.
        archive_path = os.path.join(self.root, FOLDER_IN_ARCHIVE + ".zip")
        download_url(self.url, self.root)
        extract_archive(archive_path, self.root)
コード例 #15
0
ファイル: gtzan.py プロジェクト: yuvrajmetrani2/audio
    def __init__(
        self,
        root: str,
        url: str = URL,
        folder_in_archive: str = FOLDER_IN_ARCHIVE,
        download: bool = False,
        subset: Any = None,
    ) -> None:
        """Locate (optionally download) the dataset and select its samples.

        Args:
            root: Directory holding the archive and the extracted data.
            url: URL of the dataset archive.
            folder_in_archive: Folder under ``root`` that holds the audio.
            download: When True, fetch and extract the archive if the data
                folder does not exist yet.
            subset: ``None`` for every audio file found on disk, or one of
                ``"training"``, ``"validation"``, ``"testing"`` for the
                corresponding predefined list.

        Raises:
            RuntimeError: If the data folder is still missing afterwards.
        """
        # super(GTZAN, self).__init__()
        self.root = root
        self.url = url
        self.folder_in_archive = folder_in_archive
        self.download = download
        self.subset = subset

        assert subset is None or subset in [
            "training", "validation", "testing"
        ], ("When `subset` not None, it must take a value from "
            "{'training', 'validation', 'testing'}.")

        archive = os.path.join(root, os.path.basename(url))
        self._path = os.path.join(root, folder_in_archive)

        if download and not os.path.isdir(self._path):
            if not os.path.isfile(archive):
                download_url(url,
                             root,
                             hash_value=_CHECKSUMS.get(url, None),
                             hash_type="md5")
            extract_archive(archive)

        if not os.path.isdir(self._path):
            raise RuntimeError(
                "Dataset not found. Please use `download=True` to download it."
            )

        if self.subset is None:
            # No subset requested: index every audio file under the folder.
            found = walk_files(self._path,
                               suffix=self._ext_audio,
                               prefix=False,
                               remove_suffix=True)
            self._walker = list(found)
        elif self.subset == "training":
            self._walker = filtered_train
        elif self.subset == "validation":
            self._walker = filtered_valid
        elif self.subset == "testing":
            self._walker = filtered_test
コード例 #16
0
ファイル: prep_covost_data.py プロジェクト: bcmi220/d2gpo
    def __init__(
            self, root: str, split: str, source_language: str,
            target_language: Optional[str] = None, version: int = 2,
            download: bool = False
    ) -> None:
        """Build the sample list by joining Common Voice and CoVoST TSVs.

        Args:
            root: Base directory; files are kept in ``<root>/raw``, which is
                created if missing.
            split: Split name; must be in ``self.SPLITS``.
            source_language: Source language code (must not be ``None``).
            target_language: Target language code, or ``None`` when no
                translation is requested.
            version: CoVoST version; must be in ``self.VERSIONS``.
            download: When True, download (if missing) and extract both the
                Common Voice and the CoVoST archives.
        """
        assert version in self.VERSIONS and split in self.SPLITS
        assert source_language is not None
        self.no_translation = (target_language is None)
        if self.no_translation:
            # Hack here so that we can get "split" column from CoVoST TSV.
            # Note that we use CoVoST train split for ASR which is an extension
            # to Common Voice train split.
            target_language = 'de' if source_language == 'en' else 'en'
        else:
            assert 'en' in {source_language, target_language}
            if source_language == 'en':
                assert target_language in self.EN_XX_LANGUAGES[version]
            else:
                assert source_language in self.XX_EN_LANGUAGES[version]

        self.root = os.path.join(root, 'raw')
        os.makedirs(self.root, exist_ok=True)

        def fetch(archive_url: str) -> None:
            # Download the archive when absent, then extract it; a no-op
            # unless `download` was requested.
            archive = os.path.join(self.root, os.path.basename(archive_url))
            if not download:
                return
            if not os.path.isfile(archive):
                download_url(archive_url, self.root, hash_value=None)
            extract_archive(archive)

        cv_url = self.CV_URL_TEMPLATE.format(ver=self.CV_VERSION_ID[version],
                                             lang=source_language)
        fetch(cv_url)

        covost_url = self.COVOST_URL_TEMPLATE.format(src_lang=source_language,
                                                     tgt_lang=target_language)
        fetch(covost_url)

        cv_tsv = self.load_from_tsv(os.path.join(self.root, 'validated.tsv'))
        covost_tsv_name = os.path.basename(covost_url).replace('.tar.gz', '')
        covost_tsv = self.load_from_tsv(os.path.join(self.root, covost_tsv_name))
        df = pd.merge(left=cv_tsv[['path', 'sentence', 'client_id']],
                      right=covost_tsv[['path', 'translation', 'split']],
                      how='inner', on='path')

        # "train" additionally includes the rows tagged "train_covost".
        in_split = df['split'] == split
        if split == 'train':
            in_split = in_split | (df['split'] == f'{split}_covost')
        df = df[in_split]

        # Row dicts ordered by DataFrame index.
        rows_by_index = df.to_dict(orient='index')
        self.data = [rows_by_index[index] for index in sorted(rows_by_index)]
コード例 #17
0
ファイル: vctk.py プロジェクト: rostykamga/audio
    def __init__(self,
                 root: str,
                 url: str = URL,
                 download: bool = False,
                 mic_id: str = "mic2") -> None:
        """Locate (optionally download) VCTK 0.92 and index its utterances.

        Args:
            root: Directory holding the archive and the extracted data.
            url: URL of the dataset archive.
            download: When True, fetch and extract the archive if the
                dataset directory does not exist yet.
            mic_id: Microphone identifier used in the audio file names.

        Raises:
            RuntimeError: If the dataset directory is still missing afterwards.
        """
        archive = os.path.join(root, "VCTK-Corpus-0.92.zip")

        self._path = os.path.join(root, "VCTK-Corpus-0.92")
        self._txt_dir = os.path.join(self._path, "txt")
        self._audio_dir = os.path.join(self._path, "wav48_silence_trimmed")
        self._mic_id = mic_id

        if download and not os.path.isdir(self._path):
            if not os.path.isfile(archive):
                download_url(url,
                             root,
                             hash_value=_CHECKSUMS.get(url, None),
                             hash_type="md5")
            extract_archive(archive, self._path)

        if not os.path.isdir(self._path):
            raise RuntimeError(
                "Dataset not found. Please use `download=True` to download it."
            )

        # Speaker IDs are the entries of the text folder.
        self._speaker_ids = sorted(os.listdir(self._txt_dir))
        self._sample_ids = []

        # Due to some insufficient data complexity in the 0.92 version of this
        # dataset, the audio folder structure is traversed in accordance with
        # the text folder. As some audio files are missing for either mic but
        # the text is present, the existence of the audio file is checked
        # before adding it to the sample list (for speaker "p362").
        for speaker_id in self._speaker_ids:
            utterance_dir = os.path.join(self._txt_dir, speaker_id)
            txt_names = sorted(name for name in os.listdir(utterance_dir)
                               if name.endswith(".txt"))
            for txt_name in txt_names:
                utterance_id = os.path.splitext(txt_name)[0]
                flac_name = f"{utterance_id}_{mic_id}.flac"
                audio_path_mic = os.path.join(self._audio_dir, speaker_id, flac_name)
                # Speaker p280 is skipped entirely when mic2 is requested.
                if speaker_id == "p280" and mic_id == "mic2":
                    break
                if speaker_id == "p362" and not os.path.isfile(audio_path_mic):
                    continue
                self._sample_ids.append(utterance_id.split("_"))
    def __init__(self,
                 root: str,
                 url: str,
                 folder_in_archive: str = FOLDER_IN_ARCHIVE,
                 download: bool = False) -> None:
        """Locate (optionally download) a LibriSpeech subset and index its mel files.

        When ``download`` is True, a mel spectrogram is computed for every
        audio file found under the dataset directory and saved next to it
        with the ``self._ext_mel`` extension. The sample list is then built
        from the files carrying that extension.

        Args:
            root: Directory holding the archive and the extracted data.
            url: Either a full archive URL or one of the subset names listed
                below.
            folder_in_archive: Folder under ``root`` that contains the subset.
            download: When True, fetch/extract the archive if needed and
                (re)compute the spectrograms.
        """
        if url in [
            "dev-clean",
            "dev-other",
            "test-clean",
            "test-other",
            "train-clean-100",
            "train-clean-360",
            "train-other-500",
        ]:

            ext_archive = ".tar.gz"
            base_url = "http://www.openslr.org/resources/12/"

            url = os.path.join(base_url, url + ext_archive)

        basename = os.path.basename(url)
        archive = os.path.join(root, basename)

        basename = basename.split(".")[0]
        folder_in_archive = os.path.join(folder_in_archive, basename)

        self._path = os.path.join(root, folder_in_archive)

        if download:
            if not os.path.isdir(self._path):
                if not os.path.isfile(archive):
                    checksum = _CHECKSUMS.get(url, None)
                    download_url(url, root, hash_value=checksum)
                extract_archive(archive)

            audio_transforms = torchaudio.transforms.MelSpectrogram(sample_rate=16000, n_mels=128)
            audio_suffix = self._ext_wav.split('.')[-1]
            # `dirpath` is used instead of rebinding the `root` parameter.
            for dirpath, _dirs, files in os.walk(self._path):
                for file in files:
                    if file.split('.')[-1] != audio_suffix:
                        continue
                    file_audio = os.path.join(dirpath, file)
                    waveform, _ = torchaudio.load(file_audio)
                    spec = audio_transforms(waveform)
                    # Save under the mel extension. The original wrote to
                    # `_ext_wav`, which overwrote the source audio and left
                    # nothing for the `_ext_mel` walker below to find.
                    file_spec = os.path.join(dirpath, file.split('.')[0] + self._ext_mel)
                    torch.save(spec, file_spec)

        walker = walk_files(
            self._path, suffix=self._ext_mel, prefix=False, remove_suffix=True
        )
        self._walker = list(walker)
コード例 #19
0
ファイル: yesno.py プロジェクト: music-apps/pytorch-audio
    def __init__(
        self,
        root,
        url=URL,
        folder_in_archive=FOLDER_IN_ARCHIVE,
        download=False,
        transform=None,
        target_transform=None,
        return_dict=False,
    ):
        """Locate (optionally download) the dataset and index its audio files.

        Args:
            root: Directory holding the archive and the extracted data.
            url: URL of the dataset archive.
            folder_in_archive: Folder under ``root`` that holds the audio.
            download: When True, fetch and extract the archive if the data
                folder does not exist yet.
            transform: Stored as ``self.transform``; a deprecation warning is
                issued when it is set.
            target_transform: Stored as ``self.target_transform``; a
                deprecation warning is issued when it is set.
            return_dict: Stored as ``self.return_dict``; a deprecation
                warning is issued when it is False.

        Raises:
            RuntimeError: If the data folder is still missing afterwards.
        """
        if not return_dict:
            warnings.warn(
                "In the next version, the item returned will be a dictionary. "
                "Please use `return_dict=True` to enable this behavior now, "
                "and suppress this warning.",
                DeprecationWarning,
            )

        uses_transforms = not (transform is None and target_transform is None)
        if uses_transforms:
            warnings.warn(
                "In the next version, transforms will not be part of the dataset. "
                "Please remove the option `transform=True` and "
                "`target_transform=True` to suppress this warning.",
                DeprecationWarning,
            )

        self.transform = transform
        self.target_transform = target_transform
        self.return_dict = return_dict

        archive = os.path.join(root, os.path.basename(url))
        self._path = os.path.join(root, folder_in_archive)

        if download and not os.path.isdir(self._path):
            if not os.path.isfile(archive):
                download_url(url, root)
            extract_archive(archive)

        if not os.path.isdir(self._path):
            raise RuntimeError(
                "Dataset not found. Please use `download=True` to download it."
            )

        found = walk_files(self._path,
                           suffix=self._ext_audio,
                           prefix=False,
                           remove_suffix=True)
        self._walker = list(found)
コード例 #20
0
ファイル: vctk.py プロジェクト: oceanos74/audio
    def __init__(self,
                 root: str,
                 url: str = URL,
                 folder_in_archive: str = FOLDER_IN_ARCHIVE,
                 download: bool = False,
                 downsample: bool = False,
                 transform: Any = None,
                 target_transform: Any = None) -> None:
        """Locate (optionally download) the dataset and index its audio files.

        Args:
            root: Directory holding the archive and the extracted data.
            url: URL of the dataset archive.
            folder_in_archive: Folder under ``root`` that holds the data.
            download: When True, fetch and extract the archive if the data
                folder does not exist yet.
            downsample: Stored as ``self.downsample``; a warning is issued
                when it is True.
            transform: Stored as ``self.transform``; a warning is issued when
                it is set.
            target_transform: Stored as ``self.target_transform``; a warning
                is issued when it is set.

        Raises:
            RuntimeError: If the data folder is still missing afterwards.
        """
        if downsample:
            # The message is one implicitly concatenated string. A stray
            # comma previously passed the last fragment as `category`, so
            # `warnings.warn` raised TypeError instead of warning.
            warnings.warn(
                "In the next version, transforms will not be part of the dataset. "
                "Please use `downsample=False` to enable this behavior now, "
                "and suppress this warning.")

        if transform is not None or target_transform is not None:
            warnings.warn(
                "In the next version, transforms will not be part of the dataset. "
                "Please remove the option `transform=True` and "
                "`target_transform=True` to suppress this warning.")

        self.downsample = downsample
        self.transform = transform
        self.target_transform = target_transform

        archive = os.path.basename(url)
        archive = os.path.join(root, archive)
        self._path = os.path.join(root, folder_in_archive)

        if download:
            if not os.path.isdir(self._path):
                if not os.path.isfile(archive):
                    checksum = _CHECKSUMS.get(url, None)
                    download_url(url,
                                 root,
                                 hash_value=checksum,
                                 hash_type="md5")
                extract_archive(archive)

        if not os.path.isdir(self._path):
            raise RuntimeError(
                "Dataset not found. Please use `download=True` to download it."
            )

        walker = walk_files(self._path,
                            suffix=self._ext_audio,
                            prefix=False,
                            remove_suffix=True)
        # Drop entries that contain the excluded folder name.
        walker = filter(lambda w: self._except_folder not in w, walker)
        self._walker = list(walker)
コード例 #21
0
    def _download(self) -> None:
        """Fetch and unpack the dataset archive unless a verified copy exists.

        Prints the archive basename, then either reports that the dataset is
        already present (when ``self._check_integrity`` accepts ``self._path``)
        or downloads ``self.url`` into ``self.root`` and extracts the archive
        into ``self._path``.
        """
        archive_file = os.path.join(self.root, self.basename)
        print(self.basename)

        # Guard clause: nothing to fetch when the integrity check passes.
        if self._check_integrity(self._path):
            print("Dataset already download and verified")
            return

        expected_md5 = _CHECKSUMS.get(self.url, None)
        download_url(
            self.url,
            self.root,
            hash_value=expected_md5,
            hash_type="md5",
        )
        extract_archive(archive_file, self._path)
コード例 #22
0
ファイル: libritts.py プロジェクト: zeta1999/audio
    def __init__(
        self,
        root: Union[str, Path],
        url: str = URL,
        folder_in_archive: str = FOLDER_IN_ARCHIVE,
        download: bool = False,
    ) -> None:
        """Resolve the dataset location, optionally download it, and index it.

        Args:
            root: Directory holding the archive and the extracted data; may
                be a string or a ``Path``.
            url: Either a full archive URL or one of the known subset names,
                which is expanded to an archive URL under the OpenSLR
                resource 60 base address.
            folder_in_archive: Directory, relative to ``root``, under which
                the subset folder lives.
            download: When true and the subset directory is missing, the
                archive is downloaded (unless already present) and extracted.
        """
        subset_names = (
            "dev-clean",
            "dev-other",
            "test-clean",
            "test-other",
            "train-clean-100",
            "train-clean-360",
            "train-other-500",
        )
        if url in subset_names:
            # A bare subset name is turned into the full archive URL.
            url = os.path.join("http://www.openslr.org/resources/60/",
                               url + ".tar.gz")

        # Normalise ``root`` to its string form so Path objects are accepted.
        root = os.fspath(root)

        archive_name = os.path.basename(url)
        archive_path = os.path.join(root, archive_name)

        # The subset directory is named after the archive, minus extensions.
        subset_dir = os.path.join(folder_in_archive,
                                  archive_name.split(".")[0])
        self._path = os.path.join(root, subset_dir)

        if download and not os.path.isdir(self._path):
            if not os.path.isfile(archive_path):
                download_url(url, root, hash_value=_CHECKSUMS.get(url, None))
            extract_archive(archive_path)

        self._walker = list(
            walk_files(
                self._path,
                suffix=self._ext_audio,
                prefix=False,
                remove_suffix=True,
            )
        )
コード例 #23
0
ファイル: cmudict.py プロジェクト: pytorch/audio
    def __init__(
        self,
        root: Union[str, Path],
        exclude_punctuations: bool = True,
        *,
        download: bool = False,
        url: str = "http://svn.code.sf.net/p/cmusphinx/code/trunk/cmudict/cmudict-0.7b",
        url_symbols: str = "http://svn.code.sf.net/p/cmusphinx/code/trunk/cmudict/cmudict-0.7b.symbols",
    ) -> None:
        """Load the dictionary and symbol files, downloading them if allowed.

        Args:
            root: Existing directory that holds (or will receive) the files.
            exclude_punctuations: Forwarded to ``_parse_dictionary``.
            download: When true, a missing file is fetched from its URL;
                otherwise a missing file is an error.
            url: Location of the dictionary file; its basename is the local
                file name.
            url_symbols: Location of the symbol file; its basename is the
                local file name.

        Raises:
            RuntimeError: If ``root`` is not a directory, or if a required
                file is missing while ``download`` is false.
        """
        self.exclude_punctuations = exclude_punctuations

        self._root_path = Path(root)
        if not os.path.isdir(self._root_path):
            raise RuntimeError(f'The root directory does not exist; {root}')

        dict_file = self._root_path / os.path.basename(url)
        symbol_file = self._root_path / os.path.basename(url_symbols)

        # Both files follow the same rule: present, or downloadable, or error.
        required_files = (
            (dict_file, url,
             'The dictionary file is not found in the following location. '),
            (symbol_file, url_symbols,
             'The symbol file is not found in the following location. '),
        )
        for local_file, remote, not_found in required_files:
            if os.path.exists(local_file):
                continue
            if not download:
                raise RuntimeError(
                    f'{not_found}'
                    f'Set `download=True` to download it. {local_file}')
            download_url(remote,
                         root,
                         hash_value=_CHECKSUMS.get(remote, None),
                         hash_type="md5")

        with open(symbol_file, "r") as symbols_fh:
            self._symbols = [entry.strip() for entry in symbols_fh.readlines()]

        with open(dict_file, "r", encoding='latin-1') as dict_fh:
            dictionary_lines = dict_fh.readlines()
            self._dictionary = _parse_dictionary(
                dictionary_lines,
                exclude_punctuations=self.exclude_punctuations)
コード例 #24
0
    def __init__(self,
                 root,
                 url=URL,
                 folder_in_archive=FOLDER_IN_ARCHIVE,
                 download=False,
                 preprocess=False):
        """Resolve the dataset location, optionally download it, and index it.

        Args:
            root: Directory holding the archive and the extracted data.
            url: Either a full archive URL or one of the known subset names,
                which is expanded to an archive URL under the OpenSLR
                resource 12 base address.
            folder_in_archive: Directory, relative to ``root``, under which
                the subset folder lives.
            download: When true and the subset directory is missing, the
                archive is downloaded (unless already present) and extracted.
            preprocess: When true, ``self.preprocess_embeddings`` is invoked
                on the dataset directory after indexing.
        """
        subset_names = (
            "dev-clean",
            "dev-other",
            "test-clean",
            "test-other",
            "train-clean-100",
            "train-clean-360",
            "train-other-500",
        )
        if url in subset_names:
            # A bare subset name is turned into the full archive URL.
            url = os.path.join("http://www.openslr.org/resources/12/",
                               url + ".tar.gz")

        archive_name = os.path.basename(url)
        archive_path = os.path.join(root, archive_name)

        # The subset directory is named after the archive, minus extensions.
        subset_dir = os.path.join(folder_in_archive,
                                  archive_name.split(".")[0])
        self._path = os.path.join(root, subset_dir)

        if download and not os.path.isdir(self._path):
            if not os.path.isfile(archive_path):
                download_url(url, root)
            extract_archive(archive_path)

        self._walker = list(
            walk_files(
                self._path,
                suffix=self._ext_audio,
                prefix=False,
                remove_suffix=True,
            )
        )

        if not preprocess:
            return
        self.preprocess_embeddings(self._path, self._ext_audio,
                                   self._ext_embed)
コード例 #25
0
ファイル: clotho_v1.py プロジェクト: Labbeti/MLU
    def _download_dataset(self):
        """Download the files of the current subset and extract its archives.

        The dataset root directory is created when missing. Every file listed
        in ``FILES_INFOS[self._subset]`` that is not already present is
        downloaded with its md5 hash passed to ``download_url``. Files whose
        name ends in ``7z`` are then extracted into the dataset root, unless
        the ``<dataset_root>/<subset>`` directory already exists.

        Raises:
            RuntimeError: If a download target path exists but is not a
                regular file.
        """
        if not osp.isdir(self._dataset_root):
            os.mkdir(self._dataset_root)

        if self._verbose >= 1:
            print('Download files for the dataset...')

        infos = FILES_INFOS[self._subset]

        # Download archives files
        for info in infos.values():
            filename, url, hash_ = info['filename'], info['url'], info['hash']
            filepath = osp.join(self._dataset_root, filename)

            if not osp.isfile(filepath):
                if self._verbose >= 1:
                    print(f'Download file "{filename}" from url "{url}"...')

                # Something that is not a regular file (e.g. a directory)
                # occupies the target path; refuse to overwrite it.
                if osp.exists(filepath):
                    raise RuntimeError(
                        f'Object "{filepath}" already exists but it\'s not a file.'
                    )
                download_url(url,
                             self._dataset_root,
                             filename,
                             hash_value=hash_,
                             hash_type='md5')

        # Extract audio files from archives
        for info in infos.values():
            filename = info['filename']
            filepath = osp.join(self._dataset_root, filename)
            extension = filename.split('.')[-1]

            if extension == '7z':
                extracted_path = osp.join(self._dataset_root, self._subset)

                if not osp.isdir(extracted_path):
                    if self._verbose >= 1:
                        print(f'Extract archive file "{filename}"...')

                    archive_file = SevenZipFile(filepath)
                    try:
                        archive_file.extractall(self._dataset_root)
                    finally:
                        # Release the archive handle even if extraction fails.
                        archive_file.close()
コード例 #26
0
    def __init__(self,
                 root: Union[str, Path],
                 url: str = URL,
                 folder_in_archive: str = FOLDER_IN_ARCHIVE,
                 download: bool = False,
                 transform: Any = None,
                 target_transform: Any = None) -> None:
        """Locate the dataset on disk (optionally downloading it) and index it.

        Args:
            root: Directory holding the archive and the extracted dataset;
                may be a string or a ``Path``.
            url: Location of the archive; its basename is the expected
                archive file name inside ``root``.
            folder_in_archive: Path, relative to ``root``, of the directory
                expected to exist once the archive has been extracted.
            download: When true and the dataset directory is missing, the
                archive is downloaded (unless already present) and extracted.
            transform: Stored on the instance; a non-``None`` value emits a
                warning.
            target_transform: Stored on the instance; a non-``None`` value
                emits a warning.

        Raises:
            RuntimeError: If the dataset directory does not exist after the
                optional download step.
        """
        uses_transforms = not (transform is None and target_transform is None)
        if uses_transforms:
            warnings.warn(
                "In the next version, transforms will not be part of the dataset. "
                "Please remove the option `transform=True` and "
                "`target_transform=True` to suppress this warning.")

        self.transform = transform
        self.target_transform = target_transform

        # Normalise ``root`` to its string form so Path objects are accepted.
        root = os.fspath(root)

        archive_path = os.path.join(root, os.path.basename(url))
        self._path = os.path.join(root, folder_in_archive)

        if download and not os.path.isdir(self._path):
            if not os.path.isfile(archive_path):
                download_url(
                    url,
                    root,
                    hash_value=_CHECKSUMS.get(url, None),
                    hash_type="md5",
                )
            extract_archive(archive_path)

        if not os.path.isdir(self._path):
            raise RuntimeError(
                "Dataset not found. Please use `download=True` to download it."
            )

        self._walker = list(
            walk_files(
                self._path,
                suffix=self._ext_audio,
                prefix=False,
                remove_suffix=True,
            )
        )
コード例 #27
0
def test_expected_prediction_from_pretrained_model():
    """Check the pretrained model's transcript for one downloaded sample.

    The whole scenario sits inside the ``try`` so that an ``HTTPError`` raised
    by any step ends the test early without failing it.
    """
    sample_name = "f0001_us_f0001_00001.wav"
    try:
        # Fetch the sample file into the default cache folder.
        cache_dir = get_default_cache_folder()
        download_url(
            "https://github.com/fastaudio/10_Speakers_Sample/raw/76f365de2f4d282ec44450d68f5b88de37b8b7ad/train/f0001_us_f0001_00001.wav",
            download_folder=str(cache_dir),
            filename=sample_name,
            resume=True,
        )

        # Build the model and load the audio.
        model = QuartznetModule.load_from_nemo(
            checkpoint_name=NemoCheckpoint.QuartzNet5x5LS_En)
        waveform, sample_rate = torchaudio.load(cache_dir / sample_name)
        assert sample_rate == 16000

        transcripts = model.predict(waveform)
        assert transcripts[0].strip() == (
            "the world needs opportunities for new leaders and new ideas")
    except HTTPError:
        return
コード例 #28
0
    def _parse_filesystem(self, root: str, url: str, folder_in_archive: str,
                          download: bool) -> None:
        """Find the dataset directory and record the sorted ``.wav`` stems.

        Args:
            root: Directory holding the archive and the extracted dataset.
            url: Location of the archive; its basename is the expected
                archive file name inside ``root``.
            folder_in_archive: Path, relative to ``root``, of the dataset
                directory.
            download: When true and the dataset directory is missing, the
                archive is downloaded (unless already present) and extracted.

        Raises:
            RuntimeError: If the dataset directory does not exist after the
                optional download step.
        """
        base_dir = Path(root)
        archive_path = base_dir / os.path.basename(url)
        self._path = base_dir / folder_in_archive

        if download and not os.path.isdir(self._path):
            if not os.path.isfile(archive_path):
                expected_hash = _RELEASE_CONFIGS["release1"]["checksum"]
                download_url(url, base_dir, hash_value=expected_hash)
            extract_archive(archive_path)

        if not os.path.isdir(self._path):
            raise RuntimeError(
                "Dataset not found. Please use `download=True` to download it."
            )

        # Only files directly inside the dataset directory are listed.
        stems = [str(wav.stem) for wav in Path(self._path).glob("*.wav")]
        stems.sort()
        self._walker = stems
コード例 #29
0
    def _parse_filesystem(self, root: str, url: str, folder_in_archive: str, download: bool) -> None:
        """Resolve the dataset paths and load the rows of ``metadata.csv``.

        Args:
            root: Directory holding the archive and the extracted dataset.
            url: Location of the archive; its basename, minus ``.tar.bz2``,
                names the extracted top-level folder.
            folder_in_archive: Sub-directory of the extracted folder that is
                recorded as the dataset path.
            download: When true and the dataset directory is missing, the
                archive is downloaded (unless already present) and extracted.
        """
        root_dir = Path(root)

        archive_name = os.path.basename(url)
        archive_path = root_dir / archive_name

        # Folder created by the archive: the archive name without its suffix.
        release_dir = Path(archive_name.split(".tar.bz2")[0])

        self._path = root_dir / (release_dir / folder_in_archive)
        self._metadata_path = root_dir / release_dir / 'metadata.csv'

        if download and not os.path.isdir(self._path):
            if not os.path.isfile(archive_path):
                expected_hash = _RELEASE_CONFIGS["release1"]["checksum"]
                download_url(url, root_dir, hash_value=expected_hash)
            extract_archive(archive_path)

        # Rows are pipe-separated and quote characters are kept literally.
        with open(self._metadata_path, "r", newline='') as metadata_fh:
            rows = csv.reader(metadata_fh, delimiter="|", quoting=csv.QUOTE_NONE)
            self._flist = [row for row in rows]
コード例 #30
0
ファイル: ljspeech.py プロジェクト: tuxzz/audio
    def __init__(
            self, root, url=URL, folder_in_archive=FOLDER_IN_ARCHIVE, download=False
    ):
        """Resolve the dataset paths and load the rows of ``metadata.csv``.

        Args:
            root: Directory holding the archive and the extracted dataset.
            url: Location of the archive; its basename, cut at
                ``self._ext_archive``, names the extracted top-level folder.
            folder_in_archive: Sub-directory of the extracted folder that is
                recorded as the dataset path.
            download: When true and the dataset directory is missing, the
                archive is downloaded (unless already present) and extracted.
        """
        archive_name = os.path.basename(url)
        archive_path = os.path.join(root, archive_name)

        # Folder created by the archive: the archive name without its suffix.
        release_dir = archive_name.split(self._ext_archive)[0]

        self._path = os.path.join(
            root, os.path.join(release_dir, folder_in_archive))
        self._metadata_path = os.path.join(root, release_dir, 'metadata.csv')

        if download and not os.path.isdir(self._path):
            if not os.path.isfile(archive_path):
                download_url(url, root)
            extract_archive(archive_path)

        # Rows are pipe-separated and quote characters are kept literally.
        with open(self._metadata_path, "r") as metadata_fh:
            rows = unicode_csv_reader(
                metadata_fh, delimiter="|", quoting=csv.QUOTE_NONE)
            self._walker = list(rows)