def __init__(self,
             stage: str,
             speech_featurizer: SpeechFeaturizer,
             clean_dir: str,
             noisy_dir: str,
             cache: bool = False,
             shuffle: bool = False):
    self.speech_featurizer = speech_featurizer
    self.clean_dir = preprocess_paths(clean_dir)
    self.noisy_dir = preprocess_paths(noisy_dir)
    super(SeganTrainDataset, self).__init__(merge_dirs([self.clean_dir]), None, cache, shuffle, stage)
def __init__(self,
             stage: str,
             speech_featurizer: SpeechFeaturizer,
             clean_dir: str,
             noises_config: dict,
             cache: bool = False,
             shuffle: bool = False):
    self.speech_featurizer = speech_featurizer
    self.clean_dir = preprocess_paths(clean_dir)
    self.noises = SignalNoise() if noises_config is None else SignalNoise(**noises_config)
    super(SeganAugTrainDataset, self).__init__(
        data_paths=merge_dirs([self.clean_dir]),
        augmentations=None,
        cache=cache,
        shuffle=shuffle,
        stage=stage
    )
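# Usage sketch (an assumption for illustration, not taken from the code
# above): how the two SEGAN training datasets might be constructed. The
# directory paths are hypothetical, and noises_config is simply a dict of
# keyword arguments forwarded to SignalNoise.
train_dataset = SeganTrainDataset(
    stage="train",
    speech_featurizer=speech_featurizer,   # a SpeechFeaturizer built elsewhere from the config
    clean_dir="/data/clean_trainset_wav",  # hypothetical path
    noisy_dir="/data/noisy_trainset_wav",  # hypothetical path: pre-recorded noisy counterparts
    cache=True,
    shuffle=True,
)

# The augmented variant takes no noisy_dir: it corrupts the clean signals on
# the fly via the SignalNoise instance configured from noises_config.
aug_train_dataset = SeganAugTrainDataset(
    stage="train",
    speech_featurizer=speech_featurizer,
    clean_dir="/data/clean_trainset_wav",
    noises_config=noises_config,  # hypothetical dict of SignalNoise kwargs loaded from the config
    cache=False,
    shuffle=True,
)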
help="Whether to use `SentencePiece` model") parser.add_argument("--subwords", type=str, default=None, help="Path to file that stores generated subwords") parser.add_argument("transcripts", nargs="+", type=str, default=None, help="Paths to transcript files") args = parser.parse_args() transcripts = preprocess_paths(args.transcripts) tfrecords_dir = preprocess_paths(args.tfrecords_dir) config = Config(args.config) if args.sentence_piece: print("Loading SentencePiece model ...") text_featurizer = SentencePieceFeaturizer.load_from_file( config.decoder_config, args.subwords) elif args.subwords and os.path.exists(args.subwords): print("Loading subwords ...") text_featurizer = SubwordFeaturizer.load_from_file(config.decoder_config, args.subwords) ASRTFRecordDataset(data_paths=transcripts, tfrecords_dir=tfrecords_dir,
parser.add_argument("--dir", "-d", type=str, default=None, help="Directory of dataset") parser.add_argument("output", type=str, default=None, help="The output .tsv transcript file path") args = parser.parse_args() assert args.dir and args.output args.dir = preprocess_paths(args.dir) args.output = preprocess_paths(args.output) transcripts = [] text_files = glob.glob(os.path.join(args.dir, "**", "*.txt"), recursive=True) for text_file in tqdm(text_files, desc="[Loading]"): current_dir = os.path.dirname(text_file) with open(text_file, "r", encoding="utf-8") as txt: lines = txt.read().splitlines() for line in lines: line = line.split(" ", maxsplit=1) audio_file = os.path.join(current_dir, line[0] + ".flac") y, sr = librosa.load(audio_file, sr=None) duration = librosa.get_duration(y, sr)
parser.add_argument("--config", type=str, default=None, help="The file path of model configuration file") parser.add_argument("--sentence_piece", default=False, action="store_true", help="Whether to use `SentencePiece` model") parser.add_argument("--metadata_prefix", type=str, default=None, help="Path to file containing metadata") parser.add_argument("--subwords", type=str, default=None, help="Path to file that stores generated subwords") parser.add_argument("transcripts", nargs="+", type=str, default=None, help="Paths to transcript files") args = parser.parse_args() assert args.metadata_prefix is not None, "metadata_prefix must be defined" transcripts = preprocess_paths(args.transcripts) config = Config(args.config) speech_featurizer = TFSpeechFeaturizer(config.speech_config) if args.sentence_piece: print("Loading SentencePiece model ...") text_featurizer = SentencePieceFeaturizer.load_from_file(config.decoder_config, args.subwords) elif args.subwords and os.path.exists(args.subwords): print("Loading subwords ...") text_featurizer = SubwordFeaturizer.load_from_file(config.decoder_config, args.subwords) dataset = ASRDataset( data_paths=transcripts, speech_featurizer=speech_featurizer, text_featurizer=text_featurizer,