def build_preprocess_fn(
    cls, args: argparse.Namespace, train: bool
) -> Optional[Callable[[str, Dict[str, np.array]], Dict[str, np.ndarray]]]:
    """Select and construct the preprocessor matching the configured subtasks.

    Args:
        args: Parsed options. ``use_preprocessor`` and ``subtask_series`` are
            always read; the tokenizer-related options are read only by the
            branches that need them.
        train: Forwarded unchanged as the ``train`` argument of the chosen
            preprocessor.

    Returns:
        ``None`` when ``args.use_preprocessor`` is falsy. Otherwise a
        ``MutliTokenizerCommonPreprocessor`` if ``"st"`` is found in
        ``args.subtask_series``, a ``CommonPreprocessor`` if ``"diar"`` is
        found, and a ``CommonPreprocessor_multi`` in every other case.
    """
    # NOTE(review): ``np.array`` in the return annotation is a function, not
    # a type; ``np.ndarray`` is presumably what was meant. Left untouched so
    # the signature stays exactly as callers/overrides expect.
    assert check_argument_types()

    if not args.use_preprocessor:
        preprocessor = None
    elif "st" in args.subtask_series:
        # Two tokenizers are configured in parallel: the first entry of each
        # list pairs with "text", the second with "src_text".
        preprocessor = MutliTokenizerCommonPreprocessor(
            train=train,
            token_type=[args.token_type, args.src_token_type],
            token_list=[args.token_list, args.src_token_list],
            bpemodel=[args.bpemodel, args.src_bpemodel],
            non_linguistic_symbols=args.non_linguistic_symbols,
            text_cleaner=args.cleaner,
            g2p_type=args.g2p,
            # NOTE(kamo): These options may be absent on namespaces created
            # by older configs, so fall back to a default for backward
            # compatibility instead of failing on the missing attribute.
            rir_scp=getattr(args, "rir_scp", None),
            rir_apply_prob=getattr(args, "rir_apply_prob", 1.0),
            noise_scp=getattr(args, "noise_scp", None),
            noise_apply_prob=getattr(args, "noise_apply_prob", 1.0),
            noise_db_range=getattr(args, "noise_db_range", "13_15"),
            short_noise_thres=getattr(args, "short_noise_thres", 0.5),
            speech_volume_normalize=getattr(
                args, "speech_volume_normalize", None
            ),
            speech_name="speech",
            text_name=["text", "src_text"],
        )
    elif "diar" in args.subtask_series:
        # This branch passes only the train flag; no tokenizer options.
        preprocessor = CommonPreprocessor(train=train)
    else:
        preprocessor = CommonPreprocessor_multi(
            train=train,
            token_type=args.token_type,
            token_list=args.token_list,
            bpemodel=args.bpemodel,
            non_linguistic_symbols=args.non_linguistic_symbols,
            text_name=["text"],
            text_cleaner=args.cleaner,
            g2p_type=args.g2p,
        )

    assert check_return_type(preprocessor)
    return preprocessor
def build_preprocess_fn(
    cls, args: argparse.Namespace, train: bool
) -> Optional[Callable[[str, Dict[str, np.array]], Dict[str, np.ndarray]]]:
    """Construct the preprocessor for the two text entries, if enabled.

    Args:
        args: Parsed options. ``use_preprocessor`` decides whether anything
            is built; the tokenizer-related options are read only when it
            is truthy.
        train: Forwarded unchanged as the ``train`` argument of the
            preprocessor.

    Returns:
        A ``CommonPreprocessor_multi`` configured with the text names
        ``"text_ref1"`` and ``"text_ref2"`` when ``args.use_preprocessor``
        is truthy, otherwise ``None``.
    """
    assert check_argument_types()

    # Stays None unless preprocessing was requested.
    preprocessor = None
    # TODO(Jing): ask Kamo if it is ok to support several args,
    # like text_name = 'text_ref1' and 'text_ref2'
    if args.use_preprocessor:
        preprocessor = CommonPreprocessor_multi(
            train=train,
            token_type=args.token_type,
            token_list=args.token_list,
            bpemodel=args.bpemodel,
            non_linguistic_symbols=args.non_linguistic_symbols,
            text_name=["text_ref1", "text_ref2"],
            text_cleaner=args.cleaner,
            g2p_type=args.g2p,
        )

    assert check_return_type(preprocessor)
    return preprocessor