def main(cmd=None):
    """Run SVS model decoding.

    Echoes the invoking command line to stderr, parses ``cmd`` (or
    ``sys.argv`` when ``cmd`` is None) with the parser from ``get_parser()``,
    and forwards every parsed option except ``config`` to ``inference``.
    """
    print(get_commandline_args(), file=sys.stderr)
    namespace = get_parser().parse_args(cmd)
    # "config" is dropped, if present, before the options are forwarded.
    options = {
        key: value for key, value in vars(namespace).items() if key != "config"
    }
    inference(**options)
def main():
    """Write one ``"<utt> <d0>,<d1>,..."`` line per utterance to ``args.out``.

    When no preprocessing config is given, the reader is asked to return
    shapes directly (``return_shape=True``); otherwise each item is run
    through the ``Transformation`` and the shape of the result is reported.
    """
    args = get_parser().parse_args()

    # logging info
    log_format = "%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s"
    log_level = logging.INFO if args.verbose > 0 else logging.WARN
    logging.basicConfig(level=log_level, format=log_format)
    logging.info(get_commandline_args())

    preprocessing = None
    if args.preprocess_conf is not None:
        preprocessing = Transformation(args.preprocess_conf)
        logging.info("Apply preprocessing: {}".format(preprocessing))

    # The matrix itself is not needed without preprocessing,
    # so ask file_reader_helper to return only the shape.
    # This makes sense only with filetype="hdf5".
    shape_only = preprocessing is None
    reader = file_reader_helper(
        args.rspecifier, args.filetype, return_shape=shape_only
    )
    for utt, mat in reader:
        if shape_only:
            if len(mat) == 2 and isinstance(mat[1], tuple):
                # Sound file: Tuple[int, Tuple[int, ...]] -> keep the shape part
                _rate, mat = mat
            dims = mat
        else:
            if is_scipy_wav_style(mat):
                # Sound file: Tuple[int, ndarray] -> keep the array part
                _rate, mat = mat
            mat = preprocessing(mat, uttid_list=utt)
            dims = mat.shape
        shape_str = ",".join(str(dim) for dim in dims)
        args.out.write("{} {}\n".format(utt, shape_str))
def main(cmd=None):
    """Parse the command line and pass every option to ``aggregate_stats_dirs``.

    The invoking command line is echoed to stderr first. ``cmd`` is the
    argument list to parse; None makes argparse fall back to ``sys.argv``.
    """
    print(get_commandline_args(), file=sys.stderr)
    namespace = get_parser().parse_args(cmd)
    aggregate_stats_dirs(**vars(namespace))
def main():
    """Create ``<outdir>/<name>.scp`` from a MIDI scp, optionally cut by segments.

    With ``--segments``, each line ``utt_id recording_id begin end`` selects a
    slice of the recording's note/tempo sequences, which is written through
    ``MIDIScpWriter``. Without ``--segments`` the input scp is copied as is.

    The ``--ref-channels`` / ``--utt2ref-channels`` options are parsed but not
    applied yet (see the TODO below).
    """
    import shutil

    logfmt = "%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s"
    logging.basicConfig(level=logging.INFO, format=logfmt)
    logging.info(get_commandline_args())

    parser = argparse.ArgumentParser(
        description='Create waves list from "midi.scp"',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument("scp")
    parser.add_argument("outdir")
    parser.add_argument(
        "--name",
        default="midi",
        help="Specify the prefix word of output file name " 'such as "wav.scp"',
    )
    parser.add_argument("--segments", default=None)
    parser.add_argument(
        "--fs",
        # int16 cannot represent common rates such as 44100 or 48000;
        # int32 keeps a NumPy integer scalar while covering them.
        type=np.int32,
        default=None,
        help="If the sampling rate specified, " "Change the sampling rate.",
    )
    group = parser.add_mutually_exclusive_group()
    # TODO: in midi, the reference channels should be related to track,
    #   it is not implemented now
    group.add_argument("--ref-channels", default=None, type=str2int_tuple)
    group.add_argument("--utt2ref-channels", default=None, type=str)
    args = parser.parse_args()

    # NOTE: utt2ref_channels is prepared for the TODO above and is not used yet.
    if args.ref_channels is not None:

        def utt2ref_channels(x) -> Tuple[int, ...]:
            return args.ref_channels

    elif args.utt2ref_channels is not None:
        utt2ref_channels_dict = read_2column_text(args.utt2ref_channels)

        def utt2ref_channels(x, d=utt2ref_channels_dict) -> Tuple[int, ...]:
            chs_str = d[x]
            return tuple(map(int, chs_str.split()))

    else:
        utt2ref_channels = None

    # load segments
    segments = {}
    if args.segments is not None:
        with open(args.segments) as f:
            for line in f:
                fields = line.split()
                if not fields:
                    # Lines read from a file keep their newline, so a length
                    # check on the raw line never detects a blank line.
                    continue
                utt_id, recording_id, segment_begin, segment_end = fields
                segments[utt_id] = (
                    recording_id,
                    float(segment_begin),
                    float(segment_end),
                )

    Path(args.outdir).mkdir(parents=True, exist_ok=True)
    out_midiscp = Path(args.outdir) / f"{args.name}.scp"

    if args.segments is not None:
        loader = MIDIScpReader(args.scp, rate=args.fs)
        writer = MIDIScpWriter(
            args.outdir,
            out_midiscp,
            format="midi",
            rate=args.fs,
        )

        # Remember the last loaded recording: consecutive segments often
        # refer to the same one.
        cache = (None, None, None)
        for utt_id, (recording, start, end) in tqdm(segments.items()):
            # TODO: specify track information here
            if recording == cache[0]:
                note_seq, tempo_seq = cache[1], cache[2]
            else:
                note_seq, tempo_seq = loader[recording]
                cache = (recording, note_seq, tempo_seq)

            if args.fs is not None:
                # Convert the segment boundaries to sample indices and clip.
                start = int(start * args.fs)
                end = int(end * args.fs)
                if start < 0:
                    start = 0
                if end > len(note_seq):
                    end = len(note_seq)
            else:
                # Locate the boundaries from the first/second field of each
                # note item.
                start = np.searchsorted([item[0] for item in note_seq], start, "left")
                end = np.searchsorted([item[1] for item in note_seq], end, "left")
            sub_note = note_seq[start:end]
            sub_tempo = tempo_seq[start:end]

            writer[utt_id] = sub_note, sub_tempo

    else:
        # midi_scp does not need to change, when no segments is applied
        # Note things will change, after finish other todos in the script
        try:
            # copyfile avoids building a shell command from unquoted paths.
            shutil.copyfile(args.scp, out_midiscp)
        except shutil.SameFileError:
            logging.warning(f"{args.scp} and {out_midiscp} are the same file")
def _ark_write_function(audio_format):
    """Return the kaldiio ``write_function`` name for an ``*ark`` audio format."""
    if "flac" in audio_format:
        suf = "flac"
    elif "wav" in audio_format:
        suf = "wav"
    else:
        raise RuntimeError("wav.ark or flac")
    return f"soundfile_{suf}"


def _resample_if_needed(wave, rate, fs):
    """Resample ``wave`` to ``fs`` as int16 when ``fs`` is set and differs."""
    if fs is not None and fs != rate:
        # FIXME(kamo): To use sox?
        wave = resampy.resample(wave.astype(np.float64), rate, fs, axis=0)
        wave = wave.astype(np.int16)
        rate = fs
    return wave, rate


def main():
    """Create ``<outdir>/<name>.scp`` and ``<outdir>/utt2num_samples``.

    Reads the input scp (cut by ``--segments`` through kaldiio when given),
    optionally selects reference channels and resamples to ``--fs``, then
    stores each utterance either in ``data_<name>.ark`` (when
    ``--audio-format`` ends with "ark") or as an individual sound file.
    Without ``--segments``, a file that already has the requested format and
    rate, and needs no channel selection, is referenced as is.

    Raises:
        RuntimeError: for an ark ``--audio-format`` containing neither
            "flac" nor "wav".
    """
    import contextlib

    logfmt = "%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s"
    logging.basicConfig(level=logging.INFO, format=logfmt)
    logging.info(get_commandline_args())

    parser = argparse.ArgumentParser(
        description='Create waves list from "wav.scp"',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument("scp")
    parser.add_argument("outdir")
    parser.add_argument(
        "--name",
        default="wav",
        help="Specify the prefix word of output file name " 'such as "wav.scp"',
    )
    parser.add_argument("--segments", default=None)
    parser.add_argument(
        "--fs",
        type=humanfriendly_or_none,
        default=None,
        help="If the sampling rate specified, " "Change the sampling rate.",
    )
    parser.add_argument("--audio-format", default="wav")
    group = parser.add_mutually_exclusive_group()
    group.add_argument("--ref-channels", default=None, type=str2int_tuple)
    group.add_argument("--utt2ref-channels", default=None, type=str)
    args = parser.parse_args()

    out_num_samples = Path(args.outdir) / "utt2num_samples"

    if args.ref_channels is not None:

        def utt2ref_channels(x) -> Tuple[int, ...]:
            return args.ref_channels

    elif args.utt2ref_channels is not None:
        utt2ref_channels_dict = read_2column_text(args.utt2ref_channels)

        def utt2ref_channels(x, d=utt2ref_channels_dict) -> Tuple[int, ...]:
            chs_str = d[x]
            return tuple(map(int, chs_str.split()))

    else:
        utt2ref_channels = None

    Path(args.outdir).mkdir(parents=True, exist_ok=True)
    out_wavscp = Path(args.outdir) / f"{args.name}.scp"
    is_ark = args.audio_format.endswith("ark")

    # The ExitStack closes every file opened below, including the ark/scp
    # handles that exist only in some branches.
    with contextlib.ExitStack() as stack:
        if args.segments is not None:
            # Note: kaldiio supports only wav-pcm-int16le file.
            loader = kaldiio.load_scp_sequential(args.scp, segments=args.segments)
            if is_ark:
                fark = stack.enter_context(
                    open(Path(args.outdir) / f"data_{args.name}.ark", "wb")
                )
                fscp = stack.enter_context(out_wavscp.open("w"))
            else:
                # NOTE(review): the writer is not closed explicitly here;
                # confirm whether SoundScpWriter needs a close() call.
                writer = SoundScpWriter(
                    args.outdir,
                    out_wavscp,
                    format=args.audio_format,
                )
            fnum_samples = stack.enter_context(out_num_samples.open("w"))

            for uttid, (rate, wave) in tqdm(loader):
                # wave: (Time,) or (Time, Nmic)
                if wave.ndim == 2 and utt2ref_channels is not None:
                    wave = wave[:, utt2ref_channels(uttid)]

                wave, rate = _resample_if_needed(wave, rate, args.fs)

                if is_ark:
                    # NOTE(kamo): Using extended ark format style here.
                    # This format is incompatible with Kaldi
                    kaldiio.save_ark(
                        fark,
                        {uttid: (wave, rate)},
                        scp=fscp,
                        append=True,
                        write_function=_ark_write_function(args.audio_format),
                    )
                else:
                    writer[uttid] = rate, wave
                fnum_samples.write(f"{uttid} {len(wave)}\n")
        else:
            if is_ark:
                fark = stack.enter_context(
                    open(Path(args.outdir) / f"data_{args.name}.ark", "wb")
                )
            else:
                wavdir = Path(args.outdir) / f"data_{args.name}"
                wavdir.mkdir(parents=True, exist_ok=True)

            fscp = stack.enter_context(Path(args.scp).open("r"))
            fout = stack.enter_context(out_wavscp.open("w"))
            fnum_samples = stack.enter_context(out_num_samples.open("w"))

            for line in tqdm(fscp):
                uttid, wavpath = line.strip().split(None, 1)

                save_asis = False
                if wavpath.endswith("|"):
                    # Streaming input e.g. cat a.wav |
                    with kaldiio.open_like_kaldi(wavpath, "rb") as f:
                        with BytesIO(f.read()) as g:
                            wave, rate = soundfile.read(g, dtype=np.int16)
                    if wave.ndim == 2 and utt2ref_channels is not None:
                        wave = wave[:, utt2ref_channels(uttid)]
                else:
                    wave, rate = soundfile.read(wavpath, dtype=np.int16)
                    if wave.ndim == 2 and utt2ref_channels is not None:
                        wave = wave[:, utt2ref_channels(uttid)]
                    elif not is_ark:
                        # Neither --segments nor --fs change the data and
                        # the line doesn't end with "|",
                        # i.e. not using unix-pipe,
                        # only in this case,
                        # just using the original file as is.
                        save_asis = Path(
                            wavpath
                        ).suffix == "." + args.audio_format and (
                            args.fs is None or args.fs == rate
                        )

                if save_asis:
                    fout.write(f"{uttid} {wavpath}\n")
                else:
                    wave, rate = _resample_if_needed(wave, rate, args.fs)

                    if is_ark:
                        # NOTE(kamo): Using extended ark format style here.
                        # This format is not supported in Kaldi.
                        kaldiio.save_ark(
                            fark,
                            {uttid: (wave, rate)},
                            scp=fout,
                            append=True,
                            write_function=_ark_write_function(args.audio_format),
                        )
                    else:
                        owavpath = str(wavdir / f"{uttid}.{args.audio_format}")
                        soundfile.write(owavpath, wave, rate)
                        fout.write(f"{uttid} {owavpath}\n")
                fnum_samples.write(f"{uttid} {len(wave)}\n")
def main(cmd=None):
    """Parse the command line and pass every option to ``split_scps``.

    The invoking command line is echoed to stderr first. ``cmd`` is the
    argument list to parse; None makes argparse fall back to ``sys.argv``.
    """
    print(get_commandline_args(), file=sys.stderr)
    namespace = get_parser().parse_args(cmd)
    split_scps(**vars(namespace))
def main(cmd=None):
    """Launch a training command on one or more nodes and wait for it.

    Depending on the options, the command is started:
      * over SSH, one process per listed device of each ``--host`` entry,
      * directly through ``--cmd`` on a single node,
      * through ``--cmd`` with ``srun`` when ``--cmd`` is ``slurm.pl``,
      * through ``--cmd`` with ``mpirun`` otherwise.

    Existing log files are rotated first. If any process fails, the ones
    still running are killed, and the tail of the log is raised as an error.

    Args:
        cmd: Argument list to parse; None makes argparse use ``sys.argv``.

    Raises:
        RuntimeError: if ``--cmd`` is not an executable, if ``run.pl`` or
            ``ssh.pl`` is used for multiple nodes, or if any launched
            process exits with a non-zero status.
    """
    import collections

    logfmt = "%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s"
    logging.basicConfig(level=logging.INFO, format=logfmt)
    logging.info(get_commandline_args())

    parser = get_parser()
    args = parser.parse_args(cmd)
    args.cmd = shlex.split(args.cmd)

    if args.host is None and shutil.which(args.cmd[0]) is None:
        raise RuntimeError(
            f"The first args of --cmd should be a script path. e.g. utils/run.pl: "
            f"{args.cmd[0]}"
        )

    # Specify init_method:
    #   See: https://pytorch.org/docs/stable/distributed.html#initialization
    if args.host is None and args.num_nodes <= 1:
        # Automatically set init_method if num_node=1
        init_method = None
    else:
        if args.master_port is None:
            # Try "shared-file system initialization" if master_port is not specified
            # Give random name to avoid reusing previous file
            init_file = args.init_file_prefix + str(uuid.uuid4())
            init_file = Path(init_file).absolute()
            Path(init_file).parent.mkdir(exist_ok=True, parents=True)
            init_method = ["--dist_init_method", f"file://{init_file}"]
        else:
            init_method = ["--dist_master_port", str(args.master_port)]

            # This can be omitted if slurm mode
            if args.master_addr is not None:
                init_method += ["--dist_master_addr", args.master_addr]
            elif args.host is not None:
                init_method += [
                    "--dist_master_addr",
                    args.host.split(",")[0].split(":")[0],
                ]

    # Log-rotation: x.log -> x.1.log -> x.2.log -> ...; the oldest is removed.
    for i in range(args.max_num_log_files - 1, -1, -1):
        if i == 0:
            p = Path(args.log)
            pn = p.parent / (p.stem + ".1" + p.suffix)
        else:
            _p = Path(args.log)
            p = _p.parent / (_p.stem + f".{i}" + _p.suffix)
            pn = _p.parent / (_p.stem + f".{i + 1}" + _p.suffix)

        if p.exists():
            if i == args.max_num_log_files - 1:
                p.unlink()
            else:
                shutil.move(p, pn)

    processes = []
    # Log file shared by the SSH child processes; closed once they all finish.
    log_stream = None
    # Submit command via SSH
    if args.host is not None:
        hosts = []
        ids_list = []
        # e.g. args.host = "host1:0:2,host2:0:1"
        for host in args.host.split(","):
            # e.g host = "host1:0:2"
            sps = host.split(":")
            host = sps[0]
            if len(sps) > 1:
                ids = [int(x) for x in sps[1:]]
            else:
                ids = list(range(args.ngpu))
            hosts.append(host)
            ids_list.append(ids)

        world_size = sum(max(len(x), 1) for x in ids_list)
        logging.info(f"{len(hosts)}nodes with world_size={world_size} via SSH")

        if args.envfile is not None:
            env = f"source {args.envfile}"
        else:
            env = ""

        if args.log != "-":
            Path(args.log).parent.mkdir(parents=True, exist_ok=True)
            log_stream = Path(args.log).open("w", encoding="utf-8")
        else:
            # Output to stdout/stderr
            log_stream = None

        rank = 0
        for host, ids in zip(hosts, ids_list):
            ngpu = 1 if len(ids) > 0 else 0
            ids = ids if len(ids) > 0 else ["none"]

            for local_rank in ids:
                cmd = (
                    args.args
                    + [
                        "--ngpu",
                        str(ngpu),
                        "--multiprocessing_distributed",
                        "false",
                        "--local_rank",
                        str(local_rank),
                        "--dist_rank",
                        str(rank),
                        "--dist_world_size",
                        str(world_size),
                    ]
                    + init_method
                )
                if ngpu == 0:
                    # Gloo supports both GPU and CPU mode.
                    #   See: https://pytorch.org/docs/stable/distributed.html
                    cmd += ["--dist_backend", "gloo"]

                # Empty arguments are written as '' so they survive the shell.
                remote_cmd = " ".join([c if len(c) != 0 else "''" for c in cmd])
                heredoc = f"""<< EOF
set -euo pipefail
cd {os.getcwd()}
{env}
{remote_cmd}
EOF
"""

                # FIXME(kamo): The process will be alive
                #  even if this program is stopped because we don't set -t here,
                #  i.e. not assigning pty,
                #  and the program is not killed when SSH connection is closed.
                process = subprocess.Popen(
                    ["ssh", host, "bash", heredoc],
                    stdout=log_stream,
                    stderr=log_stream,
                )

                processes.append(process)

                rank += 1

    # If Single node
    elif args.num_nodes <= 1:
        if args.ngpu > 1:
            if args.multiprocessing_distributed:
                # NOTE:
                #   If multiprocessing_distributed=true,
                # -> Distributed mode, which is multi-process and Multi-GPUs.
                #    and TCP initialization is used if single-node case:
                #      e.g. init_method="tcp://localhost:20000"
                logging.info(f"single-node with {args.ngpu}gpu on distributed mode")
            else:
                # NOTE:
                #   If multiprocessing_distributed=false
                # -> "DataParallel" mode, which is single-process
                #    and Multi-GPUs with threading.
                # See:
                # https://discuss.pytorch.org/t/why-torch-nn-parallel-distributeddataparallel-runs-faster-than-torch-nn-dataparallel-on-single-machine-with-multi-gpu/32977/2
                logging.info(f"single-node with {args.ngpu}gpu using DataParallel")

        # Using cmd as it is simply
        cmd = (
            args.cmd
            # arguments for ${cmd}
            + ["--gpu", str(args.ngpu), args.log]
            # arguments for *_train.py
            + args.args
            + [
                "--ngpu",
                str(args.ngpu),
                "--multiprocessing_distributed",
                str(args.multiprocessing_distributed),
            ]
        )
        process = subprocess.Popen(cmd)
        processes.append(process)

    elif Path(args.cmd[0]).name == "run.pl":
        raise RuntimeError("run.pl doesn't support submitting to the other nodes.")

    elif Path(args.cmd[0]).name == "ssh.pl":
        raise RuntimeError("Use --host option instead of ssh.pl")

    # If Slurm
    elif Path(args.cmd[0]).name == "slurm.pl":
        logging.info(f"{args.num_nodes}nodes and {args.ngpu}gpu-per-node using srun")
        cmd = (
            args.cmd
            # arguments for ${cmd}
            + [
                "--gpu",
                str(args.ngpu),
                "--num_threads",
                str(max(args.ngpu, 1)),
                "--num_nodes",
                str(args.num_nodes),
                args.log,
                "srun",
                # Inherit all environment variable from parent process
                "--export=ALL",
            ]
            # arguments for *_train.py
            + args.args
            + [
                "--ngpu",
                str(args.ngpu),
                "--multiprocessing_distributed",
                "true",
                "--dist_launcher",
                "slurm",
            ]
            + init_method
        )
        if args.ngpu == 0:
            # Gloo supports both GPU and CPU mode.
            #   See: https://pytorch.org/docs/stable/distributed.html
            cmd += ["--dist_backend", "gloo"]
        process = subprocess.Popen(cmd)
        processes.append(process)

    else:
        # This pattern can also works with Slurm.

        logging.info(f"{args.num_nodes}nodes and {args.ngpu}gpu-per-node using mpirun")
        cmd = (
            args.cmd
            # arguments for ${cmd}
            + [
                "--gpu",
                str(args.ngpu),
                "--num_threads",
                str(max(args.ngpu, 1)),
                # Make sure scheduler setting, i.e. conf/queue.conf
                # so that --num_nodes requires 1process-per-node
                "--num_nodes",
                str(args.num_nodes),
                args.log,
                "mpirun",
                # -np option can be omitted with Torque/PBS
                "-np",
                str(args.num_nodes),
            ]
            # arguments for *_train.py
            + args.args
            + [
                "--ngpu",
                str(args.ngpu),
                "--multiprocessing_distributed",
                "true",
                "--dist_launcher",
                "mpi",
            ]
            + init_method
        )
        if args.ngpu == 0:
            # Gloo supports both GPU and CPU mode.
            #   See: https://pytorch.org/docs/stable/distributed.html
            cmd += ["--dist_backend", "gloo"]
        process = subprocess.Popen(cmd)
        processes.append(process)

    logging.info(f"log file: {args.log}")
    logging.info(f"submitting cmd {cmd}")

    failed = False
    try:
        while any(p.returncode is None for p in processes):
            for process in processes:
                # If any process is failed, try to kill the other processes too.
                # Only a process without a return code is still running.
                if failed and process.returncode is None:
                    process.kill()
                try:
                    # wait() also records the return code of a killed process.
                    process.wait(0.5)
                except subprocess.TimeoutExpired:
                    pass

                if process.returncode is not None and process.returncode != 0:
                    failed = True
    finally:
        if log_stream is not None:
            log_stream.close()

    for process in processes:
        if process.returncode != 0:
            print(
                subprocess.CalledProcessError(returncode=process.returncode, cmd=cmd),
                file=sys.stderr,
            )
            p = Path(args.log)
            if p.exists():
                with p.open() as log_file:
                    # Keep only the tail instead of loading the whole log.
                    lines = collections.deque(log_file, maxlen=1000)
                raise RuntimeError(
                    f"\n################### The last 1000 lines of {args.log} "
                    f"###################\n" + "".join(lines)
                )
            else:
                raise RuntimeError