Example #1
def main(cmd=None):
    """Run SVS model decoding."""
    print(get_commandline_args(), file=sys.stderr)
    parser = get_parser()
    args = parser.parse_args(cmd)
    kwargs = vars(args)
    kwargs.pop("config", None)
    inference(**kwargs)
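
Examples #1, #3, and #6 share the main(cmd=None) signature: argparse's parse_args(None) falls back to sys.argv[1:], so the same entry point serves both the command line and in-process callers such as unit tests. A minimal self-contained sketch of the pattern (toy parser, not the real get_parser):

import argparse

def get_parser():
    parser = argparse.ArgumentParser(description="toy decoder")
    parser.add_argument("--ngpu", type=int, default=0)
    return parser

def main(cmd=None):
    # parse_args(None) reads sys.argv[1:], so one entry point serves
    # both the CLI and programmatic callers, e.g. main(["--ngpu", "2"]).
    args = get_parser().parse_args(cmd)
    kwargs = vars(args)  # Namespace -> dict, ready to pass as **kwargs
    print(kwargs)

if __name__ == "__main__":
    main()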
Example #2
def main():
    parser = get_parser()
    args = parser.parse_args()

    # logging info
    logfmt = "%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s"
    if args.verbose > 0:
        logging.basicConfig(level=logging.INFO, format=logfmt)
    else:
        logging.basicConfig(level=logging.WARN, format=logfmt)
    logging.info(get_commandline_args())

    if args.preprocess_conf is not None:
        preprocessing = Transformation(args.preprocess_conf)
        logging.info("Apply preprocessing: {}".format(preprocessing))
    else:
        preprocessing = None

    # The matrix values are not needed when no preprocessing is applied,
    # so ask file_reader_helper to return only the shape.
    # This makes sense only with filetype="hdf5".
    for utt, mat in file_reader_helper(args.rspecifier,
                                       args.filetype,
                                       return_shape=preprocessing is None):
        if preprocessing is not None:
            if is_scipy_wav_style(mat):
                # If the data is a sound file, it comes as Tuple[int, ndarray]
                rate, mat = mat
            mat = preprocessing(mat, uttid_list=utt)
            shape_str = ",".join(map(str, mat.shape))
        else:
            if len(mat) == 2 and isinstance(mat[1], tuple):
                # If the data is a sound file, the shape comes as
                # Tuple[int, Tuple[int, ...]]
                rate, mat = mat
            shape_str = ",".join(map(str, mat))
        args.out.write("{} {}\n".format(utt, shape_str))
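
The loop above writes Kaldi-style text lines that map each utterance ID to a comma-separated shape, e.g. "utt1 1300,80". A minimal sketch (hypothetical file path) of reading such a shape file back:

from typing import Dict, Tuple

def read_shape_file(path: str) -> Dict[str, Tuple[int, ...]]:
    """Parse lines like 'utt1 1300,80' into {'utt1': (1300, 80)}."""
    shapes = {}
    with open(path) as f:
        for line in f:
            utt, shape_str = line.rstrip("\n").split(maxsplit=1)
            shapes[utt] = tuple(int(x) for x in shape_str.split(","))
    return shapes

# Usage (hypothetical path):
# shapes = read_shape_file("dump/train/feats_shape")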
Example #3
def main(cmd=None):
    print(get_commandline_args(), file=sys.stderr)
    parser = get_parser()
    args = parser.parse_args(cmd)
    kwargs = vars(args)
    aggregate_stats_dirs(**kwargs)
Example #4
def main():
    logfmt = "%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s"
    logging.basicConfig(level=logging.INFO, format=logfmt)
    logging.info(get_commandline_args())

    parser = argparse.ArgumentParser(
        description='Create MIDI list from "midi.scp"',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument("scp")
    parser.add_argument("outdir")
    parser.add_argument(
        "--name",
        default="midi",
        help="Specify the prefix of the output file name, "
        'e.g. "midi.scp"',
    )
    parser.add_argument("--segments", default=None)
    parser.add_argument(
        "--fs",
        # Plain int: np.int16 would overflow for common rates such as 44100
        type=int,
        default=None,
        help="If the sampling rate is specified, "
        "resample to that rate.",
    )
    group = parser.add_mutually_exclusive_group()
    # TODO: In MIDI, the reference channels should be tied to the track; not implemented yet
    group.add_argument("--ref-channels", default=None, type=str2int_tuple)
    group.add_argument("--utt2ref-channels", default=None, type=str)
    args = parser.parse_args()

    if args.ref_channels is not None:

        def utt2ref_channels(x) -> Tuple[int, ...]:
            return args.ref_channels

    elif args.utt2ref_channels is not None:
        utt2ref_channels_dict = read_2column_text(args.utt2ref_channels)

        def utt2ref_channels(x, d=utt2ref_channels_dict) -> Tuple[int, ...]:
            chs_str = d[x]
            return tuple(map(int, chs_str.split()))

    else:
        utt2ref_channels = None

    # load segments
    if args.segments is not None:
        segments = {}
        with open(args.segments) as f:
            for line in f:
                if not line.strip():
                    continue
                utt_id, recording_id, segment_begin, segment_end = (
                    line.strip().split(" ")
                )
                segments[utt_id] = (
                    recording_id,
                    float(segment_begin),
                    float(segment_end),
                )

    Path(args.outdir).mkdir(parents=True, exist_ok=True)
    out_midiscp = Path(args.outdir) / f"{args.name}.scp"
    if args.segments is not None:
        loader = MIDIScpReader(args.scp, rate=args.fs)
        writer = MIDIScpWriter(
            args.outdir,
            out_midiscp,
            format="midi",
            rate=args.fs,
        )

        cache = (None, None, None)
        for utt_id, (recording, start, end) in tqdm(segments.items()):
            # TODO: specify track information here
            if recording == cache[0]:
                note_seq, tempo_seq = cache[1], cache[2]
            else:
                note_seq, tempo_seq = loader[recording]
                cache = (recording, note_seq, tempo_seq)

            if args.fs is not None:
                start = int(start * args.fs)
                end = int(end * args.fs)
                if start < 0:
                    start = 0
                if end > len(note_seq):
                    end = len(note_seq)
            else:
                start = np.searchsorted([item[0] for item in note_seq], start,
                                        "left")
                end = np.searchsorted([item[1] for item in note_seq], end,
                                      "left")
            sub_note = note_seq[start:end]
            sub_tempo = tempo_seq[start:end]

            writer[utt_id] = sub_note, sub_tempo

    else:
        # midi.scp does not need to change when no segments file is given.
        # Note: this will change once the other TODOs in this script are done.
        os.system("cp {} {}".format(args.scp,
                                    Path(args.outdir) / f"{args.name}.scp"))
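
When --fs is not given, example #4 maps segment boundaries onto note indices with np.searchsorted. A self-contained sketch of that lookup over a toy note sequence (the (start, end, pitch) layout is illustrative; only the first two fields mirror the script's use):

import numpy as np

# Toy note sequence: one (start_sec, end_sec, pitch) tuple per note.
note_seq = [(0.0, 0.5, 60), (0.5, 1.0, 62), (1.0, 1.8, 64), (1.8, 2.5, 65)]

seg_start, seg_end = 0.6, 2.0
# Index of the first note whose start time is >= seg_start ...
start_idx = np.searchsorted([n[0] for n in note_seq], seg_start, "left")
# ... up to the first note whose end time reaches seg_end.
end_idx = np.searchsorted([n[1] for n in note_seq], seg_end, "left")
print(note_seq[start_idx:end_idx])  # -> [(1.0, 1.8, 64)]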
Example #5
def main():
    logfmt = "%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s"
    logging.basicConfig(level=logging.INFO, format=logfmt)
    logging.info(get_commandline_args())

    parser = argparse.ArgumentParser(
        description='Create waves list from "wav.scp"',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument("scp")
    parser.add_argument("outdir")
    parser.add_argument(
        "--name",
        default="wav",
        help="Specify the prefix of the output file name, "
        'e.g. "wav.scp"',
    )
    parser.add_argument("--segments", default=None)
    parser.add_argument(
        "--fs",
        type=humanfriendly_or_none,
        default=None,
        help="If the sampling rate is specified, "
        "resample to that rate.",
    )
    parser.add_argument("--audio-format", default="wav")
    group = parser.add_mutually_exclusive_group()
    group.add_argument("--ref-channels", default=None, type=str2int_tuple)
    group.add_argument("--utt2ref-channels", default=None, type=str)
    args = parser.parse_args()

    out_num_samples = Path(args.outdir) / "utt2num_samples"

    if args.ref_channels is not None:

        def utt2ref_channels(x) -> Tuple[int, ...]:
            return args.ref_channels

    elif args.utt2ref_channels is not None:
        utt2ref_channels_dict = read_2column_text(args.utt2ref_channels)

        def utt2ref_channels(x, d=utt2ref_channels_dict) -> Tuple[int, ...]:
            chs_str = d[x]
            return tuple(map(int, chs_str.split()))

    else:
        utt2ref_channels = None

    Path(args.outdir).mkdir(parents=True, exist_ok=True)
    out_wavscp = Path(args.outdir) / f"{args.name}.scp"
    if args.segments is not None:
        # Note: kaldiio supports only wav-pcm-int16le files.
        loader = kaldiio.load_scp_sequential(args.scp, segments=args.segments)
        if args.audio_format.endswith("ark"):
            fark = open(Path(args.outdir) / f"data_{args.name}.ark", "wb")
            fscp = out_wavscp.open("w")
        else:
            writer = SoundScpWriter(
                args.outdir,
                out_wavscp,
                format=args.audio_format,
            )

        with out_num_samples.open("w") as fnum_samples:
            for uttid, (rate, wave) in tqdm(loader):
                # wave: (Time,) or (Time, Nmic)
                if wave.ndim == 2 and utt2ref_channels is not None:
                    wave = wave[:, utt2ref_channels(uttid)]

                if args.fs is not None and args.fs != rate:
                    # FIXME(kamo): To use sox?
                    wave = resampy.resample(wave.astype(np.float64),
                                            rate,
                                            args.fs,
                                            axis=0)
                    wave = wave.astype(np.int16)
                    rate = args.fs
                if args.audio_format.endswith("ark"):
                    if "flac" in args.audio_format:
                        suf = "flac"
                    elif "wav" in args.audio_format:
                        suf = "wav"
                    else:
                        raise RuntimeError(
                            f"Expected wav.ark or flac.ark: {args.audio_format}"
                        )

                    # NOTE(kamo): Using extended ark format style here.
                    # This format is incompatible with Kaldi
                    kaldiio.save_ark(
                        fark,
                        {uttid: (wave, rate)},
                        scp=fscp,
                        append=True,
                        write_function=f"soundfile_{suf}",
                    )

                else:
                    writer[uttid] = rate, wave
                fnum_samples.write(f"{uttid} {len(wave)}\n")
    else:
        if args.audio_format.endswith("ark"):
            fark = open(Path(args.outdir) / f"data_{args.name}.ark", "wb")
        else:
            wavdir = Path(args.outdir) / f"data_{args.name}"
            wavdir.mkdir(parents=True, exist_ok=True)

        with Path(args.scp).open("r") as fscp, out_wavscp.open(
                "w") as fout, out_num_samples.open("w") as fnum_samples:
            for line in tqdm(fscp):
                uttid, wavpath = line.strip().split(None, 1)

                if wavpath.endswith("|"):
                    # Streaming input e.g. cat a.wav |
                    with kaldiio.open_like_kaldi(wavpath, "rb") as f:
                        with BytesIO(f.read()) as g:
                            wave, rate = soundfile.read(g, dtype=np.int16)
                            if wave.ndim == 2 and utt2ref_channels is not None:
                                wave = wave[:, utt2ref_channels(uttid)]

                        if args.fs is not None and args.fs != rate:
                            # FIXME(kamo): To use sox?
                            wave = resampy.resample(wave.astype(np.float64),
                                                    rate,
                                                    args.fs,
                                                    axis=0)
                            wave = wave.astype(np.int16)
                            rate = args.fs

                        if args.audio_format.endswith("ark"):
                            if "flac" in args.audio_format:
                                suf = "flac"
                            elif "wav" in args.audio_format:
                                suf = "wav"
                            else:
                                raise RuntimeError(
                                    f"Expected wav.ark or flac.ark: "
                                    f"{args.audio_format}"
                                )

                            # NOTE(kamo): Using extended ark format style here.
                            # This format is incompatible with Kaldi
                            kaldiio.save_ark(
                                fark,
                                {uttid: (wave, rate)},
                                scp=fout,
                                append=True,
                                write_function=f"soundfile_{suf}",
                            )
                        else:
                            owavpath = str(wavdir /
                                           f"{uttid}.{args.audio_format}")
                            soundfile.write(owavpath, wave, rate)
                            fout.write(f"{uttid} {owavpath}\n")
                else:
                    wave, rate = soundfile.read(wavpath, dtype=np.int16)
                    if wave.ndim == 2 and utt2ref_channels is not None:
                        wave = wave[:, utt2ref_channels(uttid)]
                        save_asis = False

                    elif args.audio_format.endswith("ark"):
                        save_asis = False

                    elif Path(wavpath).suffix == "." + args.audio_format and (
                            args.fs is None or args.fs == rate):
                        save_asis = True

                    else:
                        save_asis = False

                    if save_asis:
                        # --segments is not given, --fs is absent (or equals
                        # the file's rate), the line doesn't end with "|"
                        # (i.e. no unix pipe), and the file already has the
                        # target extension; only in this case, use the
                        # original file as is.
                        fout.write(f"{uttid} {wavpath}\n")
                    else:
                        if args.fs is not None and args.fs != rate:
                            # FIXME(kamo): To use sox?
                            wave = resampy.resample(wave.astype(np.float64),
                                                    rate,
                                                    args.fs,
                                                    axis=0)
                            wave = wave.astype(np.int16)
                            rate = args.fs

                        if args.audio_format.endswith("ark"):
                            if "flac" in args.audio_format:
                                suf = "flac"
                            elif "wav" in args.audio_format:
                                suf = "wav"
                            else:
                                raise RuntimeError(
                                    f"Expected wav.ark or flac.ark: "
                                    f"{args.audio_format}"
                                )

                            # NOTE(kamo): Using extended ark format style here.
                            # This format is not supported in Kaldi.
                            kaldiio.save_ark(
                                fark,
                                {uttid: (wave, rate)},
                                scp=fout,
                                append=True,
                                write_function=f"soundfile_{suf}",
                            )
                        else:
                            owavpath = str(wavdir /
                                           f"{uttid}.{args.audio_format}")
                            soundfile.write(owavpath, wave, rate)
                            fout.write(f"{uttid} {owavpath}\n")
                fnum_samples.write(f"{uttid} {len(wave)}\n")
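
Examples #4 and #5 both build utt2ref_channels with a default argument (d=utt2ref_channels_dict), which binds the lookup table at definition time, the usual idiom for avoiding late-binding surprises in closures. A minimal sketch of the same idiom:

from typing import Dict, Tuple

def make_utt2ref_channels(table: Dict[str, str]):
    # The default argument freezes the table reference when the inner
    # function is defined, mirroring the pattern used in the scripts above.
    def utt2ref_channels(x: str, d: Dict[str, str] = table) -> Tuple[int, ...]:
        return tuple(map(int, d[x].split()))
    return utt2ref_channels

fn = make_utt2ref_channels({"utt1": "0 2"})
print(fn("utt1"))  # -> (0, 2)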
Example #6
def main(cmd=None):
    print(get_commandline_args(), file=sys.stderr)
    parser = get_parser()
    args = parser.parse_args(cmd)
    kwargs = vars(args)
    split_scps(**kwargs)
Example #7
def main(cmd=None):
    logfmt = "%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s"
    logging.basicConfig(level=logging.INFO, format=logfmt)
    logging.info(get_commandline_args())

    parser = get_parser()
    args = parser.parse_args(cmd)
    args.cmd = shlex.split(args.cmd)

    if args.host is None and shutil.which(args.cmd[0]) is None:
        raise RuntimeError(
            f"The first argument of --cmd should be a script path, "
            f"e.g. utils/run.pl: {args.cmd[0]}"
        )

    # Specify init_method:
    #   See: https://pytorch.org/docs/stable/distributed.html#initialization
    if args.host is None and args.num_nodes <= 1:
        # Automatically set init_method if num_nodes == 1
        init_method = None
    else:
        if args.master_port is None:
            # Try "shared-file system initialization" if master_port is not specified
            # Give random name to avoid reusing previous file
            init_file = args.init_file_prefix + str(uuid.uuid4())
            init_file = Path(init_file).absolute()
            Path(init_file).parent.mkdir(exist_ok=True, parents=True)
            init_method = ["--dist_init_method", f"file://{init_file}"]
        else:
            init_method = ["--dist_master_port", str(args.master_port)]

            # This can be omitted in Slurm mode
            if args.master_addr is not None:
                init_method += ["--dist_master_addr", args.master_addr]
            elif args.host is not None:
                init_method += [
                    "--dist_master_addr",
                    args.host.split(",")[0].split(":")[0],
                ]

    # Log-rotation
    for i in range(args.max_num_log_files - 1, -1, -1):
        if i == 0:
            p = Path(args.log)
            pn = p.parent / (p.stem + ".1" + p.suffix)
        else:
            _p = Path(args.log)
            p = _p.parent / (_p.stem + f".{i}" + _p.suffix)
            pn = _p.parent / (_p.stem + f".{i + 1}" + _p.suffix)

        if p.exists():
            if i == args.max_num_log_files - 1:
                p.unlink()
            else:
                shutil.move(p, pn)

    processes = []
    # Submit command via SSH
    if args.host is not None:
        hosts = []
        ids_list = []
        # e.g. args.host = "host1:0:2,host2:0:1"
        for host in args.host.split(","):
            # e.g. host = "host1:0:2"
            sps = host.split(":")
            host = sps[0]
            if len(sps) > 1:
                ids = [int(x) for x in sps[1:]]
            else:
                ids = list(range(args.ngpu))
            hosts.append(host)
            ids_list.append(ids)

        world_size = sum(max(len(x), 1) for x in ids_list)
        logging.info(f"{len(hosts)}nodes with world_size={world_size} via SSH")

        if args.envfile is not None:
            env = f"source {args.envfile}"
        else:
            env = ""

        if args.log != "-":
            Path(args.log).parent.mkdir(parents=True, exist_ok=True)
            f = Path(args.log).open("w", encoding="utf-8")
        else:
            # Output to stdout/stderr
            f = None

        rank = 0
        for host, ids in zip(hosts, ids_list):
            ngpu = 1 if len(ids) > 0 else 0
            ids = ids if len(ids) > 0 else ["none"]

            for local_rank in ids:
                cmd = (
                    args.args
                    + [
                        "--ngpu",
                        str(ngpu),
                        "--multiprocessing_distributed",
                        "false",
                        "--local_rank",
                        str(local_rank),
                        "--dist_rank",
                        str(rank),
                        "--dist_world_size",
                        str(world_size),
                    ]
                    + init_method
                )
                if ngpu == 0:
                    # Gloo supports both GPU and CPU mode.
                    #   See: https://pytorch.org/docs/stable/distributed.html
                    cmd += ["--dist_backend", "gloo"]

                heredoc = f"""<< EOF
set -euo pipefail
cd {os.getcwd()}
{env}
{" ".join([c if len(c) != 0 else "''" for c in cmd])}
EOF
"""

                # FIXME(kamo): The process will stay alive even if this
                #  program is stopped, because we don't pass -t here
                #  (i.e. no pty is assigned), so the remote program is not
                #  killed when the SSH connection is closed.
                process = subprocess.Popen(
                    ["ssh", host, "bash", heredoc],
                    stdout=f,
                    stderr=f,
                )

                processes.append(process)

                rank += 1

    # If Single node
    elif args.num_nodes <= 1:
        if args.ngpu > 1:
            if args.multiprocessing_distributed:
                # NOTE:
                #   If multiprocessing_distributed=true,
                # -> Distributed mode, which is multi-process and multi-GPU,
                #    and TCP initialization is used in the single-node case:
                #      e.g. init_method="tcp://localhost:20000"
                logging.info(f"single-node with {args.ngpu} GPUs in distributed mode")
            else:
                # NOTE:
                #   If multiprocessing_distributed=false
                # -> "DataParallel" mode, which is single-process
                #    and multi-GPU with threading.
                # See:
                # https://discuss.pytorch.org/t/why-torch-nn-parallel-distributeddataparallel-runs-faster-than-torch-nn-dataparallel-on-single-machine-with-multi-gpu/32977/2
                logging.info(f"single-node with {args.ngpu} GPUs using DataParallel")

        # Simply use the given command as is
        cmd = (
            args.cmd
            # arguments for ${cmd}
            + ["--gpu", str(args.ngpu), args.log]
            # arguments for *_train.py
            + args.args
            + [
                "--ngpu",
                str(args.ngpu),
                "--multiprocessing_distributed",
                str(args.multiprocessing_distributed),
            ]
        )
        process = subprocess.Popen(cmd)
        processes.append(process)

    elif Path(args.cmd[0]).name == "run.pl":
        raise RuntimeError("run.pl doesn't support submitting to the other nodes.")

    elif Path(args.cmd[0]).name == "ssh.pl":
        raise RuntimeError("Use --host option instead of ssh.pl")

    # If Slurm
    elif Path(args.cmd[0]).name == "slurm.pl":
        logging.info(f"{args.num_nodes}nodes and {args.ngpu}gpu-per-node using srun")
        cmd = (
            args.cmd
            # arguments for ${cmd}
            + [
                "--gpu",
                str(args.ngpu),
                "--num_threads",
                str(max(args.ngpu, 1)),
                "--num_nodes",
                str(args.num_nodes),
                args.log,
                "srun",
                # Inherit all environment variable from parent process
                "--export=ALL",
            ]
            # arguments for *_train.py
            + args.args
            + [
                "--ngpu",
                str(args.ngpu),
                "--multiprocessing_distributed",
                "true",
                "--dist_launcher",
                "slurm",
            ]
            + init_method
        )
        if args.ngpu == 0:
            # Gloo supports both GPU and CPU mode.
            #   See: https://pytorch.org/docs/stable/distributed.html
            cmd += ["--dist_backend", "gloo"]
        process = subprocess.Popen(cmd)
        processes.append(process)

    else:
        # This pattern can also work with Slurm.

        logging.info(f"{args.num_nodes}nodes and {args.ngpu}gpu-per-node using mpirun")
        cmd = (
            args.cmd
            # arguments for ${cmd}
            + [
                "--gpu",
                str(args.ngpu),
                "--num_threads",
                str(max(args.ngpu, 1)),
                # Check the scheduler settings, i.e. conf/queue.conf,
                # so that --num_nodes requests one process per node
                "--num_nodes",
                str(args.num_nodes),
                args.log,
                "mpirun",
                # -np option can be omitted with Torque/PBS
                "-np",
                str(args.num_nodes),
            ]
            # arguments for *_train.py
            + args.args
            + [
                "--ngpu",
                str(args.ngpu),
                "--multiprocessing_distributed",
                "true",
                "--dist_launcher",
                "mpi",
            ]
            + init_method
        )
        if args.ngpu == 0:
            # Gloo supports both GPU and CPU mode.
            #   See: https://pytorch.org/docs/stable/distributed.html
            cmd += ["--dist_backend", "gloo"]
        process = subprocess.Popen(cmd)
        processes.append(process)

    logging.info(f"log file: {args.log}")

    logging.info(f"submitting cmd {cmd}")

    failed = False
    while any(p.returncode is None for p in processes):
        for process in processes:
            # If any process has failed, try to kill the other processes too
            if failed and process.returncode is not None:
                process.kill()
            else:
                try:
                    process.wait(0.5)
                except subprocess.TimeoutExpired:
                    pass

                if process.returncode is not None and process.returncode != 0:
                    failed = True

    for process in processes:
        if process.returncode != 0:
            print(
                subprocess.CalledProcessError(returncode=process.returncode, cmd=cmd),
                file=sys.stderr,
            )
            p = Path(args.log)
            if p.exists():
                with p.open() as f:
                    lines = list(f)
                raise RuntimeError(
                    f"\n################### The last 1000 lines of {args.log} "
                    f"###################\n" + "".join(lines[-1000:])
                )
            else:
                raise RuntimeError
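
The log-rotation loop near the top of example #7 shifts train.log to train.1.log, train.1.log to train.2.log, and so on, deleting the oldest file. A self-contained sketch of the same rotation (hypothetical log path in the usage line):

import shutil
from pathlib import Path

def rotate_logs(log: str, max_num_log_files: int = 3) -> None:
    """Shift train.log -> train.1.log -> ... and drop the oldest file."""
    p = Path(log)
    for i in range(max_num_log_files - 1, -1, -1):
        if i == 0:
            src = p
        else:
            src = p.parent / (p.stem + f".{i}" + p.suffix)
        dst = p.parent / (p.stem + f".{i + 1}" + p.suffix)
        if src.exists():
            if i == max_num_log_files - 1:
                src.unlink()  # the oldest log falls off the end
            else:
                shutil.move(str(src), str(dst))

# Usage (hypothetical path):
# rotate_logs("exp/asr_train/train.log")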