Example #1
import argparse
import logging
import os

# helper functions (read_txt, find_files, calc_stats) are assumed to be
# imported from the repository's own utility modules
def main():
    parser = argparse.ArgumentParser()

    parser.add_argument("--bindir", required=True, type=str,
        help="name of the dir of bin files")
    parser.add_argument("--stats", required=True, type=str,
        help="filename of hdf5 format")
    parser.add_argument("--spklist", required=True, type=str,
        help="list of speakers")
    parser.add_argument("--verbose", default=1,
        type=int, help="log message level")

    args = parser.parse_args()

    # set log level
    if args.verbose == 1:
        logging.basicConfig(level=logging.INFO,
                            format='%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s',
                            datefmt='%m/%d/%Y %I:%M:%S')
    elif args.verbose > 1:
        logging.basicConfig(level=logging.DEBUG,
                            format='%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s',
                            datefmt='%m/%d/%Y %I:%M:%S')
    else:
        logging.basicConfig(level=logging.WARN,
                            format='%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s',
                            datefmt='%m/%d/%Y %I:%M:%S')
        logging.warn("logging is disabled.")

    # show arguments
    for key, value in vars(args).items():
        logging.info("%s = %s" % (key, str(value)))
    
    # define feat param here
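    # (fs: sampling rate in Hz; shift_ms / length_ms: frame shift and window
    #  length in ms; fftl: FFT length; n_mels: number of mel-filterbank
    #  channels; mcep_dim: mel-cepstrum order; mcep_alpha: frequency-warping
    #  coefficient; feat_dim: total dimension of the concatenated features)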
    feat_param = {
        'fs'                : 22050,
        'shift_ms'          : 5,
        'length_ms'         : 25,
        'fftl'              : 1024,
        'n_mels'            : 80,
        'mcep_dim'          : 34,
        'mcep_alpha'        : 0.455,
        'feat_dim'          : 1064,
    }
    
    # read speakers
    spk_list = read_txt(args.spklist)

    # read file list, for training data only
    # file_list = sorted(find_files(args.bindir, "[12]*.bin"))
    file_list = sorted(find_files(args.bindir, "*/VAD/*.bin"))
    logging.info("number of utterances = %d" % len(file_list))

    # calculate statistics
    if not os.path.exists(os.path.dirname(args.stats)):
        os.makedirs(os.path.dirname(args.stats))
    calc_stats(file_list, feat_param, spk_list, args)
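
The examples on this page rely on small project helpers that are not shown here. A minimal sketch of what read_txt and find_files presumably do, written as an assumption rather than the repository's actual implementation:

import fnmatch
import os

def read_txt(path):
    """Read a text file and return its non-empty lines, stripped."""
    with open(path) as f:
        return [line.strip() for line in f if line.strip()]

def find_files(directory, pattern="*.wav"):
    """Recursively collect paths under directory whose full path matches the
    glob-style pattern (so patterns such as "*/VAD/*.bin" also work)."""
    matches = []
    for root, _, filenames in os.walk(directory):
        for filename in filenames:
            path = os.path.join(root, filename)
            if fnmatch.fnmatch(path, pattern):
                matches.append(path)
    return matches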
Example #2
import argparse
import json
import logging
import os
from importlib import import_module

import numpy as np
import tensorflow as tf

# project helpers (get_default_logdir_output, read_txt, read_hdf5, load,
# Whole_feature_reader, MinMaxScaler, StandardScaler) are assumed to be
# imported from the repository's own modules
def main():

    parser = argparse.ArgumentParser(description="Conversion.")
    parser.add_argument("--logdir",
                        required=True,
                        type=str,
                        help="path of log directory")
    parser.add_argument("--checkpoint",
                        default=None,
                        type=str,
                        help="path of checkpoint")

    parser.add_argument("--src",
                        default=None,
                        required=True,
                        type=str,
                        help="source speaker")
    parser.add_argument("--trg",
                        default=None,
                        required=True,
                        type=str,
                        help="target speaker")
    parser.add_argument("--type",
                        default='test',
                        type=str,
                        help="test or valid (default is test)")

    parser.add_argument("--input_feat",
                        required=True,
                        type=str,
                        help="input feature type")
    parser.add_argument("--output_feat",
                        required=True,
                        type=str,
                        help="output feature type")
    parser.add_argument("--mcd",
                        action='store_true',
                        help="calculate mcd or not")
    parser.add_argument("--syn",
                        action='store_true',
                        help="synthesize voice or not")
    args = parser.parse_args()

    # make exp directory
    output_dir = get_default_logdir_output(args)
    tf.gfile.MakeDirs(output_dir)

    # set log level
    fmt = '%(asctime)s %(message)s'
    datefmt = '%m/%d/%Y %I:%M:%S'
    logFormatter = logging.Formatter(fmt, datefmt=datefmt)
    logging.basicConfig(
        level=logging.INFO,
        filename=os.path.join(output_dir, 'exp.log'),
        format=fmt,
        datefmt=datefmt,
    )
    consoleHandler = logging.StreamHandler()
    consoleHandler.setFormatter(logFormatter)
    logging.getLogger().addHandler(consoleHandler)
    logging.info('====================')
    logging.info('Conversion start')
    logging.info(args)

    # Load architecture
    arch = tf.gfile.Glob(os.path.join(
        args.logdir, 'architecture*.json'))[0]  # should only be 1 file
    with open(arch) as fp:
        arch = json.load(fp)

    # Load the model module
    module = import_module(arch['model_module'], package=None)
    MODEL = getattr(module, arch['model'])

    input_feat = args.input_feat
    input_feat_dim = arch['feat_param']['dim'][input_feat]
    output_feat = args.output_feat

    # read speakers
    spk_list = read_txt(arch['spklist'])

    # Load statistics and build one normalizer per feature and type
    normalizers = {}
    for k in arch['normalizer']:
        normalizers[k] = {}
        for norm_type in arch['normalizer'][k]['type']:
            if norm_type == 'minmax':
                normalizer = MinMaxScaler(
                    xmax=read_hdf5(arch['stats'], '/max/' + k),
                    xmin=read_hdf5(arch['stats'], '/min/' + k),
                )
            elif norm_type == 'meanvar':
                normalizer = StandardScaler(
                    mu=read_hdf5(arch['stats'], '/mean/' + k),
                    std=read_hdf5(arch['stats'], '/scale/' + k),
                )
            else:
                raise ValueError("unknown normalizer type: %s" % norm_type)
            normalizers[k][norm_type] = normalizer

    # Define placeholders: input features, plus the target speaker index
    # broadcast to one label per input frame
    x_pl = tf.placeholder(tf.float32, [None, input_feat_dim])

    yh_pl = tf.placeholder(dtype=tf.int64, shape=[1])
    yh = yh_pl * tf.ones(shape=[tf.shape(x_pl)[0]], dtype=tf.int64)
    yh = tf.expand_dims(yh, 0)

    # Define model
    model = MODEL(arch, normalizers)
    z, _ = model.encode(x_pl, input_feat)
    xh = model.decode(z, yh, output_feat)

    # make directories for output
    tf.gfile.MakeDirs(os.path.join(output_dir, 'latent'))
    tf.gfile.MakeDirs(
        os.path.join(output_dir, 'converted-{}'.format(output_feat)))

    # Define session
    with tf.Session() as sess:

        # define saver
        saver = tf.train.Saver()

        # load checkpoint
        if args.checkpoint is None:
            load(
                saver,
                sess,
                args.logdir,
            )
        else:
            _, ckpt = os.path.split(args.checkpoint)
            load(saver, sess, args.logdir, ckpt=ckpt)

        # get feature list, either validation set or test set
        if args.type == 'test':
            files = tf.gfile.Glob(
                arch['conversion']['test_file_pattern'].format(args.src))
        elif args.type == 'valid':
            files = []
            for p in arch['training']['valid_file_pattern']:
                files.extend(tf.gfile.Glob(p.replace('*', args.src)))
        else:
            raise ValueError("--type must be 'test' or 'valid'")
        files = sorted(files)

        # conversion
        for f in files:
            basename = os.path.split(f)[-1]
            path_to_latent = os.path.join(
                output_dir, 'latent',
                '{}-{}-{}'.format(args.src, args.trg, basename))
            path_to_cvt = os.path.join(
                output_dir, 'converted-{}'.format(output_feat),
                '{}-{}-{}'.format(args.src, args.trg, basename))
            logging.info(basename)

            # load source features
            src_data = Whole_feature_reader(f, arch['feat_param'])

            # run conversion: encode the source, decode with the target speaker
            latent, cvt = sess.run(
                [z, xh],
                feed_dict={
                    yh_pl: np.asarray([spk_list.index(args.trg)]),
                    x_pl: src_data[input_feat]
                })
            # save binary outputs
            with open(path_to_latent, 'wb') as fp:
                fp.write(latent.tobytes())
            with open(path_to_cvt, 'wb') as fp:
                fp.write(cvt.tobytes())

    # optionally calculate MCD
    if args.mcd:
        cmd = "python ./mcd_calculate.py" + \
                    " --type " + args.type + \
                    " --logdir " + output_dir + \
                    " --input_feat " + input_feat + \
                    " --output_feat " + output_feat
        print(cmd)
        os.system(cmd)

    # optionally synthesize waveform
    if args.syn:
        cmd = "python ./synthesize.py" + \
                    " --type " + args.type + \
                    " --logdir " + output_dir + \
                    " --input_feat " + input_feat + \
                    " --output_feat " + output_feat
        print(cmd)
        os.system(cmd)
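
MinMaxScaler and StandardScaler in this example are lightweight project classes, not the scikit-learn ones. A minimal sketch of the behavior they plausibly implement (the constructor arguments match the call sites above; the method names are assumptions):

import numpy as np

class MinMaxScaler:
    """Hypothetical min-max normalizer mapping features into [0, 1]."""

    def __init__(self, xmax, xmin):
        self.xmax = np.asarray(xmax)
        self.xmin = np.asarray(xmin)

    def forward_process(self, x):   # assumed method name
        return (x - self.xmin) / (self.xmax - self.xmin)

    def backward_process(self, x):  # assumed method name
        return x * (self.xmax - self.xmin) + self.xmin


class StandardScaler:
    """Hypothetical mean-variance (z-score) normalizer."""

    def __init__(self, mu, std):
        self.mu = np.asarray(mu)
        self.std = np.asarray(std)

    def forward_process(self, x):   # assumed method name
        return (x - self.mu) / self.std

    def backward_process(self, x):  # assumed method name
        return x * self.std + self.mu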
Example #3
import argparse
import logging
import math
import os

# read_txt, find_files and the calc_*_stats helpers are assumed to be
# imported from the repository's own modules
def main():
    parser = argparse.ArgumentParser()

    parser.add_argument("--bindir",
                        required=True,
                        type=str,
                        help="name of the dir of bin files")
    parser.add_argument("--stats",
                        required=True,
                        type=str,
                        help="filename of hdf5 format")
    parser.add_argument("--spklist",
                        required=True,
                        type=str,
                        help="list of speakers")
    parser.add_argument("--verbose",
                        default=1,
                        type=int,
                        help="log message level")

    args = parser.parse_args()

    # set log level
    if args.verbose == 1:
        logging.basicConfig(
            level=logging.INFO,
            format='%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s',
            datefmt='%m/%d/%Y %I:%M:%S')
    elif args.verbose > 1:
        logging.basicConfig(
            level=logging.DEBUG,
            format='%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s',
            datefmt='%m/%d/%Y %I:%M:%S')
    else:
        logging.basicConfig(
            level=logging.WARN,
            format='%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s',
            datefmt='%m/%d/%Y %I:%M:%S')
        logging.warning("logging is disabled.")

    # show arguments
    for key, value in vars(args).items():
        logging.info("%s = %s" % (key, str(value)))

    # define feat param here
    feat_param = {
        'fs': 22050,
        'shift_ms': 5,
        'length_ms': 25,
        'fftl': 1024,
        'n_mels': 80,
        'mcep_dim': 34,
        'mcep_alpha': 0.455,
        'feat_dim': 1064,
    }

    # read speakers
    spk_list = read_txt(args.spklist)

    # read file list, for training data only
    # file_list = sorted(find_files(args.bindir, "[12]*.bin"))
    file_list = find_files(args.bindir, "*.bin")
    capped_file_list = list()
    total_count = 0
    if len(file_list) > 20000:
        # cap utterances per speaker; the VAD and noVAD subsets are counted
        # separately, so each speaker contributes at most max_num per subset
        max_num = math.ceil(10000 / len(spk_list))
        count_dict = dict()
        for fname in file_list:
            is_vad = fname.split("/")[-3].strip()
            spk = fname.split("/")[-2].strip()
            if is_vad not in count_dict:
                count_dict[is_vad] = dict()
            if count_dict[is_vad].get(spk, 0) >= max_num:
                continue
            count_dict[is_vad][spk] = count_dict[is_vad].get(spk, 0) + 1
            capped_file_list.append(fname)
            total_count += 1
    else:
        capped_file_list = file_list
    file_list = sorted(capped_file_list)
    logging.info("number of utterances = %d" % len(file_list))

    # calculate statistics
    if not os.path.exists(os.path.dirname(args.stats)):
        os.makedirs(os.path.dirname(args.stats))
    # calc_stats(file_list, feat_param, spk_list, args)
    calc_sp_stats(file_list, feat_param, spk_list, args)
    calc_mcc_stats(file_list, feat_param, spk_list, args)
    calc_f0_stats(file_list, feat_param, spk_list, args)
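
calc_sp_stats, calc_mcc_stats and calc_f0_stats are project functions not shown on this page. For illustration only, a hypothetical sketch of the per-speaker log-F0 statistics such a helper typically computes (load_f0 is a made-up loader standing in for the project's feature reader):

import numpy as np

def calc_f0_stats_sketch(file_list, spk_list):
    """Collect voiced-frame log-F0 mean/std per speaker (illustrative only)."""
    stats = {}
    for spk in spk_list:
        f0s = []
        for fname in (f for f in file_list if "/%s/" % spk in f):
            f0 = load_f0(fname)        # hypothetical loader for the f0 track
            f0s.append(f0[f0 > 0])     # keep voiced frames only
        all_f0 = np.concatenate(f0s)
        stats[spk] = {'mu': np.log(all_f0).mean(),
                      'std': np.log(all_f0).std()}
    return stats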
Example #4
import argparse
import logging
import multiprocessing as mp
import os
from distutils.util import strtobool

import numpy as np

# find_files, read_txt, filepath_create and world_feature_extract are
# assumed to be imported from the repository's own modules
def main():
    parser = argparse.ArgumentParser(
        description="making feature file configurations.")

    parser.add_argument(
        "--waveforms", required=True, type=str,
        help="directory or list of filename of input wavfile")
    parser.add_argument(
        "--bindir", required=True, type=str,
        help="directory to save bin")
    parser.add_argument(
        "--confdir", required=True, type=str,
        help="configuration directory")
    parser.add_argument(
        "--overwrite", default=False,
        type=strtobool, help="if set to true, overwrite existing feature files")
    parser.add_argument(
        "--n_jobs", default=12,
        type=int, help="number of parallel jobs")
    parser.add_argument(
        "--verbose", default=1,
        type=int, help="log message level")

    args = parser.parse_args()

    # set log level
    if args.verbose == 1:
        logging.basicConfig(level=logging.INFO,
                            format='%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s',
                            datefmt='%m/%d/%Y %I:%M:%S')
    elif args.verbose > 1:
        logging.basicConfig(level=logging.DEBUG,
                            format='%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s',
                            datefmt='%m/%d/%Y %I:%M:%S')
    else:
        logging.basicConfig(level=logging.WARN,
                            format='%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s',
                            datefmt='%m/%d/%Y %I:%M:%S')
        logging.warn("logging is disabled.")

    # show arguments
    for key, value in vars(args).items():
        logging.info("%s = %s" % (key, str(value)))

    # read list
    if os.path.isdir(args.waveforms):
        file_list = sorted(find_files(args.waveforms, "*.wav"))
    else:
        file_list = read_txt(args.waveforms)
    logging.info("number of utterances = %d" % len(file_list))

    # read speaker list
    spk_list = read_txt(os.path.join(args.confdir, 'spk.list'))

    # read f0 max/min of the speaker, and define feature extractor
    feat_param_list = []
    for s in spk_list:
        with open(args.confdir + '/' + s + '.f0', 'r') as f:
            f0min, f0max = [int(f0) for f0 in f.read().split(' ')]
        feat_param_list.append({
                'fs'                : 22050,
                'shift_ms'          : 5,
                'length_ms'         : 25,
                'fftl'              : 1024,
                'n_mels'            : 80,
                'mcep_dim'          : 34,
                'mcep_alpha'        : 0.455,
                'f0min'             : f0min,
                'f0max'             : f0max,
                'highpass_cutoff'   : 70,
        })

    # create file folders
    filepath_create(file_list, args.bindir)
    # divide list
    file_lists = np.array_split(file_list, args.n_jobs)
    file_lists = [f_list.tolist() for f_list in file_lists]

    # multi processing
    processes = []
    target_fn = world_feature_extract
    for f in file_lists:
        p = mp.Process(target=target_fn, args=(f, spk_list, feat_param_list, args,))
        p.start()
        processes.append(p)

    # wait for all processes
    for p in processes:
        p.join()
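
world_feature_extract is the project's worker function and is not shown on this page. For orientation, a hypothetical sketch of the per-file WORLD analysis such a worker typically performs, assuming the pyworld, pysptk and soundfile packages are available (the function name and its exact outputs are assumptions):

import numpy as np
import pyworld as pw
import pysptk
import soundfile as sf

def extract_world_features(wav_path, p):
    """Illustrative WORLD analysis for one wav, given a feat_param dict."""
    x, fs = sf.read(wav_path)          # float64 samples, as pyworld expects
    f0, t = pw.harvest(x, fs,
                       f0_floor=p['f0min'], f0_ceil=p['f0max'],
                       frame_period=p['shift_ms'])
    sp = pw.cheaptrick(x, f0, t, fs, fft_size=p['fftl'])   # spectral envelope
    ap = pw.d4c(x, f0, t, fs, fft_size=p['fftl'])          # aperiodicity
    mcc = pysptk.sp2mc(sp, order=p['mcep_dim'], alpha=p['mcep_alpha'])
    return f0, sp, ap, mcc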