# Example #1
    # Disable GPU
    os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

import tensorflow as tf

if __name__ == '__main__':
    # TF 1.x setup: start from a clean default graph with INFO-level logging.
    tf.reset_default_graph()
    tf.logging.set_verbosity(tf.logging.INFO)
    # NOTE(review): `args`, `Params`, `Trainer`, `open_or_fd` and
    # `read_mat_ark` are not defined in this excerpt -- presumably the
    # argparse result and project helpers imported earlier in the file.
    nnet_dir = os.path.join(args.model_dir, "nnet")
    config_json = os.path.join(args.model_dir, "nnet/config.json")
    if not os.path.isfile(config_json):
        sys.exit("Cannot find params.json in %s" % config_json)
    params = Params(config_json)

    # Attention weights: override the embedding node so the network's
    # attention weights are extracted instead of the default embedding.
    params.embedding_node = "attention_weights"

    # Input feature dimensionality was written to nnet_dir/feature_dim
    # (single integer on the first line) when the model was trained.
    with open(os.path.join(nnet_dir, "feature_dim"), "r") as f:
        dim = int(f.readline().strip())
    trainer = Trainer(params, args.model_dir, dim, single_cpu=True)
    trainer.build("predict")

    # The rspecifier cannot be scp -- only ark files or input pipes.
    if args.rspecifier.rsplit(".", 1)[1] == "scp":
        sys.exit("The rspecifier must be ark or input pipe")

    fp_out = open_or_fd(args.wspecifier, "wb")
    for index, (key, feature) in enumerate(read_mat_ark(args.rspecifier)):
        # Utterances shorter than min_chunk_size are logged and skipped.
        # NOTE(review): the snippet is truncated here -- the loop body
        # presumably continues with a `continue` and the actual extraction.
        if feature.shape[0] < args.min_chunk_size:
            tf.logging.info("[INFO] Key %s length too short, %d < %d, skip." %
                            (key, feature.shape[0], args.min_chunk_size))
# Example #2
import tensorflow as tf

if __name__ == '__main__':
    # TF 1.x setup: start from a clean default graph with INFO-level logging.
    tf.reset_default_graph()
    tf.logging.set_verbosity(tf.logging.INFO)

    # NOTE(review): `args`, `Params`, `Trainer`, `np` and
    # `num_total_train_speakers` are not defined in this excerpt --
    # presumably defined/imported earlier in the original file.
    nnet_dir = os.path.join(args.model_dir, "nnet")

    config_json = os.path.join(args.model_dir, "nnet/config.json")
    if not os.path.isfile(config_json):
        sys.exit("Cannot find params.json in %s" % config_json)
    params = Params(config_json)

    # Change the output node if necessary (empty --node keeps the default).
    if len(args.node) != 0:
        params.embedding_node = args.node
    tf.logging.info("Extract embedding from %s" % params.embedding_node)

    trainer = Trainer(params, args.model_dir, single_cpu=True)

    # Input feature dimensionality was written to nnet_dir/feature_dim
    # (single integer on the first line) when the model was trained.
    with open(os.path.join(nnet_dir, "feature_dim"), "r") as f:
        dim = int(f.readline().strip())
    #trainer.build("predict", dim=dim)
    # Build with the a-softmax extraction head. NOTE(review): 154 is
    # hard-coded and presumably must match the number of training
    # speakers -- confirm against the training configuration.
    trainer.build("predict",
                  dim=dim,
                  loss_type="extract_asoftmax",
                  num_speakers=154)

    # The rspecifier cannot be scp -- only ark files or input pipes.
    if args.rspecifier.rsplit(".", 1)[1] == "scp":
        sys.exit("The rspecifier must be ark or input pipe")
    else:
        # Hack: flagging the trainer as already loaded so that no
        # checkpoint is restored and the session keeps its random
        # initialization -- presumably for debugging; verify intent.
        tf.logging.info("Use random initialization")
        trainer.is_loaded = True

    # Read the softmax output kernel out of the session; transposing it
    # makes each row correspond to one speaker's weight vector.
    with tf.variable_scope("softmax", reuse=True):
        kernel = tf.get_variable("output/kernel",
                                 shape=[
                                     trainer.embeddings.get_shape()[-1],
                                     num_total_train_speakers
                                 ])
        kernel_val = trainer.sess.run(kernel)
    weights = np.transpose(kernel_val)

    # Output the final activation (prior to the softmax layer)
    params.embedding_node = "output"
    trainer.build("predict")

    # NOTE(review): this check contradicts the one above -- here an scp
    # rspecifier is *required*, while earlier it was rejected; verify.
    if args.rspecifier.rsplit(".", 1)[1] != "scp":
        # Here the rspecifier must be scp.
        sys.exit("The rspecifier must be scp")

    # Speaker name -> integer id, one "spk id" pair per line.
    spk2int = {}
    with open(args.spklist, 'r') as f:
        for line in f.readlines():
            spk, i = line.strip().split(" ")
            spk2int[spk] = int(i)

    # Utterance -> speaker map. NOTE(review): the snippet is truncated
    # here -- the loop body is missing from this excerpt.
    utt2spk = {}
    with open(args.utt2spk, 'r') as f:
        for line in f.readlines():