Esempio n. 1
0
# However, if SGE is used, we cannot simply set CUDA_VISIBLE_DEVICES,
# so it is better to specify the GPU id outside the program.
# Passing any number other than -1 to --gpu enables the GPU. Leave it blank if you want to disable the GPU.

import tensorflow as tf

if __name__ == '__main__':
    # Script entry point: load the saved model configuration and build the
    # network in "predict" mode so that embeddings can be extracted.
    tf.reset_default_graph()
    tf.logging.set_verbosity(tf.logging.INFO)

    nnet_dir = os.path.join(args.model_dir, "nnet")

    # The configuration lives next to the network files in <model_dir>/nnet.
    config_json = os.path.join(nnet_dir, "config.json")
    if not os.path.isfile(config_json):
        # Report the file that is actually being looked up (config.json).
        sys.exit("Cannot find config file %s" % config_json)
    params = Params(config_json)

    # Change the output node if necessary
    if args.node:
        params.embedding_node = args.node
    tf.logging.info("Extract embedding from %s" % params.embedding_node)

    trainer = Trainer(params, args.model_dir, single_cpu=True)

    # The feature dimension is stored as the first line of a text file.
    with open(os.path.join(nnet_dir, "feature_dim"), "r") as f:
        dim = int(f.readline().strip())

    # NOTE(review): the loss type and the number of speakers are hard-coded
    # here; presumably they must match the values used to train the model in
    # args.model_dir -- confirm before reusing this script with another model.
    # (A plain `trainer.build("predict", dim=dim)` was used previously.)
    trainer.build("predict",
                  dim=dim,
                  loss_type="extract_asoftmax",
                  num_speakers=154)
Esempio n. 2
0
# Register three more command-line arguments and parse the command line.
# NOTE(review): `parser` is created above this excerpt (presumably an
# argparse.ArgumentParser); the names carry no leading dashes, so they would
# be positional arguments in that case -- confirm against the full file.
parser.add_argument("weights", type=str, help="The output weights")
parser.add_argument("embeddings", type=str, help="Embeddings (label vector).")
parser.add_argument("embedding_pic", type=str, help="The output pic")

args = parser.parse_args()
import tensorflow as tf

if __name__ == '__main__':
    # Script entry point: load the saved model configuration and build the
    # network in "valid" mode using the loss configured in the params.
    tf.reset_default_graph()
    tf.logging.set_verbosity(tf.logging.INFO)

    nnet_dir = os.path.join(args.model_dir, "nnet")
    # The configuration lives next to the network files in <model_dir>/nnet.
    config_json = os.path.join(nnet_dir, "config.json")
    if not os.path.isfile(config_json):
        # Report the file that is actually being looked up (config.json).
        sys.exit("Cannot find config file %s" % config_json)
    params = Params(config_json)

    # Optional overrides of the loss configuration, kept for experiments:
    # params.loss_func = "generalized_angular_triplet_loss"
    # params.dict["triplet_center"] = "average"
    # params.dict["triplet_center_momentum"] = 0.9
    # params.dict["loss_compute"] = "softplus"
    # params.dict["margin"] = 0.1

    # The speaker count and the feature dimension are both read from the
    # data directory given on the command line.
    num_total_train_speakers = KaldiDataRandomQueue(
        args.data_dir, args.data_spklist).num_total_speakers
    dim = FeatureReader(args.data_dir).get_dim()

    trainer = Trainer(params, args.model_dir, single_cpu=True)
    trainer.build("valid",
                  dim=dim,
                  loss_type=params.loss_func,
                  num_speakers=num_total_train_speakers)
Esempio n. 3
0
# Parse the command line (`parser` is defined above this excerpt).
args = parser.parse_args()

# A --gpu value of -1 means "run without a GPU". The environment variable is
# set here, ahead of the `import tensorflow` statement that follows.
if args.gpu == -1:
    # Disable GPU
    os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

import tensorflow as tf

if __name__ == '__main__':
    # Script entry point: build the network in "predict" mode with the
    # attention weights as the output node, then open the output stream.
    tf.reset_default_graph()
    tf.logging.set_verbosity(tf.logging.INFO)
    nnet_dir = os.path.join(args.model_dir, "nnet")
    # The configuration lives next to the network files in <model_dir>/nnet.
    config_json = os.path.join(nnet_dir, "config.json")
    if not os.path.isfile(config_json):
        # Report the file that is actually being looked up (config.json).
        sys.exit("Cannot find config file %s" % config_json)
    params = Params(config_json)

    # Attention weights
    params.embedding_node = "attention_weights"

    # The feature dimension is stored as the first line of a text file.
    with open(os.path.join(nnet_dir, "feature_dim"), "r") as f:
        dim = int(f.readline().strip())
    trainer = Trainer(params, args.model_dir, dim, single_cpu=True)
    trainer.build("predict")

    # The rspecifier cannot be scp.  `endswith` gives the same answer as
    # comparing the text after the last "." with "scp", but does not raise
    # IndexError when the rspecifier contains no "." at all (e.g. "ark:-").
    if args.rspecifier.endswith(".scp"):
        sys.exit("The rspecifier must be ark or input pipe")

    fp_out = open_or_fd(args.wspecifier, "wb")
    for index, (key, feature) in enumerate(read_mat_ark(args.rspecifier)):
    return mat



if __name__ == '__main__':
    # Script entry point: factorise one layer's kernel from an existing
    # checkpoint with an SVD, build a new graph via `tdnn_svd6`, and copy the
    # checkpoint weights (plus the two SVD factors) into that graph.
    # `nnet_path`, `nsplitname`, `nname`, `json_path`, `abandon` and
    # `tdnn_svd6` are defined outside this excerpt.
    reader = tf.train.NewCheckpointReader(nnet_path + 'model-570000')
    # np.linalg.svd returns the right factor already transposed, so `v` here
    # is V^H rather than V.
    u, s, v = np.linalg.svd(reader.get_tensor(nsplitname + 'kernel'))
    # NOTE(review): `abandon` presumably truncates the factors according to
    # `dimension=0.8` (kept fraction?) -- confirm against its definition.
    u, s, v = abandon(u, s, v, dimension=0.8)
    u = np.mat(u)
    # Turn the singular values into a diagonal matrix.
    s = np.mat(np.diag(np.array(s).squeeze()))
    v = np.mat(v)
    # With np.mat operands `*` is a matrix product, so A = U.S and B = V^H.
    A = u * s
    B = v
    # The layer's bias is read here but not used in the visible part of the
    # script.
    C = reader.get_tensor(nsplitname + 'bias')

    params = Params(json_path)
    x = tf.placeholder(tf.float32, [10, 175, 30], name='x')
    # The width of the new intermediate layer is the number of columns of A.
    features, endpoints = tdnn_svd6(features=x, params=params, mid_channels=A.shape[1])
    init = tf.global_variables_initializer()
    graph = tf.get_default_graph()
    # `saver` is not used in the visible part of the script.
    saver = tf.train.Saver()
    with tf.Session() as sess:
        sess.run(init)
        for name in reader.get_variable_to_shape_map():
            # Skip the factorised layer and anything whose name contains
            # 'softmax'; neither is copied from the checkpoint.
            if nsplitname in name or 'softmax' in name:
                continue
            # Replace everything before the first "/" of the checkpoint name
            # with `nname` and append ":0" to form the tensor name.  A name
            # without "/" makes re.match return None, which would raise
            # AttributeError on `.group`.
            herename = nname + re.match(r'(.+?)(\/.*)', name).group(2) + ':0'
            sess.run(tf.assign(graph.get_tensor_by_name(herename), reader.get_tensor(name)))

        # Load the two SVD factors into the kernels of the two new layers.
        sess.run(tf.assign(graph.get_tensor_by_name(nname + '/tdnn6.0_dense/kernel:0'), A))
        sess.run(tf.assign(graph.get_tensor_by_name(nname + '/tdnn6.5_dense/kernel:0'), B))
# Parse the command line (`parser` is defined above this excerpt).
args = parser.parse_args()

# A --gpu value of -1 means "run without a GPU". The environment variable is
# set here, ahead of the `import tensorflow` statement that follows.
if args.gpu == -1:
    # Disable GPU
    os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

import tensorflow as tf

if __name__ == '__main__':
    # Script entry point: build the network in "valid" mode and initialise
    # the session's global and local variables.
    tf.reset_default_graph()
    tf.logging.set_verbosity(tf.logging.INFO)
    nnet_dir = os.path.join(args.model_dir, "nnet")
    # The configuration lives next to the network files in <model_dir>/nnet.
    config_json = os.path.join(nnet_dir, "config.json")
    if not os.path.isfile(config_json):
        # Report the file that is actually being looked up (config.json).
        sys.exit("Cannot find config file %s" % config_json)
    params = Params(config_json)

    # First, we need to extract the weights
    # The directory containing the speaker list doubles as the data directory.
    spklist_dir = os.path.dirname(args.spklist)
    num_total_train_speakers = KaldiDataRandomQueue(
        spklist_dir, args.spklist).num_total_speakers
    dim = FeatureReader(spklist_dir).get_dim()
    # A "selected_dim" entry in the config overrides the dimension reported
    # by the feature reader.
    if "selected_dim" in params.dict:
        dim = params.selected_dim
    trainer = Trainer(params,
                      args.model_dir,
                      dim,
                      num_total_train_speakers,
                      single_cpu=True)
    trainer.build("valid")
    trainer.sess.run(tf.global_variables_initializer())
    trainer.sess.run(tf.local_variables_initializer())