Code Example #1
                          num_total_train_speakers)
    else:
        if (params.num_speakers_per_batch *
                params.num_segments_per_speaker) % args.num_gpus != 0:
            sys.exit(
                "To use multiple GPUs, the batch size must be divisible by num_gpus.")
        params.dict["ps"] = args.ps
        if args.ps == "cpu":
            tf.logging.warn("[Warning] The parameters will be placed on CPU.")
        trainer = TrainerMGPU(params,
                              args.finetune_model,
                              dim,
                              num_total_train_speakers,
                              num_gpus=args.num_gpus)

    trainer.build("train", noupdate_var_list=params.noupdate_var_list)
    trainer.build("valid")

    if "early_stop_epochs" not in params.dict:
        params.dict["early_stop_epochs"] = 5
    if "min_learning_rate" not in params.dict:
        params.dict["min_learning_rate"] = 1e-5
    if "lr_start_decay_epoch" not in params.dict:
        params.dict["lr_start_decay_epoch"] = 0
    if "learning_rate_reduce_factor" not in params.dict:
        params.dict["learning_rate_reduce_factor"] = 2

    if start_epoch == 0:
        # Load the pre-trained model and transfer it to the current model
        trainer.get_finetune_model(params.noload_var_list)
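
A minimal sketch (not from the repository) of how the four defaults above could drive the schedule: once lr_start_decay_epoch is reached, divide the rate by learning_rate_reduce_factor whenever validation loss stops improving, flooring it at min_learning_rate. The function name next_learning_rate is hypothetical.

def next_learning_rate(current_lr, epoch, improved, params):
    # Decay only after the configured start epoch and only when the
    # validation loss did not improve this epoch.
    if epoch >= params.dict["lr_start_decay_epoch"] and not improved:
        current_lr /= params.dict["learning_rate_reduce_factor"]
    # Never decay below the configured floor.
    return max(current_lr, params.dict["min_learning_rate"])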
Code Example #2
    dim = FeatureReader(args.train_dir).get_dim()
    with open(os.path.join(model_dir, "feature_dim"), "w") as f:
        f.write("%d\n" % dim)
    num_total_train_speakers = KaldiDataRandomQueue(args.train_dir, args.train_spklist).num_total_speakers
    tf.logging.info("There are %d speakers in the training set and the dim is %d" % (num_total_train_speakers, dim))

    # Load the validation loss history
    min_valid_loss = ValidLoss()
    if os.path.isfile(os.path.join(model_dir, "valid_loss")):
        min_valid_loss = load_valid_loss(os.path.join(model_dir, "valid_loss"))

    # The trainer is used to control the training process
    trainer = Trainer(params, args.model)
    trainer.build("train",
                  dim=dim,
                  loss_type=params.loss_func,
                  num_speakers=num_total_train_speakers)
    trainer.build("valid",
                  dim=dim,
                  loss_type=params.loss_func,
                  num_speakers=num_total_train_speakers)

    if "early_stop_epochs" not in params.dict:
        params.dict["early_stop_epochs"] = 10
    if "min_learning_rate" not in params.dict:
        params.dict["min_learning_rate"] = 1e-5

    for epoch in range(start_epoch, params.num_epochs):
        trainer.train(args.train_dir, args.train_spklist, learning_rate_array[epoch])
        valid_loss, valid_embeddings, valid_labels = trainer.valid(args.valid_dir, args.valid_spklist,
                                                                   batch_type=params.batch_type,
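
A hypothetical continuation of the epoch loop (not shown in the fragment), assuming ValidLoss exposes min_loss and min_loss_epoch fields: keep the best validation loss and stop once it has not improved for early_stop_epochs.

        # Track the best validation loss seen so far (hypothetical sketch).
        if valid_loss < min_valid_loss.min_loss:
            min_valid_loss.min_loss = valid_loss
            min_valid_loss.min_loss_epoch = epoch
        elif epoch - min_valid_loss.min_loss_epoch >= params.early_stop_epochs:
            tf.logging.info("Validation loss stopped improving; early stopping.")
            break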
Code Example #3
    # params.dict["triplet_center"] = "average"
    # params.dict["triplet_center_momentum"] = 0.9
    # params.dict["loss_compute"] = "softplus"
    # params.dict["margin"] = 0.1

    num_total_train_speakers = KaldiDataRandomQueue(
        args.data_dir, args.data_spklist).num_total_speakers
    dim = FeatureReader(args.data_dir).get_dim()
    if "selected_dim" in params.dict:
        dim = params.selected_dim
    trainer = Trainer(params,
                      args.model_dir,
                      dim,
                      num_total_train_speakers,
                      single_cpu=True)
    trainer.build("valid")

    # Initialize the graph before restoring the trained model
    trainer.sess.run(tf.global_variables_initializer())
    trainer.sess.run(tf.local_variables_initializer())

    # Load the trained weights
    curr_step = trainer.load()
    with tf.variable_scope("softmax", reuse=True):
        kernel = tf.get_variable("output/kernel",
                                 shape=[
                                     trainer.embeddings.get_shape()[-1],
                                     num_total_train_speakers
                                 ])
        kernel_val = trainer.sess.run(kernel)
    weights = np.transpose(kernel_val)
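
At this point each row of weights is one speaker's softmax weight vector. An illustrative follow-up (not in the fragment): row-normalize the matrix and compute speaker-to-speaker cosine similarities.

    # Normalize each speaker's weight vector to unit length.
    norms = np.linalg.norm(weights, axis=1, keepdims=True)
    unit_weights = weights / np.maximum(norms, 1e-12)
    # (num_speakers, num_speakers) matrix of cosine similarities.
    similarity = np.dot(unit_weights, unit_weights.T)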
Code Example #4
    config_json = os.path.join(args.model_dir, "nnet/config.json")
    if not os.path.isfile(config_json):
        sys.exit("Cannot find params.json in %s" % config_json)
    params = Params(config_json)

    # Change the output node if necessary
    if len(args.node) != 0:
        params.embedding_node = args.node
    tf.logging.info("Extract embedding from %s" % params.embedding_node)

    trainer = Trainer(params, args.model_dir, single_cpu=True)

    with open(os.path.join(nnet_dir, "feature_dim"), "r") as f:
        dim = int(f.readline().strip())
    trainer.build("predict", dim=dim)

    if args.rspecifier.rsplit(".", 1)[1] == "scp":
        # The rspecifier cannot be scp
        sys.exit("The rspecifier must be ark or input pipe")

    fp_out = open_or_fd(args.wspecifier, "wb")
    for index, (key, feature) in enumerate(read_mat_ark(args.rspecifier)):
        if feature.shape[0] < args.min_chunk_size:
            tf.logging.info("[INFO] Key %s length too short, %d < %d, skip." %
                            (key, feature.shape[0], args.min_chunk_size))
            continue
        if feature.shape[0] > args.chunk_size:
            feature_array = []
            feature_length = []
            num_chunks = int(
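
The fragment ends mid-statement. A minimal sketch of one plausible chunking scheme, assuming simple non-overlapping chunks (the repository may well use overlapping chunks; only the variable names come from the fragment):

            num_chunks = int(np.ceil(float(feature.shape[0]) / args.chunk_size))
            for i in range(num_chunks):
                chunk = feature[i * args.chunk_size:(i + 1) * args.chunk_size]
                feature_array.append(chunk)
                feature_length.append(chunk.shape[0])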
Code Example #5
    torch.manual_seed(params.random_seed)
    np.random.seed(params.random_seed)
    random.seed(params.random_seed)

    dim = FeatureReader(args.train_dir).get_dim()
    with open(os.path.join(model_dir, "feature_dim"), 'w') as f:
        f.write("%d\n" % dim)

    num_total_train_speakers = KaldiDataRandomQueue(
        args.train_dir, args.train_spklist).num_total_speakers  # number of training speakers
    with open(os.path.join(model_dir, "num_speakers"), 'w') as f:
        f.write("%d\n" % num_total_train_speakers)

    trainer = Trainer(params, args.model, num_total_train_speakers)
    trainer.build(loss_type=params.loss_func)

    if args.continue_training:
        checkpoint = torch.load(os.path.join(trainer.model, 'net.pth'))
        start_epoch = checkpoint['epoch'] + 1
        trainer.optimizer.load_state_dict(checkpoint['optimizer'])
        trainer.network.load_state_dict(checkpoint['state_dict'])
    else:
        start_epoch = 0

    learning_scheduler = lr_scheduler.StepLR(trainer.optimizer,
                                             step_size=params.reduce_lr_epochs,
                                             gamma=params.reduce_lr_gamma)
    for epoch in range(start_epoch, params.num_epochs):
        trainer.train(epoch=epoch,
                      data=args.train_dir,
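
A hypothetical end of the epoch loop (not shown in the fragment), stepping the scheduler once per epoch and saving a checkpoint with the same keys the resume path above expects ('epoch', 'state_dict', 'optimizer'):

        learning_scheduler.step()
        torch.save({'epoch': epoch,
                    'state_dict': trainer.network.state_dict(),
                    'optimizer': trainer.optimizer.state_dict()},
                   os.path.join(trainer.model, 'net.pth'))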
Code Example #6
if __name__ == '__main__':
    tf.reset_default_graph()
    tf.logging.set_verbosity(tf.logging.INFO)
    nnet_dir = os.path.join(args.model_dir, "nnet")
    config_json = os.path.join(args.model_dir, "nnet/config.json")
    if not os.path.isfile(config_json):
        sys.exit("Cannot find params.json in %s" % config_json)
    params = Params(config_json)

    # Attention weights
    params.embedding_node = "attention_weights"

    with open(os.path.join(nnet_dir, "feature_dim"), "r") as f:
        dim = int(f.readline().strip())
    trainer = Trainer(params, args.model_dir, dim, single_cpu=True)
    trainer.build("predict")

    if args.rspecifier.rsplit(".", 1)[1] == "scp":
        # The rspecifier cannot be scp
        sys.exit("The rspecifier must be ark or input pipe")

    fp_out = open_or_fd(args.wspecifier, "wb")
    for index, (key, feature) in enumerate(read_mat_ark(args.rspecifier)):
        if feature.shape[0] < args.min_chunk_size:
            tf.logging.info("[INFO] Key %s length too short, %d < %d, skip." %
                            (key, feature.shape[0], args.min_chunk_size))
            continue
        if feature.shape[0] > args.chunk_size:
            # We only extract the first segment
            feature = feature[:args.chunk_size]
        attention_weights = trainer.predict(feature)
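
A hypothetical continuation (assuming the I/O module that provides open_or_fd and read_mat_ark also provides a kaldi_io-style write_mat): store each utterance's weights under its key, then close the output after the loop.

        # Write the weights matrix to the wspecifier under the utterance key.
        write_mat(fp_out, attention_weights, key=key)
    fp_out.close()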
Code Example #7
    with open(os.path.join(model_dir, "feature_dim"), "w") as f:
        f.write("%d\n" % dim)

    num_total_train_speakers = KaldiDataRandomQueue(args.train_dir, args.train_spklist).num_total_speakers
    tf.logging.info("There are %d speakers in the training set and the dim is %d" % (num_total_train_speakers, dim))

    # Track the validation loss history
    min_valid_loss = ValidLoss()

    # The trainer is used to control the training process
    if args.num_gpus == 1:
        trainer = Trainer(params, args.model, dim, num_total_train_speakers)
    else:
        if (params.num_speakers_per_batch * params.num_segments_per_speaker) % args.num_gpus != 0:
            sys.exit("To use multiple GPUs, the batch size should divide num_gpus.")
        params.dict["ps"] = args.ps
        if args.ps == "cpu":
            tf.logging.warn("[Warning] The parameters will be placed on CPU.")
        trainer = TrainerMGPU(params, args.model, dim, num_total_train_speakers, num_gpus=args.num_gpus)

    # Build the training and validation graphs
    trainer.build("train")
    trainer.build("valid")

    # You can tune the learning rate using the following function.
    # After training, plot the loss vs. the learning rate and pick the
    # learning rate that decreases the loss fastest.
    trainer.train_tune_lr(args.train_dir, args.train_spklist, args.tune_period)
    trainer.close()
    tf.logging.info("Finish tuning.")
Code Example #8
    dim = FeatureReader(args.train_dir).get_dim()
    with open(os.path.join(model_dir, "feature_dim"), "w") as f:
        f.write("%d\n" % dim)
    num_total_train_speakers = KaldiDataRandomQueue(args.train_dir, args.train_spklist).num_total_speakers
    tf.logging.info("There are %d speakers in the training set and the dim is %d" % (num_total_train_speakers, dim))

    min_valid_loss = ValidLoss()
    if os.path.isfile(os.path.join(model_dir, "valid_loss")):
        min_valid_loss = load_valid_loss(os.path.join(model_dir, "valid_loss"))

    # The trainer is used to control the training process
    trainer = Trainer(params, args.finetune_model)
    trainer.build("train",
                  dim=dim,
                  loss_type=params.loss_func,
                  num_speakers=num_total_train_speakers,
                  noupdate_var_list=params.noupdate_var_list)
    trainer.build("valid",
                  dim=dim,
                  loss_type=params.loss_func,
                  num_speakers=num_total_train_speakers)

    if "early_stop_epochs" not in params.dict:
        params.dict["early_stop_epochs"] = 5
    if "min_learning_rate" not in params.dict:
        params.dict["min_learning_rate"] = 1e-5

    if start_epoch == 0:
        # Load the pre-trained model and transfer it to the current model
        trainer.get_finetune_model(params.noload_var_list)
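
Illustrative values for the two variable lists used above (assumptions, not from the fragment; the scope names depend on the network definition): when the fine-tuning set has a different speaker count, the classifier head is typically excluded from loading, and early layers can be frozen via noupdate_var_list.

    # Hypothetical settings for transfer learning.
    params.dict["noload_var_list"] = ["softmax"]             # re-initialize the classifier head
    params.dict["noupdate_var_list"] = ["tdnn1", "tdnn2"]    # freeze the early layers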
Code Example #9
    if not os.path.isfile(config_json):
        sys.exit("Cannot find params.json in %s" % config_json)
    params = Params(config_json)

    # First, we need to extract the weights
    num_total_train_speakers = KaldiDataRandomQueue(
        os.path.dirname(args.spklist), args.spklist).num_total_speakers
    dim = FeatureReader(os.path.dirname(args.spklist)).get_dim()
    if "selected_dim" in params.dict:
        dim = params.selected_dim
    trainer = Trainer(params,
                      args.model_dir,
                      dim,
                      num_total_train_speakers,
                      single_cpu=True)
    trainer.build("valid")
    trainer.sess.run(tf.global_variables_initializer())
    trainer.sess.run(tf.local_variables_initializer())

    if not args.init:
        curr_step = trainer.load()
    else:
        # Hack: skip loading and keep the random initialization.
        tf.logging.info("Use random initialization")
        trainer.is_loaded = True

    with tf.variable_scope("softmax", reuse=True):
        kernel = tf.get_variable("output/kernel",
                                 shape=[
                                     trainer.embeddings.get_shape()[-1],
                                     num_total_train_speakers
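
The fragment ends mid-call; Code Example #3 above completes the same pattern:

                                 ])
        kernel_val = trainer.sess.run(kernel)
    weights = np.transpose(kernel_val)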