def load_best_model(model_dir, model_type="predictor"):
    """Load the best saved checkpoint (``best_model.pt``) from *model_dir*.

    Args:
        model_dir: Directory that contains the ``best_model.pt`` checkpoint.
        model_type: ``"predictor"`` wraps a ``Predictor`` network in the
            returned ``Trainer``; any other value wraps a ``Selector``.

    Returns:
        A ``Trainer`` instance with its weights restored from the checkpoint.
    """
    model_file = os.path.join(model_dir, "best_model.pt")
    print("Loading model from {}".format(model_file))
    # The checkpoint stores the configuration it was trained with; rebuild
    # the network from that config before loading the weights.
    model_opt = torch_utils.load_config(model_file)
    if model_type == "predictor":
        network = Predictor(model_opt)
    else:
        network = Selector(model_opt)
    # Single construction path instead of duplicating Trainer(...) per branch.
    model = Trainer(model_opt, network, model_type=model_type)
    model.load(model_file)
    helper.print_config(model_opt)
    return model
# Command-line setup and deterministic seeding for evaluation.
parser.add_argument('--cpu', action='store_true')
args = parser.parse_args()
torch.manual_seed(args.seed)
random.seed(1234)
if args.cpu:
    # --cpu overrides any CUDA request.
    args.cuda = False
elif args.cuda:
    torch.cuda.manual_seed(args.seed)

# load opt
model_file = args.model_dir + '/' + args.model
print("Loading model from {}".format(model_file))
# The checkpoint embeds the training-time configuration.
opt = torch_utils.load_config(model_file)
trainer = Trainer(opt)
trainer.load(model_file)

# load vocab
# Vocabulary files must be the ones the model was trained with; the size
# checks below guard against a mismatched model_dir.
vocab_file = args.model_dir + '/vocab.pkl'
vocab = Vocab(vocab_file, load=True)
assert opt['vocab_size'] == vocab.size, "Vocab size must match that in the saved model."
char_vocab_file = args.model_dir + '/vocab_char.pkl'
char_vocab = Vocab(char_vocab_file, load=True)
assert opt['char_vocab_size'] == char_vocab.size, "Char vocab size must match that in the saved model."

# load data
data_file = opt['data_dir'] + '/{}.jsonl'.format(args.dataset)
# NOTE(review): statement continues beyond this chunk — the format() call is
# completed on the following (unseen) lines.
print("Loading data from {} with batch size {}...".format(
# Build a validation-mode trainer and pull the softmax classification
# weights out of the trained graph.
num_total_train_speakers = KaldiDataRandomQueue(args.data_dir, args.data_spklist).num_total_speakers
dim = FeatureReader(args.data_dir).get_dim()
trainer = Trainer(params, args.model_dir, single_cpu=True)
trainer.build("valid", dim=dim, loss_type=params.loss_func, num_speakers=num_total_train_speakers)
# trainer.build("predict", dim=dim)
# Load the model and output embeddings
trainer.sess.run(tf.global_variables_initializer())
trainer.sess.run(tf.local_variables_initializer())
# load the weights
curr_step = trainer.load()
# reuse=True fetches the existing "softmax/output/kernel" variable created
# by trainer.build() rather than defining a new one.
with tf.variable_scope("softmax", reuse=True):
    kernel = tf.get_variable("output/kernel", shape=[trainer.embeddings.get_shape()[-1], num_total_train_speakers])
kernel_val = trainer.sess.run(kernel)
# Transpose to (num_speakers, embedding_dim): one weight row per speaker.
weights = np.transpose(kernel_val)
embeddings_val = None
labels_val = None
# NOTE(review): this constructor call continues beyond this chunk — the
# remaining arguments are on the following (unseen) lines.
data_loader = KaldiDataSeqQueue(args.data_dir, args.data_spklist, num_parallel=1, max_qsize=10,
if __name__ == "__main__":
    # Extract an embedding for every utterance in rspecifier and write the
    # vectors to wspecifier in Kaldi format.
    args = parser.parse_args()
    params = read_config(args.config)
    os.environ['CUDA_VISIBLE_DEVICES'] = '1'
    nnet_dir = os.path.join(args.model_dir, "nnet")
    # The number of speakers the softmax head was trained with is persisted
    # alongside the network; it is needed to rebuild the model.
    with open(os.path.join(nnet_dir, "num_speakers"), 'r') as f:
        num_speakers = f.readline().strip()
    trainer = Trainer(params=params, model_dir=args.model_dir, num_speakers=int(num_speakers))
    load_model_dir = os.path.join(args.model_dir, "checkpoint")
    trainer.load(model=trainer.network, model_name=os.path.join(load_model_dir, "net.pth"))
    trainer.network.eval()
    # Use [-1] rather than [1]: an rspecifier without a "." (e.g. a pipe)
    # previously raised IndexError instead of being accepted.
    if args.rspecifier.rsplit(".", 1)[-1] == "scp":
        sys.exit("The rspecifier must be ark or input pipe")
    fp_out = open_or_fd(args.wspecifier, "wb")
    try:
        for key, feature in read_mat_ark(args.rspecifier):
            feature = trainer.test_transform(feature)
            _, embedding = trainer.network(feature)
            embedding = embedding.squeeze(0).cpu().detach().numpy()
            if args.normalize:
                # Length-normalize to a unit L2-norm vector.
                embedding /= np.sqrt(np.sum(np.square(embedding)))
            write_vec_flt(fp_out, embedding, key=key)
    finally:
        # Close the output stream and the trainer even if extraction fails
        # part-way through; the original leaked both on error.
        fp_out.close()
        trainer.close()