data_files = [path + 'data/train.txt', path + 'data/test.txt']

idx_files = [
    path + 'data/words_vocab.pkl',
    path + 'data/characters_vocab.pkl'
]

tensor_files = [[
    path + 'data/train_word_tensor.npy',
    path + 'data/valid_word_tensor.npy'
], [
    path + 'data/train_character_tensor.npy',
    path + 'data/valid_character_tensor.npy'
]]

batch_loader = BatchLoader(data_files, idx_files, tensor_files, path)
parameters = Parameters(batch_loader.max_word_len, batch_loader.max_seq_len,
                        batch_loader.words_vocab_size,
                        batch_loader.chars_vocab_size)

'''
============================= BatchLoader loading ===============================================
'''

data_files = [
    path + 'data/super/train_2.txt',
    path + 'data/super/test_2.txt'
]

idx_files = [
    path + 'data/super/words_vocab_2.pkl',
    path + 'data/super/characters_vocab_2.pkl'
]
def main():
    parser = argparse.ArgumentParser(description='Paraphraser')
    parser.add_argument('--use-cuda', type=bool, default=False, metavar='CUDA',
                        help='use cuda (default: False)')
    parser.add_argument('--seq-len', type=int, default=30, metavar='SL',
                        help='max length of sequence (default: 30)')
    parser.add_argument('--ml', type=bool, default=True, metavar='ML',
                        help='sample by maximum likelihood')
    args = parser.parse_args()

    # Read data
    if not os.path.exists('datasets/human_test.csv'):
        source_file = 'datasets/test.csv'
        source_data = pd.read_csv(source_file)[['question1', 'question2']]

        # Bucket question pairs by source length: <6, <11, <16, <21, >=21 words
        sentence_categories = [[] for _ in range(5)]
        for i in range(len(source_data)):
            sent = clean_str(source_data['question1'][i])
            sent_len = len(sent.split())
            if sent_len < 6:
                j = 0
            elif sent_len < 11:
                j = 1
            elif sent_len < 16:
                j = 2
            elif sent_len < 21:
                j = 3
            else:
                j = 4
            sentence_categories[j].append(
                [source_data['question1'][i], source_data['question2'][i]])

        # Sample 20 pairs from each length bucket
        sample_data = []
        for category in sentence_categories:
            sample_data += random.sample(category, 20)
        source_data = pd.DataFrame(sample_data,
                                   columns=['question1', 'question2'])
        source_data.to_csv('datasets/human_test.csv')
    else:
        source_data = pd.read_csv('datasets/human_test.csv')[[
            'question1', 'question2'
        ]]

    # Sample from Gupta's original model
    batch_loader = BatchLoader()
    from model.parameters import Parameters
    parameters = Parameters(batch_loader.max_seq_len, batch_loader.vocab_size)
    paraphraser = Paraphraser(parameters)
    paraphraser.load_state_dict(
        t.load('saved_models/trained_paraphraser_ori_32',
               map_location=t.device('cpu')))

    samples_ori, target, source_ori = sample_with_input(
        batch_loader, paraphraser, args,
        decoder_only=True, file_name='datasets/human_test.csv')
    ref_items = generate_items(source_ori, target, 'ref')
    ori_items = generate_items(source_ori, samples_ori[0], 'ori')

    # Sample from Gupta's model with two-path loss
    batch_loader = BatchLoader()
    parameters = Parameters(batch_loader.max_seq_len, batch_loader.vocab_size,
                            use_two_path_loss=True)
    paraphraser = Paraphraser(parameters)
    paraphraser.load_state_dict(
        t.load('saved_models/trained_paraphraser_tpl_16_32',
               map_location=t.device('cpu')))

    samples_tpl, target, source_tpl = sample_with_input(
        batch_loader, paraphraser, args,
        decoder_only=False, file_name='datasets/human_test.csv')
    tpl_items = generate_items(source_tpl, samples_tpl[0], 'tpl')

    # Sample from GAN model
    batch_loader = BatchLoader()
    from model.parametersGAN import Parameters
    parameters = Parameters(batch_loader.max_seq_len, batch_loader.vocab_size)
    paraphraser = Generator(parameters)
    paraphraser.load_state_dict(
        t.load('saved_models/trained_generator_gan_140k',
               map_location=t.device('cpu')))

    samples_gan, target, source_gan = sample_with_input(
        batch_loader, paraphraser, args,
        decoder_only=False, file_name='datasets/human_test.csv')
    gan_items = generate_items(source_gan, samples_gan[0], 'gan')

    # Sample from synonym-replacement baseline
    paraphraser = SynonymParaphraser()
    samples_synonym = paraphraser.generate_paraphrases(
        'datasets/human_test.csv')
    base_items = generate_items(source_data['question1'], samples_synonym,
                                'base')

    all_items = ref_items + ori_items + tpl_items + gan_items + base_items

    eval_results = {
        'name': 'Paraphrase Survey Full Ordered',
        'items': all_items
    }
    res = json.dumps(eval_results, ensure_ascii=False)
    with open('datasets/human_test_ordered.json', 'w') as f:
        f.write(res)

    random.shuffle(all_items)
    eval_results = {
        'name': 'Paraphrase Survey Full Shuffled',
        'items': all_items
    }
    res = json.dumps(eval_results, ensure_ascii=False)
    with open('datasets/human_test_shuffled.json', 'w') as f:
        f.write(res)

    # Split the shuffled items into 10 survey parts of 50 items each
    for i in range(10):
        eval_results = {
            'name': f'Paraphrase Survey Part {i + 1}/{10}',
            'items': all_items[i * 50:(i + 1) * 50]
        }
        res = json.dumps(eval_results, ensure_ascii=False)
        with open(f'datasets/human_test_p_{i}_{10}.json', 'w') as f:
            f.write(res)
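# For reference, a minimal sketch of what `generate_items` above might look
# like. The item schema is an assumption -- the excerpt never shows it -- and
# whatever survey tool consumes these JSON files dictates the real field names:
#
#     def generate_items(sources, candidates, model_tag):
#         return [{'source': str(src), 'paraphrase': str(cand), 'model': model_tag}
#                 for src, cand in zip(sources, candidates)]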
idx_files = [ path + f"data/words_vocab_{args.embeddings_name}.pkl", path + f"data/characters_vocab_{args.embeddings_name}.pkl", ] tensor_files = [ [path + f"data/test_word_tensor_{args.embeddings_name}.npy"], [path + f"data/test_character_tensor_{args.embeddings_name}.npy"], ] preprocessor = PreProcessor(idx_files) preprocessor.preprocess_data(data_files, idx_files, tensor_files, args.use_file, str) batch_loader = BatchLoader(data_files, idx_files, tensor_files) parameters = Parameters( batch_loader.max_word_len, batch_loader.max_seq_len, batch_loader.words_vocab_size, batch_loader.chars_vocab_size, args.embeddings_name, args.res_model, args.hrvae, args.wae, ) """ ============================ BatchLoader for Question-2 =============================================== """ data_files = [path + f"data/super/train_{args.data_name}_2.txt"] idx_files = [
path + f"data/words_vocab_{args.embeddings_name}.pkl", path + f"data/characters_vocab_{args.embeddings_name}.pkl", ] tensor_files = [ [ path + f"data/train_word_tensor_{args.embeddings_name}.npy", path + f"data/valid_word_tensor_{args.embeddings_name}.npy", ], [ path + f"data/train_character_tensor_{args.embeddings_name}.npy", path + f"data/valid_character_tensor_{args.embeddings_name}.npy", ], ] batch_loader = BatchLoader(data_files, idx_files, tensor_files, path) parameters = Parameters( batch_loader.max_word_len, batch_loader.max_seq_len, batch_loader.words_vocab_size, batch_loader.chars_vocab_size, args.embeddings_name, args.res_model, args.hrvae, args.wae, ) """ =================== Doing the same for encoder-2 =============================================== """ data_files = [path + f"data/super/train_{args.data_name}_2.txt", path + f"data/super/test_{args.data_name}_2.txt"]
parser.add_argument('--num-sample', type=int, default=5, metavar='NS',
                    help='num sample (default: 5)')
parser.add_argument('--use-cuda', type=bool, default=True, metavar='CUDA',
                    help='use cuda (default: True)')
args = parser.parse_args()

prefix = 'poem'
word_is_char = True
batch_loader = BatchLoader('', prefix, word_is_char)
params = Parameters(batch_loader.max_word_len, batch_loader.max_seq_len,
                    batch_loader.words_vocab_size,
                    batch_loader.chars_vocab_size, word_is_char)

# NEG_loss is defined over two embedding matrices with shape
# [params.word_vocab_size, params.word_embed_size]
neg_loss = NEG_loss(params.word_vocab_size, params.word_embed_size)
if args.use_cuda and t.cuda.is_available():
    neg_loss = neg_loss.cuda()

optimizer = SGD(neg_loss.parameters(), 0.1)

for iteration in range(args.num_iterations):
    input_idx, target_idx = batch_loader.next_embedding_seq(args.batch_size)
from selfModules.neg import NEG_loss

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='word2vec')
    parser.add_argument('--num-iterations', type=int, default=1000000, metavar='NI',
                        help='num iterations (default: 1000000)')
    parser.add_argument('--batch-size', type=int, default=10, metavar='BS',
                        help='batch size (default: 10)')
    parser.add_argument('--num-sample', type=int, default=5, metavar='NS',
                        help='num sample (default: 5)')
    parser.add_argument('--use-cuda', type=bool, default=True, metavar='CUDA',
                        help='use cuda (default: True)')
    args = parser.parse_args()

    batch_loader = BatchLoader('')
    params = Parameters(batch_loader.max_word_len, batch_loader.max_seq_len,
                        batch_loader.words_vocab_size,
                        batch_loader.chars_vocab_size)

    # NEG_loss is defined over two embedding matrices with shape
    # [params.word_vocab_size, params.word_embed_size]
    neg_loss = NEG_loss(params.word_vocab_size, params.word_embed_size)
    if args.use_cuda:
        neg_loss = neg_loss.cuda()

    optimizer = SGD(neg_loss.parameters(), 0.1)

    for iteration in range(args.num_iterations):
        input_idx, target_idx = batch_loader.next_embedding_seq(args.batch_size)
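# The loop body is truncated in this excerpt. A minimal sketch of how a
# negative-sampling step typically continues; the NEG_loss call signature and
# the input_embeddings() helper are assumptions, not confirmed by the excerpt:
#
#     input = t.from_numpy(input_idx).long()
#     target = t.from_numpy(target_idx).long()
#     if args.use_cuda:
#         input, target = input.cuda(), target.cuda()
#
#     loss = neg_loss(input, target, args.num_sample).mean()
#     optimizer.zero_grad()
#     loss.backward()
#     optimizer.step()
#
# After training, the input embeddings would be saved for the t-SNE/PCA
# visualization scripts below, which load a word_embeddings.npy file:
#
#     np.save('data/word_embeddings.npy', neg_loss.input_embeddings())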
                    default=20, metavar='BS',
                    help='batch size (default: 20)')
parser.add_argument('--num-sample', type=int, default=14, metavar='NS',
                    help='num sample (default: 14)')
parser.add_argument('--use-cuda', type=bool, default=False, metavar='CUDA',
                    help='use cuda (default: False)')
args = parser.parse_args()

batch_loader = BatchLoader('')
params = Parameters(batch_loader.max_seq_len, batch_loader.vocab_size)

# NEG_loss is defined over two embedding matrices with shape
# [params.vocab_size, params.word_embed_size]; the weights follow the
# word2vec subsampling formula 1 - sqrt(t / f(w)) with t = 5e-5.
neg_loss = NEG_loss(params.vocab_size,
                    params.word_embed_size,
                    weights=[1 - sqrt(5e-5 / i) for i in batch_loader.words_freq])
if args.use_cuda:
    neg_loss = neg_loss.cuda()

optimizer = SGD(neg_loss.parameters(), 0.1)

for iteration in range(args.num_iterations):
    input_idx, target_idx = batch_loader.next_embedding_seq(args.batch_size)
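# One caveat worth noting (an assumption about words_freq, which the excerpt
# never defines): if words_freq holds relative frequencies, any word with
# frequency below t = 5e-5 makes 1 - sqrt(t / f) negative. word2vec-style
# implementations usually clamp the weight at zero, e.g.:
#
#     t = 5e-5
#     weights = [max(0.0, 1 - sqrt(t / f)) for f in batch_loader.words_freq]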
parser.add_argument('--use-cuda', type=bool, default=False, metavar='CUDA',
                    help='use cuda (default: False)')
parser.add_argument('--learning-rate', type=float, default=0.00005, metavar='LR',
                    help='learning rate (default: 0.00005)')
parser.add_argument('--dropout', type=float, default=0.3, metavar='DR',
                    help='dropout (default: 0.3)')
parser.add_argument('--use-trained', type=bool, default=False, metavar='UT',
                    help='load pretrained model (default: False)')
parser.add_argument('--ce-result', default='', metavar='CE',
                    help="ce result path (default: '')")
parser.add_argument('--kld-result', default='', metavar='KLD',
                    help="kld result path (default: '')")
args = parser.parse_args()

batch_loader = BatchLoader('')
parameters = Parameters(batch_loader.max_word_len, batch_loader.max_seq_len,
                        batch_loader.words_vocab_size,
                        batch_loader.chars_vocab_size)

rvae = RVAE(parameters)
if args.use_trained:
    rvae.load_state_dict(t.load('trained_RVAE'))
if args.use_cuda:
    rvae = rvae.cuda()

optimizer = Adam(rvae.learnable_parameters(), args.learning_rate)

train_step = rvae.trainer(optimizer)
# validate = rvae.validater()
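# A minimal sketch of the training loop that typically follows this setup.
# The train_step signature, the logging cadence, and the use of np.save are
# assumptions, not shown in the excerpt:
#
#     ce_result, kld_result = [], []
#     for iteration in range(args.num_iterations):
#         cross_entropy, kld, kld_coef = train_step(iteration, args.batch_size,
#                                                   args.use_cuda, args.dropout)
#         if iteration % 100 == 0:
#             ce_result.append(cross_entropy.item())
#             kld_result.append(kld.item())
#
#     # --ce-result / --kld-result are where these training curves get saved
#     np.save(args.ce_result, np.array(ce_result))
#     np.save(args.kld_result, np.array(kld_result))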
parser = argparse.ArgumentParser(description='word2vec')
parser.add_argument('--num-iterations', type=int, default=1000000, metavar='NI',
                    help='num iterations (default: 1000000)')
parser.add_argument('--batch-size', type=int, default=10, metavar='BS',
                    help='batch size (default: 10)')
parser.add_argument('--num-sample', type=int, default=5, metavar='NS',
                    help='num sample (default: 5)')
parser.add_argument('--use-cuda', type=bool, default=True, metavar='CUDA',
                    help='use cuda (default: True)')
parser.add_argument('--train-data', default='', metavar='TD',
                    help="load custom training dataset (default: '')")
args = parser.parse_args()

batch_loader = BatchLoader(path='', custom_index=False,
                           train_data_name=args.train_data)
params = Parameters(batch_loader.max_word_len, batch_loader.max_seq_len,
                    batch_loader.words_vocab_size,
                    batch_loader.chars_vocab_size)

# NEG_loss is defined over two embedding matrices with shape
# [params.word_vocab_size, params.word_embed_size]
neg_loss = NEG_loss(params.word_vocab_size, params.word_embed_size)
if args.use_cuda:
    neg_loss = neg_loss.cuda()

optimizer = SGD(neg_loss.parameters(), 0.1)

for iteration in range(args.num_iterations):
data_files = [path + 'data/train.txt', path + 'data/test.txt']

idx_files = [
    path + 'data/words_vocab.pkl',
    path + 'data/characters_vocab.pkl'
]

tensor_files = [[
    path + 'data/train_word_tensor.npy',
    path + 'data/valid_word_tensor.npy'
], [
    path + 'data/train_character_tensor.npy',
    path + 'data/valid_character_tensor.npy'
]]

batch_loader = BatchLoader(data_files, idx_files, tensor_files, path)
batch_loader.load_preprocessed(data_files, idx_files, tensor_files)
parameters = para(batch_loader.max_word_len, batch_loader.max_seq_len,
                  batch_loader.words_vocab_size,
                  batch_loader.chars_vocab_size)

'''
=================== Doing the same for encoder-2 ===============================================
'''

data_files = [
    path + 'data/super/train_2.txt',
    path + 'data/super/test_2.txt'
]

idx_files = [
    path + 'data/super/words_vocab_2.pkl',
    path + 'data/super/characters_vocab_2.pkl'
]
def sample(args):
    # Create locations to store samples
    if not os.path.isdir('logs/' + args.model_name + '/samples'):
        os.mkdir('logs/' + args.model_name + '/samples')

    batch_loader = BatchLoader()

    # Load model: plain 'ori'/'tpl' checkpoints use Paraphraser,
    # 'gan' checkpoints use Generator
    model_name = args.model_name.lower()
    map_location = t.device('cuda:0') if args.use_cuda else t.device('cpu')
    if ('ori' in model_name and 'gan' not in model_name) or 'tpl' in model_name:
        from model.parameters import Parameters
        parameters = Parameters(batch_loader.max_seq_len,
                                batch_loader.vocab_size,
                                use_two_path_loss=('tpl' in model_name))
        paraphraser = Paraphraser(parameters)
        paraphraser.load_state_dict(
            t.load('saved_models/trained_paraphraser_' + args.model_name,
                   map_location=map_location))
    elif 'gan' in model_name:
        from model.parametersGAN import Parameters
        parameters = Parameters(batch_loader.max_seq_len,
                                batch_loader.vocab_size)
        paraphraser = Generator(parameters)
        paraphraser.load_state_dict(
            t.load('saved_models/trained_generator_' + args.model_name,
                   map_location=map_location))

    # Only the original (non-GAN, non-TPL) model samples decoder-only
    decoder_only = 'ori' in model_name and 'gan' not in model_name

    if args.beam:
        samples, target, source = sample_with_beam(
            batch_loader, paraphraser, args,
            decoder_only=decoder_only,
            beam_size=(args.num_samples if args.num_samples != 1 else 5))

        for i in range(args.num_samples):
            np.savetxt(f'logs/{args.model_name}/samples/sampled_beam_{i}.txt',
                       np.array(samples[i]), delimiter='\n', fmt='%s')
        np.savetxt(f'logs/{args.model_name}/samples/target_beam.txt',
                   np.array(target), delimiter='\n', fmt='%s')
        np.savetxt(f'logs/{args.model_name}/samples/source_beam.txt',
                   np.array(source), delimiter='\n', fmt='%s')
    else:
        samples, target, source = sample_with_input(
            batch_loader, paraphraser, args,
            decoder_only=decoder_only,
            num_samples=args.num_samples, ml=args.ml)

        suffix = '_ml' if args.ml else '_s'
        for i in range(args.num_samples):
            np.savetxt(f'logs/{args.model_name}/samples/sampled{suffix}_{i}.txt',
                       np.array(samples[i]), delimiter='\n', fmt='%s')
        np.savetxt(f'logs/{args.model_name}/samples/target{suffix}.txt',
                   np.array(target), delimiter='\n', fmt='%s')
        np.savetxt(f'logs/{args.model_name}/samples/source{suffix}.txt',
                   np.array(source), delimiter='\n', fmt='%s')
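# For context, a sketch of the argparse wiring sample() expects, inferred from
# the attribute accesses above; the flag names and defaults are assumptions:
#
#     parser = argparse.ArgumentParser(description='Sampler')
#     parser.add_argument('--model-name', default='ori_32', metavar='MN')
#     parser.add_argument('--use-cuda', type=bool, default=False, metavar='CUDA')
#     parser.add_argument('--beam', type=bool, default=False, metavar='B')
#     parser.add_argument('--num-samples', type=int, default=1, metavar='NS')
#     parser.add_argument('--ml', type=bool, default=True, metavar='ML')
#     sample(parser.parse_args())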
# getting validation data
train_images, train_labels, valid_images, valid_labels = train_test_split(
    (train_images, train_labels), n=10000)

# flattening images
dim = train_images.shape[1] * train_images.shape[2]
train_images = train_images.reshape(-1, dim)
valid_images = valid_images.reshape(-1, dim)
test_images = test_images.reshape(-1, dim)

# reformat labels for a binary classifier: even digits -> 1, odd digits -> -1
train_labels = np.vectorize(lambda n: 1 if n % 2 == 0 else -1)(train_labels)
valid_labels = np.vectorize(lambda n: 1 if n % 2 == 0 else -1)(valid_labels)
test_labels = np.vectorize(lambda n: 1 if n % 2 == 0 else -1)(test_labels)

train_loader = BatchLoader((train_images, train_labels), batch_size=1, seed=seed)
valid_loader = BatchLoader((valid_images, valid_labels), batch_size=None, seed=seed)
test_loader = BatchLoader((test_images, test_labels), batch_size=None, seed=seed)

"""
NN = PLA(dim)
for i, (images, labels) in enumerate(test_loader):
    print(NN.predict(images))
    break
"""

# creating model
NN = PLA(dim, seed=seed)
statistics = NN.train(train_loader, valid_loader, test_loader,
                      epochs=5, eval_freq=10000, plot=False)

# printing statistics
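# An equivalent, fully vectorized form of the even/odd label mapping above;
# np.where avoids the Python-level lambda that np.vectorize wraps:
#
#     train_labels = np.where(train_labels % 2 == 0, 1, -1)
#     valid_labels = np.where(valid_labels % 2 == 0, 1, -1)
#     test_labels = np.where(test_labels % 2 == 0, 1, -1)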
import os

import matplotlib.pyplot as plt
import numpy as np
from sklearn.manifold import TSNE

from utils.batch_loader import BatchLoader

if __name__ == "__main__":
    if not os.path.exists('../data/preprocessings/word_embeddings.npy'):
        raise FileNotFoundError("word embeddings file wasn't found")

    tsne = TSNE(n_components=2)
    word_embeddings = np.load('../data/preprocessings/word_embeddings.npy')
    word_embeddings_tsne = tsne.fit_transform(word_embeddings)

    batch_loader = BatchLoader('../')
    words = batch_loader.idx_to_word

    fig, ax = plt.subplots()
    fig.set_size_inches(150, 150)

    x = word_embeddings_tsne[:, 0]
    y = word_embeddings_tsne[:, 1]
    ax.scatter(x, y)
    for i, word in enumerate(words):
        ax.annotate(word, (x[i], y[i]))

    fig.savefig('word_embedding.png', dpi=100)
                    default=False, metavar='CUDA',
                    help='use cuda (default: False)')
parser.add_argument('--learning-rate', type=float, default=5e-5, metavar='LR',
                    help='learning rate (default: 5e-5)')
parser.add_argument('--use-trained', type=bool, default=False, metavar='UT',
                    help='load pretrained model (default: False)')
args = parser.parse_args()

batch_loader = BatchLoader('')
parameters = Parameters(batch_loader.max_word_len, batch_loader.max_seq_len,
                        batch_loader.vocab_size)

rgan = RGAN(parameters)
if args.use_trained:
    rgan.load_state_dict(t.load('trained_RGAN'))
if args.use_cuda:
    rgan = rgan.cuda()

g_optimizer = Adam(rgan.generator.parameters(), args.learning_rate)
d_optimizer = Adam(rgan.discriminator.parameters(), args.learning_rate)

for iteration in range(args.num_iterations):
    # five discriminator updates per generator update
    for _ in range(5):
        '''Discriminator forward-loss-backward-update'''
                    help='if include snli dataset (default: True)')
parser.add_argument('--use-coco', default=False, type=bool, metavar='coco',
                    help='if include mscoco dataset (default: False)')
args = parser.parse_args()

datasets = set()
if args.use_quora is True:
    datasets.add('quora')
if args.use_snli is True:
    datasets.add('snli')
if args.use_coco is True:
    datasets.add('mscoco')

print('use mean', args.use_mean)

batch_loader = BatchLoader(datasets=datasets)
parameters = Parameters(batch_loader.max_seq_len, batch_loader.vocab_size)
paraphraser = Paraphraser(parameters)
# build the model structure yourself, then load the trained weights into it
paraphraser.load_state_dict(t.load('saved_models/trained_paraphraser_' +
                                   args.model_name))
if args.use_cuda:
    paraphraser = paraphraser.cuda()

result, target, source = sample_with_input_file(batch_loader, paraphraser,
                                                args, args.input_file)

if args.input_file not in ['snli_test', 'mscoco_test', 'quora_test', 'snips']:
    args.input_file = 'custom_file'

sampled_file_dst = 'logs/sampled_out_{}_{}{}.txt'.format(args.input_file,
data_files = [path + 'data/train.txt', path + 'data/test.txt']

idx_files = [
    path + 'data/words_vocab.pkl',
    path + 'data/characters_vocab.pkl'
]

tensor_files = [[
    path + 'data/train_word_tensor.npy',
    path + 'data/valid_word_tensor.npy'
], [
    path + 'data/train_character_tensor.npy',
    path + 'data/valid_character_tensor.npy'
]]

batch_loader = BatchLoader(data_files, idx_files, tensor_files, path)
parameters = Parameters(batch_loader.max_word_len, batch_loader.max_seq_len,
                        batch_loader.words_vocab_size,
                        batch_loader.chars_vocab_size)

'''
=================== Doing the same for encoder-2 ===============================================
'''

data_files = [
    path + 'data/super/train_2.txt',
    path + 'data/super/test_2.txt'
]

idx_files = [
    path + 'data/super/words_vocab_2.pkl',
    path + 'data/super/characters_vocab_2.pkl'
]
import os

import matplotlib.pyplot as plt
import numpy as np
from sklearn.decomposition import PCA

from utils.batch_loader import BatchLoader

if __name__ == "__main__":
    prefix = 'poem'
    word_is_char = True
    batch_loader = BatchLoader('', prefix, word_is_char)

    if not os.path.exists('data/' + batch_loader.prefix + 'word_embeddings.npy'):
        raise FileNotFoundError("word embeddings file wasn't found")

    pca = PCA(n_components=2)
    word_embeddings = np.load('data/' + batch_loader.prefix +
                              'word_embeddings.npy')
    word_embeddings_pca = pca.fit_transform(word_embeddings)

    words = batch_loader.idx_to_word

    fig, ax = plt.subplots()
    fig.set_size_inches(150, 150)

    x = word_embeddings_pca[:, 0]
    y = word_embeddings_pca[:, 1]
    ax.scatter(x, y)
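# Optional sanity check (a sketch, not in the original script): a fitted
# sklearn PCA exposes how much variance the two plotted components capture,
# which indicates how faithful this 2-D view of the embeddings is:
#
#     print('explained variance ratio:', pca.explained_variance_ratio_)
#     print('total:', pca.explained_variance_ratio_.sum())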
    data_files = [args.train_file]
    data = [open(file, "r").read() for file in data_files]
elif args.train_file.endswith('.pkl'):
    data_files = args.train_file.split(',')
    print(data_files)
    data = [pkl.load(open(file, "rb")) for file in data_files]
    sentence_array = True

idx_files = [
    args.save_at + '/words_vocab.pkl',
    args.save_at + '/characters_vocab.pkl'
]

batch_loader = BatchLoader(data, idx_files, train_embedding=True,
                           sentence_array=sentence_array)
params = Parameters(batch_loader.max_word_len, batch_loader.max_seq_len,
                    batch_loader.words_vocab_size,
                    batch_loader.chars_vocab_size)

# NEG_loss is defined over two embedding matrices with shape
# [params.word_vocab_size, params.word_embed_size]
neg_loss = NEG_loss(params.word_vocab_size, params.word_embed_size)
if args.use_cuda:
    neg_loss = neg_loss.cuda()

optimizer = SGD(neg_loss.parameters(), 0.1)

for iteration in range(args.num_iterations):
str = input("Input: ") else: file_1 = open(args.test_file, 'r') data = file_1.readlines() data_files = [args.test_file] idx_files = ['data/words_vocab.pkl', 'data/characters_vocab.pkl'] tensor_files = [['data/test_word_tensor.npy'], ['data/test_character_tensor.npy']] preprocess_data(data_files, idx_files, tensor_files, args.use_file, str) batch_loader = BatchLoader(data_files, idx_files, tensor_files) parameters = Parameters(batch_loader.max_word_len, batch_loader.max_seq_len, batch_loader.words_vocab_size, batch_loader.chars_vocab_size) data_files = ['data/super/train_2.txt'] idx_files = ['data/super/words_vocab_2.pkl', 'data/super/characters_vocab_2.pkl'] tensor_files = [['data/super/train_word_tensor_2.npy'], ['data/super/train_character_tensor_2.npy']] batch_loader_2 = BatchLoader(data_files, idx_files, tensor_files) parameters_2 = Parameters(batch_loader_2.max_word_len, batch_loader_2.max_seq_len,
def generate_tensor_file(prefix, word_is_char, gen_tensors):
    batch_loader = BatchLoader('', prefix, word_is_char, gen_tensors)
path = ''

data_files = [path + 'data/train.txt', path + 'data/test.txt']

idx_files = [path + 'data/words_vocab.pkl',
             path + 'data/characters_vocab.pkl']

tensor_files = [[path + 'data/train_word_tensor.npy',
                 path + 'data/valid_word_tensor.npy'],
                [path + 'data/train_character_tensor.npy',
                 path + 'data/valid_character_tensor.npy']]

batch_loader = BatchLoader(data_files, idx_files, tensor_files, path)
# batch_loader = BatchLoader('')
params = Parameters(batch_loader.max_word_len, batch_loader.max_seq_len,
                    batch_loader.words_vocab_size,
                    batch_loader.chars_vocab_size)

neg_loss = NEG_loss(params.word_vocab_size, params.word_embed_size)
if args.use_cuda:
    neg_loss = neg_loss.cuda()

optimizer = SGD(neg_loss.parameters(), 0.1)

for iteration in range(args.num_iterations):
parser.add_argument('--embedding-size', type=int, default=100, metavar='ES',
                    help='embeddings size (default: 100)')
parser.add_argument('--interm-sampling', type=bool, default=False, metavar='IS',
                    help='if sample while training (default: False)')
args = parser.parse_args()

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

batch_loader = BatchLoader(datapath=args.data_path,
                           use_glove=args.use_glove,
                           glove_path=args.glove_path,
                           embedding_size=args.embedding_size)
parameters = Parameters(batch_loader.max_seq_len,
                        batch_loader.vocab_size,
                        batch_loader.embedding_size)
paraphraser = Paraphraser(parameters, device).to(device)

cross_entropy_result_train = []
kld_result_train = []
cross_entropy_result_valid = []
kld_result_valid = []
cross_entropy_cur_train = []
kld_cur_train = []

if args.use_trained:
    # load the pretrained model
    paraphraser.load_state_dict(torch.load(args.pretrained_model_name))
                    help='num samplings (default: 10)')
parser.add_argument('--sample-data', default='', metavar='TD',
                    help="load custom training dataset (default: '')")
parser.add_argument('--model-name', default='', metavar='TD',
                    help="name of saved model (default: '')")
args = parser.parse_args()

batch_loader = BatchLoader('', custom_index=True,
                           train_data_name=args.sample_data)
parameters = Parameters(batch_loader.max_word_len, batch_loader.max_seq_len,
                        batch_loader.words_vocab_size,
                        batch_loader.chars_vocab_size)

rvae = RVAE(parameters)
rvae.load_state_dict(t.load('./trained_model/{}'.format(args.model_name)))
if args.use_cuda:
    rvae = rvae.cuda()

sampler = rvae.latent_sampler(batch_loader)

zs = {}
for i in range(
                    help='num samplings (default: 10)')
parser.add_argument('--model-name', default='', metavar='TD',
                    help="name of saved model (default: '')")
parser.add_argument('--train-data', default='', metavar='TD',
                    help="load custom training dataset (default: '')")
args = parser.parse_args()

batch_loader = BatchLoader(path='', custom_index=False,
                           train_data_name=args.train_data)
parameters = Parameters(batch_loader.max_word_len, batch_loader.max_seq_len,
                        batch_loader.words_vocab_size,
                        batch_loader.chars_vocab_size)

rvae = RVAE(parameters)
rvae.load_state_dict(t.load('./trained_model/{}'.format(args.model_name)))
if args.use_cuda:
    rvae = rvae.cuda()

sents = []
seeds = {}
parser = argparse.ArgumentParser(description='Sampler')
parser.add_argument('--use-cuda', type=bool, default=True, metavar='CUDA',
                    help='use cuda (default: True)')
parser.add_argument('--num-sample', type=int, default=10, metavar='NS',
                    help='num samplings (default: 10)')
args = parser.parse_args()

batch_loader = BatchLoader('')
parameters = Parameters(batch_loader.max_word_len, batch_loader.max_seq_len,
                        batch_loader.words_vocab_size,
                        batch_loader.chars_vocab_size)

rvae = RVAE_dilated(parameters)
rvae.load_state_dict(t.load('trained_RVAE'))
if args.use_cuda:
    rvae = rvae.cuda()

# draw each sample from the standard-normal prior over the latent space
for iteration in range(args.num_sample):
    seed = np.random.normal(size=[1, parameters.latent_variable_size])
    result = rvae.sample(batch_loader, 50, seed, args.use_cuda)
    print(result)
    print()
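# A common follow-up experiment (a sketch, not part of the original script):
# linearly interpolate between two latent seeds and decode each point, to see
# whether nearby latent codes yield related sentences. This reuses only the
# rvae.sample(...) call shown above:
#
#     z_a = np.random.normal(size=[1, parameters.latent_variable_size])
#     z_b = np.random.normal(size=[1, parameters.latent_variable_size])
#     for alpha in np.linspace(0.0, 1.0, num=7):
#         z = (1 - alpha) * z_a + alpha * z_b
#         print(rvae.sample(batch_loader, 50, z, args.use_cuda))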
assert os.path.exists('trained_RVAE'), \
    'trained model not found'

parser = argparse.ArgumentParser(description='Sampler')
parser.add_argument('--use-cuda', type=bool, default=True, metavar='CUDA',
                    help='use cuda (default: True)')
# parser.add_argument('--num-sample', type=int, default=10, metavar='NS',
#                     help='num samplings (default: 10)')
args = parser.parse_args()

batch_loader = BatchLoader('')
parameters = Parameters(batch_loader.max_word_len, batch_loader.max_seq_len,
                        batch_loader.words_vocab_size,
                        batch_loader.chars_vocab_size)

rvae = RVAE(parameters)
rvae.load_state_dict(torch.load('trained_RVAE'))
if args.use_cuda:
    rvae = rvae.cuda()

seq_len = 50
seed = np.random.normal(size=[1, parameters.latent_variable_size])

data = [["how are you ?"], ["how are you doing"]]
data_words = [[line.split() for line in target] for target in data]