def train(x_train, y_train, args):
    model = RNN(args.batch_size, 1, 2).model
    # Integer division: range() requires an int in Python 3.
    num_batches = x_train.shape[0] // args.batch_size
    for epoch in range(args.epochs):
        mean_loss = []
        for i in range(num_batches):
            for j in range(x_train.shape[1]):
                # Feed one timestep at a time; the stateful model carries
                # its hidden state across the j loop.
                loss = model.train_on_batch(
                    np.expand_dims(
                        x_train[i * args.batch_size:(i + 1) * args.batch_size, j, :],
                        axis=1),
                    np.expand_dims(
                        np.array([
                            y_train[i * args.batch_size:(i + 1) * args.batch_size, j]
                        ]).T,
                        axis=1))
                mean_loss.append(loss)
            model.reset_states()
        print("error: ", np.mean(mean_loss))
    return model
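# Usage sketch for train() above -- a minimal sketch, not from the source.
# Assumes RNN(batch_size, 1, 2).model is a stateful Keras-style model and
# that x_train is (samples, timesteps, features) with a matching 2-D
# y_train; the shapes and Namespace fields here are illustrative only.
import argparse

import numpy as np

args = argparse.Namespace(batch_size=8, epochs=2)
x_train = np.random.randn(64, 10, 1).astype("float32")
y_train = np.random.randn(64, 10).astype("float32")
trained_model = train(x_train, y_train, args)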
def test__rnn_forward(self):
    input_size = 5
    hidden_size = 32
    rnn = RNN(input_size, hidden_size=hidden_size)
    lengths = torch.tensor([4, 3, 3, 2])
    batch_size = len(lengths)
    inputs = [
        torch.randn(lengths[i], input_size) for i in range(batch_size)
    ]
    outputs, lens = rnn._rnn_forward(inputs, truncation_length=None)
    # Shape is (max_len, batch, hidden); max(lengths) == 4.
    self.assertEqual(outputs.shape, (4, batch_size, hidden_size))
    # Check that gradients are flowing.
    outputs.mean().backward()
    for i in range(rnn.num_layers):
        self.assertTrue(rnn.cells[i].weight_hh.grad.abs().sum() > 0)
    full_outputs = outputs.detach().clone()
    # Now check that gradients can be truncated without affecting the
    # output of the forward pass.
    trunc_outputs, lens = rnn._rnn_forward(inputs, truncation_length=2)
    self.assertEqual(trunc_outputs.shape, full_outputs.shape)
    self.assertAlmostEqual(
        (trunc_outputs.detach() - full_outputs).abs().sum().item(), 0)
def forward(self, x):
    outs = []
    for l in self.conv1s:
        out = pad_layer(x, l)
        outs.append(out)
    out = torch.cat(outs + [x], dim=1)
    out = F.leaky_relu(out, negative_slope=self.ns)
    out = self.conv_block(out, [self.conv2], [self.ins_norm1, self.drop1],
                          res=False)
    out = self.conv_block(out, [self.conv3, self.conv4],
                          [self.ins_norm2, self.drop2])
    out = self.conv_block(out, [self.conv5, self.conv6],
                          [self.ins_norm3, self.drop3])
    out = self.conv_block(out, [self.conv7, self.conv8],
                          [self.ins_norm4, self.drop4])
    # dense layers
    out = self.dense_block(out, [self.dense1, self.dense2],
                           [self.ins_norm5, self.drop5], res=True)
    out = self.dense_block(out, [self.dense3, self.dense4],
                           [self.ins_norm6, self.drop6], res=True)
    out_rnn = RNN(out, self.RNN)
    out = torch.cat([out, out_rnn], dim=1)
    out = linear(out, self.linear)
    mean = RNN(out, self.mean)
    log_var = RNN(out, self.log_var)
    if self.one_hot:
        out = gumbel_softmax(out)
    else:
        out = F.leaky_relu(out, negative_slope=self.ns)
    return out, mean, log_var
def main():
    prepare()
    print(print_str.format("Begin loading data"))
    net = RNN(90, 256, 2, 2, 0.1)
    if use_cuda():
        net = net.cuda()
    optimizer = torch.optim.Adam(net.parameters(), lr=0.1)
    cross_entropy = nn.CrossEntropyLoss()

    if mode == "train":
        train_data, train_label, train_wav_ids, train_lengths = load_rnn_data(
            "train", train_protocol, mode=mode, feature_type=feature_type)
        train_dataset = ASVDataSet(train_data, train_label,
                                   wav_ids=train_wav_ids, mode=mode,
                                   lengths=train_lengths)
        train_dataloader = DataLoader(train_dataset, batch_size=batch_size,
                                      num_workers=4, shuffle=True)
        for epoch in range(num_epochs):
            correct = 0
            total = 0
            total_loss = 0
            for tmp in tqdm(train_dataloader,
                            desc="Epoch {}".format(epoch + 1)):
                data = tmp['data']
                label = tmp['label']
                length = tmp['length']
                # Trim padding to the longest sequence in this batch.
                max_len = int(torch.max(length))
                data = data[:, :max_len, :]
                label = label[:, :max_len]
                # Sort by length (descending) for packed-sequence RNNs.
                sorted_length, indices = torch.sort(length.view(-1), dim=0,
                                                    descending=True)
                sorted_length = sorted_length.long().numpy()
                data, label = data[indices], label[indices]
                data, label = Variable(data), Variable(label).view(-1)
                if use_cuda():
                    data, label = data.cuda(), label.cuda()

                optimizer.zero_grad()
                outputs, out_length = net(data, sorted_length)
                loss = cross_entropy(outputs, label)
                loss.backward()
                optimizer.step()

                # loss.data[0] was removed in PyTorch >= 0.4; use item().
                total_loss += loss.item()
                _, predict = torch.max(outputs, 1)
                correct += (predict.data == label.data).sum().item()
                total += label.size(0)
            print("Loss: {} \t Acc: {}".format(
                total_loss / len(train_dataloader), correct / total))
def __init__(self,
             loss_flag=False,
             checkpoint_name='./final_checkpoint/re3_final_checkpoint.pth'):
    self.device = device
    self.CNN = CNN(1, 1).to(self.device)
    self.RNN = RNN(CNN_OUTPUT_SIZE, 1, 1, True).to(self.device)
    if os.path.isfile(checkpoint_name):
        checkpoint = torch.load(checkpoint_name, map_location='cpu')
        self.CNN.load_state_dict(checkpoint['cnn_model_state_dict'])
        self.RNN.load_state_dict(checkpoint['rnn_model_state_dict'])
    else:
        print("Invalid or missing checkpoint. Aborting.")
        sys.exit()
    self.forward_count = -1
    self.previous_frame = None
    self.cropped_input = np.zeros((2, 3, CROP_SIZE, CROP_SIZE),
                                  dtype=np.float32)
    self.calculate_loss = loss_flag
    self.criterion = nn.MSELoss()
    self.MSE_loss = 0
def __init__(self, epoch, sn=False):
    # Device configuration
    self.device = torch.device(
        'cuda:0' if torch.cuda.is_available() else 'cpu')

    # Hyper-parameters
    self.__sequence_length = 50
    self.__input_size = 78
    self.__hidden_size = 256
    self.__num_layers = 3
    self.__num_classes = 7
    self.__batch_size = 100  # 256
    self.__num_epochs = epoch
    self.__learning_rate = 0.00005
    self.__weight_decay = 0.0001  # 0.0001
    self.__vat_alpha = 0.1

    self.model = RNN(self.__input_size, self.__hidden_size,
                     self.__num_layers, self.__num_classes,
                     sn).to(self.device)
    self.vat_loss = VATLoss(xi=0.1, eps=1.0, ip=1)
    self.criterion = nn.CrossEntropyLoss()
    self.optimizer = torch.optim.Adam(self.model.parameters(),
                                      lr=self.__learning_rate,
                                      weight_decay=self.__weight_decay)
    self.data_load()
def generate_smiles(n_smiles=500,
                    restore_from="data/Prior.ckpt",
                    voc_file="data/Voc",
                    embedding_size=128):
    """Load a trained RNN checkpoint and its vocabulary file, then
    generate n_smiles new SMILES strings."""
    # Round down to a multiple of the sampling batch size.
    n = 32
    n_smiles = n_smiles - n_smiles % n
    print("Generating %i smiles" % n_smiles)
    voc = Vocabulary(init_from_file=voc_file)
    generator = RNN(voc, embedding_size)
    if torch.cuda.is_available():
        generator.rnn.load_state_dict(torch.load(restore_from))
    else:
        generator.rnn.load_state_dict(
            torch.load(restore_from,
                       map_location=lambda storage, loc: storage))
    all_smiles = []
    for i in range(int(n_smiles / n)):
        sequences, _, _ = generator.sample(n)
        smiles = seq_to_smiles(sequences, voc)
        all_smiles += smiles
    # Free up memory.
    del generator
    torch.cuda.empty_cache()
    return all_smiles
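# Usage sketch for generate_smiles() above, assuming the default checkpoint
# ("data/Prior.ckpt") and vocabulary ("data/Voc") files exist; the output
# filename is hypothetical.
smiles = generate_smiles(n_smiles=128)
with open("generated.smi", "w") as f:
    for smi in smiles:
        f.write(smi + "\n")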
def main():
    config = ConfigRNN.instance()
    loader = ACLIMDB(batch_size=config.BATCH_SIZE,
                     embed_method=config.EMBED_METHOD,
                     is_eval=config.EVAL_MODE,
                     debug=config.CONSOLE_LOGGING)
    embedding_model = loader.data.embedding_model

    # TODO(hyungsun): This code is temporary. Remove it later.
    if config.SAVE_EMBED_MODEL:
        embedding_model.save("embed_model.wv")
        return

    if embedding_model == "DEFAULT":
        model = RNN()
    else:
        vectors = loader.data.embedding_model.wv.vectors
        # Prepend a zero vector as padding for masking.
        vectors = np.append(np.array([100 * [0]]), vectors, axis=0)
        model = RNN(torch.from_numpy(vectors).float())
    optimizer = torch.optim.SGD(model.parameters(),
                                lr=config.LEARNING_RATE,
                                weight_decay=config.WEIGHT_DECAY)
    trainer = RNNTrainer(model, loader, optimizer)
    if config.EVAL_MODE:
        trainer.evaluate()
    else:
        trainer.train(config.MAX_EPOCH)
def test_model(args):
    # Hyper-parameters
    sequence_length = args.seq_len
    input_size = args.input_size
    hidden_size = args.hidden_size
    num_layers = args.num_layers
    num_classes = args.num_classes
    batch_size = args.batch_size
    num_epochs = args.num_epochs
    learning_rate = args.learning_rate
    dropout = args.dropout

    # Load back the best-performing model.
    rnn = RNN('LSTM', input_size, hidden_size, num_layers, num_classes,
              dropout)
    if args.cuda:
        rnn = rnn.cuda()
    rnn.load_state_dict(torch.load(args.model_path))

    # train_dataset = create_dataset('data/train/', timesteps=sequence_length)
    # train_loader = dataloader(train_dataset, batch_size=batch_size)
    test_dataset = create_dataset('data/test/', timesteps=sequence_length)
    test_loader = dataloader(test_dataset, batch_size=batch_size)

    print('-' * 50)
    # print('training accuracy = %.4f, test accuracy = %.4f'
    #       % (eval_model(rnn, train_loader), eval_model(rnn, test_loader)))
    # print('training accuracy = %.4f' % eval_model(rnn, train_loader))
    print('test accuracy = %.4f' % eval_model(rnn, test_loader))
    # print('test f1-score = %.4f' % get_f1score(rnn, test_loader))
    print_confusion_matrix(rnn, test_loader)
def __init__(self,
             training_file='../res/trump_tweets.txt',
             model_file='../res/model.pt',
             n_epochs=1000000,
             hidden_size=256,
             n_layers=2,
             learning_rate=0.001,
             chunk_len=140):
    self.training_file = training_file
    self.model_file = model_file
    self.n_epochs = n_epochs
    self.hidden_size = hidden_size
    self.n_layers = n_layers
    self.learning_rate = learning_rate
    self.chunk_len = chunk_len
    self.file, self.file_len = read_file(training_file)
    if os.path.isfile(model_file):
        self.decoder = torch.load(model_file)
        print('Loaded old model!')
    else:
        self.decoder = RNN(n_characters, hidden_size, n_characters,
                           n_layers)
        print('Constructed new model!')
    self.decoder_optimizer = torch.optim.Adam(self.decoder.parameters(),
                                              learning_rate)
    self.criterion = nn.CrossEntropyLoss()
    self.generator = Generator(self.decoder)
def __init__(self, model_name, sn=False):
    # Device configuration
    self.device = torch.device(
        'cuda:0' if torch.cuda.is_available() else 'cpu')

    # Hyper-parameters
    self.__sequence_length = 50
    self.__input_size = 78
    self.__hidden_size = 256
    self.__num_layers = 3
    self.__num_classes = 7

    # Min-max normalization parameters
    self.__x_min = -2259.0780484289285
    self.__x_max = 2548.842436486494
    self.__y_min = -1186.3449394557435
    self.__y_max = 939.5449823147761
    self.__z_min = 1000.04
    self.__z_max = 3323.48
    self.__v_min = np.array([self.__x_min, self.__y_min, self.__z_min])
    self.__v_max = np.array([self.__x_max, self.__y_max, self.__z_max])
    self.__max_min = self.__v_max - self.__v_min

    self.model = RNN(self.__input_size, self.__hidden_size,
                     self.__num_layers, self.__num_classes,
                     sn).to(self.device)
    self.param = torch.load(model_name)
    self.model.load_state_dict(self.param)
def main(voc_file='data/Voc',
         restore_model_from='data/Prior.ckpt',
         output_file='data/Prior_10k.smi',
         sample_size=10000):
    voc = Vocabulary(init_from_file=voc_file)

    print("Setting up networks")
    Agent = RNN(voc)
    if torch.cuda.is_available():
        print("Cuda available, loading prior & agent")
        Agent.rnn.load_state_dict(torch.load(restore_model_from))
    else:
        # Raising a string is invalid in Python 3; raise an exception.
        raise RuntimeError('Cuda not available')

    SMILES = []
    for n in tqdm(range(sample_size // 100), total=sample_size // 100):
        # Sample from the Agent in batches of 100.
        seqs, agent_likelihood, entropy = Agent.sample(100)
        # Remove duplicates, i.e. only consider unique sequences.
        unique_idxs = unique(seqs)
        seqs = seqs[unique_idxs]
        agent_likelihood = agent_likelihood[unique_idxs]
        entropy = entropy[unique_idxs]
        smiles = seq_to_smiles(seqs, voc)
        SMILES += smiles

    if not os.path.exists(os.path.dirname(output_file)):
        os.makedirs(os.path.dirname(output_file))
    with open(output_file, "wt") as f:
        for smi in SMILES:
            f.write(smi + '\n')
    return
def train(train: Examples, model: RNN, optimizer, criterion):
    epoch_loss = 0
    epoch_acc = 0
    count = 0
    model.train()
    for x, y, z in batch(train.shuffled(), config.batch_size):
        x, y, z = (get_long_tensor(x), get_long_tensor(y).float(),
                   get_long_tensor(z))
        optimizer.zero_grad()
        if config.setting == 'RNN':
            predictions = model(x).squeeze(1)
        else:
            predictions = model(x, z).squeeze(1)
        loss = criterion(predictions, y)
        acc = binary_accuracy(predictions, y)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
        epoch_acc += acc.item()
        count += 1
    return epoch_loss / count, epoch_acc / count
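# Usage sketch for train() above -- one call per epoch, logging the running
# loss/accuracy it returns. BCEWithLogitsLoss is an assumption (the targets
# are floats and binary_accuracy is used); `train_examples`, `model`, and
# `config` are assumed to be constructed elsewhere in this codebase.
import torch
import torch.nn as nn

criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters())
for epoch in range(config.epochs):
    epoch_loss, epoch_acc = train(train_examples, model, optimizer, criterion)
    print("epoch %d: loss=%.4f acc=%.4f" % (epoch, epoch_loss, epoch_acc))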
def checkAudio(path):
    rnn = RNN(input_size, hidden_size, num_layers, num_classes)
    rnn.load_state_dict(torch.load('./rnn.pth'))
    sample_rate, waveform = wa.read(path)
    mfcc_feature = mfcc(waveform, sample_rate, nfft=1256)
    test_data = torch.Tensor(mfcc_feature)
    test_data = test_data.type(torch.float32)
    test_pred = rnn(test_data.view(-1, 1, 13))
    test_pred = test_pred[0]
    # Pass dim explicitly; softmax without it is deprecated.
    prob = torch.nn.functional.softmax(test_pred, dim=-1)
    pre_cls = torch.argmax(prob)
    # Map the predicted class to the corresponding line of the poem.
    if pre_cls == torch.tensor(0):
        answer = '望门投止思张俭'
    elif pre_cls == torch.tensor(1):
        answer = '忍死须臾待杜根'
    elif pre_cls == torch.tensor(2):
        answer = '我自横刀向天笑'
    else:
        answer = '去留肝胆两昆仑'
    return answer
def train(args):
    common.make_dir(args.checkout_dir)
    # nnet
    nnet = RNN((args.left_context + args.right_context + 1) * args.feat_dim,
               hidden_layer, hidden_size, args.num_classes, dropout=dropout)
    print(nnet)
    nnet.cuda()
    criterion = nn.CrossEntropyLoss()
    optimizer = th.optim.Adam(nnet.parameters(), lr=args.learning_rate)
    train_dataset = THCHS30(root=args.data_dir, data_type='train')
    train_loader = data.DataLoader(dataset=train_dataset,
                                   batch_size=args.min_batch,
                                   shuffle=True)
    test_dataset = THCHS30(root=args.data_dir, data_type='test')
    test_loader = data.DataLoader(dataset=test_dataset,
                                  batch_size=args.min_batch,
                                  shuffle=True)
    cross_validate(-1, nnet, test_loader, test_dataset.num_frames)
    for epoch in range(args.num_epochs):
        common.train_one_epoch(nnet, criterion, optimizer, train_loader,
                               is_rnn=True)
        cross_validate(epoch, nnet, test_loader, test_dataset.num_frames)
        th.save(nnet,
                common.join_path(args.checkout_dir,
                                 'rnn.{}.pkl'.format(epoch + 1)))
def __init__(self):
    self.config = CONFIG
    self.config['graph_part_configs']['lemm']['use_cls_placeholder'] = True
    self.rnn = RNN(True)
    self.chars = {c: index for index, c in enumerate(self.config['chars'])}
    self.batch_size = 65536
    self.show_bad_items = False
def __init__(self):
    self.config = config()
    self.dataset_path = self.config['dict_path']
    self.model_key = self.config['model_key']
    self.chars = self.config['chars']
    self.gram_types = self.config['grammemes_types']
    self.rnn = RNN(True)
    self.pd_publish_paths = [
        os.path.join(path, f"frozen_model_{self.model_key}.pb")
        for path in self.config['publish_net_paths']
    ]
    self.xml_publish_paths = [
        os.path.join(path, f"release_{self.model_key}.xml")
        for path in self.config['publish_net_paths']
    ]
    self.xml_gram_paths = [
        os.path.join(path, "grams.xml")
        for path in self.config['publish_gramm_paths']
    ]
    self.test_result_paths = [
        os.path.join(path, "test_info.txt")
        for path in self.config['test_results_paths']
    ]
    self.publish_dataset_info_paths = [
        os.path.join(path, "dataset_info.txt")
        for path in self.config['publish_dataset_info_paths']
    ]
    self.tests_results_paths = self.config['publish_test_paths']
    self.classes_dic = self.config['main_classes']
    # Name the grammeme items explicitly instead of shadowing `key`.
    self.rev_classes_dic = {
        self.classes_dic[key]: ",".join(g for g in key if g is not None)
        for key in self.classes_dic
    }
def load_model(model_path, input_stoi):
    # Positional arguments presumably mirror the RNN constructor: vocab
    # size, embedding dim, hidden dim, output dim, num layers,
    # bidirectional flag, dropout, padding index.
    model = RNN(
        len(set(input_stoi.values())), 100, 256, 1, 2, True, 0.5,
        input_stoi['<pad>']
    )
    model.load_state_dict(torch.load(model_path))
    model = model.eval()
    return model
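# Usage sketch for load_model() above. The pickle path for the saved
# token-to-index mapping is hypothetical, and the exact input shape the
# model expects depends on the RNN definition, so the forward call is only
# indicative.
import pickle

import torch

with open("input_stoi.pkl", "rb") as f:
    input_stoi = pickle.load(f)
model = load_model("model.pt", input_stoi)
with torch.no_grad():
    token_ids = torch.randint(0, len(set(input_stoi.values())), (7, 1))
    predictions = model(token_ids)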
def __init__(self, input_size, hidden_size, output_size, n_layers=1,
             gpu=-1):
    self.decoder = RNN(input_size, hidden_size, output_size, n_layers, gpu)
    if gpu >= 0:
        print("Use GPU %d" % torch.cuda.current_device())
        self.decoder.cuda()
    self.optimizer = torch.optim.Adam(self.decoder.parameters(), lr=0.01)
    self.criterion = nn.CrossEntropyLoss()
def test_sample(self):
    input_size = 5
    hidden_size = 32
    rnn = RNN(input_size, hidden_size=hidden_size)
    stop_token = 4
    bytestring, probs, entropies = rnn.sample(stop_token, maxlen=20)
    self.assertTrue(max(bytestring) < rnn.input_size)
    self.assertTrue(min(bytestring) >= 0)
def load_model(args, train_len):
    model = RNN(args.emb_dim, args.hidden_dim)
    if torch.cuda.is_available():
        model.cuda()
    loss_fnc = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=args.lr)
    return model, loss_fnc, optimizer
def pretrain(restore_from=None):
    """Trains the Prior RNN"""

    # Read vocabulary from a file
    voc = Vocabulary(init_from_file="data/Voc")

    # Create a Dataset from a SMILES file
    moldata = MolData("data/mols_filtered.smi", voc)
    data = DataLoader(moldata, batch_size=128, shuffle=True,
                      drop_last=True, collate_fn=MolData.collate_fn)

    Prior = RNN(voc)

    # Can restore from a saved RNN
    if restore_from:
        Prior.rnn.load_state_dict(torch.load(restore_from))

    optimizer = torch.optim.Adam(Prior.rnn.parameters(), lr=0.001)
    for epoch in range(1, 6):
        # When training on a few million compounds, this model converges
        # within a few epochs or even faster. If the model size is
        # increased, it is probably a good idea to check the loss against
        # an external validation set of SMILES to make sure we don't
        # overfit too much.
        for step, batch in tqdm(enumerate(data), total=len(data)):

            # Sample from DataLoader
            seqs = batch.long()

            # Calculate loss
            log_p, _ = Prior.likelihood(seqs)
            loss = -log_p.mean()

            # Calculate gradients and take a step
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Every 500 steps we decrease the learning rate and print some
            # information. NOTE: the trailing `and False` disables this
            # block as written.
            if step % 500 == 0 and step != 0 and False:
                decrease_learning_rate(optimizer, decrease_by=0.03)
                tqdm.write("*" * 50)
                # loss.data[0] was removed in PyTorch >= 0.4; use item().
                tqdm.write("Epoch {:3d} step {:3d} loss: {:5.2f}\n".format(
                    epoch, step, loss.item()))
                seqs, likelihood, _ = Prior.sample(128)
                valid = 0
                for i, seq in enumerate(seqs.cpu().numpy()):
                    smile = voc.decode(seq)
                    if Chem.MolFromSmiles(smile):
                        valid += 1
                    if i < 5:
                        tqdm.write(smile)
                tqdm.write("\n{:>4.1f}% valid SMILES".format(
                    100 * valid / len(seqs)))
                tqdm.write("*" * 50 + "\n")
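# Usage sketch for pretrain() above: train from scratch, or resume from an
# existing checkpoint. Note that pretrain() as written does not save the
# model itself; persisting weights would have to happen inside it.
if __name__ == "__main__":
    pretrain(restore_from=None)  # fresh run
    # pretrain(restore_from="data/Prior.ckpt")  # resume from a checkpoint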
def train_model():
    """Do transfer learning for generating SMILES"""
    voc = Vocabulary(init_from_file='data/Voc')
    cano_smi_file('refined_smii.csv', 'refined_smii_cano.csv')
    moldata = MolData('refined_smii_cano.csv', voc)
    # Monomers 67 and 180 were removed because of the unseen [C-] in voc.
    # DAs containing [se] [SiH2] [n] removed: 38 molecules.
    data = DataLoader(moldata, batch_size=64, shuffle=True, drop_last=False,
                      collate_fn=MolData.collate_fn)
    transfer_model = RNN(voc)
    if torch.cuda.is_available():
        transfer_model.rnn.load_state_dict(torch.load('data/Prior.ckpt'))
    else:
        transfer_model.rnn.load_state_dict(
            torch.load('data/Prior.ckpt',
                       map_location=lambda storage, loc: storage))

    # for param in transfer_model.rnn.parameters():
    #     param.requires_grad = False
    optimizer = torch.optim.Adam(transfer_model.rnn.parameters(), lr=0.001)
    for epoch in range(1, 10):
        for step, batch in tqdm(enumerate(data), total=len(data)):
            seqs = batch.long()
            log_p, _ = transfer_model.likelihood(seqs)
            loss = -log_p.mean()
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if step % 5 == 0 and step != 0:
                decrease_learning_rate(optimizer, decrease_by=0.03)
                tqdm.write('*' * 50)
                # loss.data[0] was removed in PyTorch >= 0.4; use item().
                tqdm.write("Epoch {:3d} step {:3d} loss: {:5.2f}\n".format(
                    epoch, step, loss.item()))
                seqs, likelihood, _ = transfer_model.sample(128)
                valid = 0
                for i, seq in enumerate(seqs.cpu().numpy()):
                    smile = voc.decode(seq)
                    if Chem.MolFromSmiles(smile):
                        valid += 1
                    if i < 5:
                        tqdm.write(smile)
                tqdm.write("\n{:>4.1f}% valid SMILES".format(
                    100 * valid / len(seqs)))
                tqdm.write("*" * 50 + '\n')
    torch.save(transfer_model.rnn.state_dict(), "data/transfer_model2.ckpt")
    torch.save(transfer_model.rnn.state_dict(), "data/transfer_modelw.ckpt")
def __init__(self):
    self.config = CONFIG
    self.dataset_path = self.config['dict_path']
    self.model_key = self.config['model_key']
    self.chars = self.config['chars']
    self.gram_types = self.config['grammemes_types']
    self.rnn = RNN(True)
    self.tester = Tester()
    self.pd_publish_paths = [
        os.path.join(path, f"frozen_model_{self.model_key}.pb")
        for path in self.config['publish_net_paths']
    ]
    self.xml_publish_paths = [
        os.path.join(path, f"release_{self.model_key}.xml")
        for path in self.config['publish_net_paths']
    ]
    self.xml_gram_paths = [
        os.path.join(path, "grams.xml")
        for path in self.config['publish_gramm_paths']
    ]
    self.xml_numbers_paths = [
        os.path.join(path, "numbers.xml")
        for path in self.config['publish_numbers_paths']
    ]
    self.xml_tags_paths = [
        os.path.join(path, "tags.xml")
        for path in self.config['publish_tags_paths']
    ]
    self.test_result_paths = [
        os.path.join(path, "test_info.txt")
        for path in self.config['test_results_paths']
    ]
    self.publish_dataset_info_paths = [
        os.path.join(path, "dataset_info.txt")
        for path in self.config['publish_dataset_info_paths']
    ]
    self.public_inflect_templates_paths = [
        os.path.join(path, "inflect_templates.xml")
        for path in self.config['public_inflect_templates_paths']
    ]
    self.classes_dic = self.config['main_classes']
    # Name the grammeme items explicitly instead of shadowing `key`.
    self.rev_classes_dic = {
        self.classes_dic[key]: ",".join(g for g in key if g is not None)
        for key in self.classes_dic
    }
    with open(CONFIG['tags_path'], 'rb') as f:
        self.tags = pickle.load(f)
    with open(CONFIG['numb_data_path'], 'rb') as f:
        self.numb_data = pickle.load(f)
    with open(self.config['inflect_templates_path'], 'rb') as f:
        self.inflect_templates = pickle.load(f)
def main(_):
    # Load data.
    data, ix2word, word2ix = load_data()
    num_train = data.shape[0]
    vocab_size = len(ix2word)

    # Placeholders for training.
    X = tf.placeholder(tf.int32, [BATCH_SIZE, None])
    y = tf.placeholder(tf.int32, [BATCH_SIZE, None])
    rnn_model = RNN(model=model, batch_size=BATCH_SIZE,
                    vocab_size=vocab_size, embedding_dim=embedding_dim,
                    n_neurons=n_neurons, n_layers=3, lr=lr,
                    keep_prob=keep_prob)
    loss, optimizer = rnn_model.train(X, y)

    # Start training.
    start_time = time.time()
    with tf.Session() as sess:
        # Visualize the graph and write the loss into logs.
        merged = tf.summary.merge_all()
        writer = tf.summary.FileWriter('./logs/', sess.graph)
        tf.global_variables_initializer().run()
        print("=" * 15 + "start training" + "=" * 15)
        for epc in range(NUM_EPOCH):
            print("=" * 15, "epoch: %d" % epc, "=" * 15)
            for step in range(num_train // BATCH_SIZE):
                # Get batch data; targets are the inputs shifted by one
                # token.
                idx_start = step * BATCH_SIZE
                idx_end = idx_start + BATCH_SIZE
                batch_data = data[idx_start:idx_end, ...]
                x_data = batch_data[:, :-1]
                y_data = batch_data[:, 1:]
                feed_dict = {X: x_data, y: y_data}
                sess.run(optimizer, feed_dict=feed_dict)
                # Print evaluation results every eval_frequence steps.
                if step % eval_frequence == 0:
                    l = sess.run(loss, feed_dict=feed_dict)
                    summary = sess.run(merged, feed_dict=feed_dict)
                    writer.add_summary(
                        summary, (epc * num_train // BATCH_SIZE) + step)
                    input_seq = "湖光秋月两相和"
                    result = generate_poem(rnn_model=rnn_model, sess=sess,
                                           input_seqs=input_seq,
                                           ix2word=ix2word,
                                           word2ix=word2ix,
                                           max_len=125, prefix_words=None)
                    result_poem = ''.join(result)
                    run_time = time.time() - start_time
                    start_time = time.time()
                    print("step: %d, run time: %.1f ms"
                          % (step, run_time * 1000 / eval_frequence))
                    print("minibatch loss: %d" % l)
                    print("generated poem length: %d, poem is: %s"
                          % (len(result_poem), result_poem))
                    sys.stdout.flush()
        # Save the model.
        if SAVE:
            saver = tf.train.Saver()
            saver.save(sess, CKPT_PATH + 'rnn_model.ckpt')
def test_forward(self):
    input_size = 5
    hidden_size = 32
    rnn = RNN(input_size, hidden_size=hidden_size)
    lengths = [5, 3, 4, 2, 4]
    batch_size = len(lengths)
    inputs = [
        torch.randn(lengths[i], input_size) for i in range(batch_size)
    ]
    logits, lens = rnn.forward(inputs)
    self.assertEqual(lens, sorted(lengths, key=lambda x: -x))
    self.assertEqual(logits.shape, (batch_size, input_size, max(lengths)))
def main(): print("extracting corpus... ") # 导入词典 C = Corpus(conf) word2id, vocab_size = C.word2id, len(C.word2id) id2word = C.id2word # 导入数据 print("extracting data... ") train_data, valid_data, test_data = C.build_dataset(conf) train_size = train_data.size(1) # 实例化模型 model = RNN(vocab_size, conf.embed_size, conf.hidden_size, conf.num_layers).to(device) criterion = nn.CrossEntropyLoss() optimizer = torch.optim.Adam(model.parameters(), lr=conf.learning_rate) # 训练开始 loss_count = [] for epoch in range(conf.num_epochs): print("="*20,"epoch: %d" % epoch, "="*20) states = (torch.zeros(conf.num_layers, conf.batch_size, conf.hidden_size).to(device), torch.zeros(conf.num_layers, conf.batch_size, conf.hidden_size).to(device)) for i in range(0, train_size-conf.seq_length, conf.seq_length): batch_x = train_data[:, i:(i+conf.seq_length)].to(device) batch_y = train_data[:, (i+1) : ((i+1+conf.seq_length)%train_size)].to(device) # 前传 states = detach(states) outputs,states = model(batch_x, states) loss = criterion(outputs, batch_y.reshape(-1)) # BP optimizer.zero_grad() loss.backward() clip_grad_norm_(model.parameters(), 0.5) optimizer.step() step = (i+1) // conf.seq_length if step % conf.print_per_batch == 0: loss_count.append(loss.item()) valid_acc = eval_model(valid_data, conf, states, model) print("step: %d,\t Loss: %.3f,\t train Perplextity: %.3f,\t validation Perplextity: %.3f." % ( step, loss.item(), np.exp(loss.item()), valid_acc*100 )) # 展示loss曲线 save_results(loss_count, conf.result_fig_path, show=conf.show_loss) # 保存模型 if conf.save_model: print("save model: %s" % conf.model_path) torch.save(model, conf.model_path)
def Transfer(restore_from=None):
    """Fine-tunes the Prior RNN on transfer-learning data"""
    voc = Vocabulary(init_from_file="./Voc")
    moldata = MolData("tl_filtered.smi", voc)
    data = DataLoader(moldata, batch_size=32, shuffle=True, drop_last=True,
                      collate_fn=MolData.collate_fn)

    Prior = RNN(voc)

    # Can restore from a saved RNN
    if restore_from:
        Prior.rnn.load_state_dict(torch.load(restore_from))

    optimizer = torch.optim.Adam(Prior.rnn.parameters(), lr=0.001)
    for epoch in range(1, 101):
        for step, batch in tqdm(enumerate(data), total=len(data)):

            # Sample from DataLoader
            seqs = batch.long()

            # Calculate loss
            log_p, _ = Prior.likelihood(seqs)
            loss = -log_p.mean()

            # Calculate gradients and take a step
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Every 2 epochs we decrease the learning rate.
            if epoch % 2 == 0 and step == 1:
                decrease_learning_rate(optimizer, decrease_by=0.03)

            # Every 10 epochs we print some information and sample SMILES.
            if epoch % 10 == 0 and step == 1:
                tqdm.write("*" * 50)
                # loss.data[0] was removed in PyTorch >= 0.4; use item().
                tqdm.write("Epoch {:3d} step {:3d} loss: {:5.2f}\n".format(
                    epoch, step, loss.item()))
                seqs, likelihood, _ = Prior.sample(100)
                valid = 0
                f = open('tran_output.smi', 'a')
                for i, seq in enumerate(seqs.cpu().numpy()):
                    smile = voc.decode(seq)
                    if Chem.MolFromSmiles(smile):
                        valid += 1
                        f.write(smile + "\n")
                    if i < 10:
                        tqdm.write(smile)
                f.close()
                tqdm.write("\n{:>4.1f}% valid SMILES".format(
                    100 * valid / len(seqs)))
                tqdm.write("*" * 50 + "\n")

    # Save the Prior
    torch.save(Prior.rnn.state_dict(), "data/100_epochs_transfer.ckpt")
def run():
    category_lines, all_categories, n_categories = init_cate_dict()
    rnn = RNN(n_letters, n_categories)
    rnn.cuda()
    train_set, test_set = get_data_set(category_lines)
    random.shuffle(train_set)
    for e in range(EPOCH):
        batch_train(rnn, train_set, all_categories)
        model_testing(rnn, test_set, all_categories)
    model_path = os.path.join(os.getcwd(), 'rnn3.pkl')
    torch.save(rnn, model_path)  # save the whole network
def main():
    config = ConfigRNN.instance()
    loader = ACLIMDB(batch_size=config.BATCH_SIZE,
                     word_embedding=config.WORD_EMBEDDING,
                     is_eval=False,
                     debug=config.DEBUG_MODE)
    vectors = loader.data.embedding_model.wv.vectors
    model = RNN(torch.from_numpy(vectors).float())
    optimizer = torch.optim.SGD(model.parameters(),
                                lr=config.LEARNING_RATE,
                                weight_decay=config.WEIGHT_DECAY)
    trainer = RNNTrainer(model, loader, optimizer)
    trainer.train(config.MAX_EPOCH, config.BATCH_SIZE)