def _load_model(model_type):
    """Build the selected language model and restore its saved weights.

    Args:
        model_type: ``'RNN'`` selects the RNN architecture and the
            ``RNN_ADAM_0`` checkpoint; any other value selects the GRU and
            the ``GRU_SGD_LR_SCHEDULE_0`` checkpoint.

    Returns:
        The model with its weights loaded, placed on the GPU when CUDA is
        available (CPU otherwise), and switched to evaluation mode.
    """
    emb_size = 200
    hidden_size = 1500
    seq_len = 35  # 70
    batch_size = 20
    vocab_size = 10000
    num_layers = 2
    dp_keep_prob = 0.35

    # Load model (Change to RNN if you want RNN to predict)
    if model_type == 'RNN':
        model = RNN(emb_size, hidden_size, seq_len, batch_size,
                    vocab_size, num_layers, dp_keep_prob)
        PATH = os.path.join("RNN_ADAM_0", "best_params.pt")
    else:
        model = GRU(emb_size, hidden_size, seq_len, batch_size,
                    vocab_size, num_layers, dp_keep_prob)
        PATH = os.path.join("GRU_SGD_LR_SCHEDULE_0", "best_params.pt")

    # load_state_dict() does not return the module, so chaining .cuda() onto
    # it fails; load first, then move the model in a separate call.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.load_state_dict(torch.load(PATH, map_location=device))
    model.to(device)
    model.eval()
    return model
class Tester:
    """Generate captions for the test set and score them with ``coco_eval``."""

    def __init__(self, _hparams):
        """Build the test loader and the encoder/decoder pair on ``DEVICE``.

        :param _hparams: hyper-parameter object; this class reads
            ``fea_dim``, ``embed_dim``, ``hid_dim``, ``max_sen_len``,
            ``vocab_pkl`` and ``test_cap`` from it and hands the whole
            object to ``get_test_loader``.
        """
        self.test_loader = get_test_loader(_hparams)
        self.encoder = CNN().to(DEVICE)
        self.decoder = RNN(fea_dim=_hparams.fea_dim,
                           embed_dim=_hparams.embed_dim,
                           hid_dim=_hparams.hid_dim,
                           max_sen_len=_hparams.max_sen_len,
                           vocab_pkl=_hparams.vocab_pkl).to(DEVICE)
        self.test_cap = _hparams.test_cap

    def testing(self, save_path, test_path):
        """Run the test pass: caption every image, dump the captions, print scores.

        :param save_path: path of the saved model checkpoint
        :param test_path: path where the generated sentences are written as JSON
        :return: None
        """
        print('*' * 20, 'test', '*' * 20)
        self.load_models(save_path)
        self.set_eval()
        sen_json = []
        with torch.no_grad():
            # Wrapping the loader itself (rather than enumerate(...)) lets
            # tqdm see its length; the step index was never used.
            for img, img_id in tqdm(self.test_loader):
                img = img.to(DEVICE)
                features = self.encoder.forward(img)
                sens, _ = self.decoder.sample(features)
                # NOTE(review): int(img_id) and sens[0] presume one image per
                # batch - confirm against the test loader's batch size.
                sen_json.append({'image_id': int(img_id), 'caption': sens[0]})

        with open(test_path, 'w') as f:
            json.dump(sen_json, f)

        result = coco_eval(self.test_cap, test_path)
        for metric, score in result:
            print(metric, score)

    def load_models(self, save_path):
        """Restore encoder and decoder weights from the checkpoint at ``save_path``.

        Tensors are remapped onto ``DEVICE`` so the checkpoint loads no matter
        which card it was saved from (the previous fixed ``cuda:2 -> cuda:0``
        mapping covered only that single pairing).
        """
        ckpt = torch.load(save_path, map_location=DEVICE)
        self.encoder.load_state_dict(ckpt['encoder_state_dict'])
        self.decoder.load_state_dict(ckpt['decoder_state_dict'])

    def set_eval(self):
        """Put both the encoder and the decoder into evaluation mode."""
        self.encoder.eval()
        self.decoder.eval()
def _load_model(emb_size, hidden_size, seq_len, batch_size, vocab_size,
                num_layers, dp_keep_prob, PATH, model_type):
    """Build an RNN or GRU with the given hyper-parameters and load its weights.

    Args:
        emb_size, hidden_size, seq_len, batch_size, vocab_size, num_layers,
        dp_keep_prob: forwarded positionally, in this order, to the model
            constructor.
        PATH: path of the saved ``state_dict``.
        model_type: ``'RNN'`` selects the RNN; any other value selects the GRU.

    Returns:
        The model with its weights loaded, placed on the GPU when CUDA is
        available (CPU otherwise), and switched to evaluation mode.
    """
    # Load model (Change to RNN if you want RNN to predict)
    model_cls = RNN if model_type == 'RNN' else GRU
    model = model_cls(emb_size, hidden_size, seq_len, batch_size,
                      vocab_size, num_layers, dp_keep_prob)

    # load_state_dict() does not return the module, so chaining .cuda() onto
    # it fails; load first, then move the model in a separate call.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.load_state_dict(torch.load(PATH, map_location=device))
    model.to(device)
    model.eval()
    return model
def export_RNN_regressor(checkpoint_path):
    """
    Run the checkpointed RNN over the test set and write its predictions.

    The output is a CSV with a ``TweetID,NoRetweets`` header, written to
    ``checkpoints/<experiment_name>/predictions.txt``. Requires a CUDA device:
    the model and every batch are moved with ``.cuda()``.

    :param checkpoint_path: relative path to a PyTorch .pth checkpoint
    :return: None, dumps a prediction text file in the model's training folder
    """
    checkpoint = torch.load(checkpoint_path)
    model = RNN(checkpoint['net_config'])
    model.load_state_dict(checkpoint['model'])
    model = model.eval().cuda()

    test_dataset = TweetDataset(dataset_type='test')
    test_loader = DataLoader(test_dataset,
                             batch_size=TRAIN_CONFIG['batch_size'],
                             num_workers=TRAIN_CONFIG['workers'],
                             collate_fn=collate_function,
                             shuffle=False,
                             pin_memory=True)

    # The first column of every non-header row is the tweet id. The loader is
    # not shuffled, so predictions are matched to ids by running position.
    with open(DATASET_CONFIG['test_csv_relative_path'], newline='') as csvfile:
        reader = csv.reader(csvfile)
        next(reader, None)  # skip the header row
        ids = [row[0] for row in reader]

    n = len(test_loader)
    output_path = "checkpoints/{}/predictions.txt".format(
        checkpoint['train_config']['experiment_name'])

    # newline='' is what the csv module asks for on files it writes; without
    # it, platforms that translate line endings emit blank rows.
    with open(output_path, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(["TweetID", "NoRetweets"])
        current_idx = 0
        # Pure inference: no autograd graph is needed.
        with torch.no_grad():
            for batch_index, batch in enumerate(test_loader):
                printProgressBar(batch_index, n)
                batch_size = batch['numeric'].shape[0]
                numeric = batch['numeric'].cuda()
                text = batch['embedding'].cuda()

                prediction = model(text, numeric)
                if EXPORT_CONFIG['log']:
                    # NOTE(review): presumably undoes a log(1 + y) target
                    # transform applied at training time - confirm.
                    prediction = torch.exp(prediction) - 1

                threshold = EXPORT_CONFIG['threshold']
                if threshold:
                    # Cap predictions at the configured ceiling.
                    prediction[prediction > threshold] = threshold

                for idx_in_batch in range(batch_size):
                    writer.writerow([
                        str(ids[current_idx + idx_in_batch]),
                        str(int(prediction[idx_in_batch].item())),
                    ])
                current_idx += batch_size
    print("Exportation done! :)")
model.train() prediction = model(X_train_dep_std) loss = loss_func(prediction, y_train_dep_std) optimizer.zero_grad() # clear gradients for this training step loss.backward() # back propagation, compute gradients optimizer.step() if iter % 100 == 0: print("iteration: %s, loss: %s" % (iter, loss.item())) # Save model save_filename = 'checkpoints/LSTM_DOUBLE_FC.pth' torch.save(model, save_filename) print('Saved as %s' % save_filename) # Start evaluating model model.eval() y_pred_dep_ = model(X_test_dep_std).detach().numpy() y_pred_dep = ss_y_dep.inverse_transform(y_pred_dep_[0, 144:]) print('The value of R-squared of water table depth is ', r2_score(Outputs[144:], y_pred_dep)) print('The value of Root mean squared error of water table depth is ', rmse(Outputs[144:], y_pred_dep)) print('The value of mean squared error of water table depth is ', mean_squared_error(Outputs[144:], y_pred_dep)) f, ax1 = plt.subplots(1, 1, sharex=True, figsize=(6, 4)) ax1.plot(Outputs[144:], color="blue", linestyle="-", linewidth=1.5, label="Measurements") ax1.plot(y_pred_dep, color="green", linestyle="--", linewidth=1.5, label="Proposed model") plt.legend(loc='upper right')
bidirectional=args.bidirectional, tie_weights=args.tie_weights, nonlinearity=args.nonlinearity) else: # no embedding layer (one-hot encoding) model = OneHotRNN(vocabulary=vocab, rnn_type=args.rnn_type, hidden_size=args.hidden_size, n_layers=args.n_layers, dropout=args.dropout, bidirectional=args.bidirectional, nonlinearity=args.nonlinearity) # load the best model model.load_state_dict(torch.load(args.model_file)) model.eval() ## enable evaluation modes # set up output filename if args.sample_idx is not None: output_filename = 'sampled-SMILES-{}.smi'.format(args.sample_idx) else: output_filename = 'sampled-SMILES.smi' output_file = os.path.join(args.output_dir, output_filename) # sample a set of SMILES from the final, trained model sampled_count = 0 batch_size = 512 while sampled_count < args.mols_per_file: sampled_smiles, NLLs = model.sample(batch_size, return_smiles=True,
if DEBUG: break trainloss /= batch_idx if epoch % checkpoint == 0: print('Saving model!') save_model(MODEL_NAME, model_dir, epoch, batch_step_count, time_used_global, optimizer, encoder, decoder) print('[%d] epoch starts validating...'%epoch) resulting_captions = [] valloss = 0.0 counts = 0 with torch.no_grad(): encoder.eval() decoder.eval() for images, captions_calc_bleu, captions_calc_loss, lengths, image_ids in valloader: images = images.cuda() image_embeddings = encoder(images) generated_captions_calc_bleu, probs = decoder.beam_search_generator_v2(image_embeddings) for idx in range(images.size(0)): captions_calc_loss_one_image = captions_calc_loss[idx].cuda() captions_calc_bleu_one_image = captions_calc_bleu[idx] captions_lengths_one_image = lengths[idx].cuda() - 1 # calc loss targets_one_image = rnn_utils.pack_padded_sequence(captions_calc_loss_one_image[:, 1:], captions_lengths_one_image, batch_first=True)[0] no_captions_per_image = captions_calc_loss_one_image.size(0) generated_captions_calc_loss = decoder(image_embeddings[[idx]*no_captions_per_image],
class TextClassifier:
    """Train and evaluate an RNN text classifier on the ``DataReader`` dataset."""

    def __init__(self, batch_size, iterations, initial_lr, hidden_size,
                 dropout, kernel_sz, num_layers):
        """Load the data, wrap the iterators and build the model.

        Args:
            batch_size: batch size handed to ``DataReader.init_dataset``.
            iterations: number of passes ``train`` makes over the training loader.
            initial_lr: learning rate for the SGD optimizer.
            hidden_size: hidden size of the RNN.
            dropout: dropout value forwarded to the RNN.
            kernel_sz: only used by the commented-out CNN variant below.
            num_layers: number of RNN layers.
        """
        self.use_cuda = torch.cuda.is_available()
        self.device = torch.device('cuda:0' if self.use_cuda else 'cpu')

        self.data = DataReader()
        train_iter, val_iter, test_iter = self.data.init_dataset(
            batch_size, ('cuda:0' if self.use_cuda else 'cpu'))
        self.train_batch_loader = BatchGenerator(train_iter, 'text', 'label')
        self.val_batch_loader = BatchGenerator(val_iter, 'text', 'label')
        self.test_batch_loader = BatchGenerator(test_iter, 'text', 'label')

        # Store hyperparameters
        self.batch_size = batch_size
        self.iterations = iterations
        self.initial_lr = initial_lr

        # Create Model
        emb_size, emb_dim = self.data.TEXT.vocab.vectors.size()
        # padding = (math.floor(kernel_sz / 2), 0)
        # self.model = CNN(emb_size=emb_size, emb_dimension=emb_dim,
        #                  output_size=len(self.data.LABEL.vocab),
        #                  dropout=dropout, kernel_sz=kernel_sz, stride=1, padding=padding,
        #                  out_filters=hidden_size, pretrained_emb=self.data.TEXT.vocab.vectors)
        self.model = RNN(emb_size=emb_size, emb_dimension=emb_dim,
                         pretrained_emb=self.data.TEXT.vocab.vectors,
                         output_size=len(self.data.LABEL.vocab),
                         num_layers=num_layers,
                         hidden_size=hidden_size,
                         dropout=dropout)

        if self.use_cuda:
            self.model.cuda()

    def train(self, min_stride=3):
        """Train for ``self.iterations`` passes, validating after each one.

        The test set is evaluated only when validation accuracy beats the
        best seen so far; ``test_acc_hist`` repeats the most recent such
        value, and holds 0.0 until the first improvement.

        Args:
            min_stride: unused; kept for interface compatibility.

        Returns:
            Tuple ``(train_loss_hist, train_acc_hist, val_loss_hist,
            val_acc_hist, test_acc_hist)`` with one entry per iteration.
        """
        train_loss_hist = []
        val_loss_hist = []
        train_acc_hist = []
        val_acc_hist = []
        test_acc_hist = []

        best_score = 0.0
        # Defined up front so the history append below cannot hit an unbound
        # name when the first validation accuracy does not exceed best_score.
        test_acc = 0.0
        # This SGD configuration carries no per-step state, so a single
        # optimizer serves every iteration.
        optimizer = optim.SGD(self.model.parameters(), lr=self.initial_lr)
        n_train = len(self.data.train_data)

        for itr in range(self.iterations):
            print("\nIteration: " + str(itr + 1))
            self.model.train()

            total_loss = 0.0
            total_acc = 0.0
            steps = 0

            # Pull exactly len(loader) batches by hand instead of iterating
            # the loader directly.
            data_iter = iter(self.train_batch_loader)
            for _ in range(len(self.train_batch_loader)):
                (x_batch, _x_len_batch), y_batch = next(data_iter)

                optimizer.zero_grad()
                loss, logits = self.model(x_batch, y_batch)
                acc = torch.sum(torch.argmax(logits, dim=1) == y_batch)

                total_loss += loss.item()
                total_acc += acc.item()
                steps += 1

                loss.backward()
                optimizer.step()

            # Guard against an empty loader rather than dividing by zero.
            mean_loss = total_loss / steps if steps else 0.0
            mean_acc = total_acc / n_train
            train_loss_hist.append(mean_loss)
            train_acc_hist.append(mean_acc)

            val_loss, val_acc = self.eval_model(self.val_batch_loader,
                                                len(self.data.val_data))
            val_loss_hist.append(val_loss)
            val_acc_hist.append(val_acc)

            if val_acc > best_score:
                best_score = val_acc
                _, test_acc = self.eval_model(self.test_batch_loader,
                                              len(self.data.test_data))

            print("Train: {Loss: " + str(mean_loss) + ", Acc: " + str(mean_acc) + " }")
            print("Val: {Loss: " + str(val_loss) + ", Acc: " + str(val_acc) + " }")

            test_acc_hist.append(test_acc)

        return train_loss_hist, train_acc_hist, val_loss_hist, val_acc_hist, test_acc_hist

    def eval_model(self, batch_loader, N, min_stride=3):
        """Evaluate the model on ``batch_loader`` without tracking gradients.

        Args:
            batch_loader: sized iterable yielding ``((x, x_len), y)`` batches.
            N: number of examples; both totals are divided by it.
            min_stride: unused; kept for interface compatibility.

        Returns:
            ``(total_loss / N, total_acc / N)``.
        """
        self.model.eval()

        total_loss = 0.0
        total_acc = 0.0

        batch_iterator = iter(batch_loader)
        with torch.no_grad():
            for _ in range(len(batch_loader)):
                (x_batch, _x_len_batch), y_batch = next(batch_iterator)

                loss, logits = self.model(x_batch, y_batch)
                acc = torch.sum(torch.argmax(logits, dim=1) == y_batch)

                total_loss += loss.item()
                total_acc += acc.item()

        # NOTE(review): train() divides its summed loss by the number of
        # batches while this divides by the number of examples - confirm the
        # two loss curves are meant to be on different scales.
        return (total_loss / N), (total_acc / N)
num_layers=argsdict["RNN_num_layers"], dp_keep_prob=1) gru = GRU(emb_size=argsdict["GRU_emb_size"], hidden_size=argsdict["GRU_hidden_size"], seq_len=argsdict["seq_len"], batch_size=argsdict["batch_size"], vocab_size=vocab_size, num_layers=argsdict["GRU_num_layers"], dp_keep_prob=1) # Load the model weight rnn.load_state_dict(torch.load(args.RNN_path)) gru.load_state_dict(torch.load(args.GRU_path)) rnn.eval() gru.eval() # Initialize the hidden state hidden = [rnn.init_hidden(), gru.init_hidden()] # Set the random seed manually for reproducibility. torch.manual_seed(args.seed) # Generate the word seed using random words # in the first 100 most common words. input = torch.randint(0, 100, (args.batch_size, 1)).squeeze() for name_model, model, init_hidden in zip(["RNN", "GRU"], [rnn, gru], hidden): print("------------------------------------") print(name_model)
class Trainer:
    """Train the captioning decoder (and optionally fine-tune the encoder)."""

    def __init__(self, _hparams):
        """Seed the run, then build loaders, models, loss, optimizer and writer.

        :param _hparams: hyper-parameter object read for the seed, model
            sizes, learning rates and the validation caption file.
        """
        utils.set_seed(_hparams.fixed_seed)

        self.train_loader = get_train_loader(_hparams)
        self.val_loader = get_val_loader(_hparams)

        self.encoder = CNN().to(DEVICE)
        decoder = RNN(
            fea_dim=_hparams.fea_dim,
            embed_dim=_hparams.embed_dim,
            hid_dim=_hparams.hid_dim,
            max_sen_len=_hparams.max_sen_len,
            vocab_pkl=_hparams.vocab_pkl,
        )
        self.decoder = decoder.to(DEVICE)

        self.loss_fn = nn.CrossEntropyLoss()
        self.optimizer = torch.optim.Adam(self.get_params(), lr=_hparams.lr)
        self.writer = SummaryWriter()

        self.max_sen_len = _hparams.max_sen_len
        self.val_cap = _hparams.val_cap
        self.ft_encoder_lr = _hparams.ft_encoder_lr
        self.ft_decoder_lr = _hparams.ft_decoder_lr
        self.best_CIDEr = 0

    def fine_tune_encoder(self, fine_tune_epochs, val_interval, save_path, val_path):
        """Train encoder and decoder jointly, each with its own fine-tune rate.

        Replaces ``self.optimizer`` with a two-group Adam, runs ``training``
        for ``fine_tune_epochs`` epochs, then calls ``self.encoder.froze()``.
        """
        banner = '*' * 20
        print(banner, 'fine tune encoder for', fine_tune_epochs, 'epochs', banner)
        self.encoder.fine_tune()
        param_groups = [
            {'params': self.encoder.parameters(), 'lr': self.ft_encoder_lr},
            {'params': self.decoder.parameters(), 'lr': self.ft_decoder_lr},
        ]
        self.optimizer = torch.optim.Adam(param_groups)
        self.training(fine_tune_epochs, val_interval, save_path, val_path)
        self.encoder.froze()
        print(banner, 'fine tune encoder complete', banner)

    def get_params(self):
        """
        All parameters the optimizer should update. The encoder is not
        trained for now, so its parameters are left out.
        :return: list of the decoder's parameters
        """
        return [*self.decoder.parameters()]

    def training(self, max_epochs, val_interval, save_path, val_path):
        """
        Training loop.
        :param val_path: path where sentences generated during validation are saved
        :param save_path: path where the model is saved
        :param val_interval: number of epochs between validations
        :param max_epochs: maximum number of training epochs
        :return:
        """
        banner = '*' * 20
        print(banner, 'train', banner)
        for epoch in range(max_epochs):
            self.set_train()
            running_loss = 0
            num_batches = len(self.train_loader)
            for _step, (img, cap, cap_len) in tqdm(enumerate(self.train_loader)):
                # batch_size * 3 * 224 * 224
                img = img.to(DEVICE)
                cap = cap.to(DEVICE)

                self.optimizer.zero_grad()
                features = self.encoder.forward(img)
                predictions = self.decoder.forward(features, cap)

                # Predictions and the shifted captions are packed with the
                # same lengths before they reach the loss.
                packed_lengths = cap_len - 1
                predictions = pack_padded_sequence(
                    predictions, packed_lengths, batch_first=True)[0]
                targets = pack_padded_sequence(
                    cap[:, 1:], packed_lengths, batch_first=True)[0]

                batch_loss = self.loss_fn(predictions, targets)
                running_loss += batch_loss.item()
                batch_loss.backward()
                self.optimizer.step()

            epoch_loss = running_loss / num_batches
            self.writer.add_scalar('epoch_loss', epoch_loss, epoch)
            print('epoch_loss: {}, epoch: {}'.format(epoch_loss, epoch))

            if (epoch + 1) % val_interval != 0:
                continue
            CIDEr = self.validating(epoch, val_path)
            if CIDEr >= self.best_CIDEr:
                self.best_CIDEr = CIDEr
                self.save_model(save_path, epoch)

    def save_model(self, save_path, train_epoch):
        """
        Save the best model so far.
        :param save_path: path of the model file to write
        :param train_epoch: current training epoch
        :return:
        """
        checkpoint = {
            'encoder_state_dict': self.encoder.state_dict(),
            'decoder_state_dict': self.decoder.state_dict(),
            'tran_epoch': train_epoch,
        }
        banner = '*' * 20
        print(banner, 'save model to: ', save_path, banner)
        torch.save(checkpoint, save_path)

    def validating(self, train_epoch, val_path):
        """
        Validation pass.
        :param val_path: path where sentences generated during validation are saved
        :param train_epoch: current training epoch
        :return: the CIDEr score reported by ``coco_eval``
        """
        banner = '*' * 20
        print(banner, 'validate', banner)
        self.set_eval()

        captions = []
        with torch.no_grad():
            for _step, (img, img_id) in tqdm(enumerate(self.val_loader)):
                img = img.to(DEVICE)
                features = self.encoder.forward(img)
                sens, _ = self.decoder.sample(features)
                captions.append({'image_id': int(img_id), 'caption': sens[0]})

        with open(val_path, 'w') as f:
            json.dump(captions, f)

        scores = {}
        for metric, score in coco_eval(self.val_cap, val_path):
            scores[metric] = score
            self.writer.add_scalar(metric, score, train_epoch)
        return scores['CIDEr']

    def set_train(self):
        """Put the encoder, then the decoder, into training mode."""
        for module in (self.encoder, self.decoder):
            module.train()

    def set_eval(self):
        """Put the encoder, then the decoder, into evaluation mode."""
        for module in (self.encoder, self.decoder):
            module.eval()
if __name__ == '__main__':
    # Entry point: build the IMDB train/test loaders, then either train a
    # fresh RNN (mode == 1) or evaluate a saved baseline model.

    # 1. data load
    print('data load start')
    train_texts, train_labels = read_imdb_split(train_data_path)
    train_data = IMDBDataset(train_texts, train_labels, word2idx)
    trainloader = torch.utils.data.DataLoader(train_data,
                                              batch_size=batch_size,
                                              collate_fn=collate_imdb,
                                              shuffle=True)
    test_texts, test_labels = read_imdb_split(test_data_path)
    test_data = IMDBDataset(test_texts, test_labels, word2idx, attack_label=1)
    testloader = torch.utils.data.DataLoader(test_data,
                                             batch_size=batch_size,
                                             collate_fn=collate_imdb,
                                             shuffle=False)
    print('data load end')

    # Fix every RNG used below for reproducibility.
    random.seed(11)
    np.random.seed(11)
    torch.manual_seed(11)

    # 2. train or test
    criterion = nn.CrossEntropyLoss()
    if mode == 1:
        model = RNN(vocab_size=vocab_size, embedding_dim=300,
                    hidden_dim=300, output_dim=2).to(device)
        optimizer = torch.optim.Adam(model.parameters(), lr=0.0004)
        train(model)
    else:
        baseline_model = RNN(vocab_size=vocab_size, embedding_dim=300,
                             hidden_dim=300, output_dim=2).to(device)
        # Remap the stored tensors onto the active device so a checkpoint
        # saved on a GPU still loads on a CPU-only machine.
        baseline_model.load_state_dict(
            torch.load(output_model_path, map_location=device))
        baseline_model.eval()  # eval mode: dropout is no longer applied
        evaluate_model(baseline_model, testloader, criterion)