def test_running():
    epochs = 100000
    seq_batch_size = 100
    print_yes = 100
    loss_func = torch.nn.functional.nll_loss

    # create network and optimizer
    net = RNN(100, 120, 150, 2)
    net.to(device)  # move the model to the device (CPU or CUDA)
    optim = torch.optim.Adam(net.parameters(), lr=3e-5)

    # main training loop:
    for epoch in range(epochs):
        dat = get_batch(train_data, seq_batch_size)
        dat = torch.LongTensor([vocab.find(item) for item in dat])

        # pull x and y
        x_t = dat[:-1]
        y_t = dat[1:]

        # initialize the hidden state, then move everything to the device
        hidden = net.init_hidden()
        x_t, y_t, hidden = x_t.to(device), y_t.to(device), hidden.to(device)

        # forward pass
        logprob, hidden = net(x_t, hidden)
        loss = loss_func(logprob, y_t)

        # update
        optim.zero_grad()
        loss.backward()
        optim.step()

        # print the loss every print_yes iterations
        if epoch % print_yes == 0:
            print('*' * 100)
            print('\n epoch {}, loss:{} \n'.format(epoch, loss.item()))
            # make sure to pass the True flag when running on cuda
            print('sample speech:\n', run_words(net, vocab, 500, True))
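# get_batch() and run_words() are not defined in this file. Below are minimal
# sketches of the interface test_running() appears to assume: get_batch(data, n)
# returns n + 1 consecutive characters (matching the "size 101" comment in
# no_test_forward below), and run_words() samples text from the network. Both
# are assumptions, not the original implementations.
import random

import torch


def get_batch(text, seq_len):
    # return seq_len + 1 consecutive characters so that the off-by-one
    # slicing (dat[:-1] / dat[1:]) yields seq_len input/target pairs
    start = random.randint(0, len(text) - seq_len - 1)
    return text[start:start + seq_len + 1]


def run_words(net, vocab, n_chars, iscuda=False):
    # start from a random character and repeatedly sample the next one
    # from the network's output log-probabilities
    dev = torch.device('cuda' if iscuda else 'cpu')
    hidden = net.init_hidden().to(dev)
    idx = torch.LongTensor([random.randrange(len(vocab))]).to(dev)
    out = vocab[idx.item()]
    with torch.no_grad():
        for _ in range(n_chars):
            logprob, hidden = net(idx, hidden)
            idx = torch.multinomial(logprob.exp().view(-1), 1)
            out += vocab[idx.item()]
    return out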
def main():
    epochs = 301
    seq_batch_size = 200
    print_yes = 100
    iscuda = False

    # create our network, optimizer and loss function
    net = RNN(len(chars), 100, 150, 2)  # instantiate an RNN object
    optim = torch.optim.Adam(net.parameters(), lr=6e-4)
    loss_func = torch.nn.functional.nll_loss
    if iscuda:
        net = net.cuda()

    # main training loop:
    for epoch in range(epochs):
        dat = getSequence(book, seq_batch_size)
        # find the corresponding index of each character and store it in a tensor
        dat = torch.LongTensor([chars.find(item) for item in dat])

        # pull x, y and initialize the hidden state
        if iscuda:
            x_t = dat[:-1].cuda()
            y_t = dat[1:].cuda()
            hidden = net.init_hidden().cuda()
        else:
            x_t = dat[:-1]
            y_t = dat[1:]
            hidden = net.init_hidden()

        # forward pass
        logprob, hidden = net(x_t, hidden)
        loss = loss_func(logprob, y_t)

        # update
        optim.zero_grad()
        loss.backward()
        optim.step()

        # print the loss every print_yes iterations
        if epoch % print_yes == 0:
            print('*' * 60)
            print('\n epoch {}, loss:{} \n'.format(epoch, loss.item()))
            print('sample speech:\n', test_words(net, chars, seq_batch_size))

    torch.save(net.state_dict(), 'trainedBook_v2.pt')
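# The RNN class used by main() and test_running() is defined elsewhere. This is
# a minimal sketch of the interface both loops rely on: embedding -> GRU ->
# linear -> log_softmax (so nll_loss applies directly), plus init_hidden().
# The argument order (vocab_size, embed_dim, hidden_dim, n_layers) is inferred
# from the calls above and is an assumption, not the original class.
import torch
import torch.nn as nn


class RNN(nn.Module):
    def __init__(self, vocab_size, embed_dim, hidden_dim, n_layers=1):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.n_layers = n_layers
        self.embed = nn.Embedding(vocab_size, embed_dim)
        self.gru = nn.GRU(embed_dim, hidden_dim, n_layers)
        self.out = nn.Linear(hidden_dim, vocab_size)

    def forward(self, x, hidden):
        emb = self.embed(x).unsqueeze(1)      # (seq_len,) -> (seq_len, 1, embed_dim)
        output, hidden = self.gru(emb, hidden)
        logits = self.out(output.squeeze(1))  # (seq_len, vocab_size)
        return torch.log_softmax(logits, dim=1), hidden

    def init_hidden(self, batch_size=1):
        return torch.zeros(self.n_layers, batch_size, self.hidden_dim)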
def no_test_forward():
    loss_func = torch.nn.functional.nll_loss
    net = RNN(100, 100, 100)
    net.to(device)  # move the model to the device
    optim = torch.optim.Adam(net.parameters(), lr=1e-4)

    # step 2: create a training batch of data, size 101, and convert it to a pytorch long tensor
    dat = get_batch(train_data, 100)
    dat = torch.LongTensor([vocab.find(item) for item in dat])

    # step 3: split dat into input/output
    x_t = dat[:-1]
    y_t = dat[1:]
    ho = net.init_hidden()

    # load every tensor the model uses onto the device: the input/output as well as the hidden state
    x_t, y_t, ho = x_t.to(device), y_t.to(device), ho.to(device)

    # test the forward pass
    log_prob, hidden = net(x_t, ho)
    # check whether the hidden state returned by the forward pass is already on cuda
    # log_prob2, hidden2 = net(x_t, hidden)
    loss = loss_func(log_prob, y_t)

    optim.zero_grad()
    loss.backward()
    optim.step()
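# Both test_running() and no_test_forward() rely on module-level globals that
# are not defined in this file. A minimal sketch of what they might look like
# (the corpus path and vocabulary construction are assumptions, not the
# original data pipeline):
import torch

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
train_data = open('input.txt').read()     # hypothetical corpus file
vocab = ''.join(sorted(set(train_data)))  # a plain string, so vocab.find(ch) -> index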
class TextClassifier:
    def __init__(self, batch_size, iterations, initial_lr,
                 hidden_size, dropout, kernel_sz, num_layers):
        self.use_cuda = torch.cuda.is_available()
        self.device = torch.device('cuda:0' if self.use_cuda else 'cpu')

        self.data = DataReader()
        train_iter, val_iter, test_iter = self.data.init_dataset(
            batch_size, ('cuda:0' if self.use_cuda else 'cpu'))
        self.train_batch_loader = BatchGenerator(train_iter, 'text', 'label')
        self.val_batch_loader = BatchGenerator(val_iter, 'text', 'label')
        self.test_batch_loader = BatchGenerator(test_iter, 'text', 'label')

        # Store hyperparameters
        self.batch_size = batch_size
        self.iterations = iterations
        self.initial_lr = initial_lr

        # Create Model
        emb_size, emb_dim = self.data.TEXT.vocab.vectors.size()
        # padding = (math.floor(kernel_sz / 2), 0)
        # self.model = CNN(emb_size=emb_size, emb_dimension=emb_dim,
        #                  output_size=len(self.data.LABEL.vocab),
        #                  dropout=dropout, kernel_sz=kernel_sz, stride=1, padding=padding,
        #                  out_filters=hidden_size, pretrained_emb=self.data.TEXT.vocab.vectors)
        self.model = RNN(emb_size=emb_size, emb_dimension=emb_dim,
                         pretrained_emb=self.data.TEXT.vocab.vectors,
                         output_size=len(self.data.LABEL.vocab),
                         num_layers=num_layers,
                         hidden_size=hidden_size,
                         dropout=dropout)

        if self.use_cuda:
            self.model.cuda()

    def train(self, min_stride=3):
        train_loss_hist = []
        val_loss_hist = []
        train_acc_hist = []
        val_acc_hist = []
        test_acc_hist = []
        best_score = 0.0
        loss = 0.0

        # create the optimizer once, outside the loop; rebuilding it every
        # iteration (as before) is harmless for vanilla SGD but wasteful
        optimizer = optim.SGD(self.model.parameters(), lr=self.initial_lr)

        for itr in range(self.iterations):
            print("\nIteration: " + str(itr + 1))

            self.model.train()
            total_loss = 0.0
            total_acc = 0.0
            steps = 0

            data_iter = iter(self.train_batch_loader)
            for i in range(len(self.train_batch_loader)):
                ((x_batch, x_len_batch), y_batch) = next(data_iter)
                # if torch.min(x_len_batch) > min_stride:
                optimizer.zero_grad()

                loss, logits = self.model(x_batch, y_batch)
                acc = torch.sum(torch.argmax(logits, dim=1) == y_batch)

                total_loss += loss.item()
                total_acc += acc.item()
                steps += 1

                loss.backward()
                optimizer.step()

            train_loss_hist.append(total_loss / steps)
            train_acc_hist.append(total_acc / len(self.data.train_data))

            val_loss, val_acc = self.eval_model(self.val_batch_loader, len(self.data.val_data))
            val_loss_hist.append(val_loss)
            val_acc_hist.append(val_acc)

            # evaluate on test data only when validation accuracy improves
            if val_acc > best_score:
                best_score = val_acc
                test_loss, test_acc = self.eval_model(self.test_batch_loader, len(self.data.test_data))

            print("Train: {Loss: " + str(total_loss / steps) +
                  ", Acc: " + str(total_acc / len(self.data.train_data)) + " }")
            print("Val: {Loss: " + str(val_loss) + ", Acc: " + str(val_acc) + " }")

            test_acc_hist.append(test_acc)

        return train_loss_hist, train_acc_hist, val_loss_hist, val_acc_hist, test_acc_hist

    def eval_model(self, batch_loader, N, min_stride=3):
        self.model.eval()
        total_loss = 0.0
        total_acc = 0.0
        steps = 0

        batch_iterator = iter(batch_loader)
        with torch.no_grad():
            for i in range(len(batch_loader)):
                ((x_batch, x_len_batch), y_batch) = next(batch_iterator)
                # if torch.min(x_len_batch) > min_stride:
                loss, logits = self.model(x_batch, y_batch)
                acc = torch.sum(torch.argmax(logits, dim=1) == y_batch)

                total_loss += loss.item()
                total_acc += acc.item()

        return (total_loss / N), (total_acc / N)
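# Hedged usage sketch for TextClassifier, assuming the surrounding module
# imports torch and torch.optim as optim, and defines DataReader,
# BatchGenerator and RNN. All hyperparameter values below are illustrative,
# not tuned.
if __name__ == '__main__':
    clf = TextClassifier(batch_size=32, iterations=10, initial_lr=0.1,
                         hidden_size=128, dropout=0.5, kernel_sz=3,
                         num_layers=2)
    hists = clf.train()
    train_loss, train_acc, val_loss, val_acc, test_acc = hists
    print('best validation accuracy:', max(val_acc))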
class Trainer:
    """Trainer for the encoder-decoder captioning model."""

    def __init__(self, _hparams):
        utils.set_seed(_hparams.fixed_seed)
        self.train_loader = get_train_loader(_hparams)
        self.val_loader = get_val_loader(_hparams)
        self.encoder = CNN().to(DEVICE)
        self.decoder = RNN(fea_dim=_hparams.fea_dim,
                           embed_dim=_hparams.embed_dim,
                           hid_dim=_hparams.hid_dim,
                           max_sen_len=_hparams.max_sen_len,
                           vocab_pkl=_hparams.vocab_pkl).to(DEVICE)
        self.loss_fn = nn.CrossEntropyLoss()
        self.optimizer = torch.optim.Adam(self.get_params(), lr=_hparams.lr)
        self.writer = SummaryWriter()
        self.max_sen_len = _hparams.max_sen_len
        self.val_cap = _hparams.val_cap
        self.ft_encoder_lr = _hparams.ft_encoder_lr
        self.ft_decoder_lr = _hparams.ft_decoder_lr
        self.best_CIDEr = 0

    def fine_tune_encoder(self, fine_tune_epochs, val_interval, save_path, val_path):
        print('*' * 20, 'fine tune encoder for', fine_tune_epochs, 'epochs', '*' * 20)
        self.encoder.fine_tune()
        self.optimizer = torch.optim.Adam([
            {'params': self.encoder.parameters(), 'lr': self.ft_encoder_lr},
            {'params': self.decoder.parameters(), 'lr': self.ft_decoder_lr},
        ])
        self.training(fine_tune_epochs, val_interval, save_path, val_path)
        self.encoder.froze()
        print('*' * 20, 'fine tune encoder complete', '*' * 20)

    def get_params(self):
        """
        All parameters the model needs to optimize. For now the encoder is not
        trained, so only the decoder's parameters are returned.
        :return:
        """
        return list(self.decoder.parameters())

    def training(self, max_epochs, val_interval, save_path, val_path):
        """
        Train the model.
        :param val_path: path for saving the sentences generated during validation
        :param save_path: path where the model is saved
        :param val_interval: number of epochs between validations
        :param max_epochs: maximum number of training epochs
        :return:
        """
        print('*' * 20, 'train', '*' * 20)
        for epoch in range(max_epochs):
            self.set_train()
            epoch_loss = 0
            epoch_steps = len(self.train_loader)
            for step, (img, cap, cap_len) in tqdm(enumerate(self.train_loader)):
                # batch_size * 3 * 224 * 224
                img = img.to(DEVICE)
                cap = cap.to(DEVICE)

                self.optimizer.zero_grad()

                features = self.encoder(img)
                outputs = self.decoder(features, cap)

                # drop the <start> token and padding before computing the loss
                outputs = pack_padded_sequence(outputs, cap_len - 1, batch_first=True)[0]
                targets = pack_padded_sequence(cap[:, 1:], cap_len - 1, batch_first=True)[0]

                train_loss = self.loss_fn(outputs, targets)
                epoch_loss += train_loss.item()
                train_loss.backward()
                self.optimizer.step()

            epoch_loss /= epoch_steps
            self.writer.add_scalar('epoch_loss', epoch_loss, epoch)
            print('epoch_loss: {}, epoch: {}'.format(epoch_loss, epoch))
            if (epoch + 1) % val_interval == 0:
                CIDEr = self.validating(epoch, val_path)
                if self.best_CIDEr <= CIDEr:
                    self.best_CIDEr = CIDEr
                    self.save_model(save_path, epoch)

    def save_model(self, save_path, train_epoch):
        """
        Save the best model so far.
        :param save_path: path of the saved model file
        :param train_epoch: current training epoch
        :return:
        """
        model_state_dict = {
            'encoder_state_dict': self.encoder.state_dict(),
            'decoder_state_dict': self.decoder.state_dict(),
            'train_epoch': train_epoch,
        }
        print('*' * 20, 'save model to: ', save_path, '*' * 20)
        torch.save(model_state_dict, save_path)

    def validating(self, train_epoch, val_path):
        """
        Validate the model.
        :param val_path: path for saving the sentences generated during validation
        :param train_epoch: current training epoch
        :return:
        """
        print('*' * 20, 'validate', '*' * 20)
        self.set_eval()
        sen_json = []
        with torch.no_grad():
            for val_step, (img, img_id) in tqdm(enumerate(self.val_loader)):
                img = img.to(DEVICE)
                features = self.encoder(img)
                sens, _ = self.decoder.sample(features)
                sen_json.append({'image_id': int(img_id), 'caption': sens[0]})

        with open(val_path, 'w') as f:
            json.dump(sen_json, f)

        result = coco_eval(self.val_cap, val_path)
        scores = {}
        for metric, score in result:
            scores[metric] = score
            self.writer.add_scalar(metric, score, train_epoch)

        return scores['CIDEr']

    def set_train(self):
        self.encoder.train()
        self.decoder.train()

    def set_eval(self):
        self.encoder.eval()
        self.decoder.eval()
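# Hedged usage sketch for Trainer. The real project presumably builds _hparams
# from argparse or a config file; the Namespace below covers only the
# attributes Trainer itself reads (the data-loader helpers may need more), and
# every value and path is an illustrative assumption.
from argparse import Namespace

hparams = Namespace(
    fixed_seed=42,
    fea_dim=2048, embed_dim=512, hid_dim=512,
    max_sen_len=20,
    vocab_pkl='vocab.pkl',          # hypothetical vocabulary pickle
    lr=4e-4,
    val_cap='captions_val.json',    # hypothetical COCO-style annotation file
    ft_encoder_lr=1e-5, ft_decoder_lr=4e-4,
)

trainer = Trainer(hparams)
trainer.training(max_epochs=20, val_interval=2,
                 save_path='best_model.pth', val_path='val_sentences.json')
trainer.fine_tune_encoder(fine_tune_epochs=10, val_interval=2,
                          save_path='best_model_ft.pth', val_path='val_sentences.json')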