def train_epoch(epoch):
    total_loss = 0
    total_loss_reconstruction = 0
    total_loss_topic = 0
    model.train()
    for i in range(opt.epoch_size):
        batch = get_batch(train_dataset)
        optimizer.zero_grad()  # clears the same grads as model.zero_grad() below
        model.zero_grad()

        # Forward step
        if 'topic' in opt.model:
            loss_batch, loss_reconstruction, loss_topic = model.evaluate(batch)
            # .item() replaces the pre-0.4 loss.data[0] idiom
            total_loss += loss_batch.item() / opt.sentence_len
            total_loss_reconstruction += loss_reconstruction.item() / opt.sentence_len
            total_loss_topic += loss_topic.item() / opt.sentence_len
            if epoch < 10:
                print('[Train] time:{}, iter:{}, loss:{}, loss reconstruction: {}, loss topic: {}'
                      .format(utils.time_since(start), i,
                              loss_batch.item() / opt.sentence_len,
                              loss_reconstruction.item() / opt.sentence_len,
                              loss_topic.item() / opt.sentence_len))
                warmup = 'Wh' if opt.model == 'char_rnn' else ['what']
                test_sample = model.test(warmup, opt.sentence_len)
                try:
                    print(test_sample)
                except UnicodeEncodeError:
                    print('Unicode error')
        else:
            loss_batch = model.evaluate(batch)
            total_loss += loss_batch.item() / opt.sentence_len
            if epoch < 10:
                print('[Train] time:{}, iter:{}, loss:{}'.format(
                    utils.time_since(start), i,
                    loss_batch.item() / opt.sentence_len))

        # Backward step
        loss_batch.backward()
        optimizer.step()

    if 'analyze' in dir(model):
        model.analyze([sentence[:5] for sentence in get_batch(train_dataset)])

    return (total_loss / opt.epoch_size,
            total_loss_reconstruction / opt.epoch_size,
            total_loss_topic / opt.epoch_size)
def fetch_details(self):
    push = self.last_push()
    if not push:
        return None
    repo_name = push.get('repo', {}).get('name', '')
    created_at = time_to_local_epoch(push.get('created_at'))
    commits = push.get('payload', {}).get('commits', [])
    commit = commits[0] if commits else {}  # Most recent commit
    commit_data = self.get_commit_data(commit)
    time_elapsed = time_since(created_at)
    # str.strip('Event') strips *characters*, not the suffix, and could
    # mangle other event names; remove the literal suffix instead.
    event_type = push.get('type', '')
    if event_type.endswith('Event'):
        event_type = event_type[:-len('Event')]
    details = {
        'created_at': created_at,
        'time_elapsed': time_elapsed,
        'rough_time_elapsed': approx_time_elapsed(time_elapsed),
        'repo': repo_name.split('/')[-1],  # TODO: Use a regex
        'repo_url': 'https://github.com/%s' % repo_name,
        'type': event_type,
        'url': commit_data.get('html_url', ''),  # human-readable URL
        'avatar_url': push.get('actor', {}).get('avatar_url', ''),
        'location': self.get_location(),
        'total_commits': self.total_commits(),
        'file_types_str': self.file_types(commit_data)
    }
    return details
def fit(self, data, n_iterations, all_losses=None, print_every=100,
        plot_every=10, augment=False):
    """
    This method fits the parameters of the model. Training is performed to
    minimize the cross-entropy loss when predicting the next character
    given the prefix.

    Parameters
    ----------
    data: object of type GeneratorData
        stores information about the generator data format such as alphabet, etc.

    n_iterations: int
        how many iterations of training will be performed

    all_losses: list (default None)
        list to store the values of the loss function; a fresh list is
        created when None is passed (a mutable default list would be
        shared across calls)

    print_every: int (default 100)
        feedback will be printed to std_out once every print_every
        iterations of training

    plot_every: int (default 10)
        value of the loss function will be appended to all_losses once
        every plot_every iterations of training

    augment: bool (default False)
        parameter specifying if SMILES enumeration will be used. For more
        details on SMILES enumeration see https://arxiv.org/abs/1703.07076

    Returns
    -------
    all_losses: list
        list that stores the values of the loss function (learning curve)
    """
    if all_losses is None:
        all_losses = []
    start = time.time()
    loss_avg = 0

    if augment:
        smiles_augmentation = SmilesEnumerator()
    else:
        smiles_augmentation = None

    for epoch in trange(1, n_iterations + 1, desc='Training in progress...'):
        inp, target = data.random_training_set(smiles_augmentation)
        loss = self.train_step(inp, target)
        loss_avg += loss

        if epoch % print_every == 0:
            print('[%s (%d %d%%) %.4f]' % (time_since(start), epoch,
                                           epoch / n_iterations * 100, loss))
            print(self.evaluate(data=data, prime_str='<', predict_len=100), '\n')

        if epoch % plot_every == 0:
            all_losses.append(loss_avg / plot_every)
            loss_avg = 0
    return all_losses
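# The signature fix above matters: the original `all_losses=[]` default is a
# single list object shared by every call to fit(), so losses from separate
# runs silently accumulate. A standalone demonstration of the pitfall and
# the fix (illustrative only, not part of the project):

def _bad(losses=[]):  # one list object reused across calls
    losses.append(1)
    return losses

def _good(losses=None):  # fresh list per call unless one is passed in
    if losses is None:
        losses = []
    losses.append(1)
    return losses

assert _bad() == [1]
assert _bad() == [1, 1]  # surprise: state leaked from the first call
assert _good() == [1]
assert _good() == [1]    # independent calls stay independent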
def main(n_instances=None):
    # Keep track of time elapsed and running averages
    start = time.time()
    losses = []
    print_loss_total = 0  # Reset every print_every
    plot_loss_total = 0  # Reset every plot_every

    for epoch in range(1, n_epochs + 1):
        for idx, batch in enumerate(qaloader):
            # Get training data for this cycle
            training_pair = batch
            input_variable = Variable(training_pair[0])  # 1 x len(training_pair[0])
            target_variable = Variable(training_pair[1])

            # Run the train function
            loss = trainer.train(input_variable, target_variable, model,
                                 encoder_optimizer, decoder_optimizer, criterion)

            if idx % print_every == 0:
                losses.append(loss)
                writer.add_scalar(
                    cfg.NAME, loss,
                    (epoch - 1) * (len(qadataset) if n_instances is None
                                   else n_instances) + idx)

            # Keep track of loss
            print_loss_total += loss
            plot_loss_total += loss

            # Skip the misleading one-sample average on the very first batch.
            # (The original guard checked `epoch == 0`, which never fires
            # since epoch starts at 1.)
            if idx % print_every == 0 and idx > 0:
                print_loss_avg = print_loss_total / print_every
                print_loss_total = 0
                print_summary = '%s (%d %d%%) %.4f' % (
                    time_since(start, epoch / n_epochs), epoch,
                    epoch / n_epochs * 100, print_loss_avg)
                print(print_summary)
            if n_instances is not None:
                if idx > n_instances:
                    break

    with open(cfg.LOSSDIR, 'w') as f:
        # Write the loss values themselves; the original wrote the literal
        # format string once per entry. (f.close() is redundant inside `with`.)
        f.write(','.join('{:5.2}'.format(l) for l in losses))

    if cfg.NEED_SAVE:
        if cfg.save == 'all':
            pass
        elif cfg.save == 'last':
            epoch = 'last'  # we overwrite the loop variable, but it's okay
        else:
            raise NotImplementedError
        torch.save(encoder, cfg.ENC_DUMP_PATH.format(epoch))
        torch.save(decoder, cfg.DEC_DUMP_PATH.format(epoch))
    writer.close()
def time_since(self, ts_str):
    # TODO: Use regex
    parts = ts_str.split(' ')
    # Zero-pad a single-digit field so strptime's pattern matches.
    # (The original guard checked len == 1 and then indexed parts[1],
    # which would raise IndexError; this is the assumed intent.)
    if len(parts) > 1 and len(parts[1]) == 1:
        parts[1] = '0' + parts[1]
    ts_str = ' '.join(parts)
    unix_tstmp = time.mktime(time.strptime(ts_str, self.TS_PATTERN))
    # Delegates to the module-level time_since() helper, which this
    # method deliberately shadows as an instance method.
    return time_since(unix_tstmp)
def main():
    input_lang, output_lang, pairs = utils.prepare_data(
        lang_name=target_language, _dir='data')
    encoder = model.EncoderRNN(input_lang.n_words, hidden_size, n_layers)
    decoder = model.AttentionDecoderRNN(attn_model, hidden_size,
                                        output_lang.n_words, n_layers,
                                        dropout_p=dropout_p)
    print("Encoder-Model: ", encoder)
    print("Decoder-Model: ", decoder)

    # Initialize optimizers and criterion
    encoder_optimizer = optim.SGD(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.SGD(decoder.parameters(), lr=learning_rate)
    criterion = nn.NLLLoss()

    start = time.time()
    plot_losses = []
    print_loss_total = 0  # Reset every print_every
    plot_loss_total = 0  # Reset every plot_every

    # Begin training
    for epoch in range(1, n_epochs + 1):
        training_pair = utils.variables_from_pair(random.choice(pairs),
                                                  input_lang, output_lang)
        input_variable = training_pair[0]
        target_variable = training_pair[1]

        # Run the train step
        epoch_loss = train(input_variable, target_variable, encoder, decoder,
                           encoder_optimizer, decoder_optimizer, criterion)
        print_loss_total += epoch_loss
        plot_loss_total += epoch_loss

        if epoch % print_every == 0:
            print_loss_avg = print_loss_total / print_every
            print_loss_total = 0
            # Renamed from `time_since` to avoid shadowing the helper's name
            elapsed = utils.time_since(start, epoch / n_epochs)
            print('%s (%d %d%%) %.4f' % (elapsed, epoch,
                                         epoch / n_epochs * 100,
                                         print_loss_avg))
        if epoch % plot_every == 0:
            plot_loss_avg = plot_loss_total / plot_every
            plot_losses.append(plot_loss_avg)
            plot_loss_total = 0

    save_model(encoder, 'data/encoder_state_' + target_language)
    save_model(decoder, 'data/decoder_state_' + target_language)
    save_model(decoder.attention,
               'data/decoder_attention_state_' + target_language)
    utils.show_plot(plot_losses)
def val(model, val_data, device):
    model.eval()
    start_time = time.time()
    acc_over_steps = [[] for _ in range(len_line)]
    test_loss_meter = tnt.meter.AverageValueMeter()
    test_accuracy_meter = tnt.meter.AverageValueMeter()
    criterion = t.nn.CrossEntropyLoss().to(device)
    sum_all = 0
    time_num = 0
    print("Total batch for test: {}".format(val_data.batch_num * (len_line - 1)))
    h, c = None, None
    for j, (x, y) in enumerate(val_data):
        if j % (len_line - 1) == 0:
            # Fresh hidden state at the start of every line
            h = t.zeros((opt.num_layers, opt.val_batch_size, opt.hidden_size))
            c = t.zeros((opt.num_layers, opt.val_batch_size, opt.hidden_size))
            h, c = h.to(device), c.to(device)
        x = x.reshape((1, opt.val_batch_size, 1))
        x = t.from_numpy(x).type(t.FloatTensor).to(device)
        y = t.from_numpy(y).type(t.LongTensor).to(device)
        output, h, c, _ = model(x, h, c,
                                positional_dis[j % (len_line - 1)].to(device))
        # Detach so the graph is not retained across steps. (The original
        # wrapped h and c into tuples here, which would break the next
        # model(x, h, c, ...) call expecting tensors.)
        h, c = h.detach(), c.detach()
        loss = criterion(output, y.contiguous().view(-1))
        test_loss_meter.add(loss.item())
        output = t.nn.functional.softmax(output, dim=1)
        output_numpy = output.cpu().detach().numpy().astype(np.float32)
        target_numpy = y.contiguous().view(-1).cpu().detach().numpy()
        for i in range(target_numpy.shape[0]):
            sum_all += np.log2(output_numpy[i][target_numpy[i]])
            time_num += 1
        pred = np.argmax(output_numpy, axis=1)
        accuracy = float((pred == target_numpy).astype(int).sum()) / float(target_numpy.size)
        test_accuracy_meter.add(accuracy)
        acc_over_steps[j % (len_line - 1)].append(accuracy)
        if j % (len_line - 1) == (len_line - 2):
            print("Step: {}, Time {}, Acc: {}".format(
                j, time_since(start_time), test_accuracy_meter.value()[0]))
    bpc = -1 * (sum_all / time_num)
    print("TestLoss: {}, TestAccuracy: {}, TestBpc: {}, TestTime: {}. TotalChars: {}".format(
        test_loss_meter.value()[0], test_accuracy_meter.value()[0], bpc,
        time_since(start_time), time_num))
    model.train()
    global min_bpc
    if bpc < min_bpc:
        t.save(model.state_dict(),
               './checkpoints/net_{}.pth'.format(opt.model_name))
        min_bpc = bpc
def train(lang_1, lang_2, pairs, encoder, decoder, output_dir,
          n_epochs=500000, learning_rate=0.001, print_every=1000,
          save_every=5000, debug=False):
    LOGGER.info('Starting training process...')
    save_every_epoch_start = time.time()
    for epoch in range(1, n_epochs + 1):
        start = time.time()
        LOGGER.debug('Start training epoch %i at %s' % (epoch, time_string()))

        # Train the particular iteration
        train_iter(lang_1, lang_2, pairs, encoder, decoder, len(pairs),
                   print_every=print_every, learning_rate=learning_rate)

        LOGGER.debug('Finished training epoch %i at %s' % (epoch, time_string()))
        LOGGER.debug('Time taken for epoch %i = %s' %
                     (epoch, time_since(start, epoch / n_epochs)))

        if epoch % save_every == 0:
            LOGGER.info('Saving model at epoch %i...' % epoch)
            LOGGER.info('Time taken for %i epochs = %s' %
                        (save_every,
                         time_since(save_every_epoch_start, epoch / n_epochs)))
            save_models(encoder, decoder, learning_rate, epoch, output_dir)
def val(model, val_loader, device, train_step, writer):
    model.eval()
    start_time = time.time()
    test_loss_meter = tnt.meter.AverageValueMeter()
    test_accuracy_meter = tnt.meter.AverageValueMeter()
    criterion = t.nn.CrossEntropyLoss().to(device)
    sum_all = 0
    time_num = 0
    print("Total batch for test: {}".format(len(val_loader)))
    hidden = None
    for step, test_data in enumerate(val_loader):
        test_x = test_data[:, :-1].unsqueeze(2)
        test_y = t.squeeze(test_data[:, 1:])
        test_x = test_x.type(t.FloatTensor).to(device)
        test_y = test_y.type(t.LongTensor).to(device)
        output, hidden = model(test_x, hidden)
        hidden = (hidden[0].detach(), hidden[1].detach())
        loss = criterion(output, test_y.contiguous().view(-1))
        test_loss_meter.add(loss.item())

        # Compute accuracy and the entropy terms for bits-per-character
        output = t.nn.functional.softmax(output, dim=1)
        output_numpy = output.cpu().detach().numpy().astype(np.float32)
        target_numpy = test_y.contiguous().view(-1).cpu().detach().numpy()
        for i in range(target_numpy.shape[0]):
            sum_all += np.log2(output_numpy[i][target_numpy[i]])
            time_num += 1
        pred = np.argmax(output_numpy, axis=1)
        accuracy = float((pred == target_numpy).astype(int).sum()) / float(target_numpy.size)
        test_accuracy_meter.add(accuracy)
        if step % 1000 == 0:
            print("Step: {}, Time {}, Acc: {}".format(
                step, time_since(start_time), test_accuracy_meter.value()[0]))
    bpc = -1 * (sum_all / time_num)
    print("TestLoss: {}, TestAccuracy: {}, TestBpc: {}, TestTime: {}. TotalChars: {}".format(
        test_loss_meter.value()[0], test_accuracy_meter.value()[0], bpc,
        time_since(start_time), time_num))
    writer.add_scalar("TestLoss", test_loss_meter.value()[0], train_step)
    writer.add_scalar("TestAccuracy", test_accuracy_meter.value()[0], train_step)
    # The original passed train_step twice here; add_scalar's fourth
    # positional argument is walltime, so the duplicate is dropped.
    writer.add_scalar("TestBpc", bpc, train_step)
    writer.add_scalar("TestCompressionRatio", bpc / 8, train_step)
    model.train()
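# The `bpc` above is the empirical cross-entropy in bits per character: the
# mean of -log2 of the probability the model assigned to the true next
# character. "TestCompressionRatio" is bpc / 8, i.e. bits per character
# relative to an 8-bit byte. A self-contained numeric check of the same
# computation (toy values, not project data):
import numpy as np

probs = np.array([[0.70, 0.10, 0.10, 0.10],
                  [0.25, 0.25, 0.25, 0.25],
                  [0.05, 0.05, 0.85, 0.05]])  # softmax rows for 3 characters
targets = np.array([0, 1, 2])                 # true next characters

sum_all = np.log2(probs[np.arange(len(targets)), targets]).sum()
bpc = -sum_all / len(targets)
print('bpc = %.3f, compression ratio vs. raw bytes = %.3f' % (bpc, bpc / 8))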
def train(self, dataset: Dataset, n_iter=50, print_every=10, save_every=10,
          plot_every=10):
    start = time.time()
    self.plot_losses = []
    print_loss_total = 0  # Reset every print_every
    plot_loss_total = 0  # Reset every plot_every

    training_pairs = dataset.training_pairs(n_iter, self.encoder.word_embedding)
    criterion = nn.NLLLoss()
    sos_id = self.encoder.word_embedding.word2index(SOS)
    eos_id = self.decoder.word_embedding.word2index(EOS)

    for iteration in tqdm(range(1, n_iter + 1)):
        training_pair = training_pairs[iteration - 1]
        input_tensor = training_pair[0]
        target_tensor = training_pair[1]

        loss = self._train(sos_id, eos_id, input_tensor, target_tensor,
                           self.encoder, self.decoder,
                           self.encoder_optimizer, self.decoder_optimizer,
                           criterion)
        print_loss_total += loss
        plot_loss_total += loss

        if iteration % print_every == 0:
            print_loss_avg = print_loss_total / print_every
            print_loss_total = 0
            print('%s (%d %d%%) %.4f' % (time_since(start, iteration / n_iter),
                                         iteration, iteration / n_iter * 100,
                                         print_loss_avg))

        if iteration % plot_every == 0:
            plot_loss_avg = plot_loss_total / plot_every
            self.plot_losses.append(plot_loss_avg)
            plot_loss_total = 0

        if iteration % save_every == 0:
            model_dir = '{}-{}_{}'.format(self.encoder.n_layers,
                                          self.decoder.n_layers, iteration)
            model_name = '{}.torch'.format('backup_bidir_model')
            directory = os.path.join(settings.BASE_DIR, dataset.idx,
                                     settings.SAVE_DATA_DIR, model_dir)
            if not os.path.exists(directory):
                os.makedirs(directory)
            torch.save({
                'iteration': iteration,
                'enc': self.encoder.state_dict(),
                'dec': self.decoder.state_dict(),
                'enc_opt': self.encoder_optimizer.state_dict(),
                'dec_opt': self.decoder_optimizer.state_dict(),
                'model': self,
                'loss': loss
            }, os.path.join(directory, model_name))
            with open(os.path.join(settings.BASE_DIR, dataset.idx,
                                   settings.SAVE_DATA_DIR, '.metadata'),
                      'w') as f:
                f.write(os.path.join(model_dir, model_name))
def test_epoch(epoch):
    total_loss = 0
    total_loss_reconstruction = 0
    total_loss_topic = 0
    model.eval()
    for i in range(opt.epoch_size):
        batch = get_batch(test_dataset)

        # Forward step
        if 'topic' in opt.model:
            loss_batch, loss_reconstruction, loss_topic = model.evaluate(batch)
            # .item() replaces the pre-0.4 loss.data[0] idiom
            total_loss += loss_batch.item() / opt.sentence_len
            total_loss_reconstruction += loss_reconstruction.item() / opt.sentence_len
            total_loss_topic += loss_topic.item() / opt.sentence_len
            if epoch < 10:
                print('[Test] time:{}, iter:{}, loss:{}, loss reconstruction: {}, loss topic: {}'
                      .format(utils.time_since(start), i,
                              loss_batch.item() / opt.sentence_len,
                              loss_reconstruction.item() / opt.sentence_len,
                              loss_topic.item() / opt.sentence_len))
                warmup = 'Wh' if opt.model == 'char_rnn' else ['what']
                test_sample = model.test(warmup, opt.sentence_len)
                try:
                    print(test_sample)
                except UnicodeEncodeError:
                    print('Unicode error')
        else:
            loss_batch = model.evaluate(batch)
            total_loss += loss_batch.item() / opt.sentence_len
            if epoch < 10:
                print('[Test] time:{}, iter:{}, loss:{}'.format(
                    utils.time_since(start), i,
                    loss_batch.item() / opt.sentence_len))

    return (total_loss / opt.epoch_size,
            total_loss_reconstruction / opt.epoch_size,
            total_loss_topic / opt.epoch_size)
def check_in_progress_inactive_cards(in_progress_cards):
    to_warn_inactive_cards_count = 0
    for in_progress_card in in_progress_cards:
        in_progress_card_obj = Dict(in_progress_card)
        last_update_time = date_time_from_iso(in_progress_card_obj.dateLastActivity)
        time_since_update = time_since(last_update_time)
        inactive_time_in_seconds = time_since_update.total_seconds()
        if inactive_time_in_seconds > MAXIMUM_INACTIVE_TIME_IN_SECONDS:
            log(f'Card {in_progress_card_obj.id} has been inactive for longer than allowed, notifying')
            to_warn_inactive_cards_count += 1
            notify_inactive_card(in_progress_card_obj,
                                 inactive_time_in_seconds / 60)
    log(f'Inactive cards count: {to_warn_inactive_cards_count}')
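# Several snippets here call a one-argument time_since() and then treat the
# result as a timedelta (`.total_seconds()` above) or pass it epoch seconds
# (fetch_details and the strptime helper earlier). Its body never appears in
# this file; a plausible sketch covering both call styles, offered as an
# assumption rather than the projects' verified helper:
import time
from datetime import datetime, timedelta, timezone

def time_since(then):
    """Elapsed wall-clock time since `then`, as a timedelta.

    `then` may be an aware datetime or Unix epoch seconds (inferred from
    the call sites above, not confirmed by the original code)."""
    if isinstance(then, datetime):
        return datetime.now(then.tzinfo or timezone.utc) - then
    return timedelta(seconds=time.time() - float(then))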
def train_iter(input_lang, target_lang, pairs, encoder, decoder, n_iters,
               print_every=1000, plot_every=100, learning_rate=0.001):
    start = time.time()
    plot_losses = []
    print_loss_total = 0
    plot_loss_total = 0

    encoder_optimizer = optim.SGD(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.SGD(decoder.parameters(), lr=learning_rate)
    training_pairs = [
        tensors_from_pair(input_lang, target_lang, random.choice(pairs))
        for _ in range(n_iters)
    ]
    criterion = nn.NLLLoss()

    # Loop variable renamed from `iter` to avoid shadowing the builtin
    for iteration in range(1, n_iters + 1):
        training_pair = training_pairs[iteration - 1]
        input_tensor = training_pair[0]
        target_tensor = training_pair[1]

        loss = train_tensor(input_tensor, target_tensor, encoder, decoder,
                            encoder_optimizer, decoder_optimizer, criterion)
        print_loss_total += loss
        plot_loss_total += loss

        if iteration % print_every == 0:
            print_loss_avg = print_loss_total / print_every
            print_loss_total = 0
            LOGGER.info('Iterations complete = %s/%s' % (iteration, n_iters))
            LOGGER.info('Loss = %s' % print_loss_avg)
            LOGGER.debug('%s (%d %d%%) %.4f' %
                         (time_since(start, iteration / n_iters), iteration,
                          iteration / n_iters * 100, print_loss_avg))

        if iteration % plot_every == 0:
            plot_loss_avg = plot_loss_total / plot_every
            plot_losses.append(plot_loss_avg)
            plot_loss_total = 0
def train_iters(input_lang, output_lang, pairs, encoder, decoder, n_iters,
                print_every=1000, plot_every=100, learning_rate=0.01):
    start = time.time()
    plot_losses = []
    print_loss_total = 0  # Reset every print_every
    plot_loss_total = 0  # Reset every plot_every

    encoder_optimizer = optim.SGD(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.SGD(decoder.parameters(), lr=learning_rate)
    training_pairs = [
        tensors_from_pair(input_lang, output_lang, random.choice(pairs))
        for _ in range(n_iters)
    ]
    criterion = nn.NLLLoss()

    # Loop variable renamed from `iter` to avoid shadowing the builtin
    for iteration in range(1, n_iters + 1):
        training_pair = training_pairs[iteration - 1]
        input_tensor = training_pair[0]
        target_tensor = training_pair[1]

        loss = train(input_tensor, target_tensor, encoder, decoder,
                     encoder_optimizer, decoder_optimizer, criterion)
        print_loss_total += loss
        plot_loss_total += loss

        if iteration % print_every == 0:
            print_loss_avg = print_loss_total / print_every
            print_loss_total = 0
            print('%s (%d %d%%) %.4f' % (time_since(start, iteration / n_iters),
                                         iteration, iteration / n_iters * 100,
                                         print_loss_avg))

        if iteration % plot_every == 0:
            plot_loss_avg = plot_loss_total / plot_every
            plot_losses.append(plot_loss_avg)
            plot_loss_total = 0

    show_plot(plot_losses)
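# Nearly every training loop in this file formats progress with a
# two-argument time_since(since, percent). The helper's body never appears
# here; the printed '<elapsed> (- <remaining>)' strings match the well-known
# PyTorch seq2seq tutorial helper, so the following sketch is an assumption
# along those lines rather than any project's verified code:
import math
import time

def as_minutes(s):
    m = math.floor(s / 60)
    return '%dm %ds' % (m, s - m * 60)

def time_since(since, percent):
    """Elapsed time plus a naive ETA assuming linear progress."""
    s = time.time() - since
    es = s / percent  # estimated total duration
    rs = es - s       # estimated remaining time
    return '%s (- %s)' % (as_minutes(s), as_minutes(rs))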
def train(self):
    start = time.time()
    prefix = "model/"
    prefix += time.strftime('%m-%d_%H-%M/', time.localtime(start))
    if not os.path.exists(prefix):
        os.makedirs(prefix)
    best_loss = float('inf')
    # copy.deepcopy (requires `import copy`) is needed here: the original
    # plain assignment aliased the live model, so "best" silently tracked
    # the latest weights instead of the best ones.
    self.best_model = copy.deepcopy(self.model)
    loss_avg = 0.
    for epoch in range(1, self.num_epochs + 1):
        loss = self.train_one_step(*self.sample())
        loss_avg += loss

        if epoch % self.print_every == 0 and self.verbose:
            print('[%s, %d%%, %.4f]' % (time_since(start),
                                        epoch / self.num_epochs * 100, loss))
            predicted = self.evaluate(topics=["霜", "月光", "故鄉"])
            print('Sample: %s' % predicted)
            print('BLEU-2 score: %.5f\n' %
                  self.BLEU_score(predicted, "牀前看月光,疑是地上霜。舉頭望山月,低頭思故鄉。"))

        if epoch % self.plot_every == 0:
            loss_avg /= self.plot_every
            self.all_losses.append(loss_avg)
            if loss_avg < best_loss:
                best_loss = loss_avg
                self.best_model = copy.deepcopy(self.model)
            loss_avg = 0

        if epoch % self.save_every == 0:
            model_name = prefix + "char_rnn_%d.model" % epoch
            torch.save(self.model, model_name)

    model_name = prefix + "best_char_rnn.model"
    torch.save(self.best_model, model_name)
    print("save best model with loss %.3f" % best_loss)
def train():
    total_loss = 0  # Reset every plot_every iters
    start = time.time()
    # Loop variable renamed from `iter` to avoid shadowing the builtin
    for iteration in range(1, n_iters + 1):
        output, loss = compute(*random_train_example())
        total_loss += loss

        if iteration % print_every == 0:
            print('%s (%d %d%%) %.4f' % (time_since(start), iteration,
                                         iteration / n_iters * 100, loss))
        if iteration % plot_every == 0:
            all_losses.append(total_loss / plot_every)
            total_loss = 0

    model_dir = 'result/model.pkl'
    loss_dir = 'result/all_losses.pkl'
    with open(loss_dir, 'wb') as out_data:
        pickle.dump(all_losses, out_data, pickle.HIGHEST_PROTOCOL)
    torch.save(rnn, model_dir)
    print("Saved training artifacts:", loss_dir, model_dir)
def main():
    # Keep track of time elapsed and running averages
    start = time.time()
    losses = []
    print_loss_total = 0  # Reset every print_every
    plot_loss_total = 0  # Reset every plot_every

    for epoch, batch in enumerate(copyloader):
        training_pair = batch
        input_variable = Variable(training_pair[0])  # [batch_size x len(training_pair[0])]
        target_variable = Variable(training_pair[1]).view(-1)
        batch_size = input_variable.size(0)

        model.zero_grad()
        model.hidden = model.init_hidden(batch_size)

        # Run the train function
        model_scores = model(input_variable)
        loss = criterion(model_scores, target_variable)
        loss.backward()
        # (The optimizer name is historical; it updates `model`'s parameters.)
        encoder_optimizer.step()

        loss_value = loss.item()  # replaces the pre-0.4 loss.cpu().data.squeeze()[0]

        if epoch % print_every == 0:
            losses.append(loss_value)

        # Keep track of loss
        print_loss_total += loss_value
        plot_loss_total += loss_value

        if epoch == 0:
            continue  # skip the misleading one-sample average on the first batch

        if epoch % print_every == 0:
            print_loss_avg = print_loss_total / print_every
            print_loss_total = 0
            print_summary = '%s (%d %d%%) %.4f' % (
                time_since(start, epoch / cfg.n_epochs), epoch,
                epoch / cfg.n_epochs * 100, print_loss_avg)
            print(print_summary)
def train(encoder, decoder, n_iters, pairs, input_lang, output_lang,
          print_every=1000, plot_every=1000, learning_rate=0.01):
    print('train begin:')
    start = time.time()
    plot_losses = []
    print_loss_total = 0  # Reset every print_every
    plot_loss_total = 0  # Reset every plot_every
    encoder_optimizer = optim.SGD(
        filter(lambda p: p.requires_grad, encoder.parameters()),
        lr=learning_rate)
    decoder_optimizer = optim.SGD(decoder.parameters(), lr=learning_rate)

    # Randomly sample pairs from the training set.
    training_pairs = []
    print('Converting sampled pairs to tensors...')
    for i in range(n_iters):
        lang1_sample, lang2_sample = tensors_from_pair(random.choice(pairs),
                                                       input_lang, output_lang)
        training_pairs.append((lang1_sample, lang2_sample))
    print('done')

    loss_func = nn.NLLLoss()
    for iter_ in range(1, n_iters + 1):
        training_pair = training_pairs[iter_ - 1]  # Get a training pair.
        input_tensor = training_pair[0]
        target_tensor = training_pair[1]
        loss = train_iteration(input_tensor, target_tensor, encoder, decoder,
                               encoder_optimizer, decoder_optimizer, loss_func)
        print_loss_total += loss
        plot_loss_total += loss

        if iter_ % print_every == 0:
            print_loss_avg = print_loss_total / print_every
            print_loss_total = 0
            print('%s (%d %d%%) %.4f' % (time_since(start, iter_ / n_iters),
                                         iter_, iter_ / n_iters * 100,
                                         print_loss_avg))
        if iter_ % plot_every == 0:
            plot_loss_avg = plot_loss_total / plot_every
            plot_losses.append(plot_loss_avg)
            plot_loss_total = 0
    return plot_losses
def train_iters(*,
                corpus: Corpus,
                encoder: EncoderRNN,
                decoder: AttnDecoderRNN,
                device: torch.device,
                n_iters: int,
                batch_size: int,
                teacher_forcing_ratio: float,
                print_every: int = 1000,
                learning_rate: float = 0.01) -> None:
    data = torch.utils.data.DataLoader(dataset=corpus, batch_size=batch_size)

    start: float = time.time()
    plot_losses: List[float] = []
    print_loss_total: float = 0  # Reset every print_every
    plot_loss_total: float = 0  # Reset every plot_every

    encoder_optimizer: Optimizer = SGD(encoder.parameters(), lr=learning_rate)
    decoder_optimizer: Optimizer = SGD(decoder.parameters(), lr=learning_rate)

    criterion: nn.NLLLoss = nn.NLLLoss(reduction='mean')

    for iteration in range(1, n_iters + 1):  # type: int
        for batch in data:
            input_tensor: torch.Tensor = batch["data"].permute(1, 0)
            target_tensor: torch.Tensor = batch["labels"].permute(1, 0)
            actual_batch_size: int = min(batch_size, input_tensor.shape[1])

            verify_shape(tensor=input_tensor,
                         expected=[corpus.word_tensor_length, actual_batch_size])
            verify_shape(tensor=target_tensor,
                         expected=[corpus.label_tensor_length, actual_batch_size])

            loss: float = train(input_tensor=input_tensor,
                                target_tensor=target_tensor,
                                encoder=encoder,
                                decoder=decoder,
                                encoder_optimizer=encoder_optimizer,
                                decoder_optimizer=decoder_optimizer,
                                criterion=criterion,
                                device=device,
                                max_src_length=corpus.word_tensor_length,
                                max_tgt_length=corpus.label_tensor_length,
                                batch_size=actual_batch_size,
                                start_of_sequence_symbol=corpus.characters.start_of_sequence_int,
                                teacher_forcing_ratio=teacher_forcing_ratio)

            print_loss_total += loss
            plot_loss_total += loss

        if iteration % print_every == 0:
            print_loss_avg: float = print_loss_total / print_every
            print_loss_total = 0
            print('%s (%d %d%%) %.4f' %
                  (time_since(since=start, percent=iteration / n_iters),
                   iteration, iteration / n_iters * 100, print_loss_avg))
            sys.stdout.flush()
    # (Fragment: the head of this train(...) call was truncated; the
    # argument list is reconstructed from the complete call later in this file.)
    loss = train(input_variable, output_variable, mask_variable, encoder,
                 decoder, encoder_optimizer, decoder_optimizer, criterion,
                 batch_size=config.BATCH_SIZE)

    # Keep track of loss
    print_loss_total += loss
    plot_loss_total += loss

    if epoch % config.PRINT_STEP == 0:
        print_loss_avg = print_loss_total / config.PRINT_STEP
        print_loss_total = 0
        print_summary = '%s (%d %d%%) %.4f' % (
            time_since(start, epoch / config.NUM_ITER), epoch,
            epoch / config.NUM_ITER * 100, print_loss_avg)
        print(print_summary)

    if epoch % config.CHECKPOINT_STEP == 0:
        encoder_path = os.path.join(config.MODEL_DIR, "encoder.pth")
        decoder_path = os.path.join(config.MODEL_DIR, "decoder.pth")
        torch.save(encoder.state_dict(), encoder_path)
        torch.save(decoder.state_dict(), decoder_path)

"""
Disabled draft kept from the original (truncated mid-function):

def evaluate(sentence, max_length=MAX_LENGTH):
    input_index, output_index = val_dataloader.indexes_from_sentence(sentence)
    input_variable = Variable(torch.LongTensor(input_index))
    output_variable = Variable(torch.LongTensor(output_index))
    input_variable = variable_from_sentence(chinese, sentence)
    input_length = input_variable.size()[0]
"""
def run(self, serial, tag, tagname, pagename, soup, request, response):
    i, s = Index(), Store()
    try:
        uri = tag['href']
    except KeyError:
        return True

    # Try to handle relative URIs
    if uri[0] == '.':
        uri = posixpath.normpath(os.path.join(pagename, uri))

    # Try to handle the uri as a schema/path pair
    schema = ''
    path = uri
    try:
        schema, path = uri.split(':', 1)
    except ValueError:  # no ':' present, so this is a bare path
        pass

    known = False
    if schema == '':
        uri = i.resolve_alias(path)
        if uri != path:
            path = tag['href'] = uri
        if s.exists(uri) or uri in i.all_pages:
            known = True

    if schema == '':
        if s.is_attachment(pagename, path):
            tag['href'] = unicode(self.media + pagename + "/" + path)
            tag['title'] = self.schemas['attach']['title'] % {'uri': os.path.basename(path)}
            tag['class'] = self.schemas['attach']['class']
            return False
        if known:
            # this is a known Wiki link, so there is no need to run it through more plugins
            if request is False:
                # check for a direct outbound link
                if path in i.default_links:
                    uri = i.default_links[path]
                    (schema, netloc, path, parameters, query, fragment) = urlparse.urlparse(uri)
                    tag['href'] = uri
                    tag['title'] = self.schemas[schema]['title'] % {'uri': uri}
                    tag['class'] = self.schemas[schema]['class']
                    return False
            tag['href'] = self.base + uri
            tag['class'] = "wiki"
            try:
                # to use indexed metadata to annotate links
                last = i.page_info[path]['last-modified']
                tag['title'] = _('link_update_format') % (path, time_since(last))
            except Exception:
                tag['title'] = _('link_defined_notindexed_format') % path
        elif '#' in uri:
            # this is an anchor, leave it alone
            tag['href'] = self.base + uri
            tag['class'] = "anchor"
            try:
                exists = tag['title']
            except KeyError:
                # the original formatted an undefined `fragment` here;
                # derive it from the uri instead
                tag['title'] = _('link_anchor_format') % uri.split('#', 1)[1]
        else:
            if request is False:
                # remove unknown wiki links for RSS feeds
                tag.replace_with(tag.contents[0])
            # format for online viewing
            try:
                exists = tag['class']
                return True  # we're done here, but this tag may need handling elsewhere
            except KeyError:
                tag['href'] = self.base + uri
                tag['class'] = "wikiunknown"
                tag['title'] = _('link_undefined_format') % path
    elif schema in self.schemas.keys():
        # this is an external link, so reformat it
        tag['title'] = self.schemas[schema]['title'] % {'uri': uri}
        tag['class'] = self.schemas[schema]['class']
        #tag['target'] = '_blank'
    else:
        # assume this is an interwiki link (i.e., it seems to have a custom schema)
        tag['title'] = _('link_interwiki_format') % uri
        tag['class'] = "interwiki"
        tag['target'] = '_blank'
        # Signal that this tag needs further processing
        return True
    # We're done
    return False
def train(n_epochs):
    # prepare for saving
    os.system("mkdir -p " + opt.save_dir)

    # training
    best_valid_loss = 1e6
    train_losses, valid_losses = [], []
    valid_loss_reconstruction, valid_loss_topic = -1, -1
    for i in range(0, n_epochs):
        train_loss, train_loss_reconstruction, train_loss_topic = train_epoch(i)
        train_losses.append(train_loss)
        try:
            valid_loss, valid_loss_reconstruction, valid_loss_topic = test_epoch(i)
            valid_losses.append(valid_loss)
        except Exception:
            print('Error when testing epoch')
            valid_losses.append(1e6)

        # If model improved, save it
        if valid_losses[-1] < best_valid_loss:
            best_valid_loss = valid_losses[-1]
            # save
            utils.move(gpu=False, tensor_list=model.submodules)
            torch.save({
                'epoch': i,
                'model': model,
                'train_loss': train_losses,
                'valid_loss': valid_losses,
                'optimizer': optimizer,
                'opt': opt
            }, opt.save_dir + 'checkpoint')
            utils.move(gpu=utils.is_remote(), tensor_list=model.submodules)

        # Print log string
        if 'topic' in opt.model:
            log_string = (
                'iter: {:d}, train_loss: {:0.6f}, valid_loss: {:0.6f}, '
                'best_valid_loss: {:0.6f}, lr: {:0.5f}, '
                'train_loss_reconstruction: {:0.6f}, train_loss_topic: {:0.6f}, '
                'valid_loss_reconstruction: {:0.6f}, valid_loss_topic: {:0.6f}'
            ).format((i + 1) * opt.epoch_size, train_losses[-1],
                     valid_losses[-1], best_valid_loss, opt.lrt,
                     train_loss_reconstruction, train_loss_topic,
                     valid_loss_reconstruction, valid_loss_topic)
        else:
            log_string = (
                'iter: {:d}, train_loss: {:0.6f}, valid_loss: {:0.6f}, '
                'best_valid_loss: {:0.6f}, lr: {:0.5f}'
            ).format((i + 1) * opt.epoch_size, train_losses[-1],
                     valid_losses[-1], best_valid_loss, opt.lrt)
        print(log_string)
        utils.log(opt.save_dir + 'logs.txt', log_string, utils.time_since(start))

        # Print example
        warmup = 'Wh' if opt.model == 'char_rnn' else ['what']
        test_sample = model.test(warmup, opt.sentence_len)
        utils.log(opt.save_dir + 'examples.txt', test_sample)
        try:
            print(test_sample + '\n')
        except UnicodeEncodeError:
            traceback.print_exc()
def train(encoder, decoder, optim, optim_params, weight_init, grad_clip,
          is_ptr, training_pairs, n_epochs, teacher_force_ratio, print_every,
          plot_every, save_every):
    """ The training loop. """
    np.random.seed(RANDOM_SEED)
    torch.manual_seed(RANDOM_SEED)
    encoder.train()
    decoder.train()
    encoder_optim = optim(encoder.parameters(), **optim_params)
    decoder_optim = optim(decoder.parameters(), **optim_params)

    checkpoint = load_checkpoint("ptr" if is_ptr else "vanilla")
    if checkpoint:
        start_epoch = checkpoint["epoch"]
        first_iter = checkpoint["iter"]
        plot_losses = checkpoint["plot_losses"]
        print_loss_total = checkpoint["print_loss_total"]
        plot_loss_total = checkpoint["plot_loss_total"]
        encoder.load_state_dict(checkpoint["encoder"])
        decoder.load_state_dict(checkpoint["decoder"])
        encoder_optim.load_state_dict(checkpoint["encoder_optim"])
        decoder_optim.load_state_dict(checkpoint["decoder_optim"])
    else:
        start_epoch = 0
        first_iter = 0
        plot_losses = []
        print_loss_total = 0  # Reset every print_every
        plot_loss_total = 0  # Reset every plot_every
        encoder.apply(weight_init)  # initialize weights
        decoder.apply(weight_init)  # initialize weights

    criterion = nn.NLLLoss()
    size, n_iters = len(training_pairs), n_epochs * len(training_pairs)
    current_iter = start_epoch * size + first_iter
    start = time.time()

    for epoch in range(start_epoch, n_epochs):
        np.random.shuffle(training_pairs)
        start_iter = first_iter if epoch == start_epoch else 0
        for i in range(start_iter, size):
            loss = train_step(training_pairs[i], encoder, decoder,
                              encoder_optim, decoder_optim, is_ptr, criterion,
                              teacher_force_ratio, grad_clip)
            print_loss_total += loss
            plot_loss_total += loss
            current_iter += 1

            if current_iter % print_every == 0:
                print_loss_avg, print_loss_total = print_loss_total / print_every, 0
                print('%s (epoch: %d iter: %d %d%%) %.4f' %
                      (time_since(start, current_iter / n_iters), epoch, i + 1,
                       current_iter / n_iters * 100, print_loss_avg))

            if current_iter % plot_every == 0:
                plot_loss_avg, plot_loss_total = plot_loss_total / plot_every, 0
                plot_losses.append(plot_loss_avg)

            if current_iter % save_every == 0:
                if i + 1 < size:
                    save_epoch = epoch
                    save_iter = i + 1
                else:
                    save_epoch = epoch + 1
                    save_iter = 0
                save_checkpoint({
                    "epoch": save_epoch,
                    "iter": save_iter,
                    "plot_losses": plot_losses,
                    "print_loss_total": print_loss_total,
                    "plot_loss_total": plot_loss_total,
                    "encoder": encoder.state_dict(),
                    "decoder": decoder.state_dict(),
                    "encoder_optim": encoder_optim.state_dict(),
                    "decoder_optim": decoder_optim.state_dict(),
                }, "ptr" if is_ptr else "vanilla")

    show_plot(plot_losses, save=True)
def train(args, config, tx2_model, device, tx2_train_loader, tx2_test_loader,
          tx2_optimizer, epoch):
    vm_start = time.time()
    tx2_model.train()
    for li_idx in range(args.local_iters):
        for batch_idx, (vm_data, vm_target) in enumerate(tx2_train_loader, 1):
            # Flatten images for the logistic-regression model
            if args.dataset_type in ('FashionMNIST', 'MNIST'):
                if args.model_type == 'LR':
                    vm_data = vm_data.squeeze(1)
                    vm_data = vm_data.view(-1, 28 * 28)
            # Reshape CIFAR images into a 32-step sequence for the LSTM
            if args.dataset_type in ('CIFAR10', 'CIFAR100'):
                if args.model_type == 'LSTM':
                    vm_data = vm_data.permute(0, 2, 3, 1)
                    vm_data = vm_data.contiguous().view(-1, 32, 32 * 3)

            vm_data, vm_target = vm_data.to(device), vm_target.to(device)

            if args.model_type == 'LSTM':
                hidden = tx2_model.initHidden(args.batch_size)
                hidden = hidden.send(vm_data.location)
                for col_idx in range(32):
                    vm_data_col = vm_data[:, col_idx, :]
                    vm_output, hidden = tx2_model(vm_data_col, hidden)
            else:
                vm_output = tx2_model(vm_data)

            tx2_optimizer.zero_grad()
            vm_loss = F.nll_loss(vm_output, vm_target)
            vm_loss.backward()
            tx2_optimizer.step()

            if batch_idx % args.log_interval == 0:
                vm_loss = vm_loss.item()  # extract the scalar loss
                print('-->[{}] Train Epoch: {} Local Iter: {} tx2: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                    time_since(vm_start), epoch, li_idx, config.idx,
                    batch_idx * args.batch_size,
                    len(tx2_train_loader) * args.batch_size,
                    100. * batch_idx / len(tx2_train_loader), vm_loss))

            del vm_data
            del vm_target
            del vm_output
            del vm_loss

    # Guard: without a default, returning below would raise NameError
    # when per-vm testing is disabled.
    test_acc = None
    if args.enable_vm_test:
        print('-->[{}] Test set: Epoch: {} tx2: {}'.format(
            time_since(vm_start), epoch, config.idx))
        # test for each vm
        _, test_acc = test(args, vm_start, tx2_model, device,
                           tx2_test_loader, epoch)

    gc.collect()
    return test_acc
    # (Fragment: the loop header was truncated; the tuple is reconstructed
    # from the names used in the body.)
    for i, (words_t, words_num_t, sent_t, mask_index_t, mask_num_t,
            mask_label_t, label_t) in enumerate(train_loader):
        words_t, words_num_t, sent_t = (words_t.to(device),
                                        words_num_t.to(device),
                                        sent_t.to(device))
        mask_index_t, mask_num_t = mask_index_t.to(device), mask_num_t.to(device)
        mask_label_t, label_t = mask_label_t.to(device), label_t.to(device)

        loss = train(net, words_t, words_num_t, sent_t, mask_index_t,
                     mask_num_t, mask_label_t, label_t,
                     mask_cross_entropy_loss, sent_cls_loss_fn, opt)
        train_loss += loss

        if (i + 1) % cfg.print_every == 0:
            lr_sche.step(train_loss)
            print('Epoch %d, %s, (%d -- %d %%), train loss %.3f' %
                  (epoch, time_since(start, (i + 1) / len(train_loader)),
                   i + 1, (i + 1) * 100 / len(train_loader),
                   train_loss / cfg.print_every))
            train_loss = 0.0

        if (i + 1) % cfg.valid_every == 0:
            valid_loss, valid_mask_loss, valid_sent_cls_loss = 0., 0., 0.
            valid_mask_acc, valid_sent_cls_acc = 0., 0.
            for (words_t, words_num_t, sent_t, mask_index_t, mask_num_t,
                 mask_label_t, label_t) in valid_loader:
                words_t, words_num_t, sent_t = (words_t.to(device),
                                                words_num_t.to(device),
                                                sent_t.to(device))
                mask_index_t, mask_num_t = (mask_index_t.to(device),
                                            mask_num_t.to(device))
                mask_label_t, label_t = (mask_label_t.to(device),
                                         label_t.to(device))
                total_loss, mask_loss, mask_acc, sent_cls_loss, sent_cls_acc = valid(
def train(train_loader, batch_size, vocab_size, bidirectional=None):
    """Run the training loop.

    Parameters
    ----------
    train_loader : DataLoader
        The training dataset in -Loader format.
    batch_size : int
        Number of sentences per batch.
    vocab_size : int
        Size of the input vocabulary.
    bidirectional : bool, optional
        Train a BiLSTM instead of a unidirectional LSTM.

    Returns
    -------
    LSTMClassifier
        The trained LSTM.
    """
    EMBEDDING_DIM = 32
    HIDDEN_DIM = 128

    if bidirectional:
        LSTM = model.BiLSTMClassifier(EMBEDDING_DIM, HIDDEN_DIM, vocab_size,
                                      batch_size)
    else:
        LSTM = model.LSTMClassifier(EMBEDDING_DIM, HIDDEN_DIM, vocab_size,
                                    batch_size)
    if use_cuda:
        LSTM = LSTM.cuda()

    loss_function = nn.NLLLoss()
    optimizer = optim.SGD(LSTM.parameters(), lr=0.1)
    print_every = 1
    n_epochs = 1

    for epoch in range(1, n_epochs + 1):
        start = time.time()
        epoch_loss = 0
        for data in train_loader:
            if use_cuda:
                sentence = Variable(data["sentence"].cuda())
                label = Variable(data["language"].cuda())
            else:
                sentence = Variable(data["sentence"])
                label = Variable(data["language"])

            LSTM.zero_grad()
            LSTM.hidden = LSTM.init_hidden()

            pred = LSTM(sentence)
            loss = loss_function(pred, label)
            epoch_loss += loss.item()
            loss.backward()
            optimizer.step()

        if epoch % print_every == 0:
            # Average over batches, not over print_every (the original
            # divided an epoch total by print_every).
            loss_avg = epoch_loss / len(train_loader)
            print("%s (%d %d%%) %.4f" % (
                utils.time_since(start, epoch / n_epochs), epoch,
                epoch / n_epochs * 100, loss_avg))
    return LSTM
def main(n_instances=None):
    # Keep track of time elapsed and running averages
    start = time.time()
    losses = []
    print_nll_loss_total = 0  # Reset every print_every
    print_g_loss_total = 0
    print_d_loss_total = 0
    plot_loss_total = 0  # Reset every plot_every

    for epoch in range(1, opt.n_epochs + 1):
        for idx, batch in enumerate(lmloader):
            nll_loss, gen_loss, disc_loss = trainer.train(opt, batch)

            if idx % print_every == 0:
                losses.append(nll_loss)
                if opt.tensorboard:
                    step_no = (epoch - 1) * (len(lmdataset) // opt.batch_size
                                             if n_instances is None
                                             else n_instances) + idx
                    if opt.adversarial:
                        tag_dict = {
                            'nll_loss': nll_loss,
                            'generator_loss': gen_loss,
                            'discriminator_loss': disc_loss,
                        }
                    else:
                        tag_dict = {'nll_loss': nll_loss}
                    writer.add_scalars(prefix, tag_dict, step_no)
                    if opt.plot_grad_norms:
                        norms_dict = model.view_rnn_grad_norms()
                        writer.add_scalars(prefix + 'norms', norms_dict, step_no)

            # Keep track of loss
            print_nll_loss_total += nll_loss
            if opt.adversarial:
                print_g_loss_total += gen_loss
                print_d_loss_total += disc_loss
            plot_loss_total += nll_loss

            if idx % print_every == 0:
                print_summary = '%s (%d %d%%) nll %.4f generator %.4f discriminator %.4f' % (
                    time_since(start, epoch / opt.n_epochs), epoch,
                    epoch / opt.n_epochs * 100,
                    print_nll_loss_total / print_every,
                    print_g_loss_total / print_every,
                    print_d_loss_total / print_every,
                )
                logging.info(print_summary)
                print_nll_loss_total = 0
                print_g_loss_total = 0
                print_d_loss_total = 0
            if n_instances is not None:
                if idx > n_instances:
                    break

        if not opt.not_save:
            # add epoch and loss info
            fname = opt.save_path + '.' + prefix + \
                '.epoch{:2}'.format(epoch) + '.loss{:4.1}.pt'.format(nll_loss)
            torch.save(model, fname)

    if opt.tensorboard:
        writer.close()
best_acc = config.best_acc
print('init acc = %f' % best_acc)
sd = net.state_dict()
for epoch in range(config.epochs):
    start = time.time()
    train_loss = 0.0
    for i, (words_t, label_t) in enumerate(train_loader):
        words_t, label_t = words_t.to(device), label_t.to(device)
        loss = train(net, words_t, label_t, loss_fn, opt)
        train_loss += loss

        if (i + 1) % config.print_every == 0:
            lr_sche.step(train_loss)
            print('Epoch %d, %s, (%d -- %d %%), train loss %.3f' %
                  (epoch, time_since(start, (i + 1) / len(train_loader)),
                   i + 1, (i + 1) * 100 / len(train_loader),
                   train_loss / config.print_every))
            train_loss = 0.0

        if (i + 1) % config.valid_every == 0:
            valid_loss, valid_acc = 0.0, 0.0
            for words_t, label_t in valid_loader:
                words_t, label_t = words_t.to(device), label_t.to(device)
                val_l, val_a = valid(net, words_t, label_t, loss_fn)
                valid_loss += val_l
                valid_acc += val_a
            print('Epoch %d, step %d, valid loss %.3f, valid accuracy %.3f' %
                  (epoch, i + 1, valid_loss / len(valid_loader),
                   valid_acc / len(valid_loader)))
            if best_acc < (valid_acc / len(valid_loader)):
                best_acc = valid_acc / len(valid_loader)
                sd = net.state_dict()
                print('best acc = %.3f' % best_acc)
def train_iters(train_pairs, dev_pairs, model, n_iters, batch_size, print_every, learning_rate=config.args.learning_rate): # pairs[0]: questions_index_list # pairs[1]: answers_index_list # pairs[2]: label 0 or 1 # # pairs: ([[q1_index_list], [q2_index_list], ..., [qn_index_list]], # [[a1_index_list], [a2_index_list], ..., [an_index_list]], # [[l1_int], [l2_int], ..., [ln_int]]) start = time.time() print_loss_total = 0 # Reset every print_every # optimizer = optim.SGD(encoder.parameters(), lr=learning_rate) # print(list(model.parameters())) optimizer = optim.Adam(model.parameters(), lr=0.001) training_pairs_list = [ tensors_from_pair(random_choice_pair_from_pairs(train_pairs)) for i in range(n_iters) ] criterion = nn.NLLLoss() batch_loss = 0.0 # 1 iter につき,1つのQAペア for iter in range(1, n_iters + 1): training_pair = training_pairs_list[iter - 1] answer_tensor = training_pair[0] question_tensor = training_pair[1] label_int = training_pair[2] ''' answer_tensor: tensor([[ 6], [ 8], [ 7], [ 2], [ 1]]) question_tensor: tensor([[ 56], [ 26621], [ 5], [ 6440], [ 4177], [ 1797], [ 1]]) label_int: 1 ''' # loss: NLLLoss y = model.forward(answer_tensor, question_tensor) label_tensor = get_label_tensor(label_int).to(config.device) loss = criterion(y, label_tensor) print_loss_total += loss.item() batch_loss += loss if iter % batch_size == 0: # print(model.encoder.gru.) optimizer.zero_grad() batch_loss.backward() optimizer.step() batch_loss = 0.0 if iter % print_every == 0: print_loss_avg = print_loss_total / print_every print_loss_total = 0 # 訓練データでの評価 train_score = evaluate_randomly(train_pairs, model, n_iters=100) # 開発データでの評価 dev_score = evaluate_randomly(dev_pairs, model, n_iters=100) print( 'Time:%s (%d %d%%) Loss:%.4f Accuracy(train data):%s Accuracy(dev data):%s' % (utils.time_since(start, iter / n_iters), iter, iter / n_iters * 100, print_loss_avg, train_score, dev_score))
if IO.yes_btn_pressed:
    show_publishing()
if IO.no_btn_pressed:
    show_idle()

# start screaming
if IO.sound_level_high and not any(states):
    show_screaming()
    print("scream_start", format_time(scream_start))

# end of screaming => fail
if not IO.sound_level_high and scream_start:
    show_fail()
    print("fail_start", format_time(fail_start))

# end of fail => idle
if fail_start:
    fail_duration = time_since(fail_start)
    if is_after(CONST["FAIL_TIME_MS"], fail_duration):
        show_idle()

# end of screaming => success
if IO.sound_level_high and scream_start:
    scream_duration = time_since(scream_start)
    if is_after(CONST["CHALLENGE_TIME_MS"], scream_duration):
        show_success()
        print("success_start", format_time(success_start))

# end of success => choice
if success_start:
    success_duration = time_since(success_start)
    if is_after(CONST["SUCCESS_TIME_MS"], success_duration):
        show_choice()
        print("choice_start", format_time(choice_start))

# end of choice
    # Run the train function
    loss = train(input_variable, output_variable, mask_variable, encoder,
                 decoder, encoder_optimizer, decoder_optimizer, criterion,
                 batch_size=config.BATCH_SIZE)

    # Keep track of loss
    print_loss_total += loss
    plot_loss_total += loss

    print_loss_avg = 1  # placeholder so the name is always bound
    if epoch % config.PRINT_STEP == 0:
        print_loss_avg = print_loss_total / config.PRINT_STEP
        print_loss_total = 0
        writer.add_scalar('Loss', print_loss_avg, epoch)
        print('epoch: %d, elapsed %s, loss %.8f' %
              (epoch, time_since(start, epoch / config.NUM_ITER),
               print_loss_avg))

    if epoch % config.CHECKPOINT_STEP == 0:  # or print_loss_avg <= 0.5:  # uncomment to build a pretrained model
        encoder_path = os.path.join(config.MODEL_DIR, "encoder_%s.pth" % epoch)
        decoder_path = os.path.join(config.MODEL_DIR, "decoder_%s.pth" % epoch)
        torch.save(encoder.state_dict(), encoder_path)
        torch.save(decoder.state_dict(), decoder_path)
        print("models at epoch %d saved to %s" % (epoch, decoder_path))

writer.close()
print("done")
def train(args, model, train_loader, eval_loader, num_epochs, output,
          opt=None, s_epoch=0):
    device = args.device

    # Learning-rate schedule
    lr_default = args.lr
    lr_decay_step = 2
    lr_decay_rate = 0.75
    # Both branches of the original conditional were identical, so it is
    # simplified to a single expression.
    lr_decay_epochs = range(10, 20, lr_decay_step)
    gradual_warmup_steps = [
        0.5 * lr_default,
        1.0 * lr_default,
        1.5 * lr_default,
        2.0 * lr_default,
    ]
    saving_epoch = 15  # Start point for model saving
    grad_clip = args.clip_norm

    utils.create_dir(output)

    # Adamax optimizer
    optim = (torch.optim.Adamax(filter(lambda p: p.requires_grad,
                                       model.parameters()),
                                lr=lr_default) if opt is None else opt)

    # Loss functions
    criterion = torch.nn.BCEWithLogitsLoss(reduction="sum")
    ae_criterion = torch.nn.MSELoss()

    # write hyper-parameters to the log file
    logger = utils.Logger(os.path.join(output, "log.txt"))
    logger.write(args.__repr__())
    utils.print_model(model, logger)
    logger.write(
        "optim: adamax lr=%.4f, decay_step=%d, decay_rate=%.2f, grad_clip=%.2f"
        % (lr_default, lr_decay_step, lr_decay_rate, grad_clip))

    # create trainer
    trainer = Trainer(args, model, criterion, optim, ae_criterion)
    update_freq = int(args.update_freq)
    wall_time_start = time.time()
    best_eval_score = 0

    # Epoch passing in training phase
    for epoch in range(s_epoch, num_epochs):
        total_loss = 0
        train_score = 0
        total_norm = 0
        count_norm = 0
        num_updates = 0
        t = time.time()
        N = len(train_loader.dataset)
        num_batches = int(N / args.batch_size + 1)

        if epoch < len(gradual_warmup_steps):
            trainer.optimizer.param_groups[0]["lr"] = gradual_warmup_steps[epoch]
            logger.write("gradual warm up lr: %.4f" %
                         trainer.optimizer.param_groups[0]["lr"])
        elif epoch in lr_decay_epochs:
            trainer.optimizer.param_groups[0]["lr"] *= lr_decay_rate
            logger.write("decreased lr: %.4f" %
                         trainer.optimizer.param_groups[0]["lr"])
        else:
            logger.write("lr: %.4f" % trainer.optimizer.param_groups[0]["lr"])

        # Predicting and computing score
        for i, (v, q, a, _, _, _) in enumerate(train_loader):
            if args.maml:
                v[0] = v[0].reshape(v[0].shape[0], 84, 84).unsqueeze(1)
            if args.autoencoder:
                v[1] = v[1].reshape(v[1].shape[0], 128, 128).unsqueeze(1)
            v[0] = v[0].to(device)
            v[1] = v[1].to(device)
            q = q.to(device)
            a = a.to(device)

            sample = [v, q, a]
            if i < num_batches - 1 and (i + 1) % update_freq > 0:
                trainer.train_step(sample, update_params=False)
            else:
                loss, grad_norm, batch_score = trainer.train_step(
                    sample, update_params=True)
                total_norm += grad_norm
                count_norm += 1
                total_loss += loss.item()
                train_score += batch_score
                num_updates += 1
                if num_updates % int(args.print_interval / update_freq) == 0:
                    print("Iter: {}, Loss {:.4f}, Norm: {:.4f}, Total norm: {:.4f}, "
                          "Num updates: {}, Wall time: {:.2f}, ETA: {}".format(
                              i + 1, total_loss / (num_updates + 1), grad_norm,
                              total_norm, num_updates,
                              time.time() - wall_time_start,
                              utils.time_since(t, i / num_batches)))

        total_loss /= num_updates
        train_score = 100 * train_score / (num_updates * args.batch_size)

        # Evaluation
        if eval_loader is not None:
            print("Evaluating...")
            trainer.model.train(False)
            eval_score, bound = evaluate(model, eval_loader, args)
            trainer.model.train(True)

        logger.write("epoch %d, time: %.2f" % (epoch, time.time() - t))
        logger.write("\ttrain_loss: %.2f, norm: %.4f, score: %.2f" %
                     (total_loss, total_norm / count_norm, train_score))
        if eval_loader is not None:
            logger.write("\teval score: %.2f (%.2f)" %
                         (100 * eval_score, 100 * bound))

        # Save per epoch
        if epoch >= saving_epoch:
            model_path = os.path.join(output, "model_epoch%d.pth" % epoch)
            utils.save_model(model_path, model, epoch,
                             trainer.optimizer)

            # Save best epoch
            if eval_loader is not None and eval_score > best_eval_score:
                model_path = os.path.join(output, "model_epoch_best.pth")
                utils.save_model(model_path, model, epoch, trainer.optimizer)
                best_eval_score = eval_score
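# The schedule in train() above is a four-epoch gradual warmup followed by
# step decay (x0.75 every 2 epochs between epochs 10 and 18). A standalone
# sketch that prints the resulting learning rate per epoch, using the same
# constants (illustrative only):
lr_default, lr_decay_rate = 1e-3, 0.75
gradual_warmup_steps = [0.5 * lr_default, 1.0 * lr_default,
                        1.5 * lr_default, 2.0 * lr_default]
lr_decay_epochs = set(range(10, 20, 2))

lr = lr_default
for epoch in range(20):
    if epoch < len(gradual_warmup_steps):
        lr = gradual_warmup_steps[epoch]
    elif epoch in lr_decay_epochs:
        lr *= lr_decay_rate
    print('epoch %2d: lr = %.6f' % (epoch, lr))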
def test(args, start, model, device, test_loader, epoch):
    model.eval()
    test_loss = 0.0
    test_accuracy = 0.0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            # Flatten images for the logistic-regression model
            if args.dataset_type in ('FashionMNIST', 'MNIST'):
                if args.model_type == 'LR':
                    data = data.squeeze(1)
                    data = data.view(-1, 28 * 28)
            # Reshape CIFAR images into a 32-step sequence for the LSTM
            if args.dataset_type in ('CIFAR10', 'CIFAR100'):
                if args.model_type == 'LSTM':
                    data = data.view(-1, 32, 32 * 3)

            if args.model_type == 'LSTM':
                hidden = model.initHidden(args.test_batch_size)
                hidden = hidden.send(data.location)
                for col_idx in range(32):
                    data_col = data[:, col_idx, :]
                    output, hidden = model(data_col, hidden)
            else:
                output = model(data)

            # sum up batch loss
            test_loss += F.nll_loss(output, target, reduction='sum').item()
            # get the index of the max log-probability
            pred = output.argmax(1, keepdim=True)
            batch_correct = pred.eq(target.view_as(pred)).sum().item()
            correct += batch_correct

            if args.model_type == 'LSTM':
                del hidden
            del data
            del target
            del output
            del pred
            del batch_correct

    test_loss /= len(test_loader.dataset)
    # np.float was removed in NumPy 1.24; the builtin float is equivalent here
    test_accuracy = float(correct) / len(test_loader.dataset)
    if args.enable_vm_test:
        print('-->[{}] Test set: Epoch: {} Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)'.format(
            time_since(start), epoch, test_loss, correct,
            len(test_loader.dataset), 100. * test_accuracy))
    else:
        print('[{}] Test set: Epoch: {} Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
            time_since(start), epoch, test_loss, correct,
            len(test_loader.dataset), 100. * test_accuracy))
    gc.collect()
    return test_loss, test_accuracy