def _save_training_state(self):
    save_training_state(
        epsilon=Config.train.epsilon,
        replay_memory=self.agent.replay_memory.get_memory(),
        last_step=self.last_step,
        ep_reward_queue=self.ep_reward_queue)
    print('training state saved.')
import os
import sys
import time

import torch.nn.functional as F


# Assumes module-level definitions of trainset_loader, device, test(),
# save_training_state() and savedPath.
def train_save(epoch, model, optimizer, param_val, print_to=sys.stdout, epoc_start=0):
    """
    @pre: @a model returns a score for each category; these scores are fed to
          cross-entropy loss.
    """
    print(param_val)
    model.train()  # set training mode
    iteration = 0
    for ep in range(epoc_start, epoc_start + epoch):
        t0 = time.time()
        for batch_idx, (data, target, _) in enumerate(trainset_loader):
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            scores = model(data)
            loss = F.cross_entropy(scores, target)
            loss.backward()
            optimizer.step()
            if iteration % 10 == 0:
                print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                    ep, batch_idx * len(data), len(trainset_loader.dataset),
                    100. * batch_idx / len(trainset_loader), loss.item()),
                    flush=True, file=print_to)
            if batch_idx == 800:
                # Mid-epoch evaluation; switch back to training mode afterwards.
                test(model, print_to=print_to)
                model.train()
            iteration += 1
        # End of epoch: checkpoint, evaluate and report timing.
        save_training_state(
            os.path.join(savedPath, '2dResNet-' + param_val + '-%d.pth' % (ep + 1)),
            model, optimizer)
        test(model, print_to=print_to)
        model.train()
        t1 = time.time()
        print("Epoch %d done, takes %fs" % (ep + 1, t1 - t0), flush=True)
        print("Epoch %d done, takes %fs" % (ep + 1, t1 - t0), flush=True, file=print_to)
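# For context, a hedged sketch of how train_save might be invoked. The
# ResNet-18 architecture, SGD optimizer, hyperparameters and log-file name
# below are illustrative assumptions, not taken from the source; the real
# project supplies its own model plus module-level trainset_loader, device,
# test() and savedPath.
import torch
import torch.optim as optim
from torchvision.models import resnet18

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = resnet18(num_classes=10).to(device)
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

with open('train.log', 'w') as log_file:  # hypothetical log destination
    train_save(epoch=10, model=model, optimizer=optimizer,
               param_val='lr0.01', print_to=log_file)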
def _run(dataloader, data_file_name, all_doc_ids, word_to_ind_dict,
         context_size, num_noise_words, vec_dim, num_epochs, batch_size, lr,
         model_ver, vec_combine_method, save_all, generate_plot,
         model_ver_is_dbow, model_ver_is_dm):
    vocabulary_size = len(word_to_ind_dict)
    print('vocab size: ', vocabulary_size)

    if model_ver_is_dbow:
        model = DBOW(vec_dim=vec_dim, num_docs=len(all_doc_ids),
                     num_words=vocabulary_size)
        cost_func = NegativeSampling()
    elif model_ver_is_dm:
        model = DM(vec_dim=vec_dim, num_docs=len(all_doc_ids),
                   num_words=vocabulary_size)
        cost_func = NegativeSampling()
    else:
        print('Initializing spline model')
        model = DMSpline(vec_dim, num_splines=len(all_doc_ids),
                         num_words=vocabulary_size)
        cost_func = NegativeSamplingWithSpline()

    # Only apply weight decay to the offset vectors.
    params_to_decay = []
    other_params = []
    for name, param in model.named_parameters():
        if name == '_D':
            params_to_decay.append(param)
        else:
            other_params.append(param)
    # optimizer1 = Adam(params=params_to_decay, lr=lr, weight_decay=1e-5)
    optimizer1 = Adam(params=params_to_decay, lr=lr)
    optimizer2 = Adam(params=other_params, lr=lr)

    if torch.cuda.is_available():
        model.cuda()

    num_batches = len(dataloader)
    num_docs = len(all_doc_ids)
    print("Dataset comprised of {:d} documents.".format(num_docs))
    print("Num batches: ", num_batches)
    print("Vocabulary size is {:d}.\n".format(vocabulary_size))
    print("Training started.")

    best_loss = float("inf")
    prev_model_file_path = None

    for epoch_i in range(num_epochs):
        epoch_start_time = time.time()
        loss = []

        # Inverse vocabulary lookup; kept for debugging batches, not used in
        # the training loop itself.
        ind_to_word_dict = {}
        for w in word_to_ind_dict:
            ind_to_word_dict[word_to_ind_dict[w]] = w

        for batch_i, batch in enumerate(dataloader):
            curr_doc_ids, curr_context_ids, curr_target_noise_ids = batch
            if torch.cuda.is_available():
                curr_doc_ids = curr_doc_ids.cuda()
                curr_context_ids = curr_context_ids.cuda()
                curr_target_noise_ids = curr_target_noise_ids.cuda()

            if model_ver_is_dbow:
                x = model.forward(curr_doc_ids, curr_target_noise_ids)
                # Apply the negative-sampling cost here as well, mirroring
                # the DM branch, so x is a scalar loss before backward().
                x = cost_func.forward(x)
            elif model_ver_is_dm:
                x = model.forward(
                    curr_context_ids, curr_doc_ids, curr_target_noise_ids)
                x = cost_func.forward(x)
            else:
                # The spline path assumes the dataloader yields batches that
                # expose attribute access (e.g. a namedtuple) including x_vals.
                x, D = model.forward(
                    batch.x_vals, batch.doc_ids, batch.context_ids,
                    batch.target_noise_ids)
                x = cost_func.forward(x, D, batch.doc_ids)

            loss.append(x.item())
            model.zero_grad()
            x.backward()
            optimizer1.step()
            optimizer2.step()
            _print_progress(epoch_i, batch_i, num_batches)

        # End of epoch: track the best loss and checkpoint the model.
        loss = torch.mean(torch.FloatTensor(loss))
        is_best_loss = loss < best_loss
        best_loss = min(loss, best_loss)
        print('loss: ', loss, 'best_loss: ', best_loss,
              'is_best_loss: ', is_best_loss)

        state = {
            'epoch': epoch_i + 1,
            'model_state_dict': model.state_dict(),
            'best_loss': best_loss,
            'optimizer1_state_dict': optimizer1.state_dict(),
            'optimizer2_state_dict': optimizer2.state_dict(),
            'word_to_index_dict': word_to_ind_dict
        }

        prev_model_file_path = save_training_state(
            data_file_name, model_ver, vec_combine_method, context_size,
            num_noise_words, vec_dim, num_docs, vocabulary_size, batch_size,
            lr, epoch_i, loss, state, save_all, generate_plot, is_best_loss,
            prev_model_file_path, model_ver_is_dbow)

        epoch_total_time = round(time.time() - epoch_start_time)
        print(" ({:f}s) - loss: {:.4f}".format(epoch_total_time, loss))
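# _print_progress is called inside the batch loop above but is not defined in
# this section. A minimal sketch of the assumed behaviour (overwrite a single
# status line with epoch and batch progress); the original project's exact
# formatting may differ.
import sys


def _print_progress(epoch_i, batch_i, num_batches):
    progress = round((batch_i + 1) / num_batches * 100)
    print("\rEpoch {:d} - {:d}%".format(epoch_i + 1, progress), end='')
    sys.stdout.flush()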
def _save_training_state(self):
    save_training_state(last_step=self.last_step,
                        ep_reward_queue=self.ep_reward_queue)
    print('training state saved.')
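# The save_training_state helper called by the two _save_training_state
# methods above is not shown in this section. A minimal sketch, assuming it
# simply pickles the keyword fields it receives to a fixed checkpoint path;
# the path below is a placeholder, not taken from the source.
import pickle

TRAINING_STATE_PATH = 'training_state.pkl'  # assumed location


def save_training_state(**state):
    # Persist arbitrary training-state fields (e.g. epsilon, replay_memory,
    # last_step, ep_reward_queue) so an interrupted run can be resumed.
    with open(TRAINING_STATE_PATH, 'wb') as f:
        pickle.dump(state, f)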