def train_reorder_dream():
    """One training pass over train_ub using the reorder BPR loss."""
    dr_model.train()  # turn on training mode for dropout
    dr_hidden = dr_model.init_hidden(dr_config.batch_size)

    total_loss = 0
    start_time = time()
    num_batchs = ceil(len(train_ub) / dr_config.batch_size)

    for i, x in enumerate(batchify(train_ub, dr_config.batch_size, is_reordered=True)):
        baskets, lens, ids, r_baskets, h_baskets = x
        dr_hidden = repackage_hidden(dr_hidden)  # repackage hidden state for RNN
        dr_model.zero_grad()  # optim.zero_grad()
        dynamic_user, _ = dr_model(baskets, lens, dr_hidden)
        loss = reorder_bpr_loss(r_baskets, h_baskets, dynamic_user,
                                dr_model.encode.weight, dr_config)

        try:
            loss.backward()
        except RuntimeError:
            # For debugging: dump the offending batch and model state to tmp.pkl,
            # then stop this epoch so the failure can be inspected offline.
            print('caching')
            tmp = {'baskets': baskets,
                   'ids': ids,
                   'r_baskets': r_baskets,
                   'h_baskets': h_baskets,
                   'dynamic_user': dynamic_user,
                   'item_embedding': dr_model.encode.weight}
            print(baskets)
            print(ids)
            print(r_baskets)
            print(h_baskets)
            print(dr_model.encode.weight)
            print(dynamic_user.data)
            with open('tmp.pkl', 'wb') as f:
                pickle.dump(tmp, f, pickle.HIGHEST_PROTOCOL)
            break

        # Clip to avoid gradient explosion
        torch.nn.utils.clip_grad_norm_(dr_model.parameters(), dr_config.clip)

        # Parameter updating
        # manual SGD
        # for p in dr_model.parameters():  # update parameters by -lr * grad
        #     p.data.add_(-dr_config.learning_rate, p.grad.data)
        # adam
        grad_norm = get_grad_norm(dr_model)
        previous_params = deepcopy(list(dr_model.parameters()))
        optim.step()

        total_loss += loss.data
        params = deepcopy(list(dr_model.parameters()))
        delta = get_weight_update(previous_params, params)
        weight_update_ratio = get_ratio_update(delta, params)

        # Logging
        if i % dr_config.log_interval == 0 and i > 0:
            elapsed = (time() - start_time) * 1000 / dr_config.log_interval
            cur_loss = total_loss.item() / dr_config.log_interval / dr_config.batch_size  # turn tensor into float
            total_loss = 0
            start_time = time()
            print('[Training]| Epochs {:3d} | Batch {:5d} / {:5d} | ms/batch {:02.2f} | Loss {:05.2f} |'.format(
                epoch, i, num_batchs, elapsed, cur_loss))
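

# Note: repackage_hidden() above is assumed to follow the standard PyTorch idiom of
# detaching the recurrent hidden state from the previous batch's graph, so that
# backpropagation is truncated at batch boundaries. A minimal sketch of that idiom is
# shown below under the hypothetical name _repackage_hidden_sketch (the repo's own
# helper is the authoritative implementation and may differ); torch is assumed to be
# imported at the top of this module.
def _repackage_hidden_sketch(h):
    """Detach hidden state(s) from their history; handles tensors (GRU) and tuples (LSTM)."""
    if isinstance(h, torch.Tensor):
        return h.detach()
    return tuple(_repackage_hidden_sketch(v) for v in h)
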
def train_dream():
    """One training pass over train_ub using the basket BPR loss, with TensorBoard logging."""
    dr_model.train()  # turn on training mode for dropout
    dr_hidden = dr_model.init_hidden(dr_config.batch_size)

    total_loss = 0
    start_time = time()
    num_batchs = ceil(len(train_ub) / dr_config.batch_size)

    for i, x in enumerate(batchify(train_ub, dr_config.batch_size)):
        baskets, lens, _ = x
        dr_hidden = repackage_hidden(dr_hidden)  # repackage hidden state for RNN
        dr_model.zero_grad()  # optim.zero_grad()
        dynamic_user, _ = dr_model(baskets, lens, dr_hidden)
        loss = bpr_loss(baskets, dynamic_user, dr_model.encode.weight, dr_config)
        loss.backward()

        # Clip to avoid gradient explosion
        torch.nn.utils.clip_grad_norm_(dr_model.parameters(), dr_config.clip)

        # Parameter updating
        # manual SGD
        # for p in dr_model.parameters():  # update parameters by -lr * grad
        #     p.data.add_(-dr_config.learning_rate, p.grad.data)
        # adam
        grad_norm = get_grad_norm(dr_model)
        previous_params = deepcopy(list(dr_model.parameters()))
        optim.step()

        total_loss += loss.data
        params = deepcopy(list(dr_model.parameters()))
        delta = get_weight_update(previous_params, params)
        weight_update_ratio = get_ratio_update(delta, params)

        # Logging
        if i % dr_config.log_interval == 0 and i > 0:
            elapsed = (time() - start_time) * 1000 / dr_config.log_interval
            cur_loss = total_loss.item() / dr_config.log_interval / dr_config.batch_size  # turn tensor into float
            total_loss = 0
            start_time = time()
            print('[Training]| Epochs {:3d} | Batch {:5d} / {:5d} | ms/batch {:02.2f} | Loss {:05.2f} |'.format(
                epoch, i, num_batchs, elapsed, cur_loss))
            writer.add_scalar('model/train_loss', cur_loss, epoch * num_batchs + i)
            writer.add_scalar('model/grad_norm', grad_norm, epoch * num_batchs + i)
            writer.add_scalar('model/weight_update_ratio', weight_update_ratio, epoch * num_batchs + i)
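

# Note: get_grad_norm(), get_weight_update() and get_ratio_update() are diagnostic
# helpers defined elsewhere in the repo. The sketches below, under hypothetical
# _sketch names, illustrate the quantities they are assumed to compute (global
# gradient norm, per-parameter weight deltas, and the update-to-weight magnitude
# ratio logged to TensorBoard); the actual implementations may differ.
def _get_grad_norm_sketch(model):
    """Global L2 norm over all parameter gradients of the model."""
    total = 0.0
    for p in model.parameters():
        if p.grad is not None:
            total += p.grad.data.norm(2).item() ** 2
    return total ** 0.5


def _get_weight_update_sketch(previous_params, params):
    """Element-wise deltas between two snapshots of model.parameters()."""
    return [p.data - prev.data for prev, p in zip(previous_params, params)]


def _get_ratio_update_sketch(delta, params):
    """Average ratio of update magnitude to weight magnitude across parameters."""
    ratios = []
    for d, p in zip(delta, params):
        w_norm = p.data.norm(2).item()
        if w_norm > 0:
            ratios.append(d.norm(2).item() / w_norm)
    return sum(ratios) / len(ratios) if ratios else 0.0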