def run_batch(sample, model, loss_func=None, optimizer=None, phase=None): """ Run a batch for phase = {train, valid, test} """ if phase == 'Train': model.train() else: model.eval() # test model,close dropout... x = to_var(sample['sentence']) label_pre = model(x) # [bs, 6] if phase == 'Train': label_gt = to_var(sample['label']) # [bs, 6] loss = loss_func(label_pre, label_gt) optimizer.zero_grad() # clear gradients for this training step loss.backward() # bp, compute gradients optimizer.step() # apply gradients return loss.data[0], label_pre.data elif phase == 'Valid': label_gt = to_var(sample['label']) # [bs, 6] loss = loss_func(label_pre, label_gt) return loss.data[0], label_pre.data else: return label_pre.data
def run_batch(sample, model, loss_func, optimizer=None, phase=None):
    if phase == 'Train':
        model.train()
    else:
        model.eval()

    img = to_var(sample['img'])  # [bs, 6, H, W]
    label_pre = model(img)

    if phase == 'Train' or phase == 'Valid':
        label = to_var(sample['label'])  # [bs, 6]
        loss1 = loss_func(label_pre[:, :3], label[:, :3])
        loss2 = loss_func(label_pre[:, 3:], label[:, 3:])
        loss = loss1 + args.beta * loss2

        loss_x = loss_func(label_pre[:, 0], label[:, 0])
        loss_y = loss_func(label_pre[:, 1], label[:, 1])
        loss_z = loss_func(label_pre[:, 2], label[:, 2])
        loss_tx = loss_func(label_pre[:, 3], label[:, 3])
        loss_ty = loss_func(label_pre[:, 4], label[:, 4])
        loss_tz = loss_func(label_pre[:, 5], label[:, 5])

        if phase == 'Train':
            optimizer.zero_grad()  # clear gradients for this training step
            loss.backward()        # backprop, compute gradients
            optimizer.step()       # apply gradients

        return loss.data[0], loss1.data[0], loss2.data[0], label_pre.data, \
            loss_x.data[0], loss_y.data[0], loss_z.data[0], \
            loss_tx.data[0], loss_ty.data[0], loss_tz.data[0]
    else:
        return label_pre.data

def run_batch_2(sample, model, loss_func=None, optimizer=None):
    """
    CNN-LSTM: train sub-batches with different time steps together
    """
    model.train()

    loss_mean = []
    loss1_mean = []
    loss2_mean = []
    for sample_batch in sample:
        img1 = to_var(sample_batch['img1'])  # cnn: [bs, 6, H, W]; cnn-lstm: [N, T, 6, H, W]
        img2 = to_var(sample_batch['img2'])
        label_pre = model(img1, img2)  # [32, 6]

        label = to_var(sample_batch['label'])  # [bs, 6]
        label = label.view(-1, 6)
        loss1 = loss_func(label_pre[:, :3], label[:, :3])
        loss2 = loss_func(label_pre[:, 3:], label[:, 3:])
        loss = loss1 + args.beta * loss2

        loss1_mean.append(loss1.data[0])
        loss2_mean.append(loss2.data[0])
        loss_mean.append(loss.data[0])

        optimizer.zero_grad()  # clear gradients for this training step
        loss.backward()        # backprop, compute gradients
        optimizer.step()       # apply gradients

    # np.mean already yields plain floats, so return them directly
    loss1_mean = np.mean(loss1_mean)
    loss2_mean = np.mean(loss2_mean)
    loss_mean = np.mean(loss_mean)
    return loss1_mean, loss2_mean, loss_mean

def run_batch(sample, model, optimizer, loss_func, phase='Train'):
    if phase == 'Train':
        model.train()
    else:
        model.eval()

    source = to_var(sample['source'].transpose(0, 1))  # T x B
    target = to_var(sample['target'].transpose(0, 1))  # T x B

    loss = 0  # accumulated over each word
    time_step, _ = tuple(target.size())

    # Run words through encoder
    # TODO: note that model.module must be used to call class methods under nn.DataParallel
    encoder_hidden = model.module.init_hidden(args.batch_size)
    encoder_outputs, encoder_hidden = model.module.encoder(source, encoder_hidden)

    decoder_context = to_var(torch.zeros(args.batch_size, args.hidden_size))
    decoder_hidden = encoder_hidden  # Use last hidden state from encoder to start decoder

    # Choose whether to use teacher forcing
    # use_teacher_forcing = random.random() < args.tf_ratio
    use_teacher_forcing = True

    count = 0
    if use_teacher_forcing:
        # Teacher forcing: use the ground-truth target as the next input
        for i in range(time_step - 1):
            decoder_output, decoder_context, decoder_hidden, decoder_attention = model(
                target[i, :], decoder_context, decoder_hidden, encoder_outputs)
            # decoder_output: B x Vocab, target: B x T
            loss += loss_func(decoder_output, target[i + 1, :])
            count += 1
            # if target[i+1] == args.EOS_ID:
            #     break
    else:
        # Without teacher forcing: use the network's own prediction as the next input
        decoder_input = target[0]  # SOS_ID
        for i in range(time_step - 1):
            decoder_output, decoder_context, decoder_hidden, decoder_attention = model(
                decoder_input, decoder_context, decoder_hidden, encoder_outputs)
            # compare the prediction with the *next* target token, as in the
            # teacher-forcing branch above
            loss += loss_func(decoder_output, target[i + 1, :])
            _, top_id = decoder_output.data.topk(1)
            ni = top_id[0][0]
            decoder_input = to_var(torch.LongTensor([[ni]]))  # chosen word is next input
            count += 1
            if ni == args.EOS_ID:
                break

    if phase == 'Train':
        # BP only in the training phase
        optimizer.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
        optimizer.step()

    return loss.data[0] / count

def encoder_sentence(self, words_input):
    """
    :param words_input: T x B
    :return: hidden: (h_n, c_n), each layer*direction x B x N
    """
    embedded = self.embedding(words_input)
    h_0 = to_var(torch.zeros(self.layer_num, self.batch_size, self.hidden_size))
    c_0 = to_var(torch.zeros(self.layer_num, self.batch_size, self.hidden_size))
    output, hidden = self.lstm_encoder(embedded, (h_0, c_0))
    return hidden

def run_batch(sample, model, loss_func=None, optimizer=None, phase=None):
    """
    Train / Valid:
        run_batch(sample, model, loss_func, optimizer, phase='Train')
        run_batch(sample, model, loss_func, phase='Valid')
        Return the estimated pose and the loss.
    Test:
        run_batch(sample, model, phase='Test')
        Return the estimated pose only.
    """
    if phase == 'Train':
        model.train()
    else:
        model.eval()  # eval mode: disable dropout, etc.

    img1 = to_var(sample['img1'])  # cnn: [bs, 6, H, W]; cnn-lstm: [N, T, 6, H, W]
    img2 = to_var(sample['img2'])
    label_pre = model(img1, img2)  # [32, 6]

    # conv_out = x_conv.data.cpu().numpy()
    # lstm_out = x_lstm.data.cpu().numpy()
    # print('Conv >>> min: {:.5f}, max: {:.5f}'.format(np.min(conv_out), np.max(conv_out)))
    # print('LSTM >>> min: {:.5f}, max: {:.5f}'.format(np.min(lstm_out), np.max(lstm_out)))

    if phase == 'Train' or phase == 'Valid':
        label = to_var(sample['label'])  # [bs, 6]
        label = label.view(-1, 6)
        loss1 = loss_func(label_pre[:, :3], label[:, :3])
        loss2 = loss_func(label_pre[:, 3:], label[:, 3:])
        loss = loss1 + args.beta * loss2

        # loss_x = loss_func(label_pre[:, 0], label[:, 0])
        # loss_y = loss_func(label_pre[:, 1], label[:, 1])
        # loss_z = loss_func(label_pre[:, 2], label[:, 2])
        # loss_tx = loss_func(label_pre[:, 3], label[:, 3])
        # loss_ty = loss_func(label_pre[:, 4], label[:, 4])
        # loss_tz = loss_func(label_pre[:, 5], label[:, 5])

        if phase == 'Train':
            optimizer.zero_grad()  # clear gradients for this training step
            loss.backward()        # backprop, compute gradients
            optimizer.step()       # apply gradients

        return loss.data[0], loss1.data[0], loss2.data[0], label_pre.data
        # return loss.data[0], loss1.data[0], loss2.data[0], label_pre.data, \
        #     loss_x.data[0], loss_y.data[0], loss_z.data[0], \
        #     loss_tx.data[0], loss_ty.data[0], loss_tz.data[0]
    else:
        return label_pre.data

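# A hedged usage sketch (not from the original repo) matching the three call
# patterns described in the docstring above. The loader names are hypothetical.
def drive_run_batch_sketch(model, loss_func, optimizer, train_loader, valid_loader, test_loader):
    for sample in train_loader:
        loss, loss1, loss2, pose = run_batch(sample, model, loss_func, optimizer, phase='Train')
    for sample in valid_loader:
        loss, loss1, loss2, pose = run_batch(sample, model, loss_func, phase='Valid')
    for sample in test_loader:
        pose = run_batch(sample, model, phase='Test')
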
def run_batch(sample, model, optimizer, loss_func, args, phase='Train'):
    if phase == 'Train':
        model.train()
    else:
        model.eval()

    query = to_var(sample['query'])
    answer = to_var(sample['answer'])
    label = to_var(sample['label'])

    logits = model(query, answer)
    loss = loss_func(logits, label)

    if phase == 'Train':
        # BP only in the training phase
        optimizer.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
        optimizer.step()

    return loss.data[0]

def run_batch(sample, model, loss_func=None, optimizer=None):
    model.train()

    count = 0
    loss_mean = 0
    for sample_batch in sample:
        img_1 = to_var(sample_batch['img_1'])
        img_2 = to_var(sample_batch['img_2'])
        label_pre = model(img_1, img_2)
        loss = loss_func(label_pre, sample_batch['label'].reshape(-1, 6).to(device))

        loss_mean += loss.item()
        count += 1

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    loss_mean /= count
    return loss_mean

def forward(self, hidden, outputs):
    """
    :param hidden: decoder output for a single time step > B x N
    :param outputs: encoder outputs > T_encoder x B x N
    :return: attention weights, normalized to [0, 1] > B x 1 x T
    """
    ts, bs, _ = tuple(outputs.size())

    # Create variable to store attention energies
    attn_energies = to_var(torch.zeros(bs, ts))

    # Calculate energies for each encoder output: for the whole batch, score
    # the decoder hidden state against every preceding encoder time step
    for i in range(ts):
        for j in range(bs):
            attn_energies[j, i] = self.score(hidden[j, :], outputs[i, j, :])

    # Normalize energies to weights in range 0 to 1, resize to B x 1 x T
    return F.softmax(attn_energies, dim=1).unsqueeze(1)

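# A hedged note (not from the original repo): the double Python loop above
# makes T*B scalar calls to self.score. If self.score is a plain dot product,
# the same energies come from one batched matmul. This sketch assumes
# dot-product scoring; 'general'/'concat' scores would need their linear
# layers applied to `outputs` or `hidden` first.
def forward_vectorized(self, hidden, outputs):
    # hidden: B x N, outputs: T x B x N
    outputs_btn = outputs.permute(1, 0, 2)                              # B x T x N
    energies = torch.bmm(outputs_btn, hidden.unsqueeze(2)).squeeze(2)   # B x T
    return F.softmax(energies, dim=1).unsqueeze(1)                      # B x 1 x T
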
def run_test(model, seq, dir_model=None, epoch=None, dir_time=None):
    """
    Evaluate a complete trajectory, either during training or in the test phase.

    When testing during training:
    1. Predict all relative poses over a complete sequence.
       cnn-lstm: read images manually, so the tail of the sequence (which may
                 be shorter than one batch) can still be handled
       cnn: read via DataLoader, which is more convenient
    2. Compute absolute poses and plot the trajectory.

    Training phase: save the trajectory plot.
    Test phase: save the trajectory plot plus the relative and absolute poses.
    """
    print('\nTest sequence {:02d} >>>'.format(seq))

    if args.net_architecture == 'cnn-lstm':
        model.eval()
        img_list = glob(dir_data + '/{:02d}/image_2/*.png'.format(seq))
        img_list.sort()
        ip = args.img_pairs
        iter_1 = int(math.floor((len(img_list) - 1) / ip))
        iter_2 = int(math.ceil((len(img_list) - 1) / ip))

        pose_ret = []
        for i in tqdm(np.arange(iter_1)):
            img_seq = []
            for img_path in img_list[i * ip:(i + 1) * ip + 1]:
                img = read_image(img_path)
                img_seq.append(img)

            x1 = np.stack(img_seq[:-1], 0)
            x1 = np.transpose(x1, [0, 3, 1, 2])  # [10, C, H, W]
            x1 = x1[np.newaxis, :, :, :, :]      # [1, 10, C, H, W]
            x1 = to_var(torch.from_numpy(x1))

            x2 = np.stack(img_seq[1:], 0)
            x2 = np.transpose(x2, [0, 3, 1, 2])  # [10, C, H, W]
            x2 = x2[np.newaxis, :, :, :, :]      # [1, 10, C, H, W]
            x2 = to_var(torch.from_numpy(x2))

            pose_out = model(x1, x2)
            pose_ret.extend(pose_out.data.cpu().numpy())

        ns = iter_1 * ip
        if iter_1 != iter_2:
            print('Process for the last {:d} images...'.format(len(img_list) - ns))
            img_seq = []
            for img_path in img_list[ns:]:
                img = read_image(img_path)
                img_seq.append(img)

            x1 = np.stack(img_seq[:-1], 0)
            x1 = np.transpose(x1, [0, 3, 1, 2])
            x1 = x1[np.newaxis, :, :, :, :]
            x1 = to_var(torch.from_numpy(x1))

            x2 = np.stack(img_seq[1:], 0)
            x2 = np.transpose(x2, [0, 3, 1, 2])
            x2 = x2[np.newaxis, :, :, :, :]
            x2 = to_var(torch.from_numpy(x2))

            pose_out = model(x1, x2)
            pose_ret.extend(pose_out.data.cpu().numpy())
    else:
        data_set = KITTIDataSet(dir_data=dir_data, dir_label=dir_label, phase='Test', seq=seq)
        loader = DataLoader(data_set, batch_size=args.batch_size, shuffle=False,
                            num_workers=args.workers)
        pose_ret = []
        for _, sample_batch in enumerate(tqdm(loader)):
            pose_pre = run_batch(sample=sample_batch, model=model, phase='Test')
            pose_ret.extend(pose_pre.cpu().numpy())

    pose_abs = cal_absolute_from_relative(pose_ret)

    if args.phase == 'Test':
        np.savetxt(dir_time + '/pose_{:d}.txt'.format(seq), pose_ret)
        np.savetxt(dir_time + '/{:02d}.txt'.format(seq), pose_abs)
        plot_from_pose(seq=seq, dir_save=dir_time, pose_abs=pose_abs, args=args)
        print('Save pose and trajectory in {:s}'.format(dir_time))
    else:
        plot_from_pose(seq=seq, dir_save=dir_model, pose_abs=pose_abs, epoch=epoch, args=args)
        print('Save trajectory in {:s}'.format(dir_model))

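# A minimal sketch (NOT the repo's cal_absolute_from_relative) of the usual way
# relative poses are chained into absolute ones: convert each 6-DoF vector to a
# 4x4 homogeneous transform and accumulate by matrix multiplication. The
# [rx, ry, rz, tx, ty, tz] layout and the ZYX Euler convention are assumptions.
def chain_relative_poses_sketch(pose_rel):
    T_abs = np.eye(4)
    out = []
    for rx, ry, rz, tx, ty, tz in pose_rel:
        cx, sx = np.cos(rx), np.sin(rx)
        cy, sy = np.cos(ry), np.sin(ry)
        cz, sz = np.cos(rz), np.sin(rz)
        Rx = np.array([[1, 0, 0], [0, cx, -sx], [0, sx, cx]])
        Ry = np.array([[cy, 0, sy], [0, 1, 0], [-sy, 0, cy]])
        Rz = np.array([[cz, -sz, 0], [sz, cz, 0], [0, 0, 1]])
        T = np.eye(4)
        T[:3, :3] = Rz.dot(Ry).dot(Rx)
        T[:3, 3] = [tx, ty, tz]
        T_abs = T_abs.dot(T)
        out.append(T_abs[:3, :].reshape(-1))  # flattened 3x4 pose, KITTI style
    return np.array(out)
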
def init_hidden(self, batch_size):
    hidden = to_var(torch.zeros(self.layer_num, batch_size, self.hidden_size))
    return hidden

def encoder(self, words_input, hidden):
    """
    :param words_input: T x B
    :param hidden: layer*direction x B x N
    :return:
        output: all time steps > T x B x N
        hidden: last time step state > layer*direction x B x N
    """
    word_embedded = self.embedding(words_input).contiguous()
    hidden = hidden.contiguous()
    output, hidden = self.gru_encoder(word_embedded, hidden)
    return output, hidden

def init_hidden(self, batch_size):
    hidden = to_var(torch.zeros(self.layer_num, batch_size, self.hidden_size))
    return hidden


if __name__ == '__main__':
    torch.set_default_tensor_type('torch.FloatTensor')
    model = Seq2Seq(attn_model='general', vocab_size=25003, input_size=256,
                    hidden_size=256, layer_num=2)
    print(model)
    if torch.cuda.is_available():
        model = nn.DataParallel(model.cuda(), device_ids=[0, 1])

    encoder_hidden = model.module.init_hidden(128)
    source = to_var(torch.from_numpy(np.arange(128 * 30).reshape(30, 128)))
    encoder_outputs, encoder_hidden = model.module.encoder(source, encoder_hidden)
    print(encoder_outputs.size(), encoder_hidden.size())

def encoder_context(self, vec_input):
    h_0 = to_var(torch.zeros(self.layer_num, self.batch_size, self.hidden_size))
    c_0 = to_var(torch.zeros(self.layer_num, self.batch_size, self.hidden_size))
    output, hidden = self.lstm_encoder(vec_input, (h_0, c_0))
    return hidden

def forward(self, query, answer):
    """
    :param query: T1 x B
    :param answer: T2 x B
    :return: logits: B x Class
    """
    que_hidden = self.encoder_sentence(query)
    ans_hidden = self.encoder_sentence(answer)
    que_hidden = torch.unsqueeze(que_hidden[-1][0], dim=0)
    ans_hidden = torch.unsqueeze(ans_hidden[-1][0], dim=0)
    context_input = torch.cat((que_hidden, ans_hidden), dim=0)  # 2 x B x N
    con_hidden = self.encoder_context(context_input)
    out_hidden = con_hidden[-1][1]  # B x N
    logits = self.sigmoid(out_hidden)  # B x C
    return logits


if __name__ == '__main__':
    # TODO prepare data: 1. query [None, 30], 2. answer [None, 32], 3. label [None, 1]
    model = HierarchicalEncoder(vocab_size=1000, batch_size=8, input_size=256,
                                hidden_size=256, layer_num=2)
    model.cuda()
    que_batch = to_var(torch.from_numpy(np.arange(8 * 30).reshape(30, 8)))
    ans_batch = to_var(torch.from_numpy(np.arange(8 * 30).reshape(30, 8)))
    label_batch = to_var(torch.IntTensor([0, 1, 1, 1, 0, 0, 0, 1]))
    logits = model(que_batch, ans_batch)
    print(logits.size())