Esempio n. 1
0
def run_batch(sample, model, loss_func=None, optimizer=None, phase=None):
    """
    Run the model on one batch of sentences.

    The model is switched to training mode when ``phase == 'Train'`` and to
    evaluation mode for every other value of ``phase``.

    Returns:
        phase == 'Train': ``(loss.data[0], label_pre.data)`` after one
            optimizer step.
        phase == 'Valid': ``(loss.data[0], label_pre.data)``; the optimizer
            is not used.
        any other phase: ``label_pre.data`` only (``sample['label']`` is
            never read).
    """
    is_train = phase == 'Train'
    if is_train:
        model.train()
    else:
        model.eval()  # evaluation mode

    prediction = model(to_var(sample['sentence']))

    # Inference only: no label is needed, hand back the raw prediction.
    if not is_train and phase != 'Valid':
        return prediction.data

    target = to_var(sample['label'])
    loss = loss_func(prediction, target)

    if is_train:
        optimizer.zero_grad()  # drop gradients left over from the last step
        loss.backward()  # back-propagate
        optimizer.step()  # update the weights

    return loss.data[0], prediction.data
def run_batch(sample, model, loss_func, optimizer=None, phase=None):
    """
    Run the model on one batch of images.

    For phase 'Train' or 'Valid' the loss is ``loss1 + args.beta * loss2``,
    where ``loss1`` covers columns 0-2 and ``loss2`` columns 3-5 of the
    prediction/label. A separate loss is also computed for each of the six
    columns (named x, y, z, tx, ty, tz in the original code). Weights are
    updated only when ``phase == 'Train'``.

    Returns:
        'Train' / 'Valid': a 10-tuple
            ``(loss, loss1, loss2, label_pre.data, col0, ..., col5)`` where
            every loss entry is ``<loss>.data[0]``.
        any other phase: ``label_pre.data`` only.
    """
    is_train = phase == 'Train'
    if is_train:
        model.train()
    else:
        model.eval()

    label_pre = model(to_var(sample['img']))

    if not (is_train or phase == 'Valid'):
        return label_pre.data

    label = to_var(sample['label'])
    loss1 = loss_func(label_pre[:, :3], label[:, :3])
    loss2 = loss_func(label_pre[:, 3:], label[:, 3:])
    loss = loss1 + args.beta * loss2

    # One loss per output column, in column order 0..5.
    column_losses = [
        loss_func(label_pre[:, col], label[:, col]) for col in range(6)
    ]

    if is_train:
        optimizer.zero_grad()  # drop gradients left over from the last step
        loss.backward()  # back-propagate the combined loss only
        optimizer.step()  # update the weights

    summary = (loss.data[0], loss1.data[0], loss2.data[0], label_pre.data)
    return summary + tuple(item.data[0] for item in column_losses)
Esempio n. 3
0
def run_batch_2(sample, model, loss_func=None, optimizer=None):
    """
    cnn-lstm: train on several sub-batches (different time_step) in one call.

    ``sample`` is iterated; every item must provide 'img1', 'img2' and
    'label'. One optimizer step is taken per item. The per-item loss is
    ``loss1 + args.beta * loss2`` where ``loss1`` covers label columns 0-2
    and ``loss2`` columns 3-5 (the label is reshaped to ``(-1, 6)``).

    Returns:
        ``(loss1_mean, loss2_mean, loss_mean)``: the ``np.mean`` of the
        per-item losses, in that order. With an empty ``sample`` the means
        are ``np.mean([])``.
    """
    model.train()

    loss_mean = []
    loss1_mean = []
    loss2_mean = []
    for sample_batch in sample:
        # Original note: cnn input is [bs, 6, H, W], cnn-lstm input is
        # [N, T, 6, H, W] -- not verifiable from this function.
        img1 = to_var(sample_batch['img1'])
        img2 = to_var(sample_batch['img2'])
        label_pre = model(img1, img2)

        label = to_var(sample_batch['label'])
        label = label.view(-1, 6)
        loss1 = loss_func(label_pre[:, :3], label[:, :3])
        loss2 = loss_func(label_pre[:, 3:], label[:, 3:])
        loss = loss1 + args.beta * loss2

        loss1_mean.append(loss1.data[0])
        loss2_mean.append(loss2.data[0])
        loss_mean.append(loss.data[0])

        optimizer.zero_grad()  # clear gradients for this training step
        loss.backward()  # bp, compute gradients
        optimizer.step()  # apply gradients

    # np.mean already yields plain NumPy scalars; they are not tensors, so
    # they must be returned as-is rather than indexed with `.data[0]`.
    return np.mean(loss1_mean), np.mean(loss2_mean), np.mean(loss_mean)
Esempio n. 4
0
def run_batch(sample, model, optimizer, loss_func, phase='Train'):
    """
    Run one seq2seq batch and return the loss averaged over decoder steps.

    The encoder is reached through ``model.module`` (the model is expected
    to be wrapped, e.g. by nn.DataParallel), while the decoder is invoked by
    calling ``model`` once per time step. ``args.batch_size``,
    ``args.hidden_size`` and ``args.clip`` are read from the module-level
    ``args``.

    :param sample: mapping with 'source' and 'target'; both are transposed
        with ``transpose(0, 1)`` before use (T x B per the original notes)
    :param model: wrapped network exposing ``module.init_hidden`` and
        ``module.encoder``
    :param optimizer: optimizer, stepped once per call when training
    :param loss_func: criterion applied to (decoder output, next target step)
    :param phase: 'Train' selects training mode and performs the weight
        update; any other value selects evaluation mode and only computes
        the loss
    :return: ``loss.data[0] / count`` where ``count`` is the number of
        decoder steps that contributed to the loss
    """
    is_train = phase == 'Train'
    if is_train:
        model.train()
    else:
        model.eval()

    source = to_var(sample['source'].transpose(0, 1))  # T x B
    target = to_var(sample['target'].transpose(0, 1))  # T x B
    loss = 0  # Added onto for each word
    time_step, _ = tuple(target.size())

    # Run words through encoder
    # TODO: notice that must use model.module to call class method in nn.DataParallel
    encoder_hidden = model.module.init_hidden(args.batch_size)
    encoder_outputs, encoder_hidden = model.module.encoder(
        source, encoder_hidden)

    decoder_context = to_var(torch.zeros(args.batch_size, args.hidden_size))
    decoder_hidden = encoder_hidden  # Use last hidden state from encoder to start decoder

    # Choose whether to use teacher forcing
    # use_teacher_forcing = random.random() < args.tf_ratio
    use_teacher_forcing = True
    count = 0
    if use_teacher_forcing:
        # Teacher forcing: Use the ground-truth target as the next input
        for i in range(time_step - 1):
            decoder_output, decoder_context, decoder_hidden, decoder_attention = model(
                target[i, :], decoder_context, decoder_hidden, encoder_outputs)
            # Step i is fed in and scored against step i + 1 of the target
            # (target is T x B here, so target[i + 1, :] is one step for the
            # whole batch).
            loss += loss_func(decoder_output, target[i + 1, :])
            count += 1
            # if target[i+1] == args.EOS_ID:
            #     break

    else:
        # NOTE(review): unreachable while use_teacher_forcing is hard-coded
        # to True; it scores against target[i] and reads top_id[0][0], which
        # looks like it was written for a batch of one -- confirm before
        # enabling.
        # Without teacher forcing: use network's own prediction as the next input
        decoder_input = target[0]  # SOS_ID
        for i in range(time_step - 1):
            decoder_output, decoder_context, decoder_hidden, decoder_attention = model(
                decoder_input, decoder_context, decoder_hidden,
                encoder_outputs)
            loss += loss_func(decoder_output[0], target[i])
            _, top_id = decoder_output.data.topk(1)
            ni = top_id[0][0]
            decoder_input = to_var(torch.LongTensor(
                [[ni]]))  # Chosen word is next input
            count += 1
            if ni == args.EOS_ID:
                break

    # BP -- only when training, so that an evaluation call (model.eval()
    # above) never changes the weights.
    if is_train:
        optimizer.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
        optimizer.step()

    return loss.data[0] / count
    def encoder_sentence(self, words_input):
        """
        Embed a batch of word ids and run it through ``self.lstm_encoder``.

        The initial hidden and cell states are all-zero tensors of shape
        (layer_num, batch_size, hidden_size).

        :param words_input: word ids, T x B per the original note
        :return: the second value returned by ``self.lstm_encoder`` (the
            encoder state); the per-step output is discarded
        """
        embedded = self.embedding(words_input)
        state_shape = (self.layer_num, self.batch_size, self.hidden_size)
        initial_h = to_var(torch.zeros(*state_shape))
        initial_c = to_var(torch.zeros(*state_shape))
        _, hidden = self.lstm_encoder(embedded, (initial_h, initial_c))
        return hidden
Esempio n. 6
0
def run_batch(sample, model, loss_func=None, optimizer=None, phase=None):
    """
    Train / validate:
        run_batch(sample, model, loss_func, optimizer, phase='Train')
        run_batch(sample, model, loss_func, phase='Valid')
        returns (loss, loss1, loss2, predicted pose)
    Test:
        run_batch(sample, model, phase='Test')
        returns the predicted pose only

    The loss is ``loss1 + args.beta * loss2``; ``loss1`` covers label
    columns 0-2 and ``loss2`` columns 3-5 after the label has been reshaped
    to ``(-1, 6)``.
    """
    is_train = phase == 'Train'
    if is_train:
        model.train()
    else:
        model.eval()  # evaluation mode

    # Original note: cnn input is [bs, 6, H, W], cnn-lstm input is
    # [N, T, 6, H, W] -- not verifiable from this function.
    first = to_var(sample['img1'])
    second = to_var(sample['img2'])
    label_pre = model(first, second)

    # Test (or any unknown phase): prediction only, no label required.
    if not (is_train or phase == 'Valid'):
        return label_pre.data

    label = to_var(sample['label']).view(-1, 6)
    loss1 = loss_func(label_pre[:, :3], label[:, :3])
    loss2 = loss_func(label_pre[:, 3:], label[:, 3:])
    loss = loss1 + args.beta * loss2

    if is_train:
        optimizer.zero_grad()  # drop gradients left over from the last step
        loss.backward()  # back-propagate
        optimizer.step()  # update the weights

    return loss.data[0], loss1.data[0], loss2.data[0], label_pre.data
def run_batch(sample, model, optimizer, loss_func, args, phase='Train'):
    """
    Run one query/answer batch and return its loss.

    :param sample: mapping with 'query', 'answer' and 'label'
    :param model: callable as ``model(query, answer)`` returning logits
    :param optimizer: optimizer, stepped once per call when training
    :param loss_func: criterion applied to ``(logits, label)``
    :param args: settings object; ``args.clip`` is the gradient-norm limit
    :param phase: 'Train' selects training mode and performs the weight
        update; any other value selects evaluation mode and only computes
        the loss
    :return: ``loss.data[0]``
    """
    is_train = phase == 'Train'
    if is_train:
        model.train()
    else:
        model.eval()

    query = to_var(sample['query'])
    answer = to_var(sample['answer'])
    label = to_var(sample['label'])
    logits = model(query, answer)
    loss = loss_func(logits, label)

    # BP -- only when training, so that an evaluation call (model.eval()
    # above) never changes the weights.
    if is_train:
        optimizer.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
        optimizer.step()

    return loss.data[0]
Esempio n. 8
0
def run_batch(sample, model, loss_func=None, optimizer=None):
    """
    Train on every item of ``sample`` and return the mean loss.

    Each item must provide 'img_1', 'img_2' and 'label'; the label is
    reshaped to ``(-1, 6)`` and moved to the module-level ``device``. One
    optimizer step is taken per item. An empty ``sample`` ends in a
    ZeroDivisionError because the mean is taken over zero items.
    """
    model.train()

    seen = 0
    loss_total = 0
    for seen, sample_batch in enumerate(sample, start=1):
        label_pre = model(to_var(sample_batch['img_1']),
                          to_var(sample_batch['img_2']))

        target = sample_batch['label'].reshape(-1, 6).to(device)
        loss = loss_func(label_pre, target)
        loss_total += loss.item()

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    return loss_total / seen
    def forward(self, hidden, outputs):
        """
        Compute attention weights of one decoder step over encoder outputs.

        input:
            hidden: decoder output for a single time step, B x N per the
                original note
            outputs: encoder outputs; its size is unpacked as (T, B, _)
        return:
            weights: B x 1 x T, softmax-normalised over the T axis
        """
        steps, batch, _ = outputs.size()

        # Energy table filled one (batch item, encoder step) pair at a time.
        energies = to_var(torch.zeros(batch, steps))

        for step in range(steps):
            # Score the hidden state of every batch item against the encoder
            # output of this step.
            for item in range(batch):
                energies[item, step] = self.score(hidden[item, :],
                                                  outputs[step, item, :])

        # Normalise along the step axis, then insert a singleton middle axis.
        weights = F.softmax(energies, dim=1)
        return weights.unsqueeze(1)
Esempio n. 10
0
def run_test(model, seq, dir_model=None, epoch=None, dir_time=None):
    """
    Evaluate the model on one complete sequence, either as a check during
    training or as the actual test run.

    1. Predict every relative pose of the sequence.
       cnn-lstm:
           images are read by hand so that the tail of the sequence, which
           may be shorter than a full clip of ``args.img_pairs`` pairs, can
           still be processed
       cnn:
           images are read through a DataLoader

    2. Convert to absolute poses and plot the trajectory.
       While training only the trajectory plot is saved (in ``dir_model``);
       when ``args.phase == 'Test'`` the relative poses, absolute poses and
       the plot are all saved in ``dir_time``.
    """

    def as_batch(frames):
        # Stack the frames, reorder axes with [0, 3, 1, 2], and add a leading
        # axis of size 1 before wrapping the array with to_var.
        stacked = np.transpose(np.stack(frames, 0), [0, 3, 1, 2])
        return to_var(torch.from_numpy(stacked[np.newaxis, :, :, :, :]))

    def predict_clip(img_paths):
        # Every image is paired with its successor: first = all but the
        # last frame, second = all but the first frame.
        frames = [read_image(img_path) for img_path in img_paths]
        first = as_batch(frames[:-1])
        second = as_batch(frames[1:])
        return model(first, second).data.cpu().numpy()

    print('\nTest sequence {:02d} >>>'.format(seq))
    pose_ret = []
    if args.net_architecture == 'cnn-lstm':
        model.eval()
        img_list = glob(dir_data + '/{:02d}/image_2/*.png'.format(seq))
        img_list.sort()
        ip = args.img_pairs
        pair_total = len(img_list) - 1
        full_clips = int(math.floor(pair_total / ip))
        all_clips = int(math.ceil(pair_total / ip))

        # Full clips: ip pairs each, i.e. ip + 1 consecutive images.
        for i in tqdm(np.arange(full_clips)):
            pose_ret.extend(predict_clip(img_list[i * ip:(i + 1) * ip + 1]))

        # Left-over images that do not fill a whole clip.
        ns = full_clips * ip
        if full_clips != all_clips:
            print('Process for the last {:d} images...'.format(
                len(img_list) - ns))
            pose_ret.extend(predict_clip(img_list[ns:]))
    else:
        data_set = KITTIDataSet(dir_data=dir_data,
                                dir_label=dir_label,
                                phase='Test',
                                seq=seq)
        loader = DataLoader(data_set,
                            batch_size=args.batch_size,
                            shuffle=False,
                            num_workers=args.workers)
        for sample_batch in tqdm(loader):
            pose_pre = run_batch(sample=sample_batch,
                                 model=model,
                                 phase='Test')
            pose_ret.extend(pose_pre.cpu().numpy())

    pose_abs = cal_absolute_from_relative(pose_ret)

    if args.phase != 'Test':
        plot_from_pose(seq=seq,
                       dir_save=dir_model,
                       pose_abs=pose_abs,
                       epoch=epoch,
                       args=args)
        print('Save trajectory in {:s}'.format(dir_model))
        return

    np.savetxt(dir_time + '/pose_{:d}.txt'.format(seq), pose_ret)
    np.savetxt((dir_time + '/{:02d}.txt'.format(seq)), pose_abs)
    plot_from_pose(seq=seq,
                   dir_save=dir_time,
                   pose_abs=pose_abs,
                   args=args)
    print('Save pose and trajectory in {:s}'.format(dir_time))
    def init_hidden(self, batch_size):
        """Return zeros of shape (layer_num, batch_size, hidden_size), wrapped by ``to_var``."""
        shape = (self.layer_num, batch_size, self.hidden_size)
        return to_var(torch.zeros(*shape))
        Return:
            output: all time step > T x B x N
            hidden: last time step state > layer*direction x B x N
        """
        word_embedded = self.embedding(words_input).contiguous()
        hidden = hidden.contiguous()
        output, hidden = self.gru_encoder(word_embedded, hidden)

        return output, hidden

    def init_hidden(self, batch_size):
        """Build the all-zero initial hidden state for ``batch_size`` sequences.

        The tensor has shape (layer_num, batch_size, hidden_size) and is
        passed through ``to_var`` before being returned.
        """
        zeros = torch.zeros(self.layer_num, batch_size, self.hidden_size)
        return to_var(zeros)

if __name__ == '__main__':
    # Smoke test: build a Seq2Seq model and push one dummy batch of word ids
    # (30 x 128) through its encoder, then print the resulting sizes.
    torch.set_default_tensor_type('torch.FloatTensor')
    model = Seq2Seq(attn_model='general',
                    vocab_size=25003,
                    input_size=256,
                    hidden_size=256,
                    layer_num=2)
    print(model)
    if torch.cuda.is_available():
        model = nn.DataParallel(model.cuda(), device_ids=[0, 1])

    # Only the nn.DataParallel wrapper has a `.module` attribute. Without
    # CUDA the model is left unwrapped, so use it directly in that case.
    seq2seq = model.module if isinstance(model, nn.DataParallel) else model

    encoder_hidden = seq2seq.init_hidden(128)
    source = to_var(torch.from_numpy(np.arange(128*30).reshape(30, 128)))
    encoder_outputs, encoder_hidden = seq2seq.encoder(source, encoder_hidden)
    print(encoder_outputs.size(), encoder_hidden.size())
    def encoder_context(self, vec_input):
        """
        Run ``vec_input`` through ``self.lstm_encoder`` from an all-zero state.

        The initial hidden and cell states are zeros of shape
        (layer_num, batch_size, hidden_size). Only the second value returned
        by the encoder (its state) is handed back; the per-step output is
        discarded.
        """
        state_shape = (self.layer_num, self.batch_size, self.hidden_size)
        initial_h = to_var(torch.zeros(*state_shape))
        initial_c = to_var(torch.zeros(*state_shape))
        _, hidden = self.lstm_encoder(vec_input, (initial_h, initial_c))
        return hidden
        :param query: T1 x B
        :param answer: T2 x B
        :return:
            logits: B x Class
        """
        que_hidden = self.encoder_sentence(query)
        ans_hidden = self.encoder_sentence(answer)
        que_hidden = torch.unsqueeze(que_hidden[-1][0], dim=0)
        ans_hidden = torch.unsqueeze(ans_hidden[-1][0], dim=0)
        context_input = torch.cat((que_hidden, ans_hidden), dim=0)  # 2 x B x N
        con_hidden = self.encoder_context(context_input)
        out_hidden = con_hidden[-1][1]  # B x N
        logits = self.sigmoid(out_hidden)   # B x C

        return logits


if __name__ == '__main__':
    # Smoke test: build a HierarchicalEncoder and run one dummy query/answer
    # batch (30 x 8 word ids each) through it, then print the logits size.
    # TODO prepare data: 1. query [None, 30], 2. answer [None, 32], 3. label [None, 1]
    model = HierarchicalEncoder(vocab_size=1000,
                                batch_size=8,
                                input_size=256,
                                hidden_size=256,
                                layer_num=2)
    # Move to the GPU only when one exists, so the demo does not crash on a
    # CPU-only host.
    if torch.cuda.is_available():
        model.cuda()
    que_batch = to_var(torch.from_numpy(np.arange(8 * 30).reshape(30, 8)))
    ans_batch = to_var(torch.from_numpy(np.arange(8 * 30).reshape(30, 8)))
    # Not consumed below; kept as the label placeholder named in the TODO.
    label_batch = to_var(torch.IntTensor([0, 1, 1, 1, 0, 0, 0, 1]))
    logits = model(que_batch, ans_batch)
    print(logits.size())