def forward(self, vocab):
        with torch.no_grad():
            batch_shape = vocab['sentence'].shape
            s_embedding = self.embedding(vocab['sentence'].cuda())
            a_embedding = self.embedding(vocab['aspect'].cuda())

            packed_s = pack_padded_sequence(s_embedding, vocab['sent_len'], batch_first=True)

        out_s, (h_s, c1) = self.lstm_s(packed_s) # packed output
        out_a, (h_a, c2) = self.lstm_a(a_embedding)

        with torch.no_grad():
            unpacked_out_s, _ = pad_packed_sequence(out_s, batch_first=True)

        # Pair-wise interaction matrix
        I_matrix = torch.bmm(unpacked_out_s, out_a.permute(0,2,1))

        # Column-wise softmax
        a2s_attn = F.softmax(I_matrix, dim=1)

        # Row-wise softmax => Column-wise average => aspect attention
        s2a_attn = F.softmax(I_matrix, dim=2)
        a_attn = torch.mean(s2a_attn, dim=1)

        # Final sentence attn => weighted sum of each individual a2s_attn
        s_attn = torch.bmm(a2s_attn, a_attn.unsqueeze(-1))

        final_rep = torch.bmm(unpacked_out_s.permute(0,2,1), s_attn).squeeze(-1)
        pred = self.fc(final_rep)
        return pred
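
A minimal sketch of the attention arithmetic used in the forward pass above, on a toy interaction matrix (shapes and values are made up for illustration):

import torch
import torch.nn.functional as F

# Toy interaction matrix: batch 1, sentence length 4, aspect length 2.
I_matrix = torch.randn(1, 4, 2)

# Column-wise softmax: each aspect position gets a distribution over sentence tokens.
a2s_attn = F.softmax(I_matrix, dim=1)                 # (1, 4, 2), each column sums to 1

# Row-wise softmax, then averaged over sentence tokens -> one weight per aspect position.
a_attn = F.softmax(I_matrix, dim=2).mean(dim=1)       # (1, 2), sums to 1

# Final sentence attention: aspect-weighted combination of the per-aspect columns.
s_attn = torch.bmm(a2s_attn, a_attn.unsqueeze(-1))    # (1, 4, 1)
print(s_attn.squeeze(-1).sum(dim=1))                  # tensor([1.])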
Example #2
def softmax(tensor):
    r"""
    Wrapper around softmax to make it work with both Tensors and Variables.
    TODO: Remove once https://github.com/pytorch/pytorch/issues/2633 is resolved.
    """
    if not isinstance(tensor, Variable):
        return F.softmax(Variable(tensor), -1).data
    return F.softmax(tensor, -1)
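
A quick usage sketch of the wrapper (on modern PyTorch, where Tensor and Variable are the same type, only the second branch is exercised):

import torch
import torch.nn.functional as F
from torch.autograd import Variable  # required by the wrapper above

logits = torch.tensor([[1.0, 2.0, 3.0]])
print(softmax(logits))   # same values as F.softmax(logits, -1); rows sum to 1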
Example #3
def train(model,trainLoader,criterion, optimizer,evalData = None,
            epoch=1,echoStep=100,evalStep=1000,saveStep=5000,savePath="./"):
    
    if evalData is not None:
        evalX,evalY = evalData
        if torch.cuda.is_available():
            evalY = evalY.cuda()
            if isinstance (evalX,list):
                for ti,t in enumerate(evalX):
                    evalX[ti] = evalX[ti].cuda()
            else:
                evalX = evalX.cuda()

    batchLen = len(trainLoader)
    for epochIdx in range(epoch):
        for i,batch in enumerate(trainLoader,batchLen * epochIdx + 1):
            x, y = batch            
            if torch.cuda.is_available():
                y = y.cuda()
                if isinstance (x,list):
                    for ti,t in enumerate(x):
                        x[ti] = x[ti].cuda()
                else:
                    x = x.cuda()
            out = model(x)
            loss = criterion(out, y)
            
            prob = F.softmax(out, 1) 
            pred = torch.argmax(out, dim=1)
            correct = pred.eq(y).sum()
            acc = float(correct) / len(y)
            
            #print loss
            if i % echoStep == 0:
                print "Step %d/%d/%d : Loss %.4f , Acc %.4f " %(i,batchLen*epoch,epochIdx+1,float(loss),acc)
            #evaluate
            if i % evalStep == 0 and evalData is not None:
                evalOut = model(evalX)
                evalLoss = criterion(evalOut, evalY)
                correct = torch.argmax(F.softmax(evalOut, 1) , dim=1).eq(evalY).sum()
                evalAcc = float(correct) / len(evalY)
                print "------------------------------------------------"
                print "Evaluate %d Sample : Loss %.4f , Acc %.4f " %(evalY.size(0),float(evalLoss),evalAcc)
                print
            #save model        
            if i % saveStep == 0:
                outFile = "%s/m_%d_%d.pt" %(savePath,i,epochIdx+1)
                torch.save(model.state_dict(),outFile)
                print "Save model : %s" %(outFile)

            #backward
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

    outFile = "%s/final.pt" %(savePath)
    torch.save(model.state_dict(),outFile)
    print "Save model : %s" %(outFile)
Example #4
def validate(eval_loader, model, log, global_step, epoch):
    class_criterion = nn.CrossEntropyLoss(size_average=False, ignore_index=NO_LABEL).cuda()
    meters = AverageMeterSet()

    # switch to evaluate mode
    model.eval()

    end = time.time()
    for i, (input, target) in enumerate(eval_loader):
        meters.update('data_time', time.time() - end)

        input_var = torch.autograd.Variable(input, volatile=True)
        target_var = torch.autograd.Variable(target.cuda(async=True), volatile=True)

        minibatch_size = len(target_var)
        labeled_minibatch_size = target_var.data.ne(NO_LABEL).sum()
        assert labeled_minibatch_size > 0
        meters.update('labeled_minibatch_size', labeled_minibatch_size)

        # compute output
        output1, output2 = model(input_var)
        softmax1, softmax2 = F.softmax(output1, dim=1), F.softmax(output2, dim=1)
        class_loss = class_criterion(output1, target_var) / minibatch_size

        # measure accuracy and record loss
        prec1, prec5 = accuracy(output1.data, target_var.data, topk=(1, 5))
        meters.update('class_loss', class_loss.data[0], labeled_minibatch_size)
        meters.update('top1', prec1[0], labeled_minibatch_size)
        meters.update('error1', 100.0 - prec1[0], labeled_minibatch_size)
        meters.update('top5', prec5[0], labeled_minibatch_size)
        meters.update('error5', 100.0 - prec5[0], labeled_minibatch_size)

        # measure elapsed time
        meters.update('batch_time', time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            LOG.info(
                'Test: [{0}/{1}]\t'
                'Time {meters[batch_time]:.3f}\t'
                'Data {meters[data_time]:.3f}\t'
                'Class {meters[class_loss]:.4f}\t'
                'Prec@1 {meters[top1]:.3f}\t'
                'Prec@5 {meters[top5]:.3f}'.format(
                    i, len(eval_loader), meters=meters))

    LOG.info(' * Prec@1 {top1.avg:.3f}\tPrec@5 {top5.avg:.3f}'
          .format(top1=meters['top1'], top5=meters['top5']))
    log.record(epoch, {
        'step': global_step,
        **meters.values(),
        **meters.averages(),
        **meters.sums()
    })

    return meters['top1'].avg
 def forward(self, x):
     x = F.relu(self.lin1(x))
     out = self.head(x)
     #print(out)
     splits = out.view(x.size()[0],2,9).chunk(2,1)
     #print(splits[1])
     #return torch.stack(list(map(lambda s: F.softmax(s[0]), splits)), 0)
     #print(F.softmax(splits[0]).view(x.size()[0],9))
     print(torch.sum(F.softmax(splits[0], dim=-1).view(x.size()[0], 9), dim=1))
     return F.softmax(splits[0], dim=-1), F.softmax(splits[1], dim=-1)
Example #6
def softmax_mse_loss(input_logits, target_logits):
    """Takes softmax on both sides and returns MSE loss

    Note:
    - Returns the sum over all examples. Divide by the batch size afterwards
      if you want the mean.
    - Sends gradients to inputs but not the targets.
    """
    assert input_logits.size() == target_logits.size()
    input_softmax = F.softmax(input_logits, dim=1)
    target_softmax = F.softmax(target_logits, dim=1)
    num_classes = input_logits.size()[1]
    return F.mse_loss(input_softmax, target_softmax, size_average=False) / num_classes
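
A quick check of the consistency loss above (shapes are illustrative); dividing by the batch size afterwards gives the mean, as the docstring notes:

import torch

student_logits = torch.randn(4, 10, requires_grad=True)
teacher_logits = torch.randn(4, 10)          # e.g. a detached teacher prediction
loss = softmax_mse_loss(student_logits, teacher_logits) / 4
loss.backward()                              # gradients reach student_logits only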
Example #7
    def _region_proposal(self, net_conv_level1, net_conv_level2, net_conv_level3):
        if cfg.NUM_ANCHORS_LEVEL1 != 0:
            rpn_level1 = F.relu(self.rpn_net_level1(net_conv_level1))
            # batch x w x h x l x (num_anchors x 6)
            rpn_bbox_pred_level1 = self.rpn_bbox_pred_net_level1(rpn_level1).permute(0, 2, 3, 4, 1).contiguous()
            # batch x 2 x w x h x l x num_anchors
            rpn_cls_score_level1 = self.rpn_cls_score_net_level1(rpn_level1).view(self.batch_size, 2, cfg.NUM_ANCHORS_LEVEL1, rpn_bbox_pred_level1.size(1), rpn_bbox_pred_level1.size(2), rpn_bbox_pred_level1.size(3)).permute(0, 1, 3, 4, 5, 2).contiguous()

            # batch x 2 x w x h x l x num_anchors
            rpn_cls_prob_level1 = F.softmax(rpn_cls_score_level1, dim=1)
            self._predictions["rpn_cls_score_level1"] = rpn_cls_score_level1
            self._predictions["rpn_cls_prob_level1"] = rpn_cls_prob_level1
            self._predictions["rpn_bbox_pred_level1"] = rpn_bbox_pred_level1

        if cfg.NUM_ANCHORS_LEVEL2 != 0:
            rpn_level2 = F.relu(self.rpn_net_level2(net_conv_level2))
            # batch x w x h x l x (num_anchors x 6)
            rpn_bbox_pred_level2 = self.rpn_bbox_pred_net_level2(rpn_level2).permute(0, 2, 3, 4, 1).contiguous()
            # batch x 2 x w x h x l x num_anchors
            rpn_cls_score_level2 = self.rpn_cls_score_net_level2(rpn_level2).view(self.batch_size, 2, cfg.NUM_ANCHORS_LEVEL2, rpn_bbox_pred_level2.size(1), rpn_bbox_pred_level2.size(2), rpn_bbox_pred_level2.size(3)).permute(0, 1, 3, 4, 5, 2).contiguous()

            # batch x 2 x w x h x l x num_anchors
            rpn_cls_prob_level2 = F.softmax(rpn_cls_score_level2, dim=1)
            self._predictions["rpn_cls_score_level2"] = rpn_cls_score_level2
            self._predictions["rpn_cls_prob_level2"] = rpn_cls_prob_level2
            self._predictions["rpn_bbox_pred_level2"] = rpn_bbox_pred_level2

        if cfg.NUM_ANCHORS_LEVEL3 != 0:
            rpn_level3 = F.relu(self.rpn_net_level3(net_conv_level3))
            # batch x w x h x l x (num_anchors x 6)
            rpn_bbox_pred_level3 = self.rpn_bbox_pred_net_level3(rpn_level3).permute(0, 2, 3, 4, 1).contiguous()
            # batch x 2 x w x h x l x num_anchors
            rpn_cls_score_level3 = self.rpn_cls_score_net_level3(rpn_level3).view(self.batch_size, 2, cfg.NUM_ANCHORS_LEVEL3, rpn_bbox_pred_level3.size(1), rpn_bbox_pred_level3.size(2), rpn_bbox_pred_level3.size(3)).permute(0, 1, 3, 4, 5, 2).contiguous()
            # batch x 2 x w x h x l x num_anchors
            rpn_cls_prob_level3 = F.softmax(rpn_cls_score_level3, dim=1)
            self._predictions["rpn_cls_score_level3"] = rpn_cls_score_level3
            self._predictions["rpn_cls_prob_level3"] = rpn_cls_prob_level3
            self._predictions["rpn_bbox_pred_level3"] = rpn_bbox_pred_level3

        if self._mode == 'TRAIN':
            self._anchor_target_layer(
                    [*rpn_cls_score_level1.shape[2:5]] if cfg.NUM_ANCHORS_LEVEL1 != 0 else None, 
                    [*rpn_cls_score_level2.shape[2:5]] if cfg.NUM_ANCHORS_LEVEL2 != 0 else None,
                    [*rpn_cls_score_level3.shape[2:5]] if cfg.NUM_ANCHORS_LEVEL3 != 0 else None)

        self._proposal_layer(rpn_cls_prob_level1 if cfg.NUM_ANCHORS_LEVEL1 != 0 else None,
                             rpn_bbox_pred_level1 if cfg.NUM_ANCHORS_LEVEL1 !=0 else None,
                             rpn_cls_prob_level2 if cfg.NUM_ANCHORS_LEVEL2 !=0 else None,
                             rpn_bbox_pred_level2 if cfg.NUM_ANCHORS_LEVEL2 !=0 else None,
                             rpn_cls_prob_level3 if cfg.NUM_ANCHORS_LEVEL3 !=0 else None,
                             rpn_bbox_pred_level3 if cfg.NUM_ANCHORS_LEVEL3 !=0 else None)
Example #8
 def forward(self, x):
     #x: seq_len * batch_size * num
     if not self.training:
         seq_len = x.size()[0]
         return torch.stack([F.softmax(x[i], dim=1) for i in range(seq_len)], 0)
     else:
         return x
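
Because softmax is applied independently at every timestep above, the loop is equivalent to a single call over the last dimension; a small standalone check, assuming a (seq_len, batch, num) tensor:

import torch
import torch.nn.functional as F

x = torch.randn(5, 3, 7)   # seq_len * batch_size * num
looped = torch.stack([F.softmax(x[i], dim=1) for i in range(x.size(0))], 0)
direct = F.softmax(x, dim=2)
print(torch.allclose(looped, direct))   # True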
Example #9
    def tree_backup(self, tree_result, batch_size):
        backup_values = tree_result["values"][-1]
        for i in range(1, self.tree_depth + 1):
            one_step_backup = tree_result["rewards"][-i] + self.gamma*backup_values

            if i < self.tree_depth:
                one_step_backup = one_step_backup.view(batch_size, -1, self.num_actions)

                if self.value_aggregation == "max":
                    max_backup = one_step_backup.max(2)[0]
                elif self.value_aggregation == "logsumexp":
                    max_backup = logsumexp(one_step_backup, 2)
                elif self.value_aggregation == "softmax":
                    max_backup = (one_step_backup * F.softmax(one_step_backup, dim=2)).sum(dim=2)
                else:
                    raise ValueError("Unknown value aggregation function %s" % self.value_aggregation)

                backup_values = ((1 - self.td_lambda) * tree_result["values"][-i-1] +
                                 (self.td_lambda) * max_backup.view(-1, 1))
            else:
                backup_values = one_step_backup

        backup_values = backup_values.view(batch_size, self.num_actions)

        return backup_values
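
The "softmax" aggregation above is a soft maximum: Q-values weighted by their own softmax. A toy comparison of the three aggregators on a single row of values:

import torch
import torch.nn.functional as F

q = torch.tensor([[1.0, 2.0, 4.0]])
hard_max = q.max(1)[0]                            # tensor([4.0])
log_sum_exp = torch.logsumexp(q, 1)               # ~4.17, a smooth upper bound on the max
soft_max = (q * F.softmax(q, dim=1)).sum(dim=1)   # ~3.65, between the mean and the max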
    def forward(self, hidden, encoder_outputs,encoder_lengths=None,return_weight=False):
        """
        hidden : query (previous hidden) B,1,D <FloatTensor>
        encoder_outputs : context (encoder outputs) B,T,D <FloatTensor>
        encoder_lengths : list[int]
        """
        q, c = hidden, encoder_outputs

        batch_size_q, n_q, dim_q = q.size()
        batch_size_c, n_c, dim_c = c.size()

        if batch_size_q != batch_size_c:
            msg = 'batch size mismatch (query: {}, context: {})'
            raise ValueError(msg.format(q.size(), c.size()))

        batch_size = batch_size_q
        
        s = self.score(q,c)
        
        # Mask the padded encoder positions
        if encoder_lengths is not None:
            mask = s.data.new(batch_size, n_q, n_c)
            mask = self.fill_context_mask(mask, sizes=encoder_lengths, v_mask=float('-inf'), v_unmask=0)
            s = Variable(mask) + s
        
        # Normalize with softmax
        w = F.softmax(s,2) # B,1,T
        
        # Combine
        z = w.bmm(c)
        if return_weight:
            return w, z
        return z
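
The masking step relies on adding -inf to padded positions before the softmax, which forces their attention weights to zero; a minimal illustration:

import torch
import torch.nn.functional as F

scores = torch.randn(1, 1, 4)                              # B, 1, T
mask = torch.tensor([[[0.0, 0.0, 0.0, float('-inf')]]])    # last position is padding
weights = F.softmax(scores + mask, 2)
print(weights)   # last weight is exactly 0; the remaining weights sum to 1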
    def probs(self, generator, outputs, vocab_pointer_switches, context_question_switches, 
        context_attention, question_attention, 
        context_indices, question_indices, 
        oov_to_limited_idx):

        size = list(outputs.size())

        size[-1] = self.generative_vocab_size
        scores = generator(outputs.view(-1, outputs.size(-1))).view(size)
        p_vocab = F.softmax(scores, dim=scores.dim()-1)
        scaled_p_vocab = vocab_pointer_switches.expand_as(p_vocab) * p_vocab

        effective_vocab_size = self.generative_vocab_size + len(oov_to_limited_idx)
        if self.generative_vocab_size < effective_vocab_size:
            size[-1] = effective_vocab_size - self.generative_vocab_size
            buff = scaled_p_vocab.new_full(size, EPSILON)
            scaled_p_vocab = torch.cat([scaled_p_vocab, buff], dim=buff.dim()-1)

        # p_context_ptr
        scaled_p_vocab.scatter_add_(scaled_p_vocab.dim()-1, context_indices.unsqueeze(1).expand_as(context_attention), 
            (context_question_switches * (1 - vocab_pointer_switches)).expand_as(context_attention) * context_attention)

        # p_question_ptr
        scaled_p_vocab.scatter_add_(scaled_p_vocab.dim()-1, question_indices.unsqueeze(1).expand_as(question_attention), 
            ((1 - context_question_switches) * (1 - vocab_pointer_switches)).expand_as(question_attention) * question_attention)

        return scaled_p_vocab
    def forward(self, xt, fc_feats, att_feats, p_att_feats, state):
        # The p_att_feats here is already projected
        att_size = att_feats.numel() // att_feats.size(0) // self.att_feat_size
        att = p_att_feats.view(-1, att_size, self.att_hid_size)
        
        att_h = self.h2att(state[0][-1])                        # batch * att_hid_size
        att_h = att_h.unsqueeze(1).expand_as(att)            # batch * att_size * att_hid_size
        dot = att + att_h                                   # batch * att_size * att_hid_size
        dot = F.tanh(dot)                                # batch * att_size * att_hid_size
        dot = dot.view(-1, self.att_hid_size)               # (batch * att_size) * att_hid_size
        dot = self.alpha_net(dot)                           # (batch * att_size) * 1
        dot = dot.view(-1, att_size)                        # batch * att_size
        
        weight = F.softmax(dot)                             # batch * att_size
        att_feats_ = att_feats.view(-1, att_size, self.att_feat_size) # batch * att_size * att_feat_size
        att_res = torch.bmm(weight.unsqueeze(1), att_feats_).squeeze(1) # batch * att_feat_size

        all_input_sums = self.i2h(xt) + self.h2h(state[0][-1])
        sigmoid_chunk = all_input_sums.narrow(1, 0, 3 * self.rnn_size)
        sigmoid_chunk = F.sigmoid(sigmoid_chunk)
        in_gate = sigmoid_chunk.narrow(1, 0, self.rnn_size)
        forget_gate = sigmoid_chunk.narrow(1, self.rnn_size, self.rnn_size)
        out_gate = sigmoid_chunk.narrow(1, self.rnn_size * 2, self.rnn_size)

        in_transform = all_input_sums.narrow(1, 3 * self.rnn_size, 2 * self.rnn_size) + \
            self.a2c(att_res)
        in_transform = torch.max(\
            in_transform.narrow(1, 0, self.rnn_size),
            in_transform.narrow(1, self.rnn_size, self.rnn_size))
        next_c = forget_gate * state[1][-1] + in_gate * in_transform
        next_h = out_gate * F.tanh(next_c)

        output = self.dropout(next_h)
        state = (next_h.unsqueeze(0), next_c.unsqueeze(0))
        return output, state
def iterate_batches(envs, net, device="cpu"):
    n_actions = envs[0].action_space.n
    act_selector = ptan.actions.ProbabilityActionSelector()
    obs = [e.reset() for e in envs]
    batch_dones = [[False] for _ in range(NUM_ENVS)]
    total_reward = [0.0] * NUM_ENVS
    total_steps = [0] * NUM_ENVS
    mb_obs = np.zeros((NUM_ENVS, REWARD_STEPS) + IMG_SHAPE, dtype=np.uint8)
    mb_rewards = np.zeros((NUM_ENVS, REWARD_STEPS), dtype=np.float32)
    mb_values = np.zeros((NUM_ENVS, REWARD_STEPS), dtype=np.float32)
    mb_actions = np.zeros((NUM_ENVS, REWARD_STEPS), dtype=np.int32)
    mb_probs = np.zeros((NUM_ENVS, REWARD_STEPS, n_actions), dtype=np.float32)

    while True:
        batch_dones = [[dones[-1]] for dones in batch_dones]
        done_rewards = []
        done_steps = []
        for n in range(REWARD_STEPS):
            obs_v = ptan.agent.default_states_preprocessor(obs).to(device)
            mb_obs[:, n] = obs_v.data.cpu().numpy()
            logits_v, values_v = net(obs_v)
            probs_v = F.softmax(logits_v, dim=1)
            probs = probs_v.data.cpu().numpy()
            actions = act_selector(probs)
            mb_probs[:, n] = probs
            mb_actions[:, n] = actions
            mb_values[:, n] = values_v.squeeze().data.cpu().numpy()
            for e_idx, e in enumerate(envs):
                o, r, done, _ = e.step(actions[e_idx])
                total_reward[e_idx] += r
                total_steps[e_idx] += 1
                if done:
                    o = e.reset()
                    done_rewards.append(total_reward[e_idx])
                    done_steps.append(total_steps[e_idx])
                    total_reward[e_idx] = 0.0
                    total_steps[e_idx] = 0
                obs[e_idx] = o
                mb_rewards[e_idx, n] = r
                batch_dones[e_idx].append(done)
        # obtain values for the last observation
        obs_v = ptan.agent.default_states_preprocessor(obs).to(device)
        _, values_v = net(obs_v)
        values_last = values_v.squeeze().data.cpu().numpy()

        for e_idx, (rewards, dones, value) in enumerate(zip(mb_rewards, batch_dones, values_last)):
            rewards = rewards.tolist()
            if not dones[-1]:
                rewards = discount_with_dones(rewards + [value], dones[1:] + [False], GAMMA)[:-1]
            else:
                rewards = discount_with_dones(rewards, dones[1:], GAMMA)
            mb_rewards[e_idx] = rewards

        out_mb_obs = mb_obs.reshape((-1,) + IMG_SHAPE)
        out_mb_rewards = mb_rewards.flatten()
        out_mb_actions = mb_actions.flatten()
        out_mb_values = mb_values.flatten()
        out_mb_probs = mb_probs.flatten()
        yield out_mb_obs, out_mb_rewards, out_mb_actions, out_mb_values, out_mb_probs, \
              np.array(done_rewards), np.array(done_steps)
Example #14
    def forward(self, x, y, y_mask):
        """Input shapes:
            x = batch * len1 * h
            y = batch * len2 * h
            y_mask = batch * len2
        Output shapes:
            matched_seq = batch * len1 * h
        """
        # Project vectors
        if self.linear:
            x_proj = self.linear(x.view(-1, x.size(2))).view(x.size())
            x_proj = F.relu(x_proj)
            y_proj = self.linear(y.view(-1, y.size(2))).view(y.size())
            y_proj = F.relu(y_proj)
        else:
            x_proj = x
            y_proj = y

        # Compute scores
        scores = x_proj.bmm(y_proj.transpose(2, 1))

        # Mask padding
        y_mask = y_mask.unsqueeze(1).expand(scores.size())
        scores.data.masked_fill_(y_mask.data, -float('inf'))

        # Normalize with softmax
        alpha_flat = F.softmax(scores.view(-1, y.size(1)))
        alpha = alpha_flat.view(-1, x.size(1), y.size(1))

        # Take weighted average
        matched_seq = alpha.bmm(y)
        return matched_seq
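
The same mask-then-normalize idea appears here with an in-place masked_fill_. A small standalone sketch of the scoring path (the optional projection is omitted, and the boolean mask marks padding):

import torch
import torch.nn.functional as F

x = torch.randn(2, 3, 8)                       # batch * len1 * h
y = torch.randn(2, 4, 8)                       # batch * len2 * h
y_mask = torch.tensor([[False, False, False, True],
                       [False, False, True, True]])

scores = x.bmm(y.transpose(2, 1))              # batch * len1 * len2
scores = scores.masked_fill(y_mask.unsqueeze(1), -float('inf'))
alpha = F.softmax(scores, dim=2)               # each len1 row sums to 1 over unmasked len2
matched_seq = alpha.bmm(y)                     # batch * len1 * h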
    def probs(self, generator, outputs, vocab_pointer_switches, context_question_switches, 
        context_attention, question_attention, 
        context_indices, question_indices, 
        oov_to_limited_idx):

        size = list(outputs.size())

        size[-1] = self.generative_vocab_size
        scores = generator(outputs.view(-1, outputs.size(-1))).view(size)
        p_vocab = F.softmax(scores, dim=scores.dim()-1)
        scaled_p_vocab = vocab_pointer_switches.expand_as(p_vocab) * p_vocab

        effective_vocab_size = self.generative_vocab_size + len(oov_to_limited_idx)
        if self.generative_vocab_size < effective_vocab_size:
            size[-1] = effective_vocab_size - self.generative_vocab_size
            buff = Variable(scaled_p_vocab.data.new(*size).fill_(EPSILON))
            scaled_p_vocab = torch.cat([scaled_p_vocab, buff], dim=buff.dim()-1)

        p_context_ptr = Variable(scaled_p_vocab.data.new(*scaled_p_vocab.size()).fill_(EPSILON))
        p_context_ptr.scatter_add_(p_context_ptr.dim()-1, context_indices.unsqueeze(1).expand_as(context_attention), context_attention)
        scaled_p_context_ptr = (context_question_switches * (1 - vocab_pointer_switches)).expand_as(p_context_ptr) * p_context_ptr

        p_question_ptr = Variable(scaled_p_vocab.data.new(*scaled_p_vocab.size()).fill_(EPSILON))
        p_question_ptr.scatter_add_(p_question_ptr.dim()-1, question_indices.unsqueeze(1).expand_as(question_attention), question_attention)
        scaled_p_question_ptr = ((1 - context_question_switches) * (1 - vocab_pointer_switches)).expand_as(p_question_ptr) * p_question_ptr

        probs = scaled_p_vocab + scaled_p_context_ptr + scaled_p_question_ptr
        return probs
def test(model, device, test_loader):
    model.to(device)
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        y_pred = []
        y_true = []
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            output = torch.mean(output.view(output.size(0), output.size(1), -1), dim=2)
            test_loss += F.cross_entropy(output, target)
            output = F.softmax(output, dim=1)
            confidence, pred = output.max(1)
            print('confidence: {}, prediction: {}, ground truth: {}'.format(confidence.cpu().numpy(), pred.cpu().numpy(), target.cpu().numpy()))
            y_pred += pred.data.tolist()
            y_true += target.data.tolist()
            correct += pred.eq(target.view_as(pred)).sum().item()

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
    print(metrics.classification_report(np.asarray(y_true), np.asarray(y_pred)))
    print('confusion matrix: \n', metrics.confusion_matrix(np.asarray(y_true), np.asarray(y_pred)))
    print('\n')
def train_a2c(net, mb_obs, mb_rewards, mb_actions, mb_values, optimizer, tb_tracker, step_idx, device="cpu"):
    optimizer.zero_grad()
    mb_adv = mb_rewards - mb_values
    adv_v = torch.FloatTensor(mb_adv).to(device)
    obs_v = torch.FloatTensor(mb_obs).to(device)
    rewards_v = torch.FloatTensor(mb_rewards).to(device)
    actions_t = torch.LongTensor(mb_actions).to(device)
    logits_v, values_v = net(obs_v)
    log_prob_v = F.log_softmax(logits_v, dim=1)
    log_prob_actions_v = adv_v * log_prob_v[range(len(mb_actions)), actions_t]

    loss_policy_v = -log_prob_actions_v.mean()
    loss_value_v = F.mse_loss(values_v.squeeze(-1), rewards_v)

    prob_v = F.softmax(logits_v, dim=1)
    entropy_loss_v = (prob_v * log_prob_v).sum(dim=1).mean()
    loss_v = ENTROPY_BETA * entropy_loss_v + VALUE_LOSS_COEF * loss_value_v + loss_policy_v
    loss_v.backward()
    nn_utils.clip_grad_norm_(net.parameters(), CLIP_GRAD)
    optimizer.step()

    tb_tracker.track("advantage", mb_adv, step_idx)
    tb_tracker.track("values", values_v, step_idx)
    tb_tracker.track("batch_rewards", rewards_v, step_idx)
    tb_tracker.track("loss_entropy", entropy_loss_v, step_idx)
    tb_tracker.track("loss_policy", loss_policy_v, step_idx)
    tb_tracker.track("loss_value", loss_value_v, step_idx)
    tb_tracker.track("loss_total", loss_v, step_idx)
    return obs_v
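
The entropy term combines softmax and log_softmax on the same logits; a small check that the sign convention makes a uniform (high-entropy) policy the lowest-loss case:

import torch
import torch.nn.functional as F

logits = torch.tensor([[0.0, 0.0, 0.0],    # uniform policy
                       [5.0, 0.0, 0.0]])   # nearly deterministic policy
prob = F.softmax(logits, dim=1)
log_prob = F.log_softmax(logits, dim=1)
entropy_loss = (prob * log_prob).sum(dim=1)   # negative entropy per row
print(entropy_loss)   # first row is more negative, i.e. lower entropy loss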
Example #18
 def forward(self, x):
     x = self.features(x)
     a = self.conv6_1(x)
     b = self.conv6_2(x)
     c = self.conv6_3(x)
     a = F.softmax(a, dim=1)
     return c, b, a
    def routing(self, x, b_IJ, W,batch_size,routing_iter):
        x1 = x.view(batch_size, 256, 1, 6, 6)
        x_tile = x1.repeat(1, 1, 10, 1, 1)
        x_view = x_tile.view(batch_size, 1152, 10, 8, 1)
        stride_i = W.repeat(batch_size, 1, 1, 1, 1)
        stride_j = stride_i.view(batch_size, 1152, 10, 16, 8)
        dot_op = torch.matmul(stride_j, x_view)
        dot_op_stopped = Variable(dot_op.data.clone(), requires_grad=False)

        for r_iter in range(routing_iter):
            id_capsule = F.softmax(b_IJ, dim=2)
            if r_iter == routing_iter - 1:
                route_I = torch.mul(id_capsule, dot_op)
                route_I_sum = torch.sum(route_I, dim=1, keepdim=True) + self.bias
                V_J = squash(route_I_sum,self.epsilon)
            if r_iter < routing_iter - 1:

                dot_op_stopped_tmp = dot_op_stopped.data.numpy()
                dot_op_stopped_tmp = np.reshape(dot_op_stopped_tmp, (batch_size, 1152, 10, 16, 1))
                id_capsule_tmp = id_capsule.data.numpy()
                route_I_tmp = id_capsule_tmp * dot_op_stopped_tmp
                route_I_tmp_sum = np.sum(route_I_tmp, axis=1, keepdims=True) + self.bias.data.numpy()
                V_J_tmp = squash(torch.Tensor(route_I_tmp_sum),self.epsilon)

                V_J_tmp_tiled = np.tile(V_J_tmp.numpy(), (1, 1152, 1, 1, 1))
                dot_op_stopped_tmp = np.reshape(dot_op_stopped_tmp, (batch_size, 1152, 10, 1, 16))

                u_produce_v = np.matmul(dot_op_stopped_tmp, V_J_tmp_tiled)

                b_IJ.data += torch.Tensor(u_produce_v)

        return V_J
Example #20
def loss(anchors, data, pred, threshold):
    iou = pred['iou']
    device_id = iou.get_device() if torch.cuda.is_available() else None
    rows, cols = pred['feature'].size()[-2:]
    iou_matrix, _iou, _, _data = iou_match(pred['yx_min'].data, pred['yx_max'].data, data)
    anchors = utils.ensure_device(anchors, device_id)
    positive = fit_positive(rows, cols, *(data[key] for key in 'yx_min, yx_max'.split(', ')), anchors)
    negative = ~positive & (_iou < threshold)
    _center_offset, _size_norm = fill_norm(*(_data[key] for key in 'yx_min, yx_max'.split(', ')), anchors)
    positive, negative, _iou, _center_offset, _size_norm, _cls = (torch.autograd.Variable(t) for t in (positive, negative, _iou, _center_offset, _size_norm, _data['cls']))
    _positive = torch.unsqueeze(positive, -1)
    loss = {}
    # iou
    loss['foreground'] = F.mse_loss(iou[positive], _iou[positive], size_average=False)
    loss['background'] = torch.sum(square(iou[negative]))
    # bbox
    loss['center'] = F.mse_loss(pred['center_offset'][_positive], _center_offset[_positive], size_average=False)
    loss['size'] = F.mse_loss(pred['size_norm'][_positive], _size_norm[_positive], size_average=False)
    # cls
    if 'logits' in pred:
        logits = pred['logits']
        if len(_cls.size()) > 3:
            loss['cls'] = F.mse_loss(F.softmax(logits, -1)[_positive], _cls[_positive], size_average=False)
        else:
            loss['cls'] = F.cross_entropy(logits[_positive].view(-1, logits.size(-1)), _cls[positive].view(-1))
    # normalize
    cnt = float(np.multiply.reduce(positive.size()))
    for key in loss:
        loss[key] /= cnt
    return loss, dict(iou=_iou, data=_data, positive=positive, negative=negative)
Example #21
 def forward(self, image_feat, question_embedding):
     att1 = self.att1.compute_raw_att(image_feat, question_embedding)
     att2 = self.att2.compute_raw_att(image_feat, question_embedding)
     raw_attention = att1 + att2
     # softmax across locations
     attention = F.softmax(raw_attention, dim=1).expand_as(image_feat)
     return attention
def calc_loss(batch, net, tgt_net, gamma, device="cpu", save_prefix=None):
    states, actions, rewards, dones, next_states = common.unpack_batch(batch)
    batch_size = len(batch)

    states_v = torch.tensor(states).to(device)
    actions_v = torch.tensor(actions).to(device)
    next_states_v = torch.tensor(next_states).to(device)

    # next state distribution
    next_distr_v, next_qvals_v = tgt_net.both(next_states_v)
    next_actions = next_qvals_v.max(1)[1].data.cpu().numpy()
    next_distr = tgt_net.apply_softmax(next_distr_v).data.cpu().numpy()

    next_best_distr = next_distr[range(batch_size), next_actions]
    dones = dones.astype(bool)

    # project our distribution using Bellman update
    proj_distr = common.distr_projection(next_best_distr, rewards, dones, Vmin, Vmax, N_ATOMS, gamma)

    # calculate net output
    distr_v = net(states_v)
    state_action_values = distr_v[range(batch_size), actions_v.data]
    state_log_sm_v = F.log_softmax(state_action_values, dim=1)
    proj_distr_v = torch.tensor(proj_distr).to(device)

    if save_prefix is not None:
        pred = F.softmax(state_action_values, dim=1).data.cpu().numpy()
        save_transition_images(batch_size, pred, proj_distr, next_best_distr, dones, rewards, save_prefix)

    loss_v = -state_log_sm_v * proj_distr_v
    return loss_v.sum(dim=1).mean()
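
The final line is the cross-entropy between the projected target distribution and the network's atom distribution, written with log_softmax. A toy version for a single state, assuming a 51-atom setup as in the usual C51 formulation:

import torch
import torch.nn.functional as F

atom_logits = torch.randn(1, 51, requires_grad=True)     # predicted logits over atoms
target_distr = F.softmax(torch.randn(1, 51), dim=1)      # stand-in for the projected Bellman target
loss = (-F.log_softmax(atom_logits, dim=1) * target_distr).sum(dim=1).mean()
loss.backward()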
Example #23
 def predict(self, x, attn_type = "hard"):
     #predict with greedy decoding
     emb = self.embedding(x)
     h = Variable(torch.zeros(1, x.size(0), self.hidden_dim))
     c = Variable(torch.zeros(1, x.size(0), self.hidden_dim))
     enc_h, _ = self.encoder(emb, (h, c))
     y = [Variable(torch.zeros(x.size(0)).long())]
     self.attn = []        
     for t in range(x.size(1)):
         emb_t = self.embedding(y[-1])
         dec_h, (h, c) = self.decoder(emb_t.unsqueeze(1), (h, c))
         scores = torch.bmm(enc_h, dec_h.transpose(1,2)).squeeze(2)
         attn_dist = F.softmax(scores, dim = 1)
         self.attn.append(attn_dist.data)
         if attn_type == "hard":
             _, argmax = attn_dist.max(1)
             one_hot = Variable(torch.zeros_like(attn_dist.data).scatter_(-1, argmax.data.unsqueeze(1), 1))
             context = torch.bmm(one_hot.unsqueeze(1), enc_h).squeeze(1)                    
         else:                
             context = torch.bmm(attn_dist.unsqueeze(1), enc_h).squeeze(1)
         pred = self.vocab_layer(torch.cat([dec_h.squeeze(1), context], 1))
         _, next_token = pred.max(1)
         y.append(next_token)
     self.attn = torch.stack(self.attn, 0).transpose(0, 1)
     return torch.stack(y, 0).transpose(0, 1)
    def action_probs(self, x):
        x = self(x)

        # log_probs = F.log_softmax(x)
        probs = F.softmax(x)

        return probs
def sample(dataloader):
    for batch in dataloader:
        output_seq = Variable(batch['output_seq'])
        del (batch['output_seq'])
        for k in batch:
            batch[k] = Variable(batch[k])
        if DEVICE_NO != -1:
            output_seq = output_seq.cuda(DEVICE_NO)
            for k in batch:
                batch[k] = batch[k].cuda(DEVICE_NO)
        pred = uf.forward(**batch)
        pred = F.softmax(pred, dim=-1)
        prob, label = torch.max(pred, dim=-1)
        for i in range(len(list(batch.values())[0])):
            out_seq = []
            for j in range(int(batch['sent_len'][i])):
                word = idx2word[int(batch['word_seq'][i, int(j)])]
                pos = idx2pos[int(batch['pos_seq'][i, int(j)])]
                l_true = idx2label[int(output_seq[i, int(j)])]
                p = float(prob[i, int(j)])
                l = idx2label[int(label[i, int(j)])] if p > PROB_THRESH else 'O'
                out_seq.append([word, pos, l, l_true])
            out_seq = change_seq_format(out_seq)
            for item in out_seq:
                print('{}/{}/{}'.format(item[0], item[3], item[2]), end=' ')
            print('')
        input('input to continue:')
Example #26
    def forward_dot(self, hid, ctx, ctx_mask):
        r"""Computes Luong-style dot attention probabilities between
        decoder's hidden state and source annotations.

        Arguments:
            hid(Variable): A set of decoder hidden states of shape `T*B*H`
                where `T` == 1, `B` is batch dim and `H` is hidden state dim.
            ctx(Variable): A set of annotations of shape `S*B*C` where `S`
                is the source timestep dim, `B` is batch dim and `C`
                is annotation dim.
            ctx_mask(FloatTensor): A binary mask of shape `S*B` with zeroes
                in the padded timesteps.

        Returns:
            scores(Variable): A variable of shape `S*B` containing normalized
                attention scores for each position and sample.
            z_t(Variable): A variable of shape `B*H` containing the final
                attended context vector for this target decoding timestep.
        """
        # Apply transformations first to make last dims both C and then
        # shuffle dims to prepare for batch mat-mult
        ctx_ = self.ctx2ctx(ctx).permute(1, 2, 0)   # S*B*C -> S*B*C -> B*C*S
        hid_ = self.hid2ctx(hid).permute(1, 0, 2)   # T*B*H -> T*B*C -> B*T*C

        # 'dot' scores of B*T*S
        scores = F.softmax(torch.bmm(hid_, ctx_), dim=-1)

        # Transform back to hidden_dim for further decoders
        # B*T*S x B*S*C -> B*T*C -> B*T*H
        z_t = self.ctx2hid(torch.bmm(scores, ctx.transpose(0, 1)))

        return scores.transpose(0, 1), z_t.transpose(0, 1)
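
Stripped of the learned projections, the core of the dot attention above is a batched matmul followed by a softmax over source positions; a standalone sketch with the same T*B*H / S*B*C layout (here H == C):

import torch
import torch.nn.functional as F

S, T, B, H = 6, 1, 2, 8
ctx = torch.randn(S, B, H)     # source annotations
hid = torch.randn(T, B, H)     # decoder hidden state

scores = F.softmax(torch.bmm(hid.permute(1, 0, 2), ctx.permute(1, 2, 0)), dim=-1)  # B*T*S
z_t = torch.bmm(scores, ctx.transpose(0, 1))    # B*T*H attended context
print(scores.sum(-1))                           # each row sums to 1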
Example #27
    def forward(self, images, questions):

        N, T, _, _, _ = images.size()

        # bs x 5 x 3 x 224 x 224
        img_feats = self.cnn(images.contiguous().view(
            -1, images.size(2), images.size(3), images.size(4)))
        img_feats = self.cnn_fc_layer(img_feats)

        img_feats_tr = self.img_tr(img_feats)

        ques_feats = self.q_rnn(questions)
        ques_feats_repl = ques_feats.view(N, 1, -1).repeat(1, T, 1)
        ques_feats_repl = ques_feats_repl.view(N * T, -1)

        ques_feats_tr = self.ques_tr(ques_feats_repl)

        ques_img_feats = torch.cat([ques_feats_tr, img_feats_tr], 1)

        att_feats = self.att(ques_img_feats)
        att_probs = F.softmax(att_feats.view(N, T), dim=1)
        att_probs2 = att_probs.view(N, T, 1).repeat(1, 1, 64)

        att_img_feats = torch.mul(att_probs2, img_feats.view(N, T, 64))
        att_img_feats = torch.sum(att_img_feats, dim=1)

        mul_feats = torch.mul(ques_feats, att_img_feats)

        scores = self.classifier(mul_feats)

        return scores, att_probs
def test(dataloader, out=sys.stdout):
    for batch in dataloader:
        if 'output_seq' in batch:
            del batch['output_seq']
        for k in batch:
            batch[k] = Variable(batch[k])
        if DEVICE_NO != -1:
            for k in batch:
                batch[k] = batch[k].cuda(DEVICE_NO)
        pred = uf.forward(**batch)
        pred = F.softmax(pred, dim=-1)
        prob, label = torch.max(pred, dim=-1)
        for i in range(len(list(batch.values())[0])):
            out_seq = []
            for j in range(int(batch['sent_len'][i])):
                word = idx2word[int(batch['word_seq'][i, int(j)])]
                pos = idx2pos[int(batch['pos_seq'][i, int(j)])]
                p = float(prob[i, int(j)])
                l = idx2label[int(label[i, int(j)])] if p > PROB_THRESH else 'O'
                out_seq.append([word, pos, l])
            out_seq = change_seq_format(out_seq)
            #    out.write('{}/{}/{} '.format(word, pos, l))
            for item in out_seq:
                out.write('{}/{}/{} '.format(item[0], item[1], item[2]))
            out.write('\n')
Example #29
	def forward(self, sequence, graph):
		"""
		Apply self-attention to the sequence, ignores
		the graph
		"""
		sequence = sequence.squeeze(1)	
		
		#get the dimension
		n, d = sequence.size()
		
		#project the sequence into key, value, and query sequences
		keySeq = f.relu(self.keyProj(sequence))
		valueSeq = f.relu(self.valueProj(sequence))
		querySeq = f.relu(self.queryProj(sequence))
		
		#combine query with each key
		#a_ijh = softmax( (q_ih^T k_jh) / sqrt(d) )
		#the result is, row i is the importance of the sequence for key i
		importance = f.softmax(t.matmul(querySeq, keySeq.permute(1,0)) / math.sqrt(d), 0).permute(1,0)

		#apply the importance weights to the value sequence
		attention = t.matmul(valueSeq.permute(1,0), importance).permute(1,0)
	
		#sum the sequence for a complete representation
		final = t.sum(attention, 0)
		
		return attention.unsqueeze(1), final
Example #30
 def __call__(self):
     image_bgr = self.get_image()
     tensor = self.conv_tensor(image_bgr)
     pred = pybenchmark.profile('inference')(model._inference)(self.inference, torch.autograd.Variable(tensor, volatile=True))
     rows, cols = pred['feature'].size()[-2:]
     iou = pred['iou'].data.contiguous().view(-1)
     yx_min, yx_max = (pred[key].data.view(-1, 2) for key in 'yx_min, yx_max'.split(', '))
     logits = get_logits(pred)
     prob = F.softmax(logits, -1).data.view(-1, logits.size(-1))
     ret = postprocess(self.config, iou, yx_min, yx_max, prob)
     image_result = image_bgr.copy()
     if ret is not None:
         iou, yx_min, yx_max, cls, score = ret
         try:
             scale = self.scale
         except AttributeError:
             scale = utils.ensure_device(torch.from_numpy(np.array(image_result.shape[:2], np.float32) / np.array([rows, cols], np.float32)))
             self.scale = scale
         yx_min, yx_max = ((t * scale).cpu().numpy().astype(np.int) for t in (yx_min, yx_max))
         image_result = self.draw_bbox(image_result, yx_min, yx_max, cls)
     cv2.imshow('detection', image_result)
     if self.args.output:
         self.writer.write(image_result)
     if cv2.waitKey(0 if self.args.pause else 1) in self.keys:
         root = os.path.join(self.model_dir, 'snapshot')
         os.makedirs(root, exist_ok=True)
         path = os.path.join(root, time.strftime(self.args.format))
         cv2.imwrite(path, image_bgr)
         logging.warning('image dumped into ' + path)
    def postprocess_detections(self, class_logits, sub_cls_logits, box_regression, proposals, image_shapes):
        # type: (Tensor, Tensor, List[Tensor], List[Tuple[int, int]])
        device = class_logits.device
        num_classes = class_logits.shape[-1]
        num_sub_cls = 17

        boxes_per_image = [len(boxes_in_image) for boxes_in_image in proposals]
        pred_boxes = self.box_coder.decode(box_regression, proposals)

        pred_scores = F.softmax(class_logits, -1)
        pred_sub_scores = torch.sigmoid(sub_cls_logits)
        # split boxes and scores per image
        if len(boxes_per_image) == 1:
            # TODO : remove this when ONNX support dynamic split sizes
            # and just assign to pred_boxes instead of pred_boxes_list
            pred_boxes_list = [pred_boxes]
            pred_scores_list = [pred_scores]
            pred_sub_list = [pred_sub_scores]
        else:
            pred_boxes_list = pred_boxes.split(boxes_per_image, 0)
            pred_scores_list = pred_scores.split(boxes_per_image, 0)
            pred_sub_list = pred_sub_scores.split(boxes_per_image, 0)

        all_boxes = []
        all_scores = []
        all_labels = []
        all_subs = []
        for boxes, scores,sub_scores, image_shape in zip(pred_boxes_list, pred_scores_list,pred_sub_list, image_shapes):
            boxes = box_ops.clip_boxes_to_image(boxes, image_shape)
            # create labels for each prediction
            labels = torch.arange(num_classes, device=device)
            labels = labels.view(1, -1).expand_as(scores)
            sub_labels = torch.arange(num_sub_cls, device=device)
            sub_labels = sub_labels.view(1, -1).expand_as(sub_scores) * (sub_scores > 0.5)
            sub_labels = torch.repeat_interleave(sub_labels, num_classes, dim=0)
            # remove predictions with the background label
            boxes = boxes[:, 1:]
            scores = scores[:, 1:]
            labels = labels[:, 1:]

            # batch everything, by making every class prediction be a separate instance
            boxes = boxes.reshape(-1, 4)
            scores = scores.reshape(-1)
            labels = labels.reshape(-1)
            sub_labels = sub_labels.reshape(-1, 17)
            # remove low scoring boxes
            inds = torch.nonzero(scores > self.score_thresh).squeeze(1)
            boxes, scores, labels, sub_labels= boxes[inds], scores[inds], labels[inds], sub_labels[inds]
            # remove empty boxes
            keep = box_ops.remove_small_boxes(boxes, min_size=1e-2)
            boxes, scores, labels, sub_labels = boxes[keep], scores[keep], labels[keep], sub_labels[keep]

            # non-maximum suppression, independently done per class
            keep = box_ops.batched_nms(boxes, scores, labels, self.nms_thresh)
            # keep only topk scoring predictions
            keep = keep[:self.detections_per_img]
            boxes, scores, labels, sub_labels= boxes[keep], scores[keep], labels[keep], sub_labels[keep]

            all_boxes.append(boxes)
            all_scores.append(scores)
            all_labels.append(labels)
            all_subs.append(sub_labels)

        return all_boxes, all_scores, all_labels, all_subs
    def _forward_loop(self,
                      state: Dict[str, torch.Tensor],
                      target_tokens: Dict[str, torch.LongTensor] = None) -> Dict[str, torch.Tensor]:
        """
        Make forward pass during training or do greedy search during prediction.

        Notes
        -----
        We really only use the predictions from the method to test that beam search
        with a beam size of 1 gives the same results.
        """
        # shape: (batch_size, max_input_sequence_length)
        source_mask = state["source_mask"]

        batch_size = source_mask.size()[0]

        if target_tokens:
            # shape: (batch_size, max_target_sequence_length)
            targets = target_tokens["tokens"]

            _, target_sequence_length = targets.size()

            # The last input from the target is either padding or the end symbol.
            # Either way, we don't have to process it.
            num_decoding_steps = target_sequence_length - 1
        else:
            num_decoding_steps = self._max_decoding_steps

        # Initialize target predictions with the start index.
        # shape: (batch_size,)
        last_predictions = source_mask.new_full((batch_size,), fill_value=self._start_index)

        step_logits: List[torch.Tensor] = []
        step_predictions: List[torch.Tensor] = []
        step_attn_weights: List[torch.Tensor] = []
        for timestep in range(num_decoding_steps):
            if self.training and torch.rand(1).item() < self._scheduled_sampling_ratio:
                # Use gold tokens at test time and at a rate of 1 - _scheduled_sampling_ratio
                # during training.
                # shape: (batch_size,)
                input_choices = last_predictions
            elif not target_tokens:
                # shape: (batch_size,)
                input_choices = last_predictions
            else:
                # shape: (batch_size,)
                input_choices = targets[:, timestep]

            # shape: (batch_size, num_classes)
            # shape: (batch_size, input_max_size)
            input_weights, output_projections, state = self._prepare_output_projections(input_choices, state)

            step_attn_weights.append(input_weights.unsqueeze(1))

            # list of tensors, shape: (batch_size, 1, num_classes)
            step_logits.append(output_projections.unsqueeze(1))

            # shape: (batch_size, num_classes)
            class_probabilities = F.softmax(output_projections, dim=-1)

            # shape (predicted_classes): (batch_size,)
            _, predicted_classes = torch.max(class_probabilities, 1)

            # shape (predicted_classes): (batch_size,)
            last_predictions = predicted_classes

            step_predictions.append(last_predictions.unsqueeze(1))

        # shape: (batch_size, num_decoding_steps)
        predictions = torch.cat(step_predictions, 1)

        # shape: (batch_size, num_decoding_steps, max_input_sequence_length)
        attention_input_weights = torch.cat(step_attn_weights, 1)

        output_dict = {"predictions": predictions, 'attention_input_weights': attention_input_weights}

        if target_tokens:
            # shape: (batch_size, num_decoding_steps, num_classes)
            logits = torch.cat(step_logits, 1)

            # shape: (batch_size, num_decoding_steps, max_input_sequence_length)
            attn_weights = torch.cat(step_attn_weights, 1)

            # Compute loss.
            target_mask = util.get_text_field_mask(target_tokens)
            loss = self._get_loss(logits, targets, target_mask)

            coverage_loss = self._get_coverage_loss(attn_weights, source_mask, target_mask)
            assert coverage_loss < 1
            self._coverage_loss(coverage_loss.detach().cpu().item())

            output_dict["loss"] = loss + self._coverage_lambda * coverage_loss

        return output_dict
Example #33
def main(args):
    # Build data loader
    if not os.path.isdir(args.model_path):
        os.makedirs(args.model_path)

    data_loader,ds_class = get_loader(args.data_dir, args.seq_len, args.batch_size,
                             shuffle=True, num_workers=args.num_workers, ds = args.ds) 

    # Build eval data loader
    eval_data_loader,_ = get_loader(args.data_dir_test, args.seq_len, args.batch_size,
                             shuffle=True, num_workers=args.num_workers, ds = args.ds, lbl2id = ds_class.lbl2id) 
    
    model = SkeletonAction(args.input_size, args.hidden_size, args.num_class, args.num_layers, args.use_bias, args.dropout)


    # Loss and Optimizer
    criterion = nn.CrossEntropyLoss()
    if torch.cuda.is_available():
        model.cuda()
        criterion = criterion.cuda()

    params = model.parameters()
    optimizer = torch.optim.Adam(params, lr=args.learning_rate)

    # Load the trained model parameters
    # Now, we try to find the latest encoder and decoder model.
    if os.path.isdir(args.model_path) and os.listdir(args.model_path):
        m_fn = max(glob.glob(os.path.join(args.model_path, 'model*')), key = os.path.getctime)
        if m_fn:
            model.load_state_dict(torch.load(m_fn))

    # Train the Models
    total_step = len(data_loader)
    for epoch in range(args.num_epochs):
        total_train = 0
        total_correct = 0
        total_train_2 = 0
        total_correct_2 = 0
        for i_step, (lbl, data, length) in enumerate(data_loader):
            # Set mini-batch dataset
            lbl = Variable(lbl)
            data = Variable(data)
            mask = torch.zeros(data.size(0), data.size(1))
            for i,m in zip(length, mask):
                m[0:i[0]] = 1
            mask = Variable(mask)
            if torch.cuda.is_available():
                lbl = lbl.cuda()
                data = data.cuda()
                mask = mask.cuda()
            model.zero_grad()
            opt = model(data)
            # compute accuracy.        
            pred_lbl = opt.max(dim = -1)[1].data.cpu()
            gt_lbl = lbl.data.cpu()
            cnt = torch.LongTensor(lbl.size(0), args.num_class).zero_()
            for i in range(pred_lbl.size(0)):
                for j in range(length[i][0]):
                    cnt[i][pred_lbl[i,j]] += 1
            cnt_lbl = cnt.max(dim = -1)[1]
            total_train += data.size(0)
            total_correct += (cnt_lbl.squeeze() == gt_lbl.squeeze()).sum()

            prob = F.softmax(opt.view(opt.size(0) * opt.size(1), opt.size(2)))
            prob = prob.view(opt.size(0), opt.size(1), opt.size(2))
            prob = prob.sum(dim = 1)
            pred_lbl = prob.max(dim = -1)[1].data.cpu()
            total_correct_2 += (pred_lbl.squeeze() == gt_lbl.squeeze()).sum()


                    
            lbl = lbl.squeeze().unsqueeze(1)
            lbl = lbl.repeat(1, opt.size(1)).contiguous()
            lbl = lbl.view(lbl.size(0) * lbl.size(1))
            opt = opt.contiguous()
            opt = opt.view(opt.size(0) * opt.size(1), opt.size(2))
            log_p = F.log_softmax(opt)
            loss = - (mask.squeeze() * log_p[torch.LongTensor(range(opt.size(0))).cuda(), lbl.squeeze().data]).sum() / mask.sum()
            local_acc =  (cnt_lbl.squeeze() == gt_lbl.squeeze()).sum() * 1.0 / data.size(0)
            local_acc2 =  (pred_lbl.squeeze() == gt_lbl.squeeze()).sum() * 1.0 / data.size(0)
            if i_step % args.log_step == 0:
                logging.info('Epoch [%d/%d], [%d/%d], Loss: %.4f, accuracy: %5.4f, accuracy2: %5.4f',
                              epoch, args.num_epochs, 
                              i_step, len(data_loader),
                                loss.data[0], local_acc, local_acc2)
            #loss = criterion(opt, lbl)
            loss.backward()
            optimizer.step()
            # Eval the trained model
            if i_step % args.eval_step == 0:
                model.eval()
                total_num = 0
                correct_num = 0
                correct_num2 = 0
                for k_step, (lbl, data, length) in enumerate(eval_data_loader):
                    lbl = Variable(lbl)
                    data = Variable(data)
                    mask = torch.zeros(data.size(0), data.size(1))
                    for i,m in zip(length, mask):
                        m[0:i[0]] = 1
                    if torch.cuda.is_available():
                        lbl = lbl.cuda()
                        data = data.cuda()
                        mask = mask.cuda()
        
                    mask = Variable(mask)
                    model.zero_grad()
                    opt = model(data)
                    pred_lbl = opt.max(dim = -1)[1].data.cpu()
                    gt_lbl = lbl.data.cpu()
                    cnt = torch.LongTensor(lbl.size(0), args.num_class).zero_()
                    for i in range(pred_lbl.size(0)):
                        for j in range(length[i][0]):
                            cnt[i][pred_lbl[i,j]] += 1
                    cnt = cnt.max(dim = -1)[1]
                    total_num += data.size(0)
                    correct_num += (cnt.squeeze() == gt_lbl.squeeze()).sum()

                    prob = F.softmax(opt.view(opt.size(0) * opt.size(1), opt.size(2)))
                    prob = prob.view(opt.size(0), opt.size(1), opt.size(2))
                    prob = prob.sum(dim = 1)
                    pred_lbl = prob.max(dim = -1)[1].data.cpu()
                    correct_num2 += (pred_lbl.squeeze() == gt_lbl.squeeze()).sum()


                    lbl = lbl.squeeze().unsqueeze(1)
                    lbl = lbl.repeat(1, opt.size(1)).contiguous()
                    lbl = lbl.view(lbl.size(0) * lbl.size(1)) 
                    opt = opt.contiguous()
                    opt = opt.view(opt.size(0) * opt.size(1), opt.size(2))
                    #loss = criterion(opt, lbl)
                    log_p = F.log_softmax(opt)
                    loss = - (mask.squeeze() * log_p[torch.LongTensor(range(opt.size(0))).cuda(), lbl.squeeze().data]).sum() / mask.sum()
                accuracy = correct_num * 1.0 / total_num
                accuracy2 = correct_num2 * 1.0 / total_num
                logging.info('Validating [%d], Loss: %.4f, accuracy: %.4f, accuracy2 = %.4f'
                            ,epoch,
                                loss.data[0], accuracy, accuracy2) 
         
                model.train()

        if epoch % 10 == 0:
            logging.info('Epoch [%d/%d], Loss: %.4f, accuracy: %5.4f'
                          ,epoch, args.num_epochs, 
                            loss.data[0], accuracy)
            # Save the models
            torch.save(model.state_dict(), 
                os.path.join(args.model_path, 
                        'model-%d.pkl' %(epoch+1)))
Example #34
    def forward(self, up, down):

        refimg_fea = self.feature_extraction(up)  # reference image feature
        targetimg_fea = self.feature_extraction(down)  # target image feature

        #matching
        cost = Variable(
            torch.FloatTensor(refimg_fea.shape[0], refimg_fea.shape[1] * 2,
                              self.maxdisp // 4 * 3, refimg_fea.shape[2],
                              refimg_fea.shape[3]).zero_()).cuda()

        for i in range(self.maxdisp // 4 * 3):
            if i > 0:
                cost[:, :refimg_fea.size()[1],
                     i, :, :] = refimg_fea[:, :, :, :]
                cost[:, refimg_fea.size()[1]:,
                     i, :, :] = shift_down[:, :, :, :]
                shift_down = self.forF(shift_down)
            else:
                cost[:, :refimg_fea.size()[1], i, :, :] = refimg_fea
                cost[:, refimg_fea.size()[1]:, i, :, :] = targetimg_fea
                shift_down = self.forF(targetimg_fea)

        cost = cost.contiguous()

        cost0 = self.dres0(cost)
        cost0 = self.dres1(cost0) + cost0
        out1, pre1, post1 = self.dres2(cost0, None, None)
        out1 = out1 + cost0

        out2, pre2, post2 = self.dres3(out1, pre1, post1)
        out2 = out2 + cost0

        out3, pre3, post3 = self.dres4(out2, pre1, post2)
        out3 = out3 + cost0

        cost1 = self.classif1(out1)
        cost2 = self.classif2(out2) + cost1
        cost3 = self.classif3(out3) + cost2

        cost1 = F.upsample(
            cost1, [self.maxdisp * 3,
                    up.size()[2], up.size()[3]],
            mode='trilinear'
        )  # when within units, the maxdisp needs to be modified
        cost2 = F.upsample(
            cost2, [self.maxdisp * 3,
                    up.size()[2], up.size()[3]],
            mode='trilinear')

        cost1 = torch.squeeze(cost1, 1)
        pred1 = F.softmax(cost1, dim=1)
        pred1 = disparityregression_sub3(self.maxdisp)(pred1)

        cost2 = torch.squeeze(cost2, 1)
        pred2 = F.softmax(cost2, dim=1)
        pred2 = disparityregression_sub3(self.maxdisp)(pred2)

        cost3 = F.upsample(
            cost3, [self.maxdisp * 3,
                    up.size()[2], up.size()[3]],
            mode='trilinear')
        cost3 = torch.squeeze(cost3, 1)
        pred3 = F.softmax(cost3, dim=1)
        pred3 = disparityregression_sub3(self.maxdisp)(pred3)

        return pred1, pred2, pred3
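
The softmax over the disparity dimension above feeds a disparity regression, i.e. a soft argmax: the expected disparity under the softmax weights. A minimal sketch of that step, assuming the standard formulation (the actual disparityregression_sub3 module is not shown here):

import torch
import torch.nn.functional as F

maxdisp = 12
cost = torch.randn(2, maxdisp, 32, 32)                  # batch x disparities x H x W
prob = F.softmax(cost, dim=1)                           # per-pixel distribution over disparities
disp_values = torch.arange(maxdisp, dtype=torch.float32).view(1, maxdisp, 1, 1)
pred_disp = (prob * disp_values).sum(dim=1)             # batch x H x W expected disparity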
def sample_sequence_beam(model, length, args, start_token=None, batch_size=None, context=None, temperature=1, top_k=0, device='cuda', sample=True, beam_size = 5, tokenizer= None, max_len = -1, min_len = -1):
    '''
    Use beam search to sample a sequence conditioned on the given context
    '''
    
    
    
    
    if start_token is None:
        assert context is not None, 'Specify exactly one of start_token and context!'
        context = torch.tensor(context, device=device, dtype=torch.long).unsqueeze(0).repeat(batch_size, 1)
    else:
        assert context is None, 'Specify exactly one of start_token and context!'
        context = torch.full((batch_size, 1), start_token, device=device, dtype=torch.long)

    past = None
    
    
    # if specified, limit max generation length to input sentence length
    if args.max_len_inp:
        length = min([length, context.numel() + 1]) # +1 for endchar
    
#    full_beam = []
    
    candidates = [{'prev':torch.tensor(context), 'output':torch.tensor(context),'past':None,'ended':False, 'score':0}]
    
    done_list = []
    
    with torch.no_grad():
        for i in trange(length):
             # the beam for the ith place
            
            candidates_sorted = sorted(candidates, key=lambda v: v['score'])
            k_best = candidates_sorted[:beam_size]  # keep the best candidates (lowest score = most probable)

            candidates = []
            for cand in k_best:
                past = None  # recompute from scratch instead of reusing cand['past']
                prev = cand['output'].clone()  # feed the full sequence generated so far
                output_0 = cand['output'].clone()

                logits, past = model(prev, past=past)
                logits = logits[:, -1, :] / temperature
                logits = top_k_logits(logits, k=top_k)
                probs = F.softmax(logits, dim=-1)

                vals, prev = torch.topk(probs, k=beam_size, dim=-1)
                
                vals = vals.view(beam_size)
                
                for j in range(prev.numel()): #for each candidate expansion

                    output = torch.cat((output_0, prev[:, j].view(1, 1)), dim=1)

                    str_out = tokenizer.decode(output[0, :].tolist())
                    score = cand['score'] - vals[j].item()  # lower score = higher cumulative probability
                    
                    done = args.end_tok in str_out #'<|endoftext|>' in str_out
                    
                    cand_out = {'prev':prev, 'output':output.data,'past':past, 'score':score,'str_out':str_out}
                    
                    # if contains end token or reached end length, dump into done
                    if done or i == length - 1:
                        # put in the done pile
                        done_list += [cand_out]
                    else:
                        # put in the beam
                        candidates += [cand_out]
        
        if max_len != -1: # if we have specified a max length
            tmp_done_list = []
            
            for d in done_list:
                
                # remove '<|endoftext|>' if applicable
                str_out = tokenizer.decode(d['output'][0,:].tolist()) 
                trimmed_ind = str_out.find(args.end_tok)#('<|endoftext|>')
                if trimmed_ind == -1:
                    trimmed_ind = len(str_out)
                str_out = str_out[:trimmed_ind]
                tok_len = len(tokenizer.encode(str_out))
                
                if tok_len <= max_len + 1:
                    print('encoded {}'.format(tokenizer.encode(str_out)))
                    tmp_done_list += [d]
            done_list = tmp_done_list
                
        if min_len != -1:
            # trim all of the out strings so min_len is meaningful
            done_list_tmp = []
            for d in done_list:
                str_out = tokenizer.decode(d['output'][0,:].tolist()) 
                str_out = trim_text(str_out, args.end_tok, 10000) # remove end_token if you need to

                if len(tokenizer.encode(str_out)) >= context.numel() + min_len:
                    done_list_tmp += [d]
            done_list = done_list_tmp
                   
        output = max(done_list, key = lambda v: v['score'])['output']
                    
    return output
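# Hedged sketch of the `top_k_logits` helper called above (its definition is not
# shown in this example): keep the k largest logits per row and push everything
# else to a very negative value so the softmax assigns those tokens ~0 probability.
import torch

def top_k_logits_sketch(logits, k):
    if k == 0:
        return logits
    values, _ = torch.topk(logits, k, dim=-1)
    min_values = values[..., -1, None]                 # k-th largest logit per row
    return torch.where(logits < min_values,
                       torch.full_like(logits, -1e10),
                       logits)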
Exemple #36
0
    def generate(self, src_enc, src_len, tgt_lang_id, max_len=200, sample_temperature=None):
        """
        Decode a sentence given initial start.
        `x`:
            - LongTensor(bs, slen)
                <EOS> W1 W2 W3 <EOS> <PAD>
                <EOS> W1 W2 W3   W4  <EOS>
        `lengths`:
            - LongTensor(bs) [5, 6]
        `positions`:
            - False, for regular "arange" positions (LM)
            - True, to reset positions from the new generation (MT)
        `langs`:
            - must be None if the model only supports one language
            - lang_id if only one language is involved (LM)
            - (lang_id1, lang_id2) if two languages are involved (MT)
        """

        # input batch
        bs = len(src_len)
        assert src_enc.size(0) == bs

        # generated sentences
        generated = src_len.new(max_len, bs)  # upcoming output
        generated.fill_(self.pad_index)       # fill upcoming output with <PAD>
        generated[0].fill_(self.eos_index)    # we use <EOS> for <BOS> everywhere

        # positions
        positions = src_len.new(max_len).long()
        positions = torch.arange(max_len, out=positions).unsqueeze(1).expand(max_len, bs)

        # language IDs
        langs = src_len.new(max_len).long().fill_(tgt_lang_id)
        langs = langs.unsqueeze(1).expand(max_len, bs)

        # current position / max lengths / length of generated sentences / unfinished sentences
        cur_len = 1
        gen_len = src_len.clone().fill_(1)
        unfinished_sents = src_len.clone().fill_(1)

        # cache compute states
        cache = {'slen': 0}

        while cur_len < max_len:

            # compute word scores
            tensor = self.forward(
                'fwd',
                x=generated[:cur_len],
                lengths=gen_len,
                positions=positions[:cur_len],
                langs=langs[:cur_len],
                causal=True,
                src_enc=src_enc,
                src_len=src_len,
                cache=cache
            )
            assert tensor.size() == (1, bs, self.dim), (cur_len, max_len, src_enc.size(), tensor.size(), (1, bs, self.dim))
            tensor = tensor.data[-1, :, :].type_as(src_enc)  # (bs, dim)
            scores = self.pred_layer.get_scores(tensor)      # (bs, n_words)

            # select next words: sample or greedy
            if sample_temperature is None:
                if self.mask_gen_lang is True:
                    next_words = torch.topk(scores, self.mask_topk)[1].squeeze(1)
                else:
                    next_words = torch.topk(scores, 1)[1].squeeze(1)
            else:
                if self.mask_gen_lang is True:
                    next_words = torch.multinomial(F.softmax(scores / sample_temperature, dim=1), self.mask_topk).squeeze(1)
                else:
                    next_words = torch.multinomial(F.softmax(scores / sample_temperature, dim=1), 1).squeeze(1)

            if self.mask_gen_lang is True:
                tmp_next_words = torch.zeros(bs, dtype=torch.long)
                for j, next_word in enumerate(next_words.cpu()):
                    has_tgt_id = False
                    for i, wi in enumerate(next_word):
                        if language_detect(self.dico.id2word[wi.item()], self.id2lang[tgt_lang_id]):
                            has_tgt_id = True
                            tmp_next_words[j] = wi
                            break
                    if has_tgt_id is False:
                        tmp_next_words[j] = next_words[j, 0]
                next_words = tmp_next_words.cuda()

            assert next_words.size() == (bs,)

            # update generations / lengths / finished sentences / current length
            generated[cur_len] = next_words * unfinished_sents + self.pad_index * (1 - unfinished_sents)
            gen_len.add_(unfinished_sents)
            unfinished_sents.mul_(next_words.ne(self.eos_index).long())
            cur_len = cur_len + 1

            # stop when there is a </s> in each sentence, or if we exceed the maximum length
            if unfinished_sents.max() == 0:
                break

        # add <EOS> to unfinished sentences
        if cur_len == max_len:
            generated[-1].masked_fill_(unfinished_sents.byte(), self.eos_index)

        # sanity check
        assert (generated == self.eos_index).sum() == 2 * bs

        return generated[:cur_len], gen_len
 def forward(self, x):
     x = self.features(x)
     x = x.view(x.size(0), -1)
     x = self.classifier(x)
     x = F.softmax(x, dim=1)
     return x
 ('instance_norm', (S, S, S), (non_differentiable(torch.zeros(S)), non_differentiable(torch.ones(S))),),
 ('layer_norm', (S, S, S, S), ([5],), '',
  (False, ['aten::contiguous', 'aten::_batch_norm_impl_index'])),
 ('layer_norm', (S, S, S, S), ([5], non_differentiable(torch.rand(S)),), 'with_only_weight',
  (False, ['aten::contiguous', 'aten::_batch_norm_impl_index'])),
 ('layer_norm', (S, S, S, S), ([5], None, non_differentiable(torch.rand(S)),), 'with_only_bias',
  (False, ['aten::contiguous', 'aten::_batch_norm_impl_index'])),
 ('layer_norm', (S, S, S, S), ([5], non_differentiable(torch.rand(S)),
                               non_differentiable(torch.rand(S))), 'with_weight_and_bias',
  (False, ['aten::contiguous', 'aten::_batch_norm_impl_index', 'aten::addcmul'])),
 ('group_norm', (S, S, S), (1, torch.rand(5),),),
 ('local_response_norm', (S, S, S), (2, ),),
 ('nll_loss', F.log_softmax(torch.randn(3, 5), dim=0), (torch.tensor([1, 0, 4]),), '',),
 ('poisson_nll_loss', torch.rand(S, 2), (torch.rand(S, 2),),),
 ('poisson_nll_loss', torch.rand(S, 2), (torch.rand(S, 2), True, True), 'full'),
 ('kl_div', F.log_softmax(torch.randn(S, 10), 1), (F.softmax(torch.randn(S, 10), 1),),),
 ('cross_entropy', (3, S), (torch.randint(S, (3,), dtype=torch.int64),),),
 ('binary_cross_entropy_with_logits', (3,), (torch.empty(3).random_(2), ),),
 ('smooth_l1_loss', (3, S), (non_differentiable(torch.rand(3, S)),),),
 ('huber_loss', (3, S), (non_differentiable(torch.rand(3, S)),),),
 ('l1_loss', (3, S), (non_differentiable(torch.rand(3, S)),),),
 ('mse_loss', (3, S), (non_differentiable(torch.rand(3, S)),),),
 ('smooth_l1_loss', (3, S), ((torch.rand(3, S)),), 'with_grad'),
 ('huber_loss', (3, S), ((torch.rand(3, S)),), 'with_grad'),
 ('l1_loss', (3, S), ((torch.rand(3, S)),), 'with_grad'),
 ('mse_loss', (3, S), ((torch.rand(3, S)),), 'with_grad'),
 ('margin_ranking_loss', (S,), ((S,), (S,)),),
 ('hinge_embedding_loss', (3, S), (non_differentiable(torch.rand(3, S)),),),
 ('soft_margin_loss', (3, S), (non_differentiable(torch.rand(3, S)),),),
 ('multilabel_soft_margin_loss', (3, S), (non_differentiable(torch.rand(3, S)),),),
 ('cosine_embedding_loss', (S, S), ((S, S), non_differentiable(torch.rand(S,))),),
Exemple #39
0
def siamese_track(state, im):
    refine_enable = True
    mask_enable = True
    device = torch.device('cuda' if (torch.cuda.is_available()) else 'cpu')
    debug = True
    p = state['p']
    net = state['net']
    avg_chans = state['avg_chans']
    window = state['window']
    targets = state["targets"]

    zf_lists = []

    BLUE = [255, 255, 255]
    for i, target in enumerate(targets):
        wc_x = target["target_sz"][1] + p.context_amount * sum(target["target_sz"])
        hc_x = target["target_sz"][0] + p.context_amount * sum(target["target_sz"])
        target["s_z"] = np.sqrt(wc_x * hc_x)

        target["scale_x"] = p.exemplar_size / target["s_z"]
        d_search = (p.instance_size - p.exemplar_size) / 2
        pad = d_search / target["scale_x"]
        target["s_z"] = target["s_z"] + 2 * pad
        target["crop_box"] = [target["target_pos"][0] - round(target["s_z"]) / 2,
                              target["target_pos"][1] - round(target["s_z"]) / 2, round(target["s_z"]),
                              round(target["s_z"])]
        zf_lists.append(target["zf"])
        crop_box = target["crop_box"]

    # extract scaled crops for search region x at previous target position

    targets = get_subwindow_tracking(im, p.instance_size, avg_chans, targets=targets)

    # x_crop = Variable(get_subwindow_tracking(im, target_pos, p.instance_size, round(s_x), avg_chans).unsqueeze(0))
    tracking_data_list = []
    tracking_data = dict()

    for target, zf in zip(targets, zf_lists):
        target["x_crop"] = Variable(target["im_to_torch"].unsqueeze(0))
        target["x_crop"] = target["x_crop"].to(device)
        tracking_data_list.append({"x_crop": target["x_crop"], "zf": zf})

    if mask_enable:
        results = net.track_mask(search=targets[0]["x_crop"], lists=tracking_data_list)

    # else:
    #     score, delta = net.track(x_crop.to(device))

    for result in results:
        delta = result["rpn_pred_loc"]
        score = result["rpn_pred_cls"]
        delta = delta.permute(1, 2, 3, 0).contiguous().view(4, -1).data.cpu().numpy()
        score = F.softmax(score.permute(1, 2, 3, 0).contiguous().view(2, -1).permute(1, 0), dim=1).data[:,
                1].cpu().numpy()

        delta[0, :] = delta[0, :] * p.anchor[:, 2] + p.anchor[:, 0]
        delta[1, :] = delta[1, :] * p.anchor[:, 3] + p.anchor[:, 1]
        delta[2, :] = np.exp(delta[2, :]) * p.anchor[:, 2]
        delta[3, :] = np.exp(delta[3, :]) * p.anchor[:, 3]
        result["rpn_pred_loc"] = delta
        result["rpn_pred_cls"] = score

    def change(r):
        return np.maximum(r, 1. / r)

    def sz(w, h):
        pad = (w + h) * 0.5
        sz2 = (w + pad) * (h + pad)
        return np.sqrt(sz2)

    def sz_wh(wh):
        pad = (wh[0] + wh[1]) * 0.5
        sz2 = (wh[0] + pad) * (wh[1] + pad)
        return np.sqrt(sz2)

    # size penalty
    count = 0
    for target, result in zip(targets, results):
        delta = result["rpn_pred_loc"]
        score = result["rpn_pred_cls"]
        crop_box = target["crop_box"]

        target_sz_in_crop = target["target_sz"] * target["scale_x"]
        s_c = change(sz(delta[2, :], delta[3, :]) / (sz_wh(target_sz_in_crop)))  # scale penalty
        r_c = change((target_sz_in_crop[0] / target_sz_in_crop[1]) / (delta[2, :] / delta[3, :]))  # ratio penalty
        penalty = np.exp(-(r_c * s_c - 1) * p.penalty_k)
        pscore = penalty * score

        pscore = pscore * (1 - p.window_influence) + window * p.window_influence
        best_pscore_id = np.argmax(pscore)

        pred_in_crop = delta[:, best_pscore_id] / target["scale_x"]
        lr = penalty[best_pscore_id] * score[best_pscore_id] * p.lr  # lr for OTB

        res_x = pred_in_crop[0] + target["target_pos"][0]
        res_y = pred_in_crop[1] + target["target_pos"][1]

        res_w = target["target_sz"][0] * (1 - lr) + pred_in_crop[2] * lr
        res_h = target["target_sz"][1] * (1 - lr) + pred_in_crop[3] * lr

        target["target_pos"] = np.array([res_x, res_y])
        target["target_sz"] = np.array([res_w, res_h])

        if mask_enable:
            best_pscore_id_mask = np.unravel_index(best_pscore_id, (5, p.score_size, p.score_size))
            delta_x, delta_y = best_pscore_id_mask[2], best_pscore_id_mask[1]

            if refine_enable:
                mask = net.track_refine((delta_y, delta_x), index=count).to(device).sigmoid().squeeze().view(
                    p.out_size, p.out_size).cpu().data.numpy()
            else:
                # note: this branch expects a raw mask tensor from track_mask and is
                # unreachable here because refine_enable is hard-coded to True above
                mask = mask[0, :, delta_y, delta_x].sigmoid(). \
                    squeeze().view(p.out_size, p.out_size).cpu().data.numpy()

            count += 1

            def crop_back(image, bbox, out_sz, padding=-1):
                a = (out_sz[0] - 1) / bbox[2]
                b = (out_sz[1] - 1) / bbox[3]
                c = -a * bbox[0]
                d = -b * bbox[1]
                mapping = np.array([[a, 0, c],
                                    [0, b, d]]).astype(np.float32)
                crop = cv2.warpAffine(image, mapping, (out_sz[0], out_sz[1]),
                                      flags=cv2.INTER_LINEAR,
                                      borderMode=cv2.BORDER_CONSTANT,
                                      borderValue=padding)
                return crop

            s = crop_box[2] / p.instance_size
            sub_box = [crop_box[0] + (delta_x - p.base_size / 2) * p.total_stride * s,
                       crop_box[1] + (delta_y - p.base_size / 2) * p.total_stride * s,
                       s * p.exemplar_size, s * p.exemplar_size]
            s = p.out_size / sub_box[2]
            back_box = [-sub_box[0] * s, -sub_box[1] * s, state['im_w'] * s, state['im_h'] * s]
            mask_in_img = crop_back(mask, back_box, (state['im_w'], state['im_h']))

            target_mask = (mask_in_img > p.seg_thr).astype(np.uint8)
            if cv2.__version__[-5] == '4':
                contours, _ = cv2.findContours(target_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
            else:
                _, contours, _ = cv2.findContours(target_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
            cnt_area = [cv2.contourArea(cnt) for cnt in contours]

            if len(contours) != 0 and np.max(cnt_area) > 100:
                contour = contours[np.argmax(cnt_area)]  # use max area polygon
                polygon = contour.reshape(-1, 2)
                # pbox = cv2.boundingRect(polygon)  # Min Max Rectangle
                prbox = cv2.boxPoints(cv2.minAreaRect(polygon))  # Rotated Rectangle

                # box_in_img = pbox
                rbox_in_img = prbox
            else:  # empty mask
                location = cxy_wh_2_rect(target["target_pos"], target["target_sz"])
                rbox_in_img = np.array([[location[0], location[1]],
                                        [location[0] + location[2], location[1]],
                                        [location[0] + location[2], location[1] + location[3]],
                                        [location[0], location[1] + location[3]]])

        target["target_pos"][0] = max(0, min(state['im_w'], target["target_pos"][0]))
        target["target_pos"][1] = max(0, min(state['im_h'], target["target_pos"][1]))
        target["target_sz"][0] = max(10, min(state['im_w'], target["target_sz"][0]))
        target["target_sz"][1] = max(10, min(state['im_h'], target["target_sz"][1]))
        # print("new targetPos {} and targetsize {} \n".format(target["target_pos"],target["target_sz"])) 

        target["mask"] = mask_in_img if mask_enable else []
        target['ploygon'] = rbox_in_img if mask_enable else []
        target["score"] = score[best_pscore_id]

    state["targets"] = targets
    return state
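# Hedged sketch of how the cosine `window` consumed above is typically built for
# SiamRPN/SiamMask-style trackers (the actual construction lives outside this
# snippet): an outer product of 1-D Hanning windows over the score map, tiled
# once per anchor, so candidates far from the previous position are penalized.
import numpy as np

def make_cosine_window_sketch(score_size, anchor_num):
    hanning = np.hanning(score_size)
    window = np.outer(hanning, hanning)            # [score_size, score_size]
    return np.tile(window.flatten(), anchor_num)   # one weight per anchor position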
Exemple #40
0
def test_model(model, hist, criterion, dataloaders, dataset_sizes, half=False):
    """
    Testing function. 
    Print the loss and accuracy after the inference on the testset.
    """
    print("\n\n**TESTING**\n")

    sincetime = time.time()

    phase = "test"
    model.eval()  # Set model to evaluate mode

    running_loss = 0.0
    running_corrects = 0

    list_y_pred = []
    list_y_true = []
    list_probs = []

    nb_batches = len(dataloaders[phase])

    pbar = tqdm.tqdm([i for i in range(nb_batches)])

    # Iterate over data.
    for batch_idx, (inputs, labels) in enumerate(dataloaders[phase]):
        pbar.update()
        pbar.set_description("Processing batch %s" % str(batch_idx + 1))
        inputs = inputs.to(DEVICE)
        labels = labels.to(DEVICE)

        # After Quantization
        if half:
            inputs = inputs.half()

        # forward
        # track history if only in train
        with torch.set_grad_enabled(False):
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            probs = softmax(outputs, 1)
            loss = criterion(outputs, labels)

        # statistics
        running_loss += loss.item() * inputs.size(0)
        running_corrects += torch.sum(preds == labels.data)

        list_y_pred.append(int(preds.cpu()))
        list_y_true.append(int(labels.data.cpu()))
        list_probs.append(probs.cpu())

    pbar.close()

    test_loss = running_loss / dataset_sizes[phase]
    test_acc = running_corrects.double() / dataset_sizes[phase]
    test_acc = round(float(test_acc), 4)
    hist['test_acc'] = test_acc

    hist['y_pred'] = list_y_pred
    hist['probs'] = np.stack(list_probs).reshape(-1, 3)
    hist['y_true'] = list_y_true

    print('\nTest stats -  Loss: {:.4f} Acc: {:.2f}%'.format(
        test_loss, test_acc * 100))

    print("Inference on Testset complete in {:.1f}s\n".format(time.time() -
                                                              sincetime))

    return hist
 def softmax(self, inp, h):
     raw_score = inp.bmm(h.unsqueeze(2))
     score = F.softmax(raw_score, dim=1)
     return score
Exemple #42
0
 def soft_attn(self, direction):
     di0 = self.bn_di0(direction)
     di = F.relu(self.bn_di(self.fc_di(di0)))
     x_di = self.fc_attn(di)
     attn = F.softmax(x_di, 1)
     return attn
 def output(task_name, immediate_output_dict):
     module_name = f"{task_name}_pred_head"
     return F.softmax(immediate_output_dict[module_name], dim=1)
Exemple #44
0
    def forward(self, x):
        output = self._forward(x)
        proba = F.softmax(output, dim=1)

        return proba
Exemple #45
0
 def forward(self, x):
     x = self.affine1(x)
     x = F.relu(x)
     x = F.relu(self.affine2(x))
     action_scores = self.affine3(x)
     return F.softmax(action_scores, dim=1)
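# Hedged usage sketch (not part of this example): the action probabilities
# returned above are typically wrapped in a categorical distribution for
# REINFORCE-style sampling; the literal `probs` below stands in for the
# forward() output and is an assumed placeholder.
import torch
from torch.distributions import Categorical

probs = torch.tensor([[0.1, 0.2, 0.7]])   # stands in for the policy network output
dist = Categorical(probs)
action = dist.sample()                    # sampled action index
log_prob = dist.log_prob(action)          # kept for the policy-gradient update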
 def get_pred(x):
     if resize:
         x = up(x)
     x = inception_model(x)
     return F.softmax(x, dim=1).data.cpu().numpy()
    def train(self):
        progress = sly.Progress('Model training: ', self.epochs * self.train_iters)
        self.model.train()

        lr_decr = self.config['lr_decreasing']
        policy = LRPolicyWithPatience(
            optim_cls=Adam,
            init_lr=self.config['lr'],
            patience=lr_decr['patience'],
            lr_divisor=lr_decr['lr_divisor'],
            model=self.model
        )
        best_val_loss = float('inf')

        debug_saver = None
        debug_save_prob = float(os.getenv('DEBUG_PATCHES_PROB', 0.0))
        if debug_save_prob > 0:
            target_multi = int(255.0 / len(self.out_classes))
            debug_saver = DebugSaver(odir=os.path.join(sly.TaskPaths.DEBUG_DIR, 'debug_patches'),
                                     prob=debug_save_prob,
                                     target_multi=target_multi)

        for epoch in range(self.epochs):
            sly.logger.info("Before new epoch", extra={'epoch': self.epoch_flt})

            for train_it, (inputs_cpu, targets_cpu) in enumerate(self.data_loaders['train']):
                inputs, targets = cuda_variable(inputs_cpu), cuda_variable(targets_cpu)
                outputs = self.model(inputs)
                loss = self.criterion(outputs, targets)

                if debug_saver is not None:
                    out_cls = functional.softmax(outputs, dim=1)
                    debug_saver.process(inputs_cpu, targets_cpu, out_cls.data.cpu())

                policy.optimizer.zero_grad()
                loss.backward()
                policy.optimizer.step()

                metric_values_train = {'loss': loss.item()}
                for name, metric in self.metrics.items():
                    metric_values_train[name] = metric(outputs, targets)

                progress.iter_done_report()

                self.epoch_flt = epoch_float(epoch, train_it + 1, self.train_iters)
                sly.report_metrics_training(self.epoch_flt, metric_values_train)

                if self.eval_planner.need_validation(self.epoch_flt):
                    metrics_values_val = self._validation()
                    self.eval_planner.validation_performed()

                    val_loss = metrics_values_val['loss']
                    model_is_best = val_loss < best_val_loss
                    if model_is_best:
                        best_val_loss = val_loss
                        sly.logger.info('It\'s been determined that current model is the best one for a while.')

                    self._save_model_snapshot(model_is_best, opt_data={
                        'epoch': self.epoch_flt,
                        'val_metrics': metrics_values_val,
                    })

                    policy.reset_if_needed(val_loss, self.model)

            sly.logger.info("Epoch was finished", extra={'epoch': self.epoch_flt})
Exemple #48
0
def _softmax(x):
    return F.softmax(x, dim=-1)
 def forward(self, inp):
     scores = F.softmax(self.scorer(inp), dim=1)
     cont = scores.transpose(1, 2).bmm(inp).squeeze(1)
     return cont
Exemple #50
0
 def predict_class_probs(self, x):
     probs = F.softmax(self.forward(x), dim=1)
     return probs
Exemple #51
0
 def decode(self, z):
     d1 = F.softmax(self.fcd1(z), dim=1)
     return d1
    def forward(self, records_u, is_train):

        predicted_scores = Variable(
            torch.zeros(records_u.get_predicting_records_cnt(
                mod=0), self.nb_cnt + 1)) if is_train else []
        records_al = records_u.get_records(
            mod=0) if is_train else records_u.get_records(mod=2)
        vids_visited = set(
            [record.vid for record in records_u.get_records(mod=0)])
        emb_u = self.embedder_u(
            Variable(torch.LongTensor([records_u.uid])).view(1,
                                                             -1)).view(1, -1)
        emb_u = F.relu(emb_u)
        emb_t_al = Variable(torch.zeros(len(records_al), self.emb_dim_t))
        hidden_long_al = Variable(torch.zeros(len(records_al),
                                              self.hidden_dim))
        hidden_short_al = Variable(
            torch.zeros(len(records_al), self.hidden_dim))
        feature_al = Variable(torch.zeros(len(records_al), self.att_dim))

        hidden_long = self.init_hidden()
        hidden_short = self.init_hidden()
        for idx, record in enumerate(
                records_u.get_records(mod=0)):  # can only use train data
            if record.is_first:
                hidden_short = self.init_hidden()
            emb_t_al[idx] = F.relu(
                self.embedder_t(
                    Variable(torch.LongTensor([record.tid])).view(1, -1)).view(
                        1, -1))  # current time embedding
            feature_al[idx] = torch.cat(
                (F.relu(hidden_long), F.relu(hidden_short), emb_t_al[idx].view(
                    1, -1)), 1)
            emb_v = self.embedder_v(
                Variable(torch.LongTensor([record.vid])).view(1, -1)).view(
                    1, -1)  # feature: current time + previous hiddens
            hidden_long = self.rnn_long(emb_v, hidden_long)
            hidden_short = self.rnn_short(emb_v, hidden_short)
            hidden_long_al[idx] = F.relu(hidden_long)
            hidden_short_al[idx] = F.relu(hidden_short)

        id = 0
        id_vids_true = []
        id_vids = []
        for idx, record in enumerate(records_al):
            if idx >= records_u.test_idx:  # append the states of testing records
                if record.is_first:
                    hidden_short = self.init_hidden()
                emb_t_al[idx] = F.relu(
                    self.embedder_t(
                        Variable(torch.LongTensor([record.tid
                                                   ])).view(1,
                                                            -1)).view(1, -1))
                feature_al[idx] = torch.cat(
                    (F.relu(hidden_long), F.relu(hidden_short),
                     emb_t_al[idx].view(1, -1)), 1)
                emb_v = self.embedder_v(
                    Variable(torch.LongTensor([record.vid
                                               ])).view(1, -1)).view(1, -1)
                hidden_long = self.rnn_long(emb_v, hidden_long)
                hidden_short = self.rnn_short(emb_v, hidden_short)
                hidden_long_al[idx] = F.relu(hidden_long)
                hidden_short_al[idx] = F.relu(hidden_short)
            if record.is_last or (not is_train and idx < records_u.test_idx):
                continue
            vids_visited.add(record.vid)
            vid_candidates = self.get_vids_candidate(record.rid, vids_visited,
                                                     record.vid_next, is_train)
            id_vids_true.append(record.vid_next)
            id_vids.append(vid_candidates)
            scores_u = self.decoder_u(
                emb_u,
                Variable(torch.LongTensor(vid_candidates)).view(1, -1))
            scores_t = self.decoder_t(
                emb_t_al[idx + 1].view(1, -1),
                Variable(torch.LongTensor(vid_candidates)).view(1, -1))
            scores_hl = self.decoder_hl(
                hidden_long_al[idx].view(1, -1),
                Variable(torch.LongTensor(vid_candidates)).view(1, -1))
            scores_hs = self.decoder_hs(
                hidden_short_al[idx].view(1, -1),
                Variable(torch.LongTensor(vid_candidates)).view(1, -1))
            scores_d_all = self.get_scores_d_all(records_u, idx,
                                                 vid_candidates, feature_al,
                                                 is_train)
            if self.mod == 0:
                scores_merge = torch.cat(
                    (scores_u, scores_t, scores_hl, scores_hs, scores_d_all),
                    0).t()
                if is_train:
                    predicted_scores[id] = F.sigmoid(
                        F.linear(scores_merge,
                                 F.relu(self.merger_weight),
                                 bias=None).t())
                else:
                    predicted_scores.append(
                        F.softmax(
                            F.linear(scores_merge,
                                     F.relu(self.merger_weight),
                                     bias=None).t()))
            elif self.mod == 1:
                scores_d_pre = self.get_scores_d_pre(records_u, idx,
                                                     vid_candidates,
                                                     feature_al, is_train)
                scores_merge = torch.cat(
                    (scores_u, scores_t, scores_hl, scores_hs, scores_d_all,
                     scores_d_pre), 0).t()
                if is_train:
                    predicted_scores[id] = F.sigmoid(
                        F.linear(scores_merge,
                                 F.relu(self.merger_weight),
                                 bias=None).t())
                else:
                    predicted_scores.append(
                        F.softmax(
                            F.linear(scores_merge,
                                     F.relu(self.merger_weight),
                                     bias=None).t()))
            elif self.mod == 2:
                scores_d_pre = self.get_scores_d_pre(records_u, idx,
                                                     vid_candidates,
                                                     feature_al, is_train)
                scores_merge = torch.cat(
                    (scores_u, scores_t, scores_hl, scores_hs, scores_d_all,
                     scores_d_pre), 0).t()
                gap_time = (records_al[idx + 1].dt -
                            record.dt).total_seconds() / 60 / 60
                gap_time_int = int(gap_time)
                weight_lower = gap_time_int + 1 - gap_time
                weight_upper = gap_time - gap_time_int
                merger_weight_linear = F.relu(self.merger_weight_al[
                    gap_time_int]) * weight_lower + F.relu(
                        self.merger_weight_al[gap_time_int + 1]) * weight_upper
                scores_pre_final = F.linear(scores_merge,
                                            merger_weight_linear,
                                            bias=None).t()
                if is_train:
                    predicted_scores[id] = F.sigmoid(scores_pre_final)
                else:
                    predicted_scores.append(F.softmax(scores_pre_final))
            elif self.mod == 3:
                scores_d_pre = self.get_scores_d_pre(records_u, idx,
                                                     vid_candidates,
                                                     feature_al, is_train)
                scores_merge = torch.cat(
                    (scores_u, scores_t, scores_hl, scores_hs, scores_d_all,
                     scores_d_pre), 0).t()
                gap_time = (records_al[idx + 1].dt -
                            record.dt).total_seconds() / 60 / 60
                gap_time_int = int(gap_time)
                if is_train:
                    predicted_scores[id] = F.sigmoid(
                        F.linear(scores_merge,
                                 F.relu(self.merger_weight_al[gap_time_int]),
                                 bias=None).t())
                else:
                    predicted_scores.append(
                        F.softmax(
                            F.linear(scores_merge,
                                     F.relu(
                                         self.merger_weight_al[gap_time_int]),
                                     bias=None).t()))
            id += 1
        return predicted_scores, id_vids, id_vids_true
Exemple #53
0
 def inference(self, label_score, k=1):
     label_prob = F.softmax(label_score, dim=-1)
     label_prob, label_pred = label_prob.data.topk(k)
     return label_prob, label_pred
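# Hedged usage sketch for the inference helper above, assuming a small batch of
# raw label scores: softmax turns the scores into probabilities and topk returns
# the k most likely labels together with their probabilities.
import torch
import torch.nn.functional as F

scores = torch.randn(2, 5)             # raw scores for 2 examples over 5 labels
probs = F.softmax(scores, dim=-1)
top_prob, top_pred = probs.topk(3)     # 3 best labels per example, with probabilities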
Exemple #54
0
def evaluate_metrics(p,
                     img_dict,
                     model='resnet',
                     times=1,
                     metrics=[],
                     outpath_root='.',
                     labs_vs_gt=None):
    GT = labs_vs_gt[0]
    labs = labs_vs_gt[1]
    num_imgs = len(img_dict)
    model = model
    if model == 'resnet':
        arch = models.resnet18(pretrained=True).eval()
    elif model == 'vgg':
        arch = models.vgg16(pretrained=True).eval()
    elif model == 'alexnet':
        arch = models.alexnet(pretrained=True).eval()

    if torch.cuda.is_available():
        arch = arch.cuda()

    start = time.time()
    now = start
    times = times
    average_drop, increase_in_confidence = 0.0, 0.0
    deletion, insertion = [], []
    if metrics:  # only evaluate when at least one metric is requested
        for _ in range(times):
            for i, (k, img) in enumerate(img_dict.items()):
                outpath = outpath_root + f'{k}_{img}/'
                inp_0 = load_image(p + '/' + img)
                os.mkdir(outpath)
                inp_0.save(f'{outpath}{img}')
                inp = apply_transforms(inp_0)
                if torch.cuda.is_available():
                    inp = inp.cuda()
                #print(f'Before test.run: {round(time.time() - now, 0)}s')
                now = time.time()
                out, scorecam_map = expmap.get_explanation_map(arch=model,
                                                               img=p + '/' +
                                                               img)
                F.to_pil_image(
                    scorecam_map.squeeze(0)).save(f'{outpath}/exp_map.png')
                #print(f'After test.run: {round(time.time() - now, 0)}s')
                now = time.time()
                if torch.cuda.is_available():
                    scorecam_map = scorecam_map.cuda()
                #print(f'Before arch: {round(time.time() - now, 0)}s')
                now = time.time()
                out_sal = FF.softmax(arch(inp * scorecam_map), dim=1)
                #print(f'After arch: {round(time.time() - now, 0)}s')
                now = time.time()
                # print(type(out_sal),out_sal.shape)
                Y_i_c = out.max(1)[0].item()
                class_idx = out.max(1)[-1].item()
                class_name = labs[class_idx]
                gt_name = GT[str(img[-13:-5])][0].split()[1]
                O_i_c = out_sal[:, class_idx][0].item()
                # print(f'#-------------------------------------------------------------------#')
                # print(f'{Y_i_c},{out.max(1)[-1].item()},\n{O_i_c},{out_sal.max(1)[-1].item()}\n')
                # print(f'{Y_i_c},{O_i_c},{max(0.0,Y_i_c-O_i_c)},{max(0,Y_i_c-O_i_c)/Y_i_c}')
                # print('#-------------------------------------------------------------------#')
                if 'average_drop' in metrics and 'increase_in_confidence' in metrics:
                    average_drop, increase_in_confidence = ADIC.average_drop_and_increase_of_confidence(
                        average_drop, increase_in_confidence, Y_i_c, O_i_c)
                if 'deletion' in metrics and 'insertion' in metrics:
                    precision = 100
                    deletion, insertion = DAI.deletion_and_insertion(
                        deletion,
                        insertion,
                        inp,
                        scorecam_map,
                        arch,
                        step=1 / precision)
                    #print(deletion, insertion)

                    #deletion_score = round(torch.tensor(deletion).sum().item() / precision,3)
                    #insertion_score = round(torch.tensor(insertion).sum().item() / precision,3)
                    deletion_score = round(
                        SKM.auc(
                            torch.arange(0, 1, 1 / precision).numpy(),
                            torch.tensor(deletion).numpy()), 3)
                    insertion_score = round(
                        SKM.auc(
                            torch.arange(0, 1, 1 / precision).numpy(),
                            torch.tensor(insertion).numpy()), 3)
                    plot(torch.arange(0, 1, 1 / precision),
                         [deletion, insertion],
                         label=[
                             f'deletion={deletion_score}',
                             f'insertion={insertion_score}'
                         ],
                         path=f'{outpath}plot_{k}.png',
                         title=f'label={class_name}, GT={gt_name}')

                    print(f'The final deletion is: {deletion_score}')
                    print(f'The final insertion is: {insertion_score}')
                    deletion, insertion = [], []
                print(f'After one img: {int(time.time() - now)}s')
                now = time.time()

            print(f'In {num_imgs} images')
            if 'average_drop' in metrics and 'increase_in_confidence' in metrics:
                average_drop *= 100 / num_imgs
                increase_in_confidence *= 100 / num_imgs
                print(f'The final AVG drop is: {round(average_drop, 2)}%')
                print(
                    f'The final Increase in Confidence is: {round(increase_in_confidence, 2)}%'
                )

        print(f'Execution time: {int(time.time() - start)}s')
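# Hedged sketch of the ADIC.average_drop_and_increase_of_confidence accumulator
# used above, following the usual definitions from the class-activation-map
# evaluation literature: Y_i_c is the model's confidence on the full image and
# O_i_c its confidence on the explanation-masked image. The real helper may
# differ in details such as where the percentage scaling is applied.
def average_drop_and_increase_sketch(avg_drop, inc_conf, Y_i_c, O_i_c):
    avg_drop += max(0.0, Y_i_c - O_i_c) / Y_i_c      # relative confidence drop
    inc_conf += 1.0 if O_i_c > Y_i_c else 0.0        # count of confidence increases
    return avg_drop, inc_conf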
Exemple #55
0
 def forward(self, v, q, v_mask, q_mask):
     """
     v: visual feature      [batch, num_obj, feat_size]
     q: question            [batch, max_len, feat_size]
     v_mask                 [batch, num_obj]
     q_mask                 [batch, max_len]
     """
     batch_size, num_obj = v_mask.shape
     _, max_len = q_mask.shape
     # transform features
     v_trans = self.v_lin(v)
     q_trans = self.q_lin(q)
     # mask all padding object/word features
     if APPLY_MASK:
         v_trans = v_trans * v_mask.unsqueeze(2)
         q_trans = q_trans * q_mask.unsqueeze(2)
     # split into key, query, and value
     v_key, v_qry, v_val = torch.split(v_trans, v_trans.size(2) // 3, dim=2)
     q_key, q_qry, q_val = torch.split(q_trans, q_trans.size(2) // 3, dim=2)
     # apply multi-head
     v_key_set = torch.split(v_key, v_key.size(2) // self.num_head, dim=2)
     v_qry_set = torch.split(v_qry, v_qry.size(2) // self.num_head, dim=2)
     v_val_set = torch.split(v_val, v_val.size(2) // self.num_head, dim=2)
     q_key_set = torch.split(q_key, q_key.size(2) // self.num_head, dim=2)
     q_qry_set = torch.split(q_qry, q_qry.size(2) // self.num_head, dim=2)
     q_val_set = torch.split(q_val, q_val.size(2) // self.num_head, dim=2)
     # multi-head
     for i in range(self.num_head):
         v_key_slice, v_qry_slice, v_val_slice = v_key_set[i], v_qry_set[
             i], v_val_set[i]  #[batch, num_obj, feat_size]
         q_key_slice, q_qry_slice, q_val_slice = q_key_set[i], q_qry_set[
             i], q_val_set[i]  #[batch, max_len, feat_size]
         # inner product & set padding object/word attention to negative infinity & normalized by square root of hidden dimension
         q2v = (v_qry_slice @ q_key_slice.transpose(1, 2)) / (
             (self.output_size // self.num_head)**0.5
         )  #[batch, num_obj, max_len]
         v2q = (q_qry_slice @ v_key_slice.transpose(1, 2)) / (
             (self.output_size // self.num_head)**0.5
         )  #[batch, max_len, num_obj]
         if APPLY_MASK:
             q2v.masked_fill_(
                 q_mask.unsqueeze(1).expand([batch_size, num_obj,
                                             max_len]) == 0, -float('inf'))
             v2q.masked_fill_(
                 v_mask.unsqueeze(1).expand([batch_size, max_len,
                                             num_obj]) == 0, -float('inf'))
         # softmax attention
         interMAF_q2v = F.softmax(q2v, dim=2).unsqueeze(
             3)  #[batch, num_obj, max_len, 1]
         interMAF_v2q = F.softmax(v2q, dim=2).unsqueeze(
             3)  #[batch, max_len, num_obj, 1]
         # calculate update input (each head of multi-head is calculated independently and concatenate together)
         v_update = (interMAF_q2v * q_val_slice.unsqueeze(1)).sum(2) if (
             i == 0) else torch.cat(
                 (v_update,
                  (interMAF_q2v * q_val_slice.unsqueeze(1)).sum(2)),
                 dim=2)
         q_update = (interMAF_v2q * v_val_slice.unsqueeze(1)).sum(2) if (
             i == 0) else torch.cat(
                 (q_update,
                  (interMAF_v2q * v_val_slice.unsqueeze(1)).sum(2)),
                 dim=2)
     # update new feature
     cat_v = torch.cat((v, v_update), dim=2)
     cat_q = torch.cat((q, q_update), dim=2)
     updated_v = self.v_output(cat_v)
     updated_q = self.q_output(cat_q)
     return updated_v, updated_q
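# For a single head, the loop above reduces to masked scaled dot-product
# attention; a hedged stand-alone sketch (assumed shapes: q [B, Lq, d],
# k and v [B, Lk, d], mask [B, Lk] with 1 marking valid positions).
import torch
import torch.nn.functional as F

def masked_attention_sketch(q, k, v, mask):
    d = q.size(-1)
    scores = q @ k.transpose(1, 2) / d ** 0.5                        # [B, Lq, Lk]
    scores = scores.masked_fill(mask.unsqueeze(1) == 0, float('-inf'))
    attn = F.softmax(scores, dim=2)                                  # attend over keys
    return attn @ v                                                  # [B, Lq, d]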
 def masked_unk_softmax(self, x, dim, mask_idx):
     x1 = F.softmax(x, dim=dim)
     x1[:, mask_idx] = 0
     x1_sum = torch.sum(x1, dim=1, keepdim=True)
     y = x1 / x1_sum
     return y
Exemple #57
0
            img_name = sample['img_name'][0]
        img = cv2.imread(img_name)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        
        sample2 = {}
        sample2['img'] = img
        sample2 = transform(sample2)
        sample['img'] = sample2['img']
        
        img = sample['img']
        img = img.unsqueeze(0).to(device)
        #img_name = sample['img_name']

        with torch.no_grad():
            seg_pred, exist_pred = net(img)[:2]
        seg_pred = F.softmax(seg_pred, dim=1)
        seg_pred = seg_pred.cpu().numpy()
        exist_pred = exist_pred.cpu().numpy()

        b=0
        seg = seg_pred[b]
        exist = [1 if exist_pred[b, i] > 0.5 else 0 for i in range(4)]
        if dataset_name == 'Tusimple':
            lane_coords = getLane.prob2lines_tusimple(seg, exist, resize_shape=original_shape[::-1], y_px_gap=10, pts=56)
        elif dataset_name == 'CULane':
            lane_coords = getLane.prob2lines_CULane(seg, exist, resize_shape=original_shape[::-1], y_px_gap=20, pts=18)
        for i in range(len(lane_coords)):
            lane_coords[i] = sorted(lane_coords[i], key=lambda pair: pair[1])
            
        img_vis = cv2.imread(img_name)
        #img_vis = cv2.cvtColor(img_vis, cv2.COLOR_BGR2RGB)
Exemple #58
0
    def forward(self, v, q, v_mask, q_mask):
        """
        v: visual feature      [batch, num_obj, feat_size]
        q: question            [batch, max_len, feat_size]
        v_mask                 [batch, num_obj]
        q_mask                 [batch, max_len]
        """
        batch_size, num_obj = v_mask.shape
        _, max_len = q_mask.shape
        # conditioned gating vector
        if APPLY_MASK:
            v_mean = (v *
                      v_mask.unsqueeze(2)).sum(1) / v_mask.sum(1).unsqueeze(1)
            q_mean = (q *
                      q_mask.unsqueeze(2)).sum(1) / q_mask.sum(1).unsqueeze(1)
        else:
            v_mean = v.sum(1) / num_obj
            q_mean = q.sum(1) / max_len

        v4q_gate = self.sigmoid(self.v4q_gate_lin(v_mean)).unsqueeze(
            1)  #[batch, 1, feat_size]
        q4v_gate = self.sigmoid(self.q4v_gate_lin(q_mean)).unsqueeze(
            1)  #[batch, 1, feat_size]

        # key, query, value
        v_trans = self.v_lin(v)
        q_trans = self.q_lin(q)
        # mask all padding object/word features
        if APPLY_MASK:
            v_trans = v_trans * v_mask.unsqueeze(2)
            q_trans = q_trans * q_mask.unsqueeze(2)
        # split into key, query, and value
        v_key, v_qry, v_val = torch.split(v_trans, v_trans.size(2) // 3, dim=2)
        q_key, q_qry, q_val = torch.split(q_trans, q_trans.size(2) // 3, dim=2)
        # apply conditioned gate
        gated_v_qry = (1 + q4v_gate) * v_qry
        gated_v_key = (1 + q4v_gate) * v_key
        gated_v_val = (1 + q4v_gate) * v_val
        gated_q_qry = (1 + v4q_gate) * q_qry
        gated_q_key = (1 + v4q_gate) * q_key
        gated_q_val = (1 + v4q_gate) * q_val

        # apply multi-head
        v_key_set = torch.split(gated_v_key,
                                gated_v_key.size(2) // self.num_head,
                                dim=2)
        v_qry_set = torch.split(gated_v_qry,
                                gated_v_qry.size(2) // self.num_head,
                                dim=2)
        v_val_set = torch.split(gated_v_val,
                                gated_v_val.size(2) // self.num_head,
                                dim=2)
        q_key_set = torch.split(gated_q_key,
                                gated_q_key.size(2) // self.num_head,
                                dim=2)
        q_qry_set = torch.split(gated_q_qry,
                                gated_q_qry.size(2) // self.num_head,
                                dim=2)
        q_val_set = torch.split(gated_q_val,
                                gated_q_val.size(2) // self.num_head,
                                dim=2)
        # multi-head
        for i in range(self.num_head):
            v_key_slice, v_qry_slice, v_val_slice = v_key_set[i], v_qry_set[
                i], v_val_set[i]  #[batch, num_obj, feat_size]
            q_key_slice, q_qry_slice, q_val_slice = q_key_set[i], q_qry_set[
                i], q_val_set[i]  #[batch, max_len, feat_size]
            # calculate attention
            v2v = (v_qry_slice @ v_key_slice.transpose(1, 2)) / (
                (self.output_size // self.num_head)**0.5)
            q2q = (q_qry_slice @ q_key_slice.transpose(1, 2)) / (
                (self.output_size // self.num_head)**0.5)

            if APPLY_MASK:
                v2v.masked_fill_(
                    v_mask.unsqueeze(1).expand([batch_size, num_obj,
                                                num_obj]) == 0, -float('inf'))
                q2q.masked_fill_(
                    q_mask.unsqueeze(1).expand([batch_size, max_len,
                                                max_len]) == 0, -float('inf'))
            dyIntraMAF_v2v = F.softmax(v2v, dim=2).unsqueeze(
                3)  #[batch, num_obj, num_obj, 1]
            dyIntraMAF_q2q = F.softmax(q2q, dim=2).unsqueeze(
                3)  #[batch, max_len, max_len, 1]
            # calculate update input
            v_update = (dyIntraMAF_v2v * v_val_slice.unsqueeze(1)).sum(2) if (
                i == 0) else torch.cat(
                    (v_update,
                     (dyIntraMAF_v2v * v_val_slice.unsqueeze(1)).sum(2)),
                    dim=2)
            q_update = (dyIntraMAF_q2q * q_val_slice.unsqueeze(1)).sum(2) if (
                i == 0) else torch.cat(
                    (q_update,
                     (dyIntraMAF_q2q * q_val_slice.unsqueeze(1)).sum(2)),
                    dim=2)
        # update
        updated_v = self.v_output(v + v_update)
        updated_q = self.q_output(q + q_update)
        return updated_v, updated_q
Exemple #59
0
    def forward(self, x):
        """ The input should be of size [batch_size, 3, img_h, img_w] """
        _, _, img_h, img_w = x.size()
        cfg._tmp_img_h = img_h
        cfg._tmp_img_w = img_w

        with timer.env('backbone'):
            outs = self.backbone(x)

        if cfg.fpn is not None:
            with timer.env('fpn'):
                # Use backbone.selected_layers because we overwrote self.selected_layers
                outs = [outs[i] for i in cfg.backbone.selected_layers]
                outs = self.fpn(outs)

        proto_out = None
        if cfg.mask_type == mask_type.lincomb and cfg.eval_mask_branch:
            with timer.env('proto'):
                proto_x = x if self.proto_src is None else outs[self.proto_src]

                if self.num_grids > 0:
                    grids = self.grid.repeat(proto_x.size(0), 1, 1, 1)
                    proto_x = torch.cat([proto_x, grids], dim=1)

                proto_out = self.proto_net(proto_x)
                proto_out = cfg.mask_proto_prototype_activation(proto_out)

                if cfg.mask_proto_prototypes_as_features:
                    # Clone here because we don't want to permute this, though idk if contiguous makes this unnecessary
                    proto_downsampled = proto_out.clone()

                    if cfg.mask_proto_prototypes_as_features_no_grad:
                        proto_downsampled = proto_out.detach()

                # Move the features last so the multiplication is easy
                proto_out = proto_out.permute(0, 2, 3, 1).contiguous()

                if cfg.mask_proto_bias:
                    bias_shape = [x for x in proto_out.size()]
                    bias_shape[-1] = 1
                    proto_out = torch.cat(
                        [proto_out, torch.ones(*bias_shape)], -1)

        with timer.env('pred_heads'):
            pred_outs = {'loc': [], 'conf': [], 'mask': [], 'priors': []}

            if cfg.use_mask_scoring:
                pred_outs['score'] = []

            if cfg.use_instance_coeff:
                pred_outs['inst'] = []

            for idx, pred_layer in zip(self.selected_layers,
                                       self.prediction_layers):
                pred_x = outs[idx]

                if cfg.mask_type == mask_type.lincomb and cfg.mask_proto_prototypes_as_features:
                    # Scale the prototypes down to the current prediction layer's size and add it as inputs
                    proto_downsampled = F.interpolate(
                        proto_downsampled,
                        size=outs[idx].size()[2:],
                        mode='bilinear',
                        align_corners=False)
                    pred_x = torch.cat([pred_x, proto_downsampled], dim=1)

                # A hack for the way dataparallel works
                if cfg.share_prediction_module and pred_layer is not self.prediction_layers[
                        0]:
                    pred_layer.parent = [self.prediction_layers[0]]

                p = pred_layer(pred_x)

                for k, v in p.items():
                    pred_outs[k].append(v)

        for k, v in pred_outs.items():
            pred_outs[k] = torch.cat(v, -2)

        if proto_out is not None:
            pred_outs['proto'] = proto_out

        if self.training:
            # For the extra loss functions
            if cfg.use_class_existence_loss:
                pred_outs['classes'] = self.class_existence_fc(
                    outs[-1].mean(dim=(2, 3)))

            if cfg.use_semantic_segmentation_loss:
                pred_outs['segm'] = self.semantic_seg_conv(outs[0])

            return pred_outs
        else:
            if cfg.use_mask_scoring:
                pred_outs['score'] = torch.sigmoid(pred_outs['score'])

            if cfg.use_focal_loss:
                if cfg.use_sigmoid_focal_loss:
                    # Note: even though conf[0] exists, this mode doesn't train it so don't use it
                    pred_outs['conf'] = torch.sigmoid(pred_outs['conf'])
                    if cfg.use_mask_scoring:
                        pred_outs['conf'] *= pred_outs['score']
                elif cfg.use_objectness_score:
                    # See focal_loss_sigmoid in multibox_loss.py for details
                    objectness = torch.sigmoid(pred_outs['conf'][:, :, 0])
                    pred_outs['conf'][:, :,
                                      1:] = objectness[:, :, None] * F.softmax(
                                          pred_outs['conf'][:, :, 1:], -1)
                    pred_outs['conf'][:, :, 0] = 1 - objectness
                else:
                    pred_outs['conf'] = F.softmax(pred_outs['conf'], -1)
            else:

                if cfg.use_objectness_score:
                    objectness = torch.sigmoid(pred_outs['conf'][:, :, 0])

                    pred_outs['conf'][:, :, 1:] = (objectness > 0.10)[..., None] \
                        * F.softmax(pred_outs['conf'][:, :, 1:], dim=-1)

                else:
                    pred_outs['conf'] = F.softmax(pred_outs['conf'], -1)

            return self.detect(pred_outs, self)
  def train(self, save_embedding=False):
    self.set_mode('train')
    preprocessor = self.data_loader['train'].dataset.preprocessor
    temp_min = 0.1
    anneal_rate = self.anneal_rate
    temp = 1.

    total_loss = 0.
    total_step = 0
    for e in range(self.epoch):
      self.global_epoch += 1
      pred_word_labels = []
      gold_word_labels = []
      pred_phone_labels = []
      gold_phone_labels = []
      for b_idx, (audios, phoneme_labels, word_labels,\
                audio_masks, phone_masks, word_masks)\
          in enumerate(self.data_loader['train']):
        if b_idx > 2 and self.debug:
          break
        self.global_iter += 1
         
        x = cuda(audios, self.cuda)
        if self.audio_feature == "wav2vec2":
          x = self.audio_feature_net.feature_extractor(x)
        phoneme_labels = cuda(phoneme_labels, self.cuda)
        word_labels = cuda(word_labels, self.cuda)
        audio_masks = cuda(audio_masks, self.cuda)
        phone_masks = cuda(phone_masks, self.cuda)
        word_masks = cuda(word_masks, self.cuda)
        if self.audio_net.ds_ratio > 1:
          audio_masks = audio_masks[:, ::self.audio_net.ds_ratio]
          word_masks = word_masks[:, :, ::self.audio_net.ds_ratio]

        audio_lens = audio_masks.sum(-1).long()
        sent_lens = phone_masks.sum(-1).long()
        word_lens = (word_labels >= 0).long().sum(-1)

        phone_logits, word_logits, _, embedding = self.audio_net(
                               x, masks=audio_masks,
                               temp=temp,
                               num_sample=self.num_sample,
                               return_feat=True)
        
        # Compute phoneme one-hot vector
        phoneme_vectors = F.one_hot(phoneme_labels, self.n_phone_class)
        phone_denoised_logits,\
        phone_word_logits,\
        denoised_encodings,\
        embedding = self.phone_net(phoneme_vectors,
                                   temp=temp,
                                   num_sample=self.num_sample,
                                   return_feat=True)

        quantized = None
        if self.model_type == 'vq-mlp':
          word_logits = out_logits[:, :, :self.n_visual_class]
          quantized = out_logits[:, :, self.n_visual_class:]

        word_logits = torch.matmul(word_masks, word_logits)
        
        word_loss = F.cross_entropy(word_logits.permute(0, 2, 1), word_labels,\
                                    ignore_index=-100,
                                    ).div(math.log(2))
        info_loss = (F.softmax(phone_logits, dim=-1)\
                      * F.log_softmax(phone_logits, dim=-1)
                    ).sum().div(audio_lens.sum()*math.log(2)) 

        # Permutation-invariant CTC loss for multilingual phones
        batch_size = x.size(0)
        phone_word_losses = [] 
        num_words = np.where(word_masks.sum(-1) > 0, 
                             torch.tensor(1, device=x.device), 
                             torch.tensor(0, device=x.device)).sum(-1) 
        for idx in range(batch_size):
          word_orders = list(itertools.permutations(range(num_words[idx])))
          word_orders = word_orders[:200] # Limit the number of order
          phone_word_losses.append(torch.max(torch.stack(
                              [F.ctc_loss(F.log_softmax(phone_denoised_logits[idx], dim=-1)
                                            .permute(1, 0, 2),
                                          word_labels[word_order],
                                          sent_lens[idx],
                                          num_words[idx])
                               for word_order in word_orders])))
        phone_word_loss = torch.sum(torch.stack(phone_word_losses))
        phone_info_loss = (F.softmax(phone_denoised_logits, dim=-1)\
                      * F.log_softmax(phone_denoised_logits, dim=-1)
                    ).sum().div(sent_lens.sum()*math.log(2)) 
        

        # Use denoised phoneme labels for training the phoneme classifier
        phone_word_encodings = F.gumbel_softmax(phone_word_logits, 
                                                tau=temp,
                                                dim=-1)
        denoising_mask = torch.where(phone_word_encodings.max(-1)[1].detach() > 0,
                                     torch.tensor(1, device=x.device),
                                     torch.tensor(0, device=x.device)).detach()
        phoneme_labels_denoised = denoising_mask * denoised_encodings.max(-1)[1].detach()\
                                    + (1 - denoising_mask) * phoneme_labels 
        phone_loss = F.ctc_loss(F.log_softmax(phone_logits, dim=-1)\
                                  .permute(1, 0, 2),
                                phoneme_labels_denoised,
                                audio_lens,
                                sent_lens)
        audio_ib_loss = self.weight_phone_loss * phone_loss\
                        + self.weight_word_loss * word_loss\
                        + self.beta * info_loss

        phone_ib_loss = self.weight_phone_word_loss * phone_word_loss\
                        + self.beta * phone_info_loss # TODO weight_phone_word

        loss =  audio_ib_loss + phone_ib_loss
        if self.model_type == 'vq-mlp':
          loss += self.audio_net.quantize_loss(embedding, quantized,
                                               masks=audio_masks)

        izy_bound = math.log(self.n_visual_class, 2) - word_loss
        izx_bound = info_loss
        total_loss += loss.cpu().detach().numpy()
        total_step += 1.

        self.optim.zero_grad()
        loss.backward()
        if self.max_grad_norm is not None:
          torch.nn.utils.clip_grad_norm_(
            self.audio_net.parameters(),
            self.max_grad_norm
          )
        self.optim.step()
  
        for i in range(audios.size(0)):
          audio_len = audio_lens[i]
          sent_len = sent_lens[i]
          word_len = word_lens[i]

          gold_phone_label = phoneme_labels_denoised[i, :sent_len]
          pred_phone_label = phone_logits[i, :audio_len].max(-1)[1]
          gold_phone_labels.append(gold_phone_label.cpu().detach().numpy().tolist())
          pred_phone_labels.append(pred_phone_label.cpu().detach().numpy().tolist())

          if word_len > 0:
            gold_word_labels.append(word_labels[i, :word_len].cpu().detach().numpy().tolist())
            pred_word_label = word_logits[i, :word_len].max(-1)[1]
            pred_word_labels.append(pred_word_label.cpu().detach().numpy().tolist())

        if self.global_iter % 1000 == 0:
          temp = np.maximum(temp * np.exp(-anneal_rate * b_idx), temp_min)
          avg_loss = total_loss / total_step
          print(f'i:{self.global_iter:d} temp:{temp} avg loss (total loss):{avg_loss:.2f} ({total_loss:.2f}) '
                f'IZY:{izy_bound:.2f} IZX:{izx_bound:.2f} '
                f'phone_loss:{phone_loss:.5f} phone_word_loss:{phone_word_loss:.5f}')

      # Evaluate training visual word classification accuracy and phone token error rate
      acc = compute_accuracy(gold_word_labels, pred_word_labels)
      dist, n_tokens = compute_edit_distance(pred_phone_labels, gold_phone_labels, preprocessor)
      pter = float(dist) / float(n_tokens)
      print(f'Epoch {self.global_epoch}\ttraining visual word accuracy: {acc:.3f}\ttraining phone token error rate: {pter:.3f}')

      if (self.global_epoch % 2) == 0:
        self.scheduler.step()
      self.test(save_embedding=save_embedding)
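# Hedged sketch of the `info_loss` terms above: the (negative) entropy of the
# softmax distribution, summed over frames, normalized by the total sequence
# length, and converted to bits; this plays the role of the I(Z;X)-style
# regularizer in the information-bottleneck objective used by this trainer.
import math
import torch
import torch.nn.functional as F

def negative_entropy_bits_sketch(logits, lengths):
    p = F.softmax(logits, dim=-1)
    log_p = F.log_softmax(logits, dim=-1)
    return (p * log_p).sum().div(lengths.sum() * math.log(2))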