Example #1
 def cls_loss(self,gt_label,pred_label):
     # build a mask of elements >= 0; only labels 0 and 1 affect the detection loss
     pred_label = torch.squeeze(pred_label)
     mask = torch.ge(gt_label,0)
     valid_gt_label = torch.masked_select(gt_label,mask).float()
     valid_pred_label = torch.masked_select(pred_label,mask)
     return self.loss_cls(valid_pred_label,valid_gt_label)
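
A standalone sketch of how the masking in cls_loss behaves, assuming self.loss_cls is a binary cross-entropy loss and that -1 marks samples to ignore (both are assumptions, the snippet above does not show them):

import torch
import torch.nn as nn

loss_cls = nn.BCELoss()  # assumed stand-in for self.loss_cls

gt_label = torch.tensor([1.0, 0.0, -1.0, 1.0])           # -1 marks an ignored sample
pred_label = torch.tensor([[0.9], [0.2], [0.7], [0.6]])  # probabilities, shape (4, 1)

pred = torch.squeeze(pred_label)
mask = torch.ge(gt_label, 0)                              # keep only labels 0 and 1
valid_gt = torch.masked_select(gt_label, mask).float()
valid_pred = torch.masked_select(pred, mask)
print(loss_cls(valid_pred, valid_gt))                     # BCE over the 3 kept samples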
def train_multilabel(features, targets, classes, train_split, test_split, C=1.0, ignore_hard_examples=True, after_ReLU=False, normalize_L2=False):
    print('\nHyperparameters:\n - C: {}\n - after_ReLU: {}\n - normL2: {}'.format(C, after_ReLU, normalize_L2))
    train_APs = []
    test_APs = []
    for class_id in range(len(classes)):
        
        classifier = SVC(C=C, kernel='linear') # http://scikit-learn.org/stable/modules/generated/sklearn.svm.SVC.html
        
        if ignore_hard_examples:
            train_masks = (targets[train_split][:,class_id] != 0).view(-1, 1)
            train_features = torch.masked_select(features[train_split], train_masks.expand_as(features[train_split])).view(-1,features[train_split].size(1))
            train_targets = torch.masked_select(targets[train_split], train_masks.expand_as(targets[train_split])).view(-1,targets[train_split].size(1))
            test_masks = (targets[test_split][:,class_id] != 0).view(-1, 1)
            test_features = torch.masked_select(features[test_split], test_masks.expand_as(features[test_split])).view(-1,features[test_split].size(1))
            test_targets = torch.masked_select(targets[test_split], test_masks.expand_as(targets[test_split])).view(-1,targets[test_split].size(1))
        else:
            train_features = features[train_split]
            train_targets = targets[train_split]
            test_features = features[test_split]
            test_targets = targets[test_split]

        if after_ReLU:
            train_features[train_features < 0] = 0
            test_features[test_features < 0] = 0

        if normalize_L2:
            train_norm = torch.norm(train_features, p=2, dim=1).unsqueeze(1)
            train_features = train_features.div(train_norm.expand_as(train_features))
            test_norm = torch.norm(test_features, p=2, dim=1).unsqueeze(1)
            test_features = test_features.div(test_norm.expand_as(test_features))

        train_X = train_features.numpy()
        train_y = (train_targets[:,class_id] != -1).numpy() # uses hard examples if not ignored

        test_X = test_features.numpy()
        test_y = (test_targets[:,class_id] != -1).numpy()

        classifier.fit(train_X, train_y) # train parameters of the classifier

        train_preds = classifier.predict(train_X)
        train_acc = accuracy_score(train_y, train_preds) * 100
        train_AP = average_precision_score(train_y, train_preds) * 100
        train_APs.append(train_AP)

        test_preds = classifier.predict(test_X)
        test_acc = accuracy_score(test_y, test_preds) * 100
        test_AP = average_precision_score(test_y, test_preds) * 100
        test_APs.append(test_AP)

        print('class "{}" ({}/{}):'.format(classes[class_id], test_y.sum(), test_y.shape[0]))
        print('  - {:8}: acc {:.2f}, AP {:.2f}'.format(train_split, train_acc, train_AP))
        print('  - {:8}: acc {:.2f}, AP {:.2f}'.format(test_split, test_acc, test_AP))

    print('all classes:')
    print('  - {:8}: mAP {:.4f}'.format(train_split, sum(train_APs)/len(classes)))
    print('  - {:8}: mAP {:.4f}'.format(test_split, sum(test_APs)/len(classes)))
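
The hard-example filtering above hinges on expanding a per-row mask over the feature dimension before masked_select; a minimal sketch of that pattern with made-up shapes (on recent PyTorch, plain boolean indexing selects the same rows):

import torch

features = torch.randn(5, 3)                                   # 5 samples, 3 features
targets = torch.tensor([[1, -1], [0, 1], [1, 1], [0, -1], [1, 0]])
class_id = 1

# keep only rows whose target for class_id is non-zero (i.e. not a "hard" example)
row_mask = (targets[:, class_id] != 0).view(-1, 1)
kept_features = torch.masked_select(features, row_mask.expand_as(features)).view(-1, features.size(1))
kept_targets = torch.masked_select(targets, row_mask.expand_as(targets)).view(-1, targets.size(1))

# boolean indexing gives the same rows
assert torch.equal(kept_features, features[row_mask.view(-1)])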
Example #3
    def compute_accuracy(self, prob_cls, gt_cls):
        # we only use detections whose ground-truth class is >= 0
        prob_cls = torch.squeeze(prob_cls)
        mask = torch.ge(gt_cls, 0)
        #get valid element
        valid_gt_cls = torch.masked_select(gt_cls, mask)
        valid_prob_cls = torch.masked_select(prob_cls, mask)
        size = min(valid_gt_cls.size()[0], valid_prob_cls.size()[0])
        prob_ones = torch.ge(valid_prob_cls, 0.6).float()
        right_ones = torch.eq(prob_ones, valid_gt_cls.float()).float()

        return torch.sum(right_ones) / float(size)
def compute_stage_loss(criterion, targets, outputs, masks):
  assert isinstance(outputs, list), 'The outputs type is wrong : {:}'.format(type(outputs))
  total_loss = 0
  each_stage_loss = []
  
  for output in outputs:
    stage_loss = 0
    output = torch.masked_select(output , masks)
    target = torch.masked_select(targets, masks)

    stage_loss = criterion(output, target)
    total_loss = total_loss + stage_loss
    each_stage_loss.append(stage_loss.item())
  return total_loss, each_stage_loss
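
A small usage sketch for compute_stage_loss, assuming an MSE criterion and made-up heatmap shapes (batch of 2, 4 joints, 8x8 maps):

import torch
import torch.nn as nn

outputs = [torch.randn(2, 4, 8, 8, requires_grad=True) for _ in range(3)]  # 3 stages
targets = torch.randn(2, 4, 8, 8)
masks = torch.ones(2, 4, 8, 8, dtype=torch.bool)  # in practice: per-joint visibility mask

criterion = nn.MSELoss()
total_loss, each_stage_loss = compute_stage_loss(criterion, targets, outputs, masks)
total_loss.backward()
print(each_stage_loss)  # one float per stage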
Example #5
    def updateGradInput(self, input, gradOutput):
        input, mask = input
        if input.type() == 'torch.cuda.FloatTensor':
            torch.arange(0, mask.nelement(), out=self._maskIndexBufferCPU).resize_(mask.size())
            self._maskIndexBuffer.resize_(self._maskIndexBufferCPU.size()).copy_(self._maskIndexBufferCPU)
        else:
            torch.arange(0, mask.nelement(), out=self._maskIndexBuffer).resize_(mask.size())

        torch.masked_select(self._maskIndexBuffer, mask, out=self._maskIndices)
        self._gradBuffer.resize_(input.nelement()).zero_()
        self._gradBuffer.scatter_(0, self._maskIndices, gradOutput)
        self._gradBuffer.resize_(input.size())
        self.gradInput = [self._gradBuffer, self._gradMask.resize_(mask.size()).fill_(0)]
        return self.gradInput
Example #6
    def split_on_targets(self, hiddens, targets):
        # Split the targets into those in the head and in the tail
        split_targets = []
        split_hiddens = []

        # Determine to which split each element belongs (for each start split value, add 1 if equal or greater)
        # This method appears slower at least for WT-103 values for approx softmax
        #masks = [(targets >= self.splits[idx]).view(1, -1) for idx in range(1, self.nsplits)]
        #mask = torch.sum(torch.cat(masks, dim=0), dim=0)
        ###
        # This is equally fast for smaller splits as method below but scales linearly
        mask = None
        for idx in range(1, self.nsplits):
            partial_mask = targets >= self.splits[idx]
            mask = mask + partial_mask if mask is not None else partial_mask
        ###
        #masks = torch.stack([targets] * (self.nsplits - 1))
        #mask = torch.sum(masks >= self.split_starts, dim=0)
        for idx in range(self.nsplits):
            # If there are no splits, avoid costly masked select
            if self.nsplits == 1:
                split_targets, split_hiddens = [targets], [hiddens]
                continue
            # If all the words are covered by earlier targets, we have empties so later stages don't freak out
            if sum(len(t) for t in split_targets) == len(targets):
                split_targets.append([])
                split_hiddens.append([])
                continue
            # Are you in our split?
            tmp_mask = mask == idx
            split_targets.append(torch.masked_select(targets, tmp_mask))
            split_hiddens.append(hiddens.masked_select(tmp_mask.unsqueeze(1).expand_as(hiddens)).view(-1, hiddens.size(1)))
        return split_targets, split_hiddens
def rpn_bbox_loss(target_bbox, rpn_match, rpn_bbox, config):
    """Return the RPN bounding box loss graph.

    config: the model config object.
    target_bbox: [batch, max positive anchors, (dy, dx, log(dh), log(dw))].
        Uses 0 padding to fill in unused bbox deltas.
    rpn_match: [batch, anchors, 1]. Anchor match type. 1=positive,
               -1=negative, 0=neutral anchor.
    rpn_bbox: [batch, anchors, (dy, dx, log(dh), log(dw))]
    """
    # Positive anchors contribute to the loss, but negative and
    # neutral anchors (match value of 0 or -1) don't.   
    indices = torch.eq(rpn_match, 1) 
    rpn_bbox = torch.masked_select(rpn_bbox, indices)
    batch_counts = torch.sum(indices.float(), dim=1)
        
    outputs = []
    for i in range(config.IMAGES_PER_GPU):
#        print(batch_counts[i].cpu().data.numpy()[0])
        outputs.append(target_bbox[i, torch.arange(int(batch_counts[i].cpu().data.numpy()[0])).type(torch.cuda.LongTensor)])
    
    target_bbox = torch.cat(outputs, dim=0)
    
    loss = F.smooth_l1_loss(rpn_bbox, target_bbox, size_average=True)
    return loss
def mrcnn_bbox_loss(target_bbox, target_class_ids, pred_bbox):
    """Loss for Mask R-CNN bounding box refinement.

    target_bbox: [batch, num_rois, (dy, dx, log(dh), log(dw))]
    target_class_ids: [batch, num_rois]. Integer class IDs.
    pred_bbox: [batch, num_rois, num_classes, (dy, dx, log(dh), log(dw))]
    """
    # Reshape to merge batch and roi dimensions for simplicity.
    target_class_ids = target_class_ids.contiguous().view(-1)
    target_bbox = target_bbox.contiguous().view(-1, 4)
    pred_bbox = pred_bbox.contiguous().view(-1, pred_bbox.size()[2], 4)
#    print(target_class_ids)

    # Only positive ROIs contribute to the loss. And only
    # the right class_id of each ROI. Get their indices.
    positive_roi_ix = torch.gt(target_class_ids , 0)
#    print(positive_roi_ix)
    positive_roi_class_ids = torch.masked_select(target_class_ids, positive_roi_ix)
    
    indices = target_class_ids
#    indices = torch.stack([positive_roi_ix, positive_roi_class_ids], dim=1)
#    print(indices)
    # Gather the deltas (predicted and true) that contribute to loss
#    target_bbox = torch.gather(target_bbox, positive_roi_ix)
#    pred_bbox = torch.gather(pred_bbox, indices)

    loss = F.smooth_l1_loss(pred_bbox, target_bbox, size_average=True)
    return loss
Example #9
    def forward(self, feat, right, wrong, batch_wrong, fake=None, fake_diff_mask=None):

        num_wrong = wrong.size(1)
        batch_size = feat.size(0)

        feat = feat.view(-1, self.ninp, 1)
        right_dis = torch.bmm(right.view(-1, 1, self.ninp), feat)
        wrong_dis = torch.bmm(wrong, feat)
        batch_wrong_dis = torch.bmm(batch_wrong, feat)

        wrong_score = torch.sum(torch.exp(wrong_dis - right_dis.expand_as(wrong_dis)),1) \
                + torch.sum(torch.exp(batch_wrong_dis - right_dis.expand_as(batch_wrong_dis)),1)

        loss_dis = torch.sum(torch.log(wrong_score + 1))
        loss_norm = right.norm() + feat.norm() + wrong.norm() + batch_wrong.norm()

        if fake is not None:
            fake_dis = torch.bmm(fake.view(-1, 1, self.ninp), feat)
            fake_score = torch.masked_select(torch.exp(fake_dis - right_dis), fake_diff_mask)

            margin_score = F.relu(torch.log(fake_score + 1) - self.margin)
            loss_fake = torch.sum(margin_score)
            loss_dis += loss_fake
            loss_norm += fake.norm()

        loss = (loss_dis + 0.1 * loss_norm) / batch_size
        if fake is not None:
            return loss, loss_fake.data[0] / batch_size
        else:
            return loss
Example #10
    def forward(self, input, target, mask):

        logprob_select = torch.gather(input, 1, target)

        out = torch.masked_select(logprob_select, mask)

        loss = -torch.sum(out) / mask.float().sum()
        return loss
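
The loss above pairs gather (pick the log-probability of each target word) with masked_select (drop padded positions); a self-contained sketch with assumed shapes:

import torch
import torch.nn.functional as F

logprob = F.log_softmax(torch.randn(3, 5), dim=1)  # 3 positions over a 5-word vocabulary
target = torch.tensor([[2], [0], [4]])             # target word index per position, (3, 1)
mask = torch.tensor([[True], [True], [False]])     # last position is padding

logprob_select = torch.gather(logprob, 1, target)  # (3, 1) log-prob of each target word
out = torch.masked_select(logprob_select, mask)    # keep only the non-padded positions
loss = -torch.sum(out) / mask.float().sum()        # masked average negative log-likelihood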
Example #11
def plot_clusters(num, e, centers, points, fig, model):
    plt.figure(0)
    plt.clf()
    plt.gca().set_xlim([-0.05,1.05])
    plt.gca().set_ylim([-0.05,1.05])
    clusters = e[fig].max()+1
    colors = cm.rainbow(np.linspace(0,1,clusters))
    for i in range(clusters):
        c = colors[i][:-1]
        mask = e[fig] == i
        x = torch.masked_select(points[fig,:,0], mask)
        y = torch.masked_select(points[fig,:,1], mask)
        plt.plot(x.cpu().numpy(), y.cpu().numpy(), 'o', c=rgb2hex(c))
        if centers is not None:
            center = centers[i]
            plt.plot([center.data[0]], [center.data[1]], '*', c=rgb2hex(c))
    plt.title('clustering')
    plt.savefig('./plots/clustering_it_{}_{}.png'.format(num, model))
Example #12
    def forward(self, input, target):
        logprob_select = torch.gather(input, 1, target)

        mask = target.data.gt(0)  # generate the mask
        if isinstance(input, Variable):
            mask = Variable(mask, volatile=input.volatile)
        
        out = torch.masked_select(logprob_select, mask)

        loss = -torch.sum(out)  # negative sum of the selected log-probabilities (not averaged)
        return loss
def rpn_class_loss(rpn_match, rpn_class_logits):
    """RPN anchor classifier loss.

    rpn_match: [batch, anchors, 1]. Anchor match type. 1=positive,
               -1=negative, 0=neutral anchor.
    rpn_class_logits: [batch, anchors, 2]. RPN classifier logits for FG/BG.
    """
    # Get anchor classes. Convert the -1/+1 match to 0/1 values.    
    anchor_class = torch.eq(rpn_match, 1)
    # Positive and Negative anchors contribute to the loss,
    # but neutral anchors (match value = 0) don't.    
    indices = torch.ne(rpn_match, 0.)

    rpn_class_logits = torch.masked_select(rpn_class_logits, indices)
    anchor_class = torch.masked_select(anchor_class, indices)

    rpn_class_logits = rpn_class_logits.contiguous().view(-1, 2)

    anchor_class = anchor_class.contiguous().view(-1).type(torch.cuda.LongTensor)
    loss = F.cross_entropy(rpn_class_logits, anchor_class, weight=None)
    return loss
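
Note that the mask here has shape [batch, anchors, 1] while the logits have shape [batch, anchors, 2]; masked_select broadcasts the mask and returns a flat vector, which is why the .view(-1, 2) is needed. A tiny sketch with made-up values:

import torch

rpn_match = torch.tensor([[[1], [-1], [0], [1]]])  # (1, 4, 1): 1=positive, -1=negative, 0=neutral
rpn_class_logits = torch.randn(1, 4, 2)            # (1, 4, 2)

indices = torch.ne(rpn_match, 0.)                  # keep positive and negative anchors
anchor_class = torch.eq(rpn_match, 1)              # positive -> 1, negative -> 0

# the (1, 4, 1) mask broadcasts over the logit dimension, so masked_select returns
# 2 * num_selected values; view(-1, 2) restores the (num_selected, 2) layout
logits = torch.masked_select(rpn_class_logits, indices).view(-1, 2)
labels = torch.masked_select(anchor_class, indices).long()
print(logits.shape, labels)                        # torch.Size([3, 2]) tensor([1, 0, 1])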
Example #14
def test_net(save_folder, net, cuda, dataset, transform, top_k,
             im_size=300, thresh=0.05):
    num_images = len(dataset)
    # all detections are collected into:
    #    all_boxes[cls][image] = N x 5 array of detections in
    #    (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in range(num_images)]
                 for _ in range(len(labelmap)+1)]

    # timers
    _t = {'im_detect': Timer(), 'misc': Timer()}
    output_dir = get_output_dir('ssd300_120000', set_type)
    det_file = os.path.join(output_dir, 'detections.pkl')

    for i in range(num_images):
        im, gt, h, w = dataset.pull_item(i)

        x = Variable(im.unsqueeze(0))
        if args.cuda:
            x = x.cuda()
        _t['im_detect'].tic()
        detections = net(x).data
        detect_time = _t['im_detect'].toc(average=False)

        # skip j = 0, because it's the background class
        for j in range(1, detections.size(1)):
            dets = detections[0, j, :]
            mask = dets[:, 0].gt(0.).expand(5, dets.size(0)).t()
            dets = torch.masked_select(dets, mask).view(-1, 5)
            if dets.size(0) == 0:
                continue
            boxes = dets[:, 1:]
            boxes[:, 0] *= w
            boxes[:, 2] *= w
            boxes[:, 1] *= h
            boxes[:, 3] *= h
            scores = dets[:, 0].cpu().numpy()
            cls_dets = np.hstack((boxes.cpu().numpy(),
                                  scores[:, np.newaxis])).astype(np.float32,
                                                                 copy=False)
            all_boxes[j][i] = cls_dets

        print('im_detect: {:d}/{:d} {:.3f}s'.format(i + 1,
                                                    num_images, detect_time))

    with open(det_file, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)

    print('Evaluating detections')
    evaluate_detections(all_boxes, output_dir, dataset)
Example #15
    def forward(self, prob, target, reward):
        """
        Args:
            prob: (N, C), torch Variable
            target : (N, ), torch Variable
            reward : (N, ), torch Variable
        """
        prob=prob.view(-1,prob.size(2)).contiguous()
        N = target.size(0)
        C = prob.size(1)
        one_hot = torch.zeros((N, C))
        if prob.is_cuda:
            one_hot = one_hot.cuda()

        one_hot.scatter_(1, target.data.view((-1,1)), 1)
        one_hot = one_hot.type(torch.ByteTensor)
        one_hot = Variable(one_hot)
        if prob.is_cuda:
            one_hot = one_hot.cuda()
        loss = torch.masked_select(prob, one_hot)
        loss = loss * reward
        loss =  -torch.sum(loss)
        return loss
Example #16
def multiclass_nms(multi_bboxes,
                   multi_scores,
                   score_thr,
                   nms_cfg,
                   max_num=-1,
                   score_factors=None):
    """NMS for multi-class bboxes.

    Args:
        multi_bboxes (Tensor): shape (n, #class*4) or (n, 4)
        multi_scores (Tensor): shape (n, #class), where the last column
            contains scores of the background class, but this will be ignored.
        score_thr (float): bbox threshold, bboxes with scores lower than it
            will not be considered.
        nms_cfg (dict): NMS config, e.g. the NMS type and IoU threshold.
        max_num (int): if there are more than max_num bboxes after NMS,
            only top max_num will be kept.
        score_factors (Tensor): The factors multiplied to scores before
            applying NMS

    Returns:
        tuple: (bboxes, labels), tensors of shape (k, 5) and (k, 1). Labels \
            are 0-based.
    """
    num_classes = multi_scores.size(1) - 1
    # exclude background category
    if multi_bboxes.shape[1] > 4:
        bboxes = multi_bboxes.view(multi_scores.size(0), -1, 4)
    else:
        bboxes = multi_bboxes[:, None].expand(multi_scores.size(0),
                                              num_classes, 4)
    scores = multi_scores[:, :-1]

    # filter out boxes with low scores
    valid_mask = scores > score_thr

    # We use masked_select for ONNX exporting purpose,
    # which is equivalent to bboxes = bboxes[valid_mask]
    # (TODO): as ONNX does not support repeat now,
    # we have to use this ugly code
    bboxes = torch.masked_select(
        bboxes,
        torch.stack((valid_mask, valid_mask, valid_mask, valid_mask),
                    -1)).view(-1, 4)
    if score_factors is not None:
        scores = scores * score_factors[:, None]
    scores = torch.masked_select(scores, valid_mask)
    labels = valid_mask.nonzero(as_tuple=False)[:, 1]

    if bboxes.numel() == 0:
        bboxes = multi_bboxes.new_zeros((0, 5))
        labels = multi_bboxes.new_zeros((0, ), dtype=torch.long)

        if torch.onnx.is_in_onnx_export():
            raise RuntimeError('[ONNX Error] Can not record NMS '
                               'as it has not been executed this time')
        return bboxes, labels

    dets, keep = batched_nms(bboxes, scores, labels, nms_cfg)

    if max_num > 0:
        dets = dets[:max_num]
        keep = keep[:max_num]

    return dets, labels[keep]
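
As the comment above notes, the masked_select with a stacked mask is just an ONNX-exportable way of writing bboxes[valid_mask]; a minimal check of that equivalence with made-up shapes:

import torch

bboxes = torch.rand(3, 2, 4)   # (n, num_classes, 4)
scores = torch.rand(3, 2)      # (n, num_classes)
valid_mask = scores > 0.5

kept_a = torch.masked_select(
    bboxes,
    torch.stack((valid_mask, valid_mask, valid_mask, valid_mask), -1)).view(-1, 4)
kept_b = bboxes[valid_mask]    # plain boolean indexing gives the same rows
assert torch.equal(kept_a, kept_b)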
Example #17
def perform_qlearning_step(policy_net, target_net, optimizer, replay_buffer, batch_size, gamma, device):
    """ Perform a deep Q-learning step
    Parameters
    -------
    policy_net: torch.nn.Module
        policy Q-network
    target_net: torch.nn.Module
        target Q-network
    optimizer: torch.optim.Adam
        optimizer
    replay_buffer: ReplayBuffer
        replay memory storing transitions
    batch_size: int
        size of batch to sample from replay memory
    gamma: float
        discount factor used in Q-learning update
    device: torch.device
        device on which the models are allocated
    Returns
    -------
    float
        loss value for current learning step
    """
    #    1.1 Sample transitions from replay_buffer
    obs_batch, act_batch, rew_batch, next_obs_batch, done_mask = replay_buffer.sample(batch_size)
    obs_batch, act_batch, rew_batch, next_obs_batch, done_mask = torch.tensor(obs_batch), torch.tensor(act_batch), torch.tensor(rew_batch), torch.tensor(next_obs_batch), torch.tensor(done_mask)

    #    1.2 Squeeze observations (add a dimension)

    logging.debug("Shapes: obs_batch=%s, act_batch=%s, rew_batch=%s, next_obs_batch=%s, done_mask=%s" % (
          obs_batch.shape, act_batch.shape, rew_batch.shape, next_obs_batch.shape, done_mask.shape))

    #    2. Compute Q(s_t, a)
    # ASSUMING ACTION IS AN INDEX (squeeze makes ((a, b, c)) to (a, b, c)
    q_batch = policy_net(obs_batch)
    mask = torch.zeros(q_batch.shape).type(torch.ByteTensor)
    for idx, a in enumerate(act_batch):
        mask[idx][a] = 1
    q_batch = torch.masked_select(q_batch, mask)

    #    3. Compute \max_a Q(s_{t+1}, a) for all next states.
    q_next_batch = target_net(next_obs_batch)
    q_next_batch = torch.max(q_next_batch, 1)[0]

    #    4. Mask next state values where episodes have terminated
    # Following nature-paper page 7 Algorithm 1 this means replacing q_next_batch with rewards (so zero here) for terminations
    done_mask = done_mask.type(torch.ByteTensor)
    q_next_batch = q_next_batch.masked_fill(done_mask, 0)

    #    5. Compute the target
    q_next_batch *= gamma
    target = rew_batch + q_next_batch

    # Reset gradients
    optimizer.zero_grad()

    #    6. Compute the loss
    logging.debug("Targets: %s" % target[:5])
    criterion = torch.nn.MSELoss()
    loss = criterion(target, q_batch)

    #    7. Calculate the gradients
    loss.backward()

    #    8. Clip the gradients
    torch.nn.utils.clip_grad_value_(policy_net.parameters(), 1)

    #    9. Optimize the model
    optimizer.step()

    return loss.item()
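
Selecting Q(s, a) with a hand-built mask and masked_select, as done in step 2 above, is equivalent to a gather along the action dimension; a short sketch with assumed shapes:

import torch

q_batch = torch.randn(4, 3)             # 4 samples, 3 actions
act_batch = torch.tensor([2, 0, 1, 2])

mask = torch.zeros(q_batch.shape, dtype=torch.bool)
for idx, a in enumerate(act_batch):     # loop-built mask, as in the step above
    mask[idx][a] = True
q_selected = torch.masked_select(q_batch, mask)

q_gathered = q_batch.gather(1, act_batch.unsqueeze(1)).squeeze(1)
assert torch.equal(q_selected, q_gathered)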
Example #18
 def boolean_mask(self, tensor, mask):
     mask = self.astensor(mask).type(torch.ByteTensor).cuda()
     return torch.masked_select(tensor, mask)
Example #19
    def forward_features(self, x):
        B = x.shape[0]
        device = x.device
        outs = []
        img = x

        # stage 1 Unchanged
        x, (H, W) = self.patch_embed1(x)
        x = x + self.pos_embed1
        x = self.pos_drop1(x)
        for blk in self.block1:
            x = blk(x, H, W)

        # stage 2
        y_map, x_map = torch.meshgrid(
            torch.arange(H, device=device).float() / (H - 1),
            torch.arange(W, device=device).float() / (W - 1))
        xy_map = torch.stack((x_map, y_map), dim=-1)
        loc = xy_map.reshape(-1, 2)[None, ...].repeat([B, 1, 1])

        # split into grid and adaptive tokens
        pos = torch.arange(x.shape[1], dtype=torch.long, device=x.device)
        tmp = pos.reshape([H, W])
        grid_stride = self.grid_stride
        pos_grid = tmp[grid_stride // 2:H:grid_stride,
                       grid_stride // 2:W:grid_stride]
        pos_grid = pos_grid.reshape([-1])
        mask = torch.ones(pos.shape, dtype=torch.bool, device=pos.device)
        mask[pos_grid] = 0
        pos_ada = torch.masked_select(pos, mask)

        x_grid = torch.index_select(x, 1, pos_grid)
        x_ada = torch.index_select(x, 1, pos_ada)
        loc_grid = torch.index_select(loc, 1, pos_grid)
        loc_ada = torch.index_select(loc, 1, pos_ada)

        x = torch.cat([x_grid, x_ada], 1)
        loc = torch.cat([loc_grid, loc_ada], 1)
        N_grid = x_grid.shape[1]

        if vis:
            outs.append((x, loc, [H, W]))

        # stage 2
        x, loc = self.down_layers1(x, loc, self.pos_embed2, H, W,
                                   self.pos_size, N_grid)  # down sample
        H, W = H // 2, W // 2
        for blk in self.block2:
            x = blk(x, x, loc, H, W)
        if vis:
            outs.append((x, loc, [H, W]))

        # stage 3
        x, loc = self.down_layers2(x, loc, self.pos_embed3, H, W,
                                   self.pos_size, N_grid)  # down sample
        H, W = H // 2, W // 2
        for blk in self.block3:
            x = blk(x, x, loc, H, W)
        if vis:
            outs.append((x, loc, [H, W]))

        # stage 4
        x, loc = self.down_layers3(x, loc, self.pos_embed4, H, W,
                                   self.pos_size, N_grid)  # down sample
        H, W = H // 2, W // 2
        cls_tokens = self.cls_token.expand(B, -1, -1)
        x = torch.cat((cls_tokens, x), dim=1)
        for blk in self.block4:
            x = blk(x, x, loc, H, W)

        if vis:
            outs.append((x, loc, [H, W]))
            # show_tokens(img, outs, N_grid)
            if self.num % 1 == 0:
                show_tokens(img, outs, N_grid)
            self.num = self.num + 1

        x = self.norm(x)
        return x[:, 0]
Example #20
    def forward(self, gt, pre, pre1, pre2, weight1, bias1, weight2, bias2,
                feat1, feat2, flag):

        N = gt.size(0)

        mask = flag.eq(1)

        pre_label1 = torch.masked_select(pre1, mask)
        pre_label1 = pre_label1.view(-1, self.AU_num)

        pre_label2 = torch.masked_select(pre2, mask)
        pre_label2 = pre_label2.view(-1, self.AU_num)

        pre_label = torch.masked_select(pre, mask)
        pre_label = pre_label.view(-1, self.AU_num)

        gt = torch.masked_select(gt, mask)
        gt = gt.view(-1, self.AU_num)

        if bool(gt.numel()):
            loss_pred = self.lossfunc(pre_label, gt)
            loss_pred1 = self.lossfunc(pre_label1, gt)
            loss_pred2 = self.lossfunc(pre_label2, gt)
        else:
            loss_pred = Variable(torch.FloatTensor([0])).cuda()
            loss_pred1 = Variable(torch.FloatTensor([0])).cuda()
            loss_pred2 = Variable(torch.FloatTensor([0])).cuda()

        if self.fusion_mode == 0:
            loss_BCE = (loss_pred1 + loss_pred2) / 2
        else:
            loss_BCE = loss_pred + (loss_pred1 + loss_pred2) / 2

        ############### loss multi-view ########
        loss_multi_view = torch.FloatTensor([0])
        loss_multi_view = loss_multi_view.cuda()

        bias1 = bias1.view(self.AU_num, -1)
        feat1 = torch.cat((weight1, bias1), 1)
        bias2 = bias2.view(self.AU_num, -1)
        feat2 = torch.cat((weight2, bias2), 1)

        tmp = torch.norm(feat1, 2, 1)
        feat_norm1 = feat1 / tmp.view(self.AU_num, -1)
        tmp = torch.norm(feat2, 2, 1)
        feat_norm2 = feat2 / tmp.view(self.AU_num, -1)

        x = feat_norm1 * feat_norm2
        x = torch.sum(x, 1)
        loss_weight_orth = torch.mean(torch.abs(x))
        loss_multi_view = loss_multi_view + loss_weight_orth

        loss_multi_view = loss_multi_view * self.lambda_multi_view
        ############ end loss multi-view #######

        ################# J-S divergence #################
        loss_similar = torch.FloatTensor([0])
        loss_similar = loss_similar.cuda()

        if self.use_web != 0:
            p1 = self.sigmoid(pre1)
            log_p1 = self.log_sigmoid(pre1)
            p2 = self.sigmoid(pre2)
            log_p2 = self.log_sigmoid(pre2)
            p = (p1 + p2) / 2
            # print(torch.max(p1));
            # print(torch.min(p1));

            if self.select_sample == 0:
                mask_idx = torch.ge(p1, -1)
            elif self.select_sample == 1:
                mask_idx1 = torch.ge(p1, -1)
                mask_idx2 = torch.ge(p1, -1)

                p_scale1 = p1 * p1 + p2 * p2
                p_scale2 = (1 - p1) * (1 - p1) + (1 - p2) * (1 - p2)
                for i in range(0, self.AU_num):
                    r = (1 - self.sample_weight[i]) * (
                        1 - self.sample_weight[i]) * 2 * self.sample_scale
                    idx_temp = torch.le(p_scale1[:, i], r)
                    mask_idx1[:, i] = idx_temp

                    r = self.sample_weight[i] * self.sample_weight[
                        i] * 2 * self.sample_scale
                    idx_temp = torch.le(p_scale2[:, i], r)
                    mask_idx2[:, i] = idx_temp

                mask_idx = mask_idx1 | mask_idx2
            elif self.select_sample == 2:
                mask_idx1 = torch.ge(p1, -1)
                mask_idx2 = torch.ge(p1, -1)

                p_scale1 = (p1 - 1) * (p1 - 1) + p2 * p2
                p_scale2 = p1 * p1 + (1 - p2) * (1 - p2)
                for i in range(0, self.AU_num):
                    r = self.sample_r
                    idx_temp = torch.le(p_scale1[:, i], r)
                    mask_idx1[:, i] = idx_temp

                    idx_temp = torch.le(p_scale2[:, i], r)
                    mask_idx2[:, i] = idx_temp

                mask_idx = mask_idx1 | mask_idx2

            idx1 = torch.le(p1, 1 - self.eps)
            idx2 = torch.ge(p1, self.eps)
            idx = idx1 & idx2 & mask_idx
            tmp_p1 = 1 - p1[idx] + self.eps
            Hp1 = torch.mean(-(p1[idx] * log_p1[idx] +
                               tmp_p1 * torch.log(tmp_p1)))

            idx1 = torch.le(p2, 1 - self.eps)
            idx2 = torch.ge(p2, self.eps)
            idx = idx1 & idx2 & mask_idx
            tmp_p2 = 1 - p2[idx] + self.eps
            Hp2 = torch.mean(-(p2[idx] * log_p2[idx] +
                               tmp_p2 * torch.log(tmp_p2)))

            idx1 = torch.le(p, 1 - self.eps)
            idx2 = torch.ge(p, self.eps)
            idx = idx1 & idx2 & mask_idx
            tmp_p11 = p[idx] + self.eps
            tmp_p22 = 1 - p[idx] + self.eps
            H1 = torch.mean(-(tmp_p11 * torch.log(tmp_p11) +
                              (tmp_p22) * torch.log(tmp_p22)))

            H2 = (Hp1 + Hp2) / 2

            loss_web = torch.abs(H1 - H2)
            loss_similar = loss_web

        loss_similar = loss_similar * self.lambda_co_regularization
        ################# end J-S divergence #################

        loss = loss_BCE + loss_multi_view + loss_similar

        return loss, loss_pred, loss_pred1, loss_pred2, loss_multi_view, loss_similar
def test_net(save_folder,
             net,
             cuda,
             dataset,
             transform,
             top_k,
             im_size=300,
             thresh=0.05):
    num_images = len(dataset)
    # all detections are collected into:
    #    all_boxes[cls][image] = N x 5 array of detections in
    #    (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in range(num_images)]
                 for _ in range(len(labelmap) + 1)]

    # timers
    _t = {'im_detect': Timer(), 'misc': Timer()}
    output_dir = get_output_dir('ssd300_120000', set_type)
    det_file = os.path.join(output_dir, 'detections.pkl')

    for i in range(num_images):
        im, gt, h, w = dataset.pull_item(i)
        # the image looks darker here because BaseTransform subtracted a mean
        # im_saver = cv2.resize(im[(a2,a1,0),:,:].permute((a1,a2,0)).numpy(), (w,h))

        im_det = dataset.pull_image(i)

        # print(im_det)
        # print("======\n")
        x = Variable(im.unsqueeze(0))
        if args.cuda:
            x = x.cuda()
        _t['im_detect'].tic()
        detections = net(x).data
        detect_time = _t['im_detect'].toc(average=False)

        # skip j = 0, because it's the background class
        # //
        # //
        # print(detections)
        for j in range(1, detections.size(1)):
            dets = detections[0, j, :]
            mask = dets[:, 0].gt(0.).expand(5, dets.size(0)).t()
            dets = torch.masked_select(dets, mask).view(-1, 5)
            if dets.size(0) == 0:
                continue
            boxes = dets[:, 1:]
            boxes[:, 0] *= w
            boxes[:, 2] *= w
            boxes[:, 1] *= h
            boxes[:, 3] *= h
            # print(boxes)
            scores = dets[:, 0].cpu().numpy()
            cls_dets = np.hstack(
                (boxes.cpu().numpy(), scores[:,
                                             np.newaxis])).astype(np.float32,
                                                                  copy=False)
            all_boxes[j][i] = cls_dets

            # print(all_boxes)
            for item in cls_dets:
                # print(item)
                # print(item[5])
                if item[4] > thresh:
                    # print(item)
                    chinese = labelmap[j - 1] + str(round(item[4], 2))
                    # print(chinese+'det\n\n')
                    if chinese[0] == '带':
                        chinese = 'P_Battery_Core' + chinese[6:]
                    else:
                        chinese = 'P_Battery_No_Core' + chinese[7:]
                    cv2.rectangle(im_det, (item[0], item[1]),
                                  (item[2], item[3]), (0, 0, 255), 2)
                    cv2.putText(im_det, chinese,
                                (int(item[0]), int(item[1]) - 5), 0, 0.6,
                                (0, 0, 255), 2)
        real = 0
        if gt[0][4] == 3:
            real = 0
        else:
            real = 1

        for item in gt:
            if real == 0:
                print('this pic dont have the obj:', dataset.ids[i])
                break
            chinese = labelmap[int(item[4])]
            # print(chinese+'gt\n\n')
            if chinese[0] == '带':
                chinese = 'P_Battery_Core'
            else:
                chinese = 'P_Battery_No_Core'
            cv2.rectangle(im_det, (int(item[0] * w), int(item[1] * h)),
                          (int(item[2] * w), int(item[3] * h)), (0, 255, 255),
                          2)
            cv2.putText(im_det, chinese,
                        (int(item[0] * w), int(item[1] * h) - 5), 0, 0.6,
                        (0, 255, 255), 2)
            # print(labelmap[int(item[4])])

        # print('im_detect: {:d}/{:d} {:.3f}s'.format(i + 1, num_images, detect_time))

        # cv2.imwrite('/media/trs2/wuzhangjie/SSD/eval/Xray20190723/Attention/base_battery_core_bs8_V/det_images/{0}_det.jpg'.format(dataset.ids[i]), im_det)

        # cv2.imwrite('/media/dsg3/shiyufeng/eval/Xray20190723/battery_2cV_version/20epoch_network/{0}_gt.jpg'.format(dataset.ids[i]), im_gt)
        # cv2.imwrite( '/media/dsg3/husheng/eval/{0}_det.jpg'.format(dataset.ids[i]), im_det)
        # cv2.imwrite( '/media/dsg3/husheng/eval/{0}_gt.jpg'.format(dataset.ids[i]), im_gt)

    with open(det_file, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)

    # print('Evaluating detections')
    evaluate_detections(all_boxes, output_dir, dataset)
Example #22
def test_net(save_folder,
             net,
             cuda,
             testset,
             transform,
             max_per_image=300,
             thresh=0.005):

    if not os.path.exists(save_folder):
        os.mkdir(save_folder)
    # dump predictions and assoc. ground truth to text file for now
    num_images = len(testset)
    num_classes = (21, 81)[args.dataset == 'COCO']
    all_boxes = [[[] for _ in range(num_images)] for _ in range(num_classes)]

    _t = {'im_detect': Timer(), 'misc': Timer()}
    det_file = os.path.join(save_folder, 'detections.pkl')

    if args.retest:
        f = open(det_file, 'rb')
        all_boxes = pickle.load(f)
        print('Evaluating detections')
        testset.evaluate_detections(all_boxes, save_folder)
        print('Evaluating done')
        return

    for i in range(num_images):
        img, _, h, w = testset.pull_item(i)
        scale = torch.Tensor([w, h, w, h])
        with torch.no_grad():
            # x = transform(img).unsqueeze(0)
            x = img.unsqueeze(0)
            if cuda:
                x = x.cuda()
                scale = scale.cuda()

        _t['im_detect'].tic()
        detections = net(x)  # forward pass
        detections.detach_()
        detect_time = _t['im_detect'].toc(average=False)

        # skip j = 0, because it's the background class
        for j in range(1, detections.size(1)):
            dets = detections[0, j, :]
            mask = dets[:, 0].gt(0.).expand(5, dets.size(0)).t()
            dets = torch.masked_select(dets, mask).view(-1, 5)
            if dets.size(0) == 0:
                continue
            boxes = dets[:, 1:]
            boxes[:, 0] *= w
            boxes[:, 2] *= w
            boxes[:, 1] *= h
            boxes[:, 3] *= h
            scores = dets[:, 0].cpu().numpy()
            cls_dets = np.hstack(
                (boxes.cpu().numpy(), scores[:,
                                             np.newaxis])).astype(np.float32,
                                                                  copy=False)
            all_boxes[j][i] = cls_dets
        '''
        # boxes, scores = detector.forward(out,priors)
        detect_time = _t['im_detect'].toc()
        boxes = boxes[0]
        scores = scores[0]

        boxes *= scale
        boxes = boxes.cpu().numpy()
        scores = scores.cpu().numpy()
        # scale each detection back up to the image

        _t['misc'].tic()

        for j in range(1, num_classes):
            inds = np.where(scores[:, j] > thresh)[0]
            if len(inds) == 0:
                all_boxes[j][i] = np.empty([0, 5], dtype=np.float32)
                continue
            c_bboxes = boxes[inds]
            c_scores = scores[inds, j]
            c_dets = np.hstack((c_bboxes, c_scores[:, np.newaxis])).astype(
                np.float32, copy=False)

            keep = nms(c_dets, 0.45, force_cpu=args.cpu)
            c_dets = c_dets[keep, :]
            all_boxes[j][i] = c_dets
        if max_per_image > 0:
            image_scores = np.hstack([all_boxes[j][i][:, -1] for j in range(1,num_classes)])
            if len(image_scores) > max_per_image:
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in range(1, num_classes):
                    keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]

        nms_time = _t['misc'].toc()
        '''

        if i % 20 == 0:
            print('im_detect: {:d}/{:d} {:.3f}s'.format(
                i + 1, num_images, detect_time))
            _t['im_detect'].clear()

    with open(det_file, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)

    print('Evaluating detections')
    testset.evaluate_detections(all_boxes, save_folder)
Example #23
    def __call__(self, prediction_labels: torch.Tensor,
                 gold_labels: torch.Tensor, mask: torch.Tensor) -> Dict:
        """
        计算 metric. 返回的是 F1 字典:

        {"precision_[tag]": [value],
         "recall_[tag]" : [value],
         "f1-measure_[tag]": [value],
         "precision-overall": [value],
         "recall-overall": [value],
         "f1-measure-overall": [value]}

         其中的 [tag] 是 span 的 tag, 也就是 "B-[tag]" 中的 "[tag]"

        :param prediction_labels: 预测的结果, shape: (B, SeqLen)
        :param gold_labels: 实际的结果, shape: (B, SeqLen)
        :param mask: 对 predictions 和 gold label 的 mask, shape: (B, SeqLen)
        :return: 当前的 metric 计算字典结果.
        """

        if prediction_labels.dim() != 2:
            raise RuntimeError(
                f"prediction_labels shape should be (B, SeqLen), got: {prediction_labels.size()}"
            )
        if gold_labels.dim() != 2:
            raise RuntimeError(
                f"gold_labels shape should be (B, SeqLen), got: {gold_labels.size()}"
            )

        if mask is not None:
            if mask.dim() != 2:
                raise RuntimeError(
                    f"mask shape should be (B, SeqLen), got: {mask.size()}")

        # move the tensors to CPU for the computation
        prediction_labels, gold_labels = prediction_labels.detach().cpu(
        ), gold_labels.detach().cpu()

        if mask is not None:
            mask = mask.detach().cpu()
        else:
            mask = torch.ones(size=(prediction_labels.size(0),
                                    prediction_labels.size(1)),
                              dtype=torch.long).cpu()

        assert prediction_labels.size() == gold_labels.size(), \
            f"prediction_labels.size: {prediction_labels.size()} does not match gold_labels.size: {gold_labels.size()}!"

        assert prediction_labels.size() == mask.size(), \
            f"prediction_labels.size: {prediction_labels.size()} does not match mask.size: {mask.size()}!"

        bool_mask = (mask != 0)

        num_classes = self.label_vocabulary.label_size

        if (torch.masked_select(gold_labels, bool_mask) >= num_classes).any():
            raise RuntimeError(f"gold_labels 中存在比 num_classes 大的数值")

        # decode the predictions into a span list
        prediction_spans_list = BIO.decode_label_index_to_span(
            batch_sequence_label_index=prediction_labels,
            mask=mask,
            vocabulary=self.label_vocabulary)

        # decode the gold label indices into a span list
        gold_spans_list = BIO.decode_label_index_to_span(
            batch_sequence_label_index=gold_labels,
            mask=mask,
            vocabulary=self.label_vocabulary)

        # number of predicted spans per label
        num_prediction = defaultdict(int)

        # number of gold spans per label
        num_golden = defaultdict(int)

        # true positives for the current batch
        true_positives = defaultdict(int)
        false_positives = defaultdict(int)
        false_negatives = defaultdict(int)

        for prediction_spans, gold_spans in zip(prediction_spans_list,
                                                gold_spans_list):
            intersection = BIO.span_intersection(span_list1=prediction_spans,
                                                 span_list2=gold_spans)

            for span in intersection:
                # self._true_positives[span["label"]] += 1
                true_positives[span["label"]] += 1

            for span in prediction_spans:
                num_prediction[span["label"]] += 1

            for span in gold_spans:
                num_golden[span["label"]] += 1

        for label, num in num_prediction.items():
            false_positives[label] = num - true_positives[label]

        for label, num in num_golden.items():
            false_negatives[label] = num - true_positives[label]

        for k, v in true_positives.items():
            self._true_positives[k] += v

        for k, v in false_positives.items():
            self._false_positives[k] += v

        for k, v in false_negatives.items():
            self._false_negatives[k] += v

        return self._metric(true_positives=true_positives,
                            false_positives=false_positives,
                            false_negatives=false_negatives)
Example #24
def do_train(train_loader, model, criterion, optimizer, epoch, args):
    batch_time = utils.AverageMeter('Time', ':6.3f')
    data_time = utils.AverageMeter('Data', ':6.3f')
    losses = utils.AverageMeter('Loss', ':.3f')
    top1 = utils.AverageMeter('Acc@1', ':6.2f')
    top5 = utils.AverageMeter('Acc@5', ':6.2f')
    learning_rate = utils.AverageMeter('LR', ':.4f')

    losses_id = utils.AverageMeter('L_ID', ':.3f')
    losses_mag = utils.AverageMeter('L_mag', ':.6f')
    progress_template = [
        batch_time, data_time, losses, losses_id, losses_mag, top1, top5,
        learning_rate
    ]

    progress = utils.ProgressMeter(len(train_loader),
                                   progress_template,
                                   prefix="Epoch: [{}]".format(epoch))
    end = time.time()

    # update lr
    learning_rate.update(current_lr)

    for i, (input, target) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)
        global iters
        iters += 1

        input = input.cuda(non_blocking=True)
        target = target.cuda(non_blocking=True)

        # compute output
        output, x_norm = model(input, target)

        loss_id, loss_g, one_hot = criterion(output, target, x_norm)
        loss = loss_id + args.lambda_g * loss_g

        # measure accuracy and record loss
        acc1, acc5 = utils.accuracy(args, output[0], target, topk=(1, 5))

        losses.update(loss.item(), input.size(0))
        top1.update(acc1[0], input.size(0))
        top5.update(acc5[0], input.size(0))

        losses_id.update(loss_id.item(), input.size(0))
        losses_mag.update(args.lambda_g * loss_g.item(), input.size(0))

        # compute gradient and do solver step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            progress.display(i)
        if args.vis_mag:
            if (i > 10000) and (i % 100 == 0):
                x_norm = x_norm.detach().cpu().numpy()
                cos_theta = torch.masked_select(
                    output[0], one_hot.bool()).detach().cpu().numpy()
                logit = torch.masked_select(F.softmax(
                    output[0]), one_hot.bool()).detach().cpu().numpy()
                np.savez(
                    '{}/vis/epoch_{}_iter{}'.format(args.pth_save_fold, epoch,
                                                    i), x_norm, logit,
                    cos_theta)
Example #25
def batch_select(mat, idx):
    mask = torch.arange(mat.size(1)).expand_as(mat).to(mat.device,
                                                       dtype=torch.long)
    mask = (mask == idx.view(-1, 1))
    return torch.masked_select(mat, mask)
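
A tiny usage example for batch_select (made-up values): it returns element mat[i, idx[i]] for every row i.

import torch

mat = torch.arange(12).view(3, 4)
idx = torch.tensor([0, 2, 3])
print(batch_select(mat, idx))  # tensor([ 0,  6, 11])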
#
#   2. Indexing, Slicing, Joining, Reshaping
#
# 1) Indexing
x = torch.rand(4, 3)
# torch.index_select
out = torch.index_select(x, 0, torch.LongTensor([0, 3]))
# print(x, out)

# pythonic indexing
x[:, 0], x[0, :], x[0:2, 0:2]

# torch.masked_select
x = torch.randn(2, 3)
mask = torch.ByteTensor([[0, 0, 1], [0, 1, 0]])
out = torch.masked_select(x, mask)

# x, mask, out
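
On recent PyTorch versions the mask for torch.masked_select should be a torch.bool tensor (the ByteTensor form above still works but emits a deprecation warning); the same selection with a bool mask:

x = torch.randn(2, 3)
mask = torch.tensor([[False, False, True], [False, True, False]])  # dtype is torch.bool
out = torch.masked_select(x, mask)  # 1-D tensor holding the two selected elements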

# 2) Joining
# torch.cat(seq, dim=0)     concatenate tensor along dim
# 1 2 3
# 4 5 6
x = torch.FloatTensor([[1, 2, 3], [4, 5, 6]])
# -1 -2 -3
# -4 -5 -6
y = torch.FloatTensor([[-1, -2, -3], [-4, -5, -6]])

# 1 2 3
# 4 5 6
# -1 -2 -3
Example #27
    def forward(self, inputs, targets, nonorm):
        n = inputs.size(0)

        sim_mat = torch.matmul(inputs, inputs.t())

        targets = targets

        base = 0.5
        loss = list()
        c = 0

        for i in range(n):
            pos_pair_ = torch.masked_select(sim_mat[i], targets == targets[i])

            #  move itself
            pos_pair_ = torch.masked_select(pos_pair_,
                                            pos_pair_ < self.pos_margin)
            neg_pair_ = torch.masked_select(sim_mat[i], targets != targets[i])

            if self.sample_method is not None:

                # pos_pair_ = torch.masked_select(pos_pair_, pos_pair_ < self.pos_margin)
                neg_pair_ = neg_pair_[neg_pair_ + 0.5 > min(pos_pair_)]
                neg_pair_ = torch.masked_select(neg_pair_,
                                                neg_pair_ > self.neg_margin)

            pos_pair_ = torch.sort(pos_pair_)[0]
            neg_pair_ = torch.sort(neg_pair_)[0]

            if self.Dynamic_margin is not None:

                pos_pair = pos_pair_
                neg_pair = neg_pair_

                pos_loss = 1.0 / 2 * torch.log(1 + torch.sum(
                    torch.exp(-2 * (pos_pair - 0.5) + self.epoch_num / 300 *
                              (pos_pair - self.pos_margin)**2)))
                neg_loss = 1.0 / 50 * torch.log(1 + torch.sum(
                    torch.exp(50 * (neg_pair - 0.5) + self.epoch_num / 300 *
                              (self.neg_margin - neg_pair)**2)))

            else:
                pos_pair = pos_pair_
                neg_pair = neg_pair_

                pos_loss = 1.0 / 2 * torch.log(
                    1 + torch.sum(torch.exp(-2 * (pos_pair - 0.5))))
                neg_loss = 1.0 / 50 * torch.log(
                    1 + torch.sum(torch.exp(50 * (neg_pair - 0.5))))

            if len(neg_pair) == 0:
                c += 1
                continue

            loss.append(pos_loss + neg_loss)

        loss = sum(loss) / n
        prec = float(c) / n
        mean_neg_sim = torch.mean(neg_pair_).item()
        mean_pos_sim = torch.mean(pos_pair_).item()
        return loss, prec, mean_pos_sim, mean_neg_sim
Example #28
    def __init__(self, tensor):
        self.floating_dtype = tensor.dtype.is_floating_point
        self.int_mode = True
        self.sci_mode = False
        self.max_width = 1

        with torch.no_grad():
            tensor_view = tensor.reshape(-1)

        if not self.floating_dtype:
            for value in tensor_view:
                value_str = '{}'.format(value)
                self.max_width = max(self.max_width, len(value_str))

        else:
            nonzero_finite_vals = torch.masked_select(
                tensor_view,
                torch.isfinite(tensor_view) & tensor_view.ne(0))

            if nonzero_finite_vals.numel() == 0:
                # no valid number, do nothing
                return

            # Convert to double for easy calculation. HalfTensor overflows with 1e8, and there's no div() on CPU.
            nonzero_finite_abs = nonzero_finite_vals.abs().double()
            nonzero_finite_min = nonzero_finite_abs.min().double()
            nonzero_finite_max = nonzero_finite_abs.max().double()

            for value in nonzero_finite_vals:
                if value != torch.ceil(value):
                    self.int_mode = False
                    break

            if self.int_mode:
                # in int_mode for floats, all numbers are integers, and we append a decimal to nonfinites
                # to indicate that the tensor is of floating type. add 1 to the len to account for this.
                if nonzero_finite_max / nonzero_finite_min > 1000. or nonzero_finite_max > 1.e8:
                    self.sci_mode = True
                    for value in nonzero_finite_vals:
                        value_str = ('{{:.{}e}}').format(
                            PRINT_OPTS.precision).format(value)
                        self.max_width = max(self.max_width, len(value_str))
                else:
                    for value in nonzero_finite_vals:
                        value_str = ('{:.0f}').format(value)
                        self.max_width = max(self.max_width,
                                             len(value_str) + 1)
            else:
                # Check if scientific representation should be used.
                if nonzero_finite_max / nonzero_finite_min > 1000.\
                        or nonzero_finite_max > 1.e8\
                        or nonzero_finite_min < 1.e-4:
                    self.sci_mode = True
                    for value in nonzero_finite_vals:
                        value_str = ('{{:.{}e}}').format(
                            PRINT_OPTS.precision).format(value)
                        self.max_width = max(self.max_width, len(value_str))
                else:
                    for value in nonzero_finite_vals:
                        value_str = ('{{:.{}f}}').format(
                            PRINT_OPTS.precision).format(value)
                        self.max_width = max(self.max_width, len(value_str))

        if PRINT_OPTS.sci_mode is not None:
            self.sci_mode = PRINT_OPTS.sci_mode
Example #29
    def generate(
        self,
        models,
        sample,
        prefix_tokens=None,
        bos_token=None,
        **kwargs
    ):
        """Generate a batch of translations.

        Args:
            models (List[~fairseq.models.FairseqModel]): ensemble of models
            sample (dict): batch
            prefix_tokens (torch.LongTensor, optional): force decoder to begin
                with these tokens
        """
        model = EnsembleModel(models)
        if not self.retain_dropout:
            model.eval()

        # model.forward normally channels prev_output_tokens into the decoder
        # separately, but SequenceGenerator directly calls model.encoder
        encoder_input = {
            k: v for k, v in sample['net_input'].items()
            if k != 'prev_output_tokens'
        }

        src_tokens = encoder_input['src_tokens']
        src_lengths = (src_tokens.ne(self.eos) & src_tokens.ne(self.pad)).long().sum(dim=1)
        input_size = src_tokens.size()
        # batch dimension goes first followed by source lengths
        bsz = input_size[0]
        src_len = input_size[1]
        beam_size = self.beam_size

        if self.match_source_len:
            max_len = src_lengths.max().item()
        else:
            max_len = min(
                int(self.max_len_a * src_len + self.max_len_b),
                # exclude the EOS marker
                model.max_decoder_positions() - 1,
            )

        # compute the encoder output for each beam
        encoder_outs = model.forward_encoder(encoder_input)
        self.encoder_input = encoder_input

        new_order = torch.arange(bsz).view(-1, 1).repeat(1, beam_size).view(-1)
        new_order = new_order.to(src_tokens.device).long()
        encoder_outs = model.reorder_encoder_out(encoder_outs, new_order)
        # print('first....................................................................')
        model.reorder_encoder_input(self.encoder_input, new_order)

        # initialize buffers
        scores = src_tokens.new(bsz * beam_size, max_len + 1).float().fill_(0)
        scores_buf = scores.clone()
        tokens = src_tokens.data.new(bsz * beam_size, max_len + 2).long().fill_(self.pad)
        tokens_buf = tokens.clone()
        tokens[:, 0] = bos_token or self.eos
        attn, attn_buf = None, None
        nonpad_idxs = None

        # The blacklist indicates candidates that should be ignored.
        # For example, suppose we're sampling and have already finalized 2/5
        # samples. Then the blacklist would mark 2 positions as being ignored,
        # so that we only finalize the remaining 3 samples.
        blacklist = src_tokens.new_zeros(bsz, beam_size).eq(-1)  # forward and backward-compatible False mask

        # list of completed sentences
        finalized = [[] for i in range(bsz)]
        finished = [False for i in range(bsz)]
        worst_finalized = [{'idx': None, 'score': -math.inf} for i in range(bsz)]
        num_remaining_sent = bsz

        # number of candidate hypos per step
        cand_size = 2 * beam_size  # 2 x beam size in case half are EOS

        # offset arrays for converting between different indexing schemes
        bbsz_offsets = (torch.arange(0, bsz) * beam_size).unsqueeze(1).type_as(tokens)
        cand_offsets = torch.arange(0, cand_size).type_as(tokens)

        # helper function for allocating buffers on the fly
        buffers = {}

        def buffer(name, type_of=tokens):  # noqa
            if name not in buffers:
                buffers[name] = type_of.new()
            return buffers[name]

        def is_finished(sent, step, unfin_idx, unfinalized_scores=None):
            """
            Check whether we've finished generation for a given sentence, by
            comparing the worst score among finalized hypotheses to the best
            possible score among unfinalized hypotheses.
            """
            assert len(finalized[sent]) <= beam_size
            if len(finalized[sent]) == beam_size:
                if self.stop_early or step == max_len or unfinalized_scores is None:
                    return True
                # stop if the best unfinalized score is worse than the worst
                # finalized one
                best_unfinalized_score = unfinalized_scores[unfin_idx].max()
                if self.normalize_scores:
                    best_unfinalized_score /= max_len ** self.len_penalty
                if worst_finalized[sent]['score'] >= best_unfinalized_score:
                    return True
            return False

        def finalize_hypos(step, bbsz_idx, eos_scores, unfinalized_scores=None):
            """
            Finalize the given hypotheses at this step, while keeping the total
            number of finalized hypotheses per sentence <= beam_size.

            Note: the input must be in the desired finalization order, so that
            hypotheses that appear earlier in the input are preferred to those
            that appear later.

            Args:
                step: current time step
                bbsz_idx: A vector of indices in the range [0, bsz*beam_size),
                    indicating which hypotheses to finalize
                eos_scores: A vector of the same size as bbsz_idx containing
                    scores for each hypothesis
                unfinalized_scores: A vector containing scores for all
                    unfinalized hypotheses
            """
            assert bbsz_idx.numel() == eos_scores.numel()

            # clone relevant token and attention tensors
            tokens_clone = tokens.index_select(0, bbsz_idx)
            tokens_clone = tokens_clone[:, 1:step + 2]  # skip the first index, which is EOS
            tokens_clone[:, step] = self.eos
            attn_clone = attn.index_select(0, bbsz_idx)[:, :, 1:step+2] if attn is not None else None

            # compute scores per token position
            pos_scores = scores.index_select(0, bbsz_idx)[:, :step+1]
            pos_scores[:, step] = eos_scores
            # convert from cumulative to per-position scores
            pos_scores[:, 1:] = pos_scores[:, 1:] - pos_scores[:, :-1]

            # normalize sentence-level scores
            if self.normalize_scores:
                eos_scores /= (step + 1) ** self.len_penalty

            cum_unfin = []
            prev = 0
            for f in finished:
                if f:
                    prev += 1
                else:
                    cum_unfin.append(prev)

            sents_seen = set()
            for i, (idx, score) in enumerate(zip(bbsz_idx.tolist(), eos_scores.tolist())):
                unfin_idx = idx // beam_size
                sent = unfin_idx + cum_unfin[unfin_idx]

                sents_seen.add((sent, unfin_idx))

                if self.match_source_len and step > src_lengths[unfin_idx]:
                    score = -math.inf

                def get_hypo():

                    if attn_clone is not None:
                        # remove padding tokens from attn scores
                        hypo_attn = attn_clone[i][nonpad_idxs[sent]]
                        _, alignment = hypo_attn.max(dim=0)
                    else:
                        hypo_attn = None
                        alignment = None

                    return {
                        'tokens': tokens_clone[i],
                        'score': score,
                        'attention': hypo_attn,  # src_len x tgt_len
                        'alignment': alignment,
                        'positional_scores': pos_scores[i],
                    }

                if len(finalized[sent]) < beam_size:
                    finalized[sent].append(get_hypo())
                elif not self.stop_early and score > worst_finalized[sent]['score']:
                    # replace worst hypo for this sentence with new/better one
                    worst_idx = worst_finalized[sent]['idx']
                    if worst_idx is not None:
                        finalized[sent][worst_idx] = get_hypo()

                    # find new worst finalized hypo for this sentence
                    idx, s = min(enumerate(finalized[sent]), key=lambda r: r[1]['score'])
                    worst_finalized[sent] = {
                        'score': s['score'],
                        'idx': idx,
                    }

            newly_finished = []
            for sent, unfin_idx in sents_seen:
                # check termination conditions for this sentence
                if not finished[sent] and is_finished(sent, step, unfin_idx, unfinalized_scores):
                    finished[sent] = True
                    newly_finished.append(unfin_idx)
            return newly_finished

        reorder_state = None
        batch_idxs = None
        for step in range(max_len + 1):  # one extra step for EOS marker
            # reorder decoder internal states based on the prev choice of beams
            if reorder_state is not None:
                if batch_idxs is not None:
                    # update beam indices to take into account removed sentences
                    corr = batch_idxs - torch.arange(batch_idxs.numel()).type_as(batch_idxs)
                    reorder_state.view(-1, beam_size).add_(corr.unsqueeze(-1) * beam_size)
                model.reorder_incremental_state(reorder_state)
                model.reorder_encoder_out(encoder_outs, reorder_state)
                model.reorder_encoder_input(self.encoder_input, reorder_state)

            lprobs, avg_attn_scores = model.forward_decoder(
                tokens[:, :step + 1], encoder_outs, temperature=self.temperature, sample=self.encoder_input
            )

            lprobs[:, self.pad] = -math.inf  # never select pad
            lprobs[:, self.unk] -= self.unk_penalty  # apply unk penalty

            if self.no_repeat_ngram_size > 0:
                # for each beam and batch sentence, generate a list of previous ngrams
                gen_ngrams = [{} for bbsz_idx in range(bsz * beam_size)]
                for bbsz_idx in range(bsz * beam_size):
                    gen_tokens = tokens[bbsz_idx].tolist()
                    for ngram in zip(*[gen_tokens[i:] for i in range(self.no_repeat_ngram_size)]):
                        gen_ngrams[bbsz_idx][tuple(ngram[:-1])] = \
                                gen_ngrams[bbsz_idx].get(tuple(ngram[:-1]), []) + [ngram[-1]]
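                # e.g. with no_repeat_ngram_size=3 and gen_tokens=[5, 7, 9, 7, 9],
                # gen_ngrams[bbsz_idx] == {(5, 7): [9], (7, 9): [7], (9, 7): [9]},
                # i.e. each (n-1)-gram prefix maps to the continuations already generated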

            # Record attention scores
            if avg_attn_scores is not None:
                if attn is None:
                    attn = scores.new(bsz * beam_size, src_tokens.size(1), max_len + 2)
                    attn_buf = attn.clone()
                    nonpad_idxs = src_tokens.ne(self.pad)
                attn[:, :, step + 1].copy_(avg_attn_scores)

            scores = scores.type_as(lprobs)
            scores_buf = scores_buf.type_as(lprobs)
            eos_bbsz_idx = buffer('eos_bbsz_idx')
            eos_scores = buffer('eos_scores', type_of=scores)
            if step < max_len:
                self.search.set_src_lengths(src_lengths)

                if self.no_repeat_ngram_size > 0:
                    def calculate_banned_tokens(bbsz_idx):
                        # before decoding the next token, prevent decoding of ngrams that have already appeared
                        ngram_index = tuple(tokens[bbsz_idx, step + 2 - self.no_repeat_ngram_size:step + 1].tolist())
                        return gen_ngrams[bbsz_idx].get(ngram_index, [])

                    if step + 2 - self.no_repeat_ngram_size >= 0:
                        # banned tokens only exist once at least no_repeat_ngram_size tokens have been generated
                        banned_tokens = [calculate_banned_tokens(bbsz_idx) for bbsz_idx in range(bsz * beam_size)]
                    else:
                        banned_tokens = [[] for bbsz_idx in range(bsz * beam_size)]

                    for bbsz_idx in range(bsz * beam_size):
                        lprobs[bbsz_idx, banned_tokens[bbsz_idx]] = -math.inf

                if prefix_tokens is not None and step < prefix_tokens.size(1):
                    probs_slice = lprobs.view(bsz, -1, lprobs.size(-1))[:, 0, :]
                    cand_scores = torch.gather(
                        probs_slice, dim=1,
                        index=prefix_tokens[:, step].view(-1, 1)
                    ).view(-1, 1).repeat(1, cand_size)
                    if step > 0:
                        # save cumulative scores for each hypothesis
                        cand_scores.add_(scores[:, step - 1].view(bsz, beam_size).repeat(1, 2))
                    cand_indices = prefix_tokens[:, step].view(-1, 1).repeat(1, cand_size)
                    cand_beams = torch.zeros_like(cand_indices)

                    # handle prefixes of different lengths
                    partial_prefix_mask = prefix_tokens[:, step].eq(self.pad)
                    if partial_prefix_mask.any():
                        partial_scores, partial_indices, partial_beams = self.search.step(
                            step,
                            lprobs.view(bsz, -1, self.vocab_size),
                            scores.view(bsz, beam_size, -1)[:, :, :step],
                        )
                        cand_scores[partial_prefix_mask] = partial_scores[partial_prefix_mask]
                        cand_indices[partial_prefix_mask] = partial_indices[partial_prefix_mask]
                        cand_beams[partial_prefix_mask] = partial_beams[partial_prefix_mask]
                else:
                    cand_scores, cand_indices, cand_beams = self.search.step(
                        step,
                        lprobs.view(bsz, -1, self.vocab_size),
                        scores.view(bsz, beam_size, -1)[:, :, :step],
                    )
            else:
                # make probs contain cumulative scores for each hypothesis
                lprobs.add_(scores[:, step - 1].unsqueeze(-1))

                # finalize all active hypotheses once we hit max_len
                # pick the hypothesis with the highest prob of EOS right now
                torch.sort(
                    lprobs[:, self.eos],
                    descending=True,
                    out=(eos_scores, eos_bbsz_idx),
                )
                num_remaining_sent -= len(finalize_hypos(step, eos_bbsz_idx, eos_scores))
                assert num_remaining_sent == 0
                break

            # cand_bbsz_idx contains beam indices for the top candidate
            # hypotheses, with a range of values: [0, bsz*beam_size),
            # and dimensions: [bsz, cand_size]
            cand_bbsz_idx = cand_beams.add(bbsz_offsets)

            # finalize hypotheses that end in eos
            eos_mask = cand_indices.eq(self.eos)

            finalized_sents = set()
            if step >= self.min_len:
                # only consider eos when it's among the top beam_size indices
                torch.masked_select(
                    cand_bbsz_idx[:, :beam_size],
                    mask=eos_mask[:, :beam_size],
                    out=eos_bbsz_idx,
                )
                if eos_bbsz_idx.numel() > 0:
                    torch.masked_select(
                        cand_scores[:, :beam_size],
                        mask=eos_mask[:, :beam_size],
                        out=eos_scores,
                    )
                    finalized_sents = finalize_hypos(step, eos_bbsz_idx, eos_scores, cand_scores)
                    num_remaining_sent -= len(finalized_sents)

            assert num_remaining_sent >= 0
            if num_remaining_sent == 0:
                break
            assert step < max_len

            if len(finalized_sents) > 0:
                new_bsz = bsz - len(finalized_sents)

                # construct batch_idxs which holds indices of batches to keep for the next pass
                batch_mask = cand_indices.new_ones(bsz)
                batch_mask[cand_indices.new(finalized_sents)] = 0
                batch_idxs = batch_mask.nonzero().squeeze(-1)

                eos_mask = eos_mask[batch_idxs]
                cand_beams = cand_beams[batch_idxs]
                bbsz_offsets.resize_(new_bsz, 1)
                cand_bbsz_idx = cand_beams.add(bbsz_offsets)
                cand_scores = cand_scores[batch_idxs]
                cand_indices = cand_indices[batch_idxs]
                if prefix_tokens is not None:
                    prefix_tokens = prefix_tokens[batch_idxs]
                src_lengths = src_lengths[batch_idxs]

                scores = scores.view(bsz, -1)[batch_idxs].view(new_bsz * beam_size, -1)
                scores_buf.resize_as_(scores)
                tokens = tokens.view(bsz, -1)[batch_idxs].view(new_bsz * beam_size, -1)
                tokens_buf.resize_as_(tokens)
                if attn is not None:
                    attn = attn.view(bsz, -1)[batch_idxs].view(new_bsz * beam_size, attn.size(1), -1)
                    attn_buf.resize_as_(attn)
                bsz = new_bsz
            else:
                batch_idxs = None

            # Set active_mask so that values >= cand_size indicate eos hypos
            # and values < cand_size indicate candidate active hypos. After
            # this, the min values per row are the top candidate active hypos.
            active_mask = buffer('active_mask')
            torch.add(
                eos_mask.type_as(cand_offsets) * cand_size,
                cand_offsets[:eos_mask.size(1)],
                out=active_mask,
            )

            # get the top beam_size active hypotheses, which are just the hypos
            # with the smallest values in active_mask
            active_hypos, _ignore = buffer('active_hypos'), buffer('_ignore')
            torch.topk(
                active_mask, k=beam_size, dim=1, largest=False,
                out=(_ignore, active_hypos)
            )

            active_bbsz_idx = buffer('active_bbsz_idx')
            torch.gather(
                cand_bbsz_idx, dim=1, index=active_hypos,
                out=active_bbsz_idx,
            )
            active_scores = torch.gather(
                cand_scores, dim=1, index=active_hypos,
                out=scores[:, step].view(bsz, beam_size),
            )

            active_bbsz_idx = active_bbsz_idx.view(-1)
            active_scores = active_scores.view(-1)

            # copy tokens and scores for active hypotheses
            torch.index_select(
                tokens[:, :step + 1], dim=0, index=active_bbsz_idx,
                out=tokens_buf[:, :step + 1],
            )
            torch.gather(
                cand_indices, dim=1, index=active_hypos,
                out=tokens_buf.view(bsz, beam_size, -1)[:, :, step + 1],
            )
            if step > 0:
                torch.index_select(
                    scores[:, :step], dim=0, index=active_bbsz_idx,
                    out=scores_buf[:, :step],
                )
            torch.gather(
                cand_scores, dim=1, index=active_hypos,
                out=scores_buf.view(bsz, beam_size, -1)[:, :, step],
            )

            # copy attention for active hypotheses
            if attn is not None:
                torch.index_select(
                    attn[:, :, :step + 2], dim=0, index=active_bbsz_idx,
                    out=attn_buf[:, :, :step + 2],
                )

            # swap buffers
            tokens, tokens_buf = tokens_buf, tokens
            scores, scores_buf = scores_buf, scores
            if attn is not None:
                attn, attn_buf = attn_buf, attn

            # reorder incremental state in decoder
            reorder_state = active_bbsz_idx

        # sort by score descending
        for sent in range(len(finalized)):
            finalized[sent] = sorted(finalized[sent], key=lambda r: r['score'], reverse=True)

        return finalized
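
A minimal, self-contained sketch (toy values, not part of the generator above) of the masked_select pattern used in the EOS-finalization step: the boolean mask keeps only the first beam_size candidate columns whose predicted token equals EOS.

import torch

def select_eos_candidates(cand_indices, cand_scores, cand_bbsz_idx, eos, beam_size):
    # cand_* are [bsz, 2 * beam_size]; only the first beam_size columns may finalize
    eos_mask = cand_indices.eq(eos)
    eos_bbsz_idx = torch.masked_select(cand_bbsz_idx[:, :beam_size], mask=eos_mask[:, :beam_size])
    eos_scores = torch.masked_select(cand_scores[:, :beam_size], mask=eos_mask[:, :beam_size])
    return eos_bbsz_idx, eos_scores

# toy values: bsz=1, beam_size=2, eos=2
cand_indices = torch.tensor([[2, 5, 2, 7]])
cand_scores = torch.tensor([[-0.1, -0.4, -0.9, -1.3]])
cand_bbsz_idx = torch.tensor([[0, 1, 1, 0]])
print(select_eos_candidates(cand_indices, cand_scores, cand_bbsz_idx, eos=2, beam_size=2))
# -> (tensor([0]), tensor([-0.1000]))
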
Beispiel #30
0
def ROIAlign(feature_maps, rois, config, pool_size, mode='bilinear'):
    """Implements ROI Align on the features.

    Params:
    - pool_size: side length of the square output pooled regions. Usually 7
    - config: config object providing IMAGES_PER_GPU and IMAGE_MAX_DIM (input image size in pixels)

    Inputs:
    - boxes: [batch, num_boxes, (x1, y1, x2, y2)] in normalized
             coordinates. Possibly padded with zeros if not enough
             boxes to fill the array.
    - Feature maps: List of feature maps from different levels of the pyramid.
                    Each is [batch, channels, height, width]

    Output:
    Pooled regions in the shape: [batch, num_boxes, channels, height, width].
    The width and height are those specified by pool_size.
    """
    """
    [  x2-x1             x1 + x2 - W + 1  ]
    [  -----      0      ---------------  ]
    [  W - 1                  W - 1       ]
    [                                     ]
    [           y2-y1    y1 + y2 - H + 1  ]
    [    0      -----    ---------------  ]
    [           H - 1         H - 1      ]
    """
    #feature_maps= [P2, P3, P4, P5]
    rois = rois.detach()
    crop_resize = CropAndResize(pool_size, pool_size, 0)
    
    roi_number = rois.size()[1]
    
    pooled = rois.data.new(
        config.IMAGES_PER_GPU * rois.size(1), 256, pool_size, pool_size).zero_()

    rois = rois.view(config.IMAGES_PER_GPU * rois.size(1), 4)
                   
    # Loop through levels and apply ROI pooling to each. P2 to P5.
    x_1 = rois[:, 0]
    y_1 = rois[:, 1]
    x_2 = rois[:, 2]
    y_2 = rois[:, 3]


    roi_level = log2_graph(
        torch.div(torch.sqrt((y_2 - y_1) * (x_2 - x_1)), 224.0))
        
        
    roi_level = torch.clamp(torch.clamp(
        torch.add(torch.round(roi_level), 4), min=2), max=5)

    # P2 is 256x256, P3 is 128x128, P4 is 64x64, P5 is 32x32
    # P2 is 4, P3 is 8, P4 is 16, P5 is 32
    for i, level in enumerate(range(2, 6)):

        scaling_ratio = 2**level

        height = float(config.IMAGE_MAX_DIM)/ scaling_ratio
        width = float(config.IMAGE_MAX_DIM) / scaling_ratio

        ixx = torch.eq(roi_level, level)

        box_indices = ixx.view(-1).int() * 0  # per-box batch indices for CropAndResize, all zero here
        ix = torch.unsqueeze(ixx, 1)
        level_boxes = torch.masked_select(rois, ix)
        if level_boxes.numel() == 0:
            # no boxes assigned to this pyramid level
            continue

        level_boxes = level_boxes.view(-1, 4)
        
        crops = crop_resize(feature_maps[i], torch.div(
                level_boxes, float(config.IMAGE_MAX_DIM)
                )[:, [1, 0, 3, 2]], box_indices)
                
        indices_pooled = ixx.nonzero()[:, 0]
        pooled[indices_pooled.data, :, :, :] = crops.data

    pooled = pooled.view(config.IMAGES_PER_GPU, roi_number,
               256, pool_size, pool_size)        
    pooled = Variable(pooled).cuda()
    return pooled
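
A small illustrative sketch (toy tensors, independent of the ROIAlign code above) of how torch.masked_select with a broadcast [N, 1] mask pulls out the boxes assigned to one pyramid level and reshapes them back to [num_selected, 4].

import torch

rois = torch.tensor([[0.1, 0.1, 0.5, 0.5],
                     [0.2, 0.2, 0.9, 0.9],
                     [0.0, 0.0, 0.3, 0.3]])
roi_level = torch.tensor([2, 4, 2])                # pyramid level assigned to each box

ix = torch.unsqueeze(torch.eq(roi_level, 2), 1)    # [3, 1] mask, broadcast over the 4 box coords
level_boxes = torch.masked_select(rois, ix).view(-1, 4)
print(level_boxes)
# tensor([[0.1000, 0.1000, 0.5000, 0.5000],
#         [0.0000, 0.0000, 0.3000, 0.3000]])
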
Beispiel #31
0
def REINFORCE(training_pairs, policy_nn, optimizer, num_episodes, relation=None):
	with open(graphpath) as f:
		content = f.readlines()
	kb = KB()
	for line in content:
		ent1, rel, ent2 = line.rsplit()
		kb.addRelation(ent1, rel, ent2) # Each line is a triple, represented with strings instead of numbers
		
	dropout = nn.Dropout(dynamic_action_dropout_rate)

	train = training_pairs

	success = 0

	path_found = set()
	path_found_entity = []
	path_relation_found = []
	success_cnt_list = []

	env = Env(dataPath, train[0], model=args.model)
	# Initialize the environment

	for i_episode in range(num_episodes):
	# for i_episode in range(15):
		start = time.time()
		print ('Episode %d' % i_episode)
		sample = train[random.choice(range(len(training_pairs)))]
		print ('Training sample: ', sample[:-1])

		if relation is None:
			env = Env(dataPath, sample, args.model)
		else:
			env.path = []
			env.path_relations = []

		sample = sample.split()
		state_idx = [env.entity2id_[sample[0]], env.entity2id_[sample[1]], 0]

		episode = []

		state_batch_negative = []
		lstm_input_batch_negative = []
		hidden_batch_negative = []
		cell_batch_negative = []
		action_batch_negative = []
		now_embedding_batch_negative = []
		neighbour_embeddings_list_batch_negative = []

		state_batch_positive = []
		lstm_input_batch_positive = []
		hidden_batch_positive = []
		cell_batch_positive = []
		action_batch_positive = []
		now_embedding_batch_positive = []
		neighbour_embeddings_list_batch_positive = []

		hidden_this_time = torch.zeros(3, 1, hidden_dim)
		cell_this_time = torch.zeros(3, 1, hidden_dim)
		if USE_CUDA:
			hidden_this_time = hidden_this_time.cuda()
			cell_this_time = cell_this_time.cuda()

		forward_node_list = []

		for t in count():
		# for t in range(10):
			state_vec = floatTensor(env.idx_state(state_idx))
			state = torch.cat([state_vec, hidden_this_time[-1]], dim=1) # Only use the last layer's output
			lstm_input = state_vec.unsqueeze(1)

			now_embedding = floatTensor(env.entity2vec[[state_idx[0]]])

			connected_node_list = []
			if state_idx[0] in env.entity2link:
				for rel in env.entity2link[state_idx[0]]:
					connected_node_list.extend(env.entity2link[state_idx[0]][rel])
			connected_node_list = list(set(connected_node_list))
			if len(connected_node_list) == 0:
				neighbour_embeddings_list = [torch.zeros(1, embedding_dim).cuda() if USE_CUDA else torch.zeros(1, embedding_dim)]
			else:
				neighbour_embeddings_list = [floatTensor(env.entity2vec[connected_node_list])]

			action_probs, lstm_output, hidden_new, cell_new = policy_nn(state, lstm_input, hidden_this_time, cell_this_time, now_embedding, neighbour_embeddings_list)

			# Action Dropout
			dropout_action_probs = dropout(action_probs)
			# print(dropout_action_probs.shape)
			probability = np.squeeze(dropout_action_probs.cpu().detach().numpy())
			probability = probability / sum(probability)
			action_chosen = np.random.choice(np.arange(action_space), p = probability)

			reward, new_state, done = env.interact(state_idx, action_chosen)
			
			if reward == -1: # the action fails for this step
				state_batch_negative.append(state)
				lstm_input_batch_negative.append(lstm_input)
				hidden_batch_negative.append(hidden_this_time)
				cell_batch_negative.append(cell_this_time)
				action_batch_negative.append(action_chosen)
				now_embedding_batch_negative.append(now_embedding)
				neighbour_embeddings_list_batch_negative.append(neighbour_embeddings_list[0])

				# Force to choose a valid action to go forward
				try:
					valid_action_list = list(env.entity2link[state_idx[0]].keys()) 
					probability = probability[valid_action_list]
					# print("Line 288: ", sum(probability))
					probability = probability / sum(probability)
					# print("Line 288: ", probability)
					valid_action_chosen = np.random.choice(valid_action_list, p = probability)
					valid_reward, valid_new_state, valid_done = env.interact(state_idx, valid_action_chosen)

					reward, new_state, done = valid_reward, valid_new_state, valid_done

					if new_state is None:
						forward_node_list.append(env.entity2id_[sample[1]]) # The right tail entity
					else:
						forward_node_list.append(new_state[0])

					state_batch_positive.append(state)
					lstm_input_batch_positive.append(lstm_input)
					hidden_batch_positive.append(hidden_this_time)
					cell_batch_positive.append(cell_this_time)
					action_batch_positive.append(valid_action_chosen)
					now_embedding_batch_positive.append(now_embedding)
					neighbour_embeddings_list_batch_positive.append(neighbour_embeddings_list[0])

					hidden_this_time = hidden_new
					cell_this_time = cell_new

				except:
					print("Cannot find a valid action!")

			else: # the action found a valid path to move forward
				if new_state is None:
					forward_node_list.append(env.entity2id_[sample[1]]) # The right tail entity
				else:
					forward_node_list.append(new_state[0])

				state_batch_positive.append(state)
				lstm_input_batch_positive.append(lstm_input)
				hidden_batch_positive.append(hidden_this_time)
				cell_batch_positive.append(cell_this_time)
				action_batch_positive.append(action_chosen)
				now_embedding_batch_positive.append(now_embedding)
				neighbour_embeddings_list_batch_positive.append(neighbour_embeddings_list[0])

				hidden_this_time = hidden_new
				cell_this_time = cell_new

			new_state_vec = env.idx_state(new_state)
			episode.append(Transition(state = state_vec, action = action_chosen, next_state = new_state_vec, reward = reward))

			if done or t == max_steps:
				break

			state_idx = new_state
			
		# Discourage the agent when it chooses an invalid step
		if len(state_batch_negative) != 0 and done != 1:
			print ('Penalty to invalid steps:', len(state_batch_negative))
			
			policy_nn.zero_grad()
			action_mask = byteTensor(convert_to_one_hot(np.array(action_batch_negative), depth = action_space))
			# action_prob = torch.stack(action_prob_batch_negative).squeeze(1)
			# print(state_batch_negative[0].shape)
			state = torch.cat(state_batch_negative, dim=0)
			lstm_input = torch.cat(lstm_input_batch_negative, dim=1)
			hidden = torch.cat(hidden_batch_negative, dim=1)
			cell = torch.cat(cell_batch_negative, dim=1)
			now_embedding = torch.cat(now_embedding_batch_negative, dim=0)
			action_prob, lstm_output, hidden_new, cell_new = policy_nn(state, lstm_input, hidden, cell, now_embedding, neighbour_embeddings_list_batch_negative)
			# print(action_prob.shape)
			picked_action_prob = torch.masked_select(action_prob, action_mask)
			print(picked_action_prob)
			loss = -torch.sum(torch.log(picked_action_prob) * args.wrong_reward) # Reward for each invalid action is wrong_reward
			loss.backward(retain_graph=True)
			torch.nn.utils.clip_grad_norm(policy_nn.parameters(), 0.2)
			optimizer.step()
			
		print ('----- FINAL PATH -----')
		print ('\t'.join(env.path))
		print ('PATH LENGTH', len(env.path))
		print ('----- FINAL PATH -----')
		
		# If the agent succeeds, do one optimization step
		if done == 1:
			print ('Success')
			
			path_found_entity.append(path_clean(' -> '.join(env.path)))

			success += 1

			# Compute the reward for a successful episode.
			path_length = len(env.path)
			length_reward = 1/path_length
			global_reward = 1

			if len(path_found) != 0:
				path_found_embedding = [env.path_embedding(path.split(' -> ')) for path in path_found]
				curr_path_embedding = env.path_embedding(env.path_relations)
				path_found_embedding = np.reshape(path_found_embedding, (-1,embedding_dim))
				cos_sim = cosine_similarity(path_found_embedding, curr_path_embedding)
				diverse_reward = -np.mean(cos_sim)
				print ('diverse_reward', diverse_reward)
				total_reward = args.global_reward_weight * global_reward + args.length_reward_weight * length_reward + args.diverse_reward_weight * diverse_reward 
			else:
				total_reward = args.global_reward_weight * global_reward + (args.length_reward_weight + args.diverse_reward_weight) * length_reward
			path_found.add(' -> '.join(env.path_relations))

			# total_reward = 0.1*global_reward + 0.9*length_reward
			

			policy_nn.zero_grad()
			action_mask = byteTensor(convert_to_one_hot(np.array(action_batch_positive), depth = action_space))
			state = torch.cat(state_batch_positive, dim=0)
			lstm_input = torch.cat(lstm_input_batch_positive, dim=1)
			hidden = torch.cat(hidden_batch_positive, dim=1)
			cell = torch.cat(cell_batch_positive, dim=1)
			now_embedding = torch.cat(now_embedding_batch_positive, dim=0)
			action_prob, lstm_output, hidden_new, cell_new = policy_nn(state, lstm_input, hidden, cell, now_embedding, neighbour_embeddings_list_batch_positive)
			# print(action_prob.shape)
			picked_action_prob = torch.masked_select(action_prob, action_mask)
			loss = -torch.sum(torch.log(picked_action_prob) * total_reward) 
			# The reward for each step of a successful episode is total_reward
			loss.backward(retain_graph=True)
			torch.nn.utils.clip_grad_norm(policy_nn.parameters(), 0.2)
			optimizer.step()
		else:

			if (len(state_batch_positive) != 0):
				# reward shaping

				if args.reward_shaping_model == "TransH":
					# print("Enters TransH.")
					head = ent_embedding[[env.entity2id_[sample[0]]]]
					rel_emb = rel_embedding[[env.relation2id_[relation.replace('_', ':')]]]
					norm = norm_embedding[[env.relation2id_[relation.replace('_', ':')]]]
					tail = ent_embedding[forward_node_list]
					head_proj = head - np.sum(head * norm, axis=1, keepdims=True) * norm
					tail_proj = tail - np.sum(tail * norm, axis=1, keepdims=True) * norm
					scores = -np.sum(np.abs(head_proj + rel_emb - tail_proj), axis = 1)
					# print(scores)

				elif args.reward_shaping_model == "TransR":
					# print("Enters TransR.")
					head = ent_embedding[[env.entity2id_[sample[0]]]]
					rel_emb = rel_embedding[[env.relation2id_[relation.replace('_', ':')]]]
					norm = norm_embedding[[env.relation2id_[relation.replace('_', ':')]]].squeeze(0)
					tail = ent_embedding[forward_node_list]
					head_proj = np.matmul(norm, head.T).T
					tail_proj = np.matmul(norm, tail.T).T
					scores = -np.sum(np.abs(head_proj + rel_emb - tail_proj), axis = 1)
					# print(scores)

				elif args.reward_shaping_model == "TransD":
					# print("Enters TransD.")
					head = ent_embedding[[env.entity2id_[sample[0]]]]
					head_norm = ent_norm_embedding[[env.entity2id_[sample[0]]]]
					tail = ent_embedding[forward_node_list]
					tail_norm = ent_norm_embedding[forward_node_list]
					rel_emb = rel_embedding[[env.relation2id_[relation.replace('_', ':')]]]
					rel_norm = rel_norm_embedding[[env.relation2id_[relation.replace('_', ':')]]]
					head_proj = head + np.sum(head * head_norm, axis=1, keepdims=True) * rel_norm
					tail_proj = tail + np.sum(tail * tail_norm, axis=1, keepdims=True) * rel_norm
					scores = -np.sum(np.abs(head_proj + rel_emb - tail_proj), axis = 1)
					# print(scores)

				elif args.reward_shaping_model == "ProjE":
					# print("Enter ProjE.")
					h = ent_embedding[[env.entity2id_[sample[0]]]]
					r = rel_embedding[[env.relation2id_[relation.replace('_', ':')]]]
					ent_mat = np.transpose(ent_embedding)
					hr = h * simple_hr_combination_weights[:100] + r * simple_hr_combination_weights[100:]
					hrt_res = np.matmul(np.tanh(hr + combination_bias_hr), ent_mat)
					scores = hrt_res[0][forward_node_list]
					scores = torch.log(torch.sigmoid(torch.FloatTensor(scores))).numpy()
					# print(scores)

				elif args.reward_shaping_model == "ConvE":
					# print("Enters ConvE.")
					rel_id = TransE_to_ConvE_id_relation[env.relation2id_[relation.replace('_', ':')]]
					head_id = TransE_to_ConvE_id_entity[env.entity2id_[sample[0]]]
					tail_id = [TransE_to_ConvE_id_entity[elem] for elem in forward_node_list]

					bs = ConvE_model.batch_size
					x_middle, output = ConvE_model(longTensor([head_id] + [0] * (bs - 1)), longTensor([rel_id] * bs))

					scores = np.log(output[0][tail_id].detach().cpu().numpy() + 10 ** -30)
					# print(scores)

				else:
					head_embedding = ent_embedding[env.entity2id_[sample[0]]]
					query_embedding = rel_embedding[env.relation2id_[relation.replace('_', ':')]]
					tail_embedding = ent_embedding[forward_node_list]
					scores = -np.sum(np.abs(head_embedding + query_embedding - tail_embedding), axis = 1)

				policy_nn.zero_grad()
				action_mask = byteTensor(convert_to_one_hot(np.array(action_batch_positive), depth = action_space))
				state = torch.cat(state_batch_positive, dim=0)
				lstm_input = torch.cat(lstm_input_batch_positive, dim=1)
				hidden = torch.cat(hidden_batch_positive, dim=1)
				cell = torch.cat(cell_batch_positive, dim=1)
				now_embedding = torch.cat(now_embedding_batch_positive, dim=0)
				action_prob, lstm_output, hidden_new, cell_new = policy_nn(state, lstm_input, hidden, cell, now_embedding, neighbour_embeddings_list_batch_positive)
				# print(action_prob.shape)
				picked_action_prob = torch.masked_select(action_prob, action_mask)
				# print(picked_action_prob)
				loss = -torch.sum(torch.log(picked_action_prob) * floatTensor(scores) * args.useless_reward) 
				# The reward for each step of an unsuccessful episode is useless_reward
				loss.backward(retain_graph=True)
				torch.nn.utils.clip_grad_norm(policy_nn.parameters(), 0.2)
				optimizer.step()
			
			print ('Failed, do one teacher guideline update') # Force the agent to learn using a successful sample
			teacher_success_flag = False
			teacher_success_failed_times = 0
			while (not teacher_success_flag) and teacher_success_failed_times < 3:
				try:
					good_episodes = teacher(sample[0], sample[1], 1, env, graphpath, knowledge_base = kb, output_mode = 1) # Episode's ID instead of state!
					if len(good_episodes) == 0:
						teacher_success_failed_times += 1
					else:
						for item in good_episodes:
							if len(item) == 0:
								teacher_success_failed_times += 1
								break

							teacher_state_batch = []
							teacher_action_batch = []
							teacher_now_embedding_batch = []
							teacher_neighbour_embeddings_list_batch = []

							total_reward = 0.0*1 + 1*1/len(item)

							for t, transition in enumerate(item):
								teacher_state_batch.append(floatTensor(env.idx_state(transition.state)))
								teacher_action_batch.append(transition.action)
								teacher_now_embedding_batch.append(floatTensor(env.entity2vec[[transition.state[0]]]))

								connected_node_list = []
								if transition.state[0] in env.entity2link:
									for rel in env.entity2link[transition.state[0]]:
										connected_node_list.extend(env.entity2link[transition.state[0]][rel])
								connected_node_list = list(set(connected_node_list)) # Remove duplicates
								if len(connected_node_list) == 0:
									if USE_CUDA:
										neighbour_embeddings_list = torch.zeros(1, embedding_dim).cuda()
									else:
										neighbour_embeddings_list = torch.zeros(1, embedding_dim)

								else:
									neighbour_embeddings_list = floatTensor(env.entity2vec[connected_node_list])

								teacher_neighbour_embeddings_list_batch.append(neighbour_embeddings_list)
							   
							if (len(teacher_state_batch) != 0):
								hidden_this_time = torch.zeros(3, 1, hidden_dim)
								cell_this_time = torch.zeros(3, 1, hidden_dim)
								if USE_CUDA:
									hidden_this_time = hidden_this_time.cuda()
									cell_this_time = cell_this_time.cuda()

								state_batch_teacher = []
								lstm_input_batch_teacher = []
								hidden_batch_teacher = []
								cell_batch_teacher = []

								for idx, state_vec in enumerate(teacher_state_batch):
									state_vec = floatTensor(state_vec)
									state = torch.cat([state_vec, hidden_this_time[-1]], dim=1) # Only use the last layer's output
									lstm_input = state_vec.unsqueeze(1)
									now_embedding = teacher_now_embedding_batch[idx]
									teacher_neighbour_embeddings_list = [teacher_neighbour_embeddings_list_batch[idx]]
									action_prob, lstm_output, hidden_new, cell_new = policy_nn(state, lstm_input, hidden_this_time, cell_this_time, now_embedding, teacher_neighbour_embeddings_list)
									# print(action_prob.shape)
									hidden_this_time = hidden_new
									cell_this_time = cell_new

									state_batch_teacher.append(state)
									lstm_input_batch_teacher.append(lstm_input)
									hidden_batch_teacher.append(hidden_this_time)
									cell_batch_teacher.append(cell_this_time)

								now_embedding = torch.cat(teacher_now_embedding_batch, dim=0)

								policy_nn.zero_grad()
								action_mask = byteTensor(convert_to_one_hot(np.array(teacher_action_batch), depth = action_space))
								state = torch.cat(state_batch_teacher, dim=0)
								lstm_input = torch.cat(lstm_input_batch_teacher, dim=1)
								hidden = torch.cat(hidden_batch_teacher, dim=1)
								cell = torch.cat(cell_batch_teacher, dim=1)
								action_prob, lstm_output, hidden_new, cell_new = policy_nn(state, lstm_input, hidden, cell, now_embedding, teacher_neighbour_embeddings_list_batch)
								# print(action_prob.shape)
								picked_action_prob = torch.masked_select(action_prob, action_mask)
								loss = -torch.sum(torch.log(picked_action_prob) * args.teacher_reward) # The reward for each step of a teacher episode is teacher_reward
								loss.backward(retain_graph=True)
								torch.nn.utils.clip_grad_norm(policy_nn.parameters(), 0.2)
								optimizer.step()

								teacher_success_flag = True
							else:
								teacher_success_failed_times += 1
					
				except Exception as e:
					print ('Teacher guideline failed')
					teacher_success_failed_times += 10

		print ('Episode time: ', time.time() - start)
		print ('\n')
		print ("Retrain Success count: ", success)
		success_cnt_list.append(success)
	print ('Retrain Success percentage:', success/num_episodes)
	print (success_cnt_list)
	
	for path in path_found_entity: # Only successful paths
		rel_ent = path.split(' -> ')
		path_relation = []
		for idx, item in enumerate(rel_ent):
			if idx%2 == 0:
				path_relation.append(item)
		path_relation_found.append(' -> '.join(path_relation))
		
	relation_path_stats = collections.Counter(path_relation_found).items()
	relation_path_stats = sorted(relation_path_stats, key = lambda x:x[1], reverse=True) # Rank the paths according to their frequency.
	
	f = open(feature_stats, 'w')
	for item in relation_path_stats:
		f.write(item[0]+'\t'+str(item[1])+'\n')
	f.close()
	print ('Path stats saved')

	with open("logs/training/" + relation + ".out", 'a') as fw:
		fw.write(save_file_header + '_path_stats.txt' + '\n')
		fw.write('Retrain Success percentage: ' + str(success/num_episodes) + '\n')
		fw.write("Retrain success cnt list: ")
		fw.write(" ".join([str(elem) for elem in success_cnt_list]) + '\n')
		fw.write("\n")

	return 
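
A compact sketch (hypothetical shapes and values, separate from the REINFORCE loop above) of the action-mask trick used for the policy-gradient loss: a one-hot boolean mask over the action dimension lets torch.masked_select pick out the probability of the action actually taken at each step.

import torch

action_space = 4
action_probs = torch.tensor([[0.1, 0.6, 0.2, 0.1],
                             [0.3, 0.3, 0.2, 0.2]])       # [num_steps, action_space]
actions_taken = torch.tensor([1, 0])                      # action chosen at each step

action_mask = torch.nn.functional.one_hot(actions_taken, num_classes=action_space).bool()
picked_action_prob = torch.masked_select(action_probs, action_mask)   # tensor([0.6000, 0.3000])

reward = 0.5                                              # stand-in for total_reward
loss = -torch.sum(torch.log(picked_action_prob) * reward)
print(picked_action_prob, loss)
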
Beispiel #32
0
    def _generate(
        self,
        sample: Dict[str, Dict[str, Tensor]],
        prefix_tokens: Optional[Tensor] = None,
        bos_token: Optional[int] = None,
    ):
        net_input = sample["net_input"]
        src_tokens = net_input["src_tokens"]
        if src_tokens.dim() > 2:
            src_lengths = net_input["src_lengths"]
        else:
            # source length is the number of tokens, excluding EOS and padding
            src_lengths = ((src_tokens.ne(self.eos)
                            & src_tokens.ne(self.pad)).long().sum(dim=1))
        # bsz: total number of sentences in beam
        input_size = src_tokens.size()
        bsz, src_len = input_size[0], input_size[1]
        beam_size = self.beam_size

        max_len: int = -1
        if self.match_source_len:
            max_len = src_lengths.max().item()
        else:
            max_len = min(
                int(self.max_len_a * src_len + self.max_len_b),
                # exclude the EOS marker
                self.model.max_decoder_positions() - 1,
            )
        assert (
            self.min_len <= max_len
        ), "min_len cannot be larger than max_len, please adjust these!"
        # compute the encoder output for each beam
        encoder_outs = self.model.forward_encoder(net_input)

        # placeholder of indices for bsz * beam_size to hold tokens and accumulative scores
        new_order = torch.arange(bsz).view(-1, 1).repeat(1, beam_size).view(-1)
        new_order = new_order.to(src_tokens.device).long()
        encoder_outs = self.model.reorder_encoder_out(encoder_outs, new_order)
        # ensure encoder_outs is a List.
        assert encoder_outs is not None

        # initialize buffers
        scores = (torch.zeros(bsz * beam_size,
                              max_len + 1).to(src_tokens).float()
                  )  # +1 for eos; pad is never chosen for scoring
        tokens = (torch.zeros(bsz * beam_size,
                              max_len + 2).to(src_tokens).long().fill_(
                                  self.pad))  # +2 for eos and pad
        tokens[:, 0] = self.eos if bos_token is None else bos_token
        attn: Optional[Tensor] = None

        # The blacklist indicates candidates that should be ignored.
        # For example, suppose we're sampling and have already finalized 2/5
        # samples. Then the blacklist would mark 2 positions as being ignored,
        # so that we only finalize the remaining 3 samples.
        blacklist = (torch.zeros(bsz, beam_size).to(src_tokens).eq(-1)
                     )  # forward and backward-compatible False mask

        # list of completed sentences
        finalized = torch.jit.annotate(
            List[List[Dict[str, Tensor]]],
            [
                torch.jit.annotate(List[Dict[str, Tensor]], [])
                for i in range(bsz)
            ],
        )  # contains lists of dictionaries of information about the hypothesis being finalized at each step

        finished = [
            False for i in range(bsz)
        ]  # a boolean array indicating if the sentence at the index is finished or not
        num_remaining_sent = bsz  # number of sentences remaining

        # number of candidate hypos per step
        cand_size = 2 * beam_size  # 2 x beam size in case half are EOS

        # offset arrays for converting between different indexing schemes
        bbsz_offsets = (torch.arange(0, bsz) *
                        beam_size).unsqueeze(1).type_as(tokens)
        cand_offsets = torch.arange(0, cand_size).type_as(tokens)

        reorder_state: Optional[Tensor] = None
        batch_idxs: Optional[Tensor] = None
        for step in range(max_len + 1):  # one extra step for EOS marker
            # reorder decoder internal states based on the prev choice of beams
            # print(f'step: {step}')
            if reorder_state is not None:
                if batch_idxs is not None:
                    # update beam indices to take into account removed sentences
                    corr = batch_idxs - torch.arange(
                        batch_idxs.numel()).type_as(batch_idxs)
                    reorder_state.view(-1, beam_size).add_(
                        corr.unsqueeze(-1) * beam_size)
                self.model.reorder_incremental_state(reorder_state)
                encoder_outs = self.model.reorder_encoder_out(
                    encoder_outs, reorder_state)

            lprobs, avg_attn_scores = self.model.forward_decoder(
                tokens[:, :step + 1], encoder_outs, self.temperature)
            lprobs[lprobs != lprobs] = torch.tensor(-math.inf).to(lprobs)  # replace NaN scores with -inf

            lprobs[:, self.pad] = -math.inf  # never select pad
            lprobs[:, self.unk] -= self.unk_penalty  # apply unk penalty

            # handle max length constraint
            if step >= max_len:
                lprobs[:, :self.eos] = -math.inf
                lprobs[:, self.eos + 1:] = -math.inf
            elif self.eos_factor is not None:
                # only consider EOS if its score is no less than a specified
                # factor of the best candidate score
                disallow_eos_mask = lprobs[:, self.eos] < self.eos_factor * lprobs.max(dim=1)[0]
                lprobs[disallow_eos_mask, self.eos] = -math.inf

            # handle prefix tokens (possibly with different lengths)
            if (prefix_tokens is not None and step < prefix_tokens.size(1)
                    and step < max_len):
                lprobs, tokens, scores = self._prefix_tokens(
                    step, lprobs, scores, tokens, prefix_tokens, beam_size)
            elif step < self.min_len:
                # minimum length constraint (does not apply if using prefix_tokens)
                lprobs[:, self.eos] = -math.inf

            # Record attention scores, only support avg_attn_scores is a Tensor
            if avg_attn_scores is not None:
                if attn is None:
                    attn = torch.empty(bsz * beam_size,
                                       avg_attn_scores.size(1),
                                       max_len + 2).to(scores)
                attn[:, :, step + 1].copy_(avg_attn_scores)

            scores = scores.type_as(lprobs)
            eos_bbsz_idx = torch.empty(0).to(
                tokens
            )  # indices of hypothesis ending with eos (finished sentences)
            eos_scores = torch.empty(0).to(
                scores
            )  # scores of hypothesis ending with eos (finished sentences)

            self.search.set_src_lengths(src_lengths)

            if self.no_repeat_ngram_size > 0:
                lprobs = self._no_repeat_ngram(tokens, lprobs, bsz, beam_size,
                                               step)

            cand_scores, cand_indices, cand_beams = self.search.step(
                step,
                lprobs.view(bsz, -1, self.vocab_size),
                scores.view(bsz, beam_size, -1)[:, :, :step],
            )

            # cand_bbsz_idx contains beam indices for the top candidate
            # hypotheses, with a range of values: [0, bsz*beam_size),
            # and dimensions: [bsz, cand_size]
            cand_bbsz_idx = cand_beams.add(bbsz_offsets)

            # finalize hypotheses that end in eos
            eos_mask = cand_indices.eq(self.eos) & cand_scores.ne(-math.inf)
            eos_mask[:, :beam_size][blacklist] = torch.tensor(0).to(eos_mask)

            # only consider eos when it's among the top beam_size indices
            eos_bbsz_idx = torch.masked_select(cand_bbsz_idx[:, :beam_size],
                                               mask=eos_mask[:, :beam_size])

            finalized_sents: List[int] = []
            if eos_bbsz_idx.numel() > 0:
                eos_scores = torch.masked_select(cand_scores[:, :beam_size],
                                                 mask=eos_mask[:, :beam_size])
                finalized_sents = self.finalize_hypos(
                    step,
                    eos_bbsz_idx,
                    eos_scores,
                    tokens,
                    scores,
                    finalized,
                    finished,
                    beam_size,
                    attn,
                    src_lengths,
                    max_len,
                )
                num_remaining_sent -= len(finalized_sents)

            assert num_remaining_sent >= 0
            if num_remaining_sent == 0:
                break
            assert step < max_len

            if len(finalized_sents) > 0:
                new_bsz = bsz - len(finalized_sents)

                # construct batch_idxs which holds indices of batches to keep for the next pass
                batch_mask = torch.ones(bsz).to(cand_indices)
                batch_mask[torch.tensor(finalized_sents).to(
                    cand_indices)] = torch.tensor(0).to(batch_mask)
                batch_idxs = batch_mask.nonzero().squeeze(-1)

                eos_mask = eos_mask[batch_idxs]
                cand_beams = cand_beams[batch_idxs]
                bbsz_offsets.resize_(new_bsz, 1)
                cand_bbsz_idx = cand_beams.add(bbsz_offsets)
                cand_scores = cand_scores[batch_idxs]
                cand_indices = cand_indices[batch_idxs]

                if prefix_tokens is not None:
                    prefix_tokens = prefix_tokens[batch_idxs]
                src_lengths = src_lengths[batch_idxs]
                blacklist = blacklist[batch_idxs]

                scores = scores.view(bsz, -1)[batch_idxs].view(
                    new_bsz * beam_size, -1)
                tokens = tokens.view(bsz, -1)[batch_idxs].view(
                    new_bsz * beam_size, -1)
                if attn is not None:
                    attn = attn.view(bsz, -1)[batch_idxs].view(
                        new_bsz * beam_size, attn.size(1), -1)
                bsz = new_bsz
            else:
                batch_idxs = None
            # set active_mask so that values > cand_size indicate eos hypos
            # and values < cand_size indicate candidate active hypos.
            # After, the min values per row are the top candidate active hypos

            # Rewrite the operator since element-wise `or` is not supported in TorchScript.

            eos_mask[:, :beam_size] = ~((~blacklist) &
                                        (~eos_mask[:, :beam_size]))
            active_mask = torch.add(
                eos_mask.type_as(cand_offsets) * cand_size,
                cand_offsets[:eos_mask.size(1)],
            )

            # get the top beam_size active hypotheses, which are just the hypos
            # with the smallest values in active_mask
            new_blacklist, active_hypos = torch.topk(active_mask,
                                                     k=beam_size,
                                                     dim=1,
                                                     largest=False)

            # update blacklist to ignore any finalized hypos
            blacklist = new_blacklist.ge(cand_size)[:, :beam_size]
            assert (~blacklist).any(dim=1).all()

            active_bbsz_idx = torch.gather(cand_bbsz_idx,
                                           dim=1,
                                           index=active_hypos)
            active_scores = torch.gather(cand_scores,
                                         dim=1,
                                         index=active_hypos)

            active_bbsz_idx = active_bbsz_idx.view(-1)
            active_scores = active_scores.view(-1)

            # copy tokens and scores for active hypotheses
            tokens[:, :step + 1] = torch.index_select(tokens[:, :step + 1],
                                                      dim=0,
                                                      index=active_bbsz_idx)
            tokens.view(bsz, beam_size,
                        -1)[:, :, step + 1] = torch.gather(cand_indices,
                                                           dim=1,
                                                           index=active_hypos)
            if step > 0:
                scores[:, :step] = torch.index_select(scores[:, :step],
                                                      dim=0,
                                                      index=active_bbsz_idx)
            scores.view(bsz, beam_size,
                        -1)[:, :, step] = torch.gather(cand_scores,
                                                       dim=1,
                                                       index=active_hypos)

            # copy attention for active hypotheses
            if attn is not None:
                attn[:, :, :step + 2] = torch.index_select(
                    attn[:, :, :step + 2], dim=0, index=active_bbsz_idx)

            # reorder incremental state in decoder
            reorder_state = active_bbsz_idx

        # sort by score descending
        for sent in range(len(finalized)):
            # make into beam container
            BCList = [
                BeamContainer(elem["score"].item(), elem)
                for elem in finalized[sent]
            ]
            BCList.sort()
            BCList.reverse()
            finalized[sent] = torch.jit.annotate(List[Dict[str, Tensor]],
                                                 [x.elem for x in BCList])

        return finalized
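
A standalone sketch (toy numbers) of the active_mask / topk trick used above: candidates that ended in EOS are pushed to values >= cand_size, so taking the cand_size smallest offset values per row yields the surviving active hypotheses.

import torch

beam_size, cand_size = 2, 4
eos_mask = torch.tensor([[True, False, False, True]])   # which of the 2*beam_size candidates ended in EOS
cand_offsets = torch.arange(cand_size)

# EOS candidates get values >= cand_size; active candidates keep their small offsets
active_mask = eos_mask.long() * cand_size + cand_offsets
new_blacklist, active_hypos = torch.topk(active_mask, k=beam_size, dim=1, largest=False)
print(active_hypos)                  # tensor([[1, 2]]) -> candidates 1 and 2 stay active
print(new_blacklist.ge(cand_size))   # tensor([[False, False]]) -> no finalized hypo had to be kept
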
Beispiel #33
0
    def _generate(self, src_tokens, src_lengths, beam_size=None, maxlen=None, prefix_tokens=None):
        bsz, srclen = src_tokens.size()
        maxlen = min(maxlen, self.maxlen) if maxlen is not None else self.maxlen

        # the max beam size is the dictionary size - 1, since we never select pad
        beam_size = beam_size if beam_size is not None else self.beam_size
        beam_size = min(beam_size, self.vocab_size - 1)

        encoder_outs = []
        incremental_states = {}
        for model in self.models:
            if not self.retain_dropout:
                model.eval()
            if isinstance(model.decoder, FairseqIncrementalDecoder):
                incremental_states[model] = {}
            else:
                incremental_states[model] = None

            # compute the encoder output for each beam
            encoder_out = model.encoder(
                src_tokens.repeat(1, beam_size).view(-1, srclen),
                src_lengths.expand(beam_size, src_lengths.numel()).t().contiguous().view(-1),
            )
            encoder_outs.append(encoder_out)

        # initialize buffers
        scores = src_tokens.data.new(bsz * beam_size, maxlen + 1).float().fill_(0)
        scores_buf = scores.clone()
        tokens = src_tokens.data.new(bsz * beam_size, maxlen + 2).fill_(self.pad)
        tokens_buf = tokens.clone()
        tokens[:, 0] = self.eos
        attn, attn_buf = None, None
        nonpad_idxs = None

        # list of completed sentences
        finalized = [[] for i in range(bsz)]
        finished = [False for i in range(bsz)]
        worst_finalized = [{'idx': None, 'score': -math.inf} for i in range(bsz)]
        num_remaining_sent = bsz

        # number of candidate hypos per step
        cand_size = 2 * beam_size  # 2 x beam size in case half are EOS

        # offset arrays for converting between different indexing schemes
        bbsz_offsets = (torch.arange(0, bsz) * beam_size).unsqueeze(1).type_as(tokens)
        cand_offsets = torch.arange(0, cand_size).type_as(tokens)

        # helper function for allocating buffers on the fly
        buffers = {}

        def buffer(name, type_of=tokens):  # noqa
            if name not in buffers:
                buffers[name] = type_of.new()
            return buffers[name]

        def is_finished(sent, step, unfinalized_scores=None):
            """
            Check whether we've finished generation for a given sentence, by
            comparing the worst score among finalized hypotheses to the best
            possible score among unfinalized hypotheses.
            """
            assert len(finalized[sent]) <= beam_size
            if len(finalized[sent]) == beam_size:
                if self.stop_early or step == maxlen or unfinalized_scores is None:
                    return True
                # stop if the best unfinalized score is worse than the worst
                # finalized one
                best_unfinalized_score = unfinalized_scores[sent].max()
                if self.normalize_scores:
                    best_unfinalized_score /= maxlen ** self.len_penalty
                if worst_finalized[sent]['score'] >= best_unfinalized_score:
                    return True
            return False

        def finalize_hypos(step, bbsz_idx, eos_scores, unfinalized_scores=None):
            """
            Finalize the given hypotheses at this step, while keeping the total
            number of finalized hypotheses per sentence <= beam_size.
            Note: the input must be in the desired finalization order, so that
            hypotheses that appear earlier in the input are preferred to those
            that appear later.
            Args:
                step: current time step
                bbsz_idx: A vector of indices in the range [0, bsz*beam_size),
                    indicating which hypotheses to finalize
                eos_scores: A vector of the same size as bbsz_idx containing
                    scores for each hypothesis
                unfinalized_scores: A vector containing scores for all
                    unfinalized hypotheses
            """
            assert bbsz_idx.numel() == eos_scores.numel()

            # clone relevant token and attention tensors
            tokens_clone = tokens.index_select(0, bbsz_idx)
            tokens_clone = tokens_clone[:, 1:step + 2]  # skip the first index, which is EOS
            tokens_clone[:, step] = self.eos
            attn_clone = attn.index_select(0, bbsz_idx)[:, :, 1:step+2] if attn is not None else None

            # compute scores per token position
            pos_scores = scores.index_select(0, bbsz_idx)[:, :step+1]
            pos_scores[:, step] = eos_scores
            # convert from cumulative to per-position scores
            pos_scores[:, 1:] = pos_scores[:, 1:] - pos_scores[:, :-1]

            # normalize sentence-level scores
            if self.normalize_scores:
                eos_scores /= (step + 1) ** self.len_penalty

            cum_unfin = []
            prev = 0
            for f in finished:
                if f:
                    prev += 1
                else:
                    cum_unfin.append(prev)

            sents_seen = set()
            for i, (idx, score) in enumerate(zip(bbsz_idx.tolist(), eos_scores.tolist())):
                unfin_idx = idx // beam_size
                sent = unfin_idx + cum_unfin[unfin_idx]

                sents_seen.add((sent, unfin_idx))

                def get_hypo():

                    if attn_clone is not None:
                        # remove padding tokens from attn scores
                        hypo_attn = attn_clone[i][nonpad_idxs[sent]]
                        _, alignment = hypo_attn.max(dim=0)
                    else:
                        hypo_attn = None
                        alignment = None

                    return {
                        'tokens': tokens_clone[i],
                        'score': score,
                        'attention': hypo_attn,  # src_len x tgt_len
                        'alignment': alignment,
                        'positional_scores': pos_scores[i],
                    }

                if len(finalized[sent]) < beam_size:
                    finalized[sent].append(get_hypo())
                elif not self.stop_early and score > worst_finalized[sent]['score']:
                    # replace worst hypo for this sentence with new/better one
                    worst_idx = worst_finalized[sent]['idx']
                    if worst_idx is not None:
                        finalized[sent][worst_idx] = get_hypo()

                    # find new worst finalized hypo for this sentence
                    idx, s = min(enumerate(finalized[sent]), key=lambda r: r[1]['score'])
                    worst_finalized[sent] = {
                        'score': s['score'],
                        'idx': idx,
                    }

            newly_finished = []
            for sent, unfin_idx in sents_seen:
                # check termination conditions for this sentence
                if not finished[sent] and is_finished(sent, step, unfinalized_scores):
                    finished[sent] = True
                    newly_finished.append(unfin_idx)
            return newly_finished

        reorder_state = None
        batch_idxs = None
        for step in range(maxlen + 1):  # one extra step for EOS marker
            # reorder decoder internal states based on the prev choice of beams
            if reorder_state is not None:
                if batch_idxs is not None:
                    # update beam indices to take into account removed sentences
                    corr = batch_idxs - torch.arange(batch_idxs.numel()).type_as(batch_idxs)
                    reorder_state.view(-1, beam_size).add_(corr.unsqueeze(-1) * beam_size)
                for i, model in enumerate(self.models):
                    if isinstance(model.decoder, FairseqIncrementalDecoder):
                        model.decoder.reorder_incremental_state(incremental_states[model], reorder_state)
                    encoder_outs[i] = model.encoder.reorder_encoder_out(encoder_outs[i], reorder_state)

            lprobs, avg_attn_scores = self._decode(tokens[:, :step + 1], encoder_outs, incremental_states)

            lprobs[:, self.pad] = -math.inf  # never select pad
            lprobs[:, self.unk] -= self.unk_penalty  # apply unk penalty

            # Record attention scores
            if avg_attn_scores is not None:
                if attn is None:
                    attn = scores.new(bsz * beam_size, src_tokens.size(1), maxlen + 2)
                    attn_buf = attn.clone()
                    nonpad_idxs = src_tokens.ne(self.pad)
                attn[:, :, step + 1].copy_(avg_attn_scores)

            scores = scores.type_as(lprobs)
            scores_buf = scores_buf.type_as(lprobs)
            eos_bbsz_idx = buffer('eos_bbsz_idx')
            eos_scores = buffer('eos_scores', type_of=scores)
            if step < maxlen:
                if prefix_tokens is not None and step < prefix_tokens.size(1):
                    probs_slice = lprobs.view(bsz, -1, lprobs.size(-1))[:, 0, :]
                    cand_scores = torch.gather(
                        probs_slice, dim=1,
                        index=prefix_tokens[:, step].view(-1, 1).data
                    ).expand(-1, cand_size)
                    cand_indices = prefix_tokens[:, step].view(-1, 1).expand(bsz, cand_size).data
                    cand_beams = torch.zeros_like(cand_indices)
                else:
                    cand_scores, cand_indices, cand_beams = self.search.step(
                        step,
                        lprobs.view(bsz, -1, self.vocab_size),
                        scores.view(bsz, beam_size, -1)[:, :, :step],
                    )
            else:
                # make probs contain cumulative scores for each hypothesis
                lprobs.add_(scores[:, step - 1].unsqueeze(-1))

                # finalize all active hypotheses once we hit maxlen
                # pick the hypothesis with the highest prob of EOS right now
                torch.sort(
                    lprobs[:, self.eos],
                    descending=True,
                    out=(eos_scores, eos_bbsz_idx),
                )
                num_remaining_sent -= len(finalize_hypos(
                    step, eos_bbsz_idx, eos_scores))
                assert num_remaining_sent == 0
                break

            # cand_bbsz_idx contains beam indices for the top candidate
            # hypotheses, with a range of values: [0, bsz*beam_size),
            # and dimensions: [bsz, cand_size]
            cand_bbsz_idx = cand_beams.add(bbsz_offsets)

            # finalize hypotheses that end in eos
            eos_mask = cand_indices.eq(self.eos)

            finalized_sents = set()
            if step >= self.minlen:
                # only consider eos when it's among the top beam_size indices
                torch.masked_select(
                    cand_bbsz_idx[:, :beam_size],
                    mask=eos_mask[:, :beam_size],
                    out=eos_bbsz_idx,
                )
                if eos_bbsz_idx.numel() > 0:
                    torch.masked_select(
                        cand_scores[:, :beam_size],
                        mask=eos_mask[:, :beam_size],
                        out=eos_scores,
                    )
                    finalized_sents = finalize_hypos(
                        step, eos_bbsz_idx, eos_scores, cand_scores)
                    num_remaining_sent -= len(finalized_sents)

            assert num_remaining_sent >= 0
            if num_remaining_sent == 0:
                break
            assert step < maxlen

            if len(finalized_sents) > 0:
                new_bsz = bsz - len(finalized_sents)

                # construct batch_idxs which holds indices of batches to keep for the next pass
                batch_mask = cand_indices.new_ones(bsz)
                batch_mask[cand_indices.new(finalized_sents)] = 0
                batch_idxs = batch_mask.nonzero().squeeze(-1)

                eos_mask = eos_mask[batch_idxs]
                cand_beams = cand_beams[batch_idxs]
                bbsz_offsets.resize_(new_bsz, 1)
                cand_bbsz_idx = cand_beams.add(bbsz_offsets)

                cand_scores = cand_scores[batch_idxs]
                cand_indices = cand_indices[batch_idxs]
                if prefix_tokens is not None:
                    prefix_tokens = prefix_tokens[batch_idxs]

                scores = scores.view(bsz, -1)[batch_idxs].view(new_bsz * beam_size, -1)
                scores_buf.resize_as_(scores)
                tokens = tokens.view(bsz, -1)[batch_idxs].view(new_bsz * beam_size, -1)
                tokens_buf.resize_as_(tokens)
                if attn is not None:
                    attn = attn.view(bsz, -1)[batch_idxs].view(new_bsz * beam_size, attn.size(1), -1)
                    attn_buf.resize_as_(attn)
                bsz = new_bsz
            else:
                batch_idxs = None

            # set active_mask so that values > cand_size indicate eos hypos
            # and values < cand_size indicate candidate active hypos.
            # After, the min values per row are the top candidate active hypos
            active_mask = buffer('active_mask')
            torch.add(
                eos_mask.type_as(cand_offsets) * cand_size,
                cand_offsets[:eos_mask.size(1)],
                out=active_mask,
            )

            # get the top beam_size active hypotheses, which are just the hypos
            # with the smallest values in active_mask
            active_hypos, _ignore = buffer('active_hypos'), buffer('_ignore')
            torch.topk(
                active_mask, k=beam_size, dim=1, largest=False,
                out=(_ignore, active_hypos)
            )

            active_bbsz_idx = buffer('active_bbsz_idx')
            torch.gather(
                cand_bbsz_idx, dim=1, index=active_hypos,
                out=active_bbsz_idx,
            )
            active_scores = torch.gather(
                cand_scores, dim=1, index=active_hypos,
                out=scores[:, step].view(bsz, beam_size),
            )

            active_bbsz_idx = active_bbsz_idx.view(-1)
            active_scores = active_scores.view(-1)

            # copy tokens and scores for active hypotheses
            torch.index_select(
                tokens[:, :step + 1], dim=0, index=active_bbsz_idx,
                out=tokens_buf[:, :step + 1],
            )
            torch.gather(
                cand_indices, dim=1, index=active_hypos,
                out=tokens_buf.view(bsz, beam_size, -1)[:, :, step + 1],
            )
            if step > 0:
                torch.index_select(
                    scores[:, :step], dim=0, index=active_bbsz_idx,
                    out=scores_buf[:, :step],
                )
            torch.gather(
                cand_scores, dim=1, index=active_hypos,
                out=scores_buf.view(bsz, beam_size, -1)[:, :, step],
            )

            # copy attention for active hypotheses
            if attn is not None:
                torch.index_select(
                    attn[:, :, :step + 2], dim=0, index=active_bbsz_idx,
                    out=attn_buf[:, :, :step + 2],
                )

            # swap buffers
            tokens, tokens_buf = tokens_buf, tokens
            scores, scores_buf = scores_buf, scores
            if attn is not None:
                attn, attn_buf = attn_buf, attn

            # reorder incremental state in decoder
            reorder_state = active_bbsz_idx

        # sort by score descending
        for sent in range(len(finalized)):
            finalized[sent] = sorted(finalized[sent], key=lambda r: r['score'], reverse=True)

        return finalized
Beispiel #34
0
    Select values from [N, 4] data with an [N, 1] mask; the result is flattened to shape [M]
Comparison ops in torch: greater than, less than, greater than or equal to, less than or equal to, equal
    gt(greater than) lt(less than) ge(greater than or equal to) le(less than or equal to) eq(equal)
"""

seed = torch.manual_seed(0)
cls_label = torch.randint(0, 3, (10, 1))
print(cls_label.shape)
offset_label = torch.randn(10, 4)
print(offset_label.shape)

'Method 1:'
mask_cls = torch.lt(cls_label, 2)  # the [N, 1] mask is 2-D; plain indexing would first require dropping a dimension
print(mask_cls.shape)  # torch.Size([10, 1])
# exit()
cls = torch.masked_select(cls_label, mask_cls)
print(cls, cls.shape)  # tensor([0, 0, 1, 0, 1, 1, 1, 0]) torch.Size([8])

mask_offset = torch.gt(cls_label, 0)
print(mask_offset.shape)  # torch.Size([10, 1])
offset = torch.masked_select(offset_label, mask_offset)
print(offset, offset.shape)
# the element order is not changed here, so this reshape is valid; same result as below
print(offset.reshape(-1, 4), offset.reshape(-1, 4).shape)

print("-----------------------------")
'Method 2: the conventional indexing approach'
mask_cls = cls_label[:, 0] < 2
cls = cls_label[mask_cls]
print(cls.shape)
mask_offset = cls_label[:, 0] > 0
offset = offset_label[mask_offset]  # same selection as method 1, completed for parity
print(offset.shape)
Beispiel #35
0
 def updateOutput(self, input):
     input, mask = input
     torch.masked_select(input, mask, out=self.output)
     return self.output
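The out= argument above writes the selected values into the preallocated self.output buffer; a minimal sketch of the same behaviour as a plain nn.Module without the buffer (the class name is illustrative):

import torch

class MaskedSelect(torch.nn.Module):
    # expects an (input, mask) pair, like the legacy module above
    def forward(self, input):
        input, mask = input
        return torch.masked_select(input, mask)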
Beispiel #36
0
def apply_mask(inp, mask, size=9):
    return torch.masked_select(inp.transpose(0,-1),mask).view(size,-1).transpose(0,1)
def test_net(save_folder,
             net,
             cuda,
             dataset,
             transform,
             top_k,
             im_size=300,
             thresh=0.05):
    """Test a Fast R-CNN network on an image database."""
    num_images = len(dataset)
    # all detections are collected into:
    #    all_boxes[cls][image] = N x 5 array of detections in
    #    (x1, y1, x2, y2, score)
    """
    all_boxes = [[[] for _ in range(num_images)]
                 for _ in range(len(labelmap)+1)]
    """

    all_boxes = [[[] for _ in range(num_images)] for _ in range(len(labelmap))]

    # timers
    _t = {'im_detect': Timer(), 'misc': Timer()}
    output_dir = get_output_dir('ssd300_120000', set_type)
    det_file = os.path.join(output_dir, 'detections.pkl')

    for i in range(num_images):
        im, gt, h, w = dataset.pull_item(i)

        x = Variable(im.unsqueeze(0))
        """
        if args.cuda:
            x = x.cuda()
        """
        _t['im_detect'].tic()
        detections = net(x).data
        detect_time = _t['im_detect'].toc(average=False)

        # skip j = 0, because it's the background class
        for j in range(1, detections.size(1)):
            dets = detections[0, j, :]
            mask = dets[:, 0].gt(0.).expand(5, dets.size(0)).t()
            dets = torch.masked_select(dets, mask).view(-1, 5)
            if dets.size(0) == 0:  # dets is [M, 5] after the view, so check the row count rather than dim()
                continue
            boxes = dets[:, 1:]
            boxes[:, 0] *= w
            boxes[:, 2] *= w
            boxes[:, 1] *= h
            boxes[:, 3] *= h
            scores = dets[:, 0].cpu().numpy()
            cls_dets = np.hstack((boxes.cpu().numpy(),
                                  scores[:, np.newaxis])) \
                .astype(np.float32, copy=False)
            all_boxes[j][i] = cls_dets

        print('im_detect: {:d}/{:d} {:.3f}s'.format(i + 1, num_images,
                                                    detect_time))

    with open(det_file, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)

    print('Evaluating detections')
    evaluate_detections(all_boxes, output_dir, dataset)
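The per-class filtering above hinges on expanding the 1-D score test to the full [N, 5] detection tensor before masked_select; a minimal sketch of just that step, on made-up detections:

import torch

dets = torch.tensor([[0.9, 0.1, 0.1, 0.5, 0.5],
                     [0.0, 0.2, 0.2, 0.6, 0.6]])       # [N, 5] rows of (score, x1, y1, x2, y2)
mask = dets[:, 0].gt(0.).expand(5, dets.size(0)).t()   # broadcast the per-row score test to all 5 columns
kept = torch.masked_select(dets, mask).view(-1, 5)     # keep only rows whose score > 0
print(kept)                                            # only the first row survives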
Beispiel #38
0

if __name__ == '__main__':
    opts = {
        'dim_mm': 6,
        'dim_ho': 4,
    }
    nms_module = Dumplicate_Removal(opts)
    visual_features = Variable(torch.normal(torch.zeros(10, 4)))
    rois = Variable(
        torch.cat((torch.zeros(10, 1), (torch.rand(10, 4) + torch.FloatTensor([
            [0, 1, 2, 3],
        ])) * 100),
                  dim=1))
    duplicate_labels = Variable(torch.ones(5, 1)).type(torch.LongTensor)
    cls_prob_object = Variable(torch.rand(10, 20))

    mask = torch.zeros_like(cls_prob_object[:duplicate_labels.size(0)]).type(
        torch.ByteTensor)
    for i in range(duplicate_labels.size(0)):
        mask[i, duplicate_labels.data[i][0]] = 1
    selected_prob = torch.masked_select(
        cls_prob_object[:duplicate_labels.size(0)], mask)
    reranked_score = nms_module(visual_features[:duplicate_labels.size(0)],
                                selected_prob, rois[:duplicate_labels.size(0)])
    selected_prob = selected_prob.unsqueeze(1) * reranked_score

    loss = F.binary_cross_entropy(selected_prob, duplicate_labels.float())
    loss.backward()
    print(nms_module.transform_rescore.weight.grad)
Beispiel #39
0
    def _generate(self,
                  src_tokens,
                  src_lengths,
                  beam_size=None,
                  maxlen=None,
                  prefix_tokens=None):
        bsz, srclen = src_tokens.size()
        maxlen = min(maxlen,
                     self.maxlen) if maxlen is not None else self.maxlen

        # the max beam size is the dictionary size - 1, since we never select pad
        beam_size = beam_size if beam_size is not None else self.beam_size
        beam_size = min(beam_size, self.vocab_size - 1)

        encoder_outs = []
        incremental_states = {}
        for model in self.models:
            if not self.retain_dropout:
                model.eval()
            if isinstance(model.decoder, FairseqIncrementalDecoder):
                incremental_states[model] = {}
            else:
                incremental_states[model] = None

            # compute the encoder output for each beam
            encoder_out = model.encoder(
                src_tokens.repeat(1, beam_size).view(-1, srclen),
                src_lengths.expand(
                    beam_size, src_lengths.numel()).t().contiguous().view(-1),
            )
            encoder_outs.append(encoder_out)

        # initialize buffers
        scores = src_tokens.data.new(bsz * beam_size,
                                     maxlen + 1).float().fill_(0)
        scores_buf = scores.clone()
        tokens = src_tokens.data.new(bsz * beam_size,
                                     maxlen + 2).fill_(self.pad)
        tokens_buf = tokens.clone()
        tokens[:, 0] = self.eos
        attn = scores.new(bsz * beam_size, src_tokens.size(1), maxlen + 2)
        attn_buf = attn.clone()

        # list of completed sentences
        finalized = [[] for i in range(bsz)]
        finished = [False for i in range(bsz)]
        worst_finalized = [{
            'idx': None,
            'score': -math.inf
        } for i in range(bsz)]
        num_remaining_sent = bsz

        # number of candidate hypos per step
        cand_size = 2 * beam_size  # 2 x beam size in case half are EOS

        # offset arrays for converting between different indexing schemes
        bbsz_offsets = (torch.arange(0, bsz) *
                        beam_size).unsqueeze(1).type_as(tokens)
        cand_offsets = torch.arange(0, cand_size).type_as(tokens)

        # helper function for allocating buffers on the fly
        buffers = {}

        def buffer(name, type_of=tokens):  # noqa
            if name not in buffers:
                buffers[name] = type_of.new()
            return buffers[name]

        def is_finished(sent, step, unfinalized_scores=None):
            """
            Check whether we've finished generation for a given sentence, by
            comparing the worst score among finalized hypotheses to the best
            possible score among unfinalized hypotheses.
            """
            assert len(finalized[sent]) <= beam_size
            if len(finalized[sent]) == beam_size:
                if self.stop_early or step == maxlen or unfinalized_scores is None:
                    return True
                # stop if the best unfinalized score is worse than the worst
                # finalized one
                best_unfinalized_score = unfinalized_scores[sent].max()
                if self.normalize_scores:
                    best_unfinalized_score /= maxlen**self.len_penalty
                if worst_finalized[sent]['score'] >= best_unfinalized_score:
                    return True
            return False

        def finalize_hypos(step,
                           bbsz_idx,
                           eos_scores,
                           unfinalized_scores=None):
            """
            Finalize the given hypotheses at this step, while keeping the total
            number of finalized hypotheses per sentence <= beam_size.
            Note: the input must be in the desired finalization order, so that
            hypotheses that appear earlier in the input are preferred to those
            that appear later.
            Args:
                step: current time step
                bbsz_idx: A vector of indices in the range [0, bsz*beam_size),
                    indicating which hypotheses to finalize
                eos_scores: A vector of the same size as bbsz_idx containing
                    scores for each hypothesis
                unfinalized_scores: A vector containing scores for all
                    unfinalized hypotheses
            """
            assert bbsz_idx.numel() == eos_scores.numel()

            # clone relevant token and attention tensors
            tokens_clone = tokens.index_select(0, bbsz_idx)
            tokens_clone = tokens_clone[:, 1:step +
                                        2]  # skip the first index, which is EOS
            tokens_clone[:, step] = self.eos
            attn_clone = attn.index_select(0, bbsz_idx)[:, :, 1:step + 2]

            # compute scores per token position
            pos_scores = scores.index_select(0, bbsz_idx)[:, :step + 1]
            pos_scores[:, step] = eos_scores
            # convert from cumulative to per-position scores
            pos_scores[:, 1:] = pos_scores[:, 1:] - pos_scores[:, :-1]

            # normalize sentence-level scores
            if self.normalize_scores:
                eos_scores /= (step + 1)**self.len_penalty

            cum_unfin = []
            prev = 0
            for f in finished:
                if f:
                    prev += 1
                else:
                    cum_unfin.append(prev)

            sents_seen = set()
            for i, (idx, score) in enumerate(
                    zip(bbsz_idx.tolist(), eos_scores.tolist())):
                unfin_idx = idx // beam_size
                sent = unfin_idx + cum_unfin[unfin_idx]

                sents_seen.add((sent, unfin_idx))

                def get_hypo():

                    # remove padding tokens from attn scores
                    nonpad_idxs = src_tokens[sent].ne(self.pad)
                    hypo_attn = attn_clone[i][nonpad_idxs]
                    _, alignment = hypo_attn.max(dim=0)

                    return {
                        'tokens': tokens_clone[i],
                        'score': score,
                        'attention': hypo_attn,  # src_len x tgt_len
                        'alignment': alignment,
                        'positional_scores': pos_scores[i],
                    }

                if len(finalized[sent]) < beam_size:
                    finalized[sent].append(get_hypo())
                elif not self.stop_early and score > worst_finalized[sent][
                        'score']:
                    # replace worst hypo for this sentence with new/better one
                    worst_idx = worst_finalized[sent]['idx']
                    if worst_idx is not None:
                        finalized[sent][worst_idx] = get_hypo()

                    # find new worst finalized hypo for this sentence
                    idx, s = min(enumerate(finalized[sent]),
                                 key=lambda r: r[1]['score'])
                    worst_finalized[sent] = {
                        'score': s['score'],
                        'idx': idx,
                    }

            newly_finished = []
            for sent, unfin_idx in sents_seen:
                # check termination conditions for this sentence
                if not finished[sent] and is_finished(sent, step,
                                                      unfinalized_scores):
                    finished[sent] = True
                    newly_finished.append(unfin_idx)
            return newly_finished

        reorder_state = None
        batch_idxs = None
        for step in range(maxlen + 1):  # one extra step for EOS marker
            # reorder decoder internal states based on the prev choice of beams
            if reorder_state is not None:
                if batch_idxs is not None:
                    # update beam indices to take into account removed sentences
                    corr = batch_idxs - torch.arange(
                        batch_idxs.numel()).type_as(batch_idxs)
                    reorder_state.view(-1, beam_size).add_(
                        corr.unsqueeze(-1) * beam_size)
                for i, model in enumerate(self.models):
                    if isinstance(model.decoder, FairseqIncrementalDecoder):
                        model.decoder.reorder_incremental_state(
                            incremental_states[model], reorder_state)
                    encoder_outs[i] = model.decoder.reorder_encoder_out(
                        encoder_outs[i], reorder_state)

            probs, avg_attn_scores = self._decode(tokens[:, :step + 1],
                                                  encoder_outs,
                                                  incremental_states)
            if step == 0:
                # at the first step all hypotheses are equally likely, so use
                # only the first beam
                probs = probs.unfold(0, 1, beam_size).squeeze(2).contiguous()
                scores = scores.type_as(probs)
                scores_buf = scores_buf.type_as(probs)
            elif not self.sampling:
                # make probs contain cumulative scores for each hypothesis
                probs.add_(scores[:, step - 1].view(-1, 1))

            probs[:, self.pad] = -math.inf  # never select pad
            probs[:, self.unk] -= self.unk_penalty  # apply unk penalty

            # Record attention scores
            attn[:, :, step + 1].copy_(avg_attn_scores)

            cand_scores = buffer('cand_scores', type_of=scores)
            cand_indices = buffer('cand_indices')
            cand_beams = buffer('cand_beams')
            eos_bbsz_idx = buffer('eos_bbsz_idx')
            eos_scores = buffer('eos_scores', type_of=scores)
            if step < maxlen:
                if prefix_tokens is not None and step < prefix_tokens.size(1):
                    probs_slice = probs.view(bsz, -1, probs.size(-1))[:, 0, :]
                    cand_scores = torch.gather(
                        probs_slice,
                        dim=1,
                        index=prefix_tokens[:, step].view(-1, 1).data).expand(
                            -1, cand_size)
                    cand_indices = prefix_tokens[:, step].view(-1, 1).expand(
                        bsz, cand_size).data
                    cand_beams.resize_as_(cand_indices).fill_(0)
                elif self.sampling:
                    assert self.pad == 1, 'sampling assumes the first two symbols can be ignored'

                    if self.sampling_topk > 0:
                        values, indices = probs[:, 2:].topk(self.sampling_topk)
                        exp_probs = values.div_(
                            self.sampling_temperature).exp()
                        if step == 0:
                            torch.multinomial(exp_probs,
                                              beam_size,
                                              replacement=True,
                                              out=cand_indices)
                        else:
                            torch.multinomial(exp_probs,
                                              1,
                                              replacement=True,
                                              out=cand_indices)
                        torch.gather(exp_probs,
                                     dim=1,
                                     index=cand_indices,
                                     out=cand_scores)
                        torch.gather(indices,
                                     dim=1,
                                     index=cand_indices,
                                     out=cand_indices)
                        cand_indices.add_(2)
                    else:
                        exp_probs = probs.div_(
                            self.sampling_temperature).exp_().view(
                                -1, self.vocab_size)

                        if step == 0:
                            # we exclude the first two vocab items, one of which is pad
                            torch.multinomial(exp_probs[:, 2:],
                                              beam_size,
                                              replacement=True,
                                              out=cand_indices)
                        else:
                            torch.multinomial(exp_probs[:, 2:],
                                              1,
                                              replacement=True,
                                              out=cand_indices)

                        cand_indices.add_(2)
                        torch.gather(exp_probs,
                                     dim=1,
                                     index=cand_indices,
                                     out=cand_scores)

                    cand_scores.log_()
                    cand_indices = cand_indices.view(bsz, -1).repeat(1, 2)
                    cand_scores = cand_scores.view(bsz, -1).repeat(1, 2)
                    if step == 0:
                        cand_beams = torch.zeros(
                            bsz, cand_size).type_as(cand_indices)
                    else:
                        cand_beams = torch.arange(0, beam_size).repeat(
                            bsz, 2).type_as(cand_indices)
                        # make scores cumulative
                        cand_scores.add_(
                            torch.gather(
                                scores[:, step - 1].view(bsz, beam_size),
                                dim=1,
                                index=cand_beams,
                            ))
                else:
                    # take the best 2 x beam_size predictions. We'll choose the first
                    # beam_size of these which don't predict eos to continue with.
                    torch.topk(
                        probs.view(bsz, -1),
                        k=min(cand_size,
                              probs.view(bsz, -1).size(1) -
                              1),  # -1 so we never select pad
                        out=(cand_scores, cand_indices),
                    )
                    torch.div(cand_indices, self.vocab_size, out=cand_beams)
                    cand_indices.fmod_(self.vocab_size)
            else:
                # finalize all active hypotheses once we hit maxlen
                # pick the hypothesis with the highest prob of EOS right now
                torch.sort(
                    probs[:, self.eos],
                    descending=True,
                    out=(eos_scores, eos_bbsz_idx),
                )
                num_remaining_sent -= len(
                    finalize_hypos(step, eos_bbsz_idx, eos_scores))
                assert num_remaining_sent == 0
                break

            # cand_bbsz_idx contains beam indices for the top candidate
            # hypotheses, with a range of values: [0, bsz*beam_size),
            # and dimensions: [bsz, cand_size]
            cand_bbsz_idx = cand_beams.add(bbsz_offsets)

            # finalize hypotheses that end in eos
            eos_mask = cand_indices.eq(self.eos)

            finalized_sents = set()
            if step >= self.minlen:
                # only consider eos when it's among the top beam_size indices
                torch.masked_select(
                    cand_bbsz_idx[:, :beam_size],
                    mask=eos_mask[:, :beam_size],
                    out=eos_bbsz_idx,
                )
                if eos_bbsz_idx.numel() > 0:
                    torch.masked_select(
                        cand_scores[:, :beam_size],
                        mask=eos_mask[:, :beam_size],
                        out=eos_scores,
                    )
                    finalized_sents = finalize_hypos(step, eos_bbsz_idx,
                                                     eos_scores, cand_scores)
                    num_remaining_sent -= len(finalized_sents)

            assert num_remaining_sent >= 0
            if num_remaining_sent == 0:
                break
            assert step < maxlen

            if len(finalized_sents) > 0:
                new_bsz = bsz - len(finalized_sents)

                # construct batch_idxs which holds indices of batches to keep for the next pass
                batch_mask = torch.ones(bsz).type_as(cand_indices)
                batch_mask[cand_indices.new(finalized_sents)] = 0
                batch_idxs = batch_mask.nonzero().squeeze(-1)

                eos_mask = eos_mask[batch_idxs]
                cand_beams = cand_beams[batch_idxs]
                bbsz_offsets.resize_(new_bsz, 1)
                cand_bbsz_idx = cand_beams.add(bbsz_offsets)

                cand_scores = cand_scores[batch_idxs]
                cand_indices = cand_indices[batch_idxs]
                if prefix_tokens is not None:
                    prefix_tokens = prefix_tokens[batch_idxs]

                scores = scores.view(bsz, -1)[batch_idxs].view(
                    new_bsz * beam_size, -1)
                scores_buf.resize_as_(scores)
                tokens = tokens.view(bsz, -1)[batch_idxs].view(
                    new_bsz * beam_size, -1)
                tokens_buf.resize_as_(tokens)
                attn = attn.view(bsz,
                                 -1)[batch_idxs].view(new_bsz * beam_size,
                                                      attn.size(1), -1)
                attn_buf.resize_as_(attn)
                bsz = new_bsz
            else:
                batch_idxs = None

            # set active_mask so that values > cand_size indicate eos hypos
            # and values < cand_size indicate candidate active hypos.
            # After, the min values per row are the top candidate active hypos
            active_mask = buffer('active_mask')
            torch.add(
                eos_mask.type_as(cand_offsets) * cand_size,
                cand_offsets[:eos_mask.size(1)],
                out=active_mask,
            )

            # get the top beam_size active hypotheses, which are just the hypos
            # with the smallest values in active_mask
            active_hypos, _ignore = buffer('active_hypos'), buffer('_ignore')
            torch.topk(active_mask,
                       k=beam_size,
                       dim=1,
                       largest=False,
                       out=(_ignore, active_hypos))
            active_bbsz_idx = buffer('active_bbsz_idx')
            torch.gather(
                cand_bbsz_idx,
                dim=1,
                index=active_hypos,
                out=active_bbsz_idx,
            )
            active_scores = torch.gather(
                cand_scores,
                dim=1,
                index=active_hypos,
                out=scores[:, step].view(bsz, beam_size),
            )

            active_bbsz_idx = active_bbsz_idx.view(-1)
            active_scores = active_scores.view(-1)

            # copy tokens and scores for active hypotheses
            torch.index_select(
                tokens[:, :step + 1],
                dim=0,
                index=active_bbsz_idx,
                out=tokens_buf[:, :step + 1],
            )
            torch.gather(
                cand_indices,
                dim=1,
                index=active_hypos,
                out=tokens_buf.view(bsz, beam_size, -1)[:, :, step + 1],
            )
            if step > 0:
                torch.index_select(
                    scores[:, :step],
                    dim=0,
                    index=active_bbsz_idx,
                    out=scores_buf[:, :step],
                )
            torch.gather(
                cand_scores,
                dim=1,
                index=active_hypos,
                out=scores_buf.view(bsz, beam_size, -1)[:, :, step],
            )

            # copy attention for active hypotheses
            torch.index_select(
                attn[:, :, :step + 2],
                dim=0,
                index=active_bbsz_idx,
                out=attn_buf[:, :, :step + 2],
            )

            # swap buffers
            tokens, tokens_buf = tokens_buf, tokens
            scores, scores_buf = scores_buf, scores
            attn, attn_buf = attn_buf, attn

            # reorder incremental state in decoder
            reorder_state = active_bbsz_idx

        # sort by score descending
        for sent in range(len(finalized)):
            finalized[sent] = sorted(finalized[sent],
                                     key=lambda r: r['score'],
                                     reverse=True)

        return finalized
Beispiel #40
0
 def EntropyLoss(self, input_):
     mask = input_.ge(0.000001)
     mask_out = torch.masked_select(input_, mask)
     entropy = -(torch.sum(mask_out * torch.log(mask_out)))
     return entropy / float(input_.size(0))
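EntropyLoss above masks out (near-)zero entries before the log so that 0 * log(0) terms never produce NaNs; a small standalone sketch with made-up probabilities:

import torch

probs = torch.tensor([[0.7, 0.3, 0.0],
                      [0.5, 0.5, 0.0]])            # made-up batch of probability rows
mask = probs.ge(0.000001)                          # keep only (near-)nonzero entries before log()
vals = torch.masked_select(probs, mask)
entropy = -(vals * torch.log(vals)).sum() / float(probs.size(0))   # averaged over the batch, as above
print(entropy)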
def ROIAlign(feature_maps, rois, config, pool_size, mode='bilinear'):
    """Implements ROI Align on the features.

    Params:
    - pool_shape: [height, width] of the output pooled regions. Usually [7, 7]
    - image_shape: [height, width, channels]. Shape of input image in pixels

    Inputs:
    - boxes: [batch, num_boxes, (x1, y1, x2, y2)] in normalized
             coordinates. Possibly padded with zeros if not enough
             boxes to fill the array.
    - Feature maps: List of feature maps from different levels of the pyramid.
                    Each is [batch, channels, height, width]

    Output:
    Pooled regions in the shape: [batch, num_boxes, height, width, channels].
    The width and height are those specified in pool_shape in the layer
    constructor.
    """
    """
    [  x2-x1             x1 + x2 - W + 1  ]
    [  -----      0      ---------------  ]
    [  W - 1                  W - 1       ]
    [                                     ]
    [           y2-y1    y1 + y2 - H + 1  ]
    [    0      -----    ---------------  ]
    [           H - 1         H - 1      ]
    """
    #feature_maps= [P2, P3, P4, P5]
    rois = rois.detach()
    crop_resize = CropAndResize(pool_size, pool_size, 0)
    
    roi_number = rois.size()[1]
    
    pooled = rois.data.new(
            config.IMAGES_PER_GPU*rois.size(
            1), 256, pool_size, pool_size).zero_()
            
    rois = rois.view(
            config.IMAGES_PER_GPU*rois.size(1),
            4)
                   
    # Loop through levels and apply ROI pooling to each. P2 to P5.
    x_1 = rois[:, 0]
    y_1 = rois[:, 1]
    x_2 = rois[:, 2]
    y_2 = rois[:, 3]


    roi_level = log2_graph(
        torch.div(torch.sqrt((y_2 - y_1) * (x_2 - x_1)), 224.0))
        
        
    roi_level = torch.clamp(torch.clamp(
        torch.add(torch.round(roi_level), 4), min=2), max=5)

    # P2 is 256x256, P3 is 128x128, P4 is 64x64, P5 is 32x32
    # P2 is 4, P3 is 8, P4 is 16, P5 is 32
    for i, level in enumerate(range(2, 6)):

        scaling_ratio = 2**level

        height = float(config.IMAGE_MAX_DIM)/ scaling_ratio
        width = float(config.IMAGE_MAX_DIM) / scaling_ratio

        ixx = torch.eq(roi_level, level)

        box_indices = ixx.view(-1).int() * 0
        ix = torch.unsqueeze(ixx, 1)
        level_boxes = torch.masked_select(rois, ix)
        if level_boxes.size()[0] == 0:
            continue
        level_boxes = level_boxes.view(-1, 4)
        
        crops = crop_resize(feature_maps[i], torch.div(
                level_boxes, float(config.IMAGE_MAX_DIM)
                )[:, [1, 0, 3, 2]], box_indices)
                
        indices_pooled = ixx.nonzero()[:, 0]
        pooled[indices_pooled.data, :, :, :] = crops.data

    pooled = pooled.view(config.IMAGES_PER_GPU, roi_number,
               256, pool_size, pool_size)        
    pooled = Variable(pooled).cuda()
    return pooled
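The per-level selection inside ROIAlign relies on masked_select broadcasting an [N, 1] mask over the [N, 4] boxes, which returns the kept rows flattened and ready to be reshaped; a toy sketch of that step (the level assignments are made up):

import torch

rois = torch.arange(12, dtype=torch.float32).view(3, 4)   # three [x1, y1, x2, y2] boxes
roi_level = torch.tensor([2, 3, 2])                        # made-up pyramid level per box
ix = torch.unsqueeze(torch.eq(roi_level, 2), 1)            # [N, 1] mask, broadcast across the 4 coords
level_boxes = torch.masked_select(rois, ix).view(-1, 4)    # boxes assigned to level 2
print(level_boxes)                                         # rows 0 and 2 of rois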
Beispiel #42
0
def train(model, optim, sche, db, opt, model_0):
    """
    Args:
        model (torch.nn.module): the model to be trained
        optim (torch.optim.X): torch optimizer to be used
        db (torch.utils.data.Dataset): prepared torch dataset object
        opt: command line input from the user
    """
    # for debug
    #    outputs_A = []
    #    outputs_B = []
    accuracy_history = []
    if opt.active:
        # if active learning is enabled
        # Get c_A, c_B and Sc_A2B first
        # Prepare hooks to get layer features
        # We use these two aggregators throughout this file, so be careful 1) to empty them properly; 2) to use only them as feature-map aggregators
        def hook_A(module, input, output):
            outputs_A.append(
                output.to(torch.device("cpu")).detach().numpy().reshape(
                    output.shape[0], -1))

        def hook_B(module, input, output):
            outputs_B.append(
                output.to(torch.device("cpu")).detach().numpy().reshape(
                    output.shape[0], -1))

        if 'Alex'.lower() in opt.model_type.lower():
            handleA = model.alex.features[-1].register_forward_hook(hook_A)
            handleB = model.alex.classifier[-3].register_forward_hook(hook_B)
        elif 'VGG16'.lower() in opt.model_type.lower():
            handleA = model.vgg16.features[-1].register_forward_hook(hook_A)
            handleB = model.vgg16.classifier[-3].register_forward_hook(hook_B)

        # Get c_A, c_B, Sc_A2B

        embed_dir = path.join('../datasets/c_x_A_B', opt.model_type.lower())
        if not (path.exists(embed_dir)
                and path.exists(path.join(embed_dir, 'c_A.npy'))
                and path.exists(path.join(embed_dir, 'c_B.npy'))):
            # create the directory you want to save to
            if not path.exists(embed_dir):
                os.makedirs(embed_dir)
            outputs_A = []
            outputs_B = []
            imagenet_loader = torch.utils.data.DataLoader(
                db['imagenet'], batch_size=opt.batch_size, shuffle=False)
            model.eval()
            for batch_idx, batch in enumerate(imagenet_loader):
                data = batch['image']
                if opt.cuda:
                    data = data.cuda()
                with torch.no_grad():
                    model(data)
                del data
            #assert len(outputs_A) == 1000
            #assert len(outputs_B) == 1000
            c_A = outputs_A = np.vstack(outputs_A)
            c_B = outputs_B = np.vstack(outputs_B)

            np.save(path.join(embed_dir, 'c_A.npy'), c_A)
            np.save(path.join(embed_dir, 'c_B.npy'), c_B)
        else:
            c_A = np.load(path.join(embed_dir, 'c_A.npy'))
            c_B = np.load(path.join(embed_dir, 'c_B.npy'))

        if not path.exists(path.join(embed_dir, 'Sc_A2B.npy')):
            ScA = dnu.Sx_generator(c_A, c_A)
            ScB = dnu.Sx_generator(c_B, c_B)
            Sc_A2B = ScA - ScB
            np.save(path.join(embed_dir, 'Sc_A2B.npy'), Sc_A2B)
        else:
            Sc_A2B = np.load(path.join(embed_dir, 'Sc_A2B.npy'))

    # Start fine-tuning (transfer learning) process! epoch is only 1
    criterion = nn.CrossEntropyLoss()
    model_0.eval()
    if opt.alternate:
        current_class = 0
    for epoch in range(1, opt.epochs + 1):
        #### Here, firstly, compute score and get active learning batch of size opt.active_batch_size
        n_samples = len(db['train'])

        # sample with replacement
        sampler = torch.utils.data.sampler.WeightedRandomSampler(
            np.ones(n_samples) / n_samples, n_samples)
        train_loader = torch.utils.data.DataLoader(
            db['train'],
            batch_size=opt.active_sample_size
            if opt.active else opt.batch_size,
            shuffle=False,
            sampler=sampler)

        # loader = torch.utils.data.DataLoader(db['eval'], batch_size=opt.eval_batch_size, shuffle=False, num_workers=4)
        # num_eval = len(db['eval'])
        # for batch_idx, batch in enumerate(loader):

        # if opt.eval:
        #     evaluate(model, db, opt)
        #     model.train()
        for batch_idx, batch in enumerate(train_loader):
            if batch_idx == 50:
                break
            data = batch['image']
            target = batch['label']
            if opt.cuda:
                with torch.no_grad():
                    data, target = data.cuda(), target.cuda()
            if opt.active:
                if opt.alternate:
                    mask = target == current_class
                    selected_target = torch.masked_select(target, mask)
                    mask = mask.unsqueeze(1)
                    if mask.sum() == 0:
                        continue
                    selected = torch.masked_select(
                        data.view(opt.active_sample_size, -1), mask)
                    selected = selected.view(mask.sum(), 3, 224, 224)
                    data = selected
                    target = selected_target
                    current_class = 1 - current_class
                # extract feature maps and score the sampled batch
                outputs_A = []
                outputs_B = []
                model.eval()
                with torch.no_grad():
                    outputs = model(data)
                # assert len(outputs_A[0]) == opt.active_sample_size
                # assert len(outputs_B[0]) == opt.active_sample_size
                x_A = outputs_A[0]
                x_B = outputs_B[0]
                alpha = F.softmax(model_0(data),
                                  1).to(torch.device("cpu")).detach().numpy()
                with torch.no_grad():
                    p = F.softmax(model(data),
                                  1).to(torch.device("cpu")).detach().numpy()
                t = batch_idx  # temperature for decaying the lamb value between distinctiveness & uncertainty
                best_indices = np.argsort(
                    dnu.score(opt.lamb,
                              t,
                              p,
                              alpha,
                              x_A,
                              x_B,
                              c_A,
                              c_B,
                              Sc_A2B=Sc_A2B))[::-1]
                # best_indices = np.random.permutation(opt.active_sample_size)

            #### Secondly, fine-tune (train) the module
            # sche.step()
            model.train()

            # erase all computed gradient
            optim.zero_grad()

            # take data with maximum score
            if opt.active:
                outputs = model(
                    data[best_indices[:opt.active_batch_size].tolist()])
                loss = criterion(
                    outputs,
                    target[best_indices[:opt.active_batch_size].tolist()])
            else:
                outputs = model(data)
                #_, preds = torch.max(outputs, 1)
                loss = criterion(outputs, target)


            # if batch_idx > 10:
            #     print('debug')
            # compute gradient
            loss.backward()
            #train one step
            optim.step()
            if batch_idx % opt.report_every == 0:
                if opt.active:
                    print(
                        'Train Epoch: {} [{}/{} ({:.0f}%)] Actively chosen {}\tLoss: {:.6f} '
                        .format(epoch, batch_idx * opt.active_sample_size,
                                len(db['train']),
                                100. * batch_idx / len(train_loader),
                                batch_idx * opt.active_batch_size,
                                loss.data.item()))
                else:
                    print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f} '.
                          format(epoch, batch_idx * opt.batch_size,
                                 len(db['train']),
                                 100. * batch_idx / len(train_loader),
                                 loss.data.item()))

            # evaluate model if specified
            if opt.eval and batch_idx % opt.eval_every == 0:
                accuracy_history.append(evaluate(model, db, opt))
                model.train()
    accuracy_history = np.array(accuracy_history)
    np.save(
        './history' + 'active_' + str(opt.active) + 'lambda_' + str(opt.lamb) +
        '_alternate_' + str(opt.alternate) + '.npy', accuracy_history)
    if opt.active:
        handleA.remove()
        handleB.remove()
Beispiel #43
0
def evaluate(dataset, train_steps=None):
    examples = processor.get_examples(data_dir, dataset)
    examples_dict = {e.guid: e for e in examples}
    features, tokenize_info = convert_examples_to_features(examples, max_seq_length,
                                                           tokenizer, label_list)

    logger.info("***** Running Evaluation on %s set*****" % dataset)
    logger.info("  Num examples = %d", len(examples))
    logger.info("  Num features = %d", len(features))
    logger.info("  Batch size = %d", config[dataset]['batch_size'])

    data = create_tensor_data(features)
    sampler = SequentialSampler(data)
    dataloader = DataLoader(data, sampler=sampler,
                            batch_size=config[dataset]['batch_size'])
    model.eval()
    predictions = []
    predict_masks = []
    nb_steps, nb_examples = 0, 0
    loss, accuracy = 0, 0
    for batch in tqdm(dataloader, desc="Evaluating"):
        batch = tuple(t.to(device) for t in batch)
        input_ids, input_mask, segment_ids, predict_mask, label_ids = batch
        with torch.no_grad():
            tmp_loss = model(input_ids, segment_ids, input_mask, predict_mask, label_ids)
            outputs, _ = model(input_ids, segment_ids, input_mask, predict_mask)
        if not config['task']['cal_X_loss']:
            reshaped_predict_mask, _, _ = valid_first(predict_mask)
        else:
            reshaped_predict_mask = predict_mask
        masked_label_ids = torch.masked_select(label_ids, predict_mask)
        masked_outputs = torch.masked_select(outputs, reshaped_predict_mask)
        masked_label_ids = masked_label_ids.cpu().numpy()
        masked_outputs = masked_outputs.detach().cpu().numpy()

        def cal_accuracy(outputs, labels):
            return np.sum(outputs == labels)

        tmp_accuracy = cal_accuracy(masked_outputs, masked_label_ids)
        predictions.extend(outputs.detach().cpu().numpy().tolist())
        predict_masks.extend(reshaped_predict_mask.detach().cpu().numpy().tolist())
        if config['n_gpu'] > 1:
            tmp_loss = tmp_loss.mean()  # mean() to average on multi-gpu.

        loss += tmp_loss.item()
        accuracy += tmp_accuracy
        nb_examples += predict_mask.detach().cpu().numpy().sum()
        nb_steps += 1
    loss = loss / nb_steps
    accuracy = accuracy / nb_examples

    logger.info('eval_loss: %.4f; eval_accuracy: %.4f' % (loss, accuracy))
    if train_steps is not None:
        fn1 = "%s.predict_epoch_%s" % (dataset, train_steps)
        fn2 = "%s.mistake_epoch_%s" % (dataset, train_steps)
    else:
        fn1 = "%s.predict" % dataset
        fn2 = "%s.mistake" % dataset
    writer1 = codecs.open(os.path.join(config['task']['output_dir'], fn1), 'w', encoding='utf-8')
    writer2 = codecs.open(os.path.join(config['task']['output_dir'], fn2), 'w', encoding='utf-8')
    for feature, predict_line, predict_mask in zip(features, predictions, predict_masks):
        example = examples_dict[feature.ex_id]
        w1_sent = []
        word_idx = feature.start_ix
        mistake = False
        for index, label_id in enumerate(predict_line[:sum(predict_mask)]):
            if example.words[word_idx] == '[SEP]':
                word_idx += 1
                w1_sent.append("\n")
            line = ' '.join([example.words[word_idx], example.labels[word_idx], label_list[label_id]])
            w1_sent.append(line)
            if label_list[label_id] != example.labels[word_idx]:
                mistake = True
            word_idx += 1
        writer1.write('\n'.join(w1_sent) + '\n\n')
        if mistake: writer2.write('\n'.join(w1_sent) + '\n\n')
    writer1.close()
    writer2.close()
    return loss
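masked_select is what restricts the accuracy count above to positions flagged in predict_mask; a small standalone sketch of that masking step, with toy tensors:

import torch

label_ids = torch.tensor([[5, 3, 0], [7, 0, 0]])                   # padded gold labels
outputs = torch.tensor([[5, 2, 9], [7, 1, 4]])                     # padded predictions
predict_mask = torch.tensor([[True, True, False],
                             [True, False, False]])                # positions that actually count
masked_label_ids = torch.masked_select(label_ids, predict_mask)    # tensor([5, 3, 7])
masked_outputs = torch.masked_select(outputs, predict_mask)        # tensor([5, 2, 7])
accuracy = (masked_outputs == masked_label_ids).sum().item() / predict_mask.sum().item()
print(accuracy)                                                    # 2/3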
Beispiel #44
0
    def train(self):
        self.scheduler.step()
        self.loss.step()
        epoch = self.scheduler.last_epoch + 1
        lr = self.scheduler.get_lr()[0]

        self.ckp.write_log('[Epoch {}]\tLearning rate: {:.2e}'.format(
            epoch, Decimal(lr)))
        self.loss.start_log()
        self.model.train()

        timer_data, timer_model = utility.timer(), utility.timer()
        for batch, (lr, hr, _, idx_scale) in enumerate(self.loader_train):
            lr, hr = self.prepare(lr, hr)
            timer_data.hold()
            timer_model.tic()
            N, C, H, W = lr.size()
            _, _, outH, outW = hr.size()
            scale_coord_map, mask = self.input_matrix_wpn(
                H, W,
                self.args.scale[idx_scale])  ###  get the position matrix, mask

            if self.args.n_GPUs > 1:
                scale_coord_map = torch.cat([scale_coord_map] *
                                            self.args.n_GPUs, 0)
            else:
                scale_coord_map = scale_coord_map.cuda()

            self.optimizer.zero_grad()
            sr = self.model(lr, idx_scale, scale_coord_map)
            re_sr = torch.masked_select(sr, mask.cuda())
            re_sr = re_sr.contiguous().view(N, C, outH, outW)
            loss = self.loss(re_sr, hr)

            if loss.item() < self.args.skip_threshold * self.error_last:
                loss.backward()
                self.optimizer.step()
            else:
                print('Skip this batch {}! (Loss: {})'.format(
                    batch + 1, loss.item()))

            timer_model.hold()

            if (batch + 1) % self.args.print_every == 0:
                self.ckp.write_log('[{}/{}]\t{}\t{:.1f}+{:.1f}s'.format(
                    (batch + 1) * self.args.batch_size,
                    len(self.loader_train.dataset),
                    self.loss.display_loss(batch), timer_model.release(),
                    timer_data.release()))

            timer_data.tic()

        self.loss.end_log(len(self.loader_train))
        self.error_last = self.loss.log[-1, -1]

        if self.args.n_GPUs == 1:
            target = self.model
        else:
            target = self.model  #.module

        torch.save(
            target.state_dict(),
            os.path.join(self.ckp.dir, 'model', 'model_{}.pt'.format(epoch)))
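The trainer above uses the boolean mask from input_matrix_wpn to pull only the valid upscaled pixels out of the network output and reshape them to the HR size; a toy sketch of that reshape (shapes here are tiny and made up):

import torch

N, C, outH, outW = 1, 1, 2, 2
sr = torch.arange(9, dtype=torch.float32).view(N, C, 3, 3)    # oversized network output
mask = torch.zeros(N, C, 3, 3, dtype=torch.bool)
mask[..., :outH, :outW] = True                                 # mark the valid outH x outW positions
re_sr = torch.masked_select(sr, mask).view(N, C, outH, outW)
print(re_sr)                                                   # tensor([[[[0., 1.], [3., 4.]]]])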
Beispiel #45
0
def test_net(save_folder, net, dataset, thresh=0.05):
    num_images = len(dataset)

    all_boxes = [[[] for _ in range(num_images)] for _ in range(2)]
    _t = {'im_detect': Timer(), 'misc': Timer()}

    output_dir = get_output_dir(os.path.join(save_folder, 'sfd_hand'),
                                set_type)
    det_file = os.path.join(output_dir, 'detections.pkl')

    for i in range(num_images):
        img = dataset.pull_image(i)
        h, w, _ = img.shape
        shrink = np.sqrt(1700 * 1200 / (img.shape[0] * img.shape[1]))
        image = cv2.resize(img,
                           None,
                           None,
                           fx=shrink,
                           fy=shrink,
                           interpolation=cv2.INTER_LINEAR)

        x = to_chw_bgr(image)
        x = x.astype('float32')
        x -= cfg.img_mean
        x = x[[2, 1, 0], :, :]
        x = Variable(torch.from_numpy(x).unsqueeze(0))
        if use_cuda:
            x = x.cuda()
        _t['im_detect'].tic()
        detections = net(x).data
        detect_time = _t['im_detect'].toc(average=False)

        for j in range(1, detections.size(1)):
            dets = detections[0, j, :]
            mask = dets[:, 0].gt(thresh).expand(5, dets.size(0)).t()
            dets = torch.masked_select(dets, mask).view(-1, 5)
            if dets.size(0) == 0:  # check the row count; dim() is always 2 after the view
                continue
            boxes = dets[:, 1:]
            boxes[:, 0] *= w
            boxes[:, 2] *= w
            boxes[:, 1] *= h
            boxes[:, 3] *= h
            scores = dets[:, 0].cpu().numpy()
            cls_dets = np.hstack(
                (boxes.cpu().numpy(), scores[:,
                                             np.newaxis])).astype(np.float32,
                                                                  copy=False)
            all_boxes[j][i] = cls_dets

            fin_mask = np.where(scores > 0.6)[0]
            bboxes = boxes.cpu().numpy()[fin_mask]
            scores = scores[fin_mask]
            for k in range(len(scores)):
                leftup = (int(bboxes[k][0]), int(bboxes[k][1]))
                right_bottom = (int(bboxes[k][2]), int(bboxes[k][3]))
                cv2.rectangle(img, leftup, right_bottom, (0, 255, 0), 2)

        save_file = os.path.join(output_dir, '{}.jpg'.format(i + 1))
        cv2.imwrite(save_file, img)

        print('im_detect: {:d}/{:d} {:.3f}s'.format(i + 1, num_images,
                                                    detect_time))

    with open(det_file, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)

    print('Evaluating detections')
    evaluate_detections(all_boxes, output_dir, dataset)
Beispiel #46
0
    def test(self):
        epoch = self.scheduler.last_epoch + 1
        self.ckp.write_log('\nEvaluation:')
        self.ckp.add_log(torch.zeros(1, len(self.scale)))
        self.model.eval()
        timer_test = utility.timer()
        with torch.no_grad():
            for idx_scale, scale in enumerate(self.scale):
                eval_acc = 0
                eval_acc_ssim = 0
                self.loader_test.dataset.set_scale(idx_scale)
                #tqdm_test = tqdm(self.loader_test, ncols=80)
                for idx_img, (lr, hr, filename,
                              _) in enumerate(self.loader_test):
                    filename = filename[0]
                    no_eval = (hr.nelement() == 1)
                    if not no_eval:
                        lr, hr = self.prepare(lr, hr)
                    else:
                        lr, = self.prepare(lr)

                    N, C, H, W = lr.size()
                    scale = self.args.scale[idx_scale]
                    outH, outW = int(H * scale), int(W * scale)
                    #_,_,outH,outW = hr.size()
                    #timer_test.tic()

                    scale_coord_map, mask = self.input_matrix_wpn(
                        H, W, self.args.scale[idx_scale])
                    #position, mask = self.pos_matrix(H,W,self.args.scale[idx_scale])
                    #print(timer_test.toc())
                    if self.args.n_GPUs > 1:
                        scale_coord_map = torch.cat([scale_coord_map] *
                                                    self.args.n_GPUs, 0)
                    else:
                        scale_coord_map = scale_coord_map.cuda()

                    timer_test.tic()
                    sr = self.model(lr, idx_scale, scale_coord_map)
                    timer_test.hold()
                    re_sr = torch.masked_select(sr, mask.cuda())
                    sr = re_sr.contiguous().view(N, C, outH, outW)
                    sr = utility.quantize(sr, self.args.rgb_range)
                    #timer_test.hold()
                    save_list = [sr]
                    if not no_eval:
                        eval_acc += utility.calc_psnr(
                            sr,
                            hr,
                            scale,
                            self.args.rgb_range,
                            benchmark=self.loader_test.dataset.benchmark)
                        eval_acc_ssim += utility.calc_ssim(
                            sr,
                            hr,
                            scale,
                            benchmark=self.loader_test.dataset.benchmark)
                        save_list.extend([lr, hr])

                    if self.args.save_results:
                        a = 1
                        self.ckp.save_results(filename, save_list, scale)

                self.ckp.log[-1, idx_scale] = eval_acc / len(self.loader_test)
                best = self.ckp.log.max(0)
                # print(timer_test.acc/100)
                self.ckp.write_log(
                    '[{} x{}]\tPSNR: {:.3f} SSIM: {:.4f} (Best: {:.3f} @epoch {})'
                    .format(self.args.data_test, scale,
                            self.ckp.log[-1, idx_scale],
                            eval_acc_ssim / len(self.loader_test),
                            best[0][idx_scale], best[1][idx_scale] + 1))
        print(timer_test.acc / 100)
        self.ckp.write_log('Total time: {:.2f}s\n'.format(timer_test.toc()),
                           refresh=True)
        if not self.args.test_only:
            self.ckp.save(self, epoch, is_best=(best[1][0] + 1 == epoch))
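The key step in this test loop is recovering the super-resolved image: torch.masked_select flattens the output values where mask is True, and view restores the (N, C, outH, outW) layout. A standalone sketch of that pattern with made-up shapes (the padded grid size is an assumption for illustration only):

import torch

N, C, outH, outW = 1, 3, 8, 8
# Network output on a slightly larger grid; mask marks the valid pixels.
sr_padded = torch.randn(N, C, outH + 2, outW + 2)
mask = torch.zeros(N, C, outH + 2, outW + 2, dtype=torch.bool)
mask[:, :, :outH, :outW] = True

# masked_select returns the selected elements as a 1-D tensor in row-major order,
# so they can be reshaped back into the target resolution.
re_sr = torch.masked_select(sr_padded, mask)
sr = re_sr.contiguous().view(N, C, outH, outW)
print(sr.shape)  # torch.Size([1, 3, 8, 8])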
Example #47
def train(myNMT, args, lang1, lang2):
    # train model
    # myNMT (NMT model): model to train
    # args (a set of parameters): from parser
    # lang1 (Language class): source language
    # lang2 (Language class): target language

    myoptim = optim.Adam(myNMT.parameters(), lr=args.lr)
    
    training_data = [ IndicesFromPairs(p, lang1, lang2) for p in readPairs(args.source_training_file, args.target_training_file) ]

    # generate batches
    def generateBatches(data, batch_size):
        batches = []
        batch = []
        for i in range(len(data)): 
            batch.append(data[i])
            if len(batch) >= batch_size:
                batches.append(batch)
                batch = []
        if batch != []:
           batches.append(batch)
           batch = []
        return batches

    training_batches_pairs = generateBatches(training_data, args.batch_size)
    
    # transfer batches to padded Variables
    training_batches = []
    source_len, target_len = [], []
    for b in training_batches_pairs: 
        source_batch = [ sentence[0]  for sentence in b] 
        target_batch = [ sentence[1]  for sentence in b] 
        source_len.append([len(s) for s in source_batch])
        target_len.append([len(s) for s in target_batch])
        max_len = max(source_len[-1])  # pad to the longest source sentence in the batch
        source_batch = [ s + [lang1.PAD_token] * (max_len - len(s)) for s in source_batch] 
        max_len = max(target_len[-1])
        target_batch = [ s + [lang2.PAD_token] * (max_len - len(s)) for s in target_batch] 

        # mask for target sentence
        source_variable = ag.Variable(torch.LongTensor(source_batch))
        target_variable = ag.Variable(torch.LongTensor(target_batch))
        if args.gpu:
            source_variable = source_variable.cuda()
            target_variable = target_variable.cuda()
        training_batches.append((source_variable, target_variable))
       
    for e in range(args.num_epoch):
        for i in range(len(training_batches)):
            source, target = training_batches[i]
            myoptim.zero_grad()
            loss = 0
            criterion = nn.CrossEntropyLoss()
     
            # train network
            encoder_outputs, encoder_hidden = myNMT.encoder(source, source_len[i])

            # encoder has bidirectional rnn, dimensions are different 
            decoder_hidden = myNMT.decoder.init_hidden(encoder_hidden) 
            batch_size, length = target.size()
            decoder_input = ag.Variable(torch.LongTensor([lang2.SOS_token]  * target.size()[0]))
            if args.gpu:
                decoder_input = decoder_input.cuda()
            for j in range(length):
                decoder_output, decoder_hidden = myNMT.decoder(decoder_input, decoder_hidden, encoder_outputs)

                # compute loss with mask 
                mask_tensor = torch.from_numpy((np.array(target_len[i]) > j).astype(np.int32)).byte()
                masked_index = ag.Variable(torch.masked_select(torch.arange(0, batch_size), mask_tensor).long())
                if args.gpu:
                    masked_index = masked_index.cuda()
                masked_outputs = torch.index_select(decoder_output, 0, masked_index)
                masked_targets = torch.index_select(target[:, j], 0, masked_index)
                loss += criterion(masked_outputs, masked_targets)

                decoder_input = target[:,j]

            loss = loss.div(sum(target_len[i]))
            loss.backward()
            torch.nn.utils.clip_grad_norm(myNMT.parameters(), args.clip)
            myoptim.step()
            print (time.strftime('%Hh %Mm %Ss', time.localtime()), " batch ", i)

        test = evaluate(myNMT, args.source_validation_file, args.target_validation_file, args, lang1, lang2)
        print (time.strftime('%Hh %Mm %Ss', time.localtime()), " epoch ", e, " evaluate accuracy ", test)
        print (time.strftime('%Hh %Mm %Ss', time.localtime()), " epoch ", e, " evaluate accuracy ", test, file=open(args.process_file, 'a'))
        torch.save(myNMT.state_dict(), args.weights_file+str(e))
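The per-step loss masking in this training loop can be isolated as follows: a boolean mask marks which target sequences are still active at step j, torch.masked_select over an index range turns it into row indices, and index_select gathers the active rows. The sequence lengths and the vocabulary size of 10 below are invented for illustration:

import torch
import torch.nn.functional as F

target_len = torch.tensor([5, 3, 2])     # lengths of the 3 sequences in the batch
batch_size = target_len.numel()
j = 2                                    # current decoding step

mask = target_len > j                    # sequences that still have a token at step j
active_idx = torch.masked_select(torch.arange(batch_size), mask)

logits = torch.randn(batch_size, 10)     # decoder output for this step
targets = torch.randint(0, 10, (batch_size,))
active_logits = torch.index_select(logits, 0, active_idx)
active_targets = torch.index_select(targets, 0, active_idx)
loss = F.cross_entropy(active_logits, active_targets)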
Example #48
    def forward(self, q_data, qa_data, target, student_id=None):

        batch_size = q_data.shape[0]
        seqlen = q_data.shape[1]
        q_embed_data = self.q_embed(q_data)
        qa_embed_data = self.qa_embed(qa_data)

        memory_value = nn.Parameter(
            torch.cat([
                self.init_memory_value.unsqueeze(0) for _ in range(batch_size)
            ], 0).data)
        self.mem.init_value_memory(memory_value)

        slice_q_data = torch.chunk(q_data, seqlen, 1)
        slice_q_embed_data = torch.chunk(q_embed_data, seqlen, 1)
        slice_qa_embed_data = torch.chunk(qa_embed_data, seqlen, 1)

        value_read_content_l = []
        input_embed_l = []
        predict_logs = []
        for i in range(seqlen):
            ## Attention
            q = slice_q_embed_data[i].squeeze(1)
            correlation_weight = self.mem.attention(q)
            if_memory_write = slice_q_data[i].squeeze(1).ge(1)
            if_memory_write = utils.varible(
                torch.FloatTensor(if_memory_write.data.tolist()), 1)

            ## Read Process
            read_content = self.mem.read(correlation_weight)
            value_read_content_l.append(read_content)
            input_embed_l.append(q)
            ## Write Process
            qa = slice_qa_embed_data[i].squeeze(1)
            new_memory_value = self.mem.write(correlation_weight, qa,
                                              if_memory_write)

            # read_content_embed = torch.tanh(self.read_embed_linear(torch.cat([read_content, q], 1)))
            # pred = self.predict_linear(read_content_embed)
            # predict_logs.append(pred)

        all_read_value_content = torch.cat(
            [value_read_content_l[i].unsqueeze(1) for i in range(seqlen)], 1)
        input_embed_content = torch.cat(
            [input_embed_l[i].unsqueeze(1) for i in range(seqlen)], 1)
        # input_embed_content = input_embed_content.view(batch_size * seqlen, -1)
        # input_embed_content = torch.tanh(self.input_embed_linear(input_embed_content))
        # input_embed_content = input_embed_content.view(batch_size, seqlen, -1)

        predict_input = torch.cat(
            [all_read_value_content, input_embed_content], 2)
        read_content_embed = torch.tanh(
            self.read_embed_linear(predict_input.view(batch_size * seqlen,
                                                      -1)))

        pred = self.predict_linear(read_content_embed)
        # predicts = torch.cat([predict_logs[i] for i in range(seqlen)], 1)
        target_1d = target  # [batch_size * seq_len, 1]
        mask = target_1d.ge(0)  # [batch_size * seq_len, 1]
        # pred_1d = predicts.view(-1, 1)           # [batch_size * seq_len, 1]
        pred_1d = pred.view(-1, 1)  # [batch_size * seq_len, 1]

        filtered_pred = torch.masked_select(pred_1d, mask)
        filtered_target = torch.masked_select(target_1d, mask)
        loss = torch.nn.functional.binary_cross_entropy_with_logits(
            filtered_pred, filtered_target)

        return loss, torch.sigmoid(filtered_pred), filtered_target
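The loss at the end of this forward pass depends on dropping padded targets (encoded as -1) before the binary cross-entropy. A self-contained sketch of just that masking step, with invented values:

import torch
import torch.nn.functional as F

pred = torch.randn(6, 1)                                        # logits, one per interaction
target = torch.tensor([1., 0., 1., -1., -1., 0.]).view(-1, 1)   # -1 marks padding

mask = target.ge(0)                                  # True where the label is valid
filtered_pred = torch.masked_select(pred, mask)
filtered_target = torch.masked_select(target, mask)
loss = F.binary_cross_entropy_with_logits(filtered_pred, filtered_target)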
Example #49
    def _generate(
        self,
        sample: Dict[str, Dict[str, Tensor]],
        prefix_tokens: Optional[Tensor] = None,
        constraints: Optional[Tensor] = None,
        bos_token: Optional[int] = None,
    ):
        incremental_states = torch.jit.annotate(
            List[Dict[str, Dict[str, Optional[Tensor]]]],
            [
                torch.jit.annotate(Dict[str, Dict[str, Optional[Tensor]]], {})
                for i in range(self.model.models_size)
            ],
        )
        net_input = sample["net_input"]

        if "src_tokens" in net_input:
            src_tokens = net_input["src_tokens"]
            # length of the source text being the character length except EndOfSentence and pad
            src_lengths = ((src_tokens.ne(self.eos)
                            & src_tokens.ne(self.pad)).long().sum(dim=1))
        elif "source" in net_input:
            src_tokens = net_input["source"]
            src_lengths = (net_input["padding_mask"].size(-1) -
                           net_input["padding_mask"].sum(-1)
                           if net_input["padding_mask"] is not None else
                           torch.tensor(src_tokens.size(-1)).to(src_tokens))
        else:
            raise Exception("expected src_tokens or source in net input")

        # bsz: total number of sentences in beam
        # Note that src_tokens may have more than 2 dimensions (i.e. audio features)
        bsz, src_len = src_tokens.size()[:2]
        beam_size = self.beam_size

        if constraints is not None and not self.search.supports_constraints:
            raise NotImplementedError(
                "Target-side constraints were provided, but search method doesn't support them"
            )

        # Initialize constraints, when active
        self.search.init_constraints(constraints, beam_size)

        max_len: int = -1
        if self.match_source_len:
            max_len = src_lengths.max().item()
        else:
            max_len = min(
                int(self.max_len_a * src_len + self.max_len_b),
                # exclude the EOS marker
                self.model.max_decoder_positions() - 1,
            )
        assert (
            self.min_len <= max_len
        ), "min_len cannot be larger than max_len, please adjust these!"
        # compute the encoder output for each beam
        encoder_outs = self.model.forward_encoder(net_input)

        # placeholder of indices for bsz * beam_size to hold tokens and accumulative scores
        new_order = torch.arange(bsz).view(-1, 1).repeat(1, beam_size).view(-1)
        new_order = new_order.to(src_tokens.device).long()
        encoder_outs = self.model.reorder_encoder_out(encoder_outs, new_order)
        # ensure encoder_outs is a List.
        assert encoder_outs is not None

        # initialize buffers
        scores = (torch.zeros(bsz * beam_size,
                              max_len + 1).to(src_tokens).float()
                  )  # +1 for eos; pad is never chosen for scoring
        tokens = (torch.zeros(bsz * beam_size,
                              max_len + 2).to(src_tokens).long().fill_(
                                  self.pad))  # +2 for eos and pad
        tokens[:, 0] = self.eos if bos_token is None else bos_token
        attn: Optional[Tensor] = None

        # A list that indicates candidates that should be ignored.
        # For example, suppose we're sampling and have already finalized 2/5
        # samples. Then cands_to_ignore would mark 2 positions as being ignored,
        # so that we only finalize the remaining 3 samples.
        cands_to_ignore = (torch.zeros(bsz, beam_size).to(src_tokens).eq(-1)
                           )  # forward and backward-compatible False mask

        # list of completed sentences
        finalized = torch.jit.annotate(
            List[List[Dict[str, Tensor]]],
            [
                torch.jit.annotate(List[Dict[str, Tensor]], [])
                for i in range(bsz)
            ],
        )  # contains lists of dictionaries of information about the hypothesis being finalized at each step

        finished = [
            False for i in range(bsz)
        ]  # a boolean array indicating if the sentence at the index is finished or not
        num_remaining_sent = bsz  # number of sentences remaining

        # number of candidate hypos per step
        cand_size = 2 * beam_size  # 2 x beam size in case half are EOS

        # offset arrays for converting between different indexing schemes
        bbsz_offsets = ((torch.arange(0, bsz) *
                         beam_size).unsqueeze(1).type_as(tokens).to(
                             src_tokens.device))
        cand_offsets = torch.arange(0, cand_size).type_as(tokens).to(
            src_tokens.device)

        reorder_state: Optional[Tensor] = None
        batch_idxs: Optional[Tensor] = None

        original_batch_idxs: Optional[Tensor] = None
        if "id" in sample and isinstance(sample["id"], Tensor):
            original_batch_idxs = sample["id"]
        else:
            original_batch_idxs = torch.arange(0, bsz).type_as(tokens)

        for step in range(max_len + 1):  # one extra step for EOS marker
            # reorder decoder internal states based on the prev choice of beams
            if reorder_state is not None:
                if batch_idxs is not None:
                    # update beam indices to take into account removed sentences
                    corr = batch_idxs - torch.arange(
                        batch_idxs.numel()).type_as(batch_idxs)
                    reorder_state.view(-1, beam_size).add_(
                        corr.unsqueeze(-1) * beam_size)
                    original_batch_idxs = original_batch_idxs[batch_idxs]
                self.model.reorder_incremental_state(incremental_states,
                                                     reorder_state)
                encoder_outs = self.model.reorder_encoder_out(
                    encoder_outs, reorder_state)

            lprobs, avg_attn_scores = self.model.forward_decoder(
                tokens[:, :step + 1],
                encoder_outs,
                incremental_states,
                self.temperature,
            )

            if self.lm_model is not None:
                lm_out = self.lm_model(tokens[:, :step + 1])
                probs = self.lm_model.get_normalized_probs(lm_out,
                                                           log_probs=True,
                                                           sample=None)
                probs = probs[:, -1, :] * self.lm_weight
                lprobs += probs

            lprobs[lprobs != lprobs] = torch.tensor(-math.inf).to(lprobs)

            lprobs[:, self.pad] = -math.inf  # never select pad
            lprobs[:, self.unk] -= self.unk_penalty  # apply unk penalty

            # handle max length constraint
            if step >= max_len:
                lprobs[:, :self.eos] = -math.inf
                lprobs[:, self.eos + 1:] = -math.inf

            # handle prefix tokens (possibly with different lengths)
            if (prefix_tokens is not None and step < prefix_tokens.size(1)
                    and step < max_len):
                lprobs, tokens, scores = self._prefix_tokens(
                    step, lprobs, scores, tokens, prefix_tokens, beam_size)
            elif step < self.min_len:
                # minimum length constraint (does not apply if using prefix_tokens)
                lprobs[:, self.eos] = -math.inf

            # Record attention scores (only supported when avg_attn_scores is a Tensor)
            if avg_attn_scores is not None:
                if attn is None:
                    attn = torch.empty(bsz * beam_size,
                                       avg_attn_scores.size(1),
                                       max_len + 2).to(scores)
                attn[:, :, step + 1].copy_(avg_attn_scores)

            scores = scores.type_as(lprobs)
            eos_bbsz_idx = torch.empty(0).to(
                tokens
            )  # indices of hypothesis ending with eos (finished sentences)
            eos_scores = torch.empty(0).to(
                scores
            )  # scores of hypothesis ending with eos (finished sentences)

            if self.should_set_src_lengths:
                self.search.set_src_lengths(src_lengths)

            if self.repeat_ngram_blocker is not None:
                lprobs = self.repeat_ngram_blocker(tokens, lprobs, bsz,
                                                   beam_size, step)

            # Shape: (batch, cand_size)
            cand_scores, cand_indices, cand_beams = self.search.step(
                step,
                lprobs.view(bsz, -1, self.vocab_size),
                scores.view(bsz, beam_size, -1)[:, :, :step],
                tokens[:, :step + 1],
                original_batch_idxs,
            )

            # cand_bbsz_idx contains beam indices for the top candidate
            # hypotheses, with a range of values: [0, bsz*beam_size),
            # and dimensions: [bsz, cand_size]
            cand_bbsz_idx = cand_beams.add(bbsz_offsets)

            # finalize hypotheses that end in eos
            # Shape of eos_mask: (batch size, beam size)
            eos_mask = cand_indices.eq(self.eos) & cand_scores.ne(-math.inf)
            eos_mask[:, :beam_size][cands_to_ignore] = torch.tensor(0).to(
                eos_mask)

            # only consider eos when it's among the top beam_size indices
            # Now we know what beam item(s) to finish
            # Shape: 1d list of absolute-numbered
            eos_bbsz_idx = torch.masked_select(cand_bbsz_idx[:, :beam_size],
                                               mask=eos_mask[:, :beam_size])

            finalized_sents: List[int] = []
            if eos_bbsz_idx.numel() > 0:
                eos_scores = torch.masked_select(cand_scores[:, :beam_size],
                                                 mask=eos_mask[:, :beam_size])

                finalized_sents = self.finalize_hypos(
                    step,
                    eos_bbsz_idx,
                    eos_scores,
                    tokens,
                    scores,
                    finalized,
                    finished,
                    beam_size,
                    attn,
                    src_lengths,
                    max_len,
                )
                num_remaining_sent -= len(finalized_sents)

            assert num_remaining_sent >= 0
            if num_remaining_sent == 0:
                break
            if self.search.stop_on_max_len and step >= max_len:
                break
            assert step < max_len, f"{step} < {max_len}"

            # Remove finalized sentences (ones for which {beam_size}
            # finished hypotheses have been generated) from the batch.
            if len(finalized_sents) > 0:
                new_bsz = bsz - len(finalized_sents)

                # construct batch_idxs which holds indices of batches to keep for the next pass
                batch_mask = torch.ones(bsz,
                                        dtype=torch.bool,
                                        device=cand_indices.device)
                batch_mask[finalized_sents] = False
                # TODO replace `nonzero(as_tuple=False)` after TorchScript supports it
                batch_idxs = torch.arange(
                    bsz, device=cand_indices.device).masked_select(batch_mask)

                # Choose the subset of the hypothesized constraints that will continue
                self.search.prune_sentences(batch_idxs)

                eos_mask = eos_mask[batch_idxs]
                cand_beams = cand_beams[batch_idxs]
                bbsz_offsets.resize_(new_bsz, 1)
                cand_bbsz_idx = cand_beams.add(bbsz_offsets)
                cand_scores = cand_scores[batch_idxs]
                cand_indices = cand_indices[batch_idxs]

                if prefix_tokens is not None:
                    prefix_tokens = prefix_tokens[batch_idxs]
                src_lengths = src_lengths[batch_idxs]
                cands_to_ignore = cands_to_ignore[batch_idxs]

                scores = scores.view(bsz, -1)[batch_idxs].view(
                    new_bsz * beam_size, -1)
                tokens = tokens.view(bsz, -1)[batch_idxs].view(
                    new_bsz * beam_size, -1)
                if attn is not None:
                    attn = attn.view(bsz, -1)[batch_idxs].view(
                        new_bsz * beam_size, attn.size(1), -1)
                bsz = new_bsz
            else:
                batch_idxs = None

            # Set active_mask so that values > cand_size indicate eos hypos
            # and values < cand_size indicate candidate active hypos.
            # After, the min values per row are the top candidate active hypos

            # Rewrite the operator since the element wise or is not supported in torchscript.

            eos_mask[:, :beam_size] = ~((~cands_to_ignore) &
                                        (~eos_mask[:, :beam_size]))
            active_mask = torch.add(
                eos_mask.type_as(cand_offsets) * cand_size,
                cand_offsets[:eos_mask.size(1)],
            )

            # get the top beam_size active hypotheses, which are just
            # the hypos with the smallest values in active_mask.
            # {active_hypos} indicates which {beam_size} hypotheses
            # from the list of {2 * beam_size} candidates were
            # selected. Shapes: (batch size, beam size)
            new_cands_to_ignore, active_hypos = torch.topk(active_mask,
                                                           k=beam_size,
                                                           dim=1,
                                                           largest=False)

            # update cands_to_ignore to ignore any finalized hypos.
            cands_to_ignore = new_cands_to_ignore.ge(cand_size)[:, :beam_size]
            # Make sure there is at least one active item for each sentence in the batch.
            assert (~cands_to_ignore).any(dim=1).all()

            # update cands_to_ignore to ignore any finalized hypos

            # {active_bbsz_idx} denotes which beam number is continued for each new hypothesis (a beam
            # can be selected more than once).
            active_bbsz_idx = torch.gather(cand_bbsz_idx,
                                           dim=1,
                                           index=active_hypos)
            active_scores = torch.gather(cand_scores,
                                         dim=1,
                                         index=active_hypos)

            active_bbsz_idx = active_bbsz_idx.view(-1)
            active_scores = active_scores.view(-1)

            # copy tokens and scores for active hypotheses

            # Set the tokens for each beam (can select the same row more than once)
            tokens[:, :step + 1] = torch.index_select(tokens[:, :step + 1],
                                                      dim=0,
                                                      index=active_bbsz_idx)
            # Select the next token for each of them
            tokens.view(bsz, beam_size,
                        -1)[:, :, step + 1] = torch.gather(cand_indices,
                                                           dim=1,
                                                           index=active_hypos)
            if step > 0:
                scores[:, :step] = torch.index_select(scores[:, :step],
                                                      dim=0,
                                                      index=active_bbsz_idx)
            scores.view(bsz, beam_size,
                        -1)[:, :, step] = torch.gather(cand_scores,
                                                       dim=1,
                                                       index=active_hypos)

            # Update constraints based on which candidates were selected for the next beam
            self.search.update_constraints(active_hypos)

            # copy attention for active hypotheses
            if attn is not None:
                attn[:, :, :step + 2] = torch.index_select(
                    attn[:, :, :step + 2], dim=0, index=active_bbsz_idx)

            # reorder incremental state in decoder
            reorder_state = active_bbsz_idx

        # sort by score descending
        for sent in range(len(finalized)):
            scores = torch.tensor(
                [float(elem["score"].item()) for elem in finalized[sent]])
            _, sorted_scores_indices = torch.sort(scores, descending=True)
            finalized[sent] = [
                finalized[sent][ssi] for ssi in sorted_scores_indices
            ]
            finalized[sent] = torch.jit.annotate(List[Dict[str, Tensor]],
                                                 finalized[sent])
        return finalized
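Inside the beam-search loop, finished hypotheses are extracted with torch.masked_select restricted to the top beam_size candidates per sentence. A reduced sketch of that step; all shapes and values below are invented and do not come from the generator itself:

import torch

bsz, beam_size, eos = 2, 3, 2
cand_indices = torch.tensor([[2, 5, 7, 2, 1, 4],
                             [9, 2, 2, 3, 8, 6]])            # (bsz, 2 * beam_size) next-token ids
cand_scores = torch.randn(bsz, 2 * beam_size)
cand_beams = torch.randint(0, beam_size, (bsz, 2 * beam_size))
bbsz_offsets = (torch.arange(bsz) * beam_size).unsqueeze(1)
cand_bbsz_idx = cand_beams + bbsz_offsets                    # flat (bsz * beam_size) indices

eos_mask = cand_indices.eq(eos)                              # candidates that just emitted EOS
# Only the top beam_size candidates per sentence may finish.
eos_bbsz_idx = torch.masked_select(cand_bbsz_idx[:, :beam_size],
                                   mask=eos_mask[:, :beam_size])
eos_scores = torch.masked_select(cand_scores[:, :beam_size],
                                 mask=eos_mask[:, :beam_size])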
Example #50
    def forward(self, input, target, hidden, sent_lens, ce=False, noise=None):
        emb = self.emblayer(input)
        if (self.use_cell):
            if (self.use_rnn):
                hidden_output = torch.randn(emb.size(1),
                                            self.hidden_size).to(device)
                hidden_outputs = []
                for x in emb:
                    # carry the recurrent state forward across time steps
                    hidden_output = self.rnnlayer(x, hidden_output)
                    hidden_outputs.append(hidden_output)
                hidden_outputs = pack_padded_sequence(
                    torch.stack(hidden_outputs), sent_lens)
            else:
                hidden_output = torch.randn(emb.size(1),
                                            self.hidden_size).to(device)
                hidden_outputs = []
                for x in emb:
                    hidden_output, hidden = self.rnnlayer(
                        x, (hidden_output, hidden))
                    hidden_outputs.append(hidden_output)
                hidden_outputs = pack_padded_sequence(
                    torch.stack(hidden_outputs), sent_lens)
        elif (self.use_rnn_only):
            emb = pack_padded_sequence(emb, sent_lens)
            hidden_outputs, hidden = self.rnnlayer(emb, hidden)
        else:
            emb = pack_padded_sequence(emb, sent_lens)
            hidden_outputs, hidden = self.rnnlayer(emb, hidden)
        ''' CE training '''
        if self.ce is True or ce is True:
            output = F.linear(hidden_outputs[0], self.weight, self.bias)
            # output = self.outlayer(hidden_outputs[0])
        # ''' NCE training '''
        elif self.nce is True:
            ''' 
                target  size: seq_len, minibatch
                noise   size: seq_len, nsample
                indices size: seq_len, minibatch+nsample
                input   size: seq_len, minibatch, nhidden
            '''
            minibatch = target.size(-1)
            indices = torch.cat([target, noise], dim=-1)
            hidden_outputs = pad_packed_sequence(hidden_outputs)[0]
            hidden_outputs = hidden_outputs.contiguous()
            '''
                weight  size: seq_len, nhidden, minibatch+nsample
                bias    size: seq_len, 1,       minibatch+nsample
            '''
            weight = self.weight.index_select(0, indices.view(-1)).view(
                *indices.size(), -1).transpose(1, 2)
            bias = self.bias.index_select(
                0, indices.view(-1)).view_as(indices).unsqueeze(1)
            '''
                out          size: seq_len, minibatch, minibatch+nsample
                target_score size: seq_len, minibatch, minibatch
                noise_score  size: seq_len, minibatch, nsample
            '''
            out = torch.baddbmm(bias, hidden_outputs, weight)  # bias + hidden_outputs @ weight
            target_score, noise_score = out[:, :, :minibatch], out[:, :,
                                                                   minibatch:]
            target_score = target_score.sub(self.lognormconst).exp()
            noise_score = noise_score.sub(self.lognormconst).exp()
            target_score = target_score.contiguous()
            noise_score = noise_score.contiguous()
            '''
                target_score      size: seq_len, minibatch
                target_noise_prob size: seq_len, minibatch
                noise_noise_prob   size: seq_len, minibatch, nsample
            '''
            index_slice = torch.arange(
                0,
                target_score.size(1) * target_score.size(2),
                target_score.size(1)).long()
            for i, v in enumerate(index_slice):
                index_slice[i] = index_slice[i] + i
            target_score = target_score.view(target_score.size(0),
                                             -1).contiguous()
            target_score = target_score[:, index_slice]
            ## target_score = target_score.view(target_score.size(0), -1)[:, index_slice]

            target_noise_prob = self.noiseprob[target.view(-1)].view_as(
                target_score)
            noise_noise_prob = self.noiseprob[noise.view(-1)].view_as(
                noise).unsqueeze(1).expand_as(noise_score)

            model_loss = self.safe_log(
                target_score /
                (target_score + self.ncesample * target_noise_prob))
            noise_loss = torch.sum(
                self.safe_log(
                    (self.ncesample * noise_noise_prob) /
                    (noise_score + self.ncesample * noise_noise_prob)),
                -1).squeeze()
            loss = -(model_loss + noise_loss)

            mask = input.gt(0.1)
            mask[0, :] = 1
            loss = torch.masked_select(loss, mask)
            return loss.mean()

        else:
            raise ValueError('training mode must be either CE or NCE')
        return output
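In the NCE branch the per-token loss is finally averaged over real (non-padding) tokens only. Stripped of the surrounding model, that masking looks roughly like this; the token values and shapes are arbitrary placeholders:

import torch

# tokens holds word indices with 0 as padding; loss is (seq_len, batch).
tokens = torch.tensor([[4, 7, 2],
                       [5, 0, 3],
                       [6, 0, 0]])
loss = torch.rand(3, 3)

mask = tokens.gt(0)           # True for real tokens
mask[0, :] = True             # always keep the first time step, as in the example above
masked_loss = torch.masked_select(loss, mask)
mean_loss = masked_loss.mean()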