Example #1
 def biaffine(self, dep_arc, dep_rel, head_arc, head_rel, mask, arc_targets, rel_targets, blend):
     is_train = autograd.is_training()
     batch_size = mask.shape[1]
     seq_len = mask.shape[0]
     W_arc = self.arc_W.data()
     arc_logits: nd.NDArray = bilinear(dep_arc, W_arc, head_arc, self.mlp_arc_size, seq_len, batch_size,
                                       num_outputs=1,
                                       bias_x=True, bias_y=False)
     if blend is not None:
         arc_logits = arc_logits + blend
     # (#head x #dep) x batch_size
     flat_arc_logits = reshape_fortran(arc_logits, (seq_len, seq_len * batch_size))
     # (#head ) x (#dep x batch_size)
     arc_preds = nd.greater(arc_logits, 0)  # sigmoid y > 0.5 when x > 0
     if is_train or arc_targets is not None:
         arc_correct = arc_preds.asnumpy() * arc_targets
         arc_accuracy = np.sum(arc_correct) / np.sum(arc_targets * mask)
         # targets_1D = flatten_numpy(arc_targets)
         # losses = self.softmax_loss(flat_arc_logits, nd.array(targets_1D))
         flat_arc_targets = reshape_fortran(arc_targets, (seq_len, seq_len * batch_size))
         losses = self.binary_ce_loss(flat_arc_logits, nd.array(flat_arc_targets))
         mask_1D_tensor = nd.array(flatten_numpy(mask))
         arc_loss = nd.sum(losses * mask_1D_tensor) / mask_1D_tensor.sum()
         # return arc_accuracy, 0, 0, arc_loss
     W_rel = self.rel_W.data()
     rel_logits: nd.NDArray = bilinear(dep_rel, W_rel, head_rel, self.mlp_rel_size, seq_len, batch_size,
                                       num_outputs=self._vocab.rel_size, bias_x=True, bias_y=True)
     # #head x rel_size x #dep x batch_size
     flat_rel_logits = reshape_fortran(rel_logits.transpose([1, 0, 2, 3]),
                                       (self._vocab.rel_size, seq_len * seq_len * batch_size))
     # rel_size x (#head x #dep x batch_size)
     if is_train or arc_targets is not None:
         mask_rel: nd.NDArray = reshape_fortran(nd.array(mask * arc_targets),
                                                (1, seq_len * seq_len * batch_size))
         flat_rel_preds = flat_rel_logits.argmax(0)
         flat_rel_target = nd.array(reshape_fortran(rel_targets, (1, seq_len * seq_len * batch_size))).squeeze(
             axis=0)
         rel_correct = nd.equal(flat_rel_preds, flat_rel_target).asnumpy()
         rel_correct = rel_correct * flatten_numpy(arc_targets * mask)
         rel_accuracy = np.sum(rel_correct) / np.sum(arc_targets * mask)
         losses = self.softmax_loss(flat_rel_logits, flat_rel_target)
         rel_loss = nd.sum(losses * mask_rel) / mask_rel.sum()
     if is_train or arc_targets is not None:
         loss = arc_loss + rel_loss
     if is_train:
         return arc_accuracy, rel_accuracy, loss
     outputs = []
     rel_preds = rel_logits.transpose([1, 0, 2, 3]).argmax(0)
     arc_preds = arc_preds.transpose([2, 0, 1])
     rel_preds = rel_preds.transpose([2, 0, 1])
     for msk, arc_pred, rel_pred in zip(np.transpose(mask), arc_preds, rel_preds):
         # parse sentences one by one
         msk[0] = 1.
         sent_len = int(np.sum(msk))
         arc_pred = arc_pred[:sent_len, :sent_len]
         rel_pred = rel_pred[:sent_len, :sent_len]
         outputs.append((arc_pred, arc_pred * rel_pred))
     return outputs
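
The helpers reshape_fortran and flatten_numpy used above are defined elsewhere in the project. A minimal sketch of what they typically look like (column-major reshapes) is given below; treat it as an assumption rather than the project's verified code:

import numpy as np

def flatten_numpy(arr):
    # flatten a NumPy array in column-major (Fortran) order
    return np.reshape(arr, (-1,), 'F')

def reshape_fortran(tensor, shape):
    # column-major reshape; works for NumPy arrays and MXNet NDArrays alike:
    # reverse the axes, reshape to the reversed target shape, reverse back
    return tensor.T.reshape(tuple(reversed(shape))).T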
Example #2
def balance_sampler(samples):
    """ignore extra negative samples to keep batch balance"""
    num_pos = nd.sum(samples == 1, axis=0)
    num_neg = nd.sum(samples == 0, axis=0)
    drop_prob = (num_neg - num_pos) / num_neg
    drop_prob = nd.where(nd.lesser(drop_prob, 0), nd.zeros_like(drop_prob),
                         drop_prob)
    mask = nd.where(
        nd.greater(
            nd.random.uniform(0, 1, shape=samples.shape, ctx=samples.context),
            drop_prob), nd.ones_like(samples), nd.zeros_like(samples))
    mask = nd.where(nd.equal(samples, 1), samples, mask)
    return mask
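
A hypothetical call, assuming samples is a one-dimensional 0/1 label vector (the values below are illustrative only): positives are always kept, and surplus negatives are dropped at random.

from mxnet import nd

labels = nd.array([1, 0, 0, 0, 1, 0, 0, 0])  # 2 positives, 6 negatives
keep = balance_sampler(labels)  # positives stay 1; on average 2 of the 6 negatives survive
print(keep)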
Example #3
def batch_process(seq, ctx):
    seq = np.array(seq)
    # for every valid centre position, collect the region_size-token window
    # that starts region_radius tokens to its left
    aligned_seq = np.zeros(
        (max_sequence_length - 2 * region_radius, batch_size, region_size))
    for i in range(region_radius, max_sequence_length - region_radius):
        aligned_seq[i - region_radius] = seq[:, i - region_radius:i -
                                             region_radius + region_size]
    aligned_seq = nd.array(aligned_seq, ctx)
    batch_sequence = nd.array(seq, ctx)
    # keep only the positions that have a complete region around them
    trimed_seq = batch_sequence[:, region_radius:max_sequence_length -
                                region_radius]
    # 1 for real tokens (id > 0), 0 for padding, broadcast to the 128-dim embedding axis
    mask = nd.broadcast_axes(nd.greater(trimed_seq, 0).reshape(
        (batch_size, -1, 1)),
                             axis=2,
                             size=128)
    return aligned_seq, nd.array(trimed_seq, ctx), mask
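
batch_process relies on module-level constants that are defined elsewhere in the script. One illustrative configuration (the values are assumptions, not taken from the source):

batch_size = 16                       # sequences per mini-batch
max_sequence_length = 256             # padded length of every sequence
region_radius = 3                     # half-width of the context window
region_size = 2 * region_radius + 1   # tokens per region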
Example #4
    def forward(self, src_idx, tgt_idx):
        # compute encoder mask
        key_mask = self._get_key_mask(src_idx,
                                      src_idx,
                                      pad_idx=self.src_pad_idx)
        src_non_pad_mask = self._get_non_pad_mask(src_idx,
                                                  pad_idx=self.src_pad_idx)

        # compute decoder mask
        self_tril_mask = self._get_self_tril_mask(tgt_idx)
        self_key_mask = self._get_key_mask(tgt_idx,
                                           tgt_idx,
                                           pad_idx=self.tgt_pad_idx)
        self_att_mask = nd.greater((self_key_mask + self_tril_mask), 1)

        context_att_mask = self._get_key_mask(src_idx,
                                              tgt_idx,
                                              pad_idx=self.src_pad_idx)
        tgt_non_pad_mask = self._get_non_pad_mask(tgt_idx,
                                                  pad_idx=self.tgt_pad_idx)

        # Encoder
        position = nd.array(self._position_encoding_init(
            src_idx.shape[1], self._model_dim),
                            ctx=src_idx.context)
        position = nd.expand_dims(position, axis=0)
        position = nd.broadcast_axes(position, axis=0, size=src_idx.shape[0])
        position = position * src_non_pad_mask
        src_emb = self.embedding(src_idx)
        enc_output = self.encoder(src_emb, position, key_mask,
                                  src_non_pad_mask)

        # Decoder
        position = nd.array(self._position_encoding_init(
            tgt_idx.shape[1], self._model_dim),
                            ctx=src_idx.context)
        position = nd.expand_dims(position, axis=0)
        position = nd.broadcast_axes(position, axis=0, size=tgt_idx.shape[0])
        position = position * tgt_non_pad_mask
        tgt_emb = self.embedding(tgt_idx)

        outputs = self.decoder(enc_output, tgt_emb, position, self_att_mask,
                               context_att_mask, tgt_non_pad_mask)
        outputs = self.linear(outputs)
        return outputs
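
The helper _position_encoding_init is not shown here. A common sinusoidal implementation in the style of the original Transformer, written as a plain function (in the model it is a method); this is an assumption about the helper, not code from the source:

import numpy as np

def _position_encoding_init(max_length, dim):
    # sinusoidal position encoding: sine on even dimensions, cosine on odd dimensions
    position_enc = np.arange(max_length).reshape((-1, 1)) \
        / (np.power(10000, (2. / dim) * np.arange(dim).reshape((1, -1))))
    position_enc[:, 0::2] = np.sin(position_enc[:, 0::2])
    position_enc[:, 1::2] = np.cos(position_enc[:, 1::2])
    return position_enc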
Example #5
    def forward(self, pred, target):
        batch_size = target.shape[0]
        label_size = target.shape[1]

        ## rank weights used to weight each sampled violation
        rank_weights = self.rank_weights
        max_num_trials = target.shape[1] - 1

        pos_mask = nd.greater(target, 0).asnumpy()
        neg_mask = nd.equal(target, 0).asnumpy()
        L = nd.zeros_like(pred)

        for i in range(batch_size):
            for j in range(label_size):
                if target[i, j] == 1:
                    ##initialization
                    sample_score_margin = -1
                    num_trials = 0
                    while ((sample_score_margin < 0)
                           and (num_trials < max_num_trials)):
                        neg_labels_idx = np.array([
                            idx for idx, v in enumerate(target[i, :]) if v == 0
                        ])
                        if len(neg_labels_idx) > 0:
                            neg_idx = np.random.choice(neg_labels_idx,
                                                       replace=False)
                            sample_score_margin = pred[i, neg_idx] - pred[i, j]
                            num_trials += 1
                        else:
                            # no negative labels to sample from; stop searching
                            num_trials = 1
                            break
                    ## how many trials determine the weight
                    r_j = int(np.floor(max_num_trials / num_trials))
                    L[i, j] = rank_weights[r_j]
        #print("L weight",L)
        loss = nd.sum(
            L *
            (nd.sum(1 - nd.array(pos_mask).as_in_context(pred.context) * pred +
                    nd.array(neg_mask).as_in_context(pred.context) * pred,
                    axis=1,
                    keepdims=True)))
        self.save_for_backward(L, pos_mask, neg_mask)
        return loss
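
self.rank_weights is built outside this snippet. In WARP-style losses it is usually the truncated harmonic series alpha(k) = 1 + 1/2 + ... + 1/k, indexed by the estimated rank r_j; a sketch under that assumption:

import numpy as np
from mxnet import nd

def build_rank_weights(label_size):
    # rank_weights[k] = sum of 1/i for i = 1..k; index 0 is unused
    weights = np.zeros(label_size + 1)
    for k in range(1, label_size + 1):
        weights[k] = weights[k - 1] + 1.0 / k
    return nd.array(weights)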
Example #6
def batch_process(seq, isContextWord, ctx):
    seq = np.array(seq)
    aligned_seq = np.zeros(
        (max_sequence_length - 2 * region_radius, batch_size, region_size))
    for i in range(region_radius, max_sequence_length - region_radius):
        aligned_seq[i - region_radius] = seq[:, i - region_radius:i -
                                             region_radius + region_size]
    if isContextWord:
        # fold the relative position into the word id so that each of the
        # region_size positions indexes its own block of the context-unit embedding table
        unit_id_bias = np.array([i * vocab_size for i in range(region_size)])
        aligned_seq = aligned_seq.transpose((1, 0, 2)) + unit_id_bias
    aligned_seq = nd.array(aligned_seq, ctx)
    batch_sequence = nd.array(seq, ctx)
    trimed_seq = batch_sequence[:, region_radius:max_sequence_length -
                                region_radius]
    mask = nd.broadcast_axes(nd.greater(trimed_seq, 0).reshape(
        (batch_size, -1, 1)),
                             axis=2,
                             size=128)
    return aligned_seq, nd.array(trimed_seq, ctx), mask
Example #7
def get_max_pred(batch_heatmaps):
    batch_size = batch_heatmaps.shape[0]
    num_joints = batch_heatmaps.shape[1]
    width = batch_heatmaps.shape[3]
    heatmaps_reshaped = batch_heatmaps.reshape((batch_size, num_joints, -1))
    idx = nd.argmax(heatmaps_reshaped, 2)
    maxvals = nd.max(heatmaps_reshaped, 2)

    maxvals = maxvals.reshape((batch_size, num_joints, 1))
    idx = idx.reshape((batch_size, num_joints, 1))

    preds = nd.tile(idx, (1, 1, 2)).astype(np.float32)

    preds[:, :, 0] = (preds[:, :, 0]) % width
    preds[:, :, 1] = nd.floor((preds[:, :, 1]) / width)

    pred_mask = nd.tile(nd.greater(maxvals, 0.0), (1, 1, 2))
    pred_mask = pred_mask.astype(np.float32)

    preds *= pred_mask
    return preds, maxvals
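
A hypothetical call, assuming batch_heatmaps has shape (batch, joints, height, width):

from mxnet import nd

heatmaps = nd.random.uniform(shape=(2, 17, 64, 48))  # 2 images, 17 joints, 64x48 heatmaps
coords, scores = get_max_pred(heatmaps)
print(coords.shape, scores.shape)  # (2, 17, 2) x/y coordinates and (2, 17, 1) peak values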
Example #8
def get_max_pred(batch_heatmaps):
    batch_size = batch_heatmaps.shape[0]
    num_joints = batch_heatmaps.shape[1]
    width = batch_heatmaps.shape[3]
    heatmaps_reshaped = batch_heatmaps.reshape((batch_size, num_joints, -1))
    idx = nd.argmax(heatmaps_reshaped, 2)
    maxvals = nd.max(heatmaps_reshaped, 2)

    maxvals = maxvals.reshape((batch_size, num_joints, 1))
    idx = idx.reshape((batch_size, num_joints, 1))

    preds = nd.tile(idx, (1, 1, 2)).astype(np.float32)

    preds[:, :, 0] = (preds[:, :, 0]) % width
    preds[:, :, 1] = nd.floor((preds[:, :, 1]) / width)

    pred_mask = nd.tile(nd.greater(maxvals, 0.0), (1, 1, 2))
    pred_mask = pred_mask.astype(np.float32)

    preds *= pred_mask
    return preds, maxvals
Example #9
    def forward(self, en_bert_output, en_idx, ch_idx):
        self_tril_mask = self._get_self_tril_mask(ch_idx)
        self_key_mask = self._get_key_mask(ch_idx,
                                           ch_idx,
                                           pad_idx=self.ch_pad_idx)
        self_att_mask = nd.greater((self_key_mask + self_tril_mask), 1)

        context_att_mask = self._get_key_mask(en_idx,
                                              ch_idx,
                                              pad_idx=self.en_pad_idx)
        non_pad_mask = self._get_non_pad_mask(ch_idx, pad_idx=self.ch_pad_idx)

        position = nd.array(self._position_encoding_init(
            ch_idx.shape[1], self._model_dim),
                            ctx=self._ctx)
        position = nd.expand_dims(position, axis=0)
        position = nd.broadcast_axes(position, axis=0, size=ch_idx.shape[0])
        position = position * non_pad_mask
        ch_emb = self.ch_embedding(ch_idx)
        outputs = self.decoder(en_bert_output, ch_emb, position, self_att_mask,
                               context_att_mask, non_pad_mask)
        outputs = self.linear(outputs)
        return outputs
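
The mask helpers are defined on the model class and not shown. Minimal sketches consistent with how they are combined above, written as plain functions (the real implementations may differ):

import numpy as np
from mxnet import nd

def _get_non_pad_mask(idx, pad_idx):
    # (batch, seq_len, 1): 1 for real tokens, 0 for padding
    return nd.expand_dims(nd.not_equal(idx, pad_idx), axis=2)

def _get_key_mask(k_idx, q_idx, pad_idx):
    # (batch, q_len, k_len): 1 where the key token is not padding
    mask = nd.expand_dims(nd.not_equal(k_idx, pad_idx), axis=1)
    return nd.broadcast_axes(mask, axis=1, size=q_idx.shape[1])

def _get_self_tril_mask(tgt_idx):
    # (batch, len, len): lower-triangular causal mask
    batch_size, seq_len = tgt_idx.shape
    tril = nd.array(np.tril(np.ones((seq_len, seq_len))), ctx=tgt_idx.context)
    return nd.broadcast_axes(nd.expand_dims(tril, axis=0), axis=0, size=batch_size)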
Example #10
    # test for Lsep loss function implementation
    Lsep_func = LSEP_funcLoss()
    use_identity = True
    if use_identity:
        pred = nd.array([[0.9, 0.4, 0.5, 0.2], [0.1, 0.6, 0.2, 0.8]])
        target = nd.array([[1, 1, 0, 0], [0, 1, 0, 1]])
    pred.attach_grad()
    with autograd.record():
        loss = Lsep_func(pred, target)
        loss.backward(mxnet.nd.ones_like(loss))
    print("lsep loss with function ", loss)
    print(pred.grad)

    warp_loss = WarpLoss(label_size=63)
    pred = nd.random.normal(shape=(10, 63))
    target = nd.random.normal(shape=(10, 63))
    target = nd.greater(target, 0.2)
    use_identity = True
    if use_identity:
        pred = nd.array([[0.9, 0.4, 0.5, 0.2], [0.1, 0.6, 0.2, 0.8]])
        target = nd.array([[1, 1, 0, 0], [0, 1, 0, 1]])
    pred.attach_grad()
    with autograd.record():
        loss = warp_loss(pred, target)

        loss.backward()
    print("warp loss with nn block ", loss)
    print("pred.grad", pred.grad)

    # test for the autograd-Function version of WARPLoss
    warp_funcloss = WARP_funcLoss(label_size=4)
    if use_identity:
Example #11
    def forward(self, is_train, req, in_data, out_data, aux):
        fea = in_data[0]
        data = in_data[1]
        weights = in_data[2]
        prob = in_data[3]
        
        # temperature-scaled softmax over the class probabilities (temperature 3)
        prob = prob / 3
        prob = nd.exp(prob)
        prob = prob / nd.sum(prob, axis=1, keepdims=1)

        # per-channel weights: probability-weighted mixture of `weights`, broadcast over H and W
        w = nd.dot(prob, weights)
        w = nd.expand_dims(w, 2)
        w = nd.expand_dims(w, 3)
        fea_w = fea * w
        
        d_w = data.shape[3]
        d_h = data.shape[2]
            
        w = fea.shape[2]
        n = fea.shape[0]
        
        
        fea = nd.mean(fea_w,axis=1,keepdims=1)
#        fea = nd.contrib.BilinearResize2D(fea,height=4*w,width=4*w)
#        w = 4*w
        
        max_val = nd.max(fea,axis=(2,3),keepdims=1)
        fea = fea / max_val
        
        
        # threshold the normalized activation map at a loose and a strict level
        fea_mask = nd.greater_equal(fea, 0.1)
        fea_mask2 = nd.greater_equal(fea, 0.25)
        
        
        
        # morphological clean-up: min-pool (erode) then max-pool (dilate) the coarse mask
        fea_mask1 = -nd.Pooling(-fea_mask, kernel=(5, 5), pool_type='max', pad=(2, 2))
        fea_mask1 = nd.Pooling(fea_mask1, kernel=(11, 11), pool_type='max', pad=(5, 5))
        # use the cleaned mask only if it still covers more than 4 pixels,
        # otherwise fall back to the stricter threshold mask
        cmask = nd.sum(fea_mask1, axis=(2, 3), keepdims=1)
        cmask = nd.greater(cmask, 4)
        fea_mask = cmask * fea_mask2 * fea_mask1 + (1 - cmask) * fea_mask2
        
        fea_mask = fea_mask[:,0,:,:].asnumpy()
        
        
        shape = self.outsize
        
        img_res = nd.zeros((n,3,shape,shape))
#        fea_res = nd.zeros((n,shape,shape))
        for i in range(n):
            m = fea_mask[i] 
            try:
                
                arg = np.float32(np.where(m==1))   
                ymin = np.int32(np.floor(np.min(arg[0])*(d_h/w)))
                ymax = np.int32(np.ceil(np.max(arg[0])*(d_h/w)))
                xmin = np.int32(np.floor(np.min(arg[1])*(d_w/w)))
                xmax = np.int32(np.ceil(np.max(arg[1])*(d_w/w)))
                
                x_center = (xmin+xmax)/2
                y_center = (ymin+ymax)/2
    #            
                x_length = xmax - xmin
                y_length = ymax - ymin
                longside = max(y_length,x_length)
                 
                x = int(max(x_center - longside / 2, 0))
                xmax = int(min(x_center + longside / 2, d_w))

                l_x = xmax - x
                y = int(max(y_center - longside / 2, 0))
                ymax = int(min(y_center + longside / 2, d_h))
                l_y = ymax - y
            
#            fea0 = fea[i]
#            fea0 = nd.expand_dims(fea0,0)
#            fea0 = nd.expand_dims(fea0,0)
#            fea0 = nd.contrib.BilinearResize2D(fea0,height=d_h,width=d_w)
#            
            
                img_crop = data[i,:,y:y+l_y,x:x+l_x]
            except:
                print(arg)
#            fea_crop = fea0[0,:,y:y+l_y,x:x+l_x]
            
            img_crop = nd.expand_dims(img_crop,0)
#            fea_crop  = nd.expand_dims(fea_crop,0)

            img_crop = nd.contrib.BilinearResize2D(img_crop,height=shape,width=shape)
#            fea_crop = nd.contrib.BilinearResize2D(fea_crop,height=shape,width=shape)
#                
#                if l_y > l_x:
#                    longside = int((l_y/l_x)*resize)
#                    img_crop = nd.contrib.BilinearResize2D(img_crop,height=longside,width=resize)
#                    s = int(np.floor((longside-shape)/2))
#                    img_crop = img_crop[:,:,s:s+shape,s1:s1+shape]
#                else:
#                    longside = int(l_x/l_y*resize)
#                    img_crop = nd.contrib.BilinearResize2D(img_crop,height=resize,width=longside) 
#                    s = int(np.floor((longside-shape)/2))
#                    img_crop = img_crop[:,:,s1:s1+shape,s:s+shape]
#                    
            
            img_res[i,:,:,:] = nd.squeeze(img_crop)
#            fea_res[i,:,:] = nd.squeeze(fea_crop)
#        fea_res = nd.expand_dims(fea_res,1)
#        img_res = img_res * fea_res
        self.assign(out_data[0], req[0], img_res)
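
This forward belongs to an mxnet.operator.CustomOp subclass (hence the self.assign(out_data[0], req[0], ...) call). A minimal enclosing skeleton with a hypothetical class name and a zero-gradient backward, offered only as a sketch:

import mxnet as mx

class AttentionCropOp(mx.operator.CustomOp):  # hypothetical name
    def __init__(self, outsize=224):
        super(AttentionCropOp, self).__init__()
        self.outsize = outsize  # side length of the cropped output image

    # ... the forward shown above goes here ...

    def backward(self, req, out_grad, in_data, out_data, in_grad, aux):
        # the crop is treated as non-differentiable; propagate zero gradients
        for i, grad in enumerate(in_grad):
            self.assign(grad, req[i], mx.nd.zeros_like(grad))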
Example #12
 def _sample_bernoulli(probability):
     return nd.greater(probability, nd.uniform(shape=probability.shape))
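
A hypothetical call: each entry of the result is 1 with the corresponding probability, obtained by thresholding uniform noise.

from mxnet import nd

p = nd.array([0.1, 0.5, 0.9])
draw = nd.greater(p, nd.uniform(shape=p.shape))  # random 0/1 vector; entry i is 1 with probability p[i]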