Example #1
def dev(ch_bert, model, ch_vocab, dev_dataiter, logger, ctx):
    TP_s = 0
    FP_s = 0
    FN_s = 0
    example_ids = []
    for content, token_types, valid_len, label, example_id in tqdm(
            dev_dataiter):
        example_ids.extend(example_id)
        content = content.as_in_context(ctx)
        token_types = token_types.as_in_context(ctx)
        valid_len = valid_len.as_in_context(ctx)
        label = label.as_in_context(ctx)

        output = model(content, token_types, valid_len)
        predict = nd.argmax(nd.softmax(output, axis=-1), axis=-1)
        tp_s = int(nd.sum(nd.equal(predict, label)).asscalar())
        fp_s = int(
            nd.sum(nd.not_equal(predict, label) *
                   nd.equal(label, 0)).asscalar())
        fn_s = int(
            nd.sum(nd.not_equal(predict, label) *
                   nd.equal(label, 1)).asscalar())
        TP_s += tp_s
        FP_s += fp_s
        FN_s += fn_s

    P_s = TP_s / (TP_s + FP_s)
    R_s = TP_s / (TP_s + FN_s)
    F = (2 * P_s * R_s) / (P_s + R_s)

    logger.info("F:{}".format(F))
    return F
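A minimal, self-contained sketch of the boolean-mask counting idiom used above (hypothetical toy tensors, assuming `from mxnet import nd`); note that multiplying the equality mask by `label == 1` gives a strict true-positive count, whereas the function above counts every correct prediction:

from mxnet import nd

predict = nd.array([1, 0, 1, 1, 0])
label = nd.array([1, 0, 0, 1, 1])
tp = nd.sum(nd.equal(predict, label) * nd.equal(label, 1)).asscalar()      # predicted 1, label 1
fp = nd.sum(nd.not_equal(predict, label) * nd.equal(label, 0)).asscalar()  # predicted 1, label 0
fn = nd.sum(nd.not_equal(predict, label) * nd.equal(label, 1)).asscalar()  # predicted 0, label 1
precision = tp / (tp + fp)
recall = tp / (tp + fn)
print(2 * precision * recall / (precision + recall))  # ~0.667 for this toy batch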
Example #2
def distanceAA2(regions,i,binnum,dibins,dibins4):
#Initialize empty arrays for storing the histogram of directions, the summed distances, and the number of counted pairs in each distance-range bin
    co0=nd.zeros(binnum-1,gpu(0),dtype="float32")
    codi0=nd.zeros((5,binnum-1),gpu(0),dtype="float32")
    count0=nd.zeros(binnum-1,gpu(0),dtype="float32")
    count4=nd.zeros((5,binnum-1),gpu(0),dtype="float32")
    co4=nd.zeros((5,binnum-1),gpu(0),dtype="float32")
    seed=nd.zeros((1,2),gpu(0))
#Calculate index coordinates and directions by chunks
    a=regions[i[0]*broadcdp:min((i[0]+1)*broadcdp,regions.shape[0]),:]
    b=regions[i[1]*broadcdp:min((i[1]+1)*broadcdp,regions.shape[0]),:]
    a1=nd.array(a,gpu(0))
    b1=nd.array(b,gpu(0))
#    print ("a1",a1,"b1",b1)
    for ii in range (a1.shape[0]-1):
        a1_b1=(nd.expand_dims(a1[ii].reshape((1,2)),axis=1)-b1[ii+1:,:]).reshape((a1[ii+1:,:].shape[0],2))
        seed=nd.concat(seed,a1_b1,dim=0)
    if seed.shape[0]>1:
        x1_x2=seed[1:,0]
        y1_y2=seed[1:,1]
        labels=nd.zeros(x1_x2.shape[0],gpu(0),dtype="float32")
        sdi0=(nd.degrees(nd.arctan((y1_y2)/(x1_x2)))+90).reshape((-1,))
        ldis=nd.broadcast_hypot(x1_x2,y1_y2).reshape((-1,))

#Change 0 to 180 so the sum over the boolean mask does not lose values
        sdi0=nd.where(condition=(sdi0==0),x=labels+180,y=sdi0)

#Store sum of distances co0 and histogram of directions in each range bin
        for p in range (0,binnum-1):
            booleanmask=nd.equal((ldis>=bins[p]),(ldis<bins[p+1]))
            count0[p]+=nd.nansum(booleanmask)
            co0[p]+=nd.nansum(ldis*booleanmask)

#Exclude values not in the distance range bin
            sdi1=nd.where(condition=(booleanmask==0),x=labels-1,y=sdi0)
            for q in range (0,5):
                booleanmaskdi=nd.equal((sdi1>=dibins[q]),(sdi1<dibins[q+1]))            
                codi0[q,p]+=nd.nansum(booleanmaskdi)
            
        for k in range (0,5):
            booleanmaskdi=nd.equal((sdi0>=dibins4[k]),(sdi0<dibins4[k+1]))
            ldis0=ldis*booleanmaskdi
            for l in range (0,binnum-1):
                booleanmask=nd.equal((ldis0>=bins[l]),(ldis0<bins[l+1]))
                count4[k,l]+=nd.nansum(booleanmask)
                co4[k,l]+=nd.nansum(ldis0*booleanmask)

    codi0[0,:]+=codi0[4,:]
    codi0=codi0[0:4,:]
    count4[0,:]+=count4[4,:]
    count4=count4[0:4,:]
    co4[0,:]+=co4[4,:]
    co4=co4[0:4,:]
    return(co0,codi0,count0,co4,count4)
Example #3
def distanceAATOPO(regions,i,binnum,dibins,dibins4,x,y,ctx):
#Initialize empty arrays for storing the histogram of directions, the summed distances, and the number of counted pairs in each distance-range bin
    co0=nd.zeros(binnum-1,ctx[0],dtype="float32")
    codi0=nd.zeros((5,binnum-1),ctx[0],dtype="float32")
    count0=nd.zeros(binnum-1,ctx[0],dtype="float32")
    count4=nd.zeros((5,binnum-1),ctx[0],dtype="float32")
    co4=nd.zeros((5,binnum-1),ctx[0],dtype="float32")
    
#Calculate index coordinates and directions by chunks
    a=regions[i*broadcdp:min((i+1)*broadcdp,regions.shape[0]),:]
    a1=nd.array(a,ctx[0])
    b1=nd.array([x,y],ctx[0])
    a1_b1=(nd.expand_dims(a1,axis=1)-b1).reshape((-1,2))
    x1_x2=a1_b1[:,0]
    y1_y2=a1_b1[:,1]
#Find the rows where both coordinates equal zero
    boolmask=(x1_x2==0)*(y1_y2==0)
    labels=nd.zeros(boolmask.shape[0],ctx[0],dtype="float32")
    sdi0=(nd.degrees(nd.arctan((y1_y2)/(x1_x2)))+90).reshape((-1,))
    ldis=nd.broadcast_hypot(x1_x2,y1_y2).reshape((-1,))
#Change the zeros into -1
    sdi0=nd.where(condition=boolmask,x=labels-1,y=sdi0)
    ldis=nd.where(condition=boolmask,x=labels-1,y=ldis)
#Change 0 to 180 so the sum over the boolean mask does not lose values
    sdi0=nd.where(condition=(sdi0==0),x=labels+180,y=sdi0)
#Store sum of distances co0 and histogram of directions in each range bin
    for p in range (0,binnum-1):
        booleanmask=nd.equal((ldis>=bins[p]),(ldis<bins[p+1]))
        count0[p]+=nd.sum(booleanmask)
        co0[p]+=nd.sum(ldis*booleanmask)
#Exclude values not in the distance range bin
        sdi1=nd.where(condition=(booleanmask==0),x=labels-1,y=sdi0)
        for q in range (0,5):
            booleanmaskdi=nd.equal((sdi1>=dibins[q]),(sdi1<dibins[q+1]))            
            codi0[q,p]+=nd.nansum(booleanmaskdi)
            
    for k in range (0,5):
        booleanmaskdi=nd.equal((sdi0>=dibins4[k]),(sdi0<dibins4[k+1]))
        ldis0=ldis*booleanmaskdi
        for l in range (0,binnum-1):
            booleanmask=nd.equal((ldis0>=bins[l]),(ldis0<bins[l+1]))
            count4[k,l]+=nd.sum(booleanmask)
            co4[k,l]+=nd.sum(ldis0*booleanmask)
            
    codi0[0,:]+=codi0[4,:]
    codi0=codi0[0:4,:]
    count4[0,:]+=count4[4,:]
    count4=count4[0:4,:]
    co4[0,:]+=co4[4,:]
    co4=co4[0:4,:]
    return(co0.asnumpy(),codi0.asnumpy(),count0.asnumpy(),co4.asnumpy(),count4.asnumpy())
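A side note on the range-bin masks built with `nd.equal` in the two functions above: for monotonically increasing bin edges the two comparisons can never both be false, so `nd.equal(x >= lo, x < hi)` acts as a logical AND. A minimal sketch with hypothetical values:

from mxnet import nd

x = nd.array([0.5, 1.5, 2.5, 3.5])
lo, hi = 1.0, 3.0
in_range = nd.equal(x >= lo, x < hi)  # 1 where lo <= x < hi, 0 elsewhere
print(in_range)                       # [0. 1. 1. 0.]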
    def _get_new_alive_state(self, new_seq, new_log_probs, new_cache):
        """Gather the top k sequences that are still alive.

        Args:
          new_seq: New sequences generated by growing the current alive sequences
            int32 tensor with shape [batch_size, 2 * beam_size, cur_index + 1]
          new_log_probs: Log probabilities of new sequences
            float32 tensor with shape [batch_size, beam_size]
          new_cache: Dict of cached values for each sequence.

        Returns:
          Dictionary with alive keys from _StateKeys:
            {Top beam_size sequences that are still alive (don't end with eos_id)
             Log probabilities of top alive sequences
             Dict cache storing decoder states for top alive sequences}
        """
        new_finished_flags = nd.equal(new_seq[:, :, -1], self.eos_id)
        new_log_probs = new_log_probs + new_finished_flags * -INF
        top_alive_seq, top_alive_log_probs = _gather_topk_beams(
            [new_seq, new_log_probs], new_log_probs, self.batch_size,
            self.beam_size)
        top_alive_cache = _gather_topk_beams([],
                                             new_log_probs,
                                             self.batch_size,
                                             self.beam_size,
                                             cache=new_cache)

        return {
            _StateKeys.ALIVE_SEQ: top_alive_seq,
            _StateKeys.ALIVE_LOG_PROBS: top_alive_log_probs,
            _StateKeys.ALIVE_CACHE: top_alive_cache
        }
Example #5
    def train(self,epochs):
        for i in range(epochs):
            efficiency = 0
            cumuLoss = 0
            for j in range(self.nbIter):
                z = nd.round(nd.random.uniform(0,1,(self.batchSize,self.code.k),ctx=self.ctx))
                x = nd.dot(z,self.code.G)%2

                noiseBSC = nd.random.uniform(0.01,0.99,(self.batchSize,self.code.n),ctx=self.ctx)
                noiseBSC = nd.floor(noiseBSC/nd.max(noiseBSC,axis=(1,)).reshape((self.batchSize,1)))

                y = (x + noiseBSC)%2

                with autograd.record():
                    zHat = self.net(y)
                    loss = self.SE(zHat,z)
                loss.backward()

                self.adam(self.params,self.vs,self.sqrs, self.lr, self.batchSize, self.t)
                self.t+=1

                cumuLoss += loss.asscalar()
                zHat = nd.round(zHat)
                efficiency += nd.sum(nd.equal(zHat,z)).asscalar()


            Pc = efficiency/(self.batchSize*self.nbIter*self.code.k)
            Pe = 1 - Pc
            normCumuLoss = cumuLoss/(self.batchSize*self.nbIter*self.code.k)
            print("Epochs %d: Pe = %lf , loss = %lf" % (i,Pe,normCumuLoss))
Example #6
def distance2(regions,i,binnum,bins,ctx):
#Initialize empty arrays for storing the number of counted pairs in each distance-range bin
    count0=nd.zeros(binnum-1,ctx[0],dtype="float32")
    seed=nd.zeros((1,2),ctx[0])
#Calculate index coordinates and directions by chunks
    a=regions[i[0]*broadcdp:min((i[0]+1)*broadcdp,regions.shape[0]),:]
    b=regions[i[1]*broadcdp:min((i[1]+1)*broadcdp,regions.shape[0]),:]
    a1=nd.array(a,ctx[0])
    b1=nd.array(b,ctx[0])    
    for ii in range(a1.shape[0]-1):
        a1_b1=(nd.expand_dims(a1[ii].reshape((1,2)),axis=1)-b1[ii+1:,:]).reshape((a1[ii+1:,:].shape[0],2))
        seed=nd.concat(seed,a1_b1,dim=0)
    if seed.shape[0]>1:
        x1_x2=seed[:,0]
        y1_y2=seed[:,1]
#Find the rows where both coordinates equal zero and assign label -1
        boolmask=(x1_x2==0)*(y1_y2==0)
        labels=nd.zeros(boolmask.shape[0],ctx[0],dtype="float32")-1
        ldis=nd.broadcast_hypot(x1_x2,y1_y2).reshape((-1,))
#Change the zeros into -1
        ldis=nd.where(condition=boolmask,x=labels,y=ldis)
        for p in range (0,binnum-1):
            booleanmask=nd.equal((ldis>=bins[p]),(ldis<bins[p+1]))
            count0[p]+=nd.sum(booleanmask)
    return(count0.asnumpy())
Example #7
def get_accuracy(pre_l, true_l):
    one_zero_pre = nd.where(pre_l > 0.5, nd.ones_like(pre_l),
                            nd.zeros_like(pre_l))
    compare = nd.equal(one_zero_pre, true_l).sum(axis=1)
    samples_right = nd.where(compare == 3, nd.ones_like(compare),
                             nd.zeros_like(compare)).sum()
    all_num = pre_l.shape[0]
    return samples_right / all_num
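A hypothetical call to `get_accuracy`, assuming three binary labels per sample as the `compare == 3` check implies:

from mxnet import nd

pre_l = nd.array([[0.9, 0.2, 0.7], [0.4, 0.8, 0.6]])
true_l = nd.array([[1.0, 0.0, 1.0], [1.0, 1.0, 1.0]])
print(get_accuracy(pre_l, true_l).asscalar())  # 0.5: only the first sample matches on all three labels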
Example #8
 def biaffine(self, dep_arc, dep_rel, head_arc, head_rel, mask, arc_targets, rel_targets, blend):
     is_train = autograd.is_training()
     batch_size = mask.shape[1]
     seq_len = mask.shape[0]
     W_arc = self.arc_W.data()
     arc_logits: nd.NDArray = bilinear(dep_arc, W_arc, head_arc, self.mlp_arc_size, seq_len, batch_size,
                                       num_outputs=1,
                                       bias_x=True, bias_y=False)
     if blend is not None:
         arc_logits = arc_logits + blend
     # (#head x #dep) x batch_size
     flat_arc_logits = reshape_fortran(arc_logits, (seq_len, seq_len * batch_size))
     # (#head ) x (#dep x batch_size)
     arc_preds = nd.greater(arc_logits, 0)  # sigmoid y > 0.5 when x > 0
     if is_train or arc_targets is not None:
         arc_correct = arc_preds.asnumpy() * arc_targets
         arc_accuracy = np.sum(arc_correct) / np.sum(arc_targets * mask)
         # targets_1D = flatten_numpy(arc_targets)
         # losses = self.softmax_loss(flat_arc_logits, nd.array(targets_1D))
         flat_arc_targets = reshape_fortran(arc_targets, (seq_len, seq_len * batch_size))
         losses = self.binary_ce_loss(flat_arc_logits, nd.array(flat_arc_targets))
         if is_train or arc_targets is not None:
             mask_1D_tensor = nd.array(flatten_numpy(mask))
         arc_loss = nd.sum(losses * mask_1D_tensor) / mask_1D_tensor.sum()
         # return arc_accuracy, 0, 0, arc_loss
     W_rel = self.rel_W.data()
     rel_logits: nd.NDArray = bilinear(dep_rel, W_rel, head_rel, self.mlp_rel_size, seq_len, batch_size,
                                       num_outputs=self._vocab.rel_size, bias_x=True, bias_y=True)
     # #head x rel_size x #dep x batch_size
     flat_rel_logits = reshape_fortran(rel_logits.transpose([1, 0, 2, 3]),
                                       (self._vocab.rel_size, seq_len * seq_len * batch_size))
     # rel_size x (#head x #dep x batch_size)
     if is_train or arc_targets is not None:
         mask_rel: nd.NDArray = reshape_fortran(nd.array(mask * arc_targets),
                                                (1, seq_len * seq_len * batch_size))
         flat_rel_preds = flat_rel_logits.argmax(0)
         flat_rel_target = nd.array(reshape_fortran(rel_targets, (1, seq_len * seq_len * batch_size))).squeeze(
             axis=0)
         rel_correct = nd.equal(flat_rel_preds, flat_rel_target).asnumpy()
         rel_correct = rel_correct * flatten_numpy(arc_targets * mask)
         rel_accuracy = np.sum(rel_correct) / np.sum(arc_targets * mask)
         losses = self.softmax_loss(flat_rel_logits, flat_rel_target)
         rel_loss = nd.sum(losses * mask_rel) / mask_rel.sum()
     if is_train or arc_targets is not None:
         loss = arc_loss + rel_loss
     if is_train:
         return arc_accuracy, rel_accuracy, loss
     outputs = []
     rel_preds = rel_logits.transpose([1, 0, 2, 3]).argmax(0)
     arc_preds = arc_preds.transpose([2, 0, 1])
     rel_preds = rel_preds.transpose([2, 0, 1])
     for msk, arc_pred, rel_pred in zip(np.transpose(mask), arc_preds, rel_preds):
         # parse sentences one by one
         msk[0] = 1.
         sent_len = int(np.sum(msk))
         arc_pred = arc_pred[:sent_len, :sent_len]
         outputs.append((arc_pred[:sent_len, :sent_len], arc_pred * rel_pred[:sent_len, :sent_len]))
     return outputs
def accuracy_metric(gallery_features, gallery_label, query_features, query_label):
    B1 = nd.sum(nd.square(gallery_features), axis=1, keepdims=True)
    B2 = nd.sum(nd.square(query_features), axis=1, keepdims=True)
    dist_mat = nd.broadcast_add(B2, B1.T) - 2 * nd.dot(query_features, gallery_features.T)
    label_mask = nd.broadcast_equal(dist_mat, nd.min(dist_mat, axis=1, keepdims=True)).astype('float32')
    pre_label_mat = nd.broadcast_mul(label_mask, gallery_label.reshape(1, -1).astype('float32'))
    pre_label_list = nd.max(pre_label_mat, axis=1)
    cor_num = nd.sum(nd.equal(pre_label_list, query_label.astype('float32')))
    return cor_num.asnumpy()[0] / len(query_label)
def lifted_loss(net,data,label):
    label = label.reshape(-1, 1)
    label_mat = nd.equal(label, label.T).astype('float32')
    vec = net(data)
    dist_self = nd.sum(nd.square(vec), axis=1, keepdims=True)
    dist_mat = nd.broadcast_add(dist_self, dist_self.T) - 2 * nd.dot(vec, vec.T)
    p_row = nd.sum(nd.exp(1.0 - (dist_mat)) * (1 - label_mat), 1, True)
    loss = 1000 * (nd.log(p_row + p_row.T + 1e-5) + dist_mat) * label_mat / (2 * label_mat.sum())
    return loss
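The `nd.equal(label, label.T)` idiom above builds a same-identity mask: entry (i, j) is 1 when samples i and j share a label. A minimal sketch with hypothetical labels:

from mxnet import nd

label = nd.array([0, 1, 0]).reshape(-1, 1)
label_mat = nd.equal(label, label.T).astype('float32')
print(label_mat)
# [[1. 0. 1.]
#  [0. 1. 0.]
#  [1. 0. 1.]]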
Example #11
def train_and_valid(ch_bert, model, ch_vocab, train_dataiter, dev_dataiter,
                    trainer, finetune_trainer, epochs, loss_func, ctx, lr,
                    batch_size, params_save_step, params_save_path_root,
                    eval_step, log_step, check_step, logger,
                    num_train_examples, warmup_ratio):
    batches = len(train_dataiter)

    num_train_steps = int(num_train_examples / batch_size * epochs)
    num_warmup_steps = int(num_train_steps * warmup_ratio)
    global_step = 0

    dev_bleu_score = 0

    for epoch in range(epochs):
        for content, token_types, valid_len, label, example_id in train_dataiter:
            # learning rate schedule
            if global_step < num_warmup_steps:
                new_lr = lr * global_step / num_warmup_steps
            else:
                non_warmup_steps = global_step - num_warmup_steps
                offset = non_warmup_steps / (num_train_steps -
                                             num_warmup_steps)
                new_lr = lr - offset * lr
            trainer.set_learning_rate(new_lr)

            content = content.as_in_context(ctx)
            token_types = token_types.as_in_context(ctx)
            valid_len = valid_len.as_in_context(ctx)
            label = label.as_in_context(ctx)

            with autograd.record():
                output = model(content, token_types, valid_len)
                loss_mean = loss_func(output, label)
                loss_mean = nd.sum(loss_mean) / batch_size
            loss_mean.backward()
            loss_scalar = loss_mean.asscalar()

            trainer.step(1)
            finetune_trainer.step(1)

            if global_step and global_step % log_step == 0:
                acc = nd.sum(
                    nd.equal(nd.argmax(nd.softmax(output, axis=-1), axis=-1),
                             label)) / batch_size
                acc = acc.asscalar()
                logger.info(
                    "epoch:{}, batch:{}/{}, acc:{}, loss:{}, (lr:{}s)".format(
                        epoch, global_step % batches, batches, acc,
                        loss_scalar, trainer.learning_rate))
            global_step += 1
        F1 = dev(ch_bert, model, ch_vocab, dev_dataiter, logger, ctx)
        if not os.path.exists(params_save_path_root):
            os.makedirs(params_save_path_root)
        model_params_file = params_save_path_root + \
            "model_step_{}_{}.params".format(global_step, F1)
        model.save_parameters(model_params_file)
        logger.info("{} Save Completed.".format(model_params_file))
def Treplit_hard_loss(net,data,label):
    label = label.reshape(-1, 1)
    label_mat = nd.equal(label, label.T).astype('float32')
    vec = net(data)
    dist_self = nd.sum(nd.square(vec), axis=1, keepdims=True)
    dist_mat = nd.broadcast_add(dist_self, dist_self.T) - 2 * nd.dot(vec, vec.T)
    p_min=nd.log(nd.sum(label_mat*nd.exp(dist_mat),axis=1))
    p_max=nd.log(nd.sum((1-label_mat)*nd.exp(-dist_mat),axis=1))
    loss=nd.relu(p_min+p_max+1)
    return loss
Example #13
def hard_example_mining(dist_mat, labels, return_inds=False):
    """For each anchor, find the hardest positive and negative sample.
    Args:
      dist_mat: pytorch Variable, pair wise distance between samples, shape [N, N]
      labels: pytorch LongTensor, with shape [N]
      return_inds: whether to return the indices. Save time if `False`(?)
    Returns:
      dist_ap: pytorch Variable, distance(anchor, positive); shape [N]
      dist_an: pytorch Variable, distance(anchor, negative); shape [N]
      p_inds: pytorch LongTensor, with shape [N];
        indices of selected hard positive samples; 0 <= p_inds[i] <= N - 1
      n_inds: pytorch LongTensor, with shape [N];
        indices of selected hard negative samples; 0 <= n_inds[i] <= N - 1
    NOTE: Only consider the case in which all labels have same num of samples,
      thus we can cope with all anchors in parallel.
    """

    assert len(dist_mat.shape) == 2
    assert dist_mat.shape[0] == dist_mat.shape[1]
    N = dist_mat.shape[0]

    # shape [N, N]
    is_pos = nd.equal(labels.broadcast_to((N, N)),
                      labels.broadcast_to((N, N)).T).astype('float32')
    is_neg = nd.not_equal(labels.broadcast_to((N, N)),
                          labels.broadcast_to((N, N)).T).astype('float32')
    # `dist_ap` means distance(anchor, positive)
    # both `dist_ap` and `relative_p_inds` with shape [N, 1]
    dist_pos = dist_mat * is_pos
    dist_ap = nd.max(dist_pos, axis=1)
    # `dist_an` means distance(anchor, negative)
    # both `dist_an` and `relative_n_inds` with shape [N, 1]
    dist_neg = dist_mat * is_neg + nd.max(dist_mat, axis=1,
                                          keepdims=True) * is_pos
    dist_an = nd.min(dist_neg, axis=1)
    # shape [N]

    # if return_inds:
    #     # shape [N, N]
    #     ind = (labels.new().resize_as_(labels)
    #            .copy_(torch.arange(0, N).long())
    #            .unsqueeze(0).expand(N, N))
    #     # shape [N, 1]
    #     p_inds = torch.gather(
    #         ind[is_pos].contiguous().view(N, -1), 1, relative_p_inds.data)
    #     n_inds = torch.gather(
    #         ind[is_neg].contiguous().view(N, -1), 1, relative_n_inds.data)
    #     # shape [N]
    #     p_inds = p_inds.squeeze(1)
    #     n_inds = n_inds.squeeze(1)
    #     return dist_ap, dist_an, p_inds, n_inds

    return dist_ap, dist_an
def main():

    net=resnet.features
    #net.load_parameters('./new_metric_200.params')
    #net.initialize(init=mx.init.Xavier())
    net.collect_params().reset_ctx(ctx)
    transforms=gluon.data.vision.transforms.Compose([
        gluon.data.vision.transforms.RandomSaturation(0.2),
        gluon.data.vision.transforms.RandomContrast(0.2),
        gluon.data.vision.transforms.RandomBrightness(0.1),
        gluon.data.vision.transforms.RandomFlipTopBottom(),
        gluon.data.vision.transforms.ToTensor()
    ])
    test_dataset = Reader('./data/imgs', 'metric',data_argument=False)
    test_data = gluon.data.DataLoader(test_dataset.transform_first(transforms),
                                      batch_size, True, num_workers=16)
    acc = evaluate(net, test_data, ctx=ctx)
    print('Accuracy: %s' % (acc))
    for epoch in range(201):
        train_data = Reader('./data/imgs', 'metric',data_argument=True)
        train_data = DataLoader(
            train_data.transform_first(transforms),
            batch_size, False,num_workers=16)
        total_loss=0
        start_time=time.time()

        for i,(data,label) in enumerate(train_data):
            data=data.as_in_context(ctx)
            label=label.as_in_context(ctx)

            with autograd.record():
                label=label.reshape(-1,1)
                label_mat = nd.equal(label,label.T).astype('float32')
                vec = net(data)
                vec=nd.Flatten(vec)
                dist_mat = - nd.dot(vec, vec.T)/50
                p_row=nd.sum(nd.exp(1.0-(dist_mat))*(1-label_mat),1,True)
                loss=(nd.log(p_row+p_row.T + 1e-5)+dist_mat)*label_mat
                loss=nd.relu(loss)
            loss.backward()
            now_lr =  0.01*(0.99**epoch)
            trainer_triplet = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': now_lr,'momentum':0.9,'wd':0.00005})
            trainer_triplet.step(batch_size)
            total_loss += mx.nd.mean(loss).asscalar()
            # if i % 20 == 0:
            #     print('Batch: %s, Loss: %s' % (i, total_loss))
        print('Epoch: %s, Loss: %s, Time: %s' % (epoch, total_loss/len(train_data),time.time()-start_time))
        start_time=time.time()
        if epoch>0 and epoch%5==0:
            acc=evaluate(net, test_data, ctx=ctx)
            print('Accuracy: %s,Time: %s'%(acc,time.time()-start_time))
        if epoch>10 and epoch%2==0:
            net.save_parameters('8.15_mobilenet_metric_'+str(epoch)+'.params')
Example #15
def balance_sampler(samples):
    """ignore extra negative samples to keep batch balance"""
    num_pos = nd.sum(samples == 1, axis=0)
    num_neg = nd.sum(samples == 0, axis=0)
    drop_prob = (num_neg - num_pos) / num_neg
    drop_prob = nd.where(nd.lesser(drop_prob, 0), nd.zeros_like(drop_prob),
                         drop_prob)
    mask = nd.where(
        nd.greater(
            nd.random.uniform(0, 1, shape=samples.shape, ctx=samples.context),
            drop_prob), nd.ones_like(samples), nd.zeros_like(samples))
    mask = nd.where(nd.equal(samples, 1), samples, mask)
    return mask
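A hypothetical call to `balance_sampler`: positives are always kept, and each negative is dropped with probability (num_neg - num_pos) / num_neg, so in expectation the surviving batch is balanced:

from mxnet import nd

samples = nd.array([1, 0, 0, 0, 1, 0]).reshape(-1, 1)  # one label column: 2 positives, 4 negatives
mask = balance_sampler(samples)
print(mask.reshape(-1))  # positives stay 1; each negative survives with probability 0.5 here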
Example #16
def distance11(regions_high,regions_low,i,binnum,bins):
#Initialize an empty array for storing the number of counted pairs in each distance-range bin
    count0=nd.zeros(binnum-1,gpu(0),dtype="float32")
#Calculate index coordinates and directions by chunks
    a=regions_high[i[0]*broadcdp:min((i[0]+1)*broadcdp,regions_high.shape[0]),:]
    b=regions_low[i[1]*broadcdp:min((i[1]+1)*broadcdp,regions_low.shape[0]),:]
    a1=nd.array(a,gpu(0))
    b1=nd.array(b,gpu(0))
    a1_b1=(nd.expand_dims(a1,axis=1)-b1).reshape((-1,2))
    x1_x2=a1_b1[:,0]
    y1_y2=a1_b1[:,1]
    ldis=nd.broadcast_hypot(x1_x2,y1_y2).reshape((-1,))
    for p in range (0,binnum-1):
        booleanmask=nd.equal((ldis>=bins[p]),(ldis<bins[p+1]))
        count0[p]+=nd.nansum(booleanmask)
    return(count0)
Example #17
def hard_example_mining(dist_mat, labels):
    assert len(dist_mat.shape) == 2
    assert dist_mat.shape[0] == dist_mat.shape[1]
    N = dist_mat.shape[0]

    # shape [N, N]
    is_pos = nd.equal(labels.broadcast_to((N, N)), labels.broadcast_to((N, N)).T).astype('float32')
    is_neg = nd.not_equal(labels.broadcast_to((N, N)), labels.broadcast_to((N, N)).T).astype('float32')

    dist_pos = dist_mat * is_pos
    dist_ap = nd.max(dist_pos, axis=1)

    dist_neg = dist_mat * is_neg + nd.max(dist_mat, axis=1, keepdims=True) * is_pos
    dist_an = nd.min(dist_neg, axis=1)

    return dist_ap, dist_an
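A minimal sketch with a hypothetical 4x4 distance matrix and two identities, showing that `hard_example_mining` returns, per anchor, the farthest positive and the closest negative:

from mxnet import nd

labels = nd.array([0, 0, 1, 1])
dist_mat = nd.array([[0.0, 0.3, 1.2, 0.9],
                     [0.3, 0.0, 1.1, 1.4],
                     [1.2, 1.1, 0.0, 0.2],
                     [0.9, 1.4, 0.2, 0.0]])
dist_ap, dist_an = hard_example_mining(dist_mat, labels)
print(dist_ap)  # [0.3 0.3 0.2 0.2] hardest (farthest) positive per anchor
print(dist_an)  # [0.9 1.1 1.1 0.9] hardest (closest) negative per anchor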
Example #18
    def forward(self, x, target):
        assert x.shape[1] == self.size  # vocabulary size

        with autograd.pause():
            true_dist = nd.zeros_like(x) + self.smoothing / (self.size - 2)
            target_mask = nd.zeros_like(true_dist)
            for r, c in enumerate(target):
                target_mask[r,c] = 1
            true_dist = nd.where(target_mask, nd.zeros_like(true_dist) + self.confidence, true_dist)
            true_dist[:, self.padding_idx] = 0
            mask = nd.equal(target,self.padding_idx)

            if len(mask.shape) > 0:
                true_dist = nd.where(nd.squeeze(mask), nd.zeros_like(true_dist), true_dist)

        self.true_dist = true_dist
        return self.criterion(x, true_dist.as_in_context(cfg.ctx))
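The `nd.equal(target, self.padding_idx)` mask above zeroes the smoothed target distribution for padding positions before the loss is computed. A minimal sketch of that masking step, assuming a padding index of 0:

from mxnet import nd

padding_idx = 0
target = nd.array([3, 1, 0, 2])              # position 2 is padding
true_dist = nd.ones((4, 5)) * 0.2            # some smoothed distribution
mask = nd.equal(target, padding_idx)         # [0. 0. 1. 0.]
true_dist = nd.where(mask, nd.zeros_like(true_dist), true_dist)
print(true_dist)                             # the padded row is all zeros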
    def _get_new_finished_state(self, state, new_seq, new_log_probs):
        """Combine new and old finished sequences, and gather the top k sequences.

        Args:
          state: A dictionary with the current loop state.
          new_seq: New sequences generated by growing the current alive sequences
            int32 tensor with shape [batch_size, beam_size, i + 1]
          new_log_probs: Log probabilities of new sequences
            float32 tensor with shape [batch_size, beam_size]

        Returns:
          Dictionary with finished keys from _StateKeys:
            {Top beam_size finished sequences based on score,
             Scores of finished sequences,
             Finished flags of finished sequences}
        """
        i = state[_StateKeys.CUR_INDEX]
        finished_seq = state[_StateKeys.FINISHED_SEQ]
        finished_scores = state[_StateKeys.FINISHED_SCORES]
        finished_flags = state[_StateKeys.FINISHED_FLAGS]

        finished_seq = nd.concat(finished_seq,
                                 nd.zeros(shape=(self.batch_size,
                                                 self.beam_size, 1),
                                          ctx=ctx),
                                 dim=2)
        length_norm = _length_normalization(self.alpha, i + 1)
        new_scores = new_log_probs / length_norm

        new_finished_flags = nd.equal(new_seq[:, :, -1], self.eos_id)
        new_scores = new_scores + (1. - new_finished_flags) * -INF

        # combine sequences, scores, and flags
        finished_seq = nd.concat(finished_seq, new_seq, dim=1)
        finished_scores = nd.concat(finished_scores, new_scores, dim=1)
        finished_flags = nd.concat(finished_flags, new_finished_flags, dim=1)

        top_finished_seq, top_finished_scores, top_finished_flags = _gather_topk_beams(
            [finished_seq, finished_scores, finished_flags], finished_scores,
            self.batch_size, self.beam_size)

        return {
            _StateKeys.FINISHED_SEQ: top_finished_seq,
            _StateKeys.FINISHED_SCORES: top_finished_scores,
            _StateKeys.FINISHED_FLAGS: top_finished_flags
        }
Example #20
def get_rmse(class_pre_l, class_true_l, con_pre_l, con_true_l, data_utils):
    # find right predictions
    one_zero_pre = nd.where(class_pre_l > 0.5, nd.ones_like(class_pre_l),
                            nd.zeros_like(class_pre_l))
    compare = nd.equal(one_zero_pre, class_true_l).sum(axis=1)
    weight_right = nd.repeat(nd.expand_dims(nd.where(compare == 3, nd.ones_like(compare), nd.zeros_like(compare)),\
                                            axis=0),repeats=2,axis=0).transpose()

    # calculate rmse based on right prediction
    eth_co_me_limit = nd.array([[
        data_utils.scale_CO[1], data_utils.scale_CO[0], data_utils.scale_Me[0]
    ]])
    concentration_mat = nd.where(class_pre_l > 0.5, nd.repeat(eth_co_me_limit,repeats=class_pre_l.shape[0],axis=0), \
                            nd.zeros_like(class_pre_l))
    eth_pre_con, eth_pre_con_true = concentration_mat[:, 0] * con_pre_l[:, 1], concentration_mat[:, 0] * con_true_l[:, 1]
    co_pre_con, co_pre_con_true = concentration_mat[:, 1] * con_pre_l[:, 0], concentration_mat[:, 1] * con_true_l[:, 0]
    me_pre_con, me_pre_con_true = concentration_mat[:, 2] * con_pre_l[:, 0], concentration_mat[:, 2] * con_true_l[:, 0]
    co_or_me_con, co_or_me_con_true = co_pre_con + me_pre_con, co_pre_con_true + me_pre_con_true

    co_or_me_eth_con = nd.concat(nd.expand_dims(co_or_me_con, axis=0),
                                 nd.expand_dims(eth_pre_con, axis=0),
                                 dim=0).transpose()
    co_or_me_eth_con_true = nd.concat(nd.expand_dims(co_or_me_con_true,
                                                     axis=0),
                                      nd.expand_dims(eth_pre_con_true, axis=0),
                                      dim=0).transpose()

    # rmse = (((co_or_me_eth_con-co_or_me_eth_con_true)**2*weight_right).sum()/(weight_right[:,0].sum()))**(0.5)
    rmse = (((co_or_me_eth_con - co_or_me_eth_con_true)**2).mean(axis=0))

    return rmse
Example #21
    def forward(self, pred, target):
        batch_size = target.shape[0]
        label_size = target.shape[1]

        ## rank weights for the sampled negatives
        rank_weights = self.rank_weights
        max_num_trials = target.shape[1] - 1

        pos_mask = nd.greater(target, 0).asnumpy()
        neg_mask = nd.equal(target, 0).asnumpy()
        L = nd.zeros_like(pred)

        for i in range(batch_size):
            for j in range(label_size):
                if target[i, j] == 1:
                    ##initialization
                    sample_score_margin = -1
                    num_trials = 0
                    while ((sample_score_margin < 0)
                           and (num_trials < max_num_trials)):
                        neg_labels_idx = np.array([
                            idx for idx, v in enumerate(target[i, :]) if v == 0
                        ])
                        if len(neg_labels_idx) > 0:
                            neg_idx = np.random.choice(neg_labels_idx,
                                                       replace=False)
                            sample_score_margin = pred[i, neg_idx] - pred[i, j]
                            num_trials += 1
                        else:
                            num_trials = 1
                            pass
                    ## the number of trials determines the rank weight
                    r_j = int(np.floor(max_num_trials / num_trials))
                    L[i, j] = rank_weights[r_j]
        #print("L weight",L)
        loss = nd.sum(
            L *
            (nd.sum(1 - nd.array(pos_mask).as_in_context(pred.context) * pred +
                    nd.array(neg_mask).as_in_context(pred.context) * pred,
                    axis=1,
                    keepdims=True)))
        self.save_for_backward(L, pos_mask, neg_mask)
        return loss
Example #22
def distance2(regions,i,binnum,bins):
#Initialize empty arrays for storing the number of counted pairs in each distance-range bin
    count0=nd.zeros(binnum-1,gpu(0),dtype="float32")
    seed=nd.zeros((1,2),gpu(0))
#Calculate index coordinates and directions by chunks
    a=regions[i[0]*broadcdp:min((i[0]+1)*broadcdp,regions.shape[0]),:]
    b=regions[i[1]*broadcdp:min((i[1]+1)*broadcdp,regions.shape[0]),:]
    a1=nd.array(a,gpu(0))
    b1=nd.array(b,gpu(0))    
    for ii in range (a1.shape[0]-1):
        a1_b1=(nd.expand_dims(a1[ii].reshape((1,2)),axis=1)-b1[ii+1:,:]).reshape((a1[ii+1:,:].shape[0],2))
        seed=nd.concat(seed,a1_b1,dim=0)
    if seed.shape[0]>1:
        x1_x2=seed[1:,0]
        y1_y2=seed[1:,1]
        ldis=nd.broadcast_hypot(x1_x2,y1_y2).reshape((-1,))
        for p in range (0,binnum-1):
            booleanmask=nd.equal((ldis>=bins[p]),(ldis<bins[p+1]))
            count0[p]+=nd.nansum(booleanmask)
    return(count0)
Example #23
def batch_loss(transformer_model, en_sentences, x_en_emb, x_en_idx, y_zh_idx,
               loss):
    batch_size = x_en_emb.shape[0]
    ch2idx, idx2ch = load_ch_vocab()

    y_zh_idx_nd = nd.array(y_zh_idx, ctx=ghp.ctx)
    dec_input_zh_idx = nd.concat(
        nd.ones(shape=y_zh_idx_nd[:, :1].shape, ctx=ghp.ctx) * 2,
        y_zh_idx_nd[:, :-1],
        dim=1)

    output = transformer_model(x_en_emb, x_en_idx, dec_input_zh_idx, True)
    predict = nd.argmax(nd.softmax(output, axis=-1), axis=-1)

    # print("input_idx:", dec_input_zh_idx[0])
    # print("predict_idx:", predict[0])
    print("source:", en_sentences[0])

    label_token = []
    for n in range(len(y_zh_idx[0])):
        label_token.append(idx2ch[int(y_zh_idx[0][n])])
    print("target:", "".join(label_token))

    predict_token = []
    for n in range(len(predict[0])):
        predict_token.append(idx2ch[int(predict[0][n].asscalar())])
    print("predict:", "".join(predict_token))

    is_target = nd.not_equal(y_zh_idx_nd, 0)
    # print(is_target)
    current = nd.equal(y_zh_idx_nd, predict) * is_target
    acc = nd.sum(current) / nd.sum(is_target)

    l = loss(output, y_zh_idx_nd)
    l_mean = nd.sum(l) / batch_size

    return l_mean, acc
Example #24
def getMask(q_seq, k_seq):
    # q_seq shape : (batch_size, q_seq_len)
    # k_seq shape : (batch_size, k_seq_len)
    q_len = q_seq.shape[1]
    pad_mask = nd.not_equal(k_seq, 0)
    pad_mask = nd.expand_dims(pad_mask, axis=1)
    pad_mask = nd.broadcast_axes(pad_mask, axis=1, size=q_len)

    return pad_mask


def getSelfMask(q_seq):
    batch_size, seq_len = q_seq.shape
    mask_matrix = np.ones(shape=(seq_len, seq_len), dtype=np.float32)
    mask = np.tril(mask_matrix, k=0)
    mask = nd.expand_dims(nd.array(mask, ctx=ghp.ctx), axis=0)
    mask = nd.broadcast_axes(mask, axis=0, size=batch_size)
    return mask


if __name__ == '__main__':
    mask = getMask(nd.array([[1, 2, 0], [1, 0, 0]]),
                   nd.array([[1, 2, 3], [5, 0, 0]]))
    print(mask)
    mask = getSelfMask(nd.array([[1, 2, 0], [2, 0, 0], [0, 0, 0]]))
    print(mask)

    score = nd.array([[5, 6, 0], [5, 10, 0]])
    pading = nd.ones_like(score) * 0.1
    score = nd.where(nd.equal(mask[0], 0), pading, score)
    print(score)
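The example above fills masked positions with 0.1; a more common variant of the same `nd.where(nd.equal(mask, 0), ...)` pattern uses a large negative value so those positions vanish after softmax. A minimal sketch with hypothetical scores:

from mxnet import nd

score = nd.array([[5., 6., 0.], [5., 10., 0.]])
mask = nd.array([[1., 1., 0.], [1., 0., 0.]])
neg_inf = nd.ones_like(score) * -1e9
masked = nd.where(nd.equal(mask, 0), neg_inf, score)
print(nd.softmax(masked, axis=-1))  # masked positions receive ~0 probability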
Example #25
    def train(self,
              inputs,
              outputs,
              epochs=10,
              batch_size=32,
              lr=0.001,
              transform=None,
              verbose=True):
        """train the neural network to fit the outputs with the inputs.

        Args:
            inputs: an ndarray of input.
            outputs: an ndarray of outputs.
            epochs, batch_size, lr: the parameters of the learning algorithm.
            transform: if None, take the output as given, else try to compute
                        transformed outputs = transform(outputs) and fit with them.
            verbose: If True, the results are displayed throughout training.
        Returns:
            The training history (a tuple of arrays)."""

        if transform:
            outputs = transform(outputs)
        n = (inputs.shape[1] - 1) // batch_size + 1

        # n = ceil(number of samples / batch_size)
        if len(outputs.shape) == 1:
            outputs = outputs.reshape((1, outputs.shape[0]))
        assert inputs.shape[1] == outputs.shape[1], "Shapes do not match."

        data = nd.concat(inputs.T, outputs.T)

        efficiencies = []
        cumuLosses = []
        epochs = list(range(epochs))

        for i in epochs:
            efficiency = 0
            cumuLoss = 0
            data = nd.shuffle(data)
            batchs = [
                data[k * batch_size:min(inputs.shape[1], (k + 1) *
                                        batch_size), :] for k in range(n)
            ]
            for batch in batchs:
                with autograd.record():
                    output = self.compute(batch[:, :inputs.shape[0]].T)
                    loss = SymNet.squared_error(output,
                                                batch[:, inputs.shape[0]:].T)
                loss.backward()

                self.adam_descent(batch_size, lr)

                output = nd.round(output)
                cumuLoss += loss.asscalar()
                efficiency += nd.sum(
                    nd.equal(output, batch[:, inputs.shape[0]:].T)).asscalar()

            efficiency /= outputs.shape[1] * outputs.shape[0]
            efficiencies.append(efficiency)

            cumuLoss /= outputs.shape[1] * outputs.shape[0]
            cumuLosses.append(cumuLoss)

            if verbose:
                print("Epochs %d: Pe = %lf , loss = %lf" %
                      (i, 1 - efficiency, cumuLoss))

        return (epochs, cumuLosses, efficiencies)
Example #26
def train_and_valid(src_bert, mt_model, src_vocab, tgt_vocab, train_dataiter,
                    dev_dataiter, trainer, finetune_trainer, epochs, loss_func,
                    ctx, lr, batch_size, params_save_path_root, eval_step,
                    log_step, check_step, label_smooth, logger,
                    num_train_examples, warmup_ratio):
    batches = len(train_dataiter)

    num_train_steps = int(num_train_examples / batch_size * epochs)
    num_warmup_steps = int(num_train_steps * warmup_ratio)
    global_step = 0
    dev_bleu_score = 0

    for epoch in range(epochs):
        for src, tgt, label, src_valid_len, tgt_valid_len in train_dataiter:
            # learning rate strategy
            if global_step < num_warmup_steps:
                new_lr = lr * global_step / num_warmup_steps
            else:
                non_warmup_steps = global_step - num_warmup_steps
                offset = non_warmup_steps / \
                    (num_train_steps - num_warmup_steps)
                new_lr = lr - offset * lr
            trainer.set_learning_rate(new_lr)

            src = src.as_in_context(ctx)
            tgt = tgt.as_in_context(ctx)
            label = label.as_in_context(ctx)
            src_valid_len = src_valid_len.as_in_context(ctx)
            src_token_type = nd.zeros_like(src, ctx=ctx)

            tgt_mask = nd.not_equal(tgt, tgt_vocab(tgt_vocab.padding_token))

            if label_smooth:
                eps = 0.1
                num_class = len(tgt_vocab.idx_to_token)
                one_hot = nd.one_hot(label, num_class)
                one_hot_label = one_hot * \
                    (1 - eps) + (1 - one_hot) * eps / num_class

            with autograd.record():
                src_bert_outputs = src_bert(src, src_token_type, src_valid_len)
                mt_outputs = mt_model(src_bert_outputs, src, tgt)
                loss_mean = loss_func(mt_outputs, one_hot_label, tgt_mask)

            loss_mean.backward()
            loss_scalar = loss_mean.asscalar()

            trainer.step(1)
            finetune_trainer.step(1)

            if global_step and global_step % log_step == 0:
                predicts = nd.argmax(nd.softmax(mt_outputs, axis=-1), axis=-1)
                correct = nd.equal(label, predicts)
                accuracy = (nd.sum(correct * tgt_mask) /
                            nd.sum(tgt_mask)).asscalar()
                logger.info(
                    "epoch:{}, batch:{}/{}, bleu:{}, acc:{}, loss:{}, (lr:{}s)"
                    .format(epoch, global_step % batches, batches,
                            dev_bleu_score, accuracy, loss_scalar,
                            trainer.learning_rate))

            if global_step and global_step % check_step == 0:
                predicts = nd.argmax(nd.softmax(mt_outputs, axis=-1), axis=-1)
                refer_sample = src.asnumpy().tolist()
                label_sample = label.asnumpy().tolist()
                pred_sample = predicts.asnumpy().tolist()
                logger.info("train sample:")
                logger.info("refer  :{}".format(" ".join([
                    src_vocab.idx_to_token[int(idx)] for idx in refer_sample[0]
                ])).replace(src_vocab.padding_token, ""))
                logger.info("target :{}".format(" ".join([
                    tgt_vocab.idx_to_token[int(idx)] for idx in label_sample[0]
                ])).replace(EOS, "[EOS]").replace(tgt_vocab.padding_token, ""))
                logger.info("predict:{}".format(" ".join([
                    tgt_vocab.idx_to_token[int(idx)] for idx in pred_sample[0]
                ])).replace(EOS, "[EOS]"))

            if global_step and global_step % eval_step == 0:
                dev_bleu_score = eval(src_bert,
                                      mt_model,
                                      src_vocab,
                                      tgt_vocab,
                                      dev_dataiter,
                                      logger,
                                      ctx=ctx)
                if not os.path.exists(params_save_path_root):
                    os.makedirs(params_save_path_root)
                model_params_file = params_save_path_root + \
                    "src_bert_step_{}.params".format(global_step)
                src_bert.save_parameters(model_params_file)
                logger.info("{} Save Completed.".format(model_params_file))

                model_params_file = params_save_path_root + \
                    "mt_step_{}.params".format(global_step)
                mt_model.save_parameters(model_params_file)
                logger.info("{} Save Completed.".format(model_params_file))
            writer.add_scalar("loss", loss_scalar, global_step)
            global_step += 1
Example #27
def accuracy(output, label, batch_size):
    out = nd.argmax(output, axis=1)
    res = nd.sum(nd.equal(out.reshape((-1, 1)), label)) / batch_size
    return res
def get_padding(x, padding_value=0):
    return nd.equal(x, padding_value)
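A hypothetical call to `get_padding`, with 0 as the padding value:

from mxnet import nd

x = nd.array([[4, 7, 0, 0], [2, 0, 0, 0]])
print(get_padding(x))
# [[0. 0. 1. 1.]
#  [0. 1. 1. 1.]]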
Example #29
def accuracy(predictions, targets):
	predictions = nd.argmax(predictions, 1)
	return nd.mean(nd.equal(predictions, targets)).asscalar() * 100
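A hypothetical call on a small batch of scores:

from mxnet import nd

predictions = nd.array([[0.1, 0.9], [0.8, 0.2], [0.3, 0.7]])
targets = nd.array([1, 0, 0])
print(accuracy(predictions, targets))  # ~66.67: two of three predictions match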