Example #1
def test_gradient_with_attention():
    ctx = mx.cpu()
    input_shape = (2, 2)
    input_dim = 4
    num_steps = 10
    latent_dim = 2
    batch_size = 3
    num_recurrent_units = 3

    # build the network
    read_nn = SelectiveAttentionRead(2, input_shape, batch_size)
    write_nn = SelectiveAttentionWrite(2, input_shape, batch_size)

    draw_nn = DRAW(read_nn, write_nn, num_steps, batch_size,
                   num_recurrent_units, input_dim, latent_dim)
    model_params = draw_nn.collect_params()
    model_params.initialize(init=mx.init.Uniform(1.0), ctx=ctx)

    # loss function
    loss_fn = DRAWLoss(
        SigmoidBinaryCrossEntropyLoss(from_sigmoid=False, batch_axis=0),
        input_dim, latent_dim)

    def fwd(x):
        y, qs = draw_nn(x)
        return nd.sum(loss_fn(x, qs, y))

    batch_x = mx.nd.random_uniform(shape=(batch_size, input_dim))

    # TODO: investigate why check_gradient fails for the first parameter if fwd
    # is not called at least once beforehand
    fwd(batch_x)
    for p in model_params.values():
        assert check_gradient(fwd, [batch_x], p)
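The check_gradient helper used in these tests comes from the example project's own test utilities and is not shown here. As a rough sketch of what such a check typically does (the name numeric_grad_check, its interface, and the tolerances are assumptions, not the original helper), a central-difference comparison against autograd could look like this:

# Illustrative sketch only: compare autograd's gradient for one Gluon parameter
# against central differences of a scalar loss function f(x).
import numpy as np
import mxnet as mx
from mxnet import nd, autograd

def numeric_grad_check(f, x, param, eps=1e-4, tol=1e-2):
    # analytic gradient from autograd
    with autograd.record():
        out = f(x)
    out.backward()
    analytic = param.grad().asnumpy()

    # numerical gradient, perturbing one parameter entry at a time
    values = param.data().asnumpy()
    numeric = np.zeros_like(values)
    it = np.nditer(values, flags=['multi_index'])
    while not it.finished:
        idx = it.multi_index
        orig = values[idx]
        values[idx] = orig + eps
        param.set_data(nd.array(values))
        f_plus = f(x).asscalar()
        values[idx] = orig - eps
        param.set_data(nd.array(values))
        f_minus = f(x).asscalar()
        values[idx] = orig
        param.set_data(nd.array(values))
        numeric[idx] = (f_plus - f_minus) / (2 * eps)
        it.iternext()

    return bool(np.abs(analytic - numeric).max() < tol)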
Example #2
 def __init__(self, mlp_arc_size):
     super().__init__()
     self.binary_ce_loss = SigmoidBinaryCrossEntropyLoss(batch_axis=-1)
     self.arc_W = parameter_init(self,
                                 'arc_W', (mlp_arc_size, mlp_arc_size + 1),
                                 init=mx.init.Zero())
     self.mlp_arc_size = mlp_arc_size
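parameter_init here is a helper defined elsewhere in the same parser code. A plausible minimal version (an assumption about its behaviour, not the original implementation) simply registers the parameter on the block through Gluon's ParameterDict:

# Hypothetical stand-in for the parameter_init helper used above.
import mxnet as mx

def parameter_init(block, name, shape, init):
    # Block.params.get creates the Parameter under `name` if it does not exist
    # yet and returns it, so it is initialized together with the block.
    return block.params.get(name, shape=shape, init=init)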
Example #3
def test_gradient():
    ctx = mx.cpu()
    num_latent_maps = 1
    input_shape = (1, 2, 2)
    input_dim = 4
    batch_size = 2
    # build the network
    enc_nn = nn.HybridSequential()
    enc_nn.add(nn.Conv2D(channels=2, kernel_size=(1, 1), activation='relu', bias_initializer=mx.init.Uniform(1.0)))

    dec_nn = nn.HybridSequential()
    dec_nn.add(nn.Conv2DTranspose(channels=1, kernel_size=(1, 1), bias_initializer=mx.init.Uniform(1.0)))

    conv_draw_nn = ConvDRAW(enc_nn, dec_nn, num_steps=2, batch_size=batch_size, input_shape=input_shape,
                            num_latent_maps=num_latent_maps, encoder_output_shape=(2, 2, 2), rnn_hidden_channels=1,
                            kernel_size=(1, 1), ctx=ctx)
    model_params = conv_draw_nn.collect_params()
    mx.random.seed(np.random.randint(1000000))
    model_params.initialize(init=mx.init.Uniform(1.0), ctx=ctx)  # don't initialize to small weights

    # loss function
    loss_fn = ConvDRAWLoss(SigmoidBinaryCrossEntropyLoss(from_sigmoid=False, batch_axis=0), input_dim, (1, 2, 2))

    def fwd(x):
        y, q, p = conv_draw_nn(x)
        return nd.sum(loss_fn(x, q, p, y))

    batch_x = mx.nd.random_uniform(shape=(batch_size, *input_shape))

    fwd(batch_x)  # the following check fails for the first parameter if fwd is not called at least once before it.
    for p in model_params.values():
        assert check_gradient(fwd, [batch_x], p)
Example #4
def train(model, features, X, X_train, y_train, epochs):
    cross_entropy = SigmoidBinaryCrossEntropyLoss(from_sigmoid=True)
    trainer = Trainer(model.collect_params(), 'sgd', {
        'learning_rate': 0.001,
        'momentum': 1
    })

    feature_representations = [features(X).asnumpy()]

    for e in range(1, epochs + 1):
        cum_loss = 0
        cum_preds = []

        for i, x in enumerate(X_train.flatten()):
            y = array(y_train)[i]
            with autograd.record():
                preds = model(X)[x]
                loss = cross_entropy(preds, y)
                # logger.debug("x:[{}], y:[{}], pred:[{}], loss:[{}]".format(x,y,preds,loss.asscalar()))
            loss.backward()
            trainer.step(1)

            cum_loss += loss.asscalar()
            cum_preds += [preds.asscalar()]

        feature_representations.append(features(X).asnumpy())

        if (e % (epochs // 10)) == 0:
            logger.debug(f"Epoch {e}/{epochs} -- Loss: {cum_loss: f}")
            logger.debug(cum_preds)
    return feature_representations
Example #5
def train(model, features, X, X_train, y_train, epochs):
    cross_entropy = SigmoidBinaryCrossEntropyLoss(from_sigmoid=True)
    trainer = Trainer(model.collect_params(), 'sgd', {'learning_rate': 0.001, 'momentum': 1})

    feature_representations = [features(X).asnumpy()]
    plt.figure()
    for e in range(1, epochs + 1):
        cum_loss = 0
        cum_preds = []

        for i, x in enumerate(X_train):
            y = nd.array(y_train)[i]
            with autograd.record():
                preds = model(X)[x]
                loss = cross_entropy(preds, y)
            loss.backward()
            trainer.step(1)

            cum_loss += loss.asscalar()
            cum_preds += [preds.asscalar()]
        plt.cla()
        plt.title('epochs'+str(e))
        showData(features(X).asnumpy(), zkc.network)
        plt.pause(0.001)
        feature_representations.append(features(X).asnumpy())

        if (e % (epochs // 10)) == 0:
            print(f"Epoch {e}/{epochs} -- Loss: {cum_loss: .4f}")
            print(cum_preds)
    plt.show()
    return feature_representations
Example #6
def test_gradient():
    ctx = mx.cpu()
    latent_dim = 2
    input_dim = 4
    batch_size = 2
    # build the network
    enc_nn = nn.HybridSequential()
    enc_nn.add(nn.Dense(units=3, activation='relu'))
    enc_nn.add(nn.Dense(units=latent_dim * 2))

    dec_nn = nn.HybridSequential()
    dec_nn.add(nn.Dense(units=3, activation='relu'))
    dec_nn.add(nn.Dense(units=input_dim))

    vae_nn = VAE(enc_nn, dec_nn, batch_size, latent_dim)
    model_params = vae_nn.collect_params()
    model_params.initialize(init=mx.init.Uniform(1.0), ctx=ctx)  # don't initialize to small weights

    # loss function
    loss_fn = VAELoss(SigmoidBinaryCrossEntropyLoss(from_sigmoid=False, batch_axis=0), input_dim, latent_dim)

    def fwd(x):
        y, q = vae_nn(x)
        return nd.sum(loss_fn(x, q, y))

    batch_x = mx.nd.random_uniform(shape=(batch_size, input_dim))

    fwd(batch_x)  # the following check fails for the first parameter if fwd is not called at least once before it.
    for p in model_params.values():
        assert check_gradient(fwd, [batch_x], p)
Example #7
 def __init__(self, weight=100, batch_axis=0, **kwargs):
     """
     :param weight: for l1 loss
     :param batch_axis:
     :param kwargs:
     """
     super(GeneratorCriterion, self).__init__(weight, batch_axis, **kwargs)
     self.bce_loss = SigmoidBinaryCrossEntropyLoss(from_sigmoid=True,
                                                   batch_axis=batch_axis)
     self.l1_loss = L1Loss(weight=weight, batch_axis=0)
Example #8
 def __init__(self, vocab,
              mlp_arc_size,
              mlp_rel_size):
     super(BiAffine, self).__init__()
     self._vocab = vocab
     self.binary_ce_loss = SigmoidBinaryCrossEntropyLoss(batch_axis=-1)
     self.rel_W = parameter_init(self, 'rel_W', (vocab.rel_size * (mlp_rel_size + 1), mlp_rel_size + 1),
                                 init=mx.init.Zero())
     self.arc_W = parameter_init(self, 'arc_W', (mlp_arc_size, mlp_arc_size + 1), init=mx.init.Zero())
     self.softmax_loss = SoftmaxCrossEntropyLoss(axis=0, batch_axis=-1)
     self.mlp_arc_size = mlp_arc_size
     self.mlp_rel_size = mlp_rel_size
Example #9
 def __init__(self,
              affinity=True,
              affinity_weight=0.2,
              ignore_label=-1,
              sub_sample=True,
              height=None,
              width=None,
              affinity_size=36,
              l2loss=True,
              **kwargs):
     """
     Initialization. Sub-sampling is adopted to keep memory usage manageable.
     :param affinity: whether to use an affinity loss in addition to the standard cross-entropy loss
     :param affinity_weight: affinity loss coefficient
     :param ignore_label: label to ignore when computing the loss
     :param sub_sample: whether to down-sample the label
     :param height: sub-sample height
     :param width:  sub-sample width
     :param affinity_size: sub-sample size
     :param l2loss: Set true to use mean square error for affinity loss, binary cross-entropy
         loss otherwise. The label and prediction should have the same size.
     """
     super(PixelAffinityLoss, self).__init__(ignore_label=ignore_label,
                                             **kwargs)
     self.affinity = affinity
     self.weight = affinity_weight
     self.height = height if height else affinity_size
     self.width = width if width else affinity_size
     self.sub_sample = sub_sample
     if l2loss:
         from mxnet.gluon.loss import L2Loss
         self.affinity_loss = L2Loss()
     else:
         from mxnet.gluon.loss import SigmoidBinaryCrossEntropyLoss
         self.affinity_loss = SigmoidBinaryCrossEntropyLoss(
             from_sigmoid=True)
Example #10
def run_training(net, trainer, train_dataloader, val_dataloader, options):
    stop_early = 0
    best_metric = 0
    best_model_name = ''
    loss_fn = SigmoidBinaryCrossEntropyLoss(from_sigmoid=True)

    for epoch in range(options.epochs):
        start_epoch_time = time.time()
        epoch_L = 0.0
        epoch_sent_num = 0
        epoch_wc = 0
        # Log interval training stats
        start_log_interval_time = time.time()
        log_interval_wc = 0
        log_interval_sent_num = 0
        log_interval_L = 0.0

        for i, (rec_id, (data, original_length),
                label) in enumerate(train_dataloader):
            data = data.as_in_context(context)
            label = label.as_in_context(context).astype(np.float32)
            original_length = original_length.as_in_context(context).astype(
                np.float32)

            wc = original_length.sum().asscalar()
            log_interval_wc += wc
            epoch_wc += wc
            log_interval_sent_num += data.shape[1]
            epoch_sent_num += data.shape[1]

            with autograd.record():
                output = net(data)
                loss = loss_fn(output, label).mean()
            loss.backward()
            trainer.step(1)

            log_interval_L += loss.asscalar()
            epoch_L += loss.asscalar()

            if (i + 1) % options.log_interval == 0:
                print(
                    '[Epoch %d Batch %d/%d] avg loss %g, throughput %gK wps' %
                    (epoch, i + 1, len(train_dataloader), log_interval_L /
                     log_interval_sent_num, log_interval_wc / 1000 /
                     (time.time() - start_log_interval_time)))
                # Clear log interval training stats
                start_log_interval_time = time.time()
                log_interval_wc = 0
                log_interval_sent_num = 0
                log_interval_L = 0

        end_epoch_time = time.time()
        _, train_acc, train_em, train_f1, _ = run_evaluate(
            net, train_dataloader, options)
        valid_avg_L, valid_acc, valid_em, valid_f1, _ = run_evaluate(
            net, val_dataloader, options)

        print(
            '[Epoch %d] '
            'train acc %.4f, train EM %.4f, train F1 %.4f, train avg loss %g, '
            'valid acc %.4f, valid EM %.4f, valid F1 %.4f, valid avg loss %g, '
            'throughput %gK wps' %
            (epoch, train_acc, train_em, train_f1, epoch_L / epoch_sent_num,
             valid_acc, valid_em, valid_f1, valid_avg_L, epoch_wc / 1000 /
             (end_epoch_time - start_epoch_time)))

        if valid_f1 < best_metric:
            print('No Improvement.')
            stop_early += 1
            if options.early_stop and stop_early == 5:
                print('No improvement for 5 times. Stop training. '
                      'Best valid F1 found: %.4f' % best_metric)
                break
        else:
            # Reset stop_early when the validation F1 reaches a new best value
            print('Observed Improvement.')
            stop_early = 0
            best_model_name = options.save_prefix + '_{:04d}.params'.format(
                epoch)
            net.save_parameters(best_model_name)
            best_metric = valid_f1

    print('Stop training. Best valid F1: %.4f, best model: %s' %
          (best_metric, best_model_name))
    return best_model_name
Example #11
def run_evaluate(net, dataloader, options, return_predictions=False):
    """Evaluate network on the specified dataset"""
    total_L = 0.0
    total_sample_num = 0
    total_correct_classes = 0
    exact_match = 0
    prediction_results = []
    f1s = [mx.metric.F1(average='micro') for i in range(options.sentiments)]

    start_log_interval_time = time.time()

    loss_fn = SigmoidBinaryCrossEntropyLoss(from_sigmoid=True)

    print('Begin Testing...')

    for i, (rec_id, (data, original_length), label) in enumerate(dataloader):
        data = data.as_in_context(context)
        original_length = original_length.as_in_context(context).astype(
            np.float32)
        label = label.as_in_context(context).astype(np.float32)

        output = net(data)
        L = loss_fn(output, label)

        total_L += L.sum().asscalar()
        total_sample_num += label.shape[0]
        total_class_num = label.shape[1]

        pred = output > options.threshold
        total_correct_classes += (pred == label).sum().asscalar()
        exact_match += int(
            ((pred == label).sum(axis=1) == total_class_num).sum().asscalar())

        for j, f1 in enumerate(f1s):
            emotion_pred = pred[:, j].reshape(0, 1)
            emotion_pred_neg = (1 - pred[:, j]).reshape(0, 1)
            pred_for_emotion = [
                mx.nd.concat(*[emotion_pred_neg, emotion_pred], dim=1)
            ]
            label_for_emotion = [label[:, j]]
            f1.update(label_for_emotion, pred_for_emotion)

        if return_predictions:
            for ri, pr in zip(rec_id, pred):
                item = {
                    'ri': ri.asscalar(),
                    'happiness': pr[0].asscalar(),
                    'sadness': pr[1].asscalar(),
                    'anger': pr[2].asscalar(),
                    'fear': pr[3].asscalar(),
                    'surprise': pr[4].asscalar()
                }
                prediction_results.append(item)

        if (i + 1) % options.log_interval == 0:
            print('[Batch {}/{}] elapsed {:.2f} s'.format(
                i + 1, len(dataloader),
                time.time() - start_log_interval_time))
            start_log_interval_time = time.time()

    avg_L = total_L / float(total_sample_num)
    # divide by both the number of samples and the number of classes for per-class accuracy
    acc = total_correct_classes / float(total_sample_num) / float(
        total_class_num)
    em = exact_match / float(total_sample_num)
    f1_avg = mx.nd.array([f1.get()[1] for f1 in f1s]).mean().asscalar()
    return avg_L, acc, em, f1_avg, prediction_results
Example #12
 def hybrid_forward(self, F, output, *args, **kwargs):
     label, _ = args
     loss = SigmoidBinaryCrossEntropyLoss()
     return loss(output, label)
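Across these examples the loss is sometimes built with from_sigmoid=False (the network emits raw logits, as in the DRAW/VAE gradient tests) and sometimes with from_sigmoid=True (the prediction is already a probability). A small standalone check of that equivalence, not taken from any of the snippets above:

import mxnet as mx
from mxnet import nd
from mxnet.gluon.loss import SigmoidBinaryCrossEntropyLoss

logits = nd.array([[1.5, -0.3], [0.2, 2.0]])
labels = nd.array([[1.0, 0.0], [0.0, 1.0]])

# from_sigmoid=False: the loss applies the sigmoid internally (numerically stable)
loss_from_logits = SigmoidBinaryCrossEntropyLoss(from_sigmoid=False)
# from_sigmoid=True: the prediction must already lie in (0, 1)
loss_from_probs = SigmoidBinaryCrossEntropyLoss(from_sigmoid=True)

print(loss_from_logits(logits, labels))
print(loss_from_probs(nd.sigmoid(logits), labels))  # matches up to numerical precision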
Example #13
    def train_model_for_ml(self):
        """
        Train the model (multi-label).
        """
        base_net = self.get_base_net()  # base network
        train_data, len_td = self.get_train_data(self.batch_size)  # training data, fetched in batches
        val_data, len_vd = self.get_val_data(self.batch_size)  # validation data, fetched in batches

        trainer = Trainer(base_net.collect_params(), 'rmsprop',
                          {'learning_rate': 1e-4})
        loss_func = SigmoidBinaryCrossEntropyLoss()

        lr_steps = [10, 20, 30, np.inf]  # epochs at which to lower the learning rate
        lr_factor = 0.75
        lr_counter = 0

        n_batch = int(len_td / self.batch_size)

        self.print_info('Training - samples: {}, batch size: {}, batches: {}'.format(
            len_td, self.batch_size, n_batch))

        for epoch in range(self.epochs):

            if epoch == lr_steps[lr_counter]:  # gradually lower the learning rate
                trainer.set_learning_rate(trainer.learning_rate * lr_factor)
                lr_counter += 1

            e_loss, e_r, e_p, e_f1 = 0, 0, 0, 0  # epoch

            for i, batch in enumerate(train_data):

                data, labels = batch[0], batch[1].astype('float32')

                data = split_and_load(data,
                                      ctx_list=self.ctx,
                                      batch_axis=0,
                                      even_split=False)
                labels = split_and_load(labels,
                                        ctx_list=self.ctx,
                                        batch_axis=0,
                                        even_split=False)

                with autograd.record():  # record operations for gradient computation
                    outputs = [base_net(X) for X in data]
                    bc_loss = [
                        loss_func(yhat, y) for yhat, y in zip(outputs, labels)
                    ]

                for l in bc_loss:
                    l.backward()

                trainer.step(self.batch_size)

                batch_loss = sum([l.mean().asscalar() for l in bc_loss]) / len(
                    bc_loss)  # mean loss of this batch
                e_loss += batch_loss

                br, bp, bf1 = self.get_batch_rpf(outputs, labels)

                e_r += br
                e_p += bp
                e_f1 += bf1

                self.print_info(
                    'batch: {}, loss: {:.5f}, recall: {:.2f}, precision: {:.2f}, f1: {:.2f}'
                    .format(i, batch_loss, br, bp, bf1))

                n_batch = i + 1  # number of batches processed so far

            e_loss /= n_batch
            e_r /= n_batch
            e_p /= n_batch
            e_f1 /= n_batch

            self.print_info(
                'epoch: {}, loss: {:.5f}, recall: {:.2f}, precision: {:.2f}, f1: {:.2f}'
                .format(epoch, e_loss, e_r, e_p, e_f1))
            e_r, e_p, e_f1 = self.val_net(base_net, val_data, len_vd)

            self.save_net_and_params(base_net, epoch, e_f1,
                                     name='multilabel')  # save the network
Example #14
def train_model():
    epochs = 5

    configs = get_configs()
    is_gpu = configs['is_gpu']
    batch_size = configs['batch_size']
    ctx = get_context(is_gpu)

    print("gpu: {}, batch_size: {}".format(is_gpu, batch_size))

    base_net = get_base_net(ctx=ctx)

    trainer = Trainer(base_net.collect_params(), 'rmsprop', {'learning_rate': 1e-3})

    bc_loss = SigmoidBinaryCrossEntropyLoss()
    triplet_loss = TripletLoss(margin=0)

    train_data = get_train_data(batch_size=batch_size)  # train data
    triplet_train_data = get_triplet_train_data(batch_size=batch_size)  # triplet training data

    for epoch in range(epochs):
        train_loss = 0  # training loss
        total_right, total_all = 0, 0

        for i, (batch, tp_batch) in enumerate(zip(train_data, triplet_train_data)):
            data, labels = batch[0], batch[1].astype('float32')
            tp_data, tp_labels = tp_batch[0], tp_batch[1].astype('float32')

            # print(data.shape, labels.shape)
            # print(tp_data.shape, tp_labels.shape)

            data = data.as_in_context(context=ctx)
            labels = labels.as_in_context(context=ctx)
            tp_data = tp_data.as_in_context(context=ctx)
            tp_labels = tp_labels.as_in_context(context=ctx)

            tp_data = mx.nd.transpose(tp_data, (1, 0, 2, 3, 4))
            tp_labels = mx.nd.transpose(tp_labels, (1, 0, 2))

            # print(tp_data.shape, tp_labels.shape)

            anc_ins, pos_ins, neg_ins = tp_data[0, :], tp_data[1, :], tp_data[2, :]

            # print(anc_ins.shape, pos_ins.shape, neg_ins.shape)

            with autograd.record():
                outputs = base_net(data)
                v_bc_loss = bc_loss(outputs, labels)

                inter1 = base_net(anc_ins)
                inter2 = base_net(pos_ins)
                inter3 = base_net(neg_ins)
                v_triplet_loss = triplet_loss(inter1, inter2, inter3)  # triplet loss

            autograd.backward([v_bc_loss, v_triplet_loss])
            trainer.step(batch_size)

            print('bc: {}, triplet: {}'.format(np.sum(v_bc_loss.asnumpy()), np.sum(v_triplet_loss.asnumpy())))

            train_loss += v_bc_loss.mean().asscalar()
            acc, nr, na = get_batch_acc(outputs, labels)
            total_right += nr
            total_all += na

            if i != 0:  # avoid dividing by zero on the first batch
                print('batch: %s, loss: %s, acc: %s' % (i, train_loss / i, acc))
            else:
                print('batch: %s' % i)

        train_loss /= len(train_data)
        print('epoch: %s, loss: %s, acc: %s' % (epoch, train_loss, total_right / total_all))
Example #15
class Decision_topk(nn.Block):
    # NOTE: the class header and the __init__ signature below are assumptions,
    # reconstructed from the call `Decision_topk(confidence_C=63, K_way=4)` further
    # down; the original snippet starts in the middle of __init__.
    def __init__(self, confidence_C, K_way):
        super(Decision_topk, self).__init__()
        self.linear1 = nn.Dense(in_units=confidence_C, units=(confidence_C + K_way) // 2,
                                use_bias=True, activation='relu')
        self.linear2 = nn.Dense(units=K_way)

    def forward(self, x):
        x = self.linear1(x)
        x = self.linear2(x)
        return x  # x has shape (N, K_way); the top-k entries give the predicted labels, trained with softmax cross-entropy


if __name__ == '__main__':
    from mxnet.gluon.loss import SigmoidBinaryCrossEntropyLoss
    from mxnet import nd, autograd
    model = Decision_thresh(thresh_size=4)
    model.initialize(init=mx.init.Xavier())
    x = nd.array([[0.1, 0.7, 0.9, 0.4], [0.8, 0.5, 0.8, 0.1]])
    label = nd.array([[0, 1, 1, 0], [1, 0, 0, 0]])
    loss_criterion = SigmoidBinaryCrossEntropyLoss()
    with autograd.record():
        y_pred = model(x)
        loss = loss_criterion(y_pred, label)
        print("loss", nd.sum(loss).asscalar())
        loss.backward()
        print(model.thresh.grad())

    # test whether the Decision_topk model predicts the top_k entries matching the ground truth

    model2 = Decision_topk(confidence_C=63, K_way=4)
    model2.initialize(init=mx.init.Xavier())
    #x = nd.
Example #16
    # build the network
    enc_nn = nn.HybridSequential()
    enc_nn.add(nn.Dense(units=args.num_encoder_units, activation='relu'))
    enc_nn.add(nn.Dense(units=args.latent_dim * 2))

    dec_nn = nn.HybridSequential()
    dec_nn.add(nn.Dense(units=args.num_decoder_units, activation='relu'))
    dec_nn.add(nn.Dense(units=input_dim))

    vae_nn = VAE(enc_nn, dec_nn, args.batch_size, args.latent_dim)
    model_params = vae_nn.collect_params()
    model_params.initialize(ctx=ctx)

    # loss function
    loss_fn = VAELoss(
        SigmoidBinaryCrossEntropyLoss(from_sigmoid=False, batch_axis=0),
        input_dim, args.latent_dim)

    # optimizer
    trainer = Trainer(params=model_params,
                      optimizer='adam',
                      optimizer_params={'learning_rate': args.learning_rate})

    # forward function for training
    def forward_fn(batch):
        x = batch.data[0].as_in_context(ctx)
        y, q = vae_nn(x)
        loss = loss_fn(x, q, y)
        return loss

    # train
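The snippet stops right after the "# train" comment. An illustrative continuation (not the original script) that drives forward_fn with the Trainer defined above could look as follows; the iterator name train_iter and the args.num_epochs option are assumptions:

    # Illustrative sketch only: train_iter is assumed to be an mx.io data iterator
    # (forward_fn reads batch.data[0]); args.num_epochs is an assumed option.
    for epoch in range(args.num_epochs):
        epoch_loss = 0.0
        train_iter.reset()
        for batch in train_iter:
            with autograd.record():
                loss = forward_fn(batch)
            loss.backward()
            trainer.step(args.batch_size)
            epoch_loss += nd.sum(loss).asscalar()
        print('epoch %d, total loss %.4f' % (epoch, epoch_loss))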