def train(num_gpus, batch_size, lr):
    train_iter, test_iter = gb.load_data_fashion_mnist(batch_size)
    ctx = [mx.gpu(i) for i in range(num_gpus)]
    print('training on:', ctx)
    net = resnet18(10)
    net.initialize(init=init.Normal(sigma=0.01), ctx=ctx)
    trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': lr})
    loss = gloss.SoftmaxCrossEntropyLoss()  # defined at module scope in the original; added so the loop below is self-contained
    for epoch in range(5):
        start = time()
        for X, y in train_iter:
            gpu_Xs = gutils.split_and_load(X, ctx)
            gpu_ys = gutils.split_and_load(y, ctx)
            with autograd.record():
                ls = [
                    loss(net(gpu_X), gpu_y)
                    for gpu_X, gpu_y in zip(gpu_Xs, gpu_ys)
                ]

            for l in ls:
                l.backward()
            trainer.step(batch_size)
        nd.waitall()
        print('epoch %d, training time: %.1f sec' % (epoch, time() - start))
        test_acc = gb.evaluate_accuracy(test_iter, net, ctx[0])
        print('validation accuracy %.4f' % (test_acc))
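The split_and_load call in the loop above is what distributes the work: it slices an NDArray along the batch axis and copies one slice to each context. A minimal sketch of that behaviour, using CPU contexts as stand-ins for GPUs so it runs anywhere:

import mxnet as mx
from mxnet.gluon import utils as gutils

X = mx.nd.arange(24).reshape((8, 3))   # a batch of 8 samples
ctx = [mx.cpu(0), mx.cpu(1)]           # stand-ins for mx.gpu(0), mx.gpu(1)
parts = gutils.split_and_load(X, ctx)  # splits along batch_axis=0
print([p.shape for p in parts])        # [(4, 3), (4, 3)]
print([p.context for p in parts])      # [cpu(0), cpu(1)]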
Example #2
def train(num_gpus, batch_size, lr):
    train_iter, test_iter = gb.load_data_fashion_mnist(
        batch_size, root="../data/fashion-mnist")
    ctx = [mx.gpu(i) for i in range(num_gpus)]
    print("running on:", ctx)
    net.initialize(init=init.Normal(sigma=0.01), ctx=ctx)  # `net` is assumed to be defined at module scope
    trainer = gluon.Trainer(net.collect_params(), "sgd", {"learning_rate": lr})
    loss = gloss.SoftmaxCrossEntropyLoss()
    for epoch in range(4):
        start = time.time()
        for X, y in train_iter:
            gpu_Xs = gutils.split_and_load(X, ctx)
            gpu_ys = gutils.split_and_load(y, ctx)
            with autograd.record():
                ls = [
                    loss(net(gpu_X), gpu_y)
                    for gpu_X, gpu_y in zip(gpu_Xs, gpu_ys)
                ]
            for l in ls:
                l.backward()
            trainer.step(batch_size)
        nd.waitall()
        train_time = time.time() - start
        test_acc = gb.evaluate_accuracy(test_iter, net, ctx[0])
        print("epoch %d, time: %.1f sec, test acc: %.2f" %
              (epoch + 1, train_time, test_acc))
Example #3
def _get_batch(batch, ctx):
    """Return features and labels on ctx."""
    features, labels = batch
    if labels.dtype != features.dtype:
        labels = labels.astype(features.dtype)
    return (gutils.split_and_load(features, ctx),
            gutils.split_and_load(labels, ctx), features.shape[0])
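A short usage sketch for the helper above; the synthetic batch and CPU contexts are illustrative, not from the source. Note how the int32 labels are cast to float32 to match the features before being split:

import mxnet as mx

batch = (mx.nd.zeros((6, 1, 28, 28)),
         mx.nd.array([0, 1, 2, 0, 1, 2], dtype='int32'))
features, labels, size = _get_batch(batch, [mx.cpu(0), mx.cpu(1)])
print(size)             # 6, the full batch size
print(labels[0].dtype)  # float32 after the cast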
Example #4
    def train_batch(self, data, label):
        """
        Train batch
        :param data:
        :param label:
        :return:
        """
        data_lst = split_and_load(data, self.ctx)
        label_lst = split_and_load(label, self.ctx)

        criterion_xent = SoftmaxCELoss()

        with autograd.record():
            losses = []
            for x, y in zip(data_lst, label_lst):
                y_hat = self.inference(x)
                l_xent = criterion_xent(y_hat, y[:, 0])

                loss = l_xent

                losses.append(loss)

                # logging
                self.metrics['Train-Xent'].update(None, [l_xent])
                self.metrics['Train-Acc'].update([y[:, 0]], [y_hat])

        # run backward outside the autograd.record() scope
        for l in losses:
            l.backward()

        self.trainer.step(data.shape[0])

        self.cur_iter += 1

        if self.args.log_itv != 0 and self.cur_iter % self.args.log_itv == 0:
            self.log_iter()
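Note that trainer.step(data.shape[0]) normalizes by the full batch size even though each loss covers only one device slice; the per-slice gradients are combined before the update, so dividing by the whole batch recovers the mean gradient. A toy sketch of the rescaling on a single device (layer and values are illustrative):

import mxnet as mx
from mxnet import autograd, gluon

net = gluon.nn.Dense(1, use_bias=False, weight_initializer='zeros')
net.initialize()
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 1.0})
x = mx.nd.ones((4, 3))
with autograd.record():
    loss = net(x).sum()      # dL/dw_j = 4, summed over the 4 samples
loss.backward()
trainer.step(4)              # update = -lr * grad / 4 = -1.0 per weight
print(net.weight.data())     # [[-1. -1. -1.]]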
Example #5
def _get_batch(batch, ctx):
    features, labels = batch
    if labels.dtype != features.dtype:
        labels = labels.astype(features.dtype)
    # When ctx contains multiple GPUs, split the mini-batch and copy each slice to one GPU.
    return (gutils.split_and_load(features, ctx),
            gutils.split_and_load(labels, ctx), features.shape[0])
Example #6
def validate_yolov3_coco(net, val_data_loader, ctx, eval_metric):
    eval_metric.reset()
    net.set_nms(nms_thresh=0.45, nms_topk=400)
    mx.nd.waitall()
    net.hybridize()

    for batch in val_data_loader:
        data = utils.split_and_load(batch[0], ctx_list=ctx, even_split=False)
        label = utils.split_and_load(batch[1], ctx_list=ctx, even_split=False)

        det_bboxes, det_ids, det_scores = [], [], []
        gt_bboxes, gt_ids, gt_difficults = [], [], []

        for x, y in zip(data, label):
            ids, scores, bboxes = net(x)
            det_ids.append(ids)
            det_scores.append(scores)
            det_bboxes.append(bboxes.clip(0, batch[0].shape[2]))
            gt_ids.append(y.slice_axis(axis=-1, begin=4, end=5))
            gt_bboxes.append(y.slice_axis(axis=-1, begin=0, end=4))
            gt_difficults.append(
                y.slice_axis(axis=-1, begin=5, end=6) if y.shape[-1] > 5 else None)

        eval_metric.update(det_bboxes, det_ids, det_scores, gt_bboxes, gt_ids,
                           gt_difficults)

    return eval_metric.get()
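The eval_metric argument just needs reset()/update()/get(); in GluonCV pipelines this is typically a detection mAP metric. A hedged wiring sketch (the metric class is an assumption from GluonCV, check the version you use):

from gluoncv.utils.metrics.voc_detection import VOC07MApMetric

metric = VOC07MApMetric(iou_thresh=0.5)
names, values = validate_yolov3_coco(net, val_data_loader, ctx, metric)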
Example #7
def test(test_net, ctx, test_loader, iteration, logger):
    # print("Start testing iter %d." % iteration)
    Loss = gloss.SoftmaxCrossEntropyLoss()
    metric = mx.metric.Accuracy()
    metric_top5 = mx.metric.TopKAccuracy(5)
    test_loss = mx.metric.Loss()
    for batch in test_loader:
        trans = gutils.split_and_load(batch[0], ctx)
        labels = gutils.split_and_load(batch[1], ctx)
        outputs = [test_net(tran) for tran in trans]
        losses = [
            Loss(output, label) for output, label in zip(outputs, labels)
        ]
        test_loss.update(0, losses)
        metric.update(labels, outputs)
        metric_top5.update(labels, outputs)
    _, test_top1_acc = metric.get()
    _, test_top5_acc = metric_top5.get()
    _, test_loss = test_loss.get()

    if test_top1_acc >= 0.7:
        test_net.save_parameters('imagenet_param/test_iter%d_%.5f.param' %
                                 (iteration, test_top1_acc))
    test_str = ("test_Loss: %f, test top1-acc %f, test top5-acc %f." %
                (test_loss, test_top1_acc, test_top5_acc))
    logger.info(test_str)
Example #8
def train_model():
    batch_size = 10
    num_epochs = 10
    ctx = [mx.gpu(i) for i in range(1)]

    net = get_model()
    net.initialize(ctx=ctx)
    net.collect_params().reset_ctx(ctx)
    net.hybridize()
    loss = FocalLoss()
    trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': learning_rate, 'wd': 0.001})

    for epoch in range(1, num_epochs + 1):
        for X, y in data_iter(batch_size, data_train_index, data_train_im, data_train_gt, ctx):
            # print('x  shape is ', X.shape)
            # print('y  shape is ', y.shape)
            gpu_Xs = gutils.split_and_load(X, ctx)
            gpu_ys = gutils.split_and_load(y, ctx)
            with autograd.record():
                ls = [loss(net(gpu_X), gpu_y) for gpu_X, gpu_y in zip(gpu_Xs, gpu_ys)]
            for l in ls:
                l.backward()
            trainer.step(batch_size)
            # print(ls[0].asscalar())
        print("epoch %d, loss: %f" % (epoch, ls[0].asscalar()))
        net.save_params('ckp/CSRNet-%d.params' % epoch)
Example #9
    def eval(self, inference, val_loader, log=True, target=True, epoch=True):
        """
        Evaluate the model
        :param inference: network
        :param val_loader: data loader
        :param log: log flag
        :param target: target flag for updating the record and log
        :param epoch: epoch flag for updating the record and log
        :return:
        """
        mtc_acc = Accuracy()
        mtc_acc.reset()
        # val_loader.reset()

        feature_nest, y_nest, y_hat_nest = [], [], []
        for X, Y in val_loader:
            X_lst = split_and_load(X, self.args.ctx, even_split=False)
            Y_lst = split_and_load(Y, self.args.ctx, even_split=False)

            for x, y in zip(X_lst, Y_lst):
                y_hat, features = inference(x)
                # update metric
                mtc_acc.update([y], [y_hat])

                y_nest.extend(y.asnumpy())
                feature_nest.extend(features.asnumpy())
                y_hat_nest.extend(y_hat.asnumpy())

        feature_nest = np.array(feature_nest)
        y_nest = np.array(y_nest).astype(int)
        y_hat_nest = np.array(y_hat_nest)

        if log:
            target_key = 'Tgt' if target else 'Src'
            epoch_key = 'Epoch' if epoch else 'Iter'
            record = self.cur_epoch if epoch else self.cur_iter

            if mtc_acc.get()[1] > self.records[epoch_key]['%s-Acc' % target_key]:
                if target:
                    self.records[epoch_key][epoch_key] = record
                self.records[epoch_key]['%s-Acc' % target_key] = mtc_acc.get()[1]
                self.records[epoch_key]['%s-label' % target_key] = y_nest
                self.records[epoch_key]['%s-preds' % target_key] = y_hat_nest
                self.records[epoch_key]['%s-features' % target_key] = feature_nest

                self.save_params(inference, 0, epoch_key)

            self.logger.update_scalar(
                '%s [%d]: Eval-Acc-%s' % (epoch_key, record, target_key),
                mtc_acc.get()[1])
            if self.sw:
                self.sw.add_scalar('Acc/Eval-%s-Acc-%s' % (epoch_key, target_key),
                                   mtc_acc.get()[1],
                                   global_step=record)

        return mtc_acc.get()[1], y_nest, y_hat_nest, feature_nest
Example #10
def _get_batch(batch, ctx):
    if isinstance(batch, mx.io.DataBatch):
        features = batch.data[0]
        labels = batch.label[0]
    else:
        features, labels = batch
    return (gutils.split_and_load(features, ctx),
            gutils.split_and_load(labels, ctx), features.shape[0])
Example #11
def train(train_net, iterations, trainer, ctx, lr_period: tuple, lr_decay,
          train_loader, test_loader, cat_interval):
    # set up logger
    logging.basicConfig()
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    log_file_path = 'Attention56_train.log'
    fh = logging.FileHandler(log_file_path)
    logger.addHandler(fh)

    train_net.collect_params().reset_ctx(ctx)
    train_gen = inf_train_gen(train_loader)
    Loss = gloss.SoftmaxCrossEntropyLoss()
    metric = mx.metric.Accuracy()
    metric_top5 = mx.metric.TopKAccuracy(5)
    train_loss = mx.metric.Loss()
    prev_time = datetime.datetime.now()

    metric.reset()
    train_loss.reset()

    for iteration in range(int(iterations)):
        batch = next(train_gen)
        trans = gutils.split_and_load(batch.data[0], ctx)
        labels = gutils.split_and_load(batch.label[0], ctx)

        with autograd.record():
            outputs = [train_net(tran) for tran in trans]
            losses = [
                Loss(output, label) for output, label in zip(outputs, labels)
            ]

        for loss in losses:
            loss.backward()

        trainer.step(batch_size)  # `batch_size` is defined at module scope
        train_loss.update(0, losses)
        metric.update(labels, outputs)
        metric_top5.update(labels, outputs)
        if iteration % cat_interval == cat_interval - 1:
            cur_time = datetime.datetime.now()
            time_str = format_time(prev_time, cur_time)
            _, top1_acc = metric.get()
            _, top5_acc = metric_top5.get()
            _, epoch_loss = train_loss.get()
            metric.reset()
            metric_top5.reset()
            train_loss.reset()
            epoch_str = (
                "Iter %d. Loss: %.5f, Train top1-acc %f, Train top5-acc %f." %
                (iteration, epoch_loss, top1_acc, top5_acc))
            prev_time = cur_time
            logger.info(epoch_str + time_str + 'lr ' +
                        str(trainer.learning_rate))
            test(train_net, ctx, test_loader, iteration, logger)
        if iteration in lr_period:
            trainer.set_learning_rate(trainer.learning_rate * lr_decay)
Example #12
def _get_batch(batch, ctx):
    if isinstance(batch, mx.io.DataBatch):
        features = batch.data[0]
        labels = batch.label[0]
    else:
        features, labels = batch
    return (gutils.split_and_load(features, ctx),
            gutils.split_and_load(labels, ctx),
            features.shape[0])
Example #13
def train(transformer, data_iter, lr, num_epochs, vocab, ctx):
    print('start training')
    print('ctx:', ctx)

    trainer = gluon.Trainer(transformer.collect_params(), 'adam', {'learning_rate': lr})
    loss = gloss.SoftmaxCrossEntropyLoss()

    best_epoch = 0
    best_loss = float('Inf')
    sw = SummaryWriter(logdir='../logs', flush_secs=5)

    for epoch in range(num_epochs):
        l_sum = 0.0
        for i, data in enumerate(data_iter):
            X, Y, label, X_valid_len, Y_valid_len = data
            # X = X.as_in_context(ctx)
            # Y = Y.as_in_context(ctx)
            # label = label.as_in_context(ctx)
            gpu_Xs = gutils.split_and_load(X, ctx, even_split=False)
            gpu_Ys = gutils.split_and_load(Y, ctx, even_split=False)
            gpu_labels = gutils.split_and_load(label, ctx, even_split=False)

            with autograd.record():
                # l = batch_loss(transformer, X, Y, vocab, loss)
                ls = [batch_loss(transformer, gpu_X, gpu_Y, gpu_label, vocab, loss)
                      for gpu_X, gpu_Y, gpu_label in zip(gpu_Xs, gpu_Ys, gpu_labels)]

            # l.backward()
            b_loss = 0.0
            for l in ls:
                l.backward()
                b_loss += l.asscalar()
            trainer.step(X.shape[0])
            nd.waitall()

            l_sum += b_loss
            if i % 100 == 0:
                info = "epoch %d, batch %d, batch_loss %.3f" % (epoch, i, b_loss)
                print(info)
                sw.add_scalar(tag='batch_loss', value=b_loss, global_step=i)

        cur_loss = l_sum / len(data_iter)
        # save the model checkpoint
        if cur_loss < best_loss:
            best_loss = cur_loss
            best_epoch = epoch
            if not os.path.exists('../model'):
                os.mkdir('../model')
            transformer.save_parameters('../model/transformer' + str(epoch) + '.params')

        info = "epoch %d, loss %.3f, best_loss %.3f, best_epoch %d" % (
            epoch, cur_loss, best_loss, best_epoch)
        print(info)
        sw.add_scalar(tag='loss', value=cur_loss, global_step=epoch)
Example #14
def evaluate_confusion_matrix(data_iterator, net, ctx=ctx):
    import numpy as np
    cm = np.zeros([classes, classes])
    for i, (X, y) in enumerate(data_iterator):
        X = X / 255
        data = gutils.split_and_load(X, ctx, even_split=False)
        label = gutils.split_and_load(y, ctx, even_split=False)
        output = [(net(X_slice), y_slice) for X_slice, y_slice in zip(data, label)]
        for result in output:
            cm += ConfusionMatrix(result[0], result[1])
    return cm
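ConfusionMatrix is an external helper not shown in this snippet; a hypothetical stand-in matching the call signature (`classes` is assumed to be a module-level int):

import numpy as np

def ConfusionMatrix(output, label):
    # accumulate a classes x classes count matrix from one device slice
    cm = np.zeros([classes, classes])
    pred = output.argmax(axis=1).asnumpy().astype(int)
    true = label.asnumpy().astype(int)
    for t, p in zip(true, pred):
        cm[t, p] += 1
    return cm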
Example #15
    def _eval(self):
        n_correct = 0
        edit_dis = 0
        for images, labels in tqdm(self.validate_loader, desc='test model'):
            gpu_images = gutils.split_and_load(images, self.ctx)
            gpu_labels = gutils.split_and_load(labels, self.ctx)
            preds = [self.model(x)[0] for x in gpu_images]
            batch_dict = self.accuracy_batch(preds, gpu_labels, phase='VAL')
            n_correct += batch_dict['n_correct']
            edit_dis += batch_dict['edit_dis']
        return {'n_correct': n_correct, 'edit_dis': edit_dis}
Example #16
def _get_batch(batch, ctx):
    """Return features and labels on ctx."""
    if isinstance(batch, mx.io.DataBatch):
        features = batch.data[0]
        labels = batch.label[0]
    else:
        features, labels = batch
    if labels.dtype != features.dtype:
        labels = labels.astype(features.dtype)
    return (gutils.split_and_load(features, ctx),
            gutils.split_and_load(labels, ctx), features.shape[0])
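Because this variant also accepts mx.io.DataBatch, it works with both DataLoader tuples and the older iterator API. A hedged invocation sketch with a synthetic NDArrayIter:

import mxnet as mx

it = mx.io.NDArrayIter(data=mx.nd.zeros((6, 4)),
                       label=mx.nd.arange(6),
                       batch_size=3)
batch = next(iter(it))   # an mx.io.DataBatch
features, labels, n = _get_batch(batch, [mx.cpu(0)])
print(n)                 # 3, this batch's size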
Example #17
    def simulate_z_posterior_mcmc(self,
                                  x,
                                  nsim,
                                  init=None,
                                  burnin=100,
                                  step_size=0.1):
        # This function relies on the hybridized self.langevin, so we need to make sure
        # nsim is equal to self.langevin.nchain. If not, we reinitialize self.langevin
        if nsim != self.langevin.nchain:
            self.langevin = LangevinLogPDF(self.dec, nsim, self.log_cond_pdf)
            self.langevin_loss = LangevinLoss(self.dec, nsim,
                                              self.log_cond_pdf)
            self.langevin.hybridize()
            self.langevin_loss.hybridize()

        n = x.shape[0]

        # For now neural network parameters do not need gradient
        for name, param in self.loglik_params.items():
            param.grad_req = "null"

        if init is None:
            mu, logvar = self.enc(x.as_in_context(self.ctx[0]))
            init, _ = self.simulate_latent(mu, logvar, nsim, F=nd)
            init = init.reshape((nsim, n, self.latent_dim))

        # init [nsim x n x d], x [n x d]
        # split_and_load() works on the first axis, first transpose init, and then transform back
        x_gpus = utils.split_and_load(x, self.ctx)
        init_gpus = utils.split_and_load(init.transpose((1, 0, 2)), self.ctx)
        chains_gpus = []
        for x, init in zip(x_gpus, init_gpus):
            # Transpose back
            init = init.transpose((1, 0, 2))
            sampler = LangevinMultiChain(shape=init.shape[1:],
                                         nchain=nsim,
                                         ctx=x.context)
            chains = sampler.sample(model=self,
                                    start=init,
                                    step_size=step_size,
                                    burnin=burnin,
                                    args={"x": x})
            chains_gpus.append(chains)

        nd.waitall()
    # note: `x` now refers to the last device slice, so all chains gather on that context
    chains = [c.as_in_context(x.context) for c in chains_gpus]

        # Restore gradient flag
        for name, param in self.loglik_params.items():
            # Parameters that have no gradient, e.g. in batch normalization,
            # are unaffected
            param.grad_req = "write"

        return nd.concat(*chains, dim=1)
Example #18
def evaluate_accuracy(data_iterator, net, ctx=ctx):
    acc = 0.
    for i, (X, y) in enumerate(data_iterator):
        X = X / 255
        data = gutils.split_and_load(X, ctx, even_split=False)
        label = gutils.split_and_load(y, ctx, even_split=False)
        output = [(net(X_slice), y_slice) for X_slice, y_slice in zip(data, label)]
        for result in output:
            acc += accuracy(result[0], result[1])
        # if i + 1 >= 30 :
        #     return acc / (30 * len(ctx))
    return acc / (len(data_iterator) * len(ctx))
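The accuracy helper called above is not shown in this snippet; a minimal stand-in consistent with how it is used (hypothetical, not the original implementation):

def accuracy(output, label):
    # fraction of samples in one device slice whose argmax matches the label
    return (output.argmax(axis=1) == label.astype('float32')).mean().asscalar()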
Example #19
    def batch_fn(batch, ctx):
        # data = split_and_load(batch[0], ctx_list=ctx, batch_axis=0)
        # data2 = split_and_load(batch[1], ctx_list=ctx, batch_axis=0)
        # label = split_and_load(batch[2], ctx_list=ctx, batch_axis=0)
        # return data, data2, label
        # data = split_and_load(batch[0], ctx_list=ctx, batch_axis=0)
        # label = split_and_load(batch[1], ctx_list=ctx, batch_axis=0)
        # return data, label
        data = split_and_load(batch[0][0], ctx_list=ctx, batch_axis=0)
        data2 = split_and_load(batch[0][1], ctx_list=ctx, batch_axis=0)
        label = split_and_load(batch[1], ctx_list=ctx, batch_axis=0)
        return data, data2, label
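batch_fn expects a nested batch of the form ((data, data2), label). A quick sketch (shapes are illustrative):

import mxnet as mx
from mxnet.gluon.utils import split_and_load

batch = ((mx.nd.zeros((4, 3)), mx.nd.zeros((4, 5))), mx.nd.arange(4))
data, data2, label = batch_fn(batch, [mx.cpu(0), mx.cpu(1)])
print(len(data), data[0].shape)   # 2 (2, 3)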
Example #20
def _get_batch(batch, ctx):
    """return features and labels on ctx"""
    if isinstance(batch, mx.io.DataBatch):
        features = batch.data[0]
        labels = batch.label[0]
    else:
        features, labels = batch
    if labels.dtype != features.dtype:
        labels = labels.astype(features.dtype)
    return (gutils.split_and_load(features, ctx),
            gutils.split_and_load(labels, ctx),
            features.shape[0])
Example #21
def unpack_batch(batch, ctx_list):
    data = split_and_load(batch.data[0],
                          ctx_list=ctx_list,
                          batch_axis=0,
                          even_split=False)
    label = split_and_load(batch.label[0],
                           ctx_list=ctx_list,
                           batch_axis=0,
                           even_split=False)
    if batch.index is None:
        return data, label
    else:
        return data, label, batch.index[0]
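A short usage sketch; the batch is built by hand purely for illustration. mx.io.DataBatch defaults index to None, so this call takes the two-value branch:

import mxnet as mx

batch = mx.io.DataBatch(data=[mx.nd.zeros((5, 3))],
                        label=[mx.nd.arange(5)])
data, label = unpack_batch(batch, [mx.cpu(0)])
print(len(data), data[0].shape)   # 1 (5, 3)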
Example #22
def _get_batch(batch, ctx):
    if isinstance(batch, mx.io.DataBatch):   # in fact, the batch is a list
        features = batch.data[0]
        labels = batch.label[0]
    else:
        features, labels = batch

    # split_and_load splits the NDArray into len(ctx) slices along batch_axis and
    # loads each slice onto one context. When len(ctx) == 1 it simply returns
    # [data.as_in_context(ctx[0])] (see the source of split_and_load).
    if isinstance(ctx, mx.Context):
        ctx = [ctx]
    return (gutils.split_and_load(features, ctx),
            gutils.split_and_load(labels, ctx),
            features.shape[0])   # return the batch size
Example #23
    def get_batch_max_length(self, img_h, img_w, ctx):
        from mxnet.gluon import utils as gutils
        inputs = gutils.split_and_load(nd.zeros((2, 3, img_h, img_w)), ctx)
        # feature extraction stage
        visual_feature = [self.feature_extraction(x) for x in inputs]
        self.batch_max_length = visual_feature[0].shape[-1]
        return self.batch_max_length
Example #24
def multi_gpus_forward(net, ctx, img, labels=None, bboxes=None):
    img_list = gutils.split_and_load(img, ctx)

    if autograd.is_training():
        labels_list = gutils.split_and_load(labels, ctx)
        bboxes_list = gutils.split_and_load(bboxes, ctx)

        rpn_cls_losses = []
        rpn_box_losses = []
        rcnn_cls_losses = []
        rcnn_box_losses = []
        total_losses = []
        for img, labels, bboxes in zip(img_list, labels_list, bboxes_list):
            rpn_cls_loss, rpn_box_loss, rcnn_cls_loss, rcnn_box_loss, total_loss = net(img, labels, bboxes)
            rpn_cls_losses.append(rpn_cls_loss)
            rpn_box_losses.append(rpn_box_loss)
            rcnn_cls_losses.append(rcnn_cls_loss)
            rcnn_box_losses.append(rcnn_box_loss)
            total_losses.append(total_loss)

        rpn_cls_loss = np.mean([loss.asscalar() for loss in rpn_cls_losses])
        rpn_box_loss = np.mean([loss.asscalar() for loss in rpn_box_losses])
        rcnn_cls_loss = np.mean([loss.asscalar() for loss in rcnn_cls_losses])
        rcnn_box_loss = np.mean([loss.asscalar() for loss in rcnn_box_losses])
        total_loss = np.mean([loss.asscalar() for loss in total_losses])
        return rpn_cls_loss, rpn_box_loss, rcnn_cls_loss, rcnn_box_loss, total_loss

    else:
        cls_ids_list = []
        scores_list = []
        bboxes_list = []
        for img in img_list:
            cls_ids, scores, bboxes = net(img)
            cls_ids_list.append(cls_ids)
            scores_list.append(scores)
            bboxes_list.append(bboxes)

        cls_ids_list = [item.asnumpy() for item in cls_ids_list]
        scores_list = [item.asnumpy() for item in scores_list]
        bboxes_list = [item.asnumpy() for item in bboxes_list]
        cls_ids = np.concatenate(cls_ids_list)
        scores = np.concatenate(scores_list)
        bboxes = np.concatenate(bboxes_list)
        return cls_ids, scores, bboxes
Example #25
    def split_map(obj):
        if isinstance(obj, NDArray):
            return split_and_load(obj, ctx_list, batch_axis, even_split=False)
        if isinstance(obj, tuple) and len(obj) > 0:
            return list(zip(*map(split_map, obj)))
        if isinstance(obj, list) and len(obj) > 0:
            return list(map(list, zip(*map(split_map, obj))))
        if isinstance(obj, dict) and len(obj) > 0:
            return list(map(type(obj), zip(*map(split_map, obj.items()))))
        return [obj for _ in ctx_list]
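split_map is a nested helper: ctx_list and batch_axis come from its enclosing scope. Given those bindings it recursively splits every NDArray it finds in a nested batch structure and broadcasts everything else to every context. An illustrative call, assuming the helper plus a two-context ctx_list are in scope:

batch = {'img': mx.nd.zeros((4, 3)), 'step': 7}
parts = split_map(batch)
# parts == [{'img': <2x3 slice on ctx_list[0]>, 'step': 7},
#           {'img': <2x3 slice on ctx_list[1]>, 'step': 7}]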
Example #26
    def train_epoch(self, inference, trainer, **kwargs):
        """
        Training with dSNEt loss
        :param inference: inference
        :param trainer: trainer of inference
        :return:
        """

        for Xs, Ys, Xt, Yt, _ in self.train_src_loader:
            Xs_lst = split_and_load(Xs, self.args.ctx, even_split=False)
            Ys_lst = split_and_load(Ys, self.args.ctx, even_split=False)
            Xt_lst = split_and_load(Xt, self.args.ctx, even_split=False)
            Yt_lst = split_and_load(Yt, self.args.ctx, even_split=False)

            if self.args.train_src:
                self.train_batch(Xs_lst,
                                 Ys_lst,
                                 Xt_lst,
                                 Yt_lst,
                                 inference,
                                 target=False)
                trainer.step(Xs.shape[0])

            self.train_batch(Xt_lst,
                             Yt_lst,
                             Xs_lst,
                             Ys_lst,
                             inference,
                             target=True)

            trainer.step(Xt.shape[0])

            if self.args.log_itv > 0 and self.cur_iter % self.args.log_itv == 0:
                self.log_iter()
                if self.args.eval:
                    self.eval(inference,
                              self.test_tgt_loader,
                              target=True,
                              epoch=False)

        self.log_epoch()
        if self.args.eval and self.cur_epoch > self.args.eval_epoch:
            self.eval(inference, self.test_tgt_loader, target=True, epoch=True)
Example #27
    def val_net(self, net, val_data, len_vd):

        n_batch = int(len_vd / self.batch_size)
        self.print_info('Validation - samples: {}, batch size: {}, batches: {}'.format(
            len_vd, self.batch_size, n_batch))

        e_r, e_p, e_f1 = 0, 0, 0

        for i, batch in enumerate(val_data):
            data, labels = batch[0], batch[1].astype('float32')

            data = split_and_load(data,
                                  ctx_list=self.ctx,
                                  batch_axis=0,
                                  even_split=False)  # multi-GPU
            labels = split_and_load(labels,
                                    ctx_list=self.ctx,
                                    batch_axis=0,
                                    even_split=False)

            outputs = [net(X) for X in data]

            br, bp, bf1 = self.get_batch_rpf(outputs, labels)

            e_r += br
            e_p += bp
            e_f1 += bf1

            self.print_info(
                'validation: batch: {}, recall: {:.2f}, precision: {:.2f}, f1: {:.2f}'
                .format(i, br, bp, bf1))

            n_batch = i + 1

        e_r /= n_batch
        e_p /= n_batch
        e_f1 /= n_batch

        self.print_info(
            'validation: recall: {:.2f}, precision: {:.2f}, f1: {:.2f}'.format(
                e_r, e_p, e_f1))
        return e_r, e_p, e_f1
Example #28
    def bias_correction(self, x, init=None, burnin=100, step_size=0.1):
        postz = self.simulate_z_posterior_mcmc(x,
                                               nsim=self.nchain,
                                               init=init,
                                               burnin=burnin,
                                               step_size=step_size)

        # z [nsim x n x d], x [n x d]
        # split_and_load() works on the first axis, first transpose z, and then transform back
        x_gpus = utils.split_and_load(x, self.ctx)
        z_gpus = utils.split_and_load(postz.transpose((1, 0, 2)), self.ctx)

        loss_gpus = []
        with autograd.record():
            for x, z in zip(x_gpus, z_gpus):
                # Transpose back
                z = z.transpose((1, 0, 2))
                loss = self.langevin_loss(z.reshape((-1, self.latent_dim)), x)
                loss_gpus.append(loss)

        return loss_gpus, postz
Example #29
    def extract_features(self, preprocessed_data, verbose=True):
        """
        Parameters
        ----------
        preprocessed_data : SArray

        Returns
        -------
        numpy array containing the deep features
        """
        import numpy as np

        last_progress_update = _time.time()

        if _mac_ver() < (10, 14):
            # Use MXNet
            preprocessed_data = mx.nd.array(preprocessed_data)

            ctx_list = self.ctx
            if len(preprocessed_data) < len(ctx_list):
                ctx_list = ctx_list[:len(preprocessed_data)]
            batches = utils.split_and_load(preprocessed_data,
                                           ctx_list=ctx_list,
                                           even_split=False)

            deep_features = []
            for i, cur_batch in enumerate(batches):
                y = self.vggish_model.forward(cur_batch).asnumpy()
                for row in y:  # renamed from `i`, which shadowed the batch index
                    deep_features.append(row)

                # If `verbose` is set, print a progress update about every 20s
                if verbose and _time.time() - last_progress_update >= 20:
                    print("Extracted {} of {} batches".format(
                        i, len(batches)))  # was `len(audio_data)`, an undefined name
                    last_progress_update = _time.time()

        else:
            # Use Core ML
            deep_features = []
            for i, cur_example in enumerate(preprocessed_data):
                for cur_frame in cur_example:
                    x = {'input1': [cur_frame]}
                    y = self.vggish_model.predict(x)
                    deep_features.append(y['output1'])

                # If `verbose` is set, print a progress update about every 20s
                if verbose and _time.time() - last_progress_update >= 20:
                    print("Extracted {} of {}".format(i,
                                                      len(preprocessed_data)))
                    last_progress_update = _time.time()

        return np.asarray(deep_features)
Example #30
    def evaluate_net(self, base_net, val_data):
        triplet_loss = gluon.loss.TripletLoss(margin=0)
        rate = 0.0
        sum_correct, sum_all = 0, 0

        for i, batch in enumerate(val_data):
            data, labels = batch[0], batch[1].astype('float32')

            data = split_and_load(data,
                                  ctx_list=self.ctx,
                                  batch_axis=0,
                                  even_split=False)  # multi-GPU
            labels = split_and_load(labels,
                                    ctx_list=self.ctx,
                                    batch_axis=0,
                                    even_split=False)
            for X in data:
                anchor_ins, pos_ins, neg_ins = [], [], []
                for b_X in X:
                    anchor_ins.append(nd.expand_dims(b_X[0], axis=0))
                    pos_ins.append(nd.expand_dims(b_X[1], axis=0))
                    neg_ins.append(nd.expand_dims(b_X[2], axis=0))

                anchor_ins = nd.concatenate(anchor_ins, axis=0)
                pos_ins = nd.concatenate(pos_ins, axis=0)
                neg_ins = nd.concatenate(neg_ins, axis=0)

                inter1 = base_net(anchor_ins)
                inter2 = base_net(pos_ins)
                inter3 = base_net(neg_ins)
                loss = triplet_loss(inter1, inter2, inter3)  # TripletLoss
                n_correct = np.sum(np.where(loss.asnumpy() == 0, 1, 0))
                sum_all += loss.shape[0]
                sum_correct += n_correct
            rate = safe_div(sum_correct, sum_all)
            self.print_info('Validation batch: {}, accuracy: {:.4f} ({} / {})'.format(
                i, rate, sum_correct, sum_all))
        rate = safe_div(sum_correct, sum_all)
        self.print_info('Validation accuracy: %.4f (%s / %s)' % (rate, sum_correct, sum_all))
        return rate
Example #31
def predict():
    import numpy as np

    from scipy import misc

    # predict_data = gluon.data.vision.datasets.ImageFolderDataset('predict_img').transform_first(val_transform)
    # predict_iterator = gluon.data.DataLoader(predict_data, batch_size=batch_size, num_workers=4)

    for item, (X, y) in enumerate(train_loader):
        logging.info(len(train_loader))
        logging.info(item)
        X = X / 255
        data = gutils.split_and_load(X, ctx, even_split=False)
        label = gutils.split_and_load(y, ctx, even_split=False)
        output = [mx.ndarray.SoftmaxActivation(data=net(i)) for i in data]
        for i in range(len(output)):
            y_pred = output[i].asnumpy().tolist()
            y_pred_index = np.argmax(y_pred, axis=1)
            y_pred_prob = np.max(y_pred, axis=1)
            y_label = label[i].asnumpy().tolist()
            y_label = np.array(y_label)
            error_index = [
                k for k in range(len(y_pred_index))
                if y_pred_index[k] != y_label[k]
            ]
            if len(error_index):
                for j in error_index:
                    predict_name = name_list[int(y_pred_index.tolist()[j])]
                    label_name = name_list[int(y_label.tolist()[j])]
                    predict_prob = str(y_pred_prob.tolist()[j])
                    img_array = data[i][j] * 255
                    misc.imsave(
                        'img_file/hard_train/' + label_name + '/' +
                        # str(item) + '_' + str(i) + '_' + str(j) + '_' +
                        predict_prob + '-' + label_name + '-->' +
                        predict_name + '.bmp',
                        img_array.asnumpy().transpose(1, 2, 0))
    return 0
Example #32
def train(num_gpus, batch_size, lr):

    comm = MPI.COMM_WORLD
    comm_rank = comm.Get_rank()
    comm_size = comm.Get_size()

    train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)

    # ctx = [mx.gpu(i) for i in range(num_gpus)]
    # split_and_load expects a list of contexts, so each rank wraps its GPU in a list
    if comm_rank == 0:
        ctx = [mx.gpu(0)]
    else:
        ctx = [mx.gpu(1)]
    print('running on:', ctx)
    net.initialize(init=init.Normal(sigma=0.01), ctx=ctx, force_reinit=True)
    # SSP_FLAG and thre belong to a modified gluon.Trainer used in this project
    # (stale-synchronous parallel training); they are not standard MXNet arguments.
    trainer = gluon.Trainer(net.collect_params(),
                            'sgd', {'learning_rate': lr},
                            SSP_FLAG=True,
                            thre=2)
    loss = gloss.SoftmaxCrossEntropyLoss()
    for epoch in range(400000):
        start = time.time()
        for X, y in train_iter:
            gpu_Xs = gutils.split_and_load(X, ctx)
            gpu_ys = gutils.split_and_load(y, ctx)
            with autograd.record():
                ls = [
                    loss(net(gpu_X), gpu_y)
                    for gpu_X, gpu_y in zip(gpu_Xs, gpu_ys)
                ]
            for l in ls:
                l.backward()
            trainer.step(epoch, batch_size)
        train_time = time.time() - start
        test_acc = d2l.evaluate_accuracy(test_iter, net, ctx[0])
        print('epoch %d, time %.1f sec, test acc %.2f, process %d' %
              (epoch + 1, train_time, test_acc, comm_rank))
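This variant pairs one MPI rank with one GPU; it assumes a module-level mpi4py import and the modified Trainer noted above, and is launched with mpirun rather than plain python. The assumed import:

from mpi4py import MPI   # module-level; launch with e.g. `mpirun -np 2 python train.py`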
Example #33
def get_classifer(max_sample_num):
    ctx = mx.gpu()
    crop_hw = (config.height, config.width)

    ds_train = JSDataset(config.train_root, fortrain=True, crop_hw = crop_hw,max_sample_num = max_sample_num)
    ds_test = JSDataset(config.test_root, fortrain=False, crop_hw=crop_hw, max_sample_num = max_sample_num)

    trainiter = gluon.data.DataLoader(ds_train,batch_size=config.batch_size, shuffle=True,last_batch="rollover")
    testiter = gluon.data.DataLoader(ds_test,batch_size=config.batch_size, shuffle=False,last_batch="rollover")

    logging.info("train num: {} test num: {}".format(len(ds_train), len(ds_test)))

    max_update = config.max_epoch * len(ds_train) // config.batch_size
    lr_sch = mx.lr_scheduler.PolyScheduler(max_update=max_update,base_lr=config.base_lr,pwr=1)

    net = CIFARNET_QUICK(class_num = config.class_num, ctx=ctx)

    if not (config.pretrained_model is None) and not (config.pretrained_model == ""):
        net.load_params(config.pretrained_model,ctx = ctx)
        logging.info("loading model {}".format(config.pretrained_model))

    trainer = mx.gluon.Trainer(net.collect_params(),optimizer=config.optimizer,optimizer_params={"learning_rate":config.base_lr})


    loss_ce = mx.gluon.loss.SoftmaxCrossEntropyLoss()

    class ACC_SHOW(object):
        def __init__(self,label_num):
            self.label_num = label_num
            self.axis = 1
            self.acc = {'total':0,'hit':0}
            self.acc_per_class = OrderedDict()
            for key in range(label_num):
                self.acc_per_class[key] = {'total':0,'hit':0}
            return
        def reset(self):
            self.acc = {'total':0,'hit':0}
            self.acc_per_class = OrderedDict()
            for key in range(self.label_num):
                self.acc_per_class[key] = {'total':0,'hit':0}
            return

        def update(self,preds, labels):
            if isinstance(preds[0], mx.nd.NDArray):
                preds = [pred.asnumpy() for pred in preds]
                labels = [label.asnumpy() for label in labels]
            for pred, label in zip(preds,labels):
                pred_label = np.argmax(pred,axis=self.axis)
                label = label.flatten().astype('int32')
                pred_label = pred_label.flatten().astype('int32')
                for p,l in zip(pred_label,label):
                    self.acc_per_class[l]['total'] += 1
                    self.acc['total'] += 1
                    if l == p:
                        self.acc_per_class[l]['hit'] += 1
                        self.acc['hit'] += 1
            return

        def _calc_acc(self,md):
            total = md['total']
            hit = md['hit']
            if total < 1:
                return 0
            return float(hit) / total

        def get_acc(self):
            return self._calc_acc(self.acc)

        def get(self):
            infos = ['acc {:.5} acc_per_class'.format( self._calc_acc(self.acc) )]
            for key in self.acc_per_class.keys():
                #print self.acc_per_class[key]
                infos.append('{:.3}'.format(self._calc_acc(self.acc_per_class[key])))
            return ' '.join(infos)

    class LOSS_SHOW(object):
        def __init__(self):
            self.loss_list = []

        def reset(self):
            self.loss_list = []

        def update(self, loss_list):
            if isinstance(loss_list[0], mx.nd.NDArray):
                loss_list = [loss.asnumpy() for loss in loss_list]
            loss = np.vstack(loss_list)
            #print loss.tolist()[0]
            self.loss_list.extend(loss.tolist()[0])

        def get(self):
            return "loss {:.5}".format( np.asarray(self.loss_list).mean()  )
    def show_gradient(net):
        return
        grads_list = []
        for block in net.layers:
            if not isinstance(block, CIFARNET_BLOCK) and not isinstance(block, CIFARNET_BLOCK_A):
                continue
            for layer in block.layers:
                if not isinstance(layer, gluon.nn.Conv2D):
                    continue
                grads = layer.weight.grad().as_in_context(mx.cpu()).asnumpy()
                grads_list.append(grads.mean())
                grads_list.append(grads.max())
                grads_list.append(grads.min())
        line = ['grads: ']
        for grads in grads_list:
            line.append( '%.6f'%grads )
        logging.info(','.join(line))
        return


    class TopAcc:
        def __init__(self):
            self.path = ""
            self.score = 0
        def update(self, path, score):
            if self.score < score:
                self.score = score
                self.path = path
            return
        def get_top(self):
            return self.path,self.score

    top_acc = TopAcc()

    loss_show = LOSS_SHOW()
    acc = ACC_SHOW( config.class_num )
    display_iter = len(ds_train) // (2 * config.batch_size )
    if display_iter < 1:
        display_iter = 1
    update = 0
    for epoch in range(config.max_epoch):
        acc.reset()
        loss_show.reset()
        for batch in trainiter:
            update += 1
            data, label = batch
            data_list = utils.split_and_load(data, ctx_list=[ctx])
            label_list = utils.split_and_load(label, ctx_list=[ctx])
            with mx.autograd.record():
                # list comprehensions instead of lazy Python-3 map(), so the forward
                # pass actually runs inside the autograd.record() scope; the original
                # `lambda (pred, label):` tuple-unpacking is a SyntaxError in Python 3
                pred_list = [net(d) for d in data_list]
                loss_list = [loss_ce(pred, label)
                             for pred, label in zip(pred_list, label_list)]
            for loss in loss_list:
                loss.backward()
            trainer.step(config.batch_size)
            mx.nd.waitall()
            acc.update(labels = label_list,preds = pred_list)
            loss_show.update(loss_list)
            if 0 == (update % display_iter):
                logging.info("train update {} lr {} {} {}".format(update,trainer.learning_rate,loss_show.get(), acc.get()))
            trainer.set_learning_rate(lr_sch(update))
        acc.reset()
        show_gradient(net)
        loss_show.reset()
        for (data,label) in testiter:
            data_list = utils.split_and_load(data, [ctx])
            label_list = utils.split_and_load(label, [ctx])
            pred_list = [net(d) for d in data_list]
            loss_list = [loss_ce(pred, label)
                         for pred, label in zip(pred_list, label_list)]
            mx.nd.waitall()
            acc.update(labels = label_list, preds = pred_list)
            loss_show.update(loss_list)
        logging.info("test update {} epoch {} {} {}".format(update,epoch,loss_show.get(), acc.get()))
        if epoch % config.save_epoch_step == 0:
            net.save_params(config.model_path(epoch))
            top_acc.update( config.model_path(epoch), acc.get_acc() )
    net.save_params(config.model_path("last"))
    return top_acc.get_top()
Example #34
def _check_batchnorm_result(input, num_devices=1, cuda=False):
    from mxnet.gluon.utils import split_and_load
    from mxnet.test_utils import assert_almost_equal  # used in the assertions below
    def _find_bn(module):
        if isinstance(module, (mx.gluon.nn.BatchNorm, mx.gluon.contrib.nn.SyncBatchNorm)):
            return module
        elif isinstance(module.module, (mx.gluon.nn.BatchNorm, mx.gluon.contrib.nn.SyncBatchNorm)):
            return module.module

        raise RuntimeError('BN not found')

    def _syncParameters(bn1, bn2, ctx):
        ctx = input.context
        bn2.gamma.set_data(bn1.gamma.data(ctx))
        bn2.beta.set_data(bn1.beta.data(ctx))
        bn2.running_mean.set_data(bn1.running_mean.data(ctx))
        bn2.running_var.set_data(bn1.running_var.data(ctx))

    input1 = input.copy()
    input2 = input.copy()

    if cuda:
        input1 = input.as_in_context(mx.gpu(0))
        ctx_list = [mx.gpu(i) for i in range(num_devices)]
    else:
        ctx_list = [mx.cpu(0) for _ in range(num_devices)]

    nch = input.shape[1]
    bn1 = mx.gluon.nn.BatchNorm(in_channels=nch)
    bn2 = mx.gluon.contrib.nn.SyncBatchNorm(in_channels=nch, num_devices=num_devices)

    bn1.initialize(ctx=ctx_list[0])
    bn2.initialize(ctx=ctx_list)

    # using the same values for gamma and beta
    #_syncParameters(_find_bn(bn1), _find_bn(bn2), ctx_list[0])

    input1.attach_grad()
    inputs2 = split_and_load(input2, ctx_list, batch_axis=0)
    for xi in inputs2:
        xi.attach_grad()

    with mx.autograd.record():
        output1 = bn1(input1)
        output2 = [bn2(xi) for xi in inputs2]
        loss1 = (output1 ** 2).sum()
        loss2 = [(output ** 2).sum() for output in output2]
        mx.autograd.backward(loss1)
        mx.autograd.backward(loss2)

    output2 = mx.nd.concat(*[output.as_in_context(input.context) for output in output2], dim=0)
    # assert forwarding
    assert_almost_equal(input1.asnumpy(), input2.asnumpy(), atol=1e-3, rtol=1e-3)
    assert_almost_equal(output1.asnumpy(), output2.asnumpy(), atol=1e-3, rtol=1e-3)
    assert_almost_equal(_find_bn(bn1).running_mean.data(ctx_list[0]).asnumpy(),
                        _find_bn(bn2).running_mean.data(ctx_list[0]).asnumpy(),
                        atol=1e-3, rtol=1e-3)
    assert_almost_equal(_find_bn(bn1).running_var.data(ctx_list[0]).asnumpy(),
                        _find_bn(bn2).running_var.data(ctx_list[0]).asnumpy(),
                        atol=1e-3, rtol=1e-3)
    input2grad = mx.nd.concat(*[output.grad.as_in_context(input.context) for output in inputs2], dim=0)
    assert_almost_equal(input1.grad.asnumpy(), input2grad.asnumpy(), atol=1e-3, rtol=1e-3)
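A hedged invocation sketch for the test above, CPU-only so it runs without CUDA (num_devices CPU contexts stand in for GPUs, mirroring MXNet's own SyncBatchNorm test):

data = mx.nd.random.uniform(shape=(8, 4, 16, 16))
_check_batchnorm_result(data, num_devices=2, cuda=False)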