help='Enable built-in profiler (0=off, 1=on)')
opt = parser.parse_args()

# global variables
logger.info('Starting new image-classification task: %s', opt)
mx.random.seed(opt.seed)
model_name = opt.model
dataset_classes = {'mnist': 10, 'cifar10': 10, 'imagenet': 1000, 'dummy': 1000}
batch_size, dataset, classes = opt.batch_size, opt.dataset, dataset_classes[
    opt.dataset]
context = [mx.gpu(int(i))
           for i in opt.gpus.split(',')] if opt.gpus.strip() else [mx.cpu()]
num_gpus = len(context)
batch_size *= max(1, num_gpus)
lr_steps = [int(x) for x in opt.lr_steps.split(',') if x.strip()]
metric = CompositeEvalMetric([Accuracy(), TopKAccuracy(5)])


def get_model(model, ctx, opt):
    """Model initialization."""
    kwargs = {'ctx': ctx, 'pretrained': opt.use_pretrained, 'classes': classes}
    if model.startswith('resnet'):
        kwargs['thumbnail'] = opt.use_thumbnail
    elif model.startswith('vgg'):
        kwargs['batch_norm'] = opt.batch_norm

    net = models.get_model(model, **kwargs)
    if opt.resume:
        net.load_params(opt.resume)
    elif not opt.use_pretrained:
        if model in ['alexnet']:
            # the listing truncates this snippet here; the upstream MXNet
            # image-classification example continues by initializing AlexNet
            # with a Normal initializer and everything else with Xavier
            net.initialize(mx.init.Normal())
        else:
            net.initialize(mx.init.Xavier(magnitude=2))
    return net

model_name = opt.model
dataset_classes = {
    'mnist': 10,
    'cifar10': 10,
    'imagenet': 1000,
    'dummy': 1000,
    'sampleimgnet': 200
}
batch_size, dataset, classes = opt.batch_size, opt.dataset, dataset_classes[
    opt.dataset]
context = [mx.gpu(int(i))
           for i in opt.gpus.split(',')] if opt.gpus.strip() else [mx.cpu()]
num_gpus = len(context)
batch_size *= max(1, num_gpus)
lr_steps = [int(x) for x in opt.lr_steps.split(',') if x.strip()]
metric = CompositeEvalMetric([Accuracy(), TopKAccuracy(5), CrossEntropy()])


def get_model(model, ctx, opt):
    """Model initialization."""
    kwargs = {'ctx': ctx, 'pretrained': opt.use_pretrained, 'classes': classes}
    if model.startswith('resnet'):
        kwargs['thumbnail'] = opt.use_thumbnail
    elif model.startswith('vgg'):
        kwargs['batch_norm'] = opt.batch_norm

    net = models.get_model(model, **kwargs)
    if opt.resume:
        net.load_params(opt.resume)
    elif not opt.use_pretrained:
        if model in ['alexnet']:
            # truncated in the listing; completed as in the upstream MXNet example
            net.initialize(mx.init.Normal())
        else:
            net.initialize(mx.init.Xavier(magnitude=2))
    return net
 def __init__(self):
     is_pair = True
     class_labels = ['0', '1']
     metric = Accuracy()
     super(LCQMCTask, self).__init__(class_labels, metric, is_pair)

 def __init__(self):
     is_pair = False
     class_labels = ['0', '1']
     metric = Accuracy()
     super(ChnSentiCorpTask, self).__init__(class_labels, metric, is_pair)

 def __init__(self):
     is_pair = True
     class_labels = ['not_entailment', 'entailment']
     metric = Accuracy()
     super(QNLITask, self).__init__(class_labels, metric, is_pair)

 def __init__(self):
     is_pair = True
     class_labels = ['neutral', 'entailment', 'contradiction']
     metric = Accuracy()
     super(MNLITask, self).__init__(class_labels, metric, is_pair)
Example #7
 def get_metric():
     """Get metrics Accuracy"""
     return Accuracy()
Example #8
 def get_metric():
     """Get metrics Accuracy and F1"""
     metric = CompositeEvalMetric()
     for child_metric in [Accuracy(), F1()]:
         metric.add(child_metric)
     return metric
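A minimal sketch (not from the original source) of how a composite Accuracy+F1
metric like the one returned above is fed and read; the toy labels and
per-class probabilities below are made up:

import mxnet as mx
from mxnet.metric import CompositeEvalMetric, Accuracy, F1

metric = CompositeEvalMetric()
for child_metric in [Accuracy(), F1()]:
    metric.add(child_metric)

# one toy binary-classification batch: labels and per-class probabilities
labels = [mx.nd.array([0, 1, 1])]
preds = [mx.nd.array([[0.7, 0.3], [0.2, 0.8], [0.4, 0.6]])]
metric.update(labels, preds)
print(metric.get())  # perfect toy batch -> accuracy and F1 both 1.0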
Example #9
 def __init__(self):
     is_pair = False
     class_labels = ['0', '1']
     self.metric = Accuracy()
     super(SSTTask, self).__init__(class_labels, self.metric, is_pair)
Example #10
def main():
    epoches = 32
    gpu_id = 7
    ctx_list = [mx.gpu(x) for x in [7, 8]]
    log_interval = 100
    batch_size = 32
    start_epoch = 0
    # trainer_resume = resume + ".states" if resume is not None else None
    trainer_resume = None

    resume = None
    from mxnet.gluon.data.vision import transforms
    transform_fn = transforms.Compose([
        LeftTopPad(dest_shape=(256, 256)),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.485, 0.456, 0.406),
                             std=(0.229, 0.224, 0.225))
    ])
    dataset = CaptionDataSet(
        image_root="/data3/zyx/yks/coco2017/train2017",
        annotation_path=
        "/data3/zyx/yks/coco2017/annotations/captions_train2017.json",
        transforms=transform_fn,
        feature_hdf5="output/train2017.h5")
    val_dataset = CaptionDataSet(
        image_root="/data3/zyx/yks/coco2017/val2017",
        annotation_path=
        "/data3/zyx/yks/coco2017/annotations/captions_val2017.json",
        words2index=dataset.words2index,
        index2words=dataset.index2words,
        transforms=transform_fn,
        feature_hdf5="output/val2017.h5")
    dataloader = DataLoader(dataset=dataset,
                            batch_size=batch_size,
                            shuffle=True,
                            num_workers=1,
                            pin_memory=True,
                            last_batch="discard")
    val_loader = DataLoader(dataset=val_dataset,
                            batch_size=batch_size,
                            shuffle=True,
                            num_workers=1,
                            pin_memory=True)

    num_words = dataset.words_count

    # set up logger
    save_prefix = "output/res50_"
    logging.basicConfig()
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    log_file_path = save_prefix + '_train.log'
    log_dir = os.path.dirname(log_file_path)
    if log_dir and not os.path.exists(log_dir):
        os.makedirs(log_dir)
    fh = logging.FileHandler(log_file_path)
    logger.addHandler(fh)

    net = EncoderDecoder(num_words=num_words,
                         test_max_len=val_dataset.max_len).cuda()
    for name, p in net.named_parameters():
        if "bias" in name:
            p.data.zero_()
        else:
            p.data.normal_(0, 0.01)
        print(name)
    net = torch.nn.DataParallel(net)
    if resume is not None:
        # net is a torch DataParallel module at this point, so resume from a
        # state dict (checkpoints below are saved via net.module.state_dict())
        net.module.load_state_dict(torch.load(resume))
        logger.info("Resumed from checkpoint {}.".format(resume))

    trainer = torch.optim.Adam(params=filter(lambda p: p.requires_grad,
                                             net.parameters()),
                               lr=4e-4)
    criterion = Criterion()
    accu_top3_metric = TopKAccuracy(top_k=3)
    accu_top1_metric = Accuracy(name="batch_accu")
    ctc_loss_metric = Loss(name="ctc_loss")
    alpha_metric = Loss(name="alpha_loss")
    batch_bleu = BleuMetric(name="batch_bleu",
                            pred_index2words=dataset.index2words,
                            label_index2words=dataset.index2words)
    epoch_bleu = BleuMetric(name="epoch_bleu",
                            pred_index2words=dataset.index2words,
                            label_index2words=dataset.index2words)
    btic = time.time()
    logger.info(batch_size)
    logger.info(num_words)
    logger.info(len(dataset.words2index))
    logger.info(len(dataset.index2words))
    logger.info(dataset.words2index["<PAD>"])
    logger.info(val_dataset.words2index["<PAD>"])
    logger.info(len(val_dataset.words2index))
    for nepoch in range(start_epoch, epoches):
        if nepoch > 15:
            # torch.optim.Adam has no set_learning_rate(); adjust the param groups directly
            for group in trainer.param_groups:
                group["lr"] = 4e-5
        logger.info("Current lr: {}".format(trainer.param_groups[0]["lr"]))
        accu_top1_metric.reset()
        accu_top3_metric.reset()
        ctc_loss_metric.reset()
        alpha_metric.reset()
        epoch_bleu.reset()
        batch_bleu.reset()
        for nbatch, batch in enumerate(tqdm.tqdm(dataloader)):
            batch = [
                Variable(torch.from_numpy(x.asnumpy()).cuda()) for x in batch
            ]
            data, label, label_len = batch
            label = label.long()
            label_len = label_len.long()
            max_len = label_len.max().data.cpu().numpy()
            net.train()
            # clear gradients accumulated from the previous step
            trainer.zero_grad()
            outputs = net(data, label, max_len)
            predictions, alphas = outputs
            ctc_loss = criterion(predictions, label, label_len)
            loss2 = 1.0 * ((1. - alphas.sum(dim=1))**2).mean()
            ((ctc_loss + loss2) / batch_size).backward()
            for group in trainer.param_groups:
                for param in group['params']:
                    if param.grad is not None:
                        param.grad.data.clamp_(-5, 5)

            trainer.step()
            if nbatch % 10 == 0:
                for n, l in enumerate(label_len):
                    l = int(l.data.cpu().numpy())
                    la = label[n, 1:l].data.cpu().numpy()
                    pred = predictions[n, :(l - 1)].data.cpu().numpy()
                    accu_top3_metric.update(mx.nd.array(la), mx.nd.array(pred))
                    accu_top1_metric.update(mx.nd.array(la), mx.nd.array(pred))
                    epoch_bleu.update(la, predictions[n, :].data.cpu().numpy())
                    batch_bleu.update(la, predictions[n, :].data.cpu().numpy())
                ctc_loss_metric.update(
                    None,
                    preds=mx.nd.array([ctc_loss.data.cpu().numpy()]) /
                    batch_size)
                alpha_metric.update(None,
                                    preds=mx.nd.array(
                                        [loss2.data.cpu().numpy()]))
                if nbatch % log_interval == 0 and nbatch > 0:
                    msg = ','.join([
                        '{}={:.3f}'.format(*metric.get()) for metric in [
                            epoch_bleu, batch_bleu, accu_top1_metric,
                            accu_top3_metric, ctc_loss_metric, alpha_metric
                        ]
                    ])
                    logger.info(
                        '[Epoch {}][Batch {}], Speed: {:.3f} samples/sec, {}'.
                        format(
                            nepoch, nbatch,
                            log_interval * batch_size / (time.time() - btic),
                            msg))
                    btic = time.time()
                    batch_bleu.reset()
                    accu_top1_metric.reset()
                    accu_top3_metric.reset()
                    ctc_loss_metric.reset()
                    alpha_metric.reset()
        net.eval()
        bleu, acc_top1 = validate(net,
                                  gpu_id=gpu_id,
                                  val_loader=val_loader,
                                  train_index2words=dataset.index2words,
                                  val_index2words=val_dataset.index2words)
        save_path = save_prefix + "_weights-%d-bleu-%.4f-%.4f.params" % (
            nepoch, bleu, acc_top1)
        torch.save(net.module.state_dict(), save_path)
        torch.save(trainer.state_dict(), save_path + ".states")
        logger.info("Saved checkpoint to {}.".format(save_path))
Example #11
 def fit(self, itr, ctx, epochs, batch_size, callbacks=None):
     # ADAM optimizer
     #opt_params={'learning_rate':0.001, 'beta1':0.9, 'beta2':0.999, 'epsilon':1e-08}
     opt = mx.optimizer.create('adam')
     # SGD optimizer
     #opt = mx.optimizer.create('sgd')
     # AdaDelta optimizer
     #opt = mx.optimizer.create('adadelta')
     # initialize parameters
     # MXNet initializes the weight matrices uniformly by drawing from [−0.07,0.07], bias parameters are all set to 0
     # 'Xavier': initializer is designed to keep the scale of gradients roughly the same in all layers
     self._net.initialize(mx.init.Xavier(magnitude=2.3),
                          ctx=ctx,
                          force_reinit=True)
     # fetch and broadcast parameters
     params = self._net.collect_params()
     # trainer
     trainer = Trainer(params=params, optimizer=opt, kvstore='device')
     # loss function
     loss_fn = SoftmaxCrossEntropyLoss()
     # use accuracy as the evaluation metric
     metric = Accuracy()
     # train
     for e in range(epochs):
         if callbacks is not None:
             for cb in callbacks:
                 cb.before_epoch(e)
         # reset evaluation result to initial state
         metric.reset()
         # reset the train data iterator.
         itr.reset()
         # loop over the train data iterator
         for i, batch in enumerate(itr):
             # splits train data into multiple slices along batch_axis
             # copy each slice into a context
             data = split_and_load(batch.data[0],
                                   ctx_list=ctx,
                                   batch_axis=0,
                                   even_split=False)
             # splits train label into multiple slices along batch_axis
             # copy each slice into a context
             label = split_and_load(batch.label[0],
                                    ctx_list=ctx,
                                    batch_axis=0,
                                    even_split=False)
             outputs = []
             losses = []
             # inside training scope
             with ag.record():
                 for x, y in zip(data, label):
                     z = self._net(x)
                     # computes softmax cross entropy loss
                     l = loss_fn(z, y)
                     outputs.append(z)
                     losses.append(l)
             # backpropagate the error for one iteration
             for l in losses:
                 l.backward()
             # make one step of parameter update.
             # trainer needs to know the batch size of data
             # to normalize the gradient by 1/batch_size
             trainer.step(batch_size)
             # updates internal evaluation
             metric.update(label, outputs)
             # invoke callbacks after batch
             if callbacks is not None:
                 for cb in callbacks:
                     cb.after_batch(e, i, batch_size, metric)
         # invoke callbacks after epoch
         if callbacks is not None:
             for cb in callbacks:
                 cb.after_epoch(e, i, batch_size, metric)
     return metric
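The comments in fit() above describe the standard Gluon multi-context training
step (split_and_load, autograd.record, trainer.step). A minimal standalone
sketch of that pattern on toy data; every name below is illustrative and not
taken from the original repository:

import mxnet as mx
from mxnet import autograd as ag, gluon
from mxnet.gluon.utils import split_and_load
from mxnet.metric import Accuracy

ctx = [mx.gpu(i) for i in range(mx.context.num_gpus())] or [mx.cpu()]

net = gluon.nn.Dense(10)                       # toy 10-class classifier
net.initialize(mx.init.Xavier(magnitude=2.3), ctx=ctx)
trainer = gluon.Trainer(net.collect_params(), 'adam')
loss_fn = gluon.loss.SoftmaxCrossEntropyLoss()
metric = Accuracy()

batch_size = 32
data = mx.nd.random.uniform(shape=(batch_size, 20))
label = mx.nd.array([i % 10 for i in range(batch_size)])

# split the batch across the available contexts, exactly as fit() does
data_parts = split_and_load(data, ctx_list=ctx, batch_axis=0, even_split=False)
label_parts = split_and_load(label, ctx_list=ctx, batch_axis=0, even_split=False)

with ag.record():
    outputs = [net(x) for x in data_parts]
    losses = [loss_fn(z, y) for z, y in zip(outputs, label_parts)]
for l in losses:
    l.backward()
trainer.step(batch_size)                       # gradient scaled by 1/batch_size
metric.update(label_parts, outputs)
print(metric.get())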
Example #12
opt_params = {
    # the opening lines of this snippet were lost in extraction; only the tail
    # of the optimizer-parameter dict survives
    'beta2': 0.999,
    'epsilon': 1e-08
}
opt = mx.optimizer.create('adam', **opt_params)
# initialize parameters
model.initialize(force_reinit=True, ctx=ctx)
# fetch and broadcast parameters
params = model.collect_params()
if params is not None:
    hvd.broadcast_parameters(params, root_rank=0)
# create DistributedTrainer, a subclass of gluon.Trainer
trainer = hvd.DistributedTrainer(params, opt)
# loss function
loss_fn = SoftmaxCrossEntropyLoss()
# use accuracy as the evaluation metric
metric = Accuracy()
# train
start = time.perf_counter()
for epoch in range(1, EPOCHS + 1):
    # Reset the train data iterator.
    train_data.reset()
    for i, batch in enumerate(train_data):
        if i == 0:
            tick_0 = time.time()
        data = batch.data[0].as_in_context(ctx)
        label = batch.label[0].as_in_context(ctx)
        with ag.record():
            output = model(data.astype('float32', copy=False))
            loss = loss_fn(output, label)
        loss.backward()
        trainer.step(BATCH_SIZE)
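The snippet above presupposes that Horovod, the data iterator, and the model
have already been set up. A minimal sketch of that assumed setup; the
hyper-parameter values and the stand-in network are placeholders, not taken
from the original source:

import horovod.mxnet as hvd
import mxnet as mx
from mxnet import autograd as ag, gluon
from mxnet.gluon.loss import SoftmaxCrossEntropyLoss
from mxnet.io import NDArrayIter
from mxnet.metric import Accuracy

hvd.init()                                        # start Horovod
ctx = mx.gpu(hvd.local_rank()) if mx.context.num_gpus() else mx.cpu()

EPOCHS, BATCH_SIZE = 5, 64                        # placeholder hyper-parameters
X = mx.nd.random.uniform(shape=(1024, 784))
y = mx.nd.array([i % 10 for i in range(1024)])
train_data = NDArrayIter(X, y, batch_size=BATCH_SIZE, shuffle=True)

model = gluon.nn.Dense(10)                        # stand-in for the real network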
Example #13
 def get_metric(cls):
     """Get metrics Accuracy and F1"""
     metric = CompositeEvalMetric()
     for child_metric in [Accuracy(), F1(average='micro')]:
         metric.add(child_metric)
     return metric
Example #14

# imports assumed for this snippet; SimpleNet is defined in the source project
from mxnet import gluon, autograd, context, cpu, gpu
from mxnet.metric import Accuracy


def load_net(param_file="net.params", ctx=cpu(0)):
    net = SimpleNet()
    net.load_parameters(param_file, ctx=ctx)
    return net


def get_val_data(transformer, batch_size=128):
    mnist_valid = gluon.data.vision.FashionMNIST(train=False)
    valid_data = gluon.data.DataLoader(
        mnist_valid.transform_first(transformer),
        batch_size=batch_size,
        num_workers=4)
    return valid_data


if __name__ == "__main__":
    ctx = gpu(0) if context.num_gpus() else cpu(0)
    net = load_net("net.params", ctx=ctx)
    # the original snippet never shows how `transformer` is built; a plain
    # ToTensor transform is assumed here
    transformer = gluon.data.vision.transforms.ToTensor()
    valid_data = get_val_data(transformer)

    val_acc = Accuracy()
    for data, label in valid_data:
        data = data.as_in_context(ctx)
        label = label.as_in_context(ctx)
        with autograd.predict_mode():
            out = net(data)
            val_acc.update(label, out)
    print("Accuray: ", val_acc.get()[1])
Example #15
 def __init__(self):
     is_pair = False
     class_labels = ['0', '1', '2', '3']
     metric = Accuracy()
     super(Weibo2Task, self).__init__(class_labels, metric, is_pair)