Example #1
0
def get_triplet_train_data(batch_size=8):
    """
    Build the training DataLoader for triplet-loss training.

    :param batch_size: mini-batch size
    :return: a shuffled DataLoader over the triplet dataset
    """
    # ImageNet-style augmentation + normalization pipeline.
    transform_train = transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomFlipLeftRight(),
        transforms.RandomColorJitter(brightness=0.4, contrast=0.4, saturation=0.4),
        transforms.RandomLighting(0.1),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])

    img_folder, img_file = get_data_path()
    # FIX: the original wrapped this in a single-argument os.path.join(),
    # which is a no-op; the cache path is simply the data file plus a suffix.
    img_saved = img_file + ".tp.npz"
    td = TripletDataset(data_folder=img_folder, data_file=img_file,
                        saved_path=img_saved, transform=transform_train)
    train_data = DataLoader(td, batch_size=batch_size, shuffle=True)
    return train_data
def test_transformer():
    """Smoke-test a long chain of Gluon vision transforms on a dummy image."""
    from mxnet.gluon.data.vision import transforms

    ops = [
        transforms.Resize(300),
        transforms.Resize(300, keep_ratio=True),
        transforms.CenterCrop(256),
        transforms.RandomCrop(256, pad=16),
        transforms.RandomResizedCrop(224),
        transforms.RandomFlipLeftRight(),
        transforms.RandomColorJitter(0.1, 0.1, 0.1, 0.1),
        transforms.RandomBrightness(0.1),
        transforms.RandomContrast(0.1),
        transforms.RandomSaturation(0.1),
        transforms.RandomHue(0.1),
        transforms.RandomLighting(0.1),
        transforms.ToTensor(),
        transforms.RandomRotation([-10., 10.]),
        transforms.Normalize([0, 0, 0], [1, 1, 1]),
    ]
    transform = transforms.Compose(ops)

    # Force evaluation so any transform error surfaces inside this test.
    transform(mx.nd.ones((245, 480, 3), dtype='uint8')).wait_to_read()
Example #3
0
def get_transform(jitter_param=0.4, pca_noise=0.2):
    """Return (train, test) transform pipelines for 32x32 CIFAR-style images.

    See https://mxnet.apache.org/api/python/docs/tutorials/packages/gluon/data/data_augmentation.html
    """
    cifar_mean = [0.4914, 0.4822, 0.4465]
    cifar_std = [0.2023, 0.1994, 0.2010]

    # Training pipeline: random crop/flip/color-jitter/lighting noise.
    augmentations = [
        transforms.Resize(32),
        transforms.RandomResizedCrop((32, 32), scale=(0.8, 1.0), ratio=(0.9, 1.1)),
        transforms.RandomFlipLeftRight(),
        transforms.RandomColorJitter(brightness=jitter_param, contrast=jitter_param, saturation=jitter_param,
                                     hue=jitter_param),
        transforms.RandomLighting(alpha=pca_noise),
        transforms.ToTensor(),
        transforms.Normalize(cifar_mean, cifar_std),
    ]
    transform_train = transforms.Compose(augmentations)

    # Evaluation pipeline: deterministic resize + normalize only.
    transform_test = transforms.Compose([
        transforms.Resize(32),
        transforms.ToTensor(),
        transforms.Normalize(cifar_mean, cifar_std),
    ])

    return transform_train, transform_test
Example #4
0
def GluonTransformation(data: mx.nd.array):
    """Apply a stack of random Gluon augmentations to one image.

    data: mx.nd.array with layout (h, w, c)

    return: augmented mx.nd.array with layout (c, h, w)
    """
    data = mx.nd.array(data)
    pipeline = transforms.Compose([
        transforms.RandomResizedCrop(200, (0.8, 1.0)),
        transforms.CenterCrop((300, 300)),
        transforms.RandomFlipLeftRight(),
        transforms.RandomFlipTopBottom(),
        transforms.RandomLighting(0.3),
        transforms.RandomColorJitter(brightness=0.1,
                                     contrast=0.1,
                                     saturation=0.1,
                                     hue=0.2),
        transforms.Resize(384),
        transforms.ToTensor(),  # h,w,c -> c, h, w
        transforms.Normalize(0, 1)
    ])
    return pipeline(data)


# if __name__=='__main__':

#     img=cv2.imread('1.jpg')
#     img_out=ImageRotate(img,30)

#     # img_out=transformation(img)
#     cv2.imshow('ori',img)
#     cv2.imshow('rotate',img_out)
#     cv2.waitKey(0)
#     # cv2.imshow('img',mx.nd.clip(img_out,0,255).asnumpy().astype(np.uint8))
#     # cv2.imshow('img',img_out.asnumpy().astype(np.uint8))

#     # cv2.waitKey(0)
#     print('done!')
Example #5
0
    def get_data_rec_transfomed(args):
        """Build ImageNet train/val DataLoaders plus a batch-splitting helper.

        :param args: namespace providing data_dir, num_workers, batch_size
            and num_gpus.
        :return: (train_data, val_data, batch_fn)

        NOTE(review): also reads the enclosing-scope name ``opt``
        (input_size, crop_ratio) — confirm it is in scope at the call site.
        """
        data_dir = args.data_dir
        num_workers = args.num_workers
        # Scale the per-device batch size by the number of GPUs in use.
        batch_size = args.batch_size * max(1, args.num_gpus)
        normalize = transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        jitter_param = 0.4
        lighting_param = 0.1
        input_size = opt.input_size
        # Fall back to the conventional 0.875 crop ratio when unset.
        crop_ratio = opt.crop_ratio if opt.crop_ratio > 0 else 0.875
        resize = int(math.ceil(input_size / crop_ratio))

        def batch_fn(batch, ctx):
            # Split one (data, label) batch across the given device contexts.
            data = gluon.utils.split_and_load(batch[0], ctx_list=ctx, batch_axis=0)
            label = gluon.utils.split_and_load(batch[1], ctx_list=ctx, batch_axis=0)
            return data, label

        # Training: random crop/flip/jitter/lighting; validation: resize + center crop.
        transform_train = transforms.Compose([
            transforms.RandomResizedCrop(input_size),
            transforms.RandomFlipLeftRight(),
            transforms.RandomColorJitter(brightness=jitter_param, contrast=jitter_param,
                                        saturation=jitter_param),
            transforms.RandomLighting(lighting_param),
            transforms.ToTensor(),
            normalize
        ])
        transform_test = transforms.Compose([
            transforms.Resize(resize, keep_ratio=True),
            transforms.CenterCrop(input_size),
            transforms.ToTensor(),
            normalize
        ])

        train_data = gluon.data.DataLoader(
            imagenet.classification.ImageNet(data_dir, train=True).transform_first(transform_train),
            batch_size=batch_size, shuffle=True, last_batch='discard', num_workers=num_workers)
        val_data = gluon.data.DataLoader(
            imagenet.classification.ImageNet(data_dir, train=False).transform_first(transform_test),
            batch_size=batch_size, shuffle=False, num_workers=num_workers)

        return train_data, val_data, batch_fn
Example #6
0
def cifar10_train_transform(ds_metainfo,
                            mean_rgb=(0.4914, 0.4822, 0.4465),
                            std_rgb=(0.2023, 0.1994, 0.2010),
                            jitter_param=0.4,
                            lighting_param=0.1):
    """Compose the standard CIFAR-10 training augmentation pipeline."""
    # Sanity-check that the metadata describes 32x32 input images.
    assert (ds_metainfo is not None)
    assert (ds_metainfo.input_image_size[0] == 32)
    steps = [
        RandomCrop(size=32, pad=4),
        transforms.RandomFlipLeftRight(),
        transforms.RandomColorJitter(brightness=jitter_param,
                                     contrast=jitter_param,
                                     saturation=jitter_param),
        transforms.RandomLighting(lighting_param),
        transforms.ToTensor(),
        transforms.Normalize(mean=mean_rgb, std=std_rgb),
    ]
    return transforms.Compose(steps)
Example #7
0
 def __init__(self):
     """Set up batch sizing and the train/test transform pipelines.

     NOTE(review): reads ``self.im_size``, ``self.num_gpus`` and
     ``self.jitter_param``, which must already be assigned (e.g. by a parent
     class) before this runs — confirm in the full class definition.
     """
     self.scale = 1.59
     # Use smaller per-device batches for larger input resolutions.
     self.per_device_batch_size = 16 if self.im_size == 224 else 4
     self.batch_size = self.per_device_batch_size * max(self.num_gpus, 1)
     # Training pipeline: resize + color jitter + normalize.
     self.transform_train = transforms.Compose([
         transforms.Resize((int(self.im_size * self.scale), self.im_size)),
         # transforms.RandomFlipLeftRight(),
         # Normal_Y(),
         transforms.RandomColorJitter(brightness=self.jitter_param,
                                      contrast=self.jitter_param,
                                      saturation=self.jitter_param),
         transforms.ToTensor(),
         transforms.Normalize([0.41432491, 0.41432491, 0.41432491],
                              [0.04530748, 0.04530748, 0.04530748])
     ])
     # Test pipeline: same resize + normalize, no random augmentation.
     self.transform_test = transforms.Compose([
         transforms.Resize((int(self.im_size * self.scale), self.im_size)),
         # Normal_Y(),
         transforms.ToTensor(),
         transforms.Normalize([0.41432491, 0.41432491, 0.41432491],
                              [0.04530748, 0.04530748, 0.04530748])
     ])
def get_dataloader(train_dataset, val_dataset, batch_size, num_workers):
    """Wrap the given datasets in train/val DataLoaders.

    Training uses ImageNet-style random augmentation; validation uses a
    deterministic resize + center crop.
    """
    jitter = 0.4
    lighting = 0.1
    normalize = transforms.Normalize((0.485, 0.456, 0.406),
                                     (0.229, 0.224, 0.225))

    train_tf = transforms.Compose([
        transforms.Resize(480),
        transforms.RandomResizedCrop(224),
        transforms.RandomFlipLeftRight(),
        transforms.RandomColorJitter(brightness=jitter,
                                     contrast=jitter,
                                     saturation=jitter),
        transforms.RandomLighting(lighting),
        transforms.ToTensor(), normalize
    ])
    test_tf = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(), normalize
    ])

    train_data = gluon.data.DataLoader(
        train_dataset.transform_first(train_tf),
        batch_size=batch_size,
        shuffle=True,
        num_workers=num_workers,
        last_batch='rollover')
    val_data = gluon.data.DataLoader(
        val_dataset.transform_first(test_tf),
        batch_size=batch_size,
        shuffle=False,
        num_workers=num_workers,
        last_batch='keep')

    return train_data, val_data
Example #9
0
    def get_train_data(self, batch_size):
        """Build the augmented training DataLoader.

        :param batch_size: mini-batch size
        :return: (DataLoader, dataset length)
        """
        augmentation = transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomFlipLeftRight(),
            transforms.RandomColorJitter(brightness=0.4,
                                         contrast=0.4,
                                         saturation=0.4),
            transforms.RandomLighting(0.1),
            transforms.ToTensor(),
            transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
        ])

        ds = MultilabelDataset(data_folder=self.train_folder,
                               data_file=self.train_file,
                               transform=augmentation)
        loader = DataLoader(dataset=ds,
                            batch_size=batch_size,
                            shuffle=True)
        return loader, len(ds)
Example #10
0
def get_data_loader(data_dir, batch_size, num_workers):
    """Create ImageNet train/val DataLoaders plus a batch-splitting helper.

    :param data_dir: root directory of the ImageNet dataset
    :param batch_size: global batch size
    :param num_workers: worker processes for data loading
    :return: (train_data, val_data, batch_fn)
    :raises ValueError: if a distributed ('sync') kvstore is configured,
        since the plain DataLoader would hang at the end of an epoch.
    """
    # FIX: validate the kvstore BEFORE building the expensive datasets and
    # loaders (the original only raised after all that work was done).
    # NOTE(review): reads the module-level ``opt`` — confirm it is in scope.
    if 'sync' in opt.kvstore:
        raise ValueError("Need to resize iterator for distributed training to not hang at the end")

    normalize = transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    jitter_param = 0.4
    lighting_param = 0.1

    def batch_fn(batch, ctx):
        # Split one (data, label) batch across the given device contexts.
        data = gluon.utils.split_and_load(batch[0], ctx_list=ctx, batch_axis=0)
        label = gluon.utils.split_and_load(batch[1], ctx_list=ctx, batch_axis=0)
        return data, label

    transform_train = transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomFlipLeftRight(),
        transforms.RandomColorJitter(brightness=jitter_param, contrast=jitter_param,
                                     saturation=jitter_param),
        transforms.RandomLighting(lighting_param),
        transforms.ToTensor(),
        normalize
    ])
    transform_test = transforms.Compose([
        transforms.Resize(256, keep_ratio=True),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        normalize
    ])

    train_data = gluon.data.DataLoader(
        imagenet.classification.ImageNet(data_dir, train=True).transform_first(transform_train),
        batch_size=batch_size, shuffle=True, last_batch='discard', num_workers=num_workers)
    val_data = gluon.data.DataLoader(
        imagenet.classification.ImageNet(data_dir, train=False).transform_first(transform_test),
        batch_size=batch_size, shuffle=False, num_workers=num_workers)

    return train_data, val_data, batch_fn
Example #11
0
def get_train_data_source(dataset_args, batch_size, num_workers):
    """Return a shuffled CIFAR-10 training DataLoader with augmentation."""
    mean_rgb = (0.4914, 0.4822, 0.4465)
    std_rgb = (0.2023, 0.1994, 0.2010)
    jitter = 0.4
    lighting = 0.1

    augment = transforms.Compose([
        RandomCrop(size=32, pad=4),
        transforms.RandomFlipLeftRight(),
        transforms.RandomColorJitter(brightness=jitter,
                                     contrast=jitter,
                                     saturation=jitter),
        transforms.RandomLighting(lighting),
        transforms.ToTensor(),
        transforms.Normalize(mean=mean_rgb, std=std_rgb)
    ])

    cifar_train = gluon.data.vision.CIFAR10(root=dataset_args.data_dir,
                                            train=True)
    return gluon.data.DataLoader(dataset=cifar_train.transform_first(fn=augment),
                                 batch_size=batch_size,
                                 shuffle=True,
                                 last_batch='discard',
                                 num_workers=num_workers)
def main():
    """CIFAR-10 training entry point: build loaders, model, then train."""
    opt = parse_args()
    batch_size = opt.batch_size
    classes = 10

    log_dir = os.path.join(opt.save_dir, "logs")
    model_dir = os.path.join(opt.save_dir, "params")
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)

    # Init dataloader
    jitter_param = 0.4
    transform_train = transforms.Compose([
        gcv_transforms.RandomCrop(32, pad=4),
        transforms.RandomFlipLeftRight(),
        transforms.RandomBrightness(jitter_param),
        transforms.RandomColorJitter(jitter_param),
        transforms.RandomContrast(jitter_param),
        transforms.RandomSaturation(jitter_param),
        transforms.ToTensor(),
        transforms.Normalize([0.4914, 0.4822, 0.4465],
                             [0.2023, 0.1994, 0.2010])
    ])

    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize([0.4914, 0.4822, 0.4465],
                             [0.2023, 0.1994, 0.2010])
    ])

    train_data = gluon.data.DataLoader(
        gluon.data.vision.CIFAR10(train=True).transform_first(transform_train),
        batch_size=batch_size,
        shuffle=True,
        last_batch='discard',
        num_workers=opt.num_workers)

    val_data = gluon.data.DataLoader(
        gluon.data.vision.CIFAR10(train=False).transform_first(transform_test),
        batch_size=batch_size,
        shuffle=False,
        num_workers=opt.num_workers)

    num_gpus = opt.num_gpus
    batch_size *= max(1, num_gpus)
    context = [mx.gpu(i)
               for i in range(num_gpus)] if num_gpus > 0 else [mx.cpu()]

    lr_decay = opt.lr_decay
    # Trailing inf sentinel so the decay index can never run past the list.
    lr_decay_epoch = [int(i) for i in opt.lr_decay_epoch.split(',')] + [np.inf]

    # FIX: this assignment was duplicated in the original.
    model_name = opt.model
    if model_name.startswith('cifar_wideresnet'):
        kwargs = {'classes': classes, 'drop_rate': opt.drop_rate}
    else:
        kwargs = {'classes': classes}
    net = get_model(model_name, **kwargs)

    if opt.resume_from:
        net.load_parameters(opt.resume_from, ctx=context)
    optimizer = 'nag'

    save_period = opt.save_period
    if opt.save_dir and save_period:
        save_dir = opt.save_dir
        makedirs(save_dir)
    else:
        save_dir = ''
        save_period = 0

    def test(ctx, val_loader):
        # Evaluate accuracy of ``net`` over the validation loader.
        metric = mx.metric.Accuracy()
        for i, batch in enumerate(val_loader):
            data = gluon.utils.split_and_load(batch[0],
                                              ctx_list=ctx,
                                              batch_axis=0)
            label = gluon.utils.split_and_load(batch[1],
                                               ctx_list=ctx,
                                               batch_axis=0)
            outputs = [net(X) for X in data]
            metric.update(label, outputs)
        return metric.get()

    def train(train_data, val_data, epochs, ctx):
        # Core training loop: logs to TensorBoard and checkpoints best model.
        if isinstance(ctx, mx.Context):
            ctx = [ctx]

        net.hybridize()
        net.initialize(mx.init.Xavier(), ctx=ctx)
        # One dummy forward pass so the hybridized graph can be exported.
        net.forward(mx.nd.ones((1, 3, 30, 30), ctx=ctx[0]))
        with SummaryWriter(logdir=log_dir, verbose=False) as sw:
            sw.add_graph(net)

        trainer = gluon.Trainer(net.collect_params(), optimizer, {
            'learning_rate': opt.lr,
            'wd': opt.wd,
            'momentum': opt.momentum
        })
        # FIX: removed an unused duplicate Accuracy() metric and an unused
        # ``alpha`` local present in the original.
        train_metric = mx.metric.Accuracy()
        loss_fn = gluon.loss.SoftmaxCrossEntropyLoss()

        iteration = 0
        lr_decay_count = 0

        best_val_score = 0
        global_step = 0

        for epoch in range(epochs):
            tic = time.time()
            train_metric.reset()
            train_loss = 0
            num_batch = len(train_data)

            # Step the learning rate down at the scheduled epochs.
            if epoch == lr_decay_epoch[lr_decay_count]:
                trainer.set_learning_rate(trainer.learning_rate * lr_decay)
                lr_decay_count += 1

            tbar = tqdm(train_data)

            for i, batch in enumerate(tbar):
                data = gluon.utils.split_and_load(batch[0],
                                                  ctx_list=ctx,
                                                  batch_axis=0)
                label = gluon.utils.split_and_load(batch[1],
                                                   ctx_list=ctx,
                                                   batch_axis=0)

                with ag.record():
                    output = [net(X) for X in data]
                    loss = [loss_fn(yhat, y) for yhat, y in zip(output, label)]
                for l in loss:
                    l.backward()
                trainer.step(batch_size)
                train_loss += sum([l.sum().asscalar() for l in loss])

                train_metric.update(label, output)
                name, acc = train_metric.get()
                iteration += 1
                global_step += len(loss)

            train_loss /= batch_size * num_batch
            name, acc = train_metric.get()
            name, val_acc = test(ctx, val_data)

            # Checkpoint whenever validation accuracy improves.
            if val_acc > best_val_score:
                best_val_score = val_acc
                net.save_parameters('{}/{}-{}-{:04.3f}-best.params'.format(
                    model_dir, model_name, epoch, best_val_score))

            with SummaryWriter(logdir=log_dir, verbose=False) as sw:
                sw.add_scalar(tag="TrainLos",
                              value=train_loss,
                              global_step=global_step)
                sw.add_scalar(tag="TrainAcc",
                              value=acc,
                              global_step=global_step)
                sw.add_scalar(tag="ValAcc",
                              value=val_acc,
                              global_step=global_step)
                sw.add_graph(net)

            logging.info('[Epoch %d] train=%f val=%f loss=%f time: %f' %
                         (epoch, acc, val_acc, train_loss, time.time() - tic))

            # Periodic checkpoint regardless of validation score.
            if save_period and save_dir and (epoch + 1) % save_period == 0:
                net.save_parameters('{}/{}-{}.params'.format(
                    save_dir, model_name, epoch))

        # Final checkpoint at the end of training.
        if save_period and save_dir:
            net.save_parameters('{}/{}-{}.params'.format(
                save_dir, model_name, epochs - 1))

    if opt.mode == 'hybrid':
        net.hybridize()
    train(train_data, val_data, opt.num_epochs, context)
Example #13
0
def get_dataloader(module_name, module_args, num_label):
    """Create train/val DataLoaders for an ImageDataset or LmdbDataset config."""
    train_transform = transforms.Compose(
        [transforms.RandomColorJitter(brightness=0.5),
         transforms.ToTensor()])
    val_transform = transforms.ToTensor()

    dataset_args = module_args['dataset']
    dataset_args['num_label'] = num_label
    # Pull the data-path settings out of the dataset arguments.
    train_data_path = dataset_args.pop('train_data_path')
    train_data_ratio = dataset_args.pop('train_data_ratio')
    val_data_path = dataset_args.pop('val_data_path')

    if module_name == 'ImageDataset':
        train_data_list, val_data_list = get_datalist(
            train_data_path, val_data_path,
            module_args['loader']['validation_split'])
    elif module_name == 'LmdbDataset':
        train_data_list = train_data_path
        val_data_list = val_data_path
    else:
        raise Exception('current only support ImageDataset and LmdbDataset')

    train_datasets = [
        get_dataset(data_list=entry,
                    module_name=module_name,
                    phase='train',
                    dataset_args=dataset_args)
        for entry in train_data_list
    ]

    if not train_datasets:
        raise Exception('no images found')
    if len(train_datasets) > 1:
        # Several sources: balance batches across them by the given ratios.
        train_loader = dataset.Batch_Balanced_Dataset(
            dataset_list=train_datasets,
            ratio_list=train_data_ratio,
            module_args=module_args,
            dataset_transfroms=train_transform,
            phase='train')
    else:
        train_loader = DataLoader(
            dataset=train_datasets[0].transform_first(train_transform),
            batch_size=module_args['loader']['train_batch_size'],
            shuffle=module_args['loader']['shuffle'],
            last_batch='rollover',
            num_workers=module_args['loader']['num_workers'])
        train_loader.dataset_len = len(train_datasets[0])

    val_loader = None
    if len(val_data_list):
        val_dataset = get_dataset(data_list=val_data_list,
                                  module_name=module_name,
                                  phase='test',
                                  dataset_args=dataset_args)
        val_loader = DataLoader(
            dataset=val_dataset.transform_first(val_transform),
            batch_size=module_args['loader']['val_batch_size'],
            shuffle=module_args['loader']['shuffle'],
            last_batch='keep',
            num_workers=module_args['loader']['num_workers'])
        val_loader.dataset_len = len(val_dataset)
    return train_loader, val_loader
Example #14
0
def train():
    """Train a face-recognition ResNet-100 with margin-based softmax.

    Builds the model, data loaders, LR schedule and trainer from the
    module-level ``args``/``ctx``, then runs the epoch loop, periodically
    logging CE loss, evaluating on LFW and checkpointing the best model.

    NOTE(review): relies on module-level names ``args``, ``ctx``, ``helper``,
    ``logger`` and ``eval_lfw`` — confirm they are defined in the full file.
    """
    # Create inference
    inference = resnet100(args.num_classes,
                          emb_size=args.emb_size,
                          s=args.margin_s,
                          a=args.margin_a,
                          m=args.margin_m,
                          b=args.margin_b)
    # Load inference params
    if args.init.lower() == 'xavier':
        init = mx.init.Xavier(rnd_type='gaussian',
                              factor_type='out',
                              magnitude=2)
    else:
        init = mx.initializer.Uniform()
    if args.model:
        # Explicit model file given: load it and start counting from zero.
        helper.load_params(inference, args.model, ctx=ctx)
        cur_iter = 0
    else:
        # Otherwise resume from the checkpoint dir (or initialize fresh).
        cur_iter = helper.load_params(inference,
                                      args.ckpt_dir,
                                      prefix=args.prefix,
                                      init=init,
                                      ctx=ctx)
    # Hybrid mode --> Symbol mode
    inference.hybridize(static_alloc=True, static_shape=True)

    # Datasets
    if args.color:
        train_transform = transforms.Compose([
            transforms.RandomFlipLeftRight(),
            transforms.RandomColorJitter(0.1, 0.1, 0.1),
            ToTensor()
        ])
    else:
        train_transform = transforms.Compose(
            [transforms.RandomFlipLeftRight(),
             ToTensor()])

    train_dataset = ImageRecordDataset(
        args.train_rec).transform_first(train_transform)
    train_loader = DataLoader(train_dataset,
                              batch_size=args.batch_size,
                              shuffle=True,
                              last_batch='discard',
                              num_workers=args.num_workers,
                              pin_memory=True)
    test_transform = ToTensor()
    test_dataset = ImageRecordDataset(
        args.test_rec).transform_first(test_transform)
    test_loader = DataLoader(test_dataset,
                             batch_size=args.batch_size,
                             shuffle=False,
                             last_batch='keep',
                             num_workers=args.num_workers,
                             pin_memory=False)

    # Create learning rate scheduler
    iterations_per_epoch = int(len(train_dataset) / args.batch_size)
    lr_steps = [s * iterations_per_epoch for s in args.lr_steps]
    print('Learning rate drops after iterations: {}'.format(lr_steps))
    lr_scheduler = mx.lr_scheduler.MultiFactorScheduler(step=lr_steps,
                                                        factor=0.1)

    # Create trainer
    trainer = gluon.Trainer(inference.collect_params(),
                            optimizer='sgd',
                            optimizer_params={
                                'learning_rate': args.lr,
                                'wd': args.wd,
                                'lr_scheduler': lr_scheduler,
                                'rescale_grad': 1. / len(ctx)
                            })
    # Load trainer from saved states
    helper.load_trainer(trainer, args.ckpt_dir, cur_iter, prefix=args.prefix)

    # Define loss functions
    softmax_cross_entropy = mx.gluon.loss.SoftmaxCrossEntropyLoss()

    # Define metric losses
    metric_ce_loss = mx.metric.Loss('CE-Loss')
    best_acc = 80  # only save the model if the accuracy is better than 80%
    # Start training
    print('Start to train {}...'.format(args.prefix))
    start_epoch = cur_iter // iterations_per_epoch
    for cur_epoch in range(start_epoch + 1, args.max_epoch + 1):
        start_time = timeit.default_timer()
        for batch_idx, (image, label) in enumerate(train_loader):
            if label.ndim > 1:
                label = label[:, 0]  # skip the landmarks
            # if batch_idx > 0: break
            cur_iter += 1
            # Spread the batch over all devices.
            images = gluon.utils.split_and_load(image, ctx)
            labels = gluon.utils.split_and_load(label, ctx)
            with autograd.record(train_mode=True):
                losses = []
                for x, y in zip(images, labels):
                    fc = inference(x, y)
                    loss_ce = softmax_cross_entropy(fc, y)
                    losses.append(loss_ce)
                    # update metrics
                    metric_ce_loss.update(None, preds=loss_ce)
                for l in losses:
                    l.backward()
            trainer.step(image.shape[0])

            # Periodic loss logging (also on the last batch of the epoch).
            if (batch_idx % args.log_interval
                    == 0) or (batch_idx == iterations_per_epoch - 1):
                elapsed_time = timeit.default_timer() - start_time
                scout = helper.print_scalars(
                    OrderedDict([metric_ce_loss.get()]), cur_epoch, batch_idx,
                    elapsed_time)
                logger.info(scout)
                start_time = timeit.default_timer()
                metric_ce_loss.reset()

            # Periodic LFW evaluation; checkpoint when accuracy improves.
            if (batch_idx % args.test_interval
                    == 0) or (batch_idx == iterations_per_epoch - 1):
                # if batch_idx > 0: break
                start_time = timeit.default_timer()
                mu, std, t, _ = eval_lfw(inference.features, args.test_rec,
                                         test_loader, ctx)
                elapsed_time = timeit.default_timer() - start_time
                if mu > best_acc:
                    best_acc = mu
                    # Save trained model
                    logger.info(
                        'Find better model at E: {}, B: {}, I: {}'.format(
                            cur_epoch, batch_idx, cur_iter))
                    helper.save_params(inference,
                                       args.ckpt_dir,
                                       cur_iter,
                                       prefix=args.prefix + '-best')
                scout = helper.print_scalars(
                    OrderedDict([('mu', mu), ('std', std), ('t', t)]),
                    cur_epoch, batch_idx, elapsed_time)
                logger.info(scout)

        # Save trained model
        helper.save_params(inference,
                           args.ckpt_dir,
                           cur_iter,
                           prefix=args.prefix)
        helper.save_trainer(trainer,
                            args.ckpt_dir,
                            cur_iter,
                            prefix=args.prefix)
Example #15
0
def get_train_data(rec_train, batch_size, data_nthreads, input_size,
                   crop_ratio, args):
    """Build the ImageNet training DataLoader from a .rec file.

    :param rec_train: path to the training ImageRecord file
    :param batch_size: per-worker batch size
    :param data_nthreads: number of data-loading worker processes
    :param input_size: network input resolution
    :param crop_ratio: center-crop ratio (only used to compute ``resize``)
    :param args: options namespace (reads ``auto_aug``)
    :return: (train_data, train_batch_fn)

    NOTE(review): reads the module-level names ``num_workers`` and ``rank``
    for the distributed SplitSampler — confirm they are in scope.
    """
    def train_batch_fn(batch, ctx):
        # Move one (data, label) batch onto the target device.
        data = batch[0].as_in_context(ctx)
        label = batch[1].as_in_context(ctx)
        return data, label

    jitter_param = 0.4
    lighting_param = 0.1
    # NOTE(review): ``resize`` is computed but never used below.
    resize = int(math.ceil(input_size / crop_ratio))

    train_transforms = []
    if args.auto_aug:
        print('Using AutoAugment')
        from autogluon.utils.augment import AugmentationBlock, autoaug_imagenet_policies
        train_transforms.append(AugmentationBlock(autoaug_imagenet_policies()))

    from gluoncv.utils.transforms import EfficientNetRandomCrop
    from autogluon.utils import pil_transforms

    # Large inputs use PIL-based EfficientNet-style cropping; smaller inputs
    # use the plain Gluon random-resized-crop pipeline.
    if input_size >= 320:
        train_transforms.extend([
            EfficientNetRandomCrop(input_size),
            pil_transforms.Resize((input_size, input_size),
                                  interpolation=Image.BICUBIC),
            pil_transforms.RandomHorizontalFlip(),
            pil_transforms.ColorJitter(brightness=0.4,
                                       contrast=0.4,
                                       saturation=0.4),
            pil_transforms.ToNDArray(),
            transforms.RandomLighting(lighting_param),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ])
    else:
        train_transforms.extend([
            pil_transforms.ToNDArray(),
            transforms.RandomResizedCrop(input_size),
            transforms.RandomFlipLeftRight(),
            transforms.RandomColorJitter(brightness=jitter_param,
                                         contrast=jitter_param,
                                         saturation=jitter_param),
            transforms.RandomLighting(lighting_param),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ])

    transform_train = transforms.Compose(train_transforms)

    train_set = mx.gluon.data.vision.ImageRecordDataset(
        rec_train).transform_first(transform_train)
    # Each distributed worker samples its own disjoint shard.
    train_sampler = SplitSampler(len(train_set),
                                 num_parts=num_workers,
                                 part_index=rank)

    train_data = gluon.data.DataLoader(
        train_set,
        batch_size=batch_size,  # shuffle=True,
        last_batch='discard',
        num_workers=data_nthreads,
        sampler=train_sampler)
    return train_data, train_batch_fn
Example #16
0
def split(X, Y, test_size):
    """Randomly split (X, Y) into train/test subsets via scikit-learn."""
    from sklearn.model_selection import train_test_split
    return train_test_split(X, Y, test_size=test_size, shuffle=True)


# Data augmentation pipeline for training images.
transform_train = gtf.Compose([
    # Randomly crop a region covering 0.08-1.0x of the original area with an
    # aspect ratio between 3/4 and 4/3, then resize it to a 224x224 image.
    gtf.RandomResizedCrop(224, scale=(0.08, 1.0),
                          ratio=(3.0 / 4.0, 4.0 / 3.0)),
    gtf.RandomFlipLeftRight(),
    # Randomly jitter brightness, contrast and saturation.
    gtf.RandomColorJitter(brightness=0.4, contrast=0.4, saturation=0.4),
    # Add random (PCA-based) lighting noise.
    gtf.RandomLighting(0.1),
    gtf.ToTensor(),
    # Normalize each channel of the image.
    gtf.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

# Deterministic preprocessing for evaluation images.
transform_test = gtf.Compose([
    gtf.Resize(256),
    # Crop out the central 224x224 square region of the image.
    gtf.CenterCrop(224),
    gtf.ToTensor(),
    gtf.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])
Example #17
0
                pass

        balanced_batch_images = nd.concat(*balanced_batch_images, dim=0)
        balanced_batch_texts = nd.concat(*balanced_batch_texts, dim=0)
        return balanced_batch_images, balanced_batch_texts


if __name__ == '__main__':
    import os
    from tqdm import tqdm
    import anyconfig
    from mxnet.gluon.data.vision import transforms
    from utils import parse_config

    train_transfroms = transforms.Compose(
        [transforms.RandomColorJitter(brightness=0.5),
         transforms.ToTensor()])
    config = anyconfig.load(open("config/icdar2015.yaml", 'rb'))
    if 'base' in config:
        config = parse_config(config)
    if os.path.isfile(config['dataset']['alphabet']):
        config['dataset']['alphabet'] = str(
            np.load(config['dataset']['alphabet']))

    dataset_args = config['dataset']['validate']['dataset']['args']
    dataset_args['num_label'] = 80
    dataset_args['alphabet'] = config['dataset']['alphabet']
    dataset = ImageDataset(**dataset_args)
    data_loader = DataLoader(dataset=dataset.transform_first(train_transfroms),
                             batch_size=1,
                             shuffle=True,
Example #18
0
def main():
    """Entry point: configure logging, LR schedule, model and (optional)
    distillation teacher from CLI options, then hand off to the data/train
    setup below."""
    opt = parse_args()

    # Append to the existing log file so restarts keep one history.
    filehandler = logging.FileHandler(opt.logging_file, mode='a+')
    # streamhandler = logging.StreamHandler()

    logger = logging.getLogger('ImageNet')
    logger.setLevel(level=logging.DEBUG)
    logger.addHandler(filehandler)
    # logger.addHandler(streamhandler)

    logger.info(opt)

    # AMP must be initialized before the network/trainer are created.
    if opt.amp:
        amp.init()

    batch_size = opt.batch_size
    # Fixed ImageNet-1k statistics.
    classes = 1000
    num_training_samples = 1281167
    num_validating_samples = 50000

    num_gpus = opt.num_gpus
    # Scale the global batch size by the number of devices.
    batch_size *= max(1, num_gpus)
    context = [mx.gpu(i)
               for i in range(num_gpus)] if num_gpus > 0 else [mx.cpu()]
    num_workers = opt.num_workers
    accumulate = opt.accumulate

    # LR decay epochs come either from a fixed period or an explicit
    # comma-separated list; both are shifted left by the warmup epochs
    # because the second scheduler below starts after warmup.
    lr_decay = opt.lr_decay
    lr_decay_period = opt.lr_decay_period
    if opt.lr_decay_period > 0:
        lr_decay_epoch = list(
            range(lr_decay_period, opt.num_epochs, lr_decay_period))
    else:
        lr_decay_epoch = [int(i) for i in opt.lr_decay_epoch.split(',')]
    lr_decay_epoch = [e - opt.warmup_epochs for e in lr_decay_epoch]
    num_batches = num_training_samples // batch_size

    # Linear warmup from 0 to opt.lr, then the user-selected decay mode
    # down to 0 over the remaining epochs.
    lr_scheduler = LRSequential([
        LRScheduler('linear',
                    base_lr=0,
                    target_lr=opt.lr,
                    nepochs=opt.warmup_epochs,
                    iters_per_epoch=num_batches),
        LRScheduler(opt.lr_mode,
                    base_lr=opt.lr,
                    target_lr=0,
                    nepochs=opt.num_epochs - opt.warmup_epochs,
                    iters_per_epoch=num_batches,
                    step_epoch=lr_decay_epoch,
                    step_factor=lr_decay,
                    power=2)
    ])

    model_name = opt.model

    # Architecture-specific construction kwargs.
    kwargs = {'ctx': context, 'pretrained': opt.use_pretrained}
    if opt.use_gn:
        kwargs['norm_layer'] = gcv.nn.GroupNorm
    if model_name.startswith('vgg'):
        kwargs['batch_norm'] = opt.batch_norm
    elif model_name.startswith('resnext'):
        kwargs['use_se'] = opt.use_se

    if opt.last_gamma:
        kwargs['last_gamma'] = True

    optimizer = 'sgd'
    optimizer_params = {
        'wd': opt.wd,
        'momentum': opt.momentum,
        'lr_scheduler': lr_scheduler,
        # Keep the LR schedule position consistent when resuming.
        'begin_num_update': num_batches * opt.resume_epoch
    }
    # if opt.dtype != 'float32':
    #     optimizer_params['multi_precision'] = True

    # net = get_model(model_name, **kwargs)
    # The model can come from either the gluoncv or gluoncv2 model zoo.
    if opt.model_backend == 'gluoncv':
        net = glcv_get_model(model_name, **kwargs)
    elif opt.model_backend == 'gluoncv2':
        net = glcv2_get_model(model_name, **kwargs)
    else:
        raise ValueError(f'Unknown backend: {opt.model_backend}')
    # net.cast(opt.dtype)
    if opt.resume_params != '':
        net.load_parameters(opt.resume_params, ctx=context, cast_dtype=True)

    # teacher model for distillation training
    # NOTE(review): the teacher is built with the same `kwargs` as the
    # student (same pretrained flag etc.) — confirm this is intended.
    if opt.teacher is not None and opt.hard_weight < 1.0:
        teacher_name = opt.teacher
        if opt.teacher_backend == 'gluoncv':
            teacher = glcv_get_model(teacher_name, **kwargs)
        elif opt.teacher_backend == 'gluoncv2':
            teacher = glcv2_get_model(teacher_name, **kwargs)
        else:
            raise ValueError(f'Unknown backend: {opt.teacher_backend}')
        # teacher = glcv2_get_model(teacher_name, pretrained=True, ctx=context)
        # teacher.cast(opt.dtype)
        # Freeze the teacher: no gradients are ever computed for it.
        teacher.collect_params().setattr('grad_req', 'null')
        distillation = True
    else:
        distillation = False
    # Two functions for reading data from record file or raw images
    def get_data_rec(rec_train, rec_val):
        """Build (train, val) datasets backed by RecordIO files.

        ``flag=1`` decodes each record as a color image.
        """
        expanded = [os.path.expanduser(p) for p in (rec_train, rec_val)]
        return tuple(
            ImageRecordDataset(filename=path, flag=1) for path in expanded)

    def get_data_loader(data_dir):
        """Build (train, val) datasets from a raw ImageNet directory."""
        return ImageNet(data_dir, train=True), ImageNet(data_dir, train=False)

    def batch_fn(batch, ctx):
        """Split a (data, label) batch across the given contexts."""
        def scatter(part):
            return gluon.utils.split_and_load(part, ctx_list=ctx,
                                              batch_axis=0)

        return scatter(batch[0]), scatter(batch[1])

    # Choose the dataset backend: RecordIO files or a raw image directory.
    if opt.use_rec:
        train_dataset, val_dataset = get_data_rec(opt.rec_train, opt.rec_val)
    else:
        train_dataset, val_dataset = get_data_loader(opt.data_dir)

    # Standard ImageNet channel statistics.
    normalize = transforms.Normalize([0.485, 0.456, 0.406],
                                     [0.229, 0.224, 0.225])
    jitter_param = 0.4
    lighting_param = 0.1
    if not opt.multi_scale:
        # Single-scale training: one fixed augmentation pipeline.
        train_dataset = train_dataset.transform_first(
            transforms.Compose([
                transforms.RandomResizedCrop(opt.input_size),
                transforms.RandomFlipLeftRight(),
                transforms.RandomColorJitter(brightness=jitter_param,
                                             contrast=jitter_param,
                                             saturation=jitter_param),
                transforms.RandomLighting(lighting_param),
                transforms.ToTensor(), normalize
            ]))
        train_data = gluon.data.DataLoader(train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True,
                                           pin_memory=True,
                                           last_batch='rollover',
                                           num_workers=num_workers)
    else:
        # Multi-scale training: one pipeline per input size (320..608 px,
        # i.e. x*32 for x in 10..19), switched every `interval` batches.
        train_data = RandomTransformDataLoader(
            [
                Transform(
                    transforms.Compose([
                        # transforms.RandomResizedCrop(opt.input_size),
                        transforms.RandomResizedCrop(x * 32),
                        transforms.RandomFlipLeftRight(),
                        transforms.RandomColorJitter(brightness=jitter_param,
                                                     contrast=jitter_param,
                                                     saturation=jitter_param),
                        transforms.RandomLighting(lighting_param),
                        transforms.ToTensor(),
                        normalize
                    ])) for x in range(10, 20)
            ],
            train_dataset,
            interval=10 * opt.accumulate,
            batch_size=batch_size,
            shuffle=False,
            pin_memory=True,
            last_batch='rollover',
            num_workers=num_workers)
    # Deterministic center-crop pipeline for validation.
    val_dataset = val_dataset.transform_first(
        transforms.Compose([
            transforms.Resize(opt.input_size, keep_ratio=True),
            transforms.CenterCrop(opt.input_size),
            transforms.ToTensor(), normalize
        ]))
    val_data = gluon.data.DataLoader(val_dataset,
                                     batch_size=batch_size,
                                     shuffle=False,
                                     pin_memory=True,
                                     last_batch='keep',
                                     num_workers=num_workers)

    # Mixup produces soft labels, so accuracy is not defined; track RMSE
    # against the mixed targets instead.
    if opt.mixup:
        train_metric = mx.metric.RMSE()
    else:
        train_metric = mx.metric.Accuracy()
    train_loss_metric = mx.metric.Loss()
    acc_top1 = mx.metric.Accuracy()
    acc_top5 = mx.metric.TopKAccuracy(5)

    # Resolve where (and whether) checkpoints are written.
    save_frequency = opt.save_frequency
    if opt.save_dir and save_frequency:
        if opt.wandb:
            save_dir = wandb.run.dir
        else:
            save_dir = opt.save_dir
            makedirs(save_dir)
    else:
        save_dir = ''
        save_frequency = 0

    def mixup_transform(label, classes, lam=1, eta=0.0):
        """Return mixup soft labels.

        Each label batch is mixed with its reversed copy:
        ``lam * one_hot(y) + (1 - lam) * one_hot(y[::-1])``, with optional
        label smoothing controlled by ``eta``.
        """
        if isinstance(label, nd.NDArray):
            label = [label]
        on_val = 1 - eta + eta / classes
        off_val = eta / classes
        mixed = []
        for y in label:
            straight = y.one_hot(classes, on_value=on_val, off_value=off_val)
            reversed_ = y[::-1].one_hot(classes,
                                        on_value=on_val,
                                        off_value=off_val)
            mixed.append(lam * straight + (1 - lam) * reversed_)
        return mixed

    def smooth(label, classes, eta=0.1):
        """Return label-smoothed one-hot targets for each label batch."""
        if isinstance(label, nd.NDArray):
            label = [label]
        return [
            y.one_hot(classes,
                      on_value=1 - eta + eta / classes,
                      off_value=eta / classes) for y in label
        ]

    def test(ctx, val_data):
        """Run validation and return (top-1 error, top-5 error)."""
        for metric in (acc_top1, acc_top5):
            metric.reset()
        progress = tqdm.tqdm(enumerate(val_data),
                             desc='Validating',
                             total=num_validating_samples // batch_size)
        for _, batch in progress:
            data, label = batch_fn(batch, ctx)
            # outputs = [net(X.astype(opt.dtype, copy=False)) for X in data]
            outputs = [net(x) for x in data]
            acc_top1.update(label, outputs)
            acc_top5.update(label, outputs)

        top1 = acc_top1.get()[1]
        top5 = acc_top5.get()[1]
        return 1 - top1, 1 - top5

    def train(ctx):
        """Full training loop: init/resume, per-epoch train + validate,
        checkpoint the best model and periodic snapshots."""
        if isinstance(ctx, mx.Context):
            ctx = [ctx]
        # Fresh start: initialize weights (warnings suppressed).
        if opt.resume_params == '':
            import warnings
            with warnings.catch_warnings(record=True) as w:
                net.initialize(mx.init.MSRAPrelu(), ctx=ctx)

        # Optionally exclude BN scale/shift and biases from weight decay.
        if opt.no_wd:
            for k, v in net.collect_params('.*beta|.*gamma|.*bias').items():
                v.wd_mult = 0.0

        # Gradient accumulation: accumulate grads across steps instead of
        # overwriting them.
        if accumulate > 1:
            logger.info(f'accumulate: {accumulate}, using "add" grad_req')
            import warnings
            with warnings.catch_warnings(record=True) as w:
                net.collect_params().setattr('grad_req', 'add')

        trainer = gluon.Trainer(net.collect_params(),
                                optimizer,
                                optimizer_params,
                                update_on_kvstore=False if opt.amp else None)
        if opt.amp:
            amp.init_trainer(trainer)
        if opt.resume_states != '':
            trainer.load_states(opt.resume_states)

        # Mixup / label smoothing produce dense (soft) targets.
        if opt.label_smoothing or opt.mixup:
            sparse_label_loss = False
        else:
            sparse_label_loss = True
        if distillation:
            L = gcv.loss.DistillationSoftmaxCrossEntropyLoss(
                temperature=opt.temperature,
                hard_weight=opt.hard_weight,
                sparse_label=sparse_label_loss)
        else:
            L = gluon.loss.SoftmaxCrossEntropyLoss(
                sparse_label=sparse_label_loss)

        # Best top-1 *error* so far (lower is better); 1 = worst possible.
        best_val_score = 1

        err_top1_val, err_top5_val = test(ctx, val_data)
        logger.info('initial validation: err-top1=%f err-top5=%f' %
                    (err_top1_val, err_top5_val))

        for epoch in range(opt.resume_epoch, opt.num_epochs):
            tic = time.time()
            train_metric.reset()
            train_loss_metric.reset()
            btic = time.time()
            pbar = tqdm.tqdm(total=num_batches,
                             desc=f'Training [{epoch}]',
                             leave=True)
            for i, batch in enumerate(train_data):
                data, label = batch_fn(batch, ctx)

                if opt.mixup:
                    # Mix each batch with its reversed copy; disable mixup
                    # for the final `mixup_off_epoch` epochs (lam = 1).
                    lam = np.random.beta(opt.mixup_alpha, opt.mixup_alpha)
                    if epoch >= opt.num_epochs - opt.mixup_off_epoch:
                        lam = 1
                    data = [lam * X + (1 - lam) * X[::-1] for X in data]

                    if opt.label_smoothing:
                        eta = 0.1
                    else:
                        eta = 0.0
                    label = mixup_transform(label, classes, lam, eta)

                elif opt.label_smoothing:
                    # Keep the hard labels for the accuracy metric.
                    hard_label = label
                    label = smooth(label, classes)

                if distillation:
                    # teacher_prob = [nd.softmax(teacher(X.astype(opt.dtype, copy=False)) / opt.temperature) \
                    #                 for X in data]
                    # Resize NHWC for the teacher's input size, then back to
                    # NCHW; soften logits by the distillation temperature.
                    with ag.predict_mode():
                        teacher_prob = [
                            nd.softmax(
                                teacher(
                                    nd.transpose(
                                        nd.image.resize(
                                            nd.transpose(X, (0, 2, 3, 1)),
                                            size=opt.teacher_imgsize),
                                        (0, 3, 1, 2))) / opt.temperature)
                            for X in data
                        ]

                with ag.record():
                    # outputs = [net(X.astype(opt.dtype, copy=False)) for X in data]
                    outputs = [net(X) for X in data]
                    if distillation:
                        # loss = [L(yhat.astype('float32', copy=False),
                        #           y.astype('float32', copy=False),
                        #           p.astype('float32', copy=False)) for yhat, y, p in zip(outputs, label, teacher_prob)]
                        # print([outputs, label, teacher_prob])
                        loss = [
                            L(yhat, y, p)
                            for yhat, y, p in zip(outputs, label, teacher_prob)
                        ]
                    else:
                        # loss = [L(yhat, y.astype(opt.dtype, copy=False)) for yhat, y in zip(outputs, label)]
                        loss = [L(yhat, y) for yhat, y in zip(outputs, label)]
                    if opt.amp:
                        with amp.scale_loss(loss, trainer) as scaled_loss:
                            ag.backward(scaled_loss)
                    else:
                        ag.backward(loss)
                # Step once every `accumulate` batches (then clear the
                # accumulated grads), or every batch when not accumulating.
                if accumulate > 1:
                    if (i + 1) % accumulate == 0:
                        trainer.step(batch_size * accumulate)
                        net.collect_params().zero_grad()
                else:
                    trainer.step(batch_size)

                train_loss_metric.update(0, loss)

                if opt.mixup:
                    output_softmax = [nd.SoftmaxActivation(out.astype('float32', copy=False)) \
                                      for out in outputs]
                    train_metric.update(label, output_softmax)
                else:
                    if opt.label_smoothing:
                        train_metric.update(hard_label, outputs)
                    else:
                        train_metric.update(label, outputs)

                _, loss_score = train_loss_metric.get()
                train_metric_name, train_metric_score = train_metric.get()
                samplers_per_sec = batch_size / (time.time() - btic)
                postfix = f'{samplers_per_sec:.1f} imgs/sec, ' \
                          f'loss: {loss_score:.4f}, ' \
                          f'acc: {train_metric_score * 100:.2f}, ' \
                          f'lr: {trainer.learning_rate:.4e}'
                if opt.multi_scale:
                    postfix += f', size: {data[0].shape[-1]}'
                pbar.set_postfix_str(postfix)
                pbar.update()
                btic = time.time()
                if opt.log_interval and not (i + 1) % opt.log_interval:
                    step = epoch * num_batches + i
                    wandb.log(
                        {
                            'samplers_per_sec': samplers_per_sec,
                            train_metric_name: train_metric_score,
                            'lr': trainer.learning_rate,
                            'loss': loss_score
                        },
                        step=step)
                    logger.info(
                        'Epoch[%d] Batch [%d]\tSpeed: %f samples/sec\t%s=%f\tlr=%f'
                        % (epoch, i, samplers_per_sec, train_metric_name,
                           train_metric_score, trainer.learning_rate))

            pbar.close()
            train_metric_name, train_metric_score = train_metric.get()
            # NOTE(review): uses the last loop index `i`, not the batch
            # count `i + 1`, so throughput is slightly underestimated.
            throughput = int(batch_size * i / (time.time() - tic))

            err_top1_val, err_top5_val = test(ctx, val_data)
            wandb.log({
                'err1': err_top1_val,
                'err5': err_top5_val
            },
                      step=epoch * num_batches)

            logger.info('[Epoch %d] training: %s=%f' %
                        (epoch, train_metric_name, train_metric_score))
            logger.info('[Epoch %d] speed: %d samples/sec\ttime cost: %f' %
                        (epoch, throughput, time.time() - tic))
            logger.info('[Epoch %d] validation: err-top1=%f err-top5=%f' %
                        (epoch, err_top1_val, err_top5_val))

            # Checkpoint whenever the top-1 error improves.
            if err_top1_val < best_val_score:
                best_val_score = err_top1_val
                net.save_parameters(
                    '%s/%.4f-imagenet-%s-%d-best.params' %
                    (save_dir, best_val_score, model_name, epoch))
                trainer.save_states(
                    '%s/%.4f-imagenet-%s-%d-best.states' %
                    (save_dir, best_val_score, model_name, epoch))

            # Periodic snapshot every `save_frequency` epochs.
            if save_frequency and save_dir and (epoch +
                                                1) % save_frequency == 0:
                net.save_parameters('%s/imagenet-%s-%d.params' %
                                    (save_dir, model_name, epoch))
                trainer.save_states('%s/imagenet-%s-%d.states' %
                                    (save_dir, model_name, epoch))

        # Final snapshot after the last epoch.
        if save_frequency and save_dir:
            net.save_parameters('%s/imagenet-%s-%d.params' %
                                (save_dir, model_name, opt.num_epochs - 1))
            trainer.save_states('%s/imagenet-%s-%d.states' %
                                (save_dir, model_name, opt.num_epochs - 1))

    # Hybridize for speed; multi-scale training needs dynamic shapes.
    if opt.mode == 'hybrid':
        net.hybridize(static_alloc=True, static_shape=not opt.multi_scale)
        if distillation:
            teacher.hybridize(static_alloc=True,
                              static_shape=not opt.multi_scale)
    train(context)
예제 #19
0
#net.output=nn.GlobalAvgPool1D(100)
# Losses: standard softmax CE plus a large-margin Gaussian-mixture loss.
softmax_loss = gluon.loss.SoftmaxCrossEntropyLoss()
gmloss = L_GM_Loss(10, 10, args.margin, args.lamda, args.mult)
gmloss.initialize(mx.init.MSRAPrelu(), ctx=ctx)
net.initialize(mx.init.Xavier(), ctx=ctx)
# Train the network and the GM-loss parameters together.
params = net.collect_params()
params.update(gmloss.collect_params())
#params.update(gmloss.collect_params(select='mean'))

# CIFAR-10-style augmentation (32x32 inputs, CIFAR mean/std).
transform_train = transforms.Compose([
    # Randomly crop an area, and then resize it to be 32x32
    transforms.RandomResizedCrop(32),
    # Randomly flip the image horizontally
    transforms.RandomFlipLeftRight(),
    # Randomly jitter the brightness, contrast and saturation of the image
    transforms.RandomColorJitter(brightness=0.1, contrast=0.1, saturation=0.1),
    # Randomly adding noise to the image
    transforms.RandomLighting(0.1),
    # Transpose the image from height*width*num_channels to num_channels*height*width
    # and map values from [0, 255] to [0,1]
    transforms.ToTensor(),
    # Normalize the image with mean and standard deviation calculated across all images
    transforms.Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010])
])

transform_test = transforms.Compose([
    transforms.Resize(32),
    transforms.ToTensor(),
    transforms.Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010])
])
예제 #20
0
파일: train.py 프로젝트: nrjcs/Tennis
def main(_argv):
    FLAGS.every = [int(s) for s in FLAGS.every]
    FLAGS.balance = [
        True if s.lower() == 'true' or s.lower() == 't' else False
        for s in FLAGS.balance
    ]
    FLAGS.lr_steps = [int(s) for s in FLAGS.lr_steps]

    if FLAGS.num_workers < 0:
        FLAGS.num_workers = multiprocessing.cpu_count()

    ctx = [mx.gpu(i) for i in range(FLAGS.num_gpus)
           ] if FLAGS.num_gpus > 0 else [mx.cpu()]

    # Set up logging
    logging.basicConfig()
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    log_file_path = os.path.join('models', 'vision', 'experiments',
                                 FLAGS.model_id, 'log.txt')
    log_dir = os.path.dirname(log_file_path)
    if log_dir and not os.path.exists(log_dir):
        os.makedirs(log_dir)
    fh = logging.FileHandler(log_file_path)
    logger.addHandler(fh)

    key_flags = FLAGS.get_key_flags_for_module(sys.argv[0])
    logging.info('\n'.join(f.serialize() for f in key_flags))

    # set up tensorboard summary writer
    tb_sw = SummaryWriter(log_dir=os.path.join(log_dir, 'tb'),
                          comment=FLAGS.model_id)

    feat_sub_dir = None

    # Data augmentation, will do in dataset incase window>1 and need to be applied image-wise
    jitter_param = 0.4
    lighting_param = 0.1
    transform_train = None
    transform_test = None
    balance_train = True
    if FLAGS.feats_model is None:
        transform_train = transforms.Compose([
            transforms.RandomResizedCrop(FLAGS.data_shape),
            transforms.RandomFlipLeftRight(),
            transforms.RandomColorJitter(brightness=jitter_param,
                                         contrast=jitter_param,
                                         saturation=jitter_param),
            transforms.RandomLighting(lighting_param),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ])

        transform_test = transforms.Compose([
            transforms.Resize(FLAGS.data_shape + 32),
            transforms.CenterCrop(FLAGS.data_shape),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ])

        if bool(FLAGS.flow):

            transform_test = transforms.Compose([
                transforms.Resize(FLAGS.data_shape + 32),
                transforms.CenterCrop(FLAGS.data_shape),
                TwoStreamNormalize()
            ])

            transform_train = transform_test

    if FLAGS.save_feats:
        balance_train = False
        transform_train = transform_test

    if FLAGS.window > 1:
        transform_train = transform_test

    # Load datasets
    if FLAGS.temp_pool not in ['max', 'mean']:
        train_set = TennisSet(split='train',
                              transform=transform_train,
                              every=FLAGS.every[0],
                              padding=FLAGS.padding,
                              stride=FLAGS.stride,
                              window=FLAGS.window,
                              model_id=FLAGS.model_id,
                              split_id=FLAGS.split_id,
                              balance=balance_train,
                              flow=bool(FLAGS.flow),
                              feats_model=FLAGS.feats_model,
                              save_feats=FLAGS.save_feats)

        logging.info(train_set)

        val_set = TennisSet(split='val',
                            transform=transform_test,
                            every=FLAGS.every[1],
                            padding=FLAGS.padding,
                            stride=FLAGS.stride,
                            window=FLAGS.window,
                            model_id=FLAGS.model_id,
                            split_id=FLAGS.split_id,
                            balance=False,
                            flow=bool(FLAGS.flow),
                            feats_model=FLAGS.feats_model,
                            save_feats=FLAGS.save_feats)

        logging.info(val_set)

    test_set = TennisSet(split='test',
                         transform=transform_test,
                         every=FLAGS.every[2],
                         padding=FLAGS.padding,
                         stride=FLAGS.stride,
                         window=FLAGS.window,
                         model_id=FLAGS.model_id,
                         split_id=FLAGS.split_id,
                         balance=False,
                         flow=bool(FLAGS.flow),
                         feats_model=FLAGS.feats_model,
                         save_feats=FLAGS.save_feats)

    logging.info(test_set)

    # Data Loaders
    if FLAGS.temp_pool not in ['max', 'mean']:
        train_data = gluon.data.DataLoader(train_set,
                                           batch_size=FLAGS.batch_size,
                                           shuffle=True,
                                           num_workers=FLAGS.num_workers)
        val_data = gluon.data.DataLoader(val_set,
                                         batch_size=FLAGS.batch_size,
                                         shuffle=False,
                                         num_workers=FLAGS.num_workers)
    test_data = gluon.data.DataLoader(test_set,
                                      batch_size=FLAGS.batch_size,
                                      shuffle=False,
                                      num_workers=FLAGS.num_workers)

    # Define Model
    model = None
    if FLAGS.feats_model is None:
        if FLAGS.backbone == 'rdnet':
            backbone_net = get_r21d(num_layers=34,
                                    n_classes=400,
                                    t=8,
                                    pretrained=True).features
        else:
            if FLAGS.flow == 'sixc':
                backbone_net = get_model(
                    FLAGS.backbone, pretrained=False
                ).features  # 6 channel input, don't want pretraind
            else:
                backbone_net = get_model(FLAGS.backbone,
                                         pretrained=True).features

        if FLAGS.flow in ['twos', 'only']:
            if FLAGS.flow == 'only':
                backbone_net = None
            flow_net = get_model(
                FLAGS.backbone, pretrained=True
            ).features  # todo orig exp was not pretrained flow
            model = TwoStreamModel(backbone_net, flow_net,
                                   len(train_set.classes))
        elif FLAGS.backbone == 'rdnet':
            model = FrameModel(backbone_net, len(train_set.classes), swap=True)
        else:
            model = FrameModel(backbone_net, len(train_set.classes))
    elif FLAGS.temp_pool in ['max', 'mean']:
        backbone_net = get_model(FLAGS.backbone, pretrained=True).features
        model = FrameModel(backbone_net, len(test_set.classes))
    if FLAGS.window > 1:  # Time Distributed RNN

        if FLAGS.backbone_from_id and model is not None:
            if os.path.exists(
                    os.path.join('models', 'vision', 'experiments',
                                 FLAGS.backbone_from_id)):
                files = os.listdir(
                    os.path.join('models', 'vision', 'experiments',
                                 FLAGS.backbone_from_id))
                files = [f for f in files if f[-7:] == '.params']
                if len(files) > 0:
                    files = sorted(files,
                                   reverse=True)  # put latest model first
                    model_name = files[0]
                    model.load_parameters(
                        os.path.join('models', 'vision', 'experiments',
                                     FLAGS.backbone_from_id, model_name))
                    logging.info('Loaded backbone params: {}'.format(
                        os.path.join('models', 'vision', 'experiments',
                                     FLAGS.backbone_from_id, model_name)))

        if FLAGS.freeze_backbone and model is not None:
            for param in model.collect_params().values():
                param.grad_req = 'null'

        if FLAGS.temp_pool in ['gru', 'lstm']:
            model = CNNRNN(model,
                           num_classes=len(test_set.classes),
                           type=FLAGS.temp_pool,
                           hidden_size=128)
        elif FLAGS.temp_pool in ['mean', 'max']:
            pass
        else:
            assert FLAGS.backbone == 'rdnet'  # ensure 3d net
            assert FLAGS.window in [8, 32]

    with warnings.catch_warnings(record=True) as w:
        warnings.simplefilter("always")
        model.initialize()

    num_channels = 3
    if bool(FLAGS.flow):
        num_channels = 6
    if FLAGS.feats_model is None:
        if FLAGS.window == 1:
            logging.info(
                model.summary(
                    mx.nd.ndarray.ones(shape=(1, num_channels,
                                              FLAGS.data_shape,
                                              FLAGS.data_shape))))
        else:
            logging.info(
                model.summary(
                    mx.nd.ndarray.ones(shape=(1, FLAGS.window, num_channels,
                                              FLAGS.data_shape,
                                              FLAGS.data_shape))))
    else:
        if FLAGS.window == 1:
            logging.info(model.summary(mx.nd.ndarray.ones(shape=(1, 4096))))
        elif FLAGS.temp_pool not in ['max', 'mean']:
            logging.info(
                model.summary(mx.nd.ndarray.ones(shape=(1, FLAGS.window,
                                                        4096))))

    model.collect_params().reset_ctx(ctx)
    model.hybridize()

    if FLAGS.save_feats:
        best_score = -1
        best_epoch = -1
        with open(
                os.path.join('models', 'vision', 'experiments', FLAGS.model_id,
                             'scores.txt'), 'r') as f:
            lines = f.readlines()
            lines = [line.rstrip().split() for line in lines]
            for ep, sc in lines:
                if float(sc) > best_score:
                    best_epoch = int(ep)
                    best_score = float(sc)

        logging.info('Testing best model from Epoch %d with score of %f' %
                     (best_epoch, best_score))
        model.load_parameters(
            os.path.join('models', 'vision', 'experiments', FLAGS.model_id,
                         "{:04d}.params".format(best_epoch)))
        logging.info('Loaded model params: {}'.format(
            os.path.join('models', 'vision', 'experiments', FLAGS.model_id,
                         "{:04d}.params".format(best_epoch))))

        for data, sett in zip([train_data, val_data, test_data],
                              [train_set, val_set, test_set]):
            save_features(model, data, sett, ctx)
        return

    start_epoch = 0
    if os.path.exists(
            os.path.join('models', 'vision', 'experiments', FLAGS.model_id)):
        files = os.listdir(
            os.path.join('models', 'vision', 'experiments', FLAGS.model_id))
        files = [f for f in files if f[-7:] == '.params']
        if len(files) > 0:
            files = sorted(files, reverse=True)  # put latest model first
            model_name = files[0]
            start_epoch = int(model_name.split('.')[0]) + 1
            model.load_parameters(os.path.join('models', 'vision',
                                               'experiments', FLAGS.model_id,
                                               model_name),
                                  ctx=ctx)
            logging.info('Loaded model params: {}'.format(
                os.path.join('models', 'vision', 'experiments', FLAGS.model_id,
                             model_name)))

    # Setup the optimiser
    trainer = gluon.Trainer(model.collect_params(), 'sgd', {
        'learning_rate': FLAGS.lr,
        'momentum': FLAGS.momentum,
        'wd': FLAGS.wd
    })

    # Setup Metric/s
    metrics = [
        Accuracy(label_names=test_set.classes),
        mx.metric.TopKAccuracy(5, label_names=test_set.classes),
        Accuracy(name='accuracy_no',
                 label_names=test_set.classes[1:],
                 ignore_labels=[0]),
        Accuracy(name='accuracy_o',
                 label_names=test_set.classes[0],
                 ignore_labels=list(range(1, len(test_set.classes)))),
        PRF1(label_names=test_set.classes)
    ]

    val_metrics = [
        Accuracy(label_names=test_set.classes),
        mx.metric.TopKAccuracy(5, label_names=test_set.classes),
        Accuracy(name='accuracy_no',
                 label_names=test_set.classes[1:],
                 ignore_labels=[0]),
        Accuracy(name='accuracy_o',
                 label_names=test_set.classes[0],
                 ignore_labels=list(range(1, len(test_set.classes)))),
        PRF1(label_names=test_set.classes)
    ]

    test_metrics = [
        Accuracy(label_names=test_set.classes),
        mx.metric.TopKAccuracy(5, label_names=test_set.classes),
        Accuracy(name='accuracy_no',
                 label_names=test_set.classes[1:],
                 ignore_labels=[0]),
        Accuracy(name='accuracy_o',
                 label_names=test_set.classes[0],
                 ignore_labels=list(range(1, len(test_set.classes)))),
        PRF1(label_names=test_set.classes)
    ]

    # Setup Loss/es
    loss_fn = gluon.loss.SoftmaxCrossEntropyLoss()

    if FLAGS.temp_pool not in ['max', 'mean']:
        model = train_model(model, train_set, train_data, metrics, val_set,
                            val_data, val_metrics, trainer, loss_fn,
                            start_epoch, ctx, tb_sw)

    # model training complete, test it
    if FLAGS.temp_pool not in ['max', 'mean']:
        mod_path = os.path.join('models', 'vision', 'experiments',
                                FLAGS.model_id)
    else:
        mod_path = os.path.join('models', 'vision', 'experiments',
                                FLAGS.feats_model)
    best_score = -1
    best_epoch = -1
    with open(os.path.join(mod_path, 'scores.txt'), 'r') as f:
        lines = f.readlines()
        lines = [line.rstrip().split() for line in lines]
        for ep, sc in lines:
            if float(sc) > best_score:
                best_epoch = int(ep)
                best_score = float(sc)

    logging.info('Testing best model from Epoch %d with score of %f' %
                 (best_epoch, best_score))
    model.load_parameters(
        os.path.join(mod_path, "{:04d}.params".format(best_epoch)))
    logging.info('Loaded model params: {}'.format(
        os.path.join(mod_path, "{:04d}.params".format(best_epoch))))

    if FLAGS.temp_pool in ['max', 'mean']:
        assert FLAGS.backbone_from_id or FLAGS.feats_model  # if we doing temporal pooling ensure that we have loaded a pretrained net
        model = TemporalPooling(model,
                                pool=FLAGS.temp_pool,
                                num_classes=0,
                                feats=FLAGS.feats_model != None)

    tic = time.time()
    _ = test_model(model,
                   test_data,
                   test_set,
                   test_metrics,
                   ctx,
                   vis=FLAGS.vis)

    if FLAGS.temp_pool not in ['max', 'mean']:
        str_ = 'Train set:'
        for i in range(len(train_set.classes)):
            str_ += '\n'
            for j in range(len(train_set.classes)):
                str_ += str(metrics[4].mat[i, j]) + '\t'
        print(str_)
    str_ = 'Test set:'
    for i in range(len(test_set.classes)):
        str_ += '\n'
        for j in range(len(test_set.classes)):
            str_ += str(test_metrics[4].mat[i, j]) + '\t'
    print(str_)

    str_ = '[Finished] '
    for metric in test_metrics:
        result = metric.get()
        if not isinstance(result, list):
            result = [result]
        for res in result:
            str_ += ', Test_{}={:.3f}'.format(res[0], res[1])
        metric.reset()

    str_ += '  # Samples: {}, Time Taken: {:.1f}'.format(
        len(test_set),
        time.time() - tic)
    logging.info(str_)
예제 #21
0
# Parse CLI arguments and select the (single-GPU) compute context.
args = parser.parse_args()

ctx = mx.gpu()

# Model output size and augmentation strengths.
num_outputs = 10
jitter_param = 0.4
lighting_param = 0.1

# ImageNet channel statistics used for input normalisation.
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

# Training pipeline: random crop / flip / colour jitter / PCA lighting,
# then convert to tensor and normalise.
_train_steps = [
    transforms.RandomResizedCrop(224),
    transforms.RandomFlipLeftRight(),
    transforms.RandomColorJitter(brightness=jitter_param,
                                 contrast=jitter_param,
                                 saturation=jitter_param),
    transforms.RandomLighting(lighting_param),
    transforms.ToTensor(),
    transforms.Normalize(mean, std),
]
training_transformer = transforms.Compose(_train_steps)

# Validation pipeline: deterministic resize + centre crop, then normalise.
_val_steps = [
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean, std),
]
validation_transformer = transforms.Compose(_val_steps)

# Per-channel constant images, shape (3, 224, 224).
# NOTE(review): presumably used to un-normalise images for display —
# confirm against the code that consumes mean_img/std_img.
mean_img = mx.nd.stack(*(mx.nd.full((224, 224), value) for value in mean))
std_img = mx.nd.stack(*(mx.nd.full((224, 224), value) for value in std))
예제 #22
0
    def data_augmenting(self, config: Configuration, dataset_path):
        """Build train/val/test DataLoaders for an ImageFolder dataset.

        Parameters
        ----------
        config : Configuration
            Supplies jitter_param, lighting_param, batch_size, gpus_count,
            num_workers and the data_augmenting flag.
        dataset_path : str
            Root folder containing 'train', 'val' and 'test' sub-folders.

        Returns
        -------
        tuple
            (train_data, val_data, test_data) DataLoaders; only the training
            loader shuffles and (optionally) augments.
        """
        jitter = config.jitter_param
        lighting = config.lighting_param
        # Scale the per-device batch size by the number of GPUs (at least 1).
        batch_size = config.batch_size * max(len(config.gpus_count), 1)
        workers = config.num_workers

        # Shared ImageNet normalisation step (stateless, safe to reuse).
        imagenet_norm = transforms.Normalize([0.485, 0.456, 0.406],
                                             [0.229, 0.224, 0.225])

        if config.data_augmenting:
            # Random crop / flip / colour jitter / lighting augmentation.
            train_steps = [
                transforms.RandomResizedCrop(224),
                transforms.RandomFlipLeftRight(),
                transforms.RandomColorJitter(brightness=jitter,
                                             contrast=jitter,
                                             saturation=jitter),
                transforms.RandomLighting(lighting),
                transforms.ToTensor(),
                imagenet_norm,
            ]
        else:
            # Deterministic resize only.
            train_steps = [
                transforms.Resize(size=(224, 224)),
                transforms.ToTensor(),
                imagenet_norm,
            ]
        transform_train = transforms.Compose(train_steps)

        # Evaluation pipeline: resize then centre crop, no randomness.
        transform_test = transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            imagenet_norm,
        ])

        def _loader(split, tform, shuffle):
            # One ImageFolderDataset + DataLoader per split.
            dataset = gluon.data.vision.ImageFolderDataset(
                os.path.join(dataset_path, split)).transform_first(tform)
            return gluon.data.DataLoader(dataset,
                                         batch_size=batch_size,
                                         shuffle=shuffle,
                                         num_workers=workers)

        train_data = _loader('train', transform_train, True)
        val_data = _loader('val', transform_test, False)
        test_data = _loader('test', transform_test, False)

        return train_data, val_data, test_data
예제 #23
0
    def get_data_loader(data_dir, batch_size, num_workers):
        """Build ImageNet train/val DataLoaders plus a batch-splitting helper.

        Reads ``opt`` (input_size, crop_ratio, auto_aug) from the enclosing
        scope. Returns ``(train_data, val_data, batch_fn)`` where ``batch_fn``
        splits a (data, label) batch across a list of contexts.
        """
        normalize = transforms.Normalize([0.485, 0.456, 0.406],
                                         [0.229, 0.224, 0.225])
        jitter_param = 0.4
        lighting_param = 0.1
        input_size = opt.input_size
        crop_ratio = opt.crop_ratio if opt.crop_ratio > 0 else 0.875
        # Evaluation resize chosen so the centre crop keeps `crop_ratio`.
        resize = int(math.ceil(input_size / crop_ratio))

        def batch_fn(batch, ctx):
            """Split data and label of a batch across the context list."""
            data = gluon.utils.split_and_load(batch[0],
                                              ctx_list=ctx,
                                              batch_axis=0)
            label = gluon.utils.split_and_load(batch[1],
                                               ctx_list=ctx,
                                               batch_axis=0)
            return data, label

        transform_train = []
        if opt.auto_aug:
            # Optional AutoAugment policy block prepended to the pipeline.
            print('Using AutoAugment')
            from autogluon.utils.augment import AugmentationBlock, autoaug_imagenet_policies
            transform_train.append(
                AugmentationBlock(autoaug_imagenet_policies()))

        from gluoncv.utils.transforms import EfficientNetRandomCrop, EfficientNetCenterCrop
        from autogluon.utils import pil_transforms

        # Large inputs (>= 320) use the EfficientNet-style PIL pipeline;
        # smaller inputs use the standard Gluon NDArray transforms.
        if input_size >= 320:
            transform_train.extend([
                EfficientNetRandomCrop(input_size),
                pil_transforms.Resize((input_size, input_size),
                                      interpolation=Image.BICUBIC),
                pil_transforms.RandomHorizontalFlip(),
                pil_transforms.ColorJitter(brightness=0.4,
                                           contrast=0.4,
                                           saturation=0.4),
                transforms.RandomLighting(lighting_param),
                transforms.ToTensor(),
                transforms.Normalize([0.485, 0.456, 0.406],
                                     [0.229, 0.224, 0.225])
            ])
        else:
            transform_train.extend([
                transforms.RandomResizedCrop(input_size),
                transforms.RandomFlipLeftRight(),
                transforms.RandomColorJitter(brightness=jitter_param,
                                             contrast=jitter_param,
                                             saturation=jitter_param),
                transforms.RandomLighting(lighting_param),
                transforms.ToTensor(),
                transforms.Normalize([0.485, 0.456, 0.406],
                                     [0.229, 0.224, 0.225])
            ])

        transform_train = transforms.Compose(transform_train)

        # Training loader: shuffled, incomplete final batch dropped.
        train_data = gluon.data.DataLoader(imagenet.classification.ImageNet(
            data_dir, train=True).transform_first(transform_train),
                                           batch_size=batch_size,
                                           shuffle=True,
                                           last_batch='discard',
                                           num_workers=num_workers)

        # Mirror the train-time branching for the deterministic eval pipeline.
        if input_size >= 320:
            transform_test = transforms.Compose([
                pil_transforms.ToPIL(),
                EfficientNetCenterCrop(input_size),
                pil_transforms.Resize((input_size, input_size),
                                      interpolation=Image.BICUBIC),
                pil_transforms.ToNDArray(),
                transforms.ToTensor(), normalize
            ])
        else:
            transform_test = transforms.Compose([
                transforms.Resize(resize, keep_ratio=True),
                transforms.CenterCrop(input_size),
                transforms.ToTensor(), normalize
            ])

        val_data = gluon.data.DataLoader(imagenet.classification.ImageNet(
            data_dir, train=False).transform_first(transform_test),
                                         batch_size=batch_size,
                                         shuffle=False,
                                         num_workers=num_workers)

        return train_data, val_data, batch_fn
예제 #24
0
파일: train_gnmt.py 프로젝트: nrjcs/Tennis
def main(_argv):
    """Train a GNMT-style captioning model on the Tennis dataset.

    Builds the (optional) CNN frame encoder, the datasets, the GNMT
    encoder/decoder and a beam-search translator from absl FLAGS, restores
    the latest checkpoint if one exists, then hands off to ``train``.
    """

    os.makedirs(os.path.join('models', 'captioning', 'experiments',
                             FLAGS.model_id),
                exist_ok=True)

    if FLAGS.num_gpus > 0:  # only supports 1 GPU
        ctx = mx.gpu()
    else:
        ctx = mx.cpu()

    # Set up logging
    logging.basicConfig()
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    log_file_path = os.path.join('models', 'captioning', 'experiments',
                                 FLAGS.model_id, 'log.txt')
    log_dir = os.path.dirname(log_file_path)
    if log_dir and not os.path.exists(log_dir):
        os.makedirs(log_dir)
    fh = logging.FileHandler(log_file_path)
    logger.addHandler(fh)

    # Log all flags defined by this script.
    key_flags = FLAGS.get_key_flags_for_module(sys.argv[0])
    logging.info('\n'.join(f.serialize() for f in key_flags))

    # set up tensorboard summary writer
    tb_sw = SummaryWriter(log_dir=os.path.join(log_dir, 'tb'),
                          comment=FLAGS.model_id)

    # are we using features or do we include the CNN?
    if FLAGS.feats_model is None:
        # Full pipeline: a pretrained CNN backbone embeds each frame.
        backbone_net = get_model(FLAGS.backbone, pretrained=True,
                                 ctx=ctx).features
        cnn_model = FrameModel(backbone_net,
                               11)  # hardcoded the number of classes
        if FLAGS.backbone_from_id:
            # Restore backbone weights from the latest checkpoint of a
            # previous vision experiment.
            if os.path.exists(
                    os.path.join('models', 'vision', 'experiments',
                                 FLAGS.backbone_from_id)):
                files = os.listdir(
                    os.path.join('models', 'vision', 'experiments',
                                 FLAGS.backbone_from_id))
                files = [f for f in files if f[-7:] == '.params']
                if len(files) > 0:
                    files = sorted(files,
                                   reverse=True)  # put latest model first
                    model_name = files[0]
                    cnn_model.load_parameters(os.path.join(
                        'models', 'vision', 'experiments',
                        FLAGS.backbone_from_id, model_name),
                                              ctx=ctx)
                    logging.info('Loaded backbone params: {}'.format(
                        os.path.join('models', 'vision', 'experiments',
                                     FLAGS.backbone_from_id, model_name)))
            else:
                raise FileNotFoundError('{}'.format(
                    os.path.join('models', 'vision', 'experiments',
                                 FLAGS.backbone_from_id)))

        if FLAGS.freeze_backbone:
            # Stop gradient updates through the CNN.
            for param in cnn_model.collect_params().values():
                param.grad_req = 'null'

        # Apply the CNN to every frame in a clip.
        cnn_model = TimeDistributed(cnn_model.backbone)

        src_embed = cnn_model

        transform_train = transforms.Compose([
            transforms.RandomResizedCrop(FLAGS.data_shape),
            transforms.RandomFlipLeftRight(),
            transforms.RandomColorJitter(brightness=0.4,
                                         contrast=0.4,
                                         saturation=0.4),
            transforms.RandomLighting(0.1),
            transforms.ToTensor(),
            transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
        ])

        transform_test = transforms.Compose([
            transforms.Resize(FLAGS.data_shape + 32),
            transforms.CenterCrop(FLAGS.data_shape),
            transforms.ToTensor(),
            transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
        ])

    else:
        # Pre-extracted features: the "embedding" is just a pass-through.
        from mxnet.gluon import nn  # need to do this to force no use of Embedding on src
        src_embed = nn.HybridSequential(prefix='src_embed_')
        with src_embed.name_scope():
            src_embed.add(nn.Dropout(rate=0.0))

        transform_train = None
        transform_test = None

    # setup the data
    data_train = TennisSet(split='train',
                           transform=transform_train,
                           captions=True,
                           max_cap_len=FLAGS.tgt_max_len,
                           every=FLAGS.every,
                           feats_model=FLAGS.feats_model)
    data_val = TennisSet(split='val',
                         transform=transform_test,
                         captions=True,
                         vocab=data_train.vocab,
                         every=FLAGS.every,
                         inference=True,
                         feats_model=FLAGS.feats_model)
    data_test = TennisSet(split='test',
                          transform=transform_test,
                          captions=True,
                          vocab=data_train.vocab,
                          every=FLAGS.every,
                          inference=True,
                          feats_model=FLAGS.feats_model)

    # Write ground-truth captions once so evaluation scripts can diff them.
    val_tgt_sentences = data_val.get_captions(split=True)
    test_tgt_sentences = data_test.get_captions(split=True)
    write_sentences(
        val_tgt_sentences,
        os.path.join('models', 'captioning', 'experiments', FLAGS.model_id,
                     'val_gt.txt'))
    write_sentences(
        test_tgt_sentences,
        os.path.join('models', 'captioning', 'experiments', FLAGS.model_id,
                     'test_gt.txt'))

    # load embeddings for tgt_embed
    if FLAGS.emb_file:
        word_embs = nlp.embedding.TokenEmbedding.from_file(
            file_path=os.path.join('data', FLAGS.emb_file))
        data_train.vocab.set_embedding(word_embs)

        input_dim, output_dim = data_train.vocab.embedding.idx_to_vec.shape
        tgt_embed = gluon.nn.Embedding(input_dim, output_dim)
        tgt_embed.initialize(ctx=ctx)
        tgt_embed.weight.set_data(data_train.vocab.embedding.idx_to_vec)
    else:
        tgt_embed = None

    # setup the model
    encoder, decoder = get_gnmt_encoder_decoder(
        cell_type=FLAGS.cell_type,
        hidden_size=FLAGS.num_hidden,
        dropout=FLAGS.dropout,
        num_layers=FLAGS.num_layers,
        num_bi_layers=FLAGS.num_bi_layers)
    model = NMTModel(src_vocab=None,
                     tgt_vocab=data_train.vocab,
                     encoder=encoder,
                     decoder=decoder,
                     embed_size=FLAGS.emb_size,
                     prefix='gnmt_',
                     src_embed=src_embed,
                     tgt_embed=tgt_embed)

    model.initialize(init=mx.init.Uniform(0.1), ctx=ctx)
    static_alloc = True
    model.hybridize(static_alloc=static_alloc)
    logging.info(model)

    # Resume from the latest numbered checkpoint, skipping the
    # 'valid_best.params' symlink-style file if it sorts first.
    start_epoch = 0
    if os.path.exists(
            os.path.join('models', 'captioning', 'experiments',
                         FLAGS.model_id)):
        files = os.listdir(
            os.path.join('models', 'captioning', 'experiments',
                         FLAGS.model_id))
        files = [f for f in files if f[-7:] == '.params']
        if len(files) > 0:
            files = sorted(files, reverse=True)  # put latest model first
            model_name = files[0]
            if model_name == 'valid_best.params':
                model_name = files[1]
            start_epoch = int(model_name.split('.')[0]) + 1
            model.load_parameters(os.path.join('models', 'captioning',
                                               'experiments', FLAGS.model_id,
                                               model_name),
                                  ctx=ctx)
            logging.info('Loaded model params: {}'.format(
                os.path.join('models', 'captioning', 'experiments',
                             FLAGS.model_id, model_name)))

    # setup the beam search
    translator = BeamSearchTranslator(model=model,
                                      beam_size=FLAGS.beam_size,
                                      scorer=nlp.model.BeamSearchScorer(
                                          alpha=FLAGS.lp_alpha, K=FLAGS.lp_k),
                                      max_length=FLAGS.tgt_max_len + 100)
    logging.info('Use beam_size={}, alpha={}, K={}'.format(
        FLAGS.beam_size, FLAGS.lp_alpha, FLAGS.lp_k))

    # setup the loss function
    loss_function = MaskedSoftmaxCELoss()
    loss_function.hybridize(static_alloc=static_alloc)

    # run the training
    train(data_train, data_val, data_test, model, loss_function,
          val_tgt_sentences, test_tgt_sentences, translator, start_epoch, ctx,
          tb_sw)
예제 #25
0
def main():
    """Train a TripletNet embedding on CIFAR-10/100 with triplet loss.

    Parses CLI options, builds a triplet training loader and plain test
    loaders, trains the network, and periodically logs an embedding
    projection (with thumbnail images) to TensorBoard.

    Raises
    ------
    ValueError
        If ``opt.dataset`` is not 'cifar10' or 'cifar100'.
    """
    opt = parse_args()
    batch_size = opt.batch_size
    # Classifier-head size for the backbone; the head itself is unused
    # because only net.features feeds the TripletNet below.
    classes = 10

    # Init transformer
    # See https://mxnet.apache.org/api/python/docs/tutorials/packages/gluon/data/data_augmentation.html
    jitter_param = 0.4
    transform_train = transforms.Compose([
        transforms.Resize(32),
        transforms.RandomResizedCrop((32, 32),
                                     scale=(0.8, 1.0),
                                     ratio=(0.9, 1.1)),
        transforms.RandomFlipLeftRight(),
        transforms.RandomColorJitter(brightness=jitter_param,
                                     contrast=jitter_param,
                                     saturation=jitter_param,
                                     hue=jitter_param),
        transforms.ToTensor(),
        transforms.Normalize([0.4914, 0.4822, 0.4465],
                             [0.2023, 0.1994, 0.2010])
    ])

    transform_test = transforms.Compose([
        transforms.Resize(32),
        transforms.ToTensor(),
        transforms.Normalize([0.4914, 0.4822, 0.4465],
                             [0.2023, 0.1994, 0.2010])
    ])

    # Un-normalised variant used only for the embedding thumbnails.
    transform_test_viz = transforms.Compose([
        transforms.Resize(32),
        transforms.ToTensor(),
    ])

    dataset = opt.dataset
    if dataset == 'cifar10':
        dataset_train = gluon.data.vision.CIFAR10(train=True)
        dataset_test = gluon.data.vision.CIFAR10(train=False)
    elif dataset == 'cifar100':
        dataset_train = gluon.data.vision.CIFAR100(train=True, fine_label=True)
        dataset_test = gluon.data.vision.CIFAR100(train=False, fine_label=True)
    else:
        # Fail fast: previously this only printed a message, and execution
        # then crashed later with a NameError on dataset_train.
        raise ValueError("Dataset: {} is unknown".format(dataset))

    triplet_dataset_train = TripletDataset(dataset_train,
                                           transform=transform_train)
    triplet_dataset_train_loader = gluon.data.DataLoader(
        triplet_dataset_train,
        batch_size=batch_size,
        shuffle=True,
        last_batch='discard',
        num_workers=opt.num_workers)

    dataset_test_loader = gluon.data.DataLoader(
        dataset_test.transform_first(transform_test),
        batch_size=batch_size,
        shuffle=False,
        num_workers=opt.num_workers)
    # TODO : Try normalizing but failed so we will loop through val set again to get data without normalization
    dataset_test_loader_2 = gluon.data.DataLoader(
        dataset_test.transform_first(transform_test_viz),
        batch_size=batch_size,
        shuffle=False,
        num_workers=opt.num_workers)

    print("Number of train sample: {}".format(len(triplet_dataset_train)))
    print("Number of val sample: {}".format(len(dataset_test)))

    num_gpus = opt.num_gpus
    batch_size *= max(1, num_gpus)
    context = [mx.gpu(i)
               for i in range(num_gpus)] if num_gpus > 0 else [mx.cpu()]

    model_name = opt.model
    if model_name.startswith('cifar_wideresnet'):
        kwargs = {
            'classes': classes,
            'drop_rate': opt.drop_rate,
            'pretrained': False,
            'ctx': context
        }
    else:
        kwargs = {'classes': classes, 'pretrained': False, 'ctx': context}
    net = get_model(model_name, **kwargs)

    # Only the feature extractor of the backbone is wrapped.
    tripletnet = TripletNet(net.features)
    tripletnet.hybridize()
    tripletnet.initialize(mx.init.Xavier(), ctx=context)

    if opt.resume_from:
        tripletnet.load_parameters(opt.resume_from, ctx=context)
    # Note: Copy parameters from net into siamese. This will make training unconvergeble....
    # else:
    #     net_params = net.collect_params()
    #     siamesenet_params = siamesenet.collect_params()
    #     for p1, p2 in zip(net_params.values(), siamesenet_params.values()):
    #         p2.set_data(p1.data())

    save_period = opt.save_period
    if opt.save_dir and save_period:
        save_dir = os.path.join(opt.save_dir, "params")
        log_dir = os.path.join(opt.save_dir, "logs")
    else:
        save_dir = 'params'
        log_dir = 'logs'
        save_period = 0
    makedirs(save_dir)
    makedirs(log_dir)

    def test(val_data, val_data_2, ctx, epoch):
        """Log an embedding projection of the first 20 val batches."""
        embedding = None
        labels = None
        images = None
        initialized = False

        for i, (data, label) in enumerate(val_data):
            if i >= 20:
                # only fetch the first 20 batches of images
                break
            data = gluon.utils.split_and_load(data, ctx_list=ctx, batch_axis=0)
            label = gluon.utils.split_and_load(label,
                                               ctx_list=ctx,
                                               batch_axis=0)
            outputs = [tripletnet.get_feature(X) for X in data]
            outputs = mx.nd.concat(*outputs, dim=0)
            label = mx.nd.concat(*label, dim=0)
            if initialized:
                embedding = mx.nd.concat(*(embedding, outputs), dim=0)
                labels = mx.nd.concat(*(labels, label), dim=0)
            else:
                embedding = outputs
                labels = label
                initialized = True

        for i, (data, _) in enumerate(val_data_2):
            if i >= 20:
                # Cap at the same 20 batches so `images` stays row-aligned
                # with `embedding`/`labels` (both loaders are unshuffled
                # views of the same dataset). Previously this loop consumed
                # the whole loader, misaligning the visualisation.
                break
            data = gluon.utils.split_and_load(data, ctx_list=ctx, batch_axis=0)
            data = mx.nd.concat(*data, dim=0)
            if images is None:
                images = data
            else:
                images = mx.nd.concat(*(images, data), dim=0)

        with SummaryWriter(logdir=log_dir) as sw:
            sw.add_embedding(tag='{}_tripletnet_{}'.format(opt.dataset, epoch),
                             embedding=embedding,
                             labels=labels,
                             images=images)

    def train(train_data, val_data, epochs, ctx):
        """Run the triplet-loss training loop, checkpointing periodically."""
        if isinstance(ctx, mx.Context):
            ctx = [ctx]

        # One dummy forward pass so the graph can be logged.
        tripletnet.forward(mx.nd.ones((1, 3, 32, 32), ctx=ctx[0]),
                           mx.nd.ones((1, 3, 32, 32), ctx=ctx[0]),
                           mx.nd.ones((1, 3, 32, 32), ctx=ctx[0]))
        with SummaryWriter(logdir=log_dir, verbose=False) as sw:
            sw.add_graph(tripletnet)

        trainer = gluon.Trainer(tripletnet.collect_params(), 'adam',
                                {'learning_rate': 0.001})
        # Triplet loss (anchor/positive/negative) with margin 6.
        loss_fn = gluon.loss.TripletLoss(margin=6)

        global_step = 0

        for epoch in range(epochs):
            train_loss = 0
            num_batch = len(train_data)

            tbar = tqdm(train_data)

            for i, batch in enumerate(tbar):
                batch_loss = 0

                # batch = (anchor, positive, negative) image tensors.
                img = gluon.utils.split_and_load(batch[0],
                                                 ctx_list=ctx,
                                                 batch_axis=0)
                img_pos = gluon.utils.split_and_load(batch[1],
                                                     ctx_list=ctx,
                                                     batch_axis=0)
                img_neg = gluon.utils.split_and_load(batch[2],
                                                     ctx_list=ctx,
                                                     batch_axis=0)
                with ag.record():
                    output = [
                        tripletnet(x1, x2, x3)
                        for x1, x2, x3 in zip(img, img_pos, img_neg)
                    ]
                    loss = [loss_fn(x1, x2, x3) for x1, x2, x3 in output]
                for l in loss:
                    l.backward()
                    batch_loss += l.mean().asscalar()
                trainer.step(batch_size)
                train_loss += sum([l.sum().asscalar() for l in loss])
                global_step += batch_size

                with SummaryWriter(logdir=log_dir, verbose=False) as sw:
                    sw.add_scalar(tag="BatchLoss",
                                  value=batch_loss,
                                  global_step=global_step)

            train_loss /= batch_size * num_batch
            with SummaryWriter(logdir=log_dir, verbose=False) as sw:
                sw.add_scalar(tag="TrainLoss",
                              value=train_loss,
                              global_step=global_step)

            if save_period and save_dir and (epoch + 1) % save_period == 0:
                # Test on first device
                test(val_data, dataset_test_loader_2, ctx, epoch)
                tripletnet.save_parameters('{}/{}-{}.params'.format(
                    save_dir, model_name, epoch))

        if save_period and save_dir:
            tripletnet.save_parameters('{}/{}-{}.params'.format(
                save_dir, model_name, epochs - 1))

    train(triplet_dataset_train_loader, dataset_test_loader, opt.num_epochs,
          context)
예제 #26
0
    def __getitem__(self, index):
        """Returns a single training item from the dataset as a dictionary.

        Values correspond to mxnet NDArray.
        Keys in the dictionary are either strings or tuples:

            ("color", <frame_id>, <scale>)          for raw colour images,
            ("color_aug", <frame_id>, <scale>)      for augmented colour images,
            ("K", scale) or ("inv_K", scale)        for camera intrinsics,
            "stereo_T"                              for camera extrinsics, and
            "depth_gt"                              for ground truth depth maps.

        <frame_id> is either:
            an integer (e.g. 0, -1, or 1) representing the temporal step relative to 'index',
        or
            "s" for the opposite image in the stereo pair.

        <scale> is an integer representing the scale of the image relative to the full-size image:
            -1      images at native resolution as loaded from disk
            0       images resized to (self.width,      self.height     )
            1       images resized to (self.width // 2, self.height // 2)
            2       images resized to (self.width // 4, self.height // 4)
            3       images resized to (self.width // 8, self.height // 8)
        """
        inputs = {}

        # Colour augmentation is currently disabled; the original condition
        # is kept in the trailing comment for reference.
        do_color_aug = False  # self.is_train and random.random() > 0.5
        do_flip = self.is_train and random.random() > 0.5

        # Each filenames entry is "<folder>" or "<folder> <frame_index> <side>".
        line = self.filenames[index].split()
        folder = line[0]

        if len(line) == 3:
            frame_index = int(line[1])
        else:
            frame_index = 0

        if len(line) == 3:
            side = line[2]
        else:
            side = None

        # Load the raw (-1 scale) colour image for every requested frame.
        for i in self.frame_idxs:
            if i == "s":
                # "s" selects the other camera of the stereo pair.
                other_side = {"r": "l", "l": "r"}[side]
                inputs[("color", i, -1)] = self.get_color(
                    folder, frame_index, other_side, do_flip)
            else:
                inputs[("color", i, -1)] = self.get_color(
                    folder, frame_index + i, side, do_flip)

        # adjusting intrinsics to match each scale in the pyramid
        for scale in range(self.num_scales):
            K = self.K.copy()

            # NOTE(review): self.K appears to be stored normalised to unit
            # image size, since it is multiplied by pixel dimensions here —
            # confirm against where self.K is defined.
            K[0, :] *= self.width // (2 ** scale)
            K[1, :] *= self.height // (2 ** scale)

            inv_K = np.linalg.pinv(K)

            inputs[("K", scale)] = mx.nd.array(K)

            inputs[("inv_K", scale)] = mx.nd.array(inv_K)

        if do_color_aug:
            color_aug = transforms.RandomColorJitter(
                self.brightness, self.contrast, self.saturation, self.hue)
        else:
            # Identity function so preprocess() can apply it unconditionally.
            color_aug = (lambda x: x)

        self.preprocess(inputs, color_aug)

        # Drop the full-resolution images; only the pyramid scales are kept.
        for i in self.frame_idxs:
            del inputs[("color", i, -1)]
            del inputs[("color_aug", i, -1)]

        if self.load_depth:
            depth_gt = self.get_depth(folder, frame_index, side, do_flip)
            inputs["depth_gt"] = np.expand_dims(depth_gt, 0)
            inputs["depth_gt"] = mx.nd.array(inputs["depth_gt"].astype(np.float32))

        if "s" in self.frame_idxs:
            # Relative pose of the stereo pair: translation of 0.1 along x,
            # sign flipped with the image flip and camera side.
            # NOTE(review): units of the 0.1 baseline not visible here — confirm.
            stereo_T = np.eye(4, dtype=np.float32)
            baseline_sign = -1 if do_flip else 1
            side_sign = -1 if side == "l" else 1
            stereo_T[0, 3] = side_sign * baseline_sign * 0.1

            inputs["stereo_T"] = mx.nd.array(stereo_T)

        return inputs
예제 #27
0
    def create_loader(self):
        """Build the source-train, source-eval and target-test DataLoaders.

        Selects the dataset family from ``self.args.cfg`` ('digits',
        'office' or 'visda') and stores the resulting loaders on ``self``
        as ``train_src_loader``, ``test_src_loader`` and ``test_tgt_loader``.
        Only the source-train loader shuffles.
        """
        n_workers = cpu_count()
        args = self.args

        # Both pipelines start with the same resize step.
        train_steps = [transforms.Resize(args.resize)]
        eval_steps = [transforms.Resize(args.resize)]

        # Training crop: random when enabled, otherwise centre crop.
        if args.random_crop:
            train_steps.append(
                transforms.RandomResizedCrop(args.size, scale=(0.8, 1.2)))
        else:
            train_steps.append(transforms.CenterCrop(args.size))

        # Evaluation always uses a deterministic centre crop.
        eval_steps.append(transforms.CenterCrop(args.size))

        if args.flip:
            train_steps.append(transforms.RandomFlipLeftRight())

        if args.random_color:
            train_steps.append(
                transforms.RandomColorJitter(args.color_jitter,
                                             args.color_jitter,
                                             args.color_jitter, 0.1))

        # Common tail: tensor conversion + channel normalisation.
        for steps in (train_steps, eval_steps):
            steps.extend([
                transforms.ToTensor(),
                transforms.Normalize(args.mean, args.std),
            ])

        train_tforms = transforms.Compose(train_steps)
        eval_tforms = transforms.Compose(eval_steps)

        # Dispatch on the config name to the matching dataset factory.
        if 'digits' in args.cfg:
            factory = self.create_digits_datasets
        elif 'office' in args.cfg:
            factory = self.create_office_datasets
        elif 'visda' in args.cfg:
            factory = self.create_visda_datasets
        else:
            raise NotImplementedError
        trs_set, tes_set, tet_set = factory(train_tforms, eval_tforms)

        self.train_src_loader = DataLoader(trs_set,
                                           args.bs,
                                           shuffle=True,
                                           num_workers=n_workers)
        self.test_src_loader = DataLoader(tes_set,
                                          args.bs,
                                          shuffle=False,
                                          num_workers=n_workers)
        self.test_tgt_loader = DataLoader(tet_set,
                                          args.bs,
                                          shuffle=False,
                                          num_workers=n_workers)
예제 #28
0
파일: dataset.py 프로젝트: zhao1f/autogluon
def get_dataset(path=None,
                train=True,
                name=None,
                input_size=224,
                crop_ratio=0.875,
                jitter_param=0.4,
                *args,
                **kwargs):
    """Produce an image classification dataset for AutoGluon.

    The result is either an :class:`ImageFolderDataset`, a
    :class:`RecordDataset`, or a popular dataset already built into
    AutoGluon ('mnist', 'cifar10', 'cifar100', 'imagenet').

    Parameters
    ----------
    name : str, optional
        Which built-in dataset to use; overrides all other options if
        specified. The options are
        ('mnist', 'cifar', 'cifar10', 'cifar100', 'imagenet').
    train : bool, default = True
        Whether this dataset should be used for training or validation.
    path : str
        The training data location. If using :class:`ImageFolderDataset`,
        the image folder `path/to/the/folder` should be provided.
        If using :class:`RecordDataset`, the `path/to/*.rec` should be
        provided.
    input_size : int
        The input image size.
    crop_ratio : float
        Center crop ratio (for evaluation only).
    jitter_param : float
        Maximum brightness/contrast/saturation jitter applied during
        training augmentation.

    Returns
    -------
    Dataset object that can be passed to `task.fit()`, which is actually an
    :class:`autogluon.space.AutoGluonObject`. To interact with such an
    object yourself, you must first call `Dataset.init()` to instantiate
    the object in Python.

    Raises
    ------
    ValueError
        If `name` is not a built-in dataset and `path` is not provided.
    """
    # Resize so the subsequent center crop keeps `crop_ratio` of the image.
    resize = int(math.ceil(input_size / crop_ratio))
    if isinstance(name, str) and name.lower() in built_in_datasets:
        return get_built_in_dataset(name,
                                    train=train,
                                    input_size=input_size,
                                    *args,
                                    **kwargs)

    if path is None:
        # Fail fast with a clear message instead of a TypeError from
        # `'.rec' in None` below.
        raise ValueError(
            "`path` must be provided when `name` is not a built-in dataset")

    # ImageNet channel statistics, matching common pretrained backbones.
    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]

    if '.rec' in path:
        transform = transforms.Compose([
            transforms.RandomResizedCrop(input_size),
            transforms.RandomFlipLeftRight(),
            transforms.RandomColorJitter(brightness=jitter_param,
                                         contrast=jitter_param,
                                         saturation=jitter_param),
            transforms.RandomLighting(0.1),
            transforms.ToTensor(),
            transforms.Normalize(mean, std)
        ]) if train else transforms.Compose([
            transforms.Resize(resize),
            transforms.CenterCrop(input_size),
            transforms.ToTensor(),
            transforms.Normalize(mean, std)
        ])
        dataset = RecordDataset(path, *args, **kwargs)
        # BUG FIX: `transform_first` returns a NEW dataset; the original
        # code discarded the return value, so the transform was never
        # actually applied to the record dataset.
        dataset = dataset.transform_first(transform)
    else:
        # PIL-based data augmentation (e.g. for macOS users where the
        # MXNet image backend is unavailable).
        transform = Compose([
            RandomResizedCrop(input_size),
            RandomHorizontalFlip(),
            ColorJitter(0.4, 0.4, 0.4),
            ToTensor(),
            transforms.Normalize(mean, std)
        ]) if train else Compose([
            Resize(resize),
            CenterCrop(input_size),
            ToTensor(),
            transforms.Normalize(mean, std)
        ])
        dataset = ImageFolderDataset(path,
                                     transform=transform,
                                     *args,
                                     **kwargs)
    return dataset.init()
예제 #29
0
def train_indoor(args, config, reporter):
    """Fine-tune a pretrained model on an indoor-scene image folder dataset.

    Expects `args.data` to contain `train/` and `val/` subfolders usable by
    :class:`gluon.data.vision.ImageFolderDataset`. Trains for `args.epochs`
    epochs and reports validation loss/accuracy through `reporter` after
    each epoch.

    Parameters
    ----------
    args : argparse.Namespace
        Base hyper-parameters (seed, batch_size, num_gpus, data, model,
        classes, lr, momentum, wd, lr_step, lr_factor, epochs, num_workers).
    config : dict
        Searched hyper-parameter values, merged into `args` in place.
    reporter : callable
        Callback accepting `mean_loss` and `mean_accuracy` keyword args.
    """
    # Merge the searched configuration into the base arguments.
    vars(args).update(config)
    np.random.seed(args.seed)
    random.seed(args.seed)
    mx.random.seed(args.seed)

    # Set hyper-params: the batch is split across all devices.
    batch_size = args.batch_size * max(args.num_gpus, 1)
    ctx = [mx.gpu(i)
           for i in range(args.num_gpus)] if args.num_gpus > 0 else [mx.cpu()]

    # Define DataLoader
    train_path = os.path.join(args.data, 'train')
    test_path = os.path.join(args.data, 'val')

    jitter_param = 0.4
    lighting_param = 0.1
    # ImageNet channel statistics, matching the pretrained backbone.
    normalize = transforms.Normalize([0.485, 0.456, 0.406],
                                     [0.229, 0.224, 0.225])

    transform_train = transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomFlipLeftRight(),
        transforms.RandomColorJitter(brightness=jitter_param,
                                     contrast=jitter_param,
                                     saturation=jitter_param),
        transforms.RandomLighting(lighting_param),
        transforms.ToTensor(), normalize
    ])

    transform_test = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(), normalize
    ])
    train_data = gluon.data.DataLoader(gluon.data.vision.ImageFolderDataset(
        train_path).transform_first(transform_train),
                                       batch_size=batch_size,
                                       shuffle=True,
                                       num_workers=args.num_workers)

    test_data = gluon.data.DataLoader(gluon.data.vision.ImageFolderDataset(
        test_path).transform_first(transform_test),
                                      batch_size=batch_size,
                                      shuffle=False,
                                      num_workers=args.num_workers)

    # Load the pretrained backbone and replace the classification head.
    finetune_net = get_model(args.model, pretrained=True)
    with finetune_net.name_scope():
        finetune_net.fc = nn.Dense(args.classes)
    finetune_net.fc.initialize(init.Xavier(), ctx=ctx)
    finetune_net.collect_params().reset_ctx(ctx)
    finetune_net.hybridize()

    # Define trainer
    trainer = gluon.Trainer(finetune_net.collect_params(), 'sgd', {
        'learning_rate': args.lr,
        'momentum': args.momentum,
        'wd': args.wd
    })
    L = gluon.loss.SoftmaxCrossEntropyLoss()
    metric = mx.metric.Accuracy()

    def train(epoch):
        # One full pass over the training set; decay the LR once at lr_step.
        if epoch == args.lr_step:
            trainer.set_learning_rate(trainer.learning_rate * args.lr_factor)

        for i, batch in enumerate(train_data):
            data = gluon.utils.split_and_load(batch[0],
                                              ctx_list=ctx,
                                              batch_axis=0,
                                              even_split=False)
            label = gluon.utils.split_and_load(batch[1],
                                               ctx_list=ctx,
                                               batch_axis=0,
                                               even_split=False)
            with ag.record():
                outputs = [finetune_net(X) for X in data]
                loss = [L(yhat, y) for yhat, y in zip(outputs, label)]
            for l in loss:
                l.backward()

            trainer.step(batch_size)
        mx.nd.waitall()

    def test():
        # BUG FIX: mx.metric.Accuracy accumulates across update() calls.
        # Without resetting, each epoch reported a running average over all
        # epochs so far instead of the current epoch's validation accuracy.
        metric.reset()
        test_loss = 0
        for i, batch in enumerate(test_data):
            data = gluon.utils.split_and_load(batch[0],
                                              ctx_list=ctx,
                                              batch_axis=0,
                                              even_split=False)
            label = gluon.utils.split_and_load(batch[1],
                                               ctx_list=ctx,
                                               batch_axis=0,
                                               even_split=False)
            outputs = [finetune_net(X) for X in data]
            loss = [L(yhat, y) for yhat, y in zip(outputs, label)]

            # Average the per-device mean losses for this batch.
            test_loss += sum([l.mean().asscalar() for l in loss]) / len(loss)
            metric.update(label, outputs)

        _, test_acc = metric.get()
        test_loss /= len(test_data)
        reporter(mean_loss=test_loss, mean_accuracy=test_acc)

    for epoch in range(1, args.epochs + 1):
        train(epoch)
        test()
예제 #30
0
else:
    net.initialize(ctx=ctx)
    epoch_start=0

net.hybridize(static_alloc=True, static_shape=True)  # ZoomZoom!! 


# Data augmentation definitions.
# Per-channel mean/std of THIS dataset (not the ImageNet defaults),
# shared by the train and test pipelines.
_CHANNEL_MEAN = [11.663384, 10.260227, 7.65015]
_CHANNEL_STD = [21.421959, 18.044296, 15.494861]

# Training pipeline: heavy stochastic augmentation.
transform_train = transforms.Compose([
    # Crop a random region covering 60-100% of the image area,
    # resized to crop_size x crop_size.
    transforms.RandomResizedCrop(opt.crop_size, scale=(0.6, 1.0)),
    # Random horizontal and vertical flips.
    transforms.RandomFlipLeftRight(),
    transforms.RandomFlipTopBottom(),
    # Randomly jitter brightness, contrast and saturation.
    transforms.RandomColorJitter(brightness=0.9, contrast=0.9, saturation=0.9),
    # HWC uint8 [0, 255] -> CHW float32 [0, 1].
    transforms.ToTensor(),
    # Random rotation up to +/-90 degrees, zooming in to hide the borders.
    transforms.RandomRotation(angle_limits=(-90, 90), zoom_in=True),
    transforms.Normalize(_CHANNEL_MEAN, _CHANNEL_STD),
])

# Evaluation pipeline: deterministic resize + normalize only.
transform_test = transforms.Compose([
    transforms.Resize(opt.crop_size),
    transforms.ToTensor(),
    transforms.Normalize(_CHANNEL_MEAN, _CHANNEL_STD),
])