Example #1
    batch_size = images_per_gpu * len(gpus)
    num_workers = images_per_gpu * len(gpus)

    root = "/root/userfolder/datasets/TCT"
    # root = "/run/media/hezhujun/DATA1/Document/dataset/TCT"
    val_ann_file = os.path.join(root, "annotations/val.json")
    val_transforms = Compose([
        Resize((1333, 800), True),
        Normalize(mean=(127, 127, 127), std=(255, 255, 255)),
        ToTensor()
    ])
    val_dataset = TCTDataset(root, "tct_val", val_ann_file, val_transforms)
    val_dataset.ids = val_dataset.ids[0:40]
    val_data_loader = DataLoader(val_dataset,
                                 batch_size,
                                 False,
                                 last_batch="keep",
                                 batchify_fn=collate_fn,
                                 num_workers=num_workers)

    anchor_scales = {
        "p2": (32, ),
        "p3": (64, ),
        "p4": (128, ),
        "p5": (256, ),
        "p6": (512, ),
    }
    anchor_ratios = {
        "p2": ((1, 2, 0.5), ),
        "p3": ((1, 2, 0.5), ),
        "p4": ((1, 2, 0.5), ),
        "p5": ((1, 2, 0.5), ),
Example #2
num_worker = 24
save_period = 500
iters = 200e3
lr_steps = [30e3, 60e3, 90e3, np.inf]

scale = 50
margin = 0.5
embedding_size = 128

lr = 0.1
momentum = 0.9
wd = 4e-5

train_set = get_recognition_dataset("emore", transform=transform_train)
train_data = DataLoader(train_set,
                        batch_size,
                        shuffle=True,
                        num_workers=num_worker)

targets = ['lfw']
val_sets = [
    get_recognition_dataset(name, transform=transform_test) for name in targets
]
val_datas = [
    DataLoader(dataset, batch_size, num_workers=num_worker)
    for dataset in val_sets
]

net = get_mobile_facenet(train_set.num_classes,
                         weight_norm=True,
                         feature_norm=True)
net.initialize(init=mx.init.MSRAPrelu(), ctx=ctx)
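
# A small self-contained sketch (not from the original project) of how an
# lr_steps list ending in np.inf is typically consumed: multiply the learning
# rate by 0.1 each time the iteration counter passes a boundary; the trailing
# np.inf sentinel means "never step again".
import numpy as np

sketch_lr, sketch_steps, sketch_counter = 0.1, [30e3, 60e3, 90e3, np.inf], 0
for it in (0, 30000, 60000, 90000, 150000):
    while it >= sketch_steps[sketch_counter]:
        sketch_lr *= 0.1
        sketch_counter += 1
    print(it, sketch_lr)  # 0.1 -> 0.01 -> 0.001 -> 0.0001 -> 0.0001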
Example #3
        net(dummy_img)
        weights_init(net.model)
        net.collect_params().reset_ctx(context)

    dataset = DataSet(
        opt.dataroot, 'train',
        transforms.Compose([
            Resize(opt.loadSize, keep_ratio=True, interpolation=3),
            RandomCrop(opt.fineSize),
            transforms.RandomFlipLeftRight(),
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
        ]))
    dataloader = DataLoader(dataset,
                            batch_size=opt.batchSize,
                            shuffle=True,
                            num_workers=int(opt.workers),
                            last_batch='rollover')

    fake_A_pool = ImagePool(opt.pool_size)
    fake_B_pool = ImagePool(opt.pool_size)

    optimizer_GA = gluon.Trainer(netG_A.collect_params(),
                                 'adam', {
                                     'learning_rate': opt.lr,
                                     'beta1': opt.beta1
                                 },
                                 kvstore='local')
    optimizer_GB = gluon.Trainer(netG_B.collect_params(),
                                 'adam', {
                                     'learning_rate': opt.lr,
Example #4
def dataiter_all_sensors_seq2seq(df, scaler, setting, shuffle=True):
    dataset = setting['dataset']
    training = setting['training']

    df_fill = utils.fill_missing(df)
    df_fill = scaler.transform(df_fill)

    n_timestamp = df_fill.shape[0]
    data_list = [np.expand_dims(df_fill.values, axis=-1)]

    # time in day
    time_idx = (df_fill.index.values -
                df_fill.index.values.astype('datetime64[D]')) / np.timedelta64(
                    1, 'D')
    time_in_day = np.tile(time_idx, [1, NUM_NODES, 1]).transpose((2, 1, 0))
    data_list.append(time_in_day)

    # day in week
    day_in_week = np.zeros(shape=(n_timestamp, NUM_NODES, 7))
    day_in_week[np.arange(n_timestamp), :, df_fill.index.dayofweek] = 1
    data_list.append(day_in_week)

    # temporal feature
    temporal_feature = np.concatenate(data_list, axis=-1)

    geo_feature, _ = get_geo_feature(dataset)

    input_len = dataset['input_len']
    output_len = dataset['output_len']
    feature, data, mask, label = [], [], [], []
    for i in range(n_timestamp - input_len - output_len + 1):
        data.append(temporal_feature[i:i + input_len])

        _mask = np.array(
            df.iloc[i + input_len:i + input_len + output_len] > 1e-5,
            dtype=np.float32)
        mask.append(_mask)

        label.append(temporal_feature[i + input_len:i + input_len +
                                      output_len])

        feature.append(geo_feature)

        if i % 1000 == 0:
            logging.info('Processing %d timestamps', i)
            # if i > 0: break

    data = mx.nd.array(np.stack(data))
    label = mx.nd.array(np.stack(label))
    mask = mx.nd.array(np.expand_dims(np.stack(mask), axis=3))
    feature = mx.nd.array(np.stack(feature))

    logging.info('shape of feature: %s', feature.shape)
    logging.info('shape of data: %s', data.shape)
    logging.info('shape of mask: %s', mask.shape)
    logging.info('shape of label: %s', label.shape)

    from mxnet.gluon.data import ArrayDataset, DataLoader
    return DataLoader(
        ArrayDataset(feature, data, label, mask),
        shuffle=shuffle,
        batch_size=training['batch_size'],
        num_workers=4,
        last_batch='rollover',
    )
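
# A self-contained toy version of the ArrayDataset + DataLoader pattern
# returned above, only to show the per-batch unpacking order
# (feature, data, label, mask); the shapes below are placeholders rather than
# the real ones produced by this function.
import mxnet as mx
from mxnet.gluon.data import ArrayDataset, DataLoader

toy_feature = mx.nd.zeros((10, 4, 3))
toy_data = mx.nd.zeros((10, 6, 4, 9))
toy_label = mx.nd.zeros((10, 6, 4, 9))
toy_mask = mx.nd.ones((10, 6, 4, 1))
toy_loader = DataLoader(ArrayDataset(toy_feature, toy_data, toy_label, toy_mask),
                        batch_size=4, shuffle=True, last_batch='rollover')
for feature_b, data_b, label_b, mask_b in toy_loader:
    print(feature_b.shape, data_b.shape, label_b.shape, mask_b.shape)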
Example #5
def train_function(args,
                   reporter,
                   train_df_path,
                   tuning_df_path,
                   time_limits,
                   base_config,
                   problem_types,
                   column_properties,
                   label_columns,
                   label_shapes,
                   log_metrics,
                   stopping_metric,
                   console_log,
                   ignore_warning=False):
    import os
    # Get the log metric scorers
    if isinstance(log_metrics, str):
        log_metrics = [log_metrics]
    # Load the training and tuning data from the parquet file
    train_data = pd.read_parquet(train_df_path)
    tuning_data = pd.read_parquet(tuning_df_path)
    log_metric_scorers = [get_metric(ele) for ele in log_metrics]
    stopping_metric_scorer = get_metric(stopping_metric)
    greater_is_better = stopping_metric_scorer.greater_is_better
    os.environ['MKL_NUM_THREADS'] = '1'
    os.environ['OMP_NUM_THREADS'] = '1'
    os.environ['MKL_DYNAMIC'] = 'FALSE'
    if ignore_warning:
        import warnings
        warnings.filterwarnings("ignore")
    search_space = args['search_space']
    cfg = base_config.clone()
    specified_values = []
    for key in search_space:
        specified_values.append(key)
        specified_values.append(search_space[key])
    cfg.merge_from_list(specified_values)
    exp_dir = cfg.misc.exp_dir
    if reporter is not None:
        # When the reporter is not None,
        # we create the saved directory based on the task_id + time
        task_id = args.task_id
        exp_dir = os.path.join(exp_dir, 'task{}'.format(task_id))
        os.makedirs(exp_dir, exist_ok=True)
        cfg.defrost()
        cfg.misc.exp_dir = exp_dir
        cfg.freeze()
    logger = logging.getLogger()
    logging_config(folder=exp_dir,
                   name='training',
                   logger=logger,
                   console=console_log)
    logger.info(cfg)
    # Load backbone model
    backbone_model_cls, backbone_cfg, tokenizer, backbone_params_path, _ \
        = get_backbone(cfg.model.backbone.name)
    with open(os.path.join(exp_dir, 'cfg.yml'), 'w') as f:
        f.write(str(cfg))
    text_backbone = backbone_model_cls.from_cfg(backbone_cfg)
    # Build Preprocessor + Preprocess the training dataset + Inference problem type
    # TODO Move preprocessor + Dataloader to outer loop to better cache the dataloader
    preprocessor = TabularBasicBERTPreprocessor(
        tokenizer=tokenizer,
        column_properties=column_properties,
        label_columns=label_columns,
        max_length=cfg.model.preprocess.max_length,
        merge_text=cfg.model.preprocess.merge_text)
    logger.info('Process training set...')
    processed_train = preprocessor.process_train(train_data)
    logger.info('Done!')
    logger.info('Process dev set...')
    processed_dev = preprocessor.process_test(tuning_data)
    logger.info('Done!')
    label = label_columns[0]
    # Get the ground-truth dev labels
    gt_dev_labels = np.array(tuning_data[label].apply(
        column_properties[label].transform))
    ctx_l = get_mxnet_available_ctx()
    base_batch_size = cfg.optimization.per_device_batch_size
    num_accumulated = int(
        np.ceil(cfg.optimization.batch_size / base_batch_size))
    inference_base_batch_size = base_batch_size * cfg.optimization.val_batch_size_mult
    train_dataloader = DataLoader(
        processed_train,
        batch_size=base_batch_size,
        shuffle=True,
        batchify_fn=preprocessor.batchify(is_test=False))
    dev_dataloader = DataLoader(
        processed_dev,
        batch_size=inference_base_batch_size,
        shuffle=False,
        batchify_fn=preprocessor.batchify(is_test=True))
    net = BERTForTabularBasicV1(
        text_backbone=text_backbone,
        feature_field_info=preprocessor.feature_field_info(),
        label_shape=label_shapes[0],
        cfg=cfg.model.network)
    net.initialize_with_pretrained_backbone(backbone_params_path, ctx=ctx_l)
    net.hybridize()
    num_total_params, num_total_fixed_params = count_parameters(
        net.collect_params())
    logger.info('#Total Params/Fixed Params={}/{}'.format(
        num_total_params, num_total_fixed_params))
    # Initialize the optimizer
    updates_per_epoch = int(
        len(train_dataloader) / (num_accumulated * len(ctx_l)))
    optimizer, optimizer_params, max_update \
        = get_optimizer(cfg.optimization,
                        updates_per_epoch=updates_per_epoch)
    valid_interval = math.ceil(cfg.optimization.valid_frequency *
                               updates_per_epoch)
    train_log_interval = math.ceil(cfg.optimization.log_frequency *
                                   updates_per_epoch)
    trainer = mx.gluon.Trainer(net.collect_params(),
                               optimizer,
                               optimizer_params,
                               update_on_kvstore=False)
    if 0 < cfg.optimization.layerwise_lr_decay < 1:
        apply_layerwise_decay(net.text_backbone,
                              cfg.optimization.layerwise_lr_decay,
                              backbone_name=cfg.model.backbone.name)
    # Do not apply weight decay to all the LayerNorm and bias
    for _, v in net.collect_params('.*beta|.*gamma|.*bias').items():
        v.wd_mult = 0.0
    params = [p for p in net.collect_params().values() if p.grad_req != 'null']

    # Set grad_req if gradient accumulation is required
    if num_accumulated > 1:
        logger.info('Using gradient accumulation.'
                    ' Global batch size = {}'.format(
                        cfg.optimization.batch_size))
        for p in params:
            p.grad_req = 'add'
        net.collect_params().zero_grad()
    train_loop_dataloader = grouper(repeat(train_dataloader), len(ctx_l))
    log_loss_l = [mx.np.array(0.0, dtype=np.float32, ctx=ctx) for ctx in ctx_l]
    log_num_samples_l = [0 for _ in ctx_l]
    logging_start_tick = time.time()
    best_performance_score = None
    mx.npx.waitall()
    no_better_rounds = 0
    report_idx = 0
    start_tick = time.time()
    best_report_items = None
    for update_idx in tqdm.tqdm(range(max_update), disable=None):
        num_samples_per_update_l = [0 for _ in ctx_l]
        for accum_idx in range(num_accumulated):
            sample_l = next(train_loop_dataloader)
            loss_l = []
            num_samples_l = [0 for _ in ctx_l]
            for i, (sample, ctx) in enumerate(zip(sample_l, ctx_l)):
                feature_batch, label_batch = sample
                feature_batch = move_to_ctx(feature_batch, ctx)
                label_batch = move_to_ctx(label_batch, ctx)
                with mx.autograd.record():
                    pred = net(feature_batch)
                    if problem_types[0] == _C.CLASSIFICATION:
                        logits = mx.npx.log_softmax(pred, axis=-1)
                        loss = -mx.npx.pick(logits, label_batch[0])
                    elif problem_types[0] == _C.REGRESSION:
                        loss = mx.np.square(pred - label_batch[0])
                    loss_l.append(loss.mean() / len(ctx_l))
                    num_samples_l[i] = loss.shape[0]
                    num_samples_per_update_l[i] += loss.shape[0]
            for loss in loss_l:
                loss.backward()
            for i in range(len(ctx_l)):
                log_loss_l[i] += loss_l[i] * len(ctx_l) * num_samples_l[i]
                log_num_samples_l[i] += num_samples_per_update_l[i]
        # Begin to update
        trainer.allreduce_grads()
        num_samples_per_update = sum(num_samples_per_update_l)
        total_norm, ratio, is_finite = \
            clip_grad_global_norm(params, cfg.optimization.max_grad_norm * num_accumulated)
        total_norm = total_norm / num_accumulated
        trainer.update(num_samples_per_update)

        # Clear after update
        if num_accumulated > 1:
            net.collect_params().zero_grad()
        if (update_idx + 1) % train_log_interval == 0:
            log_loss = sum([ele.as_in_ctx(ctx_l[0])
                            for ele in log_loss_l]).asnumpy()
            log_num_samples = sum(log_num_samples_l)
            logger.info(
                '[Iter {}/{}, Epoch {}] train loss={}, gnorm={}, lr={}, #samples processed={},'
                ' #sample per second={}'.format(
                    update_idx + 1, max_update,
                    int(update_idx / updates_per_epoch),
                    log_loss / log_num_samples, total_norm,
                    trainer.learning_rate, log_num_samples,
                    log_num_samples / (time.time() - logging_start_tick)))
            logging_start_tick = time.time()
            log_loss_l = [
                mx.np.array(0.0, dtype=np.float32, ctx=ctx) for ctx in ctx_l
            ]
            log_num_samples_l = [0 for _ in ctx_l]
        if (update_idx + 1) % valid_interval == 0 or (update_idx +
                                                      1) == max_update:
            valid_start_tick = time.time()
            dev_predictions = \
                _classification_regression_predict(net, dataloader=dev_dataloader,
                                                   problem_type=problem_types[0],
                                                   has_label=False)
            log_scores = [
                calculate_metric(scorer, gt_dev_labels, dev_predictions,
                                 problem_types[0])
                for scorer in log_metric_scorers
            ]
            dev_score = calculate_metric(stopping_metric_scorer, gt_dev_labels,
                                         dev_predictions, problem_types[0])
            valid_time_spent = time.time() - valid_start_tick

            if best_performance_score is None or \
                    (greater_is_better and dev_score >= best_performance_score) or \
                    (not greater_is_better and dev_score <= best_performance_score):
                find_better = True
                no_better_rounds = 0
                best_performance_score = dev_score
                net.save_parameters(os.path.join(exp_dir, 'best_model.params'))
            else:
                find_better = False
                no_better_rounds += 1
            mx.npx.waitall()
            loss_string = ', '.join([
                '{}={}'.format(metric.name, score)
                for score, metric in zip(log_scores, log_metric_scorers)
            ])
            logger.info('[Iter {}/{}, Epoch {}] valid {}, time spent={},'
                        ' total_time={:.2f}min'.format(
                            update_idx + 1, max_update,
                            int(update_idx / updates_per_epoch), loss_string,
                            valid_time_spent, (time.time() - start_tick) / 60))
            report_items = [('iteration', update_idx + 1),
                            ('report_idx', report_idx + 1),
                            ('epoch', int(update_idx / updates_per_epoch))] +\
                           [(metric.name, score)
                            for score, metric in zip(log_scores, log_metric_scorers)] + \
                           [('find_better', find_better),
                            ('time_spent', int(time.time() - start_tick))]
            total_time_spent = time.time() - start_tick
            if time_limits is not None and total_time_spent > time_limits:
                break
            report_idx += 1
            if stopping_metric_scorer._sign < 0:
                report_items.append(('reward_attr', -dev_score))
            else:
                report_items.append(('reward_attr', dev_score))
            report_items.append(('eval_metric', stopping_metric_scorer.name))
            report_items.append(('exp_dir', exp_dir))
            if find_better:
                best_report_items = report_items
            reporter(**dict(report_items))
            if no_better_rounds >= cfg.learning.early_stopping_patience:
                logger.info('Early stopping patience reached!')
                break
    best_report_items_dict = dict(best_report_items)
    best_report_items_dict['report_idx'] = report_idx + 1
    reporter(**best_report_items_dict)
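
# A minimal, self-contained sketch (toy data, CPU only) of the gradient
# accumulation pattern used above: set grad_req='add', run several
# forward/backward passes, then make a single trainer.step and zero the grads.
import mxnet as mx
from mxnet import autograd, gluon

acc_net = gluon.nn.Dense(1)
acc_net.initialize()
for p in acc_net.collect_params().values():
    p.grad_req = 'add'  # accumulate gradients across micro-batches
acc_trainer = gluon.Trainer(acc_net.collect_params(), 'sgd', {'learning_rate': 0.1})
l2_loss = gluon.loss.L2Loss()

acc_steps = 4
xs = mx.nd.random.uniform(shape=(acc_steps, 8, 16))
ys = mx.nd.random.uniform(shape=(acc_steps, 8, 1))
for i in range(acc_steps):  # several backward passes without an update ...
    with autograd.record():
        l = l2_loss(acc_net(xs[i]), ys[i]).mean()
    l.backward()
acc_trainer.step(acc_steps)  # ... then one parameter update
acc_net.collect_params().zero_grad()  # clear before the next accumulation cycle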
Example #6
    dataset = LSUN(root=opt.dataroot, classes=['bedroom_train'],
                   transform=transforms.Compose([
                       Resize(opt.imageSize, keep_ratio=True, interpolation=3),
                       transforms.CenterCrop(opt.imageSize),
                       transforms.ToTensor(),
                       transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
                   ]))
elif opt.dataset == 'cifar10':
    dataset = CIFAR10(root=opt.dataroot, train=True).transform_first(
        transforms.Compose([
            Resize(opt.imageSize, keep_ratio=True, interpolation=3),
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
        ]))
assert dataset
dataloader = DataLoader(dataset, batch_size=opt.batchSize,
                        shuffle=True, num_workers=int(opt.workers))
print('finish init dataloader')
ngpu = int(opt.ngpu)
nz = int(opt.nz)
ngf = int(opt.ngf)
ndf = int(opt.ndf)
nc = int(opt.nc)
n_extra_layers = int(opt.n_extra_layers)

# custom weights initialization called on netG and netD
def weights_init(layers):
    for layer in layers:
        classname = layer.__class__.__name__
        if classname.find('Conv') != -1:
            layer.weight.set_data(mx.ndarray.random.normal(0.0,0.02,shape=layer.weight.data().shape))
        elif classname.find('BatchNorm') != -1:
Example #7
def train(**kwargs):
    opt._parse(kwargs)

    # set random seed and cudnn benchmark

    sys.stdout = Logger(osp.join(opt.save_dir, 'log_train.txt'))

    print('=========user config==========')
    pprint(opt._state_dict())
    print('============end===============')

    print('initializing dataset {}'.format(opt.dataset))
    dataset = data_manager.init_dataset(name=opt.dataset)

    summary_writer = SummaryWriter(osp.join(opt.save_dir, 'tensorboard_log'))

    if 'triplet' in opt.model_name:
        trainloader = DataLoader(
            ImageData(dataset.train, TrainTransform(opt.height, opt.width)),
            sampler=RandomIdentitySampler(dataset.train, opt.num_instances),
            batch_size=opt.train_batch,
            num_workers=opt.workers,
            last_batch='discard')
    else:
        trainloader = DataLoader(
            ImageData(dataset.train, TrainTransform(opt.height, opt.width)),
            batch_size=opt.train_batch,
            shuffle=True,
            num_workers=opt.workers,
        )

    queryloader = DataLoader(
        ImageData(dataset.query, TestTransform(opt.height, opt.width)),
        batch_size=opt.test_batch,
        num_workers=opt.workers,
    )

    galleryloader = DataLoader(
        ImageData(dataset.gallery, TestTransform(opt.height, opt.width)),
        batch_size=opt.test_batch,
        num_workers=opt.workers,
    )

    print('initializing model ...')
    model = get_baseline_model(dataset.num_train_pids, mx.gpu(0),
                               opt.pretrained_model)

    print('model size: {:.5f}M'.format(
        sum(p.data().size for p in model.collect_params().values()) / 1e6))

    xent_criterion = gluon.loss.SoftmaxCrossEntropyLoss()
    tri_criterion = TripletLoss(opt.margin)

    def cls_criterion(cls_scores, feat, targets):
        cls_loss = xent_criterion(cls_scores, targets)
        return cls_loss

    def triplet_criterion(cls_scores, feat, targets):
        triplet_loss, dist_ap, dist_an = tri_criterion(feat, targets)
        return triplet_loss

    def cls_tri_criterion(cls_scores, feat, targets):
        cls_loss = xent_criterion(cls_scores, targets)
        triplet_loss, dist_ap, dist_an = tri_criterion(feat, targets)
        loss = cls_loss + triplet_loss
        return loss

    # get optimizer
    optimizer = gluon.Trainer(model.collect_params(), opt.optim, {
        'learning_rate': opt.lr,
        'wd': opt.weight_decay
    })

    def adjust_lr(optimizer, ep):
        if ep < 20:
            lr = 1e-4 * (ep + 1) / 2
        elif ep < 80:
            lr = 1e-3 * opt.num_gpu
        elif ep < 180:
            lr = 1e-4 * opt.num_gpu
        elif ep < 300:
            lr = 1e-5 * opt.num_gpu
        elif ep < 320:
            lr = 1e-5 * 0.1**((ep - 320) / 80) * opt.num_gpu
        elif ep < 400:
            lr = 1e-6
        elif ep < 480:
            lr = 1e-4 * opt.num_gpu
        else:
            lr = 1e-5 * opt.num_gpu

        optimizer.set_learning_rate(lr)

    start_epoch = opt.start_epoch

    # get trainer and evaluator
    use_criterion = None
    if opt.model_name == 'softmax':
        use_criterion = cls_criterion
    elif opt.model_name == 'softmax_triplet':
        use_criterion = cls_tri_criterion
    elif opt.model_name == 'triplet':
        use_criterion = triplet_criterion

    reid_trainer = reidTrainer(opt, model, optimizer, use_criterion,
                               summary_writer, mx.gpu(0))
    reid_evaluator = reidEvaluator(model, mx.gpu(0))

    # start training
    best_rank1 = -np.inf
    best_epoch = 0

    for epoch in range(start_epoch, opt.max_epoch):
        if opt.step_size > 0:
            adjust_lr(optimizer, epoch + 1)
        reid_trainer.train(epoch, trainloader)

        # only evaluate and save every eval_step epochs (or at the final epoch)
        if opt.eval_step > 0 and (epoch + 1) % opt.eval_step == 0 or (
                epoch + 1) == opt.max_epoch:
            rank1 = reid_evaluator.evaluate(queryloader, galleryloader)
            is_best = rank1 > best_rank1
            if is_best:
                best_rank1 = rank1
                best_epoch = epoch + 1

            state_dict = {'model': model, 'epoch': epoch}
            save_checkpoint(state_dict,
                            is_best=is_best,
                            save_dir=opt.save_dir,
                            filename='checkpoint_ep' + str(epoch + 1) +
                            '.params')

    print('Best rank-1 {:.1%}, achieved at epoch {}'.format(
        best_rank1, best_epoch))
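
# The training loader above uses last_batch='discard'; other examples in this
# listing use 'keep' and 'rollover'. A minimal self-contained sketch of how the
# three modes treat an incomplete final batch (10 samples, batch_size=4):
import mxnet as mx
from mxnet.gluon.data import ArrayDataset, DataLoader

toy_ds = ArrayDataset(mx.nd.arange(10))
for mode in ('keep', 'discard', 'rollover'):
    n_batches = sum(1 for _ in DataLoader(toy_ds, batch_size=4, last_batch=mode))
    print(mode, n_batches)  # keep: 3, discard: 2, rollover: 2 (leftover carries into the next epoch)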
Example #8
encoder =
decoder =
net.initialize(ctx= ctx_0)

# Build the data iterators
trainset = myDataLodaer('/home/dwy/DKGMA_data', 'train')
validaset = myDataLodaer('/home/dwy/DKGMA_data', 'valida')
# Optimizers
embedding.embedding.collect_params().setattr('grad_req', 'null')  # the pre-trained word embedding layer is frozen (not trained)
en_trainer = gluon.Trainer(encoder.collect_params(), 'adam', {'learning_rate': 1e-5, 'clip_gradient': 2})
de_trainer = gluon.Trainer(decoder.collect_params(), 'adam', {'learning_rate': 1e-5, 'clip_gradient': 2})
from time import time
tic = time()
total_loss = .0
epoch = 0
traindata = DataLoader(trainset, batch_size=64, shuffle=True, sampler=None, 
                      last_batch='keep', num_workers = 2)
for index, instance in enumerate(traindata):

    batchsize = instance.shape[0]
    t, a, k = instance.as_in_context(ctx)
    t_indice, a_indice = utils.bucket(t), utils.bucket(a)
    # title, abstract = utils.unk(t_indice), utils.unk(a_indice)
    t_mask, a_mask = (t_indice != 0), (a_indice != 0)
    indice = mx.nd.concat(t_indice, a_indice, dim=-1)

    with mx.autograd.record():
        title, abstract = embedding(t_indice), embedding(a_indice)
        loss = net(title, abstract, t_mask, a_mask, indice)
    loss.backward()
    en_trainer.step(batch_size=batchsize)  # update both encoder and decoder
    de_trainer.step(batch_size=batchsize)
Example #9
def train(net, train_data, test_data):
    """Train textCNN model for sentiment analysis."""
    start_pipeline_time = time.time()
    net, trainer = text_cnn.init(net, vocab, args.model_mode, context, args.lr)
    random.shuffle(train_data)
    sp = int(len(train_data)*0.9)
    train_dataloader = DataLoader(dataset=train_data[:sp],
                                  batch_size=args.batch_size,
                                  shuffle=True)
    val_dataloader = DataLoader(dataset=train_data[sp:],
                                batch_size=args.batch_size,
                                shuffle=False)
    test_dataloader = DataLoader(dataset=test_data,
                                 batch_size=args.batch_size,
                                 shuffle=False)
    # Training/Testing.
    best_val_acc = 0
    for epoch in range(args.epochs):
        # Epoch training stats.
        start_epoch_time = time.time()
        epoch_L = 0.0
        epoch_sent_num = 0
        epoch_wc = 0
        # Log interval training stats.
        start_log_interval_time = time.time()
        log_interval_wc = 0
        log_interval_sent_num = 0
        log_interval_L = 0.0
        for i, (data, label) in enumerate(train_dataloader):
            data = mx.nd.transpose(data.as_in_context(context))
            label = label.as_in_context(context)
            wc = max_len
            log_interval_wc += wc
            epoch_wc += wc
            log_interval_sent_num += data.shape[1]
            epoch_sent_num += data.shape[1]

            with autograd.record():
                output = net(data)
                L = loss(output, label).mean()
            L.backward()
            # Update parameter.
            trainer.step(1)
            log_interval_L += L.asscalar()
            epoch_L += L.asscalar()
            if (i + 1) % args.log_interval == 0:
                print('[Epoch %d Batch %d/%d] avg loss %g, throughput %gK wps' % (
                    epoch, i + 1, len(train_dataloader),
                    log_interval_L / log_interval_sent_num,
                    log_interval_wc / 1000 / (time.time() - start_log_interval_time)))
                # Clear log interval training stats.
                start_log_interval_time = time.time()
                log_interval_wc = 0
                log_interval_sent_num = 0
                log_interval_L = 0
        end_epoch_time = time.time()
        val_avg_L, val_acc = evaluate(net, val_dataloader)
        print('[Epoch %d] train avg loss %g, '
              'valid acc %.4f, valid avg loss %g, throughput %gK wps' % (
                  epoch, epoch_L / epoch_sent_num,
                  val_acc, val_avg_L,
                  epoch_wc / 1000 / (end_epoch_time - start_epoch_time)))

        if val_acc >= best_val_acc:
            print('Observed Improvement.')
            best_val_acc = val_acc
            test_avg_L, test_acc = evaluate(net, test_dataloader)

    print('Test loss %g, test acc %.4f'%(test_avg_L, test_acc))
    print('Total time cost %.2fs'%(time.time()-start_pipeline_time))
    return test_acc
Example #10
    # center and crop an area of size (224,224)
    cropped, crop_info = mx.image.center_crop(resized, SIZE)

    # transpose the channels to be (3,224,224)
    transposed = nd.transpose(cropped, (2, 0, 1))

    return transposed, label
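
# The transform() above is shown only from the center-crop step onwards. As a
# hedged, self-contained sketch (not the original code), a full version for
# ImageFolderDataset typically resizes the short side, center-crops, then moves
# channels first:
import mxnet as mx
from mxnet import nd

SIZE = (224, 224)

def transform_sketch(image, label):
    resized = mx.image.resize_short(image, 224)       # short side -> 224 px
    cropped, _ = mx.image.center_crop(resized, SIZE)  # (224, 224, 3)
    return nd.transpose(cropped, (2, 0, 1)), label    # channels first: (3, 224, 224)

toy_img = mx.nd.random.uniform(0, 255, shape=(256, 320, 3)).astype('uint8')
print(transform_sketch(toy_img, 0)[0].shape)          # (3, 224, 224)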


################################################
#  Loading Images from folders
################################################
dataset_train = ImageFolderDataset(root=train_data_dir, transform=transform)
dataset_test = ImageFolderDataset(root=validation_data_dir, transform=transform)

dataloader_train = DataLoader(dataset_train, batch_size=BATCH_SIZE,
                              shuffle=True, num_workers=NUM_WORKERS) # last_batch='discard' (removed for testing)
dataloader_test = DataLoader(dataset_test, batch_size=BATCH_SIZE, # last_batch='discard',
                             shuffle=True, num_workers=NUM_WORKERS)
print("Train dataset: {} images, Test dataset: {} images".format(len(dataset_train), len(dataset_test)))


################################################
#  Check categories - for debugging only
################################################

categories = dataset_train.synsets
NUM_CLASSES = len(categories)

print(categories)
print(NUM_CLASSES)
Example #11
    return dataset, lengths


# Preprocess the dataset
train_dataset, train_data_lengths = preprocess_dataset(train_dataset)
valid_dataset, valid_data_lengths = preprocess_dataset(valid_dataset)
test_dataset, test_data_lengths = preprocess_dataset(test_dataset)

# Construct the DataLoader. Pad data and stack label
batchify_fn = nlp.data.batchify.Tuple(
    nlp.data.batchify.Pad(axis=0, ret_length=True),
    nlp.data.batchify.Stack(dtype='float32'))
if args.bucket_type is None:
    print('Bucketing strategy is not used!')
    train_dataloader = DataLoader(dataset=train_dataset,
                                  batch_size=args.batch_size,
                                  shuffle=True,
                                  batchify_fn=batchify_fn)
else:
    if args.bucket_type == 'fixed':
        print('Use FixedBucketSampler')
        batch_sampler = nlp.data.FixedBucketSampler(
            train_data_lengths,
            batch_size=args.batch_size,
            num_buckets=args.bucket_num,
            ratio=args.bucket_ratio,
            shuffle=True)
        print(batch_sampler.stats())
    elif args.bucket_type == 'sorted':
        print('Use SortedBucketSampler')
        batch_sampler = nlp.data.SortedBucketSampler(
            train_data_lengths,
Example #12
                                    std=nd.array([0.229, 0.224, 0.225]))


def transform_val(im, label):
    im = im.astype('float32') / 255
    for aug in val_auglist:
        im = aug(im)
    im = nd.transpose(im, (2, 0, 1))
    return im, label


batch_size = 64
ctx = mx.gpu()

val_set = gluon.data.vision.CIFAR10(train=False, transform=transform_val)
val_data = DataLoader(val_set, batch_size, False, num_workers=2)

net = get_attention_cifar(10, num_layers=92)
net.load_parameters("models/cifar10-epoch-80.params", ctx=ctx)
net.hybridize()

metric = mtc.Accuracy()
cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()
val_loss = 0

for i, batch in tqdm(enumerate(val_data)):
    data = batch[0].as_in_context(ctx)
    labels = batch[1].as_in_context(ctx)
    outputs = net(data)
    metric.update(labels, outputs)
Example #13
import numpy as np
import time

from sklearn.metrics import precision_score, recall_score

# feat_path, knn_graph_path, label_path
ctx = mx.gpu()
nepoch = 100
batch_size = 64
k_at_hop = [200, 5]
data = loader.TrainDatasetFromNP(
    r'D:\Workspace\Projects\ChaosLab.old\build\x64\pick\feats.npy',
    r'D:\Workspace\Projects\ChaosLab.old\build\x64\pick\knn.npy',
    r'D:\Workspace\Projects\ChaosLab.old\build\x64\pick\labels.npy', k_at_hop)

data_loader = DataLoader(dataset=data, batch_size=batch_size, shuffle=True)


def accuracy(pred, label):
    pred = pred > 0.5
    acc = mx.nd.mean(pred == label)
    #pred = torch.argmax(pred, dim=1).long()
    #acc = torch.mean((pred == label).float())
    pred = pred.asnumpy()
    label = label.asnumpy()
    p = precision_score(label, pred)
    r = recall_score(label, pred)
    return p, r, acc


def Train():
Example #14
    #AdaptResize(72000),
    #AdaptResize(360000),
    ToNDArray(),
    #Normalize(nd.array([107]), nd.array([1]))
])

# my_train = ReadDataSet('..\\dhSegment\\dataset\\ENP_500', 'train', train_aug)
# my_train = ReadDataSet('ENP_500', 'train', train_aug, fixed_weight=True, log_weight=True, log_base=1.1)
# my_train = ReadDataSet('ENP_500', 'train', train_aug, fixed_weight=True)
# my_test = ReadDataSet('ENP_500', 'val', test_aug, fixed_weight=True)
my_train = ReadDataSet('bw', 'train', train_aug, fixed_weight=True)
my_test = ReadDataSet('bw', 'val', test_aug, fixed_weight=True)

train_loader = DataLoader(my_train,
                          batch_size=1,
                          shuffle=False,
                          last_batch='rollover',
                          num_workers=4,
                          thread_pool=True)
test_loader = DataLoader(my_test,
                         batch_size=1,
                         shuffle=False,
                         last_batch='keep',
                         num_workers=4,
                         thread_pool=True)

model = zoo.load_pretrained_resnext_to_unext101_64_4d(ctx=_ctx,
                                                      migrate_input_norm=False,
                                                      fine_tune=False)
# model=zoo.resume_training_unext101_64_4d(freeze_input_norm = True,
# fine_tune = True,
# ctx=_ctx,
Example #15
                        calib_mode=calib_mode,
                        quantized_dtype=args.quantized_dtype,
                        logger=logger)
    suffix = '-quantized'
else:
    if logger:
        logger.info('Creating DataLoader for reading calibration dataset')
    dataset = mx.gluon.data.vision.ImageRecordDataset(args.calib_dataset)
    transformer = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=rgb_mean, std=rgb_std)
    ])
    data_loader = DataLoader(dataset.transform_first(transformer),
                             batch_size,
                             shuffle=args.shuffle_dataset,
                             num_workers=data_nthreads)
    qsym = quantize_net(net,
                        ctx=ctx,
                        exclude_layers_match=excluded_sym_names,
                        calib_mode=calib_mode,
                        calib_data=data_loader,
                        num_calib_batches=num_calib_batches,
                        quantized_dtype=args.quantized_dtype,
                        logger=logger)
    if calib_mode == 'entropy':
        suffix = '-quantized-%dbatches-entropy' % num_calib_batches
    elif calib_mode == 'naive':
        suffix = '-quantized-%dbatches-naive' % num_calib_batches
    else:
        raise ValueError(
Example #16
        balanced_batch_images = nd.concat(*balanced_batch_images, dim=0)
        balanced_batch_texts = nd.concat(*balanced_batch_texts, dim=0)
        return balanced_batch_images, balanced_batch_texts


if __name__ == '__main__':
    from mxnet.gluon.data.vision import transforms

    train_transforms = transforms.Compose(
        [transforms.RandomColorJitter(brightness=0.5),
         transforms.ToTensor()])
    dataset = RecordDataset(r'E:\zj\dataset\rec_train\train.rec',
                            img_h=32,
                            img_w=320,
                            img_channel=3,
                            num_label=80)
    data_loader = DataLoader(dataset=dataset.transform_first(train_transforms),
                             batch_size=1,
                             shuffle=True,
                             last_batch='rollover',
                             num_workers=2)
    for i, (images, labels) in enumerate(data_loader):
        print(images.shape)
        print(labels)
        img = images[0].asnumpy().transpose((1, 2, 0))
        from matplotlib import pyplot as plt

        plt.imshow(img)
        plt.show()
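
# A tiny self-contained sketch of the transform_first() call used above: the
# function is applied only to the first element of each sample (the image), so
# the labels come out of the loader unchanged.
import mxnet as mx
from mxnet.gluon.data import SimpleDataset

toy_set = SimpleDataset([(mx.nd.ones((2, 2)), 5), (mx.nd.zeros((2, 2)), 7)])
toy_scaled = toy_set.transform_first(lambda img: img * 255)
print(toy_scaled[0][0].max().asscalar(), toy_scaled[0][1])  # 255.0 5 (label untouched)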
Example #17
def train():
    """Training function."""
    trainer = gluon.Trainer(model.collect_params(), args.optimizer,
                            {'learning_rate': args.lr, 'beta2': 0.98, 'epsilon': 1e-9})

    train_batchify_fn = btf.Tuple(btf.Pad(), btf.Pad(), btf.Stack(), btf.Stack())
    test_batchify_fn = btf.Tuple(btf.Pad(), btf.Pad(), btf.Stack(), btf.Stack(), btf.Stack())
    target_val_lengths = list(map(lambda x: x[-1], data_val_lengths))
    target_test_lengths = list(map(lambda x: x[-1], data_test_lengths))
    if args.bucket_scheme == 'constant':
        bucket_scheme = ConstWidthBucket()
    elif args.bucket_scheme == 'linear':
        bucket_scheme = LinearWidthBucket()
    elif args.bucket_scheme == 'exp':
        bucket_scheme = ExpWidthBucket(bucket_len_step=1.2)
    else:
        raise NotImplementedError
    train_batch_sampler = FixedBucketSampler(lengths=data_train_lengths,
                                             batch_size=args.batch_size,
                                             num_buckets=args.num_buckets,
                                             ratio=args.bucket_ratio,
                                             shuffle=True,
                                             use_average_length=True,
                                             bucket_scheme=bucket_scheme)
    logging.info('Train Batch Sampler:\n{}'.format(train_batch_sampler.stats()))
    train_data_loader = DataLoader(data_train,
                                   batch_sampler=train_batch_sampler,
                                   batchify_fn=train_batchify_fn,
                                   num_workers=8)

    val_batch_sampler = FixedBucketSampler(lengths=target_val_lengths,
                                           batch_size=args.test_batch_size,
                                           num_buckets=args.num_buckets,
                                           ratio=args.bucket_ratio,
                                           shuffle=False,
                                           use_average_length=True,
                                           bucket_scheme=bucket_scheme)
    logging.info('Valid Batch Sampler:\n{}'.format(val_batch_sampler.stats()))
    val_data_loader = DataLoader(data_val,
                                 batch_sampler=val_batch_sampler,
                                 batchify_fn=test_batchify_fn,
                                 num_workers=8)
    test_batch_sampler = FixedBucketSampler(lengths=target_test_lengths,
                                            batch_size=args.test_batch_size,
                                            num_buckets=args.num_buckets,
                                            ratio=args.bucket_ratio,
                                            shuffle=False,
                                            use_average_length=True,
                                            bucket_scheme=bucket_scheme)
    logging.info('Test Batch Sampler:\n{}'.format(test_batch_sampler.stats()))
    test_data_loader = DataLoader(data_test,
                                  batch_sampler=test_batch_sampler,
                                  batchify_fn=test_batchify_fn,
                                  num_workers=8)

    if args.bleu == 'tweaked':
        bpe = True
        split_compound_word = True
        tokenized = True
    elif args.bleu == '13a' or args.bleu == 'intl':
        bpe = False
        split_compound_word = False
        tokenized = False
    else:
        raise NotImplementedError

    best_valid_bleu = 0.0
    step_num = 0
    warmup_steps = args.warmup_steps
    grad_interval = args.num_accumulated
    model.collect_params().setattr('grad_req', 'add')
    average_start = (len(train_data_loader) // grad_interval) * (args.epochs - args.average_start)
    average_param_dict = None
    model.collect_params().zero_grad()
    for epoch_id in range(args.epochs):
        log_avg_loss = 0
        log_wc = 0
        loss_denom = 0
        step_loss = 0
        log_start_time = time.time()
        for batch_id, (src_seq, tgt_seq, src_valid_length, tgt_valid_length) \
                in enumerate(train_data_loader):
            src_valid_length = nd.cast(src_valid_length, dtype='float32')
            tgt_valid_length = nd.cast(tgt_valid_length, dtype='float32')
            if batch_id % grad_interval == 0:
                step_num += 1
                new_lr = args.lr / math.sqrt(args.num_units) \
                         * min(1. / math.sqrt(step_num), step_num * warmup_steps ** (-1.5))
                trainer.set_learning_rate(new_lr)
            src_wc = src_valid_length.sum().asscalar()
            tgt_wc = tgt_valid_length.sum().asscalar()
            loss_denom += tgt_wc - tgt_valid_length.shape[0]
            if src_seq.shape[0] > len(ctx):
                src_seq_list, tgt_seq_list, src_valid_length_list, tgt_valid_length_list \
                    = [gluon.utils.split_and_load(seq, ctx, batch_axis=0, even_split=False)
                       for seq in [src_seq, tgt_seq, src_valid_length, tgt_valid_length]]
            else:
                src_seq_list = [src_seq.as_in_context(ctx[0])]
                tgt_seq_list = [tgt_seq.as_in_context(ctx[0])]
                src_valid_length_list = [src_valid_length.as_in_context(ctx[0])]
                tgt_valid_length_list = [tgt_valid_length.as_in_context(ctx[0])]

            Ls = []
            with mx.autograd.record():
                for src_seq, tgt_seq, src_valid_length, tgt_valid_length \
                        in zip(src_seq_list, tgt_seq_list,
                               src_valid_length_list, tgt_valid_length_list):
                    out, _ = model(src_seq, tgt_seq[:, :-1],
                                   src_valid_length, tgt_valid_length - 1)
                    smoothed_label = label_smoothing(tgt_seq[:, 1:])
                    ls = loss_function(out, smoothed_label, tgt_valid_length - 1).sum()
                    Ls.append((ls * (tgt_seq.shape[1] - 1)) / args.batch_size)
            for L in Ls:
                L.backward()
            if batch_id % grad_interval == grad_interval - 1 or\
                    batch_id == len(train_data_loader) - 1:
                if average_param_dict is None:
                    average_param_dict = {k: v.data(ctx[0]).copy() for k, v in
                                          model.collect_params().items()}
                trainer.step(float(loss_denom) / args.batch_size)
                param_dict = model.collect_params()
                param_dict.zero_grad()
                if step_num > average_start:
                    alpha = 1. / max(1, step_num - average_start)
                    for name, average_param in average_param_dict.items():
                        average_param[:] += alpha * (param_dict[name].data(ctx[0]) - average_param)
            step_loss += sum([L.asscalar() for L in Ls])
            if batch_id % grad_interval == grad_interval - 1 or\
                    batch_id == len(train_data_loader) - 1:
                log_avg_loss += step_loss / loss_denom * args.batch_size
                loss_denom = 0
                step_loss = 0
            log_wc += src_wc + tgt_wc
            if (batch_id + 1) % (args.log_interval * grad_interval) == 0:
                wps = log_wc / (time.time() - log_start_time)
                logging.info('[Epoch {} Batch {}/{}] loss={:.4f}, ppl={:.4f}, '
                             'throughput={:.2f}K wps, wc={:.2f}K'
                             .format(epoch_id, batch_id + 1, len(train_data_loader),
                                     log_avg_loss / args.log_interval,
                                     np.exp(log_avg_loss / args.log_interval),
                                     wps / 1000, log_wc / 1000))
                log_start_time = time.time()
                log_avg_loss = 0
                log_wc = 0
        mx.nd.waitall()
        valid_loss, valid_translation_out = evaluate(val_data_loader, ctx[0])
        valid_bleu_score, _, _, _, _ = compute_bleu([val_tgt_sentences], valid_translation_out,
                                                    tokenized=tokenized, tokenizer=args.bleu,
                                                    split_compound_word=split_compound_word,
                                                    bpe=bpe)
        logging.info('[Epoch {}] valid Loss={:.4f}, valid ppl={:.4f}, valid bleu={:.2f}'
                     .format(epoch_id, valid_loss, np.exp(valid_loss), valid_bleu_score * 100))
        test_loss, test_translation_out = evaluate(test_data_loader, ctx[0])
        test_bleu_score, _, _, _, _ = compute_bleu([test_tgt_sentences], test_translation_out,
                                                   tokenized=tokenized, tokenizer=args.bleu,
                                                   split_compound_word=split_compound_word,
                                                   bpe=bpe)
        logging.info('[Epoch {}] test Loss={:.4f}, test ppl={:.4f}, test bleu={:.2f}'
                     .format(epoch_id, test_loss, np.exp(test_loss), test_bleu_score * 100))
        write_sentences(valid_translation_out,
                        os.path.join(args.save_dir, 'epoch{:d}_valid_out.txt').format(epoch_id))
        write_sentences(test_translation_out,
                        os.path.join(args.save_dir, 'epoch{:d}_test_out.txt').format(epoch_id))
        if valid_bleu_score > best_valid_bleu:
            best_valid_bleu = valid_bleu_score
            save_path = os.path.join(args.save_dir, 'valid_best.params')
            logging.info('Save best parameters to {}'.format(save_path))
            model.save_params(save_path)
        save_path = os.path.join(args.save_dir, 'epoch{:d}.params'.format(epoch_id))
        model.save_params(save_path)
    save_path = os.path.join(args.save_dir, 'average.params')
    mx.nd.save(save_path, average_param_dict)
    if args.average_checkpoint:
        for j in range(args.num_averages):
            params = mx.nd.load(os.path.join(args.save_dir,
                                             'epoch{:d}.params'.format(args.epochs - j - 1)))
            alpha = 1. / (j + 1)
            for k, v in model._collect_params_with_prefix().items():
                for c in ctx:
                    v.data(c)[:] += alpha * (params[k].as_in_context(c) - v.data(c))
    elif args.average_start > 0:
        for k, v in model.collect_params().items():
            v.set_data(average_param_dict[k])
    else:
        model.load_params(os.path.join(args.save_dir, 'valid_best.params'), ctx)
    valid_loss, valid_translation_out = evaluate(val_data_loader, ctx[0])
    valid_bleu_score, _, _, _, _ = compute_bleu([val_tgt_sentences], valid_translation_out,
                                                tokenized=tokenized, tokenizer=args.bleu, bpe=bpe,
                                                split_compound_word=split_compound_word)
    logging.info('Best model valid Loss={:.4f}, valid ppl={:.4f}, valid bleu={:.2f}'
                 .format(valid_loss, np.exp(valid_loss), valid_bleu_score * 100))
    test_loss, test_translation_out = evaluate(test_data_loader, ctx[0])
    test_bleu_score, _, _, _, _ = compute_bleu([test_tgt_sentences], test_translation_out,
                                               tokenized=tokenized, tokenizer=args.bleu, bpe=bpe,
                                               split_compound_word=split_compound_word)
    logging.info('Best model test Loss={:.4f}, test ppl={:.4f}, test bleu={:.2f}'
                 .format(test_loss, np.exp(test_loss), test_bleu_score * 100))
    write_sentences(valid_translation_out,
                    os.path.join(args.save_dir, 'best_valid_out.txt'))
    write_sentences(test_translation_out,
                    os.path.join(args.save_dir, 'best_test_out.txt'))
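
# A self-contained sketch of the inverse-sqrt warmup schedule computed inline
# above: new_lr = lr / sqrt(num_units) * min(1/sqrt(step), step * warmup_steps**-1.5).
# The values below (lr=2.0, num_units=512, warmup_steps=4000) are illustrative,
# not read from args.
import math

sk_lr, sk_units, sk_warmup = 2.0, 512, 4000
for step_num in (1, 1000, 4000, 16000, 64000):
    new_lr = sk_lr / math.sqrt(sk_units) * min(1. / math.sqrt(step_num),
                                               step_num * sk_warmup ** (-1.5))
    print(step_num, new_lr)  # rises linearly during warmup, then decays as 1/sqrt(step)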
Example #18
def test_multi_worker_forked_data_loader():
    """
    Test should successfully run its course of multi-process/forked data loader without errors
    """
    class Dummy(Dataset):
        def __init__(self, random_shape):
            self.random_shape = random_shape

        def __getitem__(self, idx):
            key = idx
            if self.random_shape:
                out = np.random.uniform(size=(random.randint(1000, 1100), 40))
                labels = np.random.uniform(size=(random.randint(10, 15)))
            else:
                out = np.random.uniform(size=(1000, 40))
                labels = np.random.uniform(size=(10))
            return key, out, labels

        def __len__(self):
            return 50

        def batchify(self, data):
            """
            Collate data into batch. Use shared memory for stacking.

            :param data: a list of array, with layout of 'NTC'.
            :return: either x and x's unpadded lengths (if labels are not supplied),
                     or x, x's unpadded lengths, y and y's unpadded lengths (if labels are supplied).
            """

            # input layout is NTC
            keys, inputs, labels = [item[0] for item in data], [item[1] for item in data], \
                                   [item[2] for item in data]

            if len(data) > 1:
                max_data_len = max([seq.shape[0] for seq in inputs])
                max_labels_len = 0 if not labels else max([seq.shape[0] for seq in labels])
            else:
                max_data_len = inputs[0].shape[0]
                max_labels_len = 0 if not labels else labels[0].shape[0]

            x_lens = [item.shape[0] for item in inputs]
            y_lens = [item.shape[0] for item in labels]

            for i, seq in enumerate(inputs):
                pad_len = max_data_len - seq.shape[0]
                inputs[i] = np.pad(seq, ((0, pad_len), (0, 0)), 'constant', constant_values=0)
                labels[i] = np.pad(labels[i], (0, max_labels_len - labels[i].shape[0]),
                                   'constant', constant_values=-1)

            inputs = np.asarray(inputs, dtype=np.float32)
            if labels is not None:
                labels = np.asarray(labels, dtype=np.float32)
            inputs = inputs.transpose((1, 0, 2))
            labels = labels.transpose((1, 0))

            return (nd.array(inputs, dtype=inputs.dtype, ctx=context.Context('cpu_shared', 0)),
                    nd.array(x_lens, ctx=context.Context('cpu_shared', 0))) \
                if labels is None else (
                nd.array(inputs, dtype=inputs.dtype, ctx=context.Context('cpu_shared', 0)),
                nd.array(x_lens, ctx=context.Context('cpu_shared', 0)),
                nd.array(labels, dtype=labels.dtype, ctx=context.Context('cpu_shared', 0)),
                nd.array(y_lens, ctx=context.Context('cpu_shared', 0)))


    # This test is pointless on Windows because Windows doesn't fork
    if platform.system() != 'Windows':
        data = Dummy(True)
        loader = DataLoader(data, batch_size=40, batchify_fn=data.batchify, num_workers=2)
        for epoch in range(1):
            for i, data in enumerate(loader):
                if i % 100 == 0:
                    print(data)
                    print('{}:{}'.format(epoch, i))
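
# A small sketch of the 'cpu_shared' detail used in batchify() above: arrays
# built inside DataLoader worker processes are returned on the shared-memory
# CPU context so they can be handed back to the main process without a copy.
import mxnet as mx

shared = mx.nd.array([1, 2, 3], ctx=mx.Context('cpu_shared', 0))
print(shared.context)  # cpu_shared(0)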
Example #19
def test_multimodal_batchify(dataset_name, url, label_column, backbone_name,
                             all_to_text, insert_sep, stochastic_chunk):
    # Test for multimodal batchify
    all_df = load_pd.load(url)
    feature_columns = [col for col in all_df.columns if col != label_column]
    train_df, valid_df = train_test_split(
        all_df, test_size=0.1, random_state=np.random.RandomState(100))
    column_types, problem_type = infer_column_problem_types(
        train_df, valid_df, label_columns=label_column)
    cfg = base_preprocess_cfg()
    if all_to_text:
        cfg.defrost()
        cfg.categorical.convert_to_text = True
        cfg.numerical.convert_to_text = True
        cfg.freeze()

    preprocessor = MultiModalTextFeatureProcessor(column_types=column_types,
                                                  label_column=label_column,
                                                  tokenizer_name=backbone_name,
                                                  cfg=cfg)
    cls_id, sep_id = get_cls_sep_id(preprocessor.tokenizer)
    train_dataset = preprocessor.fit_transform(train_df[feature_columns],
                                               train_df[label_column])
    test_dataset = preprocessor.transform(valid_df[feature_columns])
    auto_max_length = auto_shrink_max_length(
        train_dataset=train_dataset,
        insert_sep=insert_sep,
        num_text_features=len(preprocessor.text_feature_names),
        auto_max_length_quantile=0.9,
        round_to=32,
        max_length=512)
    train_batchify_fn = MultiModalTextBatchify(
        num_text_inputs=len(preprocessor.text_feature_names),
        num_categorical_inputs=len(preprocessor.categorical_feature_names),
        num_numerical_inputs=len(preprocessor.numerical_feature_names) > 0,
        cls_token_id=cls_id,
        sep_token_id=sep_id,
        max_length=auto_max_length,
        mode='train',
        stochastic_chunk=stochastic_chunk,
        insert_sep=insert_sep)
    test_batchify_fn = MultiModalTextBatchify(
        num_text_inputs=len(preprocessor.text_feature_names),
        num_categorical_inputs=len(preprocessor.categorical_feature_names),
        num_numerical_inputs=len(preprocessor.numerical_feature_names) > 0,
        cls_token_id=cls_id,
        sep_token_id=sep_id,
        max_length=auto_max_length,
        mode='test',
        stochastic_chunk=stochastic_chunk,
        insert_sep=insert_sep)
    train_dataloader = DataLoader(train_dataset,
                                  batch_size=4,
                                  batchify_fn=train_batchify_fn,
                                  shuffle=True)
    test_dataloader = DataLoader(test_dataset,
                                 batch_size=4,
                                 batchify_fn=test_batchify_fn,
                                 shuffle=False)
    for sample in train_dataloader:
        features, label = sample[0], sample[1]
        assert len(features) == train_batchify_fn.num_text_outputs + \
               train_batchify_fn.num_categorical_outputs + train_batchify_fn.num_numerical_outputs
        text_token_ids, text_valid_length, text_segment_ids = features[0]
        assert text_token_ids.shape[1] <= auto_max_length
        assert text_segment_ids.shape[1] <= auto_max_length
        assert text_token_ids.shape == text_segment_ids.shape
    for sample in test_dataloader:
        assert len(sample) == test_batchify_fn.num_text_outputs + \
               test_batchify_fn.num_categorical_outputs + test_batchify_fn.num_numerical_outputs
        text_token_ids, text_valid_length, text_segment_ids = sample[0]
        assert text_token_ids.shape[1] <= auto_max_length
        assert text_segment_ids.shape[1] <= auto_max_length
        assert text_token_ids.shape == text_segment_ids.shape
Example #20
# which handles padding automatically.
# :py:class:`gluoncv.data.batchify.Stack` in addition, is used to stack NDArrays with consistent shapes.
# :py:class:`gluoncv.data.batchify.Tuple` is used to handle different behaviors across multiple outputs from transform functions.

from gluoncv.data.batchify import Tuple, Stack, Pad
from mxnet.gluon.data import DataLoader

batch_size = 2  # for tutorial, we use smaller batch-size
# you can make it larger (if your CPU has more cores) to accelerate data loading
num_workers = 0

# behavior of batchify_fn: stack images, and pad labels
batchify_fn = Tuple(Stack(), Pad(pad_val=-1))
train_loader = DataLoader(train_dataset.transform(train_transform),
                          batch_size,
                          shuffle=True,
                          batchify_fn=batchify_fn,
                          last_batch='rollover',
                          num_workers=num_workers)
val_loader = DataLoader(val_dataset.transform(val_transform),
                        batch_size,
                        shuffle=False,
                        batchify_fn=batchify_fn,
                        last_batch='keep',
                        num_workers=num_workers)

for ib, batch in enumerate(train_loader):
    if ib > 3:
        break
    print('data:', batch[0].shape, 'label:', batch[1].shape)
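
# A self-contained toy run of the batchify_fn defined above: equally-shaped
# images are stacked, while the variable-length label arrays are padded with -1
# so they can form one batch (shapes here are made up for illustration).
import numpy as np

toy_samples = [(np.zeros((3, 4, 4)), np.array([[0, 0.1, 0.1, 0.5, 0.5]])),
               (np.ones((3, 4, 4)), np.array([[1, 0.2, 0.2, 0.6, 0.6],
                                              [2, 0.3, 0.3, 0.7, 0.7]]))]
toy_imgs, toy_labels = batchify_fn(toy_samples)
print('images:', toy_imgs.shape)    # (2, 3, 4, 4)
print('labels:', toy_labels.shape)  # (2, 2, 5); the shorter label array is padded with -1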

##########################################################
Example #21
    def get_dataloader(self, bucket_mode=False):

        if bucket_mode:
            train_dataset = BucketDataset(
                cfg.train_data_path,
                cfg.voc_path,
                short_side=cfg.short_side,
                fix_width=cfg.fix_width,
                max_len=cfg.max_char_len,
                use_augment=True,
                add_symbol=True,
                max_sample_num=100000,
                load_bucket_path='./data/%s_bucket.json' % cfg.dataset_name)
            val_dataset = BucketDataset(cfg.val_data_path,
                                        cfg.voc_path,
                                        short_side=cfg.short_side,
                                        fix_width=cfg.fix_width,
                                        max_len=cfg.max_char_len,
                                        use_augment=False,
                                        add_symbol=True,
                                        max_sample_num=10000)
            train_sampler = BucketSampler(cfg.batch_size,
                                          train_dataset.bucket_dict,
                                          shuffle=True,
                                          last_batch='discard')
            val_sampler = BucketSampler(1,
                                        val_dataset.bucket_dict,
                                        shuffle=False,
                                        last_batch='keep')
            train_dataloader = DataLoader(train_dataset,
                                          batch_sampler=train_sampler,
                                          num_workers=cfg.num_workers,
                                          pin_memory=True)
            val_dataloader = DataLoader(val_dataset,
                                        batch_sampler=val_sampler,
                                        num_workers=cfg.num_workers,
                                        pin_memory=True)
        else:
            train_dataset = FixedSizeDataset(cfg.train_data_path,
                                             cfg.voc_path,
                                             short_side=cfg.short_side,
                                             fix_width=cfg.fix_width,
                                             max_len=cfg.max_char_len,
                                             use_augment=True,
                                             add_symbol=True,
                                             max_sample_num=100000)
            val_dataset = FixedSizeDataset(cfg.val_data_path,
                                           cfg.voc_path,
                                           short_side=cfg.short_side,
                                           fix_width=cfg.fix_width,
                                           max_len=cfg.max_char_len,
                                           use_augment=False,
                                           add_symbol=True,
                                           max_sample_num=10000)

            train_dataloader = DataLoader(train_dataset,
                                          batch_size=cfg.batch_size,
                                          last_batch='discard',
                                          shuffle=True,
                                          num_workers=cfg.num_workers,
                                          pin_memory=True)
            val_dataloader = DataLoader(val_dataset,
                                        batch_size=cfg.batch_size,
                                        last_batch='keep',
                                        num_workers=cfg.num_workers,
                                        pin_memory=True)
        return train_dataloader, val_dataloader
Example no. 22
0
from mxnet.gluon.data import DataLoader
from mxnet.gluon.data.vision import datasets, transforms
from mxnet.gluon import loss as gloss, Trainer
from mxnet import nd, init, autograd
import generator, discriminator

batch_size = 128
lr = 0.0002
beta1 = 0.5  # Adam first-moment coefficient (the DCGAN-style setting)
epochs = 5

transform = transforms.Compose([transforms.Resize(64), transforms.CenterCrop(64),
                                transforms.ToTensor(),
                                transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
# ImageFolderDataset's `transform` argument expects a (data, label) callable,
# so apply the image-only transform chain with transform_first instead.
dataset = datasets.ImageFolderDataset(root='data/celeba').transform_first(transform)
data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True, last_batch='discard', num_workers=4)

netG = generator.Generator()
netD = discriminator.Discriminator()

netG.initialize(init=init.Normal(sigma=0.02))
netD.initialize(init=init.Normal(sigma=0.02))

trainerG = Trainer(netG.collect_params(), 'adam', {'learning_rate': lr, 'beta1': beta1})
trainerD = Trainer(netD.collect_params(), 'adam', {'learning_rate': lr, 'beta1': beta1})

loss = gloss.SigmoidBCELoss()

for epoch in range(epochs):
    for i, batch in enumerate(data_loader):
        data_real = batch[0]
Example no. 23
0
    def data_loader(self, sentences, shuffle=False):
        """Load, tokenize and prepare the input sentences."""
        dataset = BertEmbeddingDataset(sentences, self.transform)
        return DataLoader(dataset=dataset,
                          batch_size=self.batch_size,
                          shuffle=shuffle)
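
    # Hypothetical usage sketch (the `embedder` name is assumed for illustration and is
    # not part of the original snippet): given an instance of this class,
    #     for batch in embedder.data_loader(['first sentence', 'second sentence']):
    #         ...
    # iterates the tokenized sentences in minibatches of `embedder.batch_size`.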
Example no. 24
0
def main():
    # importing libraries
    import pandas as pd
    import mxnet as mx
    from mxnet import nd, autograd, gluon
    from mxnet.gluon.data import ArrayDataset
    from mxnet.gluon.data import DataLoader
    import numpy as np
    import random

    # creating variables
    extension = '.csv'

    # Load Data
    categories = ['Excellent', 'Very_good', 'Good', 'Average', 'Poor']

    # Load the data in memory
    MAX_ITEMS_PER_CATEGORY = 80000

    # Load the cached data from file if it exists
    try:
        data = pd.read_pickle('pickleddata.pkl')
    except FileNotFoundError:
        data = None

    if data is None:
        data = pd.DataFrame(data={'X': [], 'Y': []})
        for index, category in enumerate(categories):
            df = pd.read_csv(category + extension, encoding='utf8')
            df = pd.DataFrame(data={
                'X': (df['Review'])[:MAX_ITEMS_PER_CATEGORY],
                'Y': index
            })
            data = pd.concat([data, df], ignore_index=True)  # DataFrame.append was removed in recent pandas
            print('{}:{} reviews'.format(category, len(df)))

        # Shuffle the samples
        data = data.sample(frac=1)
        data.reset_index(drop=True, inplace=True)
        # Saving the data in a pickled file
        pd.to_pickle(data, 'pickleddata.pkl')

    print('Value counts:\n', data['Y'].value_counts())
    for i, cat in enumerate(categories):
        print(i, cat)
    data.head()

    # Creating the dataset
    ALPHABET = list(
        "abcdefghijklmnopqrstuvwxyz0123456789-,;.!?:'\"/\\|_@#$%^&*~`+ =<>()[]{}"
    )  # The 69 characters as specified in the paper
    ALPHABET_INDEX = {letter: index
                      for index, letter in enumerate(ALPHABET)
                      }  # { a: 0, b: 1, etc}
    FEATURE_LEN = 1014  # max-length in characters for one document
    NUM_WORKERS = 0  # number of workers used in the data loading
    BATCH_SIZE = 128  # number of documents per batch

    def encode(text):
        encoded = np.zeros([len(ALPHABET), FEATURE_LEN], dtype='float32')
        # lower-case and reverse the text (the paper quantizes characters backwards),
        # then keep at most FEATURE_LEN characters
        review = text.lower()[::-1][:FEATURE_LEN]
        for i, letter in enumerate(review):
            if letter in ALPHABET_INDEX:
                encoded[ALPHABET_INDEX[letter]][i] = 1
        return encoded
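
    # A quick sanity check (hypothetical input, not part of the original script): each
    # document becomes a (len(ALPHABET), FEATURE_LEN) one-hot matrix, i.e. (69, 1014) here,
    # with at most one active row per character position.
    sample_encoding = encode("This course was great!")
    print(sample_encoding.shape)       # (69, 1014)
    print(int(sample_encoding.sum()))  # number of characters that matched the alphabet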

    def transform(x, y):
        return encode(x), y

    split = 0.8
    split_index = int(split * len(data))
    train_data_X = data['X'][:split_index].to_numpy()
    train_data_Y = data['Y'][:split_index].to_numpy()
    test_data_X = data['X'][split_index:].to_numpy()
    test_data_Y = data['Y'][split_index:].to_numpy()
    train_dataset = ArrayDataset(train_data_X,
                                 train_data_Y).transform(transform)
    test_dataset = ArrayDataset(test_data_X, test_data_Y).transform(transform)

    train_dataloader = DataLoader(train_dataset,
                                  shuffle=True,
                                  batch_size=BATCH_SIZE,
                                  num_workers=NUM_WORKERS,
                                  last_batch='rollover')
    test_dataloader = DataLoader(test_dataset,
                                 shuffle=False,
                                 batch_size=BATCH_SIZE,
                                 num_workers=NUM_WORKERS,
                                 last_batch='keep')  # keep the tail batch so every test sample is evaluated

    ctx = mx.gpu() if mx.context.num_gpus() else mx.cpu()

    NUM_FILTERS = 256  # number of convolutional filters per convolutional layer
    NUM_OUTPUTS = len(categories)  # number of classes
    FULLY_CONNECTED = 1024  # number of units in each fully connected dense layer
    DROPOUT_RATE = 0.5  # probability of dropping a unit
    LEARNING_RATE = 0.0001  # learning rate for SGD
    MOMENTUM = 0.9  # momentum for SGD
    WDECAY = 0.00001  # weight decay, regularization that limits the size of the weights

    net = gluon.nn.HybridSequential()
    with net.name_scope():
        net.add(
            gluon.nn.Conv1D(channels=NUM_FILTERS,
                            kernel_size=7,
                            activation='relu'))
        net.add(gluon.nn.MaxPool1D(pool_size=3, strides=3))
        net.add(
            gluon.nn.Conv1D(channels=NUM_FILTERS,
                            kernel_size=7,
                            activation='relu'))
        net.add(gluon.nn.MaxPool1D(pool_size=3, strides=3))
        net.add(
            gluon.nn.Conv1D(channels=NUM_FILTERS,
                            kernel_size=3,
                            activation='relu'))
        net.add(
            gluon.nn.Conv1D(channels=NUM_FILTERS,
                            kernel_size=3,
                            activation='relu'))
        net.add(
            gluon.nn.Conv1D(channels=NUM_FILTERS,
                            kernel_size=3,
                            activation='relu'))
        net.add(
            gluon.nn.Conv1D(channels=NUM_FILTERS,
                            kernel_size=3,
                            activation='relu'))
        net.add(gluon.nn.MaxPool1D(pool_size=3, strides=3))
        net.add(gluon.nn.Flatten())
        net.add(gluon.nn.Dense(FULLY_CONNECTED, activation='relu'))
        net.add(gluon.nn.Dropout(DROPOUT_RATE))
        net.add(gluon.nn.Dense(FULLY_CONNECTED, activation='relu'))
        net.add(gluon.nn.Dropout(DROPOUT_RATE))
        net.add(gluon.nn.Dense(NUM_OUTPUTS))
    print(net)

    hybridize = True  # compile the network for speed; in-depth debugging is then no longer possible
    # load_params = True  # Load pre-trained model

    net.initialize(mx.init.Xavier(magnitude=2.24), ctx=ctx)

    if hybridize:
        net.hybridize(static_alloc=True, static_shape=True)

    softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()

    trainer = gluon.Trainer(net.collect_params(), 'sgd', {
        'learning_rate': LEARNING_RATE,
        'wd': WDECAY,
        'momentum': MOMENTUM
    })

    def evaluate_accuracy(data_iterator, net):
        acc = mx.metric.Accuracy()
        for i, (data, label) in enumerate(data_iterator):
            data = data.as_in_context(ctx)
            label = label.as_in_context(ctx)
            output = net(data)
            prediction = nd.argmax(output, axis=1)
            acc.update(preds=prediction, labels=label)
        return acc.get()[1]

    start_epoch = 6
    number_epochs = 7
    smoothing_constant = .01
    for e in range(start_epoch, number_epochs):
        for i, (review, label) in enumerate(train_dataloader):
            review = review.as_in_context(ctx)
            label = label.as_in_context(ctx)
            with autograd.record():
                output = net(review)
                loss = softmax_cross_entropy(output, label)
            loss.backward()
            trainer.step(review.shape[0])

            # moving average of the loss
            curr_loss = nd.mean(loss)
            moving_loss = (curr_loss if (i == 0) else
                           (1 - smoothing_constant) * moving_loss +
                           (smoothing_constant) * curr_loss)

            if (i % 200 == 0):
                print(
                    'Batch {}: Instant loss {:.4f}, Moving loss {:.4f}'.format(
                        i, curr_loss.asscalar(), moving_loss.asscalar()))

        test_accuracy = evaluate_accuracy(test_dataloader, net)
        # Save the model using the gluon params format
        net.save_parameters('crepe_epoch_{}_test_acc_{}.params'.format(
            e,
            int(test_accuracy * 10000) / 100))
        print("Epoch {}. Loss: {:.4f}, Test_acc {:.4f}".format(
            e, moving_loss.asscalar(), test_accuracy))

    net.export('crepe', epoch=number_epochs)

    for i in range(50):
        index = random.randint(0, len(data) - 1)  # randint is inclusive on both ends
        review = data['X'][index]
        label = categories[int(data['Y'][index])]
        print(review)
        print('\nCategory: {}\n'.format(label))
        encoded = nd.array([encode(review)], ctx=ctx)
        output = net(encoded)
        predicted = categories[np.argmax(output[0].asnumpy())]
        if predicted == label:
            print('Correctly predicted the right category')
        else:
            print('Incorrectly predicted {}'.format(predicted))

    review_title = "Good stuff"
    review = "This course is definitely better than the previous one"

    print(review_title)
    print(review + '\n')
    encoded = nd.array([encode(review + " | " + review_title)], ctx=ctx)
    output = net(encoded)
    softmax = nd.exp(output) / nd.sum(nd.exp(output))[0]
    predicted = categories[np.argmax(output[0].asnumpy())]
    print('Predicted: {}\n'.format(predicted))
    for i, val in enumerate(categories):
        print(val, float(int(softmax[0][i].asnumpy() * 1000) / 10), '%')
Example no. 25
0
def train_classifier(vocabulary, data_train, data_val, data_test, ctx):
    """
    Trains the classifier in minibatch fashion with the desired parameters. It also prints statistics on the train,
    val, and test sets.
    """

    # Set up the data loaders for each data source into minibatches of the desired size
    train_dataloader = DataLoader(data_train,
                                  batch_size=args.batch_size,
                                  shuffle=True)
    val_dataloader = DataLoader(data_val,
                                batch_size=args.batch_size,
                                shuffle=True)
    test_dataloader = DataLoader(data_test,
                                 batch_size=args.batch_size,
                                 shuffle=True)

    # Collects the dimensions of the word vectors so that we can create the NN
    emb_input_dim, emb_output_dim = vocabulary.embedding.idx_to_vec.shape

    # determine the kind of NN architecture used
    if args.net == 'cnn':
        model = CNNTextClassifier(emb_input_dim, emb_output_dim)
        print('CNN architecture created...')
    elif args.net == 'lstm':
        model = LSTMTextClassifier(emb_input_dim, emb_output_dim)
        print('LSTM architecture created...')
    elif args.net == 'rnn':
        model = RNNTextClassifier(emb_input_dim, emb_output_dim)
        print('RNN architecture created...')

    # Initialize model parameters on the context ctx
    model.initialize(ctx=ctx)

    # Set the embedding layer parameters to the pre-trained embedding in the vocabulary
    model.embedding.weight.set_data(vocabulary.embedding.idx_to_vec)
    print('Embedding layer populated with word-vectors...')

    # OPTIONAL for efficiency - perhaps easier to comment this out during debugging
    # model.hybridize()

    # this prevents the embedding layer from getting updated through backpropagation, which means that the
    # word-embeddings are not dynamically adapting to the present classification task
    # model.embedding.collect_params().setattr('grad_req', 'null')

    trainer = gluon.Trainer(model.collect_params(),
                            optimizer=args.optimizer,
                            optimizer_params={'learning_rate': args.lr})

    print('Begin training...')

    # Variables to keep track of the best values to plot the precision-recall curve of the best model trained
    # (the best model being the first one that achieves the highest average precision)
    prev_avg_precision = 0
    relative_recall = None
    relative_precision = None

    # store time reference to calculate training time
    t0 = time()

    epoch_num = 1
    for epoch in range(args.epochs):
        train_loss = 0.
        for batch_idx, (data, label) in enumerate(train_dataloader):
            # data is a matrix where each row is a document and the total number of rows is the batch size
            data = data.as_in_context(ctx)
            # label is a list with the labels for each doc in the minibatch
            label = label.as_in_context(ctx)

            with autograd.record():
                output = model(data)
                # Reshapes the output so that it can be compared to the shape of the label properly. I also
                # had to cast explicitly to float64 (rather than the default float32), otherwise the loss raised an error
                loss = loss_fn(
                    output.reshape(label.shape).astype('float64'),
                    label).mean()
            loss.backward()

            trainer.step(args.batch_size)
            train_loss += loss

        # Collects stats for each dataset after each training epoch
        # train_accuracy, train_prec, train_rec, train_f1, train_avg_prec, _ = evaluate(model, train_dataloader)
        # val_accuracy, val_prec, val_rec, val_f1, val_avg_prec, val_loss = evaluate(model, val_dataloader)
        test_accuracy, test_prec, test_rec, test_f1, test_avg_prec, test_loss, test_rel_prec, test_rel_recall = \
            evaluate(model, test_dataloader)

        print()
        print('Epoch {:d}'.format(epoch_num))
        # print('Train loss: {:.2f}; accuracy: {:.2f}; precision {:.2f}; recall {:.2f}; F1 {:.2f}; Avg prec {:.2f}'
        #       .format(train_loss.asscalar(), train_accuracy, train_prec, train_rec, train_f1, train_avg_prec))
        # print('Val loss: {:.2f}; accuracy {:.2f}; precision {:.2f}; recall {:.2f}; F1 {:.2f}; Avg prec {:.2f}'
        #       .format(val_loss, val_accuracy, val_prec, val_rec, val_f1, val_avg_prec))
        print(
            'Test loss: {:.2f}, accuracy {:.2f}; precision {:.2f}; recall {:.2f}; F1 {:.2f}; Avg prec {:.2f}'
            .format(test_loss, test_accuracy, test_prec, test_rec, test_f1,
                    test_avg_prec))

        epoch_num += 1

        # Stores the lists of recall and precision values for creating the precision-recall curve of the BEST epoch;
        # the best epoch is taken to be the first one with the highest average precision
        if prev_avg_precision < test_avg_prec:
            prev_avg_precision = test_avg_prec
            relative_recall = test_rel_recall
            relative_precision = test_rel_prec

    t1 = time()

    # Plots the precision recall curve for the model with the best average precision
    plot_precision_recall_curve(relative_precision, relative_recall)

    print()
    print('Total training time {:.2f} seconds'.format(t1 - t0))
Example no. 26
0
from mxnet.gluon import loss

#showpoints(np.random.randn(2500,3), c1 = np.random.uniform(0,1,size = (2500)))

parser = argparse.ArgumentParser()

parser.add_argument('--model', type=str, default='', help='model path')
parser.add_argument('--num_points', type=int, default=2500, help='number of input points per sample')


opt = parser.parse_args()
print(opt)

test_dataset = PartDataset(root='shapenetcore_partanno_segmentation_benchmark_v0', train=False,
                           classification=True, npoints=opt.num_points)

testdataloader = DataLoader(test_dataset, batch_size=32, shuffle=True)

ctx = mx.gpu()
classifier = PointNetCls(k = len(test_dataset.classes), num_points = opt.num_points)
classifier.load_parameters(opt.model, ctx=ctx)
L_loss = loss.SoftmaxCrossEntropyLoss(from_logits=True)


for i, data in enumerate(testdataloader, 0):
    points, target = data
    points = points.transpose((0, 2, 1))
    pred, _ = classifier(points.as_in_context(ctx))
    # the labels must live on the same context as the predictions for the loss computation
    batch_loss = L_loss(pred, target[:, 0].as_in_context(ctx))

    pred_choice = pred.argmax(1)
    correct = (target[:, 0] == pred_choice.as_in_context(mx.cpu())).sum()
Example no. 27
0
    train_transforms = Compose([
        Resize((1333, 800), True),
        RandomHorizontalFlip(0.5),
        RandomVerticalFlip(0.5),
        Normalize(mean=(127, 127, 127), std=(255, 255, 255)),
        ToTensor()
    ])
    val_transforms = Compose([
        Resize((1333, 800), True),
        Normalize(mean=(127, 127, 127), std=(255, 255, 255)),
        ToTensor()
    ])
    train_dataset = TCTDataset(root, "tct_train", train_transforms)
    train_data_loader = DataLoader(train_dataset,
                                   batch_size,
                                   True,
                                   last_batch="rollover",
                                   batchify_fn=collate_fn,
                                   num_workers=num_workers)
    val_dataset = TCTDataset(root, "tct_val", val_transforms)
    val_data_loader = DataLoader(val_dataset,
                                 batch_size,
                                 False,
                                 last_batch="discard",
                                 batchify_fn=collate_fn,
                                 num_workers=num_workers)

    # anchor_scales = {
    #     "c4": (64, 128, 256),
    # }
    # anchor_ratios = {
    #     "c4": ((1, 2, 0.5),) * 3,
Example no. 28
0
RNN_Block_net=nn.HybridSequential()
RNN_Block_net.add(RNN_Block(n_class=num_class))
RNN_Block_net.collect_params().initialize(mx.init.Normal(0.02), ctx=ctx)


GE_trainer = gluon.Trainer(Generator_Encorder_net.collect_params(), 'adam', {'learning_rate': lr, 'beta1': 0.9,'beta2': 0.999})
GD_trainer = gluon.Trainer(Generator_Decorder_net.collect_params(), 'adam', {'learning_rate': lr, 'beta1': 0.9,'beta2': 0.999})
D_trainer = gluon.Trainer(Discriminator_net.collect_params(), 'adam', {'learning_rate': lr, 'beta1': 0.9,'beta2': 0.999})
RNN_trainer = gluon.Trainer(RNN_Block_net.collect_params(), 'adam', {'learning_rate': lr, 'beta1': 0.9,'beta2': 0.999})


#######################################################
######    dataset path    ######
dataset = ImageDataset('/home/cumt306/zhouyi/dataset/Train.txt', (32, 128), 3, 32, alphabet)
data_loader = DataLoader(dataset.transform_first(ToTensor()), batch_size=batch_size, shuffle=True, num_workers=12)
val_dataset = ImageDataset('/home/cumt306/zhouyi/dataset/Val.txt', (32, 128), 3, 32, alphabet)
val_data_loader = DataLoader(val_dataset.transform_first(ToTensor()), batch_size=batch_size, shuffle=False, num_workers=12)
test_dataset = ImageDataset('/home/cumt306/zhouyi/dataset/Test.txt', (32, 128), 3, 32, alphabet)
test_data_loader = DataLoader(test_dataset.transform_first(ToTensor()), batch_size=batch_size, shuffle=False, num_workers=12)
#######################################################



stamp = datetime.now().strftime('%Y_%m_%d-%H_%M')
logging.basicConfig(level=logging.DEBUG)
GAN_loss = gluon.loss.SigmoidBinaryCrossEntropyLoss()
ctc_loss = gluon.loss.CTCLoss(weight=0.2)
L1_loss = gluon.loss.L1Loss()
sw = SummaryWriter(log_dir)
global_step = 0
Example no. 29
0
def train():
    """Training function."""
    trainer = gluon.Trainer(model.collect_params(), args.optimizer, {'learning_rate': args.lr})

    train_batchify_fn = btf.Tuple(btf.Pad(), btf.Pad(), btf.Stack(), btf.Stack())
    test_batchify_fn = btf.Tuple(btf.Pad(), btf.Pad(), btf.Stack(), btf.Stack(), btf.Stack())
    if args.bucket_scheme == 'constant':
        bucket_scheme = ConstWidthBucket()
    elif args.bucket_scheme == 'linear':
        bucket_scheme = LinearWidthBucket()
    elif args.bucket_scheme == 'exp':
        bucket_scheme = ExpWidthBucket(bucket_len_step=1.2)
    else:
        raise NotImplementedError
    train_batch_sampler = FixedBucketSampler(lengths=data_train_lengths,
                                             batch_size=args.batch_size,
                                             num_buckets=args.num_buckets,
                                             ratio=args.bucket_ratio,
                                             shuffle=True,
                                             bucket_scheme=bucket_scheme)
    logging.info('Train Batch Sampler:\n{}'.format(train_batch_sampler.stats()))
    train_data_loader = DataLoader(data_train,
                                   batch_sampler=train_batch_sampler,
                                   batchify_fn=train_batchify_fn,
                                   num_workers=8)

    val_batch_sampler = FixedBucketSampler(lengths=data_val_lengths,
                                           batch_size=args.test_batch_size,
                                           num_buckets=args.num_buckets,
                                           ratio=args.bucket_ratio,
                                           shuffle=False)
    logging.info('Valid Batch Sampler:\n{}'.format(val_batch_sampler.stats()))
    val_data_loader = DataLoader(data_val,
                                 batch_sampler=val_batch_sampler,
                                 batchify_fn=test_batchify_fn,
                                 num_workers=8)
    test_batch_sampler = FixedBucketSampler(lengths=data_test_lengths,
                                            batch_size=args.test_batch_size,
                                            num_buckets=args.num_buckets,
                                            ratio=args.bucket_ratio,
                                            shuffle=False)
    logging.info('Test Batch Sampler:\n{}'.format(test_batch_sampler.stats()))
    test_data_loader = DataLoader(data_test,
                                  batch_sampler=test_batch_sampler,
                                  batchify_fn=test_batchify_fn,
                                  num_workers=8)
    best_valid_bleu = 0.0
    for epoch_id in range(args.epochs):
        log_avg_loss = 0
        log_avg_gnorm = 0
        log_wc = 0
        log_start_time = time.time()
        for batch_id, (src_seq, tgt_seq, src_valid_length, tgt_valid_length)\
                in enumerate(train_data_loader):
            # logging.info(src_seq.context) Context suddenly becomes GPU.
            src_seq = src_seq.as_in_context(ctx)
            tgt_seq = tgt_seq.as_in_context(ctx)
            src_valid_length = src_valid_length.as_in_context(ctx)
            tgt_valid_length = tgt_valid_length.as_in_context(ctx)
            with mx.autograd.record():
                out, _ = model(src_seq, tgt_seq[:, :-1], src_valid_length, tgt_valid_length - 1)
                loss = loss_function(out, tgt_seq[:, 1:], tgt_valid_length - 1).mean()
                loss = loss * (tgt_seq.shape[1] - 1) / (tgt_valid_length - 1).mean()
                loss.backward()
            grads = [p.grad(ctx) for p in model.collect_params().values()]
            gnorm = gluon.utils.clip_global_norm(grads, args.clip)
            trainer.step(1)
            src_wc = src_valid_length.sum().asscalar()
            tgt_wc = (tgt_valid_length - 1).sum().asscalar()
            step_loss = loss.asscalar()
            log_avg_loss += step_loss
            log_avg_gnorm += gnorm
            log_wc += src_wc + tgt_wc
            if (batch_id + 1) % args.log_interval == 0:
                wps = log_wc / (time.time() - log_start_time)
                logging.info('[Epoch {} Batch {}/{}] loss={:.4f}, ppl={:.4f}, gnorm={:.4f}, '
                             'throughput={:.2f}K wps, wc={:.2f}K'
                             .format(epoch_id, batch_id + 1, len(train_data_loader),
                                     log_avg_loss / args.log_interval,
                                     np.exp(log_avg_loss / args.log_interval),
                                     log_avg_gnorm / args.log_interval,
                                     wps / 1000, log_wc / 1000))
                log_start_time = time.time()
                log_avg_loss = 0
                log_avg_gnorm = 0
                log_wc = 0
        valid_loss, valid_translation_out = evaluate(val_data_loader)
        valid_bleu_score, _, _, _, _ = compute_bleu([val_tgt_sentences], valid_translation_out)
        logging.info('[Epoch {}] valid Loss={:.4f}, valid ppl={:.4f}, valid bleu={:.2f}'
                     .format(epoch_id, valid_loss, np.exp(valid_loss), valid_bleu_score * 100))
        test_loss, test_translation_out = evaluate(test_data_loader)
        test_bleu_score, _, _, _, _ = compute_bleu([test_tgt_sentences], test_translation_out)
        logging.info('[Epoch {}] test Loss={:.4f}, test ppl={:.4f}, test bleu={:.2f}'
                     .format(epoch_id, test_loss, np.exp(test_loss), test_bleu_score * 100))
        write_sentences(valid_translation_out,
                        os.path.join(args.save_dir, 'epoch{:d}_valid_out.txt').format(epoch_id))
        write_sentences(test_translation_out,
                        os.path.join(args.save_dir, 'epoch{:d}_test_out.txt').format(epoch_id))
        if valid_bleu_score > best_valid_bleu:
            best_valid_bleu = valid_bleu_score
            save_path = os.path.join(args.save_dir, 'valid_best.params')
            logging.info('Save best parameters to {}'.format(save_path))
            model.save_params(save_path)
        if epoch_id + 1 >= (args.epochs * 2) // 3:
            new_lr = trainer.learning_rate * args.lr_update_factor
            logging.info('Learning rate change to {}'.format(new_lr))
            trainer.set_learning_rate(new_lr)
    model.load_params(os.path.join(args.save_dir, 'valid_best.params'))
    valid_loss, valid_translation_out = evaluate(val_data_loader)
    valid_bleu_score, _, _, _, _ = compute_bleu([val_tgt_sentences], valid_translation_out)
    logging.info('Best model valid Loss={:.4f}, valid ppl={:.4f}, valid bleu={:.2f}'
                 .format(valid_loss, np.exp(valid_loss), valid_bleu_score * 100))
    test_loss, test_translation_out = evaluate(test_data_loader)
    test_bleu_score, _, _, _, _ = compute_bleu([test_tgt_sentences], test_translation_out)
    logging.info('Best model test Loss={:.4f}, test ppl={:.4f}, test bleu={:.2f}'
                 .format(test_loss, np.exp(test_loss), test_bleu_score * 100))
    write_sentences(valid_translation_out,
                    os.path.join(args.save_dir, 'best_valid_out.txt'))
    write_sentences(test_translation_out,
                    os.path.join(args.save_dir, 'best_test_out.txt'))
Example no. 30
0
    return encode(x), y

"""We split our data into a training and a testing dataset"""

split = 0.8
split_index = int(split*len(data))
train_data_X = data['X'][:split_index].to_numpy()
train_data_Y = data['Y'][:split_index].to_numpy()
test_data_X = data['X'][split_index:].to_numpy()
test_data_Y = data['Y'][split_index:].to_numpy()
train_dataset = ArrayDataset(train_data_X, train_data_Y).transform(transform)
test_dataset = ArrayDataset(test_data_X, test_data_Y).transform(transform)

"""Creating the training and testing dataloader, with NUM_WORKERS set to the number of CPU core"""

train_dataloader = DataLoader(train_dataset, shuffle=True, batch_size=BATCH_SIZE, num_workers=NUM_WORKERS, last_batch='rollover')

test_dataloader = DataLoader(test_dataset, shuffle=False, batch_size=BATCH_SIZE, num_workers=NUM_WORKERS, last_batch='keep')  # keep the tail batch so every test sample is evaluated

"""## Creation of the network

The context will define where the training takes place, on the CPU or on the GPU
"""

ctx = mx.gpu() if mx.context.num_gpus() else mx.cpu()

"""We create the network following the instructions describe in the paper, using the small feature and small output units configuration

![img](data/diagram.png)
![img](data/convolutional_layers.png)
![img](data/dense_layer.png)