Example #1
0
        data = data.as_in_context(data_ctx)
        label = label.as_in_context(data_ctx)
        output = net(data)
        prediction = nd.dot(output, scale)
        acc.update(preds=prediction, labels=label)
    return acc.get()[1]


if __name__ == '__main__':
    net.output.collect_params().initialize(
        init=mx.init.Uniform(scale=1 / math.sqrt(2048)),
        ctx=model_ctx,
        force_reinit=False)
    # net.collect_params().initialize(init=mx.init.Xavier(), ctx=model_ctx)
    # net.load_params('/home/gdshen/datasets/mxnet_checkpoint/checkpoint-imdb-15.params', ctx=model_ctx)
    net.collect_params().reset_ctx(ctx=model_ctx)
    #
    net.hybridize()
    trainer = gluon.Trainer(net.collect_params(), 'sgd', {
        'learning_rate': lr,
        'momentum': mom,
        'wd': weight_decay
    })
    # trainer = gluon.Trainer(net.collect_params(), 'Adam', {'learning_rate': lr})
    data_iter = gluon.data.DataLoader(training_datasets,
                                      10,
                                      shuffle=True,
                                      num_workers=8,
                                      last_batch='discard')
    eval_iter = gluon.data.DataLoader(test_datasets,
                                      10,
Example #2
0
def train():
    """Train the task-specific classifier and return the fitted network.

    Relies on module-level configuration and helpers: `task`, `ctx`,
    `task_num_class`, `batch_size`, `num_workers`, `lr`, `momentum`, `wd`,
    `epochs`, `lr_steps`, `lr_factor`, plus `Net`, `transform_train`,
    `transform_val`, `calculate_ap`, `progressbar`, and `validate`.

    Returns:
        The trained (hybridized) Gluon network.
    """
    logging.info('Start Training for Task: %s\n' % (task))

    # Build the network, move its parameters onto the training context(s),
    # and hybridize for graph-compiled execution.
    finetune_net = Net(ctx, task_num_class).output
    finetune_net.collect_params().reset_ctx(ctx)
    finetune_net.hybridize()

    # DataLoaders over the per-task train/val image folders.
    train_data = gluon.data.DataLoader(
        gluon.data.vision.ImageFolderDataset(
            os.path.join('data/train_valid', task, 'train'),
            transform=transform_train),
        batch_size=batch_size, shuffle=True, num_workers=num_workers,
        last_batch='discard')

    val_data = gluon.data.DataLoader(
        gluon.data.vision.ImageFolderDataset(
            os.path.join('data/train_valid', task, 'val'),
            transform=transform_val),
        batch_size=batch_size, shuffle=False, num_workers=num_workers)

    # SGD trainer, accuracy metric, and softmax cross-entropy loss.
    trainer = gluon.Trainer(finetune_net.collect_params(), 'sgd', {
        'learning_rate': lr, 'momentum': momentum, 'wd': wd})
    metric = mx.metric.Accuracy()
    L = gluon.loss.SoftmaxCrossEntropyLoss()
    lr_counter = 0
    num_batch = len(train_data)  # number of batches per epoch

    # Start Training
    for epoch in range(epochs):
        # Decay the learning rate by `lr_factor` at each scheduled epoch.
        # Bound-check `lr_counter` so we do not index past the end of
        # `lr_steps` once the last scheduled decay has fired (the original
        # code raised IndexError on the epoch after the final step).
        if lr_counter < len(lr_steps) and epoch == lr_steps[lr_counter]:
            trainer.set_learning_rate(trainer.learning_rate * lr_factor)
            lr_counter += 1

        tic = time.time()
        train_loss = 0
        metric.reset()
        AP = 0.
        AP_cnt = 0

        # Iterate over the training data one batch at a time.
        for i, batch in enumerate(train_data):
            # Shard the batch across the available device contexts.
            data = gluon.utils.split_and_load(
                batch[0], ctx_list=ctx, batch_axis=0, even_split=False)
            label = gluon.utils.split_and_load(
                batch[1], ctx_list=ctx, batch_axis=0, even_split=False)
            with ag.record():
                outputs = [finetune_net(X) for X in data]  # forward pass per shard
                loss = [L(yhat, y) for yhat, y in zip(outputs, label)]
            for l in loss:
                l.backward()  # accumulate gradients

            trainer.step(batch_size)  # one optimizer update per batch
            # Average the per-shard mean losses for this batch.
            train_loss += sum([l.mean().asscalar() for l in loss]) / len(loss)

            metric.update(label, outputs)
            ap, cnt = calculate_ap(label, outputs)
            AP += ap
            AP_cnt += cnt

            progressbar(i, num_batch - 1)  # training progress bar

        train_map = AP / AP_cnt
        _, train_acc = metric.get()
        train_loss /= num_batch

        # Evaluate on the validation split.
        val_acc, val_map, val_loss = validate(finetune_net, val_data, ctx)

        logging.info(
            '[Epoch %d] Train-acc: %.3f, mAP: %.3f, loss: %.3f | '
            'Val-acc: %.3f, mAP: %.3f, loss: %.3f | time: %.1f' %
            (epoch, train_acc, train_map, train_loss,
             val_acc, val_map, val_loss, time.time() - tic))

    logging.info('\n')
    return finetune_net