def model_trainer(self, config: Configuration, model_path, network_model,
                  model_name):
    """Prepare a Gluon network for training and build its optimizer.

    Writes the inference configuration (plus the network name) to
    ``/checkpoints/<weights_name>/<model_name>/config.json``, adapts the
    network's classifier head according to ``config.weights_type``, moves
    all parameters to the selected context(s) and hybridizes the network.

    Args:
        config: training configuration (reads weights_name, weights_type,
            processor, gpus_count, Xavier, lr, momentum, wd).
        model_path: base path handed to ``self.get_classes`` to resolve
            the class count for this model.
        network_model: the Gluon network to train.
        model_name: model identifier; also names the checkpoint folder.

    Returns:
        tuple: ``(gluon.Trainer, mx.metric.Accuracy,
        gluon.loss.SoftmaxCrossEntropyLoss)``.
    """
    model_checkpoint_path = '/checkpoints/' + config.weights_name + '/' + model_name
    inference_configuration = self.define_inference_configuration(
        config.processor)

    # Record which network produced these weights so inference can
    # rebuild the same architecture later.
    inference_configuration['network'] = config.weights_name

    # exist_ok=True already tolerates an existing directory, so the
    # previous os.path.exists() pre-check was redundant.
    os.makedirs(model_checkpoint_path, exist_ok=True)
    with open(model_checkpoint_path + '/config.json', 'w') as outfile:
        json.dump(inference_configuration, outfile)

    classes = self.get_classes(model_path, config.weights_name, model_name)
    ctx = self.get_ctx(config.processor, config.gpus_count)

    net = network_model
    if config.weights_type == "from_scratch":
        # Fresh weights everywhere; initializer chosen by config.Xavier.
        initializer = init.Xavier() if config.Xavier else mx.init.MSRAPrelu()
        net.initialize(initializer, ctx=ctx)
    elif config.weights_type in ('pre_trained', 'pretrained_offline'):
        # Keep the pretrained feature extractor and replace only the
        # classifier head so it matches the number of target classes.
        if "resnext" in net.name:
            # ResNeXt models: swap the whole output layer for a Dense head.
            with net.name_scope():
                net.output = nn.Dense(classes)
            net.initialize()
        elif hasattr(net, 'output'):
            initializer = init.Xavier() if config.Xavier else mx.init.MSRAPrelu()
            if "HybridSequential" in str(net.output):
                # Convolutional head: replace the first layer (the class
                # predictor) with a 1x1 conv producing `classes` channels
                # and keep the remaining layers of the original head.
                with net.name_scope():
                    head = nn.HybridSequential()
                    head.add(nn.Conv2D(classes, 1, strides=1))
                    head.add(net.output[1])
                    if len(net.output) > 2:
                        # NOTE(review): as in the original code, this
                        # assumes at least 4 layers when len > 2.
                        head.add(net.output[2])
                        head.add(net.output[3])
                    net.output = head
            else:
                with net.name_scope():
                    net.output = nn.Dense(classes)
            net.output.initialize(initializer, ctx=ctx)
        else:
            # Models exposing the classifier as `fc` instead of `output`.
            initializer = init.Xavier() if config.Xavier else mx.init.MSRAPrelu()
            with net.name_scope():
                net.fc = nn.Dense(classes)
            net.fc.initialize(initializer, ctx=ctx)
    # else: any other weights_type keeps the model's weights untouched.

    net.collect_params().reset_ctx(ctx)
    net.hybridize()

    trainer = gluon.Trainer(
        net.collect_params(), 'sgd', {
            'learning_rate': config.lr,
            'momentum': config.momentum,
            'wd': config.wd
        })

    metric = mx.metric.Accuracy()
    L = gluon.loss.SoftmaxCrossEntropyLoss()

    return trainer, metric, L
Example #2
0
    from mxnet import init
    # net = get_R2plus1d(101,model_depth=34)
    # net.initialize()
    # print(net)
    # x = nd.random.uniform(shape=(2,3,32,112,112))
    # for layer in net:
    #     x = layer(x)
    #     print(layer.name,'output shape',x.shape)

    net2 = R2Plus2D(num_class=101, model_depth=34)

    import os

    os.environ['CUDA_VISIBLE_DEVICES'] = '0,1,2,3'
    context = mx.gpu(2)  #context = mx.cpu()
    net2.initialize(init=init.Xavier(), ctx=context)
    net2.hybridize()
    x = nd.random.uniform(shape=(1, 3, 8, 112, 112))
    #net2.collect_params().reset_ctx(mx.gpu(1))
    x = x.as_in_context(context)

    print("beging to fintune time")
    for i in range(10):
        y = net2(x)
        y.wait_to_read()

    print("begin to calculate time")
    tic = time()
    for i in range(100):
        y = net2(x)
        y.wait_to_read()
Example #3
0
    if resize:
        transformer += [gdata.vision.transforms.Resize(resize)]
    transformer += [gdata.vision.transforms.ToTensor()]
    transformer = gdata.vision.transforms.Compose(transformer)
    mnist_train = gdata.vision.FashionMNIST(root=root, train=True)
    mnist_test = gdata.vision.FashionMNIST(root=root, train=False)
    num_workers = 0 if sys.platform.startswith('win32') else 4
    train_iter = gdata.DataLoader(mnist_train.transform_first(transformer),
                                  batch_size,
                                  shuffle=True,
                                  num_workers=num_workers)
    test_iter = gdata.DataLoader(mnist_test.transform_first(transformer),
                                 batch_size,
                                 shuffle=False,
                                 num_workers=num_workers)
    return train_iter, test_iter


batch_size = 128
# If an "out of memory" error occurs, reduce batch_size or the resize value.
train_iter, test_iter = load_data_fashion_mnist(batch_size, resize=224)

##############################################################################
# Start training
##############################################################################
lr, num_epochs, ctx = 0.01, 5, mx.cpu()
# d2l.try_gpu()
# Re-initialize with Xavier; force_reinit allows overwriting existing params.
net.initialize(force_reinit=True, ctx=ctx, init=init.Xavier())
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': lr})
d2l.train_ch5(net, train_iter, test_iter, batch_size, trainer, ctx, num_epochs)
Example #4
0
def main(args):
    """Train (or resume) a flow-prediction model described by a YAML config.

    Loads settings from ``args.file``, seeds all RNG sources, builds the
    network, resumes from the best checkpoint if one exists (otherwise
    Xavier-initializes), fits on the training flow, then reloads the best
    epoch and runs one evaluation-only pass.

    Args:
        args: parsed CLI arguments; reads ``file``, ``gpus`` and ``epochs``.
    """
    with open(args.file, 'r') as f:
        # safe_load: the config is plain data. yaml.load() without an
        # explicit Loader is unsafe and no longer the default in PyYAML.
        settings = yaml.safe_load(f)
    assert args.file[:-5].endswith(settings['model']['name']), \
     'The model name is not consistent! %s != %s' % (args.file[:-5], settings['model']['name'])

    # Seed every RNG source for reproducibility.
    mx.random.seed(settings['seed'])
    np.random.seed(settings['seed'])
    random.seed(settings['seed'])

    setting_dataset = settings['dataset']
    setting_model = settings['model']
    setting_train = settings['training']

    name = os.path.join(PARAM_PATH, setting_model['name'])
    model_type = getattr(model, setting_model['type'])
    net = model_type.net(settings)

    try:
        # Resume: restore the logger and the best epoch's parameters.
        logger = Logger.load('%s.yaml' % name)
        net.load_parameters('%s-%04d.params' % (name, logger.best_epoch()),
                            ctx=args.gpus)
        logger.set_net(net)
        print('Successfully loading the model %s [epoch: %d]' %
              (setting_model['name'], logger.best_epoch()))
    except Exception:
        # No usable checkpoint - start from scratch. (The original bare
        # `except:` would also have swallowed KeyboardInterrupt/SystemExit.)
        logger = Logger(name, net, setting_train['early_stop_metric'],
                        setting_train['early_stop_epoch'])
        net.initialize(init.Xavier(), ctx=args.gpus)
        print('Initialize the model')

    # Report the total parameter count.
    num_params = 0
    for v in net.collect_params().values():
        num_params += np.prod(v.shape)
    print(net.collect_params())
    print('NUMBER OF PARAMS:', num_params)

    flow_train, flow_eval, flow_test, flow_scaler = getattr(
        data.dataloader, setting_dataset['flow'])(settings)

    model_trainer = ModelTrainer(
        net=net,
        trainer=gluon.Trainer(
            net.collect_params(),
            mx.optimizer.Adam(
                learning_rate=setting_train['lr'],
                lr_scheduler=mx.lr_scheduler.FactorScheduler(
                    step=setting_train['lr_decay_step'] * len(args.gpus),
                    factor=setting_train['lr_decay_factor'],
                    stop_factor_lr=1e-6)),
            update_on_kvstore=False),
        clip_gradient=setting_train['clip_gradient'],
        logger=logger,
        ctx=args.gpus)

    # De-normalized (via flow_scaler) error metrics on the flow predictions.
    flow_metrics = [
        MAE(scaler=flow_scaler,
            pred_name='flow_pred',
            label_name='flow_label',
            name='flow_mae'),
        RMSE(scaler=flow_scaler,
             pred_name='flow_pred',
             label_name='flow_label',
             name='flow_rmse'),
        MAPE(scaler=flow_scaler,
             pred_name='flow_pred',
             label_name='flow_label',
             name='flow_mape'),
        SMAPE(scaler=flow_scaler,
              pred_name='flow_pred',
              label_name='flow_label',
              name='flow_smape')
    ]

    model_trainer.fit(begin_epoch=logger.best_epoch(),
                      num_epochs=args.epochs,
                      train=flow_train,
                      eval=flow_eval,
                      test=flow_test,
                      metrics=flow_metrics)

    # Reload the best epoch and run one evaluation-only pass (train=None).
    net.load_parameters('%s-%04d.params' % (name, logger.best_epoch()),
                        ctx=args.gpus)
    model_trainer.fit(begin_epoch=0,
                      num_epochs=1,
                      train=None,
                      eval=flow_eval,
                      test=flow_test,
                      metrics=flow_metrics)
Example #5
0
    # net首次被初始化,使用默认初始化方式
    net.initialize()

    X = nd.random.uniform(shape=(2, 20))
    Y = net(X)  # 前向计算

    # net再次被初始化,使用init模块中正太分布初始化方法
    net.initialize(init=init.Normal(sigma=0.01), force_reinit=True)
    print(net[0].weight.data()[0])

    # net再次被初始化,使用init模块的常数来初始化权重参数
    net.initialize(init=init.Constant(1), force_reinit=True)
    print(net[0].weight.data()[0])

    # net再次被初始化,使用init模块的Xavier随机初始化方法
    net.initialize(init=init.Xavier(), force_reinit=True)
    print(net[0].weight.data()[0])

    # net再次被初始化,使用自定义的初始化方法
    net.initialize(init=MyInit(), force_reinit=True)
    print(net[0].weight.data()[0])

    # 测试共享模型参数,第二隐藏层和第三隐藏层共享模型参数
    net2 = nn.Sequential()
    second = nn.Dense(8, activation='relu')
    third = nn.Dense(8, activation='relu', params=second.params)
    net2.add(
        nn.Dense(8, activation='relu'),
        second,  # 第二隐藏层
        third,  # 第三隐藏层
        nn.Dense(10))
Example #6
0
def get_net(ctx):
    """Build a 100-class AlexNet, Xavier-initialized on *ctx*."""
    model = Alexnet(100)
    model.initialize(init=init.Xavier(), ctx=ctx)
    return model
def train(encoder, decoder, max_seq_len, ctx, eval_fr_ens):
    """Train an encoder-decoder translation model with teacher forcing.

    Xavier-initializes both networks and optimizes each with its own Adam
    trainer. The loss is a masked, length-normalized cross-entropy over the
    decoder steps. Per-batch losses are printed and appended to
    ``train_loss.txt``.

    Relies on module-level globals: ``lr``, ``num_epochs``, ``dataset``,
    ``batch_size``, ``output_vocab``, ``BOS``, ``eos_id``, ``loss``.
    NOTE(review): ``eval_fr_ens`` is accepted but never used in this body.
    """
    encoder.initialize(init.Xavier(), ctx=ctx)
    decoder.initialize(init.Xavier(), ctx=ctx)
    encoder_optimizer = gluon.Trainer(encoder.collect_params(), 'adam',
                                      {'learning_rate': lr})
    decoder_optimizer = gluon.Trainer(decoder.collect_params(), 'adam',
                                      {'learning_rate': lr})

    data_iter = gdata.DataLoader(dataset, batch_size, shuffle=True)

    # Running loss total. NOTE(review): accumulated below but never
    # reported or reset within this function.
    l_sum = 0
    for epoch in range(1, num_epochs + 1):
        for x, y in data_iter:
            cur_batch_size = x.shape[0]
            with autograd.record():
                l = nd.array([0], ctx=ctx)
                valid_length = nd.array([0], ctx=ctx)
                encoder_state = encoder.begin_state(func=nd.zeros,
                                                    batch_size=cur_batch_size,
                                                    ctx=ctx)
                # encoder_outputs holds the encoder's hidden state at every
                # time step.
                encoder_outputs, encoder_state = encoder(x, encoder_state)
                encoder_outputs = encoder_outputs.flatten()

                # The decoder's first input is the BOS token.
                decoder_input = nd.array([output_vocab.token_to_idx[BOS]] *
                                         cur_batch_size,
                                         ctx=ctx)
                # 1 while a sequence is still active; zeroed once the target
                # emits EOS (see the mask update below), excluding padding
                # steps from the loss.
                mask = nd.ones(shape=(cur_batch_size, ), ctx=ctx)

                decoder_state = decoder.begin_state(func=nd.zeros,
                                                    batch_size=cur_batch_size,
                                                    ctx=ctx)

                for i in range(max_seq_len):
                    decoder_output, decoder_state = decoder(
                        decoder_input, decoder_state, encoder_outputs)

                    # Teacher forcing: feed the ground-truth token as the
                    # next decoder input.
                    decoder_input = y[:, i]

                    valid_length = valid_length + mask.sum()

                    # Masked cross-entropy for this step.
                    l = l + (mask * loss(decoder_output, y[:, i])).sum()

                    # Deactivate sequences whose target just reached EOS.
                    mask = mask * (y[:, i] != eos_id)

                # Normalize by the number of valid (non-masked) positions.
                l = l / valid_length
                print(l)
                with open('train_loss.txt', 'a', encoding="utf-8") as f:
                    f.write('epoch:' + str(epoch) + 'batch_size_loss' +
                            str(l) + '\n')

            l.backward()
            encoder_optimizer.step(1)
            decoder_optimizer.step(1)

            l_sum += l.asscalar() / max_seq_len
Example #8
0
def train(net, train_iter, test_iter, batch_size, trainer, ctx,
          num_epochs):
    """Single-device Gluon training loop with per-epoch test evaluation.

    Trains *net* with softmax cross-entropy and prints loss, train
    accuracy, test accuracy and wall time after every epoch.
    """
    print('training on', ctx)
    criterion = gloss.SoftmaxCrossEntropyLoss()
    for epoch in range(num_epochs):
        start = time.time()
        loss_total, correct, seen = 0.0, 0.0, 0
        for features, labels in train_iter:
            features = features.as_in_context(ctx)
            labels = labels.as_in_context(ctx)
            with autograd.record():
                preds = net(features)
                batch_loss = criterion(preds, labels).sum()
            batch_loss.backward()
            trainer.step(batch_size)
            labels = labels.astype('float32')
            loss_total += batch_loss.asscalar()
            correct += (preds.argmax(axis=1) == labels).sum().asscalar()
            seen += labels.size
        test_acc = evaluate_accuracy(test_iter, net, ctx)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f, '
              'time %.1f sec' % (epoch + 1, loss_total / seen, correct / seen,
                                 test_acc, time.time() - start))


ctx = try_gpu()
lr, num_epochs = 0.9, 5
# Re-initialize the network with Xavier initialization.
net.initialize(force_reinit=True, ctx=ctx,
               init=init.Xavier())
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': lr})
train(net, train_iter, test_iter, batch_size, trainer, ctx, num_epochs)
Example #9
0
    batch_size=batch_size, shuffle=True, num_workers=num_workers)

val_data = gluon.data.DataLoader(
    gluon.data.vision.ImageFolderDataset(val_path).transform_first(transform_test),
    batch_size=batch_size, shuffle=False, num_workers = num_workers)

test_data = gluon.data.DataLoader(
    gluon.data.vision.ImageFolderDataset(test_path).transform_first(transform_test),
    batch_size=batch_size, shuffle=False, num_workers = num_workers)

# Model and Trainer: fine-tune a pretrained model whose name is given on
# the command line, replacing only the output layer for `classes` targets.
model_name = sys.argv[1]
finetune_net = get_model(model_name, pretrained=True)
with finetune_net.name_scope():
    finetune_net.output = nn.Dense(classes)
# Only the new head is (re-)initialized; pretrained features are kept.
finetune_net.output.initialize(init.Xavier(), ctx = ctx)
finetune_net.collect_params().reset_ctx(ctx)
finetune_net.hybridize()

trainer = gluon.Trainer(finetune_net.collect_params(), 'sgd', {
                        'learning_rate': lr, 'momentum': momentum, 'wd': wd})
metric = mx.metric.Accuracy()
L = gluon.loss.SoftmaxCrossEntropyLoss()

# we define a evaluation function for validation and testing
def test(net, val_data, ctx):
    """Evaluate *net* on *val_data* across the device list *ctx*.

    NOTE(review): this function appears truncated in this excerpt - the
    accuracy metric is created and batches are forwarded, but the metric
    update and return statement that presumably follow are not visible.
    """
    metric = mx.metric.Accuracy()
    for i, batch in enumerate(val_data):
        # Split each batch across all devices; even_split=False tolerates a
        # final batch that does not divide evenly.
        data = gluon.utils.split_and_load(batch[0], ctx_list=ctx, batch_axis=0, even_split=False)
        label = gluon.utils.split_and_load(batch[1], ctx_list=ctx, batch_axis=0, even_split=False)
        outputs = [net(X) for X in data]
Example #10
0
def Cam_resp(network,
             image,
             model_path,
             ctx,
             data_shape,
             class_names,
             thresh,
             num_class=14):
    """Display class-activation-map (CAM) overlays for each class.

    For every class index ``i``: rebuilds *network* with a single sigmoid
    output, loads per-class feature/output weights from *model_path*
    (``<class>_f.params`` / ``<class>_o.params``), converts the dense
    output weights into a 1x1 conv so the spatial response can be read
    out, runs every path in *image* through the net, and shows a
    JET-colormapped heatmap over each image whose prediction exceeds
    *thresh*.

    Relies on a module-level ``forward(X, net, ctx, c)`` helper returning
    ``(predictions, cams)``.
    """
    # ImageNet normalization constants - hoisted out of the per-class loop
    # (they were rebuilt on every iteration in the original).
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])

    for i in range(num_class):
        net = getattr(models, network)(classes=1)

        with net.name_scope():
            net.output = nn.Dense(1, activation="sigmoid")
        net.output.initialize(init.Xavier())

        params_features = os.path.join(model_path,
                                       '%s_f.params' % class_names[i])
        params_output = os.path.join(model_path,
                                     '%s_o.params' % class_names[i])
        net.features.load_params(params_features, ctx=ctx)
        net.output.load_params(params_output, ctx=mx.cpu(0))
        net.collect_params().reset_ctx(ctx)
        net.hybridize()

        # The dense classifier's weight vector, reshaped to (1, 1024, 1, 1),
        # becomes a 1x1 conv that scores every spatial position.
        params = net.output.collect_params()
        class_weights = params[list(params.keys())[0]]

        c = nn.Conv2D(channels=1, kernel_size=1)
        c.initialize(ctx=ctx)
        test = nd.random.normal(shape=(8, 1024, 7, 7), ctx=ctx)
        c(test)  # one forward pass so the conv's parameters get created
        c.weight.set_data(class_weights.data().reshape((1, 1024, 1, 1)))

        # Preprocess every image: resize, BGR->RGB, scale to [0,1],
        # normalize, then HWC->CHW.
        n = len(image)
        X = np.zeros((n, 3, data_shape, data_shape), dtype=np.float32)
        for j in range(n):
            img = cv2.imread(image[j])
            X[j] = ((cv2.resize(img,
                                (data_shape, data_shape))[:, :, ::-1] / 255.0 -
                     mean) / std).transpose((2, 0, 1))

        predictions, cams = forward(X[np.arange(n)], net, ctx, c)
        predictions = predictions.asnumpy()
        cams = cams.asnumpy()

        for j in range(n):
            # (The original recomputed X[j] here as well; that value was
            # never read again, so the redundant preprocessing is dropped.)
            img = cv2.imread(image[j])
            if predictions[j, 0] > thresh:
                cam = cams[j][0]
                # Normalize the CAM into [0, 1] before colormapping.
                # NOTE(review): a perfectly flat CAM would divide by zero.
                cam -= cam.min()
                cam /= cam.max()
                cam = cv2.resize((cam * 255).astype(np.uint8),
                                 (img.shape[1], img.shape[0]))
                heatmap = cv2.applyColorMap(cam, cv2.COLORMAP_JET)
                out = cv2.addWeighted(img, 0.8, heatmap, 0.4, 0)
                cv2.imshow('Image:%s pred:%s' % (image[j], class_names[i]),
                           out)
        cv2.waitKey(0)
        cv2.destroyAllWindows()
def train_cifar10(args, config, reporter):
    """Fine-tune a pretrained model on CIFAR-10 for one hyper-parameter trial.

    Merges *config* (the sampled hyper-parameters) into *args*, trains for
    ``args.epochs`` epochs, and reports test loss/accuracy after each epoch
    through *reporter* (presumably an AutoGluon/Tune-style search callback
    - confirm with the caller).
    """
    # Overwrite CLI defaults with this trial's sampled hyper-parameters.
    vars(args).update(config)
    # Seed all RNG sources for reproducibility of the trial.
    np.random.seed(args.seed)
    random.seed(args.seed)
    mx.random.seed(args.seed)

    # Set Hyper-params
    batch_size = args.batch_size * max(args.num_gpus, 1)  # global batch size
    ctx = [mx.gpu(i)
           for i in range(args.num_gpus)] if args.num_gpus > 0 else [mx.cpu()]

    # Define DataLoader: random crop + flip for training, plain normalize
    # for evaluation.
    transform_train = transforms.Compose([
        gcv_transforms.RandomCrop(32, pad=4),
        transforms.RandomFlipLeftRight(),
        transforms.ToTensor(),
        transforms.Normalize([0.4914, 0.4822, 0.4465],
                             [0.2023, 0.1994, 0.2010])
    ])

    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize([0.4914, 0.4822, 0.4465],
                             [0.2023, 0.1994, 0.2010])
    ])

    train_data = gluon.data.DataLoader(
        gluon.data.vision.CIFAR10(train=True).transform_first(transform_train),
        batch_size=batch_size,
        shuffle=True,
        last_batch="discard",
        num_workers=args.num_workers)

    test_data = gluon.data.DataLoader(
        gluon.data.vision.CIFAR10(train=False).transform_first(transform_test),
        batch_size=batch_size,
        shuffle=False,
        num_workers=args.num_workers)

    # Load model architecture and Initialize the net with pretrained model;
    # only the replacement `fc` head gets fresh (Xavier) weights.
    finetune_net = get_model(args.model, pretrained=True)
    with finetune_net.name_scope():
        finetune_net.fc = nn.Dense(args.classes)
    finetune_net.fc.initialize(init.Xavier(), ctx=ctx)
    finetune_net.collect_params().reset_ctx(ctx)
    finetune_net.hybridize()

    # Define trainer
    trainer = gluon.Trainer(finetune_net.collect_params(), "sgd", {
        "learning_rate": args.lr,
        "momentum": args.momentum,
        "wd": args.wd
    })
    L = gluon.loss.SoftmaxCrossEntropyLoss()
    metric = mx.metric.Accuracy()

    def train(epoch):
        # One training pass over train_data, split across all devices.
        # NOTE(review): the `epoch` argument is accepted but unused.
        for i, batch in enumerate(train_data):
            data = gluon.utils.split_and_load(batch[0],
                                              ctx_list=ctx,
                                              batch_axis=0,
                                              even_split=False)
            label = gluon.utils.split_and_load(batch[1],
                                               ctx_list=ctx,
                                               batch_axis=0,
                                               even_split=False)
            with ag.record():
                outputs = [finetune_net(X) for X in data]
                loss = [L(yhat, y) for yhat, y in zip(outputs, label)]
            for l in loss:
                l.backward()

            trainer.step(batch_size)
        mx.nd.waitall()

    def test():
        # Evaluate on test_data and report mean loss/accuracy.
        # NOTE(review): `metric` is shared across epochs and never reset,
        # so the reported accuracy accumulates over all epochs so far.
        test_loss = 0
        for i, batch in enumerate(test_data):
            data = gluon.utils.split_and_load(batch[0],
                                              ctx_list=ctx,
                                              batch_axis=0,
                                              even_split=False)
            label = gluon.utils.split_and_load(batch[1],
                                               ctx_list=ctx,
                                               batch_axis=0,
                                               even_split=False)
            outputs = [finetune_net(X) for X in data]
            loss = [L(yhat, y) for yhat, y in zip(outputs, label)]

            test_loss += sum(l.mean().asscalar() for l in loss) / len(loss)
            metric.update(label, outputs)

        _, test_acc = metric.get()
        test_loss /= len(test_data)
        reporter(mean_loss=test_loss, mean_accuracy=test_acc)

    for epoch in range(1, args.epochs + 1):
        train(epoch)
        test()
        nn.GlobalAvgPool2D(),
        # 将输出4D->2D(N,10)
        nn.Flatten())

# Test
X = nd.uniform(shape=(1, 1, 224, 224))
net.initialize()       # 默认初始化init=initializer.Uniform()
for layer in net:
    X = layer(X)
    print(layer.name, 'output shape:\t', X.shape)



# 读取数据
batch_size = 32
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size, resize=224)

# 重新初始化模型
ctx = d2l.try_gpu()
net.initialize(force_reinit=True, init=init.Xavier(), ctx=ctx)              # 模型重新初始化

# 优化函数, 0.1
lr = 0.05
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate':lr})  # 这行代码要放在模型初始化完成后,否则collect_params出错

# 训练
num_epochs = 5
d2l.train_ch5(net, train_iter, test_iter, batch_size, trainer, ctx, num_epochs)


Example #13
0
def train(encoder, decoder, decoder_init_state, max_seq_len, ctx,
          eval_fr_ens):
    """Train an attention encoder-decoder, feeding back its own predictions.

    Xavier-initializes the encoder, decoder and the decoder-init-state
    module, each with its own Adam trainer. Unlike teacher forcing, the
    decoder's argmax prediction is used as the next input. The loss is a
    masked, length-normalized cross-entropy; every `epoch_period` epochs a
    sample translation is produced via ``translate``.

    Relies on module-level globals: ``lr``, ``epochs``, ``dataset``,
    ``batch_size``, ``output_vocab``, ``BOS``, ``eos_id``, ``loss``,
    ``epoch_period``, ``translate``.
    """
    encoder.initialize(init.Xavier(), ctx=ctx)
    decoder.initialize(init.Xavier(), ctx=ctx)
    decoder_init_state.initialize(init.Xavier(), ctx=ctx)
    encoder_optimizer = gluon.Trainer(encoder.collect_params(), 'adam',
                                      {'learning_rate': lr})
    decoder_optimizer = gluon.Trainer(decoder.collect_params(), 'adam',
                                      {'learning_rate': lr})
    decoder_init_state_optimizer = gluon.Trainer(
        decoder_init_state.collect_params(), 'adam', {'learning_rate': lr})

    data_iter = gdata.DataLoader(dataset, batch_size, shuffle=True)
    l_sum = 0
    for epoch in range(1, epochs + 1):
        for x, y in data_iter:
            cur_batch_size = x.shape[0]
            with autograd.record():
                l = nd.array([0], ctx=ctx)
                valid_length = nd.array([0], ctx=ctx)

                # Encoder state shape: (num_layers, batch_size, hidden_dim).
                encoder_state = encoder.begin_state(
                    func=nd.zeros, batch_size=cur_batch_size, ctx=ctx)

                # encoder_outputs shape: (step_size, batch_size, hidden_dim).
                encoder_outputs, encoder_state = encoder(x, encoder_state)

                # flatten() reshapes (d1, d2, ..., dk) to (d1, d2*...*dk),
                # so encoder_outputs becomes (step_size, batch_size * hidden_dim).
                encoder_outputs = encoder_outputs.flatten()

                # The decoder's first input is the BOS token.
                decoder_input = nd.array(
                    [output_vocab.token_to_idx[BOS]] * cur_batch_size,
                    ctx=ctx)
                # 1 while the target sequence is still active; zeroed after
                # EOS so padded steps are excluded from the loss.
                mask = nd.ones(shape=(cur_batch_size,), ctx=ctx)

                # Use the encoder's final state to initialize the decoder's
                # initial state (encoder_state is a list of state arrays).
                decoder_state = decoder_init_state(encoder_state[0])
                for i in range(max_seq_len):
                    # Decoder inputs: previous state, previous output token,
                    # and the encoder outputs (for attention).
                    decoder_output, decoder_state = decoder(
                        decoder_input, decoder_state, encoder_outputs)

                    # Feed the decoder's own prediction (argmax over the
                    # vocabulary axis; axis 0 is the batch) as next input.
                    decoder_input = decoder_output.argmax(axis=1)

                    valid_length = valid_length + mask.sum()
                    l = l + (mask * loss(decoder_output, y[:, i])).sum()
                    # Deactivate sequences whose target just reached EOS.
                    mask = mask * (y[:, i] != eos_id)
                # Normalize by the number of valid (non-masked) positions.
                l = l / valid_length
            l.backward()
            encoder_optimizer.step(1)
            decoder_optimizer.step(1)
            decoder_init_state_optimizer.step(1)
            l_sum += l.asscalar() / max_seq_len

        if epoch % epoch_period == 0 or epoch == 1:
            if epoch == 1:
                print('epoch %d, loss %f, ' % (epoch, l_sum / len(data_iter)))
            else:
                print('epoch %d, loss %f, '
                      % (epoch, l_sum / epoch_period / len(data_iter)))
            if epoch != 1:
                l_sum = 0
            translate(encoder, decoder, decoder_init_state, eval_fr_ens, ctx,
                      max_seq_len)
# Four dense blocks of 4 convolutions each (DenseNet-style assembly).
num_convs_in_dense_blocks = [4,4,4,4]

for i,num_convs in enumerate(num_convs_in_dense_blocks):
    net.add(DenseBlock(num_convs, grow_rate))
    # Each conv in a dense block adds `grow_rate` channels.
    num_channels += num_convs * grow_rate
    # A transition block between dense blocks (none after the last one).
    # NOTE(review): num_channels itself is never halved here, unlike the
    # canonical DenseNet recipe where the tracker is updated to match the
    # transition block's output - verify the intended channel counts.
    if i != len(num_convs_in_dense_blocks) - 1:
        net.add(transition_block(num_channels // 2))

# Head: BN -> ReLU -> global average pool -> 10-way classifier.
net.add(nn.BatchNorm(),
        nn.Activation('relu'),
        nn.GlobalAvgPool2D(),
        nn.Dense(10))


# Initialize model parameters.
net.initialize(init=init.Xavier(), ctx=mx.gpu())

# Define the loss function.
loss = gloss.SoftmaxCrossEntropyLoss()

# Define the optimization algorithm.
trainer = gluon.Trainer(net.collect_params(),'sgd',{'learning_rate':0.05})

#【训练模型】
def batch_accuracy(y_hat, y):
    """Fraction of rows in *y_hat* whose argmax equals the label in *y*."""
    predictions = y_hat.argmax(axis=1)
    hits = predictions == y.astype('float32')
    return hits.mean().asscalar()

def all_accuracy(data_iter, net):
    """Accumulate accuracy of *net* over every batch in *data_iter*.

    NOTE(review): this function appears truncated in this excerpt - the
    accumulator `acc` is created and batches are moved to the GPU, but the
    per-batch accuracy update and return are not visible here.
    """
    acc = 0
    for X, y in data_iter:
        # Move each batch to the GPU before evaluation.
        X, y = X.as_in_context(mx.gpu()), y.as_in_context(mx.gpu())
Example #15
0
        for i in range(int(60000/batch_size)):
            X = X_train[i * batch_size: i * batch_size + batch_size]    # 批量
            y = y_train[i * batch_size: i * batch_size + batch_size]
            X,y = X.as_in_context(ctx),y.as_in_context(ctx)

            with autograd.record():
                y_hat = net(X)
                l = loss(y_hat,y).sum()
            l.backward()                # 更新权重
            trainer.step(batch_size)    # 训练
            y = y.astype('float32')
            train_l_sum += l.asscalar()
            train_acc_sum +=(y_hat.argmax(axis=1)==y).sum().asscalar()
            n += y.size
        test_acc = evaluate_accuracy(net)
        loss_list.append(train_l_sum / n)
        acc_list.append(train_acc_sum / n)
        test_acc_list.append(test_acc)
        print('epoch %d,loss %.4f,train acc %.3f,test acc %.3f,time %.1f sec'
              % (epoch + 1, train_l_sum / n, train_acc_sum / n, test_acc, time.time() - start))
    return loss_list, acc_list, test_acc_list
if __name__ == '__main__':
    lr,num_epochs = 0.9,10
    batch_size = 200
    net = Net() # build the network model
    # force_reinit=True re-initializes even if parameters already exist;
    # init.Xavier() selects Xavier weight initialization.
    net.initialize(force_reinit=True,init=init.Xavier())
    trainer = gluon.Trainer(net.collect_params(),'sgd',{'learning_rate':lr})
    loss_list, acc_list, test_acc_list = train(batch_size,trainer,num_epochs)
    plot(loss_list, acc_list, test_acc_list)
Example #16
0
    ## 2.从文件夹读取数据
    print("读取数据")
    train_data, test_data = d2l.read_imdb('train'), d2l.read_imdb('test')
    print("读取数据 ok")
    ## 3.整理数据
    vocab = d2l.get_vocab_imdb(train_data)
    train_iter = gdata.DataLoader(
        gdata.ArrayDataset(*d2l.preprocess_imdb(train_data, vocab)),
        batch_size,
        shuffle=True)
    test_iter = gdata.DataLoader(
        gdata.ArrayDataset(*d2l.preprocess_imdb(test_data, vocab)), batch_size)
    ## 4.指定参数并加载模型
    embed_size, num_hiddens, num_layers, ctx = 100, 200, 2, d2l.try_all_gpus()
    net = BiRNN(vocab, embed_size, num_hiddens, num_layers)  #实例化一个双向RNN
    net.initialize(init.Xavier(), ctx=ctx)  #对模型进行初始化
    ## 4.1 加载词向量预训练集
    print("加载预训练的词向量,若电脑没有预训练数据集将会自动从网上下载")
    glove_embedding = text.embedding.create(
        'glove', pretrained_file_name='glove.6B.100d.txt',
        vocabulary=vocab)  #其中的100指的是词向量长度为100
    print("加载与训练的词向量 ok")
    ## 4.2 初试化模型参数
    net.embedding.weight.set_data(glove_embedding.idx_to_vec)
    net.embedding.collect_params().setattr('grad_req', 'null')

    ## 5. 指定损失函数下降速度,训练轮数,并训练模型
    lr, num_epochs = 0.01, 5
    trainer = gluon.Trainer(net.collect_params(), 'adam',
                            {'learning_rate': lr})
    loss = gloss.SoftmaxCrossEntropyLoss()
Example #17
0
from mxnet import nd, init
from mxnet.gluon import nn

import matplotlib.pyplot as plt

################################################################################

# init.Constant
# init.Normal
# init.Zero
# init.One
# ...

## init with Xavier (search for more)
# Demo: initialize a 1-in/1-out 3x3 conv with Xavier and inspect the weights.
layer = nn.Conv2D(channels=1, kernel_size=(3, 3), in_channels=1)
layer.initialize(init.Xavier())
print(layer.weight.data())
# Sample output:
# [[[[ 0.05636501  0.10720772  0.24847925]
#    [ 0.39752382  0.11866093  0.41332   ]
#    [ 0.05182666  0.4009717  -0.08815584]]]]
# <NDArray 1x1x3x3 @cpu(0)>

## init with ones
# Overwrite the weights directly via set_data (bypasses the initializer).
layer.weight.set_data(nd.ones((1, 1, 3, 3), ctx=mx.cpu()))
print(layer.weight.data())
# [[[[1. 1. 1.]
#    [1. 1. 1.]
#    [1. 1. 1.]]]]
# <NDArray 1x1x3x3 @cpu(0)>
def train():
    """Fine-tune a model-zoo network for the current task and return the best net.

    NOTE(review): relies on module-level globals (task, model_name, ctx, lr,
    epochs, lr_steps, lr_factor, batch_size, num_workers, input_scale,
    task_num_class, f_val, validate) -- confirm they are defined before calling.
    """
    logging.info('Start Training for Task: %s\n' % (task))

    # Initialize the net with pretrained model
    pretrained_net = gluon.model_zoo.vision.get_model(model_name, pretrained=True)

    # Fresh classifier head sized for this task; pretrained features are reused.
    finetune_net = gluon.model_zoo.vision.get_model(model_name, classes=task_num_class)
    finetune_net.features = pretrained_net.features
    finetune_net.output.initialize(init.Xavier(), ctx = ctx)
    finetune_net.collect_params().reset_ctx(ctx)
    finetune_net.hybridize()

    # Training-time augmentation; Normalize uses the ImageNet mean/std.
    train_transform = transforms.Compose([
        transforms.Resize(input_scale),
        #transforms.RandomResizedCrop(448,scale=(0.76, 1.0),ratio=(0.999, 1.001)),
        transforms.RandomFlipLeftRight(),
        transforms.RandomBrightness(0.2),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

    train_dataset = gluon.data.vision.ImageFolderDataset(os.path.join('.','train_valid_allset', task, 'train'))
    train_data = gluon.data.DataLoader(train_dataset.transform_first(train_transform),
        batch_size=batch_size, shuffle=True, num_workers=num_workers, last_batch='discard')


    # Validation pipeline is deterministic (resize + normalize only).
    val_transform = transforms.Compose([
        transforms.Resize(input_scale),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
    val_dataset = gluon.data.vision.ImageFolderDataset(os.path.join('.','train_valid_allset', task, 'val'))
    val_data = gluon.data.DataLoader(val_dataset.transform_first(val_transform),
        batch_size=batch_size, shuffle=False, num_workers = num_workers, last_batch='discard')

    trainer = gluon.Trainer(finetune_net.collect_params(), 'adam', {
        'learning_rate': lr})
    metric = mx.metric.Accuracy()
    L = gluon.loss.SoftmaxCrossEntropyLoss()
    lr_counter = 0
    num_batch = len(train_data)

    # Start Training
    best_AP = 0
    best_acc = 0
    for epoch in range(epochs):
        # At each scheduled epoch: reload the best checkpoint and decay the LR.
        # NOTE(review): best_path is only assigned after the first improvement;
        # if lr_steps[0] == 0 this raises NameError -- confirm schedule starts > 0.
        if epoch == lr_steps[lr_counter]:
            finetune_net.collect_params().load(best_path, ctx= ctx)
            trainer.set_learning_rate(trainer.learning_rate*lr_factor)
            lr_counter += 1

        tic = time.time()
        train_loss = 0
        metric.reset()
        AP = 0.
        AP_cnt = 0

        for i, batch in enumerate(train_data):
            # Split each batch across all devices in ctx.
            data = gluon.utils.split_and_load(batch[0], ctx_list=ctx, batch_axis=0, even_split=False)
            label = gluon.utils.split_and_load(batch[1], ctx_list=ctx, batch_axis=0, even_split=False)
            with ag.record():
                outputs = [finetune_net(X) for X in data]
                loss = [L(yhat, y) for yhat, y in zip(outputs, label)]
            for l in loss:
                l.backward()

            trainer.step(batch_size)
            # Average the per-device mean losses for this batch.
            train_loss += sum([l.mean().asscalar() for l in loss]) / len(loss)

            metric.update(label, outputs)
            #ap, cnt = calculate_ap(label, outputs)
            #AP += ap
            #AP_cnt += cnt
            #progressbar(i, num_batch-1)

        #train_map = AP / AP_cnt
        _, train_acc = metric.get()
        train_loss /= num_batch

        this_AP, val_acc, val_loss = validate(finetune_net, val_data, ctx)

        logging.info('[Epoch %d] Train-acc: %.3f, loss: %.3f | Val-acc: %.3f, mAP: %.3f, loss: %.3f | time: %.1f | learning_rate %.6f' %
                 (epoch, train_acc,  train_loss, val_acc, this_AP, val_loss, time.time() - tic, trainer.learning_rate))
        f_val.writelines('[Epoch %d] Train-acc: %.3f, , loss: %.3f | Val-acc: %.3f, mAP: %.3f, loss: %.3f | time: %.1f | learning_rate %.6f\n' %
                 (epoch, train_acc, train_loss, val_acc, this_AP, val_loss, time.time() - tic, trainer.learning_rate))
        # Checkpoint on validation-accuracy improvement.
        if val_acc > best_acc:
            best_AP = this_AP
            best_acc = val_acc
            best_path = os.path.join('.', 'models', '%s_%s_%s_%s_staging.params' % (task, model_name, epoch, best_acc))
            finetune_net.collect_params().save(best_path)

    logging.info('\n')
    # Reload the best checkpoint before returning the net.
    finetune_net.collect_params().load(best_path, ctx= ctx)
    f_val.writelines('Best val acc is :[Epoch %d] Train-acc: %.3f, loss: %.3f | Best-val-acc: %.3f, Best-mAP: %.3f, loss: %.3f | time: %.1f | learning_rate %.6f\n' %
         (epoch, train_acc, train_loss, best_acc, best_AP, val_loss, time.time() - tic, trainer.learning_rate))
    return (finetune_net)
Example #19
0
 def create_and_train(self, model_name: str, batch_size: int,
                      learning_rate: float, epochs: int):
     """
     Build a CNN via ModelCreationService, train it on CIFAR-10 and save it.

     Improvement: the original wrapped every section in
     ``try/except Exception as ex: raise ex``, which re-raises the same
     exception unchanged while adding noise and an extra traceback frame;
     those wrappers are removed -- exceptions propagate identically.

     :param model_name: file name used when saving the trained model
     :param batch_size: size of each training/validation mini-batch
     :param learning_rate: SGD learning rate
     :param epochs: number of passes over the training set
     :return: None; the trained model is saved under self.path.model_dir
     """
     model = model_creation_service.ModelCreationService()
     acc = accuracy_calculation_service.AccuracyCalculationService()
     save_model = model_manipulation_service.ModelManipulationService()
     # datasets (downloaded on first use); touch a slice to force loading
     cifar_train = datasets.CIFAR10(train=True)
     X, y = cifar_train[0:10]
     # transform images and build the data loaders
     transform = data_transformation_service.DataTransformationService()
     transformer = transform.data_transformation()
     cifar_train = cifar_train.transform_first(transformer)
     train_data = gluon.data.DataLoader(cifar_train,
                                        batch_size=batch_size,
                                        shuffle=True)
     cifar_valid = gluon.data.vision.CIFAR10(train=False)
     valid_data = gluon.data.DataLoader(
         cifar_valid.transform_first(transformer),
         batch_size=batch_size)
     # build model
     net = model.create_model()
     net.initialize(init=init.Xavier())
     softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()
     trainer = gluon.Trainer(net.collect_params(), 'sgd',
                             {'learning_rate': learning_rate})
     for epoch in range(epochs):
         train_loss, train_acc, valid_acc = 0., 0., 0.
         tic = time.time()
         for data, label in train_data:
             # forward + backward
             with autograd.record():
                 output = net(data)
                 loss = softmax_cross_entropy(output, label)
             loss.backward()
             # update parameters
             trainer.step(batch_size)
             # accumulate training metrics
             train_loss += loss.mean().asscalar()
             train_acc += acc.acc(output=output, label=label)
         # calculate validation accuracy
         for data, label in valid_data:
             valid_acc += acc.acc(net(data), label)
         print(
             "Epoch %d: loss %.3f, train acc %.3f, test acc %.3f, in %.1f sec"
             % (epoch, train_loss / len(train_data),
                train_acc / len(train_data),
                valid_acc / len(valid_data), time.time() - tic))
     # persist the trained model
     save_model.save_model(
         net, os.path.join(self.path.model_dir, model_name))
Example #20
0
def train_net_resp(network,
                   train_csv,
                   num_classes,
                   batch_size,
                   data_shape,
                   ctx,
                   epochs,
                   learning_rate,
                   momentum,
                   weight_decay,
                   lr_refactor_step,
                   lr_refactor_ratio,
                   identifier,
                   class_names=None,
                   optimizer='sgd'):
    """
    Wrapper for training phase.

    Bug fix: the original deleted entries from ``lr_refactor_step`` in place
    (``del lr_refactor_step[0]``), mutating the caller's list; the schedule is
    now consumed from a private copy.

    Parameters:
    ----------
    network : str
        name for the network structure
    train_csv : str
        .csv file path for training
    num_classes : int
        number of object classes, not including background
    batch_size : int
        training batch-size
    data_shape : int or tuple
        width/height as integer or (3, height, width) tuple
    ctx : [mx.cpu()] or [mx.gpu(x)]
        list of mxnet contexts
    epochs : int
        epochs of training
    learning_rate : float
        training learning rate
    momentum : float
        trainig momentum
    weight_decay : float
        training weight decay param
    lr_refactor_ratio : float
        multiplier for reducing learning rate
    lr_refactor_step : list of int
        epochs at which to rescale the learning rate, e.g. [30, 60, 90];
        the list passed in is NOT mutated by this function
    identifier : int
        identifier(number) of the object of class to classify
    class_names : list of str, optional
        human-readable class names, indexed by `identifier`
    optimizer : str
        usage of different optimizers, other then default sgd
    """
    # load data
    df = pd.read_csv(train_csv)
    n = len(df)
    X = np.zeros((n, 3, data_shape, data_shape), dtype=np.float32)
    Y = np.zeros((n, 1), dtype=np.float32)

    # ImageNet channel statistics used for input normalization
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])
    for i, dfv in enumerate(df.values):
        img = cv2.imread('./images/%s' % dfv[0])
        # BGR -> RGB ([:, :, ::-1]), scale to [0, 1], normalize, HWC -> CHW
        X[i] = (
            (cv2.resize(img,
                        (data_shape, data_shape))[:, :, ::-1] / 255.0 - mean) /
            std).transpose((2, 0, 1))
        Y[i, 0] = dfv[identifier + 2]

    X_train, X_valid, Y_train, Y_valid = train_test_split(X, Y, random_state=8)
    # class weights consumed by the weighted BCE loss below
    w_train = 1. - np.sum(Y_train) / len(Y_train)
    w_val = 1. - np.sum(Y_valid) / len(Y_valid)

    # fine-tune net: pretrained features + fresh single-unit sigmoid head
    pretrained_net = getattr(models, network)(pretrained=True)
    net = getattr(models, network)(classes=1)

    with net.name_scope():
        net.features = pretrained_net.features
        net.output = nn.Dense(1, activation="sigmoid")
    net.output.initialize(init.Xavier())

    # init
    net.collect_params().reset_ctx(ctx)
    net.hybridize()
    loss = wSigmoidBinaryCrossEntropyLoss(from_sigmoid=True)
    best_auc_avg = 0

    # optimizer
    opt, opt_params = get_optimizer_params(optimizer=optimizer,
                                           learning_rate=learning_rate,
                                           momentum=momentum,
                                           weight_decay=weight_decay,
                                           ctx=ctx)

    train_data = gluon.data.DataLoader(gluon.data.ArrayDataset(
        X_train, Y_train),
                                       batch_size,
                                       shuffle=True)
    test_data = gluon.data.DataLoader(
        gluon.data.ArrayDataset(X_valid, Y_valid), batch_size)
    print('Running on', ctx)

    trainer = gluon.Trainer(net.collect_params(), opt, opt_params)
    # consume the LR schedule from a private copy (caller's list untouched)
    steps_left = list(lr_refactor_step)
    for epoch in range(epochs):
        train_loss = 0.
        steps = len(train_data)
        if steps_left and epoch == steps_left[0]:
            trainer.set_learning_rate(trainer.learning_rate *
                                      lr_refactor_ratio)
            del steps_left[0]

        for data, label in train_data:
            data_list = gluon.utils.split_and_load(data, ctx)
            label_list = gluon.utils.split_and_load(label, ctx)

            with autograd.record():
                losses = [
                    loss(net(x), y, w_train)
                    for x, y in zip(data_list, label_list)
                ]
            for l in losses:
                l.backward()

            lmean = [l.mean().asscalar() for l in losses]
            train_loss += sum(lmean) / len(lmean)
            trainer.step(batch_size)

        val_loss = evaluate_resp(net, test_data, w_val, ctx[0])
        val_aucs = AUC(net, test_data, 1, ctx[0])
        val_aucs_avg = val_aucs.mean()

        print("Epoch %d. loss: %.4f, val_loss %.4f" %
              (epoch, train_loss / steps, val_loss))
        print('The AUROC of {} is {}'.format(class_names[identifier],
                                             val_aucs_avg))

        # checkpoint whenever validation AUROC ties or beats the best so far
        if val_aucs_avg >= best_auc_avg:
            best_auc_avg = val_aucs_avg
            net.features.save_params('./model/%s_f_Epoch%d.params' %
                                     (class_names[identifier], epoch))
            net.output.save_params('./model/%s_o_Epoch%d.params' %
                                   (class_names[identifier], epoch))
Example #21
0

if __name__ == '__main__':
    # Smoke test for the Source2TokenAttention block on random integer data.
    from train import arg_parse
    config = arg_parse()
    config['embedding_dim'] = 300
    config['vocab_size'] = 10
    net = Source2TokenAttention(config)

    # 1000 random "sentences" of 200 token ids in [0, 10); binary labels.
    data = nd.array([[np.random.randint(10) for _ in range(200)]
                     for _ in range(1000)])
    label = nd.array([np.random.randint(2) for _ in range(1000)])
    dataset_train = gluon.data.ArrayDataset(data, label)
    train_data = gluon.data.DataLoader(dataset_train,
                                       batch_size=50,
                                       shuffle=True,
                                       last_batch='rollover')
    embedding = text.embedding.CustomEmbedding(
        'embedding_files/dummy.embedding', elem_delim=' ')

    net.collect_params().initialize(init.Xavier(), ctx=mx.cpu())
    net.embedding.weight.set_data(embedding.idx_to_vec)
    softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()

    # NOTE(review): loss.backward() runs but no Trainer.step() follows, so
    # parameters are never updated -- this is a forward/backward check only.
    for data, label in train_data:
        with autograd.record():
            out = net(data)
            loss = softmax_cross_entropy(out, label)
            loss.backward()
        print(loss.sum().asscalar())
Example #22
0
def get_net(ctx):
    """Build a 10-class resnet18 Xavier-initialized on the given context(s)."""
    model = resnet18(10)
    model.initialize(ctx=ctx, init=init.Xavier())
    return model
Example #23
0
    num_anchors = len(sizes[0]) + len(ratios[0]) - 1

    # Build the model and run a dummy forward pass to inspect output shapes.
    net = TinySSD(num_classes=1)
    net.initialize()
    # NOTE(review): (32.3, 256, 256) is not a valid ndarray shape -- almost
    # certainly meant a batch of 32 RGB images, i.e. (32, 3, 256, 256); confirm
    # against the original example before fixing.
    X = nd.zeros((32.3, 256, 256))
    anchors, cls_preds, bbox_preds = net(X)
    print('output anchors:', anchors.shape)
    print('output class preds:', cls_preds.shape)
    print('output bbox preds:', bbox_preds.shape)

    # Load the dataset and re-create/initialize the net on the chosen device.
    batch_size = 32
    train_iter, _ = util.load_data_pikachu(batch_size)
    ctx, net = util.try_gpu(), TinySSD(num_classes=1)
    net.initialize(init=init.Xavier(), ctx=ctx)
    trainer = gluon.Trainer(net.collect_params(), 'sgd', {
        'learning_rate': 0.2,
        'wd': 5e-4
    })

    # Classification and bounding-box regression losses.
    cls_loss = gluon.loss.SoftmaxCrossEntropyLoss()
    bbox_loss = gluon.loss.L1Loss()

    # Train the model.
    for epoch in range(20):
        acc_sum, mae_sum, n, m = 0.0, 0.0, 0, 0
        train_iter.reset()  # read the data from the beginning
        start = time.time()
        for batch in train_iter:
Example #24
0
 def function_set(self):
     """Create the network: a 10-class GoogLeNet, Xavier-initialized on the stored context."""
     self.__net = GoogLeNet(num_classes=10, verbose=False)
     # Initialize weights directly on the context chosen at construction time.
     self.__net.initialize(ctx=self.__ctx, init=init.Xavier())
Example #25
0
    ctx = [mx.gpu(0), mx.gpu(1)]
    net.collect_params().reset_ctx(ctx)
    net.hybridize()
    loss = gloss.SoftmaxCrossEntropyLoss()
    trainer = gluon.Trainer(net.collect_params(), "sgd", {"learning_rate": lr, "wd": 0.001})
    gb.train(train_iter, test_iter, net, loss, trainer, ctx, num_epochs)


if __name__ == "__main__":
    # Hot-dog fine-tuning demo: load datasets, define augmentations, assemble
    # the fine-tuned net, then train.
    # NOTE(review): these module-level names appear to be consumed as globals
    # by train_fine_tunning (body not visible here) -- do not rename.
    train_imgs = gdata.vision.ImageFolderDataset("../data/hotdog/train")
    test_imgs = gdata.vision.ImageFolderDataset("../data/hotdog/test")
    # Training augmentation; Normalize uses the ImageNet mean/std.
    train_augs = gdata.vision.transforms.Compose([
        gdata.vision.transforms.RandomResizedCrop(224),
        gdata.vision.transforms.RandomFlipLeftRight(),
        gdata.vision.transforms.ToTensor(),
        gdata.vision.transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
    # Deterministic evaluation pipeline (resize + center crop).
    test_augs = gdata.vision.transforms.Compose([
        gdata.vision.transforms.Resize(256),
        gdata.vision.transforms.CenterCrop(224),
        gdata.vision.transforms.ToTensor(),
        gdata.vision.transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
    pretrained_net = model_zoo.vision.resnet18_v2(pretrained=True, root="../models")
    finetune_net = model_zoo.vision.resnet18_v2(classes=2)
    # Reuse frozen pretrained features; train only the new 2-class output head
    # (with a 10x learning-rate multiplier).
    finetune_net.features = pretrained_net.features
    finetune_net.features.collect_params().setattr("grad_req", "null")
    finetune_net.output.initialize(init=init.Xavier())
    finetune_net.output.collect_params().setattr("lr_mult", 10)
    train_fine_tunning(finetune_net, lr=0.01, batch_size=128, num_epochs=5)
Example #26
0
        loss = F.smooth_l1((output - label) * mask, scalar=1.0)
        return loss.mean(self._batch_axis, exclude=True)


box_loss = SmoothL1Loss()
from mxnet import init
from mxnet import gpu

ctx = gpu(0)
# The CUDA implementation requires each image to have at least 3 labels;
# pad two -1 labels for each instance.
# NOTE(review): reshape's return value is discarded here -- confirm it
# mutates the iterator in place.
train_data.reshape(label_shape=(3, 5))
train_data = test_data.sync_label_shape(train_data)

# Build and initialize the SSD detector on the GPU.
net = ToySSD(num_class)
net.initialize(init.Xavier(magnitude=2), ctx=ctx)
trainer = gluon.Trainer(net.collect_params(), 'sgd', {
    'learning_rate': 0.1,
    'wd': 5e-4
})

from mxnet import metric

# Accuracy for class predictions, MAE for box offsets.
cls_metric = metric.Accuracy()
box_metric = metric.MAE()
import time
from mxnet import autograd
for epoch in range(5):
    # reset data iterators and metrics
    train_data.reset()
    cls_metric.reset()
# Convert images from NHWC to NCHW layout.
test_pic = np.transpose(test_pic, (0, 3, 1, 2))
train_data = gluon.data.DataLoader(gluon.data.ArrayDataset(
    train_pic, train_label.astype('float32')),
                                   batch_size,
                                   shuffle=True)
test_data = gluon.data.DataLoader(gluon.data.ArrayDataset(
    test_pic, test_label.astype('float32')),
                                  batch_size,
                                  shuffle=False)

# Random crop + mirror augmenters for (3, 32, 32) images.
# NOTE(review): aug_train is defined but not applied anywhere in this
# visible code -- confirm it is used further down.
aug_train = image.CreateAugmenter(data_shape=(3, 32, 32),
                                  rand_crop=True,
                                  rand_mirror=True)

# 10-class ResNet trained with Nesterov momentum ('nag') and weight decay.
net = ResNet(10)
net.initialize(ctx=ctx, init=init.Xavier())
loss = gluon.loss.SoftmaxCrossEntropyLoss()
trainer = gluon.Trainer(net.collect_params(), 'nag', {
    'learning_rate': 0.1,
    'momentum': 0.9,
    'wd': 5e-4
})

epochs = 160

# a/b collect per-epoch stats; LR is stepped down at epochs 80 and 140.
a, b = [], []
for epoch in range(epochs):
    if epoch == 80:
        trainer.set_learning_rate(0.01)
    if epoch == 140:
        trainer.set_learning_rate(0.001)
Example #28
0
# Text-classification CNN: one convolution spanning the full embedding width,
# max-pool over all word positions, then two dense layers (18 output classes).
net = nn.Sequential()
with net.name_scope():
    # net.add(nn.Conv2D(256, kernel_size=(5, DIMENSION), padding=(1, 0), activation='relu'))
    net.add(
        nn.Conv2D(256,
                  kernel_size=(3, DIMENSION),
                  padding=(1, 0),
                  activation='relu'))
    # net.add(nn.MaxPool2D(pool_size=(FIXED_WORD_LENGTH - 2, 1)))
    net.add(nn.MaxPool2D(pool_size=(FIXED_WORD_LENGTH, 1)))
    net.add(nn.Dense(256, activation='relu'))
    net.add(nn.Dropout(0.5))
    net.add(nn.Dense(18))

net.collect_params().initialize(init=init.Xavier(), ctx=ctx)

print(net)

# Training hyper-parameters (decay_rate/gap presumably drive an LR schedule
# defined elsewhere -- confirm).
batch_size = 100
num_epochs = 100
decay_rate = 0.1
gap = 25
loss = gloss.SoftmaxCrossEntropyLoss()
# trainer = gluon.Trainer(net.collect_params(), 'AdaDelta', {'rho': 0.95, 'epsilon': 1e-6, 'wd': 0.01})
# trainer = gluon.Trainer(net.collect_params(), 'adam', {'learning_rate': 0.0001})
# trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': .01})
if ADAPTIVE_LEARNING_RATE:
    trainer = gluon.Trainer(net.collect_params(), 'adam',
                            {'learning_rate': 0.01})
else:
Example #29
0
# Validation loader (transform applied to the image element only).
valid_data = gluon.data.DataLoader(
    mnist_valid.transform_first(transformer),
    batch_size=batch_size, num_workers=4)

##

# LeNet-style CNN for 10-class image classification.
net = nn.Sequential()
net.add(nn.Conv2D(channels=6, kernel_size=5, activation='relu'),
        nn.MaxPool2D(pool_size=2, strides=2),
        nn.Conv2D(channels=16, kernel_size=3, activation='relu'),
        nn.MaxPool2D(pool_size=2, strides=2),
        nn.Flatten(),
        nn.Dense(120, activation="relu"),
        nn.Dense(84, activation="relu"),
        nn.Dense(10))
net.initialize(init=init.Xavier())

##

softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()

trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.1})

##

def acc(output, label):
    """Return the fraction of rows in *output* whose argmax equals *label*.

    output: (batch, num_output) float32 ndarray
    label:  (batch,) int32 ndarray
    """
    predictions = output.argmax(axis=1)
    matches = predictions == label.astype('float32')
    return matches.mean().asscalar()
Example #30
0
    def __init__(
        self,
        d_hidden: int,
        kernel_sizes: List[int],
        n_head: int = 1,
        bias: bool = True,
        bidirectional: bool = False,
        dist_enc: Optional[str] = None,
        share_values: bool = False,
        dropout: float = 0.0,
        temperature: float = 1.0,
        **kwargs,
    ):
        """
        Self-attention module with q,k,v from the same input

        Parameters
        ----------
        d_hidden : int
            hidden dimension
        kernel_sizes: List[int]
            kernel sizes of convolutions to generate queries and keys
        n_head : int, optional
            number of attention heads, by default 1
        bias : bool, optional
            add bias term in input and output projections, by default True
        bidirectional : bool, optional
            if False, add a mask to avoid backward attention, by default False
        dist_enc : Optional[str], optional
            add relative distance embeddings to dot-product attention, can be
                'add' (linearly combine key and dist),
                'dot' (dot product between key and dist),
                or None (disabled),
            by default None
        share_values : bool, optional
            if True, a value representation is shared by all attention heads, by default False
            ref. https://arxiv.org/abs/1912.09363
        dropout : float, optional
            dropout rate, by default 0.0
        temperature : float, optional
            softmax temperature, by default 1.0
        """
        super(SelfAttention, self).__init__(**kwargs)
        n_groups = len(kernel_sizes)
        # Heads must evenly split the hidden dim; conv groups must evenly
        # partition both the hidden dim and the heads.
        assert (
            d_hidden % n_head == 0
        ), f"hidden dim {d_hidden} cannot be split into {n_head} heads."
        assert (
            d_hidden % n_groups == 0
        ), f"hidden dim {d_hidden} cannot be split into {n_groups} groups."
        # Bug fix: message referenced undefined name `n_heads`, so a failing
        # assertion raised NameError instead of the intended message.
        assert (
            n_head % n_groups == 0
        ), f"num_heads {n_head} cannot be allocated for {n_groups} groups."
        self.d_hidden = d_hidden
        self.kernel_sizes = kernel_sizes
        self.n_groups = n_groups
        self.d_group = self.d_hidden // self.n_groups
        self.n_head = n_head
        self.d_head = self.d_hidden // self.n_head
        self.bias = bias
        self.dist_enc = dist_enc
        self.bidirectional = bidirectional
        self.share_values = share_values
        self.temperature = temperature

        with self.name_scope():
            # One causal conv per kernel size; each emits queries and keys
            # (2 * d_group channels), concatenated along the last axis.
            self.qk_proj = HybridConcurrent(axis=-1, prefix="qk_proj_")
            for ksize in self.kernel_sizes:
                self.qk_proj.add(
                    CausalConv1D(
                        channels=self.d_group * 2,
                        kernel_size=ksize,
                        prefix=f"conv{ksize}_",
                    ))
            # Value projection: a single shared head slice when share_values.
            self.v_proj = nn.Dense(
                units=self.d_head if self.share_values else d_hidden,
                use_bias=bias,
                flatten=False,
                weight_initializer=init.Xavier(),
                prefix="v_proj_",
            )
            self.out_proj = nn.Dense(
                units=d_hidden,
                use_bias=bias,
                flatten=False,
                weight_initializer=init.Xavier(),
                prefix="out_proj_",
            )

            if self.dist_enc is not None:
                assert self.dist_enc in [
                    "dot",
                    "add",
                ], f"distance encoding type {self.dist_enc} is not supported"
                self.posemb = SinusoidalPositionalEmbedding(d_hidden)
                self.pos_proj = nn.Dense(
                    units=d_hidden,
                    use_bias=bias,
                    flatten=False,
                    weight_initializer=init.Xavier(),
                    prefix="pos_proj_",
                )
                if self.dist_enc == "add":
                    # Learnable per-head bias parameters applied to the content
                    # and position terms when dist_enc == 'add'.
                    self._ctt_bias_weight = Parameter(
                        "_ctt_bias_weight",
                        shape=(1, n_head, 1, self.d_head),
                        init=init.Xavier(),
                    )
                    self._pos_bias_weight = Parameter(
                        "_pos_bias_weight",
                        shape=(1, n_head, 1, self.d_head),
                        init=init.Xavier(),
                    )

            self.dropout = nn.Dropout(dropout)