def test_invalid_parameter_grad_stype():
    """Construct a 10x10 'weight' Parameter with ``grad_stype='invalid'``."""
    # NOTE(review): presumably the construction is expected to raise; the
    # assertion/decorator that checks this is not visible here -- confirm.
    rejected_param = gluon.Parameter(
        'weight',
        shape=(10, 10),
        grad_stype='invalid',
    )
Beispiel #2
0
 def __init__(self):
     """Create the block and attach a deferred-init weight ``w`` of shape (K, N)."""
     super(TestBlock2, self).__init__()
     # K and N are read from the enclosing scope.
     weight_shape = (K, N)
     self.w = gluon.Parameter(
         'w',
         shape=weight_shape,
         allow_deferred_init=True,
     )
Beispiel #3
0
 def __init__(self, num_input_dim=0, num_hidden_dim=100, num_output_dim=10):
     """Create two deferred-init weight parameters, ``w1`` and ``w2``.

     ``w1`` has shape (num_input_dim, num_hidden_dim) and ``w2`` has shape
     (num_hidden_dim, num_output_dim).
     """
     super(LinearRegression, self).__init__()
     # NOTE(review): the default num_input_dim=0 presumably marks an unknown
     # dimension to be resolved by deferred initialization -- confirm.
     input_to_hidden = (num_input_dim, num_hidden_dim)
     hidden_to_output = (num_hidden_dim, num_output_dim)
     self.w1 = gluon.Parameter('w1', shape=input_to_hidden,
                               allow_deferred_init=True)
     self.w2 = gluon.Parameter('w2', shape=hidden_to_output,
                               allow_deferred_init=True)

# Define a simple multilayer perceptron model
class MLP(gluon.nn.Block):
    """Two Dense layers applied in sequence: ``hidden`` (256, relu) then ``output`` (10)."""

    def __init__(self, **kwargs):
        """Build both Dense layers inside this block's name scope.

        ``kwargs`` are forwarded unchanged to the parent Block constructor.
        """
        super(MLP, self).__init__(**kwargs)
        with self.name_scope():
            # Layers are assigned in forward order: hidden first, then output.
            self.hidden = gluon.nn.Dense(256, activation='relu')
            self.output = gluon.nn.Dense(10)

    def forward(self, x):
        """Return ``output(hidden(x))``."""
        hidden_activations = self.hidden(x)
        return self.output(hidden_activations)


# Access model parameters: build a standalone 2x3 Parameter named 'good_param'.
my_param = gluon.Parameter('good_param', shape=(2, 3))
# Parameter initialization (no arguments passed, so the library defaults apply).
my_param.initialize()
# Show the parameter's values, its gradient buffer and its name.
print('data:', my_param.data())
print('grad:', my_param.grad())
print('name:', my_param.name)

# Initialize model parameters
x = nd.random.uniform(shape=(3, 5))
net = MLP()
net.initialize()
# Run one forward pass before touching the parameters.
# NOTE(review): the Dense layers are created without an input size, so this
# pass presumably lets the weight shapes be inferred from x -- confirm.
net(x)
params = net.collect_params()
# Re-initialize every collected parameter from Normal(sigma=0.02);
# force_reinit=True is passed because net.initialize() already ran above.
params.initialize(init=init.Normal(sigma=0.02), force_reinit=True)
print('hidden weight: ', net.hidden.weight.data(), '\nhidden bias: ',
      net.hidden.bias.data(), '\noutput weight: ', net.output.weight.data(),
Beispiel #5
0
def test_block_attr_param():
    """Assign a Parameter to a Block attribute, then rebind that attribute to a tuple."""
    block = gluon.Block()

    # regular variables can't change types: attribute ``b`` first holds a
    # Parameter and is then rebound to a plain tuple.
    # NOTE(review): presumably the second assignment is expected to raise; the
    # check for that is not visible in this block -- confirm.
    block.b = gluon.Parameter()
    block.b = (2,)
 def __init__(self, w_init, **kwargs):
     """Create the block with a length-30 parameter ``w`` filled with ``w_init``.

     ``kwargs`` are forwarded unchanged to the parent constructor.
     """
     super(SignSTENET, self).__init__(**kwargs)
     constant_fill = mx.initializer.Constant(w_init)
     self.w = gluon.Parameter(
         'w',
         shape=30,
         init=constant_fill,
         grad_req='write',
     )
Beispiel #7
0
# Stack two Dense layers followed by a CenteredLayer (defined elsewhere).
net = nn.Sequential()
with net.name_scope():
	net.add(nn.Dense(128))
	net.add(nn.Dense(10))
	net.add(CenteredLayer())

net.initialize() # net has parameters so need initialization
# Forward a random batch of shape (3, 2) through the stack.
y = net(nd.random.uniform(shape=(3,2)))

# Print the output and its mean.
# NOTE(review): presumably the mean is expected to be close to zero after the
# CenteredLayer -- confirm against that layer's definition.
print(y)
print(y.mean())


## with parameters
from mxnet import gluon
# Build a standalone 3x3 Parameter directly.
my_param = gluon.Parameter("exciting_parameter_yay", shape=(3,3)) # name, shape

my_param.initialize()

# Show the parameter's values and its gradient buffer.
print(my_param.data(), my_param.grad())


# another way: request the parameter from a ParameterDict created with the
# prefix "block1_".
pd = gluon.ParameterDict(prefix="block1_")
pd.get("exciting_parameter_yay",shape=(3,3))
# print(pd)

class MyDense(nn.Block):
	def __init__(self,units,in_units,**kwargs):
		super(MyDense,self).__init__(**kwargs)
		with self.name_scope():
Beispiel #8
0
def run(args, outdir):
    """Run training for NN4 architecture with Variational Bayes.

    For each replication index in ``range(int(args.experiments))`` the
    function slices one replication out of the training data, splits it into
    train/valid/test parts, trains ``net`` by sampling its weights from the
    ``lambdas`` parameters on every batch, and records train/test scores.
    Progress is logged to ``outdir + 'log.txt'``; the per-replication label
    means/stds are saved with ``mx.nd.save``; the network is exported; and a
    summary is written to a "-total-scores-" text file.

    Args:
        args: object exposing the attributes read below: ``iterations``,
            ``learning_rate``, ``weight_decay``, ``hidden_size``,
            ``experiments``, ``learning_rate_factor``,
            ``learning_rate_steps``, ``epoch_output_iter``, ``num_workers``,
            ``batch_size_per_unit``, ``seed``, ``data_dir``, ``data_train``,
            ``data_test`` and ``architecture``.
        outdir: output path prefix. It is concatenated directly with file
            names, so presumably it must end with a path separator -- confirm.

    Returns:
        dict with keys ``"ite"``, ``"ate"`` and ``"pehe"`` (strings formatted
        as "mean ± sem" of the *test* scores, because ``means``/``stds`` are
        last assigned from ``test_scores``) and ``"mean_duration"`` (mean
        wall-clock training time per replication in seconds, rounded to two
        decimals).
    """
    ''' Hyperparameters '''
    epochs = int(args.iterations)
    learning_rate = float(args.learning_rate)
    wd = float(args.weight_decay)
    hidden_size = int(args.hidden_size)
    train_experiments = int(args.experiments)
    learning_rate_factor = float(args.learning_rate_factor)
    learning_rate_steps = int(
        args.learning_rate_steps
    )  # changes the learning rate for every n updates.
    epoch_output_iter = int(args.epoch_output_iter)
    ''' Logging '''
    logfile = outdir + 'log.txt'
    # Opening in 'w' mode and closing immediately creates/truncates the log
    # file before the first log() call.
    f = open(logfile, 'w')
    f.close()

    # Settings handed to BBBLoss below.
    config = {  # TODO may need adjustments
        # "sigma_p1": 1.5,
        "sigma_p1": 1.75,  # og
        # "sigma_p2": 0.25,
        # "sigma_p2": 0.5, # og
        "sigma_p2": 0.5,
        "pi": 0.5,
        "lambda_p": 24.5
    }
    ''' Set GPUs/CPUs '''
    num_gpus = mx.context.num_gpus()
    num_workers = int(
        args.num_workers)  # replace num_workers with the number of cores
    ctx = [mx.gpu(i) for i in range(num_gpus)
           ] if num_gpus > 0 else [mx.cpu()]  # todo change as cfr_net_train
    batch_size_per_unit = int(args.batch_size_per_unit)  # mini-batch size
    # NOTE(review): batch_size scales with the number of GPUs, but the
    # training loop below moves every batch to ctx[0] only.
    batch_size = batch_size_per_unit * max(num_gpus, 1)
    ''' Set seeds '''
    for c in ctx:
        mx.random.seed(int(args.seed), c)
    np.random.seed(int(args.seed))
    ''' Feed Forward Neural Network Model (4 hidden layers) '''
    net = ff4_relu_architecture(hidden_size)
    ''' Load datasets '''
    # train_dataset = load_data('../' + args.data_dir + args.data_train) # PyCharm run
    train_dataset = load_data(args.data_dir + args.data_train)  # Terminal run

    # Only the path of the test data is logged here; the test file itself is
    # not loaded in this function.
    log(logfile, 'Training data: ' + args.data_dir + args.data_train)
    log(logfile, 'Valid data:     ' + args.data_dir + args.data_test)
    log(
        logfile, 'Loaded data with shape [%d,%d]' %
        (train_dataset['n'], train_dataset['dim']))

    # ''' Feature correlation '''
    # import pandas as pd
    # df = pd.DataFrame.from_records(train_dataset['x'][:, :, 20])
    # df.insert(25, "t", train_dataset['t'][:, 20])
    # corr = df.corr()
    # import seaborn as sns
    # sns.heatmap(corr, xticklabels=corr.columns, yticklabels=corr.columns, annot=True, fmt='.1f')
    ''' Instantiate net '''
    ''' Param. init. '''
    net.collect_params().initialize(mx.init.Xavier(), ctx=ctx)
    net.hybridize()
    ''' Forward-propagate a single data set entry once to set up all network 
    parameters (weights and biases) with the desired initializer specified above. '''
    # Replication 0 is used only for this warm-up forward pass.
    x = train_dataset['x'][:, :, 0]
    t = np.reshape(train_dataset['t'][:, 0], (-1, 1))
    yf = train_dataset['yf'][:, 0]
    yf_m, yf_std = np.mean(yf, axis=0), np.std(yf, axis=0)
    yf = (yf - yf_m) / yf_std
    # Network input is the covariates with the treatment column appended.
    factual_features = np.hstack((x, t))
    zero_train_factual_dataset = gluon.data.ArrayDataset(
        mx.nd.array(factual_features), mx.nd.array(yf))
    zero_train_factual_loader = gluon.data.DataLoader(
        zero_train_factual_dataset,
        batch_size=batch_size,
        shuffle=True,
        num_workers=num_workers)
    # Only the first batch is pushed through the network (note the break).
    for i, (batch_f_features,
            batch_yf) in enumerate(zero_train_factual_loader):
        batch_f_features = gluon.utils.split_and_load(batch_f_features,
                                                      ctx_list=ctx,
                                                      even_split=False)
        [net(x) for x in batch_f_features]
        break

    # NOTE(review): weight_scale and rho_offset are referenced only by the
    # commented-out mu/rho code below.
    weight_scale = .1
    rho_offset = -3
    lambda_init = 25
    ''' Initialize variational parameters; mean and variance for each weight '''
    # mus and rhos stay empty while the mu/rho code is commented out.
    mus = []
    rhos = []
    lambdas = []

    # One shape per network parameter, read after the warm-up pass above.
    shapes = list(map(lambda x: x.shape, net.collect_params().values()))

    for shape in shapes:
        # mu = gluon.Parameter('mu', shape=shape, init=mx.init.Normal(weight_scale))
        # rho = gluon.Parameter('rho', shape=shape, init=mx.init.Constant(rho_offset))
        # NOTE(review): every Parameter created here has the same name 'lmb';
        # confirm that gluon.Trainer copes with duplicate parameter names.
        lmb = gluon.Parameter('lmb',
                              shape=shape,
                              init=mx.init.Constant(lambda_init))
        # mu.initialize(ctx=ctx)
        # rho.initialize(ctx=ctx)
        lmb.initialize(ctx=ctx)
        # mus.append(mu)
        # rhos.append(rho)
        lambdas.append(lmb)
    # variational_params = mus + rhos
    variational_params = lambdas

    # raw_mus = list(map(lambda x: x.data(ctx[0]), mus))
    # raw_rhos = list(map(lambda x: x.data(ctx[0]), rhos))
    # Arrays of the lambda parameters on the first context only.
    raw_lambdas = list(map(lambda x: x.data(ctx[0]), lambdas))
    ''' Metric, Loss and Optimizer '''
    rmse_metric = mx.metric.RMSE()
    l2_loss = gluon.loss.L2Loss()
    bbb_loss = BBBLoss(ctx[0],
                       log_prior="exponential",
                       sigma_p1=config['sigma_p1'],
                       sigma_p2=config['sigma_p2'],
                       pi=config['pi'],
                       lambda_p=config['lambda_p'])
    # bbb_loss = BBBLoss(ctx[0], log_prior="scale_mixture", sigma_p1=config['sigma_p1'], sigma_p2=config['sigma_p2'],
    #                    pi=config['pi'])
    scheduler = mx.lr_scheduler.FactorScheduler(step=learning_rate_steps,
                                                factor=learning_rate_factor,
                                                base_lr=learning_rate)
    # optimizer = mx.optimizer.Adam(learning_rate=learning_rate, lr_scheduler=scheduler)
    optimizer = mx.optimizer.RMSProp(learning_rate=learning_rate,
                                     lr_scheduler=scheduler,
                                     wd=wd)
    # optimizer = mx.optimizer.Adam(learning_rate=learning_rate)
    # The trainer is given only the lambda parameters, not the network's own
    # parameters. It is created once, outside the replication loop, so the
    # same trainer and lambdas carry over from one replication to the next.
    trainer = gluon.Trainer(variational_params, optimizer=optimizer)
    ''' Initialize train score results '''
    train_scores = np.zeros((train_experiments, 3))
    ''' Initialize train experiment durations '''
    train_durations = np.zeros((train_experiments, 1))
    ''' Initialize test score results '''
    test_scores = np.zeros((train_experiments, 3))
    ''' Train experiments means and stds '''
    means = np.array([])
    stds = np.array([])
    ''' Train '''
    for train_experiment in range(train_experiments):
        ''' Create training dataset '''
        # Slice out the data of the current replication.
        x = train_dataset['x'][:, :, train_experiment]
        t = np.reshape(train_dataset['t'][:, train_experiment], (-1, 1))
        yf = train_dataset['yf'][:, train_experiment]
        ycf = train_dataset['ycf'][:, train_experiment]
        mu0 = train_dataset['mu0'][:, train_experiment]
        mu1 = train_dataset['mu1'][:, train_experiment]

        train, valid, test, _ = split_data_in_train_valid_test(
            x, t, yf, ycf, mu0, mu1)
        ''' With-in sample '''
        # The within-sample evaluator covers the train and valid parts together.
        train_evaluator = Evaluator(
            np.concatenate([train['t'], valid['t']]),
            np.concatenate([train['yf'], valid['yf']]),
            y_cf=np.concatenate([train['ycf'], valid['ycf']], axis=0),
            mu0=np.concatenate([train['mu0'], valid['mu0']], axis=0),
            mu1=np.concatenate([train['mu1'], valid['mu1']], axis=0))
        test_evaluator = Evaluator(test['t'], test['yf'], test['ycf'],
                                   test['mu0'], test['mu1'])
        ''' Normalize yf '''  # TODO check for normalize input?
        # Mean and std come from the train part only and are applied to all
        # three parts. The evaluators above were built before this step, so
        # they hold the unnormalized values.
        yf_m, yf_std = np.mean(train['yf'], axis=0), np.std(train['yf'],
                                                            axis=0)
        train['yf'] = (train['yf'] - yf_m) / yf_std
        valid['yf'] = (valid['yf'] - yf_m) / yf_std
        test['yf'] = (test['yf'] - yf_m) / yf_std
        ''' Save mean and std '''
        means = np.append(means, yf_m)
        stds = np.append(stds, yf_std)
        ''' Train dataset '''
        factual_features = np.hstack((train['x'], train['t']))
        train_factual_dataset = gluon.data.ArrayDataset(
            mx.nd.array(factual_features), mx.nd.array(train['yf']))
        ''' With-in sample '''
        train_rmse_ite_dataset = gluon.data.ArrayDataset(
            mx.nd.array(np.concatenate([train['x'], valid['x']])))
        ''' Valid dataset '''
        valid_factual_features = np.hstack((valid['x'], valid['t']))
        valid_factual_dataset = gluon.data.ArrayDataset(
            mx.nd.array(valid_factual_features), mx.nd.array(valid['yf']))
        ''' Test dataset '''
        test_rmse_ite_dataset = gluon.data.ArrayDataset(mx.nd.array(test['x']))
        ''' Train DataLoader '''
        train_factual_loader = gluon.data.DataLoader(train_factual_dataset,
                                                     batch_size=batch_size,
                                                     shuffle=True,
                                                     num_workers=num_workers)
        train_rmse_ite_loader = gluon.data.DataLoader(train_rmse_ite_dataset,
                                                      batch_size=batch_size,
                                                      shuffle=False,
                                                      num_workers=num_workers)
        ''' Valid DataLoader '''
        valid_factual_loader = gluon.data.DataLoader(valid_factual_dataset,
                                                     batch_size=batch_size,
                                                     shuffle=False,
                                                     num_workers=num_workers)
        ''' Test DataLoader '''
        test_rmse_ite_loader = gluon.data.DataLoader(test_rmse_ite_dataset,
                                                     batch_size=batch_size,
                                                     shuffle=False,
                                                     num_workers=num_workers)

        num_batch = len(train_factual_loader)

        train_start = time.time()

        # NOTE(review): train_acc and test_acc are appended to only in the
        # commented-out code below.
        train_acc = []
        test_acc = []
        ''' Train model '''
        for epoch in range(
                1, epochs +
                1):  # start with epoch 1 for easier learning rate calculation

            train_loss = 0
            rmse_metric.reset()

            for i, (batch_f_features,
                    batch_yf) in enumerate(train_factual_loader):
                ''' Get data and labels into slices and copy each slice into a context.'''
                # NOTE(review): the feature width 26 is hard-coded; presumably
                # 25 covariates plus the treatment column -- confirm.
                batch_f_features = batch_f_features.as_in_context(
                    ctx[0]).reshape((-1, 26))
                batch_yf = batch_yf.as_in_context(ctx[0]).reshape(
                    (len(batch_yf), -1))
                ''' Forward '''
                with autograd.record():
                    ''' Generate sample '''
                    # layer_params, sigmas = generate_weight_sample(shapes, raw_mus, raw_rhos, ctx[0])
                    layer_params = generate_weight_sample_exp(
                        shapes, raw_lambdas, ctx[0])
                    ''' Overwrite network parameters with sampled parameters '''
                    # Writes into the private _data list of each Parameter,
                    # replacing only the entry at index 0.
                    for sample, param in zip(layer_params,
                                             net.collect_params().values()):
                        param._data[0] = sample
                    ''' Forward-propagate the batch '''
                    outputs = net(batch_f_features)

                    # if epoch == epochs:
                    #     ''' Factual outcomes and batch_yf histograms '''
                    #     import pandas as pd
                    #     df = pd.DataFrame({'layer_params': layer_params[6][0].asnumpy().flatten()}, columns=['layer_params'])
                    #     df = pd.DataFrame(
                    #         {'outputs': outputs.asnumpy().flatten(), 'batch_yf': batch_yf.asnumpy().flatten()},
                    #         columns=['outputs', 'batch_yf'])
                    #     df.plot(kind='hist', alpha=0.5)
                    #     df.plot.kde()
                    ''' Calculate the loss '''
                    l2_loss_value = l2_loss(outputs, batch_yf)
                    # bbb_loss_value = bbb_loss(outputs, batch_yf, layer_params, raw_mus, sigmas, num_batch)
                    # An empty list is passed in the position where the
                    # commented-out call above passed sigmas.
                    bbb_loss_value = bbb_loss(outputs, batch_yf, layer_params,
                                              raw_lambdas, [], num_batch)
                    loss = bbb_loss_value + l2_loss_value
                    # loss = bbb_loss_value
                    # loss = l2_loss_value
                    ''' Backpropagate for gradient calculation '''
                    # Note that backward() is called while still inside the
                    # autograd.record() scope.
                    loss.backward()
                ''' Optimize '''
                trainer.step(batch_size)

                # Average over the entries of `loss` for this batch.
                train_loss += sum([l.mean().asscalar()
                                   for l in loss]) / len(loss)

                rmse_metric.update(batch_yf, outputs)

            # Report on the first epoch and on every epoch_output_iter-th epoch.
            if epoch % epoch_output_iter == 0 or epoch == 1:
                _, train_rmse_factual = rmse_metric.get()
                train_loss /= num_batch
                # layer_params here is the sample drawn for the last batch of
                # this epoch.
                _, valid_rmse_factual = test_net_vb(net, valid_factual_loader,
                                                    layer_params, ctx)

                # _, train_RMSE = evaluate_RMSE(train_factual_loader, net, raw_mus, ctx)
                # _, test_RMSE = evaluate_RMSE(valid_factual_loader, net, raw_mus, ctx)
                # train_acc.append(np.asscalar(train_RMSE))
                # test_acc.append(np.asscalar(test_RMSE))
                # print("Epoch %s. Train-RMSE %s, Test-RMSE %s" %
                #       (epoch, train_RMSE, test_RMSE))

                # Logs element 0 of each loss from the last batch only.
                log(
                    logfile, 'l2-loss: %.3f, bbb-loss: %.3f' %
                    (l2_loss_value[0].asscalar(),
                     bbb_loss_value[0].asscalar()))

                log(
                    logfile,
                    '[Epoch %d/%d] Train-rmse-factual: %.3f, loss: %.3f | Valid-rmse-factual: %.3f | learning-rate: '
                    '%.3E' % (epoch, epochs, train_rmse_factual, train_loss,
                              valid_rmse_factual, trainer.learning_rate))

        train_durations[train_experiment, :] = time.time() - train_start
        ''' Test model '''
        # NOTE(review): layer_params is bound only inside the batch loop, so
        # it is undefined here if no batch was ever processed (epochs == 0).
        # y_t0, y_t1 = predict_treated_and_controlled_vb(net, train_rmse_ite_loader, raw_mus, ctx)
        y_t0, y_t1 = predict_treated_and_controlled_vb(net,
                                                       train_rmse_ite_loader,
                                                       layer_params, ctx)
        # Undo the yf normalization before scoring.
        y_t0, y_t1 = y_t0 * yf_std + yf_m, y_t1 * yf_std + yf_m
        train_score = train_evaluator.get_metrics(y_t1, y_t0)
        train_scores[train_experiment, :] = train_score

        # y_t0, y_t1 = predict_treated_and_controlled_vb(net, test_rmse_ite_loader, raw_mus, ctx)
        y_t0, y_t1 = predict_treated_and_controlled_vb(net,
                                                       test_rmse_ite_loader,
                                                       layer_params, ctx)
        y_t0, y_t1 = y_t0 * yf_std + yf_m, y_t1 * yf_std + yf_m
        test_score = test_evaluator.get_metrics(y_t1, y_t0)
        test_scores[train_experiment, :] = test_score

        # Score columns are logged as [0] RMSE ITE, [1] ATE, [2] PEHE.
        log(logfile, '[Train Replication {}/{}]: train RMSE ITE: {:0.3f}, train ATE: {:0.3f}, train PEHE: {:0.3f},' \
                     ' test RMSE ITE: {:0.3f}, test ATE: {:0.3f}, test PEHE: {:0.3f}'.format(train_experiment + 1,
                                                                                             train_experiments,
                                                                                             train_score[0],
                                                                                             train_score[1],
                                                                                             train_score[2],
                                                                                             test_score[0],
                                                                                             test_score[1],
                                                                                             test_score[2]))
        # plt.plot(train_acc)
        # plt.plot(test_acc)
    ''' Save means and stds NDArray values for inference '''
    mx.nd.save(
        outdir + args.architecture.lower() + '_means_stds_ihdp_' +
        str(train_experiments) + '_.nd', {
            "means": mx.nd.array(means),
            "stds": mx.nd.array(stds)
        })
    ''' Export trained model '''
    net.export(outdir + args.architecture.lower() + "-ihdp-predictions-" +
               str(train_experiments),
               epoch=epochs)

    log(logfile,
        '\n{} architecture total scores:'.format(args.architecture.upper()))
    ''' Train and test scores '''
    # From here on means/stds are rebound to score statistics; the label
    # means/stds collected above have already been saved.
    # NOTE(review): sem is not defined in this block; presumably
    # scipy.stats.sem -- confirm.
    means, stds = np.mean(train_scores, axis=0), sem(train_scores,
                                                     axis=0,
                                                     ddof=0)
    r_pehe_mean, r_pehe_std = np.mean(np.sqrt(train_scores[:, 2]),
                                      axis=0), sem(np.sqrt(train_scores[:, 2]),
                                                   axis=0,
                                                   ddof=0)
    train_total_scores_str = 'train RMSE ITE: {:.2f} ± {:.2f}, train ATE: {:.2f} ± {:.2f}, train PEHE: {:.2f} ± {:.2f}, ' \
                             'train root PEHE: {:.2f} ± {:.2f}' \
                             ''.format(means[0], stds[0], means[1], stds[1], means[2], stds[2], r_pehe_mean, r_pehe_std)

    means, stds = np.mean(test_scores, axis=0), sem(test_scores,
                                                    axis=0,
                                                    ddof=0)
    r_pehe_mean, r_pehe_std = np.mean(np.sqrt(test_scores[:, 2]),
                                      axis=0), sem(np.sqrt(test_scores[:, 2]),
                                                   axis=0,
                                                   ddof=0)
    test_total_scores_str = 'test RMSE ITE: {:.2f} ± {:.2f}, test ATE: {:.2f} ± {:.2f}, test PEHE: {:.2f} ± {:.2f}, ' \
                            'test root PEHE: {:.2f} ± {:.2f}' \
                            ''.format(means[0], stds[0], means[1], stds[1], means[2], stds[2], r_pehe_mean, r_pehe_std)

    log(logfile, train_total_scores_str)
    log(logfile, test_total_scores_str)

    mean_duration = float("{0:.2f}".format(
        np.mean(train_durations, axis=0)[0]))

    with open(outdir + args.architecture.lower() + "-total-scores-" +
              str(train_experiments),
              "w",
              encoding="utf8") as text_file:
        print(train_total_scores_str,
              "\n",
              test_total_scores_str,
              file=text_file)

    # means/stds hold the test-score statistics at this point.
    return {
        "ite": "{:.2f} ± {:.2f}".format(means[0], stds[0]),
        "ate": "{:.2f} ± {:.2f}".format(means[1], stds[1]),
        "pehe": "{:.2f} ± {:.2f}".format(means[2], stds[2]),
        "mean_duration": mean_duration
    }
# Stack two Dense layers followed by a CenteredLayer (defined elsewhere).
net = nn.Sequential()
with net.name_scope():
    net.add(nn.Dense(128))
    net.add(nn.Dense(10))
    net.add(CenteredLayer())
net.initialize()


# Forward a random batch of shape [4, 8] through the stack.
x = nd.random_uniform(shape=[4, 8])
y = net(x)
# print(nd.mean(y))


# Customized Layer with Params
# Build a standalone 3x3 Parameter directly and initialize it.
my_param = gluon.Parameter(name='exciting_params', shape=(3, 3))
my_param.initialize()
# print('weight: ', my_param.data())
# print('gradients: ', my_param.grad())


# Alternative: request the parameter from a ParameterDict created with the
# prefix 'block1_'.
pd = gluon.ParameterDict(prefix='block1_')
pd.get(name='exciting_params', shape=(3, 3))
# print(pd)


class MyDense(nn.Block):
    """Block that owns a ``weight`` parameter of shape (in_units, units)."""

    def __init__(self, units, in_units, **kwargs):
        """Create the block and register its weight.

        Args:
            units: size of the weight's second dimension.
            in_units: size of the weight's first dimension.
            **kwargs: forwarded to the ``nn.Block`` constructor.
        """
        # Forward **kwargs to the parent constructor; previously they were
        # accepted but silently dropped, so Block options passed by the
        # caller had no effect.
        super(MyDense, self).__init__(**kwargs)
        with self.name_scope():
            self.weight = self.params.get('weight', shape=(in_units, units))
Beispiel #10
0
import cv2