def test_invalid_parameter_grad_stype():
    """Construct a 10x10 ``gluon.Parameter`` named 'weight' with ``grad_stype='invalid'``.

    NOTE(review): presumably the constructor is expected to reject this
    storage type; the expected exception is not declared in this block, so
    confirm how the failure is asserted by the surrounding test harness.
    """
    bad_options = {'shape': (10, 10), 'grad_stype': 'invalid'}
    p = gluon.Parameter('weight', **bad_options)
def __init__(self):
    """Initialise the parent block, then attach the parameter ``w``."""
    super(TestBlock2, self).__init__()
    # The shape is taken from the names K and N defined outside this method;
    # deferred initialisation is allowed for this parameter.
    weight = gluon.Parameter('w', shape=(K, N), allow_deferred_init=True)
    self.w = weight
def __init__(self, num_input_dim=0, num_hidden_dim=100, num_output_dim=10):
    """Initialise the parent block and attach the parameters ``w1`` and ``w2``.

    Args:
        num_input_dim: First dimension of ``w1`` (default 0).
        num_hidden_dim: Second dimension of ``w1`` and first of ``w2``.
        num_output_dim: Second dimension of ``w2``.
    """
    super(LinearRegression, self).__init__()
    # Each entry is (attribute/parameter name, shape); both parameters allow
    # deferred initialisation.
    weight_specs = (
        ('w1', (num_input_dim, num_hidden_dim)),
        ('w2', (num_hidden_dim, num_output_dim)),
    )
    for weight_name, weight_shape in weight_specs:
        setattr(self, weight_name,
                gluon.Parameter(weight_name, shape=weight_shape,
                                allow_deferred_init=True))
# 定义简单多层感知机模型 class MLP(gluon.nn.Block): def __init__(self, **kwargs): super(MLP, self).__init__(**kwargs) with self.name_scope(): self.hidden = gluon.nn.Dense(256, activation='relu') self.output = gluon.nn.Dense(10) def forward(self, x): return self.output(self.hidden(x)) # 访问模型参数 my_param = gluon.Parameter('good_param', shape=(2, 3)) # 参数初始化 my_param.initialize() print('data:', my_param.data()) print('grad:', my_param.grad()) print('name:', my_param.name) # 初始化模型参数 x = nd.random.uniform(shape=(3, 5)) net = MLP() net.initialize() net(x) params = net.collect_params() params.initialize(init=init.Normal(sigma=0.02), force_reinit=True) print('hidden weight: ', net.hidden.weight.data(), '\nhidden bias: ', net.hidden.bias.data(), '\noutput weight: ', net.output.weight.data(),
def test_block_attr_param():
    """Assign a ``gluon.Parameter`` to ``b`` on a Block, then rebind it to a tuple.

    NOTE(review): the rebinding is presumably expected to be rejected; the
    expected exception is not declared in this block - confirm against the
    surrounding test harness.
    """
    block = gluon.Block()

    # regular variables can't change types
    block.b = gluon.Parameter()
    replacement = (2, )
    block.b = replacement
def __init__(self, w_init, **kwargs):
    """Initialise the parent block and attach the parameter ``w``.

    Args:
        w_init: Value handed to ``mx.initializer.Constant`` for ``w``.
        **kwargs: Forwarded unchanged to the parent constructor.
    """
    super(SignSTENET, self).__init__(**kwargs)
    constant_init = mx.initializer.Constant(w_init)
    self.w = gluon.Parameter(
        'w',
        shape=30,
        init=constant_init,
        grad_req='write',
    )
net = nn.Sequential() with net.name_scope(): net.add(nn.Dense(128)) net.add(nn.Dense(10)) net.add(CenteredLayer()) net.initialize() # net has parameters so need initialization y = net(nd.random.uniform(shape=(3,2))) print(y) print(y.mean()) ## with parameters from mxnet import gluon my_param = gluon.Parameter("exciting_parameter_yay", shape=(3,3)) # prefix, shape my_param.initialize() print(my_param.data(), my_param.grad()) # another way pd = gluon.ParameterDict(prefix="block1_") pd.get("exciting_parameter_yay",shape=(3,3)) # print(pd) class MyDense(nn.Block): def __init__(self,units,in_units,**kwargs): super(MyDense,self).__init__(**kwargs) with self.name_scope():
def run(args, outdir):
    """Run training for the NN4 architecture with Variational Bayes.

    For each of ``args.experiments`` replications the data is split into
    train/valid/test parts, the factual outcome is standardised with the
    training mean and standard deviation, the variational parameters are
    optimised for ``args.iterations`` epochs, and the network is scored on
    the within-sample (train + valid) and test parts.

    Side effects: creates/truncates ``<outdir>log.txt`` and appends to it via
    ``log``; saves the per-replication normalisation means/stds with
    ``mx.nd.save``; exports the network with ``net.export``; writes the
    aggregated score strings to a ``-total-scores-`` text file.

    Args:
        args: Parsed options. Attributes read here: ``iterations``,
            ``learning_rate``, ``weight_decay``, ``hidden_size``,
            ``experiments``, ``learning_rate_factor``,
            ``learning_rate_steps``, ``epoch_output_iter``, ``num_workers``,
            ``batch_size_per_unit``, ``seed``, ``data_dir``, ``data_train``,
            ``data_test`` and ``architecture``.
        outdir: Output location, used as a plain string prefix (file names
            are appended with ``+``), so it must already end with a path
            separator.

    Returns:
        dict: ``"ite"``, ``"ate"`` and ``"pehe"`` are ``"mean ± sem"`` strings
        computed over the *test* scores of all replications;
        ``"mean_duration"`` is the mean training time per replication in
        seconds, rounded to two decimals.
    """
    # --- Hyperparameters ---
    epochs = int(args.iterations)
    learning_rate = float(args.learning_rate)
    wd = float(args.weight_decay)
    hidden_size = int(args.hidden_size)
    train_experiments = int(args.experiments)
    learning_rate_factor = float(args.learning_rate_factor)
    # The learning rate changes every `learning_rate_steps` updates.
    learning_rate_steps = int(args.learning_rate_steps)
    epoch_output_iter = int(args.epoch_output_iter)

    # --- Logging: create (or truncate) the log file ---
    logfile = outdir + 'log.txt'
    with open(logfile, 'w'):
        pass

    # TODO may need adjustments
    config = {
        "sigma_p1": 1.75,  # og (1.5 was also tried)
        "sigma_p2": 0.5,  # og (0.25 was also tried)
        "pi": 0.5,
        "lambda_p": 24.5
    }

    # --- Set GPUs/CPUs ---
    num_gpus = mx.context.num_gpus()
    # replace num_workers with the number of cores
    num_workers = int(args.num_workers)
    # todo change as cfr_net_train
    ctx = [mx.gpu(i) for i in range(num_gpus)] if num_gpus > 0 else [mx.cpu()]
    batch_size_per_unit = int(args.batch_size_per_unit)  # mini-batch size
    batch_size = batch_size_per_unit * max(num_gpus, 1)

    # --- Set seeds ---
    seed = int(args.seed)
    for c in ctx:
        mx.random.seed(seed, c)
    np.random.seed(seed)

    # --- Feed Forward Neural Network Model (4 hidden layers) ---
    net = ff4_relu_architecture(hidden_size)

    # --- Load datasets ---
    train_dataset = load_data(args.data_dir + args.data_train)
    log(logfile, 'Training data: ' + args.data_dir + args.data_train)
    log(logfile, 'Valid data: ' + args.data_dir + args.data_test)
    log(
        logfile, 'Loaded data with shape [%d,%d]' %
        (train_dataset['n'], train_dataset['dim']))

    # --- Instantiate net / parameter initialisation ---
    net.collect_params().initialize(mx.init.Xavier(), ctx=ctx)
    net.hybridize()

    # Forward-propagate a single batch of the first replication once to set
    # up all network parameters (weights and biases) with the initializer
    # specified above.
    x = train_dataset['x'][:, :, 0]
    t = np.reshape(train_dataset['t'][:, 0], (-1, 1))
    yf = train_dataset['yf'][:, 0]
    yf_m, yf_std = np.mean(yf, axis=0), np.std(yf, axis=0)
    yf = (yf - yf_m) / yf_std
    factual_features = np.hstack((x, t))
    zero_train_factual_dataset = gluon.data.ArrayDataset(
        mx.nd.array(factual_features), mx.nd.array(yf))
    zero_train_factual_loader = gluon.data.DataLoader(
        zero_train_factual_dataset,
        batch_size=batch_size,
        shuffle=True,
        num_workers=num_workers)
    for batch_f_features, batch_yf in zero_train_factual_loader:
        batch_parts = gluon.utils.split_and_load(batch_f_features,
                                                 ctx_list=ctx,
                                                 even_split=False)
        for part in batch_parts:
            net(part)
        break  # a single batch is enough to materialise the parameters

    # --- Initialize variational parameters: one `lmb` per network parameter,
    # with the same shape, constant-initialised to `lambda_init` ---
    lambda_init = 25
    net_params = list(net.collect_params().values())
    shapes = [param.shape for param in net_params]
    lambdas = []
    for shape in shapes:
        lmb = gluon.Parameter('lmb',
                              shape=shape,
                              init=mx.init.Constant(lambda_init))
        lmb.initialize(ctx=ctx)
        lambdas.append(lmb)
    variational_params = lambdas
    raw_lambdas = [lmb.data(ctx[0]) for lmb in lambdas]

    # --- Metric, Loss and Optimizer ---
    rmse_metric = mx.metric.RMSE()
    l2_loss = gluon.loss.L2Loss()
    # Alternative tried previously: log_prior="scale_mixture" (without lambda_p).
    bbb_loss = BBBLoss(ctx[0],
                       log_prior="exponential",
                       sigma_p1=config['sigma_p1'],
                       sigma_p2=config['sigma_p2'],
                       pi=config['pi'],
                       lambda_p=config['lambda_p'])
    scheduler = mx.lr_scheduler.FactorScheduler(step=learning_rate_steps,
                                                factor=learning_rate_factor,
                                                base_lr=learning_rate)
    # Alternative tried previously: mx.optimizer.Adam.
    optimizer = mx.optimizer.RMSProp(learning_rate=learning_rate,
                                     lr_scheduler=scheduler,
                                     wd=wd)
    # Only the variational parameters are handed to the trainer; the network's
    # own parameters are overwritten with a fresh sample on every batch.
    trainer = gluon.Trainer(variational_params, optimizer=optimizer)

    # Per-replication results; score columns are logged below as
    # [RMSE ITE, ATE, PEHE].
    train_scores = np.zeros((train_experiments, 3))
    train_durations = np.zeros((train_experiments, 1))
    test_scores = np.zeros((train_experiments, 3))

    # Per-replication normalisation statistics of the factual outcome.
    yf_means = np.array([])
    yf_stds = np.array([])

    # --- Train ---
    for train_experiment in range(train_experiments):
        # Create training dataset
        x = train_dataset['x'][:, :, train_experiment]
        t = np.reshape(train_dataset['t'][:, train_experiment], (-1, 1))
        yf = train_dataset['yf'][:, train_experiment]
        ycf = train_dataset['ycf'][:, train_experiment]
        mu0 = train_dataset['mu0'][:, train_experiment]
        mu1 = train_dataset['mu1'][:, train_experiment]

        train, valid, test, _ = split_data_in_train_valid_test(
            x, t, yf, ycf, mu0, mu1)

        # With-in sample (train + valid); built before yf is normalised.
        train_evaluator = Evaluator(
            np.concatenate([train['t'], valid['t']]),
            np.concatenate([train['yf'], valid['yf']]),
            y_cf=np.concatenate([train['ycf'], valid['ycf']], axis=0),
            mu0=np.concatenate([train['mu0'], valid['mu0']], axis=0),
            mu1=np.concatenate([train['mu1'], valid['mu1']], axis=0))
        test_evaluator = Evaluator(test['t'], test['yf'], test['ycf'],
                                   test['mu0'], test['mu1'])

        # Normalize yf with the training statistics.
        # TODO check for normalize input?
        yf_m, yf_std = np.mean(train['yf'], axis=0), np.std(train['yf'],
                                                            axis=0)
        train['yf'] = (train['yf'] - yf_m) / yf_std
        valid['yf'] = (valid['yf'] - yf_m) / yf_std
        test['yf'] = (test['yf'] - yf_m) / yf_std

        # Save mean and std
        yf_means = np.append(yf_means, yf_m)
        yf_stds = np.append(yf_stds, yf_std)

        # Train dataset
        factual_features = np.hstack((train['x'], train['t']))
        # Width of one input row (covariates plus treatment column); used
        # instead of a hard-coded 26 so other input dimensions work too.
        num_features = factual_features.shape[1]
        train_factual_dataset = gluon.data.ArrayDataset(
            mx.nd.array(factual_features), mx.nd.array(train['yf']))

        # With-in sample
        train_rmse_ite_dataset = gluon.data.ArrayDataset(
            mx.nd.array(np.concatenate([train['x'], valid['x']])))

        # Valid dataset
        valid_factual_features = np.hstack((valid['x'], valid['t']))
        valid_factual_dataset = gluon.data.ArrayDataset(
            mx.nd.array(valid_factual_features), mx.nd.array(valid['yf']))

        # Test dataset
        test_rmse_ite_dataset = gluon.data.ArrayDataset(
            mx.nd.array(test['x']))

        # DataLoaders: only the training loader shuffles.
        train_factual_loader = gluon.data.DataLoader(train_factual_dataset,
                                                     batch_size=batch_size,
                                                     shuffle=True,
                                                     num_workers=num_workers)
        train_rmse_ite_loader = gluon.data.DataLoader(train_rmse_ite_dataset,
                                                      batch_size=batch_size,
                                                      shuffle=False,
                                                      num_workers=num_workers)
        valid_factual_loader = gluon.data.DataLoader(valid_factual_dataset,
                                                     batch_size=batch_size,
                                                     shuffle=False,
                                                     num_workers=num_workers)
        test_rmse_ite_loader = gluon.data.DataLoader(test_rmse_ite_dataset,
                                                     batch_size=batch_size,
                                                     shuffle=False,
                                                     num_workers=num_workers)

        num_batch = len(train_factual_loader)

        train_start = time.time()

        # Train model.
        # Start with epoch 1 for easier learning rate calculation.
        for epoch in range(1, epochs + 1):
            train_loss = 0
            rmse_metric.reset()

            for batch_f_features, batch_yf in train_factual_loader:
                # Copy the batch to the first context only.
                batch_f_features = batch_f_features.as_in_context(
                    ctx[0]).reshape((-1, num_features))
                batch_yf = batch_yf.as_in_context(ctx[0]).reshape(
                    (len(batch_yf), -1))

                # Forward
                with autograd.record():
                    # Generate sample
                    layer_params = generate_weight_sample_exp(
                        shapes, raw_lambdas, ctx[0])

                    # Overwrite network parameters with sampled parameters
                    for sample, param in zip(layer_params, net_params):
                        param._data[0] = sample

                    # Forward-propagate the batch
                    outputs = net(batch_f_features)

                    # Calculate the loss
                    l2_loss_value = l2_loss(outputs, batch_yf)
                    bbb_loss_value = bbb_loss(outputs, batch_yf, layer_params,
                                              raw_lambdas, [], num_batch)
                    loss = bbb_loss_value + l2_loss_value

                    # Backpropagate for gradient calculation
                    loss.backward()

                # Optimize
                trainer.step(batch_size)

                train_loss += sum(l.mean().asscalar()
                                  for l in loss) / len(loss)
                rmse_metric.update(batch_yf, outputs)

            if epoch % epoch_output_iter == 0 or epoch == 1:
                _, train_rmse_factual = rmse_metric.get()
                train_loss /= num_batch
                _, valid_rmse_factual = test_net_vb(net, valid_factual_loader,
                                                    layer_params, ctx)

                # The two loss values logged here are those of the last batch
                # of the epoch.
                log(
                    logfile, 'l2-loss: %.3f, bbb-loss: %.3f' %
                    (l2_loss_value[0].asscalar(),
                     bbb_loss_value[0].asscalar()))
                log(
                    logfile,
                    '[Epoch %d/%d] Train-rmse-factual: %.3f, loss: %.3f | Valid-rmse-factual: %.3f | learning-rate: '
                    '%.3E' % (epoch, epochs, train_rmse_factual, train_loss,
                              valid_rmse_factual, trainer.learning_rate))

        train_durations[train_experiment, :] = time.time() - train_start

        # Test model.
        # NOTE: predictions reuse the weight sample drawn for the last
        # training batch, so at least one epoch with a non-empty training
        # loader is required before reaching this point.
        y_t0, y_t1 = predict_treated_and_controlled_vb(net,
                                                       train_rmse_ite_loader,
                                                       layer_params, ctx)
        # Undo the outcome normalisation before scoring.
        y_t0, y_t1 = y_t0 * yf_std + yf_m, y_t1 * yf_std + yf_m
        train_score = train_evaluator.get_metrics(y_t1, y_t0)
        train_scores[train_experiment, :] = train_score

        y_t0, y_t1 = predict_treated_and_controlled_vb(net,
                                                       test_rmse_ite_loader,
                                                       layer_params, ctx)
        y_t0, y_t1 = y_t0 * yf_std + yf_m, y_t1 * yf_std + yf_m
        test_score = test_evaluator.get_metrics(y_t1, y_t0)
        test_scores[train_experiment, :] = test_score

        log(logfile,
            ('[Train Replication {}/{}]: train RMSE ITE: {:0.3f}, train ATE: {:0.3f}, train PEHE: {:0.3f},'
             ' test RMSE ITE: {:0.3f}, test ATE: {:0.3f}, test PEHE: {:0.3f}'
             ).format(train_experiment + 1, train_experiments,
                      train_score[0], train_score[1], train_score[2],
                      test_score[0], test_score[1], test_score[2]))

    # Save means and stds NDArray values for inference
    mx.nd.save(
        outdir + args.architecture.lower() + '_means_stds_ihdp_' +
        str(train_experiments) + '_.nd', {
            "means": mx.nd.array(yf_means),
            "stds": mx.nd.array(yf_stds)
        })

    # Export trained model
    net.export(outdir + args.architecture.lower() + "-ihdp-predictions-" +
               str(train_experiments),
               epoch=epochs)

    log(logfile,
        '\n{} architecture total scores:'.format(args.architecture.upper()))

    # Train scores: mean and standard error over replications.
    train_means = np.mean(train_scores, axis=0)
    train_sems = sem(train_scores, axis=0, ddof=0)
    train_root_pehe = np.sqrt(train_scores[:, 2])
    r_pehe_mean = np.mean(train_root_pehe, axis=0)
    r_pehe_std = sem(train_root_pehe, axis=0, ddof=0)
    train_total_scores_str = (
        'train RMSE ITE: {:.2f} ± {:.2f}, train ATE: {:.2f} ± {:.2f}, train PEHE: {:.2f} ± {:.2f}, '
        'train root PEHE: {:.2f} ± {:.2f}'
        '').format(train_means[0], train_sems[0], train_means[1],
                   train_sems[1], train_means[2], train_sems[2], r_pehe_mean,
                   r_pehe_std)

    # Test scores: mean and standard error over replications.
    test_means = np.mean(test_scores, axis=0)
    test_sems = sem(test_scores, axis=0, ddof=0)
    test_root_pehe = np.sqrt(test_scores[:, 2])
    r_pehe_mean = np.mean(test_root_pehe, axis=0)
    r_pehe_std = sem(test_root_pehe, axis=0, ddof=0)
    test_total_scores_str = (
        'test RMSE ITE: {:.2f} ± {:.2f}, test ATE: {:.2f} ± {:.2f}, test PEHE: {:.2f} ± {:.2f}, '
        'test root PEHE: {:.2f} ± {:.2f}'
        '').format(test_means[0], test_sems[0], test_means[1], test_sems[1],
                   test_means[2], test_sems[2], r_pehe_mean, r_pehe_std)

    log(logfile, train_total_scores_str)
    log(logfile, test_total_scores_str)

    mean_duration = float("{0:.2f}".format(
        np.mean(train_durations, axis=0)[0]))

    with open(outdir + args.architecture.lower() + "-total-scores-" +
              str(train_experiments),
              "w",
              encoding="utf8") as text_file:
        print(train_total_scores_str,
              "\n",
              test_total_scores_str,
              file=text_file)

    # The returned summary reports the test-set scores.
    return {
        "ite": "{:.2f} ± {:.2f}".format(test_means[0], test_sems[0]),
        "ate": "{:.2f} ± {:.2f}".format(test_means[1], test_sems[1]),
        "pehe": "{:.2f} ± {:.2f}".format(test_means[2], test_sems[2]),
        "mean_duration": mean_duration
    }
net = nn.Sequential() with net.name_scope(): net.add(nn.Dense(128)) net.add(nn.Dense(10)) net.add(CenteredLayer()) net.initialize() x = nd.random_uniform(shape=[4, 8]) y = net(x) # print(nd.mean(y)) # Customized Layer with Params my_param = gluon.Parameter(name='exciting_params', shape=(3, 3)) my_param.initialize() # print('weight: ', my_param.data()) # print('gradients: ', my_param.grad()) pd = gluon.ParameterDict(prefix='block1_') pd.get(name='exciting_params', shape=(3, 3)) # print(pd) class MyDense(nn.Block): def __init__(self, units, in_units, **kwargs): super(MyDense, self).__init__() with self.name_scope(): self.weight = self.params.get('weight', shape=(in_units, units))
import cv2