def _create_mlp(self):
    """Build the pylearn2 MLP from ``self.layers``, load any pending weights,
    and compile ``self.f`` as the forward-propagation function.

    Initialisation ranges follow Glorot & Bengio's uniform scheme,
    sqrt(6)/sqrt(fan_in + fan_out), with per-activation gain factors.
    """
    # Create the layers one by one, connecting to previous.
    mlp_layers = []
    for i, layer in enumerate(self.layers):
        fan_in = self.unit_counts[i]
        fan_out = self.unit_counts[i + 1]
        # Glorot/Bengio uniform initialisation limit.
        lim = numpy.sqrt(6) / numpy.sqrt(fan_in + fan_out)
        if layer.type == "Tanh":
            # BUG FIX: was `lim *= 1.1 * lim`, which squares the limit and
            # collapses it towards 0 for lim < 1; the intended tanh gain is a
            # plain 1.1 scaling of the Glorot limit.
            lim *= 1.1
        elif layer.type in ("Rectifier", "Maxout", "Convolution"):
            # He, Rang, Zhen and Sun, converted to uniform.
            lim *= numpy.sqrt(2)
        elif layer.type == "Sigmoid":
            # Glorot's recommended 4x gain for logistic sigmoid units.
            lim *= 4
        mlp_layer = self._create_layer(layer.name, layer, irange=lim)
        mlp_layers.append(mlp_layer)
    # nvis and input_space are mutually exclusive in pylearn2: convolutional
    # nets describe their input via input_space instead of a flat nvis count.
    self.mlp = mlp.MLP(
        mlp_layers,
        nvis=None if self.is_convolution else self.unit_counts[0],
        seed=self.random_state,
        input_space=self.input_space)
    # Pending serialized weights (e.g. from unpickling) are applied once,
    # then cleared so they are not re-applied.
    if self.weights is not None:
        self._array_to_mlp(self.weights, self.mlp)
        self.weights = None
    inputs = self.mlp.get_input_space().make_theano_batch()
    self.f = theano.function([inputs], self.mlp.fprop(inputs))
def cnn_run_dropout_maxout(data_path, num_rows, num_cols, num_channels, input_path, pred_path): t = time.time() sub_window = gen_center_sub_window(76, num_cols) trn = SarDataset(ds[0][0], ds[0][1], sub_window) vld = SarDataset(ds[1][0], ds[1][1], sub_window) tst = SarDataset(ds[2][0], ds[2][1], sub_window) print 'Take {}s to read data'.format(time.time() - t) t = time.time() batch_size = 100 h1 = maxout.Maxout(layer_name='h2', num_units=1, num_pieces=100, irange=.1) hidden_layer = mlp.ConvRectifiedLinear(layer_name='h2', output_channels=8, irange=0.05, kernel_shape=[5, 5], pool_shape=[2, 2], pool_stride=[2, 2], max_kernel_norm=1.9365) hidden_layer2 = mlp.ConvRectifiedLinear(layer_name='h3', output_channels=8, irange=0.05, kernel_shape=[5, 5], pool_shape=[2, 2], pool_stride=[2, 2], max_kernel_norm=1.9365) #output_layer = mlp.Softplus(dim=1,layer_name='output',irange=0.1) output_layer = mlp.Linear(dim=1, layer_name='output', irange=0.05) trainer = sgd.SGD(learning_rate=0.001, batch_size=100, termination_criterion=EpochCounter(2000), cost=dropout.Dropout(), train_iteration_mode='even_shuffled_sequential', monitor_iteration_mode='even_shuffled_sequential', monitoring_dataset={ 'test': tst, 'valid': vld, 'train': trn }) layers = [hidden_layer, hidden_layer2, output_layer] input_space = space.Conv2DSpace(shape=[num_rows, num_cols], num_channels=num_channels) ann = mlp.MLP(layers, input_space=input_space, batch_size=batch_size) watcher = best_params.MonitorBasedSaveBest(channel_name='valid_objective', save_path='sar_cnn_mlp.pkl') experiment = Train(dataset=trn, model=ann, algorithm=trainer, extensions=[watcher]) print 'Take {}s to compile code'.format(time.time() - t) t = time.time() experiment.main_loop() print 'Training time: {}s'.format(time.time() - t) serial.save('cnn_hhv_{0}_{1}.pkl'.format(num_rows, num_cols), ann, on_overwrite='backup') #read hh and hv into a 3D numpy image = read_hhv(input_path) return ann, sar_predict(ann, image, pred_path)
def runSP(): ds = StockPrice() # create hidden layer with 2 nodes, init weights in range -0.1 to 0.1 and add # a bias with value 1 hidden_layer = mlp.Sigmoid(layer_name='hidden', dim=10000, irange=.1, init_bias=1.) # create Softmax output layer output_layer = mlp.Linear(layer_name='output', dim=1, irange=.1, init_bias=1.) # create Stochastic Gradient Descent trainer that runs for 400 epochs trainer = sgd.SGD(learning_rate=.005, batch_size=500, termination_criterion=EpochCounter(10)) layers = [hidden_layer, output_layer] # create neural net that takes two inputs ann = mlp.MLP(layers, nvis=1000) trainer.setup(ann, ds) # train neural net until the termination criterion is true while True: trainer.train(dataset=ds) ann.monitor.report_epoch() ann.monitor() if not trainer.continue_learning(ann): break #accuracy = Accuracy() acc = Accuracy() for i, predict in enumerate(ann.fprop(theano.shared(ds.valid[0], name='inputs')).eval()): print predict, ds.valid[1][i] acc.evaluatePN(predict[0], ds.valid[1][i][0]) acc.printResult()
def construct_dbn_from_stack(stack):
    """Convert a pretrained layer stack into a classification MLP ("DBN").

    Builds one Sigmoid layer per stack layer, appends a 9-class Softmax head,
    then copies the pretrained weights and hidden biases into the new net.

    Parameters
    ----------
    stack : pretrained model exposing ``layers()``; each layer provides
        ``nhid``, ``get_weights()`` and ``hidbias``, and the first layer
        exposes its input space.

    Returns
    -------
    mlp.MLP : the initialised network.
    """
    # some settings
    irange = 0.05
    # The original computed `0.25 if ii == 0 else 0.25` inside the loop --
    # both branches identical, so a single constant preserves behaviour.
    lr_scale = 0.25
    layers = []
    for ii, layer in enumerate(stack.layers()):
        layers.append(
            mlp.Sigmoid(dim=layer.nhid,
                        layer_name='h' + str(ii),
                        irange=irange,
                        W_lr_scale=lr_scale,
                        max_col_norm=2.))
    # softmax layer at the end for classification
    layers.append(
        mlp.Softmax(n_classes=9, layer_name='y', irange=irange,
                    W_lr_scale=0.25))
    dbn = mlp.MLP(layers=layers,
                  nvis=stack.layers()[0].get_input_space().dim)
    # copy pretrained weights and biases into the DBN
    for ii, layer in enumerate(stack.layers()):
        dbn.layers[ii].set_weights(layer.get_weights())
        dbn.layers[ii].set_biases(layer.hidbias.get_value(borrow=False))
    return dbn
def __init__(self, data):
    """Build a 25-input Sigmoid->Linear regression net with an SGD trainer
    and delegate the rest of the setup to NeuralNetwork.__init__."""
    self.N = 5 * 5            # number of input units (5x5 window)
    self.predictionLength = 2
    # hidden layer: 25 sigmoid units, weights init in [-0.1, 0.1], bias 1
    # (an older comment claimed "2 nodes"; the code says dim=25)
    hidden_layer = mlp.Sigmoid(layer_name='hidden', dim=25, irange=.1, init_bias=1.)
    # single-unit Linear output layer (positional args: dim, layer_name)
    output_layer = mlp.Linear(1, 'output', irange=.1, init_bias=1.)
    # SGD trainer stopping after 100 epochs (not 400 as previously commented)
    trainer = sgd.SGD(learning_rate=.005, batch_size=10,
                      termination_criterion=EpochCounter(100))
    layers = [hidden_layer, output_layer]
    # net takes self.N (=25) inputs
    nn = mlp.MLP(layers, nvis=self.N)
    NeuralNetwork.__init__(self, data, nn, trainer)
def runXOR(): ds = XOR() hidden_layer = mlp.Sigmoid(layer_name='hidden', dim=10, irange=.1, init_bias=1.) output_layer = mlp.Linear(layer_name='output', dim=1, irange=.1, init_bias=1.) trainer = sgd.SGD(learning_rate=.05, batch_size=1, termination_criterion=EpochCounter(1000)) layers = [hidden_layer, output_layer] # create neural net that takes two inputs ann = mlp.MLP(layers, nvis=4) trainer.setup(ann, ds) # train neural net until the termination criterion is true while True: trainer.train(dataset=ds) #ann.monitor.report_epoch() #ann.monitor() if not trainer.continue_learning(ann): break inputs= np.array([[0, 0, 0, 1]]) print ann.fprop(theano.shared(inputs, name='inputs')).eval() inputs = np.array([[0, 1, 0, 1]]) print ann.fprop(theano.shared(inputs, name='inputs')).eval() inputs = np.array([[1, 1, 1, 1]]) print ann.fprop(theano.shared(inputs, name='inputs')).eval() inputs = np.array([[1, 1, 0, 0]]) print ann.fprop(theano.shared(inputs, name='inputs')).eval()
def main():
    """D-GEX-style training: a 3-hidden-layer Tanh MLP regresses gene
    expression targets 4760-9520 from 943 landmark genes.  Each epoch it
    logs MAE on five datasets, decays the learning rate when training MAE
    rises, and snapshots the models that are best on validation / 1000G.

    CLI: base_name n_epoch n_hidden include_rate
    """
    base_name = sys.argv[1]            # output file-name prefix
    n_epoch = int(sys.argv[2])         # number of epochs to run
    n_hidden = int(sys.argv[3])        # units per hidden layer
    include_rate = float(sys.argv[4])  # dropout keep-probability
    in_size = 943
    out_size = 4760
    b_size = 200
    l_rate = 3e-4
    l_rate_min = 1e-5
    decay_factor = 0.9
    lr_scale = 3.0
    momentum = 0.5
    # Glorot-style uniform init limits for the four weight matrices.
    init_vals = np.sqrt(6.0/(np.array([in_size, n_hidden, n_hidden, n_hidden])+np.array([n_hidden, n_hidden, n_hidden, out_size])))
    print 'loading data...'
    # train / validation / test splits plus two external evaluation sets
    X_tr = np.load('bgedv2_X_tr_float64.npy')
    Y_tr = np.load('bgedv2_Y_tr_4760-9520_float64.npy')
    Y_tr_target = np.array(Y_tr)
    X_va = np.load('bgedv2_X_va_float64.npy')
    Y_va = np.load('bgedv2_Y_va_4760-9520_float64.npy')
    Y_va_target = np.array(Y_va)
    X_te = np.load('bgedv2_X_te_float64.npy')
    Y_te = np.load('bgedv2_Y_te_4760-9520_float64.npy')
    Y_te_target = np.array(Y_te)
    X_1000G = np.load('1000G_X_float64.npy')
    Y_1000G = np.load('1000G_Y_4760-9520_float64.npy')
    Y_1000G_target = np.array(Y_1000G)
    X_GTEx = np.load('GTEx_X_float64.npy')
    Y_GTEx = np.load('GTEx_Y_4760-9520_float64.npy')
    Y_GTEx_target = np.array(Y_GTEx)
    # Fixed seed so the 5000 monitored training rows are reproducible.
    random.seed(0)
    monitor_idx_tr = random.sample(range(88807), 5000)
    data_tr = p2_dt_dd.DenseDesignMatrix(X=X_tr.astype('float32'), y=Y_tr.astype('float32'))
    X_tr_monitor, Y_tr_monitor_target = X_tr[monitor_idx_tr, :], Y_tr_target[monitor_idx_tr, :]
    # Three Tanh hidden layers; h1 trains at the base lr, deeper layers at lr_scale x base.
    h1_layer = p2_md_mlp.Tanh(layer_name='h1', dim=n_hidden, irange=init_vals[0], W_lr_scale=1.0, b_lr_scale=1.0)
    h2_layer = p2_md_mlp.Tanh(layer_name='h2', dim=n_hidden, irange=init_vals[1], W_lr_scale=lr_scale, b_lr_scale=1.0)
    h3_layer = p2_md_mlp.Tanh(layer_name='h3', dim=n_hidden, irange=init_vals[2], W_lr_scale=lr_scale, b_lr_scale=1.0)
    o_layer = p2_md_mlp.Linear(layer_name='y', dim=out_size, irange=0.0001, W_lr_scale=lr_scale, b_lr_scale=1.0)
    model = p2_md_mlp.MLP(nvis=in_size, layers=[h1_layer, h2_layer, h3_layer, o_layer], seed=1)
    # No dropout on the input to h1; the other layers' inputs are kept with
    # probability include_rate and rescaled by 1/include_rate.
    dropout_cost = p2_ct_mlp_dropout.Dropout(input_include_probs={'h1':1.0, 'h2':include_rate, 'h3':include_rate, 'y':include_rate},
                                             input_scales={'h1':1.0, 'h2':np.float32(1.0/include_rate),
                                                           'h3':np.float32(1.0/include_rate), 'y':np.float32(1.0/include_rate)})
    algorithm = p2_alg_sgd.SGD(batch_size=b_size, learning_rate=l_rate, learning_rule = p2_alg_lr.Momentum(momentum), termination_criterion=p2_termcri.EpochCounter(max_epochs=1000), cost=dropout_cost)
    train = pylearn2.train.Train(dataset=data_tr, model=model, algorithm=algorithm)
    train.setup()
    # Compile a standalone predictor for the monitoring passes below.
    x = T.matrix()
    y = model.fprop(x)
    f = theano.function([x], y)
    # "old" values seed the relative-change computation on the first epoch.
    MAE_va_old = 10.0
    MAE_va_best = 10.0
    MAE_tr_old = 10.0
    MAE_te_old = 10.0
    MAE_1000G_old = 10.0
    MAE_1000G_best = 10.0
    MAE_GTEx_old = 10.0
    outlog = open(base_name + '.log', 'w')
    # (sic) 'learing_rate' typo is the emitted column header -- kept as-is
    # so existing log parsers keep working.
    log_str = '\t'.join(map(str, ['epoch', 'MAE_va', 'MAE_va_change', 'MAE_te', 'MAE_te_change', 'MAE_1000G', 'MAE_1000G_change', 'MAE_GTEx', 'MAE_GTEx_change', 'MAE_tr', 'MAE_tr_change', 'learing_rate', 'time(sec)']))
    print log_str
    outlog.write(log_str + '\n')
    sys.stdout.flush()
    for epoch in range(0, n_epoch):
        t_old = time.time()
        train.algorithm.train(train.dataset)
        # Forward-prop every monitored dataset (float32 in, float64 out).
        Y_va_hat = f(X_va.astype('float32')).astype('float64')
        Y_te_hat = f(X_te.astype('float32')).astype('float64')
        Y_tr_hat_monitor = f(X_tr_monitor.astype('float32')).astype('float64')
        Y_1000G_hat = f(X_1000G.astype('float32')).astype('float64')
        Y_GTEx_hat = f(X_GTEx.astype('float32')).astype('float64')
        # Mean absolute error per dataset.
        MAE_va = np.abs(Y_va_target - Y_va_hat).mean()
        MAE_te = np.abs(Y_te_target - Y_te_hat).mean()
        MAE_tr = np.abs(Y_tr_monitor_target - Y_tr_hat_monitor).mean()
        MAE_1000G = np.abs(Y_1000G_target - Y_1000G_hat).mean()
        MAE_GTEx = np.abs(Y_GTEx_target - Y_GTEx_hat).mean()
        # Relative change versus the previous epoch.
        MAE_va_change = (MAE_va - MAE_va_old)/MAE_va_old
        MAE_te_change = (MAE_te - MAE_te_old)/MAE_te_old
        MAE_tr_change = (MAE_tr - MAE_tr_old)/MAE_tr_old
        MAE_1000G_change = (MAE_1000G - MAE_1000G_old)/MAE_1000G_old
        MAE_GTEx_change = (MAE_GTEx - MAE_GTEx_old)/MAE_GTEx_old
        MAE_va_old = MAE_va
        MAE_te_old = MAE_te
        MAE_tr_old = MAE_tr
        MAE_1000G_old = MAE_1000G
        MAE_GTEx_old = MAE_GTEx
        t_new = time.time()
        l_rate = train.algorithm.learning_rate.get_value()
        log_str = '\t'.join(map(str, [epoch+1, '%.6f'%MAE_va, '%.6f'%MAE_va_change, '%.6f'%MAE_te, '%.6f'%MAE_te_change, '%.6f'%MAE_1000G, '%.6f'%MAE_1000G_change, '%.6f'%MAE_GTEx, '%.6f'%MAE_GTEx_change, '%.6f'%MAE_tr, '%.6f'%MAE_tr_change, '%.5f'%l_rate, int(t_new-t_old)]))
        print log_str
        outlog.write(log_str + '\n')
        sys.stdout.flush()
        # Decay the lr whenever training MAE went up, clipped at l_rate_min.
        if MAE_tr_change > 0:
            l_rate = l_rate*decay_factor
        if l_rate < l_rate_min:
            l_rate = l_rate_min
        train.algorithm.learning_rate.set_value(np.float32(l_rate))
        # Snapshot the model whenever validation MAE improves...
        if MAE_va < MAE_va_best:
            MAE_va_best = MAE_va
            outmodel = open(base_name + '_bestva_model.pkl', 'wb')
            pkl.dump(model, outmodel)
            outmodel.close()
            np.save(base_name + '_bestva_Y_te_hat.npy', Y_te_hat)
            np.save(base_name + '_bestva_Y_va_hat.npy', Y_va_hat)
        # ...and separately whenever 1000G MAE improves.
        if MAE_1000G < MAE_1000G_best:
            MAE_1000G_best = MAE_1000G
            outmodel = open(base_name + '_best1000G_model.pkl', 'wb')
            pkl.dump(model, outmodel)
            outmodel.close()
            np.save(base_name + '_best1000G_Y_1000G_hat.npy', Y_1000G_hat)
            np.save(base_name + '_best1000G_Y_GTEx_hat.npy', Y_GTEx_hat)
    print 'MAE_va_best : %.6f' % (MAE_va_best)
    print 'MAE_1000G_best : %.6f' % (MAE_1000G_best)
    outlog.write('MAE_va_best : %.6f' % (MAE_va_best) + '\n')
    outlog.write('MAE_1000G_best : %.6f' % (MAE_1000G_best) + '\n')
    outlog.close()
# --- fragment: l1..l5, X, ds and ds_test are defined in an earlier chunk ---
l6 = mlp.RectifiedLinear(
    layer_name='l6',
    #sparse_init=12,
    irange=0.01,
    dim=300,
    #max_col_norm=1.
)
# NOTE(review): l6 is constructed but not included in `layers` below --
# confirm whether it was meant to be part of the network.
output = mlp.Softmax(n_classes=2, layer_name='y', irange=.01)
#output = mlp.HingeLoss(layer_name='y',n_classes=2,irange=.05)
#layers = [l5, l6, output]
layers = [l1, l2, l3, l4, l5, output]
# nvis: flattened size of one input sample
ann = mlp.MLP(layers, nvis=X[0].reshape(-1).shape[0])
lr = 0.1
epochs = 400
trainer = sgd.SGD(
    learning_rate=lr,
    batch_size=100,
    learning_rule=learning_rule.Momentum(.05),
    # Remember, default dropout is .5
    #cost=Dropout(input_include_probs={'l1': .5},
    #             input_scales={'l1': 1.}),
    termination_criterion=EpochCounter(epochs),
    monitoring_dataset={
        'train': ds,
        'valid': ds_test
    })
def main():
    """Train a two-hidden-layer sigmoid MLP (9-class softmax head) with
    early stopping, reload the best snapshot, report test log-loss, and
    write Kaggle submission predictions."""
    training_data, validation_data, test_data, std_scale = load_training_data()
    kaggle_test_features = load_test_data(std_scale)
    ###############
    # pylearn2 ML
    hl1 = mlp.Sigmoid(layer_name='hl1', dim=200, irange=.1, init_bias=1.)
    hl2 = mlp.Sigmoid(layer_name='hl2', dim=100, irange=.1, init_bias=1.)
    # create Softmax output layer (9 classes)
    output_layer = mlp.Softmax(9, 'output', irange=.1)
    # SGD with momentum; stops when 'valid_objective' fails to improve
    # for N=10 consecutive epochs (MonitorBased early stopping -- not a
    # fixed 400-epoch run as an older comment claimed)
    trainer = sgd.SGD(learning_rate=.05,
                      batch_size=300,
                      learning_rule=learning_rule.Momentum(.5),
                      termination_criterion=MonitorBased(
                          channel_name='valid_objective',
                          prop_decrease=0., N=10),
                      monitoring_dataset={
                          'valid': validation_data,
                          'train': training_data
                      })
    layers = [hl1, hl2, output_layer]
    # create neural net (93 input features)
    model = mlp.MLP(layers, nvis=93)
    # snapshot the best-validation model to disk during training
    watcher = best_params.MonitorBasedSaveBest(
        channel_name='valid_objective',
        save_path='pylearn2_results/pylearn2_test.pkl')
    # momentum ramp and linear lr decay over the first 250 epochs
    velocity = learning_rule.MomentumAdjustor(final_momentum=.6,
                                              start=1,
                                              saturate=250)
    decay = sgd.LinearDecayOverEpoch(start=1, saturate=250, decay_factor=.01)
    ######################
    experiment = Train(dataset=training_data,
                       model=model,
                       algorithm=trainer,
                       extensions=[watcher, velocity, decay])
    experiment.main_loop()
    # load best model and evaluate on the held-out test split
    ################
    model = serial.load('pylearn2_results/pylearn2_test.pkl')
    # get a prediction of the accuracy from the test_data
    test_results = model.fprop(theano.shared(test_data[0], name='test_data')).eval()
    print test_results.shape
    loss = multiclass_log_loss(test_data[1], test_results)
    print 'Test multiclass log loss:', loss
    out_file = 'pylearn2_results/' + str(loss) + 'ann'
    #exp.save(out_file + '.pkl')
    # save the kaggle results
    results = model.fprop(
        theano.shared(kaggle_test_features, name='kaggle_test_data')).eval()
    save_results(out_file + '.csv', kaggle_test_features, results)
X_te = np.load('geno_X_te.npy') #测试集(对学习方法的评估) Y_te = np.load('pheno_Y_te.npy') Y_te_target = np.array(Y_te) random.seed(0) #设置生成随机数用的整数起始值。调用任何其他random模块函数之前调用这个函数 monitor_idx_tr = random.sample(range(88807), 5000) #监测训练 #将训练数据集类型设为32位浮点型,The DenseDesignMatrix class and related code Functionality for representing data that can be described as a dense matrix (rather than a sparse matrix) with each row containing an example and each column corresponding to a different feature. data_tr = p2_dt_dd.DenseDesignMatrix(X=X_tr.astype('float32'), y=Y_tr.astype('float32')) X_tr_monitor, Y_tr_monitor_target = X_tr[monitor_idx_tr, :], Y_tr_target[monitor_idx_tr, :] #一个隐层,用Tanh()作激活函数; 输出层用线性函数作激活函数 h1_layer = p2_md_mlp.Tanh(layer_name='h1', dim=n_hidden, irange=init_vals[0], W_lr_scale=1.0, b_lr_scale=1.0) o_layer = p2_md_mlp.Linear(layer_name='y', dim=out_size, irange=0.0001, W_lr_scale=lr_scale, b_lr_scale=1.0) #Multilayer Perceptron;nvis(Number of “visible units” input units) layers(a list of layer objects,最后1层指定MLP的输出空间) model = p2_md_mlp.MLP(nvis=in_size, layers=[h1_layer, o_layer], seed=1) dropout_cost = p2_ct_mlp_dropout.Dropout(input_include_probs={'h1':1.0, 'y':include_rate}, input_scales={'h1':1.0, 'y':np.float32(1.0/include_rate)}) #随机梯度下降法 algorithm = p2_alg_sgd.SGD(batch_size=b_size, learning_rate=l_rate, learning_rule = p2_alg_lr.Momentum(momentum), termination_criterion=p2_termcri.EpochCounter(max_epochs=1000), cost=dropout_cost) #训练 根据前面的定义 :dataset为一个密集型矩阵,model为MLP多层神经网络,algorithm为SGD train = pylearn2.train.Train(dataset=data_tr, model=model, algorithm=algorithm) train.setup() x = T.matrix() #定义为一个二维数组 #fprop(state_below) does the forward prop transformation y = model.fprop(x)
# --- fragment: layers, learn_rate, batch_size, momentum_* , axes, save_path,
# X_train/X_test etc. come from an earlier chunk; the final Train(...) call
# continues in a later chunk ---
# SGD with momentum; the Dropout cost keeps every layer's input (all include
# probs and scales are 1), so dropout is effectively disabled here.
trainer = sgd.SGD(learning_rate=learn_rate,
                  batch_size=batch_size,
                  learning_rule=learning_rule.Momentum(momentum_start),
                  cost=Dropout(
                      input_include_probs={'l1':1., 'l2':1., 'l3':1., 'l4':1., 'l5':1., 'l6':1.},
                      input_scales={'l1':1., 'l2':1., 'l3':1., 'l4':1., 'l5':1., 'l6':1.}
                  ),
                  termination_criterion=EpochCounter(max_epochs=max_epochs),
                  monitoring_dataset={'train':X_train, 'valid':X_test},
                  )
# single-channel 2-D input of the central crop
input_space = Conv2DSpace(shape=(central_window_shape, central_window_shape),
                          axes = axes,
                          num_channels = 1)
ann = mlp.MLP(layers, input_space=input_space)
# momentum ramp, best-model snapshotting on 'valid_y_nll', and linear lr decay
velocity = learning_rule.MomentumAdjustor(final_momentum=momentum_end,
                                          start=1,
                                          saturate=momentum_saturate)
watcher = best_params.MonitorBasedSaveBest(channel_name='valid_y_nll',
                                           save_path=save_path)
decay = sgd.LinearDecayOverEpoch(start=1,
                                 saturate=decay_saturate,
                                 decay_factor=decay_factor)
# realtime data augmentation wrapper (scaling/translation/centering)
ra = RealtimeAugment(window_shape=[img_dim, img_dim],
                     randomize=[X_train, X_test],
                     scale_diff=scale_diff,
                     translation=translation,
                     center_shape=center_shape,
                     center=[X_train, X_test],
                     preprocess=preprocess)
# NOTE: this call is completed in the next chunk of the file.
train = Train(dataset=X_train, model=ann, algorithm=trainer,
from pylearn2.termination_criteria import EpochCounter

# Day-21 click data, wrapped behind a pretrained 1024-feature transformer.
raw_ds = CLICK4DAY(which_set='train', which_day=21)
transformer = Transformer(raw=raw_ds, nfeatures=1024, rng=None)
ds = TransformerDataset(raw=raw_ds,
                        transformer=transformer,
                        cpu_only=False,
                        space_preserving=False)

# One sigmoid hidden layer feeding a binary softmax.
hidden_layer = mlp.Sigmoid(layer_name='hidden', dim=256, irange=.1, init_bias=1.)
output_layer = mlp.Softmax(2, 'output', irange=.1)
layers = [hidden_layer, output_layer]
ann = mlp.MLP(layers, nvis=1024)

trainer = sgd.SGD(learning_rate=.05,
                  batch_size=1024,
                  train_iteration_mode='even_sequential',
                  termination_criterion=EpochCounter(400))
trainer.setup(ann, ds)

# Do-while: run at least one epoch, then let the termination
# criterion decide whether to keep going.
keep_going = True
while keep_going:
    trainer.train(dataset=ds)
    ann.monitor.report_epoch()
    ann.monitor()
    keep_going = trainer.continue_learning(ann)
# --- fragment: the line below closes a layer constructor call (l1..l3, trn,
# tst, in_space) started in a previous chunk ---
                  max_kernel_norm=1.9365)
# dense maxout layer: 500 units, 5 linear pieces each
l4 = maxout.Maxout(layer_name='l4',
                   irange=.005,
                   num_units=500,
                   num_pieces=5,
                   max_col_norm=1.9)
# 10-way softmax head
output = mlp.Softmax(layer_name='y',
                     n_classes=10,
                     irange=.005,
                     max_col_norm=1.9365)
layers = [l1, l2, l3, l4, output]
mdl = mlp.MLP(layers, input_space=in_space)
# SGD + momentum with dropout on l1's input (keep prob .8)
trainer = sgd.SGD(learning_rate=.17,
                  batch_size=128,
                  learning_rule=learning_rule.Momentum(.5),
                  # Remember, default dropout is .5
                  cost=Dropout(input_include_probs={'l1': .8},
                               input_scales={'l1': 1.}),
                  termination_criterion=EpochCounter(max_epochs=475),
                  monitoring_dataset={'valid': tst, 'train': trn})
# GCN + ZCA whitening; statistics are fit on train only (can_fit=True)
# and reused on the test set.
preprocessor = Pipeline([GlobalContrastNormalization(scale=55.), ZCA()])
trn.apply_preprocessor(preprocessor=preprocessor, can_fit=True)
tst.apply_preprocessor(preprocessor=preprocessor, can_fit=False)
serial.save('kaggle_cifar10_preprocessor.pkl', preprocessor)
def __linit(self, X, y):
    """Lazily build the pylearn2 MLP from the layer spec in ``self.layers``.

    (NOTE(review): the name `__linit` looks like a typo for `__init`/`_init`;
    renaming would change the mangled attribute name, so confirm callers first.)

    Each entry of self.layers[:-1] is a tuple (activation_name, units[, pieces]);
    the last entry describes the output layer.  Builds self.mlp, self.ds,
    hooks up self.trainer and compiles self.f for forward propagation.
    """
    if (self.verbose > 0):
        print "Lazy initialisation"
    layers = self.layers
    pylearn2mlp_layers = []
    self.units_per_layer = []
    #input layer units
    self.units_per_layer += [X.shape[1]]
    for layer in layers[:-1]:
        self.units_per_layer += [layer[1]]
    #Output layer units
    self.units_per_layer += [y.shape[1]]
    if (self.verbose > 0):
        print "Units per layer", str(self.units_per_layer)
    for i, layer in enumerate(layers[:-1]):
        # +1 accounts for the bias unit in the fan-in
        fan_in = self.units_per_layer[i] + 1
        fan_out = self.units_per_layer[i + 1]
        # Glorot-style uniform initialisation limit
        lim = np.sqrt(6) / (np.sqrt(fan_in + fan_out))
        layer_name = "Hidden_%i_%s" % (i, layer[0])
        activate_type = layer[0]
        if activate_type == "RectifiedLinear":
            hidden_layer = mlp.RectifiedLinear(dim=layer[1],
                                               layer_name=layer_name,
                                               irange=lim)
        elif activate_type == "Sigmoid":
            hidden_layer = mlp.Sigmoid(dim=layer[1],
                                       layer_name=layer_name,
                                       irange=lim)
        elif activate_type == "Tanh":
            hidden_layer = mlp.Tanh(dim=layer[1],
                                    layer_name=layer_name,
                                    irange=lim)
        elif activate_type == "Maxout":
            # Maxout layers carry a third tuple element: pieces per unit
            hidden_layer = maxout.Maxout(num_units=layer[1],
                                         num_pieces=layer[2],
                                         layer_name=layer_name,
                                         irange=lim)
        else:
            raise NotImplementedError(
                "Layer of type %s are not implemented yet" % layer[0])
        pylearn2mlp_layers += [hidden_layer]
    output_layer_info = layers[-1]
    output_layer_name = "Output_%s" % output_layer_info[0]
    fan_in = self.units_per_layer[-2] + 1
    fan_out = self.units_per_layer[-1]
    lim = np.sqrt(6) / (np.sqrt(fan_in + fan_out))
    # NOTE(review): only "Linear" output layers are handled; any other spec
    # silently produces a net without an output layer -- confirm intent.
    if (output_layer_info[0] == "Linear"):
        output_layer = mlp.Linear(dim=self.units_per_layer[-1],
                                  layer_name=output_layer_name,
                                  irange=lim)
        pylearn2mlp_layers += [output_layer]
    self.mlp = mlp.MLP(pylearn2mlp_layers, nvis=self.units_per_layer[0])
    self.ds = DenseDesignMatrix(X=X, y=y)
    self.trainer.setup(self.mlp, self.ds)
    inputs = self.mlp.get_input_space().make_theano_batch()
    self.f = theano.function([inputs], self.mlp.fprop(inputs))
import pylearn2.models.autoencoder as auto
import numpy as np
#from pylearn2.space import CompositeSpace, Conv2DSpace, VectorSpace, IndexSpace
from pylearn2.space import VectorSpace

if __name__ == '__main__':
    patchSize = 39
    # Single bias-free Linear layer sized patchSize^2 -> patchSize^2; its
    # weights are then overwritten with the identity, making a pass-through net.
    layer = mlp.Linear(dim=patchSize**2,
                       layer_name='fixed_input',
                       irange=0.01,
                       use_bias=False)
    mlp_stupid = mlp.MLP(layers=[layer],
                         batch_size=None,
                         input_space=None,
                         nvis=patchSize**2,
                         seed=None,
                         layer_name=None)
    W_fixed = np.eye(patchSize**2).astype(np.float32)
    layer.set_input_space(VectorSpace(dim=patchSize**2))
    layer.set_weights(W_fixed)
    # Tied-weight autoencoder with no activation functions (None, None).
    autoencoder = auto.Autoencoder(patchSize**2,
                                   patchSize**2,
                                   None,
                                   None,
                                   tied_weights=True)
    # NOTE(review): this assigns a raw ndarray to `autoencoder.weights`;
    # pylearn2's Autoencoder normally keeps a theano shared variable there --
    # confirm downstream code expects an ndarray.
    autoencoder.weights = W_fixed
# ------------------------------------------Simple ANN
# CSV splits (the `train` dataset is created earlier in this file).
valid = csv_dataset.CSVDataset("../data/valid.csv",
                               expect_labels=True,
                               expect_headers=False,
                               delimiter=',')
test = csv_dataset.CSVDataset("../data/test.csv",
                              expect_labels=True,
                              expect_headers=False,
                              delimiter=',')

# One sigmoid hidden layer feeding a 5-way softmax head.
h0 = mlp.Sigmoid(layer_name="h0", dim=73, sparse_init=0)
y0 = mlp.Softmax(n_classes=5, layer_name="y0", irange=0)
layers = [h0, y0]
nn = mlp.MLP(layers, nvis=train.X.shape[1])

algo = sgd.SGD(learning_rate=0.05,
               batch_size=100,
               monitoring_dataset=valid,
               termination_criterion=EpochCounter(100))
algo.setup(nn, train)

# Snapshots the parameters whenever the monitored objective improves.
save_best = best_params.MonitorBasedSaveBest(channel_name="objective",
                                             save_path='best_params.pkl')

# Do-while: always run one epoch, then ask the algorithm whether to continue.
keep_going = True
while keep_going:
    algo.train(dataset=train)
    nn.monitor.report_epoch()
    nn.monitor()
    save_best.on_monitor(nn, train, algo)
    keep_going = algo.continue_learning(nn)
def main():
    """D-GEX-style training: a single-hidden-layer Tanh MLP regresses gene
    expression targets 0-4760 from 943 landmark genes; per-epoch MAE logging,
    lr decay on rising training MAE, and best-model snapshots.

    CLI: base_name n_epoch n_hidden include_rate
    """
    base_name = sys.argv[1]  # output file-name prefix
    n_epoch = int(sys.argv[2])  # number of epochs
    n_hidden = int(sys.argv[3])  # hidden-layer size
    include_rate = float(sys.argv[4])  # keep probability (1 - dropout rate)
    in_size = 943  # input units (landmark genes)
    out_size = 4760  # output units (target genes)
    b_size = 200  # minibatch size
    l_rate = 5e-4  # learning rate
    l_rate_min = 1e-5  # learning-rate floor
    decay_factor = 0.9  # multiplicative lr decay
    lr_scale = 3.0  # lr multiplier for the output layer
    momentum = 0.5  # momentum coefficient
    # Glorot-style uniform init limits for the two weight matrices.
    init_vals = np.sqrt(6.0/(np.array([in_size, n_hidden])+np.array([n_hidden, out_size])))
    print 'loading data...'
    # Load train / validation / test splits plus two external evaluation sets.
    X_tr = np.load('bgedv2_X_tr_float64.npy')
    Y_tr = np.load('bgedv2_Y_tr_0-4760_float64.npy')
    Y_tr_target = np.array(Y_tr)
    X_va = np.load('bgedv2_X_va_float64.npy')
    Y_va = np.load('bgedv2_Y_va_0-4760_float64.npy')
    Y_va_target = np.array(Y_va)
    X_te = np.load('bgedv2_X_te_float64.npy')
    Y_te = np.load('bgedv2_Y_te_0-4760_float64.npy')
    Y_te_target = np.array(Y_te)
    X_1000G = np.load('1000G_X_float64.npy')
    Y_1000G = np.load('1000G_Y_0-4760_float64.npy')
    Y_1000G_target = np.array(Y_1000G)
    X_GTEx = np.load('GTEx_X_float64.npy')
    Y_GTEx = np.load('GTEx_Y_0-4760_float64.npy')
    Y_GTEx_target = np.array(Y_GTEx)
    # Fixed seed for reproducible sampling.
    random.seed(0)
    # 5000 randomly chosen training rows are monitored each epoch.
    monitor_idx_tr = random.sample(range(88807), 5000)
    # Wrap X, Y (as float32) in a DenseDesignMatrix.
    data_tr = p2_dt_dd.DenseDesignMatrix(X=X_tr.astype('float32'), y=Y_tr.astype('float32'))
    # Slice out the monitored subset.
    X_tr_monitor, Y_tr_monitor_target = X_tr[monitor_idx_tr, :], Y_tr_target[monitor_idx_tr, :]
    # Hidden layer: Tanh activation.
    h1_layer = p2_md_mlp.Tanh(layer_name='h1', dim=n_hidden, irange=init_vals[0], W_lr_scale=1.0, b_lr_scale=1.0)
    # Output layer: linear activation.
    o_layer = p2_md_mlp.Linear(layer_name='y', dim=out_size, irange=0.0001, W_lr_scale=lr_scale, b_lr_scale=1.0)
    # Assemble the model.
    model = p2_md_mlp.MLP(nvis=in_size, layers=[h1_layer, o_layer], seed=1)
    # Dropout configuration: input to h1 always kept; input to y kept with
    # probability include_rate and rescaled by 1/include_rate.
    dropout_cost = p2_ct_mlp_dropout.Dropout(input_include_probs={'h1':1.0, 'y':include_rate},
                                             input_scales={'h1':1.0, 'y':np.float32(1.0/include_rate)})
    # Training algorithm (batch size, lr, momentum rule, stopping, dropout cost).
    algorithm = p2_alg_sgd.SGD(batch_size=b_size, learning_rate=l_rate, learning_rule = p2_alg_lr.Momentum(momentum), termination_criterion=p2_termcri.EpochCounter(max_epochs=1000), cost=dropout_cost)
    # Train object wiring dataset, model and algorithm together.
    train = pylearn2.train.Train(dataset=data_tr, model=model, algorithm=algorithm)
    train.setup()
    x = T.matrix()
    y = model.fprop(x)
    # Compiled predictor used for the per-epoch monitoring below.
    f = theano.function([x], y)
    # "old" values seed the relative-change computation on the first epoch.
    MAE_va_old = 10.0
    MAE_va_best = 10.0
    MAE_tr_old = 10.0
    MAE_te_old = 10.0
    MAE_1000G_old = 10.0
    MAE_1000G_best = 10.0
    MAE_GTEx_old = 10.0
    outlog = open(base_name + '.log', 'w')
    # (sic) 'learing_rate' typo is the emitted column header -- kept as-is.
    log_str = '\t'.join(map(str, ['epoch', 'MAE_va', 'MAE_va_change', 'MAE_te', 'MAE_te_change', 'MAE_1000G', 'MAE_1000G_change', 'MAE_GTEx', 'MAE_GTEx_change', 'MAE_tr', 'MAE_tr_change', 'learing_rate', 'time(sec)']))
    print log_str
    outlog.write(log_str + '\n')
    sys.stdout.flush()  # force the buffered header out immediately
    for epoch in range(0, n_epoch):
        t_old = time.time()  # epoch start time
        train.algorithm.train(train.dataset)  # one training epoch
        # Predictions for every monitored dataset.
        Y_va_hat = f(X_va.astype('float32')).astype('float64')
        Y_te_hat = f(X_te.astype('float32')).astype('float64')
        Y_tr_hat_monitor = f(X_tr_monitor.astype('float32')).astype('float64')
        Y_1000G_hat = f(X_1000G.astype('float32')).astype('float64')
        Y_GTEx_hat = f(X_GTEx.astype('float32')).astype('float64')
        # Mean absolute error between predictions and targets.
        MAE_va = np.abs(Y_va_target - Y_va_hat).mean()
        MAE_te = np.abs(Y_te_target - Y_te_hat).mean()
        MAE_tr = np.abs(Y_tr_monitor_target - Y_tr_hat_monitor).mean()
        MAE_1000G = np.abs(Y_1000G_target - Y_1000G_hat).mean()
        MAE_GTEx = np.abs(Y_GTEx_target - Y_GTEx_hat).mean()
        # Relative change versus the previous epoch.
        MAE_va_change = (MAE_va - MAE_va_old)/MAE_va_old
        MAE_te_change = (MAE_te - MAE_te_old)/MAE_te_old
        MAE_tr_change = (MAE_tr - MAE_tr_old)/MAE_tr_old
        MAE_1000G_change = (MAE_1000G - MAE_1000G_old)/MAE_1000G_old
        MAE_GTEx_change = (MAE_GTEx - MAE_GTEx_old)/MAE_GTEx_old
        # Roll the MAE values forward.
        MAE_va_old = MAE_va
        MAE_te_old = MAE_te
        MAE_tr_old = MAE_tr
        MAE_1000G_old = MAE_1000G
        MAE_GTEx_old = MAE_GTEx
        t_new = time.time()  # epoch end time
        l_rate = train.algorithm.learning_rate.get_value()
        log_str = '\t'.join(map(str, [epoch+1, '%.6f'%MAE_va, '%.6f'%MAE_va_change, '%.6f'%MAE_te, '%.6f'%MAE_te_change, '%.6f'%MAE_1000G, '%.6f'%MAE_1000G_change, '%.6f'%MAE_GTEx, '%.6f'%MAE_GTEx_change, '%.6f'%MAE_tr, '%.6f'%MAE_tr_change, '%.5f'%l_rate, int(t_new-t_old)]))
        print log_str
        outlog.write(log_str + '\n')
        sys.stdout.flush()
        # If training error increased, decay the learning rate...
        if MAE_tr_change > 0:
            l_rate = l_rate*decay_factor
        # ...never below the l_rate_min floor.
        if l_rate < l_rate_min:
            l_rate = l_rate_min
        # Push the (possibly decayed) learning rate back into the algorithm.
        train.algorithm.learning_rate.set_value(np.float32(l_rate))
        # Snapshot the model whenever validation MAE improves.
        if MAE_va < MAE_va_best:
            MAE_va_best = MAE_va
            outmodel = open(base_name + '_bestva_model.pkl', 'wb')
            pkl.dump(model, outmodel)
            outmodel.close()
            np.save(base_name + '_bestva_Y_te_hat.npy', Y_te_hat)
            np.save(base_name + '_bestva_Y_va_hat.npy', Y_va_hat)
        # Separately snapshot whenever 1000G MAE improves.
        if MAE_1000G < MAE_1000G_best:
            MAE_1000G_best = MAE_1000G
            outmodel = open(base_name + '_best1000G_model.pkl', 'wb')
            pkl.dump(model, outmodel)
            outmodel.close()
            np.save(base_name + '_best1000G_Y_1000G_hat.npy', Y_1000G_hat)
            np.save(base_name + '_best1000G_Y_GTEx_hat.npy', Y_GTEx_hat)
    print 'MAE_va_best : %.6f' % (MAE_va_best)
    print 'MAE_1000G_best : %.6f' % (MAE_1000G_best)
    outlog.write('MAE_va_best : %.6f' % (MAE_va_best) + '\n')
    outlog.write('MAE_1000G_best : %.6f' % (MAE_1000G_best) + '\n')
    outlog.close()
def main( x ): l1_dim = x[0] l2_dim = x[1] learning_rate = x[2] momentum = x[3] l1_dropout = x[4] decay_factor = x[5] min_lr = 1e-7 # train = np.loadtxt( train_file, delimiter = ',' ) x_train = train[:,0:-1] y_train = train[:,-1] y_train.shape = ( y_train.shape[0], 1 ) # validation = np.loadtxt( validation_file, delimiter = ',' ) x_valid = validation[:,0:-1] y_valid = validation[:,-1] y_valid.shape = ( y_valid.shape[0], 1 ) # #input_space = VectorSpace( dim = x.shape[1] ) full = DenseDesignMatrix( X = x_train, y = y_train ) valid = DenseDesignMatrix( X = x_valid, y = y_valid ) l1 = mlp.RectifiedLinear( layer_name='l1', irange=.001, dim = l1_dim, # "Rather than using weight decay, we constrain the norms of the weight vectors" max_col_norm=1. ) l2 = mlp.RectifiedLinear( layer_name='l2', irange=.001, dim = l2_dim, max_col_norm=1. ) output = mlp.Linear( dim = 1, layer_name='y', irange=.0001 ) layers = [l1, l2, output] nvis = x_train.shape[1] mdl = mlp.MLP( layers, nvis = nvis ) # input_space = input_space #lr = .001 #epochs = 100 decay = sgd.ExponentialDecay( decay_factor = decay_factor, min_lr = min_lr ) trainer = sgd.SGD( learning_rate = learning_rate, batch_size=128, learning_rule=learning_rule.Momentum( momentum ), update_callbacks = [ decay ], # Remember, default dropout is .5 cost = Dropout( input_include_probs = {'l1': l1_dropout}, input_scales={'l1': 1.}), #termination_criterion = EpochCounter(epochs), termination_criterion = MonitorBased( channel_name = "valid_objective", prop_decrease = 0.001, # 0.1% of objective N = 10 ), # valid_objective is MSE monitoring_dataset = { 'train': full, 'valid': valid } ) watcher = best_params.MonitorBasedSaveBest( channel_name = 'valid_objective', save_path = output_model_file ) experiment = Train( dataset = full, model = mdl, algorithm = trainer, extensions = [ watcher ] ) experiment.main_loop() ### error = get_error_from_model( output_model_file ) print "*** error: {} ***".format( error ) return error
# --- fragment: the kwargs below complete a ConvRectifiedLinear layer call
# (layerh2/layerh3, ds, classDict) started in a previous chunk ---
                                  output_channels=64,
                                  irange=.05,
                                  kernel_shape=[5, 5],
                                  pool_shape=[4, 4],
                                  pool_stride=[2, 2],
                                  max_kernel_norm=1.9365)
''' Note: changed the number of classes '''
# 121-way softmax head (plankton classes), std-dev-based init
layery = mlp.Softmax(max_col_norm=1.9365,
                     layer_name='y',
                     n_classes=121,
                     istdev=.05)
print 'Setting up trainers'
trainer = sgd.SGD(learning_rate=0.5,
                  batch_size=50,
                  termination_criterion=EpochCounter(200),
                  learning_rule=Momentum(init_momentum=0.5))
layers = [layerh2, layerh3, layery]
# 28x28 single-channel input images
ann = mlp.MLP(layers, input_space=Conv2DSpace(shape=[28, 28], num_channels=1))
trainer.setup(ann, ds)
print 'Start Training'
# do-while: at least one epoch before the continue_learning check
while True:
    trainer.train(dataset=ds)
    ann.monitor.report_epoch()
    ann.monitor()
    if not trainer.continue_learning(ann):
        break
# 3. Predict
XReport, Y_info = plankton.loadReportData()
probMatrix = ann.fprop(theano.shared(XReport, name='XReport')).eval()
plankton.generateSubmissionFile(probMatrix, classDict, Y_info)
def train(d=None): train_X = np.array(d.train_X) train_y = np.array(d.train_Y) valid_X = np.array(d.valid_X) valid_y = np.array(d.valid_Y) test_X = np.array(d.test_X) test_y = np.array(d.test_Y) nb_classes = len(np.unique(train_y)) train_y = convert_one_hot(train_y) valid_y = convert_one_hot(valid_y) # train_set = RotationalDDM(X=train_X, y=train_y) train_set = DenseDesignMatrix(X=train_X, y=train_y) valid_set = DenseDesignMatrix(X=valid_X, y=valid_y) print 'Setting up' batch_size = 100 c0 = mlp.ConvRectifiedLinear( layer_name='c0', output_channels=64, irange=.05, kernel_shape=[5, 5], pool_shape=[4, 4], pool_stride=[2, 2], # W_lr_scale=0.25, max_kernel_norm=1.9365) c1 = mlp.ConvRectifiedLinear( layer_name='c1', output_channels=64, irange=.05, kernel_shape=[5, 5], pool_shape=[4, 4], pool_stride=[2, 2], # W_lr_scale=0.25, max_kernel_norm=1.9365) c2 = mlp.ConvRectifiedLinear( layer_name='c2', output_channels=64, irange=.05, kernel_shape=[5, 5], pool_shape=[4, 4], pool_stride=[5, 4], W_lr_scale=0.25, # max_kernel_norm=1.9365 ) sp0 = mlp.SoftmaxPool( detector_layer_dim=16, layer_name='sp0', pool_size=4, sparse_init=512, ) sp1 = mlp.SoftmaxPool( detector_layer_dim=16, layer_name='sp1', pool_size=4, sparse_init=512, ) r0 = mlp.RectifiedLinear( layer_name='r0', dim=512, sparse_init=512, ) r1 = mlp.RectifiedLinear( layer_name='r1', dim=512, sparse_init=512, ) s0 = mlp.Sigmoid( layer_name='s0', dim=500, # max_col_norm=1.9365, sparse_init=15, ) out = mlp.Softmax( n_classes=nb_classes, layer_name='output', irange=.0, # max_col_norm=1.9365, # sparse_init=nb_classes, ) epochs = EpochCounter(100) layers = [s0, out] decay_coeffs = [.00005, .00005, .00005] in_space = Conv2DSpace( shape=[d.size, d.size], num_channels=1, ) vec_space = VectorSpace(d.size**2) nn = mlp.MLP( layers=layers, # input_space=in_space, nvis=d.size**2, # batch_size=batch_size, ) trainer = sgd.SGD( learning_rate=0.01, # cost=SumOfCosts(costs=[ # dropout.Dropout(), # MethodCost(method='cost_from_X'), # 
WeightDecay(decay_coeffs), # ]), # cost=MethodCost(method='cost_from_X'), batch_size=batch_size, # train_iteration_mode='even_shuffled_sequential', termination_criterion=epochs, # learning_rule=learning_rule.Momentum(init_momentum=0.5), ) trainer = bgd.BGD( batch_size=10000, line_search_mode='exhaustive', conjugate=1, updates_per_batch=10, termination_criterion=epochs, ) lr_adjustor = LinearDecayOverEpoch( start=1, saturate=10, decay_factor=.1, ) momentum_adjustor = learning_rule.MomentumAdjustor( final_momentum=.99, start=1, saturate=10, ) trainer.setup(nn, train_set) print 'Learning' test_X = vec_space.np_format_as(test_X, nn.get_input_space()) train_X = vec_space.np_format_as(train_X, nn.get_input_space()) i = 0 X = nn.get_input_space().make_theano_batch() Y = nn.fprop(X) predict = theano.function([X], Y) best = -40 best_iter = -1 while trainer.continue_learning(nn): print '--------------' print 'Training Epoch ' + str(i) trainer.train(dataset=train_set) nn.monitor() print 'Evaluating...' predictions = convert_categorical(predict(train_X[:2000])) score = accuracy_score(convert_categorical(train_y[:2000]), predictions) print 'Score on train: ' + str(score) predictions = convert_categorical(predict(test_X)) score = accuracy_score(test_y, predictions) print 'Score on test: ' + str(score) best, best_iter = (best, best_iter) if best > score else (score, i) print 'Current best: ' + str(best) + ' at iter ' + str(best_iter) print classification_report(test_y, predictions) print 'Adjusting parameters...' # momentum_adjustor.on_monitor(nn, valid_set, trainer) # lr_adjustor.on_monitor(nn, valid_set, trainer) i += 1 print ' '
def main(): base_name = sys.argv[ 1] # 获取第一个参数 sys.argv[ ]记录(获取)命令行参数 sys(system) argv(argument variable)参数变量,该变量为list列表 n_epoch = int(sys.argv[2]) #获取第二个参数 n_hidden = int(sys.argv[3]) #获取第三个参数作为隐层神经元个数 include_rate = float(sys.argv[4]) in_size = 1001 #输入层神经元个数(标记基因个数) out_size = 1 #输出层神经元个数 b_size = 200 #偏差值 l_rate = 5e-4 #学习速率 l_rate_min = 1e-5 #学习速率最小值 decay_factor = 0.9 #衰减因数 lr_scale = 3.0 momentum = 0.5 init_vals = np.sqrt(6.0 / (np.array([in_size, n_hidden]) + np.array([n_hidden, out_size]))) #初始值,返回平方根 print 'loading data...' #显示载入数据 X_tr = np.load( 'geno_X_tr_float64.npy') # tr(traing)以numpy专用二进制类型保存训练数据集的数据 Y_tr = np.load('pheno_Y_tr_0-4760_float64.npy') Y_tr_pheno = np.array(Y_tr) X_va = np.load( 'geno_X_va_float64.npy') #验证集(模型选择,在学习到不同复杂度的模型中,选择对验证集有最小预测误差的模型) Y_va = np.load('pheno_Y_va_0-4760_float64.npy') Y_va_target = np.array(Y_va) X_te = np.load('geno_te_float64.npy') #测试集(对学习方法的评估) Y_te = np.load('pheno_Y_te_0-4760_float64.npy') Y_te_target = np.array(Y_te) random.seed(0) #设置生成随机数用的整数起始值。调用任何其他random模块函数之前调用这个函数 monitor_idx_tr = random.sample(range(88807), 5000) #监测训练 #将训练数据集类型设为32位浮点型,The DenseDesignMatrix class and related code Functionality for representing data that can be described as a dense matrix (rather than a sparse matrix) with each row containing an example and each column corresponding to a different feature. 
data_tr = p2_dt_dd.DenseDesignMatrix(X=X_tr.astype('float32'), y=Y_tr.astype('float32')) X_tr_monitor, Y_tr_monitor_target = X_tr[monitor_idx_tr, :], Y_tr_target[ monitor_idx_tr, :] #一个隐层,用Tanh()作激活函数; 输出层用线性函数作激活函数 h1_layer = p2_md_mlp.Tanh(layer_name='h1', dim=n_hidden, irange=init_vals[0], W_lr_scale=1.0, b_lr_scale=1.0) o_layer = p2_md_mlp.Linear(layer_name='y', dim=out_size, irange=0.0001, W_lr_scale=lr_scale, b_lr_scale=1.0) #Multilayer Perceptron;nvis(Number of “visible units” input units) layers(a list of layer objects,最后1层指定MLP的输出空间) model = p2_md_mlp.MLP(nvis=in_size, layers=[h1_layer, o_layer], seed=1) dropout_cost = p2_ct_mlp_dropout.Dropout(input_include_probs={ 'h1': 1.0, 'y': include_rate }, input_scales={ 'h1': 1.0, 'y': np.float32(1.0 / include_rate) }) #随机梯度下降法 algorithm = p2_alg_sgd.SGD( batch_size=b_size, learning_rate=l_rate, learning_rule=p2_alg_lr.Momentum(momentum), termination_criterion=p2_termcri.EpochCounter(max_epochs=1000), cost=dropout_cost) #训练 根据前面的定义 :dataset为一个密集型矩阵,model为MLP多层神经网络,algorithm为SGD train = pylearn2.train.Train(dataset=data_tr, model=model, algorithm=algorithm) train.setup() x = T.matrix() #定义为一个二维数组 #fprop(state_below) does the forward prop transformation y = model.fprop(x) f = theano.function([x], y) #定义一个function函数,输入为x,输出为y MAE_va_old = 10.0 #平均绝对误差 MAE_va_best = 10.0 MAE_tr_old = 10.0 #训练误差 MAE_te_old = 10.0 MAE_1000G_old = 10.0 MAE_1000G_best = 10.0 MAE_GTEx_old = 10.0 #base_name = sys.argv[1] # 获取第一个参数 sys.argv[ ]记录(获取)命令行参数 outlog = open(base_name + '.log', 'w') log_str = '\t'.join( map(str, [ 'epoch', 'MAE_va', 'MAE_va_change', 'MAE_te', 'MAE_te_change', 'MAE_tr', 'MAE_tr_change', 'learing_rate', 'time(sec)' ])) print log_str #输出运行日志 outlog.write(log_str + '\n') #Python的标准输出缓冲(这意味着它收集“写入”标准出来之前,将其写入到终端的数据)。调用sys.stdout.flush()强制其“缓冲 sys.stdout.flush() for epoch in range(0, n_epoch): t_old = time.time() train.algorithm.train(train.dataset) Y_va_hat = f(X_va.astype('float32')).astype('float64') Y_te_hat = 
f(X_te.astype('float32')).astype('float64') Y_tr_hat_monitor = f(X_tr_monitor.astype('float32')).astype('float64') #计算平均绝对误差 MAE_va = np.abs(Y_va_target - Y_va_hat).mean() MAE_te = np.abs(Y_te_target - Y_te_hat).mean() MAE_tr = np.abs(Y_tr_monitor_target - Y_tr_hat_monitor).mean() #误差变换率 MAE_va_change = (MAE_va - MAE_va_old) / MAE_va_old MAE_te_change = (MAE_te - MAE_te_old) / MAE_te_old MAE_tr_change = (MAE_tr - MAE_tr_old) / MAE_tr_old #将old误差值更新为当前误差值 MAE_va_old = MAE_va MAE_te_old = MAE_te MAE_tr_old = MAE_tr #返回当前的时间戳(1970纪元后经过的浮点秒数) t_new = time.time() l_rate = train.algorithm.learning_rate.get_value() log_str = '\t'.join( map(str, [ epoch + 1, '%.6f' % MAE_va, '%.6f' % MAE_va_change, '%.6f' % MAE_te, '%.6f' % MAE_te_change, '%.6f' % MAE_tr, '%.6f' % MAE_tr_change, '%.5f' % l_rate, int(t_new - t_old) ])) print log_str outlog.write(log_str + '\n') sys.stdout.flush() if MAE_tr_change > 0: #训练误差变换率大于0时,学习速率乘上一个衰减因子 l_rate = l_rate * decay_factor if l_rate < l_rate_min: #学习速率小于最小速率时,更新为最小速率 l_rate = l_rate_min train.algorithm.learning_rate.set_value(np.float32(l_rate)) if MAE_va < MAE_va_best: MAE_va_best = MAE_va outmodel = open(base_name + '_bestva_model.pkl', 'wb') pkl.dump(model, outmodel) outmodel.close() np.save(base_name + '_bestva_Y_te_hat.npy', Y_te_hat) np.save(base_name + '_bestva_Y_va_hat.npy', Y_va_hat) print 'MAE_va_best : %.6f' % (MAE_va_best) outlog.write('MAE_va_best : %.6f' % (MAE_va_best) + '\n') outlog.close()
def evaluateTeam(eventCodeList, teamNumber):
    """Fetch a team's match data from The Blue Alliance and score it with two MLPs.

    For each event in `eventCodeList`, pulls the team's matches over HTTP,
    builds two datasets (defense crossings and boulder goals), trains a small
    network on each (only when >4 matches exist), and returns `ret` =
    [goal predictions, per-defense crossing predictions].

    NOTE(review): `epochsMode` is read but not defined in this function —
    presumably a module-level constant; confirm it exists at import time.
    """
    print "### saving example dataset for team #" + str(teamNumber)
    # roboDataset[0] = (defense ids, crossing counts); roboDataset[1] = (dummy 0s, boulder counts).
    roboDataset = [[[], []], [[], []]]
    # NOTE(review): the retry budget is shared across ALL events, not reset
    # per event — confirm that is intended.
    reattempts = 10
    brokenRequest = False
    for event in eventCodeList:
        r = []
        print "requesting matches at", event, "for team", teamNumber
        rStr = 'http://www.thebluealliance.com/api/v2/team/frc' + str(
            teamNumber
        ) + '/event/' + event + '/matches?X-TBA-App-Id=frc4534:auto-scouting:2'
        # Bare excepts below deliberately treat any network failure as "retry".
        try:
            r = requests.get(rStr)
        except:
            print "first match event request for team", teamNumber, "failed, beginning reattempts", r
            time.sleep(2)
        # `r` stays [] only if the request raised; a Response never equals [].
        while r == [] and reattempts > 0:
            try:
                r = requests.get(rStr)
            except:
                pass
            time.sleep(2)
            reattempts -= 1
            print reattempts, "more attempts to request matches at", event, "for team", teamNumber
        if r == []:
            brokenRequest = True
        if brokenRequest:
            print "broken request, team", teamNumber, ", event:", event, ". internet disconnected/unreachable?"
        if brokenRequest == False:
            for i in r.json():
                # Any malformed match record is skipped via the bare except below.
                try:
                    stringMatchData = [[], []]
                    numMatchData = [[], []]
                    # Work out which alliance this team played on.
                    if "frc" + str(
                            teamNumber) in i['alliances']['blue']['teams']:
                        alliance = 'blue'
                    elif "frc" + str(
                            teamNumber) in i['alliances']['red']['teams']:
                        alliance = 'red'
                    else:
                        alliance = 'team is not in match alliances...'
                    # Position 1 is always the low bar; 2-5 vary per match.
                    stringMatchData[0].append('E_LowBar')
                    stringMatchData[0].append(
                        i['score_breakdown'][alliance]['position2'])
                    stringMatchData[0].append(
                        i['score_breakdown'][alliance]['position3'])
                    stringMatchData[0].append(
                        i['score_breakdown'][alliance]['position4'])
                    stringMatchData[0].append(
                        i['score_breakdown'][alliance]['position5'])
                    # Crossing counts aligned with the defense names above.
                    stringMatchData[1].append(
                        i['score_breakdown'][alliance]['position1crossings'])
                    stringMatchData[1].append(
                        i['score_breakdown'][alliance]['position2crossings'])
                    stringMatchData[1].append(
                        i['score_breakdown'][alliance]['position3crossings'])
                    stringMatchData[1].append(
                        i['score_breakdown'][alliance]['position4crossings'])
                    stringMatchData[1].append(
                        i['score_breakdown'][alliance]['position5crossings'])
                    incompleteMatchDataError = False
                    # Map each defense name to a numeric id 0-8.
                    for j in stringMatchData[0]:
                        if j == 'E_LowBar':
                            numMatchData[0].append(0)
                        elif j == 'A_Portcullis':
                            numMatchData[0].append(1)
                        elif j == 'A_ChevalDeFrise':
                            numMatchData[0].append(2)
                        elif j == 'B_Moat':
                            numMatchData[0].append(3)
                        elif j == 'B_Ramparts':
                            numMatchData[0].append(4)
                        elif j == 'C_Drawbridge':
                            numMatchData[0].append(5)
                        elif j == 'C_SallyPort':
                            numMatchData[0].append(6)
                        elif j == 'D_RockWall':
                            numMatchData[0].append(7)
                        elif j == 'D_RoughTerrain':
                            numMatchData[0].append(8)
                        else:
                            # Unknown defense name: drop this whole match.
                            incompleteMatchDataError = True
                    for j in stringMatchData[1]:
                        numMatchData[1].append(j)
                    if incompleteMatchDataError == False:
                        # One (defense id -> crossings) example per position.
                        for j in numMatchData[0]:
                            roboDataset[0][0].append([j])
                        for j in numMatchData[1]:
                            roboDataset[0][1].append([j])
                        #roboDataset[0][0].append(numMatchData[0])
                        #roboDataset[0][1].append(numMatchData[1])
                        # Boulder targets: [autoLow, autoHigh, teleopLow, teleopHigh].
                        roboDataset[1][1].append([
                            i['score_breakdown'][alliance]['autoBouldersLow'],
                            i['score_breakdown'][alliance]['autoBouldersHigh'],
                            i['score_breakdown'][alliance]['teleopBouldersLow'],
                            i['score_breakdown'][alliance]['teleopBouldersHigh']
                        ])
                        roboDataset[1][0].append([0])
                except:
                    print "exception in event " + event + ", team " + str(
                        teamNumber) + ", match #" + str(i['match_number'])
                    pass
    # --- Network 1: predict the four boulder counts from a dummy input. ---
    hidden_layer_1 = mlp.Tanh(layer_name='hidden1', dim=16, irange=.1,
                              init_bias=1.)
    hidden_layer_2 = mlp.Tanh(layer_name='hidden2', dim=8, irange=.1,
                              init_bias=1.)
    output_layer = mlp.Linear(layer_name='output', dim=4, irange=.1,
                              init_bias=1.)
    layers = [hidden_layer_1, hidden_layer_2, output_layer]
    trainer = sgd.SGD(learning_rate=.05, batch_size=10,
                      termination_criterion=EpochCounter(epochsMode))
    ann = mlp.MLP(layers, nvis=1)
    roboDataset[1][0] = numpy.array(roboDataset[1][0])
    roboDataset[1][1] = numpy.array(roboDataset[1][1])
    try:
        ds = datasets.DenseDesignMatrix(X=roboDataset[1][0],
                                        y=roboDataset[1][1])
    except IndexError:
        # NOTE(review): `ds` stays undefined on this path; trainer.setup
        # below would then raise — confirm this is acceptable.
        print "IndexError in dataset creation for team", teamNumber, ",", "length of dataset=", len(
            roboDataset[1])
    ret = [[], []]
    start = time.time()
    if len(
            roboDataset[1][1]
    ) > 4:  ## only here to train for teams with enough matches to get _anywhere_ within reasonably reliable net results
        print "Scoring team", teamNumber, "in goals"
        trainer.setup(ann, ds)
        print "training for <=", epochsMode, "epochs (team", teamNumber, ")"
        while True:
            trainer.train(dataset=ds)
            ann.monitor.report_epoch()
            if not trainer.continue_learning(ann):
                break
        print "network training time:", int(
            time.time() - start), "seconds for team", teamNumber
        # Evaluate the goal network once on the dummy input 0.
        inputs = numpy.array([[0]])
        for i in ann.fprop(theano.shared(inputs, name='inputs')).eval()[0]:
            ret[0].append(i)
    # --- Network 2: predict crossing count per defense id. ---
    hidden_layer_1 = mlp.Tanh(layer_name='hidden1', dim=16, irange=.1,
                              init_bias=1.)
    hidden_layer_2 = mlp.Tanh(layer_name='hidden2', dim=8, irange=.1,
                              init_bias=1.)
    output_layer = mlp.Linear(layer_name='output', dim=1, irange=.1,
                              init_bias=1.)
    layers = [hidden_layer_1, hidden_layer_2, output_layer]
    trainer = sgd.SGD(learning_rate=.05, batch_size=10,
                      termination_criterion=EpochCounter(epochsMode))
    ann = mlp.MLP(layers, nvis=1)
    roboDataset[0][0] = numpy.array(roboDataset[0][0])
    roboDataset[0][1] = numpy.array(roboDataset[0][1])
    try:
        ds = datasets.DenseDesignMatrix(X=roboDataset[0][0],
                                        y=roboDataset[0][1])
    except IndexError:
        print "IndexError in dataset creation for team", teamNumber, ",", "length of dataset=", len(
            roboDataset[0][1])
    start = time.time()
    if len(
            roboDataset[0][1]
    ) > 4:  ## only here to train for teams with enough matches to get _anywhere_ within reasonably reliable net results
        print "Scoring team", teamNumber, "in defenses"
        trainer.setup(ann, ds)
        print "training for <=", epochsMode, "epochs (team", teamNumber, ")"
        while True:
            trainer.train(dataset=ds)
            ann.monitor.report_epoch()
            if not trainer.continue_learning(ann):
                break
        print "network training time:", int(
            time.time() - start), "seconds for team", teamNumber
        # inputs = numpy.array([[0]])
        # Query the defense network at every defense id 0-8.
        inputs = [[0], [1], [2], [3], [4], [5], [6], [7], [8]]
        for i in inputs:
            ret[1].append(
                ann.fprop(theano.shared(numpy.array([i]),
                                        name='inputs')).eval()[0][0])
        # for i in ann.fprop(theano.shared(inputs, name='inputs')).eval()[0]:
        #     ret.append(i)
    return ret
def train(d): print 'Creating dataset' # load mnist here # X = d.train_X # y = d.train_Y # test_X = d.test_X # test_Y = d.test_Y # nb_classes = len(np.unique(y)) # train_y = convert_one_hot(y) # train_set = DenseDesignMatrix(X=X, y=y) train = DenseDesignMatrix(X=d.train_X, y=convert_one_hot(d.train_Y)) valid = DenseDesignMatrix(X=d.valid_X, y=convert_one_hot(d.valid_Y)) test = DenseDesignMatrix(X=d.test_X, y=convert_one_hot(d.test_Y)) print 'Setting up' batch_size = 1000 conv = mlp.ConvRectifiedLinear( layer_name='c0', output_channels=20, irange=.05, kernel_shape=[5, 5], pool_shape=[4, 4], pool_stride=[2, 2], # W_lr_scale=0.25, max_kernel_norm=1.9365) mout = MaxoutConvC01B(layer_name='m0', num_pieces=4, num_channels=96, irange=.05, kernel_shape=[5, 5], pool_shape=[4, 4], pool_stride=[2, 2], W_lr_scale=0.25, max_kernel_norm=1.9365) mout2 = MaxoutConvC01B(layer_name='m1', num_pieces=4, num_channels=96, irange=.05, kernel_shape=[5, 5], pool_shape=[4, 4], pool_stride=[2, 2], W_lr_scale=0.25, max_kernel_norm=1.9365) sigmoid = mlp.Sigmoid( layer_name='Sigmoid', dim=500, sparse_init=15, ) smax = mlp.Softmax(layer_name='y', n_classes=10, irange=0.) in_space = Conv2DSpace(shape=[28, 28], num_channels=1, axes=['c', 0, 1, 'b']) net = mlp.MLP( layers=[mout, mout2, smax], input_space=in_space, # nvis=784, ) trainer = bgd.BGD(batch_size=batch_size, line_search_mode='exhaustive', conjugate=1, updates_per_batch=10, monitoring_dataset={ 'train': train, 'valid': valid, 'test': test }, termination_criterion=termination_criteria.MonitorBased( channel_name='valid_y_misclass')) trainer = sgd.SGD(learning_rate=0.15, cost=dropout.Dropout(), batch_size=batch_size, monitoring_dataset={ 'train': train, 'valid': valid, 'test': test }, termination_criterion=termination_criteria.MonitorBased( channel_name='valid_y_misclass')) trainer.setup(net, train) epoch = 0 while True: print 'Training...', epoch trainer.train(dataset=train) net.monitor() epoch += 1
def supervisedLayerwisePRL(trainset, testset):
    '''
    The supervised layerwise training as used in the PRL Paper.

    Trains a conv net in three stages: after each stage the fc/softmax head
    is stripped from the best checkpoint and a new conv layer plus a fresh
    head is appended, then training resumes.

    Input
    ------
    trainset : A path to an hdf5 file created through h5py.
    testset : A path to an hdf5 file created through h5py.

    NOTE(review): `vld` is referenced in every monitoring_dataset below but
    never defined in this function (only `trn`/`tst` are) — this raises a
    NameError unless `vld` exists at module level; confirm.
    '''
    batch_size = 100
    # Both train and test h5py files are expected to have a 'topo_view' and 'y'
    # datasets side them corresponding to the 'b01c' data format as used in
    # pylearn2 and 'y' equivalent to the one hot encoded labels
    trn = HDF5Dataset(filename=trainset,
                      topo_view='topo_view',
                      y='y',
                      load_all=False)
    tst = HDF5Dataset(filename=testset,
                      topo_view='topo_view',
                      y='y',
                      load_all=False)
    '''
    The 1st Convolution and Pooling Layers are added below.
    '''
    h1 = mlp.ConvRectifiedLinear(layer_name='h1',
                                 output_channels=64,
                                 irange=0.05,
                                 kernel_shape=[4, 4],
                                 pool_shape=[4, 4],
                                 pool_stride=[2, 2],
                                 max_kernel_norm=1.9365)
    fc = mlp.RectifiedLinear(layer_name='fc', dim=1500, irange=0.05)
    output = mlp.Softmax(layer_name='y',
                         n_classes=171,
                         irange=.005,
                         max_col_norm=1.9365)
    layers = [h1, fc, output]
    mdl = mlp.MLP(layers,
                  input_space=Conv2DSpace(shape=(70, 70), num_channels=1))
    # One WeightDecay coefficient per layer (h1, fc, y).
    trainer = sgd.SGD(
        learning_rate=0.002,
        batch_size=batch_size,
        learning_rule=learning_rule.RMSProp(),
        cost=SumOfCosts(
            costs=[Default(), WeightDecay(coeffs=[0.0005, 0.0005, 0.0005])]),
        train_iteration_mode='shuffled_sequential',
        monitor_iteration_mode='sequential',
        termination_criterion=EpochCounter(max_epochs=15),
        monitoring_dataset={
            'test': tst,
            'valid': vld
        })
    watcher = best_params.MonitorBasedSaveBest(
        channel_name='valid_y_misclass',
        save_path='./Saved Models/conv_supervised_layerwise_best1.pkl')
    decay = sgd.LinearDecayOverEpoch(start=8, saturate=15, decay_factor=0.1)
    experiment = Train(
        dataset=trn,
        model=mdl,
        algorithm=trainer,
        extensions=[watcher, decay],
    )
    experiment.main_loop()
    # Reload the best checkpoint of stage 1 and keep its monitor history.
    del mdl
    mdl = serial.load('./Saved Models/conv_supervised_layerwise_best1.pkl')
    mdl = push_monitor(mdl, 'k')
    '''
    The 2nd Convolution and Pooling Layers are added below.
    '''
    h2 = mlp.ConvRectifiedLinear(layer_name='h2',
                                 output_channels=64,
                                 irange=0.05,
                                 kernel_shape=[4, 4],
                                 pool_shape=[4, 4],
                                 pool_stride=[2, 2],
                                 max_kernel_norm=1.9365)
    fc = mlp.RectifiedLinear(layer_name='fc', dim=1500, irange=0.05)
    output = mlp.Softmax(layer_name='y',
                         n_classes=171,
                         irange=.005,
                         max_col_norm=1.9365)
    # Strip the softmax and fc head before appending the new stage.
    del mdl.layers[-1]
    mdl.layer_names.remove('y')
    del mdl.layers[-1]
    mdl.layer_names.remove('fc')
    mdl.add_layers([h2, fc, output])
    trainer = sgd.SGD(learning_rate=0.002,
                      batch_size=batch_size,
                      learning_rule=learning_rule.RMSProp(),
                      cost=SumOfCosts(costs=[
                          Default(),
                          WeightDecay(coeffs=[0.0005, 0.0005, 0.0005, 0.0005])
                      ]),
                      train_iteration_mode='shuffled_sequential',
                      monitor_iteration_mode='sequential',
                      termination_criterion=EpochCounter(max_epochs=15),
                      monitoring_dataset={
                          'test': tst,
                          'valid': vld
                      })
    watcher = best_params.MonitorBasedSaveBest(
        channel_name='valid_y_misclass',
        save_path='./Saved Models/conv_supervised_layerwise_best2.pkl')
    decay = sgd.LinearDecayOverEpoch(start=8, saturate=15, decay_factor=0.1)
    experiment = Train(
        dataset=trn,
        model=mdl,
        algorithm=trainer,
        extensions=[watcher, decay],
    )
    experiment.main_loop()
    # Reload the best checkpoint of stage 2.
    del mdl
    mdl = serial.load('./Saved Models/conv_supervised_layerwise_best2.pkl')
    mdl = push_monitor(mdl, 'l')
    '''
    The 3rd Convolution and Pooling Layers are added below.
    '''
    # NOTE(review): this layer reuses layer_name='h2' although an 'h2' layer
    # from stage 2 is still in the model, and the head switches from
    # n_classes=171 to 10 — confirm both are intentional.
    h3 = mlp.ConvRectifiedLinear(layer_name='h2',
                                 output_channels=64,
                                 irange=0.05,
                                 kernel_shape=[4, 4],
                                 pool_shape=[4, 4],
                                 pool_stride=[2, 2],
                                 max_kernel_norm=1.9365)
    fc = mlp.RectifiedLinear(layer_name='h3', dim=1500, irange=0.05)
    output = mlp.Softmax(layer_name='y',
                         n_classes=10,
                         irange=.005,
                         max_col_norm=1.9365)
    del mdl.layers[-1]
    mdl.layer_names.remove('y')
    del mdl.layers[-1]
    mdl.layer_names.remove('fc')
    # NOTE(review): unlike stage 2, no fresh fc layer is appended here
    # (only h3 and the softmax), yet five WeightDecay coeffs are supplied
    # below — verify the coefficient count matches the final layer count.
    mdl.add_layers([h3, output])
    trainer = sgd.SGD(
        learning_rate=.002,
        batch_size=batch_size,
        learning_rule=learning_rule.RMSProp(),
        cost=SumOfCosts(costs=[
            Default(),
            WeightDecay(coeffs=[0.0005, 0.0005, 0.0005, 0.0005, 0.0005])
        ]),
        train_iteration_mode='shuffled_sequential',
        monitor_iteration_mode='sequential',
        termination_criterion=EpochCounter(max_epochs=15),
        monitoring_dataset={
            'test': tst,
            'valid': vld
        })
    watcher = best_params.MonitorBasedSaveBest(
        channel_name='valid_y_misclass',
        save_path='./Saved Models/conv_supervised_layerwise_best3.pkl')
    decay = sgd.LinearDecayOverEpoch(start=8, saturate=15, decay_factor=0.1)
    experiment = Train(
        dataset=trn,
        model=mdl,
        algorithm=trainer,
        extensions=[watcher, decay],
    )
    experiment.main_loop()
# create XOR dataset ds = XOR() # create hidden layer with 2 nodes, init weights in range -0.1 to 0.1 and add # a bias with value 1 hidden_layer = mlp.Sigmoid(layer_name='hidden', dim=2, irange=.1, init_bias=1.) # create Softmax output layer output_layer = mlp.Softmax(2, 'output', irange=.1) # create Stochastic Gradient Descent trainer that runs for 400 epochs trainer = sgd.SGD(learning_rate=.05, batch_size=10, termination_criterion=EpochCounter(400)) layers = [hidden_layer, output_layer] # create neural net that takes two inputs ann = mlp.MLP(layers, nvis=2) trainer.setup(ann, ds) # train neural net until the termination criterion is true while True: trainer.train(dataset=ds) ann.monitor.report_epoch() ann.monitor() if not trainer.continue_learning(ann): break inputs = np.array([[0, 0]]) print(ann.fprop(theanoShared(inputs, name='inputs')).eval()) inputs = np.array([[0, 1]]) print(ann.fprop(theanoShared(inputs, name='inputs')).eval()) inputs = np.array([[1, 0]]) print(ann.fprop(theanoShared(inputs, name='inputs')).eval())
# create Softmax output layer output_layer = mlp.Softmax(3, 'output', irange=.1) # create Stochastic Gradient Descent trainer that runs for 400 epochs cost = NegativeLogLikelihoodCost() rule = Momentum(0.9) # rule = Momentum(0.9, True) # update_callbacks=ExponentialDecay(1 + 1e-5, 0.001) trainer = sgd.SGD(learning_rate=0.01, cost=cost, batch_size=128, termination_criterion=EpochCounter(1000), monitoring_dataset=vds, learning_rule=rule) layers = [hidden_layer, hidden_layer2, output_layer] # create neural net that takes two inputs ann = mlp.MLP(layers, nvis=ds.feat_cnt) trainer.setup(ann, ds) print trainer.cost # train neural net until the termination criterion is true iteration = 0 while True: trainer.train(dataset=ds) ann.monitor.report_epoch() ann.monitor() if iteration % 10 == 0: if not debug: with open(modelName, 'wb') as f: pickle.dump(ann, f)
from pylearn2.termination_criteria import EpochCounter from pylearn2.datasets.dense_design_matrix import DenseDesignMatrix from csv_data import CSVData import numpy as np class MLPData(DenseDesignMatrix): def __init__(self, X, y): super(MLPData, self).__init__(X=X, y=y.astype(int), y_labels=2) threshold = 0.95 hidden_layer = mlp.Sigmoid(layer_name='h0', dim=10, sparse_init=10) output_layer = mlp.Softmax(layer_name='y', n_classes=2, irange=0.05) layers = [hidden_layer, output_layer] neural_net = mlp.MLP(layers, nvis=10) trainer = sgd.SGD(batch_size=5, learning_rate=.1, termination_criterion=EpochCounter(100)) first = True learning = True correct = 0 incorrect = 0 total = 0 data = CSVData("results2.csv") while True: X, y = data.get_data() if (X == None): break
def agent_init(self, task_spec_string):
    """
    This function is called once at the beginning of an experiment.

    Arguments:
        task_spec_string - A string defining the task.  This string is decoded
        using TaskSpecVRLGLUE3.TaskSpecParser

    Initialises bookkeeping counters, epsilon-greedy parameters, and either
    loads a pickled CNN from disk or builds a fresh maxout-conv network,
    then creates the replay dataset and the policy-test CSV file.
    """
    self.start_time = time.time()
    self.image = None
    self.show_ale = False
    self.total_reward = 0
    self.mini_batch_size = 32
    self.num_mini_batches = 1
    self.frame_count = 0
    self.frames_trained = 0
    self.qvalue_sum = 0
    self.qvalue_count = 0
    # NOTE(review): bare attribute access — this raises AttributeError unless
    # a class attribute exists; presumably meant to be an assignment such as
    # `self.predicted_reward = 0`. Confirm and fix.
    self.predicted_reward
    learning_rate = .00001
    self.testing_policy = False
    self.epoch_counter = 0
    self.epochs_until_test = 5
    self.policy_test_file_name = "results.csv"
    load_file = False
    load_file_name = "cnnparams.pkl"
    self.save_file_name = "cnnparams.pkl"
    self.counter = 0
    self.cur_action = 0
    # starting value for epsilon-greedy
    self.epsilon = 1
    TaskSpec = TaskSpecVRLGLUE3.TaskSpecParser(task_spec_string)
    if TaskSpec.valid:
        assert ((len(TaskSpec.getIntObservations()) == 0) != \
            (len(TaskSpec.getDoubleObservations()) == 0)), \
            "expecting continous or discrete observations. Not both."
        assert len(TaskSpec.getDoubleActions()) == 0, \
            "expecting no continuous actions"
        assert not TaskSpec.isSpecial(TaskSpec.getIntActions()[0][0]), \
            " expecting min action to be a number not a special value"
        assert not TaskSpec.isSpecial(TaskSpec.getIntActions()[0][1]), \
            " expecting max action to be a number not a special value"
        self.num_actions = TaskSpec.getIntActions()[0][1] + 1
    # NOTE(review): unconditionally overrides the value derived from the task
    # spec above — presumably a hard-coded experiment setting; confirm.
    self.num_actions = 3
    self.int_states = len(TaskSpec.getIntObservations()) > 0
    # Create neural network and initialize trainer and dataset
    if load_file:
        thefile = open(load_file_name, "r")
        self.cnn = cPickle.load(thefile)
    else:
        # Two maxout conv layers -> rectified hidden layer -> linear Q-output.
        self.first_conv_layer = maxout.MaxoutConvC01B(16, 1, (8, 8), (1, 1),
                                                      (1, 1),
                                                      "first conv layer",
                                                      irange=.1,
                                                      kernel_stride=(4, 4),
                                                      min_zero=True)
        self.second_conv_layer = maxout.MaxoutConvC01B(32, 1, (4, 4), (1, 1),
                                                       (1, 1),
                                                       "second conv layer",
                                                       irange=.1,
                                                       kernel_stride=(2, 2),
                                                       min_zero=True)
        self.rect_layer = mlp.RectifiedLinear(dim=256,
                                              layer_name="rectified layer",
                                              irange=.1)
        self.output_layer = mlp.Linear(self.num_actions, "output layer",
                                       irange=.1)
        layers = [self.first_conv_layer, self.second_conv_layer,
                  self.rect_layer, self.output_layer]
        # 80x80 input, 4 stacked frames, c01b axis order for MaxoutConvC01B.
        self.cnn = mlp.MLP(layers,
                           input_space=Conv2DSpace((80, 80),
                                                   num_channels=4,
                                                   axes=('c', 0, 1, 'b')),
                           batch_size=self.mini_batch_size)
    self.data = nqd.NeuralRewardPredictorDataset(
        self.cnn,
        mini_batch_size=self.mini_batch_size,
        num_mini_batches=self.num_mini_batches,
        learning_rate=learning_rate)
    # Create appropriate RL-Glue objects for storing these.
    self.last_action = Action()
    self.last_observation = Observation()
    # Start the policy-test log with its CSV header.
    thefile = open(self.policy_test_file_name, "w")
    thefile.write("Reward, Predicted reward, Frames trained\n")
    thefile.close()
ds_valid, ds_test = ds_valid.split(0.7) ##################################### #Define Model ##################################### # create sigmoid hidden layer with 20 nodes, init weights in range -0.05 to 0.05 and add # a bias with value 1 hidden_layer = mlp.Sigmoid(layer_name='h0', dim=1, irange=.05, init_bias=1.) # softmax output layer output_layer = mlp.Softmax(2, 'softmax', irange=.05) layers = [hidden_layer, output_layer] # create neural net ann = mlp.MLP(layers, nvis=ds_train.nr_inputs) ##################################### #Define Training ##################################### #L1 Weight Decay L1_cost = PL.costs.cost.SumOfCosts([PL.costs.cost.MethodCost(method='cost_from_X'), PL.costs.mlp.L1WeightDecay(coeffs=[0.1, 0.01])]) # momentum initial_momentum = .5 final_momentum = .99 start = 1 saturate = 20 momentum_adjustor = learning_rule.MomentumAdjustor(final_momentum, start, saturate) momentum_rule = learning_rule.Momentum(initial_momentum)
def main(): start_time = time.clock() # optin parser parser = OptionParser() parser.add_option("-p", dest="plot_prediction", action="store_true", default=False, help="plot model prediction transitions") parser.add_option("-f", "--file", dest="out_filename", default=None, help="write animation to FILE (require -a option)", metavar="FILE") (options, args) = parser.parse_args() # make Detaset ds = sinDataset(SIZE_DATA) # make layers hidden_layer1 = mlp.Tanh(layer_name='hidden1', dim=20, irange=0.5, init_bias=1.0) hidden_layer2 = mlp.Tanh(layer_name='hidden2', dim=4, irange=0.5, init_bias=1.0) output_layer = mlp.Linear(layer_name='out', dim=1, irange=0.5, init_bias=1) # set layers layers = [hidden_layer1, hidden_layer2, output_layer] model = mlp.MLP(layers, nvis=1) # set training rule and extensions algorithm = sgd.SGD(learning_rate=0.01, batch_size=1, monitoring_batch_size=1, monitoring_batches=1, monitoring_dataset=ds, termination_criterion=EpochCounter(MAX_EPOCHS)) extensions = [sgd.MonitorBasedLRAdjuster()] if options.plot_prediction: plotEx = PlotPredictionOnMonitor() extensions.append(plotEx) trainer = Train(model=model, algorithm=algorithm, dataset=ds, extensions=extensions, save_path='./funcmodel.pkl', save_freq=500) # training loop trainer.main_loop() end_time = time.clock() print("tortal_seconds_this_learning : %f s(%f min)" % (end_time - start_time, (end_time - start_time) / 60)) if options.plot_prediction: plotEx.plot(out_filename=options.out_filename)