def real_time_prediction(): ### loading new images for classification starts here fo = open(save_path, 'rb') # batch path batch1 = pickle.load(fo) fo.close() xarr = np.array(batch1['data'], dtype='float32') xarr = global_contrast_normalize(xarr, scale=55.) no_of_row = len(batch1['data']) xdat = np.array( xarr.reshape((no_of_row, 3, 32, 32)), dtype='float32') #reshape first parameter = batch matrix no. of row xdat = np.transpose(xdat[:, :, :, :], (1, 2, 3, 0)) x = dense_design_matrix.DenseDesignMatrix(topo_view=xdat, axes=['c', 0, 1, 'b']) x.apply_preprocessor(my_pca_preprocessor, can_fit=False) tarr = x.get_topological_view() #print tarr y = f(tarr) ###########searching max in matrix################################################## #j = no. of row in prediction_batch #i = no. of classes (0-9) #result=('airplane','automobile','bird','cat','deer','dog','frog','horse','ship','truck') result = ('bottle', 'book', 'toy', 'pen', 'chair', 'coin', 'phone', 'hand', 'note', 'head') resultString = '' for j in range(0, no_of_row): max_index = 0 max_no = y[j][0] #print max_no for i in range(0, 10): if y[j][i] > max_no: max_no = y[j][i] max_index = i # print max_index print "======================" print 'Photo', j + 1, ' max=', result[max_index] if j > 0: resultString += ',' resultString += result[max_index] #print 'y =', y ###################################################################################3 return resultString
def extract_features(self, dset, preproc=None, can_fit=False):
    """Run ``dset`` through the wrapped model and return a new
    DenseDesignMatrix holding the extracted features.

    Parameters
    ----------
    dset : DenseDesignMatrix
        Input dataset; ``dset.X`` is the design matrix.
    preproc : Preprocessor, optional
        If given, applied to the new dataset after extraction.
    can_fit : bool
        Forwarded to ``preproc.apply`` — whether the preprocessor may
        fit its parameters on this data.  Bug fix: this was previously
        hard-coded to ``True``, silently ignoring the parameter (and its
        declared default of ``False``).

    Returns
    -------
    DenseDesignMatrix
        Dataset whose ``X`` contains the model's output features (or the
        raw ``dset.X`` when no model is set).
    """
    new_dset = dense_design_matrix.DenseDesignMatrix()
    if str(self.model.__class__).find('DBM') != -1:
        # DBMs need their positive-phase function prepared for the whole
        # batch before the feature function can be evaluated.
        self.model.set_batch_size(len(dset.X))
        self.model.setup_pos_func(dset.X)
        self.model.pos_func()
        new_dset.X = self.model.fn(dset.X)
    elif self.model:
        # Probe the output width with a single example, then fill the
        # feature matrix batch by batch to bound memory use.
        outsize = self.model.fn(dset.X[:1]).shape[1]
        X = numpy.zeros((len(dset.X), outsize))
        for i in xrange(0, len(X), self.model.batch_size):
            batch = dset.X[i : i + self.model.batch_size]
            X[i : i + len(batch)] = self.model.fn(batch)
        new_dset.X = X
    else:
        # No model configured: pass the raw features through unchanged.
        new_dset.X = dset.X
    if preproc:
        preproc.apply(new_dset, can_fit=can_fit)
    return new_dset
def main():
    """Train a one-hidden-layer MLP (943 -> n_hidden -> 4760) on the
    bgedv2 gene-expression data with dropout and momentum SGD.

    After every epoch, logs the mean absolute error (MAE) on the
    validation, test, 1000G, GTEx and a monitored training subset, decays
    the learning rate when training MAE rises, and checkpoints the models
    achieving the best validation MAE and best 1000G MAE.

    Command line: base_name n_epoch n_hidden include_rate
    """
    base_name = sys.argv[1]  # prefix for all output files (.log, .pkl, .npy)
    n_epoch = int(sys.argv[2])  # number of training epochs
    n_hidden = int(sys.argv[3])  # hidden-layer width
    include_rate = float(sys.argv[4])  # keep probability (1 - dropout rate)
    in_size = 943  # input units (landmark genes)
    out_size = 4760  # output units (target genes)
    b_size = 200  # minibatch size
    l_rate = 5e-4  # initial learning rate
    l_rate_min = 1e-5  # learning-rate floor
    decay_factor = 0.9  # decay applied when training MAE increases
    lr_scale = 3.0  # learning-rate multiplier for the output layer weights
    momentum = 0.5  # momentum coefficient
    # Glorot/Xavier-style init ranges: sqrt(6 / (fan_in + fan_out)) per layer.
    init_vals = np.sqrt(6.0/(np.array([in_size, n_hidden])+np.array([n_hidden, out_size])))
    print 'loading data...'
    # Load train / validation / test splits plus the external 1000G and
    # GTEx evaluation sets (float64 .npy files in the working directory).
    X_tr = np.load('bgedv2_X_tr_float64.npy')
    Y_tr = np.load('bgedv2_Y_tr_0-4760_float64.npy')
    Y_tr_target = np.array(Y_tr)
    X_va = np.load('bgedv2_X_va_float64.npy')
    Y_va = np.load('bgedv2_Y_va_0-4760_float64.npy')
    Y_va_target = np.array(Y_va)
    X_te = np.load('bgedv2_X_te_float64.npy')
    Y_te = np.load('bgedv2_Y_te_0-4760_float64.npy')
    Y_te_target = np.array(Y_te)
    X_1000G = np.load('1000G_X_float64.npy')
    Y_1000G = np.load('1000G_Y_0-4760_float64.npy')
    Y_1000G_target = np.array(Y_1000G)
    X_GTEx = np.load('GTEx_X_float64.npy')
    Y_GTEx = np.load('GTEx_Y_0-4760_float64.npy')
    Y_GTEx_target = np.array(Y_GTEx)
    # Fixed seed so the monitoring subset is reproducible across runs.
    random.seed(0)
    # 5000-example subset used to estimate training MAE cheaply each epoch.
    # 88807 is presumably the training-set size — TODO confirm.
    monitor_idx_tr = random.sample(range(88807), 5000)
    # Wrap training data as a pylearn2 DenseDesignMatrix (float32 for GPU).
    data_tr = p2_dt_dd.DenseDesignMatrix(X=X_tr.astype('float32'),
                                         y=Y_tr.astype('float32'))
    X_tr_monitor, Y_tr_monitor_target = X_tr[monitor_idx_tr, :], Y_tr_target[monitor_idx_tr, :]
    # Hidden layer: Tanh activation; output layer: linear (regression).
    h1_layer = p2_md_mlp.Tanh(layer_name='h1', dim=n_hidden, irange=init_vals[0],
                              W_lr_scale=1.0, b_lr_scale=1.0)
    o_layer = p2_md_mlp.Linear(layer_name='y', dim=out_size, irange=0.0001,
                               W_lr_scale=lr_scale, b_lr_scale=1.0)
    model = p2_md_mlp.MLP(nvis=in_size, layers=[h1_layer, o_layer], seed=1)
    # Dropout only on the output layer's input ('y'); inputs kept with
    # probability include_rate and rescaled by 1/include_rate.
    dropout_cost = p2_ct_mlp_dropout.Dropout(
        input_include_probs={'h1':1.0, 'y':include_rate},
        input_scales={'h1':1.0, 'y':np.float32(1.0/include_rate)})
    # SGD with momentum; NOTE(review): EpochCounter is hard-coded to 1000
    # epochs, but the manual loop below runs exactly n_epoch iterations,
    # so the criterion is effectively unused — confirm intent.
    algorithm = p2_alg_sgd.SGD(batch_size=b_size,
                               learning_rate=l_rate,
                               learning_rule = p2_alg_lr.Momentum(momentum),
                               termination_criterion=p2_termcri.EpochCounter(max_epochs=1000),
                               cost=dropout_cost)
    train = pylearn2.train.Train(dataset=data_tr, model=model, algorithm=algorithm)
    train.setup()
    x = T.matrix()
    y = model.fprop(x)
    # Compiled forward pass: predictions for an arbitrary input matrix.
    f = theano.function([x], y)
    # MAE trackers; 10.0 is just a large sentinel that any real MAE beats.
    MAE_va_old = 10.0
    MAE_va_best = 10.0
    MAE_tr_old = 10.0
    MAE_te_old = 10.0
    MAE_1000G_old = 10.0
    MAE_1000G_best = 10.0
    MAE_GTEx_old = 10.0
    outlog = open(base_name + '.log', 'w')
    # Tab-separated log header (note: 'learing_rate' typo is preserved in
    # the output format).
    log_str = '\t'.join(map(str, ['epoch', 'MAE_va', 'MAE_va_change',
                                  'MAE_te', 'MAE_te_change',
                                  'MAE_1000G', 'MAE_1000G_change',
                                  'MAE_GTEx', 'MAE_GTEx_change',
                                  'MAE_tr', 'MAE_tr_change',
                                  'learing_rate', 'time(sec)']))
    print log_str
    outlog.write(log_str + '\n')
    sys.stdout.flush()  # force the header out immediately
    for epoch in range(0, n_epoch):
        t_old = time.time()  # epoch start time
        train.algorithm.train(train.dataset)  # one SGD pass over the data
        # Predictions on each evaluation set (float32 in, float64 out for
        # stable error accumulation).
        Y_va_hat = f(X_va.astype('float32')).astype('float64')
        Y_te_hat = f(X_te.astype('float32')).astype('float64')
        Y_tr_hat_monitor = f(X_tr_monitor.astype('float32')).astype('float64')
        Y_1000G_hat = f(X_1000G.astype('float32')).astype('float64')
        Y_GTEx_hat = f(X_GTEx.astype('float32')).astype('float64')
        # Mean absolute error against the ground truth.
        MAE_va = np.abs(Y_va_target - Y_va_hat).mean()
        MAE_te = np.abs(Y_te_target - Y_te_hat).mean()
        MAE_tr = np.abs(Y_tr_monitor_target - Y_tr_hat_monitor).mean()
        MAE_1000G = np.abs(Y_1000G_target - Y_1000G_hat).mean()
        MAE_GTEx = np.abs(Y_GTEx_target - Y_GTEx_hat).mean()
        # Relative change vs. previous epoch (positive = got worse).
        MAE_va_change = (MAE_va - MAE_va_old)/MAE_va_old
        MAE_te_change = (MAE_te - MAE_te_old)/MAE_te_old
        MAE_tr_change = (MAE_tr - MAE_tr_old)/MAE_tr_old
        MAE_1000G_change = (MAE_1000G - MAE_1000G_old)/MAE_1000G_old
        MAE_GTEx_change = (MAE_GTEx - MAE_GTEx_old)/MAE_GTEx_old
        # Roll the trackers forward.
        MAE_va_old = MAE_va
        MAE_te_old = MAE_te
        MAE_tr_old = MAE_tr
        MAE_1000G_old = MAE_1000G
        MAE_GTEx_old = MAE_GTEx
        t_new = time.time()  # epoch end time
        l_rate = train.algorithm.learning_rate.get_value()
        log_str = '\t'.join(map(str, [epoch+1,
                                      '%.6f'%MAE_va, '%.6f'%MAE_va_change,
                                      '%.6f'%MAE_te, '%.6f'%MAE_te_change,
                                      '%.6f'%MAE_1000G, '%.6f'%MAE_1000G_change,
                                      '%.6f'%MAE_GTEx, '%.6f'%MAE_GTEx_change,
                                      '%.6f'%MAE_tr, '%.6f'%MAE_tr_change,
                                      '%.5f'%l_rate, int(t_new-t_old)]))
        print log_str
        outlog.write(log_str + '\n')
        sys.stdout.flush()
        # Decay the learning rate when training error increased, but never
        # below the floor l_rate_min.
        if MAE_tr_change > 0:
            l_rate = l_rate*decay_factor
        if l_rate < l_rate_min:
            l_rate = l_rate_min
        train.algorithm.learning_rate.set_value(np.float32(l_rate))
        # Checkpoint on new best validation MAE.
        if MAE_va < MAE_va_best:
            MAE_va_best = MAE_va
            outmodel = open(base_name + '_bestva_model.pkl', 'wb')
            pkl.dump(model, outmodel)
            outmodel.close()
            np.save(base_name + '_bestva_Y_te_hat.npy', Y_te_hat)
            np.save(base_name + '_bestva_Y_va_hat.npy', Y_va_hat)
        # Separate checkpoint on new best 1000G MAE.
        if MAE_1000G < MAE_1000G_best:
            MAE_1000G_best = MAE_1000G
            outmodel = open(base_name + '_best1000G_model.pkl', 'wb')
            pkl.dump(model, outmodel)
            outmodel.close()
            np.save(base_name + '_best1000G_Y_1000G_hat.npy', Y_1000G_hat)
            np.save(base_name + '_best1000G_Y_GTEx_hat.npy', Y_GTEx_hat)
    print 'MAE_va_best : %.6f' % (MAE_va_best)
    print 'MAE_1000G_best : %.6f' % (MAE_1000G_best)
    outlog.write('MAE_va_best : %.6f' % (MAE_va_best) + '\n')
    outlog.write('MAE_1000G_best : %.6f' % (MAE_1000G_best) + '\n')
    outlog.close()
def main(): base_name = sys.argv[ 1] # 获取第一个参数 sys.argv[ ]记录(获取)命令行参数 sys(system) argv(argument variable)参数变量,该变量为list列表 n_epoch = int(sys.argv[2]) #获取第二个参数 n_hidden = int(sys.argv[3]) #获取第三个参数作为隐层神经元个数 include_rate = float(sys.argv[4]) in_size = 1001 #输入层神经元个数(标记基因个数) out_size = 1 #输出层神经元个数 b_size = 200 #偏差值 l_rate = 5e-4 #学习速率 l_rate_min = 1e-5 #学习速率最小值 decay_factor = 0.9 #衰减因数 lr_scale = 3.0 momentum = 0.5 init_vals = np.sqrt(6.0 / (np.array([in_size, n_hidden]) + np.array([n_hidden, out_size]))) #初始值,返回平方根 print 'loading data...' #显示载入数据 X_tr = np.load( 'geno_X_tr_float64.npy') # tr(traing)以numpy专用二进制类型保存训练数据集的数据 Y_tr = np.load('pheno_Y_tr_0-4760_float64.npy') Y_tr_pheno = np.array(Y_tr) X_va = np.load( 'geno_X_va_float64.npy') #验证集(模型选择,在学习到不同复杂度的模型中,选择对验证集有最小预测误差的模型) Y_va = np.load('pheno_Y_va_0-4760_float64.npy') Y_va_target = np.array(Y_va) X_te = np.load('geno_te_float64.npy') #测试集(对学习方法的评估) Y_te = np.load('pheno_Y_te_0-4760_float64.npy') Y_te_target = np.array(Y_te) random.seed(0) #设置生成随机数用的整数起始值。调用任何其他random模块函数之前调用这个函数 monitor_idx_tr = random.sample(range(88807), 5000) #监测训练 #将训练数据集类型设为32位浮点型,The DenseDesignMatrix class and related code Functionality for representing data that can be described as a dense matrix (rather than a sparse matrix) with each row containing an example and each column corresponding to a different feature. 
data_tr = p2_dt_dd.DenseDesignMatrix(X=X_tr.astype('float32'), y=Y_tr.astype('float32')) X_tr_monitor, Y_tr_monitor_target = X_tr[monitor_idx_tr, :], Y_tr_target[ monitor_idx_tr, :] #一个隐层,用Tanh()作激活函数; 输出层用线性函数作激活函数 h1_layer = p2_md_mlp.Tanh(layer_name='h1', dim=n_hidden, irange=init_vals[0], W_lr_scale=1.0, b_lr_scale=1.0) o_layer = p2_md_mlp.Linear(layer_name='y', dim=out_size, irange=0.0001, W_lr_scale=lr_scale, b_lr_scale=1.0) #Multilayer Perceptron;nvis(Number of “visible units” input units) layers(a list of layer objects,最后1层指定MLP的输出空间) model = p2_md_mlp.MLP(nvis=in_size, layers=[h1_layer, o_layer], seed=1) dropout_cost = p2_ct_mlp_dropout.Dropout(input_include_probs={ 'h1': 1.0, 'y': include_rate }, input_scales={ 'h1': 1.0, 'y': np.float32(1.0 / include_rate) }) #随机梯度下降法 algorithm = p2_alg_sgd.SGD( batch_size=b_size, learning_rate=l_rate, learning_rule=p2_alg_lr.Momentum(momentum), termination_criterion=p2_termcri.EpochCounter(max_epochs=1000), cost=dropout_cost) #训练 根据前面的定义 :dataset为一个密集型矩阵,model为MLP多层神经网络,algorithm为SGD train = pylearn2.train.Train(dataset=data_tr, model=model, algorithm=algorithm) train.setup() x = T.matrix() #定义为一个二维数组 #fprop(state_below) does the forward prop transformation y = model.fprop(x) f = theano.function([x], y) #定义一个function函数,输入为x,输出为y MAE_va_old = 10.0 #平均绝对误差 MAE_va_best = 10.0 MAE_tr_old = 10.0 #训练误差 MAE_te_old = 10.0 MAE_1000G_old = 10.0 MAE_1000G_best = 10.0 MAE_GTEx_old = 10.0 #base_name = sys.argv[1] # 获取第一个参数 sys.argv[ ]记录(获取)命令行参数 outlog = open(base_name + '.log', 'w') log_str = '\t'.join( map(str, [ 'epoch', 'MAE_va', 'MAE_va_change', 'MAE_te', 'MAE_te_change', 'MAE_tr', 'MAE_tr_change', 'learing_rate', 'time(sec)' ])) print log_str #输出运行日志 outlog.write(log_str + '\n') #Python的标准输出缓冲(这意味着它收集“写入”标准出来之前,将其写入到终端的数据)。调用sys.stdout.flush()强制其“缓冲 sys.stdout.flush() for epoch in range(0, n_epoch): t_old = time.time() train.algorithm.train(train.dataset) Y_va_hat = f(X_va.astype('float32')).astype('float64') Y_te_hat = 
f(X_te.astype('float32')).astype('float64') Y_tr_hat_monitor = f(X_tr_monitor.astype('float32')).astype('float64') #计算平均绝对误差 MAE_va = np.abs(Y_va_target - Y_va_hat).mean() MAE_te = np.abs(Y_te_target - Y_te_hat).mean() MAE_tr = np.abs(Y_tr_monitor_target - Y_tr_hat_monitor).mean() #误差变换率 MAE_va_change = (MAE_va - MAE_va_old) / MAE_va_old MAE_te_change = (MAE_te - MAE_te_old) / MAE_te_old MAE_tr_change = (MAE_tr - MAE_tr_old) / MAE_tr_old #将old误差值更新为当前误差值 MAE_va_old = MAE_va MAE_te_old = MAE_te MAE_tr_old = MAE_tr #返回当前的时间戳(1970纪元后经过的浮点秒数) t_new = time.time() l_rate = train.algorithm.learning_rate.get_value() log_str = '\t'.join( map(str, [ epoch + 1, '%.6f' % MAE_va, '%.6f' % MAE_va_change, '%.6f' % MAE_te, '%.6f' % MAE_te_change, '%.6f' % MAE_tr, '%.6f' % MAE_tr_change, '%.5f' % l_rate, int(t_new - t_old) ])) print log_str outlog.write(log_str + '\n') sys.stdout.flush() if MAE_tr_change > 0: #训练误差变换率大于0时,学习速率乘上一个衰减因子 l_rate = l_rate * decay_factor if l_rate < l_rate_min: #学习速率小于最小速率时,更新为最小速率 l_rate = l_rate_min train.algorithm.learning_rate.set_value(np.float32(l_rate)) if MAE_va < MAE_va_best: MAE_va_best = MAE_va outmodel = open(base_name + '_bestva_model.pkl', 'wb') pkl.dump(model, outmodel) outmodel.close() np.save(base_name + '_bestva_Y_te_hat.npy', Y_te_hat) np.save(base_name + '_bestva_Y_va_hat.npy', Y_va_hat) print 'MAE_va_best : %.6f' % (MAE_va_best) outlog.write('MAE_va_best : %.6f' % (MAE_va_best) + '\n') outlog.close()
def main():
    """Train a three-hidden-layer MLP (943 -> n_hidden x3 -> 4760) on the
    bgedv2 gene-expression data (target genes 4760-9520) with dropout and
    momentum SGD.

    After every epoch, logs the mean absolute error (MAE) on the
    validation, test, 1000G, GTEx and a monitored training subset, decays
    the learning rate when training MAE rises, and checkpoints the models
    achieving the best validation MAE and best 1000G MAE.

    Command line: base_name n_epoch n_hidden include_rate
    """
    base_name = sys.argv[1]  # prefix for all output files (.log, .pkl, .npy)
    n_epoch = int(sys.argv[2])  # number of training epochs
    n_hidden = int(sys.argv[3])  # width of each of the three hidden layers
    include_rate = float(sys.argv[4])  # keep probability (1 - dropout rate)
    in_size = 943  # input units (landmark genes)
    out_size = 4760  # output units (target genes 4760-9520)
    b_size = 200  # minibatch size
    l_rate = 3e-4  # initial learning rate (lower than the 1-layer variant)
    l_rate_min = 1e-5  # learning-rate floor
    decay_factor = 0.9  # decay applied when training MAE increases
    lr_scale = 3.0  # learning-rate multiplier for the deeper layers
    momentum = 0.5  # momentum coefficient
    # Glorot/Xavier-style init ranges, one per weight matrix:
    # sqrt(6 / (fan_in + fan_out)) for h1, h2, h3 and the output layer.
    init_vals = np.sqrt(6.0/(np.array([in_size, n_hidden, n_hidden, n_hidden])+np.array([n_hidden, n_hidden, n_hidden, out_size])))
    print 'loading data...'
    # Load train / validation / test splits plus the external 1000G and
    # GTEx evaluation sets (float64 .npy files, target slice 4760-9520).
    X_tr = np.load('bgedv2_X_tr_float64.npy')
    Y_tr = np.load('bgedv2_Y_tr_4760-9520_float64.npy')
    Y_tr_target = np.array(Y_tr)
    X_va = np.load('bgedv2_X_va_float64.npy')
    Y_va = np.load('bgedv2_Y_va_4760-9520_float64.npy')
    Y_va_target = np.array(Y_va)
    X_te = np.load('bgedv2_X_te_float64.npy')
    Y_te = np.load('bgedv2_Y_te_4760-9520_float64.npy')
    Y_te_target = np.array(Y_te)
    X_1000G = np.load('1000G_X_float64.npy')
    Y_1000G = np.load('1000G_Y_4760-9520_float64.npy')
    Y_1000G_target = np.array(Y_1000G)
    X_GTEx = np.load('GTEx_X_float64.npy')
    Y_GTEx = np.load('GTEx_Y_4760-9520_float64.npy')
    Y_GTEx_target = np.array(Y_GTEx)
    # Fixed seed so the monitoring subset is reproducible across runs.
    random.seed(0)
    # 5000-example subset used to estimate training MAE cheaply each epoch.
    # 88807 is presumably the training-set size — TODO confirm.
    monitor_idx_tr = random.sample(range(88807), 5000)
    # Wrap training data as a pylearn2 DenseDesignMatrix (float32 for GPU).
    data_tr = p2_dt_dd.DenseDesignMatrix(X=X_tr.astype('float32'),
                                         y=Y_tr.astype('float32'))
    X_tr_monitor, Y_tr_monitor_target = X_tr[monitor_idx_tr, :], Y_tr_target[monitor_idx_tr, :]
    # Three Tanh hidden layers; linear output layer (regression).
    h1_layer = p2_md_mlp.Tanh(layer_name='h1', dim=n_hidden, irange=init_vals[0],
                              W_lr_scale=1.0, b_lr_scale=1.0)
    h2_layer = p2_md_mlp.Tanh(layer_name='h2', dim=n_hidden, irange=init_vals[1],
                              W_lr_scale=lr_scale, b_lr_scale=1.0)
    h3_layer = p2_md_mlp.Tanh(layer_name='h3', dim=n_hidden, irange=init_vals[2],
                              W_lr_scale=lr_scale, b_lr_scale=1.0)
    o_layer = p2_md_mlp.Linear(layer_name='y', dim=out_size, irange=0.0001,
                               W_lr_scale=lr_scale, b_lr_scale=1.0)
    model = p2_md_mlp.MLP(nvis=in_size,
                          layers=[h1_layer, h2_layer, h3_layer, o_layer],
                          seed=1)
    # Dropout on the inputs of h2, h3 and y (h1's input is kept intact);
    # kept inputs are rescaled by 1/include_rate.
    dropout_cost = p2_ct_mlp_dropout.Dropout(
        input_include_probs={'h1':1.0, 'h2':include_rate, 'h3':include_rate, 'y':include_rate},
        input_scales={'h1':1.0, 'h2':np.float32(1.0/include_rate),
                      'h3':np.float32(1.0/include_rate), 'y':np.float32(1.0/include_rate)})
    # SGD with momentum; NOTE(review): EpochCounter is hard-coded to 1000
    # epochs, but the manual loop below runs exactly n_epoch iterations,
    # so the criterion is effectively unused — confirm intent.
    algorithm = p2_alg_sgd.SGD(batch_size=b_size,
                               learning_rate=l_rate,
                               learning_rule = p2_alg_lr.Momentum(momentum),
                               termination_criterion=p2_termcri.EpochCounter(max_epochs=1000),
                               cost=dropout_cost)
    train = pylearn2.train.Train(dataset=data_tr, model=model, algorithm=algorithm)
    train.setup()
    x = T.matrix()
    y = model.fprop(x)
    # Compiled forward pass: predictions for an arbitrary input matrix.
    f = theano.function([x], y)
    # MAE trackers; 10.0 is just a large sentinel that any real MAE beats.
    MAE_va_old = 10.0
    MAE_va_best = 10.0
    MAE_tr_old = 10.0
    MAE_te_old = 10.0
    MAE_1000G_old = 10.0
    MAE_1000G_best = 10.0
    MAE_GTEx_old = 10.0
    outlog = open(base_name + '.log', 'w')
    # Tab-separated log header (note: 'learing_rate' typo is preserved in
    # the output format).
    log_str = '\t'.join(map(str, ['epoch', 'MAE_va', 'MAE_va_change',
                                  'MAE_te', 'MAE_te_change',
                                  'MAE_1000G', 'MAE_1000G_change',
                                  'MAE_GTEx', 'MAE_GTEx_change',
                                  'MAE_tr', 'MAE_tr_change',
                                  'learing_rate', 'time(sec)']))
    print log_str
    outlog.write(log_str + '\n')
    sys.stdout.flush()  # force the header out immediately
    for epoch in range(0, n_epoch):
        t_old = time.time()  # epoch start time
        train.algorithm.train(train.dataset)  # one SGD pass over the data
        # Predictions on each evaluation set (float32 in, float64 out for
        # stable error accumulation).
        Y_va_hat = f(X_va.astype('float32')).astype('float64')
        Y_te_hat = f(X_te.astype('float32')).astype('float64')
        Y_tr_hat_monitor = f(X_tr_monitor.astype('float32')).astype('float64')
        Y_1000G_hat = f(X_1000G.astype('float32')).astype('float64')
        Y_GTEx_hat = f(X_GTEx.astype('float32')).astype('float64')
        # Mean absolute error against the ground truth.
        MAE_va = np.abs(Y_va_target - Y_va_hat).mean()
        MAE_te = np.abs(Y_te_target - Y_te_hat).mean()
        MAE_tr = np.abs(Y_tr_monitor_target - Y_tr_hat_monitor).mean()
        MAE_1000G = np.abs(Y_1000G_target - Y_1000G_hat).mean()
        MAE_GTEx = np.abs(Y_GTEx_target - Y_GTEx_hat).mean()
        # Relative change vs. previous epoch (positive = got worse).
        MAE_va_change = (MAE_va - MAE_va_old)/MAE_va_old
        MAE_te_change = (MAE_te - MAE_te_old)/MAE_te_old
        MAE_tr_change = (MAE_tr - MAE_tr_old)/MAE_tr_old
        MAE_1000G_change = (MAE_1000G - MAE_1000G_old)/MAE_1000G_old
        MAE_GTEx_change = (MAE_GTEx - MAE_GTEx_old)/MAE_GTEx_old
        # Roll the trackers forward.
        MAE_va_old = MAE_va
        MAE_te_old = MAE_te
        MAE_tr_old = MAE_tr
        MAE_1000G_old = MAE_1000G
        MAE_GTEx_old = MAE_GTEx
        t_new = time.time()  # epoch end time
        l_rate = train.algorithm.learning_rate.get_value()
        log_str = '\t'.join(map(str, [epoch+1,
                                      '%.6f'%MAE_va, '%.6f'%MAE_va_change,
                                      '%.6f'%MAE_te, '%.6f'%MAE_te_change,
                                      '%.6f'%MAE_1000G, '%.6f'%MAE_1000G_change,
                                      '%.6f'%MAE_GTEx, '%.6f'%MAE_GTEx_change,
                                      '%.6f'%MAE_tr, '%.6f'%MAE_tr_change,
                                      '%.5f'%l_rate, int(t_new-t_old)]))
        print log_str
        outlog.write(log_str + '\n')
        sys.stdout.flush()
        # Decay the learning rate when training error increased, but never
        # below the floor l_rate_min.
        if MAE_tr_change > 0:
            l_rate = l_rate*decay_factor
        if l_rate < l_rate_min:
            l_rate = l_rate_min
        train.algorithm.learning_rate.set_value(np.float32(l_rate))
        # Checkpoint on new best validation MAE.
        if MAE_va < MAE_va_best:
            MAE_va_best = MAE_va
            outmodel = open(base_name + '_bestva_model.pkl', 'wb')
            pkl.dump(model, outmodel)
            outmodel.close()
            np.save(base_name + '_bestva_Y_te_hat.npy', Y_te_hat)
            np.save(base_name + '_bestva_Y_va_hat.npy', Y_va_hat)
        # Separate checkpoint on new best 1000G MAE.
        if MAE_1000G < MAE_1000G_best:
            MAE_1000G_best = MAE_1000G
            outmodel = open(base_name + '_best1000G_model.pkl', 'wb')
            pkl.dump(model, outmodel)
            outmodel.close()
            np.save(base_name + '_best1000G_Y_1000G_hat.npy', Y_1000G_hat)
            np.save(base_name + '_best1000G_Y_GTEx_hat.npy', Y_GTEx_hat)
    print 'MAE_va_best : %.6f' % (MAE_va_best)
    print 'MAE_1000G_best : %.6f' % (MAE_1000G_best)
    outlog.write('MAE_va_best : %.6f' % (MAE_va_best) + '\n')
    outlog.write('MAE_1000G_best : %.6f' % (MAE_1000G_best) + '\n')
    outlog.close()
# Script fragment: data loading and model/algorithm construction for the
# geno/pheno MLP. Relies on earlier definitions outside this fragment
# (n_hidden, in_size, out_size, b_size, l_rate, lr_scale, momentum,
# include_rate, init_vals).
#
# Bug fix: the training targets were bound to Y_tr_pheno but read below as
# Y_tr_target (a guaranteed NameError); the binding is now Y_tr_target.

# Training split (.npy binary format).
X_tr = np.load('geno_X_tr.npy')
Y_tr = np.load('pheno_Y_tr.npy')
Y_tr_target = np.array(Y_tr)  # fix: was Y_tr_pheno
# Validation split (model selection: pick the complexity with the
# smallest validation error).
X_va = np.load('geno_X_va.npy')
Y_va = np.load('pheno_Y_va.npy')
Y_va_target = np.array(Y_va)
# Test split (final evaluation of the learned model).
X_te = np.load('geno_X_te.npy')
Y_te = np.load('pheno_Y_te.npy')
Y_te_target = np.array(Y_te)
# Fixed seed so the monitoring subset is reproducible; must be called
# before any other random-module function.
random.seed(0)
# 5000-example subset used to monitor training error cheaply.
# 88807 is presumably the training-set size — TODO confirm.
monitor_idx_tr = random.sample(range(88807), 5000)
# Wrap training data (float32) as a pylearn2 DenseDesignMatrix: a dense
# design matrix with one example per row, one feature per column.
data_tr = p2_dt_dd.DenseDesignMatrix(X=X_tr.astype('float32'),
                                     y=Y_tr.astype('float32'))
X_tr_monitor, Y_tr_monitor_target = X_tr[monitor_idx_tr, :], Y_tr_target[monitor_idx_tr, :]
# One Tanh hidden layer; linear output layer (regression).
h1_layer = p2_md_mlp.Tanh(layer_name='h1', dim=n_hidden, irange=init_vals[0],
                          W_lr_scale=1.0, b_lr_scale=1.0)
o_layer = p2_md_mlp.Linear(layer_name='y', dim=out_size, irange=0.0001,
                           W_lr_scale=lr_scale, b_lr_scale=1.0)
# Multilayer perceptron: nvis = number of visible (input) units; the last
# layer in `layers` defines the MLP's output space.
model = p2_md_mlp.MLP(nvis=in_size, layers=[h1_layer, o_layer], seed=1)
# Dropout only on the output layer's input ('y'); kept inputs are
# rescaled by 1/include_rate.
dropout_cost = p2_ct_mlp_dropout.Dropout(
    input_include_probs={'h1': 1.0, 'y': include_rate},
    input_scales={'h1': 1.0, 'y': np.float32(1.0 / include_rate)})
# Stochastic gradient descent with momentum.
algorithm = p2_alg_sgd.SGD(batch_size=b_size,
                           learning_rate=l_rate,
                           learning_rule=p2_alg_lr.Momentum(momentum),
                           termination_criterion=p2_termcri.EpochCounter(max_epochs=1000),
                           cost=dropout_cost)
def generate(opc):
    """Generate a paired-image dataset with the chosen transformation and
    save it as 'train_preprocessed.pkl' next to this file.

    Each example is a pair (im1[t], im2[t]) where im2 is im1 under a
    random shift or rotation, and Y[t] is the index of the applied
    transformation.

    Parameters
    ----------
    opc: string
        Only two options, shifts or rotations.
    """
    dim = 19  # outer square # A bigger image is used to avoid empty pixels in the
    # borders.
    reg = 13  # inner square (the 13x13 crop actually stored)
    total = 20000  # Number of training examples
    # Paired image tensors, shape (total, reg, reg, 1); labels are the
    # transformation indices.
    im1 = numpy.zeros((total, reg, reg, 1), dtype='float32')
    im2 = numpy.zeros((total, reg, reg, 1), dtype='float32')
    Y = numpy.zeros((total, 1), dtype='uint8')
    # Seeded RNG so the generated dataset is reproducible.
    rng = make_np_rng(9001, [1, 2, 3], which_method="uniform")
    transformation = opc
    if transformation == 'shifts':
        # Shifts
        # only shifts between [-3, +3] pixels
        shifts = list(itertools.product(range(-3, 4), range(-3, 4)))
        t = 0
        while t < total:
            # Random noise image in [0, 255].
            x = rng.uniform(0, 1, (dim, dim))
            x = numpy.ceil(x * 255)
            # Center crop of the outer image; [:, :, None] adds the
            # single channel axis.
            im_x = x[3:16, 3:16][:, :, None]
            ind = rng.randint(0, len(shifts))
            Y[t] = ind  # label = index into the shift table
            txy = shifts[ind]
            tx, ty = txy
            # The shifted pair is just a translated crop of the same
            # outer image, so no border pixels are ever empty.
            im_y = x[(3 + tx):(16 + tx), (3 + ty):(16 + ty)][:, :, None]
            im1[t, :] = im_x
            im2[t, :] = im_y
            t += 1
    else:
        assert transformation == 'rotations'
        # Rotations
        import Image
        # import cv2
        # 90 evenly spaced angles covering [0, 359] degrees.
        angs = numpy.linspace(0, 359, 90)
        t = 0
        while t < total:
            x = rng.uniform(0, 1, (dim, dim))
            x = numpy.ceil(x * 255)
            im_x = x[3:16, 3:16][:, :, None]
            ind = rng.randint(0, len(angs))
            Y[t] = ind  # label = index into the angle table
            ang = angs[ind]
            # Rotate the full outer image, then crop, so rotation
            # artifacts stay outside the stored 13x13 window.
            y = numpy.asarray(Image.fromarray(x).rotate(ang))
            # scale = 1
            # M1 = cv2.getRotationMatrix2D((dim/2, dim/2), ang, scale)
            # y = cv2.warpAffine(x, M1, (dim, dim))
            im_y = y[3:16, 3:16][:, :, None]
            im1[t, :] = im_x
            im2[t, :] = im_y
            t += 1
    # Flatten the topological views into design matrices (one row per
    # example) so the preprocessing pipeline can operate on them.
    view_converter = dense_design_matrix.DefaultViewConverter((reg, reg, 1))
    design_X = view_converter.topo_view_to_design_mat(im1)
    design_Y = view_converter.topo_view_to_design_mat(im2)
    # Normalize data:
    pipeline = preprocessing.Pipeline()
    gcn = preprocessing.GlobalContrastNormalization(sqrt_bias=10., use_std=True)
    pipeline.items.append(gcn)
    # Concatenate both halves so X and Y images are normalized with the
    # same statistics, then split again after preprocessing.
    XY = numpy.concatenate((design_X, design_Y), 0)
    XY_ImP = dense_design_matrix.DenseDesignMatrix(X=XY)
    XY_ImP.apply_preprocessor(preprocessor=pipeline, can_fit=True)
    X1 = XY_ImP.X[0:design_X.shape[0], :]
    X2 = XY_ImP.X[design_X.shape[0]:, :]
    # As a Conv2DSpace
    topo_X1 = view_converter.design_mat_to_topo_view(X1)
    topo_X2 = view_converter.design_mat_to_topo_view(X2)
    axes = ('b', 0, 1, 'c')
    # Dataset exposes two image sources plus the transformation label.
    data_specs = (CompositeSpace([
        Conv2DSpace((reg, reg), num_channels=1, axes=axes),
        Conv2DSpace((reg, reg), num_channels=1, axes=axes),
        VectorSpace(1)
    ]), ('featuresX', 'featuresY', 'targets'))
    train = VectorSpacesDataset((topo_X1, topo_X2, Y), data_specs=data_specs)
    # As a VectorSpace
    # data_specs = (CompositeSpace(
    #     [VectorSpace(reg * reg),
    #      VectorSpace(reg * reg),
    #      VectorSpace(1)]),
    #     ('featuresX', 'featuresY', 'targets'))
    # train = VectorSpacesDataset(data=(X1, X2, Y), data_specs=data_specs)
    import os
    # Save next to this source file.
    save_path = os.path.dirname(os.path.realpath(__file__))
    serial.save(os.path.join(save_path, 'train_preprocessed.pkl'), train)