def main():
    '''
    Paper found parameters to be efficient:
        6 word contexts --> 3 num_grams
        2 hidden_units
    '''
    X_train = np.load('storage/data_nn_disambiguator/X_train.npy')
    X_test = np.load('storage/data_nn_disambiguator/X_test.npy')
    y_train = np.load('storage/data_nn_disambiguator/y_train.npy')
    y_test = np.load('storage/data_nn_disambiguator/y_test.npy')

    trained_weights = nn.train_nn(X_train, y_train, [50, 10], batch_size=256,
                                  param_scale=0.1, num_epochs=20,
                                  step_size=0.001, L2_reg=1.0)

    # save the weights
    np.save('storage/trained_weights.npy', trained_weights)

    y_pred = nn.neural_net_predict(trained_weights, X_test)
    # don't forget to exp
    print("auc: {}".format(get_auc(y_test[:, 1], np.exp(y_pred))))

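# get_auc() is not defined in this snippet. A minimal sketch of what such a
# helper might look like, assuming scikit-learn is available and that a 1-D
# array of positive-class scores is passed (hypothetical helper, not the
# original implementation):
from sklearn.metrics import roc_auc_score

def get_auc(y_true, y_score):
    # y_true: 0/1 labels, y_score: predicted probabilities for the positive class
    return roc_auc_score(y_true, y_score)
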
def exec_unit(params_dic=None, simulation_id=None):
    torch.manual_seed(214)
    torch.cuda.manual_seed(214)
    random.seed(214)
    np.random.seed(214)

    params = Configuration(params_dic, simulation_id)
    stats_train, stats_test = nn.train_nn(params)

    with open(params.folder_name + '/' + params.simulation_name + str(params.simulation_id), 'wb') as pickle_out:
        pickle.dump((stats_test, stats_train), pickle_out)
    logging.info(json.dumps(params.to_dict()))

def train_classify(num_epochs, net, num_hidden_nodes, directory=".", out_directory=".",
                   restore=False, ckpt_directory=None):
    data_clips_labels = pickle.load(open(directory + "/pickles/data_clips_labels.pickle", 'rb'))
    train_clips_rot_all = pickle.load(open(directory + "/pickles/train_clips_rot_all.pickle", 'rb'))
    train_labels_rot_all = pickle.load(open(directory + "/pickles/train_labels_rot_all.pickle", 'rb'))
    val_clips_rot_all = pickle.load(open(directory + "/pickles/val_clips_rot_all.pickle", 'rb'))
    val_labels_rot_all = pickle.load(open(directory + "/pickles/val_labels_rot_all.pickle", 'rb'))
    batch_size = pickle.load(open(directory + "/pickles/batch_size.pickle", 'rb'))

    nn_best_val, nn_last_epoch = train_nn(train_clips_rot_all, train_labels_rot_all,
                                          val_clips_rot_all, val_labels_rot_all,
                                          net, num_hidden_nodes, num_epochs, batch_size,
                                          prefix=out_directory, restore=restore,
                                          ckpt_directory=ckpt_directory)

    nn_labels_best_val = [nn_best_val(t[0]) for t in data_clips_labels]
    pickle.dump(nn_labels_best_val, open(out_directory + "/pickles/nn_labels_best_val.pickle", 'wb'), protocol=2)

    if not restore:
        nn_labels_last_epoch = [nn_last_epoch(t[0]) for t in data_clips_labels]
        pickle.dump(nn_labels_last_epoch, open(out_directory + "/pickles/nn_labels_last_epoch.pickle", 'wb'), protocol=2)
    return

# .. balance them.
# dset.balance_data()

import sys
sys.exit(0)

# train seeds versus seeds
import nn
for i in range(0, len(dset.all_hashtags) - 1, 2):
    logger.info("Getting seeds for subject pair: %s, %s", dset.all_hashtags[i], dset.all_hashtags[i + 1])
    nn_data = dset.get_seed_dataset([i, i + 1])
    logger.info("Transforming data.")
    nn_data = dset.transform_dataset(nn_data, i, 2)
    nn.train_nn(root + "results/seeds/", nn_data, i)
    dset.make_probs_file(dset.all_hashtags[i], i, 0)
    dset.make_probs_file(dset.all_hashtags[i + 1], i, 1)

import sys
sys.exit(0)

# # Get the thresholds belonging to the first two seeds
import al
for i in range(4, len(dset.all_hashtags)):
    print dset.all_hashtags[i]
    al.find_threshold_subject(dset.all_hashtags[i], root)

dset.make_probs_file("moslim", 0, 1)
al.find_threshold_subject("moslim", root)

import sys
sys.exit(0)

def test_mlp(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=100,
             batch_size=128, n_hidden=500, n_hiddenLayers=3, verbose=False,
             smaller_set=True, timegap=0.5):
    """
    Wrapper function for training and testing MLP

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic gradient).

    :type L1_reg: float
    :param L1_reg: L1-norm's weight when added to the cost (see regularization).

    :type L2_reg: float
    :param L2_reg: L2-norm's weight when added to the cost (see regularization).

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer.

    :type batch_size: int
    :param batch_size: number of examples in minibatch.

    :type n_hidden: int or list of ints
    :param n_hidden: number of hidden units. If a list, it specifies the number
    of units in each hidden layer, and its length should equal n_hiddenLayers.

    :type n_hiddenLayers: int
    :param n_hiddenLayers: number of hidden layers.

    :type verbose: boolean
    :param verbose: to print out epoch summary or not to.

    :type smaller_set: boolean
    :param smaller_set: to use the smaller dataset or not to.
    """
    train_set, valid_set, test_set = load_data(theano_shared=False)

    # Convert raw dataset to Theano shared variables.
    test_set_x, test_set_y = shared_dataset(test_set)
    valid_set_x, valid_set_y = shared_dataset(valid_set)
    train_set_x, train_set_y = shared_dataset(train_set)
    print test_set_y.eval().shape

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size
    print 'n_train_batches : ', n_train_batches
    print 'n_valid_batches : ', n_valid_batches
    print 'n_test_batches : ', n_test_batches

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')    # the data is presented as rasterized images
    y = T.ivector('y')   # the labels are presented as 1D vector of [int] labels

    rng = numpy.random.RandomState(1234)

    # construct a neural network, either MLP or CNN:
    # (input, n_in, n_hidden, n_out, n_hiddenLayers)
    classifier = myMLP(rng, input=x, n_in=2304, n_hidden=n_hidden, n_out=7,
                       n_hiddenLayers=n_hiddenLayers, parameters=None)

    # the cost we minimize during training is the negative log likelihood of
    # the model plus the regularization terms (L1 and L2); cost is expressed
    # here symbolically
    cost = (
        classifier.negative_log_likelihood(y)
        + L1_reg * classifier.L1
        + L2_reg * classifier.L2_sqr
    )

    # compiling a Theano function that computes the mistakes that are made
    # by the model on a minibatch
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    # compute the gradient of cost with respect to theta (stored in params);
    # the resulting gradients will be stored in a list gparams
    gparams = [T.grad(cost, param) for param in classifier.params]

    # specify how to update the parameters of the model as a list of
    # (variable, update expression) pairs
    updates = [
        (param, param - learning_rate * gparam)
        for param, gparam in zip(classifier.params, gparams)
    ]

    # compiling a Theano function `train_model` that returns the cost and at
    # the same time updates the parameters of the model based on the rules
    # defined in `updates`
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')
    train_nn(train_model, validate_model, test_model,
             n_train_batches, n_valid_batches, n_test_batches, n_epochs, verbose)
    print('MODEL TRAINED..')

    ##################
    # SAVE the MODEL #
    ##################
    import os
    modelFolderName = 'mlp_models'
    cmd = 'mkdir %s' % modelFolderName
    os.system(cmd)
    save_model(classifier, n_hidden, n_hiddenLayers,
               modelFolderName + '/' + 'mlp_classifier_nhidden_%s_hiddenlayers_%s_batchSize_%s_epochs_%s'
               % (n_hidden, n_hiddenLayers, batch_size, n_epochs))
    print 'Model Saved. '

# modelFolderName = 'mlp_models'
# modelName = 'mlp_classifier_nhidden_500_hiddenlayers_3_batchSize_20_epochs_2_json.save'
# test_mlp(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=2, batch_size=20,
#          n_hidden=500, n_hiddenLayers=3, verbose=True, smaller_set=False)
# predict_from_trained_model(modelFolderName + '/' + modelName)

def test_convnet(learning_rate=0.1, n_epochs=1000, nkerns=[16, 512, 20],
                 batch_size=200, verbose=False,
                 filterwidth_layer0=2, filterheight_layer0=2, poolsize_layer0=2,
                 filterwidth_layer1=2, filterheight_layer1=2, poolsize_layer1=1,
                 filterwidth_layer2=2, filterheight_layer2=2, poolsize_layer2=1,
                 neurons_hidden=300, smaller_set=False):
    """
    Wrapper function for testing Multi-Stage ConvNet on SVHN dataset

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type nkerns: list of ints
    :param nkerns: number of kernels on each layer

    :type batch_size: int
    :param batch_size: number of examples in minibatch.

    :type verbose: boolean
    :param verbose: to print out epoch summary or not to.
    """
    rng = numpy.random.RandomState(23455)

    if smaller_set:
        datasets = load_data(ds_rate=5)
    else:
        datasets = load_data()

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches //= batch_size
    n_valid_batches //= batch_size
    n_test_batches //= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')    # the data is presented as rasterized images
    y = T.ivector('y')   # the labels are presented as 1D vector of [int] labels

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # Reshape matrix of rasterized images of shape (batch_size, 48 * 48)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer
    layer0_input = x.reshape((batch_size, 1, 48, 48))

    layer0 = LeNetConvPoolLayer(
        rng,
        input=layer0_input,
        image_shape=(batch_size, 1, 48, 48),
        filter_shape=(nkerns[0], 1, filterwidth_layer0, filterheight_layer0),
        poolsize=(poolsize_layer0, poolsize_layer0)
    )
    print '-------------------------------------------------------------------------------------------- \n'
    layer0_outputwidth, layer0_outputheight = ((48 - filterwidth_layer0 + 1) / poolsize_layer0,
                                               (48 - filterheight_layer0 + 1) / poolsize_layer0)
    print 'Layer0 built. Shape of feature map :', layer0_outputwidth, layer0_outputheight, 'Number of feature maps : ', nkerns[0]
    print '-------------------------------------------------------------------------------------------- \n'

    layer1 = LeNetConvPoolLayer(
        rng,
        input=layer0.output,
        image_shape=(batch_size, nkerns[0], layer0_outputwidth, layer0_outputheight),
        filter_shape=(nkerns[1], nkerns[0], filterwidth_layer1, filterheight_layer1),
        poolsize=(poolsize_layer1, poolsize_layer1)
    )
    layer1_outputwidth, layer1_outputheight = ((layer0_outputwidth - filterwidth_layer1 + 1) / poolsize_layer1,
                                               (layer0_outputheight - filterheight_layer1 + 1) / poolsize_layer1)
    print 'Layer1 built. Shape of feature map :', layer1_outputwidth, layer1_outputheight, 'Number of feature maps : ', nkerns[1]
    print '-------------------------------------------------------------------------------------------- \n'

    poolsize_width_layer0_to_layer1 = layer0_outputwidth / layer1_outputwidth
    poolsize_height_layer0_to_layer1 = layer0_outputheight / layer1_outputheight
    print 'poolsize layer 0 o/p to layer 1 o/p width :', layer0_outputwidth / layer1_outputwidth
    print 'poolsize layer 0 o/p to layer 1 o/p height :', layer0_outputheight / layer1_outputheight

    layer0_output_ds = downsample.max_pool_2d(
        input=layer0.output,
        ds=(poolsize_width_layer0_to_layer1, poolsize_height_layer0_to_layer1),
        ignore_border=True
    )
    print 'max pool layer created between output of layer0 and output of layer1. output of this max pool layer : ', layer0_outputwidth / poolsize_width_layer0_to_layer1, layer0_outputheight / poolsize_height_layer0_to_layer1
    print '-------------------------------------------------------------------------------------------- \n'

    # concatenate layer
    layer2_input = T.concatenate([layer1.output, layer0_output_ds], axis=1)

    # Construct the third convolutional pooling layer
    layer2 = LeNetConvPoolLayer(
        rng,
        input=layer2_input,
        image_shape=(batch_size, nkerns[0] + nkerns[1], layer1_outputwidth, layer1_outputheight),
        filter_shape=(nkerns[2], nkerns[0] + nkerns[1], filterwidth_layer2, filterheight_layer2),
        poolsize=(poolsize_layer2, poolsize_layer2)
    )
    print 'Input to Layer2 (not equal to output of Layer1) : ', nkerns[0] + nkerns[1]
    layer2_outputwidth, layer2_outputheight = ((layer1_outputwidth - filterwidth_layer2 + 1) / poolsize_layer2,
                                               (layer1_outputheight - filterheight_layer2 + 1) / poolsize_layer2)
    print 'Layer2 built. Shape of feature map :', layer2_outputwidth, layer2_outputheight, 'Number of feature maps : ', nkerns[2]
    print '-------------------------------------------------------------------------------------------- \n'

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e. matrix of rasterized images).
    # This will generate a matrix of shape (batch_size, nkerns[2] * 1 * 1).
    layer3_input = layer2.output.flatten(2)

    # construct a fully-connected sigmoidal layer
    layer3 = HiddenLayer(
        rng,
        input=layer3_input,
        n_in=nkerns[2] * layer2_outputwidth * layer2_outputheight,
        n_out=neurons_hidden,
        activation=T.tanh
    )
    print 'MLP Layer created. Input neurons : ', nkerns[2] * layer2_outputwidth * layer2_outputheight, ' Output neurons :', neurons_hidden
    print '-------------------------------------------------------------------------------------------- \n'

    # classify the values of the fully-connected sigmoidal layer
    layer4 = LogisticRegression(input=layer3.output, n_in=neurons_hidden, n_out=7)
    print 'Logistic Layer created. Input neurons : ', neurons_hidden, ' output neurons :', 7
    print '-------------------------------------------------------------------------------------------- \n'

    # the cost we minimize during training is the NLL of the model
    cost = layer4.negative_log_likelihood(y)

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        [index],
        layer4.errors(y),
        givens={
            x: test_set_x[index * batch_size: (index + 1) * batch_size],
            y: test_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    validate_model = theano.function(
        [index],
        layer4.errors(y),
        givens={
            x: valid_set_x[index * batch_size: (index + 1) * batch_size],
            y: valid_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    params = layer4.params + layer3.params + layer2.params + layer1.params + layer0.params

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # train_model is a function that updates the model parameters by SGD.
    # Since this model has many parameters, it would be tedious to manually
    # create an update rule for each model parameter. We thus create the
    # updates list by automatically looping over all (params[i], grads[i]) pairs.
    updates = [
        (param_i, param_i - learning_rate * grad_i)
        for param_i, grad_i in zip(params, grads)
    ]

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')
    train_nn(train_model, validate_model, test_model,
             n_train_batches, n_valid_batches, n_test_batches, n_epochs, verbose)

    ###############
    # SAVE MODEL  #
    ###############
    _ = '_'
    import os
    modelFolderName = 'convo_models'
    cmd = 'mkdir %s' % modelFolderName
    os.system(cmd)
    model_name = (str(filterwidth_layer0) + _ + str(filterheight_layer0) + _ + str(poolsize_layer0) + _ + str(nkerns[0]) + _ +
                  str(filterwidth_layer1) + _ + str(filterheight_layer1) + _ + str(poolsize_layer1) + _ + str(nkerns[1]) + _ +
                  str(filterwidth_layer2) + _ + str(filterheight_layer2) + _ + str(poolsize_layer2) + _ + str(nkerns[2]) + _ +
                  str(neurons_hidden))
    save_model(modelFolderName + '/' + 'model_%s' % model_name, params, learning_rate, n_epochs, nkerns,
               batch_size, verbose,
               filterwidth_layer0, filterheight_layer0, poolsize_layer0,
               filterwidth_layer1, filterheight_layer1, poolsize_layer1,
               filterwidth_layer2, filterheight_layer2, poolsize_layer2,
               neurons_hidden, smaller_set)
    print 'Model saved.'

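# The feature-map sizes printed above all follow the same rule: a "valid"
# convolution shrinks each spatial dimension to (in_size - filter_size + 1),
# and max-pooling then divides it by the pool size. A small helper along these
# lines (hypothetical, not part of the original code) makes the arithmetic explicit:
def conv_pool_output_size(in_size, filter_size, pool_size):
    """Output size of one conv+pool dimension (valid convolution followed by max pooling)."""
    return (in_size - filter_size + 1) // pool_size

# e.g. layer0 on a 48x48 input with a 2x2 filter and 2x2 pooling:
# conv_pool_output_size(48, 2, 2) -> 23
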
def main():
    if len(sys.argv) == 1:
        print "Usage: python {} clip_sz clip_step min_batch_size num_epochs prefix [load]".format(sys.argv[0])
        return

    clip_sz = int(sys.argv[1])
    clip_step = int(sys.argv[2])
    min_batch_size = int(sys.argv[3])
    num_epochs = int(sys.argv[4])
    prefix = '' if (len(sys.argv) <= 5) else sys.argv[5]
    load = True if (len(sys.argv) > 6 and sys.argv[6] == 'load') else False

    num_hidden_nodes = 10
    radius = 2
    net = onelayer_7x7

    data_raw = load_data()
    data = preprocess(load_data(), radius)
    train_data = data[0:8]
    val_data = data[8:10]
    test_data = data[10:12]

    # clips: list of 3d arrays, labels: list of 2-element vectors
    train_clips_labels = [clips_and_labels_stk(t, clip_sz, clip_step) for t in train_data]
    val_clips_labels = [clips_and_labels_stk(t, clip_sz, clip_step) for t in val_data]
    test_clips_labels = [clips_and_labels_stk(t, clip_sz, clip_step) for t in test_data]

    # rotate clips to get even more training data
    train_clips_labels_rot = [rotate_augment(tc, tl) for (tc, tl) in train_clips_labels]
    val_clips_labels_rot = [rotate_augment(vc, vl) for (vc, vl) in val_clips_labels]

    # concatenate all training and validation examples
    train_clips_rot_all = [c for t in train_clips_labels_rot for c in t[0]]
    train_labels_rot_all = [c for t in train_clips_labels_rot for c in t[1]]
    val_clips_rot_all = [c for t in val_clips_labels_rot for c in t[0]]
    val_labels_rot_all = [c for t in val_clips_labels_rot for c in t[1]]

    # throw out some negative examples to help equalize pos/neg ratio
    train_clips_rot_all, train_labels_rot_all = equalize_posneg(train_clips_rot_all, train_labels_rot_all)
    val_clips_rot_all, val_labels_rot_all = equalize_posneg(val_clips_rot_all, val_labels_rot_all)

    # find a batch size
    batch_size = min_batch_size
    while len(train_clips_labels[0][0]) % batch_size != 0:
        batch_size += 1

    # ensure batch_size divides evenly into training clips
    train_clips_rot_all = train_clips_rot_all[0:-(len(train_clips_rot_all) % batch_size)]
    train_labels_rot_all = train_labels_rot_all[0:-(len(train_labels_rot_all) % batch_size)]
    val_clips_rot_all = val_clips_rot_all[0:-(len(val_clips_rot_all) % batch_size)]
    val_labels_rot_all = val_labels_rot_all[0:-(len(val_labels_rot_all) % batch_size)]

    print "Number of training frames: {}".format(len(train_clips_rot_all))

    if not load:
        # train neural net
        nn_best_val, nn_last_epoch = train_nn(train_clips_rot_all, train_labels_rot_all,
                                              val_clips_rot_all, val_labels_rot_all,
                                              net, num_hidden_nodes, num_epochs, batch_size,
                                              prefix=prefix)

        # classify data
        train_nn_labels_best_val = [nn_best_val(t[0]) for t in train_clips_labels]
        val_nn_labels_best_val = [nn_best_val(t[0]) for t in val_clips_labels]
        test_nn_labels_best_val = [nn_best_val(t[0]) for t in test_clips_labels]
        train_nn_labels_last_epoch = [nn_last_epoch(t[0]) for t in train_clips_labels]
        val_nn_labels_last_epoch = [nn_last_epoch(t[0]) for t in val_clips_labels]
        test_nn_labels_last_epoch = [nn_last_epoch(t[0]) for t in test_clips_labels]

        # save results
        pickle.dump(train_nn_labels_best_val, open(prefix + "nn_train_labels_best_val.pickle", 'wb'))
        pickle.dump(val_nn_labels_best_val, open(prefix + "nn_val_labels_best_val.pickle", 'wb'))
        pickle.dump(test_nn_labels_best_val, open(prefix + "nn_test_labels_best_val.pickle", 'wb'))
        pickle.dump(train_nn_labels_last_epoch, open(prefix + "nn_train_labels_last_epoch.pickle", 'wb'))
        pickle.dump(val_nn_labels_last_epoch, open(prefix + "nn_val_labels_last_epoch.pickle", 'wb'))
        pickle.dump(test_nn_labels_last_epoch, open(prefix + "nn_test_labels_last_epoch.pickle", 'wb'))
    else:
        train_nn_labels_best_val = pickle.load(open(prefix + "nn_train_labels_best_val.pickle", 'rb'))
        val_nn_labels_best_val = pickle.load(open(prefix + "nn_val_labels_best_val.pickle", 'rb'))
        test_nn_labels_best_val = pickle.load(open(prefix + "nn_test_labels_best_val.pickle", 'rb'))
        train_nn_labels_last_epoch = pickle.load(open(prefix + "nn_train_labels_last_epoch.pickle", 'rb'))
        val_nn_labels_last_epoch = pickle.load(open(prefix + "nn_val_labels_last_epoch.pickle", 'rb'))
        test_nn_labels_last_epoch = pickle.load(open(prefix + "nn_test_labels_last_epoch.pickle", 'rb'))

    # convert predictions back to 1/0 arrays
    actual_train_labels = [t[1] for t in data_raw[0:8]]
    actual_test_labels = [t[1] for t in data_raw[10:12]]
    actual_val_labels = [t[1] for t in data_raw[8:10]]

    #threshold, eps, min_samples, final_radius = train_threshold_hyperparameters(val_nn_labels_best_val[0], actual_val_labels[0], clip_sz, clip_step)
    threshold, eps, min_samples, final_radius = (0.9, 0.0147, 10, 7.8)  # (0.0254, 72, 7.8)
    #threshold = None
    print "threshold: {}\neps: {}\nmin_samples: {}\nradius: {}\n".format(threshold, eps, min_samples, final_radius)

    train_nn_pred_stk_best_val = [labels_to_stk(x, (512 / DOWNSCALE_FACTOR, 512 / DOWNSCALE_FACTOR), clip_sz, clip_step, threshold) for x in train_nn_labels_best_val]
    val_nn_pred_stk_best_val = [labels_to_stk(x, (512 / DOWNSCALE_FACTOR, 512 / DOWNSCALE_FACTOR), clip_sz, clip_step, threshold) for x in val_nn_labels_best_val]
    test_nn_pred_stk_best_val = [labels_to_stk(x, (512 / DOWNSCALE_FACTOR, 512 / DOWNSCALE_FACTOR), clip_sz, clip_step, threshold) for x in test_nn_labels_best_val]
    train_nn_pred_stk_last_epoch = [labels_to_stk(x, (512 / DOWNSCALE_FACTOR, 512 / DOWNSCALE_FACTOR), clip_sz, clip_step, threshold) for x in train_nn_labels_last_epoch]
    val_nn_pred_stk_last_epoch = [labels_to_stk(x, (512 / DOWNSCALE_FACTOR, 512 / DOWNSCALE_FACTOR), clip_sz, clip_step, threshold) for x in val_nn_labels_last_epoch]
    test_nn_pred_stk_last_epoch = [labels_to_stk(x, (512 / DOWNSCALE_FACTOR, 512 / DOWNSCALE_FACTOR), clip_sz, clip_step, threshold) for x in test_nn_labels_last_epoch]

    # plot things
    thresh = "_thresh" if threshold is not None else ""

    plt.figure()
    plt.imshow(train_data[0][0].squeeze(), cmap="gray")
    plt.savefig(prefix + "nn_train_raw.png")
    plt.figure()
    plt.imshow(train_data[0][1], cmap="gray")
    plt.savefig(prefix + "nn_train_actual.png")
    plt.figure()
    plt.imshow(train_nn_pred_stk_best_val[0], cmap="gray")
    plt.savefig(prefix + "nn_train_pred_best_val" + str(thresh) + ".png")
    plt.figure()
    plt.imshow(train_nn_pred_stk_last_epoch[0], cmap="gray")
    plt.savefig(prefix + "nn_train_pred_last_epoch" + str(thresh) + ".png")

    plt.figure()
    plt.imshow(test_data[0][0].squeeze(), cmap="gray")
    plt.savefig(prefix + "nn_test0_raw.png")
    plt.figure()
    plt.imshow(test_data[0][1], cmap="gray")
    plt.savefig(prefix + "nn_test0_actual.png")
    plt.figure()
    plt.imshow(test_nn_pred_stk_best_val[0], cmap="gray")
    plt.savefig(prefix + "nn_test0_pred_best_val" + str(thresh) + ".png")
    plt.figure()
    plt.imshow(test_nn_pred_stk_last_epoch[0], cmap="gray")
    plt.savefig(prefix + "nn_test0_pred_last_epoch" + str(thresh) + ".png")

    plt.figure()
    plt.imshow(test_data[1][0].squeeze(), cmap="gray")
    plt.savefig(prefix + "test1_raw.png")
    plt.figure()
    plt.imshow(test_data[1][1], cmap="gray")
    plt.savefig(prefix + "nn_test1_actual.png")
    plt.figure()
    plt.imshow(test_nn_pred_stk_best_val[1], cmap="gray")
    plt.savefig(prefix + "nn_test1_pred_best_val" + str(thresh) + ".png")
    plt.figure()
    plt.imshow(test_nn_pred_stk_last_epoch[1], cmap="gray")
    plt.savefig(prefix + "nn_test1_pred_last_epoch" + str(thresh) + ".png")

    # convert stacked predictions to final ROI format
    train_nn_pred_final = [nn_stk_pred_to_final_roi_format(x, eps, min_samples, final_radius) for x in train_nn_pred_stk_best_val]
    val_nn_pred_final = [nn_stk_pred_to_final_roi_format(x, eps, min_samples, final_radius) for x in val_nn_pred_stk_best_val]
    test_nn_pred_final = [nn_stk_pred_to_final_roi_format(x, eps, min_samples, final_radius) for x in test_nn_pred_stk_best_val]

    # get final score
    #print actual_train_labels[0].shape
    #print train_nn_pred_final[0].shape
    #print actual_test_labels[0].shape
    #print test_nn_pred_final[0].shape
    train_score = Score(None, None, actual_train_labels, train_nn_pred_final)
    test_score = Score(None, None, actual_test_labels, test_nn_pred_final)
    test0_score = Score(None, None, actual_test_labels[0:1], test_nn_pred_final[0:1])
    test1_score = Score(None, None, actual_test_labels[1:2], test_nn_pred_final[1:2])
    print str(train_score)
    print
    print str(test_score)
    print
    print str(test0_score)
    print
    print str(test1_score)

    # plot things
    train_score.plot()
    test_score.plot()
    test0_score.plot()
    test1_score.plot()
    plt.show()
    return

    plt.figure()
    plt.imshow(train_nn_pred_final[0].max(axis=0), cmap="gray")
    plt.savefig(prefix + "nn_train_final.png")
    plt.figure()
    plt.imshow(test_nn_pred_final[0].max(axis=0), cmap="gray")
    plt.savefig(prefix + "nn_test_final.png")

    # show all the plots!
    plt.show()

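# nn_stk_pred_to_final_roi_format() is not shown here; its eps/min_samples
# arguments suggest DBSCAN-style clustering of positive pixels into ROIs.
# A rough sketch of that idea (an assumption, not the original implementation):
import numpy as np
from sklearn.cluster import DBSCAN

def cluster_positive_pixels(pred_mask, eps, min_samples):
    """Group positive pixels of a 2-D prediction mask into clusters (ROI candidates)."""
    coords = np.argwhere(pred_mask > 0)  # (row, col) of positive pixels
    if len(coords) == 0:
        return []
    labels = DBSCAN(eps=eps, min_samples=min_samples).fit_predict(coords)
    return [coords[labels == k] for k in set(labels) if k != -1]  # drop noise points
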
def test_mlp_parity(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=100,
                    batch_size=64, n_hidden=500, n_hiddenLayers=1, verbose=False):
    reader = csv.reader(open("joint_knee.csv", "rb"), delimiter=',')
    x = list(reader)
    #print x
    result = numpy.array(x)
    #print result.shape

    def score_to_numeric(x, a):
        if (x == 'Hospice - Home'):
            return 11
        if (x == 'Psychiatric Hospital or Unit of Hosp'):
            return 10
        if (x == 'Hospice - Medical Facility'):
            return 9
        if (x == 'Expired'):
            return 8
        if (x == 'Facility w/ Custodial/Supportive Care'):
            return 7
        if (x.lower() == 'left against medical advice'):
            return 6
        if (x.lower() == 'short-term hospital'):
            return 5
        if (x.lower() == 'multi-racial' or x.lower() == 'home or self care'):
            return 4
        if (x.lower() == 'other race' or x.lower() == 'emergency' or
                x.lower() == 'skilled nursing home' or x.lower() == 'not available'):
            return 3
        if (x.lower() == 'm' or x.lower() == 'black/african american' or
                x.lower() == 'urgent' or x.lower() == 'inpatient rehabilitation facility'):
            return 2
        if (x.lower() == 'f' or x.lower() == 'white' or x.lower() == 'elective' or
                x.lower() == 'home w/ home health services'):
            return 1
        if (a == 1):
            return int(x[:2])
        if (a == 2):
            return float(x[1:])
        else:
            return float(x)

    rownum = 0
    for row in result:
        # Save header row and locate the columns of interest.
        if rownum == 0:
            rownum += 1
            header = row
            for i in range(0, len(header)):
                if header[i].lower() == 'gender':
                    gender = i
                if header[i].lower() == 'race':
                    race = i
                if header[i].lower() == 'type of admission':
                    admi = i
                if header[i].lower() == 'patient disposition':
                    disp = i
                if header[i].lower() == 'age group':
                    age = i
                if header[i].lower() == 'total charges':
                    price = i
        else:
            row[gender] = score_to_numeric(row[gender], 0)
            row[race] = score_to_numeric(row[race], 0)
            row[admi] = score_to_numeric(row[admi], 0)
            row[disp] = score_to_numeric(row[disp], 0)
            row[age] = score_to_numeric(row[age], 1)
            row[price] = score_to_numeric(row[price], 2)
            for i in range(0, len(row)):
                row[i] = float(row[i])
                #y = row[i].astype(numpy.float)
                #row[i] = y
                #print type(row[i])

    #print type(result)
    #result = numpy.array(result).astype('float')
    #print result[1:(len(result)),1:]
    res = result[1:(len(result)), 1:].astype(numpy.float)
    for i in range(len(res)):
        for j in range(len(res[0])):
            if (j == 9):
                res[i, j] = int(round(res[i, j] / 10000))
            else:
                res[i, j] = int(round(res[i, j]))

    myset = set(res[:, 9])
    nout = len(myset)
    y = res[:, 9]
    #print y
    x = res[:, 0:9]

    iris = load_iris()
    clf = ExtraTreesClassifier()
    clf = clf.fit(x, y)
    model = SelectFromModel(clf, prefit=True)
    X_new = model.transform(x)
    data = np.c_[X_new, y]

    totallen = len(data)
    numpy.random.shuffle(data)
    training, validation, testing = (data[:totallen / 2, :],
                                     data[totallen / 2:(3 * totallen / 4), :],
                                     data[(3 * totallen / 4):, :])
    l = len(data[0]) - 1
    train_set = [training[:, 0:l], training[:, l]]
    valid_set = [validation[:, 0:l], validation[:, l]]
    test_set = [testing[:, 0:l], testing[:, l]]
    #print train_set
    #print valid_set
    #print test_set

    # Convert raw dataset to Theano shared variables.
    train_set_x, train_set_y = shared_dataset(train_set)
    valid_set_x, valid_set_y = shared_dataset(valid_set)
    test_set_x, test_set_y = shared_dataset(test_set)

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')    # the data is presented as rasterized images
    y = T.ivector('y')   # the labels are presented as 1D vector of [int] labels

    rng = numpy.random.RandomState(1234)

    # construct the MLP class
    classifier = myMLP(rng=rng, input=x, n_in=l, n_hidden=n_hidden,
                       n_out=len(myset), n_hiddenLayers=n_hiddenLayers)

    # the cost we minimize during training is the negative log likelihood of
    # the model plus the regularization terms (L1 and L2); cost is expressed
    # here symbolically
    cost = (classifier.negative_log_likelihood(y)
            + L1_reg * classifier.L1
            + L2_reg * classifier.L2_sqr)

    # compiling a Theano function that computes the mistakes that are made
    # by the model on a minibatch
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # compute the gradient of cost with respect to theta (stored in params);
    # the resulting gradients will be stored in a list gparams
    gparams = [T.grad(cost, param) for param in classifier.params]

    # specify how to update the parameters of the model as a list of
    # (variable, update expression) pairs
    updates = [(param, param - learning_rate * gparam)
               for param, gparam in zip(classifier.params, gparams)]

    # compiling a Theano function `train_model` that returns the cost and at
    # the same time updates the parameters of the model based on the rules
    # defined in `updates`
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')
    train_nn(train_model, validate_model, test_model,
             n_train_batches, n_valid_batches, n_test_batches, n_epochs, verbose)

    y_p_train = theano.function(inputs=[], outputs=[classifier.logRegressionLayer.y_pred],
                                givens={x: train_set_x})
    y_predict = theano.function(inputs=[], outputs=[classifier.logRegressionLayer.y_pred],
                                givens={x: test_set_x})
    y_pred1 = y_p_train()
    y_pred2 = y_predict()
    return y_pred1, y_pred2

def test_data_augmentation(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001,
                           n_epochs=100, batch_size=128, n_hidden=500,
                           n_hiddenLayers=3, verbose=False, steps=1):
    """
    Wrapper function for experiment of data augmentation

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic gradient).

    :type L1_reg: float
    :param L1_reg: L1-norm's weight when added to the cost (see regularization).

    :type L2_reg: float
    :param L2_reg: L2-norm's weight when added to the cost (see regularization).

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer.

    :type batch_size: int
    :param batch_size: number of examples in minibatch.

    :type n_hidden: int or list of ints
    :param n_hidden: number of hidden units. If a list, it specifies the number
    of units in each hidden layer, and its length should equal n_hiddenLayers.

    :type n_hiddenLayers: int
    :param n_hiddenLayers: number of hidden layers.

    :type verbose: boolean
    :param verbose: to print out epoch summary or not to.

    :type steps: int
    :param steps: number of pixels by which each image is translated.
    """
    rng = numpy.random.RandomState(23455)

    # Load the down-sampled dataset in raw format (numpy.ndarray, not Theano.shared),
    # since we need to preprocess it.
    # train_set, valid_set, test_set format: tuple(input, target)
    # input is a numpy.ndarray of 2 dimensions (a matrix), where each row
    # corresponds to an example. target is a numpy.ndarray of 1 dimension
    # (vector) that has the same length as the number of rows in the input.
    train_set, valid_set, test_set = load_data(ds_rate=5, theano_shared=False)

    # Repeat the training labels 5 times (one copy per translated dataset below)
    train_set[1] = numpy.tile(train_set[1], 5)

    # translate the dataset
    train_set_x_u = translate_image(train_set[0], 'top', steps)
    train_set_x_d = translate_image(train_set[0], 'bottom', steps)
    train_set_x_r = translate_image(train_set[0], 'right', steps)
    train_set_x_l = translate_image(train_set[0], 'left', steps)

    # Stack the original dataset and the synthesized datasets
    train_set[0] = numpy.vstack((train_set[0], train_set_x_u, train_set_x_d,
                                 train_set_x_r, train_set_x_l))

    # Convert raw dataset to Theano shared variables.
    test_set_x, test_set_y = shared_dataset(test_set)
    valid_set_x, valid_set_y = shared_dataset(valid_set)
    train_set_x, train_set_y = shared_dataset(train_set)

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches //= batch_size
    n_valid_batches //= batch_size
    n_test_batches //= batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')    # the data is presented as rasterized images
    y = T.ivector('y')   # the labels are presented as 1D vector of [int] labels

    rng = numpy.random.RandomState(1234)

    classifier = myMLP(
        rng=rng,
        input=x,
        n_in=32 * 32 * 3,
        n_hidden=n_hidden,
        n_hiddenLayers=n_hiddenLayers,
        n_out=10
    )

    # the cost we minimize during training is the negative log likelihood of
    # the model plus the regularization terms (L1 and L2); cost is expressed
    # here symbolically
    cost = (
        classifier.negative_log_likelihood(y)
        + L1_reg * classifier.L1
        + L2_reg * classifier.L2_sqr
    )

    # compiling a Theano function that computes the mistakes that are made
    # by the model on a minibatch
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    # compute the gradient of cost with respect to theta (stored in params);
    # the resulting gradients will be stored in a list gparams
    gparams = [T.grad(cost, param) for param in classifier.params]

    # specify how to update the parameters of the model as a list of
    # (variable, update expression) pairs
    updates = [
        (param, param - learning_rate * gparam)
        for param, gparam in zip(classifier.params, gparams)
    ]

    # compiling a Theano function `train_model` that returns the cost and at
    # the same time updates the parameters of the model based on the rules
    # defined in `updates`
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')
    train_nn(train_model, validate_model, test_model,
             n_train_batches, n_valid_batches, n_test_batches, n_epochs, verbose)

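# translate_image() is defined elsewhere in this codebase; a minimal sketch of
# the idea (an assumption, not the original implementation) — shift each
# rasterized 3x32x32 CIFAR-style image by `steps` pixels and zero the border
# so the translation is not circular:
import numpy

def translate_image(images, direction, steps):
    """images: (n, 3072) array of rasterized 3x32x32 images; returns shifted copies."""
    imgs = images.reshape(-1, 3, 32, 32).copy()
    axis, shift = {'top': (2, -steps), 'bottom': (2, steps),
                   'left': (3, -steps), 'right': (3, steps)}[direction]
    imgs = numpy.roll(imgs, shift, axis=axis)
    if axis == 2:  # vertical shift: blank the wrapped rows
        if shift < 0:
            imgs[:, :, shift:, :] = 0
        else:
            imgs[:, :, :shift, :] = 0
    else:          # horizontal shift: blank the wrapped columns
        if shift < 0:
            imgs[:, :, :, shift:] = 0
        else:
            imgs[:, :, :, :shift] = 0
    return imgs.reshape(-1, 3072)
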
def mlp_with_gaussian_filter(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001,
                             n_epochs=100, batch_size=128, n_hidden=500,
                             n_hiddenLayers=3, verbose=False, smaller_set=True, std=0.1):
    """
    Wrapper function for training and testing MLP

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic gradient).

    :type L1_reg: float
    :param L1_reg: L1-norm's weight when added to the cost (see regularization).

    :type L2_reg: float
    :param L2_reg: L2-norm's weight when added to the cost (see regularization).

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer.

    :type batch_size: int
    :param batch_size: number of examples in minibatch.

    :type n_hidden: int or list of ints
    :param n_hidden: number of hidden units. If a list, it specifies the number
    of units in each hidden layer, and its length should equal n_hiddenLayers.

    :type n_hiddenLayers: int
    :param n_hiddenLayers: number of hidden layers.

    :type verbose: boolean
    :param verbose: to print out epoch summary or not to.

    :type smaller_set: boolean
    :param smaller_set: to use the smaller dataset or not to.
    """
    # load the dataset; download the dataset if it is not present
    if smaller_set:
        datasets = load_data2(ds_rate=5, std=std)
    else:
        datasets = load_data2(std=std)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size
    print 'n_train_batches : ', n_train_batches
    print 'n_valid_batches : ', n_valid_batches
    print 'n_test_batches : ', n_test_batches

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')    # the data is presented as rasterized images
    y = T.ivector('y')   # the labels are presented as 1D vector of [int] labels

    rng = numpy.random.RandomState(1234)

    # construct a neural network, either MLP or CNN:
    # (input, n_in, n_hidden, n_out, n_hiddenLayers)
    classifier = myMLP(rng, input=x, n_in=3072, n_hidden=n_hidden, n_out=10,
                       n_hiddenLayers=n_hiddenLayers)

    # the cost we minimize during training is the negative log likelihood of
    # the model plus the regularization terms (L1 and L2); cost is expressed
    # here symbolically
    cost = (
        classifier.negative_log_likelihood(y)
        + L1_reg * classifier.L1
        + L2_reg * classifier.L2_sqr
    )

    # compiling a Theano function that computes the mistakes that are made
    # by the model on a minibatch
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    # compute the gradient of cost with respect to theta (stored in params);
    # the resulting gradients will be stored in a list gparams
    gparams = [T.grad(cost, param) for param in classifier.params]

    # specify how to update the parameters of the model as a list of
    # (variable, update expression) pairs
    updates = [
        (param, param - learning_rate * gparam)
        for param, gparam in zip(classifier.params, gparams)
    ]

    # compiling a Theano function `train_model` that returns the cost and at
    # the same time updates the parameters of the model based on the rules
    # defined in `updates`
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')
    train_nn(train_model, validate_model, test_model,
             n_train_batches, n_valid_batches, n_test_batches, n_epochs, verbose)

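# load_data2() is not shown here; its std argument suggests the images are
# smoothed with a Gaussian filter before training. A sketch of that
# preprocessing step (an assumption, not the original code), using SciPy:
import numpy
from scipy.ndimage import gaussian_filter

def smooth_images(images, std):
    """Apply a Gaussian blur of width `std` to each rasterized 3x32x32 image."""
    imgs = images.reshape(-1, 3, 32, 32)
    blurred = numpy.stack([gaussian_filter(img, sigma=(0, std, std)) for img in imgs])
    return blurred.reshape(images.shape[0], -1)
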
def test_CDNN(learning_rate=0.1, n_epochs=1000, nkerns=[16, 512], batch_size=200,
              verbose=False, filterwidth_layer0=3, filterheight_layer0=3, poolsize_layer0=2,
              filterwidth_layer1=6, filterheight_layer1=6, poolsize_layer1=2,
              neurons_layer2=300, neurons_layer3=300, smaller_set=False):
    """
    Wrapper function for testing CNN in cascade with DNN
    """
    rng = numpy.random.RandomState(23455)

    if smaller_set:
        datasets = load_data(ds_rate=5)
    else:
        datasets = load_data()

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches //= batch_size
    n_valid_batches //= batch_size
    n_test_batches //= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')    # the data is presented as rasterized images
    y = T.ivector('y')   # the labels are presented as 1D vector of [int] labels

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # Reshape matrix of rasterized images of shape (batch_size, 48 * 48)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer
    layer0_input = x.reshape((batch_size, 1, 48, 48))

    layer0 = LeNetConvPoolLayer(
        rng,
        input=layer0_input,
        image_shape=(batch_size, 1, 48, 48),
        filter_shape=(nkerns[0], 1, filterwidth_layer0, filterheight_layer0),
        poolsize=(poolsize_layer0, poolsize_layer0)
    )

    # At the output of the conv stage of layer0 the feature map shrinks to
    # (48 - filterwidth + 1, 48 - filterheight + 1); pooling then divides each
    # dimension by the pool size.
    layer0_outputwidth, layer0_outputheight = ((48 - filterwidth_layer0 + 1) / poolsize_layer0,
                                               (48 - filterheight_layer0 + 1) / poolsize_layer0)
    print '-------------------------------------------------------------------------------------------- \n'
    print 'Layer0 built. Shape of feature map :', layer0_outputwidth, layer0_outputheight, 'Number of feature maps : ', nkerns[0]

    # Construct the second convolutional pooling layer
    layer1 = LeNetConvPoolLayer(
        rng,
        input=layer0.output,
        image_shape=(batch_size, nkerns[0], layer0_outputwidth, layer0_outputheight),
        filter_shape=(nkerns[1], nkerns[0], filterwidth_layer1, filterheight_layer1),
        poolsize=(poolsize_layer1, poolsize_layer1)
    )
    print '-------------------------------------------------------------------------------------------- \n'
    layer1_outputwidth, layer1_outputheight = ((layer0_outputwidth - filterwidth_layer1 + 1) / poolsize_layer1,
                                               (layer0_outputheight - filterheight_layer1 + 1) / poolsize_layer1)
    print 'Layer1 built. Shape of feature map :', layer1_outputwidth, layer1_outputheight, 'Number of feature maps : ', nkerns[1]

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e. matrix of rasterized images).
    layer2_input = layer1.output.flatten(2)

    # construct a fully-connected sigmoidal layer
    layer2 = HiddenLayer(
        rng,
        input=layer2_input,
        n_in=nkerns[1] * layer1_outputwidth * layer1_outputheight,
        n_out=neurons_layer2,
        activation=T.tanh
    )
    print '-------------------------------------------------------------------------------------------- \n'
    print 'Layer2 built - MLP layer. Input neurons : ', nkerns[1] * layer1_outputwidth * layer1_outputheight, ' output neurons : ', neurons_layer2

    layer3 = HiddenLayer(
        rng,
        input=layer2.output,
        n_in=neurons_layer2,
        n_out=neurons_layer3,
        activation=T.tanh
    )
    print '-------------------------------------------------------------------------------------------- \n'
    print 'Layer3 built - MLP layer. Input neurons : ', neurons_layer2, ' output neurons : ', neurons_layer3

    # classify the values of the fully-connected sigmoidal layer
    layer4 = LogisticRegression(input=layer3.output, n_in=neurons_layer3, n_out=7)
    print '-------------------------------------------------------------------------------------------- \n'
    print 'Logistic Regression layer built. Input neurons: ', neurons_layer3, ' Output neurons :', 7

    # the cost we minimize during training is the NLL of the model
    cost = layer4.negative_log_likelihood(y)

    test_model = theano.function(
        [index],
        layer4.errors(y),
        givens={
            x: test_set_x[index * batch_size: (index + 1) * batch_size],
            y: test_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    validate_model = theano.function(
        [index],
        layer4.errors(y),
        givens={
            x: valid_set_x[index * batch_size: (index + 1) * batch_size],
            y: valid_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    # create a list of all model parameters to be fit by gradient descent
    params = layer4.params + layer3.params + layer2.params + layer1.params + layer0.params

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # train_model is a function that updates the model parameters by SGD.
    # Since this model has many parameters, it would be tedious to manually
    # create an update rule for each model parameter. We thus create the
    # updates list by automatically looping over all (params[i], grads[i]) pairs.
    updates = [
        (param_i, param_i - learning_rate * grad_i)
        for param_i, grad_i in zip(params, grads)
    ]

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')
    train_nn(train_model, validate_model, test_model,
             n_train_batches, n_valid_batches, n_test_batches, n_epochs, verbose)

def test_mlp_parity(n_bit):
    #f=open('./problem_b/shallow_mlp_8bit.txt','w')
    #f=open('./problem_b/shallow_mlp_12bit.txt','w')
    f = open('./problem_b/deep_mlp_8bit.txt', 'w')
    #f=open('./problem_b/deep_mlp_12bit.txt','w')

    batch_size = 24
    #n_hidden=24
    n_hidden = (24, 24, 24, 24)
    learning_rate = 0.08
    L1_reg = 0.0
    L2_reg = 0.0
    n_epochs = 300
    n_hiddenLayers = 4

    # generate datasets
    train_set = gen_parity_pair(n_bit, 2000)
    valid_set = gen_parity_pair(n_bit, 500)
    test_set = gen_parity_pair(n_bit, 100)

    # Convert raw dataset to Theano shared variables.
    train_set_x, train_set_y = shared_dataset(train_set)
    valid_set_x, valid_set_y = shared_dataset(valid_set)
    test_set_x, test_set_y = shared_dataset(test_set)

    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches //= batch_size
    n_valid_batches //= batch_size
    n_test_batches //= batch_size

    index = T.lscalar()  # index to a [mini]batch

    # start-snippet-1
    x = T.matrix('x')   # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of [int] labels
    #training_enabled = T.iscalar('training_enabled')

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')
    #print('... building the model', file=f)

    rng = np.random.RandomState(23455)

    layers_input = x.reshape((batch_size, n_bit))
    layers = myMLP(rng, input=layers_input, n_in=n_bit, n_hidden=n_hidden,
                   n_out=2, n_hiddenLayers=n_hiddenLayers)

    test_model = theano.function(
        [index],
        layers.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size],
            #training_enabled: numpy.cast['int32'](0)
        })

    validate_model = theano.function(
        [index],
        layers.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size],
            #training_enabled: numpy.cast['int32'](0)
        })

    cost = (layers.negative_log_likelihood(y)
            + layers.L1 * L1_reg
            + layers.L2_sqr * L2_reg)

    params = layers.params
    grads = [T.grad(cost, param) for param in params]
    updates = [(param_i, param_i - learning_rate * grad_i)
               for param_i, grad_i in zip(params, grads)]

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size],
            #training_enabled: numpy.cast['int32'](1)
        })

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')
    #print('... training', file=f)
    train_nn(train_model=train_model, validate_model=validate_model, test_model=test_model,
             n_train_batches=n_train_batches, n_valid_batches=n_valid_batches,
             n_test_batches=n_test_batches, n_epochs=n_epochs, fil=f)

    f.close()

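# gen_parity_pair() is defined elsewhere; conceptually it draws random n_bit
# binary vectors and labels each with its parity. A minimal sketch under that
# assumption (not the original implementation):
import numpy as np

def gen_parity_pair(n_bit, num_samples):
    """Return (X, y): random n_bit 0/1 vectors and their parity (sum mod 2)."""
    X = np.random.randint(0, 2, size=(num_samples, n_bit))
    y = X.sum(axis=1) % 2
    return X, y
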
# n_classes = 2
# result = {}
# result["trainset"] = np.array(trainset[range(70)].values.tolist())
# result["trainlabels"] = np.array(trainset["labels"].apply(lambda x: features.onehot(x, n_classes - 1)).values.tolist())
# result["trainids"] = np.array(trainset.id.values.tolist())
# result["testdata"] = np.array(testset[range(70)].values.tolist())
# result["testlabels"] = np.array(testset["labels"].apply(lambda x: features.onehot(x, n_classes - 1)).values.tolist())
# result["testids"] = np.array(testset.id.values.tolist())
# result["nclasses"] = 1
# result["allvectors"] = np.array(dset.all_vectors_store["data"][range(70)].values.tolist())
# result["allvectorsids"] = np.array(dset.all_vectors_store["data"]["id"].values.tolist())

import nn
path = root + "results/test/"
nn.train_nn(path, nn_data, 0)

# compare jihad2.csv with a nn that classifies 1-vs-1
import pandas as pd
data = pd.read_hdf(root + "results/test/0/probs.h5")
tweets = pd.read_csv(root + "datasets/data_sample.csv")
merged = pd.merge(data, tweets, on="id").sort_values(1)

ntokens = merged
rm_list = ["<stopword>", "<mention>", "<url>", "rt"]
ntokens["count"] = merged.filtered_text.apply(lambda x: len([a for a in x.split() if a not in rm_list]))
for i in range(1, 20):
    filter = ntokens[ntokens["count"] == i]

def main(passed_args=None):
    parser = argparse.ArgumentParser(
        description="train a neural network on tweets against prices")
    parser.add_argument(
        "--word2vec", "-w",
        dest="word2vec",
        action="store_true",
        default=False,
        help="toggle this option if you are obtaining dataset using word2vec",
    )
    parser.add_argument(
        "--tune", "-t",
        dest="tuning",
        action="store_true",
        default=False,
        help="toggle this option if you are tuning hyperparameters",
    )
    parser.add_argument(
        "--rnn", "-r",
        dest="train_rnn",
        action="store_true",
        default=False,
        help="toggle this option to train rnn",
    )
    parser.add_argument(
        "--predict", "-d",
        dest="predict",
        action="store_true",
        default=False,
        help="toggle this option if you are making predictions",
    )
    parser.add_argument(
        "--markowitz", "-m",
        dest="markowitz",
        action="store_true",
        default=False,
        help="toggle this option if you are doing Markowitz portfolio optimisation",
    )
    parser.add_argument(
        "--glove", "-g",
        dest="glove",
        action="store_true",
        default=False,
        help="toggle this option if you are obtaining dataset using glove",
    )
    parser.add_argument(
        "--metrics", "-f",
        dest="metrics",
        action="store_true",
        default=False,
        help="toggle this option if you are evaluating the metrics",
    )
    args = parser.parse_args(passed_args)

    if args.word2vec:
        # prepare Word2Vec model
        if not os.path.exists(PATH_TO_WORD2VEC):
            w2v.train_word2vec()
        # prepare all data required
        prices = d.load_prices()
        w2v_model = w2v.load_word2vec()
        for stock in stock_universe:
            d.get_return_by_stock(stock, prices)
            d.load_tweets_by_stock(stock)
            w2v.get_padded_embeddings(stock, w2v_model)
        sys.exit()

    if args.glove:
        # prepare all data required
        prices = d.load_prices()
        w2v_model = w2v.load_glove_model(
            path_to_glove="~/Downloads/GloVe-1.2/glove.twitter.27B.50d.txt",
            path_to_output="./temp/glove_pretrained_w2vformat.txt",
        )
        for stock in stock_universe:
            d.get_return_by_stock(stock, prices)
            d.load_tweets_by_stock(stock)
            w2v.get_padded_embeddings(
                stock,
                w2v_model,
                path_to_output="./temp/padded_embeddings/glove_pretrained",
            )
        sys.exit()

    if args.tuning:
        hyperparam_list = get_hyperparam_list(NN_HYPERPARAM_DICT)
        best_hyperparam_list = []
        for stock in stock_universe:
            print(stock)
            x = pd.read_pickle("temp/padded_embeddings/glove_pretrained/pickle/" + stock + ".pickle")
            y = pd.read_pickle("temp/returns/pickle/" + stock + ".pickle")
            torch_dataset = nn.get_tensor_dataset(x, y)
            tuning_list = []
            for hyperparam in hyperparam_list:
                train_set, _ = nn.train_test_split(torch_dataset, hyperparam["TEST_SIZE"])
                train_set, validation_set = nn.train_test_split(train_set, hyperparam["VALIDATION_SIZE"])
                _, _, validation_losses = nn.train_nn(train_set, validation_set, hyperparam)
                tuning_list.append((hyperparam, validation_losses[-1]))
            tuning_list.sort(key=operator.itemgetter(1))
            best_hyperparam = tuning_list[0][0]
            best_hyperparam_list.append((stock, best_hyperparam))
        with open("./temp/best-hyperparam-glove-pretrained.txt", "wb") as f:
            pickle.dump(best_hyperparam_list, f)
        print(best_hyperparam_list)
        sys.exit()

    if args.predict:
        if os.path.exists("./temp/best-hyperparam-glove.txt"):
            with open("./temp/best-hyperparam-glove.txt", "rb") as f:
                best_hyperparam_list = pickle.load(f)
            best_hyperparam_dict = dict(best_hyperparam_list)
        for stock in stock_universe:
            hyperparam = best_hyperparam_dict[stock]
            x = pd.read_pickle("temp/padded_embeddings/glove/pickle/" + stock + ".pickle")
            y = pd.read_pickle("temp/returns/pickle/" + stock + ".pickle")
            torch_dataset = nn.get_tensor_dataset(x, y)
            _, test_set = nn.train_test_split(torch_dataset, hyperparam["TEST_SIZE"])
            results = nn.predict_nn(test_set, "temp/nn/glove/" + stock + ".pth")
            results_df = pd.DataFrame(results)
            results_df.columns = ["y", "pred", "loss"]
            if not os.path.exists("./output/glove"):
                os.makedirs("./output/glove")
            results_df.to_csv("./output/glove/" + stock + ".csv")
        sys.exit()

    if args.train_rnn:
        eval_only = True
        hyperparam_list = get_hyperparam_list(RNN_HYPERPARAM_DICT)
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        for hyperparam in hyperparam_list:
            for stock in stock_universe:
                print(stock)
                returns = pd.read_pickle("temp/returns/pickle/" + stock + ".pickle")
                returns = nn.normalise(torch.tensor(np.stack(returns.values, axis=0), device=device))
                vectorised_seq, vocab = rnn.get_vectorised_seq_by_stock(stock)
                input_size = len(vocab)
                encoder, feedforward, results = rnn.train_rnn(
                    vectorised_seq,
                    returns,
                    input_size,
                    hyperparam,
                    eval_only=eval_only,
                    path_to_encoder="temp/rnn/encoder/" + stock + ".pth",
                    path_to_feedforward="temp/rnn/feedforward/" + stock + ".pth",
                )
                if eval_only == False:
                    if not os.path.exists("temp/rnn"):
                        os.makedirs("temp/rnn/encoder")
                        os.makedirs("temp/rnn/feedforward")
                    torch.save(encoder.state_dict(), "temp/rnn/encoder/" + stock + ".pth")
                    torch.save(feedforward.state_dict(), "temp/rnn/feedforward/" + stock + ".pth")
                results_df = pd.DataFrame(results)
                results_df.columns = ["returns", "pred", "loss"]
                if not os.path.exists("./output/rnn"):
                    os.makedirs("./output/rnn")
                results_df.to_csv("./output/rnn/" + stock + ".csv")
        sys.exit()

    if args.markowitz:
        model_dict = {
            "dtm": "purple",
            "tfidf": "pink",
            "word2vec": "black",
            "glove": "blue",
            "glove_pretrained": "green",
            "rnn": "orange",
            "actual": "red",
        }
        mean_var_dict = d.get_etf_mean_var()
        p.plot_frontier_with_points(model_dict, mean_var_dict)
        # p.plot_frontier(model_dict)
        sys.exit()

    if args.metrics:
        models = ["rnn", "glove", "glove_pretrained", "word2vec", "dtm", "tfidf"]
        for model in models:
            me.get_metrics_summary(model)
        sys.exit()

    if os.path.exists("./temp/best-hyperparam-glove.txt"):
        with open("./temp/best-hyperparam-glove.txt", "rb") as f:
            best_hyperparam_list = pickle.load(f)
        best_hyperparam_dict = dict(best_hyperparam_list)
    for stock in stock_universe:
        print(stock)
        hyperparam = best_hyperparam_dict[stock]
        x = pd.read_pickle("temp/padded_embeddings/glove/pickle/" + stock + ".pickle")
        y = pd.read_pickle("temp/returns/pickle/" + stock + ".pickle")
        torch_dataset = nn.get_tensor_dataset(x, y)
        train_set, test_set = nn.train_test_split(torch_dataset, hyperparam["TEST_SIZE"])
        model, _, _ = nn.train_nn(train_set, test_set, hyperparam)
        if not os.path.exists("temp/nn/glove"):
            os.makedirs("temp/nn/glove")
        torch.save(model.state_dict(), "temp/nn/glove/" + stock + ".pth")
    sys.exit()

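# nn.train_test_split() and nn.get_tensor_dataset() are project helpers that are
# not shown here. A plausible sketch of the split on top of torch.utils.data
# (an assumption, not the project's actual implementation):
from torch.utils.data import random_split

def train_test_split(dataset, test_size):
    """Split a torch Dataset into (train, test), with test_size given as a fraction."""
    n_test = int(len(dataset) * test_size)
    n_train = len(dataset) - n_test
    return random_split(dataset, [n_train, n_test])
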
def test_mlp_with_new_functionality(learning_rate=0.01, L1_reg=0.00,
                                    L2_reg=0.0001, n_epochs=100, batch_size=128,
                                    n_hidden=500, n_hiddenLayers=3, verbose=False,
                                    smaller_set=True, example_index=0,
                                    adversarial_parameter=0.01,
                                    distribution='constant'):
    """
    Wrapper function for training and testing an MLP, then evaluating it on an
    adversarial example built from one test image.

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic gradient).

    :type L1_reg: float
    :param L1_reg: L1-norm's weight when added to the cost (see regularization).

    :type L2_reg: float
    :param L2_reg: L2-norm's weight when added to the cost (see regularization).

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer.

    :type batch_size: int
    :param batch_size: number of examples in a minibatch.

    :type n_hidden: int or list of ints
    :param n_hidden: number of hidden units. If a list, it specifies the number
    of units in each hidden layer, and its length should equal n_hiddenLayers.

    :type n_hiddenLayers: int
    :param n_hiddenLayers: number of hidden layers.

    :type verbose: boolean
    :param verbose: to print out epoch summary or not to.

    :type smaller_set: boolean
    :param smaller_set: to use the smaller dataset or not to.

    :type example_index: int
    :param example_index: index of the test example used for the adversarial test.

    :type adversarial_parameter: float
    :param adversarial_parameter: step size applied to the gradient sign when
    building the adversarial example.
    """
    # load the dataset; download the dataset if it is not present
    train_set, valid_set, test_set = load_data(ds_rate=5, theano_shared=False)

    # single test example used later for the adversarial test
    example_x = test_set[0][example_index:example_index + 1]
    example_y = test_set[1][example_index:example_index + 1]
    shared_example_x, shared_example_y = shared_dataset([example_x, example_y])

    # Convert raw dataset to Theano shared variables.
    test_set_x, test_set_y = shared_dataset(test_set)
    valid_set_x, valid_set_y = shared_dataset(valid_set)
    train_set_x, train_set_y = shared_dataset(train_set)

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    print 'n_train_batches : ', n_train_batches
    print 'n_valid_batches : ', n_valid_batches
    print 'n_test_batches : ', n_test_batches

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')    # the data is presented as rasterized images
    y = T.ivector('y')   # the labels are presented as a 1D vector of [int] labels

    rng = numpy.random.RandomState(1234)
    # construct the neural network: myMLP(input, n_in, n_hidden, n_out, n_hiddenLayers)
    classifier = myMLP(rng, input=x, n_in=3072, n_hidden=n_hidden, n_out=10,
                       n_hiddenLayers=n_hiddenLayers)

    # the cost we minimize during training is the negative log likelihood of
    # the model plus the regularization terms (L1 and L2); cost is expressed
    # here symbolically
    cost = (
        classifier.negative_log_likelihood(y)
        + L1_reg * classifier.L1
        + L2_reg * classifier.L2_sqr
    )

    # compiling a Theano function that computes the mistakes that are made
    # by the model on a minibatch
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    # same check, but on a single test example selected by its index
    test_example = theano.function(
        inputs=[index],
        outputs=[classifier.errors(y), classifier.p_y_given_x],
        givens={
            x: test_set_x[index * 1:(index + 1) * 1],
            y: test_set_y[index * 1:(index + 1) * 1]
        }
    )

    # same check, but on the example held in the shared variables
    test_example2 = theano.function(
        inputs=[],
        outputs=[classifier.errors(y), classifier.p_y_given_x],
        givens={
            x: shared_example_x,
            y: shared_example_y
        }
    )

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    # compute the gradient of cost with respect to theta (stored in params);
    # the resulting gradients will be stored in the list gparams
    gparams = [T.grad(cost, param) for param in classifier.params]

    # specify how to update the parameters of the model as a list of
    # (variable, update expression) pairs
    updates = [
        (param, param - learning_rate * gparam)
        for param, gparam in zip(classifier.params, gparams)
    ]

    # compiling a Theano function `train_model` that returns the cost and, at
    # the same time, updates the parameters of the model based on the rules
    # defined in `updates`
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    train_nn(train_model, validate_model, test_model,
             n_train_batches, n_valid_batches, n_test_batches, n_epochs, verbose)

    ##########################################
    # Performing adversarial example testing #
    ##########################################
    print '-------------------------------------'
    print 'example x :', example_x
    image = getImage(test_set[0][example_index])
    plt.figure(1)
    plt.imshow(image)
    print 'example y :', example_y
    print '-------------------------------------'

    classification, probabilities = test_example(example_index)
    # errors(y) on a single example is 0 exactly when the prediction is correct
    print 'Correct classification performed :', int(classification) == 0
    print 'probabilities : ', probabilities
    print 'Number predicted :', numpy.argmax(probabilities)
    print 'with probability :', numpy.max(probabilities) * 100
    print '-------------------------------------'

    # gradient of the cost with respect to the input, used to build the
    # adversarial example
    gadversarial = [T.grad(cost, x)]

    grad_cost_wrt_x = theano.function(
        inputs=[index],
        outputs=gadversarial,
        givens={
            x: test_set_x[index * 1:(index + 1) * 1],
            y: test_set_y[index * 1:(index + 1) * 1]
        }
    )

    print 'Creating adversarial example and trying to get results for that ...\n\n\n'
    print '-------------------------------------'

    gradient_cost_wrt_x = grad_cost_wrt_x(example_index)
    gradient_sign = numpy.sign(gradient_cost_wrt_x)
    gradient_sign = numpy.reshape(gradient_sign, (1, 3072))
    adversarial_example_x = example_x + adversarial_parameter * gradient_sign

    input_image_x, input_image_y = shared_dataset([adversarial_example_x, example_y])

    test_input_example = theano.function(
        inputs=[],
        outputs=[classifier.errors(y), classifier.p_y_given_x],
        givens={
            x: input_image_x,
            y: input_image_y
        }
    )

    adversarial_classification, input_image_output_probabilities = test_input_example()
    print 'Correct adversarial classification performed :', int(adversarial_classification) == 0

    image2 = getImage(adversarial_example_x)
    plt.figure(2)
    plt.imshow(image2)

    print 'probabilities : ', input_image_output_probabilities
    print 'Number predicted :', numpy.argmax(input_image_output_probabilities)
    print 'with probability :', numpy.max(input_image_output_probabilities) * 100
    print '-------------------------------------'
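# For reference, the adversarial step above is the fast gradient sign method:
#     x_adv = x + epsilon * sign(grad_x J(theta, x, y)),
# with adversarial_parameter playing the role of epsilon. A minimal NumPy-only
# sketch of that update, with grad_fn standing in for the compiled
# grad_cost_wrt_x function (names here are illustrative, not from the original):
import numpy

def fgsm_perturb(example_x, grad_fn, epsilon=0.01):
    """Move example_x one step in the direction that increases the cost."""
    gradient = numpy.asarray(grad_fn(example_x))
    return example_x + epsilon * numpy.sign(gradient).reshape(example_x.shape)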
except ZeroDivisionError:
    print "empty dataset"

print "Jihad data size post balance %i" % len(d_jihad.index)
assert len(d_jihad.index) == len(d_voetbal.index)

all = d_voetbal.append(d_jihad).sample(frac=1)
all = all.reset_index()
print all

trainset = all.sample(frac=0.8, random_state=200)
testset = all.drop(trainset.index)

result = {}
n_classes = 2
result["trainset"] = np.array(trainset[range(70)].values.tolist())
result["trainlabels"] = np.array(trainset["labels"].apply(
    lambda x: features.onehot(x, n_classes - 1)).values.tolist())
result["trainids"] = np.array(trainset.id.values.tolist())
result["testdata"] = np.array(testset[range(70)].values.tolist())
result["testlabels"] = np.array(testset["labels"].apply(
    lambda x: features.onehot(x, n_classes - 1)).values.tolist())
result["testids"] = np.array(testset.id.values.tolist())
result["nclasses"] = 2

tmp_data = dset.all_vectors_store["data"]  # .sample(n=43000)
result["allvectors"] = np.array(tmp_data[range(70)].values.tolist())
result["allvectorsids"] = np.array(tmp_data["id"].values.tolist())
print result

import nn
path = root + "results/test/ntokens/hashtag/"
os.mkdir(path + str(i))
nn.train_nn(path + str(i) + "/", result, 0)

# make validation set, not containing ids from data_sample.
except ZeroDivisionError:
    print "empty dataset"

print "Jihad data size post balance %i" % len(d_jihad.index)
assert len(d_jihad.index) == len(voetbal.index)
d_voetbal = voetbal

all = d_voetbal.append(d_jihad).sample(frac=1)
all = all.reset_index()
print all

trainset = all.sample(frac=0.8, random_state=200)
testset = all.drop(trainset.index)

result = {}
n_classes = 2
result["trainset"] = np.array(trainset[range(70)].values.tolist())
result["trainlabels"] = np.array(trainset["labels"].apply(
    lambda x: features.onehot(x, n_classes - 1)).values.tolist())
result["trainids"] = np.array(trainset.id.values.tolist())
result["testdata"] = np.array(testset[range(70)].values.tolist())
result["testlabels"] = np.array(testset["labels"].apply(
    lambda x: features.onehot(x, n_classes - 1)).values.tolist())
result["testids"] = np.array(testset.id.values.tolist())
result["nclasses"] = 2

tmp_data = dset.all_vectors_store["data"]  # .sample(n=43000)
result["allvectors"] = np.array(tmp_data[range(70)].values.tolist())
result["allvectorsids"] = np.array(tmp_data["id"].values.tolist())
print result

import nn
path = root + "results/test/"
nn.train_nn(path, result, 0)
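# Both fragments above rely on features.onehot(label, n_classes - 1) to turn an
# integer class label into a one-hot target for nn.train_nn. The real helper
# lives in the project's features module and may differ; this is only a sketch
# of the assumed behaviour (a vector of length max_index + 1 with a 1 at `label`):
import numpy as np

def onehot(label, max_index):
    """Hypothetical stand-in for features.onehot: one-hot encode `label`."""
    vec = np.zeros(max_index + 1, dtype=np.float32)
    vec[int(label)] = 1.0
    return vec.tolist()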