def validate(X, y, net):
    """Evaluate `net` on the held-out tail of (X, y) and save predictions.

    Relies on module-level `split_at` and `output_predictions_file`.
    Prints the test-set MSE and writes the raw predictions to disk.
    """
    # Test set: everything from `split_at` onward.
    x_test = X[split_at:, :]
    # `__getslice__` was removed in Python 3 -- plain slicing is equivalent.
    y_test = y[split_at:y.shape[0]]
    y_test = y_test.reshape(-1, 1)
    input_size = x_test.shape[1]
    target_size = y_test.shape[1]
    assert (net.indim == input_size)
    assert (net.outdim == target_size)
    # prepare dataset (real labels are available here, so no dummy target)
    ds = SDS(input_size, target_size)
    ds.setField('input', x_test)
    ds.setField('target', y_test)
    # predict and report the error
    p = net.activateOnDataset(ds)
    mse = MSE(y_test, p)
    print("testing MSE:", mse)
    np.savetxt(output_predictions_file, p, fmt='%.6f')
def fit(self, X, y):
    """Train the network on X (n_samples x in_size) and y (n_samples x out_size).

    Stores the trained net on `self.net` and returns self for chaining.
    """
    _, self.in_size = X.shape
    _, self.out_size = y.shape
    ds = SDS(self.in_size, self.out_size)
    ds.setField('input', X)
    ds.setField('target', y)
    self.net = buildNetwork(self.in_size, self.h_size, self.out_size, bias=True)
    trainer = BP(self.net, ds)
    print("start training ...")
    # `xrange` does not exist in Python 3; `range` behaves identically here.
    for n in range(self.epo):
        mse = trainer.train()  # one epoch; returns the mean squared error
        rmse = sqrt(mse)
        print("RMSE = %8.3f epoch = %d" % (rmse, n))
    return self
def fit(self, X, y):
    """Train the network on X and a 1-D label vector y.

    y is promoted to a column matrix before building the dataset.
    Returns self for chaining.
    """
    # promote the 1-D label vector to shape (n_samples, 1)
    y_train = np.array([[yn] for yn in y])
    _, self.in_size = X.shape
    _, self.out_size = y_train.shape
    ds = SDS(self.in_size, self.out_size)
    ds.setField('input', X)
    ds.setField('target', y_train)
    self.net = buildNetwork(self.in_size, self.h_size, self.out_size, bias=True)
    trainer = BP(self.net, ds)
    print("start training ...")
    # `xrange` does not exist in Python 3; `range` behaves identically here.
    for n in range(self.epo):
        mse = trainer.train()
        rmse = sqrt(mse)
        if self.verbose:
            print("RMSE = %8.3f epoch = %d" % (rmse, n))
    return self
def train_fn(trainfile, hiddennodes, output_model_file):
    """Train a sigmoid/sigmoid network on `trainfile` until convergence and
    pickle the trained net to `output_model_file`."""
    hidden_size = hiddennodes
    print('Loading data..')
    x_train, y_train = load_data(trainfile)
    input_size = x_train.shape[1]
    target_size = y_train.shape[1]
    # prepare dataset
    ds = SDS(input_size, target_size)
    ds.setField('input', x_train)
    ds.setField('target', y_train)
    # init and train
    net = buildNetwork(input_size, hidden_size, target_size, bias=True,
                       hiddenclass=SigmoidLayer, outclass=SigmoidLayer)
    trainer = BackpropTrainer(net, ds)
    print('Training..')
    trainer.trainUntilConvergence(validationProportion=0.15,
                                  maxEpochs=1000, continueEpochs=10)
    print('Finish training. Serializing model...')
    # `with` closes the handle; the original leaked an open file object
    with open(output_model_file, 'wb') as f:
        pickle.dump(net, f)
def predict(isGroup):
    """Load the pickled model for the given group flag, score the matching
    test CSV, and write predictions to 'predictions_file.txt'."""
    test_file = '/home/rodolfo/Projetos/NeuralNetwork/data/test_groups_%s_file.csv' % isGroup
    model_file = 'model_groups_%s.pkl' % isGroup
    output_predictions_file = 'predictions_file.txt'
    # deserialize the trained network
    net = pickle.load(open(model_file, 'rb'))
    # last column is the label, the rest are features
    test = np.loadtxt(test_file, delimiter=',')
    x_test = test[:, 0:-1]
    y_test = test[:, -1].reshape(-1, 1)
    # prediction does not need real labels -- zeros fill the target field
    y_test_dummy = np.zeros(y_test.shape)
    input_size = x_test.shape[1]
    target_size = y_test.shape[1]
    assert (net.indim == input_size)
    assert (net.outdim == target_size)
    # wrap in a dataset and activate the network on every row
    ds = SDS(input_size, target_size)
    ds.setField('input', x_test)
    ds.setField('target', y_test_dummy)
    p = net.activateOnDataset(ds)
    np.savetxt(output_predictions_file, p, fmt='%.6f')
def predict(X, net):
    """Run `net` over every row of X and persist the predictions.

    Writes the raw matrix to "1_" + output_predictions_file (module global)
    and a 1-indexed single-column CSV 'neural_prediction_3.csv'.
    """
    x_test = X[:, :]
    # prediction needs a target field; zeros work as a placeholder
    y_test_dummy = np.zeros((X.shape[0], 1))
    input_size = x_test.shape[1]
    target_size = y_test_dummy.shape[1]
    assert (net.indim == input_size)
    assert (net.outdim == target_size)
    # prepare dataset
    ds = SDS(input_size, target_size)
    ds.setField('input', x_test)
    ds.setField('target', y_test_dummy)
    p = net.activateOnDataset(ds)
    print(p.shape)  # Python 3 compatible (was a Python 2 print statement)
    np.savetxt("1_" + output_predictions_file, p, fmt='%.6f')
    s = pd.Series(p[:, 0])
    s.index += 1  # submission IDs are 1-based
    s.to_csv('neural_prediction_3.csv', header=['Prediction'], index=True,
             index_label='ID')
def test(self, arr):
    """Predict DNA for `arr` using the pickled (net, scaler) bundle.

    Loads the model bundle from `self.model_file`, scales the inputs with
    the stored StandardScaler, activates the net, and converts the raw
    outputs to DNA via `self.convert_to_dna`.
    """
    # bundle = [trained network, fitted StandardScaler]
    net, std_scale = pickle.load(open(self.model_file, 'rb'))
    print('Finish loading model')
    # Load test data
    x_test, y_test = load_data(arr)
    x_test_scaled = std_scale.transform(x_test)  # normalize to standard normal
    # prediction does not need real labels -- zeros fill the target field
    y_test_dummy = np.zeros(y_test.shape)
    input_size = x_test_scaled.shape[1]
    target_size = y_test.shape[1]
    assert (net.indim == input_size)
    assert (net.outdim == target_size)
    # prepare dataset
    ds = SDS(input_size, target_size)
    ds.setField('input', x_test_scaled)
    ds.setField('target', y_test_dummy)
    # predict
    print('Activating ds')
    p = net.activateOnDataset(ds)
    # note: outputs are used unscaled (inverse-transform was abandoned)
    dna = self.convert_to_dna(p)
    return dna
def train():
    """Train a crime-prediction network from the module-level `train_file`
    CSV, pickle it to `output_model_file`, and return the per-epoch RMSEs."""
    print("-------------------------------------------------")
    print("loading data...")
    print("file to be loaded: ", train_file)
    # returns a numpy ndarray
    train = np.loadtxt(train_file, delimiter=',')
    print("data loaded to a ", type(train), " of size: ", train.shape,
          " and type:", train.dtype)
    print("Spliting inputs and output for training...")
    # all-but-last columns are inputs; last column is the target
    inputs_train = train[:, 0:-1]
    output_train = train[:, -1].reshape(-1, 1)
    print("inputs in a ", type(inputs_train), " of size: ",
          inputs_train.shape, " and type:", inputs_train.dtype)
    print("output in a ", type(output_train), " of size: ",
          output_train.shape, " and type:", output_train.dtype)
    print("-------------------------------------------------")
    print("Setting up supervised dataset por pyBrain training...")
    input_size = inputs_train.shape[1]
    target_size = output_train.shape[1]
    dataset = SDS(input_size, target_size)
    dataset.setField('input', inputs_train)
    dataset.setField('target', output_train)
    print("-------------------------------------------------")
    print("Setting up supervised dataset por pyBrain training...")
    hidden_size = 50
    epochs = 600
    crime_network = buildNetwork(input_size, hidden_size, target_size,
                                 bias=True, hiddenclass=SigmoidLayer,
                                 outclass=LinearLayer)
    trainer = BackpropTrainer(crime_network, dataset)
    print("-------------------------------------------------")
    rmse_vector = []
    print("training for {} epochs...".format(epochs))
    for i in range(epochs):
        mse = trainer.train()
        rmse = sqrt(mse)
        print("training RMSE, epoch {}: {}".format(i + 1, rmse))
        rmse_vector.append(rmse)
    print("-------------------------------------------------")
    pickle.dump(crime_network, open(output_model_file, 'wb'))
    print("Training done!")
    print("-------------------------------------------------")
    return rmse_vector
def train(train_select, validate_select, aggregate_ttrss):
    """Train a feed-forward net on the combined train+validation frames,
    pickle it to 'model.pkl', and return the trained network."""
    train = pd_to_numpy(train_select, aggregate_ttrss)
    validation = pd_to_numpy(validate_select, aggregate_ttrss)
    output_model_file = 'model.pkl'
    hidden_size = 20
    epochs = 10
    # stack validation rows under the training rows
    train = np.vstack((train, validation))
    x_train = train[:, 0:-1]
    # target is the last column as a column vector (the original reshaped
    # twice -- once is enough)
    y_train = train[:, -1].reshape(-1, 1)
    print(x_train, y_train)
    input_size = x_train.shape[1]
    target_size = y_train.shape[1]
    # prepare dataset
    ds = SDS(input_size, target_size)
    ds.setField('input', x_train)
    ds.setField('target', y_train)
    # init and train
    net = buildNetwork(
        input_size,
        hidden_size,
        target_size,
        bias=True,
    )
    trainer = BackpropTrainer(net, ds, verbose=True, weightdecay=0.01)
    print("training for {} epochs...".format(epochs))
    print(input_size, target_size, x_train, y_train)
    for i in range(epochs):
        mse = trainer.train()
        rmse = sqrt(mse)
        print("training RMSE, epoch {}: {}".format(i + 1, rmse))
    pickle.dump(net, open(output_model_file, 'wb'))
    return net
def nn(train_source, test_source, validation=False, v_size=0.5):
    """Train a feed-forward net on `train_source` (first CSV column is the
    label, the rest are features) and print per-epoch training RMSE.

    With validation=True the training data is split internally; otherwise
    `test_source` is loaded as the held-out set (currently unused beyond
    loading).
    """
    hidden_size = 100
    epochs = 600
    # load data
    train = read_csv(train_source)
    # count the columns from the first line; `with` closes the handle
    # (the original leaked an open file object)
    with open(train_source) as tmp:
        feature_count = len(next(tmp).split(","))
    # list(range(...)) keeps the pandas indexer valid on Python 3 as well
    trainX = np.asarray(train[list(range(1, feature_count))])
    trainY = np.asarray(train[[0]]).ravel()
    testX = None
    testY = None
    if validation:
        # --- CROSS VALIDATION ---
        trainX, testX, trainY, testY = cross_validation.train_test_split(
            trainX, trainY, test_size=v_size, random_state=0)
    else:
        # --- TEST DATA ---
        test = read_csv(test_source)
        testX = np.asarray(test[list(range(1, feature_count))])
        testY = np.asarray(test[[0]]).ravel()
    input_size = len(trainX[0])
    target_size = 1
    print(input_size)
    print(target_size)
    # prepare dataset
    ds = SDS(input_size, target_size)
    ds.setField('input', trainX)
    ds.setField('target', [[item] for item in trainY])
    # init and train
    net = buildNetwork(input_size, hidden_size, target_size, bias=True)
    trainer = BackpropTrainer(net, ds)
    print("training for {} epochs...".format(epochs))
    for i in range(epochs):
        mse = trainer.train()
        rmse = sqrt(mse)
        print("training RMSE, epoch {}: {}".format(i + 1, rmse))
def validate(self):
    """
    The main method of this class. It runs the crossvalidation process
    and returns the validation result (e.g. performance).
    """
    dataset = self._dataset
    trainer = self._trainer
    n_folds = self._n_folds
    l = dataset.getLength()
    inp = dataset.getField("input")
    tar = dataset.getField("target")
    indim = dataset.indim
    outdim = dataset.outdim
    assert l > n_folds
    perms = array_split(permutation(l), n_folds)
    perf = 0.
    for i in range(n_folds):
        # determine train indices: every fold except the i-th
        # (list(...) keeps .pop available on Python 3, where range is lazy)
        train_perms_idxs = list(range(n_folds))
        train_perms_idxs.pop(i)
        temp_list = []
        for train_perms_idx in train_perms_idxs:
            temp_list.append(perms[train_perms_idx])
        train_idxs = concatenate(temp_list)
        # determine test indices
        test_idxs = perms[i]
        # train on the selected folds
        train_ds = SupervisedDataSet(indim, outdim)
        train_ds.setField("input", inp[train_idxs])
        train_ds.setField("target", tar[train_idxs])
        trainer = copy.deepcopy(self._trainer)
        trainer.setData(train_ds)
        if not self._max_epochs:
            # BUG FIX: the original referenced `trainer.train` without
            # calling it, so this branch never trained at all.
            trainer.train()
        else:
            trainer.trainEpochs(self._max_epochs)
        # evaluate on the held-out fold
        test_ds = SupervisedDataSet(indim, outdim)
        test_ds.setField("input", inp[test_idxs])
        test_ds.setField("target", tar[test_idxs])
        # BUG FIX: performance was computed on the full dataset even though
        # test_ds was built; score on the held-out fold instead.
        perf += self._calculatePerformance(trainer.module, test_ds)
    perf /= n_folds
    return perf
def train(
    train,
    label,
    custom_net=None,
    training_mse_threshold=0.40,
    testing_mse_threshold=0.60,
    epoch_threshold=10,
    epochs=100,
    hidden_size=20,
):
    """Train a network with RProp-, tracking test RMSE each epoch and
    checkpointing the best model seen so far.

    Uses module-level globals `split_at` and `model_file`. The *_threshold
    parameters are currently unused but kept for interface compatibility.
    Returns the trained network.
    """
    # train/test split; plain slicing replaces the removed __getslice__
    x_train = train[0:split_at, :]
    y_train = label[0:split_at].reshape(-1, 1)
    x_test = train[split_at:, :]
    y_test = label[split_at:label.shape[0]].reshape(-1, 1)
    # Shape.
    input_size = x_train.shape[1]
    target_size = y_train.shape[1]
    # prepare training dataset
    ds = SDS(input_size, target_size)
    ds.setField("input", x_train)
    ds.setField("target", y_train)
    # prepare test dataset
    ds_test = SDS(input_size, target_size)
    ds_test.setField("input", x_test)
    ds_test.setField("target", y_test)
    min_mse = 1000000
    # init and train
    if custom_net is None:  # identity test instead of `== None`
        net = buildNetwork(input_size, hidden_size, target_size, bias=True)
    else:
        print("Picking up the custom network")
        net = custom_net
    trainer = RPropMinusTrainer(net, dataset=ds, verbose=False,
                                weightdecay=0.01, batchlearning=True)
    print("training for {} epochs...".format(epochs))
    for i in range(epochs):
        mse = trainer.train()
        print("training mse, epoch {}: {}".format(i + 1, math.sqrt(mse)))
        # score the held-out set each epoch
        p = net.activateOnDataset(ds_test)
        mse = math.sqrt(MSE(y_test, p))
        print("-- testing mse, epoch {}: {}".format(i + 1, mse))
        pickle.dump(net, open("current_run", "wb"))
        if min_mse > mse:
            print("Current minimum found at ", i)
            pickle.dump(net, open("current_min_epoch_" + model_file, "wb"))
            min_mse = mse
    pickle.dump(net, open(model_file, "wb"))
    return net
def Neural_Network(xtrain, ytrain, xtest, ytest):
    """Train a small feed-forward net on the training split, then report
    MSE/RMSE per training epoch and for the held-out test set."""
    # Hidden-layer width.
    hidden_net = 2
    # An epoch is a single pass through the entire training set.
    epoch = 2
    ytrain = ytrain.reshape(-1, 1)
    n_in, n_out = xtrain.shape[1], ytrain.shape[1]
    train_ds = SupervisedDataSet(n_in, n_out)
    train_ds.setField('input', xtrain)
    train_ds.setField('target', ytrain)
    network = buildNetwork(n_in, hidden_net, n_out, bias=True)
    # Backpropagation trainer over the supervised dataset.
    trainer = BackpropTrainer(network, train_ds)
    print("---------------Neural Network---------------")
    print("Train Data")
    for e in range(epoch):
        mse = trainer.train()
        rmse = math.sqrt(mse)
        print("MSE, epoch {}: {}".format(e + 1, mse))
        print("RMSE, epoch {}: {}".format(e + 1, rmse))
    # Evaluate on the test split.
    ytest = ytest.reshape(-1, 1)
    test_ds = SupervisedDataSet(xtest.shape[1], ytest.shape[1])
    test_ds.setField('input', xtest)
    test_ds.setField('target', ytest)
    model = network.activateOnDataset(test_ds)
    mse = mean_squared_error(ytest, model)
    rmse = math.sqrt(mse)
    print("Test Data:")
    print("MSE: ", mse)
    print("RMSE: ", rmse)
def predict_proba(self, X):
    """Activate the trained network on X and return the raw outputs."""
    n_rows, n_features = X.shape
    assert (self.net.indim == n_features)
    # zero targets: activation only reads the 'input' field
    dummy_targets = np.zeros([n_rows, self.out_size])
    ds = SDS(n_features, self.out_size)
    ds.setField('input', X)
    ds.setField('target', dummy_targets)
    return self.net.activateOnDataset(ds)
def CV_NN(X_train, Y, N_CV=1, test_sze=0.3, n_middle = 14):
    """Cross-validate a softmax classifier over 7 classes.

    For each stratified shuffle split: one-hot encode labels (assumed in
    1..7 -- TODO confirm), train in four stages with decreasing learning
    rates, then accumulate accuracy and a 7x7 confusion matrix.
    Prints the averaged confusion matrix, accuracy, and its standard error.
    """
    hidden_size = n_middle
    sss = cross_validation.StratifiedShuffleSplit( Y, N_CV, test_size=test_sze, random_state=0)
    overall_accuracy = 0
    overall_error = 0  # accumulates accuracy^2 for the variance estimate
    confusion_matrix = np.zeros((7, 7), dtype=np.int)
    for train_block, test_block in sss:
        x_train=X_train.as_matrix()[train_block]
        input_size = x_train.shape[1]
        y_vals = Y[train_block]
        # one-hot encode: class k (1-based) -> column k-1
        y_train=np.zeros((len(y_vals),7))
        for i,y in enumerate(y_vals):
            y_train[i][y-1]=1
        target_size = y_train.shape[1]
        # print x_train.shape, y_train.shape
        ds = SDS( input_size, target_size)
        ds.setField( 'input', x_train)
        ds.setField( 'target', y_train)
        net = buildNetwork( input_size, hidden_size, target_size, bias = True, hiddenclass=SigmoidLayer, outclass=SoftmaxLayer )
        # four-stage annealing: retrain with progressively smaller
        # learning rates and longer patience
        trainer = BackpropTrainer( net, ds, learningrate=0.1, verbose=True)
        trainer.trainUntilConvergence( verbose = False, validationProportion = 0.2, maxEpochs = 64, continueEpochs = 4 )
        trainer = BackpropTrainer( net, ds, learningrate=0.05, verbose=True)
        trainer.trainUntilConvergence( verbose = False, validationProportion = 0.2, maxEpochs = 64, continueEpochs = 8 )
        trainer = BackpropTrainer( net, ds, learningrate=0.01, verbose=True)
        trainer.trainUntilConvergence( verbose = False, validationProportion = 0.2, maxEpochs = 512, continueEpochs = 16 )
        trainer = BackpropTrainer( net, ds, learningrate=0.005, verbose=True)
        trainer.trainUntilConvergence( verbose = False, validationProportion = 0.2, maxEpochs = 1024, continueEpochs = 64 )
        # evaluate on the held-out block
        y_vals = Y[test_block]
        y_test=np.zeros((len(y_vals),7))
        for i,y in enumerate(y_vals):
            y_test[i][y-1]=1
        x_test = X_train.as_matrix()[test_block]
        ds = SDS( input_size, target_size)
        ds.setField( 'input', x_test )
        ds.setField( 'target', y_test )
        Y_predict = net.activateOnDataset( ds )
        # argmax over the softmax outputs -> predicted 0-based class
        y_predict=Y_predict.argmax(axis=1)
        y_test=y_vals-1
        accuracy = (y_test == y_predict).mean()
        for x, y in zip(y_test, y_predict):
            confusion_matrix[x - 1, y - 1] += 1
        overall_accuracy += accuracy
        overall_error += accuracy * accuracy
    # average across folds (placement after the loop reconstructed from the
    # mangled source -- TODO confirm)
    confusion_matrix *= 1.0 / N_CV
    print confusion_matrix
    overall_accuracy *= 1.0 / N_CV
    # standard error of the mean accuracy
    overall_error = np.sqrt( (overall_error / N_CV - overall_accuracy ** 2) / N_CV)
    print overall_accuracy, overall_error
def train_cross_validate(train,
                         label,
                         custom_net=None,
                         training_mse_threshold=0.40,
                         testing_mse_threshold=0.60,
                         epoch_threshold=10,
                         epochs=100,
                         hidden_size=50):
    """Train a tanh-hidden network with RProp-, tracking test MSE each epoch
    and checkpointing the best model seen so far.

    Uses module-level globals `split_at` and `model_file`. The *_threshold
    parameters are currently unused but kept for interface compatibility.
    Returns the trained network.
    """
    # train/test split; plain slicing replaces the removed __getslice__
    x_train = train[0:split_at, :]
    y_train = label[0:split_at].reshape(-1, 1)
    x_test = train[split_at:, :]
    y_test = label[split_at:label.shape[0]].reshape(-1, 1)
    # Shape. (the original also computed unused *_test sizes)
    input_size = x_train.shape[1]
    target_size = y_train.shape[1]
    # prepare training dataset
    ds = SDS(input_size, target_size)
    ds.setField('input', x_train)
    ds.setField('target', y_train)
    # prepare test dataset
    ds_test = SDS(input_size, target_size)
    ds_test.setField('input', x_test)
    ds_test.setField('target', y_test)
    min_mse = 1000000
    # init and train
    if custom_net is None:  # identity test instead of `== None`
        net = buildNetwork(input_size, hidden_size, target_size, bias=True,
                           hiddenclass=TanhLayer)
    else:
        print("Picking up the custom network")
        net = custom_net
    trainer = RPropMinusTrainer(net, dataset=ds, verbose=True,
                                weightdecay=0.01, batchlearning=True)
    print("training for {} epochs...".format(epochs))
    for i in range(epochs):
        mse = trainer.train()
        print("training mse, epoch {}: {}".format(i + 1, mse))
        # score the held-out set each epoch
        p = net.activateOnDataset(ds_test)
        mse = MSE(y_test, p)
        print("-- testing mse, epoch {}: {}".format(i + 1, mse))
        pickle.dump(net, open("current_run", 'wb'))
        if min_mse > mse:
            print("Current minimum found at ", i)
            pickle.dump(net, open("current_min_epoch_" + model_file, 'wb'))
            min_mse = mse
    pickle.dump(net, open(model_file, 'wb'))
    return net
def validate(train_select, validate_select):
    """Train a network until convergence on the combined train+validation
    rows and serialize it to 'model_val.pkl'."""
    train = pd_to_numpy(train_select)
    validation = pd_to_numpy(validate_select)
    output_model_file = 'model_val.pkl'
    hidden_size = 100
    epochs = train.shape[0]  # cap epochs at the training row count
    continue_epochs = 100
    validation_proportion = 0.15
    # stack validation under train; trainUntilConvergence re-splits
    # internally using validation_proportion
    combined = np.vstack((train, validation))
    features = combined[:, 0:-1]
    labels = combined[:, -1].reshape(-1, 1)
    n_in, n_out = features.shape[1], labels.shape[1]
    ds = SDS(n_in, n_out)
    ds.setField('input', features)
    ds.setField('target', labels)
    net = buildNetwork(n_in, hidden_size, n_out, bias=True)
    trainer = BackpropTrainer(net, ds)
    train_mse, validation_mse = trainer.trainUntilConvergence(
        verbose=True,
        validationProportion=validation_proportion,
        maxEpochs=epochs,
        continueEpochs=continue_epochs)
    pickle.dump(net, open(output_model_file, 'wb'))
def train_fn(trainfile, hiddennodes):
    """Train a sigmoid/sigmoid network on `trainfile` until convergence and
    pickle it to a path derived from the hidden-node count."""
    output_model_file = '../Serialized/model_{0}_nodes.pkl'.format(
        str(hiddennodes))
    hidden_size = hiddennodes
    print('Loading data..')
    x_train, y_train = load_data(trainfile)
    input_size = x_train.shape[1]
    target_size = y_train.shape[1]
    # prepare dataset
    ds = SDS(input_size, target_size)
    ds.setField('input', x_train)
    ds.setField('target', y_train)
    # init and train
    net = buildNetwork(input_size, hidden_size, target_size, bias=True,
                       hiddenclass=SigmoidLayer, outclass=SigmoidLayer)
    trainer = BackpropTrainer(net, ds)
    print('Training..')
    trainer.trainUntilConvergence(validationProportion=0.15,
                                  maxEpochs=1000, continueEpochs=10)
    print('Finish training. Serializing model...')
    # `with` closes the handle; the original leaked an open file object
    with open(output_model_file, 'wb') as f:
        pickle.dump(net, f)
def prepareDataset():
    """Load the scaled training CSV and wrap it in a SupervisedDataSet.

    Returns a tuple (dataset, input_size).
    """
    train_file = "../traindata/train_scaled.csv"
    train = np.loadtxt(train_file, delimiter=',')
    # all-but-last columns are features; last column is the target
    x_train = train[:, 0:-1]
    y_train = train[:, -1].reshape(-1, 1)
    input_size = x_train.shape[1]
    target_size = y_train.shape[1]
    print(input_size)
    print(target_size)
    # prepare dataset
    ds = SDS(input_size, target_size)
    ds.setField('input', x_train)
    ds.setField('target', y_train)
    return (ds, input_size)
def predict(aggregate_quotes, aggregate_ttrss):
    """Score the aggregated quotes with the pickled model, report RMSE,
    and write the predictions to 'predictions.txt'. Returns them too."""
    model_file = 'model.pkl'
    output_predictions_file = 'predictions.txt'
    # restore the trained network
    net = pickle.load(open(model_file, 'rb'))
    # build the test matrix: all-but-last columns are features,
    # the last column is the label
    test = pd_to_numpy(aggregate_quotes, aggregate_ttrss)
    x_test = test[:, 0:-1]
    y_test = test[:, -1].reshape(-1, 1)
    print(x_test, y_test)
    input_size = x_test.shape[1]
    target_size = y_test.shape[1]
    print(net.indim, net.outdim, input_size, target_size)
    assert (net.indim == input_size)
    assert (net.outdim == target_size)
    # wrap in a dataset and activate the network
    ds = SDS(input_size, target_size)
    ds.setField('input', x_test)
    ds.setField('target', y_test)
    p = net.activateOnDataset(ds)
    rmse = sqrt(MSE(y_test, p))
    print("testing RMSE:", rmse, p)
    np.savetxt(output_predictions_file, p, fmt='%.6f')
    return p
def train(self, arr):
    '''
    Train NN for given data
    :param arr: [wt_arr, mt_arr], in ATCG or atcg
    :return: void, but serialize model to file
    '''
    x_train, y_train = load_data(arr)
    # fit a scaler on the inputs and normalize to standard normal;
    # targets are intentionally left unscaled
    std_scale = preprocessing.StandardScaler().fit(x_train)
    x_train_scaled = std_scale.transform(x_train)
    input_size = x_train_scaled.shape[1]
    target_size = y_train.shape[1]
    # prepare dataset
    ds = SDS(input_size, target_size)
    ds.setField('input', x_train_scaled)
    ds.setField('target', y_train)
    # init and train
    net = buildNetwork(input_size, self.hiddennodes, target_size, bias=True,
                       hiddenclass=TanhLayer, outclass=TanhLayer)
    trainer = BackpropTrainer(net, ds)
    print('Training..')
    trainer.trainUntilConvergence(validationProportion=0.15,
                                  maxEpochs=1000, continueEpochs=10)
    print('Finish training. Serializing bundle...')
    # persist both net and scaler so prediction can reuse the normalization
    bundle = [net, std_scale]
    with open(self.model_file, 'wb') as f:
        pickle.dump(bundle, f)
def neuralNetworkRegression(X_test):
    """
    :param X_test: feature matrix to score (class variable excluded)
    :return: predictions from the saved neural-network model
    """
    print("NEURAL NETWORK REGRESSION")
    print("Executing...")
    print()
    print("Loading saved model...")
    net = pickle.load(open("Models/neural.sav", 'rb'))
    # dummy targets -- activation only uses the input field
    y_test = np.zeros((X_test.shape[0], 1))
    input_size = X_test.shape[1]
    target_size = y_test.shape[1]
    ds = SDS(input_size, target_size)
    ds.setField('input', X_test)
    ds.setField('target', y_test)
    prediction = net.activateOnDataset(ds)
    print(prediction)
    return prediction
def test_fn(testfile, hiddennodes, model_file):
    """Score `testfile` with the pickled model, binarize the outputs at 0.5,
    and print the accuracy.

    `hiddennodes` is unused here but kept for interface compatibility.
    """
    # load model; `with` closes the handle (original leaked it)
    with open(model_file, 'rb') as f:
        net = pickle.load(f)
    print('Finish loading model')
    # Load test data
    x_test, y_test = load_data(testfile)
    y_test_dummy = np.zeros(y_test.shape)  # labels not needed to activate
    input_size = x_test.shape[1]
    target_size = y_test.shape[1]
    assert (net.indim == input_size)
    assert (net.outdim == target_size)
    # prepare dataset
    ds = SDS(input_size, target_size)
    ds.setField('input', x_test)
    ds.setField('target', y_test_dummy)
    # predict
    print('Activating ds')
    p = net.activateOnDataset(ds)

    def threshold(x):
        # binarize at 0.5 (x >= 0.5 -> 1); debug print removed
        return 0 if x < 0.5 else 1

    # comprehension instead of map(): on Python 3 map() returns an iterator,
    # which would break the original np.array(list-of-maps) construction
    p_converted = np.array([[threshold(v) for v in row] for row in p])
    acc = accuracy_score(y_test, p_converted)
    print('Accuracy score=%s' % acc)
def predict(X, net):
    """Run `net` over every row of X and persist the predictions.

    Writes the raw matrix to "1_" + output_predictions_file (module global)
    and a 1-indexed single-column CSV 'neural_prediction_3.csv'.
    """
    x_test = X[:, :]
    # prediction needs a target field; zeros work as a placeholder
    y_test_dummy = np.zeros((X.shape[0], 1))
    input_size = x_test.shape[1]
    target_size = y_test_dummy.shape[1]
    assert (net.indim == input_size)
    assert (net.outdim == target_size)
    # prepare dataset
    ds = SDS(input_size, target_size)
    ds.setField('input', x_test)
    ds.setField('target', y_test_dummy)
    p = net.activateOnDataset(ds)
    print(p.shape)  # Python 3 compatible (was a Python 2 print statement)
    np.savetxt("1_" + output_predictions_file, p, fmt='%.6f')
    s = pd.Series(p[:, 0])
    s.index += 1  # submission IDs are 1-based
    s.to_csv('neural_prediction_3.csv', header=['Prediction'], index=True,
             index_label='ID')
def FitNeuralNetworkDeptAnimate(dept = 1, num = 1000):
    """Train a multi-hidden-layer recurrent network on one department's data
    while live-plotting train/test predictions each epoch.

    Relies on module globals: input_file_path, train_file_name,
    test_file_name, hidden_size_ratio, num_hidden_layer, weightdecay,
    learningrate, momentum, epochs.
    """
    # file names are assembled as prefix + dept number + suffix
    train_file = input_file_path + train_file_name[0] + str(dept) + train_file_name[1]
    test_file = input_file_path + test_file_name[0] + str(dept) + test_file_name[1]
    train = np.loadtxt( train_file, delimiter = ' ' )
    test = np.loadtxt( test_file, delimiter = ' ' )
    print len(train)
    # first `num` rows; last column is the target
    x_train = train[0:num, 0 : -1]
    y_train = train[0:num, -1]
    # min-max scale the target into [0, 1]
    y_max = max(y_train)
    y_min = min(y_train)
    y_train = (y_train - y_min) / (y_max-y_min)
    y_train = y_train.reshape(-1,1)
    input_size = x_train.shape[1]
    target_size = y_train.shape[1]
    # test on a quarter of `num` rows (note: targets NOT rescaled here)
    x_test = test[0:num/4, 0 : -1]
    y_test = test[0:num/4, -1]
    y_test = y_test.reshape(-1,1)
    ds_test = SDS( input_size, target_size )
    ds_test.setField( 'input', x_test )
    ds_test.setField( 'target', y_test )
    ds = SDS( input_size, target_size )
    ds.setField( 'input', x_train )
    ds.setField( 'target', y_train )
    hidden_size = input_size*hidden_size_ratio
    # build the recurrent network layer by layer
    n = RecurrentNetwork()
    n.addInputModule(LinearLayer(input_size, name='in'))
    n.addModule(BiasUnit('bias'))
    for i in range(0, num_hidden_layer+1):
        hidden_name = 'hidden'+str(i)
        n.addModule(SigmoidLayer(hidden_size, name=hidden_name))
    n.addOutputModule(LinearLayer(target_size, name='out'))
    n.addConnection(FullConnection(n['in'], n['hidden0'], name='c1'))
    # chain the hidden layers together, then connect the last to the output
    next_hidden = 'hidden0'
    for i in range(0,num_hidden_layer ):
        current_hidden = 'hidden'+str(i)
        next_hidden = 'hidden'+str(i+1)
        n.addConnection(FullConnection(n[current_hidden], n[next_hidden], name='c'+str(i+2)))
    n.addConnection(FullConnection(n[next_hidden], n['out'], name='c'+str(num_hidden_layer+2)))
    # bias feeds only the first hidden layer
    n.addConnection(FullConnection(n['bias'], n['hidden0'], name='c'+str(num_hidden_layer+7)))
    n.sortModules()
    print n
    trainer = BackpropTrainer(n,ds ,weightdecay=weightdecay, learningrate=learningrate, lrdecay=1.0, momentum = momentum)
    # interactive plot: one line for train targets, one for predictions
    plt.ion()
    fig = plt.figure()
    ax = fig.add_subplot(111)
    # fixed annotation positions -- presumably tuned for a specific dataset
    plt.annotate("Dept1", (10,-15000))
    plt.annotate("Dept2", (180,-30000))
    plt.annotate("Dept3", (300,-15000))
    plt.annotate("Dept4", (450,-30000))
    plt.annotate("Dept5", (600,-15000))
    plt.annotate("Dept6", (700,-30000))
    plt.annotate("Dept7", (900,-15000))
    line1, = ax.plot([],[],'-b',label='train')
    line2, = ax.plot([],[],'-r',label='test')
    ax.legend()
    dummy = raw_input("Plot the graph?")
    for i in range(epochs):
        error = trainer.train()
        print "Epoch: %d, Error: %7.4f" % (i, error)
        p_train = n.activateOnDataset( ds )
        p_test = n.activateOnDataset( ds_test )
        # undo the min-max scaling before plotting
        plot_result = np.vstack((p_train*(y_max-y_min) + y_min, p_test*(y_max-y_min) + y_min ))
        p_test_print = p_test.reshape(-1,len(p_test))
        p_test_print = p_test_print*(y_max-y_min) + y_min
        line1.set_ydata(y_train*(y_max-y_min) + y_min)
        line1.set_xdata(range(len(y_train)))
        line2.set_ydata(plot_result)
        line2.set_xdata(range(len(plot_result)))
        ax.relim()
        ax.autoscale_view()
        plt.draw()
# Loading data: integer test matrix plus the pickled trained network.
test = np.loadtxt("data_out/test3.csv")
test = test.astype(int)
net = pickle.load(open("data_out/model3.pk1", "rb"))
# Variables: column 0 is skipped (presumably an id -- confirm against the
# writer of test3.csv); the last three columns are the targets.
x_test = test[:, 1:-3]
y_test = test[:, -3:]
y_test_pred = np.zeros(y_test.shape)  # placeholder targets for activation
input_size = x_test.shape[1]
target_size = y_test.shape[1]
assert (net.indim == input_size)
assert (net.outdim == target_size)
ds = SDS(input_size, target_size)
ds.setField('input', x_test)
ds.setField('target', y_test_pred)
p = net.activateOnDataset(ds)
# report RMSE against the true targets and persist the predictions
mse = MSE(y_test, p)
rmse = sqrt(mse)
print("testing RMSE:", rmse)
np.savetxt("pred", p, fmt='%.6f')
# NOTE(review): the lines below are the tail of an `extract_xy`-style helper
# whose `def` line is outside this view -- the indentation is reconstructed.
    # features: stack z and rich as two columns; target: mass
    X = np.transpose(np.vstack((z, rich)))
    Y = mass
    return X, Y

# Build train/test splits and fit a network, printing per-epoch RMSE.
# Relies on module globals `hidden_size` and `epochs`.
X_train, Y_train = extract_xy(train_data)
X_test, Y_test = extract_xy(test_data)
Y_train = Y_train.reshape( -1, 1 )
input_size = X_train.shape[1]
target_size = Y_train.shape[1]
# prepare dataset
ds = SDS( input_size, target_size )
ds.setField( 'input', X_train )
ds.setField( 'target', Y_train )
# init and train
net = buildNetwork( input_size, hidden_size, target_size, bias = True )
trainer = BackpropTrainer( net,ds )
print "training for {} epochs...".format( epochs )
for i in range( epochs ):
    mse = trainer.train()
    rmse = sqrt( mse )
    print "training RMSE, epoch {}: {}".format( i + 1, rmse )
def train_ann_multihidden(data_dicts, input_fields, layers, hidden_size, epochs):
    """Train a two-hidden-layer feed-forward appraisal network.

    Columns 0-1 of the converted array are the 2-D target; the remaining
    columns are inputs. Returns (stats_dict, trained_network). The `layers`
    parameter is currently unused.
    """
    print "-------------------------------------------------"
    print "loading data..."
    # returns a numpy ndarray
    train = dicts_to_np_array(data_dicts, input_fields)
    print "data loaded to a ", type(train), " of size: ", train.shape, " and type:", train.dtype
    print "Spliting inputs and output for training..."
    # first two columns are the outputs; the rest are inputs
    inputs_train = train[:,2:]
    outputs_train = train[:,:2]
    outputs_train = outputs_train.reshape( -1, 2 )
    print "inputs in a ", type(inputs_train), " of size: ", inputs_train.shape, " and type:", inputs_train.dtype
    print "output in a ", type(outputs_train), " of size: ", outputs_train.shape, " and type:", outputs_train.dtype
    print "-------------------------------------------------"
    print "primeros vectores de inputs: ", inputs_train[0:2,:]
    print "primeros vectores de outputs: ", outputs_train[0:2,:]
    print "Setting up supervised dataset por pyBrain training..."
    input_size = inputs_train.shape[1]
    target_size = outputs_train.shape[1]
    dataset = SDS( input_size, target_size )
    dataset.setField( 'input', inputs_train )
    dataset.setField( 'target', outputs_train )
    print "-------------------------------------------------"
    print "Setting up network for supervised learning in pyBrain..."
    # network topology: in -> sigmoid(hidden_size) -> sigmoid(hidden_size//2) -> out
    appraisal_network = FeedForwardNetwork()
    inLayer = LinearLayer(input_size)
    hiddenLayer1 = SigmoidLayer(hidden_size)
    hiddenLayer2 = SigmoidLayer(hidden_size//2)
    outLayer = LinearLayer(target_size)
    appraisal_network.addInputModule(inLayer)
    appraisal_network.addModule(hiddenLayer1)
    appraisal_network.addModule(hiddenLayer2)
    appraisal_network.addOutputModule(outLayer)
    in_to_hidden1 = FullConnection(inLayer, hiddenLayer1)
    hidden1_to_hidden2 = FullConnection(hiddenLayer1, hiddenLayer2)
    hidden2_to_out = FullConnection(hiddenLayer2, outLayer)
    appraisal_network.addConnection(in_to_hidden1)
    appraisal_network.addConnection(hidden1_to_hidden2)
    appraisal_network.addConnection(hidden2_to_out)
    appraisal_network.sortModules()
    trainer = BackpropTrainer( appraisal_network,dataset )
    print "-------------------------------------------------"
    start_time = time.time()
    rmse_vector = []
    rmse_min = sys.float_info.max
    #print "training for {} epochs...".format( epochs )
    for i in range( epochs ):
        mse = trainer.train()
        rmse = sqrt( mse )
        print "training RMSE, epoch {}: {}".format( i + 1, rmse )
        rmse_vector.append(rmse)
        # track the best epoch's RMSE
        if rmse < rmse_min:
            rmse_min = rmse
    #print "-------------------------------------------------"
    elapsed_time = time.time() - start_time
    # pickle.dump( crime_ann, open( output_model_file, 'wb' ))
    #print "Training done!"
    #print "-------------------------------------------------"
    # return rmse_vector
    # NOTE(review): "hidden_layers":1 looks stale for this 2-hidden-layer
    # topology, and the "epochs:" key carries a trailing colon -- confirm
    # against consumers before changing.
    return {"time_elapsed":elapsed_time, "epochs:":epochs, "rmse_vector":rmse_vector, "rmse_min":rmse_min, "hidden_layers":1, "hidden_neurons":hidden_size }, appraisal_network
# Load train/test matrices and their targets (CSV files with a header row).
print("Loading in the data")
train = np.loadtxt(train_path, delimiter=',', skiprows=1)
train_target = np.loadtxt(train_target_path, delimiter=",", skiprows=1)
test = np.loadtxt(test_path, delimiter=',', skiprows=1)
test_target = np.loadtxt(test_target_path, delimiter=",", skiprows=1)
train_target = train_target.reshape(-1, 1)  # column vector
input_size = train.shape[1]
target_size = train_target.shape[1]
# prepare dataset
print("Preparing the dataset")
print("")
ds = SDS(input_size, target_size)
ds.setField('input', train)
# scale targets into [0, 1] by dividing by the maximum
ds.setField('target', train_target / np.max(train_target))
# init and train
print("Initalizing the network and training")
net = buildNetwork(input_size, hidden_size, target_size, bias=True)
trainer = BackpropTrainer(net, ds)
start = time()
for i in range(epochs):
    mse = trainer.train()
    rmse = sqrt(mse)
    print("training RMSE, epoch {}: {}".format(i + 1, rmse))
end = time()
print("Training took: " + str((end - start)) + "seconds")
print("")
# Combine the training and validation CSVs, then fit until convergence.
train = np.loadtxt(train_file, delimiter=',')
validation = np.loadtxt(validation_file, delimiter=',')
train = np.vstack((train, validation))

# Split features from the label column; make the target a column vector.
x_train = train[:, 0:-1]
y_train = train[:, -1].reshape(-1, 1)
input_size = x_train.shape[1]
target_size = y_train.shape[1]

# prepare dataset
ds = SDS(input_size, target_size)
ds.setField('input', x_train)
ds.setField('target', y_train)

# init and train until the internal validation error stops improving
net = buildNetwork(input_size, hidden_size, target_size, bias=True)
trainer = BackpropTrainer(net, ds)
train_mse, validation_mse = trainer.trainUntilConvergence(
    verbose=True,
    validationProportion=validation_proportion,
    maxEpochs=epochs,
    continueEpochs=continue_epochs)

pickle.dump(net, open(output_model_file, 'wb'))
def train_ann(data_dicts, layers, hidden_size, epochs):
    """Train a single-hidden-layer feed-forward ANN on data_dicts.

    The dict records are converted to a numpy array whose first two
    columns are the targets and whose remaining columns are the inputs.
    Returns the trained FeedForwardNetwork.

    NOTE(review): the `layers` parameter is accepted but never used in
    this body - confirm whether multi-layer support was intended.
    """
    print "-------------------------------------------------"
    print "loading data..."
    # returns a numpy ndarray
    train = dicts_to_np_array(data_dicts)
    print "data loaded to a ", type(train), " of size: ", train.shape, " and type:", train.dtype
    print "Spliting inputs and output for training..."
    inputs_train = train[:,2:]     # feature columns
    outputs_train = train[:,:2]    # first two columns are the targets
    outputs_train = outputs_train.reshape( -1, 2 )
    print "inputs in a ", type(inputs_train), " of size: ", inputs_train.shape, " and type:", inputs_train.dtype
    print "output in a ", type(outputs_train), " of size: ", outputs_train.shape, " and type:", outputs_train.dtype
    print "-------------------------------------------------"
    print "primeros vectores de inputs: ", inputs_train[0:2,:]
    print "primeros vectores de outputs: ", outputs_train[0:2,:]
    print "Setting up supervised dataset por pyBrain training..."
    input_size = inputs_train.shape[1]
    target_size = outputs_train.shape[1]
    dataset = SDS( input_size, target_size )
    dataset.setField( 'input', inputs_train )
    dataset.setField( 'target', outputs_train )
    print "-------------------------------------------------"
    print "Setting up network for supervised learning in pyBrain..."
    # hand-built net: linear input -> sigmoid hidden -> linear output
    appraisal_network = FeedForwardNetwork()
    inLayer = LinearLayer(input_size)
    hiddenLayer1 = SigmoidLayer(hidden_size)
    outLayer = LinearLayer(target_size)
    appraisal_network.addInputModule(inLayer)
    appraisal_network.addModule(hiddenLayer1)
    appraisal_network.addOutputModule(outLayer)
    in_to_hidden1 = FullConnection(inLayer, hiddenLayer1)
    hidden1_to_out = FullConnection(hiddenLayer1, outLayer)
    appraisal_network.addConnection(in_to_hidden1)
    appraisal_network.addConnection(hidden1_to_out)
    appraisal_network.sortModules()  # finalize topology before training
    trainer = BackpropTrainer( appraisal_network,dataset )
    print "-------------------------------------------------"
    rmse_vector = []
    print "training for {} epochs...".format( epochs )
    for i in range( epochs ):
        mse = trainer.train()
        rmse = sqrt( mse )
        if i%10 == 0:
            # progress is only printed every 10th epoch
            print "training RMSE, epoch {}: {}".format( i + 1, rmse )
        rmse_vector.append(rmse)
    print "-------------------------------------------------"
    # pickle.dump( crime_ann, open( output_model_file, 'wb' ))
    print "Training done!"
    print "-------------------------------------------------"
    # return rmse_vector
    return appraisal_network
# Load the dataset files (space-delimited); the leading index column
# is dropped unless add_i is set.
train = np.loadtxt(train_file, delimiter=' ')
if not add_i:
    train = train[:, 1:]
validation = np.loadtxt(validation_file, delimiter=' ')
validation = validation[:, 1:]
x_train = train
y_train = validation
input_size = x_train.shape[1]
target_size = validation.shape[1]
ds = SDS(input_size, target_size)
ds.setField('input', train)
ds.setField('target', validation)
# one run per combination of hyper-parameters
for hidden_layer, epoch, learning_rate in product(hidden_size, epochs, learning_rates):
    output_model_file = 'model_{}-{}_learning-rate-{}_hidden-{}_epochs-{}.pkl'.format(
        train_file, learning_rate, hidden_layer, epoch,
        "with_i" if add_i else "without-i")
    # FIX: this pattern had only four placeholders for five arguments,
    # so the with_i/without-i suffix was silently dropped.
    output_data = 'model_result_{}-{}_learning_rate-{}_hidden-{}_epochs-{}.txt'.format(
        train_file, learning_rate, hidden_layer, epoch,
        "with_i" if add_i else "without-i")
    # NOTE(review): the argument order does not line up with the label
    # text in the file names (learning_rate lands before the literal
    # "learning-rate-"); kept as-is to match the .pkl naming in use.
    net = buildNetwork(input_size, hidden_layer, target_size, bias=True)
    trainer = BackpropTrainer(net, ds, learningrate=learning_rate)
loaded_data.drop(['DATE', 'ASS_ID', 'YEAR_DAY_AND_YEAR', 'DAY_DS', 'MONTH'], axis=1) print(preprocessing.data.columns) train = np.asarray(loaded_data) x_train = train[:, 0:-1] y_train = train[:, -1] y_train = y_train.reshape(-1, 1) input_size = x_train.shape[1] target_size = y_train.shape[1] hidden_size = 100 epochs = 600 ds = SDS(input_size, target_size) ds = SDS(input_size, target_size) ds.setField('input', x_train) ds.setField('target', y_train) net = buildNetwork(input_size, hidden_size, target_size, bias=True) trainer = BackpropTrainer(net, ds) print "training for {} epochs...".format(epochs) for i in range(epochs): mse = trainer.train() rmse = sqrt(mse) print "training RMSE, epoch {}: {}".format(i + 1, rmse) submission = sp.submission_preprocessing() submission.full_preprocess() data_to_predict = np.asarray(submission.data)
train = np.loadtxt(train_file, delimiter = ' ') if not add_i: train = train[:, 1:] validation = np.loadtxt(validation_file, delimiter = ' ' ) validation = validation[:, 1:] x_train = train y_train = validation input_size = x_train.shape[1] target_size = validation.shape[1] ds = SDS(input_size, target_size) ds.setField('input',train) ds.setField('target', validation) # executa pra cada conjunto de combinacoes de parametros for hidden_layer, epoch, learning_rate in product(hidden_size, epochs, learning_rates): output_model_file = 'model_{}-{}_learning-rate-{}_hidden-{}_epochs-{}.pkl'.format(train_file, learning_rate, hidden_layer, epoch, "with_i" if add_i else "without-i") output_data = 'model_result_{}-{}_learning_rate-{}_hidden-{}_epochs.txt'.format(train_file, learning_rate, hidden_layer, epoch, "with_i" if add_i else "without-i") net = buildNetwork(input_size, hidden_layer, target_size, bias = True) trainer = BackpropTrainer(net, ds, learningrate = learning_rate) print "Training for {} epochs with learning_rate={}, hidden_layer={} ...".format(epoch, learning_rate, hidden_layer) mse = 0
# Train a PM 2.5 regressor on weather features from the combined
# training CSV, using a tanh hidden layer.
X = pd.read_csv('Train/Train_Combine.csv', usecols=['T', 'TM', 'Tm', 'SLP', 'H', 'VV', 'V', 'VM'])
Y = pd.read_csv('Train/Train_Combine.csv', usecols=['PM 2.5'])
X = X.values   # DataFrame -> plain ndarray for pybrain
Y = Y.values
hidden_size = 100
epochs = 600
input_size = X.shape[1]
target_size = Y.shape[1]
ds = SDS(input_size, target_size)
ds.setField('input', X)
ds.setField('target', Y)
net = buildNetwork(input_size, hidden_size, target_size, bias=True, hiddenclass=TanhLayer)
trainer = BackpropTrainer(net, ds)
print "training for {} epochs...".format(epochs)
for i in range(epochs):
    mse = trainer.train()   # one pass over the dataset; returns MSE
    rmse = sqrt(mse)
    print "training RMSE, epoch {}: {}".format(i + 1, rmse)
# Tail of a normalisation helper: fill in missing min/max and scale
# each column of m into [-1, 1].
if m_min == []:
    m_min = m.min(0)
if m_max == []:
    m_max = m.max(0)
m = 2 * (m - m_min) / (m_max - m_min) - 1
return m, m_min, m_max
'''training dataset with random phase screens (2018.01.17)'''
# Scale all slope measurements by the global absolute maximum.
mx = np.abs(trnslos_all).max()
ntrnslos_all = trnslos_all / mx
n_frame = 20
# Keep only the first n_frame frames (72 slopes each) as network input.
norm_inp = ntrnslos_all[:, :n_frame * 72]
trnds = SupervisedDataSet(72 * n_frame, 1)
#norm_inp, trnslos_min, trnslos_max = normalise(trnslos)
trnds.setField('input', norm_inp)
trn_tar = np.empty([6000, 1])
# Targets: 6 groups of 1000 samples labelled 5..10, scaled by 15.
# NOTE(review): on Python 2, np.arange(5, 11) is an integer array, so
# "/ 15" is integer (floor) division and every target becomes 0 - this
# likely should be "/ 15.0".  Confirm before relying on these labels.
trn_tar[:, 0] = np.arange(5, 11).repeat(1000) / 15
trnds.setField('target', trn_tar)
'''learning process'''
# 1440 -> 1000 (tanh) -> 1 network; vanilla backprop, no momentum.
net = buildNetwork(72 * n_frame, 1000, 1, hiddenclass=TanhLayer)
lr = 0.001
momentum = 0
lrdecay = 1
wdecay = 0
t = BackpropTrainer(net, trnds, learningrate=lr, lrdecay=lrdecay, momentum=momentum, verbose=True,
model_file = 'model.pkl' output_predictions_file = 'predictions.txt' X2 = pd.read_csv('Test/Test_Combine.csv', usecols=[ 'T', 'TM', 'Tm', 'SLP', 'H', 'VV', 'V', 'VM']) Y2 = pd.read_csv('Test/Test_Combine.csv', usecols=['PM 2.5']) X2 = X2.values Y2 = Y2.values net = pickle.load(open(model_file, 'rb')) y_test_dummy = np.zeros(Y2.shape) input_size = X2.shape[1] target_size = X2.shape[1] ds = SDS(input_size, target_size) ds.setField('input', X2) ds.setField('target', y_test_dummy) p = net.activateOnDataset(ds) mse = MSE(Y2, p) rmse = sqrt(mse) print "testing RMSE:", rmse print "testing MSE: ", mse main(Y2, p) np.savetxt(output_predictions_file, p, fmt='%.6f')
true_positive = 0 false_positive = 0 true_negative = 0 false_negative = 0 # load model net = pickle.load( open(var.output_model_file, 'rb' )) #load data test = np.loadtxt( var.test_file, delimiter = ',' ) input_data = test[:,0:-1] target_data = test[:,-1] target_data = target_data.reshape( -1, 1 ) #print input_data,target_data # prepare dataset ds = SDS( var.no_of_clusters, var.output ) ds.setField( 'input', input_data ) ds.setField( 'target', target_data ) #activate network predict_list = net.activateOnDataset(ds) for predict,ground_truth in zip(predict_list,target_data): if predict <= 0.0: if ground_truth <= 0 : true_negative += 1 else: false_negative += 1 print "Pedicted: NOT Car" else : if ground_truth <= 0 : false_positive += 1 else: true_positive += 1 print "Predicted: Car" #print true_positive,true_negative,false_positive,false_negative precision = true_positive / (true_positive + false_positive) recall = true_positive / (true_positive + false_negative)
def train_ann(data_dicts, input_fields, hidden_size, epochs):
    """Train a single-hidden-layer feed-forward ANN.

    data_dicts are converted to a numpy array whose first two columns
    are the targets and whose remaining columns are the inputs.

    Returns a (report, network) tuple: report is a dict summarising the
    run (timing, best RMSE, layer sizes); network is the trained net.
    """
    samples = dicts_to_np_array(data_dicts, input_fields)
    # split: columns 0-1 are the two outputs, the rest are inputs
    net_inputs = samples[:, 2:]
    net_targets = samples[:, :2].reshape(-1, 2)
    n_in = net_inputs.shape[1]
    n_out = net_targets.shape[1]

    # wrap the arrays in a pybrain supervised dataset
    training_set = SDS(n_in, n_out)
    training_set.setField('input', net_inputs)
    training_set.setField('target', net_targets)

    # linear input -> sigmoid hidden -> linear output topology
    appraisal_network = FeedForwardNetwork()
    layer_in = LinearLayer(n_in)
    layer_hidden = SigmoidLayer(hidden_size)
    layer_out = LinearLayer(n_out)
    appraisal_network.addInputModule(layer_in)
    appraisal_network.addModule(layer_hidden)
    appraisal_network.addOutputModule(layer_out)
    appraisal_network.addConnection(FullConnection(layer_in, layer_hidden))
    appraisal_network.addConnection(FullConnection(layer_hidden, layer_out))
    appraisal_network.sortModules()

    # plain backprop; track the best (lowest) per-epoch RMSE
    trainer = BackpropTrainer(appraisal_network, training_set)
    started = time.time()
    rmse_min = sys.float_info.max
    for _ in range(epochs):
        epoch_rmse = sqrt(trainer.train())
        rmse_min = min(rmse_min, epoch_rmse)
    elapsed = time.time() - started

    report_fields_training = {"time_elapsed": elapsed,
                              "epochs": epochs,
                              "rmse_min": rmse_min,
                              "hidden_layers": 1,
                              "hidden_neurons": hidden_size,
                              "input_neurons": n_in,
                              "output_neurons": n_out}
    return report_fields_training, appraisal_network
# Train a PM 2.5 regressor on weather features (tanh hidden layer)
# and pickle the resulting network to output_model_file.
X = pd.read_csv('Train/Train_Combine.csv', usecols=[
    'T', 'TM', 'Tm', 'SLP', 'H', 'VV', 'V', 'VM'])
Y = pd.read_csv('Train/Train_Combine.csv', usecols=['PM 2.5'])
X = X.values   # DataFrame -> plain ndarray for pybrain
Y = Y.values
hidden_size = 100
epochs = 600
input_size = X.shape[1]
target_size = Y.shape[1]
ds = SDS(input_size, target_size)
ds.setField('input', X)
ds.setField('target', Y)
net = buildNetwork(
    input_size, hidden_size, target_size, bias=True, hiddenclass=TanhLayer)
trainer = BackpropTrainer(net, ds)
print "training for {} epochs...".format(epochs)
for i in range(epochs):
    mse = trainer.train()
    rmse = sqrt(mse)
    print "training RMSE, epoch {}: {}".format(i + 1, rmse)
# persist the trained network for the prediction script
pickle.dump(net, open(output_model_file, 'wb'))
loaded_data=preprocessing.data[:10] loaded_data.drop(['DATE','ASS_ID','YEAR_DAY_AND_YEAR','DAY_DS','MONTH'], axis=1) print(preprocessing.data.columns) train = np.asarray(loaded_data) x_train = train[:,0:-1] y_train = train[:,-1] y_train = y_train.reshape( -1, 1 ) input_size = x_train.shape[1] target_size = y_train.shape[1] hidden_size = 100 epochs = 600 ds = SDS(input_size,target_size) ds = SDS( input_size, target_size ) ds.setField( 'input', x_train ) ds.setField( 'target', y_train ) net = buildNetwork( input_size, hidden_size, target_size, bias = True ) trainer = BackpropTrainer( net,ds ) print "training for {} epochs...".format( epochs ) for i in range( epochs ): mse = trainer.train() rmse = sqrt( mse ) print "training RMSE, epoch {}: {}".format( i + 1, rmse ) submission = sp.submission_preprocessing() submission.full_preprocess() data_to_predict = np.asarray(submission.data)
# Train a network on Xtrn/Ytrn with early stopping, pickle the model,
# then (in code below this chunk) predict on the test data.
print file_name + ': reading data'
(Xtrn, Xtst, Ytrn, f_out) = read_X_Y(f_in_trn, f_in_tst, sol_dir, my_dim)
# PARAMETERS
hidden_size = 100
epochs = 600
continue_epochs = 10
val_prop = 0.2   # fraction pybrain holds out for validation
# Prepare dataset
print file_name + ': preparing ds'
Ytrn = Ytrn[:,1:] # Remove ID col
input_size = Xtrn.shape[1] # ncols
target_size = Ytrn.shape[1] # ncols
ds = SupervisedDataSet(input_size, target_size)
ds.setField('input', Xtrn)
ds.setField('target', Ytrn)
# Train a network
print file_name + ': training network'
net = buildNetwork(input_size, hidden_size, target_size, bias = True)
trainer = BackpropTrainer(net, ds)
# stops once validation error has not improved for continue_epochs
trainer.trainUntilConvergence(verbose = True, validationProportion = val_prop, maxEpochs = epochs, continueEpochs = continue_epochs)
# Save model
print file_name + ': saving model'
pickle.dump(net, open(f_out_model, 'wb'))
# Predict on test data, save to file
train = np.loadtxt(train_file, delimiter=',') #validation = np.loadtxt( validation_file, delimiter = ',' ) #train = np.vstack(( train, validation )) x_train = train[:, 0:-1] y_train = train[:, -1] y_train = y_train.reshape(-1, 1) input_size = x_train.shape[1] target_size = y_train.shape[1] # prepare dataset ds = SDS(input_size, target_size) ds.setField('input', x_train) ds.setField('target', y_train) # init and train net = buildNetwork(input_size, hidden_size, target_size, bias=True) trainer = BackpropTrainer(net, ds) print "training for {} epochs...".format(epochs) for i in range(epochs): mse = trainer.train() rmse = sqrt(mse) print "training RMSE, epoch {}: {}".format(i + 1, rmse) pickle.dump(net, open(output_model_file, 'wb'))
def FitNeuralNetworkDept(dept):
    """Train (until convergence) a per-department sales model.

    Loads the department's train/test files, min-max scales the
    training target, trains with pybrain's trainUntilConvergence,
    pickles the model, writes a human-readable info file and the
    de-scaled predictions, plots the result, and returns the net.
    """
    train_file = input_file_path + train_file_name[0] + str(dept) + train_file_name[1]
    test_file = input_file_path + test_file_name[0] + str(dept) + test_file_name[1]
    train = np.loadtxt( train_file, delimiter = ' ' )
    test = np.loadtxt( test_file, delimiter = ' ' )
    x_train = train[:, 0 : -1]
    y_train = train[:, -1]
    # min-max scale the target into [0, 1]; y_min/y_max are kept so the
    # scaling can be inverted on the predictions below
    y_max = max(y_train)
    y_min = min(y_train)
    y_train = (y_train - y_min) / (y_max-y_min)
    y_train = y_train.reshape(-1,1)
    input_size = x_train.shape[1]
    target_size = y_train.shape[1]
    x_test = test[:, 0 : -1]
    y_test = test[:, -1]
    y_test = y_test.reshape(-1,1)
    ds_test = SDS( input_size, target_size )
    ds_test.setField( 'input', x_test )
    ds_test.setField( 'target', y_test )
    ds = SDS( input_size, target_size )
    ds.setField( 'input', x_train )
    ds.setField( 'target', y_train )
    hidden_size = input_size*hidden_size_ratio
    ''' Set the parameter online = True to do online learning! '''
    n = getModel(dept = dept, hidden_size = hidden_size, input_size = input_size, target_size = target_size, online = OnlineLearningMode)
    trainer = BackpropTrainer(n,ds ,weightdecay=weightdecay, learningrate=learningrate, lrdecay=1.0, momentum = momentum)
    # early-stopped training; returns the per-epoch MSE curves
    train_mse, validation_mse = trainer.trainUntilConvergence(verbose=False, maxEpochs = epochs, validationProportion = cv_ratio, continueEpochs = 5)
    file_name = output_file_path + 'nn_dept' + str(dept) + '_epoch' + str(epochs)
    # NOTE(review): the model is pickled in text mode ('w'); this works
    # with protocol 0 on Python 2 but 'wb' would be safer - confirm.
    model_file = open(file_name + '_model', 'w')
    pickle.dump(n, model_file)
    model_file.close()
    print 'dept' + str(dept) + ' complete..!'
    # human-readable summary of the model and its hyper-parameters
    model_info = open(file_name + '_info.txt', 'w')
    model_info.write('model for dept' + str(dept) +'\n\n')
    model_info.write(str(n) +'\n\n')
    model_info.write("input size: " + str(input_size) +'\n')
    model_info.write("hidden size: " + str(hidden_size) +'\n')
    model_info.write("hidden layer number: " + str(num_hidden_layer+1) +'\n')
    model_info.write("target size: " + str(target_size) +'\n\n')
    model_info.write("learningrate: " + str(learningrate) +'\n')
    model_info.write("momentum: " + str(momentum) +'\n')
    model_info.write("weightdecay: " + str(weightdecay) +'\n\n')
    model_info.write("epochs: " + str(epochs) +'\n')
    model_info.write("cv_ratio: " + str(cv_ratio) +'\n\n')
    model_info.write("y_min: " + str(y_min) +'\n')
    model_info.write("y_max: " + str(y_max) +'\n\n')
    model_info.write("train_mse: " + str(train_mse) +'\n\n')
    model_info.write("validation_mse: " + str(validation_mse))
    model_info.close()
    n = None
    # reload the pickled model to check it round-trips correctly
    fileObject = open(file_name + '_model', 'r')
    n = pickle.load(fileObject)
    fileObject.close()
    p_train = n.activateOnDataset( ds )
    p_test = n.activateOnDataset( ds_test )
    # undo the min-max scaling on the predictions
    plot_result = np.vstack((p_train*(y_max-y_min) + y_min, p_test*(y_max-y_min) + y_min ))
    p_total_print = plot_result.reshape(-1,len(plot_result))
    p_test_print = p_test.reshape(-1,len(p_test))
    p_test_print = p_test_print*(y_max-y_min) + y_min
    # one prediction per line; the break writes only the first row
    w_file = open(output_file_path + 'walmart_sales_dept' + str(dept) + '_test_result.csv', 'wb')
    for row in p_test_print:
        for element in row:
            w_file.write(str(element)+'\n')
        break
    w_file.close()
    w_file = open(output_file_path + 'walmart_sales_dept' + str(dept) + '_train_test_result.csv', 'wb')
    for row in p_total_print:
        for element in row:
            w_file.write(str(element)+'\n')
        break
    w_file.close()
    PlotResult(y_train = y_train, plot_result = plot_result, y_max = y_max, y_min = y_min, dept = dept)
    return n
def train_4_hidden():
    """Train a 4-hidden-layer (tanh) network on train_file.

    The last CSV column is the target; the trained network is pickled
    to output_model_file and the per-epoch RMSE list is returned.
    """
    print "-------------------------------------------------"
    print "loading data..."
    print "file to be loaded: ", train_file
    # returns a numpy ndarray
    train = np.loadtxt( train_file, delimiter = ',' )
    print "data loaded to a ", type(train), " of size: ", train.shape, " and type:", train.dtype
    print "Spliting inputs and output for training..."
    inputs_train = train[:,0:-1]   # all but the last column
    output_train = train[:,-1]     # last column is the target
    output_train = output_train.reshape( -1, 1 )
    print "inputs in a ", type(inputs_train), " of size: ", inputs_train.shape, " and type:", inputs_train.dtype
    print "output in a ", type(output_train), " of size: ", output_train.shape, " and type:", output_train.dtype
    print "-------------------------------------------------"
    print "Setting up supervised dataset por pyBrain training..."
    input_size = inputs_train.shape[1]
    target_size = output_train.shape[1]
    dataset = SDS( input_size, target_size )
    dataset.setField( 'input', inputs_train )
    dataset.setField( 'target', output_train )
    print "-------------------------------------------------"
    print "Setting up network for supervised learning in pyBrain..."
    #crime_network = buildNetwork( input_size, hidden_size, target_size, bias = True, hiddenclass = SigmoidLayer, outclass = LinearLayer )
    # hand-built: linear input -> 4 tanh hidden layers -> linear output
    crime_ann = FeedForwardNetwork()
    inLayer = LinearLayer(input_size)
    hiddenLayer1 = TanhLayer(hidden_size)
    hiddenLayer2 = TanhLayer(hidden_size)
    hiddenLayer3 = TanhLayer(hidden_size)
    hiddenLayer4 = TanhLayer(hidden_size)
    outLayer = LinearLayer(target_size)
    crime_ann.addInputModule(inLayer)
    crime_ann.addModule(hiddenLayer1)
    crime_ann.addModule(hiddenLayer2)
    crime_ann.addModule(hiddenLayer3)
    crime_ann.addModule(hiddenLayer4)
    crime_ann.addOutputModule(outLayer)
    in_to_hidden1 = FullConnection(inLayer, hiddenLayer1)
    hidden1_to_hidden2 = FullConnection(hiddenLayer1, hiddenLayer2)
    hidden2_to_hidden3 = FullConnection(hiddenLayer2, hiddenLayer3)
    hidden3_to_hidden4 = FullConnection(hiddenLayer3, hiddenLayer4)
    hidden4_to_out = FullConnection(hiddenLayer4, outLayer)
    crime_ann.addConnection(in_to_hidden1)
    crime_ann.addConnection(hidden1_to_hidden2)
    crime_ann.addConnection(hidden2_to_hidden3)
    crime_ann.addConnection(hidden3_to_hidden4)
    crime_ann.addConnection(hidden4_to_out)
    crime_ann.sortModules()  # finalize topology before training
    trainer = BackpropTrainer( crime_ann,dataset )
    print "-------------------------------------------------"
    rmse_vector = []
    print "training for {} epochs...".format( epochs )
    for i in range( epochs ):
        mse = trainer.train()
        rmse = sqrt( mse )
        print "training RMSE, epoch {}: {}".format( i + 1, rmse )
        rmse_vector.append(rmse)
    print "-------------------------------------------------"
    pickle.dump( crime_ann, open( output_model_file, 'wb' ))
    print "Training done!"
    print "-------------------------------------------------"
    return rmse_vector
# Score a trained net on the test CSV (last column = labels) and dump
# the raw predictions to output_predictions_file.
test = np.loadtxt(test_file, delimiter=',')
x_test = test[:, 0:-1]
y_test = test[:, -1]
y_test = y_test.reshape(-1, 1)
# you'll need labels. In case you don't have them...
y_test_dummy = np.zeros(y_test.shape)
input_size = x_test.shape[1]
target_size = y_test.shape[1]
# sanity check: the net's geometry must match the data
assert (net.indim == input_size)
assert (net.outdim == target_size)
# prepare dataset (targets are dummies; real labels only feed the MSE)
ds = SDS(input_size, target_size)
ds.setField('input', x_test)
ds.setField('target', y_test_dummy)
# predict
p = net.activateOnDataset(ds)
mse = MSE(y_test, p)
rmse = sqrt(mse)
print "testing RMSE:", rmse
np.savetxt(output_predictions_file, p, fmt='%.6f')
def benchmark(clf=None, n_hidden=10, n_epochs=10):
    """10-fold CV benchmark of a price regressor on the merged listings.

    For each property class in the (currently single-element) list,
    filters rows, drops NaNs, optionally log-transforms the price,
    removes outliers, one-hot encodes categorical features, and
    evaluates either a pybrain MLP (USE_NEURALNET) or the supplied
    sklearn-style clf.  Returns a dict of per-fold metric lists.
    """
    for col in ['AP']:#, 'COP', 'AP', 'LS', 'MA']:#, 'PPR', '2X', '3X', '4X', '5X', 'AU', 'UNI', 'MEM']:
        print "*" * 80
        print type(clf)
        print col
        X = merged[numerical_columns + ['LivingArea']]
        #X = merged.drop(['MlsNumber', 'Lat', 'Lng', 'BuyPrice'], axis=1, inplace=False)
        X_cat = merged[categorical_columns]
        Y = merged[['BuyPrice']]
        # keep only rows belonging to the current property class
        mask = merged[col]==1
        X, X_cat, Y = X[mask], X_cat[mask], Y[mask]
        print 'X.shape: ', X.shape
        print 'Y.shape: ', Y.shape
        # filter rows with NaN
        mask = ~np.isnan(X).any(axis=1)
        X, X_cat, Y = X[mask], X_cat[mask], Y[mask]
        mask = ~np.isnan(Y).any(axis=1)
        X, X_cat, Y = X[mask], X_cat[mask], Y[mask]
        print 'After NaN filter: ', X.shape
        X, X_cat, Y = np.array(X), np.array(X_cat), np.array(Y)
        if USE_LOG:
            # regress on log-price; inverted via exp before scoring
            Y = np.log(Y)
        Y = Y.reshape(Y.shape[0])
        print "mean: ", np.mean(Y)
        print "median: ", np.median(Y)
        print "std: ", Y.std()
        # remove outliers: keep prices in (1e5, 1e6)
        # NOTE(review): with USE_LOG these bounds compare against
        # log-prices, which would drop every row - confirm intent.
        mask = Y > 10**5
        X, X_cat, Y = X[mask], X_cat[mask], Y[mask]
        mask = Y < 10**6
        X, X_cat, Y = X[mask], X_cat[mask], Y[mask]
        # one-hot encode categorical features
        X_cat_enc = []
        for i, cat in enumerate(categorical_columns):
            # NOTE(review): rebinding `col` here shadows the outer loop
            # variable; harmless while the outer list has one element.
            col = X_cat[:,i]
            col = LabelEncoder().fit_transform(col).reshape((-1,1))
            col_enc = OneHotEncoder(sparse=False).fit_transform(col)
            X_cat_enc.append(col_enc)
        X_cat = np.concatenate(X_cat_enc, axis=1)
        print 'X_cat.shape: ', X_cat.shape
        skf = KFold(n=X.shape[0], n_folds=10, shuffle=True, random_state=42)
        # per-fold metric accumulators
        L = { 'rmse': [], 'corr': [], 'r2': [], 'diff': [], 'mae': [], 'explained_var': [], 'var': []}
        for train_indices, test_indices in skf:
            X_train, X_train_cat, Y_train = X[train_indices], X_cat[train_indices], Y[train_indices]
            X_test, X_test_cat, Y_test = X[test_indices], X_cat[test_indices], Y[test_indices]
            # scale numeric features only: fit on train, apply to test
            scaler = StandardScaler()
            X_train = scaler.fit_transform(X_train)
            X_test = scaler.transform(X_test)
            X_train = np.concatenate([X_train, X_train_cat], axis=1)
            X_test = np.concatenate([X_test, X_test_cat], axis=1)
            if USE_NEURALNET:
                print 'n_hidden: %d' % n_hidden
                Y_train, Y_test = Y_train.reshape(-1, 1), Y_test.reshape(-1, 1)
                train_ds = SupervisedDataSet(X_train.shape[1], Y_train.shape[1])
                train_ds.setField('input', X_train)
                train_ds.setField('target', Y_train)
                net = buildNetwork(X_train.shape[1], n_hidden, Y_train.shape[1], bias=True)
                trainer = BackpropTrainer(net, train_ds)
                for i in xrange(n_epochs):
                    mse = trainer.train()
                    rmse = math.sqrt(mse)
                    print "epoch: %d, rmse: %f" % (i, rmse)
                test_ds = SupervisedDataSet(X_test.shape[1], Y_test.shape[1])
                test_ds.setField('input', X_test)
                test_ds.setField('target', Y_test)
                preds = net.activateOnDataset(test_ds)
            else:
                clf.fit(X_train, Y_train)
                preds = clf.predict(X_test).astype(float)
            # undo the log transform before computing dollar metrics
            if USE_LOG:
                Y_test_10 = np.exp(Y_test)
                preds_10 = np.exp(preds)
            else:
                Y_test_10 = Y_test
                preds_10 = preds
            rmse = math.sqrt(metrics.mean_squared_error(Y_test_10, preds_10))
            corr = pearsonr(preds_10, Y_test_10)
            # relative absolute error per listing
            # NOTE(review): zip order binds p to the actual value and a
            # to the prediction, so this divides by the PREDICTION;
            # relative error usually divides by the actual - confirm.
            diff = np.array([abs(p-a)/a for (p,a) in zip(Y_test_10, preds_10)])
            mae = metrics.mean_absolute_error(Y_test_10, preds_10)
            explained_var = metrics.explained_variance_score(Y_test_10, preds_10)
            r2 = metrics.r2_score(Y_test_10, preds_10)
            var = np.var(diff)
            L['rmse'].append(rmse)
            L['corr'].append(corr[0])
            L['diff'].append(diff.mean())
            L['mae'].append(mae)
            L['explained_var'].append(explained_var)
            L['r2'].append(r2)
            L['var'].append(var)
            if GENERATE_PLOTS:
                plt.plot(Y_test_10, preds_10, 'ro')
                plt.show()
                break
            if USE_NEURALNET:
                # the neural net is slow; evaluate a single fold only
                break
        for key in L.keys():
            print "Mean %s: %f" % (key, np.array(L[key]).mean())
        return L