def validate(X, y, net):
    # Test Set.
    x_test = X[split_at:, :]
    y_test = y[split_at:]
    y_test = y_test.reshape(-1, 1)

    # you'll need labels. In case you don't have them...
    y_test_dummy = np.zeros(y_test.shape)

    input_size = x_test.shape[1]
    target_size = y_test.shape[1]

    assert (net.indim == input_size)
    assert (net.outdim == target_size)

    # prepare dataset
    ds = SDS(input_size, target_size)
    ds.setField('input', x_test)
    ds.setField('target', y_test)

    # predict

    p = net.activateOnDataset(ds)

    mse = MSE(y_test, p)
    print "testing MSE:", mse
    np.savetxt(output_predictions_file, p, fmt='%.6f')
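These catalog excerpts omit their module-level context. For orientation, a minimal sketch of what example #1 appears to assume: SDS as an alias for PyBrain's SupervisedDataSet matches the rest of the listing, while the MSE alias, the split index, and the output path are guesses inferred from the function body:

# Hypothetical context for validate() above; values are illustrative.
import numpy as np
from pybrain.datasets import SupervisedDataSet as SDS
from sklearn.metrics import mean_squared_error as MSE  # assumed alias

split_at = 800                                # assumed train/test boundary
output_predictions_file = 'predictions.txt'   # assumed output path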
Code example #2
    def fit(self, X, y):
        _, self.in_size = X.shape
        _, self.out_size = y.shape

        ds = SDS(self.in_size, self.out_size)

        ds.setField('input', X)
        ds.setField('target', y)

        self.net = buildNetwork(self.in_size,
                                self.h_size,
                                self.out_size,
                                bias=True)
        trainer = BP(self.net, ds)

        print("start training ...")

        #mse = trainer.train()
        #trainer.trainUntilConvergence(verbose=True, maxEpochs=4)

        for n in xrange(self.epo):
            mse = trainer.train()
            rmse = sqrt(mse)
            print("RMSE = %8.3f epoch = %d" % (rmse, n))
        return self
Code example #3
    def fit(self, X, y):

        y_train = np.array([[yn] for yn in y])
        _, self.in_size = X.shape
        _, self.out_size = y_train.shape

        ds = SDS(self.in_size, self.out_size)

        ds.setField('input', X)
        ds.setField('target', y_train)

        self.net = buildNetwork(self.in_size,
                                self.h_size,
                                self.out_size,
                                bias=True)
        trainer = BP(self.net, ds)

        print("start training ...")

        for n in xrange(self.epo):
            mse = trainer.train()
            rmse = sqrt(mse)
            if self.verbose:
                print("RMSE = %8.3f epoch = %d" % (rmse, n))
        return self
Code example #4
def train_fn(trainfile, hiddennodes, output_model_file):

    hidden_size = hiddennodes

    print 'Loading data..'
    x_train, y_train = load_data(trainfile)

    input_size = x_train.shape[1]
    target_size = y_train.shape[1]

    # prepare dataset

    ds = SDS(input_size, target_size)
    ds.setField('input', x_train)
    ds.setField('target', y_train)

    # init and train

    net = buildNetwork(input_size,
                       hidden_size,
                       target_size,
                       bias=True,
                       hiddenclass=SigmoidLayer,
                       outclass=SigmoidLayer)
    trainer = BackpropTrainer(net, ds)

    print 'Training..'
    trainer.trainUntilConvergence(validationProportion=0.15,
                                  maxEpochs=1000,
                                  continueEpochs=10)

    print 'Finish training. Serializing model...'
    pickle.dump(net, open(output_model_file, 'wb'))
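trainUntilConvergence carves its own validation split out of the dataset (validationProportion) and returns the per-epoch training and validation error lists, as example #17 further down also shows. A minimal sketch of capturing them from the call above:

train_errors, val_errors = trainer.trainUntilConvergence(
    validationProportion=0.15, maxEpochs=1000, continueEpochs=10)
print 'final validation MSE:', val_errors[-1]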
Code example #5
def predict(isGroup):
    path_test_file = '/home/rodolfo/Projetos/NeuralNetwork/data/test_groups_%s_file.csv' % isGroup
    path_neural_network = 'model_groups_%s.pkl' % isGroup

    test_file = path_test_file
    model_file = path_neural_network
    output_predictions_file = 'predictions_file.txt'

    # load model
    net = pickle.load(open(model_file, 'rb'))

    # load data
    test = np.loadtxt(test_file, delimiter=',')
    x_test = test[:, 0:-1]
    y_test = test[:, -1]
    y_test = y_test.reshape(-1, 1)

    # you'll need labels. In case you don't have them...
    y_test_dummy = np.zeros(y_test.shape)

    input_size = x_test.shape[1]
    target_size = y_test.shape[1]

    assert (net.indim == input_size)
    assert (net.outdim == target_size)

    # prepare dataset
    ds = SDS(input_size, target_size)
    ds.setField('input', x_test)
    ds.setField('target', y_test_dummy)

    # predict
    p = net.activateOnDataset(ds)
    np.savetxt(output_predictions_file, p, fmt='%.6f')
Code example #6
def predict(X, net):
    # Test Set.
    x_test = X[:, :]

    # you'll need labels. In case you don't have them...
    y_test_dummy = np.zeros((X.shape[0], 1))

    input_size = x_test.shape[1]
    target_size = y_test_dummy.shape[1]

    assert (net.indim == input_size)
    assert (net.outdim == target_size)

    # prepare dataset
    ds = SDS(input_size, target_size)
    ds.setField('input', x_test)
    ds.setField('target', y_test_dummy)

    p = net.activateOnDataset(ds)
    print p.shape
    np.savetxt("1_" + output_predictions_file, p, fmt='%.6f')
    s = pd.Series(p[:, 0])
    s.index += 1
    s.to_csv('neural_prediction_3.csv',
             header=['Prediction'],
             index=True,
             index_label='ID')
Code example #7
    def test(self, arr):
        # load model
        net, std_scale = pickle.load(open(self.model_file, 'rb'))
        print 'Finish loading model'

        # Load test data
        x_test, y_test = load_data(arr)
        x_test_scaled = std_scale.transform(
            x_test)  # Normalize to standard normal

        y_test_dummy = np.zeros(y_test.shape)
        input_size = x_test_scaled.shape[1]
        target_size = y_test.shape[1]

        assert (net.indim == input_size)
        assert (net.outdim == target_size)

        # prepare dataset
        ds = SDS(input_size, target_size)
        ds.setField('input', x_test_scaled)
        ds.setField('target', y_test_dummy)

        # predict
        print 'Activating ds'
        p = net.activateOnDataset(ds)
        print 'debug'
        # ptest = preprocessing.StandardScaler().fit_transform(p)
        # p_scaled = std_scale.inverse_transform(ptest)  # Convert back to original scale

        dna = self.convert_to_dna(p)

        return dna
Code example #8
def train():

	print "-------------------------------------------------"
	print "loading data..."
	print "file to be loaded: ", train_file

	# returns a numpy ndarray
	train = np.loadtxt( train_file, delimiter = ',' )

	print "data loaded to a ", type(train),   " of size: ", train.shape, " and type:", train.dtype
	print "Spliting inputs and output for training..."

	inputs_train = train[:,0:-1]
	output_train = train[:,-1]
	output_train = output_train.reshape( -1, 1 )


	print "inputs in a ", type(inputs_train),   " of size: ", inputs_train.shape, " and type:", inputs_train.dtype
	print "output in a ", type(output_train),   " of size: ", output_train.shape, " and type:", output_train.dtype
	print "-------------------------------------------------"



	print "Setting up supervised dataset por pyBrain training..."
	input_size = inputs_train.shape[1]
	target_size = output_train.shape[1]
	dataset = SDS( input_size, target_size )
	dataset.setField( 'input', inputs_train )
	dataset.setField( 'target', output_train )
	print "-------------------------------------------------"



	print "Setting up supervised dataset por pyBrain training..."
	hidden_size = 50
	epochs = 600
	crime_network = buildNetwork( input_size, hidden_size, target_size, bias = True, hiddenclass = SigmoidLayer, outclass = LinearLayer )
	trainer = BackpropTrainer( crime_network,dataset )
	print "-------------------------------------------------"


	rmse_vector = []
	print "training for {} epochs...".format( epochs )
	for i in range( epochs ):
		mse = trainer.train()
		rmse = sqrt( mse )
		print "training RMSE, epoch {}: {}".format( i + 1, rmse )
		rmse_vector.append(rmse)

	print "-------------------------------------------------"
	
	pickle.dump( crime_network, open( output_model_file, 'wb' ))

	print "Training done!"
	print "-------------------------------------------------"

	return rmse_vector
Code example #9
def train(train_select, validate_select, aggregate_ttrss):
    train = pd_to_numpy(train_select, aggregate_ttrss)
    validation = pd_to_numpy(validate_select, aggregate_ttrss)
    output_model_file = 'model.pkl'

    hidden_size = 20
    epochs = 10

    train = np.vstack((train, validation))
    x_train = train[:, 0:-1]
    y_train = train[:, -1]
    y_train = y_train.reshape(-1, 1)

    print(x_train, y_train)
    input_size = x_train.shape[1]
    target_size = y_train.shape[1]
    # print (input_size, target_size)

    # prepare dataset

    ds = SDS(input_size, target_size)
    ds.setField('input', x_train)
    ds.setField('target', y_train)

    # init and train
    # fnn = FeedForwardNetwork()

    net = buildNetwork(
        input_size,
        hidden_size,
        target_size,
        bias=True,
    )
    # net = NNregression(ds)
    trainer = BackpropTrainer(net, ds, verbose=True, weightdecay=0.01)

    print("training for {} epochs...".format(epochs))
    print(input_size, target_size, x_train, y_train)

    # plt.axis([0, epochs, 0, 0.03])
    # plt.xlabel('epoch')
    # plt.ylabel('error')
    # plt.ion()

    for i in range(epochs):
        mse = trainer.train()
        rmse = sqrt(mse)
        # plt.scatter(i, rmse, s=5)
        # plt.pause(0.00001)

        print("training RMSE, epoch {}: {}".format(i + 1, rmse))
    pickle.dump(net, open(output_model_file, 'wb'))
    return net
Code example #10
File: classificator.py Project: junk2112/detector
def nn(train_source, test_source, validation=False, v_size=0.5):

	hidden_size = 100
	epochs = 600

	# load data
	train = read_csv(train_source)
	tmp = open(train_source)
	feature_count = None
	for line in tmp:
		feature_count = len(line.split(","))
		break

	trainX = np.asarray(train[range(1, feature_count)])
	trainY = np.asarray(train[[0]]).ravel()
	# print "All Data size: " + str(len(trainX))
	testX = None
	testY = None

	if validation:
		# --- CROSS VALIDATION ---
		trainX, testX, trainY, testY = cross_validation.train_test_split(
			trainX, trainY, test_size=v_size, random_state=0)
	else:
		# --- TEST DATA ---
		test = read_csv(test_source)
		testX = np.asarray(test[range(1, feature_count)])
		testY = np.asarray(test[[0]]).ravel()

	# print testX
	# print testY
	input_size = len(trainX[0])
	target_size = 1
	print input_size
	print target_size
	# prepare dataset

	ds = SDS( input_size, target_size )
	ds.setField( 'input', trainX )
	ds.setField( 'target', [[item] for item in trainY] )

	# init and train

	net = buildNetwork( input_size, hidden_size, target_size, bias = True )
	trainer = BackpropTrainer(net, ds)

	print "training for {} epochs...".format(epochs)

	for i in range( epochs ):
		mse = trainer.train()
		rmse = sqrt(mse)
		print "training RMSE, epoch {}: {}".format(i + 1, rmse)
Code example #11
    def validate(self):
        """ The main method of this class. It runs the crossvalidation process
            and returns the validation result (e.g. performance).
        """
        dataset = self._dataset
        trainer = self._trainer
        n_folds = self._n_folds
        l = dataset.getLength()
        inp = dataset.getField("input")
        tar = dataset.getField("target")
        indim = dataset.indim
        outdim = dataset.outdim
        assert l > n_folds

        perms = array_split(permutation(l), n_folds)

        perf = 0.
        for i in range(n_folds):
            # determine train indices
            train_perms_idxs = range(n_folds)
            train_perms_idxs.pop(i)
            temp_list = []
            for train_perms_idx in train_perms_idxs:
                temp_list.append(perms[ train_perms_idx ])
            train_idxs = concatenate(temp_list)

            # determine test indices
            test_idxs = perms[i]

            # train
            #print "training iteration", i
            train_ds = SupervisedDataSet(indim, outdim)
            train_ds.setField("input"  , inp[train_idxs])
            train_ds.setField("target" , tar[train_idxs])
            trainer = copy.deepcopy(self._trainer)
            trainer.setData(train_ds)
            if not self._max_epochs:
                trainer.train()
            else:
                trainer.trainEpochs(self._max_epochs)

            # test
            #print "testing iteration", i
            test_ds = SupervisedDataSet(indim, outdim)
            test_ds.setField("input"  , inp[test_idxs])
            test_ds.setField("target" , tar[test_idxs])
#            perf += self.getPerformance( trainer.module, dataset )
            perf += self._calculatePerformance(trainer.module, test_ds)

        perf /= n_folds
        return perf
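This validate method looks like PyBrain's own CrossValidator. Assuming that origin, a hedged, self-contained sketch of driving it with ModuleValidator.MSE as the fold metric; the constructor signature and the max_epochs keyword are assumptions about pybrain.tools.validation:

# Hedged usage sketch; treat the CrossValidator signature as an assumption.
import numpy as np
from pybrain.datasets import SupervisedDataSet
from pybrain.tools.shortcuts import buildNetwork
from pybrain.supervised.trainers import BackpropTrainer
from pybrain.tools.validation import CrossValidator, ModuleValidator

X = np.random.rand(40, 3)
y = X.sum(axis=1).reshape(-1, 1)   # toy regression target
ds = SupervisedDataSet(3, 1)
ds.setField('input', X)
ds.setField('target', y)

net = buildNetwork(3, 5, 1, bias=True)
trainer = BackpropTrainer(net, ds)
cv = CrossValidator(trainer, ds, n_folds=4,
                    valfunc=ModuleValidator.MSE,  # MSE per held-out fold
                    max_epochs=20)                # assumed keyword
print 'cross-validated MSE:', cv.validate()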
Code example #12
def train(
    train,
    label,
    custom_net=None,
    training_mse_threshold=0.40,
    testing_mse_threshold=0.60,
    epoch_threshold=10,
    epochs=100,
    hidden_size=20,
):
    # Test Set.
    x_train = train[0:split_at, :]
    y_train_slice = label[0:split_at]
    y_train = y_train_slice.reshape(-1, 1)
    x_test = train[split_at:, :]
    y_test_slice = label[split_at:]
    y_test = y_test_slice.reshape(-1, 1)

    # Shape.
    input_size = x_train.shape[1]
    target_size = y_train.shape[1]

    # prepare dataset
    ds = SDS(input_size, target_size)
    ds.setField("input", x_train)
    ds.setField("target", y_train)

    # prepare dataset
    ds_test = SDS(input_size, target_size)
    ds_test.setField("input", x_test)
    ds_test.setField("target", y_test)

    min_mse = 1000000

    # init and train
    if custom_net is None:
        net = buildNetwork(input_size, hidden_size, target_size, bias=True)
    else:
        print "Picking up the custom network"
        net = custom_net

    trainer = RPropMinusTrainer(net, dataset=ds, verbose=False, weightdecay=0.01, batchlearning=True)
    print "training for {} epochs...".format(epochs)

    for i in range(epochs):
        mse = trainer.train()
        print "training mse, epoch {}: {}".format(i + 1, math.sqrt(mse))

        p = net.activateOnDataset(ds_test)
        mse = math.sqrt(MSE(y_test, p))
        print "-- testing mse, epoch {}: {}".format(i + 1, mse)
        pickle.dump(net, open("current_run", "wb"))

        if min_mse > mse:
            print "Current minimum found at ", i
            pickle.dump(net, open("current_min_epoch_" + model_file, "wb"))
            min_mse = mse

    pickle.dump(net, open(model_file, "wb"))
    return net
Code example #13
def Neural_Network(xtrain,ytrain,xtest,ytest):
    #Hidden nodes
    hidden_net = 2
    #Epoch is a single pass through the entire training set, followed by testing of the verification set.
    epoch = 2
    ytrain = ytrain.reshape(-1,1)
    input_cnt = xtrain.shape[1]
    target_cnt = ytrain.shape[1]
    dataset = SupervisedDataSet(input_cnt, target_cnt)
    dataset.setField( 'input', xtrain )
    dataset.setField( 'target', ytrain )
    network = buildNetwork( input_cnt, hidden_net, target_cnt, bias = True )
    #Trainer that trains the parameters of a module according to a supervised dataset (potentially sequential) by backpropagating the errors (through time).
    trainer = BackpropTrainer( network,dataset )
    print("---------------Neural Network---------------")
    print("Train Data")
    for e in range(epoch):
        mse = trainer.train()
        rmse = math.sqrt(mse)
        print("MSE, epoch {}: {}".format(e + 1, mse))
        print("RMSE, epoch {}: {}".format(e + 1, rmse))
    
    ytest=ytest.reshape(-1,1)
    input_size = xtest.shape[1]
    target_size = ytest.shape[1]
    dataset = SupervisedDataSet( input_size, target_size )
    dataset.setField( 'input', xtest)
    dataset.setField( 'target', ytest)
    model = network.activateOnDataset(dataset)

    mse = mean_squared_error(ytest, model )
    rmse =math.sqrt(mse)
    print("Test Data:")
    print("MSE: ", mse)
    print("RMSE: ", rmse)
Code example #14
    def predict_proba(self, X):

        row_size, in_size = X.shape

        y_test_dumy = np.zeros([row_size, self.out_size])

        assert (self.net.indim == in_size)

        ds = SDS(in_size, self.out_size)

        ds.setField('input', X)
        ds.setField('target', y_test_dumy)

        p = self.net.activateOnDataset(ds)
        return p
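The fit methods in examples #2 and #3 and this predict_proba read like pieces of one sklearn-style wrapper around PyBrain. A minimal sketch of the scaffolding such a class would need; the class name and defaults are invented here, while h_size, epo, verbose, net and out_size are implied by the method bodies:

# Hypothetical scaffolding; only the attribute names are grounded in the
# method bodies above.
class PyBrainEstimator(object):
    def __init__(self, h_size=20, epo=100, verbose=True):
        self.h_size = h_size      # hidden-layer width used by fit()
        self.epo = epo            # number of training epochs in fit()
        self.verbose = verbose    # per-epoch RMSE printing in fit()
        self.net = None           # set by fit(), consumed by predict_proba()
        self.out_size = None      # set by fit(), reused by predict_proba()
    # fit() and predict_proba() from the examples would be defined here,
    # giving the usual model = PyBrainEstimator().fit(X, y) workflow.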
Code example #15
File: neural_net.py Project: mcminis1/forest-cover
def CV_NN(X_train, Y, N_CV=1, test_sze=0.3, n_middle = 14):
    hidden_size = n_middle
    sss = cross_validation.StratifiedShuffleSplit(
        Y, N_CV, test_size=test_sze, random_state=0)

    overall_accuracy = 0
    overall_error = 0
    confusion_matrix = np.zeros((7, 7))  # float, so the per-fold averaging below stays exact
    for train_block, test_block in sss:
        x_train=X_train.as_matrix()[train_block]
        input_size = x_train.shape[1]
        y_vals = Y[train_block]
        y_train=np.zeros((len(y_vals),7))
        for i,y in enumerate(y_vals):
            y_train[i][y-1]=1
        target_size = y_train.shape[1]
        # print x_train.shape, y_train.shape

        ds = SDS( input_size, target_size)
        ds.setField( 'input', x_train)
        ds.setField( 'target', y_train)
        
        net = buildNetwork( input_size, hidden_size, target_size, bias = True, hiddenclass=SigmoidLayer, outclass=SoftmaxLayer )
        trainer = BackpropTrainer( net, ds, learningrate=0.1, verbose=True)
        trainer.trainUntilConvergence( verbose = False, validationProportion = 0.2, maxEpochs = 64, continueEpochs = 4 )
        trainer = BackpropTrainer( net, ds, learningrate=0.05, verbose=True)
        trainer.trainUntilConvergence( verbose = False, validationProportion = 0.2, maxEpochs = 64, continueEpochs = 8 )
        trainer = BackpropTrainer( net, ds, learningrate=0.01, verbose=True)
        trainer.trainUntilConvergence( verbose = False, validationProportion = 0.2, maxEpochs = 512, continueEpochs = 16 )
        trainer = BackpropTrainer( net, ds, learningrate=0.005, verbose=True)
        trainer.trainUntilConvergence( verbose = False, validationProportion = 0.2, maxEpochs = 1024, continueEpochs = 64 )

        y_vals = Y[test_block]
        y_test=np.zeros((len(y_vals),7))
        for i,y in enumerate(y_vals):
            y_test[i][y-1]=1
        x_test = X_train.as_matrix()[test_block]

        ds = SDS( input_size, target_size)
        ds.setField( 'input', x_test )
        ds.setField( 'target', y_test )

        Y_predict = net.activateOnDataset( ds )
        y_predict=Y_predict.argmax(axis=1)
        y_test=y_vals-1
        accuracy = (y_test == y_predict).mean()
        for x, y in zip(y_test, y_predict):
            confusion_matrix[x - 1, y - 1] += 1
        overall_accuracy += accuracy
        overall_error += accuracy * accuracy
    confusion_matrix *= 1.0 / N_CV
    print confusion_matrix
    overall_accuracy *= 1.0 / N_CV
    overall_error = np.sqrt(
        (overall_error / N_CV - overall_accuracy ** 2) / N_CV)
    print overall_accuracy, overall_error
Code example #16
def train_cross_validate(train, label, custom_net=None, training_mse_threshold=0.40, testing_mse_threshold=0.60,
                         epoch_threshold=10, epochs=100, hidden_size=50):
    # Test Set.
    x_train = train[0:split_at, :]
    y_train_slice = label[0:split_at]
    y_train = y_train_slice.reshape(-1, 1)
    x_test = train[split_at:, :]
    y_test_slice = label[split_at:]
    y_test = y_test_slice.reshape(-1, 1)

    # Shape.
    input_size = x_train.shape[1]
    target_size = y_train.shape[1]

    input_size_test = x_test.shape[1]
    target_size_test = y_test.shape[1]

    # prepare dataset
    ds = SDS(input_size, target_size)
    ds.setField('input', x_train)
    ds.setField('target', y_train)

    # prepare dataset
    ds_test = SDS(input_size, target_size)
    ds_test.setField('input', x_test)
    ds_test.setField('target', y_test)

    min_mse = 1000000

    # init and train
    if custom_net is None:
        net = buildNetwork(input_size, hidden_size, target_size, bias=True, hiddenclass=TanhLayer)
    else:
        print "Picking up the custom network"
        net = custom_net

    trainer = RPropMinusTrainer(net, dataset=ds, verbose=True, weightdecay=0.01, batchlearning=True)
    print "training for {} epochs...".format(epochs)

    for i in range(epochs):
        mse = trainer.train()
        print "training mse, epoch {}: {}".format(i + 1, mse)

        p = net.activateOnDataset(ds_test)
        mse = MSE(y_test, p)
        print "-- testing mse, epoch {}: {}".format(i + 1, mse)
        pickle.dump(net, open("current_run", 'wb'))

        if min_mse > mse:
            print "Current minimum found at ", i
            pickle.dump(net, open("current_min_epoch_" + model_file, 'wb'))
            min_mse = mse

    pickle.dump(net, open(model_file, 'wb'))
    return net
Code example #17
def validate(train_select, validate_select):

    train = pd_to_numpy(train_select)
    validation = pd_to_numpy(validate_select)
    output_model_file = 'model_val.pkl'

    hidden_size = 100
    epochs = train.shape[0]
    continue_epochs = 100
    validation_proportion = 0.15

    # load data, join train and validation files

    # train = np.loadtxt( train_file, delimiter = ',' )
    # validation = np.loadtxt( validation_file, delimiter = ',' )
    train = np.vstack((train, validation))

    x_train = train[:, 0:-1]
    y_train = train[:, -1]
    y_train = y_train.reshape(-1, 1)

    input_size = x_train.shape[1]
    target_size = y_train.shape[1]

    # prepare dataset

    ds = SDS(input_size, target_size)
    ds.setField('input', x_train)
    ds.setField('target', y_train)

    # init and train

    net = buildNetwork(input_size, hidden_size, target_size, bias=True)
    trainer = BackpropTrainer(net, ds)

    train_mse, validation_mse = trainer.trainUntilConvergence(
        verbose=True,
        validationProportion=validation_proportion,
        maxEpochs=epochs,
        continueEpochs=continue_epochs)

    pickle.dump(net, open(output_model_file, 'wb'))
Code example #18
def train_fn(trainfile, hiddennodes):
    output_model_file = '../Serialized/model_{0}_nodes.pkl'.format(
        str(hiddennodes))

    hidden_size = hiddennodes

    print 'Loading data..'
    x_train, y_train = load_data(trainfile)

    input_size = x_train.shape[1]
    target_size = y_train.shape[1]

    # prepare dataset

    ds = SDS(input_size, target_size)
    ds.setField('input', x_train)
    ds.setField('target', y_train)

    # init and train

    net = buildNetwork(input_size,
                       hidden_size,
                       target_size,
                       bias=True,
                       hiddenclass=SigmoidLayer,
                       outclass=SigmoidLayer)
    trainer = BackpropTrainer(net, ds)

    # print "training for {} epochs...".format( epochs )
    #
    # for i in range(epochs):
    #     mse = trainer.train()
    #     rmse = sqrt( mse )
    #     print "training RMSE, epoch {}: {}".format( i + 1, rmse )

    print 'Training..'
    trainer.trainUntilConvergence(validationProportion=0.15,
                                  maxEpochs=1000,
                                  continueEpochs=10)

    print 'Finish training. Serializing model...'
    pickle.dump(net, open(output_model_file, 'wb'))
Code example #19
def prepareDataset():

    train_file = "../traindata/train_scaled.csv"
    train = np.loadtxt(train_file, delimiter=',')

    x_train = train[:, 0:-1]
    y_train = train[:, -1]
    y_train = y_train.reshape(-1, 1)

    input_size = x_train.shape[1]
    target_size = y_train.shape[1]

    print input_size
    print target_size

    # prepare dataset

    ds = SDS(input_size, target_size)
    ds.setField('input', x_train)
    ds.setField('target', y_train)
    return (ds, input_size)
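prepareDataset hands back the dataset together with the input width, presumably so a caller can size the network; a minimal sketch of that hand-off (the hidden width and trainer choice are assumptions):

from pybrain.tools.shortcuts import buildNetwork
from pybrain.supervised.trainers import BackpropTrainer

ds, input_size = prepareDataset()
net = buildNetwork(input_size, 50, 1, bias=True)  # 50 hidden units assumed
trainer = BackpropTrainer(net, ds)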
Code example #20
def predict(aggregate_quotes, aggregate_ttrss):
    # test_file = 'data/test.csv'
    model_file = 'model.pkl'
    output_predictions_file = 'predictions.txt'

    # load model
    net = pickle.load(open(model_file, 'rb'))

    # load data
    test = pd_to_numpy(aggregate_quotes, aggregate_ttrss)
    x_test = test[:, 0:-1]
    y_test = test[:, -1]
    y_test = y_test.reshape(-1, 1)

    # # you'll need labels. In case you don't have them...
    # y_test_dummy = np.zeros( y_test.shape )
    # y_test_dummy = np.zeros(y_test.shape)
    print(x_test, y_test)
    input_size = x_test.shape[1]
    target_size = y_test.shape[1]

    print(net.indim, net.outdim, input_size, target_size)
    assert (net.indim == input_size)
    assert (net.outdim == target_size)

    # prepare dataset

    ds = SDS(input_size, target_size)
    ds.setField('input', x_test)
    ds.setField('target', y_test)

    # predict

    p = net.activateOnDataset(ds)
    mse = MSE(y_test, p)
    rmse = sqrt(mse)

    print("testing RMSE:", rmse, p)
    np.savetxt(output_predictions_file, p, fmt='%.6f')
    return p
Code example #21
    def train(self, arr):
        '''
        Train NN for given data
        :param arr: [wt_arr, mt_arr], in ATCG or atcg
        :return: void, but serialize model to file
        '''

        x_train, y_train = load_data(arr)
        std_scale = preprocessing.StandardScaler().fit(x_train)
        x_train_scaled = std_scale.transform(
            x_train)  # Normalize to standard normal
        # y_train_scaled = std_scale.transform(y_train)     # Try not scaling y

        input_size = x_train_scaled.shape[1]
        target_size = y_train.shape[1]

        # prepare dataset

        ds = SDS(input_size, target_size)
        ds.setField('input', x_train_scaled)
        ds.setField('target', y_train)

        # init and train

        net = buildNetwork(input_size,
                           self.hiddennodes,
                           target_size,
                           bias=True,
                           hiddenclass=TanhLayer,
                           outclass=TanhLayer)
        trainer = BackpropTrainer(net, ds)

        print 'Training..'
        trainer.trainUntilConvergence(validationProportion=0.15,
                                      maxEpochs=1000,
                                      continueEpochs=10)

        print 'Finish training. Serializing bundle...'
        bundle = [net, std_scale]
        pickle.dump(bundle, open(self.model_file, 'wb'))
Code example #22
def neuralNetworkRegression(X_test):
    """
    :param X: data consisting of features (excluding class variable)
    :param Y: column vector consisting of class variable
    :return: models neural network regression with fine-tuning of epochs
    """
    print "NEURAL NETWORK REGRESSION"
    print "Executing..."
    print

    print "Loading saved model..."
    net = pickle.load(open("Models/neural.sav", 'rb'))
    # utils.neuralNetworkRegression()
    """ predict new value """
    y_test = np.zeros((X_test.shape[0], 1))
    input_size = X_test.shape[1]
    target_size = y_test.shape[1]
    ds = SDS(input_size, target_size)
    ds.setField('input', X_test)
    ds.setField('target', y_test)
    prediction = net.activateOnDataset(ds)
    print prediction
    return prediction
Code example #23
def test_fn(testfile, hiddennodes, model_file):
    # load model
    net = pickle.load( open( model_file, 'rb' ))
    print 'Finish loading model'

    # Load test data
    x_test, y_test = load_data(testfile)
    y_test_dummy = np.zeros( y_test.shape )
    input_size = x_test.shape[1]
    target_size = y_test.shape[1]

    assert( net.indim == input_size )
    assert( net.outdim == target_size )

    # prepare dataset
    ds = SDS( input_size, target_size )
    ds.setField( 'input', x_test )
    ds.setField( 'target', y_test_dummy )

    # predict
    print 'Activating ds'
    p = net.activateOnDataset( ds )

    def threshold(x):
        if x>0.5:
            print 'x>0.5'
        return 0 if x<0.5 else 1

    p_converted = []
    for each in p:
        converted = map(threshold, each)
        p_converted.append(converted)

    p_converted = np.array(p_converted)
    acc = accuracy_score(y_test, p_converted)
    print 'Accuracy score=%s' %acc
Code example #24
def predict(X, net):
    # Test Set.
    x_test = X[:, :]

    # you'll need labels. In case you don't have them...
    y_test_dummy = np.zeros((X.shape[0], 1))

    input_size = x_test.shape[1]
    target_size = y_test_dummy.shape[1]

    assert (net.indim == input_size)
    assert (net.outdim == target_size)

    # prepare dataset
    ds = SDS(input_size, target_size)
    ds.setField('input', x_test)
    ds.setField('target', y_test_dummy)

    p = net.activateOnDataset(ds)
    print p.shape
    np.savetxt("1_" + output_predictions_file, p, fmt='%.6f')
    s = pd.Series(p[:, 0])
    s.index += 1
    s.to_csv('neural_prediction_3.csv', header=['Prediction'], index=True, index_label='ID')
Code example #25
def FitNeuralNetworkDeptAnimate(dept = 1, num = 1000):

	train_file = input_file_path + train_file_name[0] + str(dept) + train_file_name[1]
	test_file = input_file_path + test_file_name[0] + str(dept) + test_file_name[1]

	train = np.loadtxt( train_file, delimiter = ' ' )
	test = np.loadtxt( test_file, delimiter = ' ' )
	print len(train)
	x_train = train[0:num, 0 : -1]
	y_train = train[0:num, -1]

	y_max = max(y_train)
	y_min = min(y_train)
	y_train = (y_train - y_min) / (y_max-y_min)
	y_train = y_train.reshape(-1,1)

	input_size = x_train.shape[1]
	target_size = y_train.shape[1]

	x_test = test[0:num/4, 0 : -1]
	y_test = test[0:num/4, -1]
	y_test = y_test.reshape(-1,1)

	
	ds_test = SDS( input_size, target_size )
	ds_test.setField( 'input', x_test )
	ds_test.setField( 'target', y_test )

	ds = SDS( input_size, target_size )
	ds.setField( 'input', x_train )
	ds.setField( 'target', y_train )


	hidden_size = input_size*hidden_size_ratio


	n = RecurrentNetwork()


	n.addInputModule(LinearLayer(input_size, name='in'))
	n.addModule(BiasUnit('bias'))
	for i in range(0, num_hidden_layer+1):
		hidden_name = 'hidden'+str(i)
		n.addModule(SigmoidLayer(hidden_size, name=hidden_name))
	n.addOutputModule(LinearLayer(target_size, name='out'))

	n.addConnection(FullConnection(n['in'], n['hidden0'], name='c1'))
	next_hidden = 'hidden0'

	for i in range(0,num_hidden_layer ):
		current_hidden = 'hidden'+str(i)
		next_hidden = 'hidden'+str(i+1)
		n.addConnection(FullConnection(n[current_hidden], n[next_hidden], name='c'+str(i+2)))

	n.addConnection(FullConnection(n[next_hidden], n['out'], name='c'+str(num_hidden_layer+2)))

	n.addConnection(FullConnection(n['bias'], n['hidden0'], name='c'+str(num_hidden_layer+7)))


	n.sortModules()
	print n


	trainer = BackpropTrainer(n,ds ,weightdecay=weightdecay, learningrate=learningrate, lrdecay=1.0, momentum = momentum)
	
	
	plt.ion()
	fig = plt.figure()
	ax = fig.add_subplot(111)

	plt.annotate("Dept1", (10,-15000))
	plt.annotate("Dept2", (180,-30000))
	plt.annotate("Dept3", (300,-15000))
	plt.annotate("Dept4", (450,-30000))
	plt.annotate("Dept5", (600,-15000))
	plt.annotate("Dept6", (700,-30000))
	plt.annotate("Dept7", (900,-15000))
	
	line1, = ax.plot([],[],'-b',label='train')
	line2, = ax.plot([],[],'-r',label='test')
	ax.legend()

	dummy = raw_input("Plot the graph?")

	for i in range(epochs):
		error = trainer.train()
		print "Epoch: %d, Error: %7.4f" % (i, error)


		p_train = n.activateOnDataset( ds )
		p_test = n.activateOnDataset( ds_test )
		plot_result = np.vstack((p_train*(y_max-y_min) + y_min, p_test*(y_max-y_min) + y_min ))


		p_test_print = p_test.reshape(-1,len(p_test))
		p_test_print = p_test_print*(y_max-y_min) + y_min

		line1.set_ydata(y_train*(y_max-y_min) + y_min)
		line1.set_xdata(range(len(y_train)))
		line2.set_ydata(plot_result)
		line2.set_xdata(range(len(plot_result)))
		ax.relim()
		ax.autoscale_view()
		plt.draw()
Code example #26
#Loading data

test = np.loadtxt("data_out/test3.csv")
test = test.astype(int)
net = pickle.load(open("data_out/model3.pk1", "rb"))

#Variables
x_test = test[:, 1:-3]
y_test = test[:, -3:]
y_test_pred = np.zeros(y_test.shape)

input_size = x_test.shape[1]
target_size = y_test.shape[1]

assert (net.indim == input_size)
assert (net.outdim == target_size)

ds = SDS(input_size, target_size)
ds.setField('input', x_test)
ds.setField('target', y_test_pred)

p = net.activateOnDataset(ds)

mse = MSE(y_test, p)
rmse = sqrt(mse)

print "testing RMSE:", rmse

np.savetxt("pred", p, fmt='%.6f')
Code example #27
    X = np.transpose(np.vstack((z, rich)))
    Y = mass
    return X, Y

X_train, Y_train = extract_xy(train_data)
X_test, Y_test = extract_xy(test_data)

Y_train = Y_train.reshape( -1, 1 )

input_size = X_train.shape[1]
target_size = Y_train.shape[1]

# prepare dataset

ds = SDS( input_size, target_size )
ds.setField( 'input', X_train )
ds.setField( 'target', Y_train )

# init and train

net = buildNetwork( input_size, hidden_size, target_size, bias = True )
trainer = BackpropTrainer( net,ds )

print "training for {} epochs...".format( epochs )

for i in range( epochs ):
	mse = trainer.train()
	rmse = sqrt( mse )
	print "training RMSE, epoch {}: {}".format( i + 1, rmse )
	
Code example #28
def train_ann_multihidden(data_dicts, input_fields, layers, hidden_size, epochs):

	print "-------------------------------------------------"
	print "loading data..."
	# returns a numpy ndarray
	train = dicts_to_np_array(data_dicts, input_fields)

	print "data loaded to a ", type(train),   " of size: ", train.shape, " and type:", train.dtype
	print "Splitting inputs and output for training..."

	inputs_train = train[:,2:]
	outputs_train = train[:,:2]
	outputs_train = outputs_train.reshape( -1, 2 )


	print "inputs in a ", type(inputs_train),   " of size: ", inputs_train.shape, " and type:", inputs_train.dtype
	print "output in a ", type(outputs_train),   " of size: ", outputs_train.shape, " and type:", outputs_train.dtype
	print "-------------------------------------------------"

	print "first input vectors: ", inputs_train[0:2,:]

	print "first output vectors: ", outputs_train[0:2,:]


	print "Setting up supervised dataset for pyBrain training..."
	input_size = inputs_train.shape[1]
	target_size = outputs_train.shape[1]
	dataset = SDS( input_size, target_size )
	dataset.setField( 'input', inputs_train )
	dataset.setField( 'target', outputs_train )
	print "-------------------------------------------------"

	print "Setting up network for supervised learning in pyBrain..."

	appraisal_network = FeedForwardNetwork()
	inLayer = LinearLayer(input_size)
	hiddenLayer1 = SigmoidLayer(hidden_size)
	hiddenLayer2 = SigmoidLayer(hidden_size//2)
	outLayer = LinearLayer(target_size)
	appraisal_network.addInputModule(inLayer)
	appraisal_network.addModule(hiddenLayer1)
	appraisal_network.addModule(hiddenLayer2)
	appraisal_network.addOutputModule(outLayer)
	in_to_hidden1 = FullConnection(inLayer, hiddenLayer1)
	hidden1_to_hidden2 = FullConnection(hiddenLayer1, hiddenLayer2)
	hidden2_to_out = FullConnection(hiddenLayer2, outLayer)
	appraisal_network.addConnection(in_to_hidden1)
	appraisal_network.addConnection(hidden1_to_hidden2)
	appraisal_network.addConnection(hidden2_to_out)
	appraisal_network.sortModules()


	trainer = BackpropTrainer( appraisal_network,dataset )

	print "-------------------------------------------------"

	start_time = time.time()
	rmse_vector = []
	rmse_min = sys.float_info.max
	#print "training for {} epochs...".format( epochs )
	for i in range( epochs ):
		mse = trainer.train()
		rmse = sqrt( mse )
		print "training RMSE, epoch {}: {}".format( i + 1, rmse )
		rmse_vector.append(rmse)
		if rmse < rmse_min:
			rmse_min = rmse
	#print "-------------------------------------------------"
	elapsed_time = time.time() - start_time

# 	pickle.dump( crime_ann, open( output_model_file, 'wb' ))

	#print "Training done!"
	#print "-------------------------------------------------"

# 	return rmse_vector

	return {"time_elapsed": elapsed_time,
			"epochs": epochs,
			"rmse_vector": rmse_vector,
			"rmse_min": rmse_min,
			"hidden_layers": 2,   # two hidden layers are built above
			"hidden_neurons": hidden_size
			}, appraisal_network
Code example #29
print "Loading in the data"
train = np.loadtxt( train_path, delimiter = ',', skiprows=1 )
train_target = np.loadtxt( train_target_path, delimiter= ",", skiprows=1)
test = np.loadtxt( test_path, delimiter = ',', skiprows=1)
test_target = np.loadtxt( test_target_path, delimiter= ",", skiprows=1 )

train_target = train_target.reshape(-1, 1)
input_size = train.shape[1]
target_size = train_target.shape[1]

# prepare dataset
print "Preparing the dataset"
print ""
ds = SDS( input_size, target_size )
ds.setField( 'input', train )
ds.setField( 'target', train_target / np.max(train_target) )

# init and train
print "Initalizing the network and training"
net = buildNetwork( input_size, hidden_size, target_size, bias = True )
trainer = BackpropTrainer( net,ds )

start = time()
for i in range( epochs ):
    mse = trainer.train()
    rmse = sqrt( mse )
    print "training RMSE, epoch {}: {}".format( i + 1, rmse )
end = time()
print "Training took: " + str((end - start)) + "seconds"
print ""
Code example #30
train = np.loadtxt( train_file, delimiter = ',' )
validation = np.loadtxt( validation_file, delimiter = ',' )
train = np.vstack(( train, validation ))

x_train = train[:,0:-1]
y_train = train[:,-1]
y_train = y_train.reshape( -1, 1 )

input_size = x_train.shape[1]
target_size = y_train.shape[1]

# prepare dataset

ds = SDS( input_size, target_size )
ds.setField( 'input', x_train )
ds.setField( 'target', y_train )

# init and train

net = buildNetwork( input_size, hidden_size, target_size, bias= True )
trainer = BackpropTrainer( net,ds )

train_mse, validation_mse = trainer.trainUntilConvergence( verbose = True, validationProportion = validation_proportion, 
	maxEpochs = epochs, continueEpochs = continue_epochs )

pickle.dump( net, open( output_model_file, 'wb' ))



Code example #31
def train_ann(data_dicts, layers, hidden_size, epochs):

	print "-------------------------------------------------"
	print "loading data..."
	# returns a numpy ndarray
	train = dicts_to_np_array(data_dicts)

	print "data loaded to a ", type(train),   " of size: ", train.shape, " and type:", train.dtype
	print "Splitting inputs and output for training..."

	inputs_train = train[:,2:]
	outputs_train = train[:,:2]
	outputs_train = outputs_train.reshape( -1, 2 )


	print "inputs in a ", type(inputs_train),   " of size: ", inputs_train.shape, " and type:", inputs_train.dtype
	print "output in a ", type(outputs_train),   " of size: ", outputs_train.shape, " and type:", outputs_train.dtype
	print "-------------------------------------------------"

	print "first input vectors: ", inputs_train[0:2,:]

	print "first output vectors: ", outputs_train[0:2,:]


	print "Setting up supervised dataset for pyBrain training..."
	input_size = inputs_train.shape[1]
	target_size = outputs_train.shape[1]
	dataset = SDS( input_size, target_size )
	dataset.setField( 'input', inputs_train )
	dataset.setField( 'target', outputs_train )
	print "-------------------------------------------------"

	print "Setting up network for supervised learning in pyBrain..."

	appraisal_network = FeedForwardNetwork()
	inLayer = LinearLayer(input_size)
	hiddenLayer1 = SigmoidLayer(hidden_size)
	outLayer = LinearLayer(target_size)
	appraisal_network.addInputModule(inLayer)
	appraisal_network.addModule(hiddenLayer1)
	appraisal_network.addOutputModule(outLayer)
	in_to_hidden1 = FullConnection(inLayer, hiddenLayer1)
	hidden1_to_out = FullConnection(hiddenLayer1, outLayer)
	appraisal_network.addConnection(in_to_hidden1)
	appraisal_network.addConnection(hidden1_to_out)
	appraisal_network.sortModules()


	trainer = BackpropTrainer( appraisal_network,dataset )

	print "-------------------------------------------------"


	rmse_vector = []
	print "training for {} epochs...".format( epochs )
	for i in range( epochs ):
		mse = trainer.train()
		rmse = sqrt( mse )
		if i%10 == 0:
			print "training RMSE, epoch {}: {}".format( i + 1, rmse )
		rmse_vector.append(rmse)

	print "-------------------------------------------------"


# 	pickle.dump( crime_ann, open( output_model_file, 'wb' ))

	print "Training done!"
	print "-------------------------------------------------"

# 	return rmse_vector

	return appraisal_network
Code example #32
    # Load the dataset files
    train = np.loadtxt(train_file, delimiter=' ')
    if not add_i:
        train = train[:, 1:]
    validation = np.loadtxt(validation_file, delimiter=' ')
    validation = validation[:, 1:]

    x_train = train
    y_train = validation

    input_size = x_train.shape[1]
    target_size = validation.shape[1]

    ds = SDS(input_size, target_size)
    ds.setField('input', train)
    ds.setField('target', validation)

    # run for each combination of parameter settings
    for hidden_layer, epoch, learning_rate in product(hidden_size, epochs,
                                                      learning_rates):
        output_model_file = 'model_{}-{}_learning-rate-{}_hidden-{}_epochs-{}.pkl'.format(
            train_file, learning_rate, hidden_layer, epoch,
            "with_i" if add_i else "without-i")
        output_data = 'model_result_{}-{}_learning_rate-{}_hidden-{}_epochs.txt'.format(
            train_file, learning_rate, hidden_layer, epoch,
            "with_i" if add_i else "without-i")

        net = buildNetwork(input_size, hidden_layer, target_size, bias=True)

        trainer = BackpropTrainer(net, ds, learningrate=learning_rate)
Code example #33
loaded_data = loaded_data.drop(
    ['DATE', 'ASS_ID', 'YEAR_DAY_AND_YEAR', 'DAY_DS', 'MONTH'], axis=1)
print(preprocessing.data.columns)
train = np.asarray(loaded_data)
x_train = train[:, 0:-1]
y_train = train[:, -1]
y_train = y_train.reshape(-1, 1)
input_size = x_train.shape[1]
target_size = y_train.shape[1]
hidden_size = 100
epochs = 600

ds = SDS(input_size, target_size)
ds.setField('input', x_train)
ds.setField('target', y_train)

net = buildNetwork(input_size, hidden_size, target_size, bias=True)
trainer = BackpropTrainer(net, ds)

print "training for {} epochs...".format(epochs)

for i in range(epochs):
    mse = trainer.train()
    rmse = sqrt(mse)
    print "training RMSE, epoch {}: {}".format(i + 1, rmse)

submission = sp.submission_preprocessing()
submission.full_preprocess()
data_to_predict = np.asarray(submission.data)
Code example #34
        train = np.loadtxt(train_file, delimiter = ' ')
        if not add_i:
                train = train[:, 1:]
        validation = np.loadtxt(validation_file, delimiter = ' ' )
        validation = validation[:, 1:]


        x_train = train
        y_train = validation


        input_size = x_train.shape[1]
        target_size = validation.shape[1]

        ds = SDS(input_size, target_size)
        ds.setField('input',train)
        ds.setField('target', validation)


        # run for each combination of parameter settings
        for hidden_layer, epoch, learning_rate in product(hidden_size, epochs, learning_rates):
                output_model_file = 'model_{}-{}_learning-rate-{}_hidden-{}_epochs-{}.pkl'.format(train_file, learning_rate, hidden_layer, epoch,  "with_i" if add_i  else "without-i")
                output_data = 'model_result_{}-{}_learning_rate-{}_hidden-{}_epochs.txt'.format(train_file, learning_rate, hidden_layer, epoch, "with_i" if add_i else "without-i")

                net = buildNetwork(input_size, hidden_layer, target_size, bias = True)

                trainer = BackpropTrainer(net, ds, learningrate = learning_rate)

                print "Training for {} epochs with learning_rate={}, hidden_layer={} ...".format(epoch, learning_rate, hidden_layer)
                mse = 0
Code example #35
X = pd.read_csv('Train/Train_Combine.csv',
                usecols=['T', 'TM', 'Tm', 'SLP', 'H', 'VV', 'V', 'VM'])
Y = pd.read_csv('Train/Train_Combine.csv', usecols=['PM 2.5'])

X = X.values
Y = Y.values

hidden_size = 100
epochs = 600

input_size = X.shape[1]
target_size = Y.shape[1]

ds = SDS(input_size, target_size)
ds.setField('input', X)
ds.setField('target', Y)

net = buildNetwork(input_size,
                   hidden_size,
                   target_size,
                   bias=True,
                   hiddenclass=TanhLayer)
trainer = BackpropTrainer(net, ds)

print "training for {} epochs...".format(epochs)

for i in range(epochs):
    mse = trainer.train()
    rmse = sqrt(mse)
    print "training RMSE, epoch {}: {}".format(i + 1, rmse)
Code example #36
    if m_min == []:
        m_min = m.min(0)
    if m_max == []:
        m_max = m.max(0)
    m = 2 * (m - m_min) / (m_max - m_min) - 1
    return m, m_min, m_max
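The truncated helper above rescales each column of m linearly onto [-1, 1]: the column minimum maps to 2*0 - 1 = -1 and the maximum to 2*1 - 1 = +1. A self-contained sketch of the same mapping, keeping the fragment's empty-list sentinel for the optional bounds:

import numpy as np

def normalise(m, m_min=[], m_max=[]):
    # Per-column bounds default to the data's own extrema.
    if m_min == []:
        m_min = m.min(0)
    if m_max == []:
        m_max = m.max(0)
    return 2 * (m - m_min) / (m_max - m_min) - 1, m_min, m_max

m = np.array([[0.0, 5.0], [1.0, 10.0], [2.0, 15.0]])
scaled, lo, hi = normalise(m)
print scaled  # each column becomes [-1, 0, 1]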


'''training dataset with random phase screens (2018.01.17)'''
mx = np.abs(trnslos_all).max()
ntrnslos_all = trnslos_all / mx
n_frame = 20
norm_inp = ntrnslos_all[:, :n_frame * 72]
trnds = SupervisedDataSet(72 * n_frame, 1)
#norm_inp, trnslos_min, trnslos_max = normalise(trnslos)
trnds.setField('input', norm_inp)
trn_tar = np.empty([6000, 1])
trn_tar[:, 0] = np.arange(5, 11).repeat(1000) / 15.0  # float division; integer division would zero the targets
trnds.setField('target', trn_tar)
'''learning process'''
net = buildNetwork(72 * n_frame, 1000, 1, hiddenclass=TanhLayer)
lr = 0.001
momentum = 0
lrdecay = 1
wdecay = 0
t = BackpropTrainer(net,
                    trnds,
                    learningrate=lr,
                    lrdecay=lrdecay,
                    momentum=momentum,
                    verbose=True,
Code example #37
File: NNRegPred.py Project: alyakhtar/AQI-Delhi
model_file = 'model.pkl'
output_predictions_file = 'predictions.txt'

X2 = pd.read_csv('Test/Test_Combine.csv', usecols=[
                 'T', 'TM', 'Tm', 'SLP', 'H', 'VV', 'V', 'VM'])
Y2 = pd.read_csv('Test/Test_Combine.csv', usecols=['PM 2.5'])

X2 = X2.values
Y2 = Y2.values
net = pickle.load(open(model_file, 'rb'))

y_test_dummy = np.zeros(Y2.shape)

input_size = X2.shape[1]
target_size = Y2.shape[1]

ds = SDS(input_size, target_size)
ds.setField('input', X2)
ds.setField('target', y_test_dummy)

p = net.activateOnDataset(ds)

mse = MSE(Y2, p)
rmse = sqrt(mse)

print "testing RMSE:", rmse
print "testing MSE: ", mse

main(Y2, p)
np.savetxt(output_predictions_file, p, fmt='%.6f')
Code example #38
true_positive = 0
false_positive = 0
true_negative = 0
false_negative = 0

# load model
net = pickle.load( open(var.output_model_file, 'rb' ))
#load data
test = np.loadtxt( var.test_file, delimiter = ',' )
input_data = test[:,0:-1]
target_data = test[:,-1]
target_data = target_data.reshape( -1, 1 )
#print input_data,target_data
# prepare dataset
ds = SDS( var.no_of_clusters, var.output )
ds.setField( 'input', input_data )
ds.setField( 'target', target_data )
#activate network
predict_list = net.activateOnDataset(ds)	
for predict, ground_truth in zip(predict_list, target_data):
	if predict <= 0.0:
		if ground_truth <= 0: true_negative += 1
		else: false_negative += 1
		print "Predicted: NOT Car"
	else:
		if ground_truth <= 0: false_positive += 1
		else: true_positive += 1
		print "Predicted: Car"
#print true_positive,true_negative,false_positive,false_negative
precision = true_positive / float(true_positive + false_positive)
recall = true_positive / float(true_positive + false_negative)
Code example #39
def train_ann(data_dicts, input_fields, hidden_size, epochs):

	#print "-------------------------------------------------"
	#print "loading data..."
	
	# returns a numpy ndarray
	train = dicts_to_np_array(data_dicts, input_fields)

	#print "data loaded to a ", type(train),   " of size: ", train.shape, " and type:", train.dtype
	#print "Spliting inputs and output for training..."

	inputs_train = train[:,2:]
	outputs_train = train[:,:2]
	outputs_train = outputs_train.reshape( -1, 2 )

	#print "inputs in a ", type(inputs_train),   " of size: ", inputs_train.shape, " and type:", inputs_train.dtype
	#print "output in a ", type(outputs_train),   " of size: ", outputs_train.shape, " and type:", outputs_train.dtype

	# Setting up supervised dataset por pyBrain training...
	input_size = inputs_train.shape[1]
	target_size = outputs_train.shape[1]
	dataset = SDS( input_size, target_size )
	dataset.setField( 'input', inputs_train )
	dataset.setField( 'target', outputs_train )
	

	#Setting up network for supervised learning in pyBrain...
	appraisal_network = FeedForwardNetwork()
	inLayer = LinearLayer(input_size)
	hiddenLayer1 = SigmoidLayer(hidden_size)
	outLayer = LinearLayer(target_size)
	appraisal_network.addInputModule(inLayer)
	appraisal_network.addModule(hiddenLayer1)
	appraisal_network.addOutputModule(outLayer)
	in_to_hidden1 = FullConnection(inLayer, hiddenLayer1)
	hidden1_to_out = FullConnection(hiddenLayer1, outLayer)
	appraisal_network.addConnection(in_to_hidden1)
	appraisal_network.addConnection(hidden1_to_out)
	appraisal_network.sortModules()


	trainer = BackpropTrainer( appraisal_network,dataset )

	start_time = time.time()
	rmse_vector = []
	rmse_min = sys.float_info.max
	# training for epochs...
	for i in range( epochs ):
		mse = trainer.train()
		rmse = sqrt( mse )

		# training RMSE 
		rmse_vector.append(rmse)

		if rmse < rmse_min:
			rmse_min = rmse
			#print "training RMSE, epoch {}: {}".format( i + 1, rmse )
		
	elapsed_time = time.time() - start_time

	report_fields_training = {"time_elapsed":elapsed_time, 
						"epochs":epochs,
						"rmse_min":rmse_min,
						"hidden_layers":1,
						"hidden_neurons":hidden_size,
						"input_neurons":input_size,
						"output_neurons":target_size}
	
	return report_fields_training, appraisal_network
Code example #40
File: NNRegTrain.py Project: alyakhtar/AQI-Delhi
X = pd.read_csv('Train/Train_Combine.csv', usecols=[
                'T', 'TM', 'Tm', 'SLP', 'H', 'VV', 'V', 'VM'])
Y = pd.read_csv('Train/Train_Combine.csv', usecols=['PM 2.5'])

X = X.values
Y = Y.values

hidden_size = 100
epochs = 600

input_size = X.shape[1]
target_size = Y.shape[1]

ds = SDS(input_size, target_size)
ds.setField('input', X)
ds.setField('target', Y)

net = buildNetwork(
    input_size, hidden_size, target_size, bias=True, hiddenclass=TanhLayer)
trainer = BackpropTrainer(net, ds)

print "training for {} epochs...".format(epochs)

for i in range(epochs):
    mse = trainer.train()
    rmse = sqrt(mse)
    print "training RMSE, epoch {}: {}".format(i + 1, rmse)

pickle.dump(net, open(output_model_file, 'wb'))
Code example #41
loaded_data = preprocessing.data[:10]
loaded_data = loaded_data.drop(['DATE','ASS_ID','YEAR_DAY_AND_YEAR','DAY_DS','MONTH'], axis=1)
print(preprocessing.data.columns)
train = np.asarray(loaded_data)
x_train = train[:,0:-1]
y_train = train[:,-1]
y_train = y_train.reshape( -1, 1 )
input_size = x_train.shape[1]
target_size = y_train.shape[1]
hidden_size = 100
epochs = 600

ds = SDS( input_size, target_size )
ds.setField( 'input', x_train )
ds.setField( 'target', y_train )

net = buildNetwork( input_size, hidden_size, target_size, bias = True )
trainer = BackpropTrainer( net,ds )

print "training for {} epochs...".format( epochs )

for i in range( epochs ):
	mse = trainer.train()
	rmse = sqrt( mse )
	print "training RMSE, epoch {}: {}".format( i + 1, rmse )

submission = sp.submission_preprocessing()
submission.full_preprocess()
data_to_predict = np.asarray(submission.data)
Code example #42
File: brain.py Project: mdelhey/kaggle-galaxy
print file_name + ': reading data'
(Xtrn, Xtst, Ytrn, f_out) = read_X_Y(f_in_trn, f_in_tst, sol_dir, my_dim)

# PARAMETERS
hidden_size = 100
epochs = 600
continue_epochs = 10
val_prop = 0.2

# Prepare dataset
print file_name + ': preparing ds'
Ytrn = Ytrn[:,1:]  # Remove ID col
input_size = Xtrn.shape[1]  # ncols
target_size = Ytrn.shape[1]  # ncols
ds = SupervisedDataSet(input_size, target_size)
ds.setField('input', Xtrn)
ds.setField('target', Ytrn)

# Train a network
print file_name + ': training network'
net = buildNetwork(input_size, hidden_size, target_size, bias = True)
trainer = BackpropTrainer(net, ds)
    
trainer.trainUntilConvergence(verbose = True, validationProportion = val_prop,
                              maxEpochs = epochs, continueEpochs = continue_epochs)

# Save model
print file_name + ': saving model'
pickle.dump(net, open(f_out_model, 'wb'))

# Predict on test data, save to file
Code example #43
train = np.loadtxt(train_file, delimiter=',')
#validation = np.loadtxt( validation_file, delimiter = ',' )
#train = np.vstack(( train, validation ))

x_train = train[:, 0:-1]
y_train = train[:, -1]

y_train = y_train.reshape(-1, 1)

input_size = x_train.shape[1]
target_size = y_train.shape[1]

# prepare dataset

ds = SDS(input_size, target_size)
ds.setField('input', x_train)
ds.setField('target', y_train)

# init and train

net = buildNetwork(input_size, hidden_size, target_size, bias=True)
trainer = BackpropTrainer(net, ds)

print "training for {} epochs...".format(epochs)

for i in range(epochs):
    mse = trainer.train()
    rmse = sqrt(mse)
    print "training RMSE, epoch {}: {}".format(i + 1, rmse)

pickle.dump(net, open(output_model_file, 'wb'))
Code example #44
def FitNeuralNetworkDept(dept):


	train_file = input_file_path + train_file_name[0] + str(dept) + train_file_name[1]
	test_file = input_file_path + test_file_name[0] + str(dept) + test_file_name[1]

	train = np.loadtxt( train_file, delimiter = ' ' )
	test = np.loadtxt( test_file, delimiter = ' ' )

	x_train = train[:, 0 : -1]
	y_train = train[:, -1]

	y_max = max(y_train)
	y_min = min(y_train)
	y_train = (y_train - y_min) / (y_max-y_min)
	y_train = y_train.reshape(-1,1)

	input_size = x_train.shape[1]
	target_size = y_train.shape[1]

	x_test = test[:, 0 : -1]
	y_test = test[:, -1]
	y_test = y_test.reshape(-1,1)

	
	ds_test = SDS( input_size, target_size )
	ds_test.setField( 'input', x_test )
	ds_test.setField( 'target', y_test )

	ds = SDS( input_size, target_size )
	ds.setField( 'input', x_train )
	ds.setField( 'target', y_train )


	hidden_size = input_size*hidden_size_ratio

	'''
	Set the parameter online = True to do online learning!
	'''
	n = getModel(dept = dept, hidden_size = hidden_size, input_size = input_size,
		target_size = target_size, online = OnlineLearningMode)


	#print n


	trainer = BackpropTrainer(n,ds ,weightdecay=weightdecay, learningrate=learningrate, lrdecay=1.0, momentum = momentum)

	
	train_mse, validation_mse = trainer.trainUntilConvergence(verbose=False, maxEpochs = epochs, validationProportion = cv_ratio, continueEpochs = 5)

	file_name = output_file_path + 'nn_dept' + str(dept) + '_epoch' + str(epochs)
	model_file = open(file_name + '_model', 'wb')  # binary mode for pickle
	pickle.dump(n, model_file)
	model_file.close()

	print 'dept' + str(dept) + ' complete..!'

	model_info = open(file_name + '_info.txt', 'w')
	model_info.write('model for dept' + str(dept) +'\n\n')

	model_info.write(str(n) +'\n\n')
	
	model_info.write("input size: " + str(input_size) +'\n')
	model_info.write("hidden size: " + str(hidden_size) +'\n')
	model_info.write("hidden layer number: " + str(num_hidden_layer+1) +'\n')
	model_info.write("target size: " + str(target_size) +'\n\n')

	model_info.write("learningrate: " + str(learningrate) +'\n')
	model_info.write("momentum: " + str(momentum) +'\n')
	model_info.write("weightdecay: " + str(weightdecay) +'\n\n')

	model_info.write("epochs: " + str(epochs) +'\n')
	model_info.write("cv_ratio: " + str(cv_ratio) +'\n\n')

	model_info.write("y_min: " + str(y_min) +'\n')
	model_info.write("y_max: " + str(y_max) +'\n\n')

	model_info.write("train_mse: " + str(train_mse) +'\n\n')
	model_info.write("validation_mse: " + str(validation_mse))
	model_info.close()
	

	n = None # drop the reference to verify the model reloads from disk

	fileObject = open(file_name + '_model', 'rb')
	n = pickle.load(fileObject)
	fileObject.close()
	
	
	
	p_train = n.activateOnDataset( ds )
	p_test = n.activateOnDataset( ds_test )
	plot_result = np.vstack((p_train*(y_max-y_min) + y_min, p_test*(y_max-y_min) + y_min ))
	p_total_print = plot_result.reshape(-1,len(plot_result))

	p_test_print = p_test.reshape(-1,len(p_test))
	p_test_print = p_test_print*(y_max-y_min) + y_min

	w_file = open(output_file_path + 'walmart_sales_dept' + str(dept) + '_test_result.csv', 'wb')
	for row in p_test_print:
		for element in row:
			w_file.write(str(element)+'\n')
		break
	w_file.close()

	w_file = open(output_file_path + 'walmart_sales_dept' + str(dept) + '_train_test_result.csv', 'wb')
	for row in p_total_print:
		
		for element in row:
			w_file.write(str(element)+'\n')
		break
	w_file.close()

	PlotResult(y_train = y_train, plot_result = plot_result, y_max = y_max, y_min = y_min, dept = dept)

	return n
Code example #45
def train_4_hidden():

	print "-------------------------------------------------"
	print "loading data..."
	print "file to be loaded: ", train_file

	# returns a numpy ndarray
	train = np.loadtxt( train_file, delimiter = ',' )

	print "data loaded to a ", type(train),   " of size: ", train.shape, " and type:", train.dtype
	print "Spliting inputs and output for training..."

	inputs_train = train[:,0:-1]
	output_train = train[:,-1]
	output_train = output_train.reshape( -1, 1 )


	print "inputs in a ", type(inputs_train),   " of size: ", inputs_train.shape, " and type:", inputs_train.dtype
	print "output in a ", type(output_train),   " of size: ", output_train.shape, " and type:", output_train.dtype
	print "-------------------------------------------------"



	print "Setting up supervised dataset por pyBrain training..."
	input_size = inputs_train.shape[1]
	target_size = output_train.shape[1]
	dataset = SDS( input_size, target_size )
	dataset.setField( 'input', inputs_train )
	dataset.setField( 'target', output_train )
	print "-------------------------------------------------"



	print "Setting up network for supervised learning in pyBrain..."
	
	#crime_network = buildNetwork( input_size, hidden_size, target_size, bias = True, hiddenclass = SigmoidLayer, outclass = LinearLayer )
	
	


	crime_ann = FeedForwardNetwork()

	inLayer = LinearLayer(input_size)
	hiddenLayer1 = TanhLayer(hidden_size)
	hiddenLayer2 = TanhLayer(hidden_size)
	hiddenLayer3 = TanhLayer(hidden_size)
	hiddenLayer4 = TanhLayer(hidden_size)
	outLayer = LinearLayer(target_size)
	crime_ann.addInputModule(inLayer)
	crime_ann.addModule(hiddenLayer1)
	crime_ann.addModule(hiddenLayer2)
	crime_ann.addModule(hiddenLayer3)
	crime_ann.addModule(hiddenLayer4)
	crime_ann.addOutputModule(outLayer)
	in_to_hidden1 = FullConnection(inLayer, hiddenLayer1)
	hidden1_to_hidden2 = FullConnection(hiddenLayer1, hiddenLayer2)
	hidden2_to_hidden3 = FullConnection(hiddenLayer2, hiddenLayer3)
	hidden3_to_hidden4 = FullConnection(hiddenLayer3, hiddenLayer4)
	hidden4_to_out = FullConnection(hiddenLayer4, outLayer)
	crime_ann.addConnection(in_to_hidden1)
	crime_ann.addConnection(hidden1_to_hidden2)
	crime_ann.addConnection(hidden2_to_hidden3)
	crime_ann.addConnection(hidden3_to_hidden4)
	crime_ann.addConnection(hidden4_to_out)
	crime_ann.sortModules()


	trainer = BackpropTrainer( crime_ann,dataset )

	print "-------------------------------------------------"


	rmse_vector = []
	print "training for {} epochs...".format( epochs )
	for i in range( epochs ):
		mse = trainer.train()
		rmse = sqrt( mse )
		print "training RMSE, epoch {}: {}".format( i + 1, rmse )
		rmse_vector.append(rmse)

	print "-------------------------------------------------"
	
	pickle.dump( crime_ann, open( output_model_file, 'wb' ))

	print "Training done!"
	print "-------------------------------------------------"

	return rmse_vector
Code example #46
test = np.loadtxt(test_file, delimiter=',')
x_test = test[:, 0:-1]
y_test = test[:, -1]
y_test = y_test.reshape(-1, 1)

# you'll need labels. In case you don't have them...
y_test_dummy = np.zeros(y_test.shape)

input_size = x_test.shape[1]
target_size = y_test.shape[1]

assert (net.indim == input_size)
assert (net.outdim == target_size)

# prepare dataset

ds = SDS(input_size, target_size)
ds.setField('input', x_test)
ds.setField('target', y_test_dummy)

# predict

p = net.activateOnDataset(ds)

mse = MSE(y_test, p)
rmse = sqrt(mse)

print "testing RMSE:", rmse

np.savetxt(output_predictions_file, p, fmt='%.6f')
Code example #47
File: benchmarks.py Project: airmuller/housing
def benchmark(clf=None, n_hidden=10, n_epochs=10):
  for col in ['AP']:#, 'COP', 'AP', 'LS', 'MA']:#, 'PPR', '2X', '3X', '4X', '5X', 'AU', 'UNI', 'MEM']:
    print "*" * 80
    print type(clf)
    print col

    X = merged[numerical_columns + ['LivingArea']]
    #X = merged.drop(['MlsNumber', 'Lat', 'Lng', 'BuyPrice'], axis=1, inplace=False)
    X_cat = merged[categorical_columns]
    Y = merged[['BuyPrice']]

    mask = merged[col]==1
    X, X_cat, Y = X[mask], X_cat[mask], Y[mask]
    print 'X.shape: ', X.shape
    print 'Y.shape: ', Y.shape

    # filter rows with NaN
    mask = ~np.isnan(X).any(axis=1)
    X, X_cat, Y = X[mask], X_cat[mask], Y[mask]

    mask = ~np.isnan(Y).any(axis=1)
    X, X_cat, Y = X[mask], X_cat[mask], Y[mask]
    print 'After NaN filter: ', X.shape

    X, X_cat, Y = np.array(X), np.array(X_cat), np.array(Y)
    if USE_LOG:
      Y = np.log(Y)
    Y = Y.reshape(Y.shape[0])

    print "mean: ", np.mean(Y)
    print "median: ", np.median(Y)
    print "std: ", Y.std()

    # remove outliers
    mask = Y > 10**5
    X, X_cat, Y = X[mask], X_cat[mask], Y[mask]
    mask = Y < 10**6
    X, X_cat, Y = X[mask], X_cat[mask], Y[mask]

    # one-hot encode categorical features
    X_cat_enc = []
    for i, cat in enumerate(categorical_columns):
      col = X_cat[:,i]
      col = LabelEncoder().fit_transform(col).reshape((-1,1))
      col_enc = OneHotEncoder(sparse=False).fit_transform(col)
      X_cat_enc.append(col_enc)
    X_cat = np.concatenate(X_cat_enc, axis=1)
    print 'X_cat.shape: ', X_cat.shape

    skf = KFold(n=X.shape[0], n_folds=10, shuffle=True, random_state=42)
    L = { 'rmse': [], 'corr': [], 'r2': [], 'diff': [], 'mae': [], 'explained_var': [], 'var': []}
    for train_indices, test_indices in skf:
      X_train, X_train_cat, Y_train = X[train_indices], X_cat[train_indices], Y[train_indices]
      X_test, X_test_cat, Y_test = X[test_indices], X_cat[test_indices], Y[test_indices]

      scaler = StandardScaler()
      X_train = scaler.fit_transform(X_train)
      X_test = scaler.transform(X_test)

      X_train = np.concatenate([X_train, X_train_cat], axis=1)
      X_test = np.concatenate([X_test, X_test_cat], axis=1)

      if USE_NEURALNET:
        print 'n_hidden: %d' % n_hidden
        Y_train, Y_test = Y_train.reshape(-1, 1), Y_test.reshape(-1, 1)

        train_ds = SupervisedDataSet(X_train.shape[1], Y_train.shape[1])
        train_ds.setField('input', X_train)
        train_ds.setField('target', Y_train)
        net = buildNetwork(X_train.shape[1], n_hidden, Y_train.shape[1], bias=True)
        trainer = BackpropTrainer(net, train_ds)

        for i in xrange(n_epochs):
          mse = trainer.train()
          rmse = math.sqrt(mse)
          print "epoch: %d, rmse: %f" % (i, rmse)

        test_ds = SupervisedDataSet(X_test.shape[1], Y_test.shape[1])
        test_ds.setField('input', X_test)
        test_ds.setField('target', Y_test)
        preds = net.activateOnDataset(test_ds)
      else:
        clf.fit(X_train, Y_train)
        preds = clf.predict(X_test).astype(float)

      if USE_LOG:
        Y_test_10 = np.exp(Y_test)
        preds_10 = np.exp(preds)
      else:
        Y_test_10 = Y_test
        preds_10 = preds

      rmse = math.sqrt(metrics.mean_squared_error(Y_test_10, preds_10))
      corr = pearsonr(preds_10, Y_test_10)
      diff = np.array([abs(p-a)/a for (p,a) in zip(Y_test_10, preds_10)])
      mae = metrics.mean_absolute_error(Y_test_10, preds_10)
      explained_var = metrics.explained_variance_score(Y_test_10, preds_10)
      r2 = metrics.r2_score(Y_test_10, preds_10)
      var = np.var(diff)

      L['rmse'].append(rmse)
      L['corr'].append(corr[0])
      L['diff'].append(diff.mean())
      L['mae'].append(mae)
      L['explained_var'].append(explained_var)
      L['r2'].append(r2)
      L['var'].append(var)

      if GENERATE_PLOTS:
        plt.plot(Y_test_10, preds_10, 'ro')
        plt.show()
        break
      if USE_NEURALNET:
        break
    for key in L.keys():
      print "Mean %s: %f" % (key, np.array(L[key]).mean())
    return L
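benchmark() leans on module-level state: the merged frame, the column lists, and the USE_*/GENERATE_PLOTS flags. A hedged driver sketch; the flag values and the RandomForest choice are illustrative, and merged plus the column lists must already be defined:

# Hypothetical driver for benchmark() above.
from sklearn.ensemble import RandomForestRegressor

USE_LOG = True           # assumed flag settings
USE_NEURALNET = False
GENERATE_PLOTS = False

scores = benchmark(clf=RandomForestRegressor(n_estimators=100),
                   n_hidden=10, n_epochs=10)  # returns the dict of metric lists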