Example No. 1
 def check(self, a_in, a_out, a_mask, act='Softmax'):
     nn = MLPC(layers=[L(act)],
               learning_rule='adam',
               learning_rate=0.05,
               n_iter=250,
               n_stable=25)
     nn.fit(a_in, a_out, a_mask)
     return nn.predict_proba(a_in)
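This check helper is a test-suite fragment: MLPC and L are presumably aliases for sknn.mlp's Classifier and Layer, and a_mask is a per-sample weight array forwarded to fit. A minimal self-contained sketch of the same call pattern, with toy data invented for illustration:

import numpy
from sknn.mlp import Classifier as MLPC, Layer as L

# Toy data: 8 samples, 16 features, labels in {0..4}.
a_in = numpy.random.uniform(-1.0, 1.0, (8, 16))
a_out = numpy.random.randint(0, 5, (8,))

nn = MLPC(layers=[L('Softmax')],
          learning_rule='adam',
          learning_rate=0.05,
          n_iter=250,
          n_stable=25)
nn.fit(a_in, a_out)             # the original also forwards a_mask as sample weights
proba = nn.predict_proba(a_in)  # one column of probabilities per class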
Example No. 2
class TestClassifierFunctionality(unittest.TestCase):

    def setUp(self):
        self.nn = MLPC(layers=[L("Linear")], n_iter=1)

    def test_FitAutoInitialize(self):
        a_in, a_out = numpy.zeros((8,16)), numpy.random.randint(0, 5, (8,))
        self.nn.fit(a_in, a_out)
        assert_true(self.nn.is_initialized)

    def test_ExplicitValidSet(self):
        a_in, a_out = numpy.zeros((8,16)), numpy.random.randint(0, 5, (8,))
        self.nn.valid_set = (a_in, a_out)
        self.nn.fit(a_in, a_out)
        assert_true(self.nn.is_initialized)

    def test_PartialFit(self):
        a_in, a_out = numpy.zeros((8,4)), numpy.random.randint(0, 5, (8,))
        self.nn.partial_fit(a_in, a_out, classes=[0,1,2,3])
        self.nn.partial_fit(a_in*2.0, a_out+1, classes=[0,1,2,3])

    def test_PredictUninitializedNoUnitCount(self):
        a_in = numpy.zeros((8,16))
        assert_raises(AssertionError, self.nn.predict, a_in)

    def test_PredictUninitializedNoLabels(self):
        self.nn.layers[-1].units = 4
        a_in = numpy.zeros((8,16))
        assert_raises(AssertionError, self.nn.predict, a_in)

    def test_PredictClasses(self):
        a_in, a_out = numpy.zeros((8,16)), numpy.random.randint(0, 5, (8,))
        self.nn.fit(a_in, a_out)
        a_test = self.nn.predict(a_in)
        assert_equal(type(a_out), type(a_test))
        assert_equal(a_out.shape[0], a_test.shape[0])

    def test_PredictMultiClass(self):
        a_in, a_out = numpy.zeros((8,16)), numpy.random.randint(0, 3, (8,2))
        self.nn.fit(a_in, a_out)
        a_test = self.nn.predict(a_in)
        assert_equal(type(a_out), type(a_test))
        assert_equal(a_out.shape, a_test.shape)

    def test_EstimateProbalities(self):
        a_in, a_out = numpy.zeros((8,16)), numpy.random.randint(0, 5, (8,))
        self.nn.fit(a_in, a_out)
        a_test = self.nn.predict_proba(a_in)
        assert_equal(type(a_out), type(a_test))

    def test_CalculateScore(self):
        a_in, a_out = numpy.zeros((8,16)), numpy.random.randint(0, 5, (8,))
        self.nn.fit(a_in, a_out)
        f = self.nn.score(a_in, a_out)
        assert_equal(type(f), numpy.float64)
Example No. 4
def load_train_data(path, modelNo=1):
    X = []
    with open(path + '/train_newFeat_sparse_mat.dat', 'rb') as infile:
        X = pickle.load(infile)
    random.seed(modelNo)
    np.random.seed(modelNo)
    r = random.sample(xrange(0, X.shape[1]), int(round(0.8 * X.shape[1])))
    X = X[:, r]
    y = pd.read_csv(path + '/labels.csv', index_col=False, header=None)
    y = np.array(y).astype('int')
    X_train, X_val, y_train, y_val = train_test_split(X,
                                                      y,
                                                      test_size=0.2,
                                                      random_state=modelNo,
                                                      stratify=y)
    nn = Classifier(
        layers=[
            Layer("Rectifier", units=200, dropout=0.5),
            Layer("Rectifier", units=200, dropout=0.5),
            Layer("Rectifier", units=200, dropout=0.5),
            Layer("Sigmoid")
        ],
        learning_rate=0.02,
        n_iter=40,
        #    valid_set=(X,y),
        n_stable=15,
        debug=True,
        verbose=True)
    print "Model No is", modelNo
    if (modelNo == 1):
        print "Model No is", modelNo
        nn.valid_set = (X_val, y_val)
    #rbm1 = SVC(C=100.0, gamma = 0.1, probability=True, verbose=1).fit(X[0:9999,:], y[0:9999])
    #rbm2 = RandomForestClassifier(n_estimators=300, criterion='entropy', max_features='auto', bootstrap=False, oob_score=False, n_jobs=1, verbose=1).fit(X[0:9999,:], y[0:9999])
    #rbm3 = GradientBoostingClassifier(n_estimators=50,max_depth=11,subsample=0.8,min_samples_leaf=5,verbose=1).fit(X[0:9999,:], y[0:9999])
    nn.fit(X_train, y_train)
    Y = []
    with open(path + '/test_newFeat_sparse_mat.dat', 'rb') as infile:
        Y = pickle.load(infile)
    Y = Y[:, r]
    preds2 = np.zeros((Y.shape[0], 38))
    for i in xrange(0, 10):
        s = i * 10000
        e = min(preds2.shape[0], s + 10000)
        preds2[s:e, :] = nn.predict_proba(Y[s:e, :])
    p2 = pd.DataFrame(preds2)
    p2.to_csv("p2_" + str(modelNo) + ".csv", index=None, header=None)
    return p2
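A hypothetical invocation of the function above (the data directory is illustrative, not from the source):

p2 = load_train_data('./data', modelNo=1)  # writes p2_1.csv and returns the test-set probabilities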
Example No. 5
class TestClassifierFunctionality(unittest.TestCase):
    def setUp(self):
        self.nn = MLPC(layers=[L("Linear")], n_iter=1)

    def test_FitAutoInitialize(self):
        a_in, a_out = numpy.zeros((8, 16)), numpy.zeros((8, ),
                                                        dtype=numpy.int32)
        self.nn.fit(a_in, a_out)
        assert_true(self.nn.is_initialized)

    def test_PartialFit(self):
        a_in, a_out = numpy.zeros((8, 4)), numpy.zeros((8, ),
                                                       dtype=numpy.int32)
        self.nn.partial_fit(a_in, a_out, classes=[0, 1, 2, 3])
        self.nn.partial_fit(a_in * 2.0, a_out + 1, classes=[0, 1, 2, 3])

    def test_PredictUninitialized(self):
        a_in = numpy.zeros((8, 16))
        assert_raises(ValueError, self.nn.predict, a_in)

    def test_PredictClasses(self):
        a_in, a_out = numpy.zeros((8, 16)), numpy.zeros((8, ),
                                                        dtype=numpy.int32)
        self.nn.fit(a_in, a_out)
        a_test = self.nn.predict(a_in)
        assert_equal(type(a_out), type(a_test))
        assert_equal(a_out.shape, a_test.shape)

    def test_EstimateProbalities(self):
        a_in, a_out = numpy.zeros((8, 16)), numpy.zeros((8, ),
                                                        dtype=numpy.int32)
        self.nn.fit(a_in, a_out)
        a_test = self.nn.predict_proba(a_in)
        assert_equal(type(a_out), type(a_test))

    def test_CalculateScore(self):
        a_in, a_out = numpy.zeros((8, 16)), numpy.zeros((8, ),
                                                        dtype=numpy.int32)
        self.nn.fit(a_in, a_out)
        f = self.nn.score(a_in, a_out)
        assert_equal(type(f), numpy.float64)
# Saving the NN
pickle.dump(nn, open("Convoluted.pk1", "wb"))
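Reloading the pickled network later mirrors the call above; a minimal sketch using the same filename:

import pickle

with open("Convoluted.pk1", "rb") as f:
    nn_loaded = pickle.load(f)
# nn_loaded.predict(...) and nn_loaded.predict_proba(...) behave as before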

# ----------------------------------------------------------------------------------------------------------- #
# Estimating the generalisation error with CV: all classes individually and multiclass log-loss

print("CV for class-wise generalisation errors")
num_folds = 2
kf = KFold(len(y), n_folds=num_folds)  # old sklearn.cross_validation API: the first argument is the number of samples
y_pred = y * 0
l_loss = np.zeros((num_folds, 1), dtype=float)
p = 0

for train, test in kf:
    X_train, X_test, y_train, y_test = X[train,:], X[test,:], y[train], y[test]
    nn_cv = Classifier(layers=lay, learning_rate=0.001, n_iter=2)
    nn_cv.fit(X=X_train, y=y_train)
    y_pred[test] = nn_cv.predict(X_test)
    y_pred2 = nn_cv.predict_proba(X_test)
    l_loss[p, 0] = log_loss(y_test, y_pred2)
    p += 1
print(classification_report(y, y_pred, target_names=namesClasses))
log_loss_CV = np.average(l_loss, axis=0)

# Calculating the multiclass log-loss
print("Multiclass Log-loss by CV: ", log_loss_CV)

print("Finished program")
print("--- %s seconds ---" % (round(time.time() - start_time, 4))) # Calculates machine time for the program
Example No. 8
                    learning_rate=0.01,
                    batch_size=100,
                    n_iter=2000,
                    valid_size=0.25,
                    n_stable=200)

    # Training
    nn.fit(X_train, Y)
    pickle.dump(nn, open('nn_susy_classification.pkl', 'wb'))
if not runTraining:
    nn = pickle.load(open('nn_susy_classification.pkl', 'rb'))

# Testing
pred_train = nn.predict(X_train)
pred_test = nn.predict(X_test)
probabilities_train = nn.predict_proba(X_train)
probabilities_test = nn.predict_proba(X_test)

print "Training sample...."
print "  Signal identified as signal (%)        : ", 100.0 * np.sum(
    pred_train[nBackgroundEvents:nBackgroundEvents +
               nSignalEvents] == 1.0) / nSignalEvents
print "  Signal identified as background (%)    : ", 100.0 * np.sum(
    pred_train[nBackgroundEvents:nBackgroundEvents +
               nSignalEvents] == 0.0) / nSignalEvents
print "  Background identified as signal (%)    : ", 100.0 * np.sum(
    pred_train[0:nBackgroundEvents] == 1.0) / nBackgroundEvents
print "  Background identified as background (%): ", 100.0 * np.sum(
    pred_train[0:nBackgroundEvents] == 0.0) / nBackgroundEvents
print ""
print "Testing sample...."
Example No. 9
def two_layers_nnet(X_train,
                    Y_train,
                    X_test,
                    Y_test,
                    method1="Tanh",
                    neurons1=5,
                    method2="",
                    neurons2=0,
                    decay=0.0001,
                    learning_rate=0.001,
                    n_iter=25,
                    random_state=1):
    """
    Parameters
    ----------
    X_train       : pandas data frame
        data frame of features for the training set
    Y_train       : pandas data frame
        data frame of labels for the training set
    X_test        : pandas data frame
        data frame of features for the test set
    Y_test        : pandas data frame
        data frame of labels for the test set
    method1       : str
        activation used for the first layer
    neurons1      : int
        number of neurons in the first layer
    method2       : str
        activation used for the second layer ("" for a single hidden layer)
    neurons2      : int
        number of neurons in the second layer (0 for a single hidden layer)
    decay         : float
        weight decay
    learning_rate : float
        learning rate
    n_iter        : int
        number of iterations
    random_state  : int
        seed for weight initialization
        
    Returns
    -------
    numpy array
        logloss    : averaged logarithmic loss
        miss_err   : misclassification error rate
        prec       : precision
        recall     : recall
        f1         : f1 score
        parameters : previous parameters in the order previously specified
    """

    labels = np.unique(Y_train)
    
    # Scale data: fit the scaler on the training set, then apply it to the test set
    scaler = MinMaxScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)
    
    # Layers
    if neurons2 == 0:
        layers = [Layer(method1, weight_decay=decay, units=neurons1),
                  Layer("Softmax")]
    else:
        layers = [Layer(method1, weight_decay=decay, units=neurons1),
                  Layer(method2, weight_decay=decay, units=neurons2),
                  Layer("Softmax")]
        
    # Define the classifier
    nn = Classifier(layers,
                    learning_rate=learning_rate,
                    random_state=random_state,
                    n_iter=n_iter)
    # Fit
    nn.fit(X_train, Y_train)
    # Predict
    Y_hat = nn.predict(X_test)
    Y_probs = nn.predict_proba(X_test)
    
    # Misclassification error rate
    miss_err = 1 - accuracy_score(Y_test, Y_hat)
    # Log loss
    logloss = log_loss(Y_test, Y_probs)

    # Precision
    prec = precision_score(y_true=Y_test, y_pred=Y_hat, labels=labels, average='micro')
    # Recall
    recall = recall_score(y_true=Y_test, y_pred=Y_hat, labels=labels, average='micro')
    # F1
    f1 = f1_score(y_true=Y_test, y_pred=Y_hat, labels=labels, average='micro')
    
    # Summarized results
    result = np.array([logloss,
                       miss_err,
                       prec,
                       recall,
                       f1,
                       method1,
                       neurons1,
                       method2,
                       neurons2,
                       decay,
                       learning_rate,
                       n_iter,
                       random_state])
    return result
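A hypothetical call, with array names and layer sizes invented for illustration. Because the returned numpy array mixes numbers and strings, every entry comes back as a string and needs casting:

result = two_layers_nnet(X_train, Y_train, X_test, Y_test,
                         method1="Tanh", neurons1=50,
                         method2="Rectifier", neurons2=25)
logloss, miss_err = float(result[0]), float(result[1])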
Example No. 10
def two_layers_nnet_predict(X_train,
                            Y_train,
                            X_test,
                            method1="Tanh",
                            neurons1=5,
                            method2="",
                            neurons2=0,
                            decay=0.0001,
                            learning_rate=0.001,
                            n_iter=25,
                            random_state=1):
    """
    Parameters
    ----------
    X_train       : pandas data frame
        data frame of features for the training set
    Y_train       : pandas data frame
        data frame of labels for the training set
    X_test        : pandas data frame
        data frame of features for the test set
    method1       : str
        activation used for the first layer
    neurons1      : int
        number of neurons in the first layer
    method2       : str
        activation used for the second layer ("" for a single hidden layer)
    neurons2      : int
        number of neurons in the second layer (0 for a single hidden layer)
    decay         : float
        weight decay
    learning_rate : float
        learning rate
    n_iter        : int
        number of iterations
    random_state  : int
        seed for weight initialization
        
    Returns
    -------
    tuple of numpy arrays
        (predicted classes, predicted probabilities)
    """

    labels = np.unique(Y_train)
    
    # Scale data: fit the scaler on the training set, then apply it to the test set
    scaler = MinMaxScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)
    
    # Layers
    if neurons2 == 0:
        layers = [Layer(method1, weight_decay=decay, units=neurons1),
                  Layer("Softmax")]
    else:
        layers = [Layer(method1, weight_decay=decay, units=neurons1),
                  Layer(method2, weight_decay=decay, units=neurons2),
                  Layer("Softmax")]
        
    # Define the classifier
    nn = Classifier(layers,
                    learning_rate=learning_rate,
                    random_state=random_state,
                    n_iter=n_iter)
    # Fit
    nn.fit(X_train, Y_train)
    # Predict
    Y_hat = nn.predict(X_test)
    Y_probs = nn.predict_proba(X_test)
    
    # Summarized results
    result = (Y_hat,Y_probs)
    return result
Example No. 11
    learning_rate=0.00018,  #valid_set = ((X_valid, y_valid))
    n_iter=1000)
print "Neural network specifications:"
print nn

nn.fit(trainingSet, trainingSetLabels)

score1 = nn.score(trainingSet, trainingSetLabels)

score3 = nn.score(testingSet, testingSetLabels)

print "Training accuracy = ", score1

print "Testing accuracy = ", score3

probNN = nn.predict_proba(testingSet)
fprNN, tprNN, threshNN = metrics.roc_curve(testingSetLabels, probNN[:, 0])  # false positive rate, true positive rate, thresholds (ROC curve)
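A natural follow-up to the ROC computation is the area under the curve; a sketch assuming the fprNN/tprNN arrays just computed:

aucNN = metrics.auc(fprNN, tprNN)
print("AUC = %.4f" % aucNN)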

print "Time = ", time.time() - startTime, "seconds"

startTime = time.time()

print
print

#------------------------SVM----------------------------
print "Support Vector Machine Classifier"

clf = svm.SVC(C = 100, gamma = 0.1, probability=True) 
clf_info = clf.fit(trainingSet, trainingSetLabels)
print clf_info
                        layers=[
                                Layer("Rectifier", units=49),
                                Layer("Softmax")],
                        learning_rate=0.01,
                        batch_size = 100,
                        n_iter=100)
    # Training
    nn.fit(X_train,Y)
    pickle.dump(nn, open('nn_susy_classification.pkl', 'wb'))
    if not runTraining:
        nn = pickle.load(open('nn_susy_classification.pkl', 'rb'))

    # Testing
    pred_train = nn.predict(X_train)
    pred_test = nn.predict(X_test)
    probabilities_train = nn.predict_proba(X_train)
    probabilities_test = nn.predict_proba(X_test)

    print "Training sample...."
    print "  Signal identified as signal (%)        : ",100.0*np.sum(pred_train[nBackgroundEvents:nBackgroundEvents+nSignalEvents]==1.0)/nSignalEvents
    print "  Signal identified as background (%)    : ",100.0*np.sum(pred_train[nBackgroundEvents:nBackgroundEvents+nSignalEvents]==0.0)/nSignalEvents
    print "  Background identified as signal (%)    : ",100.0*np.sum(pred_train[0:nBackgroundEvents]==1.0)/nBackgroundEvents
    print "  Background identified as background (%): ",100.0*np.sum(pred_train[0:nBackgroundEvents]==0.0)/nBackgroundEvents
    print ""
    print "Testing sample...."
    print "  Signal identified as signal (%)        : ",100.0*np.sum(pred_test[nBackgroundEvents:nBackgroundEvents+nSignalEvents]==1.0)/nSignalEvents
    print "  Signal identified as background (%)    : ",100.0*np.sum(pred_test[nBackgroundEvents:nBackgroundEvents+nSignalEvents]==0.0)/nSignalEvents
    print "  Background identified as signal (%)    : ",100.0*np.sum(pred_test[0:nBackgroundEvents]==1.0)/nBackgroundEvents
    print "  Background identified as background (%): ",100.0*np.sum(pred_test[0:nBackgroundEvents]==0.0)/nBackgroundEvents

    ## Plotting - performance curves
layer1 = Layer("Rectifier", units=45, weight_decay=0.001)
layer2 = Layer("Rectifier", units=30, weight_decay=0.001)
layer3 = Layer("Softmax")

cls = Classifier(layers=[layer1, layer2, layer3],
                 learning_rule="adam",
                 learning_rate=0.003,
                 f_stable=0.01,
                 debug=True,
                 batch_size=200,
                 n_iter=100)
cls.fit(X_train, y_train)

# get the probability of prediction for cross data
y_predict = cls.predict_proba(X_cross, collapse=True)

print(y_predict[:, 1])
p_cross = y_predict[:, 1]

loss_cross = -np.multiply(y_cross, np.log(p_cross)) - np.multiply(
    1 - y_cross, np.log(1 - p_cross))
print(loss_cross.sum() / loss_cross.size)
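The hand-rolled binary cross-entropy above should agree with sklearn's log_loss (up to its internal probability clipping), assuming y_cross holds 0/1 labels and p_cross the positive-class probabilities:

from sklearn.metrics import log_loss
print(log_loss(y_cross, p_cross))  # should match loss_cross.sum() / loss_cross.size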

# get the probability of prediction for test data
y_predict_test = cls.predict_proba(X_test, collapse=True)

p_test = y_predict_test[:, 1]

loss_test = -np.multiply(y_test, np.log(p_test)) - np.multiply(
    1 - y_test, np.log(1 - p_test))
Example No. 14
new_y_train = np.zeros((len(X_train), len(Classes)))

for i in range(0, len(TrainData)):
    new_y_train[i, int(y_train[i])] = 1
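The loop above one-hot encodes the labels (note that the fit call below still uses the raw y_train, so new_y_train goes unused in this fragment). A vectorized equivalent, assuming y_train is a flat integer label array of length len(X_train):

new_y_train = np.eye(len(Classes))[y_train.astype(int)]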

nn.fit(X_train, y_train)

#y_valid = nn.predict(TestData[:,1:])

X_test = TestData[:, 1:]
y_test = TestData[:, 0]

#score = nn.score(X_test, y_test)
X_test_pred = nn.predict(X_test)
conf = nn.predict_proba(X_test)
print conf
filename = "confidence_nn" + ".txt"
#print np.array(conf)
#conf=[np.ravel(i) for i in conf]
#print conf
#conf = map()
np.savetxt(filename, np.array(conf), '%1.5f', delimiter=' ')
counter = 0
for i in range(0, len(X_test_pred)):
    if TestData[i, 0] == X_test_pred[i]:
        counter = counter + 1

with open("predictions_mlp.txt", "w") as resFile:
    for i in range(0, len(X_test_pred)):
        resFile.write(str(int(X_test_pred[i])) + "\n")
Example No. 16
class TestClassifierFunctionality(unittest.TestCase):
    def setUp(self):
        self.nn = MLPC(layers=[L("Softmax")], n_iter=1)

    def test_IsClassifier(self):
        assert_true(self.nn.is_classifier)

    def test_FitAutoInitialize(self):
        a_in, a_out = numpy.zeros((8, 16)), numpy.random.randint(0, 5, (8, ))
        self.nn.fit(a_in, a_out)
        assert_true(self.nn.is_initialized)

    def test_ExplicitValidSet(self):
        a_in, a_out = numpy.zeros((8, 16)), numpy.random.randint(0, 5, (8, ))
        self.nn.valid_set = (a_in, a_out)
        self.nn.fit(a_in, a_out)
        assert_true(self.nn.is_initialized)

    def test_PartialFit(self):
        a_in, a_out = numpy.zeros((8, 4)), numpy.random.randint(0, 5, (8, ))
        self.nn.partial_fit(a_in, a_out, classes=[0, 1, 2, 3])
        self.nn.partial_fit(a_in * 2.0, a_out + 1, classes=[0, 1, 2, 3])

    def test_PredictUninitializedNoUnitCount(self):
        a_in = numpy.zeros((8, 16))
        assert_raises(AssertionError, self.nn.predict, a_in)

    def test_PredictUninitializedNoLabels(self):
        self.nn.layers[-1].units = 4
        a_in = numpy.zeros((8, 16))
        assert_raises(AssertionError, self.nn.predict, a_in)

    def test_PredictBinaryProbability(self):
        a_in = numpy.random.uniform(-1.0, 1.0, size=(8, 16))
        a_out = numpy.array((a_in.sum(axis=1) >= 0.0), dtype=numpy.int32)
        a_out[0], a_out[-1] = 0, 1
        self.nn.fit(a_in, a_out)

        a_proba = self.nn.predict_proba(a_in)
        a_test = self.nn.predict(a_in)
        c_out = numpy.unique(a_out)

        assert_equal(2, c_out.shape[0])
        assert_equal((8, 2), a_proba.shape)

    def test_PredictClasses(self):
        a_in, a_out = numpy.zeros((8, 16)), numpy.random.randint(0, 5, (8, ))
        self.nn.fit(a_in, a_out)
        self.nn.batch_size = 4
        a_test = self.nn.predict(a_in)
        assert_equal(type(a_out), type(a_test))
        assert_equal(a_out.shape[0], a_test.shape[0])

        c_out = numpy.unique(a_out)
        assert_equal(len(self.nn.classes_), 1)
        assert_true((self.nn.classes_[0] == c_out).all())

    def test_PredictLargerBatchSize(self):
        a_in, a_out = numpy.zeros((8, 16)), numpy.random.randint(0, 5, (8, 1))
        self.nn.batch_size = 32

        self.nn.fit(a_in, a_out)
        a_test = self.nn.predict(a_in)
        assert_equal(type(a_out), type(a_test))
        assert_equal(a_out.shape[0], a_test.shape[0])

    def test_PredictMultiClass(self):
        a_in, a_out = numpy.zeros(
            (32, 16)), numpy.random.randint(0, 3, (32, 2))
        self.nn.fit(a_in, a_out)
        a_test = self.nn.predict(a_in)
        assert_equal(type(a_out), type(a_test))
        assert_equal(a_out.shape, a_test.shape)

        assert_equal(len(self.nn.classes_), 2)
        assert_equal(self.nn.classes_[0].shape[0], 3)
        assert_equal(self.nn.classes_[1].shape[0], 3)

    def test_EstimateProbalities(self):
        a_in, a_out = numpy.zeros((8, 16)), numpy.random.randint(0, 5, (8, ))
        self.nn.fit(a_in, a_out)
        a_test = self.nn.predict_proba(a_in)
        assert_equal(type(a_out), type(a_test))
        assert_equal(a_in.shape[0], a_test.shape[0])

    def test_CalculateScore(self):
        a_in, a_out = numpy.zeros((8, 16)), numpy.random.randint(0, 5, (8, ))
        self.nn.fit(a_in, a_out)
        f = self.nn.score(a_in, a_out)
        assert_equal(type(f), numpy.float64)
Example No. 17
#!/usr/bin/python

from sknn.mlp import Classifier, Layer
import pandas as pd

train = pd.read_csv('../data/modeltrain.csv',index_col=0)
test = pd.read_csv('../data/modeltest.csv',index_col=0)
label = train['Response'].values

feat = train.columns.drop('Response')  # Index.drop takes no axis argument

nn = Classifier(
    layers=[
        Layer("Rectifier",units=10),
        Layer("Softmax")],
    learning_rate=0.001,
    n_iter=25)
    
nn.fit(train[feat].values, label)

from sklearn.metrics import log_loss
log_loss(label,nn.predict_proba(train[feat].values)[:,1])
class TestClassifierFunctionality(unittest.TestCase):

    def setUp(self):
        self.nn = MLPC(layers=[L("Softmax")], n_iter=1)

    def test_IsClassifier(self):
        assert_true(self.nn.is_classifier)

    def test_FitAutoInitialize(self):
        a_in, a_out = numpy.zeros((8,16)), numpy.random.randint(0, 5, (8,))
        self.nn.fit(a_in, a_out)
        assert_true(self.nn.is_initialized)

    def test_ExplicitValidSet(self):
        a_in, a_out = numpy.zeros((8,16)), numpy.random.randint(0, 5, (8,))
        self.nn.valid_set = (a_in, a_out)
        self.nn.fit(a_in, a_out)
        assert_true(self.nn.is_initialized)

    def test_PartialFit(self):
        a_in, a_out = numpy.zeros((8,4)), numpy.random.randint(0, 5, (8,))
        self.nn.partial_fit(a_in, a_out, classes=[0,1,2,3])
        self.nn.partial_fit(a_in*2.0, a_out+1, classes=[0,1,2,3])

    def test_PredictUninitializedNoUnitCount(self):
        a_in = numpy.zeros((8,16))
        assert_raises(AssertionError, self.nn.predict, a_in)

    def test_PredictUninitializedNoLabels(self):
        self.nn.layers[-1].units = 4
        a_in = numpy.zeros((8,16))
        assert_raises(AssertionError, self.nn.predict, a_in)

    def test_PredictBinaryProbability(self):
        a_in = numpy.random.uniform(-1.0, 1.0, size=(8,16))
        a_out = numpy.array((a_in.sum(axis=1) >= 0.0), dtype=numpy.int32)
        a_out[0], a_out[-1] = 0, 1
        self.nn.fit(a_in, a_out)

        a_proba = self.nn.predict_proba(a_in)
        a_test = self.nn.predict(a_in)
        c_out = numpy.unique(a_out)

        assert_equal(2, c_out.shape[0])
        assert_equal((8, 2), a_proba.shape)

        assert_true((a_proba >= 0.0).all())
        assert_true((a_proba <= 1.0).all())
        assert_true((abs(a_proba.sum(axis=1) - 1.0) < 1E-9).all())

    def test_PredictClasses(self):
        a_in, a_out = numpy.zeros((8,16)), numpy.random.randint(0, 5, (8,))
        self.nn.fit(a_in, a_out)
        self.nn.batch_size = 4
        a_test = self.nn.predict(a_in)
        assert_equal(type(a_out), type(a_test))
        assert_equal(a_out.shape[0], a_test.shape[0])

        c_out = numpy.unique(a_out)
        assert_equal(len(self.nn.classes_), 1)
        assert_true((self.nn.classes_[0] == c_out).all())

    def test_PredictLargerBatchSize(self):
        a_in, a_out = numpy.zeros((8,16)), numpy.random.randint(0, 5, (8,1))
        self.nn.batch_size = 32

        self.nn.fit(a_in, a_out)
        a_test = self.nn.predict(a_in)
        assert_equal(type(a_out), type(a_test))
        assert_equal(a_out.shape[0], a_test.shape[0])

    def test_PredictMultiClass(self):
        a_in, a_out = numpy.zeros((32,16)), numpy.random.randint(0, 3, (32,2))
        self.nn.fit(a_in, a_out)
        a_test = self.nn.predict(a_in)
        assert_equal(type(a_out), type(a_test))
        assert_equal(a_out.shape, a_test.shape)

        assert_equal(len(self.nn.classes_), 2)
        assert_equal(self.nn.classes_[0].shape[0], 3)
        assert_equal(self.nn.classes_[1].shape[0], 3)

    def test_EstimateProbalities(self):
        a_in, a_out = numpy.zeros((8,16)), numpy.random.randint(0, 5, (8,))
        self.nn.fit(a_in, a_out)
        a_proba = self.nn.predict_proba(a_in)
        assert_equal(type(a_out), type(a_proba))
        assert_equal(a_in.shape[0], a_proba.shape[0])

        assert_true((a_proba >= 0.0).all())
        assert_true((a_proba <= 1.0).all())
        assert_true((abs(a_proba.sum(axis=1) - 1.0) < 1E-9).all())

    def test_MultipleProbalitiesAsList(self):
        a_in, a_out = numpy.zeros((8,16)), numpy.random.randint(0, 5, (8,4))
        self.nn.fit(a_in, a_out)
        a_proba = self.nn.predict_proba(a_in)
        assert_equal(list, type(a_proba))
        assert_equal(4, len(a_proba))

        for p in a_proba:
            assert_equal(a_in.shape[0], p.shape[0])
            assert_less_equal(p.shape[1], 5)
            assert_true((p >= 0.0).all())
            assert_true((p <= 1.0).all())
            assert_true((abs(p.sum(axis=1) - 1.0) < 1E-9).all())

    def test_CalculateScore(self):
        a_in, a_out = numpy.zeros((8,16)), numpy.random.randint(0, 5, (8,))
        self.nn.fit(a_in, a_out)
        f = self.nn.score(a_in, a_out)
        assert_equal(type(f), numpy.float64)
class ClassifierScikitNN():

    packageName = 'com.brodagroup.machinelearning.ClassifierScikitNN'
    
    logger = None
    hidden_units = None
    classifier = None

    # Initializer
    def __init__(self,
                num_classes=None,
                num_features=None,
                learning_rate=0.01,
                learning_rule='sgd',
                learning_momentum=0.9,
                dropout_rate=None,
                weight_decay=None,
                random_state=0,
                n_iter=10):
        
        self.logger = Logger(self.packageName).getLogger()
        self.logger.debug('Starting...')

        self.num_classes = num_classes
        self.num_features = num_features
        self.learning_rule = learning_rule
        self.learning_momentum = learning_momentum
        self.dropout_rate = dropout_rate
        self.learning_rate = learning_rate
        self.weight_decay = weight_decay
        self.random_state = random_state
        self.n_iter = n_iter
        self.hidden_units = int(round((self.num_features + self.num_classes) / 3.0))  # Layer units must be an integer

        #                        Layer('Tanh', units=self.num_features),
        #                        Layer('Maxout', units=self.num_features, pieces=2),
        self.classifier = Classifier(
                            layers=[
                                Layer('Maxout', units=self.num_features, pieces=2),
                                Layer('Sigmoid', units=self.hidden_units),
                                Layer('Softmax', units=self.num_classes)
                            ],
                            learning_rule=self.learning_rule,
                            learning_rate=self.learning_rate,
                            learning_momentum=self.learning_momentum,
                            dropout_rate=self.dropout_rate,
                            weight_decay=self.weight_decay,
                            random_state=self.random_state,
                            n_iter=self.n_iter)

        return
        
    def __str__(self):
        x = self.packageName + '('
        x = x + '\n\t num_classes={0}, num_features: {1}'.format(self.num_classes, self.num_features)
        x = x + '\n\t learning_rule={0}, learning_rate: {1}'.format(self.learning_rule, self.learning_rate)
        x = x + '\n\t learning_momentum={0}, dropout_rate: {1}'.format(self.learning_momentum, self.dropout_rate)
        x = x + '\n\t hidden_units={0}, weight_decay: {1}'.format(self.hidden_units, self.weight_decay)
        x = x + '\n\t random_state={0}, n_iter: {1}'.format(self.random_state, self.n_iter)
        return(x)
        
    def fit(self, X, y):
        self.classifier.fit(X,y)
        
    def predict(self, X):
        y_pred = self.classifier.predict(X)
        return(y_pred)
        
    def predict_proba(self, X):
        y_pred = self.classifier.predict_proba(X)
        return(y_pred)
        
    def get_params(self, deep=True):
        return {
            "num_classes": self.num_classes,
            "num_features": self.num_features,
            "learning_rule": self.learning_rule,
            "learning_rate": self.learning_rate,
            "learning_momentum": self.learning_momentum,
            "dropout_rate": self.dropout_rate,
            "weight_decay": self.weight_decay,
            "random_state": self.random_state,
            "n_iter": self.n_iter
            }
    
    def set_params(self, **parameters):
        for parameter, value in parameters.items():
            setattr(self, parameter, value)
        return self        
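A hypothetical usage sketch for this wrapper; the array names and sizes are invented, and num_classes/num_features must be supplied since the defaults of None would break the hidden_units computation:

clf = ClassifierScikitNN(num_classes=3, num_features=20, learning_rate=0.01, n_iter=10)
print(clf)
clf.fit(X_train, y_train)            # X_train: (n, 20) floats, y_train: labels in {0, 1, 2}
probs = clf.predict_proba(X_test)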
Example No. 22
                    # ====================================================

                    clf = Classifier(
                        layers=[Layer('Sigmoid', units=hu), Layer('Softmax', units=3)],
                        learning_rule=lr,
                        learning_rate=lrt,
                        n_iter=ni
                    )

                    startTime = datetime.datetime.now()

                    clf.fit(X_train, y_train)

                    endTime = datetime.datetime.now()

                    y_score = clf.predict_proba(X_test)
                    y_hat = clf.predict(X_test)
                    ys = [y_s[y_h-1] for y_s, y_h in zip(y_score, y_hat)]

                    tmp = np.append(X_test, np.reshape(y_test, (1,y_test.shape[0])).T, axis=1)
                    tmp = np.append(tmp, np.reshape(y_hat, (1,y_hat.shape[0])).T, axis=1)
                    tmp = np.append(tmp, y_score, axis=1)
                    tmp = np.append(tmp, np.asarray(ys).reshape(-1, 1), axis=1)  # reshape so the 1-D list stacks as a column

                    output['data'] = [['X_' + str(i) for i in range(1, X_test.shape[1] + 1)] +
                                            ['y_label', 'y_hat', 'y_score_1', 'y_score_2', 'y_score_3', 'ys']] + \
                                     tmp.tolist()

                    acc = accuracy_score(y_hat, y_test)

                    confMatrix = confusion_matrix(y_test, y_hat).tolist()
Example No. 23
 def check(self, a_in, a_out, a_mask, act='Softmax', n_iter=100):
     nn = MLPC(layers=[L(act)], learning_rule='rmsprop', n_iter=n_iter)
     nn.fit(a_in, a_out, a_mask)
     return nn.predict_proba(a_in)
Example No. 25
#Set random_state=0 for testing
X_train, X_test, y_train, y_test, i_train, i_test = train_test_split(
    X_data, y_data, range(0, len(y_data)), test_size=0.20)

classifier = Classifier(
    layers=[Layer("Sigmoid", units=int(sys.argv[4])),
            Layer("Softmax")],
    learning_rate=float(sys.argv[2]),
    n_iter=int(sys.argv[3]))

classifier.fit(X_train, y_train)

old_stdout = sys.stdout
sys.stdout = open(os.devnull, 'w')
results = classifier.predict(X_test)  #May produce junk output
results_proba = classifier.predict_proba(X_test)  #May produce junk output
sys.stdout.close()
sys.stdout = old_stdout

results = np.reshape(results, (results.shape[0], 1))
results_proba = np.reshape(results_proba, (results.shape[0], 2))
y_test = np.reshape(y_test, results.shape)

Acc = 100 * (results == y_test).sum() / float(len(y_test))
Pre = 100 * (np.logical_and(results == 1, y_test == 1)).sum() / float(
    (results == 1).sum())
Rec = 100 * (np.logical_and(results == 1, y_test == 1)).sum() / float(
    (y_test == 1).sum())
F1S = 2 * (Pre * Rec) / float(Pre + Rec)
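The manual calculations above correspond to sklearn's built-in metrics; an equivalent sketch under the same assumptions (arrays reshaped as above, positive class labeled 1):

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
Acc = 100 * accuracy_score(y_test.ravel(), results.ravel())
Pre = 100 * precision_score(y_test.ravel(), results.ravel())
Rec = 100 * recall_score(y_test.ravel(), results.ravel())
F1S = 100 * f1_score(y_test.ravel(), results.ravel())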

print 'Acc\tPre\tRec\tF1S'
Example No. 26
						    n_iter=10000,
						    #n_stable=50,
						    #f_stable=0.01,
						    valid_set=(base['validation']['data'],base['validation']['target']),
						    callback={'on_epoch_finish': store_errors},
						    verbose = verbose
						    )

					if opt_samp == Oversampling.DontUse:
						nn.fit(base['training']['data'],base['training']['target'],w_train)
					else:
						nn.fit(base['training']['data'],base['training']['target'])

					print('Testing')
					predictions = np.squeeze(np.asarray(nn.predict(base['testing']['data'])))
					prob_predictions = nn.predict_proba(base['testing']['data'])
					target = base['testing']['target']
					targetByClass = np.array([0,0])
					
					errors_total = 0
					vp = 0
					fp = 0
					vn = 0
					fn = 0

					test_mse = 0
					for predicted, obj in zip(predictions,base['testing']['target']):

						if predicted != obj:
							# print(' error')
Example No. 27
                    clf = Classifier(layers=[
                        Layer('Sigmoid', units=hu),
                        Layer('Softmax', units=2)
                    ],
                                     learning_rule=lr,
                                     learning_rate=lrt,
                                     n_iter=ni)

                    startTime = datetime.now()

                    clf.fit(X_train, y_train)

                    endTime = datetime.now()

                    y_score = clf.predict_proba(X_test)
                    y_hat = clf.predict(X_test)
                    ys = [y_s[y_h] for y_s, y_h in zip(y_score, y_hat)]

                    tmp = np.append(X_test,
                                    np.reshape(y_test, (1, y_test.shape[0])).T,
                                    axis=1)
                    tmp = np.append(tmp,
                                    np.reshape(y_hat, (1, y_hat.shape[0])).T,
                                    axis=1)
                    tmp = np.append(tmp, y_score, axis=1)
                    tmp = np.append(tmp, np.asarray(ys).reshape(-1, 1), axis=1)  # reshape so the 1-D list stacks as a column

                    output['data'] = [['X_' + str(i) for i in range(1, X_test.shape[1] + 1)] +
                                      ['y_label', 'y_hat', 'y_score_0', 'y_score_1', 'ys']] + \
                                     tmp.tolist()
np.save("data_x_fire_a", data_x)
np.save("data_y_fire_a", data_y)

print 'saved'

data_x = np.load("data_x_fire_a.npy")[100:]
data_y = 1*np.load("data_y_fire_a.npy")[100:]
nn = Classifier(
    layers=[
        Layer("Sigmoid", units=512),
        Layer("Softmax")],
    learning_rate=0.01,
    n_iter=40,
    # callback=my_callback
    )
print("Generating Fit")
nn.fit(data_x, data_y)
print("Fit generated")
fs = open('nn_fire_a.pkl', 'wb')
pickle.dump(nn, fs)
fs.close()  # close the write handle so the pickle is fully flushed before re-reading
fs = open('nn_fire_a.pkl', 'rb')
nn = pickle.load(fs)
fs.close()
n = 259
# print(nn.predict_proba(data_x[n:n+20]))
out = np.column_stack((nn.predict_proba(data_x[n:n+20]), nn.predict(data_x[n:n+20]), data_y[n:n+20]))
print(out)
# print(data_y[n:n+20])
# nn.score(data_x, data_y)
# fs.close()
# print("NN Pickled")
# pickle.save()