def check(self, a_in, a_out, a_mask, act='Softmax'):
    """Train a one-layer classifier on the given data and return its probabilities.

    Parameters
    ----------
    a_in : input samples, used for both fitting and prediction.
    a_out : target labels for ``a_in``.
    a_mask : third positional argument handed to ``fit``
        (presumably a per-sample mask/weight array -- confirm against MLPC.fit).
    act : activation name of the single layer (default ``'Softmax'``).

    Returns
    -------
    Whatever ``predict_proba`` returns for ``a_in``.
    """
    output_layer = L(act)
    model = MLPC(
        layers=[output_layer],
        learning_rule='adam',
        learning_rate=0.05,
        n_iter=250,
        n_stable=25,
    )
    model.fit(a_in, a_out, a_mask)
    probabilities = model.predict_proba(a_in)
    return probabilities
class TestClassifierFunctionality(unittest.TestCase):
    """Exercise fit / partial_fit / predict / predict_proba / score on a one-layer MLPC."""

    def setUp(self):
        # Every test starts from an unfitted network: one "Linear" layer, one iteration.
        linear_layer = L("Linear")
        self.nn = MLPC(layers=[linear_layer], n_iter=1)

    def test_FitAutoInitialize(self):
        """fit() must leave the network reporting is_initialized."""
        features = numpy.zeros((8, 16))
        targets = numpy.random.randint(0, 5, (8,))
        self.nn.fit(features, targets)
        assert_true(self.nn.is_initialized)

    def test_ExplicitValidSet(self):
        """fit() also initializes when valid_set was assigned beforehand."""
        features = numpy.zeros((8, 16))
        targets = numpy.random.randint(0, 5, (8,))
        self.nn.valid_set = (features, targets)
        self.nn.fit(features, targets)
        assert_true(self.nn.is_initialized)

    def test_PartialFit(self):
        """Two consecutive partial_fit() calls with an explicit class list must not raise."""
        features = numpy.zeros((8, 4))
        targets = numpy.random.randint(0, 5, (8,))
        self.nn.partial_fit(features, targets, classes=[0, 1, 2, 3])
        self.nn.partial_fit(features * 2.0, targets + 1, classes=[0, 1, 2, 3])

    def test_PredictUninitializedNoUnitCount(self):
        """predict() on a never-fitted network raises AssertionError."""
        features = numpy.zeros((8, 16))
        assert_raises(AssertionError, self.nn.predict, features)

    def test_PredictUninitializedNoLabels(self):
        """predict() still raises AssertionError when only the last layer's units are set."""
        self.nn.layers[-1].units = 4
        features = numpy.zeros((8, 16))
        assert_raises(AssertionError, self.nn.predict, features)

    def test_PredictClasses(self):
        """predict() returns the same container type and row count as the labels."""
        features = numpy.zeros((8, 16))
        targets = numpy.random.randint(0, 5, (8,))
        self.nn.fit(features, targets)
        predicted = self.nn.predict(features)
        assert_equal(type(targets), type(predicted))
        assert_equal(targets.shape[0], predicted.shape[0])

    def test_PredictMultiClass(self):
        """With two label columns, predict() returns an array of identical shape."""
        features = numpy.zeros((8, 16))
        targets = numpy.random.randint(0, 3, (8, 2))
        self.nn.fit(features, targets)
        predicted = self.nn.predict(features)
        assert_equal(type(targets), type(predicted))
        assert_equal(targets.shape, predicted.shape)

    def test_EstimateProbalities(self):
        """predict_proba() returns the same container type as the labels."""
        features = numpy.zeros((8, 16))
        targets = numpy.random.randint(0, 5, (8,))
        self.nn.fit(features, targets)
        probabilities = self.nn.predict_proba(features)
        assert_equal(type(targets), type(probabilities))

    def test_CalculateScore(self):
        """score() returns a numpy.float64."""
        features = numpy.zeros((8, 16))
        targets = numpy.random.randint(0, 5, (8,))
        self.nn.fit(features, targets)
        score = self.nn.score(features, targets)
        assert_equal(type(score), numpy.float64)
class TestClassifierFunctionality(unittest.TestCase):
    """Exercise fit / partial_fit / predict / predict_proba / score on a one-layer MLPC."""

    def setUp(self):
        # Every test starts from an unfitted network: one "Linear" layer, one iteration.
        linear_layer = L("Linear")
        self.nn = MLPC(layers=[linear_layer], n_iter=1)

    def test_FitAutoInitialize(self):
        """fit() must leave the network reporting is_initialized."""
        samples = numpy.zeros((8, 16))
        classes = numpy.random.randint(0, 5, (8,))
        self.nn.fit(samples, classes)
        assert_true(self.nn.is_initialized)

    def test_ExplicitValidSet(self):
        """fit() also initializes when valid_set was assigned beforehand."""
        samples = numpy.zeros((8, 16))
        classes = numpy.random.randint(0, 5, (8,))
        self.nn.valid_set = (samples, classes)
        self.nn.fit(samples, classes)
        assert_true(self.nn.is_initialized)

    def test_PartialFit(self):
        """Two consecutive partial_fit() calls with an explicit class list must not raise."""
        samples = numpy.zeros((8, 4))
        classes = numpy.random.randint(0, 5, (8,))
        self.nn.partial_fit(samples, classes, classes=[0, 1, 2, 3])
        self.nn.partial_fit(samples * 2.0, classes + 1, classes=[0, 1, 2, 3])

    def test_PredictUninitializedNoUnitCount(self):
        """predict() on a never-fitted network raises AssertionError."""
        samples = numpy.zeros((8, 16))
        assert_raises(AssertionError, self.nn.predict, samples)

    def test_PredictUninitializedNoLabels(self):
        """predict() still raises AssertionError when only the last layer's units are set."""
        self.nn.layers[-1].units = 4
        samples = numpy.zeros((8, 16))
        assert_raises(AssertionError, self.nn.predict, samples)

    def test_PredictClasses(self):
        """predict() returns the same container type and row count as the labels."""
        samples = numpy.zeros((8, 16))
        classes = numpy.random.randint(0, 5, (8,))
        self.nn.fit(samples, classes)
        guessed = self.nn.predict(samples)
        assert_equal(type(classes), type(guessed))
        assert_equal(classes.shape[0], guessed.shape[0])

    def test_PredictMultiClass(self):
        """With two label columns, predict() returns an array of identical shape."""
        samples = numpy.zeros((8, 16))
        classes = numpy.random.randint(0, 3, (8, 2))
        self.nn.fit(samples, classes)
        guessed = self.nn.predict(samples)
        assert_equal(type(classes), type(guessed))
        assert_equal(classes.shape, guessed.shape)

    def test_EstimateProbalities(self):
        """predict_proba() returns the same container type as the labels."""
        samples = numpy.zeros((8, 16))
        classes = numpy.random.randint(0, 5, (8,))
        self.nn.fit(samples, classes)
        proba = self.nn.predict_proba(samples)
        assert_equal(type(classes), type(proba))

    def test_CalculateScore(self):
        """score() returns a numpy.float64."""
        samples = numpy.zeros((8, 16))
        classes = numpy.random.randint(0, 5, (8,))
        self.nn.fit(samples, classes)
        accuracy = self.nn.score(samples, classes)
        assert_equal(type(accuracy), numpy.float64)
def load_train_data(path, modelNo=1, n_classes=38, batch_size=10000):
    """Train a dropout network on a random feature subset and predict the test set.

    Despite its name, this function loads the training data, fits a
    classifier, predicts class probabilities for the test data and writes
    them to ``p2_<modelNo>.csv`` in the current working directory.

    Parameters
    ----------
    path : str
        Directory holding ``train_newFeat_sparse_mat.dat``, ``labels.csv``
        and ``test_newFeat_sparse_mat.dat``.
    modelNo : int
        Seeds ``random``/``numpy.random`` and the train/validation split, and
        is used in the output file name. Only model 1 gets an explicit
        validation set.
    n_classes : int
        Number of probability columns in the output (previously the
        hard-coded constant 38).
    batch_size : int
        Number of test rows passed to ``predict_proba`` at once (previously
        the hard-coded constant 10000).

    Returns
    -------
    pandas.DataFrame
        One row per test sample, one column per class.
    """
    # NOTE(review): pickle.load can execute arbitrary code; only use trusted files.
    with open(path + '/train_newFeat_sparse_mat.dat', 'rb') as infile:
        X = pickle.load(infile)

    random.seed(modelNo)
    np.random.seed(modelNo)

    # Keep a random 80% of the feature columns; the same columns are reused for the test set.
    n_features = X.shape[1]
    r = random.sample(range(0, n_features), int(round(0.8 * n_features)))
    X = X[:, r]

    y = pd.read_csv(path + '/labels.csv', index_col=False, header=None)
    y = np.array(y).astype('int')

    X_train, X_val, y_train, y_val = train_test_split(
        X, y, test_size=0.2, random_state=modelNo, stratify=y)

    nn = Classifier(
        layers=[
            Layer("Rectifier", units=200, dropout=0.5),
            Layer("Rectifier", units=200, dropout=0.5),
            Layer("Rectifier", units=200, dropout=0.5),
            Layer("Sigmoid"),
        ],
        learning_rate=0.02,
        n_iter=40,
        n_stable=15,
        debug=True,
        verbose=True)

    # Formatting with % keeps the printed text identical under Python 2 and 3.
    print("Model No is %s" % (modelNo,))
    if modelNo == 1:
        print("Model No is %s" % (modelNo,))
        nn.valid_set = (X_val, y_val)

    nn.fit(X_train, y_train)

    with open(path + '/test_newFeat_sparse_mat.dat', 'rb') as infile:
        Y = pickle.load(infile)
    Y = Y[:, r]

    # Predict in batches over the real row count. The old fixed loop of
    # 10 x 10000 rows left every row past 100000 as zeros and fed empty
    # slices to predict_proba for smaller test sets.
    n_rows = Y.shape[0]
    preds2 = np.zeros((n_rows, n_classes))
    for s in range(0, n_rows, batch_size):
        e = min(n_rows, s + batch_size)
        preds2[s:e, :] = nn.predict_proba(Y[s:e, :])

    p2 = pd.DataFrame(preds2)
    p2.to_csv("p2_" + str(modelNo) + ".csv", index=None, header=None)
    return p2
class TestClassifierFunctionality(unittest.TestCase):
    """Basic API checks for a one-layer MLPC trained on all-zero data."""

    def setUp(self):
        # Unfitted network with a single "Linear" layer, trained for one iteration.
        linear_layer = L("Linear")
        self.nn = MLPC(layers=[linear_layer], n_iter=1)

    def test_FitAutoInitialize(self):
        """fit() must leave the network reporting is_initialized."""
        features = numpy.zeros((8, 16))
        targets = numpy.zeros((8,), dtype=numpy.int32)
        self.nn.fit(features, targets)
        assert_true(self.nn.is_initialized)

    def test_PartialFit(self):
        """Two consecutive partial_fit() calls with an explicit class list must not raise."""
        features = numpy.zeros((8, 4))
        targets = numpy.zeros((8,), dtype=numpy.int32)
        self.nn.partial_fit(features, targets, classes=[0, 1, 2, 3])
        self.nn.partial_fit(features * 2.0, targets + 1, classes=[0, 1, 2, 3])

    def test_PredictUninitialized(self):
        """predict() on a never-fitted network raises ValueError."""
        features = numpy.zeros((8, 16))
        assert_raises(ValueError, self.nn.predict, features)

    def test_PredictClasses(self):
        """predict() returns the same container type and shape as the labels."""
        features = numpy.zeros((8, 16))
        targets = numpy.zeros((8,), dtype=numpy.int32)
        self.nn.fit(features, targets)
        predicted = self.nn.predict(features)
        assert_equal(type(targets), type(predicted))
        assert_equal(targets.shape, predicted.shape)

    def test_EstimateProbalities(self):
        """predict_proba() returns the same container type as the labels."""
        features = numpy.zeros((8, 16))
        targets = numpy.zeros((8,), dtype=numpy.int32)
        self.nn.fit(features, targets)
        probabilities = self.nn.predict_proba(features)
        assert_equal(type(targets), type(probabilities))

    def test_CalculateScore(self):
        """score() returns a numpy.float64."""
        features = numpy.zeros((8, 16))
        targets = numpy.zeros((8,), dtype=numpy.int32)
        self.nn.fit(features, targets)
        score = self.nn.score(features, targets)
        assert_equal(type(score), numpy.float64)
class TestClassifierFunctionality(unittest.TestCase):
    """Basic API checks for a one-layer MLPC trained on all-zero data."""

    def setUp(self):
        # Unfitted network with a single "Linear" layer, trained for one iteration.
        linear_layer = L("Linear")
        self.nn = MLPC(layers=[linear_layer], n_iter=1)

    def test_FitAutoInitialize(self):
        """fit() must leave the network reporting is_initialized."""
        samples = numpy.zeros((8, 16))
        classes = numpy.zeros((8,), dtype=numpy.int32)
        self.nn.fit(samples, classes)
        assert_true(self.nn.is_initialized)

    def test_PartialFit(self):
        """Two consecutive partial_fit() calls with an explicit class list must not raise."""
        samples = numpy.zeros((8, 4))
        classes = numpy.zeros((8,), dtype=numpy.int32)
        self.nn.partial_fit(samples, classes, classes=[0, 1, 2, 3])
        self.nn.partial_fit(samples * 2.0, classes + 1, classes=[0, 1, 2, 3])

    def test_PredictUninitialized(self):
        """predict() on a never-fitted network raises ValueError."""
        samples = numpy.zeros((8, 16))
        assert_raises(ValueError, self.nn.predict, samples)

    def test_PredictClasses(self):
        """predict() returns the same container type and shape as the labels."""
        samples = numpy.zeros((8, 16))
        classes = numpy.zeros((8,), dtype=numpy.int32)
        self.nn.fit(samples, classes)
        guessed = self.nn.predict(samples)
        assert_equal(type(classes), type(guessed))
        assert_equal(classes.shape, guessed.shape)

    def test_EstimateProbalities(self):
        """predict_proba() returns the same container type as the labels."""
        samples = numpy.zeros((8, 16))
        classes = numpy.zeros((8,), dtype=numpy.int32)
        self.nn.fit(samples, classes)
        proba = self.nn.predict_proba(samples)
        assert_equal(type(classes), type(proba))

    def test_CalculateScore(self):
        """score() returns a numpy.float64."""
        samples = numpy.zeros((8, 16))
        classes = numpy.zeros((8,), dtype=numpy.int32)
        self.nn.fit(samples, classes)
        accuracy = self.nn.score(samples, classes)
        assert_equal(type(accuracy), numpy.float64)
# Saving the NN
# A context manager guarantees the pickle file is flushed and closed;
# the previous bare open() left the handle to the garbage collector.
with open("Convoluted.pk1", "wb") as model_file:
    pickle.dump(nn, model_file)

# ----------------------------------------------------------------------------------------------------------- #
# Estimating the generalisation error with CV: all classes individually and multiclass log-loss
print("CV for class-wise generalisation errors")
num_folds = 2
# NOTE(review): KFold(y, n_folds=...) is the legacy sklearn.cross_validation
# signature -- confirm which scikit-learn version this script targets.
kf = KFold(y, n_folds=num_folds)
y_pred = y * 0  # same shape/dtype as y, filled fold by fold below
l_loss = np.zeros((num_folds, 1), dtype=float)  # one log-loss value per fold
p = 0  # index of the current fold in l_loss
for train, test in kf:
    X_train, X_test, y_train, y_test = X[train, :], X[test, :], y[train], y[test]
    # A fresh classifier per fold so no fitted state carries over between folds.
    nn_cv = Classifier(layers=lay, learning_rate=0.001, n_iter=2)
    nn_cv.fit(X=X_train, y=y_train)
    y_pred[test] = nn_cv.predict(X_test)
    y_pred2 = nn_cv.predict_proba(X_test)
    l_loss[p, 0] = log_loss(y_test, y_pred2)
    p += 1

print(classification_report(y, y_pred, target_names=namesClasses))
log_loss_CV = np.average(l_loss, axis=0)

# Calculating the multiclass log-loss
print("Multiclass Log-loss by CV: ", log_loss_CV)
print("Finished program")
print("--- %s seconds ---" % (round(time.time() - start_time, 4)))  # Calculates machine time for the program
learning_rate=0.01, batch_size=100, n_iter=2000, valid_size=0.25, n_stable=200) # Training nn.fit(X_train, Y) pickle.dump(nn, open('nn_susy_classification.pkl', 'wb')) if not runTraining: nn = pickle.load(open('nn_susy_classification.pkl', 'rb')) # Testing pred_train = nn.predict(X_train) pred_test = nn.predict(X_test) probabilities_train = nn.predict_proba(X_train) probabilities_test = nn.predict_proba(X_test) print "Training sample...." print " Signal identified as signal (%) : ", 100.0 * np.sum( pred_train[nBackgroundEvents:nBackgroundEvents + nSignalEvents] == 1.0) / nSignalEvents print " Signal identified as background (%) : ", 100.0 * np.sum( pred_train[nBackgroundEvents:nBackgroundEvents + nSignalEvents] == 0.0) / nSignalEvents print " Background identified as signal (%) : ", 100.0 * np.sum( pred_train[0:nBackgroundEvents] == 1.0) / nBackgroundEvents print " Background identified as background (%): ", 100.0 * np.sum( pred_train[0:nBackgroundEvents] == 0.0) / nBackgroundEvents print "" print "Testing sample...."
def two_layers_nnet(X_train,
                    Y_train,
                    X_test,
                    Y_test,
                    method1="Tanh",
                    neurons1=5,
                    method2="",
                    neurons2=0,
                    decay=0.0001,
                    learning_rate=0.001,
                    n_iter=25,
                    random_state=1):
    """Fit a one- or two-hidden-layer network and evaluate it on a test set.

    Parameters
    ----------
    X_train : pandas data frame
        data frame of features for the training set
    Y_train : pandas data frame
        data frame of labels for the training set
    X_test : pandas data frame
        data frame of features for the test set
    Y_test : pandas data frame
        data frame of labels for the test set
    method1 : str
        method used for the first layer
    neurons1 : int
        number of neurons of the first layer
    method2 : str
        method used for the second layer (ignored when ``neurons2 == 0``)
    neurons2 : int
        number of neurons of the second layer; 0 disables the second layer
    decay : float
        weight decay
    learning_rate : float
        learning rate
    n_iter : int
        number of iterations
    random_state : int
        seed for weight initialization

    Result:
    -------
    numpy array
        logloss : averaged logarithmic loss
        miss_err : misclassification error rate
        prec : precision
        recall : recall
        f1 : f1 score
        parameters : previous parameters in the order previously specified

        Because the method names are strings, NumPy stores every entry of
        the returned array as a string.
    """
    labels = np.unique(Y_train)

    ##
    # Scale Data
    # Fit the scaler on the training data only and reuse it for the test
    # data, so both sets share the same min/max mapping. Previously each
    # set was fitted separately, giving inconsistent feature scales.
    scaler = MinMaxScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # Layers
    layers = [Layer(method1, weight_decay=decay, units=neurons1)]
    if neurons2 != 0:
        layers.append(Layer(method2, weight_decay=decay, units=neurons2))
    layers.append(Layer("Softmax"))

    ##
    # Run nnet
    # Define classifier
    nn = Classifier(layers,
                    learning_rate=learning_rate,
                    random_state=random_state,
                    n_iter=n_iter)
    # Fit
    nn.fit(X_train, Y_train)
    # Predict
    Y_hat = nn.predict(X_test)
    Y_probs = nn.predict_proba(X_test)

    ## Misclassification error rate
    miss_err = 1 - accuracy_score(Y_test, Y_hat)
    ## Log Loss
    logloss = log_loss(Y_test, Y_probs)
    ## Precision
    prec = precision_score(y_true=Y_test, y_pred=Y_hat, labels=labels, average='micro')
    ## Recall
    recall = recall_score(y_true=Y_test, y_pred=Y_hat, labels=labels, average='micro')
    ## F1
    f1 = f1_score(y_true=Y_test, y_pred=Y_hat, labels=labels, average='micro')

    # Summarized results
    result = np.array([logloss, miss_err, prec, recall, f1,
                       method1, neurons1, method2, neurons2,
                       decay, learning_rate, n_iter, random_state])
    return result
def two_layers_nnet_predict(X_train,
                            Y_train,
                            X_test,
                            method1="Tanh",
                            neurons1=5,
                            method2="",
                            neurons2=0,
                            decay=0.0001,
                            learning_rate=0.001,
                            n_iter=25,
                            random_state=1):
    """Fit a one- or two-hidden-layer network and predict an unlabeled test set.

    Parameters
    ----------
    X_train : pandas data frame
        data frame of features for the training set
    Y_train : pandas data frame
        data frame of labels for the training set
    X_test : pandas data frame
        data frame of features for the test set
    method1 : str
        method used for the first layer
    neurons1 : int
        number of neurons of the first layer
    method2 : str
        method used for the second layer (ignored when ``neurons2 == 0``)
    neurons2 : int
        number of neurons of the second layer; 0 disables the second layer
    decay : float
        weight decay
    learning_rate : float
        learning rate
    n_iter : int
        number of iterations
    random_state : int
        seed for weight initialization

    Result:
    -------
    tuple of numpy arrays
        (predicted classes, predicted probabilities)
    """
    ##
    # Scale Data
    # Fit the scaler on the training data only and reuse it for the test
    # data, so both sets share the same min/max mapping. Previously each
    # set was fitted separately, giving inconsistent feature scales.
    scaler = MinMaxScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # Layers
    layers = [Layer(method1, weight_decay=decay, units=neurons1)]
    if neurons2 != 0:
        layers.append(Layer(method2, weight_decay=decay, units=neurons2))
    layers.append(Layer("Softmax"))

    ##
    # Run nnet
    # Define classifier
    nn = Classifier(layers,
                    learning_rate=learning_rate,
                    random_state=random_state,
                    n_iter=n_iter)
    # Fit
    nn.fit(X_train, Y_train)
    # Predict
    Y_hat = nn.predict(X_test)
    Y_probs = nn.predict_proba(X_test)

    # Summarized results
    result = (Y_hat, Y_probs)
    return result
learning_rate=0.00018, #valid_set = ((X_valid, y_valid)) n_iter=1000) print "Neural network specifications:" print nn nn.fit(trainingSet, trainingSetLabels) score1 = nn.score(trainingSet, trainingSetLabels) score3 = nn.score(testingSet, testingSetLabels) print "Training accuracy = ", score1 print "Testing accuracy = ", score3 probNN = nn.predict_proba(testingSet) fprNN, tprNN, threshNN = metrics.roc_curve(testingSetLabels, probNN[:, 0]) #true positive rate, false positive rate (ROC curve) print "Time = ", time.time() - startTime, "seconds" startTime = time.time() print print #------------------------SVM---------------------------- print "Support Vector Machine Classifier" clf = svm.SVC(C = 100, gamma = 0.1, probability=True) clf_info = clf.fit(trainingSet, trainingSetLabels) print clf_info
layers=[ Layer("Rectifier", units=49), Layer("Softmax")], learning_rate=0.01, batch_size = 100, n_iter=100) # Training nn.fit(X_train,Y) pickle.dump(nn, open('nn_susy_classification.pkl', 'wb')) if not runTraining: nn = pickle.load(open('nn_susy_classification.pkl', 'rb')) # Testing pred_train = nn.predict(X_train) pred_test = nn.predict(X_test) probabilities_train = nn.predict_proba(X_train) probabilities_test = nn.predict_proba(X_test) print "Training sample...." print " Signal identified as signal (%) : ",100.0*np.sum(pred_train[nBackgroundEvents:nBackgroundEvents+nSignalEvents]==1.0)/nSignalEvents print " Signal identified as background (%) : ",100.0*np.sum(pred_train[nBackgroundEvents:nBackgroundEvents+nSignalEvents]==0.0)/nSignalEvents print " Background identified as signal (%) : ",100.0*np.sum(pred_train[0:nBackgroundEvents]==1.0)/nBackgroundEvents print " Background identified as background (%): ",100.0*np.sum(pred_train[0:nBackgroundEvents]==0.0)/nBackgroundEvents print "" print "Testing sample...." print " Signal identified as signal (%) : ",100.0*np.sum(pred_test[nBackgroundEvents:nBackgroundEvents+nSignalEvents]==1.0)/nSignalEvents print " Signal identified as background (%) : ",100.0*np.sum(pred_test[nBackgroundEvents:nBackgroundEvents+nSignalEvents]==0.0)/nSignalEvents print " Background identified as signal (%) : ",100.0*np.sum(pred_test[0:nBackgroundEvents]==1.0)/nBackgroundEvents print " Background identified as background (%): ",100.0*np.sum(pred_test[0:nBackgroundEvents]==0.0)/nBackgroundEvents ## Plotting - performance curves
# Network: two "Rectifier" hidden layers (45 and 30 units, weight decay 0.001)
# followed by a "Softmax" output layer.
layer1 = Layer("Rectifier", units=45, weight_decay=0.001)
layer2 = Layer("Rectifier", units=30, weight_decay=0.001)
layer3 = Layer("Softmax")
cls = Classifier(layers=[layer1, layer2, layer3],
                 learning_rule="adam",
                 learning_rate=0.003,
                 f_stable=0.01,
                 debug=True,
                 batch_size=200,
                 n_iter=100)
cls.fit(X_train, y_train)

# get the probability of prediction for cross data
# NOTE(review): the meaning of collapse=True is not visible here -- confirm
# against the predict_proba implementation in use.
y_predict = cls.predict_proba(X_cross, collapse=True)
print(y_predict[:, 1])
# Column 1 is used as the probability p in the formula below
# (presumably the positive class -- confirm the class ordering).
p_cross = y_predict[:, 1]
# Element-wise binary cross-entropy: -y*log(p) - (1-y)*log(1-p).
# NOTE(review): p equal to exactly 0 or 1 makes np.log return -inf here.
loss_cross = -np.multiply(y_cross, np.log(p_cross)) - np.multiply(
    1 - y_cross, np.log(1 - p_cross))
# Mean loss over all elements.
print(loss_cross.sum() / loss_cross.size)

# get the probability of prediction for test data
y_predict_test = cls.predict_proba(X_test, collapse=True)
p_test = y_predict_test[:, 1]
# Same element-wise cross-entropy as above, on the test split.
loss_test = -np.multiply(y_test, np.log(p_test)) - np.multiply(
    1 - y_test, np.log(1 - p_test))
# One-hot encode the training labels: row i gets a 1 in column int(y_train[i]).
# NOTE(review): new_y_train is not used in the visible code -- fit() below
# receives the raw y_train. The loop also runs over len(TrainData) while the
# array has len(X_train) rows; confirm both lengths match.
new_y_train = np.zeros((len(X_train), len(Classes)))
for i in range(0, len(TrainData)):
    new_y_train[i, int(y_train[i])] = 1
nn.fit(X_train, y_train)
#y_valid = nn.predict(TestData[:,1:])
# Column 0 of TestData is taken as the label, the remaining columns as features.
X_test = TestData[:, 1:]
y_test = TestData[:, 0]
#score = nn.score(X_test, y_test)
X_test_pred = nn.predict(X_test)
conf = nn.predict_proba(X_test)
print conf
filename = "confidence_nn" + ".txt"
#print np.array(conf)
#conf=[np.ravel(i) for i in conf]
#print conf
#conf = map()
# Write the probabilities with five decimals, space separated.
np.savetxt(filename, np.array(conf), '%1.5f', delimiter=' ')
# Count test rows whose predicted label equals the label stored in column 0.
counter = 0
for i in range(0, len(X_test_pred)):
    if TestData[i, 0] == X_test_pred[i]:
        counter = counter + 1
# Write one integer prediction per line.
with open("predictions_mlp.txt", "w") as resFile:
    for i in range(0, len(X_test_pred)):
        resFile.write(str(int(X_test_pred[i])) + "\n")
learning_rate=0.00018, #valid_set = ((X_valid, y_valid)) n_iter=1000) print "Neural network specifications:" print nn nn.fit(trainingSet, trainingSetLabels) score1 = nn.score(trainingSet, trainingSetLabels) score3 = nn.score(testingSet, testingSetLabels) print "Training accuracy = ", score1 print "Testing accuracy = ", score3 probNN = nn.predict_proba(testingSet) fprNN, tprNN, threshNN = metrics.roc_curve( testingSetLabels, probNN[:, 0]) #true positive rate, false positive rate (ROC curve) print "Time = ", time.time() - startTime, "seconds" startTime = time.time() print print #------------------------SVM---------------------------- print "Support Vector Machine Classifier" clf = svm.SVC(C=100, gamma=0.1, probability=True)
class TestClassifierFunctionality(unittest.TestCase):
    """API checks for a one-layer "Softmax" MLPC: fitting, prediction, classes_ and scoring."""

    def setUp(self):
        # Unfitted network with a single "Softmax" layer, trained for one iteration.
        softmax_layer = L("Softmax")
        self.nn = MLPC(layers=[softmax_layer], n_iter=1)

    def test_IsClassifier(self):
        """The network reports itself as a classifier."""
        assert_true(self.nn.is_classifier)

    def test_FitAutoInitialize(self):
        """fit() must leave the network reporting is_initialized."""
        features = numpy.zeros((8, 16))
        targets = numpy.random.randint(0, 5, (8,))
        self.nn.fit(features, targets)
        assert_true(self.nn.is_initialized)

    def test_ExplicitValidSet(self):
        """fit() also initializes when valid_set was assigned beforehand."""
        features = numpy.zeros((8, 16))
        targets = numpy.random.randint(0, 5, (8,))
        self.nn.valid_set = (features, targets)
        self.nn.fit(features, targets)
        assert_true(self.nn.is_initialized)

    def test_PartialFit(self):
        """Two consecutive partial_fit() calls with an explicit class list must not raise."""
        features = numpy.zeros((8, 4))
        targets = numpy.random.randint(0, 5, (8,))
        self.nn.partial_fit(features, targets, classes=[0, 1, 2, 3])
        self.nn.partial_fit(features * 2.0, targets + 1, classes=[0, 1, 2, 3])

    def test_PredictUninitializedNoUnitCount(self):
        """predict() on a never-fitted network raises AssertionError."""
        features = numpy.zeros((8, 16))
        assert_raises(AssertionError, self.nn.predict, features)

    def test_PredictUninitializedNoLabels(self):
        """predict() still raises AssertionError when only the last layer's units are set."""
        self.nn.layers[-1].units = 4
        features = numpy.zeros((8, 16))
        assert_raises(AssertionError, self.nn.predict, features)

    def test_PredictBinaryProbability(self):
        """A two-class problem yields an (8, 2) probability array."""
        features = numpy.random.uniform(-1.0, 1.0, size=(8, 16))
        targets = numpy.array((features.sum(axis=1) >= 0.0), dtype=numpy.int32)
        # Force both classes to be present regardless of the random draw.
        targets[0], targets[-1] = 0, 1
        self.nn.fit(features, targets)
        probabilities = self.nn.predict_proba(features)
        self.nn.predict(features)  # result unused; the call itself must succeed
        distinct = numpy.unique(targets)
        assert_equal(2, distinct.shape[0])
        assert_equal((8, 2), probabilities.shape)

    def test_PredictClasses(self):
        """predict() works with a smaller batch_size and classes_ matches the labels seen."""
        features = numpy.zeros((8, 16))
        targets = numpy.random.randint(0, 5, (8,))
        self.nn.fit(features, targets)
        self.nn.batch_size = 4
        predicted = self.nn.predict(features)
        assert_equal(type(targets), type(predicted))
        assert_equal(targets.shape[0], predicted.shape[0])
        distinct = numpy.unique(targets)
        assert_equal(len(self.nn.classes_), 1)
        assert_true((self.nn.classes_[0] == distinct).all())

    def test_PredictLargerBatchSize(self):
        """fit()/predict() work when batch_size (32) exceeds the sample count (8)."""
        features = numpy.zeros((8, 16))
        targets = numpy.random.randint(0, 5, (8, 1))
        self.nn.batch_size = 32
        self.nn.fit(features, targets)
        predicted = self.nn.predict(features)
        assert_equal(type(targets), type(predicted))
        assert_equal(targets.shape[0], predicted.shape[0])

    def test_PredictMultiClass(self):
        """Two label columns give two classes_ entries of three classes each."""
        features = numpy.zeros((32, 16))
        targets = numpy.random.randint(0, 3, (32, 2))
        self.nn.fit(features, targets)
        predicted = self.nn.predict(features)
        assert_equal(type(targets), type(predicted))
        assert_equal(targets.shape, predicted.shape)
        assert_equal(len(self.nn.classes_), 2)
        assert_equal(self.nn.classes_[0].shape[0], 3)
        assert_equal(self.nn.classes_[1].shape[0], 3)

    def test_EstimateProbalities(self):
        """predict_proba() returns one row per input sample."""
        features = numpy.zeros((8, 16))
        targets = numpy.random.randint(0, 5, (8,))
        self.nn.fit(features, targets)
        probabilities = self.nn.predict_proba(features)
        assert_equal(type(targets), type(probabilities))
        assert_equal(features.shape[0], probabilities.shape[0])

    def test_CalculateScore(self):
        """score() returns a numpy.float64."""
        features = numpy.zeros((8, 16))
        targets = numpy.random.randint(0, 5, (8,))
        self.nn.fit(features, targets)
        score = self.nn.score(features, targets)
        assert_equal(type(score), numpy.float64)
#!/usr/bin/python
"""Fit a small sknn classifier on the model training set and print its training log-loss."""
import pandas as pd
from sklearn.metrics import log_loss
from sknn.mlp import Classifier, Layer

train = pd.read_csv('../data/modeltrain.csv', index_col=0)
test = pd.read_csv('../data/modeltest.csv', index_col=0)

# 'Response' is the target column; every other column is a feature.
label = train['Response'].values
# Index.drop takes (labels, errors); the stray positional 1 was not an axis and is dropped.
feat = train.columns.drop('Response')

nn = Classifier(
    layers=[
        Layer("Rectifier", units=10),
        Layer("Softmax")],
    learning_rate=0.001,
    n_iter=25)
train_features = train[feat].values
nn.fit(train_features, label)

# The loss was previously computed and thrown away; print it so a script run shows it.
# Column 1 of predict_proba is used, so this assumes a binary 'Response'.
print(log_loss(label, nn.predict_proba(train_features)[:, 1]))
class TestClassifierFunctionality(unittest.TestCase):
    """API checks for a one-layer "Softmax" MLPC, including probability sanity checks."""

    def setUp(self):
        # Unfitted network with a single "Softmax" layer, trained for one iteration.
        softmax_layer = L("Softmax")
        self.nn = MLPC(layers=[softmax_layer], n_iter=1)

    def test_IsClassifier(self):
        """The network reports itself as a classifier."""
        assert_true(self.nn.is_classifier)

    def test_FitAutoInitialize(self):
        """fit() must leave the network reporting is_initialized."""
        features = numpy.zeros((8, 16))
        targets = numpy.random.randint(0, 5, (8,))
        self.nn.fit(features, targets)
        assert_true(self.nn.is_initialized)

    def test_ExplicitValidSet(self):
        """fit() also initializes when valid_set was assigned beforehand."""
        features = numpy.zeros((8, 16))
        targets = numpy.random.randint(0, 5, (8,))
        self.nn.valid_set = (features, targets)
        self.nn.fit(features, targets)
        assert_true(self.nn.is_initialized)

    def test_PartialFit(self):
        """Two consecutive partial_fit() calls with an explicit class list must not raise."""
        features = numpy.zeros((8, 4))
        targets = numpy.random.randint(0, 5, (8,))
        self.nn.partial_fit(features, targets, classes=[0, 1, 2, 3])
        self.nn.partial_fit(features * 2.0, targets + 1, classes=[0, 1, 2, 3])

    def test_PredictUninitializedNoUnitCount(self):
        """predict() on a never-fitted network raises AssertionError."""
        features = numpy.zeros((8, 16))
        assert_raises(AssertionError, self.nn.predict, features)

    def test_PredictUninitializedNoLabels(self):
        """predict() still raises AssertionError when only the last layer's units are set."""
        self.nn.layers[-1].units = 4
        features = numpy.zeros((8, 16))
        assert_raises(AssertionError, self.nn.predict, features)

    def test_PredictBinaryProbability(self):
        """A two-class problem yields an (8, 2) array of valid probabilities."""
        features = numpy.random.uniform(-1.0, 1.0, size=(8, 16))
        targets = numpy.array((features.sum(axis=1) >= 0.0), dtype=numpy.int32)
        # Force both classes to be present regardless of the random draw.
        targets[0], targets[-1] = 0, 1
        self.nn.fit(features, targets)
        probabilities = self.nn.predict_proba(features)
        self.nn.predict(features)  # result unused; the call itself must succeed
        distinct = numpy.unique(targets)
        assert_equal(2, distinct.shape[0])
        assert_equal((8, 2), probabilities.shape)
        assert_true((probabilities >= 0.0).all())
        assert_true((probabilities <= 1.0).all())
        assert_true((abs(probabilities.sum(axis=1) - 1.0) < 1E-9).all())

    def test_PredictClasses(self):
        """predict() works with a smaller batch_size and classes_ matches the labels seen."""
        features = numpy.zeros((8, 16))
        targets = numpy.random.randint(0, 5, (8,))
        self.nn.fit(features, targets)
        self.nn.batch_size = 4
        predicted = self.nn.predict(features)
        assert_equal(type(targets), type(predicted))
        assert_equal(targets.shape[0], predicted.shape[0])
        distinct = numpy.unique(targets)
        assert_equal(len(self.nn.classes_), 1)
        assert_true((self.nn.classes_[0] == distinct).all())

    def test_PredictLargerBatchSize(self):
        """fit()/predict() work when batch_size (32) exceeds the sample count (8)."""
        features = numpy.zeros((8, 16))
        targets = numpy.random.randint(0, 5, (8, 1))
        self.nn.batch_size = 32
        self.nn.fit(features, targets)
        predicted = self.nn.predict(features)
        assert_equal(type(targets), type(predicted))
        assert_equal(targets.shape[0], predicted.shape[0])

    def test_PredictMultiClass(self):
        """Two label columns give two classes_ entries of three classes each."""
        features = numpy.zeros((32, 16))
        targets = numpy.random.randint(0, 3, (32, 2))
        self.nn.fit(features, targets)
        predicted = self.nn.predict(features)
        assert_equal(type(targets), type(predicted))
        assert_equal(targets.shape, predicted.shape)
        assert_equal(len(self.nn.classes_), 2)
        assert_equal(self.nn.classes_[0].shape[0], 3)
        assert_equal(self.nn.classes_[1].shape[0], 3)

    def test_EstimateProbalities(self):
        """predict_proba() rows lie in [0, 1] and sum to 1."""
        features = numpy.zeros((8, 16))
        targets = numpy.random.randint(0, 5, (8,))
        self.nn.fit(features, targets)
        probabilities = self.nn.predict_proba(features)
        assert_equal(type(targets), type(probabilities))
        assert_equal(features.shape[0], probabilities.shape[0])
        assert_true((probabilities >= 0.0).all())
        assert_true((probabilities <= 1.0).all())
        assert_true((abs(probabilities.sum(axis=1) - 1.0) < 1E-9).all())

    def test_MultipleProbalitiesAsList(self):
        """With four label columns, predict_proba() returns a list of four valid arrays."""
        features = numpy.zeros((8, 16))
        targets = numpy.random.randint(0, 5, (8, 4))
        self.nn.fit(features, targets)
        per_output = self.nn.predict_proba(features)
        assert_equal(list, type(per_output))
        assert_equal(4, len(per_output))
        for block in per_output:
            assert_equal(features.shape[0], block.shape[0])
            assert_less_equal(block.shape[1], 5)
            assert_true((block >= 0.0).all())
            assert_true((block <= 1.0).all())
            assert_true((abs(block.sum(axis=1) - 1.0) < 1E-9).all())

    def test_CalculateScore(self):
        """score() returns a numpy.float64."""
        features = numpy.zeros((8, 16))
        targets = numpy.random.randint(0, 5, (8,))
        self.nn.fit(features, targets)
        score = self.nn.score(features, targets)
        assert_equal(type(score), numpy.float64)
def check(self, a_in, a_out, a_mask, act='Softmax', n_iter=100):
    """Train a one-layer rmsprop classifier and return its probabilities for ``a_in``.

    Parameters
    ----------
    a_in : input samples, used for both fitting and prediction.
    a_out : target labels for ``a_in``.
    a_mask : third positional argument handed to ``fit``
        (presumably a per-sample mask/weight array -- confirm against MLPC.fit).
    act : activation name of the single layer (default ``'Softmax'``).
    n_iter : number of training iterations (default 100).

    Returns
    -------
    Whatever ``predict_proba`` returns for ``a_in``.
    """
    output_layer = L(act)
    model = MLPC(
        layers=[output_layer],
        learning_rule='rmsprop',
        n_iter=n_iter,
    )
    model.fit(a_in, a_out, a_mask)
    probabilities = model.predict_proba(a_in)
    return probabilities
class ClassifierScikitNN():
    """scikit-learn style wrapper around a three-layer sknn ``Classifier``.

    The wrapped network is Maxout (``num_features`` units, 2 pieces) ->
    Sigmoid (``hidden_units``) -> Softmax (``num_classes``), where
    ``hidden_units`` is one third of ``num_features + num_classes``, rounded.

    ``num_classes`` and ``num_features`` must both be given; the ``None``
    defaults cannot be used to size the layers.
    """

    packageName = 'com.brodagroup.machinelearning.ClassifierScikitNN'
    logger = None
    hidden_units = None
    classifier = None

    # Initializer
    def __init__(self,
                 num_classes=None,
                 num_features=None,
                 learning_rate=0.01,
                 learning_rule='sgd',
                 learning_momentum=0.9,
                 dropout_rate=None,
                 weight_decay=None,
                 random_state=0,
                 n_iter=10):
        """Store the hyper-parameters and build the wrapped classifier."""
        self.logger = Logger(self.packageName).getLogger()
        self.logger.debug('Starting...')

        self.num_classes = num_classes
        self.num_features = num_features
        self.learning_rule = learning_rule
        self.learning_momentum = learning_momentum
        self.dropout_rate = dropout_rate
        self.learning_rate = learning_rate
        self.weight_decay = weight_decay
        self.random_state = random_state
        self.n_iter = n_iter

        self._build_classifier()

    def _build_classifier(self):
        """(Re)create ``hidden_units`` and ``classifier`` from the current attributes."""
        # A unit count must be an integer; round(x, 0) alone yields a float.
        self.hidden_units = int(round(
            (self.num_features + self.num_classes) / 3, 0))

        self.classifier = Classifier(
            layers=[
                Layer('Maxout', units=self.num_features, pieces=2),
                Layer('Sigmoid', units=self.hidden_units),
                Layer('Softmax', units=self.num_classes)
            ],
            learning_rule=self.learning_rule,
            learning_rate=self.learning_rate,
            learning_momentum=self.learning_momentum,
            dropout_rate=self.dropout_rate,
            weight_decay=self.weight_decay,
            random_state=self.random_state,
            n_iter=self.n_iter)

    def __str__(self):
        """Return a multi-line summary of the hyper-parameters."""
        parts = [
            self.packageName + '(',
            '\n\t num_classes={0}, num_features: {1}'.format(self.num_classes, self.num_features),
            '\n\t learning_rule={0}, learning_rate: {1}'.format(self.learning_rule, self.learning_rate),
            '\n\t learning_momentum={0}, dropout_rate: {1}'.format(self.learning_momentum, self.dropout_rate),
            '\n\t hidden_units={0}, weight_decay: {1}'.format(self.hidden_units, self.weight_decay),
            '\n\t random_state={0}, n_iter: {1}'.format(self.random_state, self.n_iter),
        ]
        return ''.join(parts)

    def fit(self, X, y):
        """Train the wrapped classifier on ``X`` / ``y``."""
        self.classifier.fit(X, y)

    def predict(self, X):
        """Return the wrapped classifier's predictions for ``X``."""
        return self.classifier.predict(X)

    def predict_proba(self, X):
        """Return the wrapped classifier's class probabilities for ``X``."""
        return self.classifier.predict_proba(X)

    def get_params(self, deep=True):
        """Return the constructor arguments as a dict (``deep`` is accepted but unused)."""
        return {
            "num_classes": self.num_classes,
            "num_features": self.num_features,
            "learning_rule": self.learning_rule,
            "learning_rate": self.learning_rate,
            "learning_momentum": self.learning_momentum,
            "dropout_rate": self.dropout_rate,
            "weight_decay": self.weight_decay,
            "random_state": self.random_state,
            "n_iter": self.n_iter
        }

    def set_params(self, **parameters):
        """Update hyper-parameters and rebuild the wrapped classifier.

        Previously only the attributes changed while the already-built
        classifier kept its old settings, so values set here (for example by
        a parameter search) were silently ignored. Rebuilding discards any
        fitted state, so call this before ``fit``.

        Returns ``self`` so calls can be chained.
        """
        for parameter, value in parameters.items():
            setattr(self, parameter, value)
        if parameters:
            self._build_classifier()
        return self
# ==================================================== clf = Classifier( layers=[Layer('Sigmoid', units=hu), Layer('Softmax', units=3)], learning_rule=lr, learning_rate=lrt, n_iter=ni ) startTime = datetime.datetime.now() clf.fit(X_train, y_train) endTime = datetime.datetime.now() y_score = clf.predict_proba(X_test) y_hat = clf.predict(X_test) ys = [y_s[y_h-1] for y_s, y_h in zip(y_score, y_hat)] tmp = np.append(X_test, np.reshape(y_test, (1,y_test.shape[0])).T, axis=1) tmp = np.append(tmp, np.reshape(y_hat, (1,y_hat.shape[0])).T, axis=1) tmp = np.append(tmp, y_score, axis=1) tmp = np.append(tmp, np.asarray(ys), axis=1) output['data'] = [['X_' + str(i) for i in range(1, X_test.shape[1] + 1)] + ['y_label', 'y_hat', 'y_score_1', 'y_score_2', 'y_score_3', 'ys']] + \ tmp.tolist() acc = accuracy_score(y_hat, y_test) confMatrix = confusion_matrix(y_test, y_hat).tolist()
#!/usr/bin/python
"""Fit a small sknn classifier on the model training set and print its training log-loss."""
import pandas as pd
from sklearn.metrics import log_loss
from sknn.mlp import Classifier, Layer

train = pd.read_csv('../data/modeltrain.csv', index_col=0)
test = pd.read_csv('../data/modeltest.csv', index_col=0)

# 'Response' is the target column; every other column is a feature.
label = train['Response'].values
# Index.drop takes (labels, errors); the stray positional 1 was not an axis and is dropped.
feat = train.columns.drop('Response')

nn = Classifier(layers=[Layer("Rectifier", units=10),
                        Layer("Softmax")],
                learning_rate=0.001,
                n_iter=25)
train_features = train[feat].values
nn.fit(train_features, label)

# The loss was previously computed and thrown away; print it so a script run shows it.
# Column 1 of predict_proba is used, so this assumes a binary 'Response'.
print(log_loss(label, nn.predict_proba(train_features)[:, 1]))
#Set random_state=0 for testing
# The index range is split alongside the data so i_train / i_test record
# which original rows ended up in each split.
X_train, X_test, y_train, y_test, i_train, i_test = train_test_split(
    X_data, y_data, range(0, len(y_data)), test_size=0.20)

# Command line: argv[2] = learning rate, argv[3] = iterations, argv[4] = hidden units.
classifier = Classifier(
    layers=[Layer("Sigmoid", units=int(sys.argv[4])),
            Layer("Softmax")],
    learning_rate=float(sys.argv[2]),
    n_iter=int(sys.argv[3]))
classifier.fit(X_train, y_train)

# Silence stdout while predicting. try/finally guarantees stdout is restored
# (and the devnull handle closed) even if a prediction call raises;
# previously an exception left stdout redirected for the rest of the run.
old_stdout = sys.stdout
sys.stdout = open(os.devnull, 'w')
try:
    results = classifier.predict(X_test)  #May produce junk output
    results_proba = classifier.predict_proba(X_test)  #May produce junk output
finally:
    sys.stdout.close()
    sys.stdout = old_stdout

# Reshape to column vectors so the element-wise comparisons below line up.
results = np.reshape(results, (results.shape[0], 1))
results_proba = np.reshape(results_proba, (results.shape[0], 2))
y_test = np.reshape(y_test, results.shape)

# Percentages, treating label 1 as the positive class.
Acc = 100 * (results == y_test).sum() / float(len(y_test))
Pre = 100 * (np.logical_and(results == 1, y_test == 1)).sum() / float(
    (results == 1).sum())
Rec = 100 * (np.logical_and(results == 1, y_test == 1)).sum() / float(
    (y_test == 1).sum())
F1S = 2 * (Pre * Rec) / float(Pre + Rec)
# Parenthesised single-argument form prints identically under Python 2 and 3.
print('Acc\tPre\tRec\tF1S')
n_iter=10000, #n_stable=50, #f_stable=0.01, valid_set=(base['validation']['data'],base['validation']['target']), callback={'on_epoch_finish': store_errors}, verbose = verbose ) if opt_samp == Oversampling.DontUse: nn.fit(base['training']['data'],base['training']['target'],w_train) else: nn.fit(base['training']['data'],base['training']['target']) print('Testing') predictions = np.squeeze(np.asarray(nn.predict(base['testing']['data']))) prob_predictions = nn.predict_proba(base['testing']['data']) target = base['testing']['target'] targetByClass = np.array([0,0]) errors_total = 0 vp = 0 fp = 0 vn = 0 fn = 0 test_mse = 0 for predicted, obj in zip(predictions,base['testing']['target']): predicted if predicted != obj: # print(' error')
# Sigmoid hidden layer (hu units) followed by a 2-unit Softmax output layer.
clf = Classifier(layers=[
    Layer('Sigmoid', units=hu),
    Layer('Softmax', units=2)
],
                 learning_rule=lr,
                 learning_rate=lrt,
                 n_iter=ni)
# Time only the fit() call.
startTime = datetime.now()
clf.fit(X_train, y_train)
endTime = datetime.now()
y_score = clf.predict_proba(X_test)
y_hat = clf.predict(X_test)
# For each row, pick the score at the index given by the predicted label
# (presumably labels are 0/1 -- confirm).
ys = [y_s[y_h] for y_s, y_h in zip(y_score, y_hat)]
# Build one table row per test sample by appending columns to the features:
# true label, predicted label, the score columns, then the picked score.
tmp = np.append(X_test, np.reshape(y_test, (1, y_test.shape[0])).T, axis=1)
tmp = np.append(tmp, np.reshape(y_hat, (1, y_hat.shape[0])).T, axis=1)
tmp = np.append(tmp, y_score, axis=1)
# NOTE(review): np.append with axis=1 needs ys to be 2-D, which relies on
# predict() returning a column array -- confirm.
tmp = np.append(tmp, np.asarray(ys), axis=1)
# Header row followed by the data rows.
# NOTE(review): the header names both score columns 'y_score'.
output['data'] = [['X_' + str(i) for i in range(1, X_test.shape[1] + 1)] + ['y_label', 'y_hat', 'y_score', 'y_score', 'ys']] + \
    tmp.tolist()
# Persist the raw arrays, then reload them without their first 100 rows.
np.save("data_x_fire_a", data_x)
np.save("data_y_fire_a", data_y)
# Parenthesised single-argument form prints identically under Python 2 and 3.
print('saved')

data_x = np.load("data_x_fire_a.npy")[100:]
# Multiplying by 1 yields a numeric array (e.g. turns booleans into integers).
data_y = 1*np.load("data_y_fire_a.npy")[100:]

nn = Classifier(
    layers=[
        Layer("Sigmoid", units=512),
        Layer("Softmax")],
    learning_rate=0.01,
    n_iter=40,
    # callback=my_callback
)

print("Generating Fit")
nn.fit(data_x, data_y)
print("Fit generated")

# Round-trip the model through pickle. Context managers make sure the file is
# flushed and closed before it is read back; previously the write handle was
# never closed and the read handle was opened while it was still pending.
with open('nn_fire_a.pkl', 'wb') as fs:
    pickle.dump(nn, fs)

with open('nn_fire_a.pkl', 'rb') as fs:
    nn = pickle.load(fs)

# Show probabilities, predicted label and true label side by side for
# 20 consecutive samples starting at row n.
n = 259
out = np.column_stack((nn.predict_proba(data_x[n:n+20]),
                       nn.predict(data_x[n:n+20]),
                       data_y[n:n+20]))
print(out)