test_data_file = pd.read_csv('test.csv')

features_train, labels_train = train_data(train_data_file)
features_valid, labels_valid = valid_data(train_data_file)
features_test = test_data(test_data_file)

nn = Classifier(
    layers=[
        Layer("Sigmoid", units=429),
        Layer("Sigmoid", units=300),
        Layer("Sigmoid", units=150),
        Layer("Softmax", units=92)
    ],
    n_iter=1,
    n_stable=40,
    batch_size=25,
    learning_rate=0.003,
    learning_rule="momentum",
    valid_size=0.25,
    regularize="L2",
    normalize="weights",
    weight_decay=0.0001,
    loss_type="mcc",
    verbose=1)

nn.fit(features_train, labels_train)
predicts1 = nn.predict(features_valid)
correctness = correct_rate(predicts1, labels_valid)
print correctness
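# `correct_rate` is not defined in this snippet; a minimal sketch of what it is
# assumed to compute (plain prediction accuracy) might look like this hypothetical helper:
def correct_rate(predictions, labels):
    """Fraction of predictions that match the true labels (assumed helper)."""
    import numpy as np
    predictions = np.asarray(predictions).ravel()
    labels = np.asarray(labels).ravel()
    return float((predictions == labels).sum()) / len(labels)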
logging.info('%s place names and %s other words' % (len(places), len(non_places)))

place_vectors = repvecs(places, nlp)
non_place_vectors = repvecs(non_places, nlp)
logging.info('%s place name vectors and %s other word vectors' %
             (len(place_vectors), len(non_place_vectors)))

place_outputs = [1] * len(place_vectors)
non_place_outputs = [0] * len(non_place_vectors)

x = place_vectors + non_place_vectors
y = place_outputs + non_place_outputs
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=984)

logging.info('training classifier')
clf = Classifier(
    layers=[
        Layer("Rectifier", units=100),
        Layer("Softmax")
    ],
    regularize='dropout',
    dropout_rate=0.5,
    learning_rate=0.02,
    n_iter=20)
clf.fit(np.asarray(x_train), np.asarray(y_train))

print(classification_report(np.asarray(y_test), clf.predict(np.asarray(x_test))))
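# `repvecs` is not shown above; it is assumed to map each word to its embedding
# via the spaCy `nlp` object passed in. A hypothetical sketch consistent with
# that usage (modern spaCy `.vector` attribute, skipping words with no vector):
def repvecs(words, nlp):
    """Return a list of word vectors for the given words (assumed helper)."""
    vectors = []
    for word in words:
        doc = nlp(word)
        if doc.has_vector:
            vectors.append(doc.vector)
    return vectors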
    test_size=1.0/7.0, random_state=1234)

classifiers = []

if 'sknn' in sys.argv:
    from sknn.platform import gpu32
    from sknn.mlp import Classifier, Layer, Convolution

    clf = Classifier(
        layers=[
            # Convolution("Rectifier", channels=10, pool_shape=(2,2), kernel_shape=(3, 3)),
            Layer('Rectifier', units=200),
            Layer('Softmax')],
        learning_rate=0.01,
        learning_rule='nesterov',
        learning_momentum=0.9,
        batch_size=300,
        valid_size=0.0,
        n_stable=10,
        n_iter=10,
        verbose=True)
    classifiers.append(('sknn.mlp', clf))

if 'nolearn' in sys.argv:
    from sknn.platform import gpu32
    from nolearn.lasagne import NeuralNet, BatchIterator
    from lasagne.layers import InputLayer, DenseLayer
    from lasagne.nonlinearities import softmax
    from lasagne.updates import nesterov_momentum
X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target,
                                                    test_size=0.2, random_state=0)

# scale the X_train dataset under the L2 norm
X_trainn = preprocessing.normalize(X_train, norm='l2')
# scale the X_test dataset under the L2 norm
X_testn = preprocessing.normalize(X_test, norm='l2')
# centre the scaled X_trainn dataset around the mean
X_trainn = preprocessing.scale(X_trainn)
# centre the scaled X_testn dataset around the mean
X_testn = preprocessing.scale(X_testn)

# use a classification neural network to create the predictive model
clsfr = Classifier(
    layers=[
        # Rectifier is used for both nonlinear hidden layers, 13 units each
        Layer("Rectifier", units=13),
        Layer("Rectifier", units=13),
        # Softmax is used as the output activation layer for mutually
        # exclusive multi-class classification responses
        Layer("Softmax")],
    # learning rate parameter set at 0.001
    learning_rate=0.001,
    # learning rule using stochastic gradient descent to minimize the objective function
    learning_rule='sgd',
    # random seed set for the classification model
    random_state=201,
    # max number of iterations used to train the model (n_iter = epochs)
    n_iter=200)

# predictive model fit on the training set (scaled X, unscaled y)
model1 = clsfr.fit(X_trainn, y_train)
ipt_closed = f.read()
f.close()
ipt_closed = ipt_closed.split("\n")

for i in range(0, len(ipt_closed) - 1):
    ipt_closed[i] = ipt_closed[i].strip("[]").split(",")
    ipt_closed[i][0] = int(ipt_closed[i][0])
    ipt_closed[i][1] = int(ipt_closed[i][1])
    opt.append(0)

ipt = ipt_open[:-1] + ipt_closed[:-1]
ipt = np.asarray(ipt)
opt = np.asarray(opt)
print ":" + str(len(ipt))
print len(opt)

nn = Classifier(
    layers=[
        Layer("Softmax", units=2),
        Layer("Softmax", units=2),
        Layer("Softmax", units=2)
    ],
    learning_rate=0.05,
    n_iter=15)
nn.fit(ipt, opt)

a = np.asarray([[4, 30], [2, 30], [6, 300], [4, 300]])
# a = a.reshape(2, -1)
op = nn.predict(a)
print op
print "Creating data\n" # Grab the correct indices from the training data X = train.ix[:, 1:129].as_matrix() y = train.ix[:, 0:1].as_matrix() A = train_unlabeled.ix[:, 0:128].as_matrix() from sknn.mlp import Classifier, Layer # This is the important stuff to adjust print "Creating classifier\n" nn = Classifier(layers=[ Layer('Tanh', units=128), Layer('Sigmoid', units=128), Layer('Softmax', units=10) ], learning_rate=.04, n_iter=85, batch_size=10) """ Uncomment to actually train whole data and write file """ outfile = open('output.csv', 'w') # change the file name writer = csv.writer(outfile) writer.writerow(['Id', 'y']) print "About to fit\n" nn.fit(X, y) print "About to predict" b = nn.predict(A) nn.fit(A, b) prediction = nn.predict(test.as_matrix())
import numpy
from sknn.mlp import Classifier, Layer

X = numpy.array([[0, 1], [0, 0], [1, 0]])
print(X.shape)
y = numpy.array([[1], [0], [2]])
print(y.shape)

nn = Classifier(layers=[Layer("Sigmoid", units=2), Layer("Sigmoid", units=3)], n_iter=10)
nn.fit(X, y)
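# A small follow-up not in the original snippet: once fitted, the network can be
# queried for predictions on the same toy inputs.
print(nn.predict(X))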
with open('label_val_sparse_mat.dat', 'rb') as infile:
    labels_val = pickle.load(infile)

labels = labels.transpose()
labels_val = labels_val.transpose()
labels = labels.toarray()
labels_val = labels_val.toarray()

# ############################### classifier 1 ######################
nn = Classifier(layers=[Layer("Tanh", units=100), Layer("Softmax")],
                learning_rate=0.02,
                n_iter=50,
                batch_size=100,
                n_stable=20,
                debug=True,
                valid_set=(train_val, labels_val),
                verbose=True)
nn.fit(train, labels)

# ############################### classifier 2 ######################
nn = Classifier(layers=[Layer("Tanh", units=200), Layer("Softmax")],
                learning_rate=0.02,
                n_iter=50,
                batch_size=100,
                n_stable=20,
                debug=True,
eta = 0.001
iters = 45

rootdir = os.getcwd()
if not os.path.exists('sklearnTry'):
    os.makedirs('sklearnTry')
newdir = os.path.join(rootdir, 'sklearnTry')
fout = open(os.path.join(newdir, 'NeuralNetsOut.txt'), 'w+')

train_features, train_labels, test_features, test_labels, test_keys = GetData()

model = Classifier(
    layers=[Layer("Sigmoid", units=50), Layer("Softmax")],
    learning_rate=eta,
    n_iter=iters,
    weight_decay=0.00001,
    warning=None
)
# MLPClassifier(alpha=1e-05, hidden_layer_sizes=(15,), epsilon=1e-08)

gs = GridSearchCV(model, param_grid={
    'learning_rate': [0.005, 0.001, 0.0002],
    'hidden0__units': [8, 25, 40, 45, 50],
    'hidden0__type': ["Rectifier", "Sigmoid", "Tanh", "ExpLin"],
    'weight_decay': [0.00001, 0.001, 0.0001],
    'output__type': ["Sigmoid", "Softmax"]
})
gs.fit(train_features, train_labels)
pred = gs.predict(test_features)
def model_fitting(train_set, train_labels, classifier_name, n_jobs=cpu_count()):
    """
    The fitting process with sklearn algorithms.
    :param train_set: numpy array, required
    :param train_labels: list, required
    :param classifier_name: string, required
    :param n_jobs: integer, optional (defaults to the number of CPU cores)
    :return: object - fitted classifier model according to the given training data
    """
    classifier_list = {
        "svm_linear": SVC(probability=True, kernel='linear', C=1.0),
        "svm_poly": SVC(probability=True, kernel='poly', C=1.0),
        "svm_rbf": SVC(probability=True, kernel='rbf', C=1.0, gamma=0.01),
        "linear_svc": LinearSVC(penalty='l2', loss='squared_hinge', dual=True, tol=0.1,
                                C=1.0, multi_class='ovr', fit_intercept=True,
                                intercept_scaling=1, random_state=None, max_iter=3000),
        "knn": KNeighborsClassifier(n_neighbors=100, weights='distance', leaf_size=30,
                                    n_jobs=n_jobs),
        "random_forests": RandomForestClassifier(n_estimators=350, criterion='entropy',
                                                 min_samples_split=2, min_samples_leaf=1,
                                                 max_leaf_nodes=600, n_jobs=n_jobs),
        "logistic_regression": LogisticRegression(penalty='l2', dual=False, tol=0.0001,
                                                  C=2.4, fit_intercept=True,
                                                  intercept_scaling=1, random_state=None,
                                                  solver='liblinear', max_iter=1000,
                                                  multi_class='ovr', warm_start=False,
                                                  n_jobs=n_jobs),
        "decision_trees": DecisionTreeClassifier(criterion='gini', splitter='best',
                                                 max_depth=None, min_samples_split=2,
                                                 min_samples_leaf=100,
                                                 min_weight_fraction_leaf=0.0,
                                                 max_features=None, random_state=None,
                                                 max_leaf_nodes=None, presort=False),
        "sgd": SGDClassifier(alpha=.0001, n_iter=500, penalty="elasticnet", n_jobs=n_jobs),
        "neural_network": Classifier(layers=[
            Layer("Sigmoid", units=14),
            Layer("Sigmoid", units=13),
            Layer("Sigmoid", units=12),
            Layer("Sigmoid", units=10),
            Layer("Softmax")],
            learning_rate=0.01, n_iter=200, batch_size=10, regularize='L1',
            n_stable=50, dropout_rate=0, verbose=True),
        "GBC": GradientBoostingClassifier(max_depth=10, max_leaf_nodes=850,
                                          min_samples_leaf=15, learning_rate=0.1),
        "XGB": XGBClassifier(base_score=0.5, colsample_bylevel=1, colsample_bytree=1,
                             gamma=0, learning_rate=0.1, max_delta_step=0, max_depth=10,
                             min_child_weight=2, missing=None, n_estimators=100,
                             nthread=n_jobs, reg_alpha=0, objective='binary:logistic',
                             reg_lambda=1, scale_pos_weight=1, seed=0, silent=True,
                             subsample=1)
    }
    return classifier_list[classifier_name].fit(train_set, train_labels)
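# A minimal usage sketch for `model_fitting`; `train_set`, `train_labels` and
# `test_set` are placeholders assumed to be prepared elsewhere in the script.
fitted_model = model_fitting(train_set, train_labels, "neural_network")
predictions = fitted_model.predict(test_set)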
for hu in hidden_units:
    for ni in n_iters:
        for ii in range(1):
            print('learning_rate = ', lrt, 'learning_rule = ', lr,
                  'hidden_units = ', hu, 'n_iters = ', ni, '## = ', ii)

            # Train & Test
            X_train, X_test, y_train, y_test = train_test_split(
                X, y, test_size=0.3)

            # ====================================================
            clf = Classifier(layers=[
                Layer('Sigmoid', units=hu),
                Layer('Softmax', units=2)
            ],
                learning_rule=lr,
                learning_rate=lrt,
                n_iter=ni)

            startTime = datetime.now()
            clf.fit(X_train, y_train)
            endTime = datetime.now()

            y_score = clf.predict_proba(X_test)
            y_hat = clf.predict(X_test)
            ys = [y_s[y_h] for y_s, y_h in zip(y_score, y_hat)]

            tmp = np.append(X_test,
def get_nn_pck(X_train, X_test, y_train, y_test,
               c1=16, k1=9, p1=2, c2=14, k2=7, p2=2, c3=10, k3=3, p3=2):
    currentTime = str(
        time.strftime('%Y%m%d %H%M%S', time.localtime(time.time())))
    dirPath = currentPath + currentTime + 'canshu' + '%d-%d-%d-%d-%d-%d-%d-%d-%d' % (
        c1, k1, p1, c2, k2, p2, c3, k3, p3) + "/"
    excelPath = dirPath + "resLog.xlsx"
    os.mkdir(dirPath)

    # Create an Excel file named with the current time.
    workbook = xlsxwriter.Workbook(excelPath)
    # Create a worksheet object.
    worksheet = workbook.add_worksheet()
    worksheet.write("A1", "epochs")
    worksheet.write("B1", "Train-Score")
    worksheet.write("C1", "Test-Score")

    result = []
    for i in range(1, 10):
        nn = Classifier(
            layers=[
                Convolution('Rectifier', channels=c1, kernel_shape=(k1, k1),
                            border_mode='full',
                            pool_shape=(p1, p1)),  # border_mode='full', no stride
                Convolution('Rectifier', channels=c2, kernel_shape=(k2, k2),
                            border_mode='full',
                            pool_shape=(p2, p2)),  # border_mode='full', no stride
                Convolution('Rectifier', channels=c3, kernel_shape=(k3, k3),
                            border_mode='full',
                            pool_shape=(p3, p3)),
                Layer('Rectifier', units=32),
                Layer('Rectifier', units=32),
                Layer('Softmax', units=2)
            ],
            learning_rule="sgd",
            learning_rate=0.015,
            learning_momentum=0.9,
            weight_decay=0.001,
            n_iter=i,
            n_stable=10,
            f_stable=0.001,
            valid_size=0.1,
            verbose=True)
        nn.fit(X_train, y_train)
        pickle.dump(nn, open(dirPath + "nn" + str(i) + ".pkl", 'wb'))
        worksheet.write("A" + str(i + 1), i)
        worksheet.write("B" + str(i + 1), nn.score(X_train, y_train))
        worksheet.write("C" + str(i + 1), nn.score(X_test, y_test))
        result.append(nn.score(X_test, y_test))

    workbook.close()
    return max(result)
# Load the data and split it into subsets for training and testing.
digits = datasets.load_digits()
X = digits.images
y = digits.target
X_train, X_test, y_train, y_test = cross_validation.train_test_split(X, y, test_size=0.2)

# Create a neural network that uses convolution to scan the input images.
nn = Classifier(
    layers=[
        Convolution('Rectifier', channels=12, kernel_shape=(3, 3), border_mode='full'),
        Convolution('Rectifier', channels=10, kernel_shape=(3, 3), border_mode='valid'),
        Convolution('Rectifier', channels=4, kernel_shape=(3, 3), border_mode='valid'),
        Layer('Rectifier', units=64),
        Layer('Softmax')],
    learning_rate=0.002,
    valid_size=0.2,
    n_stable=10,
    verbose=True)

nn.fit(X_train, y_train)

# Determine how well it does on training data and unseen test data.
print('\nTRAIN SCORE', nn.score(X_train, y_train))
print('TEST SCORE', nn.score(X_test, y_test))

y_pred = nn.predict(X_test)
def neuralCombo(data):
    pipeline = Pipeline([('min/max scaler', MinMaxScaler(feature_range=(0.0, 1.0))),
                         ('nn', Classifier(layers=[
                             Layer("Rectifier", units=100),
                             Layer("Sigmoid", units=100),
                             Layer("Softmax")
                         ], n_iter=25))])

    learningRate = [0.05, 0.005, 0.001, 0.0001, 0.00001]
    units = [5, 50, 100, 200]
    type = ['Rectifier', 'Sigmoid', 'Sigmoid', 'Tanh', 'Linear', 'Softmax', 'Gaussian']
    # type = ['Rectifier', 'Linear', 'Gaussian']
    iterations = [25, 50, 100, 200]

    best = {}
    best['learningRate'] = 0.05
    best['units'] = 4
    best['type'] = 'Rectifier'
    best['iterations'] = 5
    best['trainingAccuracy'] = 0.0

    for l in learningRate:
        for i in iterations:
            for type0 in type:
                for u0 in units:
                    pipeline = Pipeline([
                        ('min/max scaler', MinMaxScaler(feature_range=(0.0, 1.0))),
                        ('nn', Classifier(
                            layers=[Layer(type0, units=u0), Layer("Softmax")],
                            n_iter=i))
                    ])
                    best = testModel(data, pipeline, best, l, u0, type0, i)
                    for type1 in type:
                        for u1 in units:
                            pipeline = Pipeline([
                                ('min/max scaler', MinMaxScaler(feature_range=(0.0, 1.0))),
                                ('nn', Classifier(layers=[
                                    Layer(type0, units=u0),
                                    Layer(type1, units=u1),
                                    Layer("Softmax")
                                ], n_iter=i))
                            ])
                            best = testModel(data, pipeline, best, l,
                                             str(u0) + "," + str(u1),
                                             type0 + "," + type1, i)
                            for type2 in type:
                                for u2 in units:
                                    pipeline = Pipeline([
                                        ('min/max scaler', MinMaxScaler(feature_range=(0.0, 1.0))),
                                        ('nn', Classifier(layers=[
                                            Layer(type0, units=u0),
                                            Layer(type1, units=u1),
                                            Layer(type2, units=u2),
                                            Layer("Softmax")
                                        ], n_iter=i))
                                    ])
                                    best = testModel(
                                        data, pipeline, best, l,
                                        str(u0) + "," + str(u1) + "," + str(u2),
                                        type0 + "," + type1 + "," + type2, i)

    print "bestOverall===================================="
    print "trainingAccuracy" + " = " + str(best['trainingAccuracy'])
    print "learningRate" + " = " + str(best['learningRate'])
    print "units" + " = " + str(best['units'])
    print "type" + " = " + str(best['type'])
    print "iterations" + " = " + str(best['iterations'])
X = np.vstack((pX, nX))
Y = nn.predict(X)
print Y[:3]
print Y[-3:]


def clean(x):
    return ''.join([t for t in x.strip() if ord(t) < 128])


# _________MAIN___________#
g = load_glove(glove_path)
print 'GloVe loaded...'
X, Y = load_ds(g, senti_path)

nn = Classifier(layers=[
    Convolution('Rectifier', channels=1, kernel_shape=(1, embed_dim)),
    Layer('Rectifier', units=400),
    Layer('Tanh', units=600),
    Layer('Softmax')
],
    learning_rate=0.001,
    verbose=True)

train(nn, X, Y, model_path, 100, 25)

# ___TEST___#
nn = pickle.load(open(model_path, 'r'))
print 'Model loaded...'
test_ds(g, nn, senti_path)
y_train, y_test = y[:trainCount], y[trainCount:]

print x_train,
print x_test,
print y_train,
print y_test

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler

pipeline = Pipeline([
    ('min/max scaler', MinMaxScaler(feature_range=(0.0, 1.0))),
    ('neural network', Classifier(layers=[Layer("Tanh", units=128),
                                          Layer("Softmax", units=2)],
                                  n_iter=25,
                                  learning_rate=0.001,
                                  verbose=True))
])

pipeline.fit(x_train, y_train)
y_test2 = pipeline.predict(x_test)

tpr = [0.0]
fpr = [0.0]
positives = float(np.count_nonzero(y_test))
negatives = float(len(y_test) - positives)
tpCount = 0
fpCount = 0
elif f == 1:
    single_inside = matrix
elif f == 2:
    double_outside = matrix
elif f == 3:
    double_inside = matrix

frames = [single_outside, single_inside, double_outside, double_inside]
results = pd.concat(frames)

x = np.array(results.drop(['label'], 1))
x = preprocessing.scale(x)
y = np.array(results['label'])

# machine learning
x_train, x_test, y_train, y_test = cross_validation.train_test_split(
    x, y, test_size=0.2)

# clf = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(5, 2), random_state=1)
clf = Classifier(
    layers=[Layer("Maxout", units=100, pieces=2), Layer("Softmax")],
    learning_rate=0.001,
    n_iter=25)
clf.fit(x_train, y_train)

# accuracy = clf.score(x_test, y_test)
print clf.predict(x_test)
print y_test
print clf.score(x_test, y_test)

with open('gesture_recognizeSVM_NN.pickle', 'wb') as f:
    pickle.dump(clf, f)
print(X.shape)
X_train, X_test, Y_train, Y_test = cross_validation.train_test_split(
    X, Y, test_size=0.2, random_state=0)

# Bernoulli RBM with 400 hidden units (compressed features obtained from the 786 input features).
rbm = BernoulliRBM(n_components=400,
                   learning_rate=0.01,
                   batch_size=10,
                   n_iter=10,
                   verbose=True,
                   random_state=None)

# Create a neural network that uses convolution to scan the input images,
# followed by two fully connected layers.
nn = Classifier(layers=[
    Convolution('Tanh', channels=20, kernel_shape=(5, 5), border_mode='valid'),
    Layer('Sigmoid', units=100),
    Layer('Softmax')
],
    learning_rate=0.002,
    valid_size=0.2,
    n_stable=5,
    verbose=True)

classifier = Pipeline(steps=[('rbm', rbm), ('cnn', nn)])

###############################################################################
# Training the RBM-CNN pipeline
classifier.fit(X_train, Y_train)
Y_pred = classifier.predict(X_test)
print('Score: ', (metrics.classification_report(Y_test, Y_pred)))
def mlp_kernel_constructor(kernel_option):
    return lambda: Classifier(**kernel_option)
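# A minimal usage sketch for `mlp_kernel_constructor`; the option dictionary
# below is illustrative, not taken from the original code.
make_mlp = mlp_kernel_constructor({
    'layers': [Layer("Rectifier", units=50), Layer("Softmax")],
    'learning_rate': 0.01,
    'n_iter': 25,
})
clf = make_mlp()  # each call builds a fresh, unfitted Classifier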
    testingSetLabels, probNB[:, 0])  # true positive rate, false positive rate (ROC curve)
print "Time = ", time.time() - startTime, "seconds"
startTime = time.time()
print
print

# ------------------------NN---------------------------
print "Neural Network Classifier"
nn = Classifier(
    layers=[Layer("Sigmoid", units=100), Layer("Softmax")],
    learning_rate=0.00018,
    # valid_set=((X_valid, y_valid)),
    n_iter=1000)
print "Neural network specifications:"
print nn

nn.fit(trainingSet, trainingSetLabels)
score1 = nn.score(trainingSet, trainingSetLabels)
score3 = nn.score(testingSet, testingSetLabels)
print "Training accuracy = ", score1
print "Testing accuracy = ", score3
learningRateAE = set[2]
learningRateCL = set[3]
nCyclesAE = set[4]
nCyclesCL = set[5]

outputFileNameAE = 'trained_fine/ae_' + str(hiddenUnitsAE) + '_' + str(
    learningRateAE) + '_' + str(nCyclesAE) + '.pkl'
outputFileNameCL = 'trained_fine/cl_' + str(hiddenUnitsCL) + '_' + str(
    learningRateCL) + '_' + str(nCyclesCL) + '.pkl'

# AUTOENCODER
if runAE:
    nn = Regressor(
        layers=[Layer("Rectifier", units=hiddenUnitsAE), Layer("Linear")],
        learning_rate=learningRateAE,
        n_iter=nCyclesAE)
    nn.fit(X_train_background, Y_autoencoder)
    pickle.dump(nn, open(outputFileNameAE, 'wb'))

# CLASSIFIER
if runCL:
    nn = Classifier(
        layers=[Layer("Rectifier", units=hiddenUnitsCL), Layer("Softmax")],
        learning_rate=learningRateCL,
        n_iter=nCyclesCL)
    nn.fit(X_train, Y)
    pickle.dump(nn, open(outputFileNameCL, 'wb'))

counter = counter + 1
print X.shape
print Y.shape
print '______'
train(nn, X, Y, './senti_model', 20, 5)


# _________MAIN___________#
g = load_ds(glove_path)
print 'GloVe loaded...'

nn = Classifier(
    layers=[
        # Convolution('Rectifier', channels=1, kernel_shape=(3, embed_dim)),
        Layer('Rectifier', units=96),
        Layer('Rectifier', units=128),
        Layer('Rectifier', units=256),
        Layer('Rectifier', units=128),
        Layer('Rectifier', units=96),
        Layer('Softmax')
    ],
    learning_rate=0.001,
    verbose=True)

train_csv(g, nn, '/mnt/share/Senti_csv/Sentiment Analysis Dataset.csv', 100000)

# _______TESTING_________#
nn = pickle.load(open('./senti_model', 'r'))
while True:
    sent = raw_input('Enter text:')
    if len(sent) > 0:
        if sent == 'quit':
            break
def plotation(clf_list):
    nlines = len(clf_list)
    plt.figure(figsize=(20, 10 * nlines))
    cm = plt.cm.RdBu
    cm_bright = ListedColormap(['#FF0000', '#0000FF'])
    X_train_plot = np.transpose([np.transpose(X_train)[i] for i in (4, 5)])
    Nlvl = 5
    c = 1

    mlp_Reg_type = type(
        Regressor(layers=[Layer("Rectifier", name="hiddenN")],
                  learning_rate=0.02, n_iter=10))
    mlp_Cla_type = type(
        Classifier(layers=[Layer("Rectifier", name="hiddenN")],
                   learning_rate=0.02, n_iter=10))

    robust_scaler = False
    for _, clf in enumerate(clf_list):
        if hasattr(clf, "predict_proba"):
            print("Classifier")
            if type(clf) == mlp_Cla_type:
                robust_scaler = sklearn.preprocessing.RobustScaler()
                X_train_plot_scaled = robust_scaler.fit_transform(X_train_plot)
                clfY = clf.fit(X_train_plot_scaled, into_levels(Y_train, Nlvl))
                clfZ = clf.fit(X_train_plot_scaled, into_levels(Z_train, Nlvl))
            else:
                clfY = clf.fit(X_train_plot, into_levels(Y_train, Nlvl))
                clfZ = clf.fit(X_train_plot, into_levels(Z_train, Nlvl))
        else:
            print("Regressor")
            if type(clf) == mlp_Reg_type:
                robust_scaler = sklearn.preprocessing.RobustScaler()
                X_train_plot_scaled = robust_scaler.fit_transform(X_train_plot)
                clfY = clf.fit(X_train_plot_scaled, Y_train)
                clfZ = clf.fit(X_train_plot_scaled, Z_train)
            else:
                clfY = clf.fit(X_train_plot, Y_train)
                clfZ = clf.fit(X_train_plot, Z_train)

        for _, clfdata in enumerate([clfY, clfZ]):
            axes = plt.subplot(nlines, 2, c)
            m = Basemap(llcrnrlon=x_min, llcrnrlat=y_min,
                        urcrnrlon=x_max, urcrnrlat=y_max,
                        resolution='i', projection='cass',
                        lon_0=-74.00597, lat_0=40.71427, ax=axes)
            m.drawcoastlines()
            lons, lats = m.makegrid(100, 100)
            x, y = m(lons, lats)
            Z = np.zeros((100, 100))
            for l in range(100):
                for p in range(100):
                    LP = np.array([lons[l][p], lats[l][p]])
                    LP = np.array([LP])
                    if robust_scaler != False:
                        LP = robust_scaler.transform(LP)
                    Z[l][p] = clfdata.predict(LP)
            diff = np.max(Z) - np.min(Z)
            cs = m.contourf(
                x, y, Z,
                [np.min(Z) + diff * i / Nlvl for i in range(0, Nlvl + 1)],
                cmap=cm, alpha=.8)
            m.colorbar(cs, location='bottom', pad="5%")
            c += 1
        robust_scaler = False
    percent = score / len(actual)
    print("Accuracy is: ", percent)


if __name__ == "__main__":
    print("========================================================================")
    trainingData = "G:/hackPrinceton/CHAI/data/chai/training/7.xlsx"
    data = nlpFile.getFeatures(trainingData, "Laughter")
    j = refineData(data)
    # print(j.xtrain)
    # print(j.ytrain)

    pipeline = Pipeline([
        ('min/max scaler', MinMaxScaler(feature_range=(0.0, 1.0))),
        ('neural network', Classifier(layers=[Layer("Softmax")], n_iter=25))
    ])
    pipeline.fit(np.asarray(j.get('0')), np.asarray(j.get('1')))

    nn = Classifier(
        layers=[Layer("Maxout", units=100, pieces=2), Layer("Softmax")],
        learning_rate=0.001,
        n_iter=25)
    nn.fit(j.get('0'), j.get('1'))

    test = "G:/hackPrinceton/CHAI/data/chai/training/7.xlsx"
    testData = nlpFile.getFeatures(test, "Laughter")
    t = refineData(testData)
    y_actual = t.get('1')
    x_test = t.get('0')
    y_calculated = nn.predict(x_test)
    # print(y_actual)
for line in lines:
    int_line = [float(x) for x in line]
    TrainData.append(int_line)
TrainData = np.array(TrainData)

with open(test_file_name) as textFile:
    lines = [line.split() for line in textFile]
for line in lines:
    int_line = [float(x) for x in line]
    TestData.append(int_line)
TestData = np.array(TestData)

nn = Classifier(layers=[Layer("Sigmoid", units=100), Layer("Softmax")],
                learning_rate=0.02,
                n_iter=10)

X_train = TrainData[:, 1:]
y_train = TrainData[:, 0]

# One-hot encoding of the labels; note that nn.fit below is still given the
# integer labels in y_train rather than this encoded matrix.
new_y_train = np.zeros((len(X_train), len(Classes)))
for i in range(0, len(TrainData)):
    new_y_train[i, int(y_train[i])] = 1

nn.fit(X_train, y_train)
# y_valid = nn.predict(TestData[:, 1:])
X_test = TestData[:, 1:]
if sample:
    classifiers = [
        KNeighborsClassifier(n_neighbors=100, weights='uniform', algorithm='auto',
                             leaf_size=100, p=10, metric='minkowski'),
        RandomForestClassifier(n_estimators=100, verbose=True),
        GradientBoostingClassifier(n_estimators=10, learning_rate=1.0,
                                   max_depth=5, random_state=0),
        AdaBoostClassifier(base_estimator=DecisionTreeClassifier(max_depth=20),
                           algorithm="SAMME.R", n_estimators=10),
        Classifier(
            layers=[
                Layer("Tanh", units=200),
                Layer("Sigmoid", units=200),
                Layer('Rectifier', units=200),
                Layer('Softmax')],
            learning_rate=0.01,
            learning_rule='momentum',
            learning_momentum=0.9,
            batch_size=1000,
            valid_size=0.01,
            n_stable=100,
            n_iter=100,
            verbose=True)
    ]
else:
    # Other methods underperform yet take very long to train on this data set.
    classifiers = [
        AdaBoostClassifier(base_estimator=DecisionTreeClassifier(max_depth=20),
                           algorithm="SAMME.R", n_estimators=10),
        Classifier(
            layers=[
                Layer("Tanh", units=200),
dropout_rate =("real", [0,0.5], 0) ) # We create the optimizer object opt = pysmac.SMAC_optimizer( working_directory = './results/dataset5/smac/' % os.environ, persistent_files=True, debug = False) # First we try the a MLP set to a default configuration, so we can see if SMAC can improve its performance scores = [] for i in np.arange(n_validations): X_train, X_test, Y_train, Y_test = sklearn.cross_validation.train_test_split(X,Y, test_size=0.3, random_state=1) predictor = Classifier( layers=[ Layer("Sigmoid", units=100, dropout = 0), Layer("Sigmoid", units=100, dropout = 0), Layer("Softmax", units=2)], learning_rate=0.001, n_iter=25) predictor.fit(X_train, Y_train) scores.append(metrics.accuracy_score(Y_test, predictor.predict(X_test))) print(('The default accuracy is %f'%median(scores))) # We set some parameters for the optimizer value, parameters = opt.minimize(mlp, n_iter, parameter_definition, # number of evaluations num_runs = 2, # number of independent SMAC runs seed = 2, # random seed num_procs = 2, # two cores mem_limit_function_mb=1000, # Memory limit
                                                                 19000:28000]
mini_dev_data, mini_dev_labels = X_final[49000:60000], y_final[49000:60000]

param_grid = {
    'learning_rate': [0.05, 0.01, 0.005, 0.001],
    'n_iter': [25, 50, 100, 200],
    'hidden0__units': [4, 8, 12, 16, 20],
    'hidden0__type': ["Rectifier", "Sigmoid", "Tanh"],
    'hidden0__dropout': [0.2, 0.3, 0.4],
    'hidden1__units': [4, 8, 12, 16, 20],
    'hidden1__type': ["Rectifier", "Sigmoid", "Tanh"],
    'hidden1__dropout': [0.2, 0.3, 0.4],
    'hidden2__units': [4, 8, 12, 16, 20],
    'hidden2__type': ["Rectifier", "Sigmoid", "Tanh"],
    'hidden2__dropout': [0.2, 0.3, 0.4]
}

nn = Classifier(layers=[
    Layer("Sigmoid", units=20),
    Layer("Sigmoid", units=20),
    Layer("Sigmoid", units=20),
    Layer("Softmax")
])

# The estimator being tuned is passed first; the grid keys above target the
# sknn Classifier, so the search is built around `nn`.
gs = GridSearchCV(nn, param_grid)

start = time()
gs.fit(mini_train_data, mini_train_labels)
print("GridSearchCV took {:.2f} seconds for {:d} candidate settings.".format(
    time() - start, len(gs.grid_scores_)))
report(gs.grid_scores_)
from sknn.mlp import Classifier, Layer

valid_errors = []
train_errors = []


def store_stats(avg_valid_error, avg_train_error, **_):
    valid_errors.append(avg_valid_error)
    train_errors.append(avg_train_error)


from sklearn.model_selection import GridSearchCV

nn = Classifier(
    layers=[
        Layer('Sigmoid', dropout=0.20),
        Layer("Softmax")],
    valid_size=0.2,
    callback={'on_epoch_finish': store_stats})

gs = GridSearchCV(nn,
                  param_grid={
                      'n_iter': [100, 500, 1000],
                      'learning_rate': [0.01, 0.001],
                      'hidden0__units': [10, 20, 5],
                      'hidden0__type': ["Rectifier", "Sigmoid", "Tanh"]},
                  refit=True)
gs.fit(X_train, y_train)
print(gs.best_estimator_)

plt.figure()
plt.plot(range(len(train_errors)), train_errors, color="b", label="training scores")
plt.plot(range(len(valid_errors)), valid_errors, color="r", label="validation scores")
# data conversion and normalization
mydata = mydata.replace(['yes', 'no'], [1, 0])

# taking the class variable into another column
y = mydata['y']
del mydata['y']
mynewdata = preprocessing.normalize(mydata)

# creating a model and splitting the data set into training and testing
DefaultTrain, DefaultValidation, y_train, y_test = train_test_split(mynewdata, y,
                                                                    test_size=0.2,
                                                                    random_state=42)

nn = Classifier(layers=[
    Layer("Rectifier", units=100),
    Layer("Softmax")],
    learning_rate=0.003,
    n_iter=25)
nn.fit(DefaultTrain, y_train)

y_valid = nn.predict(DefaultValidation)
print('Accuracy: ', nn.score(DefaultValidation, y_test))
print confusion_matrix(y_test, y_valid)

fpr, tpr, thresholds = metrics.roc_curve(y_test, y_valid, pos_label=1)
roc_auc = auc(fpr, tpr)

plt.figure()
plt.plot(fpr, tpr, color='darkorange', lw=2,
         label='ROC curve (area = %0.2f)' % roc_auc)
plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')