def __init__(self, w_size=100, input_size=12, layers=1, n_itr=50,
             learn=0.05, AutoEncoder=False, adaption=0.1):
    # Set up layers -- HIGHLY TENTATIVE AND SUBJECT TO CHANGE.
    self.layers = list()
    for i in range(layers):
        layer = mlp.Layer('Rectifier', units=input_size)
        self.layers.append(layer)
    self.layers.append(mlp.Layer('Softmax'))
    self.learner = mlp.Classifier(self.layers, learning_rate=learn,
                                  n_iter=n_itr)
    self.input_size = input_size
    self.w_size = self.input_size * w_size
    self.data = list()
    self.returns = list()
    self.labels = list()
    self.tstep = 0
    self.sharpeA = 1
    self.sharpeB = 1
    self.adaption = adaption
    self.std = 1
def auto_encode(x, y):
    from sknn import ae, mlp

    # Initialize auto-encoder for unsupervised learning.
    myae = ae.AutoEncoder(
        layers=[
            ae.Layer("Tanh", units=8),
            ae.Layer("Sigmoid", units=4)],
        learning_rate=0.002,
        n_iter=10)

    # Layerwise pre-training using only the input data.
    myae.fit(x)

    # Initialize the multi-layer perceptron with the same base layers.
    mymlp = mlp.Regressor(
        layers=[
            mlp.Layer("Tanh", units=8),
            mlp.Layer("Sigmoid", units=4),
            mlp.Layer("Linear")])

    # Transfer the weights from the auto-encoder.
    myae.transfer(mymlp)

    # Now perform supervised learning as usual.
    mymlp.fit(x, y)
    return mymlp
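# --- Usage sketch (assumption, not from the original source): a minimal
# call of auto_encode() on synthetic data; sknn expects 2-D float arrays,
# and the 8 features match the first layer's units=8.
import numpy as np

rng = np.random.RandomState(0)
x = rng.uniform(size=(100, 8))  # 100 samples, 8 features
y = x.sum(axis=1, keepdims=True) + rng.normal(scale=0.1, size=(100, 1))

model = auto_encode(x, y)
predictions = model.predict(x)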
def __init__(self, w_size=100, input_size=12, mode='returns', layers=1,
             n_itr=50, learn=0.05, AutoEncoder=False):
    # Set up layers -- HIGHLY TENTATIVE AND SUBJECT TO CHANGE.
    self.layers = list()
    for i in range(layers):
        layer = mlp.Layer('Rectifier', units=input_size)
        self.layers.append(layer)
    self.layers.append(mlp.Layer('Linear'))
    self.learner = mlp.Regressor(self.layers, learning_rate=learn,
                                 n_iter=n_itr)
    self.input_size = input_size
    self.w_size = self.input_size * w_size
    self.data = list()
    self.tstep = 0
    self.mode = mode
    self.std = 1
def test_TransferSuccess(self):
    X = numpy.zeros((8, 4))
    ae = AE(layers=[L("Tanh", units=4)], n_iter=1)
    ae.fit(X)
    nn = mlp.MultiLayerPerceptron(layers=[mlp.Layer("Tanh", units=4)])
    ae.transfer(nn)
def test_TransferFailure(self):
    X = numpy.zeros((8, 4))
    ae = AE(layers=[L("Tanh", units=8)], n_iter=1)
    ae.fit(X)
    nn = mlp.MultiLayerPerceptron(layers=[mlp.Layer("Tanh", units=4)])
    assert_raises(AssertionError, ae.transfer, nn)
def _doFit(self, goodData_LR, goodData_HR, weight, local):
    ''' Private function. Fits the neural network. '''

    # Once all the samples have been picked, build the regression using
    # the neural network approach.
    print('Fitting neural network')
    HR_scaler = preprocessing.StandardScaler()
    data_HR = HR_scaler.fit_transform(goodData_HR)
    LR_scaler = preprocessing.StandardScaler()
    data_LR = LR_scaler.fit_transform(goodData_LR.reshape(-1, 1))
    if self.regressionType == REG_sknn_ann:
        layers = []
        if 'hidden_layer_sizes' in self.regressorOpt.keys():
            for layer in self.regressorOpt['hidden_layer_sizes']:
                layers.append(
                    ann_sknn.Layer(self.regressorOpt['activation'],
                                   units=layer))
        else:
            layers.append(
                ann_sknn.Layer(self.regressorOpt['activation'], units=100))
        self.regressorOpt.pop('activation')
        # Use a default so this does not raise KeyError when the option
        # was never set.
        self.regressorOpt.pop('hidden_layer_sizes', None)
        output_layer = ann_sknn.Layer('Linear', units=1)
        layers.append(output_layer)
        baseRegressor = ann_sknn.Regressor(layers, **self.regressorOpt)
    else:
        baseRegressor = ann_sklearn.MLPRegressor(**self.regressorOpt)

    # NN regressors do not support sample weights.
    weight = None
    reg = ensemble.BaggingRegressor(baseRegressor,
                                    **self.baggingRegressorOpt)
    if data_HR.shape[0] <= 1:
        reg.max_samples = 1.0
    reg = reg.fit(data_HR, np.ravel(data_LR), sample_weight=weight)

    return {"reg": reg, "HR_scaler": HR_scaler, "LR_scaler": LR_scaler}
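# --- Usage sketch (assumption): since _doFit returns the fitted ensemble
# together with both scalers, prediction must reverse the scaling. The
# helper name and `newData_HR` are hypothetical.
def _doPredict(model, newData_HR):
    # Scale the inputs with the scaler fitted during training.
    scaled = model["HR_scaler"].transform(newData_HR)
    # Predict in the scaled target space...
    pred_scaled = model["reg"].predict(scaled)
    # ...then map back to the original units of the low-resolution data.
    return model["LR_scaler"].inverse_transform(pred_scaled.reshape(-1, 1))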
# Test Harness
# ------------------------------------------------------------------------------
if __name__ == '__main__':
    # Load dataset.
    featureVecs, labels, numFeatures, numLabelTypes = loadArffDataset(
        'data/faces_vegetables_dataset.arff', normalise=True,
        displayData=True)

    # Construct all classifiers we wish to test, with 'standard' parameters.
    classifiers = {
        'SVM': svm.SVC(kernel='linear', C=1),
        'Decision Tree': tree.DecisionTreeClassifier(criterion='gini',
                                                     splitter='best'),
        'Feed-Forward Neural Network (Sigmoid)': mlp.Classifier(
            layers=[
                mlp.Layer('Sigmoid', units=numFeatures),
                mlp.Layer('Sigmoid', units=numLabelTypes),
            ],
            n_iter=100),
        'Gaussian Naive Bayes': naive_bayes.GaussianNB(),
        'Multinomial Naive Bayes': naive_bayes.MultinomialNB(),
        'Bernoulli Naive Bayes': naive_bayes.BernoulliNB(),
    }

    # Test classifiers and compute their mean scores.
    results = evaluateClassifiers(classifiers, featureVecs, labels, 10)
    scores = computeOverallScores(results)
for p in sorted(PARAMETERS):
    values = PARAMETERS[p]
    # User requested to test against this parameter?
    if p in args.params:
        params.append(values)
    # Otherwise, use the first item of the list as default.
    else:
        params.append(values[:1])

# Build the classifiers for all possible combinations of parameters.
names = []
classifiers = []
for (activation, alpha, dropout, iterations, output, rule,
     units) in itertools.product(*params):
    # Use a separate name for the per-layer keyword arguments so the
    # `params` list driving itertools.product is not shadowed.
    extra = {'pieces': 2} if activation == "Maxout" else {}
    classifiers.append(mlp.Classifier(
        layers=[mlp.Layer(activation, units=units, **extra),
                mlp.Layer(output)],
        random_state=1,
        n_iter=iterations,
        n_stable=iterations,
        dropout=dropout,
        learning_rule=rule,
        learning_rate=alpha))

    t = []
    for k, v in zip(sorted(PARAMETERS),
                    [activation, alpha, dropout, iterations, output, rule,
                     units]):
        if k in args.params:
            t.append(str(v))
    names.append(','.join(t))

# Create randomized datasets for visualizations, on three rows.
seed = int(time.time())
X, y = make_classification(n_features=2, n_redundant=0, n_informative=2,
                           random_state=0, n_clusters_per_class=1)
rng = np.random.RandomState(seed + 1)
X += 2 * rng.uniform(size=X.shape)
print(norm1.values)
print(norm2.values)
bothGPAs = pd.concat([norm1, norm2], axis=1)

# plt.figure()
norm1.plot(kind='hist', alpha=.5)
norm2.plot(kind='hist', alpha=.5)
plt.show()

knn = neighbors.KNeighborsRegressor(5, "distance")
percep = linear_model.Perceptron(n_iter=15)
layers = []
layers.append(mlp.Layer("Sigmoid", units=9))
layers.append(mlp.Layer("Sigmoid", units=18))
layers.append(mlp.Layer("Linear", units=1))
MLP = mlp.Regressor(layers, learning_rule="momentum")

runRegressionModel(knn)
# runRegressionModel()
runRegressionModel(MLP)
"""
features = allData[featNames]
labels = allData[labelName]
# trainFeat, testFeat, trainLabel, testLabel = train_test_split(features, labels, test_size=0.3, random_state=42)
for train_rows, test_rows in folds:
unknown = glob.glob('data/*/unsure?/*.png')

print("Found total of %i files:" % len(positive + negative + unknown))
print(" - %i placed pieces," % len(positive))
print(" - %i missing pieces," % len(negative))
print(" - %i unsure images.\n" % len(unknown))

ds = Dataset()
ds.store(negative, 0, times=1)
ds.store(positive, 1, times=1)
ds.store(unknown, 2, times=2)
X, y = ds.toarray()

nn = mlp.Classifier(
    layers=[
        mlp.Layer("Rectifier", units=48, dropout=0.3),
        mlp.Layer("Rectifier", units=32, dropout=0.1),
        mlp.Layer("Rectifier", units=24),
        mlp.Layer("Softmax")],
    learning_rate=0.01,
    learning_rule='adagrad',
    n_iter=10,
    n_stable=10,
    batch_size=50,
    valid_set=(X, y),
    verbose=1)

try:
    nn.fit(X, y)
except KeyboardInterrupt:
labels_test = np.array(dataset3['labels'])

n_feat = data_train.shape[1]
n_targets = labels_train.max() + 1

import sys
import logging
logging.basicConfig(format="%(message)s", level=logging.DEBUG,
                    stream=sys.stdout)

from sknn import mlp
# Floor division keeps the unit counts integral under Python 3.
net = mlp.Classifier(
    layers=[
        mlp.Layer("Rectifier", units=n_feat * 2 // 3),
        mlp.Layer("Rectifier", units=n_feat * 1 // 3),
        mlp.Layer("Softmax", units=n_targets)],
    n_iter=50,
    n_stable=10,
    learning_rate=0.001,
    valid_size=0.1,
    verbose=1)

net.fit(data_train, labels_train)

from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
expected = labels_test
predicted = net.predict(data_test)
testyshape = testY.shape

X = X.reshape(xshape[0], xshape[2])
testX = testX.reshape(testxshape[0], testxshape[2])
# Y = Y.reshape(yshape[0], 1)
# testY = testY.reshape(testyshape[0], 1)
print(X.shape, Y.shape, mainX.shape, mainY.shape, testX.shape, testY.shape)
print(X.max(), X.min(), Y.max(), Y.min())
# Y = Y.reshape(yshape[0], yshape[2])
# testY = testY.reshape(testyshape[0], testyshape[2])
gc.collect()

glob_rf = mlp.Regressor(
    layers=[
        mlp.Native(lasagne.DenseLayer, num_units=1024,
                   nonlinearity=nl.very_leaky_rectify),
        mlp.Native(lasagne.DenseLayer, num_units=512,
                   nonlinearity=nl.very_leaky_rectify),
        mlp.Native(lasagne.DenseLayer, num_units=256,
                   nonlinearity=nl.very_leaky_rectify),
        mlp.Layer("Linear")],
    learning_rate=.1,
    n_iter=5,
    learning_rule="adadelta",
    callback={'on_epoch_finish': store_stats},
    loss_type='mse',
    regularize="L1",  # possibly L1, to instead filter out useless inputs. L1 gave 5+ in results?
    weight_decay=.001,  # default .0001; increase to combat overfitting
    dropout_rate=0,  # keep 80% of neurons/inputs at .2, anti overfit
    verbose=True,
    #valid_set=(testX, testY),
    batch_size=1)  # tried batch sizes other than 1; they didn't work at all

#glob_rf = pickle.load(open('forest' + str(length) + 'dyn.pkl', 'rb'))  # TODO: only for loading a pre-existing model

# begin pre-training with autoencoders
data_sp500 = data_sp500.dropna(axis=1)

##############################
#####Select Target Stocks#####
##############################

# Calculate log daily returns.
rets_sp500 = np.log(data_sp500 / data_sp500.shift(1))
rets_sp500 = rets_sp500.fillna(0)
rets_sp500.head()

# Optimize model.
layers = [mlp.Layer('Tanh', units=len(rets_sp500.columns) * 3),
          mlp.Layer('Tanh', units=len(rets_sp500.columns) * 3),
          mlp.Layer('Tanh', units=len(rets_sp500.columns) * 3),
          mlp.Layer('Linear')]
testRun = rnn(layers, rets_sp500, 0.8, 3, 100)
np.mean(testRun[1])
np.mean(rets_sp500.as_matrix())
    # User requested to test against this parameter?
    if p in args.params:
        params.append(values)
    # Otherwise, use the first item of the list as default.
    else:
        params.append(values[:1])

# Build the classifiers for all possible combinations of parameters.
names = []
classifiers = []
for (activation, alpha, dropout, iterations, output, regularize, rule,
     units) in itertools.product(*params):
    # Avoid shadowing the `params` list that feeds itertools.product.
    extra = {'pieces': 2} if activation == "Maxout" else {}
    classifiers.append(
        mlp.Classifier(
            layers=[
                mlp.Layer(activation, units=units, **extra),
                mlp.Layer(output)],
            random_state=1,
            n_iter=iterations,
            n_stable=iterations,
            regularize=regularize,
            dropout_rate=dropout,
            learning_rule=rule,
            learning_rate=alpha))

    t = []
    for k, v in zip(sorted(PARAMETERS), [
            activation, alpha, dropout, iterations, output, regularize,
            rule, units]):
data_train = np.vstack([dataset1['data']])  #, dataset2['data']])
labels_train = np.hstack([dataset1['labels']])  #, dataset2['labels']])

data_train = data_train.astype('float') / 255.
labels_train = labels_train

data_test = dataset3['data'].astype('float') / 255.
labels_test = np.array(dataset3['labels'])

n_feat = data_train.shape[1]
n_targets = labels_train.max() + 1

from sknn import mlp
# Floor division keeps the unit counts integral under Python 3.
nn = mlp.Classifier(
    layers=[
        mlp.Layer("Tanh", units=n_feat * 2 // 3),
        mlp.Layer("Sigmoid", units=n_feat * 1 // 3),
        mlp.Layer("Softmax", units=n_targets)],
    n_iter=50,
    n_stable=10,
    learning_rate=0.001,
    valid_size=0.5,
    verbose=1)

if PRETRAIN:
    from sknn import ae
    ae = ae.AutoEncoder(
        layers=[
            ae.Layer("Tanh", units=n_feat * 2 // 3),
            ae.Layer("Sigmoid", units=n_feat * 2 // 3)],
def run_neural_net(training_features, training_labels, test_features,
                   test_labels):
    """
    Classifies the data using sknn's multi-layer perceptron.

    Parameters
    ----------
    training_features: feature vectors used to train the classifier
    training_labels: labels corresponding to the training features
    test_features: feature vectors used to test the classifier
    test_labels: labels corresponding to the test features

    Returns
    -------
    prediction: predicted labels of the test data
    accuracy: percent of test data labels accurately predicted
    """

    time_1 = time.time()

    # Set the number of classes in the data.
    number_of_outputs = training_labels.astype(int).max() + 1
    number_of_inputs = training_features.shape[1]

    # Determine optimal hidden nodes based on Huang et al. (2003).
    first_layer_nodes = int(
        math.sqrt((number_of_outputs + 2) * number_of_inputs) +
        2 * math.sqrt(number_of_inputs / (number_of_outputs + 2)))
    second_layer_nodes = int(
        number_of_outputs *
        math.sqrt(number_of_inputs / (number_of_outputs + 2)))

    # Set up the layers.
    input_layer = mlp_nn.Layer("Linear", units=number_of_inputs)
    hidden_layer1 = mlp_nn.Layer("Sigmoid", units=first_layer_nodes)
    hidden_layer2 = mlp_nn.Layer("Sigmoid", units=second_layer_nodes)
    output_layer = mlp_nn.Layer("Softmax", units=number_of_outputs)
    layers = [input_layer, hidden_layer1, hidden_layer2, output_layer]

    # Set up the classifier.
    neural_net = mlp_nn.Classifier(layers=layers, learning_rate=0.02,
                                   n_iter=5)

    # Set up tuning parameters.
    parameters = {"learning_rate": [0.02], "n_iter": [1, 5, 10, 25, 50]}

    # Create the cross-validation iterator.
    cv = ShuffleSplit(training_features.shape[0], n_iter=5, test_size=0.2,
                      random_state=0)

    # Set up the tuning algorithm.
    classifier = GridSearchCV(estimator=neural_net, cv=cv,
                              param_grid=parameters)
    classifier.fit(training_features, training_labels)

    test_prediction = classifier.predict(test_features)
    test_accuracy = classifier.score(test_features, test_labels)

    time_2 = time.time()

    graph_title = ("Learning Curves \n(Neural Net, learning rate=%f)" %
                   classifier.best_estimator_.learning_rate)
    plot_learning_curve_iter(classifier, graph_title)
    pylab.savefig(
        os.path.join(results_location, 'Validator Curves - Neural Net.png'))

    time_3 = time.time()

    # Output time stats:
    # time 1 -> time 2 is optimization time;
    # time 2 -> time 3 is the run for just one case.
    print("Neural Net Time Stats")
    print("Optimization Time -> %f" % (time_2 - time_1))
    print("Single Run Time -> %f" % (time_3 - time_2))

    # Output classification report and confusion matrix.
    print('\n\n----------------------------')
    print('Classification Report')
    print('----------------------------\n')
    print(classification_report(y_true=test_labels, y_pred=test_prediction))
    print('\n\n----------------------------')
    print('Confusion Matrix')
    print('----------------------------\n')
    print(confusion_matrix(y_true=test_labels, y_pred=test_prediction))

    return test_prediction, test_accuracy
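# --- Worked check (assumption): the Huang et al. (2003) sizing rule used
# above, evaluated for a hypothetical problem with 64 inputs and 10 classes.
import math

number_of_inputs, number_of_outputs = 64, 10
first = int(math.sqrt((number_of_outputs + 2) * number_of_inputs) +
            2 * math.sqrt(number_of_inputs / (number_of_outputs + 2)))
second = int(number_of_outputs *
             math.sqrt(number_of_inputs / (number_of_outputs + 2)))
print(first, second)  # -> 32 23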
labels_train = np.hstack([dataset1['labels']])
#, dataset2['labels'], dataset3['labels'], dataset4['labels'], dataset5['labels']])

data_train = data_train.astype('float') / 255.
labels_train = labels_train

data_test = dataset0['data'].astype('float') / 255.
labels_test = np.array(dataset0['labels'])

n_feat = data_train.shape[1]
n_targets = labels_train.max() + 1

from sknn import mlp
# Floor division keeps the unit counts integral under Python 3.
nn = mlp.Classifier(
    layers=[
        mlp.Layer("Tanh", units=n_feat // 8),
        mlp.Layer("Sigmoid", units=n_feat // 16),
        mlp.Layer("Softmax", units=n_targets)],
    n_iter=50,
    n_stable=10,
    learning_rate=0.002,
    learning_rule="momentum",
    valid_size=0.1,
    verbose=1)

if PRETRAIN:
    from sknn import ae
    ae = ae.AutoEncoder(
        layers=[
            ae.Layer("Tanh", units=n_feat // 8),
            ae.Layer("Sigmoid", units=n_feat // 16)
classifiers = []

if 'dbn' in sys.argv:
    from nolearn.dbn import DBN
    clf = DBN(
        [X_train.shape[1], 300, 10],
        learn_rates=0.3,
        learn_rate_decays=0.9,
        epochs=10,
        verbose=1)
    classifiers.append(('nolearn.dbn', clf))

if 'sknn' in sys.argv:
    from sknn import mlp
    clf = mlp.Classifier(
        layers=[mlp.Layer("Rectifier", units=300), mlp.Layer("Softmax")],
        learning_rate=0.02,
        learning_rule='momentum',
        batch_size=25,
        valid_size=0.0,
        n_stable=10,
        n_iter=10,
        verbose=1,
    )
    classifiers.append(('sknn.mlp', clf))

if 'lasagne' in sys.argv:
    from nolearn.lasagne import NeuralNet
    from lasagne.layers import InputLayer, DenseLayer
    from lasagne.nonlinearities import softmax
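# --- Usage sketch (assumption): the snippet stops at the lasagne imports;
# a (name, clf) list like this is typically consumed as below. X_test and
# y_test are hypothetical, mirroring X_train.
for name, clf in classifiers:
    clf.fit(X_train, y_train)
    print("%s: %.4f" % (name, clf.score(X_test, y_test)))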
def testingAlgoTypes(_all_country_data, MP4, verbose=0):
    print("\n \n \n Testing various untrained classification algorithms "
          "on each country's separate sub datasets")
    all_country_data_with_algos = copy.deepcopy(_all_country_data)

    # Parameters for the neural network.
    nn_layers = [mlp.Layer('Sigmoid', units=7, name="Layer1"),
                 mlp.Layer("Softmax")]
    nn_params = {
        'layers': nn_layers,
        'learning_momentum': 0.9,
        'n_stable': 10,
        'f_stable': 0.01,
        'learning_rate': 0.001,
        'learning_rule': 'adadelta',
        'random_state': seed,
        'n_iter': 8,
        'batch_size': 100,
        'warning': None,
        'verbose': None,
        'debug': False
    }
    max_iter_params = {'max_iter': 1000}

    classifiers = [
        LinearDiscriminantAnalysis(solver='eigen', shrinkage='auto'),
        linear_model.RidgeClassifier(random_state=seed),
        linear_model.LogisticRegression(solver='saga', penalty='l2',
                                        class_weight='balanced',
                                        random_state=seed),
        neighbors.KNeighborsClassifier(n_neighbors=9, weights='distance',
                                       leaf_size=20),
        svm.LinearSVC(class_weight='balanced', random_state=seed,
                      dual=False),
        ensemble.RandomForestClassifier(n_estimators=200,
                                        min_samples_split=5,
                                        min_samples_leaf=3, max_depth=3,
                                        random_state=seed),
        ensemble.GradientBoostingClassifier(random_state=seed,
                                            n_estimators=200,
                                            min_samples_split=5,
                                            max_features='sqrt'),
        mlp.Classifier(**nn_params),
        linear_model.PassiveAggressiveClassifier(max_iter=1000,
                                                 random_state=seed,
                                                 class_weight="balanced"),
        linear_model.SGDClassifier(max_iter=1000, random_state=seed,
                                   class_weight='balanced', penalty='l2')
    ]
    headers = ['LDA', 'RC', 'LogR', 'KNN', 'SVM', 'RF', 'GBC', 'NN', 'PAC',
               'SGD']

    for country in all_country_data_with_algos.keys():
        df_cv_results = pd.DataFrame(columns=headers)
        # Iterate through the different business cycles.
        for _bus_cycle in all_country_data_with_algos[country].keys():
            means_vars_for_clf = []
            result_all_clf = []
            Y_target = all_country_data_with_algos[country][_bus_cycle].get("Y")
            X_features = all_country_data_with_algos[country][_bus_cycle].get("X")
            for _clf in classifiers:
                # Creating pipelines.
                #standardizer = ('standardize', preprocessing.StandardScaler())
                algo = ('clf', _clf)
                steps = []
                #steps.append(standardizer)
                steps.append(algo)
                pipeline_clf = pipeline.Pipeline(steps)
                kfold = model_selection.KFold(n_splits=2, random_state=seed,
                                              shuffle=True)
                result_clf = model_selection.cross_val_score(
                    pipeline_clf, np.array(X_features),
                    Y_target.values.ravel(), cv=kfold, n_jobs=1)
                # Used to find the top 3 methods.
                result_all_clf = result_all_clf + [result_clf.mean()]
                # Used for the Excel sheet.
                means_vars_for_clf = means_vars_for_clf + [
                    "{0:.3g}".format(result_clf.mean())]
            df_cv_results.loc["{}-{}".format(country, _bus_cycle), :] = \
                means_vars_for_clf

            # Gather the names of the top three algorithms to be inserted
            # into the all_country_data dictionary; stored as the
            # abbreviation of each algorithm.
            top3 = sorted(result_all_clf, reverse=True)[:3]
            indexes_of_top_3 = [result_all_clf.index(x) for x in top3]
            top_3_algos_by_mean = [headers[x] for x in indexes_of_top_3]
            all_country_data_with_algos[country][_bus_cycle].update(
                {"algos": top_3_algos_by_mean})

        if MP4 == True:
            df_cv_results.to_excel(
                '../Reserach/Classifier Cross Validation Scores For All Countries/All/'
                + country + '.xlsx', index=False)
        if MP4 == "Only":
            df_cv_results.to_excel(
                '../Reserach/Classifier Cross Validation Scores For All Countries/Only/'
                + country + '.xlsx', index=False)
        if MP4 == False:
            df_cv_results.to_excel(
                '../Reserach/Classifier Cross Validation Scores For All Countries/Excl/'
                + country + '.xlsx', index=False)
        if verbose > 0:
            print(df_cv_results)
            print("\n")

    saveTopThreeAlgos(all_country_data_with_algos)
    return all_country_data_with_algos
def create_estimator(estimator_name, class_weight):
    estimator = None
    param_grid = None
    support_class_weight = False

    if estimator_name == "logistic_regression":
        from sklearn import linear_model
        estimator = linear_model.LogisticRegression(class_weight=class_weight)
        param_grid = {"C": np.logspace(-3, 4, 20)}
        support_class_weight = True
    elif estimator_name == "random_forest":
        estimator = ensemble.RandomForestClassifier(class_weight=class_weight)
        param_grid = {
            "n_estimators": list(range(10, 110, 10)),
            "max_features": ("auto", 0.5, 0.8, None)
            # "max_features": np.arange(int(np.sqrt(n_features)), n_features, step=4)
        }
        support_class_weight = True
        # support_class_weight = False
    elif estimator_name == "gradient_boosting":
        """
        import xgboost.sklearn as xgb
        estimator = xgb.XGBClassifier(learning_rate=0.1)
        param_grid = {
            # "n_estimators": list(range(150, 250, 10)),
            # "max_depth": list(range(3, 8))
        }
        """
        # For some unknown reason, XGBoost does not perform well on my
        # machine and hangs sometimes; fall back to the less efficient
        # implementation in sklearn.
        estimator = ensemble.GradientBoostingClassifier(learning_rate=0.1,
                                                        warm_start=True)
        param_grid = {
            "n_estimators": list(range(150, 250, 10)),
            "max_depth": list(range(3, 8))
        }
    elif estimator_name == "adaboost":
        estimator = ensemble.AdaBoostClassifier()
        param_grid = {
            "n_estimators": list(range(30, 150, 10)),
            "learning_rate": np.logspace(-1, 0, 2)
        }
    elif estimator_name.startswith("svc_"):
        subtype = estimator_name[4:]
        from sklearn import svm
        if subtype == "linear":
            # Linear SVC uses liblinear instead of libsvm internally,
            # which is more efficient.
            param_grid = {
                "C": np.logspace(-6, 2, 50),
            }
            estimator = svm.LinearSVC(
                # dual=False when n_samples > n_features according to the API doc.
                dual=False,
                class_weight=class_weight)
        else:
            estimator = svm.SVC(
                shrinking=False,
                cache_size=2048,
                verbose=False,
                probability=False,  # use True when predict_proba() is needed
                class_weight=class_weight)
            if subtype == "rbf":
                estimator.set_params(kernel="rbf")
                param_grid = {
                    "C": np.logspace(-2, 2, 20),
                    "gamma": np.logspace(-2, -1, 3)
                }
            else:  # poly
                estimator.set_params(kernel="poly")
                param_grid = {"degree": [2], "C": np.logspace(-3, 1, 20)}
        support_class_weight = True
    elif estimator_name == "mlp1" or estimator_name == "mlp2":
        # Multi-layer perceptron neural network.
        from sknn import mlp
        param_grid = {
            "learning_rate": [0.0001],
            "regularize": ["l2"],  # , "dropout"],
            "weight_decay": np.logspace(-6, -5, 2),  # parameter for L2 regularizer
            "hidden0__type": ["Tanh"]  # "Rectifier", "Sigmoid"
        }
        layers = [mlp.Layer(type="Tanh", name="hidden0")]
        # Add the second hidden layer as needed.
        if estimator_name == "mlp2":  # 2 hidden layers
            layers.append(mlp.Layer(type="Tanh", name="hidden1"))
            param_grid["hidden0__units"] = list(range(2, 5, 1))
            param_grid["hidden1__units"] = list(range(2, 5, 1))
            param_grid["hidden1__type"] = ["Tanh"]  # "Rectifier", "Sigmoid"
        else:
            param_grid["hidden0__units"] = list(range(5, 26, 1))
        # Add the output layer.
        layers.append(mlp.Layer("Softmax"))
        estimator = mlp.Classifier(layers=layers, batch_size=150)

    return estimator, param_grid, support_class_weight
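# --- Usage sketch (assumption): wiring the returned triple into a grid
# search. X_train and y_train are hypothetical.
from sklearn.model_selection import GridSearchCV

estimator, param_grid, supports_weight = create_estimator("mlp1", "balanced")
search = GridSearchCV(estimator, param_grid, cv=3, n_jobs=1)
search.fit(X_train, y_train)
print(search.best_params_, search.best_score_)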
Y = traindata[1:, 0]

cv = train_test_split(X, Y, test_size=.33, random_state=20)
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=.33,
                                                    random_state=20)

# Finding the optimal component.
AELayers = [
    ae.Layer("Sigmoid", units=120),
    ae.Layer("Sigmoid", units=60),
    ae.Layer("Sigmoid", units=30)
]
NNLayers = [
    mlp.Layer("Sigmoid", units=120),
    mlp.Layer("Sigmoid", units=75),
    mlp.Layer("Softmax", units=5)
]

##
##for each in complist:
##    comp = each
t0 = time.clock()
print("Time started")

# Fit the Autoencoder.
result = ae.AutoEncoder(AELayers,
                        warning=None,
                        random_state=0,
trainData = dataclean.convertPandasDataFrameToNumpyArray(trainFrame)

testFrame = dataclean.cleanDataset(dataclean.loadTestData(), True)
testData = dataclean.convertPandasDataFrameToNumpyArray(testFrame)

trainX = trainData[:, 1:]
trainY = trainData[:, 0]

testX = testData[:, 1:]

"""
Cross Validation
"""
# Learning rules: sgd, momentum, nesterov, adadelta, adagrad or rmsprop.
mlp = nn.Regressor(
    layers=[nn.Layer("Rectifier", units=7),
            nn.Layer("Rectifier", units=8),
            nn.Layer("Rectifier", units=9),
            nn.Layer("Rectifier", units=8),
            nn.Layer("Rectifier", units=7),
            nn.Layer("Linear", units=1)],
    learning_rate=0.1,
    random_state=1,
    n_iter=100,
    verbose=True,
    learning_rule="adagrad",
    valid_size=0.1,
    batch_size=500)

#cvCount = 10
#crossvalidation = metrics.crossValidationScore(ensemble.GradientBoostingRegressor(random_state=1), trainX, trainY, cvCount=cvCount)

xTrain, xTest, yTrain, yTest = Metrics.traintestSplit(trainX, trainY,
                                                      randomState=1)

"""
#{'n_estimators': 400, 'max_depth': 6, 'learning_rate': 0.01
if __name__ == "__main__":
    params = {"max_depth": [3, 4, 5, 6, 7, 8],
              "n_estimators": [100, 200, 300, 400],
              "learning_rate": [0.01, 0.05, 0.1, 0.2, 0.5, 1]}
Y = traindata[1:, 0]

cv = train_test_split(X, Y, test_size=.33, random_state=20)
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=.33,
                                                    random_state=20)

# Finding the optimal component.
AELayers = [
    ae.Layer("Sigmoid", units=1000),
    ae.Layer("Sigmoid", units=500),
    ae.Layer("Sigmoid", units=250)
]
NNLayers = [
    mlp.Layer("Sigmoid", units=1000),
    mlp.Layer("Sigmoid", units=500),
    mlp.Layer("Softmax", units=15)
]

##
##for each in complist:
##    comp = each
t0 = time.clock()
print("Time started")

# Fit the Autoencoder.
result = ae.AutoEncoder(AELayers,
                        warning=None,
                        random_state=0,
import MNIST.DataClean as dc
import numpy as np
import sknn.mlp as mlp
import pickle

try:
    nn = pickle.load(open("simplenn.pkl", "rb"))
    print("Model loaded")
except:
    nn = None

layers = [
    mlp.Convolution("Rectifier", channels=10, kernel_shape=(2, 2)),
    mlp.Layer("Rectifier", units=1000),
    mlp.Layer("Softmax", units=10)
]

if nn is None:
    trainFrame = dc.loadTrainData(describe=False)
    trainData = dc.convertPandasDataFrameToNumpyArray(trainFrame)

    nn = mlp.Classifier(layers=layers,
                        learning_rate=0.00001,
                        valid_size=0,
                        random_state=0,
                        n_iter=50,
                        verbose=True,
                        batch_size=1000,
                        learning_rule="nesterov")

    nn.fit(trainData[:, 1:], trainData[:, 0])
    print("Model fitting complete")
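# --- Usage sketch (assumption): the save-and-predict side of the
# load-or-train pattern above. The test-data helpers are hypothetical,
# mirroring the training helpers.
with open("simplenn.pkl", "wb") as f:
    pickle.dump(nn, f)  # so the pickle.load() branch succeeds next run

testFrame = dc.loadTestData()
testData = dc.convertPandasDataFrameToNumpyArray(testFrame)
predictions = nn.predict(testData)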
def fineTuneModel(_all_country_data_with_algos):
    print("\n \n Fine Tuning Parameters for the top 3 predictive "
          "algorithms for each country for each sub dataset split by "
          "Mentality/Business Cycle")
    all_country_data_with_algos = copy.deepcopy(_all_country_data_with_algos)
    algos_dict = {
        "LDA": LinearDiscriminantAnalysis(),
        "RC": linear_model.RidgeClassifier(),
        "LogR": linear_model.LogisticRegression(),
        "KNN": neighbors.KNeighborsClassifier(),
        "SVM": svm.LinearSVC(),
        "RF": ensemble.RandomForestClassifier(verbose=0),
        "GBC": ensemble.GradientBoostingClassifier(verbose=0),
        "NN": mlp.Classifier(layers=[mlp.Layer('Rectifier', units=7),
                                     mlp.Layer("Softmax")]),
        "PAC": linear_model.PassiveAggressiveClassifier(),
        "SGD": linear_model.SGDClassifier()
    }
    cv_folds = 3
    n_jobs_count = np.arange(1, 2)
    results = {}
    for country in all_country_data_with_algos.keys():
        for _bus_cycle in all_country_data_with_algos[country]:
            X = all_country_data_with_algos[country][_bus_cycle].get("X")
            Y = all_country_data_with_algos[country][_bus_cycle].get("Y")
            all_country_data_with_algos[country][_bus_cycle].update(
                {"trained algos": []})
            for _algo in all_country_data_with_algos[country][
                    _bus_cycle].get("algos"):
                # Possible parameters for each algorithm.
                _parameters = {}
                if _algo == "LDA":
                    lda_n_components = np.arange(2, 8, 1)
                    shrinkage = ['auto']
                    lda_solver = ['lsqr', 'eigen']
                    _parameters.update({'n_components': lda_n_components,
                                        'solver': lda_solver,
                                        'shrinkage': shrinkage})
                if _algo == "RC":
                    rc_class_weight = ['balanced']
                    rc_solver = ['saga', 'sparse_cg', 'svd']
                    alpha = np.arange(0.5, 4.5, 0.5)
                    _parameters.update({'class_weight': rc_class_weight,
                                        'solver': rc_solver,
                                        'alpha': alpha})
                if _algo == "LogR":
                    lr_penalty = ['l1', 'l2']
                    lr_class_weight = ['balanced']
                    lr_solver = ['liblinear']
                    _parameters.update({'penalty': lr_penalty,
                                        'class_weight': lr_class_weight,
                                        'solver': lr_solver,
                                        'random_state': [seed]})
                if _algo == "KNN":
                    knn_neighbors = np.arange(2, 13, 1)
                    knn_weights = ['uniform', 'distance']
                    knn_leaf_size = np.arange(10, 30, 2)
                    _parameters.update({'n_neighbors': knn_neighbors,
                                        'weights': knn_weights,
                                        'leaf_size': knn_leaf_size})
                if _algo == "SVM":
                    # TODO: put change of kernel in afterwards.
                    svm_weights = ['balanced']
                    dual = [False]
                    _parameters.update({'class_weight': svm_weights,
                                        'dual': dual,
                                        'random_state': [seed]})
                if _algo == "RF":
                    rf_max_depth = np.arange(1, 5, 1)
                    n_estimators = np.asarray([200])
                    min_samples_leaf = np.arange(3, 6, 1)
                    min_samples_split = np.arange(3, 5, 1)
                    max_features = ["sqrt"]
                    _parameters.update({'max_depth': rf_max_depth,
                                        'n_estimators': n_estimators,
                                        'min_samples_leaf': min_samples_leaf,
                                        'min_samples_split': min_samples_split,
                                        'max_features': max_features,
                                        'random_state': [seed]})
                if _algo == "GBC":
                    gb_loss = ['deviance']
                    gb_max_depth = np.arange(1, 5, 1)
                    n_estimators = np.asarray([200])
                    min_samples_leaf = np.arange(3, 6, 1)
                    min_samples_split = np.arange(3, 6, 1)
                    max_features = ["sqrt"]
                    _parameters.update({'loss': gb_loss,
                                        'max_depth': gb_max_depth,
                                        'min_samples_leaf': min_samples_leaf,
                                        'min_samples_split': min_samples_split,
                                        'n_estimators': n_estimators,
                                        'max_features': max_features,
                                        'random_state': [seed]})
                if _algo == "NN":
                    layer_1 = [mlp.Layer(type="Sigmoid", units=7,
                                         name="layer1"),
                               mlp.Layer(type="Softmax", name="layer2")]
                    #mlp.Layer('Rectifier', units=5)
                    nn_layers = [layer_1]
                    nn_regularize = ['L1']
                    learning_rate = [0.01]
                    n_iter = [1000]
                    weight_decay = [0.01]
                    learning_rule = ['adadelta']
                    momentum = [0.90]
                    n_stable = np.arange(150, 151, 2)
                    f_stable = [0.001]
                    dropout_rate = np.asarray([0, 0.25, 0.5])
                    random_state = [seed]
                    # Hidden layer size should be the average of the input
                    # and output layers.
                    nn_params = {'layers': nn_layers,
                                 'regularize': nn_regularize,
                                 'learning_rate': learning_rate,
                                 'n_iter': n_iter,
                                 'learning_rule': learning_rule,
                                 'weight_decay': weight_decay,
                                 'learning_momentum': momentum,
                                 'n_stable': n_stable,
                                 'random_state': random_state}
                    _parameters.update(nn_params)
                if _algo == "PAC":
                    class_weight = ['balanced']
                    max_iter = np.arange(1000, 10001, 1)
                    _parameters.update({'class_weight': class_weight,
                                        'max_iter': max_iter,
                                        'random_state': [seed]})
                if _algo == "SGD":
                    loss = ['squared_hinge', 'hinge']
                    class_weight = ['balanced']
                    penalty = ['l2', 'l1', 'elasticnet']
                    _parameters.update({'loss': loss,
                                        'class_weight': class_weight,
                                        'max_iter': [1000],
                                        'penalty': penalty,
                                        'random_state': [seed]})
                _grid = model_selection.GridSearchCV(
                    algos_dict.get(_algo), param_grid=_parameters,
                    cv=cv_folds, n_jobs=1)
                _grid.fit(np.array(X), Y.as_matrix().flatten())
                trained_algo = _grid.best_estimator_
                all_country_data_with_algos[country][_bus_cycle][
                    "trained algos"].append(trained_algo)
    return all_country_data_with_algos
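# --- Usage sketch (assumption): chaining the two stages above. The
# `all_country_data` dict is the nested country -> business-cycle ->
# {"X", "Y"} structure both functions expect.
ranked = testingAlgoTypes(all_country_data, MP4=False, verbose=1)
tuned = fineTuneModel(ranked)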