def auto_encode(x, y):
    from sknn import ae, mlp

    # Initialize auto-encoder for unsupervised learning.
    myae = ae.AutoEncoder(
        layers=[
            ae.Layer("Tanh", units=8),
            ae.Layer("Sigmoid", units=4)],
        learning_rate=0.002,
        n_iter=10)

    # Layerwise pre-training using only the input data.
    myae.fit(x)

    # Initialize the multi-layer perceptron with the same base layers.
    mymlp = mlp.Regressor(
        layers=[
            mlp.Layer("Tanh", units=8),
            mlp.Layer("Sigmoid", units=4),
            mlp.Layer("Linear")])

    # Transfer the weights from the auto-encoder.
    myae.transfer(mymlp)

    # Now perform supervised learning as usual.
    mymlp.fit(x, y)
    return mymlp
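
# A minimal usage sketch for auto_encode(), assuming only numpy; the random
# data below is hypothetical and stands in for real 8-feature samples.
import numpy as np

x_demo = np.random.uniform(-1.0, 1.0, (200, 8))
y_demo = np.random.uniform(-1.0, 1.0, (200, 1))

pretrained = auto_encode(x_demo, y_demo)
print(pretrained.predict(x_demo[:5]))
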
def rnn(layers, dataDF, trainSplit, lagConstant, iterations=100):
    # Build the x matrix from the one-step-shifted data.
    x = dataDF.shift(1).dropna()
    # Remember which columns we are predicting.
    predColumns = x.columns
    # Add lagged copies of every column.
    xBind = None
    for lag in range(2, lagConstant + 2):
        xLag = x.shift(lag)
        xLag.columns = [col + 'lag' + str(lag - 1) for col in xLag.columns]
        xBind = xLag if xBind is None else xBind.join(xLag, how='left')
    x = x.join(xBind, how='left')
    x = x.dropna()
    # NOTE: rets_sp500 is read from the enclosing scope.
    y = rets_sp500.loc[x.index.values]
    # Split indices must be integers.
    split = int(trainSplit * len(x))
    xTrain = x.iloc[:split]
    xTest = x.iloc[split:]
    yTrain = y.iloc[:split]
    yTest = y.iloc[split:]
    signalNN = mlp.Regressor(layers, random_state=1, n_iter=iterations)
    signalNN.fit(xTrain.values, yTrain[predColumns].values)
    yPred = signalNN.predict(xTest.values)
    mae = np.mean(np.abs(yPred - yTest[predColumns].values), axis=0)
    return [signalNN, mae]
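
# A hedged usage sketch for rnn(); `daily_rets` is a hypothetical returns
# DataFrame, and rets_sp500 must already exist at module level because the
# function reads it as a global.
import numpy as np
import pandas as pd
from sknn import mlp

daily_rets = pd.DataFrame(np.random.randn(500, 3),
                          columns=['sp500', 'bonds', 'gold'])
rets_sp500 = daily_rets

signal_layers = [mlp.Layer("Rectifier", units=20), mlp.Layer("Linear")]
model, mae = rnn(signal_layers, daily_rets, trainSplit=0.8, lagConstant=3)
print(mae)
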
def __init__(self, w_size=100, input_size=12, mode='returns',
             layers=1, n_itr=50, learn=0.05, AutoEncoder=False):
    self.layers = list()
    # Set up the hidden layers; this topology is highly tentative and
    # subject to change.
    for i in range(layers):
        layer = mlp.Layer('Rectifier', units=input_size)
        self.layers.append(layer)
    self.layers.append(mlp.Layer('Linear'))
    self.learner = mlp.Regressor(self.layers, learning_rate=learn,
                                 n_iter=n_itr)
    self.input_size = input_size
    self.w_size = self.input_size * w_size
    self.data = list()
    self.tstep = 0
    self.mode = mode
    self.std = 1
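
# Hypothetical instantiation sketch, assuming the __init__ above belongs to
# a class named OnlineLearner here (the real class name is not shown); with
# layers=2 and input_size=12 it builds [Rectifier(12), Rectifier(12), Linear]
# inside an sknn Regressor.
learner = OnlineLearner(w_size=100, input_size=12, mode='returns',
                        layers=2, n_itr=50, learn=0.05)
print(learner.w_size)  # 12 * 100 = 1200
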
def _doFit(self, goodData_LR, goodData_HR, weight, local):
    ''' Private function. Fits the neural network. '''

    # Once all the samples have been picked, build the regression using
    # the neural network approach.
    print('Fitting neural network')
    HR_scaler = preprocessing.StandardScaler()
    data_HR = HR_scaler.fit_transform(goodData_HR)
    LR_scaler = preprocessing.StandardScaler()
    data_LR = LR_scaler.fit_transform(goodData_LR.reshape(-1, 1))

    if self.regressionType == REG_sknn_ann:
        layers = []
        if 'hidden_layer_sizes' in self.regressorOpt.keys():
            for layer in self.regressorOpt['hidden_layer_sizes']:
                layers.append(
                    ann_sknn.Layer(self.regressorOpt['activation'],
                                   units=layer))
        else:
            layers.append(
                ann_sknn.Layer(self.regressorOpt['activation'], units=100))
        # Remove options that sknn's Regressor does not accept; pass a
        # default so pop() cannot raise when a key is absent.
        self.regressorOpt.pop('activation', None)
        self.regressorOpt.pop('hidden_layer_sizes', None)
        output_layer = ann_sknn.Layer('Linear', units=1)
        layers.append(output_layer)
        baseRegressor = ann_sknn.Regressor(layers, **self.regressorOpt)
    else:
        baseRegressor = ann_sklearn.MLPRegressor(**self.regressorOpt)

    # NN regressors do not support sample weights.
    weight = None
    reg = ensemble.BaggingRegressor(baseRegressor,
                                    **self.baggingRegressorOpt)
    if data_HR.shape[0] <= 1:
        reg.max_samples = 1.0
    reg = reg.fit(data_HR, np.ravel(data_LR), sample_weight=weight)

    return {"reg": reg, "HR_scaler": HR_scaler, "LR_scaler": LR_scaler}
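
# A sketch of option dictionaries in the shape _doFit() consumes when
# regressionType == REG_sknn_ann; every key and value here is an assumption
# chosen for illustration, not taken from the source.
regressorOpt = {
    'activation': 'Rectifier',        # passed to each ann_sknn.Layer, then popped
    'hidden_layer_sizes': (100, 50),  # one hidden layer per entry, then popped
    'learning_rate': 0.01,            # remaining keys go to ann_sknn.Regressor
    'n_iter': 100,
}
baggingRegressorOpt = {'n_estimators': 10, 'max_samples': 0.8}
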
testFrame = dataclean.cleanDataset(dataclean.loadTestData(), True)
testData = dataclean.convertPandasDataFrameToNumpyArray(testFrame)

trainX = trainData[:, 1:]
trainY = trainData[:, 0]
testX = testData[:, 1:]

""" Cross Validation """

# Learning rules: sgd, momentum, nesterov, adadelta, adagrad or rmsprop.
mlp = nn.Regressor(layers=[nn.Layer("Rectifier", units=7),
                           nn.Layer("Rectifier", units=8),
                           nn.Layer("Rectifier", units=9),
                           nn.Layer("Rectifier", units=8),
                           nn.Layer("Rectifier", units=7),
                           nn.Layer("Linear", units=1)],
                   learning_rate=0.1, random_state=1, n_iter=100,
                   verbose=True, learning_rule="adagrad",
                   valid_size=0.1, batch_size=500)

#cvCount = 10
#crossvalidation = metrics.crossValidationScore(ensemble.GradientBoostingRegressor(random_state=1), trainX, trainY, cvCount=cvCount)

xTrain, xTest, yTrain, yTest = Metrics.traintestSplit(trainX, trainY,
                                                      randomState=1)

# Disabled grid-search experiment below.
"""
# Best parameters found so far:
# {'n_estimators': 400, 'max_depth': 6, 'learning_rate': 0.01}
if __name__ == "__main__":
    params = {"max_depth": [3, 4, 5, 6, 7, 8],
              "n_estimators": [100, 200, 300, 400],
              "learning_rate": [0.01, 0.05, 0.1, 0.2, 0.5, 1]}
    clf = GridSearchCV(crossvalidationTree, params, verbose=1, n_jobs=2,
                       cv=10)
    clf.fit(trainX, trainY)
"""
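
# Because sknn's Regressor follows the scikit-learn estimator API, the
# network above can also be cross-validated directly; a sketch, assuming
# a scikit-learn version with sklearn.model_selection, with cv=10 and the
# scoring choice picked for illustration.
from sklearn.model_selection import cross_val_score

scores = cross_val_score(mlp, trainX, trainY, cv=10,
                         scoring='neg_mean_absolute_error')
print(scores.mean(), scores.std())
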
bothGPAs = pd.concat([norm1, norm2], axis=1)

# plt.figure()
norm1.plot(kind='hist', alpha=.5)
norm2.plot(kind='hist', alpha=.5)
plt.show()

knn = neighbors.KNeighborsRegressor(5, weights="distance")
percep = linear_model.Perceptron(n_iter=15)

layers = []
layers.append(mlp.Layer("Sigmoid", units=9))
layers.append(mlp.Layer("Sigmoid", units=18))
layers.append(mlp.Layer("Linear", units=1))
MLP = mlp.Regressor(layers, learning_rule="momentum")

runRegressionModel(knn)
# runRegressionModel()
runRegressionModel(MLP)

# Disabled fold-based experiment below.
"""
features = allData[featNames]
labels = allData[labelName]
# trainFeat, testFeat, trainLabel, testLabel = train_test_split(features, labels, test_size=0.3, random_state=42)

for train_rows, test_rows in folds:
    roundData = allData.iloc[train_rows]
    print(roundData)
    # roundTrainLab = labels[train_rows]
"""
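
# runRegressionModel() is called above but not shown; a hypothetical sketch
# of such a helper, assuming train/test splits named as in the commented
# train_test_split line (trainFeat, testFeat, trainLabel, testLabel).
from sklearn.metrics import mean_absolute_error

def runRegressionModel(model):
    model.fit(trainFeat, trainLabel)
    predictions = model.predict(testFeat)
    print(mean_absolute_error(testLabel, predictions))
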
# Y = Y.reshape(yshape[0], 1)
# testY = testY.reshape(testyshape[0], 1)
print(X.shape, Y.shape, mainX.shape, mainY.shape, testX.shape, testY.shape)
print(X.max(), X.min(), Y.max(), Y.min())
# Y = Y.reshape(yshape[0], yshape[2])
# testY = testY.reshape(testyshape[0], testyshape[2])
gc.collect()

glob_rf = mlp.Regressor(
    layers=[
        mlp.Native(lasagne.DenseLayer, num_units=1024,
                   nonlinearity=nl.very_leaky_rectify),
        mlp.Native(lasagne.DenseLayer, num_units=512,
                   nonlinearity=nl.very_leaky_rectify),
        mlp.Native(lasagne.DenseLayer, num_units=256,
                   nonlinearity=nl.very_leaky_rectify),
        mlp.Layer("Linear")],
    learning_rate=.1,
    n_iter=5,
    learning_rule="adadelta",
    callback={'on_epoch_finish': store_stats},
    loss_type='mse',
    regularize="L1",    # possibly L1, to instead filter out useless inputs; L1 gave 5+ in results?
    weight_decay=.001,  # default .0001; increased to combat overfitting.
    dropout_rate=0,     # at .2 this keeps 80% of neurons/inputs, anti-overfit.
    verbose=True,
    #valid_set=(testX, testY),
    batch_size=1)       # TRIED NON-1, DIDN'T WORK AT ALL

#glob_rf = pickle.load(open('forest' + str(length) + 'dyn.pkl', 'rb'))  # TODO: only for loading a pre-existing model.

# Begin pre-training with auto-encoders: fit the network to reproduce its
# own input.
try:
    glob_rf.fit(X, X)
except KeyboardInterrupt:
    pass
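
# store_stats() is registered above but not shown; a hypothetical sketch of
# an on_epoch_finish callback. sknn invokes callbacks with keyword
# arguments, so accepting **kwargs and reading values defensively is the
# safe pattern; the exact key names below are assumptions.
epoch_stats = []

def store_stats(**kwargs):
    epoch_stats.append((kwargs.get('avg_train_error'),
                        kwargs.get('avg_valid_error')))
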