Code Example #1
File: MLP_Classifier.py  Project: hgazali/AutoBuffett
    def __init__(self,
                 w_size=100,
                 input_size=12,
                 layers=1,
                 n_itr=50,
                 learn=0.05,
                 AutoEncoder=False,
                 adaption=0.1):
        self.layers = list()
        # Set up layers -- highly tentative and subject to change
        for i in range(layers):
            layer = mlp.Layer('Rectifier', units=input_size)
            self.layers.append(layer)
        self.layers.append(mlp.Layer('Softmax'))
        self.learner = mlp.Classifier(self.layers,
                                      learning_rate=learn,
                                      n_iter=n_itr)
        self.input_size = input_size
        self.w_size = self.input_size * w_size
        self.data = list()
        self.returns = list()
        self.labels = list()
        self.tstep = 0

        self.sharpeA = 1
        self.sharpeB = 1
        self.adaption = adaption

        self.std = 1
        return
Code Example #2
def auto_encode(x, y):
    from sknn import ae, mlp

    # Initialize auto-encoder for unsupervised learning.
    myae = ae.AutoEncoder(
        layers=[ae.Layer("Tanh", units=8),
                ae.Layer("Sigmoid", units=4)],
        learning_rate=0.002,
        n_iter=10)

    # Layerwise pre-training using only the input data.
    myae.fit(x)

    # Initialize the multi-layer perceptron with same base layers.
    mymlp = mlp.Regressor(layers=[
        mlp.Layer("Tanh", units=8),
        mlp.Layer("Sigmoid", units=4),
        mlp.Layer("Linear")
    ])

    # Transfer the weights from the auto-encoder.
    myae.transfer(mymlp)
    # Now perform supervised-learning as usual.
    mymlp.fit(x, y)
    return mymlp
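A minimal smoke test for the helper above, assuming sknn is installed and using random NumPy arrays as stand-in data (the shapes below are arbitrary):

import numpy as np

# Hypothetical stand-in data: 100 samples, 8 features, one regression target.
X = np.random.uniform(size=(100, 8))
y = np.random.uniform(size=(100, 1))

model = auto_encode(X, y)        # layerwise pre-training, weight transfer, then supervised fit
predictions = model.predict(X)   # shape (100, 1)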
Code Example #3
    def __init__(self,
                 w_size=100,
                 input_size=12,
                 mode='returns',
                 layers=1,
                 n_itr=50,
                 learn=0.05,
                 AutoEncoder=False):
        self.layers = list()
        # Set up layers -- highly tentative and subject to change
        for i in range(layers):
            layer = mlp.Layer('Rectifier', units=input_size)
            self.layers.append(layer)
        self.layers.append(mlp.Layer('Linear'))
        self.learner = mlp.Regressor(self.layers,
                                     learning_rate=learn,
                                     n_iter=n_itr)
        self.input_size = input_size
        self.w_size = self.input_size * w_size
        self.data = list()
        self.tstep = 0
        self.mode = mode

        self.std = 1
        return
Code Example #4
File: test_ae.py  Project: edmarola/RedesNeuronales
    def test_TransferSuccess(self):
        X = numpy.zeros((8, 4))
        ae = AE(layers=[L("Tanh", units=4)], n_iter=1)
        ae.fit(X)

        nn = mlp.MultiLayerPerceptron(layers=[mlp.Layer("Tanh", units=4)])
        ae.transfer(nn)
Code Example #5
File: test_ae.py  Project: edmarola/RedesNeuronales
    def test_TransferFailure(self):
        X = numpy.zeros((8, 4))
        ae = AE(layers=[L("Tanh", units=8)], n_iter=1)
        ae.fit(X)

        nn = mlp.MultiLayerPerceptron(layers=[mlp.Layer("Tanh", units=4)])
        assert_raises(AssertionError, ae.transfer, nn)
Code Example #6
File: pyDMS.py  Project: tuncaemre/pyDMS
    def _doFit(self, goodData_LR, goodData_HR, weight, local):
        ''' Private function. Fits the neural network.
        '''

        # Once all the samples have been picked build the regression using
        # neural network approach
        print('Fitting neural network')
        HR_scaler = preprocessing.StandardScaler()
        data_HR = HR_scaler.fit_transform(goodData_HR)
        LR_scaler = preprocessing.StandardScaler()
        data_LR = LR_scaler.fit_transform(goodData_LR.reshape(-1, 1))
        if self.regressionType == REG_sknn_ann:
            layers = []
            if 'hidden_layer_sizes' in self.regressorOpt.keys():
                for layer in self.regressorOpt['hidden_layer_sizes']:
                    layers.append(
                        ann_sknn.Layer(self.regressorOpt['activation'],
                                       units=layer))
            else:
                layers.append(
                    ann_sknn.Layer(self.regressorOpt['activation'], units=100))
            self.regressorOpt.pop('activation')
            self.regressorOpt.pop('hidden_layer_sizes')
            output_layer = ann_sknn.Layer('Linear', units=1)
            layers.append(output_layer)
            baseRegressor = ann_sknn.Regressor(layers, **self.regressorOpt)
        else:
            baseRegressor = ann_sklearn.MLPRegressor(**self.regressorOpt)

        # NN regressors do not support sample weights.
        weight = None

        reg = ensemble.BaggingRegressor(baseRegressor,
                                        **self.baggingRegressorOpt)
        if data_HR.shape[0] <= 1:
            reg.max_samples = 1.0
        reg = reg.fit(data_HR, np.ravel(data_LR), sample_weight=weight)

        return {"reg": reg, "HR_scaler": HR_scaler, "LR_scaler": LR_scaler}
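For the REG_sknn_ann branch above, self.regressorOpt is assumed to hold an 'activation' name plus 'hidden_layer_sizes' (both are popped), with any remaining keys passed through to ann_sknn.Regressor. A hypothetical configuration, shown here as plain dictionaries purely for illustration, might be:

# Hypothetical options consumed by _doFit; 'activation' and 'hidden_layer_sizes'
# are popped, and every remaining key is forwarded to ann_sknn.Regressor.
regressorOpt = {
    'activation': 'Rectifier',
    'hidden_layer_sizes': (64, 32),
    'learning_rate': 0.001,
    'n_iter': 100,
}
baggingRegressorOpt = {'n_estimators': 10, 'max_samples': 0.8}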
Code Example #7
# Test Harness
# ------------------------------------------------------------------------------
if __name__ == '__main__':
    # Load dataset
    featureVecs, labels, numFeatures, numLabelTypes = loadArffDataset(
        'data/faces_vegetables_dataset.arff', normalise=True, displayData=True)

    # Construct all classifiers we wish to test, with 'standard' parameters
    classifiers = {
        'SVM':
        svm.SVC(kernel='linear', C=1),
        'Decision Tree':
        tree.DecisionTreeClassifier(criterion='gini', splitter='best'),
        'Feed-Forward Neural Network (Sigmoid)':
        mlp.Classifier(layers=[
            mlp.Layer('Sigmoid', units=numFeatures),
            mlp.Layer('Sigmoid', units=numLabelTypes),
        ],
                       n_iter=100),
        'Gaussian Naive Bayes':
        naive_bayes.GaussianNB(),
        'Multi-Nomial Naive Bayes':
        naive_bayes.MultinomialNB(),
        'Bernoulli Naive Bayes':
        naive_bayes.BernoulliNB(),
    }

    # Test classifiers and compute their mean scores
    results = evaluateClassifiers(classifiers, featureVecs, labels, 10)
    scores = computeOverallScores(results)
Code Example #8
for p in sorted(PARAMETERS):
    values = PARAMETERS[p]
    # User requested to test against this parameter?
    if p in args.params:
        params.append(values)
    # Otherwise, use the first item of the list as default.
    else:
        params.append(values[:1])

# Build the classifiers for all possible combinations of parameters.
names = []
classifiers = []
for (activation, alpha, dropout, iterations, output, rule, units) in itertools.product(*params):
    params = {'pieces': 2} if activation == "Maxout" else {}
    classifiers.append(mlp.Classifier(
        layers=[mlp.Layer(activation, units=units, **params), mlp.Layer(output)], random_state=1,
        n_iter=iterations, n_stable=iterations,
        dropout=dropout, learning_rule=rule, learning_rate=alpha),)

    t = []
    for k, v in zip(sorted(PARAMETERS), [activation, alpha, dropout, iterations, output, rule, units]):
        if k in args.params:
            t.append(str(v))
    names.append(','.join(t))

# Create randomized datasets for visualizations, on three rows.
seed = int(time.time())
X, y = make_classification(n_features=2, n_redundant=0, n_informative=2,
                           random_state=0, n_clusters_per_class=1)
rng = np.random.RandomState(seed+1)
X += 2 * rng.uniform(size=X.shape)
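The loop above relies on a module-level PARAMETERS dictionary (and an argparse args.params list) that the excerpt does not show; since the keys are iterated in sorted order, a hypothetical grid compatible with the seven-tuple destructuring could look like:

# Hypothetical parameter grid; the sorted key order must match the tuple
# (activation, alpha, dropout, iterations, output, rule, units) unpacked above.
PARAMETERS = {
    'activation': ['Rectifier', 'Tanh', 'Maxout'],
    'alpha': [0.05, 0.01, 0.002],
    'dropout': [None, 0.25, 0.5],
    'iterations': [100, 500],
    'output': ['Softmax'],
    'rule': ['sgd', 'momentum', 'adagrad'],
    'units': [16, 64],
}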
Code Example #9
    print(norm1.values)
    print(norm2.values)

    bothGPAs = pd.concat([norm1, norm2], axis=1)

    # plt.figure()
    norm1.plot(kind='hist', alpha=.5)
    norm2.plot(kind='hist', alpha=.5)
    plt.show()


knn = neighbors.KNeighborsRegressor(5, "distance")
percep = linear_model.Perceptron(n_iter=15)

layers = []
layers.append(mlp.Layer("Sigmoid", units=9))
layers.append(mlp.Layer("Sigmoid", units=18))
layers.append(mlp.Layer("Linear", units=1))
MLP = mlp.Regressor(layers, learning_rule="momentum")

runRegressionModel(knn)
# runRegressionModel()
runRegressionModel(MLP)
"""
	features = allData[featNames]
	labels = allData[labelName]

	# trainFeat, testFeat, trainLabel, testLabel = train_test_split(features, labels, test_size=0.3, random_state=42)


	for train_rows, test_rows in folds:
Code Example #10
unknown = glob.glob('data/*/unsure?/*.png')

print("Found total of %i files:" % len(positive + negative + unknown))
print("  - %i placed pieces," % len(positive))
print("  - %i missing pieces," % len(negative))
print("  - %i unsure images.\n" % len(unknown))

ds = Dataset()
ds.store(negative, 0, times=1)
ds.store(positive, 1, times=1)
ds.store(unknown, 2, times=2)

X, y = ds.toarray()

nn = mlp.Classifier(layers=[
    mlp.Layer("Rectifier", units=48, dropout=0.3),
    mlp.Layer("Rectifier", units=32, dropout=0.1),
    mlp.Layer("Rectifier", units=24),
    mlp.Layer("Softmax")
],
                    learning_rate=0.01,
                    learning_rule='adagrad',
                    n_iter=10,
                    n_stable=10,
                    batch_size=50,
                    valid_set=(X, y),
                    verbose=1)

try:
    nn.fit(X, y)
except KeyboardInterrupt:
Code Example #11
labels_test = np.array(dataset3['labels'])

n_feat = data_train.shape[1]
n_targets = labels_train.max() + 1

import sys
import logging

logging.basicConfig(format="%(message)s",
                    level=logging.DEBUG,
                    stream=sys.stdout)

from sknn import mlp

net = mlp.Classifier(layers=[
    mlp.Layer("Rectifier", units=n_feat * 2 // 3),
    mlp.Layer("Rectifier", units=n_feat * 1 // 3),
    mlp.Layer("Softmax", units=n_targets)
],
                     n_iter=50,
                     n_stable=10,
                     learning_rate=0.001,
                     valid_size=0.1,
                     verbose=1)
net.fit(data_train, labels_train)

from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix

expected = labels_test
predicted = net.predict(data_test)
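The excerpt stops right after computing expected and predicted; a plausible continuation, given the two imports above, is simply:

print(classification_report(expected, predicted))
print(confusion_matrix(expected, predicted))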
Code Example #12
File: scikitforest.py  Project: tobby2002/portfolio-1
    testyshape = testY.shape
    X = X.reshape(xshape[0], xshape[2])
    testX = testX.reshape(testxshape[0], testxshape[2])
    # Y = Y.reshape(yshape[0], 1)
    # testY = testY.reshape(testyshape[0], 1)
    print(X.shape, Y.shape, mainX.shape, mainY.shape, testX.shape, testY.shape)
    print(X.max(), X.min(), Y.max(), Y.min())
    # Y = Y.reshape(yshape[0], yshape[2])
    # testY = testY.reshape(testyshape[0], testyshape[2])
    gc.collect()
    glob_rf = mlp.Regressor(
        layers=[
            mlp.Native(lasagne.DenseLayer, num_units=1024, nonlinearity=nl.very_leaky_rectify),
            mlp.Native(lasagne.DenseLayer, num_units=512, nonlinearity=nl.very_leaky_rectify),
            mlp.Native(lasagne.DenseLayer, num_units=256, nonlinearity=nl.very_leaky_rectify),
            mlp.Layer("Linear")],
        learning_rate=.1,
        n_iter=5,
        learning_rule="adadelta",
        callback={'on_epoch_finish': store_stats},

        loss_type='mse',
        regularize="L1",  # possibly L1, to instead filter out useless inputs. L1 gave 5+ in results?
        weight_decay=.001,  # default .0001 increase to combat overfitting.
        dropout_rate=0,  # keep 80% of neurons/inputs at .2, anti overfit
        verbose=True,
        #valid_set=(testX, testY),
        batch_size=1)  # TRIED NON-1, DIDN'T WORK AT ALL
    #glob_rf = pickle.load(open('forest' + str(length) + 'dyn.pkl', 'rb')) #TODO only for loading preexisting

    # begin pre-training with autoencoders
Code Example #13
File: rnnPortfolio.py  Project: nickVenuti/packages
data_sp500 = data_sp500.dropna(axis=1)


##############################
#####Select Target Stocks#####
##############################

#Calculate log daily rets
rets_sp500 = np.log(data_sp500 / data_sp500.shift(1))
rets_sp500 = rets_sp500.fillna(0)
rets_sp500.head()



#Optimize model
layers = [mlp.Layer('Tanh', units=len(rets_sp500.columns) * 3),
          mlp.Layer('Tanh', units=len(rets_sp500.columns) * 3),
          mlp.Layer('Tanh', units=len(rets_sp500.columns) * 3),
          mlp.Layer('Linear')]

testRun = rnn(layers, rets_sp500, 0.8, 3, 100)
np.mean(testRun[1])
np.mean(rets_sp500.as_matrix())
Code Example #14
    # User requested to test against this parameter?
    if p in args.params:
        params.append(values)
    # Otherwise, use the first item of the list as default.
    else:
        params.append(values[:1])

# Build the classifiers for all possible combinations of parameters.
names = []
classifiers = []
for (activation, alpha, dropout, iterations, output, regularize, rule,
     units) in itertools.product(*params):
    params = {'pieces': 2} if activation == "Maxout" else {}
    classifiers.append(
        mlp.Classifier(layers=[
            mlp.Layer(activation, units=units, **params),
            mlp.Layer(output)
        ],
                       random_state=1,
                       n_iter=iterations,
                       n_stable=iterations,
                       regularize=regularize,
                       dropout_rate=dropout,
                       learning_rule=rule,
                       learning_rate=alpha), )

    t = []
    for k, v in zip(sorted(PARAMETERS), [
            activation, alpha, dropout, iterations, output, regularize, rule,
            units
    ]):
Code Example #15
data_train = np.vstack([dataset1['data']])  #, dataset2['data']])
labels_train = np.hstack([dataset1['labels']])  #, dataset2['labels']])

data_train = data_train.astype('float') / 255.
labels_train = labels_train
data_test = dataset3['data'].astype('float') / 255.
labels_test = np.array(dataset3['labels'])

n_feat = data_train.shape[1]
n_targets = labels_train.max() + 1

from sknn import mlp

nn = mlp.Classifier(layers=[
    mlp.Layer("Tanh", units=n_feat * 2 // 3),
    mlp.Layer("Sigmoid", units=n_feat * 1 // 3),
    mlp.Layer("Softmax", units=n_targets)
],
                    n_iter=50,
                    n_stable=10,
                    learning_rate=0.001,
                    valid_size=0.5,
                    verbose=1)

if PRETRAIN:
    from sknn import ae
    ae = ae.AutoEncoder(layers=[
        ae.Layer("Tanh", units=n_feat * 2 // 3),
        ae.Layer("Sigmoid", units=n_feat * 2 // 3)
    ],
Code Example #16
def run_neural_net(training_features, training_labels, test_features,
                   test_labels):
    """
    Classifies the data using a multi-layer perceptron neural network (mlp_nn),
    tuned over n_iter with GridSearchCV.

    Parameters
    ----------
        training_features: feature matrix used to train the classifier
        training_labels: labels corresponding to the training features
        test_features: feature matrix used to evaluate the classifier
        test_labels: labels corresponding to the test features

    Returns
    -------
        prediction: predicted labels of the test data
        accuracy: fraction of test data labels accurately predicted
    """

    time_1 = time.time()

    #set the number of classes in the data
    number_of_outputs = training_labels.astype(int).max() + 1
    number_of_inputs = training_features.shape[1]

    #determine optimal hidden nodes based on Huang et al. (2003)
    first_layer_nodes = int(
        math.sqrt((number_of_outputs + 2) * number_of_inputs) +
        2 * math.sqrt(number_of_inputs / (number_of_outputs + 2)))
    second_layer_nodes = int(number_of_outputs *
                             math.sqrt(number_of_inputs /
                                       (number_of_outputs + 2)))

    #set up the layers
    input_layer = mlp_nn.Layer("Linear", units=number_of_inputs)
    hidden_layer1 = mlp_nn.Layer("Sigmoid", units=first_layer_nodes)
    hidden_layer2 = mlp_nn.Layer("Sigmoid", units=second_layer_nodes)
    output_layer = mlp_nn.Layer("Softmax", units=number_of_outputs)
    layers = [input_layer, hidden_layer1, hidden_layer2, output_layer]

    #set up the classifier
    neural_net = mlp_nn.Classifier(layers=layers, learning_rate=0.02, n_iter=5)

    #set up tuning parameters
    parameters = {"learning_rate": [0.02], "n_iter": [1, 5, 10, 25, 50]}

    #create cross validation iterator
    cv = ShuffleSplit(training_features.shape[0],
                      n_iter=5,
                      test_size=0.2,
                      random_state=0)

    #set up tuning algorithm
    classifier = GridSearchCV(estimator=neural_net,
                              cv=cv,
                              param_grid=parameters)

    classifier.fit(training_features, training_labels)

    test_prediction = classifier.predict(test_features)
    test_accuracy = classifier.score(test_features, test_labels)

    time_2 = time.time()

    graph_title = "Learning Curves \n(Neural Net, learning rate=%f)" % classifier.best_estimator_.learning_rate
    plot_learning_curve_iter(classifier, graph_title)
    pylab.savefig(
        os.path.join(results_location, 'Validator Curves - Neural Net.png'))

    time_3 = time.time()

    #output time stats
    #time 1 -> time 2 is optimization time
    #time 2 -> time 3 is run for just one case
    print("Neural Net Time Stats")
    print("Optimization Time -> %f" % (time_2 - time_1))
    print("Single Run Time -> %f" % (time_3 - time_2))

    #output classification report and confusion matrix
    print('\n\n----------------------------')
    print('Classification Report')
    print('----------------------------\n')
    print(classification_report(y_true=test_labels, y_pred=test_prediction))

    print('\n\n----------------------------')
    print('Confusion Matrix')
    print('----------------------------\n')
    print(confusion_matrix(y_true=test_labels, y_pred=test_prediction))

    return test_prediction, test_accuracy
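A hedged sketch of how run_neural_net might be invoked; the feature/label arrays and the split below are placeholders, not part of the original script:

from sklearn.model_selection import train_test_split

# Placeholders: `features` is an (n_samples, n_features) float matrix and
# `labels` an integer class vector, prepared elsewhere.
X_train, X_test, y_train, y_test = train_test_split(features, labels,
                                                    test_size=0.3, random_state=0)
prediction, accuracy = run_neural_net(X_train, y_train, X_test, y_test)
print("Neural net test accuracy: %.3f" % accuracy)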
Code Example #17
labels_train = np.hstack(
    [dataset1['labels']]
)  #, dataset2['labels'], dataset3['labels'], dataset4['labels'], dataset5['labels']])

data_train = data_train.astype('float') / 255.
labels_train = labels_train
data_test = dataset0['data'].astype('float') / 255.
labels_test = np.array(dataset0['labels'])

n_feat = data_train.shape[1]
n_targets = labels_train.max() + 1

from sknn import mlp

nn = mlp.Classifier(layers=[
    mlp.Layer("Tanh", units=n_feat // 8),
    mlp.Layer("Sigmoid", units=n_feat // 16),
    mlp.Layer("Softmax", units=n_targets)
],
                    n_iter=50,
                    n_stable=10,
                    learning_rate=0.002,
                    learning_rule="momentum",
                    valid_size=0.1,
                    verbose=1)

if PRETRAIN:
    from sknn import ae
    ae = ae.AutoEncoder(layers=[
        ae.Layer("Tanh", units=n_feat // 8),
        ae.Layer("Sigmoid", units=n_feat // 16
Code Example #18
classifiers = []

if 'dbn' in sys.argv:
    from nolearn.dbn import DBN
    clf = DBN([X_train.shape[1], 300, 10],
              learn_rates=0.3,
              learn_rate_decays=0.9,
              epochs=10,
              verbose=1)
    classifiers.append(('nolearn.dbn', clf))

if 'sknn' in sys.argv:
    from sknn import mlp

    clf = mlp.Classifier(
        layers=[mlp.Layer("Rectifier", units=300),
                mlp.Layer("Softmax")],
        learning_rate=0.02,
        learning_rule='momentum',
        batch_size=25,
        valid_size=0.0,
        n_stable=10,
        n_iter=10,
        verbose=1,
    )
    classifiers.append(('sknn.mlp', clf))

if 'lasagne' in sys.argv:
    from nolearn.lasagne import NeuralNet
    from lasagne.layers import InputLayer, DenseLayer
    from lasagne.nonlinearities import softmax
Code Example #19
def testingAlgoTypes(_all_country_data, MP4, verbose=0):
    print(
        "\n \n \n Testing various untrained classification algorithms on each country's separate sub datasets "
    )
    all_country_data_with_algos = copy.deepcopy(_all_country_data)
    ##parameters for NeuralNet
    nn_layers = [
        mlp.Layer('Sigmoid', units=7, name="Layer1"),
        mlp.Layer("Softmax", )
    ]
    nn_params = {
        'layers': nn_layers,
        'learning_momentum': 0.9,
        'n_stable': 10,
        'f_stable': 0.01,
        'learning_rate': 0.001,
        'learning_rule': 'adadelta',
        'random_state': seed,
        'n_iter': 8,
        'batch_size': 100,
        'warning': None,
        'verbose': None,
        'debug': False
    }

    max_iter_params = {'max_iter': 1000}

    classifiers = [
        LinearDiscriminantAnalysis(solver='eigen', shrinkage='auto'),
        linear_model.RidgeClassifier(random_state=seed),
        linear_model.LogisticRegression(solver='saga',
                                        penalty='l2',
                                        class_weight='balanced',
                                        random_state=seed),
        neighbors.KNeighborsClassifier(n_neighbors=9,
                                       weights='distance',
                                       leaf_size=20),
        svm.LinearSVC(class_weight='balanced', random_state=seed, dual=False),
        ensemble.RandomForestClassifier(n_estimators=200,
                                        min_samples_split=5,
                                        min_samples_leaf=3,
                                        max_depth=3,
                                        random_state=seed),
        ensemble.GradientBoostingClassifier(random_state=seed,
                                            n_estimators=200,
                                            min_samples_split=5,
                                            max_features='sqrt'),
        mlp.Classifier(**nn_params),
        linear_model.PassiveAggressiveClassifier(max_iter=1000,
                                                 random_state=seed,
                                                 class_weight="balanced"),
        linear_model.SGDClassifier(max_iter=1000,
                                   random_state=seed,
                                   class_weight='balanced',
                                   penalty='l2')
    ]

    headers = [
        'LDA', 'RC', 'LogR', 'KNN', 'SVM', 'RF', 'GBC', 'NN', 'PAC', 'SGD'
    ]

    for country in all_country_data_with_algos.keys():
        df_cv_results = pd.DataFrame(columns=headers)
        for _bus_cycle in all_country_data_with_algos[country].keys(
        ):  #iterating through the different business cycles
            means_vars_for_clf = []
            result_all_clf = []
            Y_target = all_country_data_with_algos[country][_bus_cycle].get(
                "Y")
            X_features = all_country_data_with_algos[country][_bus_cycle].get(
                "X")
            for _clf in classifiers:
                ##Creating Pipelines
                #standardizer = ('standardize',preprocessing.StandardScaler())
                algo = ('clf', _clf)
                steps = []
                #steps.append(standardizer)
                steps.append(algo)
                pipeline_clf = pipeline.Pipeline(steps)
                kfold = model_selection.KFold(n_splits=2,
                                              random_state=seed,
                                              shuffle=True)
                result_clf = model_selection.cross_val_score(
                    pipeline_clf,
                    np.array(X_features),
                    Y_target.values.ravel(),
                    cv=kfold,
                    n_jobs=1)
                result_all_clf = result_all_clf + [
                    result_clf.mean()
                ]  ##used to find top 3 methods

                means_vars_for_clf = means_vars_for_clf + [
                    "{0:.3g}".format(result_clf.mean())
                ]  ##used for excel sheet
            df_cv_results.loc[
                "{}-{}".format(country, _bus_cycle), :] = means_vars_for_clf

            ##gathering names of top three algos to be inserted into all_country_data dictionary
            top3 = sorted(result_all_clf, reverse=True)[:3]
            indexes_of_top_3 = [result_all_clf.index(x) for x in top3]
            top_3_algos_by_mean = [headers[x] for x in indexes_of_top_3
                                   ]  ##stored as 3 letter abbreviation of algo
            all_country_data_with_algos[country][_bus_cycle].update(
                {"algos": top_3_algos_by_mean})

        if MP4 == True:
            df_cv_results.to_excel(
                '../Reserach/Classifier Cross Validation Scores For All Countries/All/'
                + country + '.xlsx',
                index=False)
        if MP4 == "Only":
            df_cv_results.to_excel(
                '../Reserach/Classifier Cross Validation Scores For All Countries/Only/'
                + country + '.xlsx',
                index=False)
        if MP4 == False:
            df_cv_results.to_excel(
                '../Reserach/Classifier Cross Validation Scores For All Countries/Excl/'
                + country + '.xlsx',
                index=False)

        if verbose > 0:
            print(df_cv_results)
            print("\n")
    saveTopThreeAlgos(all_country_data_with_algos)

    return all_country_data_with_algos
Code Example #20
File: vf_classify.py  Project: liuliuliu123456/dee
def create_estimator(estimator_name, class_weight):
    estimator = None
    param_grid = None
    support_class_weight = False

    if estimator_name == "logistic_regression":
        from sklearn import linear_model
        estimator = linear_model.LogisticRegression(class_weight=class_weight)
        param_grid = {"C": np.logspace(-3, 4, 20)}
        support_class_weight = True
    elif estimator_name == "random_forest":
        estimator = ensemble.RandomForestClassifier(class_weight=class_weight)
        param_grid = {
            "n_estimators": list(range(10, 110, 10)),
            "max_features": ("auto", 0.5, 0.8, None)
            # "max_features": np.arange(int(np.sqrt(n_features)), n_features, step=4)
        }
        support_class_weight = True
        # support_class_weight = False
    elif estimator_name == "gradient_boosting":
        """
        import xgboost.sklearn as xgb
        estimator = xgb.XGBClassifier(learning_rate=0.1)
        param_grid = {
            # "n_estimators": list(range(150, 250, 10)),
            # "max_depth": list(range(3, 8))
        }
        """
        # for some unknown reason, XGBoost does not perform well on my machine and hangs sometimes
        # fallback to use the less efficient implementation in sklearn.
        estimator = ensemble.GradientBoostingClassifier(learning_rate=0.1,
                                                        warm_start=True)
        param_grid = {
            "n_estimators": list(range(150, 250, 10)),
            "max_depth": list(range(3, 8))
        }
    elif estimator_name == "adaboost":
        estimator = ensemble.AdaBoostClassifier()
        param_grid = {
            "n_estimators": list(range(30, 150, 10)),
            "learning_rate": np.logspace(-1, 0, 2)
        }
    elif estimator_name.startswith("svc_"):
        subtype = estimator_name[4:]
        from sklearn import svm
        if subtype == "linear":  # linear SVC uses liblinear instead of libsvm internally, which is more efficient
            param_grid = {
                "C": np.logspace(-6, 2, 50),
            }
            estimator = svm.LinearSVC(
                dual=
                False,  # dual=False when n_samples > n_features according to the API doc.
                class_weight=class_weight)
        else:
            estimator = svm.SVC(
                shrinking=False,
                cache_size=2048,
                verbose=False,
                probability=False,  # use True when predict_proba() is needed
                class_weight=class_weight)
            if subtype == "rbf":
                estimator.set_params(kernel="rbf")
                param_grid = {
                    "C": np.logspace(-2, 2, 20),
                    "gamma": np.logspace(-2, -1, 3)
                }
            else:  # poly
                estimator.set_params(kernel="poly")
                param_grid = {"degree": [2], "C": np.logspace(-3, 1, 20)}
        support_class_weight = True
    elif estimator_name == "mlp1" or estimator_name == "mlp2":  # multiple layer perceptron neural network
        from sknn import mlp
        param_grid = {
            "learning_rate": [0.0001],
            "regularize": ["l2"],  # , "dropout"],
            "weight_decay": np.logspace(-6, -5,
                                        2),  # parameter for L2 regularizer
            "hidden0__type": ["Tanh"]  # "Rectifier", "Sigmoid"
        }

        layers = [mlp.Layer(type="Tanh", name="hidden0")]
        # add the second hidden layer as needed
        if estimator_name == "mlp2":  # 2 hidden layer
            layers.append(mlp.Layer(type="Tanh", name="hidden1"))
            param_grid["hidden0__units"] = list(range(2, 5, 1))
            param_grid["hidden1__units"] = list(range(2, 5, 1))
            param_grid["hidden1__type"] = ["Tanh"]  # "Rectifier", "Sigmoid"
        else:
            param_grid["hidden0__units"] = list(range(5, 26, 1))
        # add the output layer
        layers.append(mlp.Layer("Softmax"))
        estimator = mlp.Classifier(layers=layers, batch_size=150)

    return estimator, param_grid, support_class_weight
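A hedged sketch of how the returned triple might feed a grid search; GridSearchCV and the training arrays below are assumptions, not part of the excerpt:

from sklearn.model_selection import GridSearchCV

estimator, param_grid, support_class_weight = create_estimator("random_forest", "balanced")
search = GridSearchCV(estimator, param_grid, cv=5, n_jobs=-1)
search.fit(X_train, y_train)  # X_train / y_train are placeholder arrays
print(search.best_params_, search.best_score_)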
Code Example #21
Y = traindata[1:, 0]
cv = train_test_split(X, Y, test_size=.33, random_state=20)
X_train, X_test, Y_train, Y_test = train_test_split(X,
                                                    Y,
                                                    test_size=.33,
                                                    random_state=20)

#Finding the optimal component

AELayers = [
    ae.Layer("Sigmoid", units=120),
    ae.Layer("Sigmoid", units=60),
    ae.Layer("Sigmoid", units=30)
]
NNLayers = [
    mlp.Layer("Sigmoid", units=120),
    mlp.Layer("Sigmoid", units=75),
    mlp.Layer("Softmax", units=5)
]

##
##for each in complist:
##    comp = each
t0 = time.clock()

print("Time started")
# Fit the Autoencoder

result = ae.AutoEncoder(AELayers,
                        warning=None,
                        random_state=0,
Code Example #22
File: MLP.py  Project: qdo1010/Kaggle
trainData = dataclean.convertPandasDataFrameToNumpyArray(trainFrame)

testFrame = dataclean.cleanDataset(dataclean.loadTestData(), True)
testData = dataclean.convertPandasDataFrameToNumpyArray(testFrame)

trainX = trainData[:, 1:]
trainY = trainData[:, 0]

testX = testData[:, 1:]

"""
Cross Validation
"""

# Learning rules: sgd, momentum, nesterov, adadelta, adagrad or rmsprop
mlp = nn.Regressor(layers=[nn.Layer("Rectifier", units=7),nn.Layer("Rectifier", units=8),
                           nn.Layer("Rectifier", units=9),
                           nn.Layer("Rectifier", units=8),nn.Layer("Rectifier", units=7),
                           nn.Layer("Linear", units=1)],
                   learning_rate=0.1, random_state=1, n_iter=100, verbose=True, learning_rule="adagrad",
                   valid_size=0.1, batch_size=500)
#cvCount = 10
#crossvalidation = metrics.crossValidationScore(ensemble.GradientBoostingRegressor(random_state=1), trainX, trainY, cvCount=cvCount)

xTrain, xTest, yTrain, yTest = Metrics.traintestSplit(trainX, trainY, randomState=1)

"""
#{'n_estimators': 400, 'max_depth': 6, 'learning_rate': 0.01

if __name__ == "__main__":
    params = {"max_depth" : [3,4,5,6,7,8], "n_estimators" : [100, 200, 300, 400], "learning_rate" : [0.01, 0.05, 0.1, 0.2, 0.5, 1]}
Code Example #23
File: irisAE.py  Project: ewang73/proj3
Y = traindata[1:, 0]
cv = train_test_split(X, Y, test_size=.33, random_state=20)
X_train, X_test, Y_train, Y_test = train_test_split(X,
                                                    Y,
                                                    test_size=.33,
                                                    random_state=20)

#Finding the optimal component

AELayers = [
    ae.Layer("Sigmoid", units=1000),
    ae.Layer("Sigmoid", units=500),
    ae.Layer("Sigmoid", units=250)
]
NNLayers = [
    mlp.Layer("Sigmoid", units=1000),
    mlp.Layer("Sigmoid", units=500),
    mlp.Layer("Softmax", units=15)
]

##
##for each in complist:
##    comp = each
t0 = time.clock()

print("Time started")
# Fit the Autoencoder

result = ae.AutoEncoder(AELayers,
                        warning=None,
                        random_state=0,
Code Example #24
import MNIST.DataClean as dc
import numpy as np
import sknn.mlp as mlp
import pickle

try:
    nn = pickle.load(open("simplenn.pkl", "rb"))
    print("Model loaded")
except:
    nn = None

layers = [
    mlp.Convolution("Rectifier", channels=10, kernel_shape=(2, 2)),
    mlp.Layer("Rectifier", units=1000),
    mlp.Layer("Softmax", units=10)
]

if nn is None:
    trainFrame = dc.loadTrainData(describe=False)
    trainData = dc.convertPandasDataFrameToNumpyArray(trainFrame)

    nn = mlp.Classifier(layers=layers,
                        learning_rate=0.00001,
                        valid_size=0,
                        random_state=0,
                        n_iter=50,
                        verbose=True,
                        batch_size=1000,
                        learning_rule="nesterov")
    nn.fit(trainData[:, 1:], trainData[:, 0])
    print("Model fitting complete")
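The script above tries to load "simplenn.pkl" on start-up; a plausible follow-up inside the same if-block, persisting the freshly fitted network under that file name, could look like:

    with open("simplenn.pkl", "wb") as f:
        pickle.dump(nn, f)
    print("Model saved")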
Code Example #25
def fineTuneModel(_all_country_data_with_algos):
    print(
        "\n \n Fine Tuning Parameters for the top 3 predictive algorithms for each country for each sub dataset split by Mentality/Business Cycle "
    )
    all_country_data_with_algos = copy.deepcopy(_all_country_data_with_algos)
    algos_dict = {
        "LDA":
        LinearDiscriminantAnalysis(),
        "RC":
        linear_model.RidgeClassifier(),
        "LogR":
        linear_model.LogisticRegression(),
        "KNN":
        neighbors.KNeighborsClassifier(),
        "SVM":
        svm.LinearSVC(),
        "RF":
        ensemble.RandomForestClassifier(verbose=0),
        "GBC":
        ensemble.GradientBoostingClassifier(verbose=0),
        "NN":
        mlp.Classifier(
            layers=[mlp.Layer('Rectifier', units=7),
                    mlp.Layer("Softmax", )]),
        "PAC":
        linear_model.PassiveAggressiveClassifier(),
        "SGD":
        linear_model.SGDClassifier()
    }

    cv_folds = 3
    n_jobs_count = np.arange(1, 2)
    results = {}

    for country in all_country_data_with_algos.keys():
        for _bus_cycle in all_country_data_with_algos[country]:
            X = all_country_data_with_algos[country][_bus_cycle].get("X")
            Y = all_country_data_with_algos[country][_bus_cycle].get("Y")
            all_country_data_with_algos[country][_bus_cycle].update(
                {"trained algos": []})

            for _algo in all_country_data_with_algos[country][_bus_cycle].get(
                    "algos"):
                #Possible parameters for each var Parameters

                _parameters = {}

                if _algo == "LDA":
                    lda_n_components = np.arange(2, 8, 1)
                    shrinkage = ['auto']

                    lda_solver = ['lsqr', 'eigen']
                    _parameters.update({
                        'n_components': lda_n_components,
                        'solver': lda_solver,
                        'shrinkage': shrinkage
                    })

                if _algo == "RC":
                    rc_class_weight = ['balanced']
                    rc_solver = ['saga', 'sparse_cg', 'svd']
                    alpha = np.arange(0.5, 4.5, 0.5)
                    _parameters.update({
                        'class_weight': rc_class_weight,
                        'solver': rc_solver,
                        'alpha': alpha
                    })

                if _algo == "LogR":
                    lr_penalty = ['l1', 'l2']
                    lr_class_weight = ['balanced']
                    lr_solver = ['liblinear']
                    _parameters.update({
                        'penalty': lr_penalty,
                        'class_weight': lr_class_weight,
                        'solver': lr_solver,
                        'random_state': [seed]
                    })

                if _algo == "KNN":
                    knn_neighbors = np.arange(2, 13, 1)
                    knn_weights = ['uniform', 'distance']
                    knn_leaf_size = np.arange(10, 30, 2)
                    _parameters.update({
                        'n_neighbors': knn_neighbors,
                        'weights': knn_weights,
                        'leaf_size': knn_leaf_size
                    })

                if _algo == "SVM":
                    ##put change of kernel in after
                    svm_weights = ['balanced']
                    dual = [False]

                    _parameters.update({
                        'class_weight': svm_weights,
                        'dual': dual,
                        'random_state': [seed]
                    })

                if _algo == "RF":
                    rf_max_depth = np.arange(1, 5, 1)
                    n_estimators = np.asarray([200])
                    min_samples_leaf = np.arange(3, 6, 1)
                    min_samples_split = np.arange(3, 5, 1)
                    max_features = ["sqrt"]
                    _parameters.update({
                        'max_depth': rf_max_depth,
                        'n_estimators': n_estimators,
                        'min_samples_leaf': min_samples_leaf,
                        'min_samples_split': min_samples_split,
                        'max_features': max_features,
                        'random_state': [seed]
                    })

                if _algo == "GBC":
                    gb_loss = ['deviance']
                    gb_max_depth = np.arange(1, 5, 1)
                    n_estimators = np.asarray([200])
                    min_samples_leaf = np.arange(3, 6, 1)
                    min_samples_split = np.arange(3, 6, 1)
                    max_features = ["sqrt"]
                    _parameters.update({
                        'loss': gb_loss,
                        'max_depth': gb_max_depth,
                        'min_samples_leaf': min_samples_leaf,
                        'min_samples_split': min_samples_split,
                        'n_estimators': n_estimators,
                        'max_features': max_features,
                        'random_state': [seed]
                    })

                if _algo == "NN":
                    layer_1 = [
                        mlp.Layer(type="Sigmoid", units=7, name="layer1"),
                        mlp.Layer(type="Softmax", name="layer2")
                    ]
                    #mlp.Layer('Rectifier',units=5)
                    nn_layers = [layer_1]
                    nn_regularize = ['L1']
                    learning_rate = [0.01]
                    n_iter = [1000]
                    weight_decay = [0.01]
                    learning_rule = ['adadelta']
                    momentum = [0.90]
                    n_stable = np.arange(150, 151, 2)
                    f_stable = [0.001]
                    dropout_rate = np.asarray([0, 0.25, 0.5])
                    random_state = [seed]
                    nn_params = {
                        'layers': nn_layers,
                        'regularize': nn_regularize,
                        'learning_rate': learning_rate,
                        'n_iter': n_iter,
                        'learning_rule': learning_rule,
                        'weight_decay': weight_decay,
                        'learning_momentum': momentum,
                        'n_stable': n_stable,
                        'random_state': random_state
                    }  # hidden layer size should be the average of the input and output layer sizes
                    _parameters.update(nn_params)

                if _algo == "PAC":
                    class_weight = ['balanced']
                    max_iter = np.arange(1000, 10001, 1)
                    _parameters.update({
                        'class_weight': class_weight,
                        'max_iter': max_iter,
                        'random_state': [seed]
                    })

                if _algo == "SGD":
                    loss = ['squared_hinge', 'hinge']
                    class_weight = ['balanced']
                    penalty = ['l2', 'l1', 'elasticnet']
                    _parameters.update({
                        'loss': loss,
                        'class_weight': class_weight,
                        'max_iter': [1000],
                        'penalty': penalty,
                        'random_state': [seed]
                    })

                _grid = model_selection.GridSearchCV(algos_dict.get(_algo),
                                                     param_grid=_parameters,
                                                     cv=cv_folds,
                                                     n_jobs=1)

                _grid.fit(np.array(X), Y.as_matrix().flatten())

                trained_algo = _grid.best_estimator_
                all_country_data_with_algos[country][_bus_cycle][
                    "trained algos"].append(trained_algo)

    return all_country_data_with_algos