Example #1
import pickle

import numpy as np
from tqdm import tqdm
# partial_fit is called without a `classes` argument below, which suggests a
# regressor; other examples in this collection alias sklearn's net classes
# the same way
from sklearn.neural_network import MLPRegressor as mlp


def train(X_train, y_train, batch_size, n_batches):
    # X_val, y_val, model_name, get_values and get_loss are assumed to be
    # defined at module scope in the original project
    model = mlp(hidden_layer_sizes=(1024, 2048, 1024, 512, 256, 256, 128, 64),
                max_iter=10000)

    train_size = np.shape(X_train)[0]

    min_loss = 1e20

    for _ in tqdm(range(n_batches)):
        # sample a random mini-batch (with replacement)
        idx = np.random.randint(0, train_size, size=batch_size)

        X_select = X_train[idx, :]
        y_select = y_train[idx]

        model.partial_fit(X_select, y_select)

        sentence_predicted_scores = get_values(X_val, model)
        loss = get_loss(sentence_predicted_scores, y_val)

        # save the best model seen so far
        if loss < min_loss:
            min_loss = loss
            with open(model_name + '_best_model', 'wb') as f:
                pickle.dump(model, f)

    with open(model_name + '_best_model', 'rb') as f:
        final_model = pickle.load(f)
    return final_model
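A minimal usage sketch for the trainer above. train() reads X_val, y_val, model_name, get_values and get_loss from module scope, so hypothetical stand-ins are defined here and the data is synthetic:

import numpy as np

# hypothetical stand-ins for the module-level names train() expects
X_val = np.random.rand(100, 20)
y_val = np.random.rand(100)
model_name = 'demo'

def get_values(X, model):
    return model.predict(X)

def get_loss(pred, true):
    return float(np.mean((pred - true) ** 2))

best = train(np.random.rand(1000, 20), np.random.rand(1000),
             batch_size=64, n_batches=200)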
Example #2
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier as mlp


def executeAlgorithm(algorithm, dataset):
    # get_iris, get_diabetes, NeuralNetwork and get_accuracy are helpers
    # defined elsewhere in the original project
    if dataset == 1:
        data, targets, classes = get_iris()
    elif dataset == 2:
        data, targets, classes = get_diabetes()
    else:
        raise ValueError("unknown dataset id: {}".format(dataset))

    count = 0

    # split the dataset into random train/test parts
    train_data, test_data, train_target, test_target = train_test_split(
        data, targets, test_size=.3)

    if algorithm == 7:
        classifier = NeuralNetwork()
        model = classifier.fit(train_data, train_target, classes)
    else:
        classifier = mlp()
        model = classifier.fit(train_data, train_target)

    # target_predicted is the array of predictions returned by predict()
    target_predicted = model.predict(test_data)

    # count the predictions that match the held-out targets
    for index in range(len(target_predicted)):
        if target_predicted[index] == test_target[index]:
            count += 1

    accuracy = get_accuracy(count, len(test_data))

    print("Accuracy: {:.2f}%".format(accuracy))
Example #3
    def __init__(self, fname):
        """
        Instantiate an ensemble of neural nets using sklearn.

        Assumes os, pickle, numpy as np, an sklearn net class aliased as
        mlp, and sklearn.utils.check_random_state at module scope.
        """
        if os.path.isdir(fname):
            open_name = "{}/netparams-{}.pckl".format(fname, fname)
        else:
            open_name = fname

        # the context manager closes the file; no explicit close() is needed
        with open(open_name, "rb") as f:
            data = pickle.load(f)

        ensemble = [mlp(hidden_layer_sizes=data["hidden_layer_sizes"],
                        activation=data["activation"])
                    for ii in range(data["Nensemble"])]

        for model in range(data["Nensemble"]):
            ensemble[model]._random_state = check_random_state(
                ensemble[model].random_state)
            # _initialize allocates coefs_/intercepts_ so they can be
            # overwritten with the stored weights below
            ensemble[model]._initialize(y=np.zeros((1, 2)),
                                        layer_units=data["layer_units"])

            for ii in range(len(data["layer_units"]) - 1):
                ensemble[model].coefs_[ii] = data["weights"][model][ii]
                ensemble[model].intercepts_[ii] = data["biases"][model][ii]

        self.ensemble = ensemble
        self.Nens = data["Nensemble"]
        self.Xdim = data["layer_units"][0]
        self.preconditioning = data["preconditioning"]
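A sketch of how the restored ensemble might be used, averaging member predictions for a mean and a rough spread; the class name Ensemble and the parameter file name are hypothetical, since the snippet does not show them:

import numpy as np

net = Ensemble("netparams.pckl")          # hypothetical class/file names
X = np.random.rand(5, net.Xdim)
preds = np.stack([member.predict(X) for member in net.ensemble])
mean, spread = preds.mean(axis=0), preds.std(axis=0)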
Example #4
import os

import joblib
import numpy as np
import pandas as pd
from sklearn.model_selection import GridSearchCV as gs
# band-gap prediction is a regression task, so mlp is presumably MLPRegressor
from sklearn.neural_network import MLPRegressor as mlp


def MLP(data_directory, model_dir, features):
    # pre() is the project's preprocessing helper
    X_train, X_test, y_train, y_test, predict_X, features = pre(
        data_directory, features)
    os.chdir(model_dir)
    model = mlp(random_state=1, max_iter=10000)
    grid = gs(estimator=model,
              param_grid={
                  'hidden_layer_sizes': [(500, 500)],
                  'activation': ['logistic', 'tanh', 'relu'],
                  # exp(2.303 * k) ~= 10**k, i.e. alphas from 1e-8 to 1e-1
                  'alpha': np.exp(2.303 * np.arange(-8, 0)),
                  'learning_rate': ['constant']
              },
              cv=5,
              n_jobs=6)
    grid.fit(X_train, y_train)
    print(grid.best_params_)
    # compute the test score once and reuse it
    test_score = grid.best_estimator_.score(X_test, y_test)
    print(test_score)

    joblib.dump(grid.best_estimator_,
                'mlp_%d_%.4f.m' % (len(features), test_score))

    df = pd.DataFrame(columns=['ml_bandgap', 'pbe_bandgap'])
    df['pbe_bandgap'] = y_test
    df['ml_bandgap'] = grid.best_estimator_.predict(X_test)
    print(df)
Example #5
def predict_mlp(X_train, X_test, y_train, y_test):
    # calc_accuracy is a reporting helper defined elsewhere in the project
    clf = mlp()
    print("mlp started")
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    calc_accuracy("MLP classifier", y_test, y_pred)
    return clf
Example #6
def neural_networks(M, m, D, d, feature_mean, diag, accuracy):
    import time as t

    import numpy as num
    from sklearn.metrics import accuracy_score, classification_report
    from sklearn.model_selection import cross_val_score
    from sklearn.neural_network import MLPClassifier as mlp

    training_start = t.time()
    nn = mlp()
    nn.fit(M, D)
    training_end = t.time()
    print("\nNeural Networks\nTraining time: {0:.7f} sec".format(
        training_end - training_start))

    testing_start = t.time()
    p = nn.predict(m)
    testing_end = t.time()
    print("Testing/Prediction time: {0:.7f} sec".format(testing_end -
                                                        testing_start))

    validation = cross_val_score(nn, feature_mean, diag, cv=5)
    accuracy.append(accuracy_score(d, p))

    print("Accuracy: {0:.01%}".format(accuracy_score(d, p)))
    print("Cross validation result: {0:.01%} (+/- {1:.01%})".format(
        num.mean(validation),
        num.std(validation) * 2))
    print(classification_report(d, p))
Example #7
    def _neural_network(self, X, y):
        '''_NEURAL_NETWORK

            Train MLP to do final classification.

            Inputs:
            -------

            - X: numpy ndarray, features array.
            - y: numpy ndarray, labels list.

            Output:
            -------

            - clf: instance of MLPClassifier, trained model.

        '''

        # Train MLP
        clf = mlp(**self.mlp_paras)
        clf.fit(X, y)

        # To save trained model
        # import joblib at the beginning of this script
        # joblib.dump(clf, "mlp.pkl")

        return clf
Example #8
def _mlp(t, min_freq, save=False):
    # records and labels are module-level training data;
    # save_classifier/load_classifier are persistence helpers
    if save:
        clf = mlp().fit(records, labels)
        save_classifier(clf, t, 'mlp', min_freq)
    else:
        clf = load_classifier(t, 'mlp', min_freq)
    return ('mlp', clf)
Example #9
def classifica_bd(grupos, attr_cluster, porc_trein, folds):
    # for each cluster group, train an MLP via the project's `classificador`
    # wrapper and record the group's accuracy ("acuracia")
    result = []
    for grupo in grupos:
        data = grupo.drop([attr_cluster], axis=1)
        clt = grupo[attr_cluster].unique()
        classif = classificador(mlp(max_iter=2000), data, porc_trein, folds)
        result.append((clt, classif.acuracia))
    return result
Example #10
    def __get_base_estimator__(self):
        # note the trailing comma: (100,) is a one-element tuple,
        # while (100) would just be the int 100
        return mlp(hidden_layer_sizes=(100,),
                   activation='relu',
                   solver='adam',
                   learning_rate='constant',
                   learning_rate_init=1e-3,
                   early_stopping=True,
                   max_iter=2000)
Example #11
    def __init__(self, state_set_size, action_set_size, epsilon):
        self.states = state_set_size
        self.actions = action_set_size
        self.memory = []
        self.last_100 = []
        # a single dummy sample warm-starts the net so partial_fit can be
        # called incrementally later; classes must be fixed on the first call
        X_train = np.ones((1, state_set_size))
        Y_train = np.array([1, 0]).reshape((1, action_set_size))
        self.model = mlp(random_state=2, max_iter=1000)
        self.model.partial_fit(X_train, Y_train, classes=np.unique(Y_train))
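The dummy partial_fit in the constructor pins the output classes so the net can be updated incrementally afterwards. A sketch of one such update, with a hypothetical Agent class name and target vector:

import numpy as np

agent = Agent(state_set_size=4, action_set_size=2, epsilon=0.1)  # hypothetical
state = np.ones((1, 4))
target = np.array([[0, 1]])
agent.model.partial_fit(state, target)   # classes were fixed in __init__
q_estimates = agent.model.predict_proba(state)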
Example #12
    def buildClassifier(self, X_features, Y_train):
        clf = mlp(
            solver='sgd',
            learning_rate_init=0.1,
            max_iter=20,
            hidden_layer_sizes=(100,),
            random_state=1,
            verbose=True)
        clf.fit(X_features, Y_train)
        return clf
Example #13
    def create_model(self, model_type, parameters):
        # lr/svm/mlp/rf/xgb are aliases for the underlying estimator classes
        if model_type == 'lr':
            model = lr()
        elif model_type == 'svm':
            model = svm()
        elif model_type == 'mlp':
            model = mlp()
        elif model_type == 'rf':
            model = rf()
        elif model_type == 'xgb':
            model = xgb()
        else:
            raise ValueError("unknown model type: {}".format(model_type))
        return model.set_params(**parameters)
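A usage sketch for the factory, assuming lr/svm/mlp/rf/xgb are the usual estimator aliases and that create_model lives on some wrapper object (the wrapper name and data are hypothetical):

clf = wrapper.create_model('mlp', {'hidden_layer_sizes': (64, 32),
                                   'max_iter': 500})
clf.fit(X_train, y_train)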
Example #14
# likely aliases, judging by how the classifiers are parameterized below
from sklearn.naive_bayes import GaussianNB as naive
from sklearn.tree import DecisionTreeClassifier as tree
from sklearn.neighbors import KNeighborsClassifier as knn
from sklearn.neural_network import MLPClassifier as mlp


def select_classify():
    return [
        naive(),
        tree(criterion="entropy"),
        knn(n_neighbors=8, weights='uniform', metric="manhattan"),
        mlp(hidden_layer_sizes=(128,),
            alpha=0.01,
            activation='tanh',
            solver='sgd',
            max_iter=300,
            learning_rate='constant',
            learning_rate_init=0.001)
    ]
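One way the returned list could be consumed, fitting and scoring each candidate in turn (the train/test splits are hypothetical):

for clf in select_classify():
    clf.fit(X_train, y_train)
    print(type(clf).__name__, clf.score(X_test, y_test))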
Example #15
    def add_model(self, model_type):
        # note: the `normalize` argument was removed from sklearn's linear
        # models in 1.2; on recent versions, scale the data in a Pipeline
        if model_type == 'lr':
            self.models.append((model_type, lr(normalize=True)))
        elif model_type == 'ridge':
            self.models.append((model_type, rc(normalize=True, cv=None)))
        elif model_type == 'lasso':
            self.models.append((model_type, la(normalize=True)))
        elif model_type == 'svm':
            self.models.append((model_type, svm()))
            self.param_grid['svm'] = {
                'kernel': ['rbf'],
                'C': range(10, 100, 10),
                'epsilon': [0.01]
            }
        elif model_type == 'mlp':
            self.models.append((model_type, mlp()))
            self.param_grid['mlp'] = {
                'hidden_layer_sizes': [(16, 16, 16, 16, 16), (16, 16, 16, 16)],
                'activation': ['identity', 'logistic', 'tanh', 'relu'],
                'solver': ['lbfgs', 'adam'],
                'alpha': [0.001, 0.01],
                'learning_rate': ['constant', 'invscaling', 'adaptive'],
                'learning_rate_init': [0.001, 0.01, 0.1],
                #'early_stopping':[True,False],
                #'validation_fraction':[0.1,0.05,0.2],
                #'max_iter':[200,1000,2000]
            }
        elif model_type == 'xgb':
            self.models.append((model_type, xgb()))
            self.param_grid[model_type] = {
                'max_depth': range(5, 15, 2),
                'min_child_weight': range(1, 6, 2),
                'n_estimators': range(10, 50, 10),
                'learning_rate': [0.01, 0.05, 0.1],
                'n_jobs': [4],
                'reg_alpha': [0, 0.005, 0.01],
                'subsample': [0.8, 1],
                'colsample_bytree': [0.8, 1]
            }
        elif model_type == 'rf':
            self.models.append((model_type, rf()))
            self.param_grid[model_type] = {
                'n_estimators': [10, 100, 500],
                #'max_depth':range(3,10,2),
                #'min_child_weight':range(1,6,2),
                #'learning_rate':[0.01,0.05,0.1]
            }
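A sketch of how the collected models and parameter grids might be searched, assuming a builder object exposing the models list and param_grid dict built above (builder and the data are hypothetical):

from sklearn.model_selection import GridSearchCV

for name, est in builder.models:
    search = GridSearchCV(est, builder.param_grid.get(name, {}), cv=5)
    search.fit(X_train, y_train)
    print(name, search.best_score_)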
Example #16
    def get_classifier(self, trusted_triples):
        model = KeyedVectors.load_word2vec_format(self.vectors_model,
                                                  binary=True)
        X = []
        y = []
        for (s, p, o, source, support, abstracts) in trusted_triples:
            skey = self.clean_for_embeddings(s).replace(' ', '_')
            okey = self.clean_for_embeddings(o).replace(' ', '_')

            # keep only triples whose subject and object both have embeddings
            if skey in model and okey in model:
                vec = np.concatenate((model[skey], model[okey]), axis=None)
                X.append(vec)
                y.append(p)

        X = np.array(X)
        clf = mlp(hidden_layer_sizes=(100,))
        clf.fit(X, y)
        return clf
Example #17
def init_model(modeltype):
    # mlp and svm are aliases for sklearn's MLPRegressor and SVR,
    # matching the section comments below
    if modeltype == 'mlp':
        ### Feedforward Neural Network Regression Model
        regression_model = mlp(hidden_layer_sizes=(100, 50),
                               activation='relu',
                               solver='adam',
                               alpha=0.5,
                               batch_size='auto',
                               learning_rate='adaptive',
                               learning_rate_init=0.001,
                               power_t=0.5,
                               max_iter=1000,
                               shuffle=True,
                               random_state=None,
                               tol=0.0001,
                               verbose=False,
                               warm_start=False,
                               momentum=0.9,
                               nesterovs_momentum=True,
                               early_stopping=False,
                               validation_fraction=0.1,
                               beta_1=0.9,
                               beta_2=0.999,
                               epsilon=1e-08,
                               n_iter_no_change=10)
    elif modeltype == 'svm':
        ### Support Vector Machine Regression Model
        regression_model = svm(kernel='rbf',
                               C=1e6,
                               epsilon=0.1,
                               gamma='auto',
                               tol=0.001,
                               cache_size=2000,
                               shrinking=True,
                               verbose=False,
                               max_iter=-1)
    else:
        raise ValueError("unknown model type: {}".format(modeltype))
    return regression_model
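A short usage sketch for init_model, assuming mlp and svm alias sklearn's MLPRegressor and SVR as the section comments indicate (the training data is hypothetical):

reg = init_model('mlp')
reg.fit(X_train, y_train)
print(reg.score(X_test, y_test))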
Example #18
def model_stop(df):
    """
    Create a model from a dataframe.
    """
    #df = pd.get_dummies(df,columns=['day'])
    #features = ['day_'+str(i) for i in range(0,7)]
    #for f in features:
    #    if f not in df.columns:
    #        df[f] = 0
    df = df[df['traveltime'] > 0]
    # drop the extreme 5% tails of traveltime before fitting
    df = df[df['traveltime'] < df['traveltime'].quantile(0.95)]
    df = df[df['traveltime'] > df['traveltime'].quantile(0.05)]
    features = ['rain', 'temp', 'hour', 'day']
    scaler_X = ss()
    X = scaler_X.fit_transform(df[features])
    scaler_Y = ss()

    Y_real = df['traveltime']
    Y = scaler_Y.fit_transform(df['traveltime'].values.reshape(-1, 1))

    # ravel() avoids sklearn's column-vector warning
    model = mlp().fit(X, Y.ravel())
    return model, X, features, scaler_X, scaler_Y, Y_real
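Because model_stop fits on scaled targets, predictions must be mapped back through scaler_Y to get travel times in the original units; a minimal sketch, assuming df has the columns used above:

model, X, features, scaler_X, scaler_Y, Y_real = model_stop(df)
pred_scaled = model.predict(X).reshape(-1, 1)
pred_seconds = scaler_Y.inverse_transform(pred_scaled).ravel()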
Example #19
from sklearn.preprocessing import StandardScaler

# X, X_train, X_test come from an earlier (not shown) train/test split;
# fit the scaler on the training features only
scale = StandardScaler()
scale.fit(X_train)
X_train = scale.transform(X_train)
X_test = scale.transform(X_test)

# scale the full dataset as well, for evaluation on all samples later
scale2 = StandardScaler()
scale2.fit(X)
X = scale2.transform(X)

#learning-curve (cross-validated) scores of the multi-layer perceptron
from sklearn.neural_network import MLPClassifier as mlp
from sklearn.model_selection import learning_curve
cv_score = learning_curve(
    mlp(activation='logistic',
        hidden_layer_sizes=(25, 1),
        solver='lbfgs',
        alpha=.01,
        max_iter=400), X_train, y_train)

#train a multi-layer perceptron on the training split
classifier = mlp(activation='logistic',
                 hidden_layer_sizes=(50, 6),
                 solver='lbfgs',
                 alpha=.1,
                 max_iter=400)
classifier.fit(X_train, y_train)

#evaluation of mlp: confusion matrix on the test split
from sklearn.metrics import confusion_matrix as cmm
print(cmm(y_test, classifier.predict(X_test)))
Example #20
def train(X, y):
    model = mlp(hidden_layer_sizes=(1024, 2048, 1024, 512, 256), max_iter=100)
    model.fit(X, y)
    return model
Example #21
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier as mlp


def bpnn(X, Y, epochs=20):
    # for the default adam/sgd solvers, max_iter is the number of epochs
    model = mlp(max_iter=epochs, verbose=True)
    X_tr, X_te, Y_tr, Y_te = train_test_split(X, Y)
    model.fit(X_tr, Y_tr)
    print(model.score(X_te, Y_te))
Example #22
# (first line reconstructed; the snippet begins mid-statement)
# split a validation set off the training data; X_val/y_val are scored below
X_train, X_val, y_train, y_val = train_test_split(X_train,
                                                  y_train,
                                                  test_size=.1,
                                                  random_state=42)

testdata = pd.read_csv(
    '/mnt/d/Work/Acad/BTP/data/testGreenBit/feature_patches.csv', header=None)
# testdata = pd.read_csv('/mnt/d/Work/Acad/BTP/data/testGreenBit/fractal_feature.csv',header=None)
# testdata2 = pd.read_csv('/mnt/d/Work/Acad/BTP/data/testGreenBit/feature.csv',header=None)
# testdata =  testdata.join(testdata2.iloc[:,2:], lsuffix='_caller', rsuffix='_other')
testdata.dropna(inplace=True)
X = testdata.iloc[:, 3:]
y = testdata.iloc[:, 2]
X = scaler.transform(X)  # scaler: fitted earlier in the (truncated) snippet

clf = mlp(hidden_layer_sizes=(100, 20), max_iter=1000, verbose=1)
clf = clf.fit(X_train, y_train)
print('Validation Acc: ', clf.score(X_val, y_val))
print('Test Acc: ', clf.score(X, y))

y_test_prob = pd.DataFrame(clf.predict_proba(X))
sample = pd.DataFrame(testdata.iloc[:, 1]).reset_index()
joined = sample.join(y_test_prob, lsuffix='a')
prob_pred = joined.groupby(['1a']).mean()
joined = sample.join(pd.get_dummies(y).reset_index(), lsuffix='a')
prob_true = joined.groupby(['1a']).mean()

RCF = rcf()  # rcf: presumably RandomForestClassifier, aliased in the truncated header
RCF = RCF.fit(X_train, y_train)
print(RCF.score(X_train, y_train))
Example #23
import time

import numpy as np
from sklearn import model_selection
from sklearn.metrics import classification_report
from sklearn.neural_network import MLPClassifier as mlp

start_time = time.time()

input_file = 'seeds_dataset.txt'
data = np.loadtxt(input_file, delimiter=',')
X, y = data[:, :-1], data[:, -1]

X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, test_size=0.25, random_state=5)

params = {'random_state': 0, 'hidden_layer_sizes': 50, 'max_iter': 2000}
classifier = mlp(**params)

classifier.fit(X_train, y_train)
y_test_pred = classifier.predict(X_test)

print("\n" + "#" * 40)
print("\nClassifier performance on training dataset\n")
print(classification_report(y_train, classifier.predict(X_train)))
print("#" * 40 + "\n")

print("#" * 40)
print("\nClassifier performance on test dataset\n")
print(classification_report(y_test, y_test_pred))
print("#" * 40 + "\n")

# print running time
print("Total running time: {:.2f} sec".format(time.time() - start_time))
Example #24
# (first line reconstructed to mirror the testing-score print below)
print("SVM Training Score: {}".format(
    round(clf.score(dataset_train_x, dataset_train_y), 2)))
print("SVM Testing Score: {}".format(
    round(clf.score(dataset_test_x, dataset_test_y), 2)))

# Multi-layer Perceptron Neural Network
clf = mlp(activation='relu',
          alpha=1e-05,
          batch_size='auto',
          beta_1=0.9,
          beta_2=0.999,
          early_stopping=False,
          epsilon=1e-08,
          hidden_layer_sizes=(10, 2),
          learning_rate='constant',
          learning_rate_init=0.001,
          max_iter=200,
          momentum=0.9,
          nesterovs_momentum=True,
          power_t=0.5,
          random_state=1,
          shuffle=True,
          solver='lbfgs',
          tol=0.0001,
          validation_fraction=0.1,
          verbose=False,
          warm_start=False)
clf.fit(dataset_train_x, dataset_train_y)
print("Neural Network Training Score: {}".format(
    round(clf.score(dataset_train_x, dataset_train_y), 2)))
print("Neural Network Testing Score: {}".format(
    round(clf.score(dataset_test_x, dataset_test_y), 2)))
Example #25
clf = LogisticRegression(solver='liblinear').fit(X_train, y_train)

print(clf.score(X_train, y_train))
pred = clf.predict(X_test)
y1 = np.ones(y_test.shape)
#print(y1)
print((clf.score(X_test, y_test)))

#print((clf.score(X_test,y1)))

clf2 = mlp(hidden_layer_sizes=(1000, 1000, 1000),
           activation='tanh',
           solver='adam',
           max_iter=2000)
clf2.fit(X_train, y_train)
print(clf2.score(X_train, y_train))
print(clf2.score(X_test, y_test))

clf3 = mlp(hidden_layer_sizes=(64, 32, 64),
           activation='tanh',
           solver='adam',
           max_iter=500)
clf3.fit(X_train, y_train)
print(clf3.score(X_train, y_train))
Example #26
import os
import time as t

import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier as mlp

print("Reading data")
start = t.time()
x = pd.read_csv(os.path.join('Data', 'Csv', 'spectrum.csv'))
y = pd.read_csv(os.path.join('Data', 'Csv', 'labels.csv'))
elapsed = t.time() - start
print("Done reading data: " + "{0:.2f}".format(elapsed) + "s")

print("Training")
start = t.time()
timeslices = y["time in seconds"]
y = y.drop(["time in seconds"], axis=1)
x = x.drop(["time in seconds"], axis=1)
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25,
                                                    random_state=27)
model = mlp(hidden_layer_sizes=(100, 100), solver='sgd', verbose=True,
            power_t=0.99)
model.fit(x_train, y_train)
elapsed = t.time() - start
print("Done training: " + "{0:.2f}".format(elapsed) + "s")
y_pred = model.predict(x_test)
print("Training set score: %f" % model.score(x_train, y_train))
print("Test set score: %f" % model.score(x_test, y_test))

txt = True
if txt:
    # build a "0,1,...,127" header row and start the output CSV
    header = "0"
    for value in range(127):
        header += "," + str(value + 1)
    header += "\n"
    with open(os.path.join("Data", "Output", "Research.csv"), "w",
              newline='') as result_csv:
        result_csv.write(header)
Example #27
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from sklearn.neural_network import MLPClassifier as mlp
from sklearn.utils import shuffle

# read the training data from CSV
df = pd.DataFrame(pd.read_csv('/mnt/sd01/sjjs_bj14/contest1/contest1_training.csv', header=None))
x_train = df.iloc[:, 1:]   # columns 2 onward are the features
y_train = df.iloc[:, 0]    # column 1 is the label
# normalize
min_max_scaler = preprocessing.MinMaxScaler()
x_tr_arr = x_train.values
x_tr_scaled = min_max_scaler.fit_transform(x_tr_arr)
x_train = pd.DataFrame(x_tr_scaled)
# train
model = mlp(activation='tanh', solver='adam', alpha=0.0001,
            learning_rate='adaptive', learning_rate_init=0.001, max_iter=200)
model.fit(x_train, y_train)
# read the forecast data from CSV
df = pd.DataFrame(pd.read_csv('/mnt/sd01/sjjs_bj14/contest1/contest1_forecast.csv', header=None))
x = df.iloc[:, 1:]   # columns 2 onward are the features
label = df.iloc[:, 0]   # column 1 is the label
# reuse the scaler fitted on the training data
# (refitting here would apply a different scaling to the forecast set)
x_scaled = min_max_scaler.transform(x.values)
x = pd.DataFrame(x_scaled)
# predict
y = model.predict(x)
# write the result file (the snippet is truncated here in the source)
for i in range(10000):
    f = open('/mnt/sd01/sjjs_bj14/ans.csv', 'r+')
    f.read()
Example #28
import json
import pickle

import numpy
# mlp is presumably MLPRegressor here (travel-time regression); mms aliases
# MinMaxScaler, and stop_tools is a project helper module
from sklearn.neural_network import MLPRegressor as mlp
from sklearn.preprocessing import MinMaxScaler as mms

# routes_file is a stand-in; the JSON path is truncated in the source snippet
routes = json.loads(open(routes_file, 'r').read())
route = routes['15'][1]
models = []
features = ['day', 'month', 'hour', 'weekend', 'vappr']
for i in range(1, len(route) - 1):
    stopA = str(route[i])
    stopB = str(route[i + 1])
    print('Building for', stopA, 'to', stopB)
    df = stop_tools.stop_data(stopA, stopB)
    df['traveltime'] = df['actualtime_arr_to'] - df['actualtime_arr_from']
    df['weekend'] = df['day'] > 4
    print(df['traveltime'].mean())
    Y = numpy.array(list(df['traveltime'])).reshape(-1, 1)
    transformer2 = mms().fit(Y)
    Y = transformer2.transform(Y)
    transformer1 = mms().fit(df[features])
    X = transformer1.transform(df[features])
    # ravel() avoids sklearn's column-vector warning
    model = mlp(hidden_layer_sizes=(40, 40, 40)).fit(X, Y.ravel())
    models.append({
        'transformer': transformer1,
        'transformer2': transformer2,
        'model': model
    })
    del df, X, Y

with open('/data/chained_models_neural.bin', 'wb') as handle:
    pickle.dump(models, handle, protocol=pickle.HIGHEST_PROTOCOL)
Example #29
from sklearn.linear_model import LogisticRegression as LR
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier as RF
from sklearn.ensemble import GradientBoostingClassifier as GBDT
from sklearn.ensemble import AdaBoostClassifier as AdaBoost
from sklearn.ensemble import BaggingClassifier
from sklearn.ensemble import ExtraTreesClassifier as etc
from sklearn.neighbors import KNeighborsClassifier as knc
from sklearn.neural_network import MLPClassifier as mlp

valid_data = data[3200:].reset_index()

clf_gender = mlp(hidden_layer_sizes=(2, 1), verbose=0, activation='tanh')
clf_gender.fit(f_tfidf[:3200], data.gender[:3200])
valid_data.gender = clf_gender.predict(f_tfidf[3200:])

# clf_age_pre = LR()
# clf_age_pre.fit(f_tfidf[:3200], data.age[:3200])

clf_age = GBDT(n_estimators=300, verbose=1)
clf_age.fit(f_tfidf[:3200], data.age[:3200])
valid_data.age = clf_age.predict(f_tfidf[3200:])

clf_location = GBDT(n_estimators=300, verbose=1)
clf_location.fit(f_tfidf[:3200], data.location[:3200])
valid_data.location = clf_location.predict(f_tfidf[3200:])

# write the predictions out to temp.csv
Example #30
    def treinar(self, x, y):
        # "treinar" (Portuguese: train) fits an MLP with a generous iteration cap
        clf = mlp(max_iter=2000)
        clf.fit(x, y)
        return clf