Example 1
import joblib
import lightgbm as lgb
from sklearn.multioutput import MultiOutputRegressor

import data_io  # local helper module that loads the HDF5 train/test split


def train(model_name, datafrac):
    datafile = "/project/spice/radiation/ML/CRM/data/models/datain/test_train_data/train_test_data_021501AQ1H_raw.hdf5"
    # datafrac = 0.2
    nlevs = 45
    # x_train, y_train, x_test, y_test = data_io.get_data(datafile, datafrac, nlevs, normaliser="/project/spice/radiation/ML/CRM/data/models/normaliser/021501AQ_standardise_mx")
    x_train, y_train, x_test, y_test = data_io.get_data(datafile, datafrac, nlevs)
    print(x_train.shape, y_train.shape)
    # model = MultiOutputRegressor(xgboost.XGBRegressor(n_estimators=100), n_jobs=4)
    model = MultiOutputRegressor(lgb.LGBMRegressor(n_estimators=100))
    model.fit(x_train, y_train)
    print("saving model {0}".format(model_name))
    joblib.dump(model, model_name)
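To smoke-test the function, something like the following would work; the file name and data fraction here are hypothetical:

# Hypothetical usage: train on 20% of the data, then reload the dumped model.
train('lgbm_multiout.joblib', datafrac=0.2)
model = joblib.load('lgbm_multiout.joblib')
# model.predict(x_test) returns one column per target, like y_train.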
Example 2
import numpy as np
import matplotlib.pyplot as plt
import data_io as dl
import metrics as m
import features as f
import estimators as e
from sklearn.pipeline import make_pipeline
from definitions import target_fields
from sklearn import svm
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import GradientBoostingRegressor
import pywt
import time
import sys

data = dl.get_data('train')

targets_all = np.array([d.targets for d in data])
y_d = {k: targets_all[:, target_fields.index(k)] for k in target_fields}

feats = f.get_feature_union()
est_d = e.get_estimators_dict()

x_train_all = feats.fit_transform(data)

# Build one feature-extraction + estimator pipeline per target field.
pipe_dict = {a: make_pipeline(feats, est_d[a]) for a in target_fields}


def sample_dict(d, ind_arr):
    # Sub-sample every array in the dict at the given indices.
    return {k: v[ind_arr] for k, v in d.items()}
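A plausible next step, not shown in the truncated snippet, is to cross-validate each per-target pipeline. A minimal sketch, assuming y_d holds one 1-D target array per field and the pipelines accept the raw data objects:

from sklearn.model_selection import cross_val_score

# Hypothetical continuation: score every per-target pipeline with 3-fold CV.
for field in target_fields:
    scores = cross_val_score(pipe_dict[field], data, y_d[field], cv=3,
                             scoring='neg_mean_squared_error')
    print(field, -scores.mean())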
Example 3
__author__ = 'jason'

import numpy as np
import matplotlib.pyplot as plt
import data_io as dl
import metrics as m
import features as f
from definitions import target_fields
from sklearn import svm
from sklearn.ensemble import GradientBoostingRegressor
import pywt
import time

data = dl.get_data('train')
spectra = data['spectra']
targets = data['targets']
x_train_all = f.get_features(data)

clfs = {
    # 'Ca':   svm.SVR(C=10000.0),
    # 'P':    svm.SVR(C=5000.0),
    # 'pH':   svm.SVR(C=10000.0),
    # 'SOC':  svm.SVR(C=10000.0),
    # 'Sand': svm.SVR(C=10000.0),
    'Ca': GradientBoostingRegressor(n_estimators=200),
    'P': GradientBoostingRegressor(n_estimators=200),
    'pH': GradientBoostingRegressor(n_estimators=200),
    'SOC': GradientBoostingRegressor(n_estimators=200),
    'Sand': GradientBoostingRegressor(n_estimators=200),
}
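What typically follows a dict like this is a per-target fit loop. A minimal sketch, assuming targets is an (n_samples, n_targets) array whose columns are ordered like target_fields:

# Hypothetical continuation: fit one regressor per soil property and time it.
for name, clf in clfs.items():
    t0 = time.time()
    clf.fit(x_train_all, targets[:, target_fields.index(name)])
    print('%s fitted in %.1f s' % (name, time.time() - t0))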
Example 4
from torch.utils.data import TensorDataset


# get_data is assumed to be imported from the module under test.
def test_get_data():
    train_ds, valid_ds = get_data()
    assert isinstance(train_ds, TensorDataset)
    assert isinstance(valid_ds, TensorDataset)
    assert len(train_ds) == 50_000
    assert len(valid_ds) == 10_000
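For reference, a stub of get_data that would satisfy this test. The random tensors and the 784-feature width are hypothetical; only the 50,000/10,000 split is implied by the assertions:

import torch
from torch.utils.data import TensorDataset

def get_data(n_train=50_000, n_valid=10_000, n_features=784):
    # Hypothetical stand-in: random data with the split the test expects.
    x = torch.randn(n_train + n_valid, n_features)
    y = torch.randint(0, 10, (n_train + n_valid,))
    return (TensorDataset(x[:n_train], y[:n_train]),
            TensorDataset(x[n_train:], y[n_train:]))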
Example 5
import os

from keras.callbacks import ModelCheckpoint
from keras.optimizers import Nadam

# nvidia_dropout_model, get_data and generator are assumed to come from
# local project modules; the fit_generator keywords below target the
# Keras 1.x API.


def train(model_path='model.h5'):
    epochs = 5
    batch_size = 64
    input_shape = (160, 320, 3)

    m = nvidia_dropout_model(input_shape=input_shape)

    optimizer = Nadam()
    m.compile(loss='mean_squared_error', optimizer=optimizer, metrics=[])

    train_samples, validation_samples, samples_per_epoch_training, samples_per_epoch_validation = get_data(
        batch_size, epochs)
    print('Training size: %d (samples per epoch: %d), '
          'validation size: %d (samples per epoch: %d)'
          % (len(train_samples), samples_per_epoch_training,
             len(validation_samples), samples_per_epoch_validation))
    print(
        "------------------------------------------------------------------------------------"
    )

    checkpointer = ModelCheckpoint(filepath=os.path.join(
        os.path.split(__file__)[0], model_path),
                                   verbose=1,
                                   save_best_only=True)

    train_generator = generator(train_samples,
                                batch_size=batch_size,
                                input_shape=input_shape)
    validation_generator = generator(validation_samples,
                                     batch_size=batch_size,
                                     input_shape=input_shape)

    history = m.fit_generator(train_generator,
                              samples_per_epoch=samples_per_epoch_training,
                              nb_epoch=epochs,
                              verbose=1,
                              validation_data=validation_generator,
                              nb_val_samples=samples_per_epoch_validation,
                              pickle_safe=True,
                              callbacks=[checkpointer])

    score = m.evaluate_generator(validation_generator,
                                 val_samples=samples_per_epoch_validation,
                                 pickle_safe=True)

    print('Validation MSE:', score)
    return m, history
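A minimal entry point for running the above; the model path is hypothetical:

# Hypothetical usage: train the network and keep the history for inspection.
if __name__ == '__main__':
    model, history = train(model_path='model.h5')
    print('val_loss per epoch:', history.history.get('val_loss'))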