import joblib
import lightgbm as lgb
from sklearn.multioutput import MultiOutputRegressor

import data_io


def train(model_name, datafrac):
    """Fit a multi-output LightGBM regressor on a fraction of the HDF5
    training set and persist it with joblib."""
    datafile = "/project/spice/radiation/ML/CRM/data/models/datain/test_train_data/train_test_data_021501AQ1H_raw.hdf5"
    # datafrac = 0.2
    nlevs = 45
    # x_train, y_train, x_test, y_test = data_io.get_data(datafile, datafrac, nlevs, normaliser="/project/spice/radiation/ML/CRM/data/models/normaliser/021501AQ_standardise_mx")
    x_train, y_train, x_test, y_test = data_io.get_data(datafile, datafrac, nlevs)
    print(x_train.shape, y_train.shape)
    # model = MultiOutputRegressor(xgboost.XGBRegressor(n_estimators=100), n_jobs=4)
    model = MultiOutputRegressor(lgb.LGBMRegressor(n_estimators=100))
    model.fit(x_train, y_train)
    print("saving model {0}".format(model_name))
    joblib.dump(model, model_name)
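
# A minimal usage sketch; the model filename and data fraction below are
# hypothetical, not taken from the original script.
if __name__ == "__main__":
    train("lgb_multiout_021501AQ1H.joblib", datafrac=0.2)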
import numpy as np
import matplotlib.pyplot as plt
import data_io as dl
import metrics as m
import features as f
import estimators as e
from sklearn.pipeline import make_pipeline
from definitions import target_fields
from sklearn import svm, cross_validation
from sklearn.grid_search import GridSearchCV
from sklearn.ensemble import GradientBoostingRegressor
import pywt
import time
import sys

data = dl.get_data('train')
targets_all = np.array([d.targets for d in data])
y_d = {k: targets_all[:, target_fields.index(k)] for k in target_fields}

feats = f.get_feature_union()
est_d = e.get_estimators_dict()
x_train_all = feats.fit_transform(data)

# BUILDING THE PIPELINES
pipe_dict = {a: make_pipeline(feats, est_d[a]) for a in target_fields}


def sample_dict(d, ind_arr):
    # Select the rows given by ind_arr from every array in the dict.
    # The loop body is an assumption: the original was truncated after
    # the line "ret = {}".
    ret = {}
    for k, v in d.items():
        ret[k] = v[ind_arr]
    return ret
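
# Usage sketch, assumed rather than taken from the original: each pipeline
# re-runs the feature union and fits its estimator on one target's values.
for field in target_fields:
    pipe_dict[field].fit(data, y_d[field])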
__author__ = 'jason'

import numpy as np
import matplotlib.pyplot as plt
import data_io as dl
import metrics as m
import features as f
from definitions import target_fields
from sklearn import svm, cross_validation
from sklearn.ensemble import GradientBoostingRegressor
import pywt
import time

data = dl.get_data('train')
spectra = data['spectra']
targets = data['targets']
x_train_all = f.get_features(data)

# One regressor per soil property; the earlier SVR settings are kept
# commented out for reference.
clfs = {
    # 'Ca': svm.SVR(C=10000.0),
    # 'P': svm.SVR(C=5000.0),
    # 'pH': svm.SVR(C=10000.0),
    # 'SOC': svm.SVR(C=10000.0),
    # 'Sand': svm.SVR(C=10000.0),
    'Ca': GradientBoostingRegressor(n_estimators=200),
    'P': GradientBoostingRegressor(n_estimators=200),
    'pH': GradientBoostingRegressor(n_estimators=200),
    'SOC': GradientBoostingRegressor(n_estimators=200),
    'Sand': GradientBoostingRegressor(n_estimators=200),
}
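
# Usage sketch (assumed, not in the original): fit one regressor per target
# column on the shared feature matrix. Indexing targets as a 2-D array is a
# guess about what dl.get_data returns.
for field in target_fields:
    clfs[field].fit(x_train_all, targets[:, target_fields.index(field)])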
from torch.utils.data import TensorDataset

from data import get_data  # assumed module path for the function under test


def test_get_data():
    train_ds, valid_ds = get_data()
    assert isinstance(train_ds, TensorDataset)
    assert isinstance(valid_ds, TensorDataset)
    assert len(train_ds) == 50_000
    assert len(valid_ds) == 10_000
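
# A minimal get_data sketch that would satisfy the test above, assuming the
# 50_000/10_000 split is an MNIST-sized dataset; the tensor shapes and the
# random data are placeholders, not the project's real loader.
import torch
from torch.utils.data import TensorDataset


def get_data():
    x = torch.randn(60_000, 784)
    y = torch.randint(0, 10, (60_000,))
    train_ds = TensorDataset(x[:50_000], y[:50_000])
    valid_ds = TensorDataset(x[50_000:], y[50_000:])
    return train_ds, valid_ds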
import os

from keras.callbacks import ModelCheckpoint
from keras.optimizers import Nadam

# Project-local helpers; these module names are assumed.
from model import nvidia_dropout_model
from data import get_data, generator


def train(model_path='model.h5'):
    epochs = 5
    batch_size = 64
    input_shape = (160, 320, 3)

    m = nvidia_dropout_model(input_shape=input_shape)
    optimizer = Nadam()
    m.compile(loss='mean_squared_error', optimizer=optimizer, metrics=[])

    train_samples, validation_samples, samples_per_epoch_training, samples_per_epoch_validation = get_data(
        batch_size, epochs)
    print('Training size: %d, samples per epoch: %d; validation size: %d, samples per epoch: %d' %
          (len(train_samples), samples_per_epoch_training,
           len(validation_samples), samples_per_epoch_validation))
    print('-' * 84)

    # Save the best model (lowest validation loss) seen during training.
    checkpointer = ModelCheckpoint(
        filepath=os.path.join(os.path.split(__file__)[0], model_path),
        verbose=1,
        save_best_only=True)

    train_generator = generator(train_samples, batch_size=batch_size, input_shape=input_shape)
    validation_generator = generator(validation_samples, batch_size=batch_size, input_shape=input_shape)

    history = m.fit_generator(train_generator,
                              samples_per_epoch=samples_per_epoch_training,
                              nb_epoch=epochs,
                              verbose=1,
                              validation_data=validation_generator,
                              nb_val_samples=samples_per_epoch_validation,
                              pickle_safe=True,
                              callbacks=[checkpointer])
    score = m.evaluate_generator(validation_generator,
                                 val_samples=samples_per_epoch_validation,
                                 pickle_safe=True)
    print('Validation MSE:', score)
    return m, history
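
# Usage sketch (assumed entry point; the output filename is illustrative):
if __name__ == '__main__':
    model, history = train(model_path='model.h5')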