def __trends(self):
    """Run every model in ``self.models`` over ``self.data_df``.

    ``self.result_map`` is reset to an empty dict and then filled with one
    entry per model file: the key is the model file as listed in
    ``self.models`` and the value is whatever ``model.predict`` returns for
    the formatted data.
    """
    # Kept as a function-scope import, exactly as in the original code.
    from model_holder import load_model

    self.result_map = {}
    for model_path in self.models:
        print('Running regression ', model_path, '....')

        # The holder yields the fitted model together with the column list
        # that is handed to the formatter as ``most_dependent_columns``.
        model, important_columns = load_model(model_path).get()

        prepared = dtFrm.LANLDataFormatter(
            data_df=self.data_df,
            data_type='test',
            doTransform=True,
            doScale=True,
            most_dependent_columns=important_columns,
        ).transform()

        # 'segment_id' is removed before the frame is passed to the model.
        features = prepared.drop(['segment_id'], axis=1)
        self.result_map[model_path] = model.predict(features)
# Data preparation for an MLP regression model ('mlp_regression.model'):
# samples the training data, runs it through LANLDataFormatter and builds
# the feature frame (x_train) and target array (y_train).
import numpy as np
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import train_test_split
# Public import path. The private module
# ``sklearn.neural_network.multilayer_perceptron`` was deprecated in
# scikit-learn 0.22 and removed in 0.24, so importing from it fails on
# current releases.
from sklearn.neural_network import MLPRegressor

import accoustic_sampler as acs
import data_formatter as dtFrm
from model_holder import ModelHolder, load_model

model_name = 'mlp_regression.model'

# Load the sampled training data.
sampler = acs.AccousticSampler('D:/PYTHON_WORKSPACES/Kaggles/EarthquakePrediction/LANL_Earthquake/data/train_data_new')
sampler.fit()
data_df = sampler.get()

# Format the data as training data with transform and scaling enabled.
# NOTE(review): cols_to_keep=50 presumably limits the number of retained
# feature columns -- confirm against LANLDataFormatter.
formatter = dtFrm.LANLDataFormatter(
    data_df=data_df,
    data_type='train',
    doTransform=True,
    doScale=True,
    cols_to_keep=50,
)
data_df = formatter.transform()
most_dependent_columns = formatter.getMostImpCols()

# Disabled experiments, kept for reference:
# data_df = data_df.drop(['acc_max','acc_min','chg_acc_max','chg_acc_min'],axis=1)
# Splitting data into test_random_forest and train
# train_set, test_set = train_test_split(data_df, test_size=0.2, random_state=np.random.randint(1, 1000))

# Separate output from inputs
y_train = data_df['time_to_failure']
x_train_seg = data_df['segment_id']
x_train = data_df.drop(['time_to_failure', 'segment_id'], axis=1)

# The target is rounded to two decimals and converted to a NumPy array.
y_train = np.around(y_train.values, decimals=2)

# mlpReg = MLPRegressor(verbose=True, tol=0.0001, max_iter=200000, n_iter_no_change=10000, hidden_layer_sizes=(200,))
# Data preparation for comparing several scikit-learn regressors: samples the
# training data, formats it, makes a 60/40 train/test split and separates the
# target column from the input columns.
#
# All scikit-learn names are imported from their public packages. The private
# modules used previously (``sklearn.linear_model.base``,
# ``sklearn.svm.classes``, ``sklearn.metrics.regression``,
# ``sklearn.metrics.scorer``, ``sklearn.ensemble.forest``) were deprecated in
# scikit-learn 0.22 and removed in 0.24, so importing from them fails on
# current releases.
import sys

from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import Lasso
from sklearn.linear_model import LinearRegression
from sklearn.metrics import make_scorer
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.svm import SVR

# load dataset
# NOTE(review): ``acs`` and ``dtFrm`` are not imported in this part of the
# file; they are assumed to be bound earlier -- confirm.
sampler = acs.AccousticSampler('D:/PYTHON_WORKSPACES/Kaggles/EarthquakePrediction/LANL_Earthquake/data/train_data_new')
sampler.fit()
data_df = sampler.get()

formatter = dtFrm.LANLDataFormatter(data_df=data_df, data_type='train', doTransform=True)
data_df = formatter.transform()

# Disabled experiment, kept for reference:
# data_df = data_df.drop(['acc_max', 'acc_min', 'chg_acc_max', 'chg_acc_min'], axis=1)

# Splitting data into test_random_forest and train
train_set, test_set = train_test_split(data_df, test_size=0.4, random_state=42)

# Separate output from inputs
# NOTE(review): the training target and inputs are taken from the full
# ``data_df`` rather than from ``train_set``, so they include every row of
# ``test_set``. If ``test_set`` is meant to be an unseen hold-out, these
# should be built from ``train_set`` instead. Left unchanged here because the
# code that consumes them is not visible in this part of the file.
y_train = data_df['time_to_failure']
x_train_seg = data_df['segment_id']
x_train = data_df.drop(['time_to_failure', 'segment_id'], axis=1)

y_test = test_set['time_to_failure']
x_test_seg = test_set['segment_id']