Ejemplo n.º 1
0
 def __trends(self):
     """Predict with every stored model and collect the outputs in ``self.result_map``.

     ``self.result_map`` is first reset to an empty dict.  Then, for each
     entry of ``self.models``:

     * the entry is passed to ``model_holder.load_model`` and the returned
       holder's ``get()`` yields the model together with the columns it
       depends on;
     * ``self.data_df`` is run through a ``LANLDataFormatter`` configured
       with ``data_type='test'``, transform and scaling enabled, and those
       columns;
     * the ``segment_id`` column is dropped before calling
       ``model.predict``;
     * the predictions are stored under the model file name.
     """
     # Imported lazily, inside the method, exactly as the original does.
     from model_holder import load_model

     self.result_map = {}
     for model_file in self.models:
         print('Running regression ', model_file, '....')
         model, most_dependent_columns = load_model(model_file).get()
         # A fresh formatter is built per model because each model comes
         # with its own list of columns.
         features = dtFrm.LANLDataFormatter(
             data_df=self.data_df,
             data_type='test',
             doTransform=True,
             doScale=True,
             most_dependent_columns=most_dependent_columns,
         ).transform().drop(['segment_id'], axis=1)
         self.result_map[model_file] = model.predict(features)
Ejemplo n.º 2
0
# Data preparation for the MLP regression model: load the training data,
# run it through the project formatter and split it into inputs and target.
import numpy as np
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import train_test_split
# MLPRegressor comes from the public package path; the private
# ``sklearn.neural_network.multilayer_perceptron`` module was deprecated in
# scikit-learn 0.22 and is no longer importable in later releases.
from sklearn.neural_network import MLPRegressor

import accoustic_sampler as acs
import data_formatter as dtFrm
from model_holder import ModelHolder, load_model

model_name = 'mlp_regression.model'

# Build the raw data set from the training-data directory.
sampler = acs.AccousticSampler('D:/PYTHON_WORKSPACES/Kaggles/EarthquakePrediction/LANL_Earthquake/data/train_data_new')
sampler.fit()
data_df = sampler.get()

# Transform and scale the training data.
# NOTE(review): cols_to_keep=50 presumably limits the output to the 50 most
# relevant columns, reported by getMostImpCols() — confirm against
# LANLDataFormatter.
formatter = dtFrm.LANLDataFormatter(
    data_df=data_df,
    data_type='train',
    doTransform=True,
    doScale=True,
    cols_to_keep=50,
)
data_df = formatter.transform()
most_dependent_columns = formatter.getMostImpCols()

# Disabled alternatives kept from the original script:
# data_df = data_df.drop(['acc_max','acc_min','chg_acc_max','chg_acc_min'],axis=1)
# Splitting data into test and train
# train_set, test_set = train_test_split(data_df, test_size=0.2, random_state=np.random.randint(1, 1000))

# Separate output from inputs; the target is rounded to two decimals.
y_train = np.around(data_df['time_to_failure'].values, decimals=2)
x_train_seg = data_df['segment_id']
x_train = data_df.drop(['time_to_failure', 'segment_id'], axis=1)

# mlpReg = MLPRegressor(verbose=True, tol=0.0001, max_iter=200000, n_iter_no_change=10000, hidden_layer_sizes=(200,))
import sys

# All estimators and helpers are imported from scikit-learn's public package
# paths.  The private modules used previously (``linear_model.base``,
# ``svm.classes``, ``metrics.regression``, ``metrics.scorer``,
# ``ensemble.forest``) were deprecated in scikit-learn 0.22 and are no longer
# importable in later releases; the public paths bind the same names.
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import Lasso, LinearRegression
from sklearn.metrics import make_scorer, mean_absolute_error
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.svm import SVC, SVR

# load dataset
sampler = acs.AccousticSampler('D:/PYTHON_WORKSPACES/Kaggles/EarthquakePrediction/LANL_Earthquake/data/train_data_new')
sampler.fit()
data_df = sampler.get()

# Transform the training data (no scaling is requested here).
formatter = dtFrm.LANLDataFormatter(data_df=data_df, data_type='train', doTransform=True)
data_df = formatter.transform()

# data_df = data_df.drop(['acc_max', 'acc_min', 'chg_acc_max', 'chg_acc_min'], axis=1)

# Splitting data into test and train: 40% of the rows are held out, with a
# fixed random_state so the split is the same on every run.
train_set, test_set = train_test_split(data_df, test_size=0.4, random_state=42)

# Separate output from inputs
# NOTE(review): the training target and inputs below are taken from the full
# data_df rather than from train_set, so the rows held out in test_set are
# also part of the training data — confirm this is intended.
y_train = data_df['time_to_failure']
x_train_seg = data_df['segment_id']
x_train = data_df.drop(['time_to_failure', 'segment_id'], axis=1)

y_test = test_set['time_to_failure']
x_test_seg = test_set['segment_id']