def setUp(self):
    '''
    Load the historical data set and derive the fixtures used by the tests.

    Sets self.config_map, self.nan_data (first 100 rows taken before rows
    with missing values are dropped), self.input_data (rows with missing
    values dropped, index reset) and self.data (first 5 cleaned rows).
    '''
    raw_frame, self.config_map = read_input(
        'Input/historical_data.csv', 'Input/data_config.json')
    # Snapshot taken before dropna(), so missing values are kept here.
    self.nan_data = raw_frame.head(100)
    self.input_data = raw_frame.dropna().reset_index(drop=True)
    self.data = self.input_data.head(5)
Esempio n. 2
0
def estimator_selection(
        estimators=None,
        data_path=None,
        config_path=None,
        Kfolds=10,
        n_jobs=-1):
    '''
    Cross Validator Best Estimator Analysis

    Cross-validates every candidate estimator on the preprocessed data
    (features and labels each transformed with a scaling_type('min-max')
    scaler) and returns the candidate with the lowest RMSE.

    Args:
        estimators: non-empty, indexable sequence of estimators accepted by
            cross_val_score.
        data_path: data path forwarded to read_input.
        config_path: configuration path forwarded to read_input.
        Kfolds: forwarded to cross_val_score as ``cv``.
        n_jobs: forwarded to cross_val_score as ``n_jobs``.

    Returns:
        The element of ``estimators`` with the lowest cross-validated RMSE;
        the first one wins on ties.

    Raises:
        ValueError: if estimators, data_path or config_path is None, or if
            estimators is empty.
    '''
    if data_path is None or config_path is None or estimators is None:
        raise ValueError('Need Estimators, Data path and Config Path as arguments  !')
    # Fail before any I/O instead of at min() on an empty error list.
    if len(estimators) == 0:
        raise ValueError('Need at least one Estimator to select from !')
    data, config_map = read_input(data_path, config_path)
    data = data_preprocessor(data, config_map, 5, 'string')
    # NOTE(review): the column assignment on y below requires y to be a
    # DataFrame, i.e. config_map['label'] is presumably a list of column
    # names - confirm against the config file.
    label_columns = config_map['label']
    scale_columns = config_map["scale_columns"]
    # Explicit copy: y is written to below and must not alias `data`.
    y = data[label_columns].copy()
    # Drop the label columns by name rather than passing a DataFrame to drop().
    data = data.drop(label_columns, axis=1)
    data[scale_columns] = scaling_type(
            'min-max').fit_transform(data[scale_columns])
    y[label_columns] = scaling_type(
            'min-max').fit_transform(y[label_columns])
    rmse_errors = []
    for estimator in estimators:
        # Scores are negated MSEs, one per fold; flip the sign back.
        fold_mse = -cross_val_score(estimator, data, y,
            scoring='neg_mean_squared_error', cv=Kfolds, n_jobs=n_jobs)
        # Average (not sum) over folds so the value is a genuine RMSE; the
        # ranking between estimators is the same either way.
        rmse_errors.append(np.sqrt(np.mean(fold_mse)))
    best_index = min(range(len(rmse_errors)), key=rmse_errors.__getitem__)
    return estimators[best_index]
Esempio n. 3
0
 def setUp(self):
     '''
     Build the fixtures for the model tests.

     Reads the test data and configuration, preprocesses the data, splits
     it into train / validation / test parts and trains a LinearRegression
     estimator on the training part. Every intermediate result is stored
     on the test instance.
     '''
     raw_data, self.config_map = read_input(
         'Input/test_data.csv', 'Input/data_config.json')
     self.estimator = linear_model.LinearRegression()
     self.data = data_preprocessor(raw_data, self.config_map, 5, 'string')

     split_parts = split_train_test_validation_data(
         self.data, self.config_map, 0.25, 0.2, 5)
     (self.X_train, self.y_train,
      self.X_validation, self.y_validation,
      self.X_test, self.y_test) = split_parts

     training_result = training(
         self.estimator, self.X_train, self.y_train, self.config_map)
     (self.X_scaler, self.y_scaler,
      self.model, self.training_rmse) = training_result
Esempio n. 4
0
def run_fractional_stratification_model(
        estimator=None,
        data_path=None,
        config_path=None,
        num_iter=1,
        seed=None):
    '''
    Fractional Stratification Model Analysis

    Runs ``num_iter`` rounds of: split the preprocessed data with
    fractional_stratification, train the estimator on the training part,
    and compute the validation and test RMSE. A round is kept only when its
    training RMSE is strictly lower than its validation RMSE. The kept round
    with the lowest validation RMSE is printed and returned.

    Returns:
        A dict describing the best round (estimator, config_map, data
        splits, scalers, model and the three RMSE values), or None when no
        round was kept.

    Raises:
        ValueError: if estimator, data_path or config_path is None.
    '''
    required = (estimator, data_path, config_path)
    if any(argument is None for argument in required):
        raise ValueError('Need Estimator, Data path and Config Path as arguments !')

    frame, config_map = read_input(data_path, config_path)
    frame = data_preprocessor(frame, config_map, 5, 'string')

    # Kept rounds, keyed by validation RMSE so min() finds the best one.
    candidates = {}
    for _ in range(num_iter):
        # [0.6, 0.2, 0.2] is presumably the train/validation/test share.
        train_part, validation_part, test_part = fractional_stratification(
                frame, frame.columns, 4, [0.6, 0.2, 0.2], config_map, seed)
        X_train, y_train = split_data(train_part, config_map)
        X_validation, y_validation = split_data(validation_part, config_map)
        X_test, y_test = split_data(test_part, config_map)

        X_scaler, y_scaler, model, training_rmse = training(
                estimator, X_train, y_train, config_map)
        validation_rmse = calculate_rmse(
                X_validation, y_validation, X_scaler, y_scaler, model,
                config_map)
        testing_rmse = calculate_rmse(
                X_test, y_test, X_scaler, y_scaler, model, config_map)

        if not training_rmse < validation_rmse:
            continue
        candidates[validation_rmse] = {
            'estimator': estimator,
            'config_map': config_map,
            'X_train': X_train,
            'y_train': y_train,
            'X_validation': X_validation,
            'y_validation': y_validation,
            'X_test': X_test,
            'y_test': y_test,
            'X_scaler': X_scaler,
            'y_scaler': y_scaler,
            'model': model,
            'training_rmse': training_rmse,
            'validation_rmse': validation_rmse,
            'testing_rmse': testing_rmse,
        }

    if not candidates:
        return None

    winner = candidates[min(candidates)]
    summary = 'Best Model train error: {} | Best Model validation error: {} | Best Model test error: {}'
    print(summary.format(
        round(winner['training_rmse'], 7),
        round(winner['validation_rmse'], 7),
        round(winner['testing_rmse'], 7)))
    return winner
Esempio n. 5
0
def run_train_test_model(
        estimator=None,
        data_path=None,
        config_path=None,
        num_iter=1,
        seed=None):
    '''
    Train/test Model Analysis

    Runs ``num_iter`` rounds of: split the preprocessed data with
    split_train_test_data, train the estimator on the training part and
    compute the test RMSE. A round is kept only when its training RMSE is
    strictly lower than its test RMSE. The kept round with the lowest test
    RMSE is printed and returned.

    Returns:
        A dict describing the best round (estimator, config_map, data
        splits, scalers, model and both RMSE values), or None when no round
        was kept.

    Raises:
        ValueError: if estimator, data_path or config_path is None.
    '''
    required = (estimator, data_path, config_path)
    if any(argument is None for argument in required):
        raise ValueError('Need Estimator, Data path and Config Path as arguments !')

    frame, config_map = read_input(data_path, config_path)
    frame = data_preprocessor(frame, config_map, 5, 'string')

    # Kept rounds, keyed by test RMSE so min() finds the best one.
    candidates = {}
    for _ in range(num_iter):
        # 0.3 is presumably the share of rows held out for testing.
        X_train, y_train, X_test, y_test = split_train_test_data(
                frame, config_map, 0.3, seed)
        X_scaler, y_scaler, model, training_rmse = training(
                estimator, X_train, y_train, config_map)
        testing_rmse = calculate_rmse(
                X_test, y_test, X_scaler, y_scaler, model, config_map)

        if not training_rmse < testing_rmse:
            continue
        candidates[testing_rmse] = {
            'estimator': estimator,
            'config_map': config_map,
            'X_train': X_train,
            'y_train': y_train,
            'X_test': X_test,
            'y_test': y_test,
            'X_scaler': X_scaler,
            'y_scaler': y_scaler,
            'model': model,
            'training_rmse': training_rmse,
            'testing_rmse': testing_rmse,
        }

    if not candidates:
        return None

    winner = candidates[min(candidates)]
    summary = 'Best Model train error: {} | Best Model test error: {}'
    print(summary.format(
        round(winner['training_rmse'], 7),
        round(winner['testing_rmse'], 7)))
    return winner