Ejemplo n.º 1
0
 def __init__(self, x_data: np.ndarray, y_data: np.ndarray):
     """Fit a separate model for each side of the default training border.

     The inputs are partitioned with ``Util.split_arrays_by_value`` using
     ``default_train_border``; each partition is then passed to
     ``Train.fit_gbm`` together with ``Train.gbm_optimal_config``.

     Args:
         x_data: Feature array handed to the splitter.
         y_data: Target array handed to the splitter.
     """
     x_left, x_right, y_left, y_right = Util.split_arrays_by_value(
         x_data, y_data, default_train_border)
     config = Train.gbm_optimal_config
     # Left model is fitted before the right one, as in the original order.
     self.model_left = Train.fit_gbm(x_left, y_left, config)
     self.model_right = Train.fit_gbm(x_right, y_right, config)
Ejemplo n.º 2
0
 def triple_model_train_border_6_5(x: np.ndarray, y: np.ndarray) -> MyModel:
     """Build a triple model with the training border fixed at 6.5.

     Delegates to ``SplitModels._triple_model`` with ``np.maximum`` as the
     third argument (presumably the function that combines the sub-model
     predictions -- confirm in ``_triple_model``) and a callback that calls
     ``Util.cut_middle`` with the bounds 0 and 10.
     """

     def middle_cut(x_part, y_part):
         return Util.cut_middle(x_part, y_part, 0, 10)

     return SplitModels._triple_model(
         x, y, np.maximum, middle_cut, train_border=6.5)
Ejemplo n.º 3
0
 def __init__(self, x_data: np.ndarray, y_data: np.ndarray):
     """Fit three models: one on the middle cut and one per border side.

     ``train_border`` and ``cut_middle`` are not defined in this method;
     they are resolved from an enclosing scope.

     Args:
         x_data: Feature array used for both the split and the middle cut.
         y_data: Target array used for both the split and the middle cut.
     """
     x_left, x_right, y_left, y_right = Util.split_arrays_by_value(
         x_data, y_data, train_border)
     x_middle, y_middle = cut_middle(x_data, y_data)
     config = Train.gbm_optimal_config
     # Fit order (all, left, right) is kept identical to the original.
     self.model_all = Train.fit_gbm(x_middle, y_middle, config)
     self.model_left = Train.fit_gbm(x_left, y_left, config)
     self.model_right = Train.fit_gbm(x_right, y_right, config)
Ejemplo n.º 4
0
def run_hists():
    """Plot histograms of the training targets: all, left part, right part.

    The training frame comes from ``files.train_df()``. Targets are split
    with ``Util.split_arrays_by_value`` at ``default_train_border`` and each
    group is passed to ``hist`` with its own label, title and colour.
    """
    frame = files.train_df()
    features = frame[Train.x_names].values
    targets = frame[[Train.y_name]].values
    border = default_train_border
    # Only the target halves are plotted; the feature halves are discarded.
    _x_left, _x_right, y_left, y_right = Util.split_arrays_by_value(
        features, targets, border)

    plots = (
        (targets, 'all', 'target values', 'r'),
        (y_left, 'left', f'target values smaller {border:.2f}', 'g'),
        (y_right, 'right', f'target values greater {border:.2f}', 'b'),
    )
    for values, label, title, colour in plots:
        hist(values, label, title, color=colour)
Ejemplo n.º 5
0
def hist_predictions(x, y):
    """Plot histograms of test-set predictions from three fitted models.

    The data is split 67/33 into train and test parts with a fixed random
    state. One model is fitted on the whole training part and one on each
    side of ``default_train_border``; all three then predict on the same
    test features and each prediction array is passed to ``hist``.

    Args:
        x: Feature array.
        y: Target array.
    """
    x_train, x_test, y_train, _y_test = train_test_split(
        x, y, test_size=0.33, random_state=203842039)
    border = default_train_border
    x_left, x_right, y_left, y_right = Util.split_arrays_by_value(
        x_train, y_train, border)

    config = Train.gbm_optimal_config
    training_sets = ((x_train, y_train), (x_left, y_left), (x_right, y_right))
    # Fit all models first, then predict, then plot -- same phase order as
    # the original straight-line code.
    models = [
        Train.fit_gbm(features, targets, config)
        for features, targets in training_sets
    ]
    predictions = [model.predict(x_test) for model in models]

    captions = (
        ('pred_all', 'predicted values'),
        ('pred_left', f'predicted values smaller {border:.2f}'),
        ('pred_right', f'predicted values greater {border:.2f}'),
    )
    for predicted, (label, title) in zip(predictions, captions):
        hist(predicted, label, title, color='orange')
Ejemplo n.º 6
0
def tryout_mean_of_the_greatest():
    """Print the result of ``Util.mean_of_greatest`` for three sample arrays."""
    samples = (
        [1.37, 2.4, 4., 7.1],
        [1.4, 2.5, 1., 7.],
        [1.4, 0.4, 1., 7.],
    )
    first, second, third = (np.array(row) for row in samples)
    print('amog:', Util.mean_of_greatest(first, second, third))
Ejemplo n.º 7
0
import argparse
from pprint import pprint

import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from tabplay import Files, Train, Util

# Shared instances of the tabplay ``Util`` and ``Files`` helpers, created
# once when this module is imported.
util = Util()
files = Files()


def scaler():
    x = [
        [0, 0],
        [3, 0],
        [1, 1],
        [1, 2],
    ]
    x1 = [
        [0, 0],
        [3, 0],
        [2, 5],
        [1, 1],
    ]
    y = [
        [0],
        [3],
        [1],
Ejemplo n.º 8
0
def run_train_it():
    """Run one group of split-training experiments and print the results.

    The group is chosen by the hard-coded ``tid``. Each configuration in the
    group is repeated ``cnt`` times with seeds derived from the
    configuration's own seed. The repetitions are evaluated in a
    ``multiprocessing.Pool`` through ``process_split_train``, and the
    outcomes are grouped by their first element and pretty-printed.

    The training data is loaded from ``files.train_df()``. Only the second
    and fourth arrays returned by
    ``Util.split_arrays_by_value(..., min_data)`` are used (presumably the
    rows on the upper side of ``min_data`` -- confirm against the helper).
    """
    cnt = 20
    tid = '05'

    def train_it(x_dat, y_dat):
        def make(desc, seed, f):
            # Every configuration shares the same data arrays.
            return SplitTrain(desc, seed, x_dat, y_dat, f)

        split_train_cfgs = {
            '01': [
                make("no split", 1217, SplitModels.no_split),
                make("triple max", 1983, SplitModels.triple_model_maximum),
            ],
            '02': [
                make("triple mean of g", 1283,
                     SplitModels.triple_model_mean_of_greatest),
            ],
            '03': [
                make("triple cut m", 1281113,
                     SplitModels.triple_model_maximum_narrow_m),
                make("triple cut s", 1232823,
                     SplitModels.triple_model_maximum_narrow_s),
                make("triple cut xs", 145453,
                     SplitModels.triple_model_maximum_narrow_xs),
            ],
            '04': [
                make("triple cut l", 132823,
                     SplitModels.triple_model_maximum_narrow_l),
                make("triple cut xl", 54453,
                     SplitModels.triple_model_maximum_narrow_xl),
            ],
            '05': [
                make("border 6.5", 54445,
                     SplitModels.triple_model_train_border_6_5),
                make("border 7.0", 54458,
                     SplitModels.triple_model_train_border_7_0),
                make("border 7.5", 54485,
                     SplitModels.triple_model_train_border_7_5),
                make("border 8.0", 54445,
                     SplitModels.triple_model_train_border_8_0),
                make("border 8.5", 54412,
                     SplitModels.triple_model_train_border_8_5),
                make("border 9.0", 12445,
                     SplitModels.triple_model_train_border_9_0),
                make("border best", 823, SplitModels.triple_model_maximum),
                make("no split", 1217, SplitModels.no_split),
            ],
        }

        for template in split_train_cfgs[tid]:
            # A fresh pool per configuration; the global NumPy seed is set
            # only after the pool exists, exactly as in the original.
            with multiprocessing.Pool() as workers:
                np.random.seed(template.seed)
                jobs = []
                for job_seed in np.random.randint(0, 1000000, cnt):
                    jobs.append(
                        SplitTrain(desc=template.desc,
                                   seed=job_seed,
                                   x=template.x,
                                   y=template.y,
                                   f=template.f))
                grouped = {}
                for outcome in workers.map(process_split_train, jobs):
                    key, value = outcome[0], outcome[1]
                    grouped.setdefault(key, []).append(value)
                pprint(grouped)

    print(f"---> run_train_it id:{tid} cnt:{cnt}")
    frame = files.train_df()
    x_all = frame[Train.x_names].values
    y_all = frame[[Train.y_name]].values
    _x_rest, x, _y_rest, y = Util.split_arrays_by_value(
        x_all, y_all, min_data)
    print('x', x.shape)
    print('y', y.shape)

    train_it(x, y)
    print(f"<--- run_train_it id:{tid} cnt:{cnt}")
Ejemplo n.º 9
0
 def triple_model_mean_of_greatest(x: np.ndarray, y: np.ndarray) -> MyModel:
     """Build a triple model that uses ``Util.mean_of_greatest``.

     Delegates to ``SplitModels._triple_model`` with
     ``Util.mean_of_greatest`` as the third argument and a callback that
     calls ``Util.cut_middle`` with the bounds 0 and 10.
     """

     def middle_cut(x_part, y_part):
         return Util.cut_middle(x_part, y_part, 0, 10)

     return SplitModels._triple_model(
         x, y, Util.mean_of_greatest, middle_cut)
Ejemplo n.º 10
0
 def triple_model_maximum_narrow_xs(x: np.ndarray,
                                    y: np.ndarray) -> MyModel:
     """Build a triple model with a middle cut between 7.92 and 7.96.

     Delegates to ``SplitModels._triple_model`` with ``np.maximum`` as the
     third argument and a callback that calls ``Util.cut_middle`` with the
     bounds 7.92 and 7.96.
     """

     def middle_cut(x_part, y_part):
         return Util.cut_middle(x_part, y_part, 7.92, 7.96)

     return SplitModels._triple_model(x, y, np.maximum, middle_cut)
Ejemplo n.º 11
0
 def triple_model_maximum(x: np.ndarray, y: np.ndarray) -> MyModel:
     """Build a triple model that uses ``np.maximum``.

     Delegates to ``SplitModels._triple_model`` with ``np.maximum`` as the
     third argument and a callback that calls ``Util.cut_middle`` with the
     bounds 0 and 10.
     """

     def middle_cut(x_part, y_part):
         return Util.cut_middle(x_part, y_part, 0, 10)

     return SplitModels._triple_model(x, y, np.maximum, middle_cut)