def __init__(self, model, filename=None, num_rows=None):
    """Store the model and load the holdout data from a CSV file.

    Args:
        model: Object kept on ``self.model``; it is not used further here.
        filename: Path of the CSV file to read. ``None`` selects
            ``HOLDOUT_DATA``.
        num_rows: Maximum number of leading rows to keep. ``None`` loads
            the whole file.
    """
    self.logger = pocket_logger.get_my_logger()
    self.model = model
    use_col = feature_engineerer.get_necessary_col()
    dtypes = csv_loader.get_featured_dtypes()
    if filename is None:
        filename = HOLDOUT_DATA

    lazy_df = dd.read_csv(filename, dtype=dtypes, usecols=use_col)
    if num_rows is None:
        self.holdout_df = lazy_df.compute()
    else:
        # dd.read_csv does not accept the pandas ``nrows`` keyword, so the
        # row limit is applied with head(). npartitions=-1 lets head() draw
        # rows from every partition rather than only the first one.
        self.holdout_df = lazy_df.head(num_rows, npartitions=-1)

    # DataFrame.info() writes its report itself and returns None, so it is
    # called directly instead of being wrapped in print().
    self.holdout_df.info()
    print("Initialized validator.")
# File locations used by this script.
# NOTE(review): OUTPUT_DIR, INPUT_DIR, OUTPUT_DATA7 and the `os` import are not
# defined in this section -- presumably they are set up earlier in the file;
# confirm.
OUTPUT_DATA8 = os.path.join(OUTPUT_DIR, "long_train_day8.feather")
OUTPUT_DATA9 = os.path.join(OUTPUT_DIR, "long_train_day9.feather")
OUTPUT_TEST = os.path.join(OUTPUT_DIR, "long_test.feather")
MAMAS_INDEX = os.path.join(INPUT_DIR, "last_test_idx.npy")
OUTPUT_FILE = os.path.join(OUTPUT_DIR, "sub_long.csv")

import pandas as pd
import numpy as np
from sklearn import model_selection
import gc
from dask import dataframe as dd
from talkingdata.fe import column_selector, runtime_fe
from talkingdata.common import csv_loader, pocket_lgb, pocket_timer, pocket_logger

logger = pocket_logger.get_my_logger()
timer = pocket_timer.GoldenTimer(logger)
predict_col = column_selector.get_predict_col()
dtypes = csv_loader.get_featured_dtypes()

# Read the three training frames and the test frame from feather files.
train7 = pd.read_feather(OUTPUT_DATA7)
train8 = pd.read_feather(OUTPUT_DATA8)
train9 = pd.read_feather(OUTPUT_DATA9)
test = pd.read_feather(OUTPUT_TEST)
# NOTE(review): the label says "csv" although feather files are read above.
timer.time("load csv in ")

# DataFrame.append was removed in pandas 2.0. A single concat stacks the same
# rows in the same order and, like the chained append calls, keeps each
# frame's original index labels.
train = pd.concat([train7, train8, train9])

# DataFrame.info() prints its report itself and returns None, so it is called
# directly instead of being wrapped in print().
train.info()
test.info()

# Drop the per-day frames so their memory can be reclaimed.
del train7, train8, train9
gc.collect()
def __init__(self, model, df, predict_col):
    """Keep references to the model, the holdout frame and the predict columns.

    Args:
        model: Object stored on ``self.model``.
        df: Object stored on ``self.holdout_df``.
        predict_col: Object stored on ``self.predict_col``.
    """
    self.logger = pocket_logger.get_my_logger()
    # Nothing is copied or validated; the arguments are stored as given.
    self.model, self.holdout_df, self.predict_col = model, df, predict_col
    print("Initialized validator.")