# Script header: path bootstrap, stemmed-text feature file constants, and
# third-party imports.
import os
import sys

# Project root: two directories above the directory that contains this file.
ROOT = os.path.abspath(
    os.path.join(os.path.dirname(os.path.abspath(__file__)), '../../')
)
# Appended before the `avito` import below, presumably so that the package
# resolves from ROOT -- confirm against the repository layout.
sys.path.append(ROOT)

APP_ROOT = os.path.join(ROOT, "avito")
OUTPUT_DIR = os.path.join(APP_ROOT, "output")
SUB_DIR = os.path.join(APP_ROOT, "submission")

# Files located under OUTPUT_DIR.
PRED_TRAIN = os.path.join(OUTPUT_DIR, "pred_train.csv")
PRED_TEST = os.path.join(OUTPUT_DIR, "pred_test.csv")
GAZOU_TRAIN = os.path.join(OUTPUT_DIR, "image_train.csv")
GAZOU_TEST = os.path.join(OUTPUT_DIR, "image_test.csv")

# File located under SUB_DIR.
PSEUDO_PRED_99 = os.path.join(SUB_DIR, "submission99p.csv")

from avito.common import filename_getter

# Each get_filename() call is unpacked into a (COLS, TRAIN, TEST) triple.
# "tf" variants of the stemmed description / title / title+description.
(DESC_TF_COLS, DESC_TF_TRAIN, DESC_TF_TEST) = filename_getter.get_filename(
    OUTPUT_DIR, "stem_desc", "tf")
(TITLE_TF_COLS, TITLE_TF_TRAIN, TITLE_TF_TEST) = filename_getter.get_filename(
    OUTPUT_DIR, "stem_title", "tf")
(DENSE_TF_COLS, DENSE_TF_TRAIN, DENSE_TF_TEST) = filename_getter.get_filename(
    OUTPUT_DIR, "stem_title_desc", "tf")

# "cnt" variant of the stemmed title.
(TITLE_CNT_COLS, TITLE_CNT_TRAIN, TITLE_CNT_TEST) = filename_getter.get_filename(
    OUTPUT_DIR, "stem_title", "cnt")

# "cnt" variants of the stem_cnt15_* feature sets.
(DENSE_CNT15_COLS, DENSE_CNT15_TRAIN, DENSE_CNT15_TEST) = (
    filename_getter.get_filename(OUTPUT_DIR, "stem_cnt15_dense", "cnt"))
(DESC_CNT15_COLS, DESC_CNT15_TRAIN, DESC_CNT15_TEST) = (
    filename_getter.get_filename(OUTPUT_DIR, "stem_cnt15_desc", "cnt"))
(TITLE_CNT15_COLS, TITLE_CNT15_TRAIN, TITLE_CNT15_TEST) = (
    filename_getter.get_filename(OUTPUT_DIR, "stem_cnt15_title", "cnt"))

BEST_SUB = os.path.join(SUB_DIR, "submission.csv")

import pandas as pd
import numpy as np
import scipy.sparse
# Script header: computes ROOT (two directories above this file), appends it
# to sys.path, derives APP_ROOT / OUTPUT_DIR, and unpacks the result of
# filename_getter.get_filename() into (COLS, TRAIN, TEST) triples for the
# "desc", "title", "title_desc" feature sets and their "stem_*" counterparts.
# NOTE(review): the statements below are collapsed onto a single physical line
# and the trailing get_predict_col() definition is cut off after
# `pred_col = [`; the line is left untouched because the rest of that
# definition is not visible here.
import os, sys import pandas as pd ROOT = os.path.abspath( os.path.join(os.path.dirname(os.path.abspath(__file__)), '../../')) sys.path.append(ROOT) APP_ROOT = os.path.join(ROOT, "avito") OUTPUT_DIR = os.path.join(APP_ROOT, "output") from avito.common import filename_getter DESC_TF_COLS, DESC_TF_TRAIN, DESC_TF_TEST = filename_getter.get_filename( OUTPUT_DIR, "desc", "tf") TITLE_TF_COLS, TITLE_TF_TRAIN, TITLE_TF_TEST = filename_getter.get_filename( OUTPUT_DIR, "title", "tf") TITLE_CNT_COLS, TITLE_CNT_TRAIN, TITLE_CNT_TEST = filename_getter.get_filename( OUTPUT_DIR, "title", "cnt") DENSE_TF_COLS, DENSE_TF_TRAIN, DENSE_TF_TEST = filename_getter.get_filename( OUTPUT_DIR, "title_desc", "tf") STEM_DESC_TF_COLS, STEM_DESC_TF_TRAIN, STEM_DESC_TF_TEST = filename_getter.get_filename( OUTPUT_DIR, "stem_desc", "tf") STEM_TITLE_TF_COLS, STEM_TITLE_TF_TRAIN, STEM_TITLE_TF_TEST = filename_getter.get_filename( OUTPUT_DIR, "stem_title", "tf") STEM_TITLE_CNT_COLS, STEM_TITLE_CNT_TRAIN, STEM_TITLE_CNT_TEST = filename_getter.get_filename( OUTPUT_DIR, "stem_title", "cnt") STEM_DENSE_TF_COLS, STEM_DENSE_TF_TRAIN, STEM_DENSE_TF_TEST = filename_getter.get_filename( OUTPUT_DIR, "stem_title_desc", "tf") def get_predict_col(): # item_id, user_id, title, description, image, deal_probability pred_col = [
# Script header: path bootstrap, feature / submission file constants, and the
# imports used by the rest of the script.
import os
import sys

# Project root: two directories above the directory that contains this file.
ROOT = os.path.abspath(
    os.path.join(os.path.dirname(os.path.abspath(__file__)), '../../')
)
# Appended before the `avito` imports below, presumably so that the package
# resolves from ROOT -- confirm against the repository layout.
sys.path.append(ROOT)

APP_ROOT = os.path.join(ROOT, "avito")
OUTPUT_DIR = os.path.join(APP_ROOT, "output")
SUB_DIR = os.path.join(APP_ROOT, "submission")

# Files located under OUTPUT_DIR.
PRED_TRAIN = os.path.join(OUTPUT_DIR, "pred_train.csv")
PRED_TEST = os.path.join(OUTPUT_DIR, "pred_test.csv")
GAZOU_TRAIN = os.path.join(OUTPUT_DIR, "image_train.csv")
GAZOU_TEST = os.path.join(OUTPUT_DIR, "image_test.csv")

# Files located under SUB_DIR.
OUTPUT_PRED = os.path.join(SUB_DIR, "submission.csv")
OUTPUT_CV_PRED = os.path.join(SUB_DIR, "cv_pred.csv")

from avito.common import filename_getter

# Each get_filename() call is unpacked into a (COLS, TRAIN, TEST) triple.
# "cnt" variants of the vanilla_cnt15_* feature sets.
(DENSE_CNT15_COLS, DENSE_CNT15_TRAIN, DENSE_CNT15_TEST) = (
    filename_getter.get_filename(OUTPUT_DIR, "vanilla_cnt15_dense", "cnt"))
(DESC_CNT15_COLS, DESC_CNT15_TRAIN, DESC_CNT15_TEST) = (
    filename_getter.get_filename(OUTPUT_DIR, "vanilla_cnt15_desc", "cnt"))
(TITLE_CNT15_COLS, TITLE_CNT15_TRAIN, TITLE_CNT15_TEST) = (
    filename_getter.get_filename(OUTPUT_DIR, "vanilla_cnt15_title", "cnt"))

# "tf" variants of the description / title / title+description sets.
(VDESC_TF_COLS, VDESC_TF_TRAIN, VDESC_TF_TEST) = (
    filename_getter.get_filename(OUTPUT_DIR, "desc", "tf"))
(VTITLE_TF_COLS, VTITLE_TF_TRAIN, VTITLE_TF_TEST) = (
    filename_getter.get_filename(OUTPUT_DIR, "title", "tf"))
(VDENSE_TF_COLS, VDENSE_TF_TRAIN, VDENSE_TF_TEST) = (
    filename_getter.get_filename(OUTPUT_DIR, "title_desc", "tf"))

import pandas as pd
import numpy as np
import scipy.sparse
import gc
from sklearn import model_selection
from dask import dataframe as dd
from avito.common import (
    csv_loader,
    column_selector,
    pocket_lgb,
    pocket_timer,
    pocket_logger,
    holdout_validator,
)
from avito.fe import additional_fe
# Script header: path bootstrap, input / output file constants, "ker_*"
# feature file constants, imports, and the module-level logger.
# NOTE(review): `os` and `sys` are used here without a visible import; they are
# assumed to be imported just above this span.

# Project root: two directories above the directory that contains this file.
ROOT = os.path.abspath(
    os.path.join(os.path.dirname(os.path.abspath(__file__)), '../../')
)
# Appended before the `avito` imports below, presumably so that the package
# resolves from ROOT -- confirm against the repository layout.
sys.path.append(ROOT)

APP_ROOT = os.path.join(ROOT, "avito")
INPUT_DIR = os.path.join(APP_ROOT, "input")
OUTPUT_DIR = os.path.join(APP_ROOT, "output")

# Files located under INPUT_DIR.
ORG_TRAIN = os.path.join(INPUT_DIR, "train.csv")
ORG_TEST = os.path.join(INPUT_DIR, "test.csv")

# Files located under OUTPUT_DIR.
STEM_TRAIN = os.path.join(OUTPUT_DIR, "stem_train.csv")
STEM_TEST = os.path.join(OUTPUT_DIR, "stem_test.csv")
PRED_TRAIN = os.path.join(OUTPUT_DIR, "pred_train.csv")
PRED_TEST = os.path.join(OUTPUT_DIR, "pred_test.csv")

from avito.common import filename_getter

# Each get_filename() call is unpacked into a (COLS, TRAIN, TEST) triple.
(DESC_TF_COLS, DESC_TF_TRAIN, DESC_TF_TEST) = filename_getter.get_filename(
    OUTPUT_DIR, "ker_desc", "tf")
(TITLE_TF_COLS, TITLE_TF_TRAIN, TITLE_TF_TEST) = filename_getter.get_filename(
    OUTPUT_DIR, "ker_title", "tf")
(TITLE_CNT_COLS, TITLE_CNT_TRAIN, TITLE_CNT_TEST) = filename_getter.get_filename(
    OUTPUT_DIR, "ker_title", "cnt")
(DENSE_TF_COLS, DENSE_TF_TRAIN, DENSE_TF_TEST) = filename_getter.get_filename(
    OUTPUT_DIR, "ker_title_desc", "tf")

import numpy as np
import pandas as pd
import scipy.sparse
import gc
from avito.common import pocket_timer, pocket_logger, column_selector
from avito.fe import kernel_bow

# Module-level logger obtained from the project's pocket_logger helper.
logger = pocket_logger.get_my_logger()
# Script header: path bootstrap, input / output file constants, stemmed
# "cnt15" feature file constants, imports, and module-level logger / timer.
import os
import sys

# Project root: two directories above the directory that contains this file.
ROOT = os.path.abspath(
    os.path.join(os.path.dirname(os.path.abspath(__file__)), '../../')
)
# Appended before the `avito` imports below, presumably so that the package
# resolves from ROOT -- confirm against the repository layout.
sys.path.append(ROOT)

APP_ROOT = os.path.join(ROOT, "avito")
INPUT_DIR = os.path.join(APP_ROOT, "input")
OUTPUT_DIR = os.path.join(APP_ROOT, "output")

# Files located under INPUT_DIR.
ORG_TRAIN = os.path.join(INPUT_DIR, "train.csv")
ORG_TEST = os.path.join(INPUT_DIR, "test.csv")

# Files located under OUTPUT_DIR.
STEM_TRAIN = os.path.join(OUTPUT_DIR, "stem_train.csv")
STEM_TEST = os.path.join(OUTPUT_DIR, "stem_test.csv")
PRED_TRAIN = os.path.join(OUTPUT_DIR, "pred_train.csv")
PRED_TEST = os.path.join(OUTPUT_DIR, "pred_test.csv")

from avito.common import filename_getter

# Each get_filename() call is unpacked into a (COLS, TRAIN, TEST) triple.
(DENSE_CNT15_COLS, DENSE_CNT15_TRAIN, DENSE_CNT15_TEST) = (
    filename_getter.get_filename(OUTPUT_DIR, "stem_cnt15_dense", "cnt"))
(DESC_CNT15_COLS, DESC_CNT15_TRAIN, DESC_CNT15_TEST) = (
    filename_getter.get_filename(OUTPUT_DIR, "stem_cnt15_desc", "cnt"))
(TITLE_CNT15_COLS, TITLE_CNT15_TRAIN, TITLE_CNT15_TEST) = (
    filename_getter.get_filename(OUTPUT_DIR, "stem_cnt15_title", "cnt"))

import numpy as np
import pandas as pd
import scipy.sparse
import gc
from avito.common import pocket_timer, pocket_logger, column_selector
from avito.fe import lda_cnt_bow

# Module-level logger, and a timer constructed with that logger.
logger = pocket_logger.get_my_logger()
timer = pocket_timer.GoldenTimer(logger)
# Disabled in the original (csv_loader is not imported in this header):
# dtypes = csv_loader.get_featured_dtypes()
# Script header: path bootstrap, prediction / model file constants, "tf"
# feature file constants, imports, and module-level setup.
import os
import sys

# Project root: two directories above the directory that contains this file.
ROOT = os.path.abspath(
    os.path.join(os.path.dirname(os.path.abspath(__file__)), '../../')
)
# Appended before the `avito` imports below, presumably so that the package
# resolves from ROOT -- confirm against the repository layout.
sys.path.append(ROOT)

APP_ROOT = os.path.join(ROOT, "avito")
OUTPUT_DIR = os.path.join(APP_ROOT, "output")
SUBMISSION = os.path.join(APP_ROOT, "submission")

# Files located under OUTPUT_DIR.
PRED_TRAIN = os.path.join(OUTPUT_DIR, "pred_train.csv")
PRED_TEST = os.path.join(OUTPUT_DIR, "pred_test.csv")

# Files located under SUBMISSION.
OUTPUT_PRED = os.path.join(SUBMISSION, "submission.csv")
MODEL_FILE = os.path.join(SUBMISSION, "pred_model.txt")

from avito.common import filename_getter

# Each get_filename() call is unpacked into a (COLS, TRAIN, TEST) triple.
(DESC_TF_COLS, DESC_TF_TRAIN, DESC_TF_TEST) = filename_getter.get_filename(
    OUTPUT_DIR, "desc", "tf")
(TITLE_TF_COLS, TITLE_TF_TRAIN, TITLE_TF_TEST) = filename_getter.get_filename(
    OUTPUT_DIR, "title", "tf")

import pandas as pd
import numpy as np
import scipy.sparse
import gc
from sklearn import model_selection
from dask import dataframe as dd
from avito.common import (
    csv_loader,
    column_selector,
    pocket_lgb,
    pocket_timer,
    pocket_logger,
)

# Module-level logger, and a timer constructed with that logger.
logger = pocket_logger.get_my_logger()
timer = pocket_timer.GoldenTimer(logger)

# Values obtained from the project's csv_loader / column_selector helpers.
dtypes = csv_loader.get_featured_dtypes()
predict_col = column_selector.get_predict_col()
lgb_col = column_selector.get_pred_tf_col()