dataset, tasktype)) parser.add_argument('--model-dir', default='check_{}_{}_model/'.format(dataset, tasktype)) parser.add_argument( '--mode', choices=['classification', 'regression'], default='regression' if tasktype == 'r' else 'classification') args = parser.parse_args() if not os.path.exists(args.model_dir): os.mkdir(args.model_dir) start_time = time.time() print('Dataset:', args.train_csv) df_X, df_y, model_config, _ = load_data(args.train_csv) model_config['mode'] = args.mode model_config_filename = os.path.join(args.model_dir, 'model_config.pkl') with open(model_config_filename, 'wb') as fout: pickle.dump(model_config, fout, protocol=pickle.HIGHEST_PROTOCOL) # eval dataset during the training size = min(df_X.shape[0] // 10, 1000) X_train, X_eval, y_train, y_eval = train_test_split(df_X, df_y, test_size=size) train_dir = 'catboost_info/' if os.path.exists(train_dir):
parser.add_argument('--test-csv', required=True)
parser.add_argument(
    '--prediction-csv', type=argparse.FileType('w'), required=True)
parser.add_argument('--model-dir', required=True)
args = parser.parse_args()

start_time = time.time()

# Restore the configuration object pickled into the model directory.
# NOTE(review): pickle.load can execute arbitrary code from the file, so
# --model-dir should only ever point at a trusted directory.
model_config_filename = os.path.join(args.model_dir, 'model_config.pkl')
with open(model_config_filename, 'rb') as config_file:
    model_config = pickle.load(config_file)

X_scaled, _, _, df = load_data(
    args.test_csv, datatype='test', cfg=model_config)

leakage = model_config['time_leakage']
if leakage['is_leakage']:
    # Write the values stored under each *_series key into the column
    # named by the matching *_col key before applying the leakage helper.
    for col_key, series_key in (
            ('id_col', 'id_series'),
            ('dt_col', 'dt_series'),
            ('num_col', 'num_series')):
        df[leakage[col_key]] = leakage[series_key]
    df = use_time_leakage(df, leakage)
    # Rows left without a prediction by the helper default to 0.
    df['prediction'] = df['prediction'].fillna(0)
else:
    # No leakage shortcut: fall back to the model stored in the config.
    model = model_config['model']