def tuning_runner(args):
    """Build a ``ModelTuning`` job from ``args`` and run it.

    The datasets at ``args['train_path']`` and ``args['target_path']`` are
    read with ``load_dataset``; together with the ``model_name``,
    ``pipeline_name``, ``num_folds`` and ``num_iter`` entries they are passed
    positionally to ``ModelTuning``, whose ``run()`` method is then called.

    Args:
        args: Mapping holding the six keys named above.
    """
    train_data = load_dataset(args['train_path'])
    target_data = load_dataset(args['target_path'])

    # Looked up in the same order ModelTuning takes them positionally.
    settings = [
        args[key]
        for key in ('model_name', 'pipeline_name', 'num_folds', 'num_iter')
    ]

    tuner = ModelTuning(train_data, target_data, *settings)
    tuner.run()
def model_runner(args):
    """Build a model runner from ``args`` and run it.

    Loads the train, target and test datasets from ``args['train_path']``,
    ``args['target_path']`` and ``args['test_path']`` via ``load_dataset``.
    The runner class is ``StackingModel`` when ``args['use_stacking']`` is
    truthy and ``ModelEvaluation`` otherwise; both are constructed with the
    same positional arguments, and ``run()`` is called on the result.

    Args:
        args: Mapping holding the three dataset paths plus ``model_name``,
            ``pipeline_name``, ``num_folds``, ``create_submission``,
            ``use_stacking``, ``id_column`` and ``target_column``.
    """
    train_data = load_dataset(args['train_path'])
    target_data = load_dataset(args['target_path'])
    test_data = load_dataset(args['test_path'])

    option_names = (
        'model_name',
        'pipeline_name',
        'num_folds',
        'create_submission',
        'use_stacking',
        'id_column',
        'target_column',
    )
    options = {name: args[name] for name in option_names}

    if options['use_stacking']:
        runner_cls = StackingModel
    else:
        runner_cls = ModelEvaluation

    runner = runner_cls(
        train_data,
        target_data,
        test_data,
        options['model_name'],
        options['pipeline_name'],
        options['num_folds'],
        options['create_submission'],
        options['id_column'],
        options['target_column'],
    )
    runner.run()
def stacking_runner(args):
    """Build a ``StackingEvaluation`` from ``args`` and run it.

    The dataset at ``args['target_path']`` is read with ``load_dataset`` and
    handed to ``StackingEvaluation`` together with the ``stacking_file``,
    ``num_folds``, ``id_column`` and ``target_column`` entries, in that
    positional order. ``run()`` is then called on the new object.

    Args:
        args: Mapping holding the five keys named above.
    """
    target_data = load_dataset(args['target_path'])

    evaluation = StackingEvaluation(
        target_data,
        args['stacking_file'],
        args['num_folds'],
        args['id_column'],
        args['target_column'],
    )
    evaluation.run()
def split_runner(args):
    """Split a dataset into folds and save train and target fold datasets.

    Steps, each announced with a ``print``:

    1. Ensure the output folder exists via ``create_folder``.
    2. Load the dataset at ``args['dataset_path']`` (``verbose=True``).
    3. Build folds with ``create_folds`` using ``args['num_folds']``.
    4. Derive the target dataset with ``create_target`` and
       ``args['target_column']``.
    5. Save both results into ``args['save_folder']`` under
       ``args['train_name']`` and ``args['target_name']``.

    Args:
        args: Mapping holding the six keys named above.
    """
    # Every option is read before any side effect, so a missing key fails
    # before the folder is created or anything is loaded.
    option_names = (
        'dataset_path',
        'num_folds',
        'save_folder',
        'train_name',
        'target_name',
        'target_column',
    )
    (source_path, fold_count, out_dir,
     train_file, target_file, label_column) = [
        args[name] for name in option_names
    ]

    create_folder(out_dir)

    print('Loading train dataset')
    full_train = load_dataset(source_path, verbose=True)

    print('Creating folds for cross-validation')
    folded_train = create_folds(full_train, num_folds=fold_count)

    print('Creating target dataset')
    folded_target = create_target(folded_train, label_column)

    print('Saving train fold dataset')
    save_dataset(folded_train, out_dir, train_file)

    print('Saving target fold dataset')
    save_dataset(folded_target, out_dir, target_file)
def get_stacking_data(self, stacking_csv):
    """Load several datasets and join them side by side without repeated columns.

    Each entry of ``stacking_csv`` is read with ``load_dataset`` and the
    results are concatenated column-wise (``axis=1``). Where a column label
    appears more than once, only its first occurrence is kept.

    Args:
        stacking_csv: Iterable of values accepted by ``load_dataset``
            (presumably CSV paths -- confirm against the caller).

    Returns:
        The combined frame restricted to its first-occurrence columns.
    """
    frames = [load_dataset(source) for source in stacking_csv]
    combined = pd.concat(frames, axis=1)

    # duplicated() flags every repeat of a label after its first appearance.
    first_occurrence = ~combined.columns.duplicated()
    return combined.loc[:, first_occurrence]