def tuning_runner(args):
    """Build a ``ModelTuning`` job from ``args`` and run it.

    ``args`` is indexed by key and must provide ``train_path`` and
    ``target_path`` (each handed to ``load_dataset``) as well as
    ``model_name``, ``pipeline_name``, ``num_folds`` and ``num_iter``.
    The two loaded datasets followed by those four options are passed
    positionally, in that order, to ``ModelTuning``.
    """
    # Both datasets are loaded before the remaining options are looked up.
    train = load_dataset(args['train_path'])
    target = load_dataset(args['target_path'])

    option_keys = ('model_name', 'pipeline_name', 'num_folds', 'num_iter')
    options = [args[key] for key in option_keys]

    ModelTuning(train, target, *options).run()
# Example #2
# 0
def model_runner(args):
    """Run a model evaluation, optionally through the stacking variant.

    ``args`` is indexed by key and must provide:

    * ``train_path``, ``target_path``, ``test_path`` -- each handed to
      ``load_dataset``;
    * ``model_name``, ``pipeline_name``, ``num_folds``,
      ``create_submission``, ``id_column``, ``target_column`` --
      forwarded unchanged to the runner class;
    * ``use_stacking`` -- when truthy ``StackingModel`` is used,
      otherwise ``ModelEvaluation``.
    """
    train, target, test = [
        load_dataset(args[path_key])
        for path_key in ('train_path', 'target_path', 'test_path')
    ]

    model_name, pipeline_name, num_folds, create_submission = [
        args[key]
        for key in ('model_name', 'pipeline_name', 'num_folds',
                    'create_submission')
    ]
    use_stacking = args['use_stacking']
    id_column, target_column = [
        args[key] for key in ('id_column', 'target_column')
    ]

    # Both classes are constructed with the same positional arguments.
    if use_stacking:
        runner_class = StackingModel
    else:
        runner_class = ModelEvaluation

    runner = runner_class(
        train, target, test,
        model_name, pipeline_name, num_folds, create_submission,
        id_column, target_column,
    )
    runner.run()
def stacking_runner(args):
    """Build a ``StackingEvaluation`` from ``args`` and run it.

    ``args`` is indexed by key and must provide ``target_path`` (handed
    to ``load_dataset``) plus ``stacking_file``, ``num_folds``,
    ``id_column`` and ``target_column``, which are forwarded unchanged
    after the loaded target.
    """
    target = load_dataset(args['target_path'])

    stacking_file, num_folds, id_column, target_column = [
        args[key]
        for key in ('stacking_file', 'num_folds', 'id_column',
                    'target_column')
    ]

    # Named differently from the function so the local does not shadow it.
    evaluation = StackingEvaluation(
        target, stacking_file, num_folds, id_column, target_column,
    )
    evaluation.run()
def split_runner(args):
    """Split a dataset into folds and save the train and target datasets.

    ``args`` is indexed by key and must provide ``dataset_path``,
    ``num_folds``, ``save_folder``, ``train_name``, ``target_name`` and
    ``target_column``.

    Steps, each announced with a ``print``:

    1. load the dataset at ``dataset_path`` with ``load_dataset``;
    2. pass it to ``create_folds`` with ``num_folds``;
    3. pass the result and ``target_column`` to ``create_target``;
    4. save both results into ``save_folder`` with ``save_dataset``
       under ``train_name`` and ``target_name``.
    """
    # Every option is read up front, so a missing key fails before the
    # folder is created or any data is loaded.
    (dataset_path, num_folds, save_folder,
     train_name, target_name, target_column) = [
        args[key]
        for key in ('dataset_path', 'num_folds', 'save_folder',
                    'train_name', 'target_name', 'target_column')
    ]

    create_folder(save_folder)

    print('Loading train dataset')
    dataset = load_dataset(dataset_path, verbose=True)

    print('Creating folds for cross-validation')
    folded_train = create_folds(dataset, num_folds=num_folds)

    print('Creating target dataset')
    folded_target = create_target(folded_train, target_column)

    print('Saving train fold dataset')
    save_dataset(folded_train, save_folder, train_name)

    print('Saving target fold dataset')
    save_dataset(folded_target, save_folder, target_name)
# Example #5
# 0
    def get_stacking_data(self, stacking_csv):
        """Load every entry of ``stacking_csv`` and join them column-wise.

        Each item of ``stacking_csv`` is passed to ``load_dataset`` and
        the results are concatenated along ``axis=1``. Where a column
        label occurs more than once, only its first occurrence is kept.
        """
        frames = [load_dataset(source) for source in stacking_csv]
        combined = pd.concat(frames, axis=1)

        # ``duplicated()`` flags repeats of an earlier label; inverting
        # the mask keeps the first column seen for each label.
        first_occurrence = ~combined.columns.duplicated()
        return combined.loc[:, first_occurrence]