def benchmark_holdout(dataset, decision_function, bins, criterion):
    """Run the hold-out benchmark of forward feature selection on a dataset.

    Args:
        dataset: Mapping whose ``'data'`` entry provides ``load_from_folder``,
            ``process_features``, ``cache_features`` and ``get_target``.
        decision_function: Mapping with a ``'classification'`` entry (the
            model given to both the selector and the benchmark) and a
            ``'type'`` entry; linearisation is requested unless the type is
            ``'gbdt'``.
        bins: Number of bins, forwarded to the selector as ``n_bins``.
        criterion: Score function forwarded to the selector.

    Returns:
        The result of ``HoldoutBenchmark.benchmark`` for ``dataset['data']``.

    Raises:
        DichotomizationImpossible: If ``bins >= 0.8 * target.size + 1``.
    """
    data = dataset['data']
    data.load_from_folder()
    data.process_features()
    data.cache_features()

    # The bin limit is derived from 80% of the target size (hard-coded factor).
    scaled_size = data.get_target().size * 0.8
    if bins >= scaled_size + 1:
        print(bins, 'very small dataset for such dichtomization.')
        raise DichotomizationImpossible(bins, int(scaled_size))

    dfunc = decision_function['classification']

    # MultilinearUSMExtended (train_share=0.8, n_cv=8) is a disabled
    # alternative to the selector used below.
    optimizer = ForwardFeatureSelectionExtended(
        decision_function=dfunc,
        score_function=criterion,
        n_bins=bins,
        train_share=0.9,
        n_cv_ffs=8,
        n_jobs=1,
    )

    bench = HoldoutBenchmark(
        optimizer,
        decision_function=dfunc,
        requires_linearisation=decision_function['type'] != 'gbdt',
        n_holdouts=80,
        n_jobs=24,
    )

    return bench.benchmark(data)
def benchmark_train_test(dataset, decision_function, criterion, bins):
    """Run the train/test benchmark of forward feature selection on a dataset.

    Args:
        dataset: Mapping whose ``'data'`` entry provides
            ``load_train_from_file``, ``load_test_from_file``,
            ``process_features``, ``cache_features`` and ``get_train_target``.
        decision_function: Mapping with a ``'classification'`` entry (the
            model given to both the selector and the benchmark) and a
            ``'type'`` entry; linearisation is requested unless the type is
            ``'gbdt'``.
        criterion: Score function forwarded to the selector.
        bins: Number of bins, forwarded to the selector as ``n_bins``.

    Returns:
        The result of ``TrainTestBenchmark.benchmark`` for ``dataset['data']``.

    Raises:
        DichotomizationImpossible: If ``bins >= 0.8 * y_train.size + 1``.
    """
    data = dataset['data']
    data.load_train_from_file()
    data.load_test_from_file()
    data.process_features()
    data.cache_features()

    # The bin limit is derived from 80% of the train target size
    # (hard-coded factor).
    scaled_size = data.get_train_target().size * 0.8
    if bins >= scaled_size + 1:
        print(bins, 'very small dataset for such dichtomization.')
        raise DichotomizationImpossible(bins, int(scaled_size))

    dfunc = decision_function['classification']

    optimizer = ForwardFeatureSelectionExtended(
        decision_function=dfunc,
        score_function=criterion,
        n_bins=bins,
        train_share=0.9,
        n_cv_ffs=8,
        n_jobs=8,
    )

    bench = TrainTestBenchmark(
        optimizer=optimizer,
        decision_function=dfunc,
        requires_linearisation=decision_function['type'] != 'gbdt',
    )

    return bench.benchmark(data)
Example #3
0
def benchmark_train_test(dataset,
                         decision_function,
                         lambda_param,
                         bins,
                         df_jobs=4):
    """Run the train/test benchmark with a lambda-regularized score.

    Args:
        dataset: Mapping whose ``'data'`` entry provides
            ``load_train_from_file``, ``load_test_from_file``,
            ``process_features``, ``cache_features`` and ``get_train_target``.
        decision_function: Mapping with a ``'classification'`` entry (the
            model given to both the selector and the benchmark) and a
            ``'type'`` entry; linearisation is requested unless the type is
            ``'gbdt'``.
        lambda_param: Bound as ``_lambda`` of
            ``log_likelihood_regularized_score_val``.
        bins: Number of bins, forwarded to the selector as ``n_bins``.
        df_jobs: Value assigned to ``n_jobs`` of the classification model.

    Returns:
        The result of ``TrainTestBenchmark.benchmark`` for ``dataset['data']``.

    Raises:
        DichotomizationImpossible: If ``bins >= 0.8 * y_train.size + 1``.
    """
    data = dataset['data']
    data.load_train_from_file()
    data.load_test_from_file()
    data.process_features()
    data.cache_features()

    # The bin limit is derived from 80% of the train target size
    # (hard-coded factor).
    scaled_size = data.get_train_target().size * 0.8
    if bins >= scaled_size + 1:
        print(bins, 'very small dataset for such dichtomization.')
        raise DichotomizationImpossible(bins, int(scaled_size))

    dfunc = decision_function['classification']
    # NOTE: this sets the attribute on the object stored in the caller's
    # mapping, so the change is visible outside this function.
    dfunc.n_jobs = df_jobs
    score_function = partial(log_likelihood_regularized_score_val,
                             _lambda=lambda_param)

    optimizer = ForwardFeatureSelectionExtended(
        decision_function=dfunc,
        score_function=score_function,
        n_bins=bins,
        train_share=0.9,
        n_cv_ffs=8,
        n_jobs=24,
    )

    bench = TrainTestBenchmark(
        optimizer=optimizer,
        decision_function=dfunc,
        requires_linearisation=decision_function['type'] != 'gbdt',
    )

    return bench.benchmark(data)
Example #4
0
def benchmark_holdout(dataset, decision_function, lambda_param, bins):
    """Run the hold-out benchmark using the composite feature selector.

    Args:
        dataset: Mapping whose ``'data'`` entry provides ``load_from_folder``,
            ``process_features``, ``cache_features`` and ``get_target``.
        decision_function: Mapping with a ``'classification'`` entry (the
            model given to both the selector and the benchmark) and a
            ``'type'`` entry; linearisation is requested unless the type is
            ``'gbdt'``.
        lambda_param: Bound as ``_lambda`` of
            ``log_likelihood_regularized_score_multiplicative_balanced``.
        bins: Number of bins, forwarded to the selector as ``n_bins``.

    Returns:
        The result of ``HoldoutBenchmark.benchmark`` for ``dataset['data']``.

    Raises:
        DichotomizationImpossible: If ``bins >= 0.8 * target.size + 1``.
    """
    data = dataset['data']
    data.load_from_folder()
    data.process_features()
    data.cache_features()

    # The bin limit is derived from 80% of the target size (hard-coded factor).
    scaled_size = data.get_target().size * 0.8
    if bins >= scaled_size + 1:
        print(bins, 'very small dataset for such dichtomization.')
        raise DichotomizationImpossible(bins, int(scaled_size))

    dfunc = decision_function['classification']
    components_function = get_log_likelihood_regularized_score_balanced_components
    score_function = partial(
        log_likelihood_regularized_score_multiplicative_balanced,
        _lambda=lambda_param)

    optimizer = ForwardFeatureSelectionComposite(
        decision_function=dfunc,
        score_function_components=components_function,
        score_function=score_function,
        n_bins=bins,
        train_share=0.8,
        n_cv_ffs=6,
        n_jobs=1,
    )

    bench = HoldoutBenchmark(
        optimizer,
        decision_function=dfunc,
        requires_linearisation=decision_function['type'] != 'gbdt',
        n_holdouts=80,
        n_jobs=1,
    )

    return bench.benchmark(data)
Example #5
0
def benchmark_train_test(
    service_eval_host: str,
    service_eval_port: str,
    dataset,
    decision_function,
    lambda_param,
    bins,
):
    """Run the train/test benchmark through the composite selector client.

    Args:
        service_eval_host: Forwarded to the selector client as ``server_clc``.
        service_eval_port: Forwarded to the selector client as ``port_clc``.
        dataset: Mapping whose ``'data'`` entry provides
            ``load_train_from_file``, ``load_test_from_file``,
            ``process_features``, ``cache_features`` and ``get_train_target``.
        decision_function: Mapping with a ``'type'`` entry (passed to the
            selector client and compared with ``'gbdt'`` to decide on
            linearisation) and a ``'classification'`` entry (the model given
            to the benchmark).
        lambda_param: Bound as ``_lambda`` of
            ``log_likelihood_regularized_score_multiplicative_balanced``.
        bins: Number of bins, forwarded to the selector as ``n_bins``.

    Returns:
        The result of ``TrainTestBenchmark.benchmark`` for ``dataset['data']``.

    Raises:
        DichotomizationImpossible: If ``bins >= 0.8 * y_train.size + 1``.
    """
    data = dataset['data']
    data.load_train_from_file()
    data.load_test_from_file()
    data.process_features()
    data.cache_features()

    # The bin limit is derived from 80% of the train target size
    # (hard-coded factor).
    scaled_size = data.get_train_target().size * 0.8
    if bins >= scaled_size + 1:
        print(bins, 'very small dataset for such dichtomization.')
        raise DichotomizationImpossible(bins, int(scaled_size))

    score_function = partial(
        log_likelihood_regularized_score_multiplicative_balanced,
        _lambda=lambda_param)

    optimizer = ForwardFeatureSelectionCompositeClient(
        server_clc=service_eval_host,
        port_clc=service_eval_port,
        decision_function=decision_function['type'],
        score_function=score_function,
        n_bins=bins,
        train_share=0.9,
        n_cv_ffs=8,
    )

    bench = TrainTestBenchmark(
        optimizer=optimizer,
        decision_function=decision_function['classification'],
        requires_linearisation=decision_function['type'] != 'gbdt',
    )

    # perf_counter is monotonic, so the reported duration cannot be skewed
    # by system clock adjustments during the run.
    start_time = time.perf_counter()
    result = bench.benchmark(data)
    print("--- %s seconds ---" % (time.perf_counter() - start_time))

    return result
Example #6
0
def benchmark_holdout(
    service_eval_host: str,
    service_eval_port: str,
    service_store_host: str,
    service_store_port: str,
    dataset,
    decision_function,
    lambda_param,
    bins,
):
    """Run the hold-out benchmark through the composite selector client.

    Args:
        service_eval_host: Forwarded to the selector client as ``server_clc``.
        service_eval_port: Forwarded to the selector client as ``port_clc``.
        service_store_host: Accepted but not used by this function.
        service_store_port: Accepted but not used by this function.
        dataset: Mapping whose ``'data'`` entry provides ``load_from_folder``,
            ``process_features``, ``cache_features`` and ``get_target``.
        decision_function: Mapping with a ``'type'`` entry (passed to the
            selector client and compared with ``'gbdt'`` to decide on
            linearisation) and a ``'classification'`` entry (the model given
            to the benchmark).
        lambda_param: Bound as ``_lambda`` of
            ``log_likelihood_regularized_score_multiplicative_balanced``.
        bins: Number of bins, forwarded to the selector as ``n_bins``.

    Returns:
        The result of ``HoldoutBenchmark.benchmark`` for ``dataset['data']``.

    Raises:
        DichotomizationImpossible: If ``bins >= 0.8 * target.size + 1``.
    """
    data = dataset['data']
    data.load_from_folder()
    data.process_features()
    data.cache_features()

    # The bin limit is derived from 80% of the target size (hard-coded factor).
    scaled_size = data.get_target().size * 0.8
    if bins >= scaled_size + 1:
        print(bins, 'very small dataset for such dichtomization.')
        raise DichotomizationImpossible(bins, int(scaled_size))

    dfunc = decision_function['classification']

    score_function = partial(
        log_likelihood_regularized_score_multiplicative_balanced,
        _lambda=lambda_param)

    optimizer = ForwardFeatureSelectionCompositeClient(
        server_clc=service_eval_host,
        port_clc=service_eval_port,
        decision_function=decision_function['type'],
        score_function=score_function,
        n_bins=bins,
        train_share=0.9,
        n_cv_ffs=8,
    )

    bench = HoldoutBenchmark(
        optimizer,
        decision_function=dfunc,
        requires_linearisation=decision_function['type'] != 'gbdt',
        n_holdouts=100,
        n_jobs=24,
    )

    return bench.benchmark(data)
Example #7
0
def benchmark_holdout(dataset, decision_function, lambda_param, bins):
    """Run the hold-out benchmark with a lambda-regularized score.

    Args:
        dataset: Mapping whose ``'data'`` entry provides ``load_from_folder``,
            ``process_features``, ``cache_features`` and ``get_target``.
        decision_function: Mapping with a ``'classification'`` entry (the
            model given to both the selector and the benchmark) and a
            ``'type'`` entry; linearisation is requested unless the type is
            ``'gbdt'``.
        lambda_param: Bound as ``_lambda`` of
            ``log_likelihood_regularized_score_val``.
        bins: Number of bins, forwarded to the selector as ``n_bins``.

    Returns:
        The result of ``HoldoutBenchmark.benchmark`` for ``dataset['data']``.

    Raises:
        DichotomizationImpossible: If ``bins >= 0.8 * target.size + 1``.
    """
    data = dataset['data']
    data.load_from_folder()
    data.process_features()
    data.cache_features()

    # Guard: refuse bin counts at or above 80% of the target size plus one.
    too_many_bins = bins >= data.get_target().size * 0.8 + 1
    if too_many_bins:
        print(bins, 'very small dataset for such dichtomization.')
        limit = int(data.get_target().size * 0.8)
        raise DichotomizationImpossible(bins, limit)

    classifier = decision_function['classification']
    regularized_score = partial(log_likelihood_regularized_score_val,
                                _lambda=lambda_param)

    selector = ForwardFeatureSelectionExtended(
        n_jobs=1,
        n_cv_ffs=8,
        train_share=0.9,
        n_bins=bins,
        score_function=regularized_score,
        decision_function=classifier,
    )

    # Every decision-function type except 'gbdt' asks for linearisation.
    needs_linearisation = decision_function['type'] != 'gbdt'

    runner = HoldoutBenchmark(
        selector,
        decision_function=classifier,
        requires_linearisation=needs_linearisation,
        n_holdouts=80,
        n_jobs=4,
    )

    return runner.benchmark(data)