コード例 #1
0
def run(dataset: Dataset, config: TaskConfig):
    """Run this framework's ``exec.py`` through ``run_in_venv`` on the encoded data.

    The encoded train/test features go through ``unsparsify`` and then
    ``impute_array``; the encoded targets go through ``unsparsify`` only.
    Both are bundled into the ``input_data`` payload.

    Args:
        dataset: provides the encoded train/test splits and the target metadata.
        config: task configuration, forwarded unchanged to ``run_in_venv``.

    Returns:
        Whatever ``run_in_venv`` returns.
    """
    from frameworks.shared.caller import run_in_venv

    dense_features = unsparsify(dataset.train.X_enc,
                                dataset.test.X_enc,
                                fmt='array')
    X_train, X_test = impute_array(*dense_features)
    y_train, y_test = unsparsify(dataset.train.y_enc,
                                 dataset.test.y_enc,
                                 fmt='array')
    data = {
        'train': {'X': X_train, 'y': y_train},
        'test': {'X': X_test, 'y': y_test},
    }

    def process_results(results):
        """Replace a probabilities format marker with actual probabilities.

        Raises:
            ValueError: if the marker is anything other than "predictions".
        """
        probabilities = results.probabilities
        # Nothing to convert when probabilities are absent or already have a
        # non-empty shape. A shapeless array only wraps a format marker
        # (numpy load always returns an array).
        if probabilities is None or probabilities.shape:
            return results

        prob_format = probabilities.item()
        if prob_format != "predictions":
            raise ValueError(
                f"Unknown probabilities format: {prob_format}")

        # Fit the 'one-hot' encoder on the label-encoded target values, then
        # apply it to the predictions to obtain the probabilities.
        target_values_enc = dataset.target.label_encoder.transform(
            dataset.target.values)
        one_hot = Encoder('one-hot', target=False, encoded_type=float)
        results.probabilities = one_hot.fit(target_values_enc).transform(
            results.predictions)
        return results

    return run_in_venv(
        __file__,
        "exec.py",
        input_data=data,
        dataset=dataset,
        config=config,
        process_results=process_results,
    )
コード例 #2
0
def run(dataset: Dataset, config: TaskConfig):
    """Fit a scikit-learn decision tree on the encoded data and save its predictions.

    A ``DecisionTreeClassifier`` is used when ``config.type`` is
    ``'classification'``, otherwise a ``DecisionTreeRegressor``. The estimator
    is built with ``config.seed`` as ``random_state`` plus every entry of
    ``config.framework_params``.

    Args:
        dataset: provides the encoded train/test splits.
        config: task configuration (type, seed, framework params, output file).

    Returns:
        A dict with ``models_count``, ``training_duration`` and
        ``predict_duration``.
    """
    log.info(f"\n**** Decision Tree [sklearn v{sklearn.__version__}] ****\n")

    is_classification = config.type == 'classification'

    dense_features = unsparsify(dataset.train.X_enc, dataset.test.X_enc, fmt='array')
    X_train, X_test = impute_array(*dense_features)
    y_train, y_test = unsparsify(dataset.train.y_enc, dataset.test.y_enc, fmt='array')

    if is_classification:
        estimator = DecisionTreeClassifier
    else:
        estimator = DecisionTreeRegressor
    predictor = estimator(random_state=config.seed, **config.framework_params)

    with Timer() as training:
        predictor.fit(X_train, y_train)
    with Timer() as predict:
        predictions = predictor.predict(X_test)

    # Probabilities are only requested for classification, and this call sits
    # outside the ``predict`` timer block.
    probabilities = None
    if is_classification:
        probabilities = predictor.predict_proba(X_test)

    save_predictions(
        dataset=dataset,
        output_file=config.output_predictions_file,
        probabilities=probabilities,
        predictions=predictions,
        truth=y_test,
        target_is_encoded=is_classification,
    )

    return {
        'models_count': 1,
        'training_duration': training.duration,
        'predict_duration': predict.duration,
    }
コード例 #3
0
def run(dataset: Dataset, config: TaskConfig):
    """Fit a scikit-learn dummy (constant) predictor and save its predictions.

    Classification uses ``DummyClassifier(strategy='prior')``; any other task
    type uses ``DummyRegressor(strategy='median')``. The ``_encode`` entry of
    ``config.framework_params`` (default ``False``) selects between the
    encoded (``X_enc``/``y_enc``) and the raw (``X``/``y``) data.

    Args:
        dataset: provides the raw and encoded train/test splits.
        config: task configuration (type, framework params, output file).

    Returns:
        A dict with ``models_count``, ``training_duration`` and
        ``predict_duration``.
    """
    log.info("\n**** Constant predictor (sklearn dummy) ****\n")

    is_classification = config.type == 'classification'
    if is_classification:
        predictor = DummyClassifier(strategy='prior')
    else:
        predictor = DummyRegressor(strategy='median')

    encode = config.framework_params.get('_encode', False)

    # Only the selected representation is read from the dataset.
    if encode:
        X_train = unsparsify(dataset.train.X_enc, fmt='array')
        y_train = unsparsify(dataset.train.y_enc, fmt='array')
        X_test = unsparsify(dataset.test.X_enc, fmt='array')
        y_test = unsparsify(dataset.test.y_enc, fmt='array')
    else:
        X_train = unsparsify(dataset.train.X, fmt='array')
        y_train = unsparsify(dataset.train.y, fmt='array')
        X_test = unsparsify(dataset.test.X, fmt='array')
        y_test = unsparsify(dataset.test.y, fmt='array')

    with Timer() as training:
        predictor.fit(X_train, y_train)
    with Timer() as predict:
        predictions = predictor.predict(X_test)

    # Probabilities are only requested for classification, and this call sits
    # outside the ``predict`` timer block.
    probabilities = None
    if is_classification:
        probabilities = predictor.predict_proba(X_test)

    save_predictions(
        dataset=dataset,
        output_file=config.output_predictions_file,
        probabilities=probabilities,
        predictions=predictions,
        truth=y_test,
        target_is_encoded=encode,
    )

    return {
        'models_count': 1,
        'training_duration': training.duration,
        'predict_duration': predict.duration,
    }
コード例 #4
0
def run(dataset: Dataset, config: TaskConfig):
    """Run this framework's ``exec.py`` through ``run_in_venv`` on the encoded data.

    The encoded features are passed as-is. The encoded targets go through
    ``unsparsify`` first. The payload also carries one ``'Numerical'`` or
    ``'Categorical'`` label per entry of ``dataset.predictors``.

    Args:
        dataset: provides the encoded train/test splits and the predictors.
        config: task configuration, forwarded unchanged to ``run_in_venv``.

    Returns:
        Whatever ``run_in_venv`` returns.
    """
    from frameworks.shared.caller import run_in_venv

    X_train, X_test = dataset.train.X_enc, dataset.test.X_enc
    y_train, y_test = unsparsify(dataset.train.y_enc, dataset.test.y_enc)

    # One label per predictor, in the order of ``dataset.predictors``.
    predictors_type = [
        'Numerical' if feature.is_numerical() else 'Categorical'
        for feature in dataset.predictors
    ]
    data = {
        'train': {'X': X_train, 'y': y_train},
        'test': {'X': X_test, 'y': y_test},
        'predictors_type': predictors_type,
    }

    return run_in_venv(
        __file__,
        "exec.py",
        input_data=data,
        dataset=dataset,
        config=config,
    )