예제 #1
0
def run(dataset: Dataset, config: TaskConfig):
    """Serialize the dataset (raw frames + dtype hints) and run exec.py in a venv."""
    from frameworks.shared.caller import run_in_venv

    def _column_dtype(feature):
        # keep as object everything that is not numerical
        if feature.is_categorical(strict=False):
            return 'object'
        return 'int' if feature.data_type == 'integer' else 'float'

    data = dict(
        train=dict(data=dataset.train.data),
        test=dict(data=dataset.test.data),
        target=dict(name=dataset.target.name, classes=dataset.target.values),
        columns=[(f.name, _column_dtype(f)) for f in dataset.features],
        # AutoGluon problem_type is using same names as amlb.data.DatasetType
        problem_type=dataset.type.name,
    )

    return run_in_venv(__file__, "exec.py",
                       input_data=data, dataset=dataset, config=config)
예제 #2
0
def run(dataset: Dataset, config: TaskConfig):
    """Impute the encoded features, run exec.py in a venv, and post-process results."""
    from frameworks.shared.caller import run_in_venv

    train_X, test_X = impute_array(dataset.train.X_enc, dataset.test.X_enc)
    data = dict(
        train=dict(X=train_X, y=dataset.train.y_enc),
        test=dict(X=test_X, y=dataset.test.y_enc),
    )

    def process_results(results):
        probs = results.probabilities
        # numpy load always return an array; a 0-d array carries a format marker
        if probs is not None and not probs.shape:
            prob_format = probs.item()
            if prob_format != "predictions":
                raise ValueError(
                    f"Unknown probabilities format: {prob_format}")
            # Rebuild one-hot probabilities from the hard class predictions.
            target_values_enc = dataset.target.label_encoder.transform(
                dataset.target.values)
            encoder = Encoder('one-hot', target=False, encoded_type=float)
            results.probabilities = encoder.fit(
                target_values_enc).transform(results.predictions)
        return results

    return run_in_venv(__file__, "exec.py",
                       input_data=data, dataset=dataset, config=config,
                       process_results=process_results)
예제 #3
0
def run(dataset: Dataset, config: TaskConfig):
    """Impute the encoded feature matrices and run exec.py in a venv.

    NOTE(review): `impute` and `run_in_venv` were referenced without the
    function-local imports every sibling integration uses; unless the
    enclosing module imports them at top level (not visible here), the
    call raises NameError. Imported locally for consistency with the
    otherwise-identical integration that imports `amlb.datautils.impute`
    and `frameworks.shared.caller.run_in_venv` inside `run`.
    """
    from amlb.datautils import impute
    from frameworks.shared.caller import run_in_venv

    X_train_enc, X_test_enc = impute(dataset.train.X_enc, dataset.test.X_enc)
    data = dict(train=dict(X_enc=X_train_enc, y_enc=dataset.train.y_enc),
                test=dict(X_enc=X_test_enc, y_enc=dataset.test.y_enc))

    return run_in_venv(__file__,
                       "exec.py",
                       input_data=data,
                       dataset=dataset,
                       config=config)
예제 #4
0
def run(dataset: Dataset, config: TaskConfig):
    """Pass the train/test file paths and target name to exec.py in a venv."""
    from frameworks.shared.caller import run_in_venv

    data = dict(train_path=dataset.train.path,
                test_path=dataset.test.path,
                target=dataset.target.name)

    return run_in_venv(__file__,
                       "exec.py",
                       input_data=data,
                       dataset=dataset,
                       config=config)
예제 #5
0
def run(dataset: Dataset, config: TaskConfig):
    """Run exec.py in a venv on the data files, with per-feature cardinalities.

    A cardinality of 0 is emitted for features with no category values.
    """
    from frameworks.shared.caller import run_in_venv

    cardinalities = [len(f.values) if f.values is not None else 0
                     for f in dataset.features]
    data = dict(train=dict(path=dataset.train.path),
                test=dict(path=dataset.test.path),
                target=dict(index=dataset.target.index),
                domains=dict(cardinalities=cardinalities))

    # Forward the benchmark's monitoring settings to the subprocess config.
    config.ext.monitoring = rconfig().monitoring
    return run_in_venv(__file__, "exec.py",
                       input_data=data, dataset=dataset, config=config)
예제 #6
0
def run(dataset: Dataset, config: TaskConfig):
    """Impute the encoded feature matrices and run exec.py in a venv."""
    from amlb.datautils import impute
    from frameworks.shared.caller import run_in_venv

    train_X, test_X = impute(dataset.train.X_enc, dataset.test.X_enc)
    data = dict(
        train=dict(X_enc=train_X, y_enc=dataset.train.y_enc),
        test=dict(X_enc=test_X, y_enc=dataset.test.y_enc),
    )

    return run_in_venv(__file__, "exec.py",
                       input_data=data, dataset=dataset, config=config)
예제 #7
0
def run(dataset: Dataset, config: TaskConfig):
    """Hand the dataset file paths and target index to exec.py in a venv."""
    from frameworks.shared.caller import run_in_venv

    data = dict(
        train=dict(path=dataset.train.path),
        test=dict(path=dataset.test.path),
        target=dict(index=dataset.target.index),
    )

    # Forward the benchmark's monitoring settings to the subprocess config.
    config.ext.monitoring = rconfig().monitoring
    return run_in_venv(__file__, "exec.py",
                       input_data=data, dataset=dataset, config=config)
예제 #8
0
def run(dataset: Dataset, config: TaskConfig):
    """Run exec.py in a venv on parquet copies of the train/test data."""
    from frameworks.shared.caller import run_in_venv

    target = dict(name=dataset.target.name, classes=dataset.target.values)
    data = dict(
        train=dict(path=dataset.train.data_path('parquet')),
        test=dict(path=dataset.test.data_path('parquet')),
        target=target,
        # AutoGluon problem_type is using same names as amlb.data.DatasetType
        problem_type=dataset.type.name,
    )

    return run_in_venv(__file__,
                       "exec.py",
                       input_data=data,
                       dataset=dataset,
                       config=config)
예제 #9
0
def run(dataset, config):
    """Run exec.py in a venv on the raw frames, densifying sparse dataframes."""
    from frameworks.shared.caller import run_in_venv

    data = dict(
        train=dict(X=dataset.train.X, y=dataset.train.y),
        test=dict(X=dataset.test.X, y=dataset.test.y),
        problem_type=dataset.type.name,
    )
    # Request that sparse dataframes be deserialized as dense in the subprocess.
    serialization = dict(sparse_dataframe_deserialized_format='dense')
    options = dict(serialization=serialization)

    return run_in_venv(__file__, "exec.py",
                       input_data=data, dataset=dataset, config=config,
                       options=options)
예제 #10
0
def run(dataset: Dataset, config: TaskConfig):
    """Run exec.py in a venv with encoded data plus per-predictor type tags."""
    from frameworks.shared.caller import run_in_venv

    predictor_types = []
    for p in dataset.predictors:
        tag = 'Categorical' if p.is_categorical(strict=False) else 'Numerical'
        predictor_types.append(tag)

    data = dict(
        train=dict(X_enc=dataset.train.X_enc, y_enc=dataset.train.y_enc),
        test=dict(X_enc=dataset.test.X_enc, y_enc=dataset.test.y_enc),
        predictors_type=predictor_types,
    )

    return run_in_venv(__file__, "exec.py",
                       input_data=data, dataset=dataset, config=config)
예제 #11
0
def run(dataset: Dataset, config: TaskConfig):
    """Forward the raw train/test frames to exec.py in a venv."""
    from frameworks.shared.caller import run_in_venv

    data = dict(train=dict(X=dataset.train.X, y=dataset.train.y),
                test=dict(X=dataset.test.X, y=dataset.test.y))

    return run_in_venv(__file__,
                       "exec.py",
                       input_data=data,
                       dataset=dataset,
                       config=config)
예제 #12
0
def run(dataset: Dataset, config: TaskConfig):
    """Run exec.py in a venv with encoded features and unsparsified targets."""
    from frameworks.shared.caller import run_in_venv

    # Densify the encoded targets; features are passed through as-is.
    y_train, y_test = unsparsify(dataset.train.y_enc, dataset.test.y_enc)
    predictor_types = ['Numerical' if p.is_numerical() else 'Categorical'
                       for p in dataset.predictors]
    data = dict(
        train=dict(X=dataset.train.X_enc, y=y_train),
        test=dict(X=dataset.test.X_enc, y=y_test),
        predictors_type=predictor_types,
    )

    return run_in_venv(__file__, "exec.py",
                       input_data=data, dataset=dataset, config=config)
예제 #13
0
def run(dataset: Dataset, config: TaskConfig):
    """Run exec.py in a venv with raw frames, encoded targets and dtype hints."""
    from frameworks.shared.caller import run_in_venv

    def _column_dtype(feature):
        # keep as object everything that is not numerical
        if feature.is_categorical(strict=False):
            return 'object'
        return 'int' if feature.data_type == 'integer' else 'float'

    data = dict(
        train=dict(data=dataset.train.data, y_enc=dataset.train.y_enc),
        test=dict(data=dataset.test.data, y_enc=dataset.test.y_enc),
        target=dict(name=dataset.target.name, classes=dataset.target.values),
        columns=[(f.name, _column_dtype(f)) for f in dataset.features],
        problem_type=dataset.type.name,
    )

    return run_in_venv(__file__, "exec.py",
                       input_data=data, dataset=dataset, config=config)
예제 #14
0
def run(dataset: Dataset, config: TaskConfig):
    """Run exec.py in a venv, on encoded+imputed or raw data.

    The '_encode' framework parameter (default True) selects between the
    encoded matrices (with imputation applied) and the raw frames.
    """
    from amlb.datautils import impute_array
    from frameworks.shared.caller import run_in_venv

    if config.framework_params.get('_encode', True):
        X_train, X_test = impute_array(dataset.train.X_enc,
                                       dataset.test.X_enc)
        y_train, y_test = dataset.train.y_enc, dataset.test.y_enc
    else:
        X_train, X_test = dataset.train.X, dataset.test.X
        y_train, y_test = dataset.train.y, dataset.test.y

    data = dict(train=dict(X=X_train, y=y_train),
                test=dict(X=X_test, y=y_test))

    return run_in_venv(__file__, "exec.py",
                       input_data=data, dataset=dataset, config=config)
예제 #15
0
def run(dataset: Dataset, config: TaskConfig):
    """Run exec.py in a venv with both file paths and in-memory frames."""
    from frameworks.shared.caller import run_in_venv

    train = dict(path=dataset.train.path,
                 X=dataset.train.X,
                 y=dataset.train.y)
    test = dict(path=dataset.test.path,
                X=dataset.test.X,
                y=dataset.test.y)
    data = dict(target=dataset.target.name, train=train, test=test)
    # Request that sparse dataframes be deserialized as dense in the subprocess.
    options = dict(
        serialization=dict(sparse_dataframe_deserialized_format='dense'))

    return run_in_venv(__file__, "exec.py",
                       input_data=data, dataset=dataset, config=config,
                       options=options)
예제 #16
0
def run(dataset: Dataset, config: TaskConfig):
    """Run exec.py in a venv with raw frames and per-predictor dtype hints.

    Fix: removed the function-local `from amlb.datautils import impute` —
    `impute` was never used anywhere in this function.
    """
    from frameworks.shared.caller import run_in_venv

    data = dict(
        train=dict(X=dataset.train.X, y=dataset.train.y),
        test=dict(X=dataset.test.X, y=dataset.test.y),
        # keep as object everything that is not numerical
        columns=[(f.name, 'float' if f.is_numerical() else 'object')
                 for f in dataset.predictors],
        problem_type=dataset.type.name)

    return run_in_venv(__file__,
                       "exec.py",
                       input_data=data,
                       dataset=dataset,
                       config=config)
예제 #17
0
def run(dataset: Dataset, config: TaskConfig):
    """Run exec.py in a venv, rewriting data files so the target column is last.

    ML-Plan requires the target attribute to be the last column, so both
    data files are rewritten via `reorder_dataset` whenever the target is
    not already the final column.

    Fix: removed the unused local
    `backend = config.framework_params.get('_backend')` — it was assigned
    but never read in this function.
    """
    from frameworks.shared.caller import run_in_venv

    train_path = dataset.train.path
    test_path = dataset.test.path
    # ML-Plan requires the target attribute to be the last column
    if dataset.target.index != len(dataset.predictors):
        train_path = reorder_dataset(dataset.train.path,
                                     target_src=dataset.target.index)
        test_path = reorder_dataset(dataset.test.path,
                                    target_src=dataset.target.index)

    # NOTE(review): the target index forwarded below is the ORIGINAL index,
    # even when the files were reordered above to move the target last —
    # confirm exec.py accounts for that.
    data = dict(
        train=dict(path=train_path),
        test=dict(path=test_path),
        target=dict(index=dataset.target.index),
    )

    return run_in_venv(__file__,
                       "exec.py",
                       input_data=data,
                       dataset=dataset,
                       config=config)