def run(dataset: Dataset, config: TaskConfig):
    """Serialize raw train/test dataframes plus column dtypes and run exec.py in a venv."""
    from frameworks.shared.caller import run_in_venv

    def _dtype(feature):
        # keep as object everything that is not numerical
        if feature.is_categorical(strict=False):
            return 'object'
        return 'int' if feature.data_type == 'integer' else 'float'

    data = dict(
        train=dict(data=dataset.train.data),
        test=dict(data=dataset.test.data),
        target=dict(name=dataset.target.name, classes=dataset.target.values),
        columns=[(f.name, _dtype(f)) for f in dataset.features],
        problem_type=dataset.type.name  # AutoGluon problem_type is using same names as amlb.data.DatasetType
    )
    return run_in_venv(__file__, "exec.py", input_data=data, dataset=dataset, config=config)
def run(dataset: Dataset, config: TaskConfig):
    """Impute the encoded feature matrices, run exec.py in a venv, and post-process results."""
    from frameworks.shared.caller import run_in_venv

    X_train, X_test = impute_array(dataset.train.X_enc, dataset.test.X_enc)
    y_train, y_test = dataset.train.y_enc, dataset.test.y_enc
    data = dict(
        train=dict(X=X_train, y=y_train),
        test=dict(X=X_test, y=y_test),
    )

    def process_results(results):
        # numpy load always return an array; a 0-d array marks a format tag instead of real probabilities
        if results.probabilities is not None and not results.probabilities.shape:
            prob_format = results.probabilities.item()
            if prob_format == "predictions":
                # Rebuild class probabilities by one-hot encoding the hard predictions.
                target_values_enc = dataset.target.label_encoder.transform(dataset.target.values)
                encoder = Encoder('one-hot', target=False, encoded_type=float).fit(target_values_enc)
                results.probabilities = encoder.transform(results.predictions)
            else:
                raise ValueError(f"Unknown probabilities format: {prob_format}")
        return results

    return run_in_venv(__file__, "exec.py",
                       input_data=data, dataset=dataset, config=config,
                       process_results=process_results)
def run(dataset: Dataset, config: TaskConfig):
    """Impute the encoded train/test features and delegate execution to exec.py in a venv.

    Fix: import `impute` and `run_in_venv` locally, as the otherwise-identical sibling
    integration does — this block referenced both names without any visible import.
    """
    from amlb.datautils import impute
    from frameworks.shared.caller import run_in_venv

    X_train_enc, X_test_enc = impute(dataset.train.X_enc, dataset.test.X_enc)
    data = dict(
        train=dict(X_enc=X_train_enc, y_enc=dataset.train.y_enc),
        test=dict(X_enc=X_test_enc, y_enc=dataset.test.y_enc),
    )
    return run_in_venv(__file__, "exec.py", input_data=data, dataset=dataset, config=config)
def run(dataset: Dataset, config: TaskConfig):
    """Hand the train/test file paths and target column name to exec.py run in a venv."""
    from frameworks.shared.caller import run_in_venv

    data = dict(
        train_path=dataset.train.path,
        test_path=dataset.test.path,
        target=dataset.target.name,
    )
    return run_in_venv(__file__, "exec.py", input_data=data, dataset=dataset, config=config)
def run(dataset: Dataset, config: TaskConfig):
    """Pass train/test paths, target index and per-feature cardinalities to exec.py in a venv."""
    from frameworks.shared.caller import run_in_venv

    data = dict(
        train=dict(path=dataset.train.path),
        test=dict(path=dataset.test.path),
        target=dict(index=dataset.target.index),
        # Cardinality 0 is used for features with no enumerated values.
        domains=dict(cardinalities=[len(f.values) if f.values is not None else 0
                                    for f in dataset.features]),
    )
    config.ext.monitoring = rconfig().monitoring
    return run_in_venv(__file__, "exec.py", input_data=data, dataset=dataset, config=config)
def run(dataset: Dataset, config: TaskConfig):
    """Impute the encoded train/test splits and delegate execution to exec.py in a venv."""
    from amlb.datautils import impute
    from frameworks.shared.caller import run_in_venv

    imputed_train, imputed_test = impute(dataset.train.X_enc, dataset.test.X_enc)
    data = dict(
        train=dict(X_enc=imputed_train, y_enc=dataset.train.y_enc),
        test=dict(X_enc=imputed_test, y_enc=dataset.test.y_enc),
    )
    return run_in_venv(__file__, "exec.py", input_data=data, dataset=dataset, config=config)
def run(dataset: Dataset, config: TaskConfig):
    """Forward the train/test file paths and the target column index to exec.py in a venv."""
    from frameworks.shared.caller import run_in_venv

    data = dict(
        train=dict(path=dataset.train.path),
        test=dict(path=dataset.test.path),
        target=dict(index=dataset.target.index),
    )
    config.ext.monitoring = rconfig().monitoring
    return run_in_venv(__file__, "exec.py", input_data=data, dataset=dataset, config=config)
def run(dataset: Dataset, config: TaskConfig):
    """Point exec.py (run in a venv) at the parquet train/test files and describe the target."""
    from frameworks.shared.caller import run_in_venv

    data = dict(
        train=dict(path=dataset.train.data_path('parquet')),
        test=dict(path=dataset.test.data_path('parquet')),
        target=dict(name=dataset.target.name, classes=dataset.target.values),
        problem_type=dataset.type.name  # AutoGluon problem_type is using same names as amlb.data.DatasetType
    )
    return run_in_venv(__file__, "exec.py", input_data=data, dataset=dataset, config=config)
def run(dataset, config):
    """Forward raw X/y splits and the problem type to exec.py, densifying sparse dataframes."""
    from frameworks.shared.caller import run_in_venv

    data = dict(
        train=dict(X=dataset.train.X, y=dataset.train.y),
        test=dict(X=dataset.test.X, y=dataset.test.y),
        problem_type=dataset.type.name,
    )
    # Ensure the subprocess deserializes sparse dataframes as dense ones.
    options = dict(serialization=dict(sparse_dataframe_deserialized_format='dense'))
    return run_in_venv(__file__, "exec.py",
                       input_data=data, dataset=dataset, config=config, options=options)
def run(dataset: Dataset, config: TaskConfig):
    """Send encoded train/test matrices plus a per-predictor type tag to exec.py in a venv."""
    from frameworks.shared.caller import run_in_venv

    def _predictor_type(p):
        # Everything that is not strictly numerical is tagged categorical.
        return 'Categorical' if p.is_categorical(strict=False) else 'Numerical'

    data = dict(
        train=dict(X_enc=dataset.train.X_enc, y_enc=dataset.train.y_enc),
        test=dict(X_enc=dataset.test.X_enc, y_enc=dataset.test.y_enc),
        predictors_type=[_predictor_type(p) for p in dataset.predictors],
    )
    return run_in_venv(__file__, "exec.py", input_data=data, dataset=dataset, config=config)
def run(dataset: Dataset, config: TaskConfig):
    """Forward the raw train/test X/y splits to exec.py executed in the framework's venv."""
    from frameworks.shared.caller import run_in_venv

    data = dict(
        train=dict(X=dataset.train.X, y=dataset.train.y),
        test=dict(X=dataset.test.X, y=dataset.test.y),
    )
    return run_in_venv(__file__, "exec.py", input_data=data, dataset=dataset, config=config)
def run(dataset: Dataset, config: TaskConfig):
    """Pass encoded features, densified targets and predictor kinds to exec.py in a venv."""
    from frameworks.shared.caller import run_in_venv

    X_train, X_test = dataset.train.X_enc, dataset.test.X_enc
    y_train, y_test = unsparsify(dataset.train.y_enc, dataset.test.y_enc)
    kinds = ['Numerical' if p.is_numerical() else 'Categorical' for p in dataset.predictors]
    data = dict(
        train=dict(X=X_train, y=y_train),
        test=dict(X=X_test, y=y_test),
        predictors_type=kinds,
    )
    return run_in_venv(__file__, "exec.py", input_data=data, dataset=dataset, config=config)
def run(dataset: Dataset, config: TaskConfig):
    """Ship raw dataframes, encoded targets and column dtypes to exec.py run in a venv."""
    from frameworks.shared.caller import run_in_venv

    def _column_dtype(f):
        # Non-numerical features stay object; numerical ones keep their int/float kind.
        if f.is_categorical(strict=False):
            return 'object'
        return 'int' if f.data_type == 'integer' else 'float'

    data = dict(
        train=dict(data=dataset.train.data, y_enc=dataset.train.y_enc),
        test=dict(data=dataset.test.data, y_enc=dataset.test.y_enc),
        target=dict(name=dataset.target.name, classes=dataset.target.values),
        columns=[(f.name, _column_dtype(f)) for f in dataset.features],
        problem_type=dataset.type.name,
    )
    return run_in_venv(__file__, "exec.py", input_data=data, dataset=dataset, config=config)
def run(dataset: Dataset, config: TaskConfig):
    """Run exec.py in a venv on either encoded+imputed or raw splits, per the `_encode` param."""
    from amlb.datautils import impute_array
    from frameworks.shared.caller import run_in_venv

    encode = config.framework_params.get('_encode', True)
    if encode:
        X_train, X_test = impute_array(dataset.train.X_enc, dataset.test.X_enc)
        y_train, y_test = dataset.train.y_enc, dataset.test.y_enc
    else:
        X_train, X_test = dataset.train.X, dataset.test.X
        y_train, y_test = dataset.train.y, dataset.test.y
    data = dict(
        train=dict(X=X_train, y=y_train),
        test=dict(X=X_test, y=y_test),
    )
    return run_in_venv(__file__, "exec.py", input_data=data, dataset=dataset, config=config)
def run(dataset: Dataset, config: TaskConfig):
    """Give exec.py both file paths and in-memory splits, densifying sparse dataframes."""
    from frameworks.shared.caller import run_in_venv

    data = dict(
        target=dataset.target.name,
        train=dict(path=dataset.train.path, X=dataset.train.X, y=dataset.train.y),
        test=dict(path=dataset.test.path, X=dataset.test.X, y=dataset.test.y),
    )
    # Ensure the subprocess deserializes sparse dataframes as dense ones.
    options = dict(serialization=dict(sparse_dataframe_deserialized_format='dense'))
    return run_in_venv(__file__, "exec.py",
                       input_data=data, dataset=dataset, config=config, options=options)
def run(dataset: Dataset, config: TaskConfig):
    """Forward raw train/test splits plus simplified column dtypes to exec.py run in a venv.

    Fix: removed the unused local `from amlb.datautils import impute` — nothing in this
    function imputes; it forwards the raw (unencoded) splits as-is.
    """
    from frameworks.shared.caller import run_in_venv

    data = dict(
        train=dict(X=dataset.train.X, y=dataset.train.y),
        test=dict(X=dataset.test.X, y=dataset.test.y),
        # keep as object everything that is not numerical
        columns=[(f.name, 'float' if f.is_numerical() else 'object')
                 for f in dataset.predictors],
        problem_type=dataset.type.name,
    )
    return run_in_venv(__file__, "exec.py", input_data=data, dataset=dataset, config=config)
def run(dataset: Dataset, config: TaskConfig):
    """Prepare train/test dataset paths (target last) and execute exec.py in a venv.

    Fix: removed the dead local `backend = config.framework_params.get('_backend')`,
    which was assigned but never used in this function.
    NOTE(review): after reordering, `target.index` still reports the ORIGINAL column
    index — presumably exec.py accounts for that; confirm against exec.py.
    """
    from frameworks.shared.caller import run_in_venv

    train_path = dataset.train.path
    test_path = dataset.test.path
    # ML-Plan requires the target attribute to be the last column
    if dataset.target.index != len(dataset.predictors):
        train_path = reorder_dataset(dataset.train.path, target_src=dataset.target.index)
        test_path = reorder_dataset(dataset.test.path, target_src=dataset.target.index)
    data = dict(
        train=dict(path=train_path),
        test=dict(path=test_path),
        target=dict(index=dataset.target.index),
    )
    return run_in_venv(__file__, "exec.py", input_data=data, dataset=dataset, config=config)