Example #1
0
def run_api_experiment(config, data_parquet):
    """Train the given config with a Dask backend, first from a Parquet
    path and then from the same data loaded as a dataframe.

    :param config: model configuration passed through to training.
    :param data_parquet: path to the Parquet dataset.
    """
    backend = DaskBackend()

    # Train directly from the Parquet file path.
    train_with_backend(backend, config, dataset=data_parquet)

    # Load the same data into a dataframe and train from the in-memory object.
    df = read_parquet(data_parquet, df_lib=backend.df_engine.df_lib)
    train_with_backend(backend, config, dataset=df)
Example #2
0
def run_api_experiment(config, data_parquet):
    """Verify the Horovod cluster shape, then train the config on a
    Parquet dataset using the Ray backend.

    :param config: model configuration passed through to training.
    :param data_parquet: path to the Parquet dataset.
    """
    # Sanity check that we get 2 slots over 1 host
    # (the previous comment claimed 4 slots, contradicting the asserts below).
    kwargs = get_horovod_kwargs()
    assert kwargs.get('num_hosts') == 1
    assert kwargs.get('num_slots') == 2

    # Train on Parquet (renamed from `dask_backend` — this is a RayBackend).
    ray_backend = RayBackend()
    train_with_backend(ray_backend, config, dataset=data_parquet)
Example #3
0
def run_api_experiment(config, data_parquet):
    """Verify the expected Horovod worker count, then train the config on
    a Parquet dataset using a Ray backend with explicit processor parallelism.

    :param config: model configuration passed through to training.
    :param data_parquet: path to the Parquet dataset.
    """
    # Sanity check that we get 2 workers
    # (the previous comment claimed "4 slots over 1 host", which did not
    # match the assertion below).
    kwargs = get_horovod_kwargs()
    assert kwargs.get('num_workers') == 2

    # Train on Parquet; skip evaluation to keep the run fast.
    # (renamed from `dask_backend` — this is a RayBackend).
    ray_backend = RayBackend(processor={
        'parallelism': 2,
    })
    train_with_backend(ray_backend,
                       config,
                       dataset=data_parquet,
                       evaluate=False)
Example #4
0
def run_api_experiment(config, dataset, backend_config, skip_save_processed_input=True):
    """Validate trainer resource allocation, train with the given backend
    config, and return the trained model.

    :param config: model configuration passed through to training.
    :param dataset: dataset (path or dataframe) to train on.
    :param backend_config: backend configuration dict; its
        ``processor.parallelism`` is checked against the Dask engine.
    :param skip_save_processed_input: forwarded to ``train_with_backend``.
    :return: the trained model.
    """
    # Trainer resources should match the hardware: one worker per GPU when
    # GPUs are available, otherwise a single CPU-only worker.
    kwargs = get_trainer_kwargs()
    gpu_count = torch.cuda.device_count()
    if gpu_count > 0:
        assert kwargs.get("num_workers") == gpu_count, kwargs
        assert kwargs.get("use_gpu"), kwargs
    else:
        assert kwargs.get("num_workers") == 1, kwargs
        assert not kwargs.get("use_gpu"), kwargs

    # Run training with evaluation enabled but prediction disabled.
    model = train_with_backend(
        backend_config,
        config,
        dataset=dataset,
        evaluate=True,
        predict=False,
        skip_save_processed_input=skip_save_processed_input,
    )

    # The model must be Ray-backed; when Dask drives the dataframe engine,
    # its parallelism must match the requested configuration.
    assert isinstance(model.backend, RayBackend)
    if isinstance(model.backend.df_engine, DaskEngine):
        assert model.backend.df_engine.parallelism == backend_config["processor"]["parallelism"]

    return model
Example #5
0
def run_api_experiment(config, data_parquet, cache_format):
    """Train the config with a Dask backend using the given cache format,
    from both a Parquet path and an in-memory dataframe.

    :param config: model configuration passed through to training.
    :param data_parquet: path to the Parquet dataset.
    :param cache_format: cache format forwarded to ``DaskBackend``.
    """
    backend = DaskBackend(cache_format=cache_format)

    # Train straight from the Parquet file; skip evaluation and prediction.
    train_with_backend(
        backend,
        config,
        dataset=data_parquet,
        evaluate=False,
        predict=False,
    )

    # Repeat with the data loaded into a dataframe first.
    df = read_parquet(data_parquet, df_lib=backend.df_engine.df_lib)
    train_with_backend(
        backend,
        config,
        dataset=df,
        evaluate=False,
        predict=False,
    )
Example #6
0
def run_split_api_experiment(config, data_parquet):
    """Train with the Ray backend config over progressively more complete
    train/validation/test splits.

    :param config: model configuration passed through to training.
    :param data_parquet: path to the Parquet dataset to split.
    """
    train_fname, val_fname, test_fname = split(data_parquet)

    # Each run supplies one more split than the previous one:
    # train only, then train + validation, then train + validation + test.
    split_combinations = [
        {"training_set": train_fname},
        {"training_set": train_fname, "validation_set": val_fname},
        {
            "training_set": train_fname,
            "validation_set": val_fname,
            "test_set": test_fname,
        },
    ]
    for splits in split_combinations:
        train_with_backend(
            RAY_BACKEND_CONFIG,
            config,
            evaluate=False,
            predict=False,
            **splits,
        )
Example #7
0
def run_split_api_experiment(config, data_parquet, cache_format):
    """Train with a Dask backend (given cache format) over progressively
    more complete train/validation/test splits.

    :param config: model configuration passed through to training.
    :param data_parquet: path to the Parquet dataset to split.
    :param cache_format: cache format forwarded to ``DaskBackend``.
    """
    backend = DaskBackend(cache_format=cache_format)
    train_fname, val_fname, test_fname = split(data_parquet)

    # Each run supplies one more split than the previous one.
    split_combinations = [
        {"training_set": train_fname},
        {"training_set": train_fname, "validation_set": val_fname},
        {
            "training_set": train_fname,
            "validation_set": val_fname,
            "test_set": test_fname,
        },
    ]
    for splits in split_combinations:
        train_with_backend(
            backend,
            config,
            evaluate=False,
            predict=False,
            **splits,
        )
Example #8
0
def run_api_experiment(config, data_parquet):
    """Verify trainer resources (one worker with 2 CPUs), then train on a
    Parquet dataset with the Ray backend config and check the dataframe
    engine's parallelism.

    :param config: model configuration passed through to training.
    :param data_parquet: path to the Parquet dataset.
    """
    # Sanity check: a single worker with 2 CPUs allocated to it
    # (the previous comment claimed "4 slots over 1 host", contradicting
    # the assertions below).
    kwargs = get_trainer_kwargs()
    assert kwargs.get("num_workers") == 1, kwargs
    assert kwargs.get("resources_per_worker").get("CPU") == 2, kwargs

    # Train on Parquet; skip evaluation to keep the run fast.
    model = train_with_backend(RAY_BACKEND_CONFIG,
                               config,
                               dataset=data_parquet,
                               evaluate=False)

    # The dataframe engine must honor the configured parallelism.
    assert isinstance(model.backend, RayBackend)
    assert model.backend.df_engine.parallelism == RAY_BACKEND_CONFIG[
        "processor"]["parallelism"]
Example #9
0
def run_split_api_experiment(config, data_parquet):
    """Train with a Dask backend, adding the validation and test splits one
    run at a time.

    :param config: model configuration passed through to training.
    :param data_parquet: path to the Parquet dataset to split.
    """
    backend = DaskBackend()
    train_fname, val_fname, test_fname = split(data_parquet)

    # Train only
    train_with_backend(backend, config, training_set=train_fname)

    # Train + validation
    train_with_backend(
        backend,
        config,
        training_set=train_fname,
        validation_set=val_fname,
    )

    # Train + validation + test
    train_with_backend(
        backend,
        config,
        training_set=train_fname,
        validation_set=val_fname,
        test_set=test_fname,
    )