def test_run_experiment_lr_with_object_no_configdir():
    """Test rsmtool using a Configuration object and no specified configdir.

    The train, test and feature files are copied into a temporary directory
    with ``copy_data_files`` and the paths it returns are used in the
    configuration, which is created without a ``configdir`` argument.
    """
    source = 'lr-object-no-path'
    experiment_id = 'lr_object_no_path'

    # input files to copy; these paths are handed to ``copy_data_files``
    # together with ``rsmtool_test_dir``
    old_file_dict = {'train': 'data/files/train.csv',
                     'test': 'data/files/test.csv',
                     'features': 'data/experiments/lr-object-no-path/features.csv'}

    # set up a temporary directory since we will be using getcwd; it is used
    # as a context manager so that it is removed deterministically when the
    # check finishes (or fails) instead of relying on implicit cleanup
    # NOTE(review): ``prefix=getcwd()`` makes the directory *name* start with
    # the cwd path rather than creating it inside the cwd; ``dir=getcwd()``
    # may have been intended - confirm before changing
    with tempfile.TemporaryDirectory(prefix=getcwd()) as temp_dir_name:
        new_file_dict = copy_data_files(temp_dir_name,
                                        old_file_dict,
                                        rsmtool_test_dir)

        config_dict = {"train_file": new_file_dict['train'],
                       "id_column": "ID",
                       "use_scaled_predictions": True,
                       "test_label_column": "score",
                       "train_label_column": "score",
                       "test_file": new_file_dict['test'],
                       "trim_max": 6,
                       "features": new_file_dict['features'],
                       "trim_min": 1,
                       "model": "LinearRegression",
                       "experiment_id": "lr_object_no_path",
                       "description": "Using all features with an LinearRegression model."}

        # no ``configdir`` is passed on purpose: that is the case under test
        config_obj = Configuration(config_dict)

        # the experiment must run while the copied files still exist
        check_run_experiment(source,
                             experiment_id,
                             config_obj_or_dict=config_obj)
def test_run_experiment_lr_with_object_and_configdir():
    """Test rsmtool using a Configuration object and specified configdir.

    The configuration refers to its input files with relative paths and the
    experiment directory is passed to ``Configuration`` as ``configdir``.
    """
    source = 'lr-object'
    experiment_id = 'lr_object'

    # directory of this experiment inside the test data tree
    experiment_dir = join(rsmtool_test_dir, 'data', 'experiments', source)

    # same keys, values and insertion order as a literal dictionary
    settings = dict(
        train_file="../../files/train.csv",
        id_column="ID",
        use_scaled_predictions=True,
        test_label_column="score",
        train_label_column="score",
        test_file="../../files/test.csv",
        trim_max=6,
        features="features.csv",
        trim_min=1,
        model="LinearRegression",
        experiment_id="lr_object",
        description="Using all features with an LinearRegression model.",
    )

    configuration = Configuration(settings, configdir=experiment_dir)

    check_run_experiment(source, experiment_id, config_obj_or_dict=configuration)
def test_run_experiment_lr_with_dictionary():
    """Test rsmtool when a plain dictionary is passed as the configuration.

    The train, test and feature files are copied into a temporary directory
    with ``copy_data_files`` and the paths it returns are used in the
    dictionary handed to ``check_run_experiment``.
    """
    source = 'lr-dictionary'
    experiment_id = 'lr_dictionary'

    # input files to copy; these paths are handed to ``copy_data_files``
    # together with ``rsmtool_test_dir``
    old_file_dict = {'train': 'data/files/train.csv',
                     'test': 'data/files/test.csv',
                     'features': 'data/experiments/lr-dictionary/features.csv'}

    # the temporary directory is used as a context manager so that it is
    # removed deterministically when the check finishes (or fails) instead of
    # relying on implicit cleanup
    # NOTE(review): ``prefix=getcwd()`` makes the directory *name* start with
    # the cwd path rather than creating it inside the cwd; ``dir=getcwd()``
    # may have been intended - confirm before changing
    with tempfile.TemporaryDirectory(prefix=getcwd()) as temp_dir_name:
        new_file_dict = copy_data_files(temp_dir_name,
                                        old_file_dict,
                                        rsmtool_test_dir)

        config_dict = {"train_file": new_file_dict['train'],
                       "id_column": "ID",
                       "use_scaled_predictions": True,
                       "test_label_column": "score",
                       "train_label_column": "score",
                       "test_file": new_file_dict['test'],
                       "trim_max": 6,
                       "features": new_file_dict['features'],
                       "trim_min": 1,
                       "model": "LinearRegression",
                       "experiment_id": "lr_dictionary",
                       "description": "Using all features with an LinearRegression model."}

        # the dictionary itself (not a Configuration object) is passed in;
        # the experiment must run while the copied files still exist
        check_run_experiment(source,
                             experiment_id,
                             config_obj_or_dict=config_dict)
def test_run_experiment_parameterized(*args, **kwargs):
    """Forward the given arguments to ``check_run_experiment``.

    ``given_test_dir`` is set to ``TEST_DIR`` when that value is truthy, and
    ``ConvergenceWarning`` is suppressed when the first positional argument
    starts with ``'linearsvr'``.
    """
    if TEST_DIR:
        kwargs.update(given_test_dir=TEST_DIR)

    # suppress known convergence warnings for LinearSVR-based experiments
    # TODO: once SKLL hyperparameters can be passed, replace this code
    first_arg = args[0]
    if first_arg.startswith('linearsvr'):
        kwargs.update(suppress_warnings_for=[ConvergenceWarning])

    check_run_experiment(*args, **kwargs)
def test_run_experiment_lr_with_object_and_filepath():
    """Test for a rare use case where an old Configuration object is passed.

    The object is given a ``filepath`` attribute and has its ``configdir``
    attribute removed before being passed to ``check_run_experiment``.
    """
    source = 'lr-object'
    experiment_id = 'lr_object'

    # path of the JSON configuration file inside the experiment directory
    json_name = '{}.json'.format(experiment_id)
    config_file = join(rsmtool_test_dir, 'data', 'experiments', source, json_name)

    config_obj = Configuration({
        "train_file": "../../files/train.csv",
        "id_column": "ID",
        "use_scaled_predictions": True,
        "test_label_column": "score",
        "train_label_column": "score",
        "test_file": "../../files/test.csv",
        "trim_max": 6,
        "features": "features.csv",
        "trim_min": 1,
        "model": "LinearRegression",
        "experiment_id": "lr_object",
        "description": "Using all features with an LinearRegression model.",
    })

    # setting ``filepath`` triggers a deprecation warning, which is
    # silenced only for the duration of the assignment
    with warnings.catch_warnings():
        warnings.filterwarnings('ignore', category=DeprecationWarning)
        config_obj.filepath = config_file

    # the current code always assigns a value to ``configdir``,
    # so the attribute has to be removed explicitly
    del config_obj.configdir

    check_run_experiment(source, experiment_id, config_obj_or_dict=config_obj)
# Example #6
# 0
def test_run_experiment_parameterized(*args, **kwargs):
    """Forward the given arguments to ``check_run_experiment``.

    ``given_test_dir`` is set to ``TEST_DIR`` when that value is truthy.
    """
    if TEST_DIR:
        kwargs.update(given_test_dir=TEST_DIR)

    check_run_experiment(*args, **kwargs)