Example #1
def test_add_data_set(data_sets_dict):
    data_bunch = DataBunch()
    for data_set_name, data_set in data_sets_dict.items():
        data_bunch.add_data_set(data_set_name, data_set)

    for data_set_name, data_set in data_sets_dict.items():
        assert getattr(data_bunch, data_set_name) == data_set
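The test only relies on add_data_set exposing each data set as an attribute named after it. A minimal sketch of a DataBunch satisfying that contract (an assumption for illustration, not the library's actual implementation):

# Hypothetical sketch consistent with the test above.
class DataBunch:
    def __init__(self, data_sets_dict=None):
        # Optionally accept a mapping of name -> DataSet up front.
        for data_set_name, data_set in (data_sets_dict or {}).items():
            self.add_data_set(data_set_name, data_set)

    def add_data_set(self, data_set_name, data_set):
        # Expose the data set as an attribute, e.g. data_bunch.train.
        setattr(self, data_set_name, data_set)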
Example #2
def read_data_bunch(read_data_set, params_dict):
    """
    Takes the provider "read_data_set" and uses it to read arbitrarily named DataSets and add them to a
    new DataBunch

    :param callable read_data_set: A function that reads source data and returns a DataSet
    :param dict params_dict: A dictionary mapping each DataSet name to the parameters according to which that
    DataSet is to be created. For example:
    {
        "train": {
            "path": "./example_data/train.csv",
            "input_format": ".csv",
            "data_wrappers_params_dict": {
                "index": ["ID", "ID2"],
                "features": ["field1_num", "field2_num", "field3_num"],
                "targets": ["field4_target", "field5_target", "field6_target"],
                "full_data": ["ID", "ID2", "field1_num", "field2_num", "field3_num", "field4_target", "field5_target", "field6_target"]
            }
        },
        "valid" : {...},
        "test" : {...}
    }

    :return: a DataBunch
    """

    data_sets_dict = {}
    for data_set_name, params in params_dict.items():
        data_sets_dict[data_set_name] = read_data_set(**params)

    return DataBunch(data_sets_dict)
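A brief usage sketch (the read_csv_data_set provider is hypothetical; it only needs to accept the keys used in params_dict and return a DataSet):

# Hypothetical provider, for illustration only.
def read_csv_data_set(path, input_format, data_wrappers_params_dict):
    ...  # read the file at `path` and build a DataSet from it

data_bunch = read_data_bunch(read_csv_data_set, params_dict)  # params_dict shaped as in the docstring example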
Example #3
def read_test_data_bunch(read_data_set, test_params):
    """
    Takes the provider "read_data_set" and uses it to read a "test" DataSet and add it to a new DataBunch

    :param callable read_data_set: A function that reads source data and returns a DataSet
    :param dict test_params: The parameters according to which to read the "test" DataSet
    :return: a DataBunch
    """
    return DataBunch({"test": read_data_set(**test_params)})
Example #4
def read_train_valid_test_data_bunch(read_data_set, train_params, valid_params,
                                     test_params):
    """
    Takes the provider "read_data_set" and uses it to read "train", "valid" and "test" DataSets and add them to a
    new DataBunch

    :param callable read_data_set: A function that reads source data and returns a DataSet
    :param dict train_params: The parameters according to which to read the "train" DataSet
    :param dict valid_params: The parameters according to which to read the "valid" DataSet
    :param dict test_params: The parameters according to which to read the "test" DataSet
    :return: a DataBunch
    """
    return DataBunch({
        "train": read_data_set(**train_params),
        "valid": read_data_set(**valid_params),
        "test": read_data_set(**test_params)
    })
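Usage follows the same pattern, with one parameter dict per split (the provider and dicts are hypothetical, each shaped like a single entry of params_dict from Example #2):

data_bunch = read_train_valid_test_data_bunch(
    read_csv_data_set, train_params, valid_params, test_params)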
Example #5
import pytest


# Minimal mock class so the excerpt below is runnable; the original snippet
# shows only the transform method, the surrounding class is assumed.
class MockDataSet:
    def __init__(self, value):
        self.value = value

    def transform(self, transform_then_slice, transformation_params):
        new_data_set = MockDataSet(self.value * 100)
        new_data_set.transform_then_slice = transform_then_slice
        new_data_set.transformation_params = transformation_params

        return new_data_set


data_sets_dict = {
    "train": MockDataSet(123),
    "valid": MockDataSet(456),
    "test": MockDataSet(789)
}

data_bunch = DataBunch(data_sets_dict)


@pytest.mark.parametrize(
    "data_bunch, data_set_names, params, transform_then_slice",
    [(data_bunch, ["train", "valid"], {"blah": "456"}, True)])
def test_transform(data_bunch, data_set_names, params, transform_then_slice):
    new_data_bunch = data_bunch.transform(data_set_names, params,
                                          transform_then_slice)

    for data_set_name in data_set_names:
        new_data_set = getattr(new_data_bunch, data_set_name)
        assert new_data_set.value == getattr(data_bunch,
                                             data_set_name).value * 100
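The test only requires that transform return a new DataBunch whose named data sets have each been transformed (the mock multiplies its value by 100). Continuing the sketch from Example #1, a transform method consistent with that contract, assuming each DataSet exposes its own transform(transform_then_slice, transformation_params) as MockDataSet does (an assumption, not the library's actual code):

# Hypothetical sketch, consistent with the test above.
class DataBunch:
    ...

    def transform(self, data_set_names, transformation_params, transform_then_slice):
        new_data_sets_dict = {}
        for data_set_name in data_set_names:
            # Delegate to each named data set's own transform method.
            data_set = getattr(self, data_set_name)
            new_data_sets_dict[data_set_name] = data_set.transform(
                transform_then_slice, transformation_params)
        return DataBunch(new_data_sets_dict)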
Example #6
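The DataFrames and column lists referenced below are assumed to exist in the surrounding code; a hypothetical minimal setup (paths and column names borrowed from the Example #2 docstring) could be:

import pandas as pd

# Hypothetical setup, for illustration only.
index_columns = ["ID", "ID2"]
features_columns = ["field1_num", "field2_num", "field3_num"]
targets_columns = ["field4_target", "field5_target", "field6_target"]
full_data_columns = index_columns + features_columns + targets_columns

df_train = pd.read_csv("./example_data/train.csv")
df_valid = pd.read_csv("./example_data/valid.csv")
df_test = pd.read_csv("./example_data/test.csv")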
data_bunch = DataBunch(
    data_sets_dict={
        "train": DataSet({
            "full_data": PandasDataWrapper(df_train, full_data_columns),
            "index": PandasDataWrapper(df_train[index_columns], index_columns),
            "features": PandasDataWrapper(df_train[features_columns], features_columns),
            "targets": PandasDataWrapper(df_train[targets_columns], targets_columns)
        }),
        "valid": DataSet({
            "full_data": PandasDataWrapper(df_valid, full_data_columns),
            "index": PandasDataWrapper(df_valid[index_columns], index_columns),
            "features": PandasDataWrapper(df_valid[features_columns], features_columns),
            "targets": PandasDataWrapper(df_valid[targets_columns], targets_columns)
        }),
        "test": DataSet({
            "full_data": PandasDataWrapper(df_test, full_data_columns),
            "index": PandasDataWrapper(df_test[index_columns], index_columns),
            "features": PandasDataWrapper(df_test[features_columns], features_columns),
            "targets": PandasDataWrapper(df_test[targets_columns], targets_columns)
        })
    })