Python DataSource Examples

Programming Language: Python

Namespace/Package Name: src.sources.data_source

Class/Type: DataSource

Examples at hotexamples.com: 16

Python DataSource - 16 examples found. These are the top-rated real-world Python examples of src.sources.data_source.DataSource, extracted from open source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

DataSource(16)

create_column_relation(2)

get_data(2)

describe_row_relation_for_index(1)

map_rows_to(1)

Example #1

Show file

def test_init_when_unstructured():
    """DataSource built from a plain text string is an UnstructuredDataSource.

    Also checks that the data round-trips through get_data(), that the
    source is flagged as not structured, and that its name matches the
    expected "string with hash ..." label.
    """
    raw_text = "this is an unstructured text string"
    expected_name = "string with hash 9ec30fc91f18445a44b9e9c2820d388d"

    unstructured = DataSource(raw_text)

    assert isinstance(unstructured, UnstructuredDataSource)
    assert unstructured.get_data() == raw_text
    assert not unstructured.structured
    assert unstructured.name == expected_name

Example #2

Show file

def test_init_when_structured():
    """DataSource built from the sample DataFrame is a StructuredDataSource.

    Also checks that get_data() holds the same values as the input frame,
    that the source is flagged as structured, and that its name matches the
    expected "pandas DataFrame (hash ...)" label.
    """
    frame = get_sample_directory_df()
    expected_name = "pandas DataFrame (hash 7383002750474244645)"

    structured = DataSource(frame)

    assert isinstance(structured, StructuredDataSource)
    # NOTE(review): this compares two lists of row arrays with ==; presumably
    # the sample frame's rows make that comparison unambiguous -- confirm.
    stored_rows = list(structured.get_data().values)
    original_rows = list(frame.values)
    assert stored_rows == original_rows
    assert structured.structured
    assert structured.name == expected_name

Example #3

Show file

File: test_column_relation_builder.py Project: zaxmks/demo-data-compliance-service

def test_get_relations_when_not_map_by_name():
    """get_relations raises NotImplementedError when the map-by type is "test"."""
    builder = ColumnRelationBuilder(
        DataSource("src/tests/test_data/sample/names.csv"),
        DataSource("src/tests/test_data/sample/employees.xml"),
    )

    # Stub configuration whose get_map_by_type() reports the value "test".
    config = Mock()
    config.get_map_by_type.return_value = "test"

    with pytest.raises(NotImplementedError):
        builder.get_relations(config)

Example #4

Show file

File: test_column_relation_builder.py Project: zaxmks/demo-data-compliance-service

def test_get_relations_by_name():
    """_get_relations_by_name relates the "name" column of names.csv to itself.

    Source and target are both loaded from the same CSV file, and the test
    expects exactly one relation: "name" -> "name" with confidence 1.0,
    pointing at the target data source.
    """
    source = DataSource("src/tests/test_data/sample/names.csv")
    target = DataSource("src/tests/test_data/sample/names.csv")
    crm = ColumnRelationBuilder(source, target)

    relations = crm._get_relations_by_name(test_mapping_config)

    # The leftover debug print was dropped; on failure pytest already shows
    # the compared values through assertion introspection.
    assert len(relations) == 1
    relation = relations[0]
    assert relation.target_data_source == target
    assert relation.source_column_name == "name"
    assert relation.target_column_name == "name"
    assert relation.confidence == 1.0

Example #5

Show file

File: test_column_relation_builder.py Project: zaxmks/demo-data-compliance-service

def test_get_relations_when_map_by_name():
    """With map-by type "name", get_relations calls _get_relations_by_name once.

    The by-name helper is replaced with a Mock so only the dispatch is
    checked: the configuration is queried once and handed to the helper.
    """
    builder = ColumnRelationBuilder(
        DataSource("src/tests/test_data/sample/names.csv"),
        DataSource("src/tests/test_data/sample/employees.xml"),
    )
    config = Mock()
    config.get_map_by_type.return_value = "name"
    by_name_stub = Mock()
    builder._get_relations_by_name = by_name_stub

    builder.get_relations(config)

    assert config.get_map_by_type.call_count == 1
    # Exactly one call, and that call received the configuration.
    by_name_stub.assert_called_once_with(config)

Example #6

Show file

File: test_column_relation_builder.py Project: zaxmks/demo-data-compliance-service

def test_build_relations_from_matches_when_above_threshold():
    """_build_relations_from_matches, called with 0.1, returns one relation for test_match.

    The relation is expected to point at the target data source, map
    "SOURCETEST" to "TEST", and carry a confidence of 0.1234.
    """
    names_source = DataSource("src/tests/test_data/sample/names.csv")
    employees_target = DataSource("src/tests/test_data/sample/employees.xml")
    builder = ColumnRelationBuilder(names_source, employees_target)

    built = builder._build_relations_from_matches(
        "SOURCETEST",
        [test_match],
        0.1,
    )

    assert len(built) == 1
    only_relation = built[0]
    assert only_relation.target_data_source == employees_target
    assert only_relation.source_column_name == "SOURCETEST"
    assert only_relation.target_column_name == "TEST"
    assert only_relation.confidence == 0.1234

Example #7

Show file

def test_relate_columns_to():
    """relate_columns_to with an "exact" matching model yields one name->name relation.

    Source and target are both loaded from names.csv; the single expected
    column relation targets ds_target with confidence 1.0.
    """
    origin = DataSource("src/tests/test_data/sample/names.csv")
    destination = DataSource("src/tests/test_data/sample/names.csv")
    exact_matching = ValueMatchingConfiguration(model_type="exact")

    origin.relate_columns_to(destination, mapping_configuration=exact_matching)

    assert len(origin.column_relations) == 1
    relation = origin.column_relations[0]
    assert relation.target_data_source == destination
    assert relation.source_column_name == "name"
    assert relation.target_column_name == "name"
    assert relation.confidence == 1.0

Example #8

Show file

File: data_collection.py Project: zaxmks/demo-data-compliance-service

 def append(self, *args, **kwargs):
     """Append a new data source to this collection by specifying DataSource args.

     When the first positional argument is a path to an existing directory,
     the file list returned by ``self._construct_filelist(*args, **kwargs)``
     is loaded one file at a time. Files for which ``DataSource`` raises
     ``NotImplementedError`` are recorded in ``self.unreadable_sources``
     instead of aborting the whole load. Any other input is forwarded
     unchanged to ``DataSource(*args, **kwargs)``.

     Raises:
         ValueError: If no positional argument is given.
     """
     if not args:
         raise ValueError("Must specify a valid input to append.")

     first = args[0]
     # os.path.isdir raises TypeError for objects that are not path-like
     # (for example a pandas DataFrame), so only probe the filesystem when
     # the first argument can actually be a path.
     is_directory = isinstance(
         first, (str, bytes, os.PathLike)
     ) and os.path.isdir(first)

     if not is_directory:
         self.sources.append(DataSource(*args, **kwargs))
         return

     for file_name in self._construct_filelist(*args, **kwargs):
         try:
             self.sources.append(DataSource(file_name))
         except NotImplementedError:
             # Best effort: remember the file that could not be read and
             # continue with the remaining ones.
             self.unreadable_sources.append(file_name)

Example #9

Show file

def test_map_rows_to():
    """map_rows_to between two copies of names.csv produces 252 row relations.

    A "name" -> "name" column relation is created first; rows are then
    mapped with an "exact" value matcher and a "weighted_linear" row mapper
    that weights "name" at 1.
    """
    origin = DataSource("src/tests/test_data/sample/names.csv")
    destination = DataSource("src/tests/test_data/sample/names.csv")
    origin.create_column_relation("name", "name", destination)

    exact_values = ValueMatchingConfiguration(model_type="exact")
    weighted_rows = RowMappingConfiguration(
        model_type="weighted_linear",
        weights={"name": 1},
    )
    origin.map_rows_to(destination, exact_values, weighted_rows)

    # Duplicate record present, hence +2
    relation_count = len(origin.row_relations)
    assert relation_count == 252

Example #10

Show file

File: test_pseudocolumn_generator.py Project: zaxmks/demo-data-compliance-service

def get_no_middle_source():
    """Return a DataSource built from source_dict without its "MIDDLE_NAME" entry.

    A deep copy is modified, so the module-level source_dict is left intact.
    """
    trimmed = copy.deepcopy(source_dict)
    del trimmed["MIDDLE_NAME"]
    return DataSource(pd.DataFrame(trimmed))

Example #11

Show file

File: test_pseudocolumn_generator.py Project: zaxmks/demo-data-compliance-service

def get_source():
    """Return a DataSource wrapping a DataFrame built from source_dict."""
    return DataSource(pd.DataFrame(source_dict))

Example #12

Show file

def get_golden_source():
    """Return a DataSource loaded from the person_base.tsv test table."""
    golden_path = "src/tests/test_data/table/person_base.tsv"
    return DataSource(golden_path)

Example #13

Show file

File: test_column_relation_builder.py Project: zaxmks/demo-data-compliance-service

def test_build_relations_from_matches_when_under_threshold():
    """_build_relations_from_matches, called with 0.5, returns no relations for test_match."""
    builder = ColumnRelationBuilder(
        DataSource("src/tests/test_data/sample/names.csv"),
        DataSource("src/tests/test_data/sample/employees.xml"),
    )

    built = builder._build_relations_from_matches(
        "SOURCETEST",
        [test_match],
        0.5,
    )

    assert built == []

Example #14

Show file

File: test_column_relation_builder.py Project: zaxmks/demo-data-compliance-service

def test_init_when_target_unstructured():
    """ColumnRelationBuilder raises TypeError when the target is built from email.txt."""
    source = DataSource("src/tests/test_data/sample/names.csv")
    target = DataSource("src/tests/test_data/sample/email.txt")
    with pytest.raises(TypeError):
        # Only the raised exception matters, so the result is not bound to
        # an otherwise unused local.
        ColumnRelationBuilder(source, target)

Example #15

Show file

File: test_column_relation_builder.py Project: zaxmks/demo-data-compliance-service

def test_init_when_source_and_target_structured():
    """ColumnRelationBuilder exposes the source and target it was constructed with."""
    names_source = DataSource("src/tests/test_data/sample/names.csv")
    employees_target = DataSource("src/tests/test_data/sample/employees.xml")

    builder = ColumnRelationBuilder(names_source, employees_target)

    assert builder.source == names_source
    assert builder.target == employees_target

Example #16

Show file

def test_describe_row_relation_for_index():
    """describe_row_relation_for_index(0) returns the expected JSON-style string.

    A "name" -> "name" column relation between two copies of names.csv is
    created before the description is requested.
    """
    origin = DataSource("src/tests/test_data/sample/names.csv")
    destination = DataSource("src/tests/test_data/sample/names.csv")
    origin.create_column_relation("name", "name", destination)

    expected = '{"name": "Soo Hong"}'
    assert origin.describe_row_relation_for_index(0) == expected