예제 #1
0
파일: test_dataset.py 프로젝트: zyteka/SDV
    def test_reverse_transform(self):
        """Check that ``reverse_transform`` hands the given data to the table's transformer."""
        # Setup
        reversed_columns = {
            'item 1': pd.Series([1.0, 2.0, None, 4.0, 5.0]),
            'item 2': pd.Series([1.1, None, 3.3, None, 5.5]),
            'item 3': pd.Series([None, 'bbb', 'ccc', 'ddd', None]),
            'item 4': pd.Series([True, False, None, False, True])
        }
        hyper_transformer = Mock()
        hyper_transformer.reverse_transform.return_value = reversed_columns

        dtypes = {
            'item 1': 'int',
            'item 2': 'float',
            'item 3': 'str',
            'item 4': 'bool',
        }
        metadata = Mock(spec_set=Metadata)
        metadata._hyper_transformers = {'test': hyper_transformer}
        metadata.get_dtypes.return_value = dtypes

        # Run
        table_data = pd.DataFrame({'foo': [0, 1]})
        Metadata.reverse_transform(metadata, 'test', table_data)

        # Asserts
        # Only the first positional argument of the transformer call is inspected.
        positional_args = hyper_transformer.reverse_transform.call_args[0]
        pd.testing.assert_frame_equal(positional_args[0], pd.DataFrame({'foo': [0, 1]}))
예제 #2
0
파일: test_dataset.py 프로젝트: zyteka/SDV
    def test_add_table_with_fields_metadata(self):
        """Add a table passing explicit ``fields_metadata``.

        The table entry must contain exactly the given fields metadata, and
        neither ``set_primary_key`` nor ``add_relationship`` may be called,
        since no primary key or parent is passed.
        """
        # Setup
        metadata = Mock(spec_set=Metadata)
        metadata.get_tables.return_value = ['a_table', 'b_table']
        metadata._metadata = {'tables': dict()}

        # Run
        fields_metadata = {
            'a_field': {'type': 'numerical', 'subtype': 'integer'}
        }

        Metadata.add_table(metadata, 'x_table', fields_metadata=fields_metadata)

        # Asserts
        expected_table_meta = {
            'fields': {
                'a_field': {'type': 'numerical', 'subtype': 'integer'}
            }
        }

        assert metadata._metadata['tables']['x_table'] == expected_table_meta

        # These used to be bare ``call_count == 0`` comparisons, which are
        # evaluated and discarded; use real mock assertions instead.
        metadata.set_primary_key.assert_not_called()
        metadata.add_relationship.assert_not_called()
예제 #3
0
파일: test_dataset.py 프로젝트: zyteka/SDV
    def test_add_table_with_data_str(self, mock_read_csv):
        """Add a table passing ``data`` as a path string.

        ``mock_read_csv`` is injected by a patch decorator outside this view
        (presumably replacing the CSV reader used by ``add_table`` - confirm).
        The stored table metadata must contain the analyzed fields and the
        given path, and neither ``set_primary_key`` nor ``add_relationship``
        may be called.
        """
        # Setup
        metadata = Mock(spec_set=Metadata)
        metadata.get_tables.return_value = ['a_table', 'b_table']
        metadata._metadata = {'tables': dict()}
        mock_read_csv.return_value = pd.DataFrame({
            'a_field': [0, 1],
            'b_field': [True, False],
            'c_field': ['a', 'b']
        })
        metadata._get_field_details.return_value = {
            'a_field': {'type': 'numerical', 'subtype': 'integer'},
            'b_field': {'type': 'boolean'},
            'c_field': {'type': 'categorical'}
        }

        # Run
        Metadata.add_table(metadata, 'x_table', data='/path/to/file.csv')

        # Asserts
        expected_table_meta = {
            'fields': {
                'a_field': {'type': 'numerical', 'subtype': 'integer'},
                'b_field': {'type': 'boolean'},
                'c_field': {'type': 'categorical'}
            },
            'path': '/path/to/file.csv'
        }

        assert metadata._metadata['tables']['x_table'] == expected_table_meta

        # These used to be bare ``call_count == 0`` comparisons, which are
        # evaluated and discarded; use real mock assertions instead.
        metadata.set_primary_key.assert_not_called()
        metadata.add_relationship.assert_not_called()
예제 #4
0
파일: test_dataset.py 프로젝트: zyteka/SDV
    def test_add_table_with_no_fields_data(self):
        """Add a table passing only ``data`` so every column is analyzed.

        The stored fields must match what ``_get_field_details`` returns, and
        neither ``set_primary_key`` nor ``add_relationship`` may be called.
        """
        # Setup
        metadata = Mock(spec_set=Metadata)
        metadata.get_tables.return_value = ['a_table', 'b_table']
        metadata._metadata = {'tables': dict()}
        metadata._get_field_details.return_value = {
            'a_field': {'type': 'numerical', 'subtype': 'integer'},
            'b_field': {'type': 'boolean'},
            'c_field': {'type': 'categorical'}
        }

        # Run
        data = pd.DataFrame({'a_field': [0, 1], 'b_field': [True, False], 'c_field': ['a', 'b']})

        Metadata.add_table(metadata, 'x_table', data=data)

        # Asserts
        expected_table_meta = {
            'fields': {
                'a_field': {'type': 'numerical', 'subtype': 'integer'},
                'b_field': {'type': 'boolean'},
                'c_field': {'type': 'categorical'}
            }
        }

        assert metadata._metadata['tables']['x_table'] == expected_table_meta

        # These used to be bare ``call_count == 0`` comparisons, which are
        # evaluated and discarded; use real mock assertions instead.
        metadata.set_primary_key.assert_not_called()
        metadata.add_relationship.assert_not_called()
예제 #5
0
파일: test_dataset.py 프로젝트: zyteka/SDV
    def test_add_field(self):
        """Check that ``add_field`` registers a new field in an existing table entry."""
        # Setup
        metadata = Mock(spec_set=Metadata)
        metadata.get_tables.return_value = []
        metadata._metadata = {'tables': {'a_table': {'fields': {}}}}

        # Run
        Metadata.add_field(metadata, 'a_table', 'a_field', 'id', 'string', None)

        # Asserts
        new_field = {'type': 'id', 'subtype': 'string'}
        expected_tables = {'a_table': {'fields': {'a_field': new_field}}}

        assert metadata._metadata == {'tables': expected_tables}
        metadata._check_field.assert_called_once_with('a_table', 'a_field', exists=False)
예제 #6
0
파일: test_dataset.py 프로젝트: zyteka/SDV
 def test__get_transformers_raise_valueerror(self):
     """Expect ``_get_transformers`` to raise ``ValueError`` for a ``'void'`` dtype."""
     # Run
     unsupported_dtypes = {'void': 'void'}

     with pytest.raises(ValueError):
         Metadata._get_transformers(unsupported_dtypes, None)
예제 #7
0
파일: test_dataset.py 프로젝트: zyteka/SDV
    def test_add_table_already_exist(self):
        """Expect ``ValueError`` when adding a table name that ``get_tables`` already lists."""
        # Setup
        existing_tables = ['a_table', 'b_table']
        metadata = Mock(spec_set=Metadata)
        metadata.get_tables.return_value = existing_tables

        # Run
        with pytest.raises(ValueError):
            Metadata.add_table(metadata, 'a_table')
예제 #8
0
파일: test_dataset.py 프로젝트: zyteka/SDV
    def test_add_relationship_parent_no_exist(self):
        """Expect ``ValueError`` when one of the related tables is not in ``get_tables``."""
        # Setup
        known_tables = ['a_table']
        metadata = Mock(spec_set=Metadata)
        metadata.get_tables.return_value = known_tables

        # Run
        with pytest.raises(ValueError):
            Metadata.add_relationship(metadata, 'a_table', 'b_table')
예제 #9
0
파일: test_dataset.py 프로젝트: zyteka/SDV
    def test_add_relationship_already_exist(self):
        """Expect ``ValueError`` when ``get_parents`` already reports ``'b_table'``."""
        # Setup
        metadata = Mock(spec_set=Metadata)
        metadata.get_parents.return_value = {'b_table'}
        metadata.get_tables.return_value = ['a_table', 'b_table']

        # Run
        with pytest.raises(ValueError):
            Metadata.add_relationship(metadata, 'a_table', 'b_table')
예제 #10
0
    def test_get_dtypes_error_subtype_id(self):
        """Expect ``MetadataError`` for an ``id`` field whose subtype is ``boolean``."""
        # Setup
        invalid_id_field = {'type': 'id', 'subtype': 'boolean'}
        metadata = Mock(spec_set=Metadata)
        metadata._DTYPES = Metadata._DTYPES
        metadata.get_table_meta.return_value = {'fields': {'item': invalid_id_field}}

        # Run
        with pytest.raises(MetadataError):
            Metadata.get_dtypes(metadata, 'test', ids=True)
예제 #11
0
    def test_get_dtypes_error_invalid_type(self):
        """Expect ``MetadataError`` for a field whose type is ``'unknown'``."""
        # Setup
        unknown_field = {'type': 'unknown'}
        metadata = Mock(spec_set=Metadata)
        metadata._DTYPES = Metadata._DTYPES
        metadata.get_table_meta.return_value = {'fields': {'item': unknown_field}}

        # Run
        with pytest.raises(MetadataError):
            Metadata.get_dtypes(metadata, 'test')
예제 #12
0
파일: test_dataset.py 프로젝트: zyteka/SDV
    def test_add_relationship_parent_no_primary_key(self):
        """Expect ``ValueError`` when ``get_primary_key`` returns ``None``."""
        # Setup
        metadata = Mock(spec_set=Metadata)
        metadata.get_primary_key.return_value = None
        metadata.get_children.return_value = set()
        metadata.get_parents.return_value = set()
        metadata.get_tables.return_value = ['a_table', 'b_table']

        # Run
        with pytest.raises(ValueError):
            Metadata.add_relationship(metadata, 'a_table', 'b_table')
예제 #13
0
    def test_get_dtypes_error_invalid_type(self):
        """Check that an ``'unknown'`` field type adds one entry to the ``errors`` list."""
        # Setup
        unknown_field = {'type': 'unknown'}
        metadata = Mock(spec_set=Metadata)
        metadata._DTYPES = Metadata._DTYPES
        metadata.get_table_meta.return_value = {'fields': {'item': unknown_field}}

        # Run
        collected = []
        Metadata.get_dtypes(metadata, 'test', errors=collected)

        # Asserts
        assert len(collected) == 1
예제 #14
0
    def test_get_dtypes_error_subtype_id(self):
        """Check that an ``id`` field with ``boolean`` subtype adds one entry to ``errors``."""
        # Setup
        invalid_id_field = {'type': 'id', 'subtype': 'boolean'}
        metadata = Mock(spec_set=Metadata)
        metadata._DTYPES = Metadata._DTYPES
        metadata.get_table_meta.return_value = {'fields': {'item': invalid_id_field}}

        # Run
        collected = []
        Metadata.get_dtypes(metadata, 'test', ids=True, errors=collected)

        # Asserts
        assert len(collected) == 1
예제 #15
0
파일: test_dataset.py 프로젝트: zyteka/SDV
    def test_load_tables(self):
        """Check that ``load_tables`` maps each requested name to what ``load_table`` returns."""
        # Setup
        metadata = Mock(spec_set=Metadata)
        metadata.get_tables.side_effect = ['foo', 'bar', 'tar']
        # One frame is handed out per ``load_table`` call, in this order.
        metadata.load_table.side_effect = [
            pd.DataFrame({'foo': [1, 2]}),
            pd.DataFrame({'bar': [3, 4]}),
            pd.DataFrame({'tar': [5, 6]})
        ]

        # Run
        requested = ['table 1', 'table 2', 'table 3']
        loaded = Metadata.load_tables(metadata, tables=requested)

        # Asserts
        expected = {
            'table 1': pd.DataFrame({'foo': [1, 2]}),
            'table 2': pd.DataFrame({'bar': [3, 4]}),
            'table 3': pd.DataFrame({'tar': [5, 6]})
        }
        assert loaded.keys() == expected.keys()

        for table_name in loaded:
            pd.testing.assert_frame_equal(loaded[table_name], expected[table_name])
예제 #16
0
파일: test_dataset.py 프로젝트: zyteka/SDV
    def test_get_dtypes_no_ids(self):
        """Check that ``get_dtypes`` without ``ids`` leaves the ``id`` field out of the result."""
        # Setup
        fields = {
            'item 0': {'type': 'id', 'subtype': 'integer'},
            'item 1': {'type': 'numerical', 'subtype': 'integer'},
            'item 2': {'type': 'numerical', 'subtype': 'float'},
            'item 3': {'type': 'categorical'},
            'item 4': {'type': 'boolean'},
            'item 5': {'type': 'datetime'},
        }
        metadata = Mock(spec_set=Metadata)
        metadata._DTYPES = Metadata._DTYPES
        metadata.get_table_meta.return_value = {'fields': fields}

        # Run
        dtypes = Metadata.get_dtypes(metadata, 'test')

        # Asserts
        assert dtypes == {
            'item 1': 'int',
            'item 2': 'float',
            'item 3': 'object',
            'item 4': 'bool',
            'item 5': 'datetime64',
        }
예제 #17
0
    def test_add_table_with_fields_no_data(self):
        """Check that passing ``fields`` without data stores an empty ``fields`` mapping."""
        # Setup
        metadata = Mock(spec_set=Metadata)
        metadata._metadata = {'tables': {}}
        metadata.get_tables.return_value = ['a_table', 'b_table']

        # Run
        Metadata.add_table(metadata, 'x_table', fields=['a_field', 'b_field'])

        # Asserts
        stored_table_meta = metadata._metadata['tables']['x_table']
        assert stored_table_meta == {'fields': {}}
예제 #18
0
    def test_add_table_only_name(self):
        """Add a table passing only its name.

        The new entry must hold an empty ``fields`` dict, and neither
        ``set_primary_key`` nor ``add_relationship`` may be called.
        """
        # Setup
        metadata = Mock(spec_set=Metadata)
        metadata.get_tables.return_value = ['a_table', 'b_table']
        metadata._metadata = {'tables': dict()}

        # Run
        Metadata.add_table(metadata, 'x_table')

        # Asserts
        expected_table_meta = {'fields': dict()}

        assert metadata._metadata['tables']['x_table'] == expected_table_meta

        # These used to be bare ``call_count == 0`` comparisons, which are
        # evaluated and discarded; use real mock assertions instead.
        metadata.set_primary_key.assert_not_called()
        metadata.add_relationship.assert_not_called()
예제 #19
0
    def test_add_table_with_primary_key(self):
        """Add a table passing a ``primary_key``.

        The new entry must hold an empty ``fields`` dict, ``set_primary_key``
        must be called once with the table name and key, and
        ``add_relationship`` must not be called.
        """
        # Setup
        metadata = Mock(spec_set=Metadata)
        metadata.get_tables.return_value = ['a_table', 'b_table']
        metadata._metadata = {'tables': dict()}

        # Run
        Metadata.add_table(metadata, 'x_table', primary_key='id')

        # Asserts
        expected_table_meta = {'fields': dict()}

        assert metadata._metadata['tables']['x_table'] == expected_table_meta

        metadata.set_primary_key.assert_called_once_with('x_table', 'id')
        # This used to be a bare ``call_count == 0`` comparison, which is
        # evaluated and discarded; use a real mock assertion instead.
        metadata.add_relationship.assert_not_called()
예제 #20
0
파일: test_dataset.py 프로젝트: zyteka/SDV
    def test___init__default_metadata_dict(self, mock_meta, mock_relationships):
        """Check the state of a ``Metadata`` built from a dict with default arguments.

        ``mock_meta`` and ``mock_relationships`` are injected by patch
        decorators that are outside this view.
        """
        # Run
        raw_meta = {'some': 'meta'}
        metadata = Metadata(raw_meta)

        # Asserts
        assert metadata._hyper_transformers == {}
        assert metadata.root_path == '.'
        mock_relationships.assert_called_once_with()
        mock_meta.assert_called_once_with({'some': 'meta'})
예제 #21
0
    def test__analyze_relationships(self):
        """Check the child and parent maps built from field ``ref`` entries.

        Two tables reference ``table_ref``; only the one flagged ``use: True``
        is expected to appear in the resulting maps.
        """
        # Setup
        def build_table(table_name, field_name, use):
            # Return a fresh table entry with one field referencing ``table_ref``.
            return {
                'use': use,
                'name': table_name,
                'fields': {
                    field_name: {
                        'ref': {'table': 'table_ref', 'field': 'field_ref'},
                        'name': field_name
                    }
                }
            }

        metadata = Mock(spec_set=Metadata)
        metadata._metadata = {
            'tables': {
                'test': build_table('test', 'test_field', True),
                'test_not_use': build_table('test_not_use', 'test_field_not_use', False)
            }
        }

        # Run
        Metadata._analyze_relationships(metadata)

        # Asserts
        assert metadata._child_map == {'table_ref': {'test'}}
        assert metadata._parent_map == {'test': {'table_ref'}}
예제 #22
0
    def test_get_parents(self):
        """Check that ``get_parents`` returns the ``_parent_map`` entry for the table."""
        # Setup
        parent_map = {'test': 'parent_table'}
        metadata = Mock(spec_set=Metadata)
        metadata._parent_map = parent_map

        # Run
        parents = Metadata.get_parents(metadata, 'test')

        # Asserts
        assert parents == 'parent_table'
예제 #23
0
    def test_get_table_meta(self):
        """Check that ``get_table_meta`` returns the entry stored under ``tables``."""
        # Setup
        tables = {'test': {'some': 'data'}}
        metadata = Mock(spec_set=Metadata)
        metadata._metadata = {'tables': tables}

        # Run
        table_meta = Metadata.get_table_meta(metadata, 'test')

        # Asserts
        assert table_meta == {'some': 'data'}
예제 #24
0
    def test_get_children(self):
        """Check that ``get_children`` returns the ``_child_map`` entry for the table."""
        # Setup
        child_map = {'test': 'child_table'}
        metadata = Mock(spec_set=Metadata)
        metadata._child_map = child_map

        # Run
        children = Metadata.get_children(metadata, 'test')

        # Asserts
        assert children == 'child_table'
예제 #25
0
파일: test_dataset.py 프로젝트: zyteka/SDV
    def test_set_primary_key(self):
        """Check which helpers ``set_primary_key`` calls for an existing ``id`` field."""
        # Setup
        table_fields = {'a_field': {'type': 'id', 'subtype': 'integer'}}
        metadata = Mock(spec_set=Metadata)
        metadata.get_tables.return_value = []
        metadata.get_fields.return_value = table_fields
        metadata._metadata = {
            'tables': {
                'a_table': {'fields': {'a_field': {'type': 'id', 'subtype': 'integer'}}}
            }
        }

        # Run
        Metadata.set_primary_key(metadata, 'a_table', 'a_field')

        # Asserts
        expected_field = {'type': 'id', 'subtype': 'integer'}
        metadata._check_field.assert_called_once_with('a_table', 'a_field', exists=True)
        metadata.get_fields.assert_called_once_with('a_table')
        metadata._get_key_subtype.assert_called_once_with(expected_field)
예제 #26
0
    def test_get_primary_key(self):
        """Check that ``get_primary_key`` returns the ``primary_key`` of the table metadata."""
        # Setup
        metadata = Mock(spec_set=Metadata)
        metadata.get_table_meta.return_value = {'primary_key': 'a_primary_key'}

        # Run
        primary_key = Metadata.get_primary_key(metadata, 'test')

        # Asserts
        metadata.get_table_meta.assert_called_once_with('test')
        assert primary_key == 'a_primary_key'
예제 #27
0
    def test_get_foreign_keys(self):
        """Check that ``get_foreign_keys`` lists both child fields that reference the parent."""
        # Setup
        parent_table = {
            'fields': {'parent_id': {'type': 'id'}},
            'primary_key': 'parent_id'
        }
        # Each child field gets its own fresh ``ref`` dict pointing at the parent table.
        child_fields = {
            field_name: {'type': 'id', 'ref': {'table': 'parent', 'field': 'id'}}
            for field_name in ('parent_id', 'parent_id_2')
        }
        metadata = Metadata({
            'tables': {
                'parent': parent_table,
                'child': {'fields': child_fields}
            }
        })

        # Run
        foreign_keys = Metadata.get_foreign_keys(metadata, 'parent', 'child')

        # Asserts
        assert foreign_keys == ['parent_id', 'parent_id_2']
예제 #28
0
파일: test_dataset.py 프로젝트: zyteka/SDV
    def test_load_table(self, mock_load_csv):
        """Check that ``load_table`` passes the root path and table metadata to the loader.

        ``mock_load_csv`` is injected by a patch decorator outside this view.
        """
        # Setup
        table_meta = {'some': 'data'}
        mock_load_csv.return_value = 'data'
        metadata = Mock(spec_set=Metadata)
        metadata.get_table_meta.return_value = table_meta
        metadata.root_path = 'a/path'

        # Run
        loaded = Metadata.load_table(metadata, 'test')

        # Asserts
        assert loaded == 'data'

        mock_load_csv.assert_called_once_with('a/path', {'some': 'data'})
        metadata.get_table_meta.assert_called_once_with('test')
예제 #29
0
파일: test_dataset.py 프로젝트: zyteka/SDV
    def test_get_tables(self):
        """Check that ``get_tables`` returns the names found under ``tables``."""
        # Setup
        table_names = ['table 1', 'table 2', 'table 3']
        metadata = Mock(spec_set=Metadata)
        # Every table maps to ``None``; only the names matter here.
        metadata._metadata = {'tables': dict.fromkeys(table_names)}

        # Run
        names = Metadata.get_tables(metadata)

        # Asserts
        assert sorted(names) == ['table 1', 'table 2', 'table 3']
예제 #30
0
    def test__load_hyper_transformer(self, mock_ht):
        """Check how ``_load_hyper_transformer`` wires its helpers into the transformer.

        ``mock_ht`` is injected by a patch decorator outside this view.
        """
        # Setup
        dtypes = {'meta': 'dtypes'}
        pii_fields = {'meta': 'pii_fields'}
        transformers = {'meta': 'transformers'}

        mock_ht.return_value = 'hypertransformer'
        metadata = Mock(spec_set=Metadata)
        metadata.get_dtypes.return_value = dtypes
        metadata._get_pii_fields.return_value = pii_fields
        metadata._get_transformers.return_value = transformers

        # Run
        hyper_transformer = Metadata._load_hyper_transformer(metadata, 'test')

        # Asserts
        assert hyper_transformer == 'hypertransformer'
        metadata.get_dtypes.assert_called_once_with('test')
        metadata._get_pii_fields.assert_called_once_with('test')
        metadata._get_transformers.assert_called_once_with(
            {'meta': 'dtypes'},
            {'meta': 'pii_fields'}
        )
        mock_ht.assert_called_once_with(transformers={'meta': 'transformers'})