Beispiel #1
0
    def test_add_relationship_parent_no_exist(self):
        """``add_relationship`` raises ``ValueError`` when a table is not registered.

        The mocked ``get_tables`` reports only ``'a_table'``, while the
        relationship being added also references ``'b_table'``.
        """
        # Setup: a spec'd mock stands in for the ``Metadata`` instance.
        listed_table = 'a_table'
        unlisted_table = 'b_table'
        metadata_mock = Mock(spec=Metadata)
        metadata_mock.get_tables.return_value = [listed_table]

        # Run: the unbound method is invoked with the mock as ``self``.
        with pytest.raises(ValueError):
            Metadata.add_relationship(metadata_mock, listed_table, unlisted_table)
Beispiel #2
0
    def test_add_relationship_already_exist(self):
        """``add_relationship`` raises ``ValueError`` for an existing relationship.

        Both tables are reported by the mocked ``get_tables`` and the mocked
        ``get_parents`` returns ``{'b_table'}``.
        NOTE(review): which internal check turns this setup into the error is
        not visible from this test -- confirm against ``Metadata.add_relationship``.
        """
        # Setup: a spec'd mock stands in for the ``Metadata`` instance.
        first_table, second_table = 'a_table', 'b_table'
        metadata_mock = Mock(spec=Metadata)
        metadata_mock.get_tables.return_value = [first_table, second_table]
        metadata_mock.get_parents.return_value = {second_table}

        # Run: the unbound method is invoked with the mock as ``self``.
        with pytest.raises(ValueError):
            Metadata.add_relationship(metadata_mock, first_table, second_table)
Beispiel #3
0
def test_hma1_single_child_row_single_parent_row():
    """Check that ``HMA1`` fits and samples when every parent has exactly one child row.

    Rationale carried over from the original test notes: ``HMA1`` is expected not to
    learn the distribution of the number of child rows when that number is always
    ``1``, because the derived values would be ``0`` and distort the ``std``.
    NOTE(review): this rationale cannot be confirmed from the test alone -- verify it
    against the ``HMA1`` implementation.

    Setup:
        - Build a five-row parent table and a five-row child table in which each
          parent key is referenced by exactly one child row.
        - Register both tables and their relationship in a ``sdv.Metadata``.
        - Create an ``HMA1`` instance from that metadata.

    Input:
        - ``dict`` mapping table names to ``pandas.DataFrame`` objects
        - ``sdv.Metadata``

    Output:
        - ``dict`` with synthetic data, one entry per table.
    """
    # Setup
    source_rows = 5
    requested_rows = 10

    parent_frame = pd.DataFrame({
        'parent_id': range(source_rows),
        'value': range(source_rows),
    })
    child_frame = pd.DataFrame({
        'parent_a': range(source_rows),
        'value_a': range(source_rows),
    })

    metadata = Metadata()
    metadata.add_table('parent_a', parent_frame, primary_key='parent_id')
    metadata.add_table('child', child_frame)
    # Third argument is the child column ``parent_a``; presumably the foreign key
    # pointing at the parent's primary key -- confirm against ``add_relationship``.
    metadata.add_relationship('parent_a', 'child', 'parent_a')

    hma = HMA1(metadata)

    # Run
    hma.fit({'parent_a': parent_frame, 'child': child_frame})
    sampled = hma.sample(num_rows=requested_rows)

    # Assert: both tables are returned with the requested number of rows.
    assert len(sampled) == 2
    sampled_parent = sampled['parent_a']
    sampled_child = sampled['child']
    assert len(sampled_parent) == requested_rows
    assert len(sampled_child) == requested_rows

    # Assert: every sampled key is distinct, i.e. one child row per parent row.
    distinct_parent_ids = sampled_parent['parent_id'].unique()
    distinct_child_refs = sampled_child['parent_a'].unique()
    assert len(distinct_parent_ids) == requested_rows
    assert len(distinct_child_refs) == requested_rows
Beispiel #4
0
    def test_add_relationship_parent_no_primary_key(self):
        """``add_relationship`` raises ``ValueError`` when no primary key is found.

        Both tables are reported by the mocked ``get_tables``, the mocked
        ``get_parents`` and ``get_children`` return empty sets, and the mocked
        ``get_primary_key`` returns ``None``.
        """
        # Setup: a spec'd mock stands in for the ``Metadata`` instance.
        first_table, second_table = 'a_table', 'b_table'
        metadata_mock = Mock(spec=Metadata)
        metadata_mock.get_tables.return_value = [first_table, second_table]
        # No existing relationships in either direction.
        metadata_mock.get_parents.return_value = set()
        metadata_mock.get_children.return_value = set()
        # The primary key lookup comes back empty.
        metadata_mock.get_primary_key.return_value = None

        # Run: the unbound method is invoked with the mock as ``self``.
        with pytest.raises(ValueError):
            Metadata.add_relationship(metadata_mock, first_table, second_table)