def test_add_relationship_parent_no_exist(self):
    """Adding a relationship fails when a referenced table is unknown.

    ``get_tables`` is mocked to report only ``'a_table'``, so ``'b_table'``
    is not among the known tables. The test expects ``add_relationship``
    to raise a ``ValueError`` in that situation.
    """
    # Setup: spec'd mock whose table listing lacks 'b_table'.
    known_tables = ['a_table']
    metadata_mock = Mock(spec=Metadata)
    metadata_mock.get_tables.return_value = known_tables

    # Run / Assert: call the real method with the mock standing in as self.
    with pytest.raises(ValueError):
        Metadata.add_relationship(metadata_mock, 'a_table', 'b_table')
def test_add_relationship_already_exist(self):
    """Adding a relationship that is already present raises ``ValueError``.

    Both tables are reported by the mocked ``get_tables``, and the mocked
    ``get_parents`` returns a set containing ``'b_table'``. With that
    state the test expects ``add_relationship`` to raise a ``ValueError``.
    """
    # Setup: both tables are known and 'b_table' is reported as a parent.
    metadata_mock = Mock(spec=Metadata)
    metadata_mock.get_tables.return_value = ['a_table', 'b_table']
    metadata_mock.get_parents.return_value = {'b_table'}

    # Run / Assert: call the real method with the mock standing in as self.
    with pytest.raises(ValueError):
        Metadata.add_relationship(metadata_mock, 'a_table', 'b_table')
def test_hma1_single_child_row_single_parent_row():
    """Check that ``HMA1`` handles exactly one child row per parent row.

    Per the original note on this test, ``HMA1`` does not learn the
    distribution of the values for a child row when those are equal to 1,
    because those values would be equal to ``0`` and alter the ``std`` by
    a lot.

    Setup:
        - Build a parent table and a child table of five rows each, where
          every child row points at a distinct parent id.
        - Describe both tables and their relationship in ``sdv.Metadata``.
        - Instantiate ``HMA1`` with that metadata.

    Input:
        - The ``dict`` of tables.
        - The ``sdv.Metadata`` instance.

    Output:
        - A ``dict`` of synthetic tables with ten rows each, all ids unique.
    """
    # Setup
    source_rows = 5
    requested_rows = 10

    parent_frame = pd.DataFrame({
        'parent_id': range(source_rows),
        'value': range(source_rows),
    })
    child_frame = pd.DataFrame({
        'parent_a': range(source_rows),
        'value_a': range(source_rows),
    })
    dataset = {'parent_a': parent_frame, 'child': child_frame}

    meta = Metadata()
    meta.add_table('parent_a', parent_frame, primary_key='parent_id')
    meta.add_table('child', child_frame)
    meta.add_relationship('parent_a', 'child', 'parent_a')

    hma = HMA1(meta)

    # Run
    hma.fit(dataset)
    synthetic = hma.sample(num_rows=requested_rows)

    # Assert: two tables come back, each with the requested number of rows
    # and with no repeated ids in either key column.
    assert len(synthetic) == 2

    synthetic_parent = synthetic['parent_a']
    assert len(synthetic_parent) == requested_rows

    synthetic_child = synthetic['child']
    assert len(synthetic_child) == requested_rows

    assert len(synthetic_parent['parent_id'].unique()) == requested_rows
    assert len(synthetic_child['parent_a'].unique()) == requested_rows
def test_add_relationship_parent_no_primary_key(self):
    """Adding a relationship fails when no primary key is available.

    Both tables are reported by the mocked ``get_tables``, the mocked
    ``get_parents`` and ``get_children`` return empty sets, and the mocked
    ``get_primary_key`` returns ``None``. With that state the test expects
    ``add_relationship`` to raise a ``ValueError``.
    """
    # Setup: known tables, no existing links, and no primary key.
    metadata_mock = Mock(spec=Metadata)
    metadata_mock.get_tables.return_value = ['a_table', 'b_table']
    metadata_mock.get_parents.return_value = set()
    metadata_mock.get_children.return_value = set()
    metadata_mock.get_primary_key.return_value = None

    # Run / Assert: call the real method with the mock standing in as self.
    with pytest.raises(ValueError):
        Metadata.add_relationship(metadata_mock, 'a_table', 'b_table')