Exemple #1
0
def test_sets_only_existing_adapters():
    """ An unknown sampling name fails config parsing with AttributeError,
    while a known one ('brute_force') overrides the sampling on the node. """
    graph = SnowShuGraph()

    test_relation = Relation(database='SNOWSHU_DEVELOPMENT',
                             schema='SOURCE_SYSTEM',
                             name='ORDER_ITEMS',
                             materialization=mz.TABLE,
                             attributes=[])
    test_relation.include_outliers = False
    test_relation.unsampled = False
    test_relation.sampling = DefaultSampling()

    config_dict = copy.deepcopy(CONFIGURATION)
    config_dict['preserve_case'] = True
    # alias of the second specified relation; edits land in config_dict
    relation_override = config_dict['source']['specified_relations'][1]

    # a sampling name that does not exist must be rejected by the parser
    relation_override['sampling'] = 'lucky_guess'
    with pytest.raises(AttributeError):
        ConfigurationParser().from_file_or_path(
            StringIO(yaml.dump(config_dict)))

    # the failed parse must leave the relation's sampling untouched
    assert isinstance(test_relation.sampling, DefaultSampling)

    relation_override['sampling'] = 'brute_force'
    config = ConfigurationParser().from_file_or_path(
        StringIO(yaml.dump(config_dict)))

    overridden = graph._set_overriding_params_for_node(test_relation, config)
    assert isinstance(overridden.sampling, BruteForceSampling)
Exemple #2
0
def test_graph_difference_more_both_isolated_non_isolated_relations_source(
        stub_graph_set, stub_relation_set):
    """ Tests catalog_difference when the source graph mixes isolated nodes and
    nodes joined by a directional relationship. The expected node list is the
    source catalog minus its first (common) relation. """

    def random_relation():
        # relation with a random 10-character name and helper-provided fields
        return Relation(name=rand_string(10),
                        **stub_relation_set.rand_relation_helper())

    _, vals = stub_graph_set
    common_relation = random_relation()
    source_catalog = [
        common_relation,
        random_relation(),
        random_relation(),
        vals.downstream_relation,
        vals.upstream_relation,
        vals.birelation_right,
    ]

    target_catalog = {
        common_relation,
        random_relation(),
        random_relation(),
        vals.birelation_left,
        vals.birelation_right,
    }

    downstream = vals.downstream_relation
    directional_link = {
        "local_attribute": vals.directional_key,
        "database": ".*",
        "schema": ".*",
        "relation": ".*relation.*$",
        "remote_attribute": vals.directional_key,
    }
    config_dict = copy.deepcopy(BASIC_CONFIGURATION)
    config_dict["source"]["specified_relations"] = [{
        "database": downstream.database,
        "schema": downstream.schema,
        "relation": downstream.name,
        "relationships": {
            "directional": [directional_link],
        },
    }]
    config = ConfigurationParser().from_file_or_path(
        StringIO(yaml.dump(config_dict)))
    shgraph = SnowShuGraph()

    with mock.MagicMock() as adapter_mock:
        # the graph is built from the mocked source catalog
        adapter_mock.build_catalog.return_value = source_catalog
        config.source_profile.adapter = adapter_mock
        shgraph.build_graph(config)

        expected_nodes = source_catalog[1:]
        difference = SnowShuGraph.catalog_difference(shgraph, target_catalog)
        assert list(difference.nodes) == expected_nodes
Exemple #3
0
    def _get_filtered_schemas(
            self, filters: Iterable[dict]) -> List[_DatabaseObject]:
        """ Collect the schema objects selected by the provided filters.

        Every filter is widened twice: its "name" is replaced with ".*" to get a
        schema-level filter, and that filter's "schema" is replaced with ".*" to
        get a database-level filter. Duplicate widened filters are dropped.

        Args:
            filters: the filter dicts to widen and match against.

        Returns:
            The schema objects whose `full_relation` passes
            `at_least_one_full_pattern_match` against the schema-level filters,
            taken only from databases that pass the database-level filters.
        """
        schema_filters = []
        for relation_filter in filters:
            widened = {**relation_filter, "name": ".*"}
            if widened not in schema_filters:
                schema_filters.append(widened)

        db_filters = []
        for schema_filter in schema_filters:
            widened = {**schema_filter, "schema": ".*"}
            if widened not in db_filters:
                db_filters.append(widened)

        # wrap each database name in a bare Relation so it can be pattern matched
        database_relations = []
        for database in self._get_all_databases():
            database_relations.append(
                Relation(self._correct_case(database), "", "", None, None))
        filtered_databases = [
            candidate for candidate in database_relations
            if at_least_one_full_pattern_match(candidate, db_filters)
        ]

        # gather the schemas of every database that survived the filter
        filtered_schemas = []
        for db_rel in filtered_databases:
            quoted_database = db_rel.quoted(db_rel.database)
            schema_objs = []
            for schema in self._get_all_schemas(database=quoted_database):
                schema_relation = Relation(db_rel.database,
                                           self._correct_case(schema), "",
                                           None, None)
                schema_objs.append(
                    BaseSourceAdapter._DatabaseObject(schema, schema_relation))
            filtered_schemas.extend([
                obj for obj in schema_objs
                if at_least_one_full_pattern_match(obj.full_relation,
                                                   schema_filters)
            ])

        return filtered_schemas
Exemple #4
0
    def _initialize_snowshu_meta_database(self) -> None:
        """ Create the `snowshu` database and schema, then create and load the
        `replica_meta` table with a single row taken from `self.replica_meta`. """
        self.create_database_if_not_exists('snowshu')
        self.create_schema_if_not_exists('snowshu', 'snowshu')

        meta_columns = [
            Attribute('created_at', dt.TIMESTAMP_TZ),
            Attribute('name', dt.VARCHAR),
            Attribute('short_description', dt.VARCHAR),
            Attribute('long_description', dt.VARCHAR),
        ]
        meta_relation = Relation("snowshu", "snowshu", "replica_meta",
                                 mz.TABLE, meta_columns)

        # one row: creation time plus the descriptive fields of the replica
        meta_row = {
            'created_at': datetime.now(),
            'name': self.replica_meta['name'],
            'short_description': self.replica_meta['short_description'],
            'long_description': self.replica_meta['long_description'],
        }
        meta_relation.data = pd.DataFrame([meta_row])
        self.create_and_load_relation(meta_relation)
Exemple #5
0
    def __init__(self):
        """ Build the stub relations and the random key names that link them.

        Every relation gets a fixed name plus whatever `rand_relation_helper`
        supplies for its remaining fields. The downstream/upstream pair shares
        the directional key, each birelation carries its own bidirectional key,
        `view_relation` is switched to a view, and every relation starts with an
        empty `compiled_query`.
        """
        self.downstream_relation = Relation(
            name='downstream_relation', **self.rand_relation_helper())
        self.upstream_relation = Relation(
            name='upstream_relation', **self.rand_relation_helper())
        self.iso_relation = Relation(
            name='iso_relation', **self.rand_relation_helper())
        self.birelation_left = Relation(
            name='birelation_left', **self.rand_relation_helper())
        self.birelation_right = Relation(
            name='birelation_right', **self.rand_relation_helper())
        self.view_relation = Relation(
            name='view_relation', **self.rand_relation_helper())

        # Keys must be plain strings. A trailing comma after rand_string(...)
        # would make each key a 1-tuple and leak tuples into the Attribute
        # names built below.
        self.bidirectional_key_left = rand_string(10)
        self.bidirectional_key_right = rand_string(8)
        self.directional_key = rand_string(15)

        # update specifics
        self.view_relation.materialization = mz.VIEW

        for name in ('downstream_relation', 'upstream_relation'):
            getattr(self, name).attributes = [
                Attribute(self.directional_key, dt.INTEGER)]

        self.birelation_right.attributes = [
            Attribute(self.bidirectional_key_right, dt.VARCHAR)]
        self.birelation_left.attributes = [
            Attribute(self.bidirectional_key_left, dt.VARCHAR)]

        for name in ('downstream_relation', 'upstream_relation',
                     'iso_relation', 'birelation_left', 'birelation_right',
                     'view_relation'):
            getattr(self, name).compiled_query = ''
def test_build_catalog():
    """ build_catalog must return the relations listed as included and none of
    the relations listed as excluded, given the two config patterns below.

    Schema discovery and relation lookup are patched on BaseSQLAdapter, so the
    adapter only sees the mocked schemas and relations defined here.
    """
    config_patterns = [
        dict(database="snowshu_development",
             schema=".*",
             name="(?i)^.*(?<!_view)$"),
        dict(database="snowshu_development",
             schema="source_system",
             name="order_items_view")
    ]

    mock_filtered_schema = [
        BaseSQLAdapter._DatabaseObject(
            "SOURCE_SYSTEM",
            Relation("snowshu_development", "source_system", "", None, None)),
        BaseSQLAdapter._DatabaseObject(
            "Cased_Schema",
            Relation("snowshu_development", "Cased_Schema", "", None, None)),
    ]

    included_relations = [
        # included tables
        Relation("snowshu_development", "source_system", "fake_table_1", mz.TABLE, []),
        Relation("snowshu_development", "Cased_Schema", "fake_table_2", mz.TABLE, []),
        # included view
        Relation("snowshu_development", "source_system", "order_items_view", mz.VIEW, []),
    ]
    excluded_relations = [
        # excluded _view
        Relation("snowshu_development", "source_system", "some_other_view", mz.VIEW, []),
        Relation("snowshu_development", "Cased_Schema", "another_view", mz.VIEW, []),
    ]

    mock_relations = included_relations + excluded_relations

    def mock_get_relations_func(schema_obj: BaseSQLAdapter._DatabaseObject):
        # hand back only the mocked relations living in the requested schema
        return [r for r in mock_relations
                if schema_obj.full_relation.schema == r.schema]

    # stubbed version of the BaseSourceAdapter with the required class vars
    class StubbedSourceAdapter(BaseSourceAdapter):
        REQUIRED_CREDENTIALS = []
        ALLOWED_CREDENTIALS = []
        MATERIALIZATION_MAPPINGS = {}
        DATA_TYPE_MAPPINGS = {}
        SUPPORTED_SAMPLE_METHODS = []

    with patch("snowshu.adapters.BaseSQLAdapter._get_filtered_schemas",
               return_value=mock_filtered_schema), \
         patch("snowshu.adapters.BaseSQLAdapter._get_relations_from_database",
               side_effect=mock_get_relations_func):
        # instantiate the stub declared above; the previous name
        # `StubbedAdapter` is not defined in the visible code
        adapter = StubbedSourceAdapter()
        catalog = adapter.build_catalog(config_patterns, thread_workers=1)
        for r in excluded_relations:
            assert r not in catalog
        for r in included_relations:
            assert r in catalog
Exemple #7
0
def test_get_relations_from_database(end_to_end):
    """ The catalog built by PostgresAdapter for the `snowshu` database must
    contain a relation whose fields equal those of `snowshu.snowshu.replica_meta`. """
    adapter = PostgresAdapter(replica_metadata={})
    if adapter.target != "localhost":
        adapter._credentials.host = 'integration-test'

    config_patterns = [dict(database="snowshu", schema=".*", name=".*")]

    expected_relation = Relation(
        "snowshu", "snowshu", "replica_meta", TABLE, [
            Attribute('created_at', data_types.TIMESTAMP_TZ),
            Attribute('config_json', data_types.JSON),
            Attribute('name', data_types.VARCHAR),
            Attribute('short_description', data_types.VARCHAR),
            Attribute('long_description', data_types.VARCHAR),
        ])

    catalog = adapter.build_catalog(config_patterns, thread_workers=1)
    # compare relations field by field through their instance dictionaries
    catalog_fields = [rel.__dict__.items() for rel in catalog]
    assert expected_relation.__dict__.items() in catalog_fields
Exemple #8
0
    def compile_queries_for_relation(
            relation: Relation,  # noqa pylint: disable=too-many-branches
            dag: networkx.Graph,
            source_adapter: Type[BaseSourceAdapter],
            analyze: bool) -> Relation:
        """ Builds the core and compiled sql for a single relation of the dag.

            Args:
                relation (Relation): the relation to generate the sql for
                dag (Graph): the dependency graph holding the relation and its edges
                source_adapter (BaseSourceAdapter): the adapter that renders each sql fragment
                analyze (bool): when True the final query is passed through `analyze_wrap_statement`

            Returns:
                Relation: the same relation with `core_query` and `compiled_query` set
        """
        if relation.is_view:
            # the creation statement is rendered once for each of the two fields
            core_sql = source_adapter.view_creation_statement(relation)
            compiled_sql = source_adapter.view_creation_statement(relation)
            relation.core_query = core_sql
            relation.compiled_query = compiled_sql
            return relation

        if relation.unsampled:
            query = source_adapter.unsampled_statement(relation)
        else:
            skip_sampling = False
            where_clauses = []
            outlier_unions = []
            polymorphic_clauses = []

            for child in dag.successors(relation):
                # parallel edges aren't currently supported
                link = dag.edges[relation, child]
                direction = link['direction']
                if direction == 'bidirectional':
                    where_clauses.append(
                        source_adapter.upstream_constraint_statement(
                            child, link['remote_attribute'],
                            link['local_attribute']))
                if relation.include_outliers:
                    if direction == 'polymorphic':
                        logger.warning(
                            "Polymorphic relationships currently do not support including outliers. "
                            "Ignoring include_outliers flag for edge "
                            f"from {relation.dot_notation} to {child.dot_notation}. "
                        )
                    else:
                        outlier_unions.append(
                            source_adapter.union_constraint_statement(
                                relation, child, link['remote_attribute'],
                                link['local_attribute'],
                                relation.max_number_of_outliers))

            for parent in dag.predecessors(relation):
                link = dag.edges[parent, relation]
                direction = link['direction']
                is_polymorphic = direction == 'polymorphic'
                # one bidirectional or polymorphic incoming edge is enough to
                # turn sampling off for the wrapped statement
                if is_polymorphic or direction == 'bidirectional':
                    skip_sampling = True

                if not is_polymorphic:
                    where_clauses.append(
                        source_adapter.predicate_constraint_statement(
                            parent, analyze, link['local_attribute'],
                            link['remote_attribute']))
                elif 'local_type_attribute' in link:
                    # a local type attribute needs the type-aware constraint
                    type_override = link['local_type_overrides'].get(
                        parent.dot_notation, None)
                    polymorphic_clauses.append(
                        source_adapter.polymorphic_constraint_statement(
                            parent, analyze, link['local_attribute'],
                            link['remote_attribute'],
                            link['local_type_attribute'],
                            type_override))
                else:
                    polymorphic_clauses.append(
                        source_adapter.predicate_constraint_statement(
                            parent, analyze, link['local_attribute'],
                            link['remote_attribute']))

                if relation.include_outliers:
                    if is_polymorphic:
                        logger.warning(
                            "Polymorphic relationships currently do not support including outliers. "
                            "Ignoring include_outliers flag for edge "
                            f"from {parent.dot_notation} to {relation.dot_notation}. "
                        )
                    else:
                        outlier_unions.append(
                            source_adapter.union_constraint_statement(
                                relation, parent, link['local_attribute'],
                                link['remote_attribute'],
                                relation.max_number_of_outliers))

            # polymorphic constraints are alternatives, so they are OR-ed into one predicate
            if polymorphic_clauses:
                any_polymorphic = " OR ".join(polymorphic_clauses)
                where_clauses.append(f"( {any_polymorphic} )")

            if where_clauses:
                query = source_adapter.sample_statement_from_relation(
                    relation, None)
                query += " WHERE " + ' AND '.join(where_clauses)
                if skip_sampling:
                    wrap_method = None
                else:
                    wrap_method = relation.sampling.sample_method
                query = source_adapter.directionally_wrap_statement(
                    query, relation, wrap_method)
            else:
                query = source_adapter.sample_statement_from_relation(
                    relation, relation.sampling.sample_method)
            if outlier_unions:
                query += ''.join(" UNION " + union for union in outlier_unions)

        relation.core_query = query

        if analyze:
            query = source_adapter.analyze_wrap_statement(query, relation)
        relation.compiled_query = query
        return relation
Exemple #9
0
    def compile_queries_for_relation(relation: Relation, dag: networkx.Graph,
                                     source_adapter: Type[BaseSourceAdapter],
                                     analyze: bool) -> Relation:
        """ Generates and populates the compiled sql for one relation in a dag.

            Args:
                relation (Relation): the relation to generate the sql for
                dag (Graph): the dependency graph that contains the relation
                source_adapter (BaseSourceAdapter): the adapter that renders each sql fragment
                analyze (bool): when True the final query is passed through `analyze_wrap_statement`

            Returns:
                Relation: the given relation with `core_query` and `compiled_query` populated
        """
        if relation.is_view:
            relation.core_query, relation.compiled_query = [
                source_adapter.view_creation_statement(relation)
                for _ in range(2)
            ]
            return relation
        if relation.unsampled:
            query = source_adapter.unsampled_statement(relation)
        else:
            do_not_sample = False
            predicates = []
            unions = []
            for child in dag.successors(relation):
                # Look up the one edge relation -> child. Calling
                # dag.edges((relation, child), True) treats the pair as a node
                # bunch and yields every out-edge of both nodes instead.
                # Parallel edges aren't supported.
                edge = dag.edges[relation, child]
                if edge['direction'] == 'bidirectional':
                    predicates.append(
                        source_adapter.upstream_constraint_statement(
                            child, edge['remote_attribute'],
                            edge['local_attribute']))
                if relation.include_outliers:
                    unions.append(
                        source_adapter.union_constraint_statement(
                            relation, child, edge['remote_attribute'],
                            edge['local_attribute'],
                            relation.max_number_of_outliers))

            for parent in dag.predecessors(relation):
                edge = dag.edges[parent, relation]
                # the flag is sticky: any bidirectional incoming edge disables
                # sampling, regardless of the edges visited after it
                do_not_sample = (edge['direction'] == 'bidirectional'
                                 or do_not_sample)
                predicates.append(
                    source_adapter.predicate_constraint_statement(
                        parent, analyze, edge['local_attribute'],
                        edge['remote_attribute']))
                if relation.include_outliers:
                    unions.append(
                        source_adapter.union_constraint_statement(
                            relation, parent, edge['local_attribute'],
                            edge['remote_attribute'],
                            relation.max_number_of_outliers))

            query = source_adapter.sample_statement_from_relation(
                relation,
                (None if predicates else relation.sampling.sample_method))
            if predicates:
                query += " WHERE " + ' AND '.join(predicates)
                query = source_adapter.directionally_wrap_statement(
                    query, relation, (None if do_not_sample else
                                      relation.sampling.sample_method))
            if unions:
                query += " UNION ".join([''] + unions)

        relation.core_query = query

        if analyze:
            query = source_adapter.analyze_wrap_statement(query, relation)
        relation.compiled_query = query
        return relation
Exemple #10
0
                 schema='THING',
                 relation='.*poly2$',
                 relationships=dict(polymorphic=[
                     dict(local_attribute='id',
                          local_type_attribute='',
                          remote_attribute='parent_id',
                          database='',
                          schema='',
                          relation='^poly_child_[0-9]_items$')
                 ], )),
        ]),
    target=dict(adapter='default'),
    storage=dict(profile='default'))

MOCKED_CATALOG = (
    Relation('snowyes', 'thing', 'foo_suffix', mz.TABLE, []),
    Relation('SNOWYES', 'thing', 'bar_suffix', mz.TABLE, []),
    Relation('SNOWNO', 'THING', 'nevermatch_except_bidirectional', mz.TABLE,
             []),
    Relation('noperope', 'thing', 'foo_suffix', mz.TABLE, []),
    Relation('SNOWNO', 'THING', 'bar_suffix', mz.TABLE, []),
    Relation('SNOWNO', 'dont_match', 'nevermatch_except_bidirectional',
             mz.TABLE, []),
    Relation('snowno', 'thing', 'matches_in_directional', mz.TABLE, []),
    Relation('SNOWYES', 'thing', 'nevermatch_except_bidirectional', mz.TABLE,
             []),
    Relation('snowyes', 'thing', 'nevermatch_except_bidirectional', mz.TABLE,
             []),
    Relation('snowyes', 'thing', 'parent_poly', mz.TABLE, []),
    Relation('snowyes', 'thing', 'parent_poly2', mz.TABLE, []),
    Relation('snowyes', 'thing', 'poly_child_1_items', mz.TABLE, []),
Exemple #11
0
                          schema='',
                          relation='nevermatch_except_bidirectional')
                 ],
                                    directional=[
                                        dict(local_attribute='id',
                                             remote_attribute='id',
                                             database='snowno',
                                             schema='THING',
                                             relation='matches_in_directional')
                                    ]))
        ]),
    target=dict(adapter='default'),
    storage=dict(profile='default'))

# Catalog fixture of TABLE relations with no attributes. Database and schema
# names are deliberately repeated in different cases (e.g. 'snowyes' vs
# 'SNOWYES', 'thing' vs 'THING'), and relation names are reused across
# databases and schemas.
MOCKED_CATALOG = (
    Relation('snowyes', 'thing', 'foo_suffix', mz.TABLE, []),
    Relation('SNOWYES', 'thing', 'bar_suffix', mz.TABLE, []),
    Relation('SNOWNO', 'THING', 'nevermatch_except_bidirectional', mz.TABLE,
             []),
    Relation('noperope', 'thing', 'foo_suffix', mz.TABLE, []),
    Relation('SNOWNO', 'THING', 'bar_suffix', mz.TABLE, []),
    Relation('SNOWNO', 'dont_match', 'nevermatch_except_bidirectional',
             mz.TABLE, []),
    Relation('snowno', 'thing', 'matches_in_directional', mz.TABLE, []),
    Relation('SNOWYES', 'thing', 'nevermatch_except_bidirectional', mz.TABLE,
             []),
    Relation('snowyes', 'thing', 'nevermatch_except_bidirectional', mz.TABLE,
             []),
)

Exemple #12
0
    def __init__(self):
        """ Build the stub relations, the random key names, and the attributes
        that tie the relations together.

        Most relations take their non-name fields from `rand_relation_helper`;
        each upstream wildcard relation instead copies the schema and database
        of its downstream counterpart. Every relation ends with an empty
        `compiled_query`.
        """

        def stub(name):
            # relation with a fixed name and helper-provided remaining fields
            return Relation(name=name, **self.rand_relation_helper())

        self.downstream_relation = stub('downstream_relation')
        self.upstream_relation = stub('upstream_relation')
        self.iso_relation = stub('iso_relation')
        self.birelation_left = stub('birelation_left')
        self.birelation_right = stub('birelation_right')
        self.view_relation = stub('view_relation')

        self.downstream_wildcard_relation_1 = stub(
            'downstream_wildcard_relation_1')
        self.downstream_wildcard_relation_2 = stub(
            'downstream_wildcard_relation_2')
        # upstream wildcards live in the same schema/database as their downstream twin
        self.upstream_wildcard_relation_1 = Relation(
            name='upstream_wildcard_relation_1',
            schema=self.downstream_wildcard_relation_1.schema,
            database=self.downstream_wildcard_relation_1.database,
            materialization=mz.TABLE,
            attributes=[])
        self.upstream_wildcard_relation_2 = Relation(
            name='upstream_wildcard_relation_2',
            schema=self.downstream_wildcard_relation_2.schema,
            database=self.downstream_wildcard_relation_2.database,
            materialization=mz.TABLE,
            attributes=[])

        self.parent_relation_childid_type = stub(
            'parent_relation_childid_type')
        self.parent_relation_parentid = stub('parent_relation_parentid')
        self.child_relation_type_1 = stub('child_type_1_records')
        self.child_relation_type_2 = stub('child_type_2_records')
        self.child_relation_type_3 = stub('child_type_3_records')

        self.bidirectional_key_left = rand_string(10)
        self.bidirectional_key_right = rand_string(8)
        self.directional_key = rand_string(15)
        self.parentid_key = rand_string(15)
        self.childid_key = rand_string(15)
        self.childtype_key = rand_string(15)
        self.child2override_key = rand_string(20)

        # update specifics
        self.view_relation.materialization = mz.VIEW

        members = vars(self)

        directional_members = (
            'downstream_relation',
            'upstream_relation',
            'downstream_wildcard_relation_1',
            'downstream_wildcard_relation_2',
            'upstream_wildcard_relation_1',
            'upstream_wildcard_relation_2',
        )
        for member in directional_members:
            members[member].attributes = [
                Attribute(self.directional_key, dt.INTEGER)
            ]

        child_members = (
            'child_relation_type_1',
            'child_relation_type_2',
            'child_relation_type_3',
        )
        for member in child_members:
            members[member].attributes = [
                Attribute(self.parentid_key, dt.VARCHAR),
                Attribute(self.childid_key, dt.VARCHAR)
            ]

        self.parent_relation_childid_type.attributes = [
            Attribute(self.childid_key, dt.VARCHAR),
            Attribute(self.childtype_key, dt.VARCHAR)
        ]
        self.parent_relation_parentid.attributes = [
            Attribute(self.parentid_key, dt.VARCHAR)
        ]

        self.birelation_right.attributes = [
            Attribute(self.bidirectional_key_right, dt.VARCHAR)
        ]
        self.birelation_left.attributes = [
            Attribute(self.bidirectional_key_left, dt.VARCHAR)
        ]

        all_members = (
            'downstream_relation',
            'upstream_relation',
            'iso_relation',
            'birelation_left',
            'birelation_right',
            'view_relation',
            'downstream_wildcard_relation_1',
            'downstream_wildcard_relation_2',
            'upstream_wildcard_relation_1',
            'upstream_wildcard_relation_2',
            'child_relation_type_1',
            'child_relation_type_2',
            'child_relation_type_3',
            'parent_relation_childid_type',
            'parent_relation_parentid',
        )
        for member in all_members:
            members[member].compiled_query = ''
Exemple #13
0
    def compile_queries_for_relation(relation: Relation, dag: networkx.Graph,
                                     source_adapter: Type[BaseSourceAdapter],
                                     analyze: bool) -> Relation:
        """ Builds the core and compiled sql for a single relation of the dag.

            Args:
                relation (Relation): the relation to generate the sql for
                dag (Graph): the dependency graph holding the relation and its edges
                source_adapter (BaseSourceAdapter): the adapter that renders each sql fragment
                analyze (bool): when True the final query is passed through `analyze_wrap_statement`

            Returns:
                Relation: the same relation with `core_query` and `compiled_query` set
        """
        if relation.is_view:
            # the creation statement is rendered once for each of the two fields
            core_sql = source_adapter.view_creation_statement(relation)
            compiled_sql = source_adapter.view_creation_statement(relation)
            relation.core_query = core_sql
            relation.compiled_query = compiled_sql
            return relation

        if relation.unsampled:
            query = source_adapter.unsampled_statement(relation)
        else:
            skip_sampling = False
            where_clauses = []
            outlier_unions = []

            for child in dag.successors(relation):
                # parallel edges aren't currently supported
                link = dag.edges[relation, child]
                if link['direction'] == 'bidirectional':
                    where_clauses.append(
                        source_adapter.upstream_constraint_statement(
                            child, link['remote_attribute'],
                            link['local_attribute']))
                if relation.include_outliers:
                    outlier_unions.append(
                        source_adapter.union_constraint_statement(
                            relation, child, link['remote_attribute'],
                            link['local_attribute'],
                            relation.max_number_of_outliers))

            for parent in dag.predecessors(relation):
                link = dag.edges[parent, relation]
                # a single bidirectional incoming edge turns sampling off for good
                if link['direction'] == 'bidirectional':
                    skip_sampling = True
                where_clauses.append(
                    source_adapter.predicate_constraint_statement(
                        parent, analyze, link['local_attribute'],
                        link['remote_attribute']))
                if relation.include_outliers:
                    outlier_unions.append(
                        source_adapter.union_constraint_statement(
                            relation, parent, link['local_attribute'],
                            link['remote_attribute'],
                            relation.max_number_of_outliers))

            if where_clauses:
                query = source_adapter.sample_statement_from_relation(
                    relation, None)
                query += " WHERE " + ' AND '.join(where_clauses)
                if skip_sampling:
                    wrap_method = None
                else:
                    wrap_method = relation.sampling.sample_method
                query = source_adapter.directionally_wrap_statement(
                    query, relation, wrap_method)
            else:
                query = source_adapter.sample_statement_from_relation(
                    relation, relation.sampling.sample_method)
            if outlier_unions:
                query += ''.join(" UNION " + union for union in outlier_unions)

        relation.core_query = query

        if analyze:
            query = source_adapter.analyze_wrap_statement(query, relation)
        relation.compiled_query = query
        return relation