Example #1
0
    def compile_queries_for_relation(relation: Relation, dag: networkx.Graph,
                                     source_adapter: Type[BaseSourceAdapter],
                                     analyze: bool) -> Relation:
        """Generate and store the sql statements for a single relation.

        Args:
            relation: the relation to build sql for. Its ``core_query`` and
                ``compiled_query`` attributes are overwritten.
            dag: directed dependency graph that contains ``relation``. Edge
                data is read through the keys ``direction``,
                ``local_attribute`` and ``remote_attribute``.
            source_adapter: adapter that renders the dialect specific sql
                fragments.
            analyze: when true, ``compiled_query`` is the core query passed
                through ``source_adapter.analyze_wrap_statement``; otherwise
                it equals ``core_query``.

        Returns:
            The same ``relation`` object with both query attributes populated.
        """
        if relation.is_view:
            # The adapter is asked once per attribute, as it always has been.
            relation.core_query, relation.compiled_query = [
                source_adapter.view_creation_statement(relation)
                for _ in range(2)
            ]
            return relation

        if relation.unsampled:
            query = source_adapter.unsampled_statement(relation)
        else:
            do_not_sample = False
            predicates = []
            unions = []

            # Visit only the edges that leave this relation. Handing a
            # (relation, child) pair to ``dag.edges`` is read by networkx as a
            # bunch of nodes and yields every out-edge of *both* nodes, which
            # pulled constraints from unrelated edges into the query.
            for _, child, edge_data in dag.out_edges(relation, data=True):
                if edge_data['direction'] == 'bidirectional':
                    predicates.append(
                        source_adapter.upstream_constraint_statement(
                            child, edge_data['remote_attribute'],
                            edge_data['local_attribute']))
                if relation.include_outliers:
                    unions.append(
                        source_adapter.union_constraint_statement(
                            relation, child, edge_data['remote_attribute'],
                            edge_data['local_attribute'],
                            relation.max_number_of_outliers))

            # Same reasoning for the incoming side: only edges that end at
            # this relation contribute a predicate.
            for parent, _, edge_data in dag.in_edges(relation, data=True):
                # The flag is sticky: one bidirectional incoming edge is
                # enough, a later directional edge must not reset it.
                if edge_data['direction'] == 'bidirectional':
                    do_not_sample = True
                predicates.append(
                    source_adapter.predicate_constraint_statement(
                        parent, analyze, edge_data['local_attribute'],
                        edge_data['remote_attribute']))
                if relation.include_outliers:
                    unions.append(
                        source_adapter.union_constraint_statement(
                            relation, parent, edge_data['local_attribute'],
                            edge_data['remote_attribute'],
                            relation.max_number_of_outliers))

            # With predicates present the inner select is left unsampled and
            # the sample method (if any) is applied by the wrapping statement.
            query = source_adapter.sample_statement_from_relation(
                relation,
                (None if predicates else relation.sampling.sample_method))
            if predicates:
                query += " WHERE " + ' AND '.join(predicates)
                query = source_adapter.directionally_wrap_statement(
                    query, relation, (None if do_not_sample else
                                      relation.sampling.sample_method))
            if unions:
                # The leading empty item makes every union statement,
                # including the first, prefixed with " UNION ".
                query += " UNION ".join([''] + unions)

        relation.core_query = query

        if analyze:
            query = source_adapter.analyze_wrap_statement(query, relation)
        relation.compiled_query = query
        return relation
Example #2
0
    def compile_queries_for_relation(
            relation: Relation,  # noqa pylint: disable=too-many-branches
            dag: networkx.Graph,
            source_adapter: Type[BaseSourceAdapter],
            analyze: bool) -> Relation:
        """ Builds the core and compiled sql for one relation.

            Args:
                relation (Relation): the relation whose sql is being built
                dag (Graph): the dependency graph the relation is a node of
                source_adapter (BaseSourceAdapter): renders the dialect specific sql pieces
                analyze (bool): True to wrap the result in the adapter's analyze statement,
                    False to keep the plain sampling statement

            Returns:
                Relation: the same relation, with `core_query` and `compiled_query` set
        """
        if relation.is_view:
            # views get the creation statement for both attributes,
            # requested from the adapter once per attribute
            core_sql = source_adapter.view_creation_statement(relation)
            compiled_sql = source_adapter.view_creation_statement(relation)
            relation.core_query = core_sql
            relation.compiled_query = compiled_sql
            return relation

        if relation.unsampled:
            statement = source_adapter.unsampled_statement(relation)
        else:
            skip_sampling = False
            where_clauses = []
            outlier_unions = []
            polymorphic_clauses = []

            # outgoing edges
            for child in dag.successors(relation):
                # a single edge per node pair is assumed (no parallel edges)
                attrs = dag.edges[relation, child]
                direction = attrs['direction']
                if direction == 'bidirectional':
                    where_clauses.append(
                        source_adapter.upstream_constraint_statement(
                            child, attrs['remote_attribute'],
                            attrs['local_attribute']))
                if not relation.include_outliers:
                    continue
                if direction == 'polymorphic':
                    logger.warning(
                        "Polymorphic relationships currently do not support including outliers. "
                        "Ignoring include_outliers flag for edge "
                        f"from {relation.dot_notation} to {child.dot_notation}. "
                    )
                else:
                    outlier_unions.append(
                        source_adapter.union_constraint_statement(
                            relation, child, attrs['remote_attribute'],
                            attrs['local_attribute'],
                            relation.max_number_of_outliers))

            # incoming edges
            for parent in dag.predecessors(relation):
                attrs = dag.edges[parent, relation]
                direction = attrs['direction']
                # one bidirectional or polymorphic incoming edge is enough to
                # switch sampling off for the wrapped statement
                if direction in ('bidirectional', 'polymorphic'):
                    skip_sampling = True

                if direction != 'polymorphic':
                    where_clauses.append(
                        source_adapter.predicate_constraint_statement(
                            parent, analyze, attrs['local_attribute'],
                            attrs['remote_attribute']))
                elif 'local_type_attribute' in attrs:
                    # typed polymorphic edge: the constraint also carries the
                    # type attribute and the per-parent override, if any
                    type_override = attrs['local_type_overrides'].get(
                        parent.dot_notation, None)
                    polymorphic_clauses.append(
                        source_adapter.polymorphic_constraint_statement(
                            parent, analyze, attrs['local_attribute'],
                            attrs['remote_attribute'],
                            attrs['local_type_attribute'], type_override))
                else:
                    # untyped polymorphic edge: plain predicate, but still
                    # grouped with the other polymorphic clauses
                    polymorphic_clauses.append(
                        source_adapter.predicate_constraint_statement(
                            parent, analyze, attrs['local_attribute'],
                            attrs['remote_attribute']))

                if not relation.include_outliers:
                    continue
                if direction == 'polymorphic':
                    logger.warning(
                        "Polymorphic relationships currently do not support including outliers. "
                        "Ignoring include_outliers flag for edge "
                        f"from {parent.dot_notation} to {relation.dot_notation}. "
                    )
                else:
                    outlier_unions.append(
                        source_adapter.union_constraint_statement(
                            relation, parent, attrs['local_attribute'],
                            attrs['remote_attribute'],
                            relation.max_number_of_outliers))

            # polymorphic clauses are alternatives, so they are OR-ed together
            # and joined to the rest as one parenthesised predicate
            if polymorphic_clauses:
                where_clauses.append(
                    "( " + " OR ".join(polymorphic_clauses) + " )")

            inner_method = (None
                            if where_clauses else relation.sampling.sample_method)
            statement = source_adapter.sample_statement_from_relation(
                relation, inner_method)
            if where_clauses:
                statement = statement + " WHERE " + " AND ".join(where_clauses)
                outer_method = (None if skip_sampling else
                                relation.sampling.sample_method)
                statement = source_adapter.directionally_wrap_statement(
                    statement, relation, outer_method)
            if outlier_unions:
                statement += "".join(" UNION " + union
                                     for union in outlier_unions)

        relation.core_query = statement
        relation.compiled_query = (source_adapter.analyze_wrap_statement(
            statement, relation) if analyze else statement)
        return relation
Example #3
0
    def compile_queries_for_relation(relation: Relation, dag: networkx.Graph,
                                     source_adapter: Type[BaseSourceAdapter],
                                     analyze: bool) -> Relation:
        """ Produces the sql statements for one relation and stores them on it

            Args:
                relation (Relation): the relation the sql is produced for
                dag (Graph): the dependency graph the relation is a node of
                source_adapter (BaseSourceAdapter): renders the dialect specific sql pieces
                analyze (bool): True to wrap the result in the adapter's analyze statement,
                    False to keep the plain sampling statement

            Returns:
                Relation: the same relation, with `core_query` and `compiled_query` set
        """
        if relation.is_view:
            # both attributes hold the view creation statement; the adapter is
            # asked once per attribute
            core_sql = source_adapter.view_creation_statement(relation)
            compiled_sql = source_adapter.view_creation_statement(relation)
            relation.core_query = core_sql
            relation.compiled_query = compiled_sql
            return relation

        if relation.unsampled:
            sql = source_adapter.unsampled_statement(relation)
        else:
            skip_sampling = False
            constraints = []
            outlier_unions = []

            for downstream in dag.successors(relation):
                # exactly one edge per node pair is assumed
                link = dag.edges[relation, downstream]
                if link['direction'] == 'bidirectional':
                    upstream_sql = source_adapter.upstream_constraint_statement(
                        downstream, link['remote_attribute'],
                        link['local_attribute'])
                    constraints.append(upstream_sql)
                if relation.include_outliers:
                    union_sql = source_adapter.union_constraint_statement(
                        relation, downstream, link['remote_attribute'],
                        link['local_attribute'],
                        relation.max_number_of_outliers)
                    outlier_unions.append(union_sql)

            for upstream in dag.predecessors(relation):
                link = dag.edges[upstream, relation]
                # a single bidirectional incoming edge turns sampling off for
                # the wrapped statement, whatever the other edges are
                if link['direction'] == 'bidirectional':
                    skip_sampling = True
                predicate_sql = source_adapter.predicate_constraint_statement(
                    upstream, analyze, link['local_attribute'],
                    link['remote_attribute'])
                constraints.append(predicate_sql)
                if relation.include_outliers:
                    union_sql = source_adapter.union_constraint_statement(
                        relation, upstream, link['local_attribute'],
                        link['remote_attribute'],
                        relation.max_number_of_outliers)
                    outlier_unions.append(union_sql)

            # the inner select is only sampled when nothing constrains it
            if constraints:
                inner_method = None
            else:
                inner_method = relation.sampling.sample_method
            sql = source_adapter.sample_statement_from_relation(
                relation, inner_method)

            if constraints:
                sql = sql + " WHERE " + " AND ".join(constraints)
                if skip_sampling:
                    outer_method = None
                else:
                    outer_method = relation.sampling.sample_method
                sql = source_adapter.directionally_wrap_statement(
                    sql, relation, outer_method)

            if outlier_unions:
                sql += "".join(" UNION " + union for union in outlier_unions)

        relation.core_query = sql
        relation.compiled_query = (source_adapter.analyze_wrap_statement(
            sql, relation) if analyze else sql)
        return relation