Esempio n. 1
0
    def __init__(self, tables, foreign_keys = None, owner = None):
        """
        Constructor
        """
        class MySchema(object):
            """class encapsulate tables structure"""
            def __init__(self, tables):
                """initialize tables"""
                self.tables = tables
        # The dictionary keys disagree with table names. Make them agree.
        table_dict = {}
        for table_name in tables:
            table = tables[table_name]
            table_dict[table.name] = table
        self._schema = MySchema(table_dict)
        self._ordered = None
        person_table = 'person'
        if owner:
            self._owner = owner.lower()
        else:
            self._owner = owner
        self._foreign_tables = {}
        self._person_table = find_table(self._schema, person_table)

        if self._person_table:
            to_exclude = set([self._person_table])
        else:
            to_exclude = set()
        connectivity = self.graph_from_schema(self._schema,
              self.make_foreign_keys(foreign_keys, self._schema), to_exclude)
        self.construct_query = ConstructQuery(connectivity)
        self.outers = [('seblock.blockid', 'block.id'), 
                     ('block.id', 'seblock.blockid')]
 def test_small_graph(self):
     """test small graph generate from get_statment_joins """
     schema = (
         (1, 2), # 0->1,2
         (3, ),   # 1->3
         (),
         ()
         )
     construct_query = ConstructQuery(schema)
     (_, res0) = construct_query.get_statement_joins((0, 3))
     self.list_test(res0, ((1, 0), (3, 1)))
     (_, res1) = construct_query.get_statement_joins((3, 1, 0))
     self.list_test(res1, ((1, 0), (3, 1)))
     (_, res2) = construct_query.get_statement_joins((0, 2))
     self.list_test(res2, ((2, 0),))
     (_, res3) = construct_query.get_statement_joins((0, ))
     self.list_test(res3, ())
    def test_subtree(self):
        """test subtree generation from get_smallest_subtree"""
        schema = (
            (1, 2), # 0->1,2
            (3, ),   # 1->3
            (),
            ()
            )
        construct_query = ConstructQuery(schema)
        res = construct_query.get_smallest_subtree((0, 3))
#        print "subtree 0 3", res
        #self.assertEqual(res, RootedGraph())
        res = construct_query.get_smallest_subtree((3, 1, 0))
#        print "subtree 0 1 3", res
        #self.listTest(res, ((1,0), (3,1)))
        res = construct_query.get_smallest_subtree((0, 2))
#        print "subtree 0 2", res
        #self.listTest(res, ((2,0),))
        res = construct_query.get_smallest_subtree((0, ))
Esempio n. 4
0
class Schema(object):
    """
    This object is created around a sqlalchemy MetaData object. When given
    a sqlalchemy select object in build_query, it constructs determines how to
    join tables from the MetaData to support the query.
    """
    def __init__(self, tables, foreign_keys = None, owner = None):
        """
        Constructor
        """
        class MySchema(object):
            """class encapsulate tables structure"""
            def __init__(self, tables):
                """initialize tables"""
                self.tables = tables
        # The dictionary keys disagree with table names. Make them agree.
        table_dict = {}
        for table_name in tables:
            table = tables[table_name]
            table_dict[table.name] = table
        self._schema = MySchema(table_dict)
        self._ordered = None
        person_table = 'person'
        if owner:
            self._owner = owner.lower()
        else:
            self._owner = owner
        self._foreign_tables = {}
        self._person_table = find_table(self._schema, person_table)

        if self._person_table:
            to_exclude = set([self._person_table])
        else:
            to_exclude = set()
        connectivity = self.graph_from_schema(self._schema,
              self.make_foreign_keys(foreign_keys, self._schema), to_exclude)
        self.construct_query = ConstructQuery(connectivity)
        self.outers = [('seblock.blockid', 'block.id'), 
                     ('block.id', 'seblock.blockid')]

    def build_query(self, query):
        """build query by root_join"""
        root_join = self.root_join(query)
        if query != root_join:
            query.append_from(root_join)
        return query

    def build_query_with_sel(self, sel, query, add_join = None):
        """extral root_join.join with add_join """
        root_join = self.root_join(query)
        if add_join:
            for item in add_join:
                table, l_col, r_col = item
                root_join = root_join.join(table, l_col == r_col)
        sel.append_from(root_join)
        return sel

    def root_join(self, query):
        '''
        Query is a sqlalchemy query with select elements and where clauses.
        This method:

            - looks through the elements and clauses to determine 
              which tables need to be joined in order to support the query. 
            - adds those joins to the query and returns it.
        '''

        tables_of_concern = set()
        # col.table returns a select statement. We want the metadata table.
        # This is unsupported in sqlalchemy!
        # Todo I need verify this.
#        for col in query._raw_columns:
#            tables_of_concern.add(col.table)
        for table in query.froms:
            tables_of_concern.add(table)
        if  query.__dict__.has_key('whereclause'): # SQLAlchemy 0.3
            where_clause = query.whereclause
        elif query.__dict__.has_key('_whereclause'): # SQLAlchemy 0.4 0.5
            where_clause = query._whereclause
        else:
            raise Exception, "Query '%s' does not contain whereclause" % query
        if where_clause:
            if where_clause.__dict__.has_key('clauses'):
                for clause in where_clause.clauses:
                    pull_operator_side(clause, tables_of_concern)
            else:
                pull_operator_side(where_clause , tables_of_concern)

        # No need to calculate joins if there is only one table involved.
        # We actually make mistakes if that single table is Person.
        if len(tables_of_concern) == 1: 
            return query

        # Remove Person table because it is not in any spanning trees.
        block_use_person_table = False
        if self._person_table in tables_of_concern:
            tables_of_concern.remove(self._person_table)
            block_use_person_table = True

        # Find the smallest spanning tree containing tables 
        #      for SELECT and WHERE.
        # table_indices is the index list of table not concerns 
        #      references to self._ordered
        table_indices = [self._ordered.index(table) 
                                for table in tables_of_concern]

        subtree = self.construct_query.get_smallest_subtree(table_indices)
        if subtree is None:
            return None
        _LOGGER.debug("Schema.build_query: query tree length %d" % 
                                               (subtree.get_nodes_number(), ))
        _LOGGER.debug("Schema.build_query: query tree itself %s" % 
                                                           (subtree, ))

        # The subtree tells us the order of tables in the join, but it loses
        # information on which table is the foreign key, so we need to search
        # through tables to find which had the foreign key and which the
        # primary key.
        # root_join is a table.join().join()....
        root_join = None
        for (node_idx, parent_idx) in subtree.breadth_first():
            if parent_idx is None:
                root_table = self._ordered[node_idx]
                root_join = root_table
                if block_use_person_table and root_table.c.has_key('CreatedBy'):
                    root_join = root_join.join(self._person_table,
                              root_table.c.CreatedBy == self._person_table.c.ID)
            else:
                if node_idx in self._foreign_tables[parent_idx]:
                # table has a f_key to parent
                    (l_col, r_col) = self._foreign_tables[parent_idx][node_idx]
                elif parent_idx in self._foreign_tables[node_idx]:
                # parent table has a f_key to this table
                    (r_col, l_col) = self._foreign_tables[node_idx][parent_idx]
                # special case: 
                # block to seblock == 1 we outerjoin seblock 
                # seblock to block == 0 we join block, block table is huge
                if self.outers.count((str(l_col).lower(), str(r_col).lower())):
                    root_join = root_join.outerjoin(r_col.table, l_col == r_col)
                else:
                    root_join = root_join.join(r_col.table, l_col == r_col)
#                root_join = root_join.join(r_col.table, l_col==r_col)
        return root_join
#        query.append_from(root_join)
#        return query


    def make_foreign_keys(self, foreign_keys, schema):
        """ input: foreign_keys and schema
            output: foreign_keys
            when input foreign_keys not exists, it is generated from schema by
            { table:table.foreign_keys, ...}
        """
        if not foreign_keys:
            foreign_keys = {}
            for table_name in schema.tables:
                table = schema.tables[table_name]
                foreign_keys[table] = table.foreign_keys
        return foreign_keys

    def graph_from_schema(self, metadata, table_fks, exclude = set()):         
        """
        Build graph with edges for provided SQLAlchemy MetaDta object;
        list of foreign keys and set of exclude tables.
        """
        #_ordered is a list version of metadata.tables.values() 
        #         and we can set exclude table list
        self._ordered = [metadata.tables[table_name] 
                            for table_name in metadata.tables
                            if metadata.tables[table_name] not in exclude]
        # contains all foreign keys linked table index in ordered_names
        relations = [] 
        # ordered_names which is the list version of fullname self._ordered
        # VK, use names rather then compare tables objects
        ordered_names = []
        for item in self._ordered:
#            ordered_names.append(item.fullname)
            ordered_names.append(item.name)
        exclude_names_list = []
        for table in exclude: 
#            exclude_names_list.append(table.fullname)       
            exclude_names_list.append(table.name)
        exclude_names = set(exclude_names_list) # set of exclude table name
        order_list = list(ordered_names)   # list of table_name in _ordered
        order_list.sort()
#        print "\n\nSchema:"
#        print order_list
#        print exclude_names
#        print "owner",self._owner
        try:
            search_name = None
            # traversing the tables 
            #   for each foreign_keys in this table: 
            #     find correspond table index in ordered_names(self._ordered) 
            #   relations store set of thoses f_key_index 
            #   self._foreign_tables[table_index] store {f_key_index:FK, ...}
            for table_index in xrange(0, len(self._ordered)):
                table = self._ordered[table_index]
                foreign_keys = table_fks[table]
                index_set = set()
                short_tables = {}
                for f_key in foreign_keys:
                    if f_key.column.table in exclude: 
                        # table linked by foreign keys not in exclude list
                        continue

                    search_name = f_key.column.table
                    if search_name.name in exclude_names: 
                        # table.fullname are not in exclude list 
                        continue
#                    f_key_index = self._ordered.index(search_name)
                    f_key_index = ordered_names.index(search_name.name)
                    index_set.add(f_key_index)
                    short_tables[f_key_index] = (f_key.parent, f_key.column)

                relations.append(index_set)
                self._foreign_tables[table_index] = short_tables 
                # foreign_tables stores 
                # {table_index:{f_key_index:(f_key.parent,f_key.colum),...},...}
        except ValueError, value_err:
            print value_err
            _LOGGER.error("""Schema.GraphFromSchema ValueError %s.
                    Could not find f_key.column.table %s in
                    self._ordered %s. Len(ordered)=%d
                    Len(metadata.tables)=%d""" % (str(value_err),
                    search_name, self._ordered, len(self._ordered), 
                    len(metadata.tables)))
            raise Exception(
                "Could not find the table for a given foreign key constraint.",
                '''Constraint table %s len(ordered)=%d len(metadata.tables)=%d
                ordered=%s''' % (search_name, len(self._ordered),
                len(metadata.tables), str([xdx.name for xdx in self._ordered])))
        return relations