def build_simple(self, query, metadata, allowed_capabilities):
    """
    Builds a QueryPlan (self) related to a single Gateway.
    This is used only by a Forwarder. This function will probably
    be deprecated soon.
    If several Gateways are involved, you must use QueryPlan::build.
    Args:
        query: The Query issued by the user.
        metadata: The metadata describing the Tables exposed by this
            Gateway, indexed by platform name and table name.
        allowed_capabilities: The Capabilities related to this Gateway.
    """
    # XXX allowed_capabilities should be a property of the query plan!
    # XXX Check whether we can answer query.object

    # Here we assume we have a single platform
    platform = metadata.keys()[0]
    announce = metadata[platform][query.get_from()] # e.g. table test

    # Set up an AST for missing capabilities (need configuration)

    # Selection?
    if query.filters and not announce.capabilities.selection:
        if not allowed_capabilities.selection:
            raise Exception("Cannot answer query: SELECTION")
        add_selection = query.filters
        query.filters = Filter()
    else:
        add_selection = None

    # Projection?
    announce_fields = announce.get_table().get_fields()
    if query.fields < announce_fields and not announce.capabilities.projection:
        if not allowed_capabilities.projection:
            raise Exception("Cannot answer query: PROJECTION")
        add_projection = query.fields
        query.fields = set()
    else:
        add_projection = None

    table = Table({platform: ''}, {}, query.get_from(), set(), set())
    key = metadata.get_key(query.get_from())
    capabilities = metadata.get_capabilities(platform, query.get_from())
    self.ast = self.ast.From(table, query, capabilities, key)

    # XXX associate the From node to the Gateway
    from_node = self.ast.get_root()
    self.add_from(from_node)
    #from_node.set_gateway(gw_or_router)
    #gw_or_router.query = query

    if not self.root:
        return

    # Reinstall the selection/projection that the Gateway cannot handle
    if add_selection:
        self.ast.optimize_selection(add_selection)
    if add_projection:
        self.ast.optimize_projection(add_projection)

    self.inject_at(query)
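# Usage sketch for build_simple(). QueryPlan and the query-builder calls
# below are assumptions inferred from the code above, not an API confirmed
# by this file. Note that metadata must describe exactly one platform.
def _example_build_simple(metadata, allowed_capabilities):
    query_plan = QueryPlan()
    query = Query.get("test").select("field1", "field2") # hypothetical query
    query_plan.build_simple(query, metadata, allowed_capabilities)
    return query_plan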
def make_sub_graph(metadata, relevant_fields):
    """
    Create a reduced copy of the full 3nf graph, keeping only the
    vertices (Tables) having a key in relevant_fields.
    Args:
        metadata: The object wrapping the full 3nf graph; its "graph"
            attribute is a DiGraph instance.
        relevant_fields: A dictionary {Table: Fields} indicating, for
            each Table, which Field(s) are relevant.
    Returns:
        The corresponding sub-3nf-graph (a DiGraph instance).
    """
    g = metadata.graph
    sub_graph = DiGraph()
    copy = dict()
    vertices_to_keep = set(relevant_fields.keys())

    # Copy relevant vertices from g
    for u in vertices_to_keep:
        copy_u = Table.make_table_from_fields(u, relevant_fields[u])
        copy[u] = copy_u
        sub_graph.add_node(copy_u) # no data on nodes

    # Copy relevant arcs from g
    for u, v in g.edges():
        try:
            copy_u, copy_v = copy[u], copy[v]
        except KeyError:
            # At least one endpoint was not kept: skip this arc
            continue

        sub_graph.add_edge(copy_u, copy_v, deepcopy(g.edge[u][v]))
        Log.debug("Adding copy of: %s" % metadata.print_arc(u, v))

    return sub_graph
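# Usage sketch for make_sub_graph(): restrict the 3nf graph to the Tables
# (and fields) a given query actually needs. The field names and the use of
# plain sets for the Fields values are hypothetical.
def _example_make_sub_graph(metadata, user_table, slice_table):
    relevant_fields = {
        user_table:  set(["user_hrn", "email"]),
        slice_table: set(["slice_hrn"]),
    }
    # Tables absent from relevant_fields are dropped, along with their arcs
    return make_sub_graph(metadata, relevant_fields)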
def get_metadata(self):
    announces = []
    for table, data in self.config.items():
        dialect, field_names, field_types = self.get_dialect_and_field_info(table)
        key          = self.get_key(table)
        capabilities = self.get_capabilities(table)
        filename     = data['filename']

        t = Table(self.platform, None, table, None, None)

        key_fields = set()
        for name, type in zip(field_names, field_types):
            f = Field(
                qualifiers  = ['const'], # unless we want to update the CSV file
                type        = type,
                name        = name,
                is_array    = False,
                description = '(null)')
            t.insert_field(f)

            if name in key:
                key_fields.add(f)

        t.insert_key(key_fields)
        t.capabilities = capabilities

        announces.append(Announce(t))
    return announces
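# Sketch of the self.config layout assumed by get_metadata() above: one
# entry per CSV-backed table, with at least a 'filename' key. This layout is
# inferred from the loop above, not a documented schema; the table names and
# paths are hypothetical.
EXAMPLE_CSV_CONFIG = {
    "traceroute": {"filename": "/var/lib/manifold/traceroute.csv"},
    "delay":      {"filename": "/var/lib/manifold/delay.csv"},
}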
def get_announce_tables(self):
    tables = []
    for table in self.graph.nodes(False):
        # Ignore child tables with the same name as their parents
        keep = True
        for parent, _ in self.graph.in_edges(table):
            if parent.get_name() == table.get_name():
                keep = False
        if keep:
            tables.append(Table(None, None, table.get_name(),
                                set(self.get_fields(table)),
                                table.get_keys()))
    return tables
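# Usage sketch for get_announce_tables(). "router" is assumed to be the
# object holding the 3nf graph used above; the call returns the Tables to
# announce, parents shadowing same-named children.
def _example_announce_names(router):
    return [table.get_name() for table in router.get_announce_tables()]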
def parse_dot_h(iterable, filename=None):
    """
    Import information stored in a .h file (see manifold/metadata/*.h)
    Args:
        iterable: The file descriptor of a successfully opened file.
            You may also pass an iterable of lines (for instance the
            result of string.splitlines()) if the content of the .h
            file is stored in "string".
        filename: The corresponding filename. It is only used to print
            user-friendly messages, so you may pass None.
    Returns:
        A tuple (tables, enums) made of two dictionaries:
            tables:
                - key: String (the name of the class)
                - data: the corresponding Table instance
            enums:
                - key: String (the name of the enum)
                - data: the corresponding MetadataEnum instance
    Raises:
        ValueError: if the input data is not well-formed.
    """
    # Parse file
    table_name    = None
    cur_enum_name = None
    tables  = {}
    enums   = {}
    no_line = -1
    for line in iterable:
        line = line.rstrip("\r\n")
        is_valid = True
        error_message = None
        no_line += 1
        if REGEXP_EMPTY_LINE.match(line):
            continue
        if line[0] == '#':
            continue

        if table_name: # current scope = class
            #    local const MyType my_field[]; /**< Comment */
            m = REGEXP_CLASS_FIELD.match(line)
            if m:
                qualifiers = list()
                if m.group(2): qualifiers.append("local")
                if m.group(3): qualifiers.append("const")
                tables[table_name].insert_field(Field(
                    qualifiers  = qualifiers,
                    type        = m.group(4),
                    name        = m.group(5),
                    is_array    = (m.group(6) is not None),
                    description = m.group(7).lstrip("/*< ").rstrip("*/ ")))
                continue

            #    KEY(my_field1, my_field2);
            m = REGEXP_CLASS_KEY.match(line)
            if m:
                key = m.group(1).split(',')
                key = [key_elt.strip() for key_elt in key]
                tables[table_name].insert_key(key)
                # XXX
                #if key not in tables[table_name].keys:
                #    tables[table_name].keys.append(key)
                continue

            #    CAPABILITY(my_field1, my_field2);
            m = REGEXP_CLASS_CAP.match(line)
            if m:
                capability = [x.strip() for x in m.group(1).split(',')]
                tables[table_name].set_capability(capability)
                continue

            #    PARTITIONBY(clause_string);
            m = REGEXP_CLASS_CLAUSE.match(line)
            if m:
                clause_string = m.group(1)
                clause = Clause(clause_string)
                tables[table_name].partitions.append(clause)
                continue

            #    };
            if REGEXP_CLASS_END.match(line):
                cur_class = tables[table_name]
                if not cur_class.keys: # we must add an implicit key
                    key_name = "%s_id" % table_name
                    if key_name in cur_class.get_field_names():
                        Log.error("Trying to add implicit key %s which is already in use" % key_name)
                    Log.info("Adding implicit key %s in %s" % (key_name, table_name))
                    dummy_key_field = Field(["const"], "unsigned", key_name, False, "Dummy key")
                    cur_class.insert_field(dummy_key_field)
                    cur_class.insert_key(Key([dummy_key_field]))
                table_name = None
                continue

            # Invalid line
            is_valid = False
            error_message = "In '%s', line %r: in table '%s': invalid line: [%r] %s" % (
                filename, no_line, table_name, line,
                ''.join([PATTERN_BEGIN, PATTERN_CLASS_FIELD, PATTERN_END]))

        elif cur_enum_name: # current scope = enum
            #    "my string value",
            m = REGEXP_ENUM_FIELD.match(line)
            if m:
                value = m.group(1) # XXX enum values are currently ignored
                continue

            #    };
            if REGEXP_CLASS_END.match(line):
                cur_enum_name = None
                continue

            # Invalid line
            is_valid = False
            error_message = "In '%s', line %r: in enum '%s': invalid line: [%r]" % (
                filename, no_line, cur_enum_name, line)

        else: # no current scope
            #    class MyClass {
            m = REGEXP_CLASS_BEGIN.match(line)
            if m:
                qualifier  = m.group(1) # qualifier ?? (currently unused)
                table_name = m.group(2)
                tables[table_name] = Table(None, None, table_name, None, Keys())
                continue

            #    enum MyEnum {
            m = REGEXP_ENUM_BEGIN.match(line)
            if m:
                cur_enum_name = m.group(1)
                enums[cur_enum_name] = MetadataEnum(cur_enum_name)
                continue

            # Invalid line
            is_valid = False
            error_message = "In '%s', line %r: class declaration expected: [%r]" % (
                filename, no_line, line)

        if not is_valid:
            if not error_message:
                error_message = "Invalid input file %s, line %r: [%r]" % (filename, no_line, line)
            Log.error(error_message)
            raise ValueError(error_message)

    return (tables, enums)
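# Usage sketch for parse_dot_h(). The .h syntax below is inferred from the
# regular expressions used above (REGEXP_CLASS_*); it is an illustrative
# assumption, not an official grammar reference.
DOT_H_EXAMPLE = """\
class user {
    const text user_hrn;    /**< Human readable name */
    KEY(user_hrn);
    CAPABILITY(retrieve, join);
};
"""

def _example_parse_dot_h():
    # .strip() avoids a leading empty line; parse_dot_h() consumes any
    # iterable of lines, such as an open file or splitlines() output.
    tables, enums = parse_dot_h(DOT_H_EXAMPLE.strip().splitlines(), "example.h")
    return tables["user"]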
def make_table(self, table_name):
    """
    Build a Table instance according to a given table/view name by
    querying the PostgreSQL schema.
    Args:
        table_name: Name of a view or a relation in PostgreSQL (String instance)
    Returns:
        The Table instance extracted from the PostgreSQL schema related
        to the queried view/relation
    """
    cursor = self.get_cursor()
    table = Table(self.get_platform(), None, table_name, None, None)
    param_execute = {"table_name": table_name}

    # FOREIGN KEYS:
    # We build a foreign_keys dictionary associating each field of
    # the table with the table it references.
    cursor.execute(PostgreSQLGateway.SQL_TABLE_FOREIGN_KEYS, param_execute)
    fks = cursor.fetchall()
    foreign_keys = {fk.column_name: fk.foreign_table_name for fk in fks}

    # COMMENTS:
    # We build a comments dictionary associating each field of the table
    # with its comment.
    comments = self.get_fields_comment(table_name)

    # FIELDS:
    fields = set()
    cursor.execute(PostgreSQLGateway.SQL_TABLE_FIELDS, param_execute)
    for field in cursor.fetchall():
        # PostgreSQL types vs base types
        table.insert_field(Field(
            qualifiers  = [] if field.is_updatable == "YES" else ["const"],
            type        = foreign_keys[field.column_name] if field.column_name in foreign_keys else PostgreSQLGateway.to_manifold_type(field.data_type),
            name        = field.column_name,
            is_array    = (field.data_type == "ARRAY"),
            description = comments[field.column_name] if field.column_name in comments else "(null)"))

    # PRIMARY KEYS: XXX simple key?
    # We build a key dictionary associating each table with its primary key
    cursor.execute(PostgreSQLGateway.SQL_TABLE_KEYS, param_execute)
    primary_keys = dict()
    for pk in cursor.fetchall():
        primary_key = tuple(pk.column_names)
        if table_name not in primary_keys:
            primary_keys[table_name] = set()
        primary_keys[table_name].add(primary_key)

    if table_name in primary_keys:
        for k in primary_keys[table_name]:
            table.insert_key(k)

    # PARTITIONS:
    # TODO
    #mc = MetadataClass('class', table_name)
    #mc.fields = fields
    #mc.keys.append(primary_keys[table_name])

    table.capabilities.retrieve   = True
    table.capabilities.join       = True
    table.capabilities.selection  = True
    table.capabilities.projection = True
    Log.debug("Adding table: %s" % table)
    return table
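# Usage sketch for make_table(). "gateway" is assumed to be a connected
# PostgreSQLGateway instance, and "slice" a hypothetical relation name in
# its schema.
def _example_make_table(gateway):
    table = gateway.make_table("slice")
    Log.debug("Fields: %s" % table.get_fields())
    return table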
def to_3nf(metadata):
    """
    Compute a 3nf schema.
    See also http://elm.eeng.dcu.ie/~ee221/EE221-DB-7.pdf p14
    Args:
        metadata: A dictionary {String => list(Announces)} which maps
            each platform name to a list containing its corresponding
            Announces.
    Returns:
        The corresponding 3nf graph (DBGraph instance).
    """
    # 1) Compute functional dependencies
    tables = []
    map_method_capabilities = {}
    for platform, announces in metadata.items():
        for announce in announces:
            tables.append(announce.table)
            map_method_capabilities[(platform, announce.table.get_name())] = announce.table.get_capabilities()
    fds = make_fd_set(tables)

    # 2) Find a minimal cover
    (fds_min_cover, fds_removed) = fd_minimal_cover(fds)

    # 3) Reinject the fds removed during normalization
    reinject_fds(fds_min_cover, fds_removed)

    # 4) Group fds by method
    #OBSOLETE| fdss = fds_min_cover.group_by_method() # Mando
    fdss = fds_min_cover.group_by_tablename_method() # Jordan

    # 5) Make 3nf tables
    tables_3nf = list()
    #DEPRECATED|LOIC| map_tablename_methods = dict() # map table_name with methods to demux

    for table_name, map_platform_fds in fdss.items():
        # For the potential parent table:
        # stores the number of distinct platforms ...
        num_platforms = 0
        # ... and the set of platforms
        all_platforms = set()
        common_fields = Fields()
        common_key_names = set()

        # Annotations needed for the query plan
        child_tables = list()

        for platform, fds in map_platform_fds.items():
            platforms = set()
            fields    = set()
            keys      = Keys()

            # Annotations needed for the query plan
            map_method_keys   = dict()
            map_method_fields = dict()

            for fd in fds:
                key = fd.get_determinant().get_key()
                keys.add(key)
                fields |= fd.get_fields()

                # We also need to add the fields from the key
                for key_field in key:
                    fields.add(key_field)

                # XXX
                for field, methods in fd.get_map_field_methods().items():
                    for method in methods:
                        # key annotation
                        if not method in map_method_keys:
                            map_method_keys[method] = set()
                        map_method_keys[method].add(key)

                        # field annotations
                        if not method in map_method_fields:
                            map_method_fields[method] = set()
                        map_method_fields[method].add(field.get_name())
                        map_method_fields[method].add(key_field.get_name())

                        #DEPRECATED|LOIC| # demux annotation
                        #DEPRECATED|LOIC| method_name = method.get_name()
                        #DEPRECATED|LOIC| if method_name != table_name:
                        #DEPRECATED|LOIC|     if method_name not in map_tablename_methods.keys():
                        #DEPRECATED|LOIC|         map_tablename_methods[method_name] = set()
                        #DEPRECATED|LOIC|     map_tablename_methods[method_name].add(method)

                        platforms.add(method.get_platform())

            table = Table(platforms, None, table_name, fields, keys)

            # Inject field and key annotations in the Table object
            table.map_method_keys   = map_method_keys
            table.map_method_fields = map_method_fields
            tables_3nf.append(table)
            child_tables.append(table)
            Log.debug("TABLE 3nf:", table, table.keys)
            #print "    method fields", map_method_fields

            num_platforms += 1
            all_platforms |= platforms
            if common_fields.is_empty():
                common_fields = Fields(fields)
            else:
                common_fields &= Fields(fields)

            # Note: the original comprehension iterated "for field in key
            # for key in keys", reusing the leaked loop variable "key"; the
            # intended order is the one below.
            keys_names = frozenset([field.get_name() for key in keys for field in key])
            common_key_names.add(keys_names)

        # Convert common_key_names into Keys() according to common_fields
        common_keys = set()
        map_name_fields = dict()
        for field in common_fields:
            map_name_fields[field.get_name()] = field
        for key_names in common_key_names:
            common_keys.add(Key(frozenset([map_name_fields[field_name] for field_name in key_names])))

        # Several platforms provide the same object, so we have to build a parent table
        if num_platforms > 1:
            parent_table = Table(all_platforms, None, table_name, common_fields, common_keys)

            # Migrate common fields from children to parents, except keys
            parent_map_method_fields = dict()
            # XXX "key" is the last key seen in the loop above
            names_in_common_keys = key.get_field_names()

            for field in common_fields:
                methods = set()
                field_name = field.get_name()
                for child_table in child_tables:
                    # Objective = remove the field from the child table;
                    # several methods can provide it
                    for _method, _fields in child_table.map_method_fields.items():
                        if field_name in _fields:
                            methods.add(_method)
                            if field_name not in names_in_common_keys:
                                _fields.remove(field.get_name())
                    if field_name not in names_in_common_keys:
                        child_table.erase_field(field_name)

                # Add the field with all methods to parent_table
                for method in methods:
                    if not method in parent_map_method_fields:
                        parent_map_method_fields[method] = set()
                    parent_map_method_fields[method].add(field.get_name())

            # Inject field and key annotations in the Table object
            #MANDO|DEPRECATED| parent_table.map_method_keys = dict() #map_common_method_keys
            parent_table.map_method_fields = parent_map_method_fields
            tables_3nf.append(parent_table)
            Log.debug("Parent table TABLE 3nf:", parent_table, table.get_keys())
            #print "    method fields", parent_map_method_fields

            # XXX we already know about the links between those two platforms
            # but we can find them easily (cf dbgraph)

    #DEPRECATED|LOIC| # inject demux annotation
    #DEPRECATED|LOIC| for table in tables_3nf:
    #DEPRECATED|LOIC|     if table.get_name() in map_tablename_methods.keys():
    #DEPRECATED|LOIC|         table.methods_demux = map_tablename_methods[table.get_name()]
    #DEPRECATED|LOIC|     else:
    #DEPRECATED|LOIC|         table.methods_demux = set()

    # 6) Inject capabilities
    # TODO: capabilities are now in tables; shall they be present in
    # tables_3nf instead of relying on map_method_capabilities?
    for table in tables_3nf:
        for announces in metadata.values():
            for announce in announces:
                if announce.get_table().get_name() == table.get_name():
                    capabilities = table.get_capabilities()
                    if capabilities.is_empty():
                        table.set_capability(announce.get_table().get_capabilities())
                    elif not capabilities == announce.get_table().get_capabilities():
                        Log.warning("Conflicting capabilities for tables %r (%r) and %r (%r)" % (
                            table, capabilities,
                            announce.get_table(), announce.get_table().get_capabilities()))

    # 7) Build the DBGraph
    graph_3nf = DBGraph(tables_3nf, map_method_capabilities)

    for table in tables_3nf:
        Log.info("%s" % table)
    return graph_3nf
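# Usage sketch for to_3nf(). The metadata dictionary maps each platform name
# to the Announces its gateway returns, exactly as the get_metadata()
# implementations in this section produce them; the gateway instances are
# assumptions.
def _example_to_3nf(csv_gateway, oml_gateway):
    metadata = {
        "csv": csv_gateway.get_metadata(),
        "oml": oml_gateway.get_metadata(),
    }
    return to_3nf(metadata) # a DBGraph instance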
def get_metadata(self):
    announces = []

    # ANNOUNCE - HARDCODED
    #
    # TABLE slice (
    #     slice_hrn
    #     lease_id
    #     KEY slice_hrn
    # )
    #
    # - Note the 'const' field specification since all measurements are
    #   read only
    # - Here we have an example of a gateway that might not support the
    #   same operators on the different tables

    t = Table('oml', None, 'slice', None, None)

    slice_hrn = Field(
        qualifiers  = ['const'],
        type        = 'text',
        name        = 'slice_hrn',
        is_array    = False,
        description = 'Slice Human Readable Name')
    t.insert_field(slice_hrn)
    t.insert_field(Field(
        qualifiers  = ['const'],
        type        = 'int',
        name        = 'lease_id',
        is_array    = False,
        description = 'Lease identifier'))
    t.insert_key(slice_hrn)

    t.capabilities.join       = True
    t.capabilities.selection  = True
    t.capabilities.projection = True
    announces.append(Announce(t))

    # ANNOUNCE
    #
    # TABLE application (
    #     lease_id
    #     application
    #     KEY (lease_id, application)
    # )

    t = Table('oml', None, 'application', None, None)

    lease_id = Field(
        qualifiers  = ['const'],
        type        = 'int',
        name        = 'lease_id',
        is_array    = False,
        description = 'Lease identifier')
    application = Field(
        qualifiers  = ['const'],
        type        = 'string',
        name        = 'application',
        is_array    = True,
        description = '(null)')
    t.insert_field(lease_id)
    t.insert_field(application)

    key = Key([lease_id, application])
    t.insert_key(key)
    #t.insert_key(lease_id)

    t.capabilities.retrieve   = True
    t.capabilities.join       = True
    t.capabilities.selection  = True
    t.capabilities.projection = True
    announces.append(Announce(t))

    # ANNOUNCE
    #
    # TABLE measurement_point (
    #     lease_id
    #     application
    #     measurement_point
    #     KEY (lease_id, application, measurement_point)
    # )

    t = Table('oml', None, 'measurement_point', None, None)

    lease_id = Field(
        qualifiers  = ['const'],
        type        = 'int',
        name        = 'lease_id',
        is_array    = False,
        description = 'Lease identifier')
    application = Field(
        qualifiers  = ['const'],
        type        = 'string',
        name        = 'application',
        is_array    = False,
        description = '(null)')
    measurement_point = Field(
        qualifiers  = ['const'],
        type        = 'string',
        name        = 'measurement_point',
        is_array    = False,
        description = '(null)')
    t.insert_field(lease_id)
    t.insert_field(application)
    t.insert_field(measurement_point)

    key = Key([lease_id, application, measurement_point])
    t.insert_key(key)
    #t.insert_key(application)

    t.capabilities.retrieve   = True
    t.capabilities.join       = True
    t.capabilities.selection  = True
    t.capabilities.projection = True
    announces.append(Announce(t))

    return announces
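# Usage sketch: dump the hardcoded announces built above. "gateway" is
# assumed to be an instance of the OML gateway class defining this
# get_metadata().
def _example_dump_oml_announces(gateway):
    for announce in gateway.get_metadata():
        table = announce.get_table()
        Log.info("announcing %s" % table.get_name())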