def right_callback(self, record):
    r"""
    \brief Handle one record received from the right child: merge it into
        every pending left record sharing its join key, then forward the
        merged left records.
    \param record The received record (dictionary-like). The last-record
        marker triggers self._on_right_done().
    """
    if record.is_last():
        self._on_right_done()
        return

    # A right record can only be joined if it carries a non-empty value for
    # the right-hand field of the predicate; otherwise it is dropped.
    required = set([self.predicate.get_value()])
    if not required.issubset(record.keys()) \
    or Record.is_empty_record(record, required):
        Log.warning("Missing LEFTJOIN predicate %s in right record %r: ignored" % \
                (self.predicate, record))
        return

    # We expect to receive information about keys we asked, and only these.
    # XXX Dangers of duplicates ?
    key = Record.get_value(record, self.predicate.value)
    pending = self.left_map.get(key)
    if not pending:
        return

    for left_record in pending:
        left_record.update(record)
        self.send(left_record)
    # Joined left records are removed from the map once forwarded.
    del self.left_map[key]
def row2record(row):
    """
    Convert a database row into a Record.

    The row is first read through its mapped table columns
    (row.__table__.columns). If that fails, the row is read through its
    keys() instead.

    Args:
        row: A row object exposing either a __table__ attribute or keys().

    Returns:
        A Record mapping each column/key name to the row attribute of the
        same name.
    """
    try:
        return Record({c.name: getattr(row, c.name) for c in row.__table__.columns})
    except Exception:
        # Not a bare "except:", so that KeyboardInterrupt and SystemExit are
        # no longer swallowed by the fallback.
        return Record({c: getattr(row, c) for c in row.keys()})
def __init__(self, left_child, right_child, predicate):
    r"""
    \brief Constructor
    \param left_child  Left operand of the LEFT JOIN: either a Node
        instance, or a list of already known left records (dicts) or bare
        key values.
    \param right_child A Node instance corresponding to the right operand
        of the LEFT JOIN
    \param predicate   A Predicate instance used to determine whether two
        records of left_child and right_child can be joined. Only equality
        predicates with both a key and a value are accepted.
    """
    assert predicate.op == eq
    assert predicate.get_key()
    assert predicate.get_value()

    super(LeftJoin, self).__init__()

    # Check parameters
    LeftJoin.check_init(left_child, right_child, predicate)

    # Initialization
    self.left = left_child
    self.right = right_child
    self.predicate = predicate

    # Maps a join key to the LIST of left records having this key. The
    # callbacks of this operator append to and iterate over these lists, so
    # the list-injection path below must store lists as well.
    self.left_map = {}

    if isinstance(left_child, list):
        self.left_done = True
        for r in left_child:
            if isinstance(r, dict):
                key = Record.get_value(r, self.predicate.key)
                left_record = r
            else:
                # r is generally a tuple
                key = r
                left_record = Record.from_key_value(self.predicate.key, r)
            self.left_map.setdefault(key, []).append(left_record)
    else:
        # The left child now feeds this operator, which in turn feeds the
        # callback previously registered on the left child.
        old_cb = left_child.get_callback()
        self.left_done = False
        left_child.set_callback(self.left_callback)
        self.set_callback(old_cb)

    right_child.set_callback(self.right_callback)

    # CASE WHERE WE HAVE A LIST
    if isinstance(left_child, list):
        self.query = self.right.get_query().copy()
        # adding left fields: we know left_child is always a dict, since it
        # holds more than the key only, since otherwise we would not have
        # injected but only added a filter.
        if left_child:
            # keys() is a list under Python 2; set operators require a set
            # on both sides.
            self.query.fields |= set(left_child[0].keys())
        return

    # CASE WHERE WE HAVE TWO ASTs:
    self.query = self.left.get_query().copy()
    self.query.filters |= self.right.get_query().filters
    if self.query.fields is not None:
        self.query.fields |= self.right.get_query().fields
def left_callback(self, record):
    r"""
    \brief Process records received by the left child
    \param record A dictionary representing the received record

    Regular records are stored in self.left_map (join key -> list of left
    records) until the right child answers. Records lacking the fields
    required by the predicate cannot be joined and are forwarded as is.
    When the last-record marker arrives, the right child is restricted to
    the collected keys and started.

    \exception NotImplementedError The right query is a CREATE and more
        than one distinct left key was collected.
    """
    if record.is_last():
        # left_done. Injection is not the right way to do this.
        # We need to insert a filter on the key in the right member
        right_field = self.predicate.get_value()
        keys = list(self.left_map.keys())

        if self.right.get_query().action == ACTION_CREATE:
            # XXX If multiple insert, we need to match the right ID with the
            # right inserted items
            if len(keys) > 1:
                # NotImplemented is a comparison sentinel, not an exception
                # class: raising it fails with an unrelated TypeError.
                raise NotImplementedError(
                    "LEFT JOIN with a CREATE right query supports a single left key")

            if not keys:
                # No JOIN possible
                self.left_done = True
                self._on_right_done()
                return

            # Pass the id as a param
            query = self.right.get_query()
            query.params[right_field] = keys[0]
        else:
            # Pass the ids as a filter, which is the normal behaviour
            predicate = Predicate(right_field, included, keys)
            self.right = self.right.optimize_selection(Filter().filter_by(predicate))
            self.right.set_callback(self.right_callback) # already done in __init__ ?

        self.left_done = True
        self.right.start()
        return

    # Directly send records missing information necessary to join
    if not Record.has_fields(record, self.predicate.get_field_names()):
        Log.warning("Missing LEFTJOIN predicate %s in left record %r : forwarding" % \
                (self.predicate, record))
        self.send(record)
        # The record has been forwarded: it must not be stored for joining
        # as well.
        return

    # Store the result in a hash for joining later
    hash_key = Record.get_value(record, self.predicate.key)
    self.left_map.setdefault(hash_key, []).append(record)
def child_callback(self, child_id, record):
    r"""
    \brief Processes records received by a child node
    \param child_id Identifier of the child that received the record
    \param record   Dictionary representing the received record

    Records without key are forwarded immediately. The first record seen
    for a key value is stored in self.key_map; later records having the
    same key value are merged into the stored one.
    """
    if record.is_last():
        # XXX SEND ALL
        self.status.completed(child_id)
        return

    # NOTE: `key` must keep this name, and no other local should be bound
    # before the Log.info() call below, since that call is handed locals().
    key = self.key.get_field_names()

    if not key:
        # DISTINCT not implemented, just forward the record
        Log.critical("No key associated to UNION operator")
        self.send(record)
        return

    if not Record.has_fields(record, key):
        # Send records that have no key
        Log.info("UNION::child_callback sent record without key '%(key)s': %(record)r", **locals())
        self.send(record)
        return

    key_value = Record.get_value(record, key)
    if key_value not in self.key_map:
        # First record seen for this key value
        self.key_map[key_value] = record
        return

    Log.debug("UNION::child_callback merged duplicate records: %r" % record)
    merged = self.key_map[key_value]
    for field, value in record.items():
        if field not in merged:
            merged[field] = value
        elif isinstance(value, list):
            if not merged[field]:
                # with failures it can occur that this is None
                merged[field] = []
            merged[field].extend(value) # DUPLICATES ?
        # Conflicting non-list values: the stored value is kept.
def convert(self, row, field_names, field_types):
    """
    Build a Record from a raw row, casting every value to its declared type.

    Args:
        row: The sequence of raw values.
        field_names: The field names, in the same order as `row`.
        field_types: The type names, in the same order as `row`; each one
            is resolved through type_by_name() and the result is called on
            the corresponding value.

    Returns:
        A Record mapping each field name to its converted value. An empty
        row yields an empty Record.
    """
    # The three sequences are walked in lockstep; the shortest one bounds
    # the result.
    return Record([
        (name, type_by_name(type_name)(value))
        for value, name, type_name in zip(row, field_names, field_types)
    ])
def get_element_key(element, key):
    """
    Return the value identifying `element` for the field(s) `key`.

    Args:
        element: A Record, a plain dict, or a bare identifier (an id or a
            tuple of ids).
        key: The field name(s) to extract from a Record or dict.

    Returns:
        The value extracted from a Record or dict, or `element` itself
        when it is neither.
    """
    if isinstance(element, Record):
        return element.get_value(key)

    if isinstance(element, dict):
        # Plain dict holding a record
        return Record.get_value(element, key)

    # id or tuple(id1, id2, ...)
    return element
def do_projection(record, fields):
    """
    Build a new Record restricted to the requested fields.

    Plain field names are copied from `record` (None when absent). Dotted
    names ("method.subfield") are grouped by their first component and the
    projection is applied recursively to every element of record[method];
    a method absent from `record` is skipped.

    Annotations are carried over when `record` is a Record instance.

    Args:
        record: The Record (or dict) to project.
        fields: An iterable of field names, possibly dotted.

    Returns:
        The projected Record.
    """
    projected = Record()

    # Preserve annotations !
    # Not for Last Record which is of dict type
    if isinstance(record, Record):
        projected.set_annotations(record.get_annotations())

    # 1/ split plain fields from subquery fields
    plain_fields = []
    nested_fields = {}
    for field in fields:
        method, dot, subfield = field.partition('.')
        if dot:
            nested_fields.setdefault(method, []).append(subfield)
        else:
            plain_fields.append(field)

    # 2/ copy plain fields
    for name in plain_fields:
        projected[name] = record[name] if name in record else None

    # 3/ recursively process subqueries: record[method] is an array whose
    # elements must all be filtered according to the subfields
    for method, subfields in nested_fields.items():
        if method in record:
            projected[method] = [do_projection(item, subfields) for item in record[method]]

    return projected
def callback_records(self, rows): """ (Internal usage) See ManifoldGateway::receive_impl. Args: packet: A QUERY Packet. rows: The corresponding list of dict or Record instances. """ if rows is not None: try: iterator = iter(rows) except TypeError, te: print "rows = ", rows rows = [{'initscript_code': rows}] if isinstance(rows, basestring): rows = [{'initscript_code': rows}] for row in rows: print row self.send(Record(row))
def all_done(self):
    r"""
    \brief Called when all children of the current subquery are done: we
        process results stored in the parent.

    Each record of self.parent_output is completed with the matching
    records found in self.child_results (indexed like self.children and
    self.relations), then sent. A LastRecord is sent once every parent
    record has been processed.

    Any exception raised meanwhile is printed with its traceback and is
    not re-raised.
    """
    # NOTE(review): when an exception is caught below, the final
    # self.send(LastRecord()) is skipped.
    try:
        for parent_record in self.parent_output:
            # Dispatching child results
            for i, child in enumerate(self.children):
                relation = self.relations[i]
                predicate = relation.get_predicate()

                key, op, value = predicate.get_tuple()

                if op == eq:
                    # 1..N
                    # Example: parent has slice_hrn, resource has a reference to slice
                    #            PARENT       CHILD
                    # Predicate: (slice_hrn,) == slice

                    # Collect in parent all child such as they have a pointer to the parent
                    record = Record.get_value(parent_record, key)
                    if not record:
                        record = []
                    # Normalize a single value into a one-element list
                    if not isinstance(record, (list, tuple, set, frozenset)):
                        record = [record]
                    if relation.get_type() in [Relation.types.LINK_1N, Relation.types.LINK_1N_BACKWARDS]:
                        # we have a list of elements
                        # element = id or dict    : simple key
                        #         = tuple or dict : composite key
                        ids = [SubQuery.get_element_key(r, value) for r in record]
                    else:
                        # NOTE(review): `record` is a collection at this
                        # point, so the whole collection (not one element)
                        # is passed here -- confirm this is intended.
                        ids = [SubQuery.get_element_key(record, value)]
                    # A single id is matched by equality, several ids by
                    # inclusion.
                    if len(ids) == 1:
                        id, = ids
                        filter = Filter().filter_by(Predicate(value, eq, id))
                    else:
                        filter = Filter().filter_by(Predicate(value, included, ids))

                    # The relation field of the parent record is rebuilt
                    # from the child records matching the filter.
                    parent_record[relation.get_relation_name()] = []
                    for child_record in self.child_results[i]:
                        if filter.match(child_record):
                            parent_record[relation.get_relation_name()].append(child_record)

                elif op == contains:
                    # 1..N
                    # Example: parent 'slice' has a list of 'user' keys == user_hrn
                    #            PARENT        CHILD
                    # Predicate: user contains (user_hrn, )

                    # first, replace records by dictionaries. This only works for non-composite keys
                    if parent_record[child.query.object]:
                        record = parent_record[child.query.object][0]
                        if not isinstance(record, dict):
                            parent_record[child.query.object] = [{value: record} for record in parent_record[child.query.object]]

                    if isinstance(value, StringTypes):
                        # Simple key
                        for record in parent_record[child.query.object]:
                            # Find the corresponding record in child_results and update the one in the parent with it
                            # NOTE(review): k and v are unused; the same
                            # filter is rebuilt and applied once per field
                            # of the record.
                            for k, v in record.items():
                                filter = Filter().filter_by(Predicate(value, eq, record[value]))
                                for r in self.child_results[i]:
                                    if filter.match(r):
                                        record.update(r)
                    else:
                        # Composite key: one equality predicate per field
                        for record in parent_record[child.query.object]:
                            # Find the corresponding record in child_results and update the one in the parent with it
                            # NOTE(review): k and v are unused here as well.
                            for k, v in record.items():
                                filter = Filter()
                                for field in value:
                                    filter = filter.filter_by(Predicate(field, eq, record[field]))
                                for r in self.child_results[i]:
                                    if filter.match(r):
                                        record.update(r)

                else:
                    raise Exception, "No link between parent and child queries"
            self.send(parent_record)
        self.send(LastRecord())
    except Exception, e:
        print "EEE", e
        traceback.print_exc()
def run_children(self):
    """
    Run children queries (subqueries) assuming the parent query (main query)
    has successfully ended.

    Children whose requested fields are all already present in the parent
    records are not run: their results are taken from the parent records.
    The other children are restricted to the fields still missing and to
    the identifiers found in the parent records, then started.

    Raises:
        Exception: A relation predicate uses an operator other than `eq`
            or `contains`.
    """
    if not self.parent_output:
        # No parent record, this is useless to run children queries.
        self.send(LastRecord())
        return

    if not self.children:
        # The top operator has build a SubQuery node without child queries,
        # so this SubQuery operator is useless and should be replaced by
        # its main query.
        Log.warning("SubQuery::run_children: no child node. The query plan could be improved")
        self.send(LastRecord())
        return

    # Inspect the first parent record to deduce which fields have already
    # been fetched
    first_record = self.parent_output[0]
    parent_fields = set(first_record.keys())

    # Optimize child queries according to the fields already retrieved thanks
    # to the parent query.
    useless_children = set()
    # Iterate over a copy since self.children[i] may be replaced in the loop
    for i, child in enumerate(self.children[:]):
        # Test whether the current child provides relevant fields (e.g.
        # fields not already fetched in the parent record). If so, reduce
        # the set of queried field in order to only retrieve relevant fields.
        child_fields = child.get_query().get_select()
        relation = self.relations[i]
        relation_name = relation.get_relation_name()
        already_fetched_fields = set()
        if relation_name in parent_fields:
            if relation.get_type() in [Relation.types.LINK_1N, Relation.types.LINK_1N_BACKWARDS]:
                # The relation field holds a list: inspect its first element
                if relation_name in first_record and first_record[relation_name] and len(first_record[relation_name]) > 0:
                    if isinstance(first_record[relation_name][0], Record):
                        already_fetched_fields = set(first_record[relation_name][0].keys())
                    else:
                        # If we do not have a dict, we have only keys, so it's like we had no field of importance...
                        already_fetched_fields = set()
                else:
                    already_fetched_fields = set()
            else:
                # NOTE(review): assumes the relation field holds a single
                # dict-like value here -- confirm.
                if relation_name in first_record and first_record[relation_name] and len(first_record[relation_name]) > 0:
                    already_fetched_fields = set(first_record[relation_name].keys())
                else:
                    already_fetched_fields = set()

        # XXX routerv2: we need to keep key used for subquery
        key_field = relation.get_predicate().get_value()

        relevant_fields = child_fields - already_fetched_fields

        if not relevant_fields:
            # Nothing left to fetch: the child results are the values
            # already embedded in the parent records.
            tmp = list()
            for pr in self.parent_output:
                tmp.extend(pr[relation_name])
            self.child_results[i] = tmp # Records

            useless_children.add(i)
            continue
        else:
            relevant_fields |= frozenset([key_field]) # necessary ?
            if child_fields != relevant_fields:
                # XXX This seems to remove the key used for joining
                self.children[i] = child.optimize_projection(relevant_fields)

    # If every children are useless, this means that we already have full records
    # thanks to the parent query, so we simply forward those records.
    if len(self.children) == len(useless_children):
        # NOTE(review): relies on map() being eager (Python 2 behaviour).
        map(self.send, self.parent_output)
        self.send(LastRecord())
        return

    # Loop through children and inject the appropriate parent results
    for i, child in enumerate(self.children):
        if i in useless_children:
            continue

        # We have two cases:
        # (1) either the parent query has subquery fields (a list of child
        #     ids + eventually some additional information)
        # (2) either the child has a backreference to the parent
        #     ... eventually a partial reference in case of a 1..N relationship
        #
        # In all cases, we will collect all identifiers to proceed to a
        # single child query for efficiency purposes, unless it's not
        # possible (?).
        #
        # We have several parent records stored in self.parent_output
        #
        # /!\ Can we have a mix of (1) and (2) ? For now, let's suppose NO.
        #  *  We could expect key information to be stored in the DBGraph

        # The operation to be performed is understood only be looking at the predicate
        relation = self.relations[i]
        predicate = relation.get_predicate()

        key, op, value = predicate.get_tuple()
        if op == eq:
            # 1..N
            # Example: parent has slice_hrn, resource has a reference to slice
            if relation.get_type() == Relation.types.LINK_1N_BACKWARDS:
                parent_ids = [record[key] for record in self.parent_output]
                # A single id is matched by equality, several by inclusion
                if len(parent_ids) == 1:
                    parent_id, = parent_ids
                    filter_pred = Predicate(value, eq, parent_id)
                else:
                    filter_pred = Predicate(value, included, parent_ids)
            else:
                parent_ids = []
                for parent_record in self.parent_output:
                    record = Record.get_value(parent_record, key)
                    if not record:
                        record = []
                    # XXX Nothing to do for the case where the list of keys in the parent is empty
                    if relation.get_type() in [Relation.types.LINK_1N, Relation.types.LINK_1N_BACKWARDS]:
                        # we have a list of elements
                        # element = id or dict    : simple key
                        #         = tuple or dict : composite key
                        parent_ids.extend([self.get_element_key(r, value) for r in record])
                    else:
                        parent_ids.append(self.get_element_key(record, value))

                if len(parent_ids) == 1:
                    parent_id, = parent_ids
                    filter_pred = Predicate(value, eq, parent_id)
                else:
                    filter_pred = Predicate(value, included, parent_ids)

            # Injecting predicate: the filtered child replaces the original
            # one and inherits its callback.
            old_child_callback = child.get_callback()
            self.children[i] = child.optimize_selection(Filter().filter_by(filter_pred))
            self.children[i].set_callback(old_child_callback)

        elif op == contains:
            # 1..N
            # Example: parent 'slice' has a list of 'user' keys == user_hrn
            for slice in self.parent_output:
                if not child.get_query().object in slice:
                    continue
                users = slice[key]
                # users is either a list of ids or a list of records
                user_data = []
                for user in users:
                    if isinstance(user, dict):
                        user_data.append(user)
                    else:
                        # we have a key
                        # XXX Take multiple keys into account
                        user_data.append({value: user})
                # Let's inject user_data in the right child
                child.inject(user_data, value, None)

        else:
            raise Exception, "No link between parent and child queries"

    # We make another loop since the children might have been modified in
    # the previous one.
    # Every child is marked as started before any of them is started.
    for i, child in enumerate(self.children):
        if i in useless_children:
            continue
        self.status.started(i)
    for i, child in enumerate(self.children):
        if i in useless_children:
            continue
        child.start()
def success_cb(self, table): print "Manifold SUCCESS", len(table) for record in table: self.callback(Record(record)) self.callback(LastRecord())