Exemple #1
0
    def right_callback(self, record):
        r"""
        \brief Join a record received from the right child with the left
            records stored under the same key, and forward the results.
        \param record The record received from the right child. The last
            record only triggers self._on_right_done().
        """
        if record.is_last():
            self._on_right_done()
            return

        # A right record is only usable when it carries a non-empty value for
        # the field named by the right-hand side of the predicate.
        required_fields = set([self.predicate.get_value()])
        is_joinable = required_fields <= set(record.keys())
        if not is_joinable or Record.is_empty_record(record, required_fields):
            Log.warning(
                "Missing LEFTJOIN predicate %s in right record %r: ignored" %
                (self.predicate, record))
            return

        # Look up the left records collected for this key; when there are
        # none, the right record is dropped without any message.
        # XXX Dangers of duplicates ?
        join_key = Record.get_value(record, self.predicate.value)
        pending = self.left_map.get(join_key, None)
        if not pending:
            return

        for left_record in pending:
            left_record.update(record)
            self.send(left_record)

        # The entry is removed once joined, so a later right record carrying
        # the same key finds nothing to join with.
        del self.left_map[join_key]
Exemple #2
0
def row2record(row):
    """
    Convert a database row into a Record.

    The column names are first read from row.__table__.columns. If that
    fails, the row is read through its keys() method instead.

    Args:
        row: The row to convert.
            NOTE(review): presumably a SQLAlchemy mapped instance or result
            row - confirm against the callers.
    Returns:
        A Record mapping each column name to the value of the attribute of
        the same name in row.
    """
    try:
        return Record(
            {c.name: getattr(row, c.name)
             for c in row.__table__.columns})
    except Exception:
        # Catch Exception rather than everything, so that KeyboardInterrupt
        # and SystemExit still propagate to the caller.
        return Record({c: getattr(row, c) for c in row.keys()})
Exemple #3
0
    def __init__(self, left_child, right_child, predicate):  #, callback):
        r"""
        \brief Constructor
        \param left_child  The left operand of the LEFT JOIN: either a Node
            instance, or a list holding the left records (dict) or their keys.
        \param right_child A Node instance corresponding to the right operand
            of the LEFT JOIN
        \param predicate A Predicate instance using the eq operator, whose key
            is looked up in left records and whose value is looked up in right
            records.
        """
        assert predicate.op == eq
        assert predicate.get_key()
        assert predicate.get_value()

        super(LeftJoin, self).__init__()

        # Check parameters
        LeftJoin.check_init(left_child, right_child, predicate)  #, callback)

        left_is_list = isinstance(left_child, list)

        # Initialization
        self.left = left_child
        self.right = right_child
        self.predicate = predicate
        self.left_map = {}

        if left_is_list:
            # The left records are already known: index them immediately
            self.left_done = True
            for item in left_child:
                if isinstance(item, dict):
                    map_key = Record.get_value(item, self.predicate.key)
                    self.left_map[map_key] = item
                else:
                    # item is generally a tuple
                    self.left_map[item] = Record.from_key_value(
                        self.predicate.key, item)
        else:
            # This node takes over the callback of the left child and
            # receives the left records itself.
            previous_callback = left_child.get_callback()
            self.left_done = False
            left_child.set_callback(self.left_callback)
            self.set_callback(previous_callback)

        right_child.set_callback(self.right_callback)

        if left_is_list:
            # CASE WHERE WE HAVE A LIST
            self.query = self.right.get_query().copy()
            # adding left fields: we know left_child is always a dict, since it
            # holds more than the key only, since otherwise we would not have
            # injected but only added a filter.
            if left_child:
                self.query.fields |= left_child[0].keys()
        else:
            # CASE WHERE WE HAVE TWO ASTs:
            self.query = self.left.get_query().copy()
            self.query.filters |= self.right.get_query().filters
            if self.query.fields is not None:
                self.query.fields |= self.right.get_query().fields
Exemple #4
0
    def left_callback(self, record):
        r"""
        \brief Process records received by the left child.
            Regular records are stored in self.left_map, grouped by the value
            of the predicate key. When the last record is received, the right
            child is configured with the collected keys and started.
        \param record The record received from the left child
        \raise NotImplementedError if the right query is a CREATE and several
            distinct left keys have been collected.
        """
        if record.is_last():
            # left_done. Injection is not the right way to do this.
            # We need to insert a filter on the key in the right member
            predicate = Predicate(self.predicate.get_value(), included,
                                  self.left_map.keys())

            if self.right.get_query().action == ACTION_CREATE:
                keys = list(self.left_map.keys())

                # XXX If multiple insert, we need to match the right ID with the
                # right inserted items
                if len(keys) > 1:
                    raise NotImplementedError(
                        "LEFT JOIN on a CREATE query with several left keys "
                        "is not supported")

                if not keys:
                    # No JOIN possible
                    self.left_done = True
                    self._on_right_done()
                    return

                # Pass the id as a param
                query = self.right.get_query()
                query.params[self.predicate.get_value()] = keys[0]
            else:  # pass the id as a filter which is the normal behaviour
                self.right = self.right.optimize_selection(
                    Filter().filter_by(predicate))
                self.right.set_callback(
                    self.right_callback)  # already done in __init__ ?

            self.left_done = True
            self.right.start()
            return

        # Directly send records missing information necessary to join. Such a
        # record cannot be indexed, so it must not reach the storage step below.
        if not Record.has_fields(record, self.predicate.get_field_names()):
            Log.warning(
                "Missing LEFTJOIN predicate %s in left record %r : forwarding" %
                (self.predicate, record))
            self.send(record)
            return

        # Store the result in a hash for joining later
        hash_key = Record.get_value(record, self.predicate.key)
        self.left_map.setdefault(hash_key, []).append(record)
Exemple #5
0
    def child_callback(self, child_id, record):
        r"""
        \brief Handle a record produced by one of the children of the UNION.
            Records sharing the same key value are merged into the first
            record stored in self.key_map for that value; records that cannot
            be keyed are forwarded immediately.
        \param child_id identifier of the child that produced the record
        \param record the received record
        """
        if record.is_last():
            # XXX SEND ALL
            self.status.completed(child_id)
            return

        # NOTE: the local names below are part of the log message, which is
        # formatted from locals(); do not rename them.
        key = self.key.get_field_names()

        # DISTINCT not implemented, just forward the record
        if not key:
            Log.critical("No key associated to UNION operator")
            self.send(record)
            return

        # Send records that have no key
        if not Record.has_fields(record, key):
            Log.info(
                "UNION::child_callback sent record without key '%(key)s': %(record)r",
                **locals())
            self.send(record)
            return

        key_value = Record.get_value(record, key)

        # First record seen for this key value: just remember it
        if key_value not in self.key_map:
            self.key_map[key_value] = record
            return

        Log.debug("UNION::child_callback merged duplicate records: %r" %
                  record)
        merged = self.key_map[key_value]
        for field, field_value in record.items():
            if field not in merged:
                merged[field] = field_value
            elif isinstance(field_value, list):
                # with failures it can occur that the stored value is None
                if not merged[field]:
                    merged[field] = []
                merged[field].extend(field_value)  # DUPLICATES ?
            # Conflicting non-list values are ignored: the stored one wins
Exemple #6
0
 def convert(self, row, field_names, field_types):
     """
     Build a Record from one row of raw values.

     Each value of row is paired with the name and the type found at the
     same position in field_names and field_types, and converted by calling
     type_by_name(<type>)(<value>).

     Args:
         row: The raw values.
         field_names: The field names, in the same order as row.
         field_types: The type names, in the same order as row.
     Returns:
         A Record built from the (name, converted value) pairs, or None when
         there is nothing to convert (one of the inputs is empty).
     """
     # Materialise the triples once so the inputs are iterated a single time
     columns = list(izip(row, field_names, field_types))
     if not columns:
         return None
     return Record([(name, type_by_name(type_name)(value))
                    for value, name, type_name in columns])
Exemple #7
0
 def get_element_key(element, key):
     """
     Extract the value stored under key from element.

     Args:
         element: A Record, a dict, or a bare identifier (id or tuple of ids).
         key: The key passed to get_value() for Record and dict elements.
     Returns:
         element.get_value(key) for a Record, Record.get_value(element, key)
         for a dict, and element itself otherwise.
     """
     if isinstance(element, Record):
         return element.get_value(key)
     if isinstance(element, dict):
         # plain dict: use the Record accessor on it
         return Record.get_value(element, key)
     # id or tuple(id1, id2, ...)
     return element
Exemple #8
0
def do_projection(record, fields):
    """
    Build a new Record holding only the requested fields of record.

    Args:
        record: The source record (a Record, or a plain dict).
        fields: The field names to keep. A name of the form
            "method.subfield" requests subfield in every element of
            record[method].
    Returns:
        A Record where each plain field is copied from record (None when it
        is missing), and each requested method present in record holds the
        list of projected elements.
    """
    projected = Record()
    # Preserve annotations !
    # Not for Last Record which is of dict type
    if isinstance(record, Record):
        projected.set_annotations(record.get_annotations())

    # 1/ split plain fields from subquery fields
    plain_fields = []
    nested_fields = {}
    for field in fields:
        if '.' not in field:
            plain_fields.append(field)
            continue
        method, subfield = field.split('.', 1)
        nested_fields.setdefault(method, []).append(subfield)

    # 2/ copy plain fields
    for name in plain_fields:
        if name in record:
            projected[name] = record[name]
        else:
            projected[name] = None

    # 3/ recursively project each element of the subquery fields; a method
    # missing from record is left out of the result
    for method, subfields in nested_fields.items():
        if method not in record:
            continue
        projected[method] = [
            do_projection(item, subfields) for item in record[method]
        ]

    return projected
Exemple #9
0
    def callback_records(self, rows):
        """
        (Internal usage) See ManifoldGateway::receive_impl.

        Sends one Record per row. Nothing is sent when rows is None.

        Args:
            rows: The corresponding list of dict or Record instances. A
                string, or any value that cannot be iterated, is wrapped
                into a single row {'initscript_code': rows}.
        """
        if rows is None:
            return

        if isinstance(rows, basestring):
            rows = [{'initscript_code': rows}]
        else:
            try:
                iter(rows)
            except TypeError:
                rows = [{'initscript_code': rows}]

        for row in rows:
            self.send(Record(row))
Exemple #10
0
    def all_done(self):
        r"""
        \brief Called when all children of the current subquery are done: we
         process results stored in the parent.

        Every record of self.parent_output is completed with the matching
        records of self.child_results and sent, then a LastRecord is sent.

        Any exception raised meanwhile is printed with its traceback and is
        not re-raised; in that case the remaining records and the LastRecord
        are not sent.
        """
        try:
            for parent_record in self.parent_output:
                # Dispatching child results
                for i, child in enumerate(self.children):
                    relation = self.relations[i]
                    predicate = relation.get_predicate()

                    key, op, value = predicate.get_tuple()

                    if op == eq:
                        # 1..N
                        # Example: parent has slice_hrn, resource has a reference to slice
                        #            PARENT       CHILD
                        # Predicate: (slice_hrn,) == slice

                        # Collect in parent all child such as they have a pointer to the parent
                        record = Record.get_value(parent_record, key)
                        if not record:
                            record = []
                        if not isinstance(record,
                                          (list, tuple, set, frozenset)):
                            record = [record]
                        if relation.get_type() in [
                                Relation.types.LINK_1N,
                                Relation.types.LINK_1N_BACKWARDS
                        ]:
                            # we have a list of elements
                            # element = id or dict    : simple key
                            #         = tuple or dict : composite key
                            ids = [
                                SubQuery.get_element_key(r, value)
                                for r in record
                            ]
                        else:
                            ids = [SubQuery.get_element_key(record, value)]

                        # A single identifier is matched by equality, several
                        # (or none) by inclusion
                        if len(ids) == 1:
                            child_filter = Filter().filter_by(
                                Predicate(value, eq, ids[0]))
                        else:
                            child_filter = Filter().filter_by(
                                Predicate(value, included, ids))

                        relation_name = relation.get_relation_name()
                        parent_record[relation_name] = []
                        for child_record in self.child_results[i]:
                            if child_filter.match(child_record):
                                parent_record[relation_name].append(
                                    child_record)

                    elif op == contains:
                        # 1..N
                        # Example: parent 'slice' has a list of 'user' keys == user_hrn
                        #            PARENT        CHILD
                        # Predicate: user contains (user_hrn, )
                        object_name = child.query.object

                        # first, replace records by dictionaries. This only works for non-composite keys
                        if parent_record[object_name]:
                            if not isinstance(parent_record[object_name][0],
                                              dict):
                                parent_record[object_name] = [
                                    {value: item}
                                    for item in parent_record[object_name]
                                ]

                        # A simple key is handled as a composite key made of
                        # a single field
                        if isinstance(value, StringTypes):
                            key_fields = [value]
                        else:
                            key_fields = value

                        for record in parent_record[object_name]:
                            # Nothing to match on in an empty record
                            if not record:
                                continue

                            # Find the corresponding record in child_results and update the one in the parent with it
                            child_filter = Filter()
                            for field in key_fields:
                                child_filter = child_filter.filter_by(
                                    Predicate(field, eq, record[field]))
                            for r in self.child_results[i]:
                                if child_filter.match(r):
                                    record.update(r)

                    else:
                        raise Exception(
                            "No link between parent and child queries")

                self.send(parent_record)
            self.send(LastRecord())
        except Exception as e:
            print("EEE %s" % (e,))
            traceback.print_exc()
Exemple #11
0
    def run_children(self):
        """
        Run children queries (subqueries) assuming the parent query (main query)
        has successfully ended.

        The children are first reduced to the fields that the parent records
        do not already provide; a child left with nothing to fetch is not run
        and its results are taken from the parent records. Every remaining
        child then receives the parent identifiers (as a filter, or by
        injection) and is started.

        Raises:
            Exception: if the predicate of a relation uses an operator other
                than eq or contains.
        """
        if not self.parent_output:
            # No parent record, this is useless to run children queries.
            self.send(LastRecord())
            return

        if not self.children:
            # The top operator has build a SubQuery node without child queries,
            # so this SubQuery operator is useless and should be replaced by
            # its main query.
            Log.warning(
                "SubQuery::run_children: no child node. The query plan could be improved"
            )
            self.send(LastRecord())
            return

        # Inspect the first parent record to deduce which fields have already
        # been fetched
        first_record = self.parent_output[0]
        parent_fields = set(first_record.keys())

        # Relation types for which the parent holds a list of elements
        list_relation_types = [
            Relation.types.LINK_1N,
            Relation.types.LINK_1N_BACKWARDS
        ]

        # Optimize child queries according to the fields already retrieved thanks
        # to the parent query.
        useless_children = set()
        for i, child in enumerate(self.children[:]):
            # Test whether the current child provides relevant fields (e.g.
            # fields not already fetched in the parent record). If so, reduce
            # the set of queried field in order to only retrieve relevant fields.
            child_fields = child.get_query().get_select()
            relation = self.relations[i]
            relation_name = relation.get_relation_name()

            already_fetched_fields = set()
            if relation_name in parent_fields:
                fetched = first_record[relation_name]
                if fetched:
                    if relation.get_type() in list_relation_types:
                        # If we do not have a Record, we have only keys, so
                        # it's like we had no field of importance...
                        if isinstance(fetched[0], Record):
                            already_fetched_fields = set(fetched[0].keys())
                    else:
                        already_fetched_fields = set(fetched.keys())

            # XXX routerv2: we need to keep key used for subquery
            key_field = relation.get_predicate().get_value()

            relevant_fields = child_fields - already_fetched_fields

            if not relevant_fields:
                # Everything is already in the parent records: reuse them as
                # the results of this child
                child_records = []
                for parent_record in self.parent_output:
                    child_records.extend(parent_record[relation_name])
                self.child_results[i] = child_records  # Records

                useless_children.add(i)
                continue

            relevant_fields |= frozenset([key_field])  # necessary ?
            if child_fields != relevant_fields:
                # XXX This seems to remove the key used for joining
                self.children[i] = child.optimize_projection(relevant_fields)

        # If every children are useless, this means that we already have full records
        # thanks to the parent query, so we simply forward those records.
        if len(self.children) == len(useless_children):
            for parent_record in self.parent_output:
                self.send(parent_record)
            self.send(LastRecord())
            return

        # Loop through children and inject the appropriate parent results
        for i, child in enumerate(self.children):
            if i in useless_children:
                continue

            # We have two cases:
            # (1) either the parent query has subquery fields (a list of child
            #     ids + eventually some additional information)
            # (2) either the child has a backreference to the parent
            #     ... eventually a partial reference in case of a 1..N relationship
            #
            # In all cases, we will collect all identifiers to proceed to a
            # single child query for efficiency purposes, unless it's not
            # possible (?).
            #
            # We have several parent records stored in self.parent_output
            #
            # /!\ Can we have a mix of (1) and (2) ? For now, let's suppose NO.
            #  *  We could expect key information to be stored in the DBGraph

            # The operation to be performed is understood only be looking at the predicate
            relation = self.relations[i]
            predicate = relation.get_predicate()

            key, op, value = predicate.get_tuple()
            if op == eq:
                # 1..N
                # Example: parent has slice_hrn, resource has a reference to slice
                relation_type = relation.get_type()
                if relation_type == Relation.types.LINK_1N_BACKWARDS:
                    parent_ids = [
                        parent_record[key]
                        for parent_record in self.parent_output
                    ]
                else:
                    holds_list = relation_type in list_relation_types
                    parent_ids = []
                    for parent_record in self.parent_output:
                        record = Record.get_value(parent_record, key)
                        if not record:
                            record = []
                        # XXX Nothing to do for the case where the list of keys in the parent is empty
                        if holds_list:
                            # we have a list of elements
                            # element = id or dict    : simple key
                            #         = tuple or dict : composite key
                            parent_ids.extend([
                                self.get_element_key(r, value) for r in record
                            ])
                        else:
                            parent_ids.append(
                                self.get_element_key(record, value))

                # A single identifier is matched by equality, several (or
                # none) by inclusion
                if len(parent_ids) == 1:
                    filter_pred = Predicate(value, eq, parent_ids[0])
                else:
                    filter_pred = Predicate(value, included, parent_ids)

                # Injecting predicate
                old_child_callback = child.get_callback()
                self.children[i] = child.optimize_selection(
                    Filter().filter_by(filter_pred))
                self.children[i].set_callback(old_child_callback)

            elif op == contains:
                # 1..N
                # Example: parent 'slice' has a list of 'user' keys == user_hrn
                for parent_record in self.parent_output:
                    if not child.get_query().object in parent_record:
                        continue
                    # elements is either a list of ids, or a list of records
                    elements = parent_record[key]
                    element_data = []
                    for element in elements:
                        if isinstance(element, dict):
                            element_data.append(element)
                        else:
                            # we have a key
                            # XXX Take multiple keys into account
                            element_data.append({value: element})
                    # Let's inject element_data in the right child
                    # NOTE(review): the value returned by inject() is ignored
                    # - confirm the child is modified in place
                    child.inject(element_data, value, None)

            else:
                raise Exception("No link between parent and child queries")

        # We make another loop since the children might have been modified in
        # the previous one. All children are marked as started before any of
        # them is actually started.
        for i, child in enumerate(self.children):
            if i in useless_children:
                continue
            self.status.started(i)
        for i, child in enumerate(self.children):
            if i in useless_children:
                continue
            child.start()
Exemple #12
0
 def success_cb(self, table):
     """
     Forward every row of a successful result to self.callback.

     Each element of table is wrapped in a Record and passed to
     self.callback, then a LastRecord is passed to mark the end.

     Args:
         table: The rows received; it must support len() and iteration.
     """
     # Written as a single formatted string so that the same text is printed
     # whether print is a statement or a function
     print("Manifold SUCCESS %d" % len(table))
     for record in table:
         self.callback(Record(record))
     self.callback(LastRecord())