def clear(self):
    """Reset every query attribute to its default value.

    After the call the query is a 'get' on no object, with an empty
    filter, no parameter, no selected field and the 'now' timestamp.
    """
    self.action, self.object = 'get', None
    self.timestamp = 'now'  # ignored for now
    self.fields = set()
    self.params = {}
    self.filters = Filter()
def prune_from_query(query, found_fields):
    """Remove from ``query`` everything that relates to ``found_fields``.

    The query is modified in place: its selected fields become the
    previously selected ones minus ``found_fields``, and its filter only
    keeps the predicates whose key is not in ``found_fields``.

    Args:
        query: The query to prune (modified in place).
        found_fields: The set of field names to remove.
    """
    # Fields: reset the selection, then select what is left.
    remaining_fields = query.get_select() - found_fields
    emptied = query.select(None)
    emptied.select(remaining_fields)

    # Filters: rebuild a filter without the predicates on found fields.
    kept = Filter()
    for predicate in query.get_where():
        if predicate.get_key() in found_fields:
            continue
        kept.add(predicate)
    unfiltered = query.filter_by(None)
    unfiltered.filter_by(kept)
def optimize_selection(self, filter):
    """Push the predicates of ``filter`` down into the children.

    A predicate is handed to a child when all the field names it involves
    belong to the fields selected by that child's query. A child receiving
    at least one predicate is replaced by the node returned by its own
    optimize_selection().

    Args:
        filter: An iterable of predicates.
    Returns:
        This node (self).
    """
    for index, node in enumerate(self.children):
        available = node.query.get_select()
        pushed = Filter()
        for pred in filter:
            if pred.get_field_names() <= available:
                pushed.add(pred)
        if not pushed:
            # Nothing applies to this child: leave it untouched.
            continue
        self.children[index] = node.optimize_selection(pushed)
    return self
def build_simple(self, query, metadata, allowed_capabilities):
    """
    Builds a QueryPlan (self) related to a single Gateway.
    This is used only by a Forwarder. This function will probably soon
    become DEPRECATED.
    If several Gateways are involved, you must use QueryPlan::build.

    Args:
        query: The Query issued by the user. Its filters (resp. fields)
            are emptied when the selection (resp. projection) has to be
            performed on top of the platform instead of by it.
        metadata: The announces, indexed by platform name and then by
            object name. It must also provide get_key() and
            get_capabilities().
        allowed_capabilities: The Capabilities related to this Gateway.
    Raises:
        Exception: if the platform cannot perform a needed selection
            (resp. projection) and allowed_capabilities does not allow
            performing it either.
    """
    # XXX allowed_capabilities should be a property of the query plan !

    # XXX Check whether we can answer query.object

    # Here we assume we have a single platform.
    # list() keeps this working when keys() returns a view and not a list.
    platform = list(metadata.keys())[0]
    announce = metadata[platform][query.get_from()]  # eg. table test

    # Set up an AST for missing capabilities (need configuration)

    # Selection ?
    if query.filters and not announce.capabilities.selection:
        if not allowed_capabilities.selection:
            raise Exception('Cannot answer query: SELECTION')
        add_selection = query.filters
        query.filters = Filter()
    else:
        add_selection = None

    # Projection ?
    announce_fields = announce.get_table().get_fields()
    if query.fields < announce_fields and not announce.capabilities.projection:
        if not allowed_capabilities.projection:
            raise Exception('Cannot answer query: PROJECTION')
        add_projection = query.fields
        query.fields = set()
    else:
        add_projection = None

    table = Table({platform: ''}, {}, query.get_from(), set(), set())
    key = metadata.get_key(query.get_from())
    capabilities = metadata.get_capabilities(platform, query.get_from())
    self.ast = self.ast.From(table, query, capabilities, key)

    # XXX associate the From node to the Gateway
    from_node = self.ast.get_root()
    self.add_from(from_node)
    #from_node.set_gateway(gw_or_router)
    #gw_or_router.query = query

    if not self.root:
        return

    # Re-add on top of the From node what the platform could not do.
    if add_selection:
        self.ast.optimize_selection(add_selection)
    if add_projection:
        self.ast.optimize_projection(add_projection)

    self.inject_at(query)
def optimize_selection(self, filter):
    """Push a selection through this sub-query node.

    Predicates involving only fields selected by the parent query are
    pushed into the parent. The other ones are logged as not supported
    yet and applied by a Selection node built on top of this node.

    Args:
        filter: An iterable of predicates.
    Returns:
        A Selection wrapping this node when some predicates could not be
        pushed into the parent, this node (self) otherwise.
    """
    top_filter, parent_filter = Filter(), Filter()

    for pred in filter:
        names = pred.get_field_names()
        if names <= self.parent.get_query().get_select():
            parent_filter.add(pred)
            continue
        Log.warning(
            "SubQuery::optimize_selection() is only partially implemented : %r"
            % pred)
        top_filter.add(pred)

    if parent_filter:
        self.parent = self.parent.optimize_selection(parent_filter)
        # The parent node may have been replaced: plug the callback again.
        self.parent.set_callback(self.parent_callback)

    return Selection(self, top_filter) if top_filter else self
def sanitize(self):
    """Normalize the attributes of the query, in place.

    - Empty/missing filters, params, fields and timestamp are replaced by
      their defaults (empty Filter, {}, empty set, "now").
    - A list of filters is turned into a Filter of Predicate instances,
      a Clause is converted through Filter.from_clause().
    - Fields given as a list, tuple or frozenset are turned into a
      mutable set, since select() adds to self.fields in place.

    Raises:
        TypeError: if a field name is not a string.
    """
    if not self.filters:
        self.filters = Filter()
    if not self.params:
        self.params = {}
    if not self.fields:
        self.fields = set()
    if not self.timestamp:
        self.timestamp = "now"

    if isinstance(self.filters, list):
        raw_filters = self.filters
        self.filters = Filter()
        for raw in raw_filters:
            self.filters.add(Predicate(raw))
    elif isinstance(self.filters, Clause):
        self.filters = Filter.from_clause(self.filters)

    # Tuples and frozensets used to be kept as is, which made select()
    # fail later on (no add() method).
    if isinstance(self.fields, (list, tuple, frozenset)):
        self.fields = set(self.fields)

    for field in self.fields:
        if not isinstance(field, StringTypes):
            raise TypeError("Invalid field name %s (string expected, got %s)" % (field, type(field)))
def left_callback(self, record):
    """
    Process records received by the left child.

    Regular records are stored in self.left_map, indexed by the value of
    the join key. Once the last record is received, the right child is
    configured with the collected keys and started.

    Args:
        record: A dictionary representing the received record.
    Raises:
        NotImplementedError: if the right query is a creation and several
            keys were collected from the left child.
    """
    if record.is_last():
        # left_done. Injection is not the right way to do this.
        # We need to insert a filter on the key in the right member.
        # list() so that this also works when keys() returns a view.
        left_keys = list(self.left_map.keys())
        predicate = Predicate(self.predicate.get_value(), included, left_keys)

        if self.right.get_query().action == ACTION_CREATE:
            # XXX If multiple insert, we need to match the right ID with the
            # right inserted items
            if len(left_keys) > 1:
                # NotImplemented is a comparison sentinel, not an exception.
                raise NotImplementedError(
                    "LEFT JOIN with several left keys on a create query")

            # Pass the id as a param
            if not left_keys:
                # No JOIN possible
                self.left_done = True
                self._on_right_done()
                return
            query = self.right.get_query()
            query.params[self.predicate.get_value()] = left_keys[0]
        else:
            # Pass the id as a filter, which is the normal behaviour
            self.right = self.right.optimize_selection(
                Filter().filter_by(predicate))
            self.right.set_callback(
                self.right_callback)  # already done in __init__ ?

        self.left_done = True
        self.right.start()
        return

    # Directly send records missing information necessary to join
    # XXXX !!! XXX XXX XXX
    if not Record.has_fields(record, self.predicate.get_field_names()):
        Log.warning("Missing LEFTJOIN predicate %s in left record %r : forwarding" % \
                (self.predicate, record))
        self.send(record)
        # NOTE(review): the forwarded record is still stored below;
        # confirm whether an early return is intended here.

    # Store the result in a hash for joining later
    hash_key = Record.get_value(record, self.predicate.key)
    if hash_key not in self.left_map:
        self.left_map[hash_key] = []
    self.left_map[hash_key].append(record)
def filter_by(self, *args):
    """
    Add predicates to the filter of this query, or reset it.

    Args:
        args: It may be:
            - the parts of a Predicate (key, op, value)
            - None (the filter is replaced by an empty one)
            - a Filter instance
            - a set/frozenset/list/tuple of Predicate instances
            - a single Predicate instance
    Returns:
        The self Query instance.
    Raises:
        Exception: if neither 1 nor 3 arguments are passed.
    """
    if len(args) == 1:
        filters = args[0]
        if filters is None:
            self.filters = Filter()
            return self
        # Builtin containers are tested first, then Filter. A frozenset
        # used to be added as if it were a single predicate.
        is_collection = (isinstance(filters, (set, frozenset, list, tuple))
                         or isinstance(filters, Filter))
        if not is_collection:
            filters = [filters]
        for predicate in filters:
            self.filters.add(predicate)
    elif len(args) == 3:
        self.filters.add(Predicate(*args))
    else:
        raise Exception('Invalid expression for filter')
    return self
def filter_by(self, *args):
    """
    Add predicates to the filter of this query, or reset it.

    Args:
        args: It may be:
            - the parts of a Predicate (key, op, value)
            - None (the filter is replaced by an empty one)
            - a Filter instance
            - a set/frozenset/list/tuple of Predicate instances
            - a single Predicate instance
    Returns:
        The self Query instance.
    Raises:
        Exception: if neither 1 nor 3 arguments are passed.
    """
    if len(args) == 1:
        filters = args[0]
        if filters is None:
            self.filters = Filter()
            return self
        # Builtin containers are tested first, then Filter. A frozenset
        # used to be added as if it were a single predicate.
        is_collection = (isinstance(filters, (set, frozenset, list, tuple))
                         or isinstance(filters, Filter))
        if not is_collection:
            filters = [filters]
        for predicate in filters:
            self.filters.add(predicate)
    elif len(args) == 3:
        self.filters.add(Predicate(*args))
    else:
        # Call syntax: valid on both Python 2 and Python 3.
        raise Exception('Invalid expression for filter')
    return self
def optimize_selection(self, filter):
    """
    Push a selection down through this LEFT JOIN node.

    Each predicate of ``filter`` is dispatched according to the fields
    selected by the queries of the left and right children; see the
    comments below for the rules.

    Args:
        filter: An iterable of predicates.
    Returns:
        A Selection node wrapping this node (and taking over its
        callback) when some predicates could not be pushed down,
        this node (self) otherwise.
    """
    # LEFT JOIN
    # We are pushing selections down as much as possible:
    # - selection on filters on the left: can push down in the left child
    # - selection on filters on the right: cannot push down unless the field is on both sides
    # - selection on filters on the key / common fields ??? TODO
    parent_filter, left_filter, right_filter = Filter(), Filter(), Filter()
    for predicate in filter:
        # Temporary debugging traces
        Log.tmp("1) predicate.get_field_names() = %s" % predicate.get_field_names())
        Log.tmp("2) self.left.get_query().get_select() = %s" % self.left.get_query().get_select())
        Log.tmp("3) lquery = %s" % self.left.get_query())
        # NOTE(review): the right-side test is assumed to be nested under
        # the left-side one, as the rules above describe - confirm.
        if predicate.get_field_names() <= self.left.get_query().get_select():
            left_filter.add(predicate)
            if predicate.get_field_names() <= self.right.get_query().get_select():
                right_filter.add(predicate)
        else:
            # Not fully covered by the left child: handled above the join
            parent_filter.add(predicate)

    if left_filter:
        self.left = self.left.optimize_selection(left_filter)
        #selection = Selection(self.left, left_filter)
        #selection.query = self.left.copy().filter_by(left_filter)
        # The left child may have been replaced: set its callback again
        self.left.set_callback(self.left_callback)
        #self.left = selection

    if right_filter:
        self.right = self.right.optimize_selection(right_filter)
        # Same as above for the right child
        self.right.set_callback(self.right_callback)

    if parent_filter:
        # The new Selection node receives the callback this node had
        old_self_callback = self.get_callback()
        selection = Selection(self, parent_filter)
        # XXX do we need to set query here ?
        #selection.query = self.query.copy().filter_by(parent_filter)
        selection.set_callback(old_self_callback)
        return selection
    return self
def __init__(self, *args, **kwargs):
    """Build a query from positional values, a dict or keywords.

    Supported forms:
        - (object, filters, fields)
        - (object, filters, params, fields)
        - (action, object, filters, params, fields, timestamp)
        - a single dict, or keyword arguments, holding "object" and
          optionally "action", "filters", "fields", "params", "timestamp"

    Any attribute which is not provided keeps the default set by clear().
    The dict passed by the caller is left untouched.

    Raises:
        ParameterError: if the dict/keywords hold "object" together with
            unknown keys.
        ValueError: if 2 or 5 positional values are passed (they cannot
            be unpacked into the 6 expected attributes).
    """
    self.query_uuid = uniqid()

    # Initialize optional parameters
    self.clear()

    len_args = len(args)
    if len_args == 1 and isinstance(args[0], dict):
        # Work on a copy: the keys are consumed below and this must not
        # alter the dict owned by the caller.
        kwargs = dict(args[0])
        args = []

    # Initialization from a tuple
    if 2 <= len_args <= 6:
        # XXX UGLY
        if len_args == 3:
            self.action = 'get'
            self.params = {}
            self.timestamp = 'now'
            self.object, self.filters, self.fields = args
        elif len_args == 4:
            self.object, self.filters, self.params, self.fields = args
            self.action = 'get'
            self.timestamp = 'now'
        else:
            self.action, self.object, self.filters, self.params, self.fields, self.timestamp = args

    # Initialization from a dict
    elif "object" in kwargs:
        if "action" in kwargs:
            self.action = kwargs.pop("action")
        else:
            print("W: defaulting to get action")
            self.action = "get"

        self.object = kwargs.pop("object")
        self.filters = kwargs.pop("filters") if "filters" in kwargs else Filter()
        self.fields = set(kwargs.pop("fields")) if "fields" in kwargs else set()
        # "update table set x = 3" => params == set
        self.params = kwargs.pop("params", {})
        self.timestamp = kwargs.pop("timestamp", "now")

        # Whatever remains is unknown
        if kwargs:
            raise ParameterError("Invalid parameter(s) : %r" % kwargs.keys())
    #else:
    #    raise ParameterError, "No valid constructor found for %s : args = %r" % (self.__class__.__name__, args)

    self.sanitize()
class Query(object):
    """
    Implements a TopHat query.

    We assume this is a correct DAG specification.

    1/ A field designates several tables = OR specification.
    2/ The set of fields specifies a AND between OR clauses.
    """

    #---------------------------------------------------------------------------
    # Constructor
    #---------------------------------------------------------------------------

    def __init__(self, *args, **kwargs):
        """Build a query from positional values, a dict or keywords.

        Supported forms:
            - (object, filters, fields)
            - (object, filters, params, fields)
            - (action, object, filters, params, fields, timestamp)
            - a single dict, or keyword arguments, holding "object" and
              optionally "action", "filters", "fields", "params",
              "timestamp"

        Any attribute which is not provided keeps the default set by
        clear(). The dict passed by the caller is left untouched.

        Raises:
            ParameterError: if the dict/keywords hold "object" together
                with unknown keys.
            ValueError: if 2 or 5 positional values are passed (they
                cannot be unpacked into the 6 expected attributes).
        """
        self.query_uuid = uniqid()

        # Initialize optional parameters
        self.clear()

        len_args = len(args)
        if len_args == 1 and isinstance(args[0], dict):
            # Work on a copy: the keys are consumed below and this must
            # not alter the dict owned by the caller.
            kwargs = dict(args[0])
            args = []

        # Initialization from a tuple
        if 2 <= len_args <= 6:
            # XXX UGLY
            if len_args == 3:
                self.action = 'get'
                self.params = {}
                self.timestamp = 'now'
                self.object, self.filters, self.fields = args
            elif len_args == 4:
                self.object, self.filters, self.params, self.fields = args
                self.action = 'get'
                self.timestamp = 'now'
            else:
                self.action, self.object, self.filters, self.params, self.fields, self.timestamp = args

        # Initialization from a dict
        elif "object" in kwargs:
            if "action" in kwargs:
                self.action = kwargs.pop("action")
            else:
                print("W: defaulting to get action")
                self.action = "get"

            self.object = kwargs.pop("object")
            self.filters = kwargs.pop("filters") if "filters" in kwargs else Filter()
            self.fields = set(kwargs.pop("fields")) if "fields" in kwargs else set()
            # "update table set x = 3" => params == set
            self.params = kwargs.pop("params", {})
            self.timestamp = kwargs.pop("timestamp", "now")

            # Whatever remains is unknown
            if kwargs:
                raise ParameterError("Invalid parameter(s) : %r" % kwargs.keys())
        #else:
        #    raise ParameterError, "No valid constructor found for %s : args = %r" % (self.__class__.__name__, args)

        self.sanitize()

    def sanitize(self):
        """Normalize the attributes of the query, in place.

        Empty attributes get their default value, a list of filters or a
        Clause is converted into a Filter, and fields given as a list,
        tuple or frozenset are turned into a mutable set (select() adds
        to self.fields in place).

        Raises:
            TypeError: if a field name is not a string.
        """
        if not self.filters:
            self.filters = Filter()
        if not self.params:
            self.params = {}
        if not self.fields:
            self.fields = set()
        if not self.timestamp:
            self.timestamp = "now"

        if isinstance(self.filters, list):
            raw_filters = self.filters
            self.filters = Filter()
            for raw in raw_filters:
                self.filters.add(Predicate(raw))
        elif isinstance(self.filters, Clause):
            self.filters = Filter.from_clause(self.filters)

        # Tuples and frozensets used to be kept as is, which made
        # select() fail later on (no add() method).
        if isinstance(self.fields, (list, tuple, frozenset)):
            self.fields = set(self.fields)

        for field in self.fields:
            if not isinstance(field, str):
                raise TypeError(
                    "Invalid field name %s (string expected, got %s)" %
                    (field, type(field)))

    #---------------------------------------------------------------------------
    # Helpers
    #---------------------------------------------------------------------------

    def copy(self):
        """Return a deep copy of this query."""
        return copy.deepcopy(self)

    def clear(self):
        """Reset every attribute of the query to its default value."""
        self.action = 'get'
        self.object = None
        self.filters = Filter()
        self.params = {}
        self.fields = set()
        self.timestamp = 'now'  # ignored for now

    def to_sql(self, platform='', multiline=False):
        """Return the SQL-like string describing this query.

        Args:
            platform: An optional platform name, prepended to the table
                name as "platform:".
            multiline: Separate the clauses with a newline instead of a
                space.
        Raises:
            KeyError: if self.action is not one of 'get', 'update',
                'create', 'delete'.
        """
        get_params_str = lambda: ', '.join(
            ['%s = %r' % (k, v) for k, v in self.get_params().items()])
        get_select_str = lambda: ', '.join(self.get_select())

        # The names below are looked up by the templates through locals().
        table = self.get_from()
        select = 'SELECT %s' % (get_select_str() if self.get_select() else '*')
        where = 'WHERE %s' % self.get_where() if self.get_where() else ''
        at = 'AT %s' % self.get_timestamp() if self.get_timestamp() else ''
        params = 'SET %s' % get_params_str() if self.get_params() else ''

        sep = ' ' if not multiline else '\n '
        if platform:
            platform = "%s:" % platform
        strmap = {
            'get': '%(select)s%(sep)s%(at)s%(sep)sFROM %(platform)s%(table)s%(sep)s%(where)s%(sep)s',
            'update': 'UPDATE %(platform)s%(table)s%(sep)s%(params)s%(sep)s%(where)s%(sep)s%(select)s',
            'create': 'INSERT INTO %(platform)s%(table)s%(sep)s%(params)s%(sep)s%(select)s',
            'delete': 'DELETE FROM %(platform)s%(table)s%(sep)s%(where)s'
        }

        return strmap[self.action] % locals()

    @returns(str)
    def __str__(self):
        return self.to_sql(multiline=True)

    @returns(str)
    def __repr__(self):
        return self.to_sql()

    def __key(self):
        # The timestamp and the uuid do not take part in the key.
        return (self.action, self.object, self.filters,
                frozendict(self.params), frozenset(self.fields))

    def __hash__(self):
        return hash(self.__key())

    #---------------------------------------------------------------------------
    # Conversion
    #---------------------------------------------------------------------------

    def to_dict(self):
        """Return the query as a dict made of plain lists and dicts."""
        return {
            'action': self.action,
            'object': self.object,
            'timestamp': self.timestamp,
            'filters': self.filters.to_list(),
            'params': self.params,
            'fields': list(self.fields)
        }

    def to_json(self, analyzed_query=None):
        """Return the javascript expression building the ManifoldQuery.

        Args:
            analyzed_query: An optional object providing to_json(), whose
                output is embedded in the result ('null' otherwise).
        """
        # The short names below are looked up by the template via locals().
        query_uuid = self.query_uuid
        a = self.action
        o = self.object
        t = self.timestamp
        f = json.dumps(self.filters.to_list())
        p = json.dumps(self.params)
        c = json.dumps(list(self.fields))
        # xxx unique can be removed, but for now we pad the js structure
        unique = 0

        if not analyzed_query:
            aq = 'null'
        else:
            aq = analyzed_query.to_json()
        sq = "{}"

        result = """ new ManifoldQuery('%(a)s', '%(o)s', '%(t)s', %(f)s, %(p)s, %(c)s, %(unique)s, '%(query_uuid)s', %(aq)s, %(sq)s)""" % locals()
        if debug:
            print('ManifoldQuery.to_json:', result)
        return result

    # this builds a ManifoldQuery object from a dict as received from javascript through its ajax request
    # we use a json-encoded string - see manifold.js for the sender part
    # e.g. here's what I captured from the server's output
    # manifoldproxy.proxy: request.POST <QueryDict: {u'json': [u'{"action":"get","object":"resource","timestamp":"latest","filters":[["slice_hrn","=","ple.inria.omftest"]],"params":[],"fields":["hrn","hostname"],"unique":0,"query_uuid":"436aae70a48141cc826f88e08fbd74b1","analyzed_query":null,"subqueries":{}}']}>
    def fill_from_POST(self, POST_dict):
        """Set the attributes of this query from POST_dict['json'].

        Every key of the decoded JSON object is set as an attribute.
        Decoding errors are reported on stdout (best effort), and the
        query is sanitized in every case.
        """
        try:
            json_string = POST_dict['json']
            decoded = json.loads(json_string)
            # items(): iteritems() does not exist on Python 3 dicts, so
            # this method used to fail on every request.
            for (k, v) in decoded.items():
                setattr(self, k, v)
        except Exception:
            print("Could not decode incoming ajax request as a Query, POST=",
                  POST_dict)
            if (debug):
                import traceback
                traceback.print_exc()
        self.sanitize()

    #---------------------------------------------------------------------------
    # Accessors
    #---------------------------------------------------------------------------

    @returns(str)
    def get_action(self):
        return self.action

    @returns(frozenset)
    def get_select(self):
        return frozenset(self.fields)

    @returns(str)
    def get_from(self):
        return self.object

    @returns(Filter)
    def get_where(self):
        return self.filters

    @returns(dict)
    def get_params(self):
        return self.params

    @returns(str)
    def get_timestamp(self):
        return self.timestamp

#DEPRECATED#
#DEPRECATED#    def make_filters(self, filters):
#DEPRECATED#        return Filter(filters)
#DEPRECATED#
#DEPRECATED#    def make_fields(self, fields):
#DEPRECATED#        if isinstance(fields, (list, tuple)):
#DEPRECATED#            return set(fields)
#DEPRECATED#        else:
#DEPRECATED#            raise Exception, "Invalid field specification"

    #---------------------------------------------------------------------------
    # LINQ-like syntax
    #---------------------------------------------------------------------------

    @classmethod
    #@returns(Query)
    def action(cls, action, object):
        """
        (Internal usage). Craft a Query according to an action name
        See methods: get, update, delete, execute.
        Args:
            action: A String among {"get", "update", "delete", "execute"}
            object: The name of the queried object (String)
        Returns:
            The corresponding Query instance
        """
        query = Query()
        query.action = action
        query.object = object
        return query

    @classmethod
    #@returns(Query)
    def get(cls, object):
        """
        Craft the Query which fetches the records related to a given object
        Args:
            object: The name of the queried object (String)
        Returns:
            The corresponding Query instance
        """
        return cls.action("get", object)

    @classmethod
    #@returns(Query)
    def update(cls, object):
        """
        Craft the Query which updates the records related to a given object
        Args:
            object: The name of the queried object (String)
        Returns:
            The corresponding Query instance
        """
        return cls.action("update", object)

    @classmethod
    #@returns(Query)
    def create(cls, object):
        """
        Craft the Query which create the records related to a given object
        Args:
            object: The name of the queried object (String)
        Returns:
            The corresponding Query instance
        """
        return cls.action("create", object)

    @classmethod
    #@returns(Query)
    def delete(cls, object):
        """
        Craft the Query which delete the records related to a given object
        Args:
            object: The name of the queried object (String)
        Returns:
            The corresponding Query instance
        """
        return cls.action("delete", object)

    @classmethod
    #@returns(Query)
    def execute(cls, object):
        """
        Craft the Query which execute a processing related to a given object
        Args:
            object: The name of the queried object (String)
        Returns:
            The corresponding Query instance
        """
        return cls.action("execute", object)

    #@returns(Query)
    def at(self, timestamp):
        """
        Set the timestamp carried by the query
        Args:
            timestamp: The timestamp (it may be a python timestamp, a string
                respecting the "%Y-%m-%d %H:%M:%S" python format, or "now")
        Returns:
            The self Query instance
        """
        self.timestamp = timestamp
        return self

    def filter_by(self, *args):
        """
        Add predicates to the filter of this query, or reset it.
        Args:
            args: It may be:
                - the parts of a Predicate (key, op, value)
                - None (the filter is replaced by an empty one)
                - a Filter instance
                - a set/frozenset/list/tuple of Predicate instances
                - a single Predicate instance
        Returns:
            The self Query instance
        Raises:
            Exception: if neither 1 nor 3 arguments are passed.
        """
        if len(args) == 1:
            filters = args[0]
            if filters is None:
                self.filters = Filter()
                return self
            # A frozenset used to be added as if it were one predicate.
            if not isinstance(filters, (set, frozenset, list, tuple, Filter)):
                filters = [filters]
            for predicate in filters:
                self.filters.add(predicate)
        elif len(args) == 3:
            self.filters.add(Predicate(*args))
        else:
            raise Exception('Invalid expression for filter')
        return self

    def select(self, *fields):
        """
        Add fields to the selection, or reset it.
        Args:
            fields: Field names, or a single list/tuple/set/frozenset of
                field names. No field or a single empty value (None, "",
                empty collection) removes every selected field.
        Returns:
            The self Query instance
        """
        # Accept passing iterables
        if len(fields) == 1:
            tmp, = fields
            if not tmp:
                fields = None
            elif isinstance(tmp, (list, tuple, set, frozenset)):
                fields = tuple(tmp)

        if not fields:
            # Delete all fields
            self.fields = set()
            return self

        for field in fields:
            self.fields.add(field)
        return self

    def set(self, params):
        """Merge the params dict into the query parameters; return self."""
        self.params.update(params)
        return self

    def __or__(self, query):
        """Return a new query with the union of both filters and fields.

        The params of the operands are not carried over to the result.
        """
        assert self.action == query.action
        assert self.object == query.object
        assert self.timestamp == query.timestamp  # XXX
        filters = self.filters | query.filters
        fields = self.fields | query.fields
        return Query.action(self.action, self.object).filter_by(filters).select(fields)

    def __and__(self, query):
        """Return a new query with the intersection of filters and fields.

        The params of the operands are not carried over to the result.
        """
        assert self.action == query.action
        assert self.object == query.object
        assert self.timestamp == query.timestamp  # XXX
        filters = self.filters & query.filters
        fields = self.fields & query.fields
        return Query.action(self.action, self.object).filter_by(filters).select(fields)

    def __le__(self, query):
        # NOTE(review): no __eq__ is defined in this class, so unless it is
        # provided elsewhere these comparisons are identity tests - confirm.
        return (self == self & query) or (query == self | query)
def __init__(self, *args, **kwargs):
    """Build a query from positional values, a dict or keywords.

    Supported forms:
        - (object, filters, fields)
        - (object, filters, params, fields)
        - (action, object, filters, params, fields, timestamp)
        - a single dict, or keyword arguments, holding "object" and
          optionally "action", "filters", "fields", "params", "timestamp"

    Any attribute which is not provided keeps the default set by clear().
    The dict passed by the caller is left untouched.

    Raises:
        ParameterError: if the dict/keywords hold "object" together with
            unknown keys.
        ValueError: if 2 or 5 positional values are passed (they cannot
            be unpacked into the 6 expected attributes).
    """
    self.query_uuid = uniqid()

    # Initialize optional parameters
    self.clear()

    len_args = len(args)
    if len_args == 1 and isinstance(args[0], dict):
        # Work on a copy: the keys are consumed below and this must not
        # alter the dict owned by the caller.
        kwargs = dict(args[0])
        args = []

    # Initialization from a tuple
    if 2 <= len_args <= 6:
        # XXX UGLY
        if len_args == 3:
            self.action = 'get'
            self.params = {}
            self.timestamp = 'now'
            self.object, self.filters, self.fields = args
        elif len_args == 4:
            self.object, self.filters, self.params, self.fields = args
            self.action = 'get'
            self.timestamp = 'now'
        else:
            self.action, self.object, self.filters, self.params, self.fields, self.timestamp = args

    # Initialization from a dict
    elif "object" in kwargs:
        if "action" in kwargs:
            self.action = kwargs.pop("action")
        else:
            # Parenthesized single argument: same output on Python 2 and 3.
            print("W: defaulting to get action")
            self.action = "get"

        self.object = kwargs.pop("object")
        self.filters = kwargs.pop("filters") if "filters" in kwargs else Filter()
        self.fields = set(kwargs.pop("fields")) if "fields" in kwargs else set()
        # "update table set x = 3" => params == set
        self.params = kwargs.pop("params", {})
        self.timestamp = kwargs.pop("timestamp", "now")

        # Whatever remains is unknown
        if kwargs:
            raise ParameterError("Invalid parameter(s) : %r" % kwargs.keys())
    #else:
    #    raise ParameterError, "No valid constructor found for %s : args = %r" % (self.__class__.__name__, args)

    self.sanitize()
class Query(object):
    """
    Implements a TopHat query.

    We assume this is a correct DAG specification.

    1/ A field designates several tables = OR specification.
    2/ The set of fields specifies a AND between OR clauses.
    """

    #---------------------------------------------------------------------------
    # Constructor
    #---------------------------------------------------------------------------

    def __init__(self, *args, **kwargs):
        """Build a query from positional values, a dict or keywords.

        Supported forms:
            - (object, filters, fields)
            - (object, filters, params, fields)
            - (action, object, filters, params, fields, timestamp)
            - a single dict, or keyword arguments, holding "object" and
              optionally "action", "filters", "fields", "params",
              "timestamp"

        Any attribute which is not provided keeps the default set by
        clear(). The dict passed by the caller is left untouched.

        Raises:
            ParameterError: if the dict/keywords hold "object" together
                with unknown keys.
            ValueError: if 2 or 5 positional values are passed (they
                cannot be unpacked into the 6 expected attributes).
        """
        self.query_uuid = uniqid()

        # Initialize optional parameters
        self.clear()

        len_args = len(args)
        if len_args == 1 and isinstance(args[0], dict):
            # Work on a copy: the keys are consumed below and this must
            # not alter the dict owned by the caller.
            kwargs = dict(args[0])
            args = []

        # Initialization from a tuple
        if 2 <= len_args <= 6:
            # XXX UGLY
            if len_args == 3:
                self.action = 'get'
                self.params = {}
                self.timestamp = 'now'
                self.object, self.filters, self.fields = args
            elif len_args == 4:
                self.object, self.filters, self.params, self.fields = args
                self.action = 'get'
                self.timestamp = 'now'
            else:
                self.action, self.object, self.filters, self.params, self.fields, self.timestamp = args

        # Initialization from a dict
        elif "object" in kwargs:
            if "action" in kwargs:
                self.action = kwargs.pop("action")
            else:
                # Parenthesized single argument: same output on Python 2 and 3.
                print("W: defaulting to get action")
                self.action = "get"

            self.object = kwargs.pop("object")
            self.filters = kwargs.pop("filters") if "filters" in kwargs else Filter()
            self.fields = set(kwargs.pop("fields")) if "fields" in kwargs else set()
            # "update table set x = 3" => params == set
            self.params = kwargs.pop("params", {})
            self.timestamp = kwargs.pop("timestamp", "now")

            # Whatever remains is unknown
            if kwargs:
                raise ParameterError("Invalid parameter(s) : %r" % kwargs.keys())
        #else:
        #    raise ParameterError, "No valid constructor found for %s : args = %r" % (self.__class__.__name__, args)

        self.sanitize()

    def sanitize(self):
        """Normalize the attributes of the query, in place.

        Empty attributes get their default value, a list of filters or a
        Clause is converted into a Filter, and fields given as a list,
        tuple or frozenset are turned into a mutable set (select() adds
        to self.fields in place).

        Raises:
            TypeError: if a field name is not a string.
        """
        if not self.filters:
            self.filters = Filter()
        if not self.params:
            self.params = {}
        if not self.fields:
            self.fields = set()
        if not self.timestamp:
            self.timestamp = "now"

        if isinstance(self.filters, list):
            raw_filters = self.filters
            self.filters = Filter()
            for raw in raw_filters:
                self.filters.add(Predicate(raw))
        elif isinstance(self.filters, Clause):
            self.filters = Filter.from_clause(self.filters)

        # Tuples and frozensets used to be kept as is, which made
        # select() fail later on (no add() method).
        if isinstance(self.fields, (list, tuple, frozenset)):
            self.fields = set(self.fields)

        for field in self.fields:
            if not isinstance(field, StringTypes):
                raise TypeError("Invalid field name %s (string expected, got %s)" % (field, type(field)))

    #---------------------------------------------------------------------------
    # Helpers
    #---------------------------------------------------------------------------

    def copy(self):
        """Return a deep copy of this query."""
        return copy.deepcopy(self)

    def clear(self):
        """Reset every attribute of the query to its default value."""
        self.action = 'get'
        self.object = None
        self.filters = Filter()
        self.params = {}
        self.fields = set()
        self.timestamp = 'now'  # ignored for now

    def to_sql(self, platform='', multiline=False):
        """Return the SQL-like string describing this query.

        Args:
            platform: An optional platform name, prepended to the table
                name as "platform:".
            multiline: Separate the clauses with a newline instead of a
                space.
        Raises:
            KeyError: if self.action is not one of 'get', 'update',
                'create', 'delete'.
        """
        get_params_str = lambda : ', '.join(['%s = %r' % (k, v) for k, v in self.get_params().items()])
        get_select_str = lambda : ', '.join(self.get_select())

        # The names below are looked up by the templates through locals().
        table = self.get_from()
        select = 'SELECT %s' % (get_select_str() if self.get_select() else '*')
        where = 'WHERE %s' % self.get_where() if self.get_where() else ''
        at = 'AT %s' % self.get_timestamp() if self.get_timestamp() else ''
        params = 'SET %s' % get_params_str() if self.get_params() else ''

        sep = ' ' if not multiline else '\n '
        if platform:
            platform = "%s:" % platform
        strmap = {
            'get' : '%(select)s%(sep)s%(at)s%(sep)sFROM %(platform)s%(table)s%(sep)s%(where)s%(sep)s',
            'update': 'UPDATE %(platform)s%(table)s%(sep)s%(params)s%(sep)s%(where)s%(sep)s%(select)s',
            'create': 'INSERT INTO %(platform)s%(table)s%(sep)s%(params)s%(sep)s%(select)s',
            'delete': 'DELETE FROM %(platform)s%(table)s%(sep)s%(where)s'
        }

        return strmap[self.action] % locals()

    @returns(StringTypes)
    def __str__(self):
        return self.to_sql(multiline=True)

    @returns(StringTypes)
    def __repr__(self):
        return self.to_sql()

    def __key(self):
        # The timestamp and the uuid do not take part in the key.
        return (self.action, self.object, self.filters, frozendict(self.params), frozenset(self.fields))

    def __hash__(self):
        return hash(self.__key())

    #---------------------------------------------------------------------------
    # Conversion
    #---------------------------------------------------------------------------

    def to_dict(self):
        """Return the query as a dict made of plain lists and dicts."""
        return {
            'action': self.action,
            'object': self.object,
            'timestamp': self.timestamp,
            'filters': self.filters.to_list(),
            'params': self.params,
            'fields': list(self.fields)
        }

    def to_json(self, analyzed_query=None):
        """Return the javascript expression building the ManifoldQuery.

        Args:
            analyzed_query: An optional object providing to_json(), whose
                output is embedded in the result ('null' otherwise).
        """
        # The short names below are looked up by the template via locals().
        query_uuid = self.query_uuid
        a = self.action
        o = self.object
        t = self.timestamp
        f = json.dumps(self.filters.to_list())
        p = json.dumps(self.params)
        c = json.dumps(list(self.fields))
        # xxx unique can be removed, but for now we pad the js structure
        unique = 0

        if not analyzed_query:
            aq = 'null'
        else:
            aq = analyzed_query.to_json()
        sq = "{}"

        result = """ new ManifoldQuery('%(a)s', '%(o)s', '%(t)s', %(f)s, %(p)s, %(c)s, %(unique)s, '%(query_uuid)s', %(aq)s, %(sq)s)""" % locals()
        if debug:
            # Single formatted string: same output as the former
            # "print a, b" statement, valid on Python 2 and 3.
            print('ManifoldQuery.to_json: %s' % (result,))
        return result

    # this builds a ManifoldQuery object from a dict as received from javascript through its ajax request
    # we use a json-encoded string - see manifold.js for the sender part
    # e.g. here's what I captured from the server's output
    # manifoldproxy.proxy: request.POST <QueryDict: {u'json': [u'{"action":"get","object":"resource","timestamp":"latest","filters":[["slice_hrn","=","ple.inria.omftest"]],"params":[],"fields":["hrn","hostname"],"unique":0,"query_uuid":"436aae70a48141cc826f88e08fbd74b1","analyzed_query":null,"subqueries":{}}']}>
    def fill_from_POST(self, POST_dict):
        """Set the attributes of this query from POST_dict['json'].

        Every key of the decoded JSON object is set as an attribute.
        Decoding errors are reported on stdout (best effort), and the
        query is sanitized in every case.
        """
        try:
            json_string = POST_dict['json']
            decoded = json.loads(json_string)
            # items() exists on both Python 2 and 3 dicts.
            for (k, v) in decoded.items():
                setattr(self, k, v)
        except Exception:
            print("Could not decode incoming ajax request as a Query, POST= %s" % (POST_dict,))
            if (debug):
                import traceback
                traceback.print_exc()
        self.sanitize()

    #---------------------------------------------------------------------------
    # Accessors
    #---------------------------------------------------------------------------

    @returns(StringTypes)
    def get_action(self):
        return self.action

    @returns(frozenset)
    def get_select(self):
        return frozenset(self.fields)

    @returns(StringTypes)
    def get_from(self):
        return self.object

    @returns(Filter)
    def get_where(self):
        return self.filters

    @returns(dict)
    def get_params(self):
        return self.params

    @returns(StringTypes)
    def get_timestamp(self):
        return self.timestamp

#DEPRECATED#
#DEPRECATED#    def make_filters(self, filters):
#DEPRECATED#        return Filter(filters)
#DEPRECATED#
#DEPRECATED#    def make_fields(self, fields):
#DEPRECATED#        if isinstance(fields, (list, tuple)):
#DEPRECATED#            return set(fields)
#DEPRECATED#        else:
#DEPRECATED#            raise Exception, "Invalid field specification"

    #---------------------------------------------------------------------------
    # LINQ-like syntax
    #---------------------------------------------------------------------------

    @classmethod
    #@returns(Query)
    def action(cls, action, object):
        """
        (Internal usage). Craft a Query according to an action name
        See methods: get, update, delete, execute.
        Args:
            action: A String among {"get", "update", "delete", "execute"}
            object: The name of the queried object (String)
        Returns:
            The corresponding Query instance
        """
        query = Query()
        query.action = action
        query.object = object
        return query

    @classmethod
    #@returns(Query)
    def get(cls, object):
        """
        Craft the Query which fetches the records related to a given object
        Args:
            object: The name of the queried object (String)
        Returns:
            The corresponding Query instance
        """
        return cls.action("get", object)

    @classmethod
    #@returns(Query)
    def update(cls, object):
        """
        Craft the Query which updates the records related to a given object
        Args:
            object: The name of the queried object (String)
        Returns:
            The corresponding Query instance
        """
        return cls.action("update", object)

    @classmethod
    #@returns(Query)
    def create(cls, object):
        """
        Craft the Query which create the records related to a given object
        Args:
            object: The name of the queried object (String)
        Returns:
            The corresponding Query instance
        """
        return cls.action("create", object)

    @classmethod
    #@returns(Query)
    def delete(cls, object):
        """
        Craft the Query which delete the records related to a given object
        Args:
            object: The name of the queried object (String)
        Returns:
            The corresponding Query instance
        """
        return cls.action("delete", object)

    @classmethod
    #@returns(Query)
    def execute(cls, object):
        """
        Craft the Query which execute a processing related to a given object
        Args:
            object: The name of the queried object (String)
        Returns:
            The corresponding Query instance
        """
        return cls.action("execute", object)

    #@returns(Query)
    def at(self, timestamp):
        """
        Set the timestamp carried by the query
        Args:
            timestamp: The timestamp (it may be a python timestamp, a string
                respecting the "%Y-%m-%d %H:%M:%S" python format, or "now")
        Returns:
            The self Query instance
        """
        self.timestamp = timestamp
        return self

    def filter_by(self, *args):
        """
        Add predicates to the filter of this query, or reset it.
        Args:
            args: It may be:
                - the parts of a Predicate (key, op, value)
                - None (the filter is replaced by an empty one)
                - a Filter instance
                - a set/frozenset/list/tuple of Predicate instances
                - a single Predicate instance
        Returns:
            The self Query instance
        Raises:
            Exception: if neither 1 nor 3 arguments are passed.
        """
        if len(args) == 1:
            filters = args[0]
            if filters is None:
                self.filters = Filter()
                return self
            # A frozenset used to be added as if it were one predicate.
            if not isinstance(filters, (set, frozenset, list, tuple, Filter)):
                filters = [filters]
            for predicate in filters:
                self.filters.add(predicate)
        elif len(args) == 3:
            self.filters.add(Predicate(*args))
        else:
            # Call syntax: valid on both Python 2 and Python 3.
            raise Exception('Invalid expression for filter')
        return self

    def select(self, *fields):
        """
        Add fields to the selection, or reset it.
        Args:
            fields: Field names, or a single list/tuple/set/frozenset of
                field names. No field or a single empty value (None, "",
                empty collection) removes every selected field.
        Returns:
            The self Query instance
        """
        # Accept passing iterables
        if len(fields) == 1:
            tmp, = fields
            if not tmp:
                fields = None
            elif isinstance(tmp, (list, tuple, set, frozenset)):
                fields = tuple(tmp)

        if not fields:
            # Delete all fields
            self.fields = set()
            return self

        for field in fields:
            self.fields.add(field)
        return self

    def set(self, params):
        """Merge the params dict into the query parameters; return self."""
        self.params.update(params)
        return self

    def __or__(self, query):
        """Return a new query with the union of both filters and fields.

        The params of the operands are not carried over to the result.
        """
        assert self.action == query.action
        assert self.object == query.object
        assert self.timestamp == query.timestamp  # XXX
        filters = self.filters | query.filters
        fields = self.fields | query.fields
        return Query.action(self.action, self.object).filter_by(filters).select(fields)

    def __and__(self, query):
        """Return a new query with the intersection of filters and fields.

        The params of the operands are not carried over to the result.
        """
        assert self.action == query.action
        assert self.object == query.object
        assert self.timestamp == query.timestamp  # XXX
        filters = self.filters & query.filters
        fields = self.fields & query.fields
        return Query.action(self.action, self.object).filter_by(filters).select(fields)

    def __le__(self, query):
        # NOTE(review): no __eq__ is defined in this class, so unless it is
        # provided elsewhere these comparisons are identity tests - confirm.
        return ( self == self & query ) or ( query == self | query )
def optimize(self):
    """
    Run the selection optimization pass, then the projection optimization
    pass, on this tree.

    Both passes are started with an empty constraint: an empty Filter for
    the selection pass and an empty set of fields for the projection pass.

    Returns:
        The tree returned by the projection pass.
    """
    Log.warning("Calling optimize()")
    # The projection pass is applied to whatever the selection pass
    # returned, which is not necessarily `self`.
    empty_filter = Filter()
    after_selection = self.optimize_selection(empty_filter)
    no_fields = set()
    return after_selection.optimize_projection(no_fields)
def __init__(self):
    """
    Build the pyparsing grammar of the query language and store it in
    self.bnf.

    Supported statements:
        SELECT *|field_list [AT timestamp] FROM table [WHERE filter]
        UPDATE table SET parameters [WHERE filter] [SELECT *|field_list]
        INSERT INTO table SET parameters [SELECT *|field_list]
        DELETE FROM table [WHERE filter]

    A filter is a list of predicates separated by '&&'. Every statement is
    handed to self.action(tokens, <action name>) through a parse action.
    """
    # NOTE: floating-point literals are not part of the `value` grammar
    # below; only signed integers are converted to numbers.
    integer = pp.Combine(
        pp.Optional(pp.oneOf("+ -")) + pp.Word(pp.nums)
    ).setParseAction(lambda t: int(t[0]))

    kw_select = pp.CaselessKeyword('select')
    kw_update = pp.CaselessKeyword('update')
    kw_insert = pp.CaselessKeyword('insert')
    kw_delete = pp.CaselessKeyword('delete')

    kw_from = pp.CaselessKeyword('from')
    kw_into = pp.CaselessKeyword('into')
    kw_where = pp.CaselessKeyword('where')
    kw_at = pp.CaselessKeyword('at')
    kw_set = pp.CaselessKeyword('set')
    # Booleans are turned into 1 / 0.
    kw_true = pp.CaselessKeyword('true').setParseAction(lambda t: 1)
    kw_false = pp.CaselessKeyword('false').setParseAction(lambda t: 0)

    # Regex string representing the set of possible operators
    # Example : ">=|<=|!=|>|<|="
    # Regex alternation takes the first alternative that matches (it does
    # not look for the longest one), so the longest operators must come
    # first: otherwise ">" could be accepted where ">=" was written. The
    # keys of a dict come in no useful order, hence the explicit sort.
    operators = sorted(Predicate.operators.keys(), key=len, reverse=True)
    # Only '|' is escaped, as it would otherwise split an operator such as
    # "||" into empty alternatives.
    OPERATOR_RX = "(?i)%s" % '|'.join(
        [o.replace('|', r'\|') for o in operators])

    # predicate
    field = pp.Word(pp.alphanums + '_' + '.' + '-')
    operator = pp.Regex(OPERATOR_RX).setName("operator")
    # $name: the '$' is suppressed by the grammar, then put back in the
    # resulting token.
    variable = pp.Literal('$').suppress() + pp.Word(
        pp.alphanums + '_' + '.' + '-'
    ).setParseAction(lambda t: "$%s" % t[0])

    # `obj` ({field: value, ...}) is defined further down since it refers to
    # values, which may themselves be objects.
    obj = pp.Forward()
    value = obj | pp.QuotedString('"') | pp.QuotedString("'") \
        | kw_true | kw_false | integer | variable

    def handle_value_list(s, l, t):
        t = t.asList()
        new_t = [t]
        debug("[handle_value_list] s = %(s)s ** l = %(l)s ** t = %(t)s" % locals())
        debug(" new_t = %(new_t)s" % locals())
        return new_t

    # A single value, an empty list "[]", or a bracketed list of values.
    empty_list = (
        pp.Literal("[").suppress() + pp.Literal("]").suppress()
    ).setParseAction(lambda s, l, t: [[]])
    filled_list = pp.Literal("[").suppress() \
        + pp.delimitedList(value).setParseAction(handle_value_list) \
        + pp.Literal("]").suppress()
    value_list = value | empty_list | filled_list

    table = pp.Word(pp.alphanums + ':_-').setResultsName('object')
    field_list = pp.Literal("*") | pp.delimitedList(field).setParseAction(
        lambda tokens: set(tokens))

    # field: value pairs, gathered into a dict by `obj`.
    assoc = (field + pp.Literal(":").suppress() + value_list).setParseAction(
        lambda tokens: [tokens.asList()])
    obj << pp.Literal("{").suppress() \
        + pp.delimitedList(assoc).setParseAction(lambda t: dict(t.asList())) \
        + pp.Literal("}").suppress()

    # PARAMETER (SET)
    # X = Y    -->    t=(X, Y)
    def handle_param(s, l, t):
        t = t.asList()
        assert len(t) == 2
        new_t = tuple(t)
        debug("[handle_param] s = %(s)s ** l = %(l)s ** t = %(t)s" % locals())
        debug(" new_t = %(new_t)s" % locals())
        debug(" (we expect a tuple)")
        return new_t

    param = (field + pp.Literal("=").suppress() + value_list) \
        .setParseAction(handle_param)

    # PARAMETERS (SET)
    # PARAMETER[, PARAMETER[, ...]]    -->    dict()
    def handle_parameters(s, l, t):
        t = list(t.asList())
        new_t = dict(t) if t else dict()
        debug("[handle_parameters] s = %(s)s ** l = %(l)s ** t = %(t)s" % locals())
        debug(" new_t = %(new_t)s" % locals())
        debug(" (we expect a dict)")
        return new_t

    parameters = pp.delimitedList(param) \
        .setParseAction(handle_parameters)

    predicate = (field + operator + value_list).setParseAction(
        self.handlePredicate)

    # clause of predicates
    # NOTE: `clause` is built but not plugged into the grammar yet (see the
    # definition of the WHERE filter below).
    and_op = pp.CaselessLiteral("and") | pp.Keyword("&&")
    or_op = pp.CaselessLiteral("or") | pp.Keyword("||")
    not_op = pp.Keyword("!")

    predicate_precedence_list = [
        (not_op, 1, pp.opAssoc.RIGHT, lambda x: self.handleClause(*x)),
        (and_op, 2, pp.opAssoc.LEFT, lambda x: self.handleClause(*x)),
        (or_op, 2, pp.opAssoc.LEFT, lambda x: self.handleClause(*x))
    ]
    clause = pp.operatorPrecedence(
        predicate, predicate_precedence_list
    )  #.setParseAction(lambda clause: Filter.from_clause(clause))
    # END: clause of predicates

    # For the time being, we only support simple filters and not full clauses
    filter_expr = pp.delimitedList(
        predicate, delim='&&'
    ).setParseAction(lambda tokens: Filter(tokens.asList()))

    # Loose "YYYY-MM-DD HH:MM:SS" shape: any character is accepted at the
    # digit positions.
    datetime_expr = pp.Regex(r'....-..-.. ..:..:..')

    timestamp = pp.CaselessKeyword('now') | datetime_expr

    select_elt = (kw_select.suppress() + field_list.setResultsName('fields'))
    where_elt = (kw_where.suppress() + filter_expr.setResultsName('filters'))
    set_elt = (kw_set.suppress() + parameters.setResultsName('params'))
    at_elt = (kw_at.suppress() + timestamp.setResultsName('timestamp'))

    # SELECT *|field_list [AT timestamp] FROM table [WHERE clause]
    # UPDATE table SET parameters [WHERE clause] [SELECT *|field_list]
    # INSERT INTO table SET parameters [SELECT *|field_list]
    # DELETE FROM table [WHERE clause]
    select = (
        select_elt + pp.Optional(at_elt) + kw_from.suppress() + table
        + pp.Optional(where_elt)
    ).setParseAction(lambda args: self.action(args, 'get'))
    update = (
        kw_update + table + set_elt + pp.Optional(where_elt)
        + pp.Optional(select_elt)
    ).setParseAction(lambda args: self.action(args, 'update'))
    insert = (
        kw_insert + kw_into + table + set_elt + pp.Optional(select_elt)
    ).setParseAction(lambda args: self.action(args, 'create'))
    delete = (
        kw_delete + kw_from + table + pp.Optional(where_elt)
    ).setParseAction(lambda args: self.action(args, 'delete'))

    self.bnf = select | update | insert | delete
def run_children(self):
    """
    Run children queries (subqueries) assuming the parent query (main query)
    has successfully ended.

    Steps:
      1. If there is no parent record, or no child, only a LastRecord is
         sent and nothing is run.
      2. The first parent record is inspected to find which fields of each
         child were already fetched by the parent; the projection of each
         child is reduced accordingly. A child with nothing left to fetch
         is marked useless and its results are taken from the parent
         records.
      3. If every child is useless, the parent records are forwarded as-is,
         followed by a LastRecord.
      4. Otherwise the identifiers found in the parent records are injected
         in each remaining child, and those children are started.

    Raises:
        Exception: if the predicate of a relation uses an operator which is
            neither `eq` nor `contains`.
    """
    if not self.parent_output:
        # No parent record, this is useless to run children queries.
        self.send(LastRecord())
        return

    if not self.children:
        # The top operator has build a SubQuery node without child queries,
        # so this SubQuery operator is useless and should be replaced by
        # its main query.
        Log.warning(
            "SubQuery::run_children: no child node. The query plan could be improved"
        )
        self.send(LastRecord())
        return

    # Relation types for which the parent holds a list of elements.
    list_link_types = (Relation.types.LINK_1N,
                       Relation.types.LINK_1N_BACKWARDS)

    # Inspect the first parent record to deduce which fields have already
    # been fetched
    first_record = self.parent_output[0]
    parent_fields = set(first_record.keys())

    # Optimize child queries according to the fields already retrieved thanks
    # to the parent query.
    useless_children = set()
    # Iterate over a copy: self.children[i] may be replaced in the loop.
    for i, child in enumerate(self.children[:]):
        # Test whether the current child provides relevant fields (e.g.
        # fields not already fetched in the parent record). If so, reduce
        # the set of queried field in order to only retrieve relevant fields.
        child_fields = child.get_query().get_select()
        relation = self.relations[i]
        relation_name = relation.get_relation_name()

        already_fetched_fields = set()
        if relation_name in parent_fields:
            related = first_record[relation_name]
            if relation.get_type() in list_link_types:
                # If we do not have Records, we have only keys, so it's like
                # we had no field of importance...
                if related and isinstance(related[0], Record):
                    already_fetched_fields = set(related[0].keys())
            elif related:
                already_fetched_fields = set(related.keys())

        # XXX routerv2: we need to keep key used for subquery
        key_field = relation.get_predicate().get_value()

        relevant_fields = child_fields - already_fetched_fields

        if not relevant_fields:
            # Everything this child provides is already in the parent
            # records: collect its results from there.
            fetched_records = []
            for parent_record in self.parent_output:
                fetched_records.extend(parent_record[relation_name])
            self.child_results[i] = fetched_records  # Records

            useless_children.add(i)
            continue

        relevant_fields |= frozenset([key_field])  # necessary ?
        if child_fields != relevant_fields:
            # XXX This seems to remove the key used for joining
            self.children[i] = child.optimize_projection(relevant_fields)

    # If every children are useless, this means that we already have full records
    # thanks to the parent query, so we simply forward those records.
    if len(self.children) == len(useless_children):
        # Explicit loop rather than map(): map() is lazy on Python 3 and
        # would not send anything.
        for parent_record in self.parent_output:
            self.send(parent_record)
        self.send(LastRecord())
        return

    # Loop through children and inject the appropriate parent results
    for i, child in enumerate(self.children):
        if i in useless_children:
            continue

        # We have two cases:
        # (1) either the parent query has subquery fields (a list of child
        #     ids + eventually some additional information)
        # (2) either the child has a backreference to the parent
        #     ... eventually a partial reference in case of a 1..N relationship
        #
        # In all cases, we will collect all identifiers to proceed to a
        # single child query for efficiency purposes, unless it's not
        # possible (?).
        #
        # We have several parent records stored in self.parent_output
        #
        # /!\ Can we have a mix of (1) and (2) ? For now, let's suppose NO.
        #  *  We could expect key information to be stored in the DBGraph

        # The operation to be performed is understood only be looking at the predicate
        relation = self.relations[i]
        key, op, value = relation.get_predicate().get_tuple()

        if op == eq:
            # 1..N
            # Example: parent has slice_hrn, resource has a reference to slice
            if relation.get_type() == Relation.types.LINK_1N_BACKWARDS:
                parent_ids = [record[key] for record in self.parent_output]
            else:
                is_list_link = relation.get_type() in list_link_types
                parent_ids = []
                for parent_record in self.parent_output:
                    record = Record.get_value(parent_record, key)
                    if not record:
                        record = []
                    # XXX Nothing to do for the case where the list of keys in the parent is empty
                    if is_list_link:
                        # we have a list of elements
                        # element = id or dict    : simple key
                        #         = tuple or dict : composite key
                        parent_ids.extend(
                            [self.get_element_key(r, value) for r in record])
                    else:
                        parent_ids.append(
                            self.get_element_key(record, value))

            # A single identifier is matched with an equality, several
            # identifiers with an inclusion.
            if len(parent_ids) == 1:
                parent_id, = parent_ids
                filter_pred = Predicate(value, eq, parent_id)
            else:
                filter_pred = Predicate(value, included, parent_ids)

            # Injecting predicate. optimize_selection() may return another
            # node, so the callback of the child is set again on the result.
            old_child_callback = child.get_callback()
            self.children[i] = child.optimize_selection(
                Filter().filter_by(filter_pred))
            self.children[i].set_callback(old_child_callback)

        elif op == contains:
            # 1..N
            # Example: parent 'slice' has a list of 'user' keys == user_hrn
            for parent_record in self.parent_output:
                if child.get_query().object not in parent_record:
                    continue
                # The parent holds either a list of ids or a list of records
                # XXX Take multiple keys into account
                user_data = [
                    user if isinstance(user, dict) else {value: user}
                    for user in parent_record[key]
                ]
                # Let's inject user_data in the right child
                child.inject(user_data, value, None)

        else:
            raise Exception("No link between parent and child queries")

    # We make another loop since the children might have been modified in
    # the previous one.
    for i, child in enumerate(self.children):
        if i in useless_children:
            continue
        self.status.started(i)
    # Children are started only once all of them have been flagged as
    # started.
    for i, child in enumerate(self.children):
        if i in useless_children:
            continue
        child.start()
def all_done(self):
    """
    Called when all children of the current subquery are done: the results
    stored in self.child_results are merged into the parent records.

    For each parent record and each child, depending on the operator of the
    relation predicate:
      - eq: the child records matching the key(s) found in the parent
        record are stored, as a list, in the parent record under the name
        of the relation;
      - contains: the elements listed in the parent record under the name
        of the child object are turned into dicts if needed, then updated
        with the matching child records.
    Each parent record is then sent, and a LastRecord is sent at the end.

    Any exception (including the one raised for an unsupported operator) is
    caught, printed along with its traceback, and not propagated.
    NOTE(review): in that case the remaining parent records and the
    LastRecord are never sent -- confirm whether consumers rely on it.
    """
    try:
        # Relation types for which the parent holds a list of elements.
        list_link_types = (Relation.types.LINK_1N,
                           Relation.types.LINK_1N_BACKWARDS)

        for parent_record in self.parent_output:
            # Dispatching child results
            for i, child in enumerate(self.children):
                relation = self.relations[i]
                key, op, value = relation.get_predicate().get_tuple()

                if op == eq:
                    # 1..N
                    # Example: parent has slice_hrn, resource has a reference to slice
                    #            PARENT       CHILD
                    # Predicate: (slice_hrn,) == slice

                    # Collect in parent all child such as they have a pointer to the parent
                    record = Record.get_value(parent_record, key)
                    if not record:
                        record = []
                    if not isinstance(record, (list, tuple, set, frozenset)):
                        record = [record]
                    if relation.get_type() in list_link_types:
                        # we have a list of elements
                        # element = id or dict    : simple key
                        #         = tuple or dict : composite key
                        ids = [
                            SubQuery.get_element_key(r, value)
                            for r in record
                        ]
                    else:
                        ids = [SubQuery.get_element_key(record, value)]

                    # A single identifier is matched with an equality,
                    # several identifiers with an inclusion.
                    if len(ids) == 1:
                        child_id, = ids
                        selector = Filter().filter_by(
                            Predicate(value, eq, child_id))
                    else:
                        selector = Filter().filter_by(
                            Predicate(value, included, ids))

                    parent_record[relation.get_relation_name()] = [
                        child_record
                        for child_record in self.child_results[i]
                        if selector.match(child_record)
                    ]

                elif op == contains:
                    # 1..N
                    # Example: parent 'slice' has a list of 'user' keys == user_hrn
                    #            PARENT        CHILD
                    # Predicate: user contains (user_hrn, )
                    object_name = child.query.object

                    # first, replace records by dictionaries. This only works for non-composite keys
                    if parent_record[object_name]:
                        first_element = parent_record[object_name][0]
                        if not isinstance(first_element, dict):
                            parent_record[object_name] = [
                                {value: element}
                                for element in parent_record[object_name]
                            ]

                    # A simple key is a single field name, a composite key
                    # is an iterable of field names.
                    if isinstance(value, StringTypes):
                        key_fields = [value]
                    else:
                        key_fields = value

                    for record in parent_record[object_name]:
                        # An empty record carries no key: nothing to merge.
                        if not record:
                            continue
                        # Find the corresponding record in child_results and update the one in the parent with it.
                        # The merge does not depend on the individual fields
                        # of `record`, so it is done once per record (and
                        # not while iterating over the dict being updated).
                        selector = Filter()
                        for field in key_fields:
                            selector = selector.filter_by(
                                Predicate(field, eq, record[field]))
                        for child_record in self.child_results[i]:
                            if selector.match(child_record):
                                record.update(child_record)

                else:
                    raise Exception("No link between parent and child queries")

            self.send(parent_record)
        self.send(LastRecord())
    except Exception as e:
        # Best effort: report the failure without propagating it.
        print("EEE %s" % (e,))
        traceback.print_exc()