def fetch(self, key, varenv):
    '''fetch a single namespace entry from the graph'''
    args = (self.namemap.bootstrap.has_key[1:], self.guid[1:], quote(key))
    qs = '(typeguid=%s left=%s value=%s comparator="octet" datatype=string pagesize=2 result=((value left right)))' % args

    r = self.namemap.gc.read_varenv(qs, varenv)

    # we asked for pagesize=2 just to check this
    if len(r) > 1:
        LOG.warning('mql.duplicate.key',
                    '%s in namespace %s' % (key, self.guid))
    elif len(r) == 0:
        return False

    (value, nsg, g) = (unquote(r[0][0]), '#' + r[0][1], '#' + r[0][2])

    if nsg != self.guid:
        raise MQLInternalError(
            None,
            "Mismatched namespace query",
            value=value,
            namespace=self.guid,
            returned_namespace=nsg,
            guid=g)

    # this assert can fail because of graphd case-insensitivity
    #assert unquote(value) == key, "%s != %s" % (unquote(value), key)

    self.store(key, g)
    return g

def refresh(self, varenv):
    '''try to refresh a complete namespace from the graph.

    set self.complete iff successful.
    '''
    # see if we can fetch the whole thing:
    # for large namespaces, don't try to fetch the whole thing
    if not self.complete:
        return

    datelineqs = ''
    if self.last_dateline is not None:
        datelineqs = 'dateline>%s' % self.last_dateline

    args = (self.namemap.bootstrap.has_key[1:], self.guid[1:], datelineqs,
            self.max_complete + 1)
    qs = '(typeguid=%s left=%s comparator="octet" datatype=string %s pagesize=%d result=((value left right)))' % args

    r = self.namemap.gc.read_varenv(qs, varenv)

    # check if we hit the maximum size for cacheable namespaces
    if len(r) > self.max_complete:
        LOG.notice('mql.namespace.refresh',
                   '%s too large to cache' % self.guid)
        self.complete = 0
    elif self.complete == -1:
        self.complete = 1

    if len(r) > 0:
        self.update_namespaces(r)
    else:
        if self.last_dateline is not None:
            # XXX should extract the dateline from the empty result and
            # update self.last_dateline in order to minimize the dateline
            # ranges in later queries.
            pass

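# Both fetch() (pagesize=2 to detect duplicate keys) and refresh()
# (pagesize=max_complete+1 to detect oversize namespaces) use the same
# "ask for one extra row" trick: a single query distinguishes "fits" from
# "too big" without a separate count. A minimal sketch of that pattern
# (fetch_page and the helper name are hypothetical, not part of this API):
def _fetch_all_or_bail(fetch_page, max_rows):
    rows = fetch_page(max_rows + 1)  # one more than we intend to keep
    if len(rows) > max_rows:
        return None  # too large to cache; caller falls back to point lookups
    return rows
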
def check_permission_permission(querier, varenv, permission_guid):
    """
    Check if the user has permission to administer the given permission
    """
    permission_permission = varenv.setdefault('permission_permission', {})
    if permission_guid not in permission_permission:
        userguid = varenv.get_user_guid()
        authorityguid = varenv.authority_guid
        permission = Permission(querier, permission_guid)

        has_access = permission.user_has_permission_permission(
            userguid, varenv)
        if not has_access and authorityguid:
            has_access = permission.user_has_permission_permission(
                authorityguid, varenv)
            if has_access:
                LOG.notice(
                    'access.authority',
                    'for user %s, permission %s and authority %s' %
                    (userguid, permission_guid, authorityguid))

        permission_permission[permission_guid] = has_access

    return permission_permission[permission_guid]

def lookup_mids_of_guids(self, guid_list, varenv):
    # It's sort of the same as before. We have some guids; see if any of
    # them are the target of replaced_by links. If they are, include the
    # mids of the replaced nodes as well.
    if not guid_list:
        return {}

    ask_list = set()
    result = {}
    rev = {}

    for g in guid_list:
        # convert the mid directly.
        m = mid.of_guid(g[1:])
        ask_list.add(g)
        result[g] = [m]
        rev[m] = g

    LOG.debug("mql.lookup.mids", "Looking up mids for guids")

    # we look forward, up replaced_by links, and from that node
    # to other replaced_by links, and backwards from the root,
    # for previous ones.
    #
    # +-+  r.b.  +-+
    # |A| -----> |B|
    # +-+        +-+
    #             |
    # +-+         |
    # |C|---------+
    # +-+
    #
    # in this diagram, we root at B.
    # We list B first but also A and C if present.
    query = [{
        "@guid": ask_list,
        "@pagesize": len(ask_list) + 1,
        "-replaced_by": [{
            "@guid": None,
            ":optional": True
        }]
    }]

    varenv["gr_log_code"] = "mids2guids"
    query_results = self.querier.read(query, varenv)
    varenv.pop("gr_log_code")

    # each result is going to (hopefully) either have a -replaced_by link
    # or a replaced_by one.
    for item in query_results:
        guid = item["@guid"]

        # otherwise, there's just links pointing at me.
        if item["-replaced_by"]:
            # me first
            result[guid] = [mid.of_guid(guid[1:])]
            # then everyone else
            for r in item["-replaced_by"]:
                result[guid].append(mid.of_guid(r["@guid"][1:]))

    return result

def check_permission(querier, varenv, permissionguid):
    """
    Check if the user can write to objects permitted by permission_guid
    """
    write_permission = varenv.setdefault('write_permission', {})
    if permissionguid not in write_permission:
        userguid = varenv.get_user_guid()
        authorityguid = varenv.authority_guid
        permission = Permission(querier, permissionguid)

        has_access = permission.user_has_write_permission(userguid, varenv)
        if not has_access and authorityguid:
            has_access = permission.user_has_write_permission(
                authorityguid, varenv)
            if has_access:
                LOG.notice(
                    'access.authority',
                    'for user %s, permission %s and authority %s' %
                    (userguid, permissionguid, authorityguid))

        if not has_access and varenv.get('$privileged') is Privileged:
            LOG.notice(
                'access.privileged',
                'for user %s and permission %s' % (userguid, permissionguid))
            has_access = True

        write_permission[permissionguid] = has_access

    return write_permission[permissionguid]

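# Both permission checks above memoize their answer per-request in varenv via
# setdefault, so repeated checks against the same permission guid hit the
# graph only once per request. A minimal standalone sketch of that pattern
# (the helper name and compute callable are hypothetical, not part of the
# real API):
def _memoized_check(varenv, cache_name, key, compute):
    cache = varenv.setdefault(cache_name, {})
    if key not in cache:
        cache[key] = compute(key)
    return cache[key]

# e.g. something along the lines of:
#   _memoized_check(varenv, 'write_permission', permissionguid,
#                   lambda g: Permission(querier, g).user_has_write_permission(
#                       varenv.get_user_guid(), varenv))
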
def json_traceback(response=None, exception=None, **kws):
    """
    This function is only used when debugging is on. It sends output
    similar to what you'd see when using Python interactively
    to the browser
    """
    debug = 1
    etype, evalue, etb = sys.exc_info()
    try:  # try/finally
        try:  # try/except
            if debug and etype is IOError and str(evalue)[:5] == 'Write':
                # if this is an IOError while writing to the client,
                # it is probably better not to try to write to the client
                # even if debug is on.
                LOG.error('json_traceback', 'skipping error write to client')
                debug = 0

            # write to log
            for e in traceback.format_exception(etype, evalue, etb):
                s = '%s' % e[:-1]
                LOG.error('json_traceback', s)

            if response is None:
                response = JSONResponse(status='500 Internal Server Error',
                                        code='/api/status/error/server')
            response.extend(**kws)

            stack = [
                dict(zip('file,line,func,source'.split(','), quad))
                for quad in traceback.extract_tb(etb, None)
            ]
            text = '%s: %s' % (etype, evalue)
            response.log(text, stack=stack, level='error')
            return response.response

        except Exception, e:
            # hit the backstop. must be a bug in the normal exception
            # handling code, so do something simple.
            response = {
                'status': '500 Internal Server Error',
                'messages': [{
                    'level': 'error',
                    'text': traceback.format_exc()
                }],
            }
            return response
    finally:
        # erase the traceback
        etb = None

def update_namespaces(self, r):
    '''update this namespace cache based on the result of a graph query'''
    self.last_dateline = r.dateline
    for entry in r:
        (name, nsg, g) = (unquote(entry[0]), '#' + entry[1], '#' + entry[2])
        assert nsg == self.guid
        self.store(name, g)

    LOG.debug('updated namespace %s' % self.guid, '%d entries' % len(r))

def reset_cost(self):
    LOG.debug('resetting graphd costs')

    # these 3 counters remain for backward compatibility
    self.nrequests = 0
    # -1 because the first attempt is not really a 'retry'
    self.dbretries = -1
    self.qretries = -1

    # all cost info is tracked in this dict
    # this includes cost info returned by GQL
    self.totalcost = defaultdict(float)

def add_graph_costs(self, costs, dbtime, tries):
    """feed costs from graphdb into self.totalcost."""
    request_cost = coststr_to_dict(costs)
    request_cost['mql_dbtime'] = dbtime
    request_cost['mql_dbtries'] = tries or 1
    request_cost['mql_dbreqs'] = 1
    LOG.debug('graphdrequest.cost %s', request_cost)

    for k, v in request_cost.iteritems():
        if k in ['mm', 'fm']:
            # These are high-water marks. Don't sum them.
            self.totalcost[k] = max(v, self.totalcost.get(k))
        else:
            if k in self.totalcost:
                self.totalcost[k] += v
            else:
                self.totalcost[k] = v

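# coststr_to_dict is defined elsewhere; assuming the graphd cost string is
# space-separated "key=value" pairs (an assumption, not confirmed by this
# file), a minimal sketch of such a parser would be:
def _coststr_to_dict_sketch(costs):
    out = {}
    for pair in costs.split():
        if '=' not in pair:
            continue
        k, v = pair.split('=', 1)
        try:
            out[k] = float(v)  # most cost fields are numeric counters
        except ValueError:
            out[k] = v  # leave anything non-numeric untouched
    return out

# _coststr_to_dict_sketch('tu=12 mm=3') -> {'tu': 12.0, 'mm': 3.0}
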
def write_varenv(self, qs, varenv):
    """Write to the graph the specified "query"."""
    if getattr(self, 'readonly', None):
        raise GraphConnectionError(
            'Tried to write to a read-only graph',
            http_code=500,
            app_code='/mqlwrite/backend/read_only')

    dateline_in = varenv.get('write_dateline', None)
    self.write_occurred = 1
    try:
        r = self._generate_and_transmit_query(qs, varenv, WriteMode)
    except MQLDatelineInvalidError:
        # see read_varenv comment on this
        LOG.info('mqlwrite.dateline.delete',
                 'got an invalid dateline, deleting from varenv',
                 varenv.get('write_dateline'))
        varenv['write_dateline'] = ''
        r = self._generate_and_transmit_query(qs, varenv, WriteMode)

    dateline_out = r.dateline

    # update our write_dateline in case we do subsequent reads
    # or writes. The new 'write_dateline' is returned to the
    # user for use with subsequent mqlreads or mqlwrites they do
    varenv['write_dateline'] = dateline_out
    varenv['last_write_time'] = time.time()
    log_graph_write(varenv, dateline_in, dateline_out)
    LOG.debug(
        'graph.write_dateline.set',
        '',
        last_write_time=varenv['last_write_time'],
        write_dateline=varenv['write_dateline'])

    # Record that a write has happened and following writes should set
    # the continuation flag.
    varenv['is_write_continuation'] = True
    return r

def lookup_guids_of_mids(self, mid_list, varenv):
    ask_list = set()
    result = {}
    rev = {}

    # arithmetically compute guids
    for m in mid_list:
        try:
            guid = "#" + mid.to_guid(m)
            ask_list.add(guid)
            # store the whole list here; down below we'll just
            # overwrite the things we got back.
            result[m] = guid  #self.internal_guid_to_id(guid)
            # I need to go back + forth.
            rev[guid] = m
        except (mid.InvalidMIDVersion, mid.InvalidMID) as e:
            result[m] = False
        except (mid.InvalidMunch) as e:
            raise MQLParseError(
                None, "'%(mid)s' is not a properly formatted mid", mid=m)

    if not len(ask_list):
        return result

    # I'm not caching these.
    LOG.debug("mql.resolve.mids", "Looking up guids for mids",
              code=len(ask_list))

    # look for replaced_by links off the guids.
    # replaced_by links are unique; if they aren't, then this will signify
    # some end-of-the-world type event.
    query = [{"@guid": ask_list, "replaced_by": {"@guid": None}}]

    # read
    varenv["gr_log_code"] = "guids2mids"
    query_results = self.querier.read(query, varenv)
    varenv.pop("gr_log_code")

    # "now see what we found out..."
    for item in query_results:
        # [guid, replaced_by { guid }]
        guid = item["@guid"]
        rep_by = item["replaced_by"]["@guid"]
        m = rev[guid]
        result[m] = rep_by  # pray.

    return result

def parse_result_index(self, result, varenv):
    if len(result) > 1:
        # raise MQLInternalError(self.query,
        #                        "More than one piece of order information")
        try:
            error_query = self.parent.query.original_query
        except:
            error_query = self.query
        LOG.error(
            'multiple.indices',
            'More than one piece of order information (using first one)',
            query=repr(error_query),
            indices=repr(result))
    elif len(result) == 0:
        return None

    dz = dict(zip(self.result, result[0]))
    return unquote_value('float', dz['value'], False)

def check_write_throttle(querier, varenv):
    userguid = varenv.get_user_guid()
    max_writes = varenv.get('max_writes', None)
    if max_writes is None or userguid in MAX_WRITE_EXCEPTED_USERS:
        LOG.error('write.throttle.skipped',
                  'user=%s skipped write throttle' % userguid)
        return True

    # userguid starts with a '#' while max_writes['guid'] does not.
    # We need to strip the '#' in order for the comparison to succeed.
    if userguid[0] == '#':
        userguid = userguid[1:]

    if max_writes['guid'] != userguid:
        LOG.notice(
            'write.throttle.different_users',
            'Logged in user: %s different from mqlwrite user: %s' %
            (max_writes['guid'], userguid))

    # 1 day
    tdelta = timedelta(1)
    yesterday = (datetime.utcnow() - tdelta).isoformat()

    # MQL attribution models documented at:
    # https://wiki.metaweb.com/index.php/MQL_Attribution_for_OAuth%2C_Acre%2C_etc

    # normal attribution query
    # need the optional to suppress EMPTY on count=0
    graphq = ('(scope=%s timestamp>%s live=dontcare newest>=0 result=(count) '
              'optional)') % (max_writes['guid'], yesterday)
    gresult = querier.gc.read_varenv(graphq, varenv)
    count = int(gresult[0])

    # oauth/open social attribution query
    graphq = ('(scope->(scope=%s) timestamp>%s live=dontcare newest>=0 '
              'result=(count) optional)') % (max_writes['guid'], yesterday)
    gresult = querier.gc.read_varenv(graphq, varenv)
    count += int(gresult[0])

    if count > max_writes['limit']:
        LOG.alert(
            'write.throttle.exceeded',
            'user=%s count=%s max=%d delta=%s' %
            (max_writes['guid'], count, max_writes['limit'], str(tdelta)))
        msg = 'Daily write limit of %s was exceeded.' % max_writes['limit']
        raise MQLWriteQuotaError(
            None,
            msg,
            user='******' + max_writes['guid'],
            count=count,
            max_writes=max_writes['limit'],
            period=str(tdelta))
    else:
        LOG.notice(
            'write.throttle.ok',
            'user=%s count=%s max=%s' %
            (max_writes['guid'], count, max_writes['limit']))
        return True

def read_varenv(self, qs, varenv):
    """Read from the graph the specified "query"."""
    try:
        # the pymql user provides a 'write_dateline', which should be a
        # valid dateline returned to said user by a previous mqlwrite query
        dateline_in = varenv.get('write_dateline', None)
        r = self._generate_and_transmit_query(qs, varenv, ReadMode)
    except MQLDatelineInvalidError:
        # Drop the datelines out of the varenv,
        # re-generate the query and try again.
        # the main use case here is when sandbox is refreshed
        # and the instance id in the dateline changes. The user's dateline
        # (usually in a cookie) is now invalid until they do a write,
        # or a touch
        LOG.info('mqlread.dateline.delete',
                 'got an invalid dateline, deleting from varenv',
                 varenv.get('write_dateline'))
        varenv['write_dateline'] = ''
        r = self._generate_and_transmit_query(qs, varenv, ReadMode)

    if not r and varenv.get('graph_noisy'):
        raise EmptyResult('query %s' % qs)

    dateline_out = r.dateline

    # 'dateline' is returned to the original caller of pymql read.
    # though, in practice, it is not passed on by frapi and
    # they really should only update their dateline after doing
    # a write.
    # we do *not* update the internal 'write_dateline' varenv, here.
    # in the case of reads, the idea being the user only needs to
    # demand the level of freshness of their last write, so
    # subsequent reads in this session will use the original
    # 'write_dateline' provided by the caller of pymql read/write.
    # the 'write_dateline' is updated in the event that a write
    # occurs in this session.
    varenv['dateline'] = dateline_out
    log_graph_read(varenv, dateline_in, dateline_out)
    LOG.debug('graph.dateline.set', '', dateline=varenv['dateline'])
    return r

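# read_varenv and write_varenv share the same retry-on-invalid-dateline
# shape; a hedged sketch of how it could be factored out (the helper name
# and log_prefix parameter are hypothetical, not part of the real API):
def _transmit_with_dateline_retry(self, qs, varenv, mode, log_prefix):
    try:
        return self._generate_and_transmit_query(qs, varenv, mode)
    except MQLDatelineInvalidError:
        # the stored dateline is stale (e.g. after a sandbox refresh);
        # drop it and retry once with no dateline constraint.
        LOG.info(log_prefix + '.dateline.delete',
                 'got an invalid dateline, deleting from varenv',
                 varenv.get('write_dateline'))
        varenv['write_dateline'] = ''
        return self._generate_and_transmit_query(qs, varenv, mode)
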
def check_change_permission_by_user(querier, varenv, old_permission_guid,
                                    new_permission_guid):
    has_old_permission = \
        check_permission_permission(querier, varenv, old_permission_guid)
    has_new_permission = \
        check_permission_permission(querier, varenv, new_permission_guid)

    # privileged access bypass
    if varenv.get('$privileged') is Privileged:
        LOG.notice(
            'access.privileged',
            'for user %s changing permission %s to %s' %
            (varenv.get_user_guid(), old_permission_guid,
             new_permission_guid))
        return True

    return has_old_permission and has_new_permission

def parse_full_reply(self, replystr):
    """
    parse the given reply string from the graph into a bunch of nested
    lists of tokens.

    Results are in the form:

      ['ok', 'id=', '"me;..."', [[['010000..', '01...', ...]]]]
    """
    LOG.debug('graph.result', replystr)

    token_list = graphresult_re.findall(replystr)

    curlist = []
    stack = []
    push_state = stack.append
    pop_state = stack.pop

    for count, tok in enumerate(token_list):
        if tok == '(':
            push_state(curlist)
            curlist = []
        elif tok == ')':
            sublist = curlist
            curlist = pop_state()
            curlist.append(sublist)
        elif tok == '\n':
            raise MQLGraphError(
                None,
                'Not allowed a newline in parse_full_reply',
                reply=replystr,
                tokens=token_list)
        elif tok == ' ' or tok == '':
            pass
        else:
            curlist.append(tok)

    LOG.debug('graph.result.parsed', 'Parsed %d tokens' % count)

    if len(stack) != 0:
        raise MQLGraphError(
            None,
            'got linefeed in the middle of a reply?',
            reply=replystr,
            tokens=token_list,
            depth=len(stack))

    self.replyqueue.append(curlist)

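# The paren-matching loop above is a standard stack-based nesting pass. A
# self-contained sketch of the same technique with its own toy tokenizer
# (graphresult_re itself is defined elsewhere in this module and handles
# more cases, e.g. quoted strings):
import re

_toy_tokens = re.compile(r'[()]|[^()\s]+')

def _nest_tokens_sketch(s):
    curlist, stack = [], []
    for tok in _toy_tokens.findall(s):
        if tok == '(':
            stack.append(curlist)  # remember the enclosing list
            curlist = []
        elif tok == ')':
            sublist = curlist
            curlist = stack.pop()  # pop back out one level
            curlist.append(sublist)
        else:
            curlist.append(tok)
    return curlist

# _nest_tokens_sketch('ok id= ((1 2) 3)')
#   -> ['ok', 'id=', [['1', '2'], '3']]
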
def list_or_item(self, result, varenv):
    # return a list if we want a list, or the first item if we don't
    if self.query.list:
        return result
    elif len(result) == 1:
        return result[0]
    elif len(result) == 0:
        return None
    elif varenv.get('uniqueness_failure', None) == 'soft':
        # this suppresses the exception in favor of a warning message
        LOG.warning(
            'soft.uniqueness.failure', repr(self.query), result=repr(result))
        return result[0]
    else:
        # this mutates the result, but we don't want it anyway...
        varenv.lookup_manager.do_id_lookups()
        result = varenv.lookup_manager.substitute_ids(result)
        raise MQLTooManyValuesForUniqueQuery(
            self.query, results=result, count=len(result))

def __init__(self,
             no_timeouts=False,
             policy_map=None,
             default_policy=None,
             custom_policy=None):
    self.reset_cost()
    self.timeout_policies = policy_map

    if default_policy:
        self.default_policy = default_policy
    else:
        self.default_policy = self.DEFAULT_POLICY_NAME

    self.no_timeouts = no_timeouts

    if custom_policy:
        LOG.info('gc.custom.timeout.policy', '', policy=str(custom_policy))
        self.timeout_policies['custom'] = custom_policy
        self.default_policy = 'custom'

def wrap_query(querier, sq, varenv=None, transaction_id=None):
    """
    Run a query with the given querier (usually something like
    ctx.low_querier.read) - performing appropriate envelope packing
    and unpacking, multiple queries, error handling, etc
    """
    LOG.error(
        'deprecated',
        'mw.mql.pathexpr.wrap_query() is DEPRECATED and will go away soon!')

    if isinstance(sq, basestring):
        # convert to json query
        try:
            # XXX should eventually use unicode, for now utf8
            sq = json.loads(sq, encoding='utf-8', result_encoding='utf-8')
        except ValueError, e:
            # debug ME-907
            LOG.exception('mql.pathexpr.wrap_query()', sq=sq, varenv=varenv)
            SIMPLEJSON_ERR_RE = re.compile(r'^(.+): line (\d+) column (\d+)')
            m = SIMPLEJSON_ERR_RE.match(str(e))
            if not m:
                raise
            response = JSONResponse(status='400 Bad Request',
                                    code='/api/status/error/request')
            text = 'json parse error: ' + m.group(1)
            response.log(text,
                         line=int(m.group(2)),
                         column=int(m.group(3)),
                         level='error')
            return response.response
        except Exception, e:
            return json_traceback(exception=e,
                                  status='400 Bad Request',
                                  code='/api/status/error/request')

def __init__(self,
             msg,
             http_code=400,
             app_code=DEF_ME_CODE,
             inner_exc=None,
             **kwds):
    self.msg = msg
    Exception.__init__(self, msg)

    if not is_valid_HTTP_code(http_code):
        http_code = 500
    self.http_status = get_HTTP_err(http_code)
    self.http_code = http_code

    # app_code and api code setup
    codes = app_code.split('/')
    if len(codes) < 3:
        codes = self.DEF_ME_CODE.split('/')
    self.comp_code = '%s/%s' % (self.DEF_PFX, codes[1])
    self.app_code = '%s' % '/'.join(codes[2:])

    self.messages = [self.gen_msgs(**kwds)]

    if not kwds.has_key('error'):
        # don't extract the current frame (__init__)
        stack = traceback.extract_stack()[:-1]
        kwds['traceback'] = '\r\n'.join(traceback.format_list(stack))

    # log inner exception or self
    exc = self
    if inner_exc:
        exc = inner_exc
    comp = app_code[1:].replace('/', '.')
    if exc == self:
        LOG.debug(comp, msg, **kwds)
    else:
        LOG.exception(msg, **kwds)

    self.kwds = kwds

def __init__(self, error_type, clause, message, cost=None, *args, **kwds):
    MQLParameterizedError.__init__(self, message, error_type, clause, *args,
                                   **kwds)

    if error_type not in self.error_types:
        error_type = 'UNKNOWN'

    self.error = {}
    self.error_type = error_type
    self.cost = cost

    if clause is not None:
        self.set_query(clause)

    self.error['code'] = self.get_error_id()

    # make sure we don't get ReadMode in a JSON response
    for key in self.kwds:
        if not isinstance(
                self.kwds[key],
                (basestring, int, long, float, dict, list, bool, type(None))):
            self.kwds[key] = str(self.kwds[key])

    self.error['info'] = self.kwds
    self.error['message'] = str(self)

    if self.error_type in ('INTERNAL', 'ACCESS', 'WRITE_QUOTA', 'UNKNOWN'):
        level = log_util.CRIT
    else:
        level = log_util.WARNING

    if self.error_type in ('GRAPH', 'CONNECTION', 'TIMEOUT'):
        # these log the graph query.
        # there is an idempotent write query that we do to get a dateline
        # that returns an error - but it's actually harmless and we don't
        # want to log it.
        if self.error.get('info') and self.error['info'].get(
                'detail', None) != 'primitive tagged as unique already exist':
            LOG.error(error_type.lower() + '_error',
                      repr(self.error),
                      gql=repr(getattr(self, 'graph_query', None)))
    # there's no graph query otherwise
    elif self.error_type in ('TYPE', 'DATELINE_INVALID', 'CURSOR_INVALID'):
        # this is probably a developer-level error, no need to LOG.error
        LOG.warn(error_type.lower() + '_error', repr(self.error))
    else:
        LOG.error(error_type.lower() + '_error', repr(self.error))

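# The isinstance() pass above coerces anything json can't serialize (e.g. a
# ReadMode object) to its str() form before it lands in the error payload.
# The same sanitizing pass in isolation, as a hedged Python 2 sketch
# (_json_safe is a hypothetical name, not part of this module):
def _json_safe(kwds):
    safe_types = (basestring, int, long, float, dict, list, bool, type(None))
    return dict((k, v if isinstance(v, safe_types) else str(v))
                for k, v in kwds.iteritems())

# _json_safe({'mode': ReadMode, 'n': 3}) -> {'mode': str(ReadMode), 'n': 3}
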
def lookup_by_guid_oneoff(self, g, varenv):
    root_ns_guid = self.bootstrap.root_namespace

    res = []
    name = []
    found = set()
    next = g

    # this is ridiculously deep - 18 ply.
    for i in xrange(6):
        res = self.lookup_by_guid_oneoff_internal(next, varenv)
        if not res:
            # we've ceased to make progress - bail
            LOG.warning('mql.namespace.error', 'id for guid not found',
                        guid=g)
            return g

        for pair in res:
            name.append(pair[0])
            next = pair[1]
            if next == root_ns_guid:
                name.reverse()
                return "/" + "/".join(name)

        if next in found:
            LOG.warning('mql.namespace.error',
                        'cycle in namespace looking for guid', guid=g)
            return g
        found.add(next)

    LOG.warning('mql.namespace.error',
                'depth limit exceeded in namespace lookup', guid=g)
    return g

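# The loop above is a bounded upward walk with cycle detection. The same
# skeleton in isolation (step() is a hypothetical stand-in for
# lookup_by_guid_oneoff_internal, returning a single (name, parent) pair
# or None; the real code batches several levels per query):
def _walk_to_root(start, root, step, max_hops=6):
    names, seen, cur = [], set(), start
    for _ in xrange(max_hops):
        pair = step(cur)
        if pair is None:
            return None          # no progress: dangling guid
        names.append(pair[0])
        cur = pair[1]
        if cur == root:
            names.reverse()      # we collected leaf-to-root; flip it
            return '/' + '/'.join(names)
        if cur in seen:
            return None          # namespace cycle
        seen.add(cur)
    return None                  # depth limit exceeded
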
def lookup_id_query(self, guid, varenv):
    """
    lots of nasty heuristics to find ids for id-identified objects:

    This seems to be called when lookup_ids can't finish the job, but it
    also seems to duplicate some of the logic there.

    Current rules:
    - do we have a /freebase/object_hints/best_hrid property
    - do we have a name in /
    - do we have a name in /XXX/YYY (XXX not boot or pub)
    - do we have a name in /XXX/YYY/ZZZ
    - do we have a name in /XXX/YYY (XXX may be boot or pub)
    - ask namespace.lookup_by_guid_oneoff...

    All of this trouble is mostly because some things (/type/object/type
    being the best example) have names in /bootstrap-namespace that we
    don't want to expose by accident.
    """
    query = {
        "@guid": guid,
        "best_hrid": [{
            ":typeguid": self.best_hrid_guid,
            ":value": None,
            ":optional": True,
        }],
        "has_root_name": [{
            ":type": "has_key",
            ":comparator": "octet",
            ":reverse": True,
            ":value": None,
            ":optional": True,
            "@id": "/"
        }],
        "has_2_level_name": [{
            ":type": "has_key",
            ":comparator": "octet",
            ":reverse": True,
            ":value": None,
            ":optional": True,
            "-has_key": [{
                ":comparator": "octet",
                ":value": None,
                "@id": "/"
            }]
        }],
        "has_3_level_name": [{
            ":type": "has_key",
            ":comparator": "octet",
            ":reverse": True,
            ":value": None,
            ":optional": True,
            "-has_key": [{
                ":comparator": "octet",
                ":value": None,
                "-has_key": [{
                    ":comparator": "octet",
                    ":value": None,
                    "@id": "/"
                }]
            }]
        }],
    }

    try:
        varenv["gr_log_code"] = "guid2id"
        result = self.querier.read(query, varenv)
        varenv.pop("gr_log_code")
    except EmptyResult:
        # everything was optional so we must not have found the guid itself.
        # this code is unnecessary, but has key documentation value.
        raise

    # we may get nothing back if the guid has been deleted (or if we were
    # deleting it) - in that case, just return the guid.
    if result is None:
        return guid

    idname = None

    hrids = result["best_hrid"]
    if hrids:
        if len(hrids) > 1:
            # This should never happen.
            # If it does, log an error but don't fail.
            LOG.error("mql.resolve.lookup_id_internal",
                      "multiple /freebase/object_hints/best_hrid")
        hrid = hrids[0][":value"]
        return hrid

    if result["has_root_name"]:
        idname = "/" + result["has_root_name"][0][":value"]
    elif (result["has_2_level_name"] and
          result["has_2_level_name"][0]["-has_key"][0][":value"] not in
          ("boot", "pub")):
        idname = ("/" + result["has_2_level_name"][0]["-has_key"][0][":value"]
                  + "/" + result["has_2_level_name"][0][":value"])
    elif result["has_3_level_name"]:
        idname = ("/" + result["has_3_level_name"][0]["-has_key"][0]
                  ["-has_key"][0][":value"] + "/" +
                  result["has_3_level_name"][0]["-has_key"][0][":value"] +
                  "/" + result["has_3_level_name"][0][":value"])
    elif result["has_2_level_name"]:
        idname = ("/" + result["has_2_level_name"][0]["-has_key"][0][":value"]
                  + "/" + result["has_2_level_name"][0][":value"])
    else:
        idname = self.namemap.lookup_by_guid_oneoff(guid, varenv)

    # special hack for the root namespace
    if idname == "/boot/root_namespace":
        return "/"
    elif idname == "/boot/root_user":
        return "/user/root"
    elif idname is not None and valid_idname(idname):
        return idname
    else:
        return guid

def search_id_result(self, head, varenv):
    """
    take the id result struct and attempt to produce an id.

    Here are the rules:
    - best_hrid is chosen if present
    - the shortest name is best
    - except that any three level name is better than a /boot name.
    - among names of the same length, pick any one at random.
    """
    hrids = head["best_hrid"]
    if hrids:
        if len(hrids) > 1:
            # This should never happen.
            # If it does, log an error but don't fail.
            LOG.error("mql.resolve.best_hrid",
                      "multiple /freebase/object_hints/best_hrid")
        hrid = hrids[0][":value"]
        return hrid

    # bfs_list format is an array of
    # ( value, parent, guid, keys, depth )
    bfs_list = [(None, None, head["@guid"], head.get("-has_key", []), 0)]

    root = self.namemap.bootstrap.root_namespace
    boot = self.namemap.bootstrap.boot

    is_namespace = False
    if isinstance(head["is_instance_of"], dict):
        is_namespace = True

    has_boot = None

    if head["@guid"] == root:
        return "/"
    elif head["@guid"] == boot:
        return "/boot"

    while bfs_list:
        front = bfs_list.pop(0)
        for item in front[3]:
            bfs_item = (item[":value"], front, item["@guid"],
                        item.get("-has_key", []), front[4] + 1)
            if bfs_item[2] == root:
                # we're done - what are we called?
                rv = []
                pos = bfs_item
                while pos[1]:
                    rv.append(pos[0])
                    pos = pos[1]
                return "/" + "/".join(rv)
            elif bfs_item[2] == boot:
                has_boot = bfs_item
            elif (self.topic_en and bfs_item[2] == self.topic_en and
                  bfs_item[4] == 1):
                # hack for things *directly* in /en to short circuit early...
                return "/en/" + bfs_item[0]
            elif not is_namespace and bfs_item[2] in self.forbidden_namespaces:
                # terminate recursion at /wikipedia/en etc.
                pass
            else:
                bfs_list.append(bfs_item)

    # are we in /boot?
    if has_boot and has_boot[4] == 1:
        return "/boot/" + has_boot[0]

    # ok, we've searched the entire list. front is the last item...
    # try a regular lookup_id() on it. (so we can cache it too!)
    if front[4] == 3:
        leading_id = self.lookup_id_internal(front[2], varenv)
        if leading_id and leading_id[0] == "/":
            # we got something...
            rv = [leading_id]
            pos = front
            while pos[1]:
                rv.append(pos[0])
                pos = pos[1]
            return "/".join(rv)

    # failure
    return None

def lookup_ids(self, guid_list, varenv):
    """
    Given a list of guids, returns an id for each one, using as few
    queries as possible. Returns a dictionary of guid->id.
    """
    ask_list = set()
    result = {}

    if "asof" not in varenv:
        # Step 1: maybe we already know.
        for guid in guid_list:
            if isinstance(guid, unicode):
                guid = guid.encode("utf-8")
            if guid in self.guids:
                LOG.debug("mql.lookup.id.cached",
                          "found %s in cache" % guid,
                          value=self.guids[guid])
                result[guid] = self.guids[guid]
            elif guid not in ask_list:
                ask_list.add(guid)
        cache = len(ask_list) < 10000
    else:
        for guid in guid_list:
            if isinstance(guid, unicode):
                guid = guid.encode("utf-8")
            ask_list.add(guid)
        cache = False

    if not ask_list:
        return result

    LOG.debug("mql.lookup.ids", "Lookup ids", code=len(ask_list))

    self.preload(varenv)

    # Step 2: resolve the ask_list
    query = [{
        "@guid": ask_list,
        "@pagesize": len(ask_list) + 1,
        "best_hrid": [{
            ":typeguid": self.best_hrid_guid,
            ":value": None,
            ":optional": True,
        }],
        "-has_key": [{
            ":value": None,
            ":optional": True,
            ":comparator": "octet",
            ":pagesize": 1000,
            "@guid": None,
            "-has_key": [{
                ":value": None,
                ":optional": True,
                ":comparator": "octet",
                "@guid": None,
                "-has_key": [{
                    ":value": None,
                    ":optional": True,
                    ":comparator": "octet",
                    "@guid": None,
                }]
            }]
        }],
        "is_instance_of": {
            "@id": "/type/namespace",
            ":optional": True
        }
    }]

    varenv["gr_log_code"] = "guid2id"
    query_results = self.querier.read(query, varenv)
    varenv.pop("gr_log_code")

    LOG.debug("mql.lookup.id.results", "", results=query_results)

    # now see what we found out...
    # these should be cached.
    leftover_guids = []
    for item in query_results:
        res = self.search_id_result(item, varenv)
        if res:
            result[item["@guid"]] = res
            if cache:
                self.guids[item["@guid"]] = res

    # every guid in guid_list has to be present in the result.
    for guid in guid_list:
        if guid not in result:
            LOG.debug("mql.lookup.id.notfound", "midifying %s" % guid)
            result[guid] = mid.of_guid(guid[1:])

    return result

def cmdline_main():
    LOG.warning("benchmark", "test start")
    start_time = time.time()

    from mql.mql import cmdline
    op = cmdline.OP("testing")
    op.add_option(
        "-n", dest="num", default=1000, type="int",
        help="number of iterations")
    op.add_option(
        "-P", dest="profile", default=None,
        help="run profiler with output to file")
    op.add_option("-c", dest="call", default=None, help="function to call")
    op.add_option(
        "-f", dest="query_file", default=None, help="file containing query")
    op.add_option(
        "--flush", dest="flush", default=None,
        help="flush cache between every request")
    op.add_option("-t", dest="type", default="mql", help="graph or MQL query")

    options, args = op.parse_args()

    stop_time = time.time()
    op.ctx.gc.totalcost["dt"] = stop_time - start_time
    LOG.warning("start cost", {
        "nreqs": op.ctx.gc.nrequests,
        "cost": op.ctx.gc.totalcost
    })

    options, args = op.parse_args()

    queryfile = options.query_file
    if queryfile is not None:
        qf = open(queryfile, "r")
        query = "".join(qf.readlines())
        regex = re.compile("[\n\t]+")
        query = regex.sub(" ", query)
        qf.close()
    elif options.call:
        query = globals()[options.call]()
    elif len(args) == 1:
        query = args[0]
    else:
        op.error("Must specify a query argument")

    if options.type == "mql":
        # XXX should eventually use unicode, for now utf8
        query = json.loads(query, encoding="utf-8", result_encoding="utf-8")
    elif options.type == "graph":
        pass
    else:
        op.error("-t must be 'mql' or 'graph'")

    if options.profile:
        if profiler == "hotshot":
            profile = hotshot.Profile(options.profile)
            profile.runcall(test_run, op.ctx, op.varenv, options, query)
            LOG.warning(
                "benchmark",
                "Saving hotshot profile in Stats format to %s" %
                options.profile)
        elif profiler == "cProfile":
            profile = cProfile.Profile()
            profile.runcall(test_run, op.ctx, op.varenv, options, query)
            LOG.warning(
                "benchmark",
                "Saving cProfile data in kcachegrind format to %s" %
                options.profile)
            # get from http://jcalderone.livejournal.com/21124.html
            # and put in thirdparty/pyroot
            from mql.mql import lsprofcalltree
            k = lsprofcalltree.KCacheGrind(profile)
            k.output(open(options.profile, "w"))
        else:
            LOG.warning("benchmark",
                        "No profiler available, not running benchmark")
    else:
        reslist = test_run(op.ctx, op.varenv, options, query)

    LOG.warning("run cost", {
        "nreqs": op.ctx.gc.nrequests,
        "cost": op.ctx.gc.totalcost
    })

reserved_names = ('request_id', 'cost', 'lang', 'transaction_id',
                  'permission', 'cursor', 'user')

valid_queries = ((k, v) for k, v in sq.iteritems() if k not in reserved_names)

# make sure to copy the request_id
if 'request_id' in sq:
    response['request_id'] = sq['request_id']

# should only be looking either at sq['query'] for a single query or
# sq['queries'] for multiple queries
for id, subq in valid_queries:
    # assuming querier is a bound method here..
    LOG.notice('Query',
               '%s.%s' % (querier.im_class.__name__, querier.__name__),
               subq=subq)
    try:
        results[id] = querier(subq, varenv)
        response.extend(status='200 OK')
    except EmptyResult, e:
        LOG.info('emptyresult', '%s' % e)
        response.log('empty result for query %s' % subq)
        result = None
    # exceptions should be packed into response['error']
    except ParameterizedError, e:
        if isinstance(e, MQLInternalError):
            response.extend(status='500 Internal Server Error')