def json_traceback(response=None, exception=None, **kws):
    """Log the exception being handled and describe it in a JSON response.

    Only used when debugging is on: the returned structure carries roughly
    what an interactive Python session would print for the error.

    Args:
        response: response object to record the error on. When None, a
            JSONResponse with a '500 Internal Server Error' status is
            created.
        exception: not read here; the exception details are taken from
            sys.exc_info().
        **kws: extra fields handed to response.extend().

    Returns:
        response.response on the normal path, or a plain dict with 'status'
        and 'messages' keys when building the response itself fails.
    """
    debug = 1
    exc_type, exc_value, exc_tb = sys.exc_info()

    try:  # outer layer: always drop the traceback reference
        try:  # inner layer: always hand back some response
            if (debug and exc_type is IOError
                    and str(exc_value).startswith('Write')):
                # An IOError raised while writing to the client: it is
                # probably better not to try writing to the client again,
                # even with debugging on.
                # NOTE(review): debug is not read again after this point.
                LOG.error('json_traceback', 'skipping error write to client')
                debug = 0

            # Mirror the formatted traceback into the log, one entry per
            # chunk, minus the chunk's final character.
            for chunk in traceback.format_exception(exc_type, exc_value,
                                                    exc_tb):
                LOG.error('json_traceback', '%s' % chunk[:-1])

            if response is None:
                response = JSONResponse(
                    status='500 Internal Server Error',
                    code='/api/status/error/server')
            response.extend(**kws)

            frame_fields = 'file,line,func,source'.split(',')
            stack = []
            for frame in traceback.extract_tb(exc_tb, None):
                stack.append(dict(zip(frame_fields, frame)))

            response.log('%s: %s' % (exc_type, exc_value),
                         stack=stack, level='error')
            return response.response
        except Exception:
            # Hit the backstop: there must be a bug in the normal exception
            # handling code above, so fall back to something simple.
            return {
                'status': '500 Internal Server Error',
                'messages': [{
                    'level': 'error',
                    'text': traceback.format_exc()
                }],
            }
    finally:
        # erase the traceback
        exc_tb = None
def check_write_throttle(querier, varenv):
    """Enforce the daily write quota described by varenv['max_writes'].

    The quota entry is read for its 'guid' and 'limit' keys. Writes
    attributed to that guid since one day ago are counted with two graph
    queries (normal attribution plus oauth/open social attribution) and the
    total is compared against the limit.

    Args:
        querier: object whose gc.read_varenv(query, varenv) runs the count
            queries.
        varenv: request environment providing get_user_guid() and the
            optional 'max_writes' entry.

    Returns:
        True when the write may go ahead: no quota is configured, the user
        is listed in MAX_WRITE_EXCEPTED_USERS, or the count is within the
        limit.

    Raises:
        MQLWriteQuotaError: the counted writes exceed the limit.
    """
    user = varenv.get_user_guid()
    quota = varenv.get('max_writes', None)
    if quota is None or user in MAX_WRITE_EXCEPTED_USERS:
        LOG.error('write.throttle.skipped',
                  'user=%s skipped write throttle' % user)
        return True

    # The logged-in guid starts with a '#' while quota['guid'] does not, so
    # strip it for the comparison below to be meaningful.
    if user[0] == '#':
        user = user[1:]
    quota_guid = quota['guid']
    if quota_guid != user:
        LOG.notice(
            'write.throttle.different_users',
            'Logged in user: %s different from mqlwrite user: %s' %
            (quota_guid, user))

    window = timedelta(1)  # 1 day
    since = (datetime.utcnow() - window).isoformat()

    # MQL attribution models documented at:
    # https://wiki.metaweb.com/index.php/MQL_Attribution_for_OAuth%2C_Acre%2C_etc
    # Both queries need "optional" to suppress EMPTY on count=0.
    count_queries = (
        # normal attribution query
        '(scope=%s timestamp>%s live=dontcare newest>=0 result=(count) '
        'optional)',
        # oauth/open social attribution query
        '(scope->(scope=%s) timestamp>%s live=dontcare newest>=0 '
        'result=(count) optional)',
    )
    count = 0
    for template in count_queries:
        reply = querier.gc.read_varenv(template % (quota_guid, since), varenv)
        count += int(reply[0])

    limit = quota['limit']
    if count > limit:
        LOG.alert(
            'write.throttle.exceeded',
            'user=%s count=%s max=%d delta=%s' %
            (quota_guid, count, limit, str(window)))
        msg = 'Daily write limit of %s was exceeded.' % limit
        raise MQLWriteQuotaError(
            None,
            msg,
            user='******' + quota_guid,
            count=count,
            max_writes=limit,
            period=str(window))

    LOG.notice('write.throttle.ok',
               'user=%s count=%s max=%s' % (quota_guid, count, limit))
    return True
def __init__(self, error_type, clause, message, cost=None, *args, **kwds):
    """Build the error payload in self.error and log the error.

    Args:
        error_type: error category; anything not in self.error_types is
            recorded as 'UNKNOWN'.
        clause: query clause the error refers to, passed to set_query()
            when not None.
        message: message forwarded to MQLParameterizedError.__init__.
        cost: stored as self.cost.
        *args, **kwds: forwarded to MQLParameterizedError.__init__.
    """
    MQLParameterizedError.__init__(self, message, error_type, clause, *args,
                                   **kwds)
    if error_type not in self.error_types:
        error_type = 'UNKNOWN'

    self.error = {}
    self.error_type = error_type
    self.cost = cost

    if clause is not None:
        self.set_query(clause)
    self.error['code'] = self.get_error_id()

    # Make sure we don't get ReadMode (or any other non-JSON value) in a
    # JSON response: stringify whatever is not a plain JSON type.
    # presumably self.kwds is populated by the parent initializer -- confirm
    json_types = (basestring, int, long, float, dict, list, bool, type(None))
    for key in self.kwds:
        value = self.kwds[key]
        if not isinstance(value, json_types):
            self.kwds[key] = str(value)

    self.error['info'] = self.kwds
    self.error['message'] = str(self)

    # NOTE(review): level is computed but not read anywhere in this method.
    if self.error_type in ('INTERNAL', 'ACCESS', 'WRITE_QUOTA', 'UNKNOWN'):
        level = log_util.CRIT
    else:
        level = log_util.WARNING

    log_key = error_type.lower() + '_error'
    payload = repr(self.error)

    if self.error_type in ('GRAPH', 'CONNECTION', 'TIMEOUT'):
        # These log the graph query too. There is an idempotent write query
        # that we do to get a dateline that returns an error -- but it is
        # actually harmless and we don't want to log it.
        info = self.error.get('info')
        harmless = 'primitive tagged as unique already exist'
        if info and info.get('detail', None) != harmless:
            LOG.error(log_key, payload,
                      gql=repr(getattr(self, 'graph_query', None)))
    elif self.error_type in ('TYPE', 'DATELINE_INVALID', 'CURSOR_INVALID'):
        # There is no graph query here, and this is probably a
        # developer-level error, so a warning is enough.
        LOG.warn(log_key, payload)
    else:
        LOG.error(log_key, payload)
def parse_result_index(self, result, varenv):
    """Decode the ordering index from the rows of a graph result.

    Args:
        result: sequence of result rows. Each row is zipped against
            self.result to name its fields; the 'value' field holds the
            index.
        varenv: unused here.

    Returns:
        None when result is empty; otherwise whatever
        unquote_value('float', value, False) yields for the first row.
        More than one row is logged as an error and the first row is used.
    """
    if len(result) == 0:
        return None

    if len(result) > 1:
        # Not fatal (this used to raise MQLInternalError): report it and
        # carry on with the first row, as the log message promises.
        try:
            error_query = self.parent.query.original_query
        except Exception:
            # No usable parent query to report; fall back to our own.
            error_query = self.query
        LOG.error(
            'multiple.indices',
            'More than one piece of order information (using first one)',
            query=repr(error_query),
            indices=repr(result))

    dz = dict(zip(self.result, result[0]))
    return unquote_value('float', dz['value'], False)
def wrap_query(querier, sq, varenv=None, transaction_id=None):
    """Deprecated wrapper that decodes a JSON query string.

    Intended to run a query with the given querier (usually something like
    ctx.low_querier.read), performing envelope packing and unpacking,
    multiple queries, error handling, etc.

    NOTE(review): as written here, only string decoding and its error
    handling are implemented -- the decoded query is not used afterwards
    and querier / transaction_id are never read.

    Args:
        querier: see above; unused in this body.
        sq: the query; decoded with json.loads when it is a string.
        varenv: only passed along to the failure log entry.
        transaction_id: unused in this body.

    Returns:
        A '400 Bad Request' response structure when decoding fails in a
        recognised way, otherwise None.

    Raises:
        ValueError: re-raised when the decode error text does not have the
            "<reason>: line N column M" shape.
    """
    LOG.error(
        'deprecated',
        'mw.mql.pathexpr.wrap_query() is DEPRECATED and will go away soon!')

    if isinstance(sq, basestring):
        # convert to json query
        try:
            # XXX should eventually use unicode, for now utf8
            sq = json.loads(sq, encoding='utf-8', result_encoding='utf-8')
        except ValueError as e:
            # debug ME-907
            LOG.exception('mql.pathexpr.wrap_query()', sq=sq, varenv=varenv)

            location_pattern = re.compile('^(.+): line (\d+) column (\d+)')
            found = location_pattern.match(str(e))
            if not found:
                raise

            reason, line, column = found.groups()
            response = JSONResponse(
                status='400 Bad Request', code='/api/status/error/request')
            response.log(
                'json parse error: ' + reason,
                line=int(line),
                column=int(column),
                level='error')
            return response.response
        except Exception as e:
            return json_traceback(
                exception=e,
                status='400 Bad Request',
                code='/api/status/error/request')
def lookup_id_query(self, guid, varenv):
    """Find an id for a guid with a single query and some nasty heuristics.

    This seems to be called when lookup_ids can't finish the job, but it
    also seems to duplicate some of the logic there.

    Current rules, in order:
      - do we have a /freebase/object_hints/best_hrid property
      - do we have a name in /
      - do we have a name in /XXX/YYY (XXX not boot or pub)
      - do we have a name in /XXX/YYY/ZZZ
      - do we have a name in /XXX/YYY (XXX may be boot or pub)
      - ask namespace.lookup_by_guid_oneoff...

    All of this trouble is mostly because some things (/type/object/type
    being the best example) have names in /bootstrap-namespace that we
    don't want to expose by accident.

    Args:
        guid: guid to resolve.
        varenv: query environment; "gr_log_code" is set to "guid2id" for
            the duration of the read and removed again afterwards, even if
            the read raises.

    Returns:
        The id found, or guid itself when the query returns None or no
        valid id can be built.

    Raises:
        Whatever self.querier.read raises (EmptyResult included) is
        propagated unchanged.
    """

    def two_level_id(entry):
        # "/<namespace>/<key>" for a has_2_level_name entry.
        return "/" + entry["-has_key"][0][":value"] + "/" + entry[":value"]

    query = {
        "@guid": guid,
        "best_hrid": [{
            ":typeguid": self.best_hrid_guid,
            ":value": None,
            ":optional": True,
        }],
        "has_root_name": [{
            ":type": "has_key",
            ":comparator": "octet",
            ":reverse": True,
            ":value": None,
            ":optional": True,
            "@id": "/"
        }],
        "has_2_level_name": [{
            ":type": "has_key",
            ":comparator": "octet",
            ":reverse": True,
            ":value": None,
            ":optional": True,
            "-has_key": [{
                ":comparator": "octet",
                ":value": None,
                "@id": "/"
            }]
        }],
        "has_3_level_name": [{
            ":type": "has_key",
            ":comparator": "octet",
            ":reverse": True,
            ":value": None,
            ":optional": True,
            "-has_key": [{
                ":comparator": "octet",
                ":value": None,
                "-has_key": [{
                    ":comparator": "octet",
                    ":value": None,
                    "@id": "/"
                }]
            }]
        }],
    }

    # Everything in the query is optional, so an EmptyResult here means the
    # guid itself was not found; it is deliberately left to propagate. The
    # finally clause keeps the log code from leaking into later queries
    # that share this varenv when the read fails.
    varenv["gr_log_code"] = "guid2id"
    try:
        result = self.querier.read(query, varenv)
    finally:
        varenv.pop("gr_log_code")

    # We may get nothing back if the guid has been deleted (or if we were
    # deleting it); in that case, just return the guid.
    if result is None:
        return guid

    hrids = result["best_hrid"]
    if hrids:
        if len(hrids) > 1:
            # This should never happen.
            # If it does, log an error but don't fail.
            LOG.error("mql.resolve.lookup_id_internal",
                      "multiple /freebase/object_hints/best_hrid")
        return hrids[0][":value"]

    root_names = result["has_root_name"]
    two_level = result["has_2_level_name"]
    three_level = result["has_3_level_name"]

    if root_names:
        idname = "/" + root_names[0][":value"]
    elif (two_level and
          two_level[0]["-has_key"][0][":value"] not in ("boot", "pub")):
        idname = two_level_id(two_level[0])
    elif three_level:
        leaf = three_level[0]
        middle = leaf["-has_key"][0]
        top = middle["-has_key"][0]
        idname = ("/" + top[":value"] + "/" + middle[":value"] + "/" +
                  leaf[":value"])
    elif two_level:
        # Only a /boot or /pub name is left; better than nothing.
        idname = two_level_id(two_level[0])
    else:
        idname = self.namemap.lookup_by_guid_oneoff(guid, varenv)

    # special hack for the root namespace
    if idname == "/boot/root_namespace":
        return "/"
    if idname == "/boot/root_user":
        return "/user/root"
    if idname is not None and valid_idname(idname):
        return idname
    return guid
def search_id_result(self, head, varenv):
    """Turn an id result structure into an id, or None if none is found.

    Rules:
      - best_hrid is chosen if present
      - the shortest name is best
      - except that any three level name is better than a /boot name
      - among names of the same length, whichever is reached first wins

    Args:
        head: result dict for the object; read for "best_hrid", "@guid",
            "is_instance_of" and the nested "-has_key" lists.
        varenv: only passed on to self.lookup_id_internal().

    Returns:
        The id string, or None when the search fails.
    """

    def names_toward_head(node, names):
        # Follow parent links from node back to the head entry (whose
        # parent is None), appending each key name along the way.
        while node[1]:
            names.append(node[0])
            node = node[1]
        return names

    best = head["best_hrid"]
    if best:
        if len(best) > 1:
            # This should never happen.
            # If it does, log an error but don't fail.
            LOG.error("mql.resolve.best_hrid",
                      "multiple /freebase/object_hints/best_hrid")
        return best[0][":value"]

    # Each queue entry is a tuple of
    #   (value, parent entry, guid, keys, depth)
    head_guid = head["@guid"]
    queue = [(None, None, head_guid, head.get("-has_key", []), 0)]

    root = self.namemap.bootstrap.root_namespace
    boot = self.namemap.bootstrap.boot
    is_namespace = isinstance(head["is_instance_of"], dict)
    boot_entry = None

    if head_guid == root:
        return "/"
    if head_guid == boot:
        return "/boot"

    # Breadth-first walk outwards through the containing namespaces.
    while queue:
        front = queue.pop(0)
        for key in front[3]:
            name = key[":value"]
            key_guid = key["@guid"]
            depth = front[4] + 1
            node = (name, front, key_guid, key.get("-has_key", []), depth)

            if key_guid == root:
                # we're done - what are we called?
                return "/" + "/".join(names_toward_head(node, []))

            if key_guid == boot:
                boot_entry = node
            elif self.topic_en and key_guid == self.topic_en and depth == 1:
                # hack for things *directly* in /en to short circuit early
                return "/en/" + name
            elif is_namespace or key_guid not in self.forbidden_namespaces:
                # Anything else is explored further; for non-namespaces the
                # search stops at /wikipedia/en etc.
                queue.append(node)

    # are we in /boot?
    if boot_entry and boot_entry[4] == 1:
        return "/boot/" + boot_entry[0]

    # The whole list has been searched and front is the last entry visited.
    # Try a regular lookup_id on it (so it can be cached too).
    if front[4] == 3:
        leading_id = self.lookup_id_internal(front[2], varenv)
        if leading_id and leading_id[0] == "/":
            # we got something...
            return "/".join(names_toward_head(front, [leading_id]))

    # failure
    return None