Ejemplo n.º 1
0
    def fetch(self, key, varenv):
        '''Look up a single key of this namespace in the graph.

        Returns the '#'-prefixed guid the key points at after handing it to
        self.store(), or False when the query returns no rows.

        Raises MQLInternalError when the returned row names a namespace
        other than self.guid.
        '''
        namemap = self.namemap
        qs = ('(typeguid=%s left=%s value=%s comparator="octet" datatype=string pagesize=2 result=((value left right)))'
              % (namemap.bootstrap.has_key[1:], self.guid[1:], quote(key)))

        rows = namemap.gc.read_varenv(qs, varenv)
        nrows = len(rows)

        if nrows == 0:
            return False

        # pagesize=2 exists only so that a duplicate key can be noticed;
        # the first row is still the one that gets used.
        if nrows > 1:
            LOG.warning('mql.duplicate.key',
                        '%s in namespace %s' % (key, self.guid))

        first = rows[0]
        value = unquote(first[0])
        nsg = '#' + first[1]
        g = '#' + first[2]

        if nsg != self.guid:
            raise MQLInternalError(None,
                                   "Mismatched namespace query",
                                   value=value,
                                   namespace=self.guid,
                                   returned_namespace=nsg,
                                   guid=g)

        # The returned value is deliberately not compared with key: graphd
        # is case-insensitive, so the two may legitimately differ.
        self.store(key, g)
        return g
Ejemplo n.º 2
0
    def refresh(self, varenv):
        '''Re-read this namespace from the graph if it is still cacheable.

        Does nothing when self.complete is falsy. Otherwise queries for up
        to self.max_complete + 1 keys (restricted to entries after
        self.last_dateline when one is recorded). If more than
        self.max_complete rows come back, self.complete is set to 0; if
        self.complete was -1 it becomes 1. Any rows returned are passed to
        self.update_namespaces().
        '''
        # a namespace already marked as too large is never fetched whole
        if not self.complete:
            return

        if self.last_dateline is None:
            dateline_clause = ''
        else:
            dateline_clause = 'dateline>%s' % self.last_dateline

        qs = ('(typeguid=%s left=%s comparator="octet" datatype=string %s pagesize=%d result=((value left right)))'
              % (self.namemap.bootstrap.has_key[1:], self.guid[1:],
                 dateline_clause, self.max_complete + 1))

        rows = self.namemap.gc.read_varenv(qs, varenv)

        # one row more than max_complete means the namespace is too big
        if len(rows) > self.max_complete:
            LOG.notice('mql.namespace.refresh',
                       '%s too large to cache' % self.guid)
            self.complete = 0
        elif self.complete == -1:
            self.complete = 1

        if len(rows) > 0:
            self.update_namespaces(rows)
        # XXX when the result is empty and self.last_dateline is set, the
        #  dateline should be extracted from the empty result and stored in
        #  self.last_dateline to minimize the dateline ranges in later
        #  queries. Nothing is done in that case today.
Ejemplo n.º 3
0
def check_permission_permission(querier, varenv, permission_guid):
    """
    Report whether the current user may administer the given permission.

    The answer is memoized per permission guid in
    varenv['permission_permission']. The user is checked first; if that
    fails and varenv.authority_guid is set, the authority is checked and a
    successful authority grant is logged.
    """
    cache = varenv.setdefault('permission_permission', {})
    if permission_guid in cache:
        return cache[permission_guid]

    userguid = varenv.get_user_guid()
    authorityguid = varenv.authority_guid
    permission = Permission(querier, permission_guid)

    allowed = permission.user_has_permission_permission(userguid, varenv)
    if not allowed and authorityguid:
        # fall back to the authority acting on the user's behalf
        allowed = permission.user_has_permission_permission(
            authorityguid, varenv)
        if allowed:
            LOG.notice(
                'access.authority',
                'for user %s, permission %s and authority %s' %
                (userguid, permission_guid, authorityguid))

    cache[permission_guid] = allowed
    return cache[permission_guid]
Ejemplo n.º 4
0
    def lookup_mids_of_guids(self, guid_list, varenv):
        """Map each guid in guid_list to a list of mids.

        The first mid in each list is computed directly from the guid
        itself. When the graph reports nodes with a replaced_by link
        pointing at the guid, their mids are appended after it.

        Returns an empty dict when guid_list is empty or None.
        """
        if not guid_list:
            return {}

        ask_list = set()
        result = {}
        for g in guid_list:
            # convert the mid directly.
            m = mid.of_guid(g[1:])
            ask_list.add(g)
            result[g] = [m]

        LOG.debug("mql.lookup.mids", "Looking up mids for guids")

        # We start from the requested node and look backwards along
        # replaced_by links for the nodes it has superseded.

        # +-+  r.b.    +-+
        # |A| -------> |B|
        # +-+          +-+
        #               |
        # +-+           |
        # |C|-----------+
        # +-+
        #
        # in this diagram, we root at B.
        # We list B first but also A and C if present.

        query = [{
            "@guid": ask_list,
            "@pagesize": len(ask_list) + 1,
            "-replaced_by": [{
                "@guid": None,
                ":optional": True
            }]
        }]

        # NOTE(review): the log code reads "mids2guids" although this method
        # goes from guids to mids -- confirm before changing, log consumers
        # may key on it.
        varenv["gr_log_code"] = "mids2guids"
        try:
            query_results = self.querier.read(query, varenv)
        finally:
            # always remove the log code, even when the read fails, so it
            # cannot leak into later queries that share this varenv
            varenv.pop("gr_log_code")

        for item in query_results:
            guid = item["@guid"]

            # only nodes with incoming replaced_by links need their list
            # rebuilt; everything else keeps the directly computed mid
            if item["-replaced_by"]:
                # me first
                result[guid] = [mid.of_guid(guid[1:])]
                # then everyone else
                for r in item["-replaced_by"]:
                    result[guid].append(mid.of_guid(r["@guid"][1:]))

        return result
Ejemplo n.º 5
0
def check_permission(querier, varenv, permissionguid):
    """
    Report whether the current user may write to objects governed by
    permissionguid.

    The answer is memoized per permission guid in
    varenv['write_permission']. Access is tried, in order, for the user,
    then for varenv.authority_guid when set, and finally granted outright
    when varenv['$privileged'] is the Privileged marker.
    """
    cache = varenv.setdefault('write_permission', {})
    if permissionguid in cache:
        return cache[permissionguid]

    userguid = varenv.get_user_guid()
    authorityguid = varenv.authority_guid
    permission = Permission(querier, permissionguid)

    allowed = permission.user_has_write_permission(userguid, varenv)

    if not allowed and authorityguid:
        # fall back to the authority acting on the user's behalf
        allowed = permission.user_has_write_permission(authorityguid, varenv)
        if allowed:
            LOG.notice(
                'access.authority',
                'for user %s, permission %s and authority %s' %
                (userguid, permissionguid, authorityguid))

    if not allowed and varenv.get('$privileged') is Privileged:
        # privileged callers bypass the permission check entirely
        LOG.notice(
            'access.privileged',
            'for user %s and permission %s' % (userguid, permissionguid))
        allowed = True

    cache[permissionguid] = allowed
    return cache[permissionguid]
Ejemplo n.º 6
0
def json_traceback(response=None, exception=None, **kws):
    """
    Log the exception currently being handled and describe it in a JSON
    response.

    The exception is taken from sys.exc_info(); the ``exception`` argument
    is accepted but not read. Each line of the formatted traceback is
    written to the log. If ``response`` is None a JSONResponse with a 500
    status is created; ``kws`` are merged in with response.extend(), and the
    error text plus a structured stack is recorded with response.log().

    Returns response.response, or a minimal plain dict carrying the
    formatted traceback if anything in the normal path itself fails.
    """
    etype, evalue, etb = sys.exc_info()

    try:  # try/finally
        try:  # try/except

            if etype is IOError and str(evalue)[:5] == 'Write':
                # an IOError while writing to the client: note it in the
                # log (the response is still built below)
                LOG.error('json_traceback', 'skipping error write to client')

            # write to log, one entry per traceback line without its
            # trailing newline
            for line in traceback.format_exception(etype, evalue, etb):
                LOG.error('json_traceback', '%s' % line[:-1])

            if response is None:
                response = JSONResponse(status='500 Internal Server Error',
                                        code='/api/status/error/server')
            response.extend(**kws)

            stack = [
                dict(zip('file,line,func,source'.split(','), quad))
                for quad in traceback.extract_tb(etb, None)
            ]

            text = '%s: %s' % (etype, evalue)
            response.log(text, stack=stack, level='error')

            return response.response

        except Exception:
            # hit the backstop.  must be a bug in the normal exception
            #  handling code, do something simple.
            return {
                'status': '500 Internal Server Error',
                'messages': [{
                    'level': 'error',
                    'text': traceback.format_exc()
                }],
            }

    finally:
        # drop the traceback reference so the frame is not kept alive
        etb = None
Ejemplo n.º 7
0
    def update_namespaces(self, r):
        '''Store every (value, left, right) row of a graph result in this
        namespace and remember the result's dateline.

        Each row's left guid is asserted to equal self.guid.
        '''
        self.last_dateline = r.dateline

        for row in r:
            name = unquote(row[0])
            owner = '#' + row[1]
            target = '#' + row[2]
            assert owner == self.guid
            self.store(name, target)

        LOG.debug('updated namespace %s' % self.guid, '%d entries' % len(r))
Ejemplo n.º 8
0
  def reset_cost(self):
    """Reset this object's request counters and start an empty cost table."""
    LOG.debug('resetting graphd costs')

    # Every cost figure, including the cost info returned by GQL,
    # accumulates in this dict.
    self.totalcost = defaultdict(float)

    # These three counters remain only for backward compatibility.
    self.nrequests = 0
    # Both start at -1 because the first attempt is not really a 'retry'.
    self.dbretries = self.qretries = -1
Ejemplo n.º 9
0
  def add_graph_costs(self, costs, dbtime, tries):
    """Fold the cost of one graph request into self.totalcost.

    ``costs`` is parsed with coststr_to_dict(); the entries 'mql_dbtime'
    (dbtime), 'mql_dbtries' (tries, or 1 when falsy) and 'mql_dbreqs' (1)
    are added to it. The keys 'mm' and 'fm' are kept as maxima; every other
    key is summed.
    """
    request_cost = coststr_to_dict(costs)
    request_cost['mql_dbtime'] = dbtime
    request_cost['mql_dbtries'] = tries or 1
    request_cost['mql_dbreqs'] = 1
    LOG.debug('graphdrequest.cost %s', request_cost)

    for k, v in request_cost.items():
      if k in ('mm', 'fm'):
        # These are high water marks. Don't sum them. Defaulting to v keeps
        # the first observation from being compared against None.
        self.totalcost[k] = max(v, self.totalcost.get(k, v))
      elif k in self.totalcost:
        self.totalcost[k] += v
      else:
        self.totalcost[k] = v
Ejemplo n.º 10
0
  def write_varenv(self, qs, varenv):
    """Send the write "query" qs to the graph and record its dateline.

    Raises GraphConnectionError when this object has a truthy ``readonly``
    attribute. If the graph rejects the caller's dateline, the
    'write_dateline' entry in varenv is blanked and the write is sent once
    more. Afterwards varenv carries the new 'write_dateline', a
    'last_write_time' timestamp and 'is_write_continuation' = True.

    Returns the result of the write.
    """
    if getattr(self, 'readonly', None):
      raise GraphConnectionError(
          'Tried to write to a read-only graph',
          http_code=500,
          app_code='/mqlwrite/backend/read_only')

    dateline_in = varenv.get('write_dateline', None)

    # flagged before the request goes out, so a failed attempt counts too
    self.write_occurred = 1

    try:
      reply = self._generate_and_transmit_query(qs, varenv, WriteMode)
    except MQLDatelineInvalidError:
      # same recovery as on the read path: forget the dateline and retry
      LOG.info('mqlwrite.dateline.delete',
               'got an invalid dateline, deleting from varenv',
               varenv.get('write_dateline'))
      varenv['write_dateline'] = ''
      reply = self._generate_and_transmit_query(qs, varenv, WriteMode)

    dateline_out = reply.dateline

    # Later reads and writes in this session use the new dateline; it is
    # also what gets handed back to the user for subsequent
    # mqlreads/mqlwrites.
    varenv['write_dateline'] = dateline_out
    varenv['last_write_time'] = time.time()
    log_graph_write(varenv, dateline_in, dateline_out)

    LOG.debug(
        'graph.write_dateline.set',
        '',
        last_write_time=varenv['last_write_time'],
        write_dateline=varenv['write_dateline'])

    # Any following write in this session must set the continuation flag.
    varenv['is_write_continuation'] = True

    return reply
Ejemplo n.º 11
0
    def lookup_guids_of_mids(self, mid_list, varenv):
        """Map each mid in mid_list to a '#'-prefixed guid.

        The guid is first computed arithmetically from the mid. For guids
        that carry a replaced_by link in the graph, the entry is then
        overwritten with the guid that link points at. Mids rejected as
        InvalidMID / InvalidMIDVersion map to False.

        Raises MQLParseError for a mid that triggers mid.InvalidMunch.
        """
        ask_list = set()
        result = {}
        rev = {}
        # arithmetically compute guids
        for m in mid_list:
            try:
                guid = "#" + mid.to_guid(m)
            except (mid.InvalidMIDVersion, mid.InvalidMID):
                result[m] = False
            except mid.InvalidMunch:
                raise MQLParseError(
                    None, "'%(mid)s' is not a properly formatted mid", mid=m)
            else:
                ask_list.add(guid)
                # store the computed guid now; the graph answer below only
                # overwrites the entries that turn out to be replaced.
                result[m] = guid
                # needed to get from a returned guid back to its mid
                rev[guid] = m

        if not ask_list:
            return result

        # these lookups are not cached.
        LOG.debug("mql.resolve.mids",
                  "Looking up guids for mids",
                  code=len(ask_list))
        # look for replaced_by links off the guids; the query asks for a
        # single replaced_by target per guid.
        query = [{"@guid": ask_list, "replaced_by": {"@guid": None}}]

        # NOTE(review): the log code reads "guids2mids" although this method
        # goes from mids to guids -- confirm before changing.
        varenv["gr_log_code"] = "guids2mids"
        try:
            query_results = self.querier.read(query, varenv)
        finally:
            # always remove the log code, even when the read fails, so it
            # cannot leak into later queries that share this varenv
            varenv.pop("gr_log_code")

        for item in query_results:
            # [guid, replaced_by { guid }]
            guid = item["@guid"]
            rep_by = item["replaced_by"]["@guid"]
            result[rev[guid]] = rep_by

        return result
Ejemplo n.º 12
0
 def parse_result_index(self, result, varenv):
   """Return the order index found in a graph result as a float value.

   Returns None for an empty result. When the result holds more than one
   piece of order information, an error is logged and the first entry is
   used (this case was once a candidate for raising MQLInternalError; it is
   tolerated instead).
   """
   if len(result) == 0:
     return None

   if len(result) > 1:
     # prefer the original query of the parent for the log entry; fall back
     # to this node's own query when that chain is unavailable
     try:
       error_query = self.parent.query.original_query
     except Exception:
       error_query = self.query
     LOG.error(
         'multiple.indices',
         'More than one piece of order information (using first one)',
         query=repr(error_query),
         indices=repr(result))

   # pair the names in self.result with the values of the first row
   dz = dict(zip(self.result, result[0]))
   return unquote_value('float', dz['value'], False)
Ejemplo n.º 13
0
def check_write_throttle(querier, varenv):
    """
    Enforce the daily write quota described by varenv['max_writes'].

    Returns True when no quota applies (no 'max_writes' entry, or the user
    is listed in MAX_WRITE_EXCEPTED_USERS) or when the number of writes
    counted since one day ago does not exceed max_writes['limit'].

    Raises MQLWriteQuotaError when the count exceeds the limit.
    """
    userguid = varenv.get_user_guid()
    max_writes = varenv.get('max_writes', None)
    if max_writes is None or userguid in MAX_WRITE_EXCEPTED_USERS:
        LOG.error('write.throttle.skipped',
                  'user=%s skipped write throttle' % userguid)
        return True

    # userguid starts with a '#' while max_writes['guid'] does not, so the
    # '#' has to go before the two can be compared.
    if userguid[0] == '#':
        userguid = userguid[1:]
    if max_writes['guid'] != userguid:
        LOG.notice(
            'write.throttle.different_users',
            'Logged in user: %s different from mqlwrite user: %s' %
            (max_writes['guid'], userguid))

    # the quota window is one day
    tdelta = timedelta(1)
    yesterday = (datetime.utcnow() - tdelta).isoformat()

    # MQL attribution models documented at:
    # https://wiki.metaweb.com/index.php/MQL_Attribution_for_OAuth%2C_Acre%2C_etc
    # Both queries carry "optional" to suppress EMPTY on count=0.
    query_templates = (
        # normal attribution query
        '(scope=%s timestamp>%s live=dontcare newest>=0 result=(count) '
        'optional)',
        # oauth/open social attribution query
        '(scope->(scope=%s) timestamp>%s live=dontcare newest>=0 '
        'result=(count) optional)',
    )

    count = 0
    for template in query_templates:
        graphq = template % (max_writes['guid'], yesterday)
        gresult = querier.gc.read_varenv(graphq, varenv)
        count += int(gresult[0])

    limit = max_writes['limit']
    if not count > limit:
        LOG.notice(
            'write.throttle.ok', 'user=%s count=%s max=%s' %
            (max_writes['guid'], count, limit))
        return True

    LOG.alert(
        'write.throttle.exceeded', 'user=%s count=%s max=%d delta=%s' %
        (max_writes['guid'], count, limit, str(tdelta)))
    msg = 'Daily write limit of %s was exceeded.' % limit
    raise MQLWriteQuotaError(None,
                             msg,
                             user='******' + max_writes['guid'],
                             count=count,
                             max_writes=limit,
                             period=str(tdelta))
Ejemplo n.º 14
0
  def read_varenv(self, qs, varenv):
    """Send the read "query" qs to the graph and return its result.

    If the graph rejects the caller's dateline, the 'write_dateline' entry
    in varenv is blanked and the query is sent once more. Raises EmptyResult
    when the result is empty and varenv['graph_noisy'] is truthy. The
    dateline of the result is stored in varenv['dateline'].
    """
    # 'write_dateline' is supplied by the pymql user and should be a valid
    # dateline handed to them by an earlier mqlwrite.
    dateline_in = varenv.get('write_dateline', None)

    try:
      reply = self._generate_and_transmit_query(qs, varenv, ReadMode)
    except MQLDatelineInvalidError:
      # Forget the dateline, regenerate the query and try again. This
      # mainly happens after a sandbox refresh: the instance id inside the
      # dateline changes, so the user's dateline (usually kept in a cookie)
      # stays invalid until they do a write or a touch.
      LOG.info('mqlread.dateline.delete',
               'got an invalid dateline, deleting from varenv',
               varenv.get('write_dateline'))
      varenv['write_dateline'] = ''
      reply = self._generate_and_transmit_query(qs, varenv, ReadMode)

    if not reply and varenv.get('graph_noisy'):
      raise EmptyResult('query %s' % qs)

    dateline_out = reply.dateline

    # 'dateline' goes back to the original caller of pymql read (in
    # practice frapi does not pass it on; callers should only refresh their
    # dateline after a write). The internal 'write_dateline' is
    # deliberately NOT touched here: a user only needs reads to be as fresh
    # as their last write, so later reads in this session keep using the
    # 'write_dateline' they supplied. It changes only when a write happens
    # in this session.
    varenv['dateline'] = dateline_out
    log_graph_read(varenv, dateline_in, dateline_out)

    LOG.debug('graph.dateline.set', '', dateline=varenv['dateline'])

    return reply
Ejemplo n.º 15
0
def check_change_permission_by_user(querier, varenv, old_permission_guid,
                                    new_permission_guid):
    """
    Report whether the current user may move an object from
    old_permission_guid to new_permission_guid.

    Both permissions are always checked with check_permission_permission().
    When varenv['$privileged'] is the Privileged marker the change is
    logged and allowed regardless of those checks; otherwise the user must
    be able to administer both permissions.
    """
    # evaluate the old permission first, then the new one
    may_administer = [
        check_permission_permission(querier, varenv, guid)
        for guid in (old_permission_guid, new_permission_guid)
    ]

    # privileged access bypass
    if varenv.get('$privileged') is Privileged:
        LOG.notice(
            'access.privileged', 'for user %s changing permission %s to %s' %
            (varenv.get_user_guid(), old_permission_guid, new_permission_guid))
        return True

    return may_administer[0] and may_administer[1]
Ejemplo n.º 16
0
    def parse_full_reply(self, replystr):
        """
        Parse the given reply string from the graph into nested lists of
        tokens and append the outermost list to self.replyqueue.

        Results are in the form:
        [ 'ok', 'id=', '"me;..."', [[['010000..', '01...', ...]]]]

        Raises MQLGraphError when a newline token is encountered or when
        the reply ends with unclosed parentheses.
        """
        LOG.debug('graph.result', replystr)
        token_list = graphresult_re.findall(replystr)

        curlist = []
        # lists that are waiting for their closing parenthesis
        stack = []

        for tok in token_list:
            if tok == '(':
                stack.append(curlist)
                curlist = []
            elif tok == ')':
                sublist = curlist
                curlist = stack.pop()
                curlist.append(sublist)
            elif tok == '\n':
                raise MQLGraphError(
                    None,
                    'Not allowed a newline in parse_full_reply',
                    reply=replystr,
                    tokens=token_list)
            elif tok == ' ' or tok == '':
                # separators carry no information
                pass
            else:
                curlist.append(tok)

        # len() is well defined for an empty token list and reports the
        # real number of tokens rather than the last loop index
        LOG.debug('graph.result.parsed',
                  'Parsed %d tokens' % len(token_list))
        if len(stack) != 0:
            raise MQLGraphError(None,
                                'got linefeed in the middle of a reply?',
                                reply=replystr,
                                tokens=token_list,
                                depth=len(stack))

        self.replyqueue.append(curlist)
Ejemplo n.º 17
0
 def list_or_item(self, result, varenv):
   """Return result as a list, a single item or None, as the query demands.

   When self.query.list is truthy the list is returned untouched. Otherwise
   an empty result yields None and a single-element result yields that
   element. More than one element is a uniqueness failure: with
   varenv['uniqueness_failure'] == 'soft' a warning is logged and the first
   element returned, otherwise MQLTooManyValuesForUniqueQuery is raised.
   """
   if self.query.list:
     return result

   found = len(result)
   if found == 0:
     return None
   if found == 1:
     return result[0]

   if varenv.get('uniqueness_failure', None) == 'soft':
     # soft mode trades the exception for a warning message
     LOG.warning(
         'soft.uniqueness.failure', repr(self.query), result=repr(result))
     return result[0]

   # substitute ids into the result before reporting it; the result is
   # only used for the error from here on
   varenv.lookup_manager.do_id_lookups()
   result = varenv.lookup_manager.substitute_ids(result)
   raise MQLTooManyValuesForUniqueQuery(
       self.query, results=result, count=len(result))
Ejemplo n.º 18
0
  def __init__(self,
               no_timeouts=False,
               policy_map=None,
               default_policy=None,
               custom_policy=None):
    """Set up cost tracking and timeout policies.

    Args:
      no_timeouts: stored as self.no_timeouts.
      policy_map: mapping of timeout policies, stored (not copied) as
        self.timeout_policies.
      default_policy: name of the policy to use by default; falls back to
        self.DEFAULT_POLICY_NAME when falsy.
      custom_policy: when truthy, it is registered in the policy map under
        the name 'custom' and becomes the default policy.
    """
    self.reset_cost()
    self.timeout_policies = policy_map
    self.default_policy = default_policy or self.DEFAULT_POLICY_NAME
    self.no_timeouts = no_timeouts

    if custom_policy:
      LOG.info('gc.custom.timeout.policy', '', policy=str(custom_policy))
      # policy_map defaults to None; without a mapping there would be
      # nowhere to register the custom policy
      if self.timeout_policies is None:
        self.timeout_policies = {}
      self.timeout_policies['custom'] = custom_policy
      self.default_policy = 'custom'
Ejemplo n.º 19
0
def wrap_query(querier, sq, varenv=None, transaction_id=None):
    """
    Run a query with the given querier (usually something like
    ctx.low_querier.read) - performing appropriate envelope packing and
    unpacking, multiple queries, error handling, etc

    This function is deprecated and logs an error on every call.

    A string ``sq`` is parsed as JSON. A parse failure whose message has the
    "<text>: line N column M" shape is turned into a '400 Bad Request'
    JSONResponse and its ``response`` is returned; a ValueError with any
    other message is re-raised, and any other exception is reported through
    json_traceback().

    NOTE(review): in the code visible here nothing is done with the parsed
    query afterwards -- confirm whether the rest of the function is intact.
    """

    LOG.error(
        'deprecated',
        'mw.mql.pathexpr.wrap_query() is DEPRECATED and will go away soon!')

    if isinstance(sq, basestring):
        # convert to json query
        try:
            # XXX should eventually use unicode, for now utf8
            sq = json.loads(sq, encoding='utf-8', result_encoding='utf-8')

        except ValueError as e:
            # debug ME-907
            LOG.exception('mql.pathexpr.wrap_query()', sq=sq, varenv=varenv)

            # pull the message, line and column out of the parser's error
            err_re = re.compile(r'^(.+): line (\d+) column (\d+)')
            m = err_re.match(str(e))
            if not m:
                raise
            response = JSONResponse(status='400 Bad Request',
                                    code='/api/status/error/request')
            text = 'json parse error: ' + m.group(1)
            response.log(text,
                         line=int(m.group(2)),
                         column=int(m.group(3)),
                         level='error')
            return response.response

        except Exception as e:
            return json_traceback(exception=e,
                                  status='400 Bad Request',
                                  code='/api/status/error/request')
Ejemplo n.º 20
0
    def __init__(self,
                 msg,
                 http_code=400,
                 app_code=DEF_ME_CODE,
                 inner_exc=None,
                 **kwds):
        """Build the error, derive its HTTP and application codes, and log it.

        Args:
            msg: human-readable message; also passed to Exception.__init__.
            http_code: HTTP status code; replaced by 500 when
                is_valid_HTTP_code() rejects it.
            app_code: '/'-separated code. The second segment becomes part of
                self.comp_code, the remaining segments become self.app_code.
                A code with fewer than three segments is replaced by
                self.DEF_ME_CODE.
            inner_exc: optional underlying exception; when given, logging
                goes through LOG.exception instead of LOG.debug.
            **kwds: extra details, passed to gen_msgs() and the logger and
                kept as self.kwds.
        """
        self.msg = msg
        Exception.__init__(self, msg)

        if not is_valid_HTTP_code(http_code):
            http_code = 500
        self.http_status = get_HTTP_err(http_code)
        self.http_code = http_code

        # app_code and api code setup
        codes = app_code.split('/')
        if len(codes) < 3:
            codes = self.DEF_ME_CODE.split('/')
        self.comp_code = '%s/%s' % (self.DEF_PFX, codes[1])
        self.app_code = '%s' % '/'.join(codes[2:])
        # built before the traceback is added below, so the messages never
        # contain it
        self.messages = [self.gen_msgs(**kwds)]

        if 'error' not in kwds:
            # don't extract the current frame (__init__)
            stack = traceback.extract_stack()[:-1]
            kwds['traceback'] = '\r\n'.join(traceback.format_list(stack))

        # log the inner exception when there is one, otherwise self
        if inner_exc and inner_exc is not self:
            LOG.exception(msg, **kwds)
        else:
            # the log component is the app_code as passed in, with dots
            LOG.debug(app_code[1:].replace('/', '.'), msg, **kwds)
        self.kwds = kwds
Ejemplo n.º 21
0
    def __init__(self, error_type, clause, message, cost=None, *args, **kwds):
        """Build the error record in self.error and log it.

        The parent initializer receives error_type as passed in; afterwards
        a type missing from self.error_types is treated as 'UNKNOWN'.
        self.error ends up with 'code', 'info' (self.kwds, with values that
        are not plain JSON-friendly types replaced by their str()) and
        'message'. How the error is logged depends on the error type.
        """
        MQLParameterizedError.__init__(self, message, error_type, clause,
                                       *args, **kwds)

        if error_type not in self.error_types:
            error_type = 'UNKNOWN'

        self.error = {}
        self.error_type = error_type
        self.cost = cost
        if clause is not None:
            self.set_query(clause)

        self.error['code'] = self.get_error_id()

        # make sure we don't get ReadMode (or similar objects) in a JSON
        # response: anything that is not a plain value is stringified
        json_safe = (basestring, int, long, float, dict, list, bool,
                     type(None))
        for key in self.kwds:
            value = self.kwds[key]
            if not isinstance(value, json_safe):
                self.kwds[key] = str(value)

        self.error['info'] = self.kwds
        self.error['message'] = str(self)

        # NOTE(review): level is computed but never read in this method --
        # confirm whether it was meant to drive the logging below.
        severe = ('INTERNAL', 'ACCESS', 'WRITE_QUOTA', 'UNKNOWN')
        level = (log_util.CRIT
                 if self.error_type in severe else log_util.WARNING)

        log_name = error_type.lower() + '_error'

        if self.error_type in ('GRAPH', 'CONNECTION', 'TIMEOUT'):
            # these log the graph query.
            # There is an idempotent write query that we do to get a
            # dateline that returns an error - it is actually harmless, so
            # it is not logged.
            info = self.error.get('info')
            harmless = 'primitive tagged as unique already exist'
            if info and info.get('detail', None) != harmless:
                LOG.error(log_name,
                          repr(self.error),
                          gql=repr(getattr(self, 'graph_query', None)))

        # there's no graph query otherwise
        elif self.error_type in ('TYPE', 'DATELINE_INVALID', 'CURSOR_INVALID'):
            # this is probably a developer-level error, no need to LOG.error
            LOG.warn(log_name, repr(self.error))
        else:
            LOG.error(log_name, repr(self.error))
Ejemplo n.º 22
0
    def lookup_by_guid_oneoff(self, g, varenv):
        """Build a '/'-separated id for guid g by walking up to the root
        namespace.

        Returns the id when the walk reaches self.bootstrap.root_namespace.
        Returns g itself, after logging a warning, when a lookup yields
        nothing, when a guid is seen twice, or when the round limit is hit.
        """
        root_ns_guid = self.bootstrap.root_namespace
        segments = []
        visited = set()
        current = g

        # Six rounds of lookups; the original author calls this "18 ply"
        # (presumably several levels per round -- TODO confirm).
        for _ in xrange(6):
            pairs = self.lookup_by_guid_oneoff_internal(current, varenv)

            if not pairs:
                # we've ceased to make progress - bail
                LOG.warning('mql.namespace.error',
                            'id for guid not found',
                            guid=g)
                return g

            for pair in pairs:
                segments.append(pair[0])
                current = pair[1]

                if current == root_ns_guid:
                    # names were collected leaf-first
                    segments.reverse()
                    return "/" + "/".join(segments)

                if current in visited:
                    LOG.warning('mql.namespace.error',
                                'cycle in namespace looking for guid',
                                guid=g)
                    return g

                visited.add(current)

        LOG.warning('mql.namespace.error',
                    'depth limit exceeded in namespace lookup',
                    guid=g)
        return g
Ejemplo n.º 23
0
    def lookup_id_query(self, guid, varenv):
        """
        lots of nasty heuristics to find ids for id-identified objects:

        This seems to be called when lookup_ids can't finish the job,
        but it also seems to duplicate some of the logic there.

        Current rules:

        - do we have a /freebase/object_hints/best_hrid property
        - do we have a name in /
        - do we have a name in /XXX/YYY (XXX not boot or pub)
        - do we have a name in /XXX/YYY/ZZZ
        - do we have a name in /XXX/YYY (XXX may be boot or pub)
        - ask namespace.lookup_by_guid_oneoff...

        All of this trouble is mostly because some things (/type/object/type
        being the best example) have names in /bootstrap-namespace
        that we dont want to expose by accident.

        Returns the id that was found, or guid itself when the read returns
        None or the candidate id does not pass valid_idname(). Exceptions
        from the read (including EmptyResult) propagate to the caller.
        """

        query = {
            "@guid": guid,
            "best_hrid": [{
                ":typeguid": self.best_hrid_guid,
                ":value": None,
                ":optional": True,
            }],
            "has_root_name": [{
                ":type": "has_key",
                ":comparator": "octet",
                ":reverse": True,
                ":value": None,
                ":optional": True,
                "@id": "/"
            }],
            "has_2_level_name": [{
                ":type": "has_key",
                ":comparator": "octet",
                ":reverse": True,
                ":value": None,
                ":optional": True,
                "-has_key": [{
                    ":comparator": "octet",
                    ":value": None,
                    "@id": "/"
                }]
            }],
            "has_3_level_name": [{
                ":type": "has_key",
                ":comparator": "octet",
                ":reverse": True,
                ":value": None,
                ":optional": True,
                "-has_key": [{
                    ":comparator": "octet",
                    ":value": None,
                    "-has_key": [{
                        ":comparator": "octet",
                        ":value": None,
                        "@id": "/"
                    }]
                }]
            }],
        }

        # Every clause is optional, so an EmptyResult here means the guid
        # itself was not found; it is left to propagate. The log code is
        # removed again whether or not the read succeeds.
        varenv["gr_log_code"] = "guid2id"
        try:
            result = self.querier.read(query, varenv)
        finally:
            varenv.pop("gr_log_code")

        # we may get nothing back if the guid has been deleted (or if we
        # were deleting it); in that case, just return the guid.
        if result is None:
            return guid

        hrids = result["best_hrid"]
        if hrids:
            if len(hrids) > 1:
                # This should never happen.
                # If it does, log an error but don't fail.
                LOG.error("mql.resolve.lookup_id_internal",
                          "multiple /freebase/object_hints/best_hrid")
            return hrids[0][":value"]

        idname = None

        if result["has_root_name"]:
            idname = "/" + result["has_root_name"][0][":value"]
        else:
            two_level = result["has_2_level_name"]
            if (two_level and two_level[0]["-has_key"][0][":value"]
                    not in ("boot", "pub")):
                # a two-level name outside /boot and /pub wins
                entry = two_level[0]
                idname = ("/" + entry["-has_key"][0][":value"] + "/" +
                          entry[":value"])
            elif result["has_3_level_name"]:
                entry = result["has_3_level_name"][0]
                middle = entry["-has_key"][0]
                idname = ("/" + middle["-has_key"][0][":value"] + "/" +
                          middle[":value"] + "/" + entry[":value"])
            elif two_level:
                # only a /boot or /pub two-level name is left
                entry = two_level[0]
                idname = ("/" + entry["-has_key"][0][":value"] + "/" +
                          entry[":value"])
            else:
                idname = self.namemap.lookup_by_guid_oneoff(guid, varenv)

        # special hack for the root namespace
        if idname == "/boot/root_namespace":
            return "/"
        elif idname == "/boot/root_user":
            return "/user/root"
        elif idname is not None and valid_idname(idname):
            return idname
        else:
            return guid
Ejemplo n.º 24
0
    def search_id_result(self, head, varenv):
        """
        Take one id result struct and attempt to produce an id for it.

        head is a single query result carrying "@guid", "best_hrid",
        "is_instance_of" and nested "-has_key" entries; each entry holds the
        key's ":value" and the "@guid" of the namespace containing that key.
        varenv is only passed through to lookup_id_internal().

        Here are the rules:

        - best_hrid is chosen if present
        - the shortest name is best
        - except that any three level name is better than a /boot name.
        - among names of the same length, pick any one at random.

        Returns the id string, or None if no name could be constructed.
        """

        hrids = head["best_hrid"]
        if hrids:
            if len(hrids) > 1:
                # This should never happen.
                # If it does, log an error but don't fail.
                LOG.error("mql.resolve.best_hrid",
                          "multiple /freebase/object_hints/best_hrid")
            return hrids[0][":value"]

        def key_chain(node):
            # Key names from node back towards the head, outermost first.
            # The head entry has no parent, which terminates the walk.
            names = []
            while node[1]:
                names.append(node[0])
                node = node[1]
            return names

        # bfs_list format is an array of
        # ( value, parent, guid, keys, depth )
        bfs_list = [(None, None, head["@guid"], head.get("-has_key", []), 0)]
        root = self.namemap.bootstrap.root_namespace
        boot = self.namemap.bootstrap.boot
        is_namespace = isinstance(head["is_instance_of"], dict)

        has_boot = None

        if head["@guid"] == root:
            return "/"
        elif head["@guid"] == boot:
            return "/boot"

        while bfs_list:
            front = bfs_list.pop(0)
            for item in front[3]:
                bfs_item = (item[":value"], front, item["@guid"],
                            item.get("-has_key", []), front[4] + 1)
                if bfs_item[2] == root:
                    # we're done - what are we called?
                    return "/" + "/".join(key_chain(bfs_item))
                elif bfs_item[2] == boot:
                    # The search is breadth first, so depths never decrease.
                    # Keep the shallowest /boot hit: a deeper /boot ancestor
                    # must not hide a key that sits directly in /boot.
                    if has_boot is None or bfs_item[4] <= has_boot[4]:
                        has_boot = bfs_item
                elif (self.topic_en and bfs_item[2] == self.topic_en
                      and bfs_item[4] == 1):
                    # hack for things *directly* in /en to short circuit early...
                    return "/en/" + bfs_item[0]
                elif not is_namespace and bfs_item[
                        2] in self.forbidden_namespaces:
                    # terminate recursion at /wikipedia/en etc.
                    pass
                else:
                    bfs_list.append(bfs_item)

        # are we in /boot?
        if has_boot and has_boot[4] == 1:
            return "/boot/" + has_boot[0]

        # ok, we've searched the entire list. front is the last item...
        # try a regular lookup_id() on it. (so we can cache it too!)

        if front[4] == 3:
            leading_id = self.lookup_id_internal(front[2], varenv)
            if leading_id and leading_id[0] == "/":
                # we got something: prefix the resolved namespace id to the
                # chain of keys leading back to the head.
                return "/".join([leading_id] + key_chain(front))

        # failure
        return None
Ejemplo n.º 25
0
    def lookup_ids(self, guid_list, varenv):
        """
        Given a list of guids returns an id for each one,
        using as few queries as possible.

        guid_list may contain unicode guids; they are utf-8 encoded before
        being looked up. When varenv contains "asof" the guid cache
        (self.guids) is neither consulted nor updated.

        Returns a dictionary of guid->id. Guids for which no id could be
        resolved are mapped to mid.of_guid() of the guid without its
        leading character.
        """

        ask_list = set()
        result = {}

        use_cache = "asof" not in varenv

        # Step 1: maybe we already know.
        for guid in guid_list:
            if isinstance(guid, unicode):
                guid = guid.encode("utf-8")

            if use_cache and guid in self.guids:
                LOG.debug("mql.lookup.id.cached",
                          "found %s in cache" % guid,
                          value=self.guids[guid])
                result[guid] = self.guids[guid]
            else:
                ask_list.add(guid)

        # very large batches are resolved but not written back to the cache
        cache = use_cache and len(ask_list) < 10000

        if not ask_list:
            return result

        LOG.debug("mql.lookup.ids", "Lookup ids", code=len(ask_list))

        self.preload(varenv)

        # Step 2: resolve the ask_list
        # Fetch best_hrid plus up to three levels of containing keys
        # for every guid in one query.
        query = [{
            "@guid": ask_list,
            "@pagesize": len(ask_list) + 1,
            "best_hrid": [{
                ":typeguid": self.best_hrid_guid,
                ":value": None,
                ":optional": True,
            }],
            "-has_key": [{
                ":value": None,
                ":optional": True,
                ":comparator": "octet",
                ":pagesize": 1000,
                "@guid": None,
                "-has_key": [{
                    ":value": None,
                    ":optional": True,
                    ":comparator": "octet",
                    "@guid": None,
                    "-has_key": [{
                        ":value": None,
                        ":optional": True,
                        ":comparator": "octet",
                        "@guid": None,
                    }]
                }]
            }],
            "is_instance_of": {
                "@id": "/type/namespace",
                ":optional": True
            }
        }]

        varenv["gr_log_code"] = "guid2id"
        try:
            query_results = self.querier.read(query, varenv)
        finally:
            # always remove the temporary log code, even if the read fails,
            # so it does not leak into the caller's later queries.
            varenv.pop("gr_log_code", None)

        LOG.debug("mql.lookup.id.results", "", results=query_results)

        # now see what we found out...
        # these should be cached.
        for item in query_results:
            res = self.search_id_result(item, varenv)
            if res:
                result[item["@guid"]] = res

                if cache:
                    self.guids[item["@guid"]] = res

        # every guid in guid_list has to be present in the result.
        for guid in guid_list:
            if guid not in result:
                LOG.debug("mql.lookup.id.notfound", "midifying %s" % guid)
                result[guid] = mid.of_guid(guid[1:])

        return result
Ejemplo n.º 26
0
def cmdline_main():
  """Command-line entry point for the benchmark runner.

  Parses the command-line options, logs the startup cost, obtains the query
  (from the -f file, from the module-level function named by -c, or from the
  single positional argument), decodes it as JSON when -t is "mql", and runs
  test_run() on it, optionally under the configured profiler (-P). The
  accumulated graph cost is logged before and after the run.
  """
  LOG.warning("benchmark", "test start")
  start_time = time.time()

  from mql.mql import cmdline
  op = cmdline.OP("testing")

  op.add_option(
      "-n", dest="num", default=1000, type="int", help="number of iterations")

  op.add_option(
      "-P",
      dest="profile",
      default=None,
      help="run profiler with output to file")

  op.add_option("-c", dest="call", default=None, help="function to call")

  op.add_option(
      "-f", dest="query_file", default=None, help="file containing query")

  op.add_option(
      "--flush",
      dest="flush",
      default=None,
      help="flush cache between every request")

  op.add_option("-t", dest="type", default="mql", help="graph or MQL query")

  options, args = op.parse_args()

  stop_time = time.time()
  op.ctx.gc.totalcost["dt"] = stop_time - start_time

  LOG.warning("start cost", {
      "nreqs": op.ctx.gc.nrequests,
      "cost": op.ctx.gc.totalcost
  })

  # NOTE(review): the options are parsed a second time here. This looks
  # redundant, but cmdline.OP is not visible from this function, so the call
  # is kept in case parse_args() has side effects -- confirm and remove.
  options, args = op.parse_args()

  queryfile = options.query_file
  if queryfile is not None:
    # the with-block closes the file even if reading fails
    with open(queryfile, "r") as qf:
      query = qf.read()
    # collapse newlines/tabs so the query becomes a single line
    query = re.sub("[\n\t]+", " ", query)
  elif options.call:
    # -c names a module-level function that returns the query
    query = globals()[options.call]()
  elif len(args) == 1:
    query = args[0]
  else:
    op.error("Must specify a query argument")

  if options.type == "mql":
    # XXX should eventually use unicode, for now utf8
    query = json.loads(query, encoding="utf-8", result_encoding="utf-8")
  elif options.type == "graph":
    pass
  else:
    op.error("-t must be 'mql' or 'graph'")

  if options.profile:
    if profiler == "hotshot":
      profile = hotshot.Profile(options.profile)
      try:
        profile.runcall(test_run, op.ctx, op.varenv, options, query)
      finally:
        # close the profiler's log file so the collected data is written out
        profile.close()
      LOG.warning(
          "benchmark",
          "Saving hotshot profile in Stats format to %s" % options.profile)

    elif profiler == "cProfile":
      profile = cProfile.Profile()
      profile.runcall(test_run, op.ctx, op.varenv, options, query)

      LOG.warning(
          "benchmark",
          "Saving cProfile data in kcachegrind format to %s" % options.profile)
      # get from http://jcalderone.livejournal.com/21124.html
      # and put in thirdparty/pyroot
      from mql.mql import lsprofcalltree
      k = lsprofcalltree.KCacheGrind(profile)
      # close the output file explicitly so the data is flushed to disk
      with open(options.profile, "w") as out:
        k.output(out)
    else:
      LOG.warning("benchmark", "No profiler available, not running benchmark")
  else:
    test_run(op.ctx, op.varenv, options, query)

  LOG.warning("run cost", {
      "nreqs": op.ctx.gc.nrequests,
      "cost": op.ctx.gc.totalcost
  })
Ejemplo n.º 27
0
        reserved_names = ('request_id', 'cost', 'lang', 'transaction_id',
                          'permission', 'cursor', 'user')

        valid_queries = ((k, v) for k, v in sq.iteritems()
                         if k not in reserved_names)

        # make sure to copy the request_id
        if 'request_id' in sq:
            response['request_id'] = sq['request_id']

        # should only looking either at sq['query'] for a single query or
        # sq['queries'] for multiple queries
        for id, subq in valid_queries:
            # assuming querier is a bound method here..
            LOG.notice('Query',
                       '%s.%s' % (querier.im_class.__name__, querier.__name__),
                       subq=subq)
            try:
                results[id] = querier(subq, varenv)

                response.extend(status='200 OK')

            except EmptyResult, e:
                LOG.info('emptyresult', '%s' % e)
                response.log('empty result for query %s' % subq)
                result = None

            # exceptions should be packed into response['error']
            except ParameterizedError, e:
                if isinstance(e, MQLInternalError):
                    response.extend(status='500 Internal Server Error')