Example #1
0
def fetch_message(object_id):
    """Return a message, from the local cache if fresh, otherwise via the REST API.

    You probably should be calling recursive_fetch_message first.

    A cached object is treated as stale (and re-fetched) when its
    'truncated' or 'QaikuBackup_stale' key holds a true value.

    Args:
        object_id: id of the message; converted with str() before use.

    Returns:
        The object stored under object_id. When the API response cannot be
        parsed, the cached (stale) copy is returned if there is one,
        otherwise None.
    """
    # NOTE: print(...) with a single argument behaves exactly like the print
    # statement on Python 2 and is also valid on Python 3.
    if debug:
        print("fetch_message(%s) called" % repr(object_id))
        if debug_tracebacks:
            traceback.print_stack()
    object_id = str(object_id)  # cast to normal str

    if storage.in_cache_byid(object_id):
        obj = storage.get_byid(object_id)
        # dict.get() replaces the deprecated has_key()-then-index pair
        if not (obj.get('truncated') or obj.get('QaikuBackup_stale')):
            if debug:
                print("message %s returned from cache" % object_id)
            return obj
        # Object is stale, do not return from cache; fall through to re-fetch
        if debug:
            print("storage.objectcache[%s] is stale" % repr(object_id))
            if debug_tracebacks:
                print(json.dumps(obj, sort_keys=True, indent=4))

    url = "http://www.qaiku.com/api/statuses/show/%s.json?apikey=%s" % (
        object_id, apikey)
    parsed = json_parse_url(url)
    if not parsed:
        # parse failed, return stale object if we have one
        if storage.in_cache_byid(object_id):
            return storage.get_byid(object_id)
        return None
    storage.update(parsed)
    return storage.get_byid(object_id)
Example #2
0
def fetch_message(object_id):
    """Look up a message in the local cache, loading it via the REST API when needed.

    You probably should be calling recursive_fetch_message first.

    The cached copy is only returned directly when it is not stale; an
    object is stale when 'truncated' or 'QaikuBackup_stale' is present
    with a true value.

    Args:
        object_id: id of the message; converted with str() before use.

    Returns:
        The object stored under object_id after a successful fetch, the
        fresh cached object on a cache hit, the stale cached object when
        the API response cannot be parsed, or None when the parse fails
        and nothing is cached.
    """
    # NOTE: single-argument print(...) is equivalent to the print statement
    # on Python 2 and keeps the block valid on Python 3 as well.
    if debug:
        print("fetch_message(%s) called" % repr(object_id))
        if debug_tracebacks:
            traceback.print_stack()
    object_id = str(object_id)  # cast to normal str

    if storage.in_cache_byid(object_id):
        obj = storage.get_byid(object_id)
        # `in` replaces the deprecated dict.has_key()
        is_stale = (
            ('truncated' in obj and obj['truncated'])
            or ('QaikuBackup_stale' in obj and obj['QaikuBackup_stale'])
        )
        if is_stale:
            # Object is stale, do not return from cache
            if debug:
                print("storage.objectcache[%s] is stale" % repr(object_id))
                if debug_tracebacks:
                    print(json.dumps(obj, sort_keys=True, indent=4))
        else:
            if debug:
                print("message %s returned from cache" % object_id)
            return obj

    url = "http://www.qaiku.com/api/statuses/show/%s.json?apikey=%s" % (object_id, apikey)
    parsed = json_parse_url(url)
    if parsed:
        storage.update(parsed)
        return storage.get_byid(object_id)

    # parse failed, return stale object if we have one
    if storage.in_cache_byid(object_id):
        return storage.get_byid(object_id)
    return None
Example #3
0
def fill_image_urls(message_id):
    """Load the object, then try to figure out its web URL and scrape that URL for the images.

    Args:
        message_id: id of the message to process.

    Returns:
        False when scraping is disabled (can_scrape is false), the message
        cannot be loaded, it has no 'image_url', no page URL can be derived
        from it, or fetching/parsing the page raises.
        True when the message already has a 'QaikuBackup_image_url_view' or
        'QaikuBackup_image_url_orig' property.
    """
    if not can_scrape:
        return False
    if (    not storage.in_cache_byid(message_id)
        and not fetcherparser.recursive_fetch_message(message_id)):
        return False
    obj = fetcherparser.fetch_message(message_id)
    if not obj:
        # The fetch can come back empty-handed (e.g. None); without this
        # guard the key lookups below would raise AttributeError.
        return False

    # There is no image, don't bother...
    if not obj.get('image_url'):
        return False

    # Already processed this one
    for prop in ['QaikuBackup_image_url_view', 'QaikuBackup_image_url_orig']:
        if prop in obj:
            return True

    # Try to figure the shortest way to the canonical message HTML view
    # This is a redirect but urllib has no problem following it
    url = obj.get('in_reply_to_status_url')
    if not url and obj.get('channel'):
        # Channel message
        url = "http://www.qaiku.com/channels/show/%s/view/%s/" % (obj['channel'], obj['id'])
    if not url:
        # non-Channel message; `or {}` tolerates a 'user' key holding None
        user = obj.get('user') or {}
        if user.get('url'):
            url = "%s/show/%s/" % (user['url'], obj['id'])
    # Are there other possible combinations ?
    if not url:
        return False

    # NOTE: single-argument print(...) behaves like the print statement on
    # Python 2 and is also valid on Python 3; same for `except ... as e`
    # (available since Python 2.6).
    if debug:
        print("Soupifying %s" % url)
    try:
        soup = BeautifulSoup(urllib_cached.urlopen(url))
    except Exception as e:
        print("Got exception %s" % e)
        return False
Example #4
0
def fill_image_urls(message_id):
    """Load the object, then work out the web URL of its HTML view and fetch that page for scraping.

    Args:
        message_id: id of the message to process.

    Returns:
        False when can_scrape is false, the message cannot be loaded, it
        carries no 'image_url', no page URL can be derived, or fetching /
        parsing the page raises.
        True when one of the 'QaikuBackup_image_url_view' /
        'QaikuBackup_image_url_orig' properties is already present.
    """
    if not can_scrape:
        return False
    if (not storage.in_cache_byid(message_id)
            and not fetcherparser.recursive_fetch_message(message_id)):
        return False
    obj = fetcherparser.fetch_message(message_id)
    if not obj:
        # Guard against an empty result (e.g. None) from the fetch; the
        # membership tests below would otherwise raise.
        return False

    # There is no image, don't bother...
    if 'image_url' not in obj or not obj['image_url']:
        return False

    # Already processed this one
    processed_markers = ('QaikuBackup_image_url_view',
                         'QaikuBackup_image_url_orig')
    if any(prop in obj for prop in processed_markers):
        return True

    # Try to figure the shortest way to the canonical message HTML view
    url = None
    if 'in_reply_to_status_url' in obj:
        # This is a redirect but urllib has no problem following it
        url = obj['in_reply_to_status_url']
    if not url and 'channel' in obj and obj['channel']:
        # Channel message
        url = "http://www.qaiku.com/channels/show/%s/view/%s/" % (
            obj['channel'], obj['id'])
    if not url and 'user' in obj:
        # non-Channel message; a 'user' value of None is treated as "no user"
        user = obj['user'] or {}
        if 'url' in user and user['url']:
            url = "%s/show/%s/" % (user['url'], obj['id'])
    # Are there other possible combinations ?
    if not url:
        return False

    # NOTE: print(...) with one argument and `except ... as e` work on
    # Python 2.6+ as well as Python 3.
    if debug:
        print("Soupifying %s" % url)
    try:
        soup = BeautifulSoup(urllib_cached.urlopen(url))
    except Exception as e:
        print("Got exception %s" % e)
        return False