Ejemplo n.º 1
0
def fill_and_fetch_image_urls(message_id):
    """Loads the given object, hits the screenscraper to fill the extra image properties and then fetches those images"""
    if not fill_image_urls(message_id):
        return False

    obj = fetcherparser.fetch_message(message_id)

    for prop in ['QaikuBackup_image_url_view', 'QaikuBackup_image_url_orig']:
        if not obj.has_key(prop):
            continue
        res = fetcherparser.fetch_resource(obj[prop])
        if res:
            obj[prop] = res

    # Force objectcache update to make sure we don't have funky COW issues
    storage.update(obj)
    return True
Ejemplo n.º 2
0
def fill_and_fetch_image_urls(message_id):
    """Loads the given object, hits the screenscraper to fill the extra image properties and then fetches those images"""
    if not fill_image_urls(message_id):
        return False

    obj = fetcherparser.fetch_message(message_id)

    for prop in ['QaikuBackup_image_url_view', 'QaikuBackup_image_url_orig']:
        if not obj.has_key(prop):
            continue
        res = fetcherparser.fetch_resource(obj[prop])
        if res:
            obj[prop] = res

    # Force objectcache update to make sure we don't have funky COW issues
    storage.update(obj)
    return True
Ejemplo n.º 3
0
def fill_image_urls(message_id):
    """Loads the object, then tries to figure out the web URL for it and scrape said url for the images"""
    if not can_scrape:
        return False
    if (    not storage.in_cache_byid(message_id)
        and not fetcherparser.recursive_fetch_message(message_id)):
        return False
    obj = fetcherparser.fetch_message(message_id)

    # There is no image, don't bother...
    if (   not obj.has_key('image_url')
        or not obj['image_url']):
        return False

    # Already processed this one
    for prop in ['QaikuBackup_image_url_view', 'QaikuBackup_image_url_orig']:
        if obj.has_key(prop):
            return True

    # Try to figure the shortest way to the canonical message HTML view
    url = None
    if obj.has_key('in_reply_to_status_url'):
        url = obj['in_reply_to_status_url'] # This is a redirect but urllib has no problem following it
    if (    not url
        and obj.has_key('channel')
        and obj['channel']):
        url = "http://www.qaiku.com/channels/show/%s/view/%s/" % (obj['channel'], obj['id']) # Channel message
    if (    not url
        and obj.has_key('user')
        and obj['user'].has_key('url')
        and obj['user']['url']):
        url = "%s/show/%s/" % (obj['user']['url'], obj['id']) # non-Channel message
    # Are there other possible combinations ?
    if not url:
        return False

    if debug:
        print "Soupifying %s" % url
    try:
        soup = BeautifulSoup(urllib_cached.urlopen(url))
    except Exception,e:
        print "Got exception %s" % e
        return False
Ejemplo n.º 4
0
def fill_image_urls(message_id):
    """Loads the object, then tries to figure out the web URL for it and scrape said url for the images"""
    if not can_scrape:
        return False
    if (not storage.in_cache_byid(message_id)
            and not fetcherparser.recursive_fetch_message(message_id)):
        return False
    obj = fetcherparser.fetch_message(message_id)

    # There is no image, don't bother...
    if (not obj.has_key('image_url') or not obj['image_url']):
        return False

    # Already processed this one
    for prop in ['QaikuBackup_image_url_view', 'QaikuBackup_image_url_orig']:
        if obj.has_key(prop):
            return True

    # Try to figure the shortest way to the canonical message HTML view
    url = None
    if obj.has_key('in_reply_to_status_url'):
        url = obj[
            'in_reply_to_status_url']  # This is a redirect but urllib has no problem following it
    if (not url and obj.has_key('channel') and obj['channel']):
        url = "http://www.qaiku.com/channels/show/%s/view/%s/" % (
            obj['channel'], obj['id'])  # Channel message
    if (not url and obj.has_key('user') and obj['user'].has_key('url')
            and obj['user']['url']):
        url = "%s/show/%s/" % (obj['user']['url'], obj['id']
                               )  # non-Channel message
    # Are there other possible combinations ?
    if not url:
        return False

    if debug:
        print "Soupifying %s" % url
    try:
        soup = BeautifulSoup(urllib_cached.urlopen(url))
    except Exception, e:
        print "Got exception %s" % e
        return False