Beispiel #1
0
def json_parse_url(url):
    """Fetch `url` and return its body decoded as JSON.

    Trivial helper to avoid copy-pasting the same code all over.

    Returns the decoded object on success. Any exception raised while
    fetching or decoding is printed and None is returned instead, so callers
    must check for None (note that a JSON `null` document also yields None).
    """
    if debug:
        print("Fetching (JSON) %s" % url)
    try:
        fp = urllib_cached.urlopen(url)
        try:
            parsed = json.load(fp)
        finally:
            # Release the handle even when the payload is not valid JSON
            fp.close()
    except Exception as e:
        # Best-effort helper: report the problem and signal failure with None
        print("Got exception %s" % e)
        return None
    return parsed
Beispiel #2
0
def json_parse_url(url):
    """Fetch `url` and return its body decoded as JSON.

    Trivial helper to avoid copy-pasting the same code all over.

    Returns the decoded object on success. Any exception raised while
    fetching or decoding is printed and None is returned instead, so callers
    must check for None (note that a JSON `null` document also yields None).
    """
    if debug:
        print("Fetching (JSON) %s" % url)
    try:
        fp = urllib_cached.urlopen(url)
        try:
            parsed = json.load(fp)
        finally:
            # Release the handle even when the payload is not valid JSON
            fp.close()
    except Exception as e:
        # Best-effort helper: report the problem and signal failure with None
        print("Got exception %s" % e)
        return None
    return parsed
Beispiel #3
0
def fill_image_urls(message_id):
    """Load a message and fetch its HTML view so its images can be scraped.

    Returns False when scraping is disabled, the message is neither cached
    nor fetchable, the message has no image, no HTML view URL can be derived
    from it, or fetching/parsing that page raises. Returns True when the
    message already carries one of the QaikuBackup_image_url_* properties.
    """
    if not can_scrape:
        return False
    if (not storage.in_cache_byid(message_id)
            and not fetcherparser.recursive_fetch_message(message_id)):
        return False
    obj = fetcherparser.fetch_message(message_id)

    # There is no image, don't bother...
    if 'image_url' not in obj or not obj['image_url']:
        return False

    # Already processed this one
    for prop in ('QaikuBackup_image_url_view', 'QaikuBackup_image_url_orig'):
        if prop in obj:
            return True

    # Try to figure the shortest way to the canonical message HTML view
    url = None
    if 'in_reply_to_status_url' in obj:
        # This is a redirect but urllib has no problem following it
        url = obj['in_reply_to_status_url']
    if not url and 'channel' in obj and obj['channel']:
        # Channel message
        url = "http://www.qaiku.com/channels/show/%s/view/%s/" % (
            obj['channel'], obj['id'])
    if (not url
            and 'user' in obj
            and 'url' in obj['user']
            and obj['user']['url']):
        # non-Channel message
        url = "%s/show/%s/" % (obj['user']['url'], obj['id'])
    # Are there other possible combinations ?
    if not url:
        return False

    if debug:
        print("Soupifying %s" % url)
    try:
        fp = urllib_cached.urlopen(url)
        try:
            soup = BeautifulSoup(fp)
        finally:
            # The markup has been consumed by the parser; don't leak the handle
            fp.close()
    except Exception as e:
        print("Got exception %s" % e)
        return False
    # NOTE(review): nothing visible here consumes `soup`, and the function
    # falls through returning None - confirm the scraping step that is
    # expected to follow.
Beispiel #4
0
def fill_image_urls(message_id):
    """Load a message and fetch its HTML view so its images can be scraped.

    Returns False when scraping is disabled, the message is neither cached
    nor fetchable, the message has no image, no HTML view URL can be derived
    from it, or fetching/parsing that page raises. Returns True when the
    message already carries one of the QaikuBackup_image_url_* properties.
    """
    if not can_scrape:
        return False
    if (not storage.in_cache_byid(message_id)
            and not fetcherparser.recursive_fetch_message(message_id)):
        return False
    obj = fetcherparser.fetch_message(message_id)

    # There is no image, don't bother...
    if 'image_url' not in obj or not obj['image_url']:
        return False

    # Already processed this one
    for prop in ('QaikuBackup_image_url_view', 'QaikuBackup_image_url_orig'):
        if prop in obj:
            return True

    # Try to figure the shortest way to the canonical message HTML view
    url = None
    if 'in_reply_to_status_url' in obj:
        # This is a redirect but urllib has no problem following it
        url = obj['in_reply_to_status_url']
    if not url and 'channel' in obj and obj['channel']:
        # Channel message
        url = "http://www.qaiku.com/channels/show/%s/view/%s/" % (
            obj['channel'], obj['id'])
    if (not url
            and 'user' in obj
            and 'url' in obj['user']
            and obj['user']['url']):
        # non-Channel message
        url = "%s/show/%s/" % (obj['user']['url'], obj['id'])
    # Are there other possible combinations ?
    if not url:
        return False

    if debug:
        print("Soupifying %s" % url)
    try:
        fp = urllib_cached.urlopen(url)
        try:
            soup = BeautifulSoup(fp)
        finally:
            # The markup has been consumed by the parser; don't leak the handle
            fp.close()
    except Exception as e:
        print("Got exception %s" % e)
        return False
    # NOTE(review): nothing visible here consumes `soup`, and the function
    # falls through returning None - confirm the scraping step that is
    # expected to follow.
Beispiel #5
0
 # NOTE(review): this is the body of a function whose `def` line is not shown;
 # `url` and `local_path` are presumably its parameters and `debug` a module
 # global - confirm against the full file.
 # Downloads `url` into `local_path`; yields `local_path` on success (or when
 # a usable copy is already on disk) and None when the download fails.
 if os.path.isfile(local_path):
     # Make sure the file has sane amount of data...
     # Anything under 16 bytes is treated as a broken download: it is deleted
     # and execution falls through to fetch it again.
     if (os.stat(local_path).st_size < 16):
         print "ERR: Local file %s is empty, removing" % local_path
         os.unlink(local_path)
     else:
         # Existing copy looks fine, reuse it without touching the network
         return local_path
 # Create the container dir if it's not there
 if not os.path.isdir(os.path.dirname(local_path)):
     os.makedirs(os.path.dirname(local_path))
 if debug:
     print "Fetching (BIN) %s to %s" % (url, local_path)
 # Start both handles as None so the error handler below can tell which ones
 # were actually opened.
 fp_from = None
 fp_to = None
 try:
     fp_from = urllib_cached.urlopen(url)
     fp_to = open(local_path, 'wb')
     # TODO: use a sensibly sized buffer ?
     # read() is called without a size, so presumably the whole response is
     # held in memory before being written out.
     fp_to.write(fp_from.read())
     fp_from.close()
     fp_to.close()
 except Exception, e:
     print "Got exception %s" % e
     # Close whatever was opened before the failure
     if fp_from:
         fp_from.close()
     if fp_to:
         fp_to.close()
     # Remove the partial file so a later call does not mistake it for a
     # finished download (see the size check at the top).
     if os.path.isfile(local_path):
         os.unlink(local_path)
     return None
 return local_path
Beispiel #6
0
 # NOTE(review): this is the body of a function whose `def` line is not shown;
 # `url` and `local_path` are presumably its parameters and `debug` a module
 # global - confirm against the full file.
 # Downloads `url` into `local_path`; yields `local_path` on success (or when
 # a usable copy is already on disk) and None when the download fails.
 if os.path.isfile(local_path):
     # Make sure the file has sane amount of data...
     # Anything under 16 bytes is treated as a broken download: it is deleted
     # and execution falls through to fetch it again.
     if (os.stat(local_path).st_size < 16):
         print "ERR: Local file %s is empty, removing" % local_path
         os.unlink(local_path)
     else:
         # Existing copy looks fine, reuse it without touching the network
         return local_path
 # Create the container dir if it's not there
 if not os.path.isdir(os.path.dirname(local_path)):
     os.makedirs(os.path.dirname(local_path))
 if debug:
     print "Fetching (BIN) %s to %s" % (url, local_path)
 # Start both handles as None so the error handler below can tell which ones
 # were actually opened.
 fp_from = None
 fp_to = None
 try:
     fp_from = urllib_cached.urlopen(url)
     fp_to = open(local_path, 'wb')
     # TODO: use a sensibly sized buffer ?
     # read() is called without a size, so presumably the whole response is
     # held in memory before being written out.
     fp_to.write(fp_from.read())
     fp_from.close()
     fp_to.close()
 except Exception,e:
     print "Got exception %s" % e
     # Close whatever was opened before the failure
     if fp_from:
         fp_from.close()
     if fp_to:
         fp_to.close()
     # Remove the partial file so a later call does not mistake it for a
     # finished download (see the size check at the top).
     if os.path.isfile(local_path):
         os.unlink(local_path)
     return None
 return local_path