def fetch_message(object_id):
    """Return the message with the given id, preferring the local cache.

    A cached copy is returned as-is unless it is flagged stale, i.e. it has a
    truthy 'truncated' or 'QaikuBackup_stale' key.  Otherwise the message is
    requested from the REST API, handed to storage.update() and re-read from
    storage.  You probably should be calling recursive_fetch_message first.

    Args:
        object_id: id of the message; converted with str() before any lookup.

    Returns:
        The message object held by storage.  When the API request cannot be
        parsed, the cached (stale) copy is returned if there is one, else None.
    """
    # NOTE(review): the debug_tracebacks checks are written as nested under
    # the debug checks -- confirm against the intended flag semantics.
    if debug:
        print("fetch_message(%s) called" % repr(object_id))
        if debug_tracebacks:
            traceback.print_stack()
    object_id = str(object_id)  # cast to normal str

    if storage.in_cache_byid(object_id):
        obj = storage.get_byid(object_id)
        is_stale = obj.get('truncated') or obj.get('QaikuBackup_stale')
        if not is_stale:
            if debug:
                print("message %s returned from cache" % object_id)
            return obj
        # Object is stale, do not return from cache; fall through to the API
        if debug:
            print("storage.objectcache[%s] is stale" % repr(object_id))
            if debug_tracebacks:
                print(json.dumps(obj, sort_keys=True, indent=4))

    url = "http://www.qaiku.com/api/statuses/show/%s.json?apikey=%s" % (
        object_id, apikey)
    parsed = json_parse_url(url)
    if not parsed:
        # parse failed, return stale object if we have one
        if storage.in_cache_byid(object_id):
            return storage.get_byid(object_id)
        return None
    storage.update(parsed)
    return storage.get_byid(object_id)
def fetch_message(object_id):
    """Return the message with the given id, preferring the local cache.

    A cached copy is returned as-is unless it is flagged stale, i.e. it has a
    truthy 'truncated' or 'QaikuBackup_stale' key.  Otherwise the message is
    requested from the REST API, handed to storage.update() and re-read from
    storage.  You probably should be calling recursive_fetch_message first.

    Args:
        object_id: id of the message; converted with str() before any lookup.

    Returns:
        The message object held by storage.  When the API request cannot be
        parsed, the cached (stale) copy is returned if there is one, else None.
    """
    # NOTE(review): the debug_tracebacks checks are written as nested under
    # the debug checks -- confirm against the intended flag semantics.
    if debug:
        print("fetch_message(%s) called" % repr(object_id))
        if debug_tracebacks:
            traceback.print_stack()
    object_id = str(object_id)  # cast to normal str

    if storage.in_cache_byid(object_id):
        obj = storage.get_byid(object_id)
        is_stale = obj.get('truncated') or obj.get('QaikuBackup_stale')
        if not is_stale:
            if debug:
                print("message %s returned from cache" % object_id)
            return obj
        # Object is stale, do not return from cache; fall through to the API
        if debug:
            print("storage.objectcache[%s] is stale" % repr(object_id))
            if debug_tracebacks:
                print(json.dumps(obj, sort_keys=True, indent=4))

    url = "http://www.qaiku.com/api/statuses/show/%s.json?apikey=%s" % (
        object_id, apikey)
    parsed = json_parse_url(url)
    if not parsed:
        # parse failed, return stale object if we have one
        if storage.in_cache_byid(object_id):
            return storage.get_byid(object_id)
        return None
    storage.update(parsed)
    return storage.get_byid(object_id)
def fill_image_urls(message_id):
    """Load a message, work out its web (HTML) view URL and parse that page.

    The page URL is taken from, in order of preference: the message's
    'in_reply_to_status_url', its channel view, or its author's profile URL.

    Args:
        message_id: id of the message to process.

    Returns:
        False when scraping is disabled (can_scrape is falsy), the message
        cannot be loaded, it has no 'image_url', no page URL can be derived,
        or fetching/parsing the page raises.  True when the message already
        has a 'QaikuBackup_image_url_view' or 'QaikuBackup_image_url_orig'
        key.
    """
    if not can_scrape:
        return False
    if (not storage.in_cache_byid(message_id)
            and not fetcherparser.recursive_fetch_message(message_id)):
        return False
    obj = fetcherparser.fetch_message(message_id)
    if not obj:
        # Nothing came back from the fetch; avoid an AttributeError below
        return False
    # There is no image, don't bother...
    if not obj.get('image_url'):
        return False
    # Already processed this one
    if ('QaikuBackup_image_url_view' in obj
            or 'QaikuBackup_image_url_orig' in obj):
        return True

    # Try to figure the shortest way to the canonical message HTML view.
    # The reply URL is a redirect but urllib has no problem following it.
    url = obj.get('in_reply_to_status_url')
    if not url and obj.get('channel'):
        # Channel message
        url = "http://www.qaiku.com/channels/show/%s/view/%s/" % (
            obj['channel'], obj['id'])
    if not url:
        # non-Channel message; tolerate a missing or empty 'user' entry
        user_url = (obj.get('user') or {}).get('url')
        if user_url:
            url = "%s/show/%s/" % (user_url, obj['id'])
    # Are there other possible combinations ?
    if not url:
        return False

    if debug:
        print("Soupifying %s" % url)
    try:
        soup = BeautifulSoup(urllib_cached.urlopen(url))
    except Exception as e:
        # Best effort: any fetch/parse problem just means "no images found"
        print("Got exception %s" % e)
        return False
    # NOTE(review): `soup` is not used after this point in the code shown, and
    # the success path falls off the end (implicit None) -- confirm intended.
def fill_image_urls(message_id):
    """Load a message, work out its web (HTML) view URL and parse that page.

    The page URL is taken from, in order of preference: the message's
    'in_reply_to_status_url', its channel view, or its author's profile URL.

    Args:
        message_id: id of the message to process.

    Returns:
        False when scraping is disabled (can_scrape is falsy), the message
        cannot be loaded, it has no 'image_url', no page URL can be derived,
        or fetching/parsing the page raises.  True when the message already
        has a 'QaikuBackup_image_url_view' or 'QaikuBackup_image_url_orig'
        key.
    """
    if not can_scrape:
        return False
    if (not storage.in_cache_byid(message_id)
            and not fetcherparser.recursive_fetch_message(message_id)):
        return False
    obj = fetcherparser.fetch_message(message_id)
    if not obj:
        # Nothing came back from the fetch; avoid an AttributeError below
        return False
    # There is no image, don't bother...
    if not obj.get('image_url'):
        return False
    # Already processed this one
    if ('QaikuBackup_image_url_view' in obj
            or 'QaikuBackup_image_url_orig' in obj):
        return True

    # Try to figure the shortest way to the canonical message HTML view.
    # The reply URL is a redirect but urllib has no problem following it.
    url = obj.get('in_reply_to_status_url')
    if not url and obj.get('channel'):
        # Channel message
        url = "http://www.qaiku.com/channels/show/%s/view/%s/" % (
            obj['channel'], obj['id'])
    if not url:
        # non-Channel message; tolerate a missing or empty 'user' entry
        user_url = (obj.get('user') or {}).get('url')
        if user_url:
            url = "%s/show/%s/" % (user_url, obj['id'])
    # Are there other possible combinations ?
    if not url:
        return False

    if debug:
        print("Soupifying %s" % url)
    try:
        soup = BeautifulSoup(urllib_cached.urlopen(url))
    except Exception as e:
        # Best effort: any fetch/parse problem just means "no images found"
        print("Got exception %s" % e)
        return False
    # NOTE(review): `soup` is not used after this point in the code shown, and
    # the success path falls off the end (implicit None) -- confirm intended.