def __setitem__(self, key, item):
    """Log the assignment, then write `item` under `key` in the db key/value store."""
    self.log.info(
        "CacheObject setitem: %s -> %s",
        key,
        item,
    )
    db.set_in_db_key_value_store(key, item)
def __delitem__(self, key):
    """
    "Delete" `key` by overwriting its stored value with an empty dict.

    The key itself is not removed from the db key/value store by this call;
    only its value is replaced.
    """
    empty_value = {}
    db.set_in_db_key_value_store(key, empty_value)
def __local_rpc_get_title(self, itemUrl):
    """
    Resolve the HTML <title> of the page at `itemUrl`, caching the outcome.

    The outcome is written to the db key/value store under
    "url-to-title:<itemUrl>" (keyed on the URL exactly as passed in), as
    {'resolved': <title or None>}. A stored, non-empty entry that contains a
    'resolved' key is returned directly without fetching.

    Returns the stripped title text, or None when:
      - the URL contains 'foruwww.wuxiaworld.com' (nothing is cached then),
      - the fetch job reports failure,
      - the response has no content or no mime type,
      - the mime type does not contain 'text/html',
      - the parsed page has no title.

    Any exception raised while building or dispatching the fetch job is
    logged and re-raised.
    """
    # 'm.wuxiaworld.com' is a substring of 'forum.wuxiaworld.com' (while also
    # being an active subdomain), so a search/replace aimed at the mobile view
    # presumably mangled forum links into this host. Such URLs are skipped.
    if 'foruwww.wuxiaworld.com' in itemUrl:
        return None

    cache_key = "url-to-title:{}".format(itemUrl)

    def remember(title):
        # Persist the outcome (None marks an unresolvable URL) and hand it back.
        db.set_in_db_key_value_store(cache_key, {'resolved': title})
        return title

    cached = db.get_from_db_key_value_store(cache_key)
    if cached and 'resolved' in cached:
        return cached['resolved']

    # Note: the cache key above was built from the URL *before* this cleanup.
    itemUrl = itemUrl.strip().replace(" ", "%20")

    try:
        self.check_open_local_rpc_interface()
        fetch_job = WebMirror.JobUtils.buildjob(
            module='SmartWebRequest',
            call='smartGetItem',
            dispatchKey="fetcher",
            jobid=-1,
            args=[itemUrl],
            kwargs={},
            additionalData={'mode': 'fetch'},
            postDelay=0,
        )
        response = self.local_rpc.dispatch_request(fetch_job)
        self.local_rpc.close()
    except:
        # Bare except so that every exception is logged before being re-raised.
        self.log.error("Failure fetching content!")
        raise

    if not response['success']:
        self.log.error("Failed to fetch page at URL '%s'!", itemUrl)
        for tb_line in response['traceback']:
            self.log.error(tb_line)
        return remember(None)

    body, file_name, mime_type = response['ret']
    if not (body and mime_type):
        return remember(None)

    # Drop anything after the bare mime type (e.g. an encoding parameter).
    # The encoding is not needed here: per the original author's note,
    # WebRequest has already decoded the content.
    if ";" in mime_type:
        mime_type = mime_type.split(";", 1)[0].strip()

    # minus.com sends a url-encoded mime type in its http headers; undo the
    # encoded slash so the 'text/html' check below can match.
    if '%2F' in mime_type:
        mime_type = mime_type.replace('%2F', '/')

    self.log.info(
        "Retreived file of type '%s', name of '%s' with a size of %0.3f K",
        mime_type,
        file_name,
        len(body) / 1000.0,
    )

    if 'text/html' not in mime_type:
        self.log.warning("Fetched content not HTML, cannot extract title.")
        return remember(None)

    soup = WebRequest.as_soup(body)
    title_tag = soup.title
    if not title_tag:
        self.log.warning("No title on page!")
        return remember(None)

    title = title_tag.get_text().strip()
    self.log.info("title for content at '%s' resolved to '%s'.", itemUrl, title)
    return remember(title)