Esempio n. 1
0
	def get(self, key, default="super_sekrit_not_specified_value"):
		"""
		Fetch the value stored for ``key`` in the database key/value store.

		A truthy stored value is returned unchanged. When the store yields a
		falsy value, ``default`` is returned if the caller passed one.

		Raises:
			KeyError: the store yielded a falsy value and no ``default`` was given.
		"""
		self.log.info("Cache get for key %s", key)

		stored = db.get_from_db_key_value_store(key)
		if not stored:
			# The magic string in the signature marks "caller passed no default".
			caller_gave_default = default != "super_sekrit_not_specified_value"
			if not caller_gave_default:
				raise KeyError("Key %s not found in CacheObject backing store!" % (key, ))
			return default

		return stored
Esempio n. 2
0
	def has_key(self, key):
		"""
		Report whether the database key/value store holds something for ``key``.

		Returns ``True`` when the store's answer differs from an empty dict.
		NOTE(review): presumably the store returns ``{}`` for absent keys - confirm.
		"""
		self.log.info("Cache has_key for key %s", key)
		stored = db.get_from_db_key_value_store(key)
		return stored != {}
Esempio n. 3
0
	def __getitem__(self, key):
		"""
		Subscript access (``obj[key]``) backed by the database key/value store.

		Whatever the store returns for ``key`` is passed straight through; this
		method itself never raises ``KeyError`` for a missing key.
		"""
		self.log.info("CacheObject getitem: %s", key)
		value = db.get_from_db_key_value_store(key)
		return value
Esempio n. 4
0
	def __local_rpc_get_title(self, itemUrl):
		"""
		Resolve the HTML ``<title>`` of the page at ``itemUrl`` via the local RPC fetcher.

		Outcomes are cached in the database key/value store under
		``"url-to-title:<itemUrl>"`` (keyed on the URL exactly as passed in, before
		any normalisation). Negative outcomes are cached too, as ``None``, so later
		calls for the same URL return the cached answer without fetching again.

		Args:
			itemUrl: URL string of the page whose title is wanted.

		Returns:
			The whitespace-stripped title text, or ``None`` when the URL is one of
			the mangled forum URLs, the fetch reports failure, the response has no
			content or no mime type, the content is not ``text/html``, or the page
			has no ``<title>``.

		Raises:
			Exception: anything raised while opening the RPC interface, building
				or dispatching the job, or closing the interface is logged and
				re-raised. Nothing is cached in that case.
		"""

		# A past search/replace on the mobile host 'm.wuxiaworld.com' also matched
		# 'forum.wuxiaworld.com' (the former is a substring of the latter), which
		# left bogus 'foruwww.wuxiaworld.com' URLs behind. Skip those outright,
		# without fetching or caching anything.
		if 'foruwww.wuxiaworld.com' in itemUrl:
			return None

		url_key = "url-to-title:{}".format(itemUrl)
		have_cache = db.get_from_db_key_value_store(url_key)

		# A cached None is a remembered failure and is returned as-is.
		if have_cache and 'resolved' in have_cache:
			return have_cache['resolved']

		def cache_result(title):
			# Persist the outcome (None marks a negative result) and hand it back.
			db.set_in_db_key_value_store(url_key, {'resolved': title})
			return title

		itemUrl = itemUrl.strip().replace(" ", "%20")

		try:
			self.check_open_local_rpc_interface()
			try:
				raw_job = WebMirror.JobUtils.buildjob(
					module                 = 'SmartWebRequest',
					call                   = 'smartGetItem',
					dispatchKey            = "fetcher",
					jobid                  = -1,
					args                   = [itemUrl],
					kwargs                 = {},
					additionalData         = {'mode' : 'fetch'},
					postDelay              = 0,
				)
				ret = self.local_rpc.dispatch_request(raw_job)
			finally:
				# Close the interface even when the dispatch fails, so a failed
				# request does not leave it open.
				self.local_rpc.close()
		except Exception:
			self.log.error("Failure fetching content!")
			raise

		if not ret['success']:
			self.log.error("Failed to fetch page at URL '%s'!", itemUrl)

			for line in ret['traceback']:
				self.log.error(line)

			return cache_result(None)

		content, fileN, mType = ret['ret']

		if not content or not mType:
			return cache_result(None)

		# Drop any parameters after the mime type (e.g. the charset). The original
		# author notes that WebRequest has already decoded the content, so the
		# encoding is not needed here.
		if ";" in mType:
			mType = mType.split(";")[0].strip()

		# Some hosts (minus.com, per the original author) send the mime type
		# URL-encoded in their headers; undo the encoded slash.
		mType = mType.replace('%2F', '/')

		self.log.info("Retreived file of type '%s', name of '%s' with a size of %0.3f K", mType, fileN, len(content)/1000.0)

		if 'text/html' not in mType:
			self.log.warning("Fetched content not HTML, cannot extract title.")
			return cache_result(None)

		soup = WebRequest.as_soup(content)

		if not soup.title:
			self.log.warning("No title on page!")
			return cache_result(None)

		resolved_title = soup.title.get_text().strip()
		self.log.info("title for content at '%s' resolved to '%s'.", itemUrl, resolved_title)
		return cache_result(resolved_title)