예제 #1
0
	def ScrubMeta(self, itemd):
		"""Return a cleaned-up copy of the metadata dictionary.

		The argument is copied first, so the caller's dictionary is left
		untouched. A "title" of "[Untitled]" is supplied when the key is
		absent.

		When self.AtomLike() is true, the copy is additionally rewritten:
		- "author", "author.uri" and "author.email" (looked up with
		  bm_extract.as_string) are stored as "ownerName", "ownerId" and
		  "ownerEmail" when they are non-empty, and "author" is removed
		- "created" / "updated" are renamed to "dateCreated" /
		  "dateModified" and passed through bm_extract.coerce_datetime
		  with rfc822 = True (falling back to the raw value)
		"""
		result = dict(itemd)
		if "title" not in result:
			result["title"] = "[Untitled]"

		if not self.AtomLike():
			return	result

		#
		#	Author is close enough to owner
		#
		owner_paths = (
			( "author", "ownerName" ),
			( "author.uri", "ownerId" ),
			( "author.email", "ownerEmail" ),
		)
		for author_path, owner_key in owner_paths:
			found = bm_extract.as_string(result, author_path)
			if found:
				result[owner_key] = found

		result.pop("author", None)

		#
		#	Date keys: rename and coerce; a missing key is simply skipped
		#
		date_renames = (
			( 'created', 'dateCreated' ),
			( 'updated', 'dateModified' ),
		)
		for old_key, new_key in date_renames:
			try:
				raw = result.pop(old_key)
				result[new_key] = bm_extract.coerce_datetime(raw, otherwise = raw, rfc822 = True)
			except KeyError:
				continue

		return	result
예제 #2
0
	def Fetch(self):
		"""Run the parser once and cache its items and document metadata.

		Does nothing when self._parser is already set. Otherwise it:
		- calls self.CustomizeValidate()
		- instantiates self._parser_class for self.uri with '@@' as the
		  at-prefix plus the keyword arguments stored in self._parserd
		- calls PragmaCLI() on the new parser
		- stores everything yielded by the parser's Iterate() in self._items
		- stores "link" and "title" in self._meta, plus "updated" (the
		  ISO-formatted document date) when the parser reports one
		"""
		if self._parser:
			return

		self.CustomizeValidate()

		parser = self._parser_class(page_uri = self.uri, at_prefix = '@@', **self._parserd)
		self._parser = parser
		parser.PragmaCLI()

		self._items = [ item for item in parser.Iterate() ]

		meta = {}
		meta["link"] = self.uri
		meta["title"] = parser.document_title
		self._meta = meta

		if parser.document_date:
			stamp = bm_extract.coerce_datetime(parser.document_date)
			self._meta['updated'] = stamp.isoformat()
예제 #3
0
	def CustomizeAtomMeta(self, itemd):
		"""Return a copy of the metadata dictionary reshaped for Atom output.

		The argument is copied first, so the caller's dictionary is not
		modified. On the copy:
		- "dateCreated" / "dateModified" are renamed to "created" /
		  "updated" and passed through bm_extract.coerce_datetime with
		  atom = True (falling back to the raw value)
		- the window/expansion state keys listed below are dropped
		- "ownerName", "ownerEmail" and "ownerId" are removed and, if any
		  of them is non-empty, folded into an "author" dictionary with
		  the name under "@" and optional "email" / "uri" entries
		"""
		itemd = dict(itemd)

		#
		#	datetimes
		#
		for k_to, k_from in [ ( 'created', 'dateCreated' ), ( 'updated', 'dateModified' ), ]:
			try:
				value = itemd.pop(k_from)
				itemd[k_to] = bm_extract.coerce_datetime(value, otherwise = value, atom = True)
			except KeyError:
				pass

		#
		#	OPML Garbage
		#	(pop with a default instead of a bare "except:" that would
		#	hide any error, not just a missing key)
		#
		for key in [ 'expansionState', 'vertScrollState', 'windowBottom', 'windowLeft', 'windowRight', 'windowTop', ]:
			itemd.pop(key, None)

		#
		#	Atom author
		#
		author_name = itemd.pop("ownerName", None)
		author_email = itemd.pop("ownerEmail", None)
		author_href = itemd.pop("ownerId", None)

		if author_name or author_email or author_href:
			# "@" always exists in the author dictionary, even with no name
			authord = {
				"@" : author_name or "",
			}
			if author_email: authord["email"] = author_email
			if author_href: authord["uri"] = author_href

			itemd["author"] = authord

		return	itemd
예제 #4
0
	def ScrubItem(self, itemd):
		"""Adjust a single item for Atom-like output.

		Note: this is *not* CustomizeAtomItem.

		When self.AtomLike() is false the argument is returned as-is (the
		same object). Otherwise a copy is made and, on the copy:
		- a missing or empty "title" is filled from "text" (which is
		  removed) when "text" is present
		- "created" is passed through bm_extract.coerce_datetime with
		  atom = True (falling back to the raw value)
		- "tags" is removed, split on commas with bm_extract.coerce_list,
		  and stored as "category": a list of { "term" : tag } dictionaries
		"""
		if not self.AtomLike():
			return	itemd

		scrubbed = dict(itemd)

		#
		#	Atom title
		#
		if not scrubbed.get("title") and "text" in scrubbed:
			scrubbed["title"] = scrubbed.pop("text")

		#
		#	Atom datetimes
		#
		try:
			raw_created = scrubbed.pop("created")
			scrubbed["created"] = bm_extract.coerce_datetime(raw_created, otherwise = raw_created, atom = True)
		except KeyError:
			pass

		#
		#	Atom categories
		#
		try:
			raw_tags = scrubbed.pop("tags")
			terms = bm_extract.coerce_list(raw_tags, separator = ",", strip = True)

			categories = []
			for term in terms:
				categories.append({ "term" : term })

			scrubbed["category"] = categories
		except KeyError:
			pass

		return	scrubbed
예제 #5
0
	def as_datetime(self, path, **ad):
		"""Look up `path` with self.get() and coerce the result to a datetime.

		Any extra keyword arguments are forwarded unchanged to
		bm_extract.coerce_datetime.
		"""
		raw = self.get(path)
		return	bm_extract.coerce_datetime(raw, **ad)
예제 #6
0
	def ScrubEntry(self, itemd):
		"""Recursively convert an entry into an attribute-style dictionary.

		Behaviour depends on the type of `itemd`:
		- dictionary: returns a new dictionary in which every atomic value
		  is stored under its key prefixed with "@"; values found under
		  "items" or "outline" are scrubbed recursively and stored under
		  "outline"; None values and other non-atomic values are dropped.
		  "@type" defaults to "rss" when an rssUrl was seen, otherwise to
		  "link" when an htmlUrl or url was seen, and "@text" defaults to
		  "". The input dictionary is not modified.
		- atomic value: returns { "@title" : <value as string> }
		- list (or list-like): returns a list of scrubbed members
		- anything else: returned unchanged

		When self.AtomLike() is true, keys are first renamed
		(link -> htmlUrl, feeds -> rssUrl, content -> description,
		title -> text, category -> tags joined with ", ", the "alternate"
		entry of links -> rssUrl) and "created" is coerced with
		bm_extract.coerce_datetime(rfc822 = True).
		"""
		if bm_extract.is_dict(itemd):
			nd = {}

			seen_html = False
			seen_rss = False
			seen_url = False

			# Does not change while we loop, so ask once
			atom_like = self.AtomLike()

			# items() works the same on Python 2 and 3 (iteritems() is 2-only)
			for key, value in itemd.items():
				if atom_like:
					if key == "link":
						key = "htmlUrl"
					elif key == "feeds":
						key = "rssUrl"
					elif key == "content":
						key = "description"
					elif key == "title":
						key = "text"
					elif key == "category":
						key = "tags"
						value = ", ".join(d["term"] for d in value)
					elif key == "links":
						# the last "alternate" link wins; with none, the key
						# stays "links" and the list is dropped further down
						for ld in bm_extract.coerce_list(value):
							if bm_extract.as_string(ld, "rel") == "alternate":
								key = "rssUrl"
								value = bm_extract.as_string(ld, "href")
					elif key == "created":
						#
						#	datetimes (?)
						#
						#	Coerce the value as it is visited. This used to pop
						#	and re-insert "created" in itemd on every pass of
						#	this loop, which modified the caller's dictionary
						#	while it was being iterated and made the emitted
						#	value depend on iteration order.
						#
						value = bm_extract.coerce_datetime(value, otherwise = value, rfc822 = True)

				if key == "rssUrl":
					value = self.FirstInListLikeObject(value, value)
					if value is None:
						continue

					seen_rss = True
				elif key == "htmlUrl":
					value = self.FirstInListLikeObject(value, value)
					if value is None:
						continue

					seen_html = True
				elif key == "url":
					seen_url = True

				if key in [ "items", "outline" ]:
					nd["outline"] = self.ScrubEntry(value)
				elif value is None:
					pass
				elif bm_extract.is_atomic(value):
					nd['@%s' % key] = value

			# An explicit "@type" copied from the input is never overridden
			if seen_rss:
				nd.setdefault("@type", "rss")
			elif seen_html:
				nd.setdefault("@type", "link")
			elif seen_url:
				nd.setdefault("@type", "link")

			nd.setdefault("@text", "")

			return	nd
		elif bm_extract.is_atomic(itemd):
			return	{
				"@title" : bm_extract.coerce_string(itemd)
			}
		elif bm_extract.is_list(itemd) or bm_extract.is_list_like(itemd):
			# a real list on both Python 2 and 3 (map() is lazy on 3)
			return	[ self.ScrubEntry(entry) for entry in itemd ]

		return	itemd