Ejemplo n.º 1
0
	def ScrubPerson(self, itemd, person_key):
		"""Normalize the person entries found under `person_key` in `itemd`.

		Every usable entry is rewritten as a dict holding "name" plus any
		non-empty "uri" / "email" values; entries without a name are dropped.

		`itemd` is modified in place: the key receives the cleaned list, or is
		removed when no usable entry remains. Nothing is returned.
		"""
		npersons = []
		for persond in bm_extract.as_list(itemd, person_key) or []:
			# Try the entry itself as a string first, then its "name" field
			person_name = bm_extract.coerce_string(persond)
			if not person_name:
				person_name = bm_extract.as_string(persond, "name")
			if not person_name:
				continue

			npersond = {
				"name" : person_name,
			}

			for key in [ "uri", "email" ]:
				value = bm_extract.as_string(persond, key)
				if value:
					npersond[key] = value

			npersons.append(npersond)

		if npersons:
			itemd[person_key] = npersons
			return

		# Nothing usable is left: drop the key, which may already be absent.
		# Only the "key is missing" case is expected, so only that is ignored.
		try:
			del itemd[person_key]
		except KeyError:
			pass
Ejemplo n.º 2
0
	def ScrubMeta(self, itemd):
		"""Return a scrubbed copy of the metadata dict `itemd`.

		The copy always carries a "title" ("[Untitled]" when missing). When
		self.AtomLike() is true, author information is copied to the owner*
		keys, "author" is removed, and 'created' / 'updated' are moved to
		'dateCreated' / 'dateModified' after going through coerce_datetime.
		The caller's dict is not modified.
		"""
		metad = dict(itemd)
		if "title" not in metad:
			metad["title"] = "[Untitled]"

		if not self.AtomLike():
			return	metad

		#
		#	Author is close enough to owner
		#
		owner_paths = (
			( "author", "ownerName" ),
			( "author.uri", "ownerId" ),
			( "author.email", "ownerEmail" ),
		)
		for author_path, owner_key in owner_paths:
			text = bm_extract.as_string(metad, author_path)
			if text:
				metad[owner_key] = text

		metad.pop("author", None)

		#
		#	Atom dates move to their new key names
		#
		date_keys = (
			( 'created', 'dateCreated' ),
			( 'updated', 'dateModified' ),
		)
		for k_from, k_to in date_keys:
			try:
				raw = metad.pop(k_from)
				# the unparsed value is kept when it cannot be coerced
				metad[k_to] = bm_extract.coerce_datetime(raw, otherwise = raw, rfc822 = True)
			except KeyError:
				pass

		return	metad
Ejemplo n.º 3
0
	def ScrubCategory(self, itemd):
		"""Normalize the "category" entries of `itemd`.

		Every usable entry is rewritten as a dict holding "@term" plus any
		non-empty "@scheme" / "@label" values; entries without a term are
		dropped.

		`itemd` is modified in place: "category" receives the cleaned list, or
		is removed when no usable entry remains. Nothing is returned.
		"""
		ncats = []
		for catd in bm_extract.as_list(itemd, "category") or []:
			# Try the entry itself as a string first, then its "term" field
			cat_name = bm_extract.coerce_string(catd)
			if not cat_name:
				cat_name = bm_extract.as_string(catd, "term")
			if not cat_name:
				continue

			ncatd = {
				"@term" : cat_name,
			}

			for key in [ "scheme", "label" ]:
				value = bm_extract.as_string(catd, key)
				if value:
					ncatd["@" + key] = value

			ncats.append(ncatd)

		if ncats:
			itemd["category"] = ncats
			return

		# Nothing usable is left: drop the key, which may already be absent.
		# Only the "key is missing" case is expected, so only that is ignored.
		try:
			del itemd["category"]
		except KeyError:
			pass
Ejemplo n.º 4
0
	def CustomizeAtomItem(self, itemd):
		"""Build a new dict with "title", "content", "link" and "hcard:hcard" for `itemd`."""
		title = bm_extract.as_string(itemd, "@@title")
		content = bm_extract.as_string(itemd, "@@html")

		# Prefer what itemd.find('url') yields; fall back to the "@@uri" value
		link = itemd.find('url')
		if not link:
			link = bm_extract.as_string(itemd, "@@uri")

		resultd = {}
		resultd["title"] = title
		resultd["content"] = content
		resultd["link"] = link
		resultd["hcard:hcard"] = hcard.decompose(itemd, "hcard")

		return	resultd
Ejemplo n.º 5
0
		def fset(self, service_name):
			"""Load username / password from the configuration entry named `service_name`.

			When the entry is missing or empty, a warning is logged and the
			object is left unchanged.
			"""
			serviced = bm_cfg.cfg.get(service_name)
			if serviced:
				self.username = bm_extract.as_string(serviced, 'username')
				self.password = bm_extract.as_string(serviced, 'password')
				return

			Log("warning - authentication service was not found: don't be surprised by an exception soon", service_name = service_name)
Ejemplo n.º 6
0
		def fset(self, service_name):
			"""Set self._authenticate from the configuration entry named `service_name`.

			An entry with 'oauth_consumer_key' selects OAuth; otherwise an
			entry with 'username' selects basic auth. When the entry is
			missing a warning is logged; when it has neither key nothing is
			changed.
			"""
			serviced = bm_cfg.cfg.get(service_name)
			if not serviced:
				Log("warning - authentication service was not found: don't be surprised by an exception soon", service_name = service_name)
				return

			if serviced.get('oauth_consumer_key'):
				auth = bm_oauth.OAuth(service_name = service_name)
			elif serviced.get('username'):
				auth = bm_uri.AuthBasic(
					username = bm_extract.as_string(serviced, 'username'),
					password = bm_extract.as_string(serviced, 'password'),
				)
			else:
				# neither scheme is configured: leave things as they are
				return

			self._authenticate = bm_uri.Authenticate(auth = auth)
Ejemplo n.º 7
0
	def ScrubLinks(self, itemd):
		"""Merge the "links" and "link" values of `itemd` into one normalized list.

		Every usable "links" entry becomes a dict holding "@href" plus any
		non-empty "@rel", "@type", "@hreflang", "@title" and "@length" values.
		A "link" string not already present as an "@href" is appended with
		"@rel" set to "alternate".

		`itemd` is modified in place: "link" and "links" are removed and, when
		at least one link survives, "link" is set to the normalized list.
		Nothing is returned.
		"""
		# Always build a fresh list, so the object handed back by as_list is
		# never appended to
		nlinks = []
		for linkd in bm_extract.as_list(itemd, "links") or []:
			# Try the entry itself as a string first, then its "href" field
			link_href = bm_extract.coerce_string(linkd)
			if not link_href:
				link_href = bm_extract.as_string(linkd, "href")
			if not link_href:
				continue

			nlinkd = {
				"@href" : link_href,
			}

			for key in [ "rel", "type", "hreflang", "title", "length", ]:
				value = bm_extract.as_string(linkd, key)
				if value:
					nlinkd["@" + key] = value

			nlinks.append(nlinkd)

		link = bm_extract.as_string(itemd, "link")
		if link:
			found = any(link == bm_extract.as_string(nlinkd, "@href") for nlinkd in nlinks)
			if not found:
				nlinks.append({
					"@href" : link,
					"@rel" : "alternate",
				})

		# Either key may be absent; only that case is ignored
		for key in [ "link", "links" ]:
			try:
				del itemd[key]
			except KeyError:
				pass

		if nlinks:
			itemd["link"] = nlinks
Ejemplo n.º 8
0
	def CustomizeAtomItem(self, d):
		"""Customize the item via APIReader, then fold a Point "geometry" into lat/lon.

		When "geometry.type" is "Point", its first two coordinates are handed
		to bm_api.add_latlon and the "geometry" key is removed. Returns the
		customized item.
		"""
		d = bm_api.APIReader.CustomizeAtomItem(self, d)

		if bm_extract.as_string(d, "geometry.type") != "Point":
			return	d

		coordinates = bm_extract.as_list(d, "geometry.coordinates")
		if len(coordinates) >= 2:
			# NOTE(review): add_latlon's argument order is not visible here, and
			# GeoJSON orders Point coordinates as [longitude, latitude] -- confirm
			# which order this service returns before relying on the result
			bm_api.add_latlon(d, coordinates[0], coordinates[1])

		# "geometry" may already be gone; only that case is ignored
		try:
			del d["geometry"]
		except KeyError:
			pass

		return	d
Ejemplo n.º 9
0
	def CustomizeAtomItem(self, itemd):
		"""Replace the "author" value with its title, adding decomposed hCard data.

		A non-empty "author" is replaced by its "@@title" string, and
		"hcard:author" is set to the decomposed author (a list of them when
		the author is a list or list-like). Categories are then extracted and
		the item is passed on to APIBase.CustomizeAtomItem, whose result is
		returned.
		"""
		try:
			author = itemd.pop("author")
			if author:
				itemd["author"] = bm_extract.as_string(author, "@@title")

				if bm_extract.is_list(author) or bm_extract.is_list_like(author):
					# a comprehension always yields a real list (map() does not on Python 3)
					itemd["hcard:author"] = [ hcard.decompose(a, "hcard") for a in author ]
				elif bm_extract.is_dict(author):
					itemd["hcard:author"] = hcard.decompose(author, "hcard")
		except KeyError:
			pass

		self.ExtractCategories(itemd)

		return	bm_api.APIBase.CustomizeAtomItem(self, itemd)
Ejemplo n.º 10
0
	def CustomizeAtomItem(self, d):
		"""Customize the item via APIReader, then tidy its images and bio.

		- "image" values are stripped of ">" characters and stored as
		  "images"; the last one also becomes "photo"
		- a non-empty "bio.content" becomes "content" and "bio" is removed

		Returns the customized item.
		"""
		d = bm_api.APIReader.CustomizeAtomItem(self, d)

		images = bm_extract.as_list(d, "image")
		if images:
			# common last.fm bug: stray ">" around the image value.
			# A real list is needed because it is indexed just below.
			images = [ image.strip(">") for image in images ]
			d["images"] = images
			d["photo"] = images[-1]

		content = bm_extract.as_string(d, "bio.content")
		if content:
			d["content"] = content

			# "bio" may already be gone; only that case is ignored
			try:
				del d["bio"]
			except KeyError:
				pass

		return	d
Ejemplo n.º 11
0
	def as_string(self, path, **ad):
		"""Look up `path` in self.private via bm_extract.as_string, passing `ad` through."""
		value = bm_extract.as_string(self.private, path, **ad)
		return	value
Ejemplo n.º 12
0
	def CustomizeAtomItem(self, d):
		"""Customize the item via Google, then build an hCard from its address data.

		Address, photo, organization name, geo and phone values are collected
		into an mfdict and, when anything was found, stored decomposed under
		"hcard:hcard". "lat" / "lng" are moved into the item through
		bm_api.add_latlon, and the raw address / phone keys are removed.
		Returns the customized item.
		"""
		d = Google.CustomizeAtomItem(self, d)

		#
		#	Build a hCard from the data
		#	... should add lat/lon here?
		#
		hd = uf_mfdict.mfdict()
		for k_from, k_to in [
			( "country", "%s.%s.%s" % ( uf_vcard.Work, uf_vcard.ADR, uf_vcard.CountryName, ), ),
			( "streetAddress", "%s.%s.%s" % ( uf_vcard.Work, uf_vcard.ADR, uf_vcard.StreetAddress, ), ),
			( "city", "%s.%s.%s" % ( uf_vcard.Work, uf_vcard.ADR, uf_vcard.Locality, ), ),
			( "region", "%s.%s.%s" % ( uf_vcard.Work, uf_vcard.ADR, uf_vcard.Region, ), ),
			( "staticMapUrl", "%s" % ( uf_vcard.Photo, ), ),
			( "title", uf_vcard.OrganizationName, ),
			( "lat", "%s.%s" % ( uf_vcard.GEO, uf_vcard.Latitude, ), ),
			( "lng", "%s.%s" % ( uf_vcard.GEO, uf_vcard.Longitude, ), ),
		]:
			try:
				value = bm_extract.as_string(d, k_from)
				if value:
					hd[k_to] = value
			except KeyError:
				pass

		for pd in bm_extract.as_list(d, "phoneNumbers"):
			number = bm_extract.as_string(pd, "number")
			if not number:
				continue

			# named phone_type so the builtin type() is not shadowed
			phone_type = bm_extract.as_string(pd, "type")

			if phone_type in ( "fax", "data", ):
				tel_kind = uf_vcard.Fax
			elif phone_type == "mobile":
				tel_kind = uf_vcard.Mobile
			else:
				# "main", missing and unrecognized types all count as voice lines
				tel_kind = uf_vcard.Voice

			hd["%s.%s.%s" % ( uf_vcard.TEL, tel_kind, uf_vcard.Work, )] = number

		if hd:
			d["hcard:hcard"] = hcard.decompose(hd, "hcard")

		#
		#	Move lat/lng into the item proper
		#
		try:
			bm_api.add_latlon(d, d.pop("lat"), d.pop("lng"), )
		except KeyError:
			pass

		#
		#	Remove stuff
		#
		for key in [ "country", "streetAddress", "city", "region", "staticMapUrl", "phoneNumbers", ]:
			try:
				del d[key]
			except KeyError:
				pass

		#
		#	The result
		#
		return	d
Ejemplo n.º 13
0
		"Sort" : "relevancerank",
		"Operation" : "ItemSearch", 
		"Version" : "2008-08-19",
		"ResponseGroup" : [ "Small", ], 
	}
	# Base URI of the Amazon ECS XML endpoint
	_uri_base = "http://ecs.amazonaws.com/onca/xml"
	# NOTE(review): the four *_path values look like dotted paths into the
	# parsed response, presumably consumed by bm_api.APIReader -- confirm there
	_meta_path = "Items.Request"
	_item_path = "Items.Item"
	_page_max_path = 'Items.TotalPages'
	_item_max_path = 'Items.TotalResults'
	# NOTE(review): -1 presumably means "no fixed page limit" -- confirm in bm_api.APIReader
	_page_max = -1

	def __init__(self, **ad):
		"""Forward all keyword arguments unchanged to bm_api.APIReader.__init__."""
		bm_api.APIReader.__init__(self, **ad)

	def CustomizePageURI(self, page_index):
		"""Return the "ItemPage=<page_index>" fragment, or None for page 1."""
		if page_index != 1:
			return	"%s=%s" % ( "ItemPage", page_index )

		# the first page needs no extra parameter
		return	None

if __name__ == '__main__':
	# Demo: search new books by keyword and print each title.
	# The access key is read from the AWS_ECS_ACCESSKEYID environment variable.
	api = AmazonECS(AWSAccessKeyId = os.environ["AWS_ECS_ACCESSKEYID"])
	api.SetRequest(
		Keywords = "Larry Niven",
		SearchIndex = "Books",
		Condition = "New",
	)
	for item in api.IterItems():
		# a single pre-formatted argument prints the same text on Python 2 and 3
		print("- %s" % bm_extract.as_string(item, 'ItemAttributes.Title'))
Ejemplo n.º 14
0
	def ScrubEntry(self, itemd):
		"""Convert an entry into nested "@attribute" dicts, recursively.

		- dict: atomic values are copied to "@<key>" entries and the children
		  under "items" / "outline" are scrubbed into "outline". When
		  self.AtomLike() is true, Atom-style keys are first renamed (link ->
		  htmlUrl, feeds -> rssUrl, content -> description, title -> text,
		  category -> tags) and 'created' is coerced to a datetime string in
		  `itemd` itself. "@type" defaults to "rss" or "link" depending on
		  which URL keys were seen, and "@text" defaults to "".
		- atomic value: wrapped as { "@title" : <string> }
		- list / list-like: every member is scrubbed
		- anything else is returned untouched
		"""
		if bm_extract.is_dict(itemd):
			atom_like = self.AtomLike()

			if atom_like:
				#
				#	datetimes (?)
				#	Done once, before iterating: changing itemd inside its own
				#	iteritems() loop is unsafe, and made the emitted 'created'
				#	value depend on the order the keys were visited in
				#
				try:
					created = itemd.pop("created")
					itemd["created"] = bm_extract.coerce_datetime(created, otherwise = created, rfc822 = True)
				except KeyError:
					pass

			# plain key renames applied in Atom-like mode
			renamed = {
				"link" : "htmlUrl",
				"feeds" : "rssUrl",
				"content" : "description",
				"title" : "text",
			}

			nd = {}

			seen_html = False
			seen_rss = False
			seen_url = False

			for key, value in itemd.iteritems():
				if atom_like:
					if key == "category":
						key = "tags"
						value = ", ".join([ catd["term"] for catd in value ])
					elif key == "links":
						# the last rel="alternate" link wins
						for ld in bm_extract.coerce_list(value):
							if bm_extract.as_string(ld, "rel") == "alternate":
								key = "rssUrl"
								value = bm_extract.as_string(ld, "href")
					else:
						key = renamed.get(key, key)

				if key == "rssUrl":
					value = self.FirstInListLikeObject(value, value)
					if value is None:
						continue

					seen_rss = True
				elif key == "htmlUrl":
					value = self.FirstInListLikeObject(value, value)
					if value is None:
						continue

					seen_html = True
				elif key == "url":
					seen_url = True

				if key in [ "items", "outline" ]:
					nd["outline"] = self.ScrubEntry(value)
				elif value is None:
					pass
				elif bm_extract.is_atomic(value):
					nd['@%s' % key] = value

			if seen_rss:
				nd.setdefault("@type", "rss")
			elif seen_html or seen_url:
				nd.setdefault("@type", "link")

			nd.setdefault("@text", "")

			return	nd
		elif bm_extract.is_atomic(itemd):
			return	{
				"@title" : bm_extract.coerce_string(itemd)
			}
		elif bm_extract.is_list(itemd) or bm_extract.is_list_like(itemd):
			return	[ self.ScrubEntry(childd) for childd in itemd ]

		return	itemd
Ejemplo n.º 15
0
	def ScrubEntry(self, itemd):
		"""Make sure we look like an RSS entry

		`itemd` is split by self.Separate into known RSS items (nd) and
		everything else (xd). Atom-style values found in xd (links, author,
		updated / published, body, content, summary, id) are copied into nd
		under "atom:" keys and used to default the RSS "link", "source",
		"dc:creator", "pubDate", "description" and "guid" values. Whatever is
		left in xd is kept under "unknown:<key>". The required "title",
		"link" and "description" always exist in the result.

		Returns the new dict nd.
		"""

		#
		#	Look for known items and namespaced items
		#
		nd, xd = self.Separate(itemd, self._known_item, "rss")

		def take(key):
			"""Remove and return xd[key], or None when the key is absent"""
			try:
				return	xd.pop(key)
			except KeyError:
				return	None

		#
		#	atom links
		#
		links = take('links')
		if links:
			nd["atom:links"] = links

			#
			#	default an RSS value
			#
			if not nd.get("link"):
				# Atom treats a link without 'rel' as rel="alternate"; a link
				# lacking 'rel' or 'href' must not abort the defaulting
				ld = dict([ ( linkd.get("rel") or "alternate", linkd ) for linkd in links ])
				v = ld.get("alternate") or ld.get("self")
				if v and v.get("href"):
					nd["link"] = v["href"]

		#
		#	author.uri (read before 'author' is removed below)
		#
		try:
			value = bm_extract.as_string(xd, 'author.uri')
			if value:
				nd["source"] = value
		except KeyError:
			pass

		#
		#	author
		#
		value = take('author')
		if value:
			value = bm_extract.coerce_string(value)
		if value:
			nd["atom:author"] = value
			nd["dc:creator"] = value

		#
		#	atom published/updated
		#	 'updated': '2009-01-09T12:20:02+00:00'}
		#
		for key in [ 'updated', 'published' ]:
			value = take(key)
			if value:
				nd["atom:%s" % key] = value

		#
		#	default a pubDate
		#
		if not nd.get("pubDate"):
			dts = nd.get("atom:updated") or nd.get("atom:published")
			if dts:
				try:
					# imported here so a missing dateutil only costs the pubDate
					import dateutil.parser

					dt = dateutil.parser.parse(dts)
					if dt:
						nd["pubDate"] = dt.strftime("%a, %d %b %Y %H:%M:%S %z")
				except Exception:
					Log("date could not be parsed - maybe a missing module?", exception = True, dts = dts)

		#
		#	Our fake composite value, body
		#
		value = take("body")
		if value:
			nd["description"] = value

		#
		#	Atom content
		#
		value = take("content")
		if value:
			nd.setdefault("description", value)
			nd["atom:content"] = value

		#
		#	Atom summary
		#
		value = take("summary")
		if value:
			nd.setdefault("description", value)
			nd["atom:summary"] = value

		#
		#	Atom ID
		#
		value = take("id")
		if value:
			nd.setdefault("guid", value)
			nd["atom:id"] = value

		#
		#	Required item elements
		#
		nd.setdefault("title", "")
		nd.setdefault("link", "#")
		nd.setdefault("description", "")

		#
		#	Remaining items
		#
		if xd:
			for key, item in xd.iteritems():
				nd["unknown:%s" % key] = item

		return	nd
Ejemplo n.º 16
0
	def _ProcessRow(self, rd, uri, rel = None):
		d = {}

		#
		#	Title
		#
		d['title'] = rd['title'] = rd.get('name') or rd.get('nick') or '[No Name]'

		#
		#	Image
		#
		logo = rd.get('image') or rd.get('img')
		if logo:
			rd['logo'] = logo
			d['logo'] = logo

		#
		#	Lat/Lng
		#
		bm_api.add_latlon(d, rd.get('lat'), rd.get('lng'))

		#
		#	Get everything that goes into the hCard
		#
		hd = uf_mfdict.mfdict()
		for k_from, k_to in [
			( "country_name", "%s.%s" % ( uf_vcard.ADR, uf_vcard.CountryName, ), ),
			( "street_address", "%s.%s" % ( uf_vcard.ADR, uf_vcard.StreetAddress, ), ),
			( "extended_address", "%s.%s" % ( uf_vcard.ADR, uf_vcard.ExtendedAddress, ), ),
			( "locality", "%s.%s" % ( uf_vcard.ADR, uf_vcard.Locality, ), ),
			( "region", "%s.%s" % ( uf_vcard.ADR, uf_vcard.Region, ), ),
			( "postal_code", "%s.%s" % ( uf_vcard.ADR, uf_vcard.PostalCode, ), ),
			( "title", uf_vcard.FN, ),
			( "mbox_sha1sum", uf_vcard.UID, ),
			( "phone", "%s.%s" % ( uf_vcard.TEL, uf_vcard.Voice, ), ),
			( "logo", uf_vcard.Logo, ),
			( "lat", "%s.%s" % ( uf_vcard.GEO, uf_vcard.Latitude, ), ),
			( "lng", "%s.%s" % ( uf_vcard.GEO, uf_vcard.Longitude, ), ),
		]:
			try:
				value = bm_extract.as_string(rd, k_from)
				if value:
					if k_from in [ "phone" ]:
						if value.startswith("tel:"):
							value = value[4:]

					hd[k_to] = value

				rd.pop(k_from)
			except KeyError:
				pass

		for key in [ "name", "nick", "photo", "image", "img", ]:
			try: rd.pop(key)
			except KeyError: pass

		if hd:
			uf_vcard.scrub(hd)
			d["hcard:hcard"] = hcard.decompose(hd, "hcard")

		#
		#	Add links
		#
		d["link"] = rd.get('homepage') or rd.get("weblog") or uri

		links = [{
			"rel" : "related",
			"href" : uri,
			"title" : "FOAF source",
		}]
		d["links"] = links

		for html_key in [ "homepage", "weblog", ]:
			try:
				uri = rd.pop(html_key)
				if uri:
					links.append({
						"href" : uri,
						"rel" : "related",
						"type" : "text/html",
						"title" : html_key,
					})
			except KeyError:
				pass

		if uri != self.uri and rel:
			links.append({
				"href" : self.uri,
				"rel" : "xfn",
				"rev" : rel
			})

##		if rel:
##			d["xfn:rel"] = rel

		return	d
Ejemplo n.º 17
0
	def CustomizeAtomItem(self, d):
		"""Customize the item via APIReader, then map tags, location and links.

		"tags.tag" entries become "category" terms, the location's latitude
		and longitude go through bm_api.add_latlon, address / phone / title
		values are collected into an hCard stored under "hcard:hcard", and a
		non-empty "short_url" becomes the single alternate link in "links".
		The raw tag, location and phone keys are removed. Returns the
		customized item.
		"""
		d = bm_api.APIReader.CustomizeAtomItem(self, d)

		#
		#	Tags become categories
		#
		d["category"] = [ { "term" : tagd["name"] } for tagd in bm_extract.as_list(d, "tags.tag") ]

		#
		#	Geolocation
		#
		latitude = bm_extract.as_string(d, "location.latitude")
		longitude = bm_extract.as_string(d, "location.longitude")
		bm_api.add_latlon(d, latitude, longitude)

		#
		#	hcard
		#
		work_adr = "%s.%s" % ( uf_vcard.Work, uf_vcard.ADR, )
		hcard_keys = (
			( "location.country.name", "%s.%s" % ( work_adr, uf_vcard.CountryName, ), ),
			( "location.streetAddress", "%s.%s" % ( work_adr, uf_vcard.StreetAddress, ), ),
			( "location.city.name", "%s.%s" % ( work_adr, uf_vcard.Locality, ), ),
			( "location.regions.province", "%s.%s" % ( work_adr, uf_vcard.Region, ), ),
			( "location.postal_code", "%s.%s" % ( work_adr, uf_vcard.PostalCode, ), ),
			( "phone", "%s.%s.%s" % ( uf_vcard.Work, uf_vcard.TEL, uf_vcard.Voice, ), ),
			( "title", uf_vcard.OrganizationName, ),
			( "location.latitude", "%s.%s" % ( uf_vcard.GEO, uf_vcard.Latitude, ), ),
			( "location.longitude", "%s.%s" % ( uf_vcard.GEO, uf_vcard.Longitude, ), ),
		)

		hd = uf_mfdict.mfdict()
		for path, hcard_key in hcard_keys:
			try:
				found = bm_extract.as_string(d, path)
				if found:
					hd[hcard_key] = found
			except KeyError:
				pass

		if hd:
			d["hcard:hcard"] = hcard.decompose(hd, "hcard")

		#
		#	Links
		#
		short_url = d.pop("short_url", None)
		if short_url:
			alternate = {
				"type" : "text/html",
				"rel" : "alternate",
				"href" : short_url,
			}
			d["links"] = [ alternate ]

		#
		#	Removables
		#
		for key in ( "tags", "tag_count", "location", "phone", ):
			d.pop(key, None)

		return	d