def ScrubPerson(self, itemd, person_key):
    persons = bm_extract.as_list(itemd, person_key)
    if persons:
        npersons = []
        for persond in persons:
            person_name = bm_extract.coerce_string(persond)
            if not person_name:
                person_name = bm_extract.as_string(persond, "name")
            if not person_name:
                continue

            npersond = {
                "name" : person_name,
            }

            for key in [ "uri", "email" ]:
                value = bm_extract.as_string(persond, key)
                if value:
                    npersond[key] = value

            npersons.append(npersond)

        persons = npersons

    if not persons:
        try:
            del itemd[person_key]
        except KeyError:
            pass
    else:
        itemd[person_key] = persons
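## A hedged sketch of what ScrubPerson normalizes; the itemd below is
## hypothetical sample data, assuming bm_extract.coerce_string returns the
## string itself for plain strings and something falsy for dicts:
#
#   itemd = { "author" : [ "Jane Doe",
#                          { "name" : "John Roe", "email" : "john@example.org" } ] }
#   self.ScrubPerson(itemd, "author")
#   ## itemd["author"] ->
#   ##   [ { "name" : "Jane Doe" },
#   ##     { "name" : "John Roe", "email" : "john@example.org" } ]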
def ScrubMeta(self, itemd):
    itemd = dict(itemd)
    itemd.setdefault("title", "[Untitled]")

    if self.AtomLike():
        #
        # Author is close enough to owner
        #
        author_name = bm_extract.as_string(itemd, "author")
        if author_name:
            itemd["ownerName"] = author_name

        author_href = bm_extract.as_string(itemd, "author.uri")
        if author_href:
            itemd["ownerId"] = author_href

        author_email = bm_extract.as_string(itemd, "author.email")
        if author_email:
            itemd["ownerEmail"] = author_email

        try:
            itemd.pop("author")
        except KeyError:
            pass

    #
    # created/updated become dateCreated/dateModified
    #
    for k_from, k_to in [
        ( 'created', 'dateCreated' ),
        ( 'updated', 'dateModified' ),
    ]:
        try:
            value = itemd.pop(k_from)
            itemd[k_to] = bm_extract.coerce_datetime(value, otherwise = value, rfc822 = True)
        except KeyError:
            pass

    return itemd
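## Sketch of the owner mapping, assuming AtomLike() is true; the input is
## hypothetical, and the exact dateModified text depends on how
## bm_extract.coerce_datetime renders RFC 822 dates:
#
#   itemd = { "author" : "Jane Doe", "updated" : "2009-01-09T12:20:02+00:00" }
#   ## ScrubMeta(itemd) ->
#   ##   { "title" : "[Untitled]", "ownerName" : "Jane Doe",
#   ##     "dateModified" : "Fri, 09 Jan 2009 12:20:02 +0000" }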
def ScrubCategory(self, itemd):
    cats = bm_extract.as_list(itemd, "category")
    if cats:
        ncats = []
        for catd in cats:
            cat_name = bm_extract.coerce_string(catd)
            if not cat_name:
                cat_name = bm_extract.as_string(catd, "term")
            if not cat_name:
                continue

            ncatd = {
                "@term" : cat_name,
            }

            for key in [ "scheme", "label" ]:
                value = bm_extract.as_string(catd, key)
                if value:
                    ncatd["@" + key] = value

            ncats.append(ncatd)

        cats = ncats

    if not cats:
        try:
            del itemd["category"]
        except KeyError:
            pass
    else:
        itemd["category"] = cats
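## Sketch of the category normalization on hypothetical sample data:
#
#   itemd = { "category" : [ "python",
#                            { "term" : "feeds", "scheme" : "http://example.org/tags" } ] }
#   self.ScrubCategory(itemd)
#   ## itemd["category"] ->
#   ##   [ { "@term" : "python" },
#   ##     { "@term" : "feeds", "@scheme" : "http://example.org/tags" } ]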
def CustomizeAtomItem(self, itemd):
    return {
        "title" : bm_extract.as_string(itemd, "@@title"),
        "content" : bm_extract.as_string(itemd, "@@html"),
        "link" : itemd.find('url') or bm_extract.as_string(itemd, "@@uri"),
        "hcard:hcard" : hcard.decompose(itemd, "hcard"),
    }
def fset(self, service_name):
    d = bm_cfg.cfg.get(service_name)
    if not d:
        Log("warning - authentication service was not found: don't be surprised by an exception soon",
            service_name = service_name)
        return

    self.username = bm_extract.as_string(d, 'username')
    self.password = bm_extract.as_string(d, 'password')
def fset(self, service_name):
    d = bm_cfg.cfg.get(service_name)
    if not d:
        Log("warning - authentication service was not found: don't be surprised by an exception soon",
            service_name = service_name)
        return

    if d.get('oauth_consumer_key'):
        self._authenticate = bm_uri.Authenticate(
            auth = bm_oauth.OAuth(service_name = service_name),
        )
    elif d.get('username'):
        self._authenticate = bm_uri.Authenticate(
            auth = bm_uri.AuthBasic(
                username = bm_extract.as_string(d, 'username'),
                password = bm_extract.as_string(d, 'password'),
            ),
        )
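## Hedged sketch of the two bm_cfg stanzas this dispatches on; the service
## names and values below are hypothetical:
#
#   bm_cfg.cfg = {
#       "some_oauth_service" : { "oauth_consumer_key" : "...", },          ## -> bm_oauth.OAuth
#       "some_basic_service" : { "username" : "alice", "password" : "x" }, ## -> bm_uri.AuthBasic
#   }
#
## OAuth wins when a config carries both key styles, since it is tested first.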
def ScrubLinks(self, itemd):
    links = bm_extract.as_list(itemd, "links")
    if links:
        nlinks = []
        for linkd in links:
            link_href = bm_extract.coerce_string(linkd)
            if not link_href:
                link_href = bm_extract.as_string(linkd, "href")
            if not link_href:
                continue

            nlinkd = {
                "@href" : link_href,
            }

            for key in [ "rel", "type", "hreflang", "title", "length", ]:
                value = bm_extract.as_string(linkd, key)
                if value:
                    nlinkd["@" + key] = value

            nlinks.append(nlinkd)

        links = nlinks

    link = bm_extract.as_string(itemd, "link")
    if link:
        found = False
        for linkd in links:
            if link == bm_extract.as_string(linkd, "@href"):
                found = True
                break

        if not found:
            links.append({
                "@href" : link,
                "@rel" : "alternate",
            })

    for key in [ "link", "links" ]:
        try:
            del itemd[key]
        except KeyError:
            pass

    if links:
        itemd["link"] = links
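## Sketch on hypothetical data: bare-string links are normalized, and a
## scalar "link" is folded in as rel="alternate" if not already present:
#
#   itemd = { "links" : [ "http://example.org/feed" ],
#             "link" : "http://example.org/" }
#   self.ScrubLinks(itemd)
#   ## itemd["link"] ->
#   ##   [ { "@href" : "http://example.org/feed" },
#   ##     { "@href" : "http://example.org/", "@rel" : "alternate" } ]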
def CustomizeAtomItem(self, d):
    d = bm_api.APIReader.CustomizeAtomItem(self, d)

    if bm_extract.as_string(d, "geometry.type") == "Point":
        coordinates = bm_extract.as_list(d, "geometry.coordinates")
        if len(coordinates) >= 2:
            #
            # GeoJSON-style points are [ longitude, latitude ],
            # while add_latlon takes (lat, lon)
            #
            bm_api.add_latlon(d, coordinates[1], coordinates[0])

    try:
        del d["geometry"]
    except KeyError:
        pass

    return d
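## Sketch of the geometry this expects, per the GeoJSON convention that a
## Point's "coordinates" are [ longitude, latitude ] (sample values):
#
#   d = { "geometry" : { "type" : "Point", "coordinates" : [ -79.4, 43.6 ] } }
#   ## -> bm_api.add_latlon(d, 43.6, -79.4), after which "geometry" is removed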
def CustomizeAtomItem(self, itemd):
    try:
        author = itemd.pop("author")
        if author:
            itemd["author"] = bm_extract.as_string(author, "@@title")

            if bm_extract.is_list(author) or bm_extract.is_list_like(author):
                itemd["hcard:author"] = map(lambda a: hcard.decompose(a, "hcard"), author)
            elif bm_extract.is_dict(author):
                itemd["hcard:author"] = hcard.decompose(author, "hcard")
    except KeyError:
        pass

    self.ExtractCategories(itemd)

    return bm_api.APIBase.CustomizeAtomItem(self, itemd)
def CustomizeAtomItem(self, d):
    d = bm_api.APIReader.CustomizeAtomItem(self, d)

    images = bm_extract.as_list(d, "image")
    if images:
        images = map(lambda i: i.strip(">"), images)    ## common last.fm bug
        d["images"] = images
        d["photo"] = images[-1]

    content = bm_extract.as_string(d, "bio.content")
    if content:
        d["content"] = content

    try:
        del d["bio"]
    except KeyError:
        pass

    return d
def as_string(self, path, **ad):
    return bm_extract.as_string(self.private, path, **ad)
def CustomizeAtomItem(self, d):
    d = Google.CustomizeAtomItem(self, d)

    #
    # Build an hCard from the data
    # ... should add lat/lon here?
    #
    hd = uf_mfdict.mfdict()
    for k_from, k_to in [
        ( "country", "%s.%s.%s" % ( uf_vcard.Work, uf_vcard.ADR, uf_vcard.CountryName, ), ),
        ( "streetAddress", "%s.%s.%s" % ( uf_vcard.Work, uf_vcard.ADR, uf_vcard.StreetAddress, ), ),
        ( "city", "%s.%s.%s" % ( uf_vcard.Work, uf_vcard.ADR, uf_vcard.Locality, ), ),
        ( "region", "%s.%s.%s" % ( uf_vcard.Work, uf_vcard.ADR, uf_vcard.Region, ), ),
        ( "staticMapUrl", uf_vcard.Photo, ),
        ( "title", uf_vcard.OrganizationName, ),
        ( "lat", "%s.%s" % ( uf_vcard.GEO, uf_vcard.Latitude, ), ),
        ( "lng", "%s.%s" % ( uf_vcard.GEO, uf_vcard.Longitude, ), ),
    ]:
        try:
            value = bm_extract.as_string(d, k_from)
            if value:
                hd[k_to] = value
        except KeyError:
            pass

    for pd in bm_extract.as_list(d, "phoneNumbers"):
        number = bm_extract.as_string(pd, "number")
        if not number:
            continue

        number_type = bm_extract.as_string(pd, "type")
        if number_type in [ "main", "" ]:
            hd["%s.%s.%s" % ( uf_vcard.TEL, uf_vcard.Voice, uf_vcard.Work, )] = number
        elif number_type in [ "fax", "data", ]:
            hd["%s.%s.%s" % ( uf_vcard.TEL, uf_vcard.Fax, uf_vcard.Work, )] = number
        elif number_type == "mobile":
            hd["%s.%s.%s" % ( uf_vcard.TEL, uf_vcard.Mobile, uf_vcard.Work, )] = number
        else:
            hd["%s.%s.%s" % ( uf_vcard.TEL, uf_vcard.Voice, uf_vcard.Work, )] = number

    if hd:
        d["hcard:hcard"] = hcard.decompose(hd, "hcard")

    #
    # Geolocation
    #
    try:
        bm_api.add_latlon(d, d.pop("lat"), d.pop("lng"))
    except KeyError:
        pass

    #
    # Remove stuff
    #
    for key in [ "country", "streetAddress", "city", "region", "staticMapUrl", "phoneNumbers", ]:
        try:
            del d[key]
        except KeyError:
            pass

    #
    # The result
    #
    return d
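## The dotted keys above are mfdict paths; a minimal sketch of the pattern,
## with hypothetical sample values:
#
#   hd = uf_mfdict.mfdict()
#   hd["%s.%s.%s" % ( uf_vcard.Work, uf_vcard.ADR, uf_vcard.Locality, )] = "Toronto"
#   hd[uf_vcard.OrganizationName] = "Example Cafe"
#   ## hcard.decompose(hd, "hcard") then yields the namespaced dict stored
#   ## under d["hcard:hcard"]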
"Sort" : "relevancerank", "Operation" : "ItemSearch", "Version" : "2008-08-19", "ResponseGroup" : [ "Small", ], } _uri_base = "http://ecs.amazonaws.com/onca/xml" _meta_path = "Items.Request" _item_path = "Items.Item" _page_max_path = 'Items.TotalPages' _item_max_path = 'Items.TotalResults' _page_max = -1 def __init__(self, **ad): bm_api.APIReader.__init__(self, **ad) def CustomizePageURI(self, page_index): if page_index == 1: return return "%s=%s" % ( "ItemPage", page_index ) if __name__ == '__main__': api = AmazonECS(AWSAccessKeyId = os.environ["AWS_ECS_ACCESSKEYID"]) api.SetRequest( Keywords = "Larry Niven", SearchIndex = "Books", Condition = "New", ) for item in api.IterItems(): print "-", bm_extract.as_string(item, 'ItemAttributes.Title')
def ScrubEntry(self, itemd):
    if bm_extract.is_dict(itemd):
        nd = {}
        seen_html = False
        seen_rss = False
        seen_url = False

        #
        # datetimes: handled before iterating, so we don't mutate
        # itemd while iteritems() is walking it
        #
        try:
            created = itemd.pop("created")
            itemd["created"] = bm_extract.coerce_datetime(created, otherwise = created, rfc822 = True)
        except KeyError:
            pass

        for key, value in itemd.iteritems():
            if self.AtomLike():
                if key == "link":
                    key = "htmlUrl"
                elif key == "feeds":
                    key = "rssUrl"
                elif key == "content":
                    key = "description"
                elif key == "title":
                    key = "text"
                elif key == "category":
                    key = "tags"
                    value = ", ".join(map(lambda d: d["term"], value))
                elif key == "links":
                    for ld in bm_extract.coerce_list(value):
                        if bm_extract.as_string(ld, "rel") == "alternate":
                            key = "rssUrl"
                            value = bm_extract.as_string(ld, "href")

            if key == "rssUrl":
                value = self.FirstInListLikeObject(value, value)
                if value is None:
                    continue
                seen_rss = True
            elif key == "htmlUrl":
                value = self.FirstInListLikeObject(value, value)
                if value is None:
                    continue
                seen_html = True
            elif key == "url":
                seen_url = True

            if key in [ "items", "outline" ]:
                nd["outline"] = self.ScrubEntry(value)
            elif value is None:
                pass
            elif bm_extract.is_atomic(value):
                nd['@%s' % key] = value

        if seen_rss:
            nd.setdefault("@type", "rss")
        elif seen_html:
            nd.setdefault("@type", "link")
        elif seen_url:
            nd.setdefault("@type", "link")

        nd.setdefault("@text", "")

        return nd
    elif bm_extract.is_atomic(itemd):
        return { "@title" : bm_extract.coerce_string(itemd) }
    elif bm_extract.is_list(itemd) or bm_extract.is_list_like(itemd):
        return map(self.ScrubEntry, itemd)

    return itemd
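## Sketch: an Atom-like entry flattened to OPML outline attributes,
## assuming AtomLike() is true and FirstInListLikeObject passes scalar
## values through (hypothetical input):
#
#   itemd = { "title" : "My Feed", "link" : "http://example.org/",
#             "feeds" : "http://example.org/rss" }
#   ## ScrubEntry(itemd) ->
#   ##   { "@text" : "My Feed", "@htmlUrl" : "http://example.org/",
#   ##     "@rssUrl" : "http://example.org/rss", "@type" : "rss" }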
def ScrubEntry(self, itemd): """Make sure we look like an RSS entry""" # # Look for known items and namespaced items # nd, xd = self.Separate(itemd, self._known_item, "rss") # # atom links # try: links = xd.pop('links') if links: nd["atom:links"] = links # # default an RSS value # if not nd.get("link"): ld = dict([ ( l["rel"], l ) for l in links ]) v = ld.get("alternate") or ld.get("self") if v: nd["link"] = v["href"] except KeyError: pass # # author.uri # try: value = bm_extract.as_string(xd, 'author.uri') if value: nd["source"] = value except KeyError: pass # # author # try: value = xd.pop('author') if value: value = bm_extract.coerce_string(value) if value: nd["atom:author"] = value nd["dc:creator"] = value except KeyError: pass # # atom published/updated # 'updated': '2009-01-09T12:20:02+00:00'} # for key in [ 'updated', 'published' ]: # # atom updated / published # 'updated': '2009-01-09T12:20:02+00:00'} # try: value = xd.pop('%s' % key) if value: nd["atom:%s" % key] = value except KeyError: pass # # default a pubDate # if not nd.get("pubDate"): dts = nd.get("atom:updated") or nd.get("atom:published") if dts: try: import dateutil.parser dt = dateutil.parser.parse(dts) if dt: nd["pubDate"] = dt.strftime("%a, %d %b %Y %H:%M:%S %z") except: Log("date could not be parsed - maybe a missing module?", exception = True, dts = dts) # # Our fake composite value, body # try: value = xd.pop("body") if value: nd["description"] = value except KeyError: pass # # Atom content # try: value = xd.pop("content") if value: nd.setdefault("description", value) nd["atom:content"] = value except KeyError: pass # # Atom summary # try: value = xd.pop("summary") if value: nd.setdefault("description", value) nd["atom:summary"] = value except KeyError: pass # # Atom ID # try: value = xd.pop("id") if value: nd.setdefault("guid", value) nd["atom:id"] = value except KeyError: pass # # Required item elements # nd.setdefault("title", ""); nd.setdefault("link", "#"); nd.setdefault("description", ""); # # Remaining items # if xd: for key, item in xd.iteritems(): nd["unknown:%s" % key] = item return nd
def _ProcessRow(self, rd, uri, rel = None):
    d = {}

    #
    # Title
    #
    d['title'] = rd['title'] = rd.get('name') or rd.get('nick') or '[No Name]'

    #
    # Image
    #
    logo = rd.get('image') or rd.get('img')
    if logo:
        rd['logo'] = logo
        d['logo'] = logo

    #
    # Lat/Lng
    #
    bm_api.add_latlon(d, rd.get('lat'), rd.get('lng'))

    #
    # Get everything that goes into the hCard
    #
    hd = uf_mfdict.mfdict()
    for k_from, k_to in [
        ( "country_name", "%s.%s" % ( uf_vcard.ADR, uf_vcard.CountryName, ), ),
        ( "street_address", "%s.%s" % ( uf_vcard.ADR, uf_vcard.StreetAddress, ), ),
        ( "extended_address", "%s.%s" % ( uf_vcard.ADR, uf_vcard.ExtendedAddress, ), ),
        ( "locality", "%s.%s" % ( uf_vcard.ADR, uf_vcard.Locality, ), ),
        ( "region", "%s.%s" % ( uf_vcard.ADR, uf_vcard.Region, ), ),
        ( "postal_code", "%s.%s" % ( uf_vcard.ADR, uf_vcard.PostalCode, ), ),
        ( "title", uf_vcard.FN, ),
        ( "mbox_sha1sum", uf_vcard.UID, ),
        ( "phone", "%s.%s" % ( uf_vcard.TEL, uf_vcard.Voice, ), ),
        ( "logo", uf_vcard.Logo, ),
        ( "lat", "%s.%s" % ( uf_vcard.GEO, uf_vcard.Latitude, ), ),
        ( "lng", "%s.%s" % ( uf_vcard.GEO, uf_vcard.Longitude, ), ),
    ]:
        try:
            value = bm_extract.as_string(rd, k_from)
            if value:
                if k_from in [ "phone" ]:
                    if value.startswith("tel:"):
                        value = value[4:]

                hd[k_to] = value

            rd.pop(k_from)
        except KeyError:
            pass

    for key in [ "name", "nick", "photo", "image", "img", ]:
        try:
            rd.pop(key)
        except KeyError:
            pass

    if hd:
        uf_vcard.scrub(hd)
        d["hcard:hcard"] = hcard.decompose(hd, "hcard")

    #
    # Add links
    #
    d["link"] = rd.get('homepage') or rd.get("weblog") or uri

    links = [{
        "rel" : "related",
        "href" : uri,
        "title" : "FOAF source",
    }]
    d["links"] = links

    for html_key in [ "homepage", "weblog", ]:
        try:
            uri = rd.pop(html_key)
            if uri:
                links.append({
                    "href" : uri,
                    "rel" : "related",
                    "type" : "text/html",
                    "title" : html_key,
                })
        except KeyError:
            pass

    if uri != self.uri and rel:
        links.append({
            "href" : self.uri,
            "rel" : "xfn",
            "rev" : rel,
        })

    ## if rel:
    ##     d["xfn:rel"] = rel

    return d
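## Sketch of a FOAF row through _ProcessRow (hypothetical rd and uri):
#
#   rd = { "name" : "Jane Doe", "homepage" : "http://example.org/",
#          "mbox_sha1sum" : "deadbeef" }
#   d = self._ProcessRow(rd, "http://example.org/foaf.rdf")
#   ## d["title"] -> "Jane Doe"; d["link"] -> "http://example.org/"
#   ## d["links"] holds the FOAF source plus a related text/html link,
#   ## and d["hcard:hcard"] carries FN and UID from the vcard mapping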
def CustomizeAtomItem(self, d):
    d = bm_api.APIReader.CustomizeAtomItem(self, d)

    #
    # Tags become categories
    #
    cats = []
    for tag in bm_extract.as_list(d, "tags.tag"):
        cats.append({
            "term" : tag["name"],
        })

    d["category"] = cats

    #
    # Geolocation
    #
    bm_api.add_latlon(d,
        bm_extract.as_string(d, "location.latitude"),
        bm_extract.as_string(d, "location.longitude"),
    )

    #
    # hcard
    #
    hd = uf_mfdict.mfdict()
    for k_from, k_to in [
        ( "location.country.name", "%s.%s.%s" % ( uf_vcard.Work, uf_vcard.ADR, uf_vcard.CountryName, ), ),
        ( "location.streetAddress", "%s.%s.%s" % ( uf_vcard.Work, uf_vcard.ADR, uf_vcard.StreetAddress, ), ),
        ( "location.city.name", "%s.%s.%s" % ( uf_vcard.Work, uf_vcard.ADR, uf_vcard.Locality, ), ),
        ( "location.regions.province", "%s.%s.%s" % ( uf_vcard.Work, uf_vcard.ADR, uf_vcard.Region, ), ),
        ( "location.postal_code", "%s.%s.%s" % ( uf_vcard.Work, uf_vcard.ADR, uf_vcard.PostalCode, ), ),
        ( "phone", "%s.%s.%s" % ( uf_vcard.Work, uf_vcard.TEL, uf_vcard.Voice, ), ),
        ( "title", uf_vcard.OrganizationName, ),
        ( "location.latitude", "%s.%s" % ( uf_vcard.GEO, uf_vcard.Latitude, ), ),
        ( "location.longitude", "%s.%s" % ( uf_vcard.GEO, uf_vcard.Longitude, ), ),
    ]:
        try:
            value = bm_extract.as_string(d, k_from)
            if value:
                hd[k_to] = value
        except KeyError:
            pass

    if hd:
        d["hcard:hcard"] = hcard.decompose(hd, "hcard")

    #
    # Links
    #
    try:
        alt = d.pop("short_url")
        if alt:
            d["links"] = [
                {
                    "type" : "text/html",
                    "rel" : "alternate",
                    "href" : alt,
                },
            ]
    except KeyError:
        pass

    #
    # Removables
    #
    for key in [ "tags", "tag_count", "location", "phone", ]:
        try:
            del d[key]
        except KeyError:
            pass

    return d