def Flatten(self, item, path = None):
    """Recursively flatten a nested "outline" structure into a stream of items.

    Yields a shallow copy of ``item`` without its "outline" key, followed by
    the flattened form of every child found under "outline".

    Each yielded copy carries:
      * "tags" -- ``bm_extract.coerce_string`` of the ancestor "text" values
        joined with ", " (only set when there is at least one ancestor);
      * "@@children" -- the number of direct children.

    When ``self._leaf_only`` is true, only items without children are
    yielded; their ancestors still contribute to "tags".

    ``path`` is the list of ancestor "text" values; callers normally omit it.
    """
    path = path or []
    children = bm_extract.as_list(item, "outline")

    # Copy everything except the nested children. ``items()`` (rather than
    # the Python-2-only ``iteritems()``) keeps this working on Python 2 and 3.
    flat = dict((key, value) for key, value in item.items() if key != "outline")

    if path:
        flat["tags"] = bm_extract.coerce_string(path, separator = ", ")

    flat["@@children"] = len(children)

    if not self._leaf_only or not children:
        yield flat

    for child in children:
        # "text" is looked up after the yield, so a consumer that edited the
        # yielded dict is reflected in its descendants' paths (as before).
        child_path = list(path) + [flat.get("text", "")]
        for child_item in self.Flatten(child, child_path):
            yield child_item
def ScrubCategory(self, itemd):
    """Normalize ``itemd["category"]`` in place.

    Each raw entry's name is taken from ``bm_extract.coerce_string(entry)``,
    falling back to the entry's "term" value; entries that yield neither are
    dropped. Surviving entries become ``{"@term": name}`` dicts, plus
    "@scheme" and "@label" when the raw entry supplies non-empty values.

    If nothing survives, the "category" key is removed from ``itemd``;
    otherwise it is replaced by the normalized list.
    """
    scrubbed = []
    for catd in bm_extract.as_list(itemd, "category") or []:
        cat_name = bm_extract.coerce_string(catd)
        if not cat_name:
            cat_name = bm_extract.as_string(catd, "term")
        if not cat_name:
            continue

        ncatd = {"@term": cat_name}
        for key in ("scheme", "label"):
            value = bm_extract.as_string(catd, key)
            if value:
                ncatd["@" + key] = value

        scrubbed.append(ncatd)

    if scrubbed:
        itemd["category"] = scrubbed
        return

    # Nothing usable: drop the key. Only a missing key is an expected
    # failure here, so catch exactly that instead of a bare ``except``.
    try:
        del itemd["category"]
    except KeyError:
        pass
def ScrubPerson(self, itemd, person_key):
    """Normalize the person entries stored at ``itemd[person_key]`` in place.

    Each raw entry's name is taken from ``bm_extract.coerce_string(entry)``,
    falling back to the entry's "name" value; entries that yield neither are
    dropped. Surviving entries become ``{"name": name}`` dicts, plus "uri"
    and "email" when the raw entry supplies non-empty values.

    If nothing survives, ``person_key`` is removed from ``itemd``; otherwise
    it is replaced by the normalized list.
    """
    scrubbed = []
    for persond in bm_extract.as_list(itemd, person_key) or []:
        person_name = bm_extract.coerce_string(persond)
        if not person_name:
            person_name = bm_extract.as_string(persond, "name")
        if not person_name:
            continue

        npersond = {"name": person_name}
        for key in ("uri", "email"):
            value = bm_extract.as_string(persond, key)
            if value:
                npersond[key] = value

        scrubbed.append(npersond)

    if scrubbed:
        itemd[person_key] = scrubbed
        return

    # Nothing usable: drop the key. Only a missing key is an expected
    # failure here, so catch exactly that instead of a bare ``except``.
    try:
        del itemd[person_key]
    except KeyError:
        pass
def _IterItemsOnPage(self, page): if self._item_path == None: raise StopIteration if not self._item_path: for item in bm_extract.coerce_list(page): yield item else: for item in bm_extract.as_list(page, self._item_path, otherwise = []): yield item
def CustomizeAtomItem(self, d):
    """Post-process an item: turn a "Point" geometry into a lat/lon pair.

    After the base ``bm_api.APIReader.CustomizeAtomItem`` processing, if the
    item's "geometry.type" is "Point" and it has at least two coordinates,
    the first two are handed to ``bm_api.add_latlon``. The raw "geometry"
    entry is then removed whether or not it was usable.

    Returns the customized item.
    """
    d = bm_api.APIReader.CustomizeAtomItem(self, d)

    if bm_extract.as_string(d, "geometry.type") == "Point":
        coordinates = bm_extract.as_list(d, "geometry.coordinates")
        if len(coordinates) >= 2:
            # NOTE(review): coordinates are passed in source order; this
            # assumes the feed lists latitude first -- GeoJSON-style feeds
            # use (longitude, latitude). Confirm against the API.
            bm_api.add_latlon(d, coordinates[0], coordinates[1])

    # Only a missing key is expected here; don't swallow anything else.
    try:
        del d["geometry"]
    except KeyError:
        pass

    return d
def CustomizeAtomItem(self, d):
    """Post-process an item: expose images and promote the bio text.

    After the base ``bm_api.APIReader.CustomizeAtomItem`` processing:
      * every entry of "image" has ">" characters stripped from both ends
        and the cleaned list is stored as "images", with its last element
        also stored as "photo";
      * a non-empty "bio.content" is copied to "content";
      * the raw "bio" entry is removed.

    Returns the customized item.
    """
    d = bm_api.APIReader.CustomizeAtomItem(self, d)

    images = bm_extract.as_list(d, "image")
    if images:
        # Common last.fm bug: stray ">" around the image URLs. A list
        # comprehension (not ``map``) guarantees a real list, so the
        # ``images[-1]`` below also works where ``map`` is lazy (Python 3).
        images = [image.strip(">") for image in images]
        d["images"] = images
        d["photo"] = images[-1]

    content = bm_extract.as_string(d, "bio.content")
    if content:
        d["content"] = content

    # Only a missing key is expected here; don't swallow anything else.
    try:
        del d["bio"]
    except KeyError:
        pass

    return d
def ScrubLinks(self, itemd):
    """Merge ``itemd["links"]`` and ``itemd["link"]`` into one normalized list.

    Each raw entry of "links" contributes its href, taken from
    ``bm_extract.coerce_string(entry)`` or else the entry's "href" value;
    entries with neither are dropped. Surviving entries become
    ``{"@href": href}`` dicts, plus "@rel", "@type", "@hreflang", "@title"
    and "@length" when the raw entry supplies non-empty values.

    A non-empty "link" string that is not already present as an "@href" is
    appended with ``"@rel": "alternate"``.

    Both original keys are removed; the merged list, when non-empty, is
    stored back under "link".
    """
    # Always build a fresh list so the append further down never mutates
    # whatever object bm_extract.as_list handed back.
    links = []
    for linkd in bm_extract.as_list(itemd, "links") or []:
        link_href = bm_extract.coerce_string(linkd)
        if not link_href:
            link_href = bm_extract.as_string(linkd, "href")
        if not link_href:
            continue

        nlinkd = {"@href": link_href}
        for key in ("rel", "type", "hreflang", "title", "length"):
            value = bm_extract.as_string(linkd, key)
            if value:
                nlinkd["@" + key] = value

        links.append(nlinkd)

    link = bm_extract.as_string(itemd, "link")
    if link:
        already_listed = any(
            link == bm_extract.as_string(linkd, "@href") for linkd in links
        )
        if not already_listed:
            links.append({"@href": link, "@rel": "alternate"})

    # Either key may legitimately be absent; that is the only failure to
    # ignore, so catch KeyError rather than using a bare ``except``.
    for key in ("link", "links"):
        try:
            del itemd[key]
        except KeyError:
            pass

    if links:
        itemd["link"] = links
def CustomizeAtomItem(self, d):
    """Post-process an item: attach an hCard and a lat/lon pair.

    After the base ``Google.CustomizeAtomItem`` processing, the address,
    map image, title, coordinates and phone numbers found on the item are
    collected into an ``uf_mfdict.mfdict``. If anything was collected, the
    result of ``hcard.decompose`` on it is stored under "hcard:hcard".
    "lat"/"lng" are then moved into ``bm_api.add_latlon`` and the raw
    address/phone fields are removed.

    Returns the customized item.
    """
    d = Google.CustomizeAtomItem(self, d)

    #
    #   Build a hCard from the data
    #   ... should add lat/lon here?
    #
    adr_prefix = "%s.%s" % (uf_vcard.Work, uf_vcard.ADR)
    geo_prefix = "%s" % (uf_vcard.GEO,)
    field_map = (
        ("country", "%s.%s" % (adr_prefix, uf_vcard.CountryName)),
        ("streetAddress", "%s.%s" % (adr_prefix, uf_vcard.StreetAddress)),
        ("city", "%s.%s" % (adr_prefix, uf_vcard.Locality)),
        ("region", "%s.%s" % (adr_prefix, uf_vcard.Region)),
        ("staticMapUrl", "%s" % (uf_vcard.Photo,)),
        ("title", uf_vcard.OrganizationName),
        ("lat", "%s.%s" % (geo_prefix, uf_vcard.Latitude)),
        ("lng", "%s.%s" % (geo_prefix, uf_vcard.Longitude)),
    )

    card = uf_mfdict.mfdict()
    for source_key, card_key in field_map:
        try:
            text = bm_extract.as_string(d, source_key)
            if text:
                card[card_key] = text
        except KeyError:
            pass

    for phone in bm_extract.as_list(d, "phoneNumbers"):
        number = bm_extract.as_string(phone, "number")
        if not number:
            continue

        phone_type = bm_extract.as_string(phone, "type")
        if phone_type in ("fax", "data"):
            tel_kind = uf_vcard.Fax
        elif phone_type == "mobile":
            tel_kind = uf_vcard.Mobile
        else:
            # "main", "" and every unrecognised type are filed as voice
            tel_kind = uf_vcard.Voice

        card["%s.%s.%s" % (uf_vcard.TEL, tel_kind, uf_vcard.Work)] = number

    if card:
        d["hcard:hcard"] = hcard.decompose(card, "hcard")

    #
    #   Move lat/lng into the shared lat/lon representation
    #
    try:
        lat = d.pop("lat")
        lng = d.pop("lng")
        bm_api.add_latlon(d, lat, lng)
    except KeyError:
        pass

    #
    #   Remove stuff
    #
    for stale_key in (
        "country",
        "streetAddress",
        "city",
        "region",
        "staticMapUrl",
        "phoneNumbers",
    ):
        d.pop(stale_key, None)

    #
    #   The result
    #
    return d
def as_list(self, path, **ad):
    """Look up ``path`` inside ``self.private`` via ``bm_extract.as_list``.

    Extra keyword arguments are forwarded to ``bm_extract.as_list`` unchanged.
    """
    source = self.private
    return bm_extract.as_list(source, path, **ad)
def CustomizeAtomItem(self, d):
    """Post-process an item: categories, lat/lon, hCard and alternate link.

    After the base ``bm_api.APIReader.CustomizeAtomItem`` processing:
      * every entry of "tags.tag" becomes a ``{"term": name}`` category;
      * "location.latitude"/"location.longitude" go to ``bm_api.add_latlon``;
      * address, phone, title and coordinates are collected into an
        ``uf_mfdict.mfdict`` and, if non-empty, stored (via
        ``hcard.decompose``) under "hcard:hcard";
      * a non-empty "short_url" becomes a single text/html alternate link
        under "links" ("short_url" itself is always removed);
      * the raw "tags", "tag_count", "location" and "phone" keys are removed.

    Returns the customized item.
    """
    d = bm_api.APIReader.CustomizeAtomItem(self, d)

    #
    #   Tags become categories
    #
    d["category"] = [
        {"term": tag["name"]} for tag in bm_extract.as_list(d, "tags.tag")
    ]

    #
    #   Geolocation
    #
    latitude = bm_extract.as_string(d, "location.latitude")
    longitude = bm_extract.as_string(d, "location.longitude")
    bm_api.add_latlon(d, latitude, longitude)

    #
    #   hcard
    #
    adr_prefix = "%s.%s" % (uf_vcard.Work, uf_vcard.ADR)
    field_map = (
        ("location.country.name", "%s.%s" % (adr_prefix, uf_vcard.CountryName)),
        ("location.streetAddress", "%s.%s" % (adr_prefix, uf_vcard.StreetAddress)),
        ("location.city.name", "%s.%s" % (adr_prefix, uf_vcard.Locality)),
        ("location.regions.province", "%s.%s" % (adr_prefix, uf_vcard.Region)),
        ("location.postal_code", "%s.%s" % (adr_prefix, uf_vcard.PostalCode)),
        # NOTE(review): component order here is Work.TEL.Voice; confirm it is
        # what hcard.decompose expects for telephone entries.
        ("phone", "%s.%s.%s" % (uf_vcard.Work, uf_vcard.TEL, uf_vcard.Voice)),
        ("title", uf_vcard.OrganizationName),
        ("location.latitude", "%s.%s" % (uf_vcard.GEO, uf_vcard.Latitude)),
        ("location.longitude", "%s.%s" % (uf_vcard.GEO, uf_vcard.Longitude)),
    )

    card = uf_mfdict.mfdict()
    for source_path, card_key in field_map:
        try:
            text = bm_extract.as_string(d, source_path)
            if text:
                card[card_key] = text
        except KeyError:
            pass

    if card:
        d["hcard:hcard"] = hcard.decompose(card, "hcard")

    #
    #   Links
    #
    try:
        alt = d.pop("short_url")
    except KeyError:
        pass
    else:
        if alt:
            d["links"] = [
                {
                    "type": "text/html",
                    "rel": "alternate",
                    "href": alt,
                },
            ]

    #
    #   Removables
    #
    for stale_key in ("tags", "tag_count", "location", "phone"):
        d.pop(stale_key, None)

    return d