def import_vcard(vin):
    """Convert a _single_ vcard (text) to a mfdict.

    Parameters:
    vin - a string or a file object

    Returns:
    a uf_mfdict.mfdict; each value is added under the key list
    [ lower-cased property name, sub-field name, TYPE params... ]
    with empty components dropped.
    """

    # properties whose value is split into named sub-fields
    field_orders = {
        "N" : vobject.vcard.NAME_ORDER,
        "ADR" : vobject.vcard.ADDRESS_ORDER,
    }

    card = vobject.readOne(vin)
    mfd = uf_mfdict.mfdict()

    for child in card.getChildren():
        # anything that is not already a string is stringified in place
        if type(child.value) not in types.StringTypes:
            child.value = unicode(child.value)

        order = field_orders.get(child.name)
        if order is None:
            # plain property: a single value with no sub-field name
            d = { "" : child.value }
        else:
            d = dict(zip(order, vobject.vcard.splitFields(child.value)))

        ctype = child.params.get("TYPE", [])

        for key, value in d.items():
            keys = [ part for part in [ child.name.lower(), key, ] + ctype if part ]

            # multi-valued sub-fields are flattened to one space-separated string
            if type(value) == types.ListType:
                value = " ".join(value)

            mfd.add(keys, clean_space(value))

    return mfd
def underscore2dash(d):
    """Return a new mfdict with every '_' in the keys of 'd' turned into '-'.

    Values are carried over untouched; 'd' itself is not modified.
    """

    dashed = uf_mfdict.mfdict()
    for old_key, value in d.iteritems():
        new_key = old_key.replace("_", "-")
        dashed[new_key] = value

    return dashed
def import_csv(str):
    """Import vcards from a CSV export file

    The first non-blank row is used to work out the column names: each cell is
    lower-cased and looked up in 'fieldd' (unknown columns map to ''). If at
    most five columns are recognised and the first one is not, the column
    layout in 'thunderbird_headers' is used instead. That first row is never
    emitted as data.

    Every following row becomes one mfdict holding the non-empty cells of the
    recognised columns. When a row has both a street address and an extended
    address (for the "home.", "work." or unprefixed group) the two are swapped.

    Parameters:
    str - the CSV text (the name shadows the builtin but is kept so that
          keyword callers continue to work)

    Returns:
    a list of uf_mfdict.mfdict, one per data row

    Note:
    - this does not do any scrubbing
    """

    result = []

    din = StringIO.StringIO(str)
    try:
        headers = None
        for line in csv.reader(din):
            if headers is None:
                # csv.reader yields [] for a blank line; skip those ahead of
                # the header instead of failing on headers[0] below
                if not line:
                    continue

                headers = [ fieldd.get(k.lower(), '') for k in line ]

                ## no (or few) matches in header: fall back to the
                ## thunderbird column layout
                if len([ h for h in headers if h ]) <= 5 and headers[0] == '':
                    headers = [ fieldd.get(k.lower(), '') for k in thunderbird_headers ]

                continue

            line = [ bm_text.tounicode(cell) for cell in line ]

            mfd = uf_mfdict.mfdict()
            for key, item in zip(headers, line):
                if key and item:
                    mfd.add(key, item)

            ## a common juxtaposition: street and extended address are
            ## exported the wrong way around, so swap them when both exist
            for prefix in [ "home.", "work.", "" ]:
                a_key = prefix + 'street-address'
                a_value = mfd.get(a_key)
                if not a_value:
                    continue

                b_key = prefix + 'extended-address'
                b_value = mfd.get(b_key)
                if not b_value:
                    continue

                mfd[a_key] = b_value
                # store under the extended-address key, not under a key
                # named after the address text
                mfd[b_key] = a_value

            result.append(mfd)
    finally:
        din.close()

    return result
def __init__(self,
    root_name,
    uf_name,
    root_type = "class",
    collect_ids = False,
    page_uri = None,
    include = False,
    scrub_scripts = True,
    keep_html = None,   # None means "decide from 'parent'", see below
    parent = None,
):
    """Store the parser configuration and reset all working state.

    Parameters:
    root_name - stored as self.root_name
    uf_name - stored as self.uf_name
    root_type - "class" or "rel"
    collect_ids - stored as self.collect_ids; forced on when 'include' is set
    page_uri - stored as self.page_uri unless a parent is given
    include - stored as self.is_include unless a parent is given
    scrub_scripts - stored as self.is_scrub_scripts unless a parent is given
    keep_html - stored (as a bool) in self.is_keep_html; when None the
        value is True without a parent and False with one
    parent - optional object supplying is_scrub_scripts and page_uri
    """

    # what we are looking for
    self.root_name = root_name
    self.uf_name = uf_name
    self.root_type = root_type  # class, rel

    # behaviour switches
    self.collect_ids = collect_ids or include
    self.is_include = include
    self.is_scrub_scripts = scrub_scripts
    self.page_uri = page_uri

    # working state
    self.root_element = None
    self.actions = {}
    self.repeats = []
    self.uris = []
    self.repeat_count = {}
    self.intermediates = []
    self.id_map = {}
    self.data = uf_mfdict.mfdict()
    self.dom = None
    self.title_class = None
    self.bookmark_class = None
    self.index_count = 0

    # a parent overrides the settings it has already dealt with
    self.parent = parent
    if self.parent:
        self.is_scrub_scripts = parent.is_scrub_scripts
        self.is_include = False     # is already done!
        self.page_uri = parent.page_uri

    if keep_html is None:
        # not specified: keep HTML only when there is no parent
        self.is_keep_html = not self.parent
    else:
        self.is_keep_html = bool(keep_html)

    self.Reset()
def CustomizeAtomItem(self, d):
    """Post-process one item: fold address, phone and geo data into an hCard.

    Runs Google.CustomizeAtomItem first, then:
    - copies the address / photo / title / lat / lng values into an mfdict
      and stores its decomposition under d["hcard:hcard"] (only if non-empty)
    - passes "lat" and "lng" (removed from 'd') to bm_api.add_latlon
    - deletes the raw keys that have been folded into the hCard

    Returns the updated 'd'.
    """

    d = Google.CustomizeAtomItem(self, d)

    #
    #   Build a hCard from the data
    #
    field_map = [
        ( "country", "%s.%s.%s" % ( uf_vcard.Work, uf_vcard.ADR, uf_vcard.CountryName, ), ),
        ( "streetAddress", "%s.%s.%s" % ( uf_vcard.Work, uf_vcard.ADR, uf_vcard.StreetAddress, ), ),
        ( "city", "%s.%s.%s" % ( uf_vcard.Work, uf_vcard.ADR, uf_vcard.Locality, ), ),
        ( "region", "%s.%s.%s" % ( uf_vcard.Work, uf_vcard.ADR, uf_vcard.Region, ), ),
        ( "staticMapUrl", "%s" % ( uf_vcard.Photo, ), ),
        ( "title", uf_vcard.OrganizationName, ),
        ( "lat", "%s.%s" % ( uf_vcard.GEO, uf_vcard.Latitude, ), ),
        ( "lng", "%s.%s" % ( uf_vcard.GEO, uf_vcard.Longitude, ), ),
    ]

    hd = uf_mfdict.mfdict()
    for k_from, k_to in field_map:
        try:
            value = bm_extract.as_string(d, k_from)
            if value:
                hd[k_to] = value
        except KeyError:
            pass

    #
    #   Phone numbers: "fax" and "data" are filed as fax, "mobile" as
    #   mobile, everything else (including "main" and "") as voice
    #
    tel_slots = {
        "fax" : uf_vcard.Fax,
        "data" : uf_vcard.Fax,
        "mobile" : uf_vcard.Mobile,
    }

    for pd in bm_extract.as_list(d, "phoneNumbers"):
        number = bm_extract.as_string(pd, "number")
        if not number:
            continue

        kind = bm_extract.as_string(pd, "type")
        slot = tel_slots.get(kind, uf_vcard.Voice)
        hd["%s.%s.%s" % ( uf_vcard.TEL, slot, uf_vcard.Work, )] = number

    if hd:
        d["hcard:hcard"] = hcard.decompose(hd, "hcard")

    #
    #   Geolocation - nothing is added unless both values are present
    #
    try:
        lat = d.pop("lat")
        lng = d.pop("lng")
        bm_api.add_latlon(d, lat, lng)
    except KeyError:
        pass

    #
    #   Remove stuff
    #
    consumed = [ "country", "streetAddress", "city", "region", "staticMapUrl", "phoneNumbers", ]
    for key in consumed:
        try:
            del d[key]
        except KeyError:
            pass

    #
    #   The result
    #
    return d
def Reset(self):
    """Discard the per-run state and then call self.CustomizeReset().

    Clears the collected data, the repeat counters, the intermediates list
    and the current root element.
    """

    self.data = uf_mfdict.mfdict()
    self.repeat_count = {}
    self.intermediates = []
    self.root_element = None

    self.CustomizeReset()
def _ProcessRow(self, rd, uri, rel = None):
    """Build an item dictionary (title, logo, hCard, links) from one row.

    Parameters:
    rd - the row dictionary; it is modified: 'title' and 'logo' are set,
         and every key that is consumed here is popped
    uri - the URI the row came from; used for the "FOAF source" link and
          as the fallback for d["link"]
    rel - optional relationship; when given and 'uri' differs from
          self.uri, an "xfn" link back to self.uri is added

    Returns:
    the new item dictionary
    """

    d = {}

    #
    #   Title
    #
    d['title'] = rd['title'] = rd.get('name') or rd.get('nick') or '[No Name]'

    #
    #   Image
    #
    logo = rd.get('image') or rd.get('img')
    if logo:
        rd['logo'] = logo
        d['logo'] = logo

    #
    #   Lat/Lng
    #
    bm_api.add_latlon(d, rd.get('lat'), rd.get('lng'))

    #
    #   Get everything that goes into the hCard
    #
    hd = uf_mfdict.mfdict()
    for k_from, k_to in [
        ( "country_name", "%s.%s" % ( uf_vcard.ADR, uf_vcard.CountryName, ), ),
        ( "street_address", "%s.%s" % ( uf_vcard.ADR, uf_vcard.StreetAddress, ), ),
        ( "extended_address", "%s.%s" % ( uf_vcard.ADR, uf_vcard.ExtendedAddress, ), ),
        ( "locality", "%s.%s" % ( uf_vcard.ADR, uf_vcard.Locality, ), ),
        ( "region", "%s.%s" % ( uf_vcard.ADR, uf_vcard.Region, ), ),
        ( "postal_code", "%s.%s" % ( uf_vcard.ADR, uf_vcard.PostalCode, ), ),
        ( "title", uf_vcard.FN, ),
        ( "mbox_sha1sum", uf_vcard.UID, ),
        ( "phone", "%s.%s" % ( uf_vcard.TEL, uf_vcard.Voice, ), ),
        ( "logo", uf_vcard.Logo, ),
        ( "lat", "%s.%s" % ( uf_vcard.GEO, uf_vcard.Latitude, ), ),
        ( "lng", "%s.%s" % ( uf_vcard.GEO, uf_vcard.Longitude, ), ),
    ]:
        try:
            value = bm_extract.as_string(rd, k_from)
            if value:
                # phone values may be given as "tel:" URIs
                if k_from in [ "phone" ]:
                    if value.startswith("tel:"):
                        value = value[4:]

                hd[k_to] = value
                rd.pop(k_from)
        except KeyError:
            pass

    for key in [ "name", "nick", "photo", "image", "img", ]:
        try:
            rd.pop(key)
        except KeyError:
            pass

    if hd:
        uf_vcard.scrub(hd)
        d["hcard:hcard"] = hcard.decompose(hd, "hcard")

    #
    #   Add links
    #
    d["link"] = rd.get('homepage') or rd.get("weblog") or uri

    links = [{
        "rel" : "related",
        "href" : uri,
        "title" : "FOAF source",
    }]
    d["links"] = links

    for html_key in [ "homepage", "weblog", ]:
        try:
            # a separate name is used so that the 'uri' parameter still
            # holds the row's source URI for the comparison below
            href = rd.pop(html_key)
            if href:
                links.append({
                    "href" : href,
                    "rel" : "related",
                    "type" : "text/html",
                    "title" : html_key,
                })
        except KeyError:
            pass

    if uri != self.uri and rel:
        links.append({
            "href" : self.uri,
            "rel" : "xfn",
            "rev" : rel
        })

    return d
def scrub_address(mfd):
    """Try to fill in missing fields in addresses

    For each entry of 'card_groups' the address-related values are looked up
    in 'mfd' and collected in a scratch mfdict. Groups with no values at all
    are skipped. Otherwise the street address is split on 'extended_rex',
    missing locality / region / country are looked up by telephone number and
    by postal code, region / country and street are passed through their
    lookup helpers, and every non-empty result is written back into 'mfd'.

    Parameters:
    mfd - the mfdict to update in place
    """

    adr_keys = [ ExtendedAddress, StreetAddress, Locality, Region, CountryName, PostalCode, Email, ]

    for card_group in card_groups:
        adrd = uf_mfdict.mfdict()   # key actually used in mfd -> value
        keyd = {}                   # generic address key -> key used in adrd

        for adr_key in adr_keys:
            if card_group == "":
                in_key = adr_key
                out_key, out_value = mfd.finditem(in_key, exclude_keys = card_groups)
            else:
                in_key = "%s.%s" % ( card_group, adr_key )
                out_key, out_value = mfd.finditem(in_key)

            used_key = out_key or in_key
            adrd[used_key] = out_value or ""
            keyd[adr_key] = used_key

        # nothing at all known for this group
        if not any(adrd.values()):
            continue

        pprint.pprint((card_group, adrd), sys.stderr)

        street_key = keyd[StreetAddress]
        extended_key = keyd[ExtendedAddress]
        region_key = keyd[Region]
        country_key = keyd[CountryName]

        #
        #   Fix up: the last piece stays the street address, the pieces in
        #   front of it (plus any existing extended address) become the
        #   extended address
        #
        parts = [ part for part in extended_rex.split(adrd[street_key]) if part ]
        if len(parts) > 1:
            parts = [ part.strip() for part in parts ]

            adrd[street_key] = parts[-1]
            parts = parts[:-1]

            extended = adrd[extended_key]
            if extended:
                parts.append(extended)

            adrd[extended_key] = ", ".join(parts)

        #
        #   By telephone
        #
        tel = mfd.find(Voice) or mfd.find(Cell) or mfd.find(TEL)
        if tel and not ( adrd[region_key] and adrd[country_key] ):
            result = LookupPhone(phone = tel)

            # only fill in what is still missing
            for field in ( Locality, Region, CountryName ):
                if not adrd[keyd[field]] and result.get(field):
                    adrd[keyd[field]] = result[field]

        #
        #   By postal code
        #
        if not ( adrd[region_key] and adrd[country_key] ) and adrd.find(PostalCode):
            result = LookupPostalCode(adrd.find(PostalCode))

            # whatever the lookup returns replaces the current value
            for k in [ PostalCode, CountryName, Region, Locality ]:
                value = result.get(k)
                if value:
                    adrd[keyd[k]] = value

        #
        #   By name of region
        #
        result = LookupRegionCountry(region = adrd[region_key], country = adrd[country_key])
        adrd[region_key] = result[Region]
        adrd[country_key] = result[CountryName]

        #
        #   Scrub street
        #
        result = LookupStreet(
            street = adrd[street_key],
            region = adrd[region_key],
            country = adrd[country_key])
        adrd[street_key] = result[StreetAddress]

        #
        #   Write back everything that ended up with a value
        #
        for key, value in adrd.iteritems():
            if value:
                mfd[key] = value
def CustomizeAtomItem(self, d):
    """Post-process one item: categories, geolocation, hCard and links.

    Runs bm_api.APIReader.CustomizeAtomItem first, then:
    - turns every entry of "tags.tag" into a {"term": name} category
      (d["category"] is always set, possibly to an empty list)
    - passes "location.latitude" / "location.longitude" to bm_api.add_latlon
    - copies location / phone / title values into an mfdict and stores its
      decomposition under d["hcard:hcard"] (only if non-empty)
    - replaces "short_url" with an "alternate" text/html link when non-empty
    - deletes the raw keys that have been consumed

    Returns the updated 'd'.
    """

    d = bm_api.APIReader.CustomizeAtomItem(self, d)

    #
    #   Tags become categories
    #
    d["category"] = [ { "term" : tag["name"] } for tag in bm_extract.as_list(d, "tags.tag") ]

    #
    #   Geolocation
    #
    latitude = bm_extract.as_string(d, "location.latitude")
    longitude = bm_extract.as_string(d, "location.longitude")
    bm_api.add_latlon(d, latitude, longitude)

    #
    #   hcard
    #
    field_map = [
        ( "location.country.name", "%s.%s.%s" % ( uf_vcard.Work, uf_vcard.ADR, uf_vcard.CountryName, ), ),
        ( "location.streetAddress", "%s.%s.%s" % ( uf_vcard.Work, uf_vcard.ADR, uf_vcard.StreetAddress, ), ),
        ( "location.city.name", "%s.%s.%s" % ( uf_vcard.Work, uf_vcard.ADR, uf_vcard.Locality, ), ),
        ( "location.regions.province", "%s.%s.%s" % ( uf_vcard.Work, uf_vcard.ADR, uf_vcard.Region, ), ),
        ( "location.postal_code", "%s.%s.%s" % ( uf_vcard.Work, uf_vcard.ADR, uf_vcard.PostalCode, ), ),
        ( "phone", "%s.%s.%s" % ( uf_vcard.Work, uf_vcard.TEL, uf_vcard.Voice, ), ),
        ( "title", uf_vcard.OrganizationName, ),
        ( "location.latitude", "%s.%s" % ( uf_vcard.GEO, uf_vcard.Latitude, ), ),
        ( "location.longitude", "%s.%s" % ( uf_vcard.GEO, uf_vcard.Longitude, ), ),
    ]

    hd = uf_mfdict.mfdict()
    for k_from, k_to in field_map:
        try:
            value = bm_extract.as_string(d, k_from)
            if value:
                hd[k_to] = value
        except KeyError:
            pass

    if hd:
        d["hcard:hcard"] = hcard.decompose(hd, "hcard")

    #
    #   Links - "short_url" is always removed, but only a non-empty
    #   value produces a link
    #
    try:
        alt = d.pop("short_url")
        if alt:
            d["links"] = [
                {
                    "type" : "text/html",
                    "rel" : "alternate",
                    "href" : alt,
                },
            ]
    except KeyError:
        pass

    #
    #   Removables
    #
    consumed = [ "tags", "tag_count", "location", "phone", ]
    for key in consumed:
        try:
            del d[key]
        except KeyError:
            pass

    return d