def FirstInListLikeObject(self, value, otherwise = None):
    """Return the first element of a list or list-like `value`.

    Parameters:
    - value: the object to inspect
    - otherwise: returned when `value` is neither a list nor list-like
      (as reported by bm_extract.is_list / bm_extract.is_list_like)

    Returns:
    - the first element when `value` is a non-empty list or list-like
    - None when `value` is an empty list or list-like
    - `otherwise` for every other kind of value
    """
    if bm_extract.is_list(value):
        if value:
            return value[0]

        return None

    if bm_extract.is_list_like(value):
        # Iterate instead of indexing: a list-like object is not
        # guaranteed to support value[0]. The first item produced is
        # the answer; falling out of the loop means it was empty.
        for first in value:
            return first

        return None

    return otherwise
def CustomizeAtomItem(self, itemd):
    """Rewrite the "author" entry of `itemd`, then defer to the base class.

    The "author" value is removed from `itemd`. When it is truthy:
    - "author" is set again to bm_extract.as_string(author, "@@title")
    - "hcard:author" is set to hcard.decompose(..., "hcard") of the
      author: a list of results when the author is a list or list-like,
      a single result when it is a dict

    A falsy author is simply left removed. A KeyError raised anywhere in
    this step (including a missing "author" key) is ignored.

    Afterwards self.ExtractCategories(itemd) is called and the result of
    bm_api.APIBase.CustomizeAtomItem(self, itemd) is returned.
    """
    try:
        author_value = itemd.pop("author")
        if author_value:
            itemd["author"] = bm_extract.as_string(author_value, "@@title")

            is_sequence = bm_extract.is_list(author_value) or bm_extract.is_list_like(author_value)
            if is_sequence:
                itemd["hcard:author"] = [
                    hcard.decompose(entry, "hcard") for entry in author_value
                ]
            elif bm_extract.is_dict(author_value):
                itemd["hcard:author"] = hcard.decompose(author_value, "hcard")
    except KeyError:
        # best effort: no author (or a KeyError while processing it)
        pass

    self.ExtractCategories(itemd)

    return bm_api.APIBase.CustomizeAtomItem(self, itemd)
def TranscribeNode(self, e_node, o):
    """Recursively write the value `o` into the XML element `e_node`.

    What happens depends on how bm_extract classifies `o`:
    - list or list-like: every element is transcribed into `e_node` itself
    - dict: keys become text, attributes or child elements (see below)
    - none: nothing is written
    - anything else: coerced to a string and stored in `e_node.text`,
      appended after a newline if the element already has text

    Dictionary key conventions:
    - keys starting with "@@" or containing ":@@" are hidden and skipped
    - the key "@" supplies the element text
    - other keys starting with "@" or containing ":@" are attributes,
      named by the key with every "@" removed
    - every remaining key names a child element; a non-empty list value
      produces one child per element, an empty list a single empty child
    """

    def is_hidden(name):
        return name.startswith("@@") or name.find(":@@") >= 0

    def is_attribute(name):
        return name.startswith("@") or name.find(":@") >= 0

    if bm_extract.is_list(o):
        for element in o:
            self.TranscribeNode(e_node, element)
        return

    if bm_extract.is_dict(o):
        #
        #   First pass: element text and attributes
        #
        for name, item in o.iteritems():
            if is_hidden(name):
                continue

            if name == '@':
                e_node.text = bm_extract.coerce_string(item, separator = ",")
                continue

            if not is_attribute(name):
                continue

            if bm_extract.is_atomic(item):
                e_node.set(name.replace('@', ''), bm_extract.coerce_string(item))
            elif bm_extract.is_list(item) or bm_extract.is_list_like(item):
                e_node.set(name.replace('@', ''), bm_extract.coerce_string(item, separator = ","))

        #
        #   Second pass: child elements. Hidden keys and attributes were
        #   dealt with above; lists are special because they result in
        #   repeated children.
        #
        for name, item in o.iteritems():
            if is_hidden(name) or is_attribute(name):
                continue

            if bm_extract.is_list_like(item):
                item = list(item)

            if bm_extract.is_list(item):
                emitted = False
                for element in item:
                    emitted = True
                    child = ElementTree.SubElement(e_node, name)
                    self.TranscribeNode(child, element)

                if emitted:
                    continue

                # empty list: fall through and emit one empty child
                item = None

            child = ElementTree.SubElement(e_node, name)
            self.TranscribeNode(child, item)
        return

    if bm_extract.is_list_like(o):
        for element in list(o):
            self.TranscribeNode(e_node, element)
        return

    if bm_extract.is_none(o):
        return

    if not e_node.text:
        e_node.text = bm_extract.coerce_string(o)
    else:
        e_node.text += "\n"
        e_node.text += bm_extract.coerce_string(o)
def ScrubEntry(self, itemd):
    """Convert an entry into a dictionary of "@"-prefixed attributes.

    Parameters:
    - itemd: a dict, an atomic value, or a list / list-like of entries

    Returns:
    - for a dict: a new dict holding "@<key>" for every atomic value,
      "outline" for the scrubbed "items"/"outline" value, "@type"
      ("rss" or "link") when an rssUrl / htmlUrl / url was seen, and
      "@text" (defaulting to "")
    - for an atomic value: { "@title" : <value as string> }
    - for a list or list-like: a list of scrubbed entries
    - anything else is returned unchanged

    When self.AtomLike() is true, keys are first renamed
    (link -> htmlUrl, feeds -> rssUrl, content -> description,
    title -> text, category -> tags, and an "alternate" entry in
    links -> rssUrl).

    Side effect: a "created" value in `itemd` is replaced in place by
    bm_extract.coerce_datetime(created, otherwise = created, rfc822 = True).
    """
    if bm_extract.is_dict(itemd):
        #
        #   datetimes: normalise "created" once, before iterating. The
        #   dict must not have keys removed and re-added while it is
        #   being iterated, and doing it here makes the emitted value
        #   independent of iteration order.
        #
        try:
            created = itemd["created"]
        except KeyError:
            pass
        else:
            itemd["created"] = bm_extract.coerce_datetime(created, otherwise = created, rfc822 = True)

        atom_renames = {
            "link" : "htmlUrl",
            "feeds" : "rssUrl",
            "content" : "description",
            "title" : "text",
        }

        nd = {}

        seen_html = False
        seen_rss = False
        seen_url = False

        for key, value in itemd.iteritems():
            if self.AtomLike():
                if key == "category":
                    key = "tags"
                    value = ", ".join(d["term"] for d in value)
                elif key == "links":
                    # no break: the last "alternate" link wins
                    for ld in bm_extract.coerce_list(value):
                        if bm_extract.as_string(ld, "rel") == "alternate":
                            key = "rssUrl"
                            value = bm_extract.as_string(ld, "href")
                else:
                    key = atom_renames.get(key, key)

            if key in ("rssUrl", "htmlUrl"):
                # URLs may arrive as a list; keep only the first one
                value = self.FirstInListLikeObject(value, value)
                if value is None:
                    continue

                if key == "rssUrl":
                    seen_rss = True
                else:
                    seen_html = True
            elif key == "url":
                seen_url = True

            if key in ("items", "outline"):
                nd["outline"] = self.ScrubEntry(value)
            elif value is not None and bm_extract.is_atomic(value):
                nd['@%s' % key] = value

        if seen_rss:
            nd.setdefault("@type", "rss")
        elif seen_html or seen_url:
            nd.setdefault("@type", "link")

        nd.setdefault("@text", "")

        return nd
    elif bm_extract.is_atomic(itemd):
        return { "@title" : bm_extract.coerce_string(itemd) }
    elif bm_extract.is_list(itemd) or bm_extract.is_list_like(itemd):
        return [ self.ScrubEntry(sub) for sub in itemd ]

    return itemd