def generateAtomFeed(dom, out_dir):
    """
    Generate an Atom feed from the "__rss" node of `dom` and save it into
    `out_dir`.

    The first node found under the "__rss" node (the node carrying the
    information about the feed itself) is skipped; every other node becomes
    one feed entry. The first link of each entry supplies the entry's
    `updated` value (link content) and URL (link `href`). The feed template
    is read from the first codebox of the "__rss" node, and the name of the
    output file is taken from the `href` of the first <link> in the rendered
    feed.

    Processed nodes and the "__rss" node itself are removed from `dom`.

    Args:
        dom: Parsed document, searched for the "__rss" node.
        out_dir: Directory into which the feed file is written.

    Returns:
        None. When `dom` contains no "__rss" node, nothing is done.

    Raises:
        ValueError: If an entry has no link, if there is no codebox with the
            Atom template, if there are no feed entries at all, or if the
            template has no <link> with a `href` parameter.
    """
    rss_node = __getFirstNodeByCIName(dom, "__rss")

    # don't continue, if there is no rss node
    if rss_node is None:
        return None

    entry_template = Template(ATOM_ENTRY_TEMPLATE)
    entries = []
    update_times = []

    # the first found node is the main node containing information about the
    # feed, so only the remaining ones are feed records
    for node in rss_node.find("node")[1:]:
        # convert node from rich_text to html
        html_node = d.parseString(convertToHtml(dom, node.params["unique_id"]))

        links = html_node.find("a")
        if not links:
            raise ValueError(
                "Item '" + node.params["name"] + "' doesn't have date and/or URL!"
            )
        first_link = links[0]

        # content of the first link is used as the time of the update
        updated = first_link.getContent()

        # get url from first link, or set it to default
        url = first_link.params["href"] if "href" in first_link.params else ""

        # turn "./../something" into "./something"
        # NOTE(review): presumably because the feed is stored one level
        # above the linked pages - confirm
        if url.startswith("./../") and len(url) > 5:
            url = "./" + url[5:]

        # remove first link (and its content) from html code
        first_link.replaceWith(d.HTMLElement(""))

        # preprocess content - each value from HTML_ENTITIES is replaced by
        # its key
        content = html_node.getContent().replace("<p></p>", "").strip()
        for key, val in HTML_ENTITIES.iteritems():
            content = content.replace(val, key)

        entries.append(
            entry_template.substitute(
                title=node.params["name"],
                url=url,
                uid=hashlib.md5(
                    node.params["name"] + str(url) + str(updated)
                ).hexdigest(),
                updated=updated,
                content=content
            )
        )
        update_times.append(updated)

        # remove node from DOM
        node.replaceWith(d.HTMLElement(""))

    # extract Atom template from .ctd
    codeboxes = rss_node.find("codebox")
    if not codeboxes:
        raise ValueError("There is no codebox with Atom template!")

    # fail with a readable error instead of IndexError on update_times[0]
    if not update_times:
        raise ValueError("There are no feed entries in the RSS node!")

    atom_template = __removeHTMLEntities(codeboxes[0].getContent())

    # time of the first entry is used as the time of update of whole feed
    atom_feed = Template(atom_template).substitute(
        updated=update_times[0],
        entries="".join(entries)
    )

    # get feed's filename - it is specified in atom template
    feed_links = d.parseString(atom_feed).find("link")
    if not feed_links:
        raise ValueError("There has to be link in your Atom template!")

    feed_link = feed_links[0]
    if "href" not in feed_link.params:
        raise ValueError(
            "Link in your Atom template has to have 'href' parameter!"
        )

    # only the last path component of the href is used as the filename
    filename = feed_link.params["href"].split("/")[-1]
    if "." not in filename:
        filename = "atom.xml"
        writeln(
            "You didn't specified filename of your feed, so I choosed " +
            "'%s'" % (filename)
        )

    # context manager closes the file even when the write fails
    with open(out_dir + "/" + filename, "wt") as fh:
        fh.write(atom_feed)

    # get rid of RSS node
    rss_node.replaceWith(d.HTMLElement(""))
def _transformLink(tag, dom, node_id, out_dir, root_path):
    """
    Replace a <rich_text> element carrying a `link` parameter with an <a>
    element.

    For example <rich_text link="webs http://kitakitsune.org">odkaz</rich_text>
    becomes <a href="http://kitakitsune.org">odkaz</a>.

    Three kinds of links are recognized by the prefix of the `link` value:

    - "webs": web links; the "http://" prefix is dropped from links which
      actually point to local files ("http:///", "http://..", "http://./").
    - "file ": base64 encoded path to a local file. Pictures (png, gif, jpg,
      jpeg) are also copied into `out_dir`/pictures, but the href still
      contains the decoded path.
    - "node ": link to another node, converted to a relative path to that
      node. A message is written to stderr when the node doesn't exist.

    Tags without the `link` parameter are left untouched. The `root_path`
    parameter is not used by this function.
    """
    if "link" not in tag.params:
        return

    raw_link = tag.params["link"]

    anchor = d.HTMLElement("<a>")
    anchor.childs = tag.childs

    # cherrytree puts a five characters long prefix ("webs "/"node ") before
    # every link, so it is cut off here
    href = raw_link[5:]

    if raw_link.startswith("webs"):
        # absolute paths to local files, relative paths to local files and
        # relative paths to files in current directory all lose "http://"
        local_prefixes = ("http:///", "http://..", "http://./")
        if href.startswith(local_prefixes):
            href = href[7:]

    elif raw_link.startswith("file "):
        href = base64.b64decode(raw_link.split()[1])

        # support for local images - everything after the last dot is
        # considered to be the type of the file
        extension = href.rsplit(".", 1)[-1].lower()
        if extension in ("png", "gif", "jpg", "jpeg"):
            pictures_dir = out_dir + "/pictures"
            if not os.path.exists(pictures_dir):
                os.makedirs(pictures_dir)

            # hash of the whole path goes into the name of the copy
            digest = hashlib.md5(href).hexdigest()
            picture_copy = "%s/%s_%s" % (
                pictures_dir,
                digest,
                os.path.basename(href)
            )
            shutil.copyfile(href, picture_copy)

    elif raw_link.startswith("node "):
        # internal links contains only node id
        target_id = href.strip()

        if dom.find("node", {"unique_id": str(target_id)}):
            # climb up from (this) node, then descend to the linked one
            depth = len(getNodePath(dom, node_id).split("/")) - 1
            href = "./" + (depth * "../") + getNodePath(dom, target_id)
        else:
            writeln("Broken link to node ID '" + target_id + "'", sys.stderr)
            href = "[broken link to internal node]"

    anchor.params["href"] = href.strip()
    anchor.endtag = d.HTMLElement("</a>")

    tag.replaceWith(anchor)