def saveNode(dom, nodeid, html_template, out_dir, name=None, do_anchors=True):
    """
    Convert node to HTML, put it into the template and save it to a file.

    Args:
        dom: Parsed document in which the node is looked up.
        nodeid: Unique id of the node. It is converted to string before use.
        html_template: Template source, which is substituted with the
            `content`, `title`, `copyright` and `rootpath` values.
        out_dir: Directory under which the resulting file is stored.
        name: Title of the page. When None, the `name` parameter of the
            node with matching `unique_id` is used.
        do_anchors: Passed unchanged to convertToHtml().

    Returns:
        Path of the saved file as returned by getNodePath(); the file
        itself is written to `out_dir` + "/" + this path.
    """
    nodeid = str(nodeid)
    filename = getNodePath(dom, nodeid)

    # relative path from the saved file back to the top of the output tree:
    # one ".." per directory level in `filename`, or "." for the top level
    root_path = "/".join([".."] * filename.count("/")) or "."

    # ugly, but increases parsing speed a bit - the DOM is searched only
    # when the caller didn't supply the name
    if name is None:
        name = dom.find("node", {"unique_id": nodeid})[0]
        name = name.params["name"]

    # convert node to html
    data = convertToHtml(
        dom,
        nodeid,
        do_anchors=do_anchors,
        out_dir=out_dir,
        root_path=root_path,
    )

    # apply html template
    data = Template(html_template).substitute(
        content=data,
        title=name,
        copyright=COPYRIGHT,
        rootpath=root_path
    )

    # check if directory tree exists - if not, create it
    directory = out_dir + "/" + os.path.dirname(filename)
    if not os.path.exists(directory):
        os.makedirs(directory)

    # context manager closes the file even when the write fails
    with open(out_dir + "/" + filename, "wt") as fh:
        fh.write(data)

    return filename
def __getUserTemplate(dom, name):
    """
    Return user's template identified by `name` (case insensitive).

    The template node is converted to HTML, which is then parsed.

    Args:
        dom: Parsed document in which the template node is looked up.
        name: Name of the template node.

    Returns:
        tuple: (template_node, html_content), where `html_content` is the \
               parsed HTML of the template node, or (None, None) when no \
               node with given `name` was found.
    """
    template_node = __getFirstNodeByCIName(dom, name)

    # don't continue, if there is no template node
    if template_node is None:
        return (None, None)

    html_content = d.parseString(
        convertToHtml(dom, template_node.params["unique_id"])
    )

    return (template_node, html_content)
def generateAtomFeed(dom, out_dir):
    """
    Generate Atom feed from the `__rss` node and save it into `out_dir`.

    Every node found in the `__rss` node, except the first one (the main
    node with information about the feed), is converted to one feed entry.
    First link in the entry's content holds the update time (text of the
    link) and the URL of the entry (`href` parameter); the link is removed
    from the content of the entry.

    The Atom template is taken from the first codebox in the `__rss` node
    and substituted with `updated` (update time of the first entry) and
    `entries`. Name of the output file is the last path segment of the
    `href` of the first link in the resulting feed, or ``atom.xml`` if that
    segment contains no dot.

    Processed entry nodes and, at the end, the `__rss` node itself are
    replaced with empty elements in the `dom`.

    Args:
        dom: Parsed document containing the `__rss` node.
        out_dir: Directory into which the feed is written.

    Returns:
        None. When there is no `__rss` node, nothing is generated.

    Raises:
        ValueError: If an entry has no link, if the feed has no entries,
            if there is no codebox with the Atom template, or if the
            template has no link with the `href` parameter.
    """
    rss_node = __getFirstNodeByCIName(dom, "__rss")

    # don't continue, if there is no rss node
    if rss_node is None:
        return None

    # iterate thru feed records
    entries = []
    update_times = []
    for index, node in enumerate(rss_node.find("node")):
        # skip first match (main node containing information about feed)
        if index == 0:
            continue

        # convert node from rich_text to html
        html_node = d.parseString(convertToHtml(dom, node.params["unique_id"]))

        links = html_node.find("a")
        if len(links) <= 0:
            raise ValueError(
                "Item '" +
                node.params["name"] +
                "' doesn't have date and/or URL!"
            )

        first_link = links[0]
        updated = first_link.getContent()

        # get url from first link, or set it to default
        url = first_link.params["href"] if "href" in first_link.params else ""
        url = "./" + url[5:] if url.startswith("./../") and len(url) > 5 else url

        # remove first link (and its content) from html code
        first_link.replaceWith(d.HTMLElement(""))

        # preprocess content
        content = html_node.getContent().replace("<p></p>", "").strip()
        for key, val in HTML_ENTITIES.iteritems():
            content = content.replace(val, key)

        entries.append(
            Template(ATOM_ENTRY_TEMPLATE).substitute(
                title=node.params["name"],
                url=url,
                uid=hashlib.md5(
                    node.params["name"] +
                    str(url) +
                    str(updated)
                ).hexdigest(),
                updated=updated,
                content=content
            )
        )
        update_times.append(updated)

        # remove node from DOM
        node.replaceWith(d.HTMLElement(""))

    # extract Atom template from .ctd
    codeboxes = rss_node.find("codebox")
    if len(codeboxes) <= 0:
        raise ValueError("There is no codebox with Atom template!")

    atom_template = __removeHTMLEntities(codeboxes[0].getContent())

    # update time of the feed is taken from the first entry, so at least
    # one entry is required
    if not update_times:
        raise ValueError("There are no entries in the feed!")

    atom_feed = Template(atom_template).substitute(
        updated=update_times[0],
        entries="".join(entries)
    )

    # get feed's filename - it is specified in atom template
    feed_links = d.parseString(atom_feed).find("link")
    if len(feed_links) <= 0:
        raise ValueError("There has to be link in your Atom template!")

    feed_link = feed_links[0]
    if "href" not in feed_link.params:
        raise ValueError(
            "Link in your Atom template has to have 'href' parameter!"
        )

    filename = feed_link.params["href"].split("/")[-1]
    if "." not in filename:
        filename = "atom.xml"
        writeln(
            "You didn't specified filename of your feed, so I choosed " +
            "'%s'" % (filename)
        )

    # context manager closes the file even when the write fails
    with open(out_dir + "/" + filename, "wt") as fh:
        fh.write(atom_feed)

    # get rid of RSS node
    rss_node.replaceWith(d.HTMLElement(""))