Exemple #1
0
def generateAtomFeed(dom, out_dir):
    """
    Generate Atom feed from the `__rss` node and save it into `out_dir`.

    The first `node` found under the RSS node is the main node containing
    information about the feed and is skipped. Every other found node is
    converted to HTML and rendered as one entry using `ATOM_ENTRY_TEMPLATE`.
    The first link in the converted HTML describes the entry: its content
    is used as the update time and its `href` as the entry URL. The link is
    then removed from the entry content.

    The feed template is read from the first `codebox` of the RSS node. The
    output filename is the last path segment of the `href` of the first
    `link` element in the rendered feed; `atom.xml` is used when that
    segment contains no dot.

    Processed entry nodes, and finally the RSS node itself, are removed
    from `dom`.

    Args:
        dom: Parsed document which is searched for the `__rss` node.
        out_dir: Directory into which the feed file is written.

    Returns:
        None. Nothing is done when `dom` has no RSS node.

    Raises:
        ValueError: When an entry has no link, when the feed has no
            entries, when the codebox with the Atom template is missing,
            or when the template has no `link` element with `href`.
    """
    rss_node = __getFirstNodeByCIName(dom, "__rss")

    # don't continue, if there is no rss node
    if rss_node is None:
        return None

    # the entry template is the same for all entries, so build it only once
    entry_template = Template(ATOM_ENTRY_TEMPLATE)

    entries = []
    update_times = []

    # iterate thru feed records; the first found node is the main node
    # containing information about the feed, so it is skipped
    for node in rss_node.find("node")[1:]:
        # convert node from rich_text to html
        html_node = d.parseString(convertToHtml(dom, node.params["unique_id"]))

        links = html_node.find("a")
        if not links:
            raise ValueError(
                "Item '" +
                node.params["name"] +
                "' doesn't have date and/or URL!"
            )
        first_link = links[0]

        # content of the first link is used as the time of the update
        updated = first_link.getContent()

        # get url from first link, or set it to default
        url = first_link.params["href"] if "href" in first_link.params else ""

        # "./../something" is rewritten to "./something"
        if url.startswith("./../") and len(url) > 5:
            url = "./" + url[5:]

        # remove first link (and its content) from html code
        first_link.replaceWith(d.HTMLElement(""))

        # preprocess content - drop empty paragraphs and put back the keys
        # of HTML_ENTITIES in place of their values
        content = html_node.getContent().replace("<p></p>", "").strip()
        for key, val in HTML_ENTITIES.items():
            content = content.replace(val, key)

        entries.append(
            entry_template.substitute(
                title=node.params["name"],
                url=url,
                # uid is derived from name, url and update time of the entry
                uid=hashlib.md5(
                    node.params["name"] +
                    str(url) +
                    str(updated)
                ).hexdigest(),
                updated=updated,
                content=content
            )
        )

        update_times.append(updated)

        # remove node from DOM
        node.replaceWith(d.HTMLElement(""))

    # extract Atom template from .ctd
    atom_template = rss_node.find("codebox")
    if not atom_template:
        raise ValueError("There is no codebox with Atom template!")
    atom_template = __removeHTMLEntities(atom_template[0].getContent())

    # the feed's update time is taken from the first entry, so a feed
    # without entries can't be rendered - fail with a readable message
    # instead of a bare IndexError
    if not update_times:
        raise ValueError(
            "There are no entries in your feed, so the time of the last " +
            "update can't be determined!"
        )

    atom_feed = Template(atom_template).substitute(
        updated=update_times[0],
        entries="".join(entries)
    )

    # get feed's filename - it is specified in atom template
    filename = d.parseString(atom_feed).find("link")
    if not filename:
        raise ValueError("There has to be link in your Atom template!")
    filename = filename[0]

    if "href" not in filename.params:
        raise ValueError(
            "Link in your Atom template has to have 'href' parameter!"
        )
    filename = filename.params["href"].split("/")[-1]

    if "." not in filename:
        filename = "atom.xml"
        writeln(
            "You didn't specified filename of your feed, so I choosed " +
            "'%s'" % (filename)
        )

    # `with` closes the file even when the write fails
    with open(out_dir + "/" + filename, "wt") as fh:
        fh.write(atom_feed)

    # get rid of RSS node
    rss_node.replaceWith(d.HTMLElement(""))
def _transformLink(tag, dom, node_id, out_dir, root_path):
    """
    Transform <rich_text link="webs http://kitakitsune.org">odkaz</rich_text>
    to <a href="http://kitakitsune.org">odkaz</a>.

    Also some basic link handling, ala local links and links to other nodes.
    """

    if "link" in tag.params:
        el = d.HTMLElement("<a>")
        el.childs = tag.childs

        # cherrytree puts string "webs "/"node " before every link for some
        # reason
        link = tag.params["link"]
        link = link[5:]

        if tag.params["link"].startswith("webs"):
            # absolute path to local files
            if link.startswith("http:///"):
                link = link[7:]
            # relative path to local files
            if link.startswith("http://.."):
                link = link[7:]
            # relative path to local files in current directory
            if link.startswith("http://./"):
                link = link[7:]
        elif tag.params["link"].startswith("file "):
            link = base64.b64decode(tag.params["link"].split()[1])

            # support for local images - I did tried to make it work as node,
            # but that failed miserably, because there is limit to picture
            # dimensions and other shitty crap
            file_type = link.split(".")
            pic_types = ["png", "gif", "jpg", "jpeg"]
            if len(file_type) >= 1 and file_type[-1].lower() in pic_types:
                directory = out_dir + "/pictures"
                if not os.path.exists(directory):
                    os.makedirs(directory)

                local_name = "%s/%s_%s" % (
                    directory,
                    hashlib.md5(link).hexdigest(),
                    os.path.basename(link)
                )

                shutil.copyfile(link, local_name)
        elif tag.params["link"].startswith("node "):
            # internal links contains only node id
            link_id = link.strip()

            # get nodename
            linked_nodename = dom.find("node", {"unique_id": str(link_id)})
            if not linked_nodename:
                writeln("Broken link to node ID '" + link_id + "'", sys.stderr)
                link = "[broken link to internal node]"
            else:
                # get (this) node depth
                depth = len(getNodePath(dom, node_id).split("/")) - 1
                link = "./" + (depth * "../") + getNodePath(dom, link_id)

        el.params["href"] = link.strip()

        el.endtag = d.HTMLElement("</a>")
        tag.replaceWith(el)