Beispiel #1
0
    def process(self, dom):
        """Rewrite the channel metadata of the feed and process every item.

        For the element children of the node returned by
        ``domutils.getUniqueChildbyTagName(dom, 'rss', 'channel')``:

        * ``title`` gets its children replaced by the text 'Meneame (Directo)'.
        * ``link`` gets its children replaced by the project URL.
        * ``atom:link`` elements whose ``rel`` attribute is 'self' or 'hub'
          are removed from their parent.

        Afterwards ``self.process_item`` is called for every ``item`` element
        found in ``dom``.

        :param dom: DOM document of the feed; it is modified in place.
        """
        channel = domutils.getUniqueChildbyTagName(dom, 'rss', 'channel')

        # Iterate over a snapshot of the children: nodes are removed inside the
        # loop, and removing from a list-backed childNodes (as in
        # xml.dom.minidom) while iterating it makes the loop skip the sibling
        # that follows each removed node.
        for child in list(channel.childNodes):
            if child.nodeType != child.ELEMENT_NODE:
                continue

            name = child.nodeName

            if name == 'title':
                domutils.replaceChildren(child, dom.createTextNode('Meneame (Directo)'))

            elif name == 'link':
                domutils.replaceChildren(child, dom.createTextNode('https://github.com/ldotlopez/feedfilter'))

            elif (name == 'atom:link'
                    and child.hasAttribute('rel')
                    and child.getAttribute('rel') in ('self', 'hub')):
                child.parentNode.removeChild(child)

        for item in dom.getElementsByTagName('item'):
            self.process_item(item)
Beispiel #2
0
    def process_item(self, node):
        """Make an item's link and guid point at the destination URL.

        Items rejected by ``self.is_valid_item`` are left untouched. For the
        rest, the text of the first ``meneame:url`` element is written into the
        first ``link`` and ``guid`` elements, and the first ``description`` is
        rewritten as a CDATA section in which 'noticia original' reads
        'enlace meneame' and the destination URL is replaced by the URL that
        ``link`` held originally.

        :param node: ``item`` element to rewrite in place.
        """
        if not self.is_valid_item(node):
            return

        dom = domutils.getDomFromNode(node)

        def first(tag_name):
            # First element below the item carrying the given tag name.
            return node.getElementsByTagName(tag_name)[0]

        # Per the original author's notes: on input, <link> carries the
        # meneame URL and <meneame:url> carries the destination URL.
        link_element = first('link')
        story_element = first('meneame:url')
        guid_element = first('guid')

        meneame_url = link_element.childNodes[0].data
        dest_url = story_element.childNodes[0].data

        # Both <link> and <guid> end up holding the destination URL.
        for target in (link_element, guid_element):
            domutils.replaceChildren(target, dom.createTextNode(dest_url))

        # Rewrite the description so that its link goes to meneame instead.
        description_element = first('description')
        text = description_element.childNodes[0].data
        text = text.replace('noticia original', 'enlace meneame')
        text = text.replace(dest_url, meneame_url)
        domutils.replaceChildren(description_element,
                                 dom.createCDATASection(text))
Beispiel #3
0
    def process(self, dom):
        """Replace every item's link and guid with the link found on its page.

        For each ``item`` element the URL in its first ``link`` is fetched
        through a ``Cache`` and parsed with BeautifulSoup. The ``href`` of the
        ``a.title`` anchor inside ``p.title`` becomes the new text of the
        item's ``link`` and ``guid``. Items whose page cannot be loaded, is a
        'Too Many Requests' page, or lacks that anchor are skipped; in the
        latter two cases the cache entry for the URL is deleted as well.

        :param dom: DOM document of the feed; it is modified in place.
        """
        cache = Cache(debug=False)

        items = dom.getElementsByTagName('item')
        total = len(items)

        # NOTE: print is used with a single parenthesised argument, which
        # produces the same output under the Python 2 print statement.
        for index, item in enumerate(items):
            print("Item %d of %d" % (index + 1, total))

            link_node = item.getElementsByTagName('link')[0]
            link_url = link_node.childNodes[0].data

            # Fetch page
            try:
                (buff, cached) = cache.fetch_url(link_url)
                soup = BeautifulSoup.BeautifulSoup(buff)
            except IOError as e:
                print("Unable to load url %s: %s" % (link_url, e))
                continue

            # A page without <title> yields soup.title == None; read the text
            # once, guarded, so such a page does not abort the whole run with
            # an uncaught AttributeError.
            title_tag = soup.title
            title_text = title_tag.text if title_tag is not None else ''

            # Throttled response: drop it from the cache and leave the item as is.
            if title_text == 'Too Many Requests':
                cache.delete(link_url)
                print("Reddit is angry")
                continue

            try:
                real_link_url = soup.find('p', 'title').find('a', 'title').get('href')
            except AttributeError:
                # One of the find() calls returned None: the expected anchor
                # is not on this page.
                cache.delete(link_url)
                print("Unable to retrieve original link for '%s'" % soup.title)
                continue

            self._debug("Got real link on '%s' (%s), replacing" % (title_text, real_link_url))

            # Both replacements go through domutils, the module the first call
            # already used; the second call used a bare, unqualified name.
            domutils.replaceChildren(link_node, dom.createTextNode(real_link_url))
            domutils.replaceChildren(item.getElementsByTagName('guid')[0],
                                     dom.createTextNode(real_link_url))

            # Only pause after a real network fetch.
            if not cached:
                time.sleep(2)