Code example #1
    def get_message(self, msgid):
        """ Returns a FeedParserPlus object (which inherits from
        traditional FeedparserDict) representing one SeenThis message. """
        endpoint = get_endpoint_tmpl % int(msgid)
        request = urllib2.Request(url=endpoint)
        self._add_headers(request)
        server = urllib2.urlopen(request)
        data = server.read()
        try:
            atom_entry = FeedParserPlus.parse(data)
        except Exception:
            # On a parse failure, dump the raw response to a temporary file
            # so that it can be inspected later.
            import tempfile

            (datafilefd, datafilename) = tempfile.mkstemp(suffix=".atom", prefix="seenthis_", text=True)
            datafile = os.fdopen(datafilefd, "w")
            datafile.write(data)
            datafile.close()
            raise InvalidResponse("Error parsing the ATOM response. The data has been saved in %s" % datafilename)
        # If the message-ID does not exist, SeenThis does not return a
        # 404 and sends back an invalid XML file :-(
        # http://seenthis.net/messages/70646 So, we hack.
        if "bozo_exception" in atom_entry:
            raise NotFound("Message %i apparently does not exist" % int(msgid))
        return atom_entry
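For context, a minimal usage sketch of get_message (the client class name SeenThisClient and its constructor are assumptions made for illustration; get_message, NotFound, the message ID 70646 and the FeedparserDict-style result come from the code above):

# Hedged usage sketch; SeenThisClient is an assumed name for the class that
# defines get_message() above, and entries[0] assumes the usual feedparser
# layout of the parsed result.
import sys

client = SeenThisClient()
try:
    entry = client.get_message(70646)
    print entry.entries[0].title
except NotFound:
    print >> sys.stderr, "Message does not exist"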
Code example #2
File: test-parse.py Project: Fil/seenthis-python
#!/usr/bin/env python

from FeedParserPlus import FeedParserPlus
import sys

for atom in sys.argv[1:]:
    # Parse each ATOM file given on the command line.
    with open(atom) as atom_file:
        atom_feed = FeedParserPlus.parse(atom_file.read())
    #print atom_feed.serialize()

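The script above only exercises the parser. As a hedged extension (not part of the original file), the loop could report what was parsed by checking the bozo_exception and entries fields that FeedParserPlus inherits from FeedparserDict, the same fields used in the other examples:

#!/usr/bin/env python
# Hedged sketch extending test-parse.py: report basic parse results using
# the bozo_exception / entries fields inherited from FeedparserDict.
from FeedParserPlus import FeedParserPlus
import sys

for atom in sys.argv[1:]:
    with open(atom) as atom_file:
        atom_feed = FeedParserPlus.parse(atom_file.read())
    if "bozo_exception" in atom_feed:
        print >> sys.stderr, "%s: parse problem: %s" % (atom, atom_feed["bozo_exception"])
    else:
        print "%s: %i entries" % (atom, len(atom_feed["entries"]))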
Code example #3
    def get(self, n=None, continue_on_error=False):
        """
        n is the number of messages to retrieve. When None, we just retrieve what
        SeenThis gives us (today, the last 25 messages). Otherwise, we
        loop to retrieve n messages. Warning: because there may be
        less messages on SeenThis, there is no guarantee to have n
        entries in the result.

        To get all the messages, just set n to a very high number.

        The result is a FeedParserPlus object (which inherits from
        traditional FeedparserDict).
        """
        total = 0
        over = False
        result = None
        step = 0
        while not over:
            # TODO: we could use feedparser to retrieve the feed, not
            # only to parse it...
            # SeenThis paginates by offset: the first request hits the base
            # endpoint, subsequent ones append the number of messages
            # already retrieved.
            if step == 0:
                endpoint = self.retrieve_endpoint
            else:
                endpoint = "%s/%i" % (self.retrieve_endpoint, total)
            request = urllib2.Request(url=endpoint)
            self._add_headers(request)
            server = urllib2.urlopen(request)
            data = server.read()
            # Bad workaround for FeedParser's bug #91
            # http://code.google.com/p/feedparser/issues/detail?id=91:
            # escape "!DOCTYPE" so the parser does not choke on it. A single
            # replace over the whole buffer is enough.
            if "!DOCTYPE" in data:
                data = data.replace("!DOCTYPE", "!\ DOCTYPE")
            try:
                atom_feed = FeedParserPlus.parse(data)
            except Exception:
                # Dump the raw response to a temporary file for later inspection.
                import tempfile

                (datafilefd, datafilename) = tempfile.mkstemp(suffix=".atom", prefix="seenthis_", text=True)
                datafile = os.fdopen(datafilefd, "w")
                datafile.write(data)
                datafile.close()
                print >> sys.stderr, "Error parsing the response. The data has been saved in %s" % datafilename
                if continue_on_error:
                    (exc_type, exc_value, exc_traceback) = sys.exc_info()
                    print >> sys.stderr, 'Exception %s "%s" %s' % (
                        exc_type,
                        exc_value,
                        traceback.format_list(traceback.extract_tb(exc_traceback)),
                    )
                    step += 1
                    continue
                else:
                    raise
            got = len(atom_feed["entries"])
            if got == 0:
                over = True
            else:
                total += got
                step += 1
                print >> sys.stderr, "Got %i entries..." % total
                if result is None:
                    result = atom_feed
                else:
                    for entry in atom_feed["entries"]:
                        result["entries"].append(entry)
                if n is not None and total >= n:
                    over = True
        return result
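For context, a minimal usage sketch of get (the client class name SeenThisClient and its constructor are assumptions made for illustration; get, its n and continue_on_error parameters, and the FeedparserDict-style result come from the code above):

# Hedged usage sketch; SeenThisClient is an assumed name for the class that
# defines get() above, and entry.title assumes the usual feedparser entry
# fields.
client = SeenThisClient()
feed = client.get(n=50, continue_on_error=True)
if feed is None:
    print "No messages retrieved"
else:
    print "Retrieved %i entries in total" % len(feed["entries"])
    for entry in feed["entries"]:
        print entry.title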