Ejemplo n.º 1
0
  def get(self):
    """Serve the feed named by the ``feed`` request parameter with a Salmon link.

    The feed URL is parsed with feedparser.  Unless one of the feed-level
    links already has ``rel`` equal to ``salmon`` (case-insensitive), a link
    pointing at ``http://<Host header>/post`` is appended.  The result is
    rendered through the ``atom.xml`` template and returned as
    ``application/atom+xml``.  Afterwards the feed URL is registered via
    ``bloggerproxy.addNonBloggerBlogProxy`` and every parsed entry is
    converted with ``model.makeEntry`` and stored.
    """
    feedurl = self.request.get('feed')
    data = feedparser.parse(feedurl)

    # Augment with a salmon endpoint. Don't overwrite existing!
    # setdefault() creates the list when the parsed feed carries no links,
    # instead of failing on the missing attribute.
    links = data.feed.setdefault('links', [])
    foundsalmon = any(link.get('rel', '').lower() == 'salmon' for link in links)
    if not foundsalmon:
      endpoint = u'http://' + self.request.headers['Host'] + '/post'
      links.append({'href': endpoint, 'type': u'application/atom+xml', 'rel': u'salmon'})

    # if feedfields.bozo:
    # TODO: Annotate stored data and/or hand back a warning.

    # Assign the header rather than add_header(): add_header appends, which
    # would leave any Content-Type already present on the response in place
    # next to this one.
    self.response.headers['Content-Type'] = 'application/atom+xml; charset=utf-8'
    # TODO: Have an alternate template that just shows the Atom with the salmon stuff highlighted in some way.
    self.response.out.write(template.render('atom.xml', data))
    self.response.set_status(200)

    # Add a fake BlogProxy entry so that we can fetch updated comments for this feed.
    bloggerproxy.addNonBloggerBlogProxy(feedurl)

    # And store the entries discovered in our own DB for reference.
    # All entries go to the datastore in one batched put instead of one
    # call per entry.
    stored = [model.makeEntry(entry, data.feed) for entry in data.entries]
    if stored:
      db.put(stored)
    for e in stored:
      logging.info('Remembering entry with title = "%s", id = "%s", '
                   'link = "%s"',
                   e.title, e.entry_id, e.link)
Ejemplo n.º 2
0
def crawlFeedAndComments(feedurl, data, hacked_in_reply_to_override=None):
    """Crawl a single feed and all comments on its entries, recursively.

    Args:
        feedurl: Identifier of the feed being crawled.  Only used in the
            warning logged when the feed has parse errors.
        data: Parsed feed exposing ``bozo``, ``bozo_exception``, ``feed`` and
            ``entries`` (the recursive call obtains it from
            ``feedparser.parse``).
        hacked_in_reply_to_override: When truthy, stored under each entry's
            ``"in-reply-to"`` key before the entry is converted.  The
            recursive call passes the id of the parent entry here.

    Every entry is converted with ``model.makeEntry`` and the whole batch is
    written with a single ``db.put``.  For each entry, every "replies" link of
    type ``application/atom+xml`` is fetched and crawled the same way.

    NOTE(review): there is no guard against comment feeds that link back to
    a feed already being crawled.
    """
    if data.bozo:
        problem = data.bozo_exception
        logging.warning("Feed %s has errors: %s: %r", feedurl, problem.__class__.__name__, problem)
        # Only some parser exceptions expose position information.
        if hasattr(problem, "getLineNumber") and hasattr(problem, "getMessage"):
            line = problem.getLineNumber()
            logging.warning("Line %d: %s", line, problem.getMessage())

    update_list = []
    for entry in data.entries:
        # TODO: Get rid of this if/when feedparser is fixed
        if hacked_in_reply_to_override:
            entry["in-reply-to"] = hacked_in_reply_to_override

        update_list.append(model.makeEntry(entry, data.feed))

        # Now look to see if the entry has comments
        commentfeeds = model.getLinkRel(entry, "replies")
        logging.info("Comments for entry: %s", commentfeeds)
        for f in commentfeeds:
            if f.type == "application/atom+xml":
                # TODO: Discover most recent comment from this and use to update last active timestamp
                # Pass the link's URL (not the link object) so that error
                # logging in the nested crawl names the feed properly.
                crawlFeedAndComments(f.href, feedparser.parse(f.href), entry.id)
    db.put(update_list)
Ejemplo n.º 3
0
def crawlFeedAndComments(feedurl, data, hacked_in_reply_to_override=None):
    """Crawl a single feed and all comments on its entries, recursively.

    Args:
        feedurl: Identifier of the feed being crawled.  Only used in the
            warning logged when the feed has parse errors.
        data: Parsed feed exposing ``bozo``, ``bozo_exception``, ``feed`` and
            ``entries`` (the recursive call obtains it from
            ``feedparser.parse``).
        hacked_in_reply_to_override: When truthy, stored under each entry's
            ``'in-reply-to'`` key before the entry is converted.  The
            recursive call passes the id of the parent entry here.

    Every entry is converted with ``model.makeEntry`` and the whole batch is
    written with a single ``db.put``.  For each entry, every "replies" link of
    type ``application/atom+xml`` is fetched and crawled the same way.

    NOTE(review): there is no guard against comment feeds that link back to
    a feed already being crawled.
    """
    if data.bozo:
        bozo_exc = data.bozo_exception
        logging.warning("Feed %s has errors: %s: %r", feedurl,
                        bozo_exc.__class__.__name__,
                        bozo_exc)
        # Only some parser exceptions expose position information.
        has_position = (hasattr(bozo_exc, 'getLineNumber')
                        and hasattr(bozo_exc, 'getMessage'))
        if has_position:
            line = bozo_exc.getLineNumber()
            logging.warning('Line %d: %s', line, bozo_exc.getMessage())

    update_list = []
    for entry in data.entries:
        # TODO: Get rid of this if/when feedparser is fixed
        if hacked_in_reply_to_override:
            entry['in-reply-to'] = hacked_in_reply_to_override

        update_list.append(model.makeEntry(entry, data.feed))

        # Now look to see if the entry has comments
        commentfeeds = model.getLinkRel(entry, "replies")
        logging.info("Comments for entry: %s", commentfeeds)
        for f in commentfeeds:
            if f.type == 'application/atom+xml':
                # TODO: Discover most recent comment from this and use to update last active timestamp
                # Pass the link's URL (not the link object) so that error
                # logging in the nested crawl names the feed properly.
                crawlFeedAndComments(f.href, feedparser.parse(f.href),
                                     entry.id)
    db.put(update_list)
Ejemplo n.º 4
0
    def post(self):
        """Accept a Salmon POST (an Atom document) and store its entries.

        The request body is decoded as UTF-8 and parsed with feedparser.
        Each entry is converted with ``model.makeEntry``; its parents are
        looked up with ``model.getTopicsOf`` and any parent whose
        ``thread_updated`` is older than the entry's ``updated`` is bumped
        and saved.  All converted entries are stored with one ``db.put``.

        Responds 400 when the body is not valid UTF-8, when feedparser flags
        it as malformed, or when an entry has no parent; 200 otherwise.
        """
        logging.info('Headers =\n%s\n', self.request.headers)
        # TODO: Get in-reply-to from the entry's thr:in-reply-to if possible
        # (feedparser.py BUG here); the 'inreplyto' request parameter is
        # currently not used.
        # TODO: Do a check for application/atom+xml and charset

        try:
            body = self.request.body.decode('utf-8')
        except UnicodeDecodeError:
            # Undecodable input is the client's fault, not a server error.
            logging.error('Post body is not valid UTF-8')
            self.response.set_status(400)
            return

        logging.info('Post body is %d characters', len(body))
        logging.info('Post body is:\n%s\n----', body)

        data = feedparser.parse(body)
        logging.info('Data returned was:\n%s\n----', data)
        if data.bozo:
            bozo_exc = data.bozo_exception
            logging.error('Bozo feed data. %s: %r',
                          bozo_exc.__class__.__name__, bozo_exc)
            if (hasattr(bozo_exc, 'getLineNumber')
                    and hasattr(bozo_exc, 'getMessage')):
                line = bozo_exc.getLineNumber()
                logging.error('Line %d: %s', line, bozo_exc.getMessage())
            # A malformed document is a client error; 400 also matches what
            # the Blogger salmon handler answers for the same condition.
            self.response.set_status(400)
            return

        update_list = []
        logging.info('Found %d entries', len(data.entries))
        for entry in data.entries:
            s = model.makeEntry(entry)

            referents = model.getTopicsOf(s)

            # count() is evaluated once and reused for the log and the check.
            parent_count = referents.count()
            logging.info('Saw %d parents!', parent_count)
            if parent_count == 0:
                logging.info(
                    'No parent found for %s, returning error to client.',
                    s.entry_id)
                self.response.set_status(400)
                self.response.out.write('Bad Salmon, no parent with id ' +
                                        unicode(s.in_reply_to) +
                                        ' found -- rejected.\n')
                return

            # Look for parents, update thread_updated if necessary
            for parent in referents:
                logging.info('Saw parent: %s\n', parent)
                if parent.thread_updated < s.updated:
                    parent.thread_updated = s.updated
                    parent.put()

            update_list.append(s)

        db.put(update_list)
        self.response.set_status(200)
        self.response.out.write("Salmon accepted, swimming upstream!\n")
Ejemplo n.º 5
0
  def post(self):
    """Accept a Salmon POST (an Atom document) and store its entries.

    The request body is decoded as UTF-8 and parsed with feedparser.  Each
    entry is converted with ``model.makeEntry``; its parents are looked up
    with ``model.getTopicsOf`` and any parent whose ``thread_updated`` is
    older than the entry's ``updated`` is bumped and saved.  All converted
    entries are stored with one ``db.put``.

    Responds 400 when the body is not valid UTF-8, when feedparser flags it
    as malformed, or when an entry has no parent; 200 otherwise.
    """
    logging.info('Headers =\n%s\n', self.request.headers)
    # TODO: Get in-reply-to from the entry's thr:in-reply-to if possible
    # (feedparser.py BUG here); the 'inreplyto' request parameter is
    # currently not used.
    # TODO: Do a check for application/atom+xml and charset

    try:
      body = self.request.body.decode('utf-8')
    except UnicodeDecodeError:
      # Undecodable input is the client's fault, not a server error.
      logging.error('Post body is not valid UTF-8')
      self.response.set_status(400)
      return

    logging.info('Post body is %d characters', len(body))
    logging.info('Post body is:\n%s\n----', body)

    data = feedparser.parse(body)
    logging.info('Data returned was:\n%s\n----', data)
    if data.bozo:
      bozo_exc = data.bozo_exception
      logging.error('Bozo feed data. %s: %r',
                    bozo_exc.__class__.__name__, bozo_exc)
      if (hasattr(bozo_exc, 'getLineNumber') and
          hasattr(bozo_exc, 'getMessage')):
        line = bozo_exc.getLineNumber()
        logging.error('Line %d: %s', line, bozo_exc.getMessage())
      # A malformed document is a client error; 400 also matches what the
      # Blogger salmon handler answers for the same condition.
      self.response.set_status(400)
      return

    update_list = []
    logging.info('Found %d entries', len(data.entries))
    for entry in data.entries:
      s = model.makeEntry(entry)

      referents = model.getTopicsOf(s)

      # count() is evaluated once and reused for the log and the check.
      parent_count = referents.count()
      logging.info('Saw %d parents!', parent_count)
      if parent_count == 0:
        logging.info('No parent found for %s, returning error to client.', s.entry_id)
        self.response.set_status(400)
        self.response.out.write('Bad Salmon, no parent with id ' + unicode(s.in_reply_to) + ' found -- rejected.\n')
        return

      # Look for parents, update thread_updated if necessary
      for parent in referents:
        logging.info('Saw parent: %s\n', parent)
        if parent.thread_updated < s.updated:
          parent.thread_updated = s.updated
          parent.put()

      update_list.append(s)

    db.put(update_list)
    self.response.set_status(200)
    self.response.out.write("Salmon accepted, swimming upstream!\n")
Ejemplo n.º 6
0
    def get(self):
        """Serve the feed named by the ``feed`` request parameter with a Salmon link.

        The feed URL is parsed with feedparser.  Unless one of the feed-level
        links already has ``rel`` equal to ``salmon`` (case-insensitive), a
        link pointing at ``http://<Host header>/post`` is appended.  The
        result is rendered through the ``atom.xml`` template and returned as
        ``application/atom+xml``.  Afterwards the feed URL is registered via
        ``bloggerproxy.addNonBloggerBlogProxy`` and every parsed entry is
        converted with ``model.makeEntry`` and stored.
        """
        feedurl = self.request.get('feed')
        data = feedparser.parse(feedurl)

        # Augment with a salmon endpoint. Don't overwrite existing!
        # setdefault() creates the list when the parsed feed carries no
        # links, instead of failing on the missing attribute.
        links = data.feed.setdefault('links', [])
        foundsalmon = any(
            link.get('rel', '').lower() == 'salmon' for link in links)
        if not foundsalmon:
            endpoint = u'http://' + self.request.headers['Host'] + '/post'
            links.append({
                'href': endpoint,
                'type': u'application/atom+xml',
                'rel': u'salmon'
            })

        # if feedfields.bozo:
        # TODO: Annotate stored data and/or hand back a warning.

        # Assign the header rather than add_header(): add_header appends,
        # which would leave any Content-Type already present on the response
        # in place next to this one.
        self.response.headers['Content-Type'] = (
            "application/atom+xml; charset=utf-8")
        # TODO: Have an alternate template that just shows the Atom with the salmon stuff highlighted in some way.
        self.response.out.write(template.render('atom.xml', data))
        self.response.set_status(200)

        # Add a fake BlogProxy entry so that we can fetch updated comments for this feed.
        bloggerproxy.addNonBloggerBlogProxy(feedurl)

        # And store the entries discovered in our own DB for reference.
        # All entries go to the datastore in one batched put instead of one
        # call per entry.
        stored = [model.makeEntry(entry, data.feed) for entry in data.entries]
        if stored:
            db.put(stored)
        for e in stored:
            logging.info(
                'Remembering entry with title = "%s", id = "%s", '
                'link = "%s"', e.title, e.entry_id, e.link)
Ejemplo n.º 7
0
    def post(self):
        """Relay an incoming Salmon entry to Blogger as a comment.

        The request body is parsed as a feed.  Every entry must have at least
        one known parent (``model.getTopicsOf``).  The last parsed entry is
        then turned into an Atom comment and posted through
        ``self.client.blogger``, using the OAuth token stored on the
        ``BlogProxy`` of the blog named in the entry's ``in_reply_to`` tag
        URI.

        Responds 400 for an undecodable or malformed body, a body without
        entries, an entry without a parent, or an ``in_reply_to`` that is not
        a Blogger post tag; 404 when no ``BlogProxy`` exists for the blog;
        200 after the comment has been passed to the Blogger client.

        NOTE(review): only the last entry of a multi-entry body is relayed;
        confirm whether Salmon bodies are expected to hold a single entry.
        """
        # Function-scope stdlib import so the file's import block stays as is.
        from xml.sax.saxutils import escape

        try:
            body = self.request.body.decode("utf-8")
        except UnicodeDecodeError:
            logging.error("Salmon body is not valid UTF-8")
            self.response.set_status(400)
            return

        logging.info("Salmon body is:\n%s\n----", body)

        data = feedparser.parse(body)
        logging.info("Data parsed was:\n%s\n----", data)
        if data.bozo:
            bozo_exc = data.bozo_exception
            logging.error("Bozo feed data. %s: %r", bozo_exc.__class__.__name__, bozo_exc)
            if hasattr(bozo_exc, "getLineNumber") and hasattr(bozo_exc, "getMessage"):
                line = bozo_exc.getLineNumber()
                logging.error("Line %d: %s", line, bozo_exc.getMessage())
            self.response.set_status(400)
            return

        logging.info("Found %d entries", len(data.entries))
        if not data.entries:
            # Without an entry there is nothing to relay; the code below
            # would otherwise fail on an unbound ``s``.
            self.response.set_status(400)
            self.response.out.write("Bad Salmon, no entries found -- rejected.\n")
            return

        for entry in data.entries:
            s = model.makeEntry(entry)

            referents = model.getTopicsOf(s)

            # count() is evaluated once and reused for the log and the check.
            parent_count = referents.count()
            logging.info("Saw %d parent(s)", parent_count)
            if parent_count == 0:
                logging.info("No parent found for %s, returning error to client.", s.entry_id)
                self.response.set_status(400)
                self.response.out.write(
                    "Bad Salmon, no parent with id " + unicode(s.in_reply_to) + " found -- rejected.\n"
                )
                return

        # Pull body & other info out of salmon

        # Create an Atom entry and post as a comment
        # TODO: Fix Blogger so it accepts acct: URIs... sigh...
        name = re.sub(r"(..\@.+)", "...", s.author_name)
        author_uri = "http://example.org/profile/" + name
        # if author_uri.startswith("acct:"):
        #  author_uri = author_uri.replace("acct:","http://")
        # The author name comes from the sender, so escape it before it is
        # placed inside markup (and inside the quoted href attribute).
        text = (
            s.content
            + ' by <a href="' + escape(author_uri, {'"': "&quot;"}) + '">'
            + escape(name) + "</a>"
        )
        entry = atom.Entry(content=atom.Content(text=text))

        # Grab the entry ID from the in-reply-to element of the salmon
        p = re.compile(r"tag:blogger\.com,1999:blog-(\d+)\.post-(\d+)")
        m = p.match(s.in_reply_to or "")
        if not m:
            self.response.set_status(400)
            return
        blog_id = m.group(1)
        post_id = m.group(2)

        logging.info("About to post comment to blog %s, post %s", blog_id, post_id)

        # Grab auth info from DB (this is also an ACL check...)
        proxies = BlogProxy.all().filter("blog_id =", blog_id).fetch(1)
        if not proxies:
            logging.info("No BlogProxy found for blog %s, rejecting salmon.", blog_id)
            self.response.set_status(404)
            self.response.out.write("Bad Salmon, blog " + blog_id + " is not known here -- rejected.\n")
            return
        bp = proxies[0]
        # NOTE(review): pickle.loads executes whatever was pickled; make sure
        # only trusted code can write pickled_tokens.
        tokens = pickle.loads(bp.pickled_tokens)
        oauth_token = tokens["http://www.blogger.com/feeds/"]
        # TODO: Add some error checking, for Ghu's sake.

        # Let's see if override_token, at least, does what it says in this hall of
        # funhouse mirrors we call a GData client:
        self.client.blogger.override_token = oauth_token
        # The token itself is deliberately kept out of the logs.
        logging.info("Using stored OAuth token for blog %s", blog_id)
        self.client.blogger.AddComment(entry, blog_id=blog_id, post_id=post_id)
        self.response.out.write("Salmon accepted, sent upstream to source!\n")
        self.response.set_status(200)
Ejemplo n.º 8
0
    def post(self):
        """Relay an incoming Salmon entry to Blogger as a comment.

        The request body is parsed as a feed.  Every entry must have at least
        one known parent (``model.getTopicsOf``).  The last parsed entry is
        then turned into an Atom comment and posted through
        ``self.client.blogger``, using the OAuth token stored on the
        ``BlogProxy`` of the blog named in the entry's ``in_reply_to`` tag
        URI.

        Responds 400 for an undecodable or malformed body, a body without
        entries, an entry without a parent, or an ``in_reply_to`` that is not
        a Blogger post tag; 404 when no ``BlogProxy`` exists for the blog;
        200 after the comment has been passed to the Blogger client.

        NOTE(review): only the last entry of a multi-entry body is relayed;
        confirm whether Salmon bodies are expected to hold a single entry.
        """
        # Function-scope stdlib import so the file's import block stays as is.
        from xml.sax.saxutils import escape

        try:
            body = self.request.body.decode('utf-8')
        except UnicodeDecodeError:
            logging.error('Salmon body is not valid UTF-8')
            self.response.set_status(400)
            return

        logging.info('Salmon body is:\n%s\n----', body)

        data = feedparser.parse(body)
        logging.info('Data parsed was:\n%s\n----', data)
        if data.bozo:
            bozo_exc = data.bozo_exception
            logging.error('Bozo feed data. %s: %r',
                          bozo_exc.__class__.__name__,
                          bozo_exc)
            if (hasattr(bozo_exc, 'getLineNumber')
                    and hasattr(bozo_exc, 'getMessage')):
                line = bozo_exc.getLineNumber()
                logging.error('Line %d: %s', line, bozo_exc.getMessage())
            self.response.set_status(400)
            return

        logging.info('Found %d entries', len(data.entries))
        if not data.entries:
            # Without an entry there is nothing to relay; the code below
            # would otherwise fail on an unbound ``s``.
            self.response.set_status(400)
            self.response.out.write(
                'Bad Salmon, no entries found -- rejected.\n')
            return

        for entry in data.entries:
            s = model.makeEntry(entry)

            referents = model.getTopicsOf(s)

            # count() is evaluated once and reused for the log and the check.
            parent_count = referents.count()
            logging.info('Saw %d parent(s)', parent_count)
            if parent_count == 0:
                logging.info(
                    'No parent found for %s, returning error to client.',
                    s.entry_id)
                self.response.set_status(400)
                self.response.out.write('Bad Salmon, no parent with id ' +
                                        unicode(s.in_reply_to) +
                                        ' found -- rejected.\n')
                return

        # Pull body & other info out of salmon

        # Create an Atom entry and post as a comment
        # TODO: Fix Blogger so it accepts acct: URIs... sigh...
        name = re.sub(r"(..\@.+)", "...", s.author_name)
        author_uri = "http://example.org/profile/" + name
        #if author_uri.startswith("acct:"):
        #  author_uri = author_uri.replace("acct:","http://")
        # The author name comes from the sender, so escape it before it is
        # placed inside markup (and inside the quoted href attribute).
        text = (s.content +
                ' by <a href="' + escape(author_uri, {'"': '&quot;'}) + '">' +
                escape(name) + '</a>')
        entry = atom.Entry(content=atom.Content(text=text))

        # Grab the entry ID from the in-reply-to element of the salmon
        p = re.compile(r'tag:blogger\.com,1999:blog-(\d+)\.post-(\d+)')
        m = p.match(s.in_reply_to or '')
        if not m:
            self.response.set_status(400)
            return
        blog_id = m.group(1)
        post_id = m.group(2)

        logging.info("About to post comment to blog %s, post %s", blog_id,
                     post_id)

        # Grab auth info from DB (this is also an ACL check...)
        proxies = BlogProxy.all().filter('blog_id =', blog_id).fetch(1)
        if not proxies:
            logging.info('No BlogProxy found for blog %s, rejecting salmon.',
                         blog_id)
            self.response.set_status(404)
            self.response.out.write('Bad Salmon, blog ' + blog_id +
                                    ' is not known here -- rejected.\n')
            return
        bp = proxies[0]
        # NOTE(review): pickle.loads executes whatever was pickled; make sure
        # only trusted code can write pickled_tokens.
        tokens = pickle.loads(bp.pickled_tokens)
        oauth_token = tokens["http://www.blogger.com/feeds/"]
        # TODO: Add some error checking, for Ghu's sake.

        # Let's see if override_token, at least, does what it says in this hall of
        # funhouse mirrors we call a GData client:
        self.client.blogger.override_token = oauth_token
        # The token itself is deliberately kept out of the logs.
        logging.info("Using stored OAuth token for blog %s", blog_id)
        self.client.blogger.AddComment(entry, blog_id=blog_id, post_id=post_id)
        self.response.out.write("Salmon accepted, sent upstream to source!\n")
        self.response.set_status(200)