def get(self):
    """Serve the feed named by the ``feed`` parameter, augmented for Salmon.

    The feed is parsed with feedparser and, unless it already carries a
    ``rel="salmon"`` link, one pointing at ``/post`` on the requesting host
    is appended.  The result is rendered through the ``atom.xml`` template.
    Afterwards the feed is registered through
    ``bloggerproxy.addNonBloggerBlogProxy`` and each parsed entry is saved
    with ``db.put``.
    """
    feedurl = self.request.get('feed')
    data = feedparser.parse(feedurl)

    # Augment with a salmon endpoint. Don't overwrite existing!
    # A feed without any <link> has no ``links`` key, so create the list on
    # demand instead of failing with AttributeError.
    links = data.feed.setdefault('links', [])
    foundsalmon = any((link.get('rel') or '').lower() == 'salmon'
                      for link in links)
    if not foundsalmon:
        endpoint = u'http://' + self.request.headers['Host'] + '/post'
        links.append({'href': endpoint,
                      'type': u'application/atom+xml',
                      'rel': u'salmon'})

    # TODO: When the parse is flagged bozo, annotate stored data and/or hand
    # back a warning.
    # TODO: Have an alternate template that just shows the Atom with the
    # salmon stuff highlighted in some way.

    # Item assignment replaces any Content-Type already present on the
    # response; add_header() would append a second header line instead.
    self.response.headers['Content-Type'] = 'application/atom+xml; charset=utf-8'
    self.response.set_status(200)
    self.response.out.write(template.render('atom.xml', data))

    # Add a fake BlogProxy entry so that we can fetch updated comments for
    # this feed.
    bloggerproxy.addNonBloggerBlogProxy(feedurl)

    # And store the entries discovered in our own DB for reference.
    for entry in data.entries:
        e = model.makeEntry(entry, data.feed)
        db.put([e])
        logging.info('Remembering entry with title = "%s", id = "%s", '
                     'link = "%s"', e.title, e.entry_id, e.link)
def crawlFeedAndComments(feedurl, data, hacked_in_reply_to_override=None):
    """Crawl a single feed and all comments on its entries, recursively.

    Args:
      feedurl: URL the feed was fetched from; only used in log messages.
      data: feedparser result for that feed.
      hacked_in_reply_to_override: when truthy, written into every entry as
        "in-reply-to" before the entry is converted.

    Every entry is converted with ``model.makeEntry`` and the converted
    entries of this feed are saved with a single ``db.put``.  For each
    "replies" link of type application/atom+xml the linked feed is fetched
    and crawled with the entry's id as the in-reply-to override.
    """
    if data.bozo:
        logging.warning(
            "Feed %s has errors: %s: %r",
            feedurl,
            data.bozo_exception.__class__.__name__,
            data.bozo_exception,
        )
        if hasattr(data.bozo_exception, "getLineNumber") and hasattr(
            data.bozo_exception, "getMessage"
        ):
            line = data.bozo_exception.getLineNumber()
            logging.warning("Line %d: %s", line, data.bozo_exception.getMessage())

    update_list = []
    for entry in data.entries:
        # TODO: Get rid of this if/when feedparser is fixed
        if hacked_in_reply_to_override:
            entry["in-reply-to"] = hacked_in_reply_to_override
        update_list.append(model.makeEntry(entry, data.feed))

        # Now look to see if the entry has comments
        commentfeeds = model.getLinkRel(entry, "replies")
        logging.info("Comments for entry: %s", commentfeeds)
        for f in commentfeeds:
            # A link may carry no type at all; treat that as "not Atom".
            if getattr(f, "type", None) != "application/atom+xml":
                continue
            # TODO: Discover most recent comment from this and use to update
            # last active timestamp
            # Pass the URL (not the link object) so that warnings logged by
            # the nested crawl name the feed they refer to.
            crawlFeedAndComments(f.href, feedparser.parse(f.href), entry.id)

    db.put(update_list)
def crawlFeedAndComments(feedurl, data, hacked_in_reply_to_override=None):
    """Crawl a single feed and all comments on its entries, recursively.

    ``feedurl`` is only used for logging.  ``data`` is the feedparser result
    for the feed.  When ``hacked_in_reply_to_override`` is truthy it is stored
    as 'in-reply-to' on each entry before conversion.

    Converted entries are collected and written with one ``db.put`` after the
    whole feed, including the comment feeds it links to, has been walked.
    """
    if data.bozo:
        problem = data.bozo_exception
        logging.warning("Feed %s has errors: %s: %r",
                        feedurl, problem.__class__.__name__, problem)
        if (hasattr(problem, 'getLineNumber') and
                hasattr(problem, 'getMessage')):
            line = problem.getLineNumber()
            logging.warning('Line %d: %s', line, problem.getMessage())

    update_list = []
    for entry in data.entries:
        # TODO: Get rid of this if/when feedparser is fixed
        if hacked_in_reply_to_override:
            entry['in-reply-to'] = hacked_in_reply_to_override
        e = model.makeEntry(entry, data.feed)
        update_list.append(e)

        # Now look to see if the entry has comments
        commentfeeds = model.getLinkRel(entry, "replies")
        logging.info("Comments for entry: %s", commentfeeds)
        for f in commentfeeds:
            # getattr keeps a link without a type from raising; such links
            # are simply skipped.
            if getattr(f, 'type', None) == 'application/atom+xml':
                # TODO: Discover most recent comment from this and use to
                # update last active timestamp
                comment_url = f.href
                # The first argument is the feed URL used in log output, so
                # hand over the href rather than the whole link object.
                crawlFeedAndComments(comment_url,
                                     feedparser.parse(comment_url),
                                     entry.id)

    db.put(update_list)
def post(self):
    """Accept a Salmon POST and store its entries as replies.

    The request body is parsed as a feed.  Every entry must have at least
    one parent according to ``model.getTopicsOf``; otherwise the request is
    rejected with 400 and nothing is written.  For accepted requests each
    parent whose ``thread_updated`` is older than the reply's ``updated`` is
    bumped and saved, and the replies are stored with one ``db.put``.

    Responds 500 when feedparser flags the body as bozo, 400 when a reply
    has no parent, 200 otherwise.
    """
    logging.info('Headers =\n%s\n', self.request.headers)

    # TODO: Do a check for application/atom+xml and charset.  (The header is
    # deliberately not read here: nothing used it, and indexing it failed
    # with KeyError for requests that omit Content-Type.)
    # TODO: Get in-reply-to from the entry's thr:in-reply-to if possible;
    # feedparser.py BUG here.
    body = self.request.body.decode('utf-8')
    logging.info('Post body is %d characters', len(body))
    logging.info('Post body is:\n%s\n----', body)

    data = feedparser.parse(body)
    logging.info('Data returned was:\n%s\n----', data)
    if data.bozo:
        logging.error('Bozo feed data. %s: %r',
                      data.bozo_exception.__class__.__name__,
                      data.bozo_exception)
        if (hasattr(data.bozo_exception, 'getLineNumber') and
                hasattr(data.bozo_exception, 'getMessage')):
            line = data.bozo_exception.getLineNumber()
            logging.error('Line %d: %s', line,
                          data.bozo_exception.getMessage())
        return self.response.set_status(500)

    logging.info('Found %d entries', len(data.entries))

    # Pass 1: validate every entry before touching the datastore, so that a
    # rejected request cannot leave parents updated for replies that were
    # never stored.
    accepted = []
    for entry in data.entries:
        s = model.makeEntry(entry)
        referents = model.getTopicsOf(s)
        parent_count = referents.count()  # one count query per entry
        logging.info('Saw %d parents!', parent_count)
        if parent_count == 0:
            logging.info('No parent found for %s, returning error to client.',
                         s.entry_id)
            self.response.set_status(400)
            self.response.out.write('Bad Salmon, no parent with id ' +
                                    unicode(s.in_reply_to) +
                                    ' found -- rejected.\n')
            return
        accepted.append((s, referents))

    # Pass 2: update thread_updated on parents where necessary and collect
    # the replies for a single batched write.
    update_list = []
    for s, referents in accepted:
        for parent in referents:
            logging.info('Saw parent: %s\n', parent)
            if parent.thread_updated < s.updated:
                parent.thread_updated = s.updated
                parent.put()
        update_list.append(s)

    db.put(update_list)
    self.response.set_status(200)
    self.response.out.write("Salmon accepted, swimming upstream!\n")
def post(self):
    """Handle an incoming Salmon: parse it and store its entries.

    Status codes written to the response:
      500 -- feedparser flagged the posted body as bozo.
      400 -- an entry has no parent (``model.getTopicsOf`` found nothing);
             in that case no datastore write is made for any entry.
      200 -- all entries were stored via ``db.put``; parents with an older
             ``thread_updated`` were bumped to the reply's ``updated``.
    """
    logging.info('Headers =\n%s\n', self.request.headers)

    # TODO: Do a check for application/atom+xml and charset.
    # TODO: Get in-reply-to from the entry's thr:in-reply-to if possible;
    # feedparser.py BUG here.
    # The former unused reads of the 'inreplyto' parameter and the
    # Content-Type header are gone; the latter raised KeyError whenever the
    # header was missing.
    body = self.request.body.decode('utf-8')
    logging.info('Post body is %d characters', len(body))
    logging.info('Post body is:\n%s\n----', body)

    data = feedparser.parse(body)
    logging.info('Data returned was:\n%s\n----', data)
    if data.bozo:
        failure = data.bozo_exception
        logging.error('Bozo feed data. %s: %r',
                      failure.__class__.__name__, failure)
        if hasattr(failure, 'getLineNumber') and hasattr(failure, 'getMessage'):
            line = failure.getLineNumber()
            logging.error('Line %d: %s', line, failure.getMessage())
        return self.response.set_status(500)

    logging.info('Found %d entries', len(data.entries))

    # Check all entries first: rejecting half-way through used to leave the
    # parents of earlier entries modified although those entries themselves
    # were never saved.
    pending = []
    for entry in data.entries:
        reply = model.makeEntry(entry)
        parents = model.getTopicsOf(reply)
        found = parents.count()  # counted once, reused for log and check
        logging.info('Saw %d parents!', found)
        if not found:
            logging.info('No parent found for %s, returning error to client.',
                         reply.entry_id)
            self.response.set_status(400)
            self.response.out.write('Bad Salmon, no parent with id ' +
                                    unicode(reply.in_reply_to) +
                                    ' found -- rejected.\n')
            return
        pending.append((reply, parents))

    update_list = []
    for reply, parents in pending:
        # Look for parents, update thread_updated if necessary
        for parent in parents:
            logging.info('Saw parent: %s\n', parent)
            if parent.thread_updated < reply.updated:
                parent.thread_updated = reply.updated
                parent.put()
        update_list.append(reply)

    db.put(update_list)
    self.response.set_status(200)
    self.response.out.write("Salmon accepted, swimming upstream!\n")
def get(self):
    """Return the requested feed as Atom with a Salmon endpoint link added.

    Reads the feed URL from the ``feed`` request parameter, parses it with
    feedparser, makes sure the feed's links contain one with rel "salmon"
    (adding ``http://<Host>/post`` when none is present), and writes the
    ``atom.xml`` template rendering of the parsed data to the response.
    The feed URL is then passed to ``bloggerproxy.addNonBloggerBlogProxy``
    and each entry is converted with ``model.makeEntry`` and saved.
    """
    feedurl = self.request.get('feed')
    data = feedparser.parse(feedurl)

    # A parsed feed without link elements has no 'links' entry; start an
    # empty list in that case rather than raising AttributeError.
    links = getattr(data.feed, 'links', None)
    if links is None:
        links = data.feed['links'] = []

    # Augment with a salmon endpoint. Don't overwrite existing!
    for link in links:
        if (link.get('rel') or '').lower() == 'salmon':
            break
    else:
        endpoint = u'http://' + self.request.headers['Host'] + '/post'
        links.append({
            'href': endpoint,
            'type': u'application/atom+xml',
            'rel': u'salmon',
        })

    # TODO: When the parse is flagged bozo, annotate stored data and/or hand
    # back a warning.
    # TODO: Have an alternate template that just shows the Atom with the
    # salmon stuff highlighted in some way.
    self.response.out.write(template.render('atom.xml', data))
    # Assign instead of add_header(): assignment replaces an existing
    # Content-Type, add_header() would leave the response with two of them.
    self.response.headers['Content-Type'] = (
        'application/atom+xml; charset=utf-8')
    self.response.set_status(200)

    # Add a fake BlogProxy entry so that we can fetch updated comments for
    # this feed.
    bloggerproxy.addNonBloggerBlogProxy(feedurl)

    # And store the entries discovered in our own DB for reference.
    for entry in data.entries:
        e = model.makeEntry(entry, data.feed)
        db.put([e])
        logging.info(
            'Remembering entry with title = "%s", id = "%s", '
            'link = "%s"', e.title, e.entry_id, e.link)
def post(self):
    """Relay an incoming Salmon to Blogger as a comment on the original post.

    The body is parsed as a feed.  For every entry the handler checks that a
    parent exists (``model.getTopicsOf``), extracts blog and post id from the
    entry's in-reply-to tag URI, loads the stored OAuth token of the matching
    ``BlogProxy`` and posts a comment through the Blogger client.

    Responds 400 when the body is bozo, when an entry has no parent, when
    in-reply-to is not a Blogger tag URI, or when no BlogProxy exists for the
    blog; 200 once every entry has been relayed.
    """
    # Imported locally so this handler does not depend on a file-level import.
    from xml.sax.saxutils import escape

    body = self.request.body.decode("utf-8")
    logging.info("Salmon body is:\n%s\n----", body)
    data = feedparser.parse(body)
    logging.info("Data parsed was:\n%s\n----", data)
    if data.bozo:
        logging.error("Bozo feed data. %s: %r", data.bozo_exception.__class__.__name__, data.bozo_exception)
        if hasattr(data.bozo_exception, "getLineNumber") and hasattr(data.bozo_exception, "getMessage"):
            line = data.bozo_exception.getLineNumber()
            logging.error("Line %d: %s", line, data.bozo_exception.getMessage())
        return self.response.set_status(400)

    # Compiled once for all entries; raw string avoids invalid-escape warnings.
    blogger_tag = re.compile(r"tag:blogger\.com,1999:blog-(\d+)\.post-(\d+)")

    logging.info("Found %d entries", len(data.entries))
    for entry in data.entries:
        s = model.makeEntry(entry)
        referents = model.getTopicsOf(s)
        parent_count = referents.count()
        logging.info("Saw %d parent(s)", parent_count)
        if parent_count == 0:
            logging.info("No parent found for %s, returning error to client.", s.entry_id)
            self.response.set_status(400)
            self.response.out.write(
                "Bad Salmon, no parent with id " + unicode(s.in_reply_to) + " found -- rejected.\n"
            )
            return

        # Pull body & other info out of salmon
        # Create an Atom entry and post as a comment
        # TODO: Fix Blogger so it accepts acct: URIs... sigh...
        # Replaces the two characters before an "@" and everything after it
        # with "..." so a full address is not published.
        name = re.sub(r"(..\@.+)", "...", s.author_name)
        author_uri = "http://example.org/profile/" + name
        # The author name comes from the sender; escape it before splicing
        # it into markup (quotes too, for the attribute value).
        text = (
            s.content
            + ' by <a href="'
            + escape(author_uri, {'"': "&quot;"})
            + '">'
            + escape(name)
            + "</a>"
        )
        comment = atom.Entry(content=atom.Content(text=text))

        # Grab the entry ID from the in-reply-to element of the salmon
        m = blogger_tag.match(s.in_reply_to)
        if not m:
            self.response.set_status(400)
            return
        blog_id, post_id = m.groups()
        logging.info("About to post comment to blog %s, post %s", blog_id, post_id)

        # Grab auth info from DB (this is also an ACL check...)
        proxies = BlogProxy.all().filter("blog_id =", blog_id).fetch(1)
        if not proxies:
            # Previously an IndexError (and thus a server error).
            logging.warning("No BlogProxy found for blog %s, rejecting salmon.", blog_id)
            self.response.set_status(400)
            self.response.out.write("Bad Salmon, blog " + blog_id + " is not known here -- rejected.\n")
            return
        # NOTE(review): pickle.loads is only acceptable because the blob is
        # presumably written by this application itself -- confirm.
        tokens = pickle.loads(proxies[0].pickled_tokens)
        oauth_token = tokens["http://www.blogger.com/feeds/"]  # TODO: Add some error checking, for Ghu's sake.

        # Let's see if override_token, at least, does what it says in this
        # hall of funhouse mirrors we call a GData client:
        self.client.blogger.override_token = oauth_token
        # The token itself is a credential and is intentionally not logged.
        logging.info("Using stored OAuth token for blog %s", blog_id)
        self.client.blogger.AddComment(comment, blog_id=blog_id, post_id=post_id)

    self.response.out.write("Salmon accepted, sent upstream to source!\n")
    self.response.set_status(200)
def post(self):
    """Take care of incoming salmon by posting it to Blogger as a comment.

    Steps per entry of the posted feed: verify a parent exists via
    ``model.getTopicsOf``; read blog id and post id out of the in-reply-to
    tag URI; look up the ``BlogProxy`` for that blog to obtain its OAuth
    token; create an Atom entry from the salmon content plus an author
    link; send it with ``self.client.blogger.AddComment``.

    Any failed check ends the request with status 400 (bozo body, missing
    parent, in-reply-to that is not a Blogger tag URI, unknown blog).  A
    fully relayed request ends with status 200.
    """
    # Function-scope import: keeps the escape helper available without
    # touching the file's import block.
    from xml.sax.saxutils import escape

    body = self.request.body.decode('utf-8')
    logging.info('Salmon body is:\n%s\n----', body)
    data = feedparser.parse(body)
    logging.info('Data parsed was:\n%s\n----', data)
    if data.bozo:
        failure = data.bozo_exception
        logging.error('Bozo feed data. %s: %r',
                      failure.__class__.__name__, failure)
        if (hasattr(failure, 'getLineNumber') and
                hasattr(failure, 'getMessage')):
            line = failure.getLineNumber()
            logging.error('Line %d: %s', line, failure.getMessage())
        return self.response.set_status(400)

    # Hoisted out of the loop and written as raw strings so the backslashes
    # reach the regex engine without relying on unknown string escapes.
    address_tail = re.compile(r"(..\@.+)")
    blogger_tag = re.compile(r'tag:blogger\.com,1999:blog-(\d+)\.post-(\d+)')

    logging.info('Found %d entries', len(data.entries))
    for entry in data.entries:
        s = model.makeEntry(entry)
        referents = model.getTopicsOf(s)
        n_parents = referents.count()
        logging.info('Saw %d parent(s)', n_parents)
        if not n_parents:
            logging.info('No parent found for %s, returning error to client.',
                         s.entry_id)
            self.response.set_status(400)
            self.response.out.write('Bad Salmon, no parent with id ' +
                                    unicode(s.in_reply_to) +
                                    ' found -- rejected.\n')
            return

        # Grab the entry ID from the in-reply-to element of the salmon
        m = blogger_tag.match(s.in_reply_to)
        if not m:
            self.response.set_status(400)
            return
        blog_id = m.group(1)
        post_id = m.group(2)

        # Grab auth info from DB (this is also an ACL check...)
        found = BlogProxy.all().filter('blog_id =', blog_id).fetch(1)
        if not found:
            # An unknown blog used to surface as an unhandled IndexError.
            logging.warning('No BlogProxy found for blog %s, rejecting salmon.',
                            blog_id)
            self.response.set_status(400)
            self.response.out.write('Bad Salmon, blog ' + blog_id +
                                    ' is not known here -- rejected.\n')
            return
        bp = found[0]
        # NOTE(review): unpickling is safe only if pickled_tokens is always
        # produced by this application -- confirm against the writer.
        tokens = pickle.loads(bp.pickled_tokens)
        # TODO: Add some error checking, for Ghu's sake.
        oauth_token = tokens["http://www.blogger.com/feeds/"]

        # Pull body & other info out of salmon
        # Create an Atom entry and post as a comment
        # TODO: Fix Blogger so it accepts acct: URIs... sigh...
        name = address_tail.sub("...", s.author_name)
        author_uri = "http://example.org/profile/" + name
        # name is sender-controlled: escape it for both the attribute value
        # and the link text so it cannot inject markup into the comment.
        link_html = ('<a href="' + escape(author_uri, {'"': '&quot;'}) +
                     '">' + escape(name) + '</a>')
        comment = atom.Entry(
            content=atom.Content(text=s.content + ' by ' + link_html))

        logging.info("About to post comment to blog %s, post %s",
                     blog_id, post_id)
        # Let's see if override_token, at least, does what it says in this
        # hall of funhouse mirrors we call a GData client:
        self.client.blogger.override_token = oauth_token
        # Log that a token is in use, never the credential itself.
        logging.info("Using stored OAuth token for blog %s", blog_id)
        self.client.blogger.AddComment(comment, blog_id=blog_id,
                                       post_id=post_id)

    self.response.out.write("Salmon accepted, sent upstream to source!\n")
    self.response.set_status(200)