Example #1
import feedparser

from google.appengine.ext import ndb

# conf, FeedConsume, FeedItem, shorten_url_bitly and update_status_twitter
# are defined elsewhere in the application.


def twitter_bot(rss_guid=None):
    """
    Consumes a feed and checks if there are new entries in db.
    If so, gets a shortened url and tweets the new status.
    """

    if rss_guid is None:
        # ancestor_key = ndb.Key("RSS_GUID", rss_guid or "*norss*")
        # consumer = FeedConsume.get_last_rss_guid(ancestor_key)
        # rss_guid = consumer[0].rss_guid
        query = FeedConsume.gql("WHERE entry = :1", "latest")
        result = query.get()
        rss_guid = result.rss_guid
    else:
        consumer = FeedConsume(parent=ndb.Key("RSS_GUID", rss_guid or "*norss*"),
                               rss_guid=rss_guid, entry="latest")
        consumer.put()
    url = "{}erss.cgi?rss_guid={}".format(conf("pubmed_rss"), rss_guid)
    feeds = feedparser.parse(url)
    tweets = []
    for feed in feeds["items"]:
        tail = feed["link"].split("/")[-1]
        # skip utility links such as ".../entrez?..." that carry no PMID
        if "entrez?" in tail:
            continue
        # keep only the PMID: str.rstrip() strips a *set* of characters,
        # not a suffix, so split off the query string instead
        pmid = tail.split("?")[0]
        query = FeedItem.gql("WHERE pmid = :1", pmid)
        # only tweet entries whose PMID is not yet in the datastore
        if query.count() == 0:
            title = feed["title"]
            otitle = title
            url = feed["link"]
            category = feed["category"]
            item = FeedItem()
            item.pmid = pmid
        
            # shorten the url with Bitly.com
            shorturl = shorten_url_bitly(url)

            # tweet the new entry: 140 chars minus the category hashtag,
            # the short url and 7 chars of separators ("#", ": ", "... ")
            max_length = 140 - len(category) - len(shorturl) - 7
            if len(title) > max_length:
                title = title[:max_length]
            status = "#{}: {}... {}".format("".join(category.split()), title.rstrip(". "), shorturl)
            # Python 2: make sure the status is UTF-8 encoded bytes
            try:
                status = unicode(status).encode("utf-8")
            except (UnicodeDecodeError, UnicodeEncodeError):
                pass  # TODO: add logging

            # tweet new status
            # tweets.append({'title': "{}...".format(title.rstrip(". ")), 'url': shorturl})
            ttitle = "#{}: {}...".format("".join(category.split()), otitle[0:100].rstrip(". "))
            tweets.append({'title': ttitle, 'url': shorturl})
            try:
                update_status_twitter(status)
                item.put()
            except Exception:
                pass  # TODO: add logging; a failed tweet should not abort the run
            
    return tweets
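
The two helpers called above, shorten_url_bitly() and update_status_twitter(), are not part of the example. A minimal sketch of what they might look like, assuming the Bitly v3 /shorten endpoint and the tweepy library; the credential constants (BITLY_TOKEN, CONSUMER_KEY, CONSUMER_SECRET, ACCESS_TOKEN, ACCESS_SECRET) are placeholders, not values from the original code:

import json
import urllib
import urllib2

import tweepy

def shorten_url_bitly(long_url):
    # hypothetical sketch assuming Bitly's v3 REST API
    params = urllib.urlencode({"access_token": BITLY_TOKEN, "longUrl": long_url})
    response = urllib2.urlopen("https://api-ssl.bitly.com/v3/shorten?" + params)
    return json.loads(response.read())["data"]["url"]

def update_status_twitter(status):
    # hypothetical sketch assuming tweepy with OAuth 1.0a credentials
    auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
    auth.set_access_token(ACCESS_TOKEN, ACCESS_SECRET)
    tweepy.API(auth).update_status(status)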
Example #2
	# assumed context: a webapp request handler; logging, time, datetime, re,
	# feedparser, urlfetch and db are imported at module level
	def get(self):
		feeds = self._getFeeds()
		logging.debug("Got %d feeds" % feeds.count())
		for feed in feeds:
			logging.debug("Feed %s last updated %s" % (feed.name, feed.last_updated))
			force = self.request.get("force") == "1"
			if force:
				logging.debug("Force option enabled")

			if not force and time.time() - time.mktime(feed.last_updated.timetuple()) < 3600 * 4:
				logging.debug("Feed %s doesn't need updates, skipping" % feed.name)
				continue

			logging.debug("Fetching %s" % feed.url)
			feed_content = urlfetch.fetch(feed.url)
			logging.debug("Fetched, status = %d" % feed_content.status_code)
			if feed_content.status_code == 200:
				parsed_feed = feedparser.parse(feed_content.content)
				feed.last_updated = datetime.datetime.now()
				feed.put()
			else:
				logging.error("Failed to load feed %s" % feed.name)
				self.error(500)
				# skip to the next feed: the code below would otherwise
				# reuse a stale (or unbound) parsed_feed
				continue

			# raw strings, and [A-Za-z] instead of [A-z], which also
			# matches the punctuation between 'Z' and 'a'
			linkre = re.compile(r"http://(?:www\.)?explosm\.net/comics/\d+/?")
			comicre = re.compile(r'(http://(?:www\.)?explosm\.net/db/files/Comics/[A-Za-z0-9_\-\+]+/[A-Za-z0-9\-_\+]+\.(gif|png))')
	
			logging.debug("Got %d entries" % len(parsed_feed.entries))
			for e in parsed_feed.entries:
				if linkre.match(e.link):
					if not FeedItem.is_fetched(e.link):
						logging.debug("Going to fetch entry %s" % e.link)
						result = urlfetch.fetch(e.link)
						logging.debug("Fetched, status = %d" % result.status_code)
						if result.status_code == 200:
							results = comicre.findall(result.content)
							if results:
								logging.debug("Going to fetch enclosure %s" % results[0][0])
								enclosure = urlfetch.fetch(results[0][0])
								logging.debug("Fetched, status = %d" % enclosure.status_code)
								if enclosure.status_code == 200:
									feed_item = FeedItem()
									feed_item.title = e.title
									feed_item.url = e.link
									feed_item.content_type = "image/"+results[0][1]
									feed_item.feed = feed
									feed_item.date = datetime.datetime.fromtimestamp(time.mktime(e.updated_parsed))
									feed_item.content = db.Text(e.description)
									feed_item.enclosure = enclosure.content
									feed_item.put()
								else:
									logging.error("Failed to fetch enclosure %s" % results[0])
								
							else:
								logging.debug("Got no enclosure in %s" % e.link)
	
						else:
							logging.debug("Failed to download %s" % e.link)
					else:
						logging.debug("Skipping already fetch item %s" % e.link)
				else:
					logging.debug("Skipping unknown link %s" % e.link)