cloudAMQP_client = CloudAMQPClient(SCRAPE_NEWS_TASK_QUEUE_URL,
                                   SCRAPE_NEWS_TASK_QUEUE_NAME)

# Poll the news API forever: fingerprint every fetched item by its title,
# forward the items whose fingerprint is not yet recorded in Redis to the
# scrape-task queue, then sleep before the next poll.
while True:
    news_list = news_api_client.getNewsFromSource(NEWS_SOURCES)

    num_of_new_news = 0

    for news in news_list:
        title = news.get('title')
        if title is None:
            # Without a title there is nothing to fingerprint. Skip the item
            # instead of letting the failed .encode() call stop the monitor.
            LOGGING_NEWS_MONITOR.info('[x] Skipped news without a title')
            continue

        # The base64-encoded MD5 of the UTF-8 title is used both as the Redis
        # key and as the 'digest' field attached to the outgoing message.
        # NOTE(review): .encode('base64') is a Python 2-only codec call; it is
        # kept as-is so digests stay identical to the ones already stored.
        news_digest = hashlib.md5(
            title.encode('utf-8')).digest().encode('base64')

        if redis_client.get(news_digest) is not None:
            # Digest already recorded in Redis: not a new item.
            continue

        num_of_new_news += 1
        news['digest'] = news_digest

        # Fall back to the current UTC time when the item carries no
        # publication timestamp (key absent or explicitly None).
        if news.get('publishedAt') is None:
            news['publishedAt'] = datetime.datetime.utcnow().strftime(
                "%Y-%m-%dT%H:%M:%SZ")

        # Record the digest with an expiry before queueing the item.
        redis_client.set(news_digest, "True")
        redis_client.expire(news_digest, NEWS_TIME_OUT_IN_SECONDS)

        cloudAMQP_client.send_message(news)
        LOGGING_NEWS_MONITOR.info('[x] Sent message to %s' % (title))

    print("Fetched %d news." % num_of_new_news)

    cloudAMQP_client.sleep(SLEEP_TIME_IN_SECONDS)
SCRAPE_NEWS_TASK_QUEUE_NAME) def handle_message(msg): if msg is None or not isinstance(msg, dict): print 'message is broken' return task = msg text = None article = Article(task['url']) article.download() article.parse() task['text'] = article.text.encode('utf-8') dedupe_news_queue_client.send_message(task) while True: if scrape_news_queue_client is not None: msg = scrape_news_queue_client.get_message() if msg is not None: try: handle_message(msg) except Exception as e: print # coding=utf-8 pass scrape_news_queue_client.sleep(SLEEP_TIME_IN_SECONDS)