def test_basic():
    """Send one message through the queue and check the same payload comes back.

    Builds a CloudAMQPClient from CloudAMQP_URL / QUEUE_NAME, publishes a
    small dict, pauses via the client's own sleep(), then asserts that the
    next message read equals what was sent.
    """
    queue = CloudAMQPClient(CloudAMQP_URL, QUEUE_NAME)
    payload = {'test': 'success'}

    queue.sendMessage(payload)
    # Presumably gives the broker time to make the message available
    # before we poll for it -- confirm against CloudAMQPClient.sleep.
    queue.sleep(2)

    echoed = queue.getMessage()
    assert echoed == payload
    print('cloudAMQP connection success')
def test_basic():
    """Round-trip a single key/value message through the queue.

    Publishes a one-entry dict with a CloudAMQPClient built from
    CLOUDAMQP_URL / QUEUE_NAME, waits via the client's sleep(), and asserts
    that the message read back equals the one sent.
    """
    outgoing = dict(test_key='test_value')
    queue = CloudAMQPClient(CLOUDAMQP_URL, QUEUE_NAME)

    queue.sendMessage(outgoing)
    # Pause before reading; presumably lets the message reach the queue.
    queue.sleep(5)

    assert outgoing == queue.getMessage()
    print("test_basic passed!")
# Client for the queue that handle_message() forwards scraped tasks to.
dedupe_news_queue_client = CloudAMQPClient(DEDUPE_NEWS_TASK_QUEUE_URL,
                                           DEDUPE_NEWS_TASK_QUEUE_NAME)


def handle_message(msg):
    """Fetch the article for one task and forward the enriched task.

    `msg` must be a dict; anything else is reported as broken and dropped.
    The article at msg['url'] is downloaded and parsed through Article, its
    text is stored on the same dict under 'text' (the dict is mutated in
    place), the text is printed, and the dict is sent to
    dedupe_news_queue_client.

    Exceptions from the lookup, download, parse or send are not handled
    here; they propagate to the caller.
    """
    usable = msg is not None and isinstance(msg, dict)
    if not usable:
        print('message is broken')
        return

    page = Article(msg['url'])
    page.download()
    page.parse()

    body = page.text
    msg['text'] = body
    print(body)

    dedupe_news_queue_client.sendMessage(msg)


# Poll the scrape queue forever, handling at most one message per pass.
while True:
    # NOTE(review): the sleep below is assumed to belong to the
    # client-is-not-None branch, so a None client goes straight to the
    # next iteration without pausing -- confirm against the original layout.
    if scrape_news_queue_client is None:
        continue

    msg = scrape_news_queue_client.getMessage()
    if msg is not None:
        # A failing task is reported and skipped so the loop keeps running.
        try:
            handle_message(msg)
        except Exception as err:
            print(err)

    scrape_news_queue_client.sleep(SLEEP_TIME_IN_SECONDS)
if pairwise_sim[row, 0] > SAME_NEWS_SIMILARITY_THRESHOLD: print "Duplicated news. Ignore." return task['publishedAt'] = parser.parse(task['publishedAt']) # Classify news title = task['title'] if title is not None: topic = news_topic_modeling_service_client.classify(title) task['class'] = topic db[NEWS_TABLE_NAME].replace_one({'digest': task['digest']}, task, upsert=True) while True: if dedupe_news_queue_client is not None: msg = dedupe_news_queue_client.getMessage() if msg is not None: # Parse and process the task try: # print "haha" handle_message(msg) except Exception as e: print e pass dedupe_news_queue_client.sleep(SLEEP_TIME_IN_SECONDS)
# Connection settings for the queue that receives newly discovered news.
SCRAPE_NEWS_TASK_QUEUE_URL = 'amqp://*****:*****@donkey.rmq.cloudamqp.com/vtrjgcrd'
SCRAPE_NEWS_TASK_QUEUE_NAME = 'tap-news-scrape-news-task-queue'

# Pause between two fetch rounds, passed to the client's sleep().
SLEEP_TIME_IN_SECONDS = 10

cloudAMQP_client = CloudAMQPClient(SCRAPE_NEWS_TASK_QUEUE_URL,
                                   SCRAPE_NEWS_TASK_QUEUE_NAME)

# Main loop: fetch the current news list, publish every item whose digest
# is not yet recorded in redis_client, report the count, then sleep.
while True:
    news_list = news_api_client.getNewsFromSource(NEWS_SOURCES)
    num_of_new_news = 0

    for news in news_list:
        # The digest is the base64-encoded MD5 of the UTF-8 title; it is the
        # key used to recognise an item that was already seen.
        title_bytes = news['title'].encode('utf-8')
        news_digest = hashlib.md5(title_bytes).digest().encode('base64')

        if redis_client.get(news_digest) is not None:
            # Digest already recorded: nothing to publish for this item.
            continue

        num_of_new_news += 1
        news['digest'] = news_digest

        # Fall back to the current UTC time, formatted as
        # YYYY-MM-DDTHH:MM:SSZ, when no publication time is given.
        if news['publishedAt'] is None:
            now_utc = datetime.datetime.utcnow()
            news['publishedAt'] = now_utc.strftime('%Y-%m-%dT%H:%M:%SZ')

        # Record the digest (with a timeout) before handing the item on.
        redis_client.set(news_digest, news)
        redis_client.expire(news_digest, NEWS_TIME_OUT_IN_SECONDS)

        cloudAMQP_client.sendMessage(news)

    print("Fetch %d news." % num_of_new_news)

    cloudAMQP_client.sleep(SLEEP_TIME_IN_SECONDS)