def test_basic():
    """Send one message through the queue and assert it is read back unchanged."""
    payload = {'test': 'success'}

    client = CloudAMQPClient(CloudAMQP_URL, QUEUE_NAME)
    client.sendMessage(payload)
    # Presumably gives the broker time to make the message available.
    client.sleep(2)

    received = client.getMessage()
    assert received == payload
    print('cloudAMQP connection success')
def test_basic():
    """Publish a message, wait, fetch it again and check both are equal."""
    queue = CloudAMQPClient(CLOUDAMQP_URL, QUEUE_NAME)
    outgoing = {'test_key': 'test_value'}

    queue.sendMessage(outgoing)
    # Wait before reading; presumably lets the broker deliver the message.
    queue.sleep(5)
    incoming = queue.getMessage()

    assert outgoing == incoming
    print("test_basic passed!")
Beispiel #3
0
# Queue client that handle_message() uses to forward scraped tasks.
dedupe_news_queue_client = CloudAMQPClient(
    DEDUPE_NEWS_TASK_QUEUE_URL,
    DEDUPE_NEWS_TASK_QUEUE_NAME,
)

def handle_message(msg):
    """Scrape the article referenced by a task and forward it for deduplication.

    ``msg`` is expected to be a dict with a ``'url'`` key. The article at that
    URL is downloaded and parsed, its text is stored under ``msg['text']``
    (the dict is modified in place), printed, and the task is then sent on
    through ``dedupe_news_queue_client``.

    Anything that is not a dict, or a dict without a ``'url'`` key, is
    reported as broken and dropped instead of raising ``KeyError``.

    Returns None in every case. Errors raised while downloading, parsing or
    sending are not handled here and propagate to the caller.
    """
    # isinstance() is False for None, so this also covers a missing message.
    if not isinstance(msg, dict) or 'url' not in msg:
        print('message is broken')
        return

    article = Article(msg['url'])
    article.download()
    article.parse()

    msg['text'] = article.text

    print(article.text)
    dedupe_news_queue_client.sendMessage(msg)


# Poll the scrape queue forever, passing every received task to handle_message().
while True:
    if scrape_news_queue_client is None:
        # No client to poll; try again on the next iteration.
        continue

    msg = scrape_news_queue_client.getMessage()
    if msg is not None:
        try:
            handle_message(msg)
        except Exception as err:
            # A failing task is printed and skipped so the loop keeps running.
            print(err)

    scrape_news_queue_client.sleep(SLEEP_TIME_IN_SECONDS)
Beispiel #4
0
            if pairwise_sim[row, 0] > SAME_NEWS_SIMILARITY_THRESHOLD:
                print "Duplicated news. Ignore."
                return

    task['publishedAt'] = parser.parse(task['publishedAt'])

    # Classify news
    title = task['title']
    if title is not None:
        topic = news_topic_modeling_service_client.classify(title)
        task['class'] = topic

    db[NEWS_TABLE_NAME].replace_one({'digest': task['digest']},
                                    task,
                                    upsert=True)


# Endless consumer loop for the dedupe queue: fetch a task, process it, sleep.
while True:
    if dedupe_news_queue_client is None:
        # Without a client there is nothing to fetch or to sleep on.
        continue

    msg = dedupe_news_queue_client.getMessage()
    if msg is not None:
        try:
            handle_message(msg)
        except Exception as err:
            # Report the failure and move on to the next task.
            print(err)

    dedupe_news_queue_client.sleep(SLEEP_TIME_IN_SECONDS)
Beispiel #5
0
# AMQP connection URL for the scrape-task queue (credentials appear masked here).
SCRAPE_NEWS_TASK_QUEUE_URL = 'amqp://*****:*****@donkey.rmq.cloudamqp.com/vtrjgcrd'
# Queue name handed to CloudAMQPClient together with the URL.
SCRAPE_NEWS_TASK_QUEUE_NAME = 'tap-news-scrape-news-task-queue'
# Value passed to cloudAMQP_client.sleep() at the end of every fetch cycle.
SLEEP_TIME_IN_SECONDS = 10

# Queue client used by the fetch loop to publish newly seen news items.
cloudAMQP_client = CloudAMQPClient(SCRAPE_NEWS_TASK_QUEUE_URL, SCRAPE_NEWS_TASK_QUEUE_NAME)

# Fetch loop: pull news from the configured sources, publish every item that
# is not yet recorded in Redis, report the count, sleep, and repeat.
while True:
    news_list = news_api_client.getNewsFromSource(NEWS_SOURCES)
    num_of_new_news = 0

    for news in news_list:
        # The base64-encoded MD5 of the title is the key that identifies an item.
        # NOTE: str.encode('base64') is a Python 2 codec call.
        title_md5 = hashlib.md5(news['title'].encode('utf-8')).digest()
        news_digest = title_md5.encode('base64')

        # Already recorded in Redis: nothing to publish for this item.
        if redis_client.get(news_digest) is not None:
            continue

        num_of_new_news += 1
        news['digest'] = news_digest

        # Fall back to the current UTC time when the source gave no timestamp.
        if news['publishedAt'] is None:
            utc_now = datetime.datetime.utcnow()
            news['publishedAt'] = utc_now.strftime('%Y-%m-%dT%H:%M:%SZ')

        # Record the item, then set its expiry on the same key.
        redis_client.set(news_digest, news)
        redis_client.expire(news_digest, NEWS_TIME_OUT_IN_SECONDS)

        cloudAMQP_client.sendMessage(news)

    print("Fetch %d news." % num_of_new_news)

    cloudAMQP_client.sleep(SLEEP_TIME_IN_SECONDS)