def test_basic():
    """Send one message through the test queue and check it is read back equal."""
    queue_client = CloudAMQPClient(TEST_CLOUDAMQP_URL, TEST_QUEUE_NAME)

    payload = {'test': 'test'}
    queue_client.send_message(payload)

    # The message read back must compare equal to the one just sent.
    echoed = queue_client.get_message()
    assert payload == echoed
    print('test_basic passed.')
Ejemplo n.º 2
0
def test_basic():
    """Send a sample message and verify the same payload is read back."""
    amqp = CloudAMQPClient(CLOUDAMQP_URL, QUEUE_NAME)

    outgoing = {'test': 'test'}
    amqp.send_message(outgoing)

    # Read from the same client and compare against what was sent.
    incoming = amqp.get_message()
    assert outgoing == incoming
    print('test_basic passed.')
Ejemplo n.º 3
0
def test_basic():
    """Test CloudAMQPClient by sending one message and reading it back.

    Raises:
        AssertionError: if the message read back differs from the one sent.
    """
    client = CloudAMQPClient(CLOUDAMQP_URL, TEST_QUEUE_NAME)

    sent_msg = {'test': 'test'}
    client.send_message(sent_msg)
    received_msg = client.get_message()

    assert sent_msg == received_msg
    # Call form of print: valid on Python 3 and prints the same text on
    # Python 2, unlike the print statement.
    print('test_basic passed.')
def clearQueue(queue_url, queue_name):
    """Drain a queue by reading messages until none are left.

    Args:
        queue_url: Connection URL passed to ``CloudAMQPClient``.
        queue_name: Name of the queue to drain.

    Prints how many messages were read, then returns ``None``.
    """
    scrape_news_queue_client = CloudAMQPClient(queue_url, queue_name)

    num_of_messages = 0

    # A None result from get_message() is treated as "nothing left to read".
    # The client was constructed just above, so no separate None check on it
    # is needed (the old check's false branch would have looped forever).
    while scrape_news_queue_client.get_message() is not None:
        num_of_messages += 1

    # Call form of print works on both Python 2 and Python 3.
    print("Clear %d messages." % num_of_messages)
Ejemplo n.º 5
0
def test_basic():
    """Send one message, pause via the client, then check it is read back equal."""
    amqp_client = CloudAMQPClient(TEST_CLOUDAMQP_URL, TEST_QUEUE_NAME)

    outgoing = {'test': 'test'}
    amqp_client.send_message(outgoing)

    # Pause with the client's own sleep before reading.
    amqp_client.sleep(5)

    incoming = amqp_client.get_message()
    assert outgoing == incoming

    print('test_basic passed!')
    text = None

    # if task['source'] == 'cnn':
    #     print 'scraping CNN news'
    #     text = cnn_news_scraper.extract_news(task['url'])
    # else:
    #     print 'news source [%s] is not supported.' % task['source']

    # task['text'] = text

    article = Article(task['url'])
    article.download()
    article.parse()

    # article.text is unicode, need to encode it to utf-8
    task['text'] = article.text.encode('utf-8')

    dedupe_news_queue_client.send_message(task)


# Worker loop: poll scrape_news_queue_client forever, hand each message to
# handle_message, and pause between polls via the client's sleep().
while True:
    if scrape_news_queue_client is not None:
        msg = scrape_news_queue_client.get_message()
        if msg is not None:
            try:
                handle_message(msg)
            except Exception as e:
                # Top-level boundary: report the failure and keep polling so
                # one bad task does not stop the worker. Call form of print
                # is valid on both Python 2 and Python 3.
                print(e)
        scrape_news_queue_client.sleep(SLEEP_TIME_IN_SECONDS)
        rows, _ = pairwise_sim.shape

        for row in range(1, rows):
            if pairwise_sim[row, 0] > SAME_NEWS_SIMILARITY_THRESHOLD:
                print 'Duplicated news. Ignore.'
                return
    task['publishedAt'] = parser.parse(task['publishedAt'])

    # Classify news
    description = task['description']
    if description is not None:
        topic = news_topic_modeling_service_client.classify(description)
        task['class'] = topic

    db[NEWS_TABLE_NAME].replace_one({'digest': task['digest']},
                                    task,
                                    upsert=True)


# Worker loop: poll cloudAMQP_client forever, hand each message to
# handle_message, and pause between polls via the client's sleep().
while True:
    if cloudAMQP_client is not None:
        msg = cloudAMQP_client.get_message()
        if msg is not None:
            # Parse and process the task
            try:
                handle_message(msg)
            except Exception as e:
                # Top-level boundary: report the failure and keep polling so
                # one bad task does not stop the worker. Call form of print
                # is valid on both Python 2 and Python 3.
                print(e)
        cloudAMQP_client.sleep(SLEEP_TIME_IN_SECONDS)
Ejemplo n.º 8
0
import os
import sys
import news_deduper as deduper
# import common package in parent directory
sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'common'))
from cloudAMQP_client import CloudAMQPClient

# Connection settings for the dedupe task queue. The credentials in the URL
# are masked in this copy of the file.
DEDUPE_NEWS_TASK_QUEUE_URL = "amqp://*****:*****@otter.rmq.cloudamqp.com/xemwnewz"
DEDUPE_NEWS_TASK_QUEUE_NAME = "tap-news-dedupe-news-task-queue"

# NOTE: the client is constructed at import time, not inside the test.
cloudAMQP_client = CloudAMQPClient(DEDUPE_NEWS_TASK_QUEUE_URL,
                                   DEDUPE_NEWS_TASK_QUEUE_NAME)

# Test inputs: an empty string, and whatever get_message() returns when this
# module is imported.
# NOTE(review): presumably None when the queue is empty — confirm.
TEST_MSG1 = ""
TEST_MSG2 = cloudAMQP_client.get_message()


def test_basic():
    """Pass both module-level test messages to the deduper's handler, in order."""
    for sample in (TEST_MSG1, TEST_MSG2):
        deduper.handle_message(sample)
    print('test_basic passed!')


# Run the test directly when this file is executed as a script.
if __name__ == "__main__":
    test_basic()
Ejemplo n.º 9
0
import os
import sys
import news_fetcher as fetcher
# import common package in parent directory
sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'common'))

from cloudAMQP_client import CloudAMQPClient

# Connection settings for the scrape task queue. The credentials in the URL
# are masked in this copy of the file.
SCRAPE_NEWS_TASK_QUEUE_URL = "amqp://*****:*****@otter.rmq.cloudamqp.com/jxasfgzt"
SCRAPE_NEWS_TASK_QUEUE_NAME = "tap-news-scrape-news-task-queue"
# NOTE: the client is constructed at import time, not inside the test.
scrape_news_queue_client = CloudAMQPClient(SCRAPE_NEWS_TASK_QUEUE_URL,
                                           SCRAPE_NEWS_TASK_QUEUE_NAME)
# Test inputs: an empty string, and whatever get_message() returns when this
# module is imported.
# NOTE(review): presumably None when the queue is empty — confirm.
TEST_MSG1 = ""
TEST_MSG2 = scrape_news_queue_client.get_message()


def test_basic():
    """Pass both module-level test messages to the fetcher's handler, in order."""
    for sample in (TEST_MSG1, TEST_MSG2):
        fetcher.handle_message(sample)
    print('test_basic passed!')


# Run the test directly when this file is executed as a script.
if __name__ == "__main__":
    test_basic()