Example #1
def test_basic():
    print('news_fetcher_test: cleaning all queues...')
    queue_cleaner.clear_all()
    print('news_fetcher_test: flushing Redis cache...')
    redis_client.flushall()  # TODO: dangerous to run against a deployed Redis instance

    scrape_queue_client = AMQPClient(SCRAPE_QUEUE_URL,
                                     SCRAPE_NEWS_TASK_QUEUE_NAME)
    scrape_queue_client.connect()
    assert scrape_queue_client.is_connected()

    print('news_fetcher_test: adding news onto scrape queue...')
    for message in TEST_SCRAPE_TASK:
        scrape_queue_client.send_message(message)

    print('news_fetcher_test: getting messages from the queue and processing them...')
    news_fetcher.SLEEP_TIME_IN_SECONDS = 1
    news_fetcher.run(len(TEST_SCRAPE_TASK))

    should_be_empty_msg = scrape_queue_client.get_message()
    print('news_fetcher_test(expecting None):', should_be_empty_msg)
    assert should_be_empty_msg is None
    scrape_queue_client.close()

    queue_cleaner.clear_queue(DEDUPE_QUEUE_URL, DEDUPE_NEWS_TASK_QUEUE_NAME)
    print('news_fetcher test passed')
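
The queue_cleaner module this test relies on is not shown anywhere in these examples. A minimal sketch of what it could look like, inferred purely from how the test calls it, assuming the same AMQPClient interface and queue constants used above:

# queue_cleaner sketch -- hypothetical; inferred from the clear_all()/clear_queue()
# calls in the test. AMQPClient and the *_QUEUE_URL / *_QUEUE_NAME constants are
# assumed to come from the same modules the test imports.
def clear_queue(queue_url, queue_name):
    """Drain every pending message from a single queue."""
    client = AMQPClient(queue_url, queue_name)
    client.connect()
    while client.get_message() is not None:
        pass
    client.close()

def clear_all():
    """Drain both pipeline queues touched by the fetcher test."""
    clear_queue(SCRAPE_QUEUE_URL, SCRAPE_NEWS_TASK_QUEUE_NAME)
    clear_queue(DEDUPE_QUEUE_URL, DEDUPE_NEWS_TASK_QUEUE_NAME)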
Example #2
def run():
    redis_client = redis.StrictRedis(REDIS_HOST, REDIS_PORT)
    amqp_client = AMQPClient(SCRAPE_NEWS_TASK_QUEUE_URL,
                             SCRAPE_NEWS_TASK_QUEUE_NAME)
    amqp_client.connect()

    try:
        while True:
            news_list = news_client.get_news_from_sources(NEWS_SOURCES)
            num_news = 0

            for news in news_list:
                digest = hashlib.md5(news['title'].encode('utf-8')).hexdigest()

                if redis_client.get(digest):
                    continue

                num_news += 1
                news['digest'] = digest
                redis_client.set(digest, True)
                redis_client.expire(digest, NEWS_TIME_OUT_IN_SECONDS)

                print(news)
                amqp_client.send_message(news)

            print('News Monitor: fetched {} news'.format(num_news))
            amqp_client.sleep(SLEEP_TIME_IN_SECONDS)
    except KeyboardInterrupt:
        print('keyboard interrupt')
    # TODO: also handle SIGTERM so the worker can shut down cleanly when deployed
    finally:
        amqp_client.close()
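
The get/set/expire sequence above leaves a small window in which the digest key exists without a TTL; redis-py's set() accepts nx and ex arguments that do the same work in a single atomic command. A sketch of the dedupe check written that way (is_new_title is a hypothetical helper name; the rest mirrors the example above):

import hashlib

def is_new_title(redis_client, title, ttl):
    """Return (is_new, digest). SET ... NX EX marks and tests the digest in one
    round trip, replacing the get/set/expire trio used above."""
    digest = hashlib.md5(title.encode('utf-8')).hexdigest()
    return bool(redis_client.set(digest, 1, nx=True, ex=ttl)), digest

Inside the loop, is_new, digest = is_new_title(redis_client, news['title'], NEWS_TIME_OUT_IN_SECONDS) would replace the digest/get/set/expire block.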
Example #3
def run(redis_host=REDIS_HOST,
        redis_port=REDIS_PORT,
        scrape_queue_url=SCRAPE_NEWS_TASK_QUEUE_URL,
        scrape_queue_name=SCRAPE_NEWS_TASK_QUEUE_NAME):
    redis_client = redis.StrictRedis(redis_host, redis_port)
    amqp_client = AMQPClient(scrape_queue_url, scrape_queue_name)
    amqp_client.connect()

    while True:
        logger.debug('News monitor: iter..')
        news_list = news_client.get_news_from_sources(NEWS_SOURCES)
        num_news = 0

        for news in news_list:
            digest = hashlib.md5(news['title'].encode('utf-8')).hexdigest()

            if redis_client.get(digest):
                continue

            num_news += 1
            news['digest'] = digest
            redis_client.set(digest, True)
            redis_client.expire(digest, NEWS_TIME_OUT_IN_SECONDS)

            logger.debug('News Monitor: got news {}'.format(news))
            amqp_client.send_message(news)

        logger.info('News Monitor: fetched {} news'.format(num_news))
        amqp_client.sleep(SLEEP_TIME_IN_SECONDS)
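
news_client.get_news_from_sources is another dependency that never appears in these examples; the monitor only needs it to return a list of dicts carrying at least a 'title' (and, for the fetcher, a 'url'). One possible sketch backed by the News API over requests; the endpoint, parameters, and NEWS_API_KEY placeholder are all assumptions:

import requests

NEWS_API_ENDPOINT = 'https://newsapi.org/v2/top-headlines'  # assumed provider
NEWS_API_KEY = 'YOUR_API_KEY'                               # placeholder

def get_news_from_sources(sources):
    """Fetch headlines for each source id and flatten them into one list."""
    news = []
    for source in sources:
        response = requests.get(NEWS_API_ENDPOINT,
                                params={'sources': source, 'apiKey': NEWS_API_KEY})
        response.raise_for_status()
        news.extend(response.json().get('articles', []))
    return news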
Example #4
def test_basic():
    # print(amqp_url + "({})".format(type(amqp_url)))
    client = AMQPClient(AMQP_URL, 'my_queue')

    client.connect()

    assert client.is_connected()

    assert client.get_message() is None

    client.send_message('hello world')

    assert client.get_message() == 'hello world'

    obj = {"hello": "world"}
    client.send_message(obj)

    assert client.get_message() == obj

    assert client.get_message() is None

    client.cancel_queue()

    client.close()
    print('[x] cloud amqp_client test passed')
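
The AMQPClient class exercised by this test is not included in the examples. A minimal sketch that would satisfy it, assuming pika's BlockingConnection and JSON-serialized message bodies (the method names come from the tests; the pika wiring and serialization choice are assumptions):

import json

import pika

class AMQPClient:
    """Thin wrapper over a single queue on one AMQP connection."""

    def __init__(self, amqp_url, queue_name):
        self.params = pika.URLParameters(amqp_url)
        self.queue_name = queue_name
        self.connection = None
        self.channel = None

    def connect(self):
        self.connection = pika.BlockingConnection(self.params)
        self.channel = self.connection.channel()
        self.channel.queue_declare(queue=self.queue_name)

    def is_connected(self):
        return self.connection is not None and self.connection.is_open

    def send_message(self, message):
        # JSON serialization lets both strings and dicts round-trip intact.
        self.channel.basic_publish(exchange='',
                                   routing_key=self.queue_name,
                                   body=json.dumps(message))

    def get_message(self):
        # basic_get returns (None, None, None) when the queue is empty.
        method, _properties, body = self.channel.basic_get(queue=self.queue_name)
        if method is None:
            return None
        self.channel.basic_ack(method.delivery_tag)
        return json.loads(body)

    def sleep(self, seconds):
        # Sleep through the connection so heartbeats keep being serviced.
        self.connection.sleep(seconds)

    def cancel_queue(self):
        self.channel.queue_delete(queue=self.queue_name)

    def close(self):
        self.connection.close()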
Example #5
def run(scrape_queue_url=SCRAPE_QUEUE_URL, scrape_queue_name=SCRAPE_NEWS_TASK_QUEUE_NAME,
        dedupe_queue_url=DEDUPE_QUEUE_URL, dedupe_queue_name=DEDUPE_NEWS_TASK_QUEUE_NAME,
        times=-1):
    scrape_queue_client = AMQPClient(scrape_queue_url, scrape_queue_name)
    scrape_queue_client.connect()
    dedupe_queue_client = AMQPClient(dedupe_queue_url, dedupe_queue_name)
    dedupe_queue_client.connect()

    assert scrape_queue_client.is_connected()
    assert dedupe_queue_client.is_connected()

    while True:
        logger.debug('News fetcher: iter..')
        msg = scrape_queue_client.get_message()
        if msg is not None:
            try:
                handle_message(msg)
                dedupe_queue_client.send_message(msg)
                logger.info('News Fetcher: message sent to dedupe queue (url: {})'
                    .format(msg.get('url')))
            except Exception as e:
                logger.warning('News fetcher: handling error: {}'.format(e))
        # Don't decrement 'times' earlier in the loop: the count would drop before the message is actually processed.
        scrape_queue_client.sleep(SLEEP_TIME_IN_SECONDS)
        if times > 0:
            times -= 1
        if times == 0:
            break
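
handle_message is referenced above but never defined in these examples; in this pipeline it is the step that enriches a scrape task before the message moves on to the dedupe queue. A sketch of one possible implementation that downloads the article body with the newspaper library (the library choice and the 'text' field name are assumptions):

from newspaper import Article  # newspaper3k; library choice is an assumption

def handle_message(msg):
    """Download the article behind msg['url'] and attach its body text."""
    if not isinstance(msg, dict) or 'url' not in msg:
        raise ValueError('malformed scrape task: {}'.format(msg))
    article = Article(msg['url'])
    article.download()
    article.parse()
    msg['text'] = article.text  # field name is an assumption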
Example #6
def test_basic():
    # print('Click Learner tester: clearing queue {}'.format(CLICK_QUEUE_NAME))
    clear_queue(clicklearner.USER_CLICK_QUEUE_URL, CLICK_QUEUE_NAME)

    clicklearner.PREF_COLLECTION_NAME = PREF_COLLECTION_NAME
    clicklearner.USER_CLICK_QUEUE_NAME = CLICK_QUEUE_NAME
    clicklearner.SLEEP_TIME_IN_SECONDS = 1

    click_queue_client = AMQPClient(clicklearner.USER_CLICK_QUEUE_URL,
                                    CLICK_QUEUE_NAME)
    click_queue_client.connect()

    assert click_queue_client.is_connected()

    print('Click Learner tester: clearing collection "{}" in db "{}"...'.format(
        PREF_COLLECTION_NAME, PREF_DB_NAME))
    pref_collection.remove()  # drop all documents so the test starts from a clean preference model

    print('Click Learner tester: sending click logs')
    for click in TEST_SEQUENCE:
        click_queue_client.send_message(click)

    print('Click Learner tester: start handling clicks')

    pref_model_ref = {'userId': TEST_USER_NAME}
    for cat in NEWS_CATEGORIES:
        pref_model_ref[cat] = 1 / len(NEWS_CATEGORIES)

    for click_log in TEST_SEQUENCE:
        clicklearner.run(1)
        pref_model = pref_collection.find_one({'userId': TEST_USER_NAME})

        selected = news_collection.find_one({'digest': click_log['newsDigest']})['category']
        pref_model_ref[selected] = (1 - ALPHA) * pref_model_ref[selected] + ALPHA
        for cat in NEWS_CATEGORIES:
            if cat != selected:
                pref_model_ref[cat] = (1 - ALPHA) * pref_model_ref[cat]

        del pref_model['_id']
        print('Click Learner tester: expecting {} == {}'.format(pref_model, pref_model_ref))
        assert pref_model == pref_model_ref

    print('[x] Click Learner test passed')
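
The reference computation in the loop above is a time-decayed (exponential moving average) preference update: the clicked category gains weight, every other category decays, and the weights keep summing to 1. A standalone sketch of that update, mirroring the test's arithmetic (how the production click learner implements it is not shown):

def update_preference(pref_model, clicked_category, categories=NEWS_CATEGORIES, alpha=ALPHA):
    """Decay every category by (1 - alpha), then give the clicked one the
    freed-up alpha. Equivalent to the reference computation in the test."""
    for cat in categories:
        pref_model[cat] = (1 - alpha) * pref_model[cat]
    pref_model[clicked_category] += alpha
    return pref_model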