def run(scrape_queue_url=SCRAPE_QUEUE_URL,
        scrape_queue_name=SCRAPE_NEWS_TASK_QUEUE_NAME,
        dedupe_queue_url=DEDUPE_QUEUE_URL,
        dedupe_queue_name=DEDUPE_NEWS_TASK_QUEUE_NAME,
        times=-1):
    scrape_queue_client = AMQPClient(scrape_queue_url, scrape_queue_name)
    scrape_queue_client.connect()
    dedupe_queue_client = AMQPClient(dedupe_queue_url, dedupe_queue_name)
    dedupe_queue_client.connect()
    assert scrape_queue_client.is_connected()
    assert dedupe_queue_client.is_connected()

    while True:
        logger.debug('News fetcher: iterating...')
        msg = scrape_queue_client.get_message()
        if msg is not None:
            try:
                handle_message(msg)
                dedupe_queue_client.send_message(msg)
                logger.info('News Fetcher: message sent to dedupe queue (url: {})'
                            .format(msg.get('url')))
            except Exception as e:
                logger.warning('News fetcher: handling error: {}'.format(e))
        # Do not decrement `times` earlier in the loop: the count would go down
        # before the current message is processed, which causes odd early exits.
        scrape_queue_client.sleep(SLEEP_TIME_IN_SECONDS)
        if times > 0:
            times -= 1
            if times == 0:
                break
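# handle_message() is referenced above but not shown in this snippet. A minimal
# sketch follows, purely for illustration: the 'url' and 'text' field names and
# the use of the `requests` library are assumptions, not the project's
# confirmed implementation.
import requests  # assumed dependency for this sketch only


def handle_message(msg):
    """Hypothetical sketch: fetch the article body for a scrape task in place."""
    if msg is None or not isinstance(msg, dict) or 'url' not in msg:
        logger.warning('News fetcher: malformed scrape task: {}'.format(msg))
        return
    # Download the page and attach its raw text to the task so the downstream
    # deduper has something to compare.
    response = requests.get(msg['url'], timeout=10)
    response.raise_for_status()
    msg['text'] = response.text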
def test_basic():
    print('news_fetcher_test: cleaning all queues...')
    queue_cleaner.clear_all()

    print('flushing all cache in Redis')
    redis_client.flushall()  # TODO: dangerous to run when deployed

    scrape_queue_client = AMQPClient(SCRAPE_QUEUE_URL, SCRAPE_NEWS_TASK_QUEUE_NAME)
    scrape_queue_client.connect()
    assert scrape_queue_client.is_connected()

    print('test_fetcher_basic: adding news onto scrape queue...')
    for message in TEST_SCRAPE_TASK:
        scrape_queue_client.send_message(message)

    print('getting messages from the queue and processing...')
    news_fetcher.SLEEP_TIME_IN_SECONDS = 1
    # Pass `times` as a keyword argument: the first positional parameter of
    # news_fetcher.run() is the scrape queue URL.
    news_fetcher.run(times=len(TEST_SCRAPE_TASK))

    should_be_empty_msg = scrape_queue_client.get_message()
    print('news_fetcher_test (expecting None):', should_be_empty_msg)
    assert should_be_empty_msg is None

    scrape_queue_client.close()
    queue_cleaner.clear_queue(DEDUPE_QUEUE_URL, DEDUPE_NEWS_TASK_QUEUE_NAME)
    print('news_fetcher test passed')
def test_basic():
    # print(amqp_url + "({})".format(type(amqp_url)))
    client = AMQPClient(AMQP_URL, 'my_queue')
    client.connect()
    assert client.is_connected()
    assert client.get_message() is None

    client.send_message('hello world')
    assert client.get_message() == 'hello world'

    obj = {"hello": "world"}
    client.send_message(obj)
    assert client.get_message() == obj
    assert client.get_message() is None

    client.cancel_queue()
    client.close()
    print('[x] cloud amqp_client test passed')
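# The AMQPClient wrapper exercised by this test is not shown here. Below is a
# minimal sketch of what such a wrapper could look like on top of pika
# (>= 1.0, where basic_get takes auto_ack). The method names mirror the calls
# the tests make; the JSON round-trip and queue handling are assumptions for
# illustration, not the project's actual implementation.
import json

import pika


class AMQPClient:
    def __init__(self, amqp_url, queue_name):
        self.amqp_url = amqp_url
        self.queue_name = queue_name
        self.connection = None
        self.channel = None

    def connect(self):
        # One blocking connection and channel per client; declare the queue so
        # send/get never race against a missing queue.
        self.connection = pika.BlockingConnection(pika.URLParameters(self.amqp_url))
        self.channel = self.connection.channel()
        self.channel.queue_declare(queue=self.queue_name)

    def is_connected(self):
        return self.connection is not None and self.connection.is_open

    def send_message(self, message):
        # JSON serialization lets callers send either strings or dicts.
        self.channel.basic_publish(exchange='',
                                   routing_key=self.queue_name,
                                   body=json.dumps(message))

    def get_message(self):
        method, _properties, body = self.channel.basic_get(queue=self.queue_name,
                                                           auto_ack=True)
        if method is None:
            return None  # queue is empty
        return json.loads(body)

    def sleep(self, seconds):
        # Sleep via the connection so heartbeats keep being processed.
        self.connection.sleep(seconds)

    def cancel_queue(self):
        self.channel.queue_delete(queue=self.queue_name)

    def close(self):
        self.connection.close()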
def run(times=-1):
    click_queue_client = AMQPClient(USER_CLICK_QUEUE_URL, USER_CLICK_QUEUE_NAME)
    click_queue_client.connect()
    assert click_queue_client.is_connected()
    print('Click Handler: my queue name: {}'.format(USER_CLICK_QUEUE_NAME))

    while True:
        message = click_queue_client.get_message()
        try:
            handle_message(message)
        except Exception as e:
            raise e
        click_queue_client.sleep(SLEEP_TIME_IN_SECONDS)
        if times > 0:
            times -= 1
            if times == 0:
                break
def test_basic():
    # print('Click Learner tester: clearing queue {}'.format(CLICK_QUEUE_NAME))
    clear_queue(clicklearner.USER_CLICK_QUEUE_URL, CLICK_QUEUE_NAME)

    clicklearner.PREF_COLLECTION_NAME = PREF_COLLECTION_NAME
    clicklearner.USER_CLICK_QUEUE_NAME = CLICK_QUEUE_NAME
    clicklearner.SLEEP_TIME_IN_SECONDS = 1

    click_queue_client = AMQPClient(clicklearner.USER_CLICK_QUEUE_URL, CLICK_QUEUE_NAME)
    click_queue_client.connect()
    assert click_queue_client.is_connected()

    print('Click Learner tester: clearing collection "{}" in db "{}"...'
          .format(PREF_COLLECTION_NAME, PREF_DB_NAME))
    pref_collection.remove()

    print('Click Learner tester: sending click logs')
    for click in TEST_SEQUENCE:
        click_queue_client.send_message(click)

    print('Click Learner tester: start handling clicks')
    # Reference model: start from a uniform preference over all categories.
    pref_model_ref = {'userId': TEST_USER_NAME}
    for cat in NEWS_CATEGORIES:
        pref_model_ref[cat] = 1 / len(NEWS_CATEGORIES)

    for click_log in TEST_SEQUENCE:
        clicklearner.run(1)
        pref_model = pref_collection.find_one({'userId': TEST_USER_NAME})
        selected = news_collection.find_one(
            {'digest': click_log['newsDigest']})['category']
        # Apply the same time-decay update the learner is expected to perform.
        pref_model_ref[selected] = (1 - ALPHA) * pref_model_ref[selected] + ALPHA
        for cat in NEWS_CATEGORIES:
            if cat != selected:
                pref_model_ref[cat] = (1 - ALPHA) * pref_model_ref[cat]
        del pref_model['_id']
        print('Click Learner tester: expecting {} == {}'.format(pref_model, pref_model_ref))
        assert pref_model == pref_model_ref

    print('[x] Click Learner test passed')
def run(times=-1):
    click_queue_client = AMQPClient(USER_CLICK_QUEUE_URL, USER_CLICK_QUEUE_NAME)
    click_queue_client.connect()
    news_collection = mongodb_client.get_news_collection()
    pref_collection = (mongodb_client.get_db(PREF_DB_NAME)
                       .get_collection(PREF_COLLECTION_NAME))
    assert click_queue_client.is_connected()

    while True:
        msg = click_queue_client.get_message()
        try:
            handle_message(msg, news_collection, pref_collection)
        except Exception as e:
            raise e
        click_queue_client.sleep(SLEEP_TIME_IN_SECONDS)
        if times > 0:
            times -= 1
            if times == 0:
                break
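# handle_message() above is not shown; the reference math in the click-learner
# test (time-decay update with learning rate ALPHA over NEWS_CATEGORIES) pins
# down what it must do. A minimal sketch, assuming click logs carry 'userId'
# and 'newsDigest' fields as in the test, is given below for illustration only.
def handle_message(msg, news_collection, pref_collection):
    """Hypothetical sketch of the time-decay preference update."""
    if msg is None or not isinstance(msg, dict):
        return
    if 'userId' not in msg or 'newsDigest' not in msg:
        return

    # Look up (or initialize) the user's preference model.
    model = pref_collection.find_one({'userId': msg['userId']})
    if model is None:
        model = {'userId': msg['userId']}
        for category in NEWS_CATEGORIES:
            model[category] = 1 / len(NEWS_CATEGORIES)

    # The clicked article's category gets boosted; every other category decays.
    news = news_collection.find_one({'digest': msg['newsDigest']})
    if news is None or 'category' not in news:
        return
    clicked = news['category']
    for category in NEWS_CATEGORIES:
        if category == clicked:
            model[category] = (1 - ALPHA) * model[category] + ALPHA
        else:
            model[category] = (1 - ALPHA) * model[category]

    pref_collection.replace_one({'userId': msg['userId']}, model, upsert=True)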
coloredlogs.install(level=os.environ.get('LOGGER_LEVEL', 'INFO'), logger=logger)

# TODO: these module-level globals are bad practice and leave this setup code
# uncovered by tests
config = os.environ
DB_NAME = config['news_db']
COLLECTION_NAME = config['new_collection']
DEDUPE_QUEUE_URL = config['dedupe_task_queue_url']
DEDUPE_QUEUE_NAME = config['dedupe_task_queue_name']
SLEEP_TIME_IN_SECONDS = 5

dedupe_queue_client = AMQPClient(DEDUPE_QUEUE_URL, DEDUPE_QUEUE_NAME)
dedupe_queue_client.connect()
assert dedupe_queue_client.is_connected()

NEWS_SIMILARITY_THRESHOLD = 0.8


class NotContainPublishTimeError(Exception):
    def __str__(self):
        return 'News does not contain a publish time!'


def handle_message(msg):
    logger.debug('deduper handling message: {}'.format(msg))
    if msg is None or not isinstance(msg, dict):
        logger.info('News Deduper: message is broken')
        return
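# NEWS_SIMILARITY_THRESHOLD above suggests the deduper compares a new article
# against recent ones by text similarity. Below is a minimal sketch of such a
# check, assuming scikit-learn TF-IDF vectors and cosine similarity; it is an
# illustration of the threshold's use, not necessarily the project's approach.
from sklearn.feature_extraction.text import TfidfVectorizer


def is_duplicate(candidate_text, recent_texts,
                 threshold=NEWS_SIMILARITY_THRESHOLD):
    """Return True if the candidate is too similar to any recent article."""
    if not recent_texts:
        return False
    # Fit TF-IDF on the candidate plus the recent articles; rows are L2
    # normalized, so the dot product of two rows is their cosine similarity.
    tfidf = TfidfVectorizer().fit_transform([candidate_text] + list(recent_texts))
    similarities = (tfidf[0] * tfidf[1:].T).toarray()[0]
    return any(score > threshold for score in similarities)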