def test_basic():
    """Push the mock scrape tasks through news_fetcher and verify the queue drains."""
    print('news_fetcher_test: cleaning all queues...')
    queue_cleaner.clear_all()
    print('flushing all cache in Redis')
    redis_client.flushall()  # TODO dangerous to run when deployed
    producer = AMQPClient(SCRAPE_QUEUE_URL, SCRAPE_NEWS_TASK_QUEUE_NAME)
    producer.connect()
    assert producer.is_connected()
    print('test_fetcher_basic: adding news onto scrape queue...')
    for task in TEST_SCRAPE_TASK:
        producer.send_message(task)
    print('getting messages from the queue and process...')
    # Shorten the fetcher's polling interval so the test runs quickly,
    # then let it consume exactly one iteration per queued task.
    news_fetcher.SLEEP_TIME_IN_SECONDS = 1
    news_fetcher.run(len(TEST_SCRAPE_TASK))
    leftover = producer.get_message()
    print('news_fetcher_test(expecting None):', leftover)
    assert leftover is None
    producer.close()
    # The fetcher forwards into the dedupe queue; drain it so later tests start clean.
    queue_cleaner.clear_queue(DEDUPE_QUEUE_URL, DEDUPE_NEWS_TASK_QUEUE_NAME)
    print('news_fetcher test passed')
def test_basic():
    """Round-trip a string and a dict payload through a scratch AMQP queue."""
    conn = AMQPClient(AMQP_URL, 'my_queue')
    conn.connect()
    assert conn.is_connected()
    # Queue starts empty.
    assert conn.get_message() is None
    # A plain string survives the round trip.
    conn.send_message('hello world')
    assert conn.get_message() == 'hello world'
    # A JSON-serializable dict survives the round trip.
    payload = {"hello": "world"}
    conn.send_message(payload)
    assert conn.get_message() == payload
    # Nothing left behind.
    assert conn.get_message() is None
    conn.cancel_queue()
    conn.close()
    print('[x] cloud amqp_client test passed')
def run():
    """Poll the news sources forever, skipping titles seen recently.

    Each fresh story is tagged with an md5 digest of its title, recorded
    in Redis with a TTL (so old stories can recur after expiry), and
    pushed onto the scrape task queue. Ctrl-C exits cleanly; the AMQP
    connection is always closed on the way out.
    """
    redis_client = redis.StrictRedis(REDIS_HOST, REDIS_PORT)
    amqp_client = AMQPClient(SCRAPE_NEWS_TASK_QUEUE_URL, SCRAPE_NEWS_TASK_QUEUE_NAME)
    amqp_client.connect()
    try:
        while True:
            news_list = news_client.get_news_from_sources(NEWS_SOURCES)
            num_news = 0
            for news in news_list:
                # Title digest is the dedupe key.
                digest = hashlib.md5(news['title'].encode('utf-8')).hexdigest()
                if redis_client.get(digest):
                    continue  # already seen within NEWS_TIME_OUT_IN_SECONDS
                num_news += 1
                news['digest'] = digest
                redis_client.set(digest, True)
                redis_client.expire(digest, NEWS_TIME_OUT_IN_SECONDS)
                print(news)
                amqp_client.send_message(news)
            # Fix: typo 'fectched' -> 'fetched' in the status message.
            print('News Monitor: fetched {} news'.format(num_news))
            amqp_client.sleep(SLEEP_TIME_IN_SECONDS)
    except KeyboardInterrupt:
        print('keyboard interrupt')
    # except SigTerm
    finally:
        amqp_client.close()
def test_monitor_basic():
    """Run news_monitor in a child process against mock sources and check the queue contents."""
    news_monitor.NEWS_SOURCES = news_client.MOCK_SOURCES
    MOCK_DATA = news_client.MOCK_DATA
    print('test_monitor_basic: cleaning queue "{}" first---------'.format(QUEUE_NAME))
    clear_queue(QUEUE_URL, QUEUE_NAME)
    # Flush Redis so the monitor's dedupe cache is empty for the test run.
    redis_client = redis.StrictRedis(REDIS_HOST, REDIS_PORT)
    redis_client.flushall()
    print('test_monitor_basic: adding message to queue "{}"--------'.format(QUEUE_NAME))
    amqp_client = AMQPClient(QUEUE_URL, QUEUE_NAME)
    amqp_client.connect()
    proc = Process(target=news_monitor.run, name='monitor_run',
                   args=(REDIS_HOST, REDIS_PORT, QUEUE_URL, QUEUE_NAME))
    proc.start()
    print('test_monitor_basic: executing... (wait for 2 seconds to cut)')
    time.sleep(2)
    proc.terminate()
    # Fix: reap the terminated child so it does not linger as a zombie.
    proc.join()
    try:
        for i in range(len(MOCK_DATA)):
            message = amqp_client.get_message()
            # The monitor injects a 'digest' key; strip it before comparing.
            del message['digest']
            print(message, MOCK_DATA[i])
            assert message == MOCK_DATA[i]
    finally:
        # Fix: the AMQP connection previously leaked.
        amqp_client.close()
    print('test_monitor_basic: [x] test_monitor_basic test passed')
def run(redis_host=REDIS_HOST,
        redis_port=REDIS_PORT,
        scrape_queue_url=SCRAPE_NEWS_TASK_QUEUE_URL,
        scrape_queue_name=SCRAPE_NEWS_TASK_QUEUE_NAME):
    """Poll news sources forever, dedupe by title digest via Redis, enqueue fresh news.

    Connection endpoints are parameters (with production defaults) so tests
    can point the monitor at scratch Redis/AMQP instances.
    """
    redis_client = redis.StrictRedis(redis_host, redis_port)
    amqp_client = AMQPClient(scrape_queue_url, scrape_queue_name)
    amqp_client.connect()
    try:
        while True:
            logger.debug('News monitor: iter..')
            news_list = news_client.get_news_from_sources(NEWS_SOURCES)
            num_news = 0
            for news in news_list:
                # md5 of the title is the dedupe key; entries expire so
                # long-running stories can be re-fetched later.
                digest = hashlib.md5(news['title'].encode('utf-8')).hexdigest()
                if redis_client.get(digest):
                    continue  # seen within NEWS_TIME_OUT_IN_SECONDS
                num_news += 1
                news['digest'] = digest
                redis_client.set(digest, True)
                redis_client.expire(digest, NEWS_TIME_OUT_IN_SECONDS)
                logger.debug('News Monitor: got news {}'.format(news))
                amqp_client.send_message(news)
            # Fix: typo 'fectched' -> 'fetched' in the log message.
            logger.info('News Monitor: fetched {} news'.format(num_news))
            amqp_client.sleep(SLEEP_TIME_IN_SECONDS)
    finally:
        # Fix: the AMQP connection previously leaked when the loop
        # exited via an exception (or process termination cleanup).
        amqp_client.close()
def run(scrape_queue_url=SCRAPE_QUEUE_URL,
        scrape_queue_name=SCRAPE_NEWS_TASK_QUEUE_NAME,
        dedupe_queue_url=DEDUPE_QUEUE_URL,
        dedupe_queue_name=DEDUPE_NEWS_TASK_QUEUE_NAME,
        times=-1):
    """Pump messages from the scrape queue through handle_message into the dedupe queue.

    Runs forever when ``times`` is negative; otherwise performs ``times``
    iterations and returns (used by tests). Per-message failures are
    logged and skipped rather than crashing the worker.
    """
    scrape_queue_client = AMQPClient(scrape_queue_url, scrape_queue_name)
    scrape_queue_client.connect()
    dedupe_queue_client = AMQPClient(dedupe_queue_url, dedupe_queue_name)
    dedupe_queue_client.connect()
    assert scrape_queue_client.is_connected()
    assert dedupe_queue_client.is_connected()
    try:
        while True:
            logger.debug('News fetcher: iter..')
            msg = scrape_queue_client.get_message()
            if msg is not None:
                try:
                    handle_message(msg)
                    dedupe_queue_client.send_message(msg)
                    logger.info('News Fetcher: message sent to dedupe queue (url: {})'
                                .format(msg.get('url')))
                except Exception as e:
                    logger.warning('News fetcher: handling error: {}'.format(e))
            # NOTE: decrement only after the message is fully processed;
            # decrementing earlier skipped work on the final iteration.
            scrape_queue_client.sleep(SLEEP_TIME_IN_SECONDS)
            if times > 0:
                times -= 1
                if times == 0:
                    break
    finally:
        # Fix: both connections previously leaked when the loop broke or raised.
        scrape_queue_client.close()
        dedupe_queue_client.close()
def test_basic():
    """Log mock click pairs, run the handler, and verify the click queue drains.

    Redirects both rpc_operations and clickhandler at the test queue —
    not a good idea in general, but a workable way of separating the
    queue between test and production.
    """
    rpc_operations.USER_CLICK_QUEUE_URL = USER_CLICK_QUEUE_URL
    rpc_operations.USER_CLICK_QUEUE_NAME = USER_CLICK_QUEUE_NAME
    rpc_operations.init()
    clickhandler.USER_CLICK_QUEUE_URL = USER_CLICK_QUEUE_URL
    clickhandler.USER_CLICK_QUEUE_NAME = USER_CLICK_QUEUE_NAME
    print('Click Handler test: cleaning up click queue...')
    clear_queue(USER_CLICK_QUEUE_URL, USER_CLICK_QUEUE_NAME)
    print('Click Handler test: logging clicks...')
    for pair in TEST_CLICK_DATA:
        rpc_operations.log_click(*pair)
    # Fix: was print(print(...)), which printed a spurious 'None' line.
    print('Click Handler test: handling clicks...')
    clickhandler.run(len(TEST_CLICK_DATA))
    queue_client = AMQPClient(USER_CLICK_QUEUE_URL, USER_CLICK_QUEUE_NAME)
    queue_client.connect()
    should_be_empty = queue_client.get_message()
    print('Click Handler Test: {} should be None'.format(should_be_empty))
    assert should_be_empty is None
    # Fix: 'queue_client.close' lacked parentheses, so the method was
    # never called and the connection never closed.
    queue_client.close()
    print('Click Handler: test passed')
def clear_queue(queue_url, queue_name):
    """Drain every pending message from the named queue, printing each one."""
    drained = 0
    client = AMQPClient(queue_url, queue_name)
    client.connect()
    print('xxx cleaning queue "{}"'.format(queue_name))
    # iter() with a sentinel keeps pulling until get_message() returns None.
    for message in iter(client.get_message, None):
        print(message)
        drained += 1
    client.close()
    print('xxx cleaned {} message on {}'.format(drained, queue_name))
def run(times=-1):
    """Consume click-log messages and dispatch each to handle_message.

    Runs forever when ``times`` is negative; otherwise performs ``times``
    iterations and returns (used by tests). Exceptions from
    handle_message propagate to the caller.
    """
    click_queue_client = AMQPClient(USER_CLICK_QUEUE_URL, USER_CLICK_QUEUE_NAME)
    click_queue_client.connect()
    assert click_queue_client.is_connected()
    print('Click Handler: my queue name: {}'.format(click_queue_client))
    try:
        while True:
            message = click_queue_client.get_message()
            # Fix: 'except Exception as e: raise e' was a no-op that only
            # rewrote the traceback; let exceptions propagate naturally.
            handle_message(message)
            click_queue_client.sleep(SLEEP_TIME_IN_SECONDS)
            if times > 0:
                times -= 1
                if times == 0:
                    break
    finally:
        # Fix: the connection previously leaked on loop exit or exception.
        click_queue_client.close()
def test_basic():
    """Replay TEST_SEQUENCE through the click learner and verify the preference model.

    Maintains a reference model alongside the learner: start from a uniform
    distribution over NEWS_CATEGORIES, then after each click boost the
    clicked category by ALPHA and decay the rest.
    """
    clear_queue(clicklearner.USER_CLICK_QUEUE_URL, CLICK_QUEUE_NAME)
    # Point the learner at the test queue and collection.
    clicklearner.PREF_COLLECTION_NAME = PREF_COLLECTION_NAME
    clicklearner.USER_CLICK_QUEUE_NAME = CLICK_QUEUE_NAME
    clicklearner.SLEEP_TIME_IN_SECONDS = 1
    sender = AMQPClient(clicklearner.USER_CLICK_QUEUE_URL, CLICK_QUEUE_NAME)
    sender.connect()
    assert sender.is_connected()
    print('Click Learner tester: clearing collection "{}" in db "{}"...'.format(PREF_COLLECTION_NAME, PREF_DB_NAME))
    pref_collection.remove()
    print('Click Learner tester: sending click logs')
    for click in TEST_SEQUENCE:
        sender.send_message(click)
    print('Click Learner tester: start handling clicks')
    # Reference model: uniform prior over all categories.
    expected = {'userId': TEST_USER_NAME}
    uniform = 1 / len(NEWS_CATEGORIES)
    for cat in NEWS_CATEGORIES:
        expected[cat] = uniform
    for click_log in TEST_SEQUENCE:
        # Process exactly one queued click, then compare models.
        clicklearner.run(1)
        learned = pref_collection.find_one({'userId': TEST_USER_NAME})
        clicked_cat = news_collection.find_one({'digest': (click_log['newsDigest'])})['category']
        # Exponential moving average update mirroring the learner.
        expected[clicked_cat] = (1 - ALPHA) * expected[clicked_cat] + ALPHA
        for cat in NEWS_CATEGORIES:
            if cat != clicked_cat:
                expected[cat] = (1 - ALPHA) * expected[cat]
        del learned['_id']
        print('Click Learner tester: expecting {} == {}'.format(learned, expected))
        assert learned == expected
    print('xx Click Learner test passed')
def run(times=-1):
    """Consume click logs and update per-user preference models.

    Runs forever when ``times`` is negative; otherwise performs ``times``
    iterations and returns (used by tests). Exceptions from
    handle_message propagate to the caller.
    """
    click_queue_client = AMQPClient(USER_CLICK_QUEUE_URL, USER_CLICK_QUEUE_NAME)
    click_queue_client.connect()
    news_collection = mongodb_client.get_news_collection()
    pref_collection = (mongodb_client.get_db(PREF_DB_NAME)
                       .get_collection(PREF_COLLECTION_NAME))
    assert click_queue_client.is_connected()
    try:
        while True:
            msg = click_queue_client.get_message()
            # Fix: 'except Exception as e: raise e' was a no-op that only
            # rewrote the traceback; let exceptions propagate naturally.
            handle_message(msg, news_collection, pref_collection)
            click_queue_client.sleep(SLEEP_TIME_IN_SECONDS)
            if times > 0:
                times -= 1
                if times == 0:
                    break
    finally:
        # Fix: the connection previously leaked on loop exit or exception.
        click_queue_client.close()
logger = logging.getLogger(__name__) coloredlogs.install(level=os.environ.get('LOGGER_LEVEL', 'INFO'), logger=logger) # TODO: this global are bad, and makes it uncovered by tests config = os.environ DB_NAME = config['news_db'] COLLECTION_NAME = config['new_collection'] DEDUPE_QUEUE_URL = config['dedupe_task_queue_url'] DEDUPE_QUEUE_NAME = config['dedupe_task_queue_name'] SLEEP_TIME_IN_SECONDS = 5 dedupe_queue_client = AMQPClient(DEDUPE_QUEUE_URL, DEDUPE_QUEUE_NAME) dedupe_queue_client.connect() assert dedupe_queue_client.is_connected() NEWS_SIMILARITY_THRESHOLD = 0.8 class NotContainPublishTimeError(Exception): def __str__(self): return 'News not containing publish time!!!' def handle_message(msg): logger.debug('dedupter handling message: {}'.format(msg)) if msg is None or not isinstance(msg, dict): logger.info('News Deduper: message is broken')