def test_basic():
    """Round-trip one message through the test queue and check it arrives intact."""
    queue_client = RabbitMQClient(HOST, TEST_QUEUE_NAME)
    payload = {'test': 'demo'}
    queue_client.sendMessage(payload)
    # Give the broker a moment to deliver before polling the queue.
    queue_client.sleep(10)
    echoed = queue_client.getMessage()
    assert payload == echoed
    print('test_basic passed!')
# Dedupe worker: consumes scraped-news tasks and filters near-duplicate articles.
# Make the shared 'common' package importable when this file is run directly.
sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'common'))

import mongodb_client
from rabbitMQ_client import RabbitMQClient
import news_topic_modeling_service_client

DEDUPE_NEWS_TASK_QUEUE_HOST = 'localhost'
DEDUPE_NEWS_TASK_QUEUE_NAME = 'dedupe-news-task-queue'
SLEEP_TIME_IN_SECONDS = 1
NEWS_TABLE_NAME = 'news'
# Similarity cutoff above which two articles are treated as the same story.
SAME_NEWS_SIMILARITY_THRESHOLD = 0.8

dedupe_news_queue_client = RabbitMQClient(DEDUPE_NEWS_TASK_QUEUE_HOST, DEDUPE_NEWS_TASK_QUEUE_NAME)

def handle_message(msg):
    # Process one dedupe task; presumably msg is a dict with at least
    # 'text' and 'publishedAt' keys — TODO confirm against the producer.
    if msg is None or not isinstance(msg, dict):
        return
    task = msg
    text = str(task['text'])
    # NOTE(review): dead guard — str(...) never returns None, so this branch
    # is unreachable; the intent was probably to test task['text'] itself.
    if text is None:
        return
    # Get recent news from mongodb
    published_at = parser.parse(task['publishedAt'])
    print(published_at)
    # Look back one day from the article's publish time for dedupe candidates.
    published_at_day_begin = published_at - datetime.timedelta(days=1)
    print(published_at_day_begin)
# Backend service: paginated news summaries per user, cached in Redis,
# with click events pushed onto a RabbitMQ task queue.
REDIS_HOST = 'localhost'
REDIS_PORT = 6379
NEWS_TABLE_NAME = 'news'
CLICK_LOGS_TABLE_NAME = 'click_logs'
NEWS_LIMIT = 100
NEWS_LIST_BATCH_SIZE = 10
# How long a user's cached news-digest list lives in Redis (seconds).
USER_NEWS_TIMEOUT_IN_SECONDS = 600
LOG_CLICK_TASK_QUEUE_HOST = 'localhost'
LOG_CLICK_TASK_QUEUE_NAME = "log-click-task-queue"

redis_client = redis.StrictRedis(REDIS_HOST, REDIS_PORT, db=0)
click_queue_client = RabbitMQClient(LOG_CLICK_TASK_QUEUE_HOST, LOG_CLICK_TASK_QUEUE_NAME)

def getNewsSummariesForUser(user_id, page_num):
    """Return one page (NEWS_LIST_BATCH_SIZE items) of news for a user.

    Raises ValueError when page_num is not a positive integer.
    """
    page_num = int(page_num)
    if page_num <= 0:
        raise ValueError('page_num should be a positive integer.')
    begin_index = (page_num - 1) * NEWS_LIST_BATCH_SIZE
    end_index = page_num * NEWS_LIST_BATCH_SIZE
    # The news list to be returned
    sliced_news = []
    if redis_client.get(user_id) is not None:
        # NOTE(review): two separate GETs — the key could expire between them,
        # making the second return None; consider reusing the first result.
        # SECURITY: pickle.loads on cached bytes is safe only while this Redis
        # instance is private/trusted — never point it at shared infrastructure.
        news_digests = pickle.loads(redis_client.get(user_id))
# News monitor: periodically polls the news API, dedupes by title digest via
# Redis, and enqueues fresh articles for downstream scraping.
from rabbitMQ_client import RabbitMQClient

REDIS_HOST = 'localhost'
REDIS_PORT = 6379
SCRAPE_NEWS_TASK_QUEUE_HOST = 'localhost'
SCRAPE_NEWS_TASK_QUEUE_NAME = "scrape-news-task-queue"
# BUG FIX: this was a list holding a SINGLE comma-and-space-joined string, so
# the news API received one invalid source id instead of sixteen separate ids.
# Each source must be its own element (NewsAPI source identifiers).
NEWS_SOURCES = [
    'cnn',
    'bbc-news',
    'bloomberg',
    'espn',
    'cnbc',
    'business-insider',
    'abc-news',
    'buzzfeed',
    'bbc-sport',
    'fox-news',
    'the-verge',
    'techradar',
    'talksport',
    'nfl-news',
    'nhl-news',
    'reddit-r-all',
]
# NOTE(review): defined but unused in this chunk — presumably meant as the
# expiry for the per-digest Redis keys; confirm an expire() follows the set().
NEWS_TIME_OUT_IN_SECONDS = 3600 * 24
SLEEP_TIME_IN_SECONDS = 60

redis_client = redis.StrictRedis(REDIS_HOST, REDIS_PORT)
rabbitMQ_client = RabbitMQClient(SCRAPE_NEWS_TASK_QUEUE_HOST, SCRAPE_NEWS_TASK_QUEUE_NAME)

while True:
    news_list = news_api_client.getNewsFromSource(NEWS_SOURCES)
    nums_of_new_news = 0
    for news in news_list:
        # The MD5 of the title identifies a story across repeated polls.
        news_digest = hashlib.md5(news['title'].encode('utf-8')).hexdigest()
        if redis_client.get(news_digest) is None:
            # Unseen story: remember it and record the digest on the article.
            nums_of_new_news += 1
            news['digest'] = news_digest
            # Backfill a timestamp so consumers can always sort by publish time.
            if news['publishedAt'] is None:
                news['publishedAt'] = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ")
            redis_client.set(news_digest, json.dumps(news))