def test_basic():
    """Round-trip one message through the test queue.

    Sends a small dict with ``send_message``, reads one message back with
    ``get_message`` and checks that the two are equal.

    Raises:
        AssertionError: if the received message differs from the sent one.
    """
    client = CloudAMQPClient(CLOUDAMQP_URL, TEST_QUEUE_NAME)

    sent_msg = {"test": "test"}
    client.send_message(sent_msg)
    received_msg = client.get_message()

    assert sent_msg == received_msg
    # Call form: same output on Python 2, and valid syntax on Python 3.
    print('test_basic passed')
def test_basic():
    """Round-trip one message through the test queue.

    Sends a small dict with ``sendMessage``, reads one message back with
    ``getMessage`` and checks that the two are equal.

    Raises:
        AssertionError: if the received message differs from the sent one.
    """
    client = CloudAMQPClient(CLOUDAMQP_URL, TEST_QUEUE_NAME)

    sent_msg = {"test": "test"}
    client.sendMessage(sent_msg)
    received_msg = client.getMessage()

    assert sent_msg == received_msg
    # Call form: same output on Python 2, and valid syntax on Python 3.
    print("test_basic passed.")
def test_basic():
    """Send one message and receive it back from the test queue.

    Raises:
        AssertionError: if the received message differs from the sent one.
    """
    client = CloudAMQPClient(CLOUDAMQP_URL, TEST_QUEUE_NAME)

    sent_msg = {"test": "test"}
    client.send_message(sent_msg)
    received_msg = client.get_message()

    assert sent_msg == received_msg
    # Call form: same output on Python 2, and valid syntax on Python 3.
    print("test_basic passed.")
def test_basic():
    """Check that a message survives a send/receive round trip.

    ``clear_queue`` is called on the test queue first (presumably to drop
    leftovers from earlier runs -- verify against its definition), then one
    dict is sent and one message is read back and compared with it.

    Raises:
        AssertionError: if the received message differs from the sent one.
    """
    # Tag handed to both client calls.
    caller_tag = "[cloudamqp tester]"

    clear_queue(CLOUDAMQP_URL, TEST_QUEUE_NAME)
    amqp = CloudAMQPClient(CLOUDAMQP_URL, TEST_QUEUE_NAME)

    outgoing = {"title": "test news"}
    amqp.send_message(outgoing, caller_tag)
    incoming = amqp.get_message(caller_tag)

    assert outgoing == incoming
    print("test_basic passed!")
def clearQueue(queue_url, queue_name):
    """Drain a queue by reading messages until none is returned.

    Args:
        queue_url: connection URL handed to ``CloudAMQPClient``.
        queue_name: queue name handed to ``CloudAMQPClient``.

    Returns:
        None. The number of messages read is printed.
    """
    queue_client = CloudAMQPClient(queue_url, queue_name)

    num_of_messages = 0
    # A None result from get_message() is treated as "nothing left to read".
    while queue_client.get_message() is not None:
        num_of_messages += 1

    print("Cleared %d messages." % num_of_messages)
def run():
    """Poll the scrape-task queue forever and handle each message.

    Every iteration reads at most one message, passes it to
    ``handle_message`` and then sleeps for SLEEP_TIME_IN_SECONDS through the
    queue client. A failure in ``handle_message`` is logged with its
    traceback and does not stop the loop. This function never returns.
    """
    logging.info("[news_fetcher] start running")
    scrape_queue_client = CloudAMQPClient(SCRAPE_NEWS_TASK_QUEUE_URL,
                                          SCRAPE_NEWS_TASK_QUEUE_NAME)

    while True:
        message = scrape_queue_client.get_message("[news_fetcher]")
        if message is not None:
            try:
                handle_message(message)
            except Exception:  # pylint: disable=broad-except
                # One bad message must not kill the worker; keep the
                # traceback so the failure can be diagnosed afterwards.
                logging.exception("[news_fetcher] failed to handle message")
        scrape_queue_client.sleep(SLEEP_TIME_IN_SECONDS)
def run():
    """Poll the dedupe-task queue forever and handle each message.

    Every iteration reads at most one message, passes it to
    ``handle_message`` and then sleeps for SLEEP_TIME_IN_SECONDS through the
    queue client. A failure in ``handle_message`` is logged with its
    traceback and does not stop the loop. This function never returns.
    """
    logging.info("[news_deduper] start running")
    cloudamqp_client = CloudAMQPClient(DEDUPE_NEWS_TASK_QUEUE_URL,
                                       DEDUPE_NEWS_TASK_QUEUE_NAME)

    while True:
        message = cloudamqp_client.get_message("[news_deduper]")
        if message is not None:
            try:
                handle_message(message)
            except Exception:  # pylint: disable=broad-except
                # One bad message must not kill the worker; keep the
                # traceback so the failure can be diagnosed afterwards.
                logging.exception("[news_deduper] failed to handle message")
        cloudamqp_client.sleep(SLEEP_TIME_IN_SECONDS)
def test_basic():
    """Round-trip one message through the test queue.

    Sends a small dict with ``sendMessage``, waits via the client's
    ``sleep(10)``, then reads one message back with ``getMessage`` and checks
    that the two are equal.

    Raises:
        AssertionError: if the received message differs from the sent one.
    """
    client = CloudAMQPClient(CLOUDAMQP_URL, TEST_QUEUE_NAME)

    sent_msg = {'test': 1234}
    client.sendMessage(sent_msg)
    # NOTE(review): presumably gives the broker time to deliver -- confirm.
    client.sleep(10)
    received_msg = client.getMessage()

    assert sent_msg == received_msg
    # Call form: same output on Python 2, and valid syntax on Python 3.
    print('test_basic passed!')
def run():
    """Poll the news API forever and enqueue articles not seen before.

    Each pass fetches the articles for NEWS_SOURCES and fingerprints every
    article with the MD5 hex digest of its UTF-8 encoded title. Digests
    already present in Redis are skipped. A new article gets a ``digest``
    field (and a ``publishedAt`` UTC timestamp when it has none), its digest
    is stored in Redis with a NEWS_TIME_OUT_IN_SECONDS expiry, and the
    article is sent to the scrape-task queue. The loop then sleeps for
    SLEEP_TIME_IN_SECONDS through the queue client. Never returns.
    """
    logging.info("[news_monitor] start running")
    redis_client = redis.StrictRedis(REDIS_HOST, REDIS_PORT)
    cloudamqp_client = CloudAMQPClient(SCRAPE_NEWS_TASK_QUEUE_URL,
                                       SCRAPE_NEWS_TASK_QUEUE_NAME)

    while True:
        news_list = news_api_client.get_news_from_source(NEWS_SOURCES)
        logging.info("[news_monitor] get %d from news API", len(news_list))

        number_of_news = 0
        for news in news_list:
            news_digest = hashlib.md5(
                news['title'].encode('utf-8')).hexdigest()
            if redis_client.get(news_digest) is not None:
                # Digest already recorded: this title was handled recently.
                continue

            number_of_news += 1
            news['digest'] = news_digest

            # .get() also covers articles where the key is absent entirely.
            if news.get('publishedAt') is None:
                news['publishedAt'] = datetime.datetime.utcnow().strftime(
                    "%Y-%m-%dT%H:%M:%SZ")

            # Store value and TTL in one command so a crash between two
            # separate calls cannot leave a digest that never expires.
            redis_client.set(news_digest, "True",
                             ex=NEWS_TIME_OUT_IN_SECONDS)
            cloudamqp_client.send_message(news, "[news_monitor]")

        logging.info("[news_monitor] fetch %d news", number_of_news)
        cloudamqp_client.sleep(SLEEP_TIME_IN_SECONDS)
sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'common'))

from cloudamqp_client import CloudAMQPClient
from newspaper import Article

# use AMQP queue
DEDUPE_NEWS_TASK_QUEUE_URL = "amqp://*****:*****@fish.rmq.cloudamqp.com/ofmdwnsa"
DEDUPE_NEWS_TASK_QUEUE_NAME = "tap-news-dedupe-news-task-queue"
SCRAPE_NEWS_TASK_QUEUE_URL = "amqp://*****:*****@fish.rmq.cloudamqp.com/ofmdwnsa"
SCRAPE_NEWS_TASK_QUEUE_NAME = "tap-news-scrape-news-task-queue"

SLEEP_TIME_IN_SECONDS = 5

dedupe_news_queue_client = CloudAMQPClient(DEDUPE_NEWS_TASK_QUEUE_URL,
                                           DEDUPE_NEWS_TASK_QUEUE_NAME)
scrape_news_queue_client = CloudAMQPClient(SCRAPE_NEWS_TASK_QUEUE_URL,
                                           SCRAPE_NEWS_TASK_QUEUE_NAME)


def handleMessage(msg):
    """Download the article referenced by a task message and print its text.

    Args:
        msg: task message; expected to be a dict with a 'url' entry.

    Returns:
        None. A message that is not a dict is reported on stdout and ignored.
    """
    # isinstance() already rejects None, so no separate None test is needed.
    if not isinstance(msg, dict):
        print("Message is broken.")
        return

    # give url to newspaper to get article
    article = Article(msg['url'])
    article.download()
    article.parse()

    # Call form: same output on Python 2, and valid syntax on Python 3.
    print(article.text)
# Three days expressed in seconds.
NEWS_TIME_OUT_IN_SECONDS = 3600 * 24 * 3

REDIS_HOST = 'localhost'
REDIS_PORT = 6379

SCRAPE_NEWS_TASK_QUEUE_URL = "amqp://*****:*****@crane.rmq.cloudamqp.com/mitqwttx"
SCRAPE_NEWS_TASK_QUEUE_NAME = "news-scrape-queue"

# Source identifiers handed to news_api_client.getNewsFromSource() below.
NEWS_SOURCES = [
    'bbc-news',
    'bbc-sport',
    'bloomberg',
    'cnn',
    'entertainment-weekly',
    'espn',
    'ign',
    'techcrunch',
    'the-new-york-times',
    'the-wall-street-journal',
    'the-washington-post'
]

redis_client = redis.StrictRedis(REDIS_HOST, REDIS_PORT)
cloudAMQP_client = CloudAMQPClient(SCRAPE_NEWS_TASK_QUEUE_URL, SCRAPE_NEWS_TASK_QUEUE_NAME)

# Endless polling loop: fetch the current articles and look for ones whose
# digest is not yet stored in Redis.
while True:
    news_list = news_api_client.getNewsFromSource(NEWS_SOURCES)

    # Counts articles in this pass whose digest was not found in Redis.
    num_of_news_news = 0

    for news in news_list:
        # Fingerprint: MD5 digest of the UTF-8 encoded title, base64-encoded.
        # NOTE(review): `.encode('base64')` on the digest relies on Python 2
        # str codecs -- confirm the target interpreter.
        news_digest = hashlib.md5(
            news['title'].encode('utf-8')).digest().encode('base64')

        if redis_client.get(news_digest) is None:
            num_of_news_news = num_of_news_news + 1
            news['digest'] = news_digest
# import common package in parent directory
sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'common'))

from cloudamqp_client import CloudAMQPClient # pylint: disable=import-error, wrong-import-position

DEDUPE_NEWS_TASK_QUEUE_URL = \
    "amqp://*****:*****@wombat.rmq.cloudamqp.com/fhptegqw"
DEDUPE_NEWS_TASK_QUEUE_NAME = "tiny-news-dedupe-news-task-queue"
SCRAPE_NEWS_TASK_QUEUE_URL = \
    "amqp://*****:*****@wombat.rmq.cloudamqp.com/vvfthevj"
SCRAPE_NEWS_TASK_QUEUE_NAME = "tiny-news-scrape-news-task-queue"

SLEEP_TIME_IN_SECONDS = 5

dedupe_queue_client = CloudAMQPClient(DEDUPE_NEWS_TASK_QUEUE_URL,
                                      DEDUPE_NEWS_TASK_QUEUE_NAME)


def handle_message(msg):
    """Download and parse the article referenced by a scrape task.

    Args:
        msg: task message; must be a dict with a 'url' entry.

    A message that is not a dict is logged as an error and ignored.
    """
    # The former test `msg is not isinstance(msg, dict)` compared msg by
    # identity against a bool, so it was true for every dict and the handler
    # always bailed out here.
    if not isinstance(msg, dict):
        logging.error("[news_fetcher] news is not dict")
        return

    task = msg

    article = Article(task['url'])
    article.download()
    article.parse()
import os
import sys

from dateutil import parser
from sklearn.feature_extraction.text import TfidfVectorizer

# import common package in parent directory
sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'common'))

import mongodb_client
from cloudamqp_client import CloudAMQPClient

DEDUPE_NEWS_TASK_QUEUE_URL = "amqp://*****:*****@donkey.rmq.cloudamqp.com/roenklvc"
DEDUPE_NEWS_TASK_QUEUE_NAME = "tap-news-dedupe-news-task-queue"

# Module-level client bound to the dedupe task queue configured above.
cloudamqp_client = CloudAMQPClient(DEDUPE_NEWS_TASK_QUEUE_URL, DEDUPE_NEWS_TASK_QUEUE_NAME)

NEWS_TABLE_NAME = 'news-test'

# NOTE(review): presumably the similarity score above which two articles
# count as the same news -- confirm where it is used.
SAME_NEWS_SIMILARITY_THRESHOLD = 0.9

SLEEP_TIME_IN_SECONDS = 1

def handle_message(msg):
    """Process one dedupe task message.

    The message is ignored (the function returns None) when it is not a
    dict or when its 'text' entry is None.
    """
    if msg is None or not isinstance(msg, dict):
        return
    task = msg
    # Raises KeyError when the task has no 'text' entry at all.
    text = task['text']
    if text is None:
        return
REDIS_PORT = 6379

LOG_CLICKS_TASK_QUEUE_URL = "amqp://*****:*****@wombat.rmq.cloudamqp.com/ajftnpdj"
LOG_CLICKS_TASK_QUEUE_NAME = "tiny-news-log-clicks-task-queue"

# import utils packages
sys.path.append(os.path.join(os.path.dirname(__file__), '../common/'))

import mongodb_client # pylint: disable=import-error, wrong-import-position
from cloudamqp_client import CloudAMQPClient # pylint: disable=import-error, wrong-import-position
import news_recommendation_service_client # pylint: disable=import-error, wrong-import-position

NEWS_TABLE_NAME = "news"

# Module-level clients: Redis database 0 and the click-log task queue.
redis_client = redis.StrictRedis(REDIS_HOST, REDIS_PORT, db=0)
cloudamqp_client = CloudAMQPClient(LOG_CLICKS_TASK_QUEUE_URL, LOG_CLICKS_TASK_QUEUE_NAME)

def get_one_news():
    """Return a single document from the news collection as JSON-style data.

    The document comes from ``find_one()`` on the NEWS_TABLE_NAME collection
    and is passed through ``dumps`` and ``json.loads`` before being returned.
    """
    news = mongodb_client.get_db()[NEWS_TABLE_NAME].find_one()
    # bson to string, string to json
    return json.loads(dumps(news))

def get_news_summaries_for_user(user_id, page_num):
    """Get news summaries for one page of a user's news list.

    Args:
        user_id: identifier of the requesting user.
        page_num: page number, converted with ``int()``; page 1 starts at
            index 0.
    """
    page_num = int(page_num)
    # [begin_index, end_index)
    # Half-open slice bounds; each page spans NEWS_LIST_BATCH_SIZE items.
    begin_index = (page_num - 1) * NEWS_LIST_BATCH_SIZE
    end_index = page_num * NEWS_LIST_BATCH_SIZE