def test():
    """Round-trip smoke test for the AMQP client.

    Publishes a known payload to the queue, reads one message back, and
    asserts it is identical. Relies on module-level URL / QUEUE_NAME and
    the CloudAMQPClient helper.
    """
    amqp = CloudAMQPClient(URL, QUEUE_NAME)
    payload = {"hhh": "hhh"}
    amqp.sendMessage(payload)
    echoed = amqp.receiveMessage()
    assert payload == echoed
    print("passed!")
def clear_queue(queue_url, queue_name):
    """Drain every pending message from a queue, then print how many were removed.

    Args:
        queue_url: AMQP broker URL.
        queue_name: name of the queue to drain.
    """
    queue_client = CloudAMQPClient(queue_url, queue_name)
    # Bug fix: the original checked `if queue_client:` INSIDE `while True:`,
    # so a falsy client made the loop spin forever doing nothing. Guard once
    # up front and bail out instead.
    if not queue_client:
        return
    num_of_messages = 0
    while True:
        message = queue_client.receiveMessage()
        if message:
            num_of_messages = num_of_messages + 1
        else:
            # Queue is empty — report the drain count and stop.
            print("%s num_of_messages" % num_of_messages)
            return
# News fetcher worker module: consumes scraped-news messages from one
# CloudAMQP queue and downloads the full article via `newspaper`.
# NOTE(review): the final function appears truncated in this view — the file
# seems to continue past `article.download()`.
import os
import sys
import news_scraper
from newspaper import Article

# Polling interval in seconds; presumably used by a consume loop further
# down the file — TODO confirm.
SLEEP_TIME_IN_SECOND = 10

# Queue that scraped-news metadata arrives on.
RECEIVE_QUEUE_URL = 'amqp://*****:*****@termite.rmq.cloudamqp.com/svowqrcq'
RECEIVE_QUEUE_NAME = 'news-test'
# Queue that fetched article content is published to
# ("fect_news" spelling is the actual queue name on the broker).
FETCH_QUEUE_URL = 'amqp://*****:*****@llama.rmq.cloudamqp.com/txggakbg'
FETCH_QUEUE_NAME = 'fect_news'

# NOTE(review): '__file__' here is a STRING LITERAL, so dirname('__file__')
# returns '' and the path resolves relative to the CWD, not this file.
# Almost certainly meant the __file__ variable — confirm before changing.
sys.path.append(os.path.join(os.path.dirname('__file__'), '..', 'utils'))
from AMQP_client import CloudAMQPClient

# Module-level queue clients shared by the worker functions.
scraper_news_queue_client = CloudAMQPClient(RECEIVE_QUEUE_URL, RECEIVE_QUEUE_NAME)
# NOTE(review): "fecth" typo in the variable name; kept as-is in case other
# code in this file references it.
fecth_news_queue_client = CloudAMQPClient(FETCH_QUEUE_URL, FETCH_QUEUE_NAME)


def handle_message(msg):
    """Validate one queue message and download its article.

    Assumes msg is a dict containing at least a 'url' key — TODO confirm
    schema against the producer. Body is truncated in this view: it ends
    immediately after article.download().
    """
    if not msg or not isinstance(msg, dict):
        print('msg in broken')
        return
    text = None
    # Dead code below kept as-is (note the 'news_scrapter' typo inside it).
    #if msg['source'] == 'cnn':
    #text = news_scrapter.extract_news(msg['url'])
    #else:
    #print('News source [%s] is not supported.' % msg['source'])
    #Download article according the url
    article = Article(msg['url'])
    article.download()
# Click-log worker module: consumes user click events from a CloudAMQP task
# queue and updates a per-user topic-preference model stored in MongoDB.
# NOTE(review): handle_message is truncated in this view — it ends right
# after building new_model, before any DB write.
import news_classes
import os
import sys
sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'utils'))
import mongodb_client
from AMQP_client import CloudAMQPClient

# Task queue carrying {userId, newsId, timestamp} click events — schema
# inferred from the validation below; confirm against the producer.
LOG_CLICKS_TASK_QUEUE_URL = 'amqp://*****:*****@llama.rmq.cloudamqp.com/cvarabcw'
LOG_CLICKS_TASK_QUEUE_NAME = 'tap-news-log-clicks-task-queue'
cloudAMQP_client = CloudAMQPClient(LOG_CLICKS_TASK_QUEUE_URL, LOG_CLICKS_TASK_QUEUE_NAME)

# Shared MongoDB handle and collection names.
db = mongodb_client.get_db()
NEWS_TABLE_NAME = "newstest"
PREFERENCE_MODEL_TABLE_NAME = "user_preference_model"

SLEEP_TIME_IN_SECONDS = 10

# Preference-model parameters: NUM_OF_CLASSES topics, each starting at a
# uniform prior INITIAL_P; ALPHA is presumably a learning/decay rate used in
# the (not-visible) update step — TODO confirm.
NUM_OF_CLASSES = 17
INITIAL_P = 1.0 / NUM_OF_CLASSES
ALPHA = 0.1


def handle_message(msg):
    """Update the clicking user's preference model for one click event.

    Silently ignores malformed messages (not a dict, or missing any of
    userId / newsId / timestamp). Truncated in this view after the
    new-user model is initialized.
    """
    if msg is None or not isinstance(msg, dict):
        return
    if 'userId' not in msg or 'newsId' not in msg or 'timestamp' not in msg:
        return

    userId = msg['userId']
    newsId = msg['newsId']

    # Look up this user's existing preference model, if any.
    model = db[PREFERENCE_MODEL_TABLE_NAME].find_one({"userId": userId})
    #if user is not in the database
    if not model:
        print('create preference model for new user: %s' % userId)
        new_model = {'userId': userId}
# News deduplication worker module: consumes fetched-news messages and
# (per SAME_NEWS_SIMILARITY_THRESHOLD and the TF-IDF import) presumably
# compares each article against same-day articles in MongoDB — the
# comparison logic is beyond this view.
import os
import sys
from dateutil import parser
from sklearn.feature_extraction.text import TfidfVectorizer
sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'utils'))
import mongodb_client
from AMQP_client import CloudAMQPClient

SLEEP_TIME_IN_SECOND = 10

# Queue carrying fetched articles; "fect_news" spelling is the real queue name.
FETCH_QUEUE_URL = 'amqp://*****:*****@llama.rmq.cloudamqp.com/txggakbg'
FETCH_QUEUE_NAME = 'fect_news'
NEWS_TABLE_NAME = "newstest"

cloudAMQP_client = CloudAMQPClient(FETCH_QUEUE_URL, FETCH_QUEUE_NAME)

# Cosine-similarity cutoff above which two articles count as the same story.
SAME_NEWS_SIMILARITY_THRESHOLD = 0.9


# NOTE(review): function name has a typo ("mesage"); kept as-is since the
# consume loop elsewhere in this file may call it by this name.
def handle_mesage(msg):
    """Deduplicate one fetched-news message against same-day stored news.

    Expects msg to be a dict with 'text' and 'publishedAt' keys — TODO
    confirm schema. Truncated in this view partway through computing the
    day window.
    """
    if not msg or not isinstance(msg, dict):
        return
    text = msg['text']
    if not text:
        return
    print(msg)
    #get the start and end time of this day and find list in db
    published_at = parser.parse(msg['publishedAt'])
    # NOTE(review): `datetime` is not imported anywhere in this visible
    # chunk — either it is imported in the non-visible part of the file or
    # this line raises NameError. Verify `import datetime` exists.
    published_at_day_begin = datetime.datetime(published_at.year, published_at.month, published_at.day, 0, 0, 0, 0)
# News monitor script fragment: polls a news API for headlines from fixed
# sources, uses Redis (keyed by an MD5 digest of the title) to skip
# duplicates, and enqueues new stories to CloudAMQP.
# NOTE(review): this is a mid-file chunk — `redis`, `hashlib`,
# `news_api_client`, `CloudAMQPClient`, and NEWS_TIME_OUT_IN_SECONDS must be
# imported/defined above this view, and the while-loop is truncated below.
SLEEP_TIME_TASK_SECONDS = 10

# News-API source identifiers to poll.
# NOTE(review): 'bbc-newson' looks like a typo for 'bbc-news' — confirm
# against the news API's source list.
NEWS_SOURCES = [
    'bbc-newson',
    'bbc-sport',
    'bloomberg',
    'cnn',
    'entertainment-weekly',
    'espn',
    'ign',
    'techcrunch',
    'the-new-york-times',
    'the-wall-street-journal',
    'the-washington-post'
]

#redis
REDIS_HOST = 'localhost'
REDIS_PORT = 6379
redis_client = redis.StrictRedis(REDIS_HOST, REDIS_PORT)

#AMQP_client
QUEUE_URL = "amqp://*****:*****@termite.rmq.cloudamqp.com/svowqrcq"
QUEUE_NAME = "news-test"
cloudAMQP_client = CloudAMQPClient(QUEUE_URL, QUEUE_NAME)

#while
# Main polling loop (truncated in this view after the expire() call).
while True:
    news_list = news_api_client.getNews(NEWS_SOURCES)
    number_of_news = 0
    for news in news_list:
        #redis to prevent duplicate
        #use md5 for title
        news_digest = hashlib.md5(news['title'].encode('utf-8')).hexdigest()
        # Only process titles whose digest has not been seen recently.
        if not redis_client.get(news_digest):
            number_of_news = number_of_news + 1
            news['digest'] = news_digest
            # Mark as seen; the stored value ('hh') is a placeholder — only
            # key existence matters.
            redis_client.set(news_digest, 'hh')
            #set expire time
            redis_client.expire(news_digest, NEWS_TIME_OUT_IN_SECONDS)