from bson.json_util import dumps
from datetime import datetime

# import common package in parent directory
# (this path entry has to be added before the project imports that follow)
sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'common'))

import mongodb_client
import news_recommendation_service_client
from cloud_amqp_client import CloudAMQPClient

# get config
import config_client

config = config_client.get_config('../config/config_backend_server.yaml')

# Every setting below is read from the 'operations' section of that config.
REDIS_HOST = config['operations']['REDIS_HOST']
REDIS_PORT = config['operations']['REDIS_PORT']
NEWS_TABLE_NAME = config['operations']['NEWS_TABLE_NAME']
CLICK_LOGS_TABLE_NAME = config['operations']['CLICK_LOGS_TABLE_NAME']
NEWS_LIMIT = config['operations']['NEWS_LIMIT']
NEWS_LIST_BATCH_SIZE = config['operations']['NEWS_LIST_BATCH_SIZE']
USER_NEWS_TIME_OUT_IN_SECONDS = config['operations'][
    'USER_NEWS_TIME_OUT_IN_SECONDS']

# NOTE(review): unlike the settings above, the click-log queue URL and name
# are hard-coded here instead of being read from the config file.
LOG_CLICKS_TASK_QUEUE_URL = "amqp://*****:*****@donkey.rmq.cloudamqp.com/hwobvzoo"
LOG_CLICKS_TASK_QUEUE_NAME = "tap-news-log-clicks-task-queue"

# Module-level Redis client created at import time, using database index 0.
redis_client = redis.StrictRedis(REDIS_HOST, REDIS_PORT, db=0)
import datetime
import hashlib
import redis
import os
import sys

# Add the sibling 'common' directory to the import path; this has to happen
# before the project imports that follow.
sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'common'))

import news_api_client # pylint: disable=E0401, C0413
from cloud_amqp_client import CloudAMQPClient # pylint: disable=E0401, C0413

# get config
import config_client # pylint: disable=E0401, C0413

config = config_client.get_config('../config/config_news_pipeline.yaml')

# Settings read from the 'news_monitor' section of the pipeline config.
REDIS_HOST = config['news_monitor']['REDIS_HOST']
REDIS_PORT = config['news_monitor']['REDIS_PORT']
SCRAPE_NEWS_TASK_QUEUE_URL = config['news_monitor'][
    'SCRAPE_NEWS_TASK_QUEUE_URL']
SCRAPE_NEWS_TASK_QUEUE_NAME = config['news_monitor'][
    'SCRAPE_NEWS_TASK_QUEUE_NAME']

SLEEP_TIME_IN_SECONDS = 10 * 6  # evaluates to 60 seconds
NEWS_TIME_OUT_IN_SECONDS = 3600 * 24 * 3  # evaluates to three days

# Source identifiers; how they are consumed is outside this section.
NEWS_SOURCES = [
    'bbc-news', 'bbc-sport', 'bloomberg', 'cnn', 'entertainment-weekly',
    'espn', 'ign', 'techcrunch', 'the-new-york-times',
    'the-wall-street-journal', 'the-washington-post'
]

# Add the parent directory itself to the import path, presumably so that the
# 'logger' package imported on the next line resolves.
sys.path.append(os.path.join(os.path.dirname(__file__), '..', ''))
from logger.log import LOGGING_NEWS_MONITOR
import operator
import os
import pyjsonrpc
import sys

# import common package in parent directory
sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'common'))

import mongodb_client

# get config
import config_client

config = config_client.get_config(
    '../config/config_news_recommendation_service.yaml')
PREFERENCE_MODEL_TABLE_NAME = config['click_log_processor'][
    'PREFERENCE_MODEL_TABLE_NAME']
SERVER_HOST = config['service']['SERVER_HOST']
SERVER_PORT = config['service']['SERVER_PORT']


# Ref: https://www.python.org/dev/peps/pep-0485/#proposed-implementation
# Ref: http://stackoverflow.com/questions/5595425/what-is-the-best-way-to-compare-floats-for-almost-equality-in-python
def isclose(a, b, rel_tol=1e-09, abs_tol=0.0):
    """
    Return True when a and b are approximately equal.

    The absolute difference abs(a - b) must not exceed the larger of
    rel_tol * max(abs(a), abs(b)) and abs_tol. With the default arguments a
    non-zero value is never considered close to 0.0, because the relative
    bound shrinks with the operands and abs_tol is 0.0.
    """
    return abs(a - b) <= max(rel_tol * max(abs(a), abs(b)), abs_tol)


class RequestHandler(pyjsonrpc.HttpRequestHandler):
    """ Get user's preference in an ordered class list """
    @pyjsonrpc.rpcmethod
    def getPreferenceForUser(self, user_id):
'''
Client for NewsAPI:
- send a request to NewsAPI
- extract info from the response
- populate source
'''
from json import loads

import requests

# get config
import config_client

config = config_client.get_config('../config/config_common.yaml')

# Settings read from the 'news_api_client' section of the common config.
NEWS_API_ENDPOINT = config['news_api_client']['NEWS_API_ENDPOINT']
NEWS_API_KEY = config['news_api_client']['NEWS_API_KEY']
ARTICLES_API = config['news_api_client']['ARTICLES_API']
SORT_BY_TOP = config['news_api_client']['SORT_BY_TOP']

# Source identifiers and the default selection built from them.
BBC_NEWS = 'bbc-news'
BBC_SPORT = 'bbc-sport'
CNN = 'cnn'
DEFAULT_SOURCES = [BBC_NEWS, CNN]


def build_url(end_point=NEWS_API_ENDPOINT, api_name=ARTICLES_API):
    '''
    Return the request URL: end_point immediately followed by api_name.

    The two parts are concatenated as-is; no separator is inserted between
    them. Both defaults are the config values bound when this function is
    defined.
    '''
    return end_point + api_name


# NOTE(review): the `sources` default is the module-level DEFAULT_SOURCES list
# object itself; if the body mutates it, later calls would see the change.
def getNewsFromSource(sources=DEFAULT_SOURCES, sort_by=SORT_BY_TOP):
def test_basic():
    """
    Check that the config loaded from PATH is not empty.

    Loads the config through client.get_config(PATH), asserts that it has at
    least one entry, then prints a confirmation line followed by the loaded
    config.

    Raises:
        AssertionError: if the loaded config is empty.
    """
    config = client.get_config(PATH)
    assert len(config) > 0
    # Single-argument, parenthesised print: identical output on Python 2 and
    # valid syntax on Python 3 (the bare print statement is not).
    print('test_basic passed.')
    print(config)
# Back-fill script: assigns a 'class' (topic) to every stored news document
# that does not have one yet.
import os
import sys

# import common package in parent directory
# (this path entry has to be added before the project imports that follow)
sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'common'))

import mongodb_client
import news_topic_modeling_service_client
import config_client

config = config_client.get_config('../config/config_machine_learning_server.yaml')
NEWS_TABLE_NAME = config['mongodb_client']['NEWS_TABLE_NAME']

if __name__ == '__main__':
    db = mongodb_client.get_db()
    # Look the collection up once instead of on every write.
    collection = db[NEWS_TABLE_NAME]

    # Walk every document, printing a running 1-based count as progress.
    for count, news in enumerate(collection.find({}), 1):
        # Single-argument, parenthesised print: same output on Python 2 and
        # valid syntax on Python 3.
        print(count)
        if 'class' in news:
            continue

        print('Populating classes...')
        # Fall back to the title when the description is missing or None;
        # .get() avoids a KeyError for documents without the field at all.
        description = news.get('description')
        if description is None:
            description = news['title']

        news['class'] = news_topic_modeling_service_client.classify(description)
        # Write the whole document back, matched by its digest.
        collection.replace_one({'digest': news['digest']}, news, upsert=True)