Exemple #1
0
from bson.json_util import dumps
from datetime import datetime

# import common package in parent directory
sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'common'))

import mongodb_client
import news_recommendation_service_client

from cloud_amqp_client import CloudAMQPClient

# get config
import config_client

# Backend-server settings: every value read below comes from the
# 'operations' section of the YAML config.
config = config_client.get_config('../config/config_backend_server.yaml')
_OPERATIONS = config['operations']

# Redis connection settings.
REDIS_HOST = _OPERATIONS['REDIS_HOST']
REDIS_PORT = _OPERATIONS['REDIS_PORT']

# Table names for news documents and click logs.
NEWS_TABLE_NAME = _OPERATIONS['NEWS_TABLE_NAME']
CLICK_LOGS_TABLE_NAME = _OPERATIONS['CLICK_LOGS_TABLE_NAME']

# News list limit, batch size and per-user time-out (in seconds).
NEWS_LIMIT = _OPERATIONS['NEWS_LIMIT']
NEWS_LIST_BATCH_SIZE = _OPERATIONS['NEWS_LIST_BATCH_SIZE']
USER_NEWS_TIME_OUT_IN_SECONDS = _OPERATIONS['USER_NEWS_TIME_OUT_IN_SECONDS']

# NOTE(review): unlike the settings above, the click-log queue URL and name
# are literals in this module rather than values read from the config file.
LOG_CLICKS_TASK_QUEUE_URL = "amqp://*****:*****@donkey.rmq.cloudamqp.com/hwobvzoo"
LOG_CLICKS_TASK_QUEUE_NAME = "tap-news-log-clicks-task-queue"

# Module-wide Redis client on database 0.
redis_client = redis.StrictRedis(REDIS_HOST, REDIS_PORT, db=0)
Exemple #2
0
import datetime
import hashlib
import redis
import os
import sys

sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'common'))

import news_api_client  # pylint: disable=E0401, C0413
from cloud_amqp_client import CloudAMQPClient  # pylint: disable=E0401, C0413

# get config
import config_client  # pylint: disable=E0401, C0413
# News-pipeline settings: the monitor reads everything from the
# 'news_monitor' section of the YAML config.
config = config_client.get_config('../config/config_news_pipeline.yaml')
_MONITOR_SETTINGS = config['news_monitor']

# Redis connection settings.
REDIS_HOST = _MONITOR_SETTINGS['REDIS_HOST']
REDIS_PORT = _MONITOR_SETTINGS['REDIS_PORT']

# Task queue for scrape-news jobs.
SCRAPE_NEWS_TASK_QUEUE_URL = _MONITOR_SETTINGS['SCRAPE_NEWS_TASK_QUEUE_URL']
SCRAPE_NEWS_TASK_QUEUE_NAME = _MONITOR_SETTINGS['SCRAPE_NEWS_TASK_QUEUE_NAME']

SLEEP_TIME_IN_SECONDS = 60  # 10 * 6 seconds, i.e. one minute
NEWS_TIME_OUT_IN_SECONDS = 3 * 24 * 3600  # three days, in seconds

# Identifiers of the news sources.
NEWS_SOURCES = [
    'bbc-news',
    'bbc-sport',
    'bloomberg',
    'cnn',
    'entertainment-weekly',
    'espn',
    'ign',
    'techcrunch',
    'the-new-york-times',
    'the-wall-street-journal',
    'the-washington-post',
]

sys.path.append(os.path.join(os.path.dirname(__file__), '..', ''))
from logger.log import LOGGING_NEWS_MONITOR
import operator
import os
import pyjsonrpc
import sys

# import common package in parent directory
sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'common'))

import mongodb_client

# get config
import config_client
# Recommendation-service settings, read from two sections of the YAML config.
_CONFIG_PATH = '../config/config_news_recommendation_service.yaml'
config = config_client.get_config(_CONFIG_PATH)

# Table name for the preference model, from the 'click_log_processor' section.
_CLICK_LOG_PROCESSOR = config['click_log_processor']
PREFERENCE_MODEL_TABLE_NAME = _CLICK_LOG_PROCESSOR['PREFERENCE_MODEL_TABLE_NAME']

# Server host and port, from the 'service' section.
_SERVICE = config['service']
SERVER_HOST = _SERVICE['SERVER_HOST']
SERVER_PORT = _SERVICE['SERVER_PORT']


# Ref: https://www.python.org/dev/peps/pep-0485/#proposed-implementation
# Ref: http://stackoverflow.com/questions/5595425/what-is-the-best-way-to-compare-floats-for-almost-equality-in-python
def isclose(a, b, rel_tol=1e-09, abs_tol=0.0):
    """Return True when ``a`` and ``b`` are approximately equal.

    Two values are close when their absolute difference is at most the
    larger of ``rel_tol`` scaled by the larger magnitude of the two values,
    and ``abs_tol``.

    Args:
        a: First number.
        b: Second number.
        rel_tol: Relative tolerance, as a fraction of the larger magnitude.
        abs_tol: Absolute tolerance; useful for comparisons near zero.

    Returns:
        True if the values are close (or exactly equal), False otherwise.
        An infinity is close only to itself; NaN is close to nothing.
    """
    # Exact equality short-circuits. This also covers same-signed infinities,
    # for which ``a - b`` would be NaN and the comparison below would fail.
    if a == b:
        return True

    # An infinity is never close to any other value. Without this guard
    # ``rel_tol * inf`` makes the tolerance infinite and everything "close".
    infinity = float('inf')
    if abs(a) == infinity or abs(b) == infinity:
        return False

    return abs(a - b) <= max(rel_tol * max(abs(a), abs(b)), abs_tol)


class RequestHandler(pyjsonrpc.HttpRequestHandler):
    """ Get user's preference in an ordered class list """
    @pyjsonrpc.rpcmethod
    def getPreferenceForUser(self, user_id):
'''
Send a request to NewsAPI,
extract info from the response,
populate source.
'''
from json import loads
import requests

# get config
import config_client
# NewsAPI client settings: everything is read from the 'news_api_client'
# section of the common YAML config.
config = config_client.get_config('../config/config_common.yaml')
_API_SETTINGS = config['news_api_client']

NEWS_API_ENDPOINT = _API_SETTINGS['NEWS_API_ENDPOINT']
NEWS_API_KEY = _API_SETTINGS['NEWS_API_KEY']
ARTICLES_API = _API_SETTINGS['ARTICLES_API']
SORT_BY_TOP = _API_SETTINGS['SORT_BY_TOP']

# Source identifiers.
BBC_NEWS = 'bbc-news'
BBC_SPORT = 'bbc-sport'
CNN = 'cnn'

# Default list of sources (BBC News and CNN).
DEFAULT_SOURCES = [BBC_NEWS, CNN]


def build_url(end_point=NEWS_API_ENDPOINT, api_name=ARTICLES_API):
    '''
    Build a request URL by appending the API name to the end point.

    Both arguments default to the values loaded from the config file.
    Returns end_point followed directly by api_name; no separator is added.
    '''
    url = end_point + api_name
    return url


def getNewsFromSource(sources=DEFAULT_SOURCES, sort_by=SORT_BY_TOP):
def test_basic():
    """Check that the config at PATH loads to a non-empty result, then print it."""
    config = client.get_config(PATH)
    assert len(config) > 0
    # A parenthesised single-argument print is valid both as the Python 2
    # statement and as the Python 3 function, and prints the same text.
    print('test_basic passed.')
    print(config)
Exemple #6
0
import os
import sys

# import common package in parent directory
sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'common'))

import mongodb_client
import news_topic_modeling_service_client

import config_client
# Machine-learning-server settings; only the news table name is needed here.
config = config_client.get_config('../config/config_machine_learning_server.yaml')

NEWS_TABLE_NAME = config['mongodb_client']['NEWS_TABLE_NAME']

if __name__ == '__main__':
    # Backfill script: visit every news document and classify the ones that
    # do not have a 'class' field yet.
    db = mongodb_client.get_db()
    cursor = db[NEWS_TABLE_NAME].find({})
    for count, news in enumerate(cursor, 1):
        # Progress output: running number of documents visited so far.
        # Parenthesised single-argument print works on Python 2 and 3 alike.
        print(count)
        if 'class' in news:
            continue

        print('Populating classes...')
        description = news['description']
        if description is None:
            # Fall back to the title when the description is null.
            description = news['title']

        topic = news_topic_modeling_service_client.classify(description)
        news['class'] = topic
        # Write the updated document back, matched by its digest.
        db[NEWS_TABLE_NAME].replace_one({'digest': news['digest']}, news, upsert=True)