Esempio n. 1
0
def getNewsSummariesForUser(user_id, page_num):
    """Return one page of news summaries for a user, newest first.

    The ordered list of news digests is cached in Redis under ``user_id`` so
    that consecutive pages are cut from the same snapshot. On a cache miss the
    newest ``NEWS_LIMIT`` documents are read from MongoDB, their digests are
    cached for ``USER_NEWS_TIME_OUT_IN_SECONDS`` and the page is sliced from
    the freshly loaded list.

    Args:
        user_id: Redis key of the cached digest list; also forwarded to the
            recommender service.
        page_num: 1-based page number; any value accepted by ``int()``.

    Returns:
        A list of JSON-compatible news dicts with the ``text`` field removed.
        Items whose ``class`` equals the user's top preference carry
        ``reason == 'Recommended'``. Empty for pages outside the data range.

    Raises:
        ValueError: If ``page_num`` cannot be converted to an integer.
    """
    page_num = int(page_num)
    if page_num < 1:
        # Non-positive pages would become negative slice indices, which
        # silently wrap around to the end of the list.
        return []

    # Half-open slice [begin_index, end_index) covering this page.
    begin_index = (page_num - 1) * NEWS_LIST_BATCH_SIZE
    end_index = page_num * NEWS_LIST_BATCH_SIZE

    db = MongoDBClient(MONGO_DB_HOST, MONGO_DB_PORT).get_db()

    # Read the cache exactly once: with two separate reads the key can expire
    # in between and the second read would return None.
    cached_digests = redis_client.get(user_id)

    if cached_digests is not None:
        # NOTE: pickle is only safe here because the payload is written by
        # this function; never point it at externally writable keys.
        news_digests = pickle.loads(cached_digests)
        page_digests = news_digests[begin_index:end_index]
        sliced_news = []
        if page_digests:
            sliced_news = list(db[NEWS_TABLE_NAME].find(
                {'digest': {
                    '$in': page_digests
                }}))
            # '$in' does not return documents in the order of the supplied
            # list, so restore the cached (newest first) ordering.
            rank = {
                digest: position
                for position, digest in enumerate(page_digests)
            }
            sliced_news.sort(
                key=lambda item: rank.get(item.get('digest'), len(rank)))
    else:
        # No cached data: load the newest news and cache their digests under
        # the user id before slicing out the requested page.
        total_news = list(db[NEWS_TABLE_NAME].find().sort([
            ('publishedAt', -1)
        ]).limit(NEWS_LIMIT))
        total_news_digest = [item['digest'] for item in total_news]
        redis_client.set(user_id, pickle.dumps(total_news_digest))
        redis_client.expire(user_id, USER_NEWS_TIME_OUT_IN_SECONDS)
        sliced_news = total_news[begin_index:end_index]

    # Get preference for the user; only the first entry is used.
    preference = NewsRecommenderClient(
        NEWS_RECOMMENDER_HOST,
        NEWS_RECOMMENDER_PORT).getPreferenceForUser(user_id)
    topPreference = preference[0] if preference else None

    print('topPreference', topPreference)
    for news in sliced_news:
        # Remove text field to save bandwidth (tolerate documents without it).
        news.pop('text', None)
        # Documents that have not been classified yet have no 'class' field;
        # they must not match a missing preference either.
        if topPreference is not None and news.get('class') == topPreference:
            news['reason'] = 'Recommended'

    return json.loads(dumps(sliced_news))
Esempio n. 2
0
def backfill():
    """Populate the ``class`` field of every news document that lacks one.

    Iterates over the whole ``news`` collection, classifies each unclassified
    document from its description (falling back to its title) and stores the
    resulting topic on that document. Progress is printed to stdout.
    """
    print ('begin backfilling')
    db = MongoDBClient(MONGO_DB_HOST, MONGO_DB_PORT).get_db()
    collection = db['news']
    for count, news in enumerate(collection.find({}), start=1):
        print(count)
        if 'class' in news:
            continue

        print('Populating classes...')
        # Fall back to the title when the description is missing or empty.
        description = news.get('description') or news.get('title')
        if not description:
            # Nothing to classify; leave the document untouched.
            print('Skipping news without description or title')
            continue

        topic = classify(description)
        # Target the exact document by _id and set only the new field.
        # Replacing by digest could match a different document (duplicate
        # digests) and would rewrite the whole document.
        collection.update_one({'_id': news['_id']},
                              {'$set': {'class': topic}})
Esempio n. 3
0
def getPreferenceForUser(userId):
    """Get user's preference in an ordered class list.

    Args:
        userId: Value matched against the ``userId`` field of the stored
            preference model.

    Returns:
        The keys of the model's ``preference`` mapping ordered by descending
        value. An empty list is returned when no model exists, when the model
        has no preferences, or when the highest and lowest values are
        (nearly) equal, i.e. the model expresses no actual preference.
    """
    db = MongoDBClient(MONGO_DB_HOST, MONGO_DB_PORT).get_db()
    model = db[PREFERENCE_MODEL_TABLE_NAME].find_one({'userId': userId})

    if model is None:
        return []

    # A model without (or with an empty) preference mapping carries no
    # information; indexing into it below would fail.
    preference = model.get('preference')
    if not preference:
        return []

    sorted_tuples = sorted(preference.items(),
                           key=operator.itemgetter(1),
                           reverse=True)
    highest_value = sorted_tuples[0][1]
    lowest_value = sorted_tuples[-1][1]

    # If the first preference is same as the last one, the preference makes
    # no sense.
    if isclose(float(highest_value), float(lowest_value)):
        return []

    return [class_name for class_name, _ in sorted_tuples]
Esempio n. 4
0
def getOneNews():
    """Fetch a single document from the news collection as JSON-compatible data.

    The document returned by ``find_one()`` is serialised with ``dumps`` and
    parsed back with ``json.loads`` before being returned.
    """
    news_collection = MongoDBClient(MONGO_DB_HOST,
                                    MONGO_DB_PORT).get_db()[NEWS_TABLE_NAME]
    serialized = dumps(news_collection.find_one())
    return json.loads(serialized)
Esempio n. 5
0
from config import MONGO_DB_HOST, MONGO_DB_PORT
from config import PREFERENCE_MODEL_TABLE_NAME
from tap_news_utils.mongodb_client import MongoDBClient
from tap_news_utils.cloudAMQP_client import CloudAMQPClient

# Number of classes; used below to derive the uniform starting value.
NUM_OF_CLASSES = 8
# Equal share per class: 1 / NUM_OF_CLASSES.
INITIAL_P = 1.0 / NUM_OF_CLASSES
# NOTE(review): presumably the rate at which a click shifts a preference
# value; its use is not visible in this part of the file - confirm.
ALPHA = 0.1

# NOTE(review): presumably the pause between queue polls - confirm against
# the consuming loop.
SLEEP_TIME_IN_SECONDS = 1

# Name used to look up the news collection.
NEWS_TABLE_NAME = "news"

# Module-level clients, constructed once when this module is imported.
# NOTE(review): LOG_CLICKS_TASK_QUEUE_URL and LOG_CLICKS_TASK_QUEUE_NAME are
# not among the imports visible in this part of the file; confirm they are
# imported (e.g. from config), otherwise this raises NameError at import time.
cloudAMQP_client = CloudAMQPClient(LOG_CLICKS_TASK_QUEUE_URL,
                                   LOG_CLICKS_TASK_QUEUE_NAME)
mongodb_client = MongoDBClient(MONGO_DB_HOST, MONGO_DB_PORT)


def handle_message(msg):
    print('click log processor: handle_message', msg)

    if not isinstance(msg, dict):
        print('Error not dict')
        return

    if ('userId' not in msg or 'newsId' not in msg or 'timestamp' not in msg):
        print('Error not valid msg')
        return

    userId = msg['userId']
    newsId = msg['newsId']