Exemple #1
0
def test_basic():
    """Round-trip one message through the test queue and verify it is unchanged."""
    payload = {'test': 'demo'}

    queue = RabbitMQClient(HOST, TEST_QUEUE_NAME)
    queue.sendMessage(payload)
    # Pause through the client's own sleep() before reading the message back.
    queue.sleep(10)
    echoed = queue.getMessage()

    assert payload == echoed
    print('test_basic passed!')
Exemple #2
0
# Add the sibling ``common`` directory (one level above this file) to the
# module search path. The project-local imports below are presumably resolved
# from there -- TODO confirm.
sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'common'))

import mongodb_client
from rabbitMQ_client import RabbitMQClient
import news_topic_modeling_service_client

# Host and queue name handed to the RabbitMQ client created below.
DEDUPE_NEWS_TASK_QUEUE_HOST = 'localhost'
DEDUPE_NEWS_TASK_QUEUE_NAME = 'dedupe-news-task-queue'

# NOTE(review): presumably the pause between queue polls, in seconds; its use
# is not visible in this excerpt.
SLEEP_TIME_IN_SECONDS = 1

# NOTE(review): presumably the MongoDB collection holding news documents;
# its use is not visible in this excerpt.
NEWS_TABLE_NAME = 'news'

# NOTE(review): presumably the similarity score above which two articles are
# treated as the same news; its use is not visible in this excerpt.
SAME_NEWS_SIMILARITY_THRESHOLD = 0.8

# Module-level client for the dedupe task queue, created when this module is
# loaded.
dedupe_news_queue_client = RabbitMQClient(DEDUPE_NEWS_TASK_QUEUE_HOST,
                                          DEDUPE_NEWS_TASK_QUEUE_NAME)


def handle_message(msg):
    """Process one dedupe task message.

    Messages that are not dicts (including ``None``) and tasks whose
    ``'text'`` value is ``None`` are ignored.

    Args:
        msg: The task; expected to be a dict with ``'text'`` and
            ``'publishedAt'`` entries.

    Raises:
        KeyError: If the dict has no ``'text'`` or ``'publishedAt'`` key.
    """
    # isinstance() already rejects None, so no separate None test is needed.
    if not isinstance(msg, dict):
        return
    task = msg

    # Test for a missing text *before* converting: str(None) is the string
    # 'None', so a None check made after the conversion can never fire.
    raw_text = task['text']
    if raw_text is None:
        return
    text = str(raw_text)

    # Time window for looking up recent news (the lookup itself is not part
    # of the code shown here).
    # NOTE(review): `parser` is presumably dateutil.parser -- confirm import.
    published_at = parser.parse(task['publishedAt'])
    print(published_at)
    # Despite the name, this is exactly one day before the publication time,
    # not midnight of the publication day.
    published_at_day_begin = published_at - datetime.timedelta(days=1)
    print(published_at_day_begin)
# Connection settings for the Redis client created below.
REDIS_HOST = 'localhost'
REDIS_PORT = 6379

# NOTE(review): presumably database table/collection names; their use is not
# visible in this excerpt.
NEWS_TABLE_NAME = 'news'
CLICK_LOGS_TABLE_NAME = 'click_logs'

# NOTE(review): NEWS_LIMIT presumably caps how many news items are loaded per
# user -- confirm against the code that uses it.
NEWS_LIMIT = 100
# Page size used by getNewsSummariesForUser to turn a page number into
# begin/end indices.
NEWS_LIST_BATCH_SIZE = 10
# NOTE(review): presumably the expiry, in seconds, of a user's cached news
# list in Redis; its use is not visible in this excerpt.
USER_NEWS_TIMEOUT_IN_SECONDS = 600

# Host and queue name handed to the click-log RabbitMQ client created below.
LOG_CLICK_TASK_QUEUE_HOST = 'localhost'
LOG_CLICK_TASK_QUEUE_NAME = "log-click-task-queue"

# Module-level clients, created when this module is loaded. The Redis client
# uses database 0.
redis_client = redis.StrictRedis(REDIS_HOST, REDIS_PORT, db=0)
click_queue_client = RabbitMQClient(LOG_CLICK_TASK_QUEUE_HOST,
                                    LOG_CLICK_TASK_QUEUE_NAME)


def getNewsSummariesForUser(user_id, page_num):
    """Look up one page of news for a user, using the Redis cache when present.

    Args:
        user_id: Key under which the user's pickled news digests are cached
            in Redis.
        page_num: 1-based page number; anything accepted by ``int()``.

    Raises:
        ValueError: If ``page_num`` is not a positive integer (also raised by
            ``int()`` itself for non-numeric strings).
    """
    page_num = int(page_num)
    if page_num <= 0:
        raise ValueError('page_num should be a positive integer.')
    # Half-open index range [begin_index, end_index) of the requested page.
    begin_index = (page_num - 1) * NEWS_LIST_BATCH_SIZE
    end_index = page_num * NEWS_LIST_BATCH_SIZE

    # The news list to be returned
    sliced_news = []

    # Read the cache entry exactly once. Checking with one GET and loading
    # with a second one lets the key expire in between, which would hand None
    # to pickle.loads() and raise TypeError (and costs an extra round trip).
    cached_digests = redis_client.get(user_id)
    if cached_digests is not None:
        # NOTE(security): pickle.loads() can execute arbitrary code. This is
        # only safe while nothing untrusted can write to this Redis instance.
        news_digests = pickle.loads(cached_digests)
Exemple #4
0
from rabbitMQ_client import RabbitMQClient

# Connection settings for the Redis client created below.
REDIS_HOST = 'localhost'
REDIS_PORT = 6379

# Host and queue name handed to the RabbitMQ client created below.
SCRAPE_NEWS_TASK_QUEUE_HOST = 'localhost'
SCRAPE_NEWS_TASK_QUEUE_NAME = "scrape-news-task-queue"

# Sources passed to news_api_client.getNewsFromSource() in the polling loop.
# NOTE(review): this is a one-element list holding a single comma-separated
# string, not a list of individual source ids -- confirm that
# getNewsFromSource() expects this shape.
NEWS_SOURCES = [
    'cnn, bbc-news, bloomberg, espn, cnbc, business-insider, abc-news, buzzfeed, bbc-sport, fox-news, the-verge, techradar, talksport, nfl-news, nhl-news, reddit-r-all']

# 24 hours expressed in seconds.
# NOTE(review): presumably the Redis expiry for stored news; its use is not
# visible in this excerpt.
NEWS_TIME_OUT_IN_SECONDS = 3600 * 24
# NOTE(review): presumably the pause between polling rounds, in seconds; its
# use is not visible in this excerpt.
SLEEP_TIME_IN_SECONDS = 60

# Module-level clients, created when this script starts.
redis_client = redis.StrictRedis(REDIS_HOST, REDIS_PORT)
rabbitMQ_client = RabbitMQClient(SCRAPE_NEWS_TASK_QUEUE_HOST, SCRAPE_NEWS_TASK_QUEUE_NAME)

# Poll the news API in an endless loop and record previously unseen articles
# in Redis.
while True:
    news_list = news_api_client.getNewsFromSource(NEWS_SOURCES)

    # Number of articles in this batch that were not yet present in Redis.
    nums_of_new_news = 0
    for news in news_list:
        # The MD5 hex digest of the UTF-8 encoded title is the article's
        # Redis key, so articles with identical titles share one key.
        news_digest = hashlib.md5(news['title'].encode('utf-8')).hexdigest()

        # Only articles whose digest is not already stored are handled.
        if redis_client.get(news_digest) is None:
            nums_of_new_news += 1
            news['digest'] = news_digest

            # Fall back to the current UTC time, formatted with a trailing
            # 'Z', when the article carries no publication timestamp.
            if news['publishedAt'] is None:
                news['publishedAt'] = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ")
            # Store the article as JSON under its digest.
            redis_client.set(news_digest, json.dumps(news))