Example #1
0
def test_basic():
    """Round-trip one message through the test queue.

    Sends a small dict with ``send_message`` and asserts that the object
    returned by ``get_message`` compares equal to it.
    """
    client = CloudAMQPClient(CLOUDAMQP_URL, TEST_QUEUE_NAME)
    sent_msg = {"test": "test"}
    client.send_message(sent_msg)
    received_msg = client.get_message()

    assert sent_msg == received_msg, \
        "expected %r, got %r" % (sent_msg, received_msg)
    # Parenthesised single-argument form prints the same text on Python 2
    # and is valid syntax on Python 3 (the bare print statement is not).
    print('test_basic passed')
Example #2
0
def test_basic():
    """Round-trip one message through the test queue.

    Sends a small dict with ``sendMessage`` and asserts that the object
    returned by ``getMessage`` compares equal to it.
    """
    client = CloudAMQPClient(CLOUDAMQP_URL, TEST_QUEUE_NAME)

    sent_msg = {"test": "test"}
    client.sendMessage(sent_msg)
    received_msg = client.getMessage()

    assert sent_msg == received_msg, \
        "expected %r, got %r" % (sent_msg, received_msg)
    # Parenthesised single-argument form prints the same text on Python 2
    # and is valid syntax on Python 3 (the bare print statement is not).
    print("test_basic passed.")
Example #3
0
def test_basic():
    """Send one message to the test queue and check it is received intact."""
    client = CloudAMQPClient(CLOUDAMQP_URL, TEST_QUEUE_NAME)

    sent_msg = {"test": "test"}
    client.send_message(sent_msg)
    received_msg = client.get_message()

    assert sent_msg == received_msg, \
        "expected %r, got %r" % (sent_msg, received_msg)
    # Parenthesised single-argument form prints the same text on Python 2
    # and is valid syntax on Python 3 (the bare print statement is not).
    print("test_basic passed.")
Example #4
0
def test_basic():
    """Check that a message sent to the test queue is read back unchanged.

    ``clear_queue`` is called on the test queue before the client is
    created; the same tag string is passed to both the send and the
    receive call.
    """
    clear_queue(CLOUDAMQP_URL, TEST_QUEUE_NAME)
    queue = CloudAMQPClient(CLOUDAMQP_URL, TEST_QUEUE_NAME)

    tag = "[cloudamqp tester]"
    outgoing = {"title": "test news"}
    queue.send_message(outgoing, tag)
    incoming = queue.get_message(tag)

    assert outgoing == incoming
    print("test_basic passed!")
Example #5
0
def clearQueue(queue_url, queue_name):
    """Read messages from a queue until ``get_message()`` returns None.

    Args:
        queue_url: URL handed to ``CloudAMQPClient``.
        queue_name: Queue name handed to ``CloudAMQPClient``.

    Prints how many messages were read before the queue reported empty.
    """
    queue_client = CloudAMQPClient(queue_url, queue_name)
    if queue_client is None:
        # Nothing to read from. Checking once up front avoids spinning
        # forever in the loop below when no client is available.
        return

    num_of_messages = 0
    while queue_client.get_message() is not None:
        num_of_messages += 1

    # Parenthesised form is valid on both Python 2 and Python 3.
    print("Cleared %d messages." % num_of_messages)
Example #6
0
def run():
    """Poll the scrape-task queue forever, passing each message on.

    Every message returned by ``get_message`` is handed to
    ``handle_message``. A failure while handling one message is logged
    with its traceback and does not stop the loop. The client's
    ``sleep`` is called with ``SLEEP_TIME_IN_SECONDS`` after each poll.
    """
    logging.info("[news_fetcher] start running")
    scrape_queue_client = CloudAMQPClient(SCRAPE_NEWS_TASK_QUEUE_URL,
                                          SCRAPE_NEWS_TASK_QUEUE_NAME)
    if scrape_queue_client is None:
        # Checked once here: testing inside the loop would busy-spin
        # without ever sleeping when no client is available.
        logging.error("[news_fetcher] queue client is unavailable")
        return

    while True:
        message = scrape_queue_client.get_message("[news_fetcher]")
        if message is not None:
            try:
                handle_message(message)
            except Exception:  # pylint: disable=broad-except
                # Deliberately broad: one bad message must not kill the
                # worker. logging.exception keeps the traceback.
                logging.exception(
                    "[news_fetcher] failed to handle message")
        scrape_queue_client.sleep(SLEEP_TIME_IN_SECONDS)
Example #7
0
def run():
    """Poll the dedupe-task queue forever, passing each message on.

    Every message returned by ``get_message`` is handed to
    ``handle_message``. A failure while handling one message is logged
    with its traceback and does not stop the loop. The client's
    ``sleep`` is called with ``SLEEP_TIME_IN_SECONDS`` after each poll.
    """
    logging.info("[news_deduper] start running")
    cloudamqp_client = CloudAMQPClient(DEDUPE_NEWS_TASK_QUEUE_URL,
                                       DEDUPE_NEWS_TASK_QUEUE_NAME)
    if cloudamqp_client is None:
        # Checked once here: testing inside the loop would busy-spin
        # without ever sleeping when no client is available.
        logging.error("[news_deduper] queue client is unavailable")
        return

    while True:
        message = cloudamqp_client.get_message("[news_deduper]")

        if message is not None:
            try:
                handle_message(message)
            except Exception:  # pylint: disable=broad-except
                # Deliberately broad: one bad message must not kill the
                # worker. logging.exception keeps the traceback.
                logging.exception(
                    "[news_deduper] failed to handle message")

        cloudamqp_client.sleep(SLEEP_TIME_IN_SECONDS)
Example #8
0
def test_basic():
    """Send one message, wait, then check it is received intact.

    The client's ``sleep(10)`` is called between ``sendMessage`` and
    ``getMessage``.
    """
    client = CloudAMQPClient(CLOUDAMQP_URL, TEST_QUEUE_NAME)

    sent_msg = {'test': 1234}
    client.sendMessage(sent_msg)
    client.sleep(10)
    received_msg = client.getMessage()
    assert sent_msg == received_msg, \
        "expected %r, got %r" % (sent_msg, received_msg)
    # Parenthesised single-argument form prints the same text on Python 2
    # and is valid syntax on Python 3 (the bare print statement is not).
    print('test_basic passed!')
Example #9
0
def run():
    """Fetch news forever and enqueue every item not seen before.

    Each pass asks ``news_api_client`` for the items of ``NEWS_SOURCES``
    and computes the MD5 hex digest of each title. Items whose digest is
    not yet stored in Redis are stamped with the digest (and with the
    current UTC time when ``publishedAt`` is missing or None), recorded
    in Redis with a ``NEWS_TIME_OUT_IN_SECONDS`` expiry, and sent to the
    scrape-task queue. The client's ``sleep`` is called with
    ``SLEEP_TIME_IN_SECONDS`` between passes.
    """
    logging.info("[news_monitor] start running")
    redis_client = redis.StrictRedis(REDIS_HOST, REDIS_PORT)
    cloudamqp_client = CloudAMQPClient(SCRAPE_NEWS_TASK_QUEUE_URL,
                                       SCRAPE_NEWS_TASK_QUEUE_NAME)

    while True:
        news_list = news_api_client.get_news_from_source(NEWS_SOURCES)
        logging.info("[news_monitor] get %d from news API", len(news_list))
        number_of_news = 0

        for news in news_list:
            news_digest = hashlib.md5(
                news['title'].encode('utf-8')).hexdigest()

            if redis_client.get(news_digest) is not None:
                # Digest already recorded: this title was handled before.
                continue

            number_of_news += 1
            news['digest'] = news_digest

            # .get() also covers items that lack the key altogether.
            if news.get('publishedAt') is None:
                news['publishedAt'] = datetime.datetime.utcnow().strftime(
                    "%Y-%m-%dT%H:%M:%SZ")

            # Store the value and its expiry in one command, so a crash
            # between two separate calls cannot leave a key that never
            # expires.
            redis_client.set(news_digest, "True",
                             ex=NEWS_TIME_OUT_IN_SECONDS)

            cloudamqp_client.send_message(news, "[news_monitor]")

        logging.info("[news_monitor] fetch %d news", number_of_news)
        cloudamqp_client.sleep(SLEEP_TIME_IN_SECONDS)
Example #10
0
sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'common'))

from cloudamqp_client import CloudAMQPClient

from newspaper import Article

# AMQP queue settings: one URL/name pair per task queue.
# NOTE(review): the broker credentials are embedded in these URL literals;
# consider loading them from the environment or a config file instead.
DEDUPE_NEWS_TASK_QUEUE_URL = "amqp://*****:*****@fish.rmq.cloudamqp.com/ofmdwnsa"
DEDUPE_NEWS_TASK_QUEUE_NAME = "tap-news-dedupe-news-task-queue"
SCRAPE_NEWS_TASK_QUEUE_URL = "amqp://*****:*****@fish.rmq.cloudamqp.com/ofmdwnsa"
SCRAPE_NEWS_TASK_QUEUE_NAME = "tap-news-scrape-news-task-queue"

# Delay in seconds, going by the name; its use is outside this excerpt.
SLEEP_TIME_IN_SECONDS = 5

# Both clients are created at import time, one per queue configured above.
dedupe_news_queue_client = CloudAMQPClient(DEDUPE_NEWS_TASK_QUEUE_URL,
                                           DEDUPE_NEWS_TASK_QUEUE_NAME)
scrape_news_queue_client = CloudAMQPClient(SCRAPE_NEWS_TASK_QUEUE_URL,
                                           SCRAPE_NEWS_TASK_QUEUE_NAME)


def handleMessage(msg):
    """Download the article named by a task message and print its text.

    Args:
        msg: Expected to be a dict with a ``'url'`` entry. Anything that
            is not a dict (including None) is reported as broken and
            ignored.
    """
    # isinstance() already rejects None, so no separate None test is needed.
    if not isinstance(msg, dict):
        print("Message is broken.")
        return

    # Give the URL to newspaper's Article, then fetch and parse it.
    article = Article(msg['url'])
    article.download()
    article.parse()

    # Parenthesised form is valid on both Python 2 and Python 3.
    print(article.text)
Example #11
0
# Three days, expressed in seconds.
NEWS_TIME_OUT_IN_SECONDS = 3600 * 24 * 3

# Redis connection settings.
REDIS_HOST = 'localhost'
REDIS_PORT = 6379

# NOTE(review): the broker credentials are embedded in this URL literal;
# consider loading them from the environment or a config file instead.
SCRAPE_NEWS_TASK_QUEUE_URL = "amqp://*****:*****@crane.rmq.cloudamqp.com/mitqwttx"
SCRAPE_NEWS_TASK_QUEUE_NAME = "news-scrape-queue"

# Source identifiers passed to news_api_client.getNewsFromSource below.
NEWS_SOURCES = [
    'bbc-news', 'bbc-sport', 'bloomberg', 'cnn', 'entertainment-weekly',
    'espn', 'ign', 'techcrunch', 'the-new-york-times',
    'the-wall-street-journal', 'the-washington-post'
]

# Both clients are created at import time.
redis_client = redis.StrictRedis(REDIS_HOST, REDIS_PORT)
cloudAMQP_client = CloudAMQPClient(SCRAPE_NEWS_TASK_QUEUE_URL,
                                   SCRAPE_NEWS_TASK_QUEUE_NAME)

# Main loop: runs at module level with no exit condition in this excerpt.
while True:
    news_list = news_api_client.getNewsFromSource(NEWS_SOURCES)

    # Counts the items of this pass whose digest was not found in Redis.
    num_of_news_news = 0

    for news in news_list:

        # MD5 of the UTF-8 encoded title, then base64-encoded.
        # NOTE(review): `.encode('base64')` on the raw digest only works on
        # Python 2; Python 3 bytes objects have no encode() method.
        news_digest = hashlib.md5(
            news['title'].encode('utf-8')).digest().encode('base64')

        # A missing key means this title has not been recorded yet.
        if redis_client.get(news_digest) is None:
            num_of_news_news = num_of_news_news + 1
            news['digest'] = news_digest
Example #12
0
# import common package in parent directory
sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'common'))

from cloudamqp_client import CloudAMQPClient  # pylint: disable=import-error, wrong-import-position

# AMQP queue settings: one URL/name pair per task queue.
# NOTE(review): the broker credentials are embedded in these URL literals;
# consider loading them from the environment or a config file instead.
DEDUPE_NEWS_TASK_QUEUE_URL = \
"amqp://*****:*****@wombat.rmq.cloudamqp.com/fhptegqw"
DEDUPE_NEWS_TASK_QUEUE_NAME = "tiny-news-dedupe-news-task-queue"
SCRAPE_NEWS_TASK_QUEUE_URL = \
"amqp://*****:*****@wombat.rmq.cloudamqp.com/vvfthevj"
SCRAPE_NEWS_TASK_QUEUE_NAME = "tiny-news-scrape-news-task-queue"

# Delay in seconds, going by the name; its use is outside this excerpt.
SLEEP_TIME_IN_SECONDS = 5

# Created at import time. Only the dedupe queue gets a client in this
# excerpt; the scrape queue constants above are not used here.
dedupe_queue_client = CloudAMQPClient(DEDUPE_NEWS_TASK_QUEUE_URL,
                                      DEDUPE_NEWS_TASK_QUEUE_NAME)


def handle_message(msg):
    """Download and parse the article referenced by a task message.

    Args:
        msg: Expected to be a dict with a ``'url'`` entry. Anything that
            is not a dict (including None) is logged as an error and
            ignored.
    """
    # The previous test, `msg is not isinstance(msg, dict)`, compared msg
    # by identity with a bool and was therefore true for every dict, so
    # the function always returned here.
    if not isinstance(msg, dict):
        logging.error("[news_fetcher] news is not dict")
        return

    article = Article(msg['url'])
    article.download()
    article.parse()
import os
import sys

from dateutil import parser
from sklearn.feature_extraction.text import TfidfVectorizer

# import common package in parent directory
sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'common'))

import mongodb_client
from cloudamqp_client import CloudAMQPClient

# AMQP settings for the dedupe task queue.
# NOTE(review): the broker credentials are embedded in this URL literal;
# consider loading them from the environment or a config file instead.
DEDUPE_NEWS_TASK_QUEUE_URL = "amqp://*****:*****@donkey.rmq.cloudamqp.com/roenklvc"
DEDUPE_NEWS_TASK_QUEUE_NAME = "tap-news-dedupe-news-task-queue"

# Created at import time.
cloudamqp_client = CloudAMQPClient(DEDUPE_NEWS_TASK_QUEUE_URL,
                                   DEDUPE_NEWS_TASK_QUEUE_NAME)

# Presumably the MongoDB collection that holds news documents; the name
# suggests a test collection. Verify against the code that uses it.
NEWS_TABLE_NAME = 'news-test'
# Presumably a similarity score above which two items count as the same
# news. The comparison itself is outside this excerpt; TODO confirm.
SAME_NEWS_SIMILARITY_THRESHOLD = 0.9

# Delay in seconds, going by the name; its use is outside this excerpt.
SLEEP_TIME_IN_SECONDS = 1


def handle_message(msg):
    """Validate a task message before it is processed.

    Returns early (None) when ``msg`` is not a dict or when its
    ``'text'`` entry is None. A dict without a ``'text'`` key raises
    ``KeyError``.
    """
    # isinstance() is False for None, so this one test covers both cases.
    if not isinstance(msg, dict):
        return

    if msg['text'] is None:
        return
Example #14
0
# Redis port used for the connection created below.
REDIS_PORT = 6379

# AMQP settings for the click-log task queue.
# NOTE(review): the broker credentials are embedded in this URL literal;
# consider loading them from the environment or a config file instead.
LOG_CLICKS_TASK_QUEUE_URL = "amqp://*****:*****@wombat.rmq.cloudamqp.com/ajftnpdj"
LOG_CLICKS_TASK_QUEUE_NAME = "tiny-news-log-clicks-task-queue"

# Put the sibling `common` directory on the import path so the project
# modules below can be imported; this must run before those imports.
sys.path.append(os.path.join(os.path.dirname(__file__), '../common/'))

import mongodb_client  # pylint: disable=import-error, wrong-import-position
from cloudamqp_client import CloudAMQPClient  # pylint: disable=import-error, wrong-import-position
import news_recommendation_service_client  # pylint: disable=import-error, wrong-import-position

# Name of the MongoDB collection read by get_one_news().
NEWS_TABLE_NAME = "news"

# Both clients are created at import time; Redis uses database 0.
redis_client = redis.StrictRedis(REDIS_HOST, REDIS_PORT, db=0)
cloudamqp_client = CloudAMQPClient(LOG_CLICKS_TASK_QUEUE_URL,
                                   LOG_CLICKS_TASK_QUEUE_NAME)


def get_one_news():
    """Return one document from the news collection as parsed JSON data."""
    collection = mongodb_client.get_db()[NEWS_TABLE_NAME]
    document = collection.find_one()
    # Serialise the document to a string with dumps(), then parse that
    # string back with the json module.
    serialized = dumps(document)
    return json.loads(serialized)


def get_news_summaries_for_user(user_id, page_num):
    """Get news summaries"""
    page_num = int(page_num)
    # [begin_index, end_index)
    begin_index = (page_num - 1) * NEWS_LIST_BATCH_SIZE
    end_index = page_num * NEWS_LIST_BATCH_SIZE