Exemple #1
0
def test_basic():
    """Extract news from CNN_NEWS_URL and assert EXPECTED_NEWS appears in it.

    Progress and the scraped text are printed to stdout for manual inspection.
    """
    # print() with a single argument behaves the same on Python 2 and 3,
    # whereas the bare print statement is a SyntaxError on Python 3.
    print("start scraping: ")
    news = scraper.extract_news(CNN_NEWS_URL)

    print(news)
    assert EXPECTED_NEWS in news
    print('test_basic passed!')
def test_basic():
    """Check that the text scraped from CNN_NEWS_URL contains EXPECTED_NEWS."""
    scraped_text = scraper.extract_news(CNN_NEWS_URL)

    # Echo the scraped text so a failing run shows what was extracted.
    print(scraped_text)

    assert EXPECTED_NEWS in scraped_text
    print("Test Passed!")
Exemple #3
0
def handle_message(msg):
    """Scrape the article text for a news task and forward the task.

    Args:
        msg: Task message. Anything that is not a dict (including None) is
            reported on stdout and dropped. A dict is read for its 'source'
            and 'url' entries.

    The task dict gets a 'text' entry (the scraped text, or None when the
    source is not 'cnn') and is then sent via ``dedupe_news_queue_client``.
    """
    # isinstance() is already False for None, so one check covers both cases.
    if not isinstance(msg, dict):
        print('message is broken')
        return

    task = msg
    text = None

    # Only CNN is supported for now.
    if task['source'] == 'cnn':
        print('Scraping CNN news')
        text = cnn_news_scraper.extract_news(task['url'])
    else:
        # Wrap in a tuple so a tuple-valued source cannot break %-formatting.
        print('News source [%s] is not supported.' % (task['source'],))

    # Unsupported sources are still forwarded, with text left as None.
    task['text'] = text

    dedupe_news_queue_client.sendMessage(task)
Exemple #4
0
def test_basic():
    """Scrape CNN_NEWS_URL and verify that EXPECTED_NEWS is in the result."""
    extracted = scraper.extract_news(CNN_NEWS_URL)
    # Print the extracted text for manual inspection.
    print(extracted)

    assert EXPECTED_NEWS in extracted

    print('test_basic passed!')
Exemple #5
0
def test_basic():
    """Extract news from CNN_NEWS_URL and assert EXPECTED_STRING appears in it.

    The scraped text is printed to stdout for manual inspection.
    """
    news = scraper.extract_news(CNN_NEWS_URL)

    # print() with one argument works on both Python 2 and 3; the bare
    # print statement is a SyntaxError on Python 3.
    print(news)
    assert EXPECTED_STRING in news

    print('test_basic passed!')
def test_basic():
    """Smoke-test the scraper: EXPECTED_NEWS must appear in the scraped text."""
    article_text = scraper.extract_news(CNN_NEWS_URL)
    print(article_text)

    # The only check: the known snippet is present in the extracted text.
    assert EXPECTED_NEWS in article_text

    print('test_basic passed!')
Exemple #7
0
def handle_message(msg):
    """Attach scraped text to a news message and forward it.

    Non-dict messages (None included) are logged as broken and dropped.
    Otherwise 'text' is set to the scraped article when msg['source'] is
    'cnn', or to None for any other source, and the message is sent via
    ``dedupe_news_queue_client``.
    """
    # isinstance() is False for None, so this also covers a missing message.
    if not isinstance(msg, dict):
        logger.warning('message is broken')
        return

    is_cnn = msg['source'] == 'cnn'
    msg['text'] = cnn_news_scraper.extract_news(msg['url']) if is_cnn else None

    # TODO (from original author): handle reconnecting when the queue
    # connection is lost.
    dedupe_news_queue_client.sendMessage(msg)
def handle_message_old(msg):
    """Scrape a CNN message and forward it only when text was extracted.

    Non-dict messages are logged as broken and dropped. Messages whose
    source is not 'cnn', or whose scrape yields None or empty text, are
    dropped silently.
    """
    # Reject anything that did not arrive as a dict.
    if not isinstance(msg, dict):
        logger.warning('message is broken')
        return

    if msg['source'] != 'cnn':
        return

    scraped = cnn_news_scraper.extract_news(msg['url'])
    if scraped is None or len(scraped) == 0:
        return

    msg['text'] = scraped
    dedupe_queue_client.sendMessage(msg)
def test_basic(url=NEWS_URL):
    """Extract news from ``url`` and assert EXPECTED_RESULT appears in it.

    Args:
        url: Page to scrape; defaults to NEWS_URL.
    """
    news = cnn_news_scraper.extract_news(url)

    # print() with one argument works on both Python 2 and 3; the bare
    # print statement is a SyntaxError on Python 3.
    print(news)
    assert EXPECTED_RESULT in news
    print('test_basic passed')
def test_basic():
    """Check that ``some_content`` appears in the news extracted from ``url``."""
    extracted = extract_news(url)

    assert some_content in extracted

    print('extract_new trivial case passed')
Exemple #11
0
def basic_test():
    """Verify that EXPECTED_NEWS is in the text scraped from CNN_NEWS_URL."""
    # NOTE(review): the name does not match pytest's default ``test_*``
    # discovery pattern; presumably this is invoked manually. Confirm.
    scraped_text = cnn_news_scraper.extract_news(CNN_NEWS_URL)

    assert EXPECTED_NEWS in scraped_text

    print("basic test passed!")
def test_basic():
    """Extract news from CNN_NEWS_URL and assert EXPECTED_STRING appears in it.

    The scraped text is printed to stdout for manual inspection.
    """
    news = scraper.extract_news(CNN_NEWS_URL)
    # print() with one argument works on both Python 2 and 3; the bare
    # print statement is a SyntaxError on Python 3.
    print(news)
    assert EXPECTED_STRING in news
    print('test_basic passed!')
def test_basic():
    """Scrape ``URL`` and verify that EXPECTED_CONTENT is in the result."""
    scraped = cnn_news_scraper.extract_news(URL)

    # Show what was scraped before checking it.
    print(scraped)

    assert EXPECTED_CONTENT in scraped
    print("CNN scraper works!")
def test_basic():
    """Extract news from CNN_NEWS_URL and check that something was returned.

    The original printed the success message without asserting anything, so
    it could never fail on a bad scrape. No expected-content constant is
    visible for this snippet, so the check is a minimal non-empty one.
    """
    news = scraper.extract_news(CNN_NEWS_URL)

    # print() with one argument works on both Python 2 and 3; the bare
    # print statement is a SyntaxError on Python 3.
    print(news)
    # Fail instead of reporting success when the scraper returns None/empty.
    assert news, 'extract_news returned no content'
    print('test_basic passed!')