# -*- coding: utf-8 -*-
from goose import Goose
import pymongo
from bs4 import BeautifulSoup
import requests
import datetime
import zlib
import cPickle as CP
import cld
from requests.exceptions import ConnectionError, Timeout
import bson
import settings
import logging_mc

# Module-level logger for this scraper, obtained from the project's
# logging_mc helper under the name 'valor'.
logger = logging_mc.get_logger('valor')

# MongoDB handles shared by the rest of the module. The host is read from
# settings.MONGOHOST; the port is hard-coded to 27017.
client = pymongo.MongoClient(settings.MONGOHOST, 27017)
MCDB = client.MCDB
ARTICLES = MCDB.articles  # Article Collection
# Request an index on the "source" field of the articles collection. This is
# executed at import time, so merely importing the module talks to MongoDB.
# NOTE(review): ensure_index is deprecated in PyMongo 3.x in favour of
# create_index -- confirm the installed PyMongo version before upgrading.
ARTICLES.ensure_index("source")

def find_articles():
    """
    Get the urls of last news
    :return: last news' urls of all categories
    :rtype: set()
    """
    urls = ['http://www.valor.com.br/ultimas-noticias/brasil',
            'http://www.valor.com.br/ultimas-noticias/politica',
            'http://www.valor.com.br/ultimas-noticias/financas',
            'http://www.valor.com.br/ultimas-noticias/empresas',
# -*- coding: utf-8 -*-
from goose import Goose
import pymongo
from bs4 import BeautifulSoup
import requests
import datetime
import zlib
import cPickle as CP
import cld
from requests.exceptions import ConnectionError, Timeout
import bson
import settings
import logging_mc
import re

# Module-level logger for this scraper, obtained from the project's
# logging_mc helper under the name 'ZH'.
logger = logging_mc.get_logger('ZH')

# MongoDB handles shared by the rest of the module. The host is read from
# settings.MONGOHOST; the port is hard-coded to 27017.
client = pymongo.MongoClient(settings.MONGOHOST, 27017)
MCDB = client.MCDB
ARTICLES = MCDB.articles  # Article Collection
# Request an index on the "source" field of the articles collection. This is
# executed at import time, so merely importing the module talks to MongoDB.
# NOTE(review): ensure_index is deprecated in PyMongo 3.x in favour of
# create_index -- confirm the installed PyMongo version before upgrading.
ARTICLES.ensure_index("source")

def find_articles():
    """
    Get the urls of last news
    :return: last news' urls of all categories
    :rtype: set()
    """
    urls = ['http://zh.clicrbs.com.br/rs/noticias/ultimas-noticias/',
            'http://zh.clicrbs.com.br/rs/entretenimento/ultimas-noticias/',
            'http://zh.clicrbs.com.br/rs/esportes/ultimas-noticias/',