# The examples on this page assume `import requests`, `import json`, and a
# project-local `s3_handler` module (sketched below).
def get_news(link: str) -> None:
    """Download a news page and store the raw HTML in S3."""
    print(link)
    content = requests.get(link)

    # Use the last non-empty URL path segment as the file name.
    filename = link.split('/')[-2] if link.endswith('/') else link.split(
        '/')[-1]

    s3_handler.upload_file(content.text, 'teste', filename + '.html')
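
These snippets call into a small s3_handler module that the page never shows. As a rough sketch only, here is what upload_file and get_file could look like on top of boto3, inferred from how they are called in the examples; the bucket name and the str-or-dict handling are assumptions, not the original code:

import json

import boto3

_s3 = boto3.client('s3')
_BUCKET = 'my-bucket'  # assumption: the real bucket name lives in project config


def upload_file(data, prefix, filename):
    # The examples pass both raw strings (HTML) and dicts (JSON documents),
    # so serialize dicts before uploading.
    body = data if isinstance(data, str) else json.dumps(data, ensure_ascii=False)
    _s3.put_object(Bucket=_BUCKET, Key='%s/%s' % (prefix, filename),
                   Body=body.encode('utf-8'))


def get_file(key):
    # Return the object body as text.
    return _s3.get_object(Bucket=_BUCKET, Key=key)['Body'].read().decode('utf-8')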

Example #2
def normalize_news(doc):
    """Load a stored news JSON document, add a normalized body, and save it back."""
    print(doc['Key'])
    content = s3_handler.get_file(doc['Key'])
    obj = json.loads(content)
    obj['news']['body_normalized'] = normalize(obj['news']['body'])

    filename = get_filename(doc['Key'])
    s3_handler.upload_file(obj, '00/camara-news/json', filename)
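
normalize and get_filename are likewise project helpers that are not shown on this page. A plausible sketch, assuming normalize strips accents and lowercases (it is applied to Portuguese names and article bodies) and get_filename keeps the last segment of the S3 key; both implementations are guesses from usage:

import unicodedata


def normalize(text):
    # Assumed behavior: drop diacritics and lowercase, for accent-insensitive matching.
    stripped = ''.join(c for c in unicodedata.normalize('NFKD', text)
                       if not unicodedata.combining(c))
    return stripped.lower()


def get_filename(key):
    # Assumed behavior: a key like '00/camara-news/json/foo.json' maps to 'foo.json'.
    return key.split('/')[-1]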

Example #3
def getNews(url):
    response = requests.get(url)
    # Only store the page if the request succeeded.
    if response.status_code == 200:
        data = response.text
        # Use the last non-empty URL path segment as the file name.
        filename = url.split('/')[-2] if url.endswith('/') else url.split(
            '/')[-1]
        filename = '%s.html' % filename
        s3_handler.upload_file(data, '06/html', filename)

Example #4
def get_detalhe_deputado(deputado_id):
    """Fetch a single deputy's details from the Dados Abertos API and store them as JSON."""
    url = f'{url_deputados}/{deputado_id}'
    response = requests.get(url)

    if response.status_code == 200:
        deputado = json.loads(response.text)

        s3_handler.upload_file(deputado['dados'],
                               '00/dadosabertos-camara-deputados',
                               str(deputado['dados']['id']) + '.json')

Example #5
def normalize_deputados(doc):
    """Load a stored deputy JSON document, add normalized name fields, and save it back."""
    print(doc['Key'])
    content = s3_handler.get_file(doc['Key'])
    obj = json.loads(content)

    obj['nomeCivil_normalized'] = normalize(obj['nomeCivil'])
    obj['ultimoStatus']['nome_normalized'] = normalize(
        obj['ultimoStatus']['nome'])
    obj['ultimoStatus']['siglaPartido_normalized'] = normalize(
        obj['ultimoStatus']['siglaPartido'])

    filename = get_filename(doc['Key'])
    s3_handler.upload_file(obj, '00/dadosabertos-camara-deputados', filename)

Example #6
    # Fragment: this example was cut off upstream. It assumes `soup` is a
    # BeautifulSoup document for a camara.leg.br article page and that
    # `title` was extracted earlier in the same function.
    date = soup.find('p', attrs={'class': 'g-artigo__data-hora'}).text.strip()
    article = soup.find('div', attrs={'class': 'js-article-read-more'})

    references = [{'link': reference['href'], 'label': reference.text}
                  for reference in article.find_all('a')]

    news_link = soup.find('meta', attrs={'property': 'og:url'})['content']

    news = {'body': article.text, 'link': news_link}

    category = soup.find('span', {'class': 'g-artigo__categoria'}).text
    proposals = [{'text': li.a.text, 'link': li.a['href']}
                 for li in soup.find_all('li', {'class': 'integra-lista__item'})]

    data = {
        'title': title,
        'date': date,
        'references': references,
        'news': news,
        'category': category,
        'proposals': proposals
    }

    filename = news_link.split(
        '/')[-2] if news_link.endswith('/') else news_link.split('/')[-1]

    s3_handler.upload_file(data, '00/camara-news/json', filename + '.json')


load_files()
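
The fragment above starts mid-function: soup and title are referenced but never created on this page. For context, here is a minimal preamble it plausibly assumes; the function name, the title selector, and everything else below are hypothetical reconstructions, not the original code:

import requests
from bs4 import BeautifulSoup


def get_article(link):  # hypothetical name; the original def line was cut off
    html = requests.get(link).text
    soup = BeautifulSoup(html, 'html.parser')
    title = soup.find('h1', attrs={'class': 'g-artigo__titulo'}).text.strip()  # selector is a guess
    ...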
def getDetailsPerson(person_id):
    url = '%s/%s' % (urlApi, person_id)
    response = requests.get(url)
    # Return the parsed payload on success, an empty dict otherwise.
    if response.status_code == 200:
        obj = json.loads(response.text)
        return obj
    return {}


def getAllPersons():
    '''
    Consume the Dados Abertos API (Deputados) and return the deputy ids.
    '''
    response = requests.get(urlApi)
    if response.status_code == 200:
        obj = json.loads(response.text)['dados']
        ids = [ob['id'] for ob in obj]
        return ids
    # Fall back to an empty list so callers can iterate safely.
    return []
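
Note that the Dados Abertos API paginates its listings, so getAllPersons as written only sees the first page of deputies. A sketch of a paginated variant that follows rel='next' links; the links structure matches the API's self-describing responses, but treat the details here as an assumption:

def get_all_person_ids(url):
    # Hypothetical paginated variant of getAllPersons: follow rel='next'
    # links until the API stops returning one.
    ids = []
    while url:
        response = requests.get(url)
        if response.status_code != 200:
            break
        payload = json.loads(response.text)
        ids.extend(item['id'] for item in payload['dados'])
        url = next((link['href'] for link in payload.get('links', [])
                    if link.get('rel') == 'next'), None)
    return ids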


# Get the ids from the API.
ids = getAllPersons()
# Fetch and store the details for each deputy.
for person_id in ids:
    data = getDetailsPerson(person_id)
    filename = '%s.json' % person_id
    s3_handler.upload_file(data, '06/api', filename)