# Example #1
# 0
# Scrape the news cards from the landing page and submit each article URL
# to util_midia.social_news_from_link.
# NOTE(review): `link` and `link_site` are defined earlier in the file
# (outside this cell) — confirm they refer to the same site.
req = requests.get(link)

# In[5]:

# Parse the response ONCE and collect the three card sizes in one pass
# (the original re-parsed the same HTML with three BeautifulSoup objects).
soup = BeautifulSoup(req.text, "html.parser")
noticias = []
for card_class in ('card large card-medium', 'card card-small',
                   'card card-xsmall'):
    noticias += soup.find_all('div', class_=card_class)

# In[6]:

for noticia in noticias:
    # Hoist the anchor lookup (the original called find_all twice per card)
    # and guard against cards without any link instead of raising IndexError.
    anchors = noticia.find_all('a', href=True)
    if not anchors:
        continue
    ref_link = anchors[0]['href']
    print(ref_link)
    # Relative hrefs need the site prefix; absolute ones are used as-is.
    if link_site not in ref_link:
        full_link = link_site + ref_link
    else:
        full_link = ref_link
    util_midia.social_news_from_link(full_link)
    print(full_link)

# In[7]:
# (Removed dangling `noticias[0]` — a Jupyter display expression that is a
# no-op in a script.)
import requests

import datetime

import util_midia

# Scrape the Le Monde Diplomatique Brasil homepage carousels and submit
# each article link to util_midia.
link = 'https://diplomatique.org.br/'
req = requests.get(link)
bs = BeautifulSoup(req.text, "html.parser")
materias = bs.find_all('div', class_='owl-carousel')

# The first carousel is skipped — presumably not news content; confirm.
for div in materias[1:]:
    try:
        # First anchor inside the carousel item is the article link.
        page_link = div.find_all('a', href=True)[0]['href']
        util_midia.social_news_from_link(page_link)
    except IndexError:
        # Carousel item without a link: skip it.
        # NOTE(review): the original `try` had no visible `except`
        # (a SyntaxError as extracted) — the handler was evidently lost;
        # this narrow handler restores a runnable, best-effort loop.
        continue
# (Removed a large block of commented-out NewsPlease code; the same dead
# code appears verbatim in the Opera Mundi example of this file.)
# Example #3
# 0
from lexical_analyzer_package import midia_lexical
from midia_postagem import midia_post
from Model.News import News
from Database import midia_table

from newsplease import NewsPlease
from bs4 import BeautifulSoup
import requests
import util_midia
import datetime

# In[2]:

# EBC (Empresa Brasil de Comunicação) homepage.
link = 'http://www.ebc.com.br'

# In[3]:

req = requests.get(link)

# In[4]:

# Each news card is a div carrying this Bootstrap-grid class list.
noticias = BeautifulSoup(req.text, "html.parser").find_all(
    'div',
    class_='cmpGeneric isoGrid-item col-lg-4 col-md-4 col-sm-12 col-xs-12')

# In[5]:

for noticia in noticias:
    # Hoist the lookup: the original called find_all twice per card, and
    # guard against cards without an anchor instead of raising IndexError.
    anchors = noticia.find_all('a', href=True)
    if not anchors:
        continue
    ref_link = anchors[0]['href']
    print(ref_link)
    util_midia.social_news_from_link(ref_link)
# Example #4
# 0
import util_midia

# Opera Mundi homepage: collect article anchors from each news-grid
# section and submit them to util_midia.
link = 'https://operamundi.uol.com.br/'
req = requests.get(link)
bs = BeautifulSoup(req.text, "html.parser")

# The original repeated `bs.find(...).find_all(...)` four times; iterate
# over the container classes instead. `bs.find` returns None when a class
# is missing, which would have raised AttributeError — guard for that.
grid_classes = (
    'news-grid',
    'col-xs-12 col-md-6 home-grid',
    'col-xs-12 news-grid',
    'row news-grid',
)
materias = []
for grid_class in grid_classes:
    container = bs.find('div', class_=grid_class)
    if container is not None:
        materias += container.find_all('a', href=True)

for materia in materias:
    util_midia.social_news_from_link(materia['href'])

# (Removed a block of commented-out NewsPlease dead code.)
# Example #5
# 0
from lexical_analyzer_package import midia_lexical
from midia_postagem import midia_post
from Model.News import News
from Database import midia_table

from newsplease import NewsPlease
from bs4 import BeautifulSoup
import requests

import util_midia

# In[2]:

# Extra (Globo) live-news ("plantão") page.
link = 'https://extra.globo.com/noticias/plantao.html'

# In[3]:

req = requests.get(link)

# In[4]:

# Each headline sits in a div with class "text".
noticias = BeautifulSoup(req.text, "html.parser").find_all('div',
                                                           class_='text')

# In[ ]:

for noticia in noticias:
    # Hoist the anchor lookup (the original called find_all twice per
    # headline) and skip link-less blocks instead of raising IndexError.
    anchors = noticia.find_all('a', href=True)
    if not anchors:
        continue
    ref_link = anchors[0]['href']
    print(ref_link)
    util_midia.social_news_from_link(ref_link)