Esempio n. 1
0
def populate_notices():
    """Persist every notice returned by ``extract_notices``.

    Each extracted item is indexed positionally: element 0 is used as the
    title and element 1 as the link. A ``Noticia`` is built from those two
    values, printed, and then saved.

    ``extract_notices`` and ``Noticia`` are defined outside this block.
    """
    for entry in extract_notices():
        headline, url = entry[0], entry[1]
        record = Noticia(titulo=headline, enlace=url)
        # Echo the record before persisting it, as a simple progress trace.
        print(record)
        record.save()
Esempio n. 2
0
def noticiasPoliticaELMUNDO():
    """Scrape the El Mundo politics index and store articles not yet saved.

    Collects the cover links that point to the "espana" or "cataluna"
    sections, downloads each linked article and reads its title, date, image
    and author. One record is built per article, so a page with a missing or
    repeated element cannot shift the fields of the articles that follow it.

    Articles without a title or a modification date are skipped. A missing
    image falls back to a generic "no image" URL and a missing byline to
    "Sin autor". An article is saved only when no stored ``Noticia`` already
    has the same ``titulo``.

    NOTE(review): ``extraerCodigo`` is used as if it returned a parsed
    BeautifulSoup document and ``Noticia`` as if it were a Django model;
    both are defined outside this block -- confirm against their definitions.
    """
    prefijos = ("https://www.elmundo.es/espana",
                "https://www.elmundo.es/cataluna")
    sinImagen = "http://www.sanisidrolonas.com.ar/wp-content/uploads/2011/05/sin-imagen12.jpg"

    portada = extraerCodigo("https://www.elmundo.es/t/po/politica.html")

    listaLinks = []
    for enlace in portada.find_all('a', class_='ue-c-cover-content__link'):
        href = enlace.get('href')
        # An anchor without href yields None; guard before calling startswith.
        if href and href.startswith(prefijos):
            listaLinks.append(href)

    noticias = []
    for link in listaLinks:
        articulo = extraerCodigo(link)

        # Title: the text of <title> up to the first "|". It is not stripped,
        # because the stored titles it is compared against were not either.
        etiquetaTitulo = articulo.find('title')
        titulo = ""
        if etiquetaTitulo is not None:
            titulo = etiquetaTitulo.get_text().split("|")[0]
        if not titulo:
            continue

        # Date: first 10 characters of the modification timestamp.
        metaFecha = articulo.find("meta", property="article:modified_time")
        contenidoFecha = metaFecha.get('content') if metaFecha is not None else None
        if not contenidoFecha:
            continue
        fecha = contenidoFecha[0:10]

        # Image: og:image may be declared via "property" or via "name".
        metaImagen = articulo.find("meta", property="og:image")
        if metaImagen is None:
            metaImagen = articulo.find("meta", attrs={'name': 'og:image'})
        imagen = metaImagen.get('content') if metaImagen is not None else None
        if not imagen:
            imagen = sinImagen

        # Author: byline text, or a fixed label when the byline is absent.
        firma = articulo.find('div', class_="ue-c-article__byline-name")
        autor = firma.text if firma is not None else "Sin autor"

        noticias.append((titulo, fecha, imagen, autor, link))

    print("Cargando Politica elMundo...")

    for titulo, fecha, imagen, autor, link in noticias:
        # Skip articles whose title is already stored.
        if not (Noticia.objects.filter(titulo=titulo)):
            Noticia(titulo=titulo,
                    fecha=fecha,
                    imagen=imagen,
                    autor=autor,
                    categoria="Politica",
                    link=link).save()
Esempio n. 3
0
def noticiasDiarioPolitica():
    """Scrape the eldiario.es politics index and store articles not yet saved.

    For every list item on the index page this takes the first headline link
    that is not an author page, plus the first image of the item (or a
    generic "no image" URL). Each linked article is then downloaded to read
    its headline, date and author.

    One record is built per article, so an item with several images, several
    author links or a missing element cannot shift the fields of the other
    articles. Articles without a headline or a date are skipped; a missing
    byline, or a byline without an author link, is stored as "Sin autor".

    The date is stored as ``YYYY-MM-DD`` when a ``dd/mm/yyyy`` pattern is
    found in the page text; otherwise the stripped page text is stored
    unchanged.

    NOTE(review): ``extraerCodigo`` is used as if it returned a parsed
    BeautifulSoup document and ``Noticia`` as if it were a Django model;
    both are defined outside this block -- confirm against their definitions.
    """
    import re

    base = "https://www.eldiario.es"
    sinImagen = "http://www.sanisidrolonas.com.ar/wp-content/uploads/2011/05/sin-imagen12.jpg"

    portada = extraerCodigo("https://www.eldiario.es/temas/politica/")

    # (link, image) pairs, collected together so they always stay aligned.
    entradas = []
    for item in portada.find_all('li', class_='lst-item cf '):
        link = None
        for titular in item.find_all('h2', class_="bkn headline typ-x4"):
            for enlace in titular.find_all('a', class_='lnk'):
                href = enlace.get("href")
                # Links containing "autores" point to author pages.
                if href and "autores" not in href:
                    link = base + href
                    break
            if link is not None:
                break
        if link is None:
            continue

        img = item.find('img')
        src = img.get("src") if img is not None else None
        imagen = base + src if src else sinImagen
        entradas.append((link, imagen))

    noticias = []
    for link, imagen in entradas:
        articulo = extraerCodigo(link)

        # Headline
        cabecera = articulo.find('h1', class_="pg-headline")
        if cabecera is None:
            continue
        titulo = cabecera.get_text().strip()

        # Date
        etiquetaFecha = articulo.find('span', class_="date")
        if etiquetaFecha is None:
            continue
        textoFecha = etiquetaFecha.get_text().strip()
        coincidencia = re.search(r"(\d{1,2})/(\d{1,2})/(\d{4})", textoFecha)
        if coincidencia:
            dia, mes, anio = coincidencia.groups()
            fecha = anio + "-" + mes.zfill(2) + "-" + dia.zfill(2)
        else:
            # No recognisable date: keep the page text as it appears.
            fecha = textoFecha

        # Author: the whole byline text, but only when it links to an author.
        firma = articulo.find('small', class_="byline")
        if firma is not None and firma.find('a', class_='lnk') is not None:
            autor = firma.get_text().strip()
        else:
            autor = "Sin autor"

        noticias.append((titulo, fecha, autor, link, imagen))

    for titulo, fecha, autor, link, imagen in noticias:
        # Skip articles whose title is already stored.
        if not (Noticia.objects.filter(titulo=titulo)):
            Noticia(titulo=titulo,
                    fecha=fecha,
                    autor=autor,
                    link=link,
                    categoria="Politica",
                    imagen=imagen).save()
Esempio n. 4
0
def noticiasPublicoCultura():
    """Scrape the publico.es culture index and store articles not yet saved.

    For every listing item on the index page this takes the first
    ``page-link`` anchor and the first image of the item (or a generic
    "no image" URL). Each linked article is then downloaded to read its
    headline, publication date and author.

    One record is built per article, so an item or page with a missing or
    repeated element cannot shift the fields of the other articles. Items
    without a link, and articles without a headline or without a date made
    of three parts, are skipped. A missing author block is stored as
    "Sin autor".

    The date is taken from the text before the first space of the
    ``published`` span and reordered from day/month/year to
    ``year-month-day``.

    NOTE(review): ``extraerCodigo`` is used as if it returned a parsed
    BeautifulSoup document and ``Noticia`` as if it were a Django model;
    both are defined outside this block -- confirm against their definitions.
    """
    base = "https://www.publico.es"
    sinImagen = "http://www.sanisidrolonas.com.ar/wp-content/uploads/2011/05/sin-imagen12.jpg"

    portada = extraerCodigo("https://www.publico.es/culturas")

    # (link, image) pairs, collected together so they always stay aligned.
    entradas = []
    for item in portada.find_all('div', class_='listing-item'):
        enlace = item.find('a', class_='page-link')
        href = enlace.get("href") if enlace is not None else None
        if not href:
            continue

        img = item.find('img')
        src = img.get("src") if img is not None else None
        imagen = base + src if src else sinImagen
        entradas.append((base + href, imagen))

    noticias = []
    for link, imagen in entradas:
        articulo = extraerCodigo(link)

        # Headline: first <h1> found inside an article-header-title block.
        h1 = None
        for cabecera in articulo.find_all('div', class_="article-header-title"):
            h1 = cabecera.find('h1')
            if h1 is not None:
                break
        if h1 is None:
            continue
        titulo = h1.text.strip()

        # Date: "dd/mm/yyyy ..." or "dd-mm-yyyy ..." -> "yyyy-mm-dd".
        publicado = articulo.find('span', class_="published")
        if publicado is None:
            continue
        primerTramo = publicado.get_text().strip().split(" ")[0]
        partes = primerTramo.replace("/", "-").split("-")
        if len(partes) != 3:
            continue
        fecha = partes[2] + "-" + partes[1] + "-" + partes[0]

        # Author: text of the first article-info block that contains a <p>.
        autor = "Sin autor"
        for info in articulo.find_all('div', class_="article-info"):
            if info.find('p') is not None:
                autor = info.text.strip()
                break

        noticias.append((titulo, fecha, autor, link, imagen))

    for titulo, fecha, autor, link, imagen in noticias:
        # Skip articles whose title is already stored.
        if not (Noticia.objects.filter(titulo=titulo)):
            Noticia(titulo=titulo,
                    fecha=fecha,
                    autor=autor,
                    link=link,
                    categoria="Cultura",
                    imagen=imagen).save()