def populate_notices():
    """Save one ``Noticia`` per entry returned by ``extract_notices``.

    Each entry is read positionally: element 0 is passed as ``titulo`` and
    element 1 as ``enlace``. Every record is printed before it is saved.
    """
    for entry in extract_notices():
        headline = entry[0]
        url = entry[1]
        record = Noticia(titulo=headline, enlace=url)
        print(record)
        record.save()
def noticiasPoliticaELMUNDO():
    """Scrape El Mundo's politics index and store articles not saved yet.

    The index page and every article page are fetched through
    ``extraerCodigo`` (presumably returns a BeautifulSoup document --
    confirm against its definition). Only cover links that start with the
    ``/espana`` or ``/cataluna`` section URLs are followed.

    For each article the image (``og:image`` meta), title (text of
    ``<title>`` before the first ``|``), date (first 10 characters of the
    ``article:modified_time`` meta) and author (byline div text) are read
    together, so the fields of one article can never be paired with those
    of another. An article missing any of these is skipped with a printed
    notice instead of aborting the whole run.

    A ``Noticia`` with category ``"Politica"`` is saved for every article
    whose title is not already returned by ``Noticia.objects.filter``.
    """
    prefijos = ("https://www.elmundo.es/espana",
                "https://www.elmundo.es/cataluna")

    portada = extraerCodigo("https://www.elmundo.es/t/po/politica.html")

    enlaces = []
    for ancla in portada.find_all('a', class_='ue-c-cover-content__link'):
        href = ancla.get('href')
        # Anchors without an href would otherwise fail on .startswith().
        if href and href.startswith(prefijos):
            enlaces.append(href)

    articulos = []
    for enlace in enlaces:
        pagina = extraerCodigo(enlace)

        # The image may be declared with property= or with name=.
        meta_imagen = pagina.find("meta", property="og:image")
        if meta_imagen is None:
            meta_imagen = pagina.find("meta", attrs={'name': 'og:image'})
        etiqueta_titulo = pagina.find('title')
        meta_fecha = pagina.find("meta", property="article:modified_time")
        firma = pagina.find('div', class_="ue-c-article__byline-name")

        imagen = meta_imagen.get('content') if meta_imagen is not None else None
        texto_titulo = etiqueta_titulo.get_text() if etiqueta_titulo is not None else ""
        fecha = meta_fecha.get('content') if meta_fecha is not None else None

        if not imagen or not texto_titulo or not fecha or firma is None:
            print("Noticia omitida, faltan datos: " + enlace)
            continue

        articulos.append({
            # Deliberately not stripped: the title is the de-duplication
            # key, so it must keep the exact form used by earlier runs.
            "titulo": texto_titulo.split("|")[0],
            "fecha": fecha[0:10],
            "imagen": imagen,
            "autor": firma.text,
            "link": enlace,
        })

    print("Cargando Politica elMundo...")
    for articulo in articulos:
        if not Noticia.objects.filter(titulo=articulo["titulo"]):
            Noticia(titulo=articulo["titulo"],
                    fecha=articulo["fecha"],
                    imagen=articulo["imagen"],
                    autor=articulo["autor"],
                    categoria="Politica",
                    link=articulo["link"]).save()
def noticiasDiarioPolitica():
    """Scrape eldiario.es' politics listing and store articles not saved yet.

    Pages are fetched through ``extraerCodigo`` (presumably returns a
    BeautifulSoup document -- confirm against its definition).

    Listing pass: for every ``li.lst-item`` the first headline link whose
    href does not contain ``"autores"`` is taken, together with the first
    ``<img>`` of that item (or a placeholder image when there is none).
    Items without a usable link are ignored, so link and image always
    belong to the same item.

    Article pass: title (first ``h1.pg-headline``), date (first
    ``span.date``) and author (text of the first ``small.byline`` when it
    contains an ``a.lnk``, otherwise ``"Sin autor"``) are read as one
    record. The date text has ``-`` and spaces removed and its three
    ``/``-separated parts reversed and joined with ``-``; this reordered
    value is what gets stored (previously it was computed and then
    discarded in favour of the raw text). Articles without a title or with
    a date that does not split into three parts are skipped with a printed
    notice.

    A ``Noticia`` with category ``"Politica"`` is saved for every article
    whose title is not already returned by ``Noticia.objects.filter``.
    """
    base = "https://www.eldiario.es"
    sin_imagen = "http://www.sanisidrolonas.com.ar/wp-content/uploads/2011/05/sin-imagen12.jpg"

    portada = extraerCodigo("https://www.eldiario.es/temas/politica/")

    # (link, image) pairs, one per listing item.
    candidatos = []
    for elemento in portada.find_all('li', class_='lst-item cf '):
        enlace = None
        for titular in elemento.find_all('h2', class_="bkn headline typ-x4"):
            for ancla in titular.find_all('a', class_='lnk'):
                href = ancla.get("href")
                # Links to author pages are not articles.
                if href and "autores" not in href:
                    enlace = base + href
                    break
            if enlace is not None:
                break
        if enlace is None:
            continue

        miniatura = elemento.find('img')
        src = miniatura.get("src") if miniatura is not None else None
        candidatos.append((enlace, base + src if src else sin_imagen))

    articulos = []
    for enlace, imagen in candidatos:
        pagina = extraerCodigo(enlace)

        # Title
        cabecera = pagina.find('h1', class_="pg-headline")
        # Date
        etiqueta_fecha = pagina.find('span', class_="date")
        if cabecera is None or etiqueta_fecha is None:
            print("Noticia omitida, faltan datos: " + enlace)
            continue

        texto_fecha = etiqueta_fecha.get_text().strip()
        partes = texto_fecha.replace("-", "").replace(" ", "").split("/")
        if len(partes) != 3:
            print("Noticia omitida, fecha no reconocida: " + enlace)
            continue
        fecha = partes[2] + "-" + partes[1] + "-" + partes[0]

        # Author: exactly one value per article, whatever the number of
        # author links inside the byline.
        firma = pagina.find('small', class_="byline")
        if firma is not None and firma.find('a', class_='lnk') is not None:
            autor = firma.get_text().strip()
        else:
            autor = "Sin autor"

        articulos.append({
            "titulo": cabecera.get_text().strip(),
            "fecha": fecha,
            "autor": autor,
            "link": enlace,
            "imagen": imagen,
        })

    for articulo in articulos:
        if not Noticia.objects.filter(titulo=articulo["titulo"]):
            Noticia(titulo=articulo["titulo"],
                    fecha=articulo["fecha"],
                    autor=articulo["autor"],
                    link=articulo["link"],
                    categoria="Politica",
                    imagen=articulo["imagen"]).save()
def noticiasPublicoCultura():
    """Scrape publico.es' culture listing and store articles not saved yet.

    Pages are fetched through ``extraerCodigo`` (presumably returns a
    BeautifulSoup document -- confirm against its definition).

    Listing pass: for every ``div.listing-item`` the first ``a.page-link``
    gives the article link and the first ``<img>`` gives the image (a
    placeholder image is used when the item has none). Items without a
    link are ignored, so link and image always belong to the same item.

    Article pass: title (first ``<h1>`` found inside a
    ``div.article-header-title``), date (first space-separated token of
    ``span.published``, with its three ``/``- or ``-``-separated parts
    reversed and joined with ``-``) and author (stripped text of the first
    ``div.article-info``) are read as one record. Articles missing any of
    them are skipped with a printed notice instead of aborting the run or
    shifting fields between articles.

    A ``Noticia`` with category ``"Cultura"`` is saved for every article
    whose title is not already returned by ``Noticia.objects.filter``.
    """
    base = "https://www.publico.es"
    sin_imagen = "http://www.sanisidrolonas.com.ar/wp-content/uploads/2011/05/sin-imagen12.jpg"

    portada = extraerCodigo("https://www.publico.es/culturas")

    # (link, image) pairs, one per listing item.
    candidatos = []
    for bloque in portada.find_all('div', class_='listing-item'):
        ancla = bloque.find('a', class_='page-link')
        href = ancla.get("href") if ancla is not None else None
        if not href:
            continue

        miniatura = bloque.find('img')
        src = miniatura.get("src") if miniatura is not None else None
        candidatos.append((base + href, base + src if src else sin_imagen))

    articulos = []
    for enlace, imagen in candidatos:
        pagina = extraerCodigo(enlace)

        # Title: first <h1> inside any header-title container.
        titulo = None
        for cabecera in pagina.find_all('div', class_="article-header-title"):
            h1 = cabecera.find('h1')
            if h1 is not None:
                titulo = h1.text.strip()
                break

        # Date: keep only the day token and reverse its parts.
        fecha = None
        publicado = pagina.find('span', class_="published")
        if publicado is not None:
            dia = publicado.get_text().strip().split(" ")[0]
            partes = dia.replace("/", "-").split("-")
            if len(partes) == 3:
                fecha = partes[2] + "-" + partes[1] + "-" + partes[0]

        # Author: one value per article, taken from the info container.
        info = pagina.find('div', class_="article-info")
        autor = info.text.strip() if info is not None else ""

        if not titulo or fecha is None or not autor:
            print("Noticia omitida, faltan datos: " + enlace)
            continue

        articulos.append({
            "titulo": titulo,
            "fecha": fecha,
            "autor": autor,
            "link": enlace,
            "imagen": imagen,
        })

    for articulo in articulos:
        if not Noticia.objects.filter(titulo=articulo["titulo"]):
            Noticia(titulo=articulo["titulo"],
                    fecha=articulo["fecha"],
                    autor=articulo["autor"],
                    link=articulo["link"],
                    categoria="Cultura",
                    imagen=articulo["imagen"]).save()