def crawler_PBOC():
    """Crawl the PBOC news index pages and dump every article to a CSV file.

    Reads one index-page URL per line from ``news_list_indexes_file``,
    locates the per-article tables on each page and writes one
    ``title, href, date, content`` row to ``out_file`` for every article
    whose link and body could be extracted.  ``out_file`` is truncated on
    every run.

    An article that fails to parse or download is reported on stdout and
    skipped.  Errors while fetching an index page, or while walking its
    outer layout, are not caught here and propagate to the caller.
    """
    with open(news_list_indexes_file, "r") as fr, open(out_file, "w") as fw:
        csvwriter = csv.writer(fw)
        csvwriter.writerow(["title", "href", "date", "content"])

        for line in fr:
            # Lines read from the file keep their trailing newline; strip it
            # so it does not end up in the request, and skip blank lines.
            index_url = line.strip()
            if not index_url:
                continue

            html = urlopen(index_url)
            try:
                bsObj = BeautifulSoup(html, "lxml")
            finally:
                # The parser has consumed the response; release the socket.
                html.close()

            # Walk down the fixed page layout to the list of article tables.
            news_objs = (
                bsObj.find("div", {"class": "mainw950"})
                .find("div", {"opentype": "page"})
                .find("td", {"colspan": "2"})
                .find("div", {"id": "r_con"})
                .find("div", {"class": "portlet"})
                .find("div", {"style": "height:480px"})
                .find("table")
                .find("td")
                .findAll("table")
            )

            for news_obj in news_objs:
                try:
                    link = news_obj.find("a")
                    date_obj = news_obj.find("span", {"class": "hui12"})

                    news = News()
                    # Store the visible date text rather than the tag, so the
                    # CSV column holds the date and not raw HTML markup.
                    news.date = date_obj.text if date_obj is not None else ""
                    news.href = url_domain_pboc + link.attrs["href"]
                    news.title = link.text
                    news.content = get_content(news.href)

                    csvwriter.writerow(
                        [news.title, news.href, news.date, news.content])
                except Exception as exc:
                    # Best effort: one bad entry must not abort the crawl, but
                    # say why it was skipped.  KeyboardInterrupt/SystemExit
                    # are deliberately not caught.
                    print("except..", exc)
def crawler_FRB():
    """Crawl the FRB event list page and append its entries to a CSV file.

    Fetches ``url_frb_2016``, finds every event row in the event list and
    appends one ``title, href, date, type, content`` row per event to
    ``base_dir + "csv_frb.csv"``.  The header row is written only when the
    file is new or empty, so repeated runs do not scatter header rows
    through the data.

    An event that fails to parse or download is reported on stdout and
    skipped.  Errors while fetching the list page, or while locating the
    event list in it, are not caught here and propagate to the caller.
    """
    import os

    html = urlopen(url_frb_2016)
    try:
        bsObj = BeautifulSoup(html, "html.parser")
    finally:
        # The parser has consumed the response; release the socket.
        html.close()

    events_list_obj = bsObj.find("div", {"class": "row eventlist"}).find(
        "div", {"class": "col-xs-12 col-sm-8 col-md-8"})
    event_rows_obj = events_list_obj.findAll("div", {"class": "row"})

    out_path = base_dir + "csv_frb.csv"
    # The file is opened in append mode, so only emit the header the first
    # time (file missing or still empty).
    write_header = (not os.path.exists(out_path)
                    or os.path.getsize(out_path) == 0)

    with open(out_path, "a") as fw:
        csvwriter = csv.writer(fw)
        if write_header:
            csvwriter.writerow(["title", "href", "date", "type", "content"])

        for event_row_obj in event_rows_obj:
            try:
                news = News()

                date_obj = event_row_obj.find(
                    "div", {"class": "col-xs-3 col-md-2 eventlist__time"})
                news.date = date_obj.find("time").text

                event_obj = event_row_obj.find(
                    "div", {"class": "col-xs-9 col-md-10 eventlist__event"})
                news.href = url_domain_frb + event_obj.find("a").attrs["href"]
                news.title = event_obj.find("p").find("a").find("em").text
                news.type = event_obj.find(
                    "p", {"class": "eventlist__press"}
                ).find("em").find("strong").text
                news.content = get_content(news.href)

                csvwriter.writerow(
                    [news.title, news.href, news.date, news.type,
                     news.content])
            except Exception as exc:
                # Best effort: one bad row must not abort the crawl, but say
                # why it was skipped.  KeyboardInterrupt/SystemExit are
                # deliberately not caught.
                print("except..", exc)
def storyteller():
    """Render the storyteller form and handle its submission.

    On a POST with a valid form, the submitted credentials are passed to
    ``firebase.login``.  A result of ``0`` builds a public ``News`` item
    from the form fields (dated today) and hands it to ``firebase.fcm``;
    results ``1``, ``2`` and ``3`` flash the matching error message; any
    other result flashes nothing.  A POST with an invalid form flashes a
    "fill in all fields" message.  The template is rendered in every case.
    """
    # Flash text for each failing login result, checked in this order.
    login_errors = (
        (1, "Errore: nome utente o password errata"),
        (2, "Errore: chiave API non definita"),
        (3, "Errore: account non valido"),
    )

    form = ReusableForm(request.form)
    is_post = request.method == "POST"

    if is_post and not form.validate():
        flash("Compila tutti i campi")
    elif is_post:
        fields = request.form
        email = fields["email"]
        password = str(fields["password"])
        login = firebase.login(email, password)

        if login == 0:
            news = News()
            news.title = fields["title"]
            news.message = message_with_signature(fields["message"], email)
            news.url = fields["url"]
            news.date = time.strftime("%Y-%m-%d")
            news.is_private = False
            firebase.fcm(news, True)
            print(news.message)
            flash("Messaggio inviato con successo")

        # Report the first matching failure code, if any.
        for code, text in login_errors:
            if login == code:
                flash(text)
                break

    return render_template("storyteller.html", form=form)