def get_article(url):
    """Download *url* and return the first complete question/response article.

    The page is parsed with BeautifulSoup and every
    ``<div class="article-text category-slog">`` is scanned in document
    order.  Inside a div, ``<p>`` text collected before an element whose
    ``name`` attribute equals ``"more"`` is treated as the question; ``<p>``
    text collected after it is treated as the response.

    Returns:
        ``article(None, question, response)`` for the first div that contains
        the ``"more"`` marker (both texts are newline-joined), or ``False``
        when no div on the page contains it.
    """
    html = requests.get(url).text
    soup = BeautifulSoup(html, "html.parser")

    for block in soup.find_all("div", class_="article-text category-slog"):
        question_parts = []
        response_parts = []
        # Flips to True once the name="more" marker has been seen; from then
        # on paragraphs belong to the response instead of the question.
        past_marker = False

        for node in block.find_all(["p", "a"]):
            if node.has_attr("name") and node["name"] == "more":
                past_marker = True
                continue
            if node.name != "p":
                continue
            if past_marker:
                response_parts.append(node.get_text())
            else:
                question_parts.append(node.get_text())

        # Only a div that actually contained the marker counts as complete.
        if past_marker:
            return article(None,
                           "\n".join(question_parts),
                           "\n".join(response_parts))

    return False
def update_db(result):
    """Build an ``article`` from every entry of *result* and save it.

    Each entry must provide ``'canonical_name'`` and ``'company_name'``
    (a missing key raises ``KeyError``).  All other fields are optional and
    fall back to an empty value.  The ``'dateline'`` value is passed to both
    ``getdate`` and ``getlocation``; ``'length'`` is passed through
    ``getlength``.
    """
    for entry in result:
        # Required keys are read first so a malformed entry fails up front.
        canonical = entry['canonical_name']
        company = entry['company_name']
        dateline = entry.get('dateline', '')

        record = article(
            canonical_name=canonical,
            company_name=company,
            author=entry.get('author', ''),
            section=entry.get('section', []),
            headline=entry.get('headline', ''),
            length=getlength(entry.get('length', 0)),
            content=entry.get('content', ''),
            date=getdate(dateline),
            location=getlocation(dateline),
        )
        record.save()
def ajouterArticle():
    """Show the article form and store a submitted article.

    For any method other than POST the empty form is rendered.  On POST an
    ``article`` is built from the submitted form fields plus the current
    timestamp; if ``isValid()`` accepts it, its ``format`` value is inserted
    into ``mongo.db.articles`` and the confirmation page is rendered,
    otherwise the form is rendered again.
    """
    if request.method != 'POST':
        return render_template('template_FormArticle.html')

    form = request.form
    # Build the article from the submitted fields.
    candidate = article(
        form["titre_article"],
        form["Auteur_name"],
        datetime.datetime.now(),
        form["contenu_article"],
        form["categorie_article"],
        form["Mots_cles_article"],
    )

    # Reject invalid submissions by showing the form again.
    if not candidate.isValid():
        return render_template('template_FormArticle.html')

    mongo.db.articles.insert_one(candidate.format)
    return render_template('temp_Conf_soumissionArticle.html')
def valider_ajout_art():
    """Approve one pending article request and move it into ``articles``.

    For any method other than POST the validation page is rendered.  On POST
    the first document of ``mongo.db.demandes`` that has a ``Mots_cles``
    field is fetched, turned into an ``article``, inserted into
    ``mongo.db.articles`` and then removed from ``demandes``; a confirmation
    page is rendered afterwards.

    If no pending request exists, the validation page is rendered again
    instead of failing on the missing document.
    """
    if request.method != 'POST':
        return render_template("valider_ajout_article.html")

    demande = mongo.db.demandes.find_one({"Mots_cles": {'$exists': True}})
    if demande is None:
        # find_one returns None when nothing matches; subscripting it would
        # raise TypeError, so bail out gracefully.
        return render_template("valider_ajout_article.html")

    # NOTE(review): the positional order used here (author, title, keywords,
    # content, category, date) differs from the one in ajouterArticle (title,
    # author, date, content, category, keywords) -- confirm against the
    # article constructor.
    new_art = article(
        demande["Auteur"],
        demande["Titre"],
        demande["Mots_cles"],
        demande["Contenu"],
        demande["Categorie"],
        demande["Date"],
    )

    # Add the article to the database.  insert_one replaces the deprecated
    # Collection.insert and matches what ajouterArticle already uses.
    mongo.db.articles.insert_one(new_art.format)

    # The request has been handled; remove it from the pending collection.
    mongo.db.demandes.delete_one({"Mots_cles": demande["Mots_cles"]})

    line = "Félicitations! Un nouveau article est ajouté à votre base de données"
    return render_template('basic.html', line=line)
def get_article(url, txt):
    """Download *url* and return the article that contains the excerpt *txt*.

    Every ``<article class="item-section">`` on the page is scanned in
    document order; the first one whose HTML contains *txt* literally and
    that yields a response section is returned.

    Within a matching article each ``<p>`` is stripped and classified:
    a paragraph starting with ``DEAR HARRIETTE:`` opens the question, any
    other paragraph starting with ``DEAR`` opens the response, and following
    paragraphs stay in the current section.  Paragraphs seen before either
    marker are reported with a printed message and skipped.

    Args:
        url: Address of the page to fetch.
        txt: Excerpt to look for; it is matched literally, not as a regex.

    Returns:
        ``article(None, question, response)`` with ``tags`` set to the
        stripped text of every ``<a class="read-more-link">`` in the article,
        or ``False`` when no matching, complete article is found.

    NOTE(review): an earlier one-argument ``get_article(url)`` exists in this
    file; this later definition replaces it at import time -- confirm that is
    intended.
    """
    starts_question = re.compile('^DEAR HARRIETTE:')
    starts_section = re.compile('^DEAR')
    # re.escape neutralises every regex metacharacter; the previous manual
    # escaping only covered ()[] so excerpts containing ?, +, *, . etc. could
    # mis-match or raise re.error.
    excerpt = re.compile(re.escape(txt))

    # Fetch and parse the page.
    page = requests.get(url)
    soup = BeautifulSoup(page.text, "html.parser")

    for item in soup.find_all("article", class_="item-section"):
        if not excerpt.search(str(item)):
            continue

        question = []
        response = []
        part = 0  # 0: nothing matched yet; 1: question; 2: response

        for paragraph in item.find_all("p"):
            text = paragraph.get_text().strip()
            if starts_section.search(text):
                part = 1 if starts_question.search(text) else 2

            if part == 1:
                question.append(text)
            elif part == 2:
                response.append(text)
            else:
                # Single-argument call form is valid on Python 2 and 3.
                print("Error: no text matched.")

        # Only an article that reached its response section is complete.
        if part == 2:
            art = article(None, '\n'.join(question), '\n'.join(response))
            art.tags = [
                category.get_text().strip()
                for category in item.find_all("a", class_="read-more-link")
            ]
            return art

    return False