# Example #1  (score: 0)
def get_article(url):
    """Fetch *url* and return the first complete Q/A article found, or False.

    An article is considered complete when it contains an anchor tag with
    ``name="more"`` that separates the question paragraphs from the
    response paragraphs.
    """
    # Download and parse the page.
    page = requests.get(url)
    soup = BeautifulSoup(page.text, "html.parser")

    # Candidate articles live in divs carrying these two classes.
    for candidate in soup.find_all("div", class_="article-text category-slog"):
        question_parts = []
        response_parts = []
        section = 1  # 1 = question, 2 = response

        for tag in candidate.find_all(["p", "a"]):
            if tag.has_attr("name") and tag["name"] == "more":
                # The "more" anchor marks the question/response boundary.
                section = 2
            elif tag.name == "p":
                bucket = question_parts if section == 1 else response_parts
                bucket.append(tag.get_text())

        # Only an article that reached the response section is complete.
        if section == 2:
            return article(None, "\n".join(question_parts), "\n".join(response_parts))

    return False
# Example #2  (score: 0)
# File: cmd.py  Project: aptrishu/dump
def update_db(result):
    """Persist each short-article dict in *result* as an ``article`` record.

    Missing optional fields fall back to empty defaults; the dateline is
    parsed once for both the date and the location.
    """
    for raw in result:
        dateline = raw.get('dateline', '')
        record = article(
            canonical_name=raw['canonical_name'],
            company_name=raw['company_name'],
            author=raw.get('author', ''),
            section=raw.get('section', []),
            headline=raw.get('headline', ''),
            length=getlength(raw.get('length', 0)),
            content=raw.get('content', ''),
            date=getdate(dateline),
            location=getlocation(dateline),
        )
        record.save()
# Example #3  (score: 0)
def ajouterArticle():
    """Handle the article-creation form.

    On POST, build an ``article`` from the submitted form fields; if it
    validates, insert it into the ``articles`` collection and render a
    confirmation page.  On GET, or when validation fails, render the form
    template again.
    """
    if request.method == 'POST':  # form submitted: create the article
        newArticle = article(request.form["titre_article"],
                             request.form["Auteur_name"],
                             datetime.datetime.now(),
                             request.form["contenu_article"],
                             request.form["categorie_article"],
                             request.form["Mots_cles_article"])
        if newArticle.isValid():  # only persist valid articles
            # Insert result intentionally discarded (was bound to an
            # unused local); only success matters here.
            mongo.db.articles.insert_one(newArticle.format)
            return render_template('temp_Conf_soumissionArticle.html')
        return render_template('template_FormArticle.html')

    return render_template('template_FormArticle.html')
# Example #4  (score: 0)
def valider_ajout_art():
    """Promote one pending article request into the articles collection.

    On POST: fetch the first pending request (any document that has a
    ``Mots_cles`` field), store it as an ``article``, delete the request,
    and render a confirmation.  Otherwise render the validation form.
    """
    if request.method == 'POST':
        # Single document is enough — no need for the old one-element
        # list wrapper indexed with [0].
        # NOTE(review): find_one returns None when nothing is pending;
        # the original would also crash in that case — confirm callers
        # guarantee at least one pending request.
        demande = mongo.db.demandes.find_one({"Mots_cles": {'$exists': True}})
        newArt = article(demande["Auteur"], demande["Titre"],
                         demande["Mots_cles"], demande["Contenu"],
                         demande["Categorie"], demande["Date"])
        # insert_one replaces the deprecated Collection.insert (and matches
        # ajouterArticle); both result objects were unused, so the unused
        # local bindings are dropped.
        mongo.db.articles.insert_one(newArt.format)
        mongo.db.demandes.delete_one({"Mots_cles": demande["Mots_cles"]})
        line = "Félicitations! Un nouveau article est ajouté à votre base de données"
        return render_template('basic.html', line=line)
    return render_template("valider_ajout_article.html")
# Example #5  (score: 0)
def get_article(url, txt):
    """Fetch *url* and return the article whose HTML contains *txt*.

    Returns an ``article`` (with ``tags`` filled from the "read more"
    links) when a complete question/response pair is found, else False.
    """
    # Escape the excerpt fully so it matches as literal text.  The old
    # manual .replace() chain only escaped ()[] and left . * ? + ^ $ | { }
    # and backslash as live regex metacharacters.
    rX_excerpt = re.compile(re.escape(txt))
    rX_to = re.compile('^DEAR HARRIETTE:')
    rX_from = re.compile('^DEAR')

    # Download and parse the page.
    r = requests.get(url)
    soup = BeautifulSoup(r.text, "html.parser")

    # Search the page's articles for the excerpt.
    for a in soup.find_all("article", class_="item-section"):
        if not rX_excerpt.search(str(a)):
            continue

        question = []
        response = []
        article_part = 0  # 0: none yet; 1: question; 2: response
        for p in a.find_all("p"):
            text = p.get_text().strip()
            if rX_from.search(text):
                # "DEAR HARRIETTE:" opens the question; any other
                # "DEAR ..." salutation opens the response.
                article_part = 1 if rX_to.search(text) else 2
            if article_part == 1:
                question.append(text)
            elif article_part == 2:
                response.append(text)
            else:
                # Paragraph seen before any salutation.  Parenthesized
                # print works on both Python 2 and 3 (the original used
                # the Python-2-only statement form).
                print("Error: no text matched.")

        # A complete article reached the response section: build and return it.
        if article_part == 2:
            art = article(None, '\n'.join(question), '\n'.join(response))
            art.tags = [cat.get_text().strip()
                        for cat in a.find_all("a", class_="read-more-link")]
            return art

    return False