Python Article.is_parsed Exemples

Langage de programmation: Python

Espace de nommage/Pack: newspaper

Class/Type: Article

Méthode/Fonction: is_parsed

Exemples sur hotexamples.com: 1

Python Article.is_parsed - 1 exemple trouvé. Voici les exemples réels les mieux notés de newspaper.Article.is_parsed, extraits de projets open source. Vous pouvez noter les exemples pour nous aider à en améliorer la qualité.

Méthodes fréquemment utilisées

Afficher Cacher

Article(30)

nlp(30)

set_html(30)

parse(30)

download(30)

build(20)

html(15)

text(11)

download_state(9)

fetch_images(6)

is_valid_url(6)

publish_date(5)

authors(5)

is_downloaded(5)

title(4)

top_image(3)

article_html(3)

keywords(3)

set_text(2)

images(2)

has_top_image(2)

is_valid_body(2)

summary(1)

summarylen(1)

split(1)

set_top_img_no_check(1)

tag(1)

set_title(1)

tags(1)

textlen(1)

set_meta_data(1)

lower(1)

set_keywords(1)

save(1)

prepareSentenceHighlights(1)

nlpEntropy(1)

meta_data(1)

append(1)

is_video(1)

is_parsed(1)

is_media_news(1)

has_video(1)

get_is_news(1)

format_top_node(1)

category_urls(1)

articles(1)

url(1)

Méthodes fréquemment utilisées

Article (30)

nlp (30)

set_html (30)

parse (30)

download (30)

build (20)

html (15)

text (11)

download_state (9)

fetch_images (6)

Méthodes fréquemment utilisées

is_valid_url (6)

publish_date (5)

authors (5)

is_downloaded (5)

title (4)

top_image (3)

article_html (3)

keywords (3)

set_text (2)

images (2)

has_top_image (2)

is_valid_body (2)

summary (1)

summarylen (1)

split (1)

set_top_img_no_check (1)

tag (1)

set_title (1)

tags (1)

textlen (1)

Méthodes fréquemment utilisées

has_top_image (2)

is_valid_body (2)

summary (1)

summarylen (1)

split (1)

set_top_img_no_check (1)

tag (1)

set_title (1)

tags (1)

textlen (1)

set_meta_data (1)

lower (1)

set_keywords (1)

save (1)

prepareSentenceHighlights (1)

nlpEntropy (1)

meta_data (1)

append (1)

is_video (1)

is_parsed (1)

is_media_news (1)

has_video (1)

get_is_news (1)

format_top_node (1)

category_urls (1)

articles (1)

url (1)

Méthodes fréquemment utilisées

set_meta_data (1)

lower (1)

set_keywords (1)

save (1)

prepareSentenceHighlights (1)

nlpEntropy (1)

meta_data (1)

append (1)

is_video (1)

is_parsed (1)

is_media_news (1)

has_video (1)

get_is_news (1)

format_top_node (1)

category_urls (1)

articles (1)

url (1)

Exemple #1

0

Afficher le fichier

Fichier : article_parser.py Projet : mardix/newstldr

def parse(url=None, html=None, text=None, title=None, sentences_count=5,
          options=None, summarize_algo="luhn",
          date_timezone="America/New_York"):
    """
    Parse an article and return its relevant data.

    When both ``text`` and ``title`` are supplied they are set on the
    article directly and nothing is fetched or parsed. Otherwise the HTML
    is fetched from ``url`` when one is given (replacing any ``html``
    argument), or taken from ``html``, then parsed and run through
    ``nlp()``.

    :param url: Address to request the article HTML from.
    :param html: Article HTML, used when no ``url`` is given.
    :param text: Article text; only used together with ``title``.
    :param title: Article title; only used together with ``text``.
    :param sentences_count: Forwarded to ``summarize``.
    :param options: Optional dict of selector strings passed to
        ``soup.select``: ``title_selector``, ``image_selector`` and
        ``content_selector``. Only consulted on the HTML path.
    :param summarize_algo: Forwarded to ``summarize`` as ``algo``.
    :param date_timezone: The timezone to convert the date to.
    :return: dict with the keys ``url``, ``title``, ``summary``,
        ``summaries``, ``text``, ``html``, ``top_image``, ``images``,
        ``videos``, ``social_media_content``, ``keywords``, ``tags``,
        ``authors``, ``published_date`` and ``md_text``.
    :raises Exception: if the request for ``url`` does not return status
        200, or if no HTML content is available on the HTML path.
    """
    # Avoid a shared mutable default; None means "no selector overrides".
    options = {} if options is None else options

    article = Article("")

    if text and title:
        # Content was supplied by the caller: flag the article as already
        # downloaded and parsed, then set the provided fields.
        article.is_parsed = True
        article.is_downloaded = True
        article.set_title(title)
        article.set_text(text)
    else:
        if url:
            r = requests.get(url.strip())
            if r.status_code != 200:
                raise Exception("Paper request failed '%s'" % url)
            html = r.content

        if not html:
            raise Exception("Paper missing HTML content")
        soup = get_soup(html)

        article.set_html(remove_social_embeds(html))
        article.parse()
        article.nlp()

        # The selector overrides need ``soup``, which only exists on this
        # branch. Results are kept in their own locals so that ``html``
        # and ``title`` are not clobbered by selector result lists.
        title_selector = options.get("title_selector")
        if title_selector:
            title_nodes = soup.select(title_selector)
            if title_nodes:
                article.set_title(title_nodes[0].text)

        image_selector = options.get("image_selector")
        if image_selector:
            image_nodes = soup.select(image_selector)
            if image_nodes:
                # NOTE(review): this passes the node's text as the image;
                # confirm whether an attribute such as ``src`` was meant.
                article.set_top_img_no_check(image_nodes[0].text)

        content_selector = options.get("content_selector")
        if content_selector:
            content_nodes = soup.select(content_selector)
            if content_nodes:
                article.set_text(content_nodes[0].text)

    summary = summarize(text=article.text,
                        title=article.title,
                        algo=summarize_algo,
                        sentences_count=sentences_count)

    # Fall back from the parsed date, to one extracted from the raw HTML,
    # to the current time.
    publish_date = article.publish_date
    if not publish_date and html:
        publish_date = extract_publish_date(html)
    if not publish_date:
        publish_date = datetime.datetime.now()

    # NOTE(review): ``date_timezone`` is accepted but never forwarded to
    # ``datetime_to_local_timezone`` - confirm that helper's signature
    # before wiring it through.
    return {
        "url": article.canonical_link,
        "title": article.title,
        "summary": summary,
        "summaries": summary.split("\n\n"),
        "text": article.text,
        "html": article.html,
        "top_image": article.top_image,
        "images": article.images,
        "videos": list(set(article.movies + extract_video_iframes(html))),
        "social_media_content": extract_social_media_content(html),
        "keywords": article.keywords,
        "tags": article.tags,
        "authors": article.authors,
        "published_date": datetime_to_local_timezone(publish_date),
        "md_text": ""
    }