def get_article(url: str, text="") -> Article:
    """
    Build a parsed Article from raw text or from a webpage url.

    The method accepts text or a webpage url for processing.
    If both are passed, the text is given priority and the url is ignored.
    If only url is passed, the page is downloaded and parsed into an Article.
    If both parameters are null or empty, a ValueError is raised.

    :param url: string with the url
    :param text: string with the article's text; defaults to the empty string
    :return: Article object from newspaper library, parsed by Downloader
    :raises ValueError: if both url and text are empty
    """
    if text and text.strip():
        text = text.strip()
        # Article requires a url argument; a placeholder is used because the
        # real content is injected via input_html / set_text below.
        article = Article("text_is_passed_so_no_url")
        article.download(input_html=text)
        article.set_text(text)
    elif url:
        # newspaper needs an absolute url with a scheme.
        url = f"http://{url}" if not str(url).startswith("http") else url
        article = Downloader.download_article(url)
    else:
        message = "Parameters are empty!"
        # logger.warn() is deprecated; warning() is the supported spelling.
        Downloader.logger.warning(message)
        # Originally a commented-out NliServiceException; without raising
        # here, the return line below failed with UnboundLocalError on
        # `article`. Raise explicitly, as the docstring promises.
        raise ValueError(message)
    return Downloader.parse_article(article)
# Example #2
def parse(url=None, html=None, text=None, title=None,
          sentences_count=5,
          options=None,
          summarize_algo="luhn",
          date_timezone="America/New_York"):
    """
    Parse an article to extract its relevant data.

    Content can be supplied directly (text + title), as raw html, or as a
    url to download. Exactly one of these paths is taken, in that order of
    priority.

    :param url: url of the page to download (used when text/title absent)
    :param html: raw html to parse (skips the download step)
    :param text: the article's body text (used together with title)
    :param title: the article's title (used together with text)
    :param sentences_count: number of sentences in the generated summary
    :param options: optional dict of CSS selectors:
        "title_selector", "image_selector", "content_selector"
    :param summarize_algo: name of the summarization algorithm
    :param date_timezone: the timezone to convert the date to
    :return: dict with url, title, summary, text, html, media and metadata
    :raises Exception: if the url request fails or no html is available
    """
    # `options=None` avoids the shared mutable-default-argument pitfall of
    # the original `options={}`.
    options = options or {}

    article = Article("")

    if text and title:
        # Content supplied directly: skip download/parse entirely.
        article.is_parsed = True
        article.is_downloaded = True
        article.set_title(title)
        article.set_text(text)
    else:
        if url:
            # A timeout keeps the request from hanging indefinitely.
            r = requests.get(url.strip(), timeout=30)
            if r.status_code != 200:
                raise Exception("Paper request failed '%s'" % url)
            html = r.content

        if html:
            soup = get_soup(html)
        else:
            raise Exception("Paper missing HTML content")

        article.set_html(remove_social_embeds(html))
        article.parse()
        article.nlp()

        if options.get("title_selector"):
            matches = soup.select(options.get("title_selector"))
            if matches:
                title = matches[0].text
                article.set_title(title)

        if options.get("image_selector"):
            img = soup.select(options.get("image_selector"))
            if img:
                img = img[0].text
                article.set_top_img_no_check(img)

        if options.get("content_selector"):
            # Use a dedicated name here: the original reassigned `html` to a
            # BeautifulSoup ResultSet, which then leaked into
            # extract_publish_date / extract_video_iframes below.
            content = soup.select(options.get("content_selector"))
            if content:
                article.set_text(content[0].text)

    summary = summarize(text=article.text,
                        title=article.title,
                        algo=summarize_algo,
                        sentences_count=sentences_count)
    publish_date = article.publish_date
    if not publish_date and html:
        publish_date = extract_publish_date(html)
    if not publish_date:
        publish_date = datetime.datetime.now()

    # NOTE(review): on the text+title path, `html` may still be None here;
    # extract_video_iframes / extract_social_media_content are assumed to
    # tolerate None — confirm against their implementations.
    return {
        "url": article.canonical_link,
        "title": article.title,
        "summary": summary,
        "summaries": summary.split("\n\n"),
        "text": article.text,
        "html": article.html,
        "top_image": article.top_image,
        "images": article.images,
        "videos": list(set(article.movies + extract_video_iframes(html))),
        "social_media_content": extract_social_media_content(html),
        "keywords": article.keywords,
        "tags": article.tags,
        "authors": article.authors,
        "published_date": datetime_to_local_timezone(publish_date),
        "md_text": ""
    }