def curate_articles(self):
    """Search Twitter for each tracked keyword, extract linked articles, and store them."""
    for i in self.words:
        for t in self.api.search(q=i[0], rpp=10, lang='en'):
            t = t._json
            if t['entities'].get('urls'):
                try:
                    url = t['entities']['urls'][0]['expanded_url']
                    a = Article(url)
                    a.download()
                    a.parse()
                    # Keep only English articles that have a top image and a canonical article URL
                    if a.has_top_image() and a.meta_lang == 'en' and self.is_article_url(a.canonical_link):
                        a.nlp()
                        temp_data = {
                            "url": a.canonical_link,
                            "title": a.title,
                            "image": a.top_img,
                            "description": a.meta_description,
                            "keywords": a.keywords,
                            "summary": a.summary
                        }
                        print("{} | Saving | {}".format(self.screen_name, a.title))
                        self.coll.insert_one(temp_data)
                        self.articles.append(temp_data)
                except Exception as e:
                    print(e)
                    print("Continue...")
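# A minimal standalone sketch (not part of the original class) of the newspaper3k
# pipeline that curate_articles() runs per tweet URL: download, parse, filter on
# top image / language, then nlp() for keywords and a summary. The function name
# and example URL below are illustrative only; article.nlp() needs NLTK data.
from newspaper import Article

def extract_article_record(url):
    a = Article(url)
    a.download()
    a.parse()
    if not (a.has_top_image() and a.meta_lang == 'en'):
        return None
    a.nlp()  # populates a.keywords and a.summary
    return {
        "url": a.canonical_link,
        "title": a.title,
        "image": a.top_img,
        "description": a.meta_description,
        "keywords": a.keywords,
        "summary": a.summary,
    }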
def cleanup_blog_tweets(tweet_df, num_posts):
    """
    Gets the potential tweets from Medium users and filters out the ones that
    don't have a Medium link in them.

    num_posts: how many posts we want to extract; we can scale this up as the
    capacity of the marketing channels gets better.
    """
    ## First process the tweet df and remove any tweets that don't have links in them
    link_tweet_inds = []
    for i in range(len(tweet_df)):
        tweet_url_list = tweet_df['urls'].iloc[i]
        if len(tweet_url_list) > 0:
            link_tweet_inds.append(i)
    link_tweet_inds = list(set(link_tweet_inds))
    link_tweet_df = tweet_df.iloc[link_tweet_inds]
    # print('Number of link tweets -> %s' % len(link_tweet_df))

    ## For now we only want to work with tweets that are in English
    english_tweet_df = link_tweet_df[link_tweet_df['language'] == 'en']
    english_tweet_df = english_tweet_df.sort_values(by=['nlikes'], ascending=False)
    # print('Number of english tweets -> %s' % len(english_tweet_df))

    ## Keep only the top and bottom num_posts/2 tweets by likes; this is all we process for now
    top_english_tweet_df = english_tweet_df.iloc[0:int(num_posts / 2)]
    bottom_english_tweet_df = english_tweet_df.iloc[-int(num_posts / 2):]
    tweet_df = pd.concat([top_english_tweet_df, bottom_english_tweet_df])
    # print('Number of processing tweets -> %s' % len(tweet_df))

    blog_tweet_inds = []
    for i in range(len(tweet_df)):
        tweet_url_list = tweet_df['urls'].iloc[i]
        # Process the link to check if it passes the parameters of what a blog post should be
        try:
            article = Article(tweet_url_list[0])
            article.download()
            article.parse()
            top_image = article.has_top_image()
            text_len = len(article.text)
            # A blog post must have a top image and a reasonable amount of text
            if top_image and (text_len > 1000):
                blog_tweet_inds.append(i)
        except Exception:
            pass
    blog_tweet_inds = list(set(blog_tweet_inds))
    blog_tweet_df = tweet_df.iloc[blog_tweet_inds]

    # Sort them by number of likes
    blog_tweet_df = blog_tweet_df.sort_values(by=['nlikes'], ascending=False)
    return blog_tweet_df
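# Usage sketch under assumptions: cleanup_blog_tweets() expects a pandas DataFrame
# with 'urls' (a list of strings per row), 'language', and 'nlikes' columns, the shape
# produced by tweet scrapers such as twint. The toy frame and URLs below are
# illustrative only; newspaper3k and network access are required for the link checks.
import pandas as pd

toy_tweets = pd.DataFrame({
    'urls': [['https://medium.com/@someone/post-a'], ['https://medium.com/@someone/post-b'], []],
    'language': ['en', 'en', 'en'],
    'nlikes': [42, 7, 3],
})
blog_posts = cleanup_blog_tweets(toy_tweets, num_posts=2)
print(blog_posts[['urls', 'nlikes']])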
def extract_article_from(self, url):
    article = {}
    doc = Article(url)
    try:
        doc.download()
        doc.parse()
    except ArticleException:
        print("Exception getting article from url [{}]".format(url))
        return

    # Embed the top image (if any) ahead of the text snippet in the summary HTML
    article["image"] = ""
    if doc.has_top_image():
        article["image"] = '<img src="{}">'.format(doc.top_image)
    article["title"] = doc.title
    article["source_title"] = "notYetSet"
    article["summary"] = article["image"] + doc.text[:300] + " ...<br/>"
    article["href"] = url
    return article
def get_document_json(post):
    """
    Parameters
    -------------
    post: dict
        post data.

    Returns
    -------------
    dict: document data.
    """
    try:
        article = Article(post['url'])
        article.download()
        article.parse()
        article.nlp()

        # Normalise the publish date; newspaper may return None or a raw string
        if article.publish_date is None or isinstance(article.publish_date, str):
            date = None
        else:
            date = article.publish_date.strftime('%Y-%m-%d')

        # Strip stopwords from the extracted keywords when the article language is known
        if article.meta_lang is not None and article.meta_lang != '':
            stopwords = safe_get_stop_words(article.meta_lang)
            keywords = [i for i in article.keywords if i not in stopwords]
        else:
            keywords = article.keywords
        keywords = list(set([slugify(i) for i in keywords]))

        json = {
            'title': article.title,
            'authors': article.authors,
            'created_on': date,
            'language': article.meta_lang,
            'keywords': keywords,
            'url': post['url'],
        }

        # Backfill the post image from the article's top image if it is missing
        if article.has_top_image() and post['image'] == MISSING_IMAGE:
            post['image'] = article.top_image
    except ArticleException:
        json = {
            'url': post['url']
        }
    return json
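# Usage sketch under assumptions: safe_get_stop_words comes from the `stop_words`
# package, slugify from `python-slugify`, and MISSING_IMAGE is a module-level
# sentinel in the original project. The sentinel value and URL below are placeholders.
from newspaper import Article, ArticleException
from slugify import slugify
from stop_words import safe_get_stop_words

MISSING_IMAGE = ''  # placeholder sentinel assumed by get_document_json

post = {'url': 'https://example.com/some-article', 'image': MISSING_IMAGE}
document = get_document_json(post)
print(document.get('title'), document.get('keywords'))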
def article_handler(url=None, nlp=False):
    response = {
        'publish_date': None,
        'html': None,
        'title': None,
        'top_image': None,
        'source_url': None,
        'images': None,
        'authors': None,
        'text': None,
        'canonical_link': None,
        'movies': None,
        'keywords': None,
        'summary': None
    }

    if not url:
        statsd.increment('url_analysis.empty')
        loggly.error("Cannot parse empty URL")
        return response

    try:
        article = Article(url)

        if not article.is_downloaded:
            statsd.increment('url_analysis.download')
            loggly.info("Downloading article")
            article.download()

        # response['html'] = article.html

        if not article.is_parsed:
            statsd.increment('url_analysis.parse')
            loggly.info("Parsing article")
            article.parse()

        response['title'] = article.title

        if article.has_top_image():
            statsd.increment('url_analysis.get_top_image')
            loggly.info("Extracting top_image")
            response['top_image'] = article.top_image

        if nlp:
            statsd.increment('url_analysis.nlp_process')
            loggly.info("Doing NLP processing")
            article.nlp()
            response['summary'] = article.summary
            response['keywords'] = article.keywords

        response['movies'] = article.movies
        response['images'] = article.images
        response['authors'] = article.authors
        response['text'] = article.text
        response['publish_date'] = article.publish_date
        response['source_url'] = article.source_url
        response['canonical_link'] = article.canonical_link

        statsd.increment('url_analysis.ok')
        return response
    except Exception as e:
        statsd.increment('url_analysis.error')
        loggly.error(e)
        return response
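# Usage sketch under assumptions: in the original service `statsd` and `loggly` are
# module-level metrics and logging clients; the stubs below stand in for them so the
# handler can be exercised locally. The URL is a placeholder.
import logging
from types import SimpleNamespace

from newspaper import Article

statsd = SimpleNamespace(increment=lambda metric: None)  # no-op metrics stub
loggly = logging.getLogger('url_analysis')                # plain logger in place of loggly

result = article_handler('https://example.com/some-article', nlp=False)
print(result['title'], result['canonical_link'])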