def get_readtime_from_url(url):
    news_info = requests.get(url, headers=AUTH_HEADER).json()
    summary_html = news_info['summary']
    body_html = news_info['body']['html']
    summary_time = readtime.of_html(summary_html) if summary_html else EmptyResult
    body_time = readtime.of_html(body_html) if body_html else EmptyResult
    total_seconds = summary_time.seconds + body_time.seconds
    readtime_json = {
        "total": {
            "seconds": total_seconds,
            "minutes": math.ceil(total_seconds / 60),
        },
        "sections": {
            "summary": {
                "seconds": summary_time.seconds,
                "minutes": summary_time.minutes,
            },
            "body": {
                "seconds": body_time.seconds,
                "minutes": body_time.minutes,
            },
        },
    }
    return readtime_json
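
# `get_readtime_from_url` falls back to an `EmptyResult` whose definition is not
# shown above. A minimal zero-duration stand-in (an assumption, not the original
# definition) only needs the `seconds` and `minutes` attributes used here:
class _ZeroReadtime:
    seconds = 0  # no content, so no reading time
    minutes = 0

EmptyResult = _ZeroReadtime()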
def _set_read_time(request, page, is_post_creation=False):
    if hasattr(page, 'estimated_read_duration'):
        html = render_to_string(page.template, {'page': page, 'request': request})
        soup = BeautifulSoup(html, 'html.parser')
        for tag in soup.body.find_all(
                ['script', 'noscript', 'link', 'style', 'meta', 'header']):
            tag.decompose()

        # Get the readtime of the main content section of the page
        # (excluding header/footer)
        reading_seconds = readtime.of_html(str(soup.find('main'))).seconds
        video_nodes = soup.find_all(
            'video', attrs={constants.VIDEO_DURATION_DATA_ATTR_NAME: True})
        watching_seconds = sum(
            int(node.get(constants.VIDEO_DURATION_DATA_ATTR_NAME, 0))
            for node in video_nodes)
        seconds = reading_seconds + watching_seconds
        _update_data_for_appropriate_version(
            page=page,
            force_page_update=is_post_creation,
            data_to_update={
                'estimated_read_duration': datetime.timedelta(seconds=seconds)
            },
        )
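
# The attribute name lives in a `constants` module that is not shown above.
# Assuming a value like the following, the video lookup in `_set_read_time`
# would match markup such as <video data-duration="90" src="...">:
VIDEO_DURATION_DATA_ATTR_NAME = 'data-duration'  # hypothetical constants.py entry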
def read_one(id):
    """
    This function responds to a request for /api/story/{id}
    with the one matching story.

    :param id: id of the story to find
    :return: story matching the id
    """
    story = get(id)

    # Does the story exist?
    if story:
        readtime_result = readtime.of_html(story['body'])
        return {
            "id": story['id'],
            "title": story['title'],
            "description": story['description'],
            "body": story['body'],
            "timeToRead": convert(readtime_result.seconds),
            "tags": story['tags'],
            "createdAt": story['created'],
            "published": story['published'],
        }
    else:
        abort(404, "Record not found")
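
# `convert` is referenced but not defined above. A plausible helper
# (hypothetical, not the original) would turn raw seconds into a readable label:
def convert(seconds):
    minutes, secs = divmod(int(seconds), 60)
    return f"{minutes} min {secs} sec"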
def processURL(url):
    # Scrape the page using requests
    page = requests.get(url)

    # Pull the mimetype and http status code
    mimetype = page.headers['content-type']
    http_status = str(page.status_code)

    # Transform the url into a unique id
    id = hashlib.md5(url.encode()).hexdigest()

    # Set default document fields; tags and slugs default to empty
    is_archived = 1
    is_starred = 0
    user_name = 'admin'
    user_email = '*****@*****.**'
    user_id = str(1)
    is_public = str(False)
    created_at = str(datetime.now())[:-3]
    updated_at = str(datetime.now())[:-3]
    links = ["api/entries/" + str(id)]
    tags = str([])
    slugs = tags

    # Shorten the given url to obtain the domain name
    domain_name = re.search(r'https?://[^#?/]+', url).group(0)

    # Load the scraped page into the Beautiful Soup parser
    bs = BeautifulSoup(page.content, 'html.parser')

    # Use Beautiful Soup to pull the images from the page and pick the first as
    # the preview image; if there are no images, create one with the title as text
    images = bs.find_all('img', {'src': re.compile('.jpg')})
    title = str(bs.title.string)
    if images == []:
        preview_picture = ('https://dummyimage.com/170/000/ffffff&text='
                           + title.replace(' ', '%20'))
    else:
        preview_picture = images[0]['src']

    # Pull the lang attribute from the <html> tag; if it is empty, default to English
    language = bs.html.get('lang') if bs.html else None
    if language is None:
        language = 'en'

    # Pull the entire html content of the page, as well as the text-only content
    content = str(bs)
    content_text = bs.text

    # Use the readtime module to estimate reading time
    reading_time = str(readtime.of_html(str(bs)).minutes)

    # Collect all data into a dictionary
    result = {'is_archived': is_archived, 'is_starred': is_starred,
              'user_name': user_name, 'user_email': user_email,
              'user_id': user_id, 'tags': tags, 'slugs': slugs,
              'is_public': is_public, 'id': id, 'title': title, 'url': url,
              'content_text': content_text, 'created_at': created_at,
              'updated_at': updated_at, 'mimetype': mimetype,
              'language': language, 'reading_time': reading_time,
              'domain_name': domain_name, 'preview_picture': preview_picture,
              'http_status': http_status, 'links': links, 'content': content}

    # Take all of the values in that dict as strings, put them in a list,
    # then add that list back into the dictionary under 'all'
    all_values = [str(i) for i in result.values()]
    result['all'] = all_values

    return result
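
# A minimal usage sketch of processURL (the URL is illustrative):
entry = processURL('https://example.com/article.html')
print(entry['title'], entry['reading_time'], entry['domain_name'])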
def save(self, *args, **kwargs):
    """
    Overriding the default save method.

    :param args: Non-keyword arguments of the default save method.
    :param kwargs: Keyword arguments of the default save method.
    :return: None, but updates the Post with the slugified title for the
        reverse URL path, and the readtime of the post content (the field
        that is editable with summernote)
    """
    self.slug = slugify(self.title)
    self.post_readtime = readtime.of_html(self.content)
    super().save(*args, **kwargs)
def estimate_link_read_time(self, msg, args):
    """Listen for messages containing a link; if one is found, fetch the page
    and estimate the reading time based on its HTML.

    If the bot is not active in the room, it simply does nothing.
    """
    room = msg.to
    if not self.is_active_in_room(room):
        return
    url = re.search(URL_REGEX, msg.body).group(0)
    html = get_page_html(url)
    estimated_time = readtime.of_html(html)
    return 'Estimated time: {} min.'.format(estimated_time.minutes)
def find_posts() -> List[Post]:
    post_directories = os.listdir('blog')
    posts_found = list()
    for directory in post_directories:
        post = frontmatter.load(f'blog/{directory}/index.md')
        post_html = render_markdown(post.content)
        posts_found.append(
            Post(
                post["title"],
                post["created_date"],
                post.get("updated_date"),  # Not all posts have an updated_date
                post["slug"],
                directory,
                post_html,
                readtime.of_html(post_html).text))
    return sorted(posts_found, key=lambda x: x.created_date, reverse=True)
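
# The `Post` container is not defined above. Judging from the positional
# arguments passed in find_posts, it could be a NamedTuple along these lines
# (an assumption, not the original definition):
from typing import NamedTuple, Optional

class Post(NamedTuple):
    title: str
    created_date: str
    updated_date: Optional[str]
    slug: str
    directory: str
    html: str
    read_time: str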
def read_time(self):
    return str(readtime.of_html(str(self.body)))

def read_time(value):
    return readtime.of_html(value)

def read(html):
    return readtime.of_html(html)

def get_readtime(self):
    return readtime.of_html(self.content)

def reading_time(self):
    return readtime.of_html(self.content).text

def get_read_time(self, instance):
    return str(readtime.of_html(instance.body))
def get_article_read_time_from_html(html_text: str):
    try:
        read_time = readtime.of_html(html_text)
        return read_time
    except Exception:
        return ''
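
# This guarded wrapper returns a readtime Result on success and an empty string
# on failure, so callers should truth-test before using .text or .minutes.
# A minimal sketch:
result = get_article_read_time_from_html('<p>Hello world</p>')
label = result.text if result else 'unknown read time'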
def template(output_path: str):
    """The main template engine to generate the site's static content"""
    global TEMPLATES
    global ROUTEMAP

    print("[template] emptying working directory")
    directory_empty(output_path)

    print("[template] reading config file at ./config.json")
    config = json.loads(file_read("config.json"))

    print("[template] copying static directory")
    output_file = os.path.join(output_path, "static")
    shutil.copytree(config["static_directory"], output_file)

    print("[template] loading templates from config")
    TEMPLATES = templates_load(config["templates"])

    print("[template] running blog article generator")
    blog_article_listings = ""
    for article in config["articles"]:
        article_url = f"/blog/{article['identifier']}"
        print(
            f"[template/blog] creating article '{article['title']}' at {article_url}"
        )
        content = markdown2.markdown(file_read(article["markdown"]))
        content_time = str(readtime.of_html(content))

        # Create a new listing for the blog archive page
        blog_article_listings += template_fill(
            TEMPLATES["blog-listing"],
            {
                "title": article["title"],
                "datestring": article["datestring"],
                "readtime": content_time,
                "banner": article["banner"],
                "description": article["description"],
                "permalink": article_url,
            },
        )

        # Create the blog article from the template
        blog_article = template_fill(
            TEMPLATES["blog-article"],
            {
                "title": article["title"],
                "datestring": article["datestring"],
                "readtime": content_time,
                "banner": article["banner"],
                "description": article["description"],
                "permalink": article_url,
                "content": content,
            },
        )
        output_file = os.path.join(output_path, f"blog-{article['identifier']}.html")
        file_write(output_file, blog_article)
        ROUTEMAP[f"{config['domain']}{article_url}"] = 0.7

    TEMPLATES["@blog-listings"] = blog_article_listings

    print("[template] running page generator")
    for page in config["pages"]:
        page_url = page["location"]
        print(f"[template/page] creating page '{page['title']}' at {page_url}")
        content = template_fill(
            file_read(page["file"]),
            {
                "title": page["title"],
                "description": page["description"],
                "permalink": page_url,
            },
        )
        output_file = os.path.join(output_path, page["destination"])
        file_write(output_file, content)
        ROUTEMAP[f"{config['domain']}{page_url}"] = page["priority"]

    print("[template] copying custom static files")
    for copy in config["copy"]:
        print(
            f"[template/copy] copying file '{copy['file']}' to '{copy['location']}'"
        )
        output_file = os.path.join(output_path, copy["location"])
        shutil.copy(copy["file"], output_file)

    print("[template] compiling sitemap XML")
    sitemap = TEMPLATES["sitemap"]
    for route in ROUTEMAP:
        sitemap += (
            f"<url><loc>{route}</loc><priority>{ROUTEMAP[route]}</priority></url>"
        )
    sitemap += "</urlset>"
    output_file = os.path.join(output_path, "sitemap.xml")
    file_write(output_file, sitemap)

    print("[template] finished")
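
# `template_fill`, `file_read`, `file_write`, `directory_empty`, and
# `templates_load` are project helpers that are not shown. As an assumption
# about the placeholder syntax (a "{key}" convention is a guess), template_fill
# could be as simple as:
def template_fill(template: str, values: dict) -> str:
    # Replace each "{key}" placeholder with its corresponding value
    for key, value in values.items():
        template = template.replace("{" + key + "}", str(value))
    return template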
def get_context(self, request):
    context = super().get_context(request)
    context["time_to_read"] = readtime.of_html(self.content.__html__())
    return context
def get_read_time(self):
    '''
    Returns the read time of the HTML body
    '''
    string = str(self.main_content)
    result = readtime.of_html(string)
    return result
def test_html(self):
    inp = open('tests/samples/html.html').read()
    result = readtime.of_html(inp)
    self.assertEqual(result.seconds, 236)
    self.assertEqual(result.text, u('4 min'))
    self.assertEqual(u(result), u('4 min read'))
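
# The expected values in test_html come from readtime's default reading speed
# (265 words per minute, following Medium's convention), so they would shift
# if the library's default speed changed.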
def tempo_de_leitura(self):
    # Returns the reading time of the content; assumes `from readtime import of_html`
    return of_html(self.conteudo).text
def get_read_time(self):
    '''
    Returns the read time of the Content body
    '''
    string = str(self.body)
    result = readtime.of_html(string)
    return result
def new(*args, **kwargs):
    currentUser = User.get().filter_by(
        name=kwargs['token']['name']).first_or_404()
    if not currentUser.role.permissions.add_post:
        return make_response(
            jsonify({
                'operation': 'error',
                'error': 'Missing permissions'
            }), 401)
    if not request.form['data']:
        return make_response(
            jsonify({
                'operation': 'error',
                'error': 'Missing data'
            }), 401)
    data = json.loads(str(request.form['data']))
    if not data['title'] or not data['content'] or not data['tags']:
        return make_response(
            jsonify({
                'operation': 'error',
                'error': 'Missing data'
            }), 401)

    index = str(db.session.execute(Sequence('post_id_seq')))
    thumbnail_link = None
    if data['image']:
        thumbnail = SaveImage(index)
        thumbnail_link = url_for('static',
                                 filename='thumbail_post/{}'.format(thumbnail))
    else:
        thumbnail_link = 'none'

    lang = translate.getLanguageForText(
        str(cleanhtml(data['content'])).encode('utf-8-sig'))
    langQuery = Languages.get().filter_by(code=lang.iso_tag).first()
    if langQuery is None:
        new_lang = Languages(name=lang.language, code=lang.iso_tag)
        new_lang.add()
        langQuery = new_lang

    tags_ids = []
    tags = []
    for tag in data['tags']:
        check = Post_Tag.get().filter_by(name=tag).first()
        if check is None:
            new_tag = Post_Tag(name=tag, count=1)
            new_tag.add()
            check = new_tag
        else:
            setattr(check, 'count', Post_Tag.count + 1)
            check.save()
        tags_ids.append(check.id)
    for tag_id in tags_ids:
        tags.append({"post": index, "tag_id": tag_id})

    nPost = NewPostSchema().load({
        "id": int(index),
        "title": data['title'],
        "read_time": str(readtime.of_html(data['content'])),
        "author_id": currentUser.id,
        "language_id": langQuery.id,
        "info": {
            "thumbnail": thumbnail_link,
            "text": data['content'],
            "tags": tags
        },
        "link": '/post/' + (str(data['title']).replace(' ', '-')).replace('?', '')
                + '-' + str(index)
    })
    nPost.add()

    for user in currentUser.followed:
        not_id = str(db.session.execute(Sequence('notification_id_seq')))
        notification = Notification(
            id=int(not_id),
            author=currentUser.id,
            user=user.user,
            type=5,
            title=nPost.title,
            body='{} shared a new post'.format(currentUser.name),
            link=nPost.link + '?notification_id=' + str(not_id))
        send_notification(
            user.user, {
                'text': '{} shared a new post'.format(currentUser.name),
                'link': nPost.link + '?notification_id=' + str(not_id),
                'icon': currentUser.info.avatar_img,
                'id': int(not_id)
            })
        notification.add()

    return make_response(jsonify({
        'operation': 'success',
        'link': nPost.link
    }), 200)
def get_article_read_time_from_file(request, file_name: str):
    try:
        read_time = readtime.of_html(read_article_text(request, file_name))
        return read_time
    except Exception:
        return ''