def revisions_feed(request=None, pagename=None, feedtype="atom"):
    """Serve the revision history of a wiki page as an Atom feed or JSON.

    :param request: current request; ``request.site`` identifies the wiki.
    :param pagename: page whose revisions to list; defaults to 'Home'.
    :param feedtype: "atom" for an Atom feed, anything else for JSON.
    :raises NotFound: when the page does not exist.
    """
    if pagename is None:
        pagename = 'Home'
    page = get_page(request.site._id, pagename)
    if not page:
        raise NotFound
    # The current page object acts as the newest revision.
    all_revisions = [page] + page.revisions()
    if feedtype == "atom":
        feed = AtomFeed(
            title="%s: Latest revisions of %s" % (request.site.cname, page.title),
            subtitle=request.site.subtitle,
            updated=page.updated,
            feed_url=request.url
        )
        for rev in all_revisions:
            title = ''
            # NOTE(review): unlike the JSON branch below, this url_for call
            # omits cname=request.site.cname — confirm whether that is intended.
            _url = "%s%s" % (request.host_url,
                             url_for("revision_page",
                                     pagename=pagename,
                                     nb_revision=rev.nb_revision))
            # Entry title: the last changed hunk of the diff, tag-stripped
            # and truncated; falls back to "Edited." when nothing changed.
            for change in rev.changes:
                if change['type'] != "unmod":
                    title = "\n".join(change['changed']['lines'])
            title = do_truncate(do_striptags(title), 60)
            title = title and title or "Edited."
            feed.add(title, convert_markdown(rev.content),
                     updated=rev.updated,
                     url=_url,
                     id=_url,
                     author=rev.title.replace(' ', '_'))
        return feed.get_response()
    else:
        # NOTE(review): local name `json` shadows the stdlib json module
        # for the rest of this branch.
        json = {
            'title': "%s: Latest revisions of %s" % (request.site.cname, page.title),
            'subtitle': request.site.subtitle,
            'updated': datetime_tojson(page.updated),
            'feed_url': request.url,
            'revisions': []
        }
        for rev in all_revisions:
            title = ''
            # Same title-derivation as the Atom branch above.
            for change in rev.changes:
                if change['type'] != "unmod":
                    title = "\n".join(change['changed']['lines'])
            title = do_truncate(do_striptags(title), 60)
            title = title and title or "Edited."
            url = "%s%s" % (request.host_url,
                            url_for("revision_page",
                                    cname=request.site.cname,
                                    pagename=pagename,
                                    nb_revision=rev.nb_revision))
            json['revisions'].append({
                'title': title,
                'content': rev.content,
                'url': url,
                'updated': datetime_tojson(rev.updated),
                'id': rev.nb_revision
            })
        return send_json(json)
def test_extract_products_shortdescription():
    """The management command fills short_description from description:
    HTML tags stripped, text capped at 150 characters, for every language."""
    activate("en")
    out = StringIO()

    html_description1 = "<b>a HTML description</b>"
    product1 = create_product("p1", description=html_description1)

    html_description2 = '<p class="what">another HTML description</p>'
    product2 = create_product("p2", description=html_description2)

    faker = Faker()
    long_description = faker.sentence(nb_words=150, variable_nb_words=True)
    product3 = create_product("p3", description=long_description)

    # Give product2 the same description in every configured language.
    for language_code, _ in settings.LANGUAGES:
        product2.set_current_language(language_code)
        product2.description = html_description2
        product2.save()

    call_command("shuup_extract_products_shortdescription", stdout=out)

    # Reload from the database to observe what the command persisted.
    product1 = Product.objects.get(pk=product1.pk)
    product2 = Product.objects.get(pk=product2.pk)
    product3 = Product.objects.get(pk=product3.pk)

    assert product1.short_description == do_striptags(html_description1)
    for language_code, _ in settings.LANGUAGES:
        product2.set_current_language(language_code)
        assert product2.short_description == do_striptags(html_description2)
    assert product3.short_description == long_description[:150]
    assert "Done." in out.getvalue()
def __eq__(self, other: Any) -> bool:
    """Equality on avis plus the tag-stripped objet and content fields."""
    # Guard clauses preserve the original short-circuit order: avis is
    # compared first, then objet, then content.
    if self.avis != other.avis:
        return False
    if do_striptags(self.objet) != do_striptags(other.objet):  # type: ignore
        return False
    return do_striptags(self.content) == do_striptags(other.content)  # type: ignore
def feed(self):
    """Yield one dict per feed entry with id, title, author and body.

    Malformed entries are skipped (best-effort parsing of scraped HTML).
    """
    for entry in self.xml_feed('entry'):
        try:
            # Fix: the original built three identical BeautifulSoup trees
            # from the same stripped string; parse once and reuse.
            html_entry = BeautifulSoup(do_striptags(entry.content.string))
            autor, uid, conteudo = html_entry.li(text=re.compile('.*'))
            autor = html_entry.a.string
            yield {"id": uid,
                   "titulo": entry.title.string,
                   'autor': entry.content.string,
                   'conteudo': conteudo[2:],
                   }
        except Exception:
            # Skip entries that do not match the expected structure.
            # (`except Exception:` instead of the Py2-only `except Exception, e:`
            # whose bound name was never used.)
            pass
def grouping_key(amendement: Amendement) -> GroupingKey:
    """
    Group successive amendements with the same answer (except maybe for
    tags), but not gouvernementaux.
    """
    user_content = amendement.user_content
    num = amendement.num_str if amendement.gouvernemental else ""
    avis = user_content.avis or ""
    objet = do_striptags(user_content.objet) if user_content.objet else ""  # type: ignore
    reponse = do_striptags(user_content.reponse) if user_content.reponse else ""  # type: ignore
    return (num, avis, objet, reponse)
def get_twitter_description(context, description=None):
    """Resolve the Twitter card description by walking a chain of fallbacks:
    explicit argument → context → object summary → page settings → OG tag."""
    # 1. Context variable, unless an explicit description was given.
    if not description:
        description = context.get('twitter_description')
    # 2. The current object's description/summary, tag-stripped.
    if not description:
        obj = context.get('object', None)
        if obj:
            field = getattr(obj, 'description', None) or getattr(obj, 'summary', None)
            description = do_striptags(truncate_paragraphs(field, 1)) if field else None
    # 3. Twitter description configured on the current page or the homepage.
    if not description:
        request = context['request']
        current_page = request.pages.current
        homepage = request.pages.homepage
        if current_page:
            description = current_page.twitter_description
        if not description and homepage:
            description = homepage.twitter_description
    # 4. Last resort: reuse the Open Graph description.
    if not description:
        description = get_og_description(context)
    return escape(description or '')
def json_ld(self, **kwargs):
    '''
    Returns a JSON serializable dictionary of the product with the
    Product schema markup.

    See: http://schema.org/Product

    Any key value pairs passed to kwargs overwrites default information.
    '''
    sale_price = self.sale_price(1)
    data = {
        "@context": "http://schema.org",
        "@type": "Product",
        "name": self.name,
        "sku": self.code,
        "description": do_striptags(self.description),
        "offers": {
            "@type": "Offer",
            "availability": "http://schema.org/InStock",
            "price": str(sale_price),
            "priceCurrency": request.nereid_currency.code,
        },
        "image": self.default_image.transform_command().thumbnail(
            500, 500, 'a').url(_external=True),
        "url": self.get_absolute_url(_external=True),
    }
    # Fix: the docstring promised kwargs override the defaults, but the
    # original ignored them entirely.
    data.update(kwargs)
    return data
def __init__(self, user, response_format, instance, *args, **kwargs):
    """Build the links field: permission-filtered queryset and, outside
    AJAX responses, a choice list of truncated tag-free object names."""
    super(ObjectLinksForm, self).__init__(*args, **kwargs)
    queryset = Object.filter_permitted(user, Object.objects)
    self.fields['links'].queryset = queryset
    if 'ajax' not in response_format:
        # Exclude objects the instance already links to.
        if instance:
            queryset = queryset.exclude(pk__in=instance.links.all())
        choices = []
        for obj in queryset:
            label = do_truncate(do_striptags(unicode(obj.object_name)), 20, True)
            human_type = obj.get_human_type()
            if human_type:
                label += u" (" + human_type + u")"
            choices.append((obj.id, label))
        self.fields['links'].choices = choices
        self.fields['links'].label = ""
        self.fields['links'].initial = ""
        self.fields['links'].widget.attrs.update({
            'class': 'autocomplete',
            'callback': reverse('core_ajax_object_lookup')
        })
def __init__(self, user, response_format, instance, *args, **kwargs):
    """Configure the 'links' field with permitted objects; for non-AJAX
    responses also build human-readable choices and autocomplete attrs."""
    super(ObjectLinksForm, self).__init__(*args, **kwargs)
    permitted = Object.filter_permitted(user, Object.objects)
    links_field = self.fields['links']
    links_field.queryset = permitted
    if 'ajax' not in response_format:
        if instance:
            # Objects already linked are not offered again.
            permitted = permitted.exclude(pk__in=instance.links.all())
        choices = []
        for obj in permitted:
            text = do_truncate(
                do_striptags(unicode(obj.object_name)), 20, True)
            kind = obj.get_human_type()
            if kind:
                text += u" (" + kind + u")"
            choices.append((obj.id, text))
        links_field.choices = choices
        links_field.label = ""
        links_field.initial = ""
        links_field.widget.attrs.update({'class': 'autocomplete',
                                         'callback': reverse('core_ajax_object_lookup')})
def apost():
    """Create-post view: GET renders the form, POST validates and saves a
    new post, attaching tags and crediting the author with points."""
    if request.method == 'GET':
        return render_template('front/apost.html')
    else:
        # NOTE(review): request.form.get('tags') may be None, which would
        # make tags.split(',') below raise AttributeError — confirm the
        # client always sends a 'tags' field.
        tags = request.form.get('tags')
        form = ApostForm(request.form)
        if form.validate():
            title = form.title.data
            content = form.content.data
            tags = tags.split(',')
            all_tags = TagsModel.query.all()
            all_tagnames = [tag.tagname for tag in all_tags]
            post = PostsModel(title=title, content=content)
            post.author = g.front_user
            # Reuse existing tags by name; create the others.
            for tag in tags:
                if tag in all_tagnames:
                    ta = TagsModel.query.filter_by(tagname=tag).first()
                else:
                    ta = TagsModel(tagname=tag)
                post.tags.append(ta)
            # Character count is measured on the tag-stripped body.
            charactors_len = len(do_striptags(content))
            post.author.points += 2
            post.author.charactors += charactors_len
            db.session.add(post)
            db.session.commit()
            return restful.success()
        else:
            return restful.params_error(message=get_error(form))
def post_detail(post_id):
    """Post detail view: bumps the read counter, gathers comments / follow
    state / hot posts / ads, and optionally machine-translates to English."""
    language = request.args.get('lang')
    post = PostsModel.query.get(post_id)
    # Side effect: every view increments the read count.
    post.read_count += 1
    db.session.commit()
    comment = CommentModel.query.filter_by(post_id=post_id).order_by(
        CommentModel.create_time.desc()).all()
    # Users the post author follows — used to render follow buttons.
    users = FocusModel.query.filter_by(own_user_id=post.author.id).all()
    is_focus_id = [focus_users.author_id for focus_users in users]
    if language == 'en':
        # Translate title and tag-stripped body (get_result presumably
        # calls a translation service — TODO confirm).
        title = get_result(post.title)
        content = get_result(do_striptags(post.content))
    else:
        title = post.title
        content = post.content
    # Most-read posts by the same author.
    hot_posts = PostsModel.query.filter_by(author_id=post.author.id).order_by(
        PostsModel.read_count.desc()).all()
    ads = AdvertisementModel.query.all()
    text = {
        'title': title,
        'content': content,
        'post': post,
        'comments': comment,
        'is_focus_id': is_focus_id,
        'hot_posts': hot_posts,
        'ads': ads,
    }
    return render_template('front/post_detail.html', **text)
def label_from_instance(self, obj):
    """Return a 30-char, tag-free label for *obj*, suffixed with its
    human-readable type when one exists."""
    label = filters.do_truncate(filters.do_striptags(unicode(obj)), 30)
    obj_type = obj.get_human_type()
    if obj_type:
        label += " (" + obj_type + ")"
    return label
def strip_tags(text):
    """Strip HTML tags and replace adjacent whitespace by one space."""
    # Byte strings are decoded for stripping and re-encoded on the way out,
    # so callers get back the same type they passed in (Python 2 style).
    if isinstance(text, str):
        return do_striptags(text.decode('utf-8')).encode('utf-8')
    return do_striptags(text)
def post_details(year, slug):
    """Render a single post page; the meta description is the truncated,
    tag-stripped rendered content."""
    post = post_from_year_and_slug_or_404(year, slug)
    summary = do_truncate(current_app.jinja_env,
                          do_striptags(post.rendered_content))
    return render_template('post_details.html',
                           post=post,
                           description=summary,
                           title=post.title)
def JsonPosts(posts_obj, en):
    """Serialize posts into JSON-ready dicts.

    :param posts_obj: iterable of post model instances.
    :param en: 'en' to machine-translate title/content/username.
    :return: list of dicts for the front-end.
    """
    posts = []
    for post in posts_obj:
        time = handle_time(post.create_time)
        cover = post_cover(post.content)
        # Fix: strip tags once instead of twice per post.
        stripped = do_striptags(post.content)
        # NOTE: the 200-char threshold is measured on the raw HTML while
        # the excerpt is cut from the stripped text (original behavior kept).
        if len(post.content) > 200:
            content = stripped[0:200] + '...'
        else:
            content = stripped
        hight = 'ok' if len(post.hightlight) == 1 else ''
        if en == 'en':
            time = get_result(time)
            posts.append({
                'title': get_result(post.title),
                'content': get_result(content),
                'username': get_result(post.author.nickname),
                'create_time': time,
                'cover': cover,
                'id': post.id,
                'avatar': post.author.avatar,
                'hight_light': hight,
                'user_id': post.author.id
            })
        else:
            posts.append({
                'title': post.title,
                'content': content,
                'username': post.author.nickname,
                'create_time': time,
                'cover': cover,
                'id': post.id,
                'avatar': post.author.avatar,
                'hight_light': hight,
                'user_id': post.author.id,
                'nickname': post.author.nickname,
                'email': post.author.email,
                'read_count': post.read_count,
                'comment_count': len(post.comments)
            })
    return posts
def preview(self):
    """HTML representing a short preview of the post.

    Contains the first 200 characters of the post's content, followed by
    a link to the post itself.
    """
    more_link = '... <a href="{}">Continue→</a>'.format(self.url)
    return do_truncate(do_striptags(self._content), length=200, end=more_link)
def get_six_symbol_words(content):
    """Return the set of distinct 6-character words in the stripped content.

    Whitespace and punctuation are removed from each space-separated chunk
    before measuring its length.
    """
    # Fix: compile the two patterns once, instead of implicitly per word.
    ws_re = re.compile('[\n\r\t\s +]')
    punct_re = re.compile('[%s+]' % re.escape(string.punctuation))
    bucket = set()
    for word in do_striptags(get_contents(content)).split(' '):
        word = ws_re.sub('', word)
        word = punct_re.sub('', word)
        if len(word) == 6:
            bucket.add(word)
    return bucket
def index_document(self, doc):
    """PUT one document into the ElasticSearch index; raise on failure."""
    put_url = ('{self.api_url}/{self.index_name}/document/{doc.code}'
               .format(self=self, doc=doc))
    # Document codes look like "<publication>_<year>_<rest>".
    publication, year, _ = doc.code.split('_', 2)
    payload = {
        'content': do_striptags(doc.content.text),
        'publication': publication,
        'year': int(year),
    }
    resp = requests.put(put_url, data=json.dumps(payload))
    if resp.status_code not in [200, 201]:
        log.error("Error while indexing %s: %r", doc.code, resp.text)
        raise RuntimeError("ElasticSearch indexing failed")
def handle(self, *args, **options):
    """Back-fill short_description (tag-stripped, max 150 chars) for every
    translated product description."""
    from django.conf import settings
    from jinja2.filters import do_striptags
    from shuup.core.models import Product

    language_codes = [code for code, _ in settings.LANGUAGES]
    for product in Product.objects.all():
        for code in language_codes:
            translation = product.translations.filter(
                master_id=product.pk, language_code=code).first()
            if translation and translation.description:
                translation.short_description = do_striptags(
                    translation.description)[:150]
                translation.save()
    self.stdout.write("Done.")
def index_document(self, doc):
    """Index a single document in ElasticSearch.

    Raises RuntimeError when the server does not answer 200/201.
    """
    put_url = '{self.api_url}/{self.index_name}/document/{doc.code}'.format(
        self=self, doc=doc)
    publication, year, _ = doc.code.split('_', 2)
    body = json.dumps({
        'content': do_striptags(doc.content.text),
        'publication': publication,
        'year': int(year),
    })
    resp = requests.put(put_url, data=body)
    ok = resp.status_code in [200, 201]
    if not ok:
        log.error("Error while indexing %s: %r", doc.code, resp.text)
        raise RuntimeError("ElasticSearch indexing failed")
def government_email_validator(form, field):
    """
    A WTForms validator that uses the api to check the email against a
    government domain whitelist. Adds a flag 'non_gov' to the field for
    detecting if the user needs to be warned about a government email
    restriction. This flag is only true if the given email address is
    known to be non-government (and not just typoed).
    """
    setattr(field.flags, 'non_gov', False)
    email_validator(form, field)
    if is_government_email(field.data):
        return
    setattr(field.flags, 'non_gov', True)
    # wtforms wraps the label in a <label> tag
    label = do_striptags(field.label)
    raise ValidationError('{} needs to be a government email address'.format(label))
def clean_data(params):
    '''
    Strips whitespace from data, and html tags from body if in params
    '''
    if params is None:
        return None
    cleaned_data = {key: value.strip() for key, value in params.items()}
    if 'body' in cleaned_data:
        cleaned_data['body'] = do_striptags(cleaned_data['body'])
    return cleaned_data
def extract_data(data):
    """Scrape asset metadata (title, authors, abstract, download URL,
    conference) out of an asset HTML page; missing fields become ''.

    :param data: raw HTML of the asset page.
    """
    s = BeautifulSoup(data, 'lxml')
    title_el = s.find(class_='asset-title')
    authors_el = s.find(class_='asset-authors')
    abstract_el = s.find(id='summary-text-field')
    download_url_el = s.find('a', class_='download')
    presented_at_el = s.find(class_='asset-subcontainer__content--conf')
    # Fix: regex patterns are raw strings now ('\s' was a deprecated
    # invalid escape sequence in a plain string literal).
    return {
        'title': title_el.text.strip() if title_el else '',
        'authors': re.sub(r'\s+', ' ', authors_el.text).strip() if authors_el else '',
        'abstract': do_striptags(abstract_el.get('value')) if abstract_el else '',
        'download_url': download_url_el.get('data-url') if download_url_el else '',
        'presented_at': re.sub(r'\s+', ' ', presented_at_el.text).strip() if presented_at_el else '',
    }
def mdstrip(value, length=None):
    '''
    Truncate and strip tags from a markdown source

    The markdown source is truncated at the excerpt if present and
    smaller than the required length. Then, all html tags are stripped.
    '''
    if not value:
        return ''
    if EXCERPT_TOKEN in value:
        value = value.split(EXCERPT_TOKEN, 1)[0]
    # Fix: with the default length=None, `length > 0` raises TypeError on
    # Python 3 — guard on truthiness first (matches the other mdstrip
    # variants in this codebase).
    if length and length > 0:
        value = do_truncate(value, length)
    rendered = md(value)
    return do_striptags(rendered)
def government_email_validator(form, field):
    """
    A WTForms validator that uses the api to check the email against a
    government domain whitelist. Adds a flag 'non_gov' to the field for
    detecting if the user needs to be warned about a government email
    restriction. This flag is only true if the given email address is
    known to be non-government (and not just typoed).
    """
    setattr(field.flags, 'non_gov', False)
    email_validator(form, field)
    if not is_government_email(field.data):
        setattr(field.flags, 'non_gov', True)
        # wtforms wraps the label in a <label> tag
        plain_label = do_striptags(field.label)
        raise ValidationError(
            '{} needs to be a government email address'.format(plain_label))
def extract_data(data):
    """Normalize an API record dict into the common metadata shape.

    Absent fields map to '' (and presented_at is always empty here).
    """
    authors = data.get('authors')
    files = data.get('files')
    # NOTE(review): assumes every author dict has a 'full_name' — a None
    # value would break the join; confirm against the API.
    author_names = ', '.join([a.get('full_name') for a in authors]) if authors else ''
    first_download = files[0].get('download_url') if files and len(files) else ''
    return {
        'title': data.get('title', ''),
        'authors': author_names,
        'abstract': do_striptags(data.get('description', '')),
        'download_url': first_download,
        'presented_at': '',
    }
def update_article_index(self, request, page, revision, username):
    """ Update search index """
    w = WidgetLib()
    rendered = ""
    try:
        # Index the tag-stripped rendered article body.
        rendered = do_striptags(w.render_article(page, revision.article))
    except Exception:
        # Best-effort: index with empty text if rendering fails.
        # (Narrowed from a bare `except:` so KeyboardInterrupt and
        # SystemExit are no longer swallowed.)
        pass
    self.s.update_index(
        page.display_name,
        request.route_url("article_read", page_id=page.name),
        rendered,
        self.t.get_tags(page),
        revision.created,
        "article",
        page.name,
        username)
def extract_comments(root, threads):
    """Walk a Disqus-style XML export and collect recent comment authors.

    :param root: ElementTree root of the export.
    :param threads: mapping of thread id -> grouping key.
    :return: dict mapping key -> list of tag-stripped message texts newer
             than COMMENTS_SINCE.
    """
    # Namespaced element selectors.
    post_sel = ".{namespace}{selector}".format(
        namespace=NAMESPACE, selector='post')
    thread_sub_sel = "{namespace}{selector}".format(
        namespace=NAMESPACE, selector='thread')
    message_sub_sel = "{namespace}{selector}".format(
        namespace=NAMESPACE, selector='message')
    # author_sub_sel = ".//{namespace}{author}/{namespace}{username}".format(
    #     namespace=NAMESPACE,
    #     author='author',
    #     username='******')
    createdAt_sub_sel = ".//{namespace}{selector}".format(
        namespace=NAMESPACE, selector='createdAt')
    metadata = {}
    for post in root.findall(post_sel):
        thread = post.find(thread_sub_sel)
        # Thread id lives in an internals-namespaced attribute.
        thread_id = thread.attrib.get(
            '{namespace}id'.format(namespace=INTERNALS_NAMESPACE), None)
        if thread_id:
            for message in post.findall(message_sub_sel):
                #author = post.find(author_sub_sel)
                # NOTE(review): createdAt is looked up on `post`, not on
                # `message` — confirm this is intended for multi-message posts.
                createdAt = post.find(createdAt_sub_sel)
                if parse(createdAt.text) > COMMENTS_SINCE:
                    who = do_striptags(message.text)
                    key = threads[thread_id]
                    if key is not None and key.strip() != "":
                        if key not in metadata:
                            metadata[key] = [who]
                        else:
                            # Multiple messages for the same key: warn and
                            # accumulate.
                            print(('Duplicate for message for {}\n'
                                   'Found {}\n'
                                   'New {}').format(key, metadata[key], who))
                            metadata[key].append(who)
    return metadata
def mdstrip(value, length=None, end='…'):
    '''
    Truncate and strip tags from a markdown source

    The markdown source is truncated at the excerpt if present and
    smaller than the required length. Then, all html tags are stripped.
    '''
    if not value:
        return ''
    # Cut at the excerpt marker when present.
    if EXCERPT_TOKEN in value:
        value = value.partition(EXCERPT_TOKEN)[0]
    text = do_striptags(md(value, wrap=False))
    if length and length > 0:
        text = do_truncate(None, text, length, end=end, leeway=2)
    return text
def mdstrip(value, length=None, end='…'):
    '''
    Truncate and strip tags from a markdown source

    The markdown source is truncated at the excerpt if present and
    smaller than the required length. Then, all html tags are stripped.
    '''
    if not value:
        return ''
    if EXCERPT_TOKEN in value:
        value = value.split(EXCERPT_TOKEN, 1)[0]
    # Render → strip tags → sanitize, then optionally truncate.
    rendered = md(value, wrap=False)
    text = bleach_clean(do_striptags(rendered))
    if length and length > 0:
        text = do_truncate(None, text, length, end=end, leeway=2)
    return text
def get_search_part(content, search_str, left_offset=30, part_len=260):
    """Extract the excerpt of the article body surrounding a search hit.

    :param content: article body (HTML; tags are stripped first)
    :param search_str: the search term (matched case-insensitively)
    :param left_offset: characters kept before the hit, default 30
    :param part_len: total excerpt length, default 260
    :return: the excerpt with ellipses at cut edges and the hit highlighted
    """
    plain = do_striptags(content)
    hit_pos = plain.lower().find(search_str.lower())
    start = max(0, hit_pos - left_offset)
    excerpt = plain[start: start + part_len]
    # Ellipses mark where the excerpt was cut out of the full text.
    if hit_pos - left_offset > 0:
        excerpt = f'....{excerpt}'
    if hit_pos + part_len < len(plain):
        excerpt = f'{excerpt}....'
    return excerpt.replace(search_str, f'<font color="#ff3366">{search_str}</font>')
def truncate(text, arg, ellipsis='…'):
    """Truncate *text* after stripping HTML tags.

    :param arg: e.g. "20c" (20 characters) or "5w" (5 words).
    :param ellipsis: appended when the text was actually shortened.
    :raises ValueError: when *arg* does not match "<number>[cw]".
    """
    # Fix: raw string for the regex (r'\d' — plain '\d' is a deprecated
    # invalid escape sequence).
    matches = re.match(r'(\d+)([cw])', arg)
    text = do_striptags(text)
    if not matches:
        raise ValueError()
    count = int(matches.group(1))
    # Fix: renamed from `type`, which shadowed the builtin.
    unit = matches.group(2)
    if unit == 'c':
        if count > len(text):
            return text
        return text[:count] + ellipsis
    elif unit == 'w':
        words = text.strip().split()
        if count > len(words):
            return text
        return ' '.join(words[:count]) + ellipsis
def get_og_description(context, description=None):
    """Resolve the Open Graph description: explicit argument, then context
    variable, then the object's summary, then the current page setting."""
    if not description:
        description = context.get('og_description')
    if not description:
        obj = context.get('object', None)
        if obj:
            summary = getattr(obj, 'description', None) or getattr(obj, 'summary', None)
            description = do_striptags(truncate_paragraphs(summary, 1)) if summary else None
    if not description:
        current_page = context['request'].pages.current
        if current_page:
            description = current_page.og_description
    return escape(description or '')
def myhtmlstrip(astring, n=3):
    '''
    Strip article.body down to its first n <p> elements for index.html.
    Falls back to a 255-char tag-stripped truncation when no <p> is found.
    '''
    # match the contents of style in img tags
    imgstyle = re.compile(
        r'(?P<imgtag>\s*img[^>].*?style=)(?P<quote>[\'\"]).*?(?P=quote)', re.I)
    # sub the contents of style in img tags with "max-width:100%;"
    # (raw string: '\g' is a deprecated invalid escape in a plain literal)
    s = imgstyle.sub(r'\g<imgtag>\g<quote>max-width:100%;\g<quote>', astring)
    # drop empty paragraphs.
    # Fix: the original passed re.M as the 4th positional argument of
    # re.sub, which is `count`, not `flags` — pass flags= explicitly.
    s = re.sub(r'<p>\ </p>', '', s, flags=re.M)
    # find all the <p> elements
    para = re.compile(r'<\s*p[^>]*>.*?<\s*/\s*p\s*>', re.I)
    P = para.findall(s)
    # remove all html tags for safety.
    # Fix: list() so the slice below works on Python 3, where map()
    # returns an iterator.
    P = list(map(do_striptags, P))
    # join the first n items
    P = "</p><p>".join(P[:n])
    # if no <p> elements, do_truncate
    if P:
        return do_mark_safe("<p>%s</p>" % P)
    return do_truncate(do_striptags(s), 255, True)
def test_channel_share_email(self, send_email):
    """Sharing a channel by email sends exactly one mail to the recipient,
    records a 'channel_shared' notification, and surfaces the recipient in
    the sharer's friends list."""
    with self.app.test_client() as client:
        userid = self.create_test_user().id
        recipient = UserData.test_user_a.email
        r = client.post(
            '/ws/share/email/',
            data=json.dumps(dict(
                object_type='channel',
                object_id=ChannelData.channel1.id,
                email=recipient,
                external_system='email',
                external_uid='123',
            )),
            content_type='application/json',
            headers=[get_auth_header(userid)])
        self.assertEquals(r.status_code, 204, r.data)
        # Exactly one email, addressed to the recipient.
        self.assertEquals(send_email.call_count, 1)
        self.assertEquals(send_email.call_args[0][0], recipient)
        if self.app.config.get('DOLLY'):
            # DOLLY builds mention the subscription address in the body;
            # strip tags before matching the HTML mail.
            self.assertIn('subscribed as %s.' % recipient,
                          do_striptags(send_email.call_args[0][1]))
        # The share is recorded as a notification for the recipient user.
        notifications = UserNotification.query.filter_by(
            user=UserData.test_user_a.id,
            message_type='channel_shared')
        message = json.loads(notifications.value('message'))
        self.assertEquals(message['user']['id'], userid)
        self.assertEquals(message['channel']['id'], ChannelData.channel1.id)
    with self.app.test_client() as client:
        # The recipient now appears in the sharer's share-filtered friends.
        r = client.get(
            '/ws/%s/friends/' % userid,
            query_string='share_filter=true',
            headers=[get_auth_header(userid)])
        self.assertEquals(r.status_code, 200, r.data)
        friends = json.loads(r.data)['users']['items']
        self.assertIn(('email', recipient),
                      [(f['external_system'], f['email']) for f in friends])
def get_six_symbol_words(content):
    """Return the distinct space-separated 6-character tokens of the
    tag-stripped content."""
    chunks = do_striptags(get_contents(content)).split(' ')
    return set(word for word in chunks if len(word) == 6)
def _format(_html):
    """Strip HTML tags and hard-truncate to 150 characters.

    Fix: the original called ``do_truncate(text, True, length=150)``,
    which passes ``length`` both positionally (as ``True``) and by
    keyword — a TypeError at call time. The ``True`` was evidently meant
    for ``killwords`` (cut mid-word), as the sibling ``_format`` helper
    uses keyword arguments.
    """
    return do_truncate(do_striptags(_html), length=150, killwords=True)
def _format(_html):
    """Strip HTML tags, then truncate to 200 characters."""
    stripped = do_striptags(_html)
    return do_truncate(stripped, length=200)
def __unicode__(self):
    """Unicode representation: the title with HTML tags removed."""
    plain_title = do_striptags(self.title)
    return plain_title
def strip_tags(text):
    """Strip HTML tags and replace adjacent whitespace by one space."""
    # Thin wrapper around jinja2's striptags filter.
    stripped = do_striptags(text)
    return stripped
def handle_getData(self, num_rows, page, sort_column, sort_order):
    """
    Called from the system when the frontend needs the data of the
    datatable. This handler it self calls the "getData"-Method which
    returns the data.
    """
    # Paging window: num_rows == 0/None means "everything from row 0".
    self.num_rows = num_rows
    if num_rows:
        self.start_row = (page - 1) * self.num_rows
    else:
        self.start_row = 0
    # Sorting: either a multi-column dict (ordered by priority) or a
    # single column index translated to its name.
    sort_param = None
    if self.multisort and isinstance(sort_column, dict):
        sort_param = sorted([(key, val[0], val[1])
                             for key, val in sort_column.items()],
                            key=lambda x: x[2])
    else:
        sort_param = self.idx2colum_name(sort_column)
    self.sort_order = sort_order
    # really reading the data...
    total_rows, data = self.getData(start_row=self.start_row,
                                    num_rows=self.num_rows,
                                    sort_column=sort_param,
                                    sort_order=self.sort_order)
    # adding default values
    for col_def in self.columns_def:
        default = col_def.get("default")
        col_name = col_def["name"]
        if default:
            for row in data:
                if col_name not in row:
                    row[col_name] = default
                elif type(default) is dict:
                    row[col_name] = dict(default, **row[col_name])  # merge default dict
        if col_def['type'] == u'icon':
            # Resolve relative icon paths and tag-strip tooltips.
            for row in data:
                src = row[col_name].get("src")
                if src and src[:4] != "http":
                    src = self.request.static_url(src)
                row[col_name]["src"] = src
                tip = row[col_name].get("tip")
                if tip:
                    tip = jinja_filters.do_striptags(tip)
                    ##todo tip = epfli18n.get_text(tip)
                row[col_name]["tip"] = tip
        if col_def['type'] == u'anchor':
            # Anchors default their text to the href and target to _self.
            for row in data:
                name = row[col_name].get("name")
                if not name:
                    name = row[col_name].get("href")
                row[col_name]["name"] = name
                target = row[col_name].get("target")
                if not target:
                    target = "_self"
                row[col_name]["target"] = target
    # converting to the format needed by jqgrid...
    col_keys = [cd["name"] for cd in self.columns_def]
    transformer = epflutil.make_dict2list_transformer(col_keys)
    rows = []
    for row in data:
        trans_row = transformer(row)
        rows.append({"cell": trans_row})
    # handling internal state...
    if self.num_rows:
        self.current_page = int(self.start_row / self.num_rows) + 1
        if total_rows is not None:
            total_pages = int(math.ceil(float(total_rows) / self.num_rows))
            if total_pages == 0:
                total_pages = 1
        else:
            # Unknown total: the grid cannot compute page count.
            total_pages = None
    else:
        self.current_page = 0
        total_pages = 1
    out = {"total": total_pages,
           "page": self.current_page,
           "records": total_rows,
           "rows": rows}
    self.response.answer_json_request(out)
def get_stripped_body(self):
    "Returns body without HTML tags and other shit"
    plain = do_striptags(htsafe(self.body))
    # Non-breaking spaces become plain spaces.
    return plain.replace(u"\u00A0", " ")