def json_ld(self):
    """Serialize this article as a schema.org ``BlogPosting`` JSON-LD string.

    Optional fields that resolve to ``None`` (e.g. a missing thumbnail
    image) are removed before serialization.
    """
    site_url = self.settings["SITEURL"]
    page_url = "%s/%s" % (site_url, self.url)

    thumbnail = None
    if getattr(self, "image", None):
        # Thumbnail path is derived from the image filename minus extension.
        stem = self.image.rpartition(".")[0]
        thumbnail = {
            "@type": "ImageObject",
            "url": "%s/images/thumb/%s-1012x422.jpg" % (site_url, stem),
            "width": 1012,
            "height": 422,
        }

    authors = []
    for author in self.authors:
        authors.append({
            "@type": "Person",
            "name": str(author),
            "url": "%s/%s" % (site_url, author.url),
        })

    data = {
        "@context": "http://schema.org",
        "@type": "BlogPosting",
        "headline": striptags(self.title),
        "image": thumbnail,
        "keywords": ", ".join(sorted(map(str, self.tags))),
        "url": page_url,
        "datePublished": self.date.isoformat(),
        "dateModified": getattr(self, "modified", self.date).isoformat(),
        "description": striptags(self.summary),
        "publisher": {
            "@type": "Person",
            "name": "Markus Holtermann",
        },
        "mainEntityOfPage": page_url,
        "author": authors,
    }
    # Drop absent optional entries so json.dumps does not emit nulls.
    data = {key: value for key, value in data.items() if value is not None}
    return json.dumps(data)
def json_ld(self):
    '''Return a schema.org ``BlogPosting`` JSON-LD serialization of this post.

    Entries whose value is ``None`` (e.g. a missing thumbnail) are dropped
    before serialization.
    '''
    site = self.settings['SITEURL']

    def absolute(path):
        # Join a site-relative path onto the configured site URL.
        return '%s/%s' % (site, path)

    image = None
    if getattr(self, 'image', None):
        image = {
            '@type': 'ImageObject',
            'url': '%s/images/thumb/%s-1012x422.jpg' % (
                site, self.image.rpartition('.')[0]),
            'width': 1012,
            'height': 422,
        }

    data = {
        '@context': 'http://schema.org',
        '@type': 'BlogPosting',
        'headline': striptags(self.title),
        'image': image,
        'keywords': ', '.join(sorted(map(str, self.tags))),
        'url': absolute(self.url),
        'datePublished': self.date.isoformat(),
        'dateModified': getattr(self, 'modified', self.date).isoformat(),
        'description': striptags(self.summary),
        'publisher': {
            '@type': 'Person',
            'name': 'Markus Holtermann',
        },
        'mainEntityOfPage': absolute(self.url),
        'author': [
            {
                '@type': 'Person',
                'name': str(author),
                'url': absolute(author.url),
            }
            for author in self.authors
        ],
    }
    # Filter out None values so optional fields are omitted, not null.
    return json.dumps({k: v for k, v in data.items() if v is not None})
def get_posts(domain, post_type="text", offset=0, limit=200):
    """Yield normalized posts of *post_type* for *domain* from the API.

    Pages through the API 20 posts at a time starting at *offset* until
    *limit* is reached. Each post dict is normalized in place: dates are
    parsed and localized, blank slugs fall back to the post id, and photo
    posts get an extra tag.

    Raises:
        AssertionError: when the API reports a non-200 meta status.
    """
    while True:
        url = API_URL_POSTS.format(
            url_base=API_URL, domain=domain, post_type=post_type,
            api_key=API_KEY, offset=offset, limit=limit)
        # BUG FIX: Response.json is a method; the original referenced the
        # bound method (`.json`) without calling it, so the subscripting
        # below would have raised TypeError on every call.
        data = requests.get(url).json()
        assert data["meta"]["status"] == 200, repr(data["meta"])
        for post in data["response"]["posts"]:
            # Parse the API's date string and convert to the local timezone.
            post["utcdate"] = dateutil.parser.parse(post["date"])
            post["date"] = post["utcdate"].astimezone(dateutil.tz.tzlocal())
            # Fall back to the numeric id when the slug is blank.
            if not post["slug"].strip():
                post["slug"] = str(post["id"])
            post.setdefault("title", striptags(post.get("caption", "")))
            post.setdefault("new_slug", post.get("slug", ""))
            if post["type"] == "photo":
                post["tags"].append("照片")
            yield post
        # Paginate in steps of 20; iteration replaces the original tail
        # recursion so long runs don't stack nested generators.
        if limit - offset <= 20:
            return
        offset += 20
def extract_text(message):
    """Return the decoded text body of an email message.

    For multipart messages, prefers a ``text/plain`` part and falls back to
    ``text/html`` (with tags stripped). The Content-Transfer-Encoding is
    undone and the payload decoded using the declared charset.

    Raises:
        ValueError: if a multipart message has neither a text/plain nor a
            text/html part, or the transfer encoding is unrecognized.
    """
    if message.is_multipart():
        parts = message.get_payload()
        plain_parts = (
            p for p in parts if p.get_content_type() == 'text/plain')
        html_parts = (
            p for p in parts if p.get_content_type() == 'text/html')
        chosen = next(plain_parts, next(html_parts, None))
        # BUG FIX: email.message.Message defines __len__ (its header
        # count), so `if not chosen:` also rejected a real part that
        # happened to carry no headers. Test the sentinel explicitly.
        if chosen is None:
            raise ValueError('need plain text or html message')
    else:
        chosen = message

    payload = chosen.get_payload()
    content_charset = chosen.get_content_charset()
    transfer_encoding = chosen.get('Content-Transfer-Encoding')

    # Undo the Content-Transfer-Encoding, if any.
    transfer_decoder = {
        'quoted-printable': quopri.decodestring,
        # BUG FIX: base64.decodestring was removed in Python 3.9;
        # decodebytes is the supported name (available since 3.1).
        'base64': base64.decodebytes,
    }.get(transfer_encoding)
    if transfer_decoder is not None:
        if isinstance(payload, str):
            # decodebytes rejects str; an encoded payload is ASCII-only.
            payload = payload.encode('ascii')
        payload = transfer_decoder(payload)
    elif transfer_encoding not in (None, '7bit', '8bit'):
        raise ValueError('unknown transfer encoding in message')

    # Decode to text. Fall back to UTF-8 (an ASCII superset) when the
    # message declares no charset instead of crashing on decode(None).
    if isinstance(payload, bytes):
        payload = payload.decode(content_charset or 'utf-8')
    text = payload

    # Strip markup when we fell back to the HTML part.
    if chosen.get_content_type() == 'text/html':
        text = striptags(text)
    return text.strip()
def html_summary(html):
    """Return *html* with all markup stripped, coerced to unicode first.

    Byte-string input is assumed to be UTF-8 encoded.
    """
    if isinstance(html, unicode):
        text = html
    else:
        text = html.decode('utf-8')
    return striptags(text)
def html_summary(html):
    """Strip all tags from *html* and truncate the result to 160 characters."""
    text = striptags(html)
    return truncate(text, length=160)
def clean_title(self):
    """Return the tag-stripped title, falling back to the site name.

    The fallback applies both when the title is empty and when stripping
    its tags leaves nothing behind.
    """
    if self.title:
        stripped = striptags(self.title)
        if stripped:
            return stripped
    return self.site
def get_text(self, tpl=None):
    """Render *tpl* to HTML and return it with all markup stripped.

    Returns an empty string when no template is given.
    """
    if tpl:
        return striptags(self.get_html(tpl))
    return ''