def post(self):
    """Create a new share or update an existing one from the submitted form.

    Reads ``id``, ``tags``, ``title``, ``markdown``, ``type`` and ``slug``
    from the request (``title``, ``markdown`` and ``type`` have no default).
    With an ``id`` the matching share is updated; when no share matches,
    the client is redirected to ``/404`` and nothing is written.  Without
    an ``id`` a new share is created for the current user and that user's
    ``user_leaf`` is increased by 10.  Every non-empty tag is handed to
    ``Tag.new`` and the client is redirected to the share page.
    """
    share_id = self.get_argument("id", None)  # local name no longer shadows builtin id()
    tags = self.get_argument("tags", '')
    user_id = self.current_user["user_id"]
    res = {
        'title': self.get_argument("title"),
        'markdown': self.get_argument("markdown"),
        'sharetype': self.get_argument("type"),
        'slug': self.get_argument("slug", ''),
        'tags': tags,
        'updated': time.time(),
    }
    if share_id:
        share = Share.by_sid(share_id)
        if not share:
            self.redirect("/404")
            # Stop here: falling through would dereference the missing share.
            return
        share.update(res)
        share.save()
    else:
        res['user_id'] = user_id
        share = Share.new(res)
        user = User.by_sid(user_id)
        user.user_leaf += 10
        user.save()
    # split() without an argument drops the empty strings that split(' ')
    # produces for an empty field or repeated spaces, so no blank tag is made.
    for tag in tags.split():
        Tag.new(tag, share.id)
    self.redirect("/share/" + str(share.id))
def post(self):
    """Append the submitted tags to an existing share.

    Reads ``share_id`` and ``tags`` (whitespace-separated) from the request.
    Nothing happens when either is missing/blank, when no share matches, or
    when every submitted tag is already on the share.  Otherwise the share's
    ``tags`` string and ``updated`` time are saved and each distinct tag of
    the merged list is handed to ``Tag.new``.

    Tags are compared as whole tokens: the previous substring test treated
    ``python`` as already present on a share tagged ``python3``.
    """
    share_id = self.get_argument("share_id", None)
    new_tags = self.get_argument("tags", '').split()
    if not share_id or not new_tags:
        return
    share = Share.by_sid(share_id)
    if not share:
        return
    # share.tags is a space-separated string here (the original concatenated
    # it with ' '); split() also copes with an empty value.
    current = share.tags.split()
    merged = list(current)
    for tag in new_tags:
        if tag not in merged:
            merged.append(tag)
    if merged == current:
        return
    share.update({
        'tags': ' '.join(merged),
        'updated': time.time(),
    })
    share.save()
    for name in set(merged):
        Tag.new({'name': name, 'share_ids': share.id})
def post(self):
    """Create or update a share, optionally storing it with draft status.

    Reads the share fields from the request.  ``status`` is 1 when the
    ``dosubmit`` argument equals the draft button label and 0 otherwise.
    With an ``id`` the matching share is updated (or the client is sent to
    ``/404`` when none matches); without one a new share is created for the
    current user, whose ``user_leaf`` is increased by 10.  Each non-empty
    tag is handed to ``Tag.new``.  Finally exactly one redirect is issued:
    ``/share/?id=<id>`` for status 1, ``/share/<id>`` otherwise.
    """
    share_id = self.get_argument("id", None)
    title = self.get_argument("title", '')
    markdown = self.get_argument("markdown", '')
    content = self.get_argument("content", '')
    sharetype = self.get_argument("type", '')
    slug = self.get_argument("slug", '')
    status = 1 if self.get_argument("dosubmit", None) == u'保存草稿' else 0
    tags = self.get_argument("tags", '')
    upload_img = self.get_argument("uploadImg", '')
    post_img = self.get_argument("post_Img", '')
    # The literal text 'None' is treated as "no image".
    post_img = '' if post_img == 'None' else post_img
    user_id = self.current_user["user_id"]
    res = {
        'title': title,
        'markdown': markdown,
        'content': content,
        'sharetype': sharetype,
        'slug': slug,
        'tags': tags,
        'status': status,
        'upload_img': upload_img,
        'post_img': post_img,
        'updated': time.time(),
    }
    if share_id:
        share = Share.by_sid(share_id)
        if not share:
            self.redirect("/404")
            # Stop here: falling through would dereference the missing share.
            return
        share.update(res)
        share.save()
    else:
        res['user_id'] = user_id
        share = Share.new(res)
        user = User.by_sid(user_id)
        user.user_leaf += 10
        user.save()
    # split() without an argument skips the empty strings that split(' ')
    # yields for an empty field or repeated spaces.
    for tag in tags.split():
        Tag.new({'name': tag, 'share_ids': share.id})
    if status == 1:
        self.redirect("/share/?id=" + str(share.id))
        # Previously execution fell through and redirected a second time.
        return
    self.redirect("/share/" + str(share.id))
def post(self):
    """Create a new share or update an existing one, including its link.

    Reads the share fields (title, markdown, content, sharetype, slug, tags,
    uploadImg, post_Img, link) from the request.  With an ``id`` the
    matching share is updated, or the client is redirected to ``/404`` when
    none matches.  Without an ``id`` a new share is created for the current
    user and that user's ``user_leaf`` is increased by 10.  Each non-empty
    tag is handed to ``Tag.new`` before redirecting to ``/share/<id>``.
    """
    share_id = self.get_argument("id", None)
    title = self.get_argument("title", '')
    markdown = self.get_argument("markdown", '')
    content = self.get_argument("content", '')
    sharetype = self.get_argument("sharetype", '')
    slug = self.get_argument("slug", '')
    tags = self.get_argument("tags", '')
    upload_img = self.get_argument("uploadImg", '')
    post_img = self.get_argument("post_Img", '')
    link = self.get_argument("link", '')
    user_id = self.current_user["user_id"]
    res = {
        'title': title,
        'markdown': markdown,
        'content': content,
        'sharetype': sharetype,
        'slug': slug,
        'tags': tags,
        'upload_img': upload_img,
        'post_img': post_img,
        'link': link,
        'updated': time.time(),
    }
    if share_id:
        share = Share.by_sid(share_id)
        if not share:
            self.redirect("/404")
            # Stop here: falling through would dereference the missing share.
            return
        share.update(res)
        share.save()
    else:
        res['user_id'] = user_id
        share = Share.new(res)
        user = User.by_sid(user_id)
        user.user_leaf += 10
        user.save()
    # split() without an argument skips the empty strings that split(' ')
    # yields for an empty field or repeated spaces.
    for tag in tags.split():
        Tag.new({'name': tag, 'share_ids': share.id})
    self.redirect("/share/" + str(share.id))
def post(self):
    """Merge newly submitted tags into an existing share's tag list.

    ``share_id`` and ``tags`` (whitespace-separated) come from the request.
    The call is a no-op when either is missing or blank, when the share
    cannot be found, or when all submitted tags are already present.
    Otherwise the merged tag string and a fresh ``updated`` time are saved
    on the share, and every distinct tag is passed to ``Tag.new``.

    Membership is decided per tag token rather than by substring, so a
    submitted ``python`` is no longer swallowed by an existing ``python3``,
    and an empty existing tag string no longer yields a blank tag.
    """
    share_id = self.get_argument("share_id", None)
    submitted = self.get_argument("tags", '').split()
    if not (share_id and submitted):
        return
    share = Share.by_sid(share_id)
    if not share:
        return
    # share.tags is a space-separated string (the original joined onto it
    # with ' '); split() tolerates an empty value and repeated spaces.
    existing = share.tags.split()
    combined = list(existing)
    for tag in submitted:
        if tag not in combined:
            combined.append(tag)
    if combined == existing:
        # Nothing new to store.
        return
    res = {
        'tags': ' '.join(combined),
        'updated': time.time(),
    }
    share.update(res)
    share.save()
    for tag in set(combined):
        doc = {'name': tag, 'share_ids': share.id}
        Tag.new(doc)
def check():
    """Fill in tags for stored shares that have none, then print how many remain.

    Walks ``adb.Share_Col`` in ascending ``_id`` order.  Records whose
    ``status`` is below 1, or that already have tags, are skipped.  For each
    remaining record the ``user_id`` is printed, tags are obtained from
    ``get_tags``, written back to the record, and each tag is handed to
    ``Tag.new``.  Afterwards the number of shares whose ``tags`` equal
    ``[]`` is printed.
    """
    ordered_records = adb.Share_Col.find().sort('_id', 1)
    for record in ordered_records:
        # Same short-circuit order as two separate checks: status first.
        if record['status'] < 1 or record['tags']:
            continue
        print(record['user_id'])
        new_tags = get_tags(record)
        adb.Share_Col.update(
            {'_id': record['_id']},
            {'$set': {'tags': new_tags}},
        )
        for name in new_tags:
            Tag.new({'name': name, 'share_ids': record['id']})
    print(Share.find({'tags': []}).count())
def post(self):
    """Create or update a share, optionally seeded from a link and a cover URL.

    Steps, in order:

    1. Read the form arguments; ``tags`` is split on whitespace.
    2. If ``link`` is given, look it up in ``Webcache``.  On a miss the page
       is fetched (4 s timeout), reduced to its readable part, converted to
       markdown and cached when the markdown is non-empty.  A failed fetch
       writes a short message to the response and stops.
    3. ``vote_open`` becomes an int (0 when it is not all digits).  An empty
       ``title`` falls back to the title found for the link, if any.
    4. If ``img_url`` is given and no ``post_Img`` was submitted, the image
       is downloaded into ``static/upload/img``.  Images with a side ratio
       above 5 are deleted; otherwise thumbnails are generated and
       ``post_img`` is set to the ``_1200.jpg`` name.
    5. The share is updated (``id`` given; ``/404`` when it does not exist)
       or created for the current user, whose ``user_leaf`` grows by 10.
    6. Each tag is handed to ``Tag.new`` and the client is redirected to
       ``/share/<id>``.
    """
    # TODO
    share_id = self.get_argument("id", None)
    title = self.get_argument("title", '')
    markdown = self.get_argument("markdown", '')
    content = self.get_argument("content", '')
    sharetype = self.get_argument("sharetype", '')
    slug = self.get_argument("slug", '')
    tags = self.get_argument("tags", '')
    post_img = self.get_argument("post_Img", '')
    link = self.get_argument("link", '')
    user_id = self.current_user["user_id"]
    vote_open = self.get_argument("vote_open", '')
    vote_title = self.get_argument("vote_title", '')
    img_url = self.get_argument("img_url", '')
    tags = tags.split()

    # Title discovered from the linked page.  Initialised up front so the
    # title fallback below cannot hit an unbound name when no link is sent.
    doc_title = ''
    if link:
        url = link
        doc = Webcache.find_one({'url': url}, {'_id': 0})
        if doc:
            logger.info('already downloaded')
            doc_title = doc.title
        else:
            sessions = requests.session()
            sessions.headers['User-Agent'] = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.131 Safari/537.36'
            try:
                response = sessions.get(url, timeout=4)
                # TODO: try to use a proxy
            except (requests.ConnectionError, requests.Timeout) as e:
                print(e)
                self.write("GFW...")
                return
            except Exception as e:
                logger.info('e: {}'.format(e))
                self.write("GFW")
                return
            # TODO
            response.encoding = get_charset(response)
            logger.info('response.encoding {}'.format(response.encoding))
            doc = Document(response.text)
            doc_title = doc.title()
            summary = doc.summary()
            _markdown = html2text.html2text(summary)
            _markdown = _markdown.replace('-\n', '-').strip()
            res_webcache = {
                'url': url,
                'title': doc_title,
                'markdown': _markdown,
            }
            if _markdown:
                Webcache.new(res_webcache)

    vote_open = int(vote_open) if vote_open.isdigit() else 0
    if not title:
        title = doc_title

    # Handle the cover image link.
    if img_url and not post_img:
        ext = img_url.split('?')[0].split('.')[-1]
        ext = '.' + ext.lower()
        print(ext)
        # NOTE(review): assert is skipped under ``python -O``; consider an
        # explicit rejection of unsupported extensions.
        assert ext in ['.jpg', '.jpeg', '.gif', '.png', '.bmp']
        img_dir = 'static/upload/img'
        now = datetime.datetime.now()
        t = now.strftime('%Y%m%d_%H%M%S_%f')
        img_name = '%s%s' % (t, ext)
        img_path = '%s/%s' % (img_dir, img_name)
        print(img_path)
        r = requests.get(img_url, verify=False, stream=True)
        chunk_size = 100
        with open(img_path, 'wb') as image:
            for chunk in r.iter_content(chunk_size):
                image.write(chunk)
        # Read the size inside a context manager so the image file handle is
        # released before the file may be removed below (presumably PIL's
        # Image, which supports ``with``).
        with Image.open(img_path) as im:
            width, height = im.size
        if width / height > 5 or height / width > 5:
            # Ratio check failed: delete the downloaded image.
            os.remove(img_path)
            print('请不要上传长宽比例过大的图片')
        else:
            # Create the 1200x550, 750x230, 365x230 and 260x160 thumbnails.
            make_post_thumb(img_path, sizes=[(1200, 550), (750, 230), (365, 230), (260, 160)])
            print('done')
            # NOTE(review): source indentation was lost; post_img is set only
            # when thumbnails were produced - confirm against the original.
            post_img = img_path.split('/')[-1]
            post_img = post_img.split('.')[0] + '_1200.jpg'

    res = {
        'title': title,
        'markdown': markdown,
        'content': content,
        'sharetype': sharetype,
        'slug': slug,
        'tags': tags,
        'post_img': post_img,
        'link': link,
        'vote_open': vote_open,
        'vote_title': vote_title,
        'updated': time.time(),
    }
    if share_id:
        share = Share.by_sid(share_id)
        if not share:
            self.redirect("/404")
            # Stop here: falling through would dereference the missing share.
            return
        share.update(res)
        share.save()
    else:
        res['user_id'] = user_id
        share = Share.new(res)
        user = User.by_sid(user_id)
        user.user_leaf += 10
        user.save()
    for tag in tags:
        Tag.new({'name': tag, 'share_ids': share.id})
    self.redirect("/share/" + str(share.id))
def _parse_published(raw):
    """Convert a feed entry's ``published`` string to a POSIX timestamp.

    The format is picked from surface features of the string (a ``GMT``
    suffix, a comma, a slash, a trailing ``Z``, ``EST``, a ``T`` separator,
    the number of spaces).  Raises ``ValueError`` when the chosen format
    does not match.

    NOTE(review): the GMT, ``Z``, EST and ``T`` branches discard the zone,
    producing naive datetimes; ``timestamp()`` then interprets them in the
    machine's local time zone.  Left unchanged to keep stored values stable.
    """
    if raw[-3:] == 'GMT':
        parsed = datetime.strptime(raw, "%a, %d %b %Y %H:%M:%S GMT")
    elif ',' in raw:
        try:
            # Thu, 18 Apr 2019 19:32:58 +0800
            parsed = datetime.strptime(raw, "%a, %d %b %Y %H:%M:%S %z")
        except ValueError:
            # May 19, 2019 - previously only recognised for strings ending
            # in '2019'; any year is accepted now.
            parsed = datetime.strptime(raw, "%b %d, %Y")
    elif '/' in raw:
        parsed = datetime.strptime(raw, "%Y/%m/%d %H:%M:%S %z")
    elif raw.endswith('Z'):
        parsed = datetime.strptime(raw.replace('.000Z', 'Z'), "%Y-%m-%dT%H:%M:%SZ")
    elif 'EST' in raw:
        # <pubDate>15 Jun 2019 06:30:00 EST</pubDate>
        parsed = datetime.strptime(raw[:-4], "%d %b %Y %H:%M:%S")
    elif 'T' in raw:
        # 2019-05-24T15:05:50-04:00 (the trailing offset is cut off)
        parsed = datetime.strptime(raw[:-6], "%Y-%m-%dT%H:%M:%S")
    elif raw.count(' ') == 1:
        parsed = datetime.strptime(raw, "%Y-%m-%d %H:%M:%S")
    else:
        parsed = datetime.strptime(raw, "%Y-%m-%d %H:%M:%S %z")
    return parsed.timestamp()


def add_from_file(rss_url, rss_hostname, rss_name):
    """Import the entries of an RSS feed as shares.

    Parses ``rss_url`` with ``feedparser`` and walks the entries in reverse
    order.  For each entry the content, summary, publication time, tags and
    markdown are extracted.  Entries are de-duplicated by title:

    * no share with that title: a new share is created for the user whose
      e-mail is ``<rss_hostname>@anwensf.com``, that user's ``user_leaf`` is
      increased by 10 and the entry's tags are handed to ``Tag.new``;
    * exactly one share: it is updated only when the entry has a summary
      and a link and the stored share has no link yet;
    * more than one share: a warning is printed and nothing is written.

    Entries with neither content nor summary, or whose content cannot be
    converted to markdown, are skipped.

    Args:
        rss_url: feed location passed to ``feedparser.parse``.
        rss_hostname: local part of the importing user's e-mail address.
        rss_name: feed name; stored as the share's ``source``.
    """
    n = Share.find().count()
    print(n)
    print(rss_name)
    feeds = feedparser.parse(rss_url)
    for post in feeds.entries[::-1]:
        # TODO: fix up content; enclosure / media:content elements are not
        # supported for now.
        if hasattr(post, 'summary'):
            summary = post.summary
            assert post.summary == post.description
        else:
            summary = ''

        # Some RSS feeds have no content element; fall back to the summary.
        if hasattr(post, 'content'):
            content = post.content[0]['value']
        elif hasattr(post, 'summary'):
            content = post.summary
        else:
            print('no content', rss_url, rss_hostname, rss_name)
            continue

        # Unwrap a literal CDATA envelope: 9 leading and 3 trailing chars.
        if content.startswith('<![CDATA[') and content.endswith(']]>'):
            content = content[9:-3]
        if summary.startswith('<![CDATA[') and summary.endswith(']]>'):
            summary = summary[9:-3]

        if hasattr(post, 'published'):
            published = _parse_published(post.published)
        else:
            # Only report a sample of the entries lacking a date.
            if random.random() > 0.9:
                print('no published time')
            published = time.time()

        title = post.title
        link = post.link
        author = ''
        if hasattr(post, 'source'):
            source_title = post.source.title
            print(rss_name, source_title)
            if rss_name == '虎嗅':
                author = source_title
            else:
                assert rss_name in source_title
        source = rss_name

        if hasattr(post, 'category_title'):
            category = post.category_title
            assert ' ' not in category
            assert ',' not in category
            tags = [category]
        elif hasattr(post, 'tags'):
            category = ''
            joined = ','.join([t['term'] for t in post.tags])
            if '-' in joined:
                print(joined)
            # NOTE(review): source indentation was lost; the space-to-hyphen
            # replacement is treated as unconditional - confirm.
            tags = joined.replace(' ', '-').split(',')
        else:
            category = ''
            tags = []

        sharetype = 'rss'
        try:
            markdown = html2text.html2text(content)
        except Exception as e:
            print('error in html-to-markdown: {}'.format(e))
            continue
        assert link
        res = {
            'title': title,
            'link': link,
            'source': source,
            'category': category,
            'content': content,
            'summary': summary,
            'sharetype': sharetype,
            'tags': tags,
            'markdown': markdown,
            'published': published,
            'updated': time.time(),
        }
        if hasattr(post, 'author'):
            # TODO
            print('author: ', post.author)
            res['author'] = post.author
        else:
            res['author'] = author

        # De-duplication strategy: entries are matched on identical titles.
        found = Share.find({'title': title})
        # Count once instead of re-running the query for every comparison.
        match_count = found.count()
        if match_count > 1:
            print('!! repeated article title: {}'.format(title))
        elif match_count == 1:
            share = Share.by_sid(found[0].id)
            if share and summary and not share.link and link:
                print(res['link'])
                print('title {} updated'.format(title))
                share.update(res)
                share.save()
        else:
            print('title {} adding'.format(title))
            email = '{}@anwensf.com'.format(rss_hostname)
            auser = User.by_email(email)
            assert auser
            user_id = auser.id
            res['user_id'] = user_id
            assert res['link']
            share = Share.new(res)
            user = User.by_sid(user_id)
            user.user_leaf += 10
            user.save()
            # NOTE(review): source indentation was lost; tags are registered
            # for newly created shares only - confirm whether updated shares
            # should be covered as well.
            for tag in tags:
                Tag.new({'name': tag, 'share_ids': share.id})