Beispiel #1
0
    def post(self):
        """Create a new share or update an existing one from the submitted form.

        Reads ``id``, ``title``, ``markdown``, ``type``, ``slug`` and ``tags``
        from the request. When ``id`` is given, the matching share is updated
        with the submitted values; otherwise a new share is created for the
        current user and that user's ``user_leaf`` is increased by 10. Every
        tag is then passed to ``Tag.new`` together with the share id, and the
        client is redirected to ``/share/<id>``.

        If ``id`` is given but no share is found, the client is redirected to
        ``/404`` and nothing is written.
        """
        share_id = self.get_argument("id", None)
        tags = self.get_argument("tags", '')
        user_id = self.current_user["user_id"]
        res = {
            'title': self.get_argument("title"),
            'markdown': self.get_argument("markdown"),
            'sharetype': self.get_argument("type"),
            'slug': self.get_argument("slug", ''),
            'tags': tags,
            'updated': time.time(),
        }

        if share_id:
            share = Share.by_sid(share_id)
            if not share:
                self.redirect("/404")
                # Stop here: falling through would dereference the missing
                # share and attempt a second redirect.
                return
            share.update(res)
            share.save()
        else:
            res['user_id'] = user_id
            share = Share.new(res)
            user = User.by_sid(user_id)
            user.user_leaf += 10
            user.save()
        # split() without an argument drops empty strings, so an empty or
        # doubly-spaced tags field does not register a blank tag.
        for tag in tags.split():
            Tag.new(tag, share.id)
        self.redirect("/share/" + str(share.id))
Beispiel #2
0
    def post(self):
        """Append the submitted tags to an existing share.

        Reads ``share_id`` and ``tags`` (whitespace-separated) from the
        request. Tags the share does not have yet are appended to
        ``share.tags``, the share is saved with a fresh ``updated`` time, and
        every tag of the share is passed to ``Tag.new``.

        Nothing happens when ``share_id`` is missing, the share is not found,
        or no submitted tag is new.
        """
        share_id = self.get_argument("share_id", None)
        tags = self.get_argument("tags", '')
        if not share_id:
            return
        share = Share.by_sid(share_id)
        if not share:
            return

        # share.tags is handled as a space-separated string, matching the
        # string concatenation this handler has always performed on it.
        existing = (share.tags or '').split()
        # Compare whole tags rather than substrings, so that adding "java"
        # is not skipped just because "javascript" is already present.
        added = []
        for tag in tags.split():
            if tag not in existing and tag not in added:
                added.append(tag)
        if not added:
            return

        merged = existing + added
        share.update({
            # Joining the token list avoids the leading space (and the
            # resulting empty tag) when the share had no tags before.
            'tags': ' '.join(merged),
            'updated': time.time(),
        })
        share.save()

        # Register each distinct tag once.
        for name in dict.fromkeys(merged):
            Tag.new({'name': name, 'share_ids': share.id})
Beispiel #3
0
    def post(self):
        """Create or update a share, optionally saving it as a draft.

        Form fields read: ``id``, ``title``, ``markdown``, ``content``,
        ``type``, ``slug``, ``dosubmit``, ``tags``, ``uploadImg`` and
        ``post_Img``. ``status`` is 1 when ``dosubmit`` equals the
        "save draft" button label and 0 otherwise.

        When ``id`` is given, the matching share is updated; otherwise a new
        share is created for the current user, whose ``user_leaf`` is
        increased by 10. Every tag is registered through ``Tag.new``. The
        client is redirected to ``/share/?id=<id>`` when ``status`` is 1 and
        to ``/share/<id>`` otherwise, or to ``/404`` when ``id`` matches no
        share.
        """
        share_id = self.get_argument("id", None)
        title = self.get_argument("title", '')
        markdown = self.get_argument("markdown", '')
        content = self.get_argument("content", '')
        sharetype = self.get_argument("type", '')
        slug = self.get_argument("slug", '')
        status = 1 if self.get_argument("dosubmit", None) == u'保存草稿' else 0
        tags = self.get_argument("tags", '')
        upload_img = self.get_argument("uploadImg", '')
        post_img = self.get_argument("post_Img", '')
        # The form may submit the literal text 'None' for a missing image.
        post_img = '' if post_img == 'None' else post_img
        user_id = self.current_user["user_id"]
        res = {
            'title': title,
            'markdown': markdown,
            'content': content,
            'sharetype': sharetype,
            'slug': slug,
            'tags': tags,
            'status': status,
            'upload_img': upload_img,
            'post_img': post_img,
            'updated': time.time(),
        }

        if share_id:
            share = Share.by_sid(share_id)
            if not share:
                self.redirect("/404")
                # Stop here: falling through would dereference the missing
                # share and attempt a second redirect.
                return
            share.update(res)
            share.save()
        else:
            res['user_id'] = user_id
            share = Share.new(res)
            user = User.by_sid(user_id)
            user.user_leaf += 10
            user.save()
        # split() without an argument drops empty strings, so an empty or
        # doubly-spaced tags field does not register a blank tag.
        for name in tags.split():
            Tag.new({'name': name, 'share_ids': share.id})
        # Exactly one redirect per request: a second redirect() after the
        # response has been finished is an error.
        if status == 1:
            self.redirect("/share/?id=" + str(share.id))
        else:
            self.redirect("/share/" + str(share.id))
Beispiel #4
0
    def post(self):
        """Create a new share or update an existing one from the submitted form.

        Form fields read: ``id``, ``title``, ``markdown``, ``content``,
        ``sharetype``, ``slug``, ``tags``, ``uploadImg``, ``post_Img`` and
        ``link``.

        When ``id`` is given, the matching share is updated; otherwise a new
        share is created for the current user, whose ``user_leaf`` is
        increased by 10. Every tag is registered through ``Tag.new`` and the
        client is redirected to ``/share/<id>``, or to ``/404`` when ``id``
        matches no share.
        """
        share_id = self.get_argument("id", None)
        title = self.get_argument("title", '')
        markdown = self.get_argument("markdown", '')
        content = self.get_argument("content", '')
        sharetype = self.get_argument("sharetype", '')
        slug = self.get_argument("slug", '')
        tags = self.get_argument("tags", '')
        upload_img = self.get_argument("uploadImg", '')
        post_img = self.get_argument("post_Img", '')
        link = self.get_argument("link", '')
        user_id = self.current_user["user_id"]
        res = {
            'title': title,
            'markdown': markdown,
            'content': content,
            'sharetype': sharetype,
            'slug': slug,
            'tags': tags,
            'upload_img': upload_img,
            'post_img': post_img,
            'link': link,
            'updated': time.time(),
        }

        if share_id:
            share = Share.by_sid(share_id)
            if not share:
                self.redirect("/404")
                # Stop here: falling through would dereference the missing
                # share and attempt a second redirect.
                return
            share.update(res)
            share.save()
        else:
            res['user_id'] = user_id
            share = Share.new(res)
            user = User.by_sid(user_id)
            user.user_leaf += 10
            user.save()
        # split() without an argument drops empty strings, so an empty or
        # doubly-spaced tags field does not register a blank tag.
        for name in tags.split():
            Tag.new({'name': name, 'share_ids': share.id})
        self.redirect("/share/" + str(share.id))
Beispiel #5
0
 def post(self):
     """Append the submitted tags to an existing share.

     Reads ``share_id`` and ``tags`` (whitespace-separated) from the
     request. Tags the share does not have yet are appended to
     ``share.tags``, the share is saved with a fresh ``updated`` time, and
     every tag of the share is passed to ``Tag.new``.

     Nothing happens when ``share_id`` is missing, the share is not found,
     or no submitted tag is new.
     """
     share_id = self.get_argument("share_id", None)
     tags = self.get_argument("tags", '')
     if not share_id:
         return
     share = Share.by_sid(share_id)
     if not share:
         return

     # share.tags is handled as a space-separated string, matching the
     # string concatenation this handler has always performed on it.
     existing = (share.tags or '').split()
     # Compare whole tags rather than substrings, so that adding "java"
     # is not skipped just because "javascript" is already present.
     added = []
     for tag in tags.split():
         if tag not in existing and tag not in added:
             added.append(tag)
     if not added:
         return

     merged = existing + added
     share.update({
         # Joining the token list avoids the leading space (and the
         # resulting empty tag) when the share had no tags before.
         'tags': ' '.join(merged),
         'updated': time.time(),
     })
     share.save()

     # Register each distinct tag once.
     for name in dict.fromkeys(merged):
         Tag.new({'name': name, 'share_ids': share.id})
Beispiel #6
0
def check():
    """Fill in tags for stored shares that have none.

    Walks ``adb.Share_Col`` in ascending ``_id`` order. Documents whose
    ``status`` is below 1, or which already have tags, are skipped. For the
    rest the ``user_id`` is printed, tags are computed with ``get_tags``,
    written back with ``$set`` and registered through ``Tag.new``. Finally
    the number of shares whose ``tags`` equal ``[]`` is printed.
    """
    for share_doc in adb.Share_Col.find().sort('_id', 1):
        # Nothing to do for low-status documents or already-tagged ones.
        if share_doc['status'] < 1 or share_doc['tags']:
            continue
        print(share_doc['user_id'])

        new_tags = get_tags(share_doc)
        adb.Share_Col.update(
            {'_id': share_doc['_id']},
            {'$set': {'tags': new_tags}},
        )
        for tag_name in new_tags:
            Tag.new({'name': tag_name, 'share_ids': share_doc['id']})

    print(Share.find({'tags': []}).count())
Beispiel #7
0
    def post(self):
        """Create or update a share, optionally importing a link and a cover.

        Form fields read: ``id``, ``title``, ``markdown``, ``content``,
        ``sharetype``, ``slug``, ``tags``, ``post_Img``, ``link``,
        ``vote_open``, ``vote_title`` and ``img_url``.

        Steps:

        * When ``link`` is set, the page is looked up in ``Webcache`` or
          downloaded; its title is used when no title was submitted. A
          freshly downloaded page with non-empty markdown is stored in
          ``Webcache``. If the download fails, a short message is written
          and the handler returns without saving anything.
        * When ``img_url`` is set and no ``post_Img`` was submitted, the
          image is downloaded into ``static/upload/img``. Images whose
          aspect ratio exceeds 5:1 are deleted again; otherwise thumbnails
          are generated and ``post_img`` is set to the ``_1200.jpg`` name.
        * The share is updated (``id`` given) or created for the current
          user, whose ``user_leaf`` is then increased by 10. Tags are
          registered through ``Tag.new`` and the client is redirected to
          ``/share/<id>``, or to ``/404`` when ``id`` matches no share.

        Raises:
            ValueError: if ``img_url`` does not end in a supported image
                extension.
        """
        # TODO
        share_id = self.get_argument("id", None)
        title = self.get_argument("title", '')
        markdown = self.get_argument("markdown", '')
        content = self.get_argument("content", '')
        sharetype = self.get_argument("sharetype", '')
        slug = self.get_argument("slug", '')
        tags = self.get_argument("tags", '')
        post_img = self.get_argument("post_Img", '')
        link = self.get_argument("link", '')
        user_id = self.current_user["user_id"]
        vote_open = self.get_argument("vote_open", '')
        vote_title = self.get_argument("vote_title", '')
        img_url = self.get_argument("img_url", '')

        tags = tags.split()

        # Fallback title. It stays empty when no link is submitted, so the
        # ``if not title`` check below never reads an unbound name.
        doc_title = ''

        if link:
            url = link
            doc = Webcache.find_one({'url': url}, {'_id': 0})
            if doc:
                logger.info('already downloaded')
                doc_title = doc.title
            else:
                sessions = requests.session()
                sessions.headers[
                    'User-Agent'] = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.131 Safari/537.36'
                try:
                    response = sessions.get(url, timeout=4)
                # TODO: try to use a proxy
                except (requests.ConnectionError, requests.Timeout) as e:
                    print(e)
                    self.write("GFW...")
                    return
                except Exception as e:
                    logger.info('e: {}'.format(e))
                    self.write("GFW")
                    return
                # TODO: the detected charset may still be wrong for some pages
                response.encoding = get_charset(response)
                logger.info('response.encoding {}'.format(response.encoding))
                doc = Document(response.text)
                doc_title = doc.title()
                summary = doc.summary()
                _markdown = html2text.html2text(summary)
                _markdown = _markdown.replace('-\n', '-').strip()
                res_webcache = {
                    'url': url,
                    'title': doc_title,
                    'markdown': _markdown,
                }
                # Only cache pages from which some text could be extracted.
                if _markdown:
                    Webcache.new(res_webcache)

        if vote_open.isdigit():
            vote_open = int(vote_open)
        else:
            vote_open = 0
        if not title:
            title = doc_title

        # Handle the cover image link.
        if img_url and not post_img:
            ext = img_url.split('?')[0].split('.')[-1]
            ext = '.' + ext.lower()
            print(ext)
            # Explicit check instead of ``assert``: asserts are stripped
            # under ``python -O`` and this value comes from the request.
            if ext not in ['.jpg', '.jpeg', '.gif', '.png', '.bmp']:
                raise ValueError(
                    'unsupported image extension: {}'.format(ext))
            img_dir = 'static/upload/img'
            now = datetime.datetime.now()
            t = now.strftime('%Y%m%d_%H%M%S_%f')
            img_name = '%s%s' % (t, ext)
            img_path = '%s/%s' % (img_dir, img_name)
            print(img_path)
            # NOTE(review): verify=False disables TLS certificate checks and
            # the URL is user supplied -- confirm this is acceptable.
            # The timeout keeps an unresponsive host from blocking the
            # handler indefinitely.
            r = requests.get(img_url, verify=False, stream=True, timeout=10)
            chunk_size = 100
            with open(img_path, 'wb') as image:
                for chunk in r.iter_content(chunk_size):
                    image.write(chunk)

            # Close the image file before it is possibly removed below.
            with Image.open(img_path) as im:
                width, height = im.size
            if width / height > 5 or height / width > 5:
                # Aspect ratio too extreme: delete the downloaded image.
                os.remove(img_path)
                print('请不要上传长宽比例过大的图片')
            else:
                # Create the 1200x550, 750x230, 365x230 and 260x160
                # thumbnails.
                make_post_thumb(img_path,
                                sizes=[(1200, 550), (750, 230), (365, 230),
                                       (260, 160)])
                print('done')
                post_img = img_path.split('/')[-1]
                post_img = post_img.split('.')[0] + '_1200.jpg'

        res = {
            'title': title,
            'markdown': markdown,
            'content': content,
            'sharetype': sharetype,
            'slug': slug,
            'tags': tags,
            'post_img': post_img,
            'link': link,
            'vote_open': vote_open,
            'vote_title': vote_title,
            'updated': time.time(),
        }
        if share_id:
            share = Share.by_sid(share_id)
            if not share:
                self.redirect("/404")
                # Stop here: falling through would dereference the missing
                # share and attempt a second redirect.
                return
            share.update(res)
            share.save()
        else:
            res['user_id'] = user_id
            share = Share.new(res)
            user = User.by_sid(user_id)
            user.user_leaf += 10
            user.save()
        for name in tags:
            Tag.new({'name': name, 'share_ids': share.id})
        self.redirect("/share/" + str(share.id))
Beispiel #8
0
def _published_timestamp(raw):
    """Convert a feed entry's ``published`` string into a POSIX timestamp.

    The format is guessed from markers in the string, tested in the order
    below. A ``ValueError`` raised by ``strptime`` propagates when the
    guessed format does not fit.

    NOTE(review): every branch without ``%z`` yields a naive datetime, which
    ``timestamp()`` interprets in the local time zone; the EST name and the
    numeric offset of ISO strings are discarded. Confirm this is intended.
    """
    if raw[-3:] == 'GMT':
        parsed = datetime.strptime(raw, "%a, %d %b %Y %H:%M:%S GMT")
    elif ',' in raw:
        try:
            # Date only, e.g. "May 19, 2019". Tried for any year rather
            # than only for strings ending in "2019".
            parsed = datetime.strptime(raw, "%b %d, %Y")
        except ValueError:
            # e.g. "Thu, 18 Apr 2019 19:32:58 +0800"
            parsed = datetime.strptime(raw, "%a, %d %b %Y %H:%M:%S %z")
    elif '/' in raw:
        parsed = datetime.strptime(raw, "%Y/%m/%d %H:%M:%S %z")
    elif raw.endswith('Z'):
        parsed = datetime.strptime(raw.replace('.000Z', 'Z'),
                                   "%Y-%m-%dT%H:%M:%SZ")
    elif 'EST' in raw:
        # e.g. "15 Jun 2019 06:30:00 EST"; the trailing " EST" is cut off.
        parsed = datetime.strptime(raw[:-4], "%d %b %Y %H:%M:%S")
    elif 'T' in raw:
        # e.g. "2019-05-24T15:05:50-04:00"; the "-04:00" offset is cut off.
        parsed = datetime.strptime(raw[:-6], "%Y-%m-%dT%H:%M:%S")
    elif raw.count(' ') == 1:
        parsed = datetime.strptime(raw, "%Y-%m-%d %H:%M:%S")
    else:
        parsed = datetime.strptime(raw, "%Y-%m-%d %H:%M:%S %z")
    return parsed.timestamp()


def add_from_file(rss_url, rss_hostname, rss_name):
    """Import the entries of an RSS feed as shares.

    The feed at ``rss_url`` is parsed with ``feedparser`` and its entries
    are processed in reverse order. For each entry the content, summary,
    publication time, tags and markdown are extracted. If no share with the
    same title exists, a new share is created for the user whose e-mail is
    ``<rss_hostname>@anwensf.com``, that user's ``user_leaf`` is increased
    by 10 and the tags are registered through ``Tag.new``. If exactly one
    share with the title exists and it has no link yet, it is updated. If
    several exist, a warning is printed.

    Args:
        rss_url: location handed to ``feedparser.parse``.
        rss_hostname: local part of the importing user's e-mail address.
        rss_name: display name of the feed, stored as the share's source.
    """
    n = Share.find().count()
    print(n)
    print(rss_name)
    feeds = feedparser.parse(rss_url)
    for post in feeds.entries[::-1]:
        # Entry fields seen so far include: title, title_detail, published,
        # published_parsed, summary, summary_detail, author, authors,
        # author_detail, link, links, guidislink, id, tags, content, image,
        # subtitle and several itunes_* fields.
        #
        # TODO: <enclosure .../> and <media:content .../> (media_content)
        # are not handled yet.
        if hasattr(post, 'summary'):
            summary = post.summary
            assert post.summary == post.description
        else:
            summary = ''
        # Some RSS feeds have no content element; fall back to the summary.
        if hasattr(post, 'content'):
            content = post.content[0]['value']
        elif hasattr(post, 'summary'):
            content = post.summary
        else:
            print('no content', rss_url, rss_hostname, rss_name)
            continue
        # Strip a literal CDATA wrapper ("<![CDATA[" is 9 chars, "]]>" is 3).
        if content.startswith('<![CDATA[') and content.endswith(']]>'):
            content = content[9:-3]
        if summary.startswith('<![CDATA[') and summary.endswith(']]>'):
            summary = summary[9:-3]

        if hasattr(post, 'published'):
            published = _published_timestamp(post.published)
        else:
            # Only report a sample of the entries that lack a date.
            if random.random() > 0.9:
                print('no published time')
            published = time.time()

        title = post.title
        link = post.link
        author = ''
        if hasattr(post, 'source'):
            source_title = post.source.title
            print(rss_name, source_title)
            if rss_name == '虎嗅':
                # For this feed the entry's source title is kept as author.
                author = source_title
            else:
                assert rss_name in source_title
        source = rss_name

        if hasattr(post, 'category_title'):
            category = post.category_title
            assert ' ' not in category
            assert ',' not in category
            tags = [category]
        elif hasattr(post, 'tags'):
            category = ''
            joined = ','.join([t['term'] for t in post.tags])
            if '-' in joined:
                print(joined)
            # Spaces inside a term become dashes; empty names (e.g. from an
            # empty tag list) are dropped so no blank tag is registered.
            tags = [name for name in joined.replace(' ', '-').split(',')
                    if name]
        else:
            category = ''
            tags = []
        sharetype = 'rss'
        try:
            markdown = html2text.html2text(content)
        except Exception as e:
            print('error in html-to-markdown: {}'.format(e))
            continue
        assert link
        res = {
            'title': title,
            'link': link,
            'source': source,
            'category': category,
            'content': content,
            'summary': summary,
            'sharetype': sharetype,
            'tags': tags,
            'markdown': markdown,
            'published': published,
            'updated': time.time(),
        }
        if hasattr(post, 'author'):
            # TODO
            print('author: ', post.author)
            res['author'] = post.author
        else:
            res['author'] = author

        # De-duplication strategy: entries are matched by identical title.
        found = Share.find({'title': title})
        # Count once instead of re-running the count query per comparison.
        found_count = found.count()
        if found_count > 1:
            print('!! repeated article title: {}'.format(title))
        elif found_count == 1:
            share = Share.by_sid(found[0].id)
            if share and summary and not share.link and link:
                print(res['link'])
                print('title {} updated'.format(title))
                share.update(res)
                share.save()
        else:
            print('title {} adding'.format(title))
            email = '{}@anwensf.com'.format(rss_hostname)
            auser = User.by_email(email)
            assert auser
            user_id = auser.id
            res['user_id'] = user_id
            assert res['link']
            share = Share.new(res)

            user = User.by_sid(user_id)
            user.user_leaf += 10
            user.save()
            for name in tags:
                Tag.new({'name': name, 'share_ids': share.id})