コード例 #1
0
ファイル: task.py プロジェクト: zhu327/rss
def get_key():
    """Refresh the cached key material.

    Fetches the page at WEIXIN_KEY for a random lowercase-letter id,
    parses ``(key, level, setting)`` out of the HTML via ``process_key``,
    and stores the tuple in memcache under ``'key'``.
    """
    http_client = tornado.httpclient.AsyncHTTPClient()
    letter = random.choice('abcdefghijklmnopqrstuvwxyz')
    page = yield http_client.fetch(WEIXIN_KEY.format(id=letter))
    body = page.body.decode('utf-8')
    key, level, setting = process_key(body)
    mc.set('key', (key, level, setting))
コード例 #2
0
    def get(self):
        """Render an RSS feed for the WeChat account identified by ``self.key``.

        Flow: build the article-list API url from cached cookie/key material,
        fetch the article list, fetch every article body concurrently, then
        render ``rss.xml``.  Redirects to "/" when the list fetch fails.
        """
        client = tornado.httpclient.AsyncHTTPClient()
        account_id = self.key  # renamed from `id` to avoid shadowing the builtin
        link = WEIXIN_KEY.format(id=account_id)

        # Pick one of the cached cookie header dicts at random.
        cookies = self.mc.get('cookie')
        head = random.choice(cookies)

        # Cached key material — presumably (key, level, setting); verify against get_key().
        key = self.mc.get('key')
        eqs = process_eqs(key[0], account_id, key[2])

        # Build the api url.
        url = WEIXIN_URL.format(id=account_id,
                                eqs=urllib.quote(eqs),
                                ekv=key[1],
                                t=int(time.time() * 1000))

        # Fetch the article list from the api url.
        request = tornado.httpclient.HTTPRequest(url=url, headers=head)
        response = yield client.fetch(request)

        if response.code != 200:
            self.redirect("/")
            # BUG FIX: must stop here — the original fell through and
            # tried to render after the redirect had finished the request.
            return

        jsonp = response.body.decode('utf-8')
        items = process_jsonp(jsonp)  # parse the article list

        if not items:
            self.set_header("Content-Type",
                            "application/rss+xml; charset=UTF-8")
            self.render("rss.xml",
                        title='',
                        description='',
                        items=items,
                        pubdate='',
                        link=link)
            # BUG FIX: render() finishes the request; the original fell
            # through to items[0] and raised IndexError / double-finish.
            return

        # Fetch the content of every article concurrently.
        responses = yield [client.fetch(i['link']) for i in items]
        for i, response in enumerate(responses):
            if response.code == 200:
                html = response.body.decode('utf-8')
                items[i]['content'] = process_content(html)
            else:
                items[i]['content'] = ''

        pubdate = items[0]['created']
        title = description = items[0]['author']

        self.set_header("Content-Type", "application/rss+xml; charset=UTF-8")
        self.render("rss.xml",
                    title=title,
                    description=description,
                    items=items,
                    pubdate=pubdate,
                    link=link)
コード例 #3
0
ファイル: weixin.py プロジェクト: nsdown/rss
    def get(self):
        """Render an RSS feed for the WeChat account identified by ``self.key``.

        Builds the article-list API url from cached cookie/key material,
        fetches the list, fetches each article's body, and renders
        ``rss.xml``.  Redirects to "/" when the list fetch fails.
        """
        client = tornado.httpclient.AsyncHTTPClient()
        account_id = self.key  # renamed from `id` to avoid shadowing the builtin
        link = WEIXIN_KEY.format(id=account_id)

        # Pick one of the cached cookie header dicts at random.
        cookies = self.mc.get('cookie')
        head = random.choice(cookies)

        # Cached key material — presumably (key, level, setting); verify against get_key().
        key = self.mc.get('key')
        eqs = process_eqs(key[0], account_id, key[2])

        url = WEIXIN_URL.format(id=account_id, eqs=urllib.quote(eqs), ekv=key[1], t=int(time.time() * 1000))  # build the api url

        # Fetch the article list from the api url.
        request = tornado.httpclient.HTTPRequest(url=url, headers=head)
        response = yield client.fetch(request)

        if response.code != 200:
            self.redirect("/")
            # BUG FIX: must stop here — the original fell through and
            # tried to render after the redirect had finished the request.
            return

        jsonp = response.body.decode('utf-8')
        items = process_jsonp(jsonp)  # parse the article list

        if not items:
            self.set_header("Content-Type", "application/rss+xml; charset=UTF-8")
            self.render("rss.xml", title='', description='', items=items, pubdate='', link=link)
            # BUG FIX: render() finishes the request; the original fell
            # through to items[0] and raised IndexError / double-finish.
            return

        # Fetch the content of every article concurrently.
        responses = yield [client.fetch(i['link']) for i in items]
        for i, response in enumerate(responses):
            if response.code == 200:
                html = response.body.decode('utf-8')
                items[i]['content'] = process_content(html)
            else:
                items[i]['content'] = ''

        pubdate = items[0]['created']
        title = description = items[0]['author']

        self.set_header("Content-Type", "application/rss+xml; charset=UTF-8")
        self.render("rss.xml", title=title, description=description, items=items, pubdate=pubdate, link=link)
コード例 #4
0
ファイル: weixin.py プロジェクト: Sendarg/all2rss
    def get(self):
        """Render an XML feed for the WeChat account identified by ``self.key``.

        Builds the article-list API url (WEIXIN_SOUGOU) from cached
        cookie/key material, fetches the list, fetches each article's
        body, and renders ``rss.xml``.  Redirects to "/" on failure.
        """
        client = tornado.httpclient.AsyncHTTPClient()
        account_id = self.key  # renamed from `id` to avoid shadowing the builtin
        link = WEIXIN_KEY.format(id=account_id)

        # Pick one of the cached cookie header dicts at random.
        # (A large commented-out cookie-handling experiment that lived here
        # has been removed — dead code.)
        cookies = self.mc.get('cookie')
        head = random.choice(cookies)

        # Cached key material — presumably (key, level, setting); verify against get_key().
        key = self.mc.get('key')
        eqs = process_eqs(key[0], account_id, key[2])

        url = WEIXIN_SOUGOU.format(id=account_id, eqs=urllib.quote(eqs), ekv=key[1], t=int(time.time() * 1000))  # build the api url

        # Fetch the article list from the api url.
        request = tornado.httpclient.HTTPRequest(url=url, headers=head)
        response = yield client.fetch(request)

        if response.code != 200:
            self.redirect("/")
            # BUG FIX: must stop here — the original fell through and
            # tried to render after the redirect had finished the request.
            return

        jsonp = response.body.decode('utf-8')
        items = process_jsonp(jsonp)  # parse the article list

        if not items:
            self.set_header("Content-Type", "application/xml")
            self.render("rss.xml", title='', description='', items=items, pubdate='', link=link)
            # BUG FIX: render() finishes the request; the original fell
            # through to items[0] and raised IndexError / double-finish.
            return

        # Fetch the content of every article concurrently.
        responses = yield [client.fetch(i['link']) for i in items]
        for i, response in enumerate(responses):
            if response.code == 200:
                html = response.body.decode('utf-8')
                items[i]['content'] = process_content(html)
            else:
                items[i]['content'] = ''

        pubdate = items[0]['created']
        title = description = items[0]['author']

        self.set_header("Content-Type", "application/xml")
        self.render("rss.xml", title=title, description=description, items=items, pubdate=pubdate, link=link)