def __init__(self, conf, debug=False):
    """Wire up URL routes, Tornado settings, models and services.

    :param conf: configuration object exposing ``get(key)`` lookups
                 (db.uri, wechat.token, wechat.appId, wechat.appSecret)
    :param debug: enables Tornado debug/autoreload mode
    """
    # Raw strings for the regex route patterns: '\S' is an invalid escape
    # sequence in a plain literal (the raw form is identical at runtime).
    handlers = [
        ('/', IndexHandler),
        ('/admin/drama/add', AdminDramaAddHandler),
        ('/admin/drama/list', AdminDramaListHandler),
        ('/admin/drama/search', AdminDramaSearchHandler),
        ('/admin/drama/parser', AdminDramaParserHandler),
        ('/api/drama/list', ApiDramaListHandler),
        ('/api/drama/search', ApiDramaSearchHandler),
        (r'/drama/episode/play/(\S+)', DramaEpisodePlayHandler),
        (r'/drama/episode/(\S+)', DramaEpisodeHandler),
        ('/weixin', WeixinHandler),
    ]
    # NOTE(review): autoescape=None disables template auto-escaping globally;
    # confirm templates never render untrusted input.
    settings = dict(
        template_path=os.path.join(os.path.dirname(__file__), "./web/template"),
        static_path=os.path.join(os.path.dirname(__file__), "./web/static"),
        debug=debug,
        autoescape=None)
    self.conf = conf
    # Bind every model class to the shared MySQL engine before instancing.
    engine = MysqlEngine(conf.get('db.uri'))
    BaseModel.setup_all_model(engine)
    self.dramaModel = DramaModel.instance()
    self.episodeModel = DramaEpisodeModel.instance()
    self.dramaService = DramaService()
    self.wechat = WechatBasic(token=conf.get("wechat.token"),
                              appid=conf.get("wechat.appId"),
                              appsecret=conf.get("wechat.appSecret"))
    self.hashid = Hashids(salt="woshifyz")
    # Per-site video parsers, keyed by source name.
    self.parser = {'tudou': TudouParser()}
    super(Application, self).__init__(handlers, **settings)
class DramaEpisodeSource(BaseParser): def __init__(self): self.tudouParser = TudouParser() self.model = DramaEpisodeModel() self.urlModel = UrlContentModel() def fetch(self, d_id, url, last_ep=0): text_content = self.get_decoded_html(url) content = pq(text_content) trs = content(".abc dt") eps = self.model.get_by_drama_id(d_id) if eps: last_ep = eps[-1]['episode'] i = 1 for tr in trs: tr = pq(tr) ep_url = HOST + tr("a").attr("href") count = tr("a").text() ep_group = re.search(r"(\d+)", count) if ep_group is not None: ep = int(ep_group.group(1)) else: ep = i print ep_url, ep if ep <= last_ep: print "fetch before, skip %s" % ep continue res = self.fetch_ep_page(d_id, ep_url, ep) if res == False: break i += 1 def fetch_ep_page(self, d_id, url, ep): text_content = self.get_decoded_html(url) if text_content.find("http://www.tudou.com/programs/view") < 0: print "not tudou", d_id, url, ep return False content = pq(text_content) tudou_source = content("#ads iframe").attr("src") vid = re.search(r"code=(\S+?)&", tudou_source).group(1) source = "http://www.tudou.com/programs/view/" + vid url, hd_url = self.tudouParser.parse(vid) if url and hd_url: self.model.insert(d_id, ep, 0, source, url, hd_url) def fetch_by_ep_id(self, d_id, vid, cur, last_ep=0): for i in range(1, cur + 1): if i <= last_ep: continue url = "http://www.hanjucc.com/hanju/%s/%s.html" % (vid, i) self.fetch_ep_page(d_id, url, i)
# NOTE(review): this class duplicates the earlier DramaEpisodeSource
# definition in this file (modulo formatting); at import time the later
# definition wins.  Consider removing one copy.
class DramaEpisodeSource(BaseParser):
    """Scrapes episode listing pages and stores Tudou-backed play URLs."""

    def __init__(self):
        self.tudouParser = TudouParser()  # resolves a Tudou vid to stream URLs
        self.model = DramaEpisodeModel()
        self.urlModel = UrlContentModel()

    def fetch(self, d_id, url, last_ep=0):
        """Fetch every episode newer than `last_ep` from a listing page."""
        # Download and parse the listing page.
        text_content = self.get_decoded_html(url); content = pq(text_content)
        trs = content(".abc dt")
        eps = self.model.get_by_drama_id(d_id)
        if eps:
            # Resume after the newest episode already stored for this drama,
            # overriding the caller-supplied last_ep.
            last_ep = eps[-1]['episode']
        i = 1
        for tr in trs:
            tr = pq(tr)
            ep_url = HOST + tr("a").attr("href")
            count = tr("a").text()
            # Episode number comes from digits in the link text; fall back
            # to the positional index when none are found.
            ep_group = re.search(r"(\d+)", count)
            if ep_group is not None:
                ep = int(ep_group.group(1))
            else:
                ep = i
            print ep_url, ep
            if ep <= last_ep:
                print "fetch before, skip %s" % ep
                continue
            res = self.fetch_ep_page(d_id, ep_url, ep)
            # fetch_ep_page returns False for non-Tudou pages; stop there.
            if res == False:
                break
            i += 1

    def fetch_ep_page(self, d_id, url, ep):
        """Store one episode's Tudou source; returns False if not Tudou-backed."""
        text_content = self.get_decoded_html(url)
        if text_content.find("http://www.tudou.com/programs/view") < 0:
            print "not tudou", d_id, url, ep
            return False
        content = pq(text_content)
        tudou_source = content("#ads iframe").attr("src")
        # NOTE(review): attr() may return None and re.search may fail to
        # match, which would raise TypeError/AttributeError here -- verify
        # pages always embed a "code=...&" Tudou iframe.
        vid = re.search(r"code=(\S+?)&", tudou_source).group(1)
        source = "http://www.tudou.com/programs/view/" + vid
        # `url` is rebound from the parameter to the parsed stream URL.
        url, hd_url = self.tudouParser.parse(vid)
        if url and hd_url:
            self.model.insert(d_id, ep, 0, source, url, hd_url)

    def fetch_by_ep_id(self, d_id, vid, cur, last_ep=0):
        """Fetch episodes 1..cur by site vid, skipping those <= last_ep."""
        for i in range(1, cur + 1):
            if i <= last_ep:
                continue
            url = "http://www.hanjucc.com/hanju/%s/%s.html" % (vid, i)
            self.fetch_ep_page(d_id, url, i)
def __init__(self):
    """Set up the persistence models and the Tudou parser this source uses."""
    self.urlModel = UrlContentModel()
    self.model = DramaEpisodeModel()
    self.tudouParser = TudouParser()