Example #1
0
 def parse_album_by_aid(self, drama_id, aid, last_ep=0):
     """Store the episodes of a Sohu album that come after ``last_ep``.

     The album's video list is requested from the Sohu API (first page,
     ``page_size=100``). Episode numbers are the 1-based positions of the
     entries in that list.

     :param drama_id: id the episodes are stored under.
     :param aid: album id interpolated into the API URL.
     :param last_ep: entries numbered ``<= last_ep`` are skipped.
     :return: None; failures are logged instead of raised.
     """
     source = "http://api.tv.sohu.com/v4/album/videos/%s.json?page_size=100&api_key=695fe827ffeb7d74260a813025970bd5&plat=3&partner=1&sver=5.0.1&poid=1&page=1&with_fee_video=1&" % aid
     content = self.get_decoded_json(source)
     try:
         # A falsy reply and a reply without the expected payload are
         # treated alike: log and give up on this album.
         videos = content['data']['videos'] if content else None
     except (KeyError, TypeError):
         videos = None
     if videos is None:
         logging.error("sohu parse album by aid error, %s, %s", drama_id, aid)
         return

     for ep, v in enumerate(videos, 1):
         if ep <= last_ep:
             continue
         # When a "super" stream exists, use high/super as the normal/HD
         # pair; otherwise fall back to nor/high.
         if v.get('url_super') is not None:
             url, hd_url = v['url_high'], v['url_super']
         else:
             url, hd_url = v['url_nor'], v['url_high']
         url_model = UrlContentModel.instance()
         v1, v2 = url_model.insert(url), url_model.insert(hd_url)
         # The episode is stored only if both URL inserts returned a
         # positive value.
         if v1 > 0 and v2 > 0:
             DramaEpisodeModel.instance().insert(drama_id, ep, 0, source,
                                                 url, hd_url)
         else:
             logging.error("sohu get url content error, %s, %s", url, hd_url)
Example #2
0
class DramaEpisodeSource(BaseParser):
    """Collect episode pages for a drama and store the Tudou-hosted ones.

    Episode pages are fetched with ``get_decoded_html`` (not defined in this
    class; presumably inherited from ``BaseParser``), the Tudou video code is
    pulled from the page's ``#ads iframe`` and resolved to playable URLs
    through ``TudouParser``.
    """

    def __init__(self):
        self.tudouParser = TudouParser()
        self.model = DramaEpisodeModel()
        self.urlModel = UrlContentModel()

    def fetch(self, d_id, url, last_ep=0):
        """Walk the episode list at ``url`` and fetch episodes not stored yet.

        :param d_id: drama id the episodes belong to.
        :param url: page containing the ``.abc dt`` episode entries.
        :param last_ep: last episode already fetched; replaced by the last
            stored episode for ``d_id`` when there is one.

        Stops at the first episode page for which ``fetch_ep_page`` returns
        False.
        """
        text_content = self.get_decoded_html(url)
        content = pq(text_content)
        trs = content(".abc dt")

        eps = self.model.get_by_drama_id(d_id)
        if eps:
            last_ep = eps[-1]['episode']

        # The fallback episode number is the entry's position in the list.
        # It has to advance on skipped entries too, otherwise unnumbered
        # entries would be stuck at 1 and never get past ``last_ep``.
        for position, tr in enumerate(trs, 1):
            link = pq(tr)("a")
            href = link.attr("href")
            if not href:
                # Entry without a link: nothing to fetch.
                continue
            ep_url = HOST + href
            ep_group = re.search(r"(\d+)", link.text())

            if ep_group is not None:
                ep = int(ep_group.group(1))
            else:
                ep = position
            # Single-argument print(...) emits the same text on Python 2 and 3.
            print("%s %s" % (ep_url, ep))
            if ep <= last_ep:
                print("fetch before, skip %s" % ep)
                continue
            if self.fetch_ep_page(d_id, ep_url, ep) is False:
                break

    def fetch_ep_page(self, d_id, url, ep):
        """Fetch one episode page and store its Tudou video.

        :param d_id: drama id.
        :param url: episode page URL.
        :param ep: episode number to store the video under.
        :return: False when the page has no usable Tudou video, otherwise
            None (also when the Tudou parser yields no URLs).
        """
        text_content = self.get_decoded_html(url)
        if (not text_content
                or "http://www.tudou.com/programs/view" not in text_content):
            print("not tudou %s %s %s" % (d_id, url, ep))
            return False
        content = pq(text_content)
        tudou_source = content("#ads iframe").attr("src")
        # The iframe or its ``code=`` parameter may be missing; report that
        # instead of failing on ``None``.
        vid_match = re.search(r"code=(\S+?)&", tudou_source or "")
        if vid_match is None:
            print("tudou code not found %s %s %s" % (d_id, url, ep))
            return False
        vid = vid_match.group(1)

        source = "http://www.tudou.com/programs/view/" + vid
        play_url, hd_url = self.tudouParser.parse(vid)
        if play_url and hd_url:
            self.model.insert(d_id, ep, 0, source, play_url, hd_url)

    def fetch_by_ep_id(self, d_id, vid, cur, last_ep=0):
        """Fetch episodes ``last_ep + 1 .. cur`` from their numbered pages.

        :param d_id: drama id.
        :param vid: id segment of the hanjucc page URL.
        :param cur: highest episode number to fetch (inclusive).
        :param last_ep: episodes numbered ``<= last_ep`` are skipped.
        """
        for ep in range(max(1, last_ep + 1), cur + 1):
            url = "http://www.hanjucc.com/hanju/%s/%s.html" % (vid, ep)
            self.fetch_ep_page(d_id, url, ep)
Example #3
0
class DramaEpisodeSource(BaseParser):
    """Collect episode pages for a drama and store the Tudou-hosted ones.

    Episode pages are fetched with ``get_decoded_html`` (not defined in this
    class; presumably inherited from ``BaseParser``), the Tudou video code is
    pulled from the page's ``#ads iframe`` and resolved to playable URLs
    through ``TudouParser``.
    """

    def __init__(self):
        self.tudouParser = TudouParser()
        self.model = DramaEpisodeModel()
        self.urlModel = UrlContentModel()

    def fetch(self, d_id, url, last_ep=0):
        """Walk the episode list at ``url`` and fetch episodes not stored yet.

        :param d_id: drama id the episodes belong to.
        :param url: page containing the ``.abc dt`` episode entries.
        :param last_ep: last episode already fetched; replaced by the last
            stored episode for ``d_id`` when there is one.

        Stops at the first episode page for which ``fetch_ep_page`` returns
        False.
        """
        text_content = self.get_decoded_html(url)
        content = pq(text_content)
        trs = content(".abc dt")

        eps = self.model.get_by_drama_id(d_id)
        if eps:
            last_ep = eps[-1]['episode']

        # The fallback episode number is the entry's position in the list.
        # It has to advance on skipped entries too, otherwise unnumbered
        # entries would be stuck at 1 and never get past ``last_ep``.
        for position, tr in enumerate(trs, 1):
            link = pq(tr)("a")
            href = link.attr("href")
            if not href:
                # Entry without a link: nothing to fetch.
                continue
            ep_url = HOST + href
            ep_group = re.search(r"(\d+)", link.text())

            if ep_group is not None:
                ep = int(ep_group.group(1))
            else:
                ep = position
            # Single-argument print(...) emits the same text on Python 2 and 3.
            print("%s %s" % (ep_url, ep))
            if ep <= last_ep:
                print("fetch before, skip %s" % ep)
                continue
            if self.fetch_ep_page(d_id, ep_url, ep) is False:
                break

    def fetch_ep_page(self, d_id, url, ep):
        """Fetch one episode page and store its Tudou video.

        :param d_id: drama id.
        :param url: episode page URL.
        :param ep: episode number to store the video under.
        :return: False when the page has no usable Tudou video, otherwise
            None (also when the Tudou parser yields no URLs).
        """
        text_content = self.get_decoded_html(url)
        if (not text_content
                or "http://www.tudou.com/programs/view" not in text_content):
            print("not tudou %s %s %s" % (d_id, url, ep))
            return False
        content = pq(text_content)
        tudou_source = content("#ads iframe").attr("src")
        # The iframe or its ``code=`` parameter may be missing; report that
        # instead of failing on ``None``.
        vid_match = re.search(r"code=(\S+?)&", tudou_source or "")
        if vid_match is None:
            print("tudou code not found %s %s %s" % (d_id, url, ep))
            return False
        vid = vid_match.group(1)

        source = "http://www.tudou.com/programs/view/" + vid
        play_url, hd_url = self.tudouParser.parse(vid)
        if play_url and hd_url:
            self.model.insert(d_id, ep, 0, source, play_url, hd_url)

    def fetch_by_ep_id(self, d_id, vid, cur, last_ep=0):
        """Fetch episodes ``last_ep + 1 .. cur`` from their numbered pages.

        :param d_id: drama id.
        :param vid: id segment of the hanjucc page URL.
        :param cur: highest episode number to fetch (inclusive).
        :param last_ep: episodes numbered ``<= last_ep`` are skipped.
        """
        for ep in range(max(1, last_ep + 1), cur + 1):
            url = "http://www.hanjucc.com/hanju/%s/%s.html" % (vid, ep)
            self.fetch_ep_page(d_id, url, ep)
Example #4
0
 def fetch_list(self):
     """Sync the Sohu channel listing into the drama and episode tables.

     For every album in the listing: insert the drama if its name is not
     known yet, otherwise update its score; then parse the album's episodes
     starting after the last stored one.

     :return: None; an unusable listing reply is logged and ends the run.
     """
     url = "http://api.tv.sohu.com/v4/search/channel/sub.json?subId=19&&api_key=695fe827ffeb7d74260a813025970bd5&build=5.0.1.1&offset=0&page_size=100&partner=1&pay_type=0&plat=3&poid=1&sver=5.0.1"
     content = self.get_decoded_json(url)
     try:
         videos = content['data']['videos']
     except (KeyError, TypeError):
         # Covers a falsy reply as well as one without the expected payload.
         logging.error("sohu fetch list error, %s", url)
         return

     sp = SohuParser()
     for v in videos:
         name = v['album_name']
         d = DramaModel.instance().get_by_name(name)
         # Score is best-effort: a missing or unparsable tip falls back to 0.
         try:
             score = int(float(v['score_tip'][:-3]) * 10)
         except (KeyError, TypeError, ValueError, OverflowError):
             score = 0
         if not d:
             DramaModel.instance().insert(name, v['publish_time'][:4],
                                          v['hor_w16_pic'], v['main_actor'],
                                          v['album_desc'], v['aid'], score)
             d = DramaModel.instance().get_by_name(name)
             if not d:
                 # The insert did not produce a row we can read back;
                 # skip this album rather than fail on d['id'].
                 logging.error("sohu insert drama error, %s", name)
                 continue
         else:
             logging.info("set score %s for %s", score, d['id'])
             DramaModel.instance().set_score(d['id'], score)
         eps = DramaEpisodeModel.instance().get_by_drama_id(d['id'])
         if eps:
             sp.parse_album_by_aid(d['id'], v['aid'], eps[-1]['episode'])
         else:
             sp.parse_album_by_aid(d['id'], v['aid'])
Example #5
0
 def fetch_list(self):
     """Sync the Sohu channel listing into the drama and episode tables.

     For every album in the listing: insert the drama if its name is not
     known yet, otherwise update its score; then parse the album's episodes
     starting after the last stored one.

     :return: None; an unusable listing reply is logged and ends the run.
     """
     url = "http://api.tv.sohu.com/v4/search/channel/sub.json?subId=19&&api_key=695fe827ffeb7d74260a813025970bd5&build=5.0.1.1&offset=0&page_size=100&partner=1&pay_type=0&plat=3&poid=1&sver=5.0.1"
     content = self.get_decoded_json(url)
     try:
         videos = content["data"]["videos"]
     except (KeyError, TypeError):
         # Covers a falsy reply as well as one without the expected payload.
         logging.error("sohu fetch list error, %s", url)
         return

     sp = SohuParser()
     for v in videos:
         name = v["album_name"]
         d = DramaModel.instance().get_by_name(name)
         # Score is best-effort: a missing or unparsable tip falls back to 0.
         try:
             score = int(float(v["score_tip"][:-3]) * 10)
         except (KeyError, TypeError, ValueError, OverflowError):
             score = 0
         if not d:
             DramaModel.instance().insert(
                 name, v["publish_time"][:4], v["hor_w16_pic"], v["main_actor"], v["album_desc"], v["aid"], score
             )
             d = DramaModel.instance().get_by_name(name)
             if not d:
                 # The insert did not produce a row we can read back;
                 # skip this album rather than fail on d["id"].
                 logging.error("sohu insert drama error, %s", name)
                 continue
         else:
             logging.info("set score %s for %s", score, d["id"])
             DramaModel.instance().set_score(d["id"], score)
         eps = DramaEpisodeModel.instance().get_by_drama_id(d["id"])
         if eps:
             sp.parse_album_by_aid(d["id"], v["aid"], eps[-1]["episode"])
         else:
             sp.parse_album_by_aid(d["id"], v["aid"])
Example #6
0
    def __init__(self, conf, debug=False):
        """Set up routes, template/static settings, models and services.

        :param conf: configuration object; read through ``conf.get`` for
            ``db.uri`` and the ``wechat.*`` keys.
        :param debug: passed through unchanged as the ``debug`` setting.
        """
        # Patterns with a regex class are raw strings so that ``\S`` reaches
        # the regex engine instead of being an invalid string escape.
        handlers = [
            ('/', IndexHandler),
            ('/admin/drama/add', AdminDramaAddHandler),
            ('/admin/drama/list', AdminDramaListHandler),
            ('/admin/drama/search', AdminDramaSearchHandler),
            ('/admin/drama/parser', AdminDramaParserHandler),
            ('/api/drama/list', ApiDramaListHandler),
            ('/api/drama/search', ApiDramaSearchHandler),
            # The more specific "play" route is listed before the generic
            # episode route.
            (r'/drama/episode/play/(\S+)', DramaEpisodePlayHandler),
            (r'/drama/episode/(\S+)', DramaEpisodeHandler),
            ('/weixin', WeixinHandler),
        ]
        # Template and static directories are resolved relative to this file.
        settings = dict(template_path=os.path.join(os.path.dirname(__file__),
                                                   "./web/template"),
                        static_path=os.path.join(os.path.dirname(__file__),
                                                 "./web/static"),
                        debug=debug,
                        autoescape=None)
        self.conf = conf
        engine = MysqlEngine(conf.get('db.uri'))

        # Models are bound to the engine before any ``instance()`` is taken.
        BaseModel.setup_all_model(engine)

        self.dramaModel = DramaModel.instance()
        self.episodeModel = DramaEpisodeModel.instance()

        self.dramaService = DramaService()
        self.wechat = WechatBasic(token=conf.get("wechat.token"),
                                  appid=conf.get("wechat.appId"),
                                  appsecret=conf.get("wechat.appSecret"))
        # NOTE(review): hard-coded salt; presumably changing it changes every
        # encoded id -- confirm before moving it to configuration.
        self.hashid = Hashids(salt="woshifyz")
        self.parser = {'tudou': TudouParser()}
        super(Application, self).__init__(handlers, **settings)
Example #7
0
 def parse_album_by_aid(self, drama_id, aid, last_ep=0):
     """Store the episodes of a Sohu album that come after ``last_ep``.

     The album's video list is requested from the Sohu API (first page,
     ``page_size=100``). Episode numbers are the 1-based positions of the
     entries in that list.

     :param drama_id: id the episodes are stored under.
     :param aid: album id interpolated into the API URL.
     :param last_ep: entries numbered ``<= last_ep`` are skipped.
     :return: None; failures are logged instead of raised.
     """
     source = (
         "http://api.tv.sohu.com/v4/album/videos/%s.json?page_size=100&api_key=695fe827ffeb7d74260a813025970bd5&plat=3&partner=1&sver=5.0.1&poid=1&page=1&with_fee_video=1&"
         % aid
     )
     content = self.get_decoded_json(source)
     try:
         # A falsy reply and a reply without the expected payload are
         # treated alike: log and give up on this album.
         videos = content["data"]["videos"] if content else None
     except (KeyError, TypeError):
         videos = None
     if videos is None:
         logging.error("sohu parse album by aid error, %s, %s", drama_id, aid)
         return

     for ep, v in enumerate(videos, 1):
         if ep <= last_ep:
             continue
         # When a "super" stream exists, use high/super as the normal/HD
         # pair; otherwise fall back to nor/high.
         if v.get("url_super") is not None:
             url, hd_url = v["url_high"], v["url_super"]
         else:
             url, hd_url = v["url_nor"], v["url_high"]
         url_model = UrlContentModel.instance()
         v1, v2 = url_model.insert(url), url_model.insert(hd_url)
         # The episode is stored only if both URL inserts returned a
         # positive value.
         if v1 > 0 and v2 > 0:
             DramaEpisodeModel.instance().insert(drama_id, ep, 0, source, url, hd_url)
         else:
             logging.error("sohu get url content error, %s, %s", url, hd_url)
Example #8
0
    def __init__(self, conf, debug=False):
        """Set up routes, template/static settings, models and services.

        :param conf: configuration object; read through ``conf.get`` for
            ``db.uri`` and the ``wechat.*`` keys.
        :param debug: passed through unchanged as the ``debug`` setting.
        """
        # Patterns with a regex class are raw strings so that ``\S`` reaches
        # the regex engine instead of being an invalid string escape.
        handlers = [
            ('/', IndexHandler),
            ('/admin/drama/add', AdminDramaAddHandler),
            ('/admin/drama/list', AdminDramaListHandler),
            ('/admin/drama/search', AdminDramaSearchHandler),
            ('/admin/drama/parser', AdminDramaParserHandler),

            ('/api/drama/list', ApiDramaListHandler),
            ('/api/drama/search', ApiDramaSearchHandler),

            # The more specific "play" route is listed before the generic
            # episode route.
            (r'/drama/episode/play/(\S+)', DramaEpisodePlayHandler),
            (r'/drama/episode/(\S+)', DramaEpisodeHandler),

            ('/weixin', WeixinHandler),
        ]
        # Template and static directories are resolved relative to this file.
        settings = dict(template_path=os.path.join(os.path.dirname(__file__), "./web/template"),
                        static_path=os.path.join(os.path.dirname(__file__), "./web/static"),
                        debug=debug,
                        autoescape=None
                        )
        self.conf = conf
        engine = MysqlEngine(conf.get('db.uri'))

        # Models are bound to the engine before any ``instance()`` is taken.
        BaseModel.setup_all_model(engine)

        self.dramaModel = DramaModel.instance()
        self.episodeModel = DramaEpisodeModel.instance()

        self.dramaService = DramaService()
        self.wechat = WechatBasic(token=conf.get("wechat.token"), appid=conf.get("wechat.appId"),
                                  appsecret=conf.get("wechat.appSecret"))
        # NOTE(review): hard-coded salt; presumably changing it changes every
        # encoded id -- confirm before moving it to configuration.
        self.hashid = Hashids(salt="woshifyz")
        self.parser = {
            'tudou': TudouParser()
        }
        super(Application, self).__init__(handlers, **settings)
Example #9
0
 def get_drama_infos(self, count, offset):
     """Return the dramas from ``list_avalable(count, offset)`` with episodes attached.

     Each returned drama gets its episode list stored under the ``"eps"`` key.
     """
     listing = DramaModel.instance().list_avalable(count, offset)
     for entry in listing:
         # One episode lookup per drama, keyed by the drama's id.
         entry["eps"] = DramaEpisodeModel.instance().get_by_drama_id(entry.id)
     return listing
Example #10
0
 def new_drama(self, count=10):
     """Return up to ``count`` entries from ``DramaEpisodeModel.new_drama``.

     Each entry gets the drama looked up by its ``drama_id`` stored under
     the ``"drama"`` key.
     """
     latest = DramaEpisodeModel.instance().new_drama(count=count)
     for episode in latest:
         owner = DramaModel.instance().get_by_id(episode.drama_id)
         episode["drama"] = owner
     return latest
Example #11
0
 def search_by_name(self, name, count):
     """Return the dramas matching ``name`` (at most ``count`` is requested).

     Each returned drama gets its episode list stored under the ``"eps"`` key.
     """
     matches = DramaModel.instance().search_by_name(name, count)
     for hit in matches:
         # One episode lookup per matching drama, keyed by the drama's id.
         hit["eps"] = DramaEpisodeModel.instance().get_by_drama_id(hit.id)
     return matches
Example #12
0
 def get_drama_infos(self, count, offset):
     """Return the dramas from ``list_avalable(count, offset)`` with episodes attached.

     Each returned drama gets its episode list stored under the ``'eps'`` key.
     """
     listing = DramaModel.instance().list_avalable(count, offset)
     for entry in listing:
         # One episode lookup per drama, keyed by the drama's id.
         entry['eps'] = DramaEpisodeModel.instance().get_by_drama_id(entry.id)
     return listing
Example #13
0
 def new_drama(self, count=10):
     """Return up to ``count`` entries from ``DramaEpisodeModel.new_drama``.

     Each entry gets the drama looked up by its ``drama_id`` stored under
     the ``'drama'`` key.
     """
     latest = DramaEpisodeModel.instance().new_drama(count=count)
     for episode in latest:
         owner = DramaModel.instance().get_by_id(episode.drama_id)
         episode['drama'] = owner
     return latest
Example #14
0
 def search_by_name(self, name, count):
     """Return the dramas matching ``name`` (at most ``count`` is requested).

     Each returned drama gets its episode list stored under the ``'eps'`` key.
     """
     matches = DramaModel.instance().search_by_name(name, count)
     for hit in matches:
         # One episode lookup per matching drama, keyed by the drama's id.
         hit['eps'] = DramaEpisodeModel.instance().get_by_drama_id(hit.id)
     return matches
Example #15
0
 def __init__(self):
     """Create the Tudou parser and the two model objects this instance uses."""
     self.tudouParser = TudouParser()
     # NOTE(review): both models are constructed directly here; confirm
     # whether a shared ``instance()`` accessor should be used instead.
     self.model = DramaEpisodeModel()
     self.urlModel = UrlContentModel()
Example #16
0
 def __init__(self):
     """Create the Tudou parser and the two model objects this instance uses."""
     self.tudouParser = TudouParser()
     # NOTE(review): both models are constructed directly here; confirm
     # whether a shared ``instance()`` accessor should be used instead.
     self.model = DramaEpisodeModel()
     self.urlModel = UrlContentModel()