def parse_album_by_aid(self, drama_id, aid, last_ep=0):
    """Pull the Sohu album `aid` from the v4 API and persist every episode
    numbered above `last_ep` for drama `drama_id`."""
    source = "http://api.tv.sohu.com/v4/album/videos/%s.json?page_size=100&api_key=695fe827ffeb7d74260a813025970bd5&plat=3&partner=1&sver=5.0.1&poid=1&page=1&with_fee_video=1&" % aid
    payload = self.get_decoded_json(source)
    if not payload:
        logging.error("sohu parse album by aid error, %s, %s" % (drama_id, aid))
        return
    for ep_no, video in enumerate(payload['data']['videos'], 1):
        if ep_no <= last_ep:
            continue  # already stored, skip
        # Prefer the higher-quality pair when a "super" stream is present.
        if video.get('url_super', None) is not None:
            std_url, hd = video['url_high'], video['url_super']
        else:
            std_url, hd = video['url_nor'], video['url_high']
        stored_std = UrlContentModel.instance().insert(std_url)
        stored_hd = UrlContentModel.instance().insert(hd)
        # Only persist the episode when both URL payloads were stored.
        if stored_std > 0 and stored_hd > 0:
            DramaEpisodeModel.instance().insert(drama_id, ep_no, 0, source, std_url, hd)
        else:
            logging.error("sohu get url content error, %s, %s" % (std_url, hd))
class DramaEpisodeSource(BaseParser): def __init__(self): self.tudouParser = TudouParser() self.model = DramaEpisodeModel() self.urlModel = UrlContentModel() def fetch(self, d_id, url, last_ep=0): text_content = self.get_decoded_html(url); content = pq(text_content) trs = content(".abc dt") eps = self.model.get_by_drama_id(d_id) if eps: last_ep = eps[-1]['episode'] i = 1 for tr in trs: tr = pq(tr) ep_url = HOST + tr("a").attr("href") count = tr("a").text() ep_group = re.search(r"(\d+)", count) if ep_group is not None: ep = int(ep_group.group(1)) else: ep = i print ep_url, ep if ep <= last_ep: print "fetch before, skip %s" % ep continue res = self.fetch_ep_page(d_id, ep_url, ep) if res == False: break i += 1 def fetch_ep_page(self, d_id, url, ep): text_content = self.get_decoded_html(url) if text_content.find("http://www.tudou.com/programs/view") < 0: print "not tudou", d_id, url, ep return False content = pq(text_content) tudou_source = content("#ads iframe").attr("src") vid = re.search(r"code=(\S+?)&", tudou_source).group(1) source = "http://www.tudou.com/programs/view/" + vid url, hd_url = self.tudouParser.parse(vid) if url and hd_url: self.model.insert(d_id, ep, 0, source, url, hd_url) def fetch_by_ep_id(self, d_id, vid, cur, last_ep=0): for i in range(1, cur + 1): if i <= last_ep: continue url = "http://www.hanjucc.com/hanju/%s/%s.html" % (vid, i) self.fetch_ep_page(d_id, url, i)
class DramaEpisodeSource(BaseParser):
    """Scrapes drama episode listings from hanjucc.com and stores the
    embedded Tudou video URLs via DramaEpisodeModel."""

    def __init__(self):
        # Collaborators: Tudou URL resolver plus the persistence models.
        self.tudouParser = TudouParser()
        self.model = DramaEpisodeModel()
        self.urlModel = UrlContentModel()

    def fetch(self, d_id, url, last_ep=0):
        """Crawl the listing page `url` for drama `d_id`, ingesting every
        episode newer than the latest one already stored."""
        text_content = self.get_decoded_html(url)
        content = pq(text_content)
        trs = content(".abc dt")
        # Resume after the newest persisted episode, if any.
        eps = self.model.get_by_drama_id(d_id)
        if eps:
            last_ep = eps[-1]['episode']
        i = 1
        for tr in trs:
            tr = pq(tr)
            ep_url = HOST + tr("a").attr("href")
            count = tr("a").text()
            # Episode number comes from the digits in the link text; fall
            # back to the positional counter when none are found.
            ep_group = re.search(r"(\d+)", count)
            if ep_group is not None:
                ep = int(ep_group.group(1))
            else:
                ep = i
            print ep_url, ep
            if ep <= last_ep:
                print "fetch before, skip %s" % ep
                continue
            res = self.fetch_ep_page(d_id, ep_url, ep)
            # fetch_ep_page returns False for unsupported (non-Tudou) pages;
            # stop crawling the rest of the listing in that case.
            if res == False:
                break
            i += 1

    def fetch_ep_page(self, d_id, url, ep):
        """Parse one episode page, resolve its Tudou video and persist it.

        Returns False when the page is not a Tudou embed; otherwise None.
        """
        text_content = self.get_decoded_html(url)
        if text_content.find("http://www.tudou.com/programs/view") < 0:
            print "not tudou", d_id, url, ep
            return False
        content = pq(text_content)
        tudou_source = content("#ads iframe").attr("src")
        # NOTE(review): raises AttributeError when the iframe src is missing
        # or lacks a "code=" parameter - confirm upstream pages guarantee it.
        vid = re.search(r"code=(\S+?)&", tudou_source).group(1)
        source = "http://www.tudou.com/programs/view/" + vid
        url, hd_url = self.tudouParser.parse(vid)
        if url and hd_url:
            self.model.insert(d_id, ep, 0, source, url, hd_url)

    def fetch_by_ep_id(self, d_id, vid, cur, last_ep=0):
        """Fetch episodes 1..cur by constructing hanjucc episode URLs from
        `vid`, skipping anything at or below `last_ep`."""
        for i in range(1, cur + 1):
            if i <= last_ep:
                continue
            url = "http://www.hanjucc.com/hanju/%s/%s.html" % (vid, i)
            self.fetch_ep_page(d_id, url, i)
def fetch_list(self):
    """Sync the Sohu drama channel listing into the local DB.

    New albums are inserted (then re-read to obtain their row); existing
    albums only get their score refreshed.  Episode parsing then resumes
    after the newest episode already stored for each drama.
    """
    url = "http://api.tv.sohu.com/v4/search/channel/sub.json?subId=19&&api_key=695fe827ffeb7d74260a813025970bd5&build=5.0.1.1&offset=0&page_size=100&partner=1&pay_type=0&plat=3&poid=1&sver=5.0.1"
    content = self.get_decoded_json(url)
    videos = content['data']['videos']
    sp = SohuParser()
    for v in videos:
        name = v['album_name']
        d = DramaModel.instance().get_by_name(name)
        # score_tip is a string whose last 3 chars are a suffix; persist the
        # numeric part scaled by 10.  A missing or malformed tip leaves the
        # score at 0 rather than aborting the whole sync.
        score = 0
        try:
            score = int(float(v['score_tip'][:-3]) * 10)
        except (KeyError, ValueError, TypeError):
            # Narrowed from a bare except: swallow only the expected parse
            # failures and let programming errors surface.
            pass
        if not d:
            DramaModel.instance().insert(name, v['publish_time'][:4], v['hor_w16_pic'],
                                         v['main_actor'], v['album_desc'], v['aid'], score)
            d = DramaModel.instance().get_by_name(name)
        else:
            logging.info("set score %s for %s" % (score, d['id']))
            DramaModel.instance().set_score(d['id'], score)
        # Resume episode parsing after the newest stored episode, if any.
        eps = DramaEpisodeModel.instance().get_by_drama_id(d['id'])
        if eps:
            sp.parse_album_by_aid(d['id'], v['aid'], eps[-1]['episode'])
        else:
            sp.parse_album_by_aid(d['id'], v['aid'])
def fetch_list(self):
    """Sync the Sohu drama channel listing: insert new albums, refresh the
    scores of existing ones, then parse episodes past the newest stored."""
    url = "http://api.tv.sohu.com/v4/search/channel/sub.json?subId=19&&api_key=695fe827ffeb7d74260a813025970bd5&build=5.0.1.1&offset=0&page_size=100&partner=1&pay_type=0&plat=3&poid=1&sver=5.0.1"
    content = self.get_decoded_json(url)
    videos = content["data"]["videos"]
    sp = SohuParser()
    for v in videos:
        name = v["album_name"]
        d = DramaModel.instance().get_by_name(name)
        # score_tip is a string with a 3-char suffix; store 10x the float.
        # NOTE(review): the bare except deliberately leaves score at 0 on any
        # failure - consider narrowing to (KeyError, ValueError, TypeError).
        score = 0
        try:
            score = int(float(v["score_tip"][:-3]) * 10)
        except:
            pass
        if not d:
            DramaModel.instance().insert(
                name,
                v["publish_time"][:4],
                v["hor_w16_pic"],
                v["main_actor"],
                v["album_desc"],
                v["aid"],
                score
            )
            # Re-read to obtain the freshly inserted row (with its id).
            d = DramaModel.instance().get_by_name(name)
        else:
            logging.info("set score %s for %s" % (score, d["id"]))
            DramaModel.instance().set_score(d["id"], score)
        # Resume episode parsing after the newest stored episode, if any.
        eps = DramaEpisodeModel.instance().get_by_drama_id(d["id"])
        if eps:
            sp.parse_album_by_aid(d["id"], v["aid"], eps[-1]["episode"])
        else:
            sp.parse_album_by_aid(d["id"], v["aid"])
def __init__(self, conf, debug=False):
    """Wire up URL routes, template/static settings, DB models and the
    WeChat client, then initialize the Tornado application base.

    :param conf: configuration object exposing get(key)
    :param debug: enable Tornado debug mode (autoreload etc.)
    """
    # Raw strings for the regex routes: avoids invalid-escape ambiguity for
    # \S while producing byte-identical patterns.
    handlers = [
        ('/', IndexHandler),
        ('/admin/drama/add', AdminDramaAddHandler),
        ('/admin/drama/list', AdminDramaListHandler),
        ('/admin/drama/search', AdminDramaSearchHandler),
        ('/admin/drama/parser', AdminDramaParserHandler),
        ('/api/drama/list', ApiDramaListHandler),
        ('/api/drama/search', ApiDramaSearchHandler),
        (r'/drama/episode/play/(\S+)', DramaEpisodePlayHandler),
        (r'/drama/episode/(\S+)', DramaEpisodeHandler),
        ('/weixin', WeixinHandler),
    ]
    settings = dict(
        template_path=os.path.join(os.path.dirname(__file__), "./web/template"),
        static_path=os.path.join(os.path.dirname(__file__), "./web/static"),
        debug=debug,
        autoescape=None,  # templates emit raw HTML; auto-escaping disabled
    )
    self.conf = conf
    engine = MysqlEngine(conf.get('db.uri'))
    BaseModel.setup_all_model(engine)  # bind every model class to this engine
    self.dramaModel = DramaModel.instance()
    self.episodeModel = DramaEpisodeModel.instance()
    self.dramaService = DramaService()
    self.wechat = WechatBasic(token=conf.get("wechat.token"),
                              appid=conf.get("wechat.appId"),
                              appsecret=conf.get("wechat.appSecret"))
    self.hashid = Hashids(salt="woshifyz")  # obfuscates numeric ids in URLs
    self.parser = {'tudou': TudouParser()}
    super(Application, self).__init__(handlers, **settings)
def parse_album_by_aid(self, drama_id, aid, last_ep=0):
    """Fetch the Sohu album `aid` and persist its episodes after `last_ep`.

    :param drama_id: local drama row id the episodes belong to
    :param aid: Sohu album id substituted into the API URL
    :param last_ep: highest episode number already stored; earlier ones skipped
    """
    source = (
        "http://api.tv.sohu.com/v4/album/videos/%s.json?page_size=100&api_key=695fe827ffeb7d74260a813025970bd5&plat=3&partner=1&sver=5.0.1&poid=1&page=1&with_fee_video=1&"
        % aid
    )
    content = self.get_decoded_json(source)
    if not content:
        logging.error("sohu parse album by aid error, %s, %s" % (drama_id, aid))
        return
    videos = content["data"]["videos"]
    for i, v in enumerate(videos):
        # Episode numbers are 1-based; skip everything already stored.
        if i + 1 <= last_ep:
            continue
        # Prefer the higher-quality pair when a "super" stream exists.
        if v.get("url_super", None) is not None:
            url = v["url_high"]
            hd_url = v["url_super"]
        else:
            url = v["url_nor"]
            hd_url = v["url_high"]
        v1, v2 = UrlContentModel.instance().insert(url), UrlContentModel.instance().insert(hd_url)
        # Only persist the episode when both URL payloads were stored.
        if v1 > 0 and v2 > 0:
            DramaEpisodeModel.instance().insert(drama_id, i + 1, 0, source, url, hd_url)
        else:
            logging.error("sohu get url content error, %s, %s" % (url, hd_url))
def __init__(self, conf, debug=False):
    """Wire up URL routes, template/static settings, DB models and the
    WeChat client, then hand everything to the Tornado Application base.

    :param conf: configuration object exposing get(key)
    :param debug: enable Tornado debug mode
    """
    # NOTE(review): the \S patterns live in non-raw strings; they work only
    # because \S is not a recognized escape - raw strings would be safer.
    handlers = [
        ('/', IndexHandler),
        ('/admin/drama/add', AdminDramaAddHandler),
        ('/admin/drama/list', AdminDramaListHandler),
        ('/admin/drama/search', AdminDramaSearchHandler),
        ('/admin/drama/parser', AdminDramaParserHandler),
        ('/api/drama/list', ApiDramaListHandler),
        ('/api/drama/search', ApiDramaSearchHandler),
        ('/drama/episode/play/(\S+)', DramaEpisodePlayHandler),
        ('/drama/episode/(\S+)', DramaEpisodeHandler),
        ('/weixin', WeixinHandler),
    ]
    settings = dict(
        template_path=os.path.join(os.path.dirname(__file__), "./web/template"),
        static_path=os.path.join(os.path.dirname(__file__), "./web/static"),
        debug=debug,
        autoescape=None  # templates emit raw HTML; auto-escaping disabled
    )
    self.conf = conf
    engine = MysqlEngine(conf.get('db.uri'))
    BaseModel.setup_all_model(engine)  # bind every model class to this engine
    self.dramaModel = DramaModel.instance()
    self.episodeModel = DramaEpisodeModel.instance()
    self.dramaService = DramaService()
    self.wechat = WechatBasic(token=conf.get("wechat.token"),
                              appid=conf.get("wechat.appId"),
                              appsecret=conf.get("wechat.appSecret"))
    self.hashid = Hashids(salt="woshifyz")  # obfuscates numeric ids in URLs
    self.parser = {
        'tudou': TudouParser()
    }
    super(Application, self).__init__(handlers, **settings)
def get_drama_infos(self, count, offset):
    """Return up to `count` available dramas starting at `offset`, each
    augmented with its episode list under the "eps" key."""
    rows = DramaModel.instance().list_avalable(count, offset)
    for row in rows:
        row["eps"] = DramaEpisodeModel.instance().get_by_drama_id(row.id)
    return rows
def new_drama(self, count=10):
    """Return `count` episodes from DramaEpisodeModel.new_drama, each with
    its drama row attached under the "drama" key."""
    episodes = DramaEpisodeModel.instance().new_drama(count=count)
    for episode in episodes:
        episode["drama"] = DramaModel.instance().get_by_id(episode.drama_id)
    return episodes
def search_by_name(self, name, count):
    """Search dramas by `name` (up to `count` results), attaching each
    drama's episode list under the "eps" key."""
    matches = DramaModel.instance().search_by_name(name, count)
    for match in matches:
        match["eps"] = DramaEpisodeModel.instance().get_by_drama_id(match.id)
    return matches
def get_drama_infos(self, count, offset):
    """Page through available dramas, attaching each drama's episode list
    under the 'eps' key."""
    dramas = DramaModel.instance().list_avalable(count, offset)
    for drama in dramas:
        eps = DramaEpisodeModel.instance().get_by_drama_id(drama.id)
        drama['eps'] = eps
    return dramas
def new_drama(self, count=10):
    """Return `count` episodes from DramaEpisodeModel.new_drama (presumably
    the most recent - confirm against the model), each annotated with its
    drama row under the 'drama' key."""
    eps = DramaEpisodeModel.instance().new_drama(count=count)
    for ep in eps:
        ep['drama'] = DramaModel.instance().get_by_id(ep.drama_id)
    return eps
def search_by_name(self, name, count):
    """Search dramas by name (up to `count` results), attaching each
    drama's episode list under the 'eps' key."""
    dramas = DramaModel.instance().search_by_name(name, count)
    for drama in dramas:
        eps = DramaEpisodeModel.instance().get_by_drama_id(drama.id)
        drama['eps'] = eps
    return dramas
def __init__(self):
    """Initialize the collaborators used while scraping episodes."""
    # Tudou parser - presumably turns Tudou video ids into stream URLs;
    # verify against TudouParser.parse's contract.
    self.tudouParser = TudouParser()
    # Persistence models for episodes and raw URL content.
    self.model = DramaEpisodeModel()
    self.urlModel = UrlContentModel()