Example #1
0
 def dianshiju_info(self, tv_infos, db_tv_names):
     """Fetch drama metadata for each entry and persist it through TvInfo.

     For every ``name -> tv_info`` pair the detail URL is built from
     ``tv_info[1]['id']`` and ``tv_info[2]['v_id']``, requested, and
     validated with ``tv_info_is_valid``.  A falsy validation result is
     logged and the entry is skipped.  Otherwise the record is written with
     ``TvInfo.update`` when ``name`` is in ``db_tv_names`` and with
     ``TvInfo.add`` when it is not.

     Args:
         tv_infos: mapping of show name to a sequence whose items 0, 1 and 2
             are dicts holding ``'url'``, ``'id'`` and ``'v_id'``.
         db_tv_names: container of names tested with ``in``.
     """
     url = u'http://cache.video.qiyi.com/jp/vi/{}/{}/'
     for name, tv_info in tv_infos.items():
         warning_message = u"《iqy {}》tv_info ,结果不准确\r\n".format(name)
         tv_id = tv_info[1]['id']
         vids = tv_info[2]['v_id']
         json_content = tv_info_is_valid(request(url.format(tv_id, vids)))
         if not json_content:
             utils.log(message=warning_message)
             continue

         # Built once and shared by both the update and the add path.
         # NOTE(review): ``vids`` is stored as tv_id, not the v_id used in
         # the URL above -- confirm this is intended.
         record = dict(
             all_number=json_content['es'],
             current_number=json_content['upOrder'],
             description=json_content['info'],
             label=json_content['tg'],
             cast_member=json_content['ma'],
             update_info=json_content['qiyiPlayStrategy'][:32],
             last_update_time=json_content['up'],
             detail_urls=tv_info[0]['url'],
             type=u'电视剧',
             detail_titles=json_content['vn'],
             detail_episodes='',
             name=name,
             tv_id=tv_id,
             platform=PLATFORM,
             vids=tv_id,
         )
         save = TvInfo.update if name in db_tv_names else TvInfo.add
         save(**record)
Example #2
0
def start_qq(now):
    start = int(time.time())
    print "qq开始抓取 .."
    qq_spi = SpiderQq()
    qq_db = SerializeQq(now)

    # spider tv_names
    tv_names = qq_spi.tv_names()
    tv_names = list(set(tv_names))

    # spider tv_info
    qq_spi.tv_info(tv_names)

    # db tv_info
    db_tv_names = [_.name for _ in TvInfo.mget_by_platform(u'qq')]
    qq_db.tv_info(tv_names, db_tv_names)

    tv_names = tv_names + db_tv_names
    tv_names = list(set(tv_names))

    # spider play
    db_tv_infos = TvInfo.mget_by_platform(u'qq')
    qq_spi.play_info(db_tv_infos)

    # db play_info
    qq_db.play_info(db_tv_infos)

    end = int(time.time())
    print 'qq抓取完毕,耗时', utils.format_seconds(end - start)
Example #3
0
File: start.py Project: hncg/water
def start_qq(now):
    start = int(time.time())
    print "qq开始抓取 .."
    qq_spi = SpiderQq()
    qq_db = SerializeQq(now)

    # spider tv_names
    tv_names = qq_spi.tv_names()
    tv_names = list(set(tv_names))

    # spider tv_info
    qq_spi.tv_info(tv_names)

    # db tv_info
    db_tv_names = [_.name for _ in TvInfo.mget_by_platform(u'qq')]
    qq_db.tv_info(tv_names, db_tv_names)

    tv_names = tv_names + db_tv_names
    tv_names = list(set(tv_names))

    # spider play
    db_tv_infos = TvInfo.mget_by_platform(u'qq')
    qq_spi.play_info(db_tv_infos)

    # db play_info
    qq_db.play_info(db_tv_infos)

    end = int(time.time())
    print 'qq抓取完毕,耗时', utils.format_seconds(end - start)
Example #4
0
File: iqy.py Project: zffzjx/water
 def dianshiju_info(self, tv_infos, db_tv_names):
     """Request, validate and store drama details for every given show.

     Each ``tv_info`` supplies the page URL list (item 0, key ``'url'``),
     the show id (item 1, key ``'id'``) and the video id (item 2, key
     ``'v_id'``).  The two ids are formatted into the cache URL; the
     response goes through ``tv_info_is_valid``.  When that returns a falsy
     value a warning is logged and the show is skipped.  Valid results are
     saved with ``TvInfo.update`` if the name is in ``db_tv_names``,
     otherwise with ``TvInfo.add``.

     Args:
         tv_infos: mapping of show name to the three-dict sequence above.
         db_tv_names: names checked with ``in`` to pick update or add.
     """
     url = u'http://cache.video.qiyi.com/jp/vi/{}/{}/'
     for name, tv_info in tv_infos.items():
         warning_message = u"《iqy {}》tv_info ,结果不准确\r\n".format(name)
         tv_id = tv_info[1]['id']
         vids = tv_info[2]['v_id']
         page = request(url.format(tv_id, vids))
         json_content = tv_info_is_valid(page)
         if not json_content:
             utils.log(message=warning_message)
             continue

         fields = {
             'all_number': json_content['es'],
             'current_number': json_content['upOrder'],
             'description': json_content['info'],
             'label': json_content['tg'],
             'cast_member': json_content['ma'],
             'update_info': json_content['qiyiPlayStrategy'][:32],
             'last_update_time': json_content['up'],
             'detail_urls': tv_info[0]['url'],
             'type': u'电视剧',
             'detail_titles': json_content['vn'],
             'detail_episodes': '',
             'name': name,
             'tv_id': tv_id,
             'platform': PLATFORM,
             # NOTE(review): the show id, not the v_id above, is stored
             # as ``vids`` -- verify against the readers of this column.
             'vids': tv_id,
         }
         if name in db_tv_names:
             TvInfo.update(**fields)
         else:
             TvInfo.add(**fields)
Example #5
0
 def zongyi_info(self, tv_infos, db_tv_names):
     """Store variety-show records built directly from ``tv_infos``.

     No request is made here.  Each ``tv_info`` is indexed positionally:
     item 0 is a sequence of strings (joined with ``,`` into ``vids`` and
     counted for ``all_number``), item 1 is the current number, item 2 the
     description and item 3 the cast.  All remaining columns are written as
     empty unicode strings.  ``TvInfo.update`` is used when the name is in
     ``db_tv_names``, ``TvInfo.add`` otherwise.

     Args:
         tv_infos: mapping of show name to the positional sequence above.
         db_tv_names: names checked with ``in`` to pick update or add.
     """
     for name, tv_info in tv_infos.items():
         record = dict(
             description=tv_info[2],
             all_number=len(tv_info[0]),
             current_number=tv_info[1],
             cast_member=tv_info[3],
             vids=",".join(tv_info[0]),
             name=name,
             platform=PLATFORM,
             type=u'综艺',
             # Columns with no value for variety shows.
             tv_id=u'',
             last_update_time=u'',
             label=u'',
             update_info=u'',
             detail_urls=u'',
             detail_titles=u'',
             detail_episodes=u'',
         )
         save = TvInfo.update if name in db_tv_names else TvInfo.add
         save(**record)
Example #6
0
File: start.py Project: hncg/water
def start_let(now):
    start = int(time.time())
    print "let开始抓取 .."
    let_spi = SpiderLet()
    let_db = SerializeLet(now)

    # dianshiju
    dianshiju_urls_map = let_spi.dianshiju_urls_map()
    tv_infos = TvInfo.mget_by_platform(u'let')
    db_tv_names = [_.name for _ in tv_infos]

    db_play_info_map = PlayInfo.mget_map_by_platform_and_time_after(
        'let', utils.format_time(time.time(), "%Y-%m-%d"))
    for tv_info in tv_infos:
        if not dianshiju_urls_map.get(tv_info.name) and tv_info.type == u'电视剧': # noqa
            dianshiju_urls_map[tv_info.name] = [tv_info.detail_urls, tv_info.tv_id, tv_info.cast_member, tv_info.label] # noqa

    let_db.dianshiju(dianshiju_urls_map, db_tv_names, db_play_info_map)
    # zongyi
    zongyi_urls_map = let_spi.zongyi_urls_map()
    for tv_info in tv_infos:
        if not zongyi_urls_map.get(tv_info.name) and tv_info.type == u'综艺':
            zongyi_urls_map[tv_info.name] = [tv_info.detail_urls, tv_info.label] # noqa

    let_db.zongyi(zongyi_urls_map, db_tv_names, db_play_info_map)
    end = int(time.time())
    print 'let抓取完毕,耗时', utils.format_seconds(end - start)
Example #7
0
def start_let(now):
    start = int(time.time())
    print "let开始抓取 .."
    let_spi = SpiderLet()
    let_db = SerializeLet(now)

    # dianshiju
    dianshiju_urls_map = let_spi.dianshiju_urls_map()
    tv_infos = TvInfo.mget_by_platform(u'let')
    db_tv_names = [_.name for _ in tv_infos]

    db_play_info_map = PlayInfo.mget_map_by_platform_and_time_after(
        'let', utils.format_time(time.time(), "%Y-%m-%d"))
    for tv_info in tv_infos:
        if not dianshiju_urls_map.get(
                tv_info.name) and tv_info.type == u'电视剧':  # noqa
            dianshiju_urls_map[tv_info.name] = [
                tv_info.detail_urls, tv_info.tv_id, tv_info.cast_member,
                tv_info.label
            ]  # noqa

    let_db.dianshiju(dianshiju_urls_map, db_tv_names, db_play_info_map)
    # zongyi
    zongyi_urls_map = let_spi.zongyi_urls_map()
    for tv_info in tv_infos:
        if not zongyi_urls_map.get(tv_info.name) and tv_info.type == u'综艺':
            zongyi_urls_map[tv_info.name] = [
                tv_info.detail_urls, tv_info.label
            ]  # noqa

    let_db.zongyi(zongyi_urls_map, db_tv_names, db_play_info_map)
    end = int(time.time())
    print 'let抓取完毕,耗时', utils.format_seconds(end - start)
Example #8
0
File: iqy.py Project: zffzjx/water
 def zongyi_info(self, tv_infos, db_tv_names):
     """Write one variety-show row per entry of ``tv_infos``.

     The values come straight from ``tv_info`` by position: ``[0]`` is a
     sequence of strings (its length becomes ``all_number`` and its
     ``,``-joined form becomes ``vids``), ``[1]`` the current number,
     ``[2]`` the description and ``[3]`` the cast.  Every other column is
     an empty unicode string.  Names found in ``db_tv_names`` go through
     ``TvInfo.update``; all others through ``TvInfo.add``.

     Args:
         tv_infos: mapping of show name to the positional sequence above.
         db_tv_names: names checked with ``in`` to pick update or add.
     """
     blank = u''
     for name, tv_info in tv_infos.items():
         fields = {
             'description': tv_info[2],
             'all_number': len(tv_info[0]),
             'current_number': tv_info[1],
             'cast_member': tv_info[3],
             'vids': ",".join(tv_info[0]),
             'type': u'综艺',
             'name': name,
             'platform': PLATFORM,
             'tv_id': blank,
             'last_update_time': blank,
             'label': blank,
             'update_info': blank,
             'detail_urls': blank,
             'detail_titles': blank,
             'detail_episodes': blank,
         }
         if name in db_tv_names:
             TvInfo.update(**fields)
         else:
             TvInfo.add(**fields)
Example #9
0
File: start.py Project: hncg/water
def start_iqy(now):
    start = int(time.time())
    print "iqy开始抓取 .."
    iqy_spi = SpiderIqy()
    iqy_db = SerializeIqy(now)

    # dianshiju
    dianshiju_infos = iqy_spi.dianshiju_infos()
    tv_infos = TvInfo.mget_by_platform_and_type(u'iqy', u'电视剧')
    db_tv_names = [_.name for _ in tv_infos]
    for tv_info in tv_infos:
        if not dianshiju_infos.get(tv_info.name) and tv_info.type == u'电视剧':
            dianshiju_infos[tv_info.name] = \
                [
                    {'url': [tv_info.detail_urls]},
                    {'id': tv_info.tv_id},
                    {'v_id': tv_info.vids}]
    iqy_db.dianshiju_info(dianshiju_infos, db_tv_names)

    # zongyi
    zongyi_infos = iqy_spi.zongyi_infos()
    tv_infos = TvInfo.mget_by_platform_and_type(u'iqy', u'综艺')
    db_tv_names = [_.name for _ in tv_infos]
    for tv_info in tv_infos:
        if not zongyi_infos.get(tv_info.name) and tv_info.type == u'综艺':
            zongyi_infos[tv_info.name] = \
                [
                    tv_info.vids.split(","),
                    tv_info.current_number,
                    tv_info.description,
                    tv_info.cast_member]
    iqy_db.zongyi_info(zongyi_infos, db_tv_names)
    # play_info
    db_tv_infos = TvInfo.mget_by_platform(u'iqy')
    db_play_info_map = PlayInfo.mget_map_by_platform_and_time_after(
        'iqy', utils.format_time(time.time(), "%Y-%m-%d"))
    iqy_db.play_info(db_play_info_map, db_tv_infos)
    end = int(time.time())

    print 'iqy抓取完毕,耗时', utils.format_seconds(end - start)
Example #10
0
def start_iqy(now):
    start = int(time.time())
    print "iqy开始抓取 .."
    iqy_spi = SpiderIqy()
    iqy_db = SerializeIqy(now)

    # dianshiju
    dianshiju_infos = iqy_spi.dianshiju_infos()
    tv_infos = TvInfo.mget_by_platform_and_type(u'iqy', u'电视剧')
    db_tv_names = [_.name for _ in tv_infos]
    for tv_info in tv_infos:
        if not dianshiju_infos.get(tv_info.name) and tv_info.type == u'电视剧':
            dianshiju_infos[tv_info.name] = \
                [
                    {'url': [tv_info.detail_urls]},
                    {'id': tv_info.tv_id},
                    {'v_id': tv_info.vids}]
    iqy_db.dianshiju_info(dianshiju_infos, db_tv_names)

    # zongyi
    zongyi_infos = iqy_spi.zongyi_infos()
    tv_infos = TvInfo.mget_by_platform_and_type(u'iqy', u'综艺')
    db_tv_names = [_.name for _ in tv_infos]
    for tv_info in tv_infos:
        if not zongyi_infos.get(tv_info.name) and tv_info.type == u'综艺':
            zongyi_infos[tv_info.name] = \
                [
                    tv_info.vids.split(","),
                    tv_info.current_number,
                    tv_info.description,
                    tv_info.cast_member]
    iqy_db.zongyi_info(zongyi_infos, db_tv_names)
    # play_info
    db_tv_infos = TvInfo.mget_by_platform(u'iqy')
    db_play_info_map = PlayInfo.mget_map_by_platform_and_time_after(
        'iqy', utils.format_time(time.time(), "%Y-%m-%d"))
    iqy_db.play_info(db_play_info_map, db_tv_infos)
    end = int(time.time())

    print 'iqy抓取完毕,耗时', utils.format_seconds(end - start)
Example #11
0
File: start.py Project: hncg/water
def start_sh(now):
    start = int(time.time())
    print "sh开始抓取 .."
    sh_spi = SpiderSh()
    sh_db = SerializeSh(now)
    # db
    pids_map = sh_spi.pids_map()
    tv_infos = TvInfo.mget_by_platform(u'sh')
    db_tv_names = [_.name for _ in tv_infos]
    for tv_info in tv_infos:
        if not pids_map.get(tv_info.name):
            pids_map[tv_info.name] = tv_info.tv_id
    db_play_info_map = PlayInfo.mget_map_by_platform_and_time_after(
        'sh', utils.format_time(time.time(), "%Y-%m-%d"))
    sh_db.info_and_play(pids_map, db_tv_names, db_play_info_map)

    end = int(time.time())
    print 'sh抓取完毕,耗时', utils.format_seconds(end - start)
Example #12
0
def start_sh(now):
    start = int(time.time())
    print "sh开始抓取 .."
    sh_spi = SpiderSh()
    sh_db = SerializeSh(now)
    # db
    pids_map = sh_spi.pids_map()
    tv_infos = TvInfo.mget_by_platform(u'sh')
    db_tv_names = [_.name for _ in tv_infos]
    for tv_info in tv_infos:
        if not pids_map.get(tv_info.name):
            pids_map[tv_info.name] = tv_info.tv_id
    db_play_info_map = PlayInfo.mget_map_by_platform_and_time_after(
        'sh', utils.format_time(time.time(), "%Y-%m-%d"))
    sh_db.info_and_play(pids_map, db_tv_names, db_play_info_map)

    end = int(time.time())
    print 'sh抓取完毕,耗时', utils.format_seconds(end - start)
Example #13
0
def start_mg(now):
    start = int(time.time())
    print "mg开始抓取 .."
    mg_spi = SpiderMg()
    mg_db = SerializeMg(now)
    # db
    pids_map = mg_spi.pids_map()
    tv_infos = TvInfo.mget_by_platform(u'mg')
    db_tv_names = [_.name for _ in tv_infos]
    reverse = {v: k for k, v in TV_TYPE_MAP.iteritems()}
    for tv_info in tv_infos:
        if not pids_map.get(tv_info.name):
            type_n = reverse[tv_info.type]
            pids_map[tv_info.name] = [tv_info.tv_id, type_n]

    db_play_info_map = PlayInfo.mget_map_by_platform_and_time_after(
        'mg', utils.format_time(time.time(), "%Y-%m-%d"))
    mg_db.info_and_play(pids_map, db_tv_names, db_play_info_map)
    end = int(time.time())
    print 'mg抓取完毕,耗时', utils.format_seconds(end - start)
Example #14
0
File: start.py Project: hncg/water
def start_yk(now):
    start = int(time.time())
    print "yk开始抓取 .."
    yk_spi = SpiderYk()
    yk_db = SerializeYk(now)

    # spider urls_map
    tv_urls_map = yk_spi.tv_urls_map()
    # db info and play
    tv_infos = TvInfo.mget_by_platform(u'yk')
    db_tv_names = [_.name for _ in tv_infos]

    db_play_info_map = PlayInfo.mget_map_by_platform_and_time_after(
        'yk', utils.format_time(time.time(), "%Y-%m-%d"))
    for tv_info in tv_infos:
        if not tv_urls_map.get(tv_info.name):
            tv_urls_map[tv_info.name] = tv_info.detail_urls
    yk_db.info_and_play(tv_urls_map, db_tv_names, db_play_info_map)
    end = int(time.time())
    print 'yk抓取完毕,耗时', utils.format_seconds(end - start)
Example #15
0
def start_yk(now):
    start = int(time.time())
    print "yk开始抓取 .."
    yk_spi = SpiderYk()
    yk_db = SerializeYk(now)

    # spider urls_map
    tv_urls_map = yk_spi.tv_urls_map()
    # db info and play
    tv_infos = TvInfo.mget_by_platform(u'yk')
    db_tv_names = [_.name for _ in tv_infos]

    db_play_info_map = PlayInfo.mget_map_by_platform_and_time_after(
        'yk', utils.format_time(time.time(), "%Y-%m-%d"))
    for tv_info in tv_infos:
        if not tv_urls_map.get(tv_info.name):
            tv_urls_map[tv_info.name] = tv_info.detail_urls
    yk_db.info_and_play(tv_urls_map, db_tv_names, db_play_info_map)
    end = int(time.time())
    print 'yk抓取完毕,耗时', utils.format_seconds(end - start)
Example #16
0
File: start.py Project: hncg/water
def start_mg(now):
    start = int(time.time())
    print "mg开始抓取 .."
    mg_spi = SpiderMg()
    mg_db = SerializeMg(now)
    # db
    pids_map = mg_spi.pids_map()
    tv_infos = TvInfo.mget_by_platform(u'mg')
    db_tv_names = [_.name for _ in tv_infos]
    reverse = {v: k for k, v in TV_TYPE_MAP.iteritems()}
    for tv_info in tv_infos:
        if not pids_map.get(tv_info.name):
            type_n = reverse[tv_info.type]
            pids_map[tv_info.name] = [tv_info.tv_id, type_n]

    db_play_info_map = PlayInfo.mget_map_by_platform_and_time_after(
        'mg', utils.format_time(time.time(), "%Y-%m-%d"))
    mg_db.info_and_play(pids_map, db_tv_names, db_play_info_map)
    end = int(time.time())
    print 'mg抓取完毕,耗时', utils.format_seconds(end - start)
Example #17
0
    def tv_info(self, tv_names, db_tv_names):
        """Parse saved info pages and store one TvInfo row per title.

        For each name the saved page is read with ``utils.read`` and
        validated with ``tv_info_is_valid_qq``; a falsy result skips the
        title.  Scalar fields come from the dict returned by
        ``get_playlist``; per-episode fields come from ``get_all_list`` and
        are stored as ``,``-joined strings.  The row is written with
        ``TvInfo.update`` when the name is in ``db_tv_names`` and with
        ``TvInfo.add`` otherwise.

        Args:
            tv_names: iterable of title names to process.
            db_tv_names: names checked with ``in`` to pick update or add.
        """
        info_dir = TV_INFO_FILE_DIR + SAVE_FILE
        episode_keys = ('id', 'url', 'title', 'episode_number')
        for name in tv_names:
            page = utils.read(info_dir, name + TV_INFO_FILE_FIX)
            json_content = tv_info_is_valid_qq(page)
            if not json_content:
                continue
            play_list = get_playlist(json_content)

            # Keep only the first run of characters in the CJK range.
            tv_type = re.search(u'[\u4e00-\u9fa5]+', play_list['BC']).group()
            description = play_list['TX']
            last_update_time = play_list['AT']
            update_info = play_list['SS']
            tv_id = play_list['ID']
            label = play_list['BE']
            # Strip markup tags from the cast string.
            cast_member = re.sub(u'<.+?>', u'', play_list['BM'])

            # Prefer a d-d-d pattern in 'TT'; otherwise fall back to the
            # first run of digits, or '' when there are no digits.
            title_text = play_list['TT']
            hit = re.search('\d+-\d+-\d+', title_text) or \
                re.search('\d+', title_text)
            current_number = hit.group() if hit else ''

            src_play_list = play_list['src_list']['vsrcarray'][0]['playlist']
            columns = dict((key, []) for key in episode_keys)
            for entry in get_all_list(src_play_list):
                for key in episode_keys:
                    columns[key].append(entry[key])

            record = dict(
                all_number=len(columns['id']),
                vids=",".join(columns['id']),
                detail_urls=",".join(columns['url']),
                detail_titles=",".join(columns['title']),
                detail_episodes=",".join(columns['episode_number']),
                name=name,
                tv_id=tv_id,
                description=description,
                last_update_time=last_update_time,
                current_number=current_number,
                cast_member=cast_member,
                platform=PLATFORM,
                label=label,
                update_info=update_info,
                type=tv_type,
            )
            save = TvInfo.update if name in db_tv_names else TvInfo.add
            save(**record)
Example #18
0
File: mg.py Project: hncg/water
 def info_and_play(self, pids_map, db_tv_names, db_play_info_map):
     """Scrape info and play counts for each show and store both.

     For every ``name -> [pid, type_n]`` entry the info page is requested
     and fields are cut out of it with regular expressions.  The episode
     total is read from the page for dramas and from two extra JSON
     requests for other types.  The play-count endpoint is then queried.
     A ``TvInfo`` row is updated or added and a ``PlayInfo`` row is always
     added.  Any failed validation logs a warning and skips the show.

     Args:
         pids_map: mapping of show name to a sequence whose item 0 is the
             pid and item 1 is a key of ``TV_TYPE_MAP``.
         db_tv_names: names checked with ``in`` to pick update or add.
         db_play_info_map: mapping of show name to a previously stored
             total play count (converted with ``int`` below).

     NOTE(review): every ``re.search(...).group()`` below raises
     ``AttributeError`` if its pattern is absent from the page; none of
     them is guarded.
     """
     play_url = 'http://videocenter-2039197532.cn-north-1.elb.amazonaws.com.cn/dynamicinfo?callback=callback&cid={}' # noqa
     info_url = 'http://www.mgtv.com/v/{type_n}/{pid}'
     year_url = 'http://www.mgtv.com/v/1/{}/s/json.year.js'
     number_url = 'http://www.mgtv.com/v/1/{pid}/s/json.{year}.js'
     for name, tv_infos in pids_map.items():
         # pid is the utf8-encoded form; the info URL below uses the
         # un-encoded tv_infos[0].
         pid = tv_infos[0].encode('utf8')
         tv_id = pid
         info = request(info_url.format(type_n=tv_infos[1], pid=tv_infos[0])) # noqa
         info = info_is_valid(info)
         if not info:
             warning_message = u"mg《{}》tv_info ,结果不准确\r\n". \
                 format(name)
             utils.log(message=warning_message)
             continue
         # Columns this scraper does not fill.
         last_update_time = ''
         update_info = ''
         detail_urls = ''
         detail_titles = ''
         detail_episodes = ''
         current_number = re.search(u'"lastseries" : ".+?"', info).group()
         # Take the text after the first ':' and drop the leading ' "' and
         # the trailing '"'.
         current_number = current_number.split(':')[1][2:-1]
         description = re.search(u'简介</em>(.|\n)+?</span>', info).group()
         # Remove tags, the field caption, colons and whitespace.
         description = re.compile(u'<.+?>|简介|:|\s').sub(u'', description)
         tv_type = TV_TYPE_MAP[tv_infos[1]]
         # The cast caption on the page differs between dramas and others.
         cast_flag = u'主演' if tv_type == u'电视剧' else u'主持人'
         cast_member = re.search(u'{}</em>(.|\n)+?</p>'.format(cast_flag),
                                 info).group()
         cast_member = re.compile(u'<.+?>|主演|主持人|:').sub(u'', cast_member)
         label = re.search(u'类型</em>(.|\n)+?</p>', info).group()
         label = re.compile(u'<.+?>|类型|:|\s').sub(u'', label)
         if tv_type == u'电视剧':
             # Dramas: the total is the digits inside the matched fragment.
             all_number = re.search(u'共<b>\d+?</b>集', info).group()
             all_number = re.search(u'\d+', all_number).group()
         else:
             # Other types: fetch the year list, then the list for year[0],
             # and count its items.
             year_json = request(year_url.format(pid))
             year = year_json_is_valid(year_json)
             if not year:
                 warning_message = u"mg zongi《{}》year_info ,结果不准确\r\n". \
                     format(name)
                 utils.log(message=warning_message)
                 continue
             number_info = request(number_url.format(pid=pid,
                                   year=(int)(year[0])))
             number_info = number_info_is_valid(number_info)
             if not number_info:
                 warning_message = u"mg zongyi《{}》number_info ,结果不准确\r\n". \
                     format(name)
                 utils.log(message=warning_message)
                 continue
             all_number = len([_ for _ in number_info])
         play_info = request(play_url.format(pid))
         play_json = play_is_valid(play_info)
         if not play_json:
             warning_message = u"mg《{}》play_info ,结果不准确\r\n". \
                 format(name)
             utils.log(message=warning_message)
             continue
         all_play_counts_str = play_json['data']['allVVStr']
         # Strip the unit character, parse as float, then scale by the unit
         # (万 = 10000, 亿 = 100000000).
         all_play_counts = (float)(re.compile(u'万|亿').sub(u'', all_play_counts_str)) # noqa
         if u'万'in all_play_counts_str:
             all_play_counts *= 10000
         elif u'亿'in all_play_counts_str:
             all_play_counts *= 100000000
         pre_all_play_counts = db_play_info_map.get(name)
         # Increase over the stored total, floored at 0; 0 when there is no
         # (or a falsy) stored total.
         day_play_counts = pre_all_play_counts and \
             max(all_play_counts - (int)(pre_all_play_counts), 0) or 0
         if name in db_tv_names:
             TvInfo.update(name=name, tv_id=pid,
                           description=description,
                           last_update_time=last_update_time,
                           all_number=all_number,
                           current_number=current_number,
                           cast_member=cast_member,
                           platform=PLATFORM,
                           label=label, update_info=update_info,
                           detail_urls=detail_urls,
                           vids=tv_id,
                           type=tv_type,
                           detail_titles=detail_titles,
                           detail_episodes=detail_episodes,
                           )
         else:
             TvInfo.add(name=name, tv_id=tv_id,
                        description=description,
                        last_update_time=last_update_time,
                        all_number=all_number,
                        current_number=current_number,
                        cast_member=cast_member, platform=PLATFORM,
                        label=label, update_info=update_info,
                        detail_urls=detail_urls, vids=tv_id,
                        type=tv_type,
                        detail_titles=detail_titles,
                        detail_episodes=detail_episodes,
                        )
         # A play-count row is recorded for every show that reaches here,
         # stamped with self.now.
         PlayInfo.add(
             tv_id=tv_id,
             tv_name=name,
             day_play_counts=day_play_counts,
             all_play_counts=all_play_counts,
             time_at=self.now,
             platform=PLATFORM,
             type=tv_type
         )
Example #19
0
File: mg.py Project: zffzjx/water
 def info_and_play(self, pids_map, db_tv_names, db_play_info_map):
     """Scrape and persist show info plus play counts for each pid.

     Per ``name -> [pid, type_n]`` entry: request the info page, extract
     fields with regular expressions, work out the episode total (from the
     page for dramas, from two JSON requests otherwise), request the play
     counts, then write a ``TvInfo`` row (update or add) and add a
     ``PlayInfo`` row.  Whenever a validator returns a falsy value a
     warning is logged and the show is skipped.

     Args:
         pids_map: mapping of show name to a sequence whose item 0 is the
             pid and item 1 is a key of ``TV_TYPE_MAP``.
         db_tv_names: names checked with ``in`` to pick update or add.
         db_play_info_map: mapping of show name to a previously stored
             total play count (converted with ``int`` below).

     NOTE(review): the ``re.search(...).group()`` calls are unguarded, so a
     page missing any of the patterns raises ``AttributeError``.
     """
     play_url = 'http://videocenter-2039197532.cn-north-1.elb.amazonaws.com.cn/dynamicinfo?callback=callback&cid={}'  # noqa
     info_url = 'http://www.mgtv.com/v/{type_n}/{pid}'
     year_url = 'http://www.mgtv.com/v/1/{}/s/json.year.js'
     number_url = 'http://www.mgtv.com/v/1/{pid}/s/json.{year}.js'
     for name, tv_infos in pids_map.items():
         # utf8-encoded pid; also used as tv_id and vids when storing.
         pid = tv_infos[0].encode('utf8')
         tv_id = pid
         info = request(info_url.format(type_n=tv_infos[1],
                                        pid=tv_infos[0]))  # noqa
         info = info_is_valid(info)
         if not info:
             warning_message = u"mg《{}》tv_info ,结果不准确\r\n". \
                 format(name)
             utils.log(message=warning_message)
             continue
         # Columns left empty by this scraper.
         last_update_time = ''
         update_info = ''
         detail_urls = ''
         detail_titles = ''
         detail_episodes = ''
         current_number = re.search(u'"lastseries" : ".+?"', info).group()
         # Text after the first ':' minus the leading ' "' and trailing '"'.
         current_number = current_number.split(':')[1][2:-1]
         description = re.search(u'简介</em>(.|\n)+?</span>', info).group()
         # Drop tags, the caption, colons and whitespace.
         description = re.compile(u'<.+?>|简介|:|\s').sub(u'', description)
         tv_type = TV_TYPE_MAP[tv_infos[1]]
         # Caption preceding the cast depends on the show type.
         cast_flag = u'主演' if tv_type == u'电视剧' else u'主持人'
         cast_member = re.search(u'{}</em>(.|\n)+?</p>'.format(cast_flag),
                                 info).group()
         cast_member = re.compile(u'<.+?>|主演|主持人|:').sub(u'', cast_member)
         label = re.search(u'类型</em>(.|\n)+?</p>', info).group()
         label = re.compile(u'<.+?>|类型|:|\s').sub(u'', label)
         if tv_type == u'电视剧':
             # Dramas: digits inside the matched total-episodes fragment.
             all_number = re.search(u'共<b>\d+?</b>集', info).group()
             all_number = re.search(u'\d+', all_number).group()
         else:
             # Other types: count the items of the list for year[0].
             year_json = request(year_url.format(pid))
             year = year_json_is_valid(year_json)
             if not year:
                 warning_message = u"mg zongi《{}》year_info ,结果不准确\r\n". \
                     format(name)
                 utils.log(message=warning_message)
                 continue
             number_info = request(
                 number_url.format(pid=pid, year=(int)(year[0])))
             number_info = number_info_is_valid(number_info)
             if not number_info:
                 warning_message = u"mg zongyi《{}》number_info ,结果不准确\r\n". \
                     format(name)
                 utils.log(message=warning_message)
                 continue
             all_number = len([_ for _ in number_info])
         play_info = request(play_url.format(pid))
         play_json = play_is_valid(play_info)
         if not play_json:
             warning_message = u"mg《{}》play_info ,结果不准确\r\n". \
                 format(name)
             utils.log(message=warning_message)
             continue
         all_play_counts_str = play_json['data']['allVVStr']
         # Parse the number without its unit, then scale by the unit
         # (万 = 10000, 亿 = 100000000).
         all_play_counts = (float)(re.compile(u'万|亿').sub(
             u'', all_play_counts_str))  # noqa
         if u'万' in all_play_counts_str:
             all_play_counts *= 10000
         elif u'亿' in all_play_counts_str:
             all_play_counts *= 100000000
         pre_all_play_counts = db_play_info_map.get(name)
         # Growth since the stored total, never negative; 0 when no truthy
         # stored total exists.
         day_play_counts = pre_all_play_counts and \
             max(all_play_counts - (int)(pre_all_play_counts), 0) or 0
         if name in db_tv_names:
             TvInfo.update(
                 name=name,
                 tv_id=pid,
                 description=description,
                 last_update_time=last_update_time,
                 all_number=all_number,
                 current_number=current_number,
                 cast_member=cast_member,
                 platform=PLATFORM,
                 label=label,
                 update_info=update_info,
                 detail_urls=detail_urls,
                 vids=tv_id,
                 type=tv_type,
                 detail_titles=detail_titles,
                 detail_episodes=detail_episodes,
             )
         else:
             TvInfo.add(
                 name=name,
                 tv_id=tv_id,
                 description=description,
                 last_update_time=last_update_time,
                 all_number=all_number,
                 current_number=current_number,
                 cast_member=cast_member,
                 platform=PLATFORM,
                 label=label,
                 update_info=update_info,
                 detail_urls=detail_urls,
                 vids=tv_id,
                 type=tv_type,
                 detail_titles=detail_titles,
                 detail_episodes=detail_episodes,
             )
         # Always record a play-count row, stamped with self.now.
         PlayInfo.add(tv_id=tv_id,
                      tv_name=name,
                      day_play_counts=day_play_counts,
                      all_play_counts=all_play_counts,
                      time_at=self.now,
                      platform=PLATFORM,
                      type=tv_type)
Example #20
0
File: yk.py Project: zffzjx/water
 def info_and_play(self, tv_urls_map, db_tv_names, db_play_info_map):
     """Scrape every show page and store its metadata and play counts.

     For each ``name -> url`` pair the page is requested (and requested once
     more after a 30 second pause if validation fails), the show fields are
     extracted with regular expressions, the ``TvInfo`` row is updated or
     added, and a ``PlayInfo`` row is added.

     :param tv_urls_map: mapping of show name to the URL of its page.
     :param db_tv_names: collection of names; a name found here is updated,
         any other name is added.
     :param db_play_info_map: mapping of show name to an earlier total play
         count (presumably the last stored one), used to compute the
         increment stored as ``day_play_counts``.
     """
     # Template for the per-tab episode listing; loop invariant.
     number_url = 'http://www.youku.com/show_episode/{}.html?dt=json&divid={}'  # noqa
     for name, url in tv_urls_map.items():
         warning_message = u"yk 《{} 》结果不准确\r\n". \
                           format(name)
         page = request(url)
         content = info_and_play_is_valid(page, name)
         if not content:
             # Single retry after a pause before giving up on this show.
             time.sleep(30)
             page = request(url)
             content = info_and_play_is_valid(page, name)
         if not content:
             utils.log(message=warning_message)
             continue
         last_update_time = ''
         label = ''
         update_info = ''
         detail_urls = url
         detail_titles = ''
         detail_episodes = ''
         # The id is the "id...html" part of the URL without ".html".
         tv_id = re.search(u'id.+?\.html', url).group()[:-5]
         title_str = re.search(u'<h1 class="title">(.|\n)+?</h1>', page). \
             group()
         # Strip the leading 'target="_blank">' (16 chars) and trailing '<'.
         tv_type = re.search(u'target="_blank">.+?<', title_str). \
             group()[16:-1]
         cast_member_flag = u'主持人' if tv_type == u'综艺' else u'主演'
         cast_member_str = re.search(
             cast_member_flag + u':</label>(.|\n)+?</span>', page).group()
         cast_member = ",".join(
             re.search('">.+?<', m.group()).group()[2:-1]
             for m in re.finditer(u'<a.+?</a>', cast_member_str))
         description_str = re.search(
             u'<span class="short" id="show_info_short"(.|\n)+?</div>',
             content).group()
         description = re.compile(u'<.*?>|查看详情>>').sub(u'', description_str)
         all_number = ''
         current_number = ''
         if tv_type == u'电视剧':
             number_str = re.search(u'class="basenotice"(.|\n)+?<',
                                    content).group()
             updated_to = re.search(u'更新至\d+', number_str)
             all_number = re.search(u'共\d+', number_str).group()[1:]
             # Without an "updated to" marker the current episode is taken
             # to be the total.
             current_number = updated_to.group()[3:] if updated_to \
                 else all_number
         if tv_type == u'综艺':
             all_number = 0
             tmp_episode = []
             for tab in re.finditer(u'y\.episode\.show\(\'.+?\'\)', content):
                 divid = re.search(u'\'.+?\'', tab.group()).group()[1:-1]
                 current_number_str = request(
                     number_url.format(tv_id.encode('utf8'),
                                       divid.encode('utf8')))
                 if not current_number_str:
                     warning_message = u"yk 《{} 》number结果不准确\r\n". \
                         format(name)
                     utils.log(message=warning_message)
                     continue
                 tmp_episode = list(
                     re.finditer(u'<ul(.|\n)+?</ul>', current_number_str))
                 all_number += len(tmp_episode)
             if not all_number:
                 # No tab produced an episode: fall back to the list that is
                 # embedded in the page itself.
                 episode_html = re.search(
                     u'<div id="episode">(.|\n)+?</div>', page).group()
                 tmp_episode = list(
                     re.finditer(u'<ul(.|\n)+?</ul>', episode_html))
                 if not tmp_episode:
                     utils.log(message=warning_message)
                     continue
             if not tmp_episode:
                 # The last tab listing was empty, so there is no entry to
                 # read the current issue from.  This used to be hidden
                 # behind a bare ``except:``; skip explicitly and log it.
                 utils.log(message=warning_message)
                 continue
             # NOTE(review): this replaces the total accumulated over all
             # tabs with the size of the last listing only - confirm that
             # this is intended.
             all_number = len(tmp_episode)
             current_number = re.search(u'<label>.+?</label>',
                                        tmp_episode[0].group()).group()
             current_number = re.compile(u'<.+?>|期'). \
                 sub(u'', current_number)
         all_play_counts = re.search(u'<label>总播放:</label>.+?\n', content) \
             .group()
         all_play_counts = int(
             re.compile(u'<label>总播放:</label>|,|\n').sub(
                 u'', all_play_counts))
         pre_all_play_counts = db_play_info_map.get(name)
         # Increment since the earlier total; never negative, 0 when there
         # is no earlier total.
         day_play_counts = 0
         if pre_all_play_counts:
             day_play_counts = max(
                 all_play_counts - int(pre_all_play_counts), 0) or 0
         tv_fields = dict(
             name=name,
             tv_id=tv_id,
             description=description,
             last_update_time=last_update_time,
             all_number=all_number,
             current_number=current_number,
             cast_member=cast_member,
             platform=PLATFORM,
             label=label,
             update_info=update_info,
             detail_urls=detail_urls,
             vids=tv_id,
             type=tv_type,
             detail_titles=detail_titles,
             detail_episodes=detail_episodes,
         )
         if name in db_tv_names:
             TvInfo.update(**tv_fields)
         else:
             TvInfo.add(**tv_fields)
         PlayInfo.add(tv_id=tv_id,
                      tv_name=name,
                      day_play_counts=day_play_counts,
                      all_play_counts=all_play_counts,
                      time_at=self.now,
                      platform=PLATFORM,
                      type=tv_type)
Example #21
0
File: qq.py Project: hncg/water
    def tv_info(self, tv_names, db_tv_names):
        """Load the saved info file of each show and store it as ``TvInfo``.

        Every name in ``tv_names`` is read through ``utils.read``; names
        whose content does not validate are skipped without logging.  A name
        that is in ``db_tv_names`` is updated, any other name is added.
        """
        source_dir = TV_INFO_FILE_DIR + SAVE_FILE
        for name in tv_names:
            raw_page = utils.read(source_dir, name + TV_INFO_FILE_FIX)
            parsed = tv_info_is_valid_qq(raw_page)
            if not parsed:
                continue
            play_list = get_playlist(parsed)

            # Keep only the first run of CJK characters of the 'BC' field.
            tv_type = re.search(u'[\u4e00-\u9fa5]+', play_list['BC']).group()
            description = play_list['TX']
            last_update_time = play_list['AT']
            update_info = play_list['SS']
            tv_id = play_list['ID']
            label = play_list['BE']
            # Drop markup from the cast field.
            cast_member = re.sub(u'<.+?>', u'', play_list['BM'])

            # Prefer a d-d-d token in the 'TT' field, otherwise the first
            # number, otherwise an empty string.
            title_text = play_list['TT']
            number_match = re.search('\d+-\d+-\d+', title_text) or \
                re.search('\d+', title_text)
            current_number = number_match.group() if number_match else ''

            entries = get_all_list(
                play_list['src_list']['vsrcarray'][0]['playlist'])
            # Read the four fields of each entry in a single pass.
            rows = [
                (entry['id'], entry['url'],
                 entry['title'], entry['episode_number'])
                for entry in entries
            ]
            all_number = len(rows)
            vids = ",".join(row[0] for row in rows)
            detail_urls = ",".join(row[1] for row in rows)
            detail_titles = ",".join(row[2] for row in rows)
            detail_episodes = ",".join(row[3] for row in rows)

            record = dict(
                name=name,
                tv_id=tv_id,
                description=description,
                last_update_time=last_update_time,
                all_number=all_number,
                current_number=current_number,
                cast_member=cast_member,
                platform=PLATFORM,
                label=label,
                update_info=update_info,
                detail_urls=detail_urls,
                vids=vids,
                type=tv_type,
                detail_titles=detail_titles,
                detail_episodes=detail_episodes,
            )
            if name in db_tv_names:
                TvInfo.update(**record)
            else:
                TvInfo.add(**record)
Example #22
0
File: let.py Project: zffzjx/water
    def dianshiju(self, urls_map, db_tv_names, db_play_info_map):
        """Scrape TV-series pages and store their metadata and play counts.

        :param urls_map: mapping of show name to a sequence whose items are
            read as ``(url, pid, cast_member, label)``.
        :param db_tv_names: collection of names; a name found here is
            updated, any other name is added.
        :param db_play_info_map: mapping of show name to an earlier total
            play count (presumably the last stored one), used to compute
            ``day_play_counts``.

        A show is skipped, with a logged warning, when its page or its play
        count response fails validation.
        """
        play_url = 'http://v.stat.letv.com/vplay/queryMmsTotalPCount?callback=callback&pid={}' # noqa
        for name, tv_info in urls_map.items():
            url = tv_info[0]
            pid = tv_info[1]
            tv_id = pid
            cast_member = tv_info[2]
            last_update_time = u''
            update_info = u''
            detail_urls = url
            tv_type = u'电视剧'
            detail_titles = u''
            detail_episodes = u''
            label = tv_info[3]
            page = request(url)
            content = dianshiju_is_valid(page)
            if not content:
                warning_message = u"let《{}》tv_info ,结果不准确\r\n". \
                    format(name)
                utils.log(message=warning_message)
                continue
            description = re.search(u'<p class="p7">(.|\n)+?</p>', content). \
                group()
            # Remove the markup around the description text.
            description = re.compile(u'<.+?>').sub('', description)
            all_number = re.search(u'共\d+?集', content).group()
            all_number = re.search(u'\d+', all_number).group()
            updated_to = re.search(u'至\d+?集', content)
            # Without an "updated to" marker the current episode is taken to
            # be the total.
            if updated_to:
                current_number = re.search(
                    u'\d+', updated_to.group()).group()
            else:
                current_number = all_number

            page = request(play_url.format(pid))
            json_content = play_info_is_valid(page)
            if not json_content:
                # Single immediate retry of the play count request.
                page = request(play_url.format(pid))
                json_content = play_info_is_valid(page)
            if not json_content:
                warning_message = u"let《{}》play_info ,结果不准确\r\n". \
                    format(name)
                # The message used to be built and then dropped; log it like
                # every other skip in this method.
                utils.log(message=warning_message)
                continue
            all_play_counts = json_content.get('plist_play_count')
            pre_all_play_counts = db_play_info_map.get(name)
            # Increment since the earlier total; never negative, 0 when
            # there is no earlier total.
            day_play_counts = 0
            if pre_all_play_counts:
                day_play_counts = max(
                    all_play_counts - int(pre_all_play_counts), 0) or 0
            if name in db_tv_names:
                TvInfo.update(name=name, tv_id=pid,
                              description=description,
                              last_update_time=last_update_time,
                              all_number=all_number,
                              current_number=current_number,
                              cast_member=cast_member,
                              platform=PLATFORM,
                              label=label, update_info=update_info,
                              detail_urls=detail_urls,
                              vids=tv_id,
                              type=tv_type,
                              detail_titles=detail_titles,
                              detail_episodes=detail_episodes,
                              )
            else:
                TvInfo.add(name=name, tv_id=tv_id,
                           description=description,
                           last_update_time=last_update_time,
                           all_number=all_number,
                           current_number=current_number,
                           cast_member=cast_member, platform=PLATFORM,
                           label=label, update_info=update_info,
                           detail_urls=detail_urls, vids=tv_id,
                           type=tv_type,
                           detail_titles=detail_titles,
                           detail_episodes=detail_episodes,
                           )
            PlayInfo.add(
                tv_id=tv_id,
                tv_name=name,
                day_play_counts=day_play_counts,
                all_play_counts=all_play_counts,
                time_at=self.now,
                platform=PLATFORM,
                type=tv_type
            )
Example #23
0
File: sh.py Project: hncg/water
 def info_and_play(self, pids_map, db_tv_names, db_play_info_map):
     """Fetch album info and play totals per playlist id and store them.

     For each ``name -> pid`` entry the album info endpoint and the play
     count endpoint are requested.  When either response fails validation a
     warning is logged and the entry is skipped.  Otherwise the ``TvInfo``
     row is updated (name found in ``db_tv_names``) or added, and a
     ``PlayInfo`` row is added.
     """
     play_url = "http://count.vrs.sohu.com/count/queryext.action?plids={}&callback=callback"  # noqa
     info_url = "http://pl.hd.sohu.com/videolist?playlistid={}&callback=callback"  # noqa
     for name, pid in pids_map.items():
         encoded_pid = pid.encode("utf8")

         album = info_is_valid(request(info_url.format(encoded_pid)))
         if not album:
             utils.log(message=u"sh《{}》tv_info ,结果不准确\r\n".format(name))
             continue

         description = album["albumDesc"]
         current_number = album["updateSet"]
         all_number = album["totalSet"]
         # A total of u"0" (or an empty total) falls back to the current
         # episode number.
         if all_number == u"0" or not all_number:
             all_number = current_number

         tv_type = TV_TYPE_MAP.get(album["cid"])
         # Variety shows list hosts, everything else lists actors.
         if tv_type == u"综艺":
             people = album["hosts"]
         else:
             people = album["actors"]
         cast_member = u",".join(people)
         label = ",".join(album["categories"])
         update_info = album["updateNotification"]

         play_json = play_is_valid(
             request(play_url.format(encoded_pid)), pid)
         if not play_json:
             utils.log(message=u"sh《{}》play_info ,结果不准确\r\n".format(name))
             continue

         all_play_counts = play_json[pid]["total"]
         earlier_total = db_play_info_map.get(name)
         # Increment since the earlier total, clamped at zero.
         day_play_counts = 0
         if earlier_total:
             day_play_counts = max(
                 all_play_counts - int(earlier_total), 0) or 0

         record = dict(
             name=name,
             tv_id=pid,
             description=description,
             last_update_time="",
             all_number=all_number,
             current_number=current_number,
             cast_member=cast_member,
             platform=PLATFORM,
             label=label,
             update_info=update_info,
             detail_urls="",
             vids=pid,
             type=tv_type,
             detail_titles="",
             detail_episodes="",
         )
         store = TvInfo.update if name in db_tv_names else TvInfo.add
         store(**record)

         PlayInfo.add(
             tv_id=pid,
             tv_name=name,
             day_play_counts=day_play_counts,
             all_play_counts=all_play_counts,
             time_at=self.now,
             platform=PLATFORM,
             type=tv_type,
         )
Example #24
0
File: let.py Project: zffzjx/water
    def zongyi(self, urls_map, db_tv_names, db_play_info_map):
        """Scrape variety-show pages and store their metadata and play counts.

        :param urls_map: mapping of show name to a sequence whose items are
            read as ``(url, label)``.
        :param db_tv_names: collection of names; a name found here is
            updated, any other name is added.
        :param db_play_info_map: mapping of show name to an earlier total
            play count (presumably the last stored one), used to compute
            ``day_play_counts``.

        Each pid is processed once per call.  A show is skipped when one of
        its responses fails validation; all skips except an invalid first
        page are logged.
        """
        seen_pids = set()
        number_url = 'http://api.le.com/mms/out/album/videos?id={}&cid=11&platform=pc&callback=callback' # noqa
        play_url = 'http://v.stat.letv.com/vplay/queryMmsTotalPCount?callback=callback&pid={}' # noqa
        description_url = 'http://www.le.com/zongyi/{}.html'
        for name, tv_info in urls_map.items():
            url = tv_info[0]
            label = tv_info[1]
            pid_page = request(url)
            pid_page = zongyi_is_valid(pid_page)
            if not pid_page:
                continue
            pid = re.search(u'pid: \d+?,', pid_page).group()
            pid = re.search(u'\d+', pid).group()
            # Several names can resolve to the same pid; handle it once.
            if pid in seen_pids:
                continue
            seen_pids.add(pid)
            tv_id = pid
            d_page = request(description_url.format(pid.encode('utf8')))
            d_page = description_is_valid(d_page)
            if not d_page:
                warning_message = u"let《{}》description_info ,结果不准确\r\n". \
                    format(name)
                utils.log(message=warning_message)
                continue
            description = re.search(u'<p class="p7">(.|\n)+?</p>', d_page). \
                group()
            # Remove the markup around the description text.
            description = re.compile(u'<.+?>').sub('', description)

            last_update_time = u''
            update_info = u''
            detail_urls = url
            tv_type = u'综艺'
            detail_titles = u''
            detail_episodes = u''

            n_page = request(number_url.format(pid))
            n_json = number_utl_is_valid(n_page)
            episodes = n_json['data'] if n_json else None
            if not episodes:
                # Also covers an empty 'data' list, which would otherwise
                # raise IndexError below and abort the whole run.
                warning_message = u"let zongyi《{}》number_info ,结果不准确\r\n". \
                    format(name)
                utils.log(message=warning_message)
                continue
            all_number = n_json['total']
            current_number = episodes[0]['episode']

            # Merge the space separated guest names of all episodes, keeping
            # the first occurrence of each name.  Missing or empty 'guest'
            # values are ignored instead of breaking the join.
            guest_names = []
            known_names = set()
            for episode in episodes:
                guests = episode.get('guest')
                if not guests:
                    continue
                for guest_name in guests.split(" "):
                    if guest_name and guest_name not in known_names:
                        known_names.add(guest_name)
                        guest_names.append(guest_name)
            cast_member = " ".join(guest_names)

            page = request(play_url.format(pid))
            json_content = play_info_is_valid(page)
            if not json_content:
                warning_message = u"let《{}》play_info ,结果不准确\r\n". \
                    format(name)
                utils.log(message=warning_message)
                continue
            all_play_counts = json_content.get('plist_play_count')
            pre_all_play_counts = db_play_info_map.get(name)
            # Increment since the earlier total; never negative, 0 when
            # there is no earlier total.
            day_play_counts = 0
            if pre_all_play_counts:
                day_play_counts = max(
                    all_play_counts - int(pre_all_play_counts), 0) or 0
            if name in db_tv_names:
                TvInfo.update(name=name, tv_id=pid,
                              description=description,
                              last_update_time=last_update_time,
                              all_number=all_number,
                              current_number=current_number,
                              cast_member=cast_member,
                              platform=PLATFORM,
                              label=label, update_info=update_info,
                              detail_urls=detail_urls,
                              vids=tv_id,
                              type=tv_type,
                              detail_titles=detail_titles,
                              detail_episodes=detail_episodes,
                              )
            else:
                TvInfo.add(name=name, tv_id=tv_id,
                           description=description,
                           last_update_time=last_update_time,
                           all_number=all_number,
                           current_number=current_number,
                           cast_member=cast_member, platform=PLATFORM,
                           label=label, update_info=update_info,
                           detail_urls=detail_urls, vids=tv_id,
                           type=tv_type,
                           detail_titles=detail_titles,
                           detail_episodes=detail_episodes,
                           )
            PlayInfo.add(
                tv_id=tv_id,
                tv_name=name,
                day_play_counts=day_play_counts,
                all_play_counts=all_play_counts,
                time_at=self.now,
                platform=PLATFORM,
                type=tv_type
            )
Example #25
0
File: sh.py Project: zffzjx/water
 def info_and_play(self, pids_map, db_tv_names, db_play_info_map):
     """Store album metadata and play totals for each playlist id.

     ``pids_map`` maps a show name to its playlist id.  The album info and
     the play count are requested per id; a response that fails validation
     is logged and the show is skipped.  A name found in ``db_tv_names`` is
     updated in ``TvInfo``, any other name is added, and a ``PlayInfo`` row
     is added in both cases.
     """
     play_url = 'http://count.vrs.sohu.com/count/queryext.action?plids={}&callback=callback'  # noqa
     info_url = 'http://pl.hd.sohu.com/videolist?playlistid={}&callback=callback'  # noqa
     for name, pid in pids_map.items():
         pid_bytes = pid.encode('utf8')

         info_page = request(info_url.format(pid_bytes))
         info_json = info_is_valid(info_page)
         if not info_json:
             utils.log(
                 message=u"sh《{}》tv_info ,结果不准确\r\n".format(name))
             continue

         description = info_json['albumDesc']
         current_number = info_json['updateSet']
         total_set = info_json['totalSet']
         # Use the reported total unless it is u'0' or empty.
         if total_set != u'0' and total_set:
             all_number = total_set
         else:
             all_number = current_number

         tv_type = TV_TYPE_MAP.get(info_json['cid'])
         # Variety shows list hosts, everything else lists actors.
         cast_key = 'hosts' if tv_type == u'综艺' else 'actors'
         cast_member = u",".join(info_json[cast_key])
         label = ",".join(info_json['categories'])
         update_info = info_json['updateNotification']

         play_page = request(play_url.format(pid_bytes))
         play_json = play_is_valid(play_page, pid)
         if not play_json:
             utils.log(
                 message=u"sh《{}》play_info ,结果不准确\r\n".format(name))
             continue

         all_play_counts = play_json[pid]['total']
         last_total = db_play_info_map.get(name)
         if last_total:
             # Increment since the earlier total, clamped at zero.
             day_play_counts = max(
                 all_play_counts - int(last_total), 0) or 0
         else:
             day_play_counts = 0

         tv_fields = {
             'name': name,
             'tv_id': pid,
             'description': description,
             'last_update_time': '',
             'all_number': all_number,
             'current_number': current_number,
             'cast_member': cast_member,
             'platform': PLATFORM,
             'label': label,
             'update_info': update_info,
             'detail_urls': '',
             'vids': pid,
             'type': tv_type,
             'detail_titles': '',
             'detail_episodes': '',
         }
         if name in db_tv_names:
             TvInfo.update(**tv_fields)
         else:
             TvInfo.add(**tv_fields)

         PlayInfo.add(tv_id=pid,
                      tv_name=name,
                      day_play_counts=day_play_counts,
                      all_play_counts=all_play_counts,
                      time_at=self.now,
                      platform=PLATFORM,
                      type=tv_type)
Example #26
0
File: yk.py Project: hncg/water
 def info_and_play(self, tv_urls_map, db_tv_names, db_play_info_map):
     """Scrape every show page and store its metadata and play counts.

     The page of each ``name -> url`` pair is requested, with one more
     attempt after a 30 second pause when validation fails.  The show fields
     are extracted with regular expressions, the ``TvInfo`` row is updated
     or added, and a ``PlayInfo`` row is added.

     :param tv_urls_map: mapping of show name to the URL of its page.
     :param db_tv_names: collection of names; a name found here is updated,
         any other name is added.
     :param db_play_info_map: mapping of show name to an earlier total play
         count (presumably the last stored one), used to compute the
         increment stored as ``day_play_counts``.
     """
     # Template for the per-tab episode listing; loop invariant.
     number_url = 'http://www.youku.com/show_episode/{}.html?dt=json&divid={}' # noqa
     for name, url in tv_urls_map.items():
         warning_message = u"yk 《{} 》结果不准确\r\n". \
                           format(name)
         page = request(url)
         content = info_and_play_is_valid(page, name)
         if not content:
             # Single retry after a pause before giving up on this show.
             time.sleep(30)
             page = request(url)
             content = info_and_play_is_valid(page, name)
         if not content:
             utils.log(message=warning_message)
             continue
         last_update_time = ''
         label = ''
         update_info = ''
         detail_urls = url
         detail_titles = ''
         detail_episodes = ''
         # The id is the "id...html" part of the URL without ".html".
         tv_id = re.search(u'id.+?\.html', url).group()[:-5]
         title_str = re.search(u'<h1 class="title">(.|\n)+?</h1>', page). \
             group()
         # Strip the leading 'target="_blank">' (16 chars) and trailing '<'.
         tv_type = re.search(u'target="_blank">.+?<', title_str). \
             group()[16:-1]
         cast_member_flag = u'主持人' if tv_type == u'综艺' else u'主演'
         cast_member_str = re.search(
             cast_member_flag + u':</label>(.|\n)+?</span>', page).group()
         cast_member = ",".join(
             re.search('">.+?<', m.group()).group()[2:-1]
             for m in re.finditer(u'<a.+?</a>', cast_member_str))
         description_str = re. \
             search(u'<span class="short" id="show_info_short"(.|\n)+?</div>', content).group() # noqa
         description = re.compile(u'<.*?>|查看详情>>').sub(u'', description_str)
         all_number = ''
         current_number = ''
         if tv_type == u'电视剧':
             number_str = re.search(u'class="basenotice"(.|\n)+?<',
                                    content).group()
             updated_to = re.search(u'更新至\d+', number_str)
             all_number = re.search(u'共\d+', number_str).group()[1:]
             # Without an "updated to" marker the current episode is taken
             # to be the total.
             if updated_to:
                 current_number = updated_to.group()[3:]
             else:
                 current_number = all_number
         if tv_type == u'综艺':
             all_number = 0
             tmp_episode = []
             for tab in re.finditer(u'y\.episode\.show\(\'.+?\'\)', content):
                 divid = re.search(u'\'.+?\'', tab.group()).group()[1:-1]
                 current_number_str = request(number_url.format(tv_id.encode('utf8'), divid.encode('utf8'))) # noqa
                 if not current_number_str:
                     warning_message = u"yk 《{} 》number结果不准确\r\n". \
                         format(name)
                     utils.log(message=warning_message)
                     continue
                 tmp_episode = list(re.finditer(u'<ul(.|\n)+?</ul>',
                                                current_number_str))
                 all_number += len(tmp_episode)
             if not all_number:
                 # No tab produced an episode: fall back to the list that is
                 # embedded in the page itself.
                 episode_html = re.search(u'<div id="episode">(.|\n)+?</div>', page).group() # noqa
                 tmp_episode = list(re.finditer(u'<ul(.|\n)+?</ul>',
                                                episode_html))
                 if not tmp_episode:
                     utils.log(message=warning_message)
                     continue
             if not tmp_episode:
                 # The last tab listing was empty, so there is no entry to
                 # read the current issue from.  This used to be hidden
                 # behind a bare ``except:``; skip explicitly and log it.
                 utils.log(message=warning_message)
                 continue
             # NOTE(review): this replaces the total accumulated over all
             # tabs with the size of the last listing only - confirm that
             # this is intended.
             all_number = len(tmp_episode)
             current_number = re.search(u'<label>.+?</label>',
                                        tmp_episode[0].group()).group()
             current_number = re.compile(u'<.+?>|期'). \
                 sub(u'', current_number)
         all_play_counts = re.search(u'<label>总播放:</label>.+?\n', content) \
             .group()
         all_play_counts = int(re.compile(u'<label>总播放:</label>|,|\n')
                               .sub(u'', all_play_counts))
         pre_all_play_counts = db_play_info_map.get(name)
         # Increment since the earlier total; never negative, 0 when there
         # is no earlier total.
         day_play_counts = 0
         if pre_all_play_counts:
             day_play_counts = max(
                 all_play_counts - int(pre_all_play_counts), 0) or 0
         if name in db_tv_names:
             TvInfo.update(name=name, tv_id=tv_id,
                           description=description,
                           last_update_time=last_update_time,
                           all_number=all_number,
                           current_number=current_number,
                           cast_member=cast_member,
                           platform=PLATFORM,
                           label=label, update_info=update_info,
                           detail_urls=detail_urls,
                           vids=tv_id,
                           type=tv_type,
                           detail_titles=detail_titles,
                           detail_episodes=detail_episodes,
                           )
         else:
             TvInfo.add(name=name, tv_id=tv_id,
                        description=description,
                        last_update_time=last_update_time,
                        all_number=all_number,
                        current_number=current_number,
                        cast_member=cast_member, platform=PLATFORM,
                        label=label, update_info=update_info,
                        detail_urls=detail_urls, vids=tv_id,
                        type=tv_type,
                        detail_titles=detail_titles,
                        detail_episodes=detail_episodes,
                        )
         PlayInfo.add(
             tv_id=tv_id,
             tv_name=name,
             day_play_counts=day_play_counts,
             all_play_counts=all_play_counts,
             time_at=self.now,
             platform=PLATFORM,
             type=tv_type
         )