def store_user_video(cls, mid, data, session=None, csvwriter=None):
     """Fetch one user's profile and video list, then persist or print them.

     ``mid`` and ``data`` come from the work queue; ``data`` is unused and
     only kept so the signature stays compatible with ``store_video``.

     Output target:
       * ``session`` truthy   -> rows are added through ``DBOperation``
       * ``csvwriter`` truthy -> ``csvwriter[0]`` receives the user row and
         ``csvwriter[1]`` receives one row per video
       * neither              -> the user row is printed

     Returns ``True`` when ``cls.getUserInfo(mid)`` yielded something,
     ``False`` otherwise.
     """
     user_row = cls.getUserInfo(mid)
     if not user_row:
         return False

     user_record = BiliUserInfo(**dict(zip(cls.user_field_keys, user_row)))
     video_rows = cls.getVideoList(mid)

     # Kept lazy on purpose: the video records are only built when
     # DBOperation.add_all actually consumes the generator.
     video_records = None
     if video_rows:
         video_records = (
             BiliVideoSimpleInfo(**dict(zip(cls.video_field_keys, row)))
             for row in video_rows)

     if session:
         DBOperation.add(user_record, session)
         if video_records:
             DBOperation.add_all(video_records, session)
     elif csvwriter:
         csvwriter[0].writerow(user_row)
         # csvwriter[1] is only touched when there is at least one video row.
         for row in video_rows or ():
             csvwriter[1].writerow(row)
     else:
         print(user_row)
     return True
def init_via_tid(tid):
    """Walk every archive page of partition ``tid`` and store its videos.

    For each page returned by ``BiliApi.get_archive_rank_by_partion`` a
    ``Video(aid, tid, create)`` row is built for every archive that was not
    already seen on the previous page, and the page's rows are written with
    ``DBOperation.add_all``.

    The stored ``create`` value is ``create_time_to_ts(create)`` plus an
    offset that starts at 59 and is decremented (never below 0) for each
    consecutive archive sharing the same timestamp.
    NOTE(review): presumably this spreads videos created in the same minute
    over distinct seconds while keeping page order -- confirm.

    A page whose ``obj['data']['archives']`` is not iterable is re-requested
    once per second until it is; this retry has no upper bound.

    The session is always closed, even when the initial API call or a
    retry raises.
    """
    bapi = BiliApi()
    session = Session()
    try:
        # get page total
        obj = bapi.get_archive_rank_by_partion(tid, 1, 50)
        page_total = math.ceil(obj['data']['page']['count'] / 50)
        logger_01.info('%d page(s) found.' % page_total)

        # get videos data info from api
        page_num = 1
        last_aid_list = []
        last_create_ts = 0
        last_create_ts_offset = 59
        while page_num <= page_total:
            obj = bapi.get_archive_rank_by_partion(tid, page_num, 50)
            # Re-request the page until its archive list is iterable.
            while True:
                try:
                    for _ in obj['data']['archives']:
                        pass
                    break
                except TypeError:
                    logger_01.warning(
                        'TypeError caught, re-call page_num = %d' % page_num)
                    time.sleep(1)
                    obj = bapi.get_archive_rank_by_partion(tid, page_num, 50)
            try:
                aid_list = []
                video_list = []
                for arch in obj['data']['archives']:
                    aid = int(arch['aid'])
                    create = arch['create']
                    if aid not in last_aid_list:
                        # manual reset create_ts
                        create_ts = create_time_to_ts(create)
                        if create_ts == last_create_ts:
                            if last_create_ts_offset > 0:
                                last_create_ts_offset -= 1
                        else:
                            last_create_ts = create_ts
                            last_create_ts_offset = 59
                        create_ts += last_create_ts_offset
                        video_list.append(Video(aid=aid, tid=tid,
                                                create=create_ts))
                        aid_list.append(aid)
                    else:
                        # Same aid was on the previous page (the listing
                        # shifted between two requests).
                        logger_01.warning('Aid %d already added!' % aid)
                DBOperation.add_all(video_list, session)
                last_aid_list = aid_list
                # Refresh the total with the same formula used above; the
                # former int(count / 50) + 1 produced one phantom page
                # whenever count was an exact multiple of 50.
                page_total = math.ceil(obj['data']['page']['count'] / 50)
                logger_01.info('Page %d / %d done.' % (page_num, page_total))
            except Exception as e:
                # Best effort: log and move on to the next page.
                logger_01.error('Exception caught. Detail: %s' % e)
            page_num += 1
    finally:
        session.close()

    logger_01.info('Success get %d tid videos data info from api!' % tid)
Exemple #3
0
 def store_video(cls, aid, session=None, csvwriter=None):
     """Fetch the info row of video ``aid`` and persist or print it.

     Pass either ``session`` (row is added via ``DBOperation``) or
     ``csvwriter`` (row is written with ``writerow``); with neither the row
     is printed. Returns ``True`` if ``cls.getVideoInfo(aid)`` yielded
     something, ``False`` otherwise.
     """
     row = cls.getVideoInfo(aid)
     if not row:
         return False

     record = BiliVideoInfo(**dict(zip(cls.field_keys, row)))
     if session:
         DBOperation.add(record, session)
     elif csvwriter:
         csvwriter.writerow(row)
     else:
         print(row)
     return True
Exemple #4
0
def get_tid_pn(aid, session):
    """Return ``(tid, pn)`` for the stored video ``aid``, or ``None``.

    ``pn`` is ``ceil(count_later / 50)``, where ``count_later`` is whatever
    ``DBOperation.count_later_video_via_tid_and_create`` reports for the
    video's ``tid`` and ``create`` values. ``None`` is returned (after
    printing a message) when the video is not found or the count query
    returns ``None``.
    """
    record = DBOperation.query_video_via_aid(aid, session)
    if record is None:
        print('Video aid=%d not found!' % aid)
        return None

    tid, created_at = record.tid, record.create
    later_total = DBOperation.count_later_video_via_tid_and_create(
        tid, created_at, session)
    if later_total is None:
        print('Fail to count later video!')
        return None

    return tid, math.ceil(later_total / 50)
Exemple #5
0
 def store_user(cls, mid, data, session=None, csvwriter=None):
     """Fetch the info row of user ``mid`` and persist or print it.

     ``mid`` and ``data`` come from the work queue; ``data`` is unused and
     only kept so the signature stays compatible with ``store_video``.

     With a truthy ``session`` the row goes through ``DBOperation`` (ORM);
     otherwise a truthy ``csvwriter`` receives it via ``writerow``; with
     neither it is printed. Returns ``True`` if ``cls.getUserInfo(mid)``
     yielded something, ``False`` otherwise.
     """
     row = cls.getUserInfo(mid)
     if not row:
         return False

     record = BiliUserInfo(**dict(zip(cls.field_keys, row)))
     if session:
         DBOperation.add(record, session)
     elif csvwriter:
         csvwriter.writerow(row)
     else:
         print(row)
     return True
Exemple #6
0
 def store_video(cls, aid, session=None, csvwriter=None):
     """Fetch the info row of video ``aid`` and store it as a focus record.

     Pass either ``session`` (a progress line is printed, then the record
     is added via ``DBOperation``) or ``csvwriter`` (row is written with
     ``writerow``); with neither the row is printed. Returns ``True`` if
     ``cls.getVideoInfo(aid)`` yielded something, ``False`` otherwise.
     """
     row = cls.getVideoInfo(aid)
     if not row:
         return False

     record = TddFocusVideoRecord(**dict(zip(cls.field_keys, row)))
     if session:
         # row[0] is formatted as a local time, row[1] and row[2] fill the
         # av number and view count placeholders of the message.
         print("update av%s with %d views at %s" % (
             row[1],
             row[2],
             time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(row[0])),
         ))
         DBOperation.add(record, session)
     elif csvwriter:
         csvwriter.writerow(row)
     else:
         print(row)
     return True
Exemple #7
0
 def store_video_simpleajax(cls, mid, aid, session=None, csvwriter=None):
     """Store only ``mid``, ``aid`` and the ajax info of one video.

     ``cls(aid).getAjaxInfo()`` is expected to return a tuple, which is
     appended to ``(mid, aid)`` to form the row. Pass either ``session``
     (row is added via ``DBOperation``) or ``csvwriter`` (row is written
     with ``writerow``); with neither the row is printed.

     Returns ``True`` when a row could be built, ``False`` when the ajax
     info could not be concatenated to a tuple (e.g. it is ``None``).
     """
     info_ajax = cls(aid).getAjaxInfo()
     try:
         info = (mid, aid) + info_ajax
     except TypeError:
         # Only the "not a tuple" failure is expected here; a bare except
         # would also have swallowed KeyboardInterrupt / SystemExit.
         info = None
     if not info:
         return False

     new_video = BiliVideoAjaxInfo(**dict(zip(cls.field_keys_ajax, info)))
     if session:
         DBOperation.add(new_video, session)
     elif csvwriter:
         csvwriter.writerow(info)
     else:
         print(info)
     return True
Exemple #8
0
def get_update_aids():
    """Return the ``aid`` of every ``TddFocusVideo`` row as a list.

    A dedicated session is opened for the query and is always closed
    afterwards; the aids are collected into a plain list before the
    session is released so the result does not depend on it.
    """
    session = Session()
    try:
        return [item.aid for item in DBOperation.query(TddFocusVideo, session)]
    finally:
        session.close()
Exemple #9
0
def _routine_fetch_page(bapi, tid, page_num):
    """Fetch one archive page, retrying each second until it is iterable."""
    obj = bapi.get_archive_rank_by_partion(tid, page_num, 50)
    while True:
        try:
            for _ in obj['data']['archives']:
                pass
            return obj
        except TypeError:
            logger_02.warning('TypeError caught, re-call page_num = %d' %
                              page_num)
            time.sleep(1)
            obj = bapi.get_archive_rank_by_partion(tid, page_num, 50)


def _routine_add_new_videos(tid, bapi, session):
    """Step 01: add archives listed before the newest aids already stored."""
    logger_02.info('Now start add new video with tid %d...' % tid)

    # get last aid
    # Ten aids are fetched instead of one so that the scan still stops if
    # the most recent stored video has since been deleted upstream.
    last_aids = [
        v.aid for v in DBOperation.query_last_x_aid_via_tid(tid, 10, session)
    ]
    logger_02.info('Get last aids: %s' % last_aids)  # avoid last aid deleted

    # get page total
    obj = bapi.get_archive_rank_by_partion(tid, 1, 50)
    page_total = math.ceil(obj['data']['page']['count'] / 50)

    # add new videos data info from api
    page_num = 1
    last_aid_list = []
    last_create_ts = 0
    last_create_ts_offset = 59
    goon = True
    new_video_count = 0
    while page_num <= page_total and goon:
        obj = _routine_fetch_page(bapi, tid, page_num)
        try:
            aid_list = []
            video_list = []
            for arch in obj['data']['archives']:
                aid = int(arch['aid'])
                create = arch['create']

                if aid in last_aids:
                    # Reached a video that is already stored: stop scanning.
                    logger_02.info('Meet aid = %d in last_aids, break.' % aid)
                    goon = False
                    break

                if aid not in last_aid_list:
                    # manual reset create_ts
                    # Offset starts at 59 and shrinks for consecutive
                    # archives sharing the same timestamp (never below 0).
                    create_ts = create_time_to_ts(create)
                    if create_ts == last_create_ts:
                        if last_create_ts_offset > 0:
                            last_create_ts_offset -= 1
                    else:
                        last_create_ts = create_ts
                        last_create_ts_offset = 59
                    create_ts += last_create_ts_offset
                    video = Video(aid=aid, tid=tid, create=create_ts)
                    new_video_count += 1
                    logger_02.info('Add new video %s' % video)
                    video_list.append(video)
                    aid_list.append(aid)
                else:
                    logger_02.warning('Aid %d already added!' % aid)
            DBOperation.add_all(video_list, session)
            last_aid_list = aid_list
            page_total = math.ceil(obj['data']['page']['count'] / 50)
        except Exception as e:
            # Best effort: log and continue with the next page.
            logger_02.error('Exception caught. Detail: %s' % e)
        page_num += 1

    if new_video_count == 0:
        logger_02.info('No new video found with %d tid.' % tid)
    else:
        logger_02.info('%d new video(s) found with %d tid.' %
                       (new_video_count, tid))
    logger_02.info('Finish add new video with tid %d!' % tid)


def _routine_delete_invalid_videos(tid, bapi, session):
    """Step 02: delete stored videos that the API no longer lists."""
    logger_02.info('Now start delete invalid video with tid %d...' % tid)

    # get count in db
    count_db = DBOperation.count_video_via_tid(tid, session)

    # get count via api
    obj = bapi.get_archive_rank_by_partion(tid, 1, 50)
    count_api = int(obj['data']['page']['count'])
    page_total = math.ceil(obj['data']['page']['count'] / 50)

    logger_02.info('Get count_db = %d, count_api = %d' % (count_db, count_api))
    invalid_count = count_db - count_api
    if invalid_count > 0:
        # need to delete
        page_num = 1
        # aids missing from a page whose create time is so close to the page
        # boundary that they may still show up on the next page
        unsettled_diff_aids = []
        while page_num <= page_total and invalid_count > 0:
            obj = _routine_fetch_page(bapi, tid, page_num)

            try:
                # get page aids
                page_aids = [v['aid'] for v in obj['data']['archives']]

                # get db aids
                create_ts_from = create_time_to_ts(
                    obj['data']['archives'][0]['create']) + 59  # bigger one
                create_ts_to = create_time_to_ts(
                    obj['data']['archives'][-1]['create'])  # smaller one
                db_videos = DBOperation.query_video_between_create_ts(
                    create_ts_from, create_ts_to, session)
                db_aids = list(map(lambda x: x.aid, db_videos))

                # process unsettled
                # Iterate over a snapshot: entries are removed from the real
                # list inside the loop, which would otherwise skip items.
                for aid in list(unsettled_diff_aids):
                    if aid not in page_aids:
                        # query create time
                        create = -1
                        if aid in db_aids:
                            for v in db_videos:
                                if v.aid == aid:
                                    create = v.create
                                    break
                        else:
                            logger_02.warning(
                                'Cannot find unsettled diff aid %d in db_aids %s!'
                                % (aid, db_aids))
                        if create_ts_to <= create <= create_ts_to + 59:
                            # maybe in next page
                            logger_02.info('Remain aid %d in unsettled list.' %
                                           aid)
                        else:
                            logger_02.info('Delete unsettled invalid aid %d.' %
                                           aid)
                            if is_aid_valid(aid):
                                logger_02.warning(
                                    'Aid %d is not invalid! Do not remove it.'
                                    % aid)
                            else:
                                # Delete only after the validity check, like
                                # the diff branch below; it used to run
                                # before the check and removed valid videos.
                                DBOperation.delete_video_via_aid(aid, session)
                                unsettled_diff_aids.remove(aid)
                                invalid_count -= 1
                    else:
                        logger_02.info('Save unsettled aid %d.' % aid)
                        unsettled_diff_aids.remove(aid)

                # get diff
                diff_aids = [aid for aid in db_aids if aid not in page_aids]
                new_aids = [aid for aid in page_aids if aid not in db_aids]

                # process diff
                if len(diff_aids) > 0:
                    for aid in diff_aids:
                        # query create time
                        create = -1
                        for v in db_videos:
                            if v.aid == aid:
                                create = v.create
                                break
                        if create_ts_to <= create <= create_ts_to + 59:
                            unsettled_diff_aids.append(aid)
                            logger_02.info('Add aid %d to unsettled list.' %
                                           aid)
                        elif create_ts_from - 59 <= create <= create_ts_from:
                            # counted in last page
                            pass
                        else:
                            logger_02.info('Delete invalid aid %d.' % aid)
                            if is_aid_valid(aid):
                                logger_02.warning(
                                    'Aid %d is not invalid! Do not remove it.'
                                    % aid)
                            else:
                                DBOperation.delete_video_via_aid(aid, session)
                                invalid_count -= 1
                else:
                    logger_02.info('No diff aid!')

                # process new
                last_create_ts = 0
                last_create_ts_offset = 59
                for aid in new_aids:
                    for arch in obj['data']['archives']:
                        if arch['aid'] == aid:
                            create = arch['create']
                            create_ts = create_time_to_ts(create)
                            if create_ts == last_create_ts:
                                if last_create_ts_offset > 0:
                                    last_create_ts_offset -= 1
                            else:
                                last_create_ts = create_ts
                                last_create_ts_offset = 59
                            create_ts += last_create_ts_offset
                            video = Video(aid=aid, tid=tid, create=create_ts)
                            logger_02.warning(
                                'Add new video %s during finding invalid aid.'
                                % video)
                            DBOperation.add(video, session)
                            break

                page_total = math.ceil(obj['data']['page']['count'] / 50)
                logger_02.info('Page %d / %d done, %d invalid aid left.' %
                               (page_num, page_total, invalid_count))
                # NOTE(review): page_num only advances on success, so a page
                # that keeps raising (e.g. an empty archive list) is retried
                # forever without delay -- confirm this retry is intended.
                page_num += 1
            except Exception as e:
                logger_02.error('Exception caught. Detail: %s' % e)
    else:
        logger_02.info('No invalid video to delete!')

    logger_02.info('Finish delete invalid video with tid %d!' % tid)


def routine_update_via_tid(tid):
    """Run one update round for partition ``tid``.

    The round has two steps: (01) add videos that appeared since the last
    stored aids, (02) if the database holds more videos than the API
    reports, walk the pages and delete the ones that are gone.

    The module-level ``is_updating`` flag prevents overlapping rounds; a
    call made while it is set logs a warning and returns immediately. The
    flag is cleared and the session closed even when a step raises, so one
    failed round no longer blocks all following rounds.
    """
    global is_updating

    logger_02.info('Now start routine update %d tid...' % tid)

    if is_updating:
        logger_02.warning('Last round has not finished, stop this round.')
        return
    is_updating = True

    session = None
    try:
        session = Session()
        bapi = BiliApi()

        # 01 add new video
        _routine_add_new_videos(tid, bapi, session)

        # 02 delete invalid video
        _routine_delete_invalid_videos(tid, bapi, session)

        logger_02.info('Finish routine update %d tid.\n' % tid)
    finally:
        if session is not None:
            session.close()
        is_updating = False
def main():
    """Endlessly scan partition 30 and record on which page each aid sits.

    Each round walks all archive pages; for every archive the matching
    ``NbphRecord`` is created, or updated when its stored page number
    differs from the current one. After each round a summary line is
    logged and the loop sleeps 10 seconds. This function never returns.

    A page whose ``obj['data']['archives']`` is not iterable is re-requested
    once per second until it is; this retry has no upper bound.
    """
    round_count = 1
    round_start = 0
    round_end = 0
    round_visit_count = 0
    session = None
    while True:
        try:
            logger_11.info('round %d start' % round_count)
            round_start = get_ts_s()
            round_visit_count = 0
            bapi = BiliApi()
            session = Session()

            # get page total
            obj = bapi.get_archive_rank_by_partion(30, 1, 50)
            page_total = math.ceil(obj['data']['page']['count'] / 50)
            logger_11.info('%d page(s) found' % page_total)

            page_num = 1
            while page_num <= page_total:
                obj = bapi.get_archive_rank_by_partion(30, page_num, 50)
                # Re-request the page until its archive list is iterable.
                while True:
                    try:
                        for _ in obj['data']['archives']:
                            pass
                        break
                    except TypeError:
                        logger_11.warning(
                            'TypeError caught, re-call page_num = %d' %
                            page_num)
                        time.sleep(1)
                        obj = bapi.get_archive_rank_by_partion(
                            30, page_num, 50)
                try:
                    added = get_ts_s()
                    for arch in obj['data']['archives']:
                        aid = int(arch['aid'])
                        nbph_record = DBOperation.query_nbph_record_via_aid(
                            aid, session)
                        if nbph_record:
                            # Only touch the row when the video moved pages.
                            if nbph_record.pn != page_num:
                                nbph_record.pn = page_num
                                nbph_record.added = added
                                session.commit()
                        else:
                            nbph_record = NbphRecord()
                            nbph_record.aid = aid
                            nbph_record.pn = page_num
                            nbph_record.added = added
                            DBOperation.add(nbph_record, session)
                        round_visit_count += 1
                except Exception as e:
                    # Best effort: log and continue with the next page.
                    logger_11.error('Exception caught. Detail: %s' % e)
                page_num += 1
                time.sleep(0.1)
        except Exception as e:
            logger_11.error(e)
        finally:
            # session is still None when the round failed before Session()
            # returned; calling close() on it would raise inside finally
            # and terminate the loop.
            if session is not None:
                session.close()
                session = None
            round_end = get_ts_s()
            timespan = round_end - round_start
            # A round that fails within the same second has a zero timespan;
            # report a speed of 0 instead of raising ZeroDivisionError.
            speed = round_visit_count / timespan * 60 if timespan else 0.0
            logger_11.info(
                'round %d, start: %s, end: %s, timespan: %d, visit_count: %d, speed: %.2f'
                % (round_count, ts_s_to_str(round_start),
                   ts_s_to_str(round_end), timespan,
                   round_visit_count, speed))
            round_count += 1
            time.sleep(10)