コード例 #1
0
ファイル: bilibili.py プロジェクト: baidong1980/spider
    def check_type_req(self, av_id: int):
        ''' request the view API of one av and store in rank_type whether
            its tid equals assign_tid; retry while can_retry allows it '''
        changeHeaders({'Referer': self.BASIC_AV_URL % av_id})
        view_url = self.VIEW_URL % av_id
        response = get_request_proxy(view_url, 1)

        # the response is usable only when it carries data.tid
        has_tid = (response is not None and 'data' in response
                   and 'tid' in response['data'])
        if has_tid:
            self.rank_type[av_id] = response['data']['tid'] == self.assign_tid
        elif can_retry(view_url):
            self.check_type_req(av_id)
コード例 #2
0
    def check_rank(self, av_id: int, times=0):
        ''' record the current stat of one av and do its rank bookkeeping

        Requests ARCHIVE_STAT_URL for ``av_id``, appends one row to
        ``<history_dir><av_id>.csv``, sends a mail when ``check_rank_list``
        accepts the cached rank entry, and finally drops the ``rank_map``
        entry once it is older than ``one_day``.

        :param av_id: av id to check
        :param times: retry counter; the call retries itself while times < 3
        '''
        rank_list = self.rank_map[av_id] if av_id in self.rank_map else []
        changeHeaders({'Referer': self.BASIC_AV_URL % av_id})

        url = self.ARCHIVE_STAT_URL % av_id
        json_req = get_request_proxy(url, 1)

        # NOTE(review): despite its name, have_error is used here as
        # "response is usable" -- confirm against its definition
        if not self.have_error(json_req):
            if times < 3:
                self.check_rank(av_id, times + 1)
            return
        stat = json_req['data']
        need = [
            'view', 'like', 'coin', 'favorite', 'reply', 'share', 'danmaku'
        ]
        data = [stat[index] for index in need]
        if not self.check_view(av_id, data[0]):
            if times < 3:
                self.check_rank(av_id, times + 1)
            return
        if len(rank_list):
            data = [time_str(), *data, *rank_list[:2], *rank_list[3:5]]
        else:
            data = [time_str(), *data]

        with codecs.open('%s%d.csv' % (history_dir, av_id),
                         'a',
                         encoding='utf-8') as f:
            f.write(','.join(str(index) for index in data) + '\n')

        if self.check_rank_list(av_id, rank_list):
            # rank / score are resolved here, where they are actually used,
            # so they can never be referenced while unbound
            rank, score = int(rank_list[0]), int(rank_list[1])
            day = int(rank_list[-1])
            av_id_id = int(av_id) * 10 + day
            self.rank.setdefault(av_id_id, []).append(rank_list[0] // 10)
            self.last_rank[av_id_id] = rank_list[0]
            message = '%dday List || Rank: %d Score: %d' % (day, rank, score)
            send_email(message, message)

        # Expire the cached rank entry. The elapsed time is now - last_check;
        # the former last_check - now was never positive, so nothing expired.
        now = int(time.time())
        if av_id in self.last_check:
            expired = now - self.last_check[av_id] > one_day
        else:
            expired = now > one_day + self.begin_timestamp
        if expired:
            # pop: the id may be absent from rank_map (rank_list == [] above)
            self.rank_map.pop(av_id, None)
        self.last_view[av_id] = data[1]
コード例 #3
0
ファイル: bilibili.py プロジェクト: baidong1980/spider
 def public_data(self, av_id: int, times: int):
     ''' load publish time and uploader mid of one av into self.public,
         retrying the request while times < 3 '''
     changeHeaders({'Referer': self.BASIC_AV_URL % av_id})
     response = get_request_proxy(self.VIEW_URL % av_id, 1)

     usable = (response is not None and 'data' in response
               and 'pubdate' in response['data'])
     if not usable:
         if times < 3:
             self.public_data(av_id, times + 1)
         return

     pubdate = response['data']['pubdate']
     owner_mid = response['data']['owner']['mid']
     self.get_star_num(owner_mid, 0)
     self.public[av_id] = [pubdate, owner_mid]
コード例 #4
0
ファイル: bilibili.py プロジェクト: baidong1980/spider
    def check_rank_v2(self, av_id: int, times=0):
        ''' fetch the archive stat of one av and keep the resulting row in
            data_v2; the request is retried while times < 3 '''
        rank_list = self.rank_map.get(av_id, [])
        changeHeaders({'Referer': self.BASIC_AV_URL % av_id})
        response = get_request_proxy(self.ARCHIVE_STAT_URL % av_id, 1)

        if not self.have_error(response):
            if times < 3:
                self.check_rank_v2(av_id, times + 1)
            return

        stat = response['data']
        stat_keys = ('view', 'like', 'coin', 'favorite', 'reply', 'share',
                     'danmaku')
        counters = [stat[key] for key in stat_keys]

        # row layout: timestamp, counters, then (if ranked) first two and
        # last two fields of the cached rank entry
        row = [time_str()] + counters
        if rank_list:
            row += [*rank_list[:2], *rank_list[-2:]]
        self.data_v2[av_id] = row
コード例 #5
0
ファイル: bilibili.py プロジェクト: baidong1980/spider
    def load_rank_index(self, index: int, day_index: int):
        ''' load one ranking page, refresh rank_map and start monitor threads

        :param index: first value formatted into RANKING_URL
        :param day_index: second value formatted into RANKING_URL
        :return: whether any id of assign_ids appears on the ranking page;
                 False when the page state could not be extracted and no
                 retry succeeded
        '''
        changeHeaders({'Referer': self.AV_URL})
        url = self.RANKING_URL % (index, day_index)
        text = basic_req(url, 3)
        rank_str = re.findall('window.__INITIAL_STATE__=(.*?);', text)
        if not rank_str:
            # hand back the retry's answer instead of always reporting False
            if can_retry(url):
                return self.load_rank_index(index, day_index)
            return False
        rank_list = json.loads(rank_str[0])['rankList']

        need_params = [
            'pts', 'author', 'mid', 'play', 'video_review', 'coins',
            'duration', 'title'
        ]
        now_av_id = set()
        wait_check_public = []
        rank_map = {}

        for ii, rank in enumerate(rank_list):
            av_id = int(rank['aid'])
            # entry layout: position, need_params values, index, day_index
            temp_rank_list = [
                ii, *[rank[key] for key in need_params], index, day_index
            ]
            now_av_id.add(av_id)
            if not self.check_type(av_id):
                continue
            self.check_rank_rose(av_id, temp_rank_list)
            if self.add_av(av_id, ii, temp_rank_list[1]):
                rank_map[av_id] = temp_rank_list

        # check assign av rank
        for ii in self.assign_ids:
            if ii not in self.public:
                wait_check_public.append(ii)
            if ii not in self.last_view and ii not in self.rank_map:
                self.rank_map[ii] = []
        have_assign = any(ii in now_av_id for ii in self.assign_ids)

        # check tid type (all requests run in parallel and are awaited)
        threading_public = [
            threading.Thread(target=self.check_type_req, args=(ii, ))
            for ii in rank_map
        ]
        for work in threading_public:
            work.start()
        for work in threading_public:
            work.join()

        for ii, jj in rank_map.items():
            # explicit comparison kept: check_type's possible return values
            # are not visible here, only an exact True may pass
            if self.check_type(ii) != True:
                continue
            if ii not in self.public:
                wait_check_public.append(ii)
            self.last_check[ii] = int(time.time())
            self.rank_map[ii] = jj

        # load public basic data; an assigned id that is also ranked was
        # queued twice above, so de-duplicate (order preserved) to avoid
        # requesting the same av twice
        threading_public = [
            threading.Thread(target=self.public_data, args=(ii, 0))
            for ii in dict.fromkeys(wait_check_public)
        ]
        for work in threading_public:
            work.start()
        for work in threading_public:
            work.join()

        # begin monitor: one un-joined thread per av published within one_day
        threading_list = []
        for ii, jj in self.public.items():
            if ii not in self.public_list and jj[0] + one_day > int(
                    time.time()):
                work = threading.Thread(target=self.public_monitor,
                                        args=(ii, 0))
                threading_list.append(work)
        for work in threading_list:
            work.start()
        return have_assign
コード例 #6
0
    def load_rank_index(self, index: int, day_index: int):
        ''' load one ranking page (HTML), refresh rank_map and start monitor
            threads

        :param index: first value formatted into RANKING_URL
        :param day_index: second value formatted into RANKING_URL
        :return: whether any id of assign_ids appears on the ranking page
        '''
        url = self.RANKING_URL % (index, day_index)
        changeHeaders({'Referer': url})
        html = basic_req(url, 0)
        rank_list = html.find_all('li', class_='rank-item')

        now_av_id = set()
        wait_check_public = []
        rank_map = {}

        for av in rank_list:
            av_href = av.find_all('a')[0]['href']
            # take what follows 'av' in the link, minus its last character
            av_id = int(re.findall('av.*', av_href)[0][2:-1])
            now_av_id.add(av_id)
            if not self.check_type(av_id):
                continue
            rank = int(av.find_all('div', class_='num')[0].text)
            score = int(
                av.find_all('div', class_='pts')[0].find_all('div')[0].text)
            name = av.find_all('span')[2].text
            if self.add_av(av_id, rank, score):
                rank_map[av_id] = [rank, score, name, index, day_index]

        # check assign av rank
        for ii in self.assign_ids:
            if ii not in self.public:
                wait_check_public.append(ii)
            if ii not in self.last_view and ii not in self.rank_map:
                self.rank_map[ii] = []
        have_assign = any(ii in now_av_id for ii in self.assign_ids)

        # check tid type (all requests run in parallel and are awaited)
        threading_public = [
            threading.Thread(target=self.check_type_req, args=(ii, ))
            for ii in rank_map
        ]
        for work in threading_public:
            work.start()
        for work in threading_public:
            work.join()

        for ii, jj in rank_map.items():
            # explicit comparison kept: check_type's possible return values
            # are not visible here, only an exact True may pass
            if self.check_type(ii) != True:
                continue
            if ii not in self.public:
                wait_check_public.append(ii)
            self.last_check[ii] = int(time.time())
            self.rank_map[ii] = jj

        # load public basic data; an assigned id that is also ranked was
        # queued twice above, so de-duplicate (order preserved) to avoid
        # requesting the same av twice
        threading_public = [
            threading.Thread(target=self.public_data, args=(ii, 0))
            for ii in dict.fromkeys(wait_check_public)
        ]
        for work in threading_public:
            work.start()
        for work in threading_public:
            work.join()

        # begin monitor: one un-joined thread per av published within one_day
        threading_list = []
        for ii, jj in self.public.items():
            if ii not in self.public_list and jj[0] + one_day > int(
                    time.time()):
                work = threading.Thread(target=self.public_monitor,
                                        args=(ii, 0))
                threading_list.append(work)
        for work in threading_list:
            work.start()
        return have_assign
コード例 #7
0
    def generate_eleven(self):
        ''' Fetch the obfuscated OCEANBALL script, decode and patch it so it
            can run outside a browser, evaluate it with js2py and return the
            resulting `eleven` value.

        NOTE(review): this evaluates JavaScript downloaded at runtime; the
        script content is fully controlled by the remote server.
        '''
        ################################################################
        #
        #   [generate eleven] version 19.4.21 (tested), written by gunjianpan
        #
        #   1. generate the `callback` parameter (generate_callback(15));
        #   2. request OCEANBALL with that callback -> get the origin js;
        #   3. eval once -> (match the int array, then chr() it) -> decoded js;
        #   4. replace document and window (execjs & jsdom would also work);
        #   5. warning: `this` has to be replaced by some params, otherwise,
        #      per the original author, the script yields a taunting decoy
        #      sentence (in Chinese) instead of the real value;
        #   6. finish, return, and join params;
        #
        ################################################################

        callback = self.generate_callback(15)
        # millisecond timestamp, sent as the `_` query parameter
        now_time = int(time.time() * 1000)
        url = '{}?callback={}&_={}'.format(OCEANBALL_URL, callback, now_time)
        referer_url = HOTEL_DETAIL_URL % self.default_hotel_id
        changeHeaders({'Referer': referer_url})
        oceanball_js = basic_req(url, 3)
        # the payload is a comma separated int list found between `([` and `],`
        array = re.findall(r'\(\[(.*)\],', oceanball_js)[0].split(',')
        array = [int(ii) for ii in array]
        # every int is shifted by the offset that appears as `item-<offset>)`
        offset = int(re.findall(r'item-(\d*?)\)', oceanball_js)[0])
        ''' String.fromCharCode '''
        oe = ''.join([chr(ii - offset) for ii in array])
        ''' replace window[callback] callback function '''
        # the full `<callback>(new ...));` call inside the decoded script
        replace_str = re.findall(r'{}\(new.*\)\);'.format(callback), oe)[0]
        # the expression between ` + ` and ` +` inside that call
        eleven_params = re.findall(
            r'{}\(new.*\+ (.*?) \+.*\)\);'.format(callback), oe)[0]
        # return that expression instead of passing it to the callback
        replaced_str = 'return {};'.format(eleven_params)
        oe = oe.replace(replace_str, replaced_str)
        # normalise quotes to double quotes and drop carriage returns
        oe = oe.replace('\'', '"').replace('\r', '')
        # only the first `;!` is turned into an assignment
        oe = oe.replace(';!', 'let aaa = ', 1)

        # prologue swapped in for the first `function(){` below: declares the
        # stand-ins (href, a, b, c, userAgent, geolocation) that the
        # replacements below refer to
        replace = '''
        function(){let href='https://hotels.ctrip.com/hotel/4889292.html';
            a={'documentElement': {'attributes':{}}};
            b={};
            function c(){};
            userAgent ='Chrome/73.0.3682.0';
            geolocation = 0;
        '''
        ''' replace document & windown & navigator '''
        # order matters: the specific document.* / window.* expressions must
        # be rewritten before the generic 'document' -> 'a' and
        # 'window' -> 'b' replacements further down
        oe = oe.replace('document.body.innerHTML.length',
                        '888').replace('document.body.innerHTML', '""')
        oe = oe.replace('document.createElement("div")', '{}')
        oe = oe.replace('window.HTMLSpanElement',
                        'c').replace('document.createElement("span")', '1')
        oe = oe.replace('window.location.href',
                        'href').replace('location.href', 'href')
        # stripping the prefix leaves bare names such as userAgent
        oe = oe.replace('navigator.', '')
        oe = oe.replace('new Image().', '')
        oe = oe.replace('document.all', '0').replace('document.referrer', '""')
        # step 5 of the header: remove the `this || ` fallback
        oe = oe.replace('this || ', '')
        oe = oe.replace('window["document"]', 'a')

        # remaining generic references go to the stub objects a and b
        oe = oe.replace('document', 'a').replace('window', 'b')
        # only the first `function(){` receives the prologue
        oe = oe.replace('function(){', replace, 1)
        ''' eval script '''
        eleven = js2py.eval_js(oe)
        echo(1, 'eleven', eleven)
        return eleven