def check_type_req(self, av_id: int):
    """Query the view API for *av_id* and record in ``self.rank_type``
    whether the video's tid equals ``self.assign_tid``.

    On a malformed/missing response, retries once more only while
    ``can_retry(url)`` allows it.
    """
    changeHeaders({'Referer': self.BASIC_AV_URL % av_id})
    view_url = self.VIEW_URL % av_id
    resp = get_request_proxy(view_url, 1)

    # Guard clause: anything short of a dict containing data.tid is a miss.
    if resp is None or 'data' not in resp or 'tid' not in resp['data']:
        if can_retry(view_url):
            self.check_type_req(av_id)
        return

    self.rank_type[av_id] = resp['data']['tid'] == self.assign_tid
def check_rank(self, av_id: int, times=0):
    ''' Poll archive stats for one av, append a CSV history row, and
    email when the video enters a rank list.

    Args:
        av_id: the Bilibili av number to check.
        times: internal retry counter; the method re-enters itself at
            most 3 times on a bad response or failed view check.
    '''
    # rank_list layout appears to be [rank, score, name, index, day_index]
    # (from the HTML-based load_rank_index) -- TODO confirm, the JSON-based
    # loader stores a longer list.
    rank_list = self.rank_map[av_id] if av_id in self.rank_map else []
    changeHeaders({'Referer': self.BASIC_AV_URL % av_id})
    if len(rank_list):
        score = int(rank_list[1])
        rank = int(rank_list[0])
    url = self.ARCHIVE_STAT_URL % av_id
    json_req = get_request_proxy(url, 1)
    # have_error() returning falsy means the response is unusable -> retry.
    if not self.have_error(json_req):
        if times < 3:
            self.check_rank(av_id, times + 1)
        return
    json_req = json_req['data']
    need = [
        'view', 'like', 'coin', 'favorite', 'reply', 'share', 'danmaku'
    ]
    data = [json_req[index] for index in need]
    # data[0] is the view count; bail out (and retry) if it fails the check.
    if not self.check_view(av_id, data[0]):
        if times < 3:
            self.check_rank(av_id, times + 1)
        return
    if len(rank_list):
        # NOTE(review): check_rank_v2 uses rank_list[-2:] here;
        # rank_list[3:5] only matches it for a 5-element rank_list -- confirm.
        data = [time_str(), *data, *rank_list[:2], *rank_list[3:5]]
    else:
        data = [time_str(), *data]
    # Append one timestamped row per check to the per-av history CSV.
    with codecs.open('%s%d.csv' % (history_dir, av_id), 'a',
                     encoding='utf-8') as f:
        f.write(','.join([str(index) for index in data]) + '\n')
    if self.check_rank_list(av_id, rank_list):
        # Encode (av_id, day_index) into one key: last digit is day_index.
        av_id_id = int(av_id) * 10 + int(rank_list[-1])
        if av_id_id not in self.rank:
            self.rank[av_id_id] = [rank_list[0] // 10]
        else:
            self.rank[av_id_id].append(rank_list[0] // 10)
        self.last_rank[av_id_id] = rank_list[0]
        subject = '%dday List || Rank: %d Score: %d' % (
            int(rank_list[-1]), rank, score)
        send_email(subject, subject)
    now = int(time.time())
    # BUGFIX: was `self.last_check[av_id] - now > one_day`, which is never
    # true because last_check holds a *past* timestamp; the intent is to
    # stop tracking an av that has not been re-listed for over a day.
    # pop(..., None) also avoids a KeyError if the av was already dropped.
    if av_id in self.last_check and now - self.last_check[av_id] > one_day:
        self.rank_map.pop(av_id, None)
    elif av_id not in self.last_check and now > one_day + self.begin_timestamp:
        self.rank_map.pop(av_id, None)
    # Remember the latest view count (data[1] after the time_str prefix).
    self.last_view[av_id] = data[1]
def public_data(self, av_id: int, times: int):
    ''' get public basic data '''
    changeHeaders({'Referer': self.BASIC_AV_URL % av_id})
    resp = get_request_proxy(self.VIEW_URL % av_id, 1)

    # Retry (up to 3 times total) unless the payload carries data.pubdate.
    usable = resp is not None and 'data' in resp and 'pubdate' in resp['data']
    if not usable:
        if times < 3:
            self.public_data(av_id, times + 1)
        return

    detail = resp['data']
    publish_time = detail['pubdate']
    owner_mid = detail['owner']['mid']
    self.get_star_num(owner_mid, 0)
    self.public[av_id] = [publish_time, owner_mid]
def check_rank_v2(self, av_id: int, times=0):
    """Snapshot current archive stats for *av_id* into ``self.data_v2``.

    Retries itself up to 3 times when the stat response is unusable.
    """
    previous = self.rank_map.get(av_id, [])
    changeHeaders({'Referer': self.BASIC_AV_URL % av_id})
    resp = get_request_proxy(self.ARCHIVE_STAT_URL % av_id, 1)

    # have_error() falsy -> response unusable; retry with a bumped counter.
    if not self.have_error(resp):
        if times < 3:
            self.check_rank_v2(av_id, times + 1)
        return

    stat = resp['data']
    wanted = ('view', 'like', 'coin', 'favorite', 'reply', 'share', 'danmaku')
    row = [stat[key] for key in wanted]
    if previous:
        row = [time_str(), *row, *previous[:2], *previous[-2:]]
    else:
        row = [time_str(), *row]
    self.data_v2[av_id] = row
def load_rank_index(self, index: int, day_index: int):
    ''' load rank '''
    # Fetch the ranking page and pull the embedded JSON state blob.
    changeHeaders({'Referer': self.AV_URL})
    url = self.RANKING_URL % (index, day_index)
    text = basic_req(url, 3)
    rank_str = re.findall('window.__INITIAL_STATE__=(.*?);', text)
    if not len(rank_str):
        # NOTE(review): failure path returns False, success returns
        # have_assign (bool) -- callers seem to treat both as booleans.
        if can_retry(url):
            self.load_rank_index(index, day_index)
        return False
    rank_map = json.loads(rank_str[0])
    rank_list = rank_map['rankList']
    now_av_id = []           # every av id seen on this rank page
    wait_check_public = []   # av ids still missing public (pubdate/mid) data
    rank_map = {}            # reused name: now av_id -> candidate rank row
    for ii, rank in enumerate(rank_list):
        av_id = int(rank['aid'])
        need_params = [
            'pts', 'author', 'mid', 'play', 'video_review', 'coins',
            'duration', 'title'
        ]
        # Row layout: [position, *need_params values, index, day_index].
        # The inner `ii` lives in the comprehension's own scope and does
        # not clobber the enumerate index used as the first element.
        temp_rank_list = [
            ii, *[rank[ii] for ii in need_params], index, day_index
        ]
        now_av_id.append(av_id)
        if not self.check_type(av_id):
            continue
        self.check_rank_rose(av_id, temp_rank_list)
        # temp_rank_list[1] is 'pts' (the rank score).
        if self.add_av(av_id, ii, temp_rank_list[1]):
            rank_map[av_id] = temp_rank_list
    ''' check assign av rank '''
    for ii in self.assign_ids:
        if not ii in self.public:
            wait_check_public.append(ii)
        if not ii in self.last_view and not ii in self.rank_map:
            self.rank_map[ii] = []
    # True if any explicitly-assigned av appears on this page.
    have_assign = len([0 for ii in self.assign_ids if ii in now_av_id]) > 0
    ''' check tid type '''
    # Resolve each candidate's tid concurrently, then join before filtering.
    threading_public = []
    for ii in rank_map.keys():
        work = threading.Thread(target=self.check_type_req, args=(ii, ))
        threading_public.append(work)
    for work in threading_public:
        work.start()
    for work in threading_public:
        work.join()
    for ii, jj in rank_map.items():
        # check_type may return non-bool sentinels; only exact True passes.
        if self.check_type(ii) != True:
            continue
        if not ii in self.public:
            wait_check_public.append(ii)
        self.last_check[ii] = int(time.time())
        self.rank_map[ii] = jj
    ''' load public basic data '''
    threading_public = []
    for ii in wait_check_public:
        work = threading.Thread(target=self.public_data, args=(
            ii,
            0,
        ))
        threading_public.append(work)
    for work in threading_public:
        work.start()
    for work in threading_public:
        work.join()
    ''' begin monitor '''
    # Start a monitor thread for avs published within the last day.
    # NOTE(review): these monitor threads are started but never joined here.
    threading_list = []
    for ii, jj in self.public.items():
        if not ii in self.public_list and jj[0] + one_day > int(
                time.time()):
            work = threading.Thread(target=self.public_monitor, args=(
                ii,
                0,
            ))
            threading_list.append(work)
    for work in threading_list:
        work.start()
    return have_assign
def load_rank_index(self, index: int, day_index: int):
    ''' load rank '''
    # NOTE(review): duplicate definition of load_rank_index (an HTML-scraping
    # variant of the JSON-based one above); if both live in the same class,
    # this one silently overrides the other -- confirm which is intended.
    changeHeaders({'Referer': self.RANKING_URL % (index, day_index)})
    url = self.RANKING_URL % (index, day_index)
    html = basic_req(url, 0)
    rank_list = html.find_all('li', class_='rank-item')
    now_av_id = []           # every av id seen on this rank page
    wait_check_public = []   # av ids still missing public (pubdate/mid) data
    rank_map = {}            # av_id -> [rank, score, name, index, day_index]
    for av in rank_list:
        av_href = av.find_all('a')[0]['href']
        # href looks like ".../avXXXXXXX/"; strip the 'av' prefix and the
        # trailing character -- TODO confirm the trailing slash assumption.
        av_id = int(re.findall('av.*', av_href)[0][2:-1])
        now_av_id.append(av_id)
        if not self.check_type(av_id):
            continue
        rank = int(av.find_all('div', class_='num')[0].text)
        score = int(
            av.find_all('div', class_='pts')[0].find_all('div')[0].text)
        name = av.find_all('span')[2].text
        if self.add_av(av_id, rank, score):
            rank_map[av_id] = [rank, score, name, index, day_index]
    ''' check assign av rank '''
    for ii in self.assign_ids:
        if not ii in self.public:
            wait_check_public.append(ii)
        if not ii in self.last_view and not ii in self.rank_map:
            self.rank_map[ii] = []
    # True if any explicitly-assigned av appears on this page.
    have_assign = len([0 for ii in self.assign_ids if ii in now_av_id]) > 0
    ''' check tid type '''
    # Resolve each candidate's tid concurrently, then join before filtering.
    threading_public = []
    for ii in rank_map.keys():
        work = threading.Thread(target=self.check_type_req, args=(ii, ))
        threading_public.append(work)
    for work in threading_public:
        work.start()
    for work in threading_public:
        work.join()
    for ii, jj in rank_map.items():
        # check_type may return non-bool sentinels; only exact True passes.
        if self.check_type(ii) != True:
            continue
        if not ii in self.public:
            wait_check_public.append(ii)
        self.last_check[ii] = int(time.time())
        self.rank_map[ii] = jj
    ''' load public basic data '''
    threading_public = []
    for ii in wait_check_public:
        work = threading.Thread(target=self.public_data, args=(
            ii,
            0,
        ))
        threading_public.append(work)
    for work in threading_public:
        work.start()
    for work in threading_public:
        work.join()
    ''' begin monitor '''
    # Start a monitor thread for avs published within the last day.
    # NOTE(review): these monitor threads are started but never joined here.
    threading_list = []
    for ii, jj in self.public.items():
        if not ii in self.public_list and jj[0] + one_day > int(
                time.time()):
            work = threading.Thread(target=self.public_monitor, args=(
                ii,
                0,
            ))
            threading_list.append(work)
    for work in threading_list:
        work.start()
    return have_assign
def generate_eleven(self):
    ################################################################
    #
    # [generate eleven] version 19.4.21(Test ✔️) write by gunjianpan
    #
    # 1. random generate 15 bit param `callback`;
    # 2. use callback request OCEANBALL -> get origin js;
    # 3. eval once -> (match array, and then chr() it) -> decoder js;
    # 4. replace document and windows(you also can use execjs & jsdom);
    # 5. warning you should replace `this` to some params,
    #    otherwise you will get the site's decoy string
    #    ('the boss bought his mistress a bag but gave you no money
    #    for a house') instead of a valid token;
    # 6. finsh, return, and joint params;
    #
    ################################################################
    # Build the oceanball request URL with a random callback name and
    # a millisecond timestamp cache-buster.
    callback = self.generate_callback(15)
    now_time = int(time.time() * 1000)
    url = '{}?callback={}&_={}'.format(OCEANBALL_URL, callback, now_time)
    referer_url = HOTEL_DETAIL_URL % self.default_hotel_id
    changeHeaders({'Referer': referer_url})
    oceanball_js = basic_req(url, 3)
    # The obfuscated JS embeds an int array; each element minus `offset`
    # is a char code of the real decoder source.
    array = re.findall(r'\(\[(.*)\],', oceanball_js)[0].split(',')
    array = [int(ii) for ii in array]
    offset = int(re.findall(r'item-(\d*?)\)', oceanball_js)[0])
    ''' String.fromCharCode '''
    oe = ''.join([chr(ii - offset) for ii in array])
    ''' replace window[callback] callback function '''
    # Swap the JSONP callback invocation for a plain `return <token-expr>;`
    # so js2py can hand the value back directly.
    replace_str = re.findall(r'{}\(new.*\)\);'.format(callback), oe)[0]
    eleven_params = re.findall(
        r'{}\(new.*\+ (.*?) \+.*\)\);'.format(callback), oe)[0]
    replaced_str = 'return {};'.format(eleven_params)
    oe = oe.replace(replace_str, replaced_str)
    oe = oe.replace('\'', '"').replace('\r', '')
    oe = oe.replace(';!', 'let aaa = ', 1)
    # JS prelude injected in place of the decoder's opening `function(){`:
    # stubs out href/document(a)/window(b)/HTMLSpanElement(c)/navigator
    # so the browser-fingerprinting code runs headless under js2py.
    replace = '''
    function(){let href='https://hotels.ctrip.com/hotel/4889292.html';
    a={'documentElement': {'attributes':{}}};
    b={};
    function c(){};
    userAgent ='Chrome/73.0.3682.0';
    geolocation = 0;
    '''
    ''' replace document & windown & navigator '''
    # Neutralize every DOM/BOM touchpoint with constants or the stubs above;
    # replacement order matters (longer patterns first, `document`/`window`
    # renamed last so earlier patterns still match).
    oe = oe.replace('document.body.innerHTML.length',
                    '888').replace('document.body.innerHTML', '""')
    oe = oe.replace('document.createElement("div")', '{}')
    oe = oe.replace('window.HTMLSpanElement',
                    'c').replace('document.createElement("span")', '1')
    oe = oe.replace('window.location.href',
                    'href').replace('location.href', 'href')
    oe = oe.replace('navigator.', '')
    oe = oe.replace('new Image().', '')
    oe = oe.replace('document.all', '0').replace('document.referrer', '""')
    oe = oe.replace('this || ', '')
    oe = oe.replace('window["document"]', 'a')
    oe = oe.replace('document', 'a').replace('window', 'b')
    oe = oe.replace('function(){', replace, 1)
    ''' eval script '''
    eleven = js2py.eval_js(oe)
    echo(1, 'eleven', eleven)
    return eleven