def clean_csv(av_id: int):
    ''' clean csv '''
    csv_path = os.path.join(history_dir, '{}.csv'.format(av_id))
    output_path = os.path.join(history_data_dir, '{}_new.csv'.format(av_id))
    csv = read_file(csv_path)
    last_time, last_view = csv[0].split(',')[:2]
    result = [csv[0]]
    last_time = time_stamp(last_time)
    last_view = int(last_view)
    empty_line = ','.join([' '] * (len(csv[0].split(',')) + 1))
    for line in csv[1:]:
        now_time, now_view = line.split(',')[:2]
        now_time = time_stamp(now_time)
        now_view = int(now_view)
        time_gap = now_time - last_time
        if now_view < last_view or now_view - last_view > 5000:
            # echo(1, last_view, last_time, now_view, now_time)
            continue
        if abs(time_gap) > 150:
            for ii in range(int((time_gap - 30) // 120)):
                result.append(empty_line)
        if abs(time_gap) > 90:
            # echo(0, last_view, last_time, now_view, now_time)
            result.append(line)
            last_view, last_time = now_view, now_time
        # else:
        #     echo(2, last_view, last_time, now_view, now_time)
    with open(output_path, 'w') as f:
        f.write('\n'.join(result))
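# Hedged usage note (assumption, not part of the original source): clean_csv()
# expects rows of "{history_dir}/{av_id}.csv" to start with "<time>,<view>",
# drops rows whose view count decreases or jumps by more than 5000, pads gaps
# longer than 150s with blank rows, and writes "{history_data_dir}/{av_id}_new.csv":
#
#     clean_csv(170001)   # hypothetical av_id; history_dir / read_file / time_stamp come from this module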
def update_article(self, article_id: str, article_body: str):
    p = self.share2article[article_id][-2].split("/")[-1]
    article_info = self.list_recent[p]
    data = {
        "fileId": p,
        "parentId": article_info["parentId"],
        "domain": article_info["domain"],
        "rootVersion": -1,
        "sessionId": "",
        "modifyTime": int(time_stamp()),
        "bodyString": article_body,
        "transactionId": p,
        "transactionTime": int(time_stamp()),
        "orgEditorType": article_info["orgEditorType"],
        "tags": article_info["tags"],
        "cstk": self.cstk,
    }
    url = self.SYNC_URL % ("push", self.cstk)
    req = basic_req(url, 11, data=data, header=self.get_ynote_web_header(1))
    if req is None or list(req.keys()) != [
        "entry",
        "meta",
        "effectedShareEntries",
        "forcePullVersion",
        "effected",
    ]:
        echo(
            "0|error",
            "Update article_id {} Error".format(article_id),
            req.json() if req is not None else "",
        )
        return False
    echo("1|warning", "Update article_id {} Success!!!".format(article_id))
    return True
def check_rank(self, bv_id: str):
    rank_info = self.rank_map[bv_id] if bv_id in self.rank_map else {}
    stat = self.get_view_detail(bv_id)
    if stat is None or "stat" not in stat:
        return
    stat = stat["stat"]
    data = {**stat, "time": time_str(), **rank_info}
    need = [
        "time", "view", "like", "coin", "favorite", "reply",
        "share", "danmaku", "id", "pts", "type", "day",
    ]
    output = [str(data[ii]) for ii in need if ii in data]
    output = output + [str(v) for k, v in data.items() if k not in need]
    with codecs.open("{}{}.csv".format(history_dir, bv_id), "a", encoding="utf-8") as f:
        f.write(",".join(output) + "\n")
    if (bv_id in self.last_check
            and int(time_stamp()) - self.last_check[bv_id] > one_day / 2):
        self.del_map[bv_id] = 1
        del self.rank_map[bv_id]
        if bv_id == self.basic_bv_id:
            clean_csv(bv_id)
    elif (bv_id not in self.last_check
            and int(time_stamp()) > one_day + self.begin_timestamp):
        self.del_map[bv_id] = 1
        del self.rank_map[bv_id]
        if bv_id == self.basic_bv_id:
            clean_csv(bv_id)
    self.last_view[bv_id] = data["view"]
    now_time = time_stamp()
    if bv_id not in self.public["T"] or bv_id not in self.assign_ids:
        return
    time_gap = (now_time - self.public["T"][bv_id][0]) / 60
    echo("0|debug", bv_id, time_gap < (4.5 * one_day / 60), self.public["T"][bv_id])
    if time_gap >= (4.5 * one_day / 60):
        return
    if bv_id not in self.check_done:
        self.check_done[bv_id] = []
    echo("3|info", "Time Gap:", int(time_gap / 10))
    if (int(time_gap / 10) in self.history_check_list
            and int(time_gap / 10) not in self.check_done[bv_id]):
        self.history_rank(time_gap, data, bv_id)
def public_monitor(self, bv_id: str):
    """ a monitor """
    self.public["L"].append(bv_id)
    created, mid = self.public["T"][bv_id]
    self.get_star_num(mid)
    self.check_rank_v2(bv_id)
    time.sleep(5)
    follower = self.star["T"][mid] if mid in self.star["T"] else 0
    data1 = self.data_v2[bv_id] if bv_id in self.data_v2 else {}
    sleep_time = created + one_day - int(time_stamp())
    if sleep_time < 0:
        return
    echo("4|debug", "Monitor Begin %s" % (bv_id))
    time.sleep(sleep_time)
    self.get_star_num(mid)
    self.check_rank_v2(bv_id)
    time.sleep(5)
    follower_2 = self.star["T"][mid] if mid in self.star["T"] else 0
    data2 = self.data_v2[bv_id] if bv_id in self.data_v2 else {}
    data = [
        time_str(created),
        bv_id,
        follower,
        follower_2,
        *list(data1.values()),
        *list(data2.values()),
    ]
    with codecs.open(data_dir + "public.csv", "a", encoding="utf-8") as f:
        f.write(",".join([str(ii) for ii in data]) + "\n")
def get_tb_getdetail(self, item_id: int):
    if (
        'uland' not in self.cookies
        or time_stamp() - self.m_time > self.ONE_HOURS / 2
    ):
        self.get_m_h5_tk()
    req = self.get_tb_getdetail_once(item_id, self.cookies['uland'])
    if req is not None:
        req_text = req.text
        re_json = json.loads(req_text[req_text.find("{") : -1])
        return re_json["data"]["item"]
def get_uland_url(self, uland_url: str):
    if (
        'uland' not in self.cookies
        # or not self.M in self.cookies['uland']
        or time_stamp() - self.m_time > self.ONE_HOURS / 2
    ):
        self.get_m_h5_tk()
    s_req = self.get_uland_url_once(uland_url, self.cookies['uland'])
    req_text = s_req.text
    re_json = json.loads(req_text[req_text.find("{") : -1])
    return re_json["data"]["resultList"][0]["itemId"]
def decoder_tpwd_once(self, article_id: str, tpwd: str, mode: int = 0):
    req = self.decoder_tpwd(tpwd)
    if req is None or not len(req):
        return
    temp_map = {ii: req[ii] for ii in self.NEED_KEY}
    if temp_map["validDate"] == self.ZERO_STAMP or "-" in temp_map["validDate"]:
        temp_map["validDate"] = 1500000000
    else:
        temp_map["validDate"] = (
            time_stamp(time_format="%d天%H小时%M分%S秒", time_str=req["validDate"])
            - self.BASIC_STAMP
            + time_stamp()
        )
    temp_map["validDate"] = time_str(temp_map["validDate"])
    temp_map["url"] = temp_map["url"].strip()
    if article_id not in self.tpwd_map:
        self.tpwd_map[article_id] = {}
    self.tpwd_map[article_id][tpwd] = temp_map
    if not mode:
        self.decoder_tpwd_url(article_id, tpwd)
async def _heartbeat_loop(self):
    ''' heart beat every 30s '''
    if self._types and int(time_stamp()) > self._begin_time + one_day:
        self.close()
    for _ in range(int(one_day * 7 / 30)):
        try:
            await self._websocket.send_bytes(
                self.parse_struct({}, Operation.SEND_HEARTBEAT))
            await asyncio.sleep(30)
        except (asyncio.CancelledError, aiohttp.ClientConnectorError):
            break
def get_type(self, gt: str, cookies: dict = {}) -> dict:
    url = self.GETTYPE_URL % (gt, int(time_stamp() * 1000))
    headers = self.get_login_headers(3, cookies)
    req, cookie = proxy_req(url, 3, header=headers, need_cookie=True)
    j_begin = req.find('{') if req else -1
    if req == '' or j_begin == -1:
        if can_retry(self.GETTYPE_URL):
            return self.get_type(gt, cookies)
        else:
            return None, {}
    type_json = json.loads(req[j_begin:-1])
    return type_json['data'], cookie
def get_m_h5_tk(self):
    self.m_time = time_stamp()

    def get_cookie_once(key, func, *param):
        req = func(*param)
        if req is not None:
            self.cookies[key] = req.cookies.get_dict()
            echo(1, "get {} cookie:".format(key), self.cookies[key])

    get_cookie_once('uland', self.get_uland_url_once, self.uland_url)
    if False:
        get_cookie_once('finger', self.get_finger_once, self.test_item_id)
        get_cookie_once('baichuan', self.get_baichuan_once,
                        self.test_item_id, self.test_finger_id)
def on_can_message(self, msg):
    if msg.interface == CanMessage.CANTYPE_CONTROL:
        if msg.id in self.kia_db._frame_id_to_message:
            # Kia CAN message
            kia_can_msg = self.kia_db.decode_message(msg.id, bytearray(msg.data))
            msg_type = self.kia_db.get_message_by_frame_id(msg.id)
            if msg_type.name == "STEERING_ANGLE":
                steering_wheel_angle = float(
                    kia_can_msg["STEERING_ANGLE_angle"]) * math.pi / 180.0
                steering_wheel_angle_msg = Float32(data=steering_wheel_angle)
                yaw_angle_msg = Float32(
                    data=steering_wheel_angle / KIA_SOUL_STEERING_RATIO)
                self.steering_wheel_angle_raw_pub.publish(steering_wheel_angle_msg)
                self.steering_angle_raw_pub.publish(yaw_angle_msg)
                joint_msg = JointState()
                joint_msg.header.stamp = util.time_stamp(msg.can_timestamp)
                joint_msg.name = [
                    "steering_joint", "yaw",
                    "front_left_steer_joint", "front_right_steer_joint"
                ]
                joint_msg.position = [
                    steering_wheel_angle,
                    steering_wheel_angle / KIA_SOUL_STEERING_RATIO,
                    steering_wheel_angle / KIA_SOUL_STEERING_RATIO,
                    steering_wheel_angle / KIA_SOUL_STEERING_RATIO
                ]
                self.steering_joint_states_pub.publish(joint_msg)
            elif msg_type.name == "SPEED":
                # print(kia_can_msg)
                speed = util.mph_to_ms(float(kia_can_msg["SPEED_rear_left"]))
                self.on_speed(speed, msg.can_timestamp)
        elif msg.id in self.oscc_db._frame_id_to_message:
            # OSCC Message. Currently this only publishes 0 or 1 to indicate
            # enabled or not. In the future this should be changed to
            # throttle/brake/steering values but that requires a firmware change
            # to the OSCC
            return
            oscc_can_msg = self.oscc_db.decode_message(msg.id, bytearray(msg.data))
            if oscc_can_msg.name == "BRAKE_REPORT":
                self.brake_pedal_pub.publish(oscc_can_msg.brake_report_enabled)
            elif oscc_can_msg.name == "STEERING_REPORT":
                self.steering_torque.publish(oscc_can_msg.steering_report_enabled)
            elif oscc_can_msg.name == "THROTTLE_REPORT":
                self.accel_pedal_pub.publish(oscc_can_msg.throttle_report_enabled)
def __init__(self, av_id: int, types=0, p: int = -1):
    ''' init class '''
    self._av_id = av_id
    self._room_id = None
    self._count = 1
    self._types = types
    self._begin_time = int(time_stamp())
    self._loop = asyncio.get_event_loop()
    self._session = aiohttp.ClientSession(loop=self._loop)
    self._is_running = False
    self._websocket = None
    self._p = p if p > 0 else 1
    self._getroom_id()
def get_finger(self, item_id: int):
    if (
        'finger' not in self.cookies
        or self.M not in self.cookies['finger']
        or time_stamp() - self.m_time > self.ONE_HOURS / 2
    ):
        self.get_m_h5_tk()
    s_req = self.get_finger_once(item_id, self.cookies['finger'])
    if s_req is None:
        return
    try:
        return s_req.json()['data']['fingerId']
    except Exception:
        return
def get_baichuan(self, item_id: int):
    if (
        'baichuan' not in self.cookies
        or self.M not in self.cookies['baichuan']
        or time_stamp() - self.m_time > self.ONE_HOURS / 2
    ):
        self.get_m_h5_tk()
    finger_id = self.get_finger(item_id)
    if finger_id is None:
        return
    echo(4, 'finger id:', finger_id)
    req = self.get_baichuan_once(item_id, finger_id, self.cookies['baichuan'])
    if req is not None:
        return req.json()['data']
def parse_struct(self, data: dict, operation: int):
    ''' parse struct '''
    assert int(time_stamp()) < self._begin_time + 7 * one_day, 'Excess Max RunTime!!!'
    if operation == 7:
        body = json.dumps(data).replace(" ", '').encode('utf-8')
    else:
        body = self.HEARTBEAT_BODY.encode('utf-8')
    header = self.HEADER_STRUCT.pack(self.HEADER_STRUCT.size + len(body),
                                     self.HEADER_STRUCT.size, 1, operation,
                                     self._count, 0)
    self._count += 1
    return header + body
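# Hedged usage sketch (assumption, not part of the original source): operation 7
# appears to be the "enter room" packet and carries a JSON body, while other
# operations (e.g. Operation.SEND_HEARTBEAT in _heartbeat_loop above) fall back
# to HEARTBEAT_BODY. A hypothetical call sequence:
#
#     enter_pkt = self.parse_struct({"roomid": self._room_id}, 7)   # payload fields illustrative only
#     beat_pkt = self.parse_struct({}, Operation.SEND_HEARTBEAT)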
def get_tb_h5_api(self, api: str, jsv: str, refer_url: str, data: dict,
                  j_data_t: dict = {}, cookies: dict = {}, mode: int = 0,
                  data_str: str = None):
    """ tb h5 api @2019.11.6 ✔️Tested """
    step = self.M in cookies
    if data_str is None:
        data_str = json_str(data)
    headers = {
        "Accept": 'application/json',
        "referer": refer_url,
        "Agent": get_use_agent('mobile')
    }
    if step:
        headers["Cookie"] = encoder_cookie(cookies)
    appkey = "12574478"
    token = cookies[self.M].split("_")[0] if step else ""
    t = int(time_stamp() * 1000)
    j_data = {
        "jsv": jsv,
        "appKey": appkey,
        "t": t,
        "sign": self.get_tb_h5_token(token, t, appkey, data_str),
        "api": api,
        "v": 1.0,
        "timeout": 20000,
        "AntiCreep": True,
        "AntiFlood": True,
        "type": "originaljson",
        "dataType": "jsonp",
        **j_data_t
    }
    if mode == 0:
        j_data['data'] = data_str
    mtop_url = encoder_url(j_data, self.MTOP_URL % (api, int(j_data['v'])))
    if mode == 0:
        req = proxy_req(mtop_url, 2, header=headers)
    else:
        req = proxy_req(mtop_url, 12, data=data, header=headers)
    # echo(4, 'request once.')
    if req is None:
        if can_retry(self.MTOP_URL % (api, int(j_data['v']))):
            return self.get_tb_h5_api(api, jsv, refer_url, data, j_data_t,
                                      cookies, mode, data_str)
        else:
            return
    return req
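# Hedged note (sketch, not part of the original source): per get_tb_h5_token
# defined elsewhere in this class, the mtop "sign" parameter is the MD5 of
# "token&t&appKey&dataStr", where token is the first "_"-separated segment of
# the _m_h5_tk cookie. A standalone equivalent:
#
#     import hashlib
#     def mtop_sign(token: str, t: int, appkey: str, data_str: str) -> str:
#         return hashlib.md5("&".join([token, str(t), appkey, data_str]).encode()).hexdigest()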
def load_history_file(self, av_id: int, av_info: dict):
    data_path = '{}{}_new.csv'.format(history_data_dir, av_id)
    history_list = read_file(data_path)[:2880]
    if not len(history_list):
        return
    created, title = av_info['created'], av_info['title']
    history_list = [ii.split(',') for ii in history_list]
    time_map = {
        round((time_stamp(ii[0]) - created) / 120) * 2: ii
        for ii in history_list if ii[0] != ''
    }
    last_data = [0] * 8
    for ii in self.history_map.keys():
        if ii in time_map:
            self.history_map[ii][av_id] = time_map[ii]
            last_data = time_map[ii] + last_data[len(time_map[ii]):]
        else:
            self.history_map[ii][av_id] = last_data
def load_history_file(self, bv_id: str, bv_info: dict):
    data_path = "{}{}_new.csv".format(history_data_dir, bv_id)
    history_list = read_file(data_path)[:3660]
    if not len(history_list):
        return
    created, title = bv_info["created"], bv_info["title"]
    history_list = [ii.split(",") for ii in history_list]
    time_map = {
        round((time_stamp(ii[0]) - created) / 120) * 2: ii
        for ii in history_list if ii[0] != ""
    }
    last_data = [0] * 8
    for ii in self.history_map.keys():
        if ii in time_map:
            self.history_map[ii][bv_id] = time_map[ii]
            last_data = time_map[ii] + last_data[len(time_map[ii]):]
        else:
            self.history_map[ii][bv_id] = last_data
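# Hedged note (assumption, not part of the original source): time_map keys are
# 2-minute buckets measured from the video's creation time, e.g. a sample taken
# 310s after `created` lands in bucket round(310 / 120) * 2 == 6, so
# self.history_map is expected to be pre-built with keys 0, 2, 4, ... covering
# roughly the first five days (3660 rows at one sample per ~2 minutes).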
def parse_struct(self, data: dict, operation: int):
    """ parse struct """
    assert (
        int(time_stamp()) < self._begin_time + 7 * one_day
    ), "Excess Max RunTime!!!"
    if operation == 7:
        body = json.dumps(data).replace(" ", "").encode("utf-8")
    else:
        body = self.HEARTBEAT_BODY.encode("utf-8")
    header = self.HEADER_STRUCT.pack(
        self.HEADER_STRUCT.size + len(body),
        self.HEADER_STRUCT.size,
        1,
        operation,
        self._count,
        0,
    )
    self._count += 1
    return header + body
def __init__(self):
    super(Up, self).__init__()
    self.update_proxy(1)
    self.rank = {"T": {}, "L": {}}
    self.rank_type = {}
    self.public = {"T": {}, "L": []}
    self.star = {"T": {}, "L": {}}
    self.data_v2 = {}
    self.have_assign = {1: [], 3: [], "T": []}
    self.last_check = {}
    self.last_view = {}
    self.comment = {}
    self.email_send_time = {}
    self.begin_timestamp = int(time_stamp())
    self.bv_list = []
    self.bv_ids = {}
    self.check_done = {}
    self.pool = ThreadPoolExecutor(max_workers=10)
    self.monitor_pool = ThreadPoolExecutor(max_workers=50)
    self.load_history_data()
class ActivateArticle(TBK): """ activate article in youdao Cloud""" Y_URL = "https://note.youdao.com/" WEB_URL = f"{Y_URL}web/" API_P_URL = f"{Y_URL}yws/api/personal/" SYNC_URL = f"{API_P_URL}sync?method=%s&keyfrom=web&cstk=%s" NOTE_URL = f"{Y_URL}yws/public/note/%s?editorType=0" SHARE_URL = f"{Y_URL}ynoteshare1/index.html?id=%s&type=note" GET_SHARE_URL = f"{API_P_URL}share?method=get&shareKey=%s" LISTRECENT_URL = ( f"{API_P_URL}file?method=listRecent&offset=%d&limit=30&keyfrom=web&cstk=%s" ) MYSHARE_URL = ( f"{API_P_URL}myshare?method=get&checkBan=true&entryId=%s&keyfrom=web&cstk=%s" ) DECODER_TPWD_URL = "https://api.taokouling.com/tkl/tkljm?apikey=%s&tkl=¥%s¥" Y_DOC_JS_URL = "https://shared-https.ydstatic.com/ynote/ydoc/index-6f5231c139.js" MTOP_URL = "https://h5api.m.taobao.com/h5/%s/%d.0/" ITEM_URL = "https://item.taobao.com/item.htm?id=%d" DETAIL_URL = 'https://detail.m.tmall.com/item.htm?id=%d' S_LIST_SQL = "SELECT `id`, article_id, title, q, created_at from article;" I_LIST_SQL = "INSERT INTO article (article_id, title, q) VALUES %s;" R_LIST_SQL = "REPLACE INTO article (`id`, article_id, title, q, is_deleted, created_at) VALUES %s;" S_ARTICLE_SQL = 'SELECT `id`, article_id, tpwd_id, item_id, tpwd, domain, content, url, commission_rate, commission_type, expire_at, created_at from article_tpwd WHERE `article_id` = "%s";' I_ARTICLE_SQL = "INSERT INTO article_tpwd (article_id, tpwd_id, item_id, tpwd, domain, content, url, commission_rate, commission_type, expire_at) VALUES %s;" R_ARTICLE_SQL = "REPLACE INTO article_tpwd (`id`, article_id, tpwd_id, item_id, tpwd, domain, content, url, commission_rate, commission_type, expire_at, created_at, is_deleted) VALUES %s;" END_TEXT = "</text><inline-styles/><styles/></para></body></note>" TPWD_REG = "\p{Sc}(\w{8,12}?)\p{Sc}" TPWD_REG2 = "(\p{Sc}\w{8,12}\p{Sc})" JSON_KEYS = ["p", "ct", "su", "pr", "au","pv","mt","sz","domain", "tl","content", ] URL_DOMAIN = { 0: "s.click.taobao.com", 1: "item.taobao.com", 2: "detail.tmall.com", 5: "uland.taobao.com", 10: "taoquan.taobao.com", 11: "a.m.taobao.com", 15: "empty", 16: "failure", } NEED_KEY = ["content", "url", "validDate", "picUrl"] ONE_HOURS = 3600 ONE_DAY = 24 M = "_m_h5_tk" ZERO_STAMP = "0天0小时0分0秒" T_FORMAT = "%m-%d %H:%M" BASIC_STAMP = ( time_stamp(time_format="%d天%H小时%M分%S秒", time_str="1天0小时0分0秒") - ONE_DAY * ONE_HOURS ) def __init__(self): super(ActivateArticle, self).__init__() self.Db = Db("tbk") self.Db.create_table(os.path.join(root_dir, "tpwd.sql")) self.Db.create_table(os.path.join(root_dir, "article.sql")) self.tpwd_map = {} self.tpwd_db_map = {} self.tpwds = {} self.cookies = {} self.share2article = {} self.article_list = {} self.list_recent = {} self.idx = [] self.empty_content = "" self.tpwd_exec = ThreadPoolExecutor(max_workers=20) self.need_del = {} self.get_share_list() def load_process(self): self.load_ids() if len(self.idx) < 30: time.sleep(np.random.rand() * 30 + 6) self.load_ids() self.load_article_list() # self.update_tpwd() self.get_m_h5_tk() self.get_ynote_file() self.get_ynote_file(1) def load_ids(self): changeJsonTimeout(5) req = self.basic_youdao(self.home_id) if req == "": echo("0|error", "Get The Home Page Info Error!!! Please retry->->->") return self.idx = regex.findall("id=(\w*?)<", req) if len(self.idx) < 30: echo("0|error", "The Num of id is error!! 
Please check it.") else: echo(1, "Load Article List {} items.".format(len(self.idx))) def get_share_info(self, share_id: str): changeJsonTimeout(4) url = self.GET_SHARE_URL % share_id headers = self.get_tb_headers(self.Y_URL) req = basic_req(url, 1, header=headers) if req is None: return info = req["entry"] self.share2article[share_id] = (info["name"].replace('.note', ''), info["id"], info["lastUpdateTime"]) return req def basic_youdao(self, idx: str, use_proxy: bool = True): url = self.NOTE_URL % idx refer_url = self.SHARE_URL % idx headers = { "Accept": "*/*", "Referer": refer_url, "X-Requested-With": "XMLHttpRequest", } req_req = proxy_req if use_proxy else basic_req req = req_req(url, 1, header=headers, config={'timeout': 8}) if req is None or list(req.keys()) != self.JSON_KEYS: if can_retry(url): echo(2, "retry") return self.basic_youdao(idx) else: echo(1, "retry upper time") return "" return req["content"] def load_article_pipeline(self, mode: int = 0): article_exec = ThreadPoolExecutor(max_workers=5) a_list = [article_exec.submit(self.load_article, ii, mode) for ii in self.idx] list(as_completed(a_list)) self.load_list2db() def load_article(self, article_id: str, mode: int = 0, is_load2db: bool = True): if mode: self.get_share_info(article_id) self.load_list2db() return if article_id not in self.tpwds: article = self.basic_youdao(article_id) tpwds = list({ii: 0 for ii in regex.findall(self.TPWD_REG, article)}) self.tpwds[article_id] = tpwds else: tpwds = self.tpwds[article_id] if article_id not in self.tpwd_map: self.tpwd_map[article_id] = {} time = 0 au_list = [] no_type = [ ii for ii, jj in self.tpwd_map[article_id].items() if "type" not in jj or jj["item_id"] is None ] while ( len(self.tpwd_map[article_id]) < len(tpwds) or (len(no_type) and not time) ) and time < 5: thread_list = [ii for ii in tpwds if not ii in self.tpwd_map[article_id]] echo(1, article_id, "tpwds len:", len(tpwds), "need load", len(thread_list)) thread_list = [ self.tpwd_exec.submit(self.decoder_tpwd_once, article_id, ii) for ii in thread_list ] list(as_completed(thread_list)) no_type = [ ii for ii, jj in self.tpwd_map[article_id].items() if "type" not in jj or jj["item_id"] is None ] au_list.extend( [ self.tpwd_exec.submit(self.decoder_tpwd_url, article_id, ii) for ii in no_type ] ) time += 1 list(as_completed(au_list)) no_title = [ ii for ii, jj in self.tpwd_map[article_id].items() if "title" not in jj ] time = 0 while len(no_title) and time < 5: title_list = [ self.tpwd_exec.submit(self.get_item_title, article_id, ii) for ii in no_title ] echo(1, article_id, "need get title:", len(title_list)) list(as_completed(title_list)) time += 1 no_title = [ ii for ii, jj in self.tpwd_map[article_id].items() if "title" not in jj ] if is_load2db: self.load_article2db(article_id) def update_title(self, article_id: str): self.tpwd_map[article_id] = { ii[3]: {"content": ii[1], "item_id": ii[0]} for ii in self.article_list[article_id].values() } no_title = [ ii for ii, jj in self.tpwd_map[article_id].items() if "title" not in jj ] time = 0 while len(no_title) and time < 5: title_list = [ self.tpwd_exec.submit(self.get_item_title, article_id, ii) for ii in no_title ] echo(1, article_id, "need get title:", len(title_list)) list(as_completed(title_list)) time += 1 no_title = [ ii for ii, jj in self.tpwd_map[article_id].items() if "title" not in jj ] update_num = len( [ 1 for ii, jj in self.tpwd_map[article_id].items() if "title" in jj and jj["content"] != jj["title"] ] ) echo(2, "Update", article_id, update_num, "Title 
Success!!!") self.update_article2db(article_id) def load_list2db(self): t_share_map = self.share2article.copy() share_map = self.get_share_list() insert_list, update_list = [], [] for ii, jj in t_share_map.items(): if ii in share_map: t = share_map[ii] update_list.append((t[0], ii, jj[0], jj[1], 0, t[-1])) else: insert_list.append((ii, jj[0], jj[1])) self.update_db(insert_list, "Insert Article List", 1) self.update_db(update_list, "Update Article List", 1) def get_share_list(self): share_list = self.Db.select_db(self.S_LIST_SQL) share_map = {} for ii, jj in enumerate(share_list): t = jj[-1].strftime("%Y-%m-%d %H:%M:%S") share_map[jj[1]] = (*jj[:-1], t) self.share2article = share_map return share_map def load_article2db(self, article_id: str): m = self.tpwd_map[article_id] m = {ii: jj for ii, jj in m.items() if jj["url"]} tpwds = list(set(self.tpwds[article_id])) data = [ ( article_id, ii, m[jj]["item_id"], jj, m[jj]["type"], m[jj]["content"], m[jj]["url"], 0, "", m[jj]["validDate"], ) for ii, jj in enumerate(tpwds) if jj in m and "item_id" in m[jj] and m[jj]["type"] != 15 ] data_map = {ii[3]: ii for ii in data} update_list, insert_list = [], [] for ii in data: if ii[3] in self.tpwd_db_map[article_id]: t = self.tpwd_db_map[article_id][ii[3]] update_list.append((t[0], *ii, t[-1], 0)) else: insert_list.append(ii) for ii, jj in self.tpwd_db_map[article_id].items(): if ii not in data_map: update_list.append((*jj, 1)) self.update_db(insert_list, f"article_id {article_id} Insert") self.update_db(update_list, f"article_id {article_id} Update") def update_tpwd(self, mode: int = 0, is_renew: bool = True, a_id: str = None): update_num = 0 for article_id, jj in self.article_list.items(): if a_id is not None and article_id != a_id: continue for o_tpwd, (num_iid, title, domain, tpwd, _, _, url) in jj.items(): c = jj[o_tpwd] if ( is_renew and self.URL_DOMAIN[1] not in url and self.URL_DOMAIN[2] not in url and self.URL_DOMAIN[10] not in url ): renew_type = 2 if url in self.URL_DOMAIN[5] else 1 origin_tpwd = self.convert2tpwd(url, title) if origin_tpwd is None: origin_tpwd = tpwd else: renew_type = 0 origin_tpwd = tpwd if num_iid == "" or domain == 16: c = ( *c[:2], 16, origin_tpwd, 1 if renew_type == 0 else 2, *c[-2:], ) else: c = self.generate_tpwd( title, int(num_iid), origin_tpwd, renew_type, c, mode ) self.article_list[article_id][o_tpwd] = c update_num += int(c[2] < 15 or (renew_type and not mode)) echo(2, "Update {} Tpwd Info Success!!".format(update_num)) def generate_tpwd( self, title: str, num_iid: int, renew_tpwd: str, renew_type: int, c: dict, mode: int ): goods = self.get_dg_material(title, num_iid) if goods is None or not len(goods): echo(0, "goods get", 'error' if goods is None else 'empty', ':', title, num_iid) return (*c[:2], 17, renew_tpwd, 1 if renew_type == 0 else 2, *c[-2:]) goods = goods[0] if "ysyl_click_url" in goods and len(goods["ysyl_click_url"]): url = goods["ysyl_click_url"] elif "coupon_share_url" in goods and len(goods["coupon_share_url"]): url = goods["coupon_share_url"] else: url = goods["url"] url = "https:{}".format(url) commission_rate = int(goods["commission_rate"]) commission_type = goods["commission_type"] tpwd = self.convert2tpwd(url, title) if tpwd is None: echo(0, "tpwd error:", tpwd) return (*c[:2], 18, renew_tpwd, 1 if renew_type == 0 else 2 * c[-2:]) if mode: return (*c[:3], tpwd, commission_rate, commission_type, c[-1]) if renew_type == 1: return (*c[:3], tpwd, 2, commission_type, c[-1]) return (*c[:3], tpwd, commission_rate, commission_type, c[-1]) def 
load_article_list(self): """ tpwd: [goods_id, goods_name, domain, tpwd, commission_rate, commission_type, url] """ for article_id in self.idx: article_list = self.get_article_db(article_id) self.article_list[article_id] = { ii[4]: [ii[3], ii[6], ii[5], ii[4], ii[8], ii[9], ii[7]] for ii in article_list } self.tpwd_db_map[article_id] = {ii[4]: ii for ii in article_list} have_id = [ii[0] for ii in self.tpwd_db_map[article_id].values()] need_del_id = [ii[0] for ii in article_list if ii[0] not in have_id] self.need_del[article_id] = need_del_id item_num = sum([len(ii) for ii in self.article_list.values()]) echo(1, "Load {} article list from db.".format(item_num)) def get_article_db(self, article_id: str): article_list = list(self.Db.select_db(self.S_ARTICLE_SQL % article_id)) for ii, jj in enumerate(article_list): t = jj[-1].strftime("%Y-%m-%d %H:%M:%S") y = jj[-2].strftime("%Y-%m-%d %H:%M:%S") article_list[ii] = [*jj[:-2], y, t] return article_list def update_db(self, data: list, types: str, mode: int = 0): if not len(data): return if "insert" in types.lower(): basic_sql = self.I_LIST_SQL if mode else self.I_ARTICLE_SQL else: basic_sql = self.R_LIST_SQL if mode else self.R_ARTICLE_SQL i_sql = basic_sql % str(data)[1:-1] insert_re = self.Db.insert_db(i_sql) if insert_re: echo(3, "{} {} info Success".format(types, len(data))) else: echo(0, "{} failed".format(types)) def decoder_tpwd_once(self, article_id: str, tpwd: str, mode: int = 0): req = self.decoder_tpwd(tpwd) if req is None or not len(req): return temp_map = {ii: req[ii] for ii in self.NEED_KEY} if temp_map["validDate"] == self.ZERO_STAMP or "-" in temp_map["validDate"]: temp_map["validDate"] = 1500000000 else: temp_map["validDate"] = ( time_stamp(time_format="%d天%H小时%M分%S秒", time_str=req["validDate"]) - self.BASIC_STAMP + time_stamp() ) temp_map["validDate"] = time_str(temp_map["validDate"]) temp_map["url"] = temp_map["url"].strip() if article_id not in self.tpwd_map: self.tpwd_map[article_id] = {} self.tpwd_map[article_id][tpwd] = temp_map if not mode: self.decoder_tpwd_url(article_id, tpwd) def decoder_tpwd_url(self, article_id: str, tpwd: str): temp_map = self.tpwd_map[article_id][tpwd] tpwd_type, item_id = self.analysis_tpwd_url(temp_map["url"]) if item_id is None: return temp_map["type"] = tpwd_type temp_map["item_id"] = item_id if tpwd_type < 20: echo(2, "Domain:", self.URL_DOMAIN[tpwd_type], "item id:", item_id) self.tpwd_map[article_id][tpwd] = temp_map def analysis_tpwd_url(self, url: str): if self.URL_DOMAIN[5] in url: return 5, self.get_uland_url(url) elif self.URL_DOMAIN[11] in url: return 11, self.get_a_m_url(url) elif self.URL_DOMAIN[0] in url: return 0, self.get_s_click_url(url) elif self.URL_DOMAIN[10] in url: return 10, 0 elif self.URL_DOMAIN[1] in url: good_id = self.get_item_detail(url) if good_id != "": return 1, good_id return 16, 0 elif url == "": return 15, 0 echo("0|warning", "New Domain:", regex.findall("https://(.*?)/", url), url) return 20, 0 def decoder_tpwd(self, tpwd: str): """ decoder the tpwd from taokouling """ url = self.DECODER_TPWD_URL % (self.api_key, tpwd) req = basic_req(url, 1) if ( req is None or isinstance(req, str) or 'ret' not in list(req.keys()) ): return {} return req def get_s_click_url(self, s_click_url: str): """ decoder s.click real jump url @validation time: 2019.10.23""" time.sleep(np.random.randint(0, 10)) item_url = self.get_s_click_location(s_click_url) if item_url is None: echo(3, "s_click_url location Error..") return return self.get_item_detail(item_url) def 
get_s_click_url_v1(self, s_click_url: str): """ decoder s.click real jump url @validation time: 2019.08.31""" if "tu=" not in s_click_url: tu_url = self.get_s_click_tu(s_click_url) else: tu_url = s_click_url if tu_url is None or "tu=" not in tu_url: echo(3, "s_click_url tu url ENd Retry..", tu_url) return qso = decoder_url(tu_url) if "tu" not in qso: if "alisec" in tu_url: echo("0|debug", "Request Too Fast") time.sleep(np.random.randint(10) * np.random.rand()) else: echo(0, s_click_url, tu_url) return redirect_url = urllib.parse.unquote(qso["tu"]) return self.get_s_click_detail(redirect_url, tu_url) def get_tb_headers(self, url: str = "", refer_url: str = "") -> dict: headers = {"Accept": get_accept("html"), "User-Agent": get_use_agent()} if url != "": headers["Host"] = url.split("/")[2] if refer_url != "": headers["referer"] = refer_url return headers def get_s_click_basic( self, s_click_url: str, retry_func=(lambda x: False), referer: str = "", allow_redirects: bool = True, is_direct: bool = False, ): headers = self.get_tb_headers(refer_url=referer) req_func = basic_req if is_direct else proxy_req req = req_func( s_click_url, 2, header=headers, config={"allow_redirects": allow_redirects} ) if is_direct: return req if req is None or retry_func(req): if can_retry(s_click_url): return self.get_s_click_basic( s_click_url, retry_func, referer, allow_redirects, is_direct ) else: return return req def get_s_click_tu(self, s_click_url: str): req = self.get_s_click_basic(s_click_url, lambda i: "tu=" not in i.url) if req is None: return return req.url def get_s_click_location(self, s_click_url: str): req = self.get_s_click_basic(s_click_url) if req is None: echo("0|warning", "s_click_url first click error.") return echo("1", "real_jump_address get") rj = regex.findall("real_jump_address = '(.*?)'", req.text) if not len(rj): echo("0|warning", "real_jump_address get error.") return rj = rj[0].replace("&", "&") req_rj = self.get_s_click_basic( rj, lambda i: "Location" not in i.headers, referer=rj, allow_redirects=False ) if req_rj is None: return return req_rj.headers["Location"] def get_s_click_detail(self, redirect_url: str, tu_url: str): headers = self.get_tb_headers(refer_url=tu_url) req = proxy_req(redirect_url, 2, header=headers) if req is None or "id=" not in req.url: if can_retry(redirect_url): return self.get_s_click_detail(redirect_url, tu_url) else: return return self.get_item_detail(req.url) def get_item_detail(self, item_url: str) -> str: item = decoder_url(item_url) if not "id" in item: echo(0, "id not found:", item_url) return "" return item["id"] def get_item_title_once(self, item_id: int) -> str: item = self.get_tb_getdetail(item_id) if item is None: return '' return item['title'] def get_item_title(self, article_id: str, tpwd: str): temp_map = self.tpwd_map[article_id][tpwd] if ( "item_id" not in temp_map or temp_map["item_id"] == "" or temp_map["item_id"] == "0" ): return item_id = int(temp_map["item_id"]) title = self.get_item_title_once(item_id) if title != "": self.tpwd_map[article_id][tpwd]["title"] = title def get_item_title_once_v1(self, item_id: int) -> str: req = self.get_item_basic(item_id) if req is None: return "" req_text = req.text req_title = regex.findall('data-title="(.*?)">', req_text) if len(req_title): return req_title[0] req_title = regex.findall('<meta name="keywords" content="(.*?)"', req_text) if len(req_title): return req_title[0] return "" def get_item_basic(self, item_id: int, url: str = ""): url = self.ITEM_URL % item_id if url == "" else url headers = 
{"Accept": get_accept("html")} req = proxy_req(url, 2, header=headers, config={"allow_redirects": False}) if req is None: if can_retry(url): return self.get_item_basic(item_id, url) return if req.status_code != 200: return self.get_item_basic(item_id, req.headers["Location"]) return req def get_uland_url(self, uland_url: str): if ( not 'uland' in self.cookies # or not self.M in self.cookies['uland'] or time_stamp() - self.m_time > self.ONE_HOURS / 2 ): self.get_m_h5_tk() s_req = self.get_uland_url_once(uland_url, self.cookies['uland']) req_text = s_req.text re_json = json.loads(req_text[req_text.find("{") : -1]) return re_json["data"]["resultList"][0]["itemId"] def get_a_m_url(self, a_m_url: str): req = self.get_a_m_basic(a_m_url) if req is None: return item_url = req.headers["location"] return self.get_item_detail(item_url) def get_a_m_basic(self, a_m_url: str): headers = self.get_tb_headers(a_m_url) req = proxy_req(a_m_url, 2, header=headers, config={"allow_redirects": False}) if req is None or "location" not in req.headers: if can_retry(a_m_url): return self.get_a_m_basic(a_m_url) return return req def get_m_h5_tk(self): self.m_time = time_stamp() def get_cookie_once(key, func, *param): req = func(*param) if req is not None: self.cookies[key] = req.cookies.get_dict() echo(1, "get {} cookie:".format(key), self.cookies[key]) get_cookie_once('uland', self.get_uland_url_once, self.uland_url) if False: get_cookie_once('finger', self.get_finger_once, self.test_item_id) get_cookie_once('baichuan', self.get_baichuan_once, self.test_item_id, self.test_finger_id) def get_baichuan(self, item_id: int): if ( not 'baichuan' in self.cookies or not self.M in self.cookies['baichuan'] or time_stamp() - self.m_time > self.ONE_HOURS / 2 ): self.get_m_h5_tk() finger_id = self.get_finger(item_id) if finger_id is None: return echo(4, 'finger id:', finger_id) req = self.get_baichuan_once(item_id, finger_id, self.cookies['baichuan']) if req is not None: return req.json()['data'] def get_tb_getdetail(self, item_id: int): if ( not 'uland' in self.cookies or time_stamp() - self.m_time > self.ONE_HOURS / 2 ): self.get_m_h5_tk() req = self.get_tb_getdetail_once(item_id, self.cookies['uland']) if req is not None: req_text = req.text re_json = json.loads(req_text[req_text.find("{") : -1]) return re_json["data"]["item"] def get_tb_getdetail_once(self, item_id: int, cookies: dict = {}): refer_url = self.DETAIL_URL % item_id data = {"itemNumId": str(item_id)} jsv = '2.4.8' api = 'mtop.taobao.detail.getdetail' j_data_t = {'v': 6.0, 'ttid': '2017@taobao_h5_6.6.0', 'AntiCreep': True, 'callback': 'mtopjsonp1' } return self.get_tb_h5_api(api, jsv, refer_url, data, j_data_t, cookies) def get_baichuan_once(self, item_id: int, finger_id: str, cookies: dict = {}): refer_url = self.DETAIL_URL % item_id data = { 'pageCode': 'mallDetail', 'ua': get_use_agent('mobile'), 'params': json_str({ "url": refer_url, "referrer": "", "oneId": None, "isTBInstalled": "null", "fid": finger_id }) } data_str = r'{"pageCode":"mallDetail","ua":"%s","params":"{\"url\":\"%s\",\"referrer\":\"\",\"oneId\":null,\"isTBInstalled\":\"null\",\"fid\":\"%s\"}"}' % (get_use_agent('mobile'), refer_url, finger_id) print(data) api = 'mtop.taobao.baichuan.smb.get' jsv = '2.4.8' return self.get_tb_h5_api(api, jsv, refer_url, data, cookies=cookies, mode=1, data_str=data_str) def get_tb_h5_api(self, api: str, jsv: str, refer_url: str, data: dict, j_data_t: dict = {}, cookies: dict = {}, mode: int = 0, data_str: str = None): """ tb h5 api @2019.11.6 ✔️Tested""" step = 
self.M in cookies if data_str is None: data_str = json_str(data) headers = { "Accept": 'application/json', "referer": refer_url, "Agent": get_use_agent('mobile') } if step: headers["Cookie"] = encoder_cookie(cookies) appkey = "12574478" token = cookies[self.M].split("_")[0] if step else "" t = int(time_stamp() * 1000) j_data = { "jsv": jsv, "appKey": appkey, "t": t, "sign": self.get_tb_h5_token(token, t, appkey, data_str), "api": api, "v": 1.0, "timeout": 20000, "AntiCreep": True, "AntiFlood": True, "type": "originaljson", "dataType": "jsonp", **j_data_t } if mode == 0: j_data['data'] = data_str mtop_url = encoder_url(j_data, self.MTOP_URL % (api, int(j_data['v']))) if mode == 0: req = proxy_req(mtop_url, 2, header=headers) else: req = proxy_req(mtop_url, 12, data=data, header=headers) # echo(4, 'request once.') if req is None: if can_retry(self.MTOP_URL % (api, int(j_data['v']))): return self.get_tb_h5_api(api, jsv, refer_url, data, j_data_t, cookies, mode) else: return return req def get_uland_url_once(self, uland_url: str, cookies: dict = {}): """ tb h5 api @2019.11.9 ✔️Tested""" step = self.M in cookies uland_params = decoder_url(uland_url) tt = { "floorId": "13193" if step else "13052", "variableMap": json_str( { "taoAppEnv": "0", "e": uland_params["e"], "scm": uland_params["scm"], } ), } api = "mtop.alimama.union.xt.en.api.entry" jsv = '2.4.0' j_data = {'type': 'jsonp', "callback": "mtopjsonp{}".format(int(step) + 1)} return self.get_tb_h5_api(api, jsv, uland_url, tt, j_data, cookies) def get_finger(self, item_id: int): if ( not 'finger' in self.cookies or not self.M in self.cookies['finger'] or time_stamp() - self.m_time > self.ONE_HOURS / 2 ): self.get_m_h5_tk() s_req = self.get_finger_once(item_id, self.cookies['finger']) if s_req is None: return try: return s_req.json()['data']['fingerId'] except Exception as e: return def get_finger_once(self, item_id: int, cookies: dict = {}): step = self.M in cookies api = 'mtop.taobao.hacker.finger.create' refer_url = self.ITEM_URL % item_id jsv = '2.4.11' j_data = {'type': 'jsonp', "callback": "mtopjsonp{}".format(int(step) + 1),} return self.get_tb_h5_api(api, jsv, refer_url, {}, cookies=cookies) def get_tb_h5_token(self, *data: list): md5 = hashlib.md5() wait_enc = "&".join([str(ii) for ii in data]) md5.update(wait_enc.encode()) return md5.hexdigest() def get_ynote_file(self, offset: int = 0): url = self.LISTRECENT_URL % (offset, self.cstk) data = {"cstk": self.cstk} req = basic_req(url, 11, data=data, header=self.get_ynote_web_header(1)) if req is None or type(req) != list: return None list_recent = {ii["fileEntry"]["id"]: ii["fileEntry"] for ii in req} self.list_recent = {**self.list_recent, **list_recent} echo(1, "Load ynote file {} items.".format(len(self.list_recent))) return req def get_ynote_web_header(self, mode: int = 0): headers = { "Content-Type": get_content_type(), "Cookie": self.cookie, "Host": self.Y_URL.split("/")[2], "Origin": self.Y_URL, "Referer": self.WEB_URL, } if mode: headers["Accept"] = get_accept("xhr") else: headers["Accept"] = get_accept("html") return headers def get_empty_content(self): headers = {"Referer": self.WEB_URL} req = proxy_req(self.Y_DOC_JS_URL, 3, header=headers) if len(req) < 1000: if can_retry(self.Y_DOC_JS_URL): return self.get_empty_content() else: return empty_content = regex.findall("t.EMPTY_NOTE_CONTENT='(.*?)'", req)[0] empty_content = empty_content.split(self.END_TEXT)[0] self.empty_content = empty_content echo(1, "Load empty content", empty_content) return empty_content def 
get_web_content(self): req = proxy_req(self.WEB_URL, 3, header=self.get_ynote_web_header()) if len(req) < 1000: if can_retry(self.WEB_URL): return self.get_web_content() else: return return req def update_article_pipeline(self, article_id: str): xml = self.get_xml(article_id) if xml is None: echo("0|warning", "get xml error") return xml, r_log, r_num = self.replace_tpwd(article_id, xml) if not r_num: echo("0|warning", "r_num == 0") return flag = self.update_article(article_id, xml) if flag: self.email_update_result(article_id, r_log, r_num) self.update_valid(article_id) self.update_article2db(article_id, True) self.share_article(article_id) def email_update_result(self, article_id: str, r_log: list, r_num: int): p = self.share2article[article_id][-2].split("/")[-1] article_info = self.list_recent[p] name = article_info["name"].replace(".note", "") subject = "更新({}){}/{}条[{}]".format( time_str(time_format=self.T_FORMAT), r_num, len(r_log), article_info["name"] ) content = "\n".join( [ "Title: {}".format(article_info["name"]), "Time: {}".format(time_str()), "Update Num: {}/{}条".format(r_num, len(r_log)), "", *r_log, ] ) send_email(content, subject, assign_rec=self.assign_rec) def update_valid(self, article_id: str): if article_id not in self.tpwd_map: self.tpwd_map[article_id] = {} wait_list = [ ii for ii in self.article_list[article_id].keys() if ii not in self.tpwd_map[article_id] ] update_time = 0 while len(wait_list) and update_time < 5: echo(2, "Begin Update No.{} times Tpwd validDate".format(update_time + 1)) update_v = [ self.tpwd_exec.submit(self.decoder_tpwd_once, article_id, ii, 1) for ii in wait_list ] list(as_completed(update_v)) wait_list = [ ii for ii in self.article_list[article_id].keys() if ii not in self.tpwd_map[article_id] ] update_time += 1 def update_article2db(self, article_id: str, is_tpwd_update: bool = False): def valid_t(types: str, maps: dict): return types in maps and maps[types] != '' m = {ii[4]: ii for ii in self.get_article_db(article_id)} data = [] for ( o_tpwd, (num_iid, title, domain, tpwd, commission_rate, commission_type, ur), ) in self.article_list[article_id].items(): """ `id`, article_id, tpwd_id, item_id, tpwd, domain, content, url, commission_rate, commission_type, expire_at, created_at, is_deleted """ n = m[o_tpwd] if o_tpwd in self.tpwd_map[article_id]: t = self.tpwd_map[article_id][o_tpwd] content = ( t["title"] if valid_t('title', t) else (t['content'] if valid_t('content', t) else n[6]) ) url = t["url"] if valid_t('url', t) else n[7] validDate = t["validDate"] if valid_t('validDate', t) else n[-2] data.append( ( *n[:4], tpwd if is_tpwd_update else o_tpwd, domain, content, url, commission_rate, commission_type, validDate, n[-1], 0, ) ) else: data.append( ( *n[:4], tpwd if is_tpwd_update else o_tpwd, domain, n[6], n[7], commission_rate, commission_type, n[-2], n[-1], 0, ) ) self.update_db(data, "Update Article {} TPWD".format(article_id)) def replace_tpwd(self, article_id: str, xml: str): tpwds = regex.findall(self.TPWD_REG2, xml) m = self.article_list[article_id] r_log, r_num = [], 0 EXIST = "PASSWORD_NOT_EXIST::口令不存在" DECODER_EXC = "DECODER_EXCEPTION::商品已下架" NO_GOODS = "GOODS_NOT_FOUND::未参加淘客" TPWD_ERROR = "TPWD_ERROR::淘口令生成异常" for ii, jj in enumerate(tpwds): pure_jj = jj[1:-1] no_t = "No.{} tpwd: {}, ".format(ii + 1, jj) if pure_jj not in m: r_log.append("{}{}".format(no_t, EXIST)) continue # tpwd = 'NOTNOTEXIST' num_iid, title, domain, tpwd, commission_rate, commission_type, ur = m[pure_jj] if domain >= 15: if domain == 15: applied = 
"{},{}".format(EXIST, title) elif domain == 16: applied = "{},{}".format(DECODER_EXC, title) elif domain == 17: applied = "{},{}".format(NO_GOODS, title) elif domain == 18: applied = "{},{}".format(TPWD_ERROR, title) else: applied = title xml = xml.replace(jj, "¥{}¥".format(tpwd)) if commission_rate == 2: COMMISSION = "->¥{}¥ SUCCESS, 保持原链接, {}".format(tpwd, applied) elif commission_rate == 1: COMMISSION = "未能更新淘口令, {}".format(applied) else: COMMISSION = "->¥{}¥ SUCCESS, 佣金: {}, 类型: {}, {}".format( tpwd, commission_rate, commission_type, applied ) r_log.append("{}{}".format(no_t, COMMISSION)) r_num += int(commission_rate != 1) return xml, r_log, r_num def get_xml(self, article_id: str): url = self.SYNC_URL % ("download", self.cstk) data = { "fileId": self.share2article[article_id][-2].split("/")[-1], "version": -1, "convert": True, "editorType": 1, "cstk": self.cstk, } req = proxy_req(url, 12, data=data, header=self.get_ynote_web_header(1)) if req is None or len(req.text) < 100: if can_retry(url): return self.get_xml(article_id) else: return return req.text def update_article(self, article_id: str, article_body: str): p = self.share2article[article_id][-2].split("/")[-1] article_info = self.list_recent[p] data = { "fileId": p, "parentId": article_info["parentId"], "domain": article_info["domain"], "rootVersion": -1, "sessionId": "", "modifyTime": int(time_stamp()), "bodyString": article_body, "transactionId": p, "transactionTime": int(time_stamp()), "orgEditorType": article_info["orgEditorType"], "tags": article_info["tags"], "cstk": self.cstk, } url = self.SYNC_URL % ("push", self.cstk) req = basic_req(url, 11, data=data, header=self.get_ynote_web_header(1)) if req is None or list(req.keys()) != [ "entry", "meta", "effectedShareEntries", "forcePullVersion", "effected", ]: echo( "0|error", "Update atricle_id {} Error".format(article_id), req.json() if req is not None else "", ) return False echo("1|warning", "Update atricle_id {} Success!!!".format(article_id)) return True def share_article(self, article_id: str): p = self.share2article[article_id][-2].split("/")[-1] url = self.MYSHARE_URL % (p, self.cstk) req = proxy_req(url, 1, header=self.get_ynote_web_header(1)) if req is None or list(req.keys()) != ["entry", "meta"]: if can_retry(url): return self.share_article(article_id) return False echo("2", "Share article {} Success!!!".format(article_id)) return True def load_article_local(self, file_path: str): if file_path not in self.tpwds: tt = '||||'.join(read_file(file_path)) tpwds = regex.findall(self.TPWD_REG, tt) self.tpwds[file_path] = tpwds else: tpwds = self.tpwds[file_path] if file_path not in self.tpwd_map: self.tpwd_map[file_path] = {} time = 0 while (len(self.tpwd_map[file_path]) < len(tpwds)) and time < 5: thread_list = [ii for ii in tpwds if not ii in self.tpwd_map[file_path]] echo(1, file_path, "tpwds len:", len(tpwds), "need load", len(thread_list)) thread_list = [ self.tpwd_exec.submit(self.decoder_tpwd_once, file_path, ii, 1) for ii in thread_list ] list(as_completed(thread_list)) time += 1 def load_picture(self, url: str, idx: int): td = basic_req(url, 2) picture_path = 'picture/{}.jpg'.format(idx) with open(picture_path, 'wb') as f: f.write(td.content) def load_picture_pipeline(self, file_path: str): mkdir('picture') tpk_list = self.tpwds[file_path] picture_url = [(self.tpwd_map[file_path][tpk]['picUrl'], idx) for idx, tpk in enumerate(tpk_list) if tpk in self.tpwd_map[file_path]] picture_url = [(ii, idx) for ii, idx in picture_url if not 
os.path.exists('picture/{}.jpg'.format(idx))] echo(1, 'Load {} picture Begin'.format(len(picture_url))) pp = [self.tpwd_exec.submit(self.load_picture, ii, jj) for ii, jj in picture_url] return pp def check_overdue(self): def check_overdue_once(data: list) -> bool: dif_time = time_stamp(data[-2]) - time_stamp() return dif_time > 0 and dif_time <= self.ONE_HOURS * self.ONE_DAY overdue_article = [(article_id, article_list[4]) for article_id, ii in self.tpwd_db_map.items() for article_list in ii.values() if check_overdue_once(article_list)] overdue_id = set([article_id for article_id, _ in overdue_article]) overdue_list = [(article_id, len([1 for a_id, tpwd in overdue_article if article_id == a_id])) for article_id in overdue_id] if not len(overdue_list): return title = '链接需要更新#{}#篇'.format(len(overdue_list)) content = title + '\n \n' for article_id, num in overdue_list: content += '{}, 需要更新{}个链接,{}\n'.format(self.share2article[article_id][2], num, self.NOTE_URL % article_id) content += '\n\nPlease update within 6 hours, Thx!' echo('2|debug', title, content) send_email(content, title) def load_share_total(self): self.check_overdue() for article_id in self.idx: self.get_share_info(article_id) self.load_list2db() self.__init__() self.load_process() def load_article_new(self): for article_id in self.idx: self.load_article(article_id) def load_click(self, num=1000000): ''' schedule click ''' for index in range(num): threading_list = [] if index % 12 != 1: threading_list.append(threading.Thread(target=self.load_article_new, args=())) if index % 12 == 1: threading_list.append(threading.Thread(target=self.load_share_total, args=())) for work in threading_list: work.start() time.sleep(self.ONE_HOURS / 2)
def __init__(self):
    self.T = {}
    self.n = {
        'A': 48, 'BUTTON': 1, 'CANVAS': 1, 'CPUClass': None, 'DIV': 71,
        'HTMLLength': 158225, 'IMG': 5, 'INPUT': 4, 'LABEL': 1, 'LI': 21,
        'LINK': 3, 'P': 10, 'SCRIPT': 14, 'SPAN': 9, 'STYLE': 18, 'UL': 4,
        'browserLanguage': "zh-CN",
        'browserLanguages': "zh-CN,zh",
        'canvas2DFP': "5eb3d9a167292cc324a4a6b692171a49",
        'canvas3DFP': "b2284dba7b1ccb5ef8fabc22c0065611",
        'colorDepth': 24,
        'cookieEnabled': 1,
        'devicePixelRatio': 2,
        'deviceorientation': False,
        'doNotTrack': 0,
        'documentMode': "CSS1Compat",
        'flashEnabled': -1,
        'hardwareConcurrency': 8,
        'indexedDBEnabled': 1,
        'innerHeight': 150,
        'innerWidth': 1680,
        'internalip': None,
        'javaEnabled': 0,
        'jsFonts': "AndaleMono,Arial,ArialBlack,ArialHebrew,ArialNarrow,ArialRoundedMTBold,ArialUnicodeMS,ComicSansMS,Courier,CourierNew,Geneva,Georgia,Helvetica,HelveticaNeue,Impact,LUCIDAGRANDE,MicrosoftSansSerif,Monaco,Palatino,Tahoma,Times,TimesNewRoman,TrebuchetMS,Verdana,Wingdings,Wingdings2,Wingdings3",
        'localStorageEnabled': 1,
        'maxTouchPoints': 0,
        'mediaDevices': -1,
        'netEnabled': 1,
        'outerHeight': 987,
        'outerWidth': 1680,
        'performanceTiming': "-1,-1,16,2,122,0,274,0,209,137,6,6,32,3405,3405,3408,35543,35544,35547,-1",
        'platform': "MacIntel",
        'plugins': "internal-pdf-viewer,mhjfbmdgcfjbbpaeojofohoefgiehjai,internal-nacl-plugin",
        'screenAvailHeight': 987,
        'screenAvailLeft': 0,
        'screenAvailTop': 23,
        'screenAvailWidth': 1680,
        'screenHeight': 1050,
        'screenLeft': 0,
        'screenTop': 23,
        'screenWidth': 1680,
        'sessionStorageEnabled': 1,
        'systemLanguage': None,
        'textLength': 93737,
        'timestamp': int(time_stamp()),
        'timezone': -8,
        'touchEvent': False,
        'userAgent': "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3818.0 Safari/537.36",
    }
    self.t = [
        'textLength', 'HTMLLength', 'documentMode', 'A', 'ARTICLE', 'ASIDE',
        'AUDIO', 'BASE', 'BUTTON', 'CANVAS', 'CODE', 'IFRAME', 'IMG', 'INPUT',
        'LABEL', 'LINK', 'NAV', 'OBJECT', 'OL', 'PICTURE', 'PRE', 'SECTION',
        'SELECT', 'SOURCE', 'SPAN', 'STYLE', 'TABLE', 'TEXTAREA', 'VIDEO',
        'screenLeft', 'screenTop', 'screenAvailLeft', 'screenAvailTop',
        'innerWidth', 'innerHeight', 'outerWidth', 'outerHeight',
        'browserLanguage', 'browserLanguages', 'systemLanguage',
        'devicePixelRatio', 'colorDepth', 'userAgent', 'cookieEnabled',
        'netEnabled', 'screenWidth', 'screenHeight', 'screenAvailWidth',
        'screenAvailHeight', 'localStorageEnabled', 'sessionStorageEnabled',
        'indexedDBEnabled', 'CPUClass', 'platform', 'doNotTrack', 'timezone',
        'canvas2DFP', 'canvas3DFP', 'plugins', 'maxTouchPoints', 'flashEnabled',
        'javaEnabled', 'hardwareConcurrency', 'jsFonts', 'timestamp',
        'performanceTiming', 'internalip', 'mediaDevices', 'DIV', 'P', 'UL',
        'LI', 'SCRIPT', 'deviceorientation', 'touchEvent'
    ]
def get_check(self):
    """ check comment """
    self.delay_load_history_data()
    bv_list = [[ii["bvid"], ii["aid"], ii["comment"]]
               for ii in self.bv_ids.values()
               if not regex.findall(self.ignore_list, str(ii["aid"]))]
    bv_map = {ii["bvid"]: ii for ii in self.bv_ids.values()}
    if self.bv_list and len(self.bv_list) and len(self.bv_list) != len(bv_list):
        new_bv_list = [(ii, jj) for ii, jj, _ in bv_list
                       if ii not in self.bv_list and ii not in self.del_map]
        self.rank_map = {**self.rank_map, **{ii: {} for ii, _ in new_bv_list}}
        echo("1|error", "New Bv av ids:", new_bv_list)
        for bv_id, av_id in new_bv_list:
            rank_info = bv_map[bv_id]
            shell_str = "nohup python3 bilibili/bsocket.py {} %d >> log.txt 2>&1 &".format(av_id)
            echo("0|error", "Shell str:", shell_str)
            os.system(shell_str % 1)
            os.system(shell_str % 2)
            email_str = "发布({}){}#{} {}".format(
                time_str(rank_info["created"], time_format=self.T_FORMAT),
                rank_info["title"],
                bv_id,
                av_id,
            )
            email_str2 = "{} {} is released at {}.\nPlease check the online & common program.".format(
                rank_info["title"],
                time_str(rank_info["created"]),
                self.BASIC_BV_URL % bv_id,
            )
            send_email(email_str2, email_str, self.special_info_email)
            self.update_ini(bv_id, av_id)
            self.public["T"][bv_id] = [rank_info["created"], rank_info["mid"]]
            self.last_check[bv_id] = int(time_stamp())
    self.bv_list = [ii for (ii, _, _) in bv_list]
    now_hour = int(time_str(time_format="%H"))
    now_min = int(time_str(time_format="%M"))
    now_time = now_hour + now_min / 60
    if now_time > self.ignore_start and now_time < self.ignore_end:
        return
    if self.assign_mid == -1:
        return
    # threads = [self.pool.submit(self.check_type_req, bv_id) for bv_id in rank_map.keys()]
    # list(as_completed(threads))
    threading_list = []
    for (_, ii, jj) in bv_list:
        work = threading.Thread(target=self.comment_check_schedule, args=(ii, jj))
        threading_list.append(work)
    for work in threading_list:
        work.start()
    for work in threading_list:
        work.join()
    return bv_list
def load_rank_index(self, index: int, day_index: int):
    """ load rank """
    self.have_assign[day_index] = []
    url = self.RANKING_URL % (self.assign_rank_id, day_index, index)
    text = self.get_api_req(url, self.basic_bv_id, 1)
    if text is None:
        return
    rank_list = text["list"]
    need_params = [
        "pts", "bvid", "aid", "author", "mid", "play",
        "video_review", "coins", "duration", "title",
    ]
    now_bv_ids, checks, rank_map = [], [], {}
    ## loop for Rank List
    # 1. Filter different `tid` bv
    # 2. Send email for [热榜] (hot ranking).
    # 3. Update rank map
    for idx, rank in enumerate(rank_list):
        bv_id = rank["bvid"]
        rank_info = {
            "id": idx + 1,
            **{ii: rank[ii] for ii in rank},
            "type": index,
            "day": day_index,
        }
        now_bv_ids.append(bv_id)
        if not self.check_type(bv_id):
            continue
        if day_index < 5:
            self.check_rank_rose(bv_id, rank_info)
        if self.add_av(bv_id, idx, rank_info["pts"]):
            rank_map[bv_id] = rank_info
    ## check ids
    for bv_id in self.assign_ids:
        if bv_id not in self.public["T"]:
            checks.append(bv_id)
        if bv_id not in self.last_view and bv_id not in self.rank_map:
            self.rank_map[bv_id] = {}
    have_assign = [ii for ii in self.assign_ids if ii in now_bv_ids]
    ## check tid type
    threads = [
        self.pool.submit(self.check_type_req, bv_id) for bv_id in rank_map.keys()
    ]
    list(as_completed(threads))
    ## update rank_map
    for bv_id, rank_info in rank_map.items():
        if not self.check_type(bv_id):
            continue
        if bv_id not in self.public["T"]:
            checks.append(bv_id)
        self.last_check[bv_id] = int(time_stamp())
        self.rank_map[bv_id] = rank_info
    ## update public
    threads = [self.pool.submit(self.public_data, bv_id) for bv_id in checks]
    list(as_completed(threads))
    ## monitor
    need_monitor = [
        bv_id for bv_id, (created, mid) in self.public["T"].items()
        if bv_id not in self.public["L"] and created + one_day > int(time_stamp())
    ]
    threads = [
        self.monitor_pool.submit(self.public_monitor, bv_id)
        for bv_id in need_monitor
    ]
    self.have_assign[day_index] = have_assign
def check_overdue_once(data: list) -> bool:
    dif_time = time_stamp(data[-2]) - time_stamp()
    return dif_time > 0 and dif_time <= self.ONE_HOURS * self.ONE_DAY
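# Hedged usage note (assumption, not part of the original source): data[-2] is
# the tpwd row's expire_at string, so this helper flags records that expire
# within the next ONE_HOURS * ONE_DAY seconds (24 hours), e.g.:
#
#     overdue = [row for row in article_rows if check_overdue_once(row)]   # article_rows is hypothetical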