def __get_url_curl(self, post, referer):
    """
    Request the player URL from the pelisplay ``procesar_player`` endpoint.

    When the reply cannot be parsed as JSON the cookie is refreshed and the
    request is retried; after three refreshes the call gives up.

    :param post: payload passed to ``curlData`` as ``value``
    :param referer: value sent in the ``referer`` request header
    :return: the ``data`` field of the JSON reply when ``estado`` is 200,
             otherwise an empty string
    """
    headers = {
        "user-agent": getUserAgent(),
        "origin": "https://www.pelisplay.tv",
        "referer": referer
    }
    url = "https://www.pelisplay.tv/entradas/procesar_player"
    data = curlData(url, value=post, cookie=self.cookie, header=headers)
    try:
        data = json.loads(data)
    except Exception as e:
        # Refresh the cookie under the lock. try/finally guarantees the lock
        # is released even if get_cookie() raises.
        lock.acquire()
        try:
            self.get_cookie()
            # Count the refresh so the "< 3" limit below can actually trigger;
            # without it the retry recursed without bound.
            self.cookie_get_num += 1
        finally:
            lock.release()
        if self.cookie_get_num < 3:
            return self.__get_url_curl(post, referer=referer)
        data = {"estado": 500}
        debug("播放链接获取出错,错误信息:{error}".format(error=e))

    # Tolerate replies that are valid JSON but not the expected object.
    if isinstance(data, dict) and data.get('estado') == 200:
        result = data.get('data', "")
    else:
        result = ""
    self.cookie_get_num = 0
    return result
def get_cookie(self):
    """
    Fetch a cookie for the pelisplay home page via ``getCookie``, store it on
    ``self.cookie`` and log it with ``debug``.
    """
    request_headers = {"User-Agent": getUserAgent()}
    home_page = "https://www.pelisplay.tv/"
    fresh_cookie = getCookie(home_page, header=request_headers)
    self.cookie = fresh_cookie
    debug(self.cookie)
def __init__(self):
    """
    Initialise the database handle, the proxy/cookie state and a randomly
    chosen user-agent index, then load the column list of ``ws_docid``.
    """
    # Database connection shared through the instance
    db_handle = phoenix_db.DBConfig()
    self.ws_db = db_handle
    self.config = config

    # Global proxy IP and the related state flags
    self.proxy_ip = ""
    self.is_change_proxy = 0
    self.guid = ""
    self.cookie = ""
    self.ip_over = 0

    # Pick a user-agent slot: a random integer in [0, 1000) reduced modulo
    # the value returned by getUserAgent(2).
    # NOTE(review): getUserAgent(2) presumably returns the number of
    # available user agents — confirm.
    random_draw = int(random.random() * 1000)
    agent_pool_size = getUserAgent(2)
    self.user_agent_index = random_draw % agent_pool_size

    self.table_columns = db_handle.getColumns({"table": "ws_docid"})
def __handle_data(self, item):
    """
    Download the image referenced by ``item`` and save it under
    ``static/images/<id>.jpg``, then call ``__update_data`` for the item.

    :param item: mapping with ``img_src`` (path appended to the site origin)
                 and ``id`` (used as the file name)
    :return: ``{"code": 0}``
    """
    url = "https://www.pelisplay.tv" + item['img_src']
    header = {
        "User-Agent": getUserAgent(),
        "Accept": "image/webp,image/apng,image/*,*/*;q=0.8"
    }
    data = curlData(url, header=header)

    # The file is opened in binary mode, so text must be encoded first.
    # NOTE(review): the previous try/except around encode() suggests curlData
    # may return either str or bytes — confirm. An explicit type check avoids
    # logging a spurious error whenever the payload is already bytes.
    if isinstance(data, str):
        data = data.encode("utf-8")

    # The with-statement closes the file; no explicit close() is needed.
    with open("static/images/{id}.jpg".format(id=item['id']), "wb") as f:
        f.write(data)

    # Update the record only after the image is fully written and closed.
    self.__update_data(item)
    return {"code": 0}
def get_image():
    """
    Proxy an image from pelisplay.tv.

    Reads the ``url`` request value (a site-relative path), fetches it from
    ``https://www.pelisplay.tv`` with ``curlData`` and returns the payload in
    a ``Response`` whose mimetype comes from ``get_image_type``. When ``url``
    is missing or is not a site-relative path, an empty body with the "jpg"
    type is returned.
    """
    domain = "https://www.pelisplay.tv"
    ext = "jpg"
    data = ""

    url = request.values.get("url")
    if url is not None:
        url = urllib.parse.unquote(url)
        # Security: the path is caller-controlled. Without a leading "/" a
        # value such as "@other.host/x" or ".other.host/x" would change the
        # host being fetched, so only site-relative paths are accepted.
        if url.startswith("/"):
            header = {
                "Referer": "https://www.pelisplay.tv/",
                "User-Agent": getUserAgent(),
                "Accept": "image/webp,image/apng,image/*,*/*;q=0.8"
            }
            data = curlData(domain + url, header=header, open_virtual_ip=True)
            # Everything after the first dot is treated as the extension
            # (same pattern as before, now a raw string). Fall back to "jpg"
            # when the path has no dot instead of raising IndexError.
            found = re.findall(r"[\w\W]*?\.([\w\W]*.)", url)
            if found:
                ext = found[0]

    return Response(data, mimetype=get_image_type(ext))
def getCookie(self, url):
    """
    Fetch the cookie for ``url`` through the current proxy.

    Connection-pool errors are retried; every failure advances the
    proxy-rotation counter and switches proxy once it exceeds 4.

    :param url: URL whose cookie is requested
    :return: the value returned by the module-level ``getCookie``, or 0 when
             ``self.ip_over`` is 1 or a non-connection error occurs
    """
    # Iterative retry: the previous recursive retry could exhaust the
    # interpreter stack when connection errors persisted.
    while True:
        # Re-checked on every attempt because setProxyIp() runs between tries.
        if self.ip_over == 1:
            return 0
        header = {
            "User-Agent": getUserAgent(index=self.user_agent_index)
        }
        try:
            return getCookie(url, referer="http://wenshu.court.gov.cn", header=header,
                             proxy_ip=self.proxy_ip, timeout=5)
        except Exception as e:
            if self.is_change_proxy > 4:
                self.setProxyIp()
                self.is_change_proxy = 0
            else:
                self.is_change_proxy = self.is_change_proxy + 1
            # Only connection-pool failures are worth retrying.
            if "HTTPConnectionPool" not in str(e):
                return 0
            debug("cookie获取出错,HttpConnect错误,重新获取ip并重新获取")
def __get_video_src(self, item):
    """
    Extract the video source from the page at ``item['url']``.

    The page is fetched with the instance cookie, the argument of the first
    ``JSON.parse('...')`` call is decoded, and the ``file`` entry of its
    first element is returned.

    :param item: mapping with a ``url`` key
    :return: the ``file`` value, or "" when the page cannot be parsed
    """
    header = {
        "User-Agent": getUserAgent(),
    }
    data = curlData(url=item['url'], header=header, cookie=self.cookie)
    try:
        # Raw string: the pattern is unchanged but no longer relies on
        # invalid escape sequences such as "\." and "\(".
        src = re.findall(r"JSON\.parse\('([\w\W]*?)'\)\);", data)[0]
        # Backslashes are stripped before the payload is decoded as JSON.
        src = src.replace("\\", "")
        src = json.loads(src)
        src = src[0]['file']
    except Exception as e:
        # Best effort: any fetch/parse problem yields an empty source.
        src = ""
        debug(e)
    return src
def getUrlAndCookieCaseType(self, court_name, start_date, end_date, case_type):
    """
    Build the list URL for a court / date range / case type and obtain its cookie.

    :param court_name: court name used in the search conditions
    :param start_date: start of the judgment-date range
    :param end_date: end of the judgment-date range
    :param case_type: case type used in the search conditions
    :return: dict with the parameters a worker thread needs (``vjkl5``,
             ``post``, ``header``, ``cookie_all``, ``url``, ``court_name``,
             ``start_date``, ``end_date``)
    """
    # Fetch the guid. "except Exception" (not a bare except) keeps
    # KeyboardInterrupt/SystemExit able to stop the retry.
    try:
        guid = curlData("http://ws_api.xiezhi.sc.cn/getParam?vjkl5=0")
        guid = json.loads(guid)
        guid = guid['guid']
    except Exception:
        debug("guid获取出错")
        return self.getUrlAndCookieCaseType(court_name, start_date, end_date, case_type)
    # The verification-code request is disabled; the URL carries an empty number.
    number = ""

    # Assemble the URL
    url = "http://wenshu.court.gov.cn/list/list/?sorttype=1&number=%s&guid=%s&conditions=searchWord+%s+SLFY++%s&conditions=searchWord++CPRQ++%s%%20TO%%20%s&conditions=searchWord+%s+AJLX++%s" % (
        number, guid,
        urllib.parse.quote(str(court_name)),
        urllib.parse.quote("法院名称:%s" % court_name),
        urllib.parse.quote(str(start_date)),
        urllib.parse.quote(str(end_date)),
        str(self.getCaseTypeIndex(case_type)),
        urllib.parse.quote(str("案件类型:%s" % case_type)))

    # getCookie() returns 0 on failure: switch proxy and try again.
    cookie = self.getCookie(url)
    while cookie == 0:
        self.setProxyIp()
        cookie = self.getCookie(url)

    try:
        vjkl5 = cookie['vjkl5']
    except Exception:
        debug("vjk5获取失败,重新获取")
        if self.is_change_proxy > 4:
            self.setProxyIp()
            self.is_change_proxy = 0
        else:
            self.is_change_proxy = self.is_change_proxy + 1
        return self.getUrlAndCookieCaseType(court_name, start_date, end_date, case_type)

    try:
        post = curlData("http://ws_api.xiezhi.sc.cn/getParam?vjkl5=" + vjkl5)
    except Exception:
        # Fall back to an empty parameter object when the helper API fails.
        post = "{}"
        debug("post参数获取出错")
    post = json.loads(post)
    post['Order'] = "法院层级"
    post['Page'] = 20
    post['number'] = "wens"
    post['Direction'] = "asc"
    post['Param'] = "法院名称:%s,裁判日期:%s TO %s,案件类型:%s" % (court_name, start_date, end_date, case_type)

    header = {
        "User-Agent": getUserAgent(index=self.user_agent_index),
        "Origin": "http://wenshu.court.gov.cn"
    }
    return {
        "vjkl5": vjkl5,
        "post": post,
        "header": header,
        "cookie_all": cookie,
        "url": url,
        "court_name": court_name,
        "start_date": start_date,
        "end_date": end_date
    }
def getYear(self, court_name):
    """
    Query the TreeContent endpoint for a court and return its year entries.

    :param court_name: court name used in the search condition
    :return: ``['Child']`` of the fifth element of the parsed TreeContent
             reply (described in the original as all years that have data)
    """
    header_1 = {
        "User-Agent": getUserAgent(index=self.user_agent_index),
        "Origin": "http://wenshu.court.gov.cn"
    }
    # Fetch the guid and the verification number. "except Exception" (not a
    # bare except) keeps KeyboardInterrupt/SystemExit able to stop the retries.
    try:
        guid = curlData("http://ws_api.xiezhi.sc.cn/getParam?vjkl5=0")
        guid = json.loads(guid)
        guid = guid['guid']
        num_flag = 0
        try:
            number = curlData("http://wenshu.court.gov.cn/ValiCode/GetCode", {"guid": guid}, header=header_1,
                              referer="http://wenshu.court.gov.cn",
                              proxy_ip=self.proxy_ip, timeout=5)
        except Exception:
            # Sentinel that makes the loop below request the number again.
            number = "remind"
        # These markers identify an error reply instead of a real number.
        while "remind" in number or "html" in number or "服务不可用" in number:
            # Switch proxy after more than four consecutive bad replies.
            if num_flag > 4:
                self.setProxyIp()
                num_flag = 0
            else:
                num_flag = num_flag + 1
            debug("number获取出错,继续获取")
            try:
                number = curlData("http://wenshu.court.gov.cn/ValiCode/GetCode", {"guid": guid}, header=header_1,
                                  referer="http://wenshu.court.gov.cn",
                                  proxy_ip=self.proxy_ip, timeout=5)
            except Exception as e:
                debug(e)
                sleep(0.5)
    except Exception:
        debug("guid获取出错")
        return self.getYear(court_name)

    # Assemble the URL
    url = "http://wenshu.court.gov.cn/list/list/?sorttype=1&number=%s&guid=%s&conditions=searchWord+%s+SLFY++%s" % (
        number, guid,
        urllib.parse.quote(str(court_name)),
        urllib.parse.quote("法院名称:%s" % court_name))

    # getCookie() returns 0 on failure: switch proxy and try again.
    cookie = self.getCookie(url)
    while cookie == 0:
        self.setProxyIp()
        cookie = self.getCookie(url)

    try:
        vjkl5 = cookie['vjkl5']
    except Exception:
        debug("vjkl5获取失败,重新获取")
        if self.is_change_proxy > 4:
            self.setProxyIp()
            self.is_change_proxy = 0
        else:
            self.is_change_proxy = self.is_change_proxy + 1
        return self.getYear(court_name)

    try:
        post = curlData("http://ws_api.xiezhi.sc.cn/getParam?vjkl5=" + vjkl5)
    except Exception:
        # Fall back to an empty parameter object when the helper API fails.
        post = "{}"
        debug("post参数获取出错")
    post = json.loads(post)
    post['number'] = number
    post['guid'] = guid
    post['Param'] = "法院名称:%s" % court_name

    header = {
        "User-Agent": getUserAgent(index=self.user_agent_index),
        "Origin": "http://wenshu.court.gov.cn"
    }
    year = curlData("http://wenshu.court.gov.cn/List/TreeContent", post, url, cookie, header,
                    self.proxy_ip, timeout=5)
    try:
        # The reply is decoded twice; if either pass fails, the value from
        # the last successful step is used as-is.
        year = json.loads(year)
        year = json.loads(year)
    except (TypeError, ValueError):
        pass
    return year[4]['Child']