def run(self):
    """Execute ``self.code`` under js2py and classify the outcome.

    Sets ``self.passed/label/reason/full_error`` and returns the same
    4-tuple.  ``label`` is one of 'PASSED', 'FAILED', 'CRASHED',
    'NOT_IMPLEMENTED', 'NO_FAIL'.  ``self.negative`` marks a test that is
    *expected* to raise (truthy, or the exact string 'SyntaxError').
    """
    # labels: 'PASSED', 'FAILED', 'CRASHED', 'NOT_IMPLEMENTED', 'NO_FAIL'
    errors = True  # stays True only if eval_js raised
    label = None
    try:
        js2py.eval_js(self.code)
        errors = False
    except NotImplementedError:
        # Report which js2py internal is missing (innermost frame name).
        tb = sys.exc_info()[-1]
        stk = traceback.extract_tb(tb)
        fname = stk[-1][2]
        passed = False
        reason = 'Not implemented - "%s"' % fname
        full_error = traceback.format_exc()
        label = 'NOT_IMPLEMENTED'
    except PyJsException as e:
        # A JS-level exception: this is a pass for negative tests.
        full_error = traceback.format_exc()
        if self.negative:
            passed = True
        else:
            passed = False
            reason = PyExceptionToJs(e).get('message').to_python()
            label = 'FAILED'
    except SyntaxError:
        # Parse failure: a pass only when the test explicitly expects it.
        full_error = traceback.format_exc()
        if self.negative == 'SyntaxError':
            passed = True
        else:
            passed = False
            reason = 'Could not parse'
            label = 'CRASHED'
    except Exception:  # was a bare except; narrowed so Ctrl-C still works
        full_error = traceback.format_exc()
        passed = False
        reason = 'UNKNOWN - URGENT, FIX NOW!'
        label = 'CRASHED'
    if not errors:
        if self.negative:
            # Expected a failure but the code ran cleanly.
            passed = False
            reason = "???"
            label = "NO_FAIL"
            full_error = ''
        else:
            passed = True
    if passed:
        label = "PASSED"
        reason = ''
        full_error = ''
    self.passed, self.label, self.reason, self.full_error = passed, label, reason, full_error
    return passed, label, reason, full_error
def exec_file(path): js = load(path) desc = re.search("/\*---(.+)---\*/", js, re.DOTALL).groups()[0] inc = re.search("includes:(.+)", desc, re.DOTALL) bibs = "" if inc: libs = inc.groups()[0].splitlines() har = "harness/" for lib in libs: lib = lib.strip("[] -") if not lib: continue bibs += load(har + lib) try: js2py.eval_js(init + bibs + js) except NotImplementedError: return except: if "negative:" in desc or "onlyStrict" in desc: return # supposed to fail print "-" * 30 print traceback.format_exc() print print desc print print 'File "%s", line 1, in chuj' % os.path.abspath(path) raw_input()
def exec_file(path):
    """Run one test262-style file through js2py, pausing interactively on
    unexpected results (crash of a positive test, or success of a negative one).
    """
    js = load(path)
    # Test metadata is embedded in a /*--- ... ---*/ comment block.
    desc = re.search('/\*---(.+)---\*/', js, re.DOTALL).groups()[0]
    inc = re.search('includes:(.+)', desc, re.DOTALL)
    bibs = ''
    if inc:
        # Concatenate each harness helper named in the 'includes:' section.
        libs = inc.groups()[0].splitlines()
        har = 'harness/'
        for lib in libs:
            lib = lib.strip('[] -')
            if not lib:
                continue
            bibs += load(har + lib)
    try:
        # `init` is a module-level prelude defined elsewhere in the file.
        js2py.eval_js(init + bibs + js)
    except NotImplementedError:
        print 'Not implemented'
    except:
        if 'negative:' in desc or 'onlyStrict' in desc:
            return  # supposed to fail
        # Unexpected crash of a positive test: show details and wait for Enter.
        print '-' * 30
        print traceback.format_exc()
        print
        print desc
        print
        print 'File "%s", line 1, in chuj' % os.path.abspath(path)
        raw_input()
        return
    # Negative (non strict-only) test that did NOT raise: flag it.
    if 'negative:' in desc and not 'onlyStrict' in desc:
        print 'File "%s", line 1, in chuj' % os.path.abspath(path)
        print 'Did not fail!'
        raw_input()
def run(self):
    # NOTE(review): this appears to be a truncated Python-2 duplicate of the
    # `run` defined earlier in this file — it ends after the PyJsException
    # handler with no return statement visible.  Kept byte-equivalent.
    # labels: 'PASSED', 'FAILED', 'CRASHED', 'NOT_IMPLEMENTED', 'NO_FAIL'
    errors = True
    crashed = True
    label = None
    try:
        js2py.eval_js(self.code)
        errors = False
        crashed = False
    except NotImplementedError:
        # Report which js2py internal is missing (innermost frame name).
        tb = sys.exc_info()[-1]
        stk = traceback.extract_tb(tb)
        fname = stk[-1][2]
        passed = False
        reason = 'Not implemented - "%s"' % fname
        full_error = traceback.format_exc()
        label = 'NOT_IMPLEMENTED'
    except PyJsException, e:
        # JS-level exception: counts as a pass for negative tests.
        crashed = False
        full_error = traceback.format_exc()
        if self.negative:
            passed = True
        else:
            passed = False
            reason = PyExceptionToJs(e).get('message').to_python()
            label = 'FAILED'
def solve_cf_challenge(self, resp, **original_kwargs):
    """Solve a Cloudflare IUAM challenge page and replay the original request.

    ``resp`` is the challenge response.  Returns the final response, or
    None when the challenge javascript could not be evaluated.
    """
    body = resp.text
    parsed_url = urlparse(resp.url)
    domain = parsed_url.netloc
    submit_url = '%s://%s/cdn-cgi/l/chk_jschl' % (parsed_url.scheme, domain)
    # Copy the caller's kwargs minus 'hooks' so submitting the answer does
    # not re-trigger the caller's response hooks.
    cloudflare_kwargs = {k: v for k, v in original_kwargs.items() if k not in ['hooks']}
    params = cloudflare_kwargs.setdefault('params', {})
    headers = cloudflare_kwargs.setdefault('headers', {})
    headers['Referer'] = resp.url
    try:
        params['jschl_vc'] = re.search(r'name="jschl_vc" value="(\w+)"', body).group(1)
        params['pass'] = re.search(r'name="pass" value="(.+?)"', body).group(1)
        params['s'] = re.search(r'name="s" value="(.+?)"', body).group(1)
        # Extract the arithmetic operation
        js = self.extract_js(body).replace('t.length', str(len(domain)))
    except Exception:
        # Something is wrong with the page.
        # This may indicate Cloudflare has changed their anti-bot
        # technique. If you see this and are running the latest version,
        # please open a GitHub issue so I can update the code accordingly.
        logging.error('[!] Unable to parse Cloudflare anti-bots page.')
        raise
    # Evaluate the challenge javascript; retry once, then give up quietly
    # (preserves the previous behavior of returning None on failure).
    try:
        params['jschl_answer'] = str(js2py.eval_js(js))
    except BaseException:  # was the redundant (Exception, BaseException) tuple
        try:
            params['jschl_answer'] = str(js2py.eval_js(js))
        except BaseException:
            return None
    # Requests transforms any request into a GET after a redirect,
    # so the redirect has to be handled manually here to allow for
    # performing other types of requests even as the first request.
    method = resp.request.method
    cloudflare_kwargs['allow_redirects'] = False
    self.wait()
    redirect = self.request(method, submit_url, **cloudflare_kwargs)
    location = redirect.headers.get('Location')
    parsed_location = urlparse(location)
    if not parsed_location.netloc:
        # Relative redirect: resolve it against the original scheme/host.
        location = '%s://%s%s' % (parsed_url.scheme, domain, parsed_location.path)
    return self.request(method, location, **original_kwargs)
def handle_javascript(self, line):
    """Evaluate one line of the site's obfuscated javascript.

    Strips the document.write wrapper from the expression before handing
    it to js2py, and returns the evaluated result.
    """
    cleaned = line.replace(
        "{}))",
        "{}).replace('document.open();document.write','')"
        ".replace(';document.close();',''))",
    )
    return js2py.eval_js(cleaned)
def handle_free(self, pyfile):
    """Extract the best-quality stream URL from the player's javascript.

    Reassembles the page's `var ...` declarations, evaluates them with
    js2py, and picks the entry with the highest numeric quality.
    Sets ``self.link``; calls ``self.error`` (which raises) on parse failure.
    """
    m = re.search(
        r'<div class="video-wrapper">.+?<script type="text/javascript">(.+?)</script>',
        self.data,
        re.S,
    )
    if m is None:
        self.error(self._("Player Javascript data not found"))
    script = m.group(1)
    m = re.search(r"quality_items_\d+", script)
    if m is None:
        self.error(self._("`quality_items` variable no found"))
    # Fix: reuse the match we just validated instead of re-searching.
    result_var = m.group(0)
    # Keep only the var declarations, strip comments/whitespace noise.
    script = "".join(re.findall(r"^\s*var .+", script, re.M))
    script = re.sub(r"[\n\t]|/\*.+?\*/", "", script)
    script += "JSON.stringify({});".format(result_var)
    res = js2py.eval_js(script)
    json_data = json.loads(res)
    # Map numeric quality (leading digits of the label) -> url.
    urls = {
        int(re.search(r"^(\d+)", x["text"]).group(0)): x["url"]
        for x in json_data
        if x["url"]
    }
    quality = max(urls.keys())
    self.link = urls[quality]
def parse_live_detail(self, response):
    """Scrapy callback: build a LiveItem from a yizhibo ("一直播") live page.

    Pulls the inline `anchor = {...}` javascript object out of the page,
    evaluates it with js2py and maps its fields onto the item.
    """
    logger.info("live url {}".format(response.url))
    # NOTE(review): response.body is bytes under Python 3 — this findall
    # assumes a str body (Python 2 scrapy); confirm before porting.
    info = re.findall("window.(anchor = .*?);", response.body, re.S)[0]
    post_info = js2py.eval_js(info)
    post_item = LiveItem()
    post_item["author_id"] = post_info["memberid"]
    post_item["author_name"] = post_info["nickname"]
    post_item["url"] = response.url
    post_item["title"] = response.xpath("//h1/text()").extract_first()
    post_item["site_id"] = 1223
    post_item["site_name"] = "一直播"
    # post_item["read_num"] = post_info["online"]
    post_item["online_num"] = post_info["online"]  # viewer count (read count / video views / live attendees)
    post_item["like_num"] = response.xpath('//div[@class="hide"]').re_first(u"共有(\d+)条点赞")  # like count
    post_item["comment_num"] = response.xpath('//div[@class="hide"]').re_first(u"共有(\d+)条评论")  # comment count
    post_item["post_time"] = dateformatting.parse(post_info["starttime"]).strftime(date_format)  # publish time
    post_item["include_time"] = self.crawled_time  # crawl time
    post_item["content_tags"] = response.xpath('//div[@class="hide"]').re_first(u"认证类型:(.*?)。")
    post_item["video"] = post_info["play_url"]
    post_item["image"] = post_info["covers"]
    yield post_item
    # logger.info(post_item)
    logger.info(u"{} live view people {}".format(post_item["author_name"], post_item["online_num"]))
def getProxy():
    """Scrape proxy ip/port pairs from pachong.org into the global ip_port dict."""
    # Reach pachong.org through the local SOCKS5 tunnel.
    socks.set_default_proxy(socks.SOCKS5, '127.0.0.1', 1080)
    socket.socket = socks.socksocket
    response = requesocks.get(proxyUrl)
    page = response.text.encode('utf-8')
    # Page-level <script> blocks that declare the port-obfuscation variables.
    script_heads = re.findall(re.compile('<script type.*?>(.*?)</script>', re.S), page)
    # Per-row inline scripts computing each port.
    port_scripts = re.findall(re.compile('<td><script>(.*?)</script>', re.S), page)
    # Plain-text IP addresses.
    ips = re.findall(re.compile('<td>([0-9]+(?:\.[0-9]+){0,3})</td>', re.S), page)
    for index, ip in enumerate(ips):
        # NOTE(review): assumes the 3rd page script holds the shared vars — confirm.
        port_js = script_heads[2] + port_scripts[index]
        wrapper = '''
        function add(){
        %s
        }
        add()''' % port_js.replace('document.write', 'return')
        ip_port[ip] = js2py.eval_js(wrapper)
def addcrypted2():
    """Click'N'Load v2 endpoint: decrypt the submitted link list and queue it.

    Returns a (body, status) tuple on failure, or the CNL success marker.
    """
    package = flask.request.form.get(
        "package", flask.request.form.get("source", flask.request.form.get("referer"))
    )
    crypted = flask.request.form["crypted"]
    jk = flask.request.form["jk"]
    crypted = standard_b64decode(unquote(crypted.replace(" ", "+")))
    # The 'jk' field is a javascript function f() returning the hex key.
    jk = js2py.eval_js(f"{jk} f()")
    try:
        key = bytes.fromhex(jk)
    except Exception:
        return "Could not decrypt key", 500
    obj = Fernet(key)
    # Fix: decrypt() returns bytes — decode before the str-based cleanup,
    # otherwise .replace("\x00", "") raises TypeError on bytes.
    text = obj.decrypt(crypted).decode(errors="replace")
    urls = text.replace("\x00", "").replace("\r", "").split("\n")
    urls = [url for url in urls if url.strip()]
    api = flask.current_app.config["PYLOAD_API"]
    try:
        if package:
            api.add_package(package, urls, 0)
        else:
            api.generate_and_add_packages(urls, 0)
    except Exception:
        return "failed can't add", 500
    else:
        return "success\r\n"
def _eval_id_decoding(self, webpage, ol_id): try: # raise # uncomment to test method with pairing #js_code = re.findall( # ur"(゚ω゚ノ=.*?\('_'\);.*?)゚ω゚ノ= /`m´)ノ ~┻━┻ //\*´∇`\*/ \['_'\];" #,webpage, re.DOTALL)[0] js_code = re.findall( #ur"(゚ω゚ノ=.*?\('_'\);.*?)゚ω゚ノ= /`m´)ノ ~┻━┻ //\*´∇`\*/ \['_'\];", ur"(゚ω゚ノ=.*?\('_'\);.*?)゚ω゚ノ= /`m´)ノ", webpage,re.S) #common.log_utils.log_notice('js_code: %s' % js_code) js_code = re.sub('''if\s*\([^\}]+?typeof[^\}]+?\}''', '', js_code) js_code = re.sub('''if\s*\([^\}]+?document[^\}]+?\}''', '', js_code) except Exception as e: print 'Could not find JavaScript %s' % e raise ResolverError('Could not find JavaScript %s' % e) print("AAA1", ol_id, js_code) #js_code = base64.b64decode('''ICAgICAgICAgICAgICAgICAgICB2YXIgaWQgPSAiJXMiDQogICAgICAgICAgICAgICAgICAgICAgLCBkZWNvZGVkDQogICAgICAgICAgICAgICAgICAgICAgLCBkb2N1bWVudCA9IHt9DQogICAgICAgICAgICAgICAgICAgICAgLCB3aW5kb3cgPSB0aGlzDQogICAgICAgICAgICAgICAgICAgICAgLCAkID0gZnVuY3Rpb24oKXsNCiAgICAgICAgICAgICAgICAgICAgICAgICAgcmV0dXJuIHsNCiAgICAgICAgICAgICAgICAgICAgICAgICAgICB0ZXh0OiBmdW5jdGlvbihhKXsNCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIGlmKGEpDQogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIGRlY29kZWQgPSBhOw0KICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgZWxzZQ0KICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICByZXR1cm4gaWQ7DQogICAgICAgICAgICAgICAgICAgICAgICAgICAgfSwNCiAgICAgICAgICAgICAgICAgICAgICAgICAgICByZWFkeTogZnVuY3Rpb24oYSl7DQogICAgICAgICAgICAgICAgICAgICAgICAgICAgICBhKCkNCiAgICAgICAgICAgICAgICAgICAgICAgICAgICB9DQogICAgICAgICAgICAgICAgICAgICAgICAgIH0NCiAgICAgICAgICAgICAgICAgICAgICAgIH07DQogICAgICAgICAgICAgICAgICAgIChmdW5jdGlvbihkLCB3KXsNCiAgICAgICAgICAgICAgICAgICAgICB2YXIgZiA9IGZ1bmN0aW9uKCl7fTsNCiAgICAgICAgICAgICAgICAgICAgICB2YXIgcyA9ICcnOw0KICAgICAgICAgICAgICAgICAgICAgIHZhciBvID0gbnVsbDsNCiAgICAgICAgICAgICAgICAgICAgICB2YXIgYiA9IGZhbHNlOw0KICAgICAgICAgICAgICAgICAgICAgIHZhciBuID0gMDsNCiAgICAgICAgICAgICAgICAgICAgICB2YXIgZGYgPSBbJ2Nsb3NlJywnY3JlYXRlQXR0cmlidXRlJywnY3JlYXRlRG9jdW1lbnRGcmFnbWVudCcsJ2NyZWF0ZUVsZW
1lbnQnLCdjcmVhdGVFbGVtZW50TlMnLCdjcmVhdGVFdmVudCcsJ2NyZWF0ZU5TUmVzb2x2ZXInLCdjcmVhdGVSYW5nZScsJ2NyZWF0ZVRleHROb2RlJywnY3JlYXRlVHJlZVdhbGtlcicsJ2V2YWx1YXRlJywnZXhlY0NvbW1hbmQnLCdnZXRFbGVtZW50QnlJZCcsJ2dldEVsZW1lbnRzQnlOYW1lJywnZ2V0RWxlbWVudHNCeVRhZ05hbWUnLCdpbXBvcnROb2RlJywnb3BlbicsJ3F1ZXJ5Q29tbWFuZEVuYWJsZWQnLCdxdWVyeUNvbW1hbmRJbmRldGVybScsJ3F1ZXJ5Q29tbWFuZFN0YXRlJywncXVlcnlDb21tYW5kVmFsdWUnLCd3cml0ZScsJ3dyaXRlbG4nXTsNCiAgICAgICAgICAgICAgICAgICAgICBkZi5mb3JFYWNoKGZ1bmN0aW9uKGUpe2RbZV09Zjt9KTsNCiAgICAgICAgICAgICAgICAgICAgICB2YXIgZG9fID0gWydhbmNob3JzJywnYXBwbGV0cycsJ2JvZHknLCdkZWZhdWx0VmlldycsJ2RvY3R5cGUnLCdkb2N1bWVudEVsZW1lbnQnLCdlbWJlZHMnLCdmaXJzdENoaWxkJywnZm9ybXMnLCdpbWFnZXMnLCdpbXBsZW1lbnRhdGlvbicsJ2xpbmtzJywnbG9jYXRpb24nLCdwbHVnaW5zJywnc3R5bGVTaGVldHMnXTsNCiAgICAgICAgICAgICAgICAgICAgICBkb18uZm9yRWFjaChmdW5jdGlvbihlKXtkW2VdPW87fSk7DQogICAgICAgICAgICAgICAgICAgICAgdmFyIGRzID0gWydVUkwnLCdjaGFyYWN0ZXJTZXQnLCdjb21wYXRNb2RlJywnY29udGVudFR5cGUnLCdjb29raWUnLCdkZXNpZ25Nb2RlJywnZG9tYWluJywnbGFzdE1vZGlmaWVkJywncmVmZXJyZXInLCd0aXRsZSddOw0KICAgICAgICAgICAgICAgICAgICAgIGRzLmZvckVhY2goZnVuY3Rpb24oZSl7ZFtlXT1zO30pOw0KICAgICAgICAgICAgICAgICAgICAgIHZhciB3YiA9IFsnY2xvc2VkJywnaXNTZWN1cmVDb250ZXh0J107DQogICAgICAgICAgICAgICAgICAgICAgd2IuZm9yRWFjaChmdW5jdGlvbihlKXt3W2VdPWI7fSk7DQogICAgICAgICAgICAgICAgICAgICAgdmFyIHdmID0gWydhZGRFdmVudExpc3RlbmVyJywnYWxlcnQnLCdhdG9iJywnYmx1cicsJ2J0b2EnLCdjYW5jZWxBbmltYXRpb25GcmFtZScsJ2NhcHR1cmVFdmVudHMnLCdjbGVhckludGVydmFsJywnY2xlYXJUaW1lb3V0JywnY2xvc2UnLCdjb25maXJtJywnY3JlYXRlSW1hZ2VCaXRtYXAnLCdkaXNwYXRjaEV2ZW50JywnZmV0Y2gnLCdmaW5kJywnZm9jdXMnLCdnZXRDb21wdXRlZFN0eWxlJywnZ2V0U2VsZWN0aW9uJywnbWF0Y2hNZWRpYScsJ21vdmVCeScsJ21vdmVUbycsJ29wZW4nLCdwb3N0TWVzc2FnZScsJ3Byb21wdCcsJ3JlbGVhc2VFdmVudHMnLCdyZW1vdmVFdmVudExpc3RlbmVyJywncmVxdWVzdEFuaW1hdGlvbkZyYW1lJywncmVzaXplQnknLCdyZXNpemVUbycsJ3Njcm9sbCcsJ3Njcm9sbEJ5Jywnc2Nyb2xsVG8nLCdzZXRJbnRlcnZhbCcsJ3NldFRpbWVvdXQnLCdzdG9wJ107DQogICAgICAgICAgICAgICAgICAgICAgd2YuZm9yRWFjaChmdW5jdGlvbihlKXt3W2VdPWY7fSk7DQogICAgICAgIC
AgICAgICAgICAgICAgdmFyIHduID0gWydkZXZpY2VQaXhlbFJhdGlvJywnaW5uZXJIZWlnaHQnLCdpbm5lcldpZHRoJywnbGVuZ3RoJywnb3V0ZXJIZWlnaHQnLCdvdXRlcldpZHRoJywncGFnZVhPZmZzZXQnLCdwYWdlWU9mZnNldCcsJ3NjcmVlblgnLCdzY3JlZW5ZJywnc2Nyb2xsWCcsJ3Njcm9sbFknXTsNCiAgICAgICAgICAgICAgICAgICAgICB3bi5mb3JFYWNoKGZ1bmN0aW9uKGUpe3dbZV09bjt9KTsNCiAgICAgICAgICAgICAgICAgICAgICB2YXIgd28gPSBbJ2FwcGxpY2F0aW9uQ2FjaGUnLCdjYWNoZXMnLCdjcnlwdG8nLCdleHRlcm5hbCcsJ2ZyYW1lRWxlbWVudCcsJ2ZyYW1lcycsJ2hpc3RvcnknLCdpbmRleGVkREInLCdsb2NhbFN0b3JhZ2UnLCdsb2NhdGlvbicsJ2xvY2F0aW9uYmFyJywnbWVudWJhcicsJ25hdmlnYXRvcicsJ29uYWJvcnQnLCdvbmFuaW1hdGlvbmVuZCcsJ29uYW5pbWF0aW9uaXRlcmF0aW9uJywnb25hbmltYXRpb25zdGFydCcsJ29uYmVmb3JldW5sb2FkJywnb25ibHVyJywnb25jYW5wbGF5Jywnb25jYW5wbGF5dGhyb3VnaCcsJ29uY2hhbmdlJywnb25jbGljaycsJ29uY29udGV4dG1lbnUnLCdvbmRibGNsaWNrJywnb25kZXZpY2Vtb3Rpb24nLCdvbmRldmljZW9yaWVudGF0aW9uJywnb25kcmFnJywnb25kcmFnZW5kJywnb25kcmFnZW50ZXInLCdvbmRyYWdsZWF2ZScsJ29uZHJhZ292ZXInLCdvbmRyYWdzdGFydCcsJ29uZHJvcCcsJ29uZHVyYXRpb25jaGFuZ2UnLCdvbmVtcHRpZWQnLCdvbmVuZGVkJywnb25lcnJvcicsJ29uZm9jdXMnLCdvbmhhc2hjaGFuZ2UnLCdvbmlucHV0Jywnb25pbnZhbGlkJywnb25rZXlkb3duJywnb25rZXlwcmVzcycsJ29ua2V5dXAnLCdvbmxhbmd1YWdlY2hhbmdlJywnb25sb2FkJywnb25sb2FkZWRkYXRhJywnb25sb2FkZWRtZXRhZGF0YScsJ29ubG9hZHN0YXJ0Jywnb25tZXNzYWdlJywnb25tb3VzZWRvd24nLCdvbm1vdXNlZW50ZXInLCdvbm1vdXNlbGVhdmUnLCdvbm1vdXNlbW92ZScsJ29ubW91c2VvdXQnLCdvbm1vdXNlb3ZlcicsJ29ubW91c2V1cCcsJ29ub2ZmbGluZScsJ29ub25saW5lJywnb25wYWdlaGlkZScsJ29ucGFnZXNob3cnLCdvbnBhdXNlJywnb25wbGF5Jywnb25wbGF5aW5nJywnb25wb3BzdGF0ZScsJ29ucHJvZ3Jlc3MnLCdvbnJhdGVjaGFuZ2UnLCdvbnJlc2V0Jywnb25yZXNpemUnLCdvbnNjcm9sbCcsJ29uc2Vla2VkJywnb25zZWVraW5nJywnb25zZWxlY3QnLCdvbnNob3cnLCdvbnN0YWxsZWQnLCdvbnN0b3JhZ2UnLCdvbnN1Ym1pdCcsJ29uc3VzcGVuZCcsJ29udGltZXVwZGF0ZScsJ29udG9nZ2xlJywnb250cmFuc2l0aW9uZW5kJywnb251bmxvYWQnLCdvbnZvbHVtZWNoYW5nZScsJ29ud2FpdGluZycsJ29ud2Via2l0YW5pbWF0aW9uZW5kJywnb253ZWJraXRhbmltYXRpb25pdGVyYXRpb24nLCdvbndlYmtpdGFuaW1hdGlvbnN0YXJ0Jywnb253ZWJraXR0cmFuc2l0aW9uZW5kJywnb253aGVlbCcsJ29wZW5lcicsJ3BhcmVudCcsJ3Blcm
Zvcm1hbmNlJywncGVyc29uYWxiYXInLCdzY3JlZW4nLCdzY3JvbGxiYXJzJywnc2VsZicsJ3Nlc3Npb25TdG9yYWdlJywnc3BlZWNoU3ludGhlc2lzJywnc3RhdHVzYmFyJywndG9vbGJhcicsJ3RvcCddOw0KICAgICAgICAgICAgICAgICAgICAgIHdvLmZvckVhY2goZnVuY3Rpb24oZSl7d1tlXT1vO30pOw0KICAgICAgICAgICAgICAgICAgICAgIHZhciB3cyA9IFsnbmFtZSddOw0KICAgICAgICAgICAgICAgICAgICAgIHdzLmZvckVhY2goZnVuY3Rpb24oZSl7d1tlXT1zO30pOw0KICAgICAgICAgICAgICAgICAgICB9KShkb2N1bWVudCwgd2luZG93KTsNCiAgICAgICAgICAgICAgICAgICAgJXM7DQogICAgICAgICAgICAgICAgICAgIHByaW50KGRlY29kZWQpOw==''') % (ol_id, js_code) #print("AAA2", ol_id, js_code) js_code = ''' var id = "%s" , decoded , document = {} , window = this , $ = function(){ return { text: function(a){ if(a) decoded = a; else return id; }, ready: function(a){ a() } } }; (function(d){ var f = function(){}; var s = ''; var o = null; ['close','createAttribute','createDocumentFragment','createElement','createElementNS','createEvent','createNSResolver','createRange','createTextNode','createTreeWalker','evaluate','execCommand','getElementById','getElementsByName','getElementsByTagName','importNode','open','queryCommandEnabled','queryCommandIndeterm','queryCommandState','queryCommandValue','write','writeln'].forEach(function(e){d[e]=f;}); ['anchors','applets','body','defaultView','doctype','documentElement','embeds','firstChild','forms','images','implementation','links','location','plugins','styleSheets'].forEach(function(e){d[e]=o;}); ['URL','characterSet','compatMode','contentType','cookie','designMode','domain','lastModified','referrer','title'].forEach(function(e){d[e]=s;}); })(document); %s; decoded;''' % (ol_id, js_code) try: decoded = js2py.eval_js(js_code) if ' ' in decoded or decoded == '': raise return decoded except Exception as e: raise ResolverError('Could not eval ID decoding %s' %e)
def get_revenue_table(file): try: f = codecs.open(file, 'r',"utf-8") soup = BeautifulSoup(f, "html5lib") # Extract movie info from main block title1 = unidecode(soup.title.get_text().replace(" - Daily Box Office Results - Box Office Mojo", "")) title2 = soup.body.find(id="container").find(id="main").find(id="body").select("table")[2].tbody.tr.td.select("table")[0].tbody.tr.select('td')[1].b.get_text() info = soup.body.find(id="container").find(id="main").find(id="body").select("table")[2].tbody.tr.td.select("table")[0].tbody.tr.center.tbody.select("b") total_revenues = int(re.sub('[!@#$,]', '', info[0].get_text())) distributor = unidecode(re.sub('[!@#$,]', '', info[1].get_text())) release_date = unidecode(re.sub('[!@#$,]', '', info[2].get_text())) dt_obj = datetime.strptime(release_date, '%B %d %Y') # Of the form datetime.datetime(2016, 5, 6, 0, 0) (e.g. dt_obj.year = 2016) genre = unidecode(re.sub('[!@#$,]', '', info[3].get_text())) runtime_pre = re.sub('[!@#$,.a-zA-z]', '', info[4].get_text()).strip().split() runtime = int(runtime_pre[0]) * 60 + int(runtime_pre[1]) MPAA = re.sub('[!@#$,]', '', info[5].get_text()) # Convert production budget string to integer production_budget_pre1 = unidecode(re.sub('[!@#$,]', '', info[6].get_text())) production_budget_pre2 = "".join(production_budget_pre1.lower().split()) for word, initial in {"million":"000000", "thousand":"000" }.items(): production_budget = production_budget_pre2.replace(word.lower(), initial) # Extract revenue figures java_text = soup.find_all(type="text/javascript") t = java_text[5] jtext = t.getText().split('\t')[6].replace("\n", " ") table = js2py.eval_js(jtext) revenue = table.to_list() # Enter into dataframe rev = pd.DataFrame(revenue) rev.drop(0, axis=1, inplace=True) # Get total base revenues base_revenues = rev[1].sum() # Calculate conversion factor rev_cf = (total_revenues / base_revenues) / 1000000 # Load onto dictionary to export keys = ['title1', 'title2', 'total_revenues', 'distributor', 
'dt_obj', 'genre', 'runtime', 'MPAA', 'production_budget'] values = [title1, unidecode(title2), total_revenues, distributor, dt_obj, genre, runtime, MPAA, int(production_budget)] movie_details = dict(zip(keys, values)) rev_df = rev * rev_cf except Exception, e: print file movie_details = {'title1': 0} rev_df = 0 missing_information.append(file) logging.exception(e)
def handle_free(self, pyfile):
    """Resolve the download link by replaying the site's decoder javascript.

    Finds the encoded media URL and the community script on the page,
    carves the decoder/init/process pieces out of the script, stitches a
    standalone program together, and evaluates it with js2py.
    """
    # Step 1: gather the encoded media URL and the decoding javascript.
    match = re.search(self.MEDIA_URL_PATTERN, self.data)
    if match is None:
        self.fail(self._("Could not find any media URLs"))
    encoded_media_url = match.group(1)
    self.log_debug(f"Found encoded media URL: {encoded_media_url}")
    match = re.search(self.COMMUNITY_JS_PATTERN, self.data)
    if match is None:
        self.fail(self._("Could not find necessary javascript script to load"))
    community_js_url = match.group(1)
    self.log_debug(f"Found community js at {community_js_url}")
    community_js_code = self.load(community_js_url)
    # Step 2: the script is long — truncate it, then pull out the decoder
    # function, its initialization block, and the processing function.
    community_js_code = community_js_code.partition(self.JS_SPLIT_WORD)[0]
    match = re.search(self.JS_HEADER_PATTERN, community_js_code)
    if match is None:
        self.fail(self._("Could not parse the necessary parts off the javascript"))
    decoder_function = match.group("decoder")
    initialization = match.group("initvars")
    match = re.search(self.JS_PROCESS_PATTERN, community_js_code)
    if match is None:
        self.fail(
            self._("Could not parse the processing function off the javascript")
        )
    process_function = match.group(0)
    # Step 3: assemble a self-contained script and run it.
    new_js_code = (
        f'{decoder_function}; {initialization}; var {process_function}'
        f'; process_recording("{encoded_media_url}");'
    )
    self.log_debug(f"Running js script: {new_js_code}")
    js_result = js2py.eval_js(new_js_code)
    self.log_debug(f"Result is: {js_result}")
    self.link = js_result
def test_ko_model(self):
    """Tests ko_model"""
    user = self.setup_user()
    people = Person.objects.all()
    generated = ko_model(user)
    # The generated knockout model must be non-empty and valid javascript.
    self.assertNotEqual(generated, '')
    interpreted = js2py.eval_js(generated)
def eval(self, jsEnv, js):
    """Evaluate a Cloudflare challenge script inside a restricted js2py context.

    Older js2py releases mis-evaluate JSFuck-style expressions; when the
    probe below yields the wrong answer, the challenge is de-obfuscated
    with jsunfuck() first as a workaround.
    """
    probe = '(+(+!+[]+[+!+[]]+(!![]+[])[!+[]+!+[]+!+[]]+[!+[]+!+[]]+[+[]])+[])[+!+[]]'
    if js2py.eval_js(probe) == '1':
        logging.warning('WARNING - Please upgrade your js2py https://github.com/PiotrDabkowski/Js2Py, applying work around for the meantime.')
        js = jsunfuck(js)

    def atob(data):
        # Challenge scripts rely on the browser's atob(); emulate it.
        return base64.b64decode('{}'.format(data)).decode('utf-8')

    js2py.disable_pyimport()
    context = js2py.EvalJs({'atob': atob})
    return context.eval('{}{}'.format(jsEnv, js))
def get_fp_sign(fp_raw):
    """Sign a fingerprint string using Xunlei's remote signing algorithm.

    Downloads the algorithm script from login.xunlei.com, evaluates it with
    js2py and applies it to ``fp_raw``.  Returns '' on any failure.
    """
    rsp = requests.get(
        'https://login.xunlei.com/risk?cmd=algorithm&t=' + str(time.time() * 1000)
    )
    sign = ''
    try:
        # Fix: rsp.content is bytes — js2py needs the decoded script text.
        xl_al = js2py.eval_js(rsp.text)
        sign = xl_al(fp_raw)
    except Exception as e:
        print(e)
    return sign
def _solve_cf_ddos_challenge(addon_plugin, owner_plugin, data):
    """Solve CloudFlare's "I'm Under Attack Mode" challenge page.

    Returns the page behind the challenge, or None on failure (which tells
    the caller's exception handler to re-throw).
    """
    try:
        addon_plugin.log_info(
            addon_plugin._("Detected CloudFlare's DDoS protection page")
        )
        # Cloudflare requires a delay before solving the challenge
        owner_plugin.set_wait(5)
        last_url = owner_plugin.req.last_effective_url
        urlp = urllib.parse.urlparse(last_url)
        domain = urlp.netloc
        submit_url = "{}://{}/cdn-cgi/l/chk_jschl".format(urlp.scheme, domain)
        get_params = {}
        try:
            get_params["jschl_vc"] = re.search(
                r'name="jschl_vc" value="(\w+)"', data
            ).group(1)
            get_params["pass"] = re.search(
                r'name="pass" value="(.+?)"', data
            ).group(1)
            # Extract the arithmetic operation
            js = re.search(
                r"setTimeout\(function\(\){\s+(var s,t,o,p,b,r,e,a,k,i,n,g,f.+?\r?\n[\s\S]+?a\.value =.+?)\r?\n",
                data,
            ).group(1)
            # Fix: the challenge assigns via parseInt(...) — the previous
            # pattern looked for a nonexistent parse_int(...) and never matched.
            js = re.sub(r"a\.value = (parseInt\(.+?\)).+", r"\1", js)
            js = re.sub(r"\s{3,}[a-z](?: = |\.).+", "", js)
            js = re.sub(r"[\n\\']", "", js)
        except Exception:
            # Something is wrong with the page.
            # This may indicate CloudFlare has changed their anti-bot
            # technique.
            owner_plugin.log_error(
                addon_plugin._("Unable to parse CloudFlare's DDoS protection page")
            )
            return None  #: Tell the exception handler to re-throw the exception
        # Safely evaluate the Javascript expression
        get_params["jschl_answer"] = str(int(js2py.eval_js(js)) + len(domain))
        owner_plugin.wait()  #: Do the actual wait
        return owner_plugin.load(submit_url, get=get_params, ref=last_url)
    except Exception as exc:
        addon_plugin.log_error(exc)
        return None  #: Tell the exception handler to re-throw the exception
def parse_report(self):
    """Retrieve the results from the Skipfish report.

    The report stores its findings as javascript variable assignments; each
    `;`-separated chunk is evaluated with js2py and the resulting values are
    mapped back onto their variable names.  Discoveries come from the
    ``issue_samples`` variable; each becomes a ranking dict, and unless
    ``self.light`` is set the per-sample transaction directories are parsed
    as well.

    :raises: :class:`ReportNotFoundError` -- if the ``issue_samples``
        variable is missing from the report file.
    :return: List of dicts where each one represents a discovery, e.g.
        ``{'ranking': <scale>}`` plus an optional trailing entry carrying
        the full transactions.
    :rtype: :class:`list`
    """
    REPORT_VAR_NAME = 'issue_samples'
    variables = self.re_var_pattern.findall(self.report_stream)
    chunks = [c for c in self.report_stream.split(";") if c is not None]
    # Evaluate each javascript chunk; keep only the ones yielding a value.
    evaluated = []
    for chunk in chunks:
        value = js2py.eval_js(chunk)
        if value is not None:
            evaluated.append(value)
    # Pair each variable name with its evaluated content (positional).
    format_data = dict(zip(variables, evaluated))
    if REPORT_VAR_NAME not in variables:
        raise ReportNotFoundError('PTP did NOT find issue_samples variable. Is this the correct file?')
    # We now have a raw version of the Skipfish report as a list of dict.
    self.vulns = [
        {'ranking': self.RANKING_SCALE[vuln['severity']]}
        for vuln in format_data[REPORT_VAR_NAME]
    ]
    if not self.light:
        # Collect every sample's transaction directory for the full parse.
        sample_dirs = []
        for var in variables:
            for item in format_data[var]:
                for sample in item['samples']:
                    sample_dirs.append({
                        'url': sample['url'],
                        'dir': os.path.join(self.search_directory, sample['dir']),
                    })
        self.vulns.append({
            'ranking': constants.UNKNOWN,
            'transactions': self._parse_report_full(sample_dirs),
        })
    return self.vulns
def _eval_id_decoding(self, webpage, ol_id):
    """Decode an openload-style obfuscated id by evaluating the page's
    AAEncode javascript inside a stubbed browser environment via js2py.

    Raises ResolverError when the script cannot be found or evaluated.
    """
    try:
        # raise # uncomment to test method with pairing
        # Grab the AAEncode blob up to (but excluding) its closing marker.
        js_code = re.findall(
            ur"(゚ω゚ノ=.*?\('_'\);.*?)゚ω゚ノ= /`m´)ノ ~┻━┻ //\*´∇`\*/ \['_'\];"
            , webpage, re.DOTALL)[0]
        # common.log_utils.log_notice('js_code: %s' % js_code)
        # Strip environment probes that would bail out under the stub DOM.
        js_code = re.sub('''if\s*\([^\}]+?typeof[^\}]+?\}''', '', js_code)
        js_code = re.sub('''if\s*\([^\}]+?document[^\}]+?\}''', '', js_code)
    except Exception as e:
        print 'Could not find JavaScript %s' % e
        raise ResolverError('Could not find JavaScript %s' % e)
    # Minimal jQuery/document stub: $().text(x) stores the decoded value,
    # $().text() returns the encoded id; document methods become no-ops.
    js_code = '''
    var id = "%s"
        , decoded
        , document = {}
        , window = this
        , $ = function(){
            return {
                text: function(a){
                    if(a)
                        decoded = a;
                    else
                        return id;
                },
                ready: function(a){
                    a()
                }
            }
        };
    (function(d){
        var f = function(){};
        var s = '';
        var o = null;
        ['close','createAttribute','createDocumentFragment','createElement','createElementNS','createEvent','createNSResolver','createRange','createTextNode','createTreeWalker','evaluate','execCommand','getElementById','getElementsByName','getElementsByTagName','importNode','open','queryCommandEnabled','queryCommandIndeterm','queryCommandState','queryCommandValue','write','writeln'].forEach(function(e){d[e]=f;});
        ['anchors','applets','body','defaultView','doctype','documentElement','embeds','firstChild','forms','images','implementation','links','location','plugins','styleSheets'].forEach(function(e){d[e]=o;});
        ['URL','characterSet','compatMode','contentType','cookie','designMode','domain','lastModified','referrer','title'].forEach(function(e){d[e]=s;});
    })(document);
    %s;
    decoded;''' % (ol_id, js_code)
    try:
        decoded = js2py.eval_js(js_code)
        # A bare raise (no active exception) is deliberate here: it trips the
        # except below whenever the decode produced garbage.
        if ' ' in decoded or decoded == '':
            raise
        return decoded
    except:
        raise ResolverError('Could not eval ID decoding')
def wildcard(input):
    """Compress a list of (bitstring, id) pairs into wildcard patterns.

    ``input`` is an iterable of 2-item sequences; each is re-joined as
    "<id> <bitstring>" and handed to an embedded javascript minimizer
    (evaluated once with js2py) that merges binary cases into patterns
    containing '-' wildcards.  Returns the merged patterns as ascii bytes.
    """
    res = []
    a = []
    # Reorder each pair into the "value id" form the JS function expects.
    for i in input:
        a.append(i[1] + " " + i[0])
    # The minimization algorithm lives in this javascript one-liner;
    # js2py compiles it into a callable Python function.
    add = js2py.eval_js('function wildcard(input){var output=[],cases=[],wilds=[],patts=[],masks=[];var bits=groupCases(cases);for(var i=0;i<=bits;i++)wilds[i]=[];wildStrings(bits);convertStrings(wilds,patts,"-01","110");convertStrings(wilds,masks,"-01","011");for(var c=0;c<cases.length;c++){for(var i=0,j=Math.pow(2,bits);i<=bits;i++,j /=2){for(var k=0;k<patts[i].length;k++){var patt=patts[i][k];var mask=masks[i][k];var matches=[];for(var d=0;d<cases[c].nums.length;d++){var num=cases[c].nums[d];if(((num^patt)&mask)==mask)matches.push(d);}if(matches.length==j){output.push(wilds[i][k]+" "+cases[c].id);for(var l=j-1;l>=0;l--)cases[c].nums.splice(matches[l],1);}}}}return output;function groupCases(cases){var max=0;for(var i=0;i<input.length;i++){var num=parseInt(input[i],2);if(num>max)max=num;var id=input[i].slice(input[i].indexOf(" ")+1);var pos=0;while(cases[pos]!=undefined&&cases[pos].id!=id)++pos;if(cases[pos]==undefined)cases[pos]={id:id,nums:[]};cases[pos].nums.push(num);}return Math.ceil(Math.log(max)/ Math.log(2));}function wildStrings(len,wild,str){wild=wild||0;str=str||"";for(var i=0;i<3;i++){var w=(i==0)?1:0;var s=str+["-","0","1"][i];if(len>1){wildStrings(len-1,wild+w,s)}else {wilds[bits-wild-w].push(s);}}}function convertStrings(input,output,from,to){for(var i=0;i<input.length;i++){output[i]=[];for(var j=0;j<input[i].length;j++){var str=input[i][j],s="";for(var k=0;k<str.length;k++){s+=to.charAt(from.indexOf(str.charAt(k)));}output[i].push(parseInt(s,2));}}}}')
    # js2py arrays may be padded with None entries; stop at the first one.
    for i in add(a):
        if i is None:
            break
        res.append(i.encode("ascii", "ignore"))
    return res
def addcrypted2(self):
    """Click'N'Load v2 handler: decrypt the posted link list and add it
    as a package.

    The 'jk' POST field is a javascript function f() that yields the hex
    decryption key; the 'crypted' field is the b64/url-encoded payload.
    """
    package = self.get_post("source", "ClickNLoad Package")
    crypted = self.get_post("crypted")
    jk = self.get_post("jk")
    crypted = standard_b64decode(unquote(crypted.replace(" ", "+")))
    jk = js2py.eval_js(f"{jk} f()")
    key = bytes.fromhex(jk)
    obj = Fernet(key)
    # Fix: decrypt() returns bytes — decode before the str-based cleanup,
    # otherwise .replace("\x00", "") raises TypeError on bytes.
    text = obj.decrypt(crypted).decode(errors="replace")
    result = text.replace("\x00", "").replace("\r", "").split("\n")
    result = [x for x in result if x != ""]
    self.add_package(package, result, 0)
def _get_links(self, crypted, jk):
    """Decrypt a Click'N'Load payload and return the contained links.

    ``jk`` is a javascript function f() producing the hex key; ``crypted``
    is the base64-encoded ciphertext.
    """
    #: Get key
    jreturn = js2py.eval_js("{} f()".format(jk))
    self.log_debug(f"JsEngine returns value [{jreturn}]")
    key = bytes.fromhex(jreturn)
    #: Decrypt
    obj = Fernet(key)
    # Fix: decrypt() returns bytes — decode before the str-based cleanup,
    # otherwise .replace("\x00", "") raises TypeError on bytes.
    text = obj.decrypt(base64.b64decode(crypted)).decode(errors="replace")
    #: Extract links
    text = text.replace("\x00", "").replace("\r", "")
    links = [link for link in text.split("\n") if link]
    #: Log and return
    self.log_debug(f"Package has {len(links)} links")
    return links
def solve_cf_challenge(self, resp, **kwargs):
    """Solve a Cloudflare IUAM challenge (Python 2 cfscrape-style) and
    replay the original request.
    """
    time.sleep(5)  # Cloudflare requires a delay before answering
    body = resp.text
    domain = urlparse.urlparse(resp.url).netloc
    params = kwargs.setdefault("params", {})
    headers = kwargs.setdefault("headers", {})
    headers["Referer"] = resp.url
    try:
        params["jschl_vc"] = re.search('name="jschl_vc" value="(\w+)"', body).group(1)
        params["pass"] = re.search('name="pass" value="(.+?)"', body).group(1)
        # Extract the arithmetic challenge expression.
        js = re.search("setTimeout\(function\(\){\s+(var s,t,o,p,b,r,e,a,k,i,n,g,f.+?\r?\n[\s\S]+?a\.value =.+?)\r?\n", body).group(1)
        js = re.sub("a\.value = (parseInt\(.+?\)).+", r"\1", js)
        js = re.sub("\s{3,}[a-z](?: = |\.).+", "", js)
        js = re.sub("[\n\\']", "", js)
    except Exception:
        raise Exception("[!] Unable to parse Cloudflare anti-bots page.")
    params["jschl_answer"] = str(int(js2py.eval_js(js)) + len(domain))
    # Fix: the extra kwargs must be **-unpacked — the original passed the
    # dict as a positional argument to self.request, which is wrong.
    redirect = self.request(
        resp.request.method,
        "%s://%s/cdn-cgi/l/chk_jschl" % (urlparse.urlparse(resp.url).scheme, domain),
        **dict(kwargs, allow_redirects=False)
    )
    return self.request(resp.request.method, redirect.headers["Location"], **kwargs)
def get_link(self):
    """Emulate the page's DOM javascript to compute the download link.

    Collects the inline scripts that set ('dlbutton').href, builds a stub
    `document` object, seeds it with the attribute values of the referenced
    elements, and evaluates everything with js2py.
    """
    #: Get all the scripts inside the html body
    soup = BeautifulSoup(self.data)
    scripts = [
        s.get_text()
        for s in soup.body.find_all("script", type="text/javascript")
        if "('dlbutton').href =" in s.get_text()
    ]
    #: Emulate a document in JS.
    # Fix: the DOM entry points must use the browser's camelCase names
    # (getElementById / getAttribute / hasOwnProperty) — the snake_case
    # identifiers never matched the page's javascript.
    inits = [
        """
        var document = {}
        document.getElementById = function(x) {
            if (!this.hasOwnProperty(x)) {
                this[x] = {getAttribute : function(x) { return this[x] } }
            }
            return this[x]
        }
        """
    ]
    #: inits is meant to be populated with the initialization of all the DOM elements found in the scripts
    elt_re = r'getElementById\([\'"](.+?)[\'"]\)(\.)?(getAttribute\([\'"])?(\w+)?([\'"]\))?'
    for m in re.findall(elt_re, " ".join(scripts)):
        JSid, JSattr = m[0], m[3]
        values = [
            f for f in (elt.get(JSattr, None) for elt in soup.find_all(id=JSid)) if f
        ]
        if values:
            inits.append(
                'document.getElementById("{}")["{}"] = "{}"'.format(
                    JSid, JSattr, values[-1]
                )
            )
    #: Add try/catch in JS to handle deliberate errors
    scripts = ["\n".join(("try{", script, "} catch(err){}")) for script in scripts]
    #: Get the file's url by evaluating all the scripts
    scripts = inits + scripts + ["document.dlbutton.href"]
    return js2py.eval_js("\n".join(scripts))
def solve_cf_challenge(self, resp, **original_kwargs):
    """Solve a Cloudflare IUAM challenge page and replay the request.

    ``resp`` is the challenge response; returns the final response.
    """
    sleep(5)  # Cloudflare requires a delay before solving the challenge
    body = resp.text
    parsed_url = urlparse(resp.url)
    domain = parsed_url.netloc  # fix: reuse parsed_url instead of re-parsing
    submit_url = "%s://%s/cdn-cgi/l/chk_jschl" % (parsed_url.scheme, domain)
    # Deep-copy so the answer submission can't mutate the caller's kwargs.
    cloudflare_kwargs = deepcopy(original_kwargs)
    params = cloudflare_kwargs.setdefault("params", {})
    headers = cloudflare_kwargs.setdefault("headers", {})
    headers["Referer"] = resp.url
    try:
        params["jschl_vc"] = re.search(r'name="jschl_vc" value="(\w+)"', body).group(1)
        params["pass"] = re.search(r'name="pass" value="(.+?)"', body).group(1)
        # Extract the arithmetic operation
        js = self.extract_js(body)
    except Exception:
        # Something is wrong with the page.
        # This may indicate Cloudflare has changed their anti-bot
        # technique. If you see this and are running the latest version,
        # please open a GitHub issue so I can update the code accordingly.
        logging.error("[!] Unable to parse Cloudflare anti-bots page. "
                      "Try upgrading cloudflare-scrape, or submit a bug report "
                      "if you are running the latest version. Please read "
                      "https://github.com/Anorov/cloudflare-scrape#updates "
                      "before submitting a bug report.")
        raise
    # Safely evaluate the Javascript expression
    params["jschl_answer"] = str(int(js2py.eval_js(js)) + len(domain))
    # Requests transforms any request into a GET after a redirect,
    # so the redirect has to be handled manually here to allow for
    # performing other types of requests even as the first request.
    method = resp.request.method
    cloudflare_kwargs["allow_redirects"] = False
    redirect = self.request(method, submit_url, **cloudflare_kwargs)
    return self.request(method, redirect.headers["Location"], **original_kwargs)
def getURLFromObfJs(js):
    """Deobfuscate a packed ``eval(...)`` script and extract the player markup.

    The script is evaluated with js2py (with ``eval`` redirected into a
    result variable), its ``_escape`` payload is unicode-unescaped, and the
    function returns either the embedded player <form> markup or a
    '-'-joined list of iframe tags found in the payload.

    :param js: the obfuscated javascript source.
    :return: decoded markup string, iframe list string, or False when the
        ``_escape`` variable cannot be found.
    """
    import codecs  # local import: keeps the module's import surface unchanged

    # Capture the eval()'d payload instead of executing it.
    js = js.replace("eval", "fnRes=")
    # BUG FIX: `print "return" in js` / `print "escape code: " + ...` were
    # Python-2-only print statements (syntax errors under Python 3); the
    # single-argument function form behaves identically on both.
    print("return" in js)  # debug: does the payload contain 'return'?
    js = str(js2py.eval_js(js))

    # First let's decode the javascript: the payload hides in `_escape`.
    searchObj = re.search("var _escape='[%u\\d\\w]+';", js)
    if searchObj:
        escapeCode = searchObj.group().replace("var _escape='", "")[:-2]
        escapeCode = escapeCode.replace("%", "\\")
        # BUG FIX: str.decode() does not exist on Python 3 strings;
        # codecs.decode(..., 'unicode-escape') works on both 2 and 3.
        escapeCode = codecs.decode(escapeCode, "unicode-escape").replace("'+autoplay+'", "no")
        print("escape code: " + escapeCode)
    else:
        return False

    # A direct hqq/netu embedded-player form is returned as-is.
    if re.search(r'<form(.+?)action="[^"]*(hqq|netu)\.tv/player/embed_player\.php"[^>]*>', escapeCode):
        return escapeCode

    # Second let's find the iframes src.
    iframes = re.findall('<iframe [\\w\\d"=:\\/.?&\'+ %-;><]*<\\/iframe>', escapeCode)
    return '-'.join(iframes)
def solve_challenge(self, body, domain):
    """Extract and evaluate Cloudflare's IUAM arithmetic challenge.

    Pulls the ``setTimeout`` challenge snippet out of *body*, rewrites it
    into a bare arithmetic expression, substitutes ``t.length`` with the
    length of *domain*, and evaluates it in-process with js2py.

    :param body: HTML of the Cloudflare interstitial page.
    :param domain: hostname of the protected site (its length feeds the sum).
    :return: the evaluated challenge answer (must parse as a float).
    :raises ValueError: when the page layout is unrecognized or the
        evaluated answer is not numeric.
    """
    # Locate the challenge function body inside the setTimeout(...) call.
    try:
        js = re.search(r"setTimeout\(function\(\){\s+(var "
                       "s,t,o,p,b,r,e,a,k,i,n,g,f.+?\r?\n[\s\S]+?a\.value =.+?)\r?\n", body).group(1)
    except Exception:
        raise ValueError("Unable to identify Cloudflare IUAM Javascript on website. %s" % BUG_REPORT)

    # Keep only the expression assigned to a.value; drop trailing DOM code,
    # then strip per-variable bookkeeping lines and inline the domain length.
    js = re.sub(r"a\.value = (.+ \+ t\.length).+", r"\1", js)
    js = re.sub(r"\s{3,}[a-z](?: = |\.).+", "", js).replace("t.length", str(len(domain)))

    # Strip characters that could be used to exit the string context
    # These characters are not currently used in Cloudflare's arithmetic snippet
    js = re.sub(r"[\n\\']", "", js)

    # The genuine arithmetic snippet always calls .toFixed; its absence
    # means we captured the wrong thing.
    if "toFixed" not in js:
        raise ValueError("Error parsing Cloudflare IUAM Javascript challenge. %s" % BUG_REPORT)

    # Use vm.runInNewContext to safely evaluate code
    # The sandboxed code cannot use the Node.js standard library
    #js = "%s, Object.create(null), {timeout: 5000}));" % js

    try:
        print(js)
        # Evaluated in-process via js2py (Node.js subprocess call disabled below).
        result = js2py.eval_js(js)
        print("eval cloudfare js result: ", result)
        #result = subprocess.check_output(["node", "-e", js]).strip()
    except OSError as e:
        # errno 2 == ENOENT: the (legacy) Node.js binary was not found.
        if e.errno == 2:
            raise EnvironmentError("Missing Node.js runtime. Node is required and must be in the PATH (check with `node -v`). Your Node binary may be called `nodejs` rather than `node`, in which case you may need to run `apt-get install nodejs-legacy` on some Debian-based systems. (Please read the cfscrape"
                                   " README's Dependencies section: https://github.com/Anorov/cloudflare-scrape#dependencies.")
        raise
    except Exception:
        logging.error("Error executing Cloudflare IUAM Javascript. %s" % BUG_REPORT)
        raise

    # Sanity check: the answer must be numeric.
    try:
        float(result)
    except Exception:
        raise ValueError("Cloudflare IUAM challenge returned unexpected answer. %s" % BUG_REPORT)

    return result
def get_js_return(content): """ <form id="challenge-form" action="/cdn-cgi/l/chk_jschl" method="get"> <input type="hidden" name="jschl_vc" value="53c6b5dc36c113610e2597f11d600d1a"/> <input type="hidden" name="pass" value="1504765865.161-4zOzo1BY22"/> <input type="hidden" id="jschl-answer" name="jschl_answer"/> </form> """ # 1ac9690a11b8fcfca762bc36f64679f9, 1504779184.092-aYUwU2JqVN, 2032 # __cfduid de7bab0aaae0290de0540da8fba78f3731504779180 # cf_clearance dc3e9630656b5528c0f1c10d7c9c523c7606e096-1504779185-28800 jschl_vc = "0f7e4868294108cd97d299fba2b793a6" passwd = "1504832239.691-38r4axZu7r" jschl_answer = "1875" l = re.findall(r'name="jschl_vc" value="(.*?)"', content) if l: jschl_vc = l[0] l = re.findall(r'name="pass" value="(.*?)"', content) if l: passwd = l[0] m = re.search(r'setTimeout\(function\(\)\{((?:.|\n)*?)f\.submit\(\)', content) if m: s = m.group(1) l = s.split("\n") l = [i for i in l if i.split()] first = l[0] last = l[-1] _ret = re.search(r"(.*?)a\.value\s+=\s+((.*?)121')", last) if _ret: last = _ret.group(1) ret = _ret.group(2) js = "function f(){ %s %s %s return %s }" % (first, """ t = 'https://www.spamhaus.org/'; r = t.match(/https?:\/\//)[0]; t = t.substr(r.length); t = t.substr(0,t.length-1); """, last, ret) jschl_answer = eval_js(js)() return jschl_vc, passwd, jschl_answer
def magic(cmd):
    """Dispatch a chat-bot command.

    Supported commands: ``clean`` (reset the module-level message cache),
    the two insult commands, and ``天气`` (fetch Beijing's weather).
    Returns a reply string, or None for 'clean'/unknown commands and when
    the weather lookup fails (best-effort by design).
    """
    if cmd == 'clean':
        global old_msg
        old_msg = {}
    elif cmd == '骂张冠男':
        return '张冠男你个臭傻逼'
    elif cmd.startswith('骂'):
        return random.choice(['啥?', '呵呵', '要心平气和', '你气~~急败坏'])
    elif cmd == '天气':
        try:
            # BUG FIX: the original aliased the shared module-level
            # `fake_headers` dict and then mutated it, permanently adding
            # a Referer for every other user of that dict; copy instead.
            f_headers = dict(fake_headers)
            f_headers['Referer'] = 'http://www.weather.com.cn'
            j = requests.get('http://d1.weather.com.cn/dingzhi/101010100.html',
                             headers=f_headers, timeout=5)
            j.encoding = 'utf-8'
            # The endpoint returns a JS assignment; evaluate the first
            # statement and read its 'weatherinfo' member.
            j = js2py.eval_js(j.text.split(';')[0])['weatherinfo']
            return "%s | %s | %s ~ %s" % (j['cityname'], j['weather'], j['tempn'], j['temp'])
        # FIX: `except Exception, e` was Python-2-only syntax and bound an
        # unused name; the bare form works on both 2 and 3. The swallow is
        # intentional (weather is best-effort).
        except Exception:
            pass
def _encrypted_password(self):
    """Fetch the router's login page and run its password-encryption JS.

    Cisco does logic in javascript to encrypt your router password before
    authenticating; the routine embeds a nonce that changes per request.
    We download the page, cut the encryption function out, append an
    invocation with our password, and evaluate the result with js2py.
    """
    page = self._session.get(self._address)
    # Flatten the page so the regex can span what were separate lines.
    flattened = "".join(page.content.splitlines())
    # The encryption routine sits between these two landmarks in the page.
    snippet = re.findall(
        'md5 for more info. \*\/(.*)function chk_keypress', flattened)[0]
    # Append a call that double-encrypts the password with the nonce mixed in.
    invocation = ' en_value(en_value("' + self._password + '")+nonce);'
    return js2py.eval_js(snippet + invocation)
def check_device_id(self):
    """Ensure Xunlei's device-fingerprint cookies exist, generating them if needed.

    When the ``deviceid`` cookie is missing: fetch the server's signing
    algorithm (delivered as javascript), build a synthetic browser
    fingerprint, sign it with that algorithm, and report it to the risk
    endpoint so subsequent logins are accepted.
    """
    if not self.has_cookie('.xunlei.com', 'deviceid'):
        # Fetch the fingerprint-signing JS; the timestamp defeats caching.
        url1 = 'https://login.xunlei.com/risk?cmd=algorithm&t=' + str(
            current_timestamp())
        sign_fun = self.__urlread(url1).decode()
        import js2py
        # The downloaded script evaluates to a callable signing function.
        xl_al = js2py.eval_js(sign_fun)
        # Synthetic fingerprint: UA, locale, color depth, resolution, tz
        # offset, feature flags, platform — plus a timestamp-derived salt.
        SB = USER_AGENT + "###zh-cn###24###960x1440###-540###true###true###true###undefined###undefined###x86###Win32#########" + md5(
            str(current_timestamp()).encode())
        xl_fp_raw = base64.b64encode(SB.encode()).decode()
        xl_fp = md5(xl_fp_raw.encode())
        xl_fp_sign = xl_al(xl_fp_raw)
        device_data = {
            'xl_fp_raw': xl_fp_raw,
            'xl_fp': xl_fp,
            'xl_fp_sign': xl_fp_sign
        }
        device_url = 'http://login.xunlei.com/risk?cmd=report'
        self.urlopen(device_url, data=device_data).read()
    # The _x_t_ cookie just needs to exist; '0' is accepted.
    if not self.has_cookie('.xunlei.com', '_x_t_'):
        self.set_cookie('.xunlei.com', '_x_t_', '0')
def solve_cf_challenge(self, resp, **kwargs):
    """Answer the Cloudflare IUAM challenge in *resp* and GET the follow-up URL."""
    # Cloudflare requires a delay before solving the challenge.
    time.sleep(5)

    page = resp.text
    parts = urlparse(resp.url)
    domain = parts.netloc
    submit_url = "%s://%s/cdn-cgi/l/chk_jschl" % (parts.scheme, domain)

    params = kwargs.setdefault("params", {})
    headers = kwargs.setdefault("headers", {})
    headers["Referer"] = resp.url

    try:
        params["jschl_vc"] = re.search(r'name="jschl_vc" value="(\w+)"', page).group(1)
        params["pass"] = re.search(r'name="pass" value="(.+?)"', page).group(1)

        # Extract the arithmetic operation
        js = self.extract_js(page)
    except Exception:
        # Unrecognized page layout — possibly a new Cloudflare anti-bot
        # variant; surface the parse failure to the caller.
        print("[!] Unable to parse Cloudflare anti-bots page. "
              "Try upgrading cloudflare-scrape, or submit a bug report "
              "if you are running the latest version. Please read "
              "https://github.com/Anorov/cloudflare-scrape#updates "
              "before submitting a bug report.\n")
        raise

    # Turn the snippet into a bare expression by dropping 'return'
    # (NOTE: this removes every occurrence of the substring), then add the
    # domain length as the challenge requires.
    js = js.replace('return', '')
    params["jschl_answer"] = str(int(js2py.eval_js(js)) + len(domain))

    return self.get(submit_url, **kwargs)
def execute_js(comic_id, chapter_id):
    """Fetch a qiman6 chapter page and evaluate its inline image-list script.

    Returns a JSON array of image entries on success, a JSON error object
    on evaluation failure, or None when the page could not be fetched.
    """
    api = "http://qiman6.com/{}/{}.html".format(comic_id, chapter_id)
    # Mobile-browser headers; the site serves the script variant we need
    # only with a matching Referer/UA.
    headers = {
        "Host": "qiman6.com",
        "Referer": "http://qiman6.com/{}/".format(comic_id),
        "Upgrade-Insecure-Requests": "1",
        "user-agent": "Mozilla/5.0 (iPhone; CPU iPhone OS 13_2_3 like Mac OS X) AppleWebKit/605.1.15 (KHTML, "
                      "like Gecko) Version/13.0.3 Mobile/15E148 Safari/604.1 "
    }
    response = requests.get(url=api, headers=headers, timeout=10)
    if response.status_code != 200:
        return None
    try:
        document = pq(response.text)
        # First <script> in <body> computes the image list when evaluated.
        script_text = document("body script").eq(0).text()
        images = js2py.eval_js(script_text)
        return json.dumps(list(images), ensure_ascii=False)
    except Exception as e:
        return json.dumps({"error": str(e)}, ensure_ascii=False)
def decode_data(data, nonce):
    """Strip nonce-directed filler characters out of *data*.

    The nonce encodes (position, junk-string) pairs; the site's own JS
    decoder (kept verbatim below) removes the junk. Delegating to js2py
    guarantees bit-identical behavior with the browser.
    """
    decode_js = \
        '''
        function decode(data, nonce) {
            var T = data.split('');
            var N = nonce;
            var len, locate, str;
            N = N.match(/\d+[a-zA-Z]+/g);
            len = N.length;
            while (len--) {
                locate = parseInt(N[len]) & 255;
                str = N[len].replace(/\d+/g, '');
                T.splice(locate, str.length);
            }
            T = T.join('');
            return T;
        }
        '''
    decoder = js2py.eval_js(decode_js)
    return decoder(data, nonce)
def test():
    """Manual smoke test: fetch one dm5.com chapter's image list.

    Relies on module-level state: ``parse_var`` populates ``args`` with the
    signing variables scraped from the chapter page, and ``domain`` holds
    the site root. The chapterfun.ashx endpoint answers with javascript
    which, when evaluated, yields the image URLs.
    """
    parse_var("http://www.dm5.com/m25536/")
    # Signing/paging parameters expected by chapterfun.ashx.
    params = {
        "cid": args["DM5_CID"],
        "page": 1,
        "key": "",
        "language": 1,
        "gtk": 6,
        "_cid": args["DM5_CID"],
        "_mid": args["DM5_MID"],
        "_dt": args["DM5_VIEWSIGN_DT"],
        "_sign": args["DM5_VIEWSIGN"]
    }
    print("params参数:", params)
    url = domain + "/m25536/" + "chapterfun.ashx"
    # The endpoint rejects requests without a matching Referer.
    headers = {"Referer": "http://www.dm5.com/m25536/"}
    res = requests.get(url, params=params, headers=headers)
    print(res.encoding)
    print(res.url)
    print("res:", res.text)
    # The response body is javascript; evaluating it produces the images.
    imgs = js2py.eval_js(res.text)
    print("eval函数解析后结果:", imgs)
def get_all_last_1_months(start, limit):
    """Fetch a page of NSE quarterly financial-results listings.

    :param start: pagination offset passed to the NSE endpoint.
    :param limit: page size passed to the NSE endpoint.
    :return: dict parsed from the endpoint's JS payload; ``{"rows": []}``
        when anything fails (the error is recorded via ``insert_log``).
    """
    out = {"rows": []}
    # Initialized so the except-branch log line can't hit a NameError if
    # the URL formatting itself raises (pre-existing latent bug).
    url = None
    try:
        url = "https://www.nseindia.com/corporates/corpInfo/equities/getFinancialResults.jsp?start={start}&limit={limit}&symbol=&industry=&period=Quarterly&broadcastPeriod=Last%203%20Months".format(
            start=start, limit=limit)
        headers = {
            'mode': 'no-cors',
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.3'
        }
        # BUG FIX: the original mixed the urllib2 API into the requests
        # module (`requests.Request(url, None, headers)` followed by
        # `requests.urlopen(req)` / `response.read()` — none of which exist
        # in requests). A plain requests.get carries the headers correctly.
        response = requests.get(url, headers=headers)
        the_page = response.content
        # The payload is a JS object literal, not strict JSON; let JS
        # stringify it, then parse the result as JSON.
        js = "function a() {return JSON.stringify(" + the_page.decode(
            "ascii") + ")} a()"
        out = json.loads(js2py.eval_js(js))
        insert_log((0, 0, 0, 'NSE', 'SEARCHINFO', None, url, str(out)))
    except Exception as e:
        insert_log((0, 0, 0, 'NSE', 'CRITICAL', None, url,
                    "Error getting all 1 month search, " + str(e)))
    return out
def parse_volume_list(self, soup):
    """Populate ``self.volumes`` from the page's inline ``window.lnmtl`` data.

    The volume metadata lives in the <script> right after <main>; it is
    executed inside an IIFE via js2py and ``window.lnmtl`` is read back.

    :raises Exception: when no volume could be parsed at all.
    """
    self.volumes = []
    script = soup.find(name='main').find_next_sibling(name='script').string
    try:
        payload = js2py.eval_js(
            '(function() {' + script + 'return window.lnmtl;})()'
        ).to_dict()
        for index, volume in enumerate(payload['volumes']):
            # Normalize the title: keep only latin-1 chars, drop leftover
            # empty parentheses, trim whitespace.
            name = volume.get('title', '') or ''
            name = re.sub(r'[^\u0000-\u00FF]', '', name)
            name = re.sub(r'\(\)', '', name).strip()
            self.volumes.append({
                'id': index + 1,
                'title': name,
                'download_id': volume['id'],
            })
    except Exception:
        logger.exception('Failed parsing one possible batch')
    if not self.volumes:
        raise Exception('Failed parsing volume list')
def login(mobilePhone, pswd):
    """Log in with phone number + password.

    The module-level ``js`` defines a guid generator used for the 'ud'
    cookie; the server answers with 'tok|u|m' which become the remaining
    session cookies.

    :return: cookie dict {'ud', 'tok', 'u', 'm'} on success, 'error' otherwise.
    """
    guid = js2py.eval_js(js)
    session_cookies = {'ud': guid()}
    payload = {
        'mobilePhone': mobilePhone,
        'pswd': pswd,
        'imageCode': ''
    }
    reply = requests.post(login_url, cookies=session_cookies,
                          headers=headers, data=payload)
    parsed = json.loads(reply.text)
    if parsed['code'] != 200:
        return 'error'
    # result is 'tok|u|m'; 'm' must be URL-escaped for cookie transport.
    parts = parsed['result'].split('|')
    session_cookies['tok'] = parts[0]
    session_cookies['u'] = parts[1]
    session_cookies['m'] = parts[2].replace("/", '%2F').replace("=", "%3D")
    # r2=requests.post(islogin_url,headers=headers,cookies=cookies)
    # print(r2.text)
    # cookies: ud tok u m
    return session_cookies
def getsign():
    """Build a signed wjx.cn submission URL.

    The page embeds anti-bot tokens (jqnonce/rndnum/starttime); jqsign is
    jqnonce XOR'ed per-character with ktimes%10, computed by the site's own
    JS routine (reproduced verbatim below).
    """
    import requests
    curID = 39164517
    submittype = 1
    page = requests.get('https://www.wjx.cn/jq/%s.aspx?from=timeline' % curID)
    hlv = 1
    html = page.text
    # Anti-bot tokens embedded in the survey page.
    jqnonce = re.search('jqnonce="(.*?)"', html).group(1)
    rndnum = re.search('rndnum="(.*?)"', html).group(1)
    starttime = re.search('starttime="(.*?)"', html).group(1)
    t = (str(int(time() * 1000)))  # millisecond timestamp
    # Same XOR scrambler the site runs client-side.
    js_res = js2py.eval_js('''
    function gen(jqnonce, ktimes) {
        var c, d, e, b = ktimes % 10;
        var a = jqnonce;
        for (0 == b && (b = 1), c = [], d = 0; d < a.length; d++)
            e = a.charCodeAt(d) ^ b, c.push(String.fromCharCode(e));
        var jqsign = (c.join(""));
        return jqsign;
    }
    ''')
    ktimes = 58
    jqsign = js_res(jqnonce, ktimes)
    params = {
        'submittype': submittype,
        'curID': curID,
        't': t,
        'ktimes': ktimes,
        'rn': rndnum,
        'hlv': hlv,
        'jqnonce': jqnonce,
        'starttime': starttime,
        'jqsign': jqsign
    }
    print(params)
    url = 'https://www.wjx.cn/joinnew/processjq.ashx?from=timeline&%s' \
        % (parse.urlencode(params))
    print(url)
    return url
def parse_artists_json(body):
    """Parse Google Arts' artists payload into a list of artist dicts.

    The payload is JS (prefixed with the XSSI guard ``)]}'``), so it is
    evaluated with js2py rather than json. Each resulting dict carries
    name, page_url, artist_id and total_items_count.

    :param body: raw response bytes from the artists endpoint.
    """
    body = body.decode('utf-8').strip()
    # Strip the anti-XSSI prefix when present.
    if body.startswith(')]}\''):
        body = body[4:]
    # BUG FIX: the two progress messages were Python-2-only print
    # statements (syntax errors under Python 3); the single-argument
    # function form prints identically on both versions.
    print('Parsing json (can take several minutes)')
    js_array = js2py.eval_js(body)
    # The artist records live at a fixed position in the nested array.
    artists_array = js_array[0][0][2]
    print('json parsed!')
    artists = list()
    for artist_obj in artists_array:
        artist_item = dict()
        artist_item['name'] = artist_obj[0].strip()
        artist_item['page_url'] = GoogleartCrawlSpider.BASE_URL + artist_obj[3].strip('/')
        artist_item['artist_id'] = GoogleartCrawlSpider.artist_id_from_page_url(
            artist_item['page_url'])
        # e.g. "1,234 items" -> 1234
        artist_item['total_items_count'] = int(
            artist_obj[1].strip().split(' ')[0].replace(',', ''))
        artists.append(artist_item)
    return artists
def __digest_password(self):
    """Hash the account password exactly as the Fetion web client does.

    Evaluates the site's minified JS digest routine over the string
    ``'fetion.com.cn:<password>'`` and returns its hex digest. The round
    constants (1518500249 == 0x5A827999 etc.) suggest the routine is
    SHA-1 despite the generic naming — verified only against the site's
    own behavior, not re-derived here.
    """
    # Verbatim minified digest function lifted from the site; kept
    # untouched so the result matches the server's expectation exactly.
    Digest_pd = '''
    function(b) {
        function a(a, d) {
            var c = (a & 65535) + (d & 65535);
            return (a >> 16) + (d >> 16) + (c >> 16) << 16 | c & 65535
        }
        for (var d = (b.length + 8 >> 6) + 1, c = Array(16 * d), e = 0; e < 16 * d; e++) c[e] = 0;
        for (e = 0; e < b.length; e++) c[e >> 2] |= b.charCodeAt(e) << 24 - 8 * (e & 3);
        c[e >> 2] |= 128 << 24 - 8 * (e & 3);
        c[16 * d - 1] = 8 * b.length;
        b = Array(80);
        for (var d = 1732584193, e = -271733879, f = -1732584194, h = 271733878, j = -1009589776, k = 0; k < c.length; k += 16) {
            for (var l = d, m = e, n = f, p = h, q = j, g = 0; 80 > g; g++) {
                b[g] = 16 > g ? c[k + g] : (b[g - 3] ^ b[g - 8] ^ b[g - 14] ^ b[g - 16]) << 1 | (b[g - 3] ^ b[g - 8] ^ b[g - 14] ^ b[g - 16]) >>> 31;
                var r = a(a(d << 5 | d >>> 27, 20 > g ? e & f | ~e & h : 40 > g ? e ^ f ^ h : 60 > g ? e & f | e & h | f & h : e ^ f ^ h), a(a(j, b[g]), 20 > g ? 1518500249 : 40 > g ? 1859775393 : 60 > g ? -1894007588 : -899497514))
                , j = h
                , h = f
                , f = e << 30 | e >>> 2
                , e = d
                , d = r
            }
            d = a(d, l);
            e = a(e, m);
            f = a(f, n);
            h = a(h, p);
            j = a(j, q)
        }
        c = [d, e, f, h, j];
        b = "";
        for (d = 0; d < 4 * c.length; d++) b += "0123456789abcdef".charAt(c[d >> 2] >> 8 * (3 - d % 4) + 4 & 15) + "0123456789abcdef".charAt(c[d >> 2] >> 8 * (3 - d % 4) & 15);
        return b
    }'''
    # Compile the JS into a Python callable, then hash the salted password.
    password = js2py.eval_js(Digest_pd)
    password = password('fetion.com.cn:%s' % str(self.password))
    return password
def variantB_parse_script_for_object(
        self, script_url: str) -> Tuple[Optional[Dict], CrawlState, str]:
    """
    Extract the consent data from an inline json object.
    This assumes that inside the object, the array "Groups" is found.
    Inside this array we can find all the cookie data we need -- however,
    the object needs to be sanitized first, and stray characters need to be removed.
    The process isn't perfect, but it should work with a sufficient number of instances.
    """
    cookielaw_script = requests.get(script_url).text.strip()

    # purge newlines so the bracket scan below runs over one flat string
    cookielaw_script = re.sub('\n', ' ', cookielaw_script)

    # Find the start of the group array
    matchobj = re.search(",\\s*Groups:\\s*\\[", cookielaw_script)
    try:
        if matchobj:
            startpoint = matchobj.start(0)
            # Get the end of the group array by tracking bracket depth;
            # the '[' consumed by the match counts as the first open bracket.
            i = matchobj.end(0)
            open_brackets = 1
            while i < len(cookielaw_script) and open_brackets > 0:
                if cookielaw_script[i] == "[":
                    open_brackets += 1
                elif cookielaw_script[i] == "]":
                    open_brackets -= 1
                i += 1
            # Slice from just past the leading comma up to (and including)
            # the closing ']', i.e. 'Groups: [...]'.
            group_string = cookielaw_script[startpoint + 1:i]

            # put the object into a javascript function, and evaluate it
            # This returns a dict of the cookie consent data we need.
            js_object_string = "function $() {return {" + group_string + "}};"
            data_dict = js2py.eval_js(js_object_string)()
            return data_dict, CrawlState.SUCCESS, "Successfully extracted objects from javascript"
        else:
            return None, CrawlState.PARSE_ERROR, "Failed to find desired javascript object in Onetrust consent script."
    except Exception as ex:
        # Deliberately broad: any parse/eval hiccup is reported as UNKNOWN
        # rather than crashing the crawl.
        return None, CrawlState.UNKNOWN, f"Unexpected error while parsing OneTrust javascript: : {type(ex)} {ex}"
def process_model(mod_name, url, model, max_width=120):
    """Price a product model across widths 0..max_width-1.

    :param mod_name: model name substituted into *url*.
    :param url: format-string URL with one placeholder for the model name.
    :param model: javascript source of the pricing function; calling it
        with a width yields ``[id, price]``.
    :param max_width: exclusive upper bound of the width range.
    :return: list of {'id', 'price', 'url'} dicts, zero-priced and
        duplicate entries removed (last occurrence kept).
    """
    sku_list = []
    # PERF FIX: the original re-evaluated js2py.eval_js(model) on every
    # loop iteration; the compilation is loop-invariant, so hoist it.
    # (Assumes the JS pricing function is stateless — TODO confirm.)
    f = js2py.eval_js(model)
    for width in range(max_width):
        product = f(width)
        # create sku
        sku = {}
        sku['id'] = product[0]
        price = product[1]
        # widths beyond 96 carry a fixed surcharge
        if width > 96:
            price += 120
        sku['price'] = float('{:.2f}'.format(price))
        sku['url'] = url.format(mod_name)
        if sku['price'] != 0:
            sku_list.append(sku)
    # remove duplicates (dicts are unhashable, so scan; keeps the LAST
    # occurrence of each duplicate, matching the original semantics)
    sku_list_no_dup = [i for n, i in enumerate(sku_list)
                       if i not in sku_list[n + 1:]]
    return sku_list_no_dup
def getLinks(urlid, url, linkExtractor, nameExtractor, durationExtractor, dateCheck, tagsExtractor):
    """Fetch *url* and extract video links via the supplied extractor code.

    Python 2 code. ``linkExtractor`` is a Python expression evaluated with
    eval() against the local ``rootpage`` soup — SECURITY: these extractor
    strings must come from trusted configuration only, never user input.
    If no links are found on the first pass, the page's JS cookie
    challenge is solved with js2py and the fetch is retried once.
    """
    try:
        rootpage = getDocSoup(url)
        # SECURITY NOTE: eval of a caller-supplied expression string.
        vidlinks = eval(linkExtractor)
        if len(vidlinks) == 0:
            try:
                # The page may be a JS cookie challenge: strip the HTML
                # comment wrapper and the reload call, turn the cookie
                # assignment into a return, and evaluate `go()` to get the
                # cookie value.
                cookie = js2py.eval_js(re.sub(".*<!--", "",
                                              re.sub("//-->.*", "",
                                                     rootpage.get_text().replace("document.cookie=", "return ").replace(
                                                         "document.location.reload(true);", "").replace(
                                                         "Loading ...", ""))) + " go()")
                # Retry with the solved cookie attached.
                rootpage = getDocSoup(url, cookie)
                vidlinks = eval(linkExtractor)
            except:
                # Best-effort: fall through to the "no videos" report below.
                pass
        if len(vidlinks) == 0:
            print >> sys.stderr, "NO VIDEOS FOUND: " + url
            return
    except (urllib2.HTTPError, urllib2.URLError), e:
        # Network failure: log and give up on this URL.
        print >> sys.stderr, "GL " + type(e).__name__ + " " + str(e) + " " + url
        return
def _get_data_by_key(js_list):
    """JavaScript function to generate the languages.

    Each snippet in *js_list* is a call to ``AF_initDataCallback`` carrying
    a payload. Instead of parsing the payload by hand (cumbersome), we
    'overload' the function with our own definition that simply echoes the
    'key' and 'data' members back, then evaluate the snippet with js2py.
    """
    js_function = """
    function AF_initDataCallback(args) {
        return {
            key: args['key'],
            data: args['data']
        };
    };
    """
    collected = {}
    for snippet in js_list:
        evaluated = js2py.eval_js(js_function + snippet)
        collected[evaluated['key']] = evaluated['data']
    return collected
def get_seccode():
    """Obtain Sogou Translate's anti-bot 'seccode'.

    Bootstraps the cookies a real browser would carry (SNUID/SUID/ABTEST/
    IPLOC from the landing page plus a locally generated SUV), hits the
    /logtrace endpoint, and evaluates the returned javascript with js2py
    to read ``window.seccode``.
    """
    import js2py
    UA = (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_5) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36"
    )
    headers = {
        "Sec-Fetch-Mode": "no-cors",
        "User-Agent": UA,
        "Accept": "*/*",
        "Sec-Fetch-Site": "same-origin",
        "Referer": "https://translate.sogou.com/",
        "Accept-Language": "en-US,en;q=0.9,zh-CN;q=0.8,zh;q=0.7",
    }

    def get_suv():
        # SUV mimics Sogou's client-side id: microsecond timestamp + jitter.
        return str(int(time() * 1000000) + int(random() * 1000))

    def get_seccode_cookies():
        # The first request just harvests the session cookies.
        res = requests.get("https://translate.sogou.com/", headers=headers)
        return {
            "SNUID": res.cookies.get("SNUID"),
            "SUID": res.cookies.get("SUID"),
            "ABTEST": res.cookies.get("ABTEST"),
            "IPLOC": res.cookies.get("IPLOC"),
            "SUV": get_suv(),
        }

    cookies = get_seccode_cookies()
    response = requests.get(
        "https://translate.sogou.com/logtrace", headers=headers, cookies=cookies
    )
    if DEBUG:
        print(response.status_code, response.text)
    text = response.text
    # The response is JS that assigns window.seccode; append a read of that
    # variable so eval_js returns it.
    rv = js2py.eval_js(text + "; window.seccode;")
    return str(rv)
def handle_free(self, pyfile):
    """Drive turbobit.net's free-download flow for the current file.

    Steps: load the free page, honor any advertised wait limit, solve the
    captcha, wait out ``min_limit`` (a JS expression evaluated with
    js2py), then poll the timeout endpoint — with an AJAX header — for the
    final download link.
    """
    self.free_url = "http://turbobit.net/download/free/{}".format(
        self.info["pattern"]["ID"]
    )
    self.data = self.load(self.free_url)

    # Site-imposed rate limit: retry after the advertised wait.
    m = re.search(self.LIMIT_WAIT_PATTERN, self.data)
    if m is not None:
        self.retry(wait=m.group(1))

    self.solve_captcha()

    # min_limit is a JS expression (not necessarily a plain literal),
    # hence the js2py evaluation below.
    m = re.search(r"min_limit : (.+?),", self.data)
    if m is None:
        self.fail(self._("min_limit pattern not found"))

    wait_time = js2py.eval_js(m.group(1))
    self.wait(wait_time)

    # The link endpoint only answers AJAX-style requests.
    self.req.http.c.setopt(pycurl.HTTPHEADER, ["X-Requested-With: XMLHttpRequest"])

    self.data = self.load(
        "http://turbobit.net/download/get_link_timeout/{}".format(
            self.info["pattern"]["ID"]
        ),
        ref=self.free_url,
    )

    # Reset the AJAX header for subsequent requests.
    self.req.http.c.setopt(pycurl.HTTPHEADER, ["X-Requested-With:"])

    if "/download/started/" in self.data:
        self.data = self.load(
            "http://turbobit.net/download/started/{}".format(
                self.info["pattern"]["ID"]
            )
        )
        m = re.search(self.LINK_FREE_PATTERN, self.data)
        if m is not None:
            self.link = "http://turbobit.net{}".format(m.group(1))
def adie_events2dicts(events_html):
    """Convert the events page of ADIE into our own Event format before Mongo import.

    Args:
        events_html: the HTML content of the ADIE events page.
    Returns:
        an iterable of dict with the JSON values of the Event proto.
    """
    with open(events_html, 'rt') as events_file:
        page_text = events_file.read()
    page_selector = selector.Selector(text=page_text)

    # Coordinates come from the map markers...
    map_div = page_selector.xpath('//div[@class="acf-map"]')
    markers = []
    for marker in map_div.xpath('div[@class="marker"]'):
        markers.append({
            'data-lat': marker.xpath('@data-lat').extract_first(),
            'data-lng': marker.xpath('@data-lng').extract_first(),
        })

    # ...while the other attributes live in an inline script.
    events_script = page_selector.xpath(
        '//script[contains(., "var evenements = []")]/text()').extract_first()
    if not events_script:
        raise ValueError(
            '"{}" does not contain the javascript to create events:\n{}'.
            format(events_html, page_text))
    # Guard so the workaround below gets removed once upstream fixes it.
    if 'evenement = []' not in events_script:
        raise ValueError('The [] bug is fixed, please drop the replace code')
    # Work around the site using [] where an object literal is needed.
    events_script = events_script.replace('evenement = []', 'evenement = {}')
    events = js2py.eval_js(events_script + ';evenements')

    # Join coordinates and other attributes, marker i <-> event i.
    merged = []
    for coords, attrs in zip(markers, events):
        merged.append(_adie_event_to_proto(dict(coords, **attrs)))
    return merged
def solve_cf_challenge(self, resp, **kwargs):
    """Answer the Cloudflare IUAM challenge embedded in *resp* and submit it."""
    time.sleep(5)  # Cloudflare requires a delay before solving the challenge

    page = resp.text
    parts = urlparse(resp.url)
    host = parts.netloc
    submit_url = "%s://%s/cdn-cgi/l/chk_jschl" % (parts.scheme, host)

    params = kwargs.setdefault("params", {})
    headers = kwargs.setdefault("headers", {})
    headers["Referer"] = resp.url

    try:
        # Use the LAST occurrence of each hidden field found on the page.
        params["jschl_vc"] = re.findall(r'name="jschl_vc" value="(\w+)"', page)[-1]
        params["pass"] = re.findall(r'name="pass" value="(.+?)"', page)[-1]

        # Extract the arithmetic operation
        js = self.extract_js(page)
    except Exception:
        # Unrecognized layout — possibly a new Cloudflare anti-bot variant.
        print(
            "[!] Unable to parse Cloudflare anti-bots page. "
            "Try upgrading cloudflare-scrape, or submit a bug report "
            "if you are running the latest version. Please read "
            "https://github.com/Anorov/cloudflare-scrape#updates "
            "before submitting a bug report.\n"
        )
        raise

    # Turn the snippet into a bare expression by dropping 'return'
    # (NOTE: removes every occurrence of the substring), then add the
    # domain length as the challenge requires.
    js = js.replace("return", "")
    params["jschl_answer"] = str(int(js2py.eval_js(js)) + len(host))

    return self.get(submit_url, **kwargs)
def __sha1_account(self):
    """Hash the account email with the site's own JS digest routine.

    Evaluates the minified function below over ``self.email`` and returns
    its hex digest. The round constants (1518500249 == 0x5A827999 etc.)
    match SHA-1 — kept verbatim so the output agrees byte-for-byte with
    what the server expects.
    """
    # Verbatim minified digest function lifted from the site.
    sha1 = '''function(a) {
        function b(a, b) {
            var c = (a & 65535) + (b & 65535);
            return (a >> 16) + (b >> 16) + (c >> 16) << 16 | c & 65535
        }
        for (var d = [], c = 0; c < 8 * a.length; c += 8) d[c >> 5] |= (a.charCodeAt(c / 8) & 255) << 24 - c % 32;
        a = 8 * a.length;
        d[a >> 5] |= 128 << 24 - a % 32;
        d[(a + 64 >> 9 << 4) + 15] = a;
        a = Array(80);
        for (var c = 1732584193, e = -271733879, f = -1732584194, h = 271733878, j = -1009589776, k = 0; k < d.length; k += 16) {
            for (var l = c, m = e, n = f, p = h, q = j, g = 0; 80 > g; g++) {
                a[g] = 16 > g ? d[k + g] : (a[g - 3] ^ a[g - 8] ^ a[g - 14] ^ a[g - 16]) << 1 | (a[g - 3] ^ a[g - 8] ^ a[g - 14] ^ a[g - 16]) >>> 31;
                var r = b(b(c << 5 | c >>> 27, 20 > g ? e & f | ~e & h : 40 > g ? e ^ f ^ h : 60 > g ? e & f | e & h | f & h : e ^ f ^ h), b(b(j, a[g]), 20 > g ? 1518500249 : 40 > g ? 1859775393 : 60 > g ? -1894007588 : -899497514))
                , j = h
                , h = f
                , f = e << 30 | e >>> 2
                , e = c
                , c = r
            }
            c = b(c, l);
            e = b(e, m);
            f = b(f, n);
            h = b(h, p);
            j = b(j, q)
        }
        d = [c, e, f, h, j];
        a = "";
        for (c = 0; c < 4 * d.length; c++) a += "0123456789abcdef".charAt(d[c >> 2] >> 8 * (3 - c % 4) + 4 & 15) + "0123456789abcdef".charAt(d[c >> 2] >> 8 * (3 - c % 4) & 15);
        return a
    }
    '''
    # Compile to a Python callable, then hash the account email.
    _params = js2py.eval_js(sha1)
    params = _params(str(self.email))
    return params
def video_page(self, response: HtmlResponse):
    """Parse one video page.

    Follows 'Watch Full Video' buttons into the full video's page (paid
    'Buy Full Video' buttons are dropped); otherwise evaluates the inline
    player config with js2py and yields an item for the highest-quality
    stream, skipping low-resolution (240p/480p) videos.
    """
    # some video has "Watch Full Video" button
    full_video_button = response.css("#trailerFullLengthDownload")
    video_title = response.css('h1.title').css('span::text').get()
    video_channel = response.css('div.video-actions-container').css(
        'div.usernameWrap.clearfix').css('a::text').get()
    if full_video_button:
        button_title = full_video_button.css('::attr(data-title)').get()
        if button_title != 'Buy Full Video':
            full_url = full_video_button.css('::attr(href)').get()
            self.logger.info('%s detected full video, original name: %s',
                             video_channel, video_title)
            yield scrapy.Request(full_url, callback=self.video_page,
                                 priority=100)
        else:
            self.logger.info('%s detected buy video, drop', video_channel)
    else:
        self.logger.info('get model: %s, title: %s', video_channel,
                         video_title)
        player_id_element = response.css('#player')
        js = player_id_element.css('script').get()
        data_video_id = player_id_element.css(
            '::attr(data-video-id)').get()
        # The inline script defines qualityItems_<id>; evaluate just the
        # setup portion and read that list back.
        prepare_js = js.split('<script type="text/javascript">')[1].split(
            'loadScriptUniqueId')[0]
        exec_js = '{0}\nqualityItems_{1};'.format(prepare_js, data_video_id)
        js_result = js2py.eval_js(
            exec_js)  # type: js2py.base.JsObjectWrapper
        quality_items = js_result.to_list()  # type: list
        quality = quality_items[-1]['text']
        # BUG FIX: the original test `quality != '240p' or quality != '"480p"'`
        # was a tautology (an OR of two inequalities over one value is always
        # true) and the second literal carried stray quotes; the evident
        # intent — skip low-resolution streams — is restored here.
        if quality not in ('240p', '480p'):
            video_url = quality_items[-1]['url']
            yield PornhubItem(file_urls=video_url,
                              file_name=video_title,
                              file_channel=video_channel,
                              parent_url=response.url)
def get_video_url(item):
    """Resolve playable video URLs from a hoster page.

    Unpacks the page's eval()-packed player script with js2py and pulls
    (url, label) pairs out of its ``sources:`` array.

    :param item: object with ``url`` and ``referer`` attributes.
    :return: list of Video objects; on dead links an ``ResolveError(0)``
        instance is returned (NOTE(review): returned, not raised —
        presumably the caller type-checks the result; confirm before
        changing this).
    """
    logger.trace()
    itemlist = list()
    data = httptools.downloadpage(item.url, headers={
        'Referer': item.referer
    }).data
    #logger.debug(data)
    # Dead-file markers used by the hoster.
    if "Page not found" in data or "File was deleted" in data or "404 Not Found" in data:
        return ResolveError(0)
    # The player setup is delivered as an eval()-packed script.
    packed = scrapertools.find_single_match(
        data, "<script type='text/javascript'>eval(.*?)</script>")
    logger.debug(packed)
    # js2py runs the packer, yielding the clear-text player config.
    data = js2py.eval_js(packed)
    logger.debug(data)
    sources = scrapertools.find_single_match(data, 'sources:\s?(\[.*?\]+)')
    logger.debug(sources)
    # Each source entry carries the stream URL and its resolution label.
    for url, res in scrapertools.find_multiple_matches(
            sources, 'file:"([^"]+)",label:"([^"]+)"'):
        itemlist.append(Video(url=url, res=res))
    return itemlist
def weibo_detail(weibo_id):
    """Fetch one weibo post's detail from the mobile site as a flat dict.

    The detail page embeds its payload in a <script> as ``$render_data``;
    the script is executed with js2py and that variable is read back.
    Engagement counts come from the separate 'extend' endpoint and are
    merged in before the fields are flattened.
    """
    weibo_content = {}
    url = r'https://m.weibo.cn/detail/%s' % str(weibo_id)
    html = requests.get(url)
    soup = BeautifulSoup(html.text, 'html.parser')
    # The second <script> holds the weibo payload.
    script = soup.find_all("script")[1].text
    # Execute the JS, appending $render_data so eval_js returns that variable.
    render_data = js2py.eval_js(script + '$render_data')
    status = render_data['status'].to_dict()
    url2 = r'https://m.weibo.cn/statuses/extend?id=%s' % str(weibo_id)
    html2 = requests.get(url2)
    popularity = html2.json()['data']
    # Merge repost/comment/like counts into the status dict.
    status.update(popularity)
    weibo_content['time'] = convert_time(status['created_at'])  # post timestamp
    weibo_content['uid'] = status['user']['id']  # author's user id
    # Prefer the untruncated raw_text when the post was clipped.
    if 'raw_text' in status.keys():
        weibo_content['text'] = status['raw_text']
    else:
        weibo_content['text'] = status['text']
    # Remaining fields may be absent, so fall back to None via .get.
    for key in keys:
        weibo_content[key] = status.get(key, None)
    for key in user_keys:
        weibo_content[key] = status['user'].get(key, None)
    return weibo_content
def fxxk_dsign(content):
    """Strip anti-scraping redirect code from a 'dsign' script and evaluate it.

    The site wraps the real signature computation in obfuscated code that
    assigns to ``location`` (forcing a browser redirect). Each rewrite
    below removes one redirect/obfuscation idiom — in this order, since
    later patterns would also match text the earlier steps rely on — so
    js2py can run the remainder and return the signature value.
    """
    # Accept both bytes and str input.
    try:
        content = content.decode()
    except AttributeError:
        pass
    # Drop the fixed wrapper (31 leading / 9 trailing characters —
    # presumably the <script> tag; confirm against a live page).
    js = content[31:-9]
    # Remove assignments involving known obfuscation variable names.
    for name in obfuscate_name:
        js = re.sub(rf"\w+ = '?{name}'?;", '', js)
    # Two computed-member writes means the second one is the redirect:
    # truncate there. Otherwise strip the first matching fake-location idiom.
    redirect = re.findall(r"_\w+?\[_\w+?\]=", js)
    if len(redirect) == 2:
        js = js[:js.find(redirect[1])]
    else:
        for fake in fake_location:
            if re.search(fake, js):
                js = re.sub(fake, '', js)
                break
    # Neutralize every remaining way the script could trigger a redirect.
    js = re.sub(r"location\[_\w+\]=?", '', js)
    js = re.sub(r"location\.href=?", '', js)
    js = re.sub(r"_\w+\[_\w+\]=?", '', js)
    # Drop the problematic getName helper and replace each wrapper function
    # with a stub that simply returns its own name.
    js = js.replace(awful_getName_func, '')
    js = re.sub(r"function (?P<f_name>\w+?)\(\){return getName\(\);\}",
                r"function \g<f_name>(){return '\g<f_name>';}", js)
    return js2py.eval_js(js)
def translate(query):
    """Translate *query* (English -> Chinese) via Baidu's mobile endpoint.

    The request must carry a ``sign`` computed by Baidu's client-side JS
    (loaded from get_sign.js and executed with js2py). The token and
    cookie below are session-bound captures and will go stale.

    :param query: English text to translate.
    :return: the endpoint's JSON response as a dict.
    """
    # Compile Baidu's signing routine into a Python callable.
    sign_function = js2py.eval_js(open("requests_js2py_scrape_baidu_translate/get_sign.js").read())
    url = "https://fanyi.baidu.com/basetrans"
    # Mobile UA is required: the /basetrans endpoint is mobile-only.
    headers = {
        "User-Agent": "Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1",
        "Referer": "https://fanyi.baidu.com/",
        "Cookie": "BAIDUID=714BFAAF02DA927F583935C7A354949A:FG=1; BIDUPSID=714BFAAF02DA927F583935C7A354949A; PSTM=1553390486; delPer=0; PSINO=5; H_PS_PSSID=28742_1463_21125_18559_28723_28557_28697_28585_28640_28604_28626_22160; locale=zh; from_lang_often=%5B%7B%22value%22%3A%22en%22%2C%22text%22%3A%22%u82F1%u8BED%22%7D%2C%7B%22value%22%3A%22zh%22%2C%22text%22%3A%22%u4E2D%u6587%22%7D%5D; to_lang_often=%5B%7B%22value%22%3A%22en%22%2C%22text%22%3A%22%u82F1%u8BED%22%7D%2C%7B%22value%22%3A%22zh%22%2C%22text%22%3A%22%u4E2D%u6587%22%7D%5D; REALTIME_TRANS_SWITCH=1; FANYI_WORD_SWITCH=1; HISTORY_SWITCH=1; SOUND_SPD_SWITCH=1; SOUND_PREFER_SWITCH=1; Hm_lvt_afd111fa62852d1f37001d1f980b6800=1553658863,1553766321,1553769980,1553770442; Hm_lpvt_afd111fa62852d1f37001d1f980b6800=1553770442; Hm_lvt_64ecd82404c51e03dc91cb9e8c025574=1553766258,1553766321,1553769980,1553770442; Hm_lpvt_64ecd82404c51e03dc91cb9e8c025574=1553770442"
    }
    sign = sign_function(query)
    data = {
        "query": query,
        "from": "en",
        "to": "zh",
        # Session-bound anti-CSRF token captured with the cookie above.
        "token": "6f5c83b84d69ad3633abdf18abcb030d",
        "sign": sign
    }
    res = requests.post(
        url=url,
        headers=headers,
        data=data
    )
    return res.json()
def LoadDataFromPublicStarfighterWebsite():
    """Scrape the ship/item tables from the public Starfighter database page.

    The page embeds its whole dataset in an inline script; the relevant
    statements are cut out, wrapped in a function, and executed with js2py
    to recover the ``data`` object.

    Returns a dict with 'shipList' and 'itemList'.
    """
    content = ''
    try:
        # The cb query parameter is a cache-buster.
        response = requests.get('{}?cb={}'.format(Config.publicDatabaseUrl, time.time()))
        response.raise_for_status()
    except requests.exceptions.HTTPError as http_err:
        print('HTTP error occurred: {}'.format(http_err))
    except Exception as err:
        print('HTTP error occurred: {}'.format(err))
    else:
        content = response.text

    # Keep everything between 'var data = {};' and 'var showing = null;'.
    script_body = re.sub(r'.*?(var data = \{\};.*)var showing = null;.*', '\\1', content, 0, re.S)
    wrapper = 'function getData(){\n' + script_body + '\nreturn data;\n}'
    getData = js2py.eval_js(wrapper)
    dataset = getData()

    return {
        'shipList': dataset['Ships'].to_list(),
        'itemList': dataset['Items'].to_list()
    }
def do_folder(folder):
    """Recursively run every test file under *folder* against js2py.

    Python 2 code. Each file becomes a Case; 'only strict' cases are
    skipped. A result of 'PASSED7486' counts as a pass, any other result
    as a failure, and any exception (except Ctrl-C) as a crash. Updates
    the module-level PASSED/FAILED/CRUSHED counters and prints a running
    summary before recursing into subfolders.
    """
    global PASSED, FAILED, CRUSHED
    print 'Doing', folder
    folders = []  # subdirectories collected for the recursive pass below
    f = os.path.join(TEST_PATH, folder)
    for e in os.listdir(f):
        e = os.path.join(f, e)
        if os.path.isfile(e):
            case = Case(e)
            # Strict-mode-only tests are out of scope here.
            if case.IsOnlyStrict():
                print 'Strict', case.name
                continue
            code = case.GetSource()
            try:
                res = js2py.eval_js(code)
                # Sentinel value the harness appends to passing tests.
                if res == 'PASSED7486':
                    PASSED += 1
                    continue
                else:
                    FAILED += 1
                    print
                    print 'Failed', case.name
                    print res
                    print
            except KeyboardInterrupt:
                # Allow Ctrl-C to abort the whole run cleanly.
                return
            except:
                # Deliberately broad: ANY other error counts as a crash.
                CRUSHED += 1
                print '<<<<<<<<<<<<<<'
                print 'Crushed', case.name
                print '<<<<<<<<<<<<<<<'
        else:
            folders.append(e)
    # Running summary, then descend into subfolders.
    print 'Passed ', PASSED, 'out of', PASSED + FAILED + CRUSHED, 'tests. ', CRUSHED, 'crushed.'
    for f in folders:
        do_folder(f)
def test_ko_data(self): """ Tests ko_data """ # Test an object wayne = self.setup_user() people = Person.objects.all() data = ko_data(people) self.assertNotEqual(data, '[]') # Will raise if invalid. interpreted = js2py.eval_js(data) # Test a vanilla QS rapper = Profession.objects.get(pk=1) data = ko_data(rapper) self.assertNotEqual(data, '[]') interpreted = js2py.eval_js(data) rapper = Profession.objects.none() data = ko_data(rapper) self.assertNotEqual(data, '[]') interpreted = js2py.eval_js(data) rapper = '' data = ko_data(rapper) self.assertEqual(data, '[]') interpreted = js2py.eval_js(data) rapper = [] data = ko_data(rapper) self.assertEqual(data, '[]') interpreted = js2py.eval_js(data) # Test an individual object rapper = wayne.profession data = ko_data(rapper) self.assertNotEqual(data, '[]') interpreted = js2py.eval_js(data)
def getBattleFormatsData():
    """Download Pokemon Showdown's formats-data.js and return it parsed.

    The file is a CommonJS module, so it is executed with js2py against a
    stub ``exports`` object and JSON.stringify'd before json.loads.
    """
    url = "https://raw.githubusercontent.com/Zarel/Pokemon-Showdown/master/data/formats-data.js"
    raw = urllib2.urlopen(url).read()
    serialized = js2py.eval_js(
        'exports={},' + raw + 'JSON.stringify(exports.BattleFormatsData)')
    return json.loads(serialized)
# Smoke-test script for js2py's ECMAScript 5 support.
import js2py
import time

print("Testing ECMA 5...")
# Date parsing, regex literals, implicit string coercion, and JS functions
# callable from Python must all work.
assert js2py.eval_js('(new Date("2008-9-03T20:56:35.450686Z")).toString()')
assert js2py.eval_js('/ser/.test("Mleko + ser to nabial")')
assert js2py.eval_js('1 + "1"') == '11'
assert js2py.eval_js('function (r) {return r}')(5) == 5
# run_file returns (result, context); the bundled esprima must be able to
# parse JS and expose the AST via to_dict().
x, c = js2py.run_file('examples/esprima.js')
assert c.esprima.parse('var abc = 40').to_dict() == {'type': 'Program', 'body': [{'type': 'VariableDeclaration', 'kind': 'var', 'declarations': [{'id': {'type': 'Identifier', 'name': 'abc'}, 'type': 'VariableDeclarator', 'init': {'type': 'Literal', 'raw': '40', 'value': 40}}]}], 'sourceType': 'script'}
# Syntax errors surface as PyJsException with a 'SyntaxError: ' prefix;
# the `and 0` guarantees the assert fails if no exception is raised.
try:
    assert js2py.eval_js('syntax error!') and 0
except js2py.PyJsException as err:
    assert str(err).startswith('SyntaxError: ')
# pyimport bridges into Python modules...
assert js2py.eval_js('pyimport time; time.time()') <= time.time()
# ...until explicitly disabled, after which it is a syntax error.
js2py.disable_pyimport()
try:
    assert js2py.eval_js('pyimport time') and 0
except js2py.PyJsException as err:
    assert str(err).startswith('SyntaxError: ')