Example 1
    def run(self):
        # labels: 'PASSED', 'FAILED', 'CRASHED', 'NOT_IMPLEMENTED', 'NO_FAIL'
        errors = True
        crashed = True
        label = None
        try:
            js2py.eval_js(self.code)
            errors = False
            crashed = False

        except NotImplementedError:
            tb = sys.exc_info()[-1]
            stk = traceback.extract_tb(tb)
            fname = stk[-1][2]
            passed = False
            reason = 'Not implemented - "%s"' % fname
            full_error = traceback.format_exc()
            label = 'NOT_IMPLEMENTED'

        except PyJsException as e:
            crashed = False
            full_error = traceback.format_exc()
            if self.negative:
                passed = True
            else:
                passed = False
                reason = PyExceptionToJs(e).get('message').to_python()
                label = 'FAILED'


        except SyntaxError as e:
            full_error = traceback.format_exc()
            if self.negative=='SyntaxError':
                passed = True
            else:
                passed = False
                reason = 'Could not parse'
                label = 'CRASHED'

        except:
            full_error = traceback.format_exc()
            passed = False
            reason = 'UNKNOWN - URGENT, FIX NOW!'
            label = 'CRASHED'

        if not errors:
            if self.negative:
                passed = False
                reason = "???"
                label = "NO_FAIL"
                full_error = ''
            else:
                passed = True

        if passed:
            label = "PASSED"
            reason = ''
            full_error = ''
        self.passed, self.label, self.reason, self.full_error = passed, label, reason, full_error
        return passed, label, reason, full_error
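A minimal sketch of the same triage idea for a single snippet; the label names follow the comment above, and the PyJsException import path used here is an assumption that may differ between js2py versions:

import js2py
from js2py.base import PyJsException  # assumed import location; may vary by js2py version

def classify(code):
    # Reduced version of the labelling above, for one snippet.
    try:
        js2py.eval_js(code)
        return 'PASSED'
    except NotImplementedError:
        return 'NOT_IMPLEMENTED'  # js2py lacks a feature the snippet needs
    except PyJsException:
        return 'FAILED'           # the JavaScript itself threw
    except Exception:
        return 'CRASHED'          # js2py failed in some other way (e.g. a parse error)

print(classify('1 + 1'))                    # PASSED
print(classify('throw new Error("boom")'))  # FAILED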
Example 2
def exec_file(path):
    js = load(path)
    desc = re.search("/\*---(.+)---\*/", js, re.DOTALL).groups()[0]
    inc = re.search("includes:(.+)", desc, re.DOTALL)
    bibs = ""
    if inc:
        libs = inc.groups()[0].splitlines()
        har = "harness/"
        for lib in libs:
            lib = lib.strip("[] -")
            if not lib:
                continue
            bibs += load(har + lib)
    try:
        js2py.eval_js(init + bibs + js)
    except NotImplementedError:
        return
    except:
        if "negative:" in desc or "onlyStrict" in desc:
            return  # supposed to fail

        print "-" * 30
        print traceback.format_exc()
        print
        print desc
        print
        print 'File "%s", line 1, in chuj' % os.path.abspath(path)
        raw_input()
Example 3
def exec_file(path):
    js = load(path)
    desc = re.search('/\*---(.+)---\*/', js, re.DOTALL).groups()[0]
    inc = re.search('includes:(.+)', desc, re.DOTALL)
    bibs = ''
    if inc:
        libs = inc.groups()[0].splitlines()
        har = 'harness/'
        for lib in libs:
            lib = lib.strip('[] -')
            if not lib:
                continue
            bibs += load(har+lib)
    try:
        js2py.eval_js(init + bibs + js)

    except NotImplementedError:
        print 'Not implemented'
    except:
        if 'negative:' in desc or 'onlyStrict' in desc:
            return # supposed to fail

        print '-'*30
        print traceback.format_exc()
        print
        print desc
        print
        print 'File "%s", line 1, in chuj' % os.path.abspath(path)
        raw_input()
        return
    if 'negative:' in desc and not 'onlyStrict' in desc:
        print 'File "%s", line 1, in chuj' % os.path.abspath(path)
        print 'Did not fail!'
        raw_input()
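The include handling above boils down to concatenating the harness files in front of the test body before a single eval_js call; a rough sketch with an illustrative stand-in harness rather than the real test262 files:

import js2py

def run_with_includes(test_source, include_sources):
    # include_sources: already-loaded harness file contents, concatenated ahead of the test body
    js2py.eval_js("\n".join(include_sources) + "\n" + test_source)

# Illustrative only: a tiny stand-in harness instead of the real harness/ files
harness = "function assertTrue(x) { if (!x) { throw new Error('assertion failed'); } }"
run_with_includes("assertTrue(1 + 1 === 2);", [harness])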
Example 4
    def run(self):
        # labels: 'PASSED', 'FAILED', 'CRASHED', 'NOT_IMPLEMENTED', 'NO_FAIL'
        errors = True
        crashed = True
        label = None
        try:
            js2py.eval_js(self.code)
            errors = False
            crashed = False

        except NotImplementedError:
            tb = sys.exc_info()[-1]
            stk = traceback.extract_tb(tb)
            fname = stk[-1][2]
            passed = False
            reason = 'Not implemented - "%s"' % fname
            full_error = traceback.format_exc()
            label = 'NOT_IMPLEMENTED'

        except PyJsException, e:
            crashed = False
            full_error = traceback.format_exc()
            if self.negative:
                passed = True
            else:
                passed = False
                reason = PyExceptionToJs(e).get('message').to_python()
                label = 'FAILED'
Example 5
    def solve_cf_challenge(self, resp, **original_kwargs):
        body = resp.text
        parsed_url = urlparse(resp.url)
        domain = parsed_url.netloc
        submit_url = '%s://%s/cdn-cgi/l/chk_jschl' % (parsed_url.scheme, domain)

        cloudflare_kwargs = {k: v for k, v in original_kwargs.items() if k not in ['hooks']}
        params = cloudflare_kwargs.setdefault('params', {})
        headers = cloudflare_kwargs.setdefault('headers', {})
        headers['Referer'] = resp.url

        try:
            params['jschl_vc'] = re.search(r'name="jschl_vc" value="(\w+)"', body).group(1)
            params['pass'] = re.search(r'name="pass" value="(.+?)"', body).group(1)
            params['s'] = re.search(r'name="s" value="(.+?)"', body).group(1)

            # Extract the arithmetic operation
            js = self.extract_js(body).replace('t.length', str(len(domain)))

        except Exception:
            # Something is wrong with the page.
            # This may indicate Cloudflare has changed their anti-bot
            # technique. If you see this and are running the latest version,
            # please open a GitHub issue so I can update the code accordingly.
            logging.error('[!] Unable to parse Cloudflare anti-bots page.')
            raise

        # Safely evaluate the Javascript expression
        try:
            params['jschl_answer'] = str(js2py.eval_js(js))
        except (Exception, BaseException):
            try:
                params['jschl_answer'] = str(js2py.eval_js(js))
            except (Exception, BaseException):
                return

        # Requests transforms any request into a GET after a redirect,
        # so the redirect has to be handled manually here to allow for
        # performing other types of requests even as the first request.
        method = resp.request.method
        cloudflare_kwargs['allow_redirects'] = False
        self.wait()
        redirect = self.request(method, submit_url, **cloudflare_kwargs)

        location = redirect.headers.get('Location')
        parsed_location = urlparse(location)
        if not parsed_location.netloc:
            location = '%s://%s%s' % (parsed_url.scheme, domain, parsed_location.path)
        return self.request(method, location, **original_kwargs)
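The evaluated script is obfuscated arithmetic scraped from the page, with t.length already replaced by the hostname length; a rough sketch of just that evaluation step, using a trivial stand-in expression instead of real Cloudflare output:

import js2py

domain = "example.com"  # illustrative
# Stand-in for the scraped arithmetic snippet; the hostname length has already been folded in,
# mirroring the t.length replacement above.
challenge_js = "var s = 5 * 2 + 1; s + {}".format(len(domain))
jschl_answer = str(js2py.eval_js(challenge_js))
print(jschl_answer)  # the computed answer (js2py may return the number as a float)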
Example 6
 def handle_javascript(self, line):
     return js2py.eval_js(
         line.replace(
             "{}))",
             "{}).replace('document.open();document.write','').replace(';document.close();',''))",
         )
     )
Example 7
    def handle_free(self, pyfile):
        m = re.search(
            r'<div class="video-wrapper">.+?<script type="text/javascript">(.+?)</script>',
            self.data,
            re.S,
        )
        if m is None:
            self.error(self._("Player Javascript data not found"))

        script = m.group(1)

        m = re.search(r"quality_items_\d+", script)
        if m is None:
            self.error(self._("`quality_items` variable no found"))

        result_var = re.search(r"quality_items_\d+", script).group(0)

        script = "".join(re.findall(r"^\s*var .+", script, re.M))
        script = re.sub(r"[\n\t]|/\*.+?\*/", "", script)
        script += "JSON.stringify({});".format(result_var)

        res = js2py.eval_js(script)
        json_data = json.loads(res)

        urls = {
            int(re.search("^(\d+)", x["text"]).group(0)): x["url"]
            for x in json_data
            if x["url"]
        }

        quality = max(urls.keys())

        self.link = urls[quality]
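Appending JSON.stringify to the collected var declarations keeps the JS-to-Python conversion trivial: eval_js returns a plain string that json.loads can parse. A small sketch of that round trip with a made-up player script:

import json
import js2py

# Illustrative stand-in for the player script; the real variable name is quality_items_<digits>
script = "var quality_items_1 = [{text: '720p HD', url: 'http://example.com/hd.mp4'}];"
script += "JSON.stringify(quality_items_1);"

json_data = json.loads(js2py.eval_js(script))
print(json_data[0]["url"])  # http://example.com/hd.mp4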
Example 8
    def parse_live_detail(self, response):
        logger.info("live url {}".format(response.url))
        info = re.findall("window.(anchor = .*?);", response.body, re.S)[0]
        post_info = js2py.eval_js(info)

        post_item = LiveItem()
        post_item["author_id"] = post_info["memberid"]
        post_item["author_name"] = post_info["nickname"]
        post_item["url"] = response.url
        post_item["title"] = response.xpath("//h1/text()").extract_first()
        post_item["site_id"] = 1223
        post_item["site_name"] = "一直播"
        # post_item["read_num"] = post_info["online"]
        post_item["online_num"] = post_info["online"]  # 文章阅读数 视频观看数 live参加数
        post_item["like_num"] = response.xpath('//div[@class="hide"]').re_first(u"共有(\d+)条点赞")  # 点赞数
        post_item["comment_num"] = response.xpath('//div[@class="hide"]').re_first(u"共有(\d+)条评论")  # 评论数
        post_item["post_time"] = dateformatting.parse(post_info["starttime"]).strftime(date_format)  # 发布时间
        post_item["include_time"] = self.crawled_time  # 抓取时间
        post_item["content_tags"] = response.xpath('//div[@class="hide"]').re_first(u"认证类型:(.*?)。")
        post_item["video"] = post_info["play_url"]
        post_item["image"] = post_info["covers"]
        yield post_item
        # logger.info(post_item)

        logger.info(u"{} live view people {}".format(post_item["author_name"], post_item["online_num"]))
Example 9
def getProxy():
    # access pachong.org through a SOCKS5 proxy
    socks.set_default_proxy(socks.SOCKS5,'127.0.0.1',1080)
    socket.socket = socks.socksocket
    r = requesocks.get(proxyUrl)
    html = r.text.encode('utf-8')
    # match the js declarations defined in the page
    reg_script_head = '<script type.*?>(.*?)</script>'
    pattern_script_head = re.compile(reg_script_head,re.S)
    result_of_script_head = re.findall(pattern_script_head,html)

    # match the ip port
    reg_port = '<td><script>(.*?)</script>'
    pattern_port = re.compile(reg_port,re.S)
    result_of_port = re.findall(pattern_port,html)

    # match the ip address
    reg_ip = '<td>([0-9]+(?:\.[0-9]+){0,3})</td>'
    pattern_ip = re.compile(reg_ip,re.S)
    result_of_ip = re.findall(pattern_ip,html)

    for i,item in enumerate(result_of_ip):
        jsevalPort = result_of_script_head[2] + result_of_port[i]
        js = ''' function add(){
        %s
        }
        add()''' % jsevalPort.replace('document.write','return')
        result = js2py.eval_js(js)
        ip_port[item] = result
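The port cells are produced by document.write calls, so the code above wraps each script in a function and rewrites document.write to return before evaluating; a reduced sketch with made-up page scripts:

import js2py

# Illustrative stand-ins for the scraped <script> blocks
script_head = "var k = 8000;"
port_script = "document.write(k + 80);"

js = "function add(){ %s %s } add()" % (
    script_head,
    port_script.replace("document.write", "return"),
)
result = js2py.eval_js(js)  # 8080 (possibly as a float): the rewritten script returns the value instead of writing it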
Example 10
def addcrypted2():
    package = flask.request.form.get(
        "package", flask.request.form.get("source", flask.request.form.get("referer"))
    )
    crypted = flask.request.form["crypted"]
    jk = flask.request.form["jk"]

    crypted = standard_b64decode(unquote(crypted.replace(" ", "+")))
    jk = js2py.eval_js(f"{jk} f()")

    try:
        key = bytes.fromhex(jk)
    except Exception:
        return "Could not decrypt key", 500

    obj = Fernet(key)
    urls = obj.decrypt(crypted).replace("\x00", "").replace("\r", "").split("\n")
    urls = [url for url in urls if url.strip()]

    api = flask.current_app.config["PYLOAD_API"]
    try:
        if package:
            api.add_package(package, urls, 0)
        else:
            api.generate_and_add_packages(urls, 0)
    except Exception:
        return "failed can't add", 500
    else:
        return "success\r\n"
Example 11
    def _eval_id_decoding(self, webpage, ol_id):
        try:
            # raise # uncomment to test method with pairing
            #js_code = re.findall(
            #    ur"(゚ω゚ノ=.*?\('_'\);.*?)゚ω゚ノ= /`m´)ノ ~┻━┻   //\*´∇`\*/ \['_'\];"
            #,webpage, re.DOTALL)[0]
            js_code = re.findall(
                #ur"(゚ω゚ノ=.*?\('_'\);.*?)゚ω゚ノ= /`m´)ノ ~┻━┻   //\*´∇`\*/ \['_'\];",
                ur"(゚ω゚ノ=.*?\('_'\);.*?)゚ω゚ノ= /`m´)ノ",
                webpage,re.S)[0]

            #common.log_utils.log_notice('js_code: %s' % js_code)
            js_code = re.sub('''if\s*\([^\}]+?typeof[^\}]+?\}''', '', js_code)
            js_code = re.sub('''if\s*\([^\}]+?document[^\}]+?\}''', '', js_code)
        except Exception as e:
            print 'Could not find JavaScript %s' % e
            raise ResolverError('Could not find JavaScript %s' % e)
        print("AAA1", ol_id, js_code)
        #js_code = base64.b64decode('''...''') % (ol_id, js_code)  # commented-out base64-encoded variant of the inline JS template below
        #print("AAA2", ol_id, js_code)
        js_code = '''
            var id = "%s"
              , decoded
              , document = {}
              , window = this
              , $ = function(){
                  return {
                    text: function(a){
                      if(a)
                        decoded = a;
                      else
                        return id;
                    },
                    ready: function(a){
                      a()
                    }
                  }
                };
            (function(d){
              var f = function(){};
              var s = '';
              var o = null;
              ['close','createAttribute','createDocumentFragment','createElement','createElementNS','createEvent','createNSResolver','createRange','createTextNode','createTreeWalker','evaluate','execCommand','getElementById','getElementsByName','getElementsByTagName','importNode','open','queryCommandEnabled','queryCommandIndeterm','queryCommandState','queryCommandValue','write','writeln'].forEach(function(e){d[e]=f;});
              ['anchors','applets','body','defaultView','doctype','documentElement','embeds','firstChild','forms','images','implementation','links','location','plugins','styleSheets'].forEach(function(e){d[e]=o;});
              ['URL','characterSet','compatMode','contentType','cookie','designMode','domain','lastModified','referrer','title'].forEach(function(e){d[e]=s;});
            })(document);
            %s;
            decoded;''' % (ol_id, js_code)


        try:
            decoded = js2py.eval_js(js_code)
            if ' ' in decoded or decoded == '':
                raise
            return decoded
        except Exception as e:
            raise ResolverError('Could not eval ID decoding %s' %e)
Example 12
def get_revenue_table(file):
    try:
        f = codecs.open(file, 'r',"utf-8")
        soup = BeautifulSoup(f, "html5lib")
        # Extract movie info from main block
        title1 = unidecode(soup.title.get_text().replace(" - Daily Box Office Results - Box Office Mojo", ""))
        title2 = soup.body.find(id="container").find(id="main").find(id="body").select("table")[2].tbody.tr.td.select("table")[0].tbody.tr.select('td')[1].b.get_text()
        info = soup.body.find(id="container").find(id="main").find(id="body").select("table")[2].tbody.tr.td.select("table")[0].tbody.tr.center.tbody.select("b")
        total_revenues = int(re.sub('[!@#$,]', '', info[0].get_text()))
        distributor = unidecode(re.sub('[!@#$,]', '', info[1].get_text()))
        release_date = unidecode(re.sub('[!@#$,]', '', info[2].get_text()))
        dt_obj = datetime.strptime(release_date, '%B %d %Y')
        # Of the form datetime.datetime(2016, 5, 6, 0, 0) (e.g. dt_obj.year = 2016)
        genre = unidecode(re.sub('[!@#$,]', '', info[3].get_text()))
        runtime_pre = re.sub('[!@#$,.a-zA-z]', '', info[4].get_text()).strip().split()
        runtime = int(runtime_pre[0]) * 60 + int(runtime_pre[1])
        MPAA = re.sub('[!@#$,]', '', info[5].get_text())

        # Convert production budget string to integer
        production_budget_pre1 = unidecode(re.sub('[!@#$,]', '', info[6].get_text()))
        production_budget_pre2 = "".join(production_budget_pre1.lower().split())
        for word, initial in {"million":"000000", "thousand":"000" }.items():
            production_budget = production_budget_pre2.replace(word.lower(), initial)

        # Extract revenue figures
        java_text = soup.find_all(type="text/javascript")
        t = java_text[5]
        jtext = t.getText().split('\t')[6].replace("\n", " ")
        table = js2py.eval_js(jtext)
        revenue = table.to_list()

        # Enter into dataframe
        rev = pd.DataFrame(revenue)
        rev.drop(0, axis=1, inplace=True)

        # Get total base revenues
        base_revenues = rev[1].sum()

        # Calculate conversion factor
        rev_cf = (total_revenues / base_revenues) / 1000000

        # Load onto dictionary to export
        keys = ['title1', 'title2', 'total_revenues', 'distributor', 'dt_obj', 'genre', 'runtime', 'MPAA', 'production_budget']

        values = [title1, unidecode(title2), total_revenues, distributor, dt_obj, genre, runtime, MPAA, int(production_budget)]

        movie_details = dict(zip(keys, values))

        rev_df = rev * rev_cf

    except Exception, e:
        print file
        movie_details = {'title1': 0}
        rev_df = 0
        missing_information.append(file)
        logging.exception(e)
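When eval_js returns a JavaScript array, as with the box-office table above, js2py hands back a wrapper object whose to_list() method yields plain nested Python lists; a minimal sketch:

import js2py

table = js2py.eval_js("[[1, 'Fri', 20.5], [2, 'Sat', 31.0]]")
rows = table.to_list()  # nested Python lists
print(rows[1][2])       # 31.0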
Example 13
    def handle_free(self, pyfile):
        # step 1: get essential information: the media URL and the javascript
        # translating the URL
        m = re.search(self.MEDIA_URL_PATTERN, self.data)
        if m is None:
            self.fail(self._("Could not find any media URLs"))

        encoded_media_url = m.group(1)
        self.log_debug(f"Found encoded media URL: {encoded_media_url}")

        m = re.search(self.COMMUNITY_JS_PATTERN, self.data)
        if m is None:
            self.fail(self._("Could not find necessary javascript script to load"))

        community_js_url = m.group(1)
        self.log_debug(f"Found community js at {community_js_url}")

        community_js_code = self.load(community_js_url)

        # step 2: from the js code, parse the necessary parts: the decoder function and the headers
        # as the jscript is fairly long, we'll split it to make parsing easier
        community_js_code = community_js_code.partition(self.JS_SPLIT_WORD)[0]

        m = re.search(self.JS_HEADER_PATTERN, community_js_code)
        if m is None:
            self.fail(self._("Could not parse the necessary parts off the javascript"))

        decoder_function = m.group("decoder")
        initialization = m.group("initvars")

        m = re.search(self.JS_PROCESS_PATTERN, community_js_code)
        if m is None:
            self.fail(
                self._("Could not parse the processing function off the javascript")
            )

        process_function = m.group(0)

        new_js_code = (
            decoder_function
            + "; "
            + initialization
            + "; var "
            + process_function
            + '; process_recording("'
            + encoded_media_url
            + '");'
        )

        self.log_debug(f"Running js script: {new_js_code}")
        js_result = js2py.eval_js(new_js_code)
        self.log_debug(f"Result is: {js_result}")

        self.link = js_result
Example 14
    def test_ko_model(self):
        """
        Tests ko_model
        """

        wayne = self.setup_user()
        people = Person.objects.all()

        model = ko_model(wayne)
        self.assertNotEqual(model, '')

        interpreted = js2py.eval_js(model)
Example 15
    def eval(self, jsEnv, js):
        if js2py.eval_js('(+(+!+[]+[+!+[]]+(!![]+[])[!+[]+!+[]+!+[]]+[!+[]+!+[]]+[+[]])+[])[+!+[]]') == '1':
            logging.warning('WARNING - Please upgrade your js2py https://github.com/PiotrDabkowski/Js2Py, applying work around for the meantime.')
            js = jsunfuck(js)

        def atob(s):
            return base64.b64decode('{}'.format(s)).decode('utf-8')

        js2py.disable_pyimport()
        context = js2py.EvalJs({'atob': atob})
        result = context.eval('{}{}'.format(jsEnv, js))

        return result
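The EvalJs context above is the pattern for exposing selected Python helpers (here atob) to the evaluated code, while disable_pyimport() blocks the pyimport escape hatch; a minimal sketch:

import base64
import js2py

def atob(s):
    # Python stand-in for the browser's atob()
    return base64.b64decode(str(s)).decode('utf-8')

js2py.disable_pyimport()                  # forbid pyimport inside evaluated JS
context = js2py.EvalJs({'atob': atob})    # expose only the helpers we choose
print(context.eval("atob('aGVsbG8=')"))   # hello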
Example 16
def get_fp_sign(fp_raw):
    rsp = requests.get(
        'https://login.xunlei.com/risk?cmd=algorithm&t=' +
        str(time.time() * 1000)
    )
    sign = ''
    try:
        xl_al = js2py.eval_js(rsp.content)
        sign = xl_al(fp_raw)
    except Exception as e:
        print(e)

    return sign
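A script that evaluates to a function comes back from eval_js as a Python callable, which is how the downloaded signing algorithm is invoked above; a sketch with a trivial stand-in function:

import js2py

# Illustrative stand-in for the signing script fetched from login.xunlei.com
sign_js = "function sign(raw) { return 'sig_' + raw.length; }"
sign = js2py.eval_js(sign_js)  # a lone function declaration evaluates to a Python callable
print(sign('abcdef'))          # sig_6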
Example 17
    def _solve_cf_ddos_challenge(addon_plugin, owner_plugin, data):
        try:
            addon_plugin.log_info(
                addon_plugin._("Detected CloudFlare's DDoS protection page")
            )
            # Cloudflare requires a delay before solving the challenge
            owner_plugin.set_wait(5)

            last_url = owner_plugin.req.last_effective_url
            urlp = urllib.parse.urlparse(last_url)
            domain = urlp.netloc
            submit_url = "{}://{}/cdn-cgi/l/chk_jschl".format(urlp.scheme, domain)

            get_params = {}

            try:
                get_params["jschl_vc"] = re.search(
                    r'name="jschl_vc" value="(\w+)"', data
                ).group(1)
                get_params["pass"] = re.search(
                    r'name="pass" value="(.+?)"', data
                ).group(1)

                # Extract the arithmetic operation
                js = re.search(
                    r"setTimeout\(function\(\){\s+(var s,t,o,p,b,r,e,a,k,i,n,g,f.+?\r?\n[\s\S]+?a\.value =.+?)\r?\n",
                    data,
                ).group(1)
                js = re.sub(r"a\.value = (parse_int\(.+?\)).+", r"\1", js)
                js = re.sub(r"\s{3,}[a-z](?: = |\.).+", "", js)
                js = re.sub(r"[\n\\']", "", js)

            except Exception:
                # Something is wrong with the page.
                # This may indicate CloudFlare has changed their anti-bot
                # technique.
                owner_plugin.log_error(
                    addon_plugin._("Unable to parse CloudFlare's DDoS protection page")
                )
                return None  #: Tell the exception handler to re-throw the exception

            # Safely evaluate the Javascript expression
            get_params["jschl_answer"] = str(int(js2py.eval_js(js)) + len(domain))

            owner_plugin.wait()  #: Do the actual wait

            return owner_plugin.load(submit_url, get=get_params, ref=last_url)

        except Exception as exc:
            addon_plugin.log_error(exc)
            return None  #: Tell the exception handler to re-throw the exception
Example 18
File: parser.py Project: owtf/ptp
    def parse_report(self):
        """Retrieve the results from the report.

        :raises: :class:`ReportNotFoundError` -- if the report file was not found.

        :return: List of dicts where each one represents a discovery.
        :rtype: :class:`list`

        .. note::

            Example of retrieved data after conversion (i.e. `raw_report`) using the module :mod:`ast`:

            .. code-block:: js

                [{ 'severity': 3, 'type': 40402, 'samples': [
                    { 'url': 'http://demo.testfire.net/bank/login.aspx', 'extra': 'SQL syntax string', 'sid': '21010', 'dir': '_i2/0' },
                    { 'url': 'http://demo.testfire.net/bank/login.aspx', 'extra': 'SQL syntax string', 'sid': '21010', 'dir': '_i2/1' },
                    { 'url': 'http://demo.testfire.net/subscribe.aspx', 'extra': 'SQL syntax string', 'sid': '21010', 'dir': '_i2/2' } ]
                },]

        """
        REPORT_VAR_NAME = 'issue_samples'
        variables = self.re_var_pattern.findall(self.report_stream)
        split_data = self.report_stream.split(";")
        js_data = [data for data in split_data if data is not None]
        py_data = []
        format_data = {}  # Final python dict after converting js to py
        dirs = []  # List of directories of all urls
        # Converting js to py to make it simple to process
        for data in js_data:
            temp_data = js2py.eval_js(data)
            if temp_data is not None:
                py_data.append(temp_data)

        # Mapping variable to its content
        for i in range(len(py_data)):
            format_data[variables[i]] = py_data[i]

        if REPORT_VAR_NAME not in variables:
            raise ReportNotFoundError('PTP did NOT find issue_samples variable. Is this the correct file?')
        # We now have a raw version of the Skipfish report as a list of dict.
        self.vulns = [
            {'ranking': self.RANKING_SCALE[vuln['severity']]}
            for vuln in format_data[REPORT_VAR_NAME]]
        if not self.light:
            for var in variables:
                for item in format_data[var]:
                    for sample in item['samples']:
                        dirs.append({'url': sample['url'], 'dir': os.path.join(self.search_directory, sample['dir'])})
            self.vulns.append({'ranking': constants.UNKNOWN, 'transactions': self._parse_report_full(dirs)})
        return self.vulns
Example 19
    def _eval_id_decoding(self, webpage, ol_id):
        try:
            # raise # uncomment to test method with pairing
            js_code = re.findall(
                ur"(゚ω゚ノ=.*?\('_'\);.*?)゚ω゚ノ= /`m´)ノ ~┻━┻   //\*´∇`\*/ \['_'\];"
            ,webpage, re.DOTALL)[0]
            #common.log_utils.log_notice('js_code: %s' % js_code)
            js_code = re.sub('''if\s*\([^\}]+?typeof[^\}]+?\}''', '', js_code)
            js_code = re.sub('''if\s*\([^\}]+?document[^\}]+?\}''', '', js_code)
        except Exception as e:
            print 'Could not find JavaScript %s' % e
            raise ResolverError('Could not find JavaScript %s' % e)


        js_code = '''
            var id = "%s"
              , decoded
              , document = {}
              , window = this
              , $ = function(){
                  return {
                    text: function(a){
                      if(a)
                        decoded = a;
                      else
                        return id;
                    },
                    ready: function(a){
                      a()
                    }
                  }
                };
            (function(d){
              var f = function(){};
              var s = '';
              var o = null;
              ['close','createAttribute','createDocumentFragment','createElement','createElementNS','createEvent','createNSResolver','createRange','createTextNode','createTreeWalker','evaluate','execCommand','getElementById','getElementsByName','getElementsByTagName','importNode','open','queryCommandEnabled','queryCommandIndeterm','queryCommandState','queryCommandValue','write','writeln'].forEach(function(e){d[e]=f;});
              ['anchors','applets','body','defaultView','doctype','documentElement','embeds','firstChild','forms','images','implementation','links','location','plugins','styleSheets'].forEach(function(e){d[e]=o;});
              ['URL','characterSet','compatMode','contentType','cookie','designMode','domain','lastModified','referrer','title'].forEach(function(e){d[e]=s;});
            })(document);
            %s;
            decoded;''' % (ol_id, js_code)

        try:
            decoded = js2py.eval_js(js_code)
            if ' ' in decoded or decoded == '':
                raise
            return decoded
        except:
            raise ResolverError('Could not eval ID decoding')
Example 20
def wildcard(input):

    res = []
    a = []
    for i in input:
        a.append(i[1] + " " + i[0])
    add = js2py.eval_js('function wildcard(input){var output=[],cases=[],wilds=[],patts=[],masks=[];var bits=groupCases(cases);for(var i=0;i<=bits;i++)wilds[i]=[];wildStrings(bits);convertStrings(wilds,patts,"-01","110");convertStrings(wilds,masks,"-01","011");for(var c=0;c<cases.length;c++){for(var i=0,j=Math.pow(2,bits);i<=bits;i++,j /=2){for(var k=0;k<patts[i].length;k++){var patt=patts[i][k];var mask=masks[i][k];var matches=[];for(var d=0;d<cases[c].nums.length;d++){var num=cases[c].nums[d];if(((num^patt)&mask)==mask)matches.push(d);}if(matches.length==j){output.push(wilds[i][k]+" "+cases[c].id);for(var l=j-1;l>=0;l--)cases[c].nums.splice(matches[l],1);}}}}return output;function groupCases(cases){var max=0;for(var i=0;i<input.length;i++){var num=parseInt(input[i],2);if(num>max)max=num;var id=input[i].slice(input[i].indexOf(" ")+1);var pos=0;while(cases[pos]!=undefined&&cases[pos].id!=id)++pos;if(cases[pos]==undefined)cases[pos]={id:id,nums:[]};cases[pos].nums.push(num);}return Math.ceil(Math.log(max)/ Math.log(2));}function wildStrings(len,wild,str){wild=wild||0;str=str||"";for(var i=0;i<3;i++){var w=(i==0)?1:0;var s=str+["-","0","1"][i];if(len>1){wildStrings(len-1,wild+w,s)}else {wilds[bits-wild-w].push(s);}}}function convertStrings(input,output,from,to){for(var i=0;i<input.length;i++){output[i]=[];for(var j=0;j<input[i].length;j++){var str=input[i][j],s="";for(var k=0;k<str.length;k++){s+=to.charAt(from.indexOf(str.charAt(k)));}output[i].push(parseInt(s,2));}}}}')

    for i in add(a):
        if i is None:
            break
        res.append(i.encode("ascii", "ignore"))

    return res
Example 21
    def addcrypted2(self):
        package = self.get_post("source", "ClickNLoad Package")
        crypted = self.get_post("crypted")
        jk = self.get_post("jk")

        crypted = standard_b64decode(unquote(crypted.replace(" ", "+")))
        jk = js2py.eval_js(f"{jk} f()")
        key = bytes.fromhex(jk)

        obj = Fernet(key)
        result = obj.decrypt(crypted).replace("\x00", "").replace("\r", "").split("\n")

        result = [x for x in result if x != ""]

        self.add_package(package, result, 0)
Example 22
    def _get_links(self, crypted, jk):
        #: Get key
        jreturn = js2py.eval_js("{} f()".format(jk))
        self.log_debug(f"JsEngine returns value [{jreturn}]")
        key = bytes.fromhex(jreturn)

        #: Decrypt
        obj = Fernet(key)
        text = obj.decrypt(base64.b64decode(crypted))

        #: Extract links
        text = text.replace("\x00", "").replace("\r", "")
        links = [link for link in text.split("\n") if link]

        #: Log and return
        self.log_debug(f"Package has {len(links)} links")
        return links
Example 23
	def solve_cf_challenge(self, resp, **kwargs):
		time.sleep(5)
		body = resp.text
		domain = urlparse.urlparse(resp.url).netloc
		params = kwargs.setdefault("params", {})
		headers = kwargs.setdefault("headers", {})
		headers["Referer"] = resp.url
		try:
			params["jschl_vc"] = re.search('name="jschl_vc" value="(\w+)"', body).group(1)
			params["pass"] = re.search('name="pass" value="(.+?)"', body).group(1)
			js = re.search("setTimeout\(function\(\){\s+(var s,t,o,p,b,r,e,a,k,i,n,g,f.+?\r?\n[\s\S]+?a\.value =.+?)\r?\n", body).group(1)
			js = re.sub("a\.value = (parseInt\(.+?\)).+", r"\1", js)
			js = re.sub("\s{3,}[a-z](?: = |\.).+", "", js)
			js = re.sub("[\n\\']", "", js)
		except Exception:
			raise Exception("[!] Unable to parse Cloudflare anti-bots page.")
		params["jschl_answer"] = str(int(js2py.eval_js(js)) + len(domain))
		redirect = self.request(resp.request.method, "%s://%s/cdn-cgi/l/chk_jschl" % (urlparse.urlparse(resp.url).scheme, domain), **dict(kwargs, allow_redirects=False))
		return self.request(resp.request.method, redirect.headers["Location"], **kwargs)
Example 24
    def get_link(self):
        #: Get all the scripts inside the html body
        soup = BeautifulSoup(self.data)
        scripts = [
            s.get_text()
            for s in soup.body.find_all("script", type="text/javascript")
            if "('dlbutton').href =" in s.get_text()
        ]

        #: Emulate a document in JS
        inits = [
            """
                var document = {}
                document.getElementById = function(x) {
                    if (!this.hasOwnProperty(x)) {
                        this[x] = {getAttribute : function(x) { return this[x] } }
                    }
                    return this[x]
                }
                """
        ]

        #: inits is meant to be populated with the initialization of all the DOM elements found in the scripts
        elt_re = r'getElementById\([\'"](.+?)[\'"]\)(\.)?(getAttribute\([\'"])?(\w+)?([\'"]\))?'
        for m in re.findall(elt_re, " ".join(scripts)):
            JSid, JSattr = m[0], m[3]
            values = [
                f for f in (elt.get(JSattr, None) for elt in soup.find_all(id=JSid)) if f
            ]
            if values:
                inits.append(
                    'document.getElementById("{}")["{}"] = "{}"'.format(
                        JSid, JSattr, values[-1]
                    )
                )

        #: Add try/catch in JS to handle deliberate errors
        scripts = ["\n".join(("try{", script, "} catch(err){}")) for script in scripts]

        #: Get the file's url by evaluating all the scripts
        scripts = inits + scripts + ["document.dlbutton.href"]

        return js2py.eval_js("\n".join(scripts))
Example 25
    def solve_cf_challenge(self, resp, **original_kwargs):
        sleep(5)  # Cloudflare requires a delay before solving the challenge

        body = resp.text
        parsed_url = urlparse(resp.url)
        domain = urlparse(resp.url).netloc
        submit_url = "%s://%s/cdn-cgi/l/chk_jschl" % (parsed_url.scheme, domain)

        cloudflare_kwargs = deepcopy(original_kwargs)
        params = cloudflare_kwargs.setdefault("params", {})
        headers = cloudflare_kwargs.setdefault("headers", {})
        headers["Referer"] = resp.url

        try:
            params["jschl_vc"] = re.search(r'name="jschl_vc" value="(\w+)"', body).group(1)
            params["pass"] = re.search(r'name="pass" value="(.+?)"', body).group(1)

            # Extract the arithmetic operation
            js = self.extract_js(body)

        except Exception:
            # Something is wrong with the page.
            # This may indicate Cloudflare has changed their anti-bot
            # technique. If you see this and are running the latest version,
            # please open a GitHub issue so I can update the code accordingly.
            logging.error("[!] Unable to parse Cloudflare anti-bots page. "
                          "Try upgrading cloudflare-scrape, or submit a bug report "
                          "if you are running the latest version. Please read "
                          "https://github.com/Anorov/cloudflare-scrape#updates "
                          "before submitting a bug report.")
            raise

        # Safely evaluate the Javascript expression
        params["jschl_answer"] = str(int(js2py.eval_js(js)) + len(domain))

        # Requests transforms any request into a GET after a redirect,
        # so the redirect has to be handled manually here to allow for
        # performing other types of requests even as the first request.
        method = resp.request.method
        cloudflare_kwargs["allow_redirects"] = False
        redirect = self.request(method, submit_url, **cloudflare_kwargs)
        return self.request(method, redirect.headers["Location"], **original_kwargs)
Example 26
def getURLFromObfJs(js):
	js = js.replace("eval", "fnRes=")
	print "return" in js
	js = str(js2py.eval_js(js))

	# First let's decode the javascript
	searchObj = re.search("var _escape='[%u\\d\\w]+';", js)
	if searchObj:
		escapeCode = searchObj.group().replace("var _escape='", "")[:-2]
		escapeCode = escapeCode.replace("%", "\\")
		escapeCode = escapeCode.decode("unicode-escape").replace("'+autoplay+'","no")
		print "escape code: " + escapeCode
	else:
		return False
	
	if re.search(r'<form(.+?)action="[^"]*(hqq|netu)\.tv/player/embed_player\.php"[^>]*>', escapeCode):
		return escapeCode
	# Second let's find the iframes src
	iframes = re.findall('<iframe [\\w\\d"=:\\/.?&\'+ %-;><]*<\\/iframe>', escapeCode)
	return '-'.join(iframes)
Example 27
    def solve_challenge(self, body, domain):
        try:
            js = re.search(r"setTimeout\(function\(\){\s+(var "
                        "s,t,o,p,b,r,e,a,k,i,n,g,f.+?\r?\n[\s\S]+?a\.value =.+?)\r?\n", body).group(1)
        except Exception:
            raise ValueError("Unable to identify Cloudflare IUAM Javascript on website. %s" % BUG_REPORT)

        js = re.sub(r"a\.value = (.+ \+ t\.length).+", r"\1", js)
        js = re.sub(r"\s{3,}[a-z](?: = |\.).+", "", js).replace("t.length", str(len(domain)))

        # Strip characters that could be used to exit the string context
        # These characters are not currently used in Cloudflare's arithmetic snippet
        js = re.sub(r"[\n\\']", "", js)

        if "toFixed" not in js:
            raise ValueError("Error parsing Cloudflare IUAM Javascript challenge. %s" % BUG_REPORT)

        # Use vm.runInNewContext to safely evaluate code
        # The sandboxed code cannot use the Node.js standard library
        #js = "%s, Object.create(null), {timeout: 5000}));" % js

        try:
            print(js)
            result = js2py.eval_js(js)
            print("eval cloudfare js result: ", result)
            #result = subprocess.check_output(["node", "-e", js]).strip()
        except OSError as e:
            if e.errno == 2:
                raise EnvironmentError("Missing Node.js runtime. Node is required and must be in the PATH (check with `node -v`). Your Node binary may be called `nodejs` rather than `node`, in which case you may need to run `apt-get install nodejs-legacy` on some Debian-based systems. (Please read the cfscrape"
                    " README's Dependencies section: https://github.com/Anorov/cloudflare-scrape#dependencies.")
            raise
        except Exception:
            logging.error("Error executing Cloudflare IUAM Javascript. %s" % BUG_REPORT)
            raise

        try:
            float(result)
        except Exception:
            raise ValueError("Cloudflare IUAM challenge returned unexpected answer. %s" % BUG_REPORT)

        return result
Example 28
def get_js_return(content):
    """
    <form id="challenge-form" action="/cdn-cgi/l/chk_jschl" method="get">
        <input type="hidden" name="jschl_vc" value="53c6b5dc36c113610e2597f11d600d1a"/>
        <input type="hidden" name="pass" value="1504765865.161-4zOzo1BY22"/>
        <input type="hidden" id="jschl-answer" name="jschl_answer"/>
    </form>
    """
    # 1ac9690a11b8fcfca762bc36f64679f9, 1504779184.092-aYUwU2JqVN, 2032
    # __cfduid    de7bab0aaae0290de0540da8fba78f3731504779180
    # cf_clearance   dc3e9630656b5528c0f1c10d7c9c523c7606e096-1504779185-28800

    jschl_vc = "0f7e4868294108cd97d299fba2b793a6"
    passwd = "1504832239.691-38r4axZu7r"
    jschl_answer = "1875"

    l = re.findall(r'name="jschl_vc" value="(.*?)"', content)
    if l: jschl_vc = l[0]

    l = re.findall(r'name="pass" value="(.*?)"', content)
    if l: passwd = l[0]

    m = re.search(r'setTimeout\(function\(\)\{((?:.|\n)*?)f\.submit\(\)',
                  content)
    if m:
        s = m.group(1)
        l = s.split("\n")
        l = [i for i in l if i.split()]
        first = l[0]
        last = l[-1]
        _ret = re.search(r"(.*?)a\.value\s+=\s+((.*?)121')", last)
        if _ret:
            last = _ret.group(1)
            ret = _ret.group(2)
        js = "function f(){ %s  %s  %s  return %s }" % (first, """
            t = 'https://www.spamhaus.org/';
            r = t.match(/https?:\/\//)[0];
            t = t.substr(r.length); t = t.substr(0,t.length-1);
          """, last, ret)
        jschl_answer = eval_js(js)()
    return jschl_vc, passwd, jschl_answer
Example 29
def magic(cmd):
    if cmd == 'clean':
        global old_msg
        old_msg = {}
    elif cmd == '骂张冠男':
        return '张冠男你个臭傻逼'
    elif cmd.startswith('骂'):
        return random.choice(['啥?', '呵呵', '要心平气和', '你气~~急败坏'])
    elif cmd == '天气':
        try:
            f_headers = fake_headers
            f_headers['Referer'] = 'http://www.weather.com.cn'
            j = requests.get('http://d1.weather.com.cn/dingzhi/101010100.html',
                             headers=f_headers,
                             timeout=5)
            j.encoding = 'utf-8'
            j = js2py.eval_js(j.text.split(';')[0])['weatherinfo']
            return "%s | %s | %s  ~ %s" % (j['cityname'], j['weather'],
                                           j['tempn'], j['temp'])
        except Exception, e:
            pass
Example 30
    def _encrypted_password(self):
        '''
            Cisco encrypts the router password in javascript before
            authenticating. The encryption function contains a nonce that changes
            from request to request (probably generated server side). This method
            downloads the javascript, appends an invocation with the router
            password, and evaluates it.
        '''
        gateway = self._session.get(self._address)
        html_and_js = "".join(gateway.content.splitlines())
        encryption_function_js_str = re.findall(
            'md5 for more info. \*\/(.*)function chk_keypress', html_and_js)[0]

        # Add line to call the function that encrypts the password
        encryption_function_with_invocation_js = encryption_function_js_str + ' en_value(en_value("' + self._password + '")+nonce);'

        # Invoke
        encrypted_password = js2py.eval_js(
            encryption_function_with_invocation_js)

        return encrypted_password
Example 31
 def check_device_id(self):
     if not self.has_cookie('.xunlei.com', 'deviceid'):
         url1 = 'https://login.xunlei.com/risk?cmd=algorithm&t=' + str(
             current_timestamp())
         sign_fun = self.__urlread(url1).decode()
         import js2py
         xl_al = js2py.eval_js(sign_fun)
         SB = USER_AGENT + "###zh-cn###24###960x1440###-540###true###true###true###undefined###undefined###x86###Win32#########" + md5(
             str(current_timestamp()).encode())
         xl_fp_raw = base64.b64encode(SB.encode()).decode()
         xl_fp = md5(xl_fp_raw.encode())
         xl_fp_sign = xl_al(xl_fp_raw)
         device_data = {
             'xl_fp_raw': xl_fp_raw,
             'xl_fp': xl_fp,
             'xl_fp_sign': xl_fp_sign
         }
         device_url = 'http://login.xunlei.com/risk?cmd=report'
         self.urlopen(device_url, data=device_data).read()
     if not self.has_cookie('.xunlei.com', '_x_t_'):
         self.set_cookie('.xunlei.com', '_x_t_', '0')
Example 32
    def solve_cf_challenge(self, resp, **kwargs):
        time.sleep(
            5)  # Cloudflare requires a delay before solving the challenge

        body = resp.text
        parsed_url = urlparse(resp.url)
        domain = urlparse(resp.url).netloc
        submit_url = "%s://%s/cdn-cgi/l/chk_jschl" % (parsed_url.scheme,
                                                      domain)

        params = kwargs.setdefault("params", {})
        headers = kwargs.setdefault("headers", {})
        headers["Referer"] = resp.url

        try:
            params["jschl_vc"] = re.search(r'name="jschl_vc" value="(\w+)"',
                                           body).group(1)
            params["pass"] = re.search(r'name="pass" value="(.+?)"',
                                       body).group(1)

            # Extract the arithmetic operation
            js = self.extract_js(body)

        except Exception:
            # Something is wrong with the page.
            # This may indicate Cloudflare has changed their anti-bot
            # technique. If you see this and are running the latest version,
            # please open a GitHub issue so I can update the code accordingly.
            print("[!] Unable to parse Cloudflare anti-bots page. "
                  "Try upgrading cloudflare-scrape, or submit a bug report "
                  "if you are running the latest version. Please read "
                  "https://github.com/Anorov/cloudflare-scrape#updates "
                  "before submitting a bug report.\n")
            raise

        # Safely evaluate the Javascript expression
        js = js.replace('return', '')
        params["jschl_answer"] = str(int(js2py.eval_js(js)) + len(domain))

        return self.get(submit_url, **kwargs)
Example 33
def execute_js(comic_id, chapter_id):
    api = "http://qiman6.com/{}/{}.html".format(comic_id, chapter_id)
    headers = {
        "Host":
        "qiman6.com",
        "Referer":
        "http://qiman6.com/{}/".format(comic_id),
        "Upgrade-Insecure-Requests":
        "1",
        "user-agent":
        "Mozilla/5.0 (iPhone; CPU iPhone OS 13_2_3 like Mac OS X) AppleWebKit/605.1.15 (KHTML, "
        "like Gecko) Version/13.0.3 Mobile/15E148 Safari/604.1 "
    }
    result = requests.get(url=api, headers=headers, timeout=10)
    if result.status_code == 200:
        try:
            doc = pq(result.text)
            html = doc("body script").eq(0).text()
            images = js2py.eval_js(html)
            return json.dumps(list(images), ensure_ascii=False)
        except Exception as e:
            return json.dumps({"error": str(e)}, ensure_ascii=False)
Example 34
def decode_data(data, nonce):
    decode_js = \
        '''
            function decode(data, nonce)
            {
                var T = data.split('');
                var N = nonce;
                var len, locate, str;
                N = N.match(/\d+[a-zA-Z]+/g);
                len = N.length;
                while (len--) {
                    locate = parseInt(N[len]) & 255;    
                    str = N[len].replace(/\d+/g, '');
                    T.splice(locate, str.length);
                }
                T = T.join('');
                return T;
            }
        '''
    handle = js2py.eval_js(decode_js)
    data = handle(data, nonce)
    return data
Example 35
def test():
    parse_var("http://www.dm5.com/m25536/")
    params = {
        "cid": args["DM5_CID"],
        "page": 1,
        "key": "",
        "language": 1,
        "gtk": 6,
        "_cid": args["DM5_CID"],
        "_mid": args["DM5_MID"],
        "_dt": args["DM5_VIEWSIGN_DT"],
        "_sign": args["DM5_VIEWSIGN"]
    }
    print("params参数:", params)
    url = domain + "/m25536/" + "chapterfun.ashx"
    headers = {"Referer": "http://www.dm5.com/m25536/"}
    res = requests.get(url, params=params, headers=headers)
    print(res.encoding)
    print(res.url)
    print("res:", res.text)
    imgs = js2py.eval_js(res.text)
    print("eval函数解析后结果:", imgs)
Example 36
def get_all_last_1_months(start, limit):
    out = {"rows": []}
    try:
        url = "https://www.nseindia.com/corporates/corpInfo/equities/getFinancialResults.jsp?start={start}&limit={limit}&symbol=&industry=&period=Quarterly&broadcastPeriod=Last%203%20Months".format(
            start=start, limit=limit)
        headers = {
            'mode':
            'no-cors',
            'User-Agent':
            'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.3'
        }
        response = requests.get(url, headers=headers)
        the_page = response.content
        js = "function a() {return JSON.stringify(" + the_page.decode(
            "ascii") + ")} a()"
        out = json.loads(js2py.eval_js(js))
        insert_log((0, 0, 0, 'NSE', 'SEARCHINFO', None, url, str(out)))
    except Exception as e:
        insert_log((0, 0, 0, 'NSE', 'CRITICAL', None, url,
                    "Error getting all 1 month search, " + str(e)))
    return out
Example 37
    def parse_volume_list(self, soup):
        self.volumes = []
        script = soup.find(name='main').find_next_sibling(name='script').string

        try:
            data = js2py.eval_js(
                '(function() {' + script + 'return window.lnmtl;})()'
            ).to_dict()
            for i, vol in enumerate(data['volumes']):
                title = vol.get('title', '') or ''
                title = re.sub(r'[^\u0000-\u00FF]', '', title)
                title = re.sub(r'\(\)', '', title).strip()
                self.volumes.append(
                    {'id': i + 1, 'title': title, 'download_id': vol['id'], }
                )
            # end for
        except Exception as _:
            logger.exception('Failed parsing one possible batch')
        # end try

        if len(self.volumes) == 0:
            raise Exception('Failed parsing volume list')
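Wrapping the page script in an immediately invoked function that returns window.lnmtl yields a JS object, and to_dict() converts it to a plain Python dict; a small sketch with a made-up script (a local window object is declared so the sketch is self-contained):

import js2py

# Made-up stand-in for the inline script following <main>
script = "window.lnmtl = {volumes: [{id: 10, title: 'Volume 1'}]};"

data = js2py.eval_js(
    "(function() { var window = {}; " + script + " return window.lnmtl; })()"
).to_dict()
print(data['volumes'][0]['title'])  # Volume 1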
Example 38
def login(mobilePhone, pswd):
    guid = js2py.eval_js(js)
    cookies = dict()
    cookies['ud'] = guid()
    r = requests.post(login_url, cookies=cookies, headers=headers, data={
        'mobilePhone': mobilePhone,
        'pswd': pswd,
        'imageCode': ''
    })
    d = json.loads(r.text)

    if d['code'] != 200:
        return 'error'

    c = d['result'].split('|')
    cookies['tok'] = c[0]
    cookies['u'] = c[1]
    cookies['m'] = c[2].replace("/", '%2F').replace("=", "%3D")
    # r2=requests.post(islogin_url,headers=headers,cookies=cookies)
    # print(r2.text)
    # ud tok u m
    return cookies
Example 39
def getsign():
    import requests
    curID = 39164517
    submittype = 1
    data = requests.get('https://www.wjx.cn/jq/%s.aspx?from=timeline' % curID)
    hlv = 1
    data = data.text
    jqnonce = re.search('jqnonce="(.*?)"', data).group(1)
    rndnum = re.search('rndnum="(.*?)"', data).group(1)
    starttime = re.search('starttime="(.*?)"', data).group(1)
    t = (str(int(time() * 1000)))
    js_res = js2py.eval_js('''
    function gen(jqnonce, ktimes) {
        var c, d, e, b = ktimes % 10;
        var a = jqnonce;
        for (0 == b && (b = 1), c = [], d = 0; d < a.length; d++) e = a.charCodeAt(d) ^ b,
            c.push(String.fromCharCode(e));
        var jqsign = (c.join(""));
        return jqsign;
    }
    ''')
    ktimes = 58
    jqsign = js_res(jqnonce, ktimes)
    params = {
        'submittype': submittype,
        'curID': curID,
        't': t,
        'ktimes': ktimes,
        'rn': rndnum,
        'hlv': hlv,
        'jqnonce': jqnonce,
        'starttime': starttime,
        'jqsign': jqsign
    }
    print(params)
    url = 'https://www.wjx.cn/joinnew/processjq.ashx?from=timeline&%s' \
          % (parse.urlencode(params))
    print(url)
    return url
Example 40
def parse_artists_json(body):
    body = body.decode('utf-8').strip()
    if body.startswith(')]}\''):
        body = body[4:]
    print 'Parsing json (can take several minutes)'
    js_array = js2py.eval_js(body)
    artists_array = js_array[0][0][2]
    print 'json parsed!'

    artists = list()
    for artist_obj in artists_array:
        artist_item = dict()
        artist_item['name'] = artist_obj[0].strip()
        artist_item['page_url'] = GoogleartCrawlSpider.BASE_URL + artist_obj[
            3].strip('/')
        artist_item[
            'artist_id'] = GoogleartCrawlSpider.artist_id_from_page_url(
                artist_item['page_url'])
        artist_item['total_items_count'] = int(
            artist_obj[1].strip().split(' ')[0].replace(',', ''))
        artists.append(artist_item)
    return artists
Example 41
 def __digest_password(self):
     Digest_pd = '''
         function(b) {
                     function a(a, d) {
                     var c = (a & 65535) + (d & 65535);
                     return (a >> 16) + (d >> 16) + (c >> 16) << 16 | c & 65535
                                         }
         for (var d = (b.length + 8 >> 6) + 1, c = Array(16 * d), e = 0; e < 16 * d; e++)
             c[e] = 0;
         for (e = 0; e < b.length; e++)
             c[e >> 2] |= b.charCodeAt(e) << 24 - 8 * (e & 3);
         c[e >> 2] |= 128 << 24 - 8 * (e & 3);
         c[16 * d - 1] = 8 * b.length;
         b = Array(80);
         for (var d = 1732584193, e = -271733879, f = -1732584194, h = 271733878, j = -1009589776, k = 0; k < c.length; k += 16) {
             for (var l = d, m = e, n = f, p = h, q = j, g = 0; 80 > g; g++) {
                 b[g] = 16 > g ? c[k + g] : (b[g - 3] ^ b[g - 8] ^ b[g - 14] ^ b[g - 16]) << 1 | (b[g - 3] ^ b[g - 8] ^ b[g - 14] ^ b[g - 16]) >>> 31;
                 var r = a(a(d << 5 | d >>> 27, 20 > g ? e & f | ~e & h : 40 > g ? e ^ f ^ h : 60 > g ? e & f | e & h | f & h : e ^ f ^ h), a(a(j, b[g]), 20 > g ? 1518500249 : 40 > g ? 1859775393 : 60 > g ? -1894007588 : -899497514))
                   , j = h
                   , h = f
                   , f = e << 30 | e >>> 2
                   , e = d
                   , d = r
             }
             d = a(d, l);
             e = a(e, m);
             f = a(f, n);
             h = a(h, p);
             j = a(j, q)
         }
         c = [d, e, f, h, j];
         b = "";
         for (d = 0; d < 4 * c.length; d++)
             b += "0123456789abcdef".charAt(c[d >> 2] >> 8 * (3 - d % 4) + 4 & 15) + "0123456789abcdef".charAt(c[d >> 2] >> 8 * (3 - d % 4) & 15);
         return b
     }'''
     password = js2py.eval_js(Digest_pd)
     password = password('fetion.com.cn:%s' % str(self.password))
     return password
Esempio n. 42
0
    def variantB_parse_script_for_object(
            self, script_url: str) -> Tuple[Optional[Dict], CrawlState, str]:
        """
        Extract the consent data from an inline json object. This assumes that inside the object,
        the array "Groups" is found. Inside this array we can find all the cookie data we need
        -- however, the object needs to be sanitized first, and stray characters need to be removed.

        The process isn't perfect, but it should work with a sufficient number of instances.
        """
        cookielaw_script = requests.get(script_url).text.strip()

        # purge newlines
        cookielaw_script = re.sub('\n', ' ', cookielaw_script)

        # Find the start of the group array
        matchobj = re.search(",\\s*Groups:\\s*\\[", cookielaw_script)
        try:
            if matchobj:
                startpoint = matchobj.start(0)

                # Get the end of the group array
                i = matchobj.end(0)
                open_brackets = 1
                while i < len(cookielaw_script) and open_brackets > 0:
                    if cookielaw_script[i] == "[": open_brackets += 1
                    elif cookielaw_script[i] == "]": open_brackets -= 1
                    i += 1
                group_string = cookielaw_script[startpoint + 1:i]

                # put the object into a javascript function, and evaluate it
                # This returns a dict of the cookie consent data we need.
                js_object_string = "function $() {return {" + group_string + "}};"
                data_dict = js2py.eval_js(js_object_string)()

                return data_dict, CrawlState.SUCCESS, "Successfully extracted objects from javascript"
            else:
                return None, CrawlState.PARSE_ERROR, "Failed to find desired javascript object in Onetrust consent script."
        except Exception as ex:
            return None, CrawlState.UNKNOWN, f"Unexpected error while parsing OneTrust javascript: : {type(ex)} {ex}"
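A minimal sketch of the wrap-and-eval trick described in the docstring above, using a hypothetical simplified Groups fragment in place of the real OneTrust script:

import js2py

# Hypothetical fragment in the shape variantB_parse_script_for_object() extracts.
group_string = 'Groups: [{OptanonGroupId: "C0001", GroupName: "Strictly Necessary Cookies"}]'
js_object_string = "function $() {return {" + group_string + "}};"
data_dict = js2py.eval_js(js_object_string)()   # call the wrapper function to get the object back
print(data_dict['Groups'][0]['GroupName'])      # 'Strictly Necessary Cookies'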
Esempio n. 43
0
def process_model(mod_name, url, model, max_width=120):
    sku_list = []
    for width in range(max_width):
        f = js2py.eval_js(model)
        product = f(width)

        # create sku
        sku = {}
        sku['id'] = product[0]
        price = product[1]
        if width > 96:
            price += 120
        sku['price'] = float('{:.2f}'.format(price))
        sku['url'] = url.format(mod_name)

        if sku['price'] != 0:
            sku_list.append(sku)

    # remove duplicates
    sku_list_no_dup = [i for n, i in enumerate(sku_list) if i not in sku_list[n + 1:]]

    return sku_list_no_dup
Esempio n. 44
0
def getLinks(urlid, url, linkExtractor, nameExtractor, durationExtractor, dateCheck, tagsExtractor):
    try:
        rootpage = getDocSoup(url)
        vidlinks = eval(linkExtractor)

        if len(vidlinks) == 0:
            try:
                text = rootpage.get_text()
                text = text.replace("document.cookie=", "return ")
                text = text.replace("document.location.reload(true);", "")
                text = text.replace("Loading ...", "")
                text = re.sub("//-->.*", "", text)
                text = re.sub(".*<!--", "", text)
                cookie = js2py.eval_js(text + " go()")
                rootpage = getDocSoup(url, cookie)
                vidlinks = eval(linkExtractor)
            except:
                pass
            if len(vidlinks) == 0:
                print >> sys.stderr, "NO VIDEOS FOUND: " + url
                return
    except (urllib2.HTTPError, urllib2.URLError), e:
        print >> sys.stderr, "GL " + type(e).__name__ + " " + str(e) + " " + url
        return
Esempio n. 45
0
def _get_data_by_key(js_list):
    """JavaScript function to generate the languages.

    A payload with the languages is passed to a JavaScript function.
    Instead of parsing that payload (cumbersome), we 'overload' that
    function to return what we want.

    """

    js_function = """
        function AF_initDataCallback(args) {
            return { key: args['key'], data: args['data'] };
        };
    """

    data_by_key = {}
    for js in js_list:
        js_code = js_function + js
        py_eval = js2py.eval_js(js_code)
        data_by_key[py_eval['key']] = py_eval['data']

    return data_by_key
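A minimal usage sketch of the "overload the callback" idea from the docstring, with a hypothetical AF_initDataCallback payload:

import js2py

js_function = """
    function AF_initDataCallback(args) {
        return { key: args['key'], data: args['data'] };
    };
"""
# Hypothetical payload in the shape the real page embeds.
js_payload = "AF_initDataCallback({key: 'ds:3', data: [['en', 'English'], ['fr', 'French']]});"
py_eval = js2py.eval_js(js_function + js_payload)   # the call is the last statement, so its result is returned
print(py_eval['key'])                               # 'ds:3'
print(py_eval['data'].to_list())                    # [['en', 'English'], ['fr', 'French']]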
Esempio n. 46
0
def get_seccode():
    import js2py
    import requests
    from random import random
    from time import time

    UA = (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_5) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36"
    )

    headers = {
        "Sec-Fetch-Mode": "no-cors",
        "User-Agent": UA,
        "Accept": "*/*",
        "Sec-Fetch-Site": "same-origin",
        "Referer": "https://translate.sogou.com/",
        "Accept-Language": "en-US,en;q=0.9,zh-CN;q=0.8,zh;q=0.7",
    }

    def get_suv():
        return str(int(time() * 1000000) + int(random() * 1000))

    def get_seccode_cookies():
        res = requests.get("https://translate.sogou.com/", headers=headers)
        return {
            "SNUID": res.cookies.get("SNUID"),
            "SUID": res.cookies.get("SUID"),
            "ABTEST": res.cookies.get("ABTEST"),
            "IPLOC": res.cookies.get("IPLOC"),
            "SUV": get_suv(),
        }

    cookies = get_seccode_cookies()
    response = requests.get(
        "https://translate.sogou.com/logtrace", headers=headers, cookies=cookies
    )
    if DEBUG:
        print(response.status_code, response.text)
    text = response.text
    rv = js2py.eval_js(text + "; window.seccode;")
    return str(rv)
Esempio n. 47
0
    def handle_free(self, pyfile):
        self.free_url = "http://turbobit.net/download/free/{}".format(
            self.info["pattern"]["ID"]
        )
        self.data = self.load(self.free_url)

        m = re.search(self.LIMIT_WAIT_PATTERN, self.data)
        if m is not None:
            self.retry(wait=m.group(1))

        self.solve_captcha()

        m = re.search(r"min_limit : (.+?),", self.data)
        if m is None:
            self.fail(self._("min_limit pattern not found"))

        wait_time = js2py.eval_js(m.group(1))
        self.wait(wait_time)

        self.req.http.c.setopt(pycurl.HTTPHEADER, ["X-Requested-With: XMLHttpRequest"])
        self.data = self.load(
            "http://turbobit.net/download/get_link_timeout/{}".format(
                self.info["pattern"]["ID"]
            ),
            ref=self.free_url,
        )
        self.req.http.c.setopt(pycurl.HTTPHEADER, ["X-Requested-With:"])

        if "/download/started/" in self.data:
            self.data = self.load(
                "http://turbobit.net/download/started/{}".format(
                    self.info["pattern"]["ID"]
                )
            )

            m = re.search(self.LINK_FREE_PATTERN, self.data)
            if m is not None:
                self.link = "http://turbobit.net{}".format(m.group(1))
Esempio n. 48
0
def adie_events2dicts(events_html):
    """Convert the events page of ADIE into our own Event format before Mongo import.

    Args:
        events_html: the HTML content of the ADIE events page.

    Returns:
        an iterable of dict with the JSON values of the Event proto.
    """

    with open(events_html, 'rt') as events_file:
        page_text = events_file.read()
    page_selector = selector.Selector(text=page_text)

    # Parse the markers with coordinates.
    map_div = page_selector.xpath('//div[@class="acf-map"]')
    markers = [{
        'data-lat': d.xpath('@data-lat').extract_first(),
        'data-lng': d.xpath('@data-lng').extract_first(),
    } for d in map_div.xpath('div[@class="marker"]')]

    # Parse the other attributes.
    events_script = page_selector.xpath(
        '//script[contains(., "var evenements = []")]/text()').extract_first()
    if not events_script:
        raise ValueError(
            '"{}" does not contain the javascript to create events:\n{}'.
            format(events_html, page_text))

    if 'evenement = []' not in events_script:
        raise ValueError('The [] bug is fixed, please drop the replace code')
    events_script = events_script.replace('evenement = []', 'evenement = {}')
    events = js2py.eval_js(events_script + ';evenements')

    # Join coordinates and other attributes.
    return [
        _adie_event_to_proto(dict(a, **b)) for a, b in zip(markers, events)
    ]
Esempio n. 49
0
    def solve_cf_challenge(self, resp, **kwargs):
        time.sleep(5)  # Cloudflare requires a delay before solving the challenge

        body = resp.text
        parsed_url = urlparse(resp.url)
        domain = urlparse(resp.url).netloc
        submit_url = "%s://%s/cdn-cgi/l/chk_jschl" % (parsed_url.scheme, domain)

        params = kwargs.setdefault("params", {})
        headers = kwargs.setdefault("headers", {})
        headers["Referer"] = resp.url

        try:
            params["jschl_vc"] = re.findall(r'name="jschl_vc" value="(\w+)"', body)[-1]
            params["pass"] = re.findall(r'name="pass" value="(.+?)"', body)[-1]

            # Extract the arithmetic operation
            js = self.extract_js(body)

        except Exception:
            # Something is wrong with the page.
            # This may indicate Cloudflare has changed their anti-bot
            # technique. If you see this and are running the latest version,
            # please open a GitHub issue so I can update the code accordingly.
            print(
                "[!] Unable to parse Cloudflare anti-bots page. "
                "Try upgrading cloudflare-scrape, or submit a bug report "
                "if you are running the latest version. Please read "
                "https://github.com/Anorov/cloudflare-scrape#updates "
                "before submitting a bug report.\n"
            )
            raise

        # Safely evaluate the Javascript expression
        js = js.replace("return", "")
        params["jschl_answer"] = str(int(js2py.eval_js(js)) + len(domain))

        return self.get(submit_url, **kwargs)
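For illustration, a minimal sketch of the final step in isolation; the arithmetic snippet is a hypothetical stand-in for whatever extract_js() returns:

import js2py

domain = "example.com"
js = "return (10 + 25 * 2)"   # hypothetical challenge expression
js = js.replace("return", "")
jschl_answer = str(int(js2py.eval_js(js)) + len(domain))
print(jschl_answer)           # '71' for this made-up expression and domain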
Esempio n. 50
0
 def __sha1_account(self):
     sha1 = '''function(a) {
         function b(a, b) {
             var c = (a & 65535) + (b & 65535);
             return (a >> 16) + (b >> 16) + (c >> 16) << 16 | c & 65535
         }
         for (var d = [], c = 0; c < 8 * a.length; c += 8)
             d[c >> 5] |= (a.charCodeAt(c / 8) & 255) << 24 - c % 32;
         a = 8 * a.length;
         d[a >> 5] |= 128 << 24 - a % 32;
         d[(a + 64 >> 9 << 4) + 15] = a;
         a = Array(80);
         for (var c = 1732584193, e = -271733879, f = -1732584194, h = 271733878, j = -1009589776, k = 0; k < d.length; k += 16) {
             for (var l = c, m = e, n = f, p = h, q = j, g = 0; 80 > g; g++) {
                 a[g] = 16 > g ? d[k + g] : (a[g - 3] ^ a[g - 8] ^ a[g - 14] ^ a[g - 16]) << 1 | (a[g - 3] ^ a[g - 8] ^ a[g - 14] ^ a[g - 16]) >>> 31;
                 var r = b(b(c << 5 | c >>> 27, 20 > g ? e & f | ~e & h : 40 > g ? e ^ f ^ h : 60 > g ? e & f | e & h | f & h : e ^ f ^ h), b(b(j, a[g]), 20 > g ? 1518500249 : 40 > g ? 1859775393 : 60 > g ? -1894007588 : -899497514))
                   , j = h
                   , h = f
                   , f = e << 30 | e >>> 2
                   , e = c
                   , c = r
             }
             c = b(c, l);
             e = b(e, m);
             f = b(f, n);
             h = b(h, p);
             j = b(j, q)
         }
         d = [c, e, f, h, j];
         a = "";
         for (c = 0; c < 4 * d.length; c++)
             a += "0123456789abcdef".charAt(d[c >> 2] >> 8 * (3 - c % 4) + 4 & 15) + "0123456789abcdef".charAt(d[c >> 2] >> 8 * (3 - c % 4) & 15);
         return a
     }
         '''
     _params = js2py.eval_js(sha1)
     params = _params(str(self.email))
     return params
Esempio n. 51
0
 def video_page(self, response: HtmlResponse):
     # some video has "Watch Full Video" button
     full_video_button = response.css("#trailerFullLengthDownload")
     video_title = response.css('h1.title').css('span::text').get()
     video_channel = response.css('div.video-actions-container').css(
         'div.usernameWrap.clearfix').css('a::text').get()
     if full_video_button:
         button_title = full_video_button.css('::attr(data-title)').get()
         if button_title != 'Buy Full Video':
             full_url = full_video_button.css('::attr(href)').get()
             self.logger.info('%s detected full video, original name: %s',
                              video_channel, video_title)
             yield scrapy.Request(full_url,
                                  callback=self.video_page,
                                  priority=100)
         else:
             self.logger.info('%s detected buy video, drop', video_channel)
     else:
         self.logger.info('get model: %s, title: %s', video_channel,
                          video_title)
         player_id_element = response.css('#player')
         js = player_id_element.css('script').get()
         data_video_id = player_id_element.css(
             '::attr(data-video-id)').get()
         prepare_js = js.split('<script type="text/javascript">')[1].split(
             'loadScriptUniqueId')[0]
         exec_js = '{0}\nqualityItems_{1};'.format(prepare_js,
                                                   data_video_id)
         js_result = js2py.eval_js(
             exec_js)  # type: js2py.base.JsObjectWrapper
         quality_items = js_result.to_list()  # type: list
         quality = quality_items[-1]['text']
         if quality not in ('240p', '480p'):  # only yield when the best quality is above 480p
             video_url = quality_items[-1]['url']
             yield PornhubItem(file_urls=video_url,
                               file_name=video_title,
                               file_channel=video_channel,
                               parent_url=response.url)
Esempio n. 52
0
def get_video_url(item):
    logger.trace()
    itemlist = list()

    data = httptools.downloadpage(item.url, headers={
        'Referer': item.referer
    }).data
    #logger.debug(data)
    if "Page not found" in data or "File was deleted" in data or "404 Not Found" in data:
        return ResolveError(0)

    packed = scrapertools.find_single_match(
        data, "<script type='text/javascript'>eval(.*?)</script>")
    logger.debug(packed)
    data = js2py.eval_js(packed)
    logger.debug(data)
    sources = scrapertools.find_single_match(data, r'sources:\s?(\[.*?\]+)')
    logger.debug(sources)
    for url, res in scrapertools.find_multiple_matches(
            sources, 'file:"([^"]+)",label:"([^"]+)"'):
        itemlist.append(Video(url=url, res=res))

    return itemlist
Esempio n. 53
0
def weibo_detail(weibo_id):
    weibo_content = {}
    url = r'https://m.weibo.cn/detail/%s' % str(weibo_id)
    html = requests.get(url)
    soup = BeautifulSoup(html.text, 'html.parser')
    script = soup.find_all("script")[1].text  # locate the <script> block that holds the weibo data
    render_data = js2py.eval_js(script + '$render_data')  # run the script, appending $render_data so eval_js returns that variable
    status = render_data['status'].to_dict()
    url2 = r'https://m.weibo.cn/statuses/extend?id=%s' % str(weibo_id)
    html2 = requests.get(url2)
    popularity = html2.json()['data']
    status.update(popularity)  # merge the fetched repost/comment/like counts into the status dict
    weibo_content['time'] = convert_time(status['created_at'])  # weibo publish time
    weibo_content['uid'] = status['user']['id']  # user id
    if 'raw_text' in status.keys():
        weibo_content['text'] = status['raw_text']
    else:
        weibo_content['text'] = status['text']
    for key in keys:
        weibo_content[key] = status.get(key, None)  # some fields may be missing, so use .get()
    for key in user_keys:
        weibo_content[key] = status['user'].get(key, None)
    return weibo_content
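A minimal sketch of the script + '$render_data' trick used above, with a hypothetical inline script shaped like the one m.weibo.cn embeds:

import js2py

# Hypothetical stand-in for the page's second <script> block.
script = "var $render_data = [{status: {id: 4242, created_at: 'Sat Jan 01 12:00:00 +0800 2022'}}][0] || {};"
render_data = js2py.eval_js(script + '$render_data')   # the trailing variable name makes eval_js return it
print(render_data['status']['id'])                     # the embedded status id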
Esempio n. 54
0
def fxxk_dsign(content):
    try:
        content = content.decode()
    except AttributeError:
        pass
    js = content[31:-9]
    for name in obfuscate_name:
        js = re.sub(rf"\w+ = '?{name}'?;", '', js)
    redirect = re.findall(r"_\w+?\[_\w+?\]=", js)
    if len(redirect) == 2:
        js = js[:js.find(redirect[1])]
    else:
        for fake in fake_location:
            if re.search(fake, js):
                js = re.sub(fake, '', js)
                break
    js = re.sub(r"location\[_\w+\]=?", '', js)
    js = re.sub(r"location\.href=?", '', js)
    js = re.sub(r"_\w+\[_\w+\]=?", '', js)
    js = js.replace(awful_getName_func, '')
    js = re.sub(r"function (?P<f_name>\w+?)\(\){return getName\(\);\}",
                r"function \g<f_name>(){return '\g<f_name>';}", js)
    return js2py.eval_js(js)
Esempio n. 55
0
def translate(query):
    sign_function = js2py.eval_js(open("requests_js2py_scrape_baidu_translate/get_sign.js").read())
    url = "https://fanyi.baidu.com/basetrans"
    headers = {
        "User-Agent": "Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1",
        "Referer": "https://fanyi.baidu.com/",
        "Cookie": "BAIDUID=714BFAAF02DA927F583935C7A354949A:FG=1; BIDUPSID=714BFAAF02DA927F583935C7A354949A; PSTM=1553390486; delPer=0; PSINO=5; H_PS_PSSID=28742_1463_21125_18559_28723_28557_28697_28585_28640_28604_28626_22160; locale=zh; from_lang_often=%5B%7B%22value%22%3A%22en%22%2C%22text%22%3A%22%u82F1%u8BED%22%7D%2C%7B%22value%22%3A%22zh%22%2C%22text%22%3A%22%u4E2D%u6587%22%7D%5D; to_lang_often=%5B%7B%22value%22%3A%22en%22%2C%22text%22%3A%22%u82F1%u8BED%22%7D%2C%7B%22value%22%3A%22zh%22%2C%22text%22%3A%22%u4E2D%u6587%22%7D%5D; REALTIME_TRANS_SWITCH=1; FANYI_WORD_SWITCH=1; HISTORY_SWITCH=1; SOUND_SPD_SWITCH=1; SOUND_PREFER_SWITCH=1; Hm_lvt_afd111fa62852d1f37001d1f980b6800=1553658863,1553766321,1553769980,1553770442; Hm_lpvt_afd111fa62852d1f37001d1f980b6800=1553770442; Hm_lvt_64ecd82404c51e03dc91cb9e8c025574=1553766258,1553766321,1553769980,1553770442; Hm_lpvt_64ecd82404c51e03dc91cb9e8c025574=1553770442"
    }
    sign = sign_function(query)
    data = {
        "query": query,
        "from": "en",
        "to": "zh",
        "token": "6f5c83b84d69ad3633abdf18abcb030d",
        "sign": sign
    }

    res = requests.post(
        url=url,
        headers=headers,
        data=data
    )
    return res.json()
Esempio n. 56
0
def LoadDataFromPublicStarfighterWebsite():
    content = ''

    try:
        response = requests.get('{}?cb={}'.format(Config.publicDatabaseUrl, time.time()))
        response.raise_for_status()
    except requests.exceptions.HTTPError as http_err:
        print('HTTP error occurred: {}'.format(http_err))
    except Exception as err:
        print('HTTP error occurred: {}'.format(err))
    else:
        content = response.text

    jsStr = re.sub(r'.*?(var data = \{\};.*)var showing = null;.*', '\\1', content, 0, re.S)
    jsContent = 'function getData(){\n' + jsStr + '\nreturn data;\n}'

    getData = js2py.eval_js(jsContent)
    jsData = getData()

    return {
        'shipList': jsData['Ships'].to_list(),
        'itemList': jsData['Items'].to_list()
    }
Esempio n. 57
0
def do_folder(folder):
    global PASSED, FAILED, CRUSHED
    print 'Doing', folder
    folders = []
    f = os.path.join(TEST_PATH, folder)

    for e in os.listdir(f):
        e = os.path.join(f, e)
        if os.path.isfile(e):
            case = Case(e)
            if case.IsOnlyStrict():
                print 'Strict', case.name
                continue
            code = case.GetSource()
            try:
                res = js2py.eval_js(code)
                if res == 'PASSED7486':
                    PASSED += 1
                    continue
                else:
                    FAILED += 1
                    print
                    print 'Failed', case.name
                    print res
                    print
            except KeyboardInterrupt:
                return
            except:
                CRUSHED += 1
                print '<<<<<<<<<<<<<<'
                print 'Crushed', case.name
                print '<<<<<<<<<<<<<<<'
        else:
            folders.append(e)
    print 'Passed ', PASSED, 'out of', PASSED + FAILED + CRUSHED, 'tests. ', CRUSHED, 'crushed.'
    for f in folders:
        do_folder(f)
Esempio n. 58
0
    def test_ko_data(self):
        """
        Tests ko_data
        """

        # Test an object
        wayne = self.setup_user()
        people = Person.objects.all()

        data = ko_data(people)
        self.assertNotEqual(data, '[]')

        # Will raise if invalid.
        interpreted = js2py.eval_js(data)

        # Test a vanilla QS
        rapper = Profession.objects.get(pk=1)
        data = ko_data(rapper)
        self.assertNotEqual(data, '[]')
        interpreted = js2py.eval_js(data)

        rapper = Profession.objects.none()
        data = ko_data(rapper)
        self.assertNotEqual(data, '[]')
        interpreted = js2py.eval_js(data)

        rapper = ''
        data = ko_data(rapper)
        self.assertEqual(data, '[]')
        interpreted = js2py.eval_js(data)

        rapper = []
        data = ko_data(rapper)
        self.assertEqual(data, '[]')
        interpreted = js2py.eval_js(data)

        # Test an individual object
        rapper = wayne.profession
        data = ko_data(rapper)
        self.assertNotEqual(data, '[]')
        interpreted = js2py.eval_js(data)
Esempio n. 59
0
def getBattleFormatsData():
	js=urllib2.urlopen("https://raw.githubusercontent.com/Zarel/Pokemon-Showdown/master/data/formats-data.js").read()
	return json.loads(js2py.eval_js('exports={},'+js+'JSON.stringify(exports.BattleFormatsData)'))
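The one-liner above prepends a CommonJS-style exports object and pulls one export back out as JSON; a minimal sketch of the same trick with a hypothetical data file:

import json
import js2py

# Hypothetical stand-in for the downloaded formats-data.js.
js = "exports.BattleFormatsData = {bulbasaur: {tier: 'LC'}, mewtwo: {tier: 'Uber'}};"
data = json.loads(js2py.eval_js('exports={},' + js + 'JSON.stringify(exports.BattleFormatsData)'))
print(data['mewtwo']['tier'])   # 'Uber'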
Esempio n. 60
0
import js2py
import time

print("Testing ECMA 5...")


assert js2py.eval_js('(new Date("2008-9-03T20:56:35.450686Z")).toString()')

assert js2py.eval_js('/ser/.test("Mleko + ser to nabial")')
assert js2py.eval_js('1 + "1"') == '11'

assert js2py.eval_js('function (r) {return r}')(5) == 5

x, c = js2py.run_file('examples/esprima.js')
assert c.esprima.parse('var abc = 40').to_dict() == {'type': 'Program', 'body': [{'type': 'VariableDeclaration', 'kind': 'var', 'declarations': [{'id': {'type': 'Identifier', 'name': 'abc'}, 'type': 'VariableDeclarator', 'init': {'type': 'Literal', 'raw': '40', 'value': 40}}]}], 'sourceType': 'script'}

try:
    assert js2py.eval_js('syntax error!') and 0
except js2py.PyJsException as err:
    assert str(err).startswith('SyntaxError: ')


assert js2py.eval_js('pyimport time; time.time()') <= time.time()

js2py.disable_pyimport()
try:
    assert js2py.eval_js('pyimport time') and 0
except js2py.PyJsException as err:
    assert str(err).startswith('SyntaxError: ')