コード例 #1
0
ファイル: dm5.py プロジェクト: v7368858/ComicCrawler
def getimgurl(html, url, page):
	"""Return the direct image URL for *page* of a dm5.com chapter.

	On the first call for *url*, parse the chapter page to build (and
	cache) the list of per-page "chapterfun.ashx" endpoints; later calls
	reuse the cache.  Raises LastPageError when *page* is past the end.
	"""
	if url not in cache:
		# dm5 hides a per-chapter key inside an eval()'d inline script
		key = search(r'id="dm5_key".+?<script[^>]+?>\s*eval(.+?)</script>', html, DOTALL)
		if key:
			# NOTE(review): `eval` here is presumably execjs's eval imported
			# at module level (not the builtin) — confirm against imports.
			key = eval(key.group(1)).split(";")[1]
			key = search(r"=(.+)$", key).group(1)
			key = eval(key)
		else:
			key = ""
		length = search("DM5_IMAGE_COUNT=(\d+);", html).group(1)
		cid = search("DM5_CID=(\d+);", html).group(1)
		funs = []
		# one chapterfun.ashx endpoint per page of the chapter
		for p in range(1, int(length) + 1):
			fun_url = urljoin(url, "chapterfun.ashx?cid={}&page={}&language=1&key={}&gtk=6".format(cid, p, key))
			funs.append(fun_url)
		cache[url] = funs

		# Grab cookies?
		grabhtml(funs[0], referer=url)

	if page - 1 >= len(cache[url]):
		del cache[url]
		raise LastPageError

	fun_url = cache[url][page - 1]
	text = grabhtml(fun_url, referer=url)
	# NOTE(review): `compile` is presumably execjs.compile — the returned JS
	# defines `d` (and `hd_c` for HD variants); prefer hd_c when present.
	d = compile(text).eval("(typeof (hd_c) != 'undefined' && hd_c.length > 0 && typeof (isrevtt) != 'undefined') ? hd_c : d")
	return d[0]
コード例 #2
0
ファイル: baidu.py プロジェクト: sclbeta/sign_in
 def login(self, v_code='', c_string=''):
     """POST a Baidu passport login (Python 2 urllib/urllib2 code).

     :param v_code: captcha answer, when one was required
     :param c_string: captcha "codestring" token from a prior response
     :return: True on success (err_no=0); None when a captcha is
              required (err_no=257) or on any other error code.
     """
     self.get_pre_login_info()
     url = 'https://passport.baidu.com/v2/api/?login'
     '''
     head = {
         "Origin": "https://passport.baidu.com",
         "Accept-Encoding": "gzip, deflate",
         "Host": "passport.baidu.com",
         "Accept-Language": "zh-CN,zh;q=0.8",
         "Content-Type": "application/x-www-form-urlencoded",
         "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
         "Cache-Control": "max-age=0",
         "Referer": "https://passport.baidu.com/v2/?login",
         "Connection": "keep-alive",
         "User-Agent": "Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.132 Safari/537.36"
     }
     '''
     # form fields mirror what the passport JS submits; timestamps and
     # the ppui_logintime jitter are computed via execjs to match the JS
     data = {
         "staticpage": "https://passport.baidu.com/static/passpc-account/html/v3Jump.html",
         "charset": "UTF-8",
         "token": self.token,
         "tpl": "pp",
         "subpro": "",
         "apiver": "v3",
         "tt": execjs.eval('new Date().getTime()'),
         "codestring": c_string,
         "safeflg": "0",
         "u": "https://passport.baidu.com/",
         "isPhone": "false",
         "detect": "1",
         "quick_user": "******",
         "logintype": "basicLogin",
         "logLoginType": "pc_loginBasic",
         "idc": "",
         "loginmerge": "true",
         "username": self.usr,
         "password": self.rsa_pwd,
         "verifycode": v_code,
         "mem_pass": "******",
         "rsakey": self.key,
         "crypttype": "12",
         "ppui_logintime": execjs.eval('20000 + 10000 * Math.random() % 10000'),
         "gid": self.create_gid(),
         "callback": "parent.bd__pcbs__dvpmkh"
     }
     data = urllib.urlencode(data)
     #print data
     req = urllib2.Request(url, data)
     # req.add_header(head)
     res = urllib2.urlopen(req).read()
     if 'err_no=257' in res:
         # captcha required — verification hook currently disabled
         pass
         #self.check_vcode(res)
     elif 'err_no=0' in res:
         return True
     else:
         pass
コード例 #3
0
ファイル: auth.py プロジェクト: ScottMcDede/naverdic
def login(account, password):
    """Evaluate the bundled ``login.long.js`` found next to this module.

    NOTE: *account* and *password* are accepted for interface
    compatibility but are not used here, and the eval result is
    discarded — the call is kept purely for its side effects.
    """
    script_path = os.path.join(os.path.dirname(__file__), 'login.long.js')
    with open(script_path) as handle:
        source = handle.read()
    execjs.eval(source)
コード例 #4
0
ファイル: manhuadao.py プロジェクト: v7368858/ComicCrawler
def getimgurls(html, url):
	"""Return the full image URLs for a manhuadao chapter page.

	The page embeds a ``cInfo`` JS object whose ``fs`` field lists image
	paths; the image host comes from ``pageConfig`` inside core*.js.
	"""
	# `id` renamed: it shadowed the builtin and was never used — the third
	# group only anchors the expected /book/<id> URL shape.
	base, protocol, _book_id = re.search(r"((https?://)[^/]+)/book/([^/]+)", url).groups()

	core = re.search(r'src="(/scripts/core[^"]+)"', html).group(1)
	cInfo = re.search(r'cInfo = ({[^;]+});', html).group(1)

	coreJs = grabhtml(base + core, referer=url)
	pageConfig = re.search(r'pageConfig=({[^;]+})', coreJs).group(1)

	images = execjs.eval(cInfo)["fs"]
	host = execjs.eval(pageConfig)["host"]

	return [protocol + host + image for image in images]
コード例 #5
0
ファイル: testTencentSpider.py プロジェクト: zwd1990/comic
def testDecodeImgpath():
    """Smoke-test spider.decodeImgpath against a live ac.qq.com chapter.

    (Python 2 code — note the print statement.)
    """
    html = spider.getSourceCode(
        'https://ac.qq.com/ComicView/index/id/629632/cid/45')
    # DATA: the encoded image-path payload; nonce: a JS expression whose
    # evaluated value keys the decoding
    data = re.findall(r"var DATA\s*= '(.*)'", html)[0]
    nonce = re.findall(r'window\[".*=(.*);', html)[0]
    nonce = execjs.eval(nonce)
    print spider.decodeImgpath(data, nonce)
コード例 #6
0
    def parse_manhua_info(self, response):
        # print response.text
        """
        Parse a chapter detail page and collect the comic image links;
        the links are computed by obfuscated JavaScript on the page.

        Uses the pyexecjs library to evaluate the JS.
        De-obfuscating shows the JS merely declares an array
        `var newImgs = []`.  Reference: http://tool.chinaz.com/js.aspx
        :param response:
        :return:
        """

        js_code = response.css('body > script:nth-child(8)').xpath(
            './/text()').extract_first()
        # js_code = self.js_regex.sub(lambda m: m.group().replace(' ', ''), js_code)  # strip spaces
        # appending ", newImgs" makes the script evaluate to the array itself
        image_urls = execjs.eval(js_code + ', newImgs')
        item = ImageItem()
        item['url'] = response.url
        # first whitespace-separated token of #title is the comic name
        item['name'] = response.css("#title").xpath('text()').extract_first(
            '').split(' ')[0]
        # keep only the digits of the second token as the chapter label.
        # NOTE(review): filter() is lazy on Python 3 — this looks like
        # Python 2 code; confirm the target interpreter.
        item['chapter'] = filter(
            lambda x: x.isdigit(),
            response.css("#title").xpath('text()').extract_first('').split(' ')
            [1])
        item['image_urls'] = image_urls
        # print image_urls
        yield item
コード例 #7
0
ファイル: dbhouse.py プロジェクト: wangyanweigithub/scrapy
    def get_area_house(self, response):
        """Parse one area listing page: yield a request per building link
        plus a follow-up request for the next result page.
        """
        try:
            print(response.url)
            body = response.body.decode("utf8")
            # the listing HTML is embedded as a JS string assignment;
            # raw strings so \s / \d reach the regex engine intact
            groups = re.search(
                r"\s*var search_result = \s*(.*);var search_result_list_num\s*=\s*\d",
                body)
            # .group(1) instead of groups[1]: match subscripting needs 3.6+
            body = execjs.eval(groups.group(1))
            with open("a.html", 'w', encoding="utf-8") as f:
                f.write(body)
            body = Selector(text=body)
            for each in body.xpath("//li[@class='title']/h2"):
                url = each.xpath("a/@href").extract()[0]
                yield scrapy.Request(url, callback=self.parse_building)

            # pagination: the "next page" anchor carries the page number
            # as the 4th argument of its onclick handler
            for each in body.xpath(
                    "//div[@id='search_result_page']/a[@onclick]"):
                if each.xpath("text()").extract()[0] == "下一页>":
                    search_result = re.search(
                        r".*\(.*,.*,.*,(\d*)\)",
                        each.xpath("@onclick").extract()[0])
                    page_no = search_result.group(1)
                    url = re.sub(r"page_no=\d*", "page_no=%s" % page_no,
                                 response.url, 1)
                    yield scrapy.Request(url, callback=self.get_area_house)
        except Exception as e:
            # broad by design: any parse failure is logged and the page skipped
            self.log("!!!!!error %s" % e)
コード例 #8
0
 def handle_521(self, response, callback, **kwargs):
     """Solve the jsl "521" anti-bot challenge, then retry via *callback*.

     Round 1: the body assigns a bootstrap cookie through
     document.cookie — evaluate that JS and resend with the cookie.
     Round 2: derive __jsl_clearance_s from the page's challenge params.
     """
     n = response.meta.get('n', 0)  # challenge round counter
     if 'document.cookie' in response.text:
         js_clearance = re.findall('cookie=(.*?);location',
                                   response.text)[0]
         # the JS evaluates to "k=v;expires=...;path=/" — keep only k=v
         result = execjs.eval(js_clearance).split(';')[0]
         k, v, *_ = result.split('=')
         yield Request(response.url,
                       callback=callback,
                       cookies={k: v},
                       meta={'n': n + 1},
                       dont_filter=True)
     else:
         params = get_params(response)
         chars = params['chars']
         bts = params['bts']
         ha = params['ha']
         ct = params['ct']
         # pick the hash implementation the challenge asked for
         hash_func = hash_d[ha]
         clearance = encrypt_cookies(chars, bts, ct, hash_func)
         yield Request(response.url,
                       callback=callback,
                       cookies={'__jsl_clearance_s': clearance},
                       meta={'n': n + 1},
                       dont_filter=True)
コード例 #9
0
def getCinemaShowtime(cinemaId, date):
    '''
    Fetch a cinema's showtimes for one day from mtime.com.
    :param cinemaId: cinema ID
    :param date: date formatted like 20170404
    :return: a dict — showtimes live under ['value']['showtimes'] —
             or None when the request or the response parsing fails.
    '''
    url = 'http://service.theater.mtime.com/Cinema.api?Ajax_CallBack=true' \
          '&Ajax_CallBackType=Mtime.Cinema.Services&Ajax_CallBackMethod=GetShowtimesJsonObjectByCinemaId&' \
          'Ajax_CallBackArgument0=' + str(cinemaId) + '&Ajax_CallBackArgument1=' + str(date)
    headers = {
        'User-Agent':
        'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36'
    }
    # fetch the whole page
    try:
        print('Requesting url: ', url)
        text = requests.get(url, headers=headers, timeout=DEFAULT_TIMEOUT).text
    except requests.exceptions.RequestException:
        # was a bare `except:` — narrowed so KeyboardInterrupt etc. propagate
        print('Error when request url=', url)
        return None
    try:
        # the API answers with JS: "var GetShowtimes... = {...};" — eval the RHS
        var = re.match(r'^var GetShowtimesJsonObjectByCinemaResult = (.+);',
                       text).group(1)
        if var:
            return execjs.eval(var)
    except Exception:
        # re.match() returns None on unexpected bodies -> AttributeError here
        print('error in var = re.match ')

    return None
コード例 #10
0
    def extract_fields(self, res):
        """Extract obfuscated proxy (ip, port) entries from *res*.

        Each row hides the IP across two JS statements (a plain literal
        plus an atob() base64 part) and offsets the port by a value taken
        from a data-* attribute on the page.
        Returns a list of dicts: {'ip', 'port', 'name'}.
        """
        selector = etree.HTML(text=res.text)
        # r = requests.get(url='http://www.baidu.com/')
        trs = selector.xpath('//div[@class="table-responsive"]/table/tbody/tr')
        result = []
        attrib = selector.xpath('//div[@class="container-fluid"]/div[1]')[0].attrib
        # next() instead of filter(...)[0]: filter objects are not
        # subscriptable on Python 3
        key = next(x for x in attrib if 'data' in x)
        compute_port = int(attrib[key])

        for x in trs:
            js_str = ''.join(x.xpath('.//td[1]//script/text()'))
            # first statement: a plain string literal with the ip prefix
            ip_1 = execjs.eval(js_str.split(';')[0].split('=')[1])
            string = js_str.split(';')[1].split('=')[1]
            base64_code = re.search(r'(?<=atob\().*?\.', string).group().strip('.')
            # SECURITY NOTE: eval() of scraped text — base64_code is expected
            # to be a quoted string literal; consider ast.literal_eval.
            # .decode() so the b64decode bytes concatenate with str on py3.
            ip_2 = base64.b64decode(eval(base64_code)).decode()
            ip = ip_1 + ip_2

            # decode the port: extracted offset + the page-level base
            port_str = js_str.split(';')[2].split('=')[1]
            string = re.search(r'\([0-9]{2,4}', port_str).group().strip('(')
            port = int(string) + compute_port
            if ip and port:
                result.append(dict(ip=ip, port=str(port), name=self.name))
        return result
コード例 #11
0
def get_data(pageCount):
    """Fetch one page (90 postings) of Zhaopin search results.

    Generates the x-zp-page-request-id the site's JS would produce,
    queries the fe-api endpoint, and returns the results list — or
    None when fewer than three postings came back.
    """
    headers = {
        'Accept':
        'application/json, text/plain, */*',
        'Origin':
        'https://sou.zhaopin.com',
        'Referer':
        'https://sou.zhaopin.com/?p={}&jl=702&sf=0&st=0&kw=%E5%A4%A7%E6%95%B0%E6%8D%AE&kt=3'
        .format(pageCount),
        'User-Agent':
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36'
    }
    # same id scheme the site's own JS uses: fixed prefix + timestamp + random
    js = '''"f097795abafd429bb0b65846ac9944b7-" + (new Date()).valueOf() + "-" + parseInt(Math.random() * 1000000)'''
    request_id = execjs.eval(js)
    start = pageCount * 90 if pageCount > 0 else 0
    data_url = 'https://fe-api.zhaopin.com/c/i/sou?start={}&pageSize=90&cityId=702&salary=0,0&workExperience=-1&education=-1&companyType=-1&employmentType=-1&jobWelfareTag=-1&kw=大数据&kt=3&=0&_v=0.14145840&x-zp-page-request-id={}'.format(
        start, request_id)
    payload = json.loads(requests.get(data_url, headers=headers).text)

    results = payload['data']['results']
    return results if len(results) > 2 else None
コード例 #12
0
ファイル: RequestNode.py プロジェクト: Cirreth/shome
 def action(self, parameters):
     """Resolve placeholders in reference/value, then call the plugin.

     Returns {retvar: result} when a `retvar` attribute exists
     ({retvar: 'error'} on failure); otherwise returns None.
     """
     value = self.value if hasattr(self, 'value') else None
     value = Node.substitute_placeholders(value, parameters)
     reference = Node.substitute_placeholders(self.reference, parameters)
     if self.referenceProcessing == 'evaluate':
         # NOTE(review): this overwrites self.reference in place (state
         # leaks into later calls), yet the plugin below still receives
         # the pre-eval local `reference` — confirm this is intended.
         self.reference = execjs.eval(self.reference)
     try:
         res = self._plugin_manager.call_plugin(self.plugin, reference, value)
     except Exception as e:
         logging.error("""
                         Exception in RequestNode.
                         id: %s,
                         plugin: %s,
                         reference: %s,
                         value: %s,
                         parameters: %s,
                         exception: %s""",
                       self.id,
                       self.plugin,
                       str(self.reference),
                       self.value if hasattr(self, 'value') else '[none]',
                       str(parameters),
                       str(e)
                       )
         if hasattr(self, 'retvar'):
             return {self.retvar: 'error'}
     if hasattr(self, 'retvar'):
         return {self.retvar: res}
コード例 #13
0
def crawl_sina_shfe_day():
    """
    Sina Finance SHFE quotes | copper, aluminium, nickel, zinc, lead.
    Example endpoint:
    http://stock2.finance.sina.com.cn/futures/api/jsonp.php/var _AHD2017_8_28=/GlobalFuturesService.getGlobalFuturesDailyKLine?symbol=AHD
    """
    # metal -> {'match': sina contract prefix, 'symbol': internal symbol}
    metal_lme_mapping = OrderedDict((
        ('Al', {
            'match': 'AL0',
            'symbol': 'USE00159'
        }),
        ('Cu', {
            'match': 'CU0',
            'symbol': 'USE00160'
        }),
        ('Ni', {
            'match': 'NI0',
            'symbol': 'USE00161'
        }),
        ('Pb', {
            'match': 'PB0',
            'symbol': 'USE00162'
        }),
        ('Zn', {
            'match': 'ZN0',
            'symbol': 'USE00163'
        }),
    ))

    today = date.today().strftime('%Y_%m_%d')
    source = 'sina_shfe'
    exchange = 'SHFE'
    for metal, mapping in metal_lme_mapping.items():
        sina_code = mapping['match']
        url = f'http://stock2.finance.sina.com.cn/futures/api/jsonp.php/var%20_{sina_code}{today}=/InnerFuturesNewService.getDailyKLine?symbol={sina_code}'
        logger.info('开始爬取 %s, url: %s' % (source, url))
        response = requests.get(url, timeout=5)
        # the JSONP body wraps a JS array literal — grab it and eval
        response = re.findall(r'\((.*)\)', response.text)[0]
        day_kline = execjs.eval(response)

        # only upsert rows on/after the newest date already stored
        latest_day_kline = DataSinaDayKLine.objects.filter(
            symbol=mapping['symbol']).order_by('-date').first()
        latest_day = latest_day_kline.date if latest_day_kline else date(
            2000, 1, 1)

        for kline in filter(
                lambda kline: datetime.strptime(kline['d'], '%Y-%m-%d').date()
                >= latest_day, day_kline):
            # kline keys: d=date, l=low, h=high, o=open, c=close, v=volume
            DataSinaDayKLine.objects.update_or_create_all_envs(
                logger,
                varieties=metal,
                symbol=mapping['symbol'],
                exchange=exchange,
                date=datetime.strptime(kline['d'], '%Y-%m-%d'),
                defaults={
                    'price_low': kline['l'],
                    'price_high': kline['h'],
                    'price_open': kline['o'],
                    'price_close': kline['c'],
                    'volume': kline['v'],
                })
コード例 #14
0
ファイル: get_docid.py プロジェクト: z10z10b10/wenshu
def getkey(run_eval):
    """Recover the AES key embedded in wenshu's packed JS.

    Unpacks *run_eval*, reassembles the hidden script by substituting
    the $hidescript payload into the self-invoking function, runs it,
    and pulls com.str._KEY out of the result.
    """
    a = unzip(run_eval)
    # raw strings so the backslash escapes reach the regex engine intact
    str1, str2 = re.findall(r'\$hidescript=(.*?);.*?\((.*?)\)\(\)', a)[0]
    js_func = str2.replace('$hidescript', str1)
    aes_key = execjs.eval(js_func)
    keys = re.findall(r'com.str._KEY=\"(.*?)\";', aes_key)[0]
    return keys
コード例 #15
0
ファイル: dmzj_m.py プロジェクト: v7368858/ComicCrawler
def getimgurls(html, url):
	"""Return the cleaned page-image URLs for a dmzj mobile chapter."""
	match = re.search(r'page_url":(\[[^\]]+\])', html)
	page_list = execjs.eval(match.group(1))

	# some chapters contain stray/empty "thumbs.db" entries, e.g.
	# http://manhua.dmzj.com/zhuoyandexiana/3488-20.shtml — drop them
	cleaned = []
	for entry in page_list:
		if entry and not entry.lower().endswith("thumbs.db"):
			cleaned.append(entry)
	return cleaned
コード例 #16
0
def main():
    """Work through the arithmetic-obfuscation challenge on the page:
    locate the seed expression, evaluate it, and print the remaining
    challenge expressions for inspection.  (Python 2 code.)
    """
    context = get_html_context()
    body = context[0]
    guid = context[1]
    strInit = 'var s,t,o,p,b,r,e,a,k,i,n,g,f, '
    strInitIndex = body.index(strInit)

    # grab the object name
    objName = body[strInitIndex + len(strInit): body.find('=', strInitIndex)]
    # grab the property name
    propName = body[body.find('{"', strInitIndex) + 2: body.find('":', strInitIndex)]

    varName = objName + "." + propName

    # start -> body.find(":", str1Index) + 1
    # end -> body.find("}", str1Index)
    initExpression = body[body.find(":", strInitIndex) + 1: body.find("}", strInitIndex)]

    # record the challenge seed value (NB: shadows the builtin `sum`)
    sum = execjs.eval(initExpression)

    # slice out the challenge-expression strings
    str1Start = "('challenge-form');"
    str1End = "a.value"
    otherExpression = body[body.index(str1Start) + len(str1Start) + 1: body.index(str1End)].strip()[1:]
    # sum+=!+[]+!![]+!![]+!![]+!![]+!![]+!![];
    # sum+=+((!+[]+!![]+!![]+[])+(!+[]+!![]+!![]+!![]));
    # sum+=!+[]+!![]+!![]+!![]+!![]+!![]+!![]+!![]+!![];
    # sum+=+((!+[]+!![]+!![]+[])+(!+[]+!![]+!![]+!![]+!![]+!![]+!![]+!![]+!![]));
    print otherExpression
コード例 #17
0
ファイル: gtk.py プロジェクト: lyf1134/fanyi
def get_tkk():
	"""Fetch Google Translate's TKK seed from the landing page.

	Relies on the module-level find_tkk_fn() helper to locate the
	TKK-computing IIFE inside the HTML.
	"""
	url = 'https://translate.google.cn/'
	res = requests.get(url, timeout = 1)
	tkk_fn = find_tkk_fn(res.text)
	# undo the \xNN escaping so execjs receives real JS source
	content = tkk_fn.group(1).encode('utf-8').decode('unicode_escape')
	tkk = execjs.eval(content)
	return tkk
コード例 #18
0
ファイル: kuaikan.py プロジェクト: xlight/ComicBook
 def parse_api_data_from_page(self, html):
     """Pull the window.__NUXT__ payload out of a kuaikan page.

     Returns the first data entry, or None when the marker is absent.
     """
     match = re.search('<script>window.__NUXT__=(.*?);</script>', html, re.S)
     if match is None:
         return
     payload = execjs.eval(match.group(1))
     return payload['data'][0]
コード例 #19
0
 def get_data_for_vine_id(self, vine_id, timeout=30):
     """Scrape vine.co for *vine_id* and return its POST_DATA record.

     Raises PresserRequestError on network failure, Presser404Error for
     unknown ids, PresserURLError for other HTTP errors, and
     PresserJavaScriptParseError when the embedded JS can't be parsed.
     """
     try:
         page = requests.get("https://vine.co/v/{}".format(vine_id), timeout=timeout)
     except requests.exceptions.RequestException as e:
         error_message = "Problem with comminicating with vine page - {}".format(e)
         raise PresserRequestError(error_message)
     if page.ok:
         content = BeautifulSoup(page.content)
         all_script_tags = content.find_all("script")
         # only inline scripts can carry the POST_DATA assignment
         potential_script_tags = [script for script in all_script_tags if not script.has_attr("src")]
         script_lines = []
         for tag in potential_script_tags:
             for content in tag.contents:
                 for line in content.split(";\n"):
                     if line.count("window.POST_DATA"):
                         script_lines.append(line.replace("window.POST_DATA = ", ""))
         if len(script_lines) > 1:
             raise PresserJavaScriptParseError("More POST_DATA extracted than expected")
         if not script_lines:
             raise PresserJavaScriptParseError("No POST_DATA extracted for id {}".format(vine_id))
         # strip the remaining JS assignment so only the object literal is left
         script_line = script_lines[0].replace("POST = ", "")
         try:
             data = execjs.eval(script_line)
             vine = data[vine_id]
             return vine
         except execjs.RuntimeError as e:
             error_message = "Problem with parsing, check parsing logic. {}".format(e)
             raise PresserJavaScriptParseError(error_message)
     elif page.status_code == 404:
         raise Presser404Error("{} could not be found".format(page.url))
     else:
         raise PresserURLError("{} could not be accessed {} - {}".format(page.url, page.status_code,page.content))
コード例 #20
0
def get_stock_top_10(date: datetime.date):
    """
    Fetch the day's top-10 Southbound turnover tables from HKEX.

    http://www.hkex.com.hk/chi/csm/chinaconndstat_daily.htm

    url ="http://www.hkex.com.hk/chi/csm/DailyStat/data_tab_daily_20170217c.js"

    ["Rank", "Stock Code", "Stock Name", "Buy Turnover", "Sell Turnover", "Total Turnover"]

    :return: (sse_hk, szse_hk) parsed tables; each may be None when the
             market section is absent, or None overall on non-200 HTTP.
    """
    # also accepts a date string, e.g. "2017-02-17"
    if isinstance(date, str):
        date = parser.parse(date).date()

    url = "http://www.hkex.com.hk/chi/csm/DailyStat/data_tab_daily_{date}c.js".format(date=date.strftime("%Y%m%d"))

    response = requests.get(url)
    if response.status_code == 200:
        content = response.content.decode("utf-8")
        stock_data = execjs.eval(content)  # the .js body evaluates to a JS array

        sse_hk, szse_hk = None, None
        for item in stock_data:
            if item["market"] == "SSE Southbound":  # Southbound (Shanghai)
                sse_hk = _parser_top_10(item["content"])
            elif item["market"] == "SZSE Southbound":  # Southbound (Shenzhen)
                szse_hk = _parser_top_10(item["content"])
        return sse_hk, szse_hk
コード例 #21
0
def get_js_object(js_code, key):
    """Convert a ``var x = {...};`` JS assignment into a Python object.

    Strips everything up to the first '=' and after the last ';',
    JSON.stringify()s the remaining expression via execjs, and parses it.
    NOTE: *key* is accepted for interface compatibility but unused here.
    """
    rhs = js_code[js_code.find("=") + 1:]
    rhs = rhs[:rhs.rfind(";")]
    stringified = execjs.eval("JSON.stringify(" + rhs + ")")
    return json.loads(stringified)
コード例 #22
0
ファイル: gtk.py プロジェクト: lyf1134/fanyi
def get_tkk():
    """Retrieve Google Translate's TKK token from the landing page."""
    url = 'https://translate.google.cn/'
    page = requests.get(url, timeout=1)
    # find_tkk_fn (module-level helper) locates the TKK IIFE in the HTML
    tkk_match = find_tkk_fn(page.text)
    # undo \xNN escaping so execjs sees real JS source
    js_source = tkk_match.group(1).encode('utf-8').decode('unicode_escape')
    return execjs.eval(js_source)
コード例 #23
0
ファイル: manhuagui.py プロジェクト: hardwarecode/onecomic
 def get_image_data_from_page(self, html):
     """Decode manhuagui's packer-obfuscated image metadata.

     The page embeds an eval(function(p,a,c,k,e,d)...) packer whose word
     dictionary is base64-encoded LZString; substitute the decompressed
     dictionary back in, run the JS, and parse the JSON object out.
     """
     js = re.search(r">window.*(\(function\(p.*?)</script>", html).group(1)
     # the packer's word dictionary, stored as base64-encoded LZString
     b64_str = re.search(r"[0-9],'([A-Za-z0-9+/=]+?)'", js).group(1)
     s = lzstring.LZString.decompressFromBase64(b64_str)
     # replace "'...'['split']('\x7c')" with the expanded word list
     new_js = re.sub(r"'[A-Za-z0-9+/=]*'\[.*\]\('\\x7c'\)", "'" + s + "'.split('|')", js)
     res = execjs.eval(new_js)
     return json.loads(re.search(r"(\{.*\})", res).group(1))
コード例 #24
0
ファイル: cookieTools.py プロジェクト: ddjj7/invest
def getMpsHeaderWithCookie(url):
    """Pass the jsl two-round cookie challenge for *url* and return the
    module-level `headers` dict carrying valid __jsl_clearance_s and
    __jsluid_s cookies.  (Mutates the shared `headers` in place.)
    """
    # (originally: use a session to keep state — plain requests used here)
    res1 = requests.get(url, headers=headers)
    #print(res1.text)
    cookiejar = res1.cookies
    cookiedict = requests.utils.dict_from_cookiejar(cookiejar)
    #print(cookiejar)
    print(cookiedict)
    # round 1: the body sets a bootstrap cookie via document.cookie
    jsl_clearance_s = re.findall(r'cookie=(.*?);location', res1.text)[0]
    # execute the JS snippet; result looks like "k=v;expires=...;path=/"
    jsl_clearance_s = str(
        execjs.eval(jsl_clearance_s)).split('=')[1].split(';')[0]
    # add cookie via add_dict_to_cookiejar
    #add_dict_to_cookiejar(session.cookies, {'__jsl_clearance_s': jsl_clearance_s})
    #cookiedict['__jsl_clearance_s'] = jsl_clearance_s
    #__jsl_clearance_s=1628565425.41|-1|RzuwzFWX8ZtPtb458AaFArcZRd0%3D
    #print(cookiedict)
    __jsluid_s = cookiedict['__jsluid_s']
    headers[
        'cookie'] = '__jsl_clearance_s=' + jsl_clearance_s + ";__jsluid_s=" + __jsluid_s
    print(headers)
    res2 = requests.get(url, headers=headers)
    print(res2.text)
    # round 2: extract the arguments of the go() call
    data = json.loads(re.findall(r';go\((.*?)\)', res2.text)[0])
    jsl_clearance_s = getClearance(data)
    # update the cookie
    #add_dict_to_cookiejar(session.cookies, {'__jsl_clearance_s': jsl_clearance_s})
    headers[
        'cookie'] = '__jsl_clearance_s=' + jsl_clearance_s + ";__jsluid_s=" + __jsluid_s
    print(headers)
    return headers
コード例 #25
0
    def fetch_chapter(cls, chapter_url, chapter_dir=None):
        """Collect {'fname', 'url'} records for every page image of a
        mangabz chapter, skipping pages already saved in *chapter_dir*.
        """
        mangabz_cid, mangabz_mid, mangabz_viewsign_dt, mangabz_viewsign, page_total = cls.fetch_chapter_argv(
            chapter_url)
        page_total = int(page_total)

        images_info = []
        desc = '\rFetching {}: ({}/{})'
        for i in range(page_total):
            print(desc.format(chapter_url, i + 1, page_total), end='\r')
            i += 1
            # skip exists image
            # NOTE(review): after `i += 1`, both the exists-check and the
            # saved filename use i+1 — verify the page numbering is intended.
            if chapter_dir is not None and os.path.isdir(chapter_dir):
                if os.path.exists(os.path.join(chapter_dir, str(i+1)+'.jpg')) or\
                   os.path.exists(os.path.join(chapter_dir, str(i+1)+'.png')):
                    continue

            # the endpoint answers with JS that evaluates to the image list
            js_str = cls.fetch_images_js(chapter_url, i, mangabz_cid,
                                         mangabz_mid, mangabz_viewsign_dt,
                                         mangabz_viewsign)
            imagesList = execjs.eval(js_str)
            img_url = imagesList[0]
            img_name = str(i + 1) + os.path.splitext(cls.url2fn(img_url))[-1]

            images_info.append({
                'fname': img_name,
                'url': img_url,
            })
        # wipe the progress line
        print(' ' * os.get_terminal_size().columns, end='\r')
        return images_info
コード例 #26
0
def get_tkk():
    '''Fetch the TKK value from Google's server, as the seed for
    computing the tk request signature.  Returns None on any failure.'''
    def get_res(url):
        # GET *url*; return the Response, or None when the request fails
        try:
            res = requests.get(url, timeout=1.5)
            res.raise_for_status()
            #res.encoding = 'utf-8'
            return res
        except requests.exceptions.RequestException as ex:
            print('[-]ERROR: ' + str(ex))
            # BUG FIX: previously `return res` — NameError when the
            # request itself raised, because `res` was never bound.
            return None

    def find_tkk_fn(res):  # locate the TKK-computing function
        re_tkk = r"TKK=eval\('(\(\(function\(\)\{.+?\}\)\(\)\))'\);"
        tkk_fn = re.search(re_tkk, res)
        return tkk_fn

    url = 'https://translate.google.cn/'
    try:
        res = get_res(url)
        tkk_fn = find_tkk_fn(res.text)
        #print(tkk_fn.group(1))
        # undo \xNN escaping so execjs receives real JS source
        content = tkk_fn.group(1).encode('utf-8').decode('unicode_escape')
        #print(content)
        tkk = execjs.eval(content)
        #print('tkk:',tkk)
        return tkk
    except Exception as ex:
        # broad on purpose: also covers res/tkk_fn being None above
        print(ex)
コード例 #27
0
ファイル: cli.py プロジェクト: chenpengcheng/cli
    def get_solutions(self, pid, sid, limit=10):
        """Collect up to *limit* distinct top solutions for problem *pid*,
        seeded from submission *sid*'s runtime-distribution page.

        Solutions whose code is >= 90% similar to one already collected
        are skipped.
        """
        url = self.url + '/submissions/detail/%s/' % sid
        js = r'var pageData =\s*(.*?);'

        resp = self.session.get(url)

        def diff(a, sl):
            # True when `a` differs enough from everything in `sl`
            for b in sl:
                r = difflib.SequenceMatcher(a=a.code, b=b.code).ratio()
                if r >= 0.9:
                    return False
            return True

        solutions = []
        for s in re.findall(js, resp.text, re.DOTALL):
            v = execjs.eval(s)
            try:
                df = json.loads(v.get('runtimeDistributionFormatted'))
                if df.get('lang') == self.lang:
                    for e in df.get('distribution')[:limit]:
                        t = int(e[0])  # runtime bucket
                        sln = self.get_solution(pid, t)
                        if diff(sln, solutions):
                            solutions.append(sln)
                    break
            except ValueError:
                # runtimeDistributionFormatted missing or not valid JSON
                pass

        return solutions
コード例 #28
0
 def get_msg_signal(self):
     """
         Poll WeChat's synccheck endpoint for a new-message signal.

         Returns the parsed {"retcode": ..., "selector": ...} dict; on
         timeout or any other error, falls back to the default
         {"retcode": "0", "selector": "0"}.
     """
     call_back = {"retcode": "0", "selector": "0"}
     try:
         resp = self.get(
             API_synccheck,
             params={
                 "r": Device.get_timestamp(),
                 "skey": self.__auth_data["skey"],
                 "sid": self.__auth_data["wxsid"],
                 "uin": self.__auth_data["wxuin"],
                 "deviceid": self.__device_id,
                 "synckey": self.create_synckey(),
                 "_": Device.get_timestamp(),
             },
             timeout=API_checktimeout,
         )
         if not resp.status_code == 200:
             raise AssertionError()
         # body looks like: window.synccheck={retcode:"0",selector:"2"}
         call_back = execjs.eval(resp.text.replace("window.synccheck=", ""))
     except requests.exceptions.ReadTimeout:
         pass
     except requests.exceptions.Timeout:
         pass
     except Exception as e:
         error(e)
     # throttle the polling loop
     time.sleep(1)
     return call_back
コード例 #29
0
ファイル: mtwm.py プロジェクト: zhangxiaoyuyu/Python_Crawler
def get_shop_info(session, wm_latitude=22634767, wm_longitude=113834247):
    """Fetch the Meituan-waimai shop list around the given coordinate.

    Tries up to four result pages (only the first four are reachable without
    logging in) and returns the shop list from the first page that responds
    successfully.

    :param session: requests.Session-like object used to keep the session
    :param wm_latitude: latitude of the location (scaled int)
    :param wm_longitude: longitude of the location (scaled int)
    :return: list of shop dicts, or None when every attempt fails
    """
    headers = {
        'Accept': 'application/json',
        'Content-Type': 'application/x-www-form-urlencoded',
        'Origin': 'https://h5.waimai.meituan.com',
        'Referer': 'https://h5.waimai.meituan.com/waimai/mindex/home',
        'User-Agent': 'Mozilla/5.0 (Linux; Android 5.0; SM-G900P Build/LRX21T) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.109 Mobile Safari/537.36',
        'Cookie': "lxsdk_cuid=16915455dbec8-08db788af7a2ff-1333063-1fa400-16915455dbec8; ci=30; _ga=GA1.3.2111621561.1550840081; _gid=GA1.3.426525714.1550840081; IJSESSIONID=1oa4mjr7l5r0c1fa32alvdcpbn; iuuid=C2540F1F12DE8DEB7EFFE84661C401CBEDED6125E30E08F9BF648F828AD42BDF; cityname=%E6%B7%B1%E5%9C%B3; _lxsdk=C2540F1F12DE8DEB7EFFE84661C401CBEDED6125E30E08F9BF648F828AD42BDF; webp=1; ci3=1; _hc.v=f8c27eb3-603c-e958-93d8-63f0bfa0a746.1550840270; __utmz=74597006.1550906260.3.3.utmcsr=meishi.meituan.com|utmccn=(referral)|utmcmd=referral|utmcct=/i/; latlng=22.636802,113.829362,1550906262992; i_extend=C_b1Gimthomepagecategory1394H__a; openh5_uuid=C2540F1F12DE8DEB7EFFE84661C401CBEDED6125E30E08F9BF648F828AD42BDF; showTopHeader=show; _lxsdk_s=1691935afe6-838-f5a-f2%7C%7C44; _lx_utm=utm_source%3D60030; __utma=74597006.2078366826.1550840256.1550901561.1550906260.3; __utmc=74597006; wm_order_channel=mtib; __mta=51223190.1550840082094.1550840082094.1550840111070.2"
    }
    start_index = 0
    data = {
        'startIndex': start_index,  # page number, starting at 0
        'sortId': 5,  # sort mode; 0 = default ranking, 5 = nearest first
        'multiFilterIds': '',
        'sliderSelectCode': '',
        'sliderSelectMin': '',
        'sliderSelectMax': '',
        'geoType': 2,
        'wm_latitude': wm_latitude,  # requested coordinate
        'wm_longitude': wm_longitude,
        'wm_actual_latitude': 22634767,  # device's "real" coordinate
        'wm_actual_longitude': 113834247,
        '_token': '',
    }
    # FIX: a plain epoch-milliseconds timestamp; the original spun up a JS
    # engine via execjs just to evaluate Date.now().
    url = "https://i.waimai.meituan.com/openh5/homepage/poilist?_={}".format(
        int(time.time() * 1000))
    print(f"当前爬取坐标({data['wm_longitude']},{data['wm_latitude']})")
    try:
        # Only the first four pages are reachable without logging in.
        for index in range(4):
            # BUG FIX: the original incremented start_index without writing
            # it back into the payload, so every request asked for page 0.
            data['startIndex'] = start_index
            res = session.post(url=url, headers=headers, data=data, timeout=5)

            # Successful response: return this page's shop list.
            if res.status_code == 200:
                shop_list = json.loads(res.text).get("data").get("shopList")
                return shop_list
            else:
                print("get_shop_info:返回数据失败,status_code非200")
            start_index += 1  # advance to the next page before retrying
            time.sleep(2)

    except exceptions.ConnectionError:
        print("get_shop_info:网络连接错误")

    except exceptions.Timeout:
        print("get_shop_info:超过等待时间")
コード例 #30
0
ファイル: okex_future_data.py プロジェクト: zenozxu/vnpy
    def get_bars(self, symbol, period, callback, bar_is_completed=False, bar_freq=1, start_dt=None):
        """Download futures K-line bars from OKEx and feed them to *callback*.

        :param symbol: contract spec including type, e.g. ``btc:next_week:10``
        :param period: 1min,3min,5min,15min,30min,1day,3day,1hour,2hour,4hour,6hour,12hour
        :param callback: invoked as ``callback(bar, bar_is_completed, bar_freq)``
        :param start_dt: skip bars earlier than this datetime (None = keep all)
        :return: (success flag, list of CtaBarData)
        """
        ret_bars = []
        if ':' not in symbol:
            self.strategy.writeCtaError(u'{} {}格式需要包含合约类型,如:btc:next_week:10'.format(datetime.now(), symbol))
            return False, ret_bars
        s = symbol.split(':')
        symbol_pair, contract_type = s[0], s[1]
        if not symbol_pair.endswith('_usd'):
            symbol_pair += '_usd'

        if symbol_pair not in symbol_list:
            self.strategy.writeCtaError(u'{} {}不在下载清单中'.format(datetime.now(), symbol_pair))
            return False, ret_bars

        url = u'https://www.okex.com/api/v1/future_kline.do?symbol={}&type={}&contract_type={}'.format(symbol_pair, period, contract_type)
        self.strategy.writeCtaLog('{}开始下载:{} {}数据.URL:{}'.format(datetime.now(), symbol, period, url))
        bars = []
        content = None
        try:
            content = self.session.get(url).content.decode('gbk')
            # Response body is a JS array literal of rows.
            bars = execjs.eval(content)
        except Exception as ex:
            self.strategy.writeCtaError('exception in get:{},{},{}'.format(url, str(ex), traceback.format_exc()))
            return False, ret_bars

        for i, bar in enumerate(bars):
            if len(bar) < 5:
                self.strategy.writeCtaError('error when import bar:{}'.format(bar))
                # BUG FIX: keep the (flag, bars) return shape consistent;
                # the original returned a bare False here.
                return False, ret_bars

            add_bar = CtaBarData()
            try:
                add_bar.vtSymbol = symbol
                add_bar.symbol = symbol
                add_bar.datetime = datetime.fromtimestamp(bar[0] / 1000)
                add_bar.date = add_bar.datetime.strftime('%Y-%m-%d')
                add_bar.time = add_bar.datetime.strftime('%H:%M:%S')
                add_bar.tradingDay = add_bar.date
                add_bar.open = float(bar[1])
                add_bar.high = float(bar[2])
                add_bar.low = float(bar[3])
                add_bar.close = float(bar[4])
                add_bar.volume = float(bar[6])    # index 5 is volume in contracts, 6 is volume in BTC/LTC
            except Exception as ex:
                self.strategy.writeCtaError('error when convert bar:{},ex:{},t:{}'.format(bar, str(ex), traceback.format_exc()))
                return False, ret_bars

            # BUG FIX: `bar` is a raw list with no .datetime attribute; the
            # original filtered on bar.datetime and raised AttributeError
            # whenever start_dt was given.
            if start_dt is not None and add_bar.datetime < start_dt:
                continue
            ret_bars.append(add_bar)
            if callback is not None:
                callback(add_bar, bar_is_completed, bar_freq)

        return True, ret_bars
コード例 #31
0
    def download_bars(self, symbol, period, size_=None, start_dt=None):
        """Download K-line bars from OKEx and return them as plain dicts.

        :param symbol: contract symbol (must appear in symbol_list)
        :param period: 1min,3min,5min,15min,30min,1day,3day,1hour,2hour,4hour,6hour,12hour
        :param size_: optional maximum number of rows to request
        :param start_dt: optional earliest datetime to request (ms since epoch)
        :return: list of bar dicts (datetime/date/time/open/high/low/close/volume);
            empty on any failure
        """
        ret_bars = []
        if symbol not in symbol_list:
            msg = u'{} {}不在下载清单中'.format(datetime.now(), symbol)
            if self.strategy:
                self.strategy.writeCtaError(msg)
            else:
                print(msg)
            return ret_bars

        url = u'https://www.okex.com/api/v1/kline.do?symbol={}&type={}'.format(symbol, period)
        if isinstance(size_, int):
            url = url + u'&size={}'.format(size_)
        if start_dt is not None and isinstance(start_dt, datetime):
            url = url + u'&since={}'.format(int(start_dt.timestamp() * 1000))
        self.writeLog('{}开始下载:{} {}数据.URL:{}'.format(datetime.now(), symbol, period, url))

        content = None
        try:
            content = self.session.get(url).content.decode('gbk')
        except Exception as ex:
            self.writeError('exception in get:{},{},{}'.format(url, str(ex), traceback.format_exc()))
            return ret_bars

        # Response body is a JS array literal of rows.
        bars = execjs.eval(content)

        if not isinstance(bars, list):
            self.writeError('返回数据不是list:{}'.format(content))
            return ret_bars

        for i, bar in enumerate(bars):
            if len(bar) < 5:
                self.writeError('error when get bar:{}'.format(bar))
                return ret_bars
            if i == 0:
                # Skip the first row -- presumably an incomplete boundary bar;
                # TODO confirm against the OKEx kline response ordering.
                continue
            add_bar = {}
            try:

                bar_datetime = datetime.fromtimestamp(bar[0] / 1000)
                add_bar['datetime'] = bar_datetime.strftime('%Y-%m-%d %H:%M:%S')
                add_bar['date'] = bar_datetime.strftime('%Y-%m-%d')
                add_bar['time'] = bar_datetime.strftime('%H:%M:%S')
                add_bar['open'] = float(bar[1])
                add_bar['high'] = float(bar[2])
                add_bar['low'] = float(bar[3])
                add_bar['close'] = float(bar[4])
                add_bar['volume'] = float(bar[5])
            except Exception as ex:
                self.writeError('error when convert bar:{},ex:{},t:{}'.format(bar, str(ex), traceback.format_exc()))
                # BUG FIX: skip the partially-populated bar instead of
                # appending it (the original fell through to append).
                continue

            ret_bars.append(add_bar)

        return ret_bars
コード例 #32
0
ファイル: util_sina.py プロジェクト: uniwin/vnpydjv
    def getDayBars(self, symbol, callback, start_dt=None):
        """Download daily K-line bars for *symbol* from sina and feed each to *callback*.

        :param symbol: full contract symbol (see ctaTemplate.getFullSymbol())
        :param callback: invoked once per CtaBarData
        :param start_dt: skip bars earlier than this datetime (None = keep all)
        :return: True on success, False otherwise
        """
        try:
            url = u'http://stock.finance.sina.com.cn/futures/api/json.php/InnerFuturesService.getInnerFuturesDailyKLine?symbol={0}'.format(symbol)
            self.strategy.writeCtaLog(u'从sina下载{0}的日K数据 {1}'.format(symbol, url))
            rows = execjs.eval(self.session.get(url).content.decode('gbk'))

            loaded = []
            for row in rows:
                bar = CtaBarData()
                bar.vtSymbol = symbol
                bar.symbol = symbol
                # The bar's timestamp marks the close of the trading day.
                bar.datetime = datetime.strptime(row['date'], '%Y-%m-%d')
                if start_dt is not None and bar.datetime < start_dt:
                    continue

                bar.date = bar.datetime.strftime('%Y%m%d')
                bar.tradingDay = bar.date
                bar.time = bar.datetime.strftime('%H:%M:00')
                bar.open = float(row['open'])
                bar.high = float(row['high'])
                bar.low = float(row['low'])
                bar.close = float(row['close'])
                bar.volume = int(row['volume'])
                bar.dayVolume = bar.volume
                loaded.append(bar)

            if not loaded:
                self.strategy.writeCtaLog(u'从sina读取日线K数据失败')
                return False

            self.strategy.writeCtaLog(u'从sina读取了{0}条日线K数据'.format(len(loaded)))
            # Feed every downloaded bar to the strategy callback.
            for bar in loaded:
                callback(bar)
            return True

        except Exception as e:
            self.strategy.writeCtaLog(u'加载Sina历史日线数据失败:' + str(e))
            return False
コード例 #33
0
ファイル: calc.py プロジェクト: smarkets/hal
    def calc(response):
        """Answer a calculator query by asking Google's calculator API."""
        # The third capture group of the trigger regex holds the user's query.
        expr = response.match.group(3)

        reply = requests.get('https://www.google.com/ig/calculator',
                             params=dict(hl='en', q=expr))
        # The API returns a JS object literal (not strict JSON).
        result = execjs.eval(reply.content)
        return result['rhs'] or result['error']
コード例 #34
0
ファイル: peye.py プロジェクト: jerryjunpy/PeyeSpider
    def _parse_cookie(js):
        """Derive the ``__jsl_clearance`` anti-bot cookie from a challenge script.

        :param js: the challenge JavaScript served by the site
        :return: one-entry dict ``{cookie_name: cookie_value}``
        """
        cookie_string, anonymous_function = re.search(
            r"(__jsl_clearance=\d+\.?\d+\|0\|)'\+(\(function\(\).+)\+';Expires=",
            js).groups()
        result = execjs.eval(anonymous_function)

        # BUG FIX: split only on the FIRST '=' -- the computed value may
        # itself contain '=' characters (e.g. base64 padding), which made
        # the 2-target unpacking raise ValueError.
        key, value = f"{cookie_string}{result}".split("=", 1)
        return {key: value}
コード例 #35
0
def get_img(js_api):
    """Fetch the JS image API and return the first image URL (None on error)."""
    try:
        resp = requests.get(js_api, headers=headers, timeout=2)
        # The endpoint body is a JS array literal; evaluate it into a list.
        img_list = execjs.eval(resp.text)
        return img_list[0]
    except Exception as e:
        # Best-effort: log and fall through (implicitly returns None).
        print('api请求出错:', e)
コード例 #36
0
async def get_jsObj(response):
    """Parse *response* HTML and return (evaluated last script, #mainBodySingle div)."""
    soup = BeautifulSoup(response, 'lxml')
    main_body = soup.find('div', id="mainBodySingle")
    last_script = soup.find_all('script', type="text/javascript")[-1]
    # Drop the final character (presumably a trailing ';' -- TODO confirm)
    # so the script body is a bare evaluatable expression.
    js_obj = execjs.eval(last_script.text[0:-1])

    return js_obj, main_body
コード例 #37
0
ファイル: sites.py プロジェクト: bllli/DriveIt
 def get_page_info(self, parent_link):
     """Fetch the chapter page, decode its packed page metadata into
     ``self.info_dict`` and return the total page count."""
     markup = self.get_data(parent_link).decode('utf-8')
     soup = BeautifulSoup(markup, 'html.parser')
     script_tag = soup.find('script', {'type': 'text/javascript'})
     # The 4th line of the script is "eval(<packed>)": strip the eval()
     # wrapper and run the packed payload directly.
     packed = script_tag.text.split('\n')[3].strip().replace('eval(', '')[:-1]
     unpacked = execjs.eval(packed)
     self.info_dict = json.loads(unpacked.replace('var pages=pages=\'', '').rstrip('\';'))
     return int(self.info_dict['sum_pages'])
コード例 #38
0
ファイル: sites.py プロジェクト: XIAZY/DriveIt
 def get_page_info(self, parent_link):
     """Fetch the chapter page, decode its packed page metadata into
     ``self.info_dict`` and return the total page count."""
     markup = self.get_data(parent_link).decode("utf-8")
     soup = BeautifulSoup(markup, "html.parser")
     script_tag = soup.find("script", {"type": "text/javascript"})
     # The 4th line of the script is "eval(<packed>)": strip the eval()
     # wrapper and run the packed payload directly.
     packed = script_tag.text.split("\n")[3].strip().replace("eval(", "")[:-1]
     unpacked = execjs.eval(packed)
     self.info_dict = json.loads(unpacked.replace("var pages=pages='", "").rstrip("';"))
     return int(self.info_dict["sum_pages"])
コード例 #39
0
ファイル: sites.py プロジェクト: XIAZY/DriveIt
 def get_page_info(self, parent_link):
     """Fetch the desktop chapter page, decode its packed image list into
     ``self.image_list`` and return the number of images."""
     markup = self.get_data("http://manhua.dmzj.com%s" % parent_link, is_destop=True).decode("utf-8")
     soup = BeautifulSoup(markup, "html.parser")
     script_tag = soup.find("script", {"type": "text/javascript"})
     # The 4th line of the script is "eval(<packed>)": strip the eval()
     # wrapper and run the packed payload directly.
     packed = script_tag.text.split("\n")[3].strip().replace("eval(", "")[:-1]
     unpacked = execjs.eval(packed)
     self.image_list = json.loads(unpacked.replace("var pages=pages='", "").rstrip("';"))
     return len(self.image_list)
コード例 #40
0
ファイル: kg_mid_generator.py プロジェクト: moyuwa/WebSpider
def random_string():
    """Generate a 4-character random lowercase hexadecimal string.

    Pure-Python equivalent of the original JS snippet
    ``(((1 + Math.random()) * 0x10000) | 0).toString(16).substring(1)`` --
    avoids spawning a JavaScript runtime just for four hex digits.

    :return: e.g. ``'a3f0'``
    """
    import random

    # (1 + random()) * 0x10000 lies in [0x10000, 0x20000): five hex digits
    # whose first is always '1'; dropping it leaves 4 uniform hex digits.
    return format(int((1 + random.random()) * 0x10000), 'x')[1:]
コード例 #41
0
ファイル: js_python.py プロジェクト: sdpku/base_function
def demo6():
    """Demo: evaluate a trivial JS expression with execjs and print the result."""
    snippet = '''
       1+2
        '''
    print(execjs.eval(snippet))
コード例 #42
0
ファイル: ConditionalNode.py プロジェクト: Cirreth/shome
 def action(self, parameters):
     """Evaluate this node's conditional expression against *parameters*.

     Placeholders in ``self.expression`` are substituted first, then the
     resulting Python expression is evaluated.

     :return: the expression result (False when it evaluates to None), or an
         error-description string when evaluation fails.
     """
     try:
         condition = self.substitute_placeholders(self.expression, parameters, skipped_to_null=True)
         # SECURITY NOTE: eval() executes arbitrary code -- the expression
         # must come from trusted configuration, never from user input.
         res = eval(condition)
         return False if res is None else res
     except Exception as e:
         # BUG FIX: the format arguments must be one tuple; the original
         # applied '%' to self.id alone (TypeError: not enough arguments
         # for format string) and would have returned a 4-tuple anyway.
         return 'ConditionalNode exception: id: %s, expression: %s, parameters: %s, exception: %s' % (
             self.id, str(self.expression), str(self.parameters), str(e))
コード例 #43
0
ファイル: Core.py プロジェクト: jacklaiu/MasterJack
 def getCodeArray(self, queryWords):
     """Query wencai for each word and return the flattened list of codes.

     :param queryWords: iterable of query strings (may contain non-ASCII)
     :return: list of stock codes, in query order
     """
     from urllib.parse import quote

     codeArr = []
     for w in queryWords:
         # Percent-encode like JavaScript's encodeURIComponent: everything
         # except A-Za-z0-9 and - _ . ! ~ * ' ( ) is escaped (UTF-8 based).
         # Replaces the original execjs round-trip, which spawned a JS
         # runtime per word and broke on words containing quote characters.
         arr = self.getCodeArrayFromWencai(quote(w, safe="!~*'()"))
         for code in arr:
             codeArr.append(code)
     return codeArr
コード例 #44
0
ファイル: flickr.py プロジェクト: gravityfire-tw/ComicCrawler
def get_images(html, url):
    """Extract the photo model from a flickr page and resolve its video URL."""
    site_key = re.search('root\.YUI_config\.flickr\.api\.site_key = "([^"]+)',
                         html).group(1)
    model_js = re.search(r"Y\.ClientApp\.init\(([\s\S]+?)\)\s*\.then",
                         html).group(1)
    # Evaluate the init blob with stubbed auth/reqId globals, then pull the
    # first photo model out of its export table.
    photo = execjs.eval("auth = null, reqId = null, model = " + model_js +
                        ", model.modelExport['photo-models'][0]")
    return query_video(photo["id"], photo["secret"], site_key)
コード例 #45
0
ファイル: baidu.py プロジェクト: sclbeta/bdy
    def get_pre_login_info(self):
        """Fetch the login token and RSA public key that precede a login POST."""
        # Prime the session cookies first.
        urllib2.urlopen('https://passport.baidu.com/center')

        # The API replies with JSONP "bd__cbs__hqe0c({...})": strip the
        # callback wrapper, then evaluate the remaining object literal.
        api_url = ('https://passport.baidu.com/v2/api/?getapi&tpl=pp&apiver=v3&'
                   + str(int(time.time()) * 1000)
                   + '&class=login&logintype=basicLogin&callback=bd__cbs__hqe0c')
        raw = urllib2.urlopen(api_url).read()
        payload = execjs.eval(raw[len('bd__cbs__hqe0c('):-1])
        self.token = payload['data']['token']

        # Same JSONP dance for the public key endpoint.
        key_url = ('https://passport.baidu.com/v2/getpublickey?token=' + self.token
                   + '&tpl=pp&apiver=v3&tt=' + str(int(time.time()) * 1000)
                   + '&callback=bd__cbs__zgtpei')
        raw = urllib2.urlopen(key_url).read()
        payload = execjs.eval(raw[len('bd__cbs__zgtpei('):-1])
        self.pubkey = payload['pubkey']
        self.key = payload['key']
        self.encrypt_keys()
コード例 #46
0
ファイル: util_sina.py プロジェクト: uniwin/vnpydjv
    def getTicks2(self, symbol, callback, start_dt=None):
        """Load the latest 1-minute data for *symbol* from sina (CFFEX variant)
        and feed each tick to *callback*.

        :param symbol: full contract symbol (see ctaTemplate.getFullSymbol())
        :param callback: invoked once per CtaTickData
        :param start_dt: skip ticks earlier than this datetime (None = keep all)
        :return: True on success / False on failure
        """
        try:
            url = u'http://stock2.finance.sina.com.cn/futures/api/jsonp.php/var%20t1nf_{0}=/InnerFuturesNewService.getMinLine?symbol={0}'.format(symbol)
            self.strategy.writeCtaLog(u'从sina下载{0}Tick数据 {1}'.format(symbol, url))

            response_data = self.session.get(url).content
            # Strip the JSONP assignment wrapper: "var t1nf_xx=(...);" -> "..."
            response_data = response_data.decode('gbk').split('=')[-1]
            response_data = response_data.replace('(', '')
            response_data = response_data.replace(');', '')
            responses = execjs.eval(response_data)
            datevalue = datetime.now().strftime('%Y-%m-%d')

            self.strategy.writeCtaLog(u'一共提取{}条分时数据'.format(len(responses)))

            for i, item in enumerate(responses):

                tick = CtaTickData()
                tick.vtSymbol = symbol
                tick.symbol = symbol

                # BUG FIX: item[6] requires at least 7 elements; the original
                # tested len(item) >= 6 and raised IndexError on 6-field rows
                # (which aborted the whole load via the outer except).
                if len(item) > 6:
                    datevalue = item[6]

                tick.date = datevalue
                tick.time = item[0] + u':00'
                tick.datetime = datetime.strptime(tick.date + ' ' + tick.time, '%Y-%m-%d %H:%M:%S')
                tick.tradingDay = tick.date

                if start_dt is not None:
                    if tick.datetime < start_dt:
                        continue

                tick.lastPrice = float(item[1])
                tick.volume = int(item[3])

                # item[4] (open interest) may be null in the feed.
                if item[4] is None:
                    tick.openInterest = 0
                else:
                    tick.openInterest = int(item[4])

                callback(tick)

            return True

        except Exception as e:
            self.strategy.writeCtaLog(u'加载sina历史Tick数据失败:' + str(e))
            return False
コード例 #47
0
ファイル: sites.py プロジェクト: bllli/DriveIt
 def get_image_link(self, parent_link, page):
     """Resolve the real image URL for *page* of the chapter at *parent_link*.

     Retries until the chapterfun endpoint returns a non-empty script; an
     empty reply means the anti-bot check fired, so a browser tab is opened
     to let the user pass it before retrying.
     """
     node_script = ''
     # BUG FIX: the original used `is ''`, which tests object identity and
     # is unreliable for non-interned strings; compare with == instead.
     while node_script == '':
         node_script = self.get_data(self.general_formula % (parent_link, parent_link[2:-1], page),
                                     'http://www.dm5.com%s' % parent_link).decode('utf-8')
         if node_script == '':
             webbrowser.open_new('http://www.dm5.com%s' % parent_link)
             time.sleep(3)
     # The script evaluates to an array of image URLs; take the first.
     link = execjs.eval(node_script)[0]
     link_safe = self.unicodeToURL(link)
     return link_safe
コード例 #48
0
ファイル: sites.py プロジェクト: XIAZY/DriveIt
 def get_image_link(self, parent_link, page):
     """Resolve the real image URL for *page* of the chapter at *parent_link*.

     Retries until the chapterfun endpoint returns a non-empty script; an
     empty reply opens a browser tab so the user can pass the anti-bot check.
     """
     javascript_script = ""
     # BUG FIX: the original used `is ""`, which tests object identity and
     # is unreliable for non-interned strings; compare with == instead.
     while javascript_script == "":
         javascript_script = self.get_data(
             self.general_formula % (parent_link, parent_link[2:-1], page), "http://www.dm5.com%s" % parent_link
         ).decode("utf-8")
         if javascript_script == "":
             webbrowser.open_new("http://www.dm5.com%s" % parent_link)
             time.sleep(3)
     # The script evaluates to an array of image URLs; take the first.
     link = execjs.eval(javascript_script)[0]
     link_safe = self.unicodeToURL(link)
     return link_safe
コード例 #49
0
ファイル: okex_data.py プロジェクト: uniwin/vnpydjv
    def get_bars(self, symbol, period, callback, bar_is_completed=False, bar_freq=1, start_dt=None):
        """Download spot K-line bars from OKEx and feed them to *callback*.

        :param symbol: contract symbol (must appear in symbol_list)
        :param period: 1min,3min,5min,15min,30min,1day,3day,1hour,2hour,4hour,6hour,12hour
        :param callback: invoked as ``callback(bar, bar_is_completed, bar_freq)``
        :param start_dt: skip bars earlier than this datetime (None = keep all)
        :return: True on success, False on failure
        """
        if symbol not in symbol_list:
            self.strategy.writeCtaError(u'{} {}不在下载清单中'.format(datetime.now(), symbol))
            # Return an explicit False for consistency with the other exits
            # (the original returned None here; both are falsy).
            return False

        url = u'https://www.okex.com/api/v1/kline.do?symbol={}&type={}'.format(symbol, period)
        self.strategy.writeCtaLog('{}开始下载:{} {}数据.URL:{}'.format(datetime.now(), symbol, period, url))

        content = None
        try:
            content = self.session.get(url).content.decode('gbk')
        except Exception as ex:
            self.strategy.writeCtaError('exception in get:{},{},{}'.format(url, str(ex), traceback.format_exc()))
            return False

        # Response body is a JS array literal of rows.
        bars = execjs.eval(content)

        for i, bar in enumerate(bars):
            if len(bar) < 5:
                self.strategy.writeCtaError('error when import bar:{}'.format(bar))
                return False
            if i == 0:
                # Skip the first row -- presumably an incomplete boundary bar;
                # TODO confirm against the OKEx kline response ordering.
                continue
            add_bar = CtaBarData()
            try:
                add_bar.vtSymbol = symbol
                add_bar.symbol = symbol
                add_bar.datetime = datetime.fromtimestamp(bar[0] / 1000)
                add_bar.date = add_bar.datetime.strftime('%Y-%m-%d')
                add_bar.time = add_bar.datetime.strftime('%H:%M:%S')
                add_bar.tradingDay = add_bar.date
                add_bar.open = float(bar[1])
                add_bar.high = float(bar[2])
                add_bar.low = float(bar[3])
                add_bar.close = float(bar[4])
                add_bar.volume = float(bar[5])
            except Exception as ex:
                self.strategy.writeCtaError('error when convert bar:{},ex:{},t:{}'.format(bar, str(ex), traceback.format_exc()))
                return False

            # BUG FIX: `bar` is a raw list with no .datetime attribute; the
            # original filtered on bar.datetime and raised AttributeError
            # whenever start_dt was given.
            if start_dt is not None and add_bar.datetime < start_dt:
                continue

            if callback is not None:
                callback(add_bar, bar_is_completed, bar_freq)

        return True
コード例 #50
0
ファイル: dmzj.py プロジェクト: v7368858/ComicCrawler
def getimgurls(html, url):
    """Return the list of image URLs for a dmzj chapter page."""
    base = "http://images.dmzj.com/"

    # Flatten the page so the regex can span what were multiple lines.
    flat = html.replace("\n", "")
    script = re.search(r"page = '';\s*(.+?);\s*var g_comic_name", flat).group(1)
    # The script defines an obfuscated `pages` value: evaluate the script to
    # get it, then evaluate that string to get the actual array.
    pages = execjs.eval(execjs.compile(script).eval("pages"))

    # thumbs.db?!
    # http://manhua.dmzj.com/zhuoyandexiana/3488-20.shtml
    return [base + p for p in pages if p and not p.lower().endswith("thumbs.db")]
コード例 #51
0
def jseval(job=None, expression=None):
    """Evaluate a JS *expression* with ``$job`` bound to *job* (as JSON).

    A ``{...}`` expression is treated as a function body; anything else as a
    bare expression.
    """
    if expression.startswith('{'):
        exp_tpl = '''function () {
        $job = %s;
        return function()%s();}()
        '''
    else:
        exp_tpl = '''function () {
        $job = %s;
        return %s;}()
        '''
    return execjs.eval(exp_tpl % (json.dumps(job), expression))
コード例 #52
0
def grab_price_history(response):
    """Callback to parse out price history data from a JS chart payload."""
    cols_js = ''
    rows_js = ''
    # Pull the "cols:" / "rows:" assignment lines out of the JS source.
    for raw in response.iter_lines():
        raw = raw.strip()
        if raw.startswith('cols'):
            cols_js = ':'.join(raw.split(':')[1:]).strip(',')
        elif raw.startswith('rows'):
            rows_js = ':'.join(raw.split(':')[1:]).strip()

    # Evaluate the JS literals into Python structures.
    rows = execjs.eval(rows_js)
    cols = execjs.eval(cols_js)

    # Column names: prefer the label, fall back to the role.
    names = [c.get('label') if c.get('label') else c.get('p').get('role') for c in cols]
    values = [[cell['v'] for cell in r['c']] for r in rows]

    frame = pd.DataFrame(values)
    frame.columns = names
    return frame
コード例 #53
0
ファイル: getters.py プロジェクト: fork42541/getprox
        def samair():
            """
            http://www.samair.ru/proxy

            Scrape proxy addresses, de-obfuscating the JS-encoded ports.
            """
            base_uri = 'http://www.samair.ru/proxy/'
            page = requests.get(base_uri)
            tree = lxml.html.fromstring(page.text)

            # The site hides port digits behind JS variables defined in an
            # external script; recover the eval()'d variable block once.
            js_uri = urllib.basejoin(base_uri,
                                     tree.xpath('.//script[@type="text/javascript"]/@src')[0])
            js_vars = re.search('eval\((.*)\)', requests.get(js_uri).text.strip()).group(1)
            js_vars = execjs.eval(js_vars)

            uri_list = [base_uri]+[urllib.basejoin(base_uri, u) \
                        for u in tree.xpath('.//a[@class="page"]/@href')]
            results = []
            for uri in uri_list:
                # BUG FIX: the original requested base_uri on every pass, so
                # only the first page of proxies was ever actually scraped.
                page = requests.get(uri)
                tree = lxml.html.fromstring(page.text)
                rows = tree.xpath('.//table[@id="proxylist"]/tr')[1:]
                for row in rows:
                    td_list = row.xpath('.//td')
                    if len(td_list) != 4:
                        continue
                    ip = td_list[0].text

                    # Get the JavaScript that corresponds to the obfuscated port:
                    port_js = td_list[0].xpath('.//script')[0].text
                    port_js = re.search('document\.write\(\":\"\+(.+)\)', port_js).group(1)
                    port_vars = port_js.split('+')
                    p = '+'.join(['(%s).toString()' % v for v in port_vars])

                    # Construct function to interpret to get the actual port value:
                    f = 'function(){'+js_vars+'return '+p+'}()'
                    port = str(execjs.eval(f))

                    results.append('http://'+ip+':'+port)
            return results
コード例 #54
0
ファイル: cc_dm5.py プロジェクト: j129008/ComicCrawler
def getimgurls(html, page=0, url=""):
	"""Return the list of image URLs for a dm5 chapter page."""
	header["Referer"] = url

	# Extract the obfuscated dm5 key, if the page ships one.
	key_match = re.search(r'id="dm5_key".+?<script[^>]+?>\s*eval(.+?)</script>', html, re.S)
	if key_match:
		stmt = execjs.eval(key_match.group(1)).split(";")[1]
		key = execjs.eval(re.search(r"=(.+)$", stmt).group(1))
	else:
		key = ""

	base = re.search(r"(^.+)/[^/]*$", url).group(1)
	total = int(re.search("DM5_IMAGE_COUNT=(\d+);", html).group(1))
	cid = re.search("DM5_CID=(\d+);", html).group(1)

	urls = []
	for p in range(1, total + 1):
		fun_url = "{}/chapterfun.ashx?cid={}&page={}&language=1&key={}".format(base, cid, p, key)
		script = comiccrawler.grabhtml(fun_url, hd=header)

		ctx = execjs.compile(script)
		# window.ajaxloadimage
		d = ctx.eval("(typeof (hd_c) != 'undefined' && hd_c.length > 0 && typeof (isrevtt) != 'undefined') ? hd_c : d")
		urls.append(d[0])
	return urls
コード例 #55
0
ファイル: scraper.py プロジェクト: duncanparkes/tonga
def unjs_email(script):
    """Takes a javascript email mangling script and returns the email address."""
    # Keep only the JS lines inside the <!-- ... //--> wrapper that don't
    # touch the DOM.
    body = re.search(r'<!--(.*)//-->', script, re.M | re.S).group(1).strip()
    jslines = [ln.strip() for ln in body.splitlines()
               if not ln.strip().startswith('document')]

    # The variable holding the address is named addy<N>, but the number
    # varies -- find it by regex and make the wrapper return it.
    varname = re.search(r'var (addy\d+)', script).group(1)
    jslines.append('return {}'.format(varname))

    wrapped = '(function() {{{}}})()'.format(' '.join(jslines))
    return unescape(execjs.eval(wrapped))
コード例 #56
0
ファイル: cc_dmzj.py プロジェクト: j129008/ComicCrawler
def getimgurls(html, url):
	"""getimgurls(html, url) -> url list

	Return a list of urls.
	"""

	# Flatten the page so the regex can span what were multiple lines.
	flat = html.replace("\n", "")
	script = re.search("page = '';(.+?);var g_comic_name", flat).group(1)
	# The script builds an obfuscated `pages` string; evaluate it, then
	# evaluate that string to recover the actual array of page paths.
	pages = execjs.eval(execjs.compile(script).eval("pages"))
	base = "http://images.dmzj.com/"

	# thumbs.db?!
	# http://manhua.dmzj.com/zhuoyandexiana/3488-20.shtml
	return [base + p for p in pages if p and not p.lower().endswith("thumbs.db")]
コード例 #57
0
ファイル: jseval.py プロジェクト: bhurwitz33/rabix
 def evaluate(self, expression=None, job=None, context=None, *args,
              **kwargs):
     """Evaluate a JS *expression* with ``$job``/``$self`` bound to *job*
     and *context* (both serialized as JSON)."""
     if expression.startswith('{'):
         # A {...} expression is treated as a function body.
         template = '''function () {
         $job = %s;
         $self = %s;
         return function()%s();}()
         '''
     else:
         template = '''function () {
         $job = %s;
         $self = %s;
         return %s;}()
         '''
     return execjs.eval(template % (json.dumps(job), json.dumps(context), expression))
コード例 #58
0
ファイル: dmzj_m.py プロジェクト: v7368858/ComicCrawler
def getepisodelist(html, url):
	"""Build the episode list of a dmzj-mobile comic page, ordered by chapter."""
	data_js = re.search("initIntroData(.+?);", html, re.DOTALL).group(1)
	categories = execjs.eval(data_js)

	# Flatten the per-category chapter arrays, then sort by chapter order.
	chapters = []
	for category in categories:
		chapters += category["data"]
	chapters.sort(key=lambda item: item["chapter_order"])

	episodes = []
	for item in chapters:
		ep_url = "/view/{}/{}.html".format(item["comic_id"], item["id"])
		title = item["title"] + item["chapter_name"]
		episodes.append(Episode(title, urljoin(url, ep_url)))

	return episodes
コード例 #59
0
ファイル: evaluator.py プロジェクト: dionjwa/rabix
def evaluate_rabix_js(expression, job, context=None,
                      engine_config=None, outdir=None, tmpdir=None):
    """Evaluate a rabix JS *expression* with ``$job``/``$self`` bound to
    *job* and *context* (both serialized as JSON).

    engine_config/outdir/tmpdir are accepted for interface compatibility
    but unused here.
    """
    # A {...} expression is treated as a function body, otherwise as a
    # bare expression.
    if expression.startswith('{'):
        exp_tpl = '''function () {
        $job = %s;
        $self = %s;
        return function()%s();}()
        '''
    else:
        exp_tpl = '''function () {
        $job = %s;
        $self = %s;
        return %s;}()
        '''
    result = execjs.eval(exp_tpl % (json.dumps(job), json.dumps(context), expression))
    log.debug("Expression result: %s" % result)
    return result
コード例 #60
0
ファイル: dbhouse.py プロジェクト: wangyanweigithub/scrapy
    def get_area_house(self, response):
        """Parse an area listing page: yield a request for each building's
        detail page, plus a request for the next results page if present.

        NOTE(review): writes the decoded listing markup to ``a.html`` on
        every call -- looks like leftover debug output; confirm before
        removing.
        """
        try:
            print(response.url)
            body = response.body.decode("utf8")
            # The listing HTML is embedded as a JS string assignment
            # ("var search_result = '...';") -- capture the literal and
            # evaluate it to recover the markup.
            groups = re.search("\s*var search_result = \s*(.*);var search_result_list_num\s*=\s*\d", body)
            body = execjs.eval(groups[1])
            with open("a.html", 'w', encoding="utf-8") as f:
                f.write(body)
            body = Selector(text=body)
            # Each result title links to a building detail page.
            for each in body.xpath("//li[@class='title']/h2"):
                url = each.xpath("a/@href").extract()[0]
                yield scrapy.Request(url, callback=self.parse_building)

            # Pagination: find the "next page" link's onclick handler, pull
            # the page number out of its last argument and rewrite the
            # current URL accordingly.
            for each in body.xpath("//div[@id='search_result_page']/a[@onclick]"):
                if each.xpath("text()").extract()[0] == "下一页>":
                    search_result = re.search(r".*\(.*,.*,.*,(\d*)\)", each.xpath("@onclick").extract()[0])
                    page_no = search_result.group(1)
                    url = re.sub("page_no=\d*", "page_no=%s" % page_no, response.url, 1)
                    yield scrapy.Request(url, callback=self.get_area_house)
        except Exception as e:
            self.log("!!!!!error %s" % e)