def get_images(html, url):
    """Return the image URLs of the episode page ``html``.

    The site's ``config_*.js`` line and the page's packed inline script are
    evaluated in a JS VM to read ``cInfo.files`` and ``cInfo.path``.  The
    ``utils`` object cut out of the site's ``core_*.js`` is then loaded into a
    second VM, where ``utils.getPath`` builds the URL of every file.

    Side effect: rebinds the module-level ``servers`` to a ``CycleList`` of
    the ``"h"`` entries found in the ``servs`` literal of ``core_*.js``.

    :param html: source of the episode page.
    :param url: address of the episode page; passed as ``referer`` when the
        two scripts are downloaded.
    :return: the URLs produced by the JS ``getFiles`` helper; when the
        ``nowebp`` option is set, a list with a trailing ``.webp`` removed
        from each URL.
    :raises AttributeError: when one of the expected patterns is not found
        (``re.search`` returned ``None``).
    """
    global servers

    # build js context
    js = "var window = global;"
    configjs_url = re.search(
        r'src="(http://[^"]+?/config_\w+?\.js)"', html).group(1)
    configjs = grabhtml(configjs_url, referer=url)
    js += re.search(
        r'^(var CryptoJS|window\["\\x65\\x76\\x61\\x6c"\]).+',
        configjs, re.MULTILINE).group()
    js += re.search(
        r'<script type="text/javascript">((eval|window\["\\x65\\x76\\x61\\x6c"\]).+?)</script',
        html).group(1)

    with VM(js) as vm:
        files, path = vm.run("[cInfo.files, cInfo.path]")

    # find server
    # "http://c.3qfm.com/scripts/core_5C348B32A78647FF4208EACA42FC5F84.js"
    # getpath()
    corejs_url = re.search(
        r'src="(http://[^"]+?/core_\w+?\.js)"', html).group(1)
    corejs = grabhtml(corejs_url, referer=url)

    # cache server list
    servs = re.search(r"var servs=(.+?),pfuncs=", corejs).group(1)
    # NOTE(review): eval() executes text downloaded from the remote server as
    # Python code.  Consider json.loads / ast.literal_eval if the literal's
    # syntax allows it.
    servs = eval(servs)
    servs = [host["h"] for category in servs for host in category["hosts"]]
    servers = CycleList(servs)
    host = servers.get()

    utils = re.search(r"SMH\.(utils=.+?),SMH\.imgData=", corejs).group(1)
    js = utils + """;
    function getFiles(path, files, host) {
        // lets try if it will be faster in javascript
        return files.map(function(file){
            return utils.getPath(host, path + file);
        });
    }
    """
    with VM(js) as vm:
        images = vm.call("getFiles", path, files, host)

    if config.getboolean("nowebp"):
        # Build a real list instead of a lazy ``map`` object so callers get a
        # reusable sequence whether or not the option is set.
        images = [i[:-5] if i.endswith(".webp") else i for i in images]

    return images
def get_episodes(html, url):
    """Collect the episodes linked from the index page ``html``.

    Each ``<a ... onclick="cview...">`` handler is executed in a JS VM that
    has the site's ``/js/comicview.js`` loaded.  ``window.open`` is stubbed so
    that the address the script opens is stored in ``output`` and can be read
    back afterwards.

    :param html: source of the index page.
    :param url: address of the index page; used to locate ``comicview.js``
        and to resolve the episode addresses.
    :return: list of ``Episode`` objects in the order the links appear.
    """
    flat_html = html.replace("\n", "")

    # Stubs for the browser globals used by comicview.js.
    browser_stub = """
    var output;
    function getCookie() {}
    function getcookie() {}
    var window = {
        open: function(result){
            output = result;
        }
    };
    var document = {
        location: {
            href: ""
        }
    };
    """
    script = browser_stub + grabhtml(urljoin(url, "/js/comicview.js"))

    links = re.finditer(
        r'<a [^>]*?onclick="(cview[^"]+?);[^>]*>(.+?)</a>', flat_html, re.M)

    found = []
    with VM(script) as vm:
        for link in links:
            handler, label = link.groups()
            # Running the handler triggers the stubbed window.open().
            vm.run(handler)
            target = vm.run("output")
            found.append(Episode(clean_tags(label), urljoin(url, target)))
    return found
def get_episodes(html, url):
    """Return the episodes of a comic, in reverse of their listing order.

    The list is first read straight from ``html`` with ``get_list``.  When
    that yields nothing, the page's ``__VIEWSTATE`` value is decompressed with
    the site's own ``LZString.decompressFromBase64`` (taken from its
    ``main_*.js``) and ``get_list`` is run on the result instead.

    :param html: source of the comic's index page.
    :param url: address of the index page; must contain ``comic/<digits>``.
    :return: list of ``Episode`` objects, reversed relative to ``get_list``.
    """
    comic_id = re.search(r"comic/(\d+)", url).group(1)

    # e.g. http://tw.ikanman.com/comic/10924/
    entries = get_list(html, comic_id)

    if not entries:
        # e.g. http://tw.ikanman.com/comic/4350/
        packed = re.search(
            r'id="__VIEWSTATE" value="([^"]+)', html).group(1)
        main_src = re.search(
            r'src="([^"]+?/main_[^"]*?\.js)"', html).group(1)
        main_code = grabhtml(main_src)
        # Only the line starting with ``window[`` is loaded into the VM.
        main_line = re.search(r'^window\[.+', main_code, re.M).group()
        script = """
        var window = global;
        """ + main_line
        with VM(script) as vm:
            unpacked = vm.call("LZString.decompressFromBase64", packed)
        entries = get_list(unpacked, comic_id)

    result = []
    for entry in entries:
        result.append(Episode(entry[0].strip(), urljoin(url, entry[1])))
    result.reverse()
    return result
def get_images(html, url):
    """Return the image URLs of the episode page ``html``.

    ``sFiles`` from the page is decoded with the site's ``unsuan`` function
    (from ``viewhtm.js``, everything before ``var cuImg``) into a
    ``|``-separated file list.  The server prefix is read from the first
    ``^``-separated entry of ``sDS`` in ``ds.js``.

    :param html: source of the episode page.
    :param url: address of the episode page (not used by this function).
    :return: list of ``server + sPath + file`` strings.
    """
    encoded_files = re.search('sFiles="([^"]+)"', html).group(1)
    base_path = re.search('sPath="([^"]+)"', html).group(1)

    viewhtm_src = grabhtml("http://www.iibq.com/script/viewhtm.js")
    # Minimal stand-ins for the browser globals the script refers to.
    browser_stub = """
    window = {
        "eval": eval,
        "parseInt": parseInt,
        "String": String,
        "RegExp": RegExp
    };
    location = {
        "hostname": "www.iibq.com"
    };
    """
    decoder_src = re.search(
        r'(.+?)var cuImg', viewhtm_src, re.DOTALL).group(1)

    with VM(browser_stub + decoder_src) as vm:
        file_names = vm.call("unsuan", encoded_files).split("|")

    ds_src = grabhtml("http://www.iibq.com/script/ds.js")
    server_spec = re.search('sDS = "([^"]+)"', ds_src).group(1)
    # First "^" entry, second "|" field.
    first_server = server_spec.split("^")[0]
    prefix = first_server.split("|")[1]

    return [prefix + base_path + name for name in file_names]
def get_episodes(html, url):
    """Collect the episodes linked from the index page ``html``.

    Every ``<a ... onclick="cview('<url>',<catid>...">`` link is resolved by
    calling the site's own ``cview`` (from ``/js/comicview.js``) in a JS VM.
    ``window.open`` is stubbed so the address ``cview`` opens is captured and
    returned by the JS ``get`` wrapper.

    :param html: source of the index page.
    :param url: address of the index page; used to locate ``comicview.js``
        and to resolve the episode addresses.
    :return: list of ``Episode`` objects in the order the links appear.
    """
    html = html.replace("\n", "")
    js = """
    var output;
    function getCookie() {}
    var window = {
        open: function(result){
            output = result;
        }
    };
    function get(url, catid) {
        cview(url, catid);
        return output;
    }
    var document = {
        location: {
            href: ""
        }
    };
    """ + grabhtml(urljoin(url, "/js/comicview.js"))

    # Raw string: ``\(`` and ``\d`` are regex escapes, not Python string
    # escapes (a non-raw literal triggers an invalid-escape warning).
    matches = re.finditer(
        r"""<a [^>]*?onclick="cview\('(.+?)',(\d+?)[^>]*?>(.+?)</a>""",
        html, re.M)

    s = []
    with VM(js) as vm:
        for match in matches:
            ep_url, catid, title = match.groups()
            ep_url = vm.call("get", ep_url, int(catid))
            title = clean_tags(title)
            s.append(Episode(title, urljoin(url, ep_url)))
    return s
def get_images(html, url):
    """Return the image URLs of the episode page ``html``.

    The inline ``var cInfo = ...;`` statement is evaluated in a JS VM and
    every entry of ``cInfo.fs`` is prefixed with a fixed image server.

    :param html: source of the episode page.
    :param url: address of the episode page (not used by this function).
    :return: list of image URLs.
    """
    cinfo_statement = re.search(
        r'<script type="text/javascript">(var cInfo =[^;]+;)', html).group(1)

    with VM(cinfo_statement) as vm:
        file_paths = vm.run("cInfo.fs")

    # Reference: http://www.ccdm1.com/Public/manhuadao/js/configs.js?v=0731
    server = 'http://ccimg1.61mh.com'
    template = "{server}{file}"
    return [template.format(server=server, file=path) for path in file_paths]
def get_images(html, url):
    """Return the image URLs of the episode page ``html``.

    The page's ``packed=...;`` assignment is base64-decoded and evaluated in
    a JS VM, which fills the ``photosr`` array.  Every entry after the first
    one is prefixed with the image server.

    :param html: source of the episode page.
    :param url: address of the episode page (not used by this function).
    :return: generator of image URLs.
    """
    packed_assignment = re.search(r'(packed=[^;]+;)', html).group(1)

    # JS appended after the assignment: the target array, a base64 decoder
    # and the statement that unpacks and evaluates ``packed``.
    unpacker = """
    var photosr = new Array();
    function base64decode(str){var base64EncodeChars="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";var base64DecodeChars=new Array(-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,62,-1,-1,-1,63,52,53,54,55,56,57,58,59,60,61,-1,-1,-1,-1,-1,-1,-1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,-1,-1,-1,-1,-1,-1,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,-1,-1,-1,-1,-1);var c1,c2,c3,c4;var i,len,out;len=str.length;i=0;out="";while(i<len){do{c1=base64DecodeChars[str.charCodeAt(i++)&255]}while(i<len&&c1==-1);if(c1==-1){break}do{c2=base64DecodeChars[str.charCodeAt(i++)&255]}while(i<len&&c2==-1);if(c2==-1){break}out+=String.fromCharCode((c1<<2)|((c2&48)>>4));do{c3=str.charCodeAt(i++)&255;if(c3==61){return out}c3=base64DecodeChars[c3]}while(i<len&&c3==-1);if(c3==-1){break}out+=String.fromCharCode(((c2&15)<<4)|((c3&60)>>2));do{c4=str.charCodeAt(i++)&255;if(c4==61){return out}c4=base64DecodeChars[c4]}while(i<len&&c4==-1);if(c4==-1){break}out+=String.fromCharCode(((c3&3)<<6)|c4)}return out};
    eval(eval(base64decode(packed).slice(4)));
    """

    with VM(packed_assignment + unpacker) as vm:
        photos = vm.run('photosr')
    # The first array entry is skipped.
    file_names = photos[1:]

    return ('http://img1.733mh.com/' + name for name in file_names)
def get_images(html, url):
    """Return the image URLs of the episode page ``html``.

    The site's ``nview.js`` and the page's inline script are wrapped in a JS
    function ``scriptBody`` that runs whenever ``document.location.href`` is
    assigned.  The stubbed ``getElementById`` returns objects whose ``src``
    setter records every assigned value, and those values are the images.

    :param html: source of the episode page.
    :param url: address of the episode page; used to resolve ``nview.js`` and
        the collected image addresses, and passed to the JS ``getImages``.
    :return: list of absolute image URLs.
    :raises AttributeError: when ``nview.js`` or the inline script cannot be
        found in ``html``.
    """
    # Raw strings: ``\.`` and ``\(`` are regex escapes, not Python string
    # escapes (non-raw literals trigger an invalid-escape warning).
    nview = re.search(r'src="([^"]*nview\.js[^"]*)"', html).group(1)
    nview = urljoin(url, nview)
    nview = grabhtml(nview)

    try:
        # http://www.comicbus.com/html/103.html
        script = re.search(
            r'(var ch=.+?)spp\(\)', html, re.DOTALL).group(1)
    except AttributeError:
        # http://www.comicbus.com/html/7294.html
        script = re.search(
            r'(var chs=.+?)</script>', html, re.DOTALL).group(1)

    js = """
    var url,
        images = [],
        document = {
            location: {
                toString() {return url;},
                get href() {return url;},
                set href(_url) {url = _url; scriptBody()}
            },
            getElementById() {
                return {
                    set src(value) {
                        images.push(value);
                    },
                    style: {}
                };
            }
        },
        navigator = {
            userAgent: "",
            language: ""
        },
        window = {},
        alert = () => {};

    function scriptBody() {
        initpage = () => {};
    """ + nview + script + """
        jn();
    }

    function getImages(url) {
        images = [];
        document.location.href = url;
        return images;
    }
    """
    with VM(js) as vm:
        images = vm.call("getImages", url)
    return [urljoin(url, i) for i in images]
def test_VM(self):
    """Check that ``VM`` evaluates JS when created manually and via ``with``."""
    with self.subTest("create VM"):
        vm = VM().create()
        try:
            r = vm.run("'foo' + 'bar'")
        finally:
            # Always destroy a manually created VM, even when run() raises,
            # so a failing sub-test does not leave it behind.
            vm.destroy()
        self.assertEqual(r, "foobar")

    with self.subTest("with statement"):
        with VM() as vm:
            r = vm.run("'foo' + 'bar'")
        self.assertEqual(r, "foobar")
def turn(self, mat, strength, x, y):
    """Run this player's JavaScript ``turn`` function and return its action.

    The evaluated script is ``preload_code``, then ``turn_code[self.color]``,
    then the call ``turn('<color>', <mat as JSON>, strength, x, y)``; it runs
    in a fresh VM.

    :param mat: matrix handed to the script, serialised with ``json.dumps``.
    :param strength: forwarded unchanged into the JS call.
    :param x: forwarded unchanged into the JS call.
    :param y: forwarded unchanged into the JS call.
    :return: the script's result as a tuple when it is a list; ``None`` when
        it is anything else or when building/running the script fails (an
        ``Error: <color>`` line is printed in that case).
    """
    try:
        turn_call = "turn({}, {}, {}, {}, {})".format(
            " '{}' ".format(self.color), json.dumps(mat), strength, x, y)
        call_str = "{} {} {}".format(
            preload_code, turn_code[self.color], turn_call)
        with VM() as vm:
            action = vm.run(call_str)
    except Exception:
        # Best effort: a broken player script must not stop the caller, but
        # KeyboardInterrupt / SystemExit are no longer swallowed.
        print("Error: {}".format(self.color))
        return None

    if isinstance(action, list):
        return tuple(action)
    return None
def build_ctx(url):
    """Create the reusable JavaScript context and store it in ``ctx``.

    Downloads the site's ``/script/view.js`` and loads it into a VM after a
    set of stubs for ``location``, ``document`` and ``window``.  The JS helper
    ``getImages(url, name, hdDomain)`` fills those stubs, calls the script's
    ``window_onload()`` and returns the resulting ``imgEl.src``.

    Side effect: rebinds the module-level ``ctx`` to the created VM.

    :param url: any address on the target site; only used to resolve
        ``/script/view.js``.
    """
    global ctx

    js_url = urljoin(url, "/script/view.js")
    js = grabhtml(js_url)
    # Raw string: the JS regex literal below contains ``\/``, which is not a
    # valid Python string escape.
    js = r"""
    var imgEl = {
            style: {},
            name: ""
        },
        domainEl = {};

    location = {
        href: "",
        hostname: ""
    };

    document = {
        location: location,
        getElementById: function(id){
            if (id == "hdDomain") {
                return domainEl;
            }
            if (/^img/.test(id)) {
                return imgEl;
            }
            return {};
        }
    };

    window = {
        document: document,
        eval: eval,
        parseInt: parseInt,
        String: String,
        RegExp: RegExp
    };

    function getImages(url, name, hdDomain) {
        location.href = url;
        location.hostname = url.match(/:\/\/([^\/]+)/)[1];
        imgEl.name = name;
        domainEl.value = hdDomain;
        window_onload();
        return imgEl.src;
    }
    """ + js
    ctx = VM(js).create()
def get_episodes(html, url):
    """Collect the episodes linked from the index page ``html``.

    Every link of the form
    ``<a href='#' onclick="cview('<url>',<catid>);return false;" ...>`` is
    resolved by calling the site's own ``cview`` (from ``/js/comicview.js``)
    in a JS VM.  ``window.open`` is stubbed so the address ``cview`` opens is
    captured and returned by the JS ``get`` wrapper.

    :param html: source of the index page.
    :param url: address of the index page; used to locate ``comicview.js``
        and to resolve the episode addresses.
    :return: list of ``Episode`` objects in the order the links appear, with
        markup stripped from their titles.
    """
    html = html.replace("\n", "")
    js = """
    var output;
    function getCookie() {}
    var window = {
        open: function(result){
            output = result;
        }
    };
    function get(url, catid) {
        cview(url, catid);
        return output;
    }
    var document = {
        location: {
            href: ""
        }
    };
    """ + grabhtml(urljoin(url, "/js/comicview.js"))

    # Raw strings: ``\(``, ``\d``, ``\)`` and ``\w`` are regex escapes, not
    # Python string escapes (a non-raw literal triggers an invalid-escape
    # warning).
    matches = re.finditer(
        r"""<a href='#' onclick="cview\('(.+?)',(\d+?)\);return false;" """
        r"""id="\w+?" class="\w+?">(.+?)</a>""",
        html, re.M)

    s = []
    with VM(js) as vm:
        for match in matches:
            ep_url, catid, title = match.groups()
            ep_url = vm.call("get", ep_url, int(catid))
            # tag cleanup: drop embedded scripts first, then remaining tags
            title = title.strip()
            title = re.sub("<script.+?</script>", "", title)
            title = re.sub("<.+?>", "", title)
            s.append(Episode(title, urljoin(url, ep_url)))
    return s
def get_images(html, url):
    """Return the image URLs of the episode page ``html``.

    ``sFiles`` from the page is decoded with the site's ``unsuan`` function
    into a ``|``-separated file list.  ``unsuan`` comes from the page's
    ``viewhtm*.js``: the part between the ``isMobile`` function and
    ``var cMod``.  The server prefixes are read from ``sDS`` in the page's
    ``ds.js``.

    Side effect: rebinds the module-level ``servers`` to an endless
    ``cycle`` over the server prefixes; the first one is consumed here.

    :param html: source of the episode page.
    :param url: address of the episode page; used to resolve the script
        addresses and to supply ``location.hostname`` to the JS.
    :return: generator of ``server + sPath + file`` strings.
    """
    global servers

    s_files = re.search('sFiles="([^"]+)', html).group(1)
    s_path = re.search('sPath="([^"]+)', html).group(1)

    viewhtm = re.search(r'src="([^"]*?viewhtm\d*\.js[^"]*)', html)
    viewhtm = grabhtml(urljoin(url, viewhtm.group(1)))

    env = """
    window = {
        "eval": eval,
        "parseInt": parseInt,
        "String": String,
        "RegExp": RegExp
    };
    location = {
        "hostname": null
    };
    function setHostname(hostname) {
        location.hostname = hostname;
    }
    """
    js = env + re.search(
        r'function isMobile\(\){.+?}(.+?)var cMod',
        viewhtm, re.DOTALL).group(1)

    with VM(js) as vm:
        vm.call("setHostname", urlparse(url).hostname)
        arr_files = vm.call("unsuan", s_files).split("|")

    ds = re.search(r"src='([^']*?ds\.js[^']*)", html)
    ds = grabhtml(urljoin(url, ds.group(1)))

    # Parse into locals and publish the global only once it is complete, so
    # a parsing failure cannot leave ``servers`` holding half-processed data.
    entries = re.search('sDS = "([^"]+)', ds).group(1).split("^")
    prefixes = [entry.split("|")[1] for entry in entries]
    servers = cycle(prefixes)
    server = next(servers)

    return (server + s_path + f for f in arr_files)
def get_images(html, url):
    """Return the image URLs of the episode page ``html``.

    The site's ``config_*.js`` line and the page's packed inline script are
    evaluated in a JS VM where ``SMH.imgData`` is stubbed to capture its
    argument as ``cInfo``.  The ``utils`` object cut out of ``core_*.js`` is
    loaded into a second VM, where ``utils.getPath`` builds each file's URL.
    ``cInfo.sl`` is url-encoded and appended to every URL as a query string.

    Side effect: rebinds the module-level ``servers`` to an endless ``cycle``
    over the ``"h"`` entries of the ``servs`` literal; the first one is
    consumed here.

    :param html: source of the episode page.
    :param url: address of the episode page; passed as ``referer`` when the
        two scripts are downloaded.
    :return: list of ``<image url>?<params>`` strings; a trailing ``.webp``
        is removed from each image URL when the ``nowebp`` option is set.
    """
    global servers

    def first_group(pattern, text):
        # Shorthand for the many "search and take group 1" lookups below.
        return re.search(pattern, text).group(1)

    # build js context
    capture_stub = """
    var window = global;
    var cInfo;
    var SMH = {
        imgData: function(data) {
            cInfo = data;
            return {
                preInit: function(){}
            };
        }
    };
    """
    config_src = first_group(r'src="(https?://[^"]+?/config_\w+?\.js)"', html)
    config_code = grabhtml(config_src, referer=url)
    config_line = re.search(
        r'^(var CryptoJS|window\["\\x65\\x76\\x61\\x6c"\]).+',
        config_code, re.MULTILINE).group()
    packed_script = first_group(
        r'<script type="text/javascript">((eval|window\["\\x65\\x76\\x61\\x6c"\]).+?)</script',
        html)

    with VM(capture_stub + config_line + packed_script) as vm:
        files, path, params = vm.run("[cInfo.files, cInfo.path, cInfo.sl]")

    # find server
    # "http://c.3qfm.com/scripts/core_5C348B32A78647FF4208EACA42FC5F84.js"
    # getpath()
    core_src = first_group(r'src="(https?://[^"]+?/core_\w+?\.js)"', html)
    core_code = grabhtml(core_src, referer=url)

    # cache server list
    # NOTE(review): eval() executes text downloaded from the remote server as
    # Python code.
    categories = eval(first_group(r"var servs=(.+?),pfuncs=", core_code))
    hosts = []
    for category in categories:
        for entry in category["hosts"]:
            hosts.append(entry["h"])
    servers = cycle(hosts)
    host = next(servers)

    utils_code = first_group(r"SMH\.(utils=.+?),SMH\.imgData=", core_code)
    path_script = """
    var location = {
        protocol: "http:"
    };
    """ + utils_code + """;
    function getFiles(path, files, host) {
        // lets try if it will be faster in javascript
        return files.map(function(file){
            return utils.getPath(host, path + file);
        });
    }
    """
    with VM(path_script) as vm:
        raw_urls = vm.call("getFiles", path, files, host)

    drop_webp = config.getboolean("nowebp")
    query = urlencode(params)

    result = []
    for image in raw_urls:
        if drop_webp and image.endswith(".webp"):
            image = image[:-5]
        result.append("{file}?{params}".format(file=image, params=query))
    return result