def get_images(html, url):
    key = search(r'id="dm5_key".+?<script[^>]+?>\s*eval(.+?)</script>', html, DOTALL)
    if key:
        key = eval(key.group(1)).split(";")[1]
        key = search(r"=(.+)$", key).group(1)
        key = eval(key)
    else:
        key = ""
    count = search(r"DM5_IMAGE_COUNT=(\d+);", html).group(1)
    cid = search(r"DM5_CID=(\d+);", html).group(1)
    s = []
    for p in range(1, int(count) + 1):
        fun_url = urljoin(
            url,
            "chapterfun.ashx?cid={}&page={}&language=1&key={}&gtk=6".format(
                cid, p, key))
        s.append(create_grabber(fun_url, url))
    global first_grabber
    first_grabber = s[0]
    return s

def test_VMError(self):
    with self.assertRaisesRegex(VMError, "foo"):
        eval("throw new Error('foo')")

    # doesn't inherit Error
    with self.assertRaisesRegex(VMError, "foo"):
        eval("throw 'foo'")

def test_eval(self):
    with self.subTest("one line eval"):
        r = eval("'foo' + 'bar'")
        self.assertEqual(r, "foobar")

    with self.subTest("multiline"):
        r = eval("""
            var foo = x => x + 'bar';
            foo('foo');
        """)
        self.assertEqual(r, "foobar")

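# NOTE: the eval exercised by these tests is a JavaScript eval, not the
# Python builtin. Judging from the node_vm2 usage elsewhere in this
# collection, the assumed import would be along these lines:
#
#     from node_vm2 import eval, VMError
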
def get_images_eval(html, url):
    # Set base url
    base = "http://images.dmzj.com/"
    # Get urls
    html = html.replace("\n", "")
    s = re.search(r"page = '';\s*(.+?);\s*var g_comic_name", html).group(1)
    pages = eval(s + "; pages")
    pages = eval(pages)
    # thumbs.db?!
    # http://manhua.dmzj.com/zhuoyandexiana/3488-20.shtml
    return [base + page for page in pages
            if page and not page.lower().endswith("thumbs.db")]

def get_images(html, url):
    hash = re.search("/artwork/([^/]+)", url).group(1)
    pattern = r"cache\.put\('/projects/{hash}\.json', ('.+?')\);$".format(hash=hash)
    data_json = eval(re.search(pattern, html, re.M).group(1))
    data = json.loads(data_json)
    return [a["image_url"] for a in data["assets"]]

def get_images(html, url):
    if html[0] == '"':
        # the response is a JSON-encoded string; decode it first
        html = json.loads(html)
    key = re.search(r'var KEY = "([^"]+)', html).group(1)
    cartoon_id = re.search(r'var CARTOON_ID = "([^"]+)', html).group(1)
    chapter_id = re.search(r'var CHAPTER_ID = "([^"]+)', html).group(1)
    page = re.search(r'var PAGE = "([^"]+)', html).group(1)
    total_page = re.search(r'var TOTAL_PAGE = "([^"]+)', html).group(1)
    if int(page) < int(total_page):
        next_page_cache[url] = urljoin(
            url, re.search('href="([^"]+)">下一頁', html).group(1))
    code = grabhtml(
        urljoin(url, "/comicseries/getpictrue.html"),
        method="POST",
        data={
            "key": key,
            "cartoon_id": cartoon_id,
            "chapter_id": chapter_id,
            "page": page
        },
        header={"X-Requested-With": "XMLHttpRequest"})
    data = eval(code)
    return data["current"]

def get_images(html, url):
    data = re.search(r"var DATA\s*=\s*'[^']+'", html).group()
    nonce = re.search(r"window\.nonce = (.+)", html).group(1)
    nonce2 = re.search(r"window\[.+?=(.+)", html)
    nonce2 = nonce2.group(1) if nonce2 else None
    view_js = re.search(r'src="([^"]+?page\.chapter\.view[^"]+?\.js[^"]*)', html).group(1)
    view_js = grabhtml(urljoin(url, view_js))
    view_js = re.search(r"(eval\(.+?)\}\(\)", view_js, re.DOTALL).group(1)
    code = "\n".join([
        data,
        """
        function createDummy() {
            return new Proxy(() => true, {
                get: () => createDummy()
            });
        }
        const window = document = createDummy();
        """,
        "const nonce = {};".format(nonce2 or nonce),
        "const W = {DATA, nonce};",
        view_js
    ])
    data = node_vm2.eval(code)
    return [p["url"] for p in data["picture"]]

def get_init_data(html):
    js = re.search("(var globalInitData =.+?)</script>", html, re.DOTALL).group(1)
    # neutralize Object.freeze so the page script can't freeze the data
    # before we read it back out of the sandbox
    return eval("""
        Object.freeze = n => n;
    """ + js + """
        globalInitData;
    """)

def get_images(html, url):
    pages_js = re.search(r'page_url":(\[[^\]]+\])', html).group(1)
    pages = eval(pages_js)
    # thumbs.db?!
    # http://manhua.dmzj.com/zhuoyandexiana/3488-20.shtml
    return [page for page in pages
            if page and not page.lower().endswith("thumbs.db")]

def grabber():
    # `fun` and `url` are free variables supplied by the enclosing factory
    text = grabhtml(fun, referer=url)
    d = eval(text + """;
        ((typeof (hd_c) != 'undefined' && hd_c.length > 0 && typeof (isrevtt) != 'undefined')
            ? hd_c : d)
    """)
    return d[0]

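# A sketch of the assumed enclosing factory: the first dm5 get_images in
# this collection calls create_grabber(fun_url, url), so the closure above
# presumably gets its `fun` and `url` from a wrapper along these lines:
def create_grabber(fun, url):
    def grabber():
        ...  # body as above
    return grabber
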
def get_images(html, url):
    key = search(r'id="dm5_key".+?<script[^>]+?>\s*eval(.+?)</script>', html, DOTALL)
    if key:
        key = eval(key.group(1)).split(";")[1]
        key = search(r"=(.+)$", key).group(1)
        key = eval(key)
    else:
        key = ""
    count = int(search(r"DM5_IMAGE_COUNT=(\d+);", html).group(1))
    cid = search(r"DM5_CID=(\d+);", html).group(1)
    mid = search(r"DM5_MID=(\d+);", html).group(1)
    dt = search(r'DM5_VIEWSIGN_DT="([^"]+)', html).group(1)
    sign = search(r'DM5_VIEWSIGN="([^"]+)', html).group(1)

    pages = {}

    def grab_page(page):
        params = {
            "cid": cid,
            "page": page + 1,
            "language": 1,
            "key": key,
            "gtk": 6,
            "_cid": cid,
            "_mid": mid,
            "_dt": dt,
            "_sign": sign
        }
        fun_url = urljoin(url, "chapterfun.ashx")
        text = grabhtml(fun_url, referer=url, params=params)
        d = eval(text)
        for i, image in enumerate(d):
            pages[i + page] = image

    def create_page_getter(page):
        def getter():
            if page not in pages:
                grab_page(page)
            return pages[page]
        return getter

    return [create_page_getter(p) for p in range(count)]

def get_images(html, url):
    key = re.search(r'root\.YUI_config\.flickr\.api\.site_key = "([^"]+)', html).group(1)
    model = re.search(r"Y\.ClientApp\.init\(([\s\S]+?)\)\s*\.then", html).group(1)
    data = eval(("auth = null, reqId = null, model = {model}, "
                 "model.modelExport['photo-models'][0]").format(model=model))
    return query_video(data["id"], data["secret"], key)

def get_images(html, url): js = re.search(r"(window\.DATA = [\s\S]+?)</script>", html).group(1) imgs = eval(""" const window = {}; """ + js + """ window.PG_CONFIG.images.map(i => i.url.slice(0, i.url.length - window.DATA.seedLength)); """) return imgs
def get_state(html):
    js = re.search(r"(window\.__INITIAL_STATE__[\s\S]+?)</script>", html).group(1)
    return eval("""
        const window = {};
    """ + js + """
        ;window.__INITIAL_STATE__
    """)

def get_images(html, url):
    key = re.search(r'id="dm5_key".+?<script[^>]+?>\s*eval(.+?)</script>', html, re.DOTALL)
    if key:
        key = eval(key.group(1)).split(";")[1]
        key = re.search(r"=(.+)$", key).group(1)
        key = eval(key)
    else:
        key = ""
    count = int(re.search(r"DM5_IMAGE_COUNT=(\d+);", html).group(1))
    cid = re.search(r"DM5_CID=(\d+);", html).group(1)
    mid = re.search(r"DM5_MID=(\d+);", html).group(1)
    dt = re.search(r'DM5_VIEWSIGN_DT="([^"]+)', html).group(1)
    sign = re.search(r'DM5_VIEWSIGN="([^"]+)', html).group(1)

    pages = {}

    def grab_page(page):
        params = {
            "cid": cid,
            "page": page + 1,
            "language": 1,
            "key": key,
            "gtk": 6,
            "_cid": cid,
            "_mid": mid,
            "_dt": dt,
            "_sign": sign
        }
        fun_url = urljoin(url, "chapterfun.ashx")
        text = grabhtml(fun_url, referer=url, params=params)
        d = eval(text)
        for i, image in enumerate(d):
            pages[i + page] = image

    def create_page_getter(page):
        def getter():
            if page not in pages:
                grab_page(page)
            return pages[page]
        return getter

    return [create_page_getter(p) for p in range(count)]

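# Usage sketch (assumed caller contract, not shown in this file): the list
# returned above holds lazy getters. Calling one fetches its page on first
# access, and grab_page memoizes every image that response carried, so
# later getters from the same batch return without another request.
#
#     getters = get_images(html, url)
#     first_image_url = getters[0]()
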
def get_images(html, url):
    scripts.fetch(
        html, url,
        [r"crypto-js\.js", r"decrypt\d+\.js", r"config\.js", r"common\.js"])
    pre_js = re.search(r"(var chapterImages.+?)</script>", html, re.DOTALL).group(1)
    main_js = re.search(r"decrypt\d+\(.+", html).group()
    images = eval("""
        (function () {
            function atob(data) {
                return Buffer.from(data, "base64").toString("binary");
            }
            function createLocalStorage() {
                const storage = {};
                return {setItem, getItem, removeItem};
                function setItem(key, value) {
                    storage[key] = value;
                }
                function getItem(key) {
                    return storage[key];
                }
                function removeItem(key) {
                    delete storage[key];
                }
            }
            const exports = undefined;
            const toastr = {options: {}};
            const top = {
                location: {pathname: ""},
                localStorage: createLocalStorage()
            };
            const jQuery = Object.assign(() => {}, {
                cookie: () => false,
                event: {trigger() {}}
            });
            const $ = jQuery;
            const window = top;
            const SinTheme = {
                initChapter() {},
                getPage() {}
            };
    """ + pre_js + str(scripts) + main_js + """
            const s = [];
            for (let i = 0; i < chapterImages.length; i++) {
                s.push(SinMH.getChapterImage(i + 1));
            }
            return s;
        }).call(global)
    """)
    return images

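# `scripts` above is an assumed module-level helper, not defined in this
# file. A minimal sketch consistent with how it is used -- fetch() downloads
# the <script> sources whose URLs match the given patterns, and str(scripts)
# concatenates them for the sandbox:
class ScriptCache:
    def __init__(self):
        self.cache = {}

    def fetch(self, html, url, patterns):
        # hypothetical: collect matching script src URLs, download each once
        for js_url in re.findall(r'src="([^"]+\.js[^"]*)"', html):
            if any(re.search(p, js_url) for p in patterns):
                if js_url not in self.cache:
                    self.cache[js_url] = grabhtml(urljoin(url, js_url))

    def __str__(self):
        return "\n".join(self.cache.values())

scripts = ScriptCache()
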
def get_images(html, url):
    match = re.search("/artwork/([^/]+)", url)
    if match:
        hash = match.group(1)
        pattern = r"cache\.put\('/projects/{hash}\.json', ('.+?')\);$".format(hash=hash)
        data_json = eval(re.search(pattern, html, re.M).group(1))
    elif re.search(r"/projects/(\w+)\.json", url):
        data_json = html
    else:
        raise Exception("Unknown page {}".format(url))
    data = json.loads(data_json)
    return [a["image_url"] for a in data["assets"]]

def get_images(html, url):
    # Set base url
    base = "http://images.dmzj.com/"
    # Get urls
    s = re.search(r"page = '';\s+([^\n]+)", html).group(1)
    pages = eval(s + "; pages")
    pages = re.search('"page_url":"([^"]+)', pages).group(1)
    pages = re.split(r"\r?\n", pages)
    # thumbs.db?!
    # http://manhua.dmzj.com/zhuoyandexiana/3488-20.shtml
    return [base + page for page in pages
            if page and not page.lower().endswith("thumbs.db")]

def get_images(html, url):
    js = re.search('(var siteName.+?)</script>', html, re.DOTALL).group(1)
    # http://www.gufengmh.com/js/config.js
    config = grabhtml(urljoin(url, "/js/config.js"))
    return eval("""
        const toastr = { options: {} };
    """ + config + js + """
        const domain = SinConf.resHost[0].domain[0];
        chapterImages.map(i => `${domain}/${chapterPath}${i}`);
    """)

def get_images(html, url):
    # build js context
    js = "var window = global;"
    configjs_url = re.search(r'src="(http://[^"]+?/config_\w+?\.js)"', html).group(1)
    configjs = grabhtml(configjs_url, referer=url)
    js += re.search(r'^(var CryptoJS|window\["\\x65\\x76\\x61\\x6c"\]).+',
                    configjs, re.MULTILINE).group()
    js += re.search(
        r'<script type="text/javascript">((eval|window\["\\x65\\x76\\x61\\x6c"\]).+?)</script',
        html).group(1)
    with VM(js) as vm:
        files, path = vm.run("[cInfo.files, cInfo.path]")

    # find server
    # "http://c.3qfm.com/scripts/core_5C348B32A78647FF4208EACA42FC5F84.js"
    # getpath()
    corejs_url = re.search(r'src="(http://[^"]+?/core_\w+?\.js)"', html).group(1)
    corejs = grabhtml(corejs_url, referer=url)

    # cache server list
    servs = re.search(r"var servs=(.+?),pfuncs=", corejs).group(1)
    servs = eval(servs)
    servs = [host["h"] for category in servs for host in category["hosts"]]
    global servers
    servers = CycleList(servs)
    host = servers.get()

    utils = re.search(r"SMH\.(utils=.+?),SMH\.imgData=", corejs).group(1)
    js = utils + """;
        function getFiles(path, files, host) {
            // lets try if it will be faster in javascript
            return files.map(function(file){
                return utils.getPath(host, path + file);
            });
        }
    """
    with VM(js) as vm:
        images = vm.call("getFiles", path, files, host)

    if config.getboolean("nowebp"):
        images = map(lambda i: i[:-5] if i.endswith(".webp") else i, images)

    return images

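# `CycleList` is an assumed helper, not defined in this file. A minimal
# sketch matching its use above (servers.get() returns the next mirror
# host on each call); note the later variant of this function uses
# itertools.cycle directly instead:
import itertools

class CycleList:
    def __init__(self, items):
        self._it = itertools.cycle(items)

    def get(self):
        return next(self._it)
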
def get_images(html, url):
    data = re.search(r"var DATA\s*=\s*'[^']+'", html).group()
    nonce = re.search(r"window\.nonce = (.+)", html).group(1)
    nonce2 = re.search(r"window\[.+?=(.+)", html)
    nonce2 = nonce2.group(1) if nonce2 else None
    view_js = re.search(r'src="([^"]+?page\.chapter\.view[^"]+?\.js[^"]+)', html).group(1)
    view_js = grabhtml(urljoin(url, view_js))
    view_js = re.search(r"(eval\(.+?)\}\(\)", view_js, re.DOTALL).group(1)
    code = (data + ";var nonce = " + (nonce2 or nonce) +
            ";var W = {DATA, nonce};" + view_js + ";_v")
    data = node_vm2.eval(code)
    return [p["url"] for p in data["picture"]]

def get_images(html, url):
    key = re.search(r'root\.YUI_config\.flickr\.api\.site_key = "([^"]+)', html).group(1)
    model = re.search(r"Y\.ClientApp\.init\(([\s\S]+?)\)\s*\.then", html).group(1)
    js = """
        const auth = null, reqId = null;
        const model = """ + model + """;
        model.modelExport.main["photo-models"][0]
    """
    data = eval(js)
    if data.get("mediaType") == "video":
        return query_video(data["id"], data["secret"], key)
    max_size = max(data["sizes"].values(), key=lambda s: s.get("width", 0))
    return urljoin(url, max_size["url"])

def get_images(html, url):
    js_url = re.search(r'src="([^"]+base64\.js)"', html).group(1)
    js_content = grabhtml(urljoin(url, js_url))
    data = re.search('(var chapterTree=.+?)</script>', html, re.DOTALL).group(1)
    match = re.search(r'window\["\\x65\\x76\\x61\\x6c"\](.+?)</script>', html, re.DOTALL)
    data2 = match.group(1) if match else ""
    imgs = eval("""
        const document = {{}};
        {};
        {};
        eval({});
        getUrlpics().map(getrealurl);
    """.format(js_content, data, data2))
    return imgs

def eval_script(scripts, name):
    name = name.replace('http://', '').replace('.', '_').replace('/', '_')
    new = ''
    for script in scripts:
        for line in script.replace('//-->', '').split('\n'):
            if line:  # skip empty lines
                new += line
    if 'document.write(' in new:
        js_target = new.replace('document.write(', '').replace('));', ');')
        a = eval(js_target)
        pa = './Export/%s/%s.dmp' % (datetime.date.today(), name)
        with open(pa, 'a+') as f:
            f.write('\n#Auto generated\n#' + str(datetime.date.today()) + '\n\n' + a)
        print('[+] Saved under ./Export/%s/%s' % (datetime.date.today(), name))
    else:
        print('[-] Unable to process this script. Update me.')
        return 'None'

def get_images(html, url):
    global global_js
    js = re.search(r"(var ret_classurl.+?)</script>", html, re.DOTALL).group(1)
    if not global_js:
        global_js_url = re.search(r'src="([^"]+global\.js)"', html).group(1)
        global_js = grabhtml(urljoin(url, global_js_url))
        global_js = re.search(r"(var WebimgServer.+?)window\.onerror",
                              global_js, re.DOTALL).group(1)
    imgs, server = eval("""
        function request() {
            return "";
        }
    """ + js + global_js + """;
        [photosr.slice(1), WebimgServerURL[0]]
    """)
    return [urljoin(server, img) for img in imgs]

def get_episodes(html, url):
    data_js = re.search("initIntroData(.+?);", html, re.DOTALL).group(1)
    data = eval(data_js)
    ep_data = []
    for category in data:
        ep_data += category["data"]
    ep_data = sorted(ep_data, key=lambda data: data["chapter_order"])
    episodes = []
    for data in ep_data:
        ep_url = "/view/{}/{}.html".format(data["comic_id"], data["id"])
        title = data["title"] + data["chapter_name"]
        episodes.append(Episode(title, urljoin(url, ep_url)))
    return episodes

def grab_page(page):
    # inner helper of the dm5 get_images above; closes over cid, mid, key,
    # dt, sign, url, and the shared `pages` cache
    params = {
        "cid": cid,
        "page": page + 1,
        "language": 1,
        "key": key,
        "gtk": 6,
        "_cid": cid,
        "_mid": mid,
        "_dt": dt,
        "_sign": sign
    }
    fun_url = urljoin(url, "chapterfun.ashx")
    text = grabhtml(fun_url, referer=url, params=params)
    d = eval(text)
    for i, image in enumerate(d):
        pages[i + page] = image

def get_images(html, url):
    script = re.search(r'<script>\s*(var qTcms_Cur[\s\S]+?)</script>', html).group(1)
    show_js_src = re.search(r'src="([^"]+?show\.\d+\.js[^"]*)', html).group(1)
    show_js = grabhtml(urljoin(url, show_js_src))
    real_pic_fn = re.search(
        r'(function f_qTcms_Pic_curUrl_realpic[\s\S]+?)function',
        show_js).group(1)
    code = """
        {script}
        {real_pic_fn}
        Buffer.from(qTcms_S_m_murl_e, "base64")
            .toString()
            .split("$qingtiandy$")
            .map(f_qTcms_Pic_curUrl_realpic);
    """.format(script=script, real_pic_fn=real_pic_fn)
    return [urljoin(url, i) for i in eval(code)]

def get_images(html, url):
    script = re.search(r'<script>\s*(var qTcms_Cur[\s\S]+?)</script>', html).group(1)
    show_js_src = re.search(r'src="([^"]+?show\.\d+\.js[^"]*)', html).group(1)
    show_js = grabhtml(urljoin(url, show_js_src))
    real_pic_fn = re.search(
        r'(function f_qTcms_Pic_curUrl_realpic[\s\S]+?)function',
        show_js).group(1)
    code = """
        {script}
        {real_pic_fn}
        function base64_decode(data) {{
            return Buffer.from(data, "base64").toString();
        }}
        // m.wuyouhui.net/template/wap1/css/d7s/js/show.20170501.js?20190506201115
        Buffer.from(qTcms_S_m_murl_e, "base64")
            .toString()
            .split("$qingtiandy$")
            .filter(u => !/^(--|\+)/.test(u))
            .map(f_qTcms_Pic_curUrl_realpic);
    """.format(script=script, real_pic_fn=real_pic_fn)
    return [urljoin(url, i) for i in eval(code)]

def get_images(html, url): if "pixiv.user.loggedIn = true" not in html: raise PauseDownloadError("you didn't login!") # ugoku rs = re.search(r"pixiv\.context\.ugokuIllustFullscreenData\s+= ([^;]+)", html) if rs: json = rs.group(1) o = eval("(" + json + ")") cache["frames"] = o["frames"] return [o["src"]] # new image layout (2014/12/14) rs = re.search(r'class="big" data-src="([^"]+)"', html) if rs: return [rs.group(1)] rs = re.search(r'data-src="([^"]+)" class="original-image"', html) if rs: return [rs.group(1)] # old image layout imgs = get_images_old(html, url) if imgs: return imgs # restricted rs = re.search('<section class="restricted-content">', html) if rs: raise SkipEpisodeError # error page rs = re.search('class="error"', html) if rs: raise SkipEpisodeError # id doesn't exist rs = re.search("pixiv.context.illustId", html) if not rs: raise SkipEpisodeError
def get_images_old(html, url):
    match = re.search(r'"works_display"><a (?:class="[^"]*" )?href="([^"]+)"', html)
    if not match:
        return
    inner_url = match.group(1)
    html = grabhtml(urljoin(url, inner_url), referer=url)

    if "mode=big" in inner_url:
        # single image
        img = re.search(r'src="([^"]+)"', html).group(1)
        return [img]

    if "mode=manga" in inner_url:
        # multiple images
        imgs = []

        def create_grabber(url):
            def grabber():
                html = grabhtml(url)
                return re.search(r'img src="([^"]+)"', html).group(1)
            return grabber

        for match in re.finditer(
                r'a href="(/member_illust\.php\?mode=manga_big[^"]+)"', html):
            imgs.append(create_grabber(urljoin(url, match.group(1))))

        # New manga reader (2015/3/18)
        # http://www.pixiv.net/member_illust.php?mode=manga&illust_id=19254298
        if not imgs:
            for match in re.finditer(r'originalImages\[\d+\] = ("[^"]+")', html):
                img = eval(match.group(1))
                imgs.append(img)

        return imgs

def get_images(html, url):
    check_login(html)
    init_data = re.search(r"(var globalInitData[\s\S]+?)</script>", html).group(1)
    init_data = eval("""
        Object.freeze = null;
    """ + init_data + """
        globalInitData;
    """)
    illust_id = re.search(r"illust_id=(\d+)", url).group(1)
    illust = init_data["preload"]["illust"][illust_id]

    if illust["illustType"] != 2:
        # normal images
        first_img = illust["urls"]["original"]
        return [get_nth_img(first_img, i) for i in range(illust["pageCount"])]

    # https://www.pixiv.net/member_illust.php?mode=medium&illust_id=44298524
    ugoira_meta = "https://www.pixiv.net/ajax/illust/{}/ugoira_meta".format(illust_id)
    ugoira_meta = json.loads(grabhtml(ugoira_meta))
    cache["frames"] = ugoira_meta["body"]["frames"]
    return ugoira_meta["body"]["originalSrc"]

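# `get_nth_img` is assumed above but not shown here. Pixiv original image
# URLs end in "_p0.<ext>", so a minimal sketch could swap in the page index:
def get_nth_img(first_img, n):
    # hypothetical helper: replace the trailing _p0 page index with _p<n>
    return re.sub(r"_p0(\.\w+)$", r"_p{}\1".format(n), first_img)
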
def get_images(html, url):
    # build js context
    js = """
        var window = global;
        var cInfo;
        var SMH = {
            imgData: function(data) {
                cInfo = data;
                return {
                    preInit: function(){}
                };
            }
        };
    """
    configjs_url = re.search(r'src="(https?://[^"]+?/config_\w+?\.js)"', html).group(1)
    configjs = grabhtml(configjs_url, referer=url)
    js += re.search(r'^(var CryptoJS|window\["\\x65\\x76\\x61\\x6c"\]).+',
                    configjs, re.MULTILINE).group()
    js += re.search(
        r'<script type="text/javascript">((eval|window\["\\x65\\x76\\x61\\x6c"\]).+?)</script',
        html).group(1)
    with VM(js) as vm:
        files, path, md5, cid = vm.run(
            "[cInfo.files, cInfo.path, cInfo.sl.md5, cInfo.cid]")

    # find server
    # "http://c.3qfm.com/scripts/core_5C348B32A78647FF4208EACA42FC5F84.js"
    # getpath()
    corejs_url = re.search(r'src="(https?://[^"]+?/core_\w+?\.js)"', html).group(1)
    corejs = grabhtml(corejs_url, referer=url)

    # cache server list
    servs = re.search(r"var servs=(.+?),pfuncs=", corejs).group(1)
    servs = eval(servs)
    servs = [host["h"] for category in servs for host in category["hosts"]]
    global servers
    servers = cycle(servs)
    host = next(servers)

    utils = re.search(r"SMH\.(utils=.+?),SMH\.imgData=", corejs).group(1)
    js = """
        var location = {
            protocol: "http:"
        };
    """ + utils + """;
        function getFiles(path, files, host) {
            // lets try if it will be faster in javascript
            return files.map(function(file){
                return utils.getPath(host, path + file);
            });
        }
    """
    with VM(js) as vm:
        images = vm.call("getFiles", path, files, host)

    if config.getboolean("nowebp"):
        images = map(lambda i: i[:-5] if i.endswith(".webp") else i, images)

    params = urlencode({
        "cid": cid,
        "md5": md5
    })
    images = ["{file}?{params}".format(file=i, params=params) for i in images]
    return images

def get_images(html, url): js = re.search(r"(eval\([\s\S]+?)</script", html).group(1) return eval(js + ";newImgs")