Code Example #1
File: dm5.py Project: iamtowne/ComicCrawler
def get_images(html, url):
    key = search(r'id="dm5_key".+?<script[^>]+?>\s*eval(.+?)</script>', html,
                 DOTALL)

    if key:
        key = eval(key.group(1)).split(";")[1]
        key = search(r"=(.+)$", key).group(1)
        key = eval(key)

    else:
        key = ""

    count = search("DM5_IMAGE_COUNT=(\d+);", html).group(1)
    cid = search("DM5_CID=(\d+);", html).group(1)
    s = []
    for p in range(1, int(count) + 1):
        fun_url = urljoin(
            url,
            "chapterfun.ashx?cid={}&page={}&language=1&key={}&gtk=6".format(
                cid, p, key))
        s.append(create_grabber(fun_url, url))

    global first_grabber
    first_grabber = s[0]

    return s
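
These listings are excerpts, so imports and helpers (create_grabber, grabhtml, the first_grabber global) live elsewhere in each module. Judging from the calls, the example above appears to assume a header along these lines, with node_vm2's JavaScript eval shadowing the Python builtin:

# hedged reconstruction of the implied imports, inferred from usage
from re import search, DOTALL
from urllib.parse import urljoin
from node_vm2 import eval  # evaluates JavaScript in Node, not Python code
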
Code Example #2
    def test_VMError(self):
        with self.assertRaisesRegex(VMError, "foo"):
            eval("throw new Error('foo')")

        # doesn't inherit Error
        with self.assertRaisesRegex(VMError, "foo"):
            eval("throw 'foo'")
Code Example #3
    def test_eval(self):
        with self.subTest("one line eval"):
            r = eval("'foo' + 'bar'")
            self.assertEqual(r, "foobar")

        with self.subTest("multiline"):
            r = eval("""
				var foo = x => x + 'bar';
				foo('foo');
			""")
            self.assertEqual(r, "foobar")
Code Example #4
File: dmzj.py Project: eight04/ComicCrawler
def get_images_eval(html, url):
	# Set base url
	base = "http://images.dmzj.com/"

	# Get urls
	html = html.replace("\n", "")
	s = re.search(r"page = '';\s*(.+?);\s*var g_comic_name", html).group(1)
	pages = eval(s + "; pages")
	pages = eval(pages)

	# thumbs.db?!
	# http://manhua.dmzj.com/zhuoyandexiana/3488-20.shtml
	return [base + page for page in pages if page and not page.lower().endswith("thumbs.db")]
Code Example #5
def get_images(html, url):
    hash = re.search("/artwork/([^/]+)", url).group(1)
    pattern = "cache\.put\('/projects/{hash}\.json', ('.+?')\);$".format(
        hash=hash)
    data_json = eval(re.search(pattern, html, re.M).group(1))
    data = json.loads(data_json)
    return [a["image_url"] for a in data["assets"]]
Code Example #6
def get_images(html, url):
    if html[0] == '"':
        # wtf
        html = json.loads(html)
    key = re.search(r'var KEY = "([^"]+)', html).group(1)
    cartoon_id = re.search(r'var CARTOON_ID = "([^"]+)', html).group(1)
    chapter_id = re.search(r'var CHAPTER_ID = "([^"]+)', html).group(1)
    page = re.search(r'var PAGE = "([^"]+)', html).group(1)
    total_page = re.search(r'var TOTAL_PAGE = "([^"]+)', html).group(1)

    if int(page) < int(total_page):
        # "下一頁" is the site's "next page" link text
        next_page_cache[url] = urljoin(
            url,
            re.search('href="([^"]+)">下一頁', html).group(1))

    code = grabhtml(urljoin(url, "/comicseries/getpictrue.html"),
                    method="POST",
                    data={
                        "key": key,
                        "cartoon_id": cartoon_id,
                        "chapter_id": chapter_id,
                        "page": page
                    },
                    header={"X-Requested-With": "XMLHttpRequest"})
    data = eval(code)
    return data["current"]
Code Example #7
def get_images(html, url):
    data = re.search("var DATA\s*=\s*'[^']+'", html).group()
    nonce = re.search("window\.nonce = (.+)", html).group(1)
    nonce2 = re.search("window\[.+?=(.+)", html)
    nonce2 = nonce2.group(1) if nonce2 else None

    view_js = re.search('src="([^"]+?page\.chapter\.view[^"]+?\.js[^"]*)',
                        html).group(1)
    view_js = grabhtml(urljoin(url, view_js))
    view_js = re.search("(eval\(.+?)\}\(\)", view_js, re.DOTALL).group(1)

    code = "\n".join([
        data, """
		function createDummy() {
			return new Proxy(() => true, {
				get: () => createDummy()
			});
		}
		const window = document = createDummy();
		""", "const nonce = {};".format(nonce2 or nonce), "const W = {DATA, nonce};",
        view_js
    ])

    data = node_vm2.eval(code)
    return [p["url"] for p in data["picture"]]
Code Example #8
File: pixiv.py Project: eight04/ComicCrawler
def get_init_data(html):
	js = re.search("(var globalInitData =.+?)</script>", html, re.DOTALL).group(1)
	return eval("""
	Object.freeze = n => n;
	""" + js + """
	globalInitData;
	""")
Code Example #9
File: dmzj.py Project: whtsky/ComicCrawler
def get_images_eval(html, url):
    # Set base url
    base = "http://images.dmzj.com/"

    # Get urls
    html = html.replace("\n", "")
    s = re.search(r"page = '';\s*(.+?);\s*var g_comic_name", html).group(1)
    pages = eval(s + "; pages")
    pages = eval(pages)

    # thumbs.db?!
    # http://manhua.dmzj.com/zhuoyandexiana/3488-20.shtml
    return [
        base + page for page in pages
        if page and not page.lower().endswith("thumbs.db")
    ]
Code Example #10
File: dmzj_m.py Project: eight04/ComicCrawler
def get_images(html, url):
	pages_js = re.search(r'page_url":(\[[^\]]+\])', html).group(1)
	pages = eval(pages_js)

	# thumbs.db?!
	# http://manhua.dmzj.com/zhuoyandexiana/3488-20.shtml
	return [page for page in pages if page and not page.lower().endswith("thumbs.db")]
Code Example #11
File: dm5.py Project: iamtowne/ComicCrawler
    def grabber():
        text = grabhtml(fun, referer=url)
        d = eval(text + """;
			((typeof (hd_c) != 'undefined' && hd_c.length > 0 && 
			typeof (isrevtt) != 'undefined') ? hd_c : d)
		""")
        return d[0]
Code Example #12
File: dm5.py Project: RanDomWoW/ComicCrawler
def get_images(html, url):
    key = search(r'id="dm5_key".+?<script[^>]+?>\s*eval(.+?)</script>', html,
                 DOTALL)

    if key:
        key = eval(key.group(1)).split(";")[1]
        key = search(r"=(.+)$", key).group(1)
        key = eval(key)

    else:
        key = ""

    count = int(search("DM5_IMAGE_COUNT=(\d+);", html).group(1))
    cid = search("DM5_CID=(\d+);", html).group(1)
    mid = search("DM5_MID=(\d+);", html).group(1)
    dt = search('DM5_VIEWSIGN_DT="([^"]+)', html).group(1)
    sign = search('DM5_VIEWSIGN="([^"]+)', html).group(1)

    pages = {}

    def grab_page(page):
        params = {
            "cid": cid,
            "page": page + 1,
            "language": 1,
            "key": key,
            "gtk": 6,
            "_cid": cid,
            "_mid": mid,
            "_dt": dt,
            "_sign": sign
        }
        fun_url = urljoin(url, "chapterfun.ashx")
        text = grabhtml(fun_url, referer=url, params=params)
        d = eval(text)
        for i, image in enumerate(d):
            pages[i + page] = image

    def create_page_getter(page):
        def getter():
            if page not in pages:
                grab_page(page)
            return pages[page]

        return getter

    return [create_page_getter(p) for p in range(count)]
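
The returned list holds lazy getters: nothing is fetched until a page is first requested, and one chapterfun.ashx response can fill several slots of the shared pages cache. A hypothetical consumer might look like this:

# hedged usage sketch; html and url come from the surrounding crawler
getters = get_images(html, url)
first = getters[0]()            # triggers grab_page(0) on first access
urls = [g() for g in getters]   # later calls hit the shared pages cache
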
Code Example #13
def get_init_data(html):
    js = re.search("(var globalInitData =.+?)</script>", html,
                   re.DOTALL).group(1)
    return eval("""
	Object.freeze = n => n;
	""" + js + """
	globalInitData;
	""")
Code Example #14
def get_images(html, url):
    key = re.search(r'root\.YUI_config\.flickr\.api\.site_key = "([^"]+)',
                    html).group(1)
    model = re.search(r"Y\.ClientApp\.init\(([\s\S]+?)\)\s*\.then",
                      html).group(1)
    data = eval(("auth = null, reqId = null, model = {model}, "
                 "model.modelExport['photo-models'][0]").format(model=model))
    return query_video(data["id"], data["secret"], key)
Code Example #15
File: flickr.py Project: eight04/ComicCrawler
def get_images(html, url):
	key = re.search(r'root\.YUI_config\.flickr\.api\.site_key = "([^"]+)', html).group(1)
	model = re.search(r"Y\.ClientApp\.init\(([\s\S]+?)\)\s*\.then", html).group(1)
	data = eval((
		"auth = null, reqId = null, model = {model}, "
		"model.modelExport['photo-models'][0]"
	).format(model=model))
	return query_video(data["id"], data["secret"], key)
Code Example #16
def get_images(html, url):
    js = re.search(r"(window\.DATA = [\s\S]+?)</script>", html).group(1)
    imgs = eval("""
	const window = {};
	""" + js + """
	window.PG_CONFIG.images.map(i => i.url.slice(0, i.url.length - window.DATA.seedLength));
	""")
    return imgs
Code Example #17
def get_state(html):
    js = re.search(r"(window\.__INITIAL_STATE__[\s\S]+?)</script>",
                   html).group(1)
    return eval("""
		const window = {};
	""" + js + """
		; window.__INITIAL_STATE__
	""")
Code Example #18
File: dm5.py Project: eight04/ComicCrawler
def get_images(html, url):
	key = re.search(r'id="dm5_key".+?<script[^>]+?>\s*eval(.+?)</script>', html, re.DOTALL)
	
	if key:
		key = eval(key.group(1)).split(";")[1]
		key = re.search(r"=(.+)$", key).group(1)
		key = eval(key)
		
	else:
		key = ""
		
	count = int(re.search("DM5_IMAGE_COUNT=(\d+);", html).group(1))
	cid = re.search("DM5_CID=(\d+);", html).group(1)
	mid = re.search("DM5_MID=(\d+);", html).group(1)
	dt = re.search('DM5_VIEWSIGN_DT="([^"]+)', html).group(1)
	sign = re.search('DM5_VIEWSIGN="([^"]+)', html).group(1)
	
	pages = {}
	
	def grab_page(page):
		params = {
			"cid": cid,
			"page": page + 1,
			"language": 1,
			"key": key,
			"gtk": 6,
			"_cid": cid,
			"_mid": mid,
			"_dt": dt,
			"_sign": sign
		}
		fun_url = urljoin(url, "chapterfun.ashx")
		text = grabhtml(fun_url, referer=url, params=params)
		d = eval(text)
		for i, image in enumerate(d):
			pages[i + page] = image
	
	def create_page_getter(page):
		def getter():
			if page not in pages:
				grab_page(page)
			return pages[page]
		return getter
	
	return [create_page_getter(p) for p in range(count)]
Code Example #19
def get_images(html, url):
    pages_js = re.search(r'page_url":(\[[^\]]+\])', html).group(1)
    pages = eval(pages_js)

    # thumbs.db?!
    # http://manhua.dmzj.com/zhuoyandexiana/3488-20.shtml
    return [
        page for page in pages
        if page and not page.lower().endswith("thumbs.db")
    ]
Code Example #20
def get_images(html, url):
    scripts.fetch(
        html, url,
        ["crypto-js\.js", "decrypt\d+\.js", "config\.js", "common\.js"])
    pre_js = re.search("(var chapterImages.+?)</script>", html,
                       re.DOTALL).group(1)
    main_js = re.search("decrypt\d+\(.+", html).group()

    images = eval("""
	(function () {
	
	function atob(data) {
		return Buffer.from(data, "base64").toString("binary");
	}
	
	function createLocalStorage() {
		const storage = {};
		return {setItem, getItem, removeItem};
		function setItem(key, value) {
			storage[key] = value;
		}
		function getItem(key) {
			return storage[key];
		}
		function removeItem(key) {
			delete storage[key];
		}
	}
	
	const exports = undefined;
	const toastr = {options: {}};
	const top = {
		location: {pathname: ""},
		localStorage: createLocalStorage()
	};
	const jQuery = Object.assign(() => {}, {
		cookie: () => false,
		event: {trigger() {}}
	});
	const $ = jQuery;
	const window = top;
	const SinTheme = {
		initChapter() {},
		getPage() {}
	};
	""" + pre_js + str(scripts) + main_js + """
	const s = [];
	for (let i = 0; i < chapterImages.length; i++) {
		s.push(SinMH.getChapterImage(i + 1));
	}
	return s;
	
	}).call(global)
	""")
    return images
Code Example #21
File: artstation.py Project: eight04/ComicCrawler
def get_images(html, url):
	match = re.search("/artwork/([^/]+)", url)
	if match:
		hash = match.group(1)
		pattern = "cache\.put\('/projects/{hash}\.json', ('.+?')\);$".format(hash=hash)
		data_json = eval(re.search(pattern, html, re.M).group(1))
	elif re.search("/projects/(\w+)\.json", url):
		data_json = html
	else:
		raise Exception("Unknown page {}".format(url))
	data = json.loads(data_json)
	return [a["image_url"] for a in data["assets"]]
Code Example #22
File: dmzj_www.py Project: eight04/ComicCrawler
def get_images(html, url):
	# Set base url
	base = "http://images.dmzj.com/"

	# Get urls
	s = re.search(r"page = '';\s+([^\n]+)", html).group(1)
	pages = eval(s + "; pages")
	pages = re.search('"page_url":"([^"]+)', pages).group(1)
	pages = re.split("\r?\n", pages)

	# thumbs.db?!
	# http://manhua.dmzj.com/zhuoyandexiana/3488-20.shtml
	return [base + page for page in pages if page and not page.lower().endswith("thumbs.db")]
Code Example #23
File: artstation.py Project: yachu01/ComicCrawler
def get_images(html, url):
    match = re.search("/artwork/([^/]+)", url)
    if match:
        hash = match.group(1)
        pattern = "cache\.put\('/projects/{hash}\.json', ('.+?')\);$".format(
            hash=hash)
        data_json = eval(re.search(pattern, html, re.M).group(1))
    elif re.search("/projects/(\w+)\.json", url):
        data_json = html
    else:
        raise Exception("Unknown page {}".format(url))
    data = json.loads(data_json)
    return [a["image_url"] for a in data["assets"]]
Code Example #24
File: gufeng.py Project: yachu01/ComicCrawler
def get_images(html, url):
	js = re.search('(var siteName.+?)</script>', html, re.DOTALL).group(1)
	# http://www.gufengmh.com/js/config.js
	config = grabhtml(urljoin(url, "/js/config.js"))
	return eval("""
	const toastr = {
		options: {}
	};
	""" + config + js + """
	const domain = SinConf.resHost[0].domain[0];
	chapterImages.map(i => `${domain}/${chapterPath}${i}`);
	""")
	
Code Example #25
def get_images(html, url):
    # build js context
    js = "var window = global;"

    configjs_url = re.search(r'src="(http://[^"]+?/config_\w+?\.js)"',
                             html).group(1)
    configjs = grabhtml(configjs_url, referer=url)
    js += re.search(r'^(var CryptoJS|window\["\\x65\\x76\\x61\\x6c"\]).+',
                    configjs, re.MULTILINE).group()

    js += re.search(
        r'<script type="text/javascript">((eval|window\["\\x65\\x76\\x61\\x6c"\]).+?)</script',
        html).group(1)

    with VM(js) as vm:
        files, path = vm.run("[cInfo.files, cInfo.path]")

    # find server
    # "http://c.3qfm.com/scripts/core_5C348B32A78647FF4208EACA42FC5F84.js"
    # getpath()
    corejs_url = re.search(r'src="(http://[^"]+?/core_\w+?\.js)"',
                           html).group(1)
    corejs = grabhtml(corejs_url, referer=url)

    # cache server list
    servs = re.search(r"var servs=(.+?),pfuncs=", corejs).group(1)
    servs = eval(servs)
    servs = [host["h"] for category in servs for host in category["hosts"]]

    global servers
    servers = CycleList(servs)

    host = servers.get()

    utils = re.search(r"SMH\.(utils=.+?),SMH\.imgData=", corejs).group(1)

    js = utils + """;
	function getFiles(path, files, host) {
		// lets try if it will be faster in javascript
		return files.map(function(file){
			return utils.getPath(host, path + file);
		});
	}
	"""
    with VM(js) as vm:
        images = vm.call("getFiles", path, files, host)

    if config.getboolean("nowebp"):
        images = map(lambda i: i[:-5] if i.endswith(".webp") else i, images)

    return images
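
VM here appears to be node_vm2's persistent sandbox: the constructor takes a script, run() evaluates further expressions in that context, and call() invokes a function defined there, exactly as the example uses them. A minimal hedged sketch of the workflow:

from node_vm2 import VM

# hedged sketch of the VM workflow used above
with VM("var x = 10; function add(a, b) { return a + b; }") as vm:
    print(vm.run("x + 1"))       # 11
    print(vm.call("add", 2, 3))  # 5
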
Code Example #26
File: qq.py Project: yachu01/ComicCrawler
def get_images(html, url):
	data = re.search("var DATA\s*=\s*'[^']+'", html).group()
	nonce = re.search("window\.nonce = (.+)", html).group(1)
	nonce2 = re.search("window\[.+?=(.+)", html)
	nonce2 = nonce2.group(1) if nonce2 else None
	
	view_js = re.search('src="([^"]+?page\.chapter\.view[^"]+?\.js[^"]+)', html).group(1)
	view_js = grabhtml(urljoin(url, view_js))
	view_js = re.search("(eval\(.+?)\}\(\)", view_js, re.DOTALL).group(1)
	
	code = data + ";var nonce = " + (nonce2 or nonce) + ";var W = {DATA, nonce};" + view_js + ";_v"
	
	data = node_vm2.eval(code)
	return [p["url"] for p in data["picture"]]
Code Example #27
def get_images(html, url):
    key = re.search(r'root\.YUI_config\.flickr\.api\.site_key = "([^"]+)',
                    html).group(1)
    model = re.search(r"Y\.ClientApp\.init\(([\s\S]+?)\)\s*\.then",
                      html).group(1)
    js = """
	const auth = null, reqId = null;
	const model = """ + model + """;
	model.modelExport.main["photo-models"][0]
	"""
    data = eval(js)
    if data.get("mediaType") == "video":
        return query_video(data["id"], data["secret"], key)
    max_size = max(data["sizes"].values(), key=lambda s: s.get("width", 0))
    return urljoin(url, max_size["url"])
Code Example #28
File: mh160.py Project: eight04/ComicCrawler
def get_images(html, url):
	js_url = re.search(r'src="([^"]+base64\.js)"', html).group(1)
	js_content = grabhtml(urljoin(url, js_url))
	data = re.search('(var chapterTree=.+?)</script>', html, re.DOTALL).group(1)
	match = re.search(r'window\["\\x65\\x76\\x61\\x6c"\](.+?)</script>', html, re.DOTALL)
	data2 = match.group(1) if match else ""

	imgs = eval("""
	const document = {{}};
	{};
	{};
	eval({});
	getUrlpics().map(getrealurl);
	""".format(js_content, data, data2))
	
	return imgs
Code Example #29
def get_images(html, url):
    # Set base url
    base = "http://images.dmzj.com/"

    # Get urls
    s = re.search(r"page = '';\s+([^\n]+)", html).group(1)
    pages = eval(s + "; pages")
    pages = re.search('"page_url":"([^"]+)', pages).group(1)
    pages = re.split("\r?\n", pages)

    # thumbs.db?!
    # http://manhua.dmzj.com/zhuoyandexiana/3488-20.shtml
    return [
        base + page for page in pages
        if page and not page.lower().endswith("thumbs.db")
    ]
Code Example #30
def eval_script(scripts, name):
    name = name.replace('http://', '').replace('.', '_').replace('/', '_')
    new = ''
    for script in scripts:
        for line in script.replace('//-->', '').split('\n'):
            # split('\n') never yields '\n' itself; test for non-empty instead
            if line:
                new += line
        if 'document.write(' in new:
            js_target = new.replace('document.write(', '').replace('));', ');')
            a = eval(js_target)
            pa = './Export/%s/%s.dmp' % (datetime.date.today(), name)
            with open(pa, 'a+') as f:
                f.write('\n#Auto generated\n#' + str(datetime.date.today()) + '\n\n' + a)
            print('[+] Saved under ./Export/%s/%s' % (datetime.date.today(), name))
        else:
            print('[-] Unable to process this script. Update me.')
            return 'None'
Code Example #31
File: chuixue.py Project: xieofxie/ComicCrawler
def get_images(html, url):
    global global_js
    js = re.search("(var ret_classurl.+?)</script>", html, re.DOTALL).group(1)
    if not global_js:
        global_js_url = re.search(r'src="([^"]+global\.js)"', html).group(1)
        global_js = grabhtml(urljoin(url, global_js_url))
        global_js = re.search("(var WebimgServer.+?)window\.onerror",
                              global_js, re.DOTALL).group(1)

    imgs, server = eval("""
	function request() {
		return "";
	}
	""" + js + global_js + """;
	[photosr.slice(1), WebimgServerURL[0]]
	""")
    return [urljoin(server, img) for img in imgs]
Code Example #32
def get_episodes(html, url):
    data_js = re.search("initIntroData(.+?);", html, re.DOTALL).group(1)
    data = eval(data_js)

    ep_data = []
    for category in data:
        ep_data += category["data"]
    ep_data = sorted(ep_data, key=lambda data: data["chapter_order"])

    episodes = []

    for data in ep_data:
        ep_url = "/view/{}/{}.html".format(data["comic_id"], data["id"])
        title = data["title"] + data["chapter_name"]
        episodes.append(Episode(title, urljoin(url, ep_url)))

    return episodes
Code Example #33
File: dm5.py Project: eight04/ComicCrawler
	def grab_page(page):
		params = {
			"cid": cid,
			"page": page + 1,
			"language": 1,
			"key": key,
			"gtk": 6,
			"_cid": cid,
			"_mid": mid,
			"_dt": dt,
			"_sign": sign
		}
		fun_url = urljoin(url, "chapterfun.ashx")
		text = grabhtml(fun_url, referer=url, params=params)
		d = eval(text)
		for i, image in enumerate(d):
			pages[i + page] = image
Code Example #34
File: dmzj_m.py Project: eight04/ComicCrawler
def get_episodes(html, url):
	data_js = re.search("initIntroData(.+?);", html, re.DOTALL).group(1)
	data = eval(data_js)

	ep_data = []
	for category in data:
		ep_data += category["data"]
	ep_data = sorted(ep_data, key=lambda data: data["chapter_order"])

	episodes = []

	for data in ep_data:
		ep_url = "/view/{}/{}.html".format(data["comic_id"], data["id"])
		title = data["title"] + data["chapter_name"]
		episodes.append(Episode(title, urljoin(url, ep_url)))

	return episodes
Code Example #35
File: dm5.py Project: RanDomWoW/ComicCrawler
    def grab_page(page):
        params = {
            "cid": cid,
            "page": page + 1,
            "language": 1,
            "key": key,
            "gtk": 6,
            "_cid": cid,
            "_mid": mid,
            "_dt": dt,
            "_sign": sign
        }
        fun_url = urljoin(url, "chapterfun.ashx")
        text = grabhtml(fun_url, referer=url, params=params)
        d = eval(text)
        for i, image in enumerate(d):
            pages[i + page] = image
Code Example #36
def get_images(html, url):
    script = re.search(r'<script>\s*(var qTcms_Cur[\s\S]+?)</script>',
                       html).group(1)
    show_js_src = re.search(r'src="([^"]+?show\.\d+\.js[^"]*)', html).group(1)
    show_js = grabhtml(urljoin(url, show_js_src))
    real_pic_fn = re.search(
        r'(function f_qTcms_Pic_curUrl_realpic[\s\S]+?)function',
        show_js).group(1)
    code = """
	{script}
	{real_pic_fn}
	Buffer.from(qTcms_S_m_murl_e, "base64")
		.toString()
		.split("$qingtiandy$")
		.map(f_qTcms_Pic_curUrl_realpic);
	""".format(script=script, real_pic_fn=real_pic_fn)
    return [urljoin(url, i) for i in eval(code)]
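
The JS above base64-decodes qTcms_S_m_murl_e, splits it on the $qingtiandy$ delimiter, and maps each entry through the site's URL fixer. The decode-and-split half has a direct Python equivalent; a hedged sketch with a made-up payload (real pages still need f_qTcms_Pic_curUrl_realpic from show.*.js for the rewriting step):

import base64

murl_e = base64.b64encode(b"/a/1.jpg$qingtiandy$/a/2.jpg").decode()
pages = base64.b64decode(murl_e).decode().split("$qingtiandy$")
print(pages)  # ['/a/1.jpg', '/a/2.jpg']
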
Code Example #37
File: mh160.py Project: yachu01/ComicCrawler
def get_images(html, url):
    js_url = re.search(r'src="([^"]+base64\.js)"', html).group(1)
    js_content = grabhtml(urljoin(url, js_url))
    data = re.search('(var chapterTree=.+?)</script>', html,
                     re.DOTALL).group(1)
    match = re.search(r'window\["\\x65\\x76\\x61\\x6c"\](.+?)</script>', html,
                      re.DOTALL)
    data2 = match.group(1) if match else ""

    imgs = eval("""
	const document = {{}};
	{};
	{};
	eval({});
	getUrlpics().map(getrealurl);
	""".format(js_content, data, data2))

    return imgs
Code Example #38
File: xznj120.py Project: eight04/ComicCrawler
def get_images(html, url):
	script = re.search(r'<script>\s*(var qTcms_Cur[\s\S]+?)</script>', html).group(1)
	show_js_src = re.search(r'src="([^"]+?show\.\d+\.js[^"]*)', html).group(1)
	show_js = grabhtml(urljoin(url, show_js_src))
	real_pic_fn = re.search(r'(function f_qTcms_Pic_curUrl_realpic[\s\S]+?)function', show_js).group(1)
	code = """
	{script}
	{real_pic_fn}
	function base64_decode(data) {{
		return Buffer.from(data, "base64").toString();
	}}
	// m.wuyouhui.net/template/wap1/css/d7s/js/show.20170501.js?20190506201115
	Buffer.from(qTcms_S_m_murl_e, "base64")
		.toString()
		.split("$qingtiandy$")
		.filter(u => !/^(--|\+)/.test(u))
		.map(f_qTcms_Pic_curUrl_realpic);
	""".format(script=script, real_pic_fn=real_pic_fn)
	return [urljoin(url, i) for i in eval(code)]
Code Example #39
def get_images(html, url):
    if "pixiv.user.loggedIn = true" not in html:
        raise PauseDownloadError("you didn't login!")

    # ugoku
    rs = re.search(r"pixiv\.context\.ugokuIllustFullscreenData\s+= ([^;]+)",
                   html)
    if rs:
        json = rs.group(1)
        o = eval("(" + json + ")")
        cache["frames"] = o["frames"]
        return [o["src"]]

    # new image layout (2014/12/14)
    rs = re.search(r'class="big" data-src="([^"]+)"', html)
    if rs:
        return [rs.group(1)]

    rs = re.search(r'data-src="([^"]+)" class="original-image"', html)
    if rs:
        return [rs.group(1)]

    # old image layout
    imgs = get_images_old(html, url)
    if imgs:
        return imgs

    # restricted
    rs = re.search('<section class="restricted-content">', html)
    if rs:
        raise SkipEpisodeError

    # error page
    rs = re.search('class="error"', html)
    if rs:
        raise SkipEpisodeError

    # id doesn't exist
    rs = re.search("pixiv.context.illustId", html)
    if not rs:
        raise SkipEpisodeError
Code Example #40
def get_images_old(html, url):
    match = re.search(r'"works_display"><a (?:class="[^"]*" )?href="([^"]+)"',
                      html)

    if not match:
        return

    inner_url = match.group(1)
    html = grabhtml(urljoin(url, inner_url), referer=url)

    if "mode=big" in inner_url:
        # single image
        img = re.search(r'src="([^"]+)"', html).group(1)
        return [img]

    if "mode=manga" in inner_url:
        # multiple image
        imgs = []

        def create_grabber(url):
            def grabber():
                html = grabhtml(url)
                return re.search(r'img src="([^"]+)"', html).group(1)

            return grabber

        for match in re.finditer(
                r'a href="(/member_illust\.php\?mode=manga_big[^"]+)"', html):
            imgs.append(create_grabber(urljoin(url, match.group(1))))

        # New manga reader (2015/3/18)
        # http://www.pixiv.net/member_illust.php?mode=manga&illust_id=19254298
        if not imgs:
            for match in re.finditer(r'originalImages\[\d+\] = ("[^"]+")',
                                     html):
                img = eval(match.group(1))
                imgs.append(img)

        return imgs
Code Example #41
def get_images(html, url):
    check_login(html)
    init_data = re.search(r"(var globalInitData[\s\S]+?)</script>",
                          html).group(1)
    init_data = eval("""
	Object.freeze = null;
	""" + init_data + """
	globalInitData;
	""")
    illust_id = re.search("illust_id=(\d+)", url).group(1)
    illust = init_data["preload"]["illust"][illust_id]

    if illust["illustType"] != 2:  # normal images
        first_img = illust["urls"]["original"]
        return [get_nth_img(first_img, i) for i in range(illust["pageCount"])]

    # https://www.pixiv.net/member_illust.php?mode=medium&illust_id=44298524
    ugoira_meta = "https://www.pixiv.net/ajax/illust/{}/ugoira_meta".format(
        illust_id)
    ugoira_meta = json.loads(grabhtml(ugoira_meta))
    cache["frames"] = ugoira_meta["body"]["frames"]
    return ugoira_meta["body"]["originalSrc"]
Code Example #42
File: seemh.py Project: eight04/ComicCrawler
def get_images(html, url):
	# build js context
	js = """
	var window = global;
	var cInfo;
	var SMH = {
		imgData: function(data) {
			cInfo = data;
			return {
				preInit: function(){}
			};
		}
	};
	"""
	
	configjs_url = re.search(
		r'src="(https?://[^"]+?/config_\w+?\.js)"',
		html
	).group(1)
	configjs = grabhtml(configjs_url, referer=url)
	js += re.search(
		r'^(var CryptoJS|window\["\\x65\\x76\\x61\\x6c"\]).+',
		configjs,
		re.MULTILINE
	).group()

	js += re.search(
		r'<script type="text/javascript">((eval|window\["\\x65\\x76\\x61\\x6c"\]).+?)</script',
		html
	).group(1)
	
	with VM(js) as vm:
		files, path, md5, cid = vm.run("[cInfo.files, cInfo.path, cInfo.sl.md5, cInfo.cid]")
	
	# find server
	# "http://c.3qfm.com/scripts/core_5C348B32A78647FF4208EACA42FC5F84.js"
	# getpath()
	corejs_url = re.search(
		r'src="(https?://[^"]+?/core_\w+?\.js)"',
		html
	).group(1)
	corejs = grabhtml(corejs_url, referer=url)
	
	# cache server list
	servs = re.search(r"var servs=(.+?),pfuncs=", corejs).group(1)
	servs = eval(servs)
	servs = [host["h"] for category in servs for host in category["hosts"]]
	
	global servers
	servers = cycle(servs)

	host = next(servers)
	
	utils = re.search(r"SMH\.(utils=.+?),SMH\.imgData=", corejs).group(1)
	
	js = """
	var location = {
		protocol: "http:"
	};
	""" + utils + """;
	function getFiles(path, files, host) {
		// lets try if it will be faster in javascript
		return files.map(function(file){
			return utils.getPath(host, path + file);
		});
	}
	"""
	with VM(js) as vm:
		images = vm.call("getFiles", path, files, host)
	
	if config.getboolean("nowebp"):
		images = map(lambda i: i[:-5] if i.endswith(".webp") else i, images)
		
	params = urlencode({
		"cid": cid,
		"md5": md5
	})
	images = ["{file}?{params}".format(file=i, params=params) for i in images]
	
	return images
Code Example #43
File: manhuaren.py Project: eight04/ComicCrawler
def get_images(html, url):
	js = re.search(r"(eval\([\s\S]+?)</script", html).group(1)
	return eval(js + ";newImgs")