Python VM Examples, node_vm2.VM Python Examples

Example #1

0

Show file

def get_images(html, url):
    """Build the image URLs for the chapter page ``html``.

    The page's own scripts are evaluated in a node VM to obtain
    ``cInfo.files`` and ``cInfo.path``.  The ``core_*.js`` script the page
    references supplies the server list and the ``utils.getPath`` helper
    that turns each file name into a URL.

    Side effect: rebinds the module-level ``servers`` to a ``CycleList``
    built from the hosts found in ``core_*.js``.

    :param html: source of the chapter page.
    :param url: URL of the chapter page; passed as ``referer`` when the
        scripts are fetched.
    :return: the image URLs.  When the ``nowebp`` option is set, a list
        with the trailing ``.webp`` removed from each entry.
    :raises AttributeError: if an expected pattern is missing from the page
        or the scripts (``re.search`` returned ``None``).
    """
    # build js context
    js = "var window = global;"

    # Accept both http and https script URLs.
    configjs_url = re.search(r'src="(https?://[^"]+?/config_\w+?\.js)"',
                             html).group(1)
    configjs = grabhtml(configjs_url, referer=url)
    js += re.search(r'^(var CryptoJS|window\["\\x65\\x76\\x61\\x6c"\]).+',
                    configjs, re.MULTILINE).group()

    js += re.search(
        r'<script type="text/javascript">((eval|window\["\\x65\\x76\\x61\\x6c"\]).+?)</script',
        html).group(1)

    with VM(js) as vm:
        files, path = vm.run("[cInfo.files, cInfo.path]")

    # find server
    # "http://c.3qfm.com/scripts/core_5C348B32A78647FF4208EACA42FC5F84.js"
    # getpath()
    corejs_url = re.search(r'src="(https?://[^"]+?/core_\w+?\.js)"',
                           html).group(1)
    corejs = grabhtml(corejs_url, referer=url)

    # cache server list
    servs = re.search(r"var servs=(.+?),pfuncs=", corejs).group(1)
    # NOTE(review): eval() runs text downloaded from the remote site as
    # Python code.  Kept as-is because the exact literal syntax is not
    # visible here; consider ast.literal_eval() or a JSON parser.
    servs = eval(servs)
    servs = [host["h"] for category in servs for host in category["hosts"]]

    global servers
    servers = CycleList(servs)

    host = servers.get()

    utils = re.search(r"SMH\.(utils=.+?),SMH\.imgData=", corejs).group(1)

    js = utils + """;
	function getFiles(path, files, host) {
		// lets try if it will be faster in javascript
		return files.map(function(file){
			return utils.getPath(host, path + file);
		});
	}
	"""
    with VM(js) as vm:
        images = vm.call("getFiles", path, files, host)

    if config.getboolean("nowebp"):
        # Build a real list: a lazy map() object can only be consumed once
        # and has no len().
        images = [i[:-5] if i.endswith(".webp") else i for i in images]

    return images

Example #2

0

Show file

File: eight.py Project: yachu01/ComicCrawler

def get_episodes(html, url):
    """Collect the episodes linked from a comic index page.

    Every ``<a ... onclick="cview(...);...">`` link is resolved by running
    its ``cview(...)`` call inside a VM loaded with the site's
    ``/js/comicview.js``.  The stubbed ``window.open`` stores its argument
    in ``output``, which is read back as the episode URL.

    :param html: source of the index page.
    :param url: URL of the index page, used to resolve relative URLs.
    :return: list of ``Episode`` objects in page order.
    """
    flat_html = html.replace("\n", "")

    # Browser stand-ins defined ahead of the site script.
    stub = """
		var output;
		function getCookie() {}
		function getcookie() {}
		var window = {
			open: function(result){
				output = result;
			}
		};
		var document = {
			location: {
				href: ""
			}
		};
	"""
    js = stub + grabhtml(urljoin(url, "/js/comicview.js"))

    link_re = re.compile(
        r'<a [^>]*?onclick="(cview[^"]+?);[^>]*>(.+?)</a>', re.M)

    episodes = []
    with VM(js) as vm:
        for link in link_re.finditer(flat_html):
            call_expr = link.group(1)
            raw_title = link.group(2)

            # Run the page's own call, then read what window.open received.
            vm.run(call_expr)
            target = vm.run("output")

            episodes.append(
                Episode(clean_tags(raw_title), urljoin(url, target)))
    return episodes

Example #3

0

Show file

def get_episodes(html, url):
    """Return the episodes of a comic, in reverse page order.

    The episode list is first looked for in ``html`` via ``get_list``.
    When that yields nothing, the ``__VIEWSTATE`` field is decompressed
    with the site's ``LZString.decompressFromBase64`` (loaded from its
    ``main_*.js``) and the resulting markup is parsed instead.

    :param html: source of the comic page.
    :param url: URL of the comic page; must contain ``comic/<digits>``.
    :return: list of ``Episode`` objects, reversed relative to ``get_list``.
    """
    comic_id = re.search(r"comic/(\d+)", url).group(1)

    # http://tw.ikanman.com/comic/10924/
    entries = get_list(html, comic_id)

    # http://tw.ikanman.com/comic/4350/
    if not entries:
        packed_list = re.search(r'id="__VIEWSTATE" value="([^"]+)',
                                html).group(1)
        main_src = re.search(r'src="([^"]+?/main_[^"]*?\.js)"',
                             html).group(1)
        main_code = grabhtml(main_src)
        # Only the line starting with "window[" is loaded into the VM.
        bootstrap = re.search(r'^window\[.+', main_code, re.M).group()
        js = """
			var window = global;
		""" + bootstrap

        with VM(js) as vm:
            unpacked = vm.call("LZString.decompressFromBase64", packed_list)

        entries = get_list(unpacked, comic_id)

    result = []
    for entry in entries:
        result.append(Episode(entry[0].strip(), urljoin(url, entry[1])))
    result.reverse()
    return result

Example #4

0

Show file

File: iibq.py Project: yachu01/ComicCrawler

def get_images(html, url):
    """Return the image URLs of a chapter page.

    The ``sFiles`` value found in ``html`` is decoded with the site's
    ``unsuan`` function from ``viewhtm.js`` and split on ``|``.  Each name
    is prefixed with a server taken from ``ds.js`` and with ``sPath``.

    :param html: source of the chapter page.
    :param url: unused in this block.
    :return: list of image URLs.
    """
    packed_names = re.search('sFiles="([^"]+)"', html).group(1)
    base_path = re.search('sPath="([^"]+)"', html).group(1)

    viewer_js = grabhtml("http://www.iibq.com/script/viewhtm.js")

    # Globals defined ahead of the site script.
    env = """
	window = {
		"eval": eval,
		"parseInt": parseInt,
		"String": String,
		"RegExp": RegExp
	};
	location = {
		"hostname": "www.iibq.com"
	};
	"""

    # Load only the part of viewhtm.js that precedes "var cuImg".
    decoder_js = re.search(r'(.+?)var cuImg', viewer_js, re.DOTALL).group(1)
    with VM(env + decoder_js) as vm:
        names = vm.call("unsuan", packed_names).split("|")

    ds_js = grabhtml("http://www.iibq.com/script/ds.js")

    # sDS is "^"-separated; take the first entry and the second of its
    # "|"-separated fields.
    server_spec = re.search('sDS = "([^"]+)"', ds_js).group(1)
    first_server = server_spec.split("^")[0]
    server_url = first_server.split("|")[1]

    prefix = server_url + base_path
    return [prefix + name for name in names]

Example #5

0

Show file

File: eight.py Project: Python3pkg/ComicCrawler

def get_episodes(html, url):
    """Collect the episodes linked from a comic index page.

    Each ``<a ... onclick="cview('<page>',<catid>...">`` link is resolved
    by calling the JS helper ``get(page, catid)`` in a VM loaded with the
    site's ``/js/comicview.js``.  ``get`` runs ``cview`` and returns what
    the stubbed ``window.open`` received.

    :param html: source of the index page.
    :param url: URL of the index page, used to resolve relative URLs.
    :return: list of ``Episode`` objects in page order.
    """
    html = html.replace("\n", "")

    js = """
		var output;
		function getCookie() {}
		var window = {
			open: function(result){
				output = result;
			}
		};
		function get(url, catid) {
			cview(url, catid);
			return output;
		}
		var document = {
			location: {
				href: ""
			}
		};
	""" + grabhtml(urljoin(url, "/js/comicview.js"))

    s = []
    # Raw string: "\(" and "\d" are invalid escapes in a normal literal.
    # The catid group is greedy: a lazy "\d+?" followed by "[^>]*?" would
    # stop after the first digit and truncate multi-digit category ids.
    matches = re.finditer(
        r"""<a [^>]*?onclick="cview\('(.+?)',(\d+)[^>]*?>(.+?)</a>""",
        html, re.M)
    with VM(js) as vm:
        for match in matches:
            ep_url, catid, title = match.groups()

            ep_url = vm.call("get", ep_url, int(catid))
            title = clean_tags(title)

            e = Episode(title, urljoin(url, ep_url))
            s.append(e)
    return s

Example #6

0

Show file

def get_images(html, url):
    """Return the image URLs listed in the page's inline ``cInfo`` object.

    The ``var cInfo = ...;`` statement is cut out of ``html`` and
    evaluated in a VM.  Each entry of ``cInfo.fs`` is then prefixed with a
    fixed image server.

    :param html: source of the chapter page.
    :param url: unused in this block.
    :return: list of image URLs.
    """
    cinfo_stmt = re.search(
        r'<script type="text/javascript">(var cInfo =[^;]+;)', html).group(1)

    with VM(cinfo_stmt) as vm:
        paths = vm.run("cInfo.fs")

    # Server is hard-coded; see
    # http://www.ccdm1.com/Public/manhuadao/js/configs.js?v=0731
    server = 'http://ccimg1.61mh.com'

    images = []
    for path in paths:
        images.append("{server}{file}".format(server=server, file=path))
    return images

Example #7

0

Show file

def get_images(html, url):
    """Return the image URLs encoded in the page's ``packed`` variable.

    The ``packed=...;`` statement from ``html`` is placed ahead of a small
    environment that base64-decodes it and evaluates the result.  The
    ``photosr`` array is then read back, and every entry except the first
    is prefixed with the image host.

    :param html: source of the chapter page.
    :param url: unused in this block.
    :return: generator of image URLs.
    """
    # Declares the target array, a base64 decoder, and the unpack step.
    unpack_env = """
	var photosr = new Array();
	function base64decode(str){var base64EncodeChars="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";var base64DecodeChars=new Array(-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,62,-1,-1,-1,63,52,53,54,55,56,57,58,59,60,61,-1,-1,-1,-1,-1,-1,-1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,-1,-1,-1,-1,-1,-1,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,-1,-1,-1,-1,-1);var c1,c2,c3,c4;var i,len,out;len=str.length;i=0;out="";while(i<len){do{c1=base64DecodeChars[str.charCodeAt(i++)&255]}while(i<len&&c1==-1);if(c1==-1){break}do{c2=base64DecodeChars[str.charCodeAt(i++)&255]}while(i<len&&c2==-1);if(c2==-1){break}out+=String.fromCharCode((c1<<2)|((c2&48)>>4));do{c3=str.charCodeAt(i++)&255;if(c3==61){return out}c3=base64DecodeChars[c3]}while(i<len&&c3==-1);if(c3==-1){break}out+=String.fromCharCode(((c2&15)<<4)|((c3&60)>>2));do{c4=str.charCodeAt(i++)&255;if(c4==61){return out}c4=base64DecodeChars[c4]}while(i<len&&c4==-1);if(c4==-1){break}out+=String.fromCharCode(((c3&3)<<6)|c4)}return out};
	eval(eval(base64decode(packed).slice(4)));
	"""
    packed_stmt = re.search(r'(packed=[^;]+;)', html).group(1)

    with VM(packed_stmt + unpack_env) as vm:
        photos = vm.run('photosr')
    # The first slot of photosr is skipped.
    names = photos[1:]

    return ('http://img1.733mh.com/' + name for name in names)

Example #8

0

Show file

File: eight.py Project: yachu01/ComicCrawler

def get_images(html, url):
    """Return the image URLs of a chapter page.

    The site's ``nview.js`` and the page's inline script are wrapped in a
    JS function ``scriptBody`` that runs each time ``document.location.href``
    is assigned.  The fake ``getElementById`` returns objects whose ``src``
    setter appends the assigned value to ``images``.

    :param html: source of the chapter page.
    :param url: URL of the chapter page; passed to the JS ``getImages`` and
        used to resolve relative URLs.
    :return: list of absolute image URLs.
    :raises AttributeError: if the ``nview.js`` reference or both inline
        script patterns are missing from ``html``.
    """
    # Raw strings: "\." and "\(" are invalid escapes in a normal literal.
    nview = re.search(r'src="([^"]*nview\.js[^"]*)"', html).group(1)
    nview = urljoin(url, nview)
    nview = grabhtml(nview)

    # http://www.comicbus.com/html/103.html
    match = re.search(r'(var ch=.+?)spp\(\)', html, re.DOTALL)
    if match is None:
        # http://www.comicbus.com/html/7294.html
        match = re.search(r'(var chs=.+?)</script>', html, re.DOTALL)
    script = match.group(1)

    js = """
	var url,
		images = [],
		document = {
			location: {
				toString() {return url;},
				get href() {return url;},
				set href(_url) {url = _url; scriptBody()}
			},
			getElementById() {
				return {
					set src(value) {
						images.push(value);
					},
					style: {}
				};
			}
		},
		navigator = {
			userAgent: "",
			language: ""
		},
		window = {},
		alert = () => {};
		
	function scriptBody() {
		initpage = () => {};
	""" + nview + script + """
		jn();
	}
	
	function getImages(url) {
		images = [];
		document.location.href = url;
		return images;
	}
	"""

    with VM(js) as vm:
        images = vm.call("getImages", url)

    return [urljoin(url, i) for i in images]

Example #9

0

Show file

    def test_VM(self):
        """Check that ``VM`` evaluates code when created both ways."""
        with self.subTest("create VM"):
            vm = VM().create()
            try:
                r = vm.run("'foo' + 'bar'")
            finally:
                # Destroy the VM even when run() raises, so a failing
                # subtest does not leave it behind.
                vm.destroy()
            self.assertEqual(r, "foobar")

        with self.subTest("with statement"):
            with VM() as vm:
                r = vm.run("'foo' + 'bar'")
                self.assertEqual(r, "foobar")

Example #10

0

Show file

File: gamelogic.py Project: SambhavS/squill

 def turn(self, mat, strength, x, y):
     """Run this player's JavaScript ``turn`` function and return its action.

     The script is ``preload_code``, then ``turn_code[self.color]``, then a
     ``turn(color, mat, strength, x, y)`` call.  It is evaluated in a
     fresh VM.

     :param mat: board matrix; serialised with ``json.dumps``.
     :param strength: value interpolated into the call as-is.
     :param x: value interpolated into the call as-is.
     :param y: value interpolated into the call as-is.
     :return: the action as a tuple when the script yields a list,
         otherwise ``None`` (including when building or running the
         script fails).
     """
     try:
         turn_call = "turn({}, {}, {}, {}, {})".format(
             " '{}' ".format(self.color), json.dumps(mat), strength,
             x, y)
         call_str = "{} {} {}".format(preload_code,
                                      turn_code[self.color],
                                      turn_call)
         with VM() as vm:
             action = vm.run(call_str)
     except Exception:
         # Best effort: a failing player script must not stop the game, but
         # KeyboardInterrupt/SystemExit are no longer swallowed.
         print("Error: {}".format(self.color))
         return None

     if isinstance(action, list):
         return tuple(action)
     return None

Example #11

0

Show file

def build_ctx(url):
    """Create the shared JavaScript context and store it in ``ctx``.

    The site's ``/script/view.js`` is fetched and appended to a set of
    browser stand-ins (``location``, ``document``, ``window``) and a
    ``getImages(url, name, hdDomain)`` helper.  The resulting VM is bound
    to the module-level ``ctx``.

    :param url: any URL on the target site; used to resolve the script URL.
    """
    global ctx
    site_js = grabhtml(urljoin(url, "/script/view.js"))

    # Stand-ins defined ahead of the site script.
    shim = """
		var imgEl = {
				style: {},
				name: ""
			},
			domainEl = {};
			
		location = {
			href: "",
			hostname: ""
		};
		document = {
			location: location,
			getElementById: function(id){
				if (id == "hdDomain") {
					return domainEl;
				}
				if (/^img/.test(id)) {
					return imgEl;
				}
				return {};
			}
		};
		window = {
			document: document,
			eval: eval,
			parseInt: parseInt,
			String: String,
			RegExp: RegExp
		};
		function getImages(url, name, hdDomain) {
			location.href = url;
			location.hostname = url.match(/:\/\/([^\/]+)/)[1];
			imgEl.name = name;
			domainEl.value = hdDomain;
			
			window_onload();
			return imgEl.src;
		}
	"""
    ctx = VM(shim + site_js).create()

Example #12

0

Show file

File: eight.py Project: Superbil/ComicCrawler

def get_episodes(html, url):
    """Collect the episodes linked from a comic index page.

    Each link of the form
    ``<a href='#' onclick="cview('<page>',<catid>);return false;" ...>``
    is resolved by calling the JS helper ``get(page, catid)`` in a VM
    loaded with the site's ``/js/comicview.js``.  ``get`` runs ``cview``
    and returns what the stubbed ``window.open`` received.

    :param html: source of the index page.
    :param url: URL of the index page, used to resolve relative URLs.
    :return: list of ``Episode`` objects in page order.
    """
    html = html.replace("\n", "")

    js = """
		var output;
		function getCookie() {}
		var window = {
			open: function(result){
				output = result;
			}
		};
		function get(url, catid) {
			cview(url, catid);
			return output;
		}
		var document = {
			location: {
				href: ""
			}
		};
	""" + grabhtml(urljoin(url, "/js/comicview.js"))

    s = []
    # Raw strings: "\(", "\d", "\)" and "\w" are invalid escape sequences
    # in a normal literal.  The resulting pattern text is unchanged.
    matches = re.finditer(
        r"""<a href='#' onclick="cview\('(.+?)',(\d+?)\);return """
        r"""false;" id="\w+?" class="\w+?">(.+?)</a>""", html, re.M)
    with VM(js) as vm:
        for match in matches:
            ep_url, catid, title = match.groups()

            ep_url = vm.call("get", ep_url, int(catid))

            # tag cleanup
            title = title.strip()
            title = re.sub("<script.+?</script>", "", title)
            title = re.sub("<.+?>", "", title)

            e = Episode(title, urljoin(url, ep_url))
            s.append(e)
    return s

Example #13

0

Show file

def get_images(html, url):
	"""Return the image URLs of a chapter page.

	The ``sFiles`` value from ``html`` is decoded with the site's
	``unsuan`` function (taken from the ``viewhtm*.js`` the page
	references) after ``location.hostname`` has been set to the page's
	host.  Each decoded name is prefixed with the first server listed in
	the referenced ``ds.js`` and with ``sPath``.

	Side effect: rebinds the module-level ``servers`` to a ``cycle`` over
	the servers listed in ``ds.js`` (the first one is already consumed).

	:param html: source of the chapter page.
	:param url: URL of the chapter page.
	:return: generator of image URLs.
	"""
	global servers

	packed_names = re.search('sFiles="([^"]+)', html).group(1)
	base_path = re.search('sPath="([^"]+)', html).group(1)

	viewer_ref = re.search(r'src="([^"]*?viewhtm\d*\.js[^"]*)', html)
	viewer_js = grabhtml(urljoin(url, viewer_ref.group(1)))

	# Globals defined ahead of the site script, plus a hostname setter.
	env = """
	window = {
		"eval": eval,
		"parseInt": parseInt,
		"String": String,
		"RegExp": RegExp
	};
	location = {
		"hostname": null
	};
	function setHostname(hostname) {
		location.hostname = hostname;
	}
	"""

	# Load only the code between the isMobile() body and "var cMod".
	decoder_js = re.search(
		r'function isMobile\(\){.+?}(.+?)var cMod', viewer_js,
		re.DOTALL).group(1)
	with VM(env + decoder_js) as vm:
		vm.call("setHostname", urlparse(url).hostname)
		names = vm.call("unsuan", packed_names).split("|")

	ds_ref = re.search(r"src='([^']*?ds\.js[^']*)", html)
	ds_js = grabhtml(urljoin(url, ds_ref.group(1)))

	# sDS is "^"-separated; the second "|"-field of each entry is the URL.
	servers = re.search('sDS = "([^"]+)', ds_js).group(1).split("^")
	servers = cycle([entry.split("|")[1] for entry in servers])

	prefix = next(servers) + base_path
	return (prefix + name for name in names)

Example #14

0

Show file

File: seemh.py Project: gasbarroni8/ComicCrawler

def get_images(html, url):
    """Build the image URLs for the chapter page ``html``.

    The page's scripts are evaluated in a VM where ``SMH.imgData`` is
    stubbed to capture its argument as ``cInfo``.  The ``core_*.js``
    script supplies the server list and the ``utils.getPath`` helper, and
    ``cInfo.sl`` is url-encoded and appended to every image URL as a query
    string.

    Side effect: rebinds the module-level ``servers`` to a ``cycle`` over
    the hosts from ``core_*.js`` (the first one is already consumed).

    :param html: source of the chapter page.
    :param url: URL of the chapter page; passed as ``referer`` when the
        scripts are fetched.
    :return: list of image URLs with the query string attached.
    """
    global servers

    # build js context
    capture_stub = """
	var window = global;
	var cInfo;
	var SMH = {
		imgData: function(data) {
			cInfo = data;
			return {
				preInit: function(){}
			};
		}
	};
	"""

    config_src = re.search(r'src="(https?://[^"]+?/config_\w+?\.js)"',
                           html).group(1)
    config_code = grabhtml(config_src, referer=url)
    crypto_line = re.search(
        r'^(var CryptoJS|window\["\\x65\\x76\\x61\\x6c"\]).+',
        config_code, re.MULTILINE).group()

    page_script = re.search(
        r'<script type="text/javascript">((eval|window\["\\x65\\x76\\x61\\x6c"\]).+?)</script',
        html).group(1)

    with VM(capture_stub + crypto_line + page_script) as vm:
        files, path, params = vm.run("[cInfo.files, cInfo.path, cInfo.sl]")

    # find server
    # "http://c.3qfm.com/scripts/core_5C348B32A78647FF4208EACA42FC5F84.js"
    # getpath()
    core_src = re.search(r'src="(https?://[^"]+?/core_\w+?\.js)"',
                         html).group(1)
    core_code = grabhtml(core_src, referer=url)

    # cache server list
    # NOTE(review): eval() runs text downloaded from the remote site as
    # Python code -- consider a literal/JSON parser instead.
    categories = eval(
        re.search(r"var servs=(.+?),pfuncs=", core_code).group(1))
    hosts = []
    for category in categories:
        for entry in category["hosts"]:
            hosts.append(entry["h"])

    servers = cycle(hosts)
    host = next(servers)

    utils = re.search(r"SMH\.(utils=.+?),SMH\.imgData=", core_code).group(1)

    path_js = """
	var location = {
		protocol: "http:"
	};
	""" + utils + """;
	function getFiles(path, files, host) {
		// lets try if it will be faster in javascript
		return files.map(function(file){
			return utils.getPath(host, path + file);
		});
	}
	"""
    with VM(path_js) as vm:
        images = vm.call("getFiles", path, files, host)

    if config.getboolean("nowebp"):
        # Lazily strip the ".webp" suffix; consumed by the list below.
        images = (i[:-5] if i.endswith(".webp") else i for i in images)

    query = urlencode(params)
    return ["{file}?{params}".format(file=i, params=query) for i in images]