def get_video_url(url, DEBUG=0):
    scraper = cfscrape.create_scraper()
    try:
        response = scraper.get(
            'http://www.fastvideo.me/embed-%s-607x360.html'
            % re.search('me/([^/]+)/', url).group(1))
    except:
        return False, "Url not found"
    if 'p,a,c,k,e,d' in response.content:
        try:
            packed = re.search(
                r'<script[^>]+>eval\((function\(p,a,c,k,e,d\).*\)\))\).*?<\/script>',
                response.content, re.S).group(1)
        except:
            packed = None
            return False, "Packed regex error"
        if packed:
            import jsbeautifier.unpackers.packer as packer
            jwconfig = packer.unpack(packed)
            url = re.search('file:"([^"]+)"', jwconfig).group(1)
            return True, url
    else:
        return False, "Packed not found"
def is_artefact_packed(filename, byte_content):
    """
    Returns a tuple (is_packed, unpacked_byte_content) if the artefact is packed
    using an algorithm similar to Dean Edwards' p.a.c.k.e.r algorithm.
    Unpacking can introduce syntax errors that compound feature analysis, but we try anyway...
    """
    # don't load the file again if we already have the byte content...
    if byte_content is None:
        with open(filename, 'rb') as fp:
            byte_content = fp.read()
    try:
        source = fast_decode(byte_content)
        is_packed = packer.detect(source)
        if is_packed:
            ret = packer.unpack(source)
            # TODO FIXME... yuk this is so silly! But we have to return bytes since
            # the extract-features.jar program expects that...
            # fortunately most artefacts aren't packed, so this is not as expensive as it could be...
            result = decode(encode(ret, 'utf-8', 'backslashreplace'), 'unicode-escape').encode()
            assert isinstance(result, bytes)
            assert len(result) > 0
            return (True, result)
        # else
        return (False, byte_content)
    except Exception as e:
        # we fail an artefact that is purportedly packed, but which cannot be unpacked...
        raise e
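# A minimal usage sketch for the helper above (the file name is hypothetical; it
# assumes the surrounding module already imports `packer` from
# jsbeautifier.unpackers and defines `fast_decode`, as the function does):
#
#     was_packed, content = is_artefact_packed('suspect-artefact.js', None)
#     if was_packed:
#         print('unpacked to %d bytes' % len(content))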
def _get_streams(self):
    headers = {
        'User-Agent': useragents.CHROME,
        'Referer': self.url
    }
    res = http.get(self.url)
    html_text = res.text
    soup = BeautifulSoup(html_text, 'html.parser')
    scripts = soup.find_all('script')
    for script in scripts:
        if script.string is not None:
            if "document.getElementsByTagName('video')[0].volume = 1.0;" in script.string:
                code = script.string
                startidx = code.find('eval(function(p,a,c,k,e,')
                endidx = code.find('hunterobfuscator =')
                code = code[startidx:endidx]
                # Here is the call to the first part of the deobfuscation i.e. getting packed code
                #code = aadecode(code)
                #code = code.aadecode()
                if not code.replace(' ', '').startswith('eval(function(p,a,c,k,e,'):
                    code = 'fail'
                break
            else:
                code = 'fail'
        else:
            code = 'fail'
    # The second part of deobfuscation occurs here. Using module jsbeautifier.
    if code != 'fail':
        unpacked = packer.unpack(code)
        video_location = unpacked[unpacked.rfind('http'):unpacked.rfind('m3u8') + 4]
        url = video_location
        if url:
            self.logger.debug('HLS URL: {0}'.format(url))
            yield 'live', HLSStream(self.session, url, headers=headers)
def get_video_url(url):
    list = []
    from xbmctools import getnet, finddata
    data = getnet(url)
    regx = '''<script type="text/javascript"(.*)</script>'''
    #print "jucie",_filter(data)
    #print"data1", data
    data2 = finddata(data.replace("\n", ""), 'eval(', '</script')
    unpack = packer.unpack(data2)
    print 'unpack', unpack
    regx = '''http://(.*?)mkv'''
    try:
        link = 'http://' + re.findall(regx, unpack, re.M | re.I)[0] + 'mkv'
    except:
        regx = '''http://(.*?)mp4'''
        link = 'http://' + re.findall(regx, unpack, re.M | re.I)[0] + 'mp4'
    print 'link', link
    return link
def extract_ports(js_url):
    dict = {}

    # Download the JS file.
    html = download_webpage(js_url)
    if html is None:
        log.error('Failed to download webpage: %s', js_url)
        return dict

    try:
        # For now, try to extract the css/port pairs from the JS.
        # This is likely a really bad way of doing it; I'll revisit later.
        unpack = packer.unpack(html)
        unpack = unpack.replace("$(document).ready(function(){", "")
        unpack = unpack.replace("});", "")
        unpack = unpack.replace("\\", "")
        unpack = unpack.replace("'", "")
        unpack = unpack.replace(".", "")

        # Pull out everything that is within a bracket.
        parts = re.findall('\((.*?)\)', unpack)

        # Now convert the list into a dictionary.
        # Every other entry in the list is a css/port pair.
        i = 0
        while i < len(parts):
            dict[parts[i]] = parts[i + 1]
            i += 2

        return dict
    except Exception as e:
        log.exception('Failed to extract ports from %s: %s.', js_url, e)
        return dict
def play_video(selection):
    r = requests.get(arconaitv_url + selection)
    html_text = r.text
    soup = BeautifulSoup(html_text, 'html.parser')
    scripts = soup.find_all('script')
    for script in scripts:
        if script.string is not None:
            if "document.getElementsByTagName('video')[0].volume = 1.0;" in script.string:
                code = script.string
                startidx = code.find('eval(function(p,a,c,k,e,')
                endidx = code.find('hunterobfuscator =')
                code = code[startidx:endidx]
                # Here is the call to the first part of the deobfuscation i.e. getting packed code
                #code = aadecode(code)
                #code = code.aadecode()
                if not code.replace(' ', '').startswith('eval(function(p,a,c,k,e,'):
                    code = 'fail'
                break
            else:
                code = 'fail'
        else:
            code = 'fail'
    # The second part of deobfuscation occurs here. Using module jsbeautifier.
    if code != 'fail':
        unpacked = packer.unpack(code)
        video_location = unpacked[unpacked.rfind('http'):unpacked.rfind('m3u8') + 4]
        play_item = xbmcgui.ListItem(
            path=video_location + '|User-Agent=%s' % urllib2.quote(USER_AGENT, safe=''))
        xbmcplugin.setResolvedUrl(addon_handle, True, listitem=play_item)
    else:
        xbmcgui.Dialog().ok('Sorry', 'Could not deobfuscate the code.')
def parse(self, response):
    script = response.xpath('//script[1]/text()').extract()[0]
    parse_str = script.strip().split('\n')[2]
    # print(parse_str)
    pages = packer.unpack(parse_str.strip())
    # pages = packer.unpack(str(parse_str.strip()))
    pages = pages.replace("var pages=pages=\\'[", "").replace(";", "").replace("\\", "").replace(
        '"', "").replace("[", "").replace("]", "").split(",")
    # for page in pages:
    item = LearningItem()
    # item['image_urls'] = "https://images.dmzj.com/" + page
    item['image_urls'] = map(lambda x: "https://images.dmzj.com/" + x, pages)
    item['img_dirname2'] = response.xpath(
        "//span[@class='redhotl']/text()").extract()[0]
    item['img_dirname1'] = response.xpath(
        "//a[@class='redhotl']/text()").extract()[0]
    # item['image_urls'].append("https://images.dmzj.com/" + pages[0])
    # for img_link in pages:
    #     item["image_urls"] = "https://images.dmzj.com/" + img_link
    yield item
def test_unpack(self):
    """Test unpack() function."""
    check = lambda inp, out: self.assertEqual(unpack(inp), out)

    check("eval(function(p,a,c,k,e,r){e=String;if(!''.replace(/^/,String)"
          "){while(c--)r[c]=k[c]||c;k=[function(e){return r[e]}];e="
          "function(){return'\\\\w+'};c=1};while(c--)if(k[c])p=p.replace("
          "new RegExp('\\\\b'+e(c)+'\\\\b','g'),k[c]);return p}('0 2=1',"
          "62,3,'var||a'.split('|'),0,{}))", 'var a=1')
def test_unpack(self):
    """Test unpack() function."""
    check = lambda inp, out: self.assertEqual(unpack(inp), out)

    check(
        "eval(function(p,a,c,k,e,r){e=String;if(!''.replace(/^/,String)"
        "){while(c--)r[c]=k[c]||c;k=[function(e){return r[e]}];e="
        "function(){return'\\\\w+'};c=1};while(c--)if(k[c])p=p.replace("
        "new RegExp('\\\\b'+e(c)+'\\\\b','g'),k[c]);return p}('0 2=1',"
        "62,3,'var||a'.split('|'),0,{}))",
        'var a=1')
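# Outside the test harness, the same packed fixture used above can be fed
# straight to the packer module; a minimal runnable sketch, assuming
# jsbeautifier is installed:
import jsbeautifier.unpackers.packer as packer

packed = ("eval(function(p,a,c,k,e,r){e=String;if(!''.replace(/^/,String)"
          "){while(c--)r[c]=k[c]||c;k=[function(e){return r[e]}];e="
          "function(){return'\\\\w+'};c=1};while(c--)if(k[c])p=p.replace("
          "new RegExp('\\\\b'+e(c)+'\\\\b','g'),k[c]);return p}('0 2=1',"
          "62,3,'var||a'.split('|'),0,{}))")

if packer.detect(packed):          # True for Dean Edwards style p.a.c.k.e.r payloads
    print(packer.unpack(packed))   # prints: var a=1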
def fetch_images_url(chapter_url=''):
    request_url = HOME_URL_PREFIX + chapter_url
    req = requests.get(request_url, headers=HEADERS)
    tmp_url = req.text.split('\n')[11]  # line 11 of the HTML holds the image URL suffixes
    tmp_url = unpack(tmp_url)
    pic_url_str = re.sub('var pages\s?=\s?pages\s?=\\\\\'\[\"', '', tmp_url)
    pic_url_str = re.sub('\"\]\\\\\';\s?', '', pic_url_str)
    pic_url_arr = []  # holds the image URLs of the latest chapter
    for i in pic_url_str.replace('\\\/', '/').split('","'):
        # url = PIC_URL_PREFIX + urllib.quote(i.decode('unicode-escape').encode('utf8'))
        url = PIC_URL_PREFIX + i
        pic_url_arr.append(url)
    return pic_url_arr
def solve_js(html):
    js = '(function' + re.findall('function(.*?)</script>', html)[0]
    encrpyted_js = js.split(',')[-3][1:-15]
    decrypted_js = lzstring.LZString().decompressFromBase64(encrpyted_js)
    original_js = js.split(',')
    original_js[-3] = "'" + decrypted_js + "'.split('|')"
    packed_js = 'eval(' + ','.join(original_js) + ')'
    # print('packed_js', packed_js)
    unpack = packer.unpack(packed_js)
    print(unpack)
    # js_result = jsbeautifier.beautify(unpack)
    # print('js_result', js_result)
    imgData = re.findall("SMH\.imgData\((.*?)\)\.preInit\(\)\;", unpack)[0]
    res = json.loads(imgData)
    print(res['bname'])
    return res
def obtem_vidig2(url):
    import jsbeautifier.unpackers.packer as packer
    soup = BeautifulSoup(abrir_url(url))
    codigo_fonte = packer.unpack(soup('script')[6].text)
    #ow = open(r'G:\\html_list\\script4.html','w')
    #ow.write(str(soup('script')[6]))
    #ow.close()
    #"(http://.*?/v.flv)"
    try:
        url_video = re.findall(r"'(http://.*?/v.mp4)\\'", codigo_fonte)[0]
        return [url_video + '|User-Agent=Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.104 Safari/537.36', "-"]
    except:
        pass
    try:
        url_video = re.findall(r'"(http://.*?/v.flv)"', codigo_fonte)[0]
        return [url_video + '|User-Agent=Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.104 Safari/537.36', "-"]
    except:
        return ["-", "-"]
def get_video_url(url):
    list = []
    from xbmctools import getnet, finddata
    data = getnet(url)
    regx = '''<script type="text/javascript"(.*)</script>'''
    #print "jucie",_filter(data)
    #print"data1", data
    data2 = finddata(data.replace("\n", ""), 'eval(', '</script')
    try:
        unpack = packer.unpack(data2)
    except:
        unpack = data
    regx = '''file:"(.*?)m3u8"'''
    try:
        m3u8 = re.findall(regx, unpack, re.M | re.I)[0] + 'm3u8'
        list.append(('m3u8', m3u8))
    except:
        pass
    regx = ''',{file:"(.*?)mp4"'''
    try:
        mp4 = re.findall(regx, unpack, re.M | re.I)[0] + 'mp4'
        list.append(('mp4', mp4))
    except:
        pass
    print "list", list
    return list
    # NOTE: everything below is unreachable after the return above (leftover from an earlier version).
    Pre_Stream_URL = re.search('file:"(.*)"', unpack).group(1)
    print 'Pre_Stream_URL1', Pre_Stream_URL
    Pre_Stream_URL = re.search('file:"(.*)",label', unpack).group(1)
    print 'Pre_Stream_URL2', Pre_Stream_URL
    sys.exit(0)
    return Pre_Stream_URL
def fetch(self, url):
    soup = self.requester.get(url)
    test = re.search(r"eval(?:.+fullscreen.+\))", soup.text)
    if test is None:
        return
    unpack = packer.unpack(test.group())
    test = re.search("http:\/\/[^\s\"]+.\.(?:mp4|mpg|avi|flv)", unpack)
    if test is None:
        return
    info = self.requester.send(test.group())
    info = re.search(r"(['\"])(?P<url>https:.+)\1", info.text)
    if info is None:
        return
    return info.group("url")
def parse_page_ext2(self, response):
    """ Fetch the JS that holds the image URLs """
    manga_no = response.meta['manga_no']
    chapter_no = response.meta['chapter_no']
    manga_name = response.meta['manga_name']
    chapter_name = response.meta['chapter_name']
    manga_save_folder = response.meta['manga_save_folder']
    data = response.meta['data']
    current_page = response.meta['current_page']
    page_count = response.meta['page_count']
    try_repr = response.meta.setdefault('try_repr', False)
    proxy = response.meta['proxy']
    # the response is a chunk of JS code
    # decoded with jsbeautifier 1.7.5
    # Python 3 compatible
    response_body_str = str(response.body, response.encoding)
    packed_js = response_body_str
    try:
        if try_repr == True:
            packed_js = repr(packed_js)
        unpacked_js = packer.unpack(packed_js).replace("\\'", "'")
        # after unpacking, pull the image URL pieces out of the JS
        cid = re.findall('var cid=(.*?);', unpacked_js)[0]
        key = re.findall("var key='(.*?)';", unpacked_js)[0]
        pix = re.findall('var pix="(.*?)";', unpacked_js)[0]
        pvalue = re.findall('var pvalue=\[(.*?)\];', unpacked_js)[0]
        pvalue = pvalue.replace('"', '')
        if ',' in pvalue:
            pvalue = pvalue.split(',')
        else:
            pvalue = [pvalue]
        image_url = pix + pvalue[0] + '?cid=' + cid + '&key=' + key + '&uk='
        item = Dm5Item()
        item['imgurl'] = image_url
        item['imgname'] = image_url.split('/')[-1].split('_')[0].zfill(3) + '.jpg'
        # downloading outside the pipeline needs a specially built save path
        item['imgfolder'] = settings.get(
            'IMAGES_STORE') + '/' + manga_save_folder + '/' + chapter_no
        # prepare to download the image
        image_save_path = os.path.join(item['imgfolder'], item['imgname'])
        # skip images that have already been downloaded
        if not os.path.exists(image_save_path):
            if not os.path.exists(item['imgfolder']):
                os.makedirs(item['imgfolder'])
            print('current page', manga_name, chapter_no, chapter_name,
                  current_page, '/', page_count)
            host = image_url.split('//')[1].split('/')[0]
            headers = {
                'Host': host,
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:63.0) Gecko/20100101 Firefox/63.0',
                'Accept': '*/*',
                'Connection': 'close',
                'Accept-Language': 'zh-CN,zh;q=0.8,en;q=0.5,en-US;q=0.3',
                'Accept-Encoding': 'gzip, deflate',
                'Referer': 'http://www.dm5.com/m' + str(data['cid']) + '-p' + str(current_page) + '/',
                'DNT': '1',
            }
            # print 1111, image_url
            # download the image with urllib2
            # req = urllib2.Request(image_url, headers=headers)
            # res = urllib2.urlopen(req)
            # # print res
            # with open(image_save_path, 'wb') as f:
            #     f.write(res.read())
            # download the image with requests
            # res = requests.get(image_url, headers=headers, proxies=proxyDict)
            # with open(image_save_path, 'wb') as f:
            #     f.write(res.content)
            # download the image with the custom image pipeline
            item['imgheaders'] = headers
            item['imgproxy'] = proxy
            yield item
        # move on to the next page
        current_page += 1
        if int(current_page) > int(page_count):
            return
        data['page'] = current_page
        pages_url = 'http://www.dm5.com/m' + str(
            data['cid']) + '/chapterfun.ashx?' + urllib.parse.urlencode(data)
        yield scrapy.Request(pages_url,
                             cookies=cookies,
                             headers={
                                 'Referer': 'http://www.dm5.com/m' + str(data['cid']) + '/'
                             },
                             meta={
                                 'proxy': proxy,
                                 'manga_no': manga_no,
                                 'chapter_no': chapter_no,
                                 'manga_name': manga_name,
                                 'chapter_name': chapter_name,
                                 'manga_save_folder': manga_save_folder,
                                 'data': data,
                                 'current_page': current_page,
                                 'page_count': page_count
                             },
                             callback=self.parse_page_ext2)
    except UnpackingError as e:
        print('JS unpack error, request URL', response.url)
        print('current page', current_page)
        response_body_str = str(response.body, response.encoding)
        print('response body', response_body_str)
        if try_repr == True:
            # move on to the next page
            current_page += 1
            if int(current_page) > int(page_count):
                return
            data['page'] = current_page
            pages_url = 'http://www.dm5.com/m' + str(
                data['cid']) + '/chapterfun.ashx?' + urllib.parse.urlencode(data)
            yield scrapy.Request(pages_url,
                                 cookies=cookies,
                                 headers={
                                     'Referer': 'http://www.dm5.com/m' + str(data['cid']) + '/'
                                 },
                                 meta={
                                     'proxy': proxy,
                                     'manga_no': manga_no,
                                     'chapter_no': chapter_no,
                                     'manga_name': manga_name,
                                     'chapter_name': chapter_name,
                                     'manga_save_folder': manga_save_folder,
                                     'data': data,
                                     'current_page': current_page,
                                     'page_count': page_count
                                 },
                                 callback=self.parse_page_ext2)
        else:
            yield scrapy.Request(response.url,
                                 cookies=cookies,
                                 meta={
                                     'proxy': proxy,
                                     'manga_no': manga_no,
                                     'chapter_no': chapter_no,
                                     'manga_name': manga_name,
                                     'chapter_name': chapter_name,
                                     'manga_save_folder': manga_save_folder,
                                     'data': data,
                                     'current_page': current_page,
                                     'page_count': page_count,
                                     'try_repr': True
                                 },
                                 callback=self.parse_page_ext2,
                                 dont_filter=True)
def check(inp, out):
    return detect(inp) and self.assertEqual(unpack(inp), out)
req = requests.Request(
    'GET',
    base_url + '/Search?query=1%27+and+1%3D0+union+select+1%2C2%2CContent%2C4%2Cnull%2Cnull+from+Reviews+where+id%3D10+--+-',
    headers=headers,
    cookies=cookies)
r = requestDebug(s, req)
img = BeautifulSoup(r.text, 'lxml').find('img', {'src': '2'}).get('alt')
first_obfuscated_part = img.split('\n', 3)[2]
second_obfuscated_part = img.split('\n', 4)[3]
#
# Reverse algorithm in second_obfuscated_part to get the "good" input
#
second_part_unpacked = packer.unpack(img.split('\n', 4)[3]).split(';', 2)[0]
flag = eval(second_part_unpacked.split()[1].split('"')[1])
myDebug('1:\n%s\n' % flag)
input = flag[::-1]
myDebug('2:\n%s\n' % input)
intermediate = ""
for i in range(len(input)):
    intermediate += chr(input[i] - 20) if i % 2 == 0 else chr(input[i])
myDebug('3:\n%s\n' % intermediate)
input = intermediate.replace('\\x', '')
myDebug('4:\n%s\n' % input)
intermediate = b''
for i in range(0, len(input), 2):
    intermediate += codecs.decode(input[i:i + 2], 'hex')
input = codecs.decode(intermediate, 'base64').decode()
myDebug('5:\n%s\n' % input)
def check(inp, out):
    return self.assertEqual(unpack(inp), out)
def check(inp, out):
    return self.assertEqual(unpack(inp), out)

check("eval(function(p,a,c,k,e,r){e=String;if(!''.replace(/^/,String)"
      "){while(c--)r[c]=k[c]||c;k=[function(e){return r[e]}];e="
      "function(){return'\\\\w+'};c=1};while(c--)if(k[c])p=p.replace("
      "new RegExp('\\\\b'+e(c)+'\\\\b','g'),k[c]);return p}('0 2=1',"
      "62,3,'var||a'.split('|'),0,{}))",
      'var a=1')