def findPHP(data, streamId):
    """Derive the stream page URL from a ``document.write`` call in *data*.

    Two page layouts are tried in order:

    1. the written markup references a ``.php?...=`` URL; *streamId* is
       appended to the captured prefix;
    2. the written markup concatenates a JavaScript id variable with
       ``'.html'``; the quoted ``'+ id +'`` fragment of the captured text is
       replaced by *streamId*.

    Matching is delegated to ``regexUtils.findall(text, pattern)``.

    Returns the resulting string, or None when neither layout matches.
    """
    phpPattern = "document.write\('.*?src=['\"]*(.*?.php\?.*?=).*?['\" ]*.*?\)"
    htmlPattern = "document.write\('.*?src=['\"]*(.*?f*id\s*\+'\.html).*?['\" ]*.*?\)"

    phpHits = regexUtils.findall(data, phpPattern)
    if phpHits:
        return phpHits[0] + streamId

    htmlHits = regexUtils.findall(data, htmlPattern)
    if not htmlHits:
        return None

    # Swap the JavaScript concatenation for the concrete stream id.
    return re.sub(r"\'\+\s*f*id\s*\+\'", "%s" % streamId, htmlHits[0])
def findPHP(data, streamId):
    """Derive the stream page URL from a ``document.write`` call in *data*.

    Each attempt pairs a search pattern with the pattern of the quoted
    ``'+ variable +'`` fragment that has to be replaced by *streamId* in the
    captured text.  Attempts run in order and the first one that matches
    decides the result.

    Matching is delegated to ``regexUtils.findall(text, pattern)``.

    Returns the rewritten string, or None when no attempt matches.
    """
    attempts = (
        (
            "document.write\('.*?src=['\"]*(.*?.(?:php|html)[^&\"]*).*?['\" ]*.*?\)",
            r"\'\+\s*(?:[fc]*id|ch)\s*\+\'",
        ),
        (
            "document.write\('.*?src=['\"]*(.*?(?:f*id|ch)\s*\+'\.html*).*?['\" ]*.*?\)",
            r"\'\+\s*(?:f*id|ch)\s*\+\'",
        ),
    )

    for searchPattern, placeholderPattern in attempts:
        found = regexUtils.findall(data, searchPattern)
        if found:
            return re.sub(placeholderPattern, "%s" % streamId, found[0])

    return None
def findVideoFrameLink(page, data):
    """Locate the embedded player frame in *data* and return its absolute URL.

    Strategies are tried in order and the first hit wins:

    1. a frame tag whose ``height=`` and ``width=`` attributes are both above
       300 (a value of ``100%`` counts as large enough); tags matching one of
       the exclusion lookaheads in the pattern are ignored;
    2. a frame tag sized through inline ``height:`` / ``width:`` declarations;
    3. a ``FRAMESET`` mentioning ``100%`` followed by a ``FRAME`` with a src;
    4. a linked 600x450 image opening in a new window.

    Links are resolved against the unquoted *page* URL.

    Returns None when ``findFrames(data)`` yields nothing or when no strategy
    produces a link.
    """
    minheight = 300
    minwidth = 300

    def _dimension(raw, threshold):
        # The attribute patterns also capture sizes such as "50%", which
        # int() rejects with ValueError.  Only "100%" is known to be large
        # enough; any other non-integer size yields None so the caller can
        # skip the frame instead of aborting the whole search.
        if raw == "100%":
            return threshold + 1
        try:
            return int(raw)
        except ValueError:
            return None

    frames = findFrames(data)
    if not frames:
        return None

    iframes = regexUtils.findall(
        data,
        "(frame(?![^>]*cbox\.ws)(?![^>]*Publi)(?![^>]*chat\d*\.\w+)(?![^>]*ad122m)(?![^>]*adshell)(?![^>]*capacanal)(?![^>]*blacktvlive\.com)[^>]*\sheight\s*=\s*[\"']*([\%\d]+)(?:px)?[\"']*[^>]*>)",
    )
    if iframes:
        for iframe in iframes:
            height = _dimension(iframe[1], minheight)
            if height is None or height <= minheight:
                continue
            m = regexUtils.findall(iframe[0], "[\"' ]width\s*=\s*[\"']*(\d+[%]*)(?:px)?[\"']*")
            if not m:
                continue
            width = _dimension(m[0], minwidth)
            if width is None or width <= minwidth:
                continue
            m = regexUtils.findall(iframe[0], "['\"\s]src=[\"']*\s*([^>\"' ]+)\s*[>\"']*")
            if m:
                return urlparse.urljoin(urllib.unquote(page), m[0]).strip()

    # Alternative 1: size given as inline style declarations (digits only,
    # so int() cannot fail here).
    iframes = regexUtils.findall(
        data, '(frame(?![^>]*cbox\.ws)(?![^>]*capacanal)(?![^>]*blacktvlive\.com)[^>]*["; ]height:\s*(\d+)[^>]*>)'
    )
    if iframes:
        for iframe in iframes:
            height = int(iframe[1])
            if height > minheight:
                m = regexUtils.findall(iframe[0], '["; ]width:\s*(\d+)')
                if m:
                    width = int(m[0])
                    if width > minwidth:
                        m = regexUtils.findall(iframe[0], '["; ]src=["\']*\s*([^>"\' ]+)\s*[>"\']*')
                        if m:
                            return urlparse.urljoin(urllib.unquote(page), m[0]).strip()

    # Alternative 2 (Frameset)
    m = regexUtils.findall(data, '<FRAMESET[^>]+100%[^>]+>\s*<FRAME[^>]+src="([^"]+)"')
    if m:
        return urlparse.urljoin(urllib.unquote(page), m[0]).strip()

    # Alternative 3: linked preview image.
    m = regexUtils.findall(
        data, '<a href="([^"]+)" target="_blank"><img src="[^"]+" height="450" width="600" longdesc="[^"]+"/></a>'
    )
    if m:
        return urlparse.urljoin(urllib.unquote(page), m[0]).strip()

    return None
def findVCods(data):
    """Extract the ``getURL03`` player parameters from *data*.

    The pattern captures, in order, the quoted values following ``sUrl``,
    ``cod1`` and ``cod2`` and the first argument of ``SWFObject(``.

    Returns the first match produced by ``regexUtils.findall`` or None when
    there is no match.
    """
    pattern = "function getURL03.*?sUrl.*?'([^']+)'.*?cod1.*?'([^']+)'.*?cod2.*?'([^']+)'.*?SWFObject\('([^']+)'"
    matches = regexUtils.findall(data, pattern)
    return matches[0] if matches else None
def findVCods(data):
    """Extract the ``getURL03`` player parameters from *data*.

    The pattern captures, in order, the quoted values following ``sUrl``,
    ``cod1`` and ``cod2`` and the first argument of ``SWFObject(``.

    Returns the first match, or None when there is no match.

    NOTE(review): ``re.findall`` is called as (text, pattern), the argument
    order the project's ``regexUtils.findall`` uses elsewhere; confirm that
    ``re`` is bound to that helper in this module rather than to the standard
    library.
    """
    pattern = "function getURL03.*?sUrl.*?'([^']+)'.*?cod1.*?'([^']+)'.*?cod2.*?'([^']+)'.*?SWFObject\('([^']+)'"
    matches = re.findall(data, pattern)
    return matches[0] if matches else None
def findEmbedPHPLink(data):
    """Return the src of the first JavaScript ``<script>`` tag pointing at a
    ``.php?...`` URL, or None when *data* contains no such tag.

    NOTE(review): ``re.findall`` is called as (text, pattern), the argument
    order the project's ``regexUtils.findall`` uses elsewhere; confirm that
    ``re`` is bound to that helper in this module rather than to the standard
    library.
    """
    pattern = '<script type="text/javascript" src="([^"]+\.php\?[^"]+)"\s*>\s*</script>'
    sources = re.findall(data, pattern)
    if not sources:
        return None
    return sources[0]
def findVideoFrameLink(page, data):
    """Locate the embedded player frame in *data* and return its absolute URL.

    Strategies are tried in order and the first hit wins:

    1. a frame tag whose ``height=`` and ``width=`` attributes are both above
       300 (a value of ``100%`` counts as large enough); tags matching one of
       the exclusion lookaheads in the pattern are ignored;
    2. a frame tag sized through inline ``height:`` / ``width:`` declarations;
    3. a frameset mentioning ``100%`` followed by a frame with a src;
    4. a ``playStream('iframe', '...')`` call carrying an http(s) URL.

    Links are resolved against the unquoted *page* URL.

    Returns None when ``findFrames(data)`` yields nothing or when no strategy
    produces a link.
    """
    minheight = 300
    minwidth = 300

    def _dimension(raw, threshold):
        # The attribute patterns also capture sizes such as "50%", which
        # int() rejects with ValueError.  Only "100%" is known to be large
        # enough; any other non-integer size yields None so the caller can
        # skip the frame instead of aborting the whole search.
        if raw == '100%':
            return threshold + 1
        try:
            return int(raw)
        except ValueError:
            return None

    frames = findFrames(data)
    if not frames:
        return None

    iframes = regexUtils.findall(data, "(frame(?![^>]*cbox\.ws)(?![^>]*Publi)(?![^>]*dailymotion)(?![^>]*blacktvlive\.)(?![^>]*chat\d*\.\w+)(?![^>]*ad122m)(?![^>]*adshell)(?![^>]*capacanal)(?![^>]*waframedia)(?![^>]*Beba.tv/embed)(?![^>]*maxtags)(?![^>]*s/a1\.php)(?![^>]*right-sidebar)[^>]*\sheight\s*=\s*[\"']*([\%\d]+)(?:px)?[\"']*[^>]*>)")
    if iframes:
        for iframe in iframes:
            height = _dimension(iframe[1], minheight)
            if height is None or height <= minheight:
                continue
            m = regexUtils.findall(iframe[0], "[\"' ]width\s*=\s*[\"']*(\d+[%]*)(?:px)?[\"']*")
            if not m:
                continue
            width = _dimension(m[0], minwidth)
            if width is None or width <= minwidth:
                continue
            m = regexUtils.findall(iframe[0], '[\'"\s]+(?:src|SRC)\s*=\s*["\']*\s*([^>"\' ]+)\s*[>"\']*')
            if m:
                # Pages on this host get a trailing slash before the join.
                if 'premiertv' in page:
                    page = page+'/'
                return urlparse.urljoin(urllib.unquote(page), m[0]).strip()

    # Alternative 1: size given as inline style declarations (digits only,
    # so int() cannot fail here).
    iframes = regexUtils.findall(data, "(frame(?![^>]*cbox\.ws)(?![^>]*capacanal)(?![^>]*dailymotion)[^>]*[\"; ]height:\s*(\d+)[^>]*>)")
    if iframes:
        for iframe in iframes:
            height = int(iframe[1])
            if height > minheight:
                m = regexUtils.findall(iframe[0], "[\"; ]width:\s*(\d+)")
                if m:
                    width = int(m[0])
                    if width > minwidth:
                        m = regexUtils.findall(iframe[0], '[\"; ](?:src|SRC)=["\']*\s*([^>"\' ]+)\s*[>"\']*')
                        if m:
                            return urlparse.urljoin(urllib.unquote(page), m[0]).strip()

    # Alternative 2 (Frameset)
    m = regexUtils.findall(data, '<(?:FRAMESET|frameset)[^>]+100%[^>]+>\s*<(?:FRAME|frame)[^>]+src="([^"]+)"')
    if m:
        return urlparse.urljoin(urllib.unquote(page), m[0]).strip()

    # Alternative 3: URL handed to a playStream('iframe', ...) call.
    m = regexUtils.findall(data, r'playStream\(\'iframe\', \'[^\']*(https*:[^\']+)\'\)')
    if m:
        return urlparse.urljoin(urllib.unquote(page), m[0]).strip()

    return None
def findContentRefreshLink(data):
    """Return the redirect target announced by a short page.

    Pages longer than 100 characters (spaces not counted) are rejected
    outright.  Otherwise a ``0; url=...`` refresh target is looked for first
    and a ``window.location = '...'`` assignment second.

    Returns the first target found, or None.
    """
    maxLength = 100
    if len(data.replace(' ','')) > maxLength:
        return None

    patterns = (
        '0;\s*url=([^\'" ]+)',
        'window.location\s*=\s*[\'"]([^\'"]+)[\'"]',
    )
    for pattern in patterns:
        targets = regexUtils.findall(data, pattern)
        if targets:
            return targets[0]
    return None
def findEmbedPHPLink(data):
    """Return the src of the first JavaScript ``<script>`` tag pointing at a
    ``.php?...`` URL, or None when *data* contains no such tag.

    The pattern's lookaheads reject sources containing ``localtimes`` or
    ``adcash``.
    """
    pattern = '<script type="text/javascript" src="((?![^"]+localtimes)(?![^"]+adcash)[^"]+\.php\?[^"]+)"\s*>\s*</script>'
    sources = regexUtils.findall(data, pattern)
    if not sources:
        return None
    return sources[0]
def findVideoFrameLink(page, data):
    """Locate the embedded player frame in *data* and return its absolute URL.

    Strategies are tried in order and the first hit wins:

    1. a frame tag whose ``height=`` and ``width=`` attributes are both above
       300 (a value of ``100%`` counts as large enough); tags matching one of
       the exclusion lookaheads in the pattern are ignored;
    2. a frame tag sized through inline ``height:`` / ``width:`` declarations;
    3. a frameset mentioning ``100%`` followed by a frame with a src;
    4. a linked image with height, width and longdesc attributes that opens
       in a new window.

    Links are resolved against the unquoted *page* URL.

    Returns None when ``findFrames(data)`` yields nothing or when no strategy
    produces a link.
    """
    minheight = 300
    minwidth = 300

    def _dimension(raw, threshold):
        # The attribute patterns also capture sizes such as "50%", which
        # int() rejects with ValueError.  Only "100%" is known to be large
        # enough; any other non-integer size yields None so the caller can
        # skip the frame instead of aborting the whole search.
        if raw == '100%':
            return threshold + 1
        try:
            return int(raw)
        except ValueError:
            return None

    frames = findFrames(data)
    if not frames:
        return None

    iframes = regexUtils.findall(data, "(frame(?![^>]*cbox\.ws)(?![^>]*Publi)(?![^>]*dailymotion)(?![^>]*blacktvlive\.)(?![^>]*chat\d*\.\w+)(?![^>]*ad122m)(?![^>]*adshell)(?![^>]*capacanal)(?![^>]*blacktvlive\.com)[^>]*\sheight\s*=\s*[\"']*([\%\d]+)(?:px)?[\"']*[^>]*>)")
    if iframes:
        for iframe in iframes:
            height = _dimension(iframe[1], minheight)
            if height is None or height <= minheight:
                continue
            m = regexUtils.findall(iframe[0], "[\"' ]width\s*=\s*[\"']*(\d+[%]*)(?:px)?[\"']*")
            if not m:
                continue
            width = _dimension(m[0], minwidth)
            if width is None or width <= minwidth:
                continue
            m = regexUtils.findall(iframe[0], '[\'"\s]+(?:src|SRC)\s*=\s*["\']*\s*([^>"\' ]+)\s*[>"\']*')
            if m:
                # Pages on this host get a trailing slash before the join.
                if 'premiertv' in page:
                    page = page+'/'
                return urlparse.urljoin(urllib.unquote(page), m[0]).strip()

    # Alternative 1: size given as inline style declarations (digits only,
    # so int() cannot fail here).
    iframes = regexUtils.findall(data, "(frame(?![^>]*cbox\.ws)(?![^>]*capacanal)(?![^>]*dailymotion)[^>]*[\"; ]height:\s*(\d+)[^>]*>)")
    if iframes:
        for iframe in iframes:
            height = int(iframe[1])
            if height > minheight:
                m = regexUtils.findall(iframe[0], "[\"; ]width:\s*(\d+)")
                if m:
                    width = int(m[0])
                    if width > minwidth:
                        m = regexUtils.findall(iframe[0], '[\"; ](?:src|SRC)=["\']*\s*([^>"\' ]+)\s*[>"\']*')
                        if m:
                            return urlparse.urljoin(urllib.unquote(page), m[0]).strip()

    # Alternative 2 (Frameset)
    m = regexUtils.findall(data, '<(?:FRAMESET|frameset)[^>]+100%[^>]+>\s*<(?:FRAME|frame)[^>]+src="([^"]+)"')
    if m:
        return urlparse.urljoin(urllib.unquote(page), m[0]).strip()

    # Alternative 3: linked preview image.
    m = regexUtils.findall(data, '<a href="([^"]+)" target="_blank"><img src="[^"]+" height="\d+" width="\d+" longdesc="[^"]+"/></a>')
    if m:
        return urlparse.urljoin(urllib.unquote(page), m[0]).strip()

    return None
def findContentRefreshLink(data):
    """Return the redirect target announced by a short page.

    Pages longer than 100 characters (spaces not counted) are rejected
    outright.  Otherwise a ``0;url=...`` refresh target is looked for first
    and a ``window.location = '...'`` assignment second.

    Returns the first target found, or None.

    NOTE(review): ``re.findall`` is called as (text, pattern), the argument
    order the project's ``regexUtils.findall`` uses elsewhere; confirm that
    ``re`` is bound to that helper in this module rather than to the standard
    library.
    """
    maxLength = 100
    if len(data.replace(' ', '')) > maxLength:
        return None

    patterns = (
        '0;url=([^\'" ]+)',
        'window.location\s*=\s*[\'"]([^\'"]+)[\'"]',
    )
    for pattern in patterns:
        targets = re.findall(data, pattern)
        if targets:
            return targets[0]
    return None
def findVideoFrameLink(page, data):
    """Locate the embedded player frame in *data* and return its absolute URL.

    Strategies are tried in order and the first hit wins:

    1. a frame tag whose ``height=`` and ``width=`` attributes are both above
       300 (a value of ``100%`` counts as large enough); tags matching one of
       the exclusion lookaheads in the pattern are ignored;
    2. a frame tag sized through inline ``height:`` / ``width:`` declarations;
    3. a frameset mentioning ``100%`` followed by a frame with a src;
    4. a linked image with height, width and longdesc attributes that opens
       in a new window (whitespace between attributes is optional).

    Links are resolved against the unquoted *page* URL.

    Returns None when ``findFrames(data)`` yields nothing or when no strategy
    produces a link.
    """
    minheight = 300
    minwidth = 300

    def _dimension(raw, threshold):
        # The attribute patterns also capture sizes such as "50%", which
        # int() rejects with ValueError.  Only "100%" is known to be large
        # enough; any other non-integer size yields None so the caller can
        # skip the frame instead of aborting the whole search.
        if raw == '100%':
            return threshold + 1
        try:
            return int(raw)
        except ValueError:
            return None

    frames = findFrames(data)
    if not frames:
        return None

    iframes = regexUtils.findall(data, "(frame(?![^>]*cbox\.ws)(?![^>]*Publi)(?![^>]*dailymotion)(?![^>]*blacktvlive\.)(?![^>]*chat\d*\.\w+)(?![^>]*ad122m)(?![^>]*adshell)(?![^>]*capacanal)(?![^>]*blacktvlive\.com)[^>]*\sheight\s*=\s*[\"']*([\%\d]+)(?:px)?[\"']*[^>]*>)")
    if iframes:
        for iframe in iframes:
            height = _dimension(iframe[1], minheight)
            if height is None or height <= minheight:
                continue
            m = regexUtils.findall(iframe[0], "[\"' ]width\s*=\s*[\"']*(\d+[%]*)(?:px)?[\"']*")
            if not m:
                continue
            width = _dimension(m[0], minwidth)
            if width is None or width <= minwidth:
                continue
            m = regexUtils.findall(iframe[0], '[\'"\s]+(?:src|SRC)\s*=\s*["\']*\s*([^>"\' ]+)\s*[>"\']*')
            if m:
                # Pages on this host get a trailing slash before the join.
                if 'premiertv' in page:
                    page = page+'/'
                return urlparse.urljoin(urllib.unquote(page), m[0]).strip()

    # Alternative 1: size given as inline style declarations (digits only,
    # so int() cannot fail here).
    iframes = regexUtils.findall(data, "(frame(?![^>]*cbox\.ws)(?![^>]*capacanal)(?![^>]*dailymotion)[^>]*[\"; ]height:\s*(\d+)[^>]*>)")
    if iframes:
        for iframe in iframes:
            height = int(iframe[1])
            if height > minheight:
                m = regexUtils.findall(iframe[0], "[\"; ]width:\s*(\d+)")
                if m:
                    width = int(m[0])
                    if width > minwidth:
                        m = regexUtils.findall(iframe[0], '[\"; ](?:src|SRC)=["\']*\s*([^>"\' ]+)\s*[>"\']*')
                        if m:
                            return urlparse.urljoin(urllib.unquote(page), m[0]).strip()

    # Alternative 2 (Frameset)
    m = regexUtils.findall(data, '<(?:FRAMESET|frameset)[^>]+100%[^>]+>\s*<(?:FRAME|frame)[^>]+src="([^"]+)"')
    if m:
        return urlparse.urljoin(urllib.unquote(page), m[0]).strip()

    # Alternative 3: linked preview image.
    m = regexUtils.findall(data, '<a\s*href="([^"]+)"\s*target="_blank"><img\s*src="[^"]+"\s*height="\d+"\s*width="\d+"\s*longdesc="[^"]+"\s*/></a>')
    if m:
        return urlparse.urljoin(urllib.unquote(page), m[0]).strip()

    return None
def findVideoFrameLink(page, data):
    """Locate the embedded player frame in *data* and return its absolute URL.

    Strategies are tried in order and the first hit wins:

    1. a frame tag whose ``height=`` and ``width=`` attributes are both above
       300 (a width of ``100%`` counts as large enough);
    2. a frame tag sized through inline ``height:`` / ``width:`` declarations;
    3. a ``FRAMESET`` mentioning ``100%`` followed by a ``FRAME`` with a src.

    Links not starting with ``http://`` are resolved against *page*.

    Returns None when ``findFrames(data)`` yields nothing or when no strategy
    produces a link.

    NOTE(review): ``re.findall`` is called as (text, pattern), the argument
    order the project's ``regexUtils.findall`` uses elsewhere; confirm that
    ``re`` is bound to that helper in this module rather than to the standard
    library.
    """
    minheight=300
    minwidth=300

    def _dimension(raw, threshold):
        # The width pattern also captures sizes such as "50%", which int()
        # rejects with ValueError.  Only "100%" is known to be large enough;
        # any other non-integer size yields None so the caller can skip the
        # frame instead of aborting the whole search.
        if raw == '100%':
            return threshold + 1
        try:
            return int(raw)
        except ValueError:
            return None

    frames = findFrames(data)
    if not frames:
        return None

    iframes = re.findall(data, "(frame[^>]* height=[\"']*(\d+)[\"']*[^>]*>)")
    if iframes:
        for iframe in iframes:
            # The height pattern captures digits only, so int() is safe.
            height = int(iframe[1])
            if height <= minheight:
                continue
            m = re.findall(iframe[0], "[\"' ]width=[\"']*(\d+[%]*)[\"']*")
            if not m:
                continue
            width = _dimension(m[0], minwidth)
            if width is None or width <= minwidth:
                continue
            m = re.findall(iframe[0], '[\'"\s]src=["\']*\s*([^"\' ]+)\s*["\']*')
            if m:
                link = m[0]
                if not link.startswith('http://'):
                    up = urlparse.urlparse(urllib.unquote(page))
                    if link.startswith('/'):
                        # Host-relative link: join against scheme and host.
                        link = urllib.basejoin(up[0] + '://' + up[1],link)
                    else:
                        # Relative link: join against the page's own path.
                        link = urllib.basejoin(up[0] + '://' + up[1] + '/' + up[2],link)
                return link.strip()

    # Alternative 1: size given as inline style declarations.
    iframes = re.findall(data, "(frame[^>]*[\"; ]height:\s*(\d+)[^>]*>)")
    if iframes:
        for iframe in iframes:
            height = int(iframe[1])
            if height > minheight:
                m = re.findall(iframe[0], "[\"; ]width:\s*(\d+)")
                if m:
                    width = int(m[0])
                    if width > minwidth:
                        m = re.findall(iframe[0], '[ ]src=["\']*\s*([^"\' ]+)\s*["\']*')
                        if m:
                            link = m[0]
                            if not link.startswith('http://'):
                                link = urllib.basejoin(page,link)
                            return link.strip()

    # Alternative 2 (Frameset)
    iframes = re.findall(data, '<FRAMESET[^>]+100%[^>]+>\s*<FRAME[^>]+src="([^"]+)"')
    if iframes:
        link = iframes[0]
        if not link.startswith('http://'):
            link = urllib.basejoin(page,link)
        return link.strip()

    return None
def findContentRefreshLink(data):
    """Return the target a page redirects to, or None.

    The patterns are tried in order and the first captured value is returned:
    a ``0; url=...`` refresh target, a ``window.location = '...'`` assignment,
    then the src of a ``frame scrolling="auto" noresize`` tag.
    """
    patterns = (
        '0;\s*url=([^\'" ]+)',
        'window.location\s*=\s*[\'"]([^\'"]+)[\'"]',
        'frame\s*scrolling=\"auto\"\s*noresize\s*src\s*=\s*[\'"]([^\'"]+)[\'"]',
    )
    for pattern in patterns:
        targets = regexUtils.findall(data, pattern)
        if targets:
            return targets[0]
    return None
def findJS(data):
    """Find inline-script id assignments followed by an external ``.js`` file.

    The pattern captures two values per match: the quoted value assigned to
    the id variable inside one script element and the src of the script
    element that follows it.  Matches whose script source contains
    ``twitter`` are dropped.

    Returns the filtered matches, or None when the pattern does not match.

    NOTE(review): ``re.findall`` is called as (text, pattern), the argument
    order the project's ``regexUtils.findall`` uses elsewhere; confirm that
    ``re`` is bound to that helper in this module rather than to the standard
    library.
    """
    idName = 'f*id'
    jsName = '(.*?\.js)'
    pattern = "(?:java)?script[^<]+" + idName + "\s*=\s*[\"']([^\"']+)[\"'][^<]*</script\s*>[^<]*<script[^<]*src=[\"']" + jsName + "[\"']"

    found = re.findall(data, pattern)
    if not found:
        return None
    return filter(lambda pair: 'twitter' not in pair[1], found)
def findJS(data):
    """Find inline-script id assignments followed by an external ``.js`` file.

    The pattern captures two values per match: the quoted value assigned to
    the id variable (``id``, ``fid`` or ``ch``) and the src of the script
    element that follows.  It also tolerates script tags whose name is split
    as ``scr'+'ipt``.  Matches whose script source contains ``twitter`` are
    dropped.

    Returns the filtered matches, or None when the pattern does not match.
    """
    idName = '(?:f*id|ch)'
    jsName = '([^\"\']+?\.js[^\"\']*?)'
    pattern = "(?:java)?scr(?:'\+')?ipt.*?" + idName + "\s*=\s*[\"']([^\"']+)[\"'][^<]*</scr(?:'\+')?ipt\s*>[^<]*<scr(?:'\+')?ipt[^<]*src=[\"']" + jsName + "[\"']"

    found = regexUtils.findall(data, pattern)
    if not found:
        return None
    return filter(lambda pair: 'twitter' not in pair[1], found)
def install(self, filename):
    """Extract *filename* into the install directory and report the addon id.

    The archive is unpacked through ``self.extract`` into the translated
    ``INSTALL_DIR``.  The first extracted entry whose name ends with
    ``addon.xml`` is read and the ``id`` attribute of its ``<addon`` tag is
    returned.

    Returns None when nothing was extracted, no ``addon.xml`` is among the
    extracted entries, or the id cannot be found in it.
    """
    destination = xbmc.translatePath(INSTALL_DIR)
    extracted = self.extract(filename, destination)
    if not extracted:
        return None

    manifests = filter(lambda entry: entry.filename.endswith('addon.xml'), extracted)
    if not manifests:
        return None

    manifestPath = os.path.join(destination, manifests[0].filename)
    ids = findall(getFileContent(manifestPath), '<addon id="([^"]+)"')
    return ids[0] if ids else None
def findContentRefreshLink(data):
    """Return the target a page redirects to, or None.

    The patterns are tried in order and the first captured value is returned:
    a ``0; url=...`` refresh target, a ``window.location = '...'`` assignment,
    the src of a ``frame scrolling="auto" noresize`` tag, then the href of a
    centered new-window link wrapping an image.
    """
    patterns = (
        "0;\s*url=([^'\" ]+)",
        "window.location\s*=\s*['\"]([^'\"]+)['\"]",
        'frame\s*scrolling="auto"\s*noresize\s*src\s*=\s*[\'"]([^\'"]+)[\'"]',
        '<center><a\s*href=[\'"]([^\'"]+)[\'"]\s*target="_blank"><img',
    )
    for pattern in patterns:
        targets = regexUtils.findall(data, pattern)
        if targets:
            return targets[0]
    return None
def findContentRefreshLink(data):
    """Return the target a page redirects to, or None.

    The patterns are tried in order and the first captured value is returned:
    a ``0; url=...`` refresh target, a ``window.location = '...'`` assignment,
    the src of a ``frame scrolling="auto" noresize`` tag, then the href of a
    new-window link wrapping an ``alignnone`` image.
    """
    patterns = (
        '0;\s*url=([^\'" ]+)',
        'window.location\s*=\s*[\'"]([^\'"]+)[\'"]',
        'frame\s*scrolling=\"auto\"\s*noresize\s*src\s*=\s*[\'"]([^\'"]+)[\'"]',
        'href=[\'"]([^\'"]+)[\'"]\s*target="_blank"><img class="alignnone"',
    )
    for pattern in patterns:
        targets = regexUtils.findall(data, pattern)
        if targets:
            return targets[0]
    return None
def findContentRefreshLink(page, data):
    """Return the target a page redirects or links to, or None.

    Two groups of patterns are tried in order:

    * refresh targets (``0; url=...`` not containing ``rojadirecta``) and the
      src of a ``frame scrolling="auto" noresize`` tag; the captured value is
      returned as found;
    * site-specific linked-image layouts; the captured href is resolved
      against the unquoted *page* URL and stripped.
    """
    returnedAsIs = (
        '0;\s*url=((?![^\'" ]+rojadirecta)[^\'" ]+)',
        'frame\s*scrolling=\"auto\"\s*noresize\s*src\s*=\s*[\'"]([^\'"]+)[\'"]',
    )
    for pattern in returnedAsIs:
        targets = regexUtils.findall(data, pattern)
        if targets:
            return targets[0]

    resolvedAgainstPage = (
        # hd**ee.fv/cr**hd.fv/sp**ts4u.tv
        '<a\s*href="([^"]+)"\s*target="_blank"><img\s*(?:src="[^"]+"\s*height="\d+"\s*width="\d+"\s*longdesc="[^"]+"|class="alignnone"\s*src="[^"]*"\s*alt="[^"]*"\s*width="\d\d\d"\s*height="\d\d\d")',
        # cr**hd.com
        '<a\s*href="([^"]+)"\s*title="[^"]*"><img\s*(?:src="[^"]+"\s*height="\d+"\s*width="\d+"\s*longdesc="[^"]+"|class="aligncenter"\s*alt="[^"]*"\s*title="[^"]*"\s*src="[^"]*"\s*width="\d\d\d"\s*height="\d\d\d")',
        # spo***live.com
        '<a\s*href="([^"]+)"\s*title=""><img\s*data-scalestrategy="crop"\s*width="\d\d\d"\s*height="\d\d\d"',
    )
    for pattern in resolvedAgainstPage:
        targets = regexUtils.findall(data, pattern)
        if targets:
            return urlparse.urljoin(urllib.unquote(page), targets[0]).strip()

    return None
def findFrames(data):
    """Return the frame tags found in *data*.

    When the text does not contain ``frame`` in any letter case, None is
    returned without running the pattern.  Otherwise the result of
    ``regexUtils.findall`` for text starting at ``frame`` up to, but not
    including, the closing ``>`` is returned.
    """
    if 'frame' not in data.lower():
        return None
    return regexUtils.findall(data, "(frame[^>]*)>")
def findRTMP(url, data):
    """Extract RTMP connection details from player markup in *data*.

    Two layouts are recognised:

    * layout 1 - a ``streamer`` value holds the RTMP URL and a nearby ``file``
      value holds the playpath;
    * layout 2 - a ``file`` value starting with ``rtmp`` holds the URL and a
      nearby ``id`` value holds the playpath, which is appended to the URL.

    "Nearby" means within 400 characters around the URL.  The player SWF is
    taken from a ``SWFObject(`` call or a ``.swf`` reference in that window,
    falling back to any ``.swf`` reference in *data*; relative SWF URLs are
    resolved against *url*.  When the RTMP host carries no port, ``:1935`` is
    added.

    Returns ``[rtmpUrl, playpath, swfUrl]`` for the first candidate with both
    a playpath and a SWF, otherwise None.
    """
    try:
        text = str(data)
    except:
        # Kept as broad as the original: fall back to the raw object
        # whenever str() fails.
        text = data

    # Building blocks shared by every pattern below.
    sep1 = "['\"&\? ]"
    sep2 = "(?:['\"]\s*(?:,|\:)\s*['\"]|=)"
    value = "([^'\"&]+)"
    radius = 400

    fileHoldsRtmp = False
    rtmp = regexUtils.findall(text, sep1 + "streamer" + sep2 + value)
    if not rtmp:
        fileValues = regexUtils.findall(text, sep1 + "file" + sep2 + value)
        if fileValues and fileValues[0].startswith("rtmp"):
            fileHoldsRtmp = True
            rtmp = fileValues

    if not rtmp:
        return None

    for candidate in rtmp:
        tmpRtmp = candidate.replace("/&", "").replace("&", "")
        idx = text.find(tmpRtmp)
        lastIdx = len(text) - 1

        # Window of text around the candidate, clamped to the text bounds.
        area = text[max(idx - radius, 0):min(idx + radius, lastIdx)]

        # Continue the search for later candidates after this one.
        clipStart = idx + len(tmpRtmp)
        if clipStart < lastIdx:
            text = text[clipStart:]

        if fileHoldsRtmp:
            playpath = regexUtils.findall(area, sep1 + "id" + sep2 + value)
            if playpath:
                tmpRtmp = tmpRtmp + "/" + playpath[0]
        else:
            playpath = regexUtils.findall(area, sep1 + "file" + sep2 + value)

        if not playpath:
            continue

        swfUrl = regexUtils.findall(area, "SWFObject\(['\"]([^'\"]+)['\"]")
        if not swfUrl:
            swfUrl = regexUtils.findall(area, sep1 + "([^'\"& ]+\.swf)")
        if not swfUrl:
            swfUrl = regexUtils.findall(data, sep1 + "([^'\"& ]+\.swf)")
        if not swfUrl:
            continue

        finalSwfUrl = swfUrl[0]
        if not finalSwfUrl.startswith("http"):
            finalSwfUrl = urlparse.urljoin(url, finalSwfUrl)

        server = regexUtils.findall(tmpRtmp, "://(.*?)/")
        if server and server[0].find(":") == -1:
            tmpRtmp = tmpRtmp.replace(server[0], server[0] + ":1935")

        return [tmpRtmp, playpath[0], finalSwfUrl]

    return None
def findPHP(data, streamId):
    """Derive the stream page URL from a ``document.write`` call in *data*.

    The pattern captures the ``.php?...=`` URL prefix referenced by the
    written markup; *streamId* is appended to the first capture.

    Returns the resulting string, or None when the pattern does not match.

    NOTE(review): ``re.findall`` is called as (text, pattern), the argument
    order the project's ``regexUtils.findall`` uses elsewhere; confirm that
    ``re`` is bound to that helper in this module rather than to the standard
    library.
    """
    pattern = "document.write\('.*?src=['\"]*(.*?.php\?.*?=).*?['\" ]*.*?\)"
    prefixes = re.findall(data, pattern)
    if not prefixes:
        return None
    return prefixes[0] + streamId
def findVideoFrameLink(page, data):
    """Locate the embedded player frame in *data* and return its absolute URL.

    Strategies are tried in order and the first hit wins:

    1. a frame tag whose ``height=`` and ``width=`` attributes are both above
       300 (a value of ``100%`` counts as large enough); tags matching one of
       the exclusion lookaheads in the pattern are ignored;
    2. a frame tag sized through inline ``height:`` / ``width:`` declarations;
    3. a ``FRAMESET`` mentioning ``100%`` followed by a ``FRAME`` with a src.

    Links not starting with ``http://`` are resolved against *page*.

    Returns None when ``findFrames(data)`` yields nothing or when no strategy
    produces a link.
    """
    minheight=300
    minwidth=300

    def _dimension(raw, threshold):
        # The attribute patterns also capture sizes such as "50%", which
        # int() rejects with ValueError.  Only "100%" is known to be large
        # enough; any other non-integer size yields None so the caller can
        # skip the frame instead of aborting the whole search.
        if raw == '100%':
            return threshold + 1
        try:
            return int(raw)
        except ValueError:
            return None

    frames = findFrames(data)
    if not frames:
        return None

    iframes = regexUtils.findall(data, "(frame(?![^>]*cbox\.ws)(?![^>]*chat\d*\.\w+)(?![^>]*ad122m)(?![^>]*capacanal)(?![^>]*programacion)[^>]*\sheight\s*=\s*[\"']*([\%\d]+)(?:px)?[\"']*[^>]*>)")
    if iframes:
        for iframe in iframes:
            height = _dimension(iframe[1], minheight)
            if height is None or height <= minheight:
                continue
            m = regexUtils.findall(iframe[0], "[\"' ]width\s*=\s*[\"']*(\d+[%]*)(?:px)?[\"']*")
            if not m:
                continue
            width = _dimension(m[0], minwidth)
            if width is None or width <= minwidth:
                continue
            m = regexUtils.findall(iframe[0], '[\'"\s]src=["\']*\s*([^"\' ]+)\s*["\']*')
            if m:
                link = m[0]
                if not link.startswith('http://'):
                    up = urlparse.urlparse(urllib.unquote(page))
                    if link.startswith('/'):
                        # Host-relative link: join against scheme and host.
                        link = urllib.basejoin(up[0] + '://' + up[1],link)
                    else:
                        # Relative link: join against the page's own path.
                        link = urllib.basejoin(up[0] + '://' + up[1] + '/' + up[2],link)
                return link.strip()

    # Alternative 1: size given as inline style declarations (digits only,
    # so int() cannot fail here).
    iframes = regexUtils.findall(data, "(frame(?![^>]*cbox\.ws)(?![^>]*capacanal)(?![^>]*programacion)[^>]*[\"; ]height:\s*(\d+)[^>]*>)")
    if iframes:
        for iframe in iframes:
            height = int(iframe[1])
            if height > minheight:
                m = regexUtils.findall(iframe[0], "[\"; ]width:\s*(\d+)")
                if m:
                    width = int(m[0])
                    if width > minwidth:
                        m = regexUtils.findall(iframe[0], '[\"; ]src=["\']*\s*([^"\' ]+)\s*["\']*')
                        if m:
                            link = m[0]
                            if not link.startswith('http://'):
                                link = urllib.basejoin(page,link)
                            return link.strip()

    # Alternative 2 (Frameset)
    iframes = regexUtils.findall(data, '<FRAMESET[^>]+100%[^>]+>\s*<FRAME[^>]+src="([^"]+)"')
    if iframes:
        link = iframes[0]
        if not link.startswith('http://'):
            link = urllib.basejoin(page,link)
        return link.strip()

    return None
def findVideoFrameLink(page, data):
    """Locate the embedded player frame in *data* and return its absolute URL.

    Strategies are tried in order and the first hit wins:

    1. a frame tag whose ``height=`` and ``width=`` attributes are both above
       300 (a width of ``100%`` counts as large enough);
    2. a frame tag sized through inline ``height:`` / ``width:`` declarations;
    3. a ``FRAMESET`` mentioning ``100%`` followed by a ``FRAME`` with a src.

    Links not starting with ``http://`` are resolved against *page*.

    Returns None when ``findFrames(data)`` yields nothing or when no strategy
    produces a link.

    NOTE(review): ``re.findall`` is called as (text, pattern), the argument
    order the project's ``regexUtils.findall`` uses elsewhere; confirm that
    ``re`` is bound to that helper in this module rather than to the standard
    library.
    """
    minheight = 300
    minwidth = 300

    def _dimension(raw, threshold):
        # The width pattern also captures sizes such as "50%", which int()
        # rejects with ValueError.  Only "100%" is known to be large enough;
        # any other non-integer size yields None so the caller can skip the
        # frame instead of aborting the whole search.
        if raw == '100%':
            return threshold + 1
        try:
            return int(raw)
        except ValueError:
            return None

    frames = findFrames(data)
    if not frames:
        return None

    iframes = re.findall(data, "(frame[^>]* height=[\"']*(\d+)[\"']*[^>]*>)")
    if iframes:
        for iframe in iframes:
            # The height pattern captures digits only, so int() is safe.
            height = int(iframe[1])
            if height <= minheight:
                continue
            m = re.findall(iframe[0], "[\"' ]width=[\"']*(\d+[%]*)[\"']*")
            if not m:
                continue
            width = _dimension(m[0], minwidth)
            if width is None or width <= minwidth:
                continue
            m = re.findall(
                iframe[0], '[\'"\s]src=["\']*\s*([^"\' ]+)\s*["\']*')
            if m:
                link = m[0]
                if not link.startswith('http://'):
                    up = urlparse.urlparse(urllib.unquote(page))
                    if link.startswith('/'):
                        # Host-relative link: join against scheme and host.
                        link = urllib.basejoin(
                            up[0] + '://' + up[1], link)
                    else:
                        # Relative link: join against the page's own path.
                        link = urllib.basejoin(
                            up[0] + '://' + up[1] + '/' + up[2], link)
                return link.strip()

    # Alternative 1: size given as inline style declarations.
    iframes = re.findall(data, "(frame[^>]*[\"; ]height:\s*(\d+)[^>]*>)")
    if iframes:
        for iframe in iframes:
            height = int(iframe[1])
            if height > minheight:
                m = re.findall(iframe[0], "[\"; ]width:\s*(\d+)")
                if m:
                    width = int(m[0])
                    if width > minwidth:
                        m = re.findall(iframe[0], '[ ]src=["\']*\s*([^"\' ]+)\s*["\']*')
                        if m:
                            link = m[0]
                            if not link.startswith('http://'):
                                link = urllib.basejoin(page, link)
                            return link.strip()

    # Alternative 2 (Frameset)
    iframes = re.findall(
        data, '<FRAMESET[^>]+100%[^>]+>\s*<FRAME[^>]+src="([^"]+)"')
    if iframes:
        link = iframes[0]
        if not link.startswith('http://'):
            link = urllib.basejoin(page, link)
        return link.strip()

    return None
def findFrames(data):
    """Return the frame tags found in *data*.

    When the text does not contain ``frame`` in any letter case, None is
    returned without running the pattern.  Otherwise the matches for text
    starting at ``frame`` up to, but not including, the closing ``>`` are
    returned.

    NOTE(review): ``re.findall`` is called as (text, pattern), the argument
    order the project's ``regexUtils.findall`` uses elsewhere; confirm that
    ``re`` is bound to that helper in this module rather than to the standard
    library.
    """
    if "frame" not in data.lower():
        return None
    return re.findall(data, "(frame[^>]*)>")
def findRTMP(url, data):
    """Extract RTMP connection details from player markup in *data*.

    Two layouts are recognised:

    * layout 1 - a ``streamer`` value holds the RTMP URL and a nearby ``file``
      value holds the playpath;
    * layout 2 - a ``file`` value starting with ``rtmp`` holds the URL and a
      nearby ``id`` value holds the playpath, which is appended to the URL.

    "Nearby" means within 400 characters around the URL.  The player SWF is
    taken from a ``SWFObject(`` call or a ``.swf`` reference in that window,
    falling back to any ``.swf`` reference in *data*; relative SWF URLs are
    resolved against *url*.  When the RTMP host carries no port, ``:1935`` is
    added.

    Returns ``[rtmpUrl, playpath, swfUrl]`` for the first candidate with both
    a playpath and a SWF, otherwise None.
    """
    try:
        text = str(data)
    except:
        # Kept as broad as the original: fall back to the raw object
        # whenever str() fails.
        text = data

    # Building blocks shared by every pattern below.
    sep1 = '[\'"&\? ]'
    sep2 = '(?:[\'"]\s*(?:,|\:)\s*[\'"]|=)'
    value = '([^\'"&]+)'
    radius = 400

    fileHoldsRtmp = False
    rtmp = regexUtils.findall(text, sep1 + 'streamer' + sep2 + value)
    if not rtmp:
        fileValues = regexUtils.findall(text, sep1 + 'file' + sep2 + value)
        if fileValues and fileValues[0].startswith('rtmp'):
            fileHoldsRtmp = True
            rtmp = fileValues

    if not rtmp:
        return None

    for candidate in rtmp:
        tmpRtmp = candidate.replace('/&','').replace('&','')
        idx = text.find(tmpRtmp)
        lastIdx = len(text) - 1

        # Window of text around the candidate, clamped to the text bounds.
        area = text[max(idx - radius, 0):min(idx + radius, lastIdx)]

        # Continue the search for later candidates after this one.
        clipStart = idx + len(tmpRtmp)
        if clipStart < lastIdx:
            text = text[clipStart:]

        if fileHoldsRtmp:
            playpath = regexUtils.findall(area, sep1 + 'id' + sep2 + value)
            if playpath:
                tmpRtmp = tmpRtmp + '/' + playpath[0]
        else:
            playpath = regexUtils.findall(area, sep1 + 'file' + sep2 + value)

        if not playpath:
            continue

        swfUrl = regexUtils.findall(area, 'SWFObject\([\'"]([^\'"]+)[\'"]')
        if not swfUrl:
            swfUrl = regexUtils.findall(area, sep1 + '([^\'"& ]+\.swf)')
        if not swfUrl:
            swfUrl = regexUtils.findall(data, sep1 + '([^\'"& ]+\.swf)')
        if not swfUrl:
            continue

        finalSwfUrl = swfUrl[0]
        if not finalSwfUrl.startswith('http'):
            finalSwfUrl = urlparse.urljoin(url, finalSwfUrl)

        server = regexUtils.findall(tmpRtmp, '://(.*?)/')
        if server and server[0].find(':') == -1:
            tmpRtmp = tmpRtmp.replace(server[0], server[0] + ':1935')

        return [tmpRtmp, playpath[0], finalSwfUrl]

    return None
def findVideoFrameLink(page, data):
    """Locate the embedded player frame in *data* and return its absolute URL.

    Strategies are tried in order and the first hit wins:

    1. a frame tag whose height and width attributes are both above 300 (a
       value of ``100%`` counts as large enough); tags matching one of the
       exclusion lookaheads in the pattern are ignored;
    2. a frame tag sized through inline ``height:`` / ``width:`` declarations;
    3. a frameset mentioning ``100%`` followed by a frame with a src;
    4. a ``playStream('iframe', '...')`` call carrying an http(s) URL;
    5. two site-specific ``<iframe`` layouts.

    Links are resolved against the unquoted *page* URL.

    Returns None when ``findFrames(data)`` yields nothing or when no strategy
    produces a link.
    """
    minheight = 300
    minwidth = 300

    def _dimension(raw, threshold):
        # The attribute patterns also capture sizes such as "50%", which
        # int() rejects with ValueError.  Only "100%" is known to be large
        # enough; any other non-integer size yields None so the caller can
        # skip the frame instead of aborting the whole search.
        if raw == '100%':
            return threshold + 1
        try:
            return int(raw)
        except ValueError:
            return None

    frames = findFrames(data)
    if not frames:
        return None

    iframes = regexUtils.findallIgnoreCase(
        data,
        "((?:frame|FRAME)(?![^>]*cbox\.ws)(?![^>]*publi)(?![^>]*dailymotion)(?![^>]*guide\.)(?![^>]*chat\d*\.\w+)(?![^>]*ad122m)(?![^>]*adshell)(?![^>]*capacanal)(?![^>]*waframedia)(?![^>]*banner)(?![^>]*maxtags)(?![^>]*s/a1\.php)(?![^>]*ads.php)(?![^>]*right-sidebar)[^>]*\s(?:height|HEIGHT)\s*=\s*[\"']*([\%\d]+)(?:px)?[\"']*[^>]*>)"
    )
    if iframes:
        for iframe in iframes:
            height = _dimension(iframe[1], minheight)
            if height is None or height <= minheight:
                continue
            m = regexUtils.findall(
                iframe[0],
                "[\"'\s](?:width|WIDTH)\s*=\s*[\"']*(\d+[%]*)(?:px)?[\"']*"
            )
            if not m:
                continue
            width = _dimension(m[0], minwidth)
            if width is None or width <= minwidth:
                continue
            m = regexUtils.findall(
                iframe[0],
                '[\'"\s]+(?:src|SRC)\s*=\s*["\']*\s*([^>"\' ]+)\s*[>"\']*'
            )
            if m:
                # Pages on this host get a trailing slash before the join.
                if 'premiertv' in page:
                    page = page + '/'
                return urlparse.urljoin(urllib.unquote(page), m[0]).strip()

    # Alternative 1: size given as inline style declarations (digits only,
    # so int() cannot fail here).
    iframes = regexUtils.findall(
        data,
        "(frame(?![^>]*cbox\.ws)(?![^>]*capacanal)(?![^>]*dailymotion)[^>]*[\"; ]height:\s*(\d+)[^>]*>)"
    )
    if iframes:
        for iframe in iframes:
            height = int(iframe[1])
            if height > minheight:
                m = regexUtils.findall(iframe[0], "[\"; ]width:\s*(\d+)")
                if m:
                    width = int(m[0])
                    if width > minwidth:
                        m = regexUtils.findall(
                            iframe[0],
                            '[\"; ](?:src|SRC)=["\']*\s*([^>"\' ]+)\s*[>"\']*')
                        if m:
                            return urlparse.urljoin(urllib.unquote(page), m[0]).strip()

    # Alternative 2 (Frameset)
    m = regexUtils.findall(
        data,
        '<(?:FRAMESET|frameset)[^>]+100%[^>]+>\s*<(?:FRAME|frame)[^>]+src="([^"]+)"'
    )
    if m:
        return urlparse.urljoin(urllib.unquote(page), m[0]).strip()

    # Alternative 3: URL handed to a playStream('iframe', ...) call.
    m = regexUtils.findall(
        data, r'playStream\(\'iframe\',\s*\'[^\']*(https*:[^\']+)\'\)')
    if m:
        return urlparse.urljoin(urllib.unquote(page), m[0]).strip()

    # sportsh**tv
    m = regexUtils.findall(
        data, r'<iframe\s*src="(stream[^"]+)"\s*allowfullscreen>')
    if m:
        return urlparse.urljoin(urllib.unquote(page), m[0]).strip()

    # stre***d
    m = regexUtils.findall(
        data, r'<iframe\s*class="embed[^"]+"\s*name="video[^"]+"\s*src="([^"]+)"')
    if m:
        return urlparse.urljoin(urllib.unquote(page), m[0]).strip()

    return None