def findLink(req, url):
    """Register every known video-host URL quoted in the document at ``url``.

    The text returned by ``load(url)`` is scanned for double-quoted
    ``http``/``https`` URLs on dailymotion, videomega, videowood, youtube or
    openload under ``.com``, ``.tv`` or ``.co``. Each hit is handed to
    ``page.addVideo`` with the URL passed as both the second and third
    argument, followed by whatever ``getImage`` returns for that URL.

    A URL identical to the one registered immediately before it is skipped,
    so back-to-back repeats produce a single entry. Non-adjacent repeats are
    still registered again.

    Args:
        req: Passed through unchanged to ``page.addVideo``.
        url: Location handed to ``load``.
    """
    # The pattern stops just before the closing quote, so group 1 already
    # holds the complete URL: only the opening quote lies outside it and no
    # trailing character has to be trimmed. Dots are escaped so that "www."
    # and ".com"/".tv"/".co" only match literal dots.
    pattern = (
        r'"(https?://(?:www\.)?'
        r'(?:dailymotion|videomega|videowood|youtube|openload)'
        r'(?:\.com|\.tv|\.co)[^"]*)'
    )
    previous = ''
    for match in re.finditer(pattern, load(url)):
        link = match.group(1)
        if link == previous:
            continue
        previous = link
        image = getImage(link)
        page.addVideo(req, link, link, image)
def findImageLink(req, url, unquote=False, showPage=False):
    """Register each anchor in the document at ``url`` that has an image.

    Every ``<a ...>...</a>`` span in the text returned by ``load(url)`` is
    inspected. Spans that contain both an ``href="..."`` and a ``src="..."``
    attribute are passed to ``page.addVideo`` or ``page.addPage``; the href
    value is supplied as both the second and third argument and the src
    value as the fourth.

    Args:
        req: Passed through unchanged to the ``page`` call.
        url: Location handed to ``load``.
        unquote: When equal to ``True``, the href value is run through
            ``urllib.unquote`` before being registered.
        showPage: When equal to ``False``, ``page.addVideo`` is used;
            otherwise ``page.addPage``.
    """
    html = load(url)
    for anchor in re.finditer(r'<a .*?</a>', html, re.DOTALL):
        markup = anchor.group(0)
        href = re.search(r'href="([^"]*)"', markup)
        src = re.search(r'src="([^"]*)"', markup)
        if not (href and src):
            continue

        target = href.group(1)
        # Equality test rather than truthiness: only values equal to True
        # switch unquoting on.
        if unquote == True:
            target = urllib.unquote(target)
        thumbnail = src.group(1)

        # Likewise, only values equal to False select addVideo.
        register = page.addVideo if showPage == False else page.addPage
        register(req, target, target, thumbnail)
def findVideoLink(req, url, showPage=False, showImage=False):
    """Register each titled, illustrated anchor in the document at ``url``.

    Every ``<a ...>...</a>`` span in the text returned by ``load(url)`` is
    inspected. Spans carrying all of ``href="..."``, ``title="..."`` and
    ``src="..."`` are registered on ``page``. The href and src values are
    first passed through ``absURL`` together with the ``scheme://netloc``
    prefix derived from ``url``.

    Args:
        req: Passed through unchanged to the ``page`` call.
        url: Location handed to ``load``; also the source of the
            ``scheme://netloc`` prefix given to ``absURL``.
        showPage: When equal to ``False``, entries go to ``page.addVideo``;
            otherwise to ``page.addPage``.
        showImage: Only consulted when ``showPage`` is not equal to
            ``False``. When equal to ``True`` the image is passed to
            ``page.addPage``; otherwise it is left out of the call.
    """
    parts = urlparse.urlparse(url)
    domain = '{uri.scheme}://{uri.netloc}'.format(uri=parts)
    html = load(url)

    for anchor in re.finditer(r'<a .*?</a>', html, re.DOTALL):
        markup = anchor.group(0)
        href = re.search(r'href="([^"]*)"', markup)
        caption = re.search(r'title="([^"]*)"', markup)
        src = re.search(r'src="([^"]*)"', markup)
        if not (href and caption and src):
            continue

        target = absURL(domain, href.group(1))
        label = caption.group(1)
        # Resolved for every qualifying anchor, even on the path that ends
        # up not passing the image along.
        thumbnail = absURL(domain, src.group(1))

        # Equality tests rather than truthiness, so only values equal to
        # False / True take the respective branch.
        if showPage == False:
            page.addVideo(req, target, label, thumbnail)
            continue
        if showImage == True:
            page.addPage(req, target, label, thumbnail)
        else:
            page.addPage(req, target, label)
def findFrame(req, url):
    """Register the ``src`` of each iframe in the document at ``url``.

    The text returned by ``load(url)`` is searched for
    ``<iframe ...</iframe>`` spans. For every span containing a
    ``src="..."`` attribute, that value is passed to ``page.addVideo``.
    The search does not use ``re.DOTALL``, so a span has to sit on a
    single line to be found.

    Args:
        req: Passed through unchanged to ``page.addVideo``.
        url: Location handed to ``load``.
    """
    html = load(url)
    for frame in re.finditer(r'<iframe (.*?)</iframe>', html):
        attributes = frame.group(1)
        source = re.search(r'src="([^"]*)"', attributes)
        if source is None:
            continue
        page.addVideo(req, source.group(1))