def findImageLink(req, url, unquote=False, showPage=False):
    """Register an entry for every anchor at ``url`` that carries an image.

    The text returned by ``load(url)`` is scanned for ``<a ...>...</a>``
    spans. A span is used only when it contains both a double-quoted
    ``href="..."`` and a double-quoted ``src="..."`` attribute. The href
    value is passed as both the link and the title argument, because no
    title is extracted here.

    Args:
        req: Passed through unchanged to ``page.addVideo`` / ``page.addPage``.
        url: Location handed to ``load``.
        unquote: When equal to ``True``, the href value is run through
            ``urllib.unquote`` before use.
        showPage: When equal to ``False``, entries are added with
            ``page.addVideo``; otherwise with ``page.addPage``.
    """
    html = load(url)
    for anchor in re.finditer(r'<a .*?</a>', html, re.DOTALL):
        tag = anchor.group(0)
        href = re.search(r'href="([^"]*)"', tag)
        src = re.search(r'src="([^"]*)"', tag)
        if not (href and src):
            # Anchors without both a target and an image are skipped.
            continue

        raw_target = href.group(1)
        # Explicit comparison kept on purpose: only values equal to True
        # enable unquoting, exactly as callers have relied on so far.
        target = urllib.unquote(raw_target) if unquote == True else raw_target
        thumb = src.group(1)

        if showPage == False:
            page.addVideo(req, target, target, thumb)
        else:
            page.addPage(req, target, target, thumb)
def findVideoLink(req, url, showPage=False, showImage=False):
    """Register an entry for every titled, image-bearing anchor at ``url``.

    The text returned by ``load(url)`` is scanned for ``<a ...>...</a>``
    spans. A span is used only when it contains double-quoted ``href``,
    ``title`` and ``src`` attributes. The href and src values are passed
    through ``absURL`` together with the ``scheme://netloc`` prefix of
    ``url``.

    Args:
        req: Passed through unchanged to ``page.addVideo`` / ``page.addPage``.
        url: Location handed to ``load``; also the source of the
            ``scheme://netloc`` prefix given to ``absURL``.
        showPage: When equal to ``False``, entries are added with
            ``page.addVideo`` and ``showImage`` is not consulted.
        showImage: Consulted only when ``showPage`` is not equal to
            ``False``. When equal to ``True`` the image is passed to
            ``page.addPage``; otherwise ``page.addPage`` is called
            without an image argument.
    """
    url_parts = urlparse.urlparse(url)
    base = '{uri.scheme}://{uri.netloc}'.format(uri=url_parts)
    html = load(url)

    for anchor in re.finditer(r'<a .*?</a>', html, re.DOTALL):
        tag = anchor.group(0)
        href = re.search(r'href="([^"]*)"', tag)
        caption = re.search(r'title="([^"]*)"', tag)
        src = re.search(r'src="([^"]*)"', tag)
        if not (href and caption and src):
            # All three attributes are required; skip incomplete anchors.
            continue

        target = absURL(base, href.group(1))
        label = caption.group(1)
        thumb = absURL(base, src.group(1))

        # Explicit comparisons kept on purpose so non-bool flag values
        # select the same branch as before.
        if showPage == False:
            page.addVideo(req, target, label, thumb)
            continue

        if showImage == True:
            page.addPage(req, target, label, thumb)
        else:
            page.addPage(req, target, label)