def get_info(urls):
    """Yield file-info tuples for FileFactory *urls*.

    For each URL a header-only request is made first. If the response
    redirects (``Location`` header) to an address that does NOT match the
    plugin's own URL pattern, the link is treated as a direct download and
    reported as online without fetching the page body; otherwise the full
    page is downloaded and parsed.
    """
    for url in urls:
        h = get_url(url, just_header=True)
        m = re.search(r'Location: (.+)\r\n', h)
        # BUGFIX: re.match takes (pattern, string) — the original call had
        # the arguments swapped, matching the redirect URL *as a regex*
        # against the plugin pattern instead of the other way around.
        if m and not re.match(FilefactoryCom.__pattern__, m.group(1)):
            #: It's a direct link! Skipping
            yield (url, 0, 3, url)
        else:
            #: It's a standard html page
            yield parse_fileInfo(FilefactoryCom, url, get_url(url))
def get_info(urls):
    """Yield parsed file info for MegaRapid *urls*.

    A single request handle is configured once with an explicit Accept
    header and a Firefox user-agent string, then reused to load every
    URL in *urls*; each response page is fed to ``parse_fileInfo``.
    """
    req = get_request()
    req.c.setopt(
        pycurl.HTTPHEADER,
        [
            "Accept: text/html",
            "User-Agent: Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:25.0) Gecko/20100101 Firefox/25.0",
        ],
    )
    # NOTE(review): the request handle is never explicitly released here —
    # confirm the caller or the request pool reclaims it.
    for link in urls:
        page = req.load(link)
        yield parse_fileInfo(MegaRapidCz, link, page)
def get_info(urls):
    """Yield parsed file info for FShare *urls*.

    Each URL is submitted to FShare's ``check_link.php`` endpoint via a
    POST request, and the HTML response is parsed into a file-info tuple.
    """
    check_endpoint = "http://www.fshare.vn/check_link.php"
    for link in urls:
        response = get_url(
            check_endpoint,
            post={'action': "check_link", 'arrlinks': link},
        )
        yield parse_fileInfo(FshareVn, link, response)