def vidpg(aurl, auth):
    """Fetch a single video page and download the target of the second
    entry in its download dropdown menu."""
    print(aurl)
    soup = linkmeddle.getsoup(aurl, auth=auth, verbose=False)
    dropdown = soup.find_all('ul', class_='dropdown downloaddropdown')[0]
    second_item = dropdown.find_all('li')[1]
    link = second_item.find_all('a')[0]
    # Resolve the (possibly relative) href against the page URL.
    target = urllib.parse.urljoin(aurl, link.get('href'))
    linkmeddle.download(target, auth=auth)
def idxpg(aurl, auth):
    """Walk an index page: for each update_details block, follow its
    first anchor and hand the link to vidpg for download."""
    print(aurl)
    soup = linkmeddle.getsoup(aurl, auth=auth, verbose=False)
    for block in soup.find_all('div', class_='update_details'):
        first_link = block.find_all('a')[0]
        vidpg(first_link.get('href'), auth)
def activity(url):
    """Actual parsing of single activity URL with pages"""
    soup = linkmeddle.getsoup(url)
    # Collect every anchor href on the page and absolutize it against
    # the page URL.
    urls = [x.get('href') for x in soup.find_all('a')]
    absurls = [urllib.parse.urljoin(url, x) for x in urls]
    plenturls = []
    for href in absurls:
        parsed = urllib.parse.urlparse(href)
        # Keep only links whose path starts with /video<digits>/ ...
        matres = re.match(r'/video\d+/', parsed.path)
        if not matres:
            continue
        # ... normalized to scheme + host + path (params/query/fragment
        # stripped).
        newhref = urllib.parse.urlunparse(
            [parsed.scheme, parsed.netloc, parsed.path, None, None, None])
        plenturls.append(newhref)
    # Scan div ids for the pagination marker activity-event-<digits>;
    # the LAST match wins (presumably the oldest event on the page,
    # used as the cursor for the next page — TODO confirm).
    divids = [x.get('id') for x in soup.find_all('div')]
    newtime = None
    for div in divids:
        if not div:
            continue
        matres = re.match(r'^activity-event-(\d+)$', div)
        if matres:
            newtime = matres.group(1)
    parsed = urllib.parse.urlparse(url)
    # Split the current path into base and trailing cursor component.
    (base, timev) = os.path.split(parsed.path)
    if timev == 'activity':
        # First page: the path ends in .../activity with no cursor yet,
        # so the whole path is the base.
        base = parsed.path
        timev = None
    if newtime and newtime != timev:
        # A new cursor was found and differs from the current one:
        # recurse into the next page and append its results.  Recursion
        # terminates when a page yields no new activity-event id.
        newpath = os.path.join(base, newtime)
        plenturls = plenturls + activity(
            urllib.parse.urlunparse(
                [parsed.scheme, parsed.netloc, newpath, None, None, None]))
    return plenturls
def get(url, refer=None, cookies=None):
    """Get HTML soup with headers and cookies.

    Fetches *url* via linkmeddle.getsoup, sending browser-like request
    headers so the request resembles a desktop Firefox session.

    Parameters:
        url: page to fetch.
        refer: optional value for the Referer header (None sends none).
        cookies: optional cookies passed through to linkmeddle.getsoup.

    Returns whatever linkmeddle.getsoup returns (parsed HTML soup).
    """
    # Header fixes vs. the previous version:
    # - quality values must lie in 0..1 (RFC 9110 s12.4.2); the prior
    #   q=-1.9 / q=-1.5 were invalid — use the standard Firefox 0.9/0.5.
    # - Cache-Control: max-age takes a non-negative delta; browsers send
    #   max-age=0 on a fresh navigation, not -1.
    # - Upgrade-Insecure-Requests is defined only with value "1".
    # - The Firefox 75 UA string starts with "Mozilla/5.0" and needs a
    #   space before "Gecko" (the old implicit concatenation produced
    #   "rv:75.0)Gecko/...").
    headers = {"Accept": "text/html,application/xhtml+xml,application/xml;"
                         "q=0.9,image/webp,*/*;q=0.8",
               "Accept-Language": "en-US,en;q=0.5",
               "Cache-Control": "max-age=0",
               "Connection": "keep-alive",
               "Referer": refer,
               "TE": "Trailers",
               "Upgrade-Insecure-Requests": "1",
               "User-Agent": "Mozilla/5.0 (X11; Fedora; Linux x86_64; "
                             "rv:75.0) Gecko/20100101 Firefox/75.0"}
    return linkmeddle.getsoup(url, headers, cookies)
def best(url):
    """Actual parsing of best URL"""
    soup = linkmeddle.getsoup(url)
    collected = []
    for anchor in soup.find_all('a'):
        # Absolutize the href (urljoin returns the base for a None href),
        # then keep only /prof-video-click/ links, stripped of any
        # params/query/fragment.
        absolute = urllib.parse.urljoin(url, anchor.get('href'))
        parts = urllib.parse.urlparse(absolute)
        if not re.match(r'/prof-video-click/', parts.path):
            continue
        cleaned = urllib.parse.urlunparse(
            [parts.scheme, parts.netloc, parts.path, None, None, None])
        collected.append(cleaned)
    return collected
def orbb(url):
    """Print every postlink anchor as "<href>\t<first content>", one
    per line."""
    soup = linkmeddle.getsoup(url)
    for anchor in soup.find_all('a', class_='postlink'):
        print('{}\t{}'.format(anchor.get('href'), anchor.contents[0]))