def searchshows(self, keyword=None):
    # Search the site for shows matching `keyword` and list the results.
    #
    # Drives a headless Browser: types the quoted keyword into the page's
    # <input>, submits the enclosing <form>, and scrapes the result grid.
    # If the grid yields nothing, the site may have answered with a
    # JavaScript redirect straight to a single show page; that redirect is
    # followed and the single show is added directly.
    with Browser(loadtimeout=0) as browser:
        browser.navigate(domain, validate=self.ispagevalid)
        # Fill the search input and submit the form in-page.
        browser.elem_setattr("value", "'%s'" % keyword, tag="input")
        browser.elem_call("submit", tag="form")
        # Allow the result page a few seconds to render before grabbing it.
        browser.loadtimeout = 3
        page = browser.html()
    self.scrapegrid(htmlement.fromstring(page))
    if not len(self.items):
        # No grid items: look for a client-side redirect of the form
        #   window.location = "..."
        redirect = re.search(
            "window\.location\s*?\=\s*?(?:\"|\')(.+?)(?:\"|\')", page)
        if redirect and "anime/" in redirect.group(1):
            url = net.absurl(redirect.group(1), domain)
            with Browser(loadtimeout=0) as browser:
                page = browser.navigate(url, domain,
                                        validate=self.ispagevalid)
            xpage = htmlement.fromstring(page)
            div = xpage.find(".//div[@class='table-responsive']/")
            # NOTE(review): assumes the show-page table layout; if the
            # table or cells are missing this raises AttributeError.
            title = div.find(".//tr[2]/td[3]").text
            img = net.absurl(
                div.find(".//div[@class='imaj']/.//img").get("data-src"),
                domain)
            # The numeric id embedded in the image URL doubles as the
            # show id used elsewhere (see scrapegrid).
            imgid = re.search("([0-9]+)", img).group(1)
            art = {"icon": img, "thumb": img, "poster": img}
            url = imgid, art
            self.additem(title, url, art=art)
def itermedias(chid):
    """Yield the DRM stream descriptor extracted from the channel iframe."""
    frame_url = "%s/bm/vid.php?id=%s" % (domain, chid)
    frame_body = net.http(frame_url, referer=domain)
    src_match = re.search('var src = "(.+?)"', frame_body)
    lic_match = re.search('var myWV = "(.+?)"', frame_body)
    if not (src_match and lic_match):
        return
    req_headers = {"Referer": frame_url}
    # Both the manifest and the licence urls are base64-encoded and may
    # be relative to the iframe's location.
    manifest = net.absurl(
        base64.b64decode(src_match.group(1)).decode(), frame_url)
    licence = net.absurl(
        base64.b64decode(lic_match.group(1)).decode(), frame_url)
    yield net.mpdurl(manifest, req_headers, licence, req_headers.copy())
def scrapegrid(self, xpage):
    """Add one directory item for every visible panel in the grid."""
    for panel in xpage.iterfind(".//div[@class='panel panel-visible']"):
        anchor = panel.find(".//a[@class='baloon']")
        poster = net.absurl(panel.find(".//img").get("data-src"), domain)
        artwork = {"icon": poster, "thumb": poster, "poster": poster}
        label = anchor.get("data-original-title").replace("izle", "").strip()
        target = net.absurl(anchor.get("href"), domain)
        if "/anime/" in target:
            # Anime entries are keyed by the numeric id inside the image
            # url instead of the page link; skip items without one.
            idmatch = re.search("([0-9]+)", poster)
            if not idmatch:
                continue
            target = idmatch.group(1), artwork
        self.additem(label, target, art=artwork)
def iterpage(xpage):
    """Yield (href, channel name, icon url) for each channel link.

    Skips adult ("xxx") channels and maps normalized names through the
    module-level `namemap` onto their canonical display names.
    """
    for a in xpage.iterfind(
            ".//div[@class='content container']/div/div/ul/li/a"):
        href = net.absurl(a.get("href").split("#")[0], domain)
        chname = tools.elementsrc(a).lower().strip()
        # chname was lower-cased above, so test it directly (the original
        # applied .lower() a second time redundantly).
        if "xxx" in chname:
            continue
        normname = normalize(chname)
        if normname in namemap:
            chname = namemap[normname]
        icon = a.find(".//img")
        if icon is not None:
            icon = net.absurl(icon.get("src"), domain)
        # FIX: removed a json.dumps([...]) whose result was assigned to an
        # unused local on every iteration -- pure dead work.
        yield href, chname, icon
def itermedias(ctvcid, ctvcids=None):
    # Yield playable HLS urls for one or more channel ids.
    #
    # For each id, the channel page is fetched and two nested <iframe>s
    # are followed to reach the player page.  The player either embeds the
    # stream as  file: atob("<base64>")  or loads an external "yayin"
    # script from which a key and candidate links are extracted.
    if not ctvcids:
        # Single-id call: wrap it so both call styles share one loop.
        ctvcids = [ctvcid]
    for ctvcid in ctvcids:
        u = domain + "/" + ctvcid
        # First-level iframe on the channel page.
        iframe1 = htmlement.fromstring(net.http(
            u, referer=domain)).find(".//iframe").get("src")
        # Second-level iframe inside the first.
        iframe2 = htmlement.fromstring(net.http(
            iframe1, referer=u)).find(".//iframe").get("src")
        src = net.http(iframe2, referer=iframe1)
        # Inline player:  file : atob('<base64 m3u8 url>')
        media = re.search(
            "file[\s\t]*?\:[\s\t]*?atob\((?:\"|\')(.+?)(?:\"|\')\)", src)
        if media:
            yield net.hlsurl(base64.b64decode(media.group(1)).decode(),
                             headers={"referer": domain})
        else:
            # Fallback: scan <script src="...yayin..."> for a key and
            # "anahtar" links; key.group(1) is appended to each link.
            for script in htmlement.fromstring(src).iterfind(".//script"):
                if script.get("src") and "yayin" in script.get("src"):
                    scriptsrc = net.http(script.get("src"), referer=domain)
                    # rgxkey / rgxlink are module-level patterns defined
                    # outside this view.
                    key = re.search(rgxkey, scriptsrc)
                    if key:
                        for link in re.findall(rgxlink, scriptsrc):
                            if "anahtar" in link:
                                link = net.absurl(link, script.get("src"))
                                yield net.hlsurl(
                                    link + key.group(1),
                                    headers={"referer": domain})
                    # Only the first matching "yayin" script is processed.
                    break
def getcategories(self):
    """List the site's genre/category links from its AJAX endpoint."""
    endpoint = "%sajax/turler" % domain
    with Browser() as browser:
        # The endpoint only answers XHR-flavoured requests.
        body = browser.navigate(
            endpoint, domain,
            headers={"x-requested-with": "XMLHttpRequest"})
    for anchor in htmlement.fromstring(body).iterfind(".//a"):
        self.additem(anchor.get("title"),
                     net.absurl(anchor.get("href"), domain))
def iterajaxlink(self, xpage, xpath=None):
    """Yield (label, absolute ajax url) pairs from onclick handlers.

    Elements are located with `xpath` (defaults to every <button>); each
    onclick is expected to contain a call like  fn('ajax...').
    """
    for elem in xpage.iterfind(xpath or ".//button"):
        handler = elem.get("onclick")
        if not handler:
            continue
        found = re.search("\((?:\"|\')(ajax.+?)(?:\"|\')", handler)
        if not found:
            continue
        # Label is ascii-sanitised bytes, as the original produced.
        label = tools.elementsrc(elem).encode("ascii", "replace").strip()
        yield label, net.absurl(found.group(1), domain)
def scrapegrid(self, search=None, genre=None):
    # Scrape the site's index grid and add one item per title found.
    #
    # search: optional free-text query; genre: optional genre filter.
    # Pagination comes from self.page, the movie/tv split from
    # self.section.
    domain = "https://%s" % self.setting.getstr("domain")
    query = {
        "t": "y",
        "m": "m",
        "w": "q",
        "type": self.section,
        "sort": "Trending"
    }
    if self.page:
        query["page"] = self.page
    if genre:
        query["genre[]"] = genre
    if search:
        query["s"] = search
    page = htmlement.fromstring(
        self.download(domain, params=query, referer=domain))
    div = page.find(".//div[@class='index_container']")
    if div is not None:
        for subdiv in div.iterfind(".//div"):
            # Only the grid cells carry an "index_item" class.
            if subdiv.get("class") and "index_item" in subdiv.get("class"):
                titlediv = subdiv.find(".//div[@class='title-cutoff']")
                title = titlediv.text.strip().title()
                # Year is whatever text remains in <a><h2> once the title
                # div is excluded; reduced to an int when a 4-digit run is
                # present, None when not.
                year = elementsrc(subdiv.find("./a/h2"), exclude=titlediv)
                if year:
                    yearre = re.search("([0-9]{4})", year)
                    if yearre:
                        year = int(yearre.group(1))
                    else:
                        year = None
                img = subdiv.find(".//img")
                if img is not None:
                    img = absurl(img.get("src"), domain)
                else:
                    img = None
                info = {"title": title, "year": year}
                if self.section == "tv":
                    info["tvshowtitle"] = title
                art = {"icon": img, "thumb": img, "poster": img}
                url = absurl(subdiv.find(".//a").get("href"), domain)
                self.additem(title, (url, info, art), info, art)
def iteratechannels(self):
    """Yield channel entries built from the site's /bm/channels.json feed.

    Each channel dict supplies "qvt" (stream url) and "title"; the
    optional "image" dict supplies the icon url, falling back to the
    "DefaultFolder.png" texture.
    """
    for channel in self.download(net.absurl("/bm/channels.json", domain),
                                 referer=domain, json=True):
        url = channel["qvt"]
        img = channel.get("image")
        if img:
            # BUG FIX: the fallback here was misspelled
            # "DefaultFodler.png"; use "DefaultFolder.png" to match the
            # else-branch below.
            img = img.get("url", "DefaultFolder.png")
        else:
            img = "DefaultFolder.png"
        yield self.makechannel(url, chanjs, title=channel["title"],
                               icon=img, url=url)
def get(self):
    """Yield the HLS stream url, retrying truncated base64 paddings."""
    pageurl = "%s/ing/%s" % (domain, self.ysid)
    pagesrc = net.http(pageurl, referer=domain)
    frameurl = htmlement.fromstring(pagesrc).find(".//iframe").get("src")
    framesrc = net.http(frameurl, referer=pageurl)
    encoded = re.search("atob\((?:\"|\')(.+?)(?:\"|\')\)",
                        framesrc).group(1)
    # The site strips base64 padding; retry with 0, 1 and 2 '=' chars
    # until one variant decodes (and resolves) cleanly.
    for pad in ("", "=", "=="):
        try:
            yield net.hlsurl(
                net.absurl(base64.b64decode(encoded + pad).decode(),
                           frameurl),
                headers={"Referer": frameurl})
            break
        except Exception:
            # Wrong padding variant -- try the next one.
            pass
def getlink(self, mirrorlink, xmirrorpage=None):
    # Resolve a mirror page to its final media url.
    #
    # mirrorlink:  url of the mirror page (fetched via XHR unless
    #              xmirrorpage, an already-parsed element tree, is given).
    # Returns the absolute media url, or None on any failure: the whole
    # flow is best-effort and errors are printed rather than raised.
    try:
        if not xmirrorpage:
            with Browser() as browser:
                mirrorpage = browser.navigate(
                    mirrorlink, domain,
                    headers={"x-requested-with": "XMLHttpRequest"})
            xmirrorpage = htmlement.fromstring(mirrorpage)
        iframe = net.absurl(
            xmirrorpage.find(".//iframe").get("src"), domain)
        with Browser() as browser:
            iframesrc = browser.navigate(iframe, domain)
        # The player page embeds a JSON payload ("ct" ciphertext, "s" hex
        # salt) plus a password variable; jscrypto decrypts it.
        iframe2 = json.loads(
            re.search("var\s*?iframe\s*?\=\s*?(?:\'|\")(.+)(?:\'|\")",
                      iframesrc).group(1))
        password = re.search("var\s*?pass\s*?\=\s*?(?:\'|\")(.+)(?:\'|\")",
                             iframesrc).group(1)
        link = jscrypto.decrypt(base64.b64decode(iframe2["ct"]), password,
                                binascii.unhexlify(iframe2["s"]))
        # The decrypted blob is itself a JSON-encoded string.
        link = json.loads(link)
        return net.absurl(link, domain)
    except Exception:
        # Best-effort resolver: log the traceback, fall through to None.
        print(traceback.format_exc())
def iterprogrammes(tvid):
    """Yield programme() entries from the station-airings API for tvid."""
    endpoint = "%s/api/stationAirings?stationId=%s" % (domain, tvid)
    for airing in json.loads(net.http(endpoint, referer=domain)):
        prog = airing["program"]
        # Timestamps arrive as e.g. "2021-01-02T13:45Z" and are marked UTC.
        begin = datetime.strptime(airing["startTime"],
                                  "%Y-%m-%dT%H:%MZ").replace(tzinfo=UTC)
        finish = datetime.strptime(airing["endTime"],
                                   "%Y-%m-%dT%H:%MZ").replace(tzinfo=UTC)
        image = prog.get("preferredImage")
        if image:
            image = image.get("uri")
        if image:
            image = net.absurl(image, imgdomain)
        summary = prog.get("longDescription",
                           prog.get("shortDescription", None))
        yield programme(prog["title"], begin, finish, desc=summary,
                        icon=image)
def getepisodes(self, showargs=None, seaargs=None):
    """List a show's episodes, or fall back to scraping the front grid.

    showargs: optional (animeId, art) tuple; seaargs is accepted for
    interface compatibility and unused here.
    """
    if not showargs:
        # No show selected: scrape the landing page grid instead.
        with Browser() as browser:
            self.scrapegrid(htmlement.fromstring(
                browser.navigate(domain, None, self.ispagevalid)))
        return
    aniid, art = showargs
    episodesurl = "%sajax/bolumler&animeId=%s" % (domain, aniid)
    with Browser() as browser:
        body = browser.navigate(
            episodesurl, domain,
            headers={"x-requested-with": "XMLHttpRequest"})
    for anchor in htmlement.fromstring(body).iterfind(".//a"):
        href = anchor.get("href")
        # Only anchors pointing at episode pages are listed.
        if not (href and "/video/" in href):
            continue
        self.additem(anchor.get("title"),
                     net.absurl(anchor.get("href"), domain), art=art)
def scrapeinfo(self, link):
    """Scrape a title page and return (info, art, episodes).

    info:     Kodi infolabels (imdbnumber, year, genre, cast, duration).
    art:      icon/poster/thumb dict when a cover image is present.
    episodes: {season number: [(episode number, title, url), ...]}.
    """
    domain = "https://%s" % self.setting.getstr("domain")
    pg = self.download(link, referer=domain, cache=None)
    # BUG FIX: re.sub's 4th positional argument is `count`, not `flags`.
    # Passing re.DOTALL (== 16) positionally capped substitutions at 16
    # and never enabled DOTALL, so <script> blocks spanning newlines were
    # left in the markup.  Pass it as flags= instead.
    pg = re.sub("<script.*?script>", " ", pg, flags=re.DOTALL)
    # NOTE(review): fromstringlist() is fed a plain string (an iterable of
    # characters); it works, but fromstring() would be the conventional
    # call -- left unchanged to keep behaviour identical.
    page = htmlement.fromstringlist(pg)
    info = {}
    art = {}
    episodes = {}
    # IMDB id from the action links (e.g. ".../title/tt1234567").
    # (A long-disabled "trailer" branch that lived here as a bare string
    # expression has been removed.)
    for sublink in page.findall(
            ".//div[@class='movie_info_actions']/div/a"):
        subtext = sublink.text.lower()
        if "imdb" in subtext:
            imdbnumber = re.search("(tt[0-9]+)", sublink.get("href"))
            if imdbnumber:
                info["imdbnumber"] = imdbnumber.group(1)
    infodiv = page.find(".//div[@class='movie_info']")
    if infodiv is not None:
        # Metadata table: first row is a header, the rest label/value
        # pairs.
        for tr in infodiv.findall(".//tr")[1:]:
            tds = tr.findall(".//td")
            if not len(tds) == 2:
                continue
            infotype = elementsrc(tds[0]).strip().lower()
            if "released" in infotype:
                released = re.search("([0-9]{4})", elementsrc(tds[1]))
                if released:
                    info["year"] = int(released.group(1))
            if "genre" in infotype:
                info["genre"] = [
                    elem.text for elem in tds[1].findall(".//a")
                ]
            if "cast" in infotype:
                info["cast"] = [
                    elem.get("href").split("cast=")[-1].strip().title()
                    for elem in tds[1].findall(".//a")
                ]
            if "runtime" in infotype:
                runtime = re.search("([0-9]+)\s?min", elementsrc(tds[1]))
                if runtime:
                    # Kodi expects the duration in seconds.
                    info["duration"] = int(runtime.group(1)) * 60
        img = infodiv.find(".//img")
        if img is not None:
            art["icon"] = art["poster"] = art["thumb"] = absurl(
                img.get("src"), domain)
    # Season / episode lists.
    for season in page.iterfind(".//div[@class='show_season']"):
        snum = int(season.get("data-id"))
        for episode in season.iterfind(".//div[@class='tv_episode_item']"):
            if snum not in episodes:
                episodes[snum] = []
            a = episode.find(".//a")
            url = absurl(a.get("href"), domain)
            epi = a.text
            enum = re.search("([0-9]+)", epi)
            if enum:
                epinum = int(enum.group(1))
            else:
                epinum = 0
            title = episode.find(
                ".//span[@class='tv_episode_name']").text.replace("-", "")
            # Re-insert the space lost between camelCased words.
            title = re.sub("([a-z])([A-Z])", "\g<1> \g<2>", title)
            episodes[snum].append((epinum, title, url))
    return info, art, episodes
def getchanurl(chname):
    """Return the absolute page url for the channel named `chname`.

    `chname` is compared against each link's text after spaces are
    removed and it is lower-cased, so callers should pass it in that
    form.  Returns None when no channel matches.
    """
    listing = htmlement.fromstring(
        net.http("%s/24-hours-channels.php" % dom, cache=60 * 24))
    for a in listing.iterfind(".//table/.//tr/td/a"):
        marker = a.find(".//i")
        # ROBUSTNESS FIX: the original dereferenced .tail unconditionally,
        # raising AttributeError for anchors without an <i> child or with
        # no trailing text; skip such rows instead of crashing.
        if marker is None or marker.tail is None:
            continue
        if marker.tail.replace(" ", "").lower().strip() == chname:
            return net.absurl(a.get("href"), dom)