def get_file_url(self):
    """
    Return the absolute downloadable file URL.

    The page is fetched first through ``self.download_html()`` when
    ``self.data`` is empty. The URL is whatever follows ``hashlink=`` (and
    starts with ``http``) up to the next double quote, HTML-unescaped.

    Raises AttributeError when the page contains no ``hashlink=`` entry,
    because the failed search yields ``None``.
    """
    if not self.data:
        self.download_html()

    hashlink_match = re.search(r'hashlink=(http.*?)"', self.data)
    escaped_url = hashlink_match.group(1)
    return html_unescape(escaped_url)
def handle_free(self, pyfile):
    """
    Resolve the free-download link and store it in ``self.link``.

    The file id and the request verification token are scraped from the
    current page (``self.data``), posted to the License/Download action,
    and the link is then extracted from the response with
    ``self.LINK_PATTERN``.

    :param pyfile: the file being processed (not read by this method)
    """
    file_id = re.search(self.FILEID_PATTERN, self.data).group(1)
    self.log_debug("FileID: " + file_id)

    verification_token = re.search(self.TOKEN_PATTERN, self.data).group(1)
    self.log_debug("Token: " + verification_token)

    form = {'fileId': file_id,
            '__RequestVerificationToken': verification_token}
    self.data = self.load("http://lolabits.es/action/License/Download",
                          post=form,
                          decode="unicode-escape")

    link_match = re.search(self.LINK_PATTERN, self.data)
    self.link = html_unescape(link_match.group(1))
def handle_web_links(self):
    """
    Decrypt the "Web links" found on the current page.

    Every ``out.html`` form in ``self.data`` carries a hidden ``file`` id.
    Each id is posted back to ``out.html`` and the target of the
    ``window.location =`` redirect in the response is taken as the link.

    A link that cannot be decrypted is logged and skipped; it never aborts
    the remaining ones.

    :return: list of HTML-unescaped links, possibly empty
    """
    self.log_debug("Search for Web links ")

    package_links = []
    pattern = r'<form action="http://linkcrypt.ws/out.html"[^>]*?>.*?<input[^>]*?value="(.+?)"[^>]*?name="file"'
    ids = re.findall(pattern, self.data, re.I | re.S)

    self.log_debug("Decrypting %d Web links" % len(ids))

    marker = "window.location ="

    for weblink_id in ids:
        try:
            res = self.load("http://linkcrypt.ws/out.html", post={'file': weblink_id})

            marker_pos = res.find(marker)
            if marker_pos == -1:
                # Without this check the -1 "not found" result would be
                # turned into a bogus start offset and garbage appended.
                self.log_debug("Error decrypting Web link %s, %s" % (weblink_id, "no redirect found"))
                continue

            # Offset kept from the original code: the 17-character marker
            # plus two more characters (presumably a space and the opening
            # quote -- verify against a live response).
            start = marker_pos + 19
            end = res.find('"', start)
            if end == -1:
                self.log_debug("Error decrypting Web link %s, %s" % (weblink_id, "unterminated redirect"))
                continue

            package_links.append(html_unescape(res[start:end]))

        except Exception as detail:
            # Best effort: one broken link must not stop the others.
            self.log_debug("Error decrypting Web link %s, %s" % (weblink_id, detail))

    return package_links
def get_file_name(self):
    """
    Build the file name from the page's ``globalHd`` heading.

    Slashes are removed from the heading text, ``.flv`` is appended and the
    result is HTML-unescaped.

    Raises AttributeError when the heading is not present in ``self.data``.
    """
    heading_pattern = r'<h1 class=\'globalHd\'>(.*)</h1>'
    heading = re.search(heading_pattern, self.data).group(1)
    flattened = heading.replace("/", "")
    return html_unescape(flattened + '.flv')
def process(self, pyfile):
    """
    Select a stream from the video page and download it.

    Steps:
      1. Normalise the URL, load the page and report offline /
         temporarily-offline pages.
      2. Work out the desired itag from the ``fmt`` config value or,
         failing that, from the ``quality`` config value.
      3. Parse ``url_encoded_fmt_stream_map`` into (itag, url) pairs and
         keep those that are known to ``self.formats`` and enabled in the
         config.
      4. Use the desired itag when available, otherwise the nearest one by
         quality index.
      5. Derive the file name from the page title, download the stream
         and, when the URL carries a ``t=..m..s`` offset and ffmpeg is
         installed, cut the file to start at that offset.

    ``self.formats`` entries are indexed as: [0] file suffix (also used as
    the config key that enables the format), [1] width, [2] height,
    [3] quality index, [4] 3D flag.

    :param pyfile: file object whose ``url`` is read and rewritten and
                   whose ``name`` is set
    """
    pyfile.url = replace_patterns(pyfile.url, self.URL_REPLACEMENTS)
    html = self.load(pyfile.url)

    if re.search(r'<div id="player-unavailable" class="\s*player-width player-height\s*">', html):
        self.offline()

    if "We have been receiving a large volume of requests from your network." in html:
        self.temp_offline()

    #: Get config
    use3d = self.get_config('3d')

    if use3d:
        quality = {'sd': 82, 'hd': 84, 'fullhd': 85, '240p': 83, '360p': 82,
                   '480p': 82, '720p': 84, '1080p': 85, '3072p': 85}
    else:
        quality = {'sd': 18, 'hd': 22, 'fullhd': 37, '240p': 5, '360p': 18,
                   '480p': 35, '720p': 22, '1080p': 37, '3072p': 38}

    desired_fmt = self.get_config('fmt')

    if desired_fmt and desired_fmt not in self.formats:
        self.log_warning(_("FMT %d unknown, using default") % desired_fmt)
        desired_fmt = 0

    if not desired_fmt:
        #: Also reached for an unknown fmt: 0 was previously left in place
        #: and then used as an index into self.formats further down
        desired_fmt = quality.get(self.get_config('quality'), 18)

    #: Parse available streams
    #: NOTE: in a Python 2 byte string '\u0026' stays the literal six
    #: characters, i.e. the JSON-escaped '&' as it appears in the page
    streams = re.search(r'"url_encoded_fmt_stream_map":"(.+?)",', html).group(1)
    streams = [x.split('\u0026') for x in streams.split(',')]
    streams = [dict((y.split('=', 1)) for y in x) for x in streams]
    streams = [(int(x['itag']), urllib.unquote(x['url'])) for x in streams]

    self.log_debug("AVAILABLE STREAMS: %s" % [x[0] for x in streams])

    #: Build dictionary of supported itags (3D/2D)
    allowed = lambda x: self.get_config(self.formats[x][0])
    streams = [x for x in streams if x[0] in self.formats and allowed(x[0])]

    if not streams:
        self.fail(_("No available stream meets your preferences"))

    #: Prefer streams matching the 3D setting, otherwise take all of them
    fmt_dict = dict([x for x in streams if self.formats[x[0]][4] is use3d] or streams)

    self.log_debug("DESIRED STREAM: ITAG:%d (%s) %sfound, %sallowed" %
                   (desired_fmt,
                    "%s %dx%d Q:%d 3D:%s" % self.formats[desired_fmt],
                    "" if desired_fmt in fmt_dict else "NOT ",
                    "" if allowed(desired_fmt) else "NOT "))

    #: Return fmt nearest to quality index
    if desired_fmt in fmt_dict and allowed(desired_fmt):
        fmt = desired_fmt

    else:
        sel = lambda x: self.formats[x][3]  #: Select quality index
        comp = lambda x, y: abs(sel(x) - sel(y))

        self.log_debug("Choosing nearest fmt: %s" %
                       [(x, allowed(x), comp(x, desired_fmt)) for x in fmt_dict.keys()])

        fmt = reduce(lambda x, y: x if comp(x, desired_fmt) <= comp(y, desired_fmt) and
                     sel(x) > sel(y) else y, fmt_dict.keys())

    self.log_debug("Chosen fmt: %s" % fmt)

    url = fmt_dict[fmt]

    self.log_debug("URL: %s" % url)

    #: Set file name
    file_suffix = self.formats[fmt][0] if fmt in self.formats else ".flv"
    file_name_pattern = '<meta name="title" content="(.+?)">'
    name = re.search(file_name_pattern, html).group(1).replace("/", "")

    #: Cleaning invalid characters from the file name
    name = name.encode('ascii', 'replace')
    for c in self.invalid_chars:
        name = name.replace(c, '_')

    pyfile.name = html_unescape(name)

    #: Optional start offset in the URL, e.g. "t=1m30s" or "t=45s"
    start = re.search(r"t=((\d+)m)?(\d+)s", pyfile.url)
    ffmpeg = which("ffmpeg")
    if ffmpeg and start:
        m, s = start.groups()[1:]
        if m is None:
            m = "0"

        pyfile.name += " (starting at %s:%s)" % (m, s)

    pyfile.name += file_suffix
    filename = self.download(url)

    if ffmpeg and start:
        #: Move the download aside and let ffmpeg write the trimmed copy
        #: to the original path
        inputfile = filename + "_"
        os.rename(filename, inputfile)

        status = subprocess.call([
            ffmpeg,
            "-ss", "00:%s:%s" % (m, s),
            "-i", inputfile,
            "-vcodec", "copy",
            "-acodec", "copy",
            filename])

        if status == 0:
            os.remove(inputfile)

        else:
            #: Do not throw away the only copy when trimming failed
            self.log_warning(_("ffmpeg exited with status %d, keeping the untrimmed file") % status)
            if os.path.exists(filename):
                os.remove(filename)
            os.rename(inputfile, filename)
def handle_captcha(self, inputs):
    """
    Solve whichever captcha the current page uses and store the answer
    in ``inputs``.

    The variants are tried in this order; the first one that applies wins:
      1. image captcha (``CAPTCHA_PATTERN``)      -> ``inputs['code']``
      2. positioned digits (``CAPTCHA_BLOCK_PATTERN``) -> ``inputs['code']``
      3. ReCaptcha  -> ``recaptcha_response_field`` / ``recaptcha_challenge_field``
      4. SolveMedia -> ``adcopy_response`` / ``adcopy_challenge``

    ``inputs`` is left untouched when none of them applies.

    :param inputs: dict of form fields, modified in place
    """
    #: 1. Plain image captcha
    image_match = re.search(self.CAPTCHA_PATTERN, self.data)
    if image_match is not None:
        inputs['code'] = self.captcha.decrypt(image_match.group(1))
        return

    #: 2. Digits rendered as spans; their visual order is given by the
    #:    padding-left value, not by their order in the markup
    block_match = re.search(self.CAPTCHA_BLOCK_PATTERN, self.data, re.S)
    if block_match is not None:
        block_html = block_match.group(1)
        digits = re.findall(r'<span.*?padding-left\s*:\s*(\d+).*?>(\d)</span>',
                            html_unescape(block_html))

        self.log_debug(block_html)

        left_to_right = sorted(digits, key=lambda pair: int(pair[0]))
        inputs['code'] = "".join(pair[1] for pair in left_to_right)

        self.log_debug("Captcha code: %s" % inputs['code'], digits)
        return

    #: 3. ReCaptcha: key from the plugin pattern, else let the helper
    #:    detect it
    recaptcha = ReCaptcha(self)
    try:
        site_key = re.search(self.RECAPTCHA_PATTERN, self.data).group(1)

    except Exception:
        site_key = recaptcha.detect_key()

    else:
        self.log_debug("ReCaptcha key: %s" % site_key)

    if site_key:
        response, challenge = recaptcha.challenge(site_key)
        inputs['recaptcha_response_field'] = response
        inputs['recaptcha_challenge_field'] = challenge
        return

    #: 4. SolveMedia, same key lookup scheme
    solvemedia = SolveMedia(self)
    try:
        site_key = re.search(self.SOLVEMEDIA_PATTERN, self.data).group(1)

    except Exception:
        site_key = solvemedia.detect_key()

    else:
        self.log_debug("SolveMedia key: %s" % site_key)

    if site_key:
        response, challenge = solvemedia.challenge(site_key)
        inputs['adcopy_response'] = response
        inputs['adcopy_challenge'] = challenge