Ejemplo n.º 1
0
    def handleShowDJ(self, url):
        """Collect the download links of a show page and queue them as packages.

        The page at ``url`` is fetched through ``self.getSJSrc`` and parsed.
        A paragraph carrying a ``Sprache``/``Format`` marker opens a new
        release group; the ``Download:`` paragraphs that follow add episodes,
        keyed by episode name and hoster, to the most recently opened group.

        How the links are packaged depends on the ``changeNameDJ`` config value
        ("Episode", "Format", "Packagename" or "Show") and on whether ``url``
        contains the ``#hasName`` marker.

        :param url: address of the show page to process
        """
        src = self.getSJSrc(url)
        soup = BeautifulSoup(src)
        post = soup.find("div", attrs={"id": "page_post"})
        ps = post.findAll("p")

        # Start from the current package name so seasonName is always bound,
        # even when the page heading yields an empty title.
        seasonName = self.pyfile.package().name
        found = unescape(soup.find("h2").find("a").string.split(' –')[0])
        if found:
            seasonName = found

        groups = {}
        gid = -1
        for p in ps:
            if re.search("<strong>Sprache|<strong>Format", str(p)):
                var = p.findAll("strong")
                opts = {"Sprache": "", "Format": ""}
                for v in var:
                    n = unescape(v.string).strip()
                    # Drop one leading and/or trailing colon from the label.
                    n = re.sub(r"^([:]?)(.*?)([:]?)$", r'\2', n)
                    if n.strip() not in opts:
                        continue
                    val = v.nextSibling
                    if not val:
                        continue
                    val = val.replace("|", "").strip()
                    val = re.sub(r"^([:]?)(.*?)([:]?)$", r'\2', val)
                    opts[n.strip()] = val.strip()
                gid += 1
                groups[gid] = {"ep": {}, "opts": opts}
            elif re.search("<strong>Download:", str(p)):
                if gid < 0:
                    # No Sprache/Format paragraph has opened a group yet, so
                    # there is nothing to attach this episode to.
                    continue
                parts = str(p).split("<br />")
                if re.search("<strong>", parts[0]):
                    ename = re.search('<strong>(.*?)</strong>', parts[0]).group(1).strip().decode("utf-8").replace("&#8211;", "-")
                    groups[gid]["ep"][ename] = {}
                    # The first part only holds the episode name; the hoster
                    # entries follow it.
                    for part in parts[1:]:
                        hostername = re.search(r" \| ([-a-zA-Z0-9]+\.\w+)", part)
                        if hostername:
                            hostername = hostername.group(1)
                            groups[gid]["ep"][ename][hostername] = []
                            links = re.findall('href="(.*?)"', part)
                            for link in links:
                                groups[gid]["ep"][ename][hostername].append(link + "#hasName")

        # The setting does not change during the call, so read it once.
        mode = self.getConfig("changeNameDJ")

        links = []
        for g in groups.values():
            for ename in g["ep"]:
                links.extend(self.getpreferred(g["ep"][ename]))
                if mode == "Episode":
                    # One package per episode; start collecting afresh.
                    self.packages.append((ename, links, ename))
                    links = []
            package = "%s (%s, %s)" % (seasonName, g["opts"]["Format"], g["opts"]["Sprache"])
            if mode == "Format":
                # One package per release group; start collecting afresh.
                self.packages.append((package, links, package))
                links = []
        if (mode == "Packagename") or re.search("#hasName", url):
            self.core.files.addLinks(links, self.pyfile.package().id)
        elif (mode == "Show") or not re.search("#hasName", url):
            self.packages.append((seasonName, links, seasonName))
Ejemplo n.º 2
0
 def handleWebLinks(self):
     """Decrypt the linksave.in web links found in ``self.html``.

     Each 43-character link id is resolved in two requests: the ``fw-`` page
     supplies JavaScript that is evaluated with ``self.js`` to obtain the
     ``dl-`` redirect address, and that page embeds the target in an iframe.
     A link that fails to decrypt is logged and skipped.

     :return: list of decrypted target links; empty when no JS engine is
         available or no link could be decrypted
     """
     package_links = []
     self.logDebug("Search for Web links")
     if not self.js:
         self.logDebug("no JS -> skip Web links")
         return package_links

     #@TODO: Gather paginated web links
     pattern = r'<a href="http://linksave\.in/(\w{43})"'
     ids = re.findall(pattern, self.html)
     self.logDebug("Decrypting %d Web links" % len(ids))
     for i, weblink_id in enumerate(ids):
         # Built outside the try block so the error handler can always name it.
         webLink = "http://linksave.in/%s" % weblink_id
         try:
             self.logDebug("Decrypting Web link %d, %s" % (i+1, webLink))
             fwLink = "http://linksave.in/fw-%s" % weblink_id
             response = self.load(fwLink)
             # NOTE(review): jscode is taken from the remote page and is
             # evaluated as-is by the JS engine.
             jscode = re.findall(r'<script type="text/javascript">(.*)</script>', response)[-1]
             jseval = self.js.eval("document = { write: function(e) { return e; } }; %s" % jscode)
             dlLink = re.search(r'http://linksave\.in/dl-\w+', jseval).group(0)
             self.logDebug("JsEngine returns value [%s] for redirection link" % dlLink)
             response = self.load(dlLink)
             link = unescape(re.search(r'<iframe src="(.+?)"', response).group(1))
             package_links.append(link)
         except Exception as detail:
             # Best effort: one broken link must not abort the remaining ones.
             self.logDebug("Error decrypting Web link %s, %s" % (webLink, detail))
     return package_links
Ejemplo n.º 3
0
 def get_file_name(self):
     """Return the video title from the page's flashvars as an ``.flv`` name.

     The page is downloaded first when ``self.html`` is still ``None``.
     Non-ASCII characters are dropped and ``+`` is replaced by a space.
     """
     if self.html is None:
         self.download_html()

     title_match = re.search("flashvars.title = \"(.*?)\";", self.html)
     ascii_title = title_match.group(1).encode("ascii", "ignore")
     decoded_title = unescape(ascii_title).decode("utf-8")
     clean_title = decoded_title.encode("ascii", "ignore").replace("+", " ")
     return "%s.flv" % clean_title
Ejemplo n.º 4
0
    def decrypt(self, pyfile):
        """Resolve an rs-layer.com page into its target links and queue them.

        Loads ``pyfile.url``; when the page shows a captcha image, the captcha
        is solved and posted back.  Every link id found in the result is
        looked up on its own ``link-<id>.html`` page and the iframe target
        found there is collected.  The collected links are appended to
        ``self.packages`` under the current package's name and folder.

        :param pyfile: file object whose ``url`` is the page to decrypt
        """
        url = pyfile.url
        src = self.req.load(str(url))

        soup = BeautifulSoup(src)
        captchaTag = soup.find("img", attrs={"id": "captcha_image"})
        if captchaTag:
            captchaUrl = "http://rs-layer.com/" + captchaTag["src"]
            self.logDebug("Captcha URL: %s" % captchaUrl)
            result = self.decryptCaptcha(str(captchaUrl), imgtype="png")
            self.req.lastUrl = url
            src = self.req.load(str(url), post={'captcha_input': result, 'image_name': captchaTag["src"]})

        link_ids = re.findall(r"onclick=\"getFile\(\'([0-9]{7}-.{8})\'\);changeBackgroundColor", src)

        if not link_ids:
            # NOTE(review): presumably no ids means the captcha answer was
            # rejected - confirm against self.retry().
            self.retry()

        self.correctCaptcha()

        iframe_pattern = re.compile(r"<iframe style=\"width: 100%; height: 100%;\" src=\"(.*)\"></frame>")
        links = []
        for link_id in link_ids:
            self.logDebug("ID: %s" % link_id)
            page = self.req.load("http://rs-layer.com/link-" + link_id + ".html")
            match = iframe_pattern.search(page)
            if match is None:
                # Skip a page without the expected iframe instead of aborting
                # the whole package with an AttributeError.
                self.logDebug("No link found for ID: %s" % link_id)
                continue
            new_link = unescape(match.group(1))
            self.logDebug("Link: %s" % new_link)
            links.append(new_link)

        self.packages.append((self.pyfile.package().name, links, self.pyfile.package().folder))
Ejemplo n.º 5
0
 def handleWebLinks(self):
     """Decrypt the linksave.in web links found in ``self.html``.

     For every 43-character link id, the ``fw-`` page is loaded and its last
     inline script is evaluated with ``self.js`` to obtain the ``dl-``
     redirect address; the iframe source of that page is the decrypted link.
     Failures are logged per link and do not stop the loop.

     :return: list of decrypted target links; empty when no JS engine is
         available or no link could be decrypted
     """
     package_links = []
     self.logDebug("Search for Web links")
     if not self.js:
         self.logDebug("no JS -> skip Web links")
         return package_links

     #@TODO: Gather paginated web links
     pattern = r'<a href="http://linksave\.in/(\w{43})"'
     ids = re.findall(pattern, self.html)
     self.logDebug("Decrypting %d Web links" % len(ids))
     # "link_id" rather than "id", so the builtin is not shadowed.
     for i, link_id in enumerate(ids):
         # Assigned before the try block so the except clause can rely on it.
         webLink = "http://linksave.in/%s" % link_id
         try:
             self.logDebug("Decrypting Web link %d, %s" % (i+1, webLink))
             fwLink = "http://linksave.in/fw-%s" % link_id
             response = self.load(fwLink)
             # NOTE(review): jscode is taken from the remote page and is
             # evaluated as-is by the JS engine.
             jscode = re.findall(r'<script type="text/javascript">(.*)</script>', response)[-1]
             jseval = self.js.eval("document = { write: function(e) { return e; } }; %s" % jscode)
             dlLink = re.search(r'http://linksave\.in/dl-\w+', jseval).group(0)
             self.logDebug("JsEngine returns value [%s] for redirection link" % dlLink)
             response = self.load(dlLink)
             link = unescape(re.search(r'<iframe src="(.+?)"', response).group(1))
             package_links.append(link)
         except Exception as detail:
             # Best effort: a single failing link is logged and skipped.
             self.logDebug("Error decrypting Web link %s, %s" % (webLink, detail))
     return package_links
Ejemplo n.º 6
0
 def proceed(self, url, location):
     """Resolve every 1kh.de download link found on the parent's page.

     The page at ``self.parent.url`` is loaded into ``self.html``; the
     ``url`` argument is overwritten and ``location`` is not used.  For each
     ``DownloadLink_<id>`` anchor the matching ``/l/<id>`` page is fetched
     and its unescaped iframe source is appended to ``self.urls``.
     """
     url = self.parent.url
     self.html = self.load(url)
     anchor_pattern = r"<a id=\"DownloadLink_(\d*)\" href=\"http://1kh.de/"
     for link_id in re.findall(anchor_pattern, self.html):
         frame_page = self.load("http://1kh.de/l/" + link_id)
         frame_match = re.search("width=\"100%\" src=\"(.*)\"></iframe>", frame_page)
         self.urls.append(unescape(frame_match.group(1)))
Ejemplo n.º 7
0
 def proceed(self, url, location):
     """Fill ``self.urls`` with the targets behind the page's 1kh.de links.

     Both arguments are effectively ignored: ``url`` is replaced by
     ``self.parent.url`` and ``location`` is never read.  The loaded page is
     kept in ``self.html``.
     """
     url = self.parent.url
     self.html = self.load(url)
     found_ids = re.findall(r"<a id=\"DownloadLink_(\d*)\" href=\"http://1kh.de/", self.html)
     for found_id in found_ids:
         # Each id has its own page whose iframe points at the real target.
         link_page = self.load("http://1kh.de/l/" + found_id)
         escaped_target = re.search("width=\"100%\" src=\"(.*)\"></iframe>", link_page).group(1)
         target = unescape(escaped_target)
         self.urls.append(target)
Ejemplo n.º 8
0
    def get_file_url(self):
        """Return the absolute download URL taken from the page's ``hashlink``.

        The page is downloaded first when ``self.html`` is still ``None``.
        """
        if self.html is None:
            self.download_html()

        hashlink_match = re.search(r'hashlink=(http.*?)"', self.html)
        return unescape(hashlink_match.group(1))
Ejemplo n.º 9
0
    def get_file_name(self):
        """Build the ``.flv`` file name from the ``flashvars.title`` value.

        Fetches the page when ``self.html`` is ``None``.  The title is reduced
        to ASCII (other characters are ignored), unescaped, and every ``+`` is
        turned into a space.
        """
        if self.html is None:
            self.download_html()

        raw_title = re.search("flashvars.title = \"(.*?)\";", self.html).group(1)
        # First pass strips non-ASCII input before unescaping ...
        unescaped = unescape(raw_title.encode("ascii", "ignore"))
        # ... second pass strips whatever non-ASCII the unescape produced.
        plain = unescaped.decode("utf-8").encode("ascii", "ignore")
        return "%s.flv" % plain.replace("+", " ")
Ejemplo n.º 10
0
    def get_file_url(self):
        """Return the unescaped ``hashlink`` URL found in the page.

        ``self.download_html()`` is called beforehand if no page has been
        loaded yet.
        """
        if self.html is None:
            self.download_html()

        escaped_url = re.search(r'hashlink=(http.*?)"', self.html).group(1)
        file_url = unescape(escaped_url)
        return file_url
Ejemplo n.º 11
0
 def handleContainer(self, type_):
     """Collect the container links of the given type from ``self.html``.

     :param type_: container type name; it is lower-cased before use
     :return: list of ``http://linksave.in/`` URLs, one per container link
     """
     type_ = type_.lower()
     label = type_.upper()
     self.logDebug('Seach for %s Container links' % label)
     # The type is interpolated into the regex below, so anything that is not
     # purely alphanumeric (cnl2, rsdf, ccf, dlc, web all are) is rejected.
     if not type_.isalnum():
         self.fail('unknown container type "%s" (this is probably a bug)' % type_)
     pattern = r"\('%s_link'\).href=unescape\('(.*?\.%s)'\)" % (type_, type_)
     found = re.findall(pattern, self.html)
     self.logDebug("Found %d %s Container links" % (len(found), label))
     return ["http://linksave.in/%s" % unescape(item) for item in found]
Ejemplo n.º 12
0
 def handleContainer(self, type_):
     """Return the linksave.in URLs of all ``type_`` containers on the page.

     The type name is matched in lower case against ``self.html``; each
     captured path is unescaped and prefixed with ``http://linksave.in/``.

     :param type_: container type name
     :return: list of container URLs (possibly empty)
     """
     type_ = type_.lower()
     self.logDebug('Seach for %s Container links' % type_.upper())
     is_safe_for_regex = type_.isalnum()
     if not is_safe_for_regex:
         # Guards the pattern below against a broken regex; the known types
         # (cnl2, rsdf, ccf, dlc, web) are all alphanumeric.
         self.fail('unknown container type "%s" (this is probably a bug)' % type_)
     matches = re.findall(r"\('%s_link'\).href=unescape\('(.*?\.%s)'\)" % (type_, type_), self.html)
     self.logDebug("Found %d %s Container links" % (len(matches), type_.upper()))
     package_links = []
     for escaped_path in matches:
         package_links.append("http://linksave.in/%s" % unescape(escaped_path))
     return package_links
Ejemplo n.º 13
0
    def get_file_url(self):
        """Return the absolute download URL of the video.

        The page is downloaded first when ``self.html`` is still ``None``.
        When the page flags an HD version, the URL is read from the
        ``videolink.php`` XML response; otherwise it is assembled from the
        ``s``, ``un``, ``k1`` and ``k2`` flashvars.
        """
        if self.html is None:
            self.download_html()

        # Video id as embedded in the preview player address.
        video_id = re.search("previewplayer/\\?v=(.*?)&width", self.html).group(1)

        # HD variant: its URL comes from a separate XML document.
        has_hd = "flashvars.hd = \"1\";" in self.html
        if has_hd:
            xml = self.req.load("http://www.megavideo.com/xml/videolink.php?v=%s" % video_id)
            hd_match = re.search("hd_url=\"(.*?)\"", xml)
            return unescape(hd_match.group(1))

        # Normal variant: server number plus the decrypted path.
        server = re.search("flashvars.s = \"(\\d+)\";", self.html).group(1)
        encrypted = re.search("flashvars.un = \"(.*?)\";", self.html).group(1)
        key1 = re.search("flashvars.k1 = \"(\\d+)\";", self.html).group(1)
        key2 = re.search("flashvars.k2 = \"(\\d+)\";", self.html).group(1)
        path = self.__decrypt(encrypted, int(key1), int(key2))
        return "http://www%s.megavideo.com/files/%s/" % (server, path)
Ejemplo n.º 14
0
    def handleShow(self, url):
        """Queue the links listed in a show page's ``scb`` navigation block.

        The page at ``url`` is fetched through ``self.getSJSrc``.  With the
        ``changeNameSJ`` setting at "Show", the links are appended to
        ``self.packages`` under the show title from the page heading (or the
        current package name when that title is empty).  Otherwise each link
        gets a ``#hasName`` suffix and is added to the current package.

        :param url: address of the show page to process
        """
        src = self.getSJSrc(url)
        soup = BeautifulSoup(src)
        packageName = self.pyfile.package().name
        # The setting does not change during the call, so read it once
        # instead of once per link.
        renameToShow = self.getConfig("changeNameSJ") == "Show"
        if renameToShow:
            found = unescape(soup.find("h2").find("a").string.split(' &#8211;')[0])
            if found:
                packageName = found

        nav = soup.find("div", attrs={"id": "scb"})

        if renameToShow:
            package_links = [a["href"] for a in nav.findAll("a")]
            self.packages.append((packageName, package_links, packageName))
        else:
            package_links = [a["href"] + "#hasName" for a in nav.findAll("a")]
            self.core.files.addLinks(package_links, self.pyfile.package().id)
Ejemplo n.º 15
0
    def decrypt(self, pyfile):
        """Decrypt an rs-layer.com page and queue the links it protects.

        The page at ``pyfile.url`` is loaded; if it contains a captcha image,
        the solved captcha is posted back to the same address.  Each link id
        in the resulting page is then resolved through its
        ``link-<id>.html`` page, whose iframe source is the real link.  The
        result is appended to ``self.packages`` with the current package's
        name and folder.

        :param pyfile: file object whose ``url`` is the page to decrypt
        """
        url = pyfile.url
        src = self.req.load(str(url))

        soup = BeautifulSoup(src)
        captchaTag = soup.find("img", attrs={"id": "captcha_image"})
        if captchaTag:
            captchaUrl = "http://rs-layer.com/" + captchaTag["src"]
            self.logDebug("Captcha URL: %s" % captchaUrl)
            result = self.decryptCaptcha(str(captchaUrl), imgtype="png")
            self.req.lastUrl = url
            src = self.req.load(str(url),
                                post={
                                    'captcha_input': result,
                                    'image_name': captchaTag["src"]
                                })

        link_ids = re.findall(
            r"onclick=\"getFile\(\'([0-9]{7}-.{8})\'\);changeBackgroundColor",
            src)

        if not link_ids:
            # NOTE(review): presumably no ids means the captcha answer was
            # rejected - confirm against self.retry().
            self.retry()

        self.correctCaptcha()

        iframe_pattern = re.compile(
            r"<iframe style=\"width: 100%; height: 100%;\" src=\"(.*)\"></frame>")
        links = []
        for link_id in link_ids:
            self.logDebug("ID: %s" % link_id)
            page = self.req.load("http://rs-layer.com/link-" + link_id +
                                 ".html")
            match = iframe_pattern.search(page)
            if match is None:
                # A page without the expected iframe is skipped rather than
                # aborting every other link with an AttributeError.
                self.logDebug("No link found for ID: %s" % link_id)
                continue
            new_link = unescape(match.group(1))
            self.logDebug("Link: %s" % new_link)
            links.append(new_link)

        self.packages.append(
            (self.pyfile.package().name, links, self.pyfile.package().folder))
Ejemplo n.º 16
0
    def get_file_url(self):
        """Work out the absolute URL the video can be downloaded from.

        Loads the page on demand.  An HD-flagged page is answered from the
        ``hd_url`` attribute of the ``videolink.php`` XML; any other page is
        answered from the ``s``/``un``/``k1``/``k2`` flashvars.
        """
        if self.html is None:
            self.download_html()

        def first_group(pattern):
            # First capture group of ``pattern`` in the loaded page.
            return re.search(pattern, self.html).group(1)

        video_id = first_group("previewplayer/\\?v=(.*?)&width")

        if "flashvars.hd = \"1\";" in self.html:
            content = self.req.load(
                "http://www.megavideo.com/xml/videolink.php?v=%s" % video_id)
            hd_url = re.search("hd_url=\"(.*?)\"", content).group(1)
            return unescape(hd_url)

        s = first_group("flashvars.s = \"(\\d+)\";")
        un = first_group("flashvars.un = \"(.*?)\";")
        k1 = int(first_group("flashvars.k1 = \"(\\d+)\";"))
        k2 = int(first_group("flashvars.k2 = \"(\\d+)\";"))
        decrypted = self.__decrypt(un, k1, k2)
        return "http://www%s.megavideo.com/files/%s/" % (s, decrypted)
Ejemplo n.º 17
0
 def get_file_name(self):
     """Return the page's ``globalHd`` heading as an ``.flv`` file name.

     Slashes are removed from the heading before the extension is added;
     the combined string is then unescaped.
     """
     heading_match = re.search(r"<h1 class='globalHd'>(.*)</h1>", self.html)
     heading = heading_match.group(1)
     without_slashes = heading.replace("/", "")
     return unescape(without_slashes + '.flv')
Ejemplo n.º 18
0
 def get_file_name(self):
     """Derive the ``.flv`` file name from the ``globalHd`` page heading.

     The heading text loses its ``/`` characters, gets the ``.flv`` suffix
     and is unescaped as a whole.
     """
     title = re.search(r"<h1 class='globalHd'>(.*)</h1>", self.html).group(1)
     file_name = title.replace("/", "") + '.flv'
     return unescape(file_name)