Python html_unescape Beispiele, module.utils.html_unescape Python Beispiele

Beispiel #1

0

Datei anzeigen

    def downloadFile(self, pyfile):
        """
        Resolve the download URL and file name for ``pyfile`` and start the download.

        Only the headers of ``pyfile.url`` are requested first. A ``location``
        header, if present, replaces the URL (one hop only). The file name is
        taken from the last path segment of the URL and is overridden by the
        ``filename`` entry of a ``content-disposition`` header when one matches.

        :param pyfile: object whose ``url`` is read and whose ``name`` is set
        :raises BadHeader: when the header reports status code 404
        """
        header = self.load(pyfile.url, just_header=True)

        # self.load does not raise a BadHeader on 404 responses, do it here
        if 'code' in header and header['code'] == 404:
            raise BadHeader(404)

        if 'location' in header:
            self.logDebug("Location: " + header['location'])
            url = unquote(header['location'])
        else:
            url = pyfile.url

        name = html_unescape(unquote(urlparse(url).path.split("/")[-1]))

        if 'content-disposition' in header:
            self.logDebug("Content-Disposition: " + header['content-disposition'])
            # Raw string so that \* reaches the regex engine as an escaped asterisk
            m = search(r"filename(?P<type>=|\*=(?P<enc>.+)'')(?P<name>.*)", header['content-disposition'])
            if m:
                disp = m.groupdict()
                self.logDebug(disp)
                # The plain "filename=" form carries no charset; fall back to UTF-8
                if not disp['enc']:
                    disp['enc'] = 'utf-8'
                name = remove_chars(disp['name'], "\"';").strip()
                name = unicode(unquote(name), disp['enc'])

        # Fall back to the full URL when no name could be derived
        if not name:
            name = url
        pyfile.name = name
        self.logDebug("Filename: %s" % pyfile.name)
        self.download(url, disposition=True)

Beispiel #2

0

Datei anzeigen

    def decrypt(self, pyfile):
        """
        Collect the links of a package, either via a direct redirect or by parsing the page.

        When handleDirect sets self.link it becomes the only URL; otherwise the page is
        preloaded and self.links is filled from getLinks (and handlePages when the plugin
        defines both PAGES_PATTERN and loadPage). Fails when nothing was grabbed.

        :param pyfile: passed on to handleDirect and handlePages
        """
        self.prepare()

        self.logDebug("Looking for link redirect...")
        self.handleDirect(pyfile)

        if not self.link:
            self.preload()
            self.checkInfo()

            self.links = self.getLinks() or []

            supports_pages = hasattr(self, 'PAGES_PATTERN') and hasattr(self, 'loadPage')
            if supports_pages:
                self.handlePages(pyfile)

            self.logDebug("Package has %d links" % len(self.links))

        else:
            self.urls = [self.link]

        if self.links:
            #@TODO: Move to Crypter in 0.4.10
            cleaned = []
            for raw_link in self.links:
                cleaned.append(html_unescape(raw_link.decode('unicode-escape').strip()))
            self.links = cleaned
            self.packages = [(self.info['name'], self.links, self.info['folder'])]

        elif not self.urls and not self.packages:  #@TODO: Remove in 0.4.10
            self.fail(_("No link grabbed"))
Beispiel #3

0

Datei anzeigen

Datei: RarefileNet.py Projekt: 4Christopher/pyload

 def handleCaptcha(self, inputs):
     """
     Read the digit captcha embedded in self.html and store it in inputs['code'].

     The digits are <span> elements; they are ordered by their padding-left value
     and concatenated.

     :param inputs: dict that receives the 'code' entry
     :return: always 3
     """
     block = re.search(r'<b>Enter code.*?<div.*?>(.*?)</div>', self.html, re.S)
     captcha_div = block.group(1)
     self.logDebug(captcha_div)

     numerals = re.findall('<span.*?padding-left\s*:\s*(\d+).*?>(\d)</span>', html_unescape(captcha_div))
     # Each entry is (padding-left, digit); sort by the numeric offset
     ordered = sorted(numerals, key=lambda pair: int(pair[0]))
     inputs['code'] = "".join([pair[1] for pair in ordered])

     self.logDebug("CAPTCHA", inputs['code'], numerals)
     return 3

Beispiel #4

0

Datei anzeigen

Datei: CoolshareCz.py Projekt: keat01/pyLoad

 def handlePremium(self):
     """
     Extract the premium URL from self.html and download it.

     Reports a parse error when PREMIUM_URL_PATTERN does not match and resets the
     account when the extracted URL does not start with "http://".
     """
     match = re.search(self.PREMIUM_URL_PATTERN, self.html)
     if match is None:
         self.parseError("Premium URL")

     premium_url = html_unescape(match.group(1))
     self.logDebug("Premium URL: " + premium_url)

     is_http = premium_url.startswith("http://")
     if not is_http:
         self.resetAccount()

     self.download(premium_url)

Beispiel #5

0

Datei anzeigen

Datei: SimpleHoster.py Projekt: Dmanugm/pyload

    def getFileInfo(self):
        """
        Parse name, size and status of the file from self.html and store them on self.pyfile.

        Status 1 triggers self.offline(), any status other than 2 a parse error.
        A missing name falls back to the last path segment of the file URL.

        :return: self.file_info
        """
        self.logDebug("URL: %s" % self.pyfile.url)

        if hasattr(self, "TEMP_OFFLINE_PATTERN"):
            if re.search(self.TEMP_OFFLINE_PATTERN, self.html):
                self.tempOffline()

        parsed = parseFileInfo(self)[:3]
        name, size, status = parsed

        if status == 1:
            self.offline()
        elif status != 2:
            self.logDebug(self.file_info)
            self.parseError('File info')

        if not name:
            # No name parsed: derive it from the URL path
            name = html_unescape(urlparse(self.pyfile.url).path.split("/")[-1])
        self.pyfile.name = name

        if not size:
            self.logError("File size not parsed")
        else:
            self.pyfile.size = size

        self.logDebug("FILE NAME: %s FILE SIZE: %s" % (self.pyfile.name, self.pyfile.size))
        return self.file_info

Beispiel #6

0

Datei anzeigen

Datei: XFileSharingPro.py Projekt: wangjun/pyload

    def process(self, pyfile):
        """
        Entry point: load the file page, determine the file name and start the download.

        URLs that do not match the plugin pattern are only handled for premium users.

        :param pyfile: file object whose url is loaded and whose name may be set
        """
        self.prepare()

        if not re.match(self.__pattern__, self.pyfile.url):
            # The URL does not match this plugin's own pattern
            if not self.premium:
                self.fail("Only premium users can download from other hosters with %s" % self.HOSTER_NAME)
            else:
                self.handleOverriden()
            return

        try:
            # Due to a 0.4.9 core bug self.load would use cookies even if
            # cookies=False. Workaround using getURL to avoid cookies.
            # Can be reverted in 0.5 as the cookies bug has been fixed.
            self.html = getURL(pyfile.url, decode=True)
            self.file_info = self.getFileInfo()
        except PluginParseError:
            self.file_info = None

        self.location = self.getDirectDownloadLink()

        if not self.file_info:
            # No parsed info: name the file after the last URL path segment
            source_url = self.location if self.location else pyfile.url
            last_segment = urlparse(source_url).path.split("/")[-1]
            pyfile.name = html_unescape(unquote(last_segment))

        if self.location:
            self.startDownload(self.location)
        elif self.premium:
            self.handlePremium()
        else:
            self.handleFree()

Beispiel #7

0

Datei anzeigen

 def handleCaptcha(self, inputs):
     """
     Read the digit captcha embedded in self.html and store it in inputs['code'].

     The digits are <span> elements; they are ordered by their padding-left value
     and concatenated.

     :param inputs: dict that receives the 'code' entry
     :return: always 3
     """
     block = re.search(r'<b>Enter code.*?<div.*?>(.*?)</div>', self.html, re.S)
     captcha_div = block.group(1)
     self.logDebug(captcha_div)

     numerals = re.findall('<span.*?padding-left\s*:\s*(\d+).*?>(\d)</span>', html_unescape(captcha_div))
     # Each entry is (padding-left, digit); sort by the numeric offset
     ordered = sorted(numerals, key=lambda pair: int(pair[0]))
     inputs['code'] = "".join([pair[1] for pair in ordered])

     self.logDebug("CAPTCHA", inputs['code'], numerals)
     return 3

Beispiel #8

0

Datei anzeigen

Datei: XFileSharingPro.py Projekt: starfighter77895/pyload

    def process(self, pyfile):
        """
        Entry point: load the file page, determine the file name and start the download.

        URLs that do not match the plugin pattern are only handled for premium users.

        :param pyfile: file object whose url is loaded and whose name may be set
        """
        self.prepare()

        if not re.match(self.__pattern__, self.pyfile.url):
            # The URL does not match this plugin's own pattern
            if not self.premium:
                self.fail("Only premium users can download from other hosters with %s" % self.HOSTER_NAME)
            else:
                self.handleOverriden()
            return

        try:
            # Due to a 0.4.9 core bug self.load would use cookies even if
            # cookies=False. Workaround using getURL to avoid cookies.
            # Can be reverted in 0.5 as the cookies bug has been fixed.
            self.html = getURL(pyfile.url, decode=True)
            self.file_info = self.getFileInfo()
        except PluginParseError:
            self.file_info = None

        self.location = self.getDirectDownloadLink()

        if not self.file_info:
            # No parsed info: name the file after the last URL path segment
            source_url = self.location if self.location else pyfile.url
            last_segment = urlparse(source_url).path.split("/")[-1]
            pyfile.name = html_unescape(unquote(last_segment))

        if self.location:
            self.startDownload(self.location)
        elif self.premium:
            self.handlePremium()
        else:
            self.handleFree()

Beispiel #9

0

Datei anzeigen

Datei: XFileSharingPro.py Projekt: japhigu/pyload

 def handleCaptcha(self, inputs):
     """
     Detect which captcha the page in self.html uses and fill ``inputs`` for it.

     The patterns are tried in a fixed order and the first one that matches wins.

     :param inputs: dict that receives the captcha answer fields
     :return: 1 for ReCaptcha, 2 for a captcha URL passed to decryptCaptcha,
              3 for the digit captcha ordered by padding-left, 4 for SolveMedia,
              0 when no pattern matched
     """
     # 1) ReCaptcha
     hit = re.search(self.RECAPTCHA_URL_PATTERN, self.html)
     if hit:
         recaptcha_key = unquote(hit.group(1))
         self.logDebug("RECAPTCHA KEY: %s" % recaptcha_key)
         solver = ReCaptcha(self)
         inputs["recaptcha_challenge_field"], inputs["recaptcha_response_field"] = solver.challenge(recaptcha_key)
         return 1

     # 2) Captcha referenced by URL
     hit = re.search(self.CAPTCHA_URL_PATTERN, self.html)
     if hit:
         inputs["code"] = self.decryptCaptcha(hit.group(1))
         return 2

     # 3) Digits positioned via padding-left inside a div
     hit = re.search(self.CAPTCHA_DIV_PATTERN, self.html, re.S)
     if hit:
         captcha_div = hit.group(1)
         self.logDebug(captcha_div)
         numerals = re.findall(
             r"<span.*?padding-left\s*:\s*(\d+).*?>(\d)</span>", html_unescape(captcha_div)
         )
         ordered = sorted(numerals, key=lambda pair: int(pair[0]))
         inputs["code"] = "".join([pair[1] for pair in ordered])
         self.logDebug("CAPTCHA", inputs["code"], numerals)
         return 3

     # 4) SolveMedia
     hit = re.search(self.SOLVEMEDIA_PATTERN, self.html)
     if hit:
         captcha_key = hit.group(1)
         solver = SolveMedia(self)
         inputs["adcopy_challenge"], inputs["adcopy_response"] = solver.challenge(captcha_key)
         return 4

     return 0

Beispiel #10

0

Datei anzeigen

Datei: XFileSharingPro.py Projekt: sebmaynard/pyload

    def process(self, pyfile):
        """
        Entry point: load the file page, determine the file name and start the download.

        URLs that do not match the plugin pattern are only handled for premium users.

        :param pyfile: file object whose url is loaded and whose name may be set
        """
        self.prepare()

        if not re.match(self.__pattern__, self.pyfile.url):
            # The URL does not match this plugin's own pattern
            if not self.premium:
                self.fail("Only premium users can download from other hosters with %s" % self.HOSTER_NAME)
            else:
                self.handleOverriden()
            return

        try:
            self.html = self.load(pyfile.url, cookies=False, decode=True)
            self.file_info = self.getFileInfo()
        except PluginParseError:
            self.file_info = None

        self.location = self.getDirectDownloadLink()

        if not self.file_info:
            # No parsed info: name the file after the last URL path segment
            source_url = self.location if self.location else pyfile.url
            last_segment = urlparse(source_url).path.split("/")[-1]
            pyfile.name = html_unescape(unquote(last_segment))

        if self.location:
            self.startDownload(self.location)
        elif self.premium:
            self.handlePremium()
        else:
            self.handleFree()

Beispiel #11

0

Datei anzeigen

Datei: XFileSharingPro.py Projekt: keat01/pyLoad

 def handleCaptcha(self, inputs):
     """
     Detect which captcha the page in self.html uses and fill ``inputs`` for it.

     The patterns are tried in a fixed order and the first one that matches wins.

     :param inputs: dict that receives the captcha answer fields
     :return: 1 for ReCaptcha, 2 for a captcha URL passed to decryptCaptcha,
              3 for the digit captcha ordered by padding-left, 0 when no pattern matched
     """
     # 1) ReCaptcha
     hit = re.search(self.RECAPTCHA_URL_PATTERN, self.html)
     if hit:
         recaptcha_key = unquote(hit.group(1))
         self.logDebug("RECAPTCHA KEY: %s" % recaptcha_key)
         solver = ReCaptcha(self)
         inputs['recaptcha_challenge_field'], inputs['recaptcha_response_field'] = solver.challenge(recaptcha_key)
         return 1

     # 2) Captcha referenced by URL
     hit = re.search(self.CAPTCHA_URL_PATTERN, self.html)
     if hit:
         inputs['code'] = self.decryptCaptcha(hit.group(1))
         return 2

     # 3) Digits positioned via padding-left inside a div
     hit = re.search(self.CAPTCHA_DIV_PATTERN, self.html, re.S)
     if hit:
         captcha_div = hit.group(1)
         self.logDebug(captcha_div)
         numerals = re.findall(
             '<span.*?padding-left\s*:\s*(\d+).*?>(\d)</span>',
             html_unescape(captcha_div))
         ordered = sorted(numerals, key=lambda pair: int(pair[0]))
         inputs['code'] = "".join([pair[1] for pair in ordered])
         self.logDebug("CAPTCHA", inputs['code'], numerals)
         return 3

     return 0

Beispiel #12

0

Datei anzeigen

Datei: BasePlugin.py Projekt: DasLampe/pyload

    def downloadFile(self, pyfile):
        """
        Follow up to five redirects, determine the file name and start the download.

        Only headers are requested while following ``location`` headers. Relative
        redirect targets are resolved against the scheme and host of the URL that
        produced them, so that the next request receives an absolute URL. The file
        name comes from the last path segment of the final URL and is overridden by
        the ``filename`` entry of a ``content-disposition`` header when one matches.

        :param pyfile: object whose ``url`` is read and whose ``name`` is set
        :raises BadHeader: when a header reports status code 404
        """
        url = pyfile.url

        for _redirect in range(5):
            header = self.load(url, just_header=True)

            # self.load does not raise a BadHeader on 404 responses, do it here
            if 'code' in header and header['code'] == 404:
                raise BadHeader(404)

            if 'location' not in header:
                break

            location = header['location']
            self.logDebug("Location: " + location)

            if location.startswith("http"):
                url = unquote(location)
            else:
                # Relative redirect: prefix it with "scheme://host" of the current URL
                origin = search(r'https?://[^/]+', url)
                if origin is None:
                    # Current URL has no recognisable origin; keep the target as given
                    url = unquote(location)
                elif location.startswith("/"):
                    url = origin.group(0) + unquote(location)
                else:
                    url = '%s/%s' % (origin.group(0), unquote(location))

        name = html_unescape(unquote(urlparse(url).path.split("/")[-1]))

        if 'content-disposition' in header:
            self.logDebug("Content-Disposition: " + header['content-disposition'])
            # Raw string so that \* reaches the regex engine as an escaped asterisk
            m = search(r"filename(?P<type>=|\*=(?P<enc>.+)'')(?P<name>.*)", header['content-disposition'])
            if m:
                disp = m.groupdict()
                self.logDebug(disp)
                # The plain "filename=" form carries no charset; fall back to UTF-8
                if not disp['enc']:
                    disp['enc'] = 'utf-8'
                name = remove_chars(disp['name'], "\"';").strip()
                name = unicode(unquote(name), disp['enc'])

        # Fall back to the full URL when no name could be derived
        if not name:
            name = url
        pyfile.name = name
        self.logDebug("Filename: %s" % pyfile.name)
        self.download(url, disposition=True)

Beispiel #13

0

Datei anzeigen

Datei: LinkCryptWs.py Projekt: kurtiss/htpc

    def handleWebLinks(self):
        """
        Decrypt the "Web link" forms found in self.html.

        Every form id is posted to http://linkcrypt.ws/out.html; the URL assigned to
        ``window.location`` in the response is extracted and HTML-unescaped.
        A failure for one id is logged and does not stop the remaining ids.

        :return: list of decrypted links (possibly empty)
        """
        self.logDebug("Search for Web links ")

        package_links = []
        pattern = r'<form action="http://linkcrypt.ws/out.html"[^>]*?>.*?<input[^>]*?value="([^"]*?)"[^>]*?name="file"'
        ids = re.findall(pattern, self.html, re.I | re.S)

        self.logDebug("Decrypting %d Web links" % len(ids))

        for idx, weblink_id in enumerate(ids):
            try:
                self.logDebug("Decrypting Web link %d, %s" % (idx + 1, weblink_id))

                res = self.load("http://linkcrypt.ws/out.html", post={'file': weblink_id})

                marker_pos = res.find("window.location =")
                if marker_pos == -1:
                    # Without the marker the fixed offset below would slice arbitrary text
                    raise ValueError("redirect target not found in response")

                # 19 = the 17-character marker plus two more characters
                # (presumably a space and the opening quote)
                indexs = marker_pos + 19
                indexe = res.find('"', indexs)
                if indexe == -1:
                    raise ValueError("unterminated redirect target in response")

                link2 = res[indexs:indexe]

                self.logDebug(link2)

                package_links.append(html_unescape(link2))

            except Exception as detail:
                self.logDebug("Error decrypting Web link %s, %s" % (weblink_id, detail))

        # Hand the collected links to the caller; the list was previously built and dropped
        return package_links

Beispiel #14

0

Datei anzeigen

    def getFileInfo(self):
        """
        Parse name, size and status of the file from self.html and store them on self.pyfile.

        Status 1 triggers self.offline(), any status other than 2 a parse error.
        A missing name falls back to the last path segment of the file URL.

        :return: self.file_info
        """
        self.logDebug("URL: %s" % self.pyfile.url)

        if hasattr(self, "TEMP_OFFLINE_PATTERN"):
            if re.search(self.TEMP_OFFLINE_PATTERN, self.html):
                self.tempOffline()

        parsed = parseFileInfo(self)[:3]
        name, size, status = parsed

        if status == 1:
            self.offline()
        elif status != 2:
            self.logDebug(self.file_info)
            self.parseError('File info')

        if not name:
            # No name parsed: derive it from the URL path
            name = html_unescape(urlparse(self.pyfile.url).path.split("/")[-1])
        self.pyfile.name = name

        if not size:
            self.logError("File size not parsed")
        else:
            self.pyfile.size = size

        self.logDebug("FILE NAME: %s FILE SIZE: %s" % (self.pyfile.name, self.pyfile.size))
        return self.file_info

Beispiel #15

0

Datei anzeigen

Datei: IcefilmsShowSyncer.py Projekt: t4skforce/IcefilmsShowSyncer

 def __init__(self, show, epPageLink, linkText):
     """
     Build an episode from its page link and the link text.

     :param show: stored as self.show
     :param epPageLink: stored under self.url['epPage']
     :param linkText: source of season/episode number and episode name
     """
     self.url['epPage'] = epPageLink

     season, episode = extractSeasonAndEpisodeNum(linkText)
     self.seasonNum = season
     self.episodeNum = episode

     # Episode name is everything after the first whitespace-terminated token
     name_match = re.search(r'\S*\s(.*)', linkText)
     if name_match is not None:
         self.episodeName = html_unescape(name_match.group(1))

     self.show = show

Beispiel #16

0

Datei anzeigen

    def handleWebLinks(self):
        """
        Decrypt the "Web link" forms found in self.html.

        Every form id is posted to http://linkcrypt.ws/out.html; the URL assigned to
        ``window.location`` in the response is extracted and HTML-unescaped.
        A failure for one id is logged and does not stop the remaining ids.

        :return: list of decrypted links (possibly empty)
        """
        self.logDebug("Search for Web links ")

        package_links = []
        pattern = r'<form action="http://linkcrypt.ws/out.html"[^>]*?>.*?<input[^>]*?value="(.+?)"[^>]*?name="file"'
        ids = re.findall(pattern, self.html, re.I | re.S)

        self.logDebug("Decrypting %d Web links" % len(ids))

        for weblink_id in ids:
            try:
                res = self.load("http://linkcrypt.ws/out.html",
                                post={'file': weblink_id})

                marker_pos = res.find("window.location =")
                if marker_pos == -1:
                    # Without the marker the fixed offset below would slice arbitrary text
                    raise ValueError("redirect target not found in response")

                # 19 = the 17-character marker plus two more characters
                # (presumably a space and the opening quote)
                indexs = marker_pos + 19
                indexe = res.find('"', indexs)
                if indexe == -1:
                    raise ValueError("unterminated redirect target in response")

                link2 = res[indexs:indexe]

                package_links.append(html_unescape(link2))

            except Exception as detail:
                self.logDebug("Error decrypting Web link %s, %s" %
                              (weblink_id, detail))

        # Hand the collected links to the caller; the list was previously built and dropped
        return package_links

Beispiel #17

0

Datei anzeigen

Datei: OneKhDe.py Projekt: earthGavinLee/pyload

 def proceed(self, url, location):
     """
     Resolve every download link id on the parent page to its iframe target.

     :param url: ignored; immediately replaced by self.parent.url
     :param location: not used in this method
     """
     url = self.parent.url
     self.html = self.load(url)

     link_ids = re.findall(r"<a id=\"DownloadLink_(\d*)\" href=\"http://1kh.de/", self.html)
     for link_id in link_ids:
         # Each id has its own page whose iframe src is the real link
         frame_page = self.load("http://1kh.de/l/" + link_id)
         frame_src = re.search("width=\"100%\" src=\"(.*)\"></iframe>", frame_page).group(1)
         self.urls.append(html_unescape(frame_src))

Beispiel #18

0

Datei anzeigen

    def get_file_name(self):
        """
        Return the HTML-unescaped file name.

        Uses self.api["name"] when present, otherwise the last path segment of the
        download link found in self.html[1].
        """
        try:
            raw_name = self.api["name"]
        except KeyError:
            file_name_pattern = 'id="downloadlink"><a href="(.*)" onclick="'
            download_href = re.search(file_name_pattern, self.html[1]).group(1)
            raw_name = download_href.split("/")[-1]

        return html_unescape(raw_name)

Beispiel #19

0

Datei anzeigen

Datei: Plugin.py Projekt: earthGavinLee/pyload

    def load(self, url, get={}, post={}, ref=True, cookies=True, just_header=False, decode=True, multipart=False, req=None):
        """
        Load content at url and returns it

        NOTE(review): no ``return`` statement is visible in this excerpt; the
        function may continue beyond what is shown -- confirm against the full file.
        NOTE(review): ``get={}`` and ``post={}`` are mutable default arguments and
        are therefore shared between calls.

        :param url: address to load; normalised with ``fixurl`` first
        :param get: forwarded to ``req.load`` (presumably GET parameters -- confirm)
        :param post: forwarded to ``req.load`` (presumably POST data -- confirm)
        :param ref: forwarded to ``req.load``
        :param cookies: forwarded to ``req.load`` as ``bool(cookies)``
        :param just_header: If True only the header will be retrieved and returned as dict
        :param decode: Whether to decode the output according to http header, should be True in most cases;
                       a string value is also accepted (see the review note in the body)
        :param multipart: forwarded to ``req.load``
        :param req: request object to use; when None, ``self.req`` or a new request from
                    ``self.pyload.requestFactory`` is used
        :return: Loaded content
        """
        # Honour an abort requested on the file being processed
        if hasattr(self, 'pyfile') and self.pyfile.abort:
            self.abort()

        url = fixurl(url)

        if not url or not isinstance(url, basestring):
            self.fail(_("No url given"))

        # In debug mode log the URL and every other local (i.e. all call arguments)
        if self.pyload.debug:
            self.log_debug("LOAD URL " + url,
                           *["%s=%s" % (key, val) for key, val in locals().items() if key not in ("self", "url")])

        if req is None:
            req = self.req or self.pyload.requestFactory.getRequest(self.__name__)

        #@TODO: Move to network in 0.4.10
        if hasattr(self, 'COOKIES') and isinstance(self.COOKIES, list):
            set_cookies(req.cj, self.COOKIES)

        # Only an exact ``decode=True`` is passed down as "decode"; a string value becomes False here
        res = req.load(url, get, post, ref, bool(cookies), just_header, multipart, decode is True)  #@TODO: Fix network multipart in 0.4.10

        #@TODO: Move to network in 0.4.10
        # NOTE(review): this also runs when just_header is True, where the docstring
        # says the result is a dict -- confirm html_unescape accepts that
        if decode:
            res = html_unescape(res)

        #@TODO: Move to network in 0.4.10
        # NOTE(review): inside this function ``decode`` is the parameter, which this
        # branch has just established to be a string, so calling it raises TypeError.
        # A module-level helper of the same name was presumably intended -- confirm.
        if isinstance(decode, basestring):
            res = decode(res, decode)

        # In debug mode dump the response to tmp/<plugin name>/<caller>_line<lineno>.dump.html
        if self.pyload.debug:
            frame = inspect.currentframe()
            framefile = fs_join("tmp", self.__name__, "%s_line%s.dump.html" % (frame.f_back.f_code.co_name, frame.f_back.f_lineno))
            try:
                if not exists(os.path.join("tmp", self.__name__)):
                    os.makedirs(os.path.join("tmp", self.__name__))

                with open(framefile, "wb") as f:
                    del frame  #: Delete the frame or it wont be cleaned
                    f.write(encode(res))

            except IOError, e:
                self.log_error(e)

Beispiel #20

0

Datei anzeigen

Datei: SimpleCrypter.py Projekt: 3DMeny/pyload

    def getPackageNameAndFolder(self):
        """
        Determine package name and folder.

        When the plugin defines TITLE_PATTERN and it matches self.html, the unescaped
        'title' group is used for both; otherwise the values of the pyfile's package.

        :return: tuple (name, folder)
        """
        title_match = None
        if hasattr(self, 'TITLE_PATTERN'):
            title_match = re.search(self.TITLE_PATTERN, self.html)

        if title_match:
            title = html_unescape(title_match.group('title').strip())
            self.logDebug("Found name [%s] and folder [%s] in package info" % (title, title))
            return title, title

        # No title on the page: fall back to the existing package data
        name = self.pyfile.package().name
        folder = self.pyfile.package().folder
        self.logDebug("Package info not found, defaulting to pyfile name [%s] and folder [%s]" % (name, folder))
        return name, folder

Beispiel #21

0

Datei anzeigen

Datei: LolabitsEs.py Projekt: earthGavinLee/pyload

    def handle_free(self, pyfile):
        """
        Request the download license for the file and store the resulting link in self.link.

        :param pyfile: not used in this method
        """
        fileid_match = re.search(self.FILEID_PATTERN, self.html)
        fileid = fileid_match.group(1)
        self.log_debug("FileID: " + fileid)

        token_match = re.search(self.TOKEN_PATTERN, self.html)
        token = token_match.group(1)
        self.log_debug("Token: " + token)

        form = {'fileId': fileid,
                '__RequestVerificationToken': token}
        self.html = self.load("http://lolabits.es/action/License/Download",
                              post=form,
                              decode="unicode-escape")

        link_match = re.search(self.LINK_PATTERN, self.html)
        self.link = html_unescape(link_match.group(1))

Beispiel #22

0

Datei anzeigen

def _translateAPIFileInfo(apiFileId, apiFileDataMap, apiHosterMap):
    
    # Translate
    fileInfo = {}
    try:
        fileInfo['status'] = MegauploadCom.API_STATUS_MAPPING[apiFileDataMap[apiFileId]]
        fileInfo['name'] = html_unescape(apiFileDataMap['n'])
        fileInfo['size'] = int(apiFileDataMap['s'])
        fileInfo['hoster'] = apiHosterMap[apiFileDataMap['d']]        
    except:
        pass

    return fileInfo

Beispiel #23

0

Datei anzeigen

    def handleCaptcha(self, inputs):
        """
        Detect the captcha used by the page in self.html and fill ``inputs`` for it.

        Tried in order: captcha URL, digit block ordered by padding-left, ReCaptcha,
        SolveMedia. The first one that applies wins; nothing is returned.

        :param inputs: dict that receives the captcha answer fields
        """
        url_match = re.search(self.CAPTCHA_PATTERN, self.html)
        if url_match:
            inputs['code'] = self.decryptCaptcha(url_match.group(1))
            return

        block_match = re.search(self.CAPTCHA_BLOCK_PATTERN, self.html, re.S)
        if block_match:
            captcha_div = block_match.group(1)
            numerals = re.findall(
                r'<span.*?padding-left\s*:\s*(\d+).*?>(\d)</span>',
                html_unescape(captcha_div))

            self.logDebug(captcha_div)

            # Each entry is (padding-left, digit); sort by the numeric offset
            ordered = sorted(numerals, key=lambda pair: int(pair[0]))
            inputs['code'] = "".join(pair[1] for pair in ordered)

            self.logDebug("Captcha code: %s" % inputs['code'], numerals)
            return

        recaptcha = ReCaptcha(self)
        try:
            captcha_key = re.search(self.RECAPTCHA_PATTERN, self.html).group(1)

        except Exception:
            # Pattern missing or not matched: let the helper look for the key
            captcha_key = recaptcha.detect_key()

        else:
            self.logDebug("ReCaptcha key: %s" % captcha_key)

        if captcha_key:
            answer = recaptcha.challenge(captcha_key)
            inputs['recaptcha_response_field'], inputs['recaptcha_challenge_field'] = answer
            return

        solvemedia = SolveMedia(self)
        try:
            captcha_key = re.search(self.SOLVEMEDIA_PATTERN, self.html).group(1)

        except Exception:
            # Pattern missing or not matched: let the helper look for the key
            captcha_key = solvemedia.detect_key()

        else:
            self.logDebug("SolveMedia key: %s" % captcha_key)

        if captcha_key:
            answer = solvemedia.challenge(captcha_key)
            inputs['adcopy_response'], inputs['adcopy_challenge'] = answer

Beispiel #24

0

Datei anzeigen

Datei: UploadedTo.py Projekt: masterwaster/pyload

def getInfo(urls):
    """
    Yield file-info lists for ``urls``, queried from the API in chunks of 80.

    Each yielded list holds one 4-tuple per API entry whose first field is
    "online" or "offline"; entries with any other first field are skipped.
    """
    for url_chunk in chunks(urls, 80):
        api = getAPIData(url_chunk)

        result = []
        for entry in api.itervalues():
            state = entry[0]

            if state == "online":
                info = (html_unescape(entry[2]), entry[1], 2, entry[4])
                result.append(info)

            elif state == "offline":
                info = (entry[4], 0, 1, entry[4])
                result.append(info)

        yield result

Beispiel #25

0

Datei anzeigen

Datei: SimpleHoster.py Projekt: Zerknechterer/pyload

    def download(self, url, *args, **kwargs):
        """
        Clean up ``url`` and delegate to the parent class' download.

        Does nothing when url is empty or not a string. A URL without scheme is
        joined onto scheme and host of self.pyfile.url.

        :param url: link to download
        :return: result of the parent download, or None when url is unusable
        """
        if not (url and isinstance(url, basestring)):
            return

        self.correctCaptcha()

        #@TODO: Move to Hoster in 0.4.10
        target = html_unescape(url.decode('unicode-escape').strip())

        has_scheme = urlparse.urlparse(target).scheme
        if not has_scheme:
            origin = urlparse.urlparse(self.pyfile.url)
            baseurl = "%s://%s" % (origin.scheme, origin.netloc)
            target = urlparse.urljoin(baseurl, target)

        return super(SimpleHoster, self).download(target, *args, **kwargs)

Beispiel #26

0

Datei anzeigen

Datei: UploadedTo.py Projekt: sraedler/pyload

def getInfo(urls):
    """
    Yield file-info lists for ``urls``, queried from the API in chunks of 80.

    Each yielded list holds one 4-tuple per API entry whose first field is
    "online" or "offline"; entries with any other first field are skipped.
    """
    for url_chunk in chunks(urls, 80):
        api = getAPIData(url_chunk)

        result = []
        for entry in api.itervalues():
            state = entry[0]

            if state == "online":
                info = (html_unescape(entry[2]), entry[1], 2, entry[4])
                result.append(info)

            elif state == "offline":
                info = (entry[4], 0, 1, entry[4])
                result.append(info)

        yield result

Beispiel #27

0

Datei anzeigen

Datei: UploadedTo.py Projekt: masterwaster/pyload

def parseFileInfo(self, url = '', html = ''):
    """
    Extract file name, size, status and id from a page.

    :param self: object providing FILE_OFFLINE_PATTERN, FILE_INFO_PATTERN and,
                 optionally, ``html`` (used when no html argument is given)
    :param url: returned as the name when no file info is found
    :param html: page source to parse
    :return: tuple (name, size, status, fileid); status is 1 when the offline
             pattern matches, 2 when file info was found and 3 otherwise
    """
    if not html and hasattr(self, "html"):
        html = self.html

    if re.search(self.FILE_OFFLINE_PATTERN, html):
        # File offline
        return url, 0, 1, None

    info = re.search(self.FILE_INFO_PATTERN, html)
    if not info:
        return url, 0, 3, None

    name = html_unescape(info.group('N'))
    fileid = info.group('ID')
    size = parseFileSize(info.group('S'))
    return name, size, 2, fileid

Beispiel #28

0

Datei anzeigen

def parseFileInfo(self, url='', html=''):
    """
    Extract file name, size, status and id from a page.

    :param self: object providing FILE_OFFLINE_PATTERN, FILE_INFO_PATTERN and,
                 optionally, ``html`` (used when no html argument is given)
    :param url: returned as the name when no file info is found
    :param html: page source to parse
    :return: tuple (name, size, status, fileid); status is 1 when the offline
             pattern matches, 2 when file info was found and 3 otherwise
    """
    if not html and hasattr(self, "html"):
        html = self.html

    if re.search(self.FILE_OFFLINE_PATTERN, html):
        # File offline
        return url, 0, 1, None

    info = re.search(self.FILE_INFO_PATTERN, html)
    if not info:
        return url, 0, 3, None

    name = html_unescape(info.group('N'))
    fileid = info.group('ID')
    size = parseFileSize(info.group('S'))
    return name, size, 2, fileid

Beispiel #29

0

Datei anzeigen

Datei: XFileSharingPro.py Projekt: keat01/pyLoad

    def process(self, pyfile):
        """
        Entry point: load the file page, detect a direct-download redirect and start the download.

        HOSTER_NAME and DIRECT_LINK_PATTERN are derived from the plugin pattern when
        the plugin does not define them. URLs that do not match the plugin pattern
        are only handled for premium users.

        :param pyfile: file object whose url is loaded and whose name may be set
        """
        if not hasattr(self, "HOSTER_NAME"):
            hoster_match = re.search(self.__pattern__, self.pyfile.url)
            self.HOSTER_NAME = hoster_match.group(1)
        if not hasattr(self, "DIRECT_LINK_PATTERN"):
            self.DIRECT_LINK_PATTERN = r'(http://(\w+\.%s|\d+\.\d+\.\d+\.\d+)(:\d+/d/|/files/\d+/\w+/)[^"\'<]+)' % self.HOSTER_NAME

        self.captcha = None
        self.errmsg = None
        self.passwords = self.getPassword().splitlines()

        if not re.match(self.__pattern__, self.pyfile.url):
            # The URL does not match this plugin's own pattern
            if not self.premium:
                self.fail(
                    "Only premium users can download from other hosters with %s"
                    % self.HOSTER_NAME)
            else:
                self.handleOverriden()
            return

        try:
            self.html = self.load(pyfile.url, cookies=False, decode=True)
            self.file_info = self.getFileInfo()
        except PluginParseError:
            self.file_info = None

        self.req.http.lastURL = self.pyfile.url

        # Load again without following redirects so the Location header can be inspected
        self.req.http.c.setopt(FOLLOWLOCATION, 0)
        self.html = self.load(self.pyfile.url, cookies=True, decode=True)
        self.header = self.req.http.header
        self.req.http.c.setopt(FOLLOWLOCATION, 1)

        self.location = None
        header_match = re.search("Location\s*:\s*(.*)", self.header, re.I)
        if header_match:
            candidate = header_match.group(1)
            if re.match(self.DIRECT_LINK_PATTERN, candidate):
                self.location = candidate.strip()

        if not self.file_info:
            # No parsed info: name the file after the last URL path segment
            source_url = self.location if self.location else pyfile.url
            last_segment = urlparse(source_url).path.split("/")[-1]
            pyfile.name = html_unescape(unquote(last_segment))

        if self.location:
            self.startDownload(self.location)
        elif self.premium:
            self.handlePremium()
        else:
            self.handleFree()

Beispiel #30

0

Datei anzeigen

Datei: SimpleCrypter.py Projekt: sebmaynard/pyload

    def getPackageNameAndFolder(self):
        """
        Determine package name and folder.

        When the plugin defines TITLE_PATTERN and it matches self.html, the unescaped
        'title' group is used for both; otherwise the values of the pyfile's package.

        :return: tuple (name, folder)
        """
        title_match = None
        if hasattr(self, 'TITLE_PATTERN'):
            title_match = re.search(self.TITLE_PATTERN, self.html)

        if title_match:
            title = html_unescape(title_match.group('title').strip())
            self.logDebug("Found name [%s] and folder [%s] in package info" % (title, title))
            return title, title

        # No title on the page: fall back to the existing package data
        name = self.pyfile.package().name
        folder = self.pyfile.package().folder
        self.logDebug("Package info not found, defaulting to pyfile name [%s] and folder [%s]" % (name, folder))
        return name, folder

Beispiel #31

0

Datei anzeigen

Datei: XFileSharingPro.py Projekt: tetratec/shareacc

    def process(self, pyfile):
        """
        Entry point: load the file page, detect a direct-download redirect and start the download.

        HOSTER_NAME and DIRECT_LINK_PATTERN are derived from the plugin pattern when
        the plugin does not define them. URLs that do not match the plugin pattern
        are only handled for premium users.

        :param pyfile: file object whose url is loaded and whose name may be set
        """
        if not hasattr(self, "HOSTER_NAME"):
            hoster_match = re.search(self.__pattern__, self.pyfile.url)
            self.HOSTER_NAME = hoster_match.group(1)
        if not hasattr(self, "DIRECT_LINK_PATTERN"):
            self.DIRECT_LINK_PATTERN = r'(http://(\w+\.%s|\d+\.\d+\.\d+\.\d+)(:\d+/d/|/files/\d+/\w+/)[^"\'<]+)' % self.HOSTER_NAME

        self.captcha = None
        self.errmsg = None
        self.passwords = self.getPassword().splitlines()

        if not re.match(self.__pattern__, self.pyfile.url):
            # The URL does not match this plugin's own pattern
            if not self.premium:
                self.fail("Only premium users can download from other hosters with %s" % self.HOSTER_NAME)
            else:
                self.handleOverriden()
            return

        try:
            self.html = self.load(pyfile.url, cookies=False, decode=True)
            self.file_info = self.getFileInfo()
        except PluginParseError:
            self.file_info = None

        self.req.http.lastURL = self.pyfile.url

        # Load again without following redirects so the Location header can be inspected
        self.req.http.c.setopt(FOLLOWLOCATION, 0)
        self.html = self.load(self.pyfile.url, cookies=True, decode=True)
        self.header = self.req.http.header
        self.req.http.c.setopt(FOLLOWLOCATION, 1)

        self.location = None
        header_match = re.search("Location\s*:\s*(.*)", self.header, re.I)
        if header_match:
            candidate = header_match.group(1)
            if re.match(self.DIRECT_LINK_PATTERN, candidate):
                self.location = candidate.strip()

        if not self.file_info:
            # No parsed info: name the file after the last URL path segment
            source_url = self.location if self.location else pyfile.url
            last_segment = urlparse(source_url).path.split("/")[-1]
            pyfile.name = html_unescape(unquote(last_segment))

        if self.location:
            self.startDownload(self.location)
        elif self.premium:
            self.handlePremium()
        else:
            self.handleFree()

Beispiel #32

0

Datei anzeigen

    def handleFree(self, pyfile):
        """
        Find the free download link, making at most two passes over the page.

        When the link found is not yet a direct link, the page behind it is loaded
        into self.html and searched again on the second pass.

        :param pyfile: not used in this method
        """
        for _attempt in xrange(2):
            found = re.search(self.LINK_FREE_PATTERN, self.html)

            if found is None:
                self.error(_("Free download link not found"))
                continue

            link = html_unescape(found.group(1).decode('unicode-escape'))
            if not urlparse.urlparse(link).scheme:
                # Relative link: resolve it against the Google Docs host
                link = urlparse.urljoin("https://docs.google.com/", link)

            direct_link = self.directLink(link, False)
            if direct_link:
                self.link = direct_link
                break

            self.html = self.load(link, decode=True)

Beispiel #33

0

Datei anzeigen

    def downloadFile(self, pyfile):
        """
        Follow up to five redirects, determine the file name and start the download.

        Only headers are requested while following ``location`` headers; relative
        targets are prefixed with scheme and host of the current URL. The file name
        comes from the last path segment of the final URL and is overridden by the
        ``filename`` entry of a ``content-disposition`` header when one matches.

        :param pyfile: object whose ``url`` is read and whose ``name`` is set
        :raises BadHeader: when a header reports status code 404
        """
        url = pyfile.url

        for i in range(5):
            header = self.load(url, just_header=True)

            # self.load does not raise a BadHeader on 404 responses, do it here
            if 'code' in header and header['code'] == 404:
                raise BadHeader(404)

            if 'location' not in header:
                break

            location = header['location']
            self.logDebug("Location: " + location)
            base = search(r'https?://[^/]+', url).group(0)

            if location.startswith("http"):
                url = unquote(location)
            elif location.startswith("/"):
                url = base + unquote(location)
            else:
                url = '%s/%s' % (base, unquote(location))

        last_segment = urlparse(url).path.split("/")[-1]
        name = html_unescape(unquote(last_segment))

        if 'content-disposition' in header:
            disposition = header['content-disposition']
            self.logDebug("Content-Disposition: " + disposition)
            m = search("filename(?P<type>=|\*=(?P<enc>.+)'')(?P<name>.*)", disposition)
            if m:
                disp = m.groupdict()
                self.logDebug(disp)
                # The plain "filename=" form carries no charset; fall back to UTF-8
                if not disp['enc']:
                    disp['enc'] = 'utf-8'
                stripped = remove_chars(disp['name'], "\"';").strip()
                name = unicode(unquote(stripped), disp['enc'])

        # Fall back to the full URL when no name could be derived
        pyfile.name = name if name else url
        self.logDebug("Filename: %s" % pyfile.name)
        self.download(url, disposition=True)

Beispiel #34

0

Datei anzeigen

Datei: UploadedTo.py Projekt: keat01/pyLoad

    def process(self, pyfile):
        """Determine the file's status, set its name, then start the download.

        File information comes from ``getAPIData``.  When that returns
        nothing, the file page is loaded and parsed with ``parseFileInfo``
        instead.  Finally the premium or the free handler is invoked depending
        on ``self.premium``.

        :param pyfile: file object whose ``url`` is read and whose ``name``
            (and, on the page-parsing path, ``size``) is set.
        """
        # Request English pages; per the original note the cookie alone
        # doesn't work anymore, hence the extra language request.
        self.req.cj.setCookie("uploaded.net", "lang", "en")
        self.load("http://uploaded.net/language/en")

        api = getAPIData([pyfile.url])

        # TODO: fallback to parse from site, because api sometimes delivers wrong status codes

        if not api:
            self.logWarning("No response for API call")

            raw_page = self.load(pyfile.url, decode=False)
            self.html = unicode(raw_page, 'iso-8859-1')

            name, size, status, self.fileID = parseFileInfo(self)
            self.logDebug(name, size, status, self.fileID)

            if status == 2:
                pyfile.name, pyfile.size = name, size
            elif status == 1:
                self.offline()
            else:
                self.fail('Parse error - file info')

        elif api == 'Access denied':
            self.fail(_("API key invalid"))

        else:
            if self.fileID not in api:
                self.offline()

            self.data = api[self.fileID]
            file_state = self.data[0]
            if file_state != "online":
                self.offline()

            pyfile.name = html_unescape(self.data[2])

        # Dispatch to the handler matching the account type.
        handler = self.handlePremium if self.premium else self.handleFree
        handler()

Beispiel #35

0

Datei anzeigen

Datei: SerienjunkiesOrg.py Projekt: J-Ha/pyload-stuff

    def handleShow(self, url):
        """Collect all links from a show page into one package.

        The links are the ``href`` values of every ``<a>`` inside the
        ``<div id="scb">`` element.  When the ``changeNameSJ`` option is
        ``"Show"`` the package is named after the page's first ``<h2><a>``
        text (cut at ``" &#8211;"``) if that is non-empty; otherwise the
        current package name is kept and ``"#hasName"`` is appended to every
        link.

        :param url: address of the show page, fetched via ``self.getSJSrc``.
        """
        src = self.getSJSrc(url)
        soup = BeautifulSoup(src)

        # Read the naming mode once instead of once per link.
        use_show_name = self.config.get("changeNameSJ") == "Show"

        package_name = self.pyfile.package().name
        if use_show_name:
            found = html_unescape(soup.find("h2").find("a").string.split(" &#8211;")[0])
            if found:
                package_name = found

        nav = soup.find("div", attrs={"id": "scb"})
        package_links = [a["href"] for a in nav.findAll("a")]
        if not use_show_name:
            package_links = [href + "#hasName" for href in package_links]

        # Outside "Show" mode package_name is still the current package name,
        # so a single append covers both of the original branches.
        self.packages.append((package_name, package_links, package_name))

Beispiel #36

0

Datei anzeigen

Datei: UploadedTo.py Projekt: beefone/pyload

    def process(self, pyfile):
        """Look up the file's status and name, then run the download handler.

        ``getAPIData`` is queried first.  An ``'Access denied'`` answer fails
        the download; an empty answer falls back to loading the file page and
        parsing it with ``parseFileInfo``; any other answer is treated as a
        mapping keyed by file id.  Afterwards ``handlePremium`` or
        ``handleFree`` is called depending on ``self.premium``.

        :param pyfile: file object whose ``url`` is read and whose ``name``
            (and, on the page-parsing path, ``size``) is set.
        """
        # Language cookie (noted by the original author as no longer
        # working), followed by an explicit language request.
        self.req.cj.setCookie("uploaded.net", "lang", "en")
        self.load("http://uploaded.net/language/en")

        api = getAPIData([pyfile.url])

        # TODO: fallback to parse from site, because api sometimes delivers wrong status codes

        if api == 'Access denied':
            self.fail(_("API key invalid"))

        elif api:
            if self.fileID not in api:
                self.offline()

            self.data = api[self.fileID]
            if self.data[0] != "online":
                self.offline()

            pyfile.name = html_unescape(self.data[2])

        else:
            self.logWarning("No response for API call")

            page_bytes = self.load(pyfile.url, decode = False)
            self.html = unicode(page_bytes, 'iso-8859-1')

            name, size, status, self.fileID = parseFileInfo(self)
            self.logDebug(name, size, status, self.fileID)

            if status == 1:
                self.offline()
            elif status == 2:
                pyfile.name = name
                pyfile.size = size
            else:
                self.fail('Parse error - file info')

        if not self.premium:
            self.handleFree()
        else:
            self.handlePremium()

Beispiel #37

0

Datei anzeigen

Datei: YoutubeCom.py Projekt: masterwaster/pyload

    def process(self, pyfile):
        """Choose a stream from the watch page according to config and download it.

        The stream list is taken from the page's
        ``url_encoded_fmt_stream_map`` value.  Streams whose itag is unknown
        to ``self.formats`` or whose format is not enabled in the config are
        dropped.  If the desired itag is not among the remaining ones, the
        candidate list is reduced pairwise using the quality index (field 3
        of ``self.formats``).

        :param pyfile: file object whose ``url`` is loaded and whose ``name``
            is set from the page's title meta tag plus the format suffix.
        """
        html = self.load(pyfile.url, decode=True)

        if "watch-player-unavailable" in html:
            self.offline()

        if "We have been receiving a large volume of requests from your network." in html:
            self.tempOffline()

        # Map the quality setting to an itag; 3D uses its own itag table.
        use3d = self.getConf("3d")
        quality = ({"sd": 82, "hd": 84, "fullhd": 85, "240p": 83, "360p": 82,
                    "480p": 82, "720p": 84, "1080p": 85, "3072p": 85}
                   if use3d else
                   {"sd": 18, "hd": 22, "fullhd": 37, "240p": 5, "360p": 18,
                    "480p": 35, "720p": 22, "1080p": 37, "3072p": 38})

        requested = self.getConf("fmt")
        if requested and requested not in self.formats:
            self.logWarning("FMT %d unknown - using default." % requested)
            requested = 0
        # An explicit fmt wins; otherwise derive it from the quality setting.
        desired_fmt = requested or quality.get(self.getConf("quality"), 18)

        # Parse available streams: "," separates streams, a literal \u0026
        # separates the key=value pairs of one stream.
        stream_map = re.search(r'"url_encoded_fmt_stream_map": "(.*?)",', html).group(1)
        entries = [chunk.split('\u0026') for chunk in stream_map.split(',')]
        parsed = [dict(pair.split('=', 1) for pair in entry) for entry in entries]
        streams = [(int(info['itag']), "%s&signature=%s" % (unquote(info['url']), info['sig']))
                   for info in parsed]
        self.logDebug("AVAILABLE STREAMS: %s" % [stream[0] for stream in streams])

        def allowed(itag):
            # Enabled when the config option named by the format's first field is set.
            return self.getConfig(self.formats[itag][0])

        # Keep only known and enabled itags, preferring those matching the 3D setting.
        streams = [stream for stream in streams
                   if stream[0] in self.formats and allowed(stream[0])]
        if not streams:
            self.fail("No available stream meets your preferences")
        same_dimension = [stream for stream in streams
                          if self.formats[stream[0]][4] == use3d]
        fmt_dict = dict(same_dimension or streams)

        desired_desc = "%s %dx%d Q:%d 3D:%s" % self.formats[desired_fmt]
        found_note = "" if desired_fmt in fmt_dict else "NOT "
        allowed_note = "" if allowed(desired_fmt) else "NOT "
        self.logDebug("DESIRED STREAM: ITAG:%d (%s) %sfound, %sallowed" %
                      (desired_fmt, desired_desc, found_note, allowed_note))

        if desired_fmt in fmt_dict and allowed(desired_fmt):
            fmt = desired_fmt
        else:
            def sel(itag):
                # Quality index of a format.
                return self.formats[itag][3]

            def comp(first, second):
                return abs(sel(first) - sel(second))

            def nearer(current, candidate):
                # Keep the current pick only if it is at least as close to the
                # desired quality and of higher quality than the candidate.
                if comp(current, desired_fmt) <= comp(candidate, desired_fmt) and \
                        sel(current) > sel(candidate):
                    return current
                return candidate

            self.logDebug("Choosing nearest fmt: %s" %
                          [(itag, allowed(itag), comp(itag, desired_fmt)) for itag in fmt_dict.keys()])
            fmt = reduce(nearer, fmt_dict.keys())

        self.logDebug("Chosen fmt: %s" % fmt)
        url = fmt_dict[fmt]
        self.logDebug("URL: %s" % url)

        # File name: page title without slashes, plus the format's suffix.
        if fmt in self.formats:
            file_suffix = self.formats[fmt][0]
        else:
            file_suffix = ".flv"
        file_name_pattern = '<meta name="title" content="(.+?)">'
        title = re.search(file_name_pattern, html).group(1)
        name = title.replace("/", "") + file_suffix
        pyfile.name = html_unescape(name)

        self.download(url)

Beispiel #38

0

Datei anzeigen

Datei: SerienjunkiesOrg.py Projekt: J-Ha/pyload-stuff

    def handleSeason(self, url):
        """Parse a season page into link groups and register them as packages.

        The paragraphs of the ``post-content`` div are walked in order.  A
        paragraph mentioning "Sprache" or "Format" opens a new group and
        records those two options; a "Download:" paragraph adds an episode
        with its per-hoster links to the current group.  How the collected
        links are split into packages depends on the ``changeNameSJ`` option
        ("Episode", "Format", "Packagename" or "Season") and on whether
        ``url`` contains ``#hasName``.

        :param url: address of the season page, fetched via ``self.getSJSrc``.
        """
        src = self.getSJSrc(url)
        soup = BeautifulSoup(src)
        post = soup.find("div", attrs={"class": "post-content"})
        ps = post.findAll("p")

        seasonName = html_unescape(soup.find("a", attrs={"rel": "bookmark"}).string).replace("&#8211;", "-")
        # groups: running index -> {"ep": {episode: {hoster: [links]}}, "opts": {...}}
        groups = {}
        gid = -1
        for p in ps:
            if re.search("<strong>Sprache|<strong>Format", str(p)):
                # Header paragraph: read the "Sprache"/"Format" values and open a new group.
                var = p.findAll("strong")
                opts = {"Sprache": "", "Format": ""}
                for v in var:
                    n = html_unescape(v.string).strip()
                    # Drop one leading and one trailing colon from the label.
                    n = re.sub(r"^([:]?)(.*?)([:]?)$", r"\2", n)
                    if n.strip() not in opts:
                        continue
                    # The option's value is the node following the <strong> label.
                    val = v.nextSibling
                    if not val:
                        continue
                    val = val.replace("|", "").strip()
                    val = re.sub(r"^([:]?)(.*?)([:]?)$", r"\2", val)
                    opts[n.strip()] = val.strip()
                gid += 1
                groups[gid] = {}
                groups[gid]["ep"] = {}
                groups[gid]["opts"] = opts
            elif re.search("<strong>Download:", str(p)):
                # NOTE(review): if a "Download:" paragraph precedes every
                # Sprache/Format paragraph, gid is still -1 and groups[gid]
                # below raises KeyError - confirm the page layout rules this out.
                parts = str(p).split("<br />")
                if re.search("<strong>", parts[0]):
                    # The first part holds the episode name; the rest hold hoster links.
                    ename = (
                        re.search("<strong>(.*?)</strong>", parts[0])
                        .group(1)
                        .strip()
                        .decode("utf-8")
                        .replace("&#8211;", "-")
                    )
                    groups[gid]["ep"][ename] = {}
                    parts.remove(parts[0])
                    for part in parts:
                        # A hoster name looks like " | host.tld" inside the part.
                        hostername = re.search(r" \| ([-a-zA-Z0-9]+\.\w+)", part)
                        if hostername:
                            hostername = hostername.group(1)
                            groups[gid]["ep"][ename][hostername] = []
                            links = re.findall('href="(.*?)"', part)
                            for link in links:
                                groups[gid]["ep"][ename][hostername].append(link + "#hasName")

        # Turn the collected groups into packages according to the naming mode.
        links = []
        for g in groups.values():
            for ename in g["ep"]:
                links.extend(self.getpreferred(g["ep"][ename]))
                if self.config.get("changeNameSJ") == "Episode":
                    # One package per episode; start collecting afresh.
                    self.packages.append((ename, links, ename))
                    links = []
            package = "%s (%s, %s)" % (seasonName, g["opts"]["Format"], g["opts"]["Sprache"])
            if self.config.get("changeNameSJ") == "Format":
                # One package per format/language group; start collecting afresh.
                self.packages.append((package, links, package))
                links = []
        # NOTE(review): exactly one of the two branches below always runs,
        # because the URL either contains "#hasName" or it does not.  In
        # "Episode"/"Format" mode links has already been reset to [], so a
        # package with no links is appended - confirm this is intended.
        if (self.config.get("changeNameSJ") == "Packagename") or re.search("#hasName", url):
            self.packages.append((self.pyfile.package().name, links, self.pyfile.package().name))
        elif (self.config.get("changeNameSJ") == "Season") or not re.search("#hasName", url):
            self.packages.append((seasonName, links, seasonName))

Beispiel #39

0

Datei anzeigen

Datei: Plugin.py Projekt: earthGavinLee/pyload

def fixurl(url):
    """Return *url* with escapes decoded, surrounding whitespace and trailing slashes removed.

    The steps are applied in this order: ``unicode-escape`` decoding,
    percent-unquoting, HTML-entity unescaping, ``strip()``, then
    ``rstrip('/')``.
    """
    decoded = url.decode('unicode-escape')
    unquoted = urllib.unquote(decoded)
    trimmed = html_unescape(unquoted).strip()
    return trimmed.rstrip('/')

Beispiel #40

0

Datei anzeigen

Datei: YoutubeCom.py Projekt: ASCIIteapot/pyload

    def process(self, pyfile):
        """Download the configured stream of a video, optionally cut at a start time.

        The stream is selected from the page's ``url_encoded_fmt_stream_map``
        using the ``fmt``/``quality``/``3d`` options and ``self.formats``.
        When the URL carries a ``t=<m>m<s>s`` start time and an ``ffmpeg``
        executable is found, the downloaded file is re-muxed with
        ``ffmpeg -ss`` so that it starts at that offset.

        :param pyfile: file object whose ``url`` is loaded and whose ``name``
            is set from the page title, the start time and the format suffix.
        """
        html = self.load(pyfile.url, decode=True)

        if re.search(r'<div id="player-unavailable" class="\s*player-width player-height\s*">', html):
            self.offline()

        if "We have been receiving a large volume of requests from your network." in html:
            self.tempOffline()

        # Map the quality setting to an itag; 3D uses its own itag table.
        use3d = self.getConfig("3d")
        quality = ({"sd": 82, "hd": 84, "fullhd": 85, "240p": 83, "360p": 82,
                    "480p": 82, "720p": 84, "1080p": 85, "3072p": 85}
                   if use3d else
                   {"sd": 18, "hd": 22, "fullhd": 37, "240p": 5, "360p": 18,
                    "480p": 35, "720p": 22, "1080p": 37, "3072p": 38})

        requested = self.getConfig("fmt")
        if requested and requested not in self.formats:
            self.logWarning("FMT %d unknown - using default." % requested)
            requested = 0
        # An explicit fmt wins; otherwise derive it from the quality setting.
        desired_fmt = requested or quality.get(self.getConfig("quality"), 18)

        # Parse available streams: "," separates streams, a literal \u0026
        # separates the key=value pairs of one stream.
        stream_map = re.search(r'"url_encoded_fmt_stream_map": "(.*?)",', html).group(1)
        entries = [chunk.split('\u0026') for chunk in stream_map.split(',')]
        parsed = [dict(pair.split('=', 1) for pair in entry) for entry in entries]
        streams = [(int(info['itag']), unquote(info['url'])) for info in parsed]
        self.logDebug("AVAILABLE STREAMS: %s" % [stream[0] for stream in streams])

        def allowed(itag):
            # Enabled when the config option named by the format's first field is set.
            return self.getConfig(self.formats[itag][0])

        # Keep only known and enabled itags, preferring those matching the 3D setting.
        streams = [stream for stream in streams
                   if stream[0] in self.formats and allowed(stream[0])]
        if not streams:
            self.fail("No available stream meets your preferences")
        same_dimension = [stream for stream in streams
                          if self.formats[stream[0]][4] == use3d]
        fmt_dict = dict(same_dimension or streams)

        desired_desc = "%s %dx%d Q:%d 3D:%s" % self.formats[desired_fmt]
        found_note = "" if desired_fmt in fmt_dict else "NOT "
        allowed_note = "" if allowed(desired_fmt) else "NOT "
        self.logDebug("DESIRED STREAM: ITAG:%d (%s) %sfound, %sallowed" %
                      (desired_fmt, desired_desc, found_note, allowed_note))

        if desired_fmt in fmt_dict and allowed(desired_fmt):
            fmt = desired_fmt
        else:
            def sel(itag):
                # Quality index of a format.
                return self.formats[itag][3]

            def comp(first, second):
                return abs(sel(first) - sel(second))

            def nearer(current, candidate):
                # Keep the current pick only if it is at least as close to the
                # desired quality and of higher quality than the candidate.
                if comp(current, desired_fmt) <= comp(candidate, desired_fmt) and \
                        sel(current) > sel(candidate):
                    return current
                return candidate

            self.logDebug("Choosing nearest fmt: %s" %
                          [(itag, allowed(itag), comp(itag, desired_fmt)) for itag in fmt_dict.keys()])
            fmt = reduce(nearer, fmt_dict.keys())

        self.logDebug("Chosen fmt: %s" % fmt)
        url = fmt_dict[fmt]
        self.logDebug("URL: %s" % url)

        # File name: page title without slashes, suffix appended further down.
        if fmt in self.formats:
            file_suffix = self.formats[fmt][0]
        else:
            file_suffix = ".flv"
        file_name_pattern = '<meta name="title" content="(.+?)">'
        title = re.search(file_name_pattern, html).group(1)
        name = title.replace("/", "")

        # Cleaning invalid characters from the file name
        name = name.encode('ascii', 'replace')

        pyfile.name = html_unescape(name)

        # Optional start offset taken from a "t=<m>m<s>s" URL parameter.
        start = re.search(r"t=((\d+)m)?(\d+)s", pyfile.url)
        ffmpeg = which("ffmpeg")
        cut_start = ffmpeg and start
        if cut_start:
            minutes = start.group(2) or "0"
            seconds = start.group(3)
            pyfile.name += " (starting at %s:%s)" % (minutes, seconds)
        pyfile.name += file_suffix

        filename = self.download(url)

        if cut_start:
            # Move the download aside and let ffmpeg write the trimmed copy
            # (streams copied, not re-encoded) under the original name.
            inputfile = filename + "_"
            os.rename(filename, inputfile)

            command = [ffmpeg,
                       "-ss", "00:%s:%s" % (minutes, seconds),
                       "-i", inputfile,
                       "-vcodec", "copy",
                       "-acodec", "copy",
                       filename]
            subprocess.call(command)
            os.remove(inputfile)

Beispiel #41

0

Datei anzeigen

Datei: XFSHoster.py Projekt: earthGavinLee/pyload

    def handle_captcha(self, inputs):
        """Fill *inputs* with the answer for the first captcha type found in ``self.html``.

        Checked in order:

        1. an image captcha (``CAPTCHA_PATTERN``), solved via ``self.captcha.decrypt``;
        2. a block of positioned digits (``CAPTCHA_BLOCK_PATTERN``), read by
           sorting the digits by their ``padding-left`` value;
        3. ReCaptcha;
        4. SolveMedia.

        :param inputs: mapping of form fields; the captcha fields are added in place.
        """
        image_match = re.search(self.CAPTCHA_PATTERN, self.html)
        if image_match:
            inputs['code'] = self.captcha.decrypt(image_match.group(1))
            return

        block_match = re.search(self.CAPTCHA_BLOCK_PATTERN, self.html, re.S)
        if block_match:
            captcha_div = block_match.group(1)
            # Each hit is a (padding-left, digit) pair.
            numerals    = re.findall(r'<span.*?padding-left\s*:\s*(\d+).*?>(\d)</span>', html_unescape(captcha_div))

            self.log_debug(captcha_div)

            # Left-to-right order on screen is given by ascending padding-left.
            ordered = sorted(numerals, key=lambda pair: int(pair[0]))
            code = "".join(pair[1] for pair in ordered)
            inputs['code'] = code

            self.log_debug("Captcha code: %s" % code, numerals)
            return

        recaptcha = ReCaptcha(self)
        try:
            captcha_key = re.search(self.RECAPTCHA_PATTERN, self.html).group(1)

        except Exception:
            # Pattern missing or not matched: let the helper look for the key.
            captcha_key = recaptcha.detect_key()

        else:
            self.log_debug("ReCaptcha key: %s" % captcha_key)

        if captcha_key:
            response, challenge = recaptcha.challenge(captcha_key)
            inputs['recaptcha_response_field'] = response
            inputs['recaptcha_challenge_field'] = challenge
            return

        solvemedia = SolveMedia(self)
        try:
            captcha_key = re.search(self.SOLVEMEDIA_PATTERN, self.html).group(1)

        except Exception:
            # Pattern missing or not matched: let the helper look for the key.
            captcha_key = solvemedia.detect_key()

        else:
            self.log_debug("SolveMedia key: %s" % captcha_key)

        if captcha_key:
            response, challenge = solvemedia.challenge(captcha_key)
            inputs['adcopy_response'] = response
            inputs['adcopy_challenge'] = challenge

Beispiel #42

0

Datei anzeigen

Datei: YoutubeCom.py Projekt: fener06/pyload

    def process(self, pyfile):
        """Pick a stream from the page's flashvars and download it.

        The desired itag comes from the ``quality`` option (sd=18, hd=22,
        fullhd=37) unless the ``fmt`` option is set.  Stream URLs and itags
        are extracted from the ``flashvars`` attribute; itags unknown to
        ``self.formats`` are discarded and the rest is reduced pairwise using
        the quality index (field 3 of ``self.formats``).

        :param pyfile: file object whose ``url`` is loaded and whose ``name``
            is set from the page title plus the format suffix.
        """
        html = self.load(pyfile.url, decode=True)

        if "watch-player-unavailable" in html:
            self.offline()

        if "We have been receiving a large volume of requests from your network." in html:
            self.tempOffline()

        file_name_pattern = '<meta name="title" content="(.+?)">'

        quality = self.getConf("quality")
        desired_fmt = 18

        if quality == "sd":
            desired_fmt = 18
        elif quality == "hd":
            desired_fmt = 22
        elif quality == "fullhd":
            desired_fmt = 37

        # An explicitly configured fmt overrides the quality preset.
        if self.getConfig("fmt"):
            desired_fmt = self.getConf("fmt")

        # NOTE(review): a page without a flashvars attribute makes the next
        # line fail with AttributeError on None - confirm how that should be
        # reported.
        flashvars = re.search(r'flashvars=\\"(.*?)\\"', html)
        flashvars = unquote(flashvars.group(1))

        fmts = re.findall(r'url=(.*?)%3B.*?itag=(\d+)', flashvars)
        fmt_dict = {}
        for url, fmt in fmts:
            fmt = int(fmt)
            fmt_dict[fmt] = unquote(url)

        self.logDebug("Found links: %s" % fmt_dict)
        # Iterate over an explicit copy of the keys so entries can be deleted
        # safely.  (This body was previously indented with tabs, which
        # Python 3 rejects.)
        for fmt in list(fmt_dict):
            if fmt not in self.formats:
                self.logDebug("FMT not supported: %s" % fmt)
                del fmt_dict[fmt]

        allowed = lambda x: self.getConfig(self.formats[x][0])
        sel = lambda x: self.formats[x][3]  # select quality index
        comp = lambda x, y: abs(sel(x) - sel(y))

        # return fmt nearest to quality index
        # NOTE(review): reduce() raises TypeError when fmt_dict is empty -
        # confirm whether an explicit failure message is wanted here.
        fmt = reduce(lambda x, y: x if comp(x, desired_fmt) <= comp(y, desired_fmt) and
                                       sel(x) > sel(y) and
                                       allowed(x) else y, fmt_dict.keys())

        self.logDebug("Choose fmt: %s" % fmt)

        file_suffix = ".flv"
        if fmt in self.formats:
            file_suffix = self.formats[fmt][0]
        name = re.search(file_name_pattern, html).group(1).replace("/", "") + file_suffix
        pyfile.name = html_unescape(name)

        self.download(fmt_dict[fmt])

Beispiel #43

0

Datei anzeigen

Datei: YoutubeCom.py Projekt: sebmaynard/pyload

    def process(self, pyfile):
        """Download the configured stream of a video, optionally cut at a start time.

        Streams are read from the page's ``url_encoded_fmt_stream_map`` and
        filtered by ``self.formats`` and the per-format config switches.  The
        desired itag comes from the ``fmt`` option or, failing that, from the
        ``quality``/``3d`` options.  When the URL carries a ``t=<m>m<s>s``
        start time and ``ffmpeg`` is available, the download is re-muxed to
        begin at that offset.

        :param pyfile: file object whose ``url`` is loaded and whose ``name``
            is set from the page title, the start time and the format suffix.
        """
        html = self.load(pyfile.url, decode=True)

        if '<h1 id="unavailable-message" class="message">' in html:
            self.offline()

        if "We have been receiving a large volume of requests from your network." in html:
            self.tempOffline()

        # Quality name -> itag; 3D has a separate table.
        use3d = self.getConf("3d")
        if use3d:
            quality = dict([("sd", 82), ("hd", 84), ("fullhd", 85),
                            ("240p", 83), ("360p", 82), ("480p", 82),
                            ("720p", 84), ("1080p", 85), ("3072p", 85)])
        else:
            quality = dict([("sd", 18), ("hd", 22), ("fullhd", 37),
                            ("240p", 5), ("360p", 18), ("480p", 35),
                            ("720p", 22), ("1080p", 37), ("3072p", 38)])

        desired_fmt = self.getConf("fmt")
        if desired_fmt and desired_fmt not in self.formats:
            self.logWarning("FMT %d unknown - using default." % desired_fmt)
            desired_fmt = 0
        if not desired_fmt:
            quality_name = self.getConf("quality")
            desired_fmt = quality.get(quality_name, 18)

        # Parse available streams: split on ",", then on the literal \u0026,
        # then into key=value pairs.
        stream_match = re.search(r'"url_encoded_fmt_stream_map": "(.*?)",', html)
        raw_streams = stream_match.group(1)
        field_lists = [item.split('\u0026') for item in raw_streams.split(',')]
        field_maps = [dict(field.split('=', 1) for field in fields)
                      for fields in field_lists]
        streams = [
            (int(fields['itag']),
             "%s&signature=%s" % (unquote(fields['url']), fields['sig']))
            for fields in field_maps
        ]
        self.logDebug("AVAILABLE STREAMS: %s" % [itag for itag, _url in streams])

        def allowed(itag):
            # A format counts as allowed when its config switch is set.
            return self.getConfig(self.formats[itag][0])

        # Build the itag -> url table of supported streams (3D/2D).
        streams = [pair for pair in streams
                   if pair[0] in self.formats and allowed(pair[0])]
        if not streams:
            self.fail("No available stream meets your preferences")
        preferred = [pair for pair in streams
                     if self.formats[pair[0]][4] == use3d]
        fmt_dict = dict(preferred or streams)

        format_text = "%s %dx%d Q:%d 3D:%s" % self.formats[desired_fmt]
        found_text = "" if desired_fmt in fmt_dict else "NOT "
        allowed_text = "" if allowed(desired_fmt) else "NOT "
        self.logDebug("DESIRED STREAM: ITAG:%d (%s) %sfound, %sallowed" %
                      (desired_fmt, format_text, found_text, allowed_text))

        if desired_fmt in fmt_dict and allowed(desired_fmt):
            fmt = desired_fmt
        else:
            def sel(itag):
                # Quality index of a format.
                return self.formats[itag][3]

            def comp(left, right):
                return abs(sel(left) - sel(right))

            def choose(kept, other):
                # Keep the current pick only if it is at least as close to the
                # desired quality and of higher quality than the other one.
                keep = (comp(kept, desired_fmt) <= comp(other, desired_fmt)
                        and sel(kept) > sel(other))
                return kept if keep else other

            self.logDebug("Choosing nearest fmt: %s" %
                          [(itag, allowed(itag), comp(itag, desired_fmt))
                           for itag in fmt_dict.keys()])
            fmt = reduce(choose, fmt_dict.keys())

        self.logDebug("Chosen fmt: %s" % fmt)
        url = fmt_dict[fmt]
        self.logDebug("URL: %s" % url)

        # File name: page title without slashes; suffix appended further down.
        file_suffix = ".flv"
        if fmt in self.formats:
            file_suffix = self.formats[fmt][0]
        file_name_pattern = '<meta name="title" content="(.+?)">'
        title = re.search(file_name_pattern, html).group(1)
        pyfile.name = html_unescape(title.replace("/", ""))

        # Optional start offset taken from a "t=<m>m<s>s" URL parameter.
        offset = re.search(r"t=((\d+)m)?(\d+)s", pyfile.url)
        ffmpeg = which("ffmpeg")
        trim = ffmpeg and offset
        if trim:
            minutes, seconds = offset.group(2), offset.group(3)
            if not minutes:
                minutes = "0"

            pyfile.name += " (starting at %s:%s)" % (minutes, seconds)
        pyfile.name += file_suffix

        filename = self.download(url)

        if trim:
            # Move the download aside; ffmpeg writes the trimmed copy (streams
            # copied, not re-encoded) back under the original name.
            inputfile = filename + "_"
            os.rename(filename, inputfile)

            start_at = "00:%s:%s" % (minutes, seconds)
            subprocess.call([ffmpeg, "-ss", start_at, "-i", inputfile,
                             "-vcodec", "copy", "-acodec", "copy", filename])
            os.remove(inputfile)

Beispiel #44

0

Datei anzeigen

Datei: YoutubeCom.py Projekt: keat01/pyLoad

    def process(self, pyfile):
        """Pick a stream from the page's flashvars and download it.

        The desired itag is 18 for "sd" (and by default), 22 for "hd" and 37
        for "fullhd", unless the ``fmt`` option is set.  URL/itag pairs are
        read from the ``flashvars`` attribute, itags missing from
        ``self.formats`` are dropped, and the remaining ones are reduced
        pairwise using the quality index (field 3 of ``self.formats``).

        :param pyfile: file object whose ``url`` is loaded and whose ``name``
            is set from the page title plus the format suffix.
        """
        html = self.load(pyfile.url, decode=True)

        if "watch-player-unavailable" in html:
            self.offline()

        if "We have been receiving a large volume of requests from your network." in html:
            self.tempOffline()

        file_name_pattern = '<meta name="title" content="(.+?)">'

        # Quality preset -> itag, with 18 as the fallback.
        quality = self.getConf("quality")
        desired_fmt = {"sd": 18, "hd": 22, "fullhd": 37}.get(quality, 18)

        # An explicitly configured fmt overrides the preset.
        if self.getConfig("fmt"):
            desired_fmt = self.getConf("fmt")

        flashvars_match = re.search(r'flashvars=\\"(.*?)\\"', html)
        flashvars = unquote(flashvars_match.group(1))

        # Collect itag -> stream url; later duplicates overwrite earlier ones.
        fmt_dict = {}
        for quoted_url, itag_text in re.findall(r'url=(.*?)%3B.*?itag=(\d+)', flashvars):
            fmt_dict[int(itag_text)] = unquote(quoted_url)

        self.logDebug("Found links: %s" % fmt_dict)
        for itag in fmt_dict.keys():
            if itag in self.formats:
                continue
            self.logDebug("FMT not supported: %s" % itag)
            del fmt_dict[itag]

        def allowed(itag):
            # A format counts as allowed when its config switch is set.
            return self.getConfig(self.formats[itag][0])

        def sel(itag):
            # Quality index of a format.
            return self.formats[itag][3]

        def comp(left, right):
            return abs(sel(left) - sel(right))

        def choose(kept, other):
            # Keep the current pick only if it is at least as close to the
            # desired quality, of higher quality than the other, and allowed.
            if (comp(kept, desired_fmt) <= comp(other, desired_fmt) and
                    sel(kept) > sel(other) and allowed(kept)):
                return kept
            return other

        fmt = reduce(choose, fmt_dict.keys())

        self.logDebug("Choose fmt: %s" % fmt)

        if fmt in self.formats:
            file_suffix = self.formats[fmt][0]
        else:
            file_suffix = ".flv"
        title = re.search(file_name_pattern, html).group(1)
        name = title.replace("/", "") + file_suffix
        pyfile.name = html_unescape(name)

        self.download(fmt_dict[fmt])