Ejemplo n.º 1
0
def find(resource_name):
    """
    Find the given resource by searching through the directories and
    zip files in ``nltk.data.path``, and return a corresponding path
    name.  If the given resource is not found, raise a ``LookupError``,
    whose message gives a pointer to the installation instructions for
    the NLTK downloader.

    Zip File Handling:

      - If ``resource_name`` contains a component with a ``.zip``
        extension, then it is assumed to be a zipfile; and the
        remaining path components are used to look inside the zipfile.

      - If any element of ``nltk.data.path`` has a ``.zip`` extension,
        then it is assumed to be a zipfile.

      - If a given resource name that does not contain any zipfile
        component is not found initially, then ``find()`` will make a
        second attempt to find that resource, by replacing each
        component *p* in the path with *p.zip/p*.  For example, this
        allows ``find()`` to map the resource name
        ``corpora/chat80/cities.pl`` to a zip file path pointer to
        ``corpora/chat80.zip/chat80/cities.pl``.

      - When using ``find()`` to locate a directory contained in a
        zipfile, the resource name must end with the forward slash
        character.  Otherwise, ``find()`` will not locate the
        directory.

    :type resource_name: str
    :param resource_name: The name of the resource to search for.
        Resource names are posix-style relative path names, such as
        ``corpora/brown``.  In particular, directory names should always
        be separated by the forward slash character, which will be
        automatically converted to a platform-appropriate path separator.
    :rtype: str
    :raise LookupError: If the resource is found neither directly nor
        through the ``p.zip/p`` fallback.  The message lists every
        entry of ``path`` that was searched.
    """
    # Check if the resource name includes a zipfile name.  The pattern
    # ends with an empty alternative, so it always matches; when there
    # is no ``.zip`` component both groups are None.  A raw string is
    # used so that ``\.`` reaches the regex engine untouched instead of
    # being an invalid string escape.
    m = re.match(r"(.*\.zip)/?(.*)$|", resource_name)
    zipfile, zipentry = m.groups()

    # Check each item in our path
    for path_item in path:

        # Is the path item a zipfile?
        if os.path.isfile(path_item) and path_item.endswith(".zip"):
            try:
                return ZipFilePathPointer(path_item, resource_name)
            except IOError:
                continue  # resource not in zipfile

        # Is the path item a directory?
        elif os.path.isdir(path_item):
            if zipfile is None:
                p = os.path.join(path_item, *resource_name.split("/"))
                if os.path.exists(p):
                    if p.endswith(".gz"):
                        return GzipFileSystemPathPointer(p)
                    return FileSystemPathPointer(p)
            else:
                p = os.path.join(path_item, *zipfile.split("/"))
                if os.path.exists(p):
                    try:
                        return ZipFilePathPointer(p, zipentry)
                    except IOError:
                        continue  # resource not in zipfile

    # Fallback: if the path doesn't include a zip file, then try
    # again, assuming that one of the path components is inside a
    # zipfile of the same name.  The recursive call always carries a
    # ``.zip`` component, so it never reaches this fallback again.
    if zipfile is None:
        pieces = resource_name.split("/")
        for i, piece in enumerate(pieces):
            modified_name = "/".join(pieces[:i] + [piece + ".zip"] + pieces[i:])
            try:
                return find(modified_name)
            except LookupError:
                pass

    # Display a friendly error message if the resource wasn't found:
    msg = textwrap.fill(
        "Resource %r not found.  Please use the NLTK Downloader to "
        "obtain the resource:  >>> nltk.download()" % (resource_name,),
        initial_indent="  ",
        subsequent_indent="  ",
        width=66,
    )
    msg += "\n  Searched in:" + "".join("\n    - %r" % d for d in path)
    sep = "*" * 70
    resource_not_found = "\n%s\n%s\n%s" % (sep, msg, sep)
    raise LookupError(resource_not_found)
Ejemplo n.º 2
0
def find(resource_name):
    """
    Find the given resource by searching through the directories and
    zip files in L{nltk.data.path}, and return a corresponding path
    name.  If the given resource is not found, raise a C{LookupError},
    whose message gives a pointer to the installation instructions for
    the NLTK downloader.

    Zip File Handling:

      - If C{resource_name} contains a component with a C{.zip}
        extension, then it is assumed to be a zipfile; and the
        remaining path components are used to look inside the zipfile.

      - If any element of C{nltk.data.path} has a C{.zip} extension,
        then it is assumed to be a zipfile.

      - If a given resource name that does not contain any zipfile
        component is not found initially, then C{find()} will make a
        second attempt to find that resource, by replacing each
        component I{p} in the path with I{p.zip/p}.  For example, this
        allows C{find()} to map the resource name
        C{corpora/chat80/cities.pl} to a zip file path pointer to
        C{corpora/chat80.zip/chat80/cities.pl}.

      - When using C{find()} to locate a directory contained in a
        zipfile, the resource name I{must} end with the C{'/'}
        character.  Otherwise, C{find()} will not locate the
        directory.

    @type resource_name: C{str}
    @param resource_name: The name of the resource to search for.
        Resource names are posix-style relative path names, such as
        C{'corpora/brown'}.  In particular, directory names should
        always be separated by the C{'/'} character, which will be
        automatically converted to a platform-appropriate path
        separator.
    @rtype: C{str}
    @raise LookupError: If the resource is found neither directly nor
        through the I{p.zip/p} fallback.  The message lists every
        entry of C{path} that was searched.
    """
    # Check if the resource name includes a zipfile name.  The empty
    # alternative at the end of the pattern guarantees a match; both
    # groups are None when no '.zip' component is present.  The pattern
    # is a raw string so the backslash is not treated as a (invalid)
    # string escape.
    m = re.match(r'(.*\.zip)/?(.*)$|', resource_name)
    zipfile, zipentry = m.groups()

    # Check each item in our path
    for path_item in path:

        # Is the path item a zipfile?
        if os.path.isfile(path_item) and path_item.endswith('.zip'):
            try:
                return ZipFilePathPointer(path_item, resource_name)
            except IOError:
                continue  # resource not in zipfile

        # Is the path item a directory?
        elif os.path.isdir(path_item):
            if zipfile is None:
                p = os.path.join(path_item, *resource_name.split('/'))
                if os.path.exists(p):
                    if p.endswith('.gz'):
                        return GzipFileSystemPathPointer(p)
                    return FileSystemPathPointer(p)
            else:
                p = os.path.join(path_item, *zipfile.split('/'))
                if os.path.exists(p):
                    try:
                        return ZipFilePathPointer(p, zipentry)
                    except IOError:
                        continue  # resource not in zipfile

    # Fallback: if the path doesn't include a zip file, then try
    # again, assuming that one of the path components is inside a
    # zipfile of the same name.  The retried name always contains a
    # '.zip' component, so the recursion is one level deep.
    if zipfile is None:
        pieces = resource_name.split('/')
        for i, piece in enumerate(pieces):
            modified_name = '/'.join(pieces[:i] + [piece + '.zip'] + pieces[i:])
            try:
                return find(modified_name)
            except LookupError:
                pass

    # Display a friendly error message if the resource wasn't found:
    msg = textwrap.fill(
        'Resource %r not found.  Please use the NLTK Downloader to '
        'obtain the resource: >>> nltk.download().' %
        (resource_name,), initial_indent='  ', subsequent_indent='  ',
        width=66)
    msg += '\n  Searched in:' + ''.join('\n    - %r' % d for d in path)
    sep = '*' * 70
    resource_not_found = '\n%s\n%s\n%s' % (sep, msg, sep)
    raise LookupError(resource_not_found)
Ejemplo n.º 3
0
 def _path_zipfile(self, zipfile):
     """Return ``zipfile`` joined onto the first matching temp folder.

     The part of ``zipfile`` before its first ``.`` is used as a key; the
     first entry of ``self.tempfolder`` that contains that key as a
     substring is joined with ``zipfile`` and returned.  ``None`` is
     returned when the key is empty or no entry contains it.
     """
     stem = zipfile.split('.', 1)[0]
     candidates = (
         os.path.join(folder, zipfile)
         for folder in self.tempfolder
         if stem and stem in folder
     )
     return next(candidates, None)
Ejemplo n.º 4
0
def find(resource_name):
    """
    Find the given resource by searching through the directories and
    zip files in L{nltk.data.path}, and return a corresponding path
    name.  If the given resource is not found, raise a C{LookupError},
    whose message gives a pointer to the installation instructions for
    the NLTK downloader.

    Zip File Handling:

      - If C{resource_name} contains a component with a C{.zip}
        extension, then it is assumed to be a zipfile; and the
        remaining path components are used to look inside the zipfile.

      - If any element of C{nltk.data.path} has a C{.zip} extension,
        then it is assumed to be a zipfile.

      - If a given resource name that does not contain any zipfile
        component is not found initially, then C{find()} will make a
        second attempt to find that resource, by replacing each
        component I{p} in the path with I{p.zip/p}.  For example, this
        allows C{find()} to map the resource name
        C{corpora/chat80/cities.pl} to a zip file path pointer to
        C{corpora/chat80.zip/chat80/cities.pl}.

      - When using C{find()} to locate a directory contained in a
        zipfile, the resource name I{must} end with the C{'/'}
        character.  Otherwise, C{find()} will not locate the
        directory.

    @type resource_name: C{str}
    @param resource_name: The name of the resource to search for.
        Resource names are posix-style relative path names, such as
        C{'corpora/brown'}.  In particular, directory names should
        always be separated by the C{'/'} character, which will be
        automatically converted to a platform-appropriate path
        separator.
    @rtype: C{str}
    @raise LookupError: If the resource is found neither directly nor
        through the I{p.zip/p} fallback.  The message lists every
        entry of C{path} that was searched.
    """
    # Check if the resource name includes a zipfile name.  Because the
    # pattern ends in an empty alternative it cannot fail to match; the
    # two groups are None when there is no '.zip' component.  Written
    # as a raw string so that '\.' is a regex escape rather than an
    # invalid string escape.
    m = re.match(r'(.*\.zip)/?(.*)$|', resource_name)
    zipfile, zipentry = m.groups()

    # Check each item in our path
    for path_item in path:

        # Is the path item a zipfile?
        if os.path.isfile(path_item) and path_item.endswith('.zip'):
            try:
                return ZipFilePathPointer(path_item, resource_name)
            except IOError:
                continue  # resource not in zipfile

        # Is the path item a directory?
        elif os.path.isdir(path_item):
            if zipfile is None:
                p = os.path.join(path_item, *resource_name.split('/'))
                if os.path.exists(p):
                    if p.endswith('.gz'):
                        return GzipFileSystemPathPointer(p)
                    return FileSystemPathPointer(p)
            else:
                p = os.path.join(path_item, *zipfile.split('/'))
                if os.path.exists(p):
                    try:
                        return ZipFilePathPointer(p, zipentry)
                    except IOError:
                        continue  # resource not in zipfile

    # Fallback: if the path doesn't include a zip file, then try
    # again, assuming that one of the path components is inside a
    # zipfile of the same name.  Each retried name contains a '.zip'
    # component, so the nested call skips this fallback.
    if zipfile is None:
        pieces = resource_name.split('/')
        for i, piece in enumerate(pieces):
            modified_name = '/'.join(pieces[:i] + [piece + '.zip'] +
                                     pieces[i:])
            try:
                return find(modified_name)
            except LookupError:
                pass

    # Display a friendly error message if the resource wasn't found:
    msg = textwrap.fill(
        'Resource %r not found.  Please use the NLTK Downloader to '
        'obtain the resource: >>> nltk.download().' % (resource_name, ),
        initial_indent='  ',
        subsequent_indent='  ',
        width=66)
    msg += '\n  Searched in:' + ''.join('\n    - %r' % d for d in path)
    sep = '*' * 70
    resource_not_found = '\n%s\n%s\n%s' % (sep, msg, sep)
    raise LookupError(resource_not_found)
Ejemplo n.º 5
0
def _extract_me_to_zip(archive_path, upload_name, filename):
    """Re-pack an archive as a zip through the extract.me web service.

    The archive is uploaded, the service is asked to unpack it and to
    compress the result as zip, and that zip is downloaded into
    ``__temp__``.

    :param archive_path: local path of the archive to upload.
    :param upload_name: file name sent along with the uploaded content.
    :param filename: value sent as the ``files`` form field of the upload
        request.
    :return: local path of the downloaded zip file.
    """
    s = requests.Session()
    r = s.get('https://extract.me/upload/')
    # The ``uid`` value from the Set-Cookie header is reused in the form
    # data and in the URLs of the following requests.
    mycook = re.search('uid=(.+?);', r.headers['Set-Cookie']).group(1)
    payload = {'uid': mycook, 'files': filename}
    # Keep the archive open only for the duration of the upload.
    with open(archive_path, 'rb') as archive:
        files = {'files': (upload_name, archive, "application/octet-stream")}
        r = s.post('https://extract.me/upload/', files=files, data=payload)
    uploaded = r.json()['files'][0]
    tmp_filename = uploaded['tmp_filename']
    nexpayload = {
        'tmp_filename': tmp_filename,
        'archive_filename': uploaded['name'],
        'password': ''
    }
    s.post('https://extract.me/unpack/', data=nexpayload)
    compres_to_zip = s.post(
        'https://extract.me/compress/zip/' + mycook + '/' + tmp_filename)
    zipped = compres_to_zip.json()['download_url']
    ziper = s.get('https://extract.me/' + mycook + zipped)
    zf = re.search(r'.*\/(.+?\.zip)', zipped).group(1)
    zname = __temp__ + zf
    with open(zname, 'wb') as f:
        f.write(ziper.content)
    return zname


def Download(id, url, filename, stack=False):
    """Fetch a subtitle package, unpack it into ``__temp__`` and list it.

    The package returned by ``get_sub`` is saved into ``__temp__`` and
    then unpacked with the method selected in the add-on settings
    (``xbmc_extractor``, ``rarlab``, ``extract_me``,
    ``online-convert-com`` or ``android_rar``).  For ``id == 'unacs'``
    the saved file is deleted and the individual files linked from the
    subsunacs.net page are downloaded instead.

    :param id: provider identifier, passed to ``get_sub``; only the value
        ``'unacs'`` is special-cased here.
    :param url: passed to ``get_sub``; for ``'unacs'`` it is also appended
        to ``https://subsunacs.net``.
    :param filename: passed to ``get_sub`` and forwarded as the ``files``
        form field of extract.me uploads.
    :param stack: not used by this function.
    :return: the list filled by ``appendsubfiles`` (narrowed by
        ``select_1`` when it holds two or more entries) if its first entry
        exists; otherwise an empty list.
    """
    subtitle_list = []
    # Cleanup temp dir: subtitles are downloaded/unzipped in the temp
    # folder, which is then handed to XBMC to copy and activate.
    if xbmcvfs.exists(__temp__):
        try:
            rmtree(__temp__)
        except Exception:
            Notify('Error cleanup', 'error')
    xbmcvfs.mkdirs(__temp__)

    log_my('Download from id', url)
    sub = get_sub(id, url, filename)

    # ``in`` replaces the Python-2-only ``dict.has_key``.
    if not sub or 'data' not in sub or 'fname' not in sub:
        Notify('Error', 'Bad format or ....')
        return []

    log_my('{0}'.format(sub['fname']), 'saving')
    ff = os.path.join(__temp__, sub['fname'])
    subFile = xbmcvfs.File(ff, 'wb')
    try:
        subFile.write(sub['data'])
    finally:
        subFile.close()
    xbmc.sleep(500)
    Notify('{0}'.format(sub['fname']), 'load')

    if id == 'unacs':
        xbmcvfs.delete(ff)
        headers = {
            "Host": "subsunacs.net",
            "User-Agent":
            "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0",
            "Accept":
            "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
            "Accept-Language": "en-US,en;q=0.5",
            "Accept-Encoding": "gzip, deflate, br",
            "DNT": "1",
            "Connection": "keep-alive",
            "Upgrade-Insecure-Requests": "1",
            "Cache-Control": "max-age=0",
        }
        url = 'https://subsunacs.net' + url + '!'
        req = requests.get(url, headers=headers)
        match = re.compile('<a href="(.+?)">(.+?)</a></label>').findall(
            req.text)
        for suburl, subname in match:
            subname = subname.encode('cp1251', 'ignore').decode(
                'cp1251', 'ignore').encode('utf-8',
                                           'ignore').replace(' ', '.')
            subtitri = __temp__ + subname
            try:
                url2 = 'https://subsunacs.net' + suburl
                req2 = requests.get(url2, headers=headers)
                with open(subtitri, 'wb') as f:
                    f.write(req2.content)
                xbmc.sleep(1000)
            except Exception:
                # Best effort: a file that cannot be fetched or written
                # is skipped so the remaining links are still tried.
                pass

    elif __addon__.getSetting('xbmc_extractor') == 'true':
        if '.zip' in ff:
            xbmc.executebuiltin(('XBMC.Extract("%s","%s")' % (
                ff,
                __temp__,
            )).encode('utf-8'), True)
            xbmcvfs.delete(ff)
            # check for rars after zip extraction
            unextracted_rars = xbmcvfs.listdir(__temp__)
            for rars in unextracted_rars[1]:
                if rars.endswith('.rar'):
                    src = 'archive' + '://' + urllib.quote_plus(
                        __temp__ + rars) + '/'
                    (cdirs, cfiles) = xbmcvfs.listdir(src)
                    for cfile in cfiles:
                        fsrc = '%s%s' % (src, cfile)
                        xbmcvfs.copy(fsrc, __temp__ + cfile)
        else:
            src = 'archive' + '://' + urllib.quote_plus(ff) + '/'
            (cdirs, cfiles) = xbmcvfs.listdir(src)
            for cfile in cfiles:
                fsrc = '%s%s' % (src, cfile)
                xbmcvfs.copy(fsrc, __temp__ + cfile)

    elif __addon__.getSetting('rarlab') == 'true':
        import rarfile
        if '.rar' in ff:
            archive = rarfile.RarFile(ff)
            archive.extract(__temp__)
            xbmcvfs.delete(ff)
        else:
            xbmc.executebuiltin(('XBMC.Extract("%s","%s")' % (
                ff,
                __temp__,
            )).encode('utf-8'), True)
            xbmcvfs.delete(ff)
            # check for rars after zip extraction
            unextracted_rars = xbmcvfs.listdir(__temp__)
            for rars in unextracted_rars[1]:
                if rars.endswith('.rar'):
                    archive = rarfile.RarFile(__temp__ + rars)
                    archive.extract(__temp__)

    elif __addon__.getSetting('extract_me') == 'true':
        if '.zip' in ff:
            xbmc.executebuiltin(('XBMC.Extract("%s","%s")' % (
                ff,
                __temp__,
            )).encode('utf-8'), True)
            xbmcvfs.delete(ff)
            # check for rars after zip extraction
            unextracted_rars = xbmcvfs.listdir(__temp__)
            for rars in unextracted_rars[1]:
                if rars.endswith('.rar'):
                    zname = _extract_me_to_zip(__temp__ + rars, rars,
                                               filename)
                    # The converted zip is unpacked with the zipfile
                    # module here rather than with XBMC.Extract.
                    import zipfile
                    with zipfile.ZipFile(zname, 'r') as zip_ref:
                        zip_ref.extractall(__temp__)
        else:
            zname = _extract_me_to_zip(ff, sub['fname'], filename)
            xbmc.executebuiltin(('XBMC.Extract("%s","%s")' % (
                zname,
                __temp__,
            )).encode('utf-8'), True)

    elif __addon__.getSetting('online-convert-com') == 'true':
        if '.zip' in ff:
            xbmc.executebuiltin(('XBMC.Extract("%s","%s")' % (
                ff,
                __temp__,
            )).encode('utf-8'), True)
            xbmcvfs.delete(ff)
            # Check for rars after zip extraction.  They are read through
            # XBMC so that no online-convert.com minutes are spent on them.
            unextracted_rars = xbmcvfs.listdir(__temp__)
            for rars in unextracted_rars[1]:
                if rars.endswith('.rar'):
                    src = 'archive' + '://' + urllib.quote_plus(
                        __temp__ + rars) + '/'
                    (cdirs, cfiles) = xbmcvfs.listdir(src)
                    for cfile in cfiles:
                        fsrc = '%s%s' % (src, cfile)
                        xbmcvfs.copy(fsrc, __temp__ + cfile)
        else:
            api_key = __addon__.getSetting('ocapi')
            newendpoint = 'http://api2.online-convert.com/jobs'
            data = {
                "conversion": [{
                    "category": "archive",
                    "target": "zip"
                }]
            }
            json_head = {
                'x-oc-api-key': api_key,
                'Content-Type': 'application/json',
                'Cache-Control': 'no-cache'
            }
            key_head = {'x-oc-api-key': api_key}
            res = requests.post(newendpoint,
                                data=json.dumps(data),
                                headers=json_head)
            match = re.compile(
                'id":"(.+?)".+?server":"(.+?)"').findall(res.text)
            for idj, servurl in match:
                # URLs in the response text carry JSON-escaped slashes.
                servurl = servurl.replace('\\/', '/')
                nextendpont = servurl + '/upload-file/' + idj
                with open(ff, 'rb') as upload:
                    res = requests.post(nextendpont,
                                        files={'file': upload},
                                        headers=key_head)
                xbmc.sleep(2000)
                res = requests.get(newendpoint, headers=key_head)
                match2 = re.compile('"uri":"(http.+?zip)"').findall(
                    res.text)
                for dlzip in match2:
                    zip_url = dlzip.replace('\\/', '/')
                    subfile = zip_url.split("/")[-1]
                    r = requests.get(zip_url)
                    jj = __temp__ + subfile
                    with open(jj, 'wb') as f:
                        f.write(r.content)
                    xbmc.sleep(1500)
                    delurl = 'http://api2.online-convert.com/jobs/' + idj
                    res = requests.delete(delurl, headers=json_head)
                    xbmc.sleep(500)
                    xbmc.executebuiltin(
                        ('XBMC.Extract("%s","%s")' %
                         (jj, __temp__)), True)

    elif __addon__.getSetting('android_rar') == 'true':
        if 'zip' in ff:
            xbmc.executebuiltin(('XBMC.Extract("%s","%s")' % (
                ff,
                __temp__,
            )).encode('utf-8'), True)
        else:
            app = 'com.rarlab.rar'
            intent = 'android.intent.action.VIEW'
            dataType = 'application/rar'
            dataURI = ff
            arch = 'StartAndroidActivity("%s", "%s", "%s", "%s")' % (
                app, intent, dataType, dataURI)
            xbmc.executebuiltin(arch)

    if __addon__.getSetting('android_rar') == 'true':
        # Wait the configured number of seconds before listing the folder.
        timer = __addon__.getSetting('ar_wait_time')
        xbmc.sleep(int(timer) * 1000)
    dirs, files = xbmcvfs.listdir(__temp__)
    files.extend(dirs)
    appendsubfiles(subtitle_list, __temp__, files)

    if len(subtitle_list) >= 2:
        subtitle_list = select_1(subtitle_list)
    # Guard against an empty result before indexing, and always hand a
    # list back to the caller instead of an implicit None.
    if subtitle_list and xbmcvfs.exists(subtitle_list[0]):
        return subtitle_list
    return []