Python relpath Examples

Programming Language: Python

Namespace/Package Name: prody.utilities

Method/Function: relpath

Examples at hotexamples.com: 10

Python relpath - 10 examples found. These are the top rated real world Python examples of prody.utilities.relpath extracted from open source projects. You can rate examples to help us improve the quality of examples.

Example #1

Show file

File: trajfile.py Project: youbingchenyoubing/asa_map

    def getFilename(self, absolute=False):
        """Return relative path to the current file. For absolute path,
        pass ``absolute=True`` argument."""

        if absolute:
            return abspath(self._filename)
        return relpath(self._filename)

Example #2

Show file

File: trajfile.py Project: fongchun/ProDy

    def getFilename(self, absolute=False):
        """Returns relative path to the current file. For absolute path,
        pass ``absolute=True`` argument."""

        if absolute:
            return abspath(self._filename)
        return relpath(self._filename)

Example #3

Show file

File: wwpdbftp.py Project: gokceneraslan/ProDy

def fetchPDB(pdb, folder='.', compressed=True, copy=False, **kwargs):
    """Retrieve PDB, PDBML, or mmCIF file(s) for specified *pdb* identifier(s).  
    *pdb* may be a string or a list.  The function will return a filename or a 
    list of filenames depending on input (see :ref:`fetchpdb` for examples).  

    If *compressed* is ``False``, all files will be decompressed.  If *copy* is 
    ``True``, all files from local PDB mirror will copied to the user specified 
    *folder*.  *format* keyword argument can be used to retrieve `PDBML 
    <http://pdbml.pdb.org/>`_ and `mmCIF <http://mmcif.pdb.org/>`_ files:  
    ``format="cif"`` will fetch an mmCIF file (e.g. :file:`1XXX.cif.gz`), 
    similarly ``format="xml"`` will fetch a PDBML file.  If PDBML header file 
    is desired, ``format="xml", noatom=True`` will do the job (e.g. 
    :file:`1XXX-noatom.xml.gz`)
    
    The order of file search operations are as follows:  First, files are 
    sought in *folder*.  Second, local PDB mirror will be sought, if one is 
    set by the user (see :func:`setPDBMirrorPath`).  Then, local PDB folder
    will be sought, if one is  set by the user (see :func:`setPDBLocalFolder`).
    Finally, if files are not found locally, they will be downloaded one of 
    wwPDB FTP servers (use :func:`setWWPDBFTPServer` to specify one close to 
    you)."""
    
    if isinstance(pdb, str):
        identifiers = [pdb]
    elif isinstance(pdb, list):
        identifiers = pdb
    else:
        raise TypeError('pdb may be a string or a list of strings')
        
    assert isinstance(folder, str), 'folder must be a string'
    assert isinstance(compressed, bool), 'compressed must be a boolean'
    assert isinstance(copy, bool), 'copy must be a boolean'
    format = kwargs.pop('format', 'pdb')
    assert isinstance(format, str), 'format must be a string'
    format = format.lower()
    assert format in _PDB_FORMATS, '{0:s} is not valid format'.format(
                                                                repr(format))
    noatom = kwargs.pop('noatom', False) 
    assert isinstance(noatom, bool), 'noatom must be a boolean'
    if kwargs:
        raise TypeError('{0:s} is not a valid keyword argument for this' 
                        'function'.format(repr(kwargs.iterkeys().next())))
    if folder != '.':
        folder = makePath(folder)
    if not os.access(folder, os.W_OK):
        raise IOError('permission to write in {0:s} is denied, please '
                      'specify another folder'.format(folder))
    
    filenames = []
    exists = 0
    success = 0
    failure = 0
    download = False
    if format == 'pdb':
        divided = 'data/structures/divided/pdb'
        pdbext = '.ent.gz'
        extensions = ['.ent', '.pdb'] # '.pdb' should be the last item
        prefix = 'pdb'
    elif format == 'xml':
        if noatom:
            divided = 'data/structures/divided/XML-noatom'
            pdbext = '-noatom.xml.gz'
            extensions = ['-noatom.xml']
        else:
            divided = 'data/structures/divided/XML'
            pdbext = '.xml.gz'
            extensions = ['.xml']
        prefix = ''
    else:
        divided = 'data/structures/divided/mmCIF'
        pdbext = '.cif.gz'
        extensions = ['.cif'] # '.pdb' should be the last item
        prefix = ''
    
    pdbfnmap = {}
    for extension in extensions:
        for pdbfn in glob(os.path.join(folder, '*' + extension + '*')): 
            if os.path.splitext(pdbfn)[1] in _PDB_EXTENSIONS:
                pdbfnmap[os.path.split(pdbfn)[1].split('.')[0].lower()] = pdbfn
        for pdbfn in glob(os.path.join(folder, '*' + extension.upper() + '*')):
            if os.path.splitext(pdbfn)[1] in _PDB_EXTENSIONS:
                pdbfnmap[os.path.split(pdbfn)[1].split('.')[0].lower()] = pdbfn
                
    for i, pdbid in enumerate(identifiers):
        # Check validity of identifiers
        if not isinstance(pdbid, str):
            LOGGER.debug('{0:s} is not a valid identifier.'.format(pdbid))
            filenames.append(None)
            failure += 1 
            continue
        pdbid = pdbid.strip().lower()
        if not (len(pdbid) == 4 and pdbid.isalnum()):
            LOGGER.debug('{0:s} is not a valid identifier.'.format(pdbid))
            filenames.append(None)
            failure += 1 
            continue
        # Check if file exists in working directory
        identifiers[i] = pdbid
        if noatom:
            fn = pdbfnmap.get(pdbid + '-noatom', None)
        else:
            fn = pdbfnmap.get(pdbid, None) or pdbfnmap.get('pdb'+pdbid, None)
        if fn:
            fn = relpath(fn)
            if not compressed:
                temp, ext = os.path.splitext(fn) 
                if ext == '.gz':
                    fn = gunzip(fn, temp)
            filenames.append(fn)
            LOGGER.debug('{0:s} ({1:s}) is found in the working directory.'
                         .format(pdbid, fn))
            exists += 1
            continue
        # Check the PDB mirror
        mirror_path = getPDBMirrorPath()
        if mirror_path is not None and os.path.isdir(mirror_path):
            fn = os.path.join(mirror_path, divided, pdbid[1:3], 
                              prefix + pdbid + pdbext)
            if os.path.isfile(fn):
                if copy or not compressed:
                    if compressed:
                        filename = os.path.join(folder, pdbid + extension + 
                                                        '.gz')
                        shutil.copy(fn, filename)
                    else:
                        filename = os.path.join(folder, pdbid + extension)
                        gunzip(fn, filename)
                    filenames.append(filename)
                    LOGGER.debug('{0:s} copied from local mirror ({1:s})'
                                 .format(pdbid, filename))
                    success += 1
                else:
                    filenames.append(fn)
                    
                    LOGGER.debug('{0:s} ({1:s}...{2:s}) is found in the local '
                                'mirror.'.format(pdbid, 
                                fn[:fn[1:].index(os.path.sep)+2], fn[-15:]))
                    exists += 1
                continue
        # Check the PDB mirror
        local_folder = getPDBLocalFolder()
        if format and local_folder:
            local_folder, is_divided = local_folder
            if is_divided:
                fn = os.path.join(local_folder, pdbid[1:3], 
                                  'pdb' + pdbid + '.pdb.gz')
            else:
                fn = os.path.join(local_folder, pdbid + '.pdb.gz')
                
            if os.path.isfile(fn):
                if copy or not compressed:
                    if compressed:
                        filename = os.path.join(folder, pdbid + extension + 
                                                        '.gz')
                        shutil.copy(fn, filename)
                    else:
                        filename = os.path.join(folder, pdbid + extension)
                        gunzip(fn, filename)
                    filenames.append(filename)
                    LOGGER.debug('{0:s} copied from local PDB folder ({1:s})'
                                 .format(pdbid, filename))
                    success += 1
                else:
                    filenames.append(fn)
                    
                    LOGGER.debug('{0:s} ({1:s}...{2:s}) is found in the PDB '
                                'local folder.'.format(pdbid, 
                                fn[:fn[1:].index(os.path.sep)+2], fn[-15:]))
                    exists += 1
                continue

        filenames.append(pdbid)
        download = True
    if download:
        from ftplib import FTP
        ftp_name, ftp_host, ftp_path = getWWPDBFTPServer()
        LOGGER.debug('Connecting wwPDB FTP server {0:s}.'.format(ftp_name))
        if format == 'pdb' and not copy and local_folder:
            folder = local_folder
            compressed = True
            if is_divided:
                getfn = lambda folder, pdbid, ext: \
                    os.path.join(makePath(os.path.join(local_folder, 
                                            pdbid[1:3])), 'pdb' + pdbid + ext)
            else:
                getfn = lambda folder, pdbid, ext: os.path.join(folder,
                                                                pdbid + ext)
                
        else: 
            getfn = lambda folder, pdbid, ext: os.path.join(folder, 
                                                            pdbid + ext)
        try:
            ftp = FTP(ftp_host)
        except Exception as error:
            raise type(error)('FTP connection problem, potential reason: '
                              'no internet connectivity')
        else:
            #ftp_path = os.path.join(ftp_path, divided)
            ftp.login('')
            for i, pdbid in enumerate(identifiers):
                if pdbid != filenames[i]:
                    continue
                filename = getfn(folder, pdbid, extension)
                if compressed:
                    filename += '.gz'

                pdbfile = open(filename, 'w+b')
                fn = prefix + pdbid + pdbext
                try:
                    ftp.cwd(ftp_path)
                    ftp.cwd(divided)
                    ftp.cwd(pdbid[1:3])
                    ftp.retrbinary('RETR ' + fn, pdbfile.write)
                except Exception as error:
                    pdbfile.close()
                    os.remove(filename)
                    if fn in ftp.nlst():
                        LOGGER.debug('{0:s} download failed ({1:s}). It '
                                     'is possible that you don\'t have '
                                     'rights to download .gz files in the '
                                     'current network.'.format(pdbid, 
                                     str(error)))
                    else:
                        LOGGER.debug('{0:s} download failed. {1:s} does not '
                                     'exist on {2:s}.'
                                     .format(fn, pdbid, ftp_host))
                    failure += 1
                    filenames[i] = None 
                else:
                    pdbfile.close()
                    if not compressed:
                        gunzip(filename)
                    filename = relpath(filename)
                    LOGGER.debug('{0:s} downloaded ({1:s})'
                                 .format(pdbid, filename))
                    success += 1
                    filenames[i] = filename
            ftp.quit()
    if len(identifiers) == 1:
        return filenames[0]    
    else:
        LOGGER.info('PDB download completed ({2:d} found, '
                    '{0:d} downloaded, {1:d} failed).'
                    .format(success, failure, exists))
        return filenames

Example #4

Show file

def fetchPDB(*pdb, **kwargs):
    """Returns path(s) to PDB file(s) for specified *pdb* identifier(s).  Files
    will be sought in user specified *folder* or current working director, and
    then in local PDB folder and mirror, if they are available.  If *copy*
    is set **True**, files will be copied into *folder*.  If *compressed* is
    **False**, all files will be decompressed.  See :func:`pathPDBFolder` and
    :func:`pathPDBMirror` for managing local resources, :func:`.fetchPDBviaFTP`
    and :func:`.fetchPDBviaFTP` for downloading files from PDB servers."""

    if len(pdb) == 1 and isinstance(pdb[0], list):
        pdb = pdb[0]

    if 'format' in kwargs and kwargs.get('format') != 'pdb':
        return fetchPDBviaFTP(*pdb, **kwargs)

    identifiers = checkIdentifiers(*pdb)

    folder = kwargs.get('folder', '.')
    compressed = kwargs.get('compressed')

    # check *folder* specified by the user, usually pwd ('.')
    filedict = findPDBFiles(folder, compressed=compressed)

    filenames = []
    not_found = []
    exists = 0
    for i, pdb in enumerate(identifiers):
        if pdb is None:
            filenames.append(None)
        elif pdb in filedict:
            filenames.append(filedict[pdb])
            exists += 1
        else:
            filenames.append(None)
            not_found.append((i, pdb))

    if not not_found:
        if len(filenames) == 1:
            filenames = filenames[0]
            if exists:
                LOGGER.debug(
                    'PDB file is found in working directory ({0}).'.format(
                        sympath(filenames)))
        return filenames

    if not isWritable(folder):
        raise IOError('permission to write in {0} is denied, please '
                      'specify another folder'.format(folder))

    if compressed is not None and not compressed:
        filedict = findPDBFiles(folder, compressed=True)
        not_found, decompress = [], not_found
        for i, pdb in decompress:
            if pdb in filedict:
                fn = filedict[pdb]
                filenames[i] = gunzip(fn, splitext(fn)[0])
            else:
                not_found.append((i, pdb))

    if not not_found:
        return filenames[0] if len(identifiers) == 1 else filenames

    local_folder = pathPDBFolder()
    copy = kwargs.setdefault('copy', False)
    if local_folder:
        local_folder, is_divided = local_folder
        temp, not_found = not_found, []
        for i, pdb in temp:
            if is_divided:
                fn = join(local_folder, pdb[1:3], 'pdb' + pdb + '.pdb.gz')
            else:
                fn = join(local_folder, pdb + '.pdb.gz')
            if isfile(fn):
                if copy or not compressed and compressed is not None:
                    if compressed:
                        fn = copyFile(fn, join(folder, pdb + 'pdb.gz'))
                    else:
                        fn = gunzip(fn, join(folder, pdb + '.pdb'))
                filenames[i] = normpath(fn)
            else:
                not_found.append((i, pdb))

    if not not_found:
        if len(identifiers) == 1:
            fn = filenames[0]
            items = fn.split(pathsep)
            if len(items) > 5:
                fndisp = pathsep.join(items[:3] + ['...'] + items[-1:])
            else:
                fndisp = relpath(fn)
            LOGGER.debug(
                'PDB file is found in the local folder ({0}).'.format(fndisp))
            return fn
        else:
            return filenames

    if kwargs['copy'] or (compressed is not None and not compressed):
        kwargs['folder'] = folder

    downloads = [pdb for i, pdb in not_found]
    fns = None

    try:
        fns = fetchPDBfromMirror(*downloads, **kwargs)
    except IOError:
        pass
    else:
        if len(downloads) == 1: fns = [fns]
        temp, not_found = not_found, []
        for i, fn in enumerate(fns):
            if fn is None:
                not_found.append(temp[i])
            else:
                i, _ = temp[i]
                filenames[i] = fn

    if not not_found:
        return filenames[0] if len(identifiers) == 1 else filenames

    if fns:
        downloads = [pdb for i, pdb in not_found]

    fns = None

    tp = kwargs.pop('tp', None)
    if tp is not None:
        tp = tp.lower()

    if tp == 'http':
        try:
            fns = fetchPDBviaHTTP(*downloads, check=False, **kwargs)
        except Exception as err:
            LOGGER.warn('Downloading PDB files via HTTP failed '
                        '({0}).'.format(str(err)))
    elif tp == 'ftp':
        try:
            fns = fetchPDBviaFTP(*downloads, check=False, **kwargs)
        except Exception as err:
            LOGGER.warn('Downloading PDB files via FTP failed '
                        '({0}).'.format(str(err)))
    else:
        tryHTTP = False
        try:
            fns = fetchPDBviaFTP(*downloads, check=False, **kwargs)
        except Exception as err:
            tryHTTP = True

        if fns is None or isinstance(fns, list) and None in fns:
            tryHTTP = True
        elif isinstance(fns, list):
            downloads = [
                not_found[i][1] for i in range(len(fns)) if fns[i] is None
            ]
            if len(downloads) > 0:
                tryHTTP = True
        if tryHTTP:
            LOGGER.info('Downloading PDB files via FTP failed, '
                        'trying HTTP.')
            try:
                fns = fetchPDBviaHTTP(*downloads, check=False, **kwargs)
            except Exception as err:
                LOGGER.warn('Downloading PDB files via HTTP also failed '
                            '({0}).'.format(str(err)))

    if len(downloads) == 1: fns = [fns]
    if fns:
        for i, fn in zip([i for i, pdb in not_found], fns):
            filenames[i] = fn

    return filenames[0] if len(identifiers) == 1 else filenames

Example #5

Show file

File: pfam.py Project: youbingchenyoubing/asa

def fetchPfamMSA(acc, alignment="full", compressed=False, **kwargs):
    """Return a path to the downloaded Pfam MSA file.

    :arg acc: Pfam ID or Accession Code
    :type acc: str

    :arg alignment: alignment type, one of ``'full'`` (default), ``'seed'``,
         ``'ncbi'``, ``'metagenomics'``, ``'rp15'``, ``'rp35'``, ``'rp55'``,
         or ``'rp75'`` where rp stands for representative proteomes

    :arg compressed: gzip the downloaded MSA file, default is **False**

    *Alignment Options*

    :arg format: a Pfam supported MSA file format, one of ``'selex'``,
        (default), ``'stockholm'`` or ``'fasta'``

    :arg order: ordering of sequences, ``'tree'`` (default) or
        ``'alphabetical'``

    :arg inserts: letter case for inserts, ``'upper'`` (default) or ``'lower'``

    :arg gaps: gap character, one of ``'dashes'`` (default), ``'dots'``,
        ``'mixed'`` or **None** for unaligned

    *Other Options*

    :arg timeout: timeout for blocking connection attempt in seconds, default
        is 60

    :arg outname: out filename, default is input ``'acc_alignment.format'``

    :arg folder: output folder, default is ``'.'``"""

    url = "http://pfam.sanger.ac.uk/family/acc?id=" + acc
    handle = openURL(url)
    orig_acc = acc
    acc = handle.readline().strip()
    if PY3K:
        acc = acc.decode()
    url_flag = False

    if not re.search("(?<=PF)[0-9]{5}$", acc):
        raise ValueError("{0} is not a valid Pfam ID or Accession Code".format(repr(orig_acc)))

    if alignment not in DOWNLOAD_FORMATS:
        raise ValueError("alignment must be one of full, seed, ncbi or" " metagenomics")
    if alignment == "ncbi" or alignment == "metagenomics":
        url = "http://pfam.sanger.ac.uk/family/" + acc + "/alignment/" + alignment + "/gzipped"
        url_flag = True
        extension = ".sth"
    else:
        if not kwargs:
            url = "http://pfam.sanger.ac.uk/family/" + acc + "/alignment/" + alignment + "/gzipped"
            url_flag = True
            extension = ".sth"
        else:
            align_format = kwargs.get("format", "selex").lower()

            if align_format not in FORMAT_OPTIONS["format"]:
                raise ValueError("alignment format must be of type selex" " stockholm or fasta. MSF not supported")

            if align_format == SELEX:
                align_format, extension = "pfam", ".slx"
            elif align_format == FASTA:
                extension = ".fasta"
            else:
                extension = ".sth"

            gaps = str(kwargs.get("gaps", "dashes")).lower()
            if gaps not in FORMAT_OPTIONS["gaps"]:
                raise ValueError("gaps must be of type mixed, dots, dashes, " "or None")

            inserts = kwargs.get("inserts", "upper").lower()
            if inserts not in FORMAT_OPTIONS["inserts"]:
                raise ValueError("inserts must be of type lower or upper")

            order = kwargs.get("order", "tree").lower()
            if order not in FORMAT_OPTIONS["order"]:
                raise ValueError("order must be of type tree or alphabetical")

            url = (
                "http://pfam.sanger.ac.uk/family/"
                + acc
                + "/alignment/"
                + alignment
                + "/format?format="
                + align_format
                + "&alnType="
                + alignment
                + "&order="
                + order[0]
                + "&case="
                + inserts[0]
                + "&gaps="
                + gaps
                + "&download=1"
            )

    response = openURL(url, timeout=int(kwargs.get("timeout", 60)))
    outname = kwargs.get("outname", None)
    if not outname:
        outname = orig_acc
    folder = str(kwargs.get("folder", "."))
    filepath = join(makePath(folder), outname + "_" + alignment + extension)
    if compressed:
        filepath = filepath + ".gz"
        if url_flag:
            f_out = open(filepath, "wb")
        else:
            f_out = openFile(filepath, "wb")
        f_out.write(response.read())
        f_out.close()
    else:
        if url_flag:
            gunzip(response.read(), filepath)
        else:
            with open(filepath, "wb") as f_out:
                f_out.write(response.read())

    filepath = relpath(filepath)
    LOGGER.info("Pfam MSA for {0} is written as {1}.".format(orig_acc, filepath))

    return filepath

Example #6

Show file

File: pfam.py Project: youbingchenyoubing/asa_map

def fetchPfamMSA(acc, alignment='full', compressed=False, **kwargs):
    """Return a path to the downloaded Pfam MSA file.

    :arg acc: Pfam ID or Accession Code
    :type acc: str

    :arg alignment: alignment type, one of ``'full'`` (default), ``'seed'``,
         ``'ncbi'``, ``'metagenomics'``, ``'rp15'``, ``'rp35'``, ``'rp55'``,
         or ``'rp75'`` where rp stands for representative proteomes

    :arg compressed: gzip the downloaded MSA file, default is **False**

    *Alignment Options*

    :arg format: a Pfam supported MSA file format, one of ``'selex'``,
        (default), ``'stockholm'`` or ``'fasta'``

    :arg order: ordering of sequences, ``'tree'`` (default) or
        ``'alphabetical'``

    :arg inserts: letter case for inserts, ``'upper'`` (default) or ``'lower'``

    :arg gaps: gap character, one of ``'dashes'`` (default), ``'dots'``,
        ``'mixed'`` or **None** for unaligned

    *Other Options*

    :arg timeout: timeout for blocking connection attempt in seconds, default
        is 60

    :arg outname: out filename, default is input ``'acc_alignment.format'``

    :arg folder: output folder, default is ``'.'``"""

    url = 'http://pfam.sanger.ac.uk/family/acc?id=' + acc
    handle = openURL(url)
    orig_acc = acc
    acc = handle.readline().strip()
    if PY3K:
        acc = acc.decode()
    url_flag = False

    if not re.search('(?<=PF)[0-9]{5}$', acc):
        raise ValueError('{0} is not a valid Pfam ID or Accession Code'
                         .format(repr(orig_acc)))

    if alignment not in DOWNLOAD_FORMATS:
        raise ValueError('alignment must be one of full, seed, ncbi or'
                         ' metagenomics')
    if alignment == 'ncbi' or alignment == 'metagenomics':
        url = ('http://pfam.sanger.ac.uk/family/' + acc + '/alignment/' +
               alignment + '/gzipped')
        url_flag = True
        extension = '.sth'
    else:
        if not kwargs:
            url = ('http://pfam.sanger.ac.uk/family/' + acc + '/alignment/' +
                   alignment + '/gzipped')
            url_flag = True
            extension = '.sth'
        else:
            align_format = kwargs.get('format', 'selex').lower()

            if align_format not in FORMAT_OPTIONS['format']:
                raise ValueError('alignment format must be of type selex'
                                 ' stockholm or fasta. MSF not supported')

            if align_format == SELEX:
                align_format, extension = 'pfam', '.slx'
            elif align_format == FASTA:
                extension = '.fasta'
            else:
                extension = '.sth'

            gaps = str(kwargs.get('gaps', 'dashes')).lower()
            if gaps not in FORMAT_OPTIONS['gaps']:
                raise ValueError('gaps must be of type mixed, dots, dashes, '
                                 'or None')

            inserts = kwargs.get('inserts', 'upper').lower()
            if(inserts not in FORMAT_OPTIONS['inserts']):
                raise ValueError('inserts must be of type lower or upper')

            order = kwargs.get('order', 'tree').lower()
            if order not in FORMAT_OPTIONS['order']:
                raise ValueError('order must be of type tree or alphabetical')

            url = ('http://pfam.sanger.ac.uk/family/' + acc + '/alignment/'
                   + alignment + '/format?format=' + align_format +
                   '&alnType=' + alignment + '&order=' + order[0] +
                   '&case=' + inserts[0] + '&gaps=' + gaps + '&download=1')

    response = openURL(url, timeout=int(kwargs.get('timeout', 60)))
    outname = kwargs.get('outname', None)
    if not outname:
        outname = orig_acc
    folder = str(kwargs.get('folder', '.'))
    filepath = join(makePath(folder), outname + '_' + alignment + extension)
    if compressed:
        filepath = filepath + '.gz'
        if url_flag:
            f_out = open(filepath, 'wb')
        else:
            f_out = openFile(filepath, 'wb')
        f_out.write(response.read())
        f_out.close()
    else:
        if url_flag:
            gunzip(response.read(), filepath)
        else:
            with open(filepath, 'wb') as f_out:
                f_out.write(response.read())

    filepath = relpath(filepath)
    LOGGER.info('Pfam MSA for {0} is written as {1}.'
                .format(orig_acc, filepath))

    return filepath

Example #7

Show file

File: localpdb.py Project: sixpi/ProDy

def fetchPDB(*pdb, **kwargs):
    """Returns path(s) to PDB file(s) for specified *pdb* identifier(s).  Files
    will be sought in user specified *folder* or current working director, and
    then in local PDB folder and mirror, if they are available.  If *copy*
    is set **True**, files will be copied into *folder*.  If *compressed* is
    **False**, all files will be decompressed.  See :func:`pathPDBFolder` and
    :func:`pathPDBMirror` for managing local resources, :func:`.fetchPDBviaFTP`
    and :func:`.fetchPDBviaFTP` for downloading files from PDB servers."""

    if len(pdb) == 1 and isinstance(pdb[0], list):
        pdb = pdb[0]

    if 'format' in kwargs and kwargs.get('format') != 'pdb':
        return fetchPDBviaFTP(*pdb, **kwargs)

    identifiers = checkIdentifiers(*pdb)

    folder = kwargs.get('folder', '.')
    compressed = kwargs.get('compressed')

    # check *folder* specified by the user, usually pwd ('.')
    filedict = findPDBFiles(folder, compressed=compressed)

    filenames = []
    not_found = []
    exists = 0
    for i, pdb in enumerate(identifiers):
        if pdb is None:
            filenames.append(None)
        elif pdb in filedict:
            filenames.append(filedict[pdb])
            exists += 1
        else:
            filenames.append(None)
            not_found.append((i, pdb))

    if not not_found:
        if len(filenames) == 1:
            filenames = filenames[0]
            if exists:
                LOGGER.debug('PDB file is found in working directory ({0}).'
                             .format(sympath(filenames)))
        return filenames

    if not isWritable(folder):
        raise IOError('permission to write in {0} is denied, please '
                      'specify another folder'.format(folder))

    if compressed is not None and not compressed:
        filedict = findPDBFiles(folder, compressed=True)
        not_found, decompress = [], not_found
        for i, pdb in decompress:
            if pdb in filedict:
                fn = filedict[pdb]
                filenames[i] = gunzip(fn, splitext(fn)[0])
            else:
                not_found.append((i, pdb))

    if not not_found:
        return filenames[0] if len(identifiers) == 1 else filenames

    local_folder = pathPDBFolder()
    copy = kwargs.setdefault('copy', False)
    if local_folder:
        local_folder, is_divided = local_folder
        temp, not_found = not_found, []
        for i, pdb in temp:
            if is_divided:
                fn = join(local_folder, pdb[1:3], 'pdb' + pdb + '.pdb.gz')
            else:
                fn = join(local_folder, pdb + '.pdb.gz')
            if isfile(fn):
                if copy or not compressed and compressed is not None:
                    if compressed:
                        fn = copyFile(fn, join(folder, pdb + 'pdb.gz'))
                    else:
                        fn = gunzip(fn, join(folder, pdb + '.pdb'))
                filenames[i] = normpath(fn)
            else:
                not_found.append((i, pdb))

    if not not_found:
        if len(identifiers) == 1:
            fn = filenames[0]
            if kwargs.get('report', True):
                items = fn.split(pathsep)
                if len(items) > 5:
                    fndisp = pathsep.join(items[:3] + ['...'] + items[-1:])
                else:
                    fndisp = relpath(fn)
                LOGGER.debug('PDB file is found in the local folder ({0}).'
                             .format(fndisp))
            return fn
        else:
            return filenames

    if kwargs['copy'] or (compressed is not None and not compressed):
        kwargs['folder'] = folder

    downloads = [pdb for i, pdb in not_found]
    fns = None

    try:
        fns = fetchPDBfromMirror(*downloads, **kwargs)
    except IOError:
        pass
    else:
        if len(downloads) == 1: fns = [fns]
        temp, not_found = not_found, []
        for i, fn in enumerate(fns):
            if fn is None:
                not_found.append(temp[i])
            else:
                i, _ = temp[i]
                filenames[i] = fn

    if not not_found:
        return filenames[0] if len(identifiers) == 1 else filenames

    if fns:
        downloads = [pdb for i, pdb in not_found]
    fns = None
    try:
        fns = fetchPDBviaFTP(*downloads, check=False, **kwargs)
    except Exception as err:
        LOGGER.warn('Downloading PDB files via FTP failed ({0}), '
                    'trying HTTP.'.format(str(err)))
        try:
            fns = fetchPDBviaHTTP(*downloads, check=False, **kwargs)
        except Exception as err:
            LOGGER.warn('Downloading PDB files via HTTP also failed '
                        '({0}).'.format(str(err)))
    if len(downloads) == 1: fns = [fns]
    if fns:
        for i, fn in zip([i for i, pdb in not_found], fns):
            filenames[i] = fn

    return filenames[0] if len(identifiers) == 1 else filenames

Example #8

Show file

File: pfam.py Project: njekin/ProDy

def fetchPfamMSA(acc, alignment='full', compressed=False, **kwargs):
    """Return a path to the downloaded Pfam MSA file.

    :arg acc: Pfam ID or Accession Code
    :type acc: str

    :arg alignment: alignment type, one of ``'full'`` (default), ``'seed'``,
         ``'ncbi'``, ``'metagenomics'``, ``'rp15'``, ``'rp35'``, ``'rp55'``,
         or ``'rp75'`` where rp stands for representative proteomes

    :arg compressed: gzip the downloaded MSA file, default is **False**

    *Alignment Options*

    :arg format: a Pfam supported MSA file format, one of ``'selex'``,
        (default), ``'stockholm'`` or ``'fasta'``

    :arg order: ordering of sequences, ``'tree'`` (default) or
        ``'alphabetical'``

    :arg inserts: letter case for inserts, ``'upper'`` (default) or ``'lower'``

    :arg gaps: gap character, one of ``'dashes'`` (default), ``'dots'``,
        ``'mixed'`` or **None** for unaligned

    *Other Options*

    :arg timeout: timeout for blocking connection attempt in seconds, default
        is 60

    :arg outname: out filename, default is input ``'acc_alignment.format'``

    :arg folder: output folder, default is ``'.'``"""

    url = 'http://pfam.sanger.ac.uk/family/acc?id=' + acc
    handle = openURL(url)
    orig_acc = acc
    acc = handle.readline().strip()
    if PY3K:
        acc = acc.decode()
    url_flag = False

    if not re.search('(?<=PF)[0-9]{5}$', acc):
        raise ValueError('{0} is not a valid Pfam ID or Accession Code'
                         .format(repr(orig_acc)))

    if alignment not in DOWNLOAD_FORMATS:
        raise ValueError('alignment must be one of full, seed, ncbi or'
                         ' metagenomics')
    if alignment == 'ncbi' or alignment == 'metagenomics':
        url = ('http://pfam.sanger.ac.uk/family/' + acc + '/alignment/' +
               alignment + '/gzipped')
        url_flag = True
        extension = '.sth'
    else:
        if not kwargs:
            url = ('http://pfam.sanger.ac.uk/family/' + acc + '/alignment/' +
                   alignment + '/gzipped')
            url_flag = True
            extension = '.sth'
        else:
            align_format = kwargs.get('format', 'selex').lower()

            if align_format not in FORMAT_OPTIONS['format']:
                raise ValueError('alignment format must be of type selex'
                                 ' stockholm or fasta. MSF not supported')

            if align_format == SELEX:
                align_format, extension = 'pfam', '.slx'
            elif align_format == FASTA:
                extension = '.fasta'
            else:
                extension = '.sth'

            gaps = str(kwargs.get('gaps', 'dashes')).lower()
            if gaps not in FORMAT_OPTIONS['gaps']:
                raise ValueError('gaps must be of type mixed, dots, dashes, '
                                 'or None')

            inserts = kwargs.get('inserts', 'upper').lower()
            if(inserts not in FORMAT_OPTIONS['inserts']):
                raise ValueError('inserts must be of type lower or upper')

            order = kwargs.get('order', 'tree').lower()
            if order not in FORMAT_OPTIONS['order']:
                raise ValueError('order must be of type tree or alphabetical')

            url = ('http://pfam.sanger.ac.uk/family/' + acc + '/alignment/'
                   + alignment + '/format?format=' + align_format +
                   '&alnType=' + alignment + '&order=' + order[0] +
                   '&case=' + inserts[0] + '&gaps=' + gaps + '&download=1')

    response = openURL(url, timeout=int(kwargs.get('timeout', 60)))
    outname = kwargs.get('outname', None)
    if not outname:
        outname = orig_acc
    folder = str(kwargs.get('folder', '.'))
    filepath = join(makePath(folder), outname + '_' + alignment + extension)
    if compressed:
        filepath = filepath + '.gz'
        if url_flag:
            f_out = open(filepath, 'wb')
        else:
            f_out = openFile(filepath, 'wb')
        f_out.write(response.read())
        f_out.close()
    else:
        if url_flag:
            gunzip(response.read(), filepath)
        else:
            with open(filepath, 'wb') as f_out:
                f_out.write(response.read())

    filepath = relpath(filepath)
    LOGGER.info('Pfam MSA for {0} is written as {1}.'
                .format(orig_acc, filepath))

    return filepath

Example #9

Show file

def fetchPDBviaHTTP(*pdb, **kwargs):
    """Retrieve PDB file(s) for specified *pdb* identifier(s) and return
    path(s).  Downloaded files will be stored in local PDB folder, if one
    is set using :meth:`.pathPDBFolder`, and copied into *folder*, if
    specified by the user.  If no destination folder is specified, files
    will be saved in the current working directory.  If *compressed* is
    **False**, decompressed files will be copied into *folder*."""

    if kwargs.get('check', True):
        identifiers = checkIdentifiers(*pdb)
    else:
        identifiers = list(pdb)

    output_folder = kwargs.pop('folder', None)
    compressed = bool(kwargs.pop('compressed', True))

    extension = '.pdb'
    local_folder = pathPDBFolder()
    if local_folder:
        local_folder, is_divided = local_folder
        if is_divided:
            getPath = lambda pdb: join(makePath(join(local_folder, pdb[1:3])),
                                       'pdb' + pdb + '.pdb.gz')
        else:
            getPath = lambda pdb: join(local_folder, pdb + '.pdb.gz')
        if output_folder is None:
            second = lambda filename, pdb: filename
        else:
            if compressed:
                second = lambda filename, pdb: (copyFile(filename,
                            join(output_folder, pdb + extension + '.gz')))
            else:
                second = lambda filename, pdb: gunzip(filename,
                            join(output_folder, pdb + extension))

    else:
        if output_folder is None:
            output_folder = getcwd()
        if compressed:
            getPath = lambda pdb: join(output_folder, pdb + extension + '.gz')
            second = lambda filename, pdb: filename
        else:
            getPath = lambda pdb: join(output_folder, pdb + extension)
            second = lambda filename, pdb: gunzip(getPath(pdb), getPath(pdb))


    getURL = WWPDB_HTTP_URL[wwPDBServer() or 'us']

    success = 0
    failure = 0
    filenames = []
    for pdb in identifiers:
        if pdb is None:
            filenames.append(None)
            continue
        try:
            handle = openURL(getURL(pdb))
        except Exception as err:
            LOGGER.warn('{0} download failed ({1}).'.format(pdb, str(err)))
            failure += 1
            filenames.append(None)
        else:
            data = handle.read()
            if len(data):
                filename = getPath(pdb)

                with open(filename, 'w+b') as pdbfile:
                    pdbfile.write(data)

                filename = normpath(relpath(second(filename, pdb)))
                LOGGER.debug('{0} downloaded ({1})'
                             .format(pdb, sympath(filename)))
                success += 1
                filenames.append(filename)
            else:
                LOGGER.warn('{0} download failed, reason unknown.'
                            .format(pdb))
                failure += 1
                filenames.append(None)
    LOGGER.debug('PDB download via HTTP completed ({0} downloaded, '
                 '{1} failed).'.format(success, failure))
    if len(identifiers) == 1:
        return filenames[0]
    else:
        return filenames

Example #10

Show file

def fetchPDBviaFTP(*pdb, **kwargs):
    """Retrieve PDB (default), PDBML, mmCIF, or EMD file(s) for specified *pdb*
    identifier(s) and return path(s).  Downloaded files will be stored in
    local PDB folder, if one is set using :meth:`.pathPDBFolder`, and copied
    into *folder*, if specified by the user.  If no destination folder is
    specified, files will be saved in the current working directory.  If
    *compressed* is **False**, decompressed files will be copied into
    *folder*.  *format* keyword argument can be used to retrieve
    `PDBML <http://pdbml.pdb.org/>`_, `mmCIF <http://mmcif.pdb.org/>`_
    and `PDBML <ftp://ftp.wwpdb.org/pub/emdb/doc/Map-format/current/EMDB_map_format.pdf>`_ 
    files: ``format='cif'`` will fetch an mmCIF file, ``format='emd'`` will fetch an EMD file,
    and ``format='xml'`` will fetch a PDBML file. 
    If PDBML header file is desired, ``noatom=True`` argument will do the job."""

    if kwargs.get('check', True):
        identifiers = checkIdentifiers(*pdb)
    else:
        identifiers = list(pdb)

    output_folder = kwargs.pop('folder', None)
    compressed = bool(kwargs.pop('compressed', True))
    format = str(kwargs.pop('format', 'pdb')).lower()
    noatom = bool(kwargs.pop('noatom', False))

    if format == 'pdb':
        ftp_divided = 'pdb/data/structures/divided/pdb'
        ftp_pdbext = '.ent.gz'
        ftp_prefix = 'pdb'
        extension = '.pdb'
    elif format == 'xml':
        if noatom:
            ftp_divided = 'pdb/data/structures/divided/XML-noatom'
            ftp_pdbext = '-noatom.xml.gz'
            extension = '-noatom.xml'
        else:
            ftp_divided = 'pdb/data/structures/divided/XML'
            ftp_pdbext = '.xml.gz'
            extension = '.xml'
        ftp_prefix = ''
    elif format == 'cif':
        ftp_divided = 'pdb/data/structures/divided/mmCIF'
        ftp_pdbext = '.cif.gz'
        ftp_prefix = ''
        extension = '.cif'
    elif format == 'emd' or format == 'map':
        ftp_divided = 'emdb/structures'
        ftp_pdbext = '.map.gz'
        ftp_prefix = 'emd_'
        extension = '.map'
    else:
        raise ValueError(repr(format) + ' is not valid format')

    local_folder = pathPDBFolder()

    if format == 'pdb' and local_folder:
        local_folder, is_divided = local_folder
        if is_divided:
            getPath = lambda pdb: join(makePath(join(local_folder, pdb[1:3])),
                                       'pdb' + pdb + '.pdb.gz')
        else:
            getPath = lambda pdb: join(local_folder, pdb + '.pdb.gz')
        if output_folder is None:
            second = lambda filename, pdb: filename
        else:
            if compressed:
                second = lambda filename, pdb: (copyFile(filename,
                            join(output_folder, pdb + extension + '.gz')))
            else:
                second = lambda filename, pdb: gunzip(filename,
                            join(output_folder, pdb + extension))

    else:
        if output_folder is None:
            output_folder = getcwd()
        if compressed:
            getPath = lambda pdb: join(output_folder, pdb + extension + '.gz')
            second = lambda filename, pdb: filename
        else:
            getPath = lambda pdb: join(output_folder, pdb + extension)
            second = lambda filename, pdb: gunzip(getPath(pdb), getPath(pdb))


    ftp_name, ftp_host, ftp_path = WWPDB_FTP_SERVERS[wwPDBServer() or 'us']
    LOGGER.debug('Connecting wwPDB FTP server {0}.'.format(ftp_name))

    from ftplib import FTP
    try:
        ftp = FTP(ftp_host)
    except Exception as error:
        raise type(error)('FTP connection problem, potential reason: '
                          'no internet connectivity')
    else:
        success = 0
        failure = 0
        filenames = []
        ftp.login('')
        for pdb in identifiers:
            if pdb is None:
                filenames.append(None)
                continue
            data = []
            ftp_fn = ftp_prefix + pdb + ftp_pdbext
            try:
                ftp.cwd(ftp_path)
                ftp.cwd(ftp_divided)
                if format == 'emd':
                    ftp.cwd('EMD-{0}/map'.format(pdb))
                else:
                    ftp.cwd(pdb[1:3])
                ftp.retrbinary('RETR ' + ftp_fn, data.append)
            except Exception as error:
                if ftp_fn in ftp.nlst():
                    LOGGER.warn('{0} download failed ({1}). It is '
                                'possible that you do not have rights to '
                                'download .gz files in the current network.'
                                .format(pdb, str(error)))
                else:
                    LOGGER.info('{0} download failed. {1} does not exist '
                                'on {2}.'.format(ftp_fn, pdb, ftp_host))
                failure += 1
                filenames.append(None)
            else:
                if len(data):
                    filename = getPath(pdb)

                    with open(filename, 'w+b') as pdbfile:
                        write = pdbfile.write
                        [write(block) for block in data]

                    filename = normpath(relpath(second(filename, pdb)))
                    LOGGER.debug('{0} downloaded ({1})'
                                 .format(pdb, sympath(filename)))
                    success += 1
                    filenames.append(filename)
                else:
                    LOGGER.warn('{0} download failed, reason unknown.'
                                .format(pdb))
                    failure += 1
                    filenames.append(None)

        ftp.quit()

    LOGGER.debug('PDB download via FTP completed ({0} downloaded, '
                 '{1} failed).'.format(success, failure))
    if len(identifiers) == 1:
        return filenames[0]
    else:
        return filenames