def getFilename(self, absolute=False):
    """Return relative path to the current file. For absolute path, pass
    ``absolute=True`` argument."""
    # Resolve once, then pick the requested flavor of the path.
    path = self._filename
    return abspath(path) if absolute else relpath(path)
def getFilename(self, absolute=False):
    """Returns relative path to the current file. For absolute path, pass
    ``absolute=True`` argument."""
    # Guard clause: the common (relative) case exits first.
    if not absolute:
        return relpath(self._filename)
    return abspath(self._filename)
def fetchPDB(pdb, folder='.', compressed=True, copy=False, **kwargs):
    """Retrieve PDB, PDBML, or mmCIF file(s) for specified *pdb* identifier(s).

    *pdb* may be a string or a list.  The function will return a filename or a
    list of filenames depending on input (see :ref:`fetchpdb` for examples).

    If *compressed* is ``False``, all files will be decompressed.  If *copy*
    is ``True``, all files from local PDB mirror will copied to the user
    specified *folder*.  *format* keyword argument can be used to retrieve
    `PDBML <http://pdbml.pdb.org/>`_ and `mmCIF <http://mmcif.pdb.org/>`_
    files: ``format="cif"`` will fetch an mmCIF file (e.g.
    :file:`1XXX.cif.gz`), similarly ``format="xml"`` will fetch a PDBML file.
    If PDBML header file is desired, ``format="xml", noatom=True`` will do
    the job (e.g. :file:`1XXX-noatom.xml.gz`)

    The order of file search operations are as follows: First, files are
    sought in *folder*.  Second, local PDB mirror will be sought, if one is
    set by the user (see :func:`setPDBMirrorPath`).  Then, local PDB folder
    will be sought, if one is set by the user (see :func:`setPDBLocalFolder`).
    Finally, if files are not found locally, they will be downloaded one of
    wwPDB FTP servers (use :func:`setWWPDBFTPServer` to specify one close to
    you).

    .. note:: When *pdb* is a list, it is modified in place: identifiers are
       normalized to lower case.
    """
    # --- argument validation -------------------------------------------
    if isinstance(pdb, str):
        identifiers = [pdb]
    elif isinstance(pdb, list):
        identifiers = pdb
    else:
        raise TypeError('pdb may be a string or a list of strings')
    assert isinstance(folder, str), 'folder must be a string'
    assert isinstance(compressed, bool), 'compressed must be a boolean'
    assert isinstance(copy, bool), 'copy must be a boolean'
    format = kwargs.pop('format', 'pdb')
    assert isinstance(format, str), 'format must be a string'
    format = format.lower()
    assert format in _PDB_FORMATS, '{0:s} is not valid format'.format(
        repr(format))
    noatom = kwargs.pop('noatom', False)
    assert isinstance(noatom, bool), 'noatom must be a boolean'
    if kwargs:
        # FIX: was ``kwargs.iterkeys().next()`` (Python 2 only, raised
        # AttributeError on Python 3 instead of the intended TypeError),
        # and the two string fragments were missing a separating space.
        raise TypeError('{0:s} is not a valid keyword argument for this '
                        'function'.format(repr(next(iter(kwargs)))))
    if folder != '.':
        folder = makePath(folder)
    if not os.access(folder, os.W_OK):
        raise IOError('permission to write in {0:s} is denied, please '
                      'specify another folder'.format(folder))

    filenames = []   # result slots, parallel to identifiers
    exists = 0       # found locally without copying
    success = 0      # downloaded or copied
    failure = 0
    download = False

    # --- per-format archive layout -------------------------------------
    if format == 'pdb':
        divided = 'data/structures/divided/pdb'
        pdbext = '.ent.gz'
        extensions = ['.ent', '.pdb']  # '.pdb' should be the last item
        prefix = 'pdb'
    elif format == 'xml':
        if noatom:
            divided = 'data/structures/divided/XML-noatom'
            pdbext = '-noatom.xml.gz'
            extensions = ['-noatom.xml']
        else:
            divided = 'data/structures/divided/XML'
            pdbext = '.xml.gz'
            extensions = ['.xml']
        prefix = ''
    else:
        divided = 'data/structures/divided/mmCIF'
        pdbext = '.cif.gz'
        extensions = ['.cif']  # '.pdb' should be the last item
        prefix = ''

    # Map lowercase basenames of files already in *folder* to their paths.
    # NOTE: *extension* intentionally leaks from this loop and is used below
    # for naming copied files; that is why '.pdb' must be last in the list.
    pdbfnmap = {}
    for extension in extensions:
        for pdbfn in glob(os.path.join(folder, '*' + extension + '*')):
            if os.path.splitext(pdbfn)[1] in _PDB_EXTENSIONS:
                pdbfnmap[os.path.split(pdbfn)[1].split('.')[0].lower()] = pdbfn
        for pdbfn in glob(os.path.join(folder, '*' + extension.upper() + '*')):
            if os.path.splitext(pdbfn)[1] in _PDB_EXTENSIONS:
                pdbfnmap[os.path.split(pdbfn)[1].split('.')[0].lower()] = pdbfn

    for i, pdbid in enumerate(identifiers):
        # Check validity of identifiers
        if not isinstance(pdbid, str):
            LOGGER.debug('{0:s} is not a valid identifier.'.format(pdbid))
            filenames.append(None)
            failure += 1
            continue
        pdbid = pdbid.strip().lower()
        if not (len(pdbid) == 4 and pdbid.isalnum()):
            LOGGER.debug('{0:s} is not a valid identifier.'.format(pdbid))
            filenames.append(None)
            failure += 1
            continue
        # Check if file exists in working directory
        identifiers[i] = pdbid
        if noatom:
            fn = pdbfnmap.get(pdbid + '-noatom', None)
        else:
            fn = pdbfnmap.get(pdbid, None) or pdbfnmap.get('pdb' + pdbid, None)
        if fn:
            fn = relpath(fn)
            if not compressed:
                temp, ext = os.path.splitext(fn)
                if ext == '.gz':
                    fn = gunzip(fn, temp)
            filenames.append(fn)
            LOGGER.debug('{0:s} ({1:s}) is found in the working directory.'
                         .format(pdbid, fn))
            exists += 1
            continue
        # Check the PDB mirror
        mirror_path = getPDBMirrorPath()
        if mirror_path is not None and os.path.isdir(mirror_path):
            fn = os.path.join(mirror_path, divided, pdbid[1:3],
                              prefix + pdbid + pdbext)
            if os.path.isfile(fn):
                if copy or not compressed:
                    if compressed:
                        filename = os.path.join(folder,
                                                pdbid + extension + '.gz')
                        shutil.copy(fn, filename)
                    else:
                        filename = os.path.join(folder, pdbid + extension)
                        gunzip(fn, filename)
                    filenames.append(filename)
                    LOGGER.debug('{0:s} copied from local mirror ({1:s})'
                                 .format(pdbid, filename))
                    success += 1
                else:
                    filenames.append(fn)
                    LOGGER.debug('{0:s} ({1:s}...{2:s}) is found in the local '
                                 'mirror.'.format(pdbid,
                                 fn[:fn[1:].index(os.path.sep) + 2], fn[-15:]))
                    exists += 1
                continue
        # Check the local PDB folder
        local_folder = getPDBLocalFolder()
        if format and local_folder:
            local_folder, is_divided = local_folder
            if is_divided:
                fn = os.path.join(local_folder, pdbid[1:3],
                                  'pdb' + pdbid + '.pdb.gz')
            else:
                fn = os.path.join(local_folder, pdbid + '.pdb.gz')
            if os.path.isfile(fn):
                if copy or not compressed:
                    if compressed:
                        filename = os.path.join(folder,
                                                pdbid + extension + '.gz')
                        shutil.copy(fn, filename)
                    else:
                        filename = os.path.join(folder, pdbid + extension)
                        gunzip(fn, filename)
                    filenames.append(filename)
                    LOGGER.debug('{0:s} copied from local PDB folder ({1:s})'
                                 .format(pdbid, filename))
                    success += 1
                else:
                    filenames.append(fn)
                    LOGGER.debug('{0:s} ({1:s}...{2:s}) is found in the PDB '
                                 'local folder.'.format(pdbid,
                                 fn[:fn[1:].index(os.path.sep) + 2], fn[-15:]))
                    exists += 1
                continue
        # Not found anywhere locally: the pdbid itself marks the slot so the
        # download loop below can recognize it.
        filenames.append(pdbid)
        download = True

    if download:
        from ftplib import FTP
        ftp_name, ftp_host, ftp_path = getWWPDBFTPServer()
        LOGGER.debug('Connecting wwPDB FTP server {0:s}.'.format(ftp_name))
        # Downloads of PDB files go straight into the local PDB folder when
        # one is configured and no copy/decompression was requested.
        if format == 'pdb' and not copy and local_folder:
            folder = local_folder
            compressed = True
            if is_divided:
                getfn = lambda folder, pdbid, ext: \
                    os.path.join(makePath(os.path.join(local_folder,
                                 pdbid[1:3])), 'pdb' + pdbid + ext)
            else:
                getfn = lambda folder, pdbid, ext: os.path.join(folder,
                                                                pdbid + ext)
        else:
            getfn = lambda folder, pdbid, ext: os.path.join(folder,
                                                            pdbid + ext)
        try:
            ftp = FTP(ftp_host)
        except Exception as error:
            raise type(error)('FTP connection problem, potential reason: '
                              'no internet connectivity')
        else:
            #ftp_path = os.path.join(ftp_path, divided)
            ftp.login('')
            for i, pdbid in enumerate(identifiers):
                # Slots holding anything other than the bare pdbid were
                # already resolved locally.
                if pdbid != filenames[i]:
                    continue
                filename = getfn(folder, pdbid, extension)
                if compressed:
                    filename += '.gz'
                pdbfile = open(filename, 'w+b')
                fn = prefix + pdbid + pdbext
                try:
                    ftp.cwd(ftp_path)
                    ftp.cwd(divided)
                    ftp.cwd(pdbid[1:3])
                    ftp.retrbinary('RETR ' + fn, pdbfile.write)
                except Exception as error:
                    pdbfile.close()
                    os.remove(filename)  # do not leave empty/partial files
                    if fn in ftp.nlst():
                        LOGGER.debug('{0:s} download failed ({1:s}). It '
                                     'is possible that you don\'t have '
                                     'rights to download .gz files in the '
                                     'current network.'
                                     .format(pdbid, str(error)))
                    else:
                        LOGGER.debug('{0:s} download failed. {1:s} does not '
                                     'exist on {2:s}.'
                                     .format(fn, pdbid, ftp_host))
                    failure += 1
                    filenames[i] = None
                else:
                    pdbfile.close()
                    if not compressed:
                        gunzip(filename)
                    filename = relpath(filename)
                    LOGGER.debug('{0:s} downloaded ({1:s})'
                                 .format(pdbid, filename))
                    success += 1
                    filenames[i] = filename
            ftp.quit()
    if len(identifiers) == 1:
        return filenames[0]
    else:
        LOGGER.info('PDB download completed ({2:d} found, '
                    '{0:d} downloaded, {1:d} failed).'
                    .format(success, failure, exists))
        return filenames
def fetchPDB(*pdb, **kwargs):
    """Returns path(s) to PDB file(s) for specified *pdb* identifier(s).

    Files will be sought in user specified *folder* or current working
    directory, and then in local PDB folder and mirror, if they are
    available.  If *copy* is set **True**, files will be copied into
    *folder*.  If *compressed* is **False**, all files will be decompressed.
    See :func:`pathPDBFolder` and :func:`pathPDBMirror` for managing local
    resources, :func:`.fetchPDBviaFTP` and :func:`.fetchPDBviaHTTP` for
    downloading files from PDB servers.

    :arg pdb: one or more PDB identifiers, or a single list of identifiers
    :arg folder: destination folder, default is the current working directory
    :arg compressed: **None** (default) accepts either form locally;
        **False** forces decompression
    :arg copy: copy files found in local resources into *folder*
    :arg tp: preferred transport, ``'http'`` or ``'ftp'``; default behavior
        tries FTP first and falls back to HTTP
    """
    # Accept fetchPDB(['1abc', '2xyz']) as well as fetchPDB('1abc', '2xyz').
    if len(pdb) == 1 and isinstance(pdb[0], list):
        pdb = pdb[0]

    # Non-PDB formats are delegated entirely to the FTP fetcher; none of the
    # local-resource logic below applies to them.
    if 'format' in kwargs and kwargs.get('format') != 'pdb':
        return fetchPDBviaFTP(*pdb, **kwargs)

    identifiers = checkIdentifiers(*pdb)

    folder = kwargs.get('folder', '.')
    compressed = kwargs.get('compressed')

    # check *folder* specified by the user, usually pwd ('.')
    filedict = findPDBFiles(folder, compressed=compressed)

    filenames = []   # result slots, parallel to identifiers
    not_found = []   # (index, pdbid) pairs still to be located
    exists = 0
    # NOTE: the loop variable shadows the *pdb* parameter from here on.
    for i, pdb in enumerate(identifiers):
        if pdb is None:
            filenames.append(None)
        elif pdb in filedict:
            filenames.append(filedict[pdb])
            exists += 1
        else:
            filenames.append(None)
            not_found.append((i, pdb))

    if not not_found:
        if len(filenames) == 1:
            filenames = filenames[0]
            if exists:
                LOGGER.debug(
                    'PDB file is found in working directory ({0}).'.format(
                    sympath(filenames)))
        return filenames

    if not isWritable(folder):
        raise IOError('permission to write in {0} is denied, please '
                      'specify another folder'.format(folder))

    # Decompression requested: a compressed copy in *folder* still counts,
    # it just has to be gunzipped next to itself first.
    if compressed is not None and not compressed:
        filedict = findPDBFiles(folder, compressed=True)
        not_found, decompress = [], not_found
        for i, pdb in decompress:
            if pdb in filedict:
                fn = filedict[pdb]
                filenames[i] = gunzip(fn, splitext(fn)[0])
            else:
                not_found.append((i, pdb))
        if not not_found:
            return filenames[0] if len(identifiers) == 1 else filenames

    # Next resource: the user-configured local PDB folder.
    local_folder = pathPDBFolder()
    copy = kwargs.setdefault('copy', False)
    if local_folder:
        local_folder, is_divided = local_folder
        temp, not_found = not_found, []
        for i, pdb in temp:
            if is_divided:
                fn = join(local_folder, pdb[1:3], 'pdb' + pdb + '.pdb.gz')
            else:
                fn = join(local_folder, pdb + '.pdb.gz')
            if isfile(fn):
                if copy or not compressed and compressed is not None:
                    if compressed:
                        # NOTE(review): target name is pdb + 'pdb.gz' (no
                        # dot), producing e.g. '1abcpdb.gz' -- looks like a
                        # missing '.'; confirm intended naming.
                        fn = copyFile(fn, join(folder, pdb + 'pdb.gz'))
                    else:
                        fn = gunzip(fn, join(folder, pdb + '.pdb'))
                filenames[i] = normpath(fn)
            else:
                not_found.append((i, pdb))

        if not not_found:
            if len(identifiers) == 1:
                fn = filenames[0]
                # Shorten long paths for the log message.
                items = fn.split(pathsep)
                if len(items) > 5:
                    fndisp = pathsep.join(items[:3] + ['...'] + items[-1:])
                else:
                    fndisp = relpath(fn)
                LOGGER.debug(
                    'PDB file is found in the local folder ({0}).'.format(
                    fndisp))
                return fn
            else:
                return filenames

    # Downloads that must end up in *folder* (copied or decompressed) need
    # the folder passed through to the fetchers.
    if kwargs['copy'] or (compressed is not None and not compressed):
        kwargs['folder'] = folder

    downloads = [pdb for i, pdb in not_found]
    fns = None

    # Next resource: the local PDB mirror (best effort, IOError ignored).
    try:
        fns = fetchPDBfromMirror(*downloads, **kwargs)
    except IOError:
        pass
    else:
        if len(downloads) == 1:
            fns = [fns]
        temp, not_found = not_found, []
        for i, fn in enumerate(fns):
            if fn is None:
                not_found.append(temp[i])
            else:
                i, _ = temp[i]
                filenames[i] = fn
        if not not_found:
            return filenames[0] if len(identifiers) == 1 else filenames
        if fns:
            downloads = [pdb for i, pdb in not_found]

    # Last resort: download from wwPDB servers, transport chosen by *tp*.
    fns = None
    tp = kwargs.pop('tp', None)
    if tp is not None:
        tp = tp.lower()

    if tp == 'http':
        try:
            fns = fetchPDBviaHTTP(*downloads, check=False, **kwargs)
        except Exception as err:
            LOGGER.warn('Downloading PDB files via HTTP failed '
                        '({0}).'.format(str(err)))
    elif tp == 'ftp':
        try:
            fns = fetchPDBviaFTP(*downloads, check=False, **kwargs)
        except Exception as err:
            LOGGER.warn('Downloading PDB files via FTP failed '
                        '({0}).'.format(str(err)))
    else:
        # Default: FTP first, HTTP as fallback when FTP fails or returns
        # incomplete results.
        tryHTTP = False
        try:
            fns = fetchPDBviaFTP(*downloads, check=False, **kwargs)
        except Exception as err:
            tryHTTP = True
        if fns is None or isinstance(fns, list) and None in fns:
            tryHTTP = True
        elif isinstance(fns, list):
            # NOTE(review): this branch only runs when fns is a list with no
            # None entries, so the filter below always yields an empty list;
            # looks like dead code -- confirm.
            downloads = [
                not_found[i][1]
                for i in range(len(fns))
                if fns[i] is None
            ]
            if len(downloads) > 0:
                tryHTTP = True
        if tryHTTP:
            LOGGER.info('Downloading PDB files via FTP failed, '
                        'trying HTTP.')
            try:
                fns = fetchPDBviaHTTP(*downloads, check=False, **kwargs)
            except Exception as err:
                LOGGER.warn('Downloading PDB files via HTTP also failed '
                            '({0}).'.format(str(err)))

    if len(downloads) == 1:
        fns = [fns]
    if fns:
        # Splice downloaded paths back into their original slots.
        for i, fn in zip([i for i, pdb in not_found], fns):
            filenames[i] = fn

    return filenames[0] if len(identifiers) == 1 else filenames
def fetchPfamMSA(acc, alignment="full", compressed=False, **kwargs):
    """Return a path to the downloaded Pfam MSA file.

    :arg acc: Pfam ID or Accession Code
    :type acc: str

    :arg alignment: alignment type, one of ``'full'`` (default), ``'seed'``,
        ``'ncbi'``, ``'metagenomics'``, ``'rp15'``, ``'rp35'``, ``'rp55'``,
        or ``'rp75'`` where rp stands for representative proteomes

    :arg compressed: gzip the downloaded MSA file, default is **False**

    *Alignment Options*

    :arg format: a Pfam supported MSA file format, one of ``'selex'``,
        (default), ``'stockholm'`` or ``'fasta'``

    :arg order: ordering of sequences, ``'tree'`` (default) or
        ``'alphabetical'``

    :arg inserts: letter case for inserts, ``'upper'`` (default) or
        ``'lower'``

    :arg gaps: gap character, one of ``'dashes'`` (default), ``'dots'``,
        ``'mixed'`` or **None** for unaligned

    *Other Options*

    :arg timeout: timeout for blocking connection attempt in seconds,
        default is 60

    :arg outname: out filename, default is input ``'acc_alignment.format'``

    :arg folder: output folder, default is ``'.'``"""

    # Resolve the user-supplied ID/accession to a canonical Pfam accession
    # by querying the Pfam server.
    url = "http://pfam.sanger.ac.uk/family/acc?id=" + acc
    handle = openURL(url)
    orig_acc = acc
    acc = handle.readline().strip()
    if PY3K:
        acc = acc.decode()
    # url_flag marks URLs whose response body is an already-gzipped stream.
    url_flag = False

    if not re.search("(?<=PF)[0-9]{5}$", acc):
        raise ValueError("{0} is not a valid Pfam ID or Accession Code".format(repr(orig_acc)))

    if alignment not in DOWNLOAD_FORMATS:
        raise ValueError("alignment must be one of full, seed, ncbi or"
                         " metagenomics")
    if alignment == "ncbi" or alignment == "metagenomics":
        # These alignment types are only served as gzipped Stockholm files.
        url = "http://pfam.sanger.ac.uk/family/" + acc + "/alignment/" + alignment + "/gzipped"
        url_flag = True
        extension = ".sth"
    else:
        if not kwargs:
            # No formatting options: take the pre-built gzipped alignment.
            url = "http://pfam.sanger.ac.uk/family/" + acc + "/alignment/" + alignment + "/gzipped"
            url_flag = True
            extension = ".sth"
        else:
            # Formatting options given: validate them and build a custom
            # format-request URL (response is plain, not gzipped).
            align_format = kwargs.get("format", "selex").lower()

            if align_format not in FORMAT_OPTIONS["format"]:
                raise ValueError("alignment format must be of type selex"
                                 " stockholm or fasta. MSF not supported")

            # Pfam calls the selex layout 'pfam' in its format parameter.
            if align_format == SELEX:
                align_format, extension = "pfam", ".slx"
            elif align_format == FASTA:
                extension = ".fasta"
            else:
                extension = ".sth"

            gaps = str(kwargs.get("gaps", "dashes")).lower()
            if gaps not in FORMAT_OPTIONS["gaps"]:
                raise ValueError("gaps must be of type mixed, dots, dashes, "
                                 "or None")

            inserts = kwargs.get("inserts", "upper").lower()
            if inserts not in FORMAT_OPTIONS["inserts"]:
                raise ValueError("inserts must be of type lower or upper")

            order = kwargs.get("order", "tree").lower()
            if order not in FORMAT_OPTIONS["order"]:
                raise ValueError("order must be of type tree or alphabetical")

            # order/inserts are abbreviated to their first letter as the
            # Pfam format endpoint expects (e.g. 't', 'a', 'u', 'l').
            url = (
                "http://pfam.sanger.ac.uk/family/" + acc + "/alignment/"
                + alignment + "/format?format=" + align_format
                + "&alnType=" + alignment + "&order=" + order[0]
                + "&case=" + inserts[0] + "&gaps=" + gaps + "&download=1"
            )

    response = openURL(url, timeout=int(kwargs.get("timeout", 60)))
    outname = kwargs.get("outname", None)
    if not outname:
        outname = orig_acc
    folder = str(kwargs.get("folder", "."))
    filepath = join(makePath(folder), outname + "_" + alignment + extension)
    if compressed:
        filepath = filepath + ".gz"
        if url_flag:
            # Response is already gzipped: write the bytes through verbatim.
            f_out = open(filepath, "wb")
        else:
            # Plain response: openFile compresses based on the .gz suffix.
            f_out = openFile(filepath, "wb")
        f_out.write(response.read())
        f_out.close()
    else:
        if url_flag:
            # Gzipped response, uncompressed output requested.
            # NOTE(review): passes raw bytes (not a filename) to gunzip --
            # assumes gunzip accepts in-memory gzipped data; confirm.
            gunzip(response.read(), filepath)
        else:
            with open(filepath, "wb") as f_out:
                f_out.write(response.read())

    filepath = relpath(filepath)
    LOGGER.info("Pfam MSA for {0} is written as {1}.".format(orig_acc, filepath))

    return filepath
def fetchPfamMSA(acc, alignment='full', compressed=False, **kwargs):
    """Return a path to the downloaded Pfam MSA file.

    :arg acc: Pfam ID or Accession Code
    :type acc: str

    :arg alignment: alignment type, one of ``'full'`` (default), ``'seed'``,
        ``'ncbi'``, ``'metagenomics'``, ``'rp15'``, ``'rp35'``, ``'rp55'``,
        or ``'rp75'`` where rp stands for representative proteomes

    :arg compressed: gzip the downloaded MSA file, default is **False**

    *Alignment Options*

    :arg format: a Pfam supported MSA file format, one of ``'selex'``,
        (default), ``'stockholm'`` or ``'fasta'``

    :arg order: ordering of sequences, ``'tree'`` (default) or
        ``'alphabetical'``

    :arg inserts: letter case for inserts, ``'upper'`` (default) or
        ``'lower'``

    :arg gaps: gap character, one of ``'dashes'`` (default), ``'dots'``,
        ``'mixed'`` or **None** for unaligned

    *Other Options*

    :arg timeout: timeout for blocking connection attempt in seconds,
        default is 60

    :arg outname: out filename, default is input ``'acc_alignment.format'``

    :arg folder: output folder, default is ``'.'``"""

    # Resolve the user-supplied ID/accession to a canonical Pfam accession.
    url = 'http://pfam.sanger.ac.uk/family/acc?id=' + acc
    handle = openURL(url)
    orig_acc = acc
    acc = handle.readline().strip()
    if PY3K:
        acc = acc.decode()
    # url_flag marks URLs whose response body is an already-gzipped stream.
    url_flag = False

    if not re.search('(?<=PF)[0-9]{5}$', acc):
        raise ValueError('{0} is not a valid Pfam ID or Accession Code'
                         .format(repr(orig_acc)))

    if alignment not in DOWNLOAD_FORMATS:
        raise ValueError('alignment must be one of full, seed, ncbi or'
                         ' metagenomics')
    if alignment == 'ncbi' or alignment == 'metagenomics':
        # These alignment types are only served as gzipped Stockholm files.
        url = ('http://pfam.sanger.ac.uk/family/' + acc + '/alignment/' +
               alignment + '/gzipped')
        url_flag = True
        extension = '.sth'
    else:
        if not kwargs:
            # No formatting options: take the pre-built gzipped alignment.
            url = ('http://pfam.sanger.ac.uk/family/' + acc + '/alignment/' +
                   alignment + '/gzipped')
            url_flag = True
            extension = '.sth'
        else:
            # Formatting options given: validate them and build a custom
            # format-request URL (response is plain, not gzipped).
            align_format = kwargs.get('format', 'selex').lower()

            if align_format not in FORMAT_OPTIONS['format']:
                raise ValueError('alignment format must be of type selex'
                                 ' stockholm or fasta. MSF not supported')

            # Pfam calls the selex layout 'pfam' in its format parameter.
            if align_format == SELEX:
                align_format, extension = 'pfam', '.slx'
            elif align_format == FASTA:
                extension = '.fasta'
            else:
                extension = '.sth'

            gaps = str(kwargs.get('gaps', 'dashes')).lower()
            if gaps not in FORMAT_OPTIONS['gaps']:
                raise ValueError('gaps must be of type mixed, dots, dashes, '
                                 'or None')

            inserts = kwargs.get('inserts', 'upper').lower()
            # FIX: was ``if(inserts not in ...):`` -- non-idiomatic
            # parenthesized condition, inconsistent with sibling checks.
            if inserts not in FORMAT_OPTIONS['inserts']:
                raise ValueError('inserts must be of type lower or upper')

            order = kwargs.get('order', 'tree').lower()
            if order not in FORMAT_OPTIONS['order']:
                raise ValueError('order must be of type tree or alphabetical')

            # order/inserts are abbreviated to their first letter as the
            # Pfam format endpoint expects (e.g. 't', 'a', 'u', 'l').
            url = ('http://pfam.sanger.ac.uk/family/' + acc + '/alignment/' +
                   alignment + '/format?format=' + align_format +
                   '&alnType=' + alignment + '&order=' + order[0] +
                   '&case=' + inserts[0] + '&gaps=' + gaps + '&download=1')

    response = openURL(url, timeout=int(kwargs.get('timeout', 60)))
    outname = kwargs.get('outname', None)
    if not outname:
        outname = orig_acc
    folder = str(kwargs.get('folder', '.'))
    filepath = join(makePath(folder), outname + '_' + alignment + extension)
    if compressed:
        filepath = filepath + '.gz'
        if url_flag:
            # Response is already gzipped: write the bytes through verbatim.
            f_out = open(filepath, 'wb')
        else:
            # Plain response: openFile compresses based on the .gz suffix.
            f_out = openFile(filepath, 'wb')
        f_out.write(response.read())
        f_out.close()
    else:
        if url_flag:
            # Gzipped response, uncompressed output requested.
            gunzip(response.read(), filepath)
        else:
            with open(filepath, 'wb') as f_out:
                f_out.write(response.read())

    filepath = relpath(filepath)
    LOGGER.info('Pfam MSA for {0} is written as {1}.'
                .format(orig_acc, filepath))

    return filepath
def fetchPDB(*pdb, **kwargs):
    """Returns path(s) to PDB file(s) for specified *pdb* identifier(s).

    Files will be sought in user specified *folder* or current working
    directory, and then in local PDB folder and mirror, if they are
    available.  If *copy* is set **True**, files will be copied into
    *folder*.  If *compressed* is **False**, all files will be decompressed.
    See :func:`pathPDBFolder` and :func:`pathPDBMirror` for managing local
    resources, :func:`.fetchPDBviaFTP` and :func:`.fetchPDBviaHTTP` for
    downloading files from PDB servers.

    :arg pdb: one or more PDB identifiers, or a single list of identifiers
    :arg folder: destination folder, default is the current working directory
    :arg compressed: **None** (default) accepts either form locally;
        **False** forces decompression
    :arg copy: copy files found in local resources into *folder*
    :arg report: log where a single requested file was found, default
        **True**
    """
    # Accept fetchPDB(['1abc', '2xyz']) as well as fetchPDB('1abc', '2xyz').
    if len(pdb) == 1 and isinstance(pdb[0], list):
        pdb = pdb[0]

    # Non-PDB formats are delegated entirely to the FTP fetcher.
    if 'format' in kwargs and kwargs.get('format') != 'pdb':
        return fetchPDBviaFTP(*pdb, **kwargs)

    identifiers = checkIdentifiers(*pdb)

    folder = kwargs.get('folder', '.')
    compressed = kwargs.get('compressed')

    # check *folder* specified by the user, usually pwd ('.')
    filedict = findPDBFiles(folder, compressed=compressed)

    filenames = []   # result slots, parallel to identifiers
    not_found = []   # (index, pdbid) pairs still to be located
    exists = 0
    # NOTE: the loop variable shadows the *pdb* parameter from here on.
    for i, pdb in enumerate(identifiers):
        if pdb is None:
            filenames.append(None)
        elif pdb in filedict:
            filenames.append(filedict[pdb])
            exists += 1
        else:
            filenames.append(None)
            not_found.append((i, pdb))

    if not not_found:
        if len(filenames) == 1:
            filenames = filenames[0]
            if exists:
                LOGGER.debug('PDB file is found in working directory ({0}).'
                             .format(sympath(filenames)))
        return filenames

    if not isWritable(folder):
        raise IOError('permission to write in {0} is denied, please '
                      'specify another folder'.format(folder))

    # Decompression requested: a compressed copy in *folder* still counts,
    # it just has to be gunzipped next to itself first.
    if compressed is not None and not compressed:
        filedict = findPDBFiles(folder, compressed=True)
        not_found, decompress = [], not_found
        for i, pdb in decompress:
            if pdb in filedict:
                fn = filedict[pdb]
                filenames[i] = gunzip(fn, splitext(fn)[0])
            else:
                not_found.append((i, pdb))
        if not not_found:
            return filenames[0] if len(identifiers) == 1 else filenames

    # Next resource: the user-configured local PDB folder.
    local_folder = pathPDBFolder()
    copy = kwargs.setdefault('copy', False)
    if local_folder:
        local_folder, is_divided = local_folder
        temp, not_found = not_found, []
        for i, pdb in temp:
            if is_divided:
                fn = join(local_folder, pdb[1:3], 'pdb' + pdb + '.pdb.gz')
            else:
                fn = join(local_folder, pdb + '.pdb.gz')
            if isfile(fn):
                if copy or not compressed and compressed is not None:
                    if compressed:
                        # NOTE(review): target name is pdb + 'pdb.gz' (no
                        # dot), producing e.g. '1abcpdb.gz' -- looks like a
                        # missing '.'; confirm intended naming.
                        fn = copyFile(fn, join(folder, pdb + 'pdb.gz'))
                    else:
                        fn = gunzip(fn, join(folder, pdb + '.pdb'))
                filenames[i] = normpath(fn)
            else:
                not_found.append((i, pdb))

        if not not_found:
            if len(identifiers) == 1:
                fn = filenames[0]
                if kwargs.get('report', True):
                    # Shorten long paths for the log message.
                    items = fn.split(pathsep)
                    if len(items) > 5:
                        fndisp = pathsep.join(items[:3] + ['...'] +
                                              items[-1:])
                    else:
                        fndisp = relpath(fn)
                    LOGGER.debug('PDB file is found in the local folder '
                                 '({0}).'.format(fndisp))
                return fn
            else:
                return filenames

    # Downloads that must end up in *folder* (copied or decompressed) need
    # the folder passed through to the fetchers.
    if kwargs['copy'] or (compressed is not None and not compressed):
        kwargs['folder'] = folder

    downloads = [pdb for i, pdb in not_found]
    fns = None

    # Next resource: the local PDB mirror (best effort, IOError ignored).
    try:
        fns = fetchPDBfromMirror(*downloads, **kwargs)
    except IOError:
        pass
    else:
        if len(downloads) == 1:
            fns = [fns]
        temp, not_found = not_found, []
        for i, fn in enumerate(fns):
            if fn is None:
                not_found.append(temp[i])
            else:
                i, _ = temp[i]
                filenames[i] = fn
        if not not_found:
            return filenames[0] if len(identifiers) == 1 else filenames
        if fns:
            downloads = [pdb for i, pdb in not_found]

    # Last resort: download from wwPDB, FTP first then HTTP fallback.
    fns = None
    try:
        fns = fetchPDBviaFTP(*downloads, check=False, **kwargs)
    except Exception as err:
        LOGGER.warn('Downloading PDB files via FTP failed ({0}), '
                    'trying HTTP.'.format(str(err)))
        try:
            fns = fetchPDBviaHTTP(*downloads, check=False, **kwargs)
        except Exception as err:
            LOGGER.warn('Downloading PDB files via HTTP also failed '
                        '({0}).'.format(str(err)))

    if len(downloads) == 1:
        fns = [fns]
    if fns:
        # Splice downloaded paths back into their original slots.
        for i, fn in zip([i for i, pdb in not_found], fns):
            filenames[i] = fn

    return filenames[0] if len(identifiers) == 1 else filenames
def fetchPDBviaHTTP(*pdb, **kwargs):
    """Retrieve PDB file(s) for specified *pdb* identifier(s) and return
    path(s).  Downloaded files will be stored in local PDB folder, if one
    is set using :meth:`.pathPDBFolder`, and copied into *folder*, if
    specified by the user.  If no destination folder is specified, files
    will be saved in the current working directory.  If *compressed* is
    **False**, decompressed files will be copied into *folder*.

    :arg pdb: one or more PDB identifiers
    :arg folder: destination folder for copies of downloaded files
    :arg compressed: keep the downloaded ``.gz`` files, default **True**
    :arg check: validate identifiers via :func:`checkIdentifiers`, default
        **True**
    """
    if kwargs.get('check', True):
        identifiers = checkIdentifiers(*pdb)
    else:
        identifiers = list(pdb)

    output_folder = kwargs.pop('folder', None)
    compressed = bool(kwargs.pop('compressed', True))

    extension = '.pdb'
    local_folder = pathPDBFolder()
    # getPath decides where the raw download lands; second post-processes
    # that file (copy and/or decompress) and returns the path to report.
    if local_folder:
        # Downloads always land in the local PDB folder (gzipped), then are
        # optionally copied/decompressed into *output_folder*.
        local_folder, is_divided = local_folder
        if is_divided:
            getPath = lambda pdb: join(makePath(join(local_folder, pdb[1:3])),
                                       'pdb' + pdb + '.pdb.gz')
        else:
            getPath = lambda pdb: join(local_folder, pdb + '.pdb.gz')
        if output_folder is None:
            second = lambda filename, pdb: filename
        else:
            if compressed:
                second = lambda filename, pdb: (copyFile(filename,
                            join(output_folder, pdb + extension + '.gz')))
            else:
                second = lambda filename, pdb: gunzip(filename,
                            join(output_folder, pdb + extension))
    else:
        if output_folder is None:
            output_folder = getcwd()
        if compressed:
            getPath = lambda pdb: join(output_folder, pdb + extension + '.gz')
            second = lambda filename, pdb: filename
        else:
            # NOTE(review): gzipped data is written to the '.pdb' path and
            # then gunzipped onto itself (same source and target path) --
            # assumes gunzip reads fully before writing; confirm.
            getPath = lambda pdb: join(output_folder, pdb + extension)
            second = lambda filename, pdb: gunzip(getPath(pdb), getPath(pdb))

    # URL builder for the configured wwPDB server ('us' when unset).
    getURL = WWPDB_HTTP_URL[wwPDBServer() or 'us']

    success = 0
    failure = 0
    filenames = []
    for pdb in identifiers:
        if pdb is None:
            # Invalid identifier; keep the slot so results stay parallel.
            filenames.append(None)
            continue
        try:
            handle = openURL(getURL(pdb))
        except Exception as err:
            LOGGER.warn('{0} download failed ({1}).'.format(pdb, str(err)))
            failure += 1
            filenames.append(None)
        else:
            data = handle.read()
            if len(data):
                filename = getPath(pdb)

                with open(filename, 'w+b') as pdbfile:
                    pdbfile.write(data)

                filename = normpath(relpath(second(filename, pdb)))
                LOGGER.debug('{0} downloaded ({1})'
                             .format(pdb, sympath(filename)))
                success += 1
                filenames.append(filename)
            else:
                # Empty response body: treat as a failed download.
                LOGGER.warn('{0} download failed, reason unknown.'
                            .format(pdb))
                failure += 1
                filenames.append(None)
    LOGGER.debug('PDB download via HTTP completed ({0} downloaded, '
                 '{1} failed).'.format(success, failure))
    if len(identifiers) == 1:
        return filenames[0]
    else:
        return filenames
def fetchPDBviaFTP(*pdb, **kwargs):
    """Retrieve PDB (default), PDBML, mmCIF, or EMD file(s) for specified
    *pdb* identifier(s) and return path(s).  Downloaded files will be stored
    in local PDB folder, if one is set using :meth:`.pathPDBFolder`, and
    copied into *folder*, if specified by the user.  If no destination
    folder is specified, files will be saved in the current working
    directory.  If *compressed* is **False**, decompressed files will be
    copied into *folder*.  *format* keyword argument can be used to retrieve
    `PDBML <http://pdbml.pdb.org/>`_, `mmCIF <http://mmcif.pdb.org/>`_ and
    `EMD map
    <ftp://ftp.wwpdb.org/pub/emdb/doc/Map-format/current/EMDB_map_format.pdf>`_
    files: ``format='cif'`` will fetch an mmCIF file, ``format='emd'`` will
    fetch an EMD file, and ``format='xml'`` will fetch a PDBML file.  If
    PDBML header file is desired, ``noatom=True`` argument will do the job.

    :arg pdb: one or more PDB (or EMDB) identifiers
    :arg folder: destination folder for copies of downloaded files
    :arg compressed: keep the downloaded ``.gz`` files, default **True**
    :arg format: ``'pdb'`` (default), ``'xml'``, ``'cif'``, ``'emd'``/``'map'``
    :arg noatom: with ``format='xml'``, fetch the header-only PDBML file
    :arg check: validate identifiers via :func:`checkIdentifiers`, default
        **True**
    """
    if kwargs.get('check', True):
        identifiers = checkIdentifiers(*pdb)
    else:
        identifiers = list(pdb)

    output_folder = kwargs.pop('folder', None)
    compressed = bool(kwargs.pop('compressed', True))
    format = str(kwargs.pop('format', 'pdb')).lower()
    noatom = bool(kwargs.pop('noatom', False))

    # Per-format archive layout: directory, remote file suffix/prefix, and
    # local file extension.
    if format == 'pdb':
        ftp_divided = 'pdb/data/structures/divided/pdb'
        ftp_pdbext = '.ent.gz'
        ftp_prefix = 'pdb'
        extension = '.pdb'
    elif format == 'xml':
        if noatom:
            ftp_divided = 'pdb/data/structures/divided/XML-noatom'
            ftp_pdbext = '-noatom.xml.gz'
            extension = '-noatom.xml'
        else:
            ftp_divided = 'pdb/data/structures/divided/XML'
            ftp_pdbext = '.xml.gz'
            extension = '.xml'
        ftp_prefix = ''
    elif format == 'cif':
        ftp_divided = 'pdb/data/structures/divided/mmCIF'
        ftp_pdbext = '.cif.gz'
        ftp_prefix = ''
        extension = '.cif'
    elif format == 'emd' or format == 'map':
        ftp_divided = 'emdb/structures'
        ftp_pdbext = '.map.gz'
        ftp_prefix = 'emd_'
        extension = '.map'
    else:
        raise ValueError(repr(format) + ' is not valid format')

    # getPath decides where the raw download lands; second post-processes
    # that file (copy and/or decompress) and returns the path to report.
    local_folder = pathPDBFolder()
    if format == 'pdb' and local_folder:
        # PDB downloads land in the local PDB folder (gzipped), then are
        # optionally copied/decompressed into *output_folder*.
        local_folder, is_divided = local_folder
        if is_divided:
            getPath = lambda pdb: join(makePath(join(local_folder, pdb[1:3])),
                                       'pdb' + pdb + '.pdb.gz')
        else:
            getPath = lambda pdb: join(local_folder, pdb + '.pdb.gz')
        if output_folder is None:
            second = lambda filename, pdb: filename
        else:
            if compressed:
                second = lambda filename, pdb: (copyFile(filename,
                            join(output_folder, pdb + extension + '.gz')))
            else:
                second = lambda filename, pdb: gunzip(filename,
                            join(output_folder, pdb + extension))
    else:
        if output_folder is None:
            output_folder = getcwd()
        if compressed:
            getPath = lambda pdb: join(output_folder, pdb + extension + '.gz')
            second = lambda filename, pdb: filename
        else:
            getPath = lambda pdb: join(output_folder, pdb + extension)
            second = lambda filename, pdb: gunzip(getPath(pdb), getPath(pdb))

    ftp_name, ftp_host, ftp_path = WWPDB_FTP_SERVERS[wwPDBServer() or 'us']
    LOGGER.debug('Connecting wwPDB FTP server {0}.'.format(ftp_name))

    from ftplib import FTP
    try:
        ftp = FTP(ftp_host)
    except Exception as error:
        raise type(error)('FTP connection problem, potential reason: '
                          'no internet connectivity')
    else:
        success = 0
        failure = 0
        filenames = []
        ftp.login('')
        for pdb in identifiers:
            if pdb is None:
                # Invalid identifier; keep the slot so results stay parallel.
                filenames.append(None)
                continue
            data = []
            ftp_fn = ftp_prefix + pdb + ftp_pdbext
            try:
                ftp.cwd(ftp_path)
                ftp.cwd(ftp_divided)
                # FIX: was ``if format == 'emd':`` -- requests made with the
                # equally-accepted ``format='map'`` were sent down the PDB
                # pdbid[1:3] directory path instead of the EMDB layout.
                if format in ('emd', 'map'):
                    ftp.cwd('EMD-{0}/map'.format(pdb))
                else:
                    ftp.cwd(pdb[1:3])
                ftp.retrbinary('RETR ' + ftp_fn, data.append)
            except Exception as error:
                if ftp_fn in ftp.nlst():
                    LOGGER.warn('{0} download failed ({1}). It is '
                                'possible that you do not have rights to '
                                'download .gz files in the current network.'
                                .format(pdb, str(error)))
                else:
                    LOGGER.info('{0} download failed. {1} does not exist '
                                'on {2}.'.format(ftp_fn, pdb, ftp_host))
                failure += 1
                filenames.append(None)
            else:
                if len(data):
                    filename = getPath(pdb)

                    # FIX: was a list comprehension used only for its side
                    # effect; a plain loop expresses the intent.
                    with open(filename, 'w+b') as pdbfile:
                        for block in data:
                            pdbfile.write(block)

                    filename = normpath(relpath(second(filename, pdb)))
                    LOGGER.debug('{0} downloaded ({1})'
                                 .format(pdb, sympath(filename)))
                    success += 1
                    filenames.append(filename)
                else:
                    # Empty transfer: treat as a failed download.
                    LOGGER.warn('{0} download failed, reason unknown.'
                                .format(pdb))
                    failure += 1
                    filenames.append(None)

        ftp.quit()

        LOGGER.debug('PDB download via FTP completed ({0} downloaded, '
                     '{1} failed).'.format(success, failure))
        if len(identifiers) == 1:
            return filenames[0]
        else:
            return filenames