Example #1
0
 def get_seqres_file(self, savefile='pdb_seqres.txt'):
     """Download the PDB-wide sequence listing and save it locally.

     The remote file ``pdb_seqres.txt`` is fetched from the
     ``/pub/pdb/derived_data/`` directory of ``self.pdb_server`` and
     written to the path given by ``savefile``.
     """
     print("Retrieving sequence file (takes over 110 MB).")
     remote_location = '/pub/pdb/derived_data/pdb_seqres.txt'
     _urlretrieve(self.pdb_server + remote_location, savefile)
Example #2
0
    def retrieve_pdb_file(self, pdb_code, obsolete=False, pdir=None):
        """Retrieve a PDB structure file from the PDB server and
        store it in a local file tree.

        The PDB structure's file name is returned as a single string.
        If obsolete == True, the file will be saved in a special file tree.

        If ``self.overwrite`` is false and the decompressed file is already
        present, nothing is downloaded and the existing file name is returned.

        @param pdb_code: PDB identifier; it is lower-cased before use
        @type pdb_code: string

        @param obsolete: fetch from the server's "obsolete" tree and, when
            pdir is None, store under self.obsolete_pdb instead of
            self.local_pdb
        @type obsolete: bool

        @param pdir: put the file in this directory (default: create a PDB-style directory tree)
        @type pdir: string

        @return: filename
        @rtype: string
        """
        # Get the compressed PDB structure
        code = pdb_code.lower()
        archive_fn = "pdb%s.ent.gz" % code
        pdb_dir = "obsolete" if obsolete else "divided"
        url = (self.pdb_server + '/pub/pdb/data/structures/%s/pdb/%s/%s' %
               (pdb_dir, code[1:3], archive_fn))

        # Where does the final PDB file get saved?
        if pdir is None:
            path = self.obsolete_pdb if obsolete else self.local_pdb
            if not self.flat_tree:  # Put in PDB-style directory tree
                path = os.path.join(path, code[1:3])
        else:  # Put in specified directory
            path = pdir
        if not os.access(path, os.F_OK):
            os.makedirs(path)

        filename = os.path.join(path, archive_fn)
        final_file = os.path.join(path, "pdb%s.ent" % code)  # (decompressed)

        # Skip download if the file already exists
        if not self.overwrite and os.path.exists(final_file):
            print("Structure exists: '%s' " % final_file)
            return final_file

        # Retrieve the file
        print("Downloading PDB structure '%s'..." % pdb_code)
        _urlretrieve(url, filename)

        # Uncompress the archive, delete when done.
        # gzip.open cannot be used as a context manager until Python 2.7,
        # so close the handle in a finally clause: it must not leak when
        # decompressing or writing the output raises.
        gz = gzip.open(filename, 'rb')
        try:
            with open(final_file, 'wb') as out:
                out.writelines(gz)
        finally:
            gz.close()
        os.remove(filename)

        return final_file
Example #3
0
def open_pdb(pdbid, pdb_url=None):
    """Fetch an online PDB file to a local copy and return it opened for reading.

    ``pdb_url`` is a %-style template that is formatted with ``pdbid``;
    when it is None the module-level ``default_pdb_url`` is used instead.
    The caller is responsible for closing the returned file handle.
    """
    template = default_pdb_url if pdb_url is None else pdb_url
    # _urlretrieve returns a (local filename, headers) pair; only the
    # filename is needed here.
    local_path, _headers = _urlretrieve(template % pdbid)
    return open(local_path)
Example #4
0
 def get_seqres_file(self, savefile="pdb_seqres.txt"):
     """Fetch the (big) file with the sequences of all PDB entries and save it.

     The file is downloaded from ``self.pdb_server`` and written to
     ``savefile``. A progress message is printed only when
     ``self._verbose`` is true.
     """
     remote_location = "/pub/pdb/derived_data/pdb_seqres.txt"
     if self._verbose:
         print("Retrieving sequence file (takes over 110 MB).")
     _urlretrieve(self.pdb_server + remote_location, savefile)
Example #5
0
    def retrieve_pdb_file(self,
                          pdb_code,
                          obsolete=False,
                          pdir=None,
                          file_format=None,
                          overwrite=False):
        """Fetch PDB structure file from PDB server, and store it locally.

        The PDB structure's file name is returned as a single string.
        If obsolete ``==`` True, the file will be saved in a special file tree.

        NOTE. The default download format has changed from PDB to PDBx/mmCif

        :param pdb_code: 4-symbols structure Id from PDB (e.g. 3J92).
        :type pdb_code: string

        :param file_format:
            File format. Available options:

            * "mmCif" (default, PDBx/mmCif file),
            * "pdb" (format PDB),
            * "xml" (PDBML/XML format),
            * "mmtf" (highly compressed),
            * "bundle" (PDB formatted archive for large structure)

        :type file_format: string

        :param overwrite: if set to True, existing structure files will be overwritten. Default: False
        :type overwrite: bool

        :param obsolete:
            Has a meaning only for obsolete structures. If True, download the obsolete structure
            to 'obsolete' folder, otherwise download won't be performed.
            This option doesn't work for mmtf format as obsoleted structures aren't stored in mmtf.
            Also doesn't have meaning when parameter pdir is specified.
            Note: make sure that you are about to download the really obsolete structure.
            Trying to download non-obsolete structure into obsolete folder will not work
            and you face the "structure doesn't exists" error.
            Default: False

        :type obsolete: bool

        :param pdir: put the file in this directory (default: create a PDB-style directory tree)
        :type pdir: string

        :return: filename
        :rtype: string

        :raises KeyError: if file_format is not one of the supported formats
        """
        file_format = self._print_default_format_warning(
            file_format)  # Deprecation warning

        # Get the compressed PDB structure
        code = pdb_code.lower()
        archive = {
            "pdb": "pdb%s.ent.gz",
            "mmCif": "%s.cif.gz",
            "xml": "%s.xml.gz",
            "mmtf": "%s",
            "bundle": "%s-pdb-bundle.tar.gz"
        }

        # Validate the format BEFORE indexing the table, so the caller gets
        # the descriptive message rather than a bare KeyError from the lookup.
        # KeyError is kept as the exception type because that is what an
        # unknown format has always produced here.
        if file_format not in archive:
            raise KeyError(
                "Specified file_format %s doesn't exists or is not supported. Maybe a typo. "
                "Please, use one of the following: mmCif, pdb, xml, mmtf, bundle"
                % file_format)
        archive_fn = archive[file_format] % code

        if file_format in ("pdb", "mmCif", "xml"):
            pdb_dir = "divided" if not obsolete else "obsolete"
            file_type = "pdb" if file_format == "pdb" else "mmCIF" if file_format == "mmCif" else "XML"
            url = (self.pdb_server + "/pub/pdb/data/structures/%s/%s/%s/%s" %
                   (pdb_dir, file_type, code[1:3], archive_fn))
        elif file_format == "bundle":
            url = (self.pdb_server +
                   "/pub/pdb/compatible/pdb_bundle/%s/%s/%s" %
                   (code[1:3], code, archive_fn))
        else:  # "mmtf" is served from a dedicated host
            url = ("http://mmtf.rcsb.org/v1.0/full/%s" % code)

        # Where does the final PDB file get saved?
        if pdir is None:
            path = self.local_pdb if not obsolete else self.obsolete_pdb
            if not self.flat_tree:  # Put in PDB-style directory tree
                path = os.path.join(path, code[1:3])
        else:  # Put in specified directory
            path = pdir
        if not os.access(path, os.F_OK):
            os.makedirs(path)
        filename = os.path.join(path, archive_fn)
        final = {
            "pdb": "pdb%s.ent",
            "mmCif": "%s.cif",
            "xml": "%s.xml",
            "mmtf": "%s.mmtf",
            "bundle": "%s-pdb-bundle.tar"
        }
        final_file = os.path.join(path, final[file_format] % code)

        # Skip download if the file already exists
        if not overwrite and os.path.exists(final_file):
            if self._verbose:
                print("Structure exists: '%s' " % final_file)
            return final_file

        # Retrieve the file
        if self._verbose:
            print("Downloading PDB structure '%s'..." % pdb_code)
        try:
            _urlcleanup()
            _urlretrieve(url, filename)
        except IOError:
            # Best effort: report the failure and still return the intended
            # file name (the file will not exist in that case).
            print("Desired structure doesn't exists")
        else:
            # Decompress the downloaded archive, then drop the archive.
            with gzip.open(filename, "rb") as gz:
                with open(final_file, "wb") as out:
                    out.writelines(gz)
            os.remove(filename)
        return final_file
Example #6
0
def main():
    """Write PDB atom records for SCOP domains, driven by the command line.

    Positional arguments (taken from ``sys.argv``): the RAF file URL, the
    CLA file URL, then zero or more SCOP domain identifiers (sids).

    Options:
      -h, --help, --usage   print usage and exit
      -i, --input FILE      read sids from FILE, one per line ('-' = stdin),
                            instead of from the command line; lines starting
                            with '#' and blank lines are skipped
      -o, --output FILE     write to FILE ('-' = stdout); by default each
                            domain is written to '<sid>.ent'
      -p, --pdb URL         URL template handed to open_pdb()

    Exits with status 2 on invalid options or when fewer than two
    positional arguments are given. Per-domain IOError/KeyError/RuntimeError
    failures are reported on stderr and processing continues.
    """
    try:
        opts, args = getopt.getopt(
            sys.argv[1:], "hp:o:i:",
            ["help", "usage", "pdb=", "output=", "input="])
    except getopt.GetoptError:
        # show help information and exit:
        usage()
        sys.exit(2)

    input_name = None
    in_handle = None
    output = None
    pdb_url = None

    for opt, value in opts:
        if opt in ("-h", "--help", "--usage"):
            usage()
            sys.exit()
        elif opt in ("-o", "--output"):
            output = value
        elif opt in ("-i", "--input"):
            input_name = value
        elif opt in ("-p", "--pdb"):
            pdb_url = value

    if len(args) < 2:
        sys.stderr.write(
            "Not enough arguments. Try --help for more details.\n")
        sys.exit(2)

    raf_url = args[0]
    cla_url = args[1]

    # _urlretrieve returns (local filename, headers); headers are not needed.
    raf_filename, _ = _urlretrieve(raf_url)
    seqMapIndex = Raf.SeqMapIndex(raf_filename)

    cla_filename, _ = _urlretrieve(cla_url)
    claIndex = Cla.Index(cla_filename)

    if input_name is None:
        sids = args[2:]
    elif input_name == '-':
        sids = sys.stdin
    else:
        in_handle = open(input_name)
        sids = in_handle

    try:
        for raw_sid in sids:
            # Lines read from a file or stdin carry their newline, so strip
            # before testing for emptiness; otherwise a blank line would be
            # treated as a domain identifier.
            sid = raw_sid.strip()
            if not sid or sid.startswith('#'):
                continue
            domain_id = sid[0:7]
            pdbid = domain_id[1:5]
            if pdbid[0:1] in ('0', 's'):
                sys.stderr.write("No coordinates for domain %s\n" % domain_id)
                continue

            # NOTE(review): with -o FILE the file is reopened in "w+" mode for
            # every domain, so only the last domain's records survive --
            # confirm whether that is intended.
            if output is None:
                filename = domain_id + ".ent"
                out_handle = open(filename, "w+")
            elif output == '-':
                out_handle = sys.stdout
            else:
                out_handle = open(output, "w+")

            try:
                try:
                    claRec = claIndex[domain_id]
                    residues = claRec.residues
                    seqMap = seqMapIndex.getSeqMap(residues)
                    pdbid = residues.pdbid

                    f = open_pdb(pdbid, pdb_url)
                    try:
                        seqMap.getAtoms(f, out_handle)
                    finally:
                        f.close()
                except (IOError, KeyError, RuntimeError) as e:
                    sys.stderr.write("I cannot do SCOP domain %s : %s\n" %
                                     (domain_id, e))
            finally:
                # Never close sys.stdout: later domains still need to write
                # to it, and it is not ours to close.
                if out_handle is not sys.stdout:
                    out_handle.close()
    finally:
        if in_handle is not None:
            in_handle.close()
Example #7
0
def open_pdb(pdbid, pdb_url=None):
    """Download the PDB file for ``pdbid`` and return it opened for reading.

    ``pdb_url`` is a %-style template that is formatted with ``pdbid``;
    when it is None the module-level ``default_pdb_url`` is used instead.
    The caller is responsible for closing the returned file handle.
    """
    url_template = default_pdb_url if pdb_url is None else pdb_url
    # Only the local filename from the (filename, headers) pair is used.
    local_copy, _headers = _urlretrieve(url_template % pdbid)
    return open(local_copy)
    def retrieve_pdb_file(
            self,
            pdb_code,
            obsolete=False,
            pdir=None,
            file_format=None,
            overwrite=False,
            biounit=False):
        """
            Replacement for Bio.PDB.PDBList.retrieve_pdb_file to support
            MMB PDB API. Defaults to Biopython super() if standard server is used.

            When ``self.pdb_server`` is 'mmb', only the 'pdb', 'mmCif' and
            'xml' formats are accepted; any other value prints an error and
            exits with status 1. A truthy ``biounit`` selects that biounit
            and always downloads it in PDB format. Verbose output is
            switched on for the MMB path. Returns the local file name.
        """
        if self.pdb_server != 'mmb':
            return super().retrieve_pdb_file(
                pdb_code, obsolete, pdir, file_format, overwrite
            )

        self._verbose = True
        code = pdb_code.lower()

        # Reject unsupported formats up front.
        if file_format not in ('pdb', 'mmCif', 'xml'):
            print('Error: MMB Server: File format', file_format, 'not supported')
            sys.exit(1)

        # Build the download URL; biounits are only served as PDB files.
        if biounit:
            file_format = 'pdb'
            url = (URL_PREFIX + '/%s_bn%s.pdb' % (code, biounit))
        else:
            if file_format == 'mmCif':
                file_format = 'cif'
            url = (URL_PREFIX + '/%s.%s' % (code, file_format))

        # Decide on (and create, if needed) the destination directory.
        if pdir is not None:  # Put in specified directory
            path = pdir
        else:
            path = self.obsolete_pdb if obsolete else self.local_pdb
            if not self.flat_tree:  # Put in PDB-style directory tree
                path = os.path.join(path, code[1:3])
        if not os.access(path, os.F_OK):
            os.makedirs(path)

        # Work out the local file name.
        if biounit:
            templates = {
                'pdb': '%s_%s.pdb',
                'mmCif': '%s_%s.cif',
                'cif': '%s_%s.cif',
                'xml': '%s_%s.xml'
            }
            basename = templates[file_format] % (code, biounit)
        else:
            templates = {
                'pdb': '%s.pdb',
                'mmCif': '%s.cif',
                'cif': '%s.cif',
                'xml': '%s.xml'
            }
            basename = templates[file_format] % code
        final_file = os.path.join(path, basename)

        # Skip download if the file already exists
        if not overwrite and os.path.exists(final_file):
            if self._verbose:
                print("Structure exists: '%s' " % final_file)
            return final_file

        # Retrieve the file
        if self._verbose:
            if biounit:
                message = "Downloading PDB structure '%s.%s'..." % (pdb_code, biounit)
            else:
                message = "Downloading PDB structure '%s'..." % pdb_code
            print(message)
        try:
            _urlcleanup()
            _urlretrieve(url, final_file)
        except IOError:
            print("Desired structure doesn't exists")
        return final_file