Beispiel #1
0
 def __init__(self, model, pdb_file = None,
              naccess_binary = 'naccess', tmp_directory = '/tmp'):
     """Run NACCESS for a model and index the parsed output by residue.

     run_naccess() is called with the arguments below and the residue-level
     part of its result is parsed by process_rsa_data().  Every residue of
     the model whose (chain id, residue id) key occurs in the parsed output
     gets the matching entry stored in res.xtra["EXP_NACCESS"] and is
     registered with AbstractResiduePropertyMap.  Residues without an entry
     are left untouched.

     @param model: object iterated as chains, each iterated as residues
     @param pdb_file: forwarded unchanged to run_naccess()
     @param naccess_binary: forwarded to run_naccess() as ``naccess``
     @param tmp_directory: forwarded to run_naccess() as ``temp_path``
     """
     # Only the residue-level output is used here; the atom-level output is
     # returned by run_naccess() but not needed for the residue map.
     res_data, _atm_data = run_naccess(model, pdb_file, naccess = naccess_binary,
                                       temp_path = tmp_directory)
     naccess_dict = process_rsa_data(res_data)
     property_dict = {}
     property_keys = []
     property_list = []
     # Now create a dictionary that maps Residue objects to accessibility
     for chain in model:
         chain_id = chain.get_id()
         for res in chain:
             key = (chain_id, res.get_id())
             if key not in naccess_dict:
                 # No NACCESS entry for this residue: nothing to attach.
                 continue
             item = naccess_dict[key]
             # Sanity check: the parsed entry must describe the same residue
             # type as the residue it is attached to.
             assert item['res_name'] == res.get_resname(), \
                 "NACCESS/structure residue name mismatch at %s" % (key,)
             property_dict[key] = item
             property_keys.append(key)
             property_list.append((res, item))
             res.xtra["EXP_NACCESS"] = item
     AbstractResiduePropertyMap.__init__(self, property_dict, property_keys,
             property_list)
Beispiel #2
0
 def __init__(self,
              model,
              pdb_file=None,
              naccess_binary='naccess',
              tmp_directory='/tmp'):
     """Run NACCESS for a model and index the parsed output by residue.

     run_naccess() is called with the arguments below and the residue-level
     part of its result is parsed by process_rsa_data().  Every residue of
     the model whose (chain id, residue id) key occurs in the parsed output
     gets the matching entry stored in res.xtra["EXP_NACCESS"] and is
     registered with AbstractResiduePropertyMap.  Residues without an entry
     are left untouched.

     @param model: object iterated as chains, each iterated as residues
     @param pdb_file: forwarded unchanged to run_naccess()
     @param naccess_binary: forwarded to run_naccess() as ``naccess``
     @param tmp_directory: forwarded to run_naccess() as ``temp_path``
     """
     # Only the residue-level output is used here; the atom-level output is
     # returned by run_naccess() but not needed for the residue map.
     res_data, _atm_data = run_naccess(model,
                                       pdb_file,
                                       naccess=naccess_binary,
                                       temp_path=tmp_directory)
     naccess_dict = process_rsa_data(res_data)
     property_dict = {}
     property_keys = []
     property_list = []
     # Now create a dictionary that maps Residue objects to accessibility
     for chain in model:
         chain_id = chain.get_id()
         for res in chain:
             key = (chain_id, res.get_id())
             if key not in naccess_dict:
                 # No NACCESS entry for this residue: nothing to attach.
                 continue
             item = naccess_dict[key]
             # Sanity check: the parsed entry must describe the same residue
             # type as the residue it is attached to.
             assert item['res_name'] == res.get_resname(), \
                 "NACCESS/structure residue name mismatch at %s" % (key,)
             property_dict[key] = item
             property_keys.append(key)
             property_list.append((res, item))
             res.xtra["EXP_NACCESS"] = item
     AbstractResiduePropertyMap.__init__(self, property_dict, property_keys,
                                         property_list)
Beispiel #3
0
    def __init__(self, structure, dssp="dsspcmbi", id=None):
        """
        Run DSSP for a structure and annotate the residues of its first model.

        The structure is handed to dssp_dict_from_structure(); the structure
        that call returns is stored as self.structure, and child 0 of the
        root of its hierarchy is stored as self.model.  For every DSSP key
        that can be resolved to a residue of self.model, the secondary
        structure, accessibility and relative accessibility are written to
        res.xtra (under the module-level keys SS, ASA and RASA).

        @param structure: the entity passed to dssp_dict_from_structure()
        @type structure: presumably L{Structure} or one of its children
            (anything offering get_parent()) -- TODO confirm

        @param dssp: the dssp executable (ie. the argument to os.system)
        @type dssp: string

        @param id: the structure id to use if parsed
        @type id: string

        @raise PDBException: if the amino acid reported by DSSP differs from
            the residue found in the structure
        """
        # create DSSP dictionary
        structure, dssp_dict, dssp_keys = dssp_dict_from_structure(structure, dssp, id=id)
        self.structure = structure

        # Walk up to the root of the hierarchy, then take its child 0.
        self.model = structure
        while self.model.get_parent() is not None:
            self.model = self.model.get_parent()
        self.model = self.model[0]
        # assert isinstance(self.model, Model)

        dssp_map = {}
        dssp_list = []
        # Now create a dictionary that maps Residue objects to
        # secondary structure and accessibility, and a list of
        # (residue, (secondary structure, accessibility)) tuples
        for key in dssp_keys:
            try:
                chain_id, res_id = key

                chain = self.model[chain_id]
                res = chain[res_id]
            except Exception:
                # Best effort: DSSP entries that cannot be resolved to a
                # residue of the model are skipped.  Catching Exception
                # (rather than a bare except) keeps KeyboardInterrupt and
                # SystemExit from being silently swallowed.
                # NOTE(review): skipped keys still remain in dssp_keys, which
                # is passed on below, although dssp_map has no entry for them.
                continue
            aa, ss, acc = dssp_dict[key]
            res.xtra[SS] = ss
            res.xtra[ASA] = acc
            # relative accessibility, capped at 1.0
            resname = res.get_resname()
            rel_acc = acc / MAX_ACC[resname]
            if rel_acc > 1.0:
                rel_acc = 1.0
            res.xtra[RASA] = rel_acc
            # Verify if AA in DSSP == AA in Structure
            # Something went wrong if this is not true!
            resname = to_one_letter_code[resname]
            if resname == "C":
                # DSSP renames C in C-bridges to a,b,c,d,...
                # - we rename it back to 'C'
                if _dssp_cys.match(aa):
                    aa = "C"
            if resname != aa:
                raise PDBException("Structure/DSSP mismatch at " + str(res))
            dssp_map[key] = (res, ss, acc, rel_acc)
            dssp_list.append((res, ss, acc, rel_acc))
        AbstractResiduePropertyMap.__init__(self, dssp_map, dssp_keys, dssp_list)
Beispiel #4
0
    def __init__(self, model, pdb_file, dssp="dssp"):
        """
        Run DSSP on a PDB file and attach its results to the model's residues.

        @param model: the first model of the structure
        @type model: L{Model}

        @param pdb_file: a PDB file
        @type pdb_file: string

        @param dssp: the dssp executable (ie. the argument to os.system)
        @type dssp: string
        """
        # Parse the DSSP output into a dictionary plus its ordered keys.
        dssp_dict, dssp_keys = dssp_dict_from_pdb_file(pdb_file, dssp)
        residue_map = {}
        residue_records = []
        # Build both the key -> record mapping and the ordered record list;
        # each record is (residue, secondary structure, accessibility,
        # relative accessibility).
        for dssp_key in dssp_keys:
            chain_id, res_id = dssp_key
            chain = model[chain_id]
            residue = chain[res_id]
            dssp_aa, sec_struct, asa = dssp_dict[dssp_key]
            residue.xtra["SS_DSSP"] = sec_struct
            residue.xtra["EXP_DSSP_ASA"] = asa
            # Relative accessibility is capped at 1.0.
            three_letter = residue.get_resname()
            ratio = asa / MAX_ACC[three_letter]
            relative_asa = 1.0 if ratio > 1.0 else ratio
            residue.xtra["EXP_DSSP_RASA"] = relative_asa
            # The amino acid reported by DSSP has to agree with the residue
            # in the structure, otherwise something went wrong.
            one_letter = to_one_letter_code[three_letter]
            if one_letter == "C" and _dssp_cys.match(dssp_aa):
                # DSSP renames C in C-bridges to a,b,c,d,... - undo that.
                dssp_aa = 'C'
            if one_letter != dssp_aa:
                raise PDBException("Structure/DSSP mismatch at " + str(residue))
            residue_map[dssp_key] = (residue, sec_struct, asa, relative_asa)
            residue_records.append((residue, sec_struct, asa, relative_asa))
        AbstractResiduePropertyMap.__init__(self, residue_map, dssp_keys,
                                            residue_records)
Beispiel #5
0
    def __init__(self, model, pdb_file, dssp="dssp"):
        """
        Run DSSP on a PDB file and annotate the matching residues of a model.

        @param model: the first model of the structure
        @type model: L{Model}

        @param pdb_file: a PDB file
        @type pdb_file: string

        @param dssp: the dssp executable (ie. the argument to os.system)
        @type dssp: string
        """
        # Parse the DSSP output into a dictionary plus its ordered keys.
        dssp_dict, dssp_keys = dssp_dict_from_pdb_file(pdb_file, dssp)
        residue_map = {}
        residue_records = []
        # Alternative locations DSSP is taken to prefer: 'A', '1' or blank.
        preferred_altlocs = tuple('A1 ')

        def seq_icode(res_id):
            """Join resseq and icode of a residue id into one comparable string."""
            return '%s%s' % (res_id[1], res_id[2])

        # Build both the key -> record mapping and the ordered record list;
        # each record is (residue, secondary structure, accessibility,
        # relative accessibility, phi, psi).
        for dssp_key in dssp_keys:
            chain_id, res_id = dssp_key
            chain = model[chain_id]
            try:
                res = chain[res_id]
            except KeyError:
                # DSSP ignores the HET field of the residue identifier, so a
                # HETATM residue cannot be found under the DSSP key
                # (see 3jui chain A res 593).  Search the non-water hetero
                # residues for one with the same resseq + icode instead.
                wanted = seq_icode(res_id)
                for candidate in chain:
                    if candidate.id[0] not in (' ', 'W') and \
                       seq_icode(candidate.id) == wanted:
                        res = candidate
                        break
                else:
                    raise KeyError(res_id)

            disorder_kind = res.is_disordered()
            if disorder_kind == 2:
                # Point mutation: BioPython selects the last variant by
                # default whereas DSSP takes the first one (altloc blank, A
                # or 1), see 1h9h chain E resi 22.  All atoms of one variant
                # should share an altloc, so looking at its first atom is
                # enough.  Without a preferred variant, fall back to the
                # first one listed (as DSSP does).
                chosen = next(
                    (variant for variant in res.disordered_get_id_list()
                     if res.child_dict[variant].get_list()[0].get_altloc()
                     in preferred_altlocs),
                    None)
                if chosen is None:
                    chosen = res.disordered_get_id_list()[0]
                res.disordered_select(chosen)
            elif disorder_kind == 1:
                # Point mutations are sometimes split over HETATM and ATOM
                # records with altloc 'A' and 'B', e.g. 3piu chain A
                # residue 273:
                #   <Residue LLP het=H_LLP resseq=273 icode= >
                #   <Residue LYS het=  resseq=273 icode= >
                # DSSP uses the HETATM LLP because it carries altloc 'A'.
                # Keep the current residue if any of its atoms has altloc
                # blank, A or 1; otherwise look for a non-water hetero
                # residue with the same resseq + icode whose first atom has
                # such an altloc.  If none exists, keep the current residue.
                atom_altlocs = set(atom.get_altloc()
                                   for atom in res.get_unpacked_list())
                if atom_altlocs.isdisjoint('A1 '):
                    wanted = seq_icode(res_id)
                    for candidate in chain:
                        if candidate.id[0] in (' ', 'W'):
                            continue
                        if seq_icode(candidate.id) == wanted and \
                           candidate.get_list()[0].get_altloc() in preferred_altlocs:
                            res = candidate
                            break

            aa, ss, acc, phi, psi = dssp_dict[dssp_key]
            res.xtra["SS_DSSP"] = ss
            res.xtra["EXP_DSSP_ASA"] = acc
            res.xtra["PHI_DSSP"] = phi
            res.xtra["PSI_DSSP"] = psi

            # Relative accessibility: capped at 1.0, or 'NA' when the
            # residue name has no entry in MAX_ACC.
            three_letter = res.get_resname()
            try:
                ratio = acc / MAX_ACC[three_letter]
            except KeyError:
                rel_acc = 'NA'
            else:
                rel_acc = 1.0 if ratio > 1.0 else ratio
            res.xtra["EXP_DSSP_RASA"] = rel_acc

            # The amino acid reported by DSSP has to agree with the residue
            # in the structure.  Unknown residue names map to 'X', which
            # DSSP uses often.
            one_letter = SCOPData.protein_letters_3to1.get(three_letter, 'X')
            if one_letter == "C" and _dssp_cys.match(aa):
                # DSSP renames C in C-bridges to a,b,c,d,... - undo that.
                aa = 'C'
            # A hetero residue that DSSP reports as 'X' is tolerated.
            if one_letter != aa and (res.id[0] == ' ' or aa != 'X'):
                raise PDBException("Structure/DSSP mismatch at %s" % res)
            residue_map[dssp_key] = (res, ss, acc, rel_acc, phi, psi)
            residue_records.append((res, ss, acc, rel_acc, phi, psi))

        AbstractResiduePropertyMap.__init__(self, residue_map, dssp_keys,
                residue_records)
Beispiel #6
0
    def __init__(self, model, in_file, dssp="dssp", acc_array="Sander", file_type='PDB'):
        """Create a DSSP object.

        Parameters
        ----------
        model : Model
            The first model of the structure
        in_file : string
            Either a PDB file or a DSSP file.
        dssp : string
            The dssp executable (ie. the argument to os.system)
        acc_array : string
            Accessible surface area (ASA) from either Miller et al. (1987),
            Sander & Rost (1994), or Wilke: Tien et al. 2013, as string
            Sander/Wilke/Miller. Defaults to Sander.
        file_type: string
            File type switch, either PDB or DSSP with PDB as default.

        Raises
        ------
        KeyError
            If a residue listed by DSSP cannot be found in the model, not
            even among its non-water hetero residues.
        PDBException
            If the amino acid reported by DSSP differs from the residue
            found in the model.

        """
        self.residue_max_acc = residue_max_acc[acc_array]

        # create DSSP dictionary
        file_type = file_type.upper()
        assert file_type in ['PDB', 'DSSP'], "File type must be PDB or DSSP"
        # If the input file is a PDB file run DSSP and parse output:
        if file_type == 'PDB':
            # Newer versions of DSSP program call the binary 'mkdssp', so
            # calling 'dssp' will not work in some operating systems
            # (Debian distribution of DSSP includes a symlink for 'dssp' argument)
            try:
                dssp_dict, dssp_keys = dssp_dict_from_pdb_file(in_file, dssp)
            except OSError:  # TODO: Use FileNotFoundError once drop Python 2
                if dssp == 'dssp':
                    dssp = 'mkdssp'
                elif dssp == 'mkdssp':
                    dssp = 'dssp'
                else:
                    raise
                # Retry with the alternative binary name.  The retry lives
                # inside the handler so that DSSP is not run a second time
                # when the first attempt already succeeded.
                dssp_dict, dssp_keys = dssp_dict_from_pdb_file(in_file, dssp)
        # If the input file is a DSSP file just parse it directly:
        elif file_type == 'DSSP':
            dssp_dict, dssp_keys = make_dssp_dict(in_file)

        dssp_map = {}
        dssp_list = []

        def resid2code(res_id):
            """Serialize a residue's resseq and icode for easy comparison."""
            return '%s%s' % (res_id[1], res_id[2])

        # Now create a dictionary that maps DSSP keys to a tuple of DSSP
        # values (see dssp_vals below), and a list of those tuples in key
        # order.  The residues themselves are annotated through res.xtra.
        for key in dssp_keys:
            chain_id, res_id = key
            chain = model[chain_id]
            try:
                res = chain[res_id]
            except KeyError:
                # In DSSP, HET field is not considered in residue identifier.
                # Thus HETATM records may cause unnecessary exceptions.
                # (See 3jui chain A res 593.)
                # Try the lookup again with all HETATM other than water
                res_seq_icode = resid2code(res_id)
                for r in chain:
                    if r.id[0] not in (' ', 'W'):
                        # Compare resseq + icode
                        if resid2code(r.id) == res_seq_icode:
                            # Found a matching residue
                            res = r
                            break
                else:
                    raise KeyError(res_id)

            # For disordered residues of point mutations, Biopython uses the
            # last one as default, But DSSP takes the first one (alternative
            # location is blank, A or 1). See 1h9h chain E resi 22.
            # Here we select the res in which all atoms have altloc blank, A or
            # 1. If no such residues are found, simply use the first one appears
            # (as DSSP does).
            if res.is_disordered() == 2:
                for rk in res.disordered_get_id_list():
                    # All atoms in the disordered residue should have the same
                    # altloc, so it suffices to check the altloc of the first
                    # atom.
                    altloc = res.child_dict[rk].get_list()[0].get_altloc()
                    if altloc in tuple('A1 '):
                        res.disordered_select(rk)
                        break
                else:
                    # Simply select the first one
                    res.disordered_select(res.disordered_get_id_list()[0])

            # Sometimes point mutations are put into HETATM and ATOM with altloc
            # 'A' and 'B'.
            # See 3piu chain A residue 273:
            #   <Residue LLP het=H_LLP resseq=273 icode= >
            #   <Residue LYS het=  resseq=273 icode= >
            # DSSP uses the HETATM LLP as it has altloc 'A'
            # We check the altloc code here.
            elif res.is_disordered() == 1:
                # Check altloc of all atoms in the DisorderedResidue. If it
                # contains blank, A or 1, then use it.  Otherwise, look for HET
                # residues of the same seq+icode.  If no such HET residues are
                # found, just accept the current one.
                altlocs = set(a.get_altloc() for a in res.get_unpacked_list())
                if altlocs.isdisjoint('A1 '):
                    # Try again with all HETATM other than water
                    res_seq_icode = resid2code(res_id)
                    for r in chain:
                        if r.id[0] not in (' ', 'W'):
                            if resid2code(r.id) == res_seq_icode and \
                               r.get_list()[0].get_altloc() in tuple('A1 '):
                                res = r
                                break

            (aa, ss, acc, phi, psi, dssp_index,
                NH_O_1_relidx, NH_O_1_energy,
                O_NH_1_relidx, O_NH_1_energy,
                NH_O_2_relidx, NH_O_2_energy,
                O_NH_2_relidx, O_NH_2_energy) = dssp_dict[key]

            res.xtra["SS_DSSP"] = ss
            res.xtra["EXP_DSSP_ASA"] = acc
            res.xtra["PHI_DSSP"] = phi
            res.xtra["PSI_DSSP"] = psi
            res.xtra["DSSP_INDEX"] = dssp_index
            res.xtra["NH_O_1_RELIDX_DSSP"] = NH_O_1_relidx
            res.xtra["NH_O_1_ENERGY_DSSP"] = NH_O_1_energy
            res.xtra["O_NH_1_RELIDX_DSSP"] = O_NH_1_relidx
            res.xtra["O_NH_1_ENERGY_DSSP"] = O_NH_1_energy
            res.xtra["NH_O_2_RELIDX_DSSP"] = NH_O_2_relidx
            res.xtra["NH_O_2_ENERGY_DSSP"] = NH_O_2_energy
            res.xtra["O_NH_2_RELIDX_DSSP"] = O_NH_2_relidx
            res.xtra["O_NH_2_ENERGY_DSSP"] = O_NH_2_energy

            # Relative accessibility: capped at 1.0, or 'NA' when the
            # residue name has no entry in the selected ASA table.
            resname = res.get_resname()
            try:
                rel_acc = acc / self.residue_max_acc[resname]
            except KeyError:
                # Invalid value for resname
                rel_acc = 'NA'
            else:
                if rel_acc > 1.0:
                    rel_acc = 1.0
            res.xtra["EXP_DSSP_RASA"] = rel_acc
            # Verify if AA in DSSP == AA in Structure
            # Something went wrong if this is not true!
            # NB: DSSP uses X often
            try:
                resname = three_to_one(resname)
            except KeyError:
                resname = 'X'
            if resname == "C":
                # DSSP renames C in C-bridges to a,b,c,d,...
                # - we rename it back to 'C'
                if _dssp_cys.match(aa):
                    aa = 'C'
            # Take care of HETATM again: a hetero residue that DSSP reports
            # as 'X' is tolerated.
            if (resname != aa) and (res.id[0] == ' ' or aa != 'X'):
                raise PDBException("Structure/DSSP mismatch at %s" % res)

            dssp_vals = (dssp_index, aa, ss, rel_acc, phi, psi,
                         NH_O_1_relidx, NH_O_1_energy,
                         O_NH_1_relidx, O_NH_1_energy,
                         NH_O_2_relidx, NH_O_2_energy,
                         O_NH_2_relidx, O_NH_2_energy)

            dssp_map[key] = dssp_vals
            dssp_list.append(dssp_vals)

        AbstractResiduePropertyMap.__init__(self, dssp_map, dssp_keys,
                dssp_list)
Beispiel #7
0
    def __init__(self,
                 model,
                 in_file,
                 dssp="dssp",
                 acc_array="Sander",
                 file_type=""):
        """Create a DSSP object.

        Parameters
        ----------
        model : Model
            The first model of the structure
        in_file : string
            Either a PDB file or a DSSP file.
        dssp : string
            The dssp executable (ie. the argument to subprocess)
        acc_array : string
            Accessible surface area (ASA) from either Miller et al. (1987),
            Sander & Rost (1994), or Wilke: Tien et al. 2013, as string
            Sander/Wilke/Miller. Defaults to Sander.
        file_type: string
            File type switch: either PDB, MMCIF or DSSP. Inferred from the
            file extension by default.

        Raises
        ------
        KeyError
            If a residue listed by DSSP cannot be found in the model, not
            even among its non-water hetero residues.
        PDBException
            If the amino acid reported by DSSP differs from the residue
            found in the model.

        """
        self.residue_max_acc = residue_max_acc[acc_array]

        # create DSSP dictionary
        if file_type == "":
            # Infer the type from the file extension (without the dot).
            file_type = os.path.splitext(in_file)[1][1:]
        file_type = file_type.upper()
        if file_type == "CIF":
            file_type = "MMCIF"
        assert file_type in [
            "PDB",
            "MMCIF",
            "DSSP",
        ], "File type must be PDB, mmCIF or DSSP"
        # If the input file is a PDB or mmCIF file run DSSP and parse output:
        if file_type in ("PDB", "MMCIF"):
            # Newer versions of DSSP program call the binary 'mkdssp', so
            # calling 'dssp' will not work in some operating systems
            # (Debian distribution of DSSP includes a symlink for 'dssp' argument)
            try:
                dssp_dict, dssp_keys = dssp_dict_from_pdb_file(in_file, dssp)
            except FileNotFoundError:
                if dssp == "dssp":
                    dssp = "mkdssp"
                elif dssp == "mkdssp":
                    dssp = "dssp"
                else:
                    raise
                # Retry with the alternative binary name.  The retry lives
                # inside the handler so that DSSP is not run a second time
                # when the first attempt already succeeded.
                dssp_dict, dssp_keys = dssp_dict_from_pdb_file(in_file, dssp)
        # If the input file is a DSSP file just parse it directly:
        elif file_type == "DSSP":
            dssp_dict, dssp_keys = make_dssp_dict(in_file)

        dssp_map = {}
        dssp_list = []

        def resid2code(res_id):
            """Serialize a residue's resseq and icode for easy comparison."""
            return f"{res_id[1]}{res_id[2]}"

        # DSSP outputs label_asym_id from the mmCIF file as the chain ID
        # But MMCIFParser reads in the auth_asym_id
        # Here we create a dictionary to map label_asym_id to auth_asym_id
        # using the mmCIF file
        if file_type == "MMCIF":
            mmcif_dict = MMCIF2Dict(in_file)
            mmcif_chain_dict = {}
            for i, c in enumerate(mmcif_dict["_atom_site.label_asym_id"]):
                # The first atom seen for a label_asym_id decides its mapping.
                if c not in mmcif_chain_dict:
                    mmcif_chain_dict[c] = mmcif_dict[
                        "_atom_site.auth_asym_id"][i]
            dssp_mapped_keys = []

        # Now create a dictionary that maps (chain id, residue id) keys to a
        # tuple of DSSP values (see dssp_vals below), and a list of those
        # tuples in key order.  The residues themselves are annotated
        # through res.xtra.
        for key in dssp_keys:
            chain_id, res_id = key
            if file_type == "MMCIF":
                # Translate DSSP's chain ID into the one used by the model
                # and remember the translated key for the property map.
                chain_id = mmcif_chain_dict[chain_id]
                dssp_mapped_keys.append((chain_id, res_id))
            chain = model[chain_id]
            try:
                res = chain[res_id]
            except KeyError:
                # In DSSP, HET field is not considered in residue identifier.
                # Thus HETATM records may cause unnecessary exceptions.
                # (See 3jui chain A res 593.)
                # Try the lookup again with all HETATM other than water
                res_seq_icode = resid2code(res_id)
                for r in chain:
                    if r.id[0] not in (" ", "W"):
                        # Compare resseq + icode
                        if resid2code(r.id) == res_seq_icode:
                            # Found a matching residue
                            res = r
                            break
                else:
                    raise KeyError(res_id) from None

            # For disordered residues of point mutations, Biopython uses the
            # last one as default, But DSSP takes the first one (alternative
            # location is blank, A or 1). See 1h9h chain E resi 22.
            # Here we select the res in which all atoms have altloc blank, A or
            # 1. If no such residues are found, simply use the first one appears
            # (as DSSP does).
            if res.is_disordered() == 2:
                for rk in res.disordered_get_id_list():
                    # All atoms in the disordered residue should have the same
                    # altloc, so it suffices to check the altloc of the first
                    # atom.
                    altloc = res.child_dict[rk].get_list()[0].get_altloc()
                    if altloc in tuple("A1 "):
                        res.disordered_select(rk)
                        break
                else:
                    # Simply select the first one
                    res.disordered_select(res.disordered_get_id_list()[0])

            # Sometimes point mutations are put into HETATM and ATOM with altloc
            # 'A' and 'B'.
            # See 3piu chain A residue 273:
            #   <Residue LLP het=H_LLP resseq=273 icode= >
            #   <Residue LYS het=  resseq=273 icode= >
            # DSSP uses the HETATM LLP as it has altloc 'A'
            # We check the altloc code here.
            elif res.is_disordered() == 1:
                # Check altloc of all atoms in the DisorderedResidue. If it
                # contains blank, A or 1, then use it.  Otherwise, look for HET
                # residues of the same seq+icode.  If no such HET residues are
                # found, just accept the current one.
                altlocs = {a.get_altloc() for a in res.get_unpacked_list()}
                if altlocs.isdisjoint("A1 "):
                    # Try again with all HETATM other than water
                    res_seq_icode = resid2code(res_id)
                    for r in chain:
                        if r.id[0] not in (" ", "W"):
                            if (resid2code(r.id) == res_seq_icode
                                    and r.get_list()[0].get_altloc()
                                    in tuple("A1 ")):
                                res = r
                                break

            (
                aa,
                ss,
                acc,
                phi,
                psi,
                dssp_index,
                NH_O_1_relidx,
                NH_O_1_energy,
                O_NH_1_relidx,
                O_NH_1_energy,
                NH_O_2_relidx,
                NH_O_2_energy,
                O_NH_2_relidx,
                O_NH_2_energy,
            ) = dssp_dict[key]

            res.xtra["SS_DSSP"] = ss
            res.xtra["EXP_DSSP_ASA"] = acc
            res.xtra["PHI_DSSP"] = phi
            res.xtra["PSI_DSSP"] = psi
            res.xtra["DSSP_INDEX"] = dssp_index
            res.xtra["NH_O_1_RELIDX_DSSP"] = NH_O_1_relidx
            res.xtra["NH_O_1_ENERGY_DSSP"] = NH_O_1_energy
            res.xtra["O_NH_1_RELIDX_DSSP"] = O_NH_1_relidx
            res.xtra["O_NH_1_ENERGY_DSSP"] = O_NH_1_energy
            res.xtra["NH_O_2_RELIDX_DSSP"] = NH_O_2_relidx
            res.xtra["NH_O_2_ENERGY_DSSP"] = NH_O_2_energy
            res.xtra["O_NH_2_RELIDX_DSSP"] = O_NH_2_relidx
            res.xtra["O_NH_2_ENERGY_DSSP"] = O_NH_2_energy

            # Relative accessibility: capped at 1.0, or "NA" when the
            # residue name has no entry in the selected ASA table.
            resname = res.get_resname()
            try:
                rel_acc = acc / self.residue_max_acc[resname]
            except KeyError:
                # Invalid value for resname
                rel_acc = "NA"
            else:
                if rel_acc > 1.0:
                    rel_acc = 1.0
            res.xtra["EXP_DSSP_RASA"] = rel_acc
            # Verify if AA in DSSP == AA in Structure
            # Something went wrong if this is not true!
            # NB: DSSP uses X often
            try:
                resname = three_to_one(resname)
            except KeyError:
                resname = "X"
            if resname == "C":
                # DSSP renames C in C-bridges to a,b,c,d,...
                # - we rename it back to 'C'
                if _dssp_cys.match(aa):
                    aa = "C"
            # Take care of HETATM again: a hetero residue that DSSP reports
            # as 'X' is tolerated.
            if (resname != aa) and (res.id[0] == " " or aa != "X"):
                raise PDBException(f"Structure/DSSP mismatch at {res}")

            dssp_vals = (
                dssp_index,
                aa,
                ss,
                rel_acc,
                phi,
                psi,
                NH_O_1_relidx,
                NH_O_1_energy,
                O_NH_1_relidx,
                O_NH_1_energy,
                NH_O_2_relidx,
                NH_O_2_energy,
                O_NH_2_relidx,
                O_NH_2_energy,
            )

            dssp_map[(chain_id, res_id)] = dssp_vals
            dssp_list.append(dssp_vals)

        if file_type == "MMCIF":
            # The map is keyed by translated chain IDs, so hand over the
            # translated keys as well.
            dssp_keys = dssp_mapped_keys
        AbstractResiduePropertyMap.__init__(self, dssp_map, dssp_keys,
                                            dssp_list)
Beispiel #8
0
    def __init__(self, model, pdb_file, dssp="dssp"):
        """
        ::

        @param model: the first model of the structure
        @type model: L{Model} ::

        @param pdb_file: a PDB file
        @type pdb_file: string ::

        @param dssp: the dssp executable (ie. the argument to os.system)
        @type dssp: string
        """
        # create DSSP dictionary
        dssp_dict, dssp_keys = dssp_dict_from_pdb_file(pdb_file, dssp)
        dssp_map = {}
        dssp_list = []

        def resid2code(res_id):
            """Serialize a residue's resseq and icode for easy comparison."""
            return '%s%s' % (res_id[1], res_id[2])

        # Now create a dictionary that maps Residue objects to
        # secondary structure and accessibility, and a list of
        # (residue, (secondary structure, accessibility)) tuples
        for key in dssp_keys:
            chain_id, res_id = key
            chain = model[chain_id]
            try:
                res = chain[res_id]
            except KeyError:
                # In DSSP, HET field is not considered in residue identifier.
                # Thus HETATM records may cause unnecessary exceptions.
                # (See 3jui chain A res 593.)
                # Try the lookup again with all HETATM other than water
                res_seq_icode = resid2code(res_id)
                for r in chain:
                    if r.id[0] not in (' ', 'W'):
                        # Compare resseq + icode
                        if resid2code(r.id) == res_seq_icode:
                            # Found a matching residue
                            res = r
                            break
                else:
                    raise KeyError(res_id)

            # For disordered residues of point mutations, BioPython uses the
            # last one as default, But DSSP takes the first one (alternative
            # location is blank, A or 1). See 1h9h chain E resi 22.
            # Here we select the res in which all atoms have altloc blank, A or
            # 1. If no such residues are found, simply use the first one appears
            # (as DSSP does).
            if res.is_disordered() == 2:
                for rk in res.disordered_get_id_list():
                    # All atoms in the disordered residue should have the same
                    # altloc, so it suffices to check the altloc of the first
                    # atom.
                    altloc = res.child_dict[rk].get_list()[0].get_altloc()
                    if altloc in tuple('A1 '):
                        res.disordered_select(rk)
                        break
                else:
                    # Simply select the first one
                    res.disordered_select(res.disordered_get_id_list()[0])

            # Sometimes point mutations are put into HETATM and ATOM with altloc
            # 'A' and 'B'.
            # See 3piu chain A residue 273:
            #   <Residue LLP het=H_LLP resseq=273 icode= >
            #   <Residue LYS het=  resseq=273 icode= >
            # DSSP uses the HETATM LLP as it has altloc 'A'
            # We check the altloc code here.
            elif res.is_disordered() == 1:
                # Check altloc of all atoms in the DisorderedResidue. If it
                # contains blank, A or 1, then use it.  Otherwise, look for HET
                # residues of the same seq+icode.  If not such HET residues are
                # found, just accept the current one.
                altlocs = set(a.get_altloc() for a in res.get_unpacked_list())
                if altlocs.isdisjoint('A1 '):
                    # Try again with all HETATM other than water
                    res_seq_icode = resid2code(res_id)
                    for r in chain:
                        if r.id[0] not in (' ', 'W'):
                            if resid2code(r.id) == res_seq_icode and \
                               r.get_list()[0].get_altloc() in tuple('A1 '):
                                res = r
                                break

            (aa, ss, acc, phi, psi, dssp_index, NH_O_1_relidx, NH_O_1_energy,
             O_NH_1_relidx, O_NH_1_energy, NH_O_2_relidx, NH_O_2_energy,
             O_NH_2_relidx, O_NH_2_energy) = dssp_dict[key]

            res.xtra["SS_DSSP"] = ss
            res.xtra["EXP_DSSP_ASA"] = acc
            res.xtra["PHI_DSSP"] = phi
            res.xtra["PSI_DSSP"] = psi
            res.xtra["DSSP_INDEX"] = dssp_index
            res.xtra["NH_O_1_RELIDX_DSSP"] = NH_O_1_relidx
            res.xtra["NH_O_1_ENERGY_DSSP"] = NH_O_1_energy
            res.xtra["O_NH_1_RELIDX_DSSP"] = O_NH_1_relidx
            res.xtra["O_NH_1_ENERGY_DSSP"] = O_NH_1_energy
            res.xtra["NH_O_2_RELIDX_DSSP"] = NH_O_2_relidx
            res.xtra["NH_O_2_ENERGY_DSSP"] = NH_O_2_energy
            res.xtra["O_NH_2_RELIDX_DSSP"] = O_NH_2_relidx
            res.xtra["O_NH_2_ENERGY_DSSP"] = O_NH_2_energy

            # Relative accessibility
            resname = res.get_resname()
            try:
                rel_acc = acc / MAX_ACC[resname]
            except KeyError:
                # Invalid value for resname
                rel_acc = 'NA'
            else:
                if rel_acc > 1.0:
                    rel_acc = 1.0
            res.xtra["EXP_DSSP_RASA"] = rel_acc
            # Verify if AA in DSSP == AA in Structure
            # Something went wrong if this is not true!
            # NB: DSSP uses X often
            resname = SCOPData.protein_letters_3to1.get(resname, 'X')
            if resname == "C":
                # DSSP renames C in C-bridges to a,b,c,d,...
                # - we rename it back to 'C'
                if _dssp_cys.match(aa):
                    aa = 'C'
            # Take care of HETATM again
            if (resname != aa) and (res.id[0] == ' ' or aa != 'X'):
                raise PDBException("Structure/DSSP mismatch at %s" % res)

            dssp_vals = (dssp_index, aa, ss, rel_acc, phi, psi, NH_O_1_relidx,
                         NH_O_1_energy, O_NH_1_relidx, O_NH_1_energy,
                         NH_O_2_relidx, NH_O_2_energy, O_NH_2_relidx,
                         O_NH_2_energy)

            dssp_map[key] = dssp_vals
            dssp_list.append(dssp_vals)

        AbstractResiduePropertyMap.__init__(self, dssp_map, dssp_keys,
                                            dssp_list)
Beispiel #9
0
    def __init__(self,
                 model,
                 in_file,
                 dssp="dssp",
                 acc_array="Sander",
                 file_type='PDB'):
        """Create a DSSP object.

        Runs DSSP on a PDB file (or parses an existing DSSP file), then
        annotates every matching residue of ``model`` through its ``xtra``
        dictionary and builds the property map handed to
        ``AbstractResiduePropertyMap.__init__``.

        Parameters
        ----------
        model : Model
            The first model of the structure
        in_file : string
            Either a PDB file or a DSSP file.
        dssp : string
            The dssp executable (ie. the argument to os.system). If it is
            'dssp' or 'mkdssp' and cannot be found, the other name is tried
            once before giving up.
        acc_array : string
            Accessible surface area (ASA) from either Miller et al. (1987),
            Sander & Rost (1994), or Wilke: Tien et al. 2013, as string
            Sander/Wilke/Miller. Defaults to Sander.
        file_type: string
            File type switch, either PDB or DSSP with PDB as default.
            Compared case-insensitively.

        Raises
        ------
        AssertionError
            If ``file_type`` is neither 'PDB' nor 'DSSP'.
        FileNotFoundError
            If the executable is missing and either it has a custom name
            (no fallback available) or the fallback name is missing too.
        KeyError
            If a residue listed by DSSP cannot be located in its chain, not
            even among the non-water HETATM residues.
        PDBException
            If the amino acid reported by DSSP disagrees with the residue
            found in the structure.
        """
        self.residue_max_acc = residue_max_acc[acc_array]

        # create DSSP dictionary
        file_type = file_type.upper()
        # Kept as an assert so the exception type seen by callers is unchanged.
        assert (file_type in ['PDB', 'DSSP'])
        # If the input file is a PDB file run DSSP and parse output:
        if file_type == 'PDB':
            # Newer versions of DSSP program call the binary 'mkdssp', so
            # calling 'dssp' will not work in some operating systems
            # (Debian distribution of DSSP includes a symlink for 'dssp' argument)
            try:
                dssp_dict, dssp_keys = dssp_dict_from_pdb_file(in_file, dssp)
            except FileNotFoundError:
                if dssp == 'dssp':
                    dssp = 'mkdssp'
                elif dssp == 'mkdssp':
                    dssp = 'dssp'
                else:
                    raise
                # Retry with the alternative binary name only after a
                # failure, so a successful first run is not repeated.
                dssp_dict, dssp_keys = dssp_dict_from_pdb_file(in_file, dssp)
        # If the input file is a DSSP file just parse it directly:
        elif file_type == 'DSSP':
            dssp_dict, dssp_keys = make_dssp_dict(in_file)

        dssp_map = {}
        dssp_list = []

        # Alternative-location codes DSSP treats as "the first" conformer.
        first_altlocs = frozenset('A1 ')

        def resid2code(res_id):
            """Serialize a residue's resseq and icode for easy comparison."""
            return '%s%s' % (res_id[1], res_id[2])

        # Now annotate each residue with its DSSP values and build both a
        # dictionary mapping (chain id, residue id) keys to the DSSP value
        # tuple and a list of those tuples in DSSP key order.
        for key in dssp_keys:
            chain_id, res_id = key
            chain = model[chain_id]
            try:
                res = chain[res_id]
            except KeyError:
                # In DSSP, HET field is not considered in residue identifier.
                # Thus HETATM records may cause unnecessary exceptions.
                # (See 3jui chain A res 593.)
                # Try the lookup again with all HETATM other than water
                res_seq_icode = resid2code(res_id)
                for r in chain:
                    if r.id[0] not in (' ', 'W'):
                        # Compare resseq + icode
                        if resid2code(r.id) == res_seq_icode:
                            # Found a matching residue
                            res = r
                            break
                else:
                    raise KeyError(res_id)

            # For disordered residues of point mutations, BioPython uses the
            # last one as default, But DSSP takes the first one (alternative
            # location is blank, A or 1). See 1h9h chain E resi 22.
            # Here we select the res in which all atoms have altloc blank, A or
            # 1. If no such residues are found, simply use the first one appears
            # (as DSSP does).
            if res.is_disordered() == 2:
                for rk in res.disordered_get_id_list():
                    # All atoms in the disordered residue should have the same
                    # altloc, so it suffices to check the altloc of the first
                    # atom.
                    altloc = res.child_dict[rk].get_list()[0].get_altloc()
                    if altloc in first_altlocs:
                        res.disordered_select(rk)
                        break
                else:
                    # Simply select the first one
                    res.disordered_select(res.disordered_get_id_list()[0])

            # Sometimes point mutations are put into HETATM and ATOM with altloc
            # 'A' and 'B'.
            # See 3piu chain A residue 273:
            #   <Residue LLP het=H_LLP resseq=273 icode= >
            #   <Residue LYS het=  resseq=273 icode= >
            # DSSP uses the HETATM LLP as it has altloc 'A'
            # We check the altloc code here.
            elif res.is_disordered() == 1:
                # Check altloc of all atoms in the DisorderedResidue. If it
                # contains blank, A or 1, then use it.  Otherwise, look for HET
                # residues of the same seq+icode.  If no such HET residues are
                # found, just accept the current one.
                altlocs = set(a.get_altloc() for a in res.get_unpacked_list())
                if altlocs.isdisjoint(first_altlocs):
                    # Try again with all HETATM other than water
                    res_seq_icode = resid2code(res_id)
                    for r in chain:
                        if r.id[0] not in (' ', 'W'):
                            if resid2code(r.id) == res_seq_icode and \
                               r.get_list()[0].get_altloc() in first_altlocs:
                                res = r
                                break

            (aa, ss, acc, phi, psi, dssp_index, NH_O_1_relidx, NH_O_1_energy,
             O_NH_1_relidx, O_NH_1_energy, NH_O_2_relidx, NH_O_2_energy,
             O_NH_2_relidx, O_NH_2_energy) = dssp_dict[key]

            # Expose the raw DSSP values on the residue itself.
            res.xtra["SS_DSSP"] = ss
            res.xtra["EXP_DSSP_ASA"] = acc
            res.xtra["PHI_DSSP"] = phi
            res.xtra["PSI_DSSP"] = psi
            res.xtra["DSSP_INDEX"] = dssp_index
            res.xtra["NH_O_1_RELIDX_DSSP"] = NH_O_1_relidx
            res.xtra["NH_O_1_ENERGY_DSSP"] = NH_O_1_energy
            res.xtra["O_NH_1_RELIDX_DSSP"] = O_NH_1_relidx
            res.xtra["O_NH_1_ENERGY_DSSP"] = O_NH_1_energy
            res.xtra["NH_O_2_RELIDX_DSSP"] = NH_O_2_relidx
            res.xtra["NH_O_2_ENERGY_DSSP"] = NH_O_2_energy
            res.xtra["O_NH_2_RELIDX_DSSP"] = O_NH_2_relidx
            res.xtra["O_NH_2_ENERGY_DSSP"] = O_NH_2_energy

            # Relative accessibility: absolute ASA divided by the maximum for
            # this residue name in the selected table, capped at 1.0.
            resname = res.get_resname()
            try:
                rel_acc = acc / self.residue_max_acc[resname]
            except KeyError:
                # Invalid value for resname
                rel_acc = 'NA'
            else:
                if rel_acc > 1.0:
                    rel_acc = 1.0
            res.xtra["EXP_DSSP_RASA"] = rel_acc
            # Verify if AA in DSSP == AA in Structure
            # Something went wrong if this is not true!
            # NB: DSSP uses X often
            resname = SCOPData.protein_letters_3to1.get(resname, 'X')
            if resname == "C":
                # DSSP renames C in C-bridges to a,b,c,d,...
                # - we rename it back to 'C'
                if _dssp_cys.match(aa):
                    aa = 'C'
            # Take care of HETATM again: a mismatch is tolerated only for a
            # HETATM residue that DSSP reports as 'X'.
            if (resname != aa) and (res.id[0] == ' ' or aa != 'X'):
                raise PDBException("Structure/DSSP mismatch at %s" % res)

            dssp_vals = (dssp_index, aa, ss, rel_acc, phi, psi, NH_O_1_relidx,
                         NH_O_1_energy, O_NH_1_relidx, O_NH_1_energy,
                         NH_O_2_relidx, NH_O_2_energy, O_NH_2_relidx,
                         O_NH_2_energy)

            dssp_map[key] = dssp_vals
            dssp_list.append(dssp_vals)

        AbstractResiduePropertyMap.__init__(self, dssp_map, dssp_keys,
                                            dssp_list)