Esempio n. 1
0
def get_surface_residues(filename, my_acc_array, my_threshold=0.2):
    """
    Return the solvent-exposed residues of the first model of a PDB file.

    DSSP is run (through the ``mkdssp`` executable) on model 0 of
    ``filename``. A residue counts as exposed when its relative accessible
    surface area (tuple index 3 of the DSSP record) is greater than or equal
    to ``my_threshold``.

    Parameters
    ----------
    filename : str
        Path of the PDB file; passed both to the parser and to DSSP.
    my_acc_array : str
        Forwarded unchanged as the ``acc_array`` argument of ``DSSP``.
    my_threshold : float, optional
        Minimum relative ASA for a residue to be reported. Default is 0.2.

    Returns
    -------
    set of str
        One entry per exposed residue, built as residue number followed by
        the chain identifier (for example ``"12A"``).
    """
    parser = PDBParser(PERMISSIVE=1)
    structure = parser.get_structure("code.pdb", filename)
    dssp = DSSP(structure[0], filename, dssp='mkdssp', acc_array=my_acc_array)

    sys.stderr.write("\nHandled %i residues\n" % len(dssp))

    # Materialise the key list once instead of rebuilding it for every residue.
    keys = list(dssp.keys())
    residue_number = set()

    for record in sorted(dssp):
        rel_asa = record[3]
        # DSSP reports the relative ASA as a string (e.g. 'NA') when it
        # cannot be computed; such residues are never treated as exposed.
        if isinstance(rel_asa, str) or rel_asa < my_threshold:
            continue
        try:
            # record[0] is the 1-based DSSP index, used here as a position
            # in the key list.
            key = keys[record[0] - 1]
        except IndexError:
            # Report the element that actually failed.
            sys.stderr.write("Element " + str(record[0]) +
                             " index out of range\n")
            continue
        residue_number.add(str(key[1][1]) + key[0])
    return residue_number
Esempio n. 2
0
    def _run(self, pdbcode, chains):
        """Run DSSP on ``pdb<pdbcode>.ent`` and collect records for ``chains``.

        The file is looked up under ``PDB_PATH`` and DSSP is invoked through
        the executable named by ``dssp_route``.

        Returns a ``(records, chain)`` tuple: ``records`` holds the DSSP
        tuples of every residue whose chain id is in ``chains``, and
        ``chain`` is the chain id of the very first DSSP key (regardless of
        whether that chain was requested).
        """
        pdb_file = os.path.join(PDB_PATH, 'pdb' + pdbcode + '.ent')
        model = PDBParser().get_structure(pdbcode, pdb_file)[0]
        dssp = DSSP(model, pdb_file, dssp=dssp_route)

        all_keys = dssp.keys()
        selected = []
        for key in all_keys:
            if key[0] in chains:
                selected.append(dssp[key])
        first_chain = all_keys[0][0]

        return selected, first_chain
Esempio n. 3
0
	def add_dssp(pdb_file, dssp_exc='/Users/thorn/dssp-2.2.1/mkdssp'):
		"""Map residue numbers to DSSP secondary-structure codes.

		Parses ``pdb_file``, runs DSSP on its first model and returns a
		dictionary ``{residue_number: secondary_structure_code}``.

		Parameters
		----------
		pdb_file : str
			Path of the PDB file; also used as the structure identifier.
		dssp_exc : str, optional
			Path to the DSSP executable. Defaults to the location that was
			previously hard-coded in this function.

		Notes
		-----
		The key is the residue sequence number only, so residues that share
		a number (other chains, insertion codes) overwrite one another and
		the last one seen wins.
		"""
		from Bio.PDB.DSSP import DSSP

		parser = PDBParser()
		structure = parser.get_structure(pdb_file, pdb_file)
		model = structure[0]

		dssp = DSSP(model, pdb_file, dssp_exc)

		# DSSP record layout: index 1 is the amino acid, index 2 the
		# secondary structure. The secondary structure is what this
		# function promises to return.
		sec_dict = {}
		for a_key in dssp.keys():
			sec_dict[a_key[1][1]] = dssp[a_key][2]

		return sec_dict
Esempio n. 4
0
def calc_dssp(model, chain_sites: dict, pdb_name: str) -> None:
    """Copy DSSP results onto the residue objects stored in ``chain_sites``.

    DSSP is run with the ``mkdssp`` executable on ``model`` / ``pdb_name``.
    For every DSSP residue whose chain id and residue number are present in
    ``chain_sites``, the object at ``chain_sites[chain_id][resnumb][2]``
    receives the attributes ``sec_struct``, ``sasa_r``, ``phi`` and ``psi``.
    The module-level ``session`` is committed afterwards, whether or not
    DSSP succeeded.

    If DSSP raises, "dssp failed!" is printed and no residue is updated.
    """
    # DSSP
    #    ============ ===
    #    Tuple Index  Value
    #    ============ ===
    #    0            DSSP index
    #    1            Amino acid
    #    2            Secondary structure
    #    3            Relative ASA
    #    4            Phi
    #    5            Psi
    #    6            NH-->O_1_relidx
    #    7            NH-->O_1_energy
    #    8            O-->NH_1_relidx
    #    9            O-->NH_1_energy
    #    10           NH-->O_2_relidx
    #    11           NH-->O_2_energy
    #    12           O-->NH_2_relidx
    #    13           O-->NH_2_energy
    #    ============ ===

    try:
        dssp = DSSP(model, pdb_name, dssp="mkdssp")
    except Exception:
        # Best effort: a DSSP failure must not abort the caller, but
        # KeyboardInterrupt / SystemExit are no longer swallowed.
        dssp = {}
        print("dssp failed!")

    for residue in dssp.keys():
        # Only indices 2..5 of the record are stored (see the table above).
        sec_struct, sasa_r, phi, psi = dssp[residue][2:6]

        chain_id, res_id = residue
        _, resnumb, _ = res_id

        if chain_id not in chain_sites or resnumb not in chain_sites[chain_id]:
            continue

        new_res = chain_sites[chain_id][resnumb][2]
        new_res.sec_struct = sec_struct
        new_res.sasa_r = sasa_r
        new_res.phi = phi
        new_res.psi = psi
    session.commit()
def calc_features(PATH, PDB_id, OUTPATH):
    """Build per-atom structural features for one PDB entry and pickle them.

    Reads ``<PATH>/<PDB_id>.pdb``, runs DSSP and NACCESS on the first model,
    and for every atom whose residue is known to DSSP stores a feature tuple
    ``(SS label, phi / 360, psi / 360, rsa_label)`` keyed by the atom
    coordinates. ``rsa_label`` is 1 when the residue's ``"all_atoms_abs"``
    NACCESS value exceeds the module-level ``Threshold`` and 0 otherwise.

    The resulting dictionary is pickled to ``<OUTPATH>/<PDB_id>.dat``.
    Progress and statistics are printed to stdout.
    """
    # Loading the files
    parser = PDBParser(PERMISSIVE=1)

    filename = os.path.join(PATH, PDB_id + ".pdb")
    structure = parser.get_structure(PDB_id, filename)
    model = structure[0]

    # DSSP analysis for SS, PHI, PSI
    dssp = DSSP(model, filename)
    # Built once: the membership test below runs for every atom, and
    # rebuilding/scanning a list each time made the loop quadratic.
    dssp_keys = frozenset(dssp.keys())

    # NACCESS analysis for SASA
    rsa, asa = run_naccess(model, filename)
    rsa = process_rsa_data(rsa)

    # Feature mapping to each atomic coordinate
    dssp_present, dssp_not_present = 0, 0

    feature = dict()  # The feature dictionary

    # NOTE(review): every model of the structure is visited although DSSP
    # and NACCESS were computed from model 0 only - confirm this is intended.
    for model in structure:
        for chain in model:
            for residue in chain:
                for atom in residue:
                    full_id = atom.get_full_id()
                    print(full_id)
                    ID = (full_id[2], full_id[3])

                    if ID in dssp_keys:
                        if rsa[ID]["all_atoms_abs"] > Threshold:
                            rsa_label = 1
                        else:
                            rsa_label = 0
                        record = dssp[ID]
                        feat = (SS_Labels[record[2]], record[4] / 360,
                                record[5] / 360, rsa_label)
                        feature[tuple(atom.get_coord())] = feat

                        print(ID, atom.get_coord(), feat)
                        dssp_present += 1

                    else:
                        print("==> ID not present : ", full_id)
                        dssp_not_present += 1

    # Printing the stats
    print(
        "==> STATS : PDBID : %s , DSSP PRESENT : %s , DSSP NOT PRESENT : %s" %
        (PDB_id, dssp_present, dssp_not_present))

    # Saving the feature dictionary for this PDB file
    with open(os.path.join(OUTPATH, PDB_id + ".dat"), "wb+") as f:
        pickle.dump(feature, f)
        print("==> Dump completed")
def getSecondaryStructure(pdbID):
    """Return the DSSP secondary-structure string of a PDB entry.

    The entry is fetched through the ``pdb`` helper, parsed from the path
    given by ``pdb.getTemporaryPDBPath()`` and analysed with DSSP on its
    first model. The result concatenates the secondary-structure code
    (DSSP tuple index 2) of every residue, in DSSP key order.
    """
    pdb.fetchInPDBFormat(pdbID)
    pdb_path = pdb.getTemporaryPDBPath()
    structure = PDBParser().get_structure(pdbID, pdb_path)
    dssp = DSSP(structure[0], pdb_path)
    # The amino-acid sequence was previously accumulated too but never used.
    return "".join(dssp[key][2] for key in dssp.keys())
Esempio n. 7
0
def accessible_surface_area(PDB_file):
    """Compute the solvent-accessible surface area of each residue.

    Runs DSSP (``mkdssp``) on the first model of ``PDB_file`` and returns a
    dictionary mapping a running 0-based counter to the relative ASA (DSSP
    tuple index 3). Residues whose amino-acid code is ``'X'`` are skipped
    and do not consume a counter value.
    """
    parser = PDBParser()
    structure_id = PDB_file.split(".")[0]
    first_model = parser.get_structure(structure_id, PDB_file)[0]
    dssp = DSSP(first_model, PDB_file, dssp='mkdssp')

    known_residue_asa = (dssp[key][3] for key in dssp.keys()
                         if dssp[key][1] != 'X')
    return dict(enumerate(known_residue_asa))
Esempio n. 8
0
def calculate_asa(model, filename, AROM_LIST, chain_list):
    """Returns a list of surface exposed residues as determined by relative solvent accessibility.

    Only standard protein residues are currently supported. Non-protein and user specified custom residues cannot be
    classified as surface exposed using this criteria.

    Parameters
    ---------
    model: :class:`Bio.PDB.Model.Model`
        Model which contains chains and residues of protein structure
    filename: str
        Name of pdb file to be analyzed
    AROM_LIST : list of str
        List containing which standard residues are included in analysis
    chain_list: list of str
        Chains are included in analysis

    Returns
    -------
    list of str
        One ``"<letter><residue number>(<chain>)"`` entry per exposed residue. Empty (with a ``RuntimeWarning``)
        when the DSSP calculation fails.

    Notes
    -----
    The relative accessible surface area (RSA) of each residue is calculated using the Bio.PDB.DSSP module. A residue
    with an RSA value of 0.05 or higher is classified as surface exposed. Residues for which DSSP reports a
    non-numeric RSA (the string ``'NA'``) are skipped.

    References
    ---------
    Tien, M. Z.; Meyer, A. G.; Sydykova, D. K.; Spielman, S. J.; Wilke, C. O. PLoS ONE 2013, 8 (11).
        Reference for relative solvent accessibility cutoff of 0.05, and for MaxASA values

    """
    cutoff = .05
    surface_exposed_res = []
    letter_list = []
    for res_name in AROM_LIST:
        letter = res_name_to_char.get(res_name)
        if letter:
            letter_list.append(letter)
    try:
        dssp = DSSP(model, filename, acc_array="Wilke")
        for key in dssp.keys():
            record = dssp[key]
            aa, rsa = record[1], record[3]
            if key[0] not in chain_list or aa not in letter_list:
                continue
            # A string RSA ('NA') used to raise TypeError on comparison,
            # which aborted the scan with a misleading "DSSP not installed"
            # warning and a truncated result.
            if isinstance(rsa, str) or rsa < cutoff:
                continue
            surface_exposed_res.append(
                aa + str(key[1][1]) + "(" + str(key[0]) + ")")
    except Exception:
        # Deliberate best effort: any DSSP failure degrades to a warning.
        warnings.warn(
            "Unable to calculate solvent accessibility. Check that DSSP is installed.",
            RuntimeWarning,
            stacklevel=2)
    return surface_exposed_res
Esempio n. 9
0
    def __init__(self, pdbfile, fastafile):
        """Load a FASTA sequence and a PDB structure and align them.

        Parameters
        ----------
        pdbfile : str
            Path of the PDB file.
        fastafile : str
            Path of the FASTA file. The first line, minus its first
            character, is used as the protein id and its last character as
            the chain id; the second line is the sequence.

        Attributes set
        --------------
        seq, residues, pdbseq, pdbss3seq, re_index, alignment, ss3seq,
        dist_matrix, angle_matrix.
        """
        names = {'HIS': 'H', 'ASP': 'D', 'ARG': 'R', 'PHE': 'F', 'ALA': 'A',
                 'CYS': 'C', 'GLY': 'G', 'GLN': 'Q', 'GLU': 'E', 'LYS': 'K',
                 'LEU': 'L', 'MET': 'M', 'ASN': 'N', 'SER': 'S', 'TYR': 'Y',
                 'THR': 'T', 'ILE': 'I', 'TRP': 'W', 'PRO': 'P', 'VAL': 'V'}
        # Load fasta residue sequence
        with open(fastafile) as f:
            ff = [line.rstrip("\n") for line in f]
        p_id = ff[0][1:]
        self.seq = ff[1]

        # Load pdb information
        p = PDBParser(PERMISSIVE=1)
        st = p.get_structure(p_id, pdbfile)
        model = st[0]
        tag = p_id[-1]
        chain = model[tag]
        self.residues = list(chain.get_residues())
        ## sequence info from pdb (residues with unknown names are dropped)
        # `in` replaces dict.has_key(), which does not exist in Python 3.
        self.pdbseq = "".join(names[res.get_resname()]
                              for res in self.residues
                              if res.get_resname() in names)
        ## 3-state sse info from pdb
        dssp = DSSP(model, pdbfile)
        to3_dict = {'-': 'C', 'G': 'H', 'H': 'H', 'I': 'H', 'E': 'E',
                    'B': 'E', 'T': 'C', 'S': 'C', 'L': 'C'}
        # NOTE(review): DSSP keys cover the whole model while pdbseq covers
        # only chain `tag`; for multi-chain files the two strings may not
        # line up - confirm inputs are single-chain.
        self.pdbss3seq = "".join(to3_dict[dssp[k][2]] for k in dssp.keys())

        # Align the pdb sequence (always missing some residues) to fasta sequence
        alignment = AlignNW(self.seq, self.pdbseq)
        self.re_index = [i - 1 for i in alignment['j']]  # minus 1 for indexing

        # generate sequence alignment between pdb sequence and fasta sequence
        self.alignment = "".join([self.pdbseq[i] if i > -1 else "-" for i in self.re_index])
        self.alignment = "\n".join([self.seq, self.alignment])

        # generate full length of 3-state SSE sequence according to re-index
        self.ss3seq = "".join([self.pdbss3seq[i] if i > -1 else "C" for i in self.re_index])

        # generate full length of distance matrix (distance=-1 when missing in pdbseq)
        self.dist_matrix = self.generate_dist_matrix()
        # generate full length of angle matrix (None when missing in pdbseq)
        self.angle_matrix = self.generate_angle_matrix()
def getDSSP(
    struct,
    fname,
    dsspPath=os.path.expanduser(
        "~/Tesis/rriPredMethod/dependencies/bioinformaticTools/dssp/mkdssp")):
    """Group the residue ids of the first model by chain and DSSP symbol.

    Runs DSSP (executable ``dsspPath``) on ``struct[0]`` / ``fname`` and
    returns ``{chain_id: {symbol: [res_id, ...]}}``. Every chain of the
    model gets one (possibly empty) list per entry of ``DSSP_SYMBOLS``.
    """
    firstModel = struct[0]
    dssp = DSSP(firstModel, fname, dssp=dsspPath)

    # Pre-create an empty bucket for every (chain, symbol) pair.
    dsspDict = {}
    for chain in firstModel.child_list:
        dsspDict[chain.get_id()] = {symbol: [] for symbol in DSSP_SYMBOLS}

    for key in dssp.keys():
        chainId, resId = key
        dsspDict[chainId][dssp[key][2]].append(resId)

    return dsspDict
Esempio n. 11
0
 def __applyDssp(self):
     """Tag every atom in ``self.atoms`` with its DSSP secondary structure.

     Parses ``<pdbDataPath>pdb<pdbCode>.ent``, runs DSSP on the first model
     and calls ``setDsspInfo`` on each atom whose ``values['rid']`` and
     ``values['chain']`` match a DSSP residue number and chain id.
     """
     import Bio.PDB as bio
     print('PSU: applying dssp')
     from Bio.PDB.DSSP import DSSP
     pdbFile = self.pdbDataPath + 'pdb' + self.pdbCode + '.ent'
     structure = bio.PDBParser().get_structure(self.pdbCode, pdbFile)
     dssp = DSSP(structure[0], pdbFile)
     for residue_key in list(dssp.keys()):
         chain_id = residue_key[0]
         residue_no = residue_key[1][1]
         sec_struct = dssp[residue_key][2]
         for atom in self.atoms:
             if atom.values['rid'] != residue_no:
                 continue
             if atom.values['chain'] != chain_id:
                 continue
             atom.setDsspInfo(sec_struct)
     print('PSU: applied dssp successfully')
Esempio n. 12
0
def Make_dssp():
    """Build a lookup of DSSP secondary structure for ``./current_pdb.txt``.

    Returns a dictionary whose keys are ``<chain id><residue number><three
    letter residue name>`` and whose values are the DSSP secondary-structure
    code of that residue. The extra key ``'X'`` maps to NaN.
    """
    ref = {
        'A': 'ALA',
        'R': 'ARG',
        'N': 'ASN',
        'D': 'ASP',
        'B': 'ASX',
        'C': 'CYS',
        'E': 'GLU',
        'Q': 'GLN',
        'Z': 'GLX',
        'G': 'GLY',
        'H': 'HIS',
        'I': 'ILE',
        'L': 'LEU',
        'K': 'LYS',
        'M': 'MET',
        'F': 'PHE',
        'P': 'PRO',
        'S': 'SER',
        'T': 'THR',
        'W': 'TRP',
        'Y': 'TYR',
        'V': 'VAL',
        'X': '---'
    }
    # np.nan instead of the np.NaN alias, which NumPy 2.0 removed.
    dssp_dict = {'X': np.nan}
    p = PDBParser()
    structure = p.get_structure('model', './current_pdb.txt')
    mod = structure[0]
    dssp = DSSP(mod, './current_pdb.txt')
    # Iterate the keys directly rather than rebuilding the key list per index.
    for a_key in dssp.keys():
        record = dssp[a_key]
        dssp_dict[str(a_key[0]) + str(a_key[1][1]) +
                  str(ref[record[1]])] = record[2]
    return dssp_dict
Esempio n. 13
0
            print()
            os.remove(filename)
            continue

        valid_aa = [
            'A', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L', 'M', 'N', 'P',
            'Q', 'R', 'S', 'T', 'V', 'W', 'Y'
        ]
        current_chain = ""
        chain = ""
        phi_psis = []
        dihedrals = []
        chains = []
        chain_names = []
        # we assume that the chains and residues are in order, i.e. A1,A2,A3,...,B1,B2,B3,...
        for key in dssp.keys():

            chain_id = key[0]

            if not current_chain or chain_id != current_chain:
                current_chain = chain_id
                chain_names.append(chain_id)
                if chain:
                    assert len(chain) == len(phi_psis), \
                        "the length of chain '%s' does not equal the number of dihedrals: %s" % (chain, len(phi_psis))
                    chains.append(chain)
                    dihedrals.append(phi_psis)
                chain = ""
                phi_psis = []

            residue = dssp[key]
Esempio n. 14
0
from Bio.PDB import PDBParser
from Bio.PDB.DSSP import DSSP

#Read in and Parse PDB file to obtain DSSP --> secondary structure determination
#follows the basic outline on biopython.org -- tutorial
parse = PDBParser()
struc = parse.get_structure('6hrc', "6hrc.pdb")
model = struc[0]
dssp = DSSP(model, '6hrc.pdb')
sec_struc = ''
a_helix = 0
b_sheet = 0
other = 0
none = 0

# Classify every residue into exactly one bucket. The previous chain of
# independent `if` statements let the final `else` fire for helix and sheet
# residues as well, so `other` was over-counted.
for key in dssp.keys():
    ss = dssp[key][2]  # DSSP tuple index 2 is the secondary structure code
    sec_struc += ss
    if ss in ("H", "G", "I"):
        a_helix += 1
    elif ss in ("E", "B"):
        b_sheet += 1
    elif ss == "-":
        none += 1
    else:
        other += 1
Esempio n. 15
0
def calc_features(PATH, pdb_ligand_ID, OUTPATH):
    """Build per-atom DSSP / NACCESS / PSSM features for one chain and pickle them.

    The PDB file ``<PATH>/<first four characters of pdb_ligand_ID, lower-cased>.pdb``
    is parsed; DSSP and NACCESS are run on its first model. Only the chain
    whose id equals the third underscore-separated field of ``pdb_ligand_ID``
    is processed. For every atom of a processed residue that DSSP knows, the
    feature tuple ``(SS label, phi / 360, psi / 360, rsa_label) + PSSM row``
    is stored under the atom's coordinate tuple.

    The dictionary is pickled to ``<OUTPATH>/<pdb_ligand_ID>.dat``; progress
    and statistics are printed to stdout.
    """

    # Loading the files
    parser = PDBParser(PERMISSIVE=1)

    PDB_id = pdb_ligand_ID[:4].lower()  #+ '_pocket'
    filename = os.path.join(PATH, PDB_id + ".pdb")
    structure = parser.get_structure(PDB_id, filename)
    model = structure[0]

    # DSSP analysis for SS, PHI, PSI
    dssp = DSSP(model, filename)

    # NACCESS analysis for SASA
    rsa, asa = run_naccess(model, filename)
    rsa = process_rsa_data(rsa)
    # print(rsa)
    # Feature mapping to each atomic coordinate
    dssp_present, dssp_not_present = 0, 0

    feature = dict()  # The feature dictionary

    # NOTE(review): all models are visited although DSSP/NACCESS were run on
    # model 0 only - confirm this is intended.
    for model in structure:
        for chain in model:
            # Process only the chain named in the third '_' field of the id.
            if (chain.get_full_id()[2] == pdb_ligand_ID.split('_')[2]):
                # PSSM lookup key: upper-cased structure id + '_' + chain id.
                pssm_ID = chain.get_full_id()[0][:4].upper(
                ) + '_' + chain.get_full_id()[2]
                pssm = parse_PSSM(pssm_ID)
                start = True  # never read below (only in commented-out code)
                gap = 0  # running count of residue numbers skipped so far
                idx_prev = 0  # residue number of the last processed residue
                for residue in chain:
                    # if(start):
                    # start_idx =residue.get_full_id()[3][1]
                    # idx_prev = 0
                    idx = residue.get_full_id()[3][1]
                    if (idx < 1):
                        # Residues numbered below 1 are ignored entirely.
                        print(idx)
                        a = 0  # `a` is only used in the debug print below
                        pass
                    elif (idx - idx_prev >= 1):
                        print(idx)
                        a = 1
                        # Numbers missing between the previous and the
                        # current residue shift the PSSM row index.
                        gap += idx - idx_prev - 1
                        # elif(start):
                        # gap += -1
                        # start = False

                        for atom in residue:
                            # print(atom.get_full_id())
                            # (chain id, residue id): the key format used
                            # for the dssp and rsa lookups below.
                            ID = (atom.get_full_id()[2], atom.get_full_id()[3])

                            # NOTE(review): this rebuilds the key list for
                            # every atom; a set built once would be cheaper.
                            if (ID in list(dssp.keys())):
                                if (rsa[ID]["all_atoms_abs"] > RSA_Threshold):
                                    rsa_label = 1
                                else:
                                    rsa_label = 0

                                print(gap, atom.get_full_id()[3][1], a)
                                # PSSM rows are keyed by the string of the
                                # gap-corrected residue number.
                                feat = (SS_Labels[dssp[ID][2]],
                                        dssp[ID][4] / 360, dssp[ID][5] / 360,
                                        rsa_label) + tuple(
                                            pssm[str(atom.get_full_id()[3][1] -
                                                     gap)])
                                feature[tuple(atom.get_coord())] = feat

                                print(pdb_ligand_ID[:4], ID, atom.get_coord(),
                                      feat)
                                dssp_present += 1

                            else:
                                print(">>> ID not present : ",
                                      atom.get_full_id())
                                dssp_not_present += 1
                        # Updated only for processed residues; a residue
                        # whose number does not exceed idx_prev falls
                        # through both branches and is skipped.
                        idx_prev = idx

    # Printing the stats
    print(
        "===> STATS : PDBID : %s , DSSP PRESENT : %s , DSSP NOT PRESENT : %s" %
        (PDB_id, dssp_present, dssp_not_present))

    # Saving the feature dictionary for this entry
    with open(os.path.join(OUTPATH, pdb_ligand_ID + ".dat"), "wb+") as f:
        pickle.dump(feature, f)
        print("====> Dump completed")
# Parse the command line; the parser itself is defined outside this excerpt.
args = parser.parse_args()
# args = parser.parse_args(['-p', '-c', 'A', '--pymol', 'ctc445/CTC-445.pdb'])
# args = parser.parse_args(['-p', '-c', 'A', '--pymol', 'ctc445/CTC-640.pdb'])

# Process every PDB path given on the command line.
for pdb in args.pdbs:
    init_time = time()
    input_basename, _ = path.splitext(path.basename(pdb))
    # parse input pdb and prepare for prediction
    input_structure = PDB.PDBParser().get_structure('input_structure', pdb)
    try:
        # DSSP is run on the first model only.
        dssp = DSSP(input_structure[0], pdb)
    except Exception as e:
        raise RuntimeError('DSSP failed', e)
    # Secondary-structure codes (DSSP tuple index 2), restricted to
    # args.chain when one was given, otherwise for every residue.
    ss = [
        dssp[k][2] for k in filter(
            lambda k: k[0] == args.chain if args.chain else True, dssp.keys())
    ]

    if args.chain:
        # From here on input_structure is rebound to the first chain whose
        # id matches args.chain; StopIteration is raised if none matches.
        input_structure = next(
            filter(lambda c: c.id == args.chain, input_structure.get_chains()))

#     ca_atoms = list(filter(lambda a: a.name == 'CA', input_structure.get_atoms()))
    # Backbone atoms selected by atom name.
    n_atoms = list(filter(lambda a: a.name == 'N',
                          input_structure.get_atoms()))
    c_atoms = list(filter(lambda a: a.name == 'C',
                          input_structure.get_atoms()))
    ca_atoms = list(
        filter(lambda a: a.name == 'CA', input_structure.get_atoms()))

    cb_xyz = []
Esempio n. 17
0
from Bio.PDB import *
from Bio.PDB.DSSP import DSSP

print("START")
p = PDBParser()
structure = p.get_structure("X", "3g8n.pdb")
model = structure[0]

#chain = model['A']
#print("RESIDES #:", len(list(chain)))

dssp = DSSP(model, "3g8n.pdb", dssp='mkdssp')
print(dssp.keys()[2][1][1])  # residue number of the third DSSP entry

# Secondary-structure codes (DSSP tuple index 2) of chain 'A', in key order.
result = "".join(
    dssp[residue_key][2]
    for residue_key in dssp.keys()
    if residue_key[0] == 'A')
print("STRING", result)
print(len(result))

#a_key = list(dssp.keys())[4]
#print("RESULT", dssp[a_key])
#print("LEN", len(list(dssp.keys())))
Esempio n. 18
0
    def computeOneFile(self, pdbFname, struct):
        '''
        Computes DSSP for a given pdb file and writes one record per residue.

        :param pdbFname: str. fname to pdb file
        :param struct: Bio.PDB.Structure
        :return: 0 when the results were already computed, None otherwise

        Each record is "<chainId> <resId> <resName> <secStruct>". Residues
        DSSP did not report, and residues reported as "-", get the code "Z".
        If anything fails (including KeyboardInterrupt) the output files are
        removed before the exception is re-raised.
        '''

        allResidues = set()
        for chain in struct[0]:
            allResidues.update(chain.get_residues())

        prefixExtended = self.getExtendedPrefix(pdbFname)
        prefix, chainType = self.splitExtendedPrefix(prefixExtended)[:2]
        if self.checkAlreayComputed(prefixExtended):
            print("Dssp already computed for %s" % prefixExtended)
            return 0
        print("launching Dssp over %s" % prefixExtended)
        try:

            featuresDict = {}
            try:
                dssp_out = DSSP(struct[0], pdbFname, dssp=self.dsspBinPath)
            except Exception as e:
                # str(e) instead of e.message: the attribute does not exist
                # in Python 3, so the old check raised AttributeError here.
                if "DSSP failed to produce an output" in str(e):
                    # Treated as "no secondary structure available".
                    dssp_out = {}
                else:
                    print(e)
                    raise
            for chainId, resId in dssp_out.keys():
                secStruct = dssp_out[(chainId, resId)][2]
                if secStruct == "-":
                    secStruct = "Z"
                try:
                    featuresDict[struct[0][chainId][resId]] = secStruct
                except KeyError:
                    # Residue reported by DSSP but absent from the structure.
                    continue

            dataDict = {}
            for aa in allResidues:
                chainId, resIdStr, resName = self.fromRes2ChainResIdAndName(aa)
                if resName is None:
                    continue
                record = " ".join(
                    [chainId, resIdStr, resName, featuresDict.get(aa, "Z")])
                dataDict.setdefault(chainId, []).append(record)

            categoricalLevels = {
                ("H", "B", "E", "G", "I", "T", "S", "Z"): ("2ndStruct", )
            }
            self.writeResultsFromDataDictSingleChain(
                dataDict,
                outName=self.getFNames(prefixExtended)[0],
                categoricalLevels=categoricalLevels)

        except (Exception, KeyboardInterrupt):
            self.tryToRemoveAllFnames(prefixExtended)
            raise
Esempio n. 19
0
def get_struc_feat(seq_len, model_file):
    '''
    Extract secondary structure, RSA and distance-map features from a model.

    Args:
        seq_len (int): sequence length.
        model_file (string): the path of model file.

    Returns:
        dict with keys
            'SS3'  : (seq_len, 3) one-hot array indexed through SS3_TYPES,
            'RSA'  : (seq_len, 1) relative accessible surface area,
            'CaCa', 'CbCb', 'NO' : (seq_len, seq_len, 1) float16 distance
                     maps scaled by 0.1, with -1 where either atom is missing.

    Residues are placed at position ``residue number - 1``. Residue numbers
    outside ``1..seq_len`` are ignored, both for the DSSP features and for
    the coordinates.
    '''
    feature = {}

    structure = pdb_parser.get_structure("tmp_stru", model_file)
    model = structure.get_list()[0]
    # Only the first chain of the first model provides coordinates.
    residues = model.get_list()[0].get_list()

    # SS and RSA
    dssp = DSSP(model, model_file, dssp='dssp')
    SS3s, RSAs = np.zeros((seq_len, 3)), np.zeros((seq_len, 1))
    for _key in dssp.keys():
        res_index = _key[1][1]
        if 1 <= res_index <= seq_len:
            record = dssp[_key]
            SS3s[res_index - 1, SS3_TYPES[record[2]]] = 1
            RSAs[res_index - 1] = [record[3]]
    feature['SS3'] = SS3s
    feature['RSA'] = RSAs

    atom_types = ['CA', 'CB', 'N', 'O']
    # generate empty coordinates
    coordinates = [dict.fromkeys(atom_types) for _ in range(seq_len)]

    # extract coordinates from pdb
    for res in residues:
        pos = res.id[1] - 1
        # Same range guard as the DSSP loop above: without it a residue
        # number <= 0 silently wrapped to the end of the list and one
        # > seq_len raised IndexError.
        if not 0 <= pos < seq_len:
            continue
        slot = coordinates[pos]
        for atom in res:
            if atom.name in atom_types:
                slot[atom.name] = atom.coord
        # copy CA coordinate to CB if CB is None (GLY)
        if slot['CB'] is None:
            slot['CB'] = slot['CA']

    # distance map
    atom_pairs = ['CaCa', 'CbCb', 'NO']
    for atom_pair in atom_pairs:
        # Split the pair name in half: 'CaCa' -> 'CA', 'CA'; 'NO' -> 'N', 'O'.
        half = len(atom_pair) // 2
        atom1, atom2 = atom_pair[:half].upper(), atom_pair[half:].upper()
        X = [
            list(c[atom1]) if c[atom1] is not None else [0, 0, 0]
            for c in coordinates
        ]
        X_valid = [0 if c[atom1] is None else 1 for c in coordinates]
        Y = [
            list(c[atom2]) if c[atom2] is not None else [0, 0, 0]
            for c in coordinates
        ]
        Y_valid = [0 if c[atom2] is None else 1 for c in coordinates]
        dist = scipy.spatial.distance_matrix(X, Y).astype(np.float16)
        # Mark every pair that involves a missing atom with -1.
        XY_valid = np.outer(X_valid, Y_valid)
        np.putmask(dist, XY_valid == 0, -1)
        if atom1 == atom2:
            np.fill_diagonal(dist, 0)  # set the self distance to 0

        feature[atom_pair] = dist.reshape(
            (dist.shape[0], dist.shape[1], -1)) * 0.1

    return feature
Esempio n. 20
0
phi = []
psi = []
exp_phi = np.array([60, -80])
exp_psi = np.array([-120, 0])
b_turns = []

# Position vectors of every atom of chain 'A' in the first model.
vectors = [atom.get_vector() for atom in structure[0]['A'].get_atoms()]

# Angle of each consecutive atom triple; even start indices are stored in
# `phi`, odd ones in `psi`.
for i in range(len(vectors) - 2):
    angle = calc_angle(vectors[i], vectors[i + 1], vectors[i + 2])
    if i % 2:
        psi.append(angle)
    else:
        phi.append(angle)

df = pd.DataFrame(list(zip(phi, psi)), columns=['phi', 'psi'])

# Keep every rolling window whose phi and psi values both match the
# expected pair within `error`.
for df_slice in df.rolling(window=2):
    phi_matches = np.allclose(df_slice['phi'].values, exp_phi, atol=error)
    if phi_matches and np.allclose(
            df_slice['psi'].values, exp_psi, atol=error):
        b_turns.append(df_slice)

print('hello')

dssp = DSSP(structure[0], '1g60.pdb', dssp='mkdssp')

# `phi` is rebound here to the fifth DSSP key; the angle list is discarded.
phi = list(dssp.keys())[4]
print(dssp[phi])
Esempio n. 21
0
        #Printing atomic types
        #########################################
        f_write.write("\n[")
        #print("[")
        for i in range(len(all_atom_types)):
            #print("[")
            f_write.write("[")
            for j in range(len(all_atom_types[i])):
                f_write.write(list_atoms_types[all_atom_types[i][j]] + " ")
                #print(list_atoms_types[all_atom_types[i][j]])
            #print("]")
            f_write.write("]")
        f_write.write("]")
        #print("]")

        #########################################
        #Printing secondary structure
        #########################################
        model = structure[0]
        dssp = DSSP(model, file_data_name, dssp='mkdssp')
        secondary_struct = ""
        for i in range(len(list(dssp.keys()))):
            if dssp.keys()[i][0] != num_chain:
                continue
            a_key = list(dssp.keys())[i]
            secondary_struct += dssp[a_key][2]
        f_write.write("\n" + secondary_struct)
        num_of_structures += 1
        f_write.write("\n\n")
        os.remove(new_name)
     f_write.write("]")
 f_write.write("]")
 print("]")
 f_write.write("\n[")
 print("[")
 for i in range(len(all_atom_types)):
     print("[")
     f_write.write("[")
     for j in range(len(all_atom_types[i])):
         f_write.write(list_atoms_types[all_atom_types[i][j]] + " ")
         print(list_atoms_types[all_atom_types[i][j]])
     print("]")
     f_write.write("]")
 f_write.write("]")
 print("]\n")
 #break
 # dssp:
 model = structure[0]
 dssp = DSSP(model, file_data_name, dssp='mkdssp')
 num_res_in_chain = 5  # TODO: change!
 secondary_struct = ""
 for i in range(num_res_in_chain):
     a_key = list(dssp.keys())[i]
     result = dssp[a_key][2]  # returns DSSP secondary structure
     secondary_struct += result
 f_write.write("secondary_struct" + secondary_struct)
 print("secondary_struct" + secondary_struct)
 # delete current file
 os.remove(file_name)
 num_of_structures += 1
 break
Esempio n. 23
0
def main(pdb_file, dssp_exe, out_file):
    """Compute per-residue structural features of a PDB file and pickle them.

    For the first model of ``pdb_file`` the feature matrix has one row per
    residue and 17 columns: 8-state one-hot secondary structure, relative
    ASA (0 where DSSP reports 'NA'), then sine and cosine of the phi, psi,
    theta and tau angles. Angles that cannot be computed at the chain ends
    keep the placeholder 2*pi. The matrix is written to ``out_file`` as
    float32 with pickle protocol 2.

    The function asserts that the number of collected residues equals the
    length of the last chain iterated.
    """
    # Read PDB structure
    model = PDBParser().get_structure('id', pdb_file)[0]

    # Run DSSP. Record layout:
    # (dssp index, amino acid, secondary structure, relative ASA, phi, psi,
    # NH_O_1_relidx, NH_O_1_energy, O_NH_1_relidx, O_NH_1_energy,
    # NH_O_2_relidx, NH_O_2_energy, O_NH_2_relidx, O_NH_2_energy)
    dssp = DSSP(model, pdb_file, dssp=dssp_exe)
    sec_struc = []
    rel_asa = []
    for residue_key in list(dssp.keys()):
        record = dssp[residue_key]
        sec_struc.append(record[2])
        rel_asa.append(record[3] if record[3] != 'NA' else 0)
    rel_asa = np.expand_dims(rel_asa, axis=1)

    # Get coordinates for N, CA and C atoms
    coor_N, coor_CA, coor_C = [], [], []
    for chain in model.get_list():
        chain_residues = chain.get_list()
        total_len = len(chain_residues)
        for residue in chain_residues:
            coor_N.append(residue['N'].get_coord())
            coor_CA.append(residue['CA'].get_coord())
            coor_C.append(residue['C'].get_coord())

    assert len(coor_N) == len(coor_CA) == len(coor_C) == total_len

    # Get dihedral angles and inter-residue angles
    placeholder = 2 * np.pi  # value kept when an angle cannot be computed
    last = total_len - 1
    angles = []
    for j in range(total_len):
        phi_angle = psi_angle = omega_angle = placeholder
        theta_angle = tau_angle = placeholder

        n_cur, ca_cur, c_cur = coor_N[j], coor_CA[j], coor_C[j]
        has_prev = j != 0
        has_next = j != last

        # Phi needs the previous residue
        if has_prev:
            phi_angle = calculate_dihedral(coor_C[j - 1], n_cur, ca_cur, c_cur)

        # Psi and Omega need the next residue
        if has_next:
            n_next = coor_N[j + 1]
            psi_angle = calculate_dihedral(n_cur, ca_cur, c_cur, n_next)
            # Omega is computed but not part of the feature row below.
            omega_angle = calculate_dihedral(ca_cur, c_cur, n_next,
                                             coor_CA[j + 1])

        # Theta needs both neighbours
        if has_prev and has_next:
            theta_angle = calculate_angle(coor_CA[j - 1], ca_cur,
                                          coor_CA[j + 1])

        # Tau needs two previous residues and the next one
        if j > 1 and has_next:
            tau_angle = calculate_dihedral(coor_CA[j - 2], coor_CA[j - 1],
                                           ca_cur, coor_CA[j + 1])

        angles.append([phi_angle, psi_angle, theta_angle, tau_angle])

    # Calculate sine and cosine of each angle, interleaved per angle:
    # sin(phi), cos(phi), sin(psi), cos(psi), ...
    angles = np.array(angles)
    angles_sin = np.sin(angles)
    angles_cos = np.cos(angles)
    columns = []
    for k in range(4):
        columns.append(angles_sin[:, k])
        columns.append(angles_cos[:, k])
    angles_sin_cos = np.stack(columns).T
    # Flush numerical noise to exact zero
    angles_sin_cos[np.abs(angles_sin_cos) < 1e-10] = 0

    # One-hot encoding of secondary structure (8-state):
    # H Alpha helix (4-12), B Isolated beta-bridge residue, E Strand,
    # G 3-10 helix, I Pi helix, T Turn, S Bend, - None
    alphabet = 'HBEGITS-'
    ohdict = {symbol: position for position, symbol in enumerate(alphabet)}
    ss_onehot = np.zeros((total_len, len(ohdict)))
    for row in range(total_len):
        ss_onehot[row, ohdict[sec_struc[row]]] = 1

    # Create feature matrix for the protein
    features = np.hstack([ss_onehot, rel_asa, angles_sin_cos])

    # Save the feature matrix
    with open(out_file, 'wb') as f:
        pickle.dump(features.astype('float32'), f, protocol=2)