def iter_ca_hbonds_don(self, parsed_pdb):
    """Yield the C-alpha donor triplets whose angle is wider than 90 degrees.

    Candidates come from ``self.iter_ca_triplets_don(parsed_pdb)``.  For each
    triplet the angle is measured with ``pr.calcAngle`` over members 0, 1, 2
    (vertex at member 1); triplets that do not exceed 90 degrees are dropped.

    Yields:
        tuple: ``(triplet, angle, distance_acc_hyd, distance_heavy)`` where
        ``distance_acc_hyd`` is the distance between members 0 and 1 and
        ``distance_heavy`` the distance between members 0 and 2.
    """
    for triplet in self.iter_ca_triplets_don(parsed_pdb):
        first, middle, last = triplet[0], triplet[1], triplet[2]
        angle = pr.calcAngle(first, middle, last)
        # Written as "not >" so a NaN angle is rejected as well.
        if not angle > 90:
            continue
        yield (triplet,
               angle,
               pr.calcDistance(first, middle),
               pr.calcDistance(first, last))
def iter_hbonds_acc(self, parsed_pdb):
    """Yield acceptor-side hydrogen-bond triplets with their geometry.

    Candidates come from ``self.iter_triplets_acc(parsed_pdb)``.

    * When member 2 belongs to a ``HOH`` residue, no explicit hydrogen is
      taken from the triplet.  Hydrogens (elements H or D) within 1.1 of
      member 0 are looked up through ``parsed_pdb.contacts``; the triplet is
      accepted when there are none, or when none of them forms an angle
      (member 0 - hydrogen - member 2) wider than 90 degrees.  Accepted water
      triplets are reported with the placeholder values ``angle = 360`` and
      ``distance_acc_hyd = 99``.
    * Otherwise the triplet is accepted when the angle over members 0, 1, 2
      exceeds 90 degrees.

    Yields:
        tuple: ``(triplet, angle, distance_acc_hyd, distance_heavy)``.
    """
    for triplet in self.iter_triplets_acc(parsed_pdb):
        first, last = triplet[0], triplet[2]

        if last.getResname() != 'HOH':
            angle = pr.calcAngle(first, triplet[1], last)
            if angle > 90:
                to_middle = pr.calcDistance(first, triplet[1])
                to_last = pr.calcDistance(first, last)
                yield triplet, angle, to_middle, to_last
            continue

        nearby_hydrogens = parsed_pdb.contacts(
            1.1, first.getCoords()).select('element H D')
        if nearby_hydrogens is not None:
            # One flag per hydrogen: True when its angle is NOT wider than 90.
            angle_ok = [not pr.calcAngle(first, hyd, last) > 90
                        for hyd in nearby_hydrogens.iterAtoms()]
            if not all(angle_ok):
                continue
        # Placeholders: no hydrogen is attributed to a water contact.
        yield triplet, 360, 99, pr.calcDistance(first, last)
def _is_backbone_hbond(donor_n, donor_h, acceptor_c, acceptor_o):
    """Return True when N-H...O=C satisfies all five geometric rules."""
    import prody

    # A residue can lack any of these atoms (``select`` returned None, e.g.
    # no 'HN' atom in the file); such a pair cannot be evaluated.
    if any(atom is None
           for atom in (donor_n, donor_h, acceptor_c, acceptor_o)):
        return False
    return bool(
        prody.calcDistance(donor_n, acceptor_o) <= 3.9            # rule 1
        and prody.calcDistance(donor_h, acceptor_o) <= 2.5        # rule 2
        and prody.calcAngle(donor_n, donor_h, acceptor_o) > 90    # rule 3
        and prody.calcAngle(acceptor_c, acceptor_o, donor_n) > 90  # rule 4
        and prody.calcAngle(acceptor_c, acceptor_o, donor_h) > 90  # rule 5
    )


def calculate_hbond(pdbfilename):
    '''
    Count backbone-backbone hydrogen bonds per residue of a PDB file.

    Geometric criteria of hbond identification given by paper:
    Protein Eng. 15 (2002) 359.

        D2-D1-D-H...A-A1
        Rule 1: D-A <= 3.9 A;
        Rule 2: H-A <= 2.5 A;
        Rule 3: D-H-A > 90.0 degree;
        Rule 4: A1-A-D > 90.0 degree;
        Rule 5: A1-A-H > 90.0 degree.

    Only backbone atoms named N, HN, C and O are considered.  The N-H count
    of a residue stops at 1 and its C=O count stops at 2.

    :param pdbfilename: path handed to ``prody.parsePDB``.
    :return: tuple ``(NHbond, np.where(NHbond == 0), CObond,
             np.where(CObond == 0), resTyp)`` where ``NHbond`` / ``CObond``
             are per-residue integer counts and ``resTyp`` the residue names.
             The arrays are sized by the number of 'CA' atoms in the protein
             selection.
    '''
    import prody

    structure = prody.parsePDB(pdbfilename)
    protein = structure.select('protein')
    hv = protein.getHierView()
    nres = len(protein.select('name CA').getResnames())
    NHbond = np.zeros(nres, dtype=int)
    CObond = np.zeros(nres, dtype=int)
    resTyp = ['' for _ in range(nres)]

    # Select the backbone atoms once per residue instead of once per pair.
    backbone = []
    for i, res in enumerate(hv.iterResidues()):
        resTyp[i] = res.getResname()
        backbone.append((res.select('name N'), res.select('name C'),
                         res.select('name O'), res.select('name HN')))

    # Residues are paired by their position in the hierarchy view; the
    # structure-wide residue index may be offset from that position.
    for i, (N, C, O, HN) in enumerate(backbone):
        # N-H of residue i donating to the C=O of any other residue.
        for k, (_, AC, AO, _) in enumerate(backbone):
            if k != i and _is_backbone_hbond(N, HN, AC, AO):
                NHbond[i] += 1
                break  # at most one bond is counted for the N-H group

        # C=O of residue i accepting from the N-H of any other residue.
        for k, (DN, _, _, DHN) in enumerate(backbone):
            if k != i and _is_backbone_hbond(DN, DHN, C, O):
                CObond[i] += 1
                if CObond[i] == 2:
                    break  # at most two bonds are counted for the C=O group

    return NHbond, np.where(NHbond == 0), CObond, np.where(CObond == 0), resTyp
def checkHBonds(appf, no_d, ox_a, ssindex):
    """Record the geometry of the donor/acceptor pair if it is an H-bond.

    The pair is tested against the module-level thresholds and, when every
    test passes, its measurements are appended to the module-level column
    lists at position ``ssindex`` and ``NUM_H_BONDS[ssindex]`` is incremented.
    The function always returns ``None``.

    :param appf: parsed structure, forwarded to the helper lookups.
    :param no_d: candidate donor atom.
    :param ox_a: candidate acceptor atom.
    :param ssindex: index of the column bucket that receives the values.
    """
    # Rule 1: donor-acceptor distance below DONOR_ACCEPTOR_MAXDISTANCE.
    da_dist = pr.calcDistance(no_d, ox_a)
    if da_dist >= DONOR_ACCEPTOR_MAXDISTANCE:
        return

    h_don = getHforAtom(appf, no_d)
    if h_don is None:
        return

    # Rule 2: hydrogen-acceptor distance below HYDROGEN_ACCEPTOR_MAXDISTANCE.
    ha_dist = pr.calcDistance(h_don, ox_a)
    if ha_dist >= HYDROGEN_ACCEPTOR_MAXDISTANCE:
        return

    # Rule 3: donor-hydrogen-acceptor angle of at least DHA_ANGLE.
    dha_ang = pr.calcAngle(no_d, h_don, ox_a)
    if dha_ang < DHA_ANGLE:
        return

    # Rule 4 (donor-acceptor-antecedent angle vs DAB_ANGLE) is disabled here.
    acc_ante = getAntecedent(appf, ox_a)
    if acc_ante is None:
        return

    # Rule 5: hydrogen-acceptor-antecedent angle of at least HAB_ANGLE.
    haa_ang = pr.calcAngle(h_don, ox_a, acc_ante)
    if haa_ang < HAB_ANGLE:
        return

    # Valid H-bond between no_d, h_don and ox_a.
    beta_ang = getBetaAngle(appf, acc_ante, ox_a, h_don)
    gamm_ang = getGammaAngle(appf, acc_ante, ox_a, h_don)

    NUM_H_BONDS[ssindex] += 1
    COLUMN_D_ON[ssindex].append(da_dist)
    COLUMN_D_OH[ssindex].append(ha_dist)
    COLUMN_A_NHO[ssindex].append(dha_ang)
    COLUMN_A_HOC[ssindex].append(haa_ang)
    COLUMN_BETA[ssindex].append(beta_ang)
    COLUMN_GAMMA[ssindex].append(gamm_ang)
def compute_distances_given_instruction(pdb_object, instruction, chain="L"):
    """Measure the distance between a ligand atom and a protein atom.

    :param pdb_object: parsed structure containing ligand and protein.
    :param instruction: 4-item sequence
        ``(atom_name_ligand, atom_name_aa, resnum_aa, resname_aa)``.
    :param chain: chain identifier of the ligand (default ``"L"``).
    :return: tuple ``(atom_name_ligand, atom_name_aa, resnum_aa, resname_aa,
        distance)``.
    """
    ligand_resnum = get_resnum_of_chain(pdb_object, chain)
    atom_name_ligand, atom_name_aa, resnum_aa, resname_aa = instruction
    atom_of_ligand = select_atom_given_name_type_and_num(
        pdb_object, ligand_resnum, atom_name_ligand)
    # The protein atom must be looked up with the amino-acid residue number
    # and atom name; reusing the ligand's values would measure the ligand
    # atom against itself.
    atom_of_protein = select_atom_given_name_type_and_num(
        pdb_object, resnum_aa, atom_name_aa)
    distance = prody.calcDistance(atom_of_ligand, atom_of_protein)
    return atom_name_ligand, atom_name_aa, resnum_aa, resname_aa, distance
def calcAllDist(coordset):
    """Return the all-against-all distance matrix of one coordinate set.

    :param coordset: array of shape ``(1, n_atoms, 3)``.
    :return: ``(n_atoms, n_atoms)`` float array whose element ``[j, i]`` is
        the Euclidean distance between atoms ``j`` and ``i``.
    :raises ValueError: if ``coordset`` does not hold exactly one set of 3-D
        coordinates.
    """
    coordset = np.asarray(coordset)
    if coordset.ndim != 3 or coordset.shape[0] != 1 or coordset.shape[2] != 3:
        raise ValueError('coordset must have shape (1, n_atoms, 3)')

    frame = coordset[0]
    n_atoms = frame.shape[0]
    distance = np.zeros((n_atoms, n_atoms))
    for i in range(n_atoms):
        # Broadcasting subtracts atom i from every atom without building a
        # tiled copy of its coordinates.
        delta = frame - frame[i]
        distance[:, i] = np.sqrt((delta * delta).sum(axis=-1))
    return distance
def calcAllDist(coordset):
    """Return the all-against-all distance matrix of one coordinate set.

    :param coordset: array of shape ``(1, n_atoms, 3)``.
    :return: ``(n_atoms, n_atoms)`` float array whose element ``[j, i]`` is
        the Euclidean distance between atoms ``j`` and ``i``.
    :raises ValueError: if ``coordset`` does not hold exactly one set of 3-D
        coordinates.
    """
    coordset = np.asarray(coordset)
    if coordset.ndim != 3 or coordset.shape[0] != 1 or coordset.shape[2] != 3:
        raise ValueError('coordset must have shape (1, n_atoms, 3)')

    frame = coordset[0]
    n_atoms = frame.shape[0]
    distance = np.zeros((n_atoms, n_atoms))
    for i in range(n_atoms):
        # Broadcasting subtracts atom i from every atom without building a
        # tiled copy of its coordinates.
        delta = frame - frame[i]
        distance[:, i] = np.sqrt((delta * delta).sum(axis=-1))
    return distance
def getHforAtom(appf, anAtom):
    """Return the hydrogen closest to ``anAtom`` among same-resnum atoms.

    :param appf: parsed structure; queried with a ``'resnum N'`` selection.
    :param anAtom: atom whose nearest hydrogen is wanted.
    :return: the atom with element ``'H'`` at the smallest distance (the last
        one visited wins a tie), or ``None`` when the residue number of
        ``anAtom`` is below 1 or no hydrogen lies within 100000 of it.
    """
    resnum = anAtom.getResnum()
    if resnum < 1:
        return None

    closest = None
    closest_dist = 100000
    for candidate in appf.select('resnum ' + str(resnum)):
        if candidate.getElement() != 'H':
            continue  # only hydrogens are of interest
        separation = pr.calcDistance(candidate, anAtom)
        if separation <= closest_dist:
            closest, closest_dist = candidate, separation
    return closest
def main_calc(pep):
    """Build the per-atom index, distance and bonded matrices for ``pep``.

    Relies on module-level state defined elsewhere in the file: ``seq``,
    ``seq_to_pdb``, ``file``, ``mV``, ``ElementSymbols``, ``pd``
    (which must provide ``calcDistance``), ``find_index`` and
    ``possible_bonds``.

    :param pep: peptide passed to ``find_index`` / ``possible_bonds`` and
        stored under ``'sequence'`` in the result.
    :return: dict with keys ``'sequence'`` (``pep``), ``'index'``
        (``mV[0] x 2`` array), ``'secondary'`` (pairwise distances) and
        ``'primary'`` (distances of pairs accepted as bonded).
    """
    print(pep)
    Nterm = False
    ind = find_index(seq, pep)
    # Collect [atom, sequence position] pairs for the positions ind[0]..ind[1]-1.
    new_pep = []
    for i in range(ind[0], ind[1]):
        for j in seq_to_pdb[i]:
            new_pep.append([file[j], i])
    # Arrays are sized by mV[0], not by len(new_pep); presumably mV[0] is a
    # global upper bound on the atom count -- TODO confirm.
    lengths = np.zeros((mV[0], mV[0]))
    index = np.zeros((mV[0], 2))
    bonded = np.zeros((mV[0], mV[0]))
    # lengths = np.zeros((len(new_pep), len(new_pep)))
    # index = np.zeros((len(new_pep),2))
    # bonded = np.zeros((len(new_pep), len(new_pep)))
    for i in range(len(new_pep)):
        # 1-based position of the first letter of the atom name in
        # ElementSymbols (presumably the atomic number -- verify the table).
        ele = float(ElementSymbols.index(str(new_pep[i][0].getName()[0]))) + 1
        if ele == 6.0:
            index[i][1] = 0.0
        elif ele == 7.0 and Nterm:
            # Only nitrogens seen after the first atom named 'N' get code 1.
            index[i][1] = 1.0
        elif ele == 8.0:
            index[i][1] = 2.0
        if Nterm == False and new_pep[i][0].getName() == 'N':
            Nterm = True
        index[i][0] = ele  #/len(ElementSymbols)
        for j in range(len(new_pep)):
            # Each pair is measured once; a stored 0 means "not computed yet".
            if i != j and lengths[i][j] == 0:
                lengths[i][j] = (float(
                    pd.calcDistance(new_pep[i][0], new_pep[j][0])))
                lengths[j][i] = lengths[i][j]
                # NOTE(review): the source was whitespace-collapsed; this
                # test is assumed to sit inside the "not computed yet"
                # branch -- confirm against the original indentation.
                if (lengths[i][j] < 1.9) and (new_pep[i][0] != new_pep[j][0]):
                    if new_pep[j][0].getSequence() in possible_bonds(
                            new_pep[i][1], pep, ind):
                        bonded[i][j] = lengths[i][j]
                        bonded[j][i] = bonded[i][j]
    final = {}
    final['sequence'] = pep
    final['index'] = index
    final['secondary'] = lengths
    final['primary'] = bonded
    return final
def CheckforGaps(structure, max_bond_distance):
    """
    This function checks all the residues in the protein and checks whether
    the N of the current residue and the C of the previous one are within
    peptidic bond distance (specified by the max_bond_distance variable) in
    order to check for gaps.

    Residues that repeat or go below the previous residue number are
    renumbered in place when they contain hetero atoms; otherwise a warning is
    printed (repeated number) or the program exits (lower number).

    :param structure: a prody object containing the structure to study.
    :param max_bond_distance: a float specifying the max bond distance to
        accept. Values below 1.55 are raised to 1.55.
    :return: two dictionaries both containing the chain as key and the
        residues numbers (previous, current) involved in a bond as values,
        the first dictionary contains the information about the gaps and the
        second one contains the information about those not involved in gaps.
    """
    gaps = {}
    not_gaps = {}
    if max_bond_distance < 1.55:
        max_bond_distance = 1.55
    for chain in structure.iterChains():
        residues = [residue for residue in chain]
        chain_id = chain.getChid()
        previous_residue_number = None
        for i, residue in enumerate(chain.iterResidues()):
            if previous_residue_number is not None:
                # Resolve the previous residue up front so that every branch
                # below (including the error report) refers to the right one.
                previous_residue = chain.getResidue(previous_residue_number)
                if not previous_residue:
                    # The lookup by number can fail; fall back to position.
                    previous_residue = residues[i - 1]

                if residue.getResname() in default_supported_aminoacids:
                    pair = [previous_residue_number, residue.getResnum()]
                    if residue.getResname() == 'ACE':
                        gaps.setdefault(chain_id, []).append(pair)
                    else:
                        current_residue_N = residue.getAtom("N")
                        previous_residue_C = previous_residue.getAtom('C')
                        if current_residue_N is not None and previous_residue_C is not None:
                            distance = calcDistance(current_residue_N,
                                                    previous_residue_C)
                            if distance < max_bond_distance:
                                not_gaps.setdefault(chain_id, []).append(pair)
                            else:
                                gaps.setdefault(chain_id, []).append(pair)
                        elif current_residue_N is None:
                            print(" * There's a problem with residue {} {} {} it"
                                  " doesn't have the N atom".format(
                                      residue.getResname(),
                                      residue.getResnum(),
                                      residue.getChid()))
                        else:
                            print(
                                " * There's a problem with residue {} {} {} it doesn't have the C atom"
                                .format(previous_residue.getResname(),
                                        previous_residue.getResnum(),
                                        previous_residue.getChid()))

                if residue.getResnum() == previous_residue_number:
                    # Same number and no insertion code to tell them apart.
                    if residue.getIcode() == '':
                        if residue.hetero is not None:
                            residue.setResnum(previous_residue_number + 1)
                        else:
                            print(' * WARNING: There are two residues with the same resnum in the same chain.'
                                  'The residue {} {} {} '.format(
                                      residue.getChid(),
                                      residue.getResnum(),
                                      residue.getResname()))
                elif residue.getResnum() < previous_residue_number + 1:
                    if residue.hetero is not None:
                        residue.setResnum(previous_residue_number + 1)
                    else:
                        print(
                            " * WARNING! The next residue has a lower resnum than the previous one in the chain??"
                        )
                        print(" * CHAIN: {} RESIDUE: {} {} {}".format(
                            residue.getChid(), residue.getResnum(),
                            residue.getChid(), residue.getResname()))
                        print(" * previous: RESIDUE {} {} {}".format(
                            previous_residue.getResname(),
                            previous_residue.getResnum(),
                            previous_residue.getChid()))
                        sys.exit(
                            'This should never happen, please review your structure.'
                        )
            previous_residue_number = residue.getResnum()
    return gaps, not_gaps
def runThrough(pfile):
    """Scan chain A of a PDB file for hydrogen bonds and print the averages.

    Every N/O atom is tried as a donor against every O atom assigned to the
    same secondary-structure index.  Pairs that satisfy the five geometric
    rules (module-level thresholds) have their measurements appended to the
    module-level ``COLUMN_*`` lists, bucketed by the secondary-structure index
    of the acceptor's antecedent atom.  The residue number and coordinates of
    each bonded hydrogen are written to ``old-H.txt``, and a table with the
    per-bucket averages is printed at the end.

    :param pfile: path of the PDB file to parse.
    """
    appf = pr.parsePDB(pfile, model=1, secondary=True, chain='A',
                       altLoc=False)
    # O can be a donor in rare cases; the reference paper uses this convention.
    nitrox_don = appf.select('element N O')
    oxygen_acc = appf.select('element O')

    # The context manager closes the file even if a measurement raises.
    with open('old-H.txt', 'wb') as fp:
        for no_d in nitrox_don:
            n_secStruct = getSSIndex(no_d)
            if n_secStruct < 0:
                continue
            # The donor hydrogen depends only on the donor: look it up once
            # and skip donors that have none.
            h_don = getHforAtom(appf, no_d)
            if h_don is None:
                continue

            for ox_a in oxygen_acc:
                if getSSIndex(ox_a) != n_secStruct:
                    continue

                # Rule 1: donor-acceptor distance.
                da_dist = pr.calcDistance(no_d, ox_a)
                if da_dist >= DONOR_ACCEPTOR_MAXDISTANCE:
                    continue

                # Rule 2: hydrogen-acceptor distance.
                ha_dist = pr.calcDistance(h_don, ox_a)
                if ha_dist >= HYDROGEN_ACCEPTOR_MAXDISTANCE:
                    continue

                # Rule 3: donor-hydrogen-acceptor angle.
                dha_ang = pr.calcAngle(no_d, h_don, ox_a)
                if dha_ang < DHA_ANGLE:
                    continue

                # Rule 4: donor-acceptor-antecedent angle.
                acc_ante = getAntecedent(appf, ox_a)
                if acc_ante is None:
                    continue
                daa_ang = pr.calcAngle(no_d, ox_a, acc_ante)
                if daa_ang < DAB_ANGLE:
                    continue

                # Rule 5: hydrogen-acceptor-antecedent angle.
                haa_ang = pr.calcAngle(h_don, ox_a, acc_ante)
                if haa_ang < HAB_ANGLE:
                    continue

                # Valid H-bond between no_d, h_don and ox_a.
                beta_ang = getBetaAngle(appf, acc_ante, ox_a, h_don)
                gamm_ang = getGammaAngle(appf, acc_ante, ox_a, h_don)

                ssindex = getSSIndex(acc_ante)
                towrite = ",".join([str(h_don.getResnum()),
                                    str(h_don.getCoords())])
                # Encoded explicitly because the file is opened in binary
                # mode; valid on both Python 2 and Python 3.
                fp.write((towrite + "\r\n").encode('ascii'))

                COLUMN_D_ON[ssindex].append(da_dist)
                COLUMN_D_OH[ssindex].append(ha_dist)
                COLUMN_A_NHO[ssindex].append(dha_ang)
                COLUMN_A_HOC[ssindex].append(haa_ang)
                COLUMN_BETA[ssindex].append(beta_ang)
                COLUMN_GAMMA[ssindex].append(gamm_ang)

    # Average every non-empty bucket of every column.
    columns = (COLUMN_D_ON, COLUMN_D_OH, COLUMN_A_NHO, COLUMN_A_HOC,
               COLUMN_BETA, COLUMN_GAMMA)
    TABLE = [[sum(x) / len(x) for x in column if len(x) > 0]
             for column in columns]
    print(' D_ON D_OH ANGLE(NHO) ANGLE(HOC) BETA GAMMA ')
    print(np.array(TABLE).T)
def CheckMetalsCoordination(structure):
    """
    A function to detect the metal atoms that could be coordinated with the
    protein.

    For every single-atom residue whose name is in ``supported_metals`` the
    non-hydrogen, non-carbon atoms within 3 of the metal (excluding atoms that
    share its residue number) are collected.  When consecutive candidates
    belong to the same residue number only the one closest to the metal is
    kept.  The angles of the resulting set are then handed to
    ``CheckConformation`` and the outcome is printed.

    :param structure: object exposing a prody-style ``select`` method.
    :return: dictionary mapping each coordinated metal residue to the list of
        atoms kept as its coordinating atoms.
    """
    selection_pattern = "(within 3 of metal) and (not resnum {}) and (not hydrogen) and (not carbon)"
    coordinated_metals = {}
    for metal in supported_metals:
        metal_selection = structure.select('resname {}'.format(metal))
        if metal_selection is None:
            continue
        print(" * Checking the metals that can be coordinated. ("
              "a constraint should be used if they're really coordinated)")
        for metal_res in metal_selection.copy().iterResidues():
            if metal_res.numAtoms() != 1:
                continue
            coordinated_atoms = structure.select(
                selection_pattern.format(metal_res.getResnum()),
                metal=metal_res)
            if coordinated_atoms is None:
                print(
                    " * The metal atom {} isn't coordinated with the protein. Are you sure it's necessary?"
                    .format("{} {} {}".format(metal_res.getResname(),
                                              metal_res.getChid(),
                                              metal_res.getResnum())))
                continue

            coordinated_atoms_list = []
            prev_atom = None
            for at in coordinated_atoms.iterAtoms():
                same_group = (
                    prev_atom is not None
                    and at.getResnum() == prev_atom.getResnum()
                    and (at.getIndex() == prev_atom.getIndex() + 1
                         or at.getResname() in ['ASP', 'GLU']))
                if same_group:
                    if calcDistance(at, metal_res) > calcDistance(
                            prev_atom, metal_res):
                        # The atom already kept is closer: drop this one.
                        prev_atom = None
                        continue
                    # This atom is closer: it replaces the one kept before.
                    coordinated_atoms_list.pop(-1)
                coordinated_atoms_list.append(at)
                prev_atom = at
            # NOTE(review): the source was whitespace-collapsed; only metals
            # with at least one candidate atom are assumed to be recorded.
            coordinated_metals[metal_res] = coordinated_atoms_list

    # Collected for reference only; this mapping is not returned.
    coordinated_atoms_ids = {}
    if coordinated_metals:
        known_coordination_numbers = [
            x[1] for x in coordination_geometries.values()
        ]
        for metal, atoms_list in coordinated_metals.items():
            metal_id = "{} {} {}".format(metal.getResname(), metal.getChid(),
                                         metal.getResnum())
            atoms_ids = [[
                "{} {} {} {}".format(at.getResnum(), at.getResname(),
                                     at.getChid(), at.getName()),
                calcDistance(metal, at)[0]
            ] for at in atoms_list]
            if len(atoms_list) in known_coordination_numbers:
                coordinated_atoms_ids[metal_id] = atoms_ids
                print(" * The metal atom {0} has the following atoms within coordination "
                      "distance:\n{1}".format(
                          metal_id,
                          "\n".join([' * {0}'.format(x[0])
                                     for x in atoms_ids])))
                # One entry per unordered pair of coordinating atoms.
                angles = [[at, metal, at2, calcAngle(at, metal, at2)[0]]
                          for idx, at in enumerate(atoms_list)
                          for at2 in atoms_list[idx + 1:]]
                if len(atoms_list) <= 4:
                    print(
                        " * Checking for a tetrahedric coordination for the atom."
                    )
                    found_conformation = CheckConformation(angles,
                                                           'tetrahedric')
                    if not found_conformation:
                        print(
                            " * WARNING: The angles are too distorted to ascertain this configuration. CHECK IT manually"
                        )
                elif 4 < len(atoms_list) <= 6:
                    print(
                        " * WARNING: Checking for an octahedric coordination for the atom."
                    )
                    found_conformation = CheckConformation(angles,
                                                           'octahedric')
                    if not found_conformation:
                        print(
                            " * The angles are too distorted to ascertain this configuration. CHECK IT manually"
                        )
            else:
                # NOTE(review): assumed to pair with the coordination-number
                # test above -- confirm against the original indentation.
                print(
                    " * The metal doesn't have a coordination we can validate."
                )
    else:
        print(" * There are no coordinated metals.")
    return coordinated_metals
def CheckClashes(mutated_protein,
                 mutation,
                 zmatrix,
                 initial_residue,
                 final_residue,
                 overlapfactor=0.7):
    """
    A function to check the clashes between a given residue and the rest of
    the structure.

    Candidate clashes are first gathered with a generous cutoff based on the
    largest VdW radius of the template, and then confirmed one by one with
    the VdW radius of the actual partner atom.

    :param mutated_protein: prody AtomGroup holding the structure to check.
    :param mutation: dictionary with at least the keys ``"resnum"`` and
        ``"chain"`` identifying the mutated residue (an empty chain means the
        residue is selected by number only).
    :param zmatrix: ZMATRIX of the mutated residue (information extracted
        from the template of PELE); provides ``AtomNames`` and ``VdWRadius``.
    :param initial_residue: residue number whose template is loaded with the
        ``'B'`` suffix.
    :param final_residue: residue number whose template is loaded with the
        ``'E'`` suffix.
    :param overlapfactor: float multiplying the sum of the two VdW radii to
        obtain the minimum accepted distance. It is 0.7 by default.
    :rtype: dictionary
    :return: mapping from atom name of the mutated residue to the list of
        atom indices it clashes with.
    :raises ValueError: if an atom of the mutated residue is not present in
        ``zmatrix.AtomNames``.
    """
    max_vdw = max(zmatrix.VdWRadius)
    contacts_object = Contacts(mutated_protein)
    if mutation["chain"]:
        mutated_residue = mutated_protein.select(
            "resnum {0[resnum]} and chain {0[chain]} and not hydrogen".format(
                mutation))
    else:
        mutated_residue = mutated_protein.select(
            "resnum {0[resnum]} and not hydrogen".format(mutation))
    # A set makes the "belongs to the mutated residue" test constant time.
    mutated_residue_indices = set(mutated_residue.getIndices())

    # First pass: every atom close enough to clash if it had the largest
    # radius known to the template.
    clashes2check = {}
    for at in mutated_residue.iterAtoms():
        atom_name = at.getName()
        if atom_name in fixed_atoms:
            continue
        try:
            at_vdw = zmatrix.VdWRadius[zmatrix.AtomNames.index(atom_name)]
        except ValueError:
            message = ("The atom {} in the mutated residue doesn't agree with the Zmatrix."
                       .format(atom_name))
            print(message)
            print(mutated_residue.getNames())
            print(zmatrix.AtomNames)
            raise ValueError(message)
        dist = (at_vdw + max_vdw) * overlapfactor
        at_contacts = contacts_object.select(dist, at.getCoords())
        if at_contacts is None:
            continue
        possible_clashes = [
            clashing_atom.getIndex() for clashing_atom in at_contacts
            if clashing_atom.getIndex() not in mutated_residue_indices
        ]
        if possible_clashes:
            clashes2check[atom_name] = possible_clashes

    # In proline the CD atom is bonded to the residue's own N.
    if mutated_residue.getResnames()[0] == "PRO":
        cd_atom_clashes = clashes2check.get("CD")
        if cd_atom_clashes is not None:
            n_atom_index = mutated_residue.select("name N").getIndices()[0]
            if n_atom_index in cd_atom_clashes:
                cd_atom_clashes.remove(n_atom_index)

    # Second pass: confirm each candidate with the partner's own radius.
    real_clashes = {}
    for key, indices in clashes2check.items():
        key_vwd_radius = zmatrix.VdWRadius[zmatrix.AtomNames.index(key)]
        key_atom = mutated_residue.select('name {}'.format(key))
        real_clashing_indices = []
        for ind in indices:
            atom2check = mutated_protein.select('index {}'.format(ind))
            atom2check_resname = atom2check.getResnames()[0]
            atom2check_resnum = atom2check.getResnums()[0]
            if atom2check_resnum == initial_residue and atom2check_resname in default_supported_aminoacids:
                atom2check_zmatrix = ZMATRIX(atom2check_resname + 'B')
            elif atom2check_resnum == final_residue:
                atom2check_zmatrix = ZMATRIX(atom2check_resname + 'E')
            else:
                atom2check_zmatrix = ZMATRIX(atom2check_resname)
            if atom2check_zmatrix.Name is None:
                print("Cannot load the zmatrix for the residue {} {}.\n"
                      "It won't be checked for clashes.".format(
                          atom2check_resname, atom2check_resnum))
                continue
            atom2check_vdw_radius = atom2check_zmatrix.VdWRadius[
                atom2check_zmatrix.AtomNames.index(atom2check.getNames()[0])]
            min_distance = (atom2check_vdw_radius +
                            key_vwd_radius) * overlapfactor
            real_distance = calcDistance(key_atom, atom2check)
            if real_distance < min_distance:
                real_clashing_indices.append(ind)
        if real_clashing_indices:
            real_clashes[key] = real_clashing_indices
    return real_clashes
def process(self, args=None):
    """Measure peptide poses against the receptor and store them in HDF5.

    For every peptide in ``self.aplist`` and every coordinate set returned by
    the extractor, each residue (except residue number 1) is measured against
    fixed receptor atoms and one record is written to the ``'out'`` dataset
    of the file ``args['out']``.

    :param args: option dictionary; falls back to ``self.args`` when empty or
        omitted.  The key ``'mpi'`` is set on it, and the keys ``'out'`` and
        ``'receptor'`` are read here.
    """
    if not args:
        args = self.args
    # NOTE: this writes into the caller's dictionary (or into self.args).
    args['mpi'] = self.mpi
    extractor = er.PepExtractor(**args)
    lM = len(self.aplist)
    # Layout of one output record.
    dtype = np.dtype([
        ('name', 'S10'),
        ('pnum', '<i4'),
        ('rnum', '<i4'),
        ('dist', '<f8'),
        ('hdist1', '<f8'),
        ('hdist2', '<f8'),
        ('BD', '<f8'),
        ('FL', '<f8'),
        ('AT', '<f8'),
        ('dir', 'S3'),
        ('aln', 'S20')
    ])
    # Only rank 0 creates the file and sizes the dataset, using the first
    # peptide's length and number of coordinate sets as the per-peptide size.
    if self.mpi.rank == 0:
        m = self.aplist[0]
        lm = len(m)
        S = extractor.extract_result(m)
        lS = S.numCoordsets()
        Sf = h5py.File(args['out'], 'w')
        out = Sf.create_dataset(
            'out', (len(self.plist) * lS * lm, ), dtype=dtype)
        Sf.close()
    # Every rank waits for the file to exist, then reopens it with the
    # MPI-IO driver.
    self.mpi.comm.Barrier()
    Sf = h5py.File(args['out'], 'r+', driver='mpio', comm=self.mpi.comm)
    out = Sf['out']
    a = prody.parsePDB(args['receptor'])
    # Receptor reference atoms; residue numbers are hard-coded.
    # NOTE(review): the variable is called OG but selects atom name 'SG'.
    OG = a.select('resnum 151 name SG')
    Ob = a.select('resnum 168 and name O')
    ND1 = a.select('resnum 46 and name NE2')
    OXH = a.select("name N resnum 149 150 151")
    t0 = time.time()
    for cm in range(lM):
        m = self.aplist[cm]
        lm = len(m)
        t1 = time.time()
        dt = t1 - t0
        t0 = t1
        print('STEP: %d PERCENT: %.2f%% TIME: %s' % (
            cm, float(cm) / lM * 100, dt))
        # NOTE(review): the bare except also swallows KeyboardInterrupt and
        # SystemExit; consider narrowing it.
        try:
            S = extractor.extract_result(m)
        except:
            print('ERROR: BAD PEPTIDE: %s' % m)
            continue
        lS = S.numCoordsets()
        for S_ in range(lS):
            S.setACSIndex(S_)
            tC = S.select('name C')
            dist = np.inf
            rnum = None
            for C in tC.iterAtoms():
                rnum = C.getResnum()
                if rnum == 1:
                    continue
                O = S.select('resnum %i and name O' % rnum)
                Nl = S.select('resnum %i and name N' % (rnum))
                # N of the following residue.
                # NOTE(review): if that residue has no N the selection is
                # None and the next lines raise -- confirm inputs always
                # carry a trailing cap.
                N = S.select('resnum %i and name N' % (rnum + 1))
                C_ = C.getCoords()
                O_ = O.getCoords()[0]
                N_ = N.getCoords()[0]
                dist = prody.calcDistance(C, OG)[0]
                hdist1 = np.min(prody.calcDistance(OXH, O))
                hdist2 = np.min(prody.calcDistance(Ob, Nl))
                # Unit vector from the cross product of (C - N) and (O - C),
                # translated to start at C; used as a pseudo-atom below.
                nC_ = np.cross((C_ - N_), (O_ - C_))
                nC_ /= np.linalg.norm(nC_)
                nC_ = nC_ + C_
                nC_ = nC_.reshape(1, 3)
                nC = C.copy()
                nC.setCoords(nC_)
                BD = prody.calcAngle(OG, C, O)
                FL = prody.calcDihedral(OG, nC, C, O)
                AT = prody.calcAngle(ND1, OG, C)
                # The sign of this dihedral decides the reported direction.
                angle_ = prody.calcDihedral(ND1, OG, C, N)
                if angle_ < 0:
                    DIR = 'F'
                else:
                    DIR = 'R'
                # Pad the capped sequence with '-' to a 12-character
                # alignment string, reversed for direction 'R'.
                s = 'X' + m + 'Z'
                pref = ''
                if DIR == 'F':
                    seq = s
                    pref = 6 - rnum
                else:
                    seq = s[::-1]
                    pref = rnum
                suf = 12 - (pref + len(seq))
                seq = '-' * pref + seq + '-' * suf
                outdata = (m, S_ + 1, rnum, dist, hdist1, hdist2, BD, FL, AT,
                           DIR, seq)
                # Flat slot: peptide offset, then coordinate set, then
                # residue (rnum starts at 2 because residue 1 is skipped).
                ind = (self.tb + cm) * lS * lm + S_ * lm + rnum - 2
                out[ind] = outdata
    self.database.close()
    Sf.close()
def calculate_distance_of_to_atoms(atom1, atom2):
    """Return the first distance computed between the two atoms' coordinates.

    Both arguments must expose ``getCoords()``.  The result of
    ``prody.calcDistance`` is indexed with ``[0]``, so the coordinates are
    expected to yield an indexable (array) result.
    """
    return prody.calcDistance(atom1.getCoords(), atom2.getCoords())[0]
def CheckforGaps(structure, max_bond_distance=2.5):
    """
    Check whether consecutive residues of every chain are bonded.

    The N atom of each supported residue is measured against the C atom of
    the previous residue; pairs closer than ``max_bond_distance`` are reported
    as bonded, the rest (and every pair ending in an 'ACE' residue) as gaps.

    Residues that repeat or go below the previous residue number are
    renumbered in place when they contain hetero atoms; otherwise a warning is
    printed (repeated number) or the program exits (lower number).

    :param structure: a prody object containing the structure to study.
    :param max_bond_distance: largest N-C distance accepted as a bond
        (default 2.5).
    :return: two dictionaries keyed by chain id whose values are lists of
        ``[previous_resnum, current_resnum]``; the first holds the gaps and
        the second the bonded pairs.
    """
    gaps = {}
    not_gaps = {}
    for chain in structure.iterChains():
        chain_id = chain.getChid()
        previous_residue_number = None
        last_residue = None
        for residue in chain.iterResidues():
            if previous_residue_number is not None:
                previous_residue = chain.getResidue(previous_residue_number)
                if not previous_residue:
                    # The lookup by number can fail; fall back to the residue
                    # actually visited before this one.
                    previous_residue = last_residue

                if residue.getResname() in supported_aminoacids:
                    pair = [previous_residue_number, residue.getResnum()]
                    if residue.getResname() == 'ACE':
                        gaps.setdefault(chain_id, []).append(pair)
                    else:
                        current_residue_N = residue.getAtom("N")
                        previous_residue_C = previous_residue.getAtom('C')
                        if current_residue_N is not None and previous_residue_C is not None:
                            distance = calcDistance(current_residue_N,
                                                    previous_residue_C)
                            if distance < max_bond_distance:
                                not_gaps.setdefault(chain_id, []).append(pair)
                            else:
                                gaps.setdefault(chain_id, []).append(pair)
                        elif current_residue_N is None:
                            print(" * There's a problem with residue {} {} {} it"
                                  " doesn't have the N atom".format(
                                      residue.getResname(),
                                      residue.getResnum(),
                                      residue.getChid()))
                        else:
                            print(" * There's a problem with residue {} {} {} it doesn't have the C atom".format(
                                previous_residue.getResname(),
                                previous_residue.getResnum(),
                                previous_residue.getChid()))

                if residue.getResnum() == previous_residue_number:
                    # Same number and no insertion code to tell them apart.
                    if residue.getIcode() == '':
                        if residue.hetero is not None:
                            residue.setResnum(previous_residue_number + 1)
                        else:
                            print(' * WARNING: There are two residues with the same resnum in the same chain.'
                                  'The residue {} {} {} '.format(
                                      residue.getChid(),
                                      residue.getResnum(),
                                      residue.getResname()))
                elif residue.getResnum() < previous_residue_number + 1:
                    if residue.hetero is not None:
                        residue.setResnum(previous_residue_number + 1)
                    else:
                        print(" * WARNING! The next residue has a lower resnum than the previous one in the chain??")
                        print(" * CHAIN: {} RESIDUE: {} {} {}".format(
                            residue.getChid(), residue.getResnum(),
                            residue.getChid(), residue.getResname()))
                        print(" * previous: RESIDUE {} {} {}".format(
                            previous_residue.getResname(),
                            previous_residue.getResnum(),
                            previous_residue.getChid()))
                        sys.exit('This should never happen, please review your structure.')
            previous_residue_number = residue.getResnum()
            last_residue = residue
    return gaps, not_gaps
def _copy_selection(structure, selection_string):
    """Return a copy of the selected atoms, or None when nothing matches."""
    selection = structure.select(selection_string)
    return None if selection is None else selection.copy()


def _join_atom_groups(*parts):
    """Concatenate the given atom groups in order, skipping missing ones."""
    present = [part for part in parts if part is not None]
    joined = present[0]
    for part in present[1:]:
        joined = joined + part
    return joined


def SolveClashes(initial_structure, initial_clashes, mutation, zmatrix,
                 initial_residue_number, final_residue_number):
    """Try the residue's rotamer dihedrals one by one to reduce its clashes.

    The mutated residue is rebuilt with ``ChangeResidueCoordinates`` once per
    dihedral of its rotamer library.  A candidate replaces the current
    residue when it clashes through fewer heavy atoms, or through the same
    number of heavy atoms but with fewer partner atoms; the search stops as
    soon as a candidate has no clashes.

    :param initial_structure: structure containing the mutated residue.
    :param initial_clashes: dictionary of the starting clashes (atom name ->
        list of clashing atom indices).
    :param mutation: dictionary with at least the keys ``"resnum"`` and
        ``"fin_resname"`` (and whatever ``CheckClashes`` reads).
    :param zmatrix: ZMATRIX of the mutated residue.
    :param initial_residue_number: forwarded to ``CheckClashes``.
    :param final_residue_number: forwarded to ``CheckClashes``.
    :return: the structure rebuilt with the best residue found.
    """
    print("Trying to solve the clashes.")
    dihedral2check = 0
    # Either flank is None when the mutated residue is the first or the last
    # one, because the selection then matches nothing.
    initial_part_of_the_protein = _copy_selection(
        initial_structure, "resnum < {}".format(mutation["resnum"]))
    final_part_of_the_protein = _copy_selection(
        initial_structure, "resnum > {}".format(mutation["resnum"]))
    initial_residue = initial_structure.select("resnum {}".format(
        mutation["resnum"])).copy()

    # Protonation variants share the rotamer library of the parent residue.
    resname = initial_residue.getResnames()[0]
    if resname in ["HIE", "HID", "HIP"]:
        resname = "HIS"
    elif resname in ["LYN"]:
        resname = "LYS"
    residue_rotamer_lib = ROTAMERLIB(resname)
    maximum_number_of_trials = residue_rotamer_lib.NumberOfDihedrals

    initial_number_of_clashes = sum(
        len(values) for values in initial_clashes.values())
    print("Starting number of heavy atoms clashes: {}".format(
        len(initial_clashes.keys())))
    print("Starting number of total clashes: {}".format(
        initial_number_of_clashes))
    zmatrix.ComputeDeltaFi()

    while maximum_number_of_trials > 0:
        maximum_number_of_trials -= 1
        new_residue = ChangeResidueCoordinates(initial_residue.copy(), zmatrix,
                                               residue_rotamer_lib,
                                               dihedral2check)
        dihedral2check += 1
        if mutation["fin_resname"] == "PRO":
            # Discard candidates that open the proline ring.
            CD_N_distance = calcDistance(new_residue.select("name N"),
                                         new_residue.select("name CD"))
            if CD_N_distance > 1.5:
                continue

        mutated_structure = _join_atom_groups(initial_part_of_the_protein,
                                              new_residue,
                                              final_part_of_the_protein)
        new_clashes = CheckClashes(mutated_structure, mutation, zmatrix,
                                   initial_residue_number,
                                   final_residue_number)
        current_number_of_clashes = sum(
            len(values) for values in new_clashes.values())

        if new_clashes == {}:
            initial_residue = new_residue
            break
        elif len(new_clashes.keys()) < len(initial_clashes.keys()):
            print(
                'The number of clashing heavy atoms in the residue now is: {} and clashes with {} atoms'
                .format(len(new_clashes.keys()), current_number_of_clashes))
            initial_residue = new_residue.copy()
            initial_number_of_clashes = current_number_of_clashes
            initial_clashes = new_clashes
        elif len(new_clashes.keys()) == len(initial_clashes.keys()):
            if current_number_of_clashes < initial_number_of_clashes:
                print(
                    "The number of clashing heavy atoms is still the same but now they clash with {} atoms."
                    .format(current_number_of_clashes))
                initial_residue = new_residue.copy()
                initial_number_of_clashes = current_number_of_clashes

    return _join_atom_groups(initial_part_of_the_protein, initial_residue,
                             final_part_of_the_protein)
def process(args):
    """Measure every peptide pose against the receptor and write a table.

    For each peptide of the extractor and each of its coordinate sets, the
    backbone C atom closest to the receptor reference atom is located and one
    tab-separated line is written to ``args['out']`` with: pose name, that
    distance, the attack angle, the distance of the residue's O to the mean
    position of three receptor N atoms, the direction flag, the padded
    sequence and the residue number.

    :param args: option dictionary; it is expanded into ``er.PepExtractor``
        and the keys ``'receptor'`` and ``'out'`` are read here.
    """
    extractor = er.PepExtractor(**args)
    lM = len(extractor.plist)

    a = prody.parsePDB(args['receptor'])
    # Receptor reference atoms; residue numbers are hard-coded.
    SG = a.select('resnum 241 name OG')
    O = a.select('resnum 262 and name O')
    ND1 = a.select('resnum 79 and name NE2')
    tHN_ = a.select("name N resnum 239 240 241")
    tHN = np.average(tHN_.getCoords(), axis=0)

    t0 = time.time()
    # The context manager closes the output even if a measurement raises.
    with open(args['out'], 'w') as out:
        for cm in range(lM):
            m = extractor.plist[cm]
            t1 = time.time()
            dt = t1 - t0
            t0 = t1
            print('STEP: %d PERCENT: %.2f%% TIME: %s' % (
                cm, float(cm) / lM * 100, dt))
            # Best effort: a peptide that cannot be extracted is reported
            # and skipped, but interrupts and exits still propagate.
            try:
                S = extractor.extract_result(m)
            except Exception:
                print('ERROR: BAD PEPTIDE: %s' % m)
                continue

            lS = S.numCoordsets()
            for S_ in range(lS):
                S.setACSIndex(S_)
                tC = S.select('name C')
                # Residue whose backbone C is closest to the reference atom.
                dist = np.inf
                for c in tC.iterAtoms():
                    dist_ = prody.calcDistance(c, SG)[0]
                    if dist_ < dist:
                        dist = dist_
                        rnum = c.getResnum()

                C = S.select('resnum %i and name C' % rnum)
                CO = S.select('resnum %i and name O' % rnum)
                N_ = S.select('resnum %i and name N' % (rnum + 1))
                angleAttack = prody.calcAngle(ND1, SG, C)

                # Despite its name this is a distance difference: positive
                # when the receptor O is farther from the next N than from C.
                angle_ = prody.calcDistance(O, N_)[0] - \
                    prody.calcDistance(O, C)[0]
                DIR = 'F' if angle_ > 0 else 'R'

                # Pad the capped sequence with '-' to 10 characters,
                # reversed for direction 'R'.
                s = 'X' + m + 'Z'
                if DIR == 'F':
                    seq = s
                    pref = 5 - rnum
                else:
                    seq = s[::-1]
                    pref = rnum
                suf = 10 - (pref + len(seq))
                seq = '-' * pref + seq + '-' * suf

                hdist = np.linalg.norm(tHN - CO.getCoords())
                outstr = "%-10s\t%6.2f\t%6.2f\t%6.2f\t%3s\t%12s\t%d\n" % (
                    ('%s_%02d' % (m, S_ + 1), dist, angleAttack, hdist,
                     DIR, seq, rnum)
                )
                out.write(outstr)
def get_dist(vdm, ifg, comb):
    """Return the distance between one vdM atom and one iFG atom.

    The vdM atom name is the first entry that the module-level ``dist`` table
    stores for ``vdm.resname``; the iFG atom name is
    ``comb.vandarotamer_dict['A1']``.  The first element of the
    ``pr.calcDistance`` result is returned.
    """
    vdm_atom_name = dist[vdm.resname][0]
    vdm_atoms = vdm.ires_sele.select('name ' + vdm_atom_name)
    ifg_atoms = ifg.sele.select('name ' + comb.vandarotamer_dict['A1'])
    return pr.calcDistance(vdm_atoms, ifg_atoms)[0]
def _mappedDataSeries(modelPDB, label, mapmodel, maptarget):
    """Return the C-alpha data ``label`` of the mapped model residues,
    indexed by the residue indices of the matching target residues."""
    return pd.Series(modelPDB.ca.getData(label)[mapmodel.getResindices()],
                     index=maptarget.getResindices())


def _mappedSecstrSeries(mapmodel, maptarget):
    """Return the mapped model's secondary structure codes, indexed by
    target residue index, with empty codes replaced by ``'-'``."""
    ss = pd.Series(mapmodel.getSecstrs(), index=maptarget.getResindices())
    ss[ss == ''] = '-'  # empty strings cause trouble in csv load/save
    return ss


def generateTrainingSet(inputdict, distance, output=None, combineOutput=True):
    """Build per-residue feature tables for the models of each target.

    For every target structure NetSurfP and PSIPRED are run once and their
    results copied onto each model; STRIDE and DSSP are run on every model.
    Each model is aligned to its target with ``prody.matchAlign`` and the
    features of the mapped residues are collected together with a class
    label that is 1 where the target/model distance is below ``distance``
    and 0 otherwise.

    Args:
        inputdict: Mapping of target PDB filename to an iterable of model
            PDB filenames.
        distance: Distance threshold used for the ``ClassLabel`` column.
            A falsy value triggers an ``AssertionError``.
        output: Optional CSV path. Only written when ``combineOutput`` is
            true; otherwise a warning is printed.
        combineOutput: When true a single concatenated ``DataFrame`` is
            returned, otherwise a list with one ``DataFrame`` per model.

    Returns:
        A ``pandas.DataFrame`` or a list of them, see ``combineOutput``.

    Raises:
        subprocess.CalledProcessError: If one of the external programs
            (dssp, stride, netsurfp, runpsipred) exits with a non-zero
            status during the initial availability check.
        AssertionError: If ``distance`` is falsy.
    """
    # Fail early when a required external program is unavailable. The
    # context manager closes the null device even if a check raises.
    with open(os.devnull, 'w') as devnull:
        for command in ('dssp --version', 'stride -h', 'netsurfp -h',
                        'runpsipred'):
            subprocess.check_call(command, shell=True, stdout=devnull,
                                  stderr=devnull)

    # (output column, data label on the model) pairs for the values that
    # are copied from the target, in the column order of the result table.
    copiedFeatures = (
        ('NetSurfP_exp', 'netsurfp_exposure'),
        ('NetSurfP_asa', 'netsurfp_asa'),
        ('NetSurfP_rsa', 'netsurfp_rsa'),
        ('NetSurfP_alpha', 'netsurfp_alphascore'),
        ('NetSurfP_beta', 'netsurfp_betascore'),
        ('NetSurfP_coil', 'netsurfp_coilscore'),
        ('PSIPRED_ss', 'psipred_ss'),
        ('PSIPRED_coilscore', 'psipred_coilscore'),
        ('PSIPRED_helixscore', 'psipred_helixscore'),
        ('PSIPRED_strandscore', 'psipred_strandscore'),
    )

    finalData = pd.DataFrame() if combineOutput else []

    for target, models in inputdict.items():
        targetPDB = prody.parsePDB(target)
        assert distance, "Distance is not valid"

        tempdir = tempfile.mkdtemp()
        try:
            # we don't want to run NetSurfP and PSIPRED over and over again
            # for all model structures. we compute them for target
            # structure and just reuse on model structures
            netsurfp.parseNetSurfP(
                netsurfp.execNetSurfP(target, outputdir=tempdir), targetPDB)
            psipred.parsePSIPRED(
                psipred.execPSIPRED(target, outputdir=tempdir), targetPDB)

            for modelFilename in models:
                datadict = {}
                modelPDB = prody.parsePDB(modelFilename)
                if not modelPDB:
                    print('Model file %s cannot be parsed, skipping...'
                          % modelFilename)
                    continue

                # if model has no chainID, let's assign one. That makes
                # STRIDE parser happy. The size check matters: comparing a
                # multi-element array with ' ' has no single truth value
                # and would raise ValueError for multi-chain models.
                chids = np.unique(modelPDB.getChids())
                if chids.size == 1 and chids[0] == ' ':
                    modelPDB.all.setChids('A')
                    modelFilename = os.path.join(
                        tempdir, os.path.basename(modelFilename))
                    modelFilename = prody.writePDB(modelFilename, modelPDB,
                                                   autoext=False)

                # superimpose model onto target structure
                match = prody.matchAlign(modelPDB, targetPDB,
                                         tarsel='calpha', seqid=50,
                                         overlap=20)
                if match is None:
                    # Nothing to index into when no chains could be matched.
                    print('Model file %s cannot be aligned to %s, '
                          'skipping...' % (modelFilename, target))
                    continue
                mapmodel = match[1]
                maptarget = match[2]

                # and copy NetSurfP and PSIPRED data from target to model
                copyDataFromTarget(targetPDB, modelPDB)

                # run STRIDE; its secondary structure is read right away,
                # before the DSSP results are parsed into the same model
                prody.parseSTRIDE(
                    prody.execSTRIDE(modelFilename, outputdir=tempdir),
                    modelPDB)
                datadict['STRIDEarea'] = _mappedDataSeries(
                    modelPDB, 'stride_area', mapmodel, maptarget)
                datadict['STRIDEss'] = _mappedSecstrSeries(mapmodel,
                                                           maptarget)

                # run DSSP
                prody.parseDSSP(
                    prody.execDSSP(modelFilename, outputdir=tempdir),
                    modelPDB)
                datadict['DSSPacc'] = _mappedDataSeries(
                    modelPDB, 'dssp_acc', mapmodel, maptarget)
                datadict['DSSPss'] = _mappedSecstrSeries(mapmodel, maptarget)

                # save NetSurfP and PSIPRED data
                for column, label in copiedFeatures:
                    datadict[column] = _mappedDataSeries(
                        modelPDB, label, mapmodel, maptarget)

                # Compute class labels based on the distance argument
                datadict['ClassLabel'] = pd.Series(
                    (np.abs(prody.calcDistance(maptarget.copy(),
                                               mapmodel.copy()))
                     < distance).astype(int),
                    index=maptarget.getResindices())

                if combineOutput:
                    finalData = pd.concat([finalData,
                                           pd.DataFrame(datadict)])
                else:
                    finalData.append(pd.DataFrame(datadict))
        finally:
            # remove temporary directory, also when a tool or parser fails
            shutil.rmtree(tempdir, ignore_errors=True)

    if output:
        if combineOutput:
            finalData.to_csv(output, index=False,
                             quoting=csv.QUOTE_NONNUMERIC)
        else:
            print('Warning! Output must be combined to be saved into a CSV '
                  'file')
    return finalData
def _split_atom_names(token):
    """Return ``(names, is_multiple)`` for one atom-name field.

    A field containing ``[`` or ``]`` is a comma-separated list; its names
    are returned joined by single spaces.
    """
    if "[" in token or "]" in token:
        names = token.strip("[").strip("]").split(",")
        return ' '.join(names), True
    return token, False


def _selection_point(selection, n_atoms):
    """Return the point that represents a selection in a distance.

    A single atom is represented by its own coordinates; several atoms by
    the centre that ``prody.calcCenter`` returns when called without
    weights.
    """
    if n_atoms > 1:
        return prody.calcCenter(selection)
    return selection.getCoords()[0]


def compute_atom_distances(pdb_target, res_file, output_report, chain="L"):
    """Compute distances between ligand atoms and residue atoms.

    The atoms are named in an instruction file with one whitespace-separated
    row per distance::

        RESNUM  LIGAND_ATOM_NAME(S)  RESIDUE_ATOM_NAME(S)

    To use several atoms on either side, write them as ``[ATOM1,ATOM2,ATOMN]``
    (no spaces). Such a group is represented by its centre; a single atom
    is represented by its own coordinates.

    :param pdb_target: input PDB file path
    :param res_file: path of the instruction file described above
    :param output_report: path the report is written to; nothing is written
        when this is falsy
    :param chain: chain ID that holds the ligand
    :return: the report text, one line per instruction with the residue
        number, the concatenated ligand atom names, the concatenated
        residue atom names and the distance
    :raises SystemExit: when the ligand chain or any requested atom cannot
        be selected
    """
    # Load PDB files
    target = pdb2prody(pdb_target)
    ligand = target.select("chain {}".format(chain))
    if ligand is None:
        raise SystemExit(
            "No atoms were found in chain {} of {}".format(chain, pdb_target))
    print(ligand.getNames())

    # Reading instructions from file
    list_of_instructions = read_selecteds_from_file(res_file)

    report = []
    for line in list_of_instructions:
        resnum, atom_name_ref, atom_name_tar = line.split()

        # Ligand side. The None check comes before the selection is
        # printed, otherwise the print itself fails and the message below
        # is never shown.
        ref_names, ref_is_multiple = _split_atom_names(atom_name_ref)
        if ref_is_multiple:
            print("Multiple atom selection for the ligand")
        else:
            print("Single atom selection for the ligand")
        atom_ref_selected = ligand.select("name {}".format(ref_names))
        if atom_ref_selected is None:
            raise SystemExit(
                "None atoms where selected. Please, check if the selected atoms exists in the ligand in {}"
                .format(pdb_target))
        if ref_is_multiple:
            print("Selected atoms: {}".format(atom_ref_selected.getNames()))
        else:
            print("Selected atom: {}".format(atom_ref_selected.getNames()))

        # Residue side, handled the same way.
        tar_names, tar_is_multiple = _split_atom_names(atom_name_tar)
        if tar_is_multiple:
            print("Multiple atom selection for the system")
        else:
            print("Single atom selection for the system")
        atom_tar_selected = select_atom_given_name_type_and_num(
            target, resnum, tar_names)
        if atom_tar_selected is None:
            raise SystemExit(
                "None atoms where selected. Please, check if the selected atoms exists in the residue {} in {}"
                .format(resnum, pdb_target))
        if tar_is_multiple:
            print("Selected atoms: {}".format(atom_tar_selected.getNames()))
        else:
            print("Selected atom: {}".format(atom_tar_selected.getNames()))

        number_of_selected_atoms_ref = len(atom_ref_selected.getNames())
        number_of_selected_atoms_tar = len(atom_tar_selected.getNames())

        # Reduce each side to one point and measure between the points.
        # Converting to float keeps the "{:6.3f}" field below valid in
        # every case, including single atom against single atom.
        point_ref = _selection_point(atom_ref_selected,
                                     number_of_selected_atoms_ref)
        point_tar = _selection_point(atom_tar_selected,
                                     number_of_selected_atoms_tar)
        distance = float(np.linalg.norm(point_tar - point_ref))

        report_line = "{:4} {:10} {:10} {:6.3f}\n".format(
            resnum, ''.join(atom_ref_selected.getNames()),
            ''.join(atom_tar_selected.getNames()), distance)
        report.append(report_line)

    report_final = ''.join(report)
    if output_report:
        with open(output_report, "w") as report_file:
            report_file.write(report_final)
    print(report_final)
    return report_final
def main_calc(pep_info, seq_info, mV, file):
    """Assemble the matrix representation of one peptide.

    Args:
        pep_info: Sequence whose item 0 is the peptide sequence and whose
            items 1 and 2 are the start (inclusive) and end (exclusive)
            positions to process.
        seq_info: Indexable by position; each entry lists the keys of the
            atoms of that position inside ``file``.
        mV: Size of the square distance/bond matrices and number of columns
            of the index matrix.
        file: Container indexed with the keys from ``seq_info``. The
            objects it yields must offer ``getElement``, ``getName`` and
            ``getSequence`` (presumably ProDy atoms - verify against the
            caller).

    Returns:
        dict with the keys ``'sequence'`` (the peptide), ``'ele_to_amino'``
        (``[name, sequence]`` per atom), ``'index'`` (per-atom element
        matrix), ``'secondary'`` (pairwise distances) and ``'primary'``
        (distances of atom pairs accepted as bonded).
    """
    nitrogen_seen = False
    pep = pep_info[0]
    ind = [pep_info[1], pep_info[2]]

    # Pair each atom object with the sequence position it belongs to.
    atoms = [[file[key], position]
             for position in range(ind[0], ind[1])
             for key in seq_info[position]]

    lengths = np.zeros((mV, mV))
    index = np.zeros((len(ELEMENT_INDEX) + 1, mV))
    bonded = np.zeros((mV, mV))
    amino_acid_list = []

    atom_count = len(atoms)
    for i, (atom_i, position_i) in enumerate(atoms):
        # 1-based position of the element symbol in ELEMENT_SYMBOLS.
        ele = float(ELEMENT_SYMBOLS.index(str(atom_i.getElement()))) + 1

        # Row 5 of the index matrix receives a code that depends on ele.
        if ele == 6.0:
            index[5][i] = 0.0
        elif ele == 7.0 and nitrogen_seen:
            index[5][i] = 1.0
        elif ele == 8.0:
            index[5][i] = 2.0

        # Remember once the first atom named 'N' has been passed.
        if not nitrogen_seen and atom_i.getName() == 'N':
            nitrogen_seen = True

        # Flag the atom's element in its own row of the index matrix.
        index[ELEMENT_INDEX.index(atom_i.getElement())][i] = 1

        amino_acid_list.append(
            [str(atom_i.getName()), str(atom_i.getSequence())])

        for j in range(atom_count):
            atom_j = atoms[j][0]

            # Measure each pair only once and mirror the value.
            if i != j and lengths[i][j] == 0:
                lengths[i][j] = float(pd.calcDistance(atom_i, atom_j))
                lengths[j][i] = lengths[i][j]

            # Close pairs of distinct atoms count as bonded when the
            # partner's sequence is one that possible_bonds allows for
            # the position of atom i.
            if (lengths[i][j] < 1.9) and (atom_i != atom_j):
                if atom_j.getSequence() in possible_bonds(
                        position_i, pep, ind):
                    bonded[i][j] = lengths[i][j]
                    bonded[j][i] = bonded[i][j]

    return {
        'sequence': pep,
        'ele_to_amino': amino_acid_list,
        'index': index,
        'secondary': lengths,
        'primary': bonded,
    }