def _ParseOutput(ent, output):
    """
    Parse a donor/acceptor table into a list of :class:`HBond` objects.

    If a line starting with ``<---DONOR---> <-ACCEPTOR-->`` is found, that line
    and the three lines following it are dropped together with everything
    before them. Without such a line, every line is treated as data. Lines
    that are empty after stripping are ignored. Data lines are read by fixed
    column positions.

    :param ent: Object whose ``FindAtom(chain, resnum, atom_name)`` is used to
                look up donor and acceptor atoms
    :param output: Iterable of text lines
    :returns: List of ``HBond(donor, acceptor)`` objects in input order
    :raises AssertionError: If a donor or acceptor atom is not valid in *ent*
    """
    rows = list(output)

    # skip header
    for pos, row in enumerate(rows):
        if row.startswith('<---DONOR---> <-ACCEPTOR-->'):
            rows = rows[pos + 4:]
            break

    found = []
    for row in rows:
        if not row.strip():
            continue

        # Donor columns; a '-' stands for "no chain name" / "no insertion code".
        d_chain, d_num, d_ins, d_name = row[0], int(row[1:5]), row[5], row[9:13]
        d_chain = ' ' if d_chain == '-' else d_chain
        d_ins = '\0' if d_ins == '-' else d_ins

        # Acceptor columns, same conventions as for the donor.
        a_chain, a_num, a_ins, a_name = (row[14], int(row[15:19]), row[19],
                                         row[24:28])
        a_chain = ' ' if a_chain == '-' else a_chain
        a_ins = '\0' if a_ins == '-' else a_ins

        donor = ent.FindAtom(d_chain, mol.ResNum(d_num, d_ins), d_name.strip())
        acc = ent.FindAtom(a_chain, mol.ResNum(a_num, a_ins), a_name.strip())
        assert donor.IsValid()
        assert acc.IsValid()
        found.append(HBond(donor, acc))
    return found
def test_starts_from_last_water_rnum(self):
    """
    Add the same single-water entity to a PDBize instance twice and check
    that both waters land in chain "-" and are numbered 1A and 1B.
    """
    # Entity with one water chain holding a single HOH residue.
    ent = mol.CreateEntity()
    editor = ent.EditXCS(mol.BUFFERED_EDIT)
    water_chain = editor.InsertChain("A")
    editor.SetChainType(water_chain, mol.CHAINTYPE_WATER)
    editor.AppendResidue(water_chain, "HOH")

    pdbizer = mol.alg.PDBize()
    transforms = geom.Mat4List()
    transforms.append(geom.Mat4())
    sequences = seq.CreateSequenceList()

    # Feed the same selection twice.
    for _ in range(2):
        pdbizer.Add(ent.Select(''), transforms, sequences)
    result = pdbizer.Finish()

    chain_names = [ch.name for ch in result.chains]
    self.assertEqual(chain_names, ["-"])

    numbers = [res.number for res in result.residues]
    self.assertEqual(numbers, [mol.ResNum(1, 'A'), mol.ResNum(1, 'B')])
def _ParseRsaFile(entity, file, asa_abs, asa_rel):
    """
    Reads Area file (.rsa) and attaches asa (absolute + relative) per residue
    to an entity.

    The first four lines of the file are always skipped; of the remaining
    lines only those starting with ``RES`` are evaluated. Fields are read by
    fixed column positions.

    :param entity: EntityHandle or EntityView whose residues receive the
                   properties
    :param file: Filename of .rsa file
    :param asa_abs: Name of the float property for absolute SASA
    :param asa_rel: Name of the float property for relative SASA

    :exception: RuntimeError if residue names are not the same
    """
    # Residue number with optional one-character insertion code, e.g. "12A".
    resnum_pattern = re.compile(r'(?P<num>-?\d+)(?P<ins>\w)?')

    # Context manager guarantees the handle is released even if reading fails.
    with open(file) as area_fh:
        area_lines = area_fh.readlines()

    # Skip the four leading lines, then parse the residue records.
    for line in area_lines[4:]:
        if not line.startswith("RES"):
            continue

        # extract data
        res_name = line[3:8].strip()
        chain_id = line[8:9]
        res_number = line[9:14].strip()
        abs_all, rel_all = line[15:28].strip().split()

        di = resnum_pattern.match(res_number).groupdict()
        if di["ins"] is None:
            resNum = mol.ResNum(int(di["num"]))
        else:
            resNum = mol.ResNum(int(di["num"]), di["ins"])

        # set res. props
        res = entity.FindResidue(chain_id, resNum)
        if res_name != res.name:
            raise RuntimeError(
                "Residue Names are not the same for ResNumb: %s (%s vs %s)"
                % (res_number, res.name, res_name))
        res.SetFloatProp(asa_rel, float(rel_all))
        res.SetFloatProp(asa_abs, float(abs_all))
def _ParseVoronotaLocal(lines):
    """
    Collect per-residue local scores from Voronota-style output lines.

    Each line is expected to carry a residue descriptor containing
    ``c<chain>`` and ``r<number>``, optionally ``i<insertion code>``, and to
    end with the score as its last whitespace-separated field. Blank lines
    and lines with a negative score (invalid CAD score) are skipped.

    :param lines: Iterable of text lines
    :returns: Dictionary mapping ``(chain_name, mol.ResNum)`` to the score
    :raises IndexError: If a scored line lacks a chain or residue number
                        descriptor
    :raises ValueError: If the last field of a line is not a number
    """
    # Compiled once; the capture group holds the value between the brackets.
    # Chain names may contain digits and residue numbers may be negative.
    chain_name_regex = re.compile(r'c<([^<>]+)>')
    resnum_regex = re.compile(r'r<(-?\d+)>')
    insertion_code_regex = re.compile(r'i<(\D)>')

    local_aa_dict = {}
    for line in lines:
        fields = line.split()
        if not fields:
            continue  # blank line, nothing to parse
        local_aa = float(fields[-1])
        if local_aa < 0.0:
            continue  # invalid CAD score

        chain_data = chain_name_regex.findall(line)
        resnum_data = resnum_regex.findall(line)
        insertion_code_data = insertion_code_regex.findall(line)

        if insertion_code_data:
            resnum = mol.ResNum(int(resnum_data[0]), insertion_code_data[0])
        else:
            resnum = mol.ResNum(int(resnum_data[0]))

        local_aa_dict[(chain_data[0], resnum)] = local_aa
    return local_aa_dict
def _ParseAsaFile(entity, file, asa_atom):
    """
    Reads Area file (.asa) and attaches asa per atom to an entity.

    Only lines starting with ``ATOM`` are evaluated; atom name, chain,
    residue number and area are read by fixed column positions. Entries whose
    atom cannot be found in *entity* are reported with a warning and skipped.

    :param entity: EntityHandle or EntityView for attaching sasa on atom level
    :param file: Filename of area file
    :param asa_atom: Name of the float property for SASA
    """
    # Residue number with optional one-character insertion code, e.g. "12A".
    resnum_pattern = re.compile(r'(?P<num>-?\d+)(?P<ins>\w)?')

    # Context manager guarantees the handle is released even if reading fails.
    with open(file) as asa_fh:
        asa_lines = asa_fh.readlines()

    for line in asa_lines:
        if not line.startswith("ATOM"):
            continue

        # get res_number, chain_id and atom name
        atom_name = line[12:16].strip()
        chain_id = line[21]
        res_number = line[22:27].strip()
        asa = line[54:63].strip()

        di = resnum_pattern.match(res_number).groupdict()
        if di["ins"] is None:
            resNum = mol.ResNum(int(di["num"]))
        else:
            resNum = mol.ResNum(int(di["num"]), di["ins"])

        a = entity.FindAtom(chain_id, resNum, atom_name)
        if a.IsValid():
            a.SetFloatProp(asa_atom, float(asa))
        else:
            LogWarning("NACCESS: invalid asa entry %s %s %s"
                       % (chain_id, resNum, atom_name))
def _ParseCADLocal(lines):
    """
    Extract per-residue local scores from CAD score output.

    Parsing starts two lines after the first line containing
    ``local_scores``; the line directly after that marker is skipped
    (presumably a column header -- confirm against the tool output). Every
    following non-blank line is split on whitespace into chain name, residue
    number and score. Entries with a negative score are ignored.

    :param lines: Sequence of text lines
    :returns: Dictionary mapping ``(chain_name, mol.ResNum)`` to the score
    :raises RuntimeError: If no line contains ``local_scores``
    """
    local_scores_idx = next(
        (idx for idx, line in enumerate(lines) if "local_scores" in line),
        None)
    if local_scores_idx is None:
        raise RuntimeError("Failed to parse local cadscores")

    local_aa_dict = {}
    for line in lines[local_scores_idx + 2:]:
        items = line.split()
        if not items:
            continue  # tolerate blank lines, e.g. at the end of the output
        local_aa = float(items[2])
        if local_aa < 0.0:
            continue  # invalid CAD score
        key = (items[0], mol.ResNum(int(items[1])))
        local_aa_dict[key] = local_aa
    return local_aa_dict
def test_fastModified(self):
    """
    Copy the first residue of ``testfiles/sep.pdb`` onto a fresh SER residue
    and check that exactly the six expected atoms are present.
    """
    # phosphoserine: test if we correctly strip off modifications
    template = io.LoadPDB('testfiles/sep.pdb')

    target = mol.CreateEntity()
    editor = target.EditXCS()
    chain = editor.InsertChain('A')
    editor.AppendResidue(chain, 'SER')

    err, has_cbeta = mol.alg.CopyConserved(template.residues[0],
                                           target.residues[0], editor)
    self.assertTrue(err)
    self.assertTrue(has_cbeta)

    residues = target.residues
    self.assertEqual(len(residues), 1)
    self.assertEqual(len(residues[0].atoms), 6)

    # Every atom of the serine must be there.
    for atom_name in ("N", "CA", "C", "O", "CB", "OG"):
        atom = target.FindAtom("A", mol.ResNum(1), atom_name)
        self.assertTrue(atom.IsValid())
def LoadDSSP(file_name, model, extract_burial_status=False,
             entity_saved=False, calculate_relative_sa=True):
    """
    Loads DSSP output and assigns secondary structure states to the peptidic
    residues.

    If you would like to run dssp *and* assign the secondary structure,
    use :func:`AssignDSSP` instead.

    Residue records are read by fixed column positions. Records whose chain
    is not found in *model* or whose residue number field is empty are
    skipped. Errors raised while looking up the residue or setting the burial
    properties are printed and the record is skipped.

    :param file_name: The filename of the DSSP output file
    :param model: The entity to which the secondary structure states should be
                  assigned. If it is a ``mol.ChainView``, every record is
                  looked up in that chain regardless of the chain name in the
                  file.
    :param extract_burial_status: If true, also sets the
                  ``solvent_accessibility`` float property and the
                  ``burial_status`` string property ('X' by default, 'b' or
                  'e' when the relative accessibility is calculated).
    :param entity_saved: Whether the entity was saved. Not used in this
                  function.
    :param calculate_relative_sa: If true, also calculates the relative
                  solvent accessibility and assigns it to the
                  ``relative_solvent_accessibility`` float property of the
                  residue. Only evaluated when *extract_burial_status* is
                  true.

    :raises ValueError: If *model* is not valid or contains no atoms
    :raises RuntimeError: If the header cannot be skipped or a record refers
                  to a residue that is not valid in *model*
    """
    if not model.IsValid():
        raise ValueError('model entity is not valid')
    if model.atom_count == 0:
        raise ValueError('model entity does not contain any atoms')

    # One-letter DSSP state -> secondary structure type; anything else is coil.
    sec_struct_by_code = {
        'H': mol.SecStructure.ALPHA_HELIX,
        'E': mol.SecStructure.EXTENDED,
        'B': mol.SecStructure.BETA_BRIDGE,
        'S': mol.SecStructure.BEND,
        'T': mol.SecStructure.TURN,
        'I': mol.SecStructure.PI_HELIX,
        'G': mol.SecStructure.THREE_TEN_HELIX,
    }

    # The context manager closes the stream on every exit path, including
    # exceptions raised while parsing a record.
    with open(file_name) as stream:
        if not _SkipHeader(stream):
            raise RuntimeError('Ill-formatted DSSP file')

        for line in stream:
            num = line[6:10].strip()
            ins_code = line[10].strip()
            chain_name = line[11]
            solvent_accessibility = float(line[34:39].strip())
            amino_acid = line[13]

            if isinstance(model, mol.ChainView):
                chain = model
            else:
                chain = model.FindChain(chain_name)

            if not chain.IsValid():
                continue
            if num == '':
                continue

            residue = None
            try:
                if ins_code == "":
                    residue = chain.FindResidue(mol.ResNum(int(num)))
                else:
                    residue = chain.FindResidue(mol.ResNum(int(num), ins_code))

                # NOTE(review): the nesting below was reconstructed from a
                # flattened listing -- confirm against the original file.
                # set property "burial status":
                if extract_burial_status:
                    # set default (dummy) burial status for incomplete
                    # residues:
                    residue.SetStringProp("burial_status", 'X')

                    # handle seleno-methionine appearing as amino acid 'X'
                    # in DSSP:
                    if residue.name == "MSE" and amino_acid == 'X':
                        amino_acid = 'M'

                    residue.SetFloatProp("solvent_accessibility",
                                         solvent_accessibility)
                    if calculate_relative_sa:
                        relative_sa = _CalcRelativeSA(amino_acid,
                                                      solvent_accessibility)
                        residue.SetFloatProp("relative_solvent_accessibility",
                                             relative_sa)
                        if relative_sa < 0.25:
                            residue.SetStringProp("burial_status", 'b')
                        else:
                            residue.SetStringProp("burial_status", 'e')
            except Exception as e:
                # Deliberate best effort: report and move on to the next
                # record.
                print("ERROR:", e)
                continue

            rtype = line[16:17]
            rt = sec_struct_by_code.get(rtype, mol.SecStructure.COIL)

            # for corrupted DSSP files. Catch in calling routine:
            if not residue.IsValid():
                # Todo: if residues with occupancy 0 have been removed before
                # using a selection statement, they are missed here
                # IMPORTANT: assign DSSP before any selections
                raise RuntimeError('Ill-formatted DSSP file: invalid residue')
            residue.SetSecStructure(mol.SecStructure(rt))