def get_best_homologues(model, chain_ids=None):
    """
    Do a local BLAST search for homologues of each protein chain of ``model``,
    pick the hit with the best (lowest) resolution and fetch that chain.

    As usual, multi-model input is not supported (``only_model()`` is used).

    Parameters:
      model - model manager; only ``get_hierarchy()`` is used here.
      chain_ids - container of chain ids to search. If None, all protein
        chains are searched.

    Returns a dictionary of chain_id: model, where the model holds only the
    selected chain of the best homologue. Chains that are not protein, that
    are not listed in ``chain_ids``, or for which no hit with a known
    resolution passes the identity filter are absent from the dictionary.
    """
    # Load the local PDB information table once; it is reused for every chain.
    pdb_info = iotbx.bioinformatics.pdb_info.pdb_info_local()
    h = model.get_hierarchy()
    result = {}
    for chain in h.only_model().chains():
        print("Working with chain '%s'" % chain.id)
        if not chain.is_protein():
            print(" skipping, not protein. Maybe water or NA")
            continue
        # chain_ids is a plain container of ids; it must not be called.
        if chain_ids is not None and chain.id not in chain_ids:
            continue
        sequence = chain.as_padded_sequence()
        l_blast = local_blast.pdbaa(seq=sequence)  # why sequence in init???
        blast_xml_result = l_blast.run()
        # Probably alignment info is lost in this call,
        # but we have tools to do alignment, see mmtbx/alignment/__init__.py
        blast_summary = summarize_blast_output("\n".join(blast_xml_result))
        pdb_ids_to_study = {}
        for hit in blast_summary:
            hit.show(out=sys.stdout)
            # Don't have clear idea what these values mean right now,
            # but it would be reasonable to filter somehow.
            if hit.identity < 70:
                continue
            pdb_ids_to_study[hit.pdb_id] = hit.chain_id  # can add more info
        info_list = pdb_info.get_info_list(list(pdb_ids_to_study))
        # It would be good to merge info_list with hits in blast_summary in
        # one data structure here and do better filtering.
        # Let's go without it for now.

        # Entries are indexed as (pdb id, resolution, ...). Drop those with
        # no resolution: they cannot be ranked, and mixing None with numbers
        # either sorts them first (Python 2) or raises TypeError (Python 3).
        candidates = [info for info in info_list if info[1]]
        if not candidates:
            print(" skipping, no homologue with known resolution found")
            continue
        # Lowest resolution value wins; min() keeps the first entry on ties.
        best_info = min(candidates, key=lambda info: float(info[1]))
        best_pdb_id = best_info[0]
        best_pdb_chain = pdb_ids_to_study[best_pdb_id]
        print("Best pdb: %s chain: %s" % (best_info, best_pdb_chain))
        # Get actual selected model from PDB.
        data = fetch(id=best_pdb_id)
        m = mmtbx.model.manager(model_input=iotbx.pdb.input(
            source_info=None, lines=data.readlines()))
        sel = m.selection("chain '%s'" % best_pdb_chain)
        result[chain.id] = m.select(sel)
    return result
def perfect_pair(sequence, params, pdb_id=None):
    """
    Search the local BLAST database for structures matching ``sequence`` and
    return the best candidates.

    Parameters:
      sequence - query sequence string; 'X' characters are ignored when the
        aligned fraction of the query is computed.
      params - object providing ``engine`` (BLAST binary name),
        ``piece_matching``, ``identity`` (minimum identity to keep a hit),
        ``high_res`` (resolution cutoff or None) and ``n`` (maximum number of
        entries returned).
      pdb_id - optional id to exclude from the results.

    Returns a list of at most ``params.n`` ``group_args`` objects with
    ``pdb_code``, ``chain_id``, ``resolution`` and ``identity`` attributes.
    Without a resolution cutoff the list is ordered by resolution, then by
    decreasing identity; with a cutoff only entries at or below it are kept
    and the order is decreasing identity, then resolution. An empty list is
    returned when summarizing the BLAST output raises StopIteration.

    Raises Sorry when the output cannot be parsed because of a
    "mismatched tag" while the engine is "blastall"; any other parsing error
    is propagated unchanged.
    """
    result = []
    pdb_info = iotbx.bioinformatics.pdb_info.pdb_info_local()
    l_blast = local_blast.pdbaa(seq=sequence)
    blast_xml_result = l_blast.run(debug=False, binary=params.engine)
    try:
        blast_summary = summarize_blast_output("\n".join(blast_xml_result))
    except StopIteration:
        return result
    except Exception as e:
        if "mismatched tag" in str(e) and params.engine == "blastall":
            raise Sorry("setting engine=blastp and try again")
        # Anything else used to be swallowed here and resurfaced below as a
        # NameError on blast_summary; let the real error through instead.
        raise

    # Loop invariant: number of non-'X' residues in the query.
    query_length = len(sequence.replace('X', ''))
    pdb_ids_to_study = {}
    for hit in blast_summary:
        hsp = hit.hsp
        ### The Gapped BLAST algorithm allows gaps (deletions and insertions)
        ### to be introduced into the alignments
        # blastall: the surprising default value (None, None) instead of an
        # integer.
        gaps = hsp.gaps
        if gaps == (None, None):
            gaps = 0
        # Explicit float arithmetic so the ratio does not depend on whether
        # true division is enabled for this module.
        identity = (hsp.identities - gaps) * 100.0 / (hsp.align_length - gaps)
        if not params.piece_matching:
            # Scale by the fraction of the query covered by the alignment.
            ali_identity = (
                float(len(hsp.query.replace('X', ''))) / query_length)
            identity = identity * ali_identity
        if identity < params.identity:
            continue
        for i in hit.all_ids:
            if i[0] == pdb_id:
                continue
            # Test and store under the same (string) key; the first hit that
            # mentions an id wins.
            code = str(i[0])
            if code not in pdb_ids_to_study:
                pdb_ids_to_study[code] = (str(i[1]), identity)

    info_lists = pdb_info.get_info_list(list(pdb_ids_to_study))
    for info in info_lists:
        # Entries are indexed as (pdb id, resolution, ...); skip entries
        # without a resolution.
        if not info[1]:
            continue
        resolution = float(info[1])
        if params.high_res is not None and not resolution <= params.high_res:
            continue
        chain_id, identity = pdb_ids_to_study[info[0]]
        result.append(group_args(
            pdb_code=info[0],
            chain_id=chain_id,
            resolution=resolution,
            identity=identity))

    # One stable sort at the end gives the same order as re-sorting after
    # every append, without the repeated work.
    if params.high_res is None:
        result.sort(key=lambda g: (g.resolution, -g.identity))
    else:
        result.sort(key=lambda g: (-g.identity, g.resolution))
    return result[:params.n]
def run(args=(), params=None, out=None):
    """
    BLAST the first sequence of a sequence file against the PDB and write the
    raw result to a file.

    Parameters:
      args - command-line arguments. They are parsed only when ``params`` is
        None; a non-empty ``args`` also switches on the printed summary table.
      params - extracted parameters containing a ``blast_pdb`` scope with
        ``file_name``, ``output_file``, ``blast_type`` and ``expect``.
        ``output_file`` is set to "blast.xml" on that scope when it is None.
      out - stream for messages; defaults to ``sys.stdout``.

    Returns ``(sequence, absolute path of the output file)``, or
    ``(sequence, None)`` when the search produced no results.

    Raises Sorry when the sequence file is empty, holds no data, or the
    sequence is shorter than six residues.
    """
    if out is None:
        out = sys.stdout
    if params is None:
        import iotbx.phil
        cmdline = iotbx.phil.process_command_line_with_files(
            args=args,
            master_phil=master_phil,
            seq_file_def="blast_pdb.file_name")
        params = cmdline.work.extract()
    validate_params(params)
    params = params.blast_pdb
    from iotbx.bioinformatics.structure import (
        get_ncbi_pdb_blast, summarize_blast_output)
    from iotbx.file_reader import any_file

    # Read the input and make sure it really is a sequence file.
    seq_file = any_file(
        params.file_name,
        force_type="seq",
        raise_sorry_if_not_expected_format=True,
        raise_sorry_if_errors=True)
    seq_file.check_file_type("seq")
    seq_objects = seq_file.file_object
    n_sequences = len(seq_objects)
    if n_sequences == 0:
        raise Sorry("Empty sequence file!")
    if n_sequences > 1:
        print("WARNING: multiple sequences provided; searching only the 1st",
              file=out)

    sequence = seq_objects[0].sequence
    n_residues = len(sequence)
    if n_residues == 0:
        raise Sorry("No data in sequence file.")
    if n_residues < 6:
        raise Sorry("Sequence must be at least six residues.")

    if params.output_file is None:
        params.output_file = "blast.xml"
    blast_out = get_ncbi_pdb_blast(
        sequence,
        file_name=params.output_file,
        blast_type=params.blast_type,
        expect=params.expect)
    print("Wrote results to %s" % params.output_file, file=out)
    results = summarize_blast_output(blast_out)

    if len(args) != 0:  # command-line mode
        table_header = (
            "",
            "%d matching structures" % len(results),
            "",
            "ID Chain evalue length %ident %pos #structures",
            "-" * 59,
        )
        for text in table_header:
            print(text, file=out)
        for result in results:
            result.show(out)

    # Only report an output path when there is something to look at.
    output_path = None
    if len(results) > 0:
        output_path = os.path.abspath(params.output_file)
    return sequence, output_path
def run(args=(), params=None, out=None):
    """
    Run a BLAST search of the PDB for the first sequence found in a sequence
    file and save the raw output.

    Parameters:
      args - command-line arguments; processed only if ``params`` is None.
        When non-empty, a summary table of the hits is also written to
        ``out``.
      params - extracted parameters with a ``blast_pdb`` scope providing
        ``file_name``, ``output_file``, ``blast_type`` and ``expect``. If
        ``output_file`` is None it is set to "blast.xml" on that scope.
      out - output stream for messages; defaults to ``sys.stdout``.

    Returns a tuple ``(sequence, path)`` where ``path`` is the absolute path
    of the output file, or None if there were no matching structures.

    Raises Sorry for an empty sequence file, an empty sequence, or a
    sequence of fewer than six residues.
    """
    if out is None:
        out = sys.stdout

    def _println(text):
        # Same bytes as the Python 2 statement ``print >> out, text`` but
        # valid on both Python 2 and 3 without any __future__ import.
        out.write(text + "\n")

    if params is None:
        import iotbx.phil
        cmdline = iotbx.phil.process_command_line_with_files(
            args=args,
            master_phil=master_phil,
            seq_file_def="blast_pdb.file_name")
        params = cmdline.work.extract()
    validate_params(params)
    params = params.blast_pdb
    from iotbx.bioinformatics.structure import get_ncbi_pdb_blast, \
        summarize_blast_output
    from iotbx.file_reader import any_file
    seq_file = any_file(params.file_name,
                        force_type="seq",
                        raise_sorry_if_not_expected_format=True,
                        raise_sorry_if_errors=True)
    seq_file.check_file_type("seq")
    seq_objects = seq_file.file_object
    if len(seq_objects) == 0:
        raise Sorry("Empty sequence file!")
    elif len(seq_objects) > 1:
        _println("WARNING: multiple sequences provided; searching only the 1st")
    sequence = seq_objects[0].sequence
    if len(sequence) == 0:
        raise Sorry("No data in sequence file.")
    elif len(sequence) < 6:
        raise Sorry("Sequence must be at least six residues.")
    if params.output_file is None:
        params.output_file = "blast.xml"
    blast_out = get_ncbi_pdb_blast(sequence,
                                   file_name=params.output_file,
                                   blast_type=params.blast_type,
                                   expect=params.expect)
    _println("Wrote results to %s" % params.output_file)
    results = summarize_blast_output(blast_out)
    if len(args) != 0:  # command-line mode
        _println("")
        _println("%d matching structures" % len(results))
        _println("")
        _println("ID Chain evalue length %ident %pos #structures")
        _println("-" * 59)
        for result in results:
            result.show(out)
    if len(results) > 0:
        return sequence, os.path.abspath(params.output_file)
    else:
        return sequence, None
# NOTE(review): the statements up to GetSelectedID use `self` and `results`
# and look like the tail of a method whose `def` line lies outside this
# chunk. They are kept in their original order; confirm their indentation
# (and that of GetSelectedID) against the enclosing definition when merging.
self.DeleteAllItems()
for result in results:
    # Append each hit at the current end of the list. The previous index,
    # sys.maxint, no longer exists on Python 3.
    i = self.InsertStringItem(self.GetItemCount(), result.pdb_id)
    self.SetStringItem(i, 1, result.chain_id)
    self.SetStringItem(i, 2, "%g" % result.evalue)
    self.SetStringItem(i, 3, "%d" % result.length)
    self.SetStringItem(i, 4, "%.2f" % result.identity)
    self.SetStringItem(i, 5, "%.2f" % result.positives)


def GetSelectedID(self):
    """
    Return the text in column 0 of the first selected row (the column that
    is filled with ``result.pdb_id``), or None when nothing is selected.
    """
    item = self.GetFirstSelected()
    if item >= 0:
        return self.GetItem(item, 0).GetText()
    return None


if __name__ == "__main__":
    # Manual test: <script> SEQUENCE_FILE BLAST_OUTPUT_FILE
    # Shows the summarized BLAST hits for a single sequence in a frame.
    from iotbx.file_reader import any_file
    from iotbx.bioinformatics.structure import summarize_blast_output
    seq_file = any_file(sys.argv[1], force_type="seq")
    seq_file.check_file_type("seq")
    seq_objects = seq_file.file_object
    assert (len(seq_objects) == 1)
    sequence = seq_objects[0].sequence
    results = summarize_blast_output(blast_file=sys.argv[2])
    app = wx.App(0)
    frame = BlastFrame(None, -1, "BLAST results")
    frame.SetResults(sequence, results)
    frame.Show()
    app.MainLoop()
# NOTE(review): SetResults and GetSelectedID take `self` and call
# list-control methods, so they presumably belong to a class whose header
# lies outside this chunk -- restore the class-level indentation when merging.
def SetResults(self, results):
    """
    Replace the contents of the list with one row per BLAST hit.

    Each element of ``results`` must provide ``pdb_id``, ``chain_id``,
    ``evalue``, ``length``, ``identity`` and ``positives``; they fill
    columns 0 to 5 in that order.
    """
    self.DeleteAllItems()
    for result in results:
        # Append at the current end of the list. The previous index,
        # sys.maxint, no longer exists on Python 3.
        i = self.InsertStringItem(self.GetItemCount(), result.pdb_id)
        self.SetStringItem(i, 1, result.chain_id)
        self.SetStringItem(i, 2, "%g" % result.evalue)
        self.SetStringItem(i, 3, "%d" % result.length)
        self.SetStringItem(i, 4, "%.2f" % result.identity)
        self.SetStringItem(i, 5, "%.2f" % result.positives)


def GetSelectedID(self):
    """
    Return the text in column 0 (the PDB id) of the first selected row, or
    None when nothing is selected.
    """
    item = self.GetFirstSelected()
    if item >= 0:
        return self.GetItem(item, 0).GetText()
    return None


if __name__ == "__main__":
    # Manual test: <script> SEQUENCE_FILE BLAST_OUTPUT_FILE
    # Shows the summarized BLAST hits for a single sequence in a frame.
    from iotbx.file_reader import any_file
    from iotbx.bioinformatics.structure import summarize_blast_output
    seq_file = any_file(sys.argv[1], force_type="seq")
    seq_file.check_file_type("seq")
    seq_objects = seq_file.file_object
    assert (len(seq_objects) == 1)
    sequence = seq_objects[0].sequence
    results = summarize_blast_output(blast_file=sys.argv[2])
    app = wx.App(0)
    frame = BlastFrame(None, -1, "BLAST results")
    frame.SetResults(sequence, results)
    frame.Show()
    app.MainLoop()