Exemple #1
0
def get_best_homologues(model, chain_ids=None):
    """Fetch the best-resolution PDB homologue for each protein chain.

    For every protein chain of the model a local BLAST search is run via
    ``local_blast.pdbaa``.  Hits with less than 70% identity are dropped,
    the remaining PDB ids are looked up with ``pdb_info.get_info_list`` and
    the entry with the smallest resolution value is fetched and reduced to
    the chain reported by BLAST.

    Multi-model input is not supported (``only_model()`` is used).

    Args:
        model: object providing ``get_hierarchy()``.
        chain_ids: optional collection of chain ids to process; ``None``
            means every protein chain is processed.

    Returns:
        dict mapping the id of each processed chain to the selected
        homologue model.  Chains for which no usable homologue was found
        are absent from the dict.
    """
    # Load data
    pdb_info = iotbx.bioinformatics.pdb_info.pdb_info_local()

    hierarchy = model.get_hierarchy()
    result = {}
    for chain in hierarchy.only_model().chains():
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("Working with chain '%s'" % chain.id)
        if not chain.is_protein():
            print("  skipping, not protein. Maybe water or NA")
            continue
        # chain_ids is a plain collection; it must not be called.
        if chain_ids is not None and chain.id not in chain_ids:
            continue
        sequence = chain.as_padded_sequence()
        blast_xml_result = local_blast.pdbaa(seq=sequence).run()
        # Alignment info is probably lost in this call, but alignment tools
        # exist elsewhere (see mmtbx/alignment/__init__.py).
        blast_summary = summarize_blast_output("\n".join(blast_xml_result))
        pdb_ids_to_study = {}
        for hit in blast_summary:
            hit.show(out=sys.stdout)
            # Crude filter: keep only hits with at least 70% identity.
            if hit.identity < 70:
                continue
            pdb_ids_to_study[hit.pdb_id] = hit.chain_id  # can add more info

        info_list = pdb_info.get_info_list(list(pdb_ids_to_study))
        # It would be good to merge info_list with the BLAST hits into one
        # data structure and filter more carefully; not done for now.

        # Entries without a resolution value cannot be ranked (and would
        # sort first under Python 2), so they are ignored, as perfect_pair
        # does.
        candidates = [info for info in info_list if info[1]]
        if not candidates:
            print("  no suitable homologue found, skipping")
            continue

        # Smallest resolution value wins; first entry wins on ties.
        best_info = min(candidates, key=lambda tup: tup[1])
        best_pdb_id = best_info[0]
        best_pdb_chain = pdb_ids_to_study[best_pdb_id]
        print("Best pdb: %s chain: %s" % (best_info, best_pdb_chain))

        # Get actual selected model from PDB.
        data = fetch(id=best_pdb_id)
        m = mmtbx.model.manager(model_input=iotbx.pdb.input(
            source_info=None, lines=data.readlines()))
        sel = m.selection("chain '%s'" % best_pdb_chain)
        result[chain.id] = m.select(sel)
    return result
def perfect_pair(sequence, params, pdb_id=None):
    """Rank PDB chains homologous to ``sequence`` by identity and resolution.

    A local BLAST search is run with ``local_blast.pdbaa``; each hit gets a
    gap-corrected percent identity, optionally scaled by the fraction of
    the query covered by the alignment.  Hits below ``params.identity`` are
    dropped, the surviving PDB ids are looked up with
    ``pdb_info.get_info_list`` and entries without a resolution are skipped.

    Args:
        sequence: query sequence string; ``'X'`` characters are not counted
            when computing coverage.
        params: object with the attributes read here: ``engine`` (BLAST
            binary), ``piece_matching`` (when false, identity is scaled by
            query coverage), ``identity`` (minimum accepted identity),
            ``high_res`` (``None`` or the largest accepted resolution
            value) and ``n`` (maximum number of results).
        pdb_id: optional PDB id to exclude from the results.

    Returns:
        list of at most ``params.n`` ``group_args`` objects with
        ``pdb_code``, ``chain_id``, ``resolution`` and ``identity``.  When
        ``params.high_res`` is ``None`` the list is ordered by resolution,
        then decreasing identity; otherwise by decreasing identity, then
        resolution.  An empty list is returned when summarising the BLAST
        output raises ``StopIteration``.

    Raises:
        Sorry: when the BLAST output cannot be parsed ("mismatched tag")
            and ``params.engine`` is ``"blastall"``.
        Exception: any other error from ``summarize_blast_output`` is
            propagated unchanged.
    """
    result = []
    pdb_info = iotbx.bioinformatics.pdb_info.pdb_info_local()
    l_blast = local_blast.pdbaa(seq=sequence)
    blast_xml_result = l_blast.run(debug=False, binary=params.engine)
    try:
        blast_summary = summarize_blast_output("\n".join(blast_xml_result))
    except StopIteration:
        return result
    except Exception as e:
        if "mismatched tag" in str(e) and params.engine == "blastall":
            raise Sorry("setting engine=blastp and try again")
        # Anything else is a real failure; swallowing it would only lead to
        # a NameError on blast_summary below.
        raise

    query_length = len(sequence.replace('X', ''))
    pdb_ids_to_study = {}
    for hit in blast_summary:
        hsp = hit.hsp
        # Gapped BLAST may introduce gaps (insertions/deletions) into the
        # alignment.  blastall reports (None, None) instead of an integer
        # when there are none.
        if hsp.gaps == (None, None):
            hsp.gaps = 0
        # 100.0 / float() force true division so the values are not
        # truncated by Python 2 integer division.
        identity = ((hsp.identities - hsp.gaps) * 100.0 /
                    (hsp.align_length - hsp.gaps))
        if not params.piece_matching:
            coverage = len(hsp.query.replace('X', '')) / float(query_length)
            identity = identity * coverage
        if identity < params.identity:
            continue
        for i in hit.all_ids:
            if i[0] == pdb_id:
                continue
            # Test membership with the same (str) key that is stored.
            code = str(i[0])
            if code not in pdb_ids_to_study:
                pdb_ids_to_study[code] = (str(i[1]), identity)

    info_lists = pdb_info.get_info_list(list(pdb_ids_to_study))
    # Drop entries without a resolution before sorting so that None never
    # takes part in a comparison.
    usable = sorted((info for info in info_lists if info[1]),
                    key=lambda tup: tup[1])
    for info in usable:
        best_pdb_id = info[0]
        best_pdb_chain, identity = pdb_ids_to_study[best_pdb_id]
        resolution = float(info[1])
        if params.high_res is not None and not (resolution <= params.high_res):
            continue
        result.append(group_args(pdb_code=best_pdb_id,
                                 chain_id=best_pdb_chain,
                                 resolution=resolution,
                                 identity=identity))

    # One stable sort after the loop gives the same order as re-sorting
    # after every append.
    if params.high_res is None:
        result.sort(key=lambda tup: (tup.resolution, -tup.identity))
    else:
        result.sort(key=lambda tup: (-tup.identity, tup.resolution))
    return result[:params.n]
Exemple #3
0
def run(args=(), params=None, out=None):
    """BLAST the first sequence of a sequence file and summarise the hits.

    Args:
        args: command-line arguments.  Used to build ``params`` when none
            are given; a non-empty ``args`` also enables the printed hit
            table.
        params: extracted parameters containing a ``blast_pdb`` scope
            (``file_name``, ``output_file``, ``blast_type``, ``expect``).
            When ``None`` they are parsed from ``args`` with ``master_phil``.
        out: stream for messages; defaults to ``sys.stdout``.

    Returns:
        ``(sequence, path)`` where ``path`` is the absolute path of the
        BLAST output file, or ``None`` when the summary has no results.

    Raises:
        Sorry: for an empty sequence file, an empty sequence, or a sequence
            shorter than six residues.
    """
    out = sys.stdout if out is None else out
    if params is None:
        import iotbx.phil
        cmdline = iotbx.phil.process_command_line_with_files(
            args=args,
            master_phil=master_phil,
            seq_file_def="blast_pdb.file_name")
        params = cmdline.work.extract()
    validate_params(params)
    blast_params = params.blast_pdb

    from iotbx.bioinformatics.structure import get_ncbi_pdb_blast, \
      summarize_blast_output
    from iotbx.file_reader import any_file

    seq_file = any_file(blast_params.file_name,
                        force_type="seq",
                        raise_sorry_if_not_expected_format=True,
                        raise_sorry_if_errors=True)
    seq_file.check_file_type("seq")
    seq_objects = seq_file.file_object
    n_sequences = len(seq_objects)
    if n_sequences == 0:
        raise Sorry("Empty sequence file!")
    if n_sequences > 1:
        print("WARNING: multiple sequences provided; searching only the 1st",
              file=out)

    sequence = seq_objects[0].sequence
    n_residues = len(sequence)
    if n_residues == 0:
        raise Sorry("No data in sequence file.")
    if n_residues < 6:
        raise Sorry("Sequence must be at least six residues.")

    # Fall back to a fixed file name when the caller did not choose one.
    if blast_params.output_file is None:
        blast_params.output_file = "blast.xml"
    blast_out = get_ncbi_pdb_blast(sequence,
                                   file_name=blast_params.output_file,
                                   blast_type=blast_params.blast_type,
                                   expect=blast_params.expect)
    print("Wrote results to %s" % blast_params.output_file, file=out)

    results = summarize_blast_output(blast_out)
    if len(args) != 0:  # command-line mode
        table_header = (
            "",
            "%d matching structures" % len(results),
            "",
            "ID    Chain     evalue  length  %ident    %pos  #structures",
            "-" * 59,
        )
        for line in table_header:
            print(line, file=out)
        for hit in results:
            hit.show(out)

    if len(results) > 0:
        output_path = os.path.abspath(blast_params.output_file)
    else:
        output_path = None
    return sequence, output_path
Exemple #4
0
def run(args=(), params=None, out=None):
  """BLAST the first sequence of a sequence file and summarise the hits.

  Messages are written with ``out.write`` rather than the Python-2-only
  ``print >> out`` statement, so this function is valid syntax on both
  Python 2 and Python 3 and emits the same text.

  Args:
    args: command-line arguments.  Used to build ``params`` when none are
      given; a non-empty ``args`` also enables the printed hit table.
    params: extracted parameters containing a ``blast_pdb`` scope
      (``file_name``, ``output_file``, ``blast_type``, ``expect``).  When
      ``None`` they are parsed from ``args`` with ``master_phil``.
    out: stream for messages; defaults to ``sys.stdout``.

  Returns:
    ``(sequence, path)`` where ``path`` is the absolute path of the BLAST
    output file, or ``None`` when the summary has no results.

  Raises:
    Sorry: for an empty sequence file, an empty sequence, or a sequence
      shorter than six residues.
  """
  if out is None:
    out = sys.stdout

  def show(text):
    # Same bytes as "print >> out, text": the text followed by a newline.
    out.write(text + "\n")

  if params is None:
    import iotbx.phil
    cmdline = iotbx.phil.process_command_line_with_files(
      args=args,
      master_phil=master_phil,
      seq_file_def="blast_pdb.file_name")
    params = cmdline.work.extract()
  validate_params(params)
  params = params.blast_pdb
  from iotbx.bioinformatics.structure import get_ncbi_pdb_blast, \
    summarize_blast_output
  from iotbx.file_reader import any_file
  seq_file = any_file(params.file_name,
    force_type="seq",
    raise_sorry_if_not_expected_format=True,
    raise_sorry_if_errors=True)
  seq_file.check_file_type("seq")
  seq_objects = seq_file.file_object
  if len(seq_objects) == 0:
    raise Sorry("Empty sequence file!")
  elif len(seq_objects) > 1:
    show("WARNING: multiple sequences provided; searching only the 1st")
  sequence = seq_objects[0].sequence
  if len(sequence) == 0:
    raise Sorry("No data in sequence file.")
  elif len(sequence) < 6:
    raise Sorry("Sequence must be at least six residues.")
  # Fall back to a fixed file name when the caller did not choose one.
  if params.output_file is None:
    params.output_file = "blast.xml"
  blast_out = get_ncbi_pdb_blast(sequence,
    file_name=params.output_file,
    blast_type=params.blast_type,
    expect=params.expect)
  show("Wrote results to %s" % params.output_file)
  results = summarize_blast_output(blast_out)
  if len(args) != 0:  # command-line mode
    show("")
    show("%d matching structures" % len(results))
    show("")
    show("ID    Chain     evalue  length  %ident    %pos  #structures")
    show("-" * 59)
    for result in results:
      result.show(out)
  if len(results) > 0:
    return sequence, os.path.abspath(params.output_file)
  return sequence, None
Exemple #5
0
        self.DeleteAllItems()
        for result in results:
            i = self.InsertStringItem(sys.maxint, result.pdb_id)
            self.SetStringItem(i, 1, result.chain_id)
            self.SetStringItem(i, 2, "%g" % result.evalue)
            self.SetStringItem(i, 3, "%d" % result.length)
            self.SetStringItem(i, 4, "%.2f" % result.identity)
            self.SetStringItem(i, 5, "%.2f" % result.positives)

    def GetSelectedID(self):
        """Return the text of column 0 for the first selected row.

        A negative index from ``GetFirstSelected()`` is treated as "nothing
        selected" and yields ``None``.
        """
        selected_row = self.GetFirstSelected()
        if selected_row < 0:
            return None
        id_cell = self.GetItem(selected_row, 0)
        return id_cell.GetText()


# Manual entry point: show the hits of an existing BLAST output in a frame.
#   sys.argv[1] - sequence file (read with force_type="seq")
#   sys.argv[2] - BLAST output file passed to summarize_blast_output
if (__name__ == "__main__"):
    from iotbx.file_reader import any_file
    from iotbx.bioinformatics.structure import summarize_blast_output
    seq_file = any_file(sys.argv[1], force_type="seq")
    seq_file.check_file_type("seq")
    seq_objects = seq_file.file_object
    # Exactly one sequence is expected.
    # NOTE(review): this assert is stripped under "python -O".
    assert (len(seq_objects) == 1)
    sequence = seq_objects[0].sequence
    results = summarize_blast_output(blast_file=sys.argv[2])
    # The wx application object is created before any window.
    app = wx.App(0)
    frame = BlastFrame(None, -1, "BLAST results")
    # NOTE(review): BlastFrame.SetResults is not visible here; presumably it
    # accepts (sequence, results) - confirm against its definition.
    frame.SetResults(sequence, results)
    frame.Show()
    app.MainLoop()
Exemple #6
0
  def SetResults(self, results):
    """Replace the list contents with one row per BLAST result.

    Columns are filled in this order: PDB id, chain id, e-value ("%g"),
    length ("%d"), identity ("%.2f") and positives ("%.2f").

    Args:
      results: iterable of objects with ``pdb_id``, ``chain_id``,
        ``evalue``, ``length``, ``identity`` and ``positives`` attributes.
    """
    self.DeleteAllItems()
    for result in results:
      # Insert at the current item count, i.e. append.  The former
      # sys.maxint index does not exist on Python 3.
      row = self.InsertStringItem(self.GetItemCount(), result.pdb_id)
      self.SetStringItem(row, 1, result.chain_id)
      self.SetStringItem(row, 2, "%g" % result.evalue)
      self.SetStringItem(row, 3, "%d" % result.length)
      self.SetStringItem(row, 4, "%.2f" % result.identity)
      self.SetStringItem(row, 5, "%.2f" % result.positives)

  def GetSelectedID(self):
    """Return the text of column 0 for the first selected row.

    A negative index from ``GetFirstSelected()`` is treated as "nothing
    selected" and yields ``None``.
    """
    selected_row = self.GetFirstSelected()
    if selected_row < 0:
      return None
    id_cell = self.GetItem(selected_row, 0)
    return id_cell.GetText()

# Manual entry point: show the hits of an existing BLAST output in a frame.
#   sys.argv[1] - sequence file (read with force_type="seq")
#   sys.argv[2] - BLAST output file passed to summarize_blast_output
if (__name__ == "__main__") :
  from iotbx.file_reader import any_file
  from iotbx.bioinformatics.structure import summarize_blast_output
  seq_file = any_file(sys.argv[1], force_type="seq")
  seq_file.check_file_type("seq")
  seq_objects = seq_file.file_object
  # Exactly one sequence is expected.
  # NOTE(review): this assert is stripped under "python -O".
  assert (len(seq_objects) == 1)
  sequence = seq_objects[0].sequence
  results = summarize_blast_output(blast_file=sys.argv[2])
  # The wx application object is created before any window.
  app = wx.App(0)
  frame = BlastFrame(None, -1, "BLAST results")
  # NOTE(review): BlastFrame.SetResults is not visible here; presumably it
  # accepts (sequence, results) - confirm against its definition.
  frame.SetResults(sequence, results)
  frame.Show()
  app.MainLoop()