Ejemplo n.º 1
0
def BlastVersion(blast_location=None):
  """
  Returns the version of the BLAST executable, e.g. 2.2.24 as a string

  ``blastall`` is looked up first; if that lookup fails, ``blastp`` is tried.
  The version is parsed from the standard output of ``blastall`` (called
  without arguments) or of ``blastp -version``.

  :param blast_location: Forwarded to :func:`settings.Locate` as
                         ``explicit_file_name``.
  :returns: The first ``<digits>.<digits>.<digits>`` match found in the
            output of the executable
  :raises: :class:`RuntimeError` if neither executable can be located,
           :class:`IOError` if no output line matches the expected pattern
  """

  # ``except Exception`` rather than a bare ``except`` so that
  # KeyboardInterrupt/SystemExit are not reported as a missing executable
  try:
    blast_exe=settings.Locate('blastall',explicit_file_name=blast_location)
  except Exception:
    try:
      blast_exe=settings.Locate('blastp', explicit_file_name=blast_location)
    except Exception:
      raise RuntimeError('could not find blast executable')

  if os.path.basename(blast_exe)=='blastall':
    # called without arguments; a "blastall <version> arguments:" line is
    # expected in the output
    args=[blast_exe]
    pattern=re.compile(r'\s*blastall (\d+\.\d+\.\d+)\s+arguments:\s*')
  else:
    args=[blast_exe, '-version']
    pattern=re.compile(r'\s*Package: blast (\d+\.\d+\.\d+),\s+')

  # stderr is captured as well, but only stdout is searched for the version
  blast_pipe=subprocess.Popen(args, stdout=subprocess.PIPE,
                              stderr=subprocess.PIPE)
  stdout, _ = blast_pipe.communicate()

  for line in stdout.decode().splitlines():
    m=pattern.match(line)
    if m:
      return m.group(1)
  raise IOError("could not determine blast version for '%s'" % blast_exe)
Ejemplo n.º 2
0
  def testCADClassic(self):
    """Check the classic CAD score of a structure compared with itself.

    Returns early (after printing a notice) if one of the required external
    executables cannot be located.
    """
    # all of the following need to be present
    required = ('CADscore_calc.bash', 'CADscore_read_global_scores.bash',
                'CADscore_read_local_scores.bash', 'voroprot2')
    try:
      for name in required:
        settings.Locate(name)
    except settings.FileNotFound:
      print("Could not find CAD score classic executables: ignoring unit tests")
      return

    cad_result = cadscore.CADScore(self.protein, self.protein,
                                   label="cad_classic")

    # model and reference are the same, we expect a global CAD score of 1
    self.assertEqual(cad_result.globalAA, 1.0)

    # one score per residue
    self.assertEqual(len(cad_result.localAA), len(self.protein.residues))

    # model and reference are the same, we expect local CAD scores of 0.0
    for score in cad_result.localAA.values():
      self.assertEqual(score, 0.0)

    # check whether this score is assigned to each residue as float property
    for r in self.protein.residues:
      self.assertTrue(r.HasProp("cad_classic"))
      self.assertEqual(r.GetFloatProp("cad_classic"), 0.0)
Ejemplo n.º 3
0
  def testCADVoronota(self):
    """Check the voronota CAD score of a structure compared with itself.

    Returns early (after printing a notice) if one of the required external
    executables cannot be located.
    """
    # all of the following need to be present
    try:
      for name in ("voronota-cadscore", "voronota"):
        settings.Locate(name)
    except settings.FileNotFound:
      print("Could not find CAD score voronota executables: ignoring unit tests")
      return

    cad_result = cadscore.CADScore(self.protein, self.protein, mode="voronota",
                                   label="cad_voronota")

    # model and reference are the same, we expect a global CAD score of 1
    self.assertEqual(cad_result.globalAA, 1.0)

    # one score per residue
    self.assertEqual(len(cad_result.localAA), len(self.protein.residues))

    # model and reference are the same, we expect local CAD scores of 1.0
    for score in cad_result.localAA.values():
      self.assertEqual(score, 1.0)

    # check whether this score is assigned to each residue as float property
    for r in self.protein.residues:
      self.assertTrue(r.HasProp("cad_voronota"))
      self.assertEqual(r.GetFloatProp("cad_voronota"), 1.0)
Ejemplo n.º 4
0
    def testAccNACCESS(self):
        """Compare raw and oligo accessibilities, and NACCESS if available.

        The NACCESS comparison is skipped (with a printed notice) only when
        the executable cannot be located; a failing comparison is reported as
        a test failure.
        """

        # tests oligo mode by comparing the results from doing the
        # corresponding calculations manually
        ent_one = io.LoadPDB(os.path.join("testfiles", "1a0s.pdb"))
        ent_two = io.LoadPDB(os.path.join("testfiles", "1a0s.pdb"))

        # we're only interested in peptide stuff...
        ent_one = ent_one.Select("peptide=true")
        ent_two = ent_two.Select("peptide=true")

        acc_classic = AccessibilitiesRaw(ent_one)
        acc_oligo = AccessibilitiesOligo(ent_two)

        self.assertTrue(Compare(acc_classic, acc_oligo))

        # if there is naccess around, we also check for equality with
        # naccess results. Only the lookup is guarded: keeping the assertion
        # outside the try block ensures a mismatch is not silently swallowed.
        try:
            settings.Locate("naccess")
        except settings.FileNotFound:
            print(
                "Could not find NACCESS, could not compare Accessiblity function..."
            )
            return

        ent_three = io.LoadPDB(os.path.join("testfiles", "1a0s.pdb"))
        ent_three = ent_three.Select("peptide=true")
        acc_naccess = AccessibilitiesRaw(ent_three, use_naccess=True)
        self.assertTrue(Compare(acc_classic, acc_naccess))
Ejemplo n.º 5
0
def _RunkClust(tmp_dir_name, clustering_thresh, create_alignments):
    """Run kClust on ``fastadb.fasta`` in *tmp_dir_name* and parse the result.

    :param tmp_dir_name: Directory containing ``fastadb.fasta``; also passed
                         to kClust with ``-d``.
    :param clustering_thresh: Threshold that is linearly converted into the
                              value passed to kClust with ``-s``.
    :param create_alignments: If true, an alignment is attached to every
                              cluster (via ClustalW for clusters with more
                              than one sequence).
    :returns: The return value of ``_ParseOutput(tmp_dir_name)``
    """

    bitscore = clustering_thresh * 0.060269 - 0.68498

    executable = settings.Locate('kClust')

    cmd = [
        executable,
        '-i', os.path.join(tmp_dir_name, 'fastadb.fasta'),
        '-d', tmp_dir_name,
        '-s', str(bitscore),
    ]

    # Argument list without a shell: paths containing spaces or shell
    # metacharacters are handed to kClust unchanged.
    ps = subprocess.Popen(cmd,
                          stdout=subprocess.PIPE,
                          stderr=subprocess.PIPE)
    # the captured output is not used; results are read from tmp_dir_name
    ps.communicate()

    result = _ParseOutput(tmp_dir_name)

    if create_alignments:
        from ost.bindings import clustalw
        for c in result:
            if len(c.sequences) > 1:
                c.alignment = clustalw.ClustalW(c.sequences)
            else:
                # nothing to align: wrap the single sequence in an alignment
                aln = seq.CreateAlignment()
                aln.AddSequence(c.sequences[0])
                c.alignment = aln

    return result
Ejemplo n.º 6
0
def _GetExecutable(naccess_exe):
    """
    Locate the naccess executable via :func:`settings.Locate`.

    :param naccess_exe: Explicit path to naccess executable (forwarded as
                        ``explicit_file_name``)
    :returns:           Path to the executable
    :exception:         FileNotFound if executable is not found
    """
    return settings.Locate('naccess', explicit_file_name=naccess_exe)
Ejemplo n.º 7
0
def _RunTmScore(tmscore, tmp_dir):
  """
  Run tmscore on ``model01.pdb`` and ``model02.pdb`` inside *tmp_dir*.

  :param tmscore: Forwarded to :func:`settings.Locate` as
                  ``explicit_file_name``.
  :param tmp_dir: Directory containing the two model files.
  :returns: The return value of ``_ParseTmScore`` for the output lines
  :raises: :class:`RuntimeError` if fewer than 22 lines of output are
           produced (``_CleanupFiles(tmp_dir)`` is called first)
  """
  model1_filename=os.path.join(tmp_dir, 'model01.pdb')
  model2_filename=os.path.join(tmp_dir, 'model02.pdb')
  if platform.system() == "Windows":
    tmscore_path=os.path.normpath(
      settings.Locate('tmscore.exe', explicit_file_name=tmscore))
  else:
    tmscore_path=settings.Locate('tmscore', explicit_file_name=tmscore)
  # every path is quoted on all platforms, so directories containing spaces
  # survive the shell
  command="\"%s\" \"%s\" \"%s\"" % (tmscore_path, model1_filename,
                                    model2_filename)
  ps=subprocess.Popen(command, shell=True, stdout=subprocess.PIPE)
  stdout,_=ps.communicate()
  lines=stdout.decode().splitlines()
  if len(lines)<22:
    _CleanupFiles(tmp_dir)
    raise RuntimeError("tmscore superposition failed")
  return _ParseTmScore(lines)
Ejemplo n.º 8
0
 def __init__(self, context_menu):
   """
   Look up the msms executable and register the "Calculate Surface" action.

   :param context_menu: Menu object providing ``qobject`` and ``AddAction``.
   """
   # an empty string marks the executable as not found
   try:
     located = settings.Locate("msms")
   except settings.FileNotFound:
     located = ""
   self.executable = located
   QtCore.QObject.__init__(self, context_menu.qobject)
   surface_action = QtWidgets.QAction("Calculate Surface", self)
   self.action = surface_action
   surface_action.triggered.connect(self.CalculateSurface)
   context_menu.AddAction(surface_action, gui.ContextActionType.ENTITY)
Ejemplo n.º 9
0
def _RunTmAlign(tmalign, tmp_dir):
  """
  Run tmalign on ``model01.pdb`` and ``model02.pdb`` inside *tmp_dir*.

  tmalign is asked (``-m``) to write ``matrix.txt`` into *tmp_dir*; the
  content of that file is parsed together with the standard output.

  :param tmalign: Forwarded to :func:`settings.Locate` as
                  ``explicit_file_name``.
  :param tmp_dir: Directory containing the two model files.
  :returns: The return value of ``_ParseTmAlign``
  :raises: :class:`RuntimeError` if fewer than 22 lines of output are
           produced (``_CleanupFiles(tmp_dir)`` is called first)
  """
  model1_filename=os.path.join(tmp_dir, 'model01.pdb')
  model2_filename=os.path.join(tmp_dir, 'model02.pdb')
  matrix_filename=os.path.join(tmp_dir,'matrix.txt')
  if platform.system() == "Windows":
    tmalign_path=os.path.normpath(
      settings.Locate('tmalign.exe', explicit_file_name=tmalign))
  else:
    tmalign_path=settings.Locate('tmalign', explicit_file_name=tmalign)
  # every path is quoted on all platforms, so directories containing spaces
  # survive the shell
  command="\"%s\" \"%s\" \"%s\" -m \"%s\"" %(tmalign_path, model1_filename,
                                            model2_filename, matrix_filename)
  ps=subprocess.Popen(command, shell=True, stdout=subprocess.PIPE)
  stdout,_=ps.communicate()
  lines=stdout.decode().splitlines()
  if len(lines)<22:
    _CleanupFiles(tmp_dir)
    raise RuntimeError("tmalign superposition failed")
  # context manager closes the file even if reading fails
  with open(matrix_filename) as matrix_file:
    lines_matrix=matrix_file.readlines()
  return _ParseTmAlign(lines,lines_matrix)
Ejemplo n.º 10
0
def _GetExecutable(msms_exe, msms_env):
  """
  Function to check if MSMS executable is present

  Thin wrapper around :func:`settings.Locate` for the name ``'msms'``.

  :param msms_exe: Explicit path to msms executable (forwarded as
                   ``explicit_file_name``)
  :param msms_env: Environment variable pointing to msms executable
                   (forwarded as ``env_name``)
  :returns: Path to the executable
  :raises:  :class:`~ost.FileNotFound` if executable is not found
  """
  return settings.Locate('msms', explicit_file_name=msms_exe,
                         env_name=msms_env)
Ejemplo n.º 11
0
def _ExecuteDSSP(path, dssp_bin, temp_dir=None):
  """
  Run DSSP on *path* and return the name of the output file.

  :param path: Input file handed to the DSSP binary as first argument.
  :param dssp_bin: Forwarded to :func:`settings.Locate` as
                   ``explicit_file_name``.
  :param temp_dir: Directory in which the output file name is generated.
  :returns: Name of the output file passed to DSSP as second argument
  :raises: :class:`RuntimeError` if the located path is a directory or is
           not executable
  """
  # NOTE: mktemp is a safety problem; mkstemp with the file handle passed on
  # to the subsequent process would be preferable
  out_path=tempfile.mktemp(suffix=".out",prefix="dssp", dir=temp_dir)
  candidates=['dsspcmbi','dssp','mkdssp']
  executable=settings.Locate(candidates, env_name='DSSP_EXECUTABLE',
                             explicit_file_name=dssp_bin)
  if os.path.isdir(executable):
    raise RuntimeError('"%s" is a directory. Specify path to DSSP binary' % executable)
  elif not os.access(executable, os.X_OK):
    raise RuntimeError('"%s" is not executable' % executable)

  dssp_call=[executable, path, out_path]
  subprocess.run(dssp_call)

  return out_path
Ejemplo n.º 12
0
def CreateDB(infasta, dbout, mkdb_cmd=None):
  """
  Create a blast DB from a fasta file

  :param infasta: the pdb fasta from which the database will be created
  :type infasta: :class:`string`

  :param dbout: output location for blastDB file
  :type dbout: :class:`string`

  :param mkdb_cmd: path to either ``formatdb`` or ``makeblastdb``. If None,
                   ``formatdb`` is searched first, then ``makeblastdb``.
  :type mkdb_cmd: :class:`string`

  :raises: :class:`RuntimeError` if *mkdb_cmd* is None and neither executable
           can be located, :class:`IOError` if the basename of *mkdb_cmd* is
           neither ``formatdb`` nor ``makeblastdb``
  """
  if mkdb_cmd is None:
    # ``except Exception`` rather than a bare ``except`` so that
    # KeyboardInterrupt/SystemExit are not reported as a missing executable
    try:
      exe=settings.Locate('formatdb')
      args=[exe, '-i', infasta, '-n', dbout]
    except Exception:
      try:
        exe=settings.Locate('makeblastdb')
        args=[exe, '-in', infasta, '-out', dbout, '-dbtype', 'prot']
      except Exception:
        raise RuntimeError('could not find makeblastdb/formatdb executable')
  else:
    tool=os.path.basename(mkdb_cmd)
    if tool=='makeblastdb':
      exe=settings.Locate('makeblastdb',explicit_file_name=mkdb_cmd)
      args=[exe, '-in', infasta, '-out', dbout, '-dbtype', 'prot']
    elif tool=='formatdb':
      # keyword is ``explicit_file_name`` (was misspelled, which made this
      # branch fail with a TypeError)
      exe=settings.Locate('formatdb',explicit_file_name=mkdb_cmd)
      args=[exe, '-i', infasta, '-n', dbout]
    else:
      raise IOError('mkdb command must either be the path to formatdb or makeblastdb!')

  ost.LogInfo('creating blast DB (%s)' % ' '.join(args))
  blast_pipe=subprocess.Popen(args, stdout=subprocess.PIPE,
                              stderr=subprocess.PIPE)
  blast_pipe.communicate()
Ejemplo n.º 13
0
def _RuniAlign(ialign, tmp_dir, options={}):
    """
    Run ``ialign.pl`` on ``model01.pdb`` and ``model02.pdb`` inside *tmp_dir*.

    :param ialign: Forwarded to :func:`settings.Locate` as
                   ``explicit_file_name``.
    :param tmp_dir: Directory containing the two model files; also passed to
                    iAlign with ``-w`` unless overridden in *options*.
    :param options: Additional command line options as ``{name: value}``.
                    Boolean values become bare ``-name`` flags (only when
                    True), all other values are emitted as ``-name value``.
                    The dictionary is only read, never modified.
    :returns: The return value of ``_ParseiAlign`` for the output lines
    :raises: :class:`RuntimeError` if fewer than 22 lines of output are
             produced (``_CleanupFiles(tmp_dir)`` is called first)
    """
    opts = {
        'a': 1,  # concise output
        'w': tmp_dir
    }
    opts.update(options)
    cmd_opts = []
    for k, v in opts.items():
        if isinstance(v, bool):
            if v:
                cmd_opts.append('-%s' % str(k))
        else:
            cmd_opts.append('-%s %s' % (str(k), str(v)))
    cmd_opts = ' '.join(cmd_opts)
    model1_filename = os.path.join(tmp_dir, 'model01.pdb')
    model2_filename = os.path.join(tmp_dir, 'model02.pdb')

    # the script name is the same on every platform; only the path
    # normalisation is Windows specific
    ialign_path = settings.Locate('ialign.pl', explicit_file_name=ialign)
    if platform.system() == "Windows":
        ialign_path = os.path.normpath(ialign_path)
    # model paths are quoted on all platforms, so directories containing
    # spaces survive the shell
    command = "\"%s\" \"%s\" \"%s\" %s" % (ialign_path, model1_filename,
                                           model2_filename, cmd_opts)

    ps = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE)
    stdout, _ = ps.communicate()
    lines = stdout.decode().splitlines()

    if len(lines) < 22:
        _CleanupFiles(tmp_dir)
        raise RuntimeError("iAlign superposition failed")
    return _ParseiAlign(lines)
Ejemplo n.º 14
0
    def testLGA(self):
        """Check the GDT of a chain superposed onto itself.

        Returns early (after printing a notice) if the lga executable cannot
        be located.
        """

        try:
            settings.Locate('lga')
        except settings.FileNotFound:
            print("Could not find lga executable: ignoring unit tests")
            return

        lga_result = lga.GDT(self.chain_a,
                             self.chain_a,
                             reference_length=len(self.chain_a.residues))

        # identical input: perfect scores and an identity transformation
        expected_transform = geom.Mat4(1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0,
                                       0, 0, 1)
        self.assertEqual(lga_result.gdt_ts, 100.0)
        self.assertEqual(lga_result.gdt_ha, 100.0)
        self.assertEqual(lga_result.GetTransform(), expected_transform)
Ejemplo n.º 15
0
  def testIAlign(self):
    """Check iAlign results for a structure aligned with itself.

    Returns early (after printing a notice) if ``ialign.pl`` cannot be
    located.
    """

    try:
      settings.Locate('ialign.pl')
    except settings.FileNotFound:
      print("Could not find ialign master perl script: ignoring unit tests")
      return

    ialign_result = ialign.iAlign(self.protein, self.protein)

    # model and reference are the same, we expect pretty good results
    self.assertEqual(ialign_result.rmsd, 0.0)
    self.assertEqual(ialign_result.is_score, 1.0)
    self.assertEqual(SequenceIdentity(ialign_result.alignment), 100.0)
    self.assertEqual(ialign_result.aligned_residues, 78)
    self.assertEqual(ialign_result.aligned_contacts, 91)

    # transformation should be identity matrix (no transformation at all...)
    identity = geom.Mat4(1,0,0,0, 0,1,0,0, 0,0,1,0, 0,0,0,1)
    self.assertEqual(ialign_result.transform, identity)
Ejemplo n.º 16
0
    def testTMAlign(self):
        """Check TMAlign results for a structure aligned with itself.

        Returns early (after printing a notice) if the tmalign executable
        cannot be located.
        """

        try:
            settings.Locate('tmalign')
        except settings.FileNotFound:
            print("Could not find tmalign executable: ignoring unit tests")
            return

        tm_result = tmtools.TMAlign(self.protein, self.protein)

        # model and reference are the same, we expect pretty good results
        self.assertEqual(tm_result.rmsd, 0.0)
        self.assertEqual(tm_result.tm_score, 1.0)
        self.assertEqual(tm_result.aligned_length,
                         len(self.protein.chains[0].residues))
        self.assertEqual(SequenceIdentity(tm_result.alignment), 100.0)

        # transformation should be identity matrix (no transformation at all...)
        identity = geom.Mat4(1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1)
        self.assertEqual(tm_result.transform, identity)
Ejemplo n.º 17
0
    def testAccDSSP(self):
        """Compare DSSP accessibilities with ``mol.alg.Accessibility``.

        Returns early (after printing a notice) if no DSSP executable can be
        located.
        """

        # only relevant if dssp there
        try:
            # same check used in dssp binding
            dssp_path = settings.Locate(['dsspcmbi', 'dssp', 'mkdssp'],
                                        env_name='DSSP_EXECUTABLE')
        except settings.FileNotFound:
            print(
                "Could not find DSSP, could not compare Accessibility function..."
            )
            return

        # we assume oligo mode to be working as it is tested in
        # testAccNACCESS. So we only test the single residue
        # accessibilities
        ent_one = io.LoadPDB(os.path.join("testfiles", "1a0s.pdb"))
        ent_two = io.LoadPDB(os.path.join("testfiles", "1a0s.pdb"))
        ent_one = ent_one.Select("peptide=true")
        ent_two = ent_two.Select("peptide=true")

        dssp.AssignDSSP(ent_one,
                        extract_burial_status=True,
                        dssp_bin=dssp_path)
        mol.alg.Accessibility(ent_two,
                              algorithm=mol.alg.AccessibilityAlgorithm.DSSP)

        for a, b in zip(ent_one.residues, ent_two.residues):

            # overall accessibility; the value computed on ent_two is rounded
            # before it is compared to the one assigned from DSSP
            if a.HasProp("solvent_accessibility") and b.HasProp("asaAbs"):
                diff = abs(a.GetFloatProp("solvent_accessibility") -
                           round(b.GetFloatProp("asaAbs")))
                self.assertTrue(diff < 0.01)

            # relative accessibility
            if a.HasProp("relative_solvent_accessibility") and b.HasProp(
                    "asaRel"):
                diff = abs(a.GetFloatProp("relative_solvent_accessibility") -
                           b.GetFloatProp("asaRel"))
                self.assertTrue(diff < 0.01)
Ejemplo n.º 18
0
    def testTMScore(self):
        """Check TMScore results for a structure compared with itself.

        Returns early (after printing a notice) if the tmscore executable
        cannot be located.
        """

        try:
            settings.Locate('tmscore')
        except settings.FileNotFound:
            # message names the executable that was actually looked up
            print("Could not find tmscore executable: ignoring unit tests")
            return

        tm_result = tmtools.TMScore(self.protein, self.protein)

        # model and reference are the same, we expect pretty good results
        self.assertEqual(tm_result.rmsd_common, 0.0)
        self.assertEqual(tm_result.tm_score, 1.0)
        self.assertEqual(tm_result.max_sub, 1.0)
        self.assertEqual(tm_result.gdt_ts, 1.0)
        self.assertEqual(tm_result.gdt_ha, 1.0)
        self.assertEqual(tm_result.rmsd_below_five, 0.0)

        # transformation should be identity matrix (no transformation at all...)
        identity = geom.Mat4(1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1)
        self.assertEqual(tm_result.transform, identity)
Ejemplo n.º 19
0
    def testSecStruct(self):
        """Compare DSSP secondary structure with ``mol.alg.AssignSecStruct``.

        Returns early (after printing a notice) if no DSSP executable can be
        located.
        """

        # unit test only makes sense, when a dssp binary is around
        try:
            # same check used in dssp binding
            dssp_path = settings.Locate(['dsspcmbi', 'dssp', 'mkdssp'],
                                        env_name='DSSP_EXECUTABLE')
        except settings.FileNotFound:
            print(
                "Could not find DSSP, could not compare sec struct assignment..."
            )
            return

        dssp_ent = io.LoadPDB(os.path.join("testfiles", "1a0s.pdb"))
        ost_ent = io.LoadPDB(os.path.join("testfiles", "1a0s.pdb"))

        dssp.AssignDSSP(dssp_ent, dssp_bin=dssp_path)
        mol.alg.AssignSecStruct(ost_ent)

        # residue by residue, both assignments must have the same string form
        for a, b in zip(dssp_ent.residues, ost_ent.residues):
            self.assertTrue(
                str(a.GetSecStructure()) == str(b.GetSecStructure()))
Ejemplo n.º 20
0
 def __init__(self, query, hhsuite_root, hhblits_bin=None, working_dir=None):
     """
     Set up executable paths and the working directory for a query.

     :param query: Either a :class:`str`, which is treated as the path of a
                   file that gets copied into the working directory, or any
                   other object, which is written out as a sequence file.
     :param hhsuite_root: Root directory of hhsuite. Used as is if
                          ``<hhsuite_root>/bin/hhblits`` exists, otherwise
                          replaced by a guess based on the located binary.
     :param hhblits_bin: Forwarded to :func:`settings.Locate` as
                         ``explicit_file_name``; only used if hhblits is not
                         found below *hhsuite_root*.
     :param working_dir: Directory to work in. Created if it does not exist.
                         If not given, a temporary directory is used and
                         ``needs_cleanup`` is set to True.
     """
     self.query = query
     self.hhsuite_root = hhsuite_root
     if os.path.exists(os.path.join(self.hhsuite_root, 'bin/hhblits')):
         self.bin_dir = os.path.join(self.hhsuite_root, 'bin')
         self.hhblits_bin = os.path.join(self.hhsuite_root, 'bin/hhblits')
     else:
         self.hhblits_bin = settings.Locate('hhblits',
                                            explicit_file_name=hhblits_bin)
         self.bin_dir = os.path.dirname(self.hhblits_bin)
         # guess root folder (note: this may fail in future)
         self.hhsuite_root = os.path.dirname(self.bin_dir)
     self.hhlib_dir = os.path.join(self.hhsuite_root, 'lib', 'hh')
     if working_dir:
         # caller-provided directory: never flagged for cleanup
         self.needs_cleanup = False
         self.working_dir = working_dir
         if not os.path.exists(working_dir):
             os.mkdir(working_dir)
         if isinstance(query, str):
             self.filename = os.path.abspath(os.path.join(
                 self.working_dir, os.path.basename(query)))
             # skip the copy if the query file already is the target file
             if self.filename != os.path.abspath(query):
                 shutil.copy(query, self.filename)
         else:
             self.filename = os.path.join(self.working_dir,
                                          '%s.fasta' % HHblits.OUTPUT_PREFIX)
             ost.io.SaveSequence(query, self.filename)
     else:
         # temporary directory created here: flagged for cleanup
         self.needs_cleanup = True
         if isinstance(query, str):
             self.working_dir = tempfile.mkdtemp()
             self.filename = os.path.abspath(os.path.join(
                 self.working_dir, os.path.basename(query)))
             shutil.copy(query, self.filename)
         else:
             # TempDirWithFiles provides both the directory and the file
             tmp_dir = utils.TempDirWithFiles((query,))
             self.working_dir = tmp_dir.dirname
             self.filename = tmp_dir.files[0]
Ejemplo n.º 21
0
        # that it works when data is used which has a correpsonding path.
        na_tmp_dir = tempfile.mkdtemp(prefix="ih.")

        def cleanup():
            shutil.rmtree(na_tmp_dir)

        self.addCleanup(cleanup)
        ost_ent = io.LoadPDB('testfiles/testprotein.pdb')
        excp_raised = False
        try:
            sasa = naccess.CalculateSurfaceArea(ost_ent,
                                                scratch_dir=na_tmp_dir)
        except:
            excp_raised = True
            raise
        self.assertEqual(excp_raised,
                         False,
                         msg="Naccess raised an " +
                         "exception on a path containing a '.'. This is not " +
                         "supposed to happen.")


if __name__ == "__main__":
    # Run the tests only if the external naccess executable is available.
    # Only the "not found" case is treated as a reason to skip; any other
    # error is allowed to surface.
    try:
        settings.Locate("naccess")
    except settings.FileNotFound:
        print("Could not find NACCESS, could not test binding...")
        sys.exit(0)
    from ost import testutils
    testutils.RunTests()
Ejemplo n.º 22
0
def _LocateHBPlus(hbplus_bin):
  """
  Locate the hbplus executable via :func:`settings.Locate`.

  :param hbplus_bin: Forwarded as ``explicit_file_name``; the lookup also
                     passes ``"HBPLUS"`` as ``env_name``.
  :returns: The return value of :func:`settings.Locate` for ``'hbplus'``
  """
  return settings.Locate('hbplus', explicit_file_name=hbplus_bin,
                         env_name="HBPLUS")
Ejemplo n.º 23
0
        for aln, ch_name in zip(qs_scorer.alignments, sorted(chm_names_1)):
            self.assertEqual(aln.sequences[0].name, ch_name)
            self.assertEqual(aln.sequences[1].name,
                             qs_scorer.chain_mapping[ch_name])
            self.assertTrue(aln.sequences[0].HasAttachedView())
            self.assertTrue(aln.sequences[1].HasAttachedView())
            self.assertEqual(aln.sequences[0].attached_view.handle,
                             qs_ent_1.ent)
            self.assertEqual(aln.sequences[1].attached_view.handle,
                             qs_ent_2.ent)
            self.assertTrue(ch_name in qs_scorer.mapped_residues)
        # best_score / global_score in [0,1]
        self.assertGreaterEqual(qs_scorer.best_score, 0.0)
        self.assertLessEqual(qs_scorer.best_score, 1.0)
        self.assertGreaterEqual(qs_scorer.global_score, 0.0)
        self.assertLessEqual(qs_scorer.global_score, 1.0)


if __name__ == "__main__":
    # Run the tests only if ClustalW is available. Only the "not found" case
    # is treated as a reason to skip; any other error is allowed to surface.
    try:
        settings.Locate(('clustalw', 'clustalw2'))
    except settings.FileNotFound:
        print("Could not find ClustalW. Ignoring test_qsscoring.py tests.")
        sys.exit(0)
    from ost import testutils
    # the tests are only run if a default compound library could be set
    if testutils.SetDefaultCompoundLib():
        testutils.RunTests()
    else:
        print(
            'No compound library available. Ignoring test_qsscoring.py tests.')
Ejemplo n.º 24
0
import unittest
import sys
from ost import *
from ost import settings
from ost.bindings import hbplus


class TestHBPlusBinding(unittest.TestCase):
    """Tests of the hbplus binding run on ``testfiles/testprotein.pdb``."""

    def setUp(self):
        # entity used by every test method
        entity = io.LoadEntity("testfiles/testprotein.pdb")
        self.protein = entity

    def testHBondList(self):
        """The number of entries returned by HBondList must be 499."""
        bonds = hbplus.HBondList(self.protein)
        n_bonds = len(bonds)
        self.assertEqual(n_bonds, 499)

    def testHBondScore(self):
        """Scoring the entity against itself must give exactly 1.0."""
        score = hbplus.HBondScore(self.protein, self.protein)
        self.assertEqual(score, 1.0)


if __name__ == "__main__":
    # Run the tests only if the external hbplus executable is available.
    # Only the "not found" case is treated as a reason to skip; any other
    # error is allowed to surface.
    try:
        settings.Locate("hbplus")
    except settings.FileNotFound:
        print("Could not find hbplus, could not test binding...")
        sys.exit(0)
    from ost import testutils
    testutils.RunTests()
Ejemplo n.º 25
0
    def testkClustParseOutput(self):
        """Parse the kClust output stored in ``testfiles`` and check it.

        Eleven clusters are expected; representative ids and cluster sizes
        are compared in order.
        """
        expected_ids = (
            'Q5KTS5.1', 'P46331.2', 'P66776.1', 'P42317.2', 'P50199.1',
            'Q59787.1', 'Q9WYG0.1', 'P50197.1', 'P50198.1', 'Q7Z4W1.2',
            'Q04520.1',
        )
        expected_sizes = (3, 3, 4, 1, 7, 3, 2, 1, 1, 3, 1)

        clusters = kclust._ParseOutput('testfiles')

        n_clusters = len(clusters)
        self.assertEqual(n_clusters, 11)

        # all representatives are checked before any cluster size
        for cluster, expected_id in zip(clusters, expected_ids):
            self.assertEqual(cluster.representative_id, expected_id)
        for cluster, expected_size in zip(clusters, expected_sizes):
            self.assertEqual(len(cluster.sequences), expected_size)


if __name__ == "__main__":
    # test if kClust is available on system, otherwise ignore tests
    # (the located path itself is not needed, only the lookup outcome)
    try:
        settings.Locate('kClust')
    except settings.FileNotFound:
        print("Could not find kClust executable: ignoring unit tests")
        sys.exit(0)
    from ost import testutils
    testutils.RunTests()
Ejemplo n.º 26
0
def Blast(query, database, gap_open=11, gap_ext=1, matrix='BLOSUM62',
         blast_location=None, outfmt=0, filter_low_complexity=True):
  """
  Runs a protein vs. protein blast search. The results are returned
  according to the value of the ``outfmt`` parameter.

  :param query: the query sequence
  :type query: :class:`seq.ConstSequenceHandle`

  :param database: The filename of the sequence database. Make sure that
      formatdb has been run on the database and the <database>.pin file exists.
  :param matrix: The substitution matrix to be used. Must be one of 'BLOSUM45',
     'BLOSUM62', 'BLOSUM80', 'PAM30', 'PAM70'.
  :param gap_open: Gap opening penalty. Note that only a subset of gap opening
     penalties is supported for each substitution matrix. Consult the blast
     docs for more information.
  :param gap_ext: Gap extension penalty. Only a subset of gap extension
     penalties are supported for each of the substitution matrices. Consult the
     blast docs for more information.
  :param blast_location: Path to the blast executable. If None, ``blastall``
     is searched first, then ``blastp``.
  :param outfmt: output format, where 0 corresponds to default output (parsed
     blast output) and any other value to raw string output.
  :param filter_low_complexity: Mask off segments of the query sequence that
     have low compositional complexity, as determined by the SEG program of
     Wootton & Federhen (Computers and Chemistry, 1993)
  :rtype: :class:`BlastHit` (with ``outfmt=0``) or :class:`str`
     (with ``outfmt=1``)
  :raises: :class:`ValueError` for an unsupported *matrix*, :class:`IOError`
     if neither ``<database>.pin`` nor ``<database>.pal`` exists,
     :class:`RuntimeError` if no blast executable can be located,
     :class:`BlastError` if the first line written to stderr is an error
     message
  """
  subst_mats=('BLOSUM45', 'BLOSUM62', 'BLOSUM80', 'PAM30', 'PAM70',)
  if matrix not in subst_mats:
    raise ValueError('matrix must be one of %s' % ', '.join(subst_mats))
  if not os.path.exists('%s.pin' % database) and not os.path.exists('%s.pal' % database):
    raise IOError("Database %s does not exist" % database)
  if blast_location is not None and not os.path.exists(blast_location):
    ost.LogScript('Could not find %s' %blast_location)

  if blast_location is None:
    # ``except Exception`` rather than a bare ``except`` so that
    # KeyboardInterrupt/SystemExit are not reported as a missing executable
    try:
      blast_exe=settings.Locate('blastall')
    except Exception:
      try:
        blast_exe=settings.Locate('blastp')
      except Exception:
        raise RuntimeError('could not find blast executable')
  else:
    blast_exe=settings.Locate(os.path.basename(blast_location),explicit_file_name=blast_location)

  if os.path.basename(blast_exe)=='blastall':
    # blastall command line; '-m 7' requests the output handed to the parser
    args=[blast_exe, '-d', database, '-p', 'blastp',
          '-m', '7', '-M', matrix, '-G', str(gap_open), '-E', str(gap_ext)]
    if filter_low_complexity==False:
      args.extend(['-F', 'F'])
  else:
    # any other executable is driven with the blastp style options
    if filter_low_complexity==True:
      complexity_arg='yes'
    else:
      complexity_arg='no'
    args=[blast_exe, '-db', database, '-matrix', matrix,
          '-gapopen', str(gap_open), '-gapextend', str(gap_ext),
          '-outfmt', '5', '-seg', complexity_arg]

  ost.LogInfo('running BLAST (%s)' % ' '.join(args))
  blast_pipe=subprocess.Popen(args, stderr=subprocess.PIPE,
                              stdout=subprocess.PIPE, stdin=subprocess.PIPE)
  # the query is fed through stdin, as FASTA unless it already is a string
  if isinstance(query, str):
    stdout, stderr=blast_pipe.communicate(query.encode())
  else:
    stdout, stderr=blast_pipe.communicate(io.SequenceToString(query, 'fasta').encode())

  if len(stderr)>0:
    # only a first line of the form "[...] ERROR: <message>" is an error;
    # anything else on stderr is ignored
    pattern=re.compile(r'^\[.*\]\s+ERROR:\s+(.*)')
    lines=stderr.decode().split('\n')
    error_message=pattern.match(lines[0])
    if error_message:
      raise BlastError(error_message.group(1), '\n'.join(lines[1:]))
  if outfmt==0:
    return ParseBlastOutput(stdout.decode())
  else:
    return stdout.decode()
Ejemplo n.º 27
0
    self.assertTrue(isinstance(version,str))
    re_v = re.compile('\d+\.\d+\.\d+')
    match=False
    if re_v.match(version):
      match=True
    self.assertEqual(match,True)

  def testBuildDatabase(self):
    """Build a blast database in a temporary directory and search it."""

    tmp_dir_name=tempfile.mkdtemp()
    # registered right away so that the directory is removed even if one of
    # the assertions or calls below fails
    self.addCleanup(shutil.rmtree, tmp_dir_name)
    db_name=os.path.join(tmp_dir_name,'testdb')
    blast.CreateDB('testfiles/multiple.fasta',db_name)
    # a command that is neither formatdb nor makeblastdb must be rejected
    self.assertRaises(IOError,blast.CreateDB, 'testfiles/multiple.fasta',db_name,
                      mkdb_cmd='I/am/not/a/damn/executable')

    test_seq=seq.CreateSequence('B','ALRLVKDGFAVAIADYNDATATAVAAEINQAGGRAVAIKVDV'+
                                    'SRRDQVFAAVEQARKALGGFNVIVNNAGIAPSTPIESIT')
    blastout=blast.Blast(test_seq,db_name)
    self.assertTrue(len(blastout)>10)

if __name__ == "__main__":
  # the tests are only run when a blast executable is available on the
  # system; otherwise a notice is printed and the script exits with status 0
  try:
    blastpath=settings.Locate(('blastp','blastall'))
  except settings.FileNotFound:
    print("Could not find blast executable: ignoring unit tests")
    sys.exit(0)
  else:
    from ost import testutils
    testutils.RunTests()
Ejemplo n.º 28
0
def ClustalW(seq1,
             seq2=None,
             clustalw=None,
             keep_files=False,
             nopgap=False,
             clustalw_option_string=False):
    '''
  Runs a ClustalW multiple sequence alignment. The results are returned as a
  :class:`~ost.seq.AlignmentHandle` instance.

  There are two ways to use this function:

   - align exactly two sequences:

      :param seq1: sequence_one
      :type seq1: :class:`~ost.seq.SequenceHandle` or :class:`str`

      :param seq2: sequence_two
      :type seq2: :class:`~ost.seq.SequenceHandle` or :class:`str`

      The two sequences can be specified as two separate function parameters
      (`seq1`, `seq2`). The type of both parameters can be either
      :class:`~ost.seq.SequenceHandle` or :class:`str`, but must be the same for
      both parameters.

   - align two or more sequences:

      :param seq1: sequence_list
      :type seq1: :class:`~ost.seq.SequenceList`

      :param seq2: must be :class:`None`

      Two or more sequences can be specified by using a
      :class:`~ost.seq.SequenceList`. It is then passed as the first function
      parameter (`seq1`). The second parameter (`seq2`) must be :class:`None`.

  If the arguments match neither of the two ways, an error is logged and
  :class:`None` is returned.

  :param clustalw: path to ClustalW executable (used in :func:`~ost.settings.Locate`)
  :type clustalw: :class:`str`
  :param nopgap: turn residue-specific gaps off
  :type nopgap: :class:`bool`
  :param clustalw_option_string: additional ClustalW flags (see http://www.clustal.org/download/clustalw_help.txt)
  :type clustalw_option_string: :class:`str`
  :param keep_files: do not delete temporary files
  :type keep_files: :class:`bool`

  .. note ::

    - In the passed sequences ClustalW will convert lowercase to uppercase, and
      change all '.' to '-'. OST will convert any '?' to 'X' before aligning
      sequences with ClustalW.
    - If a :attr:`sequence name <ost.seq.SequenceHandle.name>` contains spaces,
      only the part before the space is considered as sequence name. To avoid
      surprises, you should remove spaces from the sequence name.
    - Sequence names must be unique (:class:`ValueError` exception raised
      otherwise).

  ClustalW will accept only IUB/IUPAC amino acid and nucleic acid codes:

  ======= ======================= ======= ============================
  Residue  Name                   Residue  Name
  ======= ======================= ======= ============================
     A    alanine                    P    proline
     B    aspartate or asparagine    Q    glutamine
     C    cystine                    R    arginine
     D    aspartate                  S    serine
     E    glutamate                  T    threonine
     F    phenylalanine              U    selenocysteine
     G    glycine                    V    valine
     H    histidine                  W    tryptophan
     I    isoleucine                 Y    tyrosine
     K    lysine                     Z    glutamate or glutamine
     L    leucine                    X    any
     M    methionine                 \\*   translation stop
     N    asparagine                 \\-   gap of indeterminate length
  ======= ======================= ======= ============================

  '''
    clustalw_path = settings.Locate(('clustalw', 'clustalw2'),
                                    explicit_file_name=clustalw)

    # normalise the input into a sequence list
    if seq2 != None:
        if isinstance(seq1, seq.SequenceHandle) and isinstance(
                seq2, seq.SequenceHandle):
            seq_list = seq.CreateSequenceList()
            seq_list.AddSequence(seq1)
            seq_list.AddSequence(seq2)
        elif isinstance(seq1, str) and isinstance(seq2, str):
            # plain strings get the fixed names "seq1" and "seq2"
            seqh1 = seq.CreateSequence("seq1", seq1)
            seqh2 = seq.CreateSequence("seq2", seq2)
            seq_list = seq.CreateSequenceList()
            seq_list.AddSequence(seqh1)
            seq_list.AddSequence(seqh2)
        else:
            LogError("WARNING: Specify at least two Sequences")
            return
    elif isinstance(seq1, seq.SequenceList):
        seq_list = seq1
    else:
        LogError(
            "WARNING: Specify either two SequenceHandles or one SequenceList")
        return

    # fewer distinct names than sequences means at least one duplicate
    sequence_names = set()
    for s in seq_list:
        # we cut out anything after a space to be consistent with ClustalW behaviour
        sequence_names.add(s.GetName().split(' ')[0])
    if len(sequence_names) < len(seq_list):
        raise ValueError(
            "ClustalW can only process sequences with unique identifiers!")

    # work on copies so that replacing '?' by 'X' does not touch the input
    new_list = seq.CreateSequenceList()
    for s in seq_list:
        ss = s.Copy()
        for i, c in enumerate(ss):
            if c == '?':
                ss[i] = 'X'
        new_list.AddSequence(ss)

    seq_list = new_list

    temp_dir = utils.TempDirWithFiles((seq_list, ))
    out = os.path.join(temp_dir.dirname, 'out.fasta')
    command = '%s -infile="%s" -output=fasta -outfile="%s"' % (
        clustalw_path, temp_dir.files[0], out)
    if nopgap:
        command += " -nopgap"
    if clustalw_option_string != False:
        # see useful flags: http://toolkit.tuebingen.mpg.de/clustalw/help_params
        command = command + " " + clustalw_option_string

    # standard output of ClustalW is discarded
    subprocess.run(command, shell=True, stdout=subprocess.DEVNULL)

    aln = io.LoadAlignment(out)

    # carry offsets and attached views over to the aligned sequence with the
    # same name
    # NOTE(review): if no name matches, seq_num is left at the last index of
    # the alignment - confirm whether that can happen
    for sequence in seq_list:
        for seq_num, aln_seq in enumerate(aln.sequences):
            if aln_seq.GetName() == sequence.GetName():
                break
        aln.SetSequenceOffset(seq_num, sequence.offset)
        if sequence.HasAttachedView():
            aln.AttachView(seq_num, sequence.GetAttachedView().Copy())

    if not keep_files:
        temp_dir.Cleanup()

    return aln
Ejemplo n.º 29
0
def _FindLGABinary(lga_bin):
    """Look up the ``lga`` executable through ``settings.Locate``.

    :param lga_bin: Value forwarded to ``settings.Locate`` as
                    ``explicit_file_name``.
    :returns: Whatever ``settings.Locate`` returns for the ``lga`` binary
              (presumably its path -- confirm against ``settings.Locate``).
    """
    # 'LGA_BINARY' is handed over as ``env_name`` alongside the explicit file
    # name; how the two are prioritised is decided inside settings.Locate.
    locate_options = {
        'explicit_file_name': lga_bin,
        'env_name': 'LGA_BINARY',
    }
    return settings.Locate('lga', **locate_options)
Ejemplo n.º 30
0
def RunAntechamber(res_name,
                   filename,
                   format='ccif',
                   amberhome=None,
                   base_out_dir=None):
    """Run Antechamber to guess force field parameters for a given residue name.

    This requires an installation of AmberTools (tested with AmberTools15) with
    binaries ``antechamber`` and ``parmchk2``.

    This has the same restrictions as Antechamber itself and we assume the input
    to be uncharged. Note that Antechamber cannot deal with metal ions and other
    non-organic elements.

    The results are stored in a separate folder named `res_name` within
    `base_out_dir` (if given, otherwise the current working directory). The main
    output files are ``frcmod`` and ``out.mpdb``. The former contains force field
    parameters and masses. The latter maps atom names to atom types and defines
    the partial charges. The same output could be obtained as follows:

    .. code-block:: console

       $ antechamber -i <FILENAME> -fi <FORMAT> -bk '<RES_NAME>' -o out.mol2 -fo mol2 -c bcc -pf yes
       $ parmchk2 -i out.mol2 -f mol2 -o frcmod -a Y
       $ antechamber -i out.mol2 -fi mol2 -o out.mpdb -fo mpdb -pf yes

    The force field parameters can be manually modified if needed. It can for
    instance happen that some parameters cannot be identified. Those lines will
    be marked with a comment "ATTN, need revision".

    The captured standard output and standard error of all three commands are
    written to ``console.stdout`` and ``console.stderr`` in the output folder.
    A non-zero return code of a command is logged as an error but does not
    abort the run; success is judged by the presence of the output files.

    The commands are executed with the output folder as working directory and
    `filename` is passed to ``antechamber`` unchanged, so a relative `filename`
    is resolved relative to the output folder. The original working directory
    is restored afterwards, also if an error occurs.

    :param res_name: Residue name for which we desire force field parameters.
    :type res_name:  :class:`str`
    :param filename: Path to a file which contains the necessary information for
                     `res_name`. It must include all hydrogens.
    :type filename:  :class:`str`
    :param format: Format of file given with `filename`. Common formats are 'ccif'
                   for PDB's component dictionary or 'pdb' for a PDB file
                   containing the desired residue with all hydrogens.
    :type format:  :class:`str`
    :param amberhome: Base path of your AmberTools installation. If not None,
                      we look for ``antechamber`` and ``parmchk2`` within
                      ``AMBERHOME/bin`` additionally to the system's ``PATH``.
    :type amberhome:  :class:`str`
    :param base_out_dir: Path to a base path, where the output will be stored.
                         If None, the current working directory is used.
    :type base_out_dir:  :class:`str`

    :raises: ``settings.FileNotFound`` if ``antechamber`` or ``parmchk2``
             cannot be located.
    :raises: :class:`RuntimeError` if ``frcmod`` or ``out.mpdb`` do not exist
             in the output folder after running the commands.
    """
    # find antechamber binaries
    if amberhome is None:
        search_paths = []
    else:
        search_paths = [os.path.join(amberhome, 'bin')]
    try:
        antechamber = settings.Locate('antechamber', search_paths=search_paths)
        parmchk2 = settings.Locate('parmchk2', search_paths=search_paths)
    except settings.FileNotFound:
        ost.LogError("Failed to find Antechamber binaries. Make sure you have "
                     "AmberTools installed!")
        raise

    # prepare path
    cwd = os.getcwd()
    if base_out_dir is None:
        base_out_dir = cwd
    out_dir = os.path.abspath(os.path.join(base_out_dir, res_name))
    if not os.path.exists(out_dir):
        # note: this creates intermediate folders too
        try:
            os.makedirs(out_dir)
        except Exception:
            ost.LogError("Failed to create output directory " + out_dir + "!")
            raise

    # Commands are kept as argument lists (not split from a string) so that
    # paths or names containing spaces reach the binaries as single arguments.
    cmds = [
        [antechamber, "-i", filename, "-fi", format, "-bk", res_name,
         "-o", "out.mol2", "-fo", "mol2", "-c", "bcc", "-pf", "yes"],
        [parmchk2, "-i", "out.mol2", "-f", "mol2", "-o", "frcmod",
         "-a", "Y"],
        [antechamber, "-i", "out.mol2", "-fi", "mol2", "-o", "out.mpdb",
         "-fo", "mpdb", "-pf", "yes"],
    ]
    separator = "-" * 70 + "\n"

    # execute it (the binaries write their output files relative to the
    # working directory, hence the chdir)
    os.chdir(out_dir)
    try:
        sout_parts = ["Generating force field parameters for " + res_name + "\n"]
        serr_parts = []
        for args in cmds:
            cmd = " ".join(args)  # human-readable form for the log files
            sout_parts.append(separator + "Stdout of: " + cmd + "\n" + separator)
            serr_parts.append(separator + "Stderr of: " + cmd + "\n" + separator)
            job = subprocess.Popen(args,
                                   stdout=subprocess.PIPE,
                                   stderr=subprocess.PIPE)
            sout, serr = job.communicate()
            # communicate() yields bytes for binary pipes; decode before
            # mixing with str, tolerating undecodable bytes in tool output.
            sout_parts.append(sout.decode(errors="replace"))
            serr_parts.append(serr.decode(errors="replace"))
            if job.returncode != 0:
                ost.LogError("Unsuccessful execution of " + cmd + ". Return code: "\
                             + str(job.returncode))
        # write command line outputs
        with open("console.stdout", "w") as txt_file:
            txt_file.write("".join(sout_parts))
        with open("console.stderr", "w") as txt_file:
            txt_file.write("".join(serr_parts))
    except Exception:
        ost.LogError("Failed to excecute antechamber binaries!")
        raise
    finally:
        # get back to original path, even if something went wrong above
        os.chdir(cwd)

    # check result
    frcmod_filename = os.path.join(out_dir, 'frcmod')
    mpdb_filename = os.path.join(out_dir, 'out.mpdb')
    if not os.path.exists(frcmod_filename):
        raise RuntimeError("Failed to generate frcmod file with Antechamber!")
    if not os.path.exists(mpdb_filename):
        raise RuntimeError(
            "Failed to generate out.mpdb file with Antechamber!")