def BlastVersion(blast_location=None):
    """
    Returns the version of the BLAST executable, e.g. 2.2.24 as a string

    The ``blastall`` executable is looked up first; if it cannot be located,
    ``blastp`` is tried instead. The version is parsed from the text the
    executable writes to stdout.

    :param blast_location: Explicit path to the BLAST executable, handed to
                           :func:`~ost.settings.Locate` as
                           ``explicit_file_name``.
    :type blast_location: :class:`str`

    :returns: The version number as :class:`str`
    :raises: :class:`RuntimeError` if neither executable can be located,
             :class:`IOError` if no version number is found in the output.
    """
    # Only a failed lookup may trigger the fallback; anything else (e.g. a
    # keyboard interrupt) must propagate unchanged.
    try:
        blast_exe = settings.Locate('blastall',
                                    explicit_file_name=blast_location)
    except settings.FileNotFound:
        try:
            blast_exe = settings.Locate('blastp',
                                        explicit_file_name=blast_location)
        except settings.FileNotFound as err:
            raise RuntimeError('could not find blast executable') from err

    if os.path.basename(blast_exe) == 'blastall':
        # blastall is started without arguments; the version is taken from
        # the "blastall x.y.z arguments:" line of its output
        args = [blast_exe]
        pattern = re.compile(r'\s*blastall (\d+\.\d+\.\d+)\s+arguments:\s*')
    else:
        args = [blast_exe, '-version']
        pattern = re.compile(r'\s*Package: blast (\d+\.\d+\.\d+),\s+')

    blast_pipe = subprocess.Popen(args, stdout=subprocess.PIPE,
                                  stderr=subprocess.PIPE)
    stdout, _ = blast_pipe.communicate()
    for line in stdout.decode().splitlines():
        m = pattern.match(line)
        if m:
            return m.group(1)
    raise IOError("could not determine blast version for '%s'" % blast_exe)
def testCADClassic(self): try: # all of the following need to be present cad_calc_path = settings.Locate('CADscore_calc.bash') cad_read_g_path = settings.Locate('CADscore_read_global_scores.bash') cad_read_l_path = settings.Locate('CADscore_read_local_scores.bash') executable_path = settings.Locate('voroprot2') except: print("Could not find CAD score classic executables: ignoring unit tests") return cad_result = cadscore.CADScore(self.protein, self.protein, label="cad_classic") # model and reference are the same, we expect a global CAD score of 1 self.assertEqual(cad_result.globalAA, 1.0) # one score per residue self.assertEqual(len(cad_result.localAA), len(self.protein.residues)) # model and reference are the same, we expect local CAD scores of 0.0 for score in cad_result.localAA.values(): self.assertEqual(score, 0.0) # check whether this score is assigned to each residue as float property for r in self.protein.residues: self.assertTrue(r.HasProp("cad_classic")) self.assertEqual(r.GetFloatProp("cad_classic"), 0.0)
def testCADVoronota(self): try: # all of the following need to be present voronota_cadscore_path = settings.Locate("voronota-cadscore") executable_path = settings.Locate("voronota") except: print("Could not find CAD score voronota executables: ignoring unit tests") return cad_result = cadscore.CADScore(self.protein, self.protein, mode="voronota", label="cad_voronota") # model and reference are the same, we expect a global CAD score of 1 self.assertEqual(cad_result.globalAA, 1.0) # one score per residue self.assertEqual(len(cad_result.localAA), len(self.protein.residues)) # model and reference are the same, we expect local CAD scores of 1.0 for score in cad_result.localAA.values(): self.assertEqual(score, 1.0) # check whether this score is assigned to each residue as float property for r in self.protein.residues: self.assertTrue(r.HasProp("cad_voronota")) self.assertEqual(r.GetFloatProp("cad_voronota"), 1.0)
def testAccNACCESS(self):
    # tests oligo mode by comparing the results from doing the
    # corresponding calculations manually
    pdb_path = os.path.join("testfiles", "1a0s.pdb")
    ent_one = io.LoadPDB(pdb_path)
    ent_two = io.LoadPDB(pdb_path)

    # we're only interested in peptide stuff...
    ent_one = ent_one.Select("peptide=true")
    ent_two = ent_two.Select("peptide=true")

    acc_classic = AccessibilitiesRaw(ent_one)
    acc_oligo = AccessibilitiesOligo(ent_two)

    self.assertTrue(Compare(acc_classic, acc_oligo))

    # if there is naccess around, we also check for equality with
    # naccess results
    # Only the lookup is guarded: the comparison below must be able to fail
    # the test instead of being reported as "NACCESS not found".
    try:
        settings.Locate("naccess")
    except settings.FileNotFound:
        print(
            "Could not find NACCESS, could not compare Accessiblity function..."
        )
        return

    ent_three = io.LoadPDB(pdb_path)
    ent_three = ent_three.Select("peptide=true")
    acc_naccess = AccessibilitiesRaw(ent_three, use_naccess=True)
    self.assertTrue(Compare(acc_classic, acc_naccess))
def _RunkClust(tmp_dir_name, clustering_thresh, create_alignments):
    """
    Run kClust on ``fastadb.fasta`` in *tmp_dir_name* and parse the result.

    :param tmp_dir_name: Directory containing ``fastadb.fasta``; it is also
                         passed to kClust with ``-d`` and read by
                         ``_ParseOutput``.
    :param clustering_thresh: Threshold that is converted linearly into the
                              value passed to kClust with ``-s``.
                              NOTE(review): presumably a sequence identity in
                              percent - confirm against the caller.
    :param create_alignments: If true, an alignment is attached to every
                              cluster (ClustalW for clusters with more than
                              one sequence).

    :returns: The clusters as returned by ``_ParseOutput``
    """
    bitscore = clustering_thresh * 0.060269 - 0.68498

    executable = settings.Locate('kClust')

    # Argument list without a shell: paths containing spaces or shell
    # metacharacters reach kClust unchanged.
    cmd = [
        executable,
        '-i', os.path.join(tmp_dir_name, 'fastadb.fasta'),
        '-d', tmp_dir_name,
        '-s', str(bitscore),
    ]
    ps = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                          stderr=subprocess.PIPE)
    # the console output is not used, only drained until kClust is done
    ps.communicate()

    result = _ParseOutput(tmp_dir_name)

    if create_alignments:
        from ost.bindings import clustalw
        for c in result:
            if len(c.sequences) > 1:
                c.alignment = clustalw.ClustalW(c.sequences)
            else:
                # a single sequence needs no aligner
                aln = seq.CreateAlignment()
                aln.AddSequence(c.sequences[0])
                c.alignment = aln

    return result
def _GetExecutable(naccess_exe):
    """
    Look up the naccess executable.

    :param naccess_exe: Explicit path to naccess executable
    :returns: Path to the executable
    :exception: FileNotFound if executable is not found
    """
    naccess_path = settings.Locate('naccess', explicit_file_name=naccess_exe)
    return naccess_path
def _RunTmScore(tmscore, tmp_dir):
    """
    Run tmscore on ``model01.pdb`` and ``model02.pdb`` in *tmp_dir*.

    :param tmscore: Explicit path to the tmscore executable, handed to
                    :func:`~ost.settings.Locate` as ``explicit_file_name``.
    :param tmp_dir: Directory holding the two model files.

    :returns: The result of ``_ParseTmScore`` on the program output
    :raises: :class:`RuntimeError` if the program prints fewer than 22 lines;
             *tmp_dir* is cleaned up before raising.
    """
    model1_filename = os.path.join(tmp_dir, 'model01.pdb')
    model2_filename = os.path.join(tmp_dir, 'model02.pdb')
    if platform.system() == "Windows":
        tmscore_path = os.path.normpath(
            settings.Locate('tmscore.exe', explicit_file_name=tmscore))
    else:
        tmscore_path = settings.Locate('tmscore', explicit_file_name=tmscore)

    # Argument list without a shell: file names containing spaces no longer
    # need quoting (the Windows command string left them unquoted).
    ps = subprocess.Popen([tmscore_path, model1_filename, model2_filename],
                          stdout=subprocess.PIPE)
    stdout, _ = ps.communicate()
    lines = stdout.decode().splitlines()
    if len(lines) < 22:
        _CleanupFiles(tmp_dir)
        raise RuntimeError("tmscore superposition failed")
    return _ParseTmScore(lines)
def __init__(self, context_menu):
    # An empty executable path is stored when msms cannot be located.
    try:
        self.executable = settings.Locate("msms")
    except settings.FileNotFound:
        self.executable = ""

    QtCore.QObject.__init__(self, context_menu.qobject)

    # Register the "Calculate Surface" entry in the context menu for entities.
    self.action = QtWidgets.QAction("Calculate Surface", self)
    self.action.triggered.connect(self.CalculateSurface)
    context_menu.AddAction(self.action, gui.ContextActionType.ENTITY)
def _RunTmAlign(tmalign, tmp_dir):
    """
    Run tmalign on ``model01.pdb`` and ``model02.pdb`` in *tmp_dir*.

    The program is asked (``-m``) to write its matrix to ``matrix.txt`` in
    *tmp_dir*; that file is read back after a successful run.

    :param tmalign: Explicit path to the tmalign executable, handed to
                    :func:`~ost.settings.Locate` as ``explicit_file_name``.
    :param tmp_dir: Directory holding the two model files.

    :returns: The result of ``_ParseTmAlign`` on the program output and the
              lines of the matrix file
    :raises: :class:`RuntimeError` if the program prints fewer than 22 lines;
             *tmp_dir* is cleaned up before raising.
    """
    model1_filename = os.path.join(tmp_dir, 'model01.pdb')
    model2_filename = os.path.join(tmp_dir, 'model02.pdb')
    matrix_filename = os.path.join(tmp_dir, 'matrix.txt')
    if platform.system() == "Windows":
        tmalign_path = os.path.normpath(
            settings.Locate('tmalign.exe', explicit_file_name=tmalign))
    else:
        tmalign_path = settings.Locate('tmalign', explicit_file_name=tmalign)

    # Argument list without a shell: file names containing spaces no longer
    # need quoting (the Windows command string left them unquoted).
    args = [tmalign_path, model1_filename, model2_filename,
            '-m', matrix_filename]
    ps = subprocess.Popen(args, stdout=subprocess.PIPE)
    stdout, _ = ps.communicate()
    lines = stdout.decode().splitlines()
    if len(lines) < 22:
        _CleanupFiles(tmp_dir)
        raise RuntimeError("tmalign superposition failed")

    # the context manager closes the file even if reading fails
    with open(matrix_filename) as matrix_file:
        lines_matrix = matrix_file.readlines()
    return _ParseTmAlign(lines, lines_matrix)
def _GetExecutable(msms_exe, msms_env):
    """
    Look up the MSMS executable.

    :param msms_exe: Explicit path to msms executable
    :param msms_env: Environment variable pointing to msms executable
    :returns: Path to the executable
    :raises: :class:`~ost.FileNotFound` if executable is not found
    """
    msms_path = settings.Locate('msms', explicit_file_name=msms_exe,
                                env_name=msms_env)
    return msms_path
def _ExecuteDSSP(path, dssp_bin, temp_dir=None):
    """
    Run DSSP on the file *path* and return the name of its output file.

    :param path: Input file handed to the DSSP binary.
    :param dssp_bin: Explicit path to the DSSP binary, handed to
                     :func:`~ost.settings.Locate` as ``explicit_file_name``.
    :param temp_dir: Directory in which the output file name is generated.

    :returns: Path of the output file passed to DSSP
    :raises: :class:`RuntimeError` if the located binary is a directory or
             is not executable.
    """
    # NOTE: tempfile.mktemp only hands out a name and is therefore a safety
    # problem; mkstemp (passing the file handle on to the subsequent process)
    # would be the safe alternative.
    output_path = tempfile.mktemp(suffix=".out", prefix="dssp", dir=temp_dir)

    binary_names = ['dsspcmbi', 'dssp', 'mkdssp']
    dssp_abs_path = settings.Locate(binary_names,
                                    env_name='DSSP_EXECUTABLE',
                                    explicit_file_name=dssp_bin)

    if os.path.isdir(dssp_abs_path):
        raise RuntimeError(
            '"%s" is a directory. Specify path to DSSP binary' % dssp_abs_path)
    if not os.access(dssp_abs_path, os.X_OK):
        raise RuntimeError('"%s" is not executable' % dssp_abs_path)

    dssp_cmd = [dssp_abs_path, path, output_path]
    subprocess.run(dssp_cmd)

    return output_path
def CreateDB(infasta, dbout, mkdb_cmd=None):
    """
    Create a blast DB from a fasta file

    :param infasta: the pdb fasta from which the database will be created
    :type infasta: :class:`string`

    :param dbout: output location for blastDB file
    :type dbout: :class:`string`

    :param mkdb_cmd: path to the ``formatdb`` or ``makeblastdb`` executable.
                     If None, ``formatdb`` is looked up first and
                     ``makeblastdb`` is used as fallback.
    :type mkdb_cmd: :class:`string`

    :raises: :class:`RuntimeError` if no executable can be located (only
             with ``mkdb_cmd=None``), :class:`IOError` if *mkdb_cmd* is
             neither named ``formatdb`` nor ``makeblastdb``.
    """
    if mkdb_cmd is None:
        # Only a failed lookup may trigger the fallback.
        try:
            exe = settings.Locate('formatdb')
            args = [exe, '-i', infasta, '-n', dbout]
        except settings.FileNotFound:
            try:
                exe = settings.Locate('makeblastdb')
                args = [exe, '-in', infasta, '-out', dbout,
                        '-dbtype', 'prot']
            except settings.FileNotFound as err:
                raise RuntimeError(
                    'could not find makeblastdb/formatdb executable') from err
    else:
        cmd_name = os.path.basename(mkdb_cmd)
        if cmd_name == 'makeblastdb':
            exe = settings.Locate('makeblastdb', explicit_file_name=mkdb_cmd)
            args = [exe, '-in', infasta, '-out', dbout, '-dbtype', 'prot']
        elif cmd_name == 'formatdb':
            # the keyword is explicit_file_name, as in every other Locate call
            exe = settings.Locate('formatdb', explicit_file_name=mkdb_cmd)
            args = [exe, '-i', infasta, '-n', dbout]
        else:
            raise IOError('mkdb command must either be the path to formatdb or makeblastdb!')

    ost.LogInfo('creating blast DB (%s)' % ' '.join(args))
    blast_pipe = subprocess.Popen(args, stdout=subprocess.PIPE,
                                  stderr=subprocess.PIPE)
    blast_pipe.communicate()
def _RuniAlign(ialign, tmp_dir, options=None):
    """
    Run ``ialign.pl`` on ``model01.pdb`` and ``model02.pdb`` in *tmp_dir*.

    :param ialign: Explicit path to the ``ialign.pl`` script, handed to
                   :func:`~ost.settings.Locate` as ``explicit_file_name``.
    :param tmp_dir: Directory holding the two model files; also passed to
                    the script as ``-w``.
    :param options: Additional command line options as a dictionary. A key
                    with value ``True`` becomes the flag ``-key``, a key with
                    value ``False`` is left out, any other value becomes
                    ``-key value``. Entries override the defaults ``a`` and
                    ``w``.
    :type options: :class:`dict`

    :returns: The result of ``_ParseiAlign`` on the program output
    :raises: :class:`RuntimeError` if the program prints fewer than 22 lines;
             *tmp_dir* is cleaned up before raising.
    """
    opts = {
        'a': 1,  # concise output
        'w': tmp_dir
    }
    # None instead of a shared mutable {} as default argument
    opts.update(options or {})

    cmd_opts = []
    for k, v in opts.items():
        if isinstance(v, bool):
            if v:
                cmd_opts.append('-%s' % str(k))
        else:
            cmd_opts.append('-%s %s' % (str(k), str(v)))
    cmd_opts = ' '.join(cmd_opts)

    model1_filename = os.path.join(tmp_dir, 'model01.pdb')
    model2_filename = os.path.join(tmp_dir, 'model02.pdb')
    # the script is looked up under the same name on every platform
    ialign_path = settings.Locate('ialign.pl', explicit_file_name=ialign)
    if platform.system() == "Windows":
        command = "\"%s\" %s %s %s" % (os.path.normpath(ialign_path),
                                       model1_filename, model2_filename,
                                       cmd_opts)
    else:
        command = "\"%s\" \"%s\" \"%s\" %s" % (ialign_path, model1_filename,
                                               model2_filename, cmd_opts)

    ps = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE)
    stdout, _ = ps.communicate()
    lines = stdout.decode().splitlines()
    if len(lines) < 22:
        _CleanupFiles(tmp_dir)
        raise RuntimeError("iAlign superposition failed")
    return _ParseiAlign(lines)
def testLGA(self): try: lga_path = settings.Locate('lga') except: print("Could not find lga executable: ignoring unit tests") return lga_result = lga.GDT(self.chain_a, self.chain_a, reference_length=len(self.chain_a.residues)) expected_transform = geom.Mat4(1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1) self.assertEqual(lga_result.gdt_ts, 100.0) self.assertEqual(lga_result.gdt_ha, 100.0) self.assertEqual(lga_result.GetTransform(), expected_transform)
def testIAlign(self): try: ialign_exec = settings.Locate('ialign.pl') except: print("Could not find ialign master perl script: ignoring unit tests") return ialign_result = ialign.iAlign(self.protein, self.protein) # model and reference are the same, we expect pretty good results self.assertEqual(ialign_result.rmsd, 0.0) self.assertEqual(ialign_result.is_score, 1.0) self.assertEqual(SequenceIdentity(ialign_result.alignment), 100.0) self.assertEqual(ialign_result.aligned_residues, 78) self.assertEqual(ialign_result.aligned_contacts, 91) # transformation should be identity matrix (no transformation at all...) identity = geom.Mat4(1,0,0,0, 0,1,0,0, 0,0,1,0, 0,0,0,1) self.assertEqual(ialign_result.transform, identity)
def testTMAlign(self): try: cad_calc_path = settings.Locate('tmalign') except: print("Could not find tmalign executable: ignoring unit tests") return tm_result = tmtools.TMAlign(self.protein, self.protein) # model and reference are the same, we expect pretty good results self.assertEqual(tm_result.rmsd, 0.0) self.assertEqual(tm_result.tm_score, 1.0) self.assertEqual(tm_result.aligned_length, len(self.protein.chains[0].residues)) self.assertEqual(SequenceIdentity(tm_result.alignment), 100.0) # transformation should be identity matrix (no transformation at all...) identity = geom.Mat4(1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1) self.assertEqual(tm_result.transform, identity)
def testAccDSSP(self): # only relevant if dssp there try: # same check used in dssp binding dssp_path = settings.Locate(['dsspcmbi', 'dssp', 'mkdssp'], env_name='DSSP_EXECUTABLE') except: print( "Could not find DSSP, could not compare Accessibility function..." ) return # we assume oligo mode to be working as it is tested in # testAccNACCESS. So we only test the single residue # accessibilitities ent_one = io.LoadPDB(os.path.join("testfiles", "1a0s.pdb")) ent_two = io.LoadPDB(os.path.join("testfiles", "1a0s.pdb")) ent_one = ent_one.Select("peptide=true") ent_two = ent_two.Select("peptide=true") dssp.AssignDSSP(ent_one, extract_burial_status=True, dssp_bin=dssp_path) mol.alg.Accessibility(ent_two, algorithm=mol.alg.AccessibilityAlgorithm.DSSP) for a, b in zip(ent_one.residues, ent_two.residues): # overall accessibility if a.HasProp("solvent_accessibility") and b.HasProp("asaAbs"): diff = abs(a.GetFloatProp("solvent_accessibility") -\ round(b.GetFloatProp("asaAbs"))) self.assertTrue(diff < 0.01) # relative accessibility if a.HasProp("relative_solvent_accessibility") and b.HasProp( "asaRel"): diff = abs(a.GetFloatProp("relative_solvent_accessibility") -\ b.GetFloatProp("asaRel")) self.assertTrue(diff < 0.01)
def testTMScore(self):
    # skipped when the tmscore executable is not available; only a failed
    # lookup counts as "not available"
    try:
        settings.Locate('tmscore')
    except settings.FileNotFound:
        print("Could not find tmscore executable: ignoring unit tests")
        return

    tm_result = tmtools.TMScore(self.protein, self.protein)

    # model and reference are the same, we expect pretty good results
    self.assertEqual(tm_result.rmsd_common, 0.0)
    self.assertEqual(tm_result.tm_score, 1.0)
    self.assertEqual(tm_result.max_sub, 1.0)
    self.assertEqual(tm_result.gdt_ts, 1.0)
    self.assertEqual(tm_result.gdt_ha, 1.0)
    self.assertEqual(tm_result.rmsd_below_five, 0.0)

    # transformation should be identity matrix (no transformation at all...)
    identity = geom.Mat4(1, 0, 0, 0,
                         0, 1, 0, 0,
                         0, 0, 1, 0,
                         0, 0, 0, 1)
    self.assertEqual(tm_result.transform, identity)
def testSecStruct(self): # unit test only makes sense, when a dssp binary is around try: # same check used in dssp binding dssp_path = settings.Locate(['dsspcmbi', 'dssp', 'mkdssp'], env_name='DSSP_EXECUTABLE') except: print( "Could not find DSSP, could not compare sec struct assignment..." ) return dssp_ent = io.LoadPDB(os.path.join("testfiles", "1a0s.pdb")) ost_ent = io.LoadPDB(os.path.join("testfiles", "1a0s.pdb")) dssp.AssignDSSP(dssp_ent, dssp_bin=dssp_path) mol.alg.AssignSecStruct(ost_ent) for a, b in zip(dssp_ent.residues, ost_ent.residues): self.assertTrue( str(a.GetSecStructure()) == str(b.GetSecStructure()))
def __init__(self, query, hhsuite_root, hhblits_bin=None, working_dir=None):
    # Sets up binary locations and the working directory, and places the
    # query as a file inside the working directory.
    self.query = query
    self.hhsuite_root = hhsuite_root

    bundled_bin = os.path.join(self.hhsuite_root, 'bin/hhblits')
    if not os.path.exists(bundled_bin):
        # no hhblits below the given root: locate it and derive the
        # directories from where it was found
        self.hhblits_bin = settings.Locate('hhblits',
                                           explicit_file_name=hhblits_bin)
        self.bin_dir = os.path.dirname(self.hhblits_bin)
        # guess root folder (note: this may fail in future)
        self.hhsuite_root = os.path.dirname(self.bin_dir)
    else:
        self.bin_dir = os.path.join(self.hhsuite_root, 'bin')
        self.hhblits_bin = bundled_bin
    self.hhlib_dir = os.path.join(self.hhsuite_root, 'lib', 'hh')

    query_is_path = isinstance(query, str)
    # cleanup is only flagged for directories created in here
    self.needs_cleanup = not working_dir

    if working_dir:
        self.working_dir = working_dir
        if not os.path.exists(working_dir):
            os.mkdir(working_dir)
        if query_is_path:
            self.filename = os.path.abspath(
                os.path.join(self.working_dir, os.path.basename(query)))
            # no copy needed if the query already is the target file
            if self.filename != os.path.abspath(query):
                shutil.copy(query, self.filename)
        else:
            self.filename = os.path.join(
                self.working_dir, '%s.fasta' % HHblits.OUTPUT_PREFIX)
            ost.io.SaveSequence(query, self.filename)
    elif query_is_path:
        self.working_dir = tempfile.mkdtemp()
        self.filename = os.path.abspath(
            os.path.join(self.working_dir, os.path.basename(query)))
        shutil.copy(query, self.filename)
    else:
        tmp_dir = utils.TempDirWithFiles((query,))
        self.working_dir = tmp_dir.dirname
        self.filename = tmp_dir.files[0]
# that it works when data is used which has a correpsonding path. na_tmp_dir = tempfile.mkdtemp(prefix="ih.") def cleanup(): shutil.rmtree(na_tmp_dir) self.addCleanup(cleanup) ost_ent = io.LoadPDB('testfiles/testprotein.pdb') excp_raised = False try: sasa = naccess.CalculateSurfaceArea(ost_ent, scratch_dir=na_tmp_dir) except: excp_raised = True raise self.assertEqual(excp_raised, False, msg="Naccess raised an " + "exception on a path containing a '.'. This is not " + "supposed to happen.") if __name__ == "__main__": try: settings.Locate("naccess") except: print("Could not find NACCESS, could not test binding...") sys.exit(0) from ost import testutils testutils.RunTests()
def _LocateHBPlus(hbplus_bin):
    """
    Look up the hbplus executable.

    :param hbplus_bin: Explicit path to the hbplus executable, handed to
                       :func:`~ost.settings.Locate` as
                       ``explicit_file_name``. The environment variable name
                       ``HBPLUS`` is passed along as ``env_name``.
    :returns: The result of :func:`~ost.settings.Locate`
    """
    hbplus_path = settings.Locate('hbplus',
                                  explicit_file_name=hbplus_bin,
                                  env_name="HBPLUS")
    return hbplus_path
for aln, ch_name in zip(qs_scorer.alignments, sorted(chm_names_1)): self.assertEqual(aln.sequences[0].name, ch_name) self.assertEqual(aln.sequences[1].name, qs_scorer.chain_mapping[ch_name]) self.assertTrue(aln.sequences[0].HasAttachedView()) self.assertTrue(aln.sequences[1].HasAttachedView()) self.assertEqual(aln.sequences[0].attached_view.handle, qs_ent_1.ent) self.assertEqual(aln.sequences[1].attached_view.handle, qs_ent_2.ent) self.assertTrue(ch_name in qs_scorer.mapped_residues) # best_score / global_score in [0,1] self.assertGreaterEqual(qs_scorer.best_score, 0.0) self.assertLessEqual(qs_scorer.best_score, 1.0) self.assertGreaterEqual(qs_scorer.global_score, 0.0) self.assertLessEqual(qs_scorer.global_score, 1.0) if __name__ == "__main__": try: settings.Locate(('clustalw', 'clustalw2')) except: print("Could not find ClustalW. Ignoring test_qsscoring.py tests.") sys.exit(0) from ost import testutils if testutils.SetDefaultCompoundLib(): testutils.RunTests() else: print( 'No compound library available. Ignoring test_qsscoring.py tests.')
import unittest
import sys
from ost import *
from ost import settings
from ost.bindings import hbplus


class TestHBPlusBinding(unittest.TestCase):
    """Tests of the hbplus binding on testfiles/testprotein.pdb."""

    def setUp(self):
        self.protein = io.LoadEntity("testfiles/testprotein.pdb")

    def testHBondList(self):
        # number of hydrogen bonds expected for the test protein
        hbonds = hbplus.HBondList(self.protein)
        self.assertEqual(len(hbonds), 499)

    def testHBondScore(self):
        # the protein scored against itself
        score = hbplus.HBondScore(self.protein, self.protein)
        self.assertEqual(score, 1.0)


if __name__ == "__main__":
    # the tests are silently skipped when hbplus is not installed
    try:
        settings.Locate("hbplus")
    except:
        print("Could not find hbplus, could not test binding...")
        sys.exit(0)
    from ost import testutils
    testutils.RunTests()
def testkClustParseOutput(self):
    # expected (representative id, cluster size) pairs, in output order
    expected = [
        ('Q5KTS5.1', 3), ('P46331.2', 3), ('P66776.1', 4), ('P42317.2', 1),
        ('P50199.1', 7), ('Q59787.1', 3), ('Q9WYG0.1', 2), ('P50197.1', 1),
        ('P50198.1', 1), ('Q7Z4W1.2', 3), ('Q04520.1', 1),
    ]
    representative_ids = [rep_id for rep_id, _ in expected]
    cluster_sizes = [size for _, size in expected]

    clusters = kclust._ParseOutput('testfiles')

    self.assertEqual(len(clusters), 11)

    for cluster, rep_id in zip(clusters, representative_ids):
        self.assertEqual(cluster.representative_id, rep_id)

    for cluster, size in zip(clusters, cluster_sizes):
        self.assertEqual(len(cluster.sequences), size)


if __name__ == "__main__":
    # the tests are only run when kClust is available on the system
    try:
        settings.Locate('kClust')
    except settings.FileNotFound:
        print("Could not find kClust executable: ignoring unit tests")
        sys.exit(0)
    from ost import testutils
    testutils.RunTests()
def Blast(query, database, gap_open=11, gap_ext=1, matrix='BLOSUM62',
          blast_location=None, outfmt=0, filter_low_complexity=True):
    """
    Runs a protein vs. protein blast search. The results are returned
    according to the value of the ``outfmt`` parameter.

    :param query: the query sequence
    :type query: :class:`seq.ConstSequenceHandle`

    :param database: The filename of the sequence database. Make sure that
                     formatdb has been run on the database and the
                     <database>.pin file exists.
    :param matrix: The substitution matrix to be used. Must be one of
                   'BLOSUM45', 'BLOSUM62', 'BLOSUM80', 'PAM30', 'PAM70'.
    :param gap_open: Gap opening penalty. Note that only a subset of gap
                     opening penalties is supported for each substitution
                     matrix. Consult the blast docs for more information.
    :param gap_ext: Gap extension penalty. Only a subset of gap extension
                    penalties are supported for each of the substitution
                    matrices. Consult the blast docs for more information.
    :param blast_location: Explicit path to the blast executable. If None,
                           ``blastall`` is looked up first and ``blastp`` is
                           used as fallback.
    :param outfmt: output format, where 0 corresponds to the default output
                   (parsed blast output) and any other value, e.g. 1, to raw
                   string output.
    :param filter_low_complexity: Mask off segments of the query sequence
                                  that have low compositional complexity, as
                                  determined by the SEG program of Wootton &
                                  Federhen (Computers and Chemistry, 1993)
    :rtype: :class:`BlastHit` (with ``outfmt=0``) or :class:`str`
            (with ``outfmt=1``)

    :raises: :class:`ValueError` for an unsupported *matrix*,
             :class:`IOError` if neither <database>.pin nor <database>.pal
             exists, :class:`RuntimeError` if no blast executable can be
             located (only with ``blast_location=None``) and
             :class:`BlastError` if the first line blast writes to stderr is
             an error report.
    """
    subst_mats = ('BLOSUM45', 'BLOSUM62', 'BLOSUM80', 'PAM30', 'PAM70',)
    if matrix not in subst_mats:
        raise ValueError('matrix must be one of %s' % ', '.join(subst_mats))
    if not os.path.exists('%s.pin' % database) and \
       not os.path.exists('%s.pal' % database):
        raise IOError("Database %s does not exist" % database)

    if blast_location is not None and not os.path.exists(blast_location):
        ost.LogScript('Could not find %s' % blast_location)

    if blast_location is None:
        # Only a failed lookup may trigger the fallback.
        try:
            blast_exe = settings.Locate('blastall')
        except settings.FileNotFound:
            try:
                blast_exe = settings.Locate('blastp')
            except settings.FileNotFound as err:
                raise RuntimeError('could not find blast executable') from err
    else:
        blast_exe = settings.Locate(os.path.basename(blast_location),
                                    explicit_file_name=blast_location)

    # The explicit comparisons with True/False are kept on purpose so that
    # non-boolean values are treated exactly as before.
    if os.path.basename(blast_exe) == 'blastall':
        args = [blast_exe, '-d', database, '-p', 'blastp',
                '-m', '7', '-M', matrix, '-G', str(gap_open),
                '-E', str(gap_ext)]
        if filter_low_complexity == False:
            args.extend(['-F', 'F'])
    else:
        complexity_opt = '-seg'
        if filter_low_complexity == True:
            complexity_arg = 'yes'
        else:
            complexity_arg = 'no'
        args = [blast_exe, '-db', database, '-matrix', matrix,
                '-gapopen', str(gap_open), '-gapextend', str(gap_ext),
                '-outfmt', '5', complexity_opt, complexity_arg]

    ost.LogInfo('running BLAST (%s)' % ' '.join(args))
    blast_pipe = subprocess.Popen(args, stderr=subprocess.PIPE,
                                  stdout=subprocess.PIPE,
                                  stdin=subprocess.PIPE)
    # the query is fed through stdin; sequence objects are sent as fasta
    if isinstance(query, str):
        stdout, stderr = blast_pipe.communicate(query.encode())
    else:
        stdout, stderr = blast_pipe.communicate(
            io.SequenceToString(query, 'fasta').encode())

    if stderr:
        # only an "[...] ERROR: ..." report in the first line is fatal
        pattern = re.compile(r'^\[.*\]\s+ERROR:\s+(.*)')
        lines = stderr.decode().split('\n')
        error_message = pattern.match(lines[0])
        if error_message:
            raise BlastError(error_message.group(1), '\n'.join(lines[1:]))

    if outfmt == 0:
        return ParseBlastOutput(stdout.decode())
    return stdout.decode()
self.assertTrue(isinstance(version,str)) re_v = re.compile('\d+\.\d+\.\d+') match=False if re_v.match(version): match=True self.assertEqual(match,True) def testBuildDatabase(self): tmp_dir_name=tempfile.mkdtemp() db_name=os.path.join(tmp_dir_name,'testdb') blast.CreateDB('testfiles/multiple.fasta',db_name) self.assertRaises(IOError,blast.CreateDB, 'testfiles/multiple.fasta',db_name, mkdb_cmd='I/am/not/a/damn/executable') test_seq=seq.CreateSequence('B','ALRLVKDGFAVAIADYNDATATAVAAEINQAGGRAVAIKVDV'+ 'SRRDQVFAAVEQARKALGGFNVIVNNAGIAPSTPIESIT') blastout=blast.Blast(test_seq,db_name) self.assertTrue(len(blastout)>10) shutil.rmtree(tmp_dir_name) if __name__ == "__main__": # test if blast package is available on system, otherwise ignore tests try: blastpath=settings.Locate(('blastp','blastall')) except(settings.FileNotFound): print("Could not find blast executable: ignoring unit tests") sys.exit(0) from ost import testutils testutils.RunTests()
def ClustalW(seq1, seq2=None, clustalw=None, keep_files=False, nopgap=False,
             clustalw_option_string=False):
    '''
    Runs a ClustalW multiple sequence alignment. The results are returned as a
    :class:`~ost.seq.AlignmentHandle` instance.

    There are two ways to use this function:

    - align exactly two sequences:

        :param seq1: sequence_one
        :type seq1: :class:`~ost.seq.SequenceHandle` or :class:`str`

        :param seq2: sequence_two
        :type seq2: :class:`~ost.seq.SequenceHandle` or :class:`str`

        The two sequences can be specified as two separate function parameters
        (`seq1`, `seq2`). The type of both parameters can be either
        :class:`~ost.seq.SequenceHandle` or :class:`str`, but must be the same
        for both parameters.

    - align two or more sequences:

        :param seq1: sequence_list
        :type seq1: :class:`~ost.seq.SequenceList`

        :param seq2: must be :class:`None`

        Two or more sequences can be specified by using a
        :class:`~ost.seq.SequenceList`. It is then passed as the first
        function parameter (`seq1`). The second parameter (`seq2`) must be
        :class:`None`.

    :param clustalw: path to ClustalW executable (used in
                     :func:`~ost.settings.Locate`)
    :type clustalw: :class:`str`
    :param nopgap: turn residue-specific gaps off
    :type nopgap: :class:`bool`
    :param clustalw_option_string: additional ClustalW flags (see
                                   http://www.clustal.org/download/clustalw_help.txt)
    :type clustalw_option_string: :class:`str`
    :param keep_files: do not delete temporary files
    :type keep_files: :class:`bool`

    If the input matches neither of the two ways described above, an error is
    logged and :class:`None` is returned.

    .. note ::

      - In the passed sequences ClustalW will convert lowercase to uppercase,
        and change all '.' to '-'. OST will convert and '?' to 'X' before
        aligning sequences with ClustalW.
      - If a :attr:`sequence name <ost.seq.SequenceHandle.name>` contains
        spaces, only the part before the space is considered as sequence
        name. To avoid surprises, you should remove spaces from the sequence
        name.
      - Sequence names must be unique (:class:`ValueError` exception raised
        otherwise).

    ClustalW will accept only IUB/IUPAC amino acid and nucleic acid codes:

    ======= ======================= ======= ============================
    Residue Name                    Residue Name
    ======= ======================= ======= ============================
    A       alanine                 P       proline
    B       aspartate or asparagine Q       glutamine
    C       cystine                 R       arginine
    D       aspartate               S       serine
    E       glutamate               T       threonine
    F       phenylalanine           U       selenocysteine
    G       glycine                 V       valine
    H       histidine               W       tryptophan
    I       isoleucine              Y       tyrosine
    K       lysine                  Z       glutamate or glutamine
    L       leucine                 X       any
    M       methionine              \\*     translation stop
    N       asparagine              \\-     gap of indeterminate length
    ======= ======================= ======= ============================
    '''
    clustalw_path = settings.Locate(('clustalw', 'clustalw2'),
                                    explicit_file_name=clustalw)

    if seq2 is not None:
        if isinstance(seq1, seq.SequenceHandle) and isinstance(
                seq2, seq.SequenceHandle):
            seq_list = seq.CreateSequenceList()
            seq_list.AddSequence(seq1)
            seq_list.AddSequence(seq2)
        elif isinstance(seq1, str) and isinstance(seq2, str):
            seqh1 = seq.CreateSequence("seq1", seq1)
            seqh2 = seq.CreateSequence("seq2", seq2)
            seq_list = seq.CreateSequenceList()
            seq_list.AddSequence(seqh1)
            seq_list.AddSequence(seqh2)
        else:
            LogError("WARNING: Specify at least two Sequences")
            return
    elif isinstance(seq1, seq.SequenceList):
        seq_list = seq1
    else:
        LogError(
            "WARNING: Specify either two SequenceHandles or one SequenceList")
        return

    sequence_names = set()
    for s in seq_list:
        # we cut out anything after a space to be consistent with ClustalW
        # behaviour
        sequence_names.add(s.GetName().split(' ')[0])
    if len(sequence_names) < len(seq_list):
        raise ValueError(
            "ClustalW can only process sequences with unique identifiers!")

    # work on copies in which every '?' is replaced by 'X'
    new_list = seq.CreateSequenceList()
    for s in seq_list:
        ss = s.Copy()
        for i, c in enumerate(ss):
            if c == '?':
                ss[i] = 'X'
        new_list.AddSequence(ss)
    seq_list = new_list

    temp_dir = utils.TempDirWithFiles((seq_list, ))
    try:
        out = os.path.join(temp_dir.dirname, 'out.fasta')
        # the executable path is quoted like the file names, so that a path
        # containing spaces survives the shell
        command = '"%s" -infile="%s" -output=fasta -outfile="%s"' % (
            clustalw_path, temp_dir.files[0], out)
        if nopgap:
            command += " -nopgap"
        if clustalw_option_string != False:
            # see useful flags:
            # http://toolkit.tuebingen.mpg.de/clustalw/help_params
            command = command + " " + clustalw_option_string

        subprocess.run(command, shell=True, stdout=subprocess.DEVNULL)

        aln = io.LoadAlignment(out)

        # transfer offset and attached view from the input sequences to the
        # aligned sequences of the same name
        # NOTE(review): if no aligned sequence has the same name, seq_num
        # stays at the last index - confirm whether that can happen for
        # names containing spaces.
        for sequence in seq_list:
            for seq_num, aln_seq in enumerate(aln.sequences):
                if aln_seq.GetName() == sequence.GetName():
                    break
            aln.SetSequenceOffset(seq_num, sequence.offset)
            if sequence.HasAttachedView():
                aln.AttachView(seq_num, sequence.GetAttachedView().Copy())
    finally:
        # also clean up when ClustalW or loading its output failed
        if not keep_files:
            temp_dir.Cleanup()

    return aln
def _FindLGABinary(lga_bin):
    """
    Look up the lga executable.

    :param lga_bin: Explicit path to the lga executable, handed to
                    :func:`~ost.settings.Locate` as ``explicit_file_name``.
                    The environment variable name ``LGA_BINARY`` is passed
                    along as ``env_name``.
    :returns: The result of :func:`~ost.settings.Locate`
    """
    lga_path = settings.Locate('lga',
                               explicit_file_name=lga_bin,
                               env_name='LGA_BINARY')
    return lga_path
def RunAntechamber(res_name, filename, format='ccif', amberhome=None,
                   base_out_dir=None):
    """Run Antechamber to guess force field parameters for a given residue
    name.

    This requires an installation of AmberTools (tested with AmberTools15)
    with binaries ``antechamber`` and ``parmchk2``.

    This has the same restrictions as Antechamber itself and we assume the
    input to be uncharged. Note that Antechamber cannot deal with metal ions
    and other non-organic elements.

    The results are stored in a separate folder named `res_name` within
    `base_out_dir` (if given, otherwise the current working directory). The
    main output files are ``frcmod`` and ``out.mpdb``. The former contains
    force field parameters and masses. The latter maps atom names to atom
    types and defines the partial charges. The same output could be obtained
    as follows:

    .. code-block:: console

      $ antechamber -i <FILENAME> -fi <FORMAT> -bk '<RES_NAME>' -o out.mol2 -fo mol2 -c bcc -pf yes
      $ parmchk2 -i out.mol2 -f mol2 -o frcmod -a Y
      $ antechamber -i out.mol2 -fi mol2 -o out.mpdb -fo mpdb -pf yes

    The console output of the three commands is collected in the files
    ``console.stdout`` and ``console.stderr`` in the output folder.

    The force field parameters can be manually modified if needed. It can for
    instance happen that some parameters cannot be identified. Those lines
    will be marked with a comment "ATTN, need revision".

    :param res_name: Residue name for which we desire force field parameters.
    :type res_name: :class:`str`
    :param filename: Path to a file which contains the necessary information
                     for `res_name`. It must include all hydrogens. Note that
                     the commands are executed from within the output folder.
    :type filename: :class:`str`
    :param format: Format of file given with `filename`. Common formats are
                   'ccif' for PDB's component dictionary or 'pdb' for a PDB
                   file containing the desired residue with all hydrogens.
    :type format: :class:`str`
    :param amberhome: Base path of your AmberTools installation. If not None,
                      we look for ``antechamber`` and ``parmchk2`` within
                      ``AMBERHOME/bin`` additionally to the system's
                      ``PATH``.
    :type amberhome: :class:`str`
    :param base_out_dir: Path to a base path, where the output will be
                         stored. If None, the current working directory is
                         used.
    :type base_out_dir: :class:`str`

    :raises: :class:`~ost.settings.FileNotFound` if the binaries cannot be
             located, :class:`RuntimeError` if ``frcmod`` or ``out.mpdb`` was
             not generated. Errors while creating the output folder or while
             running the binaries are logged and re-raised.
    """
    # find antechamber binaries
    if amberhome is None:
        search_paths = []
    else:
        search_paths = [os.path.join(amberhome, 'bin')]
    try:
        antechamber = settings.Locate('antechamber',
                                      search_paths=search_paths)
        parmchk2 = settings.Locate('parmchk2', search_paths=search_paths)
    except settings.FileNotFound:
        ost.LogError("Failed to find Antechamber binaries. Make sure you have "
                     "AmberTools installed!")
        raise

    # prepare path
    cwd = os.getcwd()
    if base_out_dir is None:
        base_out_dir = cwd
    out_dir = os.path.abspath(os.path.join(base_out_dir, res_name))
    if not os.path.exists(out_dir):
        # note: this creates intermediate folders too
        try:
            os.makedirs(out_dir)
        except Exception:
            ost.LogError("Failed to create output directory " + out_dir + "!")
            raise

    # Commands are kept as argument lists, so that paths or file names
    # containing spaces are passed on as a single argument.
    cmds = [
        [antechamber, "-i", filename, "-fi", format, "-bk", res_name,
         "-o", "out.mol2", "-fo", "mol2", "-c", "bcc", "-pf", "yes"],
        [parmchk2, "-i", "out.mol2", "-f", "mol2", "-o", "frcmod",
         "-a", "Y"],
        [antechamber, "-i", "out.mol2", "-fi", "mol2", "-o", "out.mpdb",
         "-fo", "mpdb", "-pf", "yes"],
    ]

    # execute it from within the output folder
    os.chdir(out_dir)
    try:
        all_sout = "Generating force field parameters for " + res_name + "\n"
        all_serr = ""
        separator = "-" * 70 + "\n"
        for args in cmds:
            cmd = " ".join(args)
            all_sout += separator + "Stdout of: " + cmd + "\n" + separator
            all_serr += separator + "Stderr of: " + cmd + "\n" + separator
            job = subprocess.Popen(args, stdout=subprocess.PIPE,
                                   stderr=subprocess.PIPE)
            sout, serr = job.communicate()
            # the pipes deliver bytes; decode before adding to the text log
            all_sout += sout.decode(errors="replace")
            all_serr += serr.decode(errors="replace")
            if job.returncode != 0:
                ost.LogError("Unsuccessful execution of " + cmd +
                             ". Return code: " + str(job.returncode))
        # write command line outputs
        with open("console.stdout", "w") as txt_file:
            txt_file.write(all_sout)
        with open("console.stderr", "w") as txt_file:
            txt_file.write(all_serr)
    except Exception:
        ost.LogError("Failed to excecute antechamber binaries!")
        raise
    finally:
        # get back to original path, also when something went wrong
        os.chdir(cwd)

    # check result
    frcmod_filename = os.path.join(out_dir, 'frcmod')
    mpdb_filename = os.path.join(out_dir, 'out.mpdb')
    if not os.path.exists(frcmod_filename):
        raise RuntimeError("Failed to generate frcmod file with Antechamber!")
    if not os.path.exists(mpdb_filename):
        raise RuntimeError(
            "Failed to generate out.mpdb file with Antechamber!")