def testSmallRna(self):
    """Run a single iteration on the small RNA input and check the outputs.

    The test body only runs when ``is_test_enabled`` accepts the
    EXHAUSTIVE level.  After ``self._exe`` finishes, the input and the
    final alignment must both be RNA, the iteration alignment kept via
    ``--keeptemp`` must be DNA, all three must carry the same sequences
    once the iteration alignment is passed through
    ``convert_rna_to_dna(..., reverse=True)``, and the saved
    configuration must record the datatype as RNA.
    """
    # Must be computed in this frame so co_name is this test's name.
    test_id = ".".join([self.__class__.__name__, sys._getframe().f_code.co_name])
    if not is_test_enabled(TestLevel.EXHAUSTIVE, _LOG, module_name=test_id):
        return

    # Inputs, then output locations, then run control.
    cmd = ["-i", self.small_rna, "-t", self.small_tree, "-d", "rna"]
    cmd += ["-o", self.ts.top_level_temp, "--temporaries=%s" % self.ts.top_level_temp]
    cmd += ["-j", self.job_name, "--keeptemp", "--iter-limit=1"]
    self._exe(cmd)

    self.assert_is_nuc(self.small_rna, "RNA")
    self.assert_is_nuc(self.small_aln_path, "RNA")
    self.assertSameInputOutputSequenceData([self.small_rna], [self.small_aln_path])
    self.assertNoGapColumns([self.small_aln_path, self.iter_aln_path])
    self.assert_is_nuc(self.iter_aln_path, "DNA")
    self.assertSameSequences(
        [
            self.small_rna,
            self.small_aln_path,
            self.convert_rna_to_dna(self.iter_aln_path, reverse=True),
        ]
    )

    run_config = get_configuration(self.cfg_path)
    recorded_datatype = run_config.commandline.datatype.upper()
    self.assertEqual(recorded_datatype, "RNA")
def testProteinAmbiguousCharactersMafftFasttreeTrusted(self):
    """Run the ambiguous protein data with mafft / muscle / fasttree.

    Only runs when the EXHAUSTIVE test level is enabled.  The run is
    expected to return 0, and the resulting alignment must contain the
    same sequence data as the input and no all-gap columns.
    """
    # Must be computed in this frame so co_name is this test's name.
    test_id = ".".join([self.__class__.__name__, sys._getframe().f_code.co_name])
    if not is_test_enabled(TestLevel.EXHAUSTIVE, _LOG, module_name=test_id):
        return

    arg_list = ['-d', 'protein', '--temporaries=%s' % self.ts.top_level_temp, '--iter-limit=1']
    arg_list += ['-j', self.job_name, '-o', self.ts.top_level_temp]
    arg_list += ['-i', self.ambig_aa, '-t', self.ambig_aa_tree]
    arg_list += ['--aligner=mafft', '--merger=muscle', '--tree-estimator=fasttree']
    self._exe_run_sate(arg_list, rc=0)

    # Both assertions look at the same output alignment file.
    aln_name = self.job_name + '.marker001.caenophidia_mos.ambiguities.aln'
    aln_path = os.path.join(self.ts.top_level_temp, aln_name)
    self.assertSameInputOutputSequenceData([self.ambig_aa], [aln_path])
    self.assertNoGapColumns([aln_path])
def testSingleAminoAcidLocusRun(self):
    """Run one iteration on a single protein locus and check the alignment.

    Only runs when the EXHAUSTIVE test level is enabled.  The run is
    expected to return 0; the output alignment must hold the same
    sequence data as ``self.caenophidia_file`` and have no all-gap columns.
    """
    # Must be computed in this frame so co_name is this test's name.
    test_id = ".".join([self.__class__.__name__, sys._getframe().f_code.co_name])
    if not is_test_enabled(TestLevel.EXHAUSTIVE, _LOG, module_name=test_id):
        return

    arg_list = ['-d', 'protein', '--temporaries=%s' % self.ts.top_level_temp, '--iter-limit=1']
    arg_list += ['-j', self.job_name, '-o', self.ts.top_level_temp]
    arg_list += ['-i', self.caenophidia_file]
    self._exe_run_sate(arg_list, rc=0)

    # Both assertions look at the same output alignment file.
    aln_name = self.job_name + '.marker001.caenophidia_mos.aln'
    aln_path = os.path.join(self.ts.top_level_temp, aln_name)
    self.assertSameInputOutputSequenceData([self.caenophidia_file], [aln_path])
    self.assertNoGapColumns([aln_path])
def testSmallRna(self):
    """Run a single iteration on the small RNA input and check the outputs.

    The test body only runs when ``is_test_enabled`` accepts the
    EXHAUSTIVE level.  It checks that the input and final alignment are
    RNA, that the kept iteration alignment is DNA, that all three hold
    the same sequences after the iteration alignment goes through
    ``convert_rna_to_dna(..., reverse=True)``, and that the saved
    configuration records the datatype as RNA.
    """
    # Must be computed in this frame so co_name is this test's name.
    test_id = ".".join([self.__class__.__name__, sys._getframe().f_code.co_name])
    if not is_test_enabled(TestLevel.EXHAUSTIVE, _LOG, module_name=test_id):
        return

    # Inputs, then output locations, then run control.
    cmd = ['-i', self.small_rna, '-t', self.small_tree, '-d', 'rna']
    cmd += ['-o', self.ts.top_level_temp, '--temporaries=%s' % self.ts.top_level_temp]
    cmd += ['-j', self.job_name, '--keeptemp', '--iter-limit=1']
    self._exe(cmd)

    self.assert_is_nuc(self.small_rna, 'RNA')
    self.assert_is_nuc(self.small_aln_path, 'RNA')
    self.assertSameInputOutputSequenceData([self.small_rna], [self.small_aln_path])
    self.assertNoGapColumns([self.small_aln_path, self.iter_aln_path])
    self.assert_is_nuc(self.iter_aln_path, 'DNA')
    self.assertSameSequences([
        self.small_rna,
        self.small_aln_path,
        self.convert_rna_to_dna(self.iter_aln_path, reverse=True),
    ])

    run_config = get_configuration(self.cfg_path)
    recorded_datatype = run_config.commandline.datatype.upper()
    self.assertEqual(recorded_datatype, 'RNA')
def testMultiDnaLocusRun(self):
    """Run one iteration on the two-locus DNA directory and check outputs.

    Only runs when the EXHAUSTIVE test level is enabled.  The run is
    expected to return 0.  Each per-locus output alignment must match its
    input, the concatenated iteration-0 alignment must match the inputs
    taken together, and none of the three files may have all-gap columns.
    """
    # Must be computed in this frame so co_name is this test's name.
    test_id = ".".join([self.__class__.__name__, sys._getframe().f_code.co_name])
    if not is_test_enabled(TestLevel.EXHAUSTIVE, _LOG, module_name=test_id):
        return

    arg_list = ['-d', 'dna', '--temporaries=%s' % self.ts.top_level_temp, '--iter-limit=1', '-m']
    arg_list += ['-j', self.job_name, '-o', self.ts.top_level_temp]
    arg_list += ['-i', self.multi_dir]
    self._exe_run_sate(arg_list, rc=0)

    # (input file name, suffix appended to the job name for the output)
    locus_files = (
        ('1.fasta', '.marker001.1.aln'),
        ('2.fasta', '.marker002.2.aln'),
    )
    seqs_in = tuple(os.path.join(self.multi_dir, fasta) for fasta, _ in locus_files)
    seqs_out = tuple(
        os.path.join(self.ts.top_level_temp, self.job_name + suffix)
        for _, suffix in locus_files
    )
    concat_out = os.path.join(
        self.ts.top_level_temp,
        self.job_name + '_temp_iteration_0_seq_alignment.txt',
    )

    # Each assertion gets its own fresh list, as in the literal form.
    self.assertSameInputOutputSequenceData(list(seqs_in), list(seqs_out))
    self.assertSameConcatenatedSequences(
        concatenated_data=concat_out, seq_data_list=list(seqs_in))
    self.assertNoGapColumns(list(seqs_out) + [concat_out])
def testRnaDnaIdentity(self):
    """Run the same analysis on DNA and RNA inputs and compare the results.

    Only runs when the EXHAUSTIVE test level is enabled.  Both runs use
    the same tree, job name and tool options (including a fixed seed in
    the tree-estimator model string); only the input file, datatype and
    output/temporary directory differ.
    """
    # Must be computed in this frame so co_name is this test's name.
    test_id = ".".join([self.__class__.__name__, sys._getframe().f_code.co_name])
    if not is_test_enabled(TestLevel.EXHAUSTIVE, _LOG, module_name=test_id):
        return

    # Literal options common to both runs.
    tool_opts = [
        "--aligner=mafft",
        "--merger=muscle",
        "--tree-estimator=fasttree",
        "--start-tree-search-from-current",
        "--tree-estimator-model=-gtr -gamma -seed 1111",
        "--iter-limit=1",
    ]

    dna_cmd = ["-i", self.dna, "-d", "dna", "-t", self.tree]
    dna_cmd += ["-o", self.dna_tmp, "--temporaries=%s" % self.dna_tmp]
    dna_cmd += ["-j", self.job_name] + tool_opts
    self._exe_run_sate(dna_cmd)

    rna_cmd = ["-i", self.rna, "-d", "rna", "-t", self.tree]
    rna_cmd += ["-o", self.rna_tmp, "--temporaries=%s" % self.rna_tmp]
    rna_cmd += ["-j", self.job_name] + tool_opts
    self._exe_run_sate(rna_cmd)

    self.assert_is_nuc(self.dna_aln, "DNA")
    self.assert_is_nuc(self.rna_aln, "RNA")
    self.assertNoGapColumns([self.dna_aln, self.rna_aln])
    self.assertSameDataSet(
        [
            self.rna,
            self.rna_aln,
            self.convert_rna_to_dna(self.dna, reverse=True),
            self.convert_rna_to_dna(self.dna_aln, reverse=True),
        ]
    )
    # Score and tree comparisons are disabled in the original test:
    # self.assertSameScores([self.dna_score, self.rna_score])
    # self.assertSameTrees([self.dna_tree, self.rna_tree])
    self.assertSameFiles([self.dna_tmp_aln, self.rna_tmp_aln])
def testHummingBirdDataRun(self):
    """Run one iteration on the four-locus hummingbird DNA directory.

    Only runs when the EXHAUSTIVE test level is enabled.  The run starts
    from ``starting.tre`` in the data directory and is expected to return
    0.  Each per-locus output alignment must match its input, the
    concatenated iteration-0 alignment must match the inputs taken
    together, and none of the files may have all-gap columns.
    """
    # Must be computed in this frame so co_name is this test's name.
    test_id = ".".join([self.__class__.__name__, sys._getframe().f_code.co_name])
    if not is_test_enabled(TestLevel.EXHAUSTIVE, _LOG, module_name=test_id):
        return

    arg_list = ['-d', 'dna', '--temporaries=%s' % self.ts.top_level_temp, '--iter-limit=1']
    arg_list += [
        '--start-tree-search-from-current',
        '--treefile=%s' % os.path.join(self.hummingbird_dir, 'starting.tre'),
        '--merger=muscle',
        '--tree-estimator=fasttree',
        '-m',
    ]
    arg_list += ['-j', self.job_name, '-o', self.ts.top_level_temp]
    arg_list += ['-i', self.hummingbird_dir]
    self._exe_run_sate(arg_list, rc=0)

    # (input file name, suffix appended to the job name for the output)
    locus_files = (
        ('AK1.fasta', '.marker001.AK1.aln'),
        ('bfib.fasta', '.marker002.bfib.aln'),
        ('nd2.fasta', '.marker003.nd2.aln'),
        ('nd4.fasta', '.marker004.nd4.aln'),
    )
    seqs_in = tuple(
        os.path.join(self.hummingbird_dir, fasta) for fasta, _ in locus_files
    )
    seqs_out = tuple(
        os.path.join(self.ts.top_level_temp, self.job_name + suffix)
        for _, suffix in locus_files
    )
    concat_out = os.path.join(
        self.ts.top_level_temp,
        self.job_name + '_temp_iteration_0_seq_alignment.txt',
    )

    # Each assertion gets its own fresh list, as in the literal form.
    self.assertSameInputOutputSequenceData(list(seqs_in), list(seqs_out))
    self.assertSameConcatenatedSequences(
        concatenated_data=concat_out, seq_data_list=list(seqs_in))
    self.assertNoGapColumns(list(seqs_out) + [concat_out])
def testMulti(self):
    """Run one multi-locus iteration on the ``testmulti`` data source.

    Only runs when the EXHAUSTIVE test level is enabled.  The arguments
    are handed to ``self._main_execution``; this method makes no further
    assertions of its own.
    """
    # Must be computed in this frame so co_name is this test's name.
    test_id = ".".join([self.__class__.__name__, sys._getframe().f_code.co_name])
    if not is_test_enabled(TestLevel.EXHAUSTIVE, _LOG, module_name=test_id):
        return

    cmd = ['-m', '-i', data_source_path('testmulti')]
    cmd += ['-o', self.ts.top_level_temp, '--temporaries=%s' % self.ts.top_level_temp]
    cmd += ['-j', self.job_name, '--iter-limit=1']
    self._main_execution(cmd)
def testHummingBirdDataRun(self):
    """Run one iteration on the four-locus hummingbird DNA directory.

    Only runs when the EXHAUSTIVE test level is enabled.  The run starts
    from ``starting.tre`` in the data directory and is expected to return
    0.  Per-locus outputs are compared with their inputs, the
    concatenated iteration-0 alignment is compared with all inputs, and
    every output file is checked for all-gap columns.
    """
    # Must be computed in this frame so co_name is this test's name.
    test_id = ".".join([self.__class__.__name__, sys._getframe().f_code.co_name])
    if not is_test_enabled(TestLevel.EXHAUSTIVE, _LOG, module_name=test_id):
        return

    start_tree_opt = '--treefile=%s' % os.path.join(self.hummingbird_dir, 'starting.tre')
    arg_list = ['-d', 'dna', '--temporaries=%s' % self.ts.top_level_temp, '--iter-limit=1']
    arg_list += ['--start-tree-search-from-current', start_tree_opt]
    arg_list += ['--merger=muscle', '--tree-estimator=fasttree', '-m']
    arg_list += ['-j', self.job_name, '-o', self.ts.top_level_temp]
    arg_list += ['-i', self.hummingbird_dir]
    self._exe_run_sate(arg_list, rc=0)

    # (input file name, suffix appended to the job name for the output)
    locus_files = (
        ('AK1.fasta', '.marker001.AK1.aln'),
        ('bfib.fasta', '.marker002.bfib.aln'),
        ('nd2.fasta', '.marker003.nd2.aln'),
        ('nd4.fasta', '.marker004.nd4.aln'),
    )
    seqs_in = tuple(
        os.path.join(self.hummingbird_dir, fasta) for fasta, _ in locus_files
    )
    seqs_out = tuple(
        os.path.join(self.ts.top_level_temp, self.job_name + suffix)
        for _, suffix in locus_files
    )
    concat_out = os.path.join(
        self.ts.top_level_temp,
        self.job_name + '_temp_iteration_0_seq_alignment.txt',
    )

    # Each assertion gets its own fresh list, as in the literal form.
    self.assertSameInputOutputSequenceData(list(seqs_in), list(seqs_out))
    self.assertSameConcatenatedSequences(
        concatenated_data=concat_out, seq_data_list=list(seqs_in))
    self.assertNoGapColumns(list(seqs_out) + [concat_out])
def testMulti(self):
    """Run one multi-locus iteration on the ``testmulti`` data source.

    Only runs when the EXHAUSTIVE test level is enabled.  The arguments
    are handed to ``self._main_execution``; this method makes no further
    assertions of its own.
    """
    # Must be computed in this frame so co_name is this test's name.
    test_id = ".".join([self.__class__.__name__, sys._getframe().f_code.co_name])
    if not is_test_enabled(TestLevel.EXHAUSTIVE, _LOG, module_name=test_id):
        return

    cmd = ["-m", "-i", data_source_path("testmulti")]
    cmd += ["-o", self.ts.top_level_temp, "--temporaries=%s" % self.ts.top_level_temp]
    cmd += ["-j", self.job_name, "--iter-limit=1"]
    self._main_execution(cmd)
def testSingleAminoAcidLocusRun(self):
    """Run one iteration on a single protein locus and check the alignment.

    Only runs when the EXHAUSTIVE test level is enabled.  The run is
    expected to return 0; the output alignment must hold the same
    sequence data as ``self.caenophidia_file`` and have no all-gap columns.
    """
    # Must be computed in this frame so co_name is this test's name.
    test_id = ".".join([self.__class__.__name__, sys._getframe().f_code.co_name])
    if not is_test_enabled(TestLevel.EXHAUSTIVE, _LOG, module_name=test_id):
        return

    arg_list = ['-d', 'protein', '--temporaries=%s' % self.ts.top_level_temp, '--iter-limit=1']
    arg_list += ['-j', self.job_name, '-o', self.ts.top_level_temp]
    arg_list += ['-i', self.caenophidia_file]
    self._exe_run_sate(arg_list, rc=0)

    # Both assertions look at the same output alignment file.
    aln_path = os.path.join(
        self.ts.top_level_temp,
        self.job_name + '.marker001.caenophidia_mos.aln',
    )
    self.assertSameInputOutputSequenceData([self.caenophidia_file], [aln_path])
    self.assertNoGapColumns([aln_path])
def testMultiAminoAcidLocusRun(self):
    """Run one iteration on the two-locus protein directory and check outputs.

    Only runs when the EXHAUSTIVE test level is enabled.  The run is
    expected to return 0.  Each per-locus output alignment must match its
    input, the concatenated iteration-0 alignment must match the inputs
    taken together, and none of the three files may have all-gap columns.
    """
    # Must be computed in this frame so co_name is this test's name.
    test_id = ".".join([self.__class__.__name__, sys._getframe().f_code.co_name])
    if not is_test_enabled(TestLevel.EXHAUSTIVE, _LOG, module_name=test_id):
        return

    arg_list = ['-d', 'protein', '--temporaries=%s' % self.ts.top_level_temp, '--iter-limit=1', '-m']
    arg_list += ['-j', self.job_name, '-o', self.ts.top_level_temp]
    arg_list += ['-i', self.multi_aa_dir]
    self._exe_run_sate(arg_list, rc=0)

    # (input file name, suffix appended to the job name for the output)
    locus_files = (
        ('caenophidia_mos.fasta', '.marker001.caenophidia_mos.aln'),
        ('caenophidia_mos2.fasta', '.marker002.caenophidia_mos2.aln'),
    )
    seqs_in = tuple(os.path.join(self.multi_aa_dir, fasta) for fasta, _ in locus_files)
    seqs_out = tuple(
        os.path.join(self.ts.top_level_temp, self.job_name + suffix)
        for _, suffix in locus_files
    )
    concat_out = os.path.join(
        self.ts.top_level_temp,
        self.job_name + '_temp_iteration_0_seq_alignment.txt',
    )

    # Each assertion gets its own fresh list, as in the literal form.
    self.assertSameInputOutputSequenceData(list(seqs_in), list(seqs_out))
    self.assertSameConcatenatedSequences(
        concatenated_data=concat_out, seq_data_list=list(seqs_in))
    self.assertNoGapColumns(list(seqs_out) + [concat_out])
def testProteinAmbiguousCharactersMafftFasttreeTrusted(self):
    """Run the ambiguous protein data with mafft / muscle / fasttree.

    Only runs when the EXHAUSTIVE test level is enabled.  The run is
    expected to return 0, and the resulting alignment must contain the
    same sequence data as the input and no all-gap columns.
    """
    # Must be computed in this frame so co_name is this test's name.
    test_id = ".".join([self.__class__.__name__, sys._getframe().f_code.co_name])
    if not is_test_enabled(TestLevel.EXHAUSTIVE, _LOG, module_name=test_id):
        return

    arg_list = ['-d', 'protein', '--temporaries=%s' % self.ts.top_level_temp, '--iter-limit=1']
    arg_list += ['-j', self.job_name, '-o', self.ts.top_level_temp]
    arg_list += ['-i', self.ambig_aa, '-t', self.ambig_aa_tree]
    arg_list += ['--aligner=mafft', '--merger=muscle', '--tree-estimator=fasttree']
    self._exe_run_sate(arg_list, rc=0)

    # Both assertions look at the same output alignment file.
    aln_path = os.path.join(
        self.ts.top_level_temp,
        self.job_name + '.marker001.caenophidia_mos.ambiguities.aln',
    )
    self.assertSameInputOutputSequenceData([self.ambig_aa], [aln_path])
    self.assertNoGapColumns([aln_path])
def testRnaDnaIdentity(self):
    """Run the same analysis on DNA and RNA inputs and compare the results.

    Only runs when the EXHAUSTIVE test level is enabled.  Both runs use
    the same tree, job name and tool options (including a fixed seed in
    the tree-estimator model string); only the input file, datatype and
    output/temporary directory differ.
    """
    # Must be computed in this frame so co_name is this test's name.
    test_id = ".".join([self.__class__.__name__, sys._getframe().f_code.co_name])
    if not is_test_enabled(TestLevel.EXHAUSTIVE, _LOG, module_name=test_id):
        return

    # Literal options common to both runs.
    tool_opts = [
        '--aligner=mafft',
        '--merger=muscle',
        '--tree-estimator=fasttree',
        '--start-tree-search-from-current',
        '--tree-estimator-model=-gtr -gamma -seed 1111',
        '--iter-limit=1',
    ]

    dna_cmd = ['-i', self.dna, '-d', 'dna', '-t', self.tree]
    dna_cmd += ['-o', self.dna_tmp, '--temporaries=%s' % self.dna_tmp]
    dna_cmd += ['-j', self.job_name] + tool_opts
    self._exe_run_sate(dna_cmd)

    rna_cmd = ['-i', self.rna, '-d', 'rna', '-t', self.tree]
    rna_cmd += ['-o', self.rna_tmp, '--temporaries=%s' % self.rna_tmp]
    rna_cmd += ['-j', self.job_name] + tool_opts
    self._exe_run_sate(rna_cmd)

    self.assert_is_nuc(self.dna_aln, 'DNA')
    self.assert_is_nuc(self.rna_aln, 'RNA')
    self.assertNoGapColumns([self.dna_aln, self.rna_aln])
    self.assertSameDataSet([
        self.rna,
        self.rna_aln,
        self.convert_rna_to_dna(self.dna, reverse=True),
        self.convert_rna_to_dna(self.dna_aln, reverse=True),
    ])
    # Score and tree comparisons are disabled in the original test:
    # self.assertSameScores([self.dna_score, self.rna_score])
    # self.assertSameTrees([self.dna_tree, self.rna_tree])
    self.assertSameFiles([self.dna_tmp_aln, self.rna_tmp_aln])
def testDnaAmbiguousCharactersClustalRaxmlUntrusted(self):
    """Run the ambiguous DNA data with clustalw2 / muscle / raxml, untrusted.

    Only runs when the EXHAUSTIVE test level is enabled.  The run passes
    ``--untrusted`` and is expected to return 0; the resulting alignment
    must contain the same sequence data as the input and no all-gap
    columns.
    """
    # Must be computed in this frame so co_name is this test's name.
    test_id = ".".join([self.__class__.__name__, sys._getframe().f_code.co_name])
    if not is_test_enabled(TestLevel.EXHAUSTIVE, _LOG, module_name=test_id):
        return

    arg_list = ['-d', 'dna', '--temporaries=%s' % self.ts.top_level_temp, '--iter-limit=1']
    arg_list += ['-j', self.job_name, '-o', self.ts.top_level_temp]
    arg_list += ['-i', self.ambig_dna, '-t', self.ambig_dna_tree]
    arg_list += ['--aligner=clustalw2', '--merger=muscle', '--tree-estimator=raxml', '--untrusted']
    self._exe_run_sate(arg_list, rc=0)

    # Both assertions look at the same output alignment file.
    aln_path = os.path.join(
        self.ts.top_level_temp,
        self.job_name + '.marker001.small.ambiguities.aln',
    )
    self.assertSameInputOutputSequenceData([self.ambig_dna], [aln_path])
    self.assertNoGapColumns([aln_path])
def testDnaAmbiguousCharactersClustalRaxmlUntrusted(self):
    """Run the ambiguous DNA data with clustalw2 / muscle / raxml, untrusted.

    Only runs when the EXHAUSTIVE test level is enabled.  The run passes
    ``--untrusted`` and is expected to return 0; the resulting alignment
    must contain the same sequence data as the input and no all-gap
    columns.
    """
    # Must be computed in this frame so co_name is this test's name.
    test_id = ".".join([self.__class__.__name__, sys._getframe().f_code.co_name])
    if not is_test_enabled(TestLevel.EXHAUSTIVE, _LOG, module_name=test_id):
        return

    arg_list = ['-d', 'dna', '--temporaries=%s' % self.ts.top_level_temp, '--iter-limit=1']
    arg_list += ['-j', self.job_name, '-o', self.ts.top_level_temp]
    arg_list += ['-i', self.ambig_dna, '-t', self.ambig_dna_tree]
    arg_list += ['--aligner=clustalw2', '--merger=muscle', '--tree-estimator=raxml', '--untrusted']
    self._exe_run_sate(arg_list, rc=0)

    # Both assertions look at the same output alignment file.
    aln_name = self.job_name + '.marker001.small.ambiguities.aln'
    aln_path = os.path.join(self.ts.top_level_temp, aln_name)
    self.assertSameInputOutputSequenceData([self.ambig_dna], [aln_path])
    self.assertNoGapColumns([aln_path])
def testOpal(self):
    """Run the shared merger test with 'opal' when SLOW tests are enabled."""
    if not is_test_enabled(TestLevel.SLOW, _LOG):
        return
    self._impl_test_merger('opal')
def testMuscle(self):
    """Run the shared merger test with 'muscle' when SLOW tests are enabled."""
    if not is_test_enabled(TestLevel.SLOW, _LOG):
        return
    self._impl_test_merger('muscle')
def testMafft(self):
    """Run the shared aligner test with 'mafft' on anolis.fasta.

    Only runs when the EXHAUSTIVE test level is enabled.
    """
    if not is_test_enabled(TestLevel.EXHAUSTIVE, _LOG):
        return
    self._impl_test_aligner('mafft', 'anolis.fasta')
def testOpal(self):
    """Run the shared aligner test with 'opal' on anolis.fasta.

    Only runs when the EXHAUSTIVE test level is enabled.
    """
    if not is_test_enabled(TestLevel.EXHAUSTIVE, _LOG):
        return
    self._impl_test_aligner('opal', 'anolis.fasta')
def testClustalW2(self):
    """Run the shared aligner test with 'clustalw2' on anolis.fasta.

    Only runs when the EXHAUSTIVE test level is enabled.
    """
    if not is_test_enabled(TestLevel.EXHAUSTIVE, _LOG):
        return
    self._impl_test_aligner('clustalw2', 'anolis.fasta')