Example #1
0
 def testSmallRna(self):
     """Run the tool on the small RNA input and check the files it leaves behind.

     Does nothing unless ``is_test_enabled`` reports the EXHAUSTIVE level as
     enabled for this test.
     """
     test_id = ".".join([self.__class__.__name__, sys._getframe().f_code.co_name])
     if not is_test_enabled(TestLevel.EXHAUSTIVE, _LOG, module_name=test_id):
         return

     # Assemble the command line piece by piece: inputs, output locations, run options.
     cmd = ["-i", self.small_rna, "-t", self.small_tree, "-d", "rna"]
     cmd += ["-o", self.ts.top_level_temp, "--temporaries=%s" % self.ts.top_level_temp]
     cmd += ["-j", self.job_name, "--keeptemp", "--iter-limit=1"]
     self._exe(cmd)

     # The input and the final alignment are both checked as RNA.
     for rna_path in (self.small_rna, self.small_aln_path):
         self.assert_is_nuc(rna_path, "RNA")
     self.assertSameInputOutputSequenceData([self.small_rna], [self.small_aln_path])
     self.assertNoGapColumns([self.small_aln_path, self.iter_aln_path])

     # The per-iteration alignment is checked as DNA, then converted back for comparison.
     self.assert_is_nuc(self.iter_aln_path, "DNA")
     same_seq_paths = [self.small_rna, self.small_aln_path]
     same_seq_paths.append(self.convert_rna_to_dna(self.iter_aln_path, reverse=True))
     self.assertSameSequences(same_seq_paths)

     datatype = get_configuration(self.cfg_path).commandline.datatype
     self.assertEqual(datatype.upper(), "RNA")
Example #2
0
 def testProteinAmbiguousCharactersMafftFasttreeTrusted(self):
     """Run a protein job on ``self.ambig_aa`` using mafft, muscle and fasttree.

     Returns immediately unless the EXHAUSTIVE test level is enabled.
     Calls ``_exe_run_sate`` with ``rc=0`` and then checks the
     ``.marker001.caenophidia_mos.ambiguities.aln`` file in the temp directory.
     """
     qualified_name = ".".join(
         [self.__class__.__name__, sys._getframe().f_code.co_name])
     if not is_test_enabled(TestLevel.EXHAUSTIVE, _LOG,
                            module_name=qualified_name):
         return

     options = ['-d', 'protein']
     options.append('--temporaries=%s' % self.ts.top_level_temp)
     options.append('--iter-limit=1')
     options.extend(['-j', self.job_name])
     options.extend(['-o', self.ts.top_level_temp])
     options.extend(['-i', self.ambig_aa, '-t', self.ambig_aa_tree])
     options.extend(
         ['--aligner=mafft', '--merger=muscle', '--tree-estimator=fasttree'])
     self._exe_run_sate(options, rc=0)

     # Both assertions look at the same output alignment.
     aln_name = self.job_name + '.marker001.caenophidia_mos.ambiguities.aln'
     aln_path = os.path.join(self.ts.top_level_temp, aln_name)
     self.assertSameInputOutputSequenceData([self.ambig_aa], [aln_path])
     self.assertNoGapColumns([aln_path])
Example #3
0
 def testSingleAminoAcidLocusRun(self):
     """Run a protein job on ``self.caenophidia_file`` and check its alignment output.

     A no-op unless the EXHAUSTIVE test level is enabled.
     """
     full_name = ".".join(
         [self.__class__.__name__, sys._getframe().f_code.co_name])
     if not is_test_enabled(TestLevel.EXHAUSTIVE, _LOG, module_name=full_name):
         return

     run_args = ['-d', 'protein']
     run_args += ['--temporaries=%s' % self.ts.top_level_temp, '--iter-limit=1']
     run_args += ['-j', self.job_name]
     run_args += ['-o', self.ts.top_level_temp]
     run_args += ['-i', self.caenophidia_file]
     self._exe_run_sate(run_args, rc=0)

     # The single output alignment is used by both checks below.
     aligned = os.path.join(self.ts.top_level_temp,
                            self.job_name + '.marker001.caenophidia_mos.aln')
     self.assertSameInputOutputSequenceData([self.caenophidia_file], [aligned])
     self.assertNoGapColumns([aligned])
Example #4
0
 def testSmallRna(self):
     """Exercise an RNA run on ``self.small_rna`` and validate the produced files.

     Only runs when the EXHAUSTIVE test level is enabled; otherwise returns
     without doing anything.
     """
     this_test = ".".join(
         [self.__class__.__name__, sys._getframe().f_code.co_name])
     if not is_test_enabled(TestLevel.EXHAUSTIVE, _LOG, module_name=this_test):
         return

     argv = [
         '-i', self.small_rna,
         '-t', self.small_tree,
         '-d', 'rna',
         '-o', self.ts.top_level_temp,
         '--temporaries=%s' % self.ts.top_level_temp,
         '-j', self.job_name,
         '--keeptemp',
         '--iter-limit=1',
     ]
     self._exe(argv)

     self.assert_is_nuc(self.small_rna, 'RNA')
     self.assert_is_nuc(self.small_aln_path, 'RNA')
     self.assertSameInputOutputSequenceData([self.small_rna],
                                            [self.small_aln_path])
     self.assertNoGapColumns([self.small_aln_path, self.iter_aln_path])

     # The iteration alignment is checked as DNA and converted back before comparing.
     self.assert_is_nuc(self.iter_aln_path, 'DNA')
     reconverted = self.convert_rna_to_dna(self.iter_aln_path, reverse=True)
     self.assertSameSequences(
         [self.small_rna, self.small_aln_path, reconverted])

     config = get_configuration(self.cfg_path)
     recorded_type = config.commandline.datatype.upper()
     self.assertEqual(recorded_type, 'RNA')
Example #5
0
 def testMultiDnaLocusRun(self):
     """Run a DNA job with ``-m`` on ``self.multi_dir`` and check per-locus outputs.

     Returns immediately unless the EXHAUSTIVE test level is enabled.
     """
     test_id = ".".join(
         [self.__class__.__name__, sys._getframe().f_code.co_name])
     if not is_test_enabled(TestLevel.EXHAUSTIVE, _LOG, module_name=test_id):
         return

     self._exe_run_sate(
         [
             '-d', 'dna',
             '--temporaries=%s' % self.ts.top_level_temp,
             '--iter-limit=1',
             '-m',
             '-j', self.job_name,
             '-o', self.ts.top_level_temp,
             '-i', self.multi_dir,
         ],
         rc=0)

     # Input files are '<n>.fasta'; outputs are '<job>.marker00<n>.<n>.aln'.
     loci = ('1', '2')
     fasta_paths = [
         os.path.join(self.multi_dir, locus + '.fasta') for locus in loci
     ]
     aln_paths = [
         os.path.join(self.ts.top_level_temp,
                      self.job_name + '.marker%03d.%s.aln' % (num, locus))
         for num, locus in enumerate(loci, 1)
     ]
     self.assertSameInputOutputSequenceData(list(fasta_paths),
                                            list(aln_paths))

     concat_name = self.job_name + '_temp_iteration_0_seq_alignment.txt'
     concat_path = os.path.join(self.ts.top_level_temp, concat_name)
     self.assertSameConcatenatedSequences(concatenated_data=concat_path,
                                          seq_data_list=list(fasta_paths))
     self.assertNoGapColumns(aln_paths + [concat_path])
Example #6
0
 def testRnaDnaIdentity(self):
     """Run the same job on DNA and RNA inputs and compare the two sets of outputs.

     Returns immediately unless the EXHAUSTIVE test level is enabled.
     """
     test_id = ".".join([self.__class__.__name__, sys._getframe().f_code.co_name])
     if not is_test_enabled(TestLevel.EXHAUSTIVE, _LOG, module_name=test_id):
         return

     # Options that are identical for the DNA and the RNA invocation.
     shared_opts = [
         "--aligner=mafft",
         "--merger=muscle",
         "--tree-estimator=fasttree",
         "--start-tree-search-from-current",
         "--tree-estimator-model=-gtr -gamma -seed 1111",
         "--iter-limit=1",
     ]

     dna_cmd = ["-i", self.dna, "-d", "dna", "-t", self.tree, "-o", self.dna_tmp]
     dna_cmd += ["--temporaries=%s" % self.dna_tmp, "-j", self.job_name]
     self._exe_run_sate(dna_cmd + shared_opts)

     rna_cmd = ["-i", self.rna, "-d", "rna", "-t", self.tree, "-o", self.rna_tmp]
     rna_cmd += ["--temporaries=%s" % self.rna_tmp, "-j", self.job_name]
     self._exe_run_sate(rna_cmd + shared_opts)

     self.assert_is_nuc(self.dna_aln, "DNA")
     self.assert_is_nuc(self.rna_aln, "RNA")
     self.assertNoGapColumns([self.dna_aln, self.rna_aln])

     # DNA-side files are passed through convert_rna_to_dna(reverse=True) before comparing.
     data_sets = [self.rna, self.rna_aln]
     data_sets.append(self.convert_rna_to_dna(self.dna, reverse=True))
     data_sets.append(self.convert_rna_to_dna(self.dna_aln, reverse=True))
     self.assertSameDataSet(data_sets)

     # Score and tree comparisons are currently disabled:
     # self.assertSameScores([self.dna_score, self.rna_score])
     # self.assertSameTrees([self.dna_tree, self.rna_tree])
     self.assertSameFiles([self.dna_tmp_aln, self.rna_tmp_aln])
Example #7
0
 def testHummingBirdDataRun(self):
     """Run a four-locus DNA job on ``self.hummingbird_dir`` and check its outputs.

     Returns immediately unless the EXHAUSTIVE test level is enabled.
     """
     test_id = ".".join(
         [self.__class__.__name__, sys._getframe().f_code.co_name])
     if not is_test_enabled(TestLevel.EXHAUSTIVE, _LOG, module_name=test_id):
         return

     start_tree = os.path.join(self.hummingbird_dir, 'starting.tre')
     run_args = ['-d', 'dna', '--temporaries=%s' % self.ts.top_level_temp]
     run_args += ['--iter-limit=1', '--start-tree-search-from-current']
     run_args += ['--treefile=%s' % start_tree]
     run_args += ['--merger=muscle', '--tree-estimator=fasttree', '-m']
     run_args += ['-j', self.job_name, '-o', self.ts.top_level_temp]
     run_args += ['-i', self.hummingbird_dir]
     self._exe_run_sate(run_args, rc=0)

     # Locus names, in the order used for the marker numbering of the outputs.
     loci = ('AK1', 'bfib', 'nd2', 'nd4')
     fasta_paths = [
         os.path.join(self.hummingbird_dir, locus + '.fasta')
         for locus in loci
     ]
     aln_paths = [
         os.path.join(self.ts.top_level_temp,
                      self.job_name + '.marker%03d.%s.aln' % (num, locus))
         for num, locus in enumerate(loci, 1)
     ]
     self.assertSameInputOutputSequenceData(list(fasta_paths),
                                            list(aln_paths))

     concat_path = os.path.join(
         self.ts.top_level_temp,
         self.job_name + '_temp_iteration_0_seq_alignment.txt')
     self.assertSameConcatenatedSequences(concatenated_data=concat_path,
                                          seq_data_list=list(fasta_paths))
     self.assertNoGapColumns(aln_paths + [concat_path])
Example #8
0
 def testMulti(self):
     """Call ``_main_execution`` with ``-m`` on the 'testmulti' data source.

     Returns immediately unless the EXHAUSTIVE test level is enabled.
     """
     test_id = ".".join(
         [self.__class__.__name__, sys._getframe().f_code.co_name])
     if not is_test_enabled(TestLevel.EXHAUSTIVE, _LOG, module_name=test_id):
         return

     argv = ['-m', '-i', data_source_path('testmulti')]
     argv.extend(['-o', self.ts.top_level_temp])
     argv.append('--temporaries=%s' % self.ts.top_level_temp)
     argv.extend(['-j', self.job_name])
     argv.append('--iter-limit=1')
     self._main_execution(argv)
Example #9
0
 def testHummingBirdDataRun(self):
     """Run a multi-locus DNA job on the hummingbird directory and verify outputs.

     Nothing happens unless the EXHAUSTIVE test level is enabled.
     """
     qualified_name = ".".join(
         [self.__class__.__name__, sys._getframe().f_code.co_name])
     if not is_test_enabled(TestLevel.EXHAUSTIVE, _LOG,
                            module_name=qualified_name):
         return

     tree_path = os.path.join(self.hummingbird_dir, 'starting.tre')
     self._exe_run_sate(
         [
             '-d', 'dna',
             '--temporaries=%s' % self.ts.top_level_temp,
             '--iter-limit=1',
             '--start-tree-search-from-current',
             '--treefile=%s' % tree_path,
             '--merger=muscle',
             '--tree-estimator=fasttree',
             '-m',
             '-j', self.job_name,
             '-o', self.ts.top_level_temp,
             '-i', self.hummingbird_dir,
         ],
         rc=0)

     # Collect the input FASTA and output alignment path for each locus.
     seqs_in = []
     seqs_out = []
     marker = 0
     for locus in ('AK1', 'bfib', 'nd2', 'nd4'):
         marker += 1
         seqs_in.append(os.path.join(self.hummingbird_dir, locus + '.fasta'))
         out_name = self.job_name + '.marker%03d.%s.aln' % (marker, locus)
         seqs_out.append(os.path.join(self.ts.top_level_temp, out_name))

     self.assertSameInputOutputSequenceData(list(seqs_in), list(seqs_out))

     concat_out = os.path.join(
         self.ts.top_level_temp,
         self.job_name + '_temp_iteration_0_seq_alignment.txt')
     self.assertSameConcatenatedSequences(
         concatenated_data=concat_out, seq_data_list=list(seqs_in))
     self.assertNoGapColumns(seqs_out + [concat_out])
Example #10
0
 def testMulti(self):
     """Invoke ``_main_execution`` in ``-m`` mode on the "testmulti" data source.

     A no-op unless the EXHAUSTIVE test level is enabled.
     """
     full_name = ".".join([self.__class__.__name__, sys._getframe().f_code.co_name])
     if not is_test_enabled(TestLevel.EXHAUSTIVE, _LOG, module_name=full_name):
         return

     input_opts = ["-m", "-i", data_source_path("testmulti")]
     output_opts = ["-o", self.ts.top_level_temp, "--temporaries=%s" % self.ts.top_level_temp]
     job_opts = ["-j", self.job_name, "--iter-limit=1"]
     self._main_execution(input_opts + output_opts + job_opts)
Example #11
0
 def testSingleAminoAcidLocusRun(self):
     """Run a single-file protein job and check the resulting alignment file.

     Nothing happens unless the EXHAUSTIVE test level is enabled.
     """
     this_test = ".".join(
         [self.__class__.__name__, sys._getframe().f_code.co_name])
     if not is_test_enabled(TestLevel.EXHAUSTIVE, _LOG, module_name=this_test):
         return

     self._exe_run_sate(
         [
             '-d', 'protein',
             '--temporaries=%s' % self.ts.top_level_temp,
             '--iter-limit=1',
             '-j', self.job_name,
             '-o', self.ts.top_level_temp,
             '-i', self.caenophidia_file,
         ],
         rc=0)

     # One output path feeds both assertions.
     out_name = self.job_name + '.marker001.caenophidia_mos.aln'
     out_path = os.path.join(self.ts.top_level_temp, out_name)
     self.assertSameInputOutputSequenceData([self.caenophidia_file],
                                            [out_path])
     self.assertNoGapColumns([out_path])
Example #12
0
 def testMultiAminoAcidLocusRun(self):
     """Run a protein job with ``-m`` on ``self.multi_aa_dir`` and check both loci.

     Returns immediately unless the EXHAUSTIVE test level is enabled.
     """
     test_id = ".".join(
         [self.__class__.__name__, sys._getframe().f_code.co_name])
     if not is_test_enabled(TestLevel.EXHAUSTIVE, _LOG, module_name=test_id):
         return

     run_args = ['-d', 'protein']
     run_args += ['--temporaries=%s' % self.ts.top_level_temp]
     run_args += ['--iter-limit=1', '-m']
     run_args += ['-j', self.job_name]
     run_args += ['-o', self.ts.top_level_temp]
     run_args += ['-i', self.multi_aa_dir]
     self._exe_run_sate(run_args, rc=0)

     # Each locus has an input '<locus>.fasta' and a numbered output alignment.
     loci = ('caenophidia_mos', 'caenophidia_mos2')
     fasta_paths = [
         os.path.join(self.multi_aa_dir, locus + '.fasta') for locus in loci
     ]
     aln_paths = [
         os.path.join(self.ts.top_level_temp,
                      self.job_name + '.marker%03d.%s.aln' % (num, locus))
         for num, locus in enumerate(loci, 1)
     ]
     self.assertSameInputOutputSequenceData(list(fasta_paths),
                                            list(aln_paths))

     concat_path = os.path.join(
         self.ts.top_level_temp,
         self.job_name + '_temp_iteration_0_seq_alignment.txt')
     self.assertSameConcatenatedSequences(concatenated_data=concat_path,
                                          seq_data_list=list(fasta_paths))
     self.assertNoGapColumns(aln_paths + [concat_path])
Example #13
0
 def testProteinAmbiguousCharactersMafftFasttreeTrusted(self):
     """Protein run on ``self.ambig_aa`` with mafft, muscle and fasttree selected.

     A no-op unless the EXHAUSTIVE test level is enabled. After the run
     (``rc=0``) the ``.marker001.caenophidia_mos.ambiguities.aln`` output is
     checked.
     """
     full_name = ".".join(
         [self.__class__.__name__, sys._getframe().f_code.co_name])
     if not is_test_enabled(TestLevel.EXHAUSTIVE, _LOG, module_name=full_name):
         return

     tool_choices = [
         '--aligner=mafft',
         '--merger=muscle',
         '--tree-estimator=fasttree',
     ]
     base_args = [
         '-d', 'protein',
         '--temporaries=%s' % self.ts.top_level_temp,
         '--iter-limit=1',
         '-j', self.job_name,
         '-o', self.ts.top_level_temp,
         '-i', self.ambig_aa,
         '-t', self.ambig_aa_tree,
     ]
     self._exe_run_sate(base_args + tool_choices, rc=0)

     # The same output alignment is checked by both assertions.
     result_aln = os.path.join(
         self.ts.top_level_temp,
         self.job_name + '.marker001.caenophidia_mos.ambiguities.aln')
     self.assertSameInputOutputSequenceData([self.ambig_aa], [result_aln])
     self.assertNoGapColumns([result_aln])
Example #14
0
 def testRnaDnaIdentity(self):
     """Run identical jobs on DNA and RNA inputs and compare what they produce.

     Nothing happens unless the EXHAUSTIVE test level is enabled.
     """
     this_test = ".".join(
         [self.__class__.__name__, sys._getframe().f_code.co_name])
     if not is_test_enabled(TestLevel.EXHAUSTIVE, _LOG, module_name=this_test):
         return

     def build_args(seq_file, datatype, work_dir):
         # The two invocations differ only in input file, datatype and directory.
         return [
             '-i', seq_file,
             '-d', datatype,
             '-t', self.tree,
             '-o', work_dir,
             '--temporaries=%s' % work_dir,
             '-j', self.job_name,
             '--aligner=mafft',
             '--merger=muscle',
             '--tree-estimator=fasttree',
             '--start-tree-search-from-current',
             '--tree-estimator-model=-gtr -gamma -seed 1111',
             '--iter-limit=1',
         ]

     self._exe_run_sate(build_args(self.dna, 'dna', self.dna_tmp))
     self._exe_run_sate(build_args(self.rna, 'rna', self.rna_tmp))

     self.assert_is_nuc(self.dna_aln, 'DNA')
     self.assert_is_nuc(self.rna_aln, 'RNA')
     self.assertNoGapColumns([self.dna_aln, self.rna_aln])

     # DNA-side files go through convert_rna_to_dna(reverse=True) before comparison.
     dna_input_converted = self.convert_rna_to_dna(self.dna, reverse=True)
     dna_aln_converted = self.convert_rna_to_dna(self.dna_aln, reverse=True)
     self.assertSameDataSet(
         [self.rna, self.rna_aln, dna_input_converted, dna_aln_converted])

     # Score and tree comparisons are currently disabled:
     # self.assertSameScores([self.dna_score, self.rna_score])
     # self.assertSameTrees([self.dna_tree, self.rna_tree])
     self.assertSameFiles([self.dna_tmp_aln, self.rna_tmp_aln])
Example #15
0
 def testDnaAmbiguousCharactersClustalRaxmlUntrusted(self):
     """DNA run on ``self.ambig_dna`` with clustalw2, muscle, raxml and ``--untrusted``.

     Returns immediately unless the EXHAUSTIVE test level is enabled. After
     the run (``rc=0``) the ``.marker001.small.ambiguities.aln`` output is
     checked.
     """
     test_id = ".".join(
         [self.__class__.__name__, sys._getframe().f_code.co_name])
     if not is_test_enabled(TestLevel.EXHAUSTIVE, _LOG, module_name=test_id):
         return

     run_args = ['-d', 'dna']
     run_args.append('--temporaries=%s' % self.ts.top_level_temp)
     run_args.append('--iter-limit=1')
     run_args.extend(['-j', self.job_name])
     run_args.extend(['-o', self.ts.top_level_temp])
     run_args.extend(['-i', self.ambig_dna])
     run_args.extend(['-t', self.ambig_dna_tree])
     run_args.extend(['--aligner=clustalw2', '--merger=muscle'])
     run_args.extend(['--tree-estimator=raxml', '--untrusted'])
     self._exe_run_sate(run_args, rc=0)

     # Both assertions look at the same output alignment.
     aln_path = os.path.join(
         self.ts.top_level_temp,
         self.job_name + '.marker001.small.ambiguities.aln')
     self.assertSameInputOutputSequenceData([self.ambig_dna], [aln_path])
     self.assertNoGapColumns([aln_path])
Example #16
0
 def testDnaAmbiguousCharactersClustalRaxmlUntrusted(self):
     """Run a DNA job on ``self.ambig_dna`` (clustalw2 / muscle / raxml, ``--untrusted``).

     A no-op unless the EXHAUSTIVE test level is enabled.
     """
     qualified_name = ".".join(
         [self.__class__.__name__, sys._getframe().f_code.co_name])
     if not is_test_enabled(TestLevel.EXHAUSTIVE, _LOG,
                            module_name=qualified_name):
         return

     tool_flags = [
         '--aligner=clustalw2',
         '--merger=muscle',
         '--tree-estimator=raxml',
         '--untrusted',
     ]
     base_args = [
         '-d', 'dna',
         '--temporaries=%s' % self.ts.top_level_temp,
         '--iter-limit=1',
         '-j', self.job_name,
         '-o', self.ts.top_level_temp,
         '-i', self.ambig_dna,
         '-t', self.ambig_dna_tree,
     ]
     self._exe_run_sate(base_args + tool_flags, rc=0)

     # One output alignment path, used by both checks.
     out_name = self.job_name + '.marker001.small.ambiguities.aln'
     out_path = os.path.join(self.ts.top_level_temp, out_name)
     self.assertSameInputOutputSequenceData([self.ambig_dna], [out_path])
     self.assertNoGapColumns([out_path])
Example #17
0
 def testOpal(self):
     """Run ``_impl_test_merger`` for 'opal' when the SLOW test level is enabled."""
     if not is_test_enabled(TestLevel.SLOW, _LOG):
         return
     self._impl_test_merger('opal')
Example #18
0
 def testOpal(self):
     """Check the 'opal' merger via ``_impl_test_merger``; no-op unless SLOW is enabled."""
     slow_enabled = is_test_enabled(TestLevel.SLOW, _LOG)
     if not slow_enabled:
         return
     self._impl_test_merger('opal')
Example #19
0
 def testMuscle(self):
     """Run ``_impl_test_merger`` for 'muscle' when the SLOW test level is enabled."""
     if not is_test_enabled(TestLevel.SLOW, _LOG):
         return
     self._impl_test_merger('muscle')
Example #20
0
 def testMafft(self):
     """Run ``_impl_test_aligner`` for 'mafft' on 'anolis.fasta' at the EXHAUSTIVE level."""
     if not is_test_enabled(TestLevel.EXHAUSTIVE, _LOG):
         return
     self._impl_test_aligner('mafft', 'anolis.fasta')
Example #21
0
 def testOpal(self):
     """Run ``_impl_test_aligner`` for 'opal' on 'anolis.fasta' at the EXHAUSTIVE level."""
     if not is_test_enabled(TestLevel.EXHAUSTIVE, _LOG):
         return
     self._impl_test_aligner('opal', 'anolis.fasta')
Example #22
0
 def testClustalW2(self):
     """Run ``_impl_test_aligner`` for 'clustalw2' on 'anolis.fasta' at the EXHAUSTIVE level."""
     if not is_test_enabled(TestLevel.EXHAUSTIVE, _LOG):
         return
     self._impl_test_aligner('clustalw2', 'anolis.fasta')
Example #23
0
 def testClustalW2(self):
     """Check the 'clustalw2' aligner on 'anolis.fasta'; no-op unless EXHAUSTIVE is enabled."""
     exhaustive_enabled = is_test_enabled(TestLevel.EXHAUSTIVE, _LOG)
     if not exhaustive_enabled:
         return
     self._impl_test_aligner('clustalw2', 'anolis.fasta')
Example #24
0
 def testOpal(self):
     """Check the 'opal' aligner on 'anolis.fasta'; no-op unless EXHAUSTIVE is enabled."""
     exhaustive_enabled = is_test_enabled(TestLevel.EXHAUSTIVE, _LOG)
     if not exhaustive_enabled:
         return
     self._impl_test_aligner('opal', 'anolis.fasta')
Example #25
0
 def testMafft(self):
     """Check the 'mafft' aligner on 'anolis.fasta'; no-op unless EXHAUSTIVE is enabled."""
     exhaustive_enabled = is_test_enabled(TestLevel.EXHAUSTIVE, _LOG)
     if not exhaustive_enabled:
         return
     self._impl_test_aligner('mafft', 'anolis.fasta')
Example #26
0
 def testMuscle(self):
     """Check the 'muscle' merger via ``_impl_test_merger``; no-op unless SLOW is enabled."""
     slow_enabled = is_test_enabled(TestLevel.SLOW, _LOG)
     if not slow_enabled:
         return
     self._impl_test_merger('muscle')