def test_get_taxon_names_for_taxa_additional_fields(self):
    # Restrict the requested taxon name fields to SUPERFAMILY only, then
    # build a fresh dataset creator from the modified form data.
    self.cleaned_data['taxon_names'] = ['SUPERFAMILY']
    creator = CreateDataset(self.cleaned_data)
    actual = creator.get_taxon_names_for_taxa()

    # Each voucher is expected to map to its code plus the superfamily.
    wanted = {
        'CP100-10': {
            'code': 'CP100-10',
            'superfamily': 'Papilionoidea',
        },
        'CP100-11': {
            'code': 'CP100-11',
            'superfamily': 'Papilionoidea',
        },
    }
    self.assertEqual(wanted, actual)
def test_get_taxon_names_for_taxa_additional_fields(self):
    # Restrict the requested taxon name fields to SUPERFAMILY only, then
    # build a fresh dataset creator from the modified form data.
    self.cleaned_data['taxon_names'] = ['SUPERFAMILY']
    creator = CreateDataset(self.cleaned_data)
    actual = creator.get_taxon_names_for_taxa()

    # Keys are the lower-cased voucher codes; the second voucher is
    # expected to carry an empty superfamily.
    wanted = {
        'cp100-10': {'superfamily': 'Papilionoidea'},
        'cp100-11': {'superfamily': ''},
    }
    self.assertEqual(wanted, actual)
class CreateDatasetUtilsTest(TestCase): def setUp(self): args = [] opts = {'dumpfile': 'test_db_dump2.xml', 'verbosity': 0} cmd = 'migrate_db' call_command(cmd, *args, **opts) g1 = Genes.objects.get(gene_code='COI-begin') g2 = Genes.objects.get(gene_code='ef1a') self.cleaned_data = { 'gene_codes': [g1, g2], 'taxonset': None, 'voucher_codes': 'CP100-10\r\nCP100-11', 'geneset': None, 'taxon_names': ['CODE', 'SUPERFAMILY', 'GENUS', 'SPECIES'], 'positions': ['ALL'], 'translations': False, 'partition_by_positions': 'by gene', 'degen_translations': None, 'number_genes': None, 'file_format': 'FASTA', 'aminoacids': False, 'outgroup': None, } self.c = Client() self.dataset_creator = CreateDataset(self.cleaned_data) self.maxDiff = None def test_create_dataset(self): expected = '>CP100_10_Papilionoidea_Aus_aus' result = self.dataset_creator.dataset_str self.assertTrue(expected in result) def test_create_dataset_with_gene_code(self): self.cleaned_data['taxon_names'] = ['CODE', 'GENECODE'] dataset_creator = CreateDataset(self.cleaned_data) expected = ">CP100_10\n" result = dataset_creator.dataset_str self.assertTrue(expected in result) def test_get_taxon_names_for_taxa(self): expected = { 'CP100-10': {'code': 'CP100-10', 'genus': 'Aus', 'species': 'aus', 'superfamily': 'Papilionoidea'}, 'CP100-11': {'code': 'CP100-11', 'genus': 'Aus', 'species': 'bus', 'superfamily': 'Papilionoidea'}, } result = self.dataset_creator.get_taxon_names_for_taxa() self.assertEqual(expected, result) def test_create_dataset_drop_voucher(self): cleaned_data = self.cleaned_data cleaned_data['voucher_codes'] = 'CP100-10\r\n--CP100-11' cleaned_data['taxonset'] = TaxonSets.objects.get(taxonset_name='all_taxa') dataset_creator = CreateDataset(cleaned_data) result = dataset_creator.dataset_str self.assertTrue('CP100_11' not in result) def test_get_taxon_names_for_taxa_additional_fields(self): self.cleaned_data['taxon_names'] = ['SUPERFAMILY'] dataset_creator = CreateDataset(self.cleaned_data) expected = { 
'CP100-10': {'code': 'CP100-10', 'superfamily': 'Papilionoidea'}, 'CP100-11': {'code': 'CP100-11', 'superfamily': 'Papilionoidea'}, } result = dataset_creator.get_taxon_names_for_taxa() self.assertEqual(expected, result) def test_dataset_all_codons_as_one(self): g1 = Genes.objects.get(gene_code='COI-begin') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] dataset_creator = CreateDataset(cleaned_data) expected = """ >CP100_10_Papilionoidea_Aus_aus CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGA >CP100_11_Papilionoidea_Aus_bus CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGA """ result = dataset_creator.dataset_str self.assertEqual(expected.lstrip(), result) def 
test_dataset_all_codons_as_one_with_no_reading_frame(self): g1 = Genes.objects.get(gene_code='COI-begin') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] dataset_creator = CreateDataset(cleaned_data) expected = """ >CP100_10_Papilionoidea_Aus_aus CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGA >CP100_11_Papilionoidea_Aus_bus CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGA """ result = dataset_creator.dataset_str self.assertEqual(expected.lstrip(), result) def test_dataset_all_codons_1st_as_one(self): g1 = Genes.objects.get(gene_code='COI-begin') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] cleaned_data['positions'] = list(['ALL', '1st'],) dataset_creator = CreateDataset(cleaned_data) 
expected = """ >CP100_10_Papilionoidea_Aus_aus CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGA >CP100_11_Papilionoidea_Aus_bus CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGA """ result = dataset_creator.dataset_str self.assertEqual(expected.lstrip(), result) def test_dataset_all_codons_1st_2nd_as_one(self): g1 = Genes.objects.get(gene_code='COI-begin') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] cleaned_data['positions'] = list(['ALL', '1st', '2nd'],) dataset_creator = CreateDataset(cleaned_data) expected = """ >CP100_10_Papilionoidea_Aus_aus CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA 
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGA >CP100_11_Papilionoidea_Aus_bus CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGA """ result = dataset_creator.dataset_str self.assertEqual(expected.lstrip(), result) def test_dataset_all_codons_1st_2nd_3rd_as_one(self): g1 = Genes.objects.get(gene_code='COI-begin') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] cleaned_data['positions'] = list(['ALL', '1st', '2nd', '3rd'],) dataset_creator = CreateDataset(cleaned_data) expected = """ >CP100_10_Papilionoidea_Aus_aus CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA 
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGA >CP100_11_Papilionoidea_Aus_bus CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGA """ result = dataset_creator.dataset_str self.assertEqual(expected.lstrip(), result) def test_dataset_1st_2nd_3rd_codons_as_one(self): g1 = Genes.objects.get(gene_code='COI-begin') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] cleaned_data['positions'] = list(['1st', '2nd', '3rd'],) dataset_creator = CreateDataset(cleaned_data) expected = """ >CP100_10_Papilionoidea_Aus_aus CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA 
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGA >CP100_11_Papilionoidea_Aus_bus CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGA """ result = dataset_creator.dataset_str self.assertEqual(expected.lstrip(), result) def test_dataset_1st_2nd_3rd_codons_as_1st2nd_3rd_gene_with_no_reading_frame(self): g1 = Genes.objects.get(gene_code='COI-begin') g1.reading_frame = None g1.save() cleaned_data = self.cleaned_data.copy() cleaned_data['gene_codes'] = [g1] cleaned_data['positions'] = list(['1st', '2nd', '3rd'],) cleaned_data['partition_by_positions'] = '1st-2nd, 3rd' dataset_creator = CreateDataset(cleaned_data) expected = "" result = dataset_creator.dataset_str self.assertEqual(expected.lstrip(), result) def test_dataset_all_codons_partitions_each(self): # TODO: fix test after fixing dataset-creator issue #26 g1 = Genes.objects.get(gene_code='COI-begin') cleaned_data = self.cleaned_data.copy() cleaned_data['gene_codes'] = [g1] cleaned_data['positions'] = list(['ALL'],) cleaned_data['partition_by_positions'] = 'by gene' dataset_creator = CreateDataset(cleaned_data) expected = """ >CP100_10_Papilionoidea_Aus_aus CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA 
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGA >CP100_11_Papilionoidea_Aus_bus CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGA """ result = dataset_creator.dataset_str self.assertEqual(expected.lstrip(), result) def test_dataset_all_codons_1st_partitions_each(self): g1 = Genes.objects.get(gene_code='COI-begin') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] cleaned_data['positions'] = list(['ALL', '1st'],) cleaned_data['partition_by_positions'] = 'by gene' dataset_creator = CreateDataset(cleaned_data) expected = """ >CP100_10_Papilionoidea_Aus_aus CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA 
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGA >CP100_11_Papilionoidea_Aus_bus CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGA """ result = dataset_creator.dataset_str self.assertEqual(expected.lstrip(), result) def test_dataset_all_codons_1st_2nd_partitions_each(self): g1 = Genes.objects.get(gene_code='COI-begin') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] cleaned_data['positions'] = list(['ALL', '1st', '2nd'],) cleaned_data['partition_by_positions'] = 'by gene' dataset_creator = CreateDataset(cleaned_data) expected = """ >CP100_10_Papilionoidea_Aus_aus CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA 
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGA >CP100_11_Papilionoidea_Aus_bus CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGA """ result = dataset_creator.dataset_str self.assertEqual(expected.lstrip(), result) def test_dataset_all_codons_1st_2nd_3rd_partitions_each(self): g1 = Genes.objects.get(gene_code='COI-begin') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] cleaned_data['positions'] = list(['ALL', '1st', '2nd', '3rd'],) cleaned_data['partition_by_positions'] = 'by gene' dataset_creator = CreateDataset(cleaned_data) expected = """ >CP100_10_Papilionoidea_Aus_aus CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA 
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGA >CP100_11_Papilionoidea_Aus_bus CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGA """ result = dataset_creator.dataset_str self.assertEqual(expected.lstrip(), result) def test_dataset_1st_codon_one_partition(self): g1 = Genes.objects.get(gene_code='COI-begin') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] del cleaned_data['positions'] cleaned_data['positions'] = ['1st'] dataset_creator = CreateDataset(cleaned_data) expected = """ >CP100_10_Papilionoidea_Aus_aus AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA >CP100_11_Papilionoidea_Aus_bus AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA """ result = dataset_creator.dataset_str self.assertEqual(expected.lstrip(), result) def test_dataset_2nd_codon_one_partition(self): g1 = 
Genes.objects.get(gene_code='COI-begin') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] del cleaned_data['positions'] cleaned_data['positions'] = ['2nd'] dataset_creator = CreateDataset(cleaned_data) expected = """ >CP100_10_Papilionoidea_Aus_aus CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC >CP100_11_Papilionoidea_Aus_bus CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC """ result = dataset_creator.dataset_str self.assertEqual(expected.lstrip(), result) def test_dataset_3rd_codon_one_partition(self): g1 = Genes.objects.get(gene_code='COI-begin') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] del cleaned_data['positions'] cleaned_data['positions'] = ['3rd'] dataset_creator = CreateDataset(cleaned_data) expected = """ >CP100_10_Papilionoidea_Aus_aus GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG >CP100_11_Papilionoidea_Aus_bus GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG """ result = dataset_creator.dataset_str self.assertEqual(expected.lstrip(), result) def test_dataset_1st_2nd_codon_one_partition(self): g1 = Genes.objects.get(gene_code='COI-begin') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] del cleaned_data['positions'] cleaned_data['positions'] = ['1st', '2nd'] dataset_creator = 
CreateDataset(cleaned_data) expected = """ >CP100_10_Papilionoidea_Aus_aus ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC ACACACACACACACACACACACACA >CP100_11_Papilionoidea_Aus_bus ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC ACACACACACACACACACACACACA """ result = dataset_creator.dataset_str self.assertEqual(expected.lstrip(), result) def test_dataset_2nd_3rd_codon_one_partition(self): g1 = Genes.objects.get(gene_code='COI-begin') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] del cleaned_data['positions'] cleaned_data['positions'] = ['2nd', '3rd'] dataset_creator = CreateDataset(cleaned_data) expected = 'Cannot create dataset for only codon positions 2nd and 3rd.' result = dataset_creator.errors self.assertTrue(expected in ''.join(str(i) for i in result)) def test_dataset_1st_3rd_codon_one_partition(self): g1 = Genes.objects.get(gene_code='COI-begin') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] del cleaned_data['positions'] cleaned_data['positions'] = ['1st', '3rd'] dataset_creator = CreateDataset(cleaned_data) expected = 'Cannot create dataset for only codon positions 1st and 3rd.' 
result = dataset_creator.errors self.assertTrue(expected in ''.join(str(i) for i in result)) def test_dataset_1st_3rd_codon_partition_1st2nd_3rd_gene_with_no_reading_frame(self): g1 = Genes.objects.get(gene_code='COI-begin') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] del cleaned_data['positions'] cleaned_data['positions'] = ['1st', '3rd'] cleaned_data['partition_by_positions'] = '1st-2nd, 3rd' dataset_creator = CreateDataset(cleaned_data) expected = "" result = dataset_creator.dataset_str self.assertEqual(expected.lstrip(), result) def test_dataset_1st_each(self): g1 = Genes.objects.get(gene_code='COI-begin') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] del cleaned_data['positions'] cleaned_data['positions'] = ['1st'] cleaned_data['partition_by_positions'] = 'by gene' dataset_creator = CreateDataset(cleaned_data) expected = """ >CP100_10_Papilionoidea_Aus_aus AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA >CP100_11_Papilionoidea_Aus_bus AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA """ result = dataset_creator.dataset_str self.assertEqual(expected.lstrip(), result) def test_dataset_2nd_each(self): g1 = Genes.objects.get(gene_code='COI-begin') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] del cleaned_data['positions'] cleaned_data['positions'] = ['2nd'] cleaned_data['partition_by_positions'] = 'by gene' dataset_creator = CreateDataset(cleaned_data) expected = """ >CP100_10_Papilionoidea_Aus_aus CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC 
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC >CP100_11_Papilionoidea_Aus_bus CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC """ result = dataset_creator.dataset_str self.assertEqual(expected.lstrip(), result) def test_dataset_3rd_each(self): g1 = Genes.objects.get(gene_code='COI-begin') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] del cleaned_data['positions'] cleaned_data['positions'] = ['3rd'] cleaned_data['partition_by_positions'] = 'by gene' dataset_creator = CreateDataset(cleaned_data) expected = """ >CP100_10_Papilionoidea_Aus_aus GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG >CP100_11_Papilionoidea_Aus_bus GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG """ result = dataset_creator.dataset_str self.assertEqual(expected.lstrip(), result) def test_dataset_1st_2nd_each(self): # TODO fix test when dataset-creator has issue #26 fixed g1 = Genes.objects.get(gene_code='COI-begin') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] del cleaned_data['positions'] cleaned_data['positions'] = ['1st', '2nd'] cleaned_data['partition_by_positions'] = 'by gene' dataset_creator = CreateDataset(cleaned_data) expected = """ >CP100_10_Papilionoidea_Aus_aus ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC 
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC ACACACACACACACACACACACACA >CP100_11_Papilionoidea_Aus_bus ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC ACACACACACACACACACACACACA """ result = dataset_creator.dataset_str self.assertEqual(expected.lstrip(), result) def test_dataset_1st_3rd_each(self): g1 = Genes.objects.get(gene_code='COI-begin') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] del cleaned_data['positions'] cleaned_data['positions'] = ['1st', '3rd'] cleaned_data['partition_by_positions'] = 'by gene' dataset_creator = CreateDataset(cleaned_data) expected = 'Cannot create dataset for only codon positions 1st and 3rd.' result = dataset_creator.errors self.assertTrue(expected in ''.join(str(i) for i in result)) def test_dataset_2nd_3rd_each(self): g1 = Genes.objects.get(gene_code='COI-begin') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] del cleaned_data['positions'] cleaned_data['positions'] = ['2nd', '3rd'] cleaned_data['partition_by_positions'] = 'by gene' dataset_creator = CreateDataset(cleaned_data) expected = 'Cannot create dataset for only codon positions 2nd and 3rd.' 
result = dataset_creator.errors self.assertTrue(expected in ''.join(str(i) for i in result)) def test_dataset_2nd_3rd_paritions_1st2nd_3rd_gene_with_no_reading_frame(self): g1 = Genes.objects.get(gene_code='COI-begin') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] del cleaned_data['positions'] cleaned_data['positions'] = ['2nd', '3rd'] cleaned_data['partition_by_positions'] = '1st-2nd, 3rd' dataset_creator = CreateDataset(cleaned_data) expected = "" result = dataset_creator.dataset_str self.assertEqual(expected.lstrip(), result) def test_dataset_1st_2nd_3rd_each(self): # TODO Fix this test after fixing dataset-creator issue #26 g1 = Genes.objects.get(gene_code='COI-begin') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] del cleaned_data['positions'] cleaned_data['positions'] = ['1st', '2nd', '3rd'] cleaned_data['partition_by_positions'] = 'by gene' dataset_creator = CreateDataset(cleaned_data) expected = """ >CP100_10_Papilionoidea_Aus_aus CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGA >CP100_11_Papilionoidea_Aus_bus CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA 
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGA """ result = dataset_creator.dataset_str self.assertEqual(expected.lstrip(), result) def test_dataset_all_1st_2nd_3rd_each(self): # TODO: fix test after fixing dataset-creator issue #26 g1 = Genes.objects.get(gene_code='COI-begin') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] del cleaned_data['positions'] cleaned_data['positions'] = ['ALL', '1st', '2nd', '3rd'] cleaned_data['partition_by_positions'] = 'by gene' dataset_creator = CreateDataset(cleaned_data) expected = """ >CP100_10_Papilionoidea_Aus_aus CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGA >CP100_11_Papilionoidea_Aus_bus CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA 
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGA """ result = dataset_creator.dataset_str self.assertEqual(expected.lstrip(), result) def test_dataset_1st_codon_1st2nd_3rd(self): g1 = Genes.objects.get(gene_code='COI-begin') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] del cleaned_data['positions'] cleaned_data['positions'] = ['1st'] cleaned_data['partition_by_positions'] = '1st-2nd, 3rd' dataset_creator = CreateDataset(cleaned_data) expected = """ >COI-begin_1st ---- >CP100_10_Papilionoidea_Aus_aus AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA >CP100_11_Papilionoidea_Aus_bus AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA """ result = dataset_creator.dataset_str self.assertEqual(expected.lstrip(), result) def test_dataset_2nd_codon_1st2nd_3rd(self): g1 = Genes.objects.get(gene_code='COI-begin') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] del cleaned_data['positions'] cleaned_data['positions'] = ['2nd'] cleaned_data['partition_by_positions'] = '1st-2nd, 3rd' dataset_creator = CreateDataset(cleaned_data) expected = """ >COI-begin_2nd ---- >CP100_10_Papilionoidea_Aus_aus 
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC >CP100_11_Papilionoidea_Aus_bus CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC """ result = dataset_creator.dataset_str self.assertEqual(expected.lstrip(), result) def test_dataset_3rd_codon_1st2nd_3rd(self): g1 = Genes.objects.get(gene_code='COI-begin') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] del cleaned_data['positions'] cleaned_data['positions'] = ['3rd'] cleaned_data['partition_by_positions'] = '1st-2nd, 3rd' dataset_creator = CreateDataset(cleaned_data) expected = """ >COI-begin_3rd ---- >CP100_10_Papilionoidea_Aus_aus GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG >CP100_11_Papilionoidea_Aus_bus GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG """ result = dataset_creator.dataset_str self.assertEqual(expected.lstrip(), result) def test_dataset_1st_2nd_codons_1st2nd_3rd(self): g1 = Genes.objects.get(gene_code='COI-begin') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] del cleaned_data['positions'] cleaned_data['positions'] = ['1st', '2nd'] cleaned_data['partition_by_positions'] = '1st-2nd, 3rd' dataset_creator = CreateDataset(cleaned_data) expected = """ >COI-begin_1st-2nd ---- >CP100_10_Papilionoidea_Aus_aus 
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACA >CP100_11_Papilionoidea_Aus_bus ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACA """ result = dataset_creator.dataset_str self.assertEqual(expected.lstrip(), result) def test_dataset_1st_2nd_3rd_codons_1st2nd_3rd(self): g1 = Genes.objects.get(gene_code='COI-begin') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] del cleaned_data['positions'] cleaned_data['positions'] = ['ALL'] cleaned_data['partition_by_positions'] = '1st-2nd, 3rd' dataset_creator = CreateDataset(cleaned_data) expected = """ >COI-begin_1st-2nd ---- >CP100_10_Papilionoidea_Aus_aus ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACA >CP100_11_Papilionoidea_Aus_bus 
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACA >COI-begin_3rd ---- >CP100_10_Papilionoidea_Aus_aus GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG >CP100_11_Papilionoidea_Aus_bus GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG """ result = dataset_creator.dataset_str self.assertEqual(expected.lstrip(), result) def test_dataset_all_1st_codons_1st2nd_3rd(self): g1 = Genes.objects.get(gene_code='COI-begin') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] del cleaned_data['positions'] cleaned_data['positions'] = ['ALL', '1st'] cleaned_data['partition_by_positions'] = '1st-2nd, 3rd' dataset_creator = CreateDataset(cleaned_data) expected = """ >COI-begin_1st-2nd ---- >CP100_10_Papilionoidea_Aus_aus ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACA >CP100_11_Papilionoidea_Aus_bus 
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACA >COI-begin_3rd ---- >CP100_10_Papilionoidea_Aus_aus GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG >CP100_11_Papilionoidea_Aus_bus GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG """ result = dataset_creator.dataset_str self.assertEqual(expected.lstrip(), result) def test_has_no_sequences_for_input_voucher(self): cleaned_data = self.cleaned_data cleaned_data['voucher_codes'] = 'CP100-10\r\nCP100-11\r\nCP1000' dataset_creator = CreateDataset(cleaned_data) self.assertTrue('Could not find voucher CP1000' in dataset_creator.warnings) def test_creating_dataset_filename(self): tmp_file_name = 'MEGA_b879d2a046d04821be618bf481b6b08d.txt' result = guess_file_extension(tmp_file_name) expected = 'MEGA_b879d2a046d04821be618bf481b6b08d.meg' self.assertEqual(expected, result)
class CreateDatasetUtilsTest(TestCase): def setUp(self): args = [] opts = {'dumpfile': 'test_db_dump.xml', 'verbosity': 0} cmd = 'migrate_db' call_command(cmd, *args, **opts) g1 = Genes.objects.get(gene_code='COI') g2 = Genes.objects.get(gene_code='EF1a') self.cleaned_data = { 'gene_codes': [g1, g2], 'taxonset': None, 'voucher_codes': 'CP100-10\r\nCP100-11', 'geneset': None, 'taxon_names': ['CODE', 'SUPERFAMILY', 'GENUS', 'SPECIES'], 'positions': ['ALL'], 'partition_by_positions': 'ONE', } self.c = Client() self.dataset_creator = CreateDataset(self.cleaned_data) self.maxDiff = None def test_create_dataset(self): expected = '>coi\n--------------------\n>CP100-10_Papilionoidea_Melitaea_diamina' result = self.dataset_creator.dataset_str self.assertTrue(expected in result) def test_create_dataset_with_gene_code(self): self.cleaned_data['taxon_names'] = ['CODE', 'GENECODE'] dataset_creator = CreateDataset(self.cleaned_data) expected = ">CP100-10_coi\n" result = dataset_creator.dataset_str self.assertTrue(expected in result) def test_get_taxon_names_for_taxa(self): expected = { 'cp100-10': { 'code': 'CP100-10', 'genus': 'Melitaea', 'species': 'diamina', 'superfamily': 'Papilionoidea' }, 'cp100-11': { 'code': 'CP100-11', 'genus': 'Melitaea', 'species': 'diamina', 'superfamily': '' }, } result = self.dataset_creator.get_taxon_names_for_taxa() self.assertEqual(expected, result) def test_create_dataset_drop_voucher(self): cleaned_data = self.cleaned_data cleaned_data['voucher_codes'] = 'CP100-10\r\n--CP100-11' cleaned_data['taxonset'] = TaxonSets.objects.get( taxonset_name='Erebia') dataset_creator = CreateDataset(cleaned_data) result = dataset_creator.dataset_str self.assertTrue('CP100-11' not in result) def test_from_seq_objs_to_fasta(self): expected = 2706 result = self.dataset_creator.from_seq_objs_to_fasta() self.assertEqual(expected, len(result)) def test_get_taxon_names_for_taxa_additional_fields(self): self.cleaned_data['taxon_names'] = ['SUPERFAMILY'] 
dataset_creator = CreateDataset(self.cleaned_data) expected = { 'cp100-10': { 'superfamily': 'Papilionoidea' }, 'cp100-11': { 'superfamily': '' }, } result = dataset_creator.get_taxon_names_for_taxa() self.assertEqual(expected, result) def test_get_sequence_first_codon_position(self): self.cleaned_data['positions'] = ['1st'] self.cleaned_data['gene_codes'] = [ Genes.objects.get(gene_code='wingless') ] dataset_creator = CreateDataset(self.cleaned_data) expected = Seq("CGGTGATAAAGCTATATGGAGACAAGATGAG") sequence = Seq( "ACACGTCGACTCCGGCAAGTCCACCACCACCGGTCACTTGATTTACAAATGTGGTGGTATCGACAaACGTACCATCGAGAAGTTCGAGAAGGA" ) result = dataset_creator.get_sequence_based_on_codon_positions( 'wingless', sequence) self.assertEqual(expected, result[0]) def test_get_sequence_second_codon_position(self): self.cleaned_data['positions'] = ['2nd'] self.cleaned_data['gene_codes'] = [ Genes.objects.get(gene_code='wingless') ] dataset_creator = CreateDataset(self.cleaned_data) expected = Seq("ATACGACCCCGATTAAGGGTAaGCTAATAAA") sequence = Seq( "ACACGTCGACTCCGGCAAGTCCACCACCACCGGTCACTTGATTTACAAATGTGGTGGTATCGACAaACGTACCATCGAGAAGTTCGAGAAGGA" ) result = dataset_creator.get_sequence_based_on_codon_positions( 'wingless', sequence) self.assertEqual(expected, result[0]) def test_get_sequence_third_codon_position(self): self.cleaned_data['positions'] = ['3rd'] self.cleaned_data['gene_codes'] = [ Genes.objects.get(gene_code='wingless') ] dataset_creator = CreateDataset(self.cleaned_data) expected = Seq("CCCCCGCCCCTCGTCATTTCCATCCGGCGG") sequence = Seq( "ACACGTCGACTCCGGCAAGTCCACCACCACCGGTCACTTGATTTACAAATGTGGTGGTATCGACAaACGTACCATCGAGAAGTTCGAGAAGGA" ) result = dataset_creator.get_sequence_based_on_codon_positions( 'wingless', sequence) self.assertEqual(expected, result[0]) def test_get_sequence_first_and_second_codon_position(self): self.cleaned_data['positions'] = ['1st', '2nd'] self.cleaned_data['gene_codes'] = [ Genes.objects.get(gene_code='wingless') ] dataset_creator = CreateDataset(self.cleaned_data) 
expected = Seq( "CAGTGATCGGAATCACACACGGCATTATTAAATGGGGGATGAAaCGACATGAAATTGAAAGA") sequence = Seq( "ACACGTCGACTCCGGCAAGTCCACCACCACCGGTCACTTGATTTACAAATGTGGTGGTATCGACAaACGTACCATCGAGAAGTTCGAGAAGGA" ) result = dataset_creator.get_sequence_based_on_codon_positions( 'wingless', sequence) self.assertEqual(expected, result[0]) def test_get_sequence_first_and_third_codon_position(self): self.cleaned_data['positions'] = ['1st', '3rd'] self.cleaned_data['gene_codes'] = [ Genes.objects.get(gene_code='wingless') ] dataset_creator = CreateDataset(self.cleaned_data) expected = Seq( "CCGCGCTCGCAGTCACACACGTCCTGATTCAATTGTGTACGCAACTACACGGAGTCGGAGG") sequence = Seq( "ACACGTCGACTCCGGCAAGTCCACCACCACCGGTCACTTGATTTACAAATGTGGTGGTATCGACAaACGTACCATCGAGAAGTTCGAGAAGGA" ) result = dataset_creator.get_sequence_based_on_codon_positions( 'wingless', sequence) self.assertEqual(expected, result[0]) def test_get_sequence_second_and_third_codon_position(self): self.cleaned_data['positions'] = ['2nd', '3rd'] self.cleaned_data['gene_codes'] = [ Genes.objects.get(gene_code='wingless') ] dataset_creator = CreateDataset(self.cleaned_data) expected = Seq( "ACTCACCCGCAGCCCCCCCCGTACTGTTACAAGTGTGTTCACaAGTCCTCAGAGTCAGAGA") sequence = Seq( "ACACGTCGACTCCGGCAAGTCCACCACCACCGGTCACTTGATTTACAAATGTGGTGGTATCGACAaACGTACCATCGAGAAGTTCGAGAAGGA" ) result = dataset_creator.get_sequence_based_on_codon_positions( 'wingless', sequence) self.assertEqual(expected, result[0]) def test_dataset_all_codons_as_one(self): g1 = Genes.objects.get(gene_code='COI') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] dataset_creator = CreateDataset(cleaned_data) expected = """ >coi -------------------- >CP100-10_Papilionoidea_Melitaea_diamina 
?????????????????????????TGAGCCGGTATAATTGGTACATCCCTAAGTCTTATTATTCGAACCGAATTAGGAAATCCTAGTTTTTTAATTGGAGATGATCAAATTTATAATACCATTGTAACAGCTCATGCTTTTATTATAATTTTTTTTATAGTTATGCCAATTATAATTGGAGGATTTGGTAATTGACTTGTACCATTAATATTGGGAGCCCCAGATATAGCTTTCCCCCGAATAAATTATATAAGATTTTGATTATTGCCTCCATCCTTAATTCTTTTAATTTCAAGTAGAATTGTAGAAAATGGGGCAGGAACTGGATGAACAGTTTACCCCCCACTTTCATCTAATATTGCCCATAGAGGAGCTTCAGTGGATTTAGCTATTTTTTCTTTACATTTAGCTGGGATTTCCTCTATCTTAGGAGCTATTAATTTTATTACTACAATTATTAATATACGAATTAATAATATATCTTATGATCAAATACCTTTATTTGTATGAGCAGTAGGAATTACAGCATTACTTCTCTTATTATCTTTACCAGTTTTAGCTGGAGCTATTACTATACTTTTAACGGATCGAAATCTTAATACCTCATTTTTTGATTCCTGCGGAGGAGGAGATCC????????????????????????????????? >CP100-11_Melitaea_diamina ??TGAGCCGGTATAATTGGTACATCCCTAAGTCTTATTATTCGAACCGAATTAGGAAATCCTAGTTTTTTAATTGGAGATGATCAAATTTATAATACCATTGTAACAGCTCATGCTTTTATTATAATTTTTTTTATAGTTATGCCAATTATAATTGGAGGATTTGGTAATTGACTTGTACCATTAATATTGGGAGCCCCAGATATAGCTTTCCCCCGAATAAATTATATAAGATTTTGATTATTGCCTCCATCCTTAATTCTTTTAATTTCAAGTAGAATTGTAGAAAATGGGGCAGGAACTGGATGAACAGTTTACCCCCCACTTTCATCTAATATTGCCCATAGAGGAGCTTCAGTGGATTTAGCTATTTTTTCTTTACATTTAGCTGGGATTTCCTCTATCTTAGGAGCTATTAATTTTATTACTACAATTATTAATATACGAATTAATAATATATCTTATGATCAAATACCTTTATTTGTATGAGCAGTAGGAATTACAGCATTACTTCTCTTATTATCTTTACCAGTTTTAGCTGGAGCTATTACTATACTTTTAACGGATCGAAATCTTAATACCTCATTTTTTGATTCCTGCGGAGGAGGAGATCC????????????????????????????????? 
""" result = dataset_creator.dataset_str self.assertEqual(expected.strip(), result) def test_dataset_all_codons_1st_as_one(self): g1 = Genes.objects.get(gene_code='COI') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] cleaned_data['positions'] = list(['ALL', '1st'], ) dataset_creator = CreateDataset(cleaned_data) expected = """ >coi -------------------- >CP100-10_Papilionoidea_Melitaea_diamina ?????????????????????????TGAGCCGGTATAATTGGTACATCCCTAAGTCTTATTATTCGAACCGAATTAGGAAATCCTAGTTTTTTAATTGGAGATGATCAAATTTATAATACCATTGTAACAGCTCATGCTTTTATTATAATTTTTTTTATAGTTATGCCAATTATAATTGGAGGATTTGGTAATTGACTTGTACCATTAATATTGGGAGCCCCAGATATAGCTTTCCCCCGAATAAATTATATAAGATTTTGATTATTGCCTCCATCCTTAATTCTTTTAATTTCAAGTAGAATTGTAGAAAATGGGGCAGGAACTGGATGAACAGTTTACCCCCCACTTTCATCTAATATTGCCCATAGAGGAGCTTCAGTGGATTTAGCTATTTTTTCTTTACATTTAGCTGGGATTTCCTCTATCTTAGGAGCTATTAATTTTATTACTACAATTATTAATATACGAATTAATAATATATCTTATGATCAAATACCTTTATTTGTATGAGCAGTAGGAATTACAGCATTACTTCTCTTATTATCTTTACCAGTTTTAGCTGGAGCTATTACTATACTTTTAACGGATCGAAATCTTAATACCTCATTTTTTGATTCCTGCGGAGGAGGAGATCC????????????????????????????????? >CP100-11_Melitaea_diamina ??TGAGCCGGTATAATTGGTACATCCCTAAGTCTTATTATTCGAACCGAATTAGGAAATCCTAGTTTTTTAATTGGAGATGATCAAATTTATAATACCATTGTAACAGCTCATGCTTTTATTATAATTTTTTTTATAGTTATGCCAATTATAATTGGAGGATTTGGTAATTGACTTGTACCATTAATATTGGGAGCCCCAGATATAGCTTTCCCCCGAATAAATTATATAAGATTTTGATTATTGCCTCCATCCTTAATTCTTTTAATTTCAAGTAGAATTGTAGAAAATGGGGCAGGAACTGGATGAACAGTTTACCCCCCACTTTCATCTAATATTGCCCATAGAGGAGCTTCAGTGGATTTAGCTATTTTTTCTTTACATTTAGCTGGGATTTCCTCTATCTTAGGAGCTATTAATTTTATTACTACAATTATTAATATACGAATTAATAATATATCTTATGATCAAATACCTTTATTTGTATGAGCAGTAGGAATTACAGCATTACTTCTCTTATTATCTTTACCAGTTTTAGCTGGAGCTATTACTATACTTTTAACGGATCGAAATCTTAATACCTCATTTTTTGATTCCTGCGGAGGAGGAGATCC????????????????????????????????? 
""" result = dataset_creator.dataset_str self.assertEqual(expected.strip(), result) def test_dataset_all_codons_1st_2nd_as_one(self): g1 = Genes.objects.get(gene_code='COI') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] cleaned_data['positions'] = list(['ALL', '1st', '2nd'], ) dataset_creator = CreateDataset(cleaned_data) expected = """ >coi -------------------- >CP100-10_Papilionoidea_Melitaea_diamina ?????????????????????????TGAGCCGGTATAATTGGTACATCCCTAAGTCTTATTATTCGAACCGAATTAGGAAATCCTAGTTTTTTAATTGGAGATGATCAAATTTATAATACCATTGTAACAGCTCATGCTTTTATTATAATTTTTTTTATAGTTATGCCAATTATAATTGGAGGATTTGGTAATTGACTTGTACCATTAATATTGGGAGCCCCAGATATAGCTTTCCCCCGAATAAATTATATAAGATTTTGATTATTGCCTCCATCCTTAATTCTTTTAATTTCAAGTAGAATTGTAGAAAATGGGGCAGGAACTGGATGAACAGTTTACCCCCCACTTTCATCTAATATTGCCCATAGAGGAGCTTCAGTGGATTTAGCTATTTTTTCTTTACATTTAGCTGGGATTTCCTCTATCTTAGGAGCTATTAATTTTATTACTACAATTATTAATATACGAATTAATAATATATCTTATGATCAAATACCTTTATTTGTATGAGCAGTAGGAATTACAGCATTACTTCTCTTATTATCTTTACCAGTTTTAGCTGGAGCTATTACTATACTTTTAACGGATCGAAATCTTAATACCTCATTTTTTGATTCCTGCGGAGGAGGAGATCC????????????????????????????????? >CP100-11_Melitaea_diamina ??TGAGCCGGTATAATTGGTACATCCCTAAGTCTTATTATTCGAACCGAATTAGGAAATCCTAGTTTTTTAATTGGAGATGATCAAATTTATAATACCATTGTAACAGCTCATGCTTTTATTATAATTTTTTTTATAGTTATGCCAATTATAATTGGAGGATTTGGTAATTGACTTGTACCATTAATATTGGGAGCCCCAGATATAGCTTTCCCCCGAATAAATTATATAAGATTTTGATTATTGCCTCCATCCTTAATTCTTTTAATTTCAAGTAGAATTGTAGAAAATGGGGCAGGAACTGGATGAACAGTTTACCCCCCACTTTCATCTAATATTGCCCATAGAGGAGCTTCAGTGGATTTAGCTATTTTTTCTTTACATTTAGCTGGGATTTCCTCTATCTTAGGAGCTATTAATTTTATTACTACAATTATTAATATACGAATTAATAATATATCTTATGATCAAATACCTTTATTTGTATGAGCAGTAGGAATTACAGCATTACTTCTCTTATTATCTTTACCAGTTTTAGCTGGAGCTATTACTATACTTTTAACGGATCGAAATCTTAATACCTCATTTTTTGATTCCTGCGGAGGAGGAGATCC????????????????????????????????? 
""" result = dataset_creator.dataset_str self.assertEqual(expected.strip(), result) def test_dataset_all_codons_1st_2nd_3rd_as_one(self): g1 = Genes.objects.get(gene_code='COI') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] cleaned_data['positions'] = list(['ALL', '1st', '2nd', '3rd'], ) dataset_creator = CreateDataset(cleaned_data) expected = """ >coi -------------------- >CP100-10_Papilionoidea_Melitaea_diamina ?????????????????????????TGAGCCGGTATAATTGGTACATCCCTAAGTCTTATTATTCGAACCGAATTAGGAAATCCTAGTTTTTTAATTGGAGATGATCAAATTTATAATACCATTGTAACAGCTCATGCTTTTATTATAATTTTTTTTATAGTTATGCCAATTATAATTGGAGGATTTGGTAATTGACTTGTACCATTAATATTGGGAGCCCCAGATATAGCTTTCCCCCGAATAAATTATATAAGATTTTGATTATTGCCTCCATCCTTAATTCTTTTAATTTCAAGTAGAATTGTAGAAAATGGGGCAGGAACTGGATGAACAGTTTACCCCCCACTTTCATCTAATATTGCCCATAGAGGAGCTTCAGTGGATTTAGCTATTTTTTCTTTACATTTAGCTGGGATTTCCTCTATCTTAGGAGCTATTAATTTTATTACTACAATTATTAATATACGAATTAATAATATATCTTATGATCAAATACCTTTATTTGTATGAGCAGTAGGAATTACAGCATTACTTCTCTTATTATCTTTACCAGTTTTAGCTGGAGCTATTACTATACTTTTAACGGATCGAAATCTTAATACCTCATTTTTTGATTCCTGCGGAGGAGGAGATCC????????????????????????????????? >CP100-11_Melitaea_diamina ??TGAGCCGGTATAATTGGTACATCCCTAAGTCTTATTATTCGAACCGAATTAGGAAATCCTAGTTTTTTAATTGGAGATGATCAAATTTATAATACCATTGTAACAGCTCATGCTTTTATTATAATTTTTTTTATAGTTATGCCAATTATAATTGGAGGATTTGGTAATTGACTTGTACCATTAATATTGGGAGCCCCAGATATAGCTTTCCCCCGAATAAATTATATAAGATTTTGATTATTGCCTCCATCCTTAATTCTTTTAATTTCAAGTAGAATTGTAGAAAATGGGGCAGGAACTGGATGAACAGTTTACCCCCCACTTTCATCTAATATTGCCCATAGAGGAGCTTCAGTGGATTTAGCTATTTTTTCTTTACATTTAGCTGGGATTTCCTCTATCTTAGGAGCTATTAATTTTATTACTACAATTATTAATATACGAATTAATAATATATCTTATGATCAAATACCTTTATTTGTATGAGCAGTAGGAATTACAGCATTACTTCTCTTATTATCTTTACCAGTTTTAGCTGGAGCTATTACTATACTTTTAACGGATCGAAATCTTAATACCTCATTTTTTGATTCCTGCGGAGGAGGAGATCC????????????????????????????????? 
""" result = dataset_creator.dataset_str self.assertEqual(expected.strip(), result) def test_dataset_1st_2nd_3rd_codons_as_one(self): g1 = Genes.objects.get(gene_code='COI') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] cleaned_data['positions'] = list(['1st', '2nd', '3rd'], ) dataset_creator = CreateDataset(cleaned_data) expected = """ >coi -------------------- >CP100-10_Papilionoidea_Melitaea_diamina ?????????????????????????TGAGCCGGTATAATTGGTACATCCCTAAGTCTTATTATTCGAACCGAATTAGGAAATCCTAGTTTTTTAATTGGAGATGATCAAATTTATAATACCATTGTAACAGCTCATGCTTTTATTATAATTTTTTTTATAGTTATGCCAATTATAATTGGAGGATTTGGTAATTGACTTGTACCATTAATATTGGGAGCCCCAGATATAGCTTTCCCCCGAATAAATTATATAAGATTTTGATTATTGCCTCCATCCTTAATTCTTTTAATTTCAAGTAGAATTGTAGAAAATGGGGCAGGAACTGGATGAACAGTTTACCCCCCACTTTCATCTAATATTGCCCATAGAGGAGCTTCAGTGGATTTAGCTATTTTTTCTTTACATTTAGCTGGGATTTCCTCTATCTTAGGAGCTATTAATTTTATTACTACAATTATTAATATACGAATTAATAATATATCTTATGATCAAATACCTTTATTTGTATGAGCAGTAGGAATTACAGCATTACTTCTCTTATTATCTTTACCAGTTTTAGCTGGAGCTATTACTATACTTTTAACGGATCGAAATCTTAATACCTCATTTTTTGATTCCTGCGGAGGAGGAGATCC????????????????????????????????? >CP100-11_Melitaea_diamina ??TGAGCCGGTATAATTGGTACATCCCTAAGTCTTATTATTCGAACCGAATTAGGAAATCCTAGTTTTTTAATTGGAGATGATCAAATTTATAATACCATTGTAACAGCTCATGCTTTTATTATAATTTTTTTTATAGTTATGCCAATTATAATTGGAGGATTTGGTAATTGACTTGTACCATTAATATTGGGAGCCCCAGATATAGCTTTCCCCCGAATAAATTATATAAGATTTTGATTATTGCCTCCATCCTTAATTCTTTTAATTTCAAGTAGAATTGTAGAAAATGGGGCAGGAACTGGATGAACAGTTTACCCCCCACTTTCATCTAATATTGCCCATAGAGGAGCTTCAGTGGATTTAGCTATTTTTTCTTTACATTTAGCTGGGATTTCCTCTATCTTAGGAGCTATTAATTTTATTACTACAATTATTAATATACGAATTAATAATATATCTTATGATCAAATACCTTTATTTGTATGAGCAGTAGGAATTACAGCATTACTTCTCTTATTATCTTTACCAGTTTTAGCTGGAGCTATTACTATACTTTTAACGGATCGAAATCTTAATACCTCATTTTTTGATTCCTGCGGAGGAGGAGATCC????????????????????????????????? 
""" result = dataset_creator.dataset_str self.assertEqual(expected.strip(), result) def test_dataset_all_codons_partitions_each(self): g1 = Genes.objects.get(gene_code='COI') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] cleaned_data['positions'] = list(['ALL'], ) cleaned_data['partition_by_positions'] = 'EACH', dataset_creator = CreateDataset(cleaned_data) expected = """ >coi_1st_codon -------------------- >CP100-10_Papilionoidea_Melitaea_diamina ????????TGGAAGATCACAACAGTGACATTAGGGCATAAAGAGCGTAAATTAGACAAAGGTGATCGCTATGGCGAGTCCAATAATTTTCCTTACTATAAAGGAGGGAGTAGTCCCTTAAGCAGGTGGTGATTTCTGGATTATGGAATAAAAAAACAAAATTGCACTTGTGGGAAGTCCTTTTCGTGGGAAACTAGCACAATTTGTTGGGGC??????????? >CP100-11_Melitaea_diamina ?ACTATTACATTTTACAAATTTTATATTATTTCTAATTTTTATTTATGATATAATTTATAAAAGACATATCCAATTAATAAGTACATTATATATAATGAATAAATCCATATTTCTAATAGTATTTTATATGTCTCAATTTTTTATTTAATTTATTTAATATAAAAATAAATCAATAATATATTTATAGTATTTCATTTCCAAAT??????????? >coi_2nd_codon -------------------- >CP100-10_Papilionoidea_Melitaea_diamina ????????GCGTTGCCTGTTTGCATGACGTTTGAAATAACTTCCACTTTTTTTTTCTTTGGTGAGTTCTTTGCCATCTCGTAATGTGTTCCCTTTTTCGGTTAAGCGCGGCTACCTCCATCAGGCCTATCTTCTATCGTCCTTGCTATTCCTTATGTAATCAAATCTTTGCTGTCCTTTTTCTCTTCGCTCTTTCAGATACCTTACGGGGAC??????????? >CP100-11_Melitaea_diamina TGGAAGATCACAACAGTGACATTAGGGCATAAAGAGCGTAAATTAGACAAAGGTGATCGCTATGGCGAGTCCAATAATTTTCCTTACTATAAAGGAGGGAGTAGTCCCTTAAGCAGGTGGTGATTTCTGGATTATGGAATAAAAAAACAAAATTGCACTTGTGGGAAGTCCTTTTCGTGGGAAACTAGCACAATTTGTTGGGGC??????????? >coi_3rd_codon -------------------- >CP100-10_Papilionoidea_Melitaea_diamina ????????ACTATTACATTTTACAAATTTTATATTATTTCTAATTTTTATTTATGATATAATTTATAAAAGACATATCCAATTAATAAGTACATTATATATAATGAATAAATCCATATTTCTAATAGTATTTTATATGTCTCAATTTTTTATTTAATTTATTTAATATAAAAATAAATCAATAATATATTTATAGTATTTCATTTCCAAAT??????????? 
>CP100-11_Melitaea_diamina GCGTTGCCTGTTTGCATGACGTTTGAAATAACTTCCACTTTTTTTTTCTTTGGTGAGTTCTTTGCCATCTCGTAATGTGTTCCCTTTTTCGGTTAAGCGCGGCTACCTCCATCAGGCCTATCTTCTATCGTCCTTGCTATTCCTTATGTAATCAAATCTTTGCTGTCCTTTTTCTCTTCGCTCTTTCAGATACCTTACGGGGAC??????????? """ result = dataset_creator.dataset_str self.assertEqual(expected.strip(), result) def test_dataset_all_codons_1st_partitions_each(self): g1 = Genes.objects.get(gene_code='COI') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] cleaned_data['positions'] = list(['ALL', '1st'], ) cleaned_data['partition_by_positions'] = 'EACH', dataset_creator = CreateDataset(cleaned_data) expected = """ >coi_1st_codon -------------------- >CP100-10_Papilionoidea_Melitaea_diamina ????????TGGAAGATCACAACAGTGACATTAGGGCATAAAGAGCGTAAATTAGACAAAGGTGATCGCTATGGCGAGTCCAATAATTTTCCTTACTATAAAGGAGGGAGTAGTCCCTTAAGCAGGTGGTGATTTCTGGATTATGGAATAAAAAAACAAAATTGCACTTGTGGGAAGTCCTTTTCGTGGGAAACTAGCACAATTTGTTGGGGC??????????? >CP100-11_Melitaea_diamina ?ACTATTACATTTTACAAATTTTATATTATTTCTAATTTTTATTTATGATATAATTTATAAAAGACATATCCAATTAATAAGTACATTATATATAATGAATAAATCCATATTTCTAATAGTATTTTATATGTCTCAATTTTTTATTTAATTTATTTAATATAAAAATAAATCAATAATATATTTATAGTATTTCATTTCCAAAT??????????? >coi_2nd_codon -------------------- >CP100-10_Papilionoidea_Melitaea_diamina ????????GCGTTGCCTGTTTGCATGACGTTTGAAATAACTTCCACTTTTTTTTTCTTTGGTGAGTTCTTTGCCATCTCGTAATGTGTTCCCTTTTTCGGTTAAGCGCGGCTACCTCCATCAGGCCTATCTTCTATCGTCCTTGCTATTCCTTATGTAATCAAATCTTTGCTGTCCTTTTTCTCTTCGCTCTTTCAGATACCTTACGGGGAC??????????? >CP100-11_Melitaea_diamina TGGAAGATCACAACAGTGACATTAGGGCATAAAGAGCGTAAATTAGACAAAGGTGATCGCTATGGCGAGTCCAATAATTTTCCTTACTATAAAGGAGGGAGTAGTCCCTTAAGCAGGTGGTGATTTCTGGATTATGGAATAAAAAAACAAAATTGCACTTGTGGGAAGTCCTTTTCGTGGGAAACTAGCACAATTTGTTGGGGC??????????? 
>coi_3rd_codon -------------------- >CP100-10_Papilionoidea_Melitaea_diamina ????????ACTATTACATTTTACAAATTTTATATTATTTCTAATTTTTATTTATGATATAATTTATAAAAGACATATCCAATTAATAAGTACATTATATATAATGAATAAATCCATATTTCTAATAGTATTTTATATGTCTCAATTTTTTATTTAATTTATTTAATATAAAAATAAATCAATAATATATTTATAGTATTTCATTTCCAAAT??????????? >CP100-11_Melitaea_diamina GCGTTGCCTGTTTGCATGACGTTTGAAATAACTTCCACTTTTTTTTTCTTTGGTGAGTTCTTTGCCATCTCGTAATGTGTTCCCTTTTTCGGTTAAGCGCGGCTACCTCCATCAGGCCTATCTTCTATCGTCCTTGCTATTCCTTATGTAATCAAATCTTTGCTGTCCTTTTTCTCTTCGCTCTTTCAGATACCTTACGGGGAC??????????? """ result = dataset_creator.dataset_str self.assertEqual(expected.strip(), result) def test_dataset_all_codons_1st_2nd_partitions_each(self): g1 = Genes.objects.get(gene_code='COI') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] cleaned_data['positions'] = list(['ALL', '1st', '2nd'], ) cleaned_data['partition_by_positions'] = 'EACH', dataset_creator = CreateDataset(cleaned_data) expected = """ >coi_1st_codon -------------------- >CP100-10_Papilionoidea_Melitaea_diamina ????????TGGAAGATCACAACAGTGACATTAGGGCATAAAGAGCGTAAATTAGACAAAGGTGATCGCTATGGCGAGTCCAATAATTTTCCTTACTATAAAGGAGGGAGTAGTCCCTTAAGCAGGTGGTGATTTCTGGATTATGGAATAAAAAAACAAAATTGCACTTGTGGGAAGTCCTTTTCGTGGGAAACTAGCACAATTTGTTGGGGC??????????? >CP100-11_Melitaea_diamina ?ACTATTACATTTTACAAATTTTATATTATTTCTAATTTTTATTTATGATATAATTTATAAAAGACATATCCAATTAATAAGTACATTATATATAATGAATAAATCCATATTTCTAATAGTATTTTATATGTCTCAATTTTTTATTTAATTTATTTAATATAAAAATAAATCAATAATATATTTATAGTATTTCATTTCCAAAT??????????? >coi_2nd_codon -------------------- >CP100-10_Papilionoidea_Melitaea_diamina ????????GCGTTGCCTGTTTGCATGACGTTTGAAATAACTTCCACTTTTTTTTTCTTTGGTGAGTTCTTTGCCATCTCGTAATGTGTTCCCTTTTTCGGTTAAGCGCGGCTACCTCCATCAGGCCTATCTTCTATCGTCCTTGCTATTCCTTATGTAATCAAATCTTTGCTGTCCTTTTTCTCTTCGCTCTTTCAGATACCTTACGGGGAC??????????? 
>CP100-11_Melitaea_diamina TGGAAGATCACAACAGTGACATTAGGGCATAAAGAGCGTAAATTAGACAAAGGTGATCGCTATGGCGAGTCCAATAATTTTCCTTACTATAAAGGAGGGAGTAGTCCCTTAAGCAGGTGGTGATTTCTGGATTATGGAATAAAAAAACAAAATTGCACTTGTGGGAAGTCCTTTTCGTGGGAAACTAGCACAATTTGTTGGGGC??????????? >coi_3rd_codon -------------------- >CP100-10_Papilionoidea_Melitaea_diamina ????????ACTATTACATTTTACAAATTTTATATTATTTCTAATTTTTATTTATGATATAATTTATAAAAGACATATCCAATTAATAAGTACATTATATATAATGAATAAATCCATATTTCTAATAGTATTTTATATGTCTCAATTTTTTATTTAATTTATTTAATATAAAAATAAATCAATAATATATTTATAGTATTTCATTTCCAAAT??????????? >CP100-11_Melitaea_diamina GCGTTGCCTGTTTGCATGACGTTTGAAATAACTTCCACTTTTTTTTTCTTTGGTGAGTTCTTTGCCATCTCGTAATGTGTTCCCTTTTTCGGTTAAGCGCGGCTACCTCCATCAGGCCTATCTTCTATCGTCCTTGCTATTCCTTATGTAATCAAATCTTTGCTGTCCTTTTTCTCTTCGCTCTTTCAGATACCTTACGGGGAC??????????? """ result = dataset_creator.dataset_str self.assertEqual(expected.strip(), result) def test_dataset_all_codons_1st_2nd_3rd_partitions_each(self): g1 = Genes.objects.get(gene_code='COI') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] cleaned_data['positions'] = list(['ALL', '1st', '2nd', '3rd'], ) cleaned_data['partition_by_positions'] = 'EACH', dataset_creator = CreateDataset(cleaned_data) expected = """ >coi_1st_codon -------------------- >CP100-10_Papilionoidea_Melitaea_diamina ????????TGGAAGATCACAACAGTGACATTAGGGCATAAAGAGCGTAAATTAGACAAAGGTGATCGCTATGGCGAGTCCAATAATTTTCCTTACTATAAAGGAGGGAGTAGTCCCTTAAGCAGGTGGTGATTTCTGGATTATGGAATAAAAAAACAAAATTGCACTTGTGGGAAGTCCTTTTCGTGGGAAACTAGCACAATTTGTTGGGGC??????????? >CP100-11_Melitaea_diamina ?ACTATTACATTTTACAAATTTTATATTATTTCTAATTTTTATTTATGATATAATTTATAAAAGACATATCCAATTAATAAGTACATTATATATAATGAATAAATCCATATTTCTAATAGTATTTTATATGTCTCAATTTTTTATTTAATTTATTTAATATAAAAATAAATCAATAATATATTTATAGTATTTCATTTCCAAAT??????????? 
>coi_2nd_codon -------------------- >CP100-10_Papilionoidea_Melitaea_diamina ????????GCGTTGCCTGTTTGCATGACGTTTGAAATAACTTCCACTTTTTTTTTCTTTGGTGAGTTCTTTGCCATCTCGTAATGTGTTCCCTTTTTCGGTTAAGCGCGGCTACCTCCATCAGGCCTATCTTCTATCGTCCTTGCTATTCCTTATGTAATCAAATCTTTGCTGTCCTTTTTCTCTTCGCTCTTTCAGATACCTTACGGGGAC??????????? >CP100-11_Melitaea_diamina TGGAAGATCACAACAGTGACATTAGGGCATAAAGAGCGTAAATTAGACAAAGGTGATCGCTATGGCGAGTCCAATAATTTTCCTTACTATAAAGGAGGGAGTAGTCCCTTAAGCAGGTGGTGATTTCTGGATTATGGAATAAAAAAACAAAATTGCACTTGTGGGAAGTCCTTTTCGTGGGAAACTAGCACAATTTGTTGGGGC??????????? >coi_3rd_codon -------------------- >CP100-10_Papilionoidea_Melitaea_diamina ????????ACTATTACATTTTACAAATTTTATATTATTTCTAATTTTTATTTATGATATAATTTATAAAAGACATATCCAATTAATAAGTACATTATATATAATGAATAAATCCATATTTCTAATAGTATTTTATATGTCTCAATTTTTTATTTAATTTATTTAATATAAAAATAAATCAATAATATATTTATAGTATTTCATTTCCAAAT??????????? >CP100-11_Melitaea_diamina GCGTTGCCTGTTTGCATGACGTTTGAAATAACTTCCACTTTTTTTTTCTTTGGTGAGTTCTTTGCCATCTCGTAATGTGTTCCCTTTTTCGGTTAAGCGCGGCTACCTCCATCAGGCCTATCTTCTATCGTCCTTGCTATTCCTTATGTAATCAAATCTTTGCTGTCCTTTTTCTCTTCGCTCTTTCAGATACCTTACGGGGAC??????????? """ result = dataset_creator.dataset_str self.assertEqual(expected.strip(), result) def test_dataset_1st_codon_one_partition(self): g1 = Genes.objects.get(gene_code='COI') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] del cleaned_data['positions'] cleaned_data['positions'] = ['1st'] dataset_creator = CreateDataset(cleaned_data) expected = """ >coi -------------------- >CP100-10_Papilionoidea_Melitaea_diamina ????????TGGAAGATCACAACAGTGACATTAGGGCATAAAGAGCGTAAATTAGACAAAGGTGATCGCTATGGCGAGTCCAATAATTTTCCTTACTATAAAGGAGGGAGTAGTCCCTTAAGCAGGTGGTGATTTCTGGATTATGGAATAAAAAAACAAAATTGCACTTGTGGGAAGTCCTTTTCGTGGGAAACTAGCACAATTTGTTGGGGC??????????? >CP100-11_Melitaea_diamina ?ACTATTACATTTTACAAATTTTATATTATTTCTAATTTTTATTTATGATATAATTTATAAAAGACATATCCAATTAATAAGTACATTATATATAATGAATAAATCCATATTTCTAATAGTATTTTATATGTCTCAATTTTTTATTTAATTTATTTAATATAAAAATAAATCAATAATATATTTATAGTATTTCATTTCCAAAT??????????? 
""" result = dataset_creator.dataset_str self.assertEqual(expected.strip(), result) def test_dataset_2nd_codon_one_partition(self): g1 = Genes.objects.get(gene_code='COI') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] del cleaned_data['positions'] cleaned_data['positions'] = ['2nd'] dataset_creator = CreateDataset(cleaned_data) expected = """ >coi -------------------- >CP100-10_Papilionoidea_Melitaea_diamina ????????GCGTTGCCTGTTTGCATGACGTTTGAAATAACTTCCACTTTTTTTTTCTTTGGTGAGTTCTTTGCCATCTCGTAATGTGTTCCCTTTTTCGGTTAAGCGCGGCTACCTCCATCAGGCCTATCTTCTATCGTCCTTGCTATTCCTTATGTAATCAAATCTTTGCTGTCCTTTTTCTCTTCGCTCTTTCAGATACCTTACGGGGAC??????????? >CP100-11_Melitaea_diamina TGGAAGATCACAACAGTGACATTAGGGCATAAAGAGCGTAAATTAGACAAAGGTGATCGCTATGGCGAGTCCAATAATTTTCCTTACTATAAAGGAGGGAGTAGTCCCTTAAGCAGGTGGTGATTTCTGGATTATGGAATAAAAAAACAAAATTGCACTTGTGGGAAGTCCTTTTCGTGGGAAACTAGCACAATTTGTTGGGGC??????????? """ result = dataset_creator.dataset_str self.assertEqual(expected.strip(), result) def test_dataset_3rd_codon_one_partition(self): g1 = Genes.objects.get(gene_code='COI') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] del cleaned_data['positions'] cleaned_data['positions'] = ['3rd'] dataset_creator = CreateDataset(cleaned_data) expected = """ >coi -------------------- >CP100-10_Papilionoidea_Melitaea_diamina ????????ACTATTACATTTTACAAATTTTATATTATTTCTAATTTTTATTTATGATATAATTTATAAAAGACATATCCAATTAATAAGTACATTATATATAATGAATAAATCCATATTTCTAATAGTATTTTATATGTCTCAATTTTTTATTTAATTTATTTAATATAAAAATAAATCAATAATATATTTATAGTATTTCATTTCCAAAT??????????? >CP100-11_Melitaea_diamina GCGTTGCCTGTTTGCATGACGTTTGAAATAACTTCCACTTTTTTTTTCTTTGGTGAGTTCTTTGCCATCTCGTAATGTGTTCCCTTTTTCGGTTAAGCGCGGCTACCTCCATCAGGCCTATCTTCTATCGTCCTTGCTATTCCTTATGTAATCAAATCTTTGCTGTCCTTTTTCTCTTCGCTCTTTCAGATACCTTACGGGGAC??????????? 
""" result = dataset_creator.dataset_str self.assertEqual(expected.strip(), result) def test_dataset_1st_2nd_codon_one_partition(self): g1 = Genes.objects.get(gene_code='COI') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] del cleaned_data['positions'] cleaned_data['positions'] = ['1st', '2nd'] dataset_creator = CreateDataset(cleaned_data) expected = """ >coi -------------------- >CP100-10_Papilionoidea_Melitaea_diamina ????????????????TGGCGGATATGGACTCCTAGCTATATCGACGATTGGAACCAGTTTTATGGGAGACAATTAAAACATGTACGCCAGCTTATATATTTTTATGTATCCATATATGGGGTTGGAATGCTGTCCTTATTTGGGCCCGAATGCTTCCCGATAATAATAGTTTGTTTTCCCCTCTTATCTTTATTCAGAGATGTGAAAGGGCGGACGGTGACGTTACCCCCTTCTCAAATGCCAAGGGGCTCGTGATTGCATTTTCTTCATTGCGGATTCTCATTTGGGCATAATTATACACATATAAATCGATAAAAATTCTAGACAATCCTTTTGTTGGCGTGGATACGCTTCTCTTTTTTCTTCCGTTTGCGGGCATACATCTTTACGACGAACTAAACTCTTTTGATCTGGGGGGGGACC?????????????????????? >CP100-11_Melitaea_diamina ?TAGCGTAAATGTAATCCAATCTATATCAACGATAGAATCTATTTTAATGAGTGTCAATTTATACATGAAAGTCTGTTTATAAATTTTTAAGTAGCAATAAATGAGATTGTATTACTGACATAAATGGAGCCAGTAAGTTCCCCAAAATTTAAAATTTATATGCTCATCTAATCTTAATTAATAAATGAGAATGGGAGAATGATAAAGTTCCCCACTTATTATATGCCTAAGAGTTAGGGTTAGTATTTTTTACTTAGTGGATTCTTACTAGAGTATATTTATATAAATATATAACAATATATAATTTTGTCAAACTTATTGATAGAGAGAATAAGATACTCCTATATTTACAGTTAGTGAGTATATAACTTAAGGTCAATCTATACTATTTTGTTCTCGAGAGAGTC?????????????????????? 
""" result = dataset_creator.dataset_str self.assertEqual(expected.strip(), result) def test_dataset_2nd_3rd_codon_one_partition(self): g1 = Genes.objects.get(gene_code='COI') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] del cleaned_data['positions'] cleaned_data['positions'] = ['2nd', '3rd'] dataset_creator = CreateDataset(cleaned_data) expected = """ >coi -------------------- >CP100-10_Papilionoidea_Melitaea_diamina ????????????????GACCGTTATTGTCACCTAGTTTTTTTGACCAATAGAATCTGTTTTATTGAATATAATTATATCCTTTACACTATCTTTTTTATTTTTTTATTTGCATTTATTGAGATTGTATGATTTACATATATGGACCCAATTACTTCCCGATAATATTAGATTGATATGCTCACCTATTTTTATTCAGTGATTTAAAATGGCAGACTGAGACATTACCCCATTCACTATTTCCATGAGACTCATGATTACTTTTTCTTAATTACTGGTTCCCTTCTAGACTTTATTTTTCTCATTTTATTAGATTATATTACTATATAATACTTATTTAGACATAGATTCACATATTTCTATACTTACATTTACTGACTTTCTTATTTACGATGAATTTATCCCATTTTATCCGCGAGAGAATC?????????????????????? >CP100-11_Melitaea_diamina TGGCGGATATGGACTCCTAGCTATATCGACGATTGGAACCAGTTTTATGGGAGACAATTAAAACATGTACGCCAGCTTATATATTTTTATGTATCCATATATGGGGTTGGAATGCTGTCCTTATTTGGGCCCGAATGCTTCCCGATAATAATAGTTTGTTTTCCCCTCTTATCTTTATTCAGAGATGTGAAAGGGCGGACGGTGACGTTACCCCCTTCTCAAATGCCAAGGGGCTCGTGATTGCATTTTCTTCATTGCGGATTCTCATTTGGGCATAATTATACACATATAAATCGATAAAAATTCTAGACAATCCTTTTGTTGGCGTGGATACGCTTCTCTTTTTTCTTCCGTTTGCGGGCATACATCTTTACGACGAACTAAACTCTTTTGATCTGGGGGGGGACC?????????????????????? 
""" result = dataset_creator.dataset_str self.assertEqual(expected.strip(), result) def test_dataset_1st_3rd_codon_one_partition(self): g1 = Genes.objects.get(gene_code='COI') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] del cleaned_data['positions'] cleaned_data['positions'] = ['1st', '3rd'] dataset_creator = CreateDataset(cleaned_data) expected = """ >coi -------------------- >CP100-10_Papilionoidea_Melitaea_diamina ????????????????TAGCGTAAATGTAATCCAATCTATATCAACGATAGAATCTATTTTAATGAGTGTCAATTTATACATGAAAGTCTGTTTATAAATTTTTAAGTAGCAATAAATGAGATTGTATTACTGACATAAATGGAGCCAGTAAGTTCCCCAAAATTTAAAATTTATATGCTCATCTAATCTTAATTAATAAATGAGAATGGGAGAATGATAAAGTTCCCCACTTATTATATGCCTAAGAGTTAGGGTTAGTATTTTTTACTTAGTGGATTCTTACTAGAGTATATTTATATAAATATATAACAATATATAATTTTGTCAAACTTATTGATAGAGAGAATAAGATACTCCTATATTTACAGTTAGTGAGTATATAACTTAAGGTCAATCTATACTATTTTGTTCTCGAGAGAGTC?????????????????????? >CP100-11_Melitaea_diamina ?GACCGTTATTGTCACCTAGTTTTTTTGACCAATAGAATCTGTTTTATTGAATATAATTATATCCTTTACACTATCTTTTTTATTTTTTTATTTGCATTTATTGAGATTGTATGATTTACATATATGGACCCAATTACTTCCCGATAATATTAGATTGATATGCTCACCTATTTTTATTCAGTGATTTAAAATGGCAGACTGAGACATTACCCCATTCACTATTTCCATGAGACTCATGATTACTTTTTCTTAATTACTGGTTCCCTTCTAGACTTTATTTTTCTCATTTTATTAGATTATATTACTATATAATACTTATTTAGACATAGATTCACATATTTCTATACTTACATTTACTGACTTTCTTATTTACGATGAATTTATCCCATTTTATCCGCGAGAGAATC?????????????????????? 
""" result = dataset_creator.dataset_str self.assertEqual(expected.strip(), result) def test_dataset_1st_each(self): g1 = Genes.objects.get(gene_code='COI') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] del cleaned_data['positions'] cleaned_data['positions'] = ['1st'] cleaned_data['partition_by_positions'] = 'EACH' dataset_creator = CreateDataset(cleaned_data) expected = """ >coi -------------------- >CP100-10_Papilionoidea_Melitaea_diamina ????????TGGAAGATCACAACAGTGACATTAGGGCATAAAGAGCGTAAATTAGACAAAGGTGATCGCTATGGCGAGTCCAATAATTTTCCTTACTATAAAGGAGGGAGTAGTCCCTTAAGCAGGTGGTGATTTCTGGATTATGGAATAAAAAAACAAAATTGCACTTGTGGGAAGTCCTTTTCGTGGGAAACTAGCACAATTTGTTGGGGC??????????? >CP100-11_Melitaea_diamina ?ACTATTACATTTTACAAATTTTATATTATTTCTAATTTTTATTTATGATATAATTTATAAAAGACATATCCAATTAATAAGTACATTATATATAATGAATAAATCCATATTTCTAATAGTATTTTATATGTCTCAATTTTTTATTTAATTTATTTAATATAAAAATAAATCAATAATATATTTATAGTATTTCATTTCCAAAT??????????? """ result = dataset_creator.dataset_str self.assertEqual(expected.strip(), result) def test_dataset_2nd_each(self): g1 = Genes.objects.get(gene_code='COI') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] del cleaned_data['positions'] cleaned_data['positions'] = ['2nd'] cleaned_data['partition_by_positions'] = 'EACH' dataset_creator = CreateDataset(cleaned_data) expected = """ >coi -------------------- >CP100-10_Papilionoidea_Melitaea_diamina ????????GCGTTGCCTGTTTGCATGACGTTTGAAATAACTTCCACTTTTTTTTTCTTTGGTGAGTTCTTTGCCATCTCGTAATGTGTTCCCTTTTTCGGTTAAGCGCGGCTACCTCCATCAGGCCTATCTTCTATCGTCCTTGCTATTCCTTATGTAATCAAATCTTTGCTGTCCTTTTTCTCTTCGCTCTTTCAGATACCTTACGGGGAC??????????? >CP100-11_Melitaea_diamina TGGAAGATCACAACAGTGACATTAGGGCATAAAGAGCGTAAATTAGACAAAGGTGATCGCTATGGCGAGTCCAATAATTTTCCTTACTATAAAGGAGGGAGTAGTCCCTTAAGCAGGTGGTGATTTCTGGATTATGGAATAAAAAAACAAAATTGCACTTGTGGGAAGTCCTTTTCGTGGGAAACTAGCACAATTTGTTGGGGC??????????? 
""" result = dataset_creator.dataset_str self.assertEqual(expected.strip(), result) def test_dataset_3rd_each(self): g1 = Genes.objects.get(gene_code='COI') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] del cleaned_data['positions'] cleaned_data['positions'] = ['3rd'] cleaned_data['partition_by_positions'] = 'EACH' dataset_creator = CreateDataset(cleaned_data) expected = """ >coi -------------------- >CP100-10_Papilionoidea_Melitaea_diamina ????????ACTATTACATTTTACAAATTTTATATTATTTCTAATTTTTATTTATGATATAATTTATAAAAGACATATCCAATTAATAAGTACATTATATATAATGAATAAATCCATATTTCTAATAGTATTTTATATGTCTCAATTTTTTATTTAATTTATTTAATATAAAAATAAATCAATAATATATTTATAGTATTTCATTTCCAAAT??????????? >CP100-11_Melitaea_diamina GCGTTGCCTGTTTGCATGACGTTTGAAATAACTTCCACTTTTTTTTTCTTTGGTGAGTTCTTTGCCATCTCGTAATGTGTTCCCTTTTTCGGTTAAGCGCGGCTACCTCCATCAGGCCTATCTTCTATCGTCCTTGCTATTCCTTATGTAATCAAATCTTTGCTGTCCTTTTTCTCTTCGCTCTTTCAGATACCTTACGGGGAC??????????? """ result = dataset_creator.dataset_str self.assertEqual(expected.strip(), result) def test_dataset_1st_2nd_each(self): g1 = Genes.objects.get(gene_code='COI') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] del cleaned_data['positions'] cleaned_data['positions'] = ['1st', '2nd'] cleaned_data['partition_by_positions'] = 'EACH' dataset_creator = CreateDataset(cleaned_data) expected = """ >coi_1st_codon -------------------- >CP100-10_Papilionoidea_Melitaea_diamina ????????TGGAAGATCACAACAGTGACATTAGGGCATAAAGAGCGTAAATTAGACAAAGGTGATCGCTATGGCGAGTCCAATAATTTTCCTTACTATAAAGGAGGGAGTAGTCCCTTAAGCAGGTGGTGATTTCTGGATTATGGAATAAAAAAACAAAATTGCACTTGTGGGAAGTCCTTTTCGTGGGAAACTAGCACAATTTGTTGGGGC??????????? >CP100-11_Melitaea_diamina ?ACTATTACATTTTACAAATTTTATATTATTTCTAATTTTTATTTATGATATAATTTATAAAAGACATATCCAATTAATAAGTACATTATATATAATGAATAAATCCATATTTCTAATAGTATTTTATATGTCTCAATTTTTTATTTAATTTATTTAATATAAAAATAAATCAATAATATATTTATAGTATTTCATTTCCAAAT??????????? 
>coi_2nd_codon -------------------- >CP100-10_Papilionoidea_Melitaea_diamina ????????GCGTTGCCTGTTTGCATGACGTTTGAAATAACTTCCACTTTTTTTTTCTTTGGTGAGTTCTTTGCCATCTCGTAATGTGTTCCCTTTTTCGGTTAAGCGCGGCTACCTCCATCAGGCCTATCTTCTATCGTCCTTGCTATTCCTTATGTAATCAAATCTTTGCTGTCCTTTTTCTCTTCGCTCTTTCAGATACCTTACGGGGAC??????????? >CP100-11_Melitaea_diamina TGGAAGATCACAACAGTGACATTAGGGCATAAAGAGCGTAAATTAGACAAAGGTGATCGCTATGGCGAGTCCAATAATTTTCCTTACTATAAAGGAGGGAGTAGTCCCTTAAGCAGGTGGTGATTTCTGGATTATGGAATAAAAAAACAAAATTGCACTTGTGGGAAGTCCTTTTCGTGGGAAACTAGCACAATTTGTTGGGGC??????????? """ result = dataset_creator.dataset_str self.assertEqual(expected.strip(), result) def test_dataset_1st_3rd_each(self): g1 = Genes.objects.get(gene_code='COI') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] del cleaned_data['positions'] cleaned_data['positions'] = ['1st', '3rd'] cleaned_data['partition_by_positions'] = 'EACH' dataset_creator = CreateDataset(cleaned_data) expected = """ >coi_1st_codon -------------------- >CP100-10_Papilionoidea_Melitaea_diamina ????????TGGAAGATCACAACAGTGACATTAGGGCATAAAGAGCGTAAATTAGACAAAGGTGATCGCTATGGCGAGTCCAATAATTTTCCTTACTATAAAGGAGGGAGTAGTCCCTTAAGCAGGTGGTGATTTCTGGATTATGGAATAAAAAAACAAAATTGCACTTGTGGGAAGTCCTTTTCGTGGGAAACTAGCACAATTTGTTGGGGC??????????? >CP100-11_Melitaea_diamina ?ACTATTACATTTTACAAATTTTATATTATTTCTAATTTTTATTTATGATATAATTTATAAAAGACATATCCAATTAATAAGTACATTATATATAATGAATAAATCCATATTTCTAATAGTATTTTATATGTCTCAATTTTTTATTTAATTTATTTAATATAAAAATAAATCAATAATATATTTATAGTATTTCATTTCCAAAT??????????? >coi_3rd_codon -------------------- >CP100-10_Papilionoidea_Melitaea_diamina ????????ACTATTACATTTTACAAATTTTATATTATTTCTAATTTTTATTTATGATATAATTTATAAAAGACATATCCAATTAATAAGTACATTATATATAATGAATAAATCCATATTTCTAATAGTATTTTATATGTCTCAATTTTTTATTTAATTTATTTAATATAAAAATAAATCAATAATATATTTATAGTATTTCATTTCCAAAT??????????? 
>CP100-11_Melitaea_diamina GCGTTGCCTGTTTGCATGACGTTTGAAATAACTTCCACTTTTTTTTTCTTTGGTGAGTTCTTTGCCATCTCGTAATGTGTTCCCTTTTTCGGTTAAGCGCGGCTACCTCCATCAGGCCTATCTTCTATCGTCCTTGCTATTCCTTATGTAATCAAATCTTTGCTGTCCTTTTTCTCTTCGCTCTTTCAGATACCTTACGGGGAC??????????? """ result = dataset_creator.dataset_str self.assertEqual(expected.strip(), result) def test_dataset_2nd_3rd_each(self): g1 = Genes.objects.get(gene_code='COI') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] del cleaned_data['positions'] cleaned_data['positions'] = ['2nd', '3rd'] cleaned_data['partition_by_positions'] = 'EACH' dataset_creator = CreateDataset(cleaned_data) expected = """ >coi_2nd_codon -------------------- >CP100-10_Papilionoidea_Melitaea_diamina ????????GCGTTGCCTGTTTGCATGACGTTTGAAATAACTTCCACTTTTTTTTTCTTTGGTGAGTTCTTTGCCATCTCGTAATGTGTTCCCTTTTTCGGTTAAGCGCGGCTACCTCCATCAGGCCTATCTTCTATCGTCCTTGCTATTCCTTATGTAATCAAATCTTTGCTGTCCTTTTTCTCTTCGCTCTTTCAGATACCTTACGGGGAC??????????? >CP100-11_Melitaea_diamina TGGAAGATCACAACAGTGACATTAGGGCATAAAGAGCGTAAATTAGACAAAGGTGATCGCTATGGCGAGTCCAATAATTTTCCTTACTATAAAGGAGGGAGTAGTCCCTTAAGCAGGTGGTGATTTCTGGATTATGGAATAAAAAAACAAAATTGCACTTGTGGGAAGTCCTTTTCGTGGGAAACTAGCACAATTTGTTGGGGC??????????? >coi_3rd_codon -------------------- >CP100-10_Papilionoidea_Melitaea_diamina ????????ACTATTACATTTTACAAATTTTATATTATTTCTAATTTTTATTTATGATATAATTTATAAAAGACATATCCAATTAATAAGTACATTATATATAATGAATAAATCCATATTTCTAATAGTATTTTATATGTCTCAATTTTTTATTTAATTTATTTAATATAAAAATAAATCAATAATATATTTATAGTATTTCATTTCCAAAT??????????? >CP100-11_Melitaea_diamina GCGTTGCCTGTTTGCATGACGTTTGAAATAACTTCCACTTTTTTTTTCTTTGGTGAGTTCTTTGCCATCTCGTAATGTGTTCCCTTTTTCGGTTAAGCGCGGCTACCTCCATCAGGCCTATCTTCTATCGTCCTTGCTATTCCTTATGTAATCAAATCTTTGCTGTCCTTTTTCTCTTCGCTCTTTCAGATACCTTACGGGGAC??????????? 
""" result = dataset_creator.dataset_str self.assertEqual('\n' + expected.strip(), result) def test_dataset_1st_2nd_3rd_each(self): g1 = Genes.objects.get(gene_code='COI') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] del cleaned_data['positions'] cleaned_data['positions'] = ['1st', '2nd', '3rd'] cleaned_data['partition_by_positions'] = 'EACH' dataset_creator = CreateDataset(cleaned_data) expected = """ >coi_1st_codon -------------------- >CP100-10_Papilionoidea_Melitaea_diamina ????????TGGAAGATCACAACAGTGACATTAGGGCATAAAGAGCGTAAATTAGACAAAGGTGATCGCTATGGCGAGTCCAATAATTTTCCTTACTATAAAGGAGGGAGTAGTCCCTTAAGCAGGTGGTGATTTCTGGATTATGGAATAAAAAAACAAAATTGCACTTGTGGGAAGTCCTTTTCGTGGGAAACTAGCACAATTTGTTGGGGC??????????? >CP100-11_Melitaea_diamina ?ACTATTACATTTTACAAATTTTATATTATTTCTAATTTTTATTTATGATATAATTTATAAAAGACATATCCAATTAATAAGTACATTATATATAATGAATAAATCCATATTTCTAATAGTATTTTATATGTCTCAATTTTTTATTTAATTTATTTAATATAAAAATAAATCAATAATATATTTATAGTATTTCATTTCCAAAT??????????? >coi_2nd_codon -------------------- >CP100-10_Papilionoidea_Melitaea_diamina ????????GCGTTGCCTGTTTGCATGACGTTTGAAATAACTTCCACTTTTTTTTTCTTTGGTGAGTTCTTTGCCATCTCGTAATGTGTTCCCTTTTTCGGTTAAGCGCGGCTACCTCCATCAGGCCTATCTTCTATCGTCCTTGCTATTCCTTATGTAATCAAATCTTTGCTGTCCTTTTTCTCTTCGCTCTTTCAGATACCTTACGGGGAC??????????? >CP100-11_Melitaea_diamina TGGAAGATCACAACAGTGACATTAGGGCATAAAGAGCGTAAATTAGACAAAGGTGATCGCTATGGCGAGTCCAATAATTTTCCTTACTATAAAGGAGGGAGTAGTCCCTTAAGCAGGTGGTGATTTCTGGATTATGGAATAAAAAAACAAAATTGCACTTGTGGGAAGTCCTTTTCGTGGGAAACTAGCACAATTTGTTGGGGC??????????? >coi_3rd_codon -------------------- >CP100-10_Papilionoidea_Melitaea_diamina ????????ACTATTACATTTTACAAATTTTATATTATTTCTAATTTTTATTTATGATATAATTTATAAAAGACATATCCAATTAATAAGTACATTATATATAATGAATAAATCCATATTTCTAATAGTATTTTATATGTCTCAATTTTTTATTTAATTTATTTAATATAAAAATAAATCAATAATATATTTATAGTATTTCATTTCCAAAT??????????? 
>CP100-11_Melitaea_diamina GCGTTGCCTGTTTGCATGACGTTTGAAATAACTTCCACTTTTTTTTTCTTTGGTGAGTTCTTTGCCATCTCGTAATGTGTTCCCTTTTTCGGTTAAGCGCGGCTACCTCCATCAGGCCTATCTTCTATCGTCCTTGCTATTCCTTATGTAATCAAATCTTTGCTGTCCTTTTTCTCTTCGCTCTTTCAGATACCTTACGGGGAC??????????? """ result = dataset_creator.dataset_str self.assertEqual(expected.strip(), result) def test_dataset_ALL_1st_2nd_3rd_each(self): g1 = Genes.objects.get(gene_code='COI') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] del cleaned_data['positions'] cleaned_data['positions'] = ['ALL', '1st', '2nd', '3rd'] cleaned_data['partition_by_positions'] = 'EACH' dataset_creator = CreateDataset(cleaned_data) expected = """ >coi_1st_codon -------------------- >CP100-10_Papilionoidea_Melitaea_diamina ????????TGGAAGATCACAACAGTGACATTAGGGCATAAAGAGCGTAAATTAGACAAAGGTGATCGCTATGGCGAGTCCAATAATTTTCCTTACTATAAAGGAGGGAGTAGTCCCTTAAGCAGGTGGTGATTTCTGGATTATGGAATAAAAAAACAAAATTGCACTTGTGGGAAGTCCTTTTCGTGGGAAACTAGCACAATTTGTTGGGGC??????????? >CP100-11_Melitaea_diamina ?ACTATTACATTTTACAAATTTTATATTATTTCTAATTTTTATTTATGATATAATTTATAAAAGACATATCCAATTAATAAGTACATTATATATAATGAATAAATCCATATTTCTAATAGTATTTTATATGTCTCAATTTTTTATTTAATTTATTTAATATAAAAATAAATCAATAATATATTTATAGTATTTCATTTCCAAAT??????????? >coi_2nd_codon -------------------- >CP100-10_Papilionoidea_Melitaea_diamina ????????GCGTTGCCTGTTTGCATGACGTTTGAAATAACTTCCACTTTTTTTTTCTTTGGTGAGTTCTTTGCCATCTCGTAATGTGTTCCCTTTTTCGGTTAAGCGCGGCTACCTCCATCAGGCCTATCTTCTATCGTCCTTGCTATTCCTTATGTAATCAAATCTTTGCTGTCCTTTTTCTCTTCGCTCTTTCAGATACCTTACGGGGAC??????????? >CP100-11_Melitaea_diamina TGGAAGATCACAACAGTGACATTAGGGCATAAAGAGCGTAAATTAGACAAAGGTGATCGCTATGGCGAGTCCAATAATTTTCCTTACTATAAAGGAGGGAGTAGTCCCTTAAGCAGGTGGTGATTTCTGGATTATGGAATAAAAAAACAAAATTGCACTTGTGGGAAGTCCTTTTCGTGGGAAACTAGCACAATTTGTTGGGGC??????????? 
>coi_3rd_codon -------------------- >CP100-10_Papilionoidea_Melitaea_diamina ????????ACTATTACATTTTACAAATTTTATATTATTTCTAATTTTTATTTATGATATAATTTATAAAAGACATATCCAATTAATAAGTACATTATATATAATGAATAAATCCATATTTCTAATAGTATTTTATATGTCTCAATTTTTTATTTAATTTATTTAATATAAAAATAAATCAATAATATATTTATAGTATTTCATTTCCAAAT??????????? >CP100-11_Melitaea_diamina GCGTTGCCTGTTTGCATGACGTTTGAAATAACTTCCACTTTTTTTTTCTTTGGTGAGTTCTTTGCCATCTCGTAATGTGTTCCCTTTTTCGGTTAAGCGCGGCTACCTCCATCAGGCCTATCTTCTATCGTCCTTGCTATTCCTTATGTAATCAAATCTTTGCTGTCCTTTTTCTCTTCGCTCTTTCAGATACCTTACGGGGAC??????????? """ result = dataset_creator.dataset_str self.assertEqual(expected.strip(), result)
class CreateDatasetUtilsTest(TestCase): def setUp(self): args = [] opts = {'dumpfile': 'test_db_dump2.xml', 'verbosity': 0} cmd = 'migrate_db' call_command(cmd, *args, **opts) g1 = Genes.objects.get(gene_code='COI-begin') g2 = Genes.objects.get(gene_code='ef1a') self.cleaned_data = { 'gene_codes': [g1, g2], 'taxonset': None, 'voucher_codes': 'CP100-10\r\nCP100-11', 'geneset': None, 'taxon_names': ['CODE', 'SUPERFAMILY', 'GENUS', 'SPECIES'], 'positions': ['ALL'], 'translations': False, 'partition_by_positions': 'by gene', 'degen_translations': None, 'number_genes': None, 'file_format': 'FASTA', 'aminoacids': False, 'outgroup': None, } self.c = Client() self.dataset_creator = CreateDataset(self.cleaned_data) self.maxDiff = None def test_create_dataset(self): expected = '>CP100_10_Papilionoidea_Aus_aus' result = self.dataset_creator.dataset_str self.assertTrue(expected in result) def test_create_dataset_with_gene_code(self): self.cleaned_data['taxon_names'] = ['CODE', 'GENECODE'] dataset_creator = CreateDataset(self.cleaned_data) expected = ">CP100_10\n" result = dataset_creator.dataset_str self.assertTrue(expected in result) def test_get_taxon_names_for_taxa(self): expected = { 'CP100-10': { 'code': 'CP100-10', 'genus': 'Aus', 'species': 'aus', 'superfamily': 'Papilionoidea' }, 'CP100-11': { 'code': 'CP100-11', 'genus': 'Aus', 'species': 'bus', 'superfamily': 'Papilionoidea' }, } result = self.dataset_creator.get_taxon_names_for_taxa() self.assertEqual(expected, result) def test_create_dataset_drop_voucher(self): cleaned_data = self.cleaned_data cleaned_data['voucher_codes'] = 'CP100-10\r\n--CP100-11' cleaned_data['taxonset'] = TaxonSets.objects.get( taxonset_name='all_taxa') dataset_creator = CreateDataset(cleaned_data) result = dataset_creator.dataset_str self.assertTrue('CP100_11' not in result) def test_get_taxon_names_for_taxa_additional_fields(self): self.cleaned_data['taxon_names'] = ['SUPERFAMILY'] dataset_creator = CreateDataset(self.cleaned_data) expected = 
{ 'CP100-10': { 'code': 'CP100-10', 'superfamily': 'Papilionoidea' }, 'CP100-11': { 'code': 'CP100-11', 'superfamily': 'Papilionoidea' }, } result = dataset_creator.get_taxon_names_for_taxa() self.assertEqual(expected, result) def test_dataset_all_codons_as_one(self): g1 = Genes.objects.get(gene_code='COI-begin') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] dataset_creator = CreateDataset(cleaned_data) expected = """ >CP100_10_Papilionoidea_Aus_aus CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGA >CP100_11_Papilionoidea_Aus_bus CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGA """ result = dataset_creator.dataset_str self.assertEqual(expected.lstrip(), result) def 
test_dataset_all_codons_as_one_with_no_reading_frame(self): g1 = Genes.objects.get(gene_code='COI-begin') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] dataset_creator = CreateDataset(cleaned_data) expected = """ >CP100_10_Papilionoidea_Aus_aus CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGA >CP100_11_Papilionoidea_Aus_bus CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGA """ result = dataset_creator.dataset_str self.assertEqual(expected.lstrip(), result) def test_dataset_all_codons_1st_as_one(self): g1 = Genes.objects.get(gene_code='COI-begin') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] cleaned_data['positions'] = list(['ALL', '1st'], ) dataset_creator = 
CreateDataset(cleaned_data) expected = """ >CP100_10_Papilionoidea_Aus_aus CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGA >CP100_11_Papilionoidea_Aus_bus CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGA """ result = dataset_creator.dataset_str self.assertEqual(expected.lstrip(), result) def test_dataset_all_codons_1st_2nd_as_one(self): g1 = Genes.objects.get(gene_code='COI-begin') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] cleaned_data['positions'] = list(['ALL', '1st', '2nd'], ) dataset_creator = CreateDataset(cleaned_data) expected = """ >CP100_10_Papilionoidea_Aus_aus CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA 
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGA >CP100_11_Papilionoidea_Aus_bus CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGA """ result = dataset_creator.dataset_str self.assertEqual(expected.lstrip(), result) def test_dataset_all_codons_1st_2nd_3rd_as_one(self): g1 = Genes.objects.get(gene_code='COI-begin') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] cleaned_data['positions'] = list(['ALL', '1st', '2nd', '3rd'], ) dataset_creator = CreateDataset(cleaned_data) expected = """ >CP100_10_Papilionoidea_Aus_aus CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA 
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGA >CP100_11_Papilionoidea_Aus_bus CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGA """ result = dataset_creator.dataset_str self.assertEqual(expected.lstrip(), result) def test_dataset_1st_2nd_3rd_codons_as_one(self): g1 = Genes.objects.get(gene_code='COI-begin') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] cleaned_data['positions'] = list(['1st', '2nd', '3rd'], ) dataset_creator = CreateDataset(cleaned_data) expected = """ >CP100_10_Papilionoidea_Aus_aus CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA 
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGA >CP100_11_Papilionoidea_Aus_bus CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGA """ result = dataset_creator.dataset_str self.assertEqual(expected.lstrip(), result) def test_dataset_1st_2nd_3rd_codons_as_1st2nd_3rd_gene_with_no_reading_frame( self): g1 = Genes.objects.get(gene_code='COI-begin') g1.reading_frame = None g1.save() cleaned_data = self.cleaned_data.copy() cleaned_data['gene_codes'] = [g1] cleaned_data['positions'] = list(['1st', '2nd', '3rd'], ) cleaned_data['partition_by_positions'] = '1st-2nd, 3rd' dataset_creator = CreateDataset(cleaned_data) expected = "" result = dataset_creator.dataset_str self.assertEqual(expected.lstrip(), result) def test_dataset_all_codons_partitions_each(self): # TODO: fix test after fixing dataset-creator issue #26 g1 = Genes.objects.get(gene_code='COI-begin') cleaned_data = self.cleaned_data.copy() cleaned_data['gene_codes'] = [g1] cleaned_data['positions'] = list(['ALL'], ) cleaned_data['partition_by_positions'] = 'by gene' dataset_creator = CreateDataset(cleaned_data) expected = """ >CP100_10_Papilionoidea_Aus_aus CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA 
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGA >CP100_11_Papilionoidea_Aus_bus CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGA """ result = dataset_creator.dataset_str self.assertEqual(expected.lstrip(), result) def test_dataset_all_codons_1st_partitions_each(self): g1 = Genes.objects.get(gene_code='COI-begin') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] cleaned_data['positions'] = list(['ALL', '1st'], ) cleaned_data['partition_by_positions'] = 'by gene' dataset_creator = CreateDataset(cleaned_data) expected = """ >CP100_10_Papilionoidea_Aus_aus CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA 
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGA >CP100_11_Papilionoidea_Aus_bus CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGA """ result = dataset_creator.dataset_str self.assertEqual(expected.lstrip(), result) def test_dataset_all_codons_1st_2nd_partitions_each(self): g1 = Genes.objects.get(gene_code='COI-begin') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] cleaned_data['positions'] = list(['ALL', '1st', '2nd'], ) cleaned_data['partition_by_positions'] = 'by gene' dataset_creator = CreateDataset(cleaned_data) expected = """ >CP100_10_Papilionoidea_Aus_aus CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA 
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGA >CP100_11_Papilionoidea_Aus_bus CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGA """ result = dataset_creator.dataset_str self.assertEqual(expected.lstrip(), result) def test_dataset_all_codons_1st_2nd_3rd_partitions_each(self): g1 = Genes.objects.get(gene_code='COI-begin') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] cleaned_data['positions'] = list(['ALL', '1st', '2nd', '3rd'], ) cleaned_data['partition_by_positions'] = 'by gene' dataset_creator = CreateDataset(cleaned_data) expected = """ >CP100_10_Papilionoidea_Aus_aus CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA 
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGA >CP100_11_Papilionoidea_Aus_bus CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGA """ result = dataset_creator.dataset_str self.assertEqual(expected.lstrip(), result) def test_dataset_1st_codon_one_partition(self): g1 = Genes.objects.get(gene_code='COI-begin') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] del cleaned_data['positions'] cleaned_data['positions'] = ['1st'] dataset_creator = CreateDataset(cleaned_data) expected = """ >CP100_10_Papilionoidea_Aus_aus AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA >CP100_11_Papilionoidea_Aus_bus AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA """ result = dataset_creator.dataset_str self.assertEqual(expected.lstrip(), result) def test_dataset_2nd_codon_one_partition(self): g1 = 
Genes.objects.get(gene_code='COI-begin') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] del cleaned_data['positions'] cleaned_data['positions'] = ['2nd'] dataset_creator = CreateDataset(cleaned_data) expected = """ >CP100_10_Papilionoidea_Aus_aus CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC >CP100_11_Papilionoidea_Aus_bus CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC """ result = dataset_creator.dataset_str self.assertEqual(expected.lstrip(), result) def test_dataset_3rd_codon_one_partition(self): g1 = Genes.objects.get(gene_code='COI-begin') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] del cleaned_data['positions'] cleaned_data['positions'] = ['3rd'] dataset_creator = CreateDataset(cleaned_data) expected = """ >CP100_10_Papilionoidea_Aus_aus GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG >CP100_11_Papilionoidea_Aus_bus GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG """ result = dataset_creator.dataset_str self.assertEqual(expected.lstrip(), result) def test_dataset_1st_2nd_codon_one_partition(self): g1 = Genes.objects.get(gene_code='COI-begin') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] del cleaned_data['positions'] cleaned_data['positions'] = ['1st', '2nd'] dataset_creator = 
CreateDataset(cleaned_data) expected = """ >CP100_10_Papilionoidea_Aus_aus ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC ACACACACACACACACACACACACA >CP100_11_Papilionoidea_Aus_bus ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC ACACACACACACACACACACACACA """ result = dataset_creator.dataset_str self.assertEqual(expected.lstrip(), result) def test_dataset_2nd_3rd_codon_one_partition(self): g1 = Genes.objects.get(gene_code='COI-begin') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] del cleaned_data['positions'] cleaned_data['positions'] = ['2nd', '3rd'] dataset_creator = CreateDataset(cleaned_data) expected = 'Cannot create dataset for only codon positions 2nd and 3rd.' result = dataset_creator.errors self.assertTrue(expected in ''.join(str(i) for i in result)) def test_dataset_1st_3rd_codon_one_partition(self): g1 = Genes.objects.get(gene_code='COI-begin') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] del cleaned_data['positions'] cleaned_data['positions'] = ['1st', '3rd'] dataset_creator = CreateDataset(cleaned_data) expected = 'Cannot create dataset for only codon positions 1st and 3rd.' 
result = dataset_creator.errors self.assertTrue(expected in ''.join(str(i) for i in result)) def test_dataset_1st_3rd_codon_partition_1st2nd_3rd_gene_with_no_reading_frame( self): g1 = Genes.objects.get(gene_code='COI-begin') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] del cleaned_data['positions'] cleaned_data['positions'] = ['1st', '3rd'] cleaned_data['partition_by_positions'] = '1st-2nd, 3rd' dataset_creator = CreateDataset(cleaned_data) expected = "" result = dataset_creator.dataset_str self.assertEqual(expected.lstrip(), result) def test_dataset_1st_each(self): g1 = Genes.objects.get(gene_code='COI-begin') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] del cleaned_data['positions'] cleaned_data['positions'] = ['1st'] cleaned_data['partition_by_positions'] = 'by gene' dataset_creator = CreateDataset(cleaned_data) expected = """ >CP100_10_Papilionoidea_Aus_aus AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA >CP100_11_Papilionoidea_Aus_bus AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA """ result = dataset_creator.dataset_str self.assertEqual(expected.lstrip(), result) def test_dataset_2nd_each(self): g1 = Genes.objects.get(gene_code='COI-begin') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] del cleaned_data['positions'] cleaned_data['positions'] = ['2nd'] cleaned_data['partition_by_positions'] = 'by gene' dataset_creator = CreateDataset(cleaned_data) expected = """ >CP100_10_Papilionoidea_Aus_aus CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC 
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC >CP100_11_Papilionoidea_Aus_bus CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC """ result = dataset_creator.dataset_str self.assertEqual(expected.lstrip(), result) def test_dataset_3rd_each(self): g1 = Genes.objects.get(gene_code='COI-begin') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] del cleaned_data['positions'] cleaned_data['positions'] = ['3rd'] cleaned_data['partition_by_positions'] = 'by gene' dataset_creator = CreateDataset(cleaned_data) expected = """ >CP100_10_Papilionoidea_Aus_aus GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG >CP100_11_Papilionoidea_Aus_bus GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG """ result = dataset_creator.dataset_str self.assertEqual(expected.lstrip(), result) def test_dataset_1st_2nd_each(self): # TODO fix test when dataset-creator has issue #26 fixed g1 = Genes.objects.get(gene_code='COI-begin') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] del cleaned_data['positions'] cleaned_data['positions'] = ['1st', '2nd'] cleaned_data['partition_by_positions'] = 'by gene' dataset_creator = CreateDataset(cleaned_data) expected = """ >CP100_10_Papilionoidea_Aus_aus ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC 
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC ACACACACACACACACACACACACA >CP100_11_Papilionoidea_Aus_bus ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC ACACACACACACACACACACACACA """ result = dataset_creator.dataset_str self.assertEqual(expected.lstrip(), result) def test_dataset_1st_3rd_each(self): g1 = Genes.objects.get(gene_code='COI-begin') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] del cleaned_data['positions'] cleaned_data['positions'] = ['1st', '3rd'] cleaned_data['partition_by_positions'] = 'by gene' dataset_creator = CreateDataset(cleaned_data) expected = 'Cannot create dataset for only codon positions 1st and 3rd.' result = dataset_creator.errors self.assertTrue(expected in ''.join(str(i) for i in result)) def test_dataset_2nd_3rd_each(self): g1 = Genes.objects.get(gene_code='COI-begin') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] del cleaned_data['positions'] cleaned_data['positions'] = ['2nd', '3rd'] cleaned_data['partition_by_positions'] = 'by gene' dataset_creator = CreateDataset(cleaned_data) expected = 'Cannot create dataset for only codon positions 2nd and 3rd.' 
result = dataset_creator.errors self.assertTrue(expected in ''.join(str(i) for i in result)) def test_dataset_2nd_3rd_paritions_1st2nd_3rd_gene_with_no_reading_frame( self): g1 = Genes.objects.get(gene_code='COI-begin') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] del cleaned_data['positions'] cleaned_data['positions'] = ['2nd', '3rd'] cleaned_data['partition_by_positions'] = '1st-2nd, 3rd' dataset_creator = CreateDataset(cleaned_data) expected = "" result = dataset_creator.dataset_str self.assertEqual(expected.lstrip(), result) def test_dataset_1st_2nd_3rd_each(self): # TODO Fix this test after fixing dataset-creator issue #26 g1 = Genes.objects.get(gene_code='COI-begin') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] del cleaned_data['positions'] cleaned_data['positions'] = ['1st', '2nd', '3rd'] cleaned_data['partition_by_positions'] = 'by gene' dataset_creator = CreateDataset(cleaned_data) expected = """ >CP100_10_Papilionoidea_Aus_aus CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGA >CP100_11_Papilionoidea_Aus_bus CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA 
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGA """ result = dataset_creator.dataset_str self.assertEqual(expected.lstrip(), result) def test_dataset_all_1st_2nd_3rd_each(self): # TODO: fix test after fixing dataset-creator issue #26 g1 = Genes.objects.get(gene_code='COI-begin') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] del cleaned_data['positions'] cleaned_data['positions'] = ['ALL', '1st', '2nd', '3rd'] cleaned_data['partition_by_positions'] = 'by gene' dataset_creator = CreateDataset(cleaned_data) expected = """ >CP100_10_Papilionoidea_Aus_aus CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGA >CP100_11_Papilionoidea_Aus_bus CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA 
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGA """ result = dataset_creator.dataset_str self.assertEqual(expected.lstrip(), result) def test_dataset_1st_codon_1st2nd_3rd(self): g1 = Genes.objects.get(gene_code='COI-begin') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] del cleaned_data['positions'] cleaned_data['positions'] = ['1st'] cleaned_data['partition_by_positions'] = '1st-2nd, 3rd' dataset_creator = CreateDataset(cleaned_data) expected = """ >COI-begin_1st ---- >CP100_10_Papilionoidea_Aus_aus AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA >CP100_11_Papilionoidea_Aus_bus AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA """ result = dataset_creator.dataset_str self.assertEqual(expected.lstrip(), result) def test_dataset_2nd_codon_1st2nd_3rd(self): g1 = Genes.objects.get(gene_code='COI-begin') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] del cleaned_data['positions'] cleaned_data['positions'] = ['2nd'] cleaned_data['partition_by_positions'] = '1st-2nd, 3rd' dataset_creator = CreateDataset(cleaned_data) expected = """ >COI-begin_2nd ---- >CP100_10_Papilionoidea_Aus_aus 
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC >CP100_11_Papilionoidea_Aus_bus CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC """ result = dataset_creator.dataset_str self.assertEqual(expected.lstrip(), result) def test_dataset_3rd_codon_1st2nd_3rd(self): g1 = Genes.objects.get(gene_code='COI-begin') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] del cleaned_data['positions'] cleaned_data['positions'] = ['3rd'] cleaned_data['partition_by_positions'] = '1st-2nd, 3rd' dataset_creator = CreateDataset(cleaned_data) expected = """ >COI-begin_3rd ---- >CP100_10_Papilionoidea_Aus_aus GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG >CP100_11_Papilionoidea_Aus_bus GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG """ result = dataset_creator.dataset_str self.assertEqual(expected.lstrip(), result) def test_dataset_1st_2nd_codons_1st2nd_3rd(self): g1 = Genes.objects.get(gene_code='COI-begin') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] del cleaned_data['positions'] cleaned_data['positions'] = ['1st', '2nd'] cleaned_data['partition_by_positions'] = '1st-2nd, 3rd' dataset_creator = CreateDataset(cleaned_data) expected = """ >COI-begin_1st-2nd ---- >CP100_10_Papilionoidea_Aus_aus 
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACA >CP100_11_Papilionoidea_Aus_bus ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACA """ result = dataset_creator.dataset_str self.assertEqual(expected.lstrip(), result) def test_dataset_1st_2nd_3rd_codons_1st2nd_3rd(self): g1 = Genes.objects.get(gene_code='COI-begin') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] del cleaned_data['positions'] cleaned_data['positions'] = ['ALL'] cleaned_data['partition_by_positions'] = '1st-2nd, 3rd' dataset_creator = CreateDataset(cleaned_data) expected = """ >COI-begin_1st-2nd ---- >CP100_10_Papilionoidea_Aus_aus ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACA >CP100_11_Papilionoidea_Aus_bus 
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACA >COI-begin_3rd ---- >CP100_10_Papilionoidea_Aus_aus GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG >CP100_11_Papilionoidea_Aus_bus GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG """ result = dataset_creator.dataset_str self.assertEqual(expected.lstrip(), result) def test_dataset_all_1st_codons_1st2nd_3rd(self): g1 = Genes.objects.get(gene_code='COI-begin') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] del cleaned_data['positions'] cleaned_data['positions'] = ['ALL', '1st'] cleaned_data['partition_by_positions'] = '1st-2nd, 3rd' dataset_creator = CreateDataset(cleaned_data) expected = """ >COI-begin_1st-2nd ---- >CP100_10_Papilionoidea_Aus_aus ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACA >CP100_11_Papilionoidea_Aus_bus 
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACA >COI-begin_3rd ---- >CP100_10_Papilionoidea_Aus_aus GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG >CP100_11_Papilionoidea_Aus_bus GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG """ result = dataset_creator.dataset_str self.assertEqual(expected.lstrip(), result) def test_has_no_sequences_for_input_voucher(self): cleaned_data = self.cleaned_data cleaned_data['voucher_codes'] = 'CP100-10\r\nCP100-11\r\nCP1000' dataset_creator = CreateDataset(cleaned_data) self.assertTrue( 'Could not find voucher CP1000' in dataset_creator.warnings) def test_creating_dataset_filename(self): tmp_file_name = 'MEGA_b879d2a046d04821be618bf481b6b08d.txt' result = guess_file_extension(tmp_file_name) expected = 'MEGA_b879d2a046d04821be618bf481b6b08d.meg' self.assertEqual(expected, result)