コード例 #1
0
ファイル: tests_utils.py プロジェクト: carlosp420/VoSeq
 def test_get_taxon_names_for_taxa_additional_fields(self):
     """Requesting only SUPERFAMILY yields code and superfamily per voucher."""
     self.cleaned_data['taxon_names'] = ['SUPERFAMILY']
     creator = CreateDataset(self.cleaned_data)
     # Both vouchers are expected to share the same superfamily.
     wanted = {
         voucher: {'code': voucher, 'superfamily': 'Papilionoidea'}
         for voucher in ('CP100-10', 'CP100-11')
     }
     self.assertEqual(wanted, creator.get_taxon_names_for_taxa())
コード例 #2
0
    def test_get_taxon_names_for_taxa_additional_fields(self):
        """Check the taxon-name mapping when only SUPERFAMILY is requested.

        The expected mapping is keyed by lower-case voucher code, and the
        second voucher is expected to carry an empty superfamily.
        """
        self.cleaned_data['taxon_names'] = ['SUPERFAMILY']
        creator = CreateDataset(self.cleaned_data)
        wanted = {
            'cp100-10': {'superfamily': 'Papilionoidea'},
            'cp100-11': {'superfamily': ''},
        }
        actual = creator.get_taxon_names_for_taxa()
        self.assertEqual(wanted, actual)
コード例 #3
0
ファイル: tests_utils.py プロジェクト: carlosp420/VoSeq
class CreateDatasetUtilsTest(TestCase):
    def setUp(self):
        """Load the test database dump and build the default dataset request.

        Runs the ``migrate_db`` management command with the
        ``test_db_dump2.xml`` dump file, then stores on ``self`` a
        ``cleaned_data`` dict, a test ``Client`` and a ``CreateDataset``
        built from that dict.
        """
        call_command('migrate_db', dumpfile='test_db_dump2.xml', verbosity=0)

        coi_begin = Genes.objects.get(gene_code='COI-begin')
        ef1a = Genes.objects.get(gene_code='ef1a')
        # Default request: two genes, two vouchers, FASTA output.
        self.cleaned_data = dict(
            gene_codes=[coi_begin, ef1a],
            taxonset=None,
            voucher_codes='CP100-10\r\nCP100-11',
            geneset=None,
            taxon_names=['CODE', 'SUPERFAMILY', 'GENUS', 'SPECIES'],
            positions=['ALL'],
            translations=False,
            partition_by_positions='by gene',
            degen_translations=None,
            number_genes=None,
            file_format='FASTA',
            aminoacids=False,
            outgroup=None,
        )

        self.c = Client()
        self.dataset_creator = CreateDataset(self.cleaned_data)
        # No limit on the length of assertion diffs (the expected strings are long).
        self.maxDiff = None

    def test_create_dataset(self):
        """The default dataset contains the header for voucher CP100-10."""
        expected = '>CP100_10_Papilionoidea_Aus_aus'
        result = self.dataset_creator.dataset_str
        # assertIn shows both operands on failure, unlike assertTrue(a in b).
        self.assertIn(expected, result)

    def test_create_dataset_with_gene_code(self):
        """With CODE and GENECODE requested, a bare '>CP100_10' header line appears."""
        self.cleaned_data['taxon_names'] = ['CODE', 'GENECODE']
        dataset_creator = CreateDataset(self.cleaned_data)
        expected = ">CP100_10\n"
        result = dataset_creator.dataset_str
        # assertIn shows both operands on failure, unlike assertTrue(a in b).
        self.assertIn(expected, result)

    def test_get_taxon_names_for_taxa(self):
        """The default request maps each voucher to code, genus, species and superfamily."""
        wanted = {
            'CP100-10': dict(code='CP100-10', genus='Aus', species='aus', superfamily='Papilionoidea'),
            'CP100-11': dict(code='CP100-11', genus='Aus', species='bus', superfamily='Papilionoidea'),
        }
        actual = self.dataset_creator.get_taxon_names_for_taxa()
        self.assertEqual(wanted, actual)

    def test_create_dataset_drop_voucher(self):
        """A voucher code prefixed with '--' is left out of the dataset.

        The 'all_taxa' taxon set is selected at the same time; the dataset
        string must still not mention CP100_11.
        """
        # Work on a copy so the fixture built in setUp is left untouched.
        cleaned_data = self.cleaned_data.copy()
        cleaned_data['voucher_codes'] = 'CP100-10\r\n--CP100-11'
        cleaned_data['taxonset'] = TaxonSets.objects.get(taxonset_name='all_taxa')
        dataset_creator = CreateDataset(cleaned_data)
        result = dataset_creator.dataset_str
        # assertNotIn shows both operands on failure, unlike assertTrue(a not in b).
        self.assertNotIn('CP100_11', result)

    def test_get_taxon_names_for_taxa_additional_fields(self):
        """Requesting only SUPERFAMILY yields code and superfamily per voucher."""
        self.cleaned_data['taxon_names'] = ['SUPERFAMILY']
        creator = CreateDataset(self.cleaned_data)
        # Both vouchers are expected to share the same superfamily.
        wanted = {
            voucher: {'code': voucher, 'superfamily': 'Papilionoidea'}
            for voucher in ('CP100-10', 'CP100-11')
        }
        self.assertEqual(wanted, creator.get_taxon_names_for_taxa())

    def test_dataset_all_codons_as_one(self):
        g1 = Genes.objects.get(gene_code='COI-begin')
        cleaned_data = self.cleaned_data
        cleaned_data['gene_codes'] = [g1]

        dataset_creator = CreateDataset(cleaned_data)
        expected = """
>CP100_10_Papilionoidea_Aus_aus
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGA
>CP100_11_Papilionoidea_Aus_bus
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGA
"""
        result = dataset_creator.dataset_str
        self.assertEqual(expected.lstrip(), result)

    def test_dataset_all_codons_as_one_with_no_reading_frame(self):
        g1 = Genes.objects.get(gene_code='COI-begin')
        cleaned_data = self.cleaned_data
        cleaned_data['gene_codes'] = [g1]

        dataset_creator = CreateDataset(cleaned_data)
        expected = """
>CP100_10_Papilionoidea_Aus_aus
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGA
>CP100_11_Papilionoidea_Aus_bus
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGA
"""
        result = dataset_creator.dataset_str
        self.assertEqual(expected.lstrip(), result)

    def test_dataset_all_codons_1st_as_one(self):
        g1 = Genes.objects.get(gene_code='COI-begin')
        cleaned_data = self.cleaned_data
        cleaned_data['gene_codes'] = [g1]
        cleaned_data['positions'] = list(['ALL', '1st'],)

        dataset_creator = CreateDataset(cleaned_data)
        expected = """
>CP100_10_Papilionoidea_Aus_aus
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGA
>CP100_11_Papilionoidea_Aus_bus
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGA
"""
        result = dataset_creator.dataset_str
        self.assertEqual(expected.lstrip(), result)

    def test_dataset_all_codons_1st_2nd_as_one(self):
        g1 = Genes.objects.get(gene_code='COI-begin')
        cleaned_data = self.cleaned_data
        cleaned_data['gene_codes'] = [g1]
        cleaned_data['positions'] = list(['ALL', '1st', '2nd'],)

        dataset_creator = CreateDataset(cleaned_data)
        expected = """
>CP100_10_Papilionoidea_Aus_aus
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGA
>CP100_11_Papilionoidea_Aus_bus
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGA
"""
        result = dataset_creator.dataset_str
        self.assertEqual(expected.lstrip(), result)

    def test_dataset_all_codons_1st_2nd_3rd_as_one(self):
        g1 = Genes.objects.get(gene_code='COI-begin')
        cleaned_data = self.cleaned_data
        cleaned_data['gene_codes'] = [g1]
        cleaned_data['positions'] = list(['ALL', '1st', '2nd', '3rd'],)

        dataset_creator = CreateDataset(cleaned_data)
        expected = """
>CP100_10_Papilionoidea_Aus_aus
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGA
>CP100_11_Papilionoidea_Aus_bus
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGA
"""
        result = dataset_creator.dataset_str
        self.assertEqual(expected.lstrip(), result)

    def test_dataset_1st_2nd_3rd_codons_as_one(self):
        g1 = Genes.objects.get(gene_code='COI-begin')
        cleaned_data = self.cleaned_data
        cleaned_data['gene_codes'] = [g1]
        cleaned_data['positions'] = list(['1st', '2nd', '3rd'],)

        dataset_creator = CreateDataset(cleaned_data)
        expected = """
>CP100_10_Papilionoidea_Aus_aus
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGA
>CP100_11_Papilionoidea_Aus_bus
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGA
"""
        result = dataset_creator.dataset_str
        self.assertEqual(expected.lstrip(), result)

    def test_dataset_1st_2nd_3rd_codons_as_1st2nd_3rd_gene_with_no_reading_frame(self):
        """Partitioning '1st-2nd, 3rd' on a gene without reading frame gives an empty dataset."""
        g1 = Genes.objects.get(gene_code='COI-begin')
        # Clear and persist the reading frame so the gene under test has none.
        g1.reading_frame = None
        g1.save()
        cleaned_data = self.cleaned_data.copy()
        cleaned_data['gene_codes'] = [g1]
        cleaned_data['positions'] = ['1st', '2nd', '3rd']
        cleaned_data['partition_by_positions'] = '1st-2nd, 3rd'

        dataset_creator = CreateDataset(cleaned_data)
        expected = ""
        result = dataset_creator.dataset_str
        self.assertEqual(expected, result)

    def test_dataset_all_codons_partitions_each(self):
        # TODO: fix test after fixing dataset-creator issue #26
        g1 = Genes.objects.get(gene_code='COI-begin')
        cleaned_data = self.cleaned_data.copy()
        cleaned_data['gene_codes'] = [g1]
        cleaned_data['positions'] = list(['ALL'],)
        cleaned_data['partition_by_positions'] = 'by gene'

        dataset_creator = CreateDataset(cleaned_data)
        expected = """
>CP100_10_Papilionoidea_Aus_aus
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGA
>CP100_11_Papilionoidea_Aus_bus
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGA
"""
        result = dataset_creator.dataset_str
        self.assertEqual(expected.lstrip(), result)

    def test_dataset_all_codons_1st_partitions_each(self):
        g1 = Genes.objects.get(gene_code='COI-begin')
        cleaned_data = self.cleaned_data
        cleaned_data['gene_codes'] = [g1]
        cleaned_data['positions'] = list(['ALL', '1st'],)
        cleaned_data['partition_by_positions'] = 'by gene'

        dataset_creator = CreateDataset(cleaned_data)
        expected = """
>CP100_10_Papilionoidea_Aus_aus
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGA
>CP100_11_Papilionoidea_Aus_bus
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGA
"""
        result = dataset_creator.dataset_str
        self.assertEqual(expected.lstrip(), result)

    def test_dataset_all_codons_1st_2nd_partitions_each(self):
        g1 = Genes.objects.get(gene_code='COI-begin')
        cleaned_data = self.cleaned_data
        cleaned_data['gene_codes'] = [g1]
        cleaned_data['positions'] = list(['ALL', '1st', '2nd'],)
        cleaned_data['partition_by_positions'] = 'by gene'

        dataset_creator = CreateDataset(cleaned_data)
        expected = """
>CP100_10_Papilionoidea_Aus_aus
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGA
>CP100_11_Papilionoidea_Aus_bus
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGA
"""
        result = dataset_creator.dataset_str
        self.assertEqual(expected.lstrip(), result)

    def test_dataset_all_codons_1st_2nd_3rd_partitions_each(self):
        g1 = Genes.objects.get(gene_code='COI-begin')
        cleaned_data = self.cleaned_data
        cleaned_data['gene_codes'] = [g1]
        cleaned_data['positions'] = list(['ALL', '1st', '2nd', '3rd'],)
        cleaned_data['partition_by_positions'] = 'by gene'

        dataset_creator = CreateDataset(cleaned_data)
        expected = """
>CP100_10_Papilionoidea_Aus_aus
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGA
>CP100_11_Papilionoidea_Aus_bus
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGA
"""
        result = dataset_creator.dataset_str
        self.assertEqual(expected.lstrip(), result)

    def test_dataset_1st_codon_one_partition(self):
        g1 = Genes.objects.get(gene_code='COI-begin')
        cleaned_data = self.cleaned_data
        cleaned_data['gene_codes'] = [g1]
        del cleaned_data['positions']
        cleaned_data['positions'] = ['1st']

        dataset_creator = CreateDataset(cleaned_data)
        expected = """
>CP100_10_Papilionoidea_Aus_aus
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
>CP100_11_Papilionoidea_Aus_bus
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
"""
        result = dataset_creator.dataset_str
        self.assertEqual(expected.lstrip(), result)

    def test_dataset_2nd_codon_one_partition(self):
        g1 = Genes.objects.get(gene_code='COI-begin')
        cleaned_data = self.cleaned_data
        cleaned_data['gene_codes'] = [g1]
        del cleaned_data['positions']
        cleaned_data['positions'] = ['2nd']

        dataset_creator = CreateDataset(cleaned_data)
        expected = """
>CP100_10_Papilionoidea_Aus_aus
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
>CP100_11_Papilionoidea_Aus_bus
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
"""
        result = dataset_creator.dataset_str
        self.assertEqual(expected.lstrip(), result)

    def test_dataset_3rd_codon_one_partition(self):
        g1 = Genes.objects.get(gene_code='COI-begin')
        cleaned_data = self.cleaned_data
        cleaned_data['gene_codes'] = [g1]
        del cleaned_data['positions']
        cleaned_data['positions'] = ['3rd']

        dataset_creator = CreateDataset(cleaned_data)
        expected = """
>CP100_10_Papilionoidea_Aus_aus
GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG
GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG
GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG
GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG
>CP100_11_Papilionoidea_Aus_bus
GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG
GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG
GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG
GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG
"""
        result = dataset_creator.dataset_str
        self.assertEqual(expected.lstrip(), result)

    def test_dataset_1st_2nd_codon_one_partition(self):
        g1 = Genes.objects.get(gene_code='COI-begin')
        cleaned_data = self.cleaned_data
        cleaned_data['gene_codes'] = [g1]
        del cleaned_data['positions']
        cleaned_data['positions'] = ['1st', '2nd']

        dataset_creator = CreateDataset(cleaned_data)
        expected = """
>CP100_10_Papilionoidea_Aus_aus
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC
ACACACACACACACACACACACACA
>CP100_11_Papilionoidea_Aus_bus
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC
ACACACACACACACACACACACACA
"""
        result = dataset_creator.dataset_str
        self.assertEqual(expected.lstrip(), result)

    def test_dataset_2nd_3rd_codon_one_partition(self):
        """Requesting only 2nd and 3rd codon positions records an error message."""
        g1 = Genes.objects.get(gene_code='COI-begin')
        # Work on a copy so the fixture built in setUp is left untouched.
        cleaned_data = self.cleaned_data.copy()
        cleaned_data['gene_codes'] = [g1]
        # Plain assignment replaces the value; no prior delete is needed.
        cleaned_data['positions'] = ['2nd', '3rd']

        dataset_creator = CreateDataset(cleaned_data)
        expected = 'Cannot create dataset for only codon positions 2nd and 3rd.'
        # Entries are stringified and concatenated so the message can be searched for.
        result = ''.join(str(i) for i in dataset_creator.errors)
        # assertIn shows both operands on failure, unlike assertTrue(a in b).
        self.assertIn(expected, result)

    def test_dataset_1st_3rd_codon_one_partition(self):
        """Requesting only 1st and 3rd codon positions records an error message."""
        g1 = Genes.objects.get(gene_code='COI-begin')
        # Work on a copy so the fixture built in setUp is left untouched.
        cleaned_data = self.cleaned_data.copy()
        cleaned_data['gene_codes'] = [g1]
        # Plain assignment replaces the value; no prior delete is needed.
        cleaned_data['positions'] = ['1st', '3rd']

        dataset_creator = CreateDataset(cleaned_data)
        expected = 'Cannot create dataset for only codon positions 1st and 3rd.'
        # Entries are stringified and concatenated so the message can be searched for.
        result = ''.join(str(i) for i in dataset_creator.errors)
        # assertIn shows both operands on failure, unlike assertTrue(a in b).
        self.assertIn(expected, result)

    def test_dataset_1st_3rd_codon_partition_1st2nd_3rd_gene_with_no_reading_frame(self):
        """Positions 1st and 3rd with partitioning '1st-2nd, 3rd' give an empty dataset.

        NOTE(review): despite the name, this test does not clear the gene's
        reading frame before building the dataset -- confirm whether it should.
        """
        g1 = Genes.objects.get(gene_code='COI-begin')
        # Work on a copy so the fixture built in setUp is left untouched.
        cleaned_data = self.cleaned_data.copy()
        cleaned_data['gene_codes'] = [g1]
        # Plain assignment replaces the value; no prior delete is needed.
        cleaned_data['positions'] = ['1st', '3rd']
        cleaned_data['partition_by_positions'] = '1st-2nd, 3rd'

        dataset_creator = CreateDataset(cleaned_data)
        expected = ""
        result = dataset_creator.dataset_str
        self.assertEqual(expected, result)

    def test_dataset_1st_each(self):
        g1 = Genes.objects.get(gene_code='COI-begin')
        cleaned_data = self.cleaned_data
        cleaned_data['gene_codes'] = [g1]
        del cleaned_data['positions']
        cleaned_data['positions'] = ['1st']
        cleaned_data['partition_by_positions'] = 'by gene'

        dataset_creator = CreateDataset(cleaned_data)
        expected = """
>CP100_10_Papilionoidea_Aus_aus
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
>CP100_11_Papilionoidea_Aus_bus
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
"""
        result = dataset_creator.dataset_str
        self.assertEqual(expected.lstrip(), result)

    def test_dataset_2nd_each(self):
        g1 = Genes.objects.get(gene_code='COI-begin')
        cleaned_data = self.cleaned_data
        cleaned_data['gene_codes'] = [g1]
        del cleaned_data['positions']
        cleaned_data['positions'] = ['2nd']
        cleaned_data['partition_by_positions'] = 'by gene'

        dataset_creator = CreateDataset(cleaned_data)
        expected = """
>CP100_10_Papilionoidea_Aus_aus
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
>CP100_11_Papilionoidea_Aus_bus
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
"""
        result = dataset_creator.dataset_str
        self.assertEqual(expected.lstrip(), result)

    def test_dataset_3rd_each(self):
        """Check the dataset text for COI-begin with only 3rd codon positions.

        Partitioning is 'by gene'. ``dataset_str`` must equal the expected
        text below once its leading newline has been stripped.
        """
        g1 = Genes.objects.get(gene_code='COI-begin')
        cleaned_data = self.cleaned_data
        cleaned_data['gene_codes'] = [g1]
        # Drop the existing 'positions' entry before setting the new one.
        del cleaned_data['positions']
        cleaned_data['positions'] = ['3rd']
        cleaned_data['partition_by_positions'] = 'by gene'

        dataset_creator = CreateDataset(cleaned_data)
        expected = """
>CP100_10_Papilionoidea_Aus_aus
GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG
GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG
GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG
GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG
>CP100_11_Papilionoidea_Aus_bus
GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG
GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG
GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG
GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG
"""
        result = dataset_creator.dataset_str
        # lstrip() removes the newline that follows the opening triple quote.
        self.assertEqual(expected.lstrip(), result)

    def test_dataset_1st_2nd_each(self):
        """Check the dataset text for COI-begin with 1st and 2nd codon positions.

        Partitioning is 'by gene'. ``dataset_str`` must equal the expected
        text below once its leading newline has been stripped.
        """
        # TODO fix test when dataset-creator has issue #26 fixed
        g1 = Genes.objects.get(gene_code='COI-begin')
        cleaned_data = self.cleaned_data
        cleaned_data['gene_codes'] = [g1]
        # Drop the existing 'positions' entry before setting the new one.
        del cleaned_data['positions']
        cleaned_data['positions'] = ['1st', '2nd']
        cleaned_data['partition_by_positions'] = 'by gene'

        dataset_creator = CreateDataset(cleaned_data)
        expected = """
>CP100_10_Papilionoidea_Aus_aus
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC
ACACACACACACACACACACACACA
>CP100_11_Papilionoidea_Aus_bus
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC
ACACACACACACACACACACACACA
"""
        result = dataset_creator.dataset_str
        # lstrip() removes the newline that follows the opening triple quote.
        self.assertEqual(expected.lstrip(), result)

    def test_dataset_1st_3rd_each(self):
        """Requesting only 1st and 3rd codon positions must record an error.

        The expected message is searched for in the concatenation of
        ``str()`` of every entry in ``dataset_creator.errors``.
        """
        g1 = Genes.objects.get(gene_code='COI-begin')
        cleaned_data = self.cleaned_data
        cleaned_data['gene_codes'] = [g1]
        # Drop the existing 'positions' entry before setting the new one.
        del cleaned_data['positions']
        cleaned_data['positions'] = ['1st', '3rd']
        cleaned_data['partition_by_positions'] = 'by gene'

        dataset_creator = CreateDataset(cleaned_data)
        expected = 'Cannot create dataset for only codon positions 1st and 3rd.'
        # Entries are passed through str() so non-string errors can be searched.
        all_errors = ''.join(str(error) for error in dataset_creator.errors)
        # assertIn prints both operands on failure; assertTrue(x in y) would
        # only report "False is not true".
        self.assertIn(expected, all_errors)

    def test_dataset_2nd_3rd_each(self):
        """Requesting only 2nd and 3rd codon positions must record an error.

        The expected message is searched for in the concatenation of
        ``str()`` of every entry in ``dataset_creator.errors``.
        """
        g1 = Genes.objects.get(gene_code='COI-begin')
        cleaned_data = self.cleaned_data
        cleaned_data['gene_codes'] = [g1]
        # Drop the existing 'positions' entry before setting the new one.
        del cleaned_data['positions']
        cleaned_data['positions'] = ['2nd', '3rd']
        cleaned_data['partition_by_positions'] = 'by gene'

        dataset_creator = CreateDataset(cleaned_data)
        expected = 'Cannot create dataset for only codon positions 2nd and 3rd.'
        # Entries are passed through str() so non-string errors can be searched.
        all_errors = ''.join(str(error) for error in dataset_creator.errors)
        # assertIn prints both operands on failure; assertTrue(x in y) would
        # only report "False is not true".
        self.assertIn(expected, all_errors)

    def test_dataset_2nd_3rd_paritions_1st2nd_3rd_gene_with_no_reading_frame(self):
        """Expect an empty dataset string for 2nd+3rd positions of COI-begin.

        Partitioning is '1st-2nd, 3rd'. The test name suggests the gene has
        no reading frame; that is not visible in this method.
        """
        coi_begin = Genes.objects.get(gene_code='COI-begin')
        form_data = self.cleaned_data
        form_data['gene_codes'] = [coi_begin]
        # Remove the current entry, then register the new selection.
        form_data.pop('positions')
        form_data['positions'] = ['2nd', '3rd']
        form_data['partition_by_positions'] = '1st-2nd, 3rd'

        creator = CreateDataset(form_data)
        self.assertEqual("", creator.dataset_str)

    def test_dataset_1st_2nd_3rd_each(self):
        """Check the dataset text for COI-begin with 1st, 2nd and 3rd positions.

        Partitioning is 'by gene'. ``dataset_str`` must equal the expected
        text below once its leading newline has been stripped.
        """
        # TODO Fix this test after fixing dataset-creator issue #26
        g1 = Genes.objects.get(gene_code='COI-begin')
        cleaned_data = self.cleaned_data
        cleaned_data['gene_codes'] = [g1]
        # Drop the existing 'positions' entry before setting the new one.
        del cleaned_data['positions']
        cleaned_data['positions'] = ['1st', '2nd', '3rd']
        cleaned_data['partition_by_positions'] = 'by gene'

        dataset_creator = CreateDataset(cleaned_data)
        expected = """
>CP100_10_Papilionoidea_Aus_aus
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGA
>CP100_11_Papilionoidea_Aus_bus
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGA
"""
        result = dataset_creator.dataset_str
        # lstrip() removes the newline that follows the opening triple quote.
        self.assertEqual(expected.lstrip(), result)

    def test_dataset_all_1st_2nd_3rd_each(self):
        """Check the dataset text when 'ALL' is combined with 1st, 2nd and 3rd.

        Uses gene COI-begin with 'by gene' partitioning. ``dataset_str`` must
        equal the expected text below once its leading newline is stripped.
        """
        # TODO: fix test after fixing dataset-creator issue #26
        g1 = Genes.objects.get(gene_code='COI-begin')
        cleaned_data = self.cleaned_data
        cleaned_data['gene_codes'] = [g1]
        # Drop the existing 'positions' entry before setting the new one.
        del cleaned_data['positions']
        cleaned_data['positions'] = ['ALL', '1st', '2nd', '3rd']
        cleaned_data['partition_by_positions'] = 'by gene'

        dataset_creator = CreateDataset(cleaned_data)
        expected = """
>CP100_10_Papilionoidea_Aus_aus
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGA
>CP100_11_Papilionoidea_Aus_bus
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGA
"""
        result = dataset_creator.dataset_str
        # lstrip() removes the newline that follows the opening triple quote.
        self.assertEqual(expected.lstrip(), result)

    def test_dataset_1st_codon_1st2nd_3rd(self):
        """Check 1st codon positions of COI-begin with '1st-2nd, 3rd' partitioning.

        The expected text opens with a '>COI-begin_1st' header and a '----'
        line, followed by one unwrapped sequence line per voucher.
        """
        g1 = Genes.objects.get(gene_code='COI-begin')
        cleaned_data = self.cleaned_data
        cleaned_data['gene_codes'] = [g1]
        # Drop the existing 'positions' entry before setting the new one.
        del cleaned_data['positions']
        cleaned_data['positions'] = ['1st']
        cleaned_data['partition_by_positions'] = '1st-2nd, 3rd'

        dataset_creator = CreateDataset(cleaned_data)
        expected = """
>COI-begin_1st
----
>CP100_10_Papilionoidea_Aus_aus
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
>CP100_11_Papilionoidea_Aus_bus
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
"""
        result = dataset_creator.dataset_str
        # lstrip() removes the newline that follows the opening triple quote.
        self.assertEqual(expected.lstrip(), result)

    def test_dataset_2nd_codon_1st2nd_3rd(self):
        """Check 2nd codon positions of COI-begin with '1st-2nd, 3rd' partitioning.

        The expected text opens with a '>COI-begin_2nd' header and a '----'
        line, followed by one unwrapped sequence line per voucher.
        """
        g1 = Genes.objects.get(gene_code='COI-begin')
        cleaned_data = self.cleaned_data
        cleaned_data['gene_codes'] = [g1]
        # Drop the existing 'positions' entry before setting the new one.
        del cleaned_data['positions']
        cleaned_data['positions'] = ['2nd']
        cleaned_data['partition_by_positions'] = '1st-2nd, 3rd'

        dataset_creator = CreateDataset(cleaned_data)
        expected = """
>COI-begin_2nd
----
>CP100_10_Papilionoidea_Aus_aus
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
>CP100_11_Papilionoidea_Aus_bus
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
"""
        result = dataset_creator.dataset_str
        # lstrip() removes the newline that follows the opening triple quote.
        self.assertEqual(expected.lstrip(), result)

    def test_dataset_3rd_codon_1st2nd_3rd(self):
        """Check 3rd codon positions of COI-begin with '1st-2nd, 3rd' partitioning.

        The expected text opens with a '>COI-begin_3rd' header and a '----'
        line, followed by one unwrapped sequence line per voucher.
        """
        g1 = Genes.objects.get(gene_code='COI-begin')
        cleaned_data = self.cleaned_data
        cleaned_data['gene_codes'] = [g1]
        # Drop the existing 'positions' entry before setting the new one.
        del cleaned_data['positions']
        cleaned_data['positions'] = ['3rd']
        cleaned_data['partition_by_positions'] = '1st-2nd, 3rd'

        dataset_creator = CreateDataset(cleaned_data)
        expected = """
>COI-begin_3rd
----
>CP100_10_Papilionoidea_Aus_aus
GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG
>CP100_11_Papilionoidea_Aus_bus
GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG
"""
        result = dataset_creator.dataset_str
        # lstrip() removes the newline that follows the opening triple quote.
        self.assertEqual(expected.lstrip(), result)

    def test_dataset_1st_2nd_codons_1st2nd_3rd(self):
        g1 = Genes.objects.get(gene_code='COI-begin')
        cleaned_data = self.cleaned_data
        cleaned_data['gene_codes'] = [g1]
        del cleaned_data['positions']
        cleaned_data['positions'] = ['1st', '2nd']
        cleaned_data['partition_by_positions'] = '1st-2nd, 3rd'

        dataset_creator = CreateDataset(cleaned_data)
        expected = """
>COI-begin_1st-2nd
----
>CP100_10_Papilionoidea_Aus_aus
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACA
>CP100_11_Papilionoidea_Aus_bus
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACA
"""
        result = dataset_creator.dataset_str
        self.assertEqual(expected.lstrip(), result)

    def test_dataset_1st_2nd_3rd_codons_1st2nd_3rd(self):
        g1 = Genes.objects.get(gene_code='COI-begin')
        cleaned_data = self.cleaned_data
        cleaned_data['gene_codes'] = [g1]
        del cleaned_data['positions']
        cleaned_data['positions'] = ['ALL']
        cleaned_data['partition_by_positions'] = '1st-2nd, 3rd'

        dataset_creator = CreateDataset(cleaned_data)
        expected = """
>COI-begin_1st-2nd
----
>CP100_10_Papilionoidea_Aus_aus
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACA
>CP100_11_Papilionoidea_Aus_bus
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACA

>COI-begin_3rd
----
>CP100_10_Papilionoidea_Aus_aus
GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG
>CP100_11_Papilionoidea_Aus_bus
GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG
"""
        result = dataset_creator.dataset_str
        self.assertEqual(expected.lstrip(), result)

    def test_dataset_all_1st_codons_1st2nd_3rd(self):
        g1 = Genes.objects.get(gene_code='COI-begin')
        cleaned_data = self.cleaned_data
        cleaned_data['gene_codes'] = [g1]
        del cleaned_data['positions']
        cleaned_data['positions'] = ['ALL', '1st']
        cleaned_data['partition_by_positions'] = '1st-2nd, 3rd'

        dataset_creator = CreateDataset(cleaned_data)
        expected = """
>COI-begin_1st-2nd
----
>CP100_10_Papilionoidea_Aus_aus
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACA
>CP100_11_Papilionoidea_Aus_bus
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACA

>COI-begin_3rd
----
>CP100_10_Papilionoidea_Aus_aus
GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG
>CP100_11_Papilionoidea_Aus_bus
GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG
"""
        result = dataset_creator.dataset_str
        self.assertEqual(expected.lstrip(), result)

    def test_has_no_sequences_for_input_voucher(self):
        """An unknown voucher code (CP1000) must be reported in ``warnings``."""
        cleaned_data = self.cleaned_data
        cleaned_data['voucher_codes'] = 'CP100-10\r\nCP100-11\r\nCP1000'
        dataset_creator = CreateDataset(cleaned_data)
        # assertIn prints the container on failure; assertTrue(x in y) would
        # only report "False is not true".
        self.assertIn('Could not find voucher CP1000', dataset_creator.warnings)

    def test_creating_dataset_filename(self):
        """guess_file_extension must map the MEGA '.txt' name to '.meg'."""
        self.assertEqual(
            'MEGA_b879d2a046d04821be618bf481b6b08d.meg',
            guess_file_extension('MEGA_b879d2a046d04821be618bf481b6b08d.txt'),
        )
コード例 #4
0
class CreateDatasetUtilsTest(TestCase):
    def setUp(self):
        """Run the 'migrate_db' command and build the default test fixtures.

        Stores the default form data in ``self.cleaned_data``, a client in
        ``self.c`` and a ``CreateDataset`` built from the defaults in
        ``self.dataset_creator``. ``maxDiff`` is disabled so assertion diffs
        are not truncated.
        """
        call_command('migrate_db', dumpfile='test_db_dump.xml', verbosity=0)

        default_genes = [
            Genes.objects.get(gene_code=gene_code)
            for gene_code in ('COI', 'EF1a')
        ]
        self.cleaned_data = dict(
            gene_codes=default_genes,
            taxonset=None,
            voucher_codes='CP100-10\r\nCP100-11',
            geneset=None,
            taxon_names=['CODE', 'SUPERFAMILY', 'GENUS', 'SPECIES'],
            positions=['ALL'],
            partition_by_positions='ONE',
        )

        self.c = Client()
        self.dataset_creator = CreateDataset(self.cleaned_data)
        self.maxDiff = None

    def test_create_dataset(self):
        """The default dataset text must contain the 'coi' header block."""
        expected = '>coi\n--------------------\n>CP100-10_Papilionoidea_Melitaea_diamina'
        result = self.dataset_creator.dataset_str
        # assertIn prints both strings on failure; assertTrue(x in y) would
        # only report "False is not true".
        self.assertIn(expected, result)

    def test_create_dataset_with_gene_code(self):
        """With CODE and GENECODE as taxon names, headers read '>CP100-10_coi'."""
        self.cleaned_data['taxon_names'] = ['CODE', 'GENECODE']
        dataset_creator = CreateDataset(self.cleaned_data)
        expected = ">CP100-10_coi\n"
        result = dataset_creator.dataset_str
        # assertIn prints both strings on failure; assertTrue(x in y) would
        # only report "False is not true".
        self.assertIn(expected, result)

    def test_get_taxon_names_for_taxa(self):
        """get_taxon_names_for_taxa returns the name fields for both vouchers.

        The expected mapping is keyed by lower-case voucher code; the second
        voucher has an empty superfamily.
        """
        first_voucher = {
            'code': 'CP100-10',
            'genus': 'Melitaea',
            'species': 'diamina',
            'superfamily': 'Papilionoidea',
        }
        second_voucher = {
            'code': 'CP100-11',
            'genus': 'Melitaea',
            'species': 'diamina',
            'superfamily': '',
        }
        names_by_voucher = self.dataset_creator.get_taxon_names_for_taxa()
        self.assertEqual(
            {'cp100-10': first_voucher, 'cp100-11': second_voucher},
            names_by_voucher,
        )

    def test_create_dataset_drop_voucher(self):
        """A voucher listed as '--CP100-11' must not appear in the dataset.

        The 'Erebia' taxonset is selected alongside the voucher codes.
        """
        cleaned_data = self.cleaned_data
        cleaned_data['voucher_codes'] = 'CP100-10\r\n--CP100-11'
        cleaned_data['taxonset'] = TaxonSets.objects.get(
            taxonset_name='Erebia')
        dataset_creator = CreateDataset(cleaned_data)
        result = dataset_creator.dataset_str
        # assertNotIn prints the offending text on failure; assertTrue(x not
        # in y) would only report "False is not true".
        self.assertNotIn('CP100-11', result)

    def test_from_seq_objs_to_fasta(self):
        """The result of from_seq_objs_to_fasta() must have length 2706."""
        fasta_output = self.dataset_creator.from_seq_objs_to_fasta()
        self.assertEqual(2706, len(fasta_output))

    def test_get_taxon_names_for_taxa_additional_fields(self):
        """Requesting only SUPERFAMILY yields just that field per voucher."""
        self.cleaned_data['taxon_names'] = ['SUPERFAMILY']
        creator = CreateDataset(self.cleaned_data)
        names_by_voucher = creator.get_taxon_names_for_taxa()

        self.assertEqual(
            {
                'cp100-10': {'superfamily': 'Papilionoidea'},
                'cp100-11': {'superfamily': ''},
            },
            names_by_voucher,
        )

    def test_get_sequence_first_codon_position(self):
        """Selecting '1st' positions of a wingless sequence gives the expected Seq.

        Only the first element of the returned value is compared.
        """
        form_data = self.cleaned_data
        form_data['positions'] = ['1st']
        form_data['gene_codes'] = [Genes.objects.get(gene_code='wingless')]
        creator = CreateDataset(form_data)

        full_sequence = Seq(
            "ACACGTCGACTCCGGCAAGTCCACCACCACCGGTCACTTGATTTACAAATGTGGTGGTATCGACAaACGTACCATCGAGAAGTTCGAGAAGGA"
        )
        extracted = creator.get_sequence_based_on_codon_positions(
            'wingless', full_sequence)
        self.assertEqual(Seq("CGGTGATAAAGCTATATGGAGACAAGATGAG"), extracted[0])

    def test_get_sequence_second_codon_position(self):
        """Selecting '2nd' positions of a wingless sequence gives the expected Seq.

        Only the first element of the returned value is compared.
        """
        form_data = self.cleaned_data
        form_data['positions'] = ['2nd']
        form_data['gene_codes'] = [Genes.objects.get(gene_code='wingless')]
        creator = CreateDataset(form_data)

        full_sequence = Seq(
            "ACACGTCGACTCCGGCAAGTCCACCACCACCGGTCACTTGATTTACAAATGTGGTGGTATCGACAaACGTACCATCGAGAAGTTCGAGAAGGA"
        )
        extracted = creator.get_sequence_based_on_codon_positions(
            'wingless', full_sequence)
        self.assertEqual(Seq("ATACGACCCCGATTAAGGGTAaGCTAATAAA"), extracted[0])

    def test_get_sequence_third_codon_position(self):
        """Selecting '3rd' positions of a wingless sequence gives the expected Seq.

        Only the first element of the returned value is compared.
        """
        form_data = self.cleaned_data
        form_data['positions'] = ['3rd']
        form_data['gene_codes'] = [Genes.objects.get(gene_code='wingless')]
        creator = CreateDataset(form_data)

        full_sequence = Seq(
            "ACACGTCGACTCCGGCAAGTCCACCACCACCGGTCACTTGATTTACAAATGTGGTGGTATCGACAaACGTACCATCGAGAAGTTCGAGAAGGA"
        )
        extracted = creator.get_sequence_based_on_codon_positions(
            'wingless', full_sequence)
        self.assertEqual(Seq("CCCCCGCCCCTCGTCATTTCCATCCGGCGG"), extracted[0])

    def test_get_sequence_first_and_second_codon_position(self):
        """Selecting '1st' and '2nd' positions of a wingless sequence.

        Only the first element of the returned value is compared.
        """
        form_data = self.cleaned_data
        form_data['positions'] = ['1st', '2nd']
        form_data['gene_codes'] = [Genes.objects.get(gene_code='wingless')]
        creator = CreateDataset(form_data)

        wanted = Seq(
            "CAGTGATCGGAATCACACACGGCATTATTAAATGGGGGATGAAaCGACATGAAATTGAAAGA")
        full_sequence = Seq(
            "ACACGTCGACTCCGGCAAGTCCACCACCACCGGTCACTTGATTTACAAATGTGGTGGTATCGACAaACGTACCATCGAGAAGTTCGAGAAGGA"
        )
        extracted = creator.get_sequence_based_on_codon_positions(
            'wingless', full_sequence)
        self.assertEqual(wanted, extracted[0])

    def test_get_sequence_first_and_third_codon_position(self):
        """Selecting '1st' and '3rd' positions of a wingless sequence.

        Only the first element of the returned value is compared.
        """
        form_data = self.cleaned_data
        form_data['positions'] = ['1st', '3rd']
        form_data['gene_codes'] = [Genes.objects.get(gene_code='wingless')]
        creator = CreateDataset(form_data)

        wanted = Seq(
            "CCGCGCTCGCAGTCACACACGTCCTGATTCAATTGTGTACGCAACTACACGGAGTCGGAGG")
        full_sequence = Seq(
            "ACACGTCGACTCCGGCAAGTCCACCACCACCGGTCACTTGATTTACAAATGTGGTGGTATCGACAaACGTACCATCGAGAAGTTCGAGAAGGA"
        )
        extracted = creator.get_sequence_based_on_codon_positions(
            'wingless', full_sequence)
        self.assertEqual(wanted, extracted[0])

    def test_get_sequence_second_and_third_codon_position(self):
        """Selecting '2nd' and '3rd' positions of a wingless sequence.

        Only the first element of the returned value is compared.
        """
        form_data = self.cleaned_data
        form_data['positions'] = ['2nd', '3rd']
        form_data['gene_codes'] = [Genes.objects.get(gene_code='wingless')]
        creator = CreateDataset(form_data)

        wanted = Seq(
            "ACTCACCCGCAGCCCCCCCCGTACTGTTACAAGTGTGTTCACaAGTCCTCAGAGTCAGAGA")
        full_sequence = Seq(
            "ACACGTCGACTCCGGCAAGTCCACCACCACCGGTCACTTGATTTACAAATGTGGTGGTATCGACAaACGTACCATCGAGAAGTTCGAGAAGGA"
        )
        extracted = creator.get_sequence_based_on_codon_positions(
            'wingless', full_sequence)
        self.assertEqual(wanted, extracted[0])

    def test_dataset_all_codons_as_one(self):
        """Check the dataset text for gene COI with the setUp defaults.

        Only 'gene_codes' is overridden, so 'positions' stays ['ALL'] and
        'partition_by_positions' stays 'ONE' as set in setUp. ``dataset_str``
        must equal the expected text with surrounding whitespace stripped.
        """
        g1 = Genes.objects.get(gene_code='COI')
        cleaned_data = self.cleaned_data
        cleaned_data['gene_codes'] = [g1]

        dataset_creator = CreateDataset(cleaned_data)
        expected = """
>coi
--------------------
>CP100-10_Papilionoidea_Melitaea_diamina
?????????????????????????TGAGCCGGTATAATTGGTACATCCCTAAGTCTTATTATTCGAACCGAATTAGGAAATCCTAGTTTTTTAATTGGAGATGATCAAATTTATAATACCATTGTAACAGCTCATGCTTTTATTATAATTTTTTTTATAGTTATGCCAATTATAATTGGAGGATTTGGTAATTGACTTGTACCATTAATATTGGGAGCCCCAGATATAGCTTTCCCCCGAATAAATTATATAAGATTTTGATTATTGCCTCCATCCTTAATTCTTTTAATTTCAAGTAGAATTGTAGAAAATGGGGCAGGAACTGGATGAACAGTTTACCCCCCACTTTCATCTAATATTGCCCATAGAGGAGCTTCAGTGGATTTAGCTATTTTTTCTTTACATTTAGCTGGGATTTCCTCTATCTTAGGAGCTATTAATTTTATTACTACAATTATTAATATACGAATTAATAATATATCTTATGATCAAATACCTTTATTTGTATGAGCAGTAGGAATTACAGCATTACTTCTCTTATTATCTTTACCAGTTTTAGCTGGAGCTATTACTATACTTTTAACGGATCGAAATCTTAATACCTCATTTTTTGATTCCTGCGGAGGAGGAGATCC?????????????????????????????????
>CP100-11_Melitaea_diamina
??TGAGCCGGTATAATTGGTACATCCCTAAGTCTTATTATTCGAACCGAATTAGGAAATCCTAGTTTTTTAATTGGAGATGATCAAATTTATAATACCATTGTAACAGCTCATGCTTTTATTATAATTTTTTTTATAGTTATGCCAATTATAATTGGAGGATTTGGTAATTGACTTGTACCATTAATATTGGGAGCCCCAGATATAGCTTTCCCCCGAATAAATTATATAAGATTTTGATTATTGCCTCCATCCTTAATTCTTTTAATTTCAAGTAGAATTGTAGAAAATGGGGCAGGAACTGGATGAACAGTTTACCCCCCACTTTCATCTAATATTGCCCATAGAGGAGCTTCAGTGGATTTAGCTATTTTTTCTTTACATTTAGCTGGGATTTCCTCTATCTTAGGAGCTATTAATTTTATTACTACAATTATTAATATACGAATTAATAATATATCTTATGATCAAATACCTTTATTTGTATGAGCAGTAGGAATTACAGCATTACTTCTCTTATTATCTTTACCAGTTTTAGCTGGAGCTATTACTATACTTTTAACGGATCGAAATCTTAATACCTCATTTTTTGATTCCTGCGGAGGAGGAGATCC?????????????????????????????????
"""
        result = dataset_creator.dataset_str
        # strip() removes the leading and trailing newlines of the literal.
        self.assertEqual(expected.strip(), result)

    def test_dataset_all_codons_1st_as_one(self):
        """Check the dataset text for gene COI with positions 'ALL' and '1st'.

        'partition_by_positions' is not overridden here, so it keeps the
        'ONE' value from setUp. ``dataset_str`` must equal the expected text
        with surrounding whitespace stripped.
        """
        g1 = Genes.objects.get(gene_code='COI')
        cleaned_data = self.cleaned_data
        cleaned_data['gene_codes'] = [g1]
        # NOTE(review): list() around a list literal is redundant.
        cleaned_data['positions'] = list(['ALL', '1st'], )

        dataset_creator = CreateDataset(cleaned_data)
        expected = """
>coi
--------------------
>CP100-10_Papilionoidea_Melitaea_diamina
?????????????????????????TGAGCCGGTATAATTGGTACATCCCTAAGTCTTATTATTCGAACCGAATTAGGAAATCCTAGTTTTTTAATTGGAGATGATCAAATTTATAATACCATTGTAACAGCTCATGCTTTTATTATAATTTTTTTTATAGTTATGCCAATTATAATTGGAGGATTTGGTAATTGACTTGTACCATTAATATTGGGAGCCCCAGATATAGCTTTCCCCCGAATAAATTATATAAGATTTTGATTATTGCCTCCATCCTTAATTCTTTTAATTTCAAGTAGAATTGTAGAAAATGGGGCAGGAACTGGATGAACAGTTTACCCCCCACTTTCATCTAATATTGCCCATAGAGGAGCTTCAGTGGATTTAGCTATTTTTTCTTTACATTTAGCTGGGATTTCCTCTATCTTAGGAGCTATTAATTTTATTACTACAATTATTAATATACGAATTAATAATATATCTTATGATCAAATACCTTTATTTGTATGAGCAGTAGGAATTACAGCATTACTTCTCTTATTATCTTTACCAGTTTTAGCTGGAGCTATTACTATACTTTTAACGGATCGAAATCTTAATACCTCATTTTTTGATTCCTGCGGAGGAGGAGATCC?????????????????????????????????
>CP100-11_Melitaea_diamina
??TGAGCCGGTATAATTGGTACATCCCTAAGTCTTATTATTCGAACCGAATTAGGAAATCCTAGTTTTTTAATTGGAGATGATCAAATTTATAATACCATTGTAACAGCTCATGCTTTTATTATAATTTTTTTTATAGTTATGCCAATTATAATTGGAGGATTTGGTAATTGACTTGTACCATTAATATTGGGAGCCCCAGATATAGCTTTCCCCCGAATAAATTATATAAGATTTTGATTATTGCCTCCATCCTTAATTCTTTTAATTTCAAGTAGAATTGTAGAAAATGGGGCAGGAACTGGATGAACAGTTTACCCCCCACTTTCATCTAATATTGCCCATAGAGGAGCTTCAGTGGATTTAGCTATTTTTTCTTTACATTTAGCTGGGATTTCCTCTATCTTAGGAGCTATTAATTTTATTACTACAATTATTAATATACGAATTAATAATATATCTTATGATCAAATACCTTTATTTGTATGAGCAGTAGGAATTACAGCATTACTTCTCTTATTATCTTTACCAGTTTTAGCTGGAGCTATTACTATACTTTTAACGGATCGAAATCTTAATACCTCATTTTTTGATTCCTGCGGAGGAGGAGATCC?????????????????????????????????
"""
        result = dataset_creator.dataset_str
        # strip() removes the leading and trailing newlines of the literal.
        self.assertEqual(expected.strip(), result)

    def test_dataset_all_codons_1st_2nd_as_one(self):
        """Check the dataset text for gene COI with 'ALL', '1st' and '2nd'.

        'partition_by_positions' is not overridden here, so it keeps the
        'ONE' value from setUp. ``dataset_str`` must equal the expected text
        with surrounding whitespace stripped.
        """
        g1 = Genes.objects.get(gene_code='COI')
        cleaned_data = self.cleaned_data
        cleaned_data['gene_codes'] = [g1]
        # NOTE(review): list() around a list literal is redundant.
        cleaned_data['positions'] = list(['ALL', '1st', '2nd'], )

        dataset_creator = CreateDataset(cleaned_data)
        expected = """
>coi
--------------------
>CP100-10_Papilionoidea_Melitaea_diamina
?????????????????????????TGAGCCGGTATAATTGGTACATCCCTAAGTCTTATTATTCGAACCGAATTAGGAAATCCTAGTTTTTTAATTGGAGATGATCAAATTTATAATACCATTGTAACAGCTCATGCTTTTATTATAATTTTTTTTATAGTTATGCCAATTATAATTGGAGGATTTGGTAATTGACTTGTACCATTAATATTGGGAGCCCCAGATATAGCTTTCCCCCGAATAAATTATATAAGATTTTGATTATTGCCTCCATCCTTAATTCTTTTAATTTCAAGTAGAATTGTAGAAAATGGGGCAGGAACTGGATGAACAGTTTACCCCCCACTTTCATCTAATATTGCCCATAGAGGAGCTTCAGTGGATTTAGCTATTTTTTCTTTACATTTAGCTGGGATTTCCTCTATCTTAGGAGCTATTAATTTTATTACTACAATTATTAATATACGAATTAATAATATATCTTATGATCAAATACCTTTATTTGTATGAGCAGTAGGAATTACAGCATTACTTCTCTTATTATCTTTACCAGTTTTAGCTGGAGCTATTACTATACTTTTAACGGATCGAAATCTTAATACCTCATTTTTTGATTCCTGCGGAGGAGGAGATCC?????????????????????????????????
>CP100-11_Melitaea_diamina
??TGAGCCGGTATAATTGGTACATCCCTAAGTCTTATTATTCGAACCGAATTAGGAAATCCTAGTTTTTTAATTGGAGATGATCAAATTTATAATACCATTGTAACAGCTCATGCTTTTATTATAATTTTTTTTATAGTTATGCCAATTATAATTGGAGGATTTGGTAATTGACTTGTACCATTAATATTGGGAGCCCCAGATATAGCTTTCCCCCGAATAAATTATATAAGATTTTGATTATTGCCTCCATCCTTAATTCTTTTAATTTCAAGTAGAATTGTAGAAAATGGGGCAGGAACTGGATGAACAGTTTACCCCCCACTTTCATCTAATATTGCCCATAGAGGAGCTTCAGTGGATTTAGCTATTTTTTCTTTACATTTAGCTGGGATTTCCTCTATCTTAGGAGCTATTAATTTTATTACTACAATTATTAATATACGAATTAATAATATATCTTATGATCAAATACCTTTATTTGTATGAGCAGTAGGAATTACAGCATTACTTCTCTTATTATCTTTACCAGTTTTAGCTGGAGCTATTACTATACTTTTAACGGATCGAAATCTTAATACCTCATTTTTTGATTCCTGCGGAGGAGGAGATCC?????????????????????????????????
"""
        result = dataset_creator.dataset_str
        # strip() removes the leading and trailing newlines of the literal.
        self.assertEqual(expected.strip(), result)

    def test_dataset_all_codons_1st_2nd_3rd_as_one(self):
        """Check the dataset text for gene COI with 'ALL', '1st', '2nd', '3rd'.

        'partition_by_positions' is not overridden here, so it keeps the
        'ONE' value from setUp. ``dataset_str`` must equal the expected text
        with surrounding whitespace stripped.
        """
        g1 = Genes.objects.get(gene_code='COI')
        cleaned_data = self.cleaned_data
        cleaned_data['gene_codes'] = [g1]
        # NOTE(review): list() around a list literal is redundant.
        cleaned_data['positions'] = list(['ALL', '1st', '2nd', '3rd'], )

        dataset_creator = CreateDataset(cleaned_data)
        expected = """
>coi
--------------------
>CP100-10_Papilionoidea_Melitaea_diamina
?????????????????????????TGAGCCGGTATAATTGGTACATCCCTAAGTCTTATTATTCGAACCGAATTAGGAAATCCTAGTTTTTTAATTGGAGATGATCAAATTTATAATACCATTGTAACAGCTCATGCTTTTATTATAATTTTTTTTATAGTTATGCCAATTATAATTGGAGGATTTGGTAATTGACTTGTACCATTAATATTGGGAGCCCCAGATATAGCTTTCCCCCGAATAAATTATATAAGATTTTGATTATTGCCTCCATCCTTAATTCTTTTAATTTCAAGTAGAATTGTAGAAAATGGGGCAGGAACTGGATGAACAGTTTACCCCCCACTTTCATCTAATATTGCCCATAGAGGAGCTTCAGTGGATTTAGCTATTTTTTCTTTACATTTAGCTGGGATTTCCTCTATCTTAGGAGCTATTAATTTTATTACTACAATTATTAATATACGAATTAATAATATATCTTATGATCAAATACCTTTATTTGTATGAGCAGTAGGAATTACAGCATTACTTCTCTTATTATCTTTACCAGTTTTAGCTGGAGCTATTACTATACTTTTAACGGATCGAAATCTTAATACCTCATTTTTTGATTCCTGCGGAGGAGGAGATCC?????????????????????????????????
>CP100-11_Melitaea_diamina
??TGAGCCGGTATAATTGGTACATCCCTAAGTCTTATTATTCGAACCGAATTAGGAAATCCTAGTTTTTTAATTGGAGATGATCAAATTTATAATACCATTGTAACAGCTCATGCTTTTATTATAATTTTTTTTATAGTTATGCCAATTATAATTGGAGGATTTGGTAATTGACTTGTACCATTAATATTGGGAGCCCCAGATATAGCTTTCCCCCGAATAAATTATATAAGATTTTGATTATTGCCTCCATCCTTAATTCTTTTAATTTCAAGTAGAATTGTAGAAAATGGGGCAGGAACTGGATGAACAGTTTACCCCCCACTTTCATCTAATATTGCCCATAGAGGAGCTTCAGTGGATTTAGCTATTTTTTCTTTACATTTAGCTGGGATTTCCTCTATCTTAGGAGCTATTAATTTTATTACTACAATTATTAATATACGAATTAATAATATATCTTATGATCAAATACCTTTATTTGTATGAGCAGTAGGAATTACAGCATTACTTCTCTTATTATCTTTACCAGTTTTAGCTGGAGCTATTACTATACTTTTAACGGATCGAAATCTTAATACCTCATTTTTTGATTCCTGCGGAGGAGGAGATCC?????????????????????????????????
"""
        result = dataset_creator.dataset_str
        # strip() removes the leading and trailing newlines of the literal.
        self.assertEqual(expected.strip(), result)

    def test_dataset_1st_2nd_3rd_codons_as_one(self):
        """Check the dataset text for gene COI with '1st', '2nd' and '3rd'.

        'partition_by_positions' is not overridden here, so it keeps the
        'ONE' value from setUp. ``dataset_str`` must equal the expected text
        with surrounding whitespace stripped.
        """
        g1 = Genes.objects.get(gene_code='COI')
        cleaned_data = self.cleaned_data
        cleaned_data['gene_codes'] = [g1]
        # NOTE(review): list() around a list literal is redundant.
        cleaned_data['positions'] = list(['1st', '2nd', '3rd'], )

        dataset_creator = CreateDataset(cleaned_data)
        expected = """
>coi
--------------------
>CP100-10_Papilionoidea_Melitaea_diamina
?????????????????????????TGAGCCGGTATAATTGGTACATCCCTAAGTCTTATTATTCGAACCGAATTAGGAAATCCTAGTTTTTTAATTGGAGATGATCAAATTTATAATACCATTGTAACAGCTCATGCTTTTATTATAATTTTTTTTATAGTTATGCCAATTATAATTGGAGGATTTGGTAATTGACTTGTACCATTAATATTGGGAGCCCCAGATATAGCTTTCCCCCGAATAAATTATATAAGATTTTGATTATTGCCTCCATCCTTAATTCTTTTAATTTCAAGTAGAATTGTAGAAAATGGGGCAGGAACTGGATGAACAGTTTACCCCCCACTTTCATCTAATATTGCCCATAGAGGAGCTTCAGTGGATTTAGCTATTTTTTCTTTACATTTAGCTGGGATTTCCTCTATCTTAGGAGCTATTAATTTTATTACTACAATTATTAATATACGAATTAATAATATATCTTATGATCAAATACCTTTATTTGTATGAGCAGTAGGAATTACAGCATTACTTCTCTTATTATCTTTACCAGTTTTAGCTGGAGCTATTACTATACTTTTAACGGATCGAAATCTTAATACCTCATTTTTTGATTCCTGCGGAGGAGGAGATCC?????????????????????????????????
>CP100-11_Melitaea_diamina
??TGAGCCGGTATAATTGGTACATCCCTAAGTCTTATTATTCGAACCGAATTAGGAAATCCTAGTTTTTTAATTGGAGATGATCAAATTTATAATACCATTGTAACAGCTCATGCTTTTATTATAATTTTTTTTATAGTTATGCCAATTATAATTGGAGGATTTGGTAATTGACTTGTACCATTAATATTGGGAGCCCCAGATATAGCTTTCCCCCGAATAAATTATATAAGATTTTGATTATTGCCTCCATCCTTAATTCTTTTAATTTCAAGTAGAATTGTAGAAAATGGGGCAGGAACTGGATGAACAGTTTACCCCCCACTTTCATCTAATATTGCCCATAGAGGAGCTTCAGTGGATTTAGCTATTTTTTCTTTACATTTAGCTGGGATTTCCTCTATCTTAGGAGCTATTAATTTTATTACTACAATTATTAATATACGAATTAATAATATATCTTATGATCAAATACCTTTATTTGTATGAGCAGTAGGAATTACAGCATTACTTCTCTTATTATCTTTACCAGTTTTAGCTGGAGCTATTACTATACTTTTAACGGATCGAAATCTTAATACCTCATTTTTTGATTCCTGCGGAGGAGGAGATCC?????????????????????????????????
"""
        result = dataset_creator.dataset_str
        # strip() removes the leading and trailing newlines of the literal.
        self.assertEqual(expected.strip(), result)

    def test_dataset_all_codons_partitions_each(self):
        g1 = Genes.objects.get(gene_code='COI')
        cleaned_data = self.cleaned_data
        cleaned_data['gene_codes'] = [g1]
        cleaned_data['positions'] = list(['ALL'], )
        cleaned_data['partition_by_positions'] = 'EACH',

        dataset_creator = CreateDataset(cleaned_data)
        expected = """
>coi_1st_codon
--------------------
>CP100-10_Papilionoidea_Melitaea_diamina
????????TGGAAGATCACAACAGTGACATTAGGGCATAAAGAGCGTAAATTAGACAAAGGTGATCGCTATGGCGAGTCCAATAATTTTCCTTACTATAAAGGAGGGAGTAGTCCCTTAAGCAGGTGGTGATTTCTGGATTATGGAATAAAAAAACAAAATTGCACTTGTGGGAAGTCCTTTTCGTGGGAAACTAGCACAATTTGTTGGGGC???????????
>CP100-11_Melitaea_diamina
?ACTATTACATTTTACAAATTTTATATTATTTCTAATTTTTATTTATGATATAATTTATAAAAGACATATCCAATTAATAAGTACATTATATATAATGAATAAATCCATATTTCTAATAGTATTTTATATGTCTCAATTTTTTATTTAATTTATTTAATATAAAAATAAATCAATAATATATTTATAGTATTTCATTTCCAAAT???????????
>coi_2nd_codon
--------------------
>CP100-10_Papilionoidea_Melitaea_diamina
????????GCGTTGCCTGTTTGCATGACGTTTGAAATAACTTCCACTTTTTTTTTCTTTGGTGAGTTCTTTGCCATCTCGTAATGTGTTCCCTTTTTCGGTTAAGCGCGGCTACCTCCATCAGGCCTATCTTCTATCGTCCTTGCTATTCCTTATGTAATCAAATCTTTGCTGTCCTTTTTCTCTTCGCTCTTTCAGATACCTTACGGGGAC???????????
>CP100-11_Melitaea_diamina
TGGAAGATCACAACAGTGACATTAGGGCATAAAGAGCGTAAATTAGACAAAGGTGATCGCTATGGCGAGTCCAATAATTTTCCTTACTATAAAGGAGGGAGTAGTCCCTTAAGCAGGTGGTGATTTCTGGATTATGGAATAAAAAAACAAAATTGCACTTGTGGGAAGTCCTTTTCGTGGGAAACTAGCACAATTTGTTGGGGC???????????
>coi_3rd_codon
--------------------
>CP100-10_Papilionoidea_Melitaea_diamina
????????ACTATTACATTTTACAAATTTTATATTATTTCTAATTTTTATTTATGATATAATTTATAAAAGACATATCCAATTAATAAGTACATTATATATAATGAATAAATCCATATTTCTAATAGTATTTTATATGTCTCAATTTTTTATTTAATTTATTTAATATAAAAATAAATCAATAATATATTTATAGTATTTCATTTCCAAAT???????????
>CP100-11_Melitaea_diamina
GCGTTGCCTGTTTGCATGACGTTTGAAATAACTTCCACTTTTTTTTTCTTTGGTGAGTTCTTTGCCATCTCGTAATGTGTTCCCTTTTTCGGTTAAGCGCGGCTACCTCCATCAGGCCTATCTTCTATCGTCCTTGCTATTCCTTATGTAATCAAATCTTTGCTGTCCTTTTTCTCTTCGCTCTTTCAGATACCTTACGGGGAC???????????
"""
        result = dataset_creator.dataset_str
        self.assertEqual(expected.strip(), result)

    def test_dataset_all_codons_1st_partitions_each(self):
        g1 = Genes.objects.get(gene_code='COI')
        cleaned_data = self.cleaned_data
        cleaned_data['gene_codes'] = [g1]
        cleaned_data['positions'] = list(['ALL', '1st'], )
        cleaned_data['partition_by_positions'] = 'EACH',

        dataset_creator = CreateDataset(cleaned_data)
        expected = """
>coi_1st_codon
--------------------
>CP100-10_Papilionoidea_Melitaea_diamina
????????TGGAAGATCACAACAGTGACATTAGGGCATAAAGAGCGTAAATTAGACAAAGGTGATCGCTATGGCGAGTCCAATAATTTTCCTTACTATAAAGGAGGGAGTAGTCCCTTAAGCAGGTGGTGATTTCTGGATTATGGAATAAAAAAACAAAATTGCACTTGTGGGAAGTCCTTTTCGTGGGAAACTAGCACAATTTGTTGGGGC???????????
>CP100-11_Melitaea_diamina
?ACTATTACATTTTACAAATTTTATATTATTTCTAATTTTTATTTATGATATAATTTATAAAAGACATATCCAATTAATAAGTACATTATATATAATGAATAAATCCATATTTCTAATAGTATTTTATATGTCTCAATTTTTTATTTAATTTATTTAATATAAAAATAAATCAATAATATATTTATAGTATTTCATTTCCAAAT???????????
>coi_2nd_codon
--------------------
>CP100-10_Papilionoidea_Melitaea_diamina
????????GCGTTGCCTGTTTGCATGACGTTTGAAATAACTTCCACTTTTTTTTTCTTTGGTGAGTTCTTTGCCATCTCGTAATGTGTTCCCTTTTTCGGTTAAGCGCGGCTACCTCCATCAGGCCTATCTTCTATCGTCCTTGCTATTCCTTATGTAATCAAATCTTTGCTGTCCTTTTTCTCTTCGCTCTTTCAGATACCTTACGGGGAC???????????
>CP100-11_Melitaea_diamina
TGGAAGATCACAACAGTGACATTAGGGCATAAAGAGCGTAAATTAGACAAAGGTGATCGCTATGGCGAGTCCAATAATTTTCCTTACTATAAAGGAGGGAGTAGTCCCTTAAGCAGGTGGTGATTTCTGGATTATGGAATAAAAAAACAAAATTGCACTTGTGGGAAGTCCTTTTCGTGGGAAACTAGCACAATTTGTTGGGGC???????????
>coi_3rd_codon
--------------------
>CP100-10_Papilionoidea_Melitaea_diamina
????????ACTATTACATTTTACAAATTTTATATTATTTCTAATTTTTATTTATGATATAATTTATAAAAGACATATCCAATTAATAAGTACATTATATATAATGAATAAATCCATATTTCTAATAGTATTTTATATGTCTCAATTTTTTATTTAATTTATTTAATATAAAAATAAATCAATAATATATTTATAGTATTTCATTTCCAAAT???????????
>CP100-11_Melitaea_diamina
GCGTTGCCTGTTTGCATGACGTTTGAAATAACTTCCACTTTTTTTTTCTTTGGTGAGTTCTTTGCCATCTCGTAATGTGTTCCCTTTTTCGGTTAAGCGCGGCTACCTCCATCAGGCCTATCTTCTATCGTCCTTGCTATTCCTTATGTAATCAAATCTTTGCTGTCCTTTTTCTCTTCGCTCTTTCAGATACCTTACGGGGAC???????????
"""
        result = dataset_creator.dataset_str
        self.assertEqual(expected.strip(), result)

    def test_dataset_all_codons_1st_2nd_partitions_each(self):
        g1 = Genes.objects.get(gene_code='COI')
        cleaned_data = self.cleaned_data
        cleaned_data['gene_codes'] = [g1]
        cleaned_data['positions'] = list(['ALL', '1st', '2nd'], )
        cleaned_data['partition_by_positions'] = 'EACH',

        dataset_creator = CreateDataset(cleaned_data)
        expected = """
>coi_1st_codon
--------------------
>CP100-10_Papilionoidea_Melitaea_diamina
????????TGGAAGATCACAACAGTGACATTAGGGCATAAAGAGCGTAAATTAGACAAAGGTGATCGCTATGGCGAGTCCAATAATTTTCCTTACTATAAAGGAGGGAGTAGTCCCTTAAGCAGGTGGTGATTTCTGGATTATGGAATAAAAAAACAAAATTGCACTTGTGGGAAGTCCTTTTCGTGGGAAACTAGCACAATTTGTTGGGGC???????????
>CP100-11_Melitaea_diamina
?ACTATTACATTTTACAAATTTTATATTATTTCTAATTTTTATTTATGATATAATTTATAAAAGACATATCCAATTAATAAGTACATTATATATAATGAATAAATCCATATTTCTAATAGTATTTTATATGTCTCAATTTTTTATTTAATTTATTTAATATAAAAATAAATCAATAATATATTTATAGTATTTCATTTCCAAAT???????????
>coi_2nd_codon
--------------------
>CP100-10_Papilionoidea_Melitaea_diamina
????????GCGTTGCCTGTTTGCATGACGTTTGAAATAACTTCCACTTTTTTTTTCTTTGGTGAGTTCTTTGCCATCTCGTAATGTGTTCCCTTTTTCGGTTAAGCGCGGCTACCTCCATCAGGCCTATCTTCTATCGTCCTTGCTATTCCTTATGTAATCAAATCTTTGCTGTCCTTTTTCTCTTCGCTCTTTCAGATACCTTACGGGGAC???????????
>CP100-11_Melitaea_diamina
TGGAAGATCACAACAGTGACATTAGGGCATAAAGAGCGTAAATTAGACAAAGGTGATCGCTATGGCGAGTCCAATAATTTTCCTTACTATAAAGGAGGGAGTAGTCCCTTAAGCAGGTGGTGATTTCTGGATTATGGAATAAAAAAACAAAATTGCACTTGTGGGAAGTCCTTTTCGTGGGAAACTAGCACAATTTGTTGGGGC???????????
>coi_3rd_codon
--------------------
>CP100-10_Papilionoidea_Melitaea_diamina
????????ACTATTACATTTTACAAATTTTATATTATTTCTAATTTTTATTTATGATATAATTTATAAAAGACATATCCAATTAATAAGTACATTATATATAATGAATAAATCCATATTTCTAATAGTATTTTATATGTCTCAATTTTTTATTTAATTTATTTAATATAAAAATAAATCAATAATATATTTATAGTATTTCATTTCCAAAT???????????
>CP100-11_Melitaea_diamina
GCGTTGCCTGTTTGCATGACGTTTGAAATAACTTCCACTTTTTTTTTCTTTGGTGAGTTCTTTGCCATCTCGTAATGTGTTCCCTTTTTCGGTTAAGCGCGGCTACCTCCATCAGGCCTATCTTCTATCGTCCTTGCTATTCCTTATGTAATCAAATCTTTGCTGTCCTTTTTCTCTTCGCTCTTTCAGATACCTTACGGGGAC???????????
"""
        result = dataset_creator.dataset_str
        self.assertEqual(expected.strip(), result)

    def test_dataset_all_codons_1st_2nd_3rd_partitions_each(self):
        g1 = Genes.objects.get(gene_code='COI')
        cleaned_data = self.cleaned_data
        cleaned_data['gene_codes'] = [g1]
        cleaned_data['positions'] = list(['ALL', '1st', '2nd', '3rd'], )
        cleaned_data['partition_by_positions'] = 'EACH',

        dataset_creator = CreateDataset(cleaned_data)
        expected = """
>coi_1st_codon
--------------------
>CP100-10_Papilionoidea_Melitaea_diamina
????????TGGAAGATCACAACAGTGACATTAGGGCATAAAGAGCGTAAATTAGACAAAGGTGATCGCTATGGCGAGTCCAATAATTTTCCTTACTATAAAGGAGGGAGTAGTCCCTTAAGCAGGTGGTGATTTCTGGATTATGGAATAAAAAAACAAAATTGCACTTGTGGGAAGTCCTTTTCGTGGGAAACTAGCACAATTTGTTGGGGC???????????
>CP100-11_Melitaea_diamina
?ACTATTACATTTTACAAATTTTATATTATTTCTAATTTTTATTTATGATATAATTTATAAAAGACATATCCAATTAATAAGTACATTATATATAATGAATAAATCCATATTTCTAATAGTATTTTATATGTCTCAATTTTTTATTTAATTTATTTAATATAAAAATAAATCAATAATATATTTATAGTATTTCATTTCCAAAT???????????
>coi_2nd_codon
--------------------
>CP100-10_Papilionoidea_Melitaea_diamina
????????GCGTTGCCTGTTTGCATGACGTTTGAAATAACTTCCACTTTTTTTTTCTTTGGTGAGTTCTTTGCCATCTCGTAATGTGTTCCCTTTTTCGGTTAAGCGCGGCTACCTCCATCAGGCCTATCTTCTATCGTCCTTGCTATTCCTTATGTAATCAAATCTTTGCTGTCCTTTTTCTCTTCGCTCTTTCAGATACCTTACGGGGAC???????????
>CP100-11_Melitaea_diamina
TGGAAGATCACAACAGTGACATTAGGGCATAAAGAGCGTAAATTAGACAAAGGTGATCGCTATGGCGAGTCCAATAATTTTCCTTACTATAAAGGAGGGAGTAGTCCCTTAAGCAGGTGGTGATTTCTGGATTATGGAATAAAAAAACAAAATTGCACTTGTGGGAAGTCCTTTTCGTGGGAAACTAGCACAATTTGTTGGGGC???????????
>coi_3rd_codon
--------------------
>CP100-10_Papilionoidea_Melitaea_diamina
????????ACTATTACATTTTACAAATTTTATATTATTTCTAATTTTTATTTATGATATAATTTATAAAAGACATATCCAATTAATAAGTACATTATATATAATGAATAAATCCATATTTCTAATAGTATTTTATATGTCTCAATTTTTTATTTAATTTATTTAATATAAAAATAAATCAATAATATATTTATAGTATTTCATTTCCAAAT???????????
>CP100-11_Melitaea_diamina
GCGTTGCCTGTTTGCATGACGTTTGAAATAACTTCCACTTTTTTTTTCTTTGGTGAGTTCTTTGCCATCTCGTAATGTGTTCCCTTTTTCGGTTAAGCGCGGCTACCTCCATCAGGCCTATCTTCTATCGTCCTTGCTATTCCTTATGTAATCAAATCTTTGCTGTCCTTTTTCTCTTCGCTCTTTCAGATACCTTACGGGGAC???????????
"""
        result = dataset_creator.dataset_str
        self.assertEqual(expected.strip(), result)

    def test_dataset_1st_codon_one_partition(self):
        g1 = Genes.objects.get(gene_code='COI')
        cleaned_data = self.cleaned_data
        cleaned_data['gene_codes'] = [g1]
        del cleaned_data['positions']
        cleaned_data['positions'] = ['1st']

        dataset_creator = CreateDataset(cleaned_data)
        expected = """
>coi
--------------------
>CP100-10_Papilionoidea_Melitaea_diamina
????????TGGAAGATCACAACAGTGACATTAGGGCATAAAGAGCGTAAATTAGACAAAGGTGATCGCTATGGCGAGTCCAATAATTTTCCTTACTATAAAGGAGGGAGTAGTCCCTTAAGCAGGTGGTGATTTCTGGATTATGGAATAAAAAAACAAAATTGCACTTGTGGGAAGTCCTTTTCGTGGGAAACTAGCACAATTTGTTGGGGC???????????
>CP100-11_Melitaea_diamina
?ACTATTACATTTTACAAATTTTATATTATTTCTAATTTTTATTTATGATATAATTTATAAAAGACATATCCAATTAATAAGTACATTATATATAATGAATAAATCCATATTTCTAATAGTATTTTATATGTCTCAATTTTTTATTTAATTTATTTAATATAAAAATAAATCAATAATATATTTATAGTATTTCATTTCCAAAT???????????
"""
        result = dataset_creator.dataset_str
        self.assertEqual(expected.strip(), result)

    def test_dataset_2nd_codon_one_partition(self):
        g1 = Genes.objects.get(gene_code='COI')
        cleaned_data = self.cleaned_data
        cleaned_data['gene_codes'] = [g1]
        del cleaned_data['positions']
        cleaned_data['positions'] = ['2nd']

        dataset_creator = CreateDataset(cleaned_data)
        expected = """
>coi
--------------------
>CP100-10_Papilionoidea_Melitaea_diamina
????????GCGTTGCCTGTTTGCATGACGTTTGAAATAACTTCCACTTTTTTTTTCTTTGGTGAGTTCTTTGCCATCTCGTAATGTGTTCCCTTTTTCGGTTAAGCGCGGCTACCTCCATCAGGCCTATCTTCTATCGTCCTTGCTATTCCTTATGTAATCAAATCTTTGCTGTCCTTTTTCTCTTCGCTCTTTCAGATACCTTACGGGGAC???????????
>CP100-11_Melitaea_diamina
TGGAAGATCACAACAGTGACATTAGGGCATAAAGAGCGTAAATTAGACAAAGGTGATCGCTATGGCGAGTCCAATAATTTTCCTTACTATAAAGGAGGGAGTAGTCCCTTAAGCAGGTGGTGATTTCTGGATTATGGAATAAAAAAACAAAATTGCACTTGTGGGAAGTCCTTTTCGTGGGAAACTAGCACAATTTGTTGGGGC???????????
"""
        result = dataset_creator.dataset_str
        self.assertEqual(expected.strip(), result)

    def test_dataset_3rd_codon_one_partition(self):
        g1 = Genes.objects.get(gene_code='COI')
        cleaned_data = self.cleaned_data
        cleaned_data['gene_codes'] = [g1]
        del cleaned_data['positions']
        cleaned_data['positions'] = ['3rd']

        dataset_creator = CreateDataset(cleaned_data)
        expected = """
>coi
--------------------
>CP100-10_Papilionoidea_Melitaea_diamina
????????ACTATTACATTTTACAAATTTTATATTATTTCTAATTTTTATTTATGATATAATTTATAAAAGACATATCCAATTAATAAGTACATTATATATAATGAATAAATCCATATTTCTAATAGTATTTTATATGTCTCAATTTTTTATTTAATTTATTTAATATAAAAATAAATCAATAATATATTTATAGTATTTCATTTCCAAAT???????????
>CP100-11_Melitaea_diamina
GCGTTGCCTGTTTGCATGACGTTTGAAATAACTTCCACTTTTTTTTTCTTTGGTGAGTTCTTTGCCATCTCGTAATGTGTTCCCTTTTTCGGTTAAGCGCGGCTACCTCCATCAGGCCTATCTTCTATCGTCCTTGCTATTCCTTATGTAATCAAATCTTTGCTGTCCTTTTTCTCTTCGCTCTTTCAGATACCTTACGGGGAC???????????
"""
        result = dataset_creator.dataset_str
        self.assertEqual(expected.strip(), result)

    def test_dataset_1st_2nd_codon_one_partition(self):
        g1 = Genes.objects.get(gene_code='COI')
        cleaned_data = self.cleaned_data
        cleaned_data['gene_codes'] = [g1]
        del cleaned_data['positions']
        cleaned_data['positions'] = ['1st', '2nd']

        dataset_creator = CreateDataset(cleaned_data)
        expected = """
>coi
--------------------
>CP100-10_Papilionoidea_Melitaea_diamina
????????????????TGGCGGATATGGACTCCTAGCTATATCGACGATTGGAACCAGTTTTATGGGAGACAATTAAAACATGTACGCCAGCTTATATATTTTTATGTATCCATATATGGGGTTGGAATGCTGTCCTTATTTGGGCCCGAATGCTTCCCGATAATAATAGTTTGTTTTCCCCTCTTATCTTTATTCAGAGATGTGAAAGGGCGGACGGTGACGTTACCCCCTTCTCAAATGCCAAGGGGCTCGTGATTGCATTTTCTTCATTGCGGATTCTCATTTGGGCATAATTATACACATATAAATCGATAAAAATTCTAGACAATCCTTTTGTTGGCGTGGATACGCTTCTCTTTTTTCTTCCGTTTGCGGGCATACATCTTTACGACGAACTAAACTCTTTTGATCTGGGGGGGGACC??????????????????????
>CP100-11_Melitaea_diamina
?TAGCGTAAATGTAATCCAATCTATATCAACGATAGAATCTATTTTAATGAGTGTCAATTTATACATGAAAGTCTGTTTATAAATTTTTAAGTAGCAATAAATGAGATTGTATTACTGACATAAATGGAGCCAGTAAGTTCCCCAAAATTTAAAATTTATATGCTCATCTAATCTTAATTAATAAATGAGAATGGGAGAATGATAAAGTTCCCCACTTATTATATGCCTAAGAGTTAGGGTTAGTATTTTTTACTTAGTGGATTCTTACTAGAGTATATTTATATAAATATATAACAATATATAATTTTGTCAAACTTATTGATAGAGAGAATAAGATACTCCTATATTTACAGTTAGTGAGTATATAACTTAAGGTCAATCTATACTATTTTGTTCTCGAGAGAGTC??????????????????????
"""
        result = dataset_creator.dataset_str
        self.assertEqual(expected.strip(), result)

    def test_dataset_2nd_3rd_codon_one_partition(self):
        g1 = Genes.objects.get(gene_code='COI')
        cleaned_data = self.cleaned_data
        cleaned_data['gene_codes'] = [g1]
        del cleaned_data['positions']
        cleaned_data['positions'] = ['2nd', '3rd']

        dataset_creator = CreateDataset(cleaned_data)
        expected = """
>coi
--------------------
>CP100-10_Papilionoidea_Melitaea_diamina
????????????????GACCGTTATTGTCACCTAGTTTTTTTGACCAATAGAATCTGTTTTATTGAATATAATTATATCCTTTACACTATCTTTTTTATTTTTTTATTTGCATTTATTGAGATTGTATGATTTACATATATGGACCCAATTACTTCCCGATAATATTAGATTGATATGCTCACCTATTTTTATTCAGTGATTTAAAATGGCAGACTGAGACATTACCCCATTCACTATTTCCATGAGACTCATGATTACTTTTTCTTAATTACTGGTTCCCTTCTAGACTTTATTTTTCTCATTTTATTAGATTATATTACTATATAATACTTATTTAGACATAGATTCACATATTTCTATACTTACATTTACTGACTTTCTTATTTACGATGAATTTATCCCATTTTATCCGCGAGAGAATC??????????????????????
>CP100-11_Melitaea_diamina
TGGCGGATATGGACTCCTAGCTATATCGACGATTGGAACCAGTTTTATGGGAGACAATTAAAACATGTACGCCAGCTTATATATTTTTATGTATCCATATATGGGGTTGGAATGCTGTCCTTATTTGGGCCCGAATGCTTCCCGATAATAATAGTTTGTTTTCCCCTCTTATCTTTATTCAGAGATGTGAAAGGGCGGACGGTGACGTTACCCCCTTCTCAAATGCCAAGGGGCTCGTGATTGCATTTTCTTCATTGCGGATTCTCATTTGGGCATAATTATACACATATAAATCGATAAAAATTCTAGACAATCCTTTTGTTGGCGTGGATACGCTTCTCTTTTTTCTTCCGTTTGCGGGCATACATCTTTACGACGAACTAAACTCTTTTGATCTGGGGGGGGACC??????????????????????
"""
        result = dataset_creator.dataset_str
        self.assertEqual(expected.strip(), result)

    def test_dataset_1st_3rd_codon_one_partition(self):
        g1 = Genes.objects.get(gene_code='COI')
        cleaned_data = self.cleaned_data
        cleaned_data['gene_codes'] = [g1]
        del cleaned_data['positions']
        cleaned_data['positions'] = ['1st', '3rd']

        dataset_creator = CreateDataset(cleaned_data)
        expected = """
>coi
--------------------
>CP100-10_Papilionoidea_Melitaea_diamina
????????????????TAGCGTAAATGTAATCCAATCTATATCAACGATAGAATCTATTTTAATGAGTGTCAATTTATACATGAAAGTCTGTTTATAAATTTTTAAGTAGCAATAAATGAGATTGTATTACTGACATAAATGGAGCCAGTAAGTTCCCCAAAATTTAAAATTTATATGCTCATCTAATCTTAATTAATAAATGAGAATGGGAGAATGATAAAGTTCCCCACTTATTATATGCCTAAGAGTTAGGGTTAGTATTTTTTACTTAGTGGATTCTTACTAGAGTATATTTATATAAATATATAACAATATATAATTTTGTCAAACTTATTGATAGAGAGAATAAGATACTCCTATATTTACAGTTAGTGAGTATATAACTTAAGGTCAATCTATACTATTTTGTTCTCGAGAGAGTC??????????????????????
>CP100-11_Melitaea_diamina
?GACCGTTATTGTCACCTAGTTTTTTTGACCAATAGAATCTGTTTTATTGAATATAATTATATCCTTTACACTATCTTTTTTATTTTTTTATTTGCATTTATTGAGATTGTATGATTTACATATATGGACCCAATTACTTCCCGATAATATTAGATTGATATGCTCACCTATTTTTATTCAGTGATTTAAAATGGCAGACTGAGACATTACCCCATTCACTATTTCCATGAGACTCATGATTACTTTTTCTTAATTACTGGTTCCCTTCTAGACTTTATTTTTCTCATTTTATTAGATTATATTACTATATAATACTTATTTAGACATAGATTCACATATTTCTATACTTACATTTACTGACTTTCTTATTTACGATGAATTTATCCCATTTTATCCGCGAGAGAATC??????????????????????
"""
        result = dataset_creator.dataset_str
        self.assertEqual(expected.strip(), result)

    def test_dataset_1st_each(self):
        g1 = Genes.objects.get(gene_code='COI')
        cleaned_data = self.cleaned_data
        cleaned_data['gene_codes'] = [g1]
        del cleaned_data['positions']
        cleaned_data['positions'] = ['1st']
        cleaned_data['partition_by_positions'] = 'EACH'

        dataset_creator = CreateDataset(cleaned_data)
        expected = """
>coi
--------------------
>CP100-10_Papilionoidea_Melitaea_diamina
????????TGGAAGATCACAACAGTGACATTAGGGCATAAAGAGCGTAAATTAGACAAAGGTGATCGCTATGGCGAGTCCAATAATTTTCCTTACTATAAAGGAGGGAGTAGTCCCTTAAGCAGGTGGTGATTTCTGGATTATGGAATAAAAAAACAAAATTGCACTTGTGGGAAGTCCTTTTCGTGGGAAACTAGCACAATTTGTTGGGGC???????????
>CP100-11_Melitaea_diamina
?ACTATTACATTTTACAAATTTTATATTATTTCTAATTTTTATTTATGATATAATTTATAAAAGACATATCCAATTAATAAGTACATTATATATAATGAATAAATCCATATTTCTAATAGTATTTTATATGTCTCAATTTTTTATTTAATTTATTTAATATAAAAATAAATCAATAATATATTTATAGTATTTCATTTCCAAAT???????????
"""
        result = dataset_creator.dataset_str
        self.assertEqual(expected.strip(), result)

    def test_dataset_2nd_each(self):
        g1 = Genes.objects.get(gene_code='COI')
        cleaned_data = self.cleaned_data
        cleaned_data['gene_codes'] = [g1]
        del cleaned_data['positions']
        cleaned_data['positions'] = ['2nd']
        cleaned_data['partition_by_positions'] = 'EACH'

        dataset_creator = CreateDataset(cleaned_data)
        expected = """
>coi
--------------------
>CP100-10_Papilionoidea_Melitaea_diamina
????????GCGTTGCCTGTTTGCATGACGTTTGAAATAACTTCCACTTTTTTTTTCTTTGGTGAGTTCTTTGCCATCTCGTAATGTGTTCCCTTTTTCGGTTAAGCGCGGCTACCTCCATCAGGCCTATCTTCTATCGTCCTTGCTATTCCTTATGTAATCAAATCTTTGCTGTCCTTTTTCTCTTCGCTCTTTCAGATACCTTACGGGGAC???????????
>CP100-11_Melitaea_diamina
TGGAAGATCACAACAGTGACATTAGGGCATAAAGAGCGTAAATTAGACAAAGGTGATCGCTATGGCGAGTCCAATAATTTTCCTTACTATAAAGGAGGGAGTAGTCCCTTAAGCAGGTGGTGATTTCTGGATTATGGAATAAAAAAACAAAATTGCACTTGTGGGAAGTCCTTTTCGTGGGAAACTAGCACAATTTGTTGGGGC???????????
"""
        result = dataset_creator.dataset_str
        self.assertEqual(expected.strip(), result)

    def test_dataset_3rd_each(self):
        g1 = Genes.objects.get(gene_code='COI')
        cleaned_data = self.cleaned_data
        cleaned_data['gene_codes'] = [g1]
        del cleaned_data['positions']
        cleaned_data['positions'] = ['3rd']
        cleaned_data['partition_by_positions'] = 'EACH'

        dataset_creator = CreateDataset(cleaned_data)
        expected = """
>coi
--------------------
>CP100-10_Papilionoidea_Melitaea_diamina
????????ACTATTACATTTTACAAATTTTATATTATTTCTAATTTTTATTTATGATATAATTTATAAAAGACATATCCAATTAATAAGTACATTATATATAATGAATAAATCCATATTTCTAATAGTATTTTATATGTCTCAATTTTTTATTTAATTTATTTAATATAAAAATAAATCAATAATATATTTATAGTATTTCATTTCCAAAT???????????
>CP100-11_Melitaea_diamina
GCGTTGCCTGTTTGCATGACGTTTGAAATAACTTCCACTTTTTTTTTCTTTGGTGAGTTCTTTGCCATCTCGTAATGTGTTCCCTTTTTCGGTTAAGCGCGGCTACCTCCATCAGGCCTATCTTCTATCGTCCTTGCTATTCCTTATGTAATCAAATCTTTGCTGTCCTTTTTCTCTTCGCTCTTTCAGATACCTTACGGGGAC???????????
"""
        result = dataset_creator.dataset_str
        self.assertEqual(expected.strip(), result)

    def test_dataset_1st_2nd_each(self):
        g1 = Genes.objects.get(gene_code='COI')
        cleaned_data = self.cleaned_data
        cleaned_data['gene_codes'] = [g1]
        del cleaned_data['positions']
        cleaned_data['positions'] = ['1st', '2nd']
        cleaned_data['partition_by_positions'] = 'EACH'

        dataset_creator = CreateDataset(cleaned_data)
        expected = """
>coi_1st_codon
--------------------
>CP100-10_Papilionoidea_Melitaea_diamina
????????TGGAAGATCACAACAGTGACATTAGGGCATAAAGAGCGTAAATTAGACAAAGGTGATCGCTATGGCGAGTCCAATAATTTTCCTTACTATAAAGGAGGGAGTAGTCCCTTAAGCAGGTGGTGATTTCTGGATTATGGAATAAAAAAACAAAATTGCACTTGTGGGAAGTCCTTTTCGTGGGAAACTAGCACAATTTGTTGGGGC???????????
>CP100-11_Melitaea_diamina
?ACTATTACATTTTACAAATTTTATATTATTTCTAATTTTTATTTATGATATAATTTATAAAAGACATATCCAATTAATAAGTACATTATATATAATGAATAAATCCATATTTCTAATAGTATTTTATATGTCTCAATTTTTTATTTAATTTATTTAATATAAAAATAAATCAATAATATATTTATAGTATTTCATTTCCAAAT???????????
>coi_2nd_codon
--------------------
>CP100-10_Papilionoidea_Melitaea_diamina
????????GCGTTGCCTGTTTGCATGACGTTTGAAATAACTTCCACTTTTTTTTTCTTTGGTGAGTTCTTTGCCATCTCGTAATGTGTTCCCTTTTTCGGTTAAGCGCGGCTACCTCCATCAGGCCTATCTTCTATCGTCCTTGCTATTCCTTATGTAATCAAATCTTTGCTGTCCTTTTTCTCTTCGCTCTTTCAGATACCTTACGGGGAC???????????
>CP100-11_Melitaea_diamina
TGGAAGATCACAACAGTGACATTAGGGCATAAAGAGCGTAAATTAGACAAAGGTGATCGCTATGGCGAGTCCAATAATTTTCCTTACTATAAAGGAGGGAGTAGTCCCTTAAGCAGGTGGTGATTTCTGGATTATGGAATAAAAAAACAAAATTGCACTTGTGGGAAGTCCTTTTCGTGGGAAACTAGCACAATTTGTTGGGGC???????????
"""
        result = dataset_creator.dataset_str
        self.assertEqual(expected.strip(), result)

    def test_dataset_1st_3rd_each(self):
        g1 = Genes.objects.get(gene_code='COI')
        cleaned_data = self.cleaned_data
        cleaned_data['gene_codes'] = [g1]
        del cleaned_data['positions']
        cleaned_data['positions'] = ['1st', '3rd']
        cleaned_data['partition_by_positions'] = 'EACH'

        dataset_creator = CreateDataset(cleaned_data)
        expected = """
>coi_1st_codon
--------------------
>CP100-10_Papilionoidea_Melitaea_diamina
????????TGGAAGATCACAACAGTGACATTAGGGCATAAAGAGCGTAAATTAGACAAAGGTGATCGCTATGGCGAGTCCAATAATTTTCCTTACTATAAAGGAGGGAGTAGTCCCTTAAGCAGGTGGTGATTTCTGGATTATGGAATAAAAAAACAAAATTGCACTTGTGGGAAGTCCTTTTCGTGGGAAACTAGCACAATTTGTTGGGGC???????????
>CP100-11_Melitaea_diamina
?ACTATTACATTTTACAAATTTTATATTATTTCTAATTTTTATTTATGATATAATTTATAAAAGACATATCCAATTAATAAGTACATTATATATAATGAATAAATCCATATTTCTAATAGTATTTTATATGTCTCAATTTTTTATTTAATTTATTTAATATAAAAATAAATCAATAATATATTTATAGTATTTCATTTCCAAAT???????????
>coi_3rd_codon
--------------------
>CP100-10_Papilionoidea_Melitaea_diamina
????????ACTATTACATTTTACAAATTTTATATTATTTCTAATTTTTATTTATGATATAATTTATAAAAGACATATCCAATTAATAAGTACATTATATATAATGAATAAATCCATATTTCTAATAGTATTTTATATGTCTCAATTTTTTATTTAATTTATTTAATATAAAAATAAATCAATAATATATTTATAGTATTTCATTTCCAAAT???????????
>CP100-11_Melitaea_diamina
GCGTTGCCTGTTTGCATGACGTTTGAAATAACTTCCACTTTTTTTTTCTTTGGTGAGTTCTTTGCCATCTCGTAATGTGTTCCCTTTTTCGGTTAAGCGCGGCTACCTCCATCAGGCCTATCTTCTATCGTCCTTGCTATTCCTTATGTAATCAAATCTTTGCTGTCCTTTTTCTCTTCGCTCTTTCAGATACCTTACGGGGAC???????????
"""
        result = dataset_creator.dataset_str
        self.assertEqual(expected.strip(), result)

    def test_dataset_2nd_3rd_each(self):
        g1 = Genes.objects.get(gene_code='COI')
        cleaned_data = self.cleaned_data
        cleaned_data['gene_codes'] = [g1]
        del cleaned_data['positions']
        cleaned_data['positions'] = ['2nd', '3rd']
        cleaned_data['partition_by_positions'] = 'EACH'

        dataset_creator = CreateDataset(cleaned_data)
        expected = """
>coi_2nd_codon
--------------------
>CP100-10_Papilionoidea_Melitaea_diamina
????????GCGTTGCCTGTTTGCATGACGTTTGAAATAACTTCCACTTTTTTTTTCTTTGGTGAGTTCTTTGCCATCTCGTAATGTGTTCCCTTTTTCGGTTAAGCGCGGCTACCTCCATCAGGCCTATCTTCTATCGTCCTTGCTATTCCTTATGTAATCAAATCTTTGCTGTCCTTTTTCTCTTCGCTCTTTCAGATACCTTACGGGGAC???????????
>CP100-11_Melitaea_diamina
TGGAAGATCACAACAGTGACATTAGGGCATAAAGAGCGTAAATTAGACAAAGGTGATCGCTATGGCGAGTCCAATAATTTTCCTTACTATAAAGGAGGGAGTAGTCCCTTAAGCAGGTGGTGATTTCTGGATTATGGAATAAAAAAACAAAATTGCACTTGTGGGAAGTCCTTTTCGTGGGAAACTAGCACAATTTGTTGGGGC???????????
>coi_3rd_codon
--------------------
>CP100-10_Papilionoidea_Melitaea_diamina
????????ACTATTACATTTTACAAATTTTATATTATTTCTAATTTTTATTTATGATATAATTTATAAAAGACATATCCAATTAATAAGTACATTATATATAATGAATAAATCCATATTTCTAATAGTATTTTATATGTCTCAATTTTTTATTTAATTTATTTAATATAAAAATAAATCAATAATATATTTATAGTATTTCATTTCCAAAT???????????
>CP100-11_Melitaea_diamina
GCGTTGCCTGTTTGCATGACGTTTGAAATAACTTCCACTTTTTTTTTCTTTGGTGAGTTCTTTGCCATCTCGTAATGTGTTCCCTTTTTCGGTTAAGCGCGGCTACCTCCATCAGGCCTATCTTCTATCGTCCTTGCTATTCCTTATGTAATCAAATCTTTGCTGTCCTTTTTCTCTTCGCTCTTTCAGATACCTTACGGGGAC???????????
"""
        result = dataset_creator.dataset_str
        self.assertEqual('\n' + expected.strip(), result)

    def test_dataset_1st_2nd_3rd_each(self):
        g1 = Genes.objects.get(gene_code='COI')
        cleaned_data = self.cleaned_data
        cleaned_data['gene_codes'] = [g1]
        del cleaned_data['positions']
        cleaned_data['positions'] = ['1st', '2nd', '3rd']
        cleaned_data['partition_by_positions'] = 'EACH'

        dataset_creator = CreateDataset(cleaned_data)
        expected = """
>coi_1st_codon
--------------------
>CP100-10_Papilionoidea_Melitaea_diamina
????????TGGAAGATCACAACAGTGACATTAGGGCATAAAGAGCGTAAATTAGACAAAGGTGATCGCTATGGCGAGTCCAATAATTTTCCTTACTATAAAGGAGGGAGTAGTCCCTTAAGCAGGTGGTGATTTCTGGATTATGGAATAAAAAAACAAAATTGCACTTGTGGGAAGTCCTTTTCGTGGGAAACTAGCACAATTTGTTGGGGC???????????
>CP100-11_Melitaea_diamina
?ACTATTACATTTTACAAATTTTATATTATTTCTAATTTTTATTTATGATATAATTTATAAAAGACATATCCAATTAATAAGTACATTATATATAATGAATAAATCCATATTTCTAATAGTATTTTATATGTCTCAATTTTTTATTTAATTTATTTAATATAAAAATAAATCAATAATATATTTATAGTATTTCATTTCCAAAT???????????
>coi_2nd_codon
--------------------
>CP100-10_Papilionoidea_Melitaea_diamina
????????GCGTTGCCTGTTTGCATGACGTTTGAAATAACTTCCACTTTTTTTTTCTTTGGTGAGTTCTTTGCCATCTCGTAATGTGTTCCCTTTTTCGGTTAAGCGCGGCTACCTCCATCAGGCCTATCTTCTATCGTCCTTGCTATTCCTTATGTAATCAAATCTTTGCTGTCCTTTTTCTCTTCGCTCTTTCAGATACCTTACGGGGAC???????????
>CP100-11_Melitaea_diamina
TGGAAGATCACAACAGTGACATTAGGGCATAAAGAGCGTAAATTAGACAAAGGTGATCGCTATGGCGAGTCCAATAATTTTCCTTACTATAAAGGAGGGAGTAGTCCCTTAAGCAGGTGGTGATTTCTGGATTATGGAATAAAAAAACAAAATTGCACTTGTGGGAAGTCCTTTTCGTGGGAAACTAGCACAATTTGTTGGGGC???????????
>coi_3rd_codon
--------------------
>CP100-10_Papilionoidea_Melitaea_diamina
????????ACTATTACATTTTACAAATTTTATATTATTTCTAATTTTTATTTATGATATAATTTATAAAAGACATATCCAATTAATAAGTACATTATATATAATGAATAAATCCATATTTCTAATAGTATTTTATATGTCTCAATTTTTTATTTAATTTATTTAATATAAAAATAAATCAATAATATATTTATAGTATTTCATTTCCAAAT???????????
>CP100-11_Melitaea_diamina
GCGTTGCCTGTTTGCATGACGTTTGAAATAACTTCCACTTTTTTTTTCTTTGGTGAGTTCTTTGCCATCTCGTAATGTGTTCCCTTTTTCGGTTAAGCGCGGCTACCTCCATCAGGCCTATCTTCTATCGTCCTTGCTATTCCTTATGTAATCAAATCTTTGCTGTCCTTTTTCTCTTCGCTCTTTCAGATACCTTACGGGGAC???????????
"""
        result = dataset_creator.dataset_str
        self.assertEqual(expected.strip(), result)

    def test_dataset_ALL_1st_2nd_3rd_each(self):
        g1 = Genes.objects.get(gene_code='COI')
        cleaned_data = self.cleaned_data
        cleaned_data['gene_codes'] = [g1]
        del cleaned_data['positions']
        cleaned_data['positions'] = ['ALL', '1st', '2nd', '3rd']
        cleaned_data['partition_by_positions'] = 'EACH'

        dataset_creator = CreateDataset(cleaned_data)
        expected = """
>coi_1st_codon
--------------------
>CP100-10_Papilionoidea_Melitaea_diamina
????????TGGAAGATCACAACAGTGACATTAGGGCATAAAGAGCGTAAATTAGACAAAGGTGATCGCTATGGCGAGTCCAATAATTTTCCTTACTATAAAGGAGGGAGTAGTCCCTTAAGCAGGTGGTGATTTCTGGATTATGGAATAAAAAAACAAAATTGCACTTGTGGGAAGTCCTTTTCGTGGGAAACTAGCACAATTTGTTGGGGC???????????
>CP100-11_Melitaea_diamina
?ACTATTACATTTTACAAATTTTATATTATTTCTAATTTTTATTTATGATATAATTTATAAAAGACATATCCAATTAATAAGTACATTATATATAATGAATAAATCCATATTTCTAATAGTATTTTATATGTCTCAATTTTTTATTTAATTTATTTAATATAAAAATAAATCAATAATATATTTATAGTATTTCATTTCCAAAT???????????
>coi_2nd_codon
--------------------
>CP100-10_Papilionoidea_Melitaea_diamina
????????GCGTTGCCTGTTTGCATGACGTTTGAAATAACTTCCACTTTTTTTTTCTTTGGTGAGTTCTTTGCCATCTCGTAATGTGTTCCCTTTTTCGGTTAAGCGCGGCTACCTCCATCAGGCCTATCTTCTATCGTCCTTGCTATTCCTTATGTAATCAAATCTTTGCTGTCCTTTTTCTCTTCGCTCTTTCAGATACCTTACGGGGAC???????????
>CP100-11_Melitaea_diamina
TGGAAGATCACAACAGTGACATTAGGGCATAAAGAGCGTAAATTAGACAAAGGTGATCGCTATGGCGAGTCCAATAATTTTCCTTACTATAAAGGAGGGAGTAGTCCCTTAAGCAGGTGGTGATTTCTGGATTATGGAATAAAAAAACAAAATTGCACTTGTGGGAAGTCCTTTTCGTGGGAAACTAGCACAATTTGTTGGGGC???????????
>coi_3rd_codon
--------------------
>CP100-10_Papilionoidea_Melitaea_diamina
????????ACTATTACATTTTACAAATTTTATATTATTTCTAATTTTTATTTATGATATAATTTATAAAAGACATATCCAATTAATAAGTACATTATATATAATGAATAAATCCATATTTCTAATAGTATTTTATATGTCTCAATTTTTTATTTAATTTATTTAATATAAAAATAAATCAATAATATATTTATAGTATTTCATTTCCAAAT???????????
>CP100-11_Melitaea_diamina
GCGTTGCCTGTTTGCATGACGTTTGAAATAACTTCCACTTTTTTTTTCTTTGGTGAGTTCTTTGCCATCTCGTAATGTGTTCCCTTTTTCGGTTAAGCGCGGCTACCTCCATCAGGCCTATCTTCTATCGTCCTTGCTATTCCTTATGTAATCAAATCTTTGCTGTCCTTTTTCTCTTCGCTCTTTCAGATACCTTACGGGGAC???????????
"""
        result = dataset_creator.dataset_str
        self.assertEqual(expected.strip(), result)
コード例 #5
0
class CreateDatasetUtilsTest(TestCase):
    def setUp(self):
        """Prepare the database and the default form data for each test.

        Runs the ``migrate_db`` management command against
        ``test_db_dump2.xml``, then stores the default ``cleaned_data``
        (FASTA output for vouchers CP100-10 and CP100-11 over the genes
        COI-begin and ef1a), a test ``Client`` and a ``CreateDataset`` built
        from that data.
        """
        # Show complete diffs when the long expected datasets do not match.
        self.maxDiff = None

        call_command('migrate_db', dumpfile='test_db_dump2.xml', verbosity=0)

        coi_gene, ef1a_gene = (
            Genes.objects.get(gene_code=code) for code in ('COI-begin', 'ef1a')
        )
        self.cleaned_data = dict(
            gene_codes=[coi_gene, ef1a_gene],
            taxonset=None,
            voucher_codes='CP100-10\r\nCP100-11',
            geneset=None,
            taxon_names=['CODE', 'SUPERFAMILY', 'GENUS', 'SPECIES'],
            positions=['ALL'],
            translations=False,
            partition_by_positions='by gene',
            degen_translations=None,
            number_genes=None,
            file_format='FASTA',
            aminoacids=False,
            outgroup=None,
        )

        self.c = Client()
        self.dataset_creator = CreateDataset(self.cleaned_data)

    def test_create_dataset(self):
        """The default dataset contains the header line of voucher CP100-10."""
        expected = '>CP100_10_Papilionoidea_Aus_aus'
        result = self.dataset_creator.dataset_str
        # assertIn reports both operands on failure; assertTrue(x in y) only
        # says "False is not true".
        self.assertIn(expected, result)

    def test_create_dataset_with_gene_code(self):
        """With taxon names CODE and GENECODE the header is '>CP100_10'."""
        self.cleaned_data['taxon_names'] = ['CODE', 'GENECODE']
        dataset_creator = CreateDataset(self.cleaned_data)
        # The trailing newline pins the header to exactly the voucher code.
        expected = ">CP100_10\n"
        result = dataset_creator.dataset_str
        # assertIn shows the searched text and the dataset when it fails.
        self.assertIn(expected, result)

    def test_get_taxon_names_for_taxa(self):
        """Default taxon names give code, superfamily, genus and species."""
        # Both vouchers share genus and superfamily; only code/species differ.
        shared = {'genus': 'Aus', 'superfamily': 'Papilionoidea'}
        expected = {
            'CP100-10': dict(shared, code='CP100-10', species='aus'),
            'CP100-11': dict(shared, code='CP100-11', species='bus'),
        }
        self.assertEqual(
            expected, self.dataset_creator.get_taxon_names_for_taxa())

    def test_create_dataset_drop_voucher(self):
        """Voucher CP100-11, listed as '--CP100-11', is absent from the output.

        The 'all_taxa' taxonset is selected as well, so the voucher has to be
        missing even though a taxonset is in use.
        """
        cleaned_data = self.cleaned_data
        cleaned_data['voucher_codes'] = 'CP100-10\r\n--CP100-11'
        cleaned_data['taxonset'] = TaxonSets.objects.get(
            taxonset_name='all_taxa')
        dataset_creator = CreateDataset(cleaned_data)
        result = dataset_creator.dataset_str
        # assertNotIn prints the offending dataset when the voucher leaks in.
        self.assertNotIn('CP100_11', result)

    def test_get_taxon_names_for_taxa_additional_fields(self):
        """Requesting only SUPERFAMILY still yields a 'code' entry per voucher."""
        self.cleaned_data['taxon_names'] = ['SUPERFAMILY']
        creator = CreateDataset(self.cleaned_data)
        expected = {
            voucher: {'code': voucher, 'superfamily': 'Papilionoidea'}
            for voucher in ('CP100-10', 'CP100-11')
        }
        self.assertEqual(expected, creator.get_taxon_names_for_taxa())

    def test_dataset_all_codons_as_one(self):
        g1 = Genes.objects.get(gene_code='COI-begin')
        cleaned_data = self.cleaned_data
        cleaned_data['gene_codes'] = [g1]

        dataset_creator = CreateDataset(cleaned_data)
        expected = """
>CP100_10_Papilionoidea_Aus_aus
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGA
>CP100_11_Papilionoidea_Aus_bus
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGA
"""
        result = dataset_creator.dataset_str
        self.assertEqual(expected.lstrip(), result)

    def test_dataset_all_codons_as_one_with_no_reading_frame(self):
        g1 = Genes.objects.get(gene_code='COI-begin')
        cleaned_data = self.cleaned_data
        cleaned_data['gene_codes'] = [g1]

        dataset_creator = CreateDataset(cleaned_data)
        expected = """
>CP100_10_Papilionoidea_Aus_aus
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGA
>CP100_11_Papilionoidea_Aus_bus
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGA
"""
        result = dataset_creator.dataset_str
        self.assertEqual(expected.lstrip(), result)

    def test_dataset_all_codons_1st_as_one(self):
        g1 = Genes.objects.get(gene_code='COI-begin')
        cleaned_data = self.cleaned_data
        cleaned_data['gene_codes'] = [g1]
        cleaned_data['positions'] = list(['ALL', '1st'], )

        dataset_creator = CreateDataset(cleaned_data)
        expected = """
>CP100_10_Papilionoidea_Aus_aus
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGA
>CP100_11_Papilionoidea_Aus_bus
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGA
"""
        result = dataset_creator.dataset_str
        self.assertEqual(expected.lstrip(), result)

    def test_dataset_all_codons_1st_2nd_as_one(self):
        g1 = Genes.objects.get(gene_code='COI-begin')
        cleaned_data = self.cleaned_data
        cleaned_data['gene_codes'] = [g1]
        cleaned_data['positions'] = list(['ALL', '1st', '2nd'], )

        dataset_creator = CreateDataset(cleaned_data)
        expected = """
>CP100_10_Papilionoidea_Aus_aus
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGA
>CP100_11_Papilionoidea_Aus_bus
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGA
"""
        result = dataset_creator.dataset_str
        self.assertEqual(expected.lstrip(), result)

    def test_dataset_all_codons_1st_2nd_3rd_as_one(self):
        g1 = Genes.objects.get(gene_code='COI-begin')
        cleaned_data = self.cleaned_data
        cleaned_data['gene_codes'] = [g1]
        cleaned_data['positions'] = list(['ALL', '1st', '2nd', '3rd'], )

        dataset_creator = CreateDataset(cleaned_data)
        expected = """
>CP100_10_Papilionoidea_Aus_aus
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGA
>CP100_11_Papilionoidea_Aus_bus
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGA
"""
        result = dataset_creator.dataset_str
        self.assertEqual(expected.lstrip(), result)

    def test_dataset_1st_2nd_3rd_codons_as_one(self):
        g1 = Genes.objects.get(gene_code='COI-begin')
        cleaned_data = self.cleaned_data
        cleaned_data['gene_codes'] = [g1]
        cleaned_data['positions'] = list(['1st', '2nd', '3rd'], )

        dataset_creator = CreateDataset(cleaned_data)
        expected = """
>CP100_10_Papilionoidea_Aus_aus
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGA
>CP100_11_Papilionoidea_Aus_bus
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGA
"""
        result = dataset_creator.dataset_str
        self.assertEqual(expected.lstrip(), result)

    def test_dataset_1st_2nd_3rd_codons_as_1st2nd_3rd_gene_with_no_reading_frame(
            self):
        """No dataset text is expected when the gene has no reading frame.

        COI-begin has its ``reading_frame`` cleared and saved, then positions
        1st/2nd/3rd are requested with the '1st-2nd, 3rd' partitioning; the
        resulting ``dataset_str`` must be empty.
        """
        g1 = Genes.objects.get(gene_code='COI-begin')
        g1.reading_frame = None
        g1.save()
        cleaned_data = self.cleaned_data.copy()
        cleaned_data['gene_codes'] = [g1]
        # A plain list literal; wrapping it in list(...) only made a copy.
        cleaned_data['positions'] = ['1st', '2nd', '3rd']
        cleaned_data['partition_by_positions'] = '1st-2nd, 3rd'

        dataset_creator = CreateDataset(cleaned_data)
        expected = ""
        result = dataset_creator.dataset_str
        self.assertEqual(expected.lstrip(), result)

    def test_dataset_all_codons_partitions_each(self):
        # TODO: fix test after fixing dataset-creator issue #26
        g1 = Genes.objects.get(gene_code='COI-begin')
        cleaned_data = self.cleaned_data.copy()
        cleaned_data['gene_codes'] = [g1]
        cleaned_data['positions'] = list(['ALL'], )
        cleaned_data['partition_by_positions'] = 'by gene'

        dataset_creator = CreateDataset(cleaned_data)
        expected = """
>CP100_10_Papilionoidea_Aus_aus
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGA
>CP100_11_Papilionoidea_Aus_bus
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGA
"""
        result = dataset_creator.dataset_str
        self.assertEqual(expected.lstrip(), result)

    def test_dataset_all_codons_1st_partitions_each(self):
        g1 = Genes.objects.get(gene_code='COI-begin')
        cleaned_data = self.cleaned_data
        cleaned_data['gene_codes'] = [g1]
        cleaned_data['positions'] = list(['ALL', '1st'], )
        cleaned_data['partition_by_positions'] = 'by gene'

        dataset_creator = CreateDataset(cleaned_data)
        expected = """
>CP100_10_Papilionoidea_Aus_aus
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGA
>CP100_11_Papilionoidea_Aus_bus
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGA
"""
        result = dataset_creator.dataset_str
        self.assertEqual(expected.lstrip(), result)

    def test_dataset_all_codons_1st_2nd_partitions_each(self):
        g1 = Genes.objects.get(gene_code='COI-begin')
        cleaned_data = self.cleaned_data
        cleaned_data['gene_codes'] = [g1]
        cleaned_data['positions'] = list(['ALL', '1st', '2nd'], )
        cleaned_data['partition_by_positions'] = 'by gene'

        dataset_creator = CreateDataset(cleaned_data)
        expected = """
>CP100_10_Papilionoidea_Aus_aus
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGA
>CP100_11_Papilionoidea_Aus_bus
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGA
"""
        result = dataset_creator.dataset_str
        self.assertEqual(expected.lstrip(), result)

    def test_dataset_all_codons_1st_2nd_3rd_partitions_each(self):
        g1 = Genes.objects.get(gene_code='COI-begin')
        cleaned_data = self.cleaned_data
        cleaned_data['gene_codes'] = [g1]
        cleaned_data['positions'] = list(['ALL', '1st', '2nd', '3rd'], )
        cleaned_data['partition_by_positions'] = 'by gene'

        dataset_creator = CreateDataset(cleaned_data)
        expected = """
>CP100_10_Papilionoidea_Aus_aus
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGA
>CP100_11_Papilionoidea_Aus_bus
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGA
"""
        result = dataset_creator.dataset_str
        self.assertEqual(expected.lstrip(), result)

    def test_dataset_1st_codon_one_partition(self):
        g1 = Genes.objects.get(gene_code='COI-begin')
        cleaned_data = self.cleaned_data
        cleaned_data['gene_codes'] = [g1]
        del cleaned_data['positions']
        cleaned_data['positions'] = ['1st']

        dataset_creator = CreateDataset(cleaned_data)
        expected = """
>CP100_10_Papilionoidea_Aus_aus
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
>CP100_11_Papilionoidea_Aus_bus
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
"""
        result = dataset_creator.dataset_str
        self.assertEqual(expected.lstrip(), result)

    def test_dataset_2nd_codon_one_partition(self):
        g1 = Genes.objects.get(gene_code='COI-begin')
        cleaned_data = self.cleaned_data
        cleaned_data['gene_codes'] = [g1]
        del cleaned_data['positions']
        cleaned_data['positions'] = ['2nd']

        dataset_creator = CreateDataset(cleaned_data)
        expected = """
>CP100_10_Papilionoidea_Aus_aus
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
>CP100_11_Papilionoidea_Aus_bus
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
"""
        result = dataset_creator.dataset_str
        self.assertEqual(expected.lstrip(), result)

    def test_dataset_3rd_codon_one_partition(self):
        g1 = Genes.objects.get(gene_code='COI-begin')
        cleaned_data = self.cleaned_data
        cleaned_data['gene_codes'] = [g1]
        del cleaned_data['positions']
        cleaned_data['positions'] = ['3rd']

        dataset_creator = CreateDataset(cleaned_data)
        expected = """
>CP100_10_Papilionoidea_Aus_aus
GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG
GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG
GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG
GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG
>CP100_11_Papilionoidea_Aus_bus
GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG
GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG
GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG
GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG
"""
        result = dataset_creator.dataset_str
        self.assertEqual(expected.lstrip(), result)

    def test_dataset_1st_2nd_codon_one_partition(self):
        g1 = Genes.objects.get(gene_code='COI-begin')
        cleaned_data = self.cleaned_data
        cleaned_data['gene_codes'] = [g1]
        del cleaned_data['positions']
        cleaned_data['positions'] = ['1st', '2nd']

        dataset_creator = CreateDataset(cleaned_data)
        expected = """
>CP100_10_Papilionoidea_Aus_aus
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC
ACACACACACACACACACACACACA
>CP100_11_Papilionoidea_Aus_bus
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC
ACACACACACACACACACACACACA
"""
        result = dataset_creator.dataset_str
        self.assertEqual(expected.lstrip(), result)

    def test_dataset_2nd_3rd_codon_one_partition(self):
        """Requesting only positions '2nd' and '3rd' records an error."""
        g1 = Genes.objects.get(gene_code='COI-begin')
        cleaned_data = self.cleaned_data
        cleaned_data['gene_codes'] = [g1]
        del cleaned_data['positions']
        cleaned_data['positions'] = ['2nd', '3rd']

        dataset_creator = CreateDataset(cleaned_data)
        expected = 'Cannot create dataset for only codon positions 2nd and 3rd.'
        result = dataset_creator.errors
        # Errors may not be plain strings, so stringify them before searching.
        # assertIn shows the collected messages when the expected one is absent.
        self.assertIn(expected, ''.join(str(i) for i in result))

    def test_dataset_1st_3rd_codon_one_partition(self):
        """Requesting only positions '1st' and '3rd' records an error."""
        g1 = Genes.objects.get(gene_code='COI-begin')
        cleaned_data = self.cleaned_data
        cleaned_data['gene_codes'] = [g1]
        del cleaned_data['positions']
        cleaned_data['positions'] = ['1st', '3rd']

        dataset_creator = CreateDataset(cleaned_data)
        expected = 'Cannot create dataset for only codon positions 1st and 3rd.'
        result = dataset_creator.errors
        # Errors may not be plain strings, so stringify them before searching.
        # assertIn shows the collected messages when the expected one is absent.
        self.assertIn(expected, ''.join(str(i) for i in result))

    def test_dataset_1st_3rd_codon_partition_1st2nd_3rd_gene_with_no_reading_frame(
            self):
        """Positions '1st' and '3rd' with '1st-2nd, 3rd' give an empty dataset."""
        # NOTE(review): the name mentions a gene without reading frame, but
        # the gene's reading_frame is left untouched here - confirm whether
        # clearing it was intended.
        coi_gene = Genes.objects.get(gene_code='COI-begin')
        form_data = self.cleaned_data
        form_data['gene_codes'] = [coi_gene]
        del form_data['positions']
        form_data['positions'] = ['1st', '3rd']
        form_data['partition_by_positions'] = '1st-2nd, 3rd'

        creator = CreateDataset(form_data)
        expected = ""
        self.assertEqual(expected.lstrip(), creator.dataset_str)

    def test_dataset_1st_each(self):
        g1 = Genes.objects.get(gene_code='COI-begin')
        cleaned_data = self.cleaned_data
        cleaned_data['gene_codes'] = [g1]
        del cleaned_data['positions']
        cleaned_data['positions'] = ['1st']
        cleaned_data['partition_by_positions'] = 'by gene'

        dataset_creator = CreateDataset(cleaned_data)
        expected = """
>CP100_10_Papilionoidea_Aus_aus
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
>CP100_11_Papilionoidea_Aus_bus
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
"""
        result = dataset_creator.dataset_str
        self.assertEqual(expected.lstrip(), result)

    def test_dataset_2nd_each(self):
        g1 = Genes.objects.get(gene_code='COI-begin')
        cleaned_data = self.cleaned_data
        cleaned_data['gene_codes'] = [g1]
        del cleaned_data['positions']
        cleaned_data['positions'] = ['2nd']
        cleaned_data['partition_by_positions'] = 'by gene'

        dataset_creator = CreateDataset(cleaned_data)
        expected = """
>CP100_10_Papilionoidea_Aus_aus
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
>CP100_11_Papilionoidea_Aus_bus
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
"""
        result = dataset_creator.dataset_str
        self.assertEqual(expected.lstrip(), result)

    def test_dataset_3rd_each(self):
        g1 = Genes.objects.get(gene_code='COI-begin')
        cleaned_data = self.cleaned_data
        cleaned_data['gene_codes'] = [g1]
        del cleaned_data['positions']
        cleaned_data['positions'] = ['3rd']
        cleaned_data['partition_by_positions'] = 'by gene'

        dataset_creator = CreateDataset(cleaned_data)
        expected = """
>CP100_10_Papilionoidea_Aus_aus
GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG
GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG
GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG
GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG
>CP100_11_Papilionoidea_Aus_bus
GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG
GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG
GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG
GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG
"""
        result = dataset_creator.dataset_str
        self.assertEqual(expected.lstrip(), result)

    def test_dataset_1st_2nd_each(self):
        # Requests the 1st and 2nd codon positions of COI-begin with
        # 'by gene' partitioning and compares the complete dataset string
        # against the fixture below.
        # TODO fix test when dataset-creator has issue #26 fixed
        g1 = Genes.objects.get(gene_code='COI-begin')
        # Alias, not a copy: the assignments below modify self.cleaned_data.
        cleaned_data = self.cleaned_data
        cleaned_data['gene_codes'] = [g1]
        # NOTE(review): this del looks redundant, the key is reassigned on
        # the very next line.
        del cleaned_data['positions']
        cleaned_data['positions'] = ['1st', '2nd']
        cleaned_data['partition_by_positions'] = 'by gene'

        dataset_creator = CreateDataset(cleaned_data)
        expected = """
>CP100_10_Papilionoidea_Aus_aus
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC
ACACACACACACACACACACACACA
>CP100_11_Papilionoidea_Aus_bus
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC
ACACACACACACACACACACACACA
"""
        result = dataset_creator.dataset_str
        # lstrip() removes the newline that follows the opening triple quote.
        self.assertEqual(expected.lstrip(), result)

    def test_dataset_1st_3rd_each(self):
        # Selecting only the 1st and 3rd codon positions is expected to be
        # rejected: the creator should report the error message below in
        # its `errors` collection.
        g1 = Genes.objects.get(gene_code='COI-begin')
        # Alias, not a copy: the assignments below modify self.cleaned_data.
        cleaned_data = self.cleaned_data
        cleaned_data['gene_codes'] = [g1]
        # Plain assignment overwrites the existing key, so no prior `del`
        # is needed.
        cleaned_data['positions'] = ['1st', '3rd']
        cleaned_data['partition_by_positions'] = 'by gene'

        dataset_creator = CreateDataset(cleaned_data)
        expected = 'Cannot create dataset for only codon positions 1st and 3rd.'
        # Entries are stringified and concatenated so the message is found
        # whatever the type of the individual error objects.
        reported = ''.join(str(error) for error in dataset_creator.errors)
        # assertIn reports both operands on failure, unlike assertTrue.
        self.assertIn(expected, reported)

    def test_dataset_2nd_3rd_each(self):
        # Selecting only the 2nd and 3rd codon positions is expected to be
        # rejected: the creator should report the error message below in
        # its `errors` collection.
        g1 = Genes.objects.get(gene_code='COI-begin')
        # Alias, not a copy: the assignments below modify self.cleaned_data.
        cleaned_data = self.cleaned_data
        cleaned_data['gene_codes'] = [g1]
        # Plain assignment overwrites the existing key, so no prior `del`
        # is needed.
        cleaned_data['positions'] = ['2nd', '3rd']
        cleaned_data['partition_by_positions'] = 'by gene'

        dataset_creator = CreateDataset(cleaned_data)
        expected = 'Cannot create dataset for only codon positions 2nd and 3rd.'
        # Entries are stringified and concatenated so the message is found
        # whatever the type of the individual error objects.
        reported = ''.join(str(error) for error in dataset_creator.errors)
        # assertIn reports both operands on failure, unlike assertTrue.
        self.assertIn(expected, reported)

    def test_dataset_2nd_3rd_paritions_1st2nd_3rd_gene_with_no_reading_frame(
            self):
        # Asking for the 2nd and 3rd codon positions while partitioning as
        # '1st-2nd, 3rd' is expected to produce an empty dataset string
        # (per the test name, presumably because the gene has no reading
        # frame; confirm against the fixture data).
        form_data = self.cleaned_data  # alias: the shared dict is modified
        form_data['gene_codes'] = [Genes.objects.get(gene_code='COI-begin')]
        del form_data['positions']
        form_data['positions'] = ['2nd', '3rd']
        form_data['partition_by_positions'] = '1st-2nd, 3rd'

        creator = CreateDataset(form_data)
        self.assertEqual("", creator.dataset_str)

    def test_dataset_1st_2nd_3rd_each(self):
        # Requests the 1st, 2nd and 3rd codon positions of COI-begin with
        # 'by gene' partitioning and compares the complete dataset string
        # against the fixture below.
        # TODO Fix this test after fixing dataset-creator issue #26
        g1 = Genes.objects.get(gene_code='COI-begin')
        # Alias, not a copy: the assignments below modify self.cleaned_data.
        cleaned_data = self.cleaned_data
        cleaned_data['gene_codes'] = [g1]
        # NOTE(review): this del looks redundant, the key is reassigned on
        # the very next line.
        del cleaned_data['positions']
        cleaned_data['positions'] = ['1st', '2nd', '3rd']
        cleaned_data['partition_by_positions'] = 'by gene'

        dataset_creator = CreateDataset(cleaned_data)
        expected = """
>CP100_10_Papilionoidea_Aus_aus
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGA
>CP100_11_Papilionoidea_Aus_bus
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGA
"""
        result = dataset_creator.dataset_str
        # lstrip() removes the newline that follows the opening triple quote.
        self.assertEqual(expected.lstrip(), result)

    def test_dataset_all_1st_2nd_3rd_each(self):
        # Requests 'ALL' together with the 1st, 2nd and 3rd codon positions
        # of COI-begin with 'by gene' partitioning and compares the complete
        # dataset string against the fixture below.
        # TODO: fix test after fixing dataset-creator issue #26
        g1 = Genes.objects.get(gene_code='COI-begin')
        # Alias, not a copy: the assignments below modify self.cleaned_data.
        cleaned_data = self.cleaned_data
        cleaned_data['gene_codes'] = [g1]
        # NOTE(review): this del looks redundant, the key is reassigned on
        # the very next line.
        del cleaned_data['positions']
        cleaned_data['positions'] = ['ALL', '1st', '2nd', '3rd']
        cleaned_data['partition_by_positions'] = 'by gene'

        dataset_creator = CreateDataset(cleaned_data)
        expected = """
>CP100_10_Papilionoidea_Aus_aus
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGA
>CP100_11_Papilionoidea_Aus_bus
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGA
"""
        result = dataset_creator.dataset_str
        # lstrip() removes the newline that follows the opening triple quote.
        self.assertEqual(expected.lstrip(), result)

    def test_dataset_1st_codon_1st2nd_3rd(self):
        # Requests only the 1st codon position of COI-begin while
        # partitioning as '1st-2nd, 3rd'. The expected output is a single
        # '>COI-begin_1st' section: a '----' line followed by one unwrapped
        # sequence line per voucher.
        g1 = Genes.objects.get(gene_code='COI-begin')
        # Alias, not a copy: the assignments below modify self.cleaned_data.
        cleaned_data = self.cleaned_data
        cleaned_data['gene_codes'] = [g1]
        # NOTE(review): this del looks redundant, the key is reassigned on
        # the very next line.
        del cleaned_data['positions']
        cleaned_data['positions'] = ['1st']
        cleaned_data['partition_by_positions'] = '1st-2nd, 3rd'

        dataset_creator = CreateDataset(cleaned_data)
        expected = """
>COI-begin_1st
----
>CP100_10_Papilionoidea_Aus_aus
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
>CP100_11_Papilionoidea_Aus_bus
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
"""
        result = dataset_creator.dataset_str
        # lstrip() removes the newline that follows the opening triple quote.
        self.assertEqual(expected.lstrip(), result)

    def test_dataset_2nd_codon_1st2nd_3rd(self):
        # Requests only the 2nd codon position of COI-begin while
        # partitioning as '1st-2nd, 3rd'. The expected output is a single
        # '>COI-begin_2nd' section: a '----' line followed by one unwrapped
        # sequence line per voucher.
        g1 = Genes.objects.get(gene_code='COI-begin')
        # Alias, not a copy: the assignments below modify self.cleaned_data.
        cleaned_data = self.cleaned_data
        cleaned_data['gene_codes'] = [g1]
        # NOTE(review): this del looks redundant, the key is reassigned on
        # the very next line.
        del cleaned_data['positions']
        cleaned_data['positions'] = ['2nd']
        cleaned_data['partition_by_positions'] = '1st-2nd, 3rd'

        dataset_creator = CreateDataset(cleaned_data)
        expected = """
>COI-begin_2nd
----
>CP100_10_Papilionoidea_Aus_aus
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
>CP100_11_Papilionoidea_Aus_bus
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
"""
        result = dataset_creator.dataset_str
        # lstrip() removes the newline that follows the opening triple quote.
        self.assertEqual(expected.lstrip(), result)

    def test_dataset_3rd_codon_1st2nd_3rd(self):
        # Requests only the 3rd codon position of COI-begin while
        # partitioning as '1st-2nd, 3rd'. The expected output is a single
        # '>COI-begin_3rd' section: a '----' line followed by one unwrapped
        # sequence line per voucher.
        g1 = Genes.objects.get(gene_code='COI-begin')
        # Alias, not a copy: the assignments below modify self.cleaned_data.
        cleaned_data = self.cleaned_data
        cleaned_data['gene_codes'] = [g1]
        # NOTE(review): this del looks redundant, the key is reassigned on
        # the very next line.
        del cleaned_data['positions']
        cleaned_data['positions'] = ['3rd']
        cleaned_data['partition_by_positions'] = '1st-2nd, 3rd'

        dataset_creator = CreateDataset(cleaned_data)
        expected = """
>COI-begin_3rd
----
>CP100_10_Papilionoidea_Aus_aus
GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG
>CP100_11_Papilionoidea_Aus_bus
GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG
"""
        result = dataset_creator.dataset_str
        # lstrip() removes the newline that follows the opening triple quote.
        self.assertEqual(expected.lstrip(), result)

    def test_dataset_1st_2nd_codons_1st2nd_3rd(self):
        g1 = Genes.objects.get(gene_code='COI-begin')
        cleaned_data = self.cleaned_data
        cleaned_data['gene_codes'] = [g1]
        del cleaned_data['positions']
        cleaned_data['positions'] = ['1st', '2nd']
        cleaned_data['partition_by_positions'] = '1st-2nd, 3rd'

        dataset_creator = CreateDataset(cleaned_data)
        expected = """
>COI-begin_1st-2nd
----
>CP100_10_Papilionoidea_Aus_aus
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACA
>CP100_11_Papilionoidea_Aus_bus
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACA
"""
        result = dataset_creator.dataset_str
        self.assertEqual(expected.lstrip(), result)

    def test_dataset_1st_2nd_3rd_codons_1st2nd_3rd(self):
        g1 = Genes.objects.get(gene_code='COI-begin')
        cleaned_data = self.cleaned_data
        cleaned_data['gene_codes'] = [g1]
        del cleaned_data['positions']
        cleaned_data['positions'] = ['ALL']
        cleaned_data['partition_by_positions'] = '1st-2nd, 3rd'

        dataset_creator = CreateDataset(cleaned_data)
        expected = """
>COI-begin_1st-2nd
----
>CP100_10_Papilionoidea_Aus_aus
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACA
>CP100_11_Papilionoidea_Aus_bus
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACA

>COI-begin_3rd
----
>CP100_10_Papilionoidea_Aus_aus
GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG
>CP100_11_Papilionoidea_Aus_bus
GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG
"""
        result = dataset_creator.dataset_str
        self.assertEqual(expected.lstrip(), result)

    def test_dataset_all_1st_codons_1st2nd_3rd(self):
        g1 = Genes.objects.get(gene_code='COI-begin')
        cleaned_data = self.cleaned_data
        cleaned_data['gene_codes'] = [g1]
        del cleaned_data['positions']
        cleaned_data['positions'] = ['ALL', '1st']
        cleaned_data['partition_by_positions'] = '1st-2nd, 3rd'

        dataset_creator = CreateDataset(cleaned_data)
        expected = """
>COI-begin_1st-2nd
----
>CP100_10_Papilionoidea_Aus_aus
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACA
>CP100_11_Papilionoidea_Aus_bus
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACA

>COI-begin_3rd
----
>CP100_10_Papilionoidea_Aus_aus
GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG
>CP100_11_Papilionoidea_Aus_bus
GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG
"""
        result = dataset_creator.dataset_str
        self.assertEqual(expected.lstrip(), result)

    def test_has_no_sequences_for_input_voucher(self):
        # Adds a third voucher code, CP1000, to the request and checks that
        # the creator records a 'Could not find voucher' warning for it.
        cleaned_data = self.cleaned_data  # alias: the shared dict is modified
        cleaned_data['voucher_codes'] = 'CP100-10\r\nCP100-11\r\nCP1000'
        dataset_creator = CreateDataset(cleaned_data)
        # assertIn shows the warnings container on failure, whereas
        # assertTrue would only report "False is not true".
        self.assertIn(
            'Could not find voucher CP1000', dataset_creator.warnings)

    def test_creating_dataset_filename(self):
        # guess_file_extension is expected to turn the '.txt' suffix of a
        # 'MEGA_'-prefixed temporary file name into '.meg', keeping the rest
        # of the name unchanged.
        self.assertEqual(
            'MEGA_b879d2a046d04821be618bf481b6b08d.meg',
            guess_file_extension('MEGA_b879d2a046d04821be618bf481b6b08d.txt'),
        )