예제 #1
0
    def setUp(self):
        """Load the test dump and prepare default FASTA form data.

        Calls the ``migrate_db`` command with ``test_db_dump2.xml`` and then
        stores on the test instance: ``cleaned_data`` (genes COI-begin and
        ef1a, vouchers CP100-10 and CP100-11, FASTA output), a ``Client`` and
        a ``CreateDataset`` built from that form data.
        """
        call_command('migrate_db', dumpfile='test_db_dump2.xml', verbosity=0)

        selected_genes = [
            Genes.objects.get(gene_code=code) for code in ('COI-begin', 'ef1a')
        ]
        form_data = {
            'gene_codes': selected_genes,
            'taxonset': None,
            'voucher_codes': 'CP100-10\r\nCP100-11',
            'geneset': None,
            'taxon_names': ['CODE', 'SUPERFAMILY', 'GENUS', 'SPECIES'],
            'positions': ['ALL'],
            'translations': False,
            'partition_by_positions': 'by gene',
            'degen_translations': None,
            'number_genes': None,
            'file_format': 'FASTA',
            'aminoacids': False,
            'outgroup': None,
        }
        self.cleaned_data = form_data

        # Do not truncate assertion diffs; the datasets compared are long.
        self.maxDiff = None
        self.c = Client()
        self.dataset_creator = CreateDataset(form_data)
예제 #2
0
 def test_get_taxon_names_for_taxa_additional_fields(self):
     """Requesting only SUPERFAMILY yields ``code`` and ``superfamily`` per voucher."""
     form_data = self.cleaned_data
     form_data['taxon_names'] = ['SUPERFAMILY']
     creator = CreateDataset(form_data)
     expected = {
         voucher: {'code': voucher, 'superfamily': 'Papilionoidea'}
         for voucher in ('CP100-10', 'CP100-11')
     }
     self.assertEqual(expected, creator.get_taxon_names_for_taxa())
예제 #3
0
def results(request):
    """Render nucleotide and protein GenBank FASTA datasets for a posted form.

    Non-POST requests are redirected to ``/genbank_fasta/``. An invalid form
    re-renders the index template with the bound form. A valid form is run
    through ``CreateDataset`` twice -- first with ``aminoacids`` False, then
    True -- and the results template is rendered with both outputs.
    """
    context = get_context(request)

    if request.method != 'POST':
        return HttpResponseRedirect('/genbank_fasta/')

    form = GenBankFastaForm(request.POST)
    if not form.is_valid():
        context["form"] = form
        return render(request, 'genbank_fasta/index.html', context)

    def file_name_or_false(path):
        # The context carries False when no dataset file path is available.
        return False if path is None else os.path.basename(path)

    # Options that are fixed for this view regardless of what was submitted.
    cleaned_data = form.cleaned_data
    fixed_options = (
        ('file_format', 'GenBankFASTA'),
        ('number_genes', ''),
        ('translations', False),
        ('aminoacids', False),
        ('positions', 'ALL'),
        ('partition_by_positions', 'by gene'),
        ('taxon_names', ['CODE', 'GENUS', 'SPECIES']),
        ('outgroup', ''),
    )
    for option, value in fixed_options:
        cleaned_data[option] = value

    # First pass: nucleotide dataset; only the first 1500 characters are shown.
    nucleotide_creator = CreateDataset(cleaned_data)
    nucleotide_str = nucleotide_creator.dataset_str
    preview = ''.join([
        nucleotide_str[0:1500],
        '\n...\n\n\n',
        '#######\nComplete dataset file available for download.\n#######',
    ])
    dataset_errors = nucleotide_creator.errors
    dataset_warnings = nucleotide_creator.warnings
    fasta_file = file_name_or_false(nucleotide_creator.dataset_file)
    skipped_items = nucleotide_creator.sequences_skipped

    # Second pass: same form data, now requesting amino acids.
    cleaned_data['aminoacids'] = True
    protein_creator = CreateDataset(cleaned_data)
    protein_str = protein_creator.dataset_str
    protein_file = file_name_or_false(protein_creator.dataset_file)

    context['items_with_accession'] = skipped_items
    context['dataset'] = preview
    context['fasta_file'] = fasta_file
    context['protein'] = protein_str
    context['errors'] = dataset_errors
    context['protein_file'] = protein_file
    context['warnings'] = dataset_warnings
    return render(request, 'genbank_fasta/results.html', context)
예제 #4
0
 def test_get_sequence_first_codon_position(self):
     """With positions ['1st'], the first returned item for 'wingless' must equal the expected Seq."""
     form_data = self.cleaned_data
     form_data['positions'] = ['1st']
     form_data['gene_codes'] = [Genes.objects.get(gene_code='wingless')]
     creator = CreateDataset(form_data)

     first_positions = Seq("CGGTGATAAAGCTATATGGAGACAAGATGAG")
     full_sequence = Seq(
         "ACACGTCGACTCCGGCAAGTCCACCACCACCGGTCACTTGATTTACAAATGTGGTGGTATCGACAaACGTACCATCGAGAAGTTCGAGAAGGA"
     )

     extracted = creator.get_sequence_based_on_codon_positions(
         'wingless', full_sequence)
     self.assertEqual(first_positions, extracted[0])
예제 #5
0
    def test_get_taxon_names_for_taxa_additional_fields(self):
        """Requesting only SUPERFAMILY yields one ``superfamily`` entry per lower-cased voucher code."""
        form_data = self.cleaned_data
        form_data['taxon_names'] = ['SUPERFAMILY']
        creator = CreateDataset(form_data)

        # The second voucher is expected to have an empty superfamily.
        expected = {
            'cp100-10': {'superfamily': 'Papilionoidea'},
            'cp100-11': {'superfamily': ''},
        }
        self.assertEqual(expected, creator.get_taxon_names_for_taxa())
예제 #6
0
 def test_create_dataset_outgroup(self):
     """With outgroup CP100-11, the header must be directly followed by the CP100_11_Aus_bus row."""
     cleaned_data = self.cleaned_data
     cleaned_data['outgroup'] = 'CP100-11'
     dataset_creator = CreateDataset(cleaned_data)
     expected = 'nstates dna;\nxread\n1909 2\n\n&[dna]\nCP100_11_Aus_bus'
     result = dataset_creator.dataset_str
     # assertIn reports both operands on failure; assertTrue only says "False is not true".
     self.assertIn(expected, result)
예제 #7
0
 def test_fill_seqs_with_missing_chars(self):
     """With positions ['ALL'], the output must contain the padded CP100_10_Aus_aus row prefix."""
     cleaned_data = self.cleaned_data
     cleaned_data['positions'] = ['ALL']
     dataset_creator = CreateDataset(cleaned_data)
     result = dataset_creator.dataset_str
     expected = "CP100_10_Aus_aus                                       CGACGACGACGACGACGACG"
     # assertIn reports both operands on failure; assertTrue only says "False is not true".
     self.assertIn(expected, result)
예제 #8
0
    def setUp(self):
        """Load the test dump and prepare default FASTA form data.

        Calls the ``migrate_db`` command with ``test_db_dump2.xml`` and then
        stores on the test instance: ``cleaned_data`` (genes COI-begin and
        ef1a, vouchers CP100-10 and CP100-11, FASTA output), a ``Client`` and
        a ``CreateDataset`` built from that form data.
        """
        call_command('migrate_db', dumpfile='test_db_dump2.xml', verbosity=0)

        coi_begin = Genes.objects.get(gene_code='COI-begin')
        ef1a = Genes.objects.get(gene_code='ef1a')
        form_data = {
            'gene_codes': [coi_begin, ef1a],
            'taxonset': None,
            'voucher_codes': 'CP100-10\r\nCP100-11',
            'geneset': None,
            'taxon_names': ['CODE', 'SUPERFAMILY', 'GENUS', 'SPECIES'],
            'positions': ['ALL'],
            'translations': False,
            'partition_by_positions': 'by gene',
            'degen_translations': None,
            'number_genes': None,
            'file_format': 'FASTA',
            'aminoacids': False,
            'outgroup': None,
        }
        self.cleaned_data = form_data

        # Do not truncate assertion diffs; the datasets compared are long.
        self.maxDiff = None
        self.c = Client()
        self.dataset_creator = CreateDataset(form_data)
    def setUp(self):
        """Load ``test_data.xml`` and prepare default PHYLIP form data.

        Stores on the test instance: ``cleaned_data`` (gene set ``all_genes``,
        taxon set ``all_taxa``, PHYLIP output), the paths of the expected
        nucleotide and amino-acid fixture files, a user whose password is
        reset to ``pass``, a ``Client`` and a ``CreateDataset`` built from the
        form data.
        """
        # Join path components instead of concatenating strings so the dump is
        # found whether or not MEDIA_ROOT ends with a path separator.
        dump_path = os.path.join(settings.MEDIA_ROOT, 'test_data.xml')
        args = []
        opts = {'dumpfile': dump_path, 'verbosity': 0}
        cmd = 'migrate_db'
        call_command(cmd, *args, **opts)

        gene_set = GeneSets.objects.get(geneset_name='all_genes')
        taxon_set = TaxonSets.objects.get(taxonset_name='all_taxa')
        self.cleaned_data = {
            'gene_codes': '',
            'taxonset': taxon_set,
            'voucher_codes': '',
            'geneset': gene_set,
            'taxon_names': ['CODE', 'GENUS', 'SPECIES'],
            'number_genes': None,
            'degen_translations': None,
            'positions': ['ALL'],
            'translations': False,
            'partition_by_positions': 'by gene',
            'file_format': 'PHYLIP',
            'aminoacids': False,
            'outgroup': '',
        }

        # Expected-output fixtures live next to the tests.
        fixture_dir = os.path.join(settings.BASE_DIR, '..', 'create_dataset',
                                   'tests', 'create_phylip_dataset')
        self.dataset_file = os.path.join(fixture_dir, 'dataset.phy')
        self.aa_dataset_file = os.path.join(fixture_dir, 'aa_dataset.phy')

        # NOTE(review): the username looks masked in this copy of the source;
        # confirm the real value against the fixture data.
        self.user = User.objects.get(username='******')
        self.user.set_password('pass')
        self.user.save()

        self.c = Client()
        self.dataset_creator = CreateDataset(self.cleaned_data)
        # Do not truncate assertion diffs; the datasets compared are long.
        self.maxDiff = None
예제 #10
0
    def setUp(self):
        """Load the test dump and prepare default NEXUS form data.

        Calls the ``migrate_db`` command with ``test_db_dump2.xml`` and then
        stores on the test instance: ``cleaned_data`` (gene set ``all_genes``,
        taxon set ``all_taxa``, NEXUS output, 'normal' degenerate
        translations), a ``Client`` and a ``CreateDataset`` built from it.
        """
        call_command('migrate_db', dumpfile='test_db_dump2.xml', verbosity=0)

        all_genes = GeneSets.objects.get(geneset_name='all_genes')
        all_taxa = TaxonSets.objects.get(taxonset_name='all_taxa')
        form_data = {
            'gene_codes': '',
            'taxonset': all_taxa,
            'voucher_codes': '',
            'geneset': all_genes,
            'taxon_names': ['CODE', 'GENUS', 'SPECIES'],
            'translations': False,
            'degen_translations': 'normal',
            'number_genes': None,
            'positions': ['ALL'],
            'partition_by_positions': 'by gene',
            'file_format': 'NEXUS',
            'aminoacids': False,
            'outgroup': None,
        }
        self.cleaned_data = form_data

        # Do not truncate assertion diffs; the datasets compared are long.
        self.maxDiff = None
        self.c = Client()
        self.dataset_creator = CreateDataset(form_data)
예제 #11
0
    def test_dataset_all_1st_codons_1st2nd_3rd(self):
        g1 = Genes.objects.get(gene_code='COI-begin')
        cleaned_data = self.cleaned_data
        cleaned_data['gene_codes'] = [g1]
        del cleaned_data['positions']
        cleaned_data['positions'] = ['ALL', '1st']
        cleaned_data['partition_by_positions'] = '1st-2nd, 3rd'

        dataset_creator = CreateDataset(cleaned_data)
        expected = """
>COI-begin_1st-2nd
----
>CP100_10_Papilionoidea_Aus_aus
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACA
>CP100_11_Papilionoidea_Aus_bus
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACA

>COI-begin_3rd
----
>CP100_10_Papilionoidea_Aus_aus
GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG
>CP100_11_Papilionoidea_Aus_bus
GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG
"""
        result = dataset_creator.dataset_str
        self.assertEqual(expected.lstrip(), result)
예제 #12
0
    def test_dataset_ALL_1st_2nd_3rd_each(self):
        """Compare the full output for gene COI, partitioned 'EACH', to a fixture.

        Positions ALL, 1st, 2nd and 3rd are all requested; the expected text
        holds one block per codon position (1st, 2nd, 3rd), each listing both
        vouchers.
        """
        g1 = Genes.objects.get(gene_code='COI')
        cleaned_data = self.cleaned_data
        cleaned_data['gene_codes'] = [g1]
        # Remove and re-add the key so 'positions' is set afresh.
        del cleaned_data['positions']
        cleaned_data['positions'] = ['ALL', '1st', '2nd', '3rd']
        cleaned_data['partition_by_positions'] = 'EACH'

        dataset_creator = CreateDataset(cleaned_data)
        expected = """
>coi_1st_codon
--------------------
>CP100-10_Papilionoidea_Melitaea_diamina
????????TGGAAGATCACAACAGTGACATTAGGGCATAAAGAGCGTAAATTAGACAAAGGTGATCGCTATGGCGAGTCCAATAATTTTCCTTACTATAAAGGAGGGAGTAGTCCCTTAAGCAGGTGGTGATTTCTGGATTATGGAATAAAAAAACAAAATTGCACTTGTGGGAAGTCCTTTTCGTGGGAAACTAGCACAATTTGTTGGGGC???????????
>CP100-11_Melitaea_diamina
?ACTATTACATTTTACAAATTTTATATTATTTCTAATTTTTATTTATGATATAATTTATAAAAGACATATCCAATTAATAAGTACATTATATATAATGAATAAATCCATATTTCTAATAGTATTTTATATGTCTCAATTTTTTATTTAATTTATTTAATATAAAAATAAATCAATAATATATTTATAGTATTTCATTTCCAAAT???????????
>coi_2nd_codon
--------------------
>CP100-10_Papilionoidea_Melitaea_diamina
????????GCGTTGCCTGTTTGCATGACGTTTGAAATAACTTCCACTTTTTTTTTCTTTGGTGAGTTCTTTGCCATCTCGTAATGTGTTCCCTTTTTCGGTTAAGCGCGGCTACCTCCATCAGGCCTATCTTCTATCGTCCTTGCTATTCCTTATGTAATCAAATCTTTGCTGTCCTTTTTCTCTTCGCTCTTTCAGATACCTTACGGGGAC???????????
>CP100-11_Melitaea_diamina
TGGAAGATCACAACAGTGACATTAGGGCATAAAGAGCGTAAATTAGACAAAGGTGATCGCTATGGCGAGTCCAATAATTTTCCTTACTATAAAGGAGGGAGTAGTCCCTTAAGCAGGTGGTGATTTCTGGATTATGGAATAAAAAAACAAAATTGCACTTGTGGGAAGTCCTTTTCGTGGGAAACTAGCACAATTTGTTGGGGC???????????
>coi_3rd_codon
--------------------
>CP100-10_Papilionoidea_Melitaea_diamina
????????ACTATTACATTTTACAAATTTTATATTATTTCTAATTTTTATTTATGATATAATTTATAAAAGACATATCCAATTAATAAGTACATTATATATAATGAATAAATCCATATTTCTAATAGTATTTTATATGTCTCAATTTTTTATTTAATTTATTTAATATAAAAATAAATCAATAATATATTTATAGTATTTCATTTCCAAAT???????????
>CP100-11_Melitaea_diamina
GCGTTGCCTGTTTGCATGACGTTTGAAATAACTTCCACTTTTTTTTTCTTTGGTGAGTTCTTTGCCATCTCGTAATGTGTTCCCTTTTTCGGTTAAGCGCGGCTACCTCCATCAGGCCTATCTTCTATCGTCCTTGCTATTCCTTATGTAATCAAATCTTTGCTGTCCTTTTTCTCTTCGCTCTTTCAGATACCTTACGGGGAC???????????
"""
        result = dataset_creator.dataset_str
        # Both leading and trailing whitespace of the fixture are stripped
        # before the exact comparison.
        self.assertEqual(expected.strip(), result)
예제 #13
0
    def test_dataset_1st_2nd_each(self):
        """Compare the output for COI-begin with positions 1st and 2nd to a fixture.

        The partitioning used here is 'by gene'; the expected text holds one
        wrapped sequence per voucher.
        """
        # TODO fix test when dataset-creator has issue #26 fixed
        g1 = Genes.objects.get(gene_code='COI-begin')
        cleaned_data = self.cleaned_data
        cleaned_data['gene_codes'] = [g1]
        # Remove and re-add the key so 'positions' is set afresh.
        del cleaned_data['positions']
        cleaned_data['positions'] = ['1st', '2nd']
        cleaned_data['partition_by_positions'] = 'by gene'

        dataset_creator = CreateDataset(cleaned_data)
        expected = """
>CP100_10_Papilionoidea_Aus_aus
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC
ACACACACACACACACACACACACA
>CP100_11_Papilionoidea_Aus_bus
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC
ACACACACACACACACACACACACA
"""
        result = dataset_creator.dataset_str
        # Only the fixture's leading newline is stripped; the trailing newline
        # is part of the expected output.
        self.assertEqual(expected.lstrip(), result)
예제 #14
0
    def test_dataset_1st_2nd_codon_one_partition(self):
        """Compare the output for COI-begin with positions 1st and 2nd to a fixture.

        ``partition_by_positions`` is left at whatever ``setUp`` stored; the
        expected text holds one wrapped sequence per voucher.
        """
        g1 = Genes.objects.get(gene_code='COI-begin')
        cleaned_data = self.cleaned_data
        cleaned_data['gene_codes'] = [g1]
        # Remove and re-add the key so 'positions' is set afresh.
        del cleaned_data['positions']
        cleaned_data['positions'] = ['1st', '2nd']

        dataset_creator = CreateDataset(cleaned_data)
        expected = """
>CP100_10_Papilionoidea_Aus_aus
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC
ACACACACACACACACACACACACA
>CP100_11_Papilionoidea_Aus_bus
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC
ACACACACACACACACACACACACA
"""
        result = dataset_creator.dataset_str
        # Only the fixture's leading newline is stripped; the trailing newline
        # is part of the expected output.
        self.assertEqual(expected.lstrip(), result)
예제 #15
0
 def test_nexus_1st_3rd_codon_as_1st2nd_3rd(self):
     """Positions 1st and 3rd with '1st-2nd, 3rd' partitioning must report an error."""
     cleaned_data = self.cleaned_data.copy()
     cleaned_data['positions'] = ['1st', '3rd']
     cleaned_data['partition_by_positions'] = '1st-2nd, 3rd'
     dataset_creator = CreateDataset(cleaned_data)
     expected = 'Cannot create dataset for only codon positions 1st and 3rd.'
     # Errors are stringified and concatenated so the message is found
     # regardless of which entry carries it.
     all_errors = ''.join(str(error) for error in dataset_creator.errors)
     # assertIn reports both operands on failure; assertTrue only says "False is not true".
     self.assertIn(expected, all_errors)
    def test_partitioned_each(self):
        """Partitioning 'by codon position' must still emit the expected CP100_10_Aus_aus row prefix."""
        cleaned_data = self.cleaned_data.copy()
        cleaned_data['partition_by_positions'] = 'by codon position'
        dataset_creator = CreateDataset(cleaned_data)
        result = dataset_creator.dataset_str

        expected = "CP100_10_Aus_aus  ACGACGACGA CGACGACGAC GACGACGACG ACGACGACGA CGACGACGAC"
        # assertIn reports both operands on failure; assertTrue only says "False is not true".
        self.assertIn(expected, result)
예제 #17
0
 def test_create_dataset_drop_voucher(self):
     """A voucher listed with a ``--`` prefix must not appear in the output.

     The taxon set 'Erebia' is selected and CP100-11 is given as
     ``--CP100-11`` in ``voucher_codes``.
     """
     cleaned_data = self.cleaned_data
     cleaned_data['voucher_codes'] = 'CP100-10\r\n--CP100-11'
     cleaned_data['taxonset'] = TaxonSets.objects.get(
         taxonset_name='Erebia')
     dataset_creator = CreateDataset(cleaned_data)
     result = dataset_creator.dataset_str
     # assertNotIn reports both operands on failure, unlike assertTrue(a not in b).
     self.assertNotIn('CP100-11', result)
예제 #18
0
 def test_nexus_with_outgroup(self):
     """With outgroup CP100-11 and gene set 'all_genes', the output must declare that outgroup."""
     cleaned_data = self.cleaned_data
     cleaned_data['outgroup'] = 'CP100-11'
     cleaned_data['geneset'] = GeneSets.objects.get(geneset_name='all_genes')
     dataset_creator = CreateDataset(cleaned_data)
     result = dataset_creator.dataset_str
     expected = "outgroup CP100_11_Aus_bus;"
     # assertIn reports both operands on failure; assertTrue only says "False is not true".
     self.assertIn(expected, result)
 def test_dataset_1st3rd_codon_partitioned_each(self):
     """Positions 1st and 3rd partitioned 'by codon position' must give the 1st-and-3rd error first."""
     form_data = self.cleaned_data.copy()
     form_data['partition_by_positions'] = 'by codon position'
     form_data['positions'] = ['1st', '3rd']
     creator = CreateDataset(form_data)
     first_error = creator.errors[0]
     self.assertEqual(
         'Cannot create dataset for only codon positions 1st and 3rd.',
         str(first_error),
     )
예제 #20
0
    def test_dataset_with_partitions(self):
        """Partitioning as '1st-2nd, 3rd' is expected to yield an empty dataset string."""
        form_data = self.cleaned_data
        form_data['partition_by_positions'] = '1st-2nd, 3rd'
        creator = CreateDataset(form_data)

        self.assertEqual('', creator.dataset_str)
예제 #21
0
    def test_dataset_as_aminoacids(self):
        """With ``aminoacids`` enabled, the output must contain the expected run of 'D' residues."""
        cleaned_data = self.cleaned_data
        cleaned_data['aminoacids'] = True
        dataset_creator = CreateDataset(cleaned_data)

        expected = 'DDDDDDDDDDDDDDDDDDDDDDDDDDD'
        result = dataset_creator.dataset_str
        # assertIn reports both operands on failure; assertTrue only says "False is not true".
        self.assertIn(expected, result)
 def test_dataset_2nd3rd_codon_partitioned_1st2nd_3rd(self):
     """Positions 2nd and 3rd with '1st-2nd,3rd' partitioning must give the 2nd-and-3rd error first."""
     form_data = self.cleaned_data.copy()
     # NOTE(review): this value has no space after the comma, unlike the
     # '1st-2nd, 3rd' spelling used by other tests -- confirm it is intended.
     form_data['partition_by_positions'] = '1st-2nd,3rd'
     form_data['positions'] = ['2nd', '3rd']
     creator = CreateDataset(form_data)
     first_error = creator.errors[0]
     self.assertEqual(
         'Cannot create dataset for only codon positions 2nd and 3rd.',
         str(first_error),
     )
예제 #23
0
 def test_char_lengths_for_partitions_aminoacids(self):
     """For amino acids over gene set 'all_genes', the ef1a charset must span 689-1101."""
     cleaned_data = self.cleaned_data
     cleaned_data['aminoacids'] = True
     cleaned_data['outgroup'] = ''
     cleaned_data['geneset'] = GeneSets.objects.get(geneset_name='all_genes')
     dataset_creator = CreateDataset(cleaned_data)
     result = dataset_creator.dataset_str
     expected = "charset ef1a = 689-1101"
     # assertIn reports both operands on failure; assertTrue only says "False is not true".
     self.assertIn(expected, result)
예제 #24
0
 def test_create_dataset_aa_with_outgroup(self):
     """For amino acids with outgroup CP100-11, the protein block must start with the CP100_11_Aus_bus row."""
     cleaned_data = self.cleaned_data
     cleaned_data['positions'] = ['ALL']
     cleaned_data['outgroup'] = 'CP100-11'
     cleaned_data['aminoacids'] = True
     dataset_creator = CreateDataset(cleaned_data)
     expected = '&[protein]\nCP100_11_Aus_bus                                       DDDDDDDDDDDDDDDDDDDDDDDD'
     result = dataset_creator.dataset_str
     # assertIn reports both operands on failure; assertTrue only says "False is not true".
     self.assertIn(expected, result)
예제 #25
0
    def test_dataset_with_degen_tranlations(self):
        """With translations on and degen table 'S', the output must contain the degenerated GAY run."""
        cleaned_data = self.cleaned_data
        cleaned_data['degen_translations'] = 'S'
        cleaned_data['translations'] = True
        dataset_creator = CreateDataset(cleaned_data)

        expected = 'GAYGAYGAYGAYGAYGAYGAYGAY'
        result = dataset_creator.dataset_str.strip()
        # assertIn reports both operands on failure; assertTrue only says "False is not true".
        self.assertIn(expected, result)
예제 #26
0
    def test_dataset_with_partitions_and_degen_tranlations(self):
        """'1st-2nd, 3rd' partitioning with 'normal' degen translations is expected to yield no output."""
        form_data = self.cleaned_data
        form_data['partition_by_positions'] = '1st-2nd, 3rd'
        form_data['translations'] = True
        form_data['degen_translations'] = 'normal'
        creator = CreateDataset(form_data)

        stripped_output = creator.dataset_str.strip()
        self.assertEqual('', stripped_output)
예제 #27
0
파일: tests.py 프로젝트: danmcelroy/VoSeq
    def test_warning_when_missing_seqs_for_voucher(self):
        """A newly saved voucher (CP100-13) must produce the missing-sequence warning."""
        Vouchers(code='CP100-13').save()

        cleaned_data = self.cleaned_data.copy()
        cleaned_data['voucher_codes'] = 'CP100-13'

        expected = 'Could not find sequences for voucher CP100-13 and gene_code CC'
        result = CreateDataset(cleaned_data)

        # assertIn reports both operands on failure; assertTrue only says "False is not true".
        self.assertIn(expected, result.warnings)
예제 #28
0
    def test_order_of_vouchers_is_kept_along_partitions(self):
        cleaned_data = self.cleaned_data
        dataset_creator = CreateDataset(cleaned_data)
        result = dataset_creator.dataset_str
        expected = """
CP100_19_Aus_jus                                       ??????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????

[ef1a]
"""
        self.assertTrue(expected.strip() in result)
예제 #29
0
    def test_create_dataset_1st_codon(self):
        """With positions ['1st'], the output must contain the expected header and a run of 'A' bases."""
        cleaned_data = self.cleaned_data
        cleaned_data['positions'] = ['1st']
        dataset_creator = CreateDataset(cleaned_data)
        result = dataset_creator.dataset_str

        # assertIn reports both operands on failure; assertTrue only says "False is not true".
        expected = 'nstates dna;\nxread\n636 2\n\n&[dna]\nCP100_10_Aus_aus'
        self.assertIn(expected, result)

        expected = 'AAAAAAAAAAAAAAAAAAAAAAAAAAA'
        self.assertIn(expected, result)
예제 #30
0
 def test_try_dataset_degenerated_in_partitions(self):
     """Degenerated 'normal' translation partitioned 'by gene' must report NTAX=10 and NCHAR=4732."""
     cleaned_data = self.cleaned_data
     cleaned_data['voucher_codes'] = 'CP100-10'
     cleaned_data['degen_translations'] = 'normal'
     cleaned_data['partition_by_positions'] = 'by gene'
     cleaned_data['translations'] = True
     dataset_creator = CreateDataset(cleaned_data)
     result = dataset_creator.dataset_str
     expected = "DIMENSIONS NTAX=10 NCHAR=4732;"
     # assertIn reports both operands on failure; assertTrue only says "False is not true".
     self.assertIn(expected, result)
    def test_create_aa_dataset(self):
        """The amino-acid dataset must equal the contents of ``self.aa_dataset_file``."""
        # Read the fixture first so a missing file fails before any dataset work.
        with open(self.aa_dataset_file, "r") as fixture:
            expected = fixture.read()

        form_data = self.cleaned_data.copy()
        form_data['aminoacids'] = True
        creator = CreateDataset(form_data)

        self.assertEqual(expected, creator.dataset_str)
예제 #32
0
 def test_all_codons_partitioned_as_each(self):
     """Partitioning COI-begin 'by codon position' must give the MEGA partition error first."""
     form_data = self.cleaned_data.copy()
     form_data['gene_codes'] = [Genes.objects.get(gene_code='COI-begin')]
     form_data['positions'] = ['ALL']
     form_data['partition_by_positions'] = 'by codon position'
     creator = CreateDataset(form_data)
     reported_errors = creator.errors
     self.assertEqual(
         'Cannot produce MEGA dataset with codon positions in different partitions',
         str(reported_errors[0]),
     )
예제 #33
0
class CreateDatasetUtilsTest(TestCase):
    def setUp(self):
        args = []
        opts = {'dumpfile': 'test_db_dump2.xml', 'verbosity': 0}
        cmd = 'migrate_db'
        call_command(cmd, *args, **opts)

        g1 = Genes.objects.get(gene_code='COI-begin')
        g2 = Genes.objects.get(gene_code='ef1a')
        self.cleaned_data = {
            'gene_codes': [g1, g2],
            'taxonset': None,
            'voucher_codes': 'CP100-10\r\nCP100-11',
            'geneset': None,
            'taxon_names': ['CODE', 'SUPERFAMILY', 'GENUS', 'SPECIES'],
            'positions': ['ALL'],
            'translations': False,
            'partition_by_positions': 'by gene',
            'degen_translations': None,
            'number_genes': None,
            'file_format': 'FASTA',
            'aminoacids': False,
            'outgroup': None,
        }

        self.c = Client()
        self.dataset_creator = CreateDataset(self.cleaned_data)
        self.maxDiff = None

    def test_create_dataset(self):
        expected = '>CP100_10_Papilionoidea_Aus_aus'
        result = self.dataset_creator.dataset_str
        self.assertTrue(expected in result)

    def test_create_dataset_with_gene_code(self):
        self.cleaned_data['taxon_names'] = ['CODE', 'GENECODE']
        dataset_creator = CreateDataset(self.cleaned_data)
        expected = ">CP100_10\n"
        result = dataset_creator.dataset_str
        self.assertTrue(expected in result)

    def test_get_taxon_names_for_taxa(self):
        expected = {
            'CP100-10': {'code': 'CP100-10', 'genus': 'Aus', 'species': 'aus', 'superfamily': 'Papilionoidea'},
            'CP100-11': {'code': 'CP100-11', 'genus': 'Aus', 'species': 'bus', 'superfamily': 'Papilionoidea'},
        }
        result = self.dataset_creator.get_taxon_names_for_taxa()
        self.assertEqual(expected, result)

    def test_create_dataset_drop_voucher(self):
        cleaned_data = self.cleaned_data
        cleaned_data['voucher_codes'] = 'CP100-10\r\n--CP100-11'
        cleaned_data['taxonset'] = TaxonSets.objects.get(taxonset_name='all_taxa')
        dataset_creator = CreateDataset(cleaned_data)
        result = dataset_creator.dataset_str
        self.assertTrue('CP100_11' not in result)

    def test_get_taxon_names_for_taxa_additional_fields(self):
        self.cleaned_data['taxon_names'] = ['SUPERFAMILY']
        dataset_creator = CreateDataset(self.cleaned_data)
        expected = {
            'CP100-10': {'code': 'CP100-10', 'superfamily': 'Papilionoidea'},
            'CP100-11': {'code': 'CP100-11', 'superfamily': 'Papilionoidea'},
        }
        result = dataset_creator.get_taxon_names_for_taxa()
        self.assertEqual(expected, result)

    def test_dataset_all_codons_as_one(self):
        g1 = Genes.objects.get(gene_code='COI-begin')
        cleaned_data = self.cleaned_data
        cleaned_data['gene_codes'] = [g1]

        dataset_creator = CreateDataset(cleaned_data)
        expected = """
>CP100_10_Papilionoidea_Aus_aus
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGA
>CP100_11_Papilionoidea_Aus_bus
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGA
"""
        result = dataset_creator.dataset_str
        self.assertEqual(expected.lstrip(), result)

    def test_dataset_all_codons_as_one_with_no_reading_frame(self):
        g1 = Genes.objects.get(gene_code='COI-begin')
        cleaned_data = self.cleaned_data
        cleaned_data['gene_codes'] = [g1]

        dataset_creator = CreateDataset(cleaned_data)
        expected = """
>CP100_10_Papilionoidea_Aus_aus
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGA
>CP100_11_Papilionoidea_Aus_bus
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGA
"""
        result = dataset_creator.dataset_str
        self.assertEqual(expected.lstrip(), result)

    def test_dataset_all_codons_1st_as_one(self):
        g1 = Genes.objects.get(gene_code='COI-begin')
        cleaned_data = self.cleaned_data
        cleaned_data['gene_codes'] = [g1]
        cleaned_data['positions'] = list(['ALL', '1st'],)

        dataset_creator = CreateDataset(cleaned_data)
        expected = """
>CP100_10_Papilionoidea_Aus_aus
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGA
>CP100_11_Papilionoidea_Aus_bus
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGA
"""
        result = dataset_creator.dataset_str
        self.assertEqual(expected.lstrip(), result)

    def test_dataset_all_codons_1st_2nd_as_one(self):
        g1 = Genes.objects.get(gene_code='COI-begin')
        cleaned_data = self.cleaned_data
        cleaned_data['gene_codes'] = [g1]
        cleaned_data['positions'] = list(['ALL', '1st', '2nd'],)

        dataset_creator = CreateDataset(cleaned_data)
        expected = """
>CP100_10_Papilionoidea_Aus_aus
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGA
>CP100_11_Papilionoidea_Aus_bus
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGA
"""
        result = dataset_creator.dataset_str
        self.assertEqual(expected.lstrip(), result)

    def test_dataset_all_codons_1st_2nd_3rd_as_one(self):
        g1 = Genes.objects.get(gene_code='COI-begin')
        cleaned_data = self.cleaned_data
        cleaned_data['gene_codes'] = [g1]
        cleaned_data['positions'] = list(['ALL', '1st', '2nd', '3rd'],)

        dataset_creator = CreateDataset(cleaned_data)
        expected = """
>CP100_10_Papilionoidea_Aus_aus
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGA
>CP100_11_Papilionoidea_Aus_bus
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGA
"""
        result = dataset_creator.dataset_str
        self.assertEqual(expected.lstrip(), result)

    def test_dataset_1st_2nd_3rd_codons_as_one(self):
        g1 = Genes.objects.get(gene_code='COI-begin')
        cleaned_data = self.cleaned_data
        cleaned_data['gene_codes'] = [g1]
        cleaned_data['positions'] = list(['1st', '2nd', '3rd'],)

        dataset_creator = CreateDataset(cleaned_data)
        expected = """
>CP100_10_Papilionoidea_Aus_aus
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGA
>CP100_11_Papilionoidea_Aus_bus
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGA
"""
        result = dataset_creator.dataset_str
        self.assertEqual(expected.lstrip(), result)

    def test_dataset_1st_2nd_3rd_codons_as_1st2nd_3rd_gene_with_no_reading_frame(self):
        # With the gene's reading frame cleared and the '1st-2nd, 3rd'
        # partitioning requested, dataset_str is expected to be empty.
        g1 = Genes.objects.get(gene_code='COI-begin')
        # Persist the missing reading frame before building the dataset.
        g1.reading_frame = None
        g1.save()
        cleaned_data = self.cleaned_data.copy()
        cleaned_data['gene_codes'] = [g1]
        cleaned_data['positions'] = ['1st', '2nd', '3rd']
        cleaned_data['partition_by_positions'] = '1st-2nd, 3rd'

        dataset_creator = CreateDataset(cleaned_data)
        self.assertEqual("", dataset_creator.dataset_str)

    def test_dataset_all_codons_partitions_each(self):
        # TODO: fix test after fixing dataset-creator issue #26
        g1 = Genes.objects.get(gene_code='COI-begin')
        cleaned_data = self.cleaned_data.copy()
        cleaned_data['gene_codes'] = [g1]
        cleaned_data['positions'] = list(['ALL'],)
        cleaned_data['partition_by_positions'] = 'by gene'

        dataset_creator = CreateDataset(cleaned_data)
        expected = """
>CP100_10_Papilionoidea_Aus_aus
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGA
>CP100_11_Papilionoidea_Aus_bus
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGA
"""
        result = dataset_creator.dataset_str
        self.assertEqual(expected.lstrip(), result)

    def test_dataset_all_codons_1st_partitions_each(self):
        g1 = Genes.objects.get(gene_code='COI-begin')
        cleaned_data = self.cleaned_data
        cleaned_data['gene_codes'] = [g1]
        cleaned_data['positions'] = list(['ALL', '1st'],)
        cleaned_data['partition_by_positions'] = 'by gene'

        dataset_creator = CreateDataset(cleaned_data)
        expected = """
>CP100_10_Papilionoidea_Aus_aus
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGA
>CP100_11_Papilionoidea_Aus_bus
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGA
"""
        result = dataset_creator.dataset_str
        self.assertEqual(expected.lstrip(), result)

    def test_dataset_all_codons_1st_2nd_partitions_each(self):
        g1 = Genes.objects.get(gene_code='COI-begin')
        cleaned_data = self.cleaned_data
        cleaned_data['gene_codes'] = [g1]
        cleaned_data['positions'] = list(['ALL', '1st', '2nd'],)
        cleaned_data['partition_by_positions'] = 'by gene'

        dataset_creator = CreateDataset(cleaned_data)
        expected = """
>CP100_10_Papilionoidea_Aus_aus
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGA
>CP100_11_Papilionoidea_Aus_bus
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGA
"""
        result = dataset_creator.dataset_str
        self.assertEqual(expected.lstrip(), result)

    def test_dataset_all_codons_1st_2nd_3rd_partitions_each(self):
        g1 = Genes.objects.get(gene_code='COI-begin')
        cleaned_data = self.cleaned_data
        cleaned_data['gene_codes'] = [g1]
        cleaned_data['positions'] = list(['ALL', '1st', '2nd', '3rd'],)
        cleaned_data['partition_by_positions'] = 'by gene'

        dataset_creator = CreateDataset(cleaned_data)
        expected = """
>CP100_10_Papilionoidea_Aus_aus
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGA
>CP100_11_Papilionoidea_Aus_bus
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGA
"""
        result = dataset_creator.dataset_str
        self.assertEqual(expected.lstrip(), result)

    def test_dataset_1st_codon_one_partition(self):
        g1 = Genes.objects.get(gene_code='COI-begin')
        cleaned_data = self.cleaned_data
        cleaned_data['gene_codes'] = [g1]
        del cleaned_data['positions']
        cleaned_data['positions'] = ['1st']

        dataset_creator = CreateDataset(cleaned_data)
        expected = """
>CP100_10_Papilionoidea_Aus_aus
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
>CP100_11_Papilionoidea_Aus_bus
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
"""
        result = dataset_creator.dataset_str
        self.assertEqual(expected.lstrip(), result)

    def test_dataset_2nd_codon_one_partition(self):
        g1 = Genes.objects.get(gene_code='COI-begin')
        cleaned_data = self.cleaned_data
        cleaned_data['gene_codes'] = [g1]
        del cleaned_data['positions']
        cleaned_data['positions'] = ['2nd']

        dataset_creator = CreateDataset(cleaned_data)
        expected = """
>CP100_10_Papilionoidea_Aus_aus
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
>CP100_11_Papilionoidea_Aus_bus
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
"""
        result = dataset_creator.dataset_str
        self.assertEqual(expected.lstrip(), result)

    def test_dataset_3rd_codon_one_partition(self):
        g1 = Genes.objects.get(gene_code='COI-begin')
        cleaned_data = self.cleaned_data
        cleaned_data['gene_codes'] = [g1]
        del cleaned_data['positions']
        cleaned_data['positions'] = ['3rd']

        dataset_creator = CreateDataset(cleaned_data)
        expected = """
>CP100_10_Papilionoidea_Aus_aus
GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG
GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG
GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG
GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG
>CP100_11_Papilionoidea_Aus_bus
GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG
GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG
GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG
GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG
"""
        result = dataset_creator.dataset_str
        self.assertEqual(expected.lstrip(), result)

    def test_dataset_1st_2nd_codon_one_partition(self):
        g1 = Genes.objects.get(gene_code='COI-begin')
        cleaned_data = self.cleaned_data
        cleaned_data['gene_codes'] = [g1]
        del cleaned_data['positions']
        cleaned_data['positions'] = ['1st', '2nd']

        dataset_creator = CreateDataset(cleaned_data)
        expected = """
>CP100_10_Papilionoidea_Aus_aus
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC
ACACACACACACACACACACACACA
>CP100_11_Papilionoidea_Aus_bus
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC
ACACACACACACACACACACACACA
"""
        result = dataset_creator.dataset_str
        self.assertEqual(expected.lstrip(), result)

    def test_dataset_2nd_3rd_codon_one_partition(self):
        # Requesting only '2nd' + '3rd' positions must be reported in the
        # creator's `errors` collection.
        g1 = Genes.objects.get(gene_code='COI-begin')
        # Work on a copy so the shared fixture dict is not mutated in place.
        cleaned_data = self.cleaned_data.copy()
        cleaned_data['gene_codes'] = [g1]
        cleaned_data['positions'] = ['2nd', '3rd']

        dataset_creator = CreateDataset(cleaned_data)
        expected = 'Cannot create dataset for only codon positions 2nd and 3rd.'
        # Errors may not be plain strings, so stringify before searching;
        # assertIn prints both operands when the check fails.
        reported = ''.join(str(error) for error in dataset_creator.errors)
        self.assertIn(expected, reported)

    def test_dataset_1st_3rd_codon_one_partition(self):
        # Requesting only '1st' + '3rd' positions must be reported in the
        # creator's `errors` collection.
        g1 = Genes.objects.get(gene_code='COI-begin')
        # Work on a copy so the shared fixture dict is not mutated in place.
        cleaned_data = self.cleaned_data.copy()
        cleaned_data['gene_codes'] = [g1]
        cleaned_data['positions'] = ['1st', '3rd']

        dataset_creator = CreateDataset(cleaned_data)
        expected = 'Cannot create dataset for only codon positions 1st and 3rd.'
        # Errors may not be plain strings, so stringify before searching;
        # assertIn prints both operands when the check fails.
        reported = ''.join(str(error) for error in dataset_creator.errors)
        self.assertIn(expected, reported)

    def test_dataset_1st_3rd_codon_partition_1st2nd_3rd_gene_with_no_reading_frame(self):
        # Positions '1st' + '3rd' with the '1st-2nd, 3rd' partitioning:
        # dataset_str is expected to be empty.
        # NOTE(review): despite the test name, the gene's reading_frame is not
        # cleared here -- confirm whether that setup step is missing.
        g1 = Genes.objects.get(gene_code='COI-begin')
        # Work on a copy so the shared fixture dict is not mutated in place.
        cleaned_data = self.cleaned_data.copy()
        cleaned_data['gene_codes'] = [g1]
        cleaned_data['positions'] = ['1st', '3rd']
        cleaned_data['partition_by_positions'] = '1st-2nd, 3rd'

        dataset_creator = CreateDataset(cleaned_data)
        self.assertEqual("", dataset_creator.dataset_str)

    def test_dataset_1st_each(self):
        g1 = Genes.objects.get(gene_code='COI-begin')
        cleaned_data = self.cleaned_data
        cleaned_data['gene_codes'] = [g1]
        del cleaned_data['positions']
        cleaned_data['positions'] = ['1st']
        cleaned_data['partition_by_positions'] = 'by gene'

        dataset_creator = CreateDataset(cleaned_data)
        expected = """
>CP100_10_Papilionoidea_Aus_aus
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
>CP100_11_Papilionoidea_Aus_bus
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
"""
        result = dataset_creator.dataset_str
        self.assertEqual(expected.lstrip(), result)

    def test_dataset_2nd_each(self):
        g1 = Genes.objects.get(gene_code='COI-begin')
        cleaned_data = self.cleaned_data
        cleaned_data['gene_codes'] = [g1]
        del cleaned_data['positions']
        cleaned_data['positions'] = ['2nd']
        cleaned_data['partition_by_positions'] = 'by gene'

        dataset_creator = CreateDataset(cleaned_data)
        expected = """
>CP100_10_Papilionoidea_Aus_aus
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
>CP100_11_Papilionoidea_Aus_bus
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
"""
        result = dataset_creator.dataset_str
        self.assertEqual(expected.lstrip(), result)

    def test_dataset_3rd_each(self):
        g1 = Genes.objects.get(gene_code='COI-begin')
        cleaned_data = self.cleaned_data
        cleaned_data['gene_codes'] = [g1]
        del cleaned_data['positions']
        cleaned_data['positions'] = ['3rd']
        cleaned_data['partition_by_positions'] = 'by gene'

        dataset_creator = CreateDataset(cleaned_data)
        expected = """
>CP100_10_Papilionoidea_Aus_aus
GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG
GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG
GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG
GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG
>CP100_11_Papilionoidea_Aus_bus
GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG
GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG
GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG
GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG
"""
        result = dataset_creator.dataset_str
        self.assertEqual(expected.lstrip(), result)

    def test_dataset_1st_2nd_each(self):
        # TODO fix test when dataset-creator has issue #26 fixed
        g1 = Genes.objects.get(gene_code='COI-begin')
        cleaned_data = self.cleaned_data
        cleaned_data['gene_codes'] = [g1]
        del cleaned_data['positions']
        cleaned_data['positions'] = ['1st', '2nd']
        cleaned_data['partition_by_positions'] = 'by gene'

        dataset_creator = CreateDataset(cleaned_data)
        expected = """
>CP100_10_Papilionoidea_Aus_aus
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC
ACACACACACACACACACACACACA
>CP100_11_Papilionoidea_Aus_bus
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC
ACACACACACACACACACACACACA
"""
        result = dataset_creator.dataset_str
        self.assertEqual(expected.lstrip(), result)

    def test_dataset_1st_3rd_each(self):
        # Requesting only '1st' + '3rd' positions with 'by gene' partitioning
        # must be reported in the creator's `errors` collection.
        g1 = Genes.objects.get(gene_code='COI-begin')
        # Work on a copy so the shared fixture dict is not mutated in place.
        cleaned_data = self.cleaned_data.copy()
        cleaned_data['gene_codes'] = [g1]
        cleaned_data['positions'] = ['1st', '3rd']
        cleaned_data['partition_by_positions'] = 'by gene'

        dataset_creator = CreateDataset(cleaned_data)
        expected = 'Cannot create dataset for only codon positions 1st and 3rd.'
        # Errors may not be plain strings, so stringify before searching;
        # assertIn prints both operands when the check fails.
        reported = ''.join(str(error) for error in dataset_creator.errors)
        self.assertIn(expected, reported)

    def test_dataset_2nd_3rd_each(self):
        # Requesting only '2nd' + '3rd' positions with 'by gene' partitioning
        # must be reported in the creator's `errors` collection.
        g1 = Genes.objects.get(gene_code='COI-begin')
        # Work on a copy so the shared fixture dict is not mutated in place.
        cleaned_data = self.cleaned_data.copy()
        cleaned_data['gene_codes'] = [g1]
        cleaned_data['positions'] = ['2nd', '3rd']
        cleaned_data['partition_by_positions'] = 'by gene'

        dataset_creator = CreateDataset(cleaned_data)
        expected = 'Cannot create dataset for only codon positions 2nd and 3rd.'
        # Errors may not be plain strings, so stringify before searching;
        # assertIn prints both operands when the check fails.
        reported = ''.join(str(error) for error in dataset_creator.errors)
        self.assertIn(expected, reported)

    def test_dataset_2nd_3rd_paritions_1st2nd_3rd_gene_with_no_reading_frame(self):
        # Positions '2nd' + '3rd' with the '1st-2nd, 3rd' partitioning:
        # dataset_str is expected to be empty.
        # NOTE(review): despite the test name, the gene's reading_frame is not
        # cleared here -- confirm whether that setup step is missing.
        g1 = Genes.objects.get(gene_code='COI-begin')
        # Work on a copy so the shared fixture dict is not mutated in place.
        cleaned_data = self.cleaned_data.copy()
        cleaned_data['gene_codes'] = [g1]
        cleaned_data['positions'] = ['2nd', '3rd']
        cleaned_data['partition_by_positions'] = '1st-2nd, 3rd'

        dataset_creator = CreateDataset(cleaned_data)
        self.assertEqual("", dataset_creator.dataset_str)

    def test_dataset_1st_2nd_3rd_each(self):
        # TODO Fix this test after fixing dataset-creator issue #26
        g1 = Genes.objects.get(gene_code='COI-begin')
        cleaned_data = self.cleaned_data
        cleaned_data['gene_codes'] = [g1]
        del cleaned_data['positions']
        cleaned_data['positions'] = ['1st', '2nd', '3rd']
        cleaned_data['partition_by_positions'] = 'by gene'

        dataset_creator = CreateDataset(cleaned_data)
        expected = """
>CP100_10_Papilionoidea_Aus_aus
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGA
>CP100_11_Papilionoidea_Aus_bus
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGA
"""
        result = dataset_creator.dataset_str
        self.assertEqual(expected.lstrip(), result)

    def test_dataset_all_1st_2nd_3rd_each(self):
        # TODO: fix test after fixing dataset-creator issue #26
        g1 = Genes.objects.get(gene_code='COI-begin')
        cleaned_data = self.cleaned_data
        cleaned_data['gene_codes'] = [g1]
        del cleaned_data['positions']
        cleaned_data['positions'] = ['ALL', '1st', '2nd', '3rd']
        cleaned_data['partition_by_positions'] = 'by gene'

        dataset_creator = CreateDataset(cleaned_data)
        expected = """
>CP100_10_Papilionoidea_Aus_aus
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGA
>CP100_11_Papilionoidea_Aus_bus
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA
CGACGACGA
"""
        result = dataset_creator.dataset_str
        self.assertEqual(expected.lstrip(), result)

    def test_dataset_1st_codon_1st2nd_3rd(self):
        g1 = Genes.objects.get(gene_code='COI-begin')
        cleaned_data = self.cleaned_data
        cleaned_data['gene_codes'] = [g1]
        del cleaned_data['positions']
        cleaned_data['positions'] = ['1st']
        cleaned_data['partition_by_positions'] = '1st-2nd, 3rd'

        dataset_creator = CreateDataset(cleaned_data)
        expected = """
>COI-begin_1st
----
>CP100_10_Papilionoidea_Aus_aus
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
>CP100_11_Papilionoidea_Aus_bus
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
"""
        result = dataset_creator.dataset_str
        self.assertEqual(expected.lstrip(), result)

    def test_dataset_2nd_codon_1st2nd_3rd(self):
        g1 = Genes.objects.get(gene_code='COI-begin')
        cleaned_data = self.cleaned_data
        cleaned_data['gene_codes'] = [g1]
        del cleaned_data['positions']
        cleaned_data['positions'] = ['2nd']
        cleaned_data['partition_by_positions'] = '1st-2nd, 3rd'

        dataset_creator = CreateDataset(cleaned_data)
        expected = """
>COI-begin_2nd
----
>CP100_10_Papilionoidea_Aus_aus
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
>CP100_11_Papilionoidea_Aus_bus
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
"""
        result = dataset_creator.dataset_str
        self.assertEqual(expected.lstrip(), result)

    def test_dataset_3rd_codon_1st2nd_3rd(self):
        g1 = Genes.objects.get(gene_code='COI-begin')
        cleaned_data = self.cleaned_data
        cleaned_data['gene_codes'] = [g1]
        del cleaned_data['positions']
        cleaned_data['positions'] = ['3rd']
        cleaned_data['partition_by_positions'] = '1st-2nd, 3rd'

        dataset_creator = CreateDataset(cleaned_data)
        expected = """
>COI-begin_3rd
----
>CP100_10_Papilionoidea_Aus_aus
GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG
>CP100_11_Papilionoidea_Aus_bus
GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG
"""
        result = dataset_creator.dataset_str
        self.assertEqual(expected.lstrip(), result)

    def test_dataset_1st_2nd_codons_1st2nd_3rd(self):
        g1 = Genes.objects.get(gene_code='COI-begin')
        cleaned_data = self.cleaned_data
        cleaned_data['gene_codes'] = [g1]
        del cleaned_data['positions']
        cleaned_data['positions'] = ['1st', '2nd']
        cleaned_data['partition_by_positions'] = '1st-2nd, 3rd'

        dataset_creator = CreateDataset(cleaned_data)
        expected = """
>COI-begin_1st-2nd
----
>CP100_10_Papilionoidea_Aus_aus
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACA
>CP100_11_Papilionoidea_Aus_bus
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACA
"""
        result = dataset_creator.dataset_str
        self.assertEqual(expected.lstrip(), result)

    def test_dataset_1st_2nd_3rd_codons_1st2nd_3rd(self):
        g1 = Genes.objects.get(gene_code='COI-begin')
        cleaned_data = self.cleaned_data
        cleaned_data['gene_codes'] = [g1]
        del cleaned_data['positions']
        cleaned_data['positions'] = ['ALL']
        cleaned_data['partition_by_positions'] = '1st-2nd, 3rd'

        dataset_creator = CreateDataset(cleaned_data)
        expected = """
>COI-begin_1st-2nd
----
>CP100_10_Papilionoidea_Aus_aus
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACA
>CP100_11_Papilionoidea_Aus_bus
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACA

>COI-begin_3rd
----
>CP100_10_Papilionoidea_Aus_aus
GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG
>CP100_11_Papilionoidea_Aus_bus
GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG
"""
        result = dataset_creator.dataset_str
        self.assertEqual(expected.lstrip(), result)

    def test_dataset_all_1st_codons_1st2nd_3rd(self):
        g1 = Genes.objects.get(gene_code='COI-begin')
        cleaned_data = self.cleaned_data
        cleaned_data['gene_codes'] = [g1]
        del cleaned_data['positions']
        cleaned_data['positions'] = ['ALL', '1st']
        cleaned_data['partition_by_positions'] = '1st-2nd, 3rd'

        dataset_creator = CreateDataset(cleaned_data)
        expected = """
>COI-begin_1st-2nd
----
>CP100_10_Papilionoidea_Aus_aus
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACA
>CP100_11_Papilionoidea_Aus_bus
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACA

>COI-begin_3rd
----
>CP100_10_Papilionoidea_Aus_aus
GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG
>CP100_11_Papilionoidea_Aus_bus
GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG
"""
        result = dataset_creator.dataset_str
        self.assertEqual(expected.lstrip(), result)

    def test_has_no_sequences_for_input_voucher(self):
        """A voucher code with no match must be reported in the warnings.

        'CP1000' is appended to the requested voucher codes and the
        resulting ``CreateDataset.warnings`` must contain the message
        'Could not find voucher CP1000'.
        """
        cleaned_data = self.cleaned_data
        cleaned_data['voucher_codes'] = 'CP100-10\r\nCP100-11\r\nCP1000'
        dataset_creator = CreateDataset(cleaned_data)
        # assertIn performs the same membership check as the former
        # assertTrue(... in ...), but shows the container on failure.
        self.assertIn('Could not find voucher CP1000', dataset_creator.warnings)

    def test_creating_dataset_filename(self):
        """guess_file_extension must turn the MEGA '.txt' name into '.meg'."""
        self.assertEqual(
            'MEGA_b879d2a046d04821be618bf481b6b08d.meg',
            guess_file_extension('MEGA_b879d2a046d04821be618bf481b6b08d.txt'),
        )