def setUp(self):
    """Load the test database dump and build the default request fixtures.

    Sets ``self.cleaned_data`` (form-style options selecting two genes and
    two vouchers), ``self.c`` (a test client) and ``self.dataset_creator``
    (a ``CreateDataset`` built from those options).
    """
    call_command('migrate_db', dumpfile='test_db_dump2.xml', verbosity=0)

    selected_genes = [
        Genes.objects.get(gene_code=code) for code in ('COI-begin', 'ef1a')
    ]
    self.cleaned_data = {
        'gene_codes': selected_genes,
        'taxonset': None,
        'voucher_codes': 'CP100-10\r\nCP100-11',
        'geneset': None,
        'taxon_names': ['CODE', 'SUPERFAMILY', 'GENUS', 'SPECIES'],
        'positions': ['ALL'],
        'translations': False,
        'partition_by_positions': 'by gene',
        'degen_translations': None,
        'number_genes': None,
        'file_format': 'FASTA',
        'aminoacids': False,
        'outgroup': None,
    }
    self.c = Client()
    self.dataset_creator = CreateDataset(self.cleaned_data)
    # Show complete diffs when an assertEqual on a long dataset fails.
    self.maxDiff = None
def test_get_taxon_names_for_taxa_additional_fields(self):
    """Requesting only SUPERFAMILY yields code and superfamily per voucher."""
    self.cleaned_data['taxon_names'] = ['SUPERFAMILY']
    creator = CreateDataset(self.cleaned_data)
    expected = {
        voucher: {'code': voucher, 'superfamily': 'Papilionoidea'}
        for voucher in ('CP100-10', 'CP100-11')
    }
    self.assertEqual(expected, creator.get_taxon_names_for_taxa())
def results(request):
    """Build nucleotide and protein GenBank FASTA datasets from a POSTed form.

    A non-POST request is redirected to ``/genbank_fasta/``. An invalid form
    re-renders the index template with the bound form. A valid form renders
    the results template with a preview of the nucleotide dataset, the full
    protein dataset, both file names, and the creator's errors and warnings.
    """
    context = get_context(request)
    if request.method != 'POST':
        return HttpResponseRedirect('/genbank_fasta/')

    form = GenBankFastaForm(request.POST)
    if not form.is_valid():
        context["form"] = form
        return render(request, 'genbank_fasta/index.html', context)

    def file_name_or_false(abs_path):
        # The context gets the bare file name, or False when the path is None.
        return False if abs_path is None else os.path.basename(abs_path)

    cleaned_data = form.cleaned_data
    # Options fixed by this view, written on top of the submitted values.
    cleaned_data.update({
        'file_format': 'GenBankFASTA',
        'number_genes': '',
        'translations': False,
        'aminoacids': False,
        'positions': 'ALL',
        'partition_by_positions': 'by gene',
        'taxon_names': ['CODE', 'GENUS', 'SPECIES'],
        'outgroup': '',
    })

    # First pass: nucleotide dataset.
    nucleotide_creator = CreateDataset(cleaned_data)
    nucleotide_text = nucleotide_creator.dataset_str
    # Only the first 1500 characters are shown on the page.
    preview = (
        nucleotide_text[0:1500]
        + '\n...\n\n\n'
        + '#######\nComplete dataset file available for download.\n#######'
    )
    errors = nucleotide_creator.errors
    warnings = nucleotide_creator.warnings
    nucleotide_path = nucleotide_creator.dataset_file
    items_with_accession = nucleotide_creator.sequences_skipped
    fasta_file = file_name_or_false(nucleotide_path)

    # Second pass: same options, translated to amino acids.
    cleaned_data['aminoacids'] = True
    protein_creator = CreateDataset(cleaned_data)
    protein_text = protein_creator.dataset_str
    protein_file = file_name_or_false(protein_creator.dataset_file)

    template_values = (
        ('items_with_accession', items_with_accession),
        ('dataset', preview),
        ('fasta_file', fasta_file),
        ('protein', protein_text),
        ('errors', errors),
        ('protein_file', protein_file),
        ('warnings', warnings),
    )
    for key, value in template_values:
        context[key] = value
    return render(request, 'genbank_fasta/results.html', context)
def test_get_sequence_first_codon_position(self):
    """First codon positions are extracted from a wingless sequence."""
    self.cleaned_data.update({
        'positions': ['1st'],
        'gene_codes': [Genes.objects.get(gene_code='wingless')],
    })
    creator = CreateDataset(self.cleaned_data)

    full_sequence = Seq(
        "ACACGTCGACTCCGGCAAGTCCACCACCACCGGTCACTTGATTTACAAATGTGGTGGTATCGACAaACGTACCATCGAGAAGTTCGAGAAGGA"
    )
    first_positions = Seq("CGGTGATAAAGCTATATGGAGACAAGATGAG")

    extracted = creator.get_sequence_based_on_codon_positions(
        'wingless', full_sequence)
    # Only the first element of the returned sequence is checked.
    self.assertEqual(first_positions, extracted[0])
def test_get_taxon_names_for_taxa_additional_fields(self):
    """Requesting only SUPERFAMILY yields one superfamily entry per voucher."""
    self.cleaned_data['taxon_names'] = ['SUPERFAMILY']
    creator = CreateDataset(self.cleaned_data)
    superfamily_by_voucher = (
        ('cp100-10', 'Papilionoidea'),
        ('cp100-11', ''),
    )
    expected = {
        voucher: {'superfamily': superfamily}
        for voucher, superfamily in superfamily_by_voucher
    }
    self.assertEqual(expected, creator.get_taxon_names_for_taxa())
def test_create_dataset_outgroup(self):
    """The outgroup voucher CP100-11 directly follows the ``&[dna]`` header."""
    cleaned_data = self.cleaned_data
    cleaned_data['outgroup'] = 'CP100-11'
    dataset_creator = CreateDataset(cleaned_data)
    expected = 'nstates dna;\nxread\n1909 2\n\n&[dna]\nCP100_11_Aus_bus'
    result = dataset_creator.dataset_str
    # assertIn reports both operands on failure, unlike assertTrue(x in y).
    self.assertIn(expected, result)
def test_fill_seqs_with_missing_chars(self):
    """With all positions selected, the CP100-10 row is present in the dataset."""
    cleaned_data = self.cleaned_data
    cleaned_data['positions'] = ['ALL']
    dataset_creator = CreateDataset(cleaned_data)
    result = dataset_creator.dataset_str
    expected = "CP100_10_Aus_aus CGACGACGACGACGACGACG"
    # assertIn reports both operands on failure, unlike assertTrue(x in y).
    self.assertIn(expected, result)
def setUp(self):
    """Load the test data dump and build the default PHYLIP fixtures.

    Sets ``self.cleaned_data`` (options selecting the ``all_taxa`` taxon set
    and ``all_genes`` gene set), the paths of the two reference PHYLIP files,
    a user with a known password, a test client and a ``CreateDataset``.
    """
    call_command(
        'migrate_db',
        dumpfile=settings.MEDIA_ROOT + 'test_data.xml',
        verbosity=0,
    )

    all_genes = GeneSets.objects.get(geneset_name='all_genes')
    all_taxa = TaxonSets.objects.get(taxonset_name='all_taxa')
    self.cleaned_data = {
        'gene_codes': '',
        'taxonset': all_taxa,
        'voucher_codes': '',
        'geneset': all_genes,
        'taxon_names': ['CODE', 'GENUS', 'SPECIES'],
        'number_genes': None,
        'degen_translations': None,
        'positions': ['ALL'],
        'translations': False,
        'partition_by_positions': 'by gene',
        'file_format': 'PHYLIP',
        'aminoacids': False,
        'outgroup': '',
    }

    # Both reference files live in the same directory.
    reference_dir = os.path.join(
        settings.BASE_DIR, '..', 'create_dataset', 'tests',
        'create_phylip_dataset')
    self.dataset_file = os.path.join(reference_dir, 'dataset.phy')
    self.aa_dataset_file = os.path.join(reference_dir, 'aa_dataset.phy')

    # Give the existing user a known password.
    self.user = User.objects.get(username='******')
    self.user.set_password('pass')
    self.user.save()

    self.c = Client()
    self.dataset_creator = CreateDataset(self.cleaned_data)
    # Show complete diffs when an assertEqual on a long dataset fails.
    self.maxDiff = None
def setUp(self):
    """Load the test database dump and build the default NEXUS fixtures.

    Sets ``self.cleaned_data`` (options selecting the ``all_taxa`` taxon set
    and ``all_genes`` gene set), a test client and a ``CreateDataset``.
    """
    call_command('migrate_db', dumpfile='test_db_dump2.xml', verbosity=0)

    all_genes = GeneSets.objects.get(geneset_name='all_genes')
    all_taxa = TaxonSets.objects.get(taxonset_name='all_taxa')
    self.cleaned_data = {
        'gene_codes': '',
        'taxonset': all_taxa,
        'voucher_codes': '',
        'geneset': all_genes,
        'taxon_names': ['CODE', 'GENUS', 'SPECIES'],
        'translations': False,
        'degen_translations': 'normal',
        'number_genes': None,
        'positions': ['ALL'],
        'partition_by_positions': 'by gene',
        'file_format': 'NEXUS',
        'aminoacids': False,
        'outgroup': None,
    }
    self.c = Client()
    self.dataset_creator = CreateDataset(self.cleaned_data)
    # Show complete diffs when an assertEqual on a long dataset fails.
    self.maxDiff = None
def test_dataset_all_1st_codons_1st2nd_3rd(self):
    """COI-begin is emitted as a 1st-2nd partition followed by a 3rd partition."""
    cleaned_data = self.cleaned_data
    cleaned_data['gene_codes'] = [Genes.objects.get(gene_code='COI-begin')]
    # Remove and re-add the key, exactly as the del/assign pair did.
    cleaned_data.pop('positions')
    cleaned_data['positions'] = ['ALL', '1st']
    cleaned_data['partition_by_positions'] = '1st-2nd, 3rd'
    dataset_creator = CreateDataset(cleaned_data)

    # NOTE(review): the two runs below replace long inline literals; their
    # lengths (445 and 222) must be confirmed against the fixture output.
    first_second_run = "AC" * 222 + "A"
    third_run = "G" * 222
    expected = (
        " >COI-begin_1st-2nd ----"
        " >CP100_10_Papilionoidea_Aus_aus " + first_second_run +
        " >CP100_11_Papilionoidea_Aus_bus " + first_second_run +
        " >COI-begin_3rd ----"
        " >CP100_10_Papilionoidea_Aus_aus " + third_run +
        " >CP100_11_Papilionoidea_Aus_bus " + third_run +
        " "
    )
    self.assertEqual(expected.lstrip(), dataset_creator.dataset_str)
def test_dataset_ALL_1st_2nd_3rd_each(self):
    """COI is emitted as three partitions, one per codon position."""
    cleaned_data = self.cleaned_data
    cleaned_data['gene_codes'] = [Genes.objects.get(gene_code='COI')]
    # Remove and re-add the key, exactly as the del/assign pair did.
    cleaned_data.pop('positions')
    cleaned_data['positions'] = ['ALL', '1st', '2nd', '3rd']
    cleaned_data['partition_by_positions'] = 'EACH'
    dataset_creator = CreateDataset(cleaned_data)

    # Adjacent literals concatenate into the same single string as before.
    expected = (
        " >coi_1st_codon"
        " --------------------"
        " >CP100-10_Papilionoidea_Melitaea_diamina"
        " ????????TGGAAGATCACAACAGTGACATTAGGGCATAAAGAGCGTAAATTAGACAAAGGTGATCGCTATGGCGAGTCCAATAATTTTCCTTACTATAAAGGAGGGAGTAGTCCCTTAAGCAGGTGGTGATTTCTGGATTATGGAATAAAAAAACAAAATTGCACTTGTGGGAAGTCCTTTTCGTGGGAAACTAGCACAATTTGTTGGGGC???????????"
        " >CP100-11_Melitaea_diamina"
        " ?ACTATTACATTTTACAAATTTTATATTATTTCTAATTTTTATTTATGATATAATTTATAAAAGACATATCCAATTAATAAGTACATTATATATAATGAATAAATCCATATTTCTAATAGTATTTTATATGTCTCAATTTTTTATTTAATTTATTTAATATAAAAATAAATCAATAATATATTTATAGTATTTCATTTCCAAAT???????????"
        " >coi_2nd_codon"
        " --------------------"
        " >CP100-10_Papilionoidea_Melitaea_diamina"
        " ????????GCGTTGCCTGTTTGCATGACGTTTGAAATAACTTCCACTTTTTTTTTCTTTGGTGAGTTCTTTGCCATCTCGTAATGTGTTCCCTTTTTCGGTTAAGCGCGGCTACCTCCATCAGGCCTATCTTCTATCGTCCTTGCTATTCCTTATGTAATCAAATCTTTGCTGTCCTTTTTCTCTTCGCTCTTTCAGATACCTTACGGGGAC???????????"
        " >CP100-11_Melitaea_diamina"
        " TGGAAGATCACAACAGTGACATTAGGGCATAAAGAGCGTAAATTAGACAAAGGTGATCGCTATGGCGAGTCCAATAATTTTCCTTACTATAAAGGAGGGAGTAGTCCCTTAAGCAGGTGGTGATTTCTGGATTATGGAATAAAAAAACAAAATTGCACTTGTGGGAAGTCCTTTTCGTGGGAAACTAGCACAATTTGTTGGGGC???????????"
        " >coi_3rd_codon"
        " --------------------"
        " >CP100-10_Papilionoidea_Melitaea_diamina"
        " ????????ACTATTACATTTTACAAATTTTATATTATTTCTAATTTTTATTTATGATATAATTTATAAAAGACATATCCAATTAATAAGTACATTATATATAATGAATAAATCCATATTTCTAATAGTATTTTATATGTCTCAATTTTTTATTTAATTTATTTAATATAAAAATAAATCAATAATATATTTATAGTATTTCATTTCCAAAT???????????"
        " >CP100-11_Melitaea_diamina"
        " GCGTTGCCTGTTTGCATGACGTTTGAAATAACTTCCACTTTTTTTTTCTTTGGTGAGTTCTTTGCCATCTCGTAATGTGTTCCCTTTTTCGGTTAAGCGCGGCTACCTCCATCAGGCCTATCTTCTATCGTCCTTGCTATTCCTTATGTAATCAAATCTTTGCTGTCCTTTTTCTCTTCGCTCTTTCAGATACCTTACGGGGAC???????????"
        " \n"
    )
    self.assertEqual(expected.strip(), dataset_creator.dataset_str)
def test_dataset_1st_2nd_each(self):
    """1st and 2nd codon positions of COI-begin, partitioned by gene."""
    # TODO fix test when dataset-creator has issue #26 fixed
    cleaned_data = self.cleaned_data
    cleaned_data['gene_codes'] = [Genes.objects.get(gene_code='COI-begin')]
    # Remove and re-add the key, exactly as the del/assign pair did.
    cleaned_data.pop('positions')
    cleaned_data['positions'] = ['1st', '2nd']
    cleaned_data['partition_by_positions'] = 'by gene'
    dataset_creator = CreateDataset(cleaned_data)

    # NOTE(review): rows are rebuilt from their repeating unit; confirm
    # that the fixture has seven 60-character rows plus a 25-character tail.
    full_row = "AC" * 30
    last_row = "AC" * 12 + "A"
    sequence_rows = " ".join([full_row] * 7 + [last_row])
    expected = (
        " >CP100_10_Papilionoidea_Aus_aus " + sequence_rows +
        " >CP100_11_Papilionoidea_Aus_bus " + sequence_rows +
        " "
    )
    self.assertEqual(expected.lstrip(), dataset_creator.dataset_str)
def test_dataset_1st_2nd_codon_one_partition(self):
    """1st and 2nd codon positions of COI-begin with the default partitioning."""
    cleaned_data = self.cleaned_data
    cleaned_data['gene_codes'] = [Genes.objects.get(gene_code='COI-begin')]
    # Remove and re-add the key, exactly as the del/assign pair did.
    cleaned_data.pop('positions')
    cleaned_data['positions'] = ['1st', '2nd']
    dataset_creator = CreateDataset(cleaned_data)

    # NOTE(review): rows are rebuilt from their repeating unit; confirm
    # that the fixture has seven 60-character rows plus a 25-character tail.
    full_row = "AC" * 30
    last_row = "AC" * 12 + "A"
    sequence_rows = " ".join([full_row] * 7 + [last_row])
    expected = (
        " >CP100_10_Papilionoidea_Aus_aus " + sequence_rows +
        " >CP100_11_Papilionoidea_Aus_bus " + sequence_rows +
        " "
    )
    self.assertEqual(expected.lstrip(), dataset_creator.dataset_str)
def test_nexus_1st_3rd_codon_as_1st2nd_3rd(self):
    """Selecting only 1st and 3rd positions is reported as an error."""
    cleaned_data = self.cleaned_data.copy()
    cleaned_data['positions'] = ['1st', '3rd']
    cleaned_data['partition_by_positions'] = '1st-2nd, 3rd'
    dataset_creator = CreateDataset(cleaned_data)
    expected = 'Cannot create dataset for only codon positions 1st and 3rd.'
    # Errors may be non-string objects, so stringify before searching.
    all_errors = ''.join(str(error) for error in dataset_creator.errors)
    # assertIn reports both operands on failure, unlike assertTrue(x in y).
    self.assertIn(expected, all_errors)
def test_partitioned_each(self):
    """Partitioning by codon position still emits the CP100-10 sequence row."""
    cleaned_data = self.cleaned_data.copy()
    cleaned_data['partition_by_positions'] = 'by codon position'
    dataset_creator = CreateDataset(cleaned_data)
    result = dataset_creator.dataset_str
    expected = "CP100_10_Aus_aus ACGACGACGA CGACGACGAC GACGACGACG ACGACGACGA CGACGACGAC"
    # assertIn reports both operands on failure, unlike assertTrue(x in y).
    self.assertIn(expected, result)
def test_create_dataset_drop_voucher(self):
    """A voucher code prefixed with ``--`` does not appear in the dataset."""
    cleaned_data = self.cleaned_data
    cleaned_data['voucher_codes'] = 'CP100-10\r\n--CP100-11'
    cleaned_data['taxonset'] = TaxonSets.objects.get(
        taxonset_name='Erebia')
    dataset_creator = CreateDataset(cleaned_data)
    result = dataset_creator.dataset_str
    # assertNotIn reports both operands on failure.
    self.assertNotIn('CP100-11', result)
def test_nexus_with_outgroup(self):
    """The dataset contains an ``outgroup`` statement for voucher CP100-11."""
    cleaned_data = self.cleaned_data
    cleaned_data['outgroup'] = 'CP100-11'
    cleaned_data['geneset'] = GeneSets.objects.get(geneset_name='all_genes')
    dataset_creator = CreateDataset(cleaned_data)
    result = dataset_creator.dataset_str
    expected = "outgroup CP100_11_Aus_bus;"
    # assertIn reports both operands on failure, unlike assertTrue(x in y).
    self.assertIn(expected, result)
def test_dataset_1st3rd_codon_partitioned_each(self):
    """The first reported error rejects the 1st+3rd position selection."""
    options = self.cleaned_data.copy()
    options.update({
        'partition_by_positions': 'by codon position',
        'positions': ['1st', '3rd'],
    })
    creator = CreateDataset(options)
    expected = 'Cannot create dataset for only codon positions 1st and 3rd.'
    first_error = creator.errors[0]
    self.assertEqual(expected, str(first_error))
def test_dataset_with_partitions(self):
    """With the '1st-2nd, 3rd' partitioning the dataset string is empty."""
    options = self.cleaned_data
    options['partition_by_positions'] = '1st-2nd, 3rd'
    creator = CreateDataset(options)
    self.assertEqual('', creator.dataset_str)
def test_dataset_as_aminoacids(self):
    """With ``aminoacids`` enabled the dataset contains a run of ``D``."""
    cleaned_data = self.cleaned_data
    cleaned_data['aminoacids'] = True
    dataset_creator = CreateDataset(cleaned_data)
    expected = 'DDDDDDDDDDDDDDDDDDDDDDDDDDD'
    result = dataset_creator.dataset_str
    # assertIn reports both operands on failure, unlike assertTrue(x in y).
    self.assertIn(expected, result)
def test_dataset_2nd3rd_codon_partitioned_1st2nd_3rd(self):
    """The first reported error rejects the 2nd+3rd position selection."""
    options = self.cleaned_data.copy()
    options.update({
        'partition_by_positions': '1st-2nd,3rd',
        'positions': ['2nd', '3rd'],
    })
    creator = CreateDataset(options)
    expected = 'Cannot create dataset for only codon positions 2nd and 3rd.'
    first_error = creator.errors[0]
    self.assertEqual(expected, str(first_error))
def test_char_lengths_for_partitions_aminoacids(self):
    """The amino-acid dataset declares the ef1a charset as 689-1101."""
    cleaned_data = self.cleaned_data
    cleaned_data['aminoacids'] = True
    cleaned_data['outgroup'] = ''
    cleaned_data['geneset'] = GeneSets.objects.get(geneset_name='all_genes')
    dataset_creator = CreateDataset(cleaned_data)
    result = dataset_creator.dataset_str
    expected = "charset ef1a = 689-1101"
    # assertIn reports both operands on failure, unlike assertTrue(x in y).
    self.assertIn(expected, result)
def test_create_dataset_aa_with_outgroup(self):
    """The outgroup voucher directly follows the ``&[protein]`` header."""
    cleaned_data = self.cleaned_data
    cleaned_data['positions'] = ['ALL']
    cleaned_data['outgroup'] = 'CP100-11'
    cleaned_data['aminoacids'] = True
    dataset_creator = CreateDataset(cleaned_data)
    expected = '&[protein]\nCP100_11_Aus_bus DDDDDDDDDDDDDDDDDDDDDDDD'
    result = dataset_creator.dataset_str
    # assertIn reports both operands on failure, unlike assertTrue(x in y).
    self.assertIn(expected, result)
def test_dataset_with_degen_tranlations(self):
    """With degen translation ``S`` the dataset contains repeated ``GAY``."""
    cleaned_data = self.cleaned_data
    cleaned_data['degen_translations'] = 'S'
    cleaned_data['translations'] = True
    dataset_creator = CreateDataset(cleaned_data)
    expected = 'GAYGAYGAYGAYGAYGAYGAYGAY'
    result = dataset_creator.dataset_str.strip()
    # assertIn reports both operands on failure, unlike assertTrue(x in y).
    self.assertIn(expected, result)
def test_dataset_with_partitions_and_degen_tranlations(self):
    """Degen translation plus '1st-2nd, 3rd' partitioning gives an empty dataset."""
    options = self.cleaned_data
    options['partition_by_positions'] = '1st-2nd, 3rd'
    options['translations'] = True
    options['degen_translations'] = 'normal'
    creator = CreateDataset(options)
    stripped_dataset = creator.dataset_str.strip()
    self.assertEqual('', stripped_dataset)
def test_warning_when_missing_seqs_for_voucher(self):
    """A voucher saved without sequences produces a warning."""
    # A voucher that exists in the database, saved here with no sequences.
    Vouchers(code='CP100-13').save()
    cleaned_data = self.cleaned_data.copy()
    cleaned_data['voucher_codes'] = 'CP100-13'
    expected = 'Could not find sequences for voucher CP100-13 and gene_code CC'
    result = CreateDataset(cleaned_data)
    # assertIn reports both operands on failure, unlike assertTrue(x in y).
    self.assertIn(expected, result.warnings)
def test_order_of_vouchers_is_kept_along_partitions(self): cleaned_data = self.cleaned_data dataset_creator = CreateDataset(cleaned_data) result = dataset_creator.dataset_str expected = """ CP100_19_Aus_jus ?????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????? [ef1a] """ self.assertTrue(expected.strip() in result)
def test_create_dataset_1st_codon(self):
    """With only 1st positions the header reports 636 characters and 2 taxa."""
    cleaned_data = self.cleaned_data
    cleaned_data['positions'] = ['1st']
    dataset_creator = CreateDataset(cleaned_data)
    result = dataset_creator.dataset_str

    # assertIn reports both operands on failure, unlike assertTrue(x in y).
    expected = 'nstates dna;\nxread\n636 2\n\n&[dna]\nCP100_10_Aus_aus'
    self.assertIn(expected, result)

    expected = 'AAAAAAAAAAAAAAAAAAAAAAAAAAA'
    self.assertIn(expected, result)
def test_try_dataset_degenerated_in_partitions(self):
    """A degenerated, by-gene dataset declares NTAX=10 and NCHAR=4732."""
    cleaned_data = self.cleaned_data
    cleaned_data['voucher_codes'] = 'CP100-10'
    cleaned_data['degen_translations'] = 'normal'
    cleaned_data['partition_by_positions'] = 'by gene'
    cleaned_data['translations'] = True
    dataset_creator = CreateDataset(cleaned_data)
    result = dataset_creator.dataset_str
    expected = "DIMENSIONS NTAX=10 NCHAR=4732;"
    # assertIn reports both operands on failure, unlike assertTrue(x in y).
    self.assertIn(expected, result)
def test_create_aa_dataset(self):
    """The amino-acid dataset equals the reference file's contents."""
    with open(self.aa_dataset_file, "r") as reference:
        expected = reference.read()

    options = self.cleaned_data.copy()
    options['aminoacids'] = True
    creator = CreateDataset(options)
    self.assertEqual(expected, creator.dataset_str)
def test_all_codons_partitioned_as_each(self):
    """Partitioning by codon position is reported as an error."""
    options = self.cleaned_data.copy()
    options.update({
        'gene_codes': [Genes.objects.get(gene_code='COI-begin')],
        'positions': ['ALL'],
        'partition_by_positions': 'by codon position',
    })
    creator = CreateDataset(options)
    expected = 'Cannot produce MEGA dataset with codon positions in different partitions'
    reported = creator.errors
    self.assertEqual(expected, str(reported[0]))
class CreateDatasetUtilsTest(TestCase): def setUp(self): args = [] opts = {'dumpfile': 'test_db_dump2.xml', 'verbosity': 0} cmd = 'migrate_db' call_command(cmd, *args, **opts) g1 = Genes.objects.get(gene_code='COI-begin') g2 = Genes.objects.get(gene_code='ef1a') self.cleaned_data = { 'gene_codes': [g1, g2], 'taxonset': None, 'voucher_codes': 'CP100-10\r\nCP100-11', 'geneset': None, 'taxon_names': ['CODE', 'SUPERFAMILY', 'GENUS', 'SPECIES'], 'positions': ['ALL'], 'translations': False, 'partition_by_positions': 'by gene', 'degen_translations': None, 'number_genes': None, 'file_format': 'FASTA', 'aminoacids': False, 'outgroup': None, } self.c = Client() self.dataset_creator = CreateDataset(self.cleaned_data) self.maxDiff = None def test_create_dataset(self): expected = '>CP100_10_Papilionoidea_Aus_aus' result = self.dataset_creator.dataset_str self.assertTrue(expected in result) def test_create_dataset_with_gene_code(self): self.cleaned_data['taxon_names'] = ['CODE', 'GENECODE'] dataset_creator = CreateDataset(self.cleaned_data) expected = ">CP100_10\n" result = dataset_creator.dataset_str self.assertTrue(expected in result) def test_get_taxon_names_for_taxa(self): expected = { 'CP100-10': {'code': 'CP100-10', 'genus': 'Aus', 'species': 'aus', 'superfamily': 'Papilionoidea'}, 'CP100-11': {'code': 'CP100-11', 'genus': 'Aus', 'species': 'bus', 'superfamily': 'Papilionoidea'}, } result = self.dataset_creator.get_taxon_names_for_taxa() self.assertEqual(expected, result) def test_create_dataset_drop_voucher(self): cleaned_data = self.cleaned_data cleaned_data['voucher_codes'] = 'CP100-10\r\n--CP100-11' cleaned_data['taxonset'] = TaxonSets.objects.get(taxonset_name='all_taxa') dataset_creator = CreateDataset(cleaned_data) result = dataset_creator.dataset_str self.assertTrue('CP100_11' not in result) def test_get_taxon_names_for_taxa_additional_fields(self): self.cleaned_data['taxon_names'] = ['SUPERFAMILY'] dataset_creator = CreateDataset(self.cleaned_data) expected = { 
'CP100-10': {'code': 'CP100-10', 'superfamily': 'Papilionoidea'}, 'CP100-11': {'code': 'CP100-11', 'superfamily': 'Papilionoidea'}, } result = dataset_creator.get_taxon_names_for_taxa() self.assertEqual(expected, result) def test_dataset_all_codons_as_one(self): g1 = Genes.objects.get(gene_code='COI-begin') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] dataset_creator = CreateDataset(cleaned_data) expected = """ >CP100_10_Papilionoidea_Aus_aus CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGA >CP100_11_Papilionoidea_Aus_bus CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGA """ result = dataset_creator.dataset_str self.assertEqual(expected.lstrip(), result) def 
test_dataset_all_codons_as_one_with_no_reading_frame(self): g1 = Genes.objects.get(gene_code='COI-begin') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] dataset_creator = CreateDataset(cleaned_data) expected = """ >CP100_10_Papilionoidea_Aus_aus CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGA >CP100_11_Papilionoidea_Aus_bus CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGA """ result = dataset_creator.dataset_str self.assertEqual(expected.lstrip(), result) def test_dataset_all_codons_1st_as_one(self): g1 = Genes.objects.get(gene_code='COI-begin') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] cleaned_data['positions'] = list(['ALL', '1st'],) dataset_creator = CreateDataset(cleaned_data) 
expected = """ >CP100_10_Papilionoidea_Aus_aus CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGA >CP100_11_Papilionoidea_Aus_bus CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGA """ result = dataset_creator.dataset_str self.assertEqual(expected.lstrip(), result) def test_dataset_all_codons_1st_2nd_as_one(self): g1 = Genes.objects.get(gene_code='COI-begin') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] cleaned_data['positions'] = list(['ALL', '1st', '2nd'],) dataset_creator = CreateDataset(cleaned_data) expected = """ >CP100_10_Papilionoidea_Aus_aus CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA 
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGA >CP100_11_Papilionoidea_Aus_bus CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGA """ result = dataset_creator.dataset_str self.assertEqual(expected.lstrip(), result) def test_dataset_all_codons_1st_2nd_3rd_as_one(self): g1 = Genes.objects.get(gene_code='COI-begin') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] cleaned_data['positions'] = list(['ALL', '1st', '2nd', '3rd'],) dataset_creator = CreateDataset(cleaned_data) expected = """ >CP100_10_Papilionoidea_Aus_aus CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA 
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGA >CP100_11_Papilionoidea_Aus_bus CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGA """ result = dataset_creator.dataset_str self.assertEqual(expected.lstrip(), result) def test_dataset_1st_2nd_3rd_codons_as_one(self): g1 = Genes.objects.get(gene_code='COI-begin') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] cleaned_data['positions'] = list(['1st', '2nd', '3rd'],) dataset_creator = CreateDataset(cleaned_data) expected = """ >CP100_10_Papilionoidea_Aus_aus CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA 
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGA >CP100_11_Papilionoidea_Aus_bus CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGA """ result = dataset_creator.dataset_str self.assertEqual(expected.lstrip(), result) def test_dataset_1st_2nd_3rd_codons_as_1st2nd_3rd_gene_with_no_reading_frame(self): g1 = Genes.objects.get(gene_code='COI-begin') g1.reading_frame = None g1.save() cleaned_data = self.cleaned_data.copy() cleaned_data['gene_codes'] = [g1] cleaned_data['positions'] = list(['1st', '2nd', '3rd'],) cleaned_data['partition_by_positions'] = '1st-2nd, 3rd' dataset_creator = CreateDataset(cleaned_data) expected = "" result = dataset_creator.dataset_str self.assertEqual(expected.lstrip(), result) def test_dataset_all_codons_partitions_each(self): # TODO: fix test after fixing dataset-creator issue #26 g1 = Genes.objects.get(gene_code='COI-begin') cleaned_data = self.cleaned_data.copy() cleaned_data['gene_codes'] = [g1] cleaned_data['positions'] = list(['ALL'],) cleaned_data['partition_by_positions'] = 'by gene' dataset_creator = CreateDataset(cleaned_data) expected = """ >CP100_10_Papilionoidea_Aus_aus CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA 
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGA >CP100_11_Papilionoidea_Aus_bus CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGA """ result = dataset_creator.dataset_str self.assertEqual(expected.lstrip(), result) def test_dataset_all_codons_1st_partitions_each(self): g1 = Genes.objects.get(gene_code='COI-begin') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] cleaned_data['positions'] = list(['ALL', '1st'],) cleaned_data['partition_by_positions'] = 'by gene' dataset_creator = CreateDataset(cleaned_data) expected = """ >CP100_10_Papilionoidea_Aus_aus CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA 
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGA >CP100_11_Papilionoidea_Aus_bus CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGA """ result = dataset_creator.dataset_str self.assertEqual(expected.lstrip(), result) def test_dataset_all_codons_1st_2nd_partitions_each(self): g1 = Genes.objects.get(gene_code='COI-begin') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] cleaned_data['positions'] = list(['ALL', '1st', '2nd'],) cleaned_data['partition_by_positions'] = 'by gene' dataset_creator = CreateDataset(cleaned_data) expected = """ >CP100_10_Papilionoidea_Aus_aus CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA 
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGA >CP100_11_Papilionoidea_Aus_bus CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGA """ result = dataset_creator.dataset_str self.assertEqual(expected.lstrip(), result) def test_dataset_all_codons_1st_2nd_3rd_partitions_each(self): g1 = Genes.objects.get(gene_code='COI-begin') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] cleaned_data['positions'] = list(['ALL', '1st', '2nd', '3rd'],) cleaned_data['partition_by_positions'] = 'by gene' dataset_creator = CreateDataset(cleaned_data) expected = """ >CP100_10_Papilionoidea_Aus_aus CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA 
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGA >CP100_11_Papilionoidea_Aus_bus CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGA """ result = dataset_creator.dataset_str self.assertEqual(expected.lstrip(), result) def test_dataset_1st_codon_one_partition(self): g1 = Genes.objects.get(gene_code='COI-begin') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] del cleaned_data['positions'] cleaned_data['positions'] = ['1st'] dataset_creator = CreateDataset(cleaned_data) expected = """ >CP100_10_Papilionoidea_Aus_aus AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA >CP100_11_Papilionoidea_Aus_bus AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA """ result = dataset_creator.dataset_str self.assertEqual(expected.lstrip(), result) def test_dataset_2nd_codon_one_partition(self): g1 = 
Genes.objects.get(gene_code='COI-begin') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] del cleaned_data['positions'] cleaned_data['positions'] = ['2nd'] dataset_creator = CreateDataset(cleaned_data) expected = """ >CP100_10_Papilionoidea_Aus_aus CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC >CP100_11_Papilionoidea_Aus_bus CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC """ result = dataset_creator.dataset_str self.assertEqual(expected.lstrip(), result) def test_dataset_3rd_codon_one_partition(self): g1 = Genes.objects.get(gene_code='COI-begin') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] del cleaned_data['positions'] cleaned_data['positions'] = ['3rd'] dataset_creator = CreateDataset(cleaned_data) expected = """ >CP100_10_Papilionoidea_Aus_aus GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG >CP100_11_Papilionoidea_Aus_bus GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG """ result = dataset_creator.dataset_str self.assertEqual(expected.lstrip(), result) def test_dataset_1st_2nd_codon_one_partition(self): g1 = Genes.objects.get(gene_code='COI-begin') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] del cleaned_data['positions'] cleaned_data['positions'] = ['1st', '2nd'] dataset_creator = 
CreateDataset(cleaned_data) expected = """ >CP100_10_Papilionoidea_Aus_aus ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC ACACACACACACACACACACACACA >CP100_11_Papilionoidea_Aus_bus ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC ACACACACACACACACACACACACA """ result = dataset_creator.dataset_str self.assertEqual(expected.lstrip(), result) def test_dataset_2nd_3rd_codon_one_partition(self): g1 = Genes.objects.get(gene_code='COI-begin') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] del cleaned_data['positions'] cleaned_data['positions'] = ['2nd', '3rd'] dataset_creator = CreateDataset(cleaned_data) expected = 'Cannot create dataset for only codon positions 2nd and 3rd.' result = dataset_creator.errors self.assertTrue(expected in ''.join(str(i) for i in result)) def test_dataset_1st_3rd_codon_one_partition(self): g1 = Genes.objects.get(gene_code='COI-begin') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] del cleaned_data['positions'] cleaned_data['positions'] = ['1st', '3rd'] dataset_creator = CreateDataset(cleaned_data) expected = 'Cannot create dataset for only codon positions 1st and 3rd.' 
result = dataset_creator.errors self.assertTrue(expected in ''.join(str(i) for i in result)) def test_dataset_1st_3rd_codon_partition_1st2nd_3rd_gene_with_no_reading_frame(self): g1 = Genes.objects.get(gene_code='COI-begin') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] del cleaned_data['positions'] cleaned_data['positions'] = ['1st', '3rd'] cleaned_data['partition_by_positions'] = '1st-2nd, 3rd' dataset_creator = CreateDataset(cleaned_data) expected = "" result = dataset_creator.dataset_str self.assertEqual(expected.lstrip(), result) def test_dataset_1st_each(self): g1 = Genes.objects.get(gene_code='COI-begin') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] del cleaned_data['positions'] cleaned_data['positions'] = ['1st'] cleaned_data['partition_by_positions'] = 'by gene' dataset_creator = CreateDataset(cleaned_data) expected = """ >CP100_10_Papilionoidea_Aus_aus AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA >CP100_11_Papilionoidea_Aus_bus AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA """ result = dataset_creator.dataset_str self.assertEqual(expected.lstrip(), result) def test_dataset_2nd_each(self): g1 = Genes.objects.get(gene_code='COI-begin') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] del cleaned_data['positions'] cleaned_data['positions'] = ['2nd'] cleaned_data['partition_by_positions'] = 'by gene' dataset_creator = CreateDataset(cleaned_data) expected = """ >CP100_10_Papilionoidea_Aus_aus CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC 
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC >CP100_11_Papilionoidea_Aus_bus CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC """ result = dataset_creator.dataset_str self.assertEqual(expected.lstrip(), result) def test_dataset_3rd_each(self): g1 = Genes.objects.get(gene_code='COI-begin') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] del cleaned_data['positions'] cleaned_data['positions'] = ['3rd'] cleaned_data['partition_by_positions'] = 'by gene' dataset_creator = CreateDataset(cleaned_data) expected = """ >CP100_10_Papilionoidea_Aus_aus GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG >CP100_11_Papilionoidea_Aus_bus GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG """ result = dataset_creator.dataset_str self.assertEqual(expected.lstrip(), result) def test_dataset_1st_2nd_each(self): # TODO fix test when dataset-creator has issue #26 fixed g1 = Genes.objects.get(gene_code='COI-begin') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] del cleaned_data['positions'] cleaned_data['positions'] = ['1st', '2nd'] cleaned_data['partition_by_positions'] = 'by gene' dataset_creator = CreateDataset(cleaned_data) expected = """ >CP100_10_Papilionoidea_Aus_aus ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC 
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC ACACACACACACACACACACACACA >CP100_11_Papilionoidea_Aus_bus ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC ACACACACACACACACACACACACA """ result = dataset_creator.dataset_str self.assertEqual(expected.lstrip(), result) def test_dataset_1st_3rd_each(self): g1 = Genes.objects.get(gene_code='COI-begin') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] del cleaned_data['positions'] cleaned_data['positions'] = ['1st', '3rd'] cleaned_data['partition_by_positions'] = 'by gene' dataset_creator = CreateDataset(cleaned_data) expected = 'Cannot create dataset for only codon positions 1st and 3rd.' result = dataset_creator.errors self.assertTrue(expected in ''.join(str(i) for i in result)) def test_dataset_2nd_3rd_each(self): g1 = Genes.objects.get(gene_code='COI-begin') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] del cleaned_data['positions'] cleaned_data['positions'] = ['2nd', '3rd'] cleaned_data['partition_by_positions'] = 'by gene' dataset_creator = CreateDataset(cleaned_data) expected = 'Cannot create dataset for only codon positions 2nd and 3rd.' 
result = dataset_creator.errors self.assertTrue(expected in ''.join(str(i) for i in result)) def test_dataset_2nd_3rd_paritions_1st2nd_3rd_gene_with_no_reading_frame(self): g1 = Genes.objects.get(gene_code='COI-begin') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] del cleaned_data['positions'] cleaned_data['positions'] = ['2nd', '3rd'] cleaned_data['partition_by_positions'] = '1st-2nd, 3rd' dataset_creator = CreateDataset(cleaned_data) expected = "" result = dataset_creator.dataset_str self.assertEqual(expected.lstrip(), result) def test_dataset_1st_2nd_3rd_each(self): # TODO Fix this test after fixing dataset-creator issue #26 g1 = Genes.objects.get(gene_code='COI-begin') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] del cleaned_data['positions'] cleaned_data['positions'] = ['1st', '2nd', '3rd'] cleaned_data['partition_by_positions'] = 'by gene' dataset_creator = CreateDataset(cleaned_data) expected = """ >CP100_10_Papilionoidea_Aus_aus CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGA >CP100_11_Papilionoidea_Aus_bus CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA 
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGA """ result = dataset_creator.dataset_str self.assertEqual(expected.lstrip(), result) def test_dataset_all_1st_2nd_3rd_each(self): # TODO: fix test after fixing dataset-creator issue #26 g1 = Genes.objects.get(gene_code='COI-begin') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] del cleaned_data['positions'] cleaned_data['positions'] = ['ALL', '1st', '2nd', '3rd'] cleaned_data['partition_by_positions'] = 'by gene' dataset_creator = CreateDataset(cleaned_data) expected = """ >CP100_10_Papilionoidea_Aus_aus CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGA >CP100_11_Papilionoidea_Aus_bus CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA 
CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGACGA CGACGACGA """ result = dataset_creator.dataset_str self.assertEqual(expected.lstrip(), result) def test_dataset_1st_codon_1st2nd_3rd(self): g1 = Genes.objects.get(gene_code='COI-begin') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] del cleaned_data['positions'] cleaned_data['positions'] = ['1st'] cleaned_data['partition_by_positions'] = '1st-2nd, 3rd' dataset_creator = CreateDataset(cleaned_data) expected = """ >COI-begin_1st ---- >CP100_10_Papilionoidea_Aus_aus AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA >CP100_11_Papilionoidea_Aus_bus AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA """ result = dataset_creator.dataset_str self.assertEqual(expected.lstrip(), result) def test_dataset_2nd_codon_1st2nd_3rd(self): g1 = Genes.objects.get(gene_code='COI-begin') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] del cleaned_data['positions'] cleaned_data['positions'] = ['2nd'] cleaned_data['partition_by_positions'] = '1st-2nd, 3rd' dataset_creator = CreateDataset(cleaned_data) expected = """ >COI-begin_2nd ---- >CP100_10_Papilionoidea_Aus_aus 
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC >CP100_11_Papilionoidea_Aus_bus CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC """ result = dataset_creator.dataset_str self.assertEqual(expected.lstrip(), result) def test_dataset_3rd_codon_1st2nd_3rd(self): g1 = Genes.objects.get(gene_code='COI-begin') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] del cleaned_data['positions'] cleaned_data['positions'] = ['3rd'] cleaned_data['partition_by_positions'] = '1st-2nd, 3rd' dataset_creator = CreateDataset(cleaned_data) expected = """ >COI-begin_3rd ---- >CP100_10_Papilionoidea_Aus_aus GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG >CP100_11_Papilionoidea_Aus_bus GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG """ result = dataset_creator.dataset_str self.assertEqual(expected.lstrip(), result) def test_dataset_1st_2nd_codons_1st2nd_3rd(self): g1 = Genes.objects.get(gene_code='COI-begin') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] del cleaned_data['positions'] cleaned_data['positions'] = ['1st', '2nd'] cleaned_data['partition_by_positions'] = '1st-2nd, 3rd' dataset_creator = CreateDataset(cleaned_data) expected = """ >COI-begin_1st-2nd ---- >CP100_10_Papilionoidea_Aus_aus 
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACA >CP100_11_Papilionoidea_Aus_bus ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACA """ result = dataset_creator.dataset_str self.assertEqual(expected.lstrip(), result) def test_dataset_1st_2nd_3rd_codons_1st2nd_3rd(self): g1 = Genes.objects.get(gene_code='COI-begin') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] del cleaned_data['positions'] cleaned_data['positions'] = ['ALL'] cleaned_data['partition_by_positions'] = '1st-2nd, 3rd' dataset_creator = CreateDataset(cleaned_data) expected = """ >COI-begin_1st-2nd ---- >CP100_10_Papilionoidea_Aus_aus ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACA >CP100_11_Papilionoidea_Aus_bus 
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACA >COI-begin_3rd ---- >CP100_10_Papilionoidea_Aus_aus GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG >CP100_11_Papilionoidea_Aus_bus GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG """ result = dataset_creator.dataset_str self.assertEqual(expected.lstrip(), result) def test_dataset_all_1st_codons_1st2nd_3rd(self): g1 = Genes.objects.get(gene_code='COI-begin') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] del cleaned_data['positions'] cleaned_data['positions'] = ['ALL', '1st'] cleaned_data['partition_by_positions'] = '1st-2nd, 3rd' dataset_creator = CreateDataset(cleaned_data) expected = """ >COI-begin_1st-2nd ---- >CP100_10_Papilionoidea_Aus_aus ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACA >CP100_11_Papilionoidea_Aus_bus 
ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACA >COI-begin_3rd ---- >CP100_10_Papilionoidea_Aus_aus GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG >CP100_11_Papilionoidea_Aus_bus GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG """ result = dataset_creator.dataset_str self.assertEqual(expected.lstrip(), result) def test_has_no_sequences_for_input_voucher(self): cleaned_data = self.cleaned_data cleaned_data['voucher_codes'] = 'CP100-10\r\nCP100-11\r\nCP1000' dataset_creator = CreateDataset(cleaned_data) self.assertTrue('Could not find voucher CP1000' in dataset_creator.warnings) def test_creating_dataset_filename(self): tmp_file_name = 'MEGA_b879d2a046d04821be618bf481b6b08d.txt' result = guess_file_extension(tmp_file_name) expected = 'MEGA_b879d2a046d04821be618bf481b6b08d.meg' self.assertEqual(expected, result)