def results(request):
    """Build the GenBank FASTA datasets for a submitted form and render them.

    Requests that are not POST are redirected to ``/genbank_fasta/``. A POST
    with an invalid form re-renders the index template with the bound form.
    A valid form is used to create two datasets through ``CreateDataset``:
    first with ``aminoacids`` set to False, then with it set to True. The
    results template receives a truncated preview of the first dataset, the
    full text of the second one, and the base names of both generated files
    (or ``False`` when no file was produced).
    """
    context = get_context(request)

    if request.method != 'POST':
        return HttpResponseRedirect('/genbank_fasta/')

    form = GenBankFastaForm(request.POST)
    if not form.is_valid():
        context["form"] = form
        return render(request, 'genbank_fasta/index.html', context)

    # Fixed options for this output; only the user-selected fields come
    # from the form itself.
    params = form.cleaned_data
    params.update({
        'file_format': 'GenBankFASTA',
        'number_genes': '',
        'translations': False,
        'aminoacids': False,
        'positions': 'ALL',
        'partition_by_positions': 'by gene',
        'taxon_names': ['CODE', 'GENUS', 'SPECIES'],
        'outgroup': '',
    })

    nucleotide_creator = CreateDataset(params)
    dataset = nucleotide_creator.dataset_str
    # Only the first 1500 characters are shown on the page.
    preview = (
        dataset[0:1500]
        + '\n...\n\n\n'
        + '#######\nComplete dataset file available for download.\n#######'
    )
    errors = nucleotide_creator.errors
    warnings = nucleotide_creator.warnings
    fasta_path = nucleotide_creator.dataset_file
    skipped = nucleotide_creator.sequences_skipped
    fasta_name = os.path.basename(fasta_path) if fasta_path is not None else False

    # Second pass over the same parameters, this time as amino acids.
    params['aminoacids'] = True
    protein_creator = CreateDataset(params)
    protein_text = protein_creator.dataset_str
    protein_path = protein_creator.dataset_file
    protein_name = os.path.basename(protein_path) if protein_path is not None else False

    template_values = (
        ('items_with_accession', skipped),
        ('dataset', preview),
        ('fasta_file', fasta_name),
        ('protein', protein_text),
        ('errors', errors),
        ('protein_file', protein_name),
        ('warnings', warnings),
    )
    for key, value in template_values:
        context[key] = value

    return render(request, 'genbank_fasta/results.html', context)
def test_dataset_1st_2nd_each(self):
    """Compare the dataset for 1st and 2nd positions of COI-begin, by gene."""
    # TODO fix test when dataset-creator has issue #26 fixed
    coi_begin = Genes.objects.get(gene_code='COI-begin')
    params = self.cleaned_data
    params['gene_codes'] = [coi_begin]
    del params['positions']
    params.update(positions=['1st', '2nd'], partition_by_positions='by gene')
    creator = CreateDataset(params)
    # NOTE(review): the fixture is kept exactly as it appears in this file,
    # whitespace included -- confirm it matches the generated layout.
    expected = """ >CP100_10_Papilionoidea_Aus_aus ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC ACACACACACACACACACACACACA >CP100_11_Papilionoidea_Aus_bus ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC ACACACACACACACACACACACACA """
    self.assertEqual(expected.lstrip(), creator.dataset_str)
def test_dataset_1st_2nd_codon_one_partition(self):
    """Compare the dataset for 1st and 2nd positions of COI-begin.

    Unlike the sibling test, ``partition_by_positions`` is left at whatever
    value the fixture in ``setUp`` provides.
    """
    coi_begin = Genes.objects.get(gene_code='COI-begin')
    params = self.cleaned_data
    params['gene_codes'] = [coi_begin]
    del params['positions']
    params['positions'] = ['1st', '2nd']
    creator = CreateDataset(params)
    # NOTE(review): the fixture is kept exactly as it appears in this file,
    # whitespace included -- confirm it matches the generated layout.
    expected = """ >CP100_10_Papilionoidea_Aus_aus ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC ACACACACACACACACACACACACA >CP100_11_Papilionoidea_Aus_bus ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACAC ACACACACACACACACACACACACA """
    self.assertEqual(expected.lstrip(), creator.dataset_str)
def test_fill_seqs_with_missing_chars(self):
    """Check that the CP100_10 row appears in the dataset for all positions."""
    cleaned_data = self.cleaned_data
    cleaned_data['positions'] = ['ALL']
    dataset_creator = CreateDataset(cleaned_data)
    result = dataset_creator.dataset_str
    expected = "CP100_10_Aus_aus CGACGACGACGACGACGACG"
    # assertIn shows both operands on failure; assertTrue(x in y) only
    # reports "False is not true".
    self.assertIn(expected, result)
def test_create_dataset_outgroup(self):
    """Check that voucher CP100-11, set as outgroup, follows the data header."""
    cleaned_data = self.cleaned_data
    cleaned_data['outgroup'] = 'CP100-11'
    dataset_creator = CreateDataset(cleaned_data)
    expected = 'nstates dna;\nxread\n1909 2\n\n&[dna]\nCP100_11_Aus_bus'
    result = dataset_creator.dataset_str
    # assertIn shows both operands on failure; assertTrue(x in y) only
    # reports "False is not true".
    self.assertIn(expected, result)
def setUp(self):
    """Load the test database dump and prepare MEGA dataset parameters.

    Runs the ``migrate_db`` management command on ``test_db_dump2.xml``,
    then stores the form-like parameters, a test client and a
    ``CreateDataset`` instance on the test case.
    """
    call_command('migrate_db', dumpfile='test_db_dump2.xml', verbosity=0)

    coi_begin = Genes.objects.get(gene_code='COI-begin')
    ef1a = Genes.objects.get(gene_code='ef1a')
    self.cleaned_data = {
        'gene_codes': [coi_begin, ef1a],
        'taxonset': None,
        'voucher_codes': 'CP100-10\r\nCP100-11',
        'geneset': None,
        'taxon_names': ['CODE', 'GENUS', 'SPECIES'],
        'number_genes': None,
        'positions': ['ALL'],
        'degen_translations': None,
        'translations': False,
        'partition_by_positions': 'by gene',
        'file_format': 'MEGA',
        'aminoacids': False,
        'outgroup': '',
    }

    self.c = Client()
    self.dataset_creator = CreateDataset(self.cleaned_data)
    # Show complete diffs when long dataset strings differ.
    self.maxDiff = None
def test_dataset_ALL_1st_2nd_3rd_each(self):
    """Compare the COI dataset when every codon position is requested.

    Positions are ``ALL``, ``1st``, ``2nd`` and ``3rd`` with
    ``partition_by_positions`` set to ``'EACH'``.
    """
    coi = Genes.objects.get(gene_code='COI')
    params = self.cleaned_data
    params['gene_codes'] = [coi]
    del params['positions']
    params.update(positions=['ALL', '1st', '2nd', '3rd'], partition_by_positions='EACH')
    creator = CreateDataset(params)
    # NOTE(review): the fixture is kept exactly as it appears in this file,
    # whitespace included -- confirm it matches the generated layout.
    expected = """ >coi_1st_codon -------------------- >CP100-10_Papilionoidea_Melitaea_diamina ????????TGGAAGATCACAACAGTGACATTAGGGCATAAAGAGCGTAAATTAGACAAAGGTGATCGCTATGGCGAGTCCAATAATTTTCCTTACTATAAAGGAGGGAGTAGTCCCTTAAGCAGGTGGTGATTTCTGGATTATGGAATAAAAAAACAAAATTGCACTTGTGGGAAGTCCTTTTCGTGGGAAACTAGCACAATTTGTTGGGGC??????????? >CP100-11_Melitaea_diamina ?ACTATTACATTTTACAAATTTTATATTATTTCTAATTTTTATTTATGATATAATTTATAAAAGACATATCCAATTAATAAGTACATTATATATAATGAATAAATCCATATTTCTAATAGTATTTTATATGTCTCAATTTTTTATTTAATTTATTTAATATAAAAATAAATCAATAATATATTTATAGTATTTCATTTCCAAAT??????????? >coi_2nd_codon -------------------- >CP100-10_Papilionoidea_Melitaea_diamina ????????GCGTTGCCTGTTTGCATGACGTTTGAAATAACTTCCACTTTTTTTTTCTTTGGTGAGTTCTTTGCCATCTCGTAATGTGTTCCCTTTTTCGGTTAAGCGCGGCTACCTCCATCAGGCCTATCTTCTATCGTCCTTGCTATTCCTTATGTAATCAAATCTTTGCTGTCCTTTTTCTCTTCGCTCTTTCAGATACCTTACGGGGAC??????????? >CP100-11_Melitaea_diamina TGGAAGATCACAACAGTGACATTAGGGCATAAAGAGCGTAAATTAGACAAAGGTGATCGCTATGGCGAGTCCAATAATTTTCCTTACTATAAAGGAGGGAGTAGTCCCTTAAGCAGGTGGTGATTTCTGGATTATGGAATAAAAAAACAAAATTGCACTTGTGGGAAGTCCTTTTCGTGGGAAACTAGCACAATTTGTTGGGGC??????????? >coi_3rd_codon -------------------- >CP100-10_Papilionoidea_Melitaea_diamina ????????ACTATTACATTTTACAAATTTTATATTATTTCTAATTTTTATTTATGATATAATTTATAAAAGACATATCCAATTAATAAGTACATTATATATAATGAATAAATCCATATTTCTAATAGTATTTTATATGTCTCAATTTTTTATTTAATTTATTTAATATAAAAATAAATCAATAATATATTTATAGTATTTCATTTCCAAAT??????????? >CP100-11_Melitaea_diamina GCGTTGCCTGTTTGCATGACGTTTGAAATAACTTCCACTTTTTTTTTCTTTGGTGAGTTCTTTGCCATCTCGTAATGTGTTCCCTTTTTCGGTTAAGCGCGGCTACCTCCATCAGGCCTATCTTCTATCGTCCTTGCTATTCCTTATGTAATCAAATCTTTGCTGTCCTTTTTCTCTTCGCTCTTTCAGATACCTTACGGGGAC??????????? 
"""
    self.assertEqual(expected.strip(), creator.dataset_str)
def setUp(self):
    """Load the test database dump and prepare NEXUS dataset parameters.

    Runs the ``migrate_db`` management command on ``test_db_dump2.xml``,
    looks up the ``all_genes`` gene set and ``all_taxa`` taxon set, then
    stores the form-like parameters, a test client and a ``CreateDataset``
    instance on the test case.
    """
    call_command('migrate_db', dumpfile='test_db_dump2.xml', verbosity=0)

    all_genes = GeneSets.objects.get(geneset_name='all_genes')
    all_taxa = TaxonSets.objects.get(taxonset_name='all_taxa')
    self.cleaned_data = {
        'gene_codes': '',
        'taxonset': all_taxa,
        'voucher_codes': '',
        'geneset': all_genes,
        'taxon_names': ['CODE', 'GENUS', 'SPECIES'],
        'translations': False,
        'degen_translations': 'normal',
        'number_genes': None,
        'positions': ['ALL'],
        'partition_by_positions': 'by gene',
        'file_format': 'NEXUS',
        'aminoacids': False,
        'outgroup': None,
    }

    self.c = Client()
    self.dataset_creator = CreateDataset(self.cleaned_data)
    # Show complete diffs when long dataset strings differ.
    self.maxDiff = None
def test_dataset_all_1st_codons_1st2nd_3rd(self): g1 = Genes.objects.get(gene_code='COI-begin') cleaned_data = self.cleaned_data cleaned_data['gene_codes'] = [g1] del cleaned_data['positions'] cleaned_data['positions'] = ['ALL', '1st'] cleaned_data['partition_by_positions'] = '1st-2nd, 3rd' dataset_creator = CreateDataset(cleaned_data) expected = """ >COI-begin_1st-2nd ---- >CP100_10_Papilionoidea_Aus_aus ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACA >CP100_11_Papilionoidea_Aus_bus ACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACA >COI-begin_3rd ---- >CP100_10_Papilionoidea_Aus_aus GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG >CP100_11_Papilionoidea_Aus_bus GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG """ result = dataset_creator.dataset_str self.assertEqual(expected.lstrip(), result)
def setUp(self):
    """Load the test data dump and prepare PHYLIP dataset parameters.

    Runs the ``migrate_db`` management command on ``test_data.xml`` under
    ``settings.MEDIA_ROOT``, looks up the ``all_genes`` gene set and the
    ``all_taxa`` taxon set, and stores on the test case: the form-like
    parameters, the paths of the expected PHYLIP fixture files, a user
    whose password is reset to ``'pass'``, a test client and a
    ``CreateDataset`` instance.
    """
    # os.path.join inserts a separator only when MEDIA_ROOT lacks one, so
    # the path is correct whether or not the setting ends with a slash.
    dump_path = os.path.join(settings.MEDIA_ROOT, 'test_data.xml')
    args = []
    opts = {'dumpfile': dump_path, 'verbosity': 0}
    cmd = 'migrate_db'
    call_command(cmd, *args, **opts)

    gene_set = GeneSets.objects.get(geneset_name='all_genes')
    taxon_set = TaxonSets.objects.get(taxonset_name='all_taxa')
    self.cleaned_data = {
        'gene_codes': '',
        'taxonset': taxon_set,
        'voucher_codes': '',
        'geneset': gene_set,
        'taxon_names': ['CODE', 'GENUS', 'SPECIES'],
        'number_genes': None,
        'degen_translations': None,
        'positions': ['ALL'],
        'translations': False,
        'partition_by_positions': 'by gene',
        'file_format': 'PHYLIP',
        'aminoacids': False,
        'outgroup': '',
    }

    self.dataset_file = os.path.join(
        settings.BASE_DIR, '..', 'create_dataset', 'tests',
        'create_phylip_dataset', 'dataset.phy',
    )
    self.aa_dataset_file = os.path.join(
        settings.BASE_DIR, '..', 'create_dataset', 'tests',
        'create_phylip_dataset', 'aa_dataset.phy',
    )

    self.user = User.objects.get(username='******')
    self.user.set_password('pass')
    self.user.save()

    self.c = Client()
    self.dataset_creator = CreateDataset(self.cleaned_data)
    # Show complete diffs when long dataset strings differ.
    self.maxDiff = None
def test_dataset_2nd3rd_codon_partitioned_1st2nd_3rd(self):
    """Requesting only 2nd and 3rd positions must report an error first."""
    params = dict(
        self.cleaned_data,
        partition_by_positions='1st-2nd,3rd',
        positions=['2nd', '3rd'],
    )
    first_error = CreateDataset(params).errors[0]
    self.assertEqual(
        'Cannot create dataset for only codon positions 2nd and 3rd.',
        str(first_error),
    )
def test_dataset_as_aminoacids(self):
    """Check that the amino-acid dataset contains the expected run of ``D``."""
    cleaned_data = self.cleaned_data
    cleaned_data['aminoacids'] = True
    dataset_creator = CreateDataset(cleaned_data)
    expected = 'DDDDDDDDDDDDDDDDDDDDDDDDDDD'
    result = dataset_creator.dataset_str
    # assertIn shows both operands on failure; assertTrue(x in y) only
    # reports "False is not true".
    self.assertIn(expected, result)
def test_nexus_1st_3rd_codon_as_1st2nd_3rd(self):
    """Requesting only 1st and 3rd positions must be reported as an error."""
    cleaned_data = self.cleaned_data.copy()
    cleaned_data['positions'] = ['1st', '3rd']
    cleaned_data['partition_by_positions'] = '1st-2nd, 3rd'
    dataset_creator = CreateDataset(cleaned_data)
    expected = 'Cannot create dataset for only codon positions 1st and 3rd.'
    # The message may be any of the reported errors, so search their
    # concatenated text.
    all_errors = ''.join(str(error) for error in dataset_creator.errors)
    # assertIn shows both operands on failure; assertTrue(x in y) only
    # reports "False is not true".
    self.assertIn(expected, all_errors)
def test_partitioned_each(self):
    """Check the CP100_10 row when partitioning by codon position."""
    cleaned_data = self.cleaned_data.copy()
    cleaned_data['partition_by_positions'] = 'by codon position'
    dataset_creator = CreateDataset(cleaned_data)
    result = dataset_creator.dataset_str
    expected = "CP100_10_Aus_aus ACGACGACGA CGACGACGAC GACGACGACG ACGACGACGA CGACGACGAC"
    # assertIn shows both operands on failure; assertTrue(x in y) only
    # reports "False is not true".
    self.assertIn(expected, result)
def test_nexus_with_outgroup(self):
    """Check that the outgroup statement names voucher CP100-11."""
    cleaned_data = self.cleaned_data
    cleaned_data['outgroup'] = 'CP100-11'
    cleaned_data['geneset'] = GeneSets.objects.get(geneset_name='all_genes')
    dataset_creator = CreateDataset(cleaned_data)
    result = dataset_creator.dataset_str
    expected = "outgroup CP100_11_Aus_bus;"
    # assertIn shows both operands on failure; assertTrue(x in y) only
    # reports "False is not true".
    self.assertIn(expected, result)
def test_create_dataset_drop_voucher(self):
    """A voucher code prefixed with ``--`` must not appear in the dataset."""
    cleaned_data = self.cleaned_data
    cleaned_data['voucher_codes'] = 'CP100-10\r\n--CP100-11'
    cleaned_data['taxonset'] = TaxonSets.objects.get(taxonset_name='Erebia')
    dataset_creator = CreateDataset(cleaned_data)
    result = dataset_creator.dataset_str
    # assertNotIn shows both operands on failure; assertTrue(x not in y)
    # only reports "False is not true".
    self.assertNotIn('CP100-11', result)
def test_dataset_with_partitions(self):
    """Partitioning as ``1st-2nd, 3rd`` must yield an empty dataset string."""
    params = self.cleaned_data
    params['partition_by_positions'] = '1st-2nd, 3rd'
    produced = CreateDataset(params).dataset_str
    self.assertEqual('', produced)
def test_dataset_1st3rd_codon_partitioned_each(self):
    """Requesting only 1st and 3rd positions must report an error first."""
    params = dict(
        self.cleaned_data,
        partition_by_positions='by codon position',
        positions=['1st', '3rd'],
    )
    first_error = CreateDataset(params).errors[0]
    self.assertEqual(
        'Cannot create dataset for only codon positions 1st and 3rd.',
        str(first_error),
    )
def test_dataset_with_degen_tranlations(self):
    """Check the dataset text when ``degen_translations`` is ``'S'``."""
    cleaned_data = self.cleaned_data
    cleaned_data['degen_translations'] = 'S'
    cleaned_data['translations'] = True
    dataset_creator = CreateDataset(cleaned_data)
    expected = 'GAYGAYGAYGAYGAYGAYGAYGAY'
    result = dataset_creator.dataset_str.strip()
    # assertIn shows both operands on failure; assertTrue(x in y) only
    # reports "False is not true".
    self.assertIn(expected, result)
def test_char_lengths_for_partitions_aminoacids(self):
    """Check the ``ef1a`` charset range in the amino-acid dataset."""
    cleaned_data = self.cleaned_data
    cleaned_data['aminoacids'] = True
    cleaned_data['outgroup'] = ''
    cleaned_data['geneset'] = GeneSets.objects.get(geneset_name='all_genes')
    dataset_creator = CreateDataset(cleaned_data)
    result = dataset_creator.dataset_str
    expected = "charset ef1a = 689-1101"
    # assertIn shows both operands on failure; assertTrue(x in y) only
    # reports "False is not true".
    self.assertIn(expected, result)
def test_create_dataset_aa_with_outgroup(self):
    """Check that outgroup CP100-11 follows the protein header."""
    cleaned_data = self.cleaned_data
    cleaned_data['positions'] = ['ALL']
    cleaned_data['outgroup'] = 'CP100-11'
    cleaned_data['aminoacids'] = True
    dataset_creator = CreateDataset(cleaned_data)
    expected = '&[protein]\nCP100_11_Aus_bus DDDDDDDDDDDDDDDDDDDDDDDD'
    result = dataset_creator.dataset_str
    # assertIn shows both operands on failure; assertTrue(x in y) only
    # reports "False is not true".
    self.assertIn(expected, result)
def test_try_dataset_degenerated_in_partitions(self):
    """Check the DIMENSIONS line for a degenerated dataset partitioned by gene."""
    cleaned_data = self.cleaned_data
    cleaned_data['voucher_codes'] = 'CP100-10'
    cleaned_data['degen_translations'] = 'normal'
    cleaned_data['partition_by_positions'] = 'by gene'
    cleaned_data['translations'] = True
    dataset_creator = CreateDataset(cleaned_data)
    result = dataset_creator.dataset_str
    expected = "DIMENSIONS NTAX=10 NCHAR=4732;"
    # assertIn shows both operands on failure; assertTrue(x in y) only
    # reports "False is not true".
    self.assertIn(expected, result)
def test_order_of_vouchers_is_kept_along_partitions(self): cleaned_data = self.cleaned_data dataset_creator = CreateDataset(cleaned_data) result = dataset_creator.dataset_str expected = """ CP100_19_Aus_jus ?????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????? [ef1a] """ self.assertTrue(expected.strip() in result)
def test_warning_when_missing_seqs_for_voucher(self):
    """A voucher saved with only a code must produce a missing-sequence warning."""
    Vouchers(code='CP100-13').save()
    cleaned_data = self.cleaned_data.copy()
    cleaned_data['voucher_codes'] = 'CP100-13'
    expected = 'Could not find sequences for voucher CP100-13 and gene_code CC'
    result = CreateDataset(cleaned_data)
    # assertIn shows both operands on failure; assertTrue(x in y) only
    # reports "False is not true".
    self.assertIn(expected, result.warnings)
def test_dataset_with_partitions_and_degen_tranlations(self):
    """Degenerated translations partitioned as ``1st-2nd, 3rd`` give no dataset."""
    params = self.cleaned_data
    params.update({
        'partition_by_positions': '1st-2nd, 3rd',
        'translations': True,
        'degen_translations': 'normal',
    })
    produced = CreateDataset(params).dataset_str.strip()
    self.assertEqual('', produced)
def test_create_dataset_1st_codon(self):
    """Check the header and a sequence fragment for 1st codon positions only."""
    cleaned_data = self.cleaned_data
    cleaned_data['positions'] = ['1st']
    dataset_creator = CreateDataset(cleaned_data)
    result = dataset_creator.dataset_str

    # assertIn shows both operands on failure; assertTrue(x in y) only
    # reports "False is not true".
    expected = 'nstates dna;\nxread\n636 2\n\n&[dna]\nCP100_10_Aus_aus'
    self.assertIn(expected, result)

    expected = 'AAAAAAAAAAAAAAAAAAAAAAAAAAA'
    self.assertIn(expected, result)
def test_create_aa_dataset(self):
    """The amino-acid dataset must equal the stored ``aa_dataset_file`` fixture."""
    with open(self.aa_dataset_file, "r") as fixture:
        reference = fixture.read()

    params = dict(self.cleaned_data, aminoacids=True)
    produced = CreateDataset(params).dataset_str
    self.assertEqual(reference, produced)
def test_all_codons_partitioned_as_each(self):
    """Partitioning COI-begin by codon position must report the MEGA error."""
    coi_begin = Genes.objects.get(gene_code='COI-begin')
    params = dict(
        self.cleaned_data,
        gene_codes=[coi_begin],
        positions=['ALL'],
        partition_by_positions='by codon position',
    )
    reported = CreateDataset(params).errors
    self.assertEqual(
        'Cannot produce MEGA dataset with codon positions in different partitions',
        str(reported[0]),
    )
def test_dataset_1st_3rd_codon_one_partition(self):
    """Requesting only 1st and 3rd positions of COI-begin must be an error."""
    g1 = Genes.objects.get(gene_code='COI-begin')
    cleaned_data = self.cleaned_data
    cleaned_data['gene_codes'] = [g1]
    del cleaned_data['positions']
    cleaned_data['positions'] = ['1st', '3rd']
    dataset_creator = CreateDataset(cleaned_data)
    expected = 'Cannot create dataset for only codon positions 1st and 3rd.'
    # The message may be any of the reported errors, so search their
    # concatenated text.
    all_errors = ''.join(str(error) for error in dataset_creator.errors)
    # assertIn shows both operands on failure; assertTrue(x in y) only
    # reports "False is not true".
    self.assertIn(expected, all_errors)
def test_charset_block_partitioned_each(self):
    """The charset block by codon position must equal the stored fixture."""
    params = dict(self.cleaned_data, partition_by_positions='by codon position')
    produced = CreateDataset(params).charset_block

    fixture_path = os.path.join(
        settings.BASE_DIR, '..', 'create_dataset', 'tests',
        'create_phylip_dataset',
        'charset_block_file_partitioned_1st_2nd_3rd.txt',
    )
    with open(fixture_path, "r") as fixture:
        reference = fixture.read()

    self.assertEqual(reference, produced)