def test_finds_sequence_on_specified_genome(self):
    """blast_genome on g1 reports only the hit on g1's own fragment.

    The sequence s1 is stored twice, as f1 (linked to g1) and as f3 (linked
    to g2); blasting g1 must return a single hit, on f1.
    """
    s1 = 'atcggtatcttctatgcgtatgcgtcatgattatatatattagcggcatg'
    s2 = 'agcgtcgatgcatgagtcgatcggcagtcgtgtagtcgtcgtatgcgtta'

    g1 = Genome(name='Foo')
    g1.save()
    f1 = Fragment.create_with_sequence('Bar', s1)
    f2 = Fragment.create_with_sequence('Baz', s2)
    Genome_Fragment(genome=g1, fragment=f1, inherited=False).save()
    Genome_Fragment(genome=g1, fragment=f2, inherited=False).save()

    g2 = Genome(name='Far')
    g2.save()
    f3 = Fragment.create_with_sequence('Bar', s1)
    Genome_Fragment(genome=g2, fragment=f3, inherited=False).save()

    # Remove the file named by fragment_fasta_fn for every fragment. Each
    # removal is attempted on its own so that one missing file does not
    # prevent the remaining files from being removed.
    for fragment in (f1, f2, f3):
        fasta_fn = fragment_fasta_fn(fragment)
        try:
            os.unlink(fasta_fn)
        except OSError:
            pass  # best-effort: the file may simply not exist

    build_all_genome_dbs(refresh=True)
    g1 = Genome.objects.get(pk=g1.id)

    # 14 bases taken from s1, followed by a run of a's
    query = s1[6:20] + 'aaaaaaaaa'
    r = blast_genome(g1, 'blastn', query)

    # only returns hit from genome
    self.assertEqual(len(r), 1)
    self.assertEqual(r[0].fragment_id, f1.id)
    self.assertEqual(r[0].query_start, 1)
    self.assertEqual(r[0].query_end, 14)
    self.assertEqual(r[0].subject_start, 7)
    self.assertEqual(r[0].subject_end, 20)
    self.assertEqual(r[0].strand(), 1)
def test_finds_sequence_on_specified_genome(self):
    """blast_genome on g1 reports only the hit on g1's own fragment.

    The sequence s1 is stored twice, as f1 (linked to g1) and as f3 (linked
    to g2); blasting g1 must return a single hit, on f1.
    """
    s1 = get_random_sequence(200)
    s2 = get_random_sequence(200)

    g1 = Genome(name="Foo")
    g1.save()
    f1 = Fragment.create_with_sequence("Bar", s1)
    f2 = Fragment.create_with_sequence("Baz", s2)
    Genome_Fragment(genome=g1, fragment=f1, inherited=False).save()
    Genome_Fragment(genome=g1, fragment=f2, inherited=False).save()

    g2 = Genome(name="Far")
    g2.save()
    f3 = Fragment.create_with_sequence("Bar", s1)
    Genome_Fragment(genome=g2, fragment=f3, inherited=False).save()

    # Remove the file named by fragment_fasta_fn for every fragment. Each
    # removal is attempted on its own so that one missing file does not
    # prevent the remaining files from being removed.
    for fragment in (f1, f2, f3):
        fasta_fn = fragment_fasta_fn(fragment)
        try:
            os.unlink(fasta_fn)
        except OSError:
            pass  # best-effort: the file may simply not exist

    build_all_genome_dbs(refresh=True)
    g1 = Genome.objects.get(pk=g1.id)

    # 14 bases taken from s1, followed by a run of a's
    query = s1[6:20] + "aaaaaaaaa"
    r = blast_genome(g1, "blastn", query)

    # only returns hit from genome
    self.assertEqual(len(r), 1)
    self.assertEqual(r[0].fragment_id, f1.id)
    self.assertEqual(r[0].query_start, 1)
    self.assertEqual(r[0].query_end, 14)
    self.assertEqual(r[0].subject_start, 7)
    self.assertEqual(r[0].subject_end, 20)
    self.assertEqual(r[0].strand(), 1)
def test_can_create_fragment_with_different_chunk_sizes(self):
    """Fragment.sequence equals the input for several initial_chunk_size values."""
    s = 'gataccggtactag'
    # Sizes tried, in order: whole sequence, zero, one base, a size that does
    # not divide the length evenly, and a size far larger than the sequence.
    chunk_sizes = (len(s), 0, 1, 3, len(s) * 1000)
    for chunk_size in chunk_sizes:
        f = Fragment.create_with_sequence('Foo', s, initial_chunk_size=chunk_size)
        self.assertEqual(
            f.sequence, s,
            'sequence mismatch for initial_chunk_size=%r' % (chunk_size,))
def test_blast_finds_sequence_on_specified_genome(self):
    """POSTing to a genome's blast URL returns only hits on that genome.

    The sequence s1 is stored twice, as f1 (linked to g1) and as f3 (linked
    to g2); each genome's blast endpoint must report exactly one hit, on its
    own fragment.
    """
    s1 = get_random_sequence(200)
    s2 = get_random_sequence(200)

    g1 = Genome(name="Foo")
    g1.save()
    f1 = Fragment.create_with_sequence("Bar", s1)
    f2 = Fragment.create_with_sequence("Baz", s2)
    Genome_Fragment(genome=g1, fragment=f1, inherited=False).save()
    Genome_Fragment(genome=g1, fragment=f2, inherited=False).save()

    g2 = Genome(name="Far")
    g2.save()
    f3 = Fragment.create_with_sequence("Bar", s1)
    Genome_Fragment(genome=g2, fragment=f3, inherited=False).save()

    # Remove the file named by fragment_fasta_fn for every fragment. Each
    # removal is attempted on its own so that one missing file does not
    # prevent the remaining files from being removed.
    for fragment in (f1, f2, f3):
        fasta_fn = fragment_fasta_fn(fragment)
        try:
            os.unlink(fasta_fn)
        except OSError:
            pass  # best-effort: the file may simply not exist

    build_all_genome_dbs(refresh=True)

    # 14 bases taken from s1, followed by a run of a's
    query = s1[6:20] + "aaaaaaaaa"

    res = self.client.post(
        "/edge/genomes/%s/blast/" % g1.id,
        data=json.dumps(dict(program="blastn", query=query)),
        content_type="application/json",
    )
    self.assertEqual(res.status_code, 200)
    d = json.loads(res.content)

    # only returns hit from genome
    self.assertEqual(len(d), 1)
    self.assertEqual(d[0]["fragment_id"], f1.id)
    self.assertEqual(d[0]["query_start"], 1)
    self.assertEqual(d[0]["query_end"], 14)
    self.assertEqual(d[0]["subject_start"], 7)
    self.assertEqual(d[0]["subject_end"], 20)

    # blast in other genome works too
    res = self.client.post(
        "/edge/genomes/%s/blast/" % g2.id,
        data=json.dumps(dict(program="blastn", query=query)),
        content_type="application/json",
    )
    self.assertEqual(res.status_code, 200)
    d = json.loads(res.content)
    self.assertEqual(len(d), 1)
    self.assertEqual(d[0]["fragment_id"], f3.id)
def test_can_create_fragment_with_different_chunk_sizes(self):
    """Fragment.sequence equals the input for several initial_chunk_size values."""
    s = "gataccggtactag"
    # Sizes tried, in order: whole sequence, zero, one base, a size that does
    # not divide the length evenly, and a size far larger than the sequence.
    chunk_sizes = (len(s), 0, 1, 3, len(s) * 1000)
    for chunk_size in chunk_sizes:
        f = Fragment.create_with_sequence("Foo", s, initial_chunk_size=chunk_size)
        self.assertEqual(
            f.sequence, s,
            "sequence mismatch for initial_chunk_size=%r" % (chunk_size,))
def test_blast_finds_sequence_on_specified_genome(self):
    """POSTing to a genome's blast URL returns only hits on that genome.

    The sequence s1 is stored twice, as f1 (linked to g1) and as f3 (linked
    to g2); each genome's blast endpoint must report exactly one hit, on its
    own fragment.
    """
    s1 = 'atcggtatcttctatgcgtatgcgtcatgattatatatattagcggcatg'
    s2 = 'agcgtcgatgcatgagtcgatcggcagtcgtgtagtcgtcgtatgcgtta'

    g1 = Genome(name='Foo')
    g1.save()
    f1 = Fragment.create_with_sequence('Bar', s1)
    f2 = Fragment.create_with_sequence('Baz', s2)
    Genome_Fragment(genome=g1, fragment=f1, inherited=False).save()
    Genome_Fragment(genome=g1, fragment=f2, inherited=False).save()

    g2 = Genome(name='Far')
    g2.save()
    f3 = Fragment.create_with_sequence('Bar', s1)
    Genome_Fragment(genome=g2, fragment=f3, inherited=False).save()

    # Remove the file named by fragment_fasta_fn for every fragment. Each
    # removal is attempted on its own so that one missing file does not
    # prevent the remaining files from being removed.
    for fragment in (f1, f2, f3):
        fasta_fn = fragment_fasta_fn(fragment)
        try:
            os.unlink(fasta_fn)
        except OSError:
            pass  # best-effort: the file may simply not exist

    build_all_genome_dbs(refresh=True)

    # 14 bases taken from s1, followed by a run of a's
    query = s1[6:20] + 'aaaaaaaaa'

    res = self.client.post(
        '/edge/genomes/%s/blast/' % g1.id,
        data=json.dumps(dict(program='blastn', query=query)),
        content_type='application/json')
    self.assertEqual(res.status_code, 200)
    d = json.loads(res.content)

    # only returns hit from genome
    self.assertEqual(len(d), 1)
    self.assertEqual(d[0]['fragment_id'], f1.id)
    self.assertEqual(d[0]['query_start'], 1)
    self.assertEqual(d[0]['query_end'], 14)
    self.assertEqual(d[0]['subject_start'], 7)
    self.assertEqual(d[0]['subject_end'], 20)

    # blast in other genome works too
    res = self.client.post(
        '/edge/genomes/%s/blast/' % g2.id,
        data=json.dumps(dict(program='blastn', query=query)),
        content_type='application/json')
    self.assertEqual(res.status_code, 200)
    d = json.loads(res.content)
    self.assertEqual(len(d), 1)
    self.assertEqual(d[0]['fragment_id'], f3.id)
def test_user_defined_fragments_does_not_include_inactive_fragments(self):
    """A fragment drops out of user_defined_fragments() once active is False."""
    f1 = Fragment.create_with_sequence('Bar', 'aacctaaaattataa')

    # While active, the new fragment is the only one listed.
    self.assertEqual(len(Fragment.user_defined_fragments()), 1)
    self.assertEqual(Fragment.user_defined_fragments()[0].id, f1.id)

    f1.active = False
    f1.save()

    # Query again after saving: the deactivated fragment must be gone.
    self.assertEqual(len(Fragment.user_defined_fragments()), 0)
def test_blast_aligns_sequence_to_antisense_strand(self):
    """The blast endpoint reports a reverse-complement match with start > end.

    The query is the reverse complement of s1[6:20] plus a run of t's; the
    single expected hit has subject_start 20 and subject_end 7.
    """
    s1 = get_random_sequence(200)
    g1 = Genome(name="Foo")
    g1.save()
    f1 = Fragment.create_with_sequence("Bar", s1)
    Genome_Fragment(genome=g1, fragment=f1, inherited=False).save()

    # Best-effort removal of the file named by fragment_fasta_fn; only
    # filesystem errors (e.g. the file not existing) are ignored.
    try:
        os.unlink(fragment_fasta_fn(f1))
    except OSError:
        pass

    build_all_genome_dbs(refresh=True)

    query = str(Seq(s1[6:20]).reverse_complement()) + "tttttttttt"
    res = self.client.post(
        "/edge/genomes/%s/blast/" % g1.id,
        data=json.dumps(dict(program="blastn", query=query)),
        content_type="application/json",
    )
    self.assertEqual(res.status_code, 200)
    d = json.loads(res.content)

    self.assertEqual(len(d), 1)
    self.assertEqual(d[0]["fragment_id"], f1.id)
    self.assertEqual(d[0]["query_start"], 1)
    self.assertEqual(d[0]["query_end"], 14)
    self.assertEqual(d[0]["subject_start"], 20)
    self.assertEqual(d[0]["subject_end"], 7)
def test_does_not_allow_insert_same_fragment_twice_which_creates_loops(self):
    """Inserting the same fragment a second time raises an exception."""
    updated_root = self.root.update('Bar')

    inserted = Fragment.create_with_sequence('Test', 'gataca').update('Test')
    inserted.insert_bases(2, 'ccc')

    # The first insertion is expected to succeed ...
    updated_root.insert_fragment(3, inserted)

    # ... while repeating it must be rejected.
    with self.assertRaises(Exception):
        updated_root.insert_fragment(3, inserted)
def test_blast_aligns_sequence_to_antisense_strand(self):
    """The blast endpoint reports a reverse-complement match with start > end.

    The query is the reverse complement of s1[6:20] plus a run of t's; the
    single expected hit has subject_start 20 and subject_end 7.
    """
    s1 = 'atcggtatcttctatgcgtatgcgtcatgattatatatattagcggcatg'
    g1 = Genome(name='Foo')
    g1.save()
    f1 = Fragment.create_with_sequence('Bar', s1)
    Genome_Fragment(genome=g1, fragment=f1, inherited=False).save()

    # Best-effort removal of the file named by fragment_fasta_fn; only
    # filesystem errors (e.g. the file not existing) are ignored.
    try:
        os.unlink(fragment_fasta_fn(f1))
    except OSError:
        pass

    build_all_genome_dbs(refresh=True)

    query = str(Seq(s1[6:20]).reverse_complement()) + 'tttttttttt'
    res = self.client.post(
        '/edge/genomes/%s/blast/' % g1.id,
        data=json.dumps(dict(program='blastn', query=query)),
        content_type='application/json')
    self.assertEqual(res.status_code, 200)
    d = json.loads(res.content)

    self.assertEqual(len(d), 1)
    self.assertEqual(d[0]['fragment_id'], f1.id)
    self.assertEqual(d[0]['query_start'], 1)
    self.assertEqual(d[0]['query_end'], 14)
    self.assertEqual(d[0]['subject_start'], 20)
    self.assertEqual(d[0]['subject_end'], 7)
def test_converts_to_reference_based_chunks(self):
    """Conversion keeps the fragment's own chunk ids and marks them reference based.

    Also checks that the sequence is unchanged and that a second conversion
    attempt returns a falsy value.
    """
    f = Fragment.create_with_sequence(
        "FooSeq", self.root_sequence, reference_based=False)

    def own_chunks():
        # Chunks reached by walking f whose initial fragment is f itself.
        # Reads `f` at call time, so it follows the rebinding below.
        return [
            chunk for chunk in f.chunks_by_walking()
            if chunk.initial_fragment.id == f.id
        ]

    old_chunk_ids = [chunk.id for chunk in own_chunks()]

    indexed = f.indexed_fragment()
    converted = indexed.convert_chunks_to_reference_based()
    f = f.indexed_fragment()

    self.assertTrue(converted)
    self.assertTrue(all([chunk.id in old_chunk_ids for chunk in own_chunks()]))
    self.assertTrue(all([chunk.is_reference_based for chunk in own_chunks()]))
    self.assertEqual(self.root_sequence, f.sequence)

    indexed_again = f.indexed_fragment()
    converted_twice = indexed_again.convert_chunks_to_reference_based()
    self.assertFalse(converted_twice)
def test_inherits_new_annotations_on_inserted_fragment(self):
    """An annotation added to an already-inserted fragment shows up on the host.

    new_f is inserted into f (an update of the root) first and annotated
    afterwards; f must then report the annotation at shifted coordinates
    while the root stays unannotated.
    """
    new_f = Fragment.create_with_sequence('Test', 'gataca')
    self.assertEqual(len(new_f.annotations()), 0)
    self.assertEqual(len(self.root.annotations()), 0)

    f = self.root.update('Bar')
    f.insert_fragment(3, new_f)
    self.assertEqual(len(new_f.annotations()), 0)
    self.assertEqual(len(self.root.annotations()), 0)
    self.assertEqual(len(f.annotations()), 0)

    new_f.annotate(2, 4, 'X1', 'gene', 1)

    # Re-fetch both fragments from the database before inspecting them.
    self.root = Fragment.objects.get(pk=self.root.pk).indexed_fragment()
    f = Fragment.objects.get(pk=f.pk).indexed_fragment()

    self.assertEqual(len(new_f.annotations()), 1)
    self.assertEqual(len(self.root.annotations()), 0)

    f_annotations = f.annotations()
    self.assertEqual(len(f_annotations), 1)
    annotation = f_annotations[0]
    self.assertEqual(annotation.base_first, 4)
    self.assertEqual(annotation.base_last, 6)
    self.assertEqual(annotation.feature.name, 'X1')
    self.assertEqual(annotation.feature_base_first, 1)
    self.assertEqual(annotation.feature_base_last, 3)
def test_blast_aligns_sequence_to_antisense_strand(self):
    """The blast endpoint reports a reverse-complement match with start > end.

    The query is the reverse complement of s1[6:20] plus a run of t's; the
    single expected hit has subject_start 20 and subject_end 7.
    """
    s1 = 'atcggtatcttctatgcgtatgcgtcatgattatatatattagcggcatg'
    g1 = Genome(name='Foo')
    g1.save()
    f1 = Fragment.create_with_sequence('Bar', s1)
    Genome_Fragment(genome=g1, fragment=f1, inherited=False).save()

    # Best-effort removal of the file named by fragment_fasta_fn; only
    # filesystem errors (e.g. the file not existing) are ignored.
    try:
        os.unlink(fragment_fasta_fn(f1))
    except OSError:
        pass

    build_all_genome_dbs(refresh=True)

    query = str(Seq(s1[6:20]).reverse_complement()) + 'tttttttttt'
    res = self.client.post(
        '/edge/genomes/%s/blast/' % g1.id,
        data=json.dumps(dict(program='blastn', query=query)),
        content_type='application/json')
    self.assertEqual(res.status_code, 200)
    d = json.loads(res.content)

    # only returns hit from genome
    self.assertEqual(len(d), 1)
    self.assertEqual(d[0]['fragment_id'], f1.id)
    self.assertEqual(d[0]['query_start'], 1)
    self.assertEqual(d[0]['query_end'], 14)
    self.assertEqual(d[0]['subject_start'], 20)
    self.assertEqual(d[0]['subject_end'], 7)
def test_insert_existing_fragment_in_middle(self):
    """Inserting an existing fragment at position 3 splices it after two bases."""
    existing_f = Fragment.create_with_sequence('Test', 'gataca')
    f = self.root.update('Bar')
    f.insert_fragment(3, existing_f)

    expected = self.root_sequence[0:2] + 'gataca' + self.root_sequence[2:]
    self.assertEqual(f.sequence, expected)

    # does not affect root
    self.assertEqual(self.root.sequence, self.root_sequence)
def setUp(self):
    """Create genome "Foo" and fragment "Bar", then link them (not inherited)."""
    bases = "agttcgaggctga"
    self.sequence = bases
    self.genome = Genome.create("Foo")
    self.fragment = Fragment.create_with_sequence("Bar", bases)

    # Register the fragment with the genome as a non-inherited member.
    link = Genome_Fragment(
        genome=self.genome,
        fragment=self.fragment,
        inherited=False,
    )
    link.save()
def test_blast_finds_sequence_on_specified_genome(self):
    """POSTing to a genome's blast URL returns only hits on that genome.

    The sequence s1 is stored twice, as f1 (linked to g1) and as f3 (linked
    to g2); each genome's blast endpoint must report exactly one hit, on its
    own fragment.
    """
    s1 = 'atcggtatcttctatgcgtatgcgtcatgattatatatattagcggcatg'
    s2 = 'agcgtcgatgcatgagtcgatcggcagtcgtgtagtcgtcgtatgcgtta'

    g1 = Genome(name='Foo')
    g1.save()
    f1 = Fragment.create_with_sequence('Bar', s1)
    f2 = Fragment.create_with_sequence('Baz', s2)
    Genome_Fragment(genome=g1, fragment=f1, inherited=False).save()
    Genome_Fragment(genome=g1, fragment=f2, inherited=False).save()

    g2 = Genome(name='Far')
    g2.save()
    f3 = Fragment.create_with_sequence('Bar', s1)
    Genome_Fragment(genome=g2, fragment=f3, inherited=False).save()

    # Remove the file named by fragment_fasta_fn for every fragment. Each
    # removal is attempted on its own so that one missing file does not
    # prevent the remaining files from being removed.
    for fragment in (f1, f2, f3):
        fasta_fn = fragment_fasta_fn(fragment)
        try:
            os.unlink(fasta_fn)
        except OSError:
            pass  # best-effort: the file may simply not exist

    build_all_genome_dbs(refresh=True)

    # 14 bases taken from s1, followed by a run of a's
    query = s1[6:20] + 'aaaaaaaaa'

    res = self.client.post(
        '/edge/genomes/%s/blast/' % g1.id,
        data=json.dumps(dict(program='blastn', query=query)),
        content_type='application/json')
    self.assertEqual(res.status_code, 200)
    d = json.loads(res.content)

    # only returns hit from genome
    self.assertEqual(len(d), 1)
    self.assertEqual(d[0]['fragment_id'], f1.id)
    self.assertEqual(d[0]['query_start'], 1)
    self.assertEqual(d[0]['query_end'], 14)
    self.assertEqual(d[0]['subject_start'], 7)
    self.assertEqual(d[0]['subject_end'], 20)

    # blast in other genome works too
    res = self.client.post(
        '/edge/genomes/%s/blast/' % g2.id,
        data=json.dumps(dict(program='blastn', query=query)),
        content_type='application/json')
    self.assertEqual(res.status_code, 200)
    d = json.loads(res.content)
    self.assertEqual(len(d), 1)
    self.assertEqual(d[0]['fragment_id'], f3.id)
def test_insert_fragment_inherits_annotations_from_new_fragment(self):
    """An annotation on the inserted fragment appears, shifted, on the host."""
    f = self.root.update('Bar')
    # NOTE(review): this filter reads `a.name` while the checks below read
    # `a.feature.name`. The attribute is only evaluated when f already has
    # annotations - confirm which spelling is intended.
    self.assertEqual([a for a in f.annotations() if a.name == 'Uma'], [])

    new_f = Fragment.create_with_sequence('Test', 'gataca')
    new_f.annotate(2, 4, 'Uma', 'feature', 1)
    f.insert_fragment(2, new_f)

    # Annotated at 2-4 on new_f and inserted at position 2 of f, the feature
    # is expected at 3-5 on f.
    uma = [a for a in f.annotations() if a.feature.name == 'Uma']
    self.assertEqual(uma[0].base_first, 3)
    self.assertEqual(uma[0].base_last, 5)
def test_computing_lengths_between_bps(self):
    """bp_covered_length counts both endpoints and wraps on a circular fragment."""
    f = Fragment.create_with_sequence("Foo", self.root_sequence, circular=True)
    self.assertEqual(f.bp_covered_length(2, 5), 4)

    # across circular boundary: bases 9..end, plus bases 1..3
    expected = (len(self.root_sequence) - 9 + 1) + 3
    self.assertEqual(f.bp_covered_length(9, 3), expected)
def test_does_not_allow_insert_same_fragment_twice_which_creates_loops(self):
    """Inserting the same fragment a second time raises an exception."""
    updated_root = self.root.update("Bar")

    inserted = Fragment.create_with_sequence("Test", "gataca").update("Test")
    inserted.insert_bases(2, "ccc")

    # The first insertion is expected to succeed ...
    updated_root.insert_fragment(3, inserted)

    # ... while repeating it must be rejected.
    with self.assertRaises(Exception):
        updated_root.insert_fragment(3, inserted)
def test_user_defined_fragments_does_not_include_genomic_fragment(self):
    """Fragments added through Genome.add_fragment are not listed as user defined."""
    genome = Genome.create('Foo')
    s = 'atggcatattcgcagct'
    genome.add_fragment('chrI', s)

    f = Fragment.create_with_sequence('Bar', 'aacctaaaattataa')

    # Only the standalone fragment 'Bar' may be listed, not 'chrI'.
    self.assertEqual(len(Fragment.user_defined_fragments()), 1)
    self.assertEqual(Fragment.user_defined_fragments()[0].name, 'Bar')
    self.assertEqual(Fragment.user_defined_fragments()[0].id, f.id)
def test_user_defined_fragments_does_not_include_genomic_fragment(self):
    """Fragments added through Genome.add_fragment are not listed as user defined."""
    genome = Genome.create("Foo")
    s = "atggcatattcgcagct"
    genome.add_fragment("chrI", s)

    f = Fragment.create_with_sequence("Bar", "aacctaaaattataa")

    # Only the standalone fragment "Bar" may be listed, not "chrI".
    self.assertEqual(len(Fragment.user_defined_fragments()), 1)
    self.assertEqual(Fragment.user_defined_fragments()[0].name, "Bar")
    self.assertEqual(Fragment.user_defined_fragments()[0].id, f.id)
def test_insert_new_fragment_at_end(self):
    """insert_fragment(None, ...) appends the fragment after the last base."""
    f = self.root.update('Bar')

    # 'gataca' with 'ccc' inserted at position 2 reads 'gcccataca'.
    new_f = Fragment.create_with_sequence('Test', 'gataca')
    new_f = new_f.update('Test')
    new_f.insert_bases(2, 'ccc')

    f.insert_fragment(None, new_f)
    self.assertEqual(f.sequence, self.root_sequence + 'gcccataca')

    # does not affect root
    self.assertEqual(self.root.sequence, self.root_sequence)
def test_insert_existing_fragment_in_middle(self):
    """Inserting an existing fragment at position 3 splices it after two bases."""
    existing_f = Fragment.create_with_sequence("Test", "gataca")
    f = self.root.update("Bar")
    f.insert_fragment(3, existing_f)

    expected = self.root_sequence[0:2] + "gataca" + self.root_sequence[2:]
    self.assertEqual(f.sequence, expected)

    # does not affect root
    self.assertEqual(self.root.sequence, self.root_sequence)
def test_insert_new_fragment_at_end(self):
    """insert_fragment(None, ...) appends the fragment after the last base."""
    f = self.root.update("Bar")

    # "gataca" with "ccc" inserted at position 2 reads "gcccataca".
    new_f = Fragment.create_with_sequence("Test", "gataca")
    new_f = new_f.update("Test")
    new_f.insert_bases(2, "ccc")

    f.insert_fragment(None, new_f)
    self.assertEqual(f.sequence, self.root_sequence + "gcccataca")

    # does not affect root
    self.assertEqual(self.root.sequence, self.root_sequence)
def test_replace_fragment_past_end(self):
    """A replacement length running past the end replaces only up to the end.

    Replacing 6 bases starting at len - 3 removes the last 4 bases and
    appends the new fragment's sequence.
    """
    f = self.root.update('Bar')

    # 'gataca' with 'ccc' inserted at position 2 reads 'gcccataca'.
    new_f = Fragment.create_with_sequence('Test', 'gataca')
    new_f = new_f.update('Test')
    new_f.insert_bases(2, 'ccc')

    f.replace_with_fragment(len(self.root_sequence) - 3, 6, new_f)
    self.assertEqual(f.name, 'Bar')
    self.assertEqual(f.sequence, self.root_sequence[:-4] + 'gcccataca')

    # does not affect root
    self.assertEqual(self.root.sequence, self.root_sequence)
def test_replace_fragment_past_end(self):
    """A replacement length running past the end replaces only up to the end.

    Replacing 6 bases starting at len - 3 removes the last 4 bases and
    appends the new fragment's sequence.
    """
    f = self.root.update("Bar")

    # "gataca" with "ccc" inserted at position 2 reads "gcccataca".
    new_f = Fragment.create_with_sequence("Test", "gataca")
    new_f = new_f.update("Test")
    new_f.insert_bases(2, "ccc")

    f.replace_with_fragment(len(self.root_sequence) - 3, 6, new_f)
    self.assertEqual(f.name, "Bar")
    self.assertEqual(f.sequence, self.root_sequence[:-4] + "gcccataca")

    # does not affect root
    self.assertEqual(self.root.sequence, self.root_sequence)
def test_annotate_circular_fragment_ending_at_base_last(self):
    """An annotation from base 9 to the last base of a circular fragment is stored as-is."""
    f = Fragment.create_with_sequence('Foo', self.root_sequence, circular=True)
    last_base = len(self.root_sequence)
    f.annotate(9, last_base, 'A1', 'gene', 1)

    annotations = f.annotations()
    self.assertEqual(len(annotations), 1)

    annotation = annotations[0]
    # Number of bases covered by 9..last_base, both ends included.
    feature_length = last_base - 9 + 1
    self.assertEqual(annotation.base_first, 9)
    self.assertEqual(annotation.base_last, last_base)
    self.assertEqual(annotation.feature.name, 'A1')
    self.assertEqual(annotation.feature_base_first, 1)
    self.assertEqual(annotation.feature_base_last, feature_length)
    self.assertEqual(annotation.feature.length, feature_length)
def test_insert_new_fragment_in_middle(self):
    """Inserting an edited fragment at position 3 splices it after two bases."""
    f = self.root.update('Bar')

    # 'gataca' with 'ccc' inserted at position 2 reads 'gcccataca'.
    new_f = Fragment.create_with_sequence('Test', 'gataca')
    new_f = new_f.update('Test')
    new_f.insert_bases(2, 'ccc')

    f.insert_fragment(3, new_f)
    expected = self.root_sequence[0:2] + 'gcccataca' + self.root_sequence[2:]
    self.assertEqual(f.sequence, expected)

    # does not affect root
    self.assertEqual(self.root.sequence, self.root_sequence)
def test_insert_new_fragment_in_middle(self):
    """Inserting an edited fragment at position 3 splices it after two bases."""
    f = self.root.update("Bar")

    # "gataca" with "ccc" inserted at position 2 reads "gcccataca".
    new_f = Fragment.create_with_sequence("Test", "gataca")
    new_f = new_f.update("Test")
    new_f.insert_bases(2, "ccc")

    f.insert_fragment(3, new_f)
    expected = self.root_sequence[0:2] + "gcccataca" + self.root_sequence[2:]
    self.assertEqual(f.sequence, expected)

    # does not affect root
    self.assertEqual(self.root.sequence, self.root_sequence)
def test_insert_fragment_inherits_annotations_from_new_fragment(self):
    """An annotation on the inserted fragment appears, shifted, on the host."""
    f = self.root.update("Bar")
    # NOTE(review): this filter reads `a.name` while the checks below read
    # `a.feature.name`. The attribute is only evaluated when f already has
    # annotations - confirm which spelling is intended.
    self.assertEqual([a for a in f.annotations() if a.name == "Uma"], [])

    new_f = Fragment.create_with_sequence("Test", "gataca")
    new_f.annotate(2, 4, "Uma", "feature", 1)
    f.insert_fragment(2, new_f)

    # Annotated at 2-4 on new_f and inserted at position 2 of f, the feature
    # is expected at 3-5 on f.
    uma = [a for a in f.annotations() if a.feature.name == "Uma"]
    self.assertEqual(uma[0].base_first, 3)
    self.assertEqual(uma[0].base_last, 5)
def build_genome(self, circular, *templates):
    """Create genome 'Foo' with one fragment per template and rebuild the genome dbs.

    Args:
        circular: passed as ``circular`` to every created fragment.
        *templates: sequences; each becomes a fragment named 'Bar' that is
            linked to the genome as non-inherited.

    Returns:
        The genome, re-read from the database after
        ``build_all_genome_dbs(refresh=True)`` has run.
    """
    g = Genome(name='Foo')
    g.save()

    for seq in templates:
        f = Fragment.create_with_sequence('Bar', seq, circular=circular)
        Genome_Fragment(genome=g, fragment=f, inherited=False).save()
        # Best-effort removal of the file named by fragment_fasta_fn; only
        # filesystem errors (e.g. the file not existing) are ignored.
        try:
            os.unlink(fragment_fasta_fn(f))
        except OSError:
            pass

    build_all_genome_dbs(refresh=True)
    return Genome.objects.get(pk=g.id)
def test_replace_fragment_at_end_then_insert_again(self):
    """Appending bases to a child leaves the parent's replaced tail untouched."""
    u = self.root.update('Bar')

    # 'gataca' with 'ccc' inserted at position 2 reads 'gcccataca'.
    new_f = Fragment.create_with_sequence('Test', 'gataca')
    new_f = new_f.update('Test')
    new_f.insert_bases(2, 'ccc')

    # Replace the last six bases of the root sequence with new_f.
    u.replace_with_fragment(len(self.root_sequence) - 5, 6, new_f)
    f1 = u

    # Derive a further version and append four a's at the end.
    u = f1.update('Bar')
    u.insert_bases(None, 'aaaa')
    f2 = u

    self.assertEqual(f2.sequence, self.root_sequence[:-6] + 'gcccatacaaaaa')
    self.assertEqual(f1.sequence, self.root_sequence[:-6] + 'gcccataca')
    self.assertEqual(self.root.sequence, self.root_sequence)
def test_insert_new_fragment_at_end_then_insert_again(self):
    """Appending bases to a child leaves the parent's appended fragment untouched."""
    u = self.root.update("Bar")

    # "gataca" with "ccc" inserted at position 2 reads "gcccataca".
    new_f = Fragment.create_with_sequence("Test", "gataca")
    new_f = new_f.update("Test")
    new_f.insert_bases(2, "ccc")

    u.insert_fragment(None, new_f)
    f1 = u

    # Derive a further version and append four a's at the end.
    u = f1.update("Bar")
    u.insert_bases(None, "aaaa")
    f2 = u

    self.assertEqual(f2.sequence, self.root_sequence + "gcccatacaaaaa")
    self.assertEqual(f1.sequence, self.root_sequence + "gcccataca")
    self.assertEqual(self.root.sequence, self.root_sequence)
def test_builds_genome_db_with_different_names_on_separate_attempts(self):
    """Two build_genome_db calls on the same genome yield different db names."""
    s1 = get_random_sequence(200)
    g1 = Genome(name="Foo")
    g1.save()
    f1 = Fragment.create_with_sequence("Bar", s1)
    Genome_Fragment(genome=g1, fragment=f1, inherited=False).save()

    dbname1 = build_genome_db(g1)
    self.assertIsNotNone(dbname1)
    self.assertEqual(dbname1, g1.blastdb)

    # Clear the recorded db name on the instance before building again.
    g1.blastdb = None
    dbname2 = build_genome_db(g1)
    self.assertNotEqual(dbname2, dbname1)
def test_propagates_conversion_to_reference_based_chunks(self):
    """Converting a parent leaves nothing to convert on its descendants.

    After the parent f is converted, converting c1 or c2 returns a falsy
    value; the sequence-based chunks that remain are one on c1 and two on
    c2, with c1's chunk also present on c2.
    """
    f = Fragment.create_with_sequence(
        "FooSeq", self.root_sequence, reference_based=False)
    c1 = f.update("Child 1")
    c1.insert_bases(7, "gataca")
    c2 = c1.update("Child 2")
    c2.insert_bases(3, "atta")

    def own_chunks():
        # Chunks reached by walking f whose initial fragment is f itself.
        # Reads `f` at call time, so it follows the rebinding below.
        return [
            chunk for chunk in f.chunks_by_walking()
            if chunk.initial_fragment.id == f.id
        ]

    def sequence_based_chunks(fragment):
        # Chunks of `fragment` still flagged as sequence based.
        return [
            chunk for chunk in fragment.chunks_by_walking()
            if chunk.is_sequence_based
        ]

    old_chunk_ids = [chunk.id for chunk in own_chunks()]

    converted = f.convert_chunks_to_reference_based()
    f = f.indexed_fragment()
    c1 = c1.indexed_fragment()
    c2 = c2.indexed_fragment()

    self.assertTrue(converted)
    self.assertTrue(all([chunk.id in old_chunk_ids for chunk in own_chunks()]))
    self.assertTrue(all([chunk.is_reference_based for chunk in own_chunks()]))
    self.assertEqual(self.root_sequence, f.sequence)

    converted_c1 = c1.convert_chunks_to_reference_based()
    self.assertFalse(converted_c1)
    converted_c2 = c2.convert_chunks_to_reference_based()
    self.assertFalse(converted_c2)

    unconverted_f_chunks = sequence_based_chunks(f)
    unconverted_c1_chunks = sequence_based_chunks(c1)
    unconverted_c2_chunks = sequence_based_chunks(c2)

    self.assertEqual(len(unconverted_f_chunks), 0)
    self.assertEqual(len(unconverted_c1_chunks), 1)
    self.assertEqual(len(unconverted_c2_chunks), 2)
    self.assertTrue(unconverted_c1_chunks[0] in unconverted_c2_chunks)
def test_does_not_return_duplicate_hits_for_circular_fragments(self):
    """A query matching once inside a circular fragment yields exactly one hit."""
    s1 = 'atcggtatcttctatgcgtatgcgtcatgattatatatattagcggcatg'
    g1 = Genome(name='Foo')
    g1.save()
    f1 = Fragment.create_with_sequence('Bar', s1, circular=True)
    Genome_Fragment(genome=g1, fragment=f1, inherited=False).save()

    # Best-effort removal of the file named by fragment_fasta_fn; only
    # filesystem errors (e.g. the file not existing) are ignored.
    try:
        os.unlink(fragment_fasta_fn(f1))
    except OSError:
        pass

    build_all_genome_dbs(refresh=True)
    g1 = Genome.objects.get(pk=g1.id)

    # 15 bases taken from s1, followed by a run of t's
    query = s1[5:20] + 'tttttttttt'
    r = blast_genome(g1, 'blastn', query)
    self.assertEqual(len(r), 1)
def test_builds_fragment_fastas(self):
    """build_fragment_fasta writes one FASTA record holding the fragment's sequence."""
    s1 = get_random_sequence(200)
    g1 = Genome(name="Foo")
    g1.save()
    f1 = Fragment.create_with_sequence("Bar", s1)
    Genome_Fragment(genome=g1, fragment=f1, inherited=False).save()

    # Best-effort removal of the file named by fragment_fasta_fn; only
    # filesystem errors (e.g. the file not existing) are ignored.
    fn = fragment_fasta_fn(f1)
    try:
        os.unlink(fn)
    except OSError:
        pass

    # Parse the file whose name build_fragment_fasta returns.
    fn = build_fragment_fasta(f1)
    records = list(SeqIO.parse(fn, "fasta"))
    self.assertEqual(len(records), 1)
    self.assertEqual(str(records[0].seq), s1)
def test_does_not_return_duplicate_hits_for_circular_fragments(self):
    """A query matching once inside a circular fragment yields exactly one hit."""
    s1 = get_random_sequence(200)
    g1 = Genome(name="Foo")
    g1.save()
    f1 = Fragment.create_with_sequence("Bar", s1, circular=True)
    Genome_Fragment(genome=g1, fragment=f1, inherited=False).save()

    # Best-effort removal of the file named by fragment_fasta_fn; only
    # filesystem errors (e.g. the file not existing) are ignored.
    try:
        os.unlink(fragment_fasta_fn(f1))
    except OSError:
        pass

    build_all_genome_dbs(refresh=True)
    g1 = Genome.objects.get(pk=g1.id)

    # 15 bases taken from s1, followed by a run of t's
    query = s1[5:20] + "tttttttttt"
    r = blast_genome(g1, "blastn", query)
    self.assertEqual(len(r), 1)
def test_inherits_annotations_from_inserted_fragment(self):
    """An annotation made before insertion is visible on the host afterwards.

    new_f is annotated at 2-4 and then inserted at position 3 of f (an
    update of the root); f must report the annotation at 4-6 while the root
    stays unannotated.
    """
    new_f = Fragment.create_with_sequence("Test", "gataca")
    new_f.annotate(2, 4, "X1", "gene", 1)
    self.assertEqual(len(new_f.annotations()), 1)
    self.assertEqual(len(self.root.annotations()), 0)

    f = self.root.update("Bar")
    f.insert_fragment(3, new_f)
    self.assertEqual(len(new_f.annotations()), 1)
    self.assertEqual(len(self.root.annotations()), 0)

    f_annotations = f.annotations()
    self.assertEqual(len(f_annotations), 1)
    annotation = f_annotations[0]
    self.assertEqual(annotation.base_first, 4)
    self.assertEqual(annotation.base_last, 6)
    self.assertEqual(annotation.feature.name, "X1")
    self.assertEqual(annotation.feature_base_first, 1)
    self.assertEqual(annotation.feature_base_last, 3)
def test_does_not_align_sequence_across_boundry_for_non_circular_fragment(self):
    """Hits on a linear fragment stay within 1..len(s1) on the subject.

    The query joins the last ten and first ten bases of s1, so it could only
    match in one piece by wrapping around the end of the fragment.
    """
    s1 = get_random_sequence(200)
    g1 = Genome(name="Foo")
    g1.save()
    f1 = Fragment.create_with_sequence("Bar", s1, circular=False)
    Genome_Fragment(genome=g1, fragment=f1, inherited=False).save()

    # Best-effort removal of the file named by fragment_fasta_fn; only
    # filesystem errors (e.g. the file not existing) are ignored.
    try:
        os.unlink(fragment_fasta_fn(f1))
    except OSError:
        pass

    build_all_genome_dbs(refresh=True)
    g1 = Genome.objects.get(pk=g1.id)

    query = (s1[-10:] + s1[0:10]) + "tttttttttt"
    res = blast_genome(g1, "blastn", query)

    # Any hit returned must lie entirely inside the fragment.
    for r in res:
        self.assertTrue(0 < r.subject_start <= len(s1))
        self.assertTrue(0 < r.subject_end <= len(s1))
def test_does_not_align_sequence_across_boundry_for_non_circular_fragment(self):
    """Hits on a linear fragment stay within 1..len(s1) on the subject.

    The query joins the last ten and first ten bases of s1, so it could only
    match in one piece by wrapping around the end of the fragment.
    """
    s1 = 'atcggtatcttctatgcgtatgcgtcatgattatatatattagcggcatg'
    g1 = Genome(name='Foo')
    g1.save()
    f1 = Fragment.create_with_sequence('Bar', s1, circular=False)
    Genome_Fragment(genome=g1, fragment=f1, inherited=False).save()

    # Best-effort removal of the file named by fragment_fasta_fn; only
    # filesystem errors (e.g. the file not existing) are ignored.
    try:
        os.unlink(fragment_fasta_fn(f1))
    except OSError:
        pass

    build_all_genome_dbs(refresh=True)
    g1 = Genome.objects.get(pk=g1.id)

    query = (s1[-10:] + s1[0:10]) + 'tttttttttt'
    res = blast_genome(g1, 'blastn', query)

    # Any hit returned must lie entirely inside the fragment.
    for r in res:
        self.assertTrue(0 < r.subject_start <= len(s1))
        self.assertTrue(0 < r.subject_end <= len(s1))
def test_aligns_sequence_to_antisense_strand(self):
    """blast_genome reports a reverse-complement match with strand() == -1.

    The query is the reverse complement of s1[6:20] plus a run of t's; the
    single expected hit has subject_start 20 and subject_end 7.
    """
    s1 = 'atcggtatcttctatgcgtatgcgtcatgattatatatattagcggcatg'
    g1 = Genome(name='Foo')
    g1.save()
    f1 = Fragment.create_with_sequence('Bar', s1)
    Genome_Fragment(genome=g1, fragment=f1, inherited=False).save()

    # Best-effort removal of the file named by fragment_fasta_fn; only
    # filesystem errors (e.g. the file not existing) are ignored.
    try:
        os.unlink(fragment_fasta_fn(f1))
    except OSError:
        pass

    build_all_genome_dbs(refresh=True)
    g1 = Genome.objects.get(pk=g1.id)

    query = str(Seq(s1[6:20]).reverse_complement()) + 'tttttttttt'
    r = blast_genome(g1, 'blastn', query)

    self.assertEqual(len(r), 1)
    self.assertEqual(r[0].fragment_id, f1.id)
    self.assertEqual(r[0].query_start, 1)
    self.assertEqual(r[0].query_end, 14)
    self.assertEqual(r[0].subject_start, 20)
    self.assertEqual(r[0].subject_end, 7)
    self.assertEqual(r[0].strand(), -1)
def test_aligns_sequence_across_boundry_for_circular_fragment(self):
    """A query spanning the end/start junction of a circular fragment is found.

    The query joins the last ten and first ten bases of s1; one of the hits
    must cover query positions 1-20 with a subject_end of len(s1) + 10.
    """
    s1 = 'atcggtatctactatgcgtatgcgtcatgattatatatattagcggcatg'
    g1 = Genome(name='Foo')
    g1.save()
    f1 = Fragment.create_with_sequence('Bar', s1, circular=True)
    Genome_Fragment(genome=g1, fragment=f1, inherited=False).save()

    # Best-effort removal of the file named by fragment_fasta_fn; only
    # filesystem errors (e.g. the file not existing) are ignored.
    try:
        os.unlink(fragment_fasta_fn(f1))
    except OSError:
        pass

    build_all_genome_dbs(refresh=True)
    g1 = Genome.objects.get(pk=g1.id)

    query = (s1[-10:] + s1[0:10]) + 'ttttttttttt'
    res = blast_genome(g1, 'blastn', query)

    # we are not removing redundant matches when matching across circular
    # boundaries, since blasting across circular boundary of a genome is a
    # rare case. so in this particular case, you will find two results, one
    # for the end of the query at the start of the genome, one for across
    # the circular boundary.
    spanning_hits = [
        r for r in res if r.query_start == 1 and r.query_end == 20
    ]
    self.assertTrue(spanning_hits)

    # Only the first hit covering the whole 20 bases is inspected.
    r = spanning_hits[0]
    self.assertEqual(r.fragment_id, f1.id)
    self.assertEqual(r.query_start, 1)
    self.assertEqual(r.query_end, 20)
    self.assertEqual(r.subject_start, len(s1) - 10 + 1)
    self.assertEqual(r.subject_end, len(s1) + 10)
    self.assertEqual(r.fragment_length, len(s1))
    self.assertEqual(r.strand(), 1)
def test_can_create_fragment_with_no_chunk_size(self):
    """Passing initial_chunk_size=None still stores the full sequence."""
    f = Fragment.create_with_sequence('Foo', 'gataccggtactag', initial_chunk_size=None)
    self.assertEqual(f.sequence, 'gataccggtactag')
def on_post(self, request):
    """Create a fragment from the parsed request arguments.

    Reads 'name', 'sequence' and 'circular' from the arguments parsed by
    fragment_parser and returns a tuple of
    (FragmentView.to_dict(fragment), 201).
    """
    parsed = fragment_parser.parse_args(request)
    created = Fragment.create_with_sequence(
        name=parsed['name'],
        sequence=parsed['sequence'],
        circular=parsed['circular'])
    body = FragmentView.to_dict(created)
    return body, 201
def test_get_circular_sequence(self):
    """get_sequence wraps past the last base on a circular fragment."""
    s = 'agttcgaggctga'
    f = Fragment.create_with_sequence('Foo', s, circular=True)

    # From the third-to-last base through base 4: last three bases of s,
    # then its first four.
    self.assertEqual(f.get_sequence(len(s) - 3 + 1, 4), s[-3:] + s[:4])
def setUp(self):
    """Create genome 'Test' and a root fragment 'Foo' with a fixed sequence."""
    self.genome = Genome.create('Test')

    bases = 'agttcgaggctga'
    self.root_sequence = bases
    self.root = Fragment.create_with_sequence('Foo', bases)
def setUp(self):
    """Create the root fragment 'Foo' with a fixed sequence."""
    bases = 'agttcgaggctga'
    self.root_sequence = bases
    self.root = Fragment.create_with_sequence('Foo', bases)