def test_adds_annotations_correctly_with_reverse_cassette(self):
    # A reverse-complemented cassette carrying one base substitution
    # (index 10 of self.middle, 'a' -> 'c') is recombined into the genome.
    # Expected: the operation annotation first, then the original 'Foo'
    # gene annotation renamed 'Foo A11C' over the same coordinates.
    bases = list(self.middle)
    self.assertEqual(bases[10], 'a')
    bases[10] = 'c'
    replaced = ''.join(bases)
    cassette = ''.join([self.front_bs, replaced, self.back_bs])
    cassette = str(Seq(cassette).reverse_complement())

    # 1-based coordinates of the middle region on the original fragment.
    region_first = len(self.upstream) + len(self.front_bs) + 1
    region_last = len(self.upstream) + len(self.front_bs) + len(self.middle)
    self.fragment.annotate(region_first, region_last, 'Foo', 'gene', 1)

    c = recombine(self.genome, cassette, self.arm_len)
    f = c.fragments.all()[0].indexed_fragment()
    annotations = f.annotations()
    self.assertEqual(len(annotations), 2)

    a = annotations[0]
    self.assertEqual(a.feature.type, 'operation')

    a = annotations[1]
    self.assertEqual(a.feature.name, 'Foo A11C')
    self.assertEqual(a.feature.type, 'gene')
    self.assertEqual(a.feature.strand, 1)
    self.assertEqual(a.base_first, region_first)
    self.assertEqual(a.base_last, region_last)
def test_recombines_multiple_times_on_different_fragments(self):
    # Two linear fragments each contain the target locus twice. After
    # recombination, every copy on both fragments is expected to carry the
    # cassette in place of the original front_bs/middle/back_bs region.
    upstream = "gagattgtccgcgtttt"
    front_bs = "catagcgcacaggacgcggag"
    middle = "cggcacctgtgagccg"
    back_bs = "taatgaccccgaagcagg"
    downstream = "gttaaggcgcgaacat"
    replaced = "aaaaaaaaaaaaaaaaaaaaaa"

    template = ''.join([upstream, front_bs, middle, back_bs, downstream])
    cassette = ''.join([front_bs, replaced, back_bs])
    f1 = 't' * 20 + template + 'c' * 20 + template + 'c' * 30
    f2 = 't' * 40 + template + 'c' * 15 + template + 'c' * 20
    arm_len = min(len(front_bs), len(back_bs))

    g = self.build_genome(False, f1, f2)
    self.assertEqual(g.fragments.count(), 2)

    c = recombine(g, cassette, arm_len)
    self.assertEqual(c.fragments.count(), 2)

    # Order by length so the check does not depend on fragment ordering;
    # the fragment derived from f1 is the shorter of the two.
    sequences = sorted(
        (f.indexed_fragment().sequence for f in c.fragments.all()),
        key=len)
    modified = upstream + cassette + downstream
    self.assertEqual(
        sequences[0],
        't' * 20 + modified + 'c' * 20 + modified + 'c' * 30)
    self.assertEqual(
        sequences[1],
        't' * 40 + modified + 'c' * 15 + modified + 'c' * 20)
def test_annotates_reversed_cassette(self):
    # Recombining a reverse-complemented cassette into an unannotated
    # genome is expected to add exactly one annotation: the recombination
    # operation, spanning the inserted cassette, on the reverse strand.
    upstream = "gagattgtccgcgtttt"
    front_bs = "catagcgcacaggacgcggag"
    middle = "cggcacctgtgagccg"
    back_bs = "taatgaccccgaagcagg"
    downstream = "gttaaggcgcgaacat"
    replaced = "aaaaaaaaaaaaaaaaaaa"

    template = ''.join([upstream, front_bs, middle, back_bs, downstream])
    forward_cassette = ''.join([front_bs, replaced, back_bs])
    cassette = str(Seq(forward_cassette).reverse_complement())
    arm_len = min(len(front_bs), len(back_bs))

    g = self.build_genome(False, template)
    a = g.fragments.all()[0].indexed_fragment().annotations()
    self.assertEqual(len(a), 0)

    c = recombine(g, cassette, arm_len)
    a = c.fragments.all()[0].indexed_fragment().annotations()
    self.assertEqual(len(a), 1)

    operation_annotation = a[0]
    self.assertEqual(operation_annotation.base_first, len(upstream) + 1)
    self.assertEqual(operation_annotation.base_last, len(upstream + cassette))
    self.assertEqual(operation_annotation.feature_base_first, 1)
    self.assertEqual(operation_annotation.feature_base_last, len(cassette))
    # on reverse strand
    self.assertEqual(operation_annotation.feature.strand, -1)
    self.assertEqual(operation_annotation.feature.operation.type,
                     Operation.RECOMBINATION[0])
    self.assertEqual(operation_annotation.feature.operation.genome, c)
def test_integrates_and_annotates_cassette_across_circular_boundary(self):
    # The circular template is laid out so that the middle region straddles
    # the start/end of the sequence. The recombined fragment is expected to
    # read downstream + upstream + cassette, with a single forward-strand
    # operation annotation covering the cassette.
    upstream = "gagattgtccgcgtttt"
    front_bs = "catagcgcacaggacgcggag"
    middle = "cggcacctgtgagccg"
    back_bs = "taatgaccccgaagcagg"
    downstream = "gttaaggcgcgaacat"
    replaced = "aaaaaaaaaaaaaaaaaaa"

    cassette = ''.join([front_bs, replaced, back_bs])
    arm_len = min(len(front_bs), len(back_bs))
    template = ''.join(
        [middle[8:], back_bs, downstream, upstream, front_bs, middle[0:8]])

    g = self.build_genome(True, template)
    c = recombine(g, cassette, arm_len)
    self.assertNotEqual(g.id, c.id)
    self.assertEqual(c.fragments.all()[0].indexed_fragment().sequence,
                     ''.join([downstream, upstream, cassette]))

    a = c.fragments.all()[0].indexed_fragment().annotations()
    self.assertEqual(len(a), 1)

    operation_annotation = a[0]
    prefix_len = len(downstream + upstream)
    self.assertEqual(operation_annotation.base_first, prefix_len + 1)
    self.assertEqual(operation_annotation.base_last, prefix_len + len(cassette))
    self.assertEqual(operation_annotation.feature_base_first, 1)
    self.assertEqual(operation_annotation.feature_base_last, len(cassette))
    self.assertEqual(operation_annotation.feature.strand, 1)
    self.assertEqual(operation_annotation.feature.operation.type,
                     Operation.RECOMBINATION[0])
    self.assertEqual(operation_annotation.feature.operation.genome, c)
def test_adds_annotations_correctly_with_reverse_cassette(self):
    # A reverse-complemented cassette carrying one base substitution
    # (index 10 of self.middle, 'a' -> 'c') is recombined into the genome.
    # Expected: the operation annotation first, then the original 'Foo'
    # gene annotation renamed 'Foo A11C' over the same coordinates.
    bases = list(self.middle)
    self.assertEqual(bases[10], 'a')
    bases[10] = 'c'
    replaced = ''.join(bases)
    cassette = ''.join([self.front_bs, replaced, self.back_bs])
    cassette = str(Seq(cassette).reverse_complement())

    # 1-based coordinates of the middle region on the original fragment.
    region_first = len(self.upstream) + len(self.front_bs) + 1
    region_last = len(self.upstream) + len(self.front_bs) + len(self.middle)
    self.fragment.annotate(region_first, region_last, 'Foo', 'gene', 1)

    c = recombine(self.genome, cassette, self.arm_len)
    f = c.fragments.all()[0].indexed_fragment()
    annotations = f.annotations()
    self.assertEqual(len(annotations), 2)

    a = annotations[0]
    self.assertEqual(a.feature.type, 'operation')

    a = annotations[1]
    self.assertEqual(a.feature.name, 'Foo A11C')
    self.assertEqual(a.feature.type, 'gene')
    self.assertEqual(a.feature.strand, 1)
    self.assertEqual(a.base_first, region_first)
    self.assertEqual(a.base_last, region_last)
def test_annotates_reversed_cassette(self):
    # Recombining a reverse-complemented cassette into an unannotated
    # genome is expected to add exactly one annotation: the recombination
    # operation, spanning the inserted cassette, on the reverse strand.
    upstream = "gagattgtccgcgtttt"
    front_bs = "catagcgcacaggacgcggag"
    middle = "cggcacctgtgagccg"
    back_bs = "taatgaccccgaagcagg"
    downstream = "gttaaggcgcgaacat"
    replaced = "aaaaaaaaaaaaaaaaaaa"

    template = ''.join([upstream, front_bs, middle, back_bs, downstream])
    forward_cassette = ''.join([front_bs, replaced, back_bs])
    cassette = str(Seq(forward_cassette).reverse_complement())
    arm_len = min(len(front_bs), len(back_bs))

    g = self.build_genome(False, template)
    a = g.fragments.all()[0].indexed_fragment().annotations()
    self.assertEqual(len(a), 0)

    c = recombine(g, cassette, arm_len)
    a = c.fragments.all()[0].indexed_fragment().annotations()
    self.assertEqual(len(a), 1)

    operation_annotation = a[0]
    self.assertEqual(operation_annotation.base_first, len(upstream) + 1)
    self.assertEqual(operation_annotation.base_last, len(upstream + cassette))
    self.assertEqual(operation_annotation.feature_base_first, 1)
    self.assertEqual(operation_annotation.feature_base_last, len(cassette))
    # on reverse strand
    self.assertEqual(operation_annotation.feature.strand, -1)
    self.assertEqual(operation_annotation.feature.operation.type,
                     Operation.RECOMBINATION[0])
    self.assertEqual(operation_annotation.feature.operation.genome, c)
def test_recombines_multiple_times_on_different_fragments(self):
    # Two linear fragments each contain the target locus twice. After
    # recombination, every copy on both fragments is expected to carry the
    # cassette in place of the original front_bs/middle/back_bs region.
    upstream = "gagattgtccgcgtttt"
    front_bs = "catagcgcacaggacgcggag"
    middle = "cggcacctgtgagccg"
    back_bs = "taatgaccccgaagcagg"
    downstream = "gttaaggcgcgaacat"
    replaced = "aaaaaaaaaaaaaaaaaaaaaa"

    template = ''.join([upstream, front_bs, middle, back_bs, downstream])
    cassette = ''.join([front_bs, replaced, back_bs])
    f1 = 't' * 20 + template + 'c' * 20 + template + 'c' * 30
    f2 = 't' * 40 + template + 'c' * 15 + template + 'c' * 20
    arm_len = min(len(front_bs), len(back_bs))

    g = self.build_genome(False, f1, f2)
    self.assertEqual(g.fragments.count(), 2)

    c = recombine(g, cassette, arm_len)
    self.assertEqual(c.fragments.count(), 2)

    # Order by length so the check does not depend on fragment ordering;
    # the fragment derived from f1 is the shorter of the two.
    sequences = sorted(
        (f.indexed_fragment().sequence for f in c.fragments.all()),
        key=len)
    modified = upstream + cassette + downstream
    self.assertEqual(
        sequences[0],
        't' * 20 + modified + 'c' * 20 + modified + 'c' * 30)
    self.assertEqual(
        sequences[1],
        't' * 40 + modified + 'c' * 15 + modified + 'c' * 20)
def test_adds_multiple_annotations_to_modified_genome(self):
    # The cassette drops one base (index 10) from self.middle. 'Bar',
    # annotated at bases len(upstream)+2 .. len(upstream)+10, is expected
    # unchanged; 'Foo', annotated over the middle region, is expected to be
    # renamed 'Foo -11A' and to end one base earlier. The operation
    # annotation is expected first.
    replaced = self.middle[:10] + self.middle[11:]
    cassette = ''.join([self.front_bs, replaced, self.back_bs])

    up_len = len(self.upstream)
    middle_first = up_len + len(self.front_bs) + 1
    middle_last = up_len + len(self.front_bs) + len(self.middle)
    self.fragment.annotate(up_len + 2, up_len + 10, 'Bar', 'static', -1)
    self.fragment.annotate(middle_first, middle_last, 'Foo', 'changed', 1)

    c = recombine(self.genome, cassette, self.arm_len)
    f = c.fragments.all()[0].indexed_fragment()
    annotations = f.annotations()
    self.assertEqual(len(annotations), 3)

    a = annotations[0]
    self.assertEqual(a.feature.type, 'operation')

    a = annotations[1]
    self.assertEqual(a.feature.name, 'Bar')
    self.assertEqual(a.feature.type, 'static')
    self.assertEqual(a.feature.strand, -1)
    self.assertEqual(a.base_first, up_len + 2)
    self.assertEqual(a.base_last, up_len + 10)

    a = annotations[2]
    self.assertEqual(a.feature.name, 'Foo -11A')
    self.assertEqual(a.feature.type, 'changed')
    self.assertEqual(a.feature.strand, 1)
    self.assertEqual(a.base_first, middle_first)
    self.assertEqual(a.base_last, middle_last - 1)
def test_integrates_and_annotates_cassette_across_circular_boundary(self):
    # The circular template is laid out so that the middle region straddles
    # the start/end of the sequence. The recombined fragment is expected to
    # read downstream + upstream + cassette, with a single forward-strand
    # operation annotation covering the cassette.
    upstream = "gagattgtccgcgtttt"
    front_bs = "catagcgcacaggacgcggag"
    middle = "cggcacctgtgagccg"
    back_bs = "taatgaccccgaagcagg"
    downstream = "gttaaggcgcgaacat"
    replaced = "aaaaaaaaaaaaaaaaaaa"

    cassette = ''.join([front_bs, replaced, back_bs])
    arm_len = min(len(front_bs), len(back_bs))
    template = ''.join(
        [middle[8:], back_bs, downstream, upstream, front_bs, middle[0:8]])

    g = self.build_genome(True, template)
    c = recombine(g, cassette, arm_len)
    self.assertNotEqual(g.id, c.id)
    self.assertEqual(c.fragments.all()[0].indexed_fragment().sequence,
                     ''.join([downstream, upstream, cassette]))

    a = c.fragments.all()[0].indexed_fragment().annotations()
    self.assertEqual(len(a), 1)

    operation_annotation = a[0]
    prefix_len = len(downstream + upstream)
    self.assertEqual(operation_annotation.base_first, prefix_len + 1)
    self.assertEqual(operation_annotation.base_last, prefix_len + len(cassette))
    self.assertEqual(operation_annotation.feature_base_first, 1)
    self.assertEqual(operation_annotation.feature_base_last, len(cassette))
    self.assertEqual(operation_annotation.feature.strand, 1)
    self.assertEqual(operation_annotation.feature.operation.type,
                     Operation.RECOMBINATION[0])
    self.assertEqual(operation_annotation.feature.operation.genome, c)
def test_returns_new_orf_in_reverse(self):
    # The donor is the reverse complement of an atg...taa reading frame.
    # After recombination the test expects the operation annotation first,
    # followed by an 'ORF frame 1' annotation on the reverse strand that
    # spans the donor.
    forward_orf = "atgatcatcatcatcatcatcatcatcatcatcatcatcatcatcatctaa"
    replaced = str(Seq(forward_orf).reverse_complement())
    cassette = "".join([self.front_bs, replaced, self.back_bs])

    self.fragment.annotate(
        len(self.upstream) + len(self.front_bs) + 1,
        len(self.upstream) + len(self.front_bs) + len(self.middle),
        "Foo",
        "gene",
        1,
    )

    c = recombine(self.genome, cassette, self.arm_len)
    f = c.fragments.all()[0].indexed_fragment()
    annotations = f.annotations()
    self.assertEqual(len(annotations), 2)

    a = annotations[0]
    self.assertEqual(a.feature.type, "operation")

    a = annotations[1]
    self.assertEqual(a.base_first, len(self.upstream + self.front_bs) + 1)
    self.assertEqual(a.base_last, len(self.upstream + self.front_bs + replaced))
    self.assertEqual(a.feature.name, "ORF frame 1")
    self.assertEqual(a.feature.type, "ORF")
    self.assertEqual(a.feature.strand, -1)
def test_preserves_annotations_on_homology_arm_rev(self):
    # The cassette is the front arm joined directly to the back arm, so the
    # middle region is removed. A reverse-strand annotation covering the
    # back arm is expected to be the only annotation afterwards, shifted
    # left by len(self.middle).
    cassette = "".join([self.front_bs, self.back_bs])

    # add annotation on reverse strand of downstream arm
    front_end = len(self.upstream) + len(self.front_bs)
    self.fragment.annotate(
        front_end + len(self.middle) + 1,
        front_end + len(self.middle) + len(self.back_bs),
        "Down arm",
        "feature",
        -1,
    )

    c = recombine(self.genome, cassette, self.arm_len)
    f = c.fragments.all()[0].indexed_fragment()
    annotations = f.annotations()
    self.assertEqual(len(annotations), 1)

    a = annotations[0]
    self.assertEqual(a.feature.name, "Down arm")
    self.assertEqual(a.feature.type, "feature")
    self.assertEqual(a.feature.strand, -1)
    self.assertEqual(a.base_first, front_end + 1)
    self.assertEqual(a.base_last, front_end + len(self.back_bs))
def test_preserves_annotations_on_homology_arm_fwd_when_doing_ko(self):
    # The cassette is the front arm joined directly to the back arm, so the
    # middle region is removed. A forward-strand annotation inside the
    # front arm is expected to be the only annotation afterwards, at
    # unchanged coordinates.
    cassette = "".join([self.front_bs, self.back_bs])

    # add annotation on upstream arm
    arm_first = len(self.upstream) + 2
    arm_last = len(self.upstream) + len(self.front_bs)
    self.fragment.annotate(arm_first, arm_last, "Up arm", "feature", 1)

    c = recombine(self.genome, cassette, self.arm_len)
    f = c.fragments.all()[0].indexed_fragment()
    annotations = f.annotations()
    self.assertEqual(len(annotations), 1)

    a = annotations[0]
    self.assertEqual(a.feature.name, "Up arm")
    self.assertEqual(a.feature.type, "feature")
    self.assertEqual(a.feature.strand, 1)
    self.assertEqual(a.base_first, arm_first)
    self.assertEqual(a.base_last, arm_last)
def test_adds_new_annotations_on_homology_arm(self):
    # Annotations passed to recombine() are given in cassette coordinates.
    # 'foo' covers the first 3 bases of the front arm and 'bar' the last 5
    # bases of the back arm; both are expected on the new fragment, offset
    # by len(self.upstream), alongside the operation annotation.
    donor = "a" * 100 + "c" * 100 + "t" * 100
    cassette = "".join([self.front_bs, donor, self.back_bs])
    annotations = [
        dict(
            base_first=1,
            base_last=3,
            name="foo",
            type="feature",
            strand=1,
            qualifiers=None,
        ),
        dict(
            base_first=len(cassette) - 4,
            base_last=len(cassette),
            name="bar",
            type="feature",
            strand=-1,
            qualifiers=None,
        ),
    ]

    c = recombine(self.genome, cassette, self.arm_len, annotations=annotations)
    f = c.fragments.all()[0].indexed_fragment()
    fragment_sequence = f.sequence
    ans = f.annotations()
    self.assertEqual(len(ans), 3)

    # Sort by start, then by descending end, to get a deterministic order.
    ans = sorted(ans, key=lambda a: (a.base_first, -a.base_last))
    foo, operation, bar = ans

    self.assertEqual(foo.feature.type, "feature")
    self.assertEqual(foo.feature.name, "foo")
    self.assertEqual(foo.feature.strand, 1)
    self.assertEqual(foo.base_first, len(self.upstream) + 1)
    self.assertEqual(foo.base_last, len(self.upstream) + 3)
    self.assertEqual(
        fragment_sequence[foo.base_first - 1:foo.base_last], self.front_bs[:3]
    )

    self.assertEqual(operation.feature.type, "operation")

    self.assertEqual(bar.feature.type, "feature")
    self.assertEqual(bar.feature.name, "bar")
    self.assertEqual(bar.feature.strand, -1)
    self.assertEqual(bar.base_first, len(self.upstream + cassette) - 4)
    self.assertEqual(bar.base_last, len(self.upstream + cassette))
    self.assertEqual(
        fragment_sequence[bar.base_first - 1:bar.base_last], self.back_bs[-5:]
    )
def test_annotates_correctly_when_one_bp_is_removed(self):
    # The cassette drops one base (index 13) from self.middle. 'Bar' is
    # expected unchanged, while 'Foo' is expected to be split into two
    # annotations around the deleted base: feature bases 1-13 and 15-45.
    replaced = self.middle[:13] + self.middle[14:]
    cassette = "".join([self.front_bs, replaced, self.back_bs])

    up_len = len(self.upstream)
    self.fragment.annotate(up_len + 2, up_len + 10, "Bar", "static", -1)
    self.fragment.annotate(
        up_len + len(self.front_bs) + 1,
        up_len + len(self.front_bs) + len(self.middle),
        "Foo",
        "changed",
        1,
    )

    c = recombine(self.genome, cassette, self.arm_len)
    f = c.fragments.all()[0].indexed_fragment()
    annotations = f.annotations()
    self.assertEqual(len(annotations), 3)

    # Start of the replaced region on the new fragment (0-based offset).
    region_offset = len(self.upstream + self.front_bs)

    a = annotations[0]
    self.assertEqual(a.feature.name, "Bar")
    self.assertEqual(a.feature.type, "static")
    self.assertEqual(a.feature.strand, -1)
    self.assertEqual(a.base_first, up_len + 2)
    self.assertEqual(a.base_last, up_len + 10)

    a = annotations[1]
    self.assertEqual(a.feature.name, "Foo")
    self.assertEqual(a.feature.type, "changed")
    self.assertEqual(a.feature.strand, 1)
    self.assertEqual(a.base_first, region_offset + 1)
    self.assertEqual(a.base_last, region_offset + 13)
    self.assertEqual(a.feature_base_first, 1)
    self.assertEqual(a.feature_base_last, 13)

    a = annotations[2]
    self.assertEqual(a.feature.name, "Foo")
    self.assertEqual(a.feature.type, "changed")
    self.assertEqual(a.feature.strand, 1)
    self.assertEqual(a.base_first, region_offset + 14)
    self.assertEqual(a.base_last, region_offset + 44)
    self.assertEqual(a.feature_base_first, 15)
    self.assertEqual(a.feature_base_last, 45)
def test_single_crossover_integrates_correctly(self):
    # The cassette's front arm is the second half of the locus and its back
    # arm is the first half, so integration is expected to duplicate the
    # locus around the insertion: locus + insertion + locus.
    upstream = "gagattgtccgcgtttt"
    locus = "catagcgcacaggacgcggagtaggcgtagtcggttgatctgatgtc"
    downstream = "gttaaggcgcgaacat"
    insertion = "aaaaaaaaaaaaaaaaaaa"

    locus_len = len(locus)
    bs_len = locus_len // 2
    template = ''.join([upstream, locus, downstream])
    cassette = ''.join(
        [locus[locus_len - bs_len:], insertion, locus[0:bs_len]])

    g = self.build_genome(False, template)
    # Homology arm length is deliberately 2 shorter than each binding site.
    c = recombine(g, cassette, bs_len - 2)
    self.assertNotEqual(g.id, c.id)
    self.assertEqual(
        c.fragments.all()[0].indexed_fragment().sequence,
        ''.join([upstream, locus, insertion, locus, downstream]))
def test_adds_single_snp_change_splits_original_annotation_when_cassette_is_reversed(self):
    # A reverse-complemented cassette carrying one base substitution
    # (index 10 of self.middle, 'a' -> 'c') is recombined into the genome.
    # The original 'Foo' annotation is expected to be split around the
    # changed base (feature bases 1-10 and 12-45), with the operation
    # annotation listed between the two pieces.
    bases = list(self.middle)
    self.assertEqual(bases[10], "a")
    bases[10] = "c"
    replaced = "".join(bases)
    cassette = "".join([self.front_bs, replaced, self.back_bs])
    cassette = str(Seq(cassette).reverse_complement())

    # 0-based offset of the middle region on the fragment.
    region_offset = len(self.upstream) + len(self.front_bs)
    self.fragment.annotate(
        region_offset + 1,
        region_offset + len(self.middle),
        "Foo",
        "gene",
        1,
    )

    c = recombine(self.genome, cassette, self.arm_len)
    f = c.fragments.all()[0].indexed_fragment()
    annotations = f.annotations()
    self.assertEqual(len(annotations), 3)

    a = annotations[0]
    self.assertEqual(a.feature.name, "Foo")
    self.assertEqual(a.feature.type, "gene")
    self.assertEqual(a.feature.strand, 1)
    self.assertEqual(a.base_first, region_offset + 1)
    self.assertEqual(a.base_last, region_offset + 10)
    self.assertEqual(a.feature_base_first, 1)
    self.assertEqual(a.feature_base_last, 10)

    a = annotations[1]
    self.assertEqual(a.feature.type, "operation")

    a = annotations[2]
    self.assertEqual(a.feature.name, "Foo")
    self.assertEqual(a.feature.type, "gene")
    self.assertEqual(a.feature.strand, 1)
    self.assertEqual(a.base_first, region_offset + 12)
    self.assertEqual(a.base_last, region_offset + 45)
    self.assertEqual(a.feature_base_first, 12)
    self.assertEqual(a.feature_base_last, 45)
def test_recombines_ignoring_upstream_and_downstream_bases(self):
    # The cassette is padded with six extra 'a' bases outside each homology
    # arm; the recombined sequence is expected to contain the arms and the
    # replacement but none of the padding.
    upstream = "gagattgtccgcgtttt"
    front_bs = "catagcgcacaggacgcggag"
    middle = "cggcacctgtgagccg"
    back_bs = "taatgaccccgaagcagg"
    downstream = "gttaaggcgcgaacat"
    replaced = "aaaaaaaaaaaaaaaaaaa"

    template = ''.join([upstream, front_bs, middle, back_bs, downstream])
    padding = 'a' * 6
    cassette = ''.join([padding + front_bs, replaced, back_bs + padding])
    arm_len = min(len(front_bs), len(back_bs))

    g = self.build_genome(False, template)
    c = recombine(g, cassette, arm_len)
    self.assertNotEqual(g.id, c.id)
    self.assertEqual(
        c.fragments.all()[0].indexed_fragment().sequence,
        ''.join([upstream, front_bs, replaced, back_bs, downstream]))
def test_recombines_with_reverse_complement_cassette_correctly(self):
    # The cassette is supplied as its reverse complement; the recombined
    # fragment is still expected to read in the genome's forward
    # orientation (front_bs + replaced + back_bs).
    upstream = "gagattgtccgcgtttt"
    front_bs = "catagcgcacaggacgcggag"
    middle = "cggcacctgtgagccg"
    back_bs = "taatgaccccgaagcagg"
    downstream = "gttaaggcgcgaacat"
    replaced = "aaaaaaaaaaaaaaaaaaa"

    template = ''.join([upstream, front_bs, middle, back_bs, downstream])
    forward_cassette = ''.join([front_bs, replaced, back_bs])
    cassette = str(Seq(forward_cassette).reverse_complement())
    arm_len = min(len(front_bs), len(back_bs))

    g = self.build_genome(False, template)
    c = recombine(g, cassette, arm_len)
    self.assertNotEqual(g.id, c.id)
    self.assertEqual(
        c.fragments.all()[0].indexed_fragment().sequence,
        ''.join([upstream, front_bs, replaced, back_bs, downstream]))
def test_recombine_when_back_arm_is_across_circular_boundary(self):
    # The circular template is laid out so that the back homology arm is
    # split across the start/end of the sequence (first 8 bases at the end,
    # the rest at the start). The recombined fragment is expected to read
    # downstream + upstream + cassette.
    upstream = "gagattgtccgcgtttt"
    front_bs = "catagcgcacaggacgcggag"
    middle = "cggcacctgtgagccg"
    back_bs = "taatgaccccgaagcagg"
    downstream = "gttaaggcgcgaacat"
    replaced = "aaaaaaaaaaaaaaaaaaa"

    cassette = ''.join([front_bs, replaced, back_bs])
    arm_len = min(len(front_bs), len(back_bs))
    template = ''.join([
        back_bs[8:], downstream, upstream, front_bs, middle, back_bs[0:8]
    ])

    g = self.build_genome(True, template)
    c = recombine(g, cassette, arm_len)
    self.assertNotEqual(g.id, c.id)
    self.assertEqual(c.fragments.all()[0].indexed_fragment().sequence,
                     ''.join([downstream, upstream, cassette]))
def test_recombines_correctly(self):
    # Basic case: a forward cassette replaces the region between its two
    # homology arms on a linear fragment, producing a new genome whose
    # fragment reads upstream + cassette + downstream.
    upstream = "gagattgtccgcgtttt"
    front_bs = "catagcgcacaggacgcggag"
    middle = "cggcacctgtgagccg"
    back_bs = "taatgaccccgaagcagg"
    downstream = "gttaaggcgcgaacat"
    replaced = "aaaaaaaaaaaaaaaaaaa"

    template = ''.join([upstream, front_bs, middle, back_bs, downstream])
    cassette = ''.join([front_bs, replaced, back_bs])
    arm_len = min(len(front_bs), len(back_bs))

    g = self.build_genome(False, template)
    c = recombine(g, cassette, arm_len)
    self.assertNotEqual(g.id, c.id)
    self.assertEqual(c.fragments.all()[0].indexed_fragment().sequence,
                     ''.join([upstream, cassette, downstream]))
def test_recombines_multiple_times_on_circular_fragment(self):
    # A circular fragment contains the locus twice: once intact in the
    # interior and once with its middle region straddling the start/end of
    # the sequence. Both copies are expected to receive the cassette.
    upstream = "gagattgtccgcgtttt"
    front_bs = "catagcgcacaggacgcggag"
    middle = "cggcacctgtgagccg"
    back_bs = "taatgaccccgaagcagg"
    downstream = "gttaaggcgcgaacat"
    replaced = "aaaaaaaaaaaaaaaaaaaaaa"

    template = ''.join([upstream, front_bs, middle, back_bs, downstream])
    cassette = ''.join([front_bs, replaced, back_bs])
    f = (middle[0:8] + back_bs + downstream
         + 't' * 20 + template + 'c' * 20
         + upstream + front_bs + middle[8:])
    arm_len = min(len(front_bs), len(back_bs))

    g = self.build_genome(True, f)
    c = recombine(g, cassette, arm_len)
    self.assertEqual(
        c.fragments.all()[0].indexed_fragment().sequence,
        downstream + 't' * 20 + upstream + cassette + downstream
        + 'c' * 20 + upstream + cassette)
def test_single_crossover_integrates_correctly(self):
    # The cassette's front arm is the second half of the locus and its back
    # arm is the first half, so integration is expected to duplicate the
    # locus around the insertion: locus + insertion + locus.
    upstream = "gagattgtccgcgtttt"
    locus = "catagcgcacaggacgcggagtaggcgtagtcggttgatctgatgtc"
    downstream = "gttaaggcgcgaacat"
    insertion = "aaaaaaaaaaaaaaaaaaa"

    locus_len = len(locus)
    bs_len = locus_len // 2
    template = ''.join([upstream, locus, downstream])
    cassette = ''.join(
        [locus[locus_len - bs_len:], insertion, locus[0:bs_len]])

    g = self.build_genome(False, template)
    # Homology arm length is deliberately 2 shorter than each binding site.
    c = recombine(g, cassette, bs_len - 2)
    self.assertNotEqual(g.id, c.id)
    self.assertEqual(
        c.fragments.all()[0].indexed_fragment().sequence,
        ''.join([upstream, locus, insertion, locus, downstream]))
def test_creates_operation(self):
    # recombine() is expected to record exactly one Operation on the new
    # genome, typed as a recombination, whose params are the JSON dump of
    # the cassette and homology arm length.
    upstream = "gagattgtccgcgtttt"
    front_bs = "catagcgcacaggacgcggag"
    middle = "cggcacctgtgagccg"
    back_bs = "taatgaccccgaagcagg"
    downstream = "gttaaggcgcgaacat"
    replaced = "aaaaaaaaaaaaaaaaaaa"

    template = ''.join([upstream, front_bs, middle, back_bs, downstream])
    cassette = ''.join([front_bs, replaced, back_bs])
    arm_len = min(len(front_bs), len(back_bs))

    g = self.build_genome(False, template)
    self.assertEqual(Operation.objects.count(), 0)

    c = recombine(g, cassette, arm_len)
    self.assertEqual(Operation.objects.count(), 1)

    operation = c.operation_set.all()[0]
    self.assertEqual(operation.type, Operation.RECOMBINATION[0])
    self.assertEqual(
        operation.params,
        json.dumps(dict(cassette=cassette, homology_arm_length=arm_len)))
def test_recombine_when_back_arm_is_across_circular_boundary(self):
    # The circular template is laid out so that the back homology arm is
    # split across the start/end of the sequence (first 8 bases at the end,
    # the rest at the start). The recombined fragment is expected to read
    # downstream + upstream + cassette.
    upstream = "gagattgtccgcgtttt"
    front_bs = "catagcgcacaggacgcggag"
    middle = "cggcacctgtgagccg"
    back_bs = "taatgaccccgaagcagg"
    downstream = "gttaaggcgcgaacat"
    replaced = "aaaaaaaaaaaaaaaaaaa"

    cassette = ''.join([front_bs, replaced, back_bs])
    arm_len = min(len(front_bs), len(back_bs))
    template = ''.join([
        back_bs[8:], downstream, upstream, front_bs, middle, back_bs[0:8]
    ])

    g = self.build_genome(True, template)
    c = recombine(g, cassette, arm_len)
    self.assertNotEqual(g.id, c.id)
    self.assertEqual(c.fragments.all()[0].indexed_fragment().sequence,
                     ''.join([downstream, upstream, cassette]))
def test_single_crossover_integrates_correctly_with_reverse_complement_and_gap_in_locus(self):
    # Single-crossover integration with a reverse-complemented cassette
    # whose two binding sites leave part of the locus uncovered (sized from
    # self.new_max_gap). The result is still expected to read
    # locus + insertion + locus.
    upstream = "gagattgtccgcgtttt"
    locus = "catagcgcacaggacgcggagtaggcgtagtcggttgatctgatgtc"
    downstream = "gttaaggcgcgaacat"
    insertion = "aaaaaaaaaaaaaaaaaaa"

    locus_len = len(locus)
    # Floor division keeps gap and bs_len integers: bs_len is used as a
    # slice index below, and true division would produce a float on
    # Python 3.
    gap = self.new_max_gap // 2
    arm_short = 2
    bs_len = locus_len // 2 - (gap - arm_short)

    template = ''.join([upstream, locus, downstream])
    cassette = ''.join(
        [locus[locus_len - bs_len:], insertion, locus[0:bs_len]])
    cassette = str(Seq(cassette).reverse_complement())

    g = self.build_genome(False, template)
    c = recombine(g, cassette, bs_len - arm_short)
    self.assertNotEqual(g.id, c.id)
    self.assertEqual(
        c.fragments.all()[0].indexed_fragment().sequence,
        ''.join([upstream, locus, insertion, locus, downstream]))
def test_creates_operation(self):
    # recombine() is expected to record exactly one Operation on the new
    # genome, typed as a recombination, whose params are the JSON dump of
    # the cassette and homology arm length.
    upstream = "gagattgtccgcgtttt"
    front_bs = "catagcgcacaggacgcggag"
    middle = "cggcacctgtgagccg"
    back_bs = "taatgaccccgaagcagg"
    downstream = "gttaaggcgcgaacat"
    replaced = "aaaaaaaaaaaaaaaaaaa"

    template = ''.join([upstream, front_bs, middle, back_bs, downstream])
    cassette = ''.join([front_bs, replaced, back_bs])
    arm_len = min(len(front_bs), len(back_bs))

    g = self.build_genome(False, template)
    self.assertEqual(Operation.objects.count(), 0)

    c = recombine(g, cassette, arm_len)
    self.assertEqual(Operation.objects.count(), 1)

    operation = c.operation_set.all()[0]
    self.assertEqual(operation.type, Operation.RECOMBINATION[0])
    self.assertEqual(
        operation.params,
        json.dumps(dict(cassette=cassette, homology_arm_length=arm_len)))
def test_recombines_multiple_times_on_circular_fragment(self):
    # A circular fragment contains the locus twice: once intact in the
    # interior and once with its middle region straddling the start/end of
    # the sequence. Both copies are expected to receive the cassette.
    upstream = "gagattgtccgcgtttt"
    front_bs = "catagcgcacaggacgcggag"
    middle = "cggcacctgtgagccg"
    back_bs = "taatgaccccgaagcagg"
    downstream = "gttaaggcgcgaacat"
    replaced = "aaaaaaaaaaaaaaaaaaaaaa"

    template = ''.join([upstream, front_bs, middle, back_bs, downstream])
    cassette = ''.join([front_bs, replaced, back_bs])
    f = (middle[0:8] + back_bs + downstream
         + 't' * 20 + template + 'c' * 20
         + upstream + front_bs + middle[8:])
    arm_len = min(len(front_bs), len(back_bs))

    g = self.build_genome(True, f)
    c = recombine(g, cassette, arm_len)
    self.assertEqual(
        c.fragments.all()[0].indexed_fragment().sequence,
        downstream + 't' * 20 + upstream + cassette + downstream
        + 'c' * 20 + upstream + cassette)
def test_single_crossover_integrates_correctly_with_reverse_complement_and_gap_in_locus(
        self):
    # Single-crossover integration with a reverse-complemented cassette
    # whose two binding sites leave part of the locus uncovered (sized from
    # self.new_max_gap). The result is still expected to read
    # locus + insertion + locus.
    upstream = "gagattgtccgcgtttt"
    locus = "catagcgcacaggacgcggagtaggcgtagtcggttgatctgatgtc"
    downstream = "gttaaggcgcgaacat"
    insertion = "aaaaaaaaaaaaaaaaaaa"

    locus_len = len(locus)
    # Floor division keeps gap and bs_len integers: bs_len is used as a
    # slice index below, and true division would produce a float on
    # Python 3.
    gap = self.new_max_gap // 2
    arm_short = 2
    bs_len = locus_len // 2 - (gap - arm_short)

    template = ''.join([upstream, locus, downstream])
    cassette = ''.join(
        [locus[locus_len - bs_len:], insertion, locus[0:bs_len]])
    cassette = str(Seq(cassette).reverse_complement())

    g = self.build_genome(False, template)
    c = recombine(g, cassette, bs_len - arm_short)
    self.assertNotEqual(g.id, c.id)
    self.assertEqual(
        c.fragments.all()[0].indexed_fragment().sequence,
        ''.join([upstream, locus, insertion, locus, downstream]))
def test_adds_multiple_annotations_to_modified_genome(self):
    # The cassette drops one base (index 13) from self.middle. 'Bar',
    # annotated at bases len(upstream)+2 .. len(upstream)+10, is expected
    # unchanged; 'Foo', annotated over the middle region, is expected to be
    # renamed 'Foo -14G' and to end one base earlier. The operation
    # annotation is expected first.
    replaced = self.middle[:13] + self.middle[14:]
    cassette = ''.join([self.front_bs, replaced, self.back_bs])

    up_len = len(self.upstream)
    middle_first = up_len + len(self.front_bs) + 1
    middle_last = up_len + len(self.front_bs) + len(self.middle)
    self.fragment.annotate(up_len + 2, up_len + 10, 'Bar', 'static', -1)
    self.fragment.annotate(middle_first, middle_last, 'Foo', 'changed', 1)

    c = recombine(self.genome, cassette, self.arm_len)
    f = c.fragments.all()[0].indexed_fragment()
    annotations = f.annotations()
    self.assertEqual(len(annotations), 3)

    a = annotations[0]
    self.assertEqual(a.feature.type, 'operation')

    a = annotations[1]
    self.assertEqual(a.feature.name, 'Bar')
    self.assertEqual(a.feature.type, 'static')
    self.assertEqual(a.feature.strand, -1)
    self.assertEqual(a.base_first, up_len + 2)
    self.assertEqual(a.base_last, up_len + 10)

    a = annotations[2]
    self.assertEqual(a.feature.name, 'Foo -14G')
    self.assertEqual(a.feature.type, 'changed')
    self.assertEqual(a.feature.strand, 1)
    self.assertEqual(a.base_first, middle_first)
    self.assertEqual(a.base_last, middle_last - 1)
def on_post(self, request, genome_id):
    """Handle a recombination POST against the genome ``genome_id``.

    The JSON body must supply ``cassette``, ``homology_arm_length`` and
    ``create``; ``genome_name``, ``cassette_name`` and ``notes`` are
    optional and default to None.

    When ``create`` is exactly False, nothing is modified: the regions
    returned by ``find_swap_region`` are serialized and returned with
    status 200. Otherwise ``recombine`` is called; a None result yields
    ``(None, 400)``, and a successful result schedules a BLAST database
    build for the new genome and returns its dict form with status 201.
    """
    from edge.recombine import find_swap_region, recombine

    genome = get_genome_or_404(genome_id)

    parser = RequestParser()
    mandatory = (
        ('cassette', str),
        ('homology_arm_length', int),
        ('create', bool),
    )
    for field, kind in mandatory:
        parser.add_argument(field, field_type=kind, required=True,
                            location='json')
    for field in ('genome_name', 'cassette_name', 'notes'):
        parser.add_argument(field, field_type=str, required=False,
                            default=None, location='json')
    args = parser.parse_args(request)

    cassette, arm_length, create = (
        args['cassette'], args['homology_arm_length'], args['create'])
    genome_name, cassette_name, notes = (
        args['genome_name'], args['cassette_name'], args['notes'])

    # Preview only: report where the cassette would integrate.
    if create is False:
        regions = find_swap_region(genome, cassette, arm_length)
        return [region.to_dict() for region in regions], 200

    new_genome = recombine(genome, cassette, arm_length,
                           genome_name=genome_name,
                           cassette_name=cassette_name,
                           notes=notes)
    if new_genome is None:
        return None, 400

    schedule_building_blast_db(new_genome.id)
    return GenomeView.to_dict(new_genome), 201
def __test_verification_primers(self, template, middle, cassette, arm_len, is_reversed):
    # Shared check for the verification primers designed by
    # find_swap_region(): builds a linear genome from `template`, verifies
    # primer behaviour on the unmodified genome, recombines `cassette` in,
    # then verifies primer behaviour on the modified genome. `middle` is
    # the sequence expected inside the cassette-primer product before
    # recombination; `is_reversed` indicates that `cassette` must be
    # reverse-complemented before it is searched for in PCR products.
    from edge.pcr import pcr_from_genome

    def pcr_product(genome, primer):
        # First element of the pcr_from_genome() result; the assertions
        # below treat None as "no product".
        return pcr_from_genome(genome,
                               primer['PRIMER_LEFT_SEQUENCE'],
                               primer['PRIMER_RIGHT_SEQUENCE'])[0]

    g = self.build_genome(False, template)
    r = find_swap_region(g, cassette, arm_len, design_primers=True)
    self.assertEqual(len(r), 1)
    region = r[0]
    self.assertEqual(len(region.verification_cassette), 5)
    self.assertEqual(len(region.verification_front), 5)
    self.assertEqual(len(region.verification_back), 5)

    # cassette verification primers should work on unmodified genome
    for primer in region.verification_cassette:
        product = pcr_product(g, primer)
        self.assertIsNotNone(product)
        self.assertIn(middle, product)

    # front verification primers should NOT produce product
    for primer in region.verification_front:
        self.assertIsNone(pcr_product(g, primer))

    # back verification primers should NOT produce product
    for primer in region.verification_back:
        self.assertIsNone(pcr_product(g, primer))

    # do recombination, then try primers again on modified genome
    c = recombine(g, cassette, arm_len)
    for f in c.fragments.all():
        try:
            os.unlink(fragment_fasta_fn(f))
        except OSError:
            # Best effort: the FASTA file may simply not exist yet.
            pass
    build_all_genome_dbs(refresh=True)
    # reload to get blastdb
    c = Genome.objects.get(pk=c.id)

    if is_reversed:
        cassette = str(Seq(cassette).reverse_complement())

    # cassette verification primers should work on modified genome,
    # finding cassette
    for primer in region.verification_cassette:
        product = pcr_product(c, primer)
        self.assertIsNotNone(product)
        self.assertIn(cassette, product)

    # front verification primers should find a product including front of
    # cassette
    front_junction = cassette[0:edge.recombine.CHECK_JUNCTION_LEFT_DN]
    for primer in region.verification_front:
        product = pcr_product(c, primer)
        self.assertIsNotNone(product)
        self.assertIn(front_junction, product)

    # back verification primers should find a product including back of
    # cassette
    back_junction = cassette[-edge.recombine.CHECK_JUNCTION_RIGHT_UP:]
    for primer in region.verification_back:
        product = pcr_product(c, primer)
        self.assertIsNotNone(product)
        self.assertIn(back_junction, product)
def test_adds_annotations_correctly_when_homology_arm_has_extra_non_matching_bps(
    self,
):
    """Cassette annotations land correctly despite extra flanking bases.

    The cassette is padded with "aaa" before ``self.front_bs`` and "tt" after
    ``self.back_bs``. Annotation coordinates are given relative to that
    padded cassette. After ``recombine``, every annotation is expected at the
    same donor offset counted from ``len(self.upstream) + len(self.front_bs)``
    on the resulting fragment, covering exactly its own run of donor bases.
    """
    # One row per feature, in donor order:
    # (name, type, strand, qualifiers, donor bases the feature covers).
    # The donor, the input annotations and the expected results are all
    # derived from this table so they cannot drift apart.
    features = [
        ("pFavorite", "promoter", 1, None, "a" * 100),
        (
            "Favorite",
            "gene",
            1,
            dict(locus="Favorite", product="Favoritep"),
            "g" * 100,
        ),
        ("tFavorite", "terminator", 1, None, "c" * 50),
        ("tBest", "terminator", -1, None, "t" * 50),
        ("Best", "gene", -1, dict(locus="Best", product="Bestp"), "g" * 100),
        ("pBest", "promoter", -1, None, "c" * 100),
    ]

    donor = "".join(bases for _, _, _, _, bases in features)
    cassette = "".join(["aaa" + self.front_bs, donor, self.back_bs + "tt"])
    # Donor starts after front_bs plus the 3 extra leading bases.
    flen = len(self.front_bs) + 3

    annotations = []
    offsets = []
    offset = 0
    for name, feature_type, strand, qualifiers, bases in features:
        annotations.append(
            dict(
                base_first=flen + offset + 1,
                base_last=flen + offset + len(bases),
                name=name,
                type=feature_type,
                strand=strand,
                qualifiers=qualifiers,
            )
        )
        offsets.append(offset)
        offset += len(bases)

    c = recombine(self.genome, cassette, self.arm_len, annotations=annotations)

    f = c.fragments.all()[0].indexed_fragment()
    fragment_sequence = f.sequence
    ans = f.annotations()
    # Six supplied features plus one annotation of type "operation".
    self.assertEqual(len(ans), 7)

    ans = sorted(ans, key=lambda a: (a.base_first, -a.base_last))
    self.assertEqual(ans[0].feature.type, "operation")

    # Expected coordinates do not include the 3 extra leading bases.
    base = len(self.upstream) + len(self.front_bs)
    for found, offset, feature in zip(ans[1:], offsets, features):
        name, feature_type, strand, _, bases = feature
        self.assertEqual(found.feature.type, feature_type)
        self.assertEqual(found.feature.name, name)
        self.assertEqual(found.feature.strand, strand)
        self.assertEqual(found.base_first, base + offset + 1)
        self.assertEqual(found.base_last, base + offset + len(bases))
        self.assertEqual(
            fragment_sequence[found.base_first - 1 : found.base_last], bases
        )
def __test_verification_primers(self, template, middle, cassette, arm_len, is_reversed):
    """Check designed verification primers before and after recombination.

    Builds a genome from ``template``, has ``find_swap_region`` design
    verification primers for ``cassette``, and runs every primer pair through
    ``pcr_from_genome`` twice: against the unmodified genome and against the
    genome returned by ``recombine``.

    Args:
        template: sequence used to build the test genome.
        middle: sequence that must appear in the cassette-primer product on
            the unmodified genome.
        cassette: cassette passed to ``find_swap_region`` and ``recombine``.
        arm_len: arm length passed to ``find_swap_region`` and ``recombine``.
        is_reversed: if true, the cassette is reverse-complemented before it
            is searched for in products from the modified genome.
    """
    from edge.pcr import pcr_from_genome

    def pcr_product(genome, primer):
        # Element 0 of the PCR result is what the assertions below treat as
        # the product; it is compared against None when nothing is expected.
        return pcr_from_genome(
            genome,
            primer['PRIMER_LEFT_SEQUENCE'],
            primer['PRIMER_RIGHT_SEQUENCE'],
        )[0]

    g = self.build_genome(False, template)
    r = find_swap_region(g, cassette, arm_len, design_primers=True)

    self.assertEqual(len(r), 1)
    region = r[0]
    self.assertEqual(len(region.verification_cassette), 5)
    self.assertEqual(len(region.verification_front), 5)
    self.assertEqual(len(region.verification_back), 5)

    # cassette verification primers should work on unmodified genome
    for primer in region.verification_cassette:
        product = pcr_product(g, primer)
        self.assertIsNotNone(product)
        self.assertIn(middle, product)

    # front verification primers should NOT produce product
    for primer in region.verification_front:
        self.assertIsNone(pcr_product(g, primer))

    # back verification primers should NOT produce product
    for primer in region.verification_back:
        self.assertIsNone(pcr_product(g, primer))

    # do recombination, then try primers again on modified genome
    c = recombine(g, cassette, arm_len)

    # Remove any existing fasta file for each fragment before the genome dbs
    # are rebuilt. Best effort: a file that cannot be removed (for example
    # because it does not exist) is not an error for this test.
    for f in c.fragments.all():
        try:
            os.unlink(fragment_fasta_fn(f))
        except OSError:
            pass
    build_all_genome_dbs(refresh=True)

    # reload to get blastdb
    c = Genome.objects.get(pk=c.id)

    if is_reversed:
        cassette = str(Seq(cassette).reverse_complement())

    # cassette verification primers should work on modified genome, finding cassette
    for primer in region.verification_cassette:
        product = pcr_product(c, primer)
        self.assertIsNotNone(product)
        self.assertIn(cassette, product)

    # front verification primers should find a product including front of cassette
    cassette_front = cassette[0:edge.recombine.CHECK_JUNCTION_LEFT_DN]
    for primer in region.verification_front:
        product = pcr_product(c, primer)
        self.assertIsNotNone(product)
        self.assertIn(cassette_front, product)

    # back verification primers should find a product including back of cassette
    cassette_back = cassette[-edge.recombine.CHECK_JUNCTION_RIGHT_UP:]
    for primer in region.verification_back:
        product = pcr_product(c, primer)
        self.assertIsNotNone(product)
        self.assertIn(cassette_back, product)
def test_adds_annotations_correctly_when_bps_flanking_new_seq_matches_wt_genome_sequence(
    self,
):
    """Cassette annotations survive when the donor's ends match the old middle.

    The donor begins with ``self.middle[:3]`` and ends with
    ``self.middle[-3:]``. The three supplied annotations are still expected
    at their full donor coordinates on the recombined fragment, including
    those shared flanking bases.
    """
    # One row per feature, in donor order:
    # (name, type, strand, qualifiers, donor bases the feature covers).
    # The donor, the input annotations and the expected results are all
    # derived from this table so they cannot drift apart.
    features = [
        ("pFavorite", "promoter", 1, None, self.middle[:3] + "a" * 97),
        (
            "Favorite",
            "gene",
            1,
            dict(locus="Favorite", product="Favoritep"),
            "c" * 100,
        ),
        ("tFavorite", "terminator", 1, None, "t" * 97 + self.middle[-3:]),
    ]

    donor = "".join(bases for _, _, _, _, bases in features)
    cassette = "".join([self.front_bs, donor, self.back_bs])
    flen = len(self.front_bs)

    annotations = []
    offsets = []
    offset = 0
    for name, feature_type, strand, qualifiers, bases in features:
        annotations.append(
            dict(
                base_first=flen + offset + 1,
                base_last=flen + offset + len(bases),
                name=name,
                type=feature_type,
                strand=strand,
                qualifiers=qualifiers,
            )
        )
        offsets.append(offset)
        offset += len(bases)

    c = recombine(self.genome, cassette, self.arm_len, annotations=annotations)

    f = c.fragments.all()[0].indexed_fragment()
    fragment_sequence = f.sequence
    ans = f.annotations()
    # Three supplied features plus one annotation of type "operation".
    self.assertEqual(len(ans), 4)

    ans = sorted(ans, key=lambda a: (a.base_first, -a.base_last))
    # The "operation" annotation is expected to sort second, after the
    # promoter. NOTE(review): presumably because the leading bases that equal
    # the original middle are not counted as changed; confirm in recombine.
    self.assertEqual(ans[1].feature.type, "operation")
    feature_annotations = [ans[0]] + ans[2:]

    base = len(self.upstream + self.front_bs)
    for found, offset, feature in zip(feature_annotations, offsets, features):
        name, feature_type, strand, _, bases = feature
        self.assertEqual(found.feature.type, feature_type)
        self.assertEqual(found.feature.name, name)
        self.assertEqual(found.feature.strand, strand)
        self.assertEqual(found.base_first, base + offset + 1)
        self.assertEqual(found.base_last, base + offset + len(bases))
        self.assertEqual(
            fragment_sequence[found.base_first - 1 : found.base_last], bases
        )