예제 #1
0
파일: reads.py 프로젝트: MingleiYang/spats
    def process_tags(self):
        """Register tag targets and plugins, then tag every pair in the pair DB.

        Tag targets are added for each database target and its reverse
        complement, for the two adapters, for the cotrans linker (only when
        ``run.cotrans`` is set) and for every entry of
        ``self._extra_tag_targets``.  Plugins from ``self._plugins`` are
        registered afterwards.  Once the database has been processed, the id
        of the run's result set is stored on ``self.result_set_id``, the tags
        are counted and the run is stored in the database.
        """
        spats = self._spats
        db = self._pair_db
        spats.loadTargets(db)
        spats.run.pair_length = db.pair_length()
        processor = spats._processor
        run = spats.run

        # Every target is taggable in both orientations.
        for entry in db.targets():
            name, seq = entry[0], entry[1]
            processor.addTagTarget(name, seq)
            processor.addTagTarget(name + "_rc", reverse_complement(str(seq)))

        processor.addTagTarget("adapter_t_rc", reverse_complement(run.adapter_t))
        processor.addTagTarget("adapter_b", run.adapter_b)

        if run.cotrans:
            processor.addTagTarget("linker_cotrans", run.cotrans_linker)
            processor.addTagTarget("linker_cotrans_rc",
                                   reverse_complement(run.cotrans_linker))

        for extra in self._extra_tag_targets:
            processor.addTagTarget(extra[0], extra[1])

        for plugin_tag, plugin_handler in self._plugins.iteritems():
            processor.addTagPlugin(plugin_tag, plugin_handler)

        spats.process_pair_db(db, batch_size=10000)
        self.result_set_id = db.result_set_id_for_name(run.result_set_name)
        db.count_tags(self.result_set_id)
        db.store_run(run)
예제 #2
0
 def test_SRPs(self):
     """Check ``find_partial(...)[1:]`` for a fixed set of R1 and R2 reads."""
     # Each entry is (read, expected find_partial(...)[1:]).
     # R1 reads are reverse-complemented before the lookup.
     r1_cases = [
         ("GGGCCTGACTCGGGCACCAAGGACGGGTGGGGGCC", [0, 31, 106]),   # R1 0_0
         ("CCCGCTGACTCGGGCACCAAGGACGGGTGGGGGCC", [0, 31, 106]),   # R1 0_1
         ("CCCGCTGACTCGGGCACCAAGGACGGGTGGGGGCC", [0, 31, 106]),   # R1 106
         ("GGGCCTGACTCGGGCACCAAGGACGGGTGGGGGCA", [1, 30, 107]),   # R1 107
         ("GGGCCTGACTCGGGCACCAAGGACAGATCGGAAGA", [11, 20, 117]),  # R1 117
     ]
     # R2 reads are looked up as-is.
     r2_cases = [
         ("ATCGGGGGCTCTGTTGGTTCTCCCGCAACGCTACT", [0, 35, 0]),     # R2 0_0
         ("ATCGGGGGCTCTGTTGGTTCTCCCGCAACGCTACT", [0, 35, 0]),     # R2 0_1
         ("GCAGGGCCCCCACCCGTCCTTGGTGCCCGAGTCAG", [0, 35, 102]),   # R2 102
         ("CAGGGCCCCCACCCGTCCTTGGTGCCCGAGTCAGG", [0, 34, 103]),   # R2 103
         ("GGCCCCCACCCGTCCTTGGTGCCCGAGTCAGGCCC", [0, 31, 106]),   # R2 106
         ("GCCCCCACCCGTCCTTGGTGCCCGAGTCAGGCCCA", [0, 30, 107]),   # R2 107
         ("GTCCTTGGTGCCCGAGTCAGGCCCAGATCGGAAGA", [0, 20, 117]),   # R2 117
     ]
     for read, expected in r1_cases:
         found = self.target.find_partial(reverse_complement(read))
         self.assertEqual(found[1:], expected)
     for read, expected in r2_cases:
         found = self.target.find_partial(read)
         self.assertEqual(found[1:], expected)
예제 #3
0
파일: misc.py 프로젝트: MingleiYang/spats
def tags():
    """Parse a sample of a cotrans dataset into a pair DB, tag it, and print the tag counts.

    All paths are hard-coded relative to a local dataset directory.
    """
    base_dir = "/Users/jbrink/mos/tasks/1RwIBa/tmp/datasets/cotrans/"

    from spats_shape_seq.db import PairDB
    db = PairDB(base_dir + "db/pairs.db")
    rebuild_db = True  # set to False to skip wiping and re-parsing the database
    if rebuild_db:
        print("Parsing to db...")
        db.wipe()
        db.add_targets_table(base_dir + "cotrans_single.fa")
        r1_path = base_dir + "data/EJS_6_F_10mM_NaF_Rep1_GCCAAT_R1.fastq"
        r2_path = base_dir + "data/EJS_6_F_10mM_NaF_Rep1_GCCAAT_R2.fastq"
        db.parse(r1_path, r2_path, sample_size=100000)

    from spats_shape_seq import Spats
    from spats_shape_seq.tag import TagProcessor
    from spats_shape_seq.util import reverse_complement
    spats = Spats()
    run = spats.run
    run._processor_class = TagProcessor
    run.writeback_results = True
    run.result_set_name = "tags"
    run.num_workers = 1
    run.cotrans = True
    run.cotrans_linker = 'CTGACTCGGGCACCAAGGAC'
    spats.loadTargets(db)

    run.allow_indeterminate = True
    run.allowed_target_errors = 2
    run.allowed_adapter_errors = 2

    processor = spats._processor
    # Register each target in both orientations.
    for row in db.targets():
        name, seq = row[0], row[1]
        processor.addTagTarget(name, seq)
        processor.addTagTarget(name + "_rc", reverse_complement(seq))
    processor.addTagTarget("adapter_t_rc", reverse_complement(run.adapter_t))
    processor.addTagTarget("adapter_b", run.adapter_b)
    if run.cotrans:
        processor.addTagTarget("linker_cotrans", run.cotrans_linker)
        processor.addTagTarget("linker_cotrans_rc",
                               reverse_complement(run.cotrans_linker))

    spats.process_pair_db(db)
    result_set_id = db.result_set_id_for_name(run.result_set_name)
    db.count_tags(result_set_id)
    print(db.tag_counts(result_set_id))
예제 #4
0
파일: misc.py 프로젝트: LucksLab/spats
def tags():
    """Tag a 100000-pair sample of a hard-coded cotrans dataset and print the tag counts."""
    dataset_root = "/Users/jbrink/mos/tasks/1RwIBa/tmp/datasets/cotrans/"

    from spats_shape_seq.db import PairDB
    pairs = PairDB(dataset_root + "db/pairs.db")
    reparse = True  # set to False to skip wiping and re-parsing the database
    if reparse:
        print("Parsing to db...")
        pairs.wipe()
        pairs.add_targets_table(dataset_root + "cotrans_single.fa")
        fastq_r1 = dataset_root + "data/EJS_6_F_10mM_NaF_Rep1_GCCAAT_R1.fastq"
        fastq_r2 = dataset_root + "data/EJS_6_F_10mM_NaF_Rep1_GCCAAT_R2.fastq"
        pairs.parse(fastq_r1, fastq_r2, sample_size=100000)

    from spats_shape_seq import Spats
    from spats_shape_seq.tag import TagProcessor
    from spats_shape_seq.util import reverse_complement
    spats = Spats()
    config = spats.run
    config._processor_class = TagProcessor
    config.writeback_results = True
    config.result_set_name = "tags"
    config.num_workers = 1
    config.cotrans = True
    config.cotrans_linker = 'CTGACTCGGGCACCAAGGAC'
    spats.loadTargets(pairs)

    config.allow_indeterminate = True
    config.allowed_target_errors = 2
    config.allowed_adapter_errors = 2

    tagger = spats._processor
    for record in pairs.targets():
        # Forward orientation first, then the reverse complement.
        tagger.addTagTarget(record[0], record[1])
        tagger.addTagTarget(record[0] + "_rc", reverse_complement(record[1]))
    tagger.addTagTarget("adapter_t_rc", reverse_complement(config.adapter_t))
    tagger.addTagTarget("adapter_b", config.adapter_b)
    if config.cotrans:
        tagger.addTagTarget("linker_cotrans", config.cotrans_linker)
        tagger.addTagTarget("linker_cotrans_rc",
                            reverse_complement(config.cotrans_linker))

    spats.process_pair_db(pairs)
    rs_id = pairs.result_set_id_for_name(config.result_set_name)
    pairs.count_tags(rs_id)
    print(pairs.tag_counts(rs_id))
예제 #5
0
 def test_SRPs(self):
     """Check ``find_partial(...)[1:]`` for fixed R1 (reverse-complemented) and R2 reads."""

     def partial(seq):
         # Everything after the first element of the find_partial() result.
         return self.target.find_partial(seq)[1:]

     rc = reverse_complement
     check = self.assertEqual

     check(partial(rc("GGGCCTGACTCGGGCACCAAGGACGGGTGGGGGCC")), [0, 31, 106])   # R1 0_0
     check(partial(rc("CCCGCTGACTCGGGCACCAAGGACGGGTGGGGGCC")), [0, 31, 106])   # R1 0_1
     check(partial(rc("CCCGCTGACTCGGGCACCAAGGACGGGTGGGGGCC")), [0, 31, 106])   # R1 106
     check(partial(rc("GGGCCTGACTCGGGCACCAAGGACGGGTGGGGGCA")), [1, 30, 107])   # R1 107
     check(partial(rc("GGGCCTGACTCGGGCACCAAGGACAGATCGGAAGA")), [11, 20, 117])  # R1 117
     check(partial("ATCGGGGGCTCTGTTGGTTCTCCCGCAACGCTACT"), [0, 35, 0])         # R2 0_0
     check(partial("ATCGGGGGCTCTGTTGGTTCTCCCGCAACGCTACT"), [0, 35, 0])         # R2 0_1
     check(partial("GCAGGGCCCCCACCCGTCCTTGGTGCCCGAGTCAG"), [0, 35, 102])       # R2 102
     check(partial("CAGGGCCCCCACCCGTCCTTGGTGCCCGAGTCAGG"), [0, 34, 103])       # R2 103
     check(partial("GGCCCCCACCCGTCCTTGGTGCCCGAGTCAGGCCC"), [0, 31, 106])       # R2 106
     check(partial("GCCCCCACCCGTCCTTGGTGCCCGAGTCAGGCCCA"), [0, 30, 107])       # R2 107
     check(partial("GTCCTTGGTGCCCGAGTCAGGCCCAGATCGGAAGA"), [0, 20, 117])       # R2 117
예제 #6
0
파일: misc.py 프로젝트: MingleiYang/spats
def align_pairs():
    """Run the mask/match/align steps on one hard-coded R1/R2 pair, then exit with status 0."""
    from spats_shape_seq.pair import Pair
    from spats_shape_seq.target import Targets
    from spats_shape_seq.util import reverse_complement, AlignmentParams
    from spats_shape_seq.mask import Mask, match_mask_optimized, base_similarity_ind

    target_seq = "GGACCCGATGCCGGACGAAAGTCCGCGCATCAACTATGCCTCTACCTGCTTCGGCCGATAAAGCCGACGATAATACTCCCAAAGCCC"  # HairpinC_SS2
    r1_seq = "GGGTGAGCGTGCTTTGGGAGTATTATCGTCGGCTTTATCGGCCGAAGCAGGTAGTGCATAGTTGATGCTCGGACTTTCG"
    r2_seq = "GGACCCGATGCCGGACGAAAGTCCGAGCATCAACTATGCCCTACCTGCTTCGGCCGATAAAGCCAAAAGACGATAAT"

    read_pair = Pair()
    read_pair.set_from_data("TEST_PAIR", r1_seq, r2_seq)
    target_set = Targets()
    target_set.minimum_match_length = 10
    target_set.addTarget("TEST_TARGET", target_seq, 0)
    target_set.index()

    r1 = read_pair.r1
    r2 = read_pair.r2

    found_mask = match_mask_optimized(r1.original_seq)
    assert found_mask
    read_pair.set_mask(Mask(found_mask))
    # R2 is forced onto whichever target R1 was found in.
    r1_target = r1.find_in_targets(target_set)
    read_pair.target = r2.find_in_targets(target_set, force_target=r1_target)
    assert read_pair.matched

    mask_length = read_pair.mask.length()
    adapter_t = "AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT"
    r1_prefix_rc = reverse_complement(r1.original_seq[:mask_length])
    adapter_t_rc = reverse_complement(adapter_t)
    r2_suffix = r1_prefix_rc + adapter_t_rc

    def similarity(nt1, nt2):
        return base_similarity_ind(nt1, nt2, 3, 2, 1.5)

    align_params = AlignmentParams(similarity, 5, 1)

    r2.align_with_target(read_pair.target, align_params, r2_suffix)
    # Computed as in the original script; the value is not used afterwards.
    r2_adapter_trim = max(0, r2.match_index + r2.match_len - read_pair.target.n)
    r1_adapter_trim = r1.seq_len - (read_pair.target.n - r2.match_index)
    if r1_adapter_trim > 0:
        r1.rtrim += r1_adapter_trim
        r1.match_start -= r1_adapter_trim
    r1.align_with_target(read_pair.target, align_params)

    exit(0)
예제 #7
0
파일: misc.py 프로젝트: LucksLab/spats
def align_pairs():
    """Align a single hard-coded read pair against one test target, then exit with status 0."""
    from spats_shape_seq.pair import Pair
    from spats_shape_seq.target import Targets
    from spats_shape_seq.util import reverse_complement, AlignmentParams
    from spats_shape_seq.mask import Mask, match_mask_optimized, base_similarity_ind

    target_seq = "GGACCCGATGCCGGACGAAAGTCCGCGCATCAACTATGCCTCTACCTGCTTCGGCCGATAAAGCCGACGATAATACTCCCAAAGCCC"  # HairpinC_SS2
    r1_seq = "GGGTGAGCGTGCTTTGGGAGTATTATCGTCGGCTTTATCGGCCGAAGCAGGTAGTGCATAGTTGATGCTCGGACTTTCG"
    r2_seq = "GGACCCGATGCCGGACGAAAGTCCGAGCATCAACTATGCCCTACCTGCTTCGGCCGATAAAGCCAAAAGACGATAAT"

    test_pair = Pair()
    test_pair.set_from_data("TEST_PAIR", r1_seq, r2_seq)
    test_targets = Targets()
    test_targets.minimum_match_length = 10
    test_targets.addTarget("TEST_TARGET", target_seq, 0)
    test_targets.index()

    mask_match = match_mask_optimized(test_pair.r1.original_seq)
    assert mask_match
    test_pair.set_mask(Mask(mask_match))
    # The target found for R1 is forced when looking up R2.
    forced = test_pair.r1.find_in_targets(test_targets)
    test_pair.target = test_pair.r2.find_in_targets(test_targets,
                                                    force_target=forced)
    assert test_pair.matched

    n_mask = test_pair.mask.length()
    adapter_t = "AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT"
    suffix_head = reverse_complement(test_pair.r1.original_seq[:n_mask])
    suffix_tail = reverse_complement(adapter_t)
    r2suffix = suffix_head + suffix_tail

    def score(nt1, nt2):
        return base_similarity_ind(nt1, nt2, 3, 2, 1.5)

    params = AlignmentParams(score, 5, 1)

    test_pair.r2.align_with_target(test_pair.target, params, r2suffix)
    # Computed as in the original script; the value is not used afterwards.
    r2_adapter_trim = max(
        0,
        test_pair.r2.match_index + test_pair.r2.match_len - test_pair.target.n)
    r1_adapter_trim = test_pair.r1.seq_len - (
        test_pair.target.n - test_pair.r2.match_index)
    if r1_adapter_trim > 0:
        test_pair.r1.rtrim += r1_adapter_trim
        test_pair.r1.match_start -= r1_adapter_trim
    test_pair.r1.align_with_target(test_pair.target, params)

    exit(0)
예제 #8
0
    def _loadDBAndModel(self):
        """Load result-set, counter and run state from ``self._db`` and build ``self._spats``.

        Sets ``self.result_set_id``, ``self.has_tags`` and
        ``self.has_counters``.  When tags are present, the processor's tag
        targets are registered and each target name gets the "target" colour.
        """
        db = self._db

        names = db.result_sets()
        if not names:
            self.result_set_id = -1
            self.has_tags = False
        else:
            # Only the first result set is used.
            self.result_set_id = db.result_set_id_for_name(names[0])
            db.index_results()
            self.has_tags = bool(self.result_set_id)

        self.has_counters = db.has_counters()

        spats = Spats()
        db.load_run(spats.run)
        spats.run._p_use_tag_processor = True
        spats.loadTargets(db)
        if self.has_counters:
            db.load_counters("spats", spats.counters)

        if self.has_tags:
            processor = spats._processor
            run = spats.run
            for tgt in spats._targets.targets:
                processor.addTagTarget(tgt.name, tgt.seq)
                processor.addTagTarget(tgt.name + "_rc", reverse_complement(tgt.seq))
                self.colors._colors[tgt.name.lower()] = self.colors.color("target")
            processor.addTagTarget("adapter_t_rc", reverse_complement(run.adapter_t))
            processor.addTagTarget("adapter_b", run.adapter_b)
            if run.cotrans:
                processor.addTagTarget("linker_cotrans", run.cotrans_linker)
                processor.addTagTarget("linker_cotrans_rc",
                                       reverse_complement(run.cotrans_linker))
            if run._p_extra_tags:
                for extra_tag, extra_seq in run._p_extra_tags.iteritems():
                    processor.addTagTarget(extra_tag, extra_seq)
            if not self.has_counters:
                # No stored counters: load them from the result data instead.
                counter_data = db.counter_data_for_results(self.result_set_id)
                processor.counters.load_from_db_data(counter_data)

        self._spats = spats
예제 #9
0
파일: misc.py 프로젝트: LucksLab/spats
def test_tags():
    """Tag five hard-coded read pairs against the 5s target and print each read with its tags."""
    bp = "/Users/jbrink/mos/tasks/1RwIBa/tmp/5sq_dev/"
    from spats_shape_seq import Spats
    spats = Spats()
    from spats_shape_seq.tag import TagProcessor
    spats.run._processor_class = TagProcessor

    # The same 5s sequence is used as a target and as a tag target.
    five_s = "GGATGCCTGGCGGCCGTAGCGCGGTGGTCCCACCTGACCCCATGCCGAACTCAGAAGTGAAACGCCGTAGCGCCGATGGTAGTGTGGGGTCTCCCCATGCGAGAGTAGGGAACTGCCAGGCATCTGACTCGGGCACCAAGGAC"
    five_s_rc = "GTCCTTGGTGCCCGAGTCAGATGCCTGGCAGTTCCCTACTCTCGCATGGGGAGACCCCACACTACCATCGGCGCTACGGCGTTTCACTTCTGAGTTCGGCATGGGGTCAGGTGGGACCACCGCGCTACGGCCGCCAGGCATCC"

    #from spats_shape_seq.target import Targets
    #s.addTargets(bp + "5S.fa")
    spats.addTarget("5s", five_s)
    #s.addTarget("rc(5s)", "GTCCTTGGTGCCCGAGTCAGATGCCTGGCAGTTCCCTACTCTCGCATGGGGAGACCCCACACTACCATCGGCGCTACGGCGTTTCACTTCTGAGTTCGGCATGGGGTCAGGTGGGACCACCGCGCTACGGCCGCCAGGCATCC")
    #s.addTarget("adapter_t", s.run.adapter_t)
    #s.addTarget("adapter_b", s.run.adapter_b)
    #s._targets._index_word_length = 8
    #s._targets._minimum_length = 8
    #s.addTarget("adapter_t_rc", reverse_complement(s.run.adapter_t))
    #s.addTarget("adapter_b_rc", reverse_complement(s.run.adapter_b))

    processor = spats._processor
    processor.addTagTarget("5s", five_s)
    processor.addTagTarget("5s_rc", five_s_rc)
    from spats_shape_seq.util import reverse_complement
    processor.addTagTarget("adapter_t_rc", reverse_complement(spats.run.adapter_t))
    processor.addTagTarget("adapter_b", spats.run.adapter_b)

    from spats_shape_seq.pair import Pair
    # Each case is (pair identifier, R1 sequence, R2 sequence).
    cases = [
        ("1101:20069:1063", "TTTAGTCCTTGGTGCCCGAGTCAGATGCCTGGCAG", "TCCCACCTGACCCCATGCCGAACTCAGAAGTGAAA"),
        ("1101:11562:1050", "AAACGTCCTTGGTGCCCGAGTCAGATGCCTGGCAG", "CCACCTGACCCCATGCCGAACTCAGAAGTGAAACG"),
        ("21189", "TTTGGTCCTTGGTGCCCGAGTCAGAGATCGGAAGA", "CTGACTCGGGCACCAAGGACCAAAAGATCGGAAGA"),
        ("1101:12888:8140", "GGATGTCCTTGGTGCCCGAGTCAGATGCCAGATCG", "GGCATCTGACTCGGGCACCAAGGACATACAGATCG"),
        ("18333", "GAGTGTCCTTGGTGCCCGAGTCAGTGGTAGATCGG", "ACCACTGACTCGGGCACCAAGGACACTCAGATCGG"),
    ]

    # A single Pair object is reused for every case.
    pair = Pair()
    for pair_id, r1_seq, r2_seq in cases:
        pair.set_from_data(pair_id, r1_seq, r2_seq)
        spats.process_pair(pair)

        print(pair.r1.original_seq)
        print(pair.r1.tags)
        print(pair.r2.original_seq)
        print(pair.r2.tags)
        print("-----------------------------")
예제 #10
0
파일: misc.py 프로젝트: LucksLab/spats
def rc():
    """Print the reverse complement of the sequence passed as ``sys.argv[2]``."""
    from spats_shape_seq.util import reverse_complement
    sequence = sys.argv[2]
    print(reverse_complement(sequence))
예제 #11
0
 def test_reverse_complement(self):
     """Check reverse_complement() against fixed input/output pairs."""
     # (expected result, input sequence)
     expectations = [
         ("GATC", "GATC"),
         ("CGTCCAA", "TTGGACG"),
         ("CAACAGAGCCCCCGAT", "ATCGGGGGCTCTGTTG"),
         ("GATNC", "GNATC"),
     ]
     for expected, sequence in expectations:
         self.assertEqual(expected, reverse_complement(sequence))
예제 #12
0
파일: misc.py 프로젝트: MingleiYang/spats
def test_tags():
    """Process five fixed read pairs with the tag processor and print the tags found on R1 and R2."""
    bp = "/Users/jbrink/mos/tasks/1RwIBa/tmp/5sq_dev/"
    from spats_shape_seq import Spats
    engine = Spats()
    from spats_shape_seq.tag import TagProcessor
    engine.run._processor_class = TagProcessor

    # Forward 5s sequence, registered both as a target and as a tag target.
    seq_5s = "GGATGCCTGGCGGCCGTAGCGCGGTGGTCCCACCTGACCCCATGCCGAACTCAGAAGTGAAACGCCGTAGCGCCGATGGTAGTGTGGGGTCTCCCCATGCGAGAGTAGGGAACTGCCAGGCATCTGACTCGGGCACCAAGGAC"
    seq_5s_rc = "GTCCTTGGTGCCCGAGTCAGATGCCTGGCAGTTCCCTACTCTCGCATGGGGAGACCCCACACTACCATCGGCGCTACGGCGTTTCACTTCTGAGTTCGGCATGGGGTCAGGTGGGACCACCGCGCTACGGCCGCCAGGCATCC"

    #from spats_shape_seq.target import Targets
    #s.addTargets(bp + "5S.fa")
    engine.addTarget("5s", seq_5s)
    #s.addTarget("rc(5s)", "GTCCTTGGTGCCCGAGTCAGATGCCTGGCAGTTCCCTACTCTCGCATGGGGAGACCCCACACTACCATCGGCGCTACGGCGTTTCACTTCTGAGTTCGGCATGGGGTCAGGTGGGACCACCGCGCTACGGCCGCCAGGCATCC")
    #s.addTarget("adapter_t", s.run.adapter_t)
    #s.addTarget("adapter_b", s.run.adapter_b)
    #s._targets._index_word_length = 8
    #s._targets._minimum_length = 8
    #s.addTarget("adapter_t_rc", reverse_complement(s.run.adapter_t))
    #s.addTarget("adapter_b_rc", reverse_complement(s.run.adapter_b))

    tagger = engine._processor
    tagger.addTagTarget("5s", seq_5s)
    tagger.addTagTarget("5s_rc", seq_5s_rc)
    from spats_shape_seq.util import reverse_complement
    tagger.addTagTarget("adapter_t_rc", reverse_complement(engine.run.adapter_t))
    tagger.addTagTarget("adapter_b", engine.run.adapter_b)

    from spats_shape_seq.pair import Pair
    # Parallel columns: pair identifier, R1 sequence, R2 sequence.
    identifiers = ["1101:20069:1063", "1101:11562:1050", "21189",
                   "1101:12888:8140", "18333"]
    r1_reads = ["TTTAGTCCTTGGTGCCCGAGTCAGATGCCTGGCAG",
                "AAACGTCCTTGGTGCCCGAGTCAGATGCCTGGCAG",
                "TTTGGTCCTTGGTGCCCGAGTCAGAGATCGGAAGA",
                "GGATGTCCTTGGTGCCCGAGTCAGATGCCAGATCG",
                "GAGTGTCCTTGGTGCCCGAGTCAGTGGTAGATCGG"]
    r2_reads = ["TCCCACCTGACCCCATGCCGAACTCAGAAGTGAAA",
                "CCACCTGACCCCATGCCGAACTCAGAAGTGAAACG",
                "CTGACTCGGGCACCAAGGACCAAAAGATCGGAAGA",
                "GGCATCTGACTCGGGCACCAAGGACATACAGATCG",
                "ACCACTGACTCGGGCACCAAGGACACTCAGATCGG"]

    # A single Pair object is reused for every case.
    pair = Pair()
    separator = "-----------------------------"
    for identifier, r1_read, r2_read in zip(identifiers, r1_reads, r2_reads):
        pair.set_from_data(identifier, r1_read, r2_read)
        engine.process_pair(pair)

        print(pair.r1.original_seq)
        print(pair.r1.tags)
        print(pair.r2.original_seq)
        print(pair.r2.tags)
        print(separator)
예제 #13
0
파일: misc.py 프로젝트: MingleiYang/spats
def rc():
    """Write the reverse complement of command-line argument 2 to standard output."""
    from spats_shape_seq.util import reverse_complement
    complemented = reverse_complement(sys.argv[2])
    print(complemented)
예제 #14
0
 def test_reverse_complement(self):
     """Verify reverse_complement() on four fixed sequences, one of them containing an N."""
     rc = reverse_complement
     check = self.assertEqual
     check("GATC", rc("GATC"))
     check("CGTCCAA", rc("TTGGACG"))
     check("CAACAGAGCCCCCGAT", rc("ATCGGGGGCTCTGTTG"))
     check("GATNC", rc("GNATC"))
예제 #15
0
파일: pair.py 프로젝트: LucksLab/spats
    def build(self):
        """Build the nucleotide views and labels for the target, R1 and R2.

        After the base scene is built, this:

        * stores a tag-name -> sequence map on ``self.tag_seqs`` (adapters,
          the "RRRY"/"YYYR" entries, and every tag target in both
          orientations);
        * adds one nucleotide view per target position (``None`` for
          positions covered by ``self._skips()``) plus a target label;
        * for each of ``r1`` and ``r2``, adds one nucleotide view per read
          position, coloured by the tag covering it, and, when
          ``self.expanded`` is set, extra rows for reverse-complement,
          adapter and (R1 only) YYYR/RRRY tags.

        Results are stored in ``self.parts`` and ``self.labels``.
        """

        BaseScene.build(self)

        processor = self.ui.processor
        seqs = {
            "adapter_t_rc" : reverse_complement(processor._run.adapter_t),
            "adapter_b" : processor._run.adapter_b,
            "RRRY" : "RRRY",
            "YYYR" : "YYYR",
        }
        for target in processor._tag_targets.targets:
            seqs[target.name] = target.seq
            seqs[target.name + "_rc"] = reverse_complement(target.seq)
        self.tag_seqs = seqs

        colors = self.ui.colors
        target = self.pair.target
        colors._colors[target.name] = colors.color("target")
        tcol = colors.color("target")
        nomatch_col = colors.color("grey")
        error_col = colors.color("error")

        skips = self._skips()

        tseq = target.seq
        tlen = target.n
        def should_skip(idx):
            # Each skip is read as (start, length); idx is skipped when it
            # falls inside any of them.
            for skip in skips:
                if idx >= skip[0] and idx < skip[0] + skip[1]:
                    return True
            return False
        self.parts[target.name] = [ None if should_skip(i) else self.addNucView(Nuc(tseq[i], (target.name, i, None)), tcol) for i in range(tlen)  ]
        self.labels[target.name] = self.addLabel(target.name, bg = tcol)

        rc_suffix = "_rc"
        for part_name in ( "r1", "r2" ):
            parts = []
            part = getattr(self.pair, part_name)
            seq = part.original_seq
            idx = 0

            # Tags are used as (name, start in read, length, start in tag
            # sequence) and are walked in order of their start in the read.
            for tag in sorted(part.tags, key = lambda t : t[1]):
                # Colour key is the tag name without a trailing "_rc".
                # str.rstrip("_rc_") must not be used here: it strips any run
                # of trailing '_', 'r' and 'c' characters, which truncates
                # names that merely end in one of those letters.
                tkey = tag[0][:-len(rc_suffix)] if tag[0].endswith(rc_suffix) else tag[0]
                while idx < tag[1] + tag[2]:
                    # Positions before the tag start are unmatched.
                    ntcol = nomatch_col if idx < tag[1] else colors.color(tkey)
                    if idx in part.match_errors or idx in part.adapter_errors:
                        ntcol = error_col
                    parts.append(self.addNucView(Nuc(seq[idx], (part_name, idx, None if idx < tag[1] else tag[0])), ntcol))
                    idx += 1

                if self.expanded:
                    if tag[0] == target.name + "_rc":
                        rc = reverse_complement(seq[tag[1]:tag[1] + tag[2]])
                        self.parts[part_name + tag[0]] = [ self.addNucView(Nuc(rc[j], (tag[0], j, None)), tcol) for j in range(0, tag[2]) ]
                        self.labels[part_name + tag[0]] = self.addLabel("R1_rc")
                    elif tkey.startswith("adapter"):
                        tagseq = self.tag_seqs[tag[0]]
                        aparts = []
                        # NOTE(review): the upper bound uses tag[2] + 4, not
                        # tag[3] + tag[2] + 4; these only coincide when
                        # tag[3] == 0 -- confirm this is intended.
                        for j in range(max(tag[3] - 4, 0), min(tag[2] + 4, len(tagseq))):
                            v = self.addNucView(Nuc(tagseq[j], (tag[0], j, None)), colors.color(tkey))
                            # Positions outside the matched span are dimmed.
                            if j < tag[3] or j >= tag[3] + tag[2]:
                                v.alpha = 0.5
                            aparts.append(v)
                        self.parts[part_name + tag[0]] = aparts
                        self.labels[part_name + tag[0]] = self.addLabel(tag[0], bg = colors.color(tkey))
                    elif part_name == "r1" and (tag[0] == 'YYYR' or tag[0] == 'RRRY'):
                        hcol = colors.color(tag[0])
                        self.parts[part_name + tag[0]] = [ self.addNucView(Nuc(tag[0][j], (tag[0], j, None)), hcol) for j in range(len(tag[0])) ]
                        self.labels[part_name + tag[0]] = self.addLabel(tag[0], bg = hcol)

            # Whatever remains after the last tag is unmatched.
            while idx < len(seq):
                parts.append(self.addNucView(Nuc(seq[idx], (part_name, idx, None)), nomatch_col))
                idx += 1

            self.parts[part_name] = parts
            self.labels[part_name] = self.addLabel(part_name.upper())