예제 #1
0
    def add_sequence(self, label, seq, align_to=None):
        """Store ``seq`` under ``label``, aligned to the track named ``align_to``.

        If ``align_to`` is None or is not a key of this assembly, ``seq`` is
        stored unaligned starting at offset 0 and ``self`` is returned.

        Otherwise ``seq`` is aligned against the existing track with
        ``align.ssearch36``, the gaps are reconciled by ``conform_gaps``,
        and the assembly produced by ``conform_gaps`` is returned with the
        newly aligned sequence stored under ``label``.
        """
        can_align = align_to is not None and align_to in self
        if not can_align:
            # Nothing to align against: place the sequence at the origin.
            self[label] = aflist(0, seq, '-')
            return self

        anchor = self[align_to]
        reference = ''.join(anchor.values)

        anchor_hit, seq_hit = align.ssearch36(reference, seq)
        anchor_start, anchor_gapped = anchor_hit
        seq_start, seq_gapped = seq_hit

        # Both aligned tracks are positioned relative to where the anchor
        # track already sits in this assembly.
        base = anchor.offset
        anchor_track = aflist(base, anchor_gapped, gap='-')
        seq_track = aflist(base + seq_start - anchor_start, seq_gapped, '-')

        # The second value returned by conform_gaps is not needed here.
        merged, _, placed = conform_gaps(self, align_to, anchor_track, seq_track)
        merged[label] = placed
        return merged
예제 #2
0
    def add_sequence(self, label, seq, align_to=None):
        """Add the sequence ``seq`` to the assembly under the key ``label``.

        With no usable ``align_to`` (it is None, or not a key of this
        assembly) the sequence is stored starting at offset 0 and ``self``
        is returned.

        With a usable ``align_to``, ``seq`` is aligned to that track via
        ``align.ssearch36``; ``conform_gaps`` then reconciles the gaps and
        its resulting assembly, with the aligned sequence stored under
        ``label``, is returned.
        """
        if align_to is None:
            alignable = False
        else:
            alignable = align_to in self

        if not alignable:
            # No reference track available; the sequence starts at 0.
            self[label] = aflist(0, seq, '-')
            return self

        target = self[align_to]
        target_text = ''.join(target.values)
        (target_shift, target_aligned), (seq_shift, seq_aligned) = \
            align.ssearch36(target_text, seq)

        # Offsets are expressed relative to the target track's own offset.
        origin = target.offset
        new_target = aflist(origin, target_aligned, gap='-')
        new_seq = aflist(origin + seq_shift - target_shift, seq_aligned, '-')

        # Only the assembly and the conformed new sequence are used.
        result, _, conformed_seq = conform_gaps(self, align_to,
                                                new_target, new_seq)
        result[label] = conformed_seq
        return result
예제 #3
0
파일: contig.py 프로젝트: nhoffman/seqlabd
def assemble(seq1, conf1, traces1, seq2, conf2, traces2):
    """Build a contig from two reads and return the resulting Assembly.

    Each read is given as its bases (``seqN``), its confidences
    (``confN``, which must have the same length as ``seqN``) and its
    traces (``tracesN``; a falsy value means the read has no traces).

    The high-quality stretch of each read, as reported by
    ``highqualityinterval``, is aligned against the other read's stretch
    with ``align.ssearch36``. Every aligned track gets "unused",
    "leftunused" or "rightunused" features covering the parts outside
    that stretch.

    The returned Assembly holds 'confidences N' and 'bases N' for both
    reads, plus 'traces N' where traces are present. If at least one read
    contributes an aligned segment of non-zero width, the Assembly also
    holds 'contig' and is returned through ``toorigin()``. If neither
    does, both reads are returned unaligned, entirely marked "unused",
    and there is no 'contig' key.
    """
    assert len(seq1) == len(conf1)
    assert len(seq2) == len(conf2)

    def shade(start, stop, label):
        # Feature used to mark the span from start to stop as not used.
        return interval(start, stop, name=label, red=0, green=0, blue=0, alpha=0.5)

    def discarded(values, gap, trackclass):
        # Unaligned track starting at 0, marked "unused" over its whole extent.
        return ProperList(
            0,
            values,
            gap=gap,
            trackclass=trackclass,
            features=[shade(neginf, posinf, "unused")],
        )

    # High-quality window of each read; an improper window contributes no bases.
    hq1 = highqualityinterval(conf1)
    hq2 = highqualityinterval(conf2)
    if hq1.isproper():
        hqseq1 = seq1[hq1.left() : hq1.right()]
    else:
        hqseq1 = ""
    if hq2.isproper():
        hqseq2 = seq2[hq2.left() : hq2.right()]
    else:
        hqseq2 = ""

    # NOTE: to support assembly against a template, write a function that
    # takes both segments plus the template and returns a third
    # (templateoffset, rawaltemplate) pair, then add the template track to
    # the Assembly below. The easiest route is probably an optional
    # template argument to assemble() defaulting to None, with ssearch36
    # used when it is None. The templated version would align both
    # segments against the template and then merge the two alignments
    # (inserting '-' where needed); assembly.conform_gaps is a very
    # similar algorithm that may help.
    (start1, gapped1), (start2, gapped2) = align.ssearch36(hqseq1, hqseq2)
    alhq1 = aflist(start1, gapped1, gap="-", trackclass="nucleotide")
    alhq2 = aflist(start2, gapped2, gap="-", trackclass="nucleotide")
    span1 = ProperInterval(start1, start1 + alhq1.width())
    span2 = ProperInterval(start2, start2 + alhq2.width())

    # Grow the aligned segments back out to the full reads, then carry the
    # confidences and traces along with the aligned bases.
    albases1 = extend(alhq1, hq1, aflist(0, seq1, "-"))
    albases2 = extend(alhq2, hq2, aflist(0, seq2, "-"))
    alquals1 = tracealong(conf1, albases1)
    alquals2 = tracealong(conf2, albases2)
    altrace1 = tracealong(traces1, albases1) if traces1 else None
    altrace2 = tracealong(traces2, albases2) if traces2 else None

    # Mark everything outside the aligned high-quality span on each track.
    for span, tracks in (
        (span1, (albases1, alquals1, altrace1)),
        (span2, (albases2, alquals2, altrace2)),
    ):
        for track in tracks:
            if track is None:
                continue
            if span.isempty():
                track.appendfeature(shade(neginf, posinf, "unused"))
                continue
            if span.left() > track.left():
                track.appendfeature(shade(neginf, span.left(), "leftunused"))
            if span.right() < track.right():
                track.appendfeature(shade(span.right(), posinf, "rightunused"))

    # Sanity checks: slicing by the aligned span must give the segment width.
    assert alhq1.width() == alquals1[span1].width()
    assert altrace1 is None or alhq1.width() == altrace1[span1].width()
    assert alhq2.width() == alquals2[span2].width()
    assert altrace2 is None or alhq2.width() == altrace2[span2].width()

    contig = combine((alhq1, alquals1[span1]), (alhq2, alquals2[span2]))

    for track in (alquals1, alquals2):
        track.setmeta("trackclass", "integer")
    for track in (altrace1, altrace2):
        if track:
            track.setmeta("trackclass", "svg")
    for track in (albases1, albases2, contig):
        track.setmeta("trackclass", "nucleotide")

    used1 = alhq1.width() != 0
    used2 = alhq2.width() != 0
    a = Assembly()

    if not used1 and not used2:
        # Neither read is usable: return the raw reads, no contig.
        if traces1:
            a["traces 1"] = discarded(traces1, None, "svg")
        a["confidences 1"] = discarded(conf1, None, "integer")
        a["bases 1"] = discarded(seq1, "-", "nucleotide")
        if traces2:
            a["traces 2"] = discarded(traces2, None, "svg")
        a["confidences 2"] = discarded(conf2, None, "integer")
        a["bases 2"] = discarded(seq2, "-", "nucleotide")
        return a

    if used1 and used2:  # both strands
        if altrace1:
            a["traces 1"] = altrace1
        a["confidences 1"] = alquals1
        a["bases 1"] = albases1
        if altrace2:
            a["traces 2"] = altrace2
        a["confidences 2"] = alquals2
        a["bases 2"] = albases2
    elif used1:  # strand 1 only
        # Strand 2 is shown raw, shifted to start where strand 1 starts.
        if altrace2:
            a["traces 2"] = discarded(traces2, None, "svg") >> alquals1.left()
        a["confidences 2"] = discarded(conf2, None, "integer") >> alquals1.left()
        a["bases 2"] = discarded(seq2, "-", "nucleotide") >> alquals1.left()
        if altrace1:
            a["traces 1"] = altrace1
        a["confidences 1"] = alquals1
        a["bases 1"] = albases1
    else:  # strand 2 only
        # Strand 1 is shown raw, shifted to start where strand 2 starts.
        if altrace1:
            a["traces 1"] = discarded(traces1, None, "svg") >> alquals2.left()
        a["confidences 1"] = discarded(conf1, None, "integer") >> alquals2.left()
        a["bases 1"] = discarded(seq1, "-", "nucleotide") >> alquals2.left()
        if altrace2:
            a["traces 2"] = altrace2
        a["confidences 2"] = alquals2
        a["bases 2"] = albases2

    a["contig"] = contig
    return a.toorigin()
예제 #4
0
파일: contig.py 프로젝트: madhadron/seqlabd
def assemble(seq1, conf1, traces1, seq2, conf2, traces2):
    """Build a contig from two reads and return the resulting Assembly.

    Each read is given as its bases (``seqN``), its confidences
    (``confN``, which must have the same length as ``seqN``) and its
    traces (``tracesN``; a falsy value means the read has no traces).

    The high-quality stretch of each read, as reported by
    ``highqualityinterval``, is aligned against the other read's stretch
    with ``align.ssearch36``. Every aligned track gets 'unused',
    'leftunused' or 'rightunused' features covering the parts outside
    that stretch.

    The returned Assembly holds 'confidences N' and 'bases N' for both
    reads, plus 'traces N' where traces are present. If at least one read
    contributes an aligned segment of non-zero width, the Assembly also
    holds 'contig' and is returned through ``toorigin()``. If neither
    does, both reads are returned unaligned, entirely marked 'unused',
    and there is no 'contig' key.
    """
    assert len(seq1) == len(conf1)
    assert len(seq2) == len(conf2)

    def mark(lo, hi, label):
        # Feature used to mark the span from lo to hi as not used.
        return interval(lo, hi, name=label, red=0, green=0, blue=0, alpha=0.5)

    def raw_unused(values, gap, trackclass):
        # Unaligned track starting at 0, marked 'unused' over its whole extent.
        return ProperList(0,
                          values,
                          gap=gap,
                          trackclass=trackclass,
                          features=[mark(neginf, posinf, 'unused')])

    # High-quality window of each read; an improper window contributes no bases.
    window1 = highqualityinterval(conf1)
    window2 = highqualityinterval(conf2)
    if window1.isproper():
        good1 = seq1[window1.left():window1.right()]
    else:
        good1 = ""
    if window2.isproper():
        good2 = seq2[window2.left():window2.right()]
    else:
        good2 = ""

    # NOTE: to support assembly against a template, write a function that
    # takes both segments plus the template and returns a third
    # (templateoffset, rawaltemplate) pair, then add the template track to
    # the Assembly below. The easiest route is probably an optional
    # template argument to assemble() defaulting to None, with ssearch36
    # used when it is None. The templated version would align both
    # segments against the template and then merge the two alignments
    # (inserting '-' where needed); assembly.conform_gaps is a very
    # similar algorithm that may help.
    (shift1, rawgood1), (shift2, rawgood2) = align.ssearch36(good1, good2)
    algood1 = aflist(shift1, rawgood1, gap='-', trackclass='nucleotide')
    algood2 = aflist(shift2, rawgood2, gap='-', trackclass='nucleotide')
    alwindow1 = ProperInterval(shift1, shift1 + algood1.width())
    alwindow2 = ProperInterval(shift2, shift2 + algood2.width())

    # Grow the aligned segments back out to the full reads, then carry the
    # confidences and traces along with the aligned bases.
    fullseq1 = extend(algood1, window1, aflist(0, seq1, '-'))
    fullseq2 = extend(algood2, window2, aflist(0, seq2, '-'))
    fullconf1 = tracealong(conf1, fullseq1)
    fullconf2 = tracealong(conf2, fullseq2)
    fulltraces1 = tracealong(traces1, fullseq1) if traces1 else None
    fulltraces2 = tracealong(traces2, fullseq2) if traces2 else None

    # Mark everything outside the aligned high-quality window on each track.
    per_read = (
        (alwindow1, (fullseq1, fullconf1, fulltraces1)),
        (alwindow2, (fullseq2, fullconf2, fulltraces2)),
    )
    for window, tracks in per_read:
        for track in tracks:
            if track is None:
                continue
            if window.isempty():
                track.appendfeature(mark(neginf, posinf, 'unused'))
                continue
            if window.left() > track.left():
                track.appendfeature(mark(neginf, window.left(), 'leftunused'))
            if window.right() < track.right():
                track.appendfeature(
                    mark(window.right(), posinf, 'rightunused'))

    # Sanity checks: slicing by the aligned window must give the segment width.
    assert algood1.width() == fullconf1[alwindow1].width()
    assert (fulltraces1 is None
            or algood1.width() == fulltraces1[alwindow1].width())
    assert algood2.width() == fullconf2[alwindow2].width()
    assert (fulltraces2 is None
            or algood2.width() == fulltraces2[alwindow2].width())

    contig = combine((algood1, fullconf1[alwindow1]),
                     (algood2, fullconf2[alwindow2]))

    for track in (fullconf1, fullconf2):
        track.setmeta('trackclass', 'integer')
    for track in (fulltraces1, fulltraces2):
        if track:
            track.setmeta('trackclass', 'svg')
    for track in (fullseq1, fullseq2, contig):
        track.setmeta('trackclass', 'nucleotide')

    have1 = algood1.width() != 0
    have2 = algood2.width() != 0
    a = Assembly()

    if not have1 and not have2:
        # Neither read is usable: return the raw reads, no contig.
        if traces1:
            a['traces 1'] = raw_unused(traces1, None, 'svg')
        a['confidences 1'] = raw_unused(conf1, None, 'integer')
        a['bases 1'] = raw_unused(seq1, '-', 'nucleotide')
        if traces2:
            a['traces 2'] = raw_unused(traces2, None, 'svg')
        a['confidences 2'] = raw_unused(conf2, None, 'integer')
        a['bases 2'] = raw_unused(seq2, '-', 'nucleotide')
        return a

    if have1 and have2:  # both strands
        if fulltraces1:
            a['traces 1'] = fulltraces1
        a['confidences 1'] = fullconf1
        a['bases 1'] = fullseq1
        if fulltraces2:
            a['traces 2'] = fulltraces2
        a['confidences 2'] = fullconf2
        a['bases 2'] = fullseq2
    elif have1:  # strand 1 only
        # Strand 2 is shown raw, shifted to start where strand 1 starts.
        if fulltraces2:
            a['traces 2'] = raw_unused(traces2, None, 'svg') >> fullconf1.left()
        a['confidences 2'] = (raw_unused(conf2, None, 'integer')
                              >> fullconf1.left())
        a['bases 2'] = raw_unused(seq2, '-', 'nucleotide') >> fullconf1.left()
        if fulltraces1:
            a['traces 1'] = fulltraces1
        a['confidences 1'] = fullconf1
        a['bases 1'] = fullseq1
    else:  # strand 2 only
        # Strand 1 is shown raw, shifted to start where strand 2 starts.
        if fulltraces1:
            a['traces 1'] = raw_unused(traces1, None, 'svg') >> fullconf2.left()
        a['confidences 1'] = (raw_unused(conf1, None, 'integer')
                              >> fullconf2.left())
        a['bases 1'] = raw_unused(seq1, '-', 'nucleotide') >> fullconf2.left()
        if fulltraces2:
            a['traces 2'] = fulltraces2
        a['confidences 2'] = fullconf2
        a['bases 2'] = fullseq2

    a['contig'] = contig
    return a.toorigin()