예제 #1
0
파일: rescaffold.py 프로젝트: trmznt/seqpy
def map_sequences():
    """Map every loaded contig onto the reference, retrying on the reverse strand.

    Each contig is passed to ``map_sequence``; when the returned start is
    negative the contig is reverse-complemented and mapped again, and contigs
    that still yield a negative start are skipped.

    NOTE(review): ``contigsfile``, ``args``, ``ref`` and ``max_mismatch`` are
    not defined in the visible code -- presumably module-level or enclosing
    scope names; confirm.
    """

    contigs = bioio.load(contigsfile)
    # NOTE(review): ``rseq`` is never used below, while the mapping calls use
    # ``ref`` -- verify whether ``ref`` was meant to be derived from ``rseq``.
    rseq = bioio.load(args.reffile)

    for contig in contigs:

        # map contig to ref sequence
        start, end, mismatch, _, _ = map_sequence(contig, ref, max_mismatch)
        if start < 0:
            # a negative start is handled as "not mapped": retry with the
            # reverse complement of the contig
            contig = funcs.reverse_complemented(contig)
            start, end, mismatch, _, _ = map_sequence(contig, ref,
                                                      max_mismatch)
            if start < 0:
                # neither orientation mapped; skip this contig
                continue
예제 #2
0
 def reverse_complemented(self):
     """Return a clone of this object carrying the reverse-complemented sequence.

     The clone is created with ``self.clone()`` and the result of its
     ``set_sequence`` call is returned; ``self`` is not assigned to here.
     """
     duplicate = self.clone()
     assign_sequence = duplicate.set_sequence
     return assign_sequence(funcs.reverse_complemented(self.seq))
예제 #3
0
 def reverse_complement(self):
     """Reverse-complement ``self.seq`` in place and return ``self``.

     Every call also toggles the boolean ``_reverse_complemented`` flag.
     """
     self.seq = funcs.reverse_complemented(self.seq)
     self._reverse_complemented = not self._reverse_complemented
     return self
예제 #4
0
 def reverse_complement(self):
     """Reverse-complement the labelled selection, or the whole model.

     When the model's current selection is a ``LabelSelection`` the change is
     applied to that selection only; otherwise it is applied to the model.
     """

     def flip(item):
         # NOTE(review): the item itself (not ``item.seq``) is handed to
         # funcs.reverse_complemented -- confirm that helper accepts it.
         return item.set_sequence(funcs.reverse_complemented(item))

     model = self.model()
     if not isinstance(model.selection(), LabelSelection):
         model.apply(flip)
         return
     model.selection().apply(flip)
예제 #5
0
def recircularize_sequence(seq, ref, max_mismatch):
    """Re-circularize ``seq`` against ``ref`` using pairwise alignment.

    Both ``seq`` and its reverse complement are aligned to ``ref`` with
    ``align_ref``; the reverse-complement alignment is used only when its
    score is strictly higher.  The slice of the aligned query covering the
    reference span becomes the starting result.  Aligned-query material lying
    before or after that span is, when longer than 15 symbols, aligned again
    and folded back in through ``funcs.align`` / ``funcs.merged``.

    Intermediate alignments are printed to stdout as a trace.

    Returns the re-circularized sequence.
    """
    flipped = funcs.reverse_complemented(seq)

    (r_first, r_last, q_first, q_last,
     ref_aln, qry_aln, fwd_score) = align_ref(ref, seq, max_mismatch)
    (rc_r_first, rc_r_last, rc_q_first, rc_q_last,
     rc_ref_aln, rc_qry_aln, rc_score) = align_ref(ref, flipped, max_mismatch)

    if rc_score > fwd_score:
        # the reverse strand aligns better: switch to its coordinates/alignment
        cerr('-> use reverse complement seq')
        r_first, r_last = rc_r_first, rc_r_last
        q_first, q_last = rc_q_first, rc_q_last
        ref_aln, qry_aln = rc_ref_aln, rc_qry_aln

    # reference span, both bounds inclusive
    ref_span = slice(r_first, r_last + 1)
    circular = qry_aln[ref_span]

    print(ref_aln)
    print(qry_aln)

    # material in front of the reference span
    head = qry_aln[0:r_first] if r_first > q_first else ''
    # material behind the reference span
    tail = qry_aln[r_last + 1:] if r_last < q_last else ''

    print('ustream ->', head)
    print('dstream ->', tail)

    ref_aln = ref_aln[ref_span]

    if len(head) > 15:
        head_ref_aln, head_aln, _ = funcs.align([ref_aln, head], degap=False)
        print('merged_1 >< merged_2 >< circularized_seq')
        print(head_ref_aln)
        print(head_aln)
        print(circular)
        circular = funcs.merged([circular, head_aln])

    if len(tail) > 15:
        tail_ref_aln, tail_aln, _ = funcs.align(
            [funcs.degapped(ref_aln), tail], degap=False)
        print('merged_3 >< merged_4')
        print(tail_ref_aln)
        print(tail_aln)
        # re-align the current result against the same gapped reference so
        # that it can be merged with the aligned tail
        realigned_ref, circular_aln, _ = funcs.align(
            [tail_ref_aln, funcs.degapped(circular)], degap=False)
        print('merged_5 >< merged_6')
        print(realigned_ref)
        print(circular_aln)
        print('circ >< merged_4 >< merged_6')
        print(circular)
        print(tail_aln)
        print(circular_aln)
        circular = funcs.merged([tail_aln, circular_aln])

    return circular
예제 #6
0
def recircularize_sequence(seq, ref, match_len=30, max_mismatch=0.9):
    """Rotate a circular DNA sequence so that it starts where ``ref`` starts.

    The first ``match_len`` symbols of ``ref`` (the "head") are located in
    ``seq`` with ``map_sequence``; when that fails the reverse complement of
    ``seq`` is tried instead.  Two layouts are then handled:

    * the head occurs twice in ``seq`` -- the stretch between both
      occurrences is taken as the full circle and the leftover flanks are
      merged into it;
    * the head occurs once -- ``seq`` is rotated to start at the head and the
      overlapping ends are merged into a single copy.

    Args:
        seq: sequence to re-circularize.
        ref: reference sequence that defines the desired start position.
        match_len: number of leading ``ref`` symbols used as the head probe.
        max_mismatch: mismatch threshold forwarded to ``map_sequence`` for
            the head searches.

    Returns:
        The re-circularized sequence.  When the head cannot be found in
        either orientation, or the overlap cannot be resolved, ``seq`` is
        returned un-rotated.  NOTE(review): in the head-not-found case the
        returned ``seq`` is the reverse-complemented one -- confirm callers
        expect that.
    """

    head = ref[:match_len]

    # first, map the head of ref to seq to find head position(s)
    head_start, _, _, _, _ = map_sequence(head, seq, max_mismatch)
    if head_start < 0:
        # not found on this strand: retry on the reverse complement
        seq = funcs.reverse_complemented(seq)
        head_start, _, _, _, _ = map_sequence(head, seq, max_mismatch)
        if head_start < 0:
            cerr('>>> head not found!')
            return seq

    head_start2 = -1
    if len(seq) - head_start >= len(ref):
        # remaining seq is at least as long as ref, so a 2nd head position
        # may exist; resume the search right after the first head
        offset = head_start + match_len
        head_start2, _, _, _, _ = map_sequence(head, seq[offset:],
                                               max_mismatch)
        if head_start2 >= 0:
            # translate back into coordinates of the full seq
            head_start2 += offset

    if head_start2 > head_start:
        # the full circle appears in seq, delimited by both head positions
        circularized_seq = seq[head_start:head_start2]

        # merge in what lies after the 2nd head and before the 1st head
        upstream_part = seq[head_start2:]
        downstream_part = seq[:head_start]
        if len(upstream_part) > 0:
            merged_1, merged_2, _ = funcs.align(
                [circularized_seq, upstream_part])
            circularized_seq = funcs.merged([merged_1, merged_2])
        if len(downstream_part) > 0:
            merged_3, merged_4, _ = funcs.align(
                [circularized_seq, downstream_part])
            circularized_seq = funcs.merged([merged_3, merged_4])

        return circularized_seq

    # single head: rotate seq so that it starts at the head
    upstream_part = seq[head_start:]
    downstream_part = seq[:head_start]

    # expected overlap between both ends, plus 15 symbols of slack
    # (presumably to tolerate indels -- TODO confirm)
    overlap_len = min(len(seq) - len(ref) + 15, len(upstream_part),
                      len(downstream_part))

    if overlap_len <= 0:
        # Nothing to merge.  A negative length (seq more than 15 symbols
        # shorter than ref) must stop here as well: used as a slice bound it
        # would select almost the whole part instead of an overlap window.
        return upstream_part + downstream_part

    # map the end of upstream_part and the start of downstream_part to ref
    _, overlap_end1, _, _, _ = map_sequence(upstream_part[-overlap_len:], ref)
    overlap_start2, _, _, _, _ = map_sequence(downstream_part[:overlap_len],
                                              ref)
    if overlap_start2 >= overlap_end1:
        cerr('>> algorithm problem for this sample!')
        return seq

    # upstream_part starts at the head of ref, so ref coordinates are used
    # directly as indices into it
    merge_len = overlap_end1 - overlap_start2
    upstream_tobe_merged, downstream_tobe_merged, _ = funcs.align([
        upstream_part[overlap_start2:overlap_end1],
        downstream_part[:merge_len],
    ])
    merged_segment = funcs.merged(
        [upstream_tobe_merged, downstream_tobe_merged])

    return (upstream_part[:overlap_start2] + merged_segment +
            downstream_part[merge_len:])