예제 #1
0
파일: contig.py 프로젝트: madhadron/seqlabd
def ab1toassembly(filename1, filename2):
    """Read two AB1 files and combine them into a single Assembly.

    The first file's sequence, confidences and traces are handed to
    ``assemble`` unchanged.  The second file's fields are each passed
    through the matching ``rc*`` helper first (presumably reverse
    complementing the read -- confirm against those helpers).
    """
    first = ab1.read(filename1)
    second = ab1.read(filename2)
    first_args = (first['sequence'], first['confidences'], first['traces'])
    second_args = (rcbases(second['sequence']),
                   rcconfidences(second['confidences']),
                   rctraces(second['traces']))
    return assemble(*(first_args + second_args))
예제 #2
0
파일: contig.py 프로젝트: nhoffman/seqlabd
def ab1toassembly(filename1, filename2):
    """Load a pair of AB1 files and return the Assembly built from them.

    ``filename1`` supplies the first three arguments of ``assemble`` as
    read.  The fields of ``filename2`` go through ``rcbases``,
    ``rcconfidences`` and ``rctraces`` respectively before being passed on.
    """
    read_a = ab1.read(filename1)
    read_b = ab1.read(filename2)

    # Fields of the first read are used exactly as stored in the file.
    bases_a = read_a["sequence"]
    confidences_a = read_a["confidences"]
    traces_a = read_a["traces"]

    # Fields of the second read are transformed by the rc* helpers.
    bases_b = rcbases(read_b["sequence"])
    confidences_b = rcconfidences(read_b["confidences"])
    traces_b = rctraces(read_b["traces"])

    return assemble(
        bases_a, confidences_a, traces_a, bases_b, confidences_b, traces_b
    )
예제 #3
0
def assemble(read1, read2, *extra_seqs):
    """Build a TrackSet displaying two AB1 reads and any extra sequences.

    Both inputs are loaded with ``ab1.read``.  The second read's sequence,
    confidences and traces are passed through ``tracks.revcomp`` before
    being combined with the first read by ``contig.contig``.

    Args:
        read1: Argument handed to ``ab1.read`` for the first read.
        read2: Argument handed to ``ab1.read`` for the second read.
        *extra_seqs: ``(name, sequence)`` pairs.  When the contig result
            contains a reference, each sequence is aligned against it with
            ``fasta.fasta``; otherwise it is added as-is at offset 0.

    Returns:
        A tuple ``(trackset, strands)`` where ``strands`` is the
        ``'strands'`` entry of the ``contig.contig`` result.
    """
    tracks1 = ab1.read(read1)
    tracks2 = ab1.read(read2)
    ref = contig.contig(tracks1['sequence'], tracks1['confidences'],
                        tracks.revcomp(tracks2['sequence']),
                        tracks.revcomp(tracks2['confidences']))
    t = tracks.TrackSet()

    read1_offset, read1_sequence = ref['read1']
    read2_offset, read2_sequence = ref['read2']
    # Re-gap the per-base data so it lines up with the aligned sequences.
    read1_confs = tracks.regap(read1_sequence, tracks1['confidences'])
    read2_confs = tracks.regap(read2_sequence,
                               tracks.revcomp(tracks2['confidences']))
    read1_traces = tracks.regap(read1_sequence, tracks1['traces'])
    read2_traces = tracks.regap(read2_sequence,
                                tracks.revcomp(tracks2['traces']))

    t.extend([
        tracks.TrackEntry('read 1 traces', read1_offset, read1_traces),
        tracks.TrackEntry('read 1 confidences', read1_offset, read1_confs),
        tracks.TrackEntry('read 1 bases', read1_offset, read1_sequence),
        tracks.TrackEntry('read 2 traces', read2_offset, read2_traces),
        tracks.TrackEntry('read 2 confidences', read2_offset, read2_confs),
        tracks.TrackEntry('read 2 bases', read2_offset, read2_sequence),
    ])

    # Look the reference up once and test identity with None rather than
    # relying on ``!=``, which can be overridden by the operand's type.
    reference = ref['reference']
    has_reference = reference is not None
    if has_reference:
        reference_offset, reference_sequence = reference
        t.append(tracks.TrackEntry('reference', reference_offset,
                                   reference_sequence))

    for name, s in extra_seqs:
        if has_reference:
            # Place the extra sequence relative to the reference track.
            (roffset, _), (soffset, saligned) = fasta.fasta(
                reference_sequence, s)
            t.append(tracks.TrackEntry(name,
                                       reference_offset + soffset - roffset,
                                       tracks.sequence(saligned)))
        else:
            t.append(tracks.TrackEntry(name, 0, s))

    # Now add an assembly of the lab sequence and reference sequence.
    if has_reference and len(extra_seqs) == 1:
        # With exactly one extra sequence the last two tracks are the
        # extra ("lab") sequence and the reference, in that order.
        labtrack = t[-1]
        reftrack = t[-2]
        offset = max(labtrack.offset, reftrack.offset)
        loffset = offset - labtrack.offset
        roffset = offset - reftrack.offset
        # Internal invariant: at most one of the tracks needs trimming.
        assert loffset >= 0 and roffset >= 0 and (loffset == 0 or roffset == 0)
        overlap = zip(labtrack.track[loffset:], reftrack.track[roffset:])
        bases = tracks.sequence(
            ''.join([' ' if a == b else 'X' for a, b in overlap]))
        if 'X' in bases:
            t.append(tracks.TrackEntry('mismatches', offset, bases))
    return (t, ref['strands'])
예제 #4
0
def ab1tohtml(ab1filename):
    """Render one AB1 file as a complete HTML document string.

    The file's traces, confidences and sequence each become a track of an
    ``Assembly``, which ``renderassembly`` turns into the document body.
    """
    record = ab1.read(ab1filename)
    traces_track = aflist(0, record['traces'], gap=None, trackclass='svg')
    confidences_track = aflist(0, record['confidences'], gap=None,
                               trackclass='integer')
    bases_track = aflist(0, record['sequence'], gap='-',
                         trackclass='nucleotide')
    assembly = Assembly([('traces', traces_track),
                         ('confidences', confidences_track),
                         ('bases', bases_track)])
    return ("<!DOCTYPE html>" + "<html><body>"
            + renderassembly(assembly) + "</body></html>")
예제 #5
0
def ab1tohtml(ab1filename):
    """Return an HTML page (as a string) displaying the given AB1 file.

    Three tracks -- traces, confidences and bases -- are built with
    ``aflist`` at offset 0, wrapped in an ``Assembly`` and rendered by
    ``renderassembly`` inside a minimal HTML skeleton.
    """
    data = ab1.read(ab1filename)
    # (track name, key in the AB1 record, gap marker, track class)
    track_specs = (
        ('traces', 'traces', None, 'svg'),
        ('confidences', 'confidences', None, 'integer'),
        ('bases', 'sequence', '-', 'nucleotide'),
    )
    assembly = Assembly([
        (label, aflist(0, data[key], gap=gap_marker, trackclass=css_class))
        for label, key, gap_marker, css_class in track_specs
    ])
    page = "<!DOCTYPE html>" + "<html><body>"
    page = page + renderassembly(assembly)
    return page + "</body></html>"
예제 #6
0
    'vent', r'\b[Bb]acteri(um|a)', r'sp\.', r'str\.'
]))


def is_unclassified(s):
    """Is string *s*, containing an organism description, ill posed?

    Returns True when ``unclassified_regex`` matches anywhere in *s*,
    False otherwise.
    """
    # re.search returns a match object or None; compare against None
    # directly instead of using the fragile ``x and True or False`` idiom.
    return re.search(unclassified_regex, s) is not None


# workup should be a dictionary with the keys "accession", "workup",
# "pat_name", "amp_name", "seq_key", as selected directly from the
# workups view of the database. In production, it will be found in a
# JSON file written in the directory.
def generate_report(lookup_fun, assembled_render, strandwise_render):
    def f((workup, read1path, read2path), omit_blast=False):
        read1 = ab1.read(read1path)
        read2 = ab1.read(read2path)
        assembly = contig.assemble(read1['sequence'], read1['confidences'],
                                   read1['traces'], read2['sequence'],
                                   read2['confidences'], read2['traces'])
        if 'contig' in assembly:
            if not omit_blast:
                v = lookup_fun(''.join(assembly['contig'].values),
                               save_path=os.path.join(workup['path'],
                                                      'blast.xml'),
                               save_json=os.path.join(workup['path'],
                                                      'blast.json'))
                body = assembled_render(workup, assembly, v, omit_blast=False)
            else:
                body = assembled_render(workup,
                                        assembly,
예제 #7
0
                                              r'sp\.',
                                              r'str\.']))


def is_unclassified(s):
    """Is string *s*, containing an organism description, ill posed?

    Returns True when ``unclassified_regex`` matches anywhere in *s*,
    False otherwise.
    """
    # re.search yields a match object or None, so an identity test gives
    # the boolean directly without the ``x and True or False`` idiom.
    return re.search(unclassified_regex, s) is not None


# workup should be a dictionary with the keys "accession", "workup",
# "pat_name", "amp_name", "seq_key", as selected directly from the
# workups view of the database. In production, it will be found in a
# JSON file written in the directory.
def generate_report(lookup_fun, assembled_render, strandwise_render):
    def f((workup, read1path, read2path), omit_blast=False):
        read1 = ab1.read(read1path)
        read2 = ab1.read(read2path)
        assembly = contig.assemble(read1['sequence'], read1['confidences'], read1['traces'],
                                   read2['sequence'], read2['confidences'], read2['traces'])
        if 'contig' in assembly:
            if not omit_blast:
                v = lookup_fun(''.join(assembly['contig'].values), save_path=os.path.join(workup['path'], 'blast.xml'),
                               save_json=os.path.join(workup['path'], 'blast.json'))
                body = assembled_render(workup, assembly, v, omit_blast=False)
            else:
                body = assembled_render(workup, assembly, None, omit_blast=True)
            return ('assembled', body)
        else:
            if not omit_blast:
                v1 = lookup_fun(''.join(assembly['bases 1'].values), save_path=workup['path'],
                                save_json=os.path.join(workup['path'], 'blast.json'))