Esempio n. 1
0
def check_barcode(fastadict, barcodedict, barcodelength, maxdistance):
    """Assign a read pair to a sample by its barcode and trim both reads.

    The observed barcode is the first ``barcodelength / 2`` characters of the
    forward sequence followed by the same number of leading characters of the
    reverse sequence.  An exact match against ``barcodedict`` is preferred;
    failing that, the entry with the smallest ``hamdist`` that does not exceed
    ``maxdistance`` is chosen.  In both cases the first qualifying entry in
    iteration order wins ties.

    Args:
        fastadict: record providing 'forward_sequence' and 'reverse_sequence'.
        barcodedict: mapping of sample -> dict with the keys 'barcode',
            'forward_spacer' and 'reverse_spacer'.
        barcodelength: combined barcode length across both reads.
        maxdistance: largest distance accepted for an inexact match.

    Returns:
        ``fastadict`` threaded through ``assoc`` with the keys "sample"
        (None when nothing matched), "spacermismatch", "barcode",
        "barcode_distance" (0 for an exact match or no match),
        "forward_sequence" and "reverse_sequence" (barcode and, when a sample
        matched, spacer removed).
    """
    halfbarcode = int(barcodelength / 2)
    fseq = fastadict['forward_sequence']
    rseq = fastadict['reverse_sequence']
    barcode = fseq[:halfbarcode] + rseq[:halfbarcode]

    samplematch = None
    barcodedata = None
    barcode_distance = 0

    # Check for a perfect match first; hamdist is only consulted when none exists.
    for sample, samplebarcodedict in barcodedict.items():
        if samplebarcodedict['barcode'] == barcode:
            samplematch = sample
            barcodedata = samplebarcodedict
            break

    # Otherwise choose the closest barcode within the allowed distance.
    # barcodedata (not samplematch) is the "found" marker so that a falsy
    # sample key such as 0 or "" still counts as a match.
    if barcodedata is None:
        for sample, samplebarcodedict in barcodedict.items():
            hdist = hamdist(samplebarcodedict['barcode'], barcode)
            if hdist > maxdistance:
                continue
            if barcodedata is None or hdist < barcode_distance:
                barcode_distance = hdist
                samplematch = sample
                barcodedata = samplebarcodedict

    # Remove the barcode from both reads.
    fseq = fseq[halfbarcode:]
    rseq = rseq[halfbarcode:]

    # Check the spacer between barcode and primer, then remove it.  The spacer
    # length is trimmed whether or not it matched; a mismatch is only flagged.
    spacermismatch = False
    if barcodedata is not None:
        forward_spacer = barcodedata['forward_spacer']
        reverse_spacer = barcodedata['reverse_spacer']
        spacermismatch = not (fseq.startswith(forward_spacer)
                              and rseq.startswith(reverse_spacer))
        fseq = fseq[len(forward_spacer):]
        rseq = rseq[len(reverse_spacer):]

    # return updated values
    return thread_first(fastadict, (assoc, "sample", samplematch),
                        (assoc, "spacermismatch", spacermismatch),
                        (assoc, "barcode", barcode),
                        (assoc, "barcode_distance", barcode_distance),
                        (assoc, "forward_sequence", fseq),
                        (assoc, "reverse_sequence", rseq))
def truncate_by_size(fastadict, trimsize_forward, trimsize_reverse):
    """Clip both reads to a maximum length and flag pairs that are too short.

    Reads 'forward_sequence' and 'reverse_sequence' from ``fastadict``.  The
    pair is marked "tooshort" when either read is shorter than its trim size.

    Returns:
        ``fastadict`` threaded through ``assoc`` with the keys "tooshort",
        "forward_sequence" and "reverse_sequence" (each cut to its trim size).
    """
    forward = fastadict['forward_sequence']
    reverse = fastadict['reverse_sequence']

    # Both length checks are always evaluated; either one failing flags the pair.
    tooshort = any((len(forward) < trimsize_forward,
                    len(reverse) < trimsize_reverse))

    updates = (
        (assoc, "tooshort", tooshort),
        (assoc, "forward_sequence", forward[:trimsize_forward]),
        (assoc, "reverse_sequence", reverse[:trimsize_reverse]),
    )
    return thread_first(fastadict, *updates)
Esempio n. 3
0
 def from_yaml(cls, yaml_f):
     """Build an instance of ``cls`` from the YAML file at ``yaml_f``.

     The optional sections 'facet_levels', 'questions', 'national', 'socrata'
     and 'surveys' are passed to ``cls`` as None when absent or empty.  When
     present, 'national', 'socrata' and 'surveys' are wrapped in
     ``ColumnFilter``, ``SocrataConfig`` and ``SurveyConfig``; the survey
     'meta' entry (keys 'rows' and 'cols') is first turned into a DataFrame.

     NOTE(review): presumably decorated with @classmethod outside this view.
     """
     with open(yaml_f) as fh:
         # yaml.load() without a Loader is rejected by PyYAML >= 6 and is the
         # unsafe entry point on older releases.  safe_load builds plain
         # Python types only; if the config relies on custom YAML tags, call
         # yaml.load with an explicit Loader instead.
         # An empty file parses to None, hence the fallback to {}.
         y = yaml.safe_load(fh) or {}

     surveys = y.get('surveys')
     if surveys:
         # Replaces the nested 'meta' mapping in place with a DataFrame.
         surveys['meta'] = pd.DataFrame(
             surveys['meta']['rows'],
             columns=surveys['meta']['cols']
         )
     national = y.get('national')
     socrata = y.get('socrata')

     logger.info('loading cfg')
     cfg = thread_first(
         y,
         (assoc, 'facet_levels', y.get('facet_levels')),
         (assoc, 'questions', y.get('questions')),
         (assoc, 'national', ColumnFilter(**national) if national else None),
         (assoc, 'socrata', SocrataConfig(**socrata) if socrata else None),
         (assoc, 'surveys', SurveyConfig(**surveys) if surveys else None)
     )
     return cls(**cfg)
Esempio n. 4
0
def test_thread_first():
    """thread_first passes the value through each step as the first argument."""
    # (arguments to thread_first, expected result)
    cases = [
        ((2,), 2),
        ((2, inc), 3),
        ((2, inc, inc), 4),
        ((2, double, inc), 5),
        ((2, (add, 5), double), 14),
    ]
    for args, expected in cases:
        assert thread_first(*args) == expected
Esempio n. 5
0
def test_thread_first():
    """Check thread_first with no steps, plain callables and a tuple step."""
    # No steps: the value comes back unchanged.
    unchanged = thread_first(2)
    assert unchanged == 2

    # Plain callables are applied left to right.
    once = thread_first(2, inc)
    assert once == 3
    twice = thread_first(2, inc, inc)
    assert twice == 4
    doubled_then_inc = thread_first(2, double, inc)
    assert doubled_then_inc == 5

    # A tuple step receives the threaded value as its first argument.
    added_then_doubled = thread_first(2, (add, 5), double)
    assert added_then_doubled == 14