Ejemplo n.º 1
0
def test_seq_align_to_ref_multi():
    """seq_align_to_ref with max_workers=5 on ten identical named queries.

    Every query is the same ('test1', sequence) pair, so the expected output
    is ten copies of the same ('test1', aligned-sequence) pair.
    """
    reference = 'ATCGATTGC'
    query = 'ATCGATGC'
    # The query is one base shorter than the reference; the expected
    # alignment carries a single '-' at that position.
    expected_alignment = 'ATCGA-TGC'
    copies = 10

    queries = [('test1', query) for _ in range(copies)]
    expected = [('test1', expected_alignment) for _ in range(copies)]

    aligned = list(
        GeneralSeqTools.seq_align_to_ref(queries, reference, max_workers=5)
    )

    eq_(aligned, expected)
Ejemplo n.º 2
0
GGAGTGGtcaaCCCtCaGatGctgCATATAAGCagcTGCTTTtcgcctgt
actgggtctctctaggtagaccagatctgagcctgggagctctctggcta
tctagggaacccactgcttaagcctcaataaagcttgccttgagtgctct
aagtagtgtgtgccctctgttttgactctggtaactagagatccctcaga
cccttttggtagtgaggaaatctctagca""".replace('\n', '').upper()


# Reference LTR sequences, keyed by the label that is appended to
# 'Align-Con' to name the alignment column produced below.
ltrs = {'B': conb_ltr, 'C': conc_ltr}

# <codecell>

# Accumulates one record per (reference, sequence) alignment. Each record
# holds the sequence's Subtype and ID plus a single 'Align-Con<label>' entry
# for the reference it was aligned against.
ltr_aligns = []
for align_sub, ltr_seq in ltrs.items():
    for sub, df in pseqdf.groupby(level='Subtype'):
        # Keys of the 'ltr' mapping are 2-tuples; only the second element
        # (gi) is kept as the identifier. Missing LTRs are dropped first.
        ltr_seqs = [(gi, seq)
                    for (_, gi), seq in df['ltr'].dropna().to_dict().items()]
        aligned = GeneralSeqTools.seq_align_to_ref(ltr_seqs, ltr_seq)
        for num, (gi, align) in enumerate(aligned):
            # Sparse progress report: items 0, 10, 100 and every 1000th.
            if num in (0, 10, 100) or num % 1000 == 0:
                # A single pre-formatted string inside parentheses prints the
                # same text on Python 2 and Python 3; the bare
                # `print a, b, c` statement is a SyntaxError on Python 3.
                print('%s %s %s' % (align_sub, sub, num))

            ltr_aligns.append({
                'Subtype': sub,
                'ID': gi,
                'Align-Con' + align_sub: align,
            })

# <codecell>

# Collapse the per-alignment records into one row per (Subtype, ID) with one
# column per reference alignment; 'first' selects the value kept in each cell.
#
# `values` and the row keys are passed positionally on purpose: pandas 0.14
# renamed the row-key keyword from `rows` to `index` and later releases
# dropped `rows`, so either keyword fails on some pandas version. The
# positional slots (data, values, row keys) are the same on both sides.
ltr_align_df = pd.pivot_table(
    pd.DataFrame(ltr_aligns),
    ['Align-ConC', 'Align-ConB'],  # values
    ['Subtype', 'ID'],             # row keys (`rows` in old pandas, `index` now)
    aggfunc='first',
)