def test_seq_align_to_ref_multi():
    """Check seq_align_to_ref on ten identical queries with max_workers=5.

    Every ('test1', query) input pair is expected to come back as
    ('test1', 'ATCGA-TGC') after alignment against 'ATCGATTGC'.
    """
    reference = 'ATCGATTGC'
    query = 'ATCGATGC'
    expected_alignment = 'ATCGA-TGC'
    n_copies = 10

    # The same named query is submitted n_copies times, so the expected
    # output is simply the same aligned pair repeated n_copies times.
    queries = [('test1', query)] * n_copies
    expected = [('test1', expected_alignment)] * n_copies

    aligned = list(
        GeneralSeqTools.seq_align_to_ref(queries, reference, max_workers=5)
    )
    eq_(aligned, expected)
GGAGTGGtcaaCCCtCaGatGctgCATATAAGCagcTGCTTTtcgcctgt actgggtctctctaggtagaccagatctgagcctgggagctctctggcta tctagggaacccactgcttaagcctcaataaagcttgccttgagtgctct aagtagtgtgtgccctctgttttgactctggtaactagagatccctcaga cccttttggtagtgaggaaatctctagca""".replace('\n', '').upper() ltrs = {'B':conb_ltr, 'C':conc_ltr} # <codecell> ltr_aligns = [] for align_sub, ltr_seq in ltrs.items(): for sub, df in pseqdf.groupby(level = 'Subtype'): ltr_seqs = [(gi, seq) for (_, gi), seq in df['ltr'].dropna().to_dict().items()] for num, (gi, align) in enumerate(GeneralSeqTools.seq_align_to_ref(ltr_seqs, ltr_seq)): if (num == 0) or (num == 100) or (num == 10) or (num % 1000 == 0): print align_sub, sub, num ltr_aligns.append({ 'Subtype':sub, 'ID':gi, 'Align-Con'+align_sub:align }) # <codecell> ltr_align_df = pd.pivot_table(pd.DataFrame(ltr_aligns), rows = ['Subtype', 'ID'], values = ['Align-ConC', 'Align-ConB'], aggfunc = 'first')