def make_consensus(bam_file, ref_file, freebayes_vcf):
    """Pair every reference segment with its gap-filled consensus.

    Reference records (from ``ref_file``) and the per-``CHROM`` slices of the
    freebayes VCF are both ordered by identifier so they can be matched up
    positionally; the two sets of identifiers are asserted to be equal.

    :param bam_file: passed unchanged to ``zero_coverage_positions``
    :param ref_file: FASTA reference; parsed with ``SeqIO.parse`` and also
        passed to ``zero_coverage_positions``
    :param freebayes_vcf: VCF input for ``bioframes.load_vcf``
    :return: ``zip`` of ``(segment id, gap_fill_ref(...) result)`` in sorted
        id order
    """
    records = list(SeqIO.parse(ref_file, 'fasta'))
    vcf_frame = bioframes.load_vcf(freebayes_vcf)
    # groupby yields (CHROM, sub-frame) pairs; transpose them into two tuples.
    chrom_ids, raw_frames = zip(*vcf_frame.groupby('CHROM'))
    fixed_frames = map(fix_fb_df, raw_frames)

    def record_id(record):
        return record.id

    def frame_chrom(frame):
        # Every row of a grouped frame shares one CHROM; read it off row 0.
        return frame.CHROM.iloc[0]

    records = sorted(records, key=record_id)
    expected_ids = sorted(chrom_ids)
    # Positional pairing below is only valid if both sides name the same
    # segments in the same order.
    assert list(pluck_attr('id', records)) == expected_ids
    sequences = (str(record.seq) for record in records)
    fixed_frames = sorted(fixed_frames, key=frame_chrom)

    def build_consensus(original_ref, segment_df):
        # Fold swap_base over the (POS, REF, ALT) triples, starting from the
        # untouched reference and a zero offset.
        variants = zip(segment_df.POS, segment_df.REF, segment_df.ALT)
        new_ref, off = reduce(swap_base, variants, (original_ref, 0))
        # Currently only inserts are handled, i.e. rows with OFF > 0.
        inserts = segment_df[segment_df.OFF > 0]
        offs = inserts.OFF
        off_pos = inserts.POS
        # NOTE(review): the arguments do not vary per segment, yet this is
        # evaluated once per segment -- confirm whether that is intended.
        pileup_positions = zero_coverage_positions(bam_file, ref_file)
        return gap_fill_ref(original_ref, new_ref, pileup_positions,
                            offs, off_pos, off)

    return zip(expected_ids, map(build_consensus, sequences, fixed_frames))
def process(original_ref, df):
    """Apply the variant rows in ``df`` to one reference and gap-fill it.

    ``bam_file`` and ``ref_file`` are not parameters; they are looked up in
    the surrounding (enclosing or module) scope when this function runs.

    :param original_ref: unmodified reference, used as the starting value of
        the ``swap_base`` fold and passed on to ``gap_fill_ref``
    :param df: frame exposing ``POS``, ``REF``, ``ALT`` and ``OFF`` columns
    :return: whatever ``gap_fill_ref`` returns
    """
    # Fold swap_base over the (POS, REF, ALT) triples, starting from the
    # untouched reference and a zero offset.
    variants = zip(df.POS, df.REF, df.ALT)
    new_ref, off = reduce(swap_base, variants, (original_ref, 0))
    # Currently only inserts are handled, i.e. rows with OFF > 0.
    inserts = df[df.OFF > 0]
    offs = inserts.OFF
    off_pos = inserts.POS
    pileup_positions = zero_coverage_positions(bam_file, ref_file)
    return gap_fill_ref(original_ref, new_ref, pileup_positions,
                        offs, off_pos, off)
def make_dict(classes):
    """Index classes by their ``__name__``.

    The previous implementation passed ``classes`` twice to ``map`` and gave
    ``zip`` a single iterable, so ``dict`` received 1-tuples and raised
    ``ValueError`` for any non-empty input.

    :param classes: iterable of objects that have a ``__name__`` attribute
        (classes, functions, ...)
    :return: dict mapping each ``__name__`` to its object; when two entries
        share a name the later one wins
    """
    return {cls.__name__: cls for cls in classes}