def parse_alleles(toks, options):
    """Return the alleles called for the sample in the last column.

    The second-to-last token is taken as the FORMAT column and the last
    token as the sample column, so only a single (final) sample is read.
    TODO: make this more general?

    Args:
        toks: whitespace-split fields of one VCF record line.
        options: object with a ``g`` attribute; when truthy the genotype
            found in the record is ignored and ``"0|1"`` is used instead.

    Returns:
        A list with one allele per genotype index, looked up in
        ``[parse_ref(toks)] + parse_alts(toks)``; an empty list when the
        first FORMAT key is not ``GT``.
    """
    format_key = toks[-2].split(":")[0]
    if format_key != "GT":
        return []

    genotype = "0|1" if options.g else toks[-1].split(":")[0]

    # A missing call (".") is read as index 0, and unphased ("/")
    # separators are handled the same way as phased ("|") ones.
    indices = genotype.replace(".", "0").replace("/", "|").split("|")

    # Index 0 is whatever parse_ref returns; the entries returned by
    # parse_alts follow from index 1 onwards.
    ref_allele = parse_ref(toks)
    candidates = [ref_allele] + parse_alts(toks)
    return [candidates[int(index)] for index in indices]
def make_allele_dict(vcf_path, options):
    """Load all variant records of a VCF file, keyed by their coordinates.

    Lines starting with ``#``, lines containing any of the ignore
    keywords, and blank / whitespace-only lines are skipped.  When two
    records share the same key, the later one overwrites the earlier one.

    Args:
        vcf_path: path of the VCF file to read.
        options: object providing
            ``i`` -- iterable of substrings; a line containing any of
            them is skipped,
            ``c`` -- when exactly ``False`` the chromosome name is part
            of the key, otherwise ``None`` is used in its place,
            plus whatever ``parse_alleles`` reads from it.

    Returns:
        A tuple ``(vcf_dict, ref_dict)`` where
        ``vcf_dict`` maps ``(chrom, pos)`` to the list returned by
        ``parse_alleles`` and ``ref_dict`` maps ``(chrom, pos)`` to the
        one-element list ``[parse_ref(toks)]``.
    """
    vcf_dict = {}
    ref_dict = {}
    with open(vcf_path) as f:
        for line in f:
            if line.startswith("#"):
                continue
            if any(ignore_keyword in line for ignore_keyword in options.i):
                continue
            toks = line.split()
            if not toks:
                # Blank or whitespace-only line (e.g. a trailing empty
                # line): there are no fields to index, so skip it rather
                # than fail with IndexError on toks[0].
                continue
            chrom = toks[0] if options.c is False else None
            pos = int(toks[1])
            key = (chrom, pos)
            vcf_dict[key] = parse_alleles(toks, options)
            ref_dict[key] = [parse_ref(toks)]
    return vcf_dict, ref_dict