def main(args):
    """Sum the Het/Hom counts of one population over selected VCF records.

    Reads ``args.exacVcf`` line by line. A data record contributes to the
    total only when

    * the HDF5 table ``/posCollection/posLs`` in ``args.hdf5PosFile`` has at
      least one row with the same ``chrom`` and ``pos``, and
    * its INFO column (whitespace-separated field 7) contains
      ``AC_<pop>=`` where ``<pop>`` is ``args.pop``.

    For such a record the comma-separated values following ``Het_<pop>=``
    and ``Hom_<pop>=`` in the INFO column are handed to ``getCounts``
    together with the reference allele, the alternate alleles, the
    ``chrom:pos`` key, the 1000 Genomes lookup and the last column of the
    record; every value ``getCounts`` yields is converted with ``int`` and
    added to the total.  (What ``getCounts`` filters on is defined outside
    this function.)

    The total is written as a single line to ``args.outFile``.

    Args:
        args: Namespace providing ``hdf5PosFile``, ``exacVcf``, ``oneKgFile``,
            ``pop`` and ``outFile``.

    Raises:
        IndexError: a data record has too few columns, or carries
            ``AC_<pop>=`` without the matching ``Het_<pop>=``/``Hom_<pop>=``.
        ValueError: the position column is not an integer.
    """
    # Loop-invariant INFO tags, built once instead of once per record.
    popTag = 'AC_' + args.pop + '='
    zygTags = ('Het_' + args.pop + '=', 'Hom_' + args.pop + '=')

    tot = 0
    # The context manager closes the HDF5 file even if parsing, the table
    # query or getCounts raises; the previous trailing close() was skipped
    # on any exception.
    with tables.open_file(args.hdf5PosFile, mode="r") as h5file:
        table = h5file.root.posCollection.posLs
        lookUpSnvs = getSNVInfo(args.exacVcf, table)
        useLowCov1KG = True
        oneKPopInfo = annotateEVSMajorFreq.get1kInfoAllPop(
            lookUpSnvs, args.oneKgFile, useLowCov1KG)

        with open(args.exacVcf) as f:
            for line in f:
                if line[0] == '#':
                    continue  # VCF header / meta line

                sp = line.strip().split()
                chrom = sp[0]
                pos = int(sp[1])

                # Materialise the matches so the query iterator is always
                # run to completion before the next query on this table.
                query = '(chrom == b"%s") & (pos == %d)' % (chrom, pos)
                resLs = [row['pos'] for row in table.where(query)]
                if not resLs:
                    continue

                info = sp[7]
                if popTag not in info:
                    continue

                key = chrom + ':' + str(pos)
                ref = sp[3]
                altLs = sp[4].split(',')
                effField = sp[-1]
                for zygTag in zygTags:
                    # Text between "<tag>" and the next ';' is the
                    # comma-separated per-allele count list.
                    countLs = info.split(zygTag)[1].split(';')[0].split(',')
                    for count in getCounts(ref, countLs, key, altLs,
                                           oneKPopInfo, effField):
                        tot += int(count)

    with open(args.outFile, 'w') as fout:
        print(str(tot), file=fout)
def main(args):
    """Write the records of ``args.vcfFile`` accepted by ``checkRare``.

    Header lines (first character ``#``) are dropped.  Every other line is
    split on whitespace; the reference allele (field 3), the comma-separated
    alternate alleles (field 4) and the ``chrom:pos`` key are passed to
    ``checkRare`` along with the lookup returned by
    ``annotateEVSMajorFreq.get1kInfoAllPop``.  When ``checkRare`` returns a
    truthy value the stripped line is written to ``args.outFile``.

    Args:
        args: Namespace providing ``vcfFile``, ``oneKgFile`` and ``outFile``.

    Raises:
        IndexError: a data record has fewer than five columns.
    """
    snvLookup = getSNVInfo(args.vcfFile)
    popInfo = annotateEVSMajorFreq.get1kInfoAllPop(
        snvLookup,
        args.oneKgFile,
        True,  # useLowCov1KG
    )

    with open(args.vcfFile) as vcfIn, open(args.outFile, 'w') as rareOut:
        for rawLine in vcfIn:
            if rawLine[0] == '#':
                continue

            record = rawLine.strip()
            fields = record.split()
            chrom, pos = fields[0], fields[1]
            refAllele = fields[3]
            altAlleles = fields[4].split(',')

            if checkRare(popInfo, altAlleles, refAllele, chrom + ':' + pos):
                print(record, file=rareOut)
def main(args):
    """Tally Het/Hom counts per gene for one population and list the variants.

    Reads ``args.exacVcf`` line by line.  For every data record whose INFO
    column (whitespace-separated field 7) contains ``AC_<pop>=`` (``<pop>``
    is ``args.pop``), the HDF5 table ``/posCollection/posLs`` in
    ``args.hdf5PosFile`` is queried for rows with the same ``chrom`` and
    ``pos``; the ``gene`` values of those rows are intersected with the gene
    collection returned by ``loadGenes(args.geneLsFile)``.

    For each resulting gene, the comma-separated values following
    ``Het_<pop>=`` and ``Hom_<pop>=`` are passed to ``getCounts`` together
    with the reference allele, alternate alleles, ``chrom:pos`` key, the
    1000 Genomes lookup, the last column of the record, the gene and a flag
    telling whether the key is in ``loadPos(args.goodVarPosFile)``.  Each
    yielded value is added (as ``int``) to that gene's total, and every
    non-zero value produces one row ``gene<TAB>count<TAB><original record>``
    in ``args.varLsOutFile``.

    Finally ``gene<TAB>total`` is written for every tallied gene to
    ``args.outFile``.

    Args:
        args: Namespace providing ``geneLsFile``, ``goodVarPosFile``,
            ``hdf5PosFile``, ``exacVcf``, ``oneKgFile``, ``pop``,
            ``varLsOutFile`` and ``outFile``.

    Raises:
        IndexError: a data record has fewer than eight columns, or carries
            ``AC_<pop>=`` without the matching ``Het_<pop>=``/``Hom_<pop>=``.
        ValueError: the position column is not an integer.
    """
    # NOTE(review): `genes` must be set-like (it is intersected with a set
    # below) -- confirm against loadGenes.
    genes = countTargetSampleTotalVarsForGene.loadGenes(args.geneLsFile)
    goodPos = loadPos(args.goodVarPosFile)

    # Loop-invariant INFO tags, built once instead of once per record.
    popTag = 'AC_' + args.pop + '='
    zygTags = ('Het_' + args.pop + '=', 'Hom_' + args.pop + '=')

    tot = defaultdict(int)
    # The context manager closes the HDF5 file even if parsing, the table
    # query or getCounts raises; the previous trailing close() was skipped
    # on any exception.
    with tables.open_file(args.hdf5PosFile, mode="r") as h5file:
        table = h5file.root.posCollection.posLs
        lookUpSnvs = getSNVInfo(args.exacVcf)
        useLowCov1KG = True
        oneKPopInfo = annotateEVSMajorFreq.get1kInfoAllPop(
            lookUpSnvs, args.oneKgFile, useLowCov1KG)

        with open(args.exacVcf) as f, open(args.varLsOutFile, 'w') as fout:
            print('gene\tvarCount\tchrom\tpos\trs\tref\talt\tqual\tfilter\tinfo', file=fout)
            for line in f:
                if line[0] == '#':
                    continue  # VCF header / meta line

                record = line.strip()
                sp = record.split()
                chrom = sp[0]
                pos = int(sp[1])

                # Cheap string test first: records without the population
                # tag never contribute, so skip the HDF5 query for them.
                info = sp[7]
                if popTag not in info:
                    continue

                query = '(chrom == b"%s") & (pos == %d)' % (chrom, pos)
                resGeneSet = {row['gene'].decode("utf-8")
                              for row in table.where(query)}
                # Sorted so the listing order (and the order genes first
                # enter `tot`) is reproducible between runs instead of
                # following arbitrary set iteration order.
                targetGenes = sorted(resGeneSet & genes)
                if not targetGenes:
                    continue

                key = chrom + ':' + str(pos)
                inGoodPos = key in goodPos
                ref = sp[3]
                altLs = sp[4].split(',')
                effField = sp[-1]

                for gene in targetGenes:
                    for zygTag in zygTags:
                        # Text between "<tag>" and the next ';' is the
                        # comma-separated per-allele count list.  Re-split
                        # per call so getCounts always gets a fresh list.
                        countLs = info.split(zygTag)[1].split(';')[0].split(',')
                        for count in getCounts(ref, countLs, key, altLs,
                                               oneKPopInfo, effField,
                                               gene, inGoodPos):
                            # Zero counts still register the gene in `tot`.
                            tot[gene] += int(count)
                            if int(count):
                                print('\t'.join((gene, count)) + '\t' + record, file=fout)

    with open(args.outFile, 'w') as fout:
        for gene in tot:
            print(gene + '\t' + str(tot[gene]), file=fout)