コード例 #1
0
def main(args):
    """Sum Het/Hom counts for one population and write the total to a file.

    Reads ``args.exacVcf`` line by line. For each non-header line whose
    (chrom, pos) has at least one row in the HDF5 table
    ``posCollection.posLs`` of ``args.hdf5PosFile`` and whose eighth column
    contains ``AC_<pop>=``, the comma-separated values following
    ``Het_<pop>=`` and ``Hom_<pop>=`` are handed to ``getCounts``; every
    count it returns is converted to ``int`` and added to a running total.
    The total is written as a single line to ``args.outFile``.

    Args:
        args: Object providing the attributes ``hdf5PosFile``, ``exacVcf``,
            ``oneKgFile``, ``pop`` and ``outFile``.
    """
    h5file = tables.open_file(args.hdf5PosFile, mode="r")
    # try/finally guarantees the HDF5 handle is released even when parsing
    # or a table query raises part-way through the VCF.
    try:
        table = h5file.root.posCollection.posLs
        lookUpSnvs = getSNVInfo(args.exacVcf, table)
        useLowCov1KG = True
        oneKPopInfo = annotateEVSMajorFreq.get1kInfoAllPop(lookUpSnvs, args.oneKgFile, useLowCov1KG)

        # These tags depend only on the population, so build them once.
        acTag = 'AC_' + args.pop + '='
        zygTags = ('Het_' + args.pop + '=', 'Hom_' + args.pop + '=')

        tot = 0
        with open(args.exacVcf) as f:
            for line in f:
                if line[0] == '#':
                    continue
                sp = line.strip().split()
                chrom = sp[0]
                pos = int(sp[1])
                # Only positions present in the HDF5 table are counted.
                resLs = [x['pos'] for x in
                         table.where('(chrom == b"%s") & (pos == %d)' % (chrom, pos))]
                if not resLs:
                    continue
                if acTag not in sp[7]:
                    continue

                key = chrom + ':' + str(pos)
                ref = sp[3]
                altLs = sp[4].split(',')
                effField = sp[-1]
                for pZyg in zygTags:
                    # NOTE(review): assumes both Het_/Hom_ tags are present
                    # whenever the AC_ tag is; a missing tag raises IndexError.
                    countLs = sp[7].split(pZyg)[1].split(';')[0].split(',')
                    for count in getCounts(ref, countLs, key, altLs, oneKPopInfo, effField):
                        tot += int(count)
    finally:
        h5file.close()

    with open(args.outFile, 'w') as fout:
        print(str(tot), file=fout)
コード例 #2
0
def main(args):
    """Copy the data lines of ``args.vcfFile`` accepted by ``checkRare``.

    Header lines (first character ``#``) are skipped. Every other line is
    split on whitespace; its chrom/pos key, reference allele and list of
    alternate alleles are passed to ``checkRare`` together with the lookup
    built by ``annotateEVSMajorFreq.get1kInfoAllPop``. Lines for which
    ``checkRare`` returns a truthy value are written, stripped, to
    ``args.outFile``.

    Args:
        args: Object providing the attributes ``vcfFile``, ``oneKgFile``
            and ``outFile``.
    """
    snvLookup = getSNVInfo(args.vcfFile)
    lowCovFlag = True
    popInfo = annotateEVSMajorFreq.get1kInfoAllPop(snvLookup, args.oneKgFile, lowCovFlag)

    with open(args.vcfFile) as vcfIn, open(args.outFile, 'w') as rareOut:
        for record in vcfIn:
            if record[0] == '#':
                continue
            fields = record.strip().split()
            chrom, pos = fields[0], fields[1]
            siteKey = ':'.join((chrom, pos))
            refAllele = fields[3]
            altAlleles = fields[4].split(',')
            if checkRare(popInfo, altAlleles, refAllele, siteKey):
                print(record.strip(), file=rareOut)
コード例 #3
0
def main(args):
    """Accumulate per-gene Het/Hom counts and list the contributing variants.

    Reads ``args.exacVcf`` line by line. For each non-header line whose
    eighth column contains ``AC_<pop>=``, the genes stored for that
    (chrom, pos) in the HDF5 table ``posCollection.posLs`` of
    ``args.hdf5PosFile`` are intersected with the genes loaded from
    ``args.geneLsFile``. For every such gene, the comma-separated values
    following ``Het_<pop>=`` and ``Hom_<pop>=`` are handed to ``getCounts``;
    each returned count is added to that gene's total, and each non-zero
    count writes a ``gene<TAB>count<TAB>original line`` row to
    ``args.varLsOutFile``. Finally one ``gene<TAB>total`` line per
    accumulated gene is written to ``args.outFile``.

    Args:
        args: Object providing the attributes ``geneLsFile``,
            ``goodVarPosFile``, ``hdf5PosFile``, ``exacVcf``, ``oneKgFile``,
            ``pop``, ``varLsOutFile`` and ``outFile``.
    """
    genes = countTargetSampleTotalVarsForGene.loadGenes(args.geneLsFile)

    goodPos = loadPos(args.goodVarPosFile)

    h5file = tables.open_file(args.hdf5PosFile, mode="r")
    tot = defaultdict(int)
    # try/finally guarantees the HDF5 handle is released even when parsing
    # or a table query raises part-way through the VCF.
    try:
        table = h5file.root.posCollection.posLs

        lookUpSnvs = getSNVInfo(args.exacVcf)
        useLowCov1KG = True
        oneKPopInfo = annotateEVSMajorFreq.get1kInfoAllPop(lookUpSnvs, args.oneKgFile, useLowCov1KG)

        # These tags depend only on the population, so build them once.
        acTag = 'AC_' + args.pop + '='
        zygTags = ('Het_' + args.pop + '=', 'Hom_' + args.pop + '=')

        with open(args.exacVcf) as f, open(args.varLsOutFile, 'w') as fout:
            print('gene\tvarCount\tchrom\tpos\trs\tref\talt\tqual\tfilter\tinfo', file=fout)
            for line in f:
                if line[0] == '#':
                    continue
                sp = line.strip().split()
                chrom = sp[0]
                pos = int(sp[1])
                # Lines without the population's AC tag contribute nothing,
                # so skip them before paying for the HDF5 query.
                if acTag not in sp[7]:
                    continue

                resGeneSet = {x['gene'].decode("utf-8") for x in
                              table.where('(chrom == b"%s") & (pos == %d)' % (chrom, pos))}
                key = chrom + ':' + str(pos)
                inGoodPos = key in goodPos
                ref = sp[3]
                altLs = sp[4].split(',')
                effField = sp[-1]
                stripped = line.strip()
                for gene in resGeneSet & genes:
                    for pZyg in zygTags:
                        # NOTE(review): assumes both Het_/Hom_ tags are present
                        # whenever the AC_ tag is; a missing tag raises IndexError.
                        countLs = sp[7].split(pZyg)[1].split(';')[0].split(',')
                        for count in getCounts(ref, countLs, key, altLs, oneKPopInfo, effField, gene, inGoodPos):
                            tot[gene] += int(count)
                            if int(count):
                                # count is joined as text, so getCounts is
                                # expected to return strings here.
                                print('\t'.join((gene, count)) + '\t' + stripped, file=fout)
    finally:
        h5file.close()

    with open(args.outFile, 'w') as fout:
        for gene in tot:
            print(gene + '\t' + str(tot[gene]), file=fout)