def _carriedAlleles(genotype):
    """Return the distinct non-missing alleles among genotype[0] and genotype[1]."""
    first, second = genotype[0], genotype[1]
    carried = []
    if first is not None:
        carried.append(first)
    if second is not None and second != first:
        carried.append(second)
    return carried


def calcCarriage(vcfLine, vcfIndices, kgpLine, kgpIndices, alleles):
    """Count, for each allele, how many individuals carry at least one copy.

    Individuals are pooled from both inputs: ``vcfLine.genotypes[i]`` for
    every ``i`` in ``vcfIndices`` and ``kgpLine.genotypes[i]`` for every
    ``i`` in ``kgpIndices``.  A homozygous individual counts once for its
    allele; a heterozygous individual counts once for each of its alleles.

    Each allele of a genotype is checked for ``None`` (missing)
    independently, so a half-called genotype still contributes the allele
    that was called and never tallies ``None``.

    NOTE(review): this file contains a second definition of calcCarriage
    further down; whichever is defined last is the one callers get.

    Args:
        vcfLine: object exposing extractAlleles(), extractGenotypes(indices)
            and a ``genotypes`` container; only touched when vcfIndices is
            non-empty.
        vcfIndices: positions to read from ``vcfLine.genotypes``.
        kgpLine: same interface as vcfLine, for the second data set.
        kgpIndices: positions to read from ``kgpLine.genotypes``.
        alleles: the site's alleles; genotype values are matched against
            positions in this sequence.

    Returns:
        A list with one carrier count per entry of ``alleles`` (0 when the
        allele was never observed).
    """
    if len(vcfIndices) > 0:
        vcfLine.extractAlleles()
        vcfLine.extractGenotypes(vcfIndices)
    if len(kgpIndices) > 0:
        kgpLine.extractAlleles()
        kgpLine.extractGenotypes(kgpIndices)

    # A plain dict is enough here: the tally is only ever read via .get().
    carriers = {}
    for line, indices in ((vcfLine, vcfIndices), (kgpLine, kgpIndices)):
        for i in indices:
            for allele in _carriedAlleles(line.genotypes[i]):
                carriers[allele] = carriers.get(allele, 0) + 1

    return [carriers.get(i, 0) for i, _ in enumerate(alleles)]
def _calledAlleles(line, indices):
    """Yield every non-missing allele (slots 0 and 1) of the selected individuals."""
    for i in indices:
        genotype = line.genotypes[i]
        for allele in (genotype[0], genotype[1]):
            if allele is not None:
                yield allele


def calcAF(vcfLine, vcfIndices, kgpLine, kgpIndices, alleles):
    """Compute the frequency of each allele across both sets of individuals.

    Every called (non-``None``) allele of every selected individual adds one
    to the denominator and one to that allele's tally.  Missing alleles are
    skipped individually, so a half-called genotype contributes exactly one
    allele instead of inflating the denominator or being dropped.

    NOTE(review): this file contains a second definition of calcAF further
    down; whichever is defined last is the one callers get.

    Args:
        vcfLine: object exposing extractAlleles(), extractGenotypes(indices)
            and a ``genotypes`` container; only touched when vcfIndices is
            non-empty.
        vcfIndices: positions to read from ``vcfLine.genotypes``.
        kgpLine: same interface as vcfLine, for the second data set.
        kgpIndices: positions to read from ``kgpLine.genotypes``.
        alleles: the site's alleles; genotype values are matched against
            positions in this sequence.

    Returns:
        A list with one frequency per entry of ``alleles``.  When no allele
        was called at all, every entry is ``float('Inf')``.
    """
    if len(vcfIndices) > 0:
        vcfLine.extractAlleles()
        vcfLine.extractGenotypes(vcfIndices)
    if len(kgpIndices) > 0:
        kgpLine.extractAlleles()
        kgpLine.extractGenotypes(kgpIndices)

    count = 0.0  # float so the division below is true division on Python 2
    matches = {}
    for line, indices in ((vcfLine, vcfIndices), (kgpLine, kgpIndices)):
        for allele in _calledAlleles(line, indices):
            count += 1.0
            matches[allele] = matches.get(allele, 0) + 1

    if count == 0.0:
        # No data: keep the established sentinel rather than dividing by zero.
        return [float('Inf') for _ in alleles]
    return [matches.get(i, 0) / count for i, _ in enumerate(alleles)]
def calcCarriage(vcfLine, vcfIndices, kgpLine, kgpIndices, alleles):
    """Return, per allele, the number of individuals carrying it.

    The selected individuals of ``vcfLine`` (positions ``vcfIndices``) and
    of ``kgpLine`` (positions ``kgpIndices``) are pooled.  Each individual
    is counted at most once per allele, regardless of whether it carries
    one or two copies.

    Both genotype slots are tested for ``None`` (missing) on their own, so
    a half-called genotype still credits the allele that was called and a
    missing slot is never tallied.

    NOTE(review): calcCarriage is also defined earlier in this file; being
    the later definition, this one rebinds the name.

    Args:
        vcfLine: object with extractAlleles(), extractGenotypes(indices) and
            a ``genotypes`` container; left untouched if vcfIndices is empty.
        vcfIndices: positions to read from ``vcfLine.genotypes``.
        kgpLine: same interface as vcfLine, for the second data set.
        kgpIndices: positions to read from ``kgpLine.genotypes``.
        alleles: the site's alleles; genotype values are matched against
            positions in this sequence.

    Returns:
        List of carrier counts, one per entry of ``alleles``; alleles never
        observed get 0.
    """
    if len(vcfIndices) > 0:
        vcfLine.extractAlleles()
        vcfLine.extractGenotypes(vcfIndices)
    if len(kgpIndices) > 0:
        kgpLine.extractAlleles()
        kgpLine.extractGenotypes(kgpIndices)

    tally = {}
    for source, selected in ((vcfLine, vcfIndices), (kgpLine, kgpIndices)):
        for idx in selected:
            genotype = source.genotypes[idx]
            # A set collapses homozygotes to a single carried allele.
            carried = set((genotype[0], genotype[1]))
            carried.discard(None)
            for allele in carried:
                tally[allele] = tally.get(allele, 0) + 1

    return [tally.get(pos, 0) for pos, _ in enumerate(alleles)]
def calcAF(vcfLine, vcfIndices, kgpLine, kgpIndices, alleles):
    """Return the frequency of each allele among the selected individuals.

    Individuals come from ``vcfLine`` (positions ``vcfIndices``) and from
    ``kgpLine`` (positions ``kgpIndices``).  Each genotype slot that is not
    ``None`` contributes one observation; a missing slot contributes
    nothing, so half-called genotypes neither inflate the denominator nor
    get discarded wholesale.

    NOTE(review): calcAF is also defined earlier in this file; being the
    later definition, this one rebinds the name.

    Args:
        vcfLine: object with extractAlleles(), extractGenotypes(indices) and
            a ``genotypes`` container; left untouched if vcfIndices is empty.
        vcfIndices: positions to read from ``vcfLine.genotypes``.
        kgpLine: same interface as vcfLine, for the second data set.
        kgpIndices: positions to read from ``kgpLine.genotypes``.
        alleles: the site's alleles; genotype values are matched against
            positions in this sequence.

    Returns:
        List of frequencies, one per entry of ``alleles``.  If nothing was
        called, every entry is ``float('Inf')``.
    """
    if len(vcfIndices) > 0:
        vcfLine.extractAlleles()
        vcfLine.extractGenotypes(vcfIndices)
    if len(kgpIndices) > 0:
        kgpLine.extractAlleles()
        kgpLine.extractGenotypes(kgpIndices)

    observed = 0.0  # kept as float so '/' is true division on Python 2 too
    seen = {}
    for source, selected in ((vcfLine, vcfIndices), (kgpLine, kgpIndices)):
        for idx in selected:
            genotype = source.genotypes[idx]
            for allele in (genotype[0], genotype[1]):
                if allele is None:
                    continue
                observed += 1.0
                seen[allele] = seen.get(allele, 0) + 1

    if observed == 0.0:
        # Sentinel for "no data"; avoids a ZeroDivisionError below.
        return [float('Inf') for _ in alleles]
    return [seen.get(pos, 0) / observed for pos, _ in enumerate(alleles)]
def run(args):
    """Write a tab-separated genotype or allele-frequency table for one population.

    For every line yielded by ``kgp.iterate()`` a row is written that starts
    with chromosome, position and ID.  Depending on ``args.frequencies_only``
    the row then holds either one ``<allele>:``/frequency pair per observed
    allele, or the two alleles of each individual in the population ('.'
    for a missing allele).

    The output file is managed by a ``with`` block so it is closed even when
    reading or formatting a line raises.

    NOTE(review): ``iteritems`` and writing ``str`` to a ``'wb'`` handle are
    Python 2 idioms and are kept as-is; confirm the target interpreter
    before porting.
    NOTE(review): this file contains a second definition of run further
    down; whichever is defined last is the one callers get.

    Args:
        args: object with the attributes ``data`` (passed to kgpInterface),
            ``outfile`` (output path), ``frequencies_only`` (string; a value
            starting with 't'/'T' selects frequency output) and ``pop``
            (key into ``kgp.populations``).
    """
    kgp = kgpInterface(args.data, sys.path[0] + "/KGP_populations.txt")
    with open(args.outfile, 'wb') as outfile:
        freqOnly = args.frequencies_only.lower().startswith('t')
        wroteHeader = False
        for line in kgp.iterate():
            people = kgp.populations[args.pop]

            # The header is emitted lazily, so no header is written when
            # there are no data lines at all.
            if not wroteHeader:
                header = ['CHROM', 'POS', 'ID']
                if not freqOnly:
                    for p in people:
                        header.append('%s_1' % (p,))
                        header.append('%s_2' % (p,))
                outfile.write('\t'.join(header) + '\n')
                wroteHeader = True

            line.extractChrAndPos()
            line.extractAlleles()
            line.extractGenotypes()
            fields = [line.chromosome, str(line.position), line.name]

            if freqOnly:
                counts = countingDict()
                total = 0.0
                for p in people:
                    i = kgp.individualIndices[p]
                    for allele in (line.genotypes[i][0], line.genotypes[i][1]):
                        if allele is not None:
                            counts[allele] += 1
                            total += 1.0
                # counts is empty whenever total is 0, so no division by zero.
                for i, c in counts.iteritems():
                    fields.append('%s:' % (line.alleles[i],))
                    fields.append('%f' % (c / total))
            else:
                for p in people:
                    i = kgp.individualIndices[p]
                    for allele in (line.genotypes[i][0], line.genotypes[i][1]):
                        fields.append(
                            '.' if allele is None
                            else '%s' % (line.alleles[allele],))

            outfile.write('\t'.join(fields) + '\n')
def run(args):
    """Dump per-line genotypes or allele frequencies of one population as TSV.

    Each line from ``kgp.iterate()`` becomes one output row beginning with
    chromosome, position and ID.  When ``args.frequencies_only`` starts with
    't' (case-insensitive) the row lists ``<allele>:`` and its frequency
    among the population's called alleles; otherwise it lists both alleles
    of every individual in the population, using '.' for a missing allele.

    The output handle is opened in a ``with`` block so that it is closed on
    every exit path, including exceptions raised while processing a line.

    NOTE(review): ``iteritems`` and ``str`` written to a ``'wb'`` handle are
    Python 2 idioms, deliberately left unchanged; confirm the target
    interpreter before porting.
    NOTE(review): run is also defined earlier in this file; being the later
    definition, this one rebinds the name.

    Args:
        args: object providing ``data``, ``outfile``, ``frequencies_only``
            and ``pop`` attributes.
    """
    kgp = kgpInterface(args.data, sys.path[0] + "/KGP_populations.txt")
    with open(args.outfile, 'wb') as outfile:
        freqOnly = args.frequencies_only.lower().startswith('t')
        wroteHeader = False
        for line in kgp.iterate():
            # Header is written together with the first data line only.
            if not wroteHeader:
                outfile.write('CHROM\tPOS\tID')
                if not freqOnly:
                    for p in kgp.populations[args.pop]:
                        outfile.write('\t%s_1\t%s_2' % (p, p))
                outfile.write('\n')
                wroteHeader = True

            line.extractChrAndPos()
            line.extractAlleles()
            line.extractGenotypes()
            outfile.write('\t'.join(
                [line.chromosome, str(line.position), line.name]))

            if freqOnly:
                counts = countingDict()
                total = 0.0
                for p in kgp.populations[args.pop]:
                    i = kgp.individualIndices[p]
                    genotype = line.genotypes[i]
                    if genotype[0] is not None:
                        counts[genotype[0]] += 1
                        total += 1.0
                    if genotype[1] is not None:
                        counts[genotype[1]] += 1
                        total += 1.0
                # With total == 0 the tally is empty, so the division below
                # is never reached.
                for i, c in counts.iteritems():
                    outfile.write('\t%s:\t%f' % (line.alleles[i], c / total))
            else:
                for p in kgp.populations[args.pop]:
                    i = kgp.individualIndices[p]
                    a1 = line.genotypes[i][0]
                    a1 = '.' if a1 is None else line.alleles[a1]
                    a2 = line.genotypes[i][1]
                    a2 = '.' if a2 is None else line.alleles[a2]
                    outfile.write('\t%s\t%s' % (a1, a2))
            outfile.write('\n')