Exemple #1
0
    def calcCarriage(vcfLine, vcfIndices, kgpLine, kgpIndices, alleles):
        """Count, per allele index, how many selected individuals carry it.

        An individual adds at most 1 to an allele's count, whether it holds
        one copy or two.  The two genotype slots are examined independently,
        so a genotype with only one slot filled still counts for the allele
        that is present.

        Args:
            vcfLine: line object exposing ``extractAlleles()``,
                ``extractGenotypes(indices)`` and a ``genotypes`` container
                indexable by the entries of ``vcfIndices``.
            vcfIndices: indices of the individuals to read from ``vcfLine``.
            kgpLine: object with the same interface, read via ``kgpIndices``.
            kgpIndices: indices of the individuals to read from ``kgpLine``.
            alleles: iterable of alleles; only the positions are used, the
                genotype values being treated as indices into it.

        Returns:
            A list with one integer per entry of ``alleles``; element ``k``
            is the number of individuals whose genotype contains allele
            index ``k``.  Genotype values outside that range are ignored.
        """
        sources = ((vcfLine, vcfIndices), (kgpLine, kgpIndices))

        # Parse both lines first; a source with no individuals is not parsed.
        for line, indices in sources:
            if len(indices) > 0:
                line.extractAlleles()
                line.extractGenotypes(indices)

        counts = {}
        for line, indices in sources:
            for i in indices:
                genotype = line.genotypes[i]
                # Distinct alleles held by this individual.
                # NOTE(review): None is taken to mean a missing call.
                carried = set((genotype[0], genotype[1]))
                carried.discard(None)
                for allele in carried:
                    counts[allele] = counts.get(allele, 0) + 1

        return [counts.get(i, 0) for i, _ in enumerate(alleles)]
Exemple #2
0
    def calcAF(vcfLine, vcfIndices, kgpLine, kgpIndices, alleles):
        """Return the frequency of each allele among the selected individuals.

        Every non-missing genotype slot contributes one observation, so a
        genotype with a single filled slot adds one allele to both the
        numerator and the denominator rather than being counted as two.

        Args:
            vcfLine: line object exposing ``extractAlleles()``,
                ``extractGenotypes(indices)`` and a ``genotypes`` container
                indexable by the entries of ``vcfIndices``.
            vcfIndices: indices of the individuals to read from ``vcfLine``.
            kgpLine: object with the same interface, read via ``kgpIndices``.
            kgpIndices: indices of the individuals to read from ``kgpLine``.
            alleles: iterable of alleles; only the positions are used, the
                genotype values being treated as indices into it.

        Returns:
            A list with one float per entry of ``alleles``: the share of
            observed alleles equal to that index.  If nothing was observed
            every entry is ``float('Inf')``.
        """
        sources = ((vcfLine, vcfIndices), (kgpLine, kgpIndices))

        # Parse both lines first; a source with no individuals is not parsed.
        for line, indices in sources:
            if len(indices) > 0:
                line.extractAlleles()
                line.extractGenotypes(indices)

        observed = 0.0  # float so the division below is never integer division
        matches = {}
        for line, indices in sources:
            for i in indices:
                genotype = line.genotypes[i]
                # NOTE(review): None is taken to mean a missing call.
                for allele in (genotype[0], genotype[1]):
                    if allele is not None:
                        observed += 1.0
                        matches[allele] = matches.get(allele, 0) + 1

        if observed == 0.0:
            return [float('Inf') for _ in alleles]
        return [matches.get(i, 0) / observed for i, _ in enumerate(alleles)]
 def calcCarriage(vcfLine,vcfIndices,kgpLine,kgpIndices,alleles):
     """Count, per allele index, how many selected individuals carry it.

     An individual adds at most 1 to an allele's count, whether it holds one
     copy or two.  The two genotype slots are examined independently, so a
     genotype with only one slot filled still counts for the allele that is
     present.

     Args:
         vcfLine: line object exposing ``extractAlleles()``,
             ``extractGenotypes(indices)`` and a ``genotypes`` container
             indexable by the entries of ``vcfIndices``.
         vcfIndices: indices of the individuals to read from ``vcfLine``.
         kgpLine: object with the same interface, read via ``kgpIndices``.
         kgpIndices: indices of the individuals to read from ``kgpLine``.
         alleles: iterable of alleles; only the positions are used, the
             genotype values being treated as indices into it.

     Returns:
         A list with one integer per entry of ``alleles``; element ``k`` is
         the number of individuals whose genotype contains allele index
         ``k``.  Genotype values outside that range are ignored.
     """
     sources = ((vcfLine, vcfIndices), (kgpLine, kgpIndices))

     # Parse both lines first; a source with no individuals is not parsed.
     for line, indices in sources:
         if len(indices) > 0:
             line.extractAlleles()
             line.extractGenotypes(indices)

     counts = {}
     for line, indices in sources:
         for i in indices:
             genotype = line.genotypes[i]
             # Distinct alleles held by this individual.
             # NOTE(review): None is taken to mean a missing call.
             carried = set((genotype[0], genotype[1]))
             carried.discard(None)
             for allele in carried:
                 counts[allele] = counts.get(allele, 0) + 1

     return [counts.get(i, 0) for i, _ in enumerate(alleles)]
 def calcAF(vcfLine,vcfIndices,kgpLine,kgpIndices,alleles):
     """Return the frequency of each allele among the selected individuals.

     Every non-missing genotype slot contributes one observation, so a
     genotype with a single filled slot adds one allele to both the
     numerator and the denominator rather than being counted as two.

     Args:
         vcfLine: line object exposing ``extractAlleles()``,
             ``extractGenotypes(indices)`` and a ``genotypes`` container
             indexable by the entries of ``vcfIndices``.
         vcfIndices: indices of the individuals to read from ``vcfLine``.
         kgpLine: object with the same interface, read via ``kgpIndices``.
         kgpIndices: indices of the individuals to read from ``kgpLine``.
         alleles: iterable of alleles; only the positions are used, the
             genotype values being treated as indices into it.

     Returns:
         A list with one float per entry of ``alleles``: the share of
         observed alleles equal to that index.  If nothing was observed
         every entry is ``float('Inf')``.
     """
     sources = ((vcfLine, vcfIndices), (kgpLine, kgpIndices))

     # Parse both lines first; a source with no individuals is not parsed.
     for line, indices in sources:
         if len(indices) > 0:
             line.extractAlleles()
             line.extractGenotypes(indices)

     observed = 0.0  # float so the division below is never integer division
     matches = {}
     for line, indices in sources:
         for i in indices:
             genotype = line.genotypes[i]
             # NOTE(review): None is taken to mean a missing call.
             for allele in (genotype[0], genotype[1]):
                 if allele is not None:
                     observed += 1.0
                     matches[allele] = matches.get(allele, 0) + 1

     if observed == 0.0:
         return [float('Inf') for _ in alleles]
     return [matches.get(i, 0) / observed for i, _ in enumerate(alleles)]
def run(args):
    """Dump genotypes or allele frequencies for one population as TSV.

    Iterates the lines produced by a ``kgpInterface`` built from
    ``args.data`` and the ``KGP_populations.txt`` file found under
    ``sys.path[0]``, writing one row per line to ``args.outfile``.

    Output layout:
        * Header ``CHROM\\tPOS\\tID``; in genotype mode it is followed by two
          columns ``<p>_1`` and ``<p>_2`` for every member ``p`` of
          ``kgp.populations[args.pop]``.  The header is written when the
          first line arrives, so an empty input produces an empty file.
        * Each row starts with chromosome, position and name.
        * Frequency mode appends ``\\t<allele>:\\t<frequency>`` for every
          allele observed in the population on that line.
        * Genotype mode appends the two alleles of each member, with ``.``
          standing in for a ``None`` genotype slot.

    Args:
        args: object with attributes ``data``, ``outfile``, ``pop`` and
            ``frequencies_only`` (a string; a value starting with ``t`` in
            any case selects frequency mode).

    NOTE(review): this relies on Python 2 behaviour (``iteritems`` and text
    written to a file opened in ``'wb'`` mode).
    """
    kgp = kgpInterface(args.data, sys.path[0] + "/KGP_populations.txt")
    # Interpret the flag before touching the output file so that a bad value
    # cannot leave a truncated file behind.
    freqOnly = args.frequencies_only.lower().startswith('t')

    # The context manager guarantees the file is closed even when a line
    # fails part-way through.
    with open(args.outfile, 'wb') as outfile:
        wroteHeader = False
        for line in kgp.iterate():
            if not wroteHeader:
                outfile.write('CHROM\tPOS\tID')
                if not freqOnly:
                    for p in kgp.populations[args.pop]:
                        outfile.write('\t%s_1\t%s_2' % (p, p))
                outfile.write('\n')
                wroteHeader = True

            line.extractChrAndPos()
            line.extractAlleles()
            line.extractGenotypes()
            outfile.write('\t'.join(
                [line.chromosome, str(line.position), line.name]))

            if freqOnly:
                counts = countingDict()
                total = 0.0
                for p in kgp.populations[args.pop]:
                    genotype = line.genotypes[kgp.individualIndices[p]]
                    # Each slot is tallied on its own; None slots are skipped.
                    for allele in (genotype[0], genotype[1]):
                        if allele is not None:
                            counts[allele] += 1
                            total += 1.0
                # total is non-zero whenever counts has at least one entry.
                for i, c in counts.iteritems():
                    outfile.write('\t%s:\t%f' % (line.alleles[i], c / total))
            else:
                for p in kgp.populations[args.pop]:
                    genotype = line.genotypes[kgp.individualIndices[p]]
                    if genotype[0] is None:
                        a1 = '.'
                    else:
                        a1 = line.alleles[genotype[0]]
                    if genotype[1] is None:
                        a2 = '.'
                    else:
                        a2 = line.alleles[genotype[1]]
                    outfile.write('\t%s\t%s' % (a1, a2))
            outfile.write('\n')
def run(args):
    """Dump genotypes or allele frequencies for one population as TSV.

    Iterates the lines produced by a ``kgpInterface`` built from
    ``args.data`` and the ``KGP_populations.txt`` file found under
    ``sys.path[0]``, writing one row per line to ``args.outfile``.

    Output layout:
        * Header ``CHROM\\tPOS\\tID``; in genotype mode it is followed by two
          columns ``<p>_1`` and ``<p>_2`` for every member ``p`` of
          ``kgp.populations[args.pop]``.  The header is written when the
          first line arrives, so an empty input produces an empty file.
        * Each row starts with chromosome, position and name.
        * Frequency mode appends ``\\t<allele>:\\t<frequency>`` for every
          allele observed in the population on that line.
        * Genotype mode appends the two alleles of each member, with ``.``
          standing in for a ``None`` genotype slot.

    Args:
        args: object with attributes ``data``, ``outfile``, ``pop`` and
            ``frequencies_only`` (a string; a value starting with ``t`` in
            any case selects frequency mode).

    NOTE(review): this relies on Python 2 behaviour (``iteritems`` and text
    written to a file opened in ``'wb'`` mode).
    """
    kgp = kgpInterface(args.data,sys.path[0] + "/KGP_populations.txt")
    # Interpret the flag before touching the output file so that a bad value
    # cannot leave a truncated file behind.
    freqOnly = args.frequencies_only.lower().startswith('t')

    # The context manager guarantees the file is closed even when a line
    # fails part-way through.
    with open(args.outfile,'wb') as outfile:
        wroteHeader = False
        for line in kgp.iterate():
            if not wroteHeader:
                outfile.write('CHROM\tPOS\tID')
                if not freqOnly:
                    for p in kgp.populations[args.pop]:
                        outfile.write('\t%s_1\t%s_2' % (p,p))
                outfile.write('\n')
                wroteHeader = True

            line.extractChrAndPos()
            line.extractAlleles()
            line.extractGenotypes()
            outfile.write('\t'.join([line.chromosome,str(line.position),line.name]))

            if freqOnly:
                counts = countingDict()
                total = 0.0
                for p in kgp.populations[args.pop]:
                    genotype = line.genotypes[kgp.individualIndices[p]]
                    # Each slot is tallied on its own; None slots are skipped.
                    for allele in (genotype[0],genotype[1]):
                        if allele is not None:
                            counts[allele] += 1
                            total += 1.0
                # total is non-zero whenever counts has at least one entry.
                for i,c in counts.iteritems():
                    outfile.write('\t%s:\t%f' % (line.alleles[i],c/total))
            else:
                for p in kgp.populations[args.pop]:
                    genotype = line.genotypes[kgp.individualIndices[p]]
                    if genotype[0] is None:
                        a1 = '.'
                    else:
                        a1 = line.alleles[genotype[0]]
                    if genotype[1] is None:
                        a2 = '.'
                    else:
                        a2 = line.alleles[genotype[1]]
                    outfile.write('\t%s\t%s' % (a1,a2))
            outfile.write('\n')