def performGroupCalculations(variantObject, groupDict, basisGroup, mode=1, vcfOverride=False):
    """Store, on variantObject, the frequency of one target allele in every group.

    The target allele is chosen once and then counted in each group of
    groupDict.  Results are written with variantObject.setAttribute().

    Args:
        variantObject: the variant to annotate.  Nothing happens when it is
            None or when its ``poisoned`` flag is set.
        groupDict: maps a group ID to an iterable of sample IDs.
        basisGroup: key into groupDict; the samples of this group are used to
            rank the alleles when vcfOverride is false.
        mode: 1-based rank of the allele to target (1 = most frequent in the
            basis group, 2 = second most frequent, ...).  Negative values
            index from the end (-1 = least frequent).  With vcfOverride the
            rank refers to the order of variantObject.alleles instead.
        vcfOverride: when true, take the allele order from
            variantObject.alleles rather than counting the basis group.

    Returns:
        None.  For each group the attribute is the target allele frequency
        (float), None when the group has no genotype data for this variant,
        or "Masked" when the requested rank does not exist.
    """
    if variantObject is None or variantObject.poisoned:
        return

    genotypes = variantObject.genotypes

    # Build the ranked list of candidate alleles.
    if vcfOverride:
        alleles = variantObject.alleles
    else:
        # Count every called allele among the samples of the basis group.
        alleleCounts = countingDict()
        for sample in groupDict[basisGroup]:
            if sample in genotypes:
                genotype = genotypes[sample]
                for allele in (genotype.allele1, genotype.allele2):
                    if allele.text is not None:
                        alleleCounts[allele] += 1
        # Most frequent first, so that rank 1 is the major allele.
        ranked = sorted(alleleCounts.items(), key=lambda item: item[1], reverse=True)
        alleles = [allele for allele, _ in ranked]

    # Ranks are given 1-based; negative ranks already work as Python indexes.
    if mode > 0:
        mode -= 1
    if -len(alleles) <= mode < len(alleles):
        # Pick from the same list the bounds check was made against.
        targetAllele = alleles[mode]
    else:
        targetAllele = None

    for groupID, samples in groupDict.items():
        if targetAllele is None:
            # The basis did not have an allele of that rank, so we're masked.
            # NOTE(review): the mask is stored under groupID + " AF" while the
            # values below are stored under groupID -- confirm which key the
            # readers of these attributes expect.
            variantObject.setAttribute(groupID + " AF", "Masked")
            continue

        allCount = 0
        targetCount = 0
        for sample in samples:
            if sample in genotypes:
                genotype = genotypes[sample]
                for allele in (genotype.allele1, genotype.allele2):
                    # NOTE(review): the ranking above skips alleles whose
                    # .text is None, whereas this test compares the allele
                    # object itself -- confirm that both detect missing calls.
                    if allele != None:
                        allCount += 1
                        if allele == targetAllele:
                            targetCount += 1

        if allCount == 0:
            # No data for this variant in this group: frequency is undefined.
            variantObject.setAttribute(groupID, None)
        else:
            variantObject.setAttribute(groupID, float(targetCount) / allCount)
def performGroupCalculations(self, groupDict, statisticDict, callback, tickInterval):
    """Compute every allele-frequency statistic for every variant in self.data.

    For each statistic of type statistic.ALLELE_FREQUENCY a target allele is
    chosen per variant and its frequency among the samples of the statistic's
    group is stored on the variant with setAttribute(statisticID, value).
    Statistics of any other type are ignored here.

    Statistic parameters that are read:
        'alleleMode':  1-based rank of the allele to target (1 = most
                       frequent, negative values index from the end).
        'alleleGroup': optional key into groupDict; when present the alleles
                       are ranked by their count among that group's samples,
                       otherwise the order of variantObject.alleles is used.
        'group':       key into groupDict naming the samples to measure.

    Args:
        groupDict: maps a group ID to an object with a ``samples`` iterable.
        statisticDict: maps a statistic ID to a statistic object.
        callback: called with no arguments every tickInterval variants; a
            true return value aborts the run.
        tickInterval: number of variants between commits / callback calls.

    Returns:
        "ABORTED" if callback() asked to stop, otherwise None.  The stored
        value per statistic is a float frequency, None when the group has no
        genotype data for the variant, or "Masked" when the requested allele
        rank does not exist.
    """
    from dataModels.setupData import statistic

    vcfOverrideKey = 'vcf override'

    # Gather the allele-frequency statistics once.  Each one is tied to a
    # (basis group, 0-based index) pair so that two statistics sharing a basis
    # group but asking for different ranks do not overwrite each other.
    frequencyStats = []
    targetKeys = set()
    for statisticID, s in statisticDict.items():
        if s.statisticType != statistic.ALLELE_FREQUENCY:
            continue
        index = s.parameters['alleleMode']
        if index >= 1:
            # Users specify 1 as the most frequent; negative ranks already
            # work as Python indexes.
            index -= 1
        targetKey = (s.parameters.get('alleleGroup', vcfOverrideKey), index)
        targetKeys.add(targetKey)
        frequencyStats.append((statisticID, s, targetKey))

    if not frequencyStats:
        # Nothing to calculate.
        return

    currentLine = 0
    nextTick = tickInterval
    for key in self.data:
        if key == 'variant keys':
            continue
        variantObject = self.data[key]

        currentLine += 1
        if currentLine >= nextTick:
            nextTick += tickInterval
            self.dataConnection.commit()
            if callback():
                # The caller asked us to abort.
                return "ABORTED"

        if variantObject is None or variantObject.poisoned:
            continue

        genotypes = variantObject.genotypes

        # First resolve the target allele for every (basis, index) pair.
        targetAlleles = {}
        for targetKey in targetKeys:
            group, index = targetKey
            if group == vcfOverrideKey:
                alleles = variantObject.alleles
            else:
                # Rank the alleles called among the basis group's samples.
                alleleCounts = countingDict()
                for sample in groupDict[group].samples:
                    if sample in genotypes:
                        genotype = genotypes[sample]
                        for allele in (genotype.allele1, genotype.allele2):
                            if allele.text is not None:
                                alleleCounts[allele] += 1
                # Most frequent first, so that rank 1 is the major allele.
                ranked = sorted(alleleCounts.items(), key=lambda item: item[1], reverse=True)
                alleles = [allele for allele, _ in ranked]

            if -len(alleles) <= index < len(alleles):
                # Pick from the same list the bounds check was made against.
                targetAlleles[targetKey] = alleles[index]
            else:
                targetAlleles[targetKey] = None

        # Then measure each statistic's group against its target allele.
        for statisticID, s, targetKey in frequencyStats:
            targetAllele = targetAlleles[targetKey]
            if targetAllele is None:
                # The basis did not have an allele of that rank: masked.
                variantObject.setAttribute(statisticID, "Masked")
                continue

            allCount = 0
            targetCount = 0
            for sample in groupDict[s.parameters['group']].samples:
                if sample in genotypes:
                    genotype = genotypes[sample]
                    for allele in (genotype.allele1, genotype.allele2):
                        # NOTE(review): the ranking above skips alleles whose
                        # .text is None, whereas this test compares the allele
                        # object itself -- confirm both detect missing calls.
                        if allele != None:
                            allCount += 1
                            if allele == targetAllele:
                                targetCount += 1

            if allCount == 0:
                # No data for this variant in this group: undefined.
                variantObject.setAttribute(statisticID, None)
            else:
                variantObject.setAttribute(statisticID, float(targetCount) / allCount)

        # A way to force durus to acknowledge the change.
        self.data[variantObject.name].attributes = variantObject.attributes
        self.data._p_note_change()

    # NOTE(review): the original ended with two consecutive commit() calls;
    # one is kept here, after the loop, since the periodic tick above already
    # commits intermediate progress -- confirm no per-variant commit was meant.
    self.dataConnection.commit()