def calcAndAppendStatValDiplo(alleleCounts, snpLocs, statName, subWinStart,
                              subWinEnd, statVals, instanceIndex, subWinIndex,
                              genosInSubWin, unmasked):
    """Compute one statistic for a sub-window and append it to ``statVals``.

    The function dispatches on ``statName`` and appends the result to
    ``statVals[statName][instanceIndex]``.  Some branches fill several
    statistics at once:

    * ``"nDiplos"`` also appends ``diplo_H1``, ``diplo_H12`` and
      ``diplo_H2/H1`` when those keys are present in ``statVals``.
    * ``"diplo_ZnS"`` also appends ``diplo_Omega``.
    * ``"distVar"`` also appends ``distSkew`` and ``distKurt``.

    For the names that are filled as a side effect of another branch, the
    function only verifies that exactly ``subWinIndex + 1`` values have been
    recorded so far; on a mismatch it prints diagnostics and terminates the
    process with exit status 1.  Any other ``statName`` is silently ignored.

    Args:
        alleleCounts: allele counts passed through to the scikit-allel
            diversity functions and to ``thetah``.
        snpLocs: SNP positions passed as the ``pos`` argument of those
            functions.
        statName: name of the statistic to compute.
        subWinStart: sub-window start, passed as ``start``; also used as a
            1-based index when slicing ``unmasked`` for ``"distVar"``.
        subWinEnd: sub-window end, passed as ``stop``.
        statVals: mapping ``statName -> container indexed by instanceIndex``
            whose entries are lists that are appended to in place.
        instanceIndex: index of the instance whose list receives the value.
        subWinIndex: index of the current sub-window within the instance.
        genosInSubWin: genotypes of the sub-window; must provide
            ``to_n_alt()`` (and ``to_haplotypes()`` for ``"HapCount"``).
            Only accessed by the branches that need it.
        unmasked: accessibility mask, passed as ``is_accessible``; must
            support slicing and ``.count(True)`` for ``"distVar"``.

    Raises:
        SystemExit: with code 1 when a consistency check fails.
    """
    if statName == "tajD":
        statVals[statName][instanceIndex].append(
            allel.stats.diversity.tajima_d(
                alleleCounts, pos=snpLocs, start=subWinStart,
                stop=subWinEnd))
    elif statName == "pi":
        statVals[statName][instanceIndex].append(
            allel.stats.diversity.sequence_diversity(
                snpLocs, alleleCounts, start=subWinStart, stop=subWinEnd,
                is_accessible=unmasked))
    elif statName == "thetaW":
        statVals[statName][instanceIndex].append(
            allel.stats.diversity.watterson_theta(
                snpLocs, alleleCounts, start=subWinStart, stop=subWinEnd,
                is_accessible=unmasked))
    elif statName == "thetaH":
        statVals[statName][instanceIndex].append(
            thetah(snpLocs, alleleCounts, start=subWinStart, stop=subWinEnd,
                   is_accessible=unmasked))
    elif statName == "fayWuH":
        # Relies on thetaH and pi having already been appended for this
        # sub-window.
        statVals[statName][instanceIndex].append(
            statVals["thetaH"][instanceIndex][subWinIndex]
            - statVals["pi"][instanceIndex][subWinIndex])
    elif statName == "HapCount":
        # The original code read an undefined name (hapsInSubWin) here and
        # always raised NameError.
        # NOTE(review): counting distinct haplotypes from the genotype array
        # assumes the genotypes are phased -- confirm this is the intended
        # definition for diploid data.
        statVals[statName][instanceIndex].append(
            len(genosInSubWin.to_haplotypes().distinct()))
    elif statName == "nDiplos":
        genosNAlt = genosInSubWin.to_n_alt()
        diplotypeCounts = shicstats.getHaplotypeFreqSpec(genosNAlt)
        # The entry at index <number of columns of genosNAlt> is recorded as
        # nDiplos; the remaining entries feed the Garud statistics.
        nDiplos = diplotypeCounts[genosNAlt.shape[1]]
        statVals["nDiplos"][instanceIndex].append(nDiplos)
        diplotypeCounts = diplotypeCounts[:-1]
        dh1 = garudH1(diplotypeCounts)
        dh2 = garudH2(diplotypeCounts)
        dh12 = garudH12(diplotypeCounts)
        if "diplo_H1" in statVals:
            statVals["diplo_H1"][instanceIndex].append(dh1)
        if "diplo_H12" in statVals:
            statVals["diplo_H12"][instanceIndex].append(dh12)
        if "diplo_H2/H1" in statVals:
            statVals["diplo_H2/H1"][instanceIndex].append(dh2 / dh1)
    elif statName == "diplo_ZnS":
        genosNAlt = genosInSubWin.to_n_alt()
        if genosNAlt.shape[0] == 1:
            # A single row yields no pairs to correlate; record zeros.
            statVals["diplo_ZnS"][instanceIndex].append(0.0)
            statVals["diplo_Omega"][instanceIndex].append(0.0)
        else:
            r2Matrix = allel.stats.ld.rogers_huff_r(genosNAlt)
            statVals["diplo_ZnS"][instanceIndex].append(np.nanmean(r2Matrix))
            r2Matrix2 = squareform(r2Matrix ** 2)
            statVals["diplo_Omega"][instanceIndex].append(
                shicstats.omega(r2Matrix2)[0])
    elif statName == "distVar":
        genosNAlt = genosInSubWin.to_n_alt()
        # Normalise the pairwise differences by the number of unmasked
        # positions in the sub-window.
        numUnmasked = float(unmasked[subWinStart - 1:subWinEnd].count(True))
        dists = shicstats.pairwiseDiffsDiplo(genosNAlt) / numUnmasked
        statVals["distVar"][instanceIndex].append(np.var(dists, ddof=1))
        statVals["distSkew"][instanceIndex].append(scipy.stats.skew(dists))
        statVals["distKurt"][instanceIndex].append(
            scipy.stats.kurtosis(dists))
    elif statName in ["diplo_H12", "diplo_H123", "diplo_H2/H1", "distVar",
                      "distSkew", "distKurt", "diplo_Omega"]:
        # These statistics are appended by other branches ("distVar" is
        # listed but is always handled by its own branch above); only check
        # that one value per sub-window has been recorded so far.
        if len(statVals[statName][instanceIndex]) != subWinIndex + 1:
            print(statName, instanceIndex, subWinIndex + 1)
            # Print only the diagnostic series that exist, so a missing key
            # cannot mask the real failure with a KeyError.
            print(*[statVals[key][instanceIndex]
                    for key in ("diplo_H1", "diplo_H12")
                    if key in statVals])
            # Non-zero status: this is a failure, not a normal exit.
            sys.exit(1)
def _finitePrecomputedVals(values):
    """Return the entries of ``values`` that are neither NaN nor infinite."""
    return [x for x in values if not (math.isnan(x) or math.isinf(x))]


def calcAndAppendStatValForScan(alleleCounts, snpLocs, statName, subWinStart,
                                subWinEnd, statVals, subWinIndex,
                                hapsInSubWin, unmasked, precomputedStats):
    """Compute one statistic for a scan sub-window and append it.

    The function dispatches on ``statName`` and appends the result to the
    list ``statVals[statName]``.  Some branches fill several statistics at
    once:

    * ``"H1"`` also appends ``H12``, ``H123`` and ``H2/H1`` when those keys
      are present in ``statVals``.
    * ``"ZnS"`` and ``"RH"`` each also append ``Omega``.
    * ``"distVar"`` also appends ``distSkew`` and ``distKurt``.

    For the names that are filled as a side effect of another branch, the
    function only asserts that exactly ``subWinIndex + 1`` values have been
    recorded so far.  Any other ``statName`` is silently ignored.

    Args:
        alleleCounts: allele counts passed through to the scikit-allel
            diversity functions, ``thetah`` and ``maxFDA``.
        snpLocs: SNP positions passed as the ``pos`` argument of those
            functions.
        statName: name of the statistic to compute.
        subWinStart: sub-window start, passed as ``start``; also used as a
            1-based index when slicing ``unmasked`` for ``"distVar"``.
        subWinEnd: sub-window end, passed as ``stop``.
        statVals: mapping ``statName -> list`` appended to in place.
        subWinIndex: index of the current sub-window; used to look up
            earlier values and the per-window precomputed statistics.
        hapsInSubWin: haplotypes of the sub-window; must provide
            ``distinct()`` and ``to_genotypes()`` for the branches that use
            them.
        unmasked: accessibility mask, passed as ``is_accessible``; must
            support slicing and ``.count(True)`` for ``"distVar"``.
        precomputedStats: mapping with keys ``"iHS"`` and ``"nSL"``, each
            indexable by ``subWinIndex`` to give that window's values.

    Raises:
        AssertionError: when a consistency check fails.
    """
    if statName == "tajD":
        statVals[statName].append(allel.stats.diversity.tajima_d(
            alleleCounts, pos=snpLocs, start=subWinStart, stop=subWinEnd))
    elif statName == "pi":
        statVals[statName].append(allel.stats.diversity.sequence_diversity(
            snpLocs, alleleCounts, start=subWinStart, stop=subWinEnd,
            is_accessible=unmasked))
    elif statName == "thetaW":
        statVals[statName].append(allel.stats.diversity.watterson_theta(
            snpLocs, alleleCounts, start=subWinStart, stop=subWinEnd,
            is_accessible=unmasked))
    elif statName == "thetaH":
        statVals[statName].append(thetah(
            snpLocs, alleleCounts, start=subWinStart, stop=subWinEnd,
            is_accessible=unmasked))
    elif statName == "fayWuH":
        # Relies on thetaH and pi having already been appended for this
        # sub-window.
        statVals[statName].append(
            statVals["thetaH"][subWinIndex] - statVals["pi"][subWinIndex])
    elif statName == "maxFDA":
        # AK: undefined variables
        # NOTE(review): maxFDA is not defined in the visible part of this
        # file -- confirm it exists before relying on this branch.
        statVals[statName].append(maxFDA(
            snpLocs, alleleCounts, start=subWinStart, stop=subWinEnd,
            is_accessible=unmasked))
    elif statName == "HapCount":
        statVals[statName].append(len(hapsInSubWin.distinct()))
    elif statName == "H1":
        h1, h12, h123, h21 = allel.stats.selection.garud_h(hapsInSubWin)
        statVals["H1"].append(h1)
        if "H12" in statVals:
            statVals["H12"].append(h12)
        if "H123" in statVals:
            statVals["H123"].append(h123)
        if "H2/H1" in statVals:
            statVals["H2/H1"].append(h21)
    elif statName == "ZnS":
        r2Matrix = shicstats.computeR2Matrix(hapsInSubWin)
        statVals["ZnS"].append(shicstats.ZnS(r2Matrix)[0])
        statVals["Omega"].append(shicstats.omega(r2Matrix)[0])
    elif statName == "RH":
        rMatrixFlat = allel.stats.ld.rogers_huff_r(
            hapsInSubWin.to_genotypes(ploidy=2).to_n_alt())
        statVals["RH"].append(rMatrixFlat.mean())
        r2Matrix = squareform(rMatrixFlat ** 2)
        statVals["Omega"].append(shicstats.omega(r2Matrix)[0])
    elif statName == "iHSMean":
        vals = _finitePrecomputedVals(precomputedStats["iHS"][subWinIndex])
        # An empty window (after filtering) is recorded as 0.0.
        statVals["iHSMean"].append(
            sum(vals) / float(len(vals)) if vals else 0.0)
    elif statName == "nSLMean":
        # Infinite values are filtered here as well; the original tested
        # isnan twice and let them through.
        vals = _finitePrecomputedVals(precomputedStats["nSL"][subWinIndex])
        statVals["nSLMean"].append(
            sum(vals) / float(len(vals)) if vals else 0.0)
    elif statName == "iHSMax":
        vals = _finitePrecomputedVals(precomputedStats["iHS"][subWinIndex])
        statVals["iHSMax"].append(max(vals) if vals else 0.0)
    elif statName == "nSLMax":
        vals = _finitePrecomputedVals(precomputedStats["nSL"][subWinIndex])
        statVals["nSLMax"].append(max(vals) if vals else 0.0)
    elif statName == "iHSOutFrac":
        statVals["iHSOutFrac"].append(getOutlierFrac(
            precomputedStats["iHS"][subWinIndex]))
    elif statName == "nSLOutFrac":
        statVals["nSLOutFrac"].append(getOutlierFrac(
            precomputedStats["nSL"][subWinIndex]))
    elif statName == "distVar":
        # Normalise the pairwise differences by the number of unmasked
        # positions in the sub-window.
        numUnmasked = float(unmasked[subWinStart - 1:subWinEnd].count(True))
        dists = shicstats.pairwiseDiffs(hapsInSubWin) / numUnmasked
        statVals["distVar"].append(np.var(dists, ddof=1))
        statVals["distSkew"].append(scipy.stats.skew(dists))
        statVals["distKurt"].append(scipy.stats.kurtosis(dists))
    elif statName in ["H12", "H123", "H2/H1", "Omega", "distVar", "distSkew",
                      "distKurt"]:
        # These statistics are appended by other branches ("distVar" is
        # listed but is always handled by its own branch above); only check
        # that one value per sub-window has been recorded so far.
        assert len(statVals[statName]) == subWinIndex + 1, (
            "%s has %d values, expected %d"
            % (statName, len(statVals[statName]), subWinIndex + 1))