Esempio n. 1
0
def calcAndAppendStatValDiplo(alleleCounts, snpLocs, statName, subWinStart, subWinEnd, statVals, instanceIndex, subWinIndex, genosInSubWin, unmasked):
    """Compute one diploid (unphased) statistic for a sub-window and append it.

    The result is appended in place to ``statVals[<name>][instanceIndex]``;
    nothing is returned.  Several branches fill more than one statistic:

    * ``"nDiplos"`` also appends ``diplo_H1``, ``diplo_H12`` and
      ``diplo_H2/H1`` when those keys are present in ``statVals``.
    * ``"diplo_ZnS"`` also appends ``diplo_Omega``.
    * ``"distVar"`` also appends ``distSkew`` and ``distKurt``.

    When called later with one of those piggy-backed names, the function only
    verifies that the value for ``subWinIndex`` has already been appended.

    Args:
        alleleCounts: allele counts passed through to the scikit-allel
            diversity functions.
        snpLocs: SNP positions passed through to the same functions.
        statName: name of the statistic to compute.  Unrecognised names are
            silently ignored.
        subWinStart: first position of the sub-window (used 1-based when
            slicing ``unmasked``).
        subWinEnd: last position of the sub-window.
        statVals: mapping ``name -> list (per instance) of lists (per
            sub-window)``; mutated in place.
        instanceIndex: index of the instance whose list is appended to.
        subWinIndex: index of the current sub-window within the instance.
        genosInSubWin: genotypes for the sub-window; must provide
            ``to_n_alt()`` (and ``to_haplotypes()`` for ``"HapCount"``).
        unmasked: per-site accessibility flags; must support slicing and
            ``.count(True)``.

    Raises:
        SystemExit: if a piggy-backed statistic has not been filled for the
            current sub-window.
    """
    if statName == "tajD":
        statVals[statName][instanceIndex].append(allel.stats.diversity.tajima_d(
            alleleCounts, pos=snpLocs, start=subWinStart, stop=subWinEnd))
    elif statName == "pi":
        statVals[statName][instanceIndex].append(allel.stats.diversity.sequence_diversity(
            snpLocs, alleleCounts, start=subWinStart, stop=subWinEnd, is_accessible=unmasked))
    elif statName == "thetaW":
        statVals[statName][instanceIndex].append(allel.stats.diversity.watterson_theta(
            snpLocs, alleleCounts, start=subWinStart, stop=subWinEnd, is_accessible=unmasked))
    elif statName == "thetaH":
        statVals[statName][instanceIndex].append(thetah(
            snpLocs, alleleCounts, start=subWinStart, stop=subWinEnd, is_accessible=unmasked))
    elif statName == "fayWuH":
        # Relies on thetaH and pi having been appended for this sub-window
        # before fayWuH is requested.
        statVals[statName][instanceIndex].append(
            statVals["thetaH"][instanceIndex][subWinIndex]-statVals["pi"][instanceIndex][subWinIndex])
    elif statName == "HapCount":
        # The original referenced an undefined name (hapsInSubWin) here and
        # always raised NameError.  Only genotypes are available in this
        # function, so view them as haplotypes before counting distinct ones.
        # NOTE(review): for unphased data the haplotype split is arbitrary --
        # confirm this statistic is meaningful for the diploid feature set.
        hapsInSubWin = genosInSubWin.to_haplotypes()
        statVals[statName][instanceIndex].append(len(hapsInSubWin.distinct()))
    elif statName == "nDiplos":
        genosNAlt = genosInSubWin.to_n_alt()
        diplotypeCounts = shicstats.getHaplotypeFreqSpec(genosNAlt)
        # The entry at index <number of individuals> holds the number of
        # distinct diplotypes; the remaining entries feed the Garud H stats.
        nDiplos = diplotypeCounts[genosNAlt.shape[1]]
        statVals["nDiplos"][instanceIndex].append(nDiplos)
        diplotypeCounts = diplotypeCounts[:-1]
        dh1 = garudH1(diplotypeCounts)
        dh2 = garudH2(diplotypeCounts)
        dh12 = garudH12(diplotypeCounts)
        if "diplo_H1" in statVals:
            statVals["diplo_H1"][instanceIndex].append(dh1)
        if "diplo_H12" in statVals:
            statVals["diplo_H12"][instanceIndex].append(dh12)
        if "diplo_H2/H1" in statVals:
            statVals["diplo_H2/H1"][instanceIndex].append(dh2/dh1)
    elif statName == "diplo_ZnS":
        genosNAlt = genosInSubWin.to_n_alt()
        if genosNAlt.shape[0] == 1:
            # A single variant has no pairs, so both LD statistics are zero.
            statVals["diplo_ZnS"][instanceIndex].append(0.0)
            statVals["diplo_Omega"][instanceIndex].append(0.0)
        else:
            # NOTE(review): rogers_huff_r yields r (not r**2) in condensed
            # form, so diplo_ZnS is the mean of r here while Omega uses r**2.
            # Left unchanged because it affects feature values -- confirm.
            r2Matrix = allel.stats.ld.rogers_huff_r(genosNAlt)
            statVals["diplo_ZnS"][instanceIndex].append(np.nanmean(r2Matrix))
            r2Matrix2 = squareform(r2Matrix ** 2)
            statVals["diplo_Omega"][instanceIndex].append(
                shicstats.omega(r2Matrix2)[0])
    elif statName == "distVar":
        genosNAlt = genosInSubWin.to_n_alt()
        # Normalise pairwise differences by the number of accessible sites in
        # the sub-window.
        dists = shicstats.pairwiseDiffsDiplo(
            genosNAlt)/float(unmasked[subWinStart-1:subWinEnd].count(True))
        statVals["distVar"][instanceIndex].append(np.var(dists, ddof=1))
        statVals["distSkew"][instanceIndex].append(scipy.stats.skew(dists))
        statVals["distKurt"][instanceIndex].append(scipy.stats.kurtosis(dists))
    elif statName in ["diplo_H12", "diplo_H123", "diplo_H2/H1", "distVar", "distSkew", "distKurt", "diplo_Omega"]:
        # These are filled as a side effect of the branches above ("distVar"
        # can never reach this branch); only check that the value exists.
        if not len(statVals[statName][instanceIndex]) == subWinIndex+1:
            print(statName, instanceIndex, subWinIndex+1)
            print(statVals["diplo_H1"][instanceIndex],
                  statVals["diplo_H12"][instanceIndex])
            # Exit with a message (and therefore a non-zero status) so the
            # failure is not reported as success to the calling shell.
            sys.exit("calcAndAppendStatValDiplo: %s has %d value(s) for instance %s; expected %d"
                     % (statName, len(statVals[statName][instanceIndex]),
                        instanceIndex, subWinIndex+1))
Esempio n. 2
0
def _finiteScanVals(vals):
    """Return the entries of *vals* that are neither NaN nor infinite."""
    return [x for x in vals if not (math.isnan(x) or math.isinf(x))]


def calcAndAppendStatValForScan(alleleCounts, snpLocs, statName, subWinStart, subWinEnd, statVals, subWinIndex, hapsInSubWin, unmasked, precomputedStats):
    """Compute one haplotype-based statistic for a sub-window and append it.

    The result is appended in place to ``statVals[<name>]``; nothing is
    returned.  Several branches fill more than one statistic:

    * ``"H1"`` also appends ``H12``, ``H123`` and ``H2/H1`` when those keys
      are present in ``statVals``.
    * ``"ZnS"`` and ``"RH"`` each also append ``Omega``.
    * ``"distVar"`` also appends ``distSkew`` and ``distKurt``.

    When called later with one of those piggy-backed names, the function only
    asserts that the value for ``subWinIndex`` has already been appended.

    Args:
        alleleCounts: allele counts passed through to the scikit-allel
            diversity functions.
        snpLocs: SNP positions passed through to the same functions.
        statName: name of the statistic to compute.  Unrecognised names are
            silently ignored.
        subWinStart: first position of the sub-window (used 1-based when
            slicing ``unmasked``).
        subWinEnd: last position of the sub-window.
        statVals: mapping ``name -> list of per-sub-window values``; mutated
            in place.
        subWinIndex: index of the current sub-window.
        hapsInSubWin: haplotypes for the sub-window; must provide
            ``distinct()`` and ``to_genotypes()``.
        unmasked: per-site accessibility flags; must support slicing and
            ``.count(True)``.
        precomputedStats: mapping with ``"iHS"`` and ``"nSL"`` entries, each
            indexable by ``subWinIndex`` to give that window's values.

    Raises:
        AssertionError: if a piggy-backed statistic has not been filled for
            the current sub-window.
    """
    if statName == "tajD":
        statVals[statName].append(allel.stats.diversity.tajima_d(
            alleleCounts, pos=snpLocs, start=subWinStart, stop=subWinEnd))
    elif statName == "pi":
        statVals[statName].append(allel.stats.diversity.sequence_diversity(  # NOQA
            snpLocs, alleleCounts, start=subWinStart, stop=subWinEnd, is_accessible=unmasked))  # NOQA
    elif statName == "thetaW":
        statVals[statName].append(allel.stats.diversity.watterson_theta(
            snpLocs, alleleCounts, start=subWinStart, stop=subWinEnd, is_accessible=unmasked))  # NOQA
    elif statName == "thetaH":
        statVals[statName].append(thetah(
            snpLocs, alleleCounts, start=subWinStart, stop=subWinEnd, is_accessible=unmasked))  # NOQA
    elif statName == "fayWuH":
        # Relies on thetaH and pi having been appended for this sub-window
        # before fayWuH is requested.
        statVals[statName].append(
            statVals["thetaH"][subWinIndex]-statVals["pi"][subWinIndex])
    elif statName == "maxFDA":
        # AK: undefined variables
        # NOTE(review): maxFDA is not defined in the visible part of the
        # file -- confirm it exists before enabling this statistic.
        statVals[statName].append(maxFDA(
            snpLocs, alleleCounts, start=subWinStart, stop=subWinEnd, is_accessible=unmasked))
    elif statName == "HapCount":
        statVals[statName].append(len(hapsInSubWin.distinct()))
    elif statName == "H1":
        h1, h12, h123, h21 = allel.stats.selection.garud_h(hapsInSubWin)
        statVals["H1"].append(h1)
        if "H12" in statVals:
            statVals["H12"].append(h12)
        if "H123" in statVals:
            statVals["H123"].append(h123)
        if "H2/H1" in statVals:
            statVals["H2/H1"].append(h21)
    elif statName == "ZnS":
        r2Matrix = shicstats.computeR2Matrix(hapsInSubWin)
        statVals["ZnS"].append(shicstats.ZnS(r2Matrix)[0])
        statVals["Omega"].append(shicstats.omega(r2Matrix)[0])
    elif statName == "RH":
        # Pair consecutive haplotypes into diploid genotypes, then compute
        # the Rogers-Huff correlation on alternate-allele counts.
        rMatrixFlat = allel.stats.ld.rogers_huff_r(
            hapsInSubWin.to_genotypes(ploidy=2).to_n_alt())
        rhAvg = rMatrixFlat.mean()
        statVals["RH"].append(rhAvg)
        r2Matrix = squareform(rMatrixFlat ** 2)
        statVals["Omega"].append(shicstats.omega(r2Matrix)[0])
    elif statName == "iHSMean":
        vals = _finiteScanVals(precomputedStats["iHS"][subWinIndex])
        statVals["iHSMean"].append(
            sum(vals)/float(len(vals)) if vals else 0.0)
    elif statName == "nSLMean":
        # The original tested isnan twice here, so +/-inf values leaked into
        # the mean; filter them exactly as the iHS branches do.
        vals = _finiteScanVals(precomputedStats["nSL"][subWinIndex])
        statVals["nSLMean"].append(
            sum(vals)/float(len(vals)) if vals else 0.0)
    elif statName == "iHSMax":
        vals = _finiteScanVals(precomputedStats["iHS"][subWinIndex])
        statVals["iHSMax"].append(max(vals) if vals else 0.0)
    elif statName == "nSLMax":
        # Same isnan/isinf fix as for nSLMean.
        vals = _finiteScanVals(precomputedStats["nSL"][subWinIndex])
        statVals["nSLMax"].append(max(vals) if vals else 0.0)
    elif statName == "iHSOutFrac":
        statVals["iHSOutFrac"].append(getOutlierFrac(
            precomputedStats["iHS"][subWinIndex]))
    elif statName == "nSLOutFrac":
        statVals["nSLOutFrac"].append(getOutlierFrac(
            precomputedStats["nSL"][subWinIndex]))
    elif statName == "distVar":
        # Normalise pairwise differences by the number of accessible sites in
        # the sub-window.
        dists = shicstats.pairwiseDiffs(
            hapsInSubWin)/float(unmasked[subWinStart-1:subWinEnd].count(True))
        statVals["distVar"].append(np.var(dists, ddof=1))
        statVals["distSkew"].append(scipy.stats.skew(dists))
        statVals["distKurt"].append(scipy.stats.kurtosis(dists))
    elif statName in ["H12", "H123", "H2/H1",
                      "Omega", "distVar", "distSkew", "distKurt"]:
        # These are filled as a side effect of the branches above ("distVar"
        # can never reach this branch); only check that the value exists.
        assert len(statVals[statName]) == subWinIndex+1