def test_trimmedmean(self): "Tests the trimmed mean." data = ma.array([ 77, 87, 88,114,151,210,219,246,253,262, 296,299,306,376,428,515,666,1310,2611]) assert_almost_equal(mstats.trimmed_mean(data,0.1), 343, 0) assert_almost_equal(mstats.trimmed_mean(data,(0.1,0.1)), 343, 0) assert_almost_equal(mstats.trimmed_mean(data,(0.2,0.2)), 283, 0)
def _computePositionTraditionalControl(self, caseObservations, controlObservations, methylFractionFlag, identifyFlag, testProcedure=_tTest):
    """Build the case-vs-control summary of the observed ipds for one template position/strand.

    The returned dict holds the site identification, coverage, per-sample
    mean/median/std, the ratio of trimmed means ('ipdRatio'), the result of
    ``testProcedure`` and a score of -10*log10(pvalue).  When
    ``methylFractionFlag`` is set, the p-value is below
    ``self.options.pvalue`` and ``identifyFlag`` is not set, the FRAC /
    FRAClow / FRACup entries are filled in as well.
    """
    caseIpds = caseObservations['data']['ipd']
    controlIpds = controlObservations['data']['ipd']

    # Site identification; refName is the FASTA header name.
    summary = {'refId': self.refId, 'refName': self.refName}
    summary['strand'] = 1 - caseObservations['strand']
    summary['tpl'] = caseObservations['tpl']
    summary['base'] = self.cognateBaseFunc(summary['tpl'], summary['strand'])

    # Overall coverage annotation is the rounded average of the two samples.
    nCase = caseIpds.size
    nControl = controlIpds.size
    summary['coverage'] = int(round((nCase + nControl) / 2.0))
    summary['caseCoverage'] = nCase
    summary['controlCoverage'] = nControl

    # Descriptive statistics of the observed ipds.
    summary['caseMean'] = caseIpds.mean().item()
    summary['caseMedian'] = np.median(caseIpds).item()
    summary['caseStd'] = np.std(caseIpds).item()
    summary['controlMean'] = controlIpds.mean().item()
    summary['controlMedian'] = np.median(controlIpds).item()
    summary['controlStd'] = np.std(controlIpds).item()

    # Ratio of trimmed means; a near-zero control mean yields a neutral 1.0.
    trimLimits = (0.001, 0.03)
    controlTrimmed = mstats.trimmed_mean(controlIpds, trimLimits).item()
    summary['ipdRatio'] = (
        mstats.trimmed_mean(caseIpds, trimLimits).item() / controlTrimmed
        if abs(controlTrimmed) > 1e-3
        else 1.0
    )

    outcome = testProcedure(caseIpds, controlIpds)
    summary['testStatistic'] = outcome['testStatistic']
    summary['pvalue'] = outcome['pvalue']

    # Floor the p-value at the smallest positive float so log10 stays finite.
    flooredP = max(sys.float_info.min, summary['pvalue'])
    summary['score'] = round(-10.0 * math.log10(flooredP))

    # The fraction is only estimated in the detection case, from modelPrediction alone.
    wantFraction = methylFractionFlag and flooredP < self.options.pvalue and not identifyFlag
    if not wantFraction:
        return summary

    minCov = self.options.methylMinCov
    if summary['controlCoverage'] > minCov and summary['caseCoverage'] > minCov:
        # Instantiate mixture estimation methods:
        mixture = MixtureEstimationMethods(self.ipdModel.gbmModel.post, self.ipdModel.gbmModel.pre, summary, minCov)
        estimate = mixture.detectionMixModelBootstrap(summary['controlMean'], caseIpds)
        summary[FRAC] = estimate[0]
        summary[FRAClow] = estimate[1]
        summary[FRACup] = estimate[2]
    else:
        # Not enough coverage on one of the samples: report the fraction as undefined.
        for key in (FRAC, FRACup, FRAClow):
            summary[key] = np.nan

    return summary
def test_trimmed_mean_ci():
    """Check the trimmed mean and its confidence interval on a fixed sample."""
    sample = ma.array([
        545, 555, 558, 572, 575, 576, 578, 580,
        594, 605, 635, 651, 653, 661, 666,
    ])

    # Trimmed mean with a limit of 0.2, compared to one decimal place.
    trimmed = ms.trimmed_mean(sample, 0.2)
    assert_almost_equal(trimmed, 596.2, 1)

    # Confidence interval bounds, rounded to one decimal before comparing.
    bounds = ms.trimmed_mean_ci(sample, (0.2, 0.2))
    assert_equal(np.round(bounds, 1), [561.8, 630.6])
def test_trimmedmeanci(self): "Tests the confidence intervals of the trimmed mean." data = ma.array([545,555,558,572,575,576,578,580, 594,605,635,651,653,661,666]) assert_almost_equal(ms.trimmed_mean(data,0.2), 596.2, 1) assert_equal(np.round(ms.trimmed_mean_ci(data,(0.2,0.2)),1), [561.8, 630.6])
def _computePositionTraditionalControl(self, caseObservations, controlObservations, capValue, controlCapValue, methylFractionFlag, identifyFlag, testProcedure=_tTest):
    """Summarize the observed ipds at one template position/strand, using a case-control analysis.

    Parameters:
        caseObservations: mapping providing 'data' (whose 'ipd' field is the
            array of case ipds), 'strand' and 'tpl'.
        controlObservations: mapping providing 'data' with an 'ipd' field for
            the control sample.
        capValue: lower bound on the cap applied to the case ipds.
        controlCapValue: lower bound on the cap applied to the control ipds.
        methylFractionFlag: when truthy (and the site is significant and
            identifyFlag is falsy), also estimate the modified fraction.
        identifyFlag: when truthy, the fraction estimate is skipped.
        testProcedure: callable taking (caseData, controlData) and returning
            a mapping with 'testStatistic' and 'pvalue'.

    Returns:
        dict with 'refId', 'refName', 'strand', 'tpl', 'base', 'coverage',
        'caseCoverage', 'controlCoverage', mean/median/std of both capped
        samples, 'ipdRatio', 'testStatistic', 'pvalue' and 'score'; plus the
        FRAC, FRAClow and FRACup entries when the fraction estimate is
        requested.
    """
    def _capIpds(ipds, minimumCap):
        # Clip ipds at max(minimumCap, 4 * median, local percentile).
        denominator = ipds.size - 1
        if denominator == 0:
            # A lone observation would divide by zero below; percentile 0 is
            # what two observations get and leaves the single value uncapped.
            percentile = 0.0
        else:
            percentile = min(90, (1.0 - 1.0 / denominator) * 100)
        localPercentile = np.percentile(ipds, percentile)
        cap = max(minimumCap, 4.0 * np.median(ipds).item(), localPercentile)
        return np.minimum(ipds, cap)

    rawCaseData = caseObservations['data']['ipd']
    rawControlData = controlObservations['data']['ipd']

    # cap both the native and control data, more or less as it is done in computePositionSyntheticControl:
    caseData = _capIpds(rawCaseData, capValue)
    controlData = _capIpds(rawControlData, controlCapValue)

    res = dict()
    res['refId'] = self.refId

    # FASTA header name
    res['refName'] = self.refName

    strand = res['strand'] = 1 - caseObservations['strand']
    tpl = res['tpl'] = caseObservations['tpl']
    res['base'] = self.cognateBaseFunc(tpl, strand)

    # need a coverage annotation
    res['coverage'] = int(round((caseData.size + controlData.size) / 2.0))
    res['caseCoverage'] = caseData.size
    res['controlCoverage'] = controlData.size

    # Compute stats on the (capped) observed ipds
    res['caseMean'] = caseData.mean().item()
    res['caseMedian'] = np.median(caseData).item()
    res['caseStd'] = np.std(caseData).item()

    res['controlMean'] = controlData.mean().item()
    res['controlMedian'] = np.median(controlData).item()
    res['controlStd'] = np.std(controlData).item()

    # Ratio of trimmed means; a near-zero control mean yields a neutral 1.0
    # instead of dividing by it.
    trim = (0.001, 0.03)
    ctrlMean = mstats.trimmed_mean(controlData, trim).item()
    if abs(ctrlMean) > 1e-3:
        res['ipdRatio'] = (mstats.trimmed_mean(caseData, trim).item() / ctrlMean)
    else:
        res['ipdRatio'] = 1.0

    testResults = testProcedure(caseData, controlData)
    res['testStatistic'] = testResults['testStatistic']
    res['pvalue'] = testResults['pvalue']

    # Floor the p-value at the smallest positive float so log10 stays finite.
    pvalue = max(sys.float_info.min, res['pvalue'])
    res['score'] = round(-10.0 * math.log10(pvalue))

    # If the methylFractionFlag is set, then estimate fraction using just modelPrediction in the detection case.
    if methylFractionFlag and pvalue < self.options.pvalue and not identifyFlag:
        if res['controlCoverage'] > self.options.methylMinCov and res['caseCoverage'] > self.options.methylMinCov:
            # Instantiate mixture estimation methods:
            mixture = MixtureEstimationMethods(self.ipdModel.gbmModel.post, self.ipdModel.gbmModel.pre, res, self.options.methylMinCov)
            x = mixture.detectionMixModelBootstrap(res['controlMean'], caseData)
            res[FRAC] = x[0]
            res[FRAClow] = x[1]
            res[FRACup] = x[2]
        else:
            # Too little coverage on one of the samples: fraction is undefined.
            res[FRAC] = np.nan
            res[FRACup] = np.nan
            res[FRAClow] = np.nan

    return res
def tmean(arr):
    """Return the trimmed mean of ``arr`` using the module-level trim limits.

    The lower and upper limits come from TRIM_LOW_PERCENTAGE and
    TRIM_HIGH_PERCENTAGE respectively.
    """
    # NOTE(review): if `stats` is scipy.stats.mstats, `limits` are fractions
    # in [0, 1]; confirm the constants are fractions despite their names.
    trim_limits = (TRIM_LOW_PERCENTAGE, TRIM_HIGH_PERCENTAGE)
    return stats.trimmed_mean(arr, limits=trim_limits)