def _computePositionTraditionalControl(self, caseObservations, controlObservations, methylFractionFlag, identifyFlag, testProcedure=_tTest):
    """Build the case-vs-control summary record for one template position/strand.

    Args:
        caseObservations: mapping with 'strand', 'tpl' and 'data' entries;
            caseObservations['data']['ipd'] holds the case IPD values.
        controlObservations: same layout; only its 'data'/'ipd' entry is read.
        methylFractionFlag: when truthy (and identifyFlag is falsy), significant
            sites also get modified-fraction fields.
        identifyFlag: when truthy, fraction estimation is skipped here.
        testProcedure: callable taking (caseIpds, controlIpds) and returning a
            mapping with 'testStatistic' and 'pvalue'.

    Returns:
        dict of per-site statistics (coverage, mean/median/std of each sample,
        ipdRatio, testStatistic, pvalue, score) plus, optionally, the
        FRAC / FRAClow / FRACup entries.
    """
    caseIpds = caseObservations['data']['ipd']
    controlIpds = controlObservations['data']['ipd']

    # The reported strand is the flip of the strand stored on the observations.
    strand = 1 - caseObservations['strand']
    tpl = caseObservations['tpl']

    res = {
        'refId': self.refId,
        'refName': self.refName,  # FASTA header name
        'strand': strand,
        'tpl': tpl,
        'base': self.cognateBaseFunc(tpl, strand),
        # Single coverage annotation: rounded average of the two sample sizes.
        'coverage': int(round((caseIpds.size + controlIpds.size) / 2.0)),
        'caseCoverage': caseIpds.size,
        'controlCoverage': controlIpds.size,
        'caseMean': caseIpds.mean().item(),
        'caseMedian': np.median(caseIpds).item(),
        'caseStd': np.std(caseIpds).item(),
        'controlMean': controlIpds.mean().item(),
        'controlMedian': np.median(controlIpds).item(),
        'controlStd': np.std(controlIpds).item(),
    }

    # Ratio of trimmed means; falls back to 1.0 when the control trimmed mean
    # is too close to zero to divide by.
    trimLimits = (0.001, 0.03)
    controlTrimmedMean = mstats.trimmed_mean(controlIpds, trimLimits).item()
    res['ipdRatio'] = (
        mstats.trimmed_mean(caseIpds, trimLimits).item() / controlTrimmedMean
        if abs(controlTrimmedMean) > 1e-3
        else 1.0
    )

    outcome = testProcedure(caseIpds, controlIpds)
    for key in ('testStatistic', 'pvalue'):
        res[key] = outcome[key]

    # Floor the p-value at the smallest positive float so log10 stays defined.
    pvalue = max(sys.float_info.min, res['pvalue'])
    res['score'] = round(-10.0 * math.log10(pvalue))

    # Fraction estimation only applies to significant sites in detection mode.
    if not (methylFractionFlag and pvalue < self.options.pvalue and not identifyFlag):
        return res

    minCov = self.options.methylMinCov
    if res['controlCoverage'] > minCov and res['caseCoverage'] > minCov:
        # The control mean is handed to the mixture estimator together with
        # the case IPDs.
        mixture = MixtureEstimationMethods(self.ipdModel.gbmModel.post, self.ipdModel.gbmModel.pre, res, self.options.methylMinCov)
        estimate = mixture.detectionMixModelBootstrap(res['controlMean'], caseIpds)
        res[FRAC], res[FRAClow], res[FRACup] = estimate[0], estimate[1], estimate[2]
    else:
        # Not enough coverage in one of the samples: report NaN for all three.
        res[FRAC] = res[FRACup] = res[FRAClow] = np.nan

    return res
def _computePositionTraditionalControl(self, caseObservations, controlObservations, capValue, controlCapValue, methylFractionFlag, identifyFlag, testProcedure=_tTest):
    """Summarize the observed ipds at one template position/strand, using a case-control analysis.

    Both samples are capped before any statistic is computed, more or less as
    it is done in _computePositionSyntheticControl.

    Args:
        caseObservations: mapping with 'strand', 'tpl' and 'data' entries;
            caseObservations['data']['ipd'] holds the case IPD values.
        controlObservations: same layout; only its 'data'/'ipd' entry is read.
        capValue: global cap for the case IPDs. The cap actually applied is
            the largest of this value, 4x the case median and a local
            percentile of the case data.
        controlCapValue: global cap for the control IPDs, combined the same way.
        methylFractionFlag: when truthy (and identifyFlag is falsy), significant
            sites also get modified-fraction fields.
        identifyFlag: when truthy, fraction estimation is skipped here.
        testProcedure: callable taking (caseData, controlData) and returning a
            mapping with 'testStatistic' and 'pvalue'.

    Returns:
        dict of per-site statistics computed on the capped data (coverage,
        mean/median/std of each sample, ipdRatio, testStatistic, pvalue,
        score) plus, optionally, the FRAC / FRAClow / FRACup entries.
    """

    def capIpds(ipds, globalCap):
        # Hybrid cap: the highest of the global cap, 4x the sample median and
        # a local percentile (at most the 90th; lower at low coverage so that
        # the highest datapoint is trimmed).
        # max(..., 1) keeps a single-observation sample from dividing by zero;
        # it then uses percentile 0, the same value the formula gives for two
        # observations.
        percentile = min(90, (1.0 - 1.0 / max(ipds.size - 1, 1)) * 100)
        localPercentile = np.percentile(ipds, percentile)
        ceiling = max(globalCap, 4.0 * np.median(ipds).item(), localPercentile)
        return np.minimum(ipds, ceiling)

    # Compute stats on the (capped) observed ipds
    caseData = capIpds(caseObservations['data']['ipd'], capValue)
    controlData = capIpds(controlObservations['data']['ipd'], controlCapValue)

    res = dict()
    res['refId'] = self.refId

    # FASTA header name
    res['refName'] = self.refName

    # The reported strand is the flip of the strand stored on the observations.
    strand = res['strand'] = 1 - caseObservations['strand']
    tpl = res['tpl'] = caseObservations['tpl']
    res['base'] = self.cognateBaseFunc(tpl, strand)

    # Single coverage annotation: rounded average of the two sample sizes.
    res['coverage'] = int(round((caseData.size + controlData.size) / 2.0))
    res['caseCoverage'] = caseData.size
    res['controlCoverage'] = controlData.size

    res['caseMean'] = caseData.mean().item()
    res['caseMedian'] = np.median(caseData).item()
    res['caseStd'] = np.std(caseData).item()

    res['controlMean'] = controlData.mean().item()
    res['controlMedian'] = np.median(controlData).item()
    res['controlStd'] = np.std(controlData).item()

    # Ratio of trimmed means; falls back to 1.0 when the control trimmed mean
    # is too close to zero to divide by.
    trim = (0.001, 0.03)
    ctrlMean = mstats.trimmed_mean(controlData, trim).item()
    if abs(ctrlMean) > 1e-3:
        res['ipdRatio'] = (mstats.trimmed_mean(caseData, trim).item() / ctrlMean)
    else:
        res['ipdRatio'] = 1.0

    testResults = testProcedure(caseData, controlData)
    res['testStatistic'] = testResults['testStatistic']
    res['pvalue'] = testResults['pvalue']

    # Floor the p-value at the smallest positive float so log10 stays defined.
    pvalue = max(sys.float_info.min, res['pvalue'])
    res['score'] = round(-10.0 * math.log10(pvalue))

    # If the methylFractionFlag is set, then estimate fraction using just the
    # control mean in the detection case.
    if methylFractionFlag and pvalue < self.options.pvalue and not identifyFlag:
        if res['controlCoverage'] > self.options.methylMinCov and res['caseCoverage'] > self.options.methylMinCov:
            # Instantiate mixture estimation methods:
            mixture = MixtureEstimationMethods(self.ipdModel.gbmModel.post, self.ipdModel.gbmModel.pre, res, self.options.methylMinCov)
            x = mixture.detectionMixModelBootstrap(res['controlMean'], caseData)
            res[FRAC] = x[0]
            res[FRAClow] = x[1]
            res[FRACup] = x[2]
        else:
            # Not enough coverage in one of the samples: report NaN for all three.
            res[FRAC] = np.nan
            res[FRACup] = np.nan
            res[FRAClow] = np.nan

    return res
def _computePositionSyntheticControl(self, caseObservations, capValue, methylFractionFlag, identifyFlag, modelPrediction=None):
    """Summarize the observed ipds at one template position/strand, using the synthetic ipd model.

    Args:
        caseObservations: mapping with 'strand', 'tpl' and 'data' entries;
            caseObservations['data']['ipd'] holds the observed IPD values.
        capValue: global cap for the IPDs. The cap actually applied is the
            largest of this value, 4x the model prediction and a local
            percentile of the data.
        methylFractionFlag: when truthy (and identifyFlag is falsy), significant
            sites also get modified-fraction fields.
        identifyFlag: when truthy, fraction estimation is skipped here.
        modelPrediction: predicted IPD for this site; when None it is obtained
            from self.meanIpdFunc(tpl, strand).

    Returns:
        dict with the site identification, coverage, modelPrediction, base,
        rawData (the uncapped IPDs), tMean / tErr / ipdRatio computed on the
        capped IPDs, tStatistic, pvalue, score, a placeholder 'modification'
        of ".", and, optionally, the FRAC / FRAClow / FRACup entries.
    """
    d = caseObservations['data']['ipd']

    res = dict()

    # ref00000x name
    res['refId'] = self.refId

    # FASTA header name
    res['refName'] = self.refName

    # NOTE -- this is where the strand flipping occurs -- make sure to
    # reproduce this in all the calling methods
    strand = res['strand'] = 1 - caseObservations['strand']
    tpl = res['tpl'] = caseObservations['tpl']
    res['coverage'] = d.size

    # Compute the predicted IPD from the model
    # NOTE! The ipd model is in the observed read strand
    if modelPrediction is None:
        modelPrediction = self.meanIpdFunc(tpl, strand).item()
    res['modelPrediction'] = modelPrediction

    res['base'] = self.cognateBaseFunc(tpl, strand)

    # Store in case of methylated fraction estimation. np.minimum below builds
    # a new array, so this entry keeps the uncapped values.
    res['rawData'] = d

    # Hybrid capping approach -- cap at the highest of
    #   - 4x the model prediction
    #   - 90th percentile of the local data (at low coverage we pick a lower
    #     percentile to ensure we trim the highest datapoint)
    #   - global cap value
    # max(..., 1) keeps a single-observation site from dividing by zero; it
    # then uses percentile 0, the same value the formula gives for two
    # observations.
    percentile = min(90, (1.0 - 1.0 / max(d.size - 1, 1)) * 100)
    localPercentile = np.percentile(d, percentile)
    capValue = max(capValue, 4.0 * modelPrediction, localPercentile)
    d = np.minimum(d, capValue)

    # Trimmed stats
    res['tMean'] = d.mean().item()
    res['tErr'] = np.std(d).item() / sqrt(d.size)
    res['ipdRatio'] = res['tMean'] / res['modelPrediction']

    # Don't know the modification yet
    res["modification"] = "."

    # use ttest-based pvalue
    res['tStatistic'] = self.computeObservationTstatistic(res)
    res['pvalue'] = self.computeObservationPValueTTest(res)

    # Floor the p-value at the smallest positive float so log10 stays defined.
    pvalue = max(sys.float_info.min, res['pvalue'])
    score = round(-10.0 * math.log10(pvalue))
    res['score'] = score

    # If the methylFractionFlag is set, then estimate fraction using just
    # modelPrediction in the detection case.
    if methylFractionFlag and pvalue < self.options.pvalue and not identifyFlag:
        if res['coverage'] > self.options.methylMinCov:
            # NOTE(review): this re-queries the model and so ignores a
            # caller-supplied modelPrediction -- confirm that is intended.
            modelPrediction = self.meanIpdFunc(tpl, strand).item()

            # Instantiate mixture estimation methods; the estimator receives
            # the capped IPDs.
            mixture = MixtureEstimationMethods(self.ipdModel.gbmModel.post, self.ipdModel.gbmModel.pre, res, self.options.methylMinCov)
            x = mixture.detectionMixModelBootstrap(modelPrediction, d)

            res[FRAC] = x[0]
            res[FRAClow] = x[1]
            res[FRACup] = x[2]
        else:
            # Not enough coverage: report NaN for all three fraction fields.
            res[FRAC] = np.nan
            res[FRACup] = np.nan
            res[FRAClow] = np.nan

    return res
def _computePositionSyntheticControl(self, caseObservations, capValue, methylFractionFlag, identifyFlag, modelPrediction=None):
    """Summarize the observed ipds at one template position/strand, using the synthetic ipd model.

    Args:
        caseObservations: mapping with 'strand', 'tpl' and 'data' entries;
            caseObservations['data']['ipd'] holds the observed IPD values.
        capValue: global cap for the IPDs. The cap actually applied is the
            largest of this value, 4x the model prediction and a local
            percentile of the data.
        methylFractionFlag: when truthy (and identifyFlag is falsy), significant
            sites also get modified-fraction fields.
        identifyFlag: when truthy, fraction estimation is skipped here.
        modelPrediction: predicted IPD for this site; when None it is obtained
            from self.meanIpdFunc(tpl, strand).

    Returns:
        dict with the site identification, coverage, modelPrediction, base,
        rawData (the uncapped IPDs), tMean / tErr / ipdRatio computed on the
        capped IPDs, tStatistic, pvalue, score, a placeholder 'modification'
        of ".", and, optionally, the FRAC / FRAClow / FRACup entries.
    """
    d = caseObservations['data']['ipd']

    res = dict()

    # ref00000x name
    res['refId'] = self.refId

    # FASTA header name
    res['refName'] = self.refName

    # NOTE -- this is where the strand flipping occurs -- make sure to
    # reproduce this in all the calling methods
    strand = res['strand'] = 1 - caseObservations['strand']
    tpl = res['tpl'] = caseObservations['tpl']
    res['coverage'] = d.size

    # Compute the predicted IPD from the model
    # NOTE! The ipd model is in the observed read strand
    if modelPrediction is None:
        modelPrediction = self.meanIpdFunc(tpl, strand).item()
    res['modelPrediction'] = modelPrediction

    res['base'] = self.cognateBaseFunc(tpl, strand)

    # Store in case of methylated fraction estimation. np.minimum below builds
    # a new array, so this entry keeps the uncapped values.
    res['rawData'] = d

    # Hybrid capping approach -- cap at the highest of
    #   - 4x the model prediction
    #   - 90th percentile of the local data (at low coverage we pick a lower
    #     percentile to ensure we trim the highest datapoint)
    #   - global cap value
    # max(..., 1) keeps a single-observation site from dividing by zero; it
    # then uses percentile 0, the same value the formula gives for two
    # observations.
    percentile = min(90, (1.0 - 1.0 / max(d.size - 1, 1)) * 100)
    localPercentile = np.percentile(d, percentile)
    capValue = max(capValue, 4.0 * modelPrediction, localPercentile)
    d = np.minimum(d, capValue)

    # Trimmed stats
    res['tMean'] = d.mean().item()
    res['tErr'] = np.std(d).item() / sqrt(d.size)
    res['ipdRatio'] = res['tMean'] / res['modelPrediction']

    # Don't know the modification yet
    res["modification"] = "."

    # use ttest-based pvalue
    res['tStatistic'] = self.computeObservationTstatistic(res)
    res['pvalue'] = self.computeObservationPValueTTest(res)

    # Floor the p-value at the smallest positive float so log10 stays defined.
    pvalue = max(sys.float_info.min, res['pvalue'])
    score = round(-10.0 * math.log10(pvalue))
    res['score'] = score

    # If the methylFractionFlag is set, then estimate fraction using just
    # modelPrediction in the detection case.
    if methylFractionFlag and pvalue < self.options.pvalue and not identifyFlag:
        if res['coverage'] > self.options.methylMinCov:
            # NOTE(review): this re-queries the model and so ignores a
            # caller-supplied modelPrediction -- confirm that is intended.
            modelPrediction = self.meanIpdFunc(tpl, strand).item()

            # Instantiate mixture estimation methods; the estimator receives
            # the capped IPDs.
            mixture = MixtureEstimationMethods(self.ipdModel.gbmModel.post, self.ipdModel.gbmModel.pre, res, self.options.methylMinCov)
            x = mixture.detectionMixModelBootstrap(modelPrediction, d)

            res[FRAC] = x[0]
            res[FRAClow] = x[1]
            res[FRACup] = x[2]
        else:
            # Not enough coverage: report NaN for all three fraction fields.
            res[FRAC] = np.nan
            res[FRACup] = np.nan
            res[FRAClow] = np.nan

    return res