Example 1
    def pvalues(self, pBackgroundModel, pDataList, pIndexReferencePoint):
        # compute the background CDF per position; inverted to p-values further below
        p_value_list = []

        for i, data_element in enumerate(pDataList):
            relative_distance = i - pIndexReferencePoint
            # check if relative distance is available.
            # if not, use either min or max key for the distribution
            if relative_distance not in pBackgroundModel:
                if relative_distance < 0:
                    relative_distance = min(pBackgroundModel.keys())
                else:
                    relative_distance = max(pBackgroundModel.keys())
            if data_element == 0.0:
                p_value_list.append(0.0)
            else:
                p_value_list.append(
                    cnb.cdf(data_element,
                            pBackgroundModel[relative_distance][0],
                            pBackgroundModel[relative_distance][1]))

        # for reasons not yet understood the values need to be inverted here again;
        # doing it only in the try/except region is not sufficient
        p_value_list = np.array(p_value_list, dtype=np.float64)

        p_value_list = 1 - p_value_list

        # replace any NaN or inf entries with a p-value of 1
        mask = np.logical_or(np.isnan(p_value_list), np.isinf(p_value_list))
        p_value_list[mask] = 1.0
        return p_value_list
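
A minimal, self-contained sketch of the same p-value logic, using scipy.stats.nbinom as a stand-in for the cnb module above and a hypothetical background model that maps relative distances to (size, prob) pairs; all names and numbers here are illustrative, not part of the original code.

import numpy as np
from scipy.stats import nbinom  # stand-in for cnb in this sketch


def pvalues_sketch(background_model, data_list, index_reference_point):
    # background_model: relative distance -> (size, prob), hypothetical layout
    p_values = []
    for i, count in enumerate(data_list):
        relative_distance = i - index_reference_point
        # clamp unseen distances to the closest modelled distance
        if relative_distance not in background_model:
            keys = background_model.keys()
            relative_distance = min(keys) if relative_distance < 0 else max(keys)
        size, prob = background_model[relative_distance]
        # store the CDF; the inversion to 1 - CDF happens below, as in the original
        p_values.append(0.0 if count == 0.0 else nbinom.cdf(count, size, prob))
    p_values = 1 - np.array(p_values, dtype=np.float64)
    # guard against NaN/inf from degenerate fits
    p_values[~np.isfinite(p_values)] = 1.0
    return p_values


background_model = {-1: (5.0, 0.4), 0: (10.0, 0.3), 1: (5.0, 0.4)}
print(pvalues_sketch(background_model, [0.0, 12.0, 3.0], 1))
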
def compute_new_p_values(pData, pBackgroundModel, pPValue, pMergedLinesDict,
                         pPeakInteractionsThreshold, pViewpointObj):
    accepted = {}
    accepted_lines = []
    # a single float: one significance threshold shared by all keys
    if isinstance(pPValue, float):
        for key in pData:
            if key in pBackgroundModel:
                log.debug('Recompute p-values. Old: {}'.format(pData[key][-3]))
                pData[key][-3] = 1 - cnb.cdf(float(pData[key][-1]),
                                             float(pBackgroundModel[key][0]),
                                             float(pBackgroundModel[key][1]))
                log.debug('new {}\n\n'.format(pData[key][-3]))
                if pData[key][-3] <= pPValue:
                    if float(pData[key][-1]) >= pPeakInteractionsThreshold:
                        accepted[key] = pData[key]
                        # keep chrom/start of the first merged line, end of the last
                        target_content = pMergedLinesDict[key][0][:3]
                        target_content[2] = pMergedLinesDict[key][-1][2]
                        accepted_lines.append(target_content)
            else:
                log.debug('key not in background')
    # a dict: an individual significance threshold per key
    elif isinstance(pPValue, dict):
        for key in pData:
            if key in pBackgroundModel:
                log.debug('Recompute p-values. Old: {}'.format(pData[key][-3]))

                pData[key][-3] = 1 - cnb.cdf(float(pData[key][-1]),
                                             float(pBackgroundModel[key][0]),
                                             float(pBackgroundModel[key][1]))
                log.debug('new {}\n\n'.format(pData[key][-3]))

                if pData[key][-3] <= pPValue[key]:
                    if float(pData[key][-1]) >= pPeakInteractionsThreshold:
                        accepted[key] = pData[key]
                        target_content = pMergedLinesDict[key][0][:3]
                        target_content[2] = pMergedLinesDict[key][-1][2]
                        accepted_lines.append(target_content)
            else:
                log.debug('key not in background')
    return accepted, accepted_lines
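
The two branches above differ only in whether the threshold is a single float or a per-key dict. A compact, self-contained sketch of the acceptance rule they implement, again with scipy.stats.nbinom standing in for cnb and a hypothetical row layout of [..., p-value, x-fold, count]:

from scipy.stats import nbinom  # stand-in for cnb in this sketch


def accept_interactions(p_data, background_model, p_threshold, min_interactions):
    # p_data: key -> [..., p_value, x_fold, count]   (hypothetical layout)
    # background_model: key -> (size, prob)
    # p_threshold: a single float or a dict mapping key -> float
    accepted = {}
    for key, row in p_data.items():
        if key not in background_model:
            continue
        size, prob = background_model[key]
        # recompute the p-value of the observed count under the background
        row[-3] = 1 - nbinom.cdf(float(row[-1]), size, prob)
        threshold = p_threshold[key] if isinstance(p_threshold, dict) else p_threshold
        if row[-3] <= threshold and float(row[-1]) >= min_interactions:
            accepted[key] = row
    return accepted


p_data = {10000: ['gene_a', 1.0, 2.5, 25.0]}
background = {10000: (5.0, 0.5)}
print(accept_interactions(p_data, background, 0.05, 10.0))
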
Example 3
def compute_p_values_mask(pGenomicDistanceDistributionsObsExp, pGenomicDistanceDistributionsKeyList,
                          pPValuePreselection, pGenomicDistanceDistributionPosition, pResolution,
                          pMinimumInteractionsThreshold, pObsExpThreshold, pQueue):

    try:
        true_values = []
        if len(pGenomicDistanceDistributionsKeyList) == 0:
            pQueue.put(true_values)
            return

        # pPValuePreselection is either a single float or a dict keyed by genomic distance
        float_dict = isinstance(pPValuePreselection, float)
        for i, key in enumerate(pGenomicDistanceDistributionsKeyList):

            data_obs_exp = np.array(pGenomicDistanceDistributionsObsExp[key])
            # only positions whose obs/exp value reaches pObsExpThreshold get a p-value
            mask = data_obs_exp >= pObsExpThreshold
            # fit a negative binomial to all obs/exp values at this genomic distance
            nbinom_parameters = fit_nbinom.fit(data_obs_exp)

            p_value = 1 - cnb.cdf(data_obs_exp[mask], nbinom_parameters['size'], nbinom_parameters['prob'])

            if float_dict:
                mask_distance = p_value <= pPValuePreselection
            else:
                key_genomic = int(key * pResolution)
                mask_distance = p_value <= pPValuePreselection[key_genomic]
            # p_value holds values only for masked positions, so track them with j
            j = 0
            for k, value in enumerate(mask):
                if value:
                    if mask_distance[j]:
                        true_values.append(pGenomicDistanceDistributionPosition[key][k])
                    j += 1
    except Exception as exp:
        pQueue.put('Fail: ' + str(exp) + traceback.format_exc())
        return
    pQueue.put(true_values)
    return
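
Because the function reports its result (or a 'Fail: ...' string) through pQueue, it is meant to run as a worker process. A minimal driver sketch, assuming the definition above and its fit_nbinom/cnb dependencies are available in the same module; the toy inputs are purely illustrative:

from multiprocessing import Process, Queue

# hypothetical toy inputs; in practice these come from the obs/exp matrix
obs_exp = {1: [0.5, 3.2, 4.1], 2: [0.2, 0.1, 5.0]}
positions = {1: [(0, 1), (0, 2), (1, 3)], 2: [(0, 3), (2, 4), (2, 5)]}
keys = list(obs_exp.keys())

# on spawn-based platforms this should live under an `if __name__ == '__main__':` guard
queue = Queue()
worker = Process(target=compute_p_values_mask,
                 args=(obs_exp, keys, 0.05, positions, 10000, 5, 1.5, queue))
worker.start()
result = queue.get()  # list of accepted positions, or a 'Fail: ...' string
worker.join()

if isinstance(result, str) and result.startswith('Fail:'):
    raise RuntimeError(result)
print(result)
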