コード例 #1
0
ファイル: hicTransform.py プロジェクト: ryys1122/HiCExplorer
def _obs_exp(pSubmatrix):
    """Apply obs_exp_matrix to pSubmatrix and clean up the result.

    The output of obs_exp_matrix is wrapped in a csr_matrix and passed
    through convertNansToZeros; that result is wrapped in a csr_matrix
    again and passed through convertInfsToZeros.

    Returns whatever convertInfsToZeros returns (kept sparse; no
    densification happens here).
    """
    ratio = obs_exp_matrix(pSubmatrix)
    without_nans = convertNansToZeros(csr_matrix(ratio))
    without_infs = convertInfsToZeros(csr_matrix(without_nans))
    return without_infs
コード例 #2
0
ファイル: hicTransform.py プロジェクト: tw7649116/HiCExplorer
def _obs_exp(pSubmatrix):
    """Run obs_exp_matrix on pSubmatrix, then the NaN and Inf clean-up helpers.

    Each helper receives its input re-wrapped as a csr_matrix. The value
    returned by convertInfsToZeros is handed back unchanged (it is not
    converted to a dense matrix).
    """
    sparse_ratio = csr_matrix(obs_exp_matrix(pSubmatrix))
    return convertInfsToZeros(csr_matrix(convertNansToZeros(sparse_ratio)))
コード例 #3
0
def compute_loops(pHiCMatrix, pRegion, pArgs, pIsCooler, pQueue=None):
    """
        Master function to compute the loops for one chromosome.
            - Restricts the matrix to pRegion and, for non-cooler input, zeroes
              all contacts whose bin distance exceeds pArgs.maxLoopDistance
            - Keeps only the upper triangle without the main diagonal
            - Computes the obs/exp matrix with the helper selected by pArgs.expected
            - Calls compute_long_range_contacts and maps the resulting candidates
              to genomic positions with cluster_to_genome_position_mapping

        Input:
            - pHiCMatrix: Hi-C interaction matrix object (replaced by a matrix
                          loaded from pArgs.matrix when pQueue is given)
            - pRegion: Chromosome name
            - pArgs: Argparser object; windowSize, peakWidth and
                     pValuePreselection are filled in / converted in place
            - pIsCooler: True / False if matrix is stored in a .cool file
            - pQueue: Queue object for multiprocessing communication with parent process

        Output:
            - pQueue is None: returns the mapped loops, None if there are no
              loops, or a string starting with 'Fail: ' on error
            - pQueue is given: always returns None and puts [mapped_loops],
              [None] or the 'Fail: ' string on the queue instead
    """

    def _report_no_loops():
        # Single place for the "nothing found" answer so that every early exit
        # behaves the same with and without a queue.
        if pQueue is not None:
            pQueue.put([None])
        return None

    try:

        if pQueue is not None:
            # With a queue the matrix is loaded here for this region only; the
            # interval tree is skipped exactly when the input is a cooler file.
            pHiCMatrix = hm.hiCMatrix(pMatrixFile=pArgs.matrix, pChrnameList=[pRegion], pDistance=pArgs.maxLoopDistance, pNoIntervalTree=bool(pIsCooler), pUpperTriangleOnly=False)

        if not pIsCooler:
            # cooler files load only what is necessary.
            pHiCMatrix.keepOnlyTheseChr([pRegion])
            max_loop_distance = pArgs.maxLoopDistance / pHiCMatrix.getBinSize()
            # nonzero() skips explicitly stored zeros; drop them first so the
            # mask below lines up one-to-one with matrix.data.
            pHiCMatrix.matrix.eliminate_zeros()
            instances, features = pHiCMatrix.matrix.nonzero()
            distances = np.absolute(instances - features)
            mask = distances > max_loop_distance
            pHiCMatrix.matrix.data[mask] = 0
            pHiCMatrix.matrix.eliminate_zeros()

        if len(pHiCMatrix.matrix.data) == 0:
            return _report_no_loops()

        if pHiCMatrix.matrix.shape[0] < 5 or pHiCMatrix.matrix.shape[1] < 5:
            log.debug('Computed loops for {}: 0'.format(pRegion))
            return _report_no_loops()

        if pArgs.windowSize is None:
            bin_size = pHiCMatrix.getBinSize()
            # Only bin sizes in (0, 5000] get the larger window.
            pArgs.windowSize = 10 if 0 < bin_size <= 5000 else 5
            log.debug('Setting window size to: {}'.format(pArgs.windowSize))
        if pArgs.peakWidth is None:
            pArgs.peakWidth = pArgs.windowSize - 3
        log.debug('Setting peak width to: {}'.format(pArgs.peakWidth))
        pHiCMatrix.matrix = triu(pHiCMatrix.matrix, format='csr')
        pHiCMatrix.matrix.eliminate_zeros()

        # delete main diagonal
        instances, features = pHiCMatrix.matrix.nonzero()
        distances = np.absolute(instances - features)
        mask = distances == 0
        pHiCMatrix.matrix.data[mask] = 0
        pHiCMatrix.matrix.eliminate_zeros()

        del instances
        del features
        del mask
        del distances

        if pArgs.expected == 'mean':
            obs_exp_csr_matrix = obs_exp_matrix(pHiCMatrix.matrix, pInplace=False, pToEpsilon=True, pThreads=pArgs.threadsPerChromosome)
        elif pArgs.expected == 'mean_nonzero':
            obs_exp_csr_matrix = obs_exp_matrix_non_zero(pHiCMatrix.matrix, ligation_factor=False, pInplace=False, pToEpsilon=True, pThreads=pArgs.threadsPerChromosome)
        elif pArgs.expected == 'mean_nonzero_ligation':
            obs_exp_csr_matrix = obs_exp_matrix_non_zero(pHiCMatrix.matrix, ligation_factor=True, pInplace=False, pToEpsilon=True, pThreads=pArgs.threadsPerChromosome)
        else:
            # Reported through the 'Fail: ' channel below, with a readable
            # message instead of an unbound-variable error.
            raise ValueError('Unsupported value for expected: {}'.format(pArgs.expected))

        if not isinstance(obs_exp_csr_matrix, csr_matrix):
            return _report_no_loops()

        pHiCMatrix.matrix.eliminate_zeros()
        obs_exp_csr_matrix.eliminate_zeros()
        # Both matrices must store the same number of entries to continue.
        if len(pHiCMatrix.matrix.data) != len(obs_exp_csr_matrix.data):
            return _report_no_loops()

        # handle pValuePreselection: a number is used directly, anything else
        # is handed to read_threshold_file.
        try:
            pArgs.pValuePreselection = float(pArgs.pValuePreselection)
        except (TypeError, ValueError):
            pArgs.pValuePreselection = read_threshold_file(pArgs.pValuePreselection)

        candidates, pValueList = compute_long_range_contacts(pHiCMatrix,
                                                             obs_exp_csr_matrix,
                                                             pArgs.windowSize,
                                                             pArgs.pValue,
                                                             pArgs.peakWidth,
                                                             pArgs.pValuePreselection,
                                                             pArgs.peakInteractionsThreshold,
                                                             pArgs.obsExpThreshold,
                                                             pArgs.threadsPerChromosome)

        if candidates is None:
            log.info('Computed loops for {}: 0'.format(pRegion))
            return _report_no_loops()
        if 'Fail: ' in candidates:
            # Failure reported by compute_long_range_contacts is forwarded as is.
            if pQueue is None:
                return candidates
            pQueue.put(candidates)
            return None

        mapped_loops = cluster_to_genome_position_mapping(
            pHiCMatrix, candidates, pValueList, pArgs.maxLoopDistance)
        del pHiCMatrix
        del candidates
        log.debug('Computed loops for {}: {}'.format(pRegion, len(mapped_loops)))
    except Exception as exp:
        # Any error is turned into a 'Fail: ' message rather than propagated.
        failure = 'Fail: ' + str(exp) + traceback.format_exc()
        if pQueue is None:
            return failure
        pQueue.put(failure)
        return None

    if pQueue is None:
        return mapped_loops
    pQueue.put([mapped_loops])
    return None
コード例 #4
0
ファイル: hicTransform.py プロジェクト: nrkssa/HiCExplorer
def _obs_exp(pSubmatrix):
    """Run obs_exp_matrix on pSubmatrix, clean it up and return it dense.

    The obs_exp_matrix output is wrapped in a csr_matrix and passed to
    convertNansToZeros; that result is wrapped in a csr_matrix again and
    passed to convertInfsToZeros, whose result is converted with
    .todense() before being returned.
    """
    sparse_ratio = csr_matrix(obs_exp_matrix(pSubmatrix))
    sparse_ratio = csr_matrix(convertNansToZeros(sparse_ratio))
    cleaned = convertInfsToZeros(sparse_ratio)
    return cleaned.todense()