def _obs_exp(pSubmatrix):
    """
    Run obs_exp_matrix on a submatrix and sanitise the result.

    The output of obs_exp_matrix is converted to a csr_matrix and handed to
    convertNansToZeros; that result is converted to a csr_matrix again and
    handed to convertInfsToZeros. All three helpers are defined outside this
    function (judging by their names, the latter two replace NaN / inf
    entries with zero -- confirm against their definitions).

    Input:
        - pSubmatrix: matrix passed unchanged to obs_exp_matrix

    Returns:
        Whatever convertInfsToZeros returns; the result is NOT densified.
    """
    ratio = csr_matrix(obs_exp_matrix(pSubmatrix))
    ratio = csr_matrix(convertNansToZeros(ratio))
    return convertInfsToZeros(ratio)
def _obs_exp(pSubmatrix):
    """
    Apply obs_exp_matrix to pSubmatrix and clean up the outcome.

    Processing order:
        1. obs_exp_matrix(pSubmatrix)
        2. csr_matrix(...) -> convertNansToZeros
        3. csr_matrix(...) -> convertInfsToZeros

    The helpers are defined elsewhere; by their names steps 2 and 3 zero out
    NaN and infinite entries (verify against their definitions).

    Input:
        - pSubmatrix: matrix forwarded as-is to obs_exp_matrix

    Returns:
        The value produced by convertInfsToZeros, without converting it to a
        dense matrix and without special handling of an empty result.
    """
    without_nans = convertNansToZeros(csr_matrix(obs_exp_matrix(pSubmatrix)))
    return convertInfsToZeros(csr_matrix(without_nans))
def compute_loops(pHiCMatrix, pRegion, pArgs, pIsCooler, pQueue=None):
    """
    Master function to compute the loops for one chromosome.

    Steps performed:
        - (only if pQueue is given) loads the matrix for pRegion from
          pArgs.matrix, replacing the pHiCMatrix argument
        - for non-cooler input, restricts the matrix to pRegion
        - zeroes all interactions farther apart than pArgs.maxLoopDistance
        - fills in pArgs.windowSize / pArgs.peakWidth if they are None
        - keeps only the upper triangle and drops the main diagonal
        - computes the observed/expected matrix selected by pArgs.expected
        - calls compute_long_range_contacts and maps the resulting candidates
          via cluster_to_genome_position_mapping

    Input:
        - pHiCMatrix: Hi-C interaction matrix object
        - pRegion: Chromosome name
        - pArgs: Argparser object. NOTE: windowSize, peakWidth and
          pValuePreselection are overwritten on this object.
        - pIsCooler: True / False if matrix is stored in a .cool file
        - pQueue: Queue object for multiprocessing communication with parent
          process

    Returns:
        Without pQueue: the mapped loops, None if nothing could be computed,
        or a string starting with 'Fail: ' on error.
        With pQueue: always None; the queue receives [mapped_loops], [None]
        or the bare 'Fail: ...' string respectively.
    """

    def _deliver(pQueuePayload, pDirectResult=None):
        # Single exit point: hand the result to the parent process if a queue
        # is present, otherwise return it to the direct caller.
        if pQueue is None:
            return pDirectResult
        pQueue.put(pQueuePayload)
        return None

    try:
        if pQueue is not None:
            # Called as a worker: load the data for this chromosome here.
            # The interval tree is skipped only for cooler files.
            pHiCMatrix = hm.hiCMatrix(pMatrixFile=pArgs.matrix,
                                      pChrnameList=[pRegion],
                                      pDistance=pArgs.maxLoopDistance,
                                      pNoIntervalTree=bool(pIsCooler),
                                      pUpperTriangleOnly=False)

        if not pIsCooler:
            # cooler files load only what is necessary.
            pHiCMatrix.keepOnlyTheseChr([pRegion])

        # Remove every interaction beyond the maximum loop distance (in bins).
        # NOTE(review): indexing .data with a mask built from nonzero()
        # assumes the matrix stores no explicit zeros -- TODO confirm.
        max_loop_distance = pArgs.maxLoopDistance / pHiCMatrix.getBinSize()
        instances, features = pHiCMatrix.matrix.nonzero()
        distances = np.absolute(instances - features)
        mask = distances > max_loop_distance
        pHiCMatrix.matrix.data[mask] = 0
        pHiCMatrix.matrix.eliminate_zeros()

        if len(pHiCMatrix.matrix.data) == 0:
            # Previously this called pQueue.put() unconditionally, so a direct
            # call (pQueue=None) ended in a misleading 'Fail: ' string.
            return _deliver([None])

        if pHiCMatrix.matrix.shape[0] < 5 or pHiCMatrix.matrix.shape[1] < 5:
            log.debug('Computed loops for {}: 0'.format(pRegion))
            return _deliver([None])

        if pArgs.windowSize is None:
            # Only resolutions of up to 5 kb get the larger window.
            bin_size = pHiCMatrix.getBinSize()
            pArgs.windowSize = 10 if 0 < bin_size <= 5000 else 5
            log.debug('Setting window size to: {}'.format(pArgs.windowSize))
        if pArgs.peakWidth is None:
            pArgs.peakWidth = pArgs.windowSize - 3
        log.debug('Setting peak width to: {}'.format(pArgs.peakWidth))

        pHiCMatrix.matrix = triu(pHiCMatrix.matrix, format='csr')
        pHiCMatrix.matrix.eliminate_zeros()

        # delete main diagonal
        instances, features = pHiCMatrix.matrix.nonzero()
        distances = np.absolute(instances - features)
        mask = distances == 0
        pHiCMatrix.matrix.data[mask] = 0
        pHiCMatrix.matrix.eliminate_zeros()
        del instances, features, mask, distances

        if pArgs.expected == 'mean':
            obs_exp_csr_matrix = obs_exp_matrix(
                pHiCMatrix.matrix, pInplace=False, pToEpsilon=True,
                pThreads=pArgs.threadsPerChromosome)
        elif pArgs.expected == 'mean_nonzero':
            obs_exp_csr_matrix = obs_exp_matrix_non_zero(
                pHiCMatrix.matrix, ligation_factor=False, pInplace=False,
                pToEpsilon=True, pThreads=pArgs.threadsPerChromosome)
        elif pArgs.expected == 'mean_nonzero_ligation':
            obs_exp_csr_matrix = obs_exp_matrix_non_zero(
                pHiCMatrix.matrix, ligation_factor=True, pInplace=False,
                pToEpsilon=True, pThreads=pArgs.threadsPerChromosome)
        else:
            # Reported through the 'Fail: ' channel by the handler below,
            # instead of surfacing as an opaque NameError.
            raise ValueError('Unknown method to compute the expected value: '
                             '{}'.format(pArgs.expected))

        if not isinstance(obs_exp_csr_matrix, csr_matrix):
            return _deliver([None])

        # Both matrices must hold the same number of stored values before
        # they are handed on together.
        pHiCMatrix.matrix.eliminate_zeros()
        obs_exp_csr_matrix.eliminate_zeros()
        if len(pHiCMatrix.matrix.data) != len(obs_exp_csr_matrix.data):
            return _deliver([None])

        # handle pValuePreselection: either a number or something that
        # read_threshold_file can resolve.
        try:
            pArgs.pValuePreselection = float(pArgs.pValuePreselection)
        except (TypeError, ValueError):
            pArgs.pValuePreselection = read_threshold_file(
                pArgs.pValuePreselection)

        candidates, pValueList = compute_long_range_contacts(
            pHiCMatrix,
            obs_exp_csr_matrix,
            pArgs.windowSize,
            pArgs.pValue,
            pArgs.peakWidth,
            pArgs.pValuePreselection,
            pArgs.peakInteractionsThreshold,
            pArgs.obsExpThreshold,
            pArgs.threadsPerChromosome)

        if candidates is None:
            log.info('Computed loops for {}: 0'.format(pRegion))
            return _deliver([None])
        if 'Fail: ' in candidates:
            # Failure marker is forwarded unchanged (not wrapped in a list).
            # Presumably a str produced by the callee -- verify against
            # compute_long_range_contacts.
            return _deliver(candidates, candidates)

        mapped_loops = cluster_to_genome_position_mapping(
            pHiCMatrix, candidates, pValueList, pArgs.maxLoopDistance)
        del pHiCMatrix
        del candidates
        log.debug('Computed loops for {}: {}'.format(pRegion,
                                                     len(mapped_loops)))
    except Exception as exp:
        # Top-level boundary of a worker: report instead of raising so the
        # caller always receives an answer.
        failure = 'Fail: ' + str(exp) + traceback.format_exc()
        return _deliver(failure, failure)

    return _deliver([mapped_loops], mapped_loops)
def _obs_exp(pSubmatrix):
    """
    Run obs_exp_matrix on a submatrix and return the sanitised result densely.

    The output of obs_exp_matrix is wrapped in a csr_matrix and passed to
    convertNansToZeros, wrapped again and passed to convertInfsToZeros, and
    finally converted with .todense(). The helpers are defined elsewhere; by
    their names they replace NaN / inf entries with zero (confirm against
    their definitions).

    Input:
        - pSubmatrix: matrix passed unchanged to obs_exp_matrix

    Returns:
        The result of calling .todense() on the output of convertInfsToZeros.
    """
    nan_free = convertNansToZeros(csr_matrix(obs_exp_matrix(pSubmatrix)))
    cleaned = convertInfsToZeros(csr_matrix(nan_free))
    return cleaned.todense()