def _resolve_region_chromosome(ma, chrom, option_name):
    """Return ``chrom`` in a spelling that is a key of ``ma.interval_trees``.

    The name is passed through ``check_chrom_str_bytes``; if the result is
    not a key of ``ma.interval_trees`` the alternative spelling returned by
    ``change_chrom_names`` is tried the same way.  If neither spelling is
    known the program exits with a message that names ``option_name``
    (``--region`` or ``--region2``).
    """
    chrom = check_chrom_str_bytes(ma.interval_trees, chrom)
    if chrom not in list(ma.interval_trees):
        chrom = change_chrom_names(chrom)
        chrom = check_chrom_str_bytes(ma.interval_trees, chrom)
        if chrom not in list(ma.interval_trees):
            # change_chrom_names is applied once more for the message, as in
            # the original code (presumably to report the spelling the user
            # typed -- confirm against change_chrom_names).
            exit("Chromosome name {} in {} not in matrix".format(
                change_chrom_names(chrom), option_name))
    return chrom


def _select_region_bins(ma, chrom, region_start, region_end, is_cooler,
                        option_name):
    """Collect indices and start positions of the bins inside a region.

    A bin (``ma.cut_intervals`` entry: chromosome, start, end, ...) is
    selected when it lies on ``chrom`` and
    ``start >= region_start and end < region_end``.  For cooler matrices a
    bin is also selected when it only partially overlaps the region (its end
    or its start falls strictly inside the region).

    Returns a pair of tuples ``(bin_indices, bin_starts)``.  Exits with an
    explanatory message when no bin matches; the original code failed in
    that case with "not enough values to unpack" from ``zip(*[])``.
    """
    bin_ids = []
    bin_starts = []
    for idx, interval in enumerate(ma.cut_intervals):
        if interval[0] != chrom:
            continue
        bin_start, bin_end = interval[1], interval[2]
        selected = bin_start >= region_start and bin_end < region_end
        if is_cooler and not selected:
            selected = ((bin_start < region_end and
                         region_start < bin_end < region_end) or
                        (region_start < bin_start < region_end))
        if selected:
            bin_ids.append(idx)
            bin_starts.append(bin_start)

    if not bin_ids:
        exit("No bins of the matrix are covered by {} ({}:{}-{})".format(
            option_name, chrom, region_start, region_end))
    return tuple(bin_ids), tuple(bin_starts)


def getRegion(args, ma):
    """Translate ``--region`` / ``--region2`` into bin indices of ``ma``.

    Side effect: ``args.region`` is replaced by the list
    ``[chrom, region_start, region_end]``.

    :param args: parsed arguments; ``region``, ``region2`` and ``matrix``
                 are read.
    :param ma: matrix object providing ``interval_trees`` and
               ``cut_intervals``.
    :return: ``(chrom, region_start, region_end, idx1, start_pos1,
             chrom2, region_start2, region_end2, idx2, start_pos2)``.
             Without ``--region2`` the second chromosome, indices and start
             positions mirror the first region, and ``region_start2`` /
             ``region_end2`` are ``None``.
    :raises SystemExit: if a chromosome name is unknown to the matrix or a
                        region does not contain any bin.
    """
    chrom, region_start, region_end = translate_region(args.region)
    chrom = _resolve_region_chromosome(ma, chrom, '--region')

    args.region = [chrom, region_start, region_end]
    is_cooler = check_cooler(args.matrix)
    idx1, start_pos1 = _select_region_bins(ma, chrom, region_start,
                                           region_end, is_cooler, '--region')

    if args.region2:
        chrom2, region_start2, region_end2 = translate_region(args.region2)
        chrom2 = _resolve_region_chromosome(ma, chrom2, '--region2')
        idx2, start_pos2 = _select_region_bins(ma, chrom2, region_start2,
                                               region_end2, is_cooler,
                                               '--region2')
    else:
        # Square plot: the second axis shows the same bins as the first.
        chrom2 = chrom
        region_start2 = region_end2 = None
        idx2 = idx1
        start_pos2 = start_pos1

    return chrom, region_start, region_end, idx1, start_pos1, chrom2, region_start2, region_end2, idx2, start_pos2
def _write_diff_tad_file(pFileName, pHeader, pRows, pPValues, pStats):
    """Write one ``.diff_tad`` file: header, then one tab-separated line per TAD.

    Each line holds the fields of the domain row, followed by its p-values
    and its test statistics.
    """
    with open(pFileName, 'w') as file:
        file.write(pHeader)
        for row, p_values, stats in zip(pRows, pPValues, pStats):
            file.write('\t'.join(['\t'.join(map(str, row)),
                                  '\t'.join(map(str, p_values)),
                                  '\t'.join(map(str, stats))]))
            file.write('\n')


def main(args=None):
    """Entry point: test TADs for differential interactions and write results.

    Reads the domains given by ``args.tadDomains``, groups consecutive
    domains by chromosome, runs ``computeDifferentialTADs`` for every
    chromosome in up to ``args.threads`` worker processes, and writes
    ``<outFileNamePrefix>_accepted.diff_tad`` and
    ``<outFileNamePrefix>_rejected.diff_tad``.

    Fixes compared to the previous version:

    * the per-worker result slots are re-created for every chromosome.
      They used to be allocated once, so after a chromosome that was
      processed with fewer workers the unused slots still held (and
      re-emitted) the results of an earlier chromosome;
    * the number of domains per worker is always derived from the requested
      thread count, not from a value left over from the previous
      chromosome;
    * an empty domain file no longer raises ``NameError``.

    :param args: argument list handed to the parser (``None``: command line).
    :raises SystemExit: if the two matrices are not in the same format or a
                        worker reports a failure.
    """
    args = parse_arguments().parse_args(args)

    # read domains file
    domains_df = readDomainBoundaries(args.tadDomains)
    log.debug('len(domains_df) {}'.format(len(domains_df)))
    domains = domains_df.values.tolist()

    # Group runs of consecutive domains that share the same chromosome
    # (first column).
    tads_per_chromosome = []
    old_chromosome = None
    for domain in domains:
        if old_chromosome is None or old_chromosome != domain[0]:
            tads_per_chromosome.append([])
            old_chromosome = domain[0]
        tads_per_chromosome[-1].append(domain)

    # read full h5 or only region if cooler
    is_cooler_target = check_cooler(args.targetMatrix)
    is_cooler_control = check_cooler(args.controlMatrix)

    if is_cooler_target != is_cooler_control:
        log.error('Matrices are not given in the same format!')
        exit(1)
    if not is_cooler_control:
        hic_matrix_target = hm.hiCMatrix(args.targetMatrix)
        hic_matrix_control = hm.hiCMatrix(args.controlMatrix)
    else:
        # For cooler files only the file names are handed to the workers.
        hic_matrix_target = args.targetMatrix
        hic_matrix_control = args.controlMatrix

    stats_list = []
    p_values_list = []
    accepted_inter_left = []
    accepted_inter_right = []
    accepted_intra = []
    rows = []

    requested_threads = args.threads
    for chromosome in tads_per_chromosome:
        log.debug('tads_per_chromosome {}'.format(chromosome))
        domainsPerThread = len(chromosome) // requested_threads
        if domainsPerThread == 0:
            # Fewer domains than workers: process the chromosome in one worker.
            domainsPerThread = 1
            n_threads = 1
        else:
            n_threads = requested_threads

        # Fresh result slots per chromosome, one independent list per worker.
        stats_threads = [[] for _ in range(n_threads)]
        p_values_threads = [[] for _ in range(n_threads)]
        accepted_left_inter_threads = [[] for _ in range(n_threads)]
        accepted_right_inter_threads = [[] for _ in range(n_threads)]
        accepted_intra_threads = [[] for _ in range(n_threads)]
        rows_threads = [[] for _ in range(n_threads)]

        queue = [None] * n_threads
        process = [None] * n_threads
        thread_done = [False] * n_threads

        # None --> first thread, process first element in list, ignore last one
        # True --> middle thread: ignore first and last element in tad processing
        # False --> last thread: ignore first element, process last one
        # ''   --> single worker handling the whole chromosome
        # Neighbouring chunks overlap by one domain on each side.
        for i in range(n_threads):
            if n_threads == 1:
                domainListThread = chromosome
                thread_id = ''
            elif i == 0:
                domainListThread = chromosome[i * domainsPerThread:((i + 1) * domainsPerThread) + 1]
                thread_id = None
            elif i < n_threads - 1:
                domainListThread = chromosome[(i * domainsPerThread) - 1:((i + 1) * domainsPerThread) + 1]
                thread_id = True
            else:
                domainListThread = chromosome[(i * domainsPerThread) - 1:]
                thread_id = False

            log.debug('len(domainListThread) {}'.format(len(domainListThread)))
            log.debug('len(thread_id) {}'.format(thread_id))

            queue[i] = Queue()
            process[i] = Process(target=computeDifferentialTADs, kwargs=dict(
                pMatrixTarget=hic_matrix_target,
                pMatrixControl=hic_matrix_control,
                pDomainList=domainListThread,
                pCoolOrH5=is_cooler_control,
                pPValue=args.pValue,
                pThreadId=thread_id,
                pQueue=queue[i]
            ))
            process[i].start()

        fail_flag = False
        fail_message = ''
        # Poll the queues until every worker has delivered its result.
        while not all(thread_done):
            for i in range(n_threads):
                if queue[i] is not None and not queue[i].empty():
                    queue_data = queue[i].get()
                    if 'Fail:' in queue_data:
                        fail_flag = True
                        fail_message = queue_data
                    else:
                        stats_threads[i], p_values_threads[i], accepted_left_inter_threads[i], \
                            accepted_right_inter_threads[i], \
                            accepted_intra_threads[i], rows_threads[i] = queue_data
                    queue[i] = None
                    process[i].join()
                    process[i].terminate()
                    process[i] = None
                    thread_done[i] = True
            if not all(thread_done):
                time.sleep(1)

        if fail_flag:
            # The first six characters are the 'Fail: ' prefix.
            log.error(fail_message[6:])
            exit(1)

        # Append this chromosome's results in worker order.
        stats_list.extend(item for sublist in stats_threads for item in sublist)
        p_values_list.extend(item for sublist in p_values_threads for item in sublist)
        accepted_inter_left.extend(item for sublist in accepted_left_inter_threads for item in sublist)
        accepted_inter_right.extend(item for sublist in accepted_right_inter_threads for item in sublist)
        accepted_intra.extend(item for sublist in accepted_intra_threads for item in sublist)
        rows.extend(item for sublist in rows_threads for item in sublist)
        log.debug('rows_threads {}'.format(rows_threads))

    stats_list = np.array(stats_list)
    p_values_list = np.array(p_values_list)
    accepted_inter_left = np.array(accepted_inter_left)
    accepted_inter_right = np.array(accepted_inter_right)
    accepted_intra = np.array(accepted_intra)
    rows = np.array(rows)

    # Entries where ``mask`` is True go to the *rejected* file, the others to
    # the *accepted* file.
    if args.mode == 'intra-TAD':
        mask = np.array(accepted_intra, dtype=bool)
    elif args.mode == 'left-inter-TAD':
        if args.modeReject == 'all':
            mask = np.logical_and(accepted_inter_left, accepted_intra)
        else:
            mask = np.logical_or(accepted_inter_left, accepted_intra)
    elif args.mode == 'right-inter-TAD':
        if args.modeReject == 'all':
            mask = np.logical_and(accepted_intra, accepted_inter_right)
        else:
            mask = np.logical_or(accepted_intra, accepted_inter_right)
    else:
        if args.modeReject == 'all':
            mask = np.logical_and(accepted_inter_left, accepted_inter_right)
            mask = np.logical_and(mask, accepted_intra)
        else:
            mask = np.logical_or(accepted_inter_left, accepted_inter_right)
            mask = np.logical_or(mask, accepted_intra)
    log.debug('len(mask) {}'.format(len(mask)))
    log.debug('mask.sum() {}'.format(mask.sum()))
    log.debug('mask[:10] {}'.format(mask[:10]))

    accepted_H0 = p_values_list[~mask]
    accepted_H0_s = stats_list[~mask]
    rejected_H0 = p_values_list[mask]
    rejected_H0_s = stats_list[mask]
    accepted_rows = rows[~mask]
    rejected_rows = rows[mask]

    version_line = '# Created with HiCExplorer\'s hicDifferentialTAD version ' + __version__ + '\n'
    column_line = '# Chromosome\tstart\tend\tname\tscore\tstrand\tp-value left-inter-TAD\tp-value right-inter-TAD\tp-value intra-TAD\tW left-inter-TAD\tW right-inter-TAD\tW intra-TAD\n'

    header = version_line
    header += '# H0 \'regions are equal\' H0 is accepted for all p-value greater the user given p-value threshold; i.e. regions in this file are not considered as differential.\n'
    header += '# Accepted regions with Wilcoxon rank-sum test to p-value: {} with used mode: {} and modeReject: {} \n'.format(
        args.pValue, args.mode, args.modeReject)
    header += column_line
    _write_diff_tad_file(args.outFileNamePrefix + '_accepted.diff_tad', header,
                         accepted_rows, accepted_H0, accepted_H0_s)

    header = version_line
    header += '# H0 \'regions are equal\' H0 is rejected for all p-value smaller or equal the user given p-value threshold; i.e. regions in this file are considered as differential.\n'
    header += '# Rejected regions with Wilcoxon rank-sum test to p-value: {} with used mode: {} and modeReject: {} \n'.format(
        args.pValue, args.mode, args.modeReject)
    header += column_line
    _write_diff_tad_file(args.outFileNamePrefix + '_rejected.diff_tad', header,
                         rejected_rows, rejected_H0, rejected_H0_s)
def main(args=None):
    """Entry point: correlate Hi-C matrices and plot scatter plots + heatmap.

    Every matrix in ``args.matrices`` is loaded, reduced to its upper
    triangle (optionally limited to the distance range ``args.range`` and
    ``log1p`` transformed), and all pairs of matrices are correlated with
    ``args.method`` (``'pearson'`` or ``'spearman'``).  The pairwise 2D
    histograms are saved to ``args.outFileNameScatter``; the correlation
    matrix is handed to ``plot_correlation``.

    Fixes compared to the previous version:

    * default labels are a list (a ``map`` object cannot be indexed on
      Python 3, so ``args.labels[row]`` failed);
    * the error exits return a non-zero status;
    * the locator range check compares the parity of both bounds
      (``min_value % 1 == 0`` was always true);
    * tick visibility is passed as booleans instead of ``'on'``/``'off'``;
    * the unused per-matrix min/max bookkeeping, which could raise on a
      matrix without stored values, is gone.

    :param args: argument list handed to the parser (``None``: command line).
    :raises SystemExit: on a label/matrix count mismatch or an invalid range.
    """
    args = parse_arguments().parse_args(args)

    mpl.rcParams['pdf.fonttype'] = 42

    if args.labels and len(args.matrices) != len(args.labels):
        log.error(
            "The number of labels does not match the number of matrices.")
        exit(1)
    if not args.labels:
        args.labels = [os.path.basename(x) for x in args.matrices]

    num_files = len(args.matrices)

    # initialize results matrix
    results = np.zeros((num_files, num_files), dtype='float')

    rows, cols = np.triu_indices(num_files)
    correlation_opts = {'spearman': spearmanr, 'pearson': pearsonr}
    hic_mat_list = []
    all_mat = None
    all_nan = []

    for i, matrix in enumerate(args.matrices):
        log.debug("loading hic matrix {}\n".format(matrix))

        if (check_cooler(args.matrices[i])) and args.chromosomes is not None \
                and len(args.chromosomes) == 1:
            _mat = hm.hiCMatrix(matrix, pChrnameList=args.chromosomes)
        else:
            _mat = hm.hiCMatrix(matrix)
            if args.chromosomes:
                _mat.keepOnlyTheseChr(args.chromosomes)
            _mat.filterOutInterChrCounts()

        _mat.diagflat(0)
        log.debug("restore masked bins {}\n".format(matrix))
        bin_size = _mat.getBinSize()
        all_nan = np.unique(np.concatenate([all_nan, _mat.nan_bins]))

        # From here on ``_mat`` is the plain sparse upper-triangle matrix.
        _mat = triu(_mat.matrix, k=0, format='csr')
        if args.range:
            min_dist, max_dist = args.range.split(":")
            min_dist = int(min_dist)
            max_dist = int(max_dist)
            if max_dist < bin_size:
                log.error(
                    "Please specify a max range that is larger than bin size ({})"
                    .format(bin_size))
                exit(1)
            max_depth_in_bins = int(max_dist / bin_size)
            max_dist = int(max_dist) // bin_size
            min_dist = int(min_dist) // bin_size
            # work only with the upper matrix
            # and remove all pixels that are beyond
            # max_depth_in_bins
            # (this is done by subtracting a second sparse matrix
            # that contains only the upper matrix that wants to be removed.)
            _mat = triu(_mat, k=0, format='csr') - triu(
                _mat, k=max_depth_in_bins, format='csr')

            _mat.eliminate_zeros()

            _mat_coo = _mat.tocoo()
            dist = _mat_coo.col - _mat_coo.row
            keep = np.flatnonzero((dist <= max_dist) & (dist >= min_dist))
            _mat_coo.data = _mat_coo.data[keep]
            _mat_coo.row = _mat_coo.row[keep]
            _mat_coo.col = _mat_coo.col[keep]
            _mat = _mat_coo.tocsr()
        else:
            _mat = triu(_mat, k=0, format='csr')

        if args.log1p:
            _mat.data = np.log1p(_mat.data)
        if all_mat is None:
            all_mat = _mat
        else:
            all_mat = all_mat + _mat

        hic_mat_list.append(_mat)

    # remove nan bins
    rows_keep = cols_keep = np.delete(list(range(all_mat.shape[1])),
                                      all_nan.astype('int'))
    all_mat = all_mat[rows_keep, :][:, cols_keep]

    # make large matrix to correlate by
    # using sparse matrix tricks
    big_mat = None
    for mat in hic_mat_list:
        mat = mat[rows_keep, :][:, cols_keep]
        # Adding the sum of all matrices gives every sample the same sparsity
        # pattern; subtracting it again leaves the sample values (zeros
        # included) at identical positions for all samples.
        sample_vector = (mat + all_mat).data - all_mat.data
        if big_mat is None:
            big_mat = sample_vector
        else:
            big_mat = np.vstack([big_mat, sample_vector])

    # take the transpose such that columns represent each of the samples
    big_mat = np.ma.masked_invalid(big_mat).T

    grids = gridspec.GridSpec(num_files, num_files)
    grids.update(wspace=0, hspace=0)
    fig = plt.figure(figsize=(2 * num_files, 2 * num_files))
    plt.rcParams['font.size'] = 8.0

    min_value = int(big_mat.min())
    max_value = int(big_mat.max())
    if min_value % 2 == max_value % 2:
        # make one value odd and the other even
        max_value += 1

    if args.log1p:
        major_locator = FixedLocator(list(range(min_value, max_value, 2)))
        minor_locator = FixedLocator(list(range(min_value, max_value, 1)))

    for row, col in zip(rows, cols):
        if row == col:
            results[row, col] = 1

            # add titles as
            # empty plot in the diagonal
            ax = fig.add_subplot(grids[row, col])
            ax.text(0.6, 0.6, args.labels[row],
                    verticalalignment='center',
                    horizontalalignment='center',
                    fontsize=10, fontweight='bold',
                    transform=ax.transAxes)
            ax.set_axis_off()
            continue

        log.debug("comparing {} and {}\n".format(args.matrices[row],
                                                 args.matrices[col]))

        # remove cases in which both are zero or one is zero and
        # the other is one
        _mat = big_mat[:, [row, col]]
        _mat = _mat[_mat.sum(axis=1) > 1, :]
        vector1 = _mat[:, 0]
        vector2 = _mat[:, 1]

        results[row, col] = correlation_opts[args.method](vector1, vector2)[0]

        # scatter plots
        ax = fig.add_subplot(grids[row, col])
        if args.log1p:
            ax.xaxis.set_major_locator(major_locator)
            ax.xaxis.set_minor_locator(minor_locator)
            ax.yaxis.set_major_locator(major_locator)
            ax.yaxis.set_minor_locator(minor_locator)

        ax.text(0.2, 0.8, "{}={:.2f}".format(args.method, results[row, col]),
                horizontalalignment='left',
                transform=ax.transAxes)
        ax.get_yaxis().set_tick_params(which='both', left=False, right=False,
                                       direction='out')
        ax.get_xaxis().set_tick_params(which='both', top=False, bottom=False,
                                       direction='out')

        # Only the right-most column shows y ticks, and only the plot next to
        # the diagonal shows x ticks.
        if col != num_files - 1:
            ax.set_yticklabels([])
        else:
            ax.yaxis.tick_right()
            ax.get_yaxis().set_tick_params(which='both', left=False,
                                           right=True, direction='out')
        if col - row == 1:
            ax.xaxis.tick_bottom()
            ax.get_xaxis().set_tick_params(which='both', top=False,
                                           bottom=True, direction='out')
        else:
            ax.set_xticklabels([])

        ax.hist2d(vector1, vector2, bins=150, cmin=0.1)

    fig.tight_layout()
    log.debug("saving {}".format(args.outFileNameScatter))
    fig.savefig(args.outFileNameScatter, bbox_inches='tight')

    # Mirror the upper triangle to obtain the full symmetric matrix.
    results = results + np.triu(results, 1).T
    plot_correlation(results, args.labels, args.outFileNameHeatmap, args.zMax,
                     args.zMin, args.colorMap, pPlotNumbers=args.plotNumbers)
def main(args=None):
    """Entry point: diagnostic plot or ICE/KR correction of a Hi-C matrix.

    When the parsed arguments contain ``plotName`` the zero-sum bins are
    masked, ``plot_total_contact_dist`` is called and the function returns.
    Otherwise the matrix is filtered (ICE only), balanced with
    ``iterative_correction`` (ICE) or ``kr_balancing`` (KR), either per
    chromosome (``args.perchr``) or as a whole, the correction factors are
    stored on the matrix object and the result is saved to
    ``args.outFileName``.

    :param args: argument list handed to the parser (``None``: command line).
    :raises SystemExit: if ICE is requested without ``--filterThreshold``.
    """
    args = parse_arguments().parse_args(args)

    if args.verbose:
        log.setLevel(logging.INFO)

    # A cooler file restricted to a single chromosome is loaded directly for
    # that chromosome; otherwise the full matrix is loaded and reordered.
    load_single_chromosome = (check_cooler(args.matrix) and
                              args.chromosomes is not None and
                              len(args.chromosomes) == 1)
    if load_single_chromosome:
        ma = hm.hiCMatrix(args.matrix, pChrnameList=toString(args.chromosomes))
    else:
        ma = hm.hiCMatrix(args.matrix)
        if args.chromosomes:
            ma.reorderChromosomes(toString(args.chromosomes))

    def mask_zero_bins():
        """Mask all bins whose row sum is zero; return the new matrix shape."""
        bin_totals = np.asarray(ma.matrix.sum(axis=1)).flatten()
        is_empty = bin_totals == 0
        log.info("Removing {} zero value bins".format(sum(is_empty)))
        ma.maskBins(np.flatnonzero(is_empty))
        return ma.matrix.shape

    def new_kr_balancer(sparse_matrix):
        """Create the KR balancing object for ``sparse_matrix``."""
        log.debug("Loading a float sparse matrix for KR balancing")
        return kr_balancing(sparse_matrix.shape[0],
                            sparse_matrix.shape[1],
                            sparse_matrix.count_nonzero(),
                            sparse_matrix.indptr.astype(np.int64, copy=False),
                            sparse_matrix.indices.astype(np.int64, copy=False),
                            sparse_matrix.data.astype(np.float64, copy=False))

    writes_h5 = args.outFileName.endswith('.h5')

    # mask all zero value bins
    if 'correctionMethod' in args and args.correctionMethod == 'ICE':
        matrix_shape = mask_zero_bins()
    if 'plotName' in args:
        matrix_shape = mask_zero_bins()

    ma.matrix = convertNansToZeros(ma.matrix)
    ma.matrix = convertInfsToZeros(ma.matrix)
    ma.matrix = ma.matrix.astype(np.float64, copy=True)

    log.debug('ma.matrix.indices {}'.format(ma.matrix.indices.dtype))
    log.debug('ma.matrix.data {}'.format(ma.matrix.data.dtype))
    log.debug('ma.matrix.indptr {}'.format(ma.matrix.indptr.dtype))

    if 'plotName' in args:
        # Diagnostic mode: plot only, no correction.
        plot_total_contact_dist(ma, args)
        log.info("Saving diagnostic plot {}\n".format(args.plotName))
        return

    n_values = len(ma.matrix.data)
    log.info("matrix contains {} data points. Sparsity {:.3f}.".format(
        n_values, float(n_values) / (ma.matrix.shape[0]**2)))

    if args.skipDiagonal:
        ma.diagflat(value=0)

    total_filtered_out = set()
    if args.correctionMethod == 'ICE':
        if not args.filterThreshold:
            log.error('min and max filtering thresholds should be set')
            sys.exit(1)
        outlier_regions = filter_by_zscore(ma,
                                           args.filterThreshold[0],
                                           args.filterThreshold[1],
                                           perchr=args.perchr)
        # compute and print some statistics
        pct_outlier = 100 * float(len(outlier_regions)) / ma.matrix.shape[0]
        outlier_label = "Bins that are MAD outliers ({:.2f}%) " \
                        "out of".format(pct_outlier, ma.matrix.shape[0])
        ma.printchrtoremove(outlier_regions, label=outlier_label,
                            restore_masked_bins=False)

        assert matrix_shape == ma.matrix.shape
        # mask filtered regions
        ma.maskBins(outlier_regions)
        total_filtered_out = set(outlier_regions)

        if args.sequencedCountCutoff and 0 < args.sequencedCountCutoff < 1:
            _, _, _, coverage = zip(*ma.cut_intervals)

            assert type(coverage[0]) == np.float64

            failed_bins = np.flatnonzero(
                np.array(coverage) < args.sequencedCountCutoff)

            ma.printchrtoremove(failed_bins, label="Bins with low coverage",
                                restore_masked_bins=False)
            ma.maskBins(failed_bins)
            total_filtered_out = set(failed_bins)
            # Disabled gap filling, kept for reference:
            #   ma.matrix, to_remove = fill_gaps(ma, failed_bins)
            #   log.warning("From {} failed bins, {} could "
            #               "not be filled\n".format(len(failed_bins),
            #                                        len(to_remove)))
            #   ma.maskBins(to_remove)

        if args.transCutoff and 0 < args.transCutoff < 100:
            # a usual cutoff is 0.05
            ma.truncTrans(high=float(args.transCutoff) / 100)

    pre_row_sum = np.asarray(ma.matrix.sum(axis=1)).flatten()
    correction_factors = []
    corrected_matrix = lil_matrix(ma.matrix.shape)

    if args.perchr:
        # normalize each chromosome independently
        for chrname in list(ma.interval_trees):
            chr_range = ma.getChrBinRange(chrname)
            first_bin, end_bin = chr_range[0], chr_range[1]
            chr_submatrix = ma.matrix[first_bin:end_bin, first_bin:end_bin]
            if args.correctionMethod == 'ICE':
                _matrix, _corr_factors = iterative_correction(chr_submatrix,
                                                              args)
                corrected_matrix[first_bin:end_bin, first_bin:end_bin] = _matrix
                correction_factors.append(_corr_factors)
            else:
                # Set the kr matrix along with its correction factors vector
                assert (args.correctionMethod == 'KR')
                kr = new_kr_balancer(chr_submatrix)
                kr.computeKR()
                if writes_h5:
                    corrected_matrix[first_bin:end_bin, first_bin:end_bin] = \
                        kr.get_normalised_matrix(True)
                correction_factors.append(
                    kr.get_normalisation_vector(False).todense())

        correction_factors = np.concatenate(correction_factors)
    elif args.correctionMethod == 'ICE':
        corrected_matrix, correction_factors = iterative_correction(ma.matrix,
                                                                    args)
        ma.setMatrixValues(corrected_matrix)
    else:
        assert (args.correctionMethod == 'KR')
        kr = new_kr_balancer(ma.matrix)
        log.debug('passed pointers')
        kr.computeKR()
        log.debug('computation done')
        # set it to False since the vector is already normalised
        # with the previous True
        correction_factors = kr.get_normalisation_vector(False).todense()

        if writes_h5:
            corrected_matrix = kr.get_normalised_matrix(True)

    if writes_h5:
        ma.setMatrixValues(corrected_matrix)
    ma.setCorrectionFactors(correction_factors)

    log.debug("Correction factors {}".format(correction_factors[:10]))
    if args.inflationCutoff and args.inflationCutoff > 0 and \
            args.correctionMethod == 'ICE':
        after_row_sum = np.asarray(corrected_matrix.sum(axis=1)).flatten()
        # identify rows that were expanded more than args.inflationCutoff times
        to_remove = np.flatnonzero(
            after_row_sum / pre_row_sum >= args.inflationCutoff)
        inflated_label = "inflated >={} " \
                         "regions".format(args.inflationCutoff)
        ma.printchrtoremove(to_remove, label=inflated_label,
                            restore_masked_bins=False)
        total_filtered_out = total_filtered_out.union(to_remove)
        ma.maskBins(to_remove)

    ma.printchrtoremove(sorted(total_filtered_out),
                        label="Total regions to be removed",
                        restore_masked_bins=False)

    ma.save(args.outFileName, pApplyCorrection=False)
def main(args=None):
    """Entry point: plot a Hi-C matrix (whole, per chromosome or a region).

    The matrix is loaded either directly restricted to the requested region /
    chromosomes (cooler files without ``--region2`` and with at most one
    entry in ``--chromosomeOrder``) or completely, followed by reordering and
    sub-setting.  The plot is drawn by ``plotPerChr`` (``--perChromosome``)
    or ``plotHeatmap`` and saved to ``args.outFileName``.

    Fix compared to the previous version: validating ``--chromosomeOrder``
    used ``chrom`` as loop variable, so the last chromosome name leaked into
    ``xlabel=chrom`` of ``plotHeatmap`` although no region was selected.
    The loop now uses its own variable and ``chrom`` stays ``None`` unless
    ``--region`` sets it.

    :param args: argument list handed to the parser (``None``: command line).
    :raises SystemExit: if ``--perChromosome`` and ``--region`` are combined.
    """
    args = parse_arguments().parse_args(args)
    if args.title:
        args.title = remove_non_ascii(args.title)

    # Stay None unless --region / --region2 are given (see getRegion).
    chrom = None
    start_pos1 = None
    chrom2 = None
    start_pos2 = None

    if args.perChromosome and args.region:
        log.error('ERROR, choose from the option '
                  '--perChromosome or --region, the two '
                  'options at the same time are not '
                  'compatible.')
        exit(1)

    is_cooler = check_cooler(args.matrix)
    log.debug("Cooler or no cooler: {}".format(is_cooler))

    # More than one entry in --chromosomeOrder disables the direct cooler
    # loading path below.
    open_cooler_chromosome_order = True
    if args.chromosomeOrder is not None and len(args.chromosomeOrder) > 1:
        open_cooler_chromosome_order = False

    if is_cooler and not args.region2 and open_cooler_chromosome_order:
        log.debug("Retrieve data from cooler format and use its benefits.")
        regionsToRetrieve = None
        if args.region:
            regionsToRetrieve = [args.region]
        if args.chromosomeOrder:
            # --chromosomeOrder overrides any region selection.
            args.region = None
            args.region2 = None
            regionsToRetrieve = args.chromosomeOrder

        ma = HiCMatrix.hiCMatrix(args.matrix, pChrnameList=regionsToRetrieve)
        log.debug('Shape {}'.format(ma.matrix.shape))
        if args.clearMaskedBins:
            ma.maskBins(ma.nan_bins)
            # to avoid gaps in the plot, bins flanking the masked bins
            # are enlarged
            new_intervals = enlarge_bins(ma.cut_intervals)
            ma.setCutIntervals(new_intervals)

        if args.region:
            chrom, region_start, region_end, idx1, start_pos1, chrom2, region_start2, region_end2, idx2, start_pos2 = getRegion(args, ma)

        matrix = np.asarray(ma.matrix.todense().astype(float))
        matrix_length = len(matrix[0])
        log.debug("Number of data points matrix_cool: {}".format(matrix_length))
    else:
        ma = HiCMatrix.hiCMatrix(args.matrix)
        if args.clearMaskedBins:
            ma.maskBins(ma.nan_bins)
            new_intervals = enlarge_bins(ma.cut_intervals)
            ma.setCutIntervals(new_intervals)
        if args.chromosomeOrder:
            args.region = None
            args.region2 = None

            valid_chromosomes = []
            invalid_chromosomes = []
            log.debug('args.chromosomeOrder: {}'.format(args.chromosomeOrder))
            log.debug("ma.chrBinBoundaries {}".format(ma.chrBinBoundaries))
            if sys.version_info[0] == 3:
                args.chromosomeOrder = toBytes(args.chromosomeOrder)
            # Dedicated loop variable: must not overwrite ``chrom``, which is
            # later used as the x-axis label.
            for ordered_chrom in toString(args.chromosomeOrder):
                if ordered_chrom in ma.chrBinBoundaries:
                    valid_chromosomes.append(ordered_chrom)
                else:
                    invalid_chromosomes.append(ordered_chrom)

            if len(invalid_chromosomes) > 0:
                log.warning("WARNING: The following chromosome/scaffold names were not found. Please check"
                            "the correct spelling of the chromosome names. \n")
                log.warning("\n".join(invalid_chromosomes))
            ma.reorderChromosomes(valid_chromosomes)

        log.info("min: {}, max: {}\n".format(ma.matrix.data.min(), ma.matrix.data.max()))

        if args.region:
            chrom, region_start, region_end, idx1, start_pos1, chrom2, region_start2, region_end2, idx2, start_pos2 = getRegion(args, ma)
            matrix = np.asarray(ma.matrix[idx1, :][:, idx2].todense().astype(float))
        else:
            log.debug("Else branch")
            matrix = np.asarray(ma.getMatrix().astype(float))

        matrix_length = len(matrix[0])
        log.debug("Number of data points matrix: {}".format(matrix_length))

    # Sanity check: all rows must have the same length (only reported).
    for matrix_ in matrix:
        if not matrix_length == len(matrix_):
            log.error("Matrices do not have the same length: {} , {}".format(matrix_length, len(matrix_)))

    cmap = cm.get_cmap(args.colorMap)
    log.debug("Nan values set to black\n")
    cmap.set_bad('black')

    bigwig_info = None
    if args.bigwig:
        bigwig_info = {'args': args, 'axis': None, 'axis_colorbar': None, 'nan_bins': ma.nan_bins}

    if args.perChromosome:
        fig = plotPerChr(ma, cmap, args, pBigwig=bigwig_info)
    else:
        norm = None

        if args.log or args.log1p:
            # Zeros, NaNs and infs cannot be shown on a log scale: replace
            # them with the smallest remaining value.
            mask = matrix == 0
            matrix[mask] = np.nanmin(matrix[~mask])

            if np.isnan(matrix).any() or np.isinf(matrix).any():
                log.debug("any nan {}".format(np.isnan(matrix).any()))
                log.debug("any inf {}".format(np.isinf(matrix).any()))
                mask_nan = np.isnan(matrix)
                mask_inf = np.isinf(matrix)
                matrix[mask_nan] = np.nanmin(matrix[~mask_nan])
                matrix[mask_inf] = np.nanmin(matrix[~mask_inf])

        log.debug("any nan after remove of nan: {}".format(np.isnan(matrix).any()))
        log.debug("any inf after remove of inf: {}".format(np.isinf(matrix).any()))
        if args.log1p:
            matrix += 1
            norm = LogNorm()
        elif args.log:
            norm = LogNorm()

        if args.bigwig:
            # increase figure height to accommodate bigwig track
            fig_height = 8.5
        else:
            fig_height = 7
        height = 4.8 / fig_height

        fig_width = 8
        width = 5.0 / fig_width
        left_margin = (1.0 - width) * 0.5

        fig = plt.figure(figsize=(fig_width, fig_height), dpi=args.dpi)

        if args.bigwig:
            # heatmap (top left), bigwig track (bottom left), colorbar (top right)
            gs = gridspec.GridSpec(2, 2, height_ratios=[0.90, 0.1], width_ratios=[0.97, 0.03])
            gs.update(hspace=0.05, wspace=0.05)
            ax1 = plt.subplot(gs[0, 0])
            ax2 = plt.subplot(gs[1, 0])
            ax3 = plt.subplot(gs[0, 1])
            bigwig_info['axis'] = ax2
            bigwig_info['axis_colorbar'] = ax3
        else:
            ax1 = None
        bottom = 1.3 / fig_height

        if start_pos1 is None:
            start_pos1 = make_start_pos_array(ma)

        position = [left_margin, bottom, width, height]
        plotHeatmap(matrix, ma.get_chromosome_sizes(), fig, position,
                    args, cmap, xlabel=chrom, ylabel=chrom2,
                    start_pos=start_pos1, start_pos2=start_pos2, pNorm=norm, pAxis=ax1, pBigwig=bigwig_info)

    if not args.disable_tight_layout:
        if args.perChromosome or args.bigwig:
            try:
                plt.tight_layout()
            except (UserWarning, ValueError):
                log.info("Failed to tight layout. Using regular plot.")

    plt.savefig(args.outFileName, dpi=args.dpi)
    plt.close(fig)
def _interleave_large_and_small(pNamesBySizeDesc, pThreads):
    """Order chromosomes so each large one is followed by some small ones.

    ``pNamesBySizeDesc`` is sorted from largest to smallest.  The largest
    chromosome is followed by up to ``pThreads`` of the smallest ones, every
    further chromosome from the large end by up to ``pThreads - 1`` from the
    small end, until both ends meet.  Every name is returned exactly once.
    """
    ordered = []
    size = len(pNamesBySizeDesc)
    i = 0  # next chromosome taken from the large end
    j = pThreads  # how many small chromosomes may still follow the current one
    k = size - 1  # next chromosome taken from the small end
    while i < size:
        ordered.append(pNamesBySizeDesc[i])
        while j > 0 and k > 0:
            if k == i:
                break
            ordered.append(pNamesBySizeDesc[k])
            k -= 1
            j -= 1
        j = pThreads - 1
        if i == k:
            break
        i += 1
    return ordered


def main(args=None):
    """Entry point: detect loops per chromosome and write them to a file.

    With a single chromosome the computation runs in this process; otherwise
    up to ``args.threads`` worker processes run ``compute_loops`` in
    parallel, one chromosome each, and report through a queue.  All detected
    loops are written with ``write_bedgraph`` to ``args.outFileName``.

    Fixes compared to the previous version:

    * a worker result that is a ``'Fail: ...'`` message is no longer also
      indexed and appended to the loop list, and a ``None`` result no longer
      raises ``TypeError``;
    * an empty chromosome list ends with an error instead of polling
      forever with zero workers.

    :param args: argument list handed to the parser (``None``: command line).
    :raises SystemExit: on invalid window/peak sizes, when no chromosome is
                        available, when no loops are found in single-core
                        mode, or when ``compute_loops`` reports a failure.
    """
    args = parse_arguments().parse_args(args)
    if args.windowSize <= args.peakWidth:
        log.error('The window size ({}) must be larger than the peakWidth ({})'.format(args.windowSize, args.peakWidth))
        exit(1)

    is_cooler = check_cooler(args.matrix)
    if args.threadsPerChromosome < 1:
        args.threadsPerChromosome = 1
    mapped_loops = []

    if not is_cooler:
        # h5: load once and keep pristine copies to restore per chromosome.
        hic_matrix = hm.hiCMatrix(args.matrix)
        matrix = deepcopy(hic_matrix.matrix)
        cut_intervals = deepcopy(hic_matrix.cut_intervals)

    if args.chromosomes is None:
        # get all chromosomes from cooler file
        if not is_cooler:
            chromosomes_list = list(hic_matrix.chrBinBoundaries)
        else:
            chromosome_sizes = cooler.Cooler(args.matrix).chromsizes

            # shuffle the processing order of chromosomes.
            # with this one large chromosome and 4 smalls are in a row
            # peak memory is reduced and more chromosomes can be processed
            # in parallel on low memory systems.
            sorted_sizes_desc = chromosome_sizes.sort_values(ascending=False)
            chromosomes_list = _interleave_large_and_small(
                sorted_sizes_desc.index.tolist(), args.threads)
    else:
        chromosomes_list = args.chromosomes

    if len(chromosomes_list) == 0:
        log.error('The matrix does not contain any chromosome to process.')
        exit(1)

    if len(chromosomes_list) < args.threads:
        args.threads = len(chromosomes_list)
    single_core = len(chromosomes_list) == 1

    if single_core:
        for chromosome in chromosomes_list:
            if is_cooler:
                hic_matrix = hm.hiCMatrix(
                    pMatrixFile=args.matrix, pChrnameList=[chromosome], pDistance=args.maxLoopDistance, pNoIntervalTree=True, pUpperTriangleOnly=True)
            else:
                hic_matrix.setMatrix(
                    deepcopy(matrix), deepcopy(cut_intervals))
                hic_matrix.keepOnlyTheseChr([chromosome])
            loops = compute_loops(hic_matrix, chromosome, args, is_cooler)
            if loops is None:
                log.error('No loops could be detected. Please change your input parameters, use a matrix with a better read coverage or contact the develops on https://github.com/deeptools/HiCExplorer/issues')
                exit(1)
            if 'Fail: ' in loops:
                # The first six characters are the 'Fail: ' prefix.
                log.error(loops[6:])
                exit(1)
            mapped_loops.extend(loops)
    else:
        queue = [None] * args.threads
        process = [None] * args.threads
        all_data_processed = False
        all_threads_done = False
        thread_done = [False] * args.threads
        count_call_of_read_input = 0  # index of the next chromosome to start
        fail_flag = False
        fail_message = ''
        while not all_data_processed or not all_threads_done:
            for i in range(args.threads):
                if queue[i] is None and not all_data_processed:
                    # Free slot: start the next chromosome, if any is left.
                    if count_call_of_read_input >= len(chromosomes_list):
                        all_data_processed = True
                        continue
                    queue[i] = Queue()
                    thread_done[i] = False
                    process[i] = Process(target=compute_loops, kwargs=dict(
                        pHiCMatrix=args.matrix,
                        pRegion=chromosomes_list[count_call_of_read_input],
                        pArgs=args,
                        pIsCooler=is_cooler,
                        pQueue=queue[i]
                    ))
                    process[i].start()
                    count_call_of_read_input += 1
                elif queue[i] is not None and not queue[i].empty():
                    result = queue[i].get()
                    if result is not None:
                        if 'Fail: ' in result:
                            fail_flag = True
                            fail_message = result
                        elif result[0] is not None:
                            mapped_loops.extend(result[0])

                    queue[i] = None
                    process[i].join()
                    process[i].terminate()
                    process[i] = None
                    thread_done[i] = True
                elif all_data_processed and queue[i] is None:
                    thread_done[i] = True
                else:
                    # Worker still running: wait before polling again.
                    time.sleep(1)

            if all_data_processed:
                all_threads_done = all(thread_done)

        if fail_flag:
            if fail_message is not None:
                log.error(fail_message[6:])
            else:
                log.error('An error occurred.')
            exit(1)

    if len(mapped_loops) > 0:
        write_bedgraph(mapped_loops, args.outFileName)

    log.info("Number of detected loops for all regions: {}".format(
        len(mapped_loops)))
def main(args=None):
    """Detect loops for one region or for a list of chromosomes.

    With ``--region`` the loops of that region are computed in this process
    and written together with the region bounds.  Otherwise every chromosome
    (from ``--chromosomes`` or from the matrix) is processed, in this process
    for a single chromosome or in worker processes for several, and the
    collected loops are written if there are any.

    Parameters
    ----------
    args : list of str, optional
        Command line arguments; ``None`` lets argparse read ``sys.argv``.

    Exits with status 1 if both ``--region`` and ``--chromosomes`` are given.
    """
    args = parse_arguments().parse_args(args)
    log.info('peak interactions threshold set to {}'.format(
        args.peakInteractionsThreshold))

    if args.region is not None and args.chromosomes is not None:
        log.error('Please choose either --region or --chromosomes.')
        exit(1)

    log.debug('args.matrix {}'.format(args.matrix))
    is_cooler = check_cooler(args.matrix)
    log.debug('is_cooler {}'.format(is_cooler))

    if args.region:
        chrom, region_start, region_end = translate_region(args.region)

        if is_cooler:
            hic_matrix = hm.hiCMatrix(pMatrixFile=args.matrix,
                                      pChrnameList=[args.region])
        else:
            hic_matrix = hm.hiCMatrix(args.matrix)
            hic_matrix.keepOnlyTheseChr([chrom])

        mapped_loops = compute_loops(hic_matrix, args.region, args)
        if mapped_loops is None:
            # The per-chromosome path below treats None as "nothing found";
            # do the same here so writing and counting do not fail.
            mapped_loops = []
        write_bedgraph(mapped_loops, args.outFileName,
                       region_start, region_end)
    else:
        mapped_loops = []

        if not is_cooler:
            # Load once and keep pristine copies to reset the matrix before
            # each chromosome is extracted.
            hic_matrix = hm.hiCMatrix(args.matrix)
            matrix = deepcopy(hic_matrix.matrix)
            cut_intervals = deepcopy(hic_matrix.cut_intervals)

        if args.chromosomes is None:
            # get all chromosomes from the matrix file
            if not is_cooler:
                chromosomes_list = list(hic_matrix.chrBinBoundaries)
            else:
                chromosomes_list = cooler.Cooler(args.matrix).chromnames
        else:
            chromosomes_list = args.chromosomes

        single_core = len(chromosomes_list) == 1

        if single_core:
            for chromosome in chromosomes_list:
                if is_cooler:
                    hic_matrix = hm.hiCMatrix(pMatrixFile=args.matrix,
                                              pChrnameList=[chromosome])
                else:
                    hic_matrix.setMatrix(deepcopy(matrix),
                                         deepcopy(cut_intervals))
                    hic_matrix.keepOnlyTheseChr([chromosome])
                hic_matrix.maskBins(hic_matrix.nan_bins)

                loops = compute_loops(hic_matrix, chromosome, args)
                if loops is not None:
                    mapped_loops.extend(loops)
        else:
            # At least one slot is required, otherwise the polling loop
            # below has nothing to iterate over and never terminates.
            worker_count = max(1, args.threads)
            queue = [None] * worker_count
            process = [None] * worker_count
            all_data_processed = False
            all_threads_done = False
            thread_done = [False] * worker_count
            count_call_of_read_input = 0  # index of the next chromosome

            while not all_data_processed or not all_threads_done:
                for i in range(worker_count):
                    if queue[i] is None and not all_data_processed:
                        # free slot: prepare the next chromosome
                        if count_call_of_read_input >= len(chromosomes_list):
                            all_data_processed = True
                            continue
                        queue[i] = Queue()
                        thread_done[i] = False

                        if is_cooler:
                            hic_matrix = hm.hiCMatrix(
                                pMatrixFile=args.matrix,
                                pChrnameList=[
                                    chromosomes_list[count_call_of_read_input]
                                ])
                        else:
                            hic_matrix.setMatrix(deepcopy(matrix),
                                                 deepcopy(cut_intervals))
                            hic_matrix.keepOnlyTheseChr(
                                [chromosomes_list[count_call_of_read_input]])

                        if len(hic_matrix.matrix.data) > 0:
                            process[i] = Process(
                                target=compute_loops,
                                kwargs=dict(
                                    pHiCMatrix=hic_matrix,
                                    pRegion=chromosomes_list[
                                        count_call_of_read_input],
                                    pArgs=args,
                                    pQueue=queue[i]))
                            process[i].start()
                        else:
                            # nothing stored for this chromosome: skip it
                            queue[i] = None
                            thread_done[i] = True

                        if count_call_of_read_input < len(chromosomes_list):
                            count_call_of_read_input += 1
                        else:
                            all_data_processed = True
                    elif queue[i] is not None and not queue[i].empty():
                        # worker delivered its result: collect, free the slot
                        result = queue[i].get()
                        if result[0] is not None:
                            mapped_loops.extend(result[0])

                        queue[i] = None
                        process[i].join()
                        process[i].terminate()
                        process[i] = None
                        thread_done[i] = True
                    elif all_data_processed and queue[i] is None:
                        thread_done[i] = True
                    else:
                        # worker in this slot is still running
                        time.sleep(1)

                if all_data_processed:
                    all_threads_done = True
                    for thread in thread_done:
                        if not thread:
                            all_threads_done = False

        log.debug('done computing. loops {}'.format(mapped_loops))
        if len(mapped_loops) > 0:
            write_bedgraph(mapped_loops, args.outFileName)

    log.info("Number of detected loops for all regions: {}".format(
        len(mapped_loops)))
def main(args=None):
    """Filter and iteratively correct a Hi-C matrix, or only plot diagnostics.

    Steps, in order: load the matrix, mask bins whose row sum is zero,
    replace NaN/Inf values by zero, then either draw the diagnostic plot and
    return (when the parsed arguments contain ``plotName``) or filter
    outlier bins, optionally filter low-coverage bins and truncate trans
    contacts, run ``iterative_correction`` (per chromosome if
    ``args.perchr``), optionally mask bins inflated by the correction, and
    save the result to ``args.outFileName``.

    Parameters
    ----------
    args : list of str, optional
        Command line arguments; ``None`` lets argparse read ``sys.argv``.
    """
    args = parse_arguments().parse_args(args)

    if args.verbose:
        log.setLevel(logging.INFO)

    # A cooler file with exactly one requested chromosome gets the name
    # passed to the constructor; otherwise everything is loaded and the
    # requested chromosomes are selected afterwards.
    if check_cooler(args.matrix) and args.chromosomes is not None and len(args.chromosomes) == 1:
        ma = hm.hiCMatrix(args.matrix, pChrnameList=toString(args.chromosomes))
    else:
        ma = hm.hiCMatrix(args.matrix)

        if args.chromosomes:
            ma.reorderChromosomes(toString(args.chromosomes))

    # mask all zero value bins
    row_sum = np.asarray(ma.matrix.sum(axis=1)).flatten()
    zero_rows = row_sum == 0
    log.info("Removing {} zero value bins".format(np.count_nonzero(zero_rows)))
    ma.maskBins(np.flatnonzero(zero_rows))
    matrix_shape = ma.matrix.shape

    ma.matrix = convertNansToZeros(ma.matrix)
    ma.matrix = convertInfsToZeros(ma.matrix)

    if 'plotName' in args:
        plot_total_contact_dist(ma, args)
        log.info("Saving diagnostic plot {}\n".format(args.plotName))
        return

    log.info("matrix contains {} data points. Sparsity {:.3f}.".format(
        len(ma.matrix.data),
        float(len(ma.matrix.data)) / (ma.matrix.shape[0] ** 2)))

    if args.skipDiagonal:
        ma.diagflat(value=0)

    outlier_regions = filter_by_zscore(ma, args.filterThreshold[0],
                                       args.filterThreshold[1],
                                       perchr=args.perchr)
    # compute and print some statistics
    pct_outlier = 100 * float(len(outlier_regions)) / ma.matrix.shape[0]
    ma.printchrtoremove(outlier_regions,
                        label="Bins that are MAD outliers ({:.2f}%) "
                              "out of".format(pct_outlier),
                        restore_masked_bins=False)

    # the filtering above must not have changed the matrix
    assert matrix_shape == ma.matrix.shape
    # mask filtered regions
    ma.maskBins(outlier_regions)
    total_filtered_out = set(outlier_regions)

    if args.sequencedCountCutoff and 0 < args.sequencedCountCutoff < 1:
        # fourth field of every cut interval is compared against the cutoff
        _, _, _, coverage = zip(*ma.cut_intervals)

        assert type(coverage[0]) == np.float64

        failed_bins = np.flatnonzero(
            np.array(coverage) < args.sequencedCountCutoff)

        ma.printchrtoremove(failed_bins, label="Bins with low coverage",
                            restore_masked_bins=False)
        ma.maskBins(failed_bins)
        # Add to, rather than replace, the bins collected so far so that the
        # final report also lists the outlier bins.
        total_filtered_out = total_filtered_out.union(failed_bins)
        # NOTE: filling the failed bins via fill_gaps() is disabled.

    if args.transCutoff and 0 < args.transCutoff < 100:
        # the option is given in percent; a usual cutoff is 0.05
        cutoff = float(args.transCutoff) / 100
        ma.truncTrans(high=cutoff)

    # row sums before correction, needed for the inflation check below
    pre_row_sum = np.asarray(ma.matrix.sum(axis=1)).flatten()
    correction_factors = []
    if args.perchr:
        corrected_matrix = lil_matrix(ma.matrix.shape)
        # normalize each chromosome independently
        for chrname in list(ma.interval_trees):
            chr_start, chr_end = ma.getChrBinRange(chrname)[:2]
            chr_submatrix = ma.matrix[chr_start:chr_end, chr_start:chr_end]
            _matrix, _corr_factors = iterative_correction(chr_submatrix, args)
            corrected_matrix[chr_start:chr_end, chr_start:chr_end] = _matrix
            correction_factors.append(_corr_factors)
        correction_factors = np.concatenate(correction_factors)
    else:
        corrected_matrix, correction_factors = iterative_correction(ma.matrix, args)

    ma.setMatrixValues(corrected_matrix)
    ma.setCorrectionFactors(correction_factors)
    log.info("Correction factors {}".format(correction_factors[:10]))

    if args.inflationCutoff and args.inflationCutoff > 0:
        after_row_sum = np.asarray(corrected_matrix.sum(axis=1)).flatten()
        # identify rows that were expanded more than args.inflationCutoff times
        to_remove = np.flatnonzero(
            after_row_sum / pre_row_sum >= args.inflationCutoff)
        ma.printchrtoremove(to_remove,
                            label="inflated >={} "
                                  "regions".format(args.inflationCutoff),
                            restore_masked_bins=False)
        total_filtered_out = total_filtered_out.union(to_remove)

        ma.maskBins(to_remove)

    ma.printchrtoremove(sorted(total_filtered_out),
                        label="Total regions to be removed",
                        restore_masked_bins=False)

    ma.save(args.outFileName, pApplyCorrection=False)
def _group_domains_by_chromosome(domains):
    """Group consecutive domain rows that share the same first field."""
    grouped = []
    for domain in domains:
        if grouped and grouped[-1][0][0] == domain[0]:
            grouped[-1].append(domain)
        else:
            grouped.append([domain])
    return grouped


def _partition_domains(chromosome_domains, max_workers):
    """Split one chromosome's domains into (domain_list, thread_id) jobs.

    Neighbouring chunks overlap by one domain on each side.  ``thread_id``
    is passed on to the worker unchanged:
    ``''`` for a single worker handling everything, ``None`` for the first
    chunk, ``True`` for a middle chunk and ``False`` for the last chunk.
    """
    per_worker = len(chromosome_domains) // max_workers
    if max_workers == 1 or per_worker == 0:
        # fewer domains than workers: one worker handles the chromosome
        return [(chromosome_domains, '')]

    jobs = []
    for i in range(max_workers):
        begin = i * per_worker
        end = begin + per_worker
        if i == 0:
            jobs.append((chromosome_domains[begin:end + 1], None))
        elif i < max_workers - 1:
            jobs.append((chromosome_domains[begin - 1:end + 1], True))
        else:
            jobs.append((chromosome_domains[begin - 1:], False))
    return jobs


def main(args=None):
    """Compute inter/intra TAD contact statistics and write table and plot.

    The TAD domains file is read and grouped per chromosome.  For every
    chromosome the domains are distributed over worker processes running
    ``computeInterIntraTADs``; each worker returns 15 metric lists followed
    by the list of domain rows.  All results are written tab-separated to
    ``args.outFileName`` and the inter-left/intra versus inter-right/intra
    ratios are drawn as a scatter plot to ``args.outFileNameRatioPlot``.

    Parameters
    ----------
    args : list of str, optional
        Command line arguments; ``None`` lets argparse read ``sys.argv``.

    Exits with status 1 if a worker reports a message containing ``'Fail:'``.
    """
    args = parse_arguments().parse_args(args)
    mpl.rcParams['pdf.fonttype'] = 42

    # read domains file
    domains = readDomainBoundaries(args.tadDomains).values.tolist()
    tads_per_chromosome = _group_domains_by_chromosome(domains)

    # read full h5 here; a cooler file is passed on by name
    is_cooler = check_cooler(args.matrix)
    if not is_cooler:
        hic_matrix = hm.hiCMatrix(args.matrix)
    else:
        hic_matrix = args.matrix

    # Order in which the workers return their metric lists; it is also the
    # order of the metric columns in the output file.
    metric_names = (
        'inter_left_sum',
        'inter_right_sum',
        'inter_left_density',
        'inter_right_density',
        'inter_left_number_of_contacts',
        'inter_right_number_of_contacts',
        'inter_left_number_of_contacts_nnz',
        'inter_right_number_of_contacts_nnz',
        'intra_sum',
        'intra_number_of_contacts',
        'intra_number_of_contacts_nnz',
        'intra_density',
        'inter_left_intra_ratio',
        'inter_right_intra_ratio',
        'inter_left_inter_right_intra_ratio',
    )
    metric_columns = [[] for _ in metric_names]
    rows = []

    max_workers = max(1, args.threads)

    for chromosome in tads_per_chromosome:
        # The split is derived from the configured thread count for every
        # chromosome, independent of how the previous one was split.
        jobs = _partition_domains(chromosome, max_workers)

        queues = []
        processes = []
        for domain_list, thread_id in jobs:
            job_queue = Queue()
            worker = Process(
                target=computeInterIntraTADs,
                kwargs=dict(
                    pMatrix=hic_matrix,
                    pDomainList=domain_list,
                    pCoolOrH5=is_cooler,
                    pThreadId=thread_id,
                    pQueue=job_queue))
            worker.start()
            queues.append(job_queue)
            processes.append(worker)

        # Fresh result slots per chromosome, so nothing from a previous
        # chromosome can leak into this one.
        results = [None] * len(jobs)
        fail_message = None
        pending = set(range(len(jobs)))
        while pending:
            for i in sorted(pending):
                if queues[i].empty():
                    continue
                queue_data = queues[i].get()
                if 'Fail:' in queue_data:
                    fail_message = queue_data
                else:
                    results[i] = queue_data
                processes[i].join()
                processes[i].terminate()
                pending.discard(i)
            if pending:
                time.sleep(1)

        if fail_message is not None:
            # strip the 'Fail: ' prefix before reporting
            log.error(fail_message[6:])
            exit(1)

        # append the chunks in worker order
        for worker_result in results:
            values = tuple(worker_result)
            if len(values) != len(metric_columns) + 1:
                raise ValueError(
                    'expected {} values from worker, got {}'.format(
                        len(metric_columns) + 1, len(values)))
            for column, chunk in zip(metric_columns, values[:-1]):
                column.extend(chunk)
            rows.extend(values[-1])

    with open(args.outFileName, 'w') as file:
        header = '# Created with HiCExplorer\'s hicInterIntraTAD version ' + __version__ + '\n'
        header += '# Chromosome\tstart\tend\tname\tscore\tstrand\tinter_left_sum\tinter_right_sum\tinter_left_density\tinter_right_density\tinter_left_number_of_contacts\tinter_right_number_of_contacts\t' \
                  'inter_left_number_of_contacts_nnz\tinter_right_number_of_contacts_nnz\tintra_sum\tintra_number_of_contacts\tintra_number_of_contacts_nnz\tintra_density\tinter_left_intra_ratio\tinter_right_intra_ratio\tinter_left_inter_right_intra_ratio\n'
        file.write(header)

        for i, row in enumerate(rows):
            line = '\t'.join(map(str, row))
            line += ''.join('\t{}'.format(column[i])
                            for column in metric_columns)
            file.write(line + '\n')

    plt.scatter(metric_columns[metric_names.index('inter_left_intra_ratio')],
                metric_columns[metric_names.index('inter_right_intra_ratio')],
                s=20, alpha=0.7)
    plt.xlabel('Inter-left/intra TAD contact ratio', fontsize=args.fontsize)
    plt.ylabel('Inter-right/intra TAD contact ratio', fontsize=args.fontsize)
    plt.tight_layout()
    plt.savefig(args.outFileNameRatioPlot, dpi=args.dpi)
    plt.close()