def getRegion(args, ma):
    """Resolve ``args.region`` to the matrix bins that lie inside it.

    The region is parsed with ``translate_region``.  When the first name
    yielded by ``ma.interval_trees`` is a bytes object, the chromosome name
    is converted with ``toBytes`` before it is looked up.  ``args.region``
    is overwritten in place with ``[chrom, region_start, region_end]``.

    A bin is selected when its chromosome equals ``chrom`` and
    ``bin_start >= region_start and bin_end < region_end``.

    Parameters
    ----------
    args
        Object with a ``region`` attribute (read, then replaced).
    ma
        Matrix object providing ``interval_trees`` (iterable of chromosome
        names) and ``cut_intervals`` (items indexed as chromosome, start,
        end at positions 0, 1, 2).

    Returns
    -------
    tuple
        ``(chrom, region_start, region_end, idx1, start_pos1, chrom2,
        region_start2, region_end2, idx2, start_pos2)``.  This variant
        handles a single region only: ``chrom2``, ``idx2`` and
        ``start_pos2`` repeat the first region's values, and
        ``region_start2`` / ``region_end2`` are always ``None``.

    Raises
    ------
    SystemExit
        If the chromosome is not among the matrix names, or if no bin
        satisfies the selection condition.
    """
    chrom, region_start, region_end = translate_region(args.region)

    # Build the list of names once.  Checking the list (instead of calling
    # next() on an iterator) also copes with a matrix that has no
    # chromosomes: the lookup below then ends in the regular error message.
    known_chroms = list(ma.interval_trees)
    names_are_bytes = bool(known_chroms) and isinstance(
        known_chroms[0], (np.bytes_, bytes))
    if names_are_bytes:
        chrom = toBytes(chrom)

    # NOTE(review): a nested retry that re-applied toBytes to the already
    # converted name was dropped here; this assumes toBytes is idempotent —
    # confirm.
    if chrom not in known_chroms:
        exit("The contig/scaffold name '{}' given in --region is not part of the Hi-C matrix. "
             "Check spelling".format(chrom))

    args.region = [chrom, region_start, region_end]

    selected = [(idx, interval[1])
                for idx, interval in enumerate(ma.cut_intervals)
                if interval[0] == chrom
                and interval[1] >= region_start
                and interval[2] < region_end]
    if not selected:
        # zip(*[]) yields nothing, so the two-name unpacking below would
        # fail with an unhelpful "not enough values to unpack" ValueError.
        exit("No bin of the Hi-C matrix lies within the region {}:{}-{} "
             "given in --region.".format(chrom, region_start, region_end))
    idx1, start_pos1 = zip(*selected)

    # No second region in this variant: mirror the first one.
    region_start2 = region_end2 = None
    return (chrom, region_start, region_end, idx1, start_pos1,
            chrom, region_start2, region_end2, idx1, start_pos1)
def main(args=None):
    """Command-line entry point: load a Hi-C matrix and save a heatmap.

    Steps, in order:

    1. Parse ``args`` with the parser returned by ``parse_arguments()``.
    2. Load the matrix with ``HiCMatrix.hiCMatrix``.  When
       ``check_cooler(args.matrix)`` is true, ``--region2`` is not given and
       ``--chromosomeOrder`` has at most one entry, the requested region or
       chromosome list is passed as ``pChrnameList``; otherwise the matrix
       is loaded without that argument and reordered / sliced afterwards.
    3. Build a dense float matrix, prepare the colour map (bad values are
       drawn black) and, for ``--log`` / ``--log1p``, replace zeros, NaN and
       inf values by the smallest remaining value and use ``LogNorm``.
    4. Draw with ``plotPerChr`` (``--perChromosome``) or ``plotHeatmap`` and
       write the figure to ``args.outFileName``.

    Parameters
    ----------
    args : optional
        Passed unchanged to ``parse_args``.

    Raises
    ------
    SystemExit
        With status 1 when ``--perChromosome`` and ``--region`` are both
        given.
    """
    args = parse_arguments().parse_args(args)
    if args.title:
        args.title = remove_non_ascii(args.title)

    # Axis labels and bin start positions; only getRegion() fills them in.
    chrom = None
    start_pos1 = None
    chrom2 = None
    start_pos2 = None

    if args.perChromosome and args.region:
        log.error('ERROR, choose from the option '
                  '--perChromosome or --region, the two '
                  'options at the same time are not '
                  'compatible.')
        exit(1)

    is_cooler = check_cooler(args.matrix)
    log.debug("Cooler or no cooler: {}".format(is_cooler))

    # More than one entry in --chromosomeOrder routes loading through the
    # generic branch below, which reorders the chromosomes after loading.
    open_cooler_chromosome_order = True
    if args.chromosomeOrder is not None and len(args.chromosomeOrder) > 1:
        open_cooler_chromosome_order = False

    if is_cooler and not args.region2 and open_cooler_chromosome_order:
        log.debug("Retrieve data from cooler format and use its benefits.")
        regionsToRetrieve = [args.region] if args.region else None
        if args.chromosomeOrder:
            # --chromosomeOrder takes precedence over any region selection.
            args.region = None
            args.region2 = None
            regionsToRetrieve = args.chromosomeOrder

        ma = HiCMatrix.hiCMatrix(args.matrix, pChrnameList=regionsToRetrieve)
        log.debug('Shape {}'.format(ma.matrix.shape))
        if args.clearMaskedBins:
            ma.maskBins(ma.nan_bins)
            # to avoid gaps in the plot, bins flanking the masked bins
            # are enlarged
            ma.setCutIntervals(enlarge_bins(ma.cut_intervals))

        if args.region:
            (chrom, _region_start, _region_end, _idx1, start_pos1,
             chrom2, _region_start2, _region_end2, _idx2,
             start_pos2) = getRegion(args, ma)

        matrix = np.asarray(ma.matrix.todense().astype(float))
        log.debug("Number of data points matrix_cool: {}".format(len(matrix[0])))
    else:
        ma = HiCMatrix.hiCMatrix(args.matrix)
        if args.clearMaskedBins:
            ma.maskBins(ma.nan_bins)
            ma.setCutIntervals(enlarge_bins(ma.cut_intervals))

        if args.chromosomeOrder:
            args.region = None
            args.region2 = None

            valid_chromosomes = []
            invalid_chromosomes = []
            log.debug('args.chromosomeOrder: {}'.format(args.chromosomeOrder))
            log.debug("ma.chrBinBoundaries {}".format(ma.chrBinBoundaries))
            if sys.version_info[0] == 3:
                args.chromosomeOrder = toBytes(args.chromosomeOrder)
            # NOTE(review): the loop variable is `chrom`, so after the loop
            # it holds the last name of --chromosomeOrder and is later
            # passed to plotHeatmap as xlabel — confirm this is intended.
            for chrom in toString(args.chromosomeOrder):
                if chrom in ma.chrBinBoundaries:
                    valid_chromosomes.append(chrom)
                else:
                    invalid_chromosomes.append(chrom)

            if invalid_chromosomes:
                # The two literals are concatenated; the space after "check"
                # keeps the words from running together.
                log.warning("WARNING: The following chromosome/scaffold names were not found. Please check "
                            "the correct spelling of the chromosome names. \n")
                log.warning("\n".join(invalid_chromosomes))
            ma.reorderChromosomes(valid_chromosomes)

        log.info("min: {}, max: {}\n".format(ma.matrix.data.min(), ma.matrix.data.max()))
        if args.region:
            (chrom, _region_start, _region_end, idx1, start_pos1,
             chrom2, _region_start2, _region_end2, idx2,
             start_pos2) = getRegion(args, ma)
            matrix = np.asarray(ma.matrix[idx1, :][:, idx2].todense().astype(float))
        else:
            log.debug("Else branch")
            matrix = np.asarray(ma.getMatrix().astype(float))

    matrix_length = len(matrix[0])
    log.debug("Number of data points matrix: {}".format(matrix_length))
    for matrix_ in matrix:
        if matrix_length != len(matrix_):
            log.error("Matrices do not have the same length: {} , {}".format(matrix_length, len(matrix_)))

    cmap = cm.get_cmap(args.colorMap)
    log.debug("Nan values set to black\n")
    cmap.set_bad('black')

    bigwig_info = None
    if args.bigwig:
        bigwig_info = {'args': args,
                       'axis': None,
                       'axis_colorbar': None,
                       'nan_bins': ma.nan_bins}

    if args.perChromosome:
        fig = plotPerChr(ma, cmap, args, pBigwig=bigwig_info)
    else:
        norm = None

        if args.log or args.log1p:
            # Zeros (and NaN / inf below) are replaced by the smallest of
            # the remaining values before the logarithmic colour scale is
            # applied.
            mask = matrix == 0
            matrix[mask] = np.nanmin(matrix[~mask])

            if np.isnan(matrix).any() or np.isinf(matrix).any():
                log.debug("any nan {}".format(np.isnan(matrix).any()))
                log.debug("any inf {}".format(np.isinf(matrix).any()))
                mask_nan = np.isnan(matrix)
                mask_inf = np.isinf(matrix)
                matrix[mask_nan] = np.nanmin(matrix[~mask_nan])
                matrix[mask_inf] = np.nanmin(matrix[~mask_inf])

        log.debug("any nan after remove of nan: {}".format(np.isnan(matrix).any()))
        log.debug("any inf after remove of inf: {}".format(np.isinf(matrix).any()))
        if args.log1p:
            matrix += 1
            norm = LogNorm()
        elif args.log:
            norm = LogNorm()

        # increase figure height to accommodate bigwig track
        fig_height = 8.5 if args.bigwig else 7
        height = 4.8 / fig_height

        fig_width = 8
        width = 5.0 / fig_width
        left_margin = (1.0 - width) * 0.5

        fig = plt.figure(figsize=(fig_width, fig_height), dpi=args.dpi)

        if args.bigwig:
            gs = gridspec.GridSpec(2, 2, height_ratios=[0.90, 0.1], width_ratios=[0.97, 0.03])
            gs.update(hspace=0.05, wspace=0.05)
            ax1 = plt.subplot(gs[0, 0])
            ax2 = plt.subplot(gs[1, 0])
            ax3 = plt.subplot(gs[0, 1])
            bigwig_info['axis'] = ax2
            bigwig_info['axis_colorbar'] = ax3
        else:
            ax1 = None
        bottom = 1.3 / fig_height

        if start_pos1 is None:
            start_pos1 = make_start_pos_array(ma)

        position = [left_margin, bottom, width, height]
        plotHeatmap(matrix, ma.get_chromosome_sizes(), fig, position,
                    args, cmap, xlabel=chrom, ylabel=chrom2,
                    start_pos=start_pos1, start_pos2=start_pos2,
                    pNorm=norm, pAxis=ax1, pBigwig=bigwig_info)

    if not args.disable_tight_layout:
        if args.perChromosome or args.bigwig:
            try:
                plt.tight_layout()
            except (UserWarning, ValueError):
                log.info("Failed to tight layout. Using regular plot.")

    plt.savefig(args.outFileName, dpi=args.dpi)
    plt.close(fig)
def main(args=None):
    """Command-line entry point: load a Hi-C matrix and save a heatmap.

    Steps, in order:

    1. Parse ``args`` with the parser returned by ``parse_arguments()``.
    2. Load the matrix with ``HiCMatrix.hiCMatrix``.  For a cooler file
       (name ends in ``.cool`` or ``cooler.io.is_cooler`` accepts it)
       without ``--region2``, the requested region or chromosome list is
       passed as ``chrnameList``; otherwise the matrix is loaded without
       that argument and reordered / sliced afterwards.
    3. Build a dense float matrix, prepare the colour map (bad values are
       drawn black) and, for ``--log`` / ``--log1p``, replace zeros, NaN and
       inf values by the smallest remaining value.  ``--log`` then applies
       ``np.log``; ``--log1p`` adds 1 and uses ``LogNorm``.
    4. Draw with ``plotPerChr`` (``--perChromosome``) or ``plotHeatmap`` and
       write the figure to ``args.outFileName``.

    Parameters
    ----------
    args : optional
        Passed unchanged to ``parse_args``.

    Raises
    ------
    SystemExit
        With status 1 when ``--perChromosome`` is combined with
        ``--region``, or when ``--region``, ``--region2`` and ``--pca`` are
        all given.
    """
    args = parse_arguments().parse_args(args)

    # Axis labels and bin start positions; only getRegion() fills them in.
    chrom = None
    start_pos1 = None
    chrom2 = None
    start_pos2 = None

    if args.perChromosome and args.region:
        log.error('ERROR, choose from the option '
                  '--perChromosome or --region, the two '
                  'options at the same time are not '
                  'compatible.')
        exit(1)

    if args.region and args.region2 and args.pca:
        log.error("Inter-chromosomal pca is not supported.")
        exit(1)

    is_cooler = False
    if args.matrix.endswith('.cool') or cooler.io.is_cooler(args.matrix):
        is_cooler = True

    if is_cooler and not args.region2:
        log.debug("Retrieve data from cooler format and use its benefits.")
        # This branch is only entered without --region2, so a single region
        # (if any) is all that has to be retrieved.
        regionsToRetrieve = [args.region] if args.region else None
        if args.chromosomeOrder:
            # --chromosomeOrder takes precedence over any region selection.
            args.region = None
            args.region2 = None
            regionsToRetrieve = args.chromosomeOrder

        ma = HiCMatrix.hiCMatrix(args.matrix, chrnameList=regionsToRetrieve)
        if args.clearMaskedBins:
            ma.maskBins(ma.nan_bins)
        if args.region:
            (chrom, _region_start, _region_end, _idx1, start_pos1,
             chrom2, _region_start2, _region_end2, _idx2,
             start_pos2) = getRegion(args, ma)
        matrix = np.asarray(ma.matrix.todense().astype(float))
    else:
        ma = HiCMatrix.hiCMatrix(args.matrix)
        if args.clearMaskedBins:
            ma.maskBins(ma.nan_bins)

        if args.chromosomeOrder:
            args.region = None
            args.region2 = None

            valid_chromosomes = []
            invalid_chromosomes = []
            log.debug('args.chromosomeOrder: {}'.format(args.chromosomeOrder))
            log.debug("ma.chrBinBoundaries {}".format(ma.chrBinBoundaries))
            if sys.version_info[0] == 3:
                args.chromosomeOrder = toBytes(args.chromosomeOrder)
            # NOTE(review): the loop variable is `chrom`, so after the loop
            # it holds the last name of --chromosomeOrder and is later
            # passed to plotHeatmap as xlabel — confirm this is intended.
            for chrom in args.chromosomeOrder:
                if chrom in ma.chrBinBoundaries:
                    valid_chromosomes.append(chrom)
                else:
                    invalid_chromosomes.append(chrom)

            if invalid_chromosomes:
                # The two literals are concatenated; the space after "check"
                # keeps the words from running together.
                log.warning(
                    "WARNING: The following chromosome/scaffold names were not found. Please check "
                    "the correct spelling of the chromosome names. \n")
                # The names may be bytes after the toBytes() call above and
                # str.join() rejects bytes items, so decode for display.
                log.warning("\n".join(
                    name.decode() if isinstance(name, bytes) else str(name)
                    for name in invalid_chromosomes))
            ma.reorderChromosomes(valid_chromosomes)

        log.info("min: {}, max: {}\n".format(ma.matrix.data.min(),
                                             ma.matrix.data.max()))
        if args.region:
            (chrom, _region_start, _region_end, idx1, start_pos1,
             chrom2, _region_start2, _region_end2, idx2,
             start_pos2) = getRegion(args, ma)
            matrix = np.asarray(
                ma.matrix[idx1, :][:, idx2].todense().astype(float))
        else:
            log.debug("Else branch")
            matrix = np.asarray(ma.getMatrix().astype(float))

    matrix_length = len(matrix[0])
    for matrix_ in matrix:
        if matrix_length != len(matrix_):
            log.error("Matrices do not have the same length: {} , {}".format(
                matrix_length, len(matrix_)))

    cmap = cm.get_cmap(args.colorMap)
    log.debug("Nan values set to black\n")
    cmap.set_bad('black')

    pca = None
    if args.pca:
        pca = {
            'args': args,
            'axis': None,
            'axis_colorbar': None,
            'nan_bins': ma.nan_bins
        }

    if args.perChromosome:
        fig = plotPerChr(ma, cmap, args, pPca=pca)
    else:
        norm = None

        if args.log or args.log1p:
            # Zeros (and NaN / inf below) are replaced by the smallest of
            # the remaining values before any logarithm is taken.
            mask = matrix == 0
            matrix[mask] = np.nanmin(matrix[~mask])

            if np.isnan(matrix).any() or np.isinf(matrix).any():
                log.debug("any nan {}".format(np.isnan(matrix).any()))
                log.debug("any inf {}".format(np.isinf(matrix).any()))
                mask_nan = np.isnan(matrix)
                mask_inf = np.isinf(matrix)
                matrix[mask_nan] = np.nanmin(matrix[~mask_nan])
                matrix[mask_inf] = np.nanmin(matrix[~mask_inf])

        if args.log:
            matrix = np.log(matrix)

        log.debug("any nan after remove of nan: {}".format(
            np.isnan(matrix).any()))
        log.debug("any inf after remove of inf: {}".format(
            np.isinf(matrix).any()))
        if args.log1p:
            matrix += 1
            norm = LogNorm()

        fig_height = 7
        height = 4.8 / fig_height

        fig_width = 8
        width = 5.0 / fig_width
        left_margin = (1.0 - width) * 0.5

        fig = plt.figure(figsize=(fig_width, fig_height), dpi=args.dpi)

        if args.pca:
            gs = gridspec.GridSpec(2, 2, height_ratios=[0.85, 0.15],
                                   width_ratios=[0.93, 0.07])
            gs.update(hspace=0.1)
            ax1 = plt.subplot(gs[0, 0])
            ax2 = plt.subplot(gs[1, 0])
            ax3 = plt.subplot(gs[0, 1])
            pca['axis'] = ax2
            pca['axis_colorbar'] = ax3
        else:
            ax1 = None
        bottom = 1.3 / fig_height

        position = [left_margin, bottom, width, height]
        plotHeatmap(matrix, ma.chrBinBoundaries, fig, position,
                    args, cmap, xlabel=chrom, ylabel=chrom2,
                    start_pos=start_pos1, start_pos2=start_pos2,
                    pNorm=norm, pAxis=ax1, pPca=pca)

    if args.perChromosome or args.pca:
        plt.tight_layout()

    plt.savefig(args.outFileName, dpi=args.dpi)
    plt.close(fig)
def _matrix_chrom_name(ma, chrom, option):
    """Return ``chrom`` in the spelling used by ``ma.interval_trees`` or exit.

    ``option`` is the command-line flag named in the error message.
    """
    known_chroms = list(ma.interval_trees)
    # Checking the list (instead of next() on an iterator) also copes with
    # a matrix that has no chromosomes at all.
    names_are_bytes = bool(known_chroms) and isinstance(
        known_chroms[0], (np.bytes_, bytes))

    if names_are_bytes:
        chrom = toBytes(chrom)
    if chrom in known_chroms:
        return chrom

    # Second attempt with the name returned by change_chrom_names().
    chrom = change_chrom_names(chrom)
    if names_are_bytes:
        chrom = toBytes(chrom)
    if chrom not in known_chroms:
        exit("Chromosome name {} in {} not in matrix".format(
            change_chrom_names(chrom), option))
    return chrom


def _bins_for_region(ma, chrom, start, end, is_cooler, option):
    """Return ``(indices, start_positions)`` of the bins selected for a region.

    Exits with a message naming ``option`` when no bin is selected.
    """
    if is_cooler:
        def selected(bin_start, bin_end):
            # Besides bins inside the region, also take bins that end or
            # start strictly inside it.
            return ((bin_start >= start and bin_end < end) or
                    (bin_start < end and bin_end < end and bin_end > start) or
                    (bin_start > start and bin_start < end))
    else:
        def selected(bin_start, bin_end):
            return bin_start >= start and bin_end < end

    hits = [(idx, interval[1])
            for idx, interval in enumerate(ma.cut_intervals)
            if interval[0] == chrom and selected(interval[1], interval[2])]
    if not hits:
        # zip(*[]) yields nothing, so unpacking into two names would fail
        # with an unhelpful "not enough values to unpack" ValueError.
        exit("No bin of the matrix lies within the region {}:{}-{} given in {}".format(
            chrom, start, end, option))
    indices, start_positions = zip(*hits)
    return indices, start_positions


def getRegion(args, ma):
    """Resolve ``args.region`` (and ``args.region2``) to matrix bins.

    Each region is parsed with ``translate_region``.  Its chromosome name is
    converted with ``toBytes`` when the first name yielded by
    ``ma.interval_trees`` is a bytes object; if the name is still unknown,
    the name returned by ``change_chrom_names`` is tried before giving up.
    ``args.region`` is overwritten in place with
    ``[chrom, region_start, region_end]``.

    For non-cooler input a bin is selected when
    ``bin_start >= region_start and bin_end < region_end``.  For cooler
    input (``args.matrix`` ends in ``.cool`` or ``cooler.io.is_cooler``
    accepts it) bins that end or start strictly inside the region are
    selected as well.

    Parameters
    ----------
    args
        Object with ``region``, ``region2`` and ``matrix`` attributes.
    ma
        Matrix object providing ``interval_trees`` (iterable of chromosome
        names) and ``cut_intervals`` (items indexed as chromosome, start,
        end at positions 0, 1, 2).

    Returns
    -------
    tuple
        ``(chrom, region_start, region_end, idx1, start_pos1, chrom2,
        region_start2, region_end2, idx2, start_pos2)``.  Without
        ``args.region2``, ``chrom2``, ``idx2`` and ``start_pos2`` repeat the
        first region's values and ``region_start2`` / ``region_end2`` are
        ``None``.

    Raises
    ------
    SystemExit
        If a chromosome name is not found in the matrix or a region selects
        no bin.
    """
    chrom, region_start, region_end = translate_region(args.region)
    chrom = _matrix_chrom_name(ma, chrom, '--region')
    args.region = [chrom, region_start, region_end]

    is_cooler = bool(args.matrix.endswith('.cool') or
                     cooler.io.is_cooler(args.matrix))

    idx1, start_pos1 = _bins_for_region(
        ma, chrom, region_start, region_end, is_cooler, '--region')

    if args.region2:
        chrom2, region_start2, region_end2 = translate_region(args.region2)
        # Resolve the second region's own chromosome name; converting
        # `chrom` here instead would replace it with the first region's.
        chrom2 = _matrix_chrom_name(ma, chrom2, '--region2')
        idx2, start_pos2 = _bins_for_region(
            ma, chrom2, region_start2, region_end2, is_cooler, '--region2')
    else:
        # No second region: mirror the first one.
        chrom2 = chrom
        region_start2 = region_end2 = None
        idx2 = idx1
        start_pos2 = start_pos1

    return (chrom, region_start, region_end, idx1, start_pos1,
            chrom2, region_start2, region_end2, idx2, start_pos2)