def _resolve_chrom_name(ma, chrom, option):
    """Return ``chrom`` spelled the way ``ma.interval_trees`` spells it.

    The name is first passed through ``check_chrom_str_bytes``; if it is
    still unknown, the variant returned by ``change_chrom_names`` is tried.
    Exits with a message naming ``option`` when neither variant is a key of
    ``ma.interval_trees``.
    """
    chrom = check_chrom_str_bytes(ma.interval_trees, chrom)
    if chrom not in ma.interval_trees:
        chrom = change_chrom_names(chrom)
        chrom = check_chrom_str_bytes(ma.interval_trees, chrom)
        if chrom not in ma.interval_trees:
            # change_chrom_names is applied again so that the message shows
            # the alternative spelling of the name that was just tested.
            exit("Chromosome name {} in {} not in matrix".format(change_chrom_names(chrom), option))
    return chrom


def _bins_in_region(cut_intervals, chrom, start, end, is_cooler, option):
    """Return ``(indices, start_positions)`` of the bins of ``chrom`` in the region.

    Each entry of ``cut_intervals`` is indexed as ``[0]`` chromosome,
    ``[1]`` bin start and ``[2]`` bin end. For cooler matrices bins that only
    partially overlap the region are accepted as well; otherwise a bin has to
    start at or after ``start`` and end before ``end``.

    Raises:
        ValueError: if no bin matches. The bare ``zip(*[])`` unpacking used
            previously raised the same exception type, but with a message
            that did not mention the region.
    """
    if is_cooler:
        selected = [
            (idx, interval[1])
            for idx, interval in enumerate(cut_intervals)
            if interval[0] == chrom and (
                (interval[1] >= start and interval[2] < end)
                or (interval[1] < end and interval[2] < end and interval[2] > start)
                or (interval[1] > start and interval[1] < end)
            )
        ]
    else:
        selected = [
            (idx, interval[1])
            for idx, interval in enumerate(cut_intervals)
            if interval[0] == chrom and interval[1] >= start and interval[2] < end
        ]

    if not selected:
        raise ValueError("No bins of the matrix found for {} {}:{}-{}".format(option, chrom, start, end))

    indices, start_positions = zip(*selected)
    return indices, start_positions


def getRegion(args, ma):
    """Resolve ``args.region`` (and ``args.region2``) to matrix bin indices.

    Args:
        args: object providing ``region``, ``region2`` and ``matrix``.
            ``args.region`` is overwritten with the list
            ``[chrom, region_start, region_end]``.
        ma: matrix object providing ``interval_trees`` and ``cut_intervals``
            (and whatever ``translate_region`` needs from it).

    Returns:
        The tuple ``(chrom, region_start, region_end, idx1, start_pos1,
        chrom2, region_start2, region_end2, idx2, start_pos2)``. ``idx*`` and
        ``start_pos*`` are tuples of bin indices and bin start positions.
        Without ``args.region2`` the second chromosome, indices and start
        positions mirror the first region while ``region_start2`` and
        ``region_end2`` are ``None``.

    Raises:
        ValueError: if a region does not contain any bin of the matrix.
        SystemExit: if a chromosome name is not part of the matrix.
    """
    region_start2 = region_end2 = None

    # translate_region takes the matrix as second argument: it needs it to
    # look up the chromosome and to fill in a missing start / end.
    chrom, region_start, region_end = translate_region(args.region, ma)
    chrom = _resolve_chrom_name(ma, chrom, "--region")
    args.region = [chrom, region_start, region_end]

    is_cooler = check_cooler(args.matrix)
    idx1, start_pos1 = _bins_in_region(ma.cut_intervals, chrom, region_start, region_end,
                                       is_cooler, "--region")

    if args.region2:
        chrom2, region_start2, region_end2 = translate_region(args.region2, ma)
        chrom2 = _resolve_chrom_name(ma, chrom2, "--region2")
        idx2, start_pos2 = _bins_in_region(ma.cut_intervals, chrom2, region_start2, region_end2,
                                           is_cooler, "--region2")
    else:
        idx2 = idx1
        chrom2 = chrom
        start_pos2 = start_pos1

    return chrom, region_start, region_end, idx1, start_pos1, chrom2, region_start2, region_end2, idx2, start_pos2
def translate_region(region_string, ma):
    """
    Parse a region string into (chrom, start, end).

    The characters ',', ';' and '!' are dropped and '-' is treated like ':',
    so the string is split into up to three ':' separated fields.
    The chromosome (first field) has to be a key of ma.interval_trees,
    either directly or after change_chrom_names; otherwise the program exits.
    A missing start is taken from ma.getBinPos(first_bin)[1] and a missing
    end from ma.getBinPos(last_bin - 1)[2], where first_bin and last_bin
    come from ma.getChrBinRange(chrom).
    """
    # one pass: map '-' to ':' and delete the unwanted separators
    cleaned = region_string.translate(str.maketrans("-", ":", ",;!"))
    parts = cleaned.split(":")

    chrom = check_chrom_str_bytes(ma.interval_trees, parts[0])
    if chrom not in list(ma.interval_trees):
        chrom = change_chrom_names(chrom)
        chrom = check_chrom_str_bytes(ma.interval_trees, chrom)
        if chrom not in list(ma.interval_trees):
            exit(
                "Chromosome name {} in --region not in matrix".format(change_chrom_names(chrom)))

    first_bin, last_bin = ma.getChrBinRange(chrom)

    if len(parts) > 1:
        region_start = int(parts[1])
    else:
        region_start = ma.getBinPos(first_bin)[1]

    if len(parts) > 2:
        region_end = int(parts[2])
    else:
        region_end = ma.getBinPos(last_bin - 1)[2]

    return chrom, region_start, region_end
def plotBigwig(pAxis, pNameOfBigwigList, pChromosomeSizes=None, pRegion=None, pXticks=None, pFlipBigwigSign=None, pScaleFactorBigwig=None, pValueMin=None, pValueMax=None):
    """Draw the scores of a bigwig file as a filled curve on ``pAxis``.

    Args:
        pAxis: axis to draw on; its frame and x axis are hidden.
        pNameOfBigwigList: name of a single bigwig file (extension
            ``bigwig`` or ``bw``). Despite the name this is one file name,
            it is wrapped in a list internally.
        pChromosomeSizes: mapping chromosome name -> size. Needed in both
            modes: it caps the region end when ``pRegion`` is given and
            defines the plotted chromosomes otherwise.
        pRegion: optional ``(chrom, start, end)``; if given only this region
            is plotted.
        pXticks: not used by this function.
        pFlipBigwigSign: if true, the scores are multiplied by -1.
        pScaleFactorBigwig: optional factor the scores are multiplied with.
        pValueMin: optional lower bound the scores are clipped to.
        pValueMax: optional upper bound the scores are clipped to.

    Returns:
        None. Returns early, without plotting, if a requested chromosome is
        missing in the bigwig file. Exits with status 1 if the file does not
        have a bigwig extension.
    """
    log.debug('plotting eigenvector')
    pAxis.set_frame_on(False)
    pAxis.xaxis.set_visible(False)

    # pNameOfBigwigList is not a list, but to make room for future options
    # requiring more than one bigwig file it is turned into one here.
    pNameOfBigwigList = [pNameOfBigwigList]

    file_format = pNameOfBigwigList[0].split(".")[-1]
    if file_format not in ('bigwig', 'bw'):
        log.error("Given files are not bigwig")
        exit(1)
    for bigwig_file in pNameOfBigwigList:
        if bigwig_file.split('.')[-1] != file_format:
            log.error("Eigenvector input files have different formats.")
            exit(1)

    x_values = []
    bigwig_scores = []
    for bigwigFile in pNameOfBigwigList:
        bigwig_scores = []
        bw = pyBigWig.open(bigwigFile)
        try:
            bw_chroms = bw.chroms()
            if pRegion:
                chrom, region_start, region_end = pRegion

                # region_end could be a very large number returned by translate_region
                region_end = min(region_end, pChromosomeSizes[chrom])

                chrom = check_chrom_str_bytes(bw_chroms, chrom)
                if chrom not in bw_chroms:
                    chrom = change_chrom_names(chrom)
                    if chrom not in bw_chroms:
                        log.info("bigwig file has no chromosome named: {}.".format(chrom))
                        return

                # the bigwig file may end before the region end; to avoid an
                # error the pyBigWig query stops at bigwig_end
                bigwig_end = min(bw_chroms[chrom], region_end)

                # TODO, this could be a parameter.
                # Integer division: nBins and np.linspace need an int, and at
                # least one bin is requested for regions shorter than 10 bp.
                num_bins = max(1, min(1000, int(bigwig_end - region_start) // 10))

                raw_scores = bw.stats(chrom, region_start, bigwig_end, nBins=num_bins)
                if raw_scores is None:
                    log.info("Chromosome {} has no entries in bigwig file.".format(chrom))
                    return
                # bins without data are reported as None and become nan here
                scores_per_bin = np.array(raw_scores).astype(float)

                # NOTE(review): the scores cover region_start..bigwig_end but
                # are spread over region_start..region_end - confirm this is
                # intended when the bigwig ends before the region does.
                _x_vals = np.linspace(region_start, region_end, num_bins)
                assert len(_x_vals) == len(scores_per_bin)
                x_values.extend(_x_vals)
                bigwig_scores.extend(scores_per_bin)
                pAxis.set_xlim(region_start, region_end)

            elif pChromosomeSizes:
                # whole genome: chromosomes are laid out one after another
                chrom_length_sum = 0
                for chrom in pChromosomeSizes:
                    chrom_ = check_chrom_str_bytes(bw_chroms, chrom)
                    if chrom_ not in bw_chroms:
                        log.info("bigwig file as no chromosome named: {}.".format(chrom))
                        return

                    # set the bin size to approximately 100kb
                    # or to the chromosome size if this happens to be less than 100kb
                    chunk_size = min(1e5, pChromosomeSizes[chrom])
                    num_bins = int(pChromosomeSizes[chrom] / chunk_size)

                    raw_scores = bw.stats(chrom_, 0, pChromosomeSizes[chrom], nBins=num_bins)
                    if raw_scores is None:
                        log.info("Chromosome {} has no entries in bigwig file.".format(chrom))
                        return
                    scores_per_bin = np.array(raw_scores).astype(float)

                    _x_vals = np.linspace(chrom_length_sum, chrom_length_sum + pChromosomeSizes[chrom], num_bins)
                    assert len(_x_vals) == len(scores_per_bin)
                    x_values.extend(_x_vals)
                    bigwig_scores.extend(scores_per_bin)
                    chrom_length_sum += pChromosomeSizes[chrom]
                pAxis.set_xlim(0, chrom_length_sum)
        finally:
            # release the file handle on every path, including early returns
            bw.close()

        log.debug("Number of data points: {}".format(len(bigwig_scores)))

        # convert once so that sign flip, scaling and clipping all operate on
        # an array (a plain list has no clip method)
        bigwig_scores = np.array(bigwig_scores)
        if pFlipBigwigSign:
            log.info("Flipping sign of bigwig values.")
            bigwig_scores *= -1
        if pScaleFactorBigwig is not None and pScaleFactorBigwig != 1.0:
            log.info("Scaling bigwig values.")
            bigwig_scores *= pScaleFactorBigwig
        if pValueMin is not None or pValueMax is not None:
            bigwig_scores = bigwig_scores.clip(pValueMin, pValueMax)

    pAxis.fill_between(x_values, 0, bigwig_scores, edgecolor='none')
def plotBigwig(pAxis, pNameOfBigwigList, pChromosomeSizes=None, pRegion=None, pXticks=None, pFlipBigwigSign=None, pScaleFactorBigwig=None, pVertical=False, pValueMin=None, pValueMax=None):
    """Draw each bigwig file as a filled curve on its own axis.

    Args:
        pAxis: indexable collection of axes; ``pAxis[i]`` is used for the
            i-th file. The frame and the position axis (y axis when
            ``pVertical``, x axis otherwise) of each used axis are hidden.
        pNameOfBigwigList: list of bigwig file names (extension ``bigwig``
            or ``bw``). An empty list plots nothing.
        pChromosomeSizes: mapping chromosome name -> size. Needed in both
            modes: it caps the region end when ``pRegion`` is given and
            defines the plotted chromosomes otherwise.
        pRegion: optional ``(chrom, start, end)``; if given only this region
            is plotted.
        pXticks: not used by this function.
        pFlipBigwigSign: if true, the scores are multiplied by -1.
        pScaleFactorBigwig: optional factor the scores are multiplied with.
        pVertical: if true, the genomic position runs along the y axis.
        pValueMin: optional lower bound the scores are clipped to.
        pValueMax: optional upper bound the scores are clipped to.

    Returns:
        None. Returns early, leaving the current and all remaining files
        unplotted, if a requested chromosome is missing in a bigwig file.
        Exits with status 1 if a file does not have a bigwig extension.
    """
    log.debug('plotting eigenvector')

    # validate all names up front; nothing is drawn if one of them is wrong
    for bigwig_name in pNameOfBigwigList:
        if bigwig_name.split(".")[-1] not in ('bigwig', 'bw'):
            log.error("Given files are not bigwig")
            exit(1)

    for i, bigwigFile in enumerate(pNameOfBigwigList):
        axis = pAxis[i]
        x_values = []
        bigwig_scores = []

        axis.set_frame_on(False)
        if pVertical:
            axis.yaxis.set_visible(False)
        else:
            axis.xaxis.set_visible(False)

        bw = pyBigWig.open(bigwigFile)
        try:
            bw_chroms = bw.chroms()
            if pRegion:
                chrom, region_start, region_end = pRegion

                # region_end could be a very large number returned by translate_region
                region_end = min(region_end, pChromosomeSizes[chrom])

                chrom = check_chrom_str_bytes(bw_chroms, chrom)
                if chrom not in bw_chroms:
                    chrom = change_chrom_names(chrom)
                    if chrom not in bw_chroms:
                        log.info("bigwig file has no chromosome named: {}.".format(chrom))
                        return

                # the bigwig file may end before the region end; to avoid an
                # error the pyBigWig query stops at bigwig_end
                bigwig_end = min(bw_chroms[chrom], region_end)

                # TODO, this could be a parameter.
                # Integer division: nBins and np.linspace need an int, and at
                # least one bin is requested for regions shorter than 10 bp.
                num_bins = max(1, min(1000, int(bigwig_end - region_start) // 10))

                raw_scores = bw.stats(chrom, region_start, bigwig_end, nBins=num_bins)
                if raw_scores is None:
                    log.info("Chromosome {} has no entries in bigwig file.".format(chrom))
                    return
                # bins without data are reported as None and become nan here
                scores_per_bin = np.array(raw_scores).astype(float)

                _x_vals = np.linspace(region_start, region_end, num_bins)
                assert len(_x_vals) == len(scores_per_bin)
                x_values.extend(_x_vals)
                bigwig_scores.extend(scores_per_bin)
                if pVertical:
                    axis.set_ylim(region_start, region_end)
                else:
                    axis.set_xlim(region_start, region_end)

            elif pChromosomeSizes:
                # whole genome: chromosomes are laid out one after another
                chrom_length_sum = 0
                for chrom in pChromosomeSizes:
                    chrom_ = check_chrom_str_bytes(bw_chroms, chrom)
                    if chrom_ not in bw_chroms:
                        log.info("bigwig file as no chromosome named: {}.".format(chrom))
                        return

                    # set the bin size to approximately 100kb
                    # or to the chromosome size if this happens to be less than 100kb
                    chunk_size = min(1e5, pChromosomeSizes[chrom])
                    num_bins = int(pChromosomeSizes[chrom] / chunk_size)

                    raw_scores = bw.stats(chrom_, 0, pChromosomeSizes[chrom], nBins=num_bins)
                    if raw_scores is None:
                        log.info("Chromosome {} has no entries in bigwig file.".format(chrom))
                        return
                    scores_per_bin = np.array(raw_scores).astype(float)

                    _x_vals = np.linspace(chrom_length_sum, chrom_length_sum + pChromosomeSizes[chrom], num_bins)
                    assert len(_x_vals) == len(scores_per_bin)
                    x_values.extend(_x_vals)
                    bigwig_scores.extend(scores_per_bin)
                    chrom_length_sum += pChromosomeSizes[chrom]
                if pVertical:
                    axis.set_ylim(0, chrom_length_sum)
                else:
                    axis.set_xlim(0, chrom_length_sum)
        finally:
            # release the file handle on every path, including early returns
            bw.close()

        log.debug("Number of data points: {}".format(len(bigwig_scores)))

        bigwig_scores = np.array(bigwig_scores)
        if pFlipBigwigSign:
            log.info("Flipping sign of bigwig values.")
            bigwig_scores *= -1
        if pScaleFactorBigwig is not None and pScaleFactorBigwig != 1.0:
            log.info("Scaling bigwig values.")
            bigwig_scores *= pScaleFactorBigwig
        if pValueMin is not None or pValueMax is not None:
            bigwig_scores = bigwig_scores.clip(pValueMin, pValueMax)

        if pVertical:
            # NOTE(review): the reversed scores are passed as x and the
            # positions as y1 of fill_between - confirm this (rather than
            # fill_betweenx) gives the intended vertical track.
            axis.fill_between(np.flip(bigwig_scores, 0), x_values, edgecolor='none')
        else:
            axis.fill_between(x_values, 0, bigwig_scores, edgecolor='none')