Beispiel #1
0
def _resolve_chrom_name(interval_trees, chrom, option_name):
    """
    Returns chrom spelled the way the matrix spells it.

    The name is first adapted with check_chrom_str_bytes; if it is still
    unknown, the alternative naming returned by change_chrom_names is tried.
    Exits with an error message naming option_name if neither spelling is
    a key of interval_trees.
    """
    chrom = check_chrom_str_bytes(interval_trees, chrom)
    if chrom not in interval_trees:
        chrom = change_chrom_names(chrom)
        chrom = check_chrom_str_bytes(interval_trees, chrom)
        if chrom not in interval_trees:
            exit("Chromosome name {} in {} not in matrix".format(change_chrom_names(chrom), option_name))
    return chrom


def _select_region_bins(cut_intervals, chrom, region_start, region_end, include_partial, option_name):
    """
    Returns two tuples (bin indices, bin start positions) for the bins of
    cut_intervals that lie on chrom inside [region_start, region_end).

    If include_partial is set, bins that only partially overlap the region
    are selected as well; otherwise only fully contained bins are kept.
    Exits with an error message if no bin matches, because the result could
    not be unpacked into two tuples otherwise.
    """
    selected = []
    for idx, interval in enumerate(cut_intervals):
        if interval[0] != chrom:
            continue
        bin_start, bin_end = interval[1], interval[2]
        keep = bin_start >= region_start and bin_end < region_end
        if include_partial and not keep:
            keep = ((bin_start < region_end and bin_end < region_end and bin_end > region_start) or
                    (bin_start > region_start and bin_start < region_end))
        if keep:
            selected.append((idx, bin_start))

    if not selected:
        exit("No bins of the matrix are located in {} {}:{}-{}".format(option_name, chrom, region_start, region_end))

    indices, start_positions = zip(*selected)
    return indices, start_positions


def getRegion(args, ma):
    """
    Translates args.region (and the optional args.region2) into matrix bins.

    args.region is parsed with translate_region, the chromosome name is
    matched against the keys of ma.interval_trees and args.region is
    overwritten with [chrom, region_start, region_end].
    For matrices for which check_cooler(args.matrix) is true, bins that
    partially overlap the region are included; otherwise only bins that are
    fully contained in the region are used.

    Returns the tuple
    (chrom, region_start, region_end, idx1, start_pos1,
     chrom2, region_start2, region_end2, idx2, start_pos2).
    If args.region2 is not set, chrom2, idx2 and start_pos2 are copies of the
    first region's values while region_start2 and region_end2 stay None.

    Exits if a chromosome is not part of the matrix or if a region does not
    contain any bin.
    """
    region_start2 = region_end2 = None

    chrom, region_start, region_end = translate_region(args.region)
    chrom = _resolve_chrom_name(ma.interval_trees, chrom, "--region")

    args.region = [chrom, region_start, region_end]
    is_cooler = check_cooler(args.matrix)
    idx1, start_pos1 = _select_region_bins(ma.cut_intervals, chrom, region_start, region_end,
                                           is_cooler, "--region")

    if args.region2:
        chrom2, region_start2, region_end2 = translate_region(args.region2)
        chrom2 = _resolve_chrom_name(ma.interval_trees, chrom2, "--region2")
        idx2, start_pos2 = _select_region_bins(ma.cut_intervals, chrom2, region_start2, region_end2,
                                               is_cooler, "--region2")
    else:
        # no second region given: plot the first region against itself
        idx2 = idx1
        chrom2 = chrom
        start_pos2 = start_pos1

    return chrom, region_start, region_end, idx1, start_pos1, chrom2, region_start2, region_end2, idx2, start_pos2
Beispiel #2
0
def translate_region(region_string, ma):
    """
    Takes a region string and returns the tuple
    (chrom, start, end).

    The characters ',', ';' and '!' are removed and both ':' and '-' are
    accepted as field separators, e.g. 'chr1:1,000-2,000'.
    If start or end is missing or empty (e.g. 'chr1', 'chr1:1000' or
    'chr1:1000-'), it is set to the matrix start respectively matrix end
    on this chromosome.

    Exits if the chromosome is not part of the matrix. A non-numeric
    start or end raises a ValueError.
    """
    for removable in (",", ";", "!"):
        region_string = region_string.replace(removable, "")
    fields = region_string.replace("-", ":").split(":")
    chrom = fields[0]

    chrom = check_chrom_str_bytes(ma.interval_trees, chrom)
    if chrom not in ma.interval_trees:
        chrom = change_chrom_names(chrom)
        chrom = check_chrom_str_bytes(ma.interval_trees, chrom)
        if chrom not in ma.interval_trees:
            exit(
                "Chromosome name {} in --region not in matrix".format(change_chrom_names(chrom)))

    first_bin, last_bin = ma.getChrBinRange(chrom)

    # An empty field (trailing separator) is treated like a missing one
    # instead of failing in int('').
    if len(fields) > 1 and fields[1].strip():
        region_start = int(fields[1])
    else:
        region_start = ma.getBinPos(first_bin)[1]

    if len(fields) > 2 and fields[2].strip():
        region_end = int(fields[2])
    else:
        region_end = ma.getBinPos(last_bin - 1)[2]

    return chrom, region_start, region_end
Beispiel #3
0
def plotBigwig(pAxis, pNameOfBigwigList, pChromosomeSizes=None, pRegion=None, pXticks=None, pFlipBigwigSign=None, pScaleFactorBigwig=None,
               pValueMin=None, pValueMax=None):
    """
    Plots the scores of a bigwig file as a filled track on pAxis.

    pAxis: axis to draw on; its frame and x-axis are hidden.
    pNameOfBigwigList: name of one bigwig file, it has to end with
        '.bigwig' or '.bw'.
    pChromosomeSizes: mapping of chromosome name to chromosome size. If no
        pRegion is given, all of these chromosomes are plotted one after
        the other.
    pRegion: (chrom, start, end) to restrict the plot to.
    pXticks: not used.
    pFlipBigwigSign: if true, the scores are multiplied by -1.
    pScaleFactorBigwig: factor the scores are multiplied with.
    pValueMin, pValueMax: bounds the scores are clipped to.

    Returns None. Nothing is plotted if a requested chromosome is not part
    of the bigwig file. Exits with status 1 if the file is not a bigwig file.
    """
    log.debug('plotting eigenvector')
    pAxis.set_frame_on(False)
    pAxis.xaxis.set_visible(False)

    # pNameOfBigwigList is not a list, but to make room for future options
    # requiring more than one bigwig file it is wrapped into a list intentionally.
    pNameOfBigwigList = [pNameOfBigwigList]
    for bigwig_file in pNameOfBigwigList:
        if bigwig_file.split(".")[-1] not in ('bigwig', 'bw'):
            log.error("Given files are not bigwig")
            exit(1)

    for bigwig_file in pNameOfBigwigList:
        x_values = []
        bigwig_scores = []
        bw = pyBigWig.open(bigwig_file)
        # try/finally closes the file handle on every return path
        try:
            bw_chroms = bw.chroms()
            if pRegion:
                chrom, region_start, region_end = pRegion
                # region_end could be a very large number returned by translate_region
                if pChromosomeSizes:
                    region_end = min(region_end, pChromosomeSizes[chrom])
                chrom = check_chrom_str_bytes(bw_chroms, chrom)
                if chrom not in bw_chroms:
                    chrom = change_chrom_names(chrom)
                    if chrom not in bw_chroms:
                        log.info("bigwig file has no chromosome named: {}.".format(chrom))
                        return

                # the bigwig file may end before the region end, to avoid an error
                # the bigwig_end is set for the pyBigwig query
                bigwig_end = min(bw_chroms[chrom], region_end)

                # TODO, this could be a parameter
                # integer division: the number of bins has to be an int, and at least 1
                num_bins = max(1, min(1000, int(bigwig_end - region_start) // 10))

                stats = bw.stats(chrom, region_start, bigwig_end, nBins=num_bins)
                # checked before the numpy conversion, afterwards it is never None
                if stats is None:
                    log.info("Chromosome {} has no entries in bigwig file.".format(chrom))
                    return
                # bins without data are reported as None and become nan here
                scores_per_bin = np.array(stats).astype(float)

                # the scores cover region_start to bigwig_end, the coordinates
                # have to span the same range to stay aligned
                _x_vals = np.linspace(region_start, bigwig_end, num_bins)
                assert len(_x_vals) == len(scores_per_bin)
                x_values.extend(_x_vals)
                bigwig_scores.extend(scores_per_bin)
                pAxis.set_xlim(region_start, region_end)

            elif pChromosomeSizes:
                chrom_length_sum = 0
                for chrom in pChromosomeSizes:
                    chrom_ = check_chrom_str_bytes(bw_chroms, chrom)

                    if chrom_ not in bw_chroms:
                        log.info("bigwig file as no chromosome named: {}.".format(chrom))
                        return
                    chrom_size = pChromosomeSizes[chrom]
                    # set the bin size to approximately 100kb
                    # or to the chromosome size if this happens to be less than 100kb
                    chunk_size = min(1e5, chrom_size)
                    num_bins = int(chrom_size / chunk_size)

                    stats = bw.stats(chrom_, 0, chrom_size, nBins=num_bins)
                    if stats is None:
                        log.info("Chromosome {} has no entries in bigwig file.".format(chrom))
                        return
                    scores_per_bin = np.array(stats).astype(float)

                    # chromosomes are concatenated along the x-axis
                    _x_vals = np.linspace(chrom_length_sum, chrom_length_sum + chrom_size, num_bins)
                    assert len(_x_vals) == len(scores_per_bin)
                    x_values.extend(_x_vals)
                    bigwig_scores.extend(scores_per_bin)

                    chrom_length_sum += chrom_size

                pAxis.set_xlim(0, chrom_length_sum)
        finally:
            bw.close()

        log.debug("Number of data points: {}".format(len(bigwig_scores)))

        # always an array, clip() below is not available on a list
        bigwig_scores = np.array(bigwig_scores, dtype=float)
        if pFlipBigwigSign:
            log.info("Flipping sign of bigwig values.")
            bigwig_scores *= -1
        if pScaleFactorBigwig is not None and pScaleFactorBigwig != 1.0:
            log.info("Scaling bigwig values.")
            bigwig_scores *= pScaleFactorBigwig
        if pValueMin is not None or pValueMax is not None:
            bigwig_scores = bigwig_scores.clip(pValueMin, pValueMax)

        pAxis.fill_between(x_values, 0, bigwig_scores, edgecolor='none')
Beispiel #4
0
def plotBigwig(pAxis, pNameOfBigwigList, pChromosomeSizes=None, pRegion=None, pXticks=None, pFlipBigwigSign=None, pScaleFactorBigwig=None, pVertical=False,
               pValueMin=None, pValueMax=None):
    """
    Plots the scores of each bigwig file as a filled track, the i-th file
    on pAxis[i].

    pAxis: sequence of axes, one per bigwig file; the frame and the
        position axis of each used axis are hidden.
    pNameOfBigwigList: list of bigwig file names, each has to end with
        '.bigwig' or '.bw'.
    pChromosomeSizes: mapping of chromosome name to chromosome size. If no
        pRegion is given, all of these chromosomes are plotted one after
        the other.
    pRegion: (chrom, start, end) to restrict the plot to.
    pXticks: not used.
    pFlipBigwigSign: if true, the scores are multiplied by -1.
    pScaleFactorBigwig: factor the scores are multiplied with.
    pVertical: if true, the genomic position is placed on the y-axis.
    pValueMin, pValueMax: bounds the scores are clipped to.

    Returns None. Plotting stops at the first file that does not contain a
    requested chromosome. Exits with status 1 if a file is not a bigwig file.
    """
    log.debug('plotting eigenvector')

    # every file is validated on its own; an empty list simply plots nothing
    for bigwig_name in pNameOfBigwigList:
        if bigwig_name.split(".")[-1] not in ('bigwig', 'bw'):
            log.error("Given files are not bigwig")
            exit(1)

    for i, bigwigFile in enumerate(pNameOfBigwigList):
        x_values = []
        bigwig_scores = []
        pAxis[i].set_frame_on(False)

        # hide the axis that carries the genomic position
        if pVertical:
            pAxis[i].yaxis.set_visible(False)
        else:
            pAxis[i].xaxis.set_visible(False)

        bw = pyBigWig.open(bigwigFile)
        # try/finally closes the file handle on every return path
        try:
            bw_chroms = bw.chroms()
            if pRegion:
                chrom, region_start, region_end = pRegion
                # region_end could be a very large number returned by translate_region
                if pChromosomeSizes:
                    region_end = min(region_end, pChromosomeSizes[chrom])
                chrom = check_chrom_str_bytes(bw_chroms, chrom)
                if chrom not in bw_chroms:
                    chrom = change_chrom_names(chrom)
                    if chrom not in bw_chroms:
                        log.info("bigwig file has no chromosome named: {}.".format(chrom))
                        return

                # the bigwig file may end before the region end, to avoid an error
                # the bigwig_end is set for the pyBigwig query
                bigwig_end = min(bw_chroms[chrom], region_end)

                # TODO, this could be a parameter
                # integer division: the number of bins has to be an int, and at least 1
                num_bins = max(1, min(1000, int(bigwig_end - region_start) // 10))

                stats = bw.stats(chrom, region_start, bigwig_end, nBins=num_bins)
                # checked before the numpy conversion, afterwards it is never None
                if stats is None:
                    log.info("Chromosome {} has no entries in bigwig file.".format(chrom))
                    return
                # bins without data are reported as None and become nan here
                scores_per_bin = np.array(stats).astype(float)

                # the scores cover region_start to bigwig_end, the coordinates
                # have to span the same range to stay aligned
                _x_vals = np.linspace(region_start, bigwig_end, num_bins)
                assert len(_x_vals) == len(scores_per_bin)
                x_values.extend(_x_vals)
                bigwig_scores.extend(scores_per_bin)
                if pVertical:
                    pAxis[i].set_ylim(region_start, region_end)
                else:
                    pAxis[i].set_xlim(region_start, region_end)

            elif pChromosomeSizes:
                chrom_length_sum = 0
                for chrom in pChromosomeSizes:
                    chrom_ = check_chrom_str_bytes(bw_chroms, chrom)

                    if chrom_ not in bw_chroms:
                        log.info("bigwig file as no chromosome named: {}.".format(chrom))
                        return
                    chrom_size = pChromosomeSizes[chrom]
                    # set the bin size to approximately 100kb
                    # or to the chromosome size if this happens to be less than 100kb
                    chunk_size = min(1e5, chrom_size)
                    num_bins = int(chrom_size / chunk_size)

                    stats = bw.stats(chrom_, 0, chrom_size, nBins=num_bins)
                    if stats is None:
                        log.info("Chromosome {} has no entries in bigwig file.".format(chrom))
                        return
                    scores_per_bin = np.array(stats).astype(float)

                    # chromosomes are concatenated along the position axis
                    _x_vals = np.linspace(chrom_length_sum, chrom_length_sum + chrom_size, num_bins)
                    assert len(_x_vals) == len(scores_per_bin)
                    x_values.extend(_x_vals)
                    bigwig_scores.extend(scores_per_bin)

                    chrom_length_sum += chrom_size
                if pVertical:
                    pAxis[i].set_ylim(0, chrom_length_sum)
                else:
                    pAxis[i].set_xlim(0, chrom_length_sum)
        finally:
            bw.close()

        log.debug("Number of data points: {}".format(len(bigwig_scores)))
        bigwig_scores = np.array(bigwig_scores, dtype=float)
        if pFlipBigwigSign:
            log.info("Flipping sign of bigwig values.")
            bigwig_scores *= -1
        if pScaleFactorBigwig is not None and pScaleFactorBigwig != 1.0:
            log.info("Scaling bigwig values.")
            bigwig_scores *= pScaleFactorBigwig
        if pValueMin is not None or pValueMax is not None:
            bigwig_scores = bigwig_scores.clip(pValueMin, pValueMax)

        if pVertical:
            # NOTE(review): the reversed scores are passed as x and the positions
            # as y1 of fill_between; fill_betweenx may be what is intended here,
            # confirm against the rendered plot before changing it.
            pAxis[i].fill_between(np.flip(bigwig_scores, 0), x_values, edgecolor='none')
        else:
            pAxis[i].fill_between(x_values, 0, bigwig_scores, edgecolor='none')