def getRegion(args, ma):
    """Resolve ``args.region`` to the matrix bins it covers.

    The region string is parsed with ``translate_region``; the chromosome
    name is converted to bytes when the keys of ``ma.interval_trees`` are
    bytes, and must then be one of those keys. As a side effect
    ``args.region`` is replaced by ``[chrom, region_start, region_end]``.

    A bin is selected when it lies on the chromosome, starts at or after
    ``region_start`` and ends strictly before ``region_end``.

    :param args: parsed command-line arguments; ``args.region`` is read and
                 overwritten.
    :param ma: Hi-C matrix object providing ``interval_trees`` and
               ``cut_intervals`` (entries indexed as chrom, start, end).
    :return: tuple ``(chrom, region_start, region_end, idx1, start_pos1,
             chrom2, region_start2, region_end2, idx2, start_pos2)``. This
             variant has no second region, so ``chrom2``, ``idx2`` and
             ``start_pos2`` mirror the first region and ``region_start2`` /
             ``region_end2`` are ``None``.
    :raises SystemExit: if the chromosome is not part of the matrix, or if
                        no bin of the matrix lies inside the region.
    """
    region_start2 = region_end2 = None
    requested_chrom, region_start, region_end = translate_region(args.region)

    chrom = requested_chrom
    # Compare like with like: the matrix may key its chromosomes by bytes
    # (np.bytes_ is a subclass of bytes, so one isinstance check covers both).
    if isinstance(next(iter(ma.interval_trees)), bytes):
        chrom = toBytes(chrom)

    if chrom not in ma.interval_trees:
        # Report the name as the user typed it, not its bytes representation.
        raise SystemExit(
            "The contig/scaffold name '{}' given in --region is not part of the Hi-C matrix. "
            "Check spelling".format(requested_chrom))

    args.region = [chrom, region_start, region_end]

    selected_bins = [
        (bin_idx, interval[1])
        for bin_idx, interval in enumerate(ma.cut_intervals)
        if interval[0] == chrom and interval[1] >= region_start and interval[2] < region_end
    ]
    if not selected_bins:
        # zip(*[]) yields nothing, so unpacking it into two names would fail
        # with an unhelpful ValueError; stop with a clear message instead.
        raise SystemExit(
            "The region {}:{}-{} given in --region does not contain any bin "
            "of the Hi-C matrix.".format(requested_chrom, region_start, region_end))

    idx1, start_pos1 = zip(*selected_bins)
    idx2 = idx1
    chrom2 = chrom
    start_pos2 = start_pos1

    return chrom, region_start, region_end, idx1, start_pos1, chrom2, region_start2, region_end2, idx2, start_pos2
# Example no. 2
0
def main(args=None):
    """Plot a Hi-C matrix as a heatmap and save it to ``args.outFileName``.

    Parses the command line, loads the matrix through ``HiCMatrix.hiCMatrix``,
    optionally restricts it to ``--region`` or reorders it according to
    ``--chromosomeOrder``, applies the requested log scaling and draws either
    one panel per chromosome (``--perChromosome``) or a single heatmap,
    optionally with a bigwig track underneath.

    :param args: list of command-line arguments handed to the argument
                 parser; ``None`` is forwarded to ``parse_args`` unchanged.
    :raises SystemExit: if ``--perChromosome`` and ``--region`` are combined
                        (``getRegion`` may also exit on an invalid region).
    """
    args = parse_arguments().parse_args(args)
    if args.title:
        args.title = remove_non_ascii(args.title)

    chrom = None
    start_pos1 = None
    chrom2 = None
    start_pos2 = None

    if args.perChromosome and args.region:
        log.error('ERROR, choose from the option '
                  '--perChromosome or --region, the two '
                  'options at the same time are not '
                  'compatible.')
        sys.exit(1)

    is_cooler = check_cooler(args.matrix)
    log.debug("Cooler or no cooler: {}".format(is_cooler))
    # A --chromosomeOrder with more than one entry is handled by the generic
    # branch below (which calls reorderChromosomes), not the cooler branch.
    open_cooler_chromosome_order = not (args.chromosomeOrder is not None
                                        and len(args.chromosomeOrder) > 1)

    if is_cooler and not args.region2 and open_cooler_chromosome_order:
        log.debug("Retrieve data from cooler format and use its benefits.")
        regionsToRetrieve = None
        if args.region:
            regionsToRetrieve = [args.region]
        if args.chromosomeOrder:
            args.region = None
            args.region2 = None
            regionsToRetrieve = args.chromosomeOrder

        ma = HiCMatrix.hiCMatrix(args.matrix, pChrnameList=regionsToRetrieve)
        log.debug('Shape {}'.format(ma.matrix.shape))
        if args.clearMaskedBins:
            ma.maskBins(ma.nan_bins)
            # to avoid gaps in the plot, bins flanking the masked bins
            # are enlarged
            new_intervals = enlarge_bins(ma.cut_intervals)
            ma.setCutIntervals(new_intervals)

        if args.region:
            chrom, region_start, region_end, idx1, start_pos1, chrom2, region_start2, region_end2, idx2, start_pos2 = getRegion(args, ma)

        matrix = np.asarray(ma.matrix.todense().astype(float))
        matrix_length = len(matrix[0])
        log.debug("Number of data points matrix_cool: {}".format(matrix_length))
    else:
        ma = HiCMatrix.hiCMatrix(args.matrix)
        if args.clearMaskedBins:
            ma.maskBins(ma.nan_bins)
            new_intervals = enlarge_bins(ma.cut_intervals)
            ma.setCutIntervals(new_intervals)
        if args.chromosomeOrder:
            args.region = None
            args.region2 = None

            valid_chromosomes = []
            invalid_chromosomes = []
            log.debug('args.chromosomeOrder: {}'.format(args.chromosomeOrder))
            log.debug("ma.chrBinBoundaries {}".format(ma.chrBinBoundaries))
            if sys.version_info[0] == 3:
                args.chromosomeOrder = toBytes(args.chromosomeOrder)
            # Use a dedicated loop variable: `chrom` is passed to plotHeatmap
            # as the x-axis label further down and must not end up holding
            # the last name of --chromosomeOrder.
            for chrom_name in toString(args.chromosomeOrder):
                if chrom_name in ma.chrBinBoundaries:
                    valid_chromosomes.append(chrom_name)
                else:
                    invalid_chromosomes.append(chrom_name)

            if invalid_chromosomes:
                log.warning("WARNING: The following chromosome/scaffold names were not found. Please check "
                            "the correct spelling of the chromosome names. \n")
                log.warning("\n".join(invalid_chromosomes))
            ma.reorderChromosomes(valid_chromosomes)

        log.info("min: {}, max: {}\n".format(ma.matrix.data.min(), ma.matrix.data.max()))

        if args.region:
            chrom, region_start, region_end, idx1, start_pos1, chrom2, region_start2, region_end2, idx2, start_pos2 = getRegion(args, ma)

            matrix = np.asarray(ma.matrix[idx1, :][:, idx2].todense().astype(float))

        else:
            log.debug("Else branch")
            matrix = np.asarray(ma.getMatrix().astype(float))

    matrix_length = len(matrix[0])
    log.debug("Number of data points matrix: {}".format(matrix_length))

    for matrix_ in matrix:
        if not matrix_length == len(matrix_):
            log.error("Matrices do not have the same length: {} , {}".format(matrix_length, len(matrix_)))

    cmap = cm.get_cmap(args.colorMap)
    log.debug("Nan values set to black\n")
    cmap.set_bad('black')

    bigwig_info = None
    if args.bigwig:
        bigwig_info = {'args': args, 'axis': None, 'axis_colorbar': None, 'nan_bins': ma.nan_bins}

    if args.perChromosome:
        fig = plotPerChr(ma, cmap, args, pBigwig=bigwig_info)

    else:
        norm = None

        if args.log or args.log1p:
            # Zeros cannot be drawn on a log scale: replace them by the
            # smallest non-zero value of the matrix.
            mask = matrix == 0
            matrix[mask] = np.nanmin(matrix[~mask])

            if np.isnan(matrix).any() or np.isinf(matrix).any():
                log.debug("any nan {}".format(np.isnan(matrix).any()))
                log.debug("any inf {}".format(np.isinf(matrix).any()))
                mask_nan = np.isnan(matrix)
                mask_inf = np.isinf(matrix)
                matrix[mask_nan] = np.nanmin(matrix[~mask_nan])
                matrix[mask_inf] = np.nanmin(matrix[~mask_inf])

        log.debug("any nan after remove of nan: {}".format(np.isnan(matrix).any()))
        log.debug("any inf after remove of inf: {}".format(np.isinf(matrix).any()))
        if args.log1p:
            matrix += 1
            norm = LogNorm()
        elif args.log:
            norm = LogNorm()

        # increase figure height to accommodate bigwig track
        fig_height = 8.5 if args.bigwig else 7
        height = 4.8 / fig_height

        fig_width = 8
        width = 5.0 / fig_width
        left_margin = (1.0 - width) * 0.5

        fig = plt.figure(figsize=(fig_width, fig_height), dpi=args.dpi)

        if args.bigwig:
            # 2x2 grid: heatmap (top left), bigwig track (bottom left),
            # colorbar (top right).
            gs = gridspec.GridSpec(2, 2, height_ratios=[0.90, 0.1], width_ratios=[0.97, 0.03])
            gs.update(hspace=0.05, wspace=0.05)
            ax1 = plt.subplot(gs[0, 0])
            ax2 = plt.subplot(gs[1, 0])
            ax3 = plt.subplot(gs[0, 1])
            bigwig_info['axis'] = ax2
            bigwig_info['axis_colorbar'] = ax3
        else:
            ax1 = None
        bottom = 1.3 / fig_height

        if start_pos1 is None:
            start_pos1 = make_start_pos_array(ma)

        position = [left_margin, bottom, width, height]
        plotHeatmap(matrix, ma.get_chromosome_sizes(), fig, position,
                    args, cmap, xlabel=chrom, ylabel=chrom2,
                    start_pos=start_pos1, start_pos2=start_pos2, pNorm=norm, pAxis=ax1, pBigwig=bigwig_info)

    if not args.disable_tight_layout:
        if args.perChromosome or args.bigwig:
            try:
                plt.tight_layout()
            except (UserWarning, ValueError):
                log.info("Failed to tight layout. Using regular plot.")

    plt.savefig(args.outFileName, dpi=args.dpi)
    plt.close(fig)
# Example no. 3
0
def main(args=None):
    """Plot a Hi-C matrix as a heatmap and save it to ``args.outFileName``.

    Parses the command line, loads the matrix through ``HiCMatrix.hiCMatrix``,
    optionally restricts it to ``--region`` / ``--region2`` or reorders it
    according to ``--chromosomeOrder``, applies the requested log scaling and
    draws either one panel per chromosome (``--perChromosome``) or a single
    heatmap, optionally with a pca track underneath.

    :param args: list of command-line arguments handed to the argument
                 parser; ``None`` is forwarded to ``parse_args`` unchanged.
    :raises SystemExit: if ``--perChromosome`` is combined with ``--region``,
                        or ``--pca`` with both ``--region`` and ``--region2``
                        (``getRegion`` may also exit on an invalid region).
    """
    args = parse_arguments().parse_args(args)
    chrom = None
    start_pos1 = None
    chrom2 = None
    start_pos2 = None

    if args.perChromosome and args.region:
        log.error('ERROR, choose from the option '
                  '--perChromosome or --region, the two '
                  'options at the same time are not '
                  'compatible.')
        sys.exit(1)

    if args.region and args.region2 and args.pca:
        log.error("Inter-chromosomal pca is not supported.")
        sys.exit(1)

    is_cooler = bool(args.matrix.endswith('.cool') or cooler.io.is_cooler(args.matrix))
    if is_cooler and not args.region2:
        log.debug("Retrieve data from cooler format and use its benefits.")
        regionsToRetrieve = None
        if args.region:
            # args.region2 is always empty in this branch, so only the first
            # region has to be requested from the file.
            regionsToRetrieve = [args.region]
        if args.chromosomeOrder:
            args.region = None
            args.region2 = None
            regionsToRetrieve = args.chromosomeOrder

        ma = HiCMatrix.hiCMatrix(args.matrix, chrnameList=regionsToRetrieve)

        if args.clearMaskedBins:
            ma.maskBins(ma.nan_bins)
        if args.region:
            chrom, region_start, region_end, idx1, start_pos1, chrom2, region_start2, region_end2, idx2, start_pos2 = getRegion(
                args, ma)

        matrix = np.asarray(ma.matrix.todense().astype(float))

    else:
        ma = HiCMatrix.hiCMatrix(args.matrix)
        if args.clearMaskedBins:
            ma.maskBins(ma.nan_bins)
        if args.chromosomeOrder:
            args.region = None
            args.region2 = None

            valid_chromosomes = []
            invalid_chromosomes = []
            log.debug('args.chromosomeOrder: {}'.format(args.chromosomeOrder))
            log.debug("ma.chrBinBoundaries {}".format(ma.chrBinBoundaries))
            if sys.version_info[0] == 3:
                args.chromosomeOrder = toBytes(args.chromosomeOrder)
            # Use a dedicated loop variable: `chrom` is passed to plotHeatmap
            # as the x-axis label further down and must not end up holding
            # the last name of --chromosomeOrder.
            for chrom_name in args.chromosomeOrder:
                if chrom_name in ma.chrBinBoundaries:
                    valid_chromosomes.append(chrom_name)
                else:
                    invalid_chromosomes.append(chrom_name)

            if invalid_chromosomes:
                log.warning(
                    "WARNING: The following chromosome/scaffold names were not found. Please check "
                    "the correct spelling of the chromosome names. \n")
                # The names may be bytes at this point (see the toBytes call
                # above); str.join rejects bytes items, so decode them first.
                log.warning("\n".join(
                    name.decode() if isinstance(name, bytes) else str(name)
                    for name in invalid_chromosomes))
            ma.reorderChromosomes(valid_chromosomes)

        log.info("min: {}, max: {}\n".format(ma.matrix.data.min(),
                                             ma.matrix.data.max()))

        if args.region:
            chrom, region_start, region_end, idx1, start_pos1, chrom2, region_start2, region_end2, idx2, start_pos2 = getRegion(
                args, ma)

            matrix = np.asarray(
                ma.matrix[idx1, :][:, idx2].todense().astype(float))

        else:
            log.debug("Else branch")
            matrix = np.asarray(ma.getMatrix().astype(float))

    matrix_length = len(matrix[0])
    for matrix_ in matrix:
        if not matrix_length == len(matrix_):
            log.error("Matrices do not have the same length: {} , {}".format(
                matrix_length, len(matrix_)))

    cmap = cm.get_cmap(args.colorMap)
    log.debug("Nan values set to black\n")
    cmap.set_bad('black')

    pca = None
    if args.pca:
        pca = {
            'args': args,
            'axis': None,
            'axis_colorbar': None,
            'nan_bins': ma.nan_bins
        }

    if args.perChromosome:
        fig = plotPerChr(ma, cmap, args, pPca=pca)

    else:
        norm = None

        if args.log or args.log1p:
            # Zeros cannot be log-transformed: replace them by the smallest
            # non-zero value of the matrix.
            mask = matrix == 0
            matrix[mask] = np.nanmin(matrix[~mask])

            if np.isnan(matrix).any() or np.isinf(matrix).any():
                log.debug("any nan {}".format(np.isnan(matrix).any()))
                log.debug("any inf {}".format(np.isinf(matrix).any()))
                mask_nan = np.isnan(matrix)
                mask_inf = np.isinf(matrix)
                matrix[mask_nan] = np.nanmin(matrix[~mask_nan])
                matrix[mask_inf] = np.nanmin(matrix[~mask_inf])

            if args.log:
                matrix = np.log(matrix)

        log.debug("any nan after remove of nan: {}".format(
            np.isnan(matrix).any()))
        log.debug("any inf after remove of inf: {}".format(
            np.isinf(matrix).any()))
        if args.log1p:
            matrix += 1
            norm = LogNorm()

        fig_height = 7
        height = 4.8 / fig_height

        fig_width = 8
        width = 5.0 / fig_width
        left_margin = (1.0 - width) * 0.5

        fig = plt.figure(figsize=(fig_width, fig_height), dpi=args.dpi)

        if args.pca:
            # 2x2 grid: heatmap (top left), pca track (bottom left),
            # colorbar (top right).
            gs = gridspec.GridSpec(2,
                                   2,
                                   height_ratios=[0.85, 0.15],
                                   width_ratios=[0.93, 0.07])
            gs.update(hspace=0.1)
            ax1 = plt.subplot(gs[0, 0])
            ax2 = plt.subplot(gs[1, 0])
            ax3 = plt.subplot(gs[0, 1])
            pca['axis'] = ax2
            pca['axis_colorbar'] = ax3

        else:
            ax1 = None
        bottom = 1.3 / fig_height

        position = [left_margin, bottom, width, height]
        plotHeatmap(matrix,
                    ma.chrBinBoundaries,
                    fig,
                    position,
                    args,
                    cmap,
                    xlabel=chrom,
                    ylabel=chrom2,
                    start_pos=start_pos1,
                    start_pos2=start_pos2,
                    pNorm=norm,
                    pAxis=ax1,
                    pPca=pca)

    if args.perChromosome or args.pca:
        plt.tight_layout()

    plt.savefig(args.outFileName, dpi=args.dpi)
    plt.close(fig)
# Example no. 4
0
def _select_region_bins(cut_intervals, chrom, region_start, region_end, include_partial):
    """Return ``(bin index, bin start)`` pairs for the bins a region selects.

    A bin on ``chrom`` is always selected when it starts at or after
    ``region_start`` and ends strictly before ``region_end``. With
    ``include_partial`` set, a bin is also selected when its end lies strictly
    between the region bounds, or when its start does.
    """
    selected = []
    for bin_idx, interval in enumerate(cut_intervals):
        if not interval[0] == chrom:
            continue
        bin_start, bin_end = interval[1], interval[2]
        keep = bin_start >= region_start and bin_end < region_end
        if include_partial and not keep:
            ends_inside = bin_start < region_end and region_start < bin_end < region_end
            starts_inside = region_start < bin_start < region_end
            keep = ends_inside or starts_inside
        if keep:
            selected.append((bin_idx, bin_start))
    return selected


def _resolve_region_chrom(name, ma, option):
    """Return ``name`` in the form used as key of ``ma.interval_trees``, or exit.

    The name is converted to bytes when the matrix keys are bytes; if it is
    still unknown, the alternative spelling from ``change_chrom_names`` is
    tried before giving up. ``option`` is the command-line flag named in the
    error message.
    """
    # np.bytes_ is a subclass of bytes, so one isinstance check covers both.
    keys_are_bytes = isinstance(next(iter(ma.interval_trees)), bytes)

    chrom = toBytes(name) if keys_are_bytes else name
    if chrom in ma.interval_trees:
        return chrom

    chrom = change_chrom_names(chrom)
    if keys_are_bytes:
        chrom = toBytes(chrom)
    if chrom not in ma.interval_trees:
        raise SystemExit("Chromosome name {} in {} not in matrix".format(
            change_chrom_names(chrom), option))
    return chrom


def getRegion(args, ma):
    """Resolve ``args.region`` (and ``args.region2``) to the matrix bins they cover.

    Each region string is parsed with ``translate_region`` and its chromosome
    name is matched against the keys of ``ma.interval_trees``. As a side
    effect ``args.region`` is replaced by ``[chrom, region_start, region_end]``.
    For cooler files (``args.matrix`` ends in ``.cool`` or is recognised by
    ``cooler.io.is_cooler``) bins that only partially overlap a region are
    selected as well.

    :param args: parsed command-line arguments; ``region``, ``region2`` and
                 ``matrix`` are read, ``region`` is overwritten.
    :param ma: Hi-C matrix object providing ``interval_trees`` and
               ``cut_intervals`` (entries indexed as chrom, start, end).
    :return: tuple ``(chrom, region_start, region_end, idx1, start_pos1,
             chrom2, region_start2, region_end2, idx2, start_pos2)``. Without
             ``args.region2``, ``chrom2``, ``idx2`` and ``start_pos2`` mirror
             the first region and ``region_start2`` / ``region_end2`` are
             ``None``.
    :raises SystemExit: if a chromosome name is not in the matrix, or if a
                        region does not select any bin.
    """
    region_start2 = region_end2 = None

    chrom, region_start, region_end = translate_region(args.region)
    chrom = _resolve_region_chrom(chrom, ma, '--region')
    args.region = [chrom, region_start, region_end]

    is_cooler = bool(args.matrix.endswith('.cool') or cooler.io.is_cooler(args.matrix))

    selected = _select_region_bins(ma.cut_intervals, chrom, region_start,
                                   region_end, is_cooler)
    if not selected:
        # zip(*[]) yields nothing, so unpacking it into two names would fail
        # with an unhelpful ValueError; stop with a clear message instead.
        raise SystemExit(
            "The region given in --region does not contain any bin of the matrix")
    idx1, start_pos1 = zip(*selected)

    if args.region2:
        chrom2, region_start2, region_end2 = translate_region(args.region2)
        # Resolve the second region's own chromosome name; it must not be
        # replaced by the chromosome of the first region.
        chrom2 = _resolve_region_chrom(chrom2, ma, '--region2')
        selected2 = _select_region_bins(ma.cut_intervals, chrom2, region_start2,
                                        region_end2, is_cooler)
        if not selected2:
            raise SystemExit(
                "The region given in --region2 does not contain any bin of the matrix")
        idx2, start_pos2 = zip(*selected2)
    else:
        idx2 = idx1
        chrom2 = chrom
        start_pos2 = start_pos1

    return chrom, region_start, region_end, idx1, start_pos1, chrom2, region_start2, region_end2, idx2, start_pos2