Python check_cooler Examples

Programming Language: Python

Namespace/Package Name: hicexplorer.utilities

Method/Function: check_cooler

Examples at hotexamples.com: 9

Python check_cooler - 9 examples found. These are the top-rated real-world Python examples of hicexplorer.utilities.check_cooler extracted from open-source projects. You can rate examples to help us improve their quality.

Example #1

Show file

def _resolve_chromosome(ma, chrom, error_template):
    """Return the spelling of *chrom* that is a key of ``ma.interval_trees``.

    The name is tried as given and, if absent, once more after passing it
    through ``change_chrom_names``. If neither spelling is present the
    program exits with *error_template* formatted with the name.
    """
    known_chromosomes = ma.interval_trees
    chrom = check_chrom_str_bytes(known_chromosomes, chrom)
    if chrom in list(known_chromosomes):
        return chrom

    chrom = change_chrom_names(chrom)
    chrom = check_chrom_str_bytes(known_chromosomes, chrom)
    if chrom not in list(known_chromosomes):
        exit(error_template.format(change_chrom_names(chrom)))
    return chrom


def _select_bins(cut_intervals, chrom, start, end, is_cooler):
    """Return ``(indices, start_positions)`` of the bins selected for a region.

    For non-cooler input a bin on *chrom* is selected when it starts at or
    after *start* and ends before *end*. For cooler input two further
    overlap conditions are accepted as well. Both results are tuples;
    unpacking raises ``ValueError`` when no bin is selected.
    """
    selected = []
    for position, interval in enumerate(cut_intervals):
        if interval[0] != chrom:
            continue
        bin_start = interval[1]
        bin_end = interval[2]
        if is_cooler:
            wanted = ((bin_start >= start and bin_end < end) or
                      (bin_start < end and bin_end < end and bin_end > start) or
                      (bin_start > start and bin_start < end))
        else:
            wanted = bin_start >= start and bin_end < end
        if wanted:
            selected.append((position, bin_start))

    indices, start_positions = zip(*selected)
    return indices, start_positions


def getRegion(args, ma):
    """Translate ``args.region`` / ``args.region2`` into bin selections of *ma*.

    ``args.region`` is replaced by ``[chrom, region_start, region_end]``
    using the chromosome spelling found in the matrix. When
    ``args.region2`` is not set, the second chromosome, indices and start
    positions are those of the first region and ``region_start2`` /
    ``region_end2`` are ``None``.

    Returns the tuple ``(chrom, region_start, region_end, idx1, start_pos1,
    chrom2, region_start2, region_end2, idx2, start_pos2)``.
    """
    region_start2 = region_end2 = None

    chrom, region_start, region_end = translate_region(args.region)
    chrom = _resolve_chromosome(
        ma, chrom, "Chromosome name {} in --region not in matrix")
    args.region = [chrom, region_start, region_end]

    is_cooler = check_cooler(args.matrix)
    idx1, start_pos1 = _select_bins(ma.cut_intervals, chrom,
                                    region_start, region_end, is_cooler)

    if not args.region2:
        # no second region requested: mirror the first one
        return (chrom, region_start, region_end, idx1, start_pos1,
                chrom, region_start2, region_end2, idx1, start_pos1)

    chrom2, region_start2, region_end2 = translate_region(args.region2)
    chrom2 = _resolve_chromosome(
        ma, chrom2, "Chromosome name {} in --region2 not in matrix")
    idx2, start_pos2 = _select_bins(ma.cut_intervals, chrom2,
                                    region_start2, region_end2, is_cooler)

    return (chrom, region_start, region_end, idx1, start_pos1,
            chrom2, region_start2, region_end2, idx2, start_pos2)

Example #2

Show file

File: hicDifferentialTAD.py Project: bgruening/HiCExplorer

def _write_diff_tad(pFileName, pHeader, pRows, pPValues, pStatistics):
    """Write one ``.diff_tad`` result file.

    After *pHeader*, one tab-separated line is written per region: the
    values of the row, then its p-values, then its test statistics.
    """
    with open(pFileName, 'w') as handle:
        handle.write(pHeader)
        for row, p_values, statistics in zip(pRows, pPValues, pStatistics):
            columns = ['\t'.join(map(str, row)),
                       '\t'.join(map(str, p_values)),
                       '\t'.join(map(str, statistics))]
            handle.write('\t'.join(columns))
            handle.write('\n')


def main(args=None):
    """Run the differential TAD test and write accepted / rejected regions.

    The TAD domains are grouped per chromosome, each chromosome is split
    over up to ``args.threads`` worker processes running
    ``computeDifferentialTADs``, and the collected p-values are combined
    into a rejection mask according to ``args.mode`` and
    ``args.modeReject``. Two files are written:
    ``<outFileNamePrefix>_accepted.diff_tad`` and
    ``<outFileNamePrefix>_rejected.diff_tad``.

    Exits with status 1 when the two matrices are not in the same format
    or when a worker reports a failure.
    """
    args = parse_arguments().parse_args(args)

    # read domains file
    domains_df = readDomainBoundaries(args.tadDomains)
    log.debug('len(domains_df) {}'.format(len(domains_df)))
    domains = domains_df.values.tolist()

    # Group consecutive domains that share a chromosome. Starting from an
    # empty list also covers an empty domains file, which previously hit an
    # unbound ``per_chromosome``.
    tads_per_chromosome = []
    old_chromosome = None
    for domain in domains:
        if not tads_per_chromosome or domain[0] != old_chromosome:
            tads_per_chromosome.append([])
            old_chromosome = domain[0]
        tads_per_chromosome[-1].append(domain)

    # read full h5 or only region if cooler
    is_cooler_target = check_cooler(args.targetMatrix)
    is_cooler_control = check_cooler(args.controlMatrix)

    if is_cooler_target != is_cooler_control:
        log.error('Matrices are not given in the same format!')
        exit(1)
    if not is_cooler_control:
        hic_matrix_target = hm.hiCMatrix(args.targetMatrix)
        hic_matrix_control = hm.hiCMatrix(args.controlMatrix)
    else:
        # cooler files are handed over by name
        hic_matrix_target = args.targetMatrix
        hic_matrix_control = args.controlMatrix

    stats_chromosomes = []
    p_values_chromosomes = []
    accepted_inter_left_chromosomes = []
    accepted_inter_right_chromosomes = []
    accepted_intra_chromosomes = []
    rows_chromosomes = []

    threads_save = args.threads
    for chromosome in tads_per_chromosome:
        log.debug('tads_per_chromosome {}'.format(chromosome))

        # Always derive the slice width from the requested thread count, not
        # from a count that a previous small chromosome reduced to 1.
        domainsPerThread = len(chromosome) // threads_save
        if domainsPerThread == 0:
            # fewer domains than workers: one worker handles everything
            domainsPerThread = 1
            thread_count = 1
        else:
            thread_count = threads_save

        # Fresh per-worker result slots for every chromosome, so results of
        # a previous chromosome can never be flattened in a second time.
        stats_threads = [[] for _ in range(thread_count)]
        p_values_threads = [[] for _ in range(thread_count)]
        accepted_left_inter_threads = [[] for _ in range(thread_count)]
        accepted_right_inter_threads = [[] for _ in range(thread_count)]
        accepted_intra_threads = [[] for _ in range(thread_count)]
        rows_threads = [[] for _ in range(thread_count)]

        queue = [None] * thread_count
        process = [None] * thread_count
        thread_done = [False] * thread_count
        # None --> first thread, process first element in list, ignore last one
        # True --> middle thread: ignore first and last element in tad processing
        # False --> last thread: ignore first element, process last one
        thread_id = None
        for i in range(thread_count):

            if thread_count == 1:
                domainListThread = chromosome
                thread_id = ''
            elif i == 0:
                domainListThread = chromosome[:domainsPerThread + 1]
                thread_id = None
            elif i < thread_count - 1:
                # neighbouring slices overlap by one domain on each side
                domainListThread = chromosome[(i * domainsPerThread) - 1:
                                              ((i + 1) * domainsPerThread) + 1]
                thread_id = True
            else:
                domainListThread = chromosome[(i * domainsPerThread) - 1:]
                thread_id = False

            log.debug('len(domainListThread) {}'.format(len(domainListThread)))
            log.debug('len(thread_id) {}'.format(thread_id))

            queue[i] = Queue()
            process[i] = Process(target=computeDifferentialTADs,
                                 kwargs=dict(pMatrixTarget=hic_matrix_target,
                                             pMatrixControl=hic_matrix_control,
                                             pDomainList=domainListThread,
                                             pCoolOrH5=is_cooler_control,
                                             pPValue=args.pValue,
                                             pThreadId=thread_id,
                                             pQueue=queue[i]))

            process[i].start()

        fail_flag = False
        fail_message = ''
        all_data_collected = False
        while not all_data_collected:
            for i in range(thread_count):
                if queue[i] is not None and not queue[i].empty():
                    queue_data = queue[i].get()
                    if 'Fail:' in queue_data:
                        fail_flag = True
                        fail_message = queue_data
                    else:
                        stats_threads[i], p_values_threads[i], accepted_left_inter_threads[i], \
                            accepted_right_inter_threads[i], \
                            accepted_intra_threads[i], rows_threads[i] = queue_data

                    queue[i] = None
                    process[i].join()
                    process[i].terminate()
                    process[i] = None
                    thread_done[i] = True

            all_data_collected = all(thread_done)
            if not all_data_collected:
                # poll again later instead of busy-waiting
                time.sleep(1)

        if fail_flag:
            # drop the leading 'Fail: ' marker from the worker message
            log.error(fail_message[6:])
            exit(1)

        stats_chromosomes.append(
            [item for sublist in stats_threads for item in sublist])
        p_values_chromosomes.append(
            [item for sublist in p_values_threads for item in sublist])
        accepted_inter_left_chromosomes.append([
            item for sublist in accepted_left_inter_threads for item in sublist
        ])
        accepted_inter_right_chromosomes.append([
            item for sublist in accepted_right_inter_threads
            for item in sublist
        ])
        accepted_intra_chromosomes.append(
            [item for sublist in accepted_intra_threads for item in sublist])
        rows_chromosomes.append(
            [item for sublist in rows_threads for item in sublist])

        log.debug('rows_threads {}'.format(rows_threads))

    # flatten the per-chromosome lists into one array per quantity
    stats_list = np.array(
        [item for sublist in stats_chromosomes for item in sublist])
    p_values_list = np.array(
        [item for sublist in p_values_chromosomes for item in sublist])
    accepted_inter_left = np.array(
        [item for sublist in accepted_inter_left_chromosomes for item in sublist])
    accepted_inter_right = np.array(
        [item for sublist in accepted_inter_right_chromosomes for item in sublist])
    accepted_intra = np.array(
        [item for sublist in accepted_intra_chromosomes for item in sublist])
    rows = np.array([item for sublist in rows_chromosomes for item in sublist])

    # modeReject 'all' combines the selected tests with AND, anything else
    # combines them with OR
    if args.mode == 'intra-TAD':
        mask = np.array(accepted_intra, dtype=bool)
    elif args.mode == 'left-inter-TAD':
        if args.modeReject == 'all':
            mask = np.logical_and(accepted_inter_left, accepted_intra)
        else:
            mask = np.logical_or(accepted_inter_left, accepted_intra)

    elif args.mode == 'right-inter-TAD':
        if args.modeReject == 'all':
            mask = np.logical_and(accepted_intra, accepted_inter_right)
        else:
            mask = np.logical_or(accepted_intra, accepted_inter_right)

    else:
        if args.modeReject == 'all':
            mask = np.logical_and(accepted_inter_left, accepted_inter_right)
            mask = np.logical_and(mask, accepted_intra)
        else:
            mask = np.logical_or(accepted_inter_left, accepted_inter_right)
            mask = np.logical_or(mask, accepted_intra)

    log.debug('len(mask) {}'.format(len(mask)))
    log.debug('mask.sum() {}'.format(mask.sum()))
    log.debug('mask[:10] {}'.format(mask[:10]))

    # entries selected by the mask go to the rejected file, the rest to the
    # accepted file
    accepted_H0 = p_values_list[~mask]
    accepted_H0_s = stats_list[~mask]
    rejected_H0 = p_values_list[mask]
    rejected_H0_s = stats_list[mask]
    accepted_rows = rows[~mask]
    rejected_rows = rows[mask]

    header = '# Created with HiCExplorer\'s hicDifferentialTAD version ' + __version__ + '\n'
    header += '# H0 \'regions are equal\' H0 is accepted for all p-value greater the user given p-value threshold; i.e. regions in this file are not considered as differential.\n'
    header += '# Accepted regions with Wilcoxon rank-sum test to p-value: {}  with used mode: {} and modeReject: {} \n'.format(
        args.pValue, args.mode, args.modeReject)
    header += '# Chromosome\tstart\tend\tname\tscore\tstrand\tp-value left-inter-TAD\tp-value right-inter-TAD\tp-value intra-TAD\tW left-inter-TAD\tW right-inter-TAD\tW intra-TAD\n'
    _write_diff_tad(args.outFileNamePrefix + '_accepted.diff_tad', header,
                    accepted_rows, accepted_H0, accepted_H0_s)

    header = '# Created with HiCExplorer\'s hicDifferentialTAD version ' + __version__ + '\n'
    header += '# H0 \'regions are equal\' H0 is rejected for all p-value smaller or equal the user given p-value threshold; i.e. regions in this file are considered as differential.\n'
    header += '# Rejected regions with Wilcoxon rank-sum test to p-value: {}  with used mode: {} and modeReject: {} \n'.format(
        args.pValue, args.mode, args.modeReject)
    header += '# Chromosome\tstart\tend\tname\tscore\tstrand\tp-value left-inter-TAD\tp-value right-inter-TAD\tp-value intra-TAD\tW left-inter-TAD\tW right-inter-TAD\tW intra-TAD\n'
    _write_diff_tad(args.outFileNamePrefix + '_rejected.diff_tad', header,
                    rejected_rows, rejected_H0, rejected_H0_s)

Example #3

Show file

File: hicCorrelate.py Project: bgruening/HiCExplorer

def main(args=None):
    """Correlate Hi-C matrices pairwise and plot the result.

    Every matrix in ``args.matrices`` is loaded, reduced to its upper
    triangle (optionally restricted to the genomic distance range in
    ``args.range`` and log1p-transformed), and the chosen correlation
    (``args.method``: 'spearman' or 'pearson') is computed for every pair.
    A grid of 2D histograms is saved to ``args.outFileNameScatter`` and the
    correlation matrix is passed to ``plot_correlation``.
    """

    args = parse_arguments().parse_args(args)
    mpl.rcParams['pdf.fonttype'] = 42
    if args.labels and len(args.matrices) != len(args.labels):
        log.error(
            "The number of labels does not match the number of matrices.")
        exit(0)
    if not args.labels:
        # A list, not a lazy ``map``: the labels are indexed further down.
        args.labels = [os.path.basename(x) for x in args.matrices]

    num_files = len(args.matrices)
    # initialize results matrix
    results = np.zeros((num_files, num_files), dtype='float')

    rows, cols = np.triu_indices(num_files)
    correlation_opts = {'spearman': spearmanr, 'pearson': pearsonr}
    hic_mat_list = []
    max_value = None
    min_value = None
    all_mat = None
    all_nan = []

    for i, matrix in enumerate(args.matrices):
        log.debug("loading hic matrix {}\n".format(matrix))

        if (check_cooler(
                args.matrices[i])) and args.chromosomes is not None and len(
                    args.chromosomes) == 1:
            _mat = hm.hiCMatrix(matrix, pChrnameList=args.chromosomes)
        else:
            _mat = hm.hiCMatrix(matrix)
            if args.chromosomes:
                _mat.keepOnlyTheseChr(args.chromosomes)
            _mat.filterOutInterChrCounts()

        _mat.diagflat(0)
        log.debug("restore masked bins {}\n".format(matrix))
        bin_size = _mat.getBinSize()
        # collect the nan bins of all matrices
        all_nan = np.unique(np.concatenate([all_nan, _mat.nan_bins]))

        _mat = triu(_mat.matrix, k=0, format='csr')
        if args.range:
            min_dist, max_dist = args.range.split(":")
            min_dist = int(min_dist)
            max_dist = int(max_dist)
            if max_dist < bin_size:
                log.error(
                    "Please specify a max range that is larger than bin size ({})"
                    .format(bin_size))
                exit()
            max_depth_in_bins = int(max_dist / bin_size)
            max_dist = int(max_dist) // bin_size
            min_dist = int(min_dist) // bin_size
            # work only with the upper matrix
            # and remove all pixels that are beyond
            # max_depth_in_bins
            # (this is done by subtracting a second sparse matrix
            # that contains only the upper matrix that wants to be removed.
            _mat = triu(_mat, k=0, format='csr') - triu(
                _mat, k=max_depth_in_bins, format='csr')

            _mat.eliminate_zeros()

            # keep only pixels whose bin distance lies in [min_dist, max_dist]
            _mat_coo = _mat.tocoo()
            dist = _mat_coo.col - _mat_coo.row
            keep = np.flatnonzero((dist <= max_dist) & (dist >= min_dist))
            _mat_coo.data = _mat_coo.data[keep]
            _mat_coo.row = _mat_coo.row[keep]
            _mat_coo.col = _mat_coo.col[keep]
            _mat = _mat_coo.tocsr()
        else:
            _mat = triu(_mat, k=0, format='csr')

        if args.log1p:
            _mat.data = np.log1p(_mat.data)
        if all_mat is None:
            all_mat = _mat
        else:
            all_mat = all_mat + _mat

        if max_value is None or max_value < _mat.data.max():
            max_value = _mat.data.max()
        if min_value is None or min_value > _mat.data.min():
            min_value = _mat.data.min()

        hic_mat_list.append(_mat)

    # remove nan bins
    rows_keep = cols_keep = np.delete(list(range(all_mat.shape[1])),
                                      all_nan.astype('int'))
    all_mat = all_mat[rows_keep, :][:, cols_keep]

    # make large matrix to correlate by
    # using sparse matrix tricks

    big_mat = None
    for mat in hic_mat_list:
        mat = mat[rows_keep, :][:, cols_keep]
        # adding all_mat and subtracting its data again yields one value per
        # stored entry of all_mat for this sample
        sample_vector = (mat + all_mat).data - all_mat.data
        if big_mat is None:
            big_mat = sample_vector
        else:
            big_mat = np.vstack([big_mat, sample_vector])

    # take the transpose such that columns represent each of the samples
    big_mat = np.ma.masked_invalid(big_mat).T

    grids = gridspec.GridSpec(num_files, num_files)
    grids.update(wspace=0, hspace=0)
    fig = plt.figure(figsize=(2 * num_files, 2 * num_files))
    plt.rcParams['font.size'] = 8.0

    min_value = int(big_mat.min())
    max_value = int(big_mat.max())
    if min_value % 2 == max_value % 2:
        # make one value odd and the other even
        # (the former test used ``min_value % 1``, which is always 0)
        max_value += 1

    if args.log1p:
        major_locator = FixedLocator(list(range(min_value, max_value, 2)))
        minor_locator = FixedLocator(list(range(min_value, max_value, 1)))

    for row, col in zip(rows, cols):
        if row == col:
            results[row, col] = 1

            # add titles as
            # empty plot in the diagonal
            ax = fig.add_subplot(grids[row, col])
            ax.text(0.6,
                    0.6,
                    args.labels[row],
                    verticalalignment='center',
                    horizontalalignment='center',
                    fontsize=10,
                    fontweight='bold',
                    transform=ax.transAxes)
            ax.set_axis_off()
            continue

        log.debug("comparing {} and {}\n".format(args.matrices[row],
                                                 args.matrices[col]))

        # remove cases in which both are zero or one is zero and
        # the other is one
        _mat = big_mat[:, [row, col]]
        _mat = _mat[_mat.sum(axis=1) > 1, :]
        vector1 = _mat[:, 0]
        vector2 = _mat[:, 1]

        results[row, col] = correlation_opts[args.method](vector1, vector2)[0]

        # scatter plots
        ax = fig.add_subplot(grids[row, col])
        if args.log1p:
            ax.xaxis.set_major_locator(major_locator)
            ax.xaxis.set_minor_locator(minor_locator)
            ax.yaxis.set_major_locator(major_locator)
            ax.yaxis.set_minor_locator(minor_locator)

        ax.text(0.2,
                0.8,
                "{}={:.2f}".format(args.method, results[row, col]),
                horizontalalignment='left',
                transform=ax.transAxes)
        # Tick visibility is passed as booleans; the strings 'on'/'off' are
        # both truthy and would switch the ticks on.
        ax.get_yaxis().set_tick_params(which='both',
                                       left=False,
                                       right=False,
                                       direction='out')

        ax.get_xaxis().set_tick_params(which='both',
                                       top=False,
                                       bottom=False,
                                       direction='out')

        if col != num_files - 1:
            ax.set_yticklabels([])
        else:
            # last column: show the y ticks on the right side
            ax.yaxis.tick_right()
            ax.get_yaxis().set_tick_params(which='both',
                                           left=False,
                                           right=True,
                                           direction='out')
        if col - row == 1:
            # plots directly next to the diagonal carry the x ticks
            ax.xaxis.tick_bottom()
            ax.get_xaxis().set_tick_params(which='both',
                                           top=False,
                                           bottom=True,
                                           direction='out')
        else:
            ax.set_xticklabels([])

        ax.hist2d(vector1, vector2, bins=150, cmin=0.1)
    fig.tight_layout()
    log.debug("saving {}".format(args.outFileNameScatter))
    fig.savefig(args.outFileNameScatter, bbox_inches='tight')

    # mirror the upper triangle to obtain the full symmetric matrix
    results = results + np.triu(results, 1).T
    plot_correlation(results,
                     args.labels,
                     args.outFileNameHeatmap,
                     args.zMax,
                     args.zMin,
                     args.colorMap,
                     pPlotNumbers=args.plotNumbers)

Example #4

Show file

File: hicCorrectMatrix.py Project: wangyibin/HiCExplorer

def main(args=None):
    """Correct a Hi-C matrix with ICE or KR balancing and save the result.

    The matrix named by ``args.matrix`` is loaded (optionally restricted to
    ``args.chromosomes``). When ``plotName`` is among the arguments only the
    diagnostic plot is produced and the function returns. Otherwise bins
    are filtered (ICE only), the matrix is balanced as a whole or per
    chromosome (``args.perchr``), the correction factors are stored on the
    matrix and it is written to ``args.outFileName``.

    Exits with status 1 when ICE is requested without filter thresholds.
    """
    args = parse_arguments().parse_args(args)
    if args.verbose:
        log.setLevel(logging.INFO)

    # args.chromosomes
    if check_cooler(args.matrix) and args.chromosomes is not None and len(
            args.chromosomes) == 1:
        ma = hm.hiCMatrix(args.matrix, pChrnameList=toString(args.chromosomes))
    else:
        ma = hm.hiCMatrix(args.matrix)

        if args.chromosomes:
            ma.reorderChromosomes(toString(args.chromosomes))

    # mask all zero value bins
    if 'correctionMethod' in args:
        if args.correctionMethod == 'ICE':
            row_sum = np.asarray(ma.matrix.sum(axis=1)).flatten()
            log.info("Removing {} zero value bins".format(sum(row_sum == 0)))
            ma.maskBins(np.flatnonzero(row_sum == 0))
            matrix_shape = ma.matrix.shape
    if 'plotName' in args:
        row_sum = np.asarray(ma.matrix.sum(axis=1)).flatten()
        log.info("Removing {} zero value bins".format(sum(row_sum == 0)))
        ma.maskBins(np.flatnonzero(row_sum == 0))
        matrix_shape = ma.matrix.shape

    ma.matrix = convertNansToZeros(ma.matrix)
    ma.matrix = convertInfsToZeros(ma.matrix)
    ma.matrix = ma.matrix.astype(np.float64, copy=True)

    log.debug('ma.matrix.indices {}'.format(ma.matrix.indices.dtype))
    log.debug('ma.matrix.data {}'.format(ma.matrix.data.dtype))
    log.debug('ma.matrix.indptr {}'.format(ma.matrix.indptr.dtype))

    if 'plotName' in args:
        plot_total_contact_dist(ma, args)
        log.info("Saving diagnostic plot {}\n".format(args.plotName))
        return

    log.info("matrix contains {} data points. Sparsity {:.3f}.".format(
        len(ma.matrix.data),
        float(len(ma.matrix.data)) / (ma.matrix.shape[0]**2)))

    if args.skipDiagonal:
        ma.diagflat(value=0)

    total_filtered_out = set()
    if args.correctionMethod == 'ICE':
        if not args.filterThreshold:
            log.error('min and max filtering thresholds should be set')
            sys.exit(1)
        outlier_regions = filter_by_zscore(ma,
                                           args.filterThreshold[0],
                                           args.filterThreshold[1],
                                           perchr=args.perchr)
        # compute and print some statistics
        pct_outlier = 100 * float(len(outlier_regions)) / ma.matrix.shape[0]
        ma.printchrtoremove(outlier_regions,
                            label="Bins that are MAD outliers ({:.2f}%) "
                            "out of".format(pct_outlier, ma.matrix.shape[0]),
                            restore_masked_bins=False)

        assert matrix_shape == ma.matrix.shape
        # mask filtered regions
        ma.maskBins(outlier_regions)
        total_filtered_out = set(outlier_regions)

        if args.sequencedCountCutoff and 0 < args.sequencedCountCutoff < 1:
            chrom, _, _, coverage = zip(*ma.cut_intervals)

            assert isinstance(coverage[0], np.float64)

            failed_bins = np.flatnonzero(
                np.array(coverage) < args.sequencedCountCutoff)

            ma.printchrtoremove(failed_bins,
                                label="Bins with low coverage",
                                restore_masked_bins=False)
            ma.maskBins(failed_bins)
            # accumulate instead of overwriting the outlier bins recorded
            # above, in the same way as the inflation filter further down
            total_filtered_out = total_filtered_out.union(failed_bins)

        if args.transCutoff and 0 < args.transCutoff < 100:
            cutoff = float(args.transCutoff) / 100
            # a usual cutoff is 0.05
            ma.truncTrans(high=cutoff)

        # Row sums before the correction. They are needed by the inflation
        # filter below, so they must exist whether or not --transCutoff was
        # given.
        pre_row_sum = np.asarray(ma.matrix.sum(axis=1)).flatten()

    correction_factors = []
    corrected_matrix = lil_matrix(ma.matrix.shape)
    if args.perchr:
        # normalize each chromosome independently
        for chrname in list(ma.interval_trees):
            chr_range = ma.getChrBinRange(chrname)
            chr_submatrix = ma.matrix[chr_range[0]:chr_range[1],
                                      chr_range[0]:chr_range[1]]
            if args.correctionMethod == 'ICE':
                _matrix, _corr_factors = iterative_correction(
                    chr_submatrix, args)
                corrected_matrix[chr_range[0]:chr_range[1],
                                 chr_range[0]:chr_range[1]] = _matrix
                correction_factors.append(_corr_factors)
            else:
                # Set the kr matrix along with its correction factors vector
                assert (args.correctionMethod == 'KR')
                log.debug("Loading a float sparse matrix for KR balancing")
                kr = kr_balancing(
                    chr_submatrix.shape[0], chr_submatrix.shape[1],
                    chr_submatrix.count_nonzero(),
                    chr_submatrix.indptr.astype(np.int64, copy=False),
                    chr_submatrix.indices.astype(np.int64, copy=False),
                    chr_submatrix.data.astype(np.float64, copy=False))
                kr.computeKR()
                # the normalised matrix is only fetched for h5 output
                if args.outFileName.endswith('.h5'):
                    corrected_matrix[
                        chr_range[0]:chr_range[1],
                        chr_range[0]:chr_range[1]] = kr.get_normalised_matrix(
                            True)
                correction_factors.append(
                    kr.get_normalisation_vector(False).todense())

        correction_factors = np.concatenate(correction_factors)

    else:
        if args.correctionMethod == 'ICE':
            corrected_matrix, correction_factors = iterative_correction(
                ma.matrix, args)
            ma.setMatrixValues(corrected_matrix)
        else:
            assert (args.correctionMethod == 'KR')
            log.debug("Loading a float sparse matrix for KR balancing")
            kr = kr_balancing(ma.matrix.shape[0], ma.matrix.shape[1],
                              ma.matrix.count_nonzero(),
                              ma.matrix.indptr.astype(np.int64, copy=False),
                              ma.matrix.indices.astype(np.int64, copy=False),
                              ma.matrix.data.astype(np.float64, copy=False))
            log.debug('passed pointers')
            kr.computeKR()
            log.debug('computation done')

            # set it to False since the vector is already normalised
            # with the previous True
            correction_factors = kr.get_normalisation_vector(False).todense()

            if args.outFileName.endswith('.h5'):
                corrected_matrix = kr.get_normalised_matrix(True)

    if args.outFileName.endswith('.h5'):
        ma.setMatrixValues(corrected_matrix)
    ma.setCorrectionFactors(correction_factors)

    log.debug("Correction factors {}".format(correction_factors[:10]))
    if args.inflationCutoff and args.inflationCutoff > 0 and args.correctionMethod == 'ICE':

        after_row_sum = np.asarray(corrected_matrix.sum(axis=1)).flatten()
        # identify rows that were expanded more than args.inflationCutoff times
        to_remove = np.flatnonzero(
            after_row_sum / pre_row_sum >= args.inflationCutoff)
        ma.printchrtoremove(to_remove,
                            label="inflated >={} "
                            "regions".format(args.inflationCutoff),
                            restore_masked_bins=False)
        total_filtered_out = total_filtered_out.union(to_remove)
        ma.maskBins(to_remove)
    ma.printchrtoremove(sorted(list(total_filtered_out)),
                        label="Total regions to be removed",
                        restore_masked_bins=False)

    ma.save(args.outFileName, pApplyCorrection=False)

Example #5

Show file

def main(args=None):
    """Plot a Hi-C contact matrix as a heatmap and save it to ``args.outFileName``.

    The matrix named by ``args.matrix`` is loaded in one of two ways:

    * cooler input, no ``--region2`` and at most one entry in
      ``--chromosomeOrder``: only the requested region/chromosome is handed
      to ``HiCMatrix.hiCMatrix`` via ``pChrnameList``;
    * otherwise: the whole matrix is loaded and reordered / sliced afterwards.

    The resulting dense matrix is drawn either per chromosome
    (``--perChromosome``) or as a single heatmap, optionally log scaled and
    with a bigwig track below it.

    :param args: command line arguments forwarded to the argument parser
                 (``None`` is forwarded unchanged).
    """
    args = parse_arguments().parse_args(args)
    if args.title:
        args.title = remove_non_ascii(args.title)

    chrom = None
    start_pos1 = None
    chrom2 = None
    start_pos2 = None

    if args.perChromosome and args.region:
        log.error('ERROR, choose from the option '
                  '--perChromosome or --region, the two '
                  'options at the same time are not '
                  'compatible.')
        exit(1)

    is_cooler = check_cooler(args.matrix)
    log.debug("Cooler or no cooler: {}".format(is_cooler))

    # The region-wise cooler loading is only used when at most one
    # chromosome is requested via --chromosomeOrder.
    open_cooler_chromosome_order = True
    if args.chromosomeOrder is not None and len(args.chromosomeOrder) > 1:
        open_cooler_chromosome_order = False

    if is_cooler and not args.region2 and open_cooler_chromosome_order:
        log.debug("Retrieve data from cooler format and use its benefits.")
        regionsToRetrieve = None
        if args.region:
            regionsToRetrieve = [args.region]
        if args.chromosomeOrder:
            # --chromosomeOrder takes precedence over --region/--region2
            args.region = None
            args.region2 = None
            regionsToRetrieve = args.chromosomeOrder

        ma = HiCMatrix.hiCMatrix(args.matrix, pChrnameList=regionsToRetrieve)
        log.debug('Shape {}'.format(ma.matrix.shape))
        if args.clearMaskedBins:
            ma.maskBins(ma.nan_bins)
            # to avoid gaps in the plot, bins flanking the masked bins
            # are enlarged
            new_intervals = enlarge_bins(ma.cut_intervals)
            ma.setCutIntervals(new_intervals)

        if args.region:
            chrom, region_start, region_end, idx1, start_pos1, chrom2, region_start2, region_end2, idx2, start_pos2 = getRegion(args, ma)

        matrix = np.asarray(ma.matrix.todense().astype(float))
        matrix_length = len(matrix[0])
        log.debug("Number of data points matrix_cool: {}".format(matrix_length))
    else:
        ma = HiCMatrix.hiCMatrix(args.matrix)
        if args.clearMaskedBins:
            ma.maskBins(ma.nan_bins)
            new_intervals = enlarge_bins(ma.cut_intervals)
            ma.setCutIntervals(new_intervals)
        if args.chromosomeOrder:
            # --chromosomeOrder takes precedence over --region/--region2
            args.region = None
            args.region2 = None

            valid_chromosomes = []
            invalid_chromosomes = []
            log.debug('args.chromosomeOrder: {}'.format(args.chromosomeOrder))
            log.debug("ma.chrBinBoundaries {}".format(ma.chrBinBoundaries))
            if sys.version_info[0] == 3:
                args.chromosomeOrder = toBytes(args.chromosomeOrder)
            # NOTE(review): the loop variable re-uses ``chrom``, so after the
            # loop ``chrom`` holds the last requested chromosome and is later
            # passed as ``xlabel`` to plotHeatmap - confirm this is intended.
            for chrom in toString(args.chromosomeOrder):
                if chrom in ma.chrBinBoundaries:
                    valid_chromosomes.append(chrom)
                else:
                    invalid_chromosomes.append(chrom)

            if invalid_chromosomes:
                # The trailing space after "check" keeps the two adjacent
                # literals from being glued together as "checkthe".
                log.warning("WARNING: The following chromosome/scaffold names were not found. Please check "
                            "the correct spelling of the chromosome names. \n")
                log.warning("\n".join(invalid_chromosomes))
            ma.reorderChromosomes(valid_chromosomes)

        log.info("min: {}, max: {}\n".format(ma.matrix.data.min(), ma.matrix.data.max()))

        if args.region:
            chrom, region_start, region_end, idx1, start_pos1, chrom2, region_start2, region_end2, idx2, start_pos2 = getRegion(args, ma)

            matrix = np.asarray(ma.matrix[idx1, :][:, idx2].todense().astype(float))

        else:
            log.debug("Else branch")
            matrix = np.asarray(ma.getMatrix().astype(float))

    matrix_length = len(matrix[0])
    log.debug("Number of data points matrix: {}".format(matrix_length))

    # sanity check: every row must be as long as the first one
    for matrix_ in matrix:
        if matrix_length != len(matrix_):
            log.error("Matrices do not have the same length: {} , {}".format(matrix_length, len(matrix_)))

    cmap = cm.get_cmap(args.colorMap)
    log.debug("Nan values set to black\n")
    cmap.set_bad('black')

    bigwig_info = None
    if args.bigwig:
        bigwig_info = {'args': args, 'axis': None, 'axis_colorbar': None, 'nan_bins': ma.nan_bins}

    if args.perChromosome:
        fig = plotPerChr(ma, cmap, args, pBigwig=bigwig_info)

    else:
        norm = None

        if args.log or args.log1p:
            # A log scale cannot display zero, nan or inf; replace those
            # entries by the smallest remaining value.
            mask = matrix == 0
            matrix[mask] = np.nanmin(matrix[~mask])

            if np.isnan(matrix).any() or np.isinf(matrix).any():
                log.debug("any nan {}".format(np.isnan(matrix).any()))
                log.debug("any inf {}".format(np.isinf(matrix).any()))
                mask_nan = np.isnan(matrix)
                mask_inf = np.isinf(matrix)
                matrix[mask_nan] = np.nanmin(matrix[~mask_nan])
                matrix[mask_inf] = np.nanmin(matrix[~mask_inf])

        log.debug("any nan after remove of nan: {}".format(np.isnan(matrix).any()))
        log.debug("any inf after remove of inf: {}".format(np.isinf(matrix).any()))
        if args.log1p:
            matrix += 1
            norm = LogNorm()
        elif args.log:
            norm = LogNorm()

        if args.bigwig:
            # increase figure height to accommodate bigwig track
            fig_height = 8.5
        else:
            fig_height = 7
        height = 4.8 / fig_height

        fig_width = 8
        width = 5.0 / fig_width
        left_margin = (1.0 - width) * 0.5

        fig = plt.figure(figsize=(fig_width, fig_height), dpi=args.dpi)

        if args.bigwig:
            # heatmap (top left), bigwig track (bottom left), colorbar (top right)
            gs = gridspec.GridSpec(2, 2, height_ratios=[0.90, 0.1], width_ratios=[0.97, 0.03])
            gs.update(hspace=0.05, wspace=0.05)
            ax1 = plt.subplot(gs[0, 0])
            ax2 = plt.subplot(gs[1, 0])
            ax3 = plt.subplot(gs[0, 1])
            bigwig_info['axis'] = ax2
            bigwig_info['axis_colorbar'] = ax3
        else:
            ax1 = None
        bottom = 1.3 / fig_height

        if start_pos1 is None:
            start_pos1 = make_start_pos_array(ma)

        position = [left_margin, bottom, width, height]
        plotHeatmap(matrix, ma.get_chromosome_sizes(), fig, position,
                    args, cmap, xlabel=chrom, ylabel=chrom2,
                    start_pos=start_pos1, start_pos2=start_pos2, pNorm=norm, pAxis=ax1, pBigwig=bigwig_info)

    if not args.disable_tight_layout:
        if args.perChromosome or args.bigwig:
            try:
                plt.tight_layout()
            except (UserWarning, ValueError):
                log.info("Failed to tight layout. Using regular plot.")

    plt.savefig(args.outFileName, dpi=args.dpi)
    plt.close(fig)

Example #6

Show file

File: hicDetectLoops.py Project: ryys1122/HiCExplorer

def main(args=None):
    """Detect loops chromosome by chromosome and write them to ``args.outFileName``.

    For cooler input every chromosome is loaded separately; for other input
    the full matrix is loaded once and copied per chromosome. A single
    chromosome is processed in this process, several chromosomes are
    distributed over up to ``args.threads`` worker processes that report
    their result through a ``Queue``.

    The function calls ``exit(1)`` when the window size is not larger than
    the peak width, when no loops could be computed in the single-chromosome
    case, or when ``compute_loops`` reports a ``'Fail: '`` message.

    :param args: command line arguments forwarded to the argument parser.
    """
    args = parse_arguments().parse_args(args)

    if args.windowSize <= args.peakWidth:
        log.error('The window size ({}) must be larger than the peakWidth ({})'.format(args.windowSize, args.peakWidth))
        exit(1)
    is_cooler = check_cooler(args.matrix)
    if args.threadsPerChromosome < 1:
        args.threadsPerChromosome = 1

    mapped_loops = []

    # Initialised here (not only in the multi-process branch) because the
    # failure check at the end of the function runs for both branches.
    fail_flag = False
    fail_message = ''

    if not is_cooler:
        hic_matrix = hm.hiCMatrix(args.matrix)
        matrix = deepcopy(hic_matrix.matrix)
        cut_intervals = deepcopy(hic_matrix.cut_intervals)

    if args.chromosomes is None:
        # get all chromosomes from cooler file
        if not is_cooler:
            chromosomes_list = list(hic_matrix.chrBinBoundaries)
        else:
            chromosome_sizes = cooler.Cooler(args.matrix).chromsizes

            # shuffle the processing order of chromosomes.
            # with this one large chromosome and 4 smalls are in a row
            # peak memory is reduced and more chromosomes can be processed in parallel on low memory systems.
            sorted_sizes_desc = chromosome_sizes.sort_values(ascending=False)

            size = sorted_sizes_desc.size
            chromosome_names_list = sorted_sizes_desc.index.tolist()
            chromosomes_list = []
            i = 0
            j = args.threads  # biggest + thread smallest; 2nd biggest chr + 4 - 8 smallest
            k = size - 1
            while i < size:
                chromosomes_list.append(chromosome_names_list[i])
                while j > 0 and k > 0:
                    if k == i:
                        break
                    chromosomes_list.append(chromosome_names_list[k])
                    k -= 1
                    j -= 1
                j = args.threads - 1
                if i == k:
                    break
                i += 1
    else:
        chromosomes_list = args.chromosomes

    # never start more workers than there are chromosomes
    if len(chromosomes_list) < args.threads:
        args.threads = len(chromosomes_list)
    single_core = len(chromosomes_list) == 1

    if single_core:
        for chromosome in chromosomes_list:
            if is_cooler:
                hic_matrix = hm.hiCMatrix(
                    pMatrixFile=args.matrix, pChrnameList=[chromosome], pDistance=args.maxLoopDistance, pNoIntervalTree=True, pUpperTriangleOnly=True)
            else:
                # restore the full matrix before restricting it again
                hic_matrix.setMatrix(
                    deepcopy(matrix), deepcopy(cut_intervals))
                hic_matrix.keepOnlyTheseChr([chromosome])
            loops = compute_loops(hic_matrix, chromosome, args, is_cooler)
            if loops is None:
                log.error('No loops could be detected. Please change your input parameters, use a matrix with a better read coverage or contact the develops on https://github.com/deeptools/HiCExplorer/issues')
                exit(1)
            if 'Fail: ' in loops:
                log.error(loops[6:])
                exit(1)
            mapped_loops.extend(loops)
    else:
        queue = [None] * args.threads
        process = [None] * args.threads
        all_data_processed = False
        all_threads_done = False
        thread_done = [False] * args.threads
        count_call_of_read_input = 0
        # Poll the worker slots until every chromosome was handed out and
        # every started worker has delivered its result.
        while not all_data_processed or not all_threads_done:
            for i in range(args.threads):
                if queue[i] is None and not all_data_processed:
                    # free slot: start a worker for the next chromosome
                    if count_call_of_read_input >= len(chromosomes_list):
                        all_data_processed = True
                        continue
                    queue[i] = Queue()
                    thread_done[i] = False
                    process[i] = Process(target=compute_loops, kwargs=dict(
                        pHiCMatrix=args.matrix,
                        pRegion=chromosomes_list[count_call_of_read_input],
                        pArgs=args,
                        pIsCooler=is_cooler,
                        pQueue=queue[i]
                    ))
                    process[i].start()

                    if count_call_of_read_input < len(chromosomes_list):
                        count_call_of_read_input += 1
                    else:
                        all_data_processed = True
                elif queue[i] is not None and not queue[i].empty():
                    # worker finished: collect its result and free the slot
                    result = queue[i].get()
                    if result is not None:
                        if 'Fail: ' in result:
                            fail_flag = True
                            fail_message = result
                        elif result[0] is not None:
                            mapped_loops.extend(result[0])

                    queue[i] = None
                    process[i].join()
                    process[i].terminate()
                    process[i] = None
                    thread_done[i] = True
                elif all_data_processed and queue[i] is None:
                    thread_done[i] = True
                else:
                    time.sleep(1)

            if all_data_processed:
                all_threads_done = True
                for thread in thread_done:
                    if not thread:
                        all_threads_done = False

    if fail_flag:
        if fail_message is not None:
            # strip the leading 'Fail: ' marker
            log.error(fail_message[6:])
        else:
            log.error('An error occurred.')
        exit(1)
    if len(mapped_loops) > 0:
        write_bedgraph(mapped_loops, args.outFileName)
    log.info("Number of detected loops for all regions: {}".format(
        len(mapped_loops)))

Example #7

Show file

def main(args=None):
    """Compute loops for one region or for whole chromosomes and write them out.

    With ``--region`` the matrix is restricted to that region (cooler) or to
    its chromosome (other formats) and processed directly. Without it every
    chromosome from ``--chromosomes`` (or all chromosomes of the matrix) is
    processed; one chromosome in this process, several chromosomes in up to
    ``args.threads`` worker processes that report through a ``Queue``.

    ``--region`` and ``--chromosomes`` are mutually exclusive; passing both
    logs an error and calls ``exit(1)``.

    :param args: command line arguments forwarded to the argument parser.
    """
    args = parse_arguments().parse_args(args)
    log.info('peak interactions threshold set to {}'.format(
        args.peakInteractionsThreshold))

    if args.region is not None and args.chromosomes is not None:
        log.error('Please choose either --region or --chromosomes.')
        exit(1)
    log.debug('args.matrix {}'.format(args.matrix))
    is_cooler = check_cooler(args.matrix)
    log.debug('is_cooler {}'.format(is_cooler))
    if args.region:
        chrom, region_start, region_end = translate_region(args.region)

        if is_cooler:
            hic_matrix = hm.hiCMatrix(pMatrixFile=args.matrix,
                                      pChrnameList=[args.region])
        else:
            hic_matrix = hm.hiCMatrix(args.matrix)
            hic_matrix.keepOnlyTheseChr([chrom])
        mapped_loops = compute_loops(hic_matrix, args.region, args)
        if mapped_loops is None:
            # The per-chromosome path below treats None as "no loops";
            # do the same here so neither the writer nor the final
            # len() call receives None.
            mapped_loops = []
        else:
            write_bedgraph(mapped_loops, args.outFileName, region_start,
                           region_end)

    else:
        mapped_loops = []

        if not is_cooler:
            hic_matrix = hm.hiCMatrix(args.matrix)
            matrix = deepcopy(hic_matrix.matrix)
            cut_intervals = deepcopy(hic_matrix.cut_intervals)

        if args.chromosomes is None:
            # get all chromosomes from cooler file
            if not is_cooler:
                chromosomes_list = list(hic_matrix.chrBinBoundaries)
            else:
                chromosomes_list = cooler.Cooler(args.matrix).chromnames
        else:
            chromosomes_list = args.chromosomes

        single_core = len(chromosomes_list) == 1

        if single_core:
            for chromosome in chromosomes_list:
                if is_cooler:
                    hic_matrix = hm.hiCMatrix(pMatrixFile=args.matrix,
                                              pChrnameList=[chromosome])
                else:
                    # restore the full matrix before restricting it again
                    hic_matrix.setMatrix(deepcopy(matrix),
                                         deepcopy(cut_intervals))
                    hic_matrix.keepOnlyTheseChr([chromosome])
                hic_matrix.maskBins(hic_matrix.nan_bins)
                loops = compute_loops(hic_matrix, chromosome, args)
                if loops is not None:
                    mapped_loops.extend(loops)
        else:
            queue = [None] * args.threads
            process = [None] * args.threads
            all_data_processed = False
            all_threads_done = False
            thread_done = [False] * args.threads
            count_call_of_read_input = 0
            # Poll the worker slots until every chromosome was handed out
            # and every started worker has delivered its result.
            while not all_data_processed or not all_threads_done:
                for i in range(args.threads):
                    if queue[i] is None and not all_data_processed:
                        # free slot: prepare the next chromosome
                        if count_call_of_read_input >= len(chromosomes_list):
                            all_data_processed = True
                            continue
                        queue[i] = Queue()
                        thread_done[i] = False
                        if is_cooler:
                            hic_matrix = hm.hiCMatrix(
                                pMatrixFile=args.matrix,
                                pChrnameList=[
                                    chromosomes_list[count_call_of_read_input]
                                ])
                        else:
                            hic_matrix.setMatrix(deepcopy(matrix),
                                                 deepcopy(cut_intervals))
                            hic_matrix.keepOnlyTheseChr(
                                [chromosomes_list[count_call_of_read_input]])
                        if len(hic_matrix.matrix.data) > 0:

                            process[i] = Process(
                                target=compute_loops,
                                kwargs=dict(pHiCMatrix=hic_matrix,
                                            pRegion=chromosomes_list[
                                                count_call_of_read_input],
                                            pArgs=args,
                                            pQueue=queue[i]))
                            process[i].start()

                        else:
                            # nothing to compute for an empty matrix;
                            # release the slot right away
                            queue[i] = None
                            thread_done[i] = True
                        if count_call_of_read_input < len(chromosomes_list):
                            count_call_of_read_input += 1
                        else:
                            all_data_processed = True
                    elif queue[i] is not None and not queue[i].empty():
                        # worker finished: collect its result, free the slot
                        result = queue[i].get()
                        if result[0] is not None:
                            mapped_loops.extend(result[0])

                        queue[i] = None
                        process[i].join()
                        process[i].terminate()
                        process[i] = None
                        thread_done[i] = True
                    elif all_data_processed and queue[i] is None:
                        thread_done[i] = True
                    else:
                        time.sleep(1)

                if all_data_processed:
                    all_threads_done = True
                    for thread in thread_done:
                        if not thread:
                            all_threads_done = False
        log.debug('done computing. loops {}'.format(mapped_loops))
        if len(mapped_loops) > 0:
            write_bedgraph(mapped_loops, args.outFileName)

    log.info("Number of detected loops for all regions: {}".format(
        len(mapped_loops)))

Example #8

Show file

File: hicCorrectMatrix.py Project: Rungetf/HiCExplorer

def main(args=None):
    """Filter and iteratively correct a Hi-C matrix, then save it.

    Steps, in order: load the matrix (a single chromosome directly from a
    cooler file when exactly one is requested), mask all-zero rows, replace
    nan/inf entries by zero, optionally only produce the diagnostic plot and
    return, mask z-score outliers and low coverage bins, optionally truncate
    values via ``truncTrans``, run ``iterative_correction`` (per chromosome
    with ``--perchr``), mask over-inflated rows and save the result to
    ``args.outFileName``.

    :param args: command line arguments forwarded to the argument parser.
    """
    args = parse_arguments().parse_args(args)
    if args.verbose:
        log.setLevel(logging.INFO)

    # args.chromosomes
    if check_cooler(args.matrix) and args.chromosomes is not None and len(args.chromosomes) == 1:
        ma = hm.hiCMatrix(args.matrix, pChrnameList=toString(args.chromosomes))
    else:
        ma = hm.hiCMatrix(args.matrix)

        if args.chromosomes:
            ma.reorderChromosomes(toString(args.chromosomes))

    # mask all zero value bins
    row_sum = np.asarray(ma.matrix.sum(axis=1)).flatten()
    log.info("Removing {} zero value bins".format(np.count_nonzero(row_sum == 0)))
    ma.maskBins(np.flatnonzero(row_sum == 0))
    matrix_shape = ma.matrix.shape
    ma.matrix = convertNansToZeros(ma.matrix)
    ma.matrix = convertInfsToZeros(ma.matrix)

    if 'plotName' in args:
        # diagnostic-plot mode: no correction is performed
        plot_total_contact_dist(ma, args)
        log.info("Saving diagnostic plot {}\n".format(args.plotName))
        return

    log.info("matrix contains {} data points. Sparsity {:.3f}.".format(
        len(ma.matrix.data),
        float(len(ma.matrix.data)) / (ma.matrix.shape[0] ** 2)))

    if args.skipDiagonal:
        ma.diagflat(value=0)

    outlier_regions = filter_by_zscore(ma, args.filterThreshold[0], args.filterThreshold[1], perchr=args.perchr)
    # compute and print some statistics
    pct_outlier = 100 * float(len(outlier_regions)) / ma.matrix.shape[0]
    ma.printchrtoremove(outlier_regions, label="Bins that are MAD outliers ({:.2f}%) "
                                               "out of".format(pct_outlier, ma.matrix.shape[0]),
                        restore_masked_bins=False)

    assert matrix_shape == ma.matrix.shape
    # mask filtered regions
    ma.maskBins(outlier_regions)
    total_filtered_out = set(outlier_regions)

    if args.sequencedCountCutoff and 0 < args.sequencedCountCutoff < 1:
        # fourth field of every cut interval is compared against the cutoff
        _, _, _, coverage = zip(*ma.cut_intervals)

        assert type(coverage[0]) == np.float64

        failed_bins = np.flatnonzero(
            np.array(coverage) < args.sequencedCountCutoff)

        ma.printchrtoremove(failed_bins, label="Bins with low coverage", restore_masked_bins=False)
        ma.maskBins(failed_bins)
        # Accumulate instead of overwrite, so the outlier bins collected
        # above stay part of the final "total removed" report.
        total_filtered_out = total_filtered_out.union(failed_bins)

    if args.transCutoff and 0 < args.transCutoff < 100:
        cutoff = float(args.transCutoff) / 100
        # a usual cutoff is 0.05
        ma.truncTrans(high=cutoff)

    # row sums before correction; used below to measure row inflation
    pre_row_sum = np.asarray(ma.matrix.sum(axis=1)).flatten()
    correction_factors = []
    if args.perchr:
        corrected_matrix = lil_matrix(ma.matrix.shape)
        # normalize each chromosome independently
        for chrname in list(ma.interval_trees):
            chr_range = ma.getChrBinRange(chrname)
            chr_submatrix = ma.matrix[chr_range[0]:chr_range[1], chr_range[0]:chr_range[1]]
            _matrix, _corr_factors = iterative_correction(chr_submatrix, args)
            corrected_matrix[chr_range[0]:chr_range[1], chr_range[0]:chr_range[1]] = _matrix
            correction_factors.append(_corr_factors)
        correction_factors = np.concatenate(correction_factors)

    else:
        corrected_matrix, correction_factors = iterative_correction(ma.matrix, args)

    ma.setMatrixValues(corrected_matrix)
    ma.setCorrectionFactors(correction_factors)
    log.info("Correction factors {}".format(correction_factors[:10]))
    if args.inflationCutoff and args.inflationCutoff > 0:
        after_row_sum = np.asarray(corrected_matrix.sum(axis=1)).flatten()
        # identify rows that were expanded more than args.inflationCutoff times
        to_remove = np.flatnonzero(after_row_sum / pre_row_sum >= args.inflationCutoff)
        ma.printchrtoremove(to_remove,
                            label="inflated >={} "
                            "regions".format(args.inflationCutoff), restore_masked_bins=False)
        total_filtered_out = total_filtered_out.union(to_remove)

        ma.maskBins(to_remove)

    ma.printchrtoremove(sorted(list(total_filtered_out)),
                        label="Total regions to be removed", restore_masked_bins=False)

    # the stored matrix is already corrected
    ma.save(args.outFileName, pApplyCorrection=False)

Example #9

Show file

def main(args=None):
    args = parse_arguments().parse_args(args)
    mpl.rcParams['pdf.fonttype'] = 42

    # read domains file
    domains_df = readDomainBoundaries(args.tadDomains)
    # log.debug('len(domains_df) {}'.format(len(domains_df)))
    domains = domains_df.values.tolist()
    old_chromosome = None

    tads_per_chromosome = []

    for j in range(len(domains)):
        if old_chromosome is None:
            old_chromosome = domains[j][0]
            per_chromosome = []
            per_chromosome.append(domains[j])

        elif old_chromosome == domains[j][0]:
            per_chromosome.append(domains[j])
            continue
        else:
            tads_per_chromosome.append(per_chromosome)
            per_chromosome = []
            per_chromosome.append(domains[j])
            old_chromosome = domains[j][0]
    tads_per_chromosome.append(per_chromosome)

    # read full h5 or only region if cooler
    is_cooler = check_cooler(args.matrix)

    if not is_cooler:
        hic_matrix = hm.hiCMatrix(args.matrix)
    else:
        hic_matrix = args.matrix

    inter_left_sum_list_chromosomes = []
    inter_right_sum_list_chromosomes = []
    inter_left_density_list_chromosomes = []
    inter_right_density_list_chromosomes = []
    inter_left_number_of_contacts_list_chromosomes = []
    inter_right_number_of_contacts_list_chromosomes = []
    inter_left_number_of_contacts_nnz_list_chromosomes = []
    inter_right_number_of_contacts_nzz_list_chromosomes = []

    intra_sum_list_chromosomes = []
    intra_number_of_contacts_list_chromosomes = []
    intra_number_of_contacts_nnz_list_chromosomes = []
    intra_density_list_chromosomes = []
    inter_left_intra_ratio_list_chromosomes = []
    inter_right_intra_ratio_list_chromosomes = []
    inter_left_inter_right_intra_ratio_list_chromosomes = []

    rows_chromosomes = []

    inter_left_sum_list_threads = [[]] * args.threads
    inter_right_sum_list_threads = [[]] * args.threads
    inter_left_density_list_threads = [[]] * args.threads
    inter_right_density_list_threads = [[]] * args.threads
    inter_left_number_of_contacts_list_threads = [[]] * args.threads
    inter_right_number_of_contacts_list_threads = [[]] * args.threads
    inter_left_number_of_contacts_nnz_list_threads = [[]] * args.threads
    inter_right_number_of_contacts_nzz_list_threads = [[]] * args.threads

    intra_sum_list_threads = [[]] * args.threads
    intra_number_of_contacts_list_threads = [[]] * args.threads
    intra_number_of_contacts_nnz_list_threads = [[]] * args.threads
    intra_density_list_threads = [[]] * args.threads
    inter_left_intra_ratio_list_threads = [[]] * args.threads
    inter_right_intra_ratio_list_threads = [[]] * args.threads
    inter_left_inter_right_intra_ratio_list_threads = [[]] * args.threads

    rows_threads = [[]] * args.threads

    threads_save = deepcopy(args.threads)
    for chromosome in tads_per_chromosome:
        # log.debug('tads_per_chromosome {}'.format(chromosome))
        domainsPerThread = len(chromosome) // args.threads
        if domainsPerThread == 0 and len(chromosome) > 0:
            domainsPerThread = 1
            args.threads = 1
        elif domainsPerThread > 0:
            args.threads = threads_save

        all_data_collected = False
        queue = [None] * args.threads
        process = [None] * args.threads
        thread_done = [False] * args.threads
        # None --> first thread, process first element in list, ignore last one
        # True --> middle thread: ignore first and last element in tad processing
        # False --> last thread: ignore first element, process last one
        thread_id = None
        for i in range(args.threads):

            if args.threads == 1:
                domainListThread = chromosome

            elif i == 0:
                domainListThread = chromosome[i * domainsPerThread:(
                    (i + 1) * domainsPerThread) + 1]
                thread_id = None
            elif i < args.threads - 1:
                domainListThread = chromosome[(i * domainsPerThread) -
                                              1:((i + 1) * domainsPerThread) +
                                              1]
                thread_id = True

            else:
                domainListThread = chromosome[(i * domainsPerThread) - 1:]
                thread_id = False

            if args.threads == 1:
                thread_id = ''

            # log.debug('len(domainListThread) {}'.format(len(domainListThread)))
            # log.debug('len(thread_id) {}'.format(thread_id))

            queue[i] = Queue()
            process[i] = Process(
                target=computeInterIntraTADs,
                kwargs=dict(
                    pMatrix=hic_matrix,
                    # pMatrixControl=hic_matrix_control,
                    pDomainList=domainListThread,
                    pCoolOrH5=is_cooler,
                    # pPValue=args.pValue,
                    pThreadId=thread_id,
                    pQueue=queue[i]))

            process[i].start()
        fail_flag = False
        fail_message = ''
        while not all_data_collected:
            for i in range(args.threads):

                if queue[i] is not None and not queue[i].empty():
                    queue_data = queue[i].get()
                    if 'Fail:' in queue_data:
                        fail_flag = True
                        fail_message = queue_data
                    else:
                        inter_left_sum_list_threads[i], \
                            inter_right_sum_list_threads[i], \
                            inter_left_density_list_threads[i], \
                            inter_right_density_list_threads[i], \
                            inter_left_number_of_contacts_list_threads[i], \
                            inter_right_number_of_contacts_list_threads[i], \
                            inter_left_number_of_contacts_nnz_list_threads[i], \
                            inter_right_number_of_contacts_nzz_list_threads[i], \
                            intra_sum_list_threads[i], \
                            intra_number_of_contacts_list_threads[i], \
                            intra_number_of_contacts_nnz_list_threads[i], \
                            intra_density_list_threads[i], \
                            inter_left_intra_ratio_list_threads[i], \
                            inter_right_intra_ratio_list_threads[i], \
                            inter_left_inter_right_intra_ratio_list_threads[i], \
                            rows_threads[i] = queue_data

                    queue[i] = None
                    process[i].join()
                    process[i].terminate()
                    process[i] = None
                    thread_done[i] = True
                # elif queue[i] is None and

            all_data_collected = True
            for thread in thread_done:
                if not thread:
                    all_data_collected = False
            time.sleep(1)

        if fail_flag:
            log.error(fail_message[6:])
            exit(1)

        inter_left_sum_list_chromosomes.append([
            item for sublist in inter_left_sum_list_threads for item in sublist
        ])
        inter_right_sum_list_chromosomes.append([
            item for sublist in inter_right_sum_list_threads
            for item in sublist
        ])
        inter_left_density_list_chromosomes.append([
            item for sublist in inter_left_density_list_threads
            for item in sublist
        ])
        inter_right_density_list_chromosomes.append([
            item for sublist in inter_right_density_list_threads
            for item in sublist
        ])
        inter_left_number_of_contacts_list_chromosomes.append([
            item for sublist in inter_left_number_of_contacts_list_threads
            for item in sublist
        ])
        inter_right_number_of_contacts_list_chromosomes.append([
            item for sublist in inter_right_number_of_contacts_list_threads
            for item in sublist
        ])
        inter_left_number_of_contacts_nnz_list_chromosomes.append([
            item for sublist in inter_left_number_of_contacts_nnz_list_threads
            for item in sublist
        ])
        inter_right_number_of_contacts_nzz_list_chromosomes.append([
            item for sublist in inter_right_number_of_contacts_nzz_list_threads
            for item in sublist
        ])

        intra_sum_list_chromosomes.append(
            [item for sublist in intra_sum_list_threads for item in sublist])
        intra_number_of_contacts_list_chromosomes.append([
            item for sublist in intra_number_of_contacts_list_threads
            for item in sublist
        ])
        intra_number_of_contacts_nnz_list_chromosomes.append([
            item for sublist in intra_number_of_contacts_nnz_list_threads
            for item in sublist
        ])
        intra_density_list_chromosomes.append([
            item for sublist in intra_density_list_threads for item in sublist
        ])
        inter_left_intra_ratio_list_chromosomes.append([
            item for sublist in inter_left_intra_ratio_list_threads
            for item in sublist
        ])
        inter_right_intra_ratio_list_chromosomes.append([
            item for sublist in inter_right_intra_ratio_list_threads
            for item in sublist
        ])
        inter_left_inter_right_intra_ratio_list_chromosomes.append([
            item for sublist in inter_left_inter_right_intra_ratio_list_threads
            for item in sublist
        ])

        rows_chromosomes.append(
            [item for sublist in rows_threads for item in sublist])

    inter_left_sum_list = [
        item for sublist in inter_left_sum_list_chromosomes for item in sublist
    ]
    inter_right_sum_list = [
        item for sublist in inter_right_sum_list_chromosomes
        for item in sublist
    ]
    inter_left_density_list = [
        item for sublist in inter_left_density_list_chromosomes
        for item in sublist
    ]
    inter_right_density_list = [
        item for sublist in inter_right_density_list_chromosomes
        for item in sublist
    ]
    inter_left_number_of_contacts_list = [
        item for sublist in inter_left_number_of_contacts_list_chromosomes
        for item in sublist
    ]
    inter_right_number_of_contacts_list = [
        item for sublist in inter_right_number_of_contacts_list_chromosomes
        for item in sublist
    ]
    inter_left_number_of_contacts_nnz_list = [
        item for sublist in inter_left_number_of_contacts_nnz_list_chromosomes
        for item in sublist
    ]
    inter_right_number_of_contacts_nzz_list = [
        item for sublist in inter_right_number_of_contacts_nzz_list_chromosomes
        for item in sublist
    ]

    intra_sum_list = [
        item for sublist in intra_sum_list_chromosomes for item in sublist
    ]
    intra_number_of_contacts_list = [
        item for sublist in intra_number_of_contacts_list_chromosomes
        for item in sublist
    ]
    intra_number_of_contacts_nnz_list = [
        item for sublist in intra_number_of_contacts_nnz_list_chromosomes
        for item in sublist
    ]
    intra_density_list = [
        item for sublist in intra_density_list_chromosomes for item in sublist
    ]
    inter_left_intra_ratio_list = [
        item for sublist in inter_left_intra_ratio_list_chromosomes
        for item in sublist
    ]
    inter_right_intra_ratio_list = [
        item for sublist in inter_right_intra_ratio_list_chromosomes
        for item in sublist
    ]
    inter_left_inter_right_intra_ratio_list = [
        item for sublist in inter_left_inter_right_intra_ratio_list_chromosomes
        for item in sublist
    ]

    rows = [item for sublist in rows_chromosomes for item in sublist]

    with open(args.outFileName, 'w') as file:
        header = '# Created with HiCExplorer\'s hicInterIntraTAD version ' + __version__ + '\n'
        header += '# Chromosome\tstart\tend\tname\tscore\tstrand\tinter_left_sum\tinter_right_sum\tinter_left_density\tinter_right_density\tinter_left_number_of_contacts\tinter_right_number_of_contacts\t'  \
            'inter_left_number_of_contacts_nnz\tinter_right_number_of_contacts_nnz\tintra_sum\tintra_number_of_contacts\tintra_number_of_contacts_nnz\tintra_density\tinter_left_intra_ratio\tinter_right_intra_ratio\tinter_left_inter_right_intra_ratio\n'
        file.write(header)
        for i, row in enumerate(rows):
            row_list = list(map(str, row))

            file.write('\t'.join(row_list))

            file.write('\t{}'.format(inter_left_sum_list[i]))
            file.write('\t{}'.format(inter_right_sum_list[i]))
            file.write('\t{}'.format(inter_left_density_list[i]))
            file.write('\t{}'.format(inter_right_density_list[i]))
            file.write('\t{}'.format(inter_left_number_of_contacts_list[i]))
            file.write('\t{}'.format(inter_right_number_of_contacts_list[i]))
            file.write('\t{}'.format(
                inter_left_number_of_contacts_nnz_list[i]))
            file.write('\t{}'.format(
                inter_right_number_of_contacts_nzz_list[i]))
            file.write('\t{}'.format(intra_sum_list[i]))
            file.write('\t{}'.format(intra_number_of_contacts_list[i]))
            file.write('\t{}'.format(intra_number_of_contacts_nnz_list[i]))
            file.write('\t{}'.format(intra_density_list[i]))
            file.write('\t{}'.format(inter_left_intra_ratio_list[i]))
            file.write('\t{}'.format(inter_right_intra_ratio_list[i]))
            file.write('\t{}'.format(
                inter_left_inter_right_intra_ratio_list[i]))

            file.write('\n')

    plt.scatter(inter_left_intra_ratio_list,
                inter_right_intra_ratio_list,
                s=20,
                alpha=0.7)
    plt.xlabel('Inter-left/intra TAD contact ratio', fontsize=args.fontsize)
    plt.ylabel('Inter-right/intra TAD contact ratio', fontsize=args.fontsize)
    plt.tight_layout()
    plt.savefig(args.outFileNameRatioPlot, dpi=args.dpi)
    plt.close()