def test_correct_matrix_ICE():
    """Run the ICE correction on two chromosomes and compare with the stored result.

    The corrected matrix is written to a temporary ``.ICE.h5`` file, loaded
    again and checked (matrix data and cut intervals) against the reference
    file below ``ROOT``. The temporary file is removed at the end.
    """
    tmp = NamedTemporaryFile(suffix='.ICE.h5', delete=False)
    tmp.close()
    corrected_path = tmp.name

    template = ("correct --matrix {} --correctionMethod ICE --chromosomes "
                "chrUextra chr3LHet --iterNum 500  --outFileName {} "
                "--filterThreshold -1.5 5.0")
    cli_args = template.format(ROOT + "small_test_matrix.h5",
                               corrected_path).split()
    # `compute` is a helper defined elsewhere that is handed the entry point,
    # its arguments and the value 5 (presumably a retry count -- confirm).
    compute(hicCorrectMatrix.main, cli_args, 5)

    expected = hm.hiCMatrix(
        ROOT + "hicCorrectMatrix/small_test_matrix_ICEcorrected_chrUextra_chr3LHet.h5")
    actual = hm.hiCMatrix(corrected_path)
    nt.assert_equal(expected.matrix.data, actual.matrix.data)
    nt.assert_equal(expected.cut_intervals, actual.cut_intervals)

    os.unlink(corrected_path)
Beispiel #2
0
def test_trivial_functionality(
    matrices,
    outputFormat,
    resolutions,
):
    """
        Convert a matrix with hicConvertFormat and compare it with its input.

        The input format is derived from the suffix of ``matrices``; the
        converted file must hold the same matrix data (up to DELTA_DECIMAL
        decimals) and the same (chrom, start, end) cut intervals.
        Options for the cool input format are tested separately.
    """
    from pathlib import Path

    # suffix of the input matrix without the leading dot
    input_format = Path(matrices).suffix[1:]

    # Only the name of the temporary file is needed: closing a delete=True
    # temporary file removes it again.
    tmp = NamedTemporaryFile(suffix=".{}".format(outputFormat), delete=True)
    tmp.close()
    converted_path = tmp.name

    template = "--matrices {} --outFileName {} --inputFormat {} --outputFormat {} {}"
    cli_args = template.format(matrices, converted_path, input_format,
                               outputFormat, resolutions).split()

    hicConvertFormat.main(cli_args)

    expected = hm.hiCMatrix(matrices)
    actual = hm.hiCMatrix(converted_path)

    nt.assert_array_almost_equal(expected.matrix.data,
                                 actual.matrix.data,
                                 decimal=DELTA_DECIMAL)
    nt.assert_equal(len(actual.cut_intervals), len(expected.cut_intervals))

    # compare only the first three fields of every interval
    actual_regions = [interval[:3] for interval in actual.cut_intervals]
    expected_regions = [interval[:3] for interval in expected.cut_intervals]
    nt.assert_equal(actual_regions, expected_regions)

    os.unlink(converted_path)
Beispiel #3
0
def test_pca_bigwig_gene_density_intermediate_matrices():
    """Run hicPCA with an extra gene track and check all four outputs.

    Two bigwig eigenvector files plus the Pearson and obs/exp intermediate
    matrices are written to temporary files and compared with the reference
    data below ``ROOT + "hicPCA/"``. All temporary files are removed at the
    end.
    """
    def reserve(suffix):
        # create an empty, closed temporary file and hand back its path
        handle = NamedTemporaryFile(suffix=suffix, delete=False)
        handle.close()
        return handle.name

    pca1_path = reserve('.bw')
    pca2_path = reserve('.bw')
    pearson_path = reserve('.h5')
    obs_exp_path = reserve('.h5')

    matrix = ROOT + "small_test_matrix.h5"
    gene_track = ROOT + 'dm3_genes.bed.gz'
    chromosomes = 'chrX chrXHet'
    template = ("--matrix {} --outputFileName {} {} -f bigwig -noe 2 "
                "    --extraTrack {} --chromosomes {} --pearsonMatrix {} --obsexpMatrix {}")
    cli_args = template.format(matrix, pca1_path, pca2_path, gene_track,
                               chromosomes, pearson_path,
                               obs_exp_path).split()
    hicPCA.main(cli_args)

    chrom_list = ['chrX', 'chrXHet']
    assert are_files_equal_bigwig(ROOT + "hicPCA/pca1_gene_track.bw",
                                  pca1_path, chrom_list)
    assert are_files_equal_bigwig(ROOT + "hicPCA/pca2_gene_track.bw",
                                  pca2_path, chrom_list)

    # load the matrices in the same order as before: reference, then result
    expected_pearson = hm.hiCMatrix(ROOT + "hicPCA/pearson_intermediate.h5")
    actual_pearson = hm.hiCMatrix(pearson_path)
    expected_obs_exp = hm.hiCMatrix(ROOT + "hicPCA/obs_exp_intermediate.h5")
    actual_obs_exp = hm.hiCMatrix(obs_exp_path)

    nt.assert_array_almost_equal(expected_pearson.matrix.data,
                                 actual_pearson.matrix.data,
                                 decimal=DELTA_DECIMAL)
    nt.assert_array_almost_equal(expected_obs_exp.matrix.data,
                                 actual_obs_exp.matrix.data,
                                 decimal=DELTA_DECIMAL)

    for leftover in (pca1_path, pca2_path, obs_exp_path, pearson_path):
        os.unlink(leftover)
def test_correct_matrix_KR():
    """Run the KR correction on two chromosomes and compare with the stored result.

    Matrix data must agree to 10 decimals and the cut intervals must be
    equal. The temporary output file is removed at the end.
    """
    tmp = NamedTemporaryFile(suffix='.KR.h5', delete=False)
    tmp.close()
    corrected_path = tmp.name

    template = ("correct --matrix {} --correctionMethod KR --chromosomes "
                "chrUextra chr3LHet --outFileName {} ")
    cli_args = template.format(ROOT + "small_test_matrix.h5",
                               corrected_path).split()
    hicCorrectMatrix.main(cli_args)

    reference_path = (
        ROOT +
        "hicCorrectMatrix/small_test_matrix_KRcorrected_chrUextra_chr3LHet.h5")
    expected = hm.hiCMatrix(reference_path)
    actual = hm.hiCMatrix(corrected_path)
    nt.assert_almost_equal(expected.matrix.data, actual.matrix.data, decimal=10)
    nt.assert_equal(expected.cut_intervals, actual.cut_intervals)

    os.unlink(corrected_path)
def test_correct_matrix_KR_cool():
    """Run the KR correction on the whole cool test matrix and compare with the stored result.

    Matrix data must agree to 5 decimals and the cut intervals must be equal.
    The temporary output file is removed at the end.
    """
    tmp = NamedTemporaryFile(suffix='_KR.cool', delete=False)
    tmp.close()
    corrected_path = tmp.name

    template = ("correct --matrix {} --correctionMethod KR "
                "--outFileName {} ")
    cli_args = template.format(ROOT + "small_test_matrix.cool",
                               corrected_path).split()
    # `compute` is a helper defined elsewhere that is handed the entry point,
    # its arguments and the value 5 (presumably a retry count -- confirm).
    compute(hicCorrectMatrix.main, cli_args, 5)

    expected = hm.hiCMatrix(ROOT + "hicCorrectMatrix/kr_full.cool")
    actual = hm.hiCMatrix(corrected_path)
    nt.assert_almost_equal(expected.matrix.data, actual.matrix.data, decimal=5)
    nt.assert_equal(expected.cut_intervals, actual.cut_intervals)

    os.unlink(corrected_path)
def main(args=None):
    """
    Main function to generate the polarization plot.

    Reads the PC1 track given by ``args.pca`` (tab separated: chromosome,
    start, end, value), assigns every bin to a quantile of its PC1 value,
    aggregates each obs/exp matrix per quantile pair and plots the resulting
    polarization ratios. If ``args.outputMatrix`` is set, the per-quantile
    matrices are additionally stored with ``np.savez``.

    :param args: command line arguments; ``None`` lets argparse read
                 ``sys.argv``.
    """
    args = parse_arguments().parse_args(args)
    matplotlib.rcParams['pdf.fonttype'] = 42

    pc1 = pd.read_table(args.pca,
                        header=None,
                        sep="\t",
                        dtype={
                            0: "object",
                            1: "Int64",
                            2: "Int64",
                            3: "float32"
                        })

    pc1 = pc1.rename(columns={0: "chr", 1: "start", 2: "end", 3: "pc1"})

    # float view of the PC1 values, used for all quantile computations
    pc1_values = pc1['pc1'].values.astype(float)

    if args.outliers != 0:
        # cut off the outer `outliers` percent on both sides and split the
        # remaining value range into equally spaced bins
        quantile = [args.outliers / 100, (100 - args.outliers) / 100]
        boundaries = np.nanquantile(pc1_values, quantile)
        quantiled_bins = np.linspace(boundaries[0], boundaries[1],
                                     args.quantile)
    else:
        quantile = [j / (args.quantile - 1) for j in range(0, args.quantile)]
        quantiled_bins = np.nanquantile(pc1_values, quantile)

    pc1["quantile"] = np.searchsorted(quantiled_bins,
                                      pc1_values,
                                      side="right")
    # Bins without a PC1 value get a quantile id outside the regular range.
    # `== np.nan` is never true and chained indexing assigns to a copy, so
    # the NaN rows are selected with isna() and written via a single .loc.
    pc1.loc[pc1["pc1"].isna(), "quantile"] = args.quantile + 1

    polarization_ratio = []
    output_matrices = []
    labels = []
    for matrix in args.obsexp_matrices:
        obs_exp = hm.hiCMatrix(matrix)
        pc1["bin_id"] = pc1.apply(lambda row: get_indices(obs_exp, row),
                                  axis=1)
        # label = file name without directory and without the last extension
        name = ".".join(matrix.split("/")[-1].split(".")[0:-1])
        labels.append(name)
        normalised_sum_per_quantile = count_interactions(
            obs_exp, pc1, args.quantile, args.offset)
        normalised_sum_per_quantile = np.nan_to_num(
            normalised_sum_per_quantile)
        if args.outputMatrix:
            output_matrices.append(normalised_sum_per_quantile)

        polarization_ratio.append(
            within_vs_between_compartments(normalised_sum_per_quantile,
                                           args.quantile))
    if args.outputMatrix:
        np.savez(args.outputMatrix, list(output_matrices))
    plot_polarization_ratio(polarization_ratio, args.outputFileName, labels,
                            args.quantile)
Beispiel #7
0
def _zero_nan_and_inf(values):
    """Overwrite NaN and infinite entries of the array *values* with 0, in place."""
    values[np.isnan(values)] = 0
    values[np.isinf(values)] = 0


def main(args=None):
    """Normalize the given matrices and save one output file per input.

    ``--normalize norm_range`` rescales the stored values of every matrix
    with ``(x - min) / (max - min)``; ``--normalize smallest`` divides every
    matrix by the ratio of its sum to the smallest matrix sum. NaN and
    infinite values are replaced by 0 and explicit zeros are removed before
    saving to ``args.outFileName[i]``.

    :param args: command line arguments; ``None`` lets argparse read
                 ``sys.argv``.
    """
    args = parse_arguments().parse_args(args)
    use_smallest = args.normalize == 'smallest'

    loaded_matrices = []
    matrix_sums = []
    for path in args.matrices:
        hic_ma = hm.hiCMatrix(path)
        if use_smallest:
            matrix_sums.append(hic_ma.matrix.sum())
        loaded_matrices.append(hic_ma)

    if args.normalize == 'norm_range':
        for index, hic_matrix in enumerate(loaded_matrices):
            hic_matrix.matrix.data = hic_matrix.matrix.data.astype(np.float32)
            _zero_nan_and_inf(hic_matrix.matrix.data)

            lowest = np.min(hic_matrix.matrix.data)
            highest = np.max(hic_matrix.matrix.data)
            value_span = np.float64(highest - lowest)

            hic_matrix.matrix.data -= lowest
            hic_matrix.matrix.data /= value_span

            # the division can introduce NaN/inf again (value_span == 0)
            _zero_nan_and_inf(hic_matrix.matrix.data)
            hic_matrix.matrix.eliminate_zeros()

            hic_matrix.save(args.outFileName[index], pApplyCorrection=False)
    elif use_smallest:
        smallest = np.argmin(matrix_sums)

        for index, hic_matrix in enumerate(loaded_matrices):
            hic_matrix.matrix.data = hic_matrix.matrix.data.astype(np.float32)
            if index != smallest:
                _zero_nan_and_inf(hic_matrix.matrix.data)
                scale = matrix_sums[index] / matrix_sums[smallest]
                hic_matrix.matrix.data /= scale

            _zero_nan_and_inf(hic_matrix.matrix.data)
            hic_matrix.matrix.eliminate_zeros()

            hic_matrix.save(args.outFileName[index], pApplyCorrection=False)
def main(args=None):
    """Average the sub-matrices around the regions of a BED-like file.

    Every non-empty line of ``args.regions`` must hold tab separated
    ``chrom, start`` or ``chrom, start, end`` fields. For each region the
    surrounding bin range is determined (``--range`` in genomic units or
    ``--rangeInBins``), the corresponding square sub-matrix is summed up and
    finally divided by the number of contributing regions per cell. The
    result is stored as a sparse matrix with ``save_npz``.

    Blank lines and lines with fewer than two fields are skipped.

    :param args: command line arguments; ``None`` lets argparse read
                 ``sys.argv``.
    """
    args = parse_arguments().parse_args(args)

    hic_ma = hm.hiCMatrix(pMatrixFile=args.matrix)
    indices_values = []
    with open(args.regions, 'r') as regions_file:
        for line in regions_file:
            stripped = line.strip()
            if not stripped:
                continue
            fields = stripped.split('\t')
            if len(fields) < 2:
                # Not a usable region. Skipping it avoids re-using the
                # viewpoint of the previous line.
                continue
            chrom, start = fields[0], fields[1]
            # a two-column line describes a single position
            end = fields[2] if len(fields) >= 3 else start
            viewpoint = (chrom, start, end)

            if args.range:
                start_range_genomic, end_range_genomic, start_out, end_out = calculateViewpointRange(
                    hic_ma, viewpoint, args.range, args.coordinatesToBinMapping)
                start_bin, end_bin = getBinIndices(
                    hic_ma, (chrom, start_range_genomic, end_range_genomic))
            else:
                start_bin, end_bin, start_out, end_out = calculateViewpointRangeBins(
                    hic_ma, viewpoint, args.rangeInBins, args.coordinatesToBinMapping)
            indices_values.append([start_bin, end_bin, start_out, end_out])

    if args.range:
        bin_size = hic_ma.getBinSize()
        dimensions_new_matrix = (args.range[0] // bin_size) + (args.range[1] // bin_size)
    elif args.rangeInBins:
        dimensions_new_matrix = args.rangeInBins[0] + args.rangeInBins[1]

    summed_matrix = lil_matrix((dimensions_new_matrix, dimensions_new_matrix), dtype=np.float32)
    count_matrix = np.zeros(shape=(dimensions_new_matrix, dimensions_new_matrix))

    for start, end, start_out, end_out in indices_values:
        _start = 0
        _end = summed_matrix.shape[1]
        orig_matrix_length = end - start
        # regions flagged as out of range are placed shifted in the target
        if start_out:
            _start = _end - orig_matrix_length
        if end_out:
            # NOTE(review): `start` indexes the full Hi-C matrix while `_end`
            # indexes the averaged matrix -- confirm this is intended.
            _end = start + orig_matrix_length
        count_matrix[_start:_end, _start:_end] += 1
        summed_matrix[_start:_end, _start:_end] += hic_ma.matrix[start:end, start:end]

    # element-wise mean over all regions that contributed to a cell
    summed_matrix /= count_matrix
    summed_matrix = np.array(summed_matrix)

    # rebuild a sparse matrix from the non-zero cells only
    row, col = np.nonzero(summed_matrix)
    data = summed_matrix[row, col]
    summed_matrix = csr_matrix((data, (row, col)), shape=(dimensions_new_matrix, dimensions_new_matrix))
    save_npz(args.outFileName, summed_matrix)
def test_hic_transfer_obs_exp_non_zero_perChromosome():
    """Run hicTransform with ``obs_exp_non_zero --perChromosome`` and compare with the stored result.

    The matrix data must agree up to DELTA_DECIMAL decimals. The temporary
    output file is removed at the end.
    """
    tmp = NamedTemporaryFile(suffix='obs_exp_.cool', delete=False)
    tmp.close()
    transformed_path = tmp.name

    template = "--matrix {} --outFileName {} --method obs_exp_non_zero --perChromosome"
    cli_args = template.format(original_matrix_cool, transformed_path).split()
    # `compute` is a helper defined elsewhere that is handed the entry point,
    # its arguments and the value 5 (presumably a retry count -- confirm).
    compute(hicTransform.main, cli_args, 5)

    expected = hm.hiCMatrix(ROOT +
                            "hicTransform/obs_exp_non_zero_per_chromosome.cool")
    actual = hm.hiCMatrix(transformed_path)
    nt.assert_array_almost_equal(expected.matrix.data,
                                 actual.matrix.data,
                                 decimal=DELTA_DECIMAL)
    os.unlink(transformed_path)
Beispiel #10
0
def test_save():
    """
    Save a small matrix as h5 and as cool and check that both files load
    back to the same matrix.

    Test will not cover testing of following formats due to unsupported file_formats (see __init__ of class hiCMatrix):

    * ren
    * lieberman
    * GInteractions

    see also single test for these formats (marked as xfail)
    """
    import os

    outfile_cool = NamedTemporaryFile(suffix='.cool', delete=False)
    outfile_cool.close()

    # The h5 round trip needs an .h5 suffix; with '.cool' the cool format
    # would be written twice and h5 never exercised.
    outfile_h5 = NamedTemporaryFile(suffix='.h5', delete=False)
    outfile_h5.close()

    hic = hm.hiCMatrix()
    cut_intervals = [('a', 0, 10, 1), ('a', 10, 20, 1), ('a', 20, 30, 1),
                     ('a', 30, 40, 1), ('b', 40, 50, 1)]

    hic.nan_bins = []

    matrix = np.array([[1, 8, 5, 3, 0], [0, 4, 15, 5, 1], [0, 0, 0, 0, 2],
                       [0, 0, 0, 0, 1], [0, 0, 0, 0, 0]])

    hic.matrix = csr_matrix(matrix)
    hic.setMatrix(hic.matrix, cut_intervals)
    hic.fillLowerTriangle()

    # test .h5
    hic.save(outfile_h5.name)
    h5_test = hm.hiCMatrix(outfile_h5.name)

    # test cool
    hic.save(outfile_cool.name)
    cool_test = hm.hiCMatrix(outfile_cool.name)

    nt.assert_equal(hic.getMatrix(), h5_test.getMatrix())
    nt.assert_equal(hic.getMatrix(), cool_test.getMatrix())

    # remove the temporary files created with delete=False
    os.unlink(outfile_h5.name)
    os.unlink(outfile_cool.name)
Beispiel #11
0
def main(args=None):
    """Combine two Hi-C matrices by 'diff', 'ratio' or 'log2ratio' and save the result.

    Both matrices must have the same shape and identical chromosome bin
    boundaries. Each matrix is first divided by the sum of its stored
    values. Bins that are NaN bins in either input are masked in the output,
    which is written to ``args.outFileName``.

    :param args: command line arguments; ``None`` lets argparse read
                 ``sys.argv``.
    """
    args = parse_arguments().parse_args(args)
    if args.operation not in ['diff', 'ratio', 'log2ratio']:
        exit("Operation not found. Please use 'diff', 'ratio' or 'log2ratio'.")

    hic1 = hm.hiCMatrix(args.matrices[0])
    hic2 = hm.hiCMatrix(args.matrices[1])

    if hic1.matrix.shape != hic2.matrix.shape:
        # the literals are concatenated, hence the explicit space after "using"
        exit(
            "The two matrices have different size. Use matrices having the same resolution and created using "
            "the same parameters. Check the matrix values using the tool `hicInfo`."
        )

    if hic1.chrBinBoundaries != hic2.chrBinBoundaries:
        exit(
            "The two matrices have different chromosome order. Use the tool `hicExport` to change the order.\n"
            "{}: {}\n"
            "{}: {}".format(args.matrices[0], hic1.chrBinBoundaries.keys(),
                            args.matrices[1], hic2.chrBinBoundaries.keys()))

    # normalize by total matrix sum
    hic1.matrix.data = hic1.matrix.data.astype(float) / hic1.matrix.data.sum()
    hic2.matrix.data = hic2.matrix.data.astype(float) / hic2.matrix.data.sum()

    # a bin is masked if it is a NaN bin in at least one of the inputs
    nan_bins = set(hic1.nan_bins)
    nan_bins = nan_bins.union(hic2.nan_bins)

    if args.operation == 'diff':
        new_matrix = hic1.matrix - hic2.matrix
    elif args.operation in ('ratio', 'log2ratio'):
        # ratio = hic1 * (1 / hic2), evaluated on the stored values only
        hic2.matrix.data = float(1) / hic2.matrix.data
        new_matrix = hic1.matrix.multiply(hic2.matrix)
        # just in case
        new_matrix.eliminate_zeros()
        if args.operation == 'log2ratio':
            new_matrix.data = np.log2(new_matrix.data)
            new_matrix.eliminate_zeros()

    hic1.setMatrixValues(new_matrix)
    hic1.maskBins(sorted(nan_bins))
    hic1.save(args.outFileName)
Beispiel #12
0
def test_plot_chromosomes():
    """Run scHicQualityControl on chr1/chr2 and check plots, report and matrix.

    The density and coverage images, the QC report and every cooler of the
    written mcool file are compared with the reference data below
    ``ROOT + 'scHicQualityControl/'``. The temporary files are removed at the
    end.
    """
    import os

    outfile_density = NamedTemporaryFile(suffix='.png', delete=False)
    outfile_density.close()
    outfile_coverage = NamedTemporaryFile(suffix='.png', delete=False)
    outfile_coverage.close()
    outfile_qc_report = NamedTemporaryFile(suffix='.txt', delete=False)
    outfile_qc_report.close()
    outfile_matrix = NamedTemporaryFile(suffix='.mcool', delete=False)
    outfile_matrix.close()
    args = "--matrix {} --outputMcool {} -t {} --dpi {} --outFileNameDensity {} \
            --outFileNameReadCoverage {} --outFileNameQCReport {} \
            --minimumReadCoverage {} --minimumDensity {} \
            --maximumRegionToConsider {} --chromosomes chr1 chr2".format(ROOT + 'test_matrix.mcool',
                                                                         outfile_matrix.name, 1, 300,
                                                                         outfile_density.name,
                                                                         outfile_coverage.name,
                                                                         outfile_qc_report.name,
                                                                         100000, 0.001, 30000000
                                                                         ).split()
    scHicQualityControl.main(args)

    test_image_density = ROOT + 'scHicQualityControl/density_chr1_chr2.png'
    res = compare_images(test_image_density, outfile_density.name, tolerance)
    assert res is None, res

    test_image_coverage = ROOT + 'scHicQualityControl/coverage_chr1_chr2.png'
    res = compare_images(test_image_coverage, outfile_coverage.name, tolerance)
    assert res is None, res

    assert are_files_equal(ROOT + "scHicQualityControl/qc_report_chr1_chr2.txt", outfile_qc_report.name)

    test_data_matrix = ROOT + 'scHicQualityControl/qc_matrix_chr1_chr2.mcool'
    matrices_list_test_data = sorted(cooler.fileops.list_coolers(test_data_matrix))
    matrices_list_created = sorted(cooler.fileops.list_coolers(outfile_matrix.name))

    # zip() stops at the shorter list; make sure no cooler is silently skipped
    assert len(matrices_list_created) == len(matrices_list_test_data)

    for test_matrix, created_matrix in zip(matrices_list_test_data, matrices_list_created):
        test = hm.hiCMatrix(test_data_matrix + '::' + test_matrix)
        created = hm.hiCMatrix(outfile_matrix.name + '::' + created_matrix)
        nt.assert_almost_equal(test.matrix.data, created.matrix.data, decimal=5)
        nt.assert_equal(test.cut_intervals, created.cut_intervals)

    # remove the temporary files created with delete=False
    for leftover in (outfile_density, outfile_coverage, outfile_qc_report,
                     outfile_matrix):
        os.unlink(leftover.name)
def create_bulk_matrix(pMatrixName, pMatricesList, pQueue):
    """Sum up several matrices of one file and put the result on a queue.

    Every entry of ``pMatricesList`` is loaded as
    ``pMatrixName + '::' + entry``. The first loaded hiCMatrix object is kept
    and the ``matrix`` of every following one is added to it in place.

    :param pMatrixName: path of the file that contains the matrices
    :param pMatricesList: names of the matrices inside that file
    :param pQueue: object with a ``put`` method; receives the summed
                   hiCMatrix object, or ``None`` for an empty list
    """
    accumulated = None
    for matrix_name in pMatricesList:
        current = hm.hiCMatrix(pMatrixFile=pMatrixName + '::' + matrix_name)
        if accumulated is not None:
            accumulated.matrix += current.matrix
        else:
            accumulated = current
    pQueue.put(accumulated)
def run_target_list_compilation(pInteractionFilesList,
                                pTargetList,
                                pArgs,
                                pViewpointObj,
                                pQueue=None,
                                pOneTarget=False):
    """Filter the interaction data of all samples by their target regions.

    :param pInteractionFilesList: list of lists of sample names
    :param pTargetList: one target file for all samples if ``pOneTarget`` is
                        set, otherwise one target file per entry of
                        ``pInteractionFilesList``
    :param pArgs: parsed arguments; ``interactionFile`` and ``targetFile``
                  are read
    :param pViewpointObj: object providing ``readInteractionFile``
    :param pQueue: queue that receives ``[sample_names, accepted_scores]``,
                   both nested like ``pInteractionFilesList``, or a string
                   starting with ``'Fail: '`` if an exception occurred
    :param pOneTarget: use the single target file ``pTargetList`` for all
                       samples
    :return: None; results are only reported via ``pQueue``
    :raises Exception: errors are re-raised if no queue is available to
                       report them
    """
    outfile_names_list = []
    accepted_scores_list = []
    target_regions_intervaltree = None
    try:
        if pOneTarget:
            # one shared target file: build its interval tree only once
            target_regions = utilities.readBed(pTargetList)
            hicmatrix = hm.hiCMatrix()
            target_regions_intervaltree = hicmatrix.intervalListToIntervalTree(
                target_regions)[0]

        for i, interactionFile in enumerate(pInteractionFilesList):
            outfile_names_list_intern = []
            accepted_scores_list_intern = []
            for sample in interactionFile:
                interaction_data, interaction_file_data, _ = pViewpointObj.readInteractionFile(
                    pArgs.interactionFile, sample)
                target_file = None if pOneTarget else pTargetList[i]

                accepted_scores = filter_scores_target_list(
                    interaction_file_data,
                    pTargetList=target_file,
                    pTargetIntervalTree=target_regions_intervaltree,
                    pTargetFile=pArgs.targetFile)

                outfile_names_list_intern.append(sample)
                accepted_scores_list_intern.append(accepted_scores)
            outfile_names_list.append(outfile_names_list_intern)
            accepted_scores_list.append(accepted_scores_list_intern)

    except Exception as exp:
        if pQueue is None:
            # nobody to report to: do not hide the error behind an
            # AttributeError on None
            raise
        pQueue.put('Fail: ' + str(exp) + traceback.format_exc())
        return
    if pQueue is None:
        return
    pQueue.put([outfile_names_list, accepted_scores_list])
    return
Beispiel #15
0
def filter_scores_target_list(pScoresDictionary,
                              pTargetList=None,
                              pTargetIntervalTree=None):
    """Merge all score lines that fall into the same target region.

    For every entry of ``pScoresDictionary`` the fields 0, 1 and 2 are read
    as chromosome, start and end. Entries overlapping a target region are
    grouped by the smallest overlapping interval. Per group one line is
    returned under the smallest key of the group: it is the line of that key
    with fields ``2`` and ``-5`` taken from the line of the largest key and
    the last three fields replaced by the sums over the group. The line
    object of the smallest key is modified in place.

    :param pScoresDictionary: dict key -> list of fields
    :param pTargetList: BED file with target regions (takes precedence)
    :param pTargetIntervalTree: mapping chromosome -> interval tree, used if
                                no target file is given
    :return: dict key -> merged line; empty if the target file has no regions
    """
    accepted_scores = {}
    target_regions_intervaltree = None
    if pTargetList is not None:
        target_regions = utilities.readBed(pTargetList)
        if len(target_regions) == 0:
            return accepted_scores
        target_regions_intervaltree = hm.hiCMatrix().intervalListToIntervalTree(
            target_regions)[0]
    elif pTargetIntervalTree is not None:
        target_regions_intervaltree = pTargetIntervalTree
    else:
        log.error('No target list given.')
        exit(1)

    # group the keys by the first (smallest) overlapping target interval
    keys_by_target = {}
    for key, record in pScoresDictionary.items():
        chromosome = record[0]
        start = int(record[1])
        end = int(record[2])
        if chromosome not in target_regions_intervaltree:
            continue
        overlaps = target_regions_intervaltree[chromosome][start:end]
        if not overlaps:
            continue
        keys_by_target.setdefault(sorted(overlaps)[0], []).append(key)

    for grouped_keys in keys_by_target.values():
        ordered_keys = sorted(grouped_keys)
        first_key = ordered_keys[0]
        last_key = ordered_keys[-1]

        # sum up the last three fields of all lines of this target
        totals = np.array([0.0, 0.0, 0.0])
        for key in ordered_keys:
            totals += np.array(list(map(float, pScoresDictionary[key][-3:])))

        merged_line = pScoresDictionary[first_key]
        merged_line[2] = pScoresDictionary[last_key][2]
        merged_line[-5] = pScoresDictionary[last_key][-5]
        merged_line[-3] = totals[0]
        merged_line[-2] = totals[1]
        merged_line[-1] = totals[2]

        accepted_scores[first_key] = merged_line

    return accepted_scores
Beispiel #16
0
def main(args=None):
    """Merge the bins of a Hi-C matrix and save the merged matrix.

    With ``--runningWindow`` the matrix is merged by ``running_window_merge``,
    otherwise by ``merge_bins``; both receive the matrix and ``numBins``.

    :param args: command line arguments; ``None`` lets argparse read
                 ``sys.argv``.
    """
    options = parse_arguments().parse_args(args)
    hic = hm.hiCMatrix(options.matrix)

    merge = running_window_merge if options.runningWindow else merge_bins
    merge(hic, options.numBins).save(options.outFileName)
def test_build_matrix_cooler_metadata():
    """Build a cool matrix with hicBuildMatrix and check matrix, QC files and metadata.

    The created matrix is compared with the stored reference matrix, the QC
    folder with the reference ``QC/`` folder, and the output of hicInfo on
    the new matrix with the stored metadata file. All temporary files and
    the QC folder are removed at the end.
    """
    outfile = NamedTemporaryFile(suffix='.cool', delete=False)
    outfile.close()
    outfile_bam = NamedTemporaryFile(suffix='.bam', delete=False)
    # close the bam handle (not `outfile` a second time) so that the file
    # is free to be written by hicBuildMatrix
    outfile_bam.close()
    qc_folder = mkdtemp(prefix="testQC_")
    args = "-s {} {} --outFileName {} -bs 5000 -b {} --QCfolder {} --threads 4 --genomeAssembly dm3  \
            --restrictionSequence GATC --danglingSequence GATC -rs {}".format(
        sam_R1, sam_R2, outfile.name, outfile_bam.name, qc_folder,
        dpnii_file).split()
    # `compute` is a helper defined elsewhere that is handed the entry point,
    # its arguments and the value 5 (presumably a retry count -- confirm).
    compute(hicBuildMatrix.main, args, 5)

    test = hm.hiCMatrix(ROOT + "small_test_matrix_parallel.h5")
    new = hm.hiCMatrix(outfile.name)

    nt.assert_equal(test.matrix.data, new.matrix.data)
    nt.assert_equal(len(new.cut_intervals), len(test.cut_intervals))

    # compare only the first three fields of every interval
    cut_interval_new_ = [x[:3] for x in new.cut_intervals]
    cut_interval_test_ = [x[:3] for x in test.cut_intervals]
    nt.assert_equal(cut_interval_new_, cut_interval_test_)

    assert are_files_equal(ROOT + "QC/QC.log", qc_folder + "/QC.log")
    assert set(os.listdir(ROOT + "QC/")) == set(os.listdir(qc_folder))

    outfile_metadata = NamedTemporaryFile(suffix='.txt', delete=False)
    outfile_metadata.close()
    args = "-m {} -o {}".format(outfile.name, outfile_metadata.name).split()
    hicInfo.main(args)
    assert are_files_equal(ROOT + "hicBuildMatrix/metadata.txt",
                           outfile_metadata.name,
                           delta=7)

    # remove everything that was created with delete=False / mkdtemp
    os.unlink(outfile.name)
    os.unlink(outfile_bam.name)
    os.unlink(outfile_metadata.name)
    shutil.rmtree(qc_folder)
Beispiel #18
0
def test_normalize_smallest_h5_cool_equal(capsys):
    """Check that 'smallest' normalization gives equal results for cool and h5.

    hicNormalize is run once on the two cool matrices and once on the two h5
    matrices. The second output of each run is compared with its stored
    reference and the cool result is compared with the h5 result. All
    temporary files are removed at the end.
    """
    outfile_one = NamedTemporaryFile(suffix='.cool', delete=False)
    outfile_one.close()

    outfile_one_cool = NamedTemporaryFile(suffix='.cool', delete=False)
    outfile_one_cool.close()

    outfile_two = NamedTemporaryFile(suffix='.h5', delete=False)
    outfile_two.close()
    outfile_two_h5 = NamedTemporaryFile(suffix='.h5', delete=False)
    outfile_two_h5.close()

    args = "--matrices {} {} --normalize smallest -o {} {}".format(
        matrix_one_cool, matrix_two_cool, outfile_one.name,
        outfile_one_cool.name).split()
    # `compute` is a helper defined elsewhere that is handed the entry point,
    # its arguments and the value 5 (presumably a retry count -- confirm).
    compute(hicNormalize.main, args, 5)

    args = "--matrices {} {} --normalize smallest -o {} {}".format(
        matrix_one_h5, matrix_two_h5, outfile_two.name,
        outfile_two_h5.name).split()
    compute(hicNormalize.main, args, 5)

    test_one = hm.hiCMatrix(ROOT + "/smallest_one.cool")
    test_two = hm.hiCMatrix(ROOT + "/smallest_one.h5")

    new_one = hm.hiCMatrix(outfile_one_cool.name)
    new_two = hm.hiCMatrix(outfile_two_h5.name)

    nt.assert_equal(test_one.matrix.data, new_one.matrix.data)
    nt.assert_equal(test_one.cut_intervals, new_one.cut_intervals)

    nt.assert_equal(test_two.matrix.data, new_two.matrix.data)
    nt.assert_equal(test_two.cut_intervals, new_two.cut_intervals)

    nt.assert_equal(new_one.matrix.data, new_two.matrix.data)
    nt.assert_equal(len(new_one.cut_intervals), len(new_two.cut_intervals))

    # remove all four temporary files created with delete=False
    os.unlink(outfile_one.name)
    os.unlink(outfile_one_cool.name)
    os.unlink(outfile_two.name)
    os.unlink(outfile_two_h5.name)
Beispiel #19
0
def main(args=None):
    """Merge the bins of a Hi-C matrix according to the given domains.

    The matrix is loaded, its masked bins are restored, the bin ids of the
    boundary positions are looked up from ``args.domains`` and handed to
    ``merge_tad_bins`` together with the output file name.

    :param args: command line arguments; ``None`` lets argparse read
                 ``sys.argv``.
    """
    options = parse_arguments().parse_args(args)

    hic_ma = hm.hiCMatrix(options.matrix)
    hic_ma.restoreMaskedBins()

    # bin ids of the boundary positions
    boundary_bins = get_boundary_bin_id(hic_ma, options.domains)

    # build the reduced matrix by merging the TAD bins
    log.info("Generating matrix with merged bins")
    merge_tad_bins(hic_ma, boundary_bins, options.outFile)
def test_hic_transfer_all():
    """Run hicTransform with ``--method all`` and check the three written matrices.

    The obs/exp, Pearson and covariance results are read from files named
    ``<prefix><basename of the output file>`` in the directory of the output
    file and compared (up to DELTA_DECIMAL decimals) with the stored
    references. Every produced file is removed after its comparison.
    """
    outfile = NamedTemporaryFile(suffix='all.h5', delete=False)
    outfile.close()

    cli_args = "--matrix {} --outFileName {} --method all".format(
        original_matrix, outfile.name).split()
    hicTransform.main(cli_args)

    out_dir = dirname(outfile.name)
    out_base = basename(outfile.name)

    # (file name prefix of the produced matrix, reference file)
    checks = (
        ("/obs_exp_", "hicTransform/obs_exp_small_50kb.h5"),
        ("/pearson_", "hicTransform/pearson_small_50kb.h5"),
        ("/covariance_", "hicTransform/covariance_small_50kb.h5"),
    )
    for prefix, reference in checks:
        expected = hm.hiCMatrix(ROOT + reference)
        actual = hm.hiCMatrix(out_dir + prefix + out_base)
        nt.assert_array_almost_equal(expected.matrix.data,
                                     actual.matrix.data,
                                     decimal=DELTA_DECIMAL)
        os.unlink(out_dir + prefix + out_base)

    os.unlink(outfile.name)
Beispiel #21
0
def test_filterOutInterChrCounts():
    """Check that filterOutInterChrCounts removes all counts between chromosomes 'a' and 'b'.

    First case: an upper triangular matrix that is made symmetric with
    fillLowerTriangle; the filtered matrix is read back with getMatrix.
    Second case: an explicitly symmetric int32 matrix; the return value of
    filterOutInterChrCounts is checked.
    """
    # --- case 1: filter in place, read back via getMatrix() ---
    hic = hm.hiCMatrix()
    bins = [('a', 0, 10, 1), ('a', 10, 20, 1), ('a', 20, 30, 1),
            ('b', 30, 40, 1), ('b', 40, 50, 1)]
    hic.nan_bins = []

    upper = np.array([[1, 8, 5, 3, 0], [0, 4, 15, 5, 1], [0, 0, 0, 0, 2],
                      [0, 0, 0, 0, 1], [0, 0, 0, 0, 0]])
    hic.matrix = csr_matrix(upper)
    hic.setMatrix(hic.matrix, bins)
    hic.fillLowerTriangle()
    hic.filterOutInterChrCounts()

    expected_intra = np.matrix([[1, 8, 5, 0, 0], [8, 4, 15, 0, 0],
                                [5, 15, 0, 0, 0], [0, 0, 0, 0, 1],
                                [0, 0, 0, 1, 0]])
    nt.assert_equal(hic.getMatrix(), expected_intra)

    # --- case 2: check the returned matrix ---
    bins = [('a', 0, 10, 1), ('a', 10, 20, 1), ('a', 20, 30, 1),
            ('b', 30, 40, 1), ('b', 40, 50, 1)]
    hic = hm.hiCMatrix()
    hic.nan_bins = []
    upper = np.array([[0, 10, 5, 3, 0], [0, 0, 15, 5, 1], [0, 0, 0, 7, 3],
                      [0, 0, 0, 0, 1], [0, 0, 0, 0, 0]])

    # make the matrix symmetric:
    hic.matrix = csr_matrix(upper + upper.T)
    hic.setMatrix(csr_matrix(upper + upper.T, dtype=np.int32), bins)

    returned = hic.filterOutInterChrCounts().todense()
    expected_returned = np.array(
        [[0, 10, 5, 0, 0], [10, 0, 15, 0, 0], [5, 15, 0, 0, 0],
         [0, 0, 0, 0, 1], [0, 0, 0, 1, 0]],
        dtype='i4')
    nt.assert_equal(returned, expected_returned)
Beispiel #22
0
def test_convert_to_zscore_matrix_2():
    """Compare convert_to_zscore_matrix with a straightforward reference computation.

    For all upper triangular cells up to ``max_depth`` the values are grouped
    by their distance in bins; the z-score of a cell is its value minus the
    mean of its distance group, divided by the standard deviation of that
    group. Only the main diagonal is compared with the class implementation.
    """
    # load test matrix
    hic = hm.hiCMatrix(ROOT + 'Li_et_al_2015.h5')
    hic.maskBins(hic.nan_bins)

    dense = hic.matrix.todense()
    max_depth = 10000
    bin_size = hic.getBinSize()
    max_depth_in_bins = int(float(max_depth) / bin_size)
    n_bins = dense.shape[0]

    chrom, start, end, extra = zip(
        *hm.hiCMatrix.fit_cut_intervals(hic.cut_intervals))

    def distance_in_bins(first, second):
        # genomic distance of the two bin starts, translated to bins
        return int(float(start[second] - start[first]) / bin_size)

    # collect the matrix values per distance (upper triangle only)
    values_per_distance = {}
    sys.stderr.write("Computing values per distance for each matrix entry\n")
    for row in range(n_bins):
        for col in range(row, n_bins):
            dist = distance_in_bins(row, col)
            if dist <= max_depth_in_bins:
                values_per_distance.setdefault(dist, []).append(dense[row, col])

    mu = {}
    std = {}
    for dist, values in iteritems(values_per_distance):
        mu[dist] = np.mean(values)
        std[dist] = np.std(values)

    # compute z-score for test matrix
    sys.stderr.write("Computing zscore for each matrix entry\n")
    zscore_mat = np.full((n_bins, n_bins), np.nan)
    for row in range(n_bins):
        for col in range(row, n_bins):
            dist = distance_in_bins(row, col)
            if dist <= max_depth_in_bins:
                zscore_mat[row, col] = (dense[row, col] - mu[dist]) / std[dist]

    # compare with zscore from class
    hic.convert_to_zscore_matrix(maxdepth=max_depth)

    # only the main diagonal is checked. Other diagonals show minimal differences
    nt.assert_almost_equal(hic.matrix.todense().diagonal(0).A1,
                           zscore_mat.diagonal(0))
Beispiel #23
0
def test_intervalListToIntervalTree(capsys):
    """Exercise hiCMatrix.intervalListToIntervalTree.

    Verifies that an empty interval list raises ``AssertionError`` and that
    a populated list yields the expected per-chromosome interval trees and
    bin boundaries.

    ``capsys`` is kept in the signature for interface stability; it is not
    read by this test.
    """
    # get matrix
    hic = hm.hiCMatrix()
    cut_intervals = [('a', 0, 10, 1), ('a', 10, 20, 1), ('a', 20, 30, 1),
                     ('b', 30, 40, 1), ('b', 40, 50, 1)]

    hic.nan_bins = []

    matrix = np.array([[1, 8, 5, 3, 0], [0, 4, 15, 5, 1], [0, 0, 0, 0, 2],
                       [0, 0, 0, 0, 1], [0, 0, 0, 0, 0]])

    hic.matrix = csr_matrix(matrix)
    hic.setMatrix(hic.matrix, cut_intervals)

    nt.assert_equal(hic.getMatrix(), matrix)

    # empty list should raise AssertionError.
    # Only the raising call lives inside the context manager: any statement
    # placed after it in the with-block would never execute, so an output
    # check there would silently verify nothing.
    interval_list = []
    with pytest.raises(AssertionError):
        hic.intervalListToIntervalTree(interval_list)

    # test with correct interval_list
    interval_list = [('a', 0, 10, 1), ('a', 10, 20, 1), ('b', 20, 30, 1),
                     ('b', 30, 50, 1), ('b', 50, 100, 1), ('c', 100, 200, 1),
                     ('c', 200, 210, 1), ('d', 210, 220, 1), ('e', 220, 250)]

    tree, boundaries = hic.intervalListToIntervalTree(interval_list)

    # test tree: the third Interval field is the position of the interval
    # in interval_list
    nt.assert_equal(tree['a'],
                    IntervalTree([Interval(0, 10, 0),
                                  Interval(10, 20, 1)]))
    nt.assert_equal(
        tree['b'],
        IntervalTree(
            [Interval(20, 30, 2),
             Interval(30, 50, 3),
             Interval(50, 100, 4)]))
    nt.assert_equal(
        tree['c'], IntervalTree([Interval(100, 200, 5),
                                 Interval(200, 210, 6)]))
    nt.assert_equal(tree['d'], IntervalTree([Interval(210, 220, 7)]))
    nt.assert_equal(tree['e'], IntervalTree([Interval(220, 250, 8)]))

    # test boundaries
    nt.assert_equal(
        boundaries,
        OrderedDict([('a', (0, 2)), ('b', (2, 5)), ('c', (5, 7)),
                     ('d', (7, 8)), ('e', (8, 9))]))
Beispiel #24
0
def test_merge_matrices_running_window():
    """Run scHicMergeMatrixBins with --runningWindow and compare to reference.

    Every cooler contained in the produced mcool file is compared, in sorted
    name order, against the cooler at the same position in the reference
    mcool file.
    """
    tmp = NamedTemporaryFile(suffix='.mcool', delete=False)
    tmp.close()
    created_path = tmp.name

    cli = "--matrix {} --outFileName {} -t {} -nb {} --runningWindow".format(
        ROOT + 'test_matrix.mcool', created_path, 1, 11)
    scHicMergeMatrixBins.main(cli.split())

    reference_path = ROOT + 'scHicMergeMatrixBins/test_matrix_10mb_running_window.mcool'
    reference_names = sorted(cooler.fileops.list_coolers(reference_path))
    created_names = sorted(cooler.fileops.list_coolers(created_path))

    for reference_name, created_name in zip(reference_names, created_names):
        reference = hm.hiCMatrix(reference_path + '::' + reference_name)
        produced = hm.hiCMatrix(created_path + '::' + created_name)
        nt.assert_almost_equal(reference.matrix.data, produced.matrix.data, decimal=5)
        nt.assert_equal(reference.cut_intervals, produced.cut_intervals)

    os.unlink(created_path)
Beispiel #25
0
def compute_contains_all_chromosomes(pMatrixName, pMatricesList, pChromosomes, pQueue):
    """Flag which matrices can be restricted to the requested chromosomes.

    Each entry of ``pMatricesList`` is opened as ``pMatrixName + '::' + entry``
    and ``keepOnlyTheseChr(pChromosomes)`` is attempted on it. When
    ``pChromosomes`` is None it is filled from ``chrBinBoundaries`` of the
    first matrix opened and then reused for all following matrices.

    The result, a list holding 1 for every matrix where the call succeeded
    and 0 where it raised, is put on ``pQueue``; nothing is returned.
    """
    flags = []
    for matrix_name in pMatricesList:
        ma = hm.hiCMatrix(pMatrixName + '::' + matrix_name)
        if pChromosomes is None:
            pChromosomes = list(ma.chrBinBoundaries)
        try:
            ma.keepOnlyTheseChr(pChromosomes)
        except Exception:
            # any failure counts as "does not contain all chromosomes"
            flags.append(0)
        else:
            flags.append(1)
    pQueue.put(flags)
def test_find_TADs_fdr_chromosomes():
    """Run hicFindTADs with FDR correction on two chromosomes.

    Full test case: the z-score matrix written by the tool is compared with
    the stored reference matrix, then every text output is compared with its
    reference file. The temporary output folder is removed at the end.
    """
    matrix_path = ROOT + "small_test_matrix.h5"
    out_dir = mkdtemp(prefix="test_case_find_tads_fdr_chromosomes")
    args = "--matrix {} --minDepth 60000 --maxDepth 180000 --numberOfProcessors 2 --step 20000 \
    --outPrefix {}/test_multiFDR_chromosomes --minBoundaryDistance 20000 \
    --correctForMultipleTesting fdr --thresholdComparisons 0.5 --chromosomes chr2L chr3R".format(
        matrix_path, out_dir).split()

    # executed through the compute() helper rather than hicFindTADs.main(args)
    compute(hicFindTADs.main, args, 5)

    produced = hm.hiCMatrix(out_dir +
                            "/test_multiFDR_chromosomes_zscore_matrix.h5")
    reference = hm.hiCMatrix(ROOT +
                             'find_TADs/FDR_chromosomes/multiFDR_zscore_matrix.h5')
    nt.assert_equal(reference.matrix.data, produced.matrix.data)
    nt.assert_equal(reference.cut_intervals, produced.cut_intervals)

    print(out_dir + "/test_multiFDR_boundaries.bed")

    # (reference file below ROOT, produced file below the output folder)
    file_pairs = (
        ("find_TADs/FDR_chromosomes/multiFDR_boundaries.bed",
         "/test_multiFDR_chromosomes_boundaries.bed"),
        ("find_TADs/FDR_chromosomes/multiFDR_domains.bed",
         "/test_multiFDR_chromosomes_domains.bed"),
        ("find_TADs/FDR_chromosomes/multiFDR_tad_score.bm",
         "/test_multiFDR_chromosomes_tad_score.bm"),
        ("find_TADs/FDR_chromosomes/multiFDR_boundaries.gff",
         "/test_multiFDR_chromosomes_boundaries.gff"),
        ("find_TADs/FDR_chromosomes/multiFDR_score.bedgraph",
         "/test_multiFDR_chromosomes_score.bedgraph"),
    )
    for reference_file, produced_file in file_pairs:
        assert are_files_equal(ROOT + reference_file, out_dir + produced_file)

    shutil.rmtree(out_dir)
Beispiel #27
0
def test_build_matrix_cooler_multiple():
    """Build a multi-resolution cool file and compare it with the reference.

    hicBuildMatrix is run with bin sizes 5000, 10000 and 20000. For each
    resolution the matrix data, the number of cut intervals and the first
    three fields of every cut interval must match the stored reference.
    The QC log and the set of QC files are checked as well.
    """
    tmp_cool = NamedTemporaryFile(suffix='.cool', delete=False)
    tmp_cool.close()
    qc_folder = mkdtemp(prefix="testQC_")
    args = "-s {} {} --outFileName {} -bs 5000 10000 20000 -b /tmp/test.bam --QCfolder {} --threads 4".format(
        sam_R1, sam_R2, tmp_cool.name, qc_folder).split()
    hicBuildMatrix.main(args)

    resolutions = (5000, 10000, 20000)
    reference_prefix = ROOT + "hicBuildMatrix/multi_small_test_matrix.cool::/resolutions/"
    produced_prefix = tmp_cool.name + '::/resolutions/'

    # load all reference matrices first, then all produced ones
    references = [hm.hiCMatrix(reference_prefix + str(res)) for res in resolutions]
    produced = [hm.hiCMatrix(produced_prefix + str(res)) for res in resolutions]
    pairs = list(zip(references, produced))

    # matrix values
    for ref, new in pairs:
        nt.assert_equal(ref.matrix.data, new.matrix.data)

    # number of bins
    for ref, new in pairs:
        nt.assert_equal(len(new.cut_intervals), len(ref.cut_intervals))

    # bin coordinates: only the first three fields of each interval are compared
    for ref, new in pairs:
        new_coords = [interval[:3] for interval in new.cut_intervals]
        ref_coords = [interval[:3] for interval in ref.cut_intervals]
        nt.assert_equal(new_coords, ref_coords)

    assert are_files_equal(ROOT + "QC/QC.log", qc_folder + "/QC.log")
    assert set(os.listdir(ROOT + "QC/")) == set(os.listdir(qc_folder))

    os.unlink(tmp_cool.name)
    shutil.rmtree(qc_folder)
Beispiel #28
0
def open_and_store_matrix(pMatrixName, pMatricesList, pIndex, pXDimension, pChromosomes, pQueue):
    """Flatten each listed matrix into one row of a sparse matrix and queue it.

    Every entry of ``pMatricesList`` is opened as ``pMatrixName + '::' + entry``.
    Its non-zero cells are written to row ``pIndex + i`` of the result, at
    column ``row * n_cols + col`` (row-major flattening).

    Parameters
    ----------
    pMatrixName : str
        Path prefix that is joined with each matrix name via ``'::'``.
    pMatricesList : iterable of str
        Names of the matrices to load.
    pIndex : int
        Row offset for the first matrix of this list.
    pXDimension : int
        Number of rows of the result matrix.
    pChromosomes : list or None
        If it holds exactly one name it is passed as ``pChrnameList`` when
        opening; if it holds several, ``keepOnlyTheseChr`` is applied after
        loading; if None or empty the matrix is used as loaded.
    pQueue : queue-like
        Receives the result via ``put``: the sparse matrix, or None when
        ``pMatricesList`` is empty.
    """
    neighborhood_matrix = None
    single_chromosome = pChromosomes is not None and len(pChromosomes) == 1

    for i, matrix in enumerate(pMatricesList):
        if single_chromosome:
            hic_ma = hm.hiCMatrix(pMatrixFile=pMatrixName + '::' + matrix, pChrnameList=pChromosomes)
        else:
            hic_ma = hm.hiCMatrix(pMatrixFile=pMatrixName + '::' + matrix)
            if pChromosomes:
                hic_ma.keepOnlyTheseChr(pChromosomes)

        _matrix = hic_ma.matrix
        n_rows, n_cols = _matrix.shape

        if neighborhood_matrix is None:
            # The width is fixed by the first matrix loaded. The builtin
            # ``float`` is used because the ``np.float`` alias was removed
            # from NumPy (AttributeError since 1.24); both mean float64.
            neighborhood_matrix = csr_matrix((pXDimension, n_rows * n_cols), dtype=float)

        instances, features = _matrix.nonzero()

        # Widen to int64 before flattening: multiplying narrower index
        # arrays in place can wrap around silently for large matrices.
        flat_columns = instances.astype(np.int64) * n_cols + features
        # NOTE(review): assumes _matrix.data lines up with nonzero(), i.e.
        # no explicitly stored zeros - confirm against the loader.
        neighborhood_matrix[pIndex + i, flat_columns] = _matrix.data

    pQueue.put(neighborhood_matrix)
def run_target_list_compilation(pInteractionFilesList, pTargetList, pArgs, pViewpointObj, pQueue=None):
    """Filter interaction files by target regions and write the results.

    For every sample of every entry in ``pInteractionFilesList`` the
    interaction file is read through ``pViewpointObj``, its scores are
    filtered with ``filter_scores_target_list`` and the outcome is written
    with ``write``. An output file is written even when no score is accepted.

    Target selection:
      * batch mode with exactly one target file: the file is read once and
        turned into an interval tree that is used for all samples;
      * batch mode with several target files: entry ``i`` of ``pTargetList``
        is used, prefixed with ``pArgs.targetFileFolder`` unless it is '.';
      * otherwise: entry ``i`` of ``pTargetList`` is used as given.

    If ``pQueue`` is given, the list of output file names (collected in
    batch mode only, without the output folder) is put on it. The function
    always returns None.
    """
    written_names = []
    shared_target_tree = None
    if pArgs.batchMode and len(pTargetList) == 1:
        regions = utilities.readBed(pTargetList[0])
        shared_target_tree = hm.hiCMatrix().intervalListToIntervalTree(regions)[0]

    for i, interactionFile in enumerate(pInteractionFilesList):
        for sample in interactionFile:
            sample_path = sample
            if pArgs.interactionFileFolder != '.':
                sample_path = pArgs.interactionFileFolder + '/' + sample

            header, interaction_data, interaction_file_data = \
                pViewpointObj.readInteractionFileForAggregateStatistics(sample_path)
            log.debug('len(pTargetList) {}'.format(len(pTargetList)))

            if pArgs.batchMode and len(pTargetList) == 1:
                # the shared interval tree is used instead of a file
                target_file = None
            elif pArgs.batchMode and len(pTargetList) > 1 and pArgs.targetFileFolder != '.':
                target_file = pArgs.targetFileFolder + '/' + pTargetList[i]
            else:
                target_file = pTargetList[i]

            accepted_scores = filter_scores_target_list(
                interaction_file_data,
                pTargetList=target_file,
                pTargetIntervalTree=shared_target_tree)

            if len(accepted_scores) == 0:
                # Deliberately no 'break' or 'continue': an empty file is
                # still written, so files without significant interactions
                # do not have to be tracked separately.
                if not pArgs.batchMode:
                    log.info('No target regions found')
                else:
                    with open('errorLog.txt', 'a+') as errorlog:
                        errorlog.write('Failed for: {} and {}.\n'.format(interactionFile[0], interactionFile[1]))

            # base name of the sample with its last extension dropped
            base_name = sample.split('/')[-1]
            stem = '.'.join(base_name.split('.')[:-1])
            outFileName = stem + '_' + pArgs.outFileNameSuffix

            if pArgs.batchMode:
                written_names.append(outFileName)
            if pArgs.outputFolder != '.':
                outFileName = pArgs.outputFolder + '/' + outFileName

            write(outFileName, header, accepted_scores,
                  interaction_file_data)

    if pQueue is not None:
        pQueue.put(written_names)
Beispiel #30
0
def test_maskChromosomes():

    hic = hm.hiCMatrix()
    cut_intervals = [('a', 0, 10, 1), ('a', 10, 20, 1), ('a', 20, 30, 1),
                     ('b', 30, 40, 1), ('b', 40, 50, 1)]

    hic.nan_bins = []

    matrix = np.array([[1, 8, 5, 3, 0], [0, 4, 15, 5, 1], [0, 0, 0, 0, 2],
                       [0, 0, 0, 0, 1], [0, 0, 0, 0, 0]])

    hic.matrix = csr_matrix(matrix)
    hic.setMatrix(hic.matrix, cut_intervals)
    hic.maskChromosomes(['a'])