Exemple #1
0
def test_getCountsByDistance():
    """
    Check getCountsByDistance() on a small count matrix and on an all-NaN matrix.

    Five bins are used: four on chromosome 'a' and one on chromosome 'b'.
    The result is indexed by distance (0, 10, 20, 30) and by -1; the values
    expected under -1 are the matrix entries that pair a bin of 'a' with the
    bin of 'b'.
    """
    bins = [('a', 0, 10, 1), ('a', 10, 20, 1), ('a', 20, 30, 1),
            ('a', 30, 40, 1), ('b', 40, 50, 1)]

    # ---- case 1: upper triangular integer counts ----
    counts = np.array([[1, 8, 5, 3, 0],
                       [0, 4, 15, 5, 1],
                       [0, 0, 0, 0, 2],
                       [0, 0, 0, 0, 1],
                       [0, 0, 0, 0, 0]])
    hic = hm.hiCMatrix()
    hic.nan_bins = []
    hic.matrix = csr_matrix(counts)
    hic.setMatrix(hic.matrix, bins)

    by_distance = hic.getCountsByDistance()

    expected_counts = ((-1, [0, 1, 2, 1]),
                       (0, [1, 4, 0, 0, 0]),
                       (10, [8, 15, 0]),
                       (20, [5, 5]),
                       (30, [3]))
    for key, expected in expected_counts:
        nt.assert_equal(by_distance[key], expected)

    # ---- case 2: every entry is NaN, all counts are expected to be 0 ----
    hic = hm.hiCMatrix()
    hic.nan_bins = []
    all_nan = np.matrix([[np.nan] * 5 for _ in range(5)])
    hic.matrix = csr_matrix(all_nan)
    hic.setMatrix(hic.matrix, bins)

    by_distance = hic.getCountsByDistance()

    expected_zeros = ((-1, [0, 0, 0, 0]),
                      (0, [0, 0, 0, 0, 0]),
                      (10, [0, 0, 0]),
                      (20, [0, 0]),
                      (30, [0]))
    for key, expected in expected_zeros:
        nt.assert_equal(by_distance[key], expected)
Exemple #2
0
def test_save_GInteractions():
    """
    Write a small symmetric matrix with save_GInteractions() and load it again.

    The final load is expected to fail: GInteractions saves the file as .tsv,
    but hiCMatrix.__init__ can only process .npz, h5, dekker and cool; any
    other file is treated as h5f.
    """
    out_path = '/tmp/matrix_GInteractions'
    # Make sure the path exists: reuse an existing file, otherwise create
    # an empty one.
    try:
        handle = open(out_path, 'r')
    except Exception:
        handle = open(out_path, 'w')
    handle.close()

    hic = hm.hiCMatrix()

    bins = [('a', 0, 10, 1), ('a', 10, 20, 1), ('a', 20, 30, 1),
            ('a', 30, 40, 1), ('b', 40, 50, 1)]
    counts = np.array([[1, 8, 5, 3, 0],
                       [0, 4, 15, 5, 1],
                       [0, 0, 0, 0, 2],
                       [0, 0, 0, 0, 1],
                       [0, 0, 0, 0, 0]])

    hic.matrix = csr_matrix(counts)
    # register the bins, then make the matrix symmetric
    hic.setMatrix(hic.matrix, bins)
    hic.matrix = hm.hiCMatrix.fillLowerTriangle(hic.matrix)

    hic.save_GInteractions(out_path)

    # this is the step at which the test fails
    hm.hiCMatrix(out_path)
Exemple #3
0
def test_save_load_cooler_format():
    """
    Save a symmetric matrix as .cool and compare it with the reloaded copy.

    The CSR arrays (data, indices, indptr) and the cut intervals must match.
    nan_bins and correction factors are not compared because, as noted in
    the original test, the cool format does not support them.
    """
    cool_path = '/tmp/matrix2.cool'
    bins = [('a', 0, 10, 1), ('a', 10, 20, 1), ('a', 20, 30, 1),
            ('a', 30, 40, 1), ('b', 40, 50, 1)]
    hic = hm.hiCMatrix()
    hic.nan_bins = []
    counts = np.array([[1, 8, 5, 3, 0],
                       [0, 4, 15, 5, 1],
                       [0, 0, 0, 0, 2],
                       [0, 0, 0, 0, 1],
                       [0, 0, 0, 0, 0]])

    hic.matrix = csr_matrix(counts)
    # register the bins, then make the matrix symmetric
    hic.setMatrix(hic.matrix, bins)
    hic.matrix = hm.hiCMatrix.fillLowerTriangle(hic.matrix)

    hic.save(cool_path)
    reloaded = hm.hiCMatrix(cool_path)

    log.info('original data: {}'.format(hic.matrix))
    log.info('cool data: {}'.format(reloaded.matrix))
    for csr_field in ('data', 'indices', 'indptr'):
        nt.assert_equal(getattr(hic.matrix, csr_field),
                        getattr(reloaded.matrix, csr_field))

    nt.assert_equal(hic.cut_intervals, reloaded.cut_intervals)
    unlink(cool_path)
Exemple #4
0
def test_save_load():
    """
    Save a matrix with correction factors and nan bins as .h5 and reload it.

    The input contains one NaN entry. Correction factors, the CSR arrays,
    nan_bins and the cut intervals of the reloaded matrix must all equal
    those of the matrix that was saved.
    """
    h5_path = '/tmp/matrix.h5'
    bins = [('a', 0, 10, 1), ('a', 10, 20, 1), ('a', 20, 30, 1),
            ('a', 30, 40, 1), ('b', 40, 50, 1)]
    hic = hm.hiCMatrix()
    hic.nan_bins = []
    counts = np.array([[1, 8, 5, 3, 0], [0, 4, 15, 5, 1],
                       [0, 0, 0, np.nan, 2], [0, 0, 0, 0, 1],
                       [0, 0, 0, 0, 0]])

    hic.matrix = csr_matrix(counts)
    # register the bins, then make the matrix symmetric
    hic.setMatrix(hic.matrix, bins)
    hic.matrix = hm.hiCMatrix.fillLowerTriangle(hic.matrix)
    hic.correction_factors = np.array([0.5, 1, 2, 3, 4])
    hic.nan_bins = np.array([4])

    hic.save(h5_path)
    reloaded = hm.hiCMatrix(h5_path)

    nt.assert_equal(hic.correction_factors, reloaded.correction_factors)
    for csr_field in ('data', 'indices', 'indptr'):
        nt.assert_equal(getattr(hic.matrix, csr_field),
                        getattr(reloaded.matrix, csr_field))
    nt.assert_equal(hic.nan_bins, reloaded.nan_bins)
    assert hic.cut_intervals == reloaded.cut_intervals
    unlink(h5_path)
Exemple #5
0
def test_build_matrix_rf():
    """
    Run hicBuildMatrix with a restriction-site file and compare with references.

    The produced matrix must have the same data and cut intervals as
    small_test_rf_matrix.h5, and the QC folder must contain the same file
    names and an equal QC.log as the reference folder QC_rc/.
    """
    tmp_matrix = NamedTemporaryFile(suffix='.h5', delete=False)
    tmp_matrix.close()
    qc_dir = mkdtemp(prefix="testQC_")

    template = "-s {} {} -rs {} --outFileName {}  --QCfolder {} " \
               "--restrictionSequence GATC " \
               "--danglingSequence GATC " \
               "--minDistance 150 " \
               "--maxLibraryInsertSize 1500 --threads 4"
    command_line = template.format(sam_R1, sam_R2, dpnii_file,
                                   tmp_matrix.name, qc_dir)
    hicBuildMatrix.main(command_line.split())

    reference = hm.hiCMatrix(ROOT + "small_test_rf_matrix.h5")
    built = hm.hiCMatrix(tmp_matrix.name)

    nt.assert_equal(reference.matrix.data, built.matrix.data)
    nt.assert_equal(reference.cut_intervals, built.cut_intervals)

    print(set(os.listdir(ROOT + "QC_rc/")))
    assert are_files_equal(ROOT + "QC_rc/QC.log", qc_dir + "/QC.log")
    assert set(os.listdir(ROOT + "QC_rc/")) == set(os.listdir(qc_dir))

    # remove the temporary outputs
    os.unlink(tmp_matrix.name)
    shutil.rmtree(qc_dir)
Exemple #6
0
def main(args=None):
    """
    Sum all matrices given on the command line and save the result.

    The first matrix serves as template: every further matrix must have the
    same ``chrBinBoundaries``. The nan bins of all inputs are merged and
    masked in the summed matrix before it is written to ``args.outFileName``.

    :param args: command line tokens handed to ``parse_args``; the default
        ``None`` is passed through unchanged
    :raises SystemExit: with status 1 if the chromosome layout differs or a
        matrix can not be added
    """
    # ``sys.exit`` is used instead of the ``exit`` helper, which is injected
    # by the ``site`` module for interactive use and is not always available.
    import sys

    args = parse_arguments().parse_args(args)

    hic = hm.hiCMatrix(args.matrices[0])
    summed_matrix = hic.matrix
    nan_bins = set(hic.nan_bins)
    for matrix in args.matrices[1:]:
        hic_to_append = hm.hiCMatrix(matrix)
        if hic.chrBinBoundaries != hic_to_append.chrBinBoundaries:
            log.error(
                "The two matrices have different chromosome order. Use the tool `hicExport` to change the order.\n"
                "{}: {}\n"
                "{}: {}".format(args.matrices[0], list(hic.chrBinBoundaries),
                                matrix, list(hic_to_append.chrBinBoundaries)))
            sys.exit(1)

        try:
            summed_matrix = summed_matrix + hic_to_append.matrix
            if len(hic_to_append.nan_bins):
                nan_bins = nan_bins.union(hic_to_append.nan_bins)
        except Exception:
            # Top-level boundary: log the traceback and stop with an error
            # status instead of propagating the exception.
            log.exception(
                "\nMatrix {} seems to be corrupted or of different shape".
                format(matrix))
            sys.exit(1)

    # save only the upper triangle of the
    # symmetric matrix
    hic.setMatrixValues(summed_matrix)
    hic.maskBins(sorted(nan_bins))
    hic.save(args.outFileName)
def test_build_matrix_cooler():
    """
    Run hicBuildMatrix with a .cool output file and compare with references.

    The matrix data must equal that of small_test_matrix_parallel.h5. Of the
    cut intervals only the first three fields of each entry are compared.
    The QC folder must match the reference folder QC/.
    """
    tmp_matrix = NamedTemporaryFile(suffix='.cool', delete=False)
    tmp_matrix.close()
    qc_dir = mkdtemp(prefix="testQC_")

    template = "-s {} {} --outFileName {} -bs 5000 -b /tmp/test.bam --QCfolder {} --threads 4"
    command_line = template.format(sam_R1, sam_R2, tmp_matrix.name, qc_dir)
    hicBuildMatrix.main(command_line.split())

    reference = hm.hiCMatrix(ROOT + "small_test_matrix_parallel.h5")
    built = hm.hiCMatrix(tmp_matrix.name)

    nt.assert_equal(reference.matrix.data, built.matrix.data)
    nt.assert_equal(len(built.cut_intervals), len(reference.cut_intervals))

    # keep only the first three fields of every interval
    built_regions = [interval[:3] for interval in built.cut_intervals]
    reference_regions = [interval[:3] for interval in reference.cut_intervals]
    nt.assert_equal(built_regions, reference_regions)

    assert are_files_equal(ROOT + "QC/QC.log", qc_dir + "/QC.log")
    assert set(os.listdir(ROOT + "QC/")) == set(os.listdir(qc_dir))

    # remove the temporary outputs
    os.unlink(tmp_matrix.name)
    shutil.rmtree(qc_dir)
def run_compare(pInputFile, pInputFormat, pOutputFormat, pChrNameList=None):
    """
    Convert a matrix with hicExport and compare the result with the input.

    :param pInputFile: path of the matrix to convert
    :param pInputFormat: value passed as --inputFormat
    :param pOutputFormat: value passed as --outputFormat; also used as the
        suffix of the temporary output file
    :param pChrNameList: not used by this function
    :return: True if all comparisons passed (a failing comparison raises)
    """
    converted = NamedTemporaryFile(suffix='.' + pOutputFormat, delete=False)
    converted.close()

    template = "--inFile {} --inputFormat {} "\
        "--outFileName {} " \
        "--outputFormat {}"
    command_line = template.format(pInputFile, pInputFormat,
                                   converted.name, pOutputFormat)
    hicExport.main(command_line.split())

    reference = hm.hiCMatrix(pInputFile)
    exported = hm.hiCMatrix(converted.name)

    nt.assert_equal(exported.matrix.data, reference.matrix.data)
    nt.assert_equal(len(exported.cut_intervals), len(reference.cut_intervals))

    # only the first three fields of every interval are compared
    exported_regions = [interval[:3] for interval in exported.cut_intervals]
    reference_regions = [interval[:3] for interval in reference.cut_intervals]
    nt.assert_equal(exported_regions, reference_regions)

    os.unlink(converted.name)
    return True
Exemple #9
0
def test_save_bing_ren():
    """
    Write a small symmetric matrix with save_bing_ren() and load it again.

    Needs to be marked as xfail: .gz files are expected by hiCMatrix.__init__
    to be in dekker file format, so the final load fails.
    """
    gz_path = '/tmp/matrix.gz'
    # Make sure the path exists: reuse an existing file, otherwise create
    # an empty one.
    try:
        handle = open(gz_path, 'r')
    except Exception:
        handle = open(gz_path, 'w')
    handle.close()

    hic = hm.hiCMatrix()

    bins = [('a', 0, 10, 1), ('a', 10, 20, 1), ('a', 20, 30, 1),
            ('a', 30, 40, 1), ('b', 40, 50, 1)]
    counts = np.array([[1, 8, 5, 3, 0],
                       [0, 4, 15, 5, 1],
                       [0, 0, 0, 0, 2],
                       [0, 0, 0, 0, 1],
                       [0, 0, 0, 0, 0]])

    hic.matrix = csr_matrix(counts)
    # register the bins, then make the matrix symmetric
    hic.setMatrix(hic.matrix, bins)
    hic.matrix = hic.fillLowerTriangle(hic.matrix)

    hic.save_bing_ren(gz_path)

    # the test fails here because of hiCMatrix.__init__
    hm.hiCMatrix(gz_path)
Exemple #10
0
def test_save_dekker():
    """
    Save a small symmetric matrix in dekker format and compare after reload.

    Shape and content returned by getMatrix() must be equal for the saved
    and the reloaded matrix.
    """
    gz_path = '/tmp/matrix.gz'
    # Make sure the path exists: reuse an existing file, otherwise create
    # an empty one.
    try:
        handle = open(gz_path, 'r')
    except Exception:
        handle = open(gz_path, 'w')
    handle.close()

    hic = hm.hiCMatrix()

    bins = [('a', 0, 10, 1), ('a', 10, 20, 1), ('a', 20, 30, 1),
            ('a', 30, 40, 1), ('b', 40, 50, 1)]
    counts = np.array([[1, 8, 5, 3, 0],
                       [0, 4, 15, 5, 1],
                       [0, 0, 0, 0, 2],
                       [0, 0, 0, 0, 1],
                       [0, 0, 0, 0, 0]])

    hic.matrix = csr_matrix(counts)
    # register the bins, then make the matrix symmetric
    hic.setMatrix(hic.matrix, bins)
    hic.matrix = hm.hiCMatrix.fillLowerTriangle(hic.matrix)

    hic.save_dekker(gz_path)

    reloaded = hm.hiCMatrix(gz_path)
    # NOTE(review): the return value is discarded here, while a few lines
    # above the result of fillLowerTriangle is assigned back - confirm
    # whether an assignment was intended.
    reloaded.fillLowerTriangle(reloaded.matrix)

    saved_dense = hic.getMatrix()
    nt.assert_equal(saved_dense.shape, reloaded.getMatrix().shape)
    nt.assert_equal(hic.getMatrix(), reloaded.getMatrix())
Exemple #11
0
def main(args=None):
    """
    Read a Hi-C matrix in one format and save it in another.

    Three input paths exist: 'lieberman' (needs --chrNameList), 'npz' with
    more than one input file (the files are combined with
    ``combine_matrices``), and a single file in any other format. Optional
    steps are a cut of the matrix depth (--bplimit), keeping only selected
    chromosomes (--chromosomeOrder) and masking the nan bins
    (--clearMaskedBins).

    :param args: command line tokens handed to ``parse_args``. The default
        ``None`` is passed through unchanged, so calling ``main()`` without
        arguments behaves as before.
    :raises SystemExit: if the input format is lieberman and no chromosome
        name list was given
    """
    args = parse_arguments().parse_args(args)

    # create hiC matrix with given input format
    # additional file needed for lieberman format
    if args.inputFormat == 'lieberman':
        if args.chrNameList is None:
            # sys.exit with a string writes the message to stderr and
            # exits with status 1, like the site-provided exit() helper.
            sys.exit("Error: --chrNameList is required when the input format is lieberman. ")
        hic_ma = hm.hiCMatrix(matrixFile=args.inFile, file_format='lieberman', chrnameList=args.chrNameList)

    elif args.inputFormat == 'npz' and len(args.inFile) > 1:  # assume hicexplorer_multi format
        if args.bplimit:
            sys.stderr.write("\nCutting maximum matrix depth to {} for saving\n".format(args.bplimit))

        matrix, cut_intervals, nan_bins, corrections_factors, distance_counts = \
            combine_matrices(args.inFile, bplimit=args.bplimit)
        hic_ma = hm.hiCMatrix()
        hic_ma.setMatrix(matrix, cut_intervals=cut_intervals)

        # only overwrite the attributes for which combine_matrices
        # returned something
        if len(nan_bins):
            hic_ma.nan_bins = nan_bins
        if corrections_factors is not None:
            hic_ma.correction_factors = corrections_factors
        if distance_counts is not None:
            hic_ma.distance_counts = distance_counts

    else:
        hic_ma = hm.hiCMatrix(matrixFile=args.inFile[0], file_format=args.inputFormat)
        if args.bplimit:
            from scipy.sparse import triu
            sys.stderr.write("\nCutting maximum matrix depth to {} for saving\n".format(args.bplimit))

            # number of bins that correspond to the base pair limit
            limit = int(args.bplimit / hic_ma.getBinSize())
            # the difference of the two triangles keeps only the
            # diagonals k with -limit <= k < limit
            hic_ma.matrix = (triu(hic_ma.matrix, k=-limit) - triu(hic_ma.matrix, k=limit)).tocsr()
            hic_ma.matrix.eliminate_zeros()

    if args.chromosomeOrder:
        hic_ma.keepOnlyTheseChr(args.chromosomeOrder)

    if args.clearMaskedBins:
        hic_ma.maskBins(hic_ma.nan_bins)

    sys.stderr.write('saving...\n')

    # any output format without a dedicated writer goes through save()
    if args.outputFormat == 'dekker':
        hic_ma.save_dekker(args.outFileName)
    elif args.outputFormat == 'ren':
        hic_ma.save_bing_ren(args.outFileName)
    elif args.outputFormat == 'lieberman':
        hic_ma.save_lieberman(args.outFileName)
    elif args.outputFormat == 'npz':
        hic_ma.save_npz(args.outFileName)
    elif args.outputFormat == 'GInteractions':
        hic_ma.save_GInteractions(args.outFileName)
    else:
        hic_ma.save(args.outFileName)
Exemple #12
0
def main(args=None):
    """
    Save the log2 ratio of a treatment matrix over a control matrix.

    Both matrices are loaded and the one with the larger total is scaled
    down to the total of the other. Sparse matrix tricks are then used to
    get, for both matrices, the values at every position stored in their
    sum: zero values that appear in only one of the matrices are kept,
    while zeros in both matrices are removed. The log2 of treatment over
    control is written to ``args.outFileName``.

    :param args: command line tokens handed to ``parse_args``
    :raises AssertionError: if the matrices differ in shape, or if the sum
        of their difference is zero
    """
    args = parse_arguments().parse_args(args)

    hic_t = hm.hiCMatrix(matrixFile=args.treatment)
    hic_c = hm.hiCMatrix(matrixFile=args.control)

    # scale larger matrix down
    total_t = hic_t.matrix.sum()
    total_c = hic_c.matrix.sum()

    if total_c > total_t:
        scale_factor = [1, float(total_t) / total_c]
    else:
        scale_factor = [float(total_c) / total_t, 1]

    hic_t.matrix.data = hic_t.matrix.data * scale_factor[0]
    hic_c.matrix.data = hic_c.matrix.data * scale_factor[1]

    # Explicit checks instead of ``assert cond, log.error(...)``: they are
    # not stripped by ``python -O`` and the exception carries the message.
    # AssertionError is kept so that callers see the same exception type.
    if hic_t.matrix.shape != hic_c.matrix.shape:
        message = "Matrices have different shapes."
        log.error(message)
        raise AssertionError(message)

    if (hic_t.matrix - hic_c.matrix).sum() == 0:
        message = "Matrices are identical."
        log.error(message)
        raise AssertionError(message)

    # create a new matrix that is the sum of the two
    # matrices to compare. The goal is to have
    # a matrix that contains all the positions
    # that are non-zero in both matrices
    _mat = hic_t.matrix + hic_c.matrix

    # add one to each element in the new matrix
    _mat.data += 1

    # get a vector of the values in hic_t from
    # _mat
    values_t = (_mat - hic_t.matrix).data - 1

    # get a vector of the values in hic_c from
    # _mat
    values_c = (_mat - hic_c.matrix).data - 1

    # compute log2ratio
    _mat.data = np.log2(values_t / values_c)

    hic_t.matrix = _mat
    hic_t.matrix.eliminate_zeros()
    hic_t.save(args.outFileName)
Exemple #13
0
def test_hic_transfer_covariance():
    """
    Run hicTransform with --method covariance and compare with the reference.

    The data of the produced matrix must be almost equal to the data of
    hicTransform/covariance_small_50kb.h5.
    """
    result_file = NamedTemporaryFile(suffix='covariance_.h5', delete=False)
    result_file.close()
    input_matrix = ROOT + "/hicTransform/pearson_small_50kb.h5"

    command_line = "--matrix {} --outFileName {} --method covariance".format(
        input_matrix, result_file.name)
    hicTransform.main(command_line.split())

    expected = hm.hiCMatrix(ROOT + "hicTransform/covariance_small_50kb.h5")
    produced = hm.hiCMatrix(result_file.name)
    nt.assert_array_almost_equal(expected.matrix.data, produced.matrix.data)

    os.unlink(result_file.name)
Exemple #14
0
def test_hic_transfer_obs_exp():
    """
    Run hicTransform with --method obs_exp and compare with the reference.

    The data of the produced matrix must be almost equal to the data of
    hicTransform/obs_exp_small_50kb.h5.
    """
    result_file = NamedTemporaryFile(suffix='obs_exp_.h5', delete=False)
    result_file.close()

    command_line = "--matrix {} --outFileName {} --method obs_exp".format(
        original_matrix, result_file.name)
    hicTransform.main(command_line.split())

    expected = hm.hiCMatrix(ROOT + "hicTransform/obs_exp_small_50kb.h5")
    produced = hm.hiCMatrix(result_file.name)
    nt.assert_array_almost_equal(expected.matrix.data, produced.matrix.data)

    os.unlink(result_file.name)
Exemple #15
0
def test_save_hdf5():
    """
    Round-trip a matrix through save_hdf5() in two situations.

    1. The matrix is made symmetric before it is saved; the reloaded matrix
       must be equal to it.
    2. The matrix is saved as entered (upper triangle only); both the saved
       and the reloaded matrix are passed through fillLowerTriangle before
       they are compared.

    NOTE(review): the original comments call the second case
    "pSymmetric=False", but both calls use save_hdf5(outfile) without that
    argument - confirm which was intended.
    """
    h5_path = '/tmp/matrix.h5'

    def fresh_matrix():
        # Build a new hiCMatrix holding the 5x5 test counts.
        new_hic = hm.hiCMatrix()
        bins = [('a', 0, 10, 1), ('a', 10, 20, 1), ('a', 20, 30, 1),
                ('a', 30, 40, 1), ('b', 40, 50, 1)]
        new_hic.nan_bins = []
        counts = np.array([[1, 8, 5, 3, 0],
                           [0, 4, 15, 5, 1],
                           [0, 0, 0, 0, 2],
                           [0, 0, 0, 0, 1],
                           [0, 0, 0, 0, 0]])
        new_hic.matrix = csr_matrix(counts)
        new_hic.setMatrix(new_hic.matrix, bins)
        return new_hic

    # case 1: symmetric matrix
    hic = fresh_matrix()
    hic.matrix = hic.fillLowerTriangle(hic.matrix)
    hic.save_hdf5(h5_path)
    reloaded = hm.hiCMatrix(h5_path)
    nt.assert_equal(hic.getMatrix(), reloaded.getMatrix())

    # case 2: matrix saved without filling the lower triangle first
    hic = fresh_matrix()
    hic.save_hdf5(h5_path)
    reloaded_upper = hm.hiCMatrix(h5_path)

    reloaded_upper.matrix = reloaded_upper.fillLowerTriangle(
        reloaded_upper.matrix)
    hic.matrix = hic.fillLowerTriangle(hic.matrix)
    nt.assert_equal(hic.getMatrix(), reloaded_upper.getMatrix())
def test_correct_matrix():
    """
    Run hicMergeMatrixBins with --numBins 5 and compare with the reference.

    Matrix data and cut intervals of the produced file must equal those of
    hicMergeMatrixBins/result.h5.
    """
    result_file = NamedTemporaryFile(suffix='.h5', delete=False)
    result_file.close()

    template = "--matrix {} --numBins 5 " \
        " --outFileName {}"
    command_line = template.format(ROOT + "small_test_matrix.h5",
                                   result_file.name)
    hicMergeMatrixBins.main(command_line.split())

    expected = hm.hiCMatrix(ROOT + "hicMergeMatrixBins/result.h5")
    produced = hm.hiCMatrix(result_file.name)
    nt.assert_equal(expected.matrix.data, produced.matrix.data)
    nt.assert_equal(expected.cut_intervals, produced.cut_intervals)

    os.unlink(result_file.name)
Exemple #17
0
def test_save():
    """
    Save one matrix as .h5, .cool, .npz and .gz (dekker) and reload each file.

    Every reloaded matrix must be equal to the matrix that was saved.

    The formats ren, lieberman and GInteractions are not covered because
    they are unsupported file formats for loading (see __init__ of class
    hiCMatrix); separate tests marked as xfail exist for them.
    """
    # order: h5, cool, npz, dekker
    paths = ('/tmp/matrix.h5', '/tmp/matrix.cool',
             '/tmp/matrix.npz', '/tmp/matrix.gz')

    hic = hm.hiCMatrix()
    bins = [('a', 0, 10, 1), ('a', 10, 20, 1), ('a', 20, 30, 1),
            ('a', 30, 40, 1), ('b', 40, 50, 1)]

    hic.nan_bins = []

    counts = np.array([[1, 8, 5, 3, 0],
                       [0, 4, 15, 5, 1],
                       [0, 0, 0, 0, 2],
                       [0, 0, 0, 0, 1],
                       [0, 0, 0, 0, 0]])

    hic.matrix = csr_matrix(counts)
    hic.setMatrix(hic.matrix, bins)
    hic.matrix = hic.fillLowerTriangle(hic.matrix)

    # first write and reload every format ...
    reloaded = []
    for path in paths:
        hic.save(path)
        reloaded.append(hm.hiCMatrix(path))

    # ... then compare all of them with the original
    for loaded in reloaded:
        nt.assert_equal(hic.getMatrix(), loaded.getMatrix())
Exemple #18
0
def main(args=None):
    """
    Merge bins of a Hi-C matrix and save the merged matrix.

    Depending on ``--runningWindow`` the merge is done by
    ``running_window_merge_v2`` or by ``merge_bins``. Correction factors are
    not merged; they are reset to None with a warning on stderr.

    :param args: command line tokens handed to ``parse_args``. The default
        ``None`` is passed through unchanged, so calling ``main()`` without
        arguments behaves as before.
    """
    args = parse_arguments().parse_args(args)
    hic = hm.hiCMatrix(args.matrix)
    if args.runningWindow:
        merged_matrix = running_window_merge_v2(hic, args.numBins)
    else:
        merged_matrix = merge_bins(hic, args.numBins)

    # print() with a single argument gives the same output on Python 2 and 3
    print('saving matrix')
    # The original author noted a pickle problem with large arrays and, to
    # increase the sparsity of the matrix, subtracted one from all stored
    # values. That subtraction was already disabled; only the removal of
    # explicit zeros is still done.
    merged_matrix.matrix.eliminate_zeros()
    if merged_matrix.correction_factors is not None:
        sys.stderr.write(
            "*WARNING*: The corrections factors are not merged and are set to None\n"
        )
        merged_matrix.correction_factors = None

    merged_matrix.save(args.outFileName)
Exemple #19
0
def test_convert_to_zscore_matrix():
    """
    Compare convert_to_zscore_matrix() with a z-score computed by hand.

    A random upper triangular 100 x 100 matrix is created and all values
    below 90 are set to zero. For every diagonal the mean and the standard
    deviation are computed, and each value on or above the main diagonal is
    converted to (value - mean) / std of its diagonal. The result must be
    almost equal to the dense form of the converted Hi-C matrix.
    """
    from numpy.testing import assert_almost_equal

    # make test matrix
    m_size = 100
    # randint excludes the upper bound, so 101 keeps the inclusive range
    # 0..100 of the deprecated np.random.random_integers(0, 100, ...)
    mat = np.triu(np.random.randint(0, 101, (m_size, m_size)))
    # add a number of zeros
    mat[mat < 90] = 0

    # mean and standard deviation of every diagonal, keyed by its offset
    mu = {idx: mat.diagonal(idx).mean() for idx in range(m_size)}
    std = {idx: np.std(mat.diagonal(idx)) for idx in range(m_size)}

    # compute z-score for test matrix (upper triangle only)
    zscore_mat = np.zeros((m_size, m_size))
    for _i in range(m_size):
        for _j in range(_i, m_size):
            diag = _j - _i
            zscore_mat[_i, _j] = (mat[_i, _j] - mu[diag]) / std[diag]

    # make Hi-C matrix based on test matrix
    hic = hm.hiCMatrix()
    hic.matrix = csr_matrix(mat)
    cut_intervals = [('chr', idx, idx + 10, 0) for idx in range(0, m_size * 10, 10)]
    hic.setMatrix(hic.matrix, cut_intervals)
    hic.convert_to_zscore_matrix()

    assert_almost_equal(hic.matrix.todense(), zscore_mat)
Exemple #20
0
def test_reorderMatrix():
    """
    Check reorderMatrix() with orig=(1, 3) and dest=2.

    After the call the matrix content, its shape and the cut intervals must
    equal the expected reordered values defined below.
    """
    source_range = (1, 3)
    target = 2

    # build the input matrix
    hic = hm.hiCMatrix()
    bins = [('a', 0, 10, 1), ('a', 10, 20, 1), ('a', 20, 30, 1),
            ('b', 30, 40, 1), ('b', 40, 50, 1)]

    hic.nan_bins = []

    counts = np.array([[1, 8, 5, 3, 0],
                       [0, 4, 15, 5, 1],
                       [0, 0, 0, 0, 2],
                       [0, 0, 0, 0, 1],
                       [0, 0, 0, 0, 0]])

    hic.matrix = csr_matrix(counts)
    hic.setMatrix(hic.matrix, bins)

    # sanity check before the reordering
    nt.assert_equal(hic.getMatrix(), counts)

    hic.reorderMatrix(source_range, target)

    expected_counts = np.matrix([[1, 3, 8, 5, 0],
                                 [0, 0, 0, 0, 1],
                                 [0, 5, 4, 15, 1],
                                 [0, 0, 0, 0, 2],
                                 [0, 0, 0, 0, 0]])
    expected_bins = [('a', 0, 10, 1), ('b', 30, 40, 1), ('a', 10, 20, 1),
                     ('a', 20, 30, 1), ('b', 40, 50, 1)]

    # content, shape and intervals must all match
    nt.assert_equal(hic.getMatrix(), expected_counts)
    nt.assert_equal(hic.matrix.shape, expected_counts.shape)
    nt.assert_equal(hic.cut_intervals, expected_bins)
Exemple #21
0
def test_removeBySequencedCount():
    """
    Check that removeBySequencedCount() returns None for integer coverage.

    According to the original test, the function returns directly if the
    last entry of the cut intervals is not of type float64, which is the
    case for the intervals used here.
    """
    # build the input matrix
    hic = hm.hiCMatrix()
    bins = [('a', 0, 10, 1), ('a', 10, 20, 1), ('a', 20, 30, 1),
            ('b', 30, 40, 1), ('b', 40, 50, 1)]

    hic.nan_bins = []

    counts = np.array([[1, 8, 5, 3, 0],
                       [0, 4, 15, 5, 1],
                       [0, 0, 0, 0, 2],
                       [0, 0, 0, 0, 1],
                       [0, 0, 0, 0, 0]])

    hic.matrix = csr_matrix(counts)
    hic.setMatrix(hic.matrix, bins)

    nt.assert_equal(hic.getMatrix(), counts)

    # precondition: the fourth field of the intervals is not a float64
    _chroms, _starts, _ends, coverage = zip(*hic.cut_intervals)
    assert type(coverage[0]) is not np.float64

    # nothing is expected to be returned
    expected = None
    returned = hic.removeBySequencedCount()

    nt.assert_equal(returned, expected)
Exemple #22
0
def main(args=None):
    """
    Print summary information for every matrix given on the command line.

    For each file the matrix size, half of the sum of all elements, the bin
    length, the chromosome names, the number of stored elements, the minimum
    and maximum stored value and the number of nan bins are printed.

    :param args: command line tokens handed to ``parse_args``. The default
        ``None`` is passed through unchanged, so calling ``main()`` without
        arguments behaves as before.
    """
    args = parse_arguments().parse_args(args)
    for matrix in args.matrices:
        print("File:\t{}".format(matrix))

        hic_ma = hm.hiCMatrix(matrix)
        size = hic_ma.matrix.shape[0]
        num_non_zero = hic_ma.matrix.nnz
        sum_elements = hic_ma.matrix.sum() / 2
        bin_length = hic_ma.getBinSize()
        num_nan_bins = len(hic_ma.nan_bins)
        stored_values = hic_ma.matrix.data
        if stored_values.size:
            min_non_zero = stored_values.min()
            max_non_zero = stored_values.max()
        else:
            # min()/max() of an empty array raise ValueError; report the
            # values as not available instead of aborting.
            min_non_zero = max_non_zero = "NA"
        chromosomes = hic_ma.chrBinBoundaries.keys()

        print("Size:\t{:,}".format(size))
        print("Sum:\t{:,}".format(sum_elements))
        print("Bin_length:\t{}".format(bin_length))
        print("Chromosomes:\t{}".format(", ".join(chromosomes)))
        print("Non-zero elements:\t{:,}".format(num_non_zero))
        print("Minimum (non zero):\t{}".format(min_non_zero))
        print("Maximum:\t{}".format(max_non_zero))
        print("NaN bins:\t{}".format(num_nan_bins))
        print("")
Exemple #23
0
def test_removeBins():
    """
    Check removeBins() by removing the bins with index 1 and 2.

    Afterwards the matrix content and shape, the chromosome bin boundaries
    and the cut intervals must describe the three remaining bins.
    """
    hic = hm.hiCMatrix()
    bins = [('a', 0, 10, 1), ('a', 10, 20, 1), ('a', 20, 30, 1),
            ('b', 30, 40, 1), ('b', 40, 50, 1)]

    hic.nan_bins = []

    counts = np.array([[1, 8, 5, 3, 0],
                       [0, 4, 15, 5, 1],
                       [0, 0, 0, 0, 2],
                       [0, 0, 0, 0, 1],
                       [0, 0, 0, 0, 0]])

    hic.matrix = csr_matrix(counts)
    hic.setMatrix(hic.matrix, bins)

    # sanity check before the removal
    nt.assert_equal(hic.getMatrix(), counts)

    expected_counts = np.matrix([[1, 3, 0],
                                 [0, 0, 1],
                                 [0, 0, 0]])
    expected_boundaries = OrderedDict([('a', (0, 1)), ('b', (1, 3))])
    expected_bins = [('a', 0, 10, 1), ('b', 30, 40, 1), ('b', 40, 50, 1)]

    hic.removeBins([1, 2])

    nt.assert_equal(hic.getMatrix(), expected_counts)
    nt.assert_equal(hic.matrix.shape, expected_counts.shape)
    nt.assert_equal(hic.chrBinBoundaries, expected_boundaries)
    nt.assert_equal(hic.cut_intervals, expected_bins)
Exemple #24
0
def test_reorderChromosomes_old():
    """
    Check reorderChromosomes_old() with three chromosome orders.

    The order is changed to ['b', 'a'], changed back to ['a', 'b'], and
    finally a list that is too long (['a', 'b', 'c']) is passed, which must
    leave the chromosome bin boundaries unchanged.
    """
    hic = hm.hiCMatrix()
    bins = [('a', 0, 10, 1), ('a', 10, 20, 1), ('a', 20, 30, 1),
            ('b', 30, 40, 1), ('b', 40, 50, 1)]

    hic.nan_bins = []

    counts = np.array([[1, 8, 5, 3, 0],
                       [0, 4, 15, 5, 1],
                       [0, 0, 0, 0, 2],
                       [0, 0, 0, 0, 1],
                       [0, 0, 0, 0, 0]])

    hic.matrix = csr_matrix(counts)
    hic.setMatrix(hic.matrix, bins)

    original_layout = [('a', (0, 3)), ('b', (3, 5))]
    swapped_layout = [('b', (0, 2)), ('a', (2, 5))]

    # (requested order, boundaries expected afterwards)
    steps = ((['b', 'a'], swapped_layout),
             (['a', 'b'], original_layout),
             # new order too long will cause function to return
             (['a', 'b', 'c'], original_layout))

    for chrom_order, layout in steps:
        hic.reorderChromosomes_old(chrom_order)
        nt.assert_equal(hic.chrBinBoundaries, OrderedDict(layout))
Exemple #25
0
def test_get_chromosome_sizes():
    """
    Check get_chromosome_sizes() for two sets of cut intervals.

    In both cases the size expected for a chromosome is the end position of
    its last bin.
    """
    # build the input matrix
    hic = hm.hiCMatrix()
    bins = [('a', 0, 10, 1), ('a', 10, 20, 1), ('a', 20, 30, 1),
            ('b', 30, 40, 1), ('b', 40, 50, 1)]

    hic.nan_bins = []

    counts = np.array([[1, 8, 5, 3, 0],
                       [0, 4, 15, 5, 1],
                       [0, 0, 0, 0, 2],
                       [0, 0, 0, 0, 1],
                       [0, 0, 0, 0, 0]])

    hic.matrix = csr_matrix(counts)
    hic.setMatrix(hic.matrix, bins)

    nt.assert_equal(hic.getMatrix(), counts)

    # first set of intervals
    nt.assert_equal(hic.get_chromosome_sizes(),
                    OrderedDict([('a', 30), ('b', 50)]))

    # second set: three chromosomes, the last bin of 'c' ends at 90
    other_bins = [('a', 0, 10, 1), ('b', 10, 20, 1), ('b', 20, 30, 1),
                  ('c', 30, 40, 1), ('c', 40, 90, 1)]
    other_sizes = OrderedDict([('a', 10), ('b', 30), ('c', 90)])

    hic.setMatrix(hic.matrix, other_bins)

    nt.assert_equal(hic.get_chromosome_sizes(), other_sizes)
Exemple #26
0
def chr_diagonals(matrix_file_name, chromosome_exclude):
    """
    Extract the counts by distance per chromosome.

    :param matrix_file_name: path of the Hi-C matrix to load
    :param chromosome_exclude: chromosome names to drop before counting;
        ``None`` is treated as an empty list
    :return: tuple ``(diagonal_dict, chrom_list, common_dist, max_dist)``:
        the result of ``getCountsByDistance(per_chr=True)``, the chromosome
        names of the matrix, the list of distances present for every
        chromosome that has any, and the largest distance seen.
        ``common_dist`` is an empty list if no chromosome has distances.
    """
    hic_ma = HiCMatrix.hiCMatrix(matrix_file_name)
    # print() with a single argument gives the same output on Python 2 and 3
    print("removing unwanted chrs")
    hic_ma.filterUnwantedChr()
    if chromosome_exclude is None:
        chromosome_exclude = []

    chrtokeep = [
        x for x in hic_ma.interval_trees.keys() if x not in chromosome_exclude
    ]
    print("Number of contacts {}".format(hic_ma.matrix.sum()))
    hic_ma.keepOnlyTheseChr(chrtokeep)
    diagonal_dict = hic_ma.getCountsByDistance(per_chr=True)

    common_dist = None
    max_dist = 0
    chrom_list = hic_ma.getChrNames()
    for chrom in chrom_list:
        try:
            # list() is required on Python 3, where keys() is a view
            distances = list(diagonal_dict[chrom].keys())
        except KeyError:
            continue
        if not distances:
            # chromosome without any distance: nothing to contribute
            continue

        # track the largest distance seen so far
        largest = max(distances)
        if largest > max_dist:
            max_dist = largest

        # get list of common distances
        if common_dist is None:
            common_dist = set(distances)
        else:
            common_dist = common_dist.intersection(distances)

    # common_dist is still None if every chromosome was skipped
    return diagonal_dict, chrom_list, list(common_dist or []), max_dist
Exemple #27
0
def test_update_matrix(capsys):
    """
    Check the error handling of update_matrix().

    Three calls are made: one with a matching number of cut intervals (an
    AttributeError raised by it is ignored), one with too few cut intervals
    (AssertionError expected) and one after bins have been masked
    (an exception is expected).

    :param capsys: pytest fixture; not used in the test body
    """
    hic = hm.hiCMatrix()
    bins = [('a', 0, 10, 1), ('a', 10, 20, 1), ('a', 20, 30, 1),
            ('b', 30, 40, 1), ('b', 40, 50, 1)]

    hic.nan_bins = []

    counts = np.array([[1, 8, 5, 3, 0],
                       [0, 4, 15, 5, 1],
                       [0, 0, 0, 0, 2],
                       [0, 0, 0, 0, 1],
                       [0, 0, 0, 0, 0]])

    hic.matrix = csr_matrix(counts)
    hic.setMatrix(hic.matrix, bins)

    nt.assert_equal(hic.getMatrix(), counts)

    replacement_bins = [('c', 0, 10, 1), ('d', 10, 20, 1), ('d', 20, 30, 1)]
    replacement = np.array([[3, 6, 4],
                            [np.nan, 0, 2],
                            [1, 0, 0]])

    # NOTE(review): an AttributeError is deliberately ignored here; the
    # reason is not visible in this test.
    try:
        hic.update_matrix(replacement, replacement_bins)
    except AttributeError:
        pass

    # if matrix.shape[0] is not equal to the length of cut_intervals an
    # AssertionError is raised
    too_few_bins = [('c', 0, 10, 1), ('d', 10, 20, 1)]
    with pytest.raises(AssertionError):
        hic.update_matrix(replacement, too_few_bins)

    # if the matrix contains masked bins an exception is raised
    hic.maskBins([0, 1])
    with pytest.raises(Exception):
        hic.update_matrix(replacement, replacement_bins)
def main(args=None):
    """
    Print the sum of contacts inside TADs, between TADs and their ratio.

    The regions are read from the BED file ``args.tads``. For every region
    the sub-matrix spanned by its bins is added to the TAD sum. For every
    region that is not the first one of its chromosome, the sub-matrix with
    rows ``prev_start:start_bin`` and the columns of the region is added to
    the inter sum.

    :param args: command line tokens handed to ``parse_args``
    """
    args = parse_arguments().parse_args(args)
    hicma = hm.hiCMatrix(args.matrix)

    prev_chrom = None
    prev_start = None

    bed_h = readBed.ReadBed(args.tads)
    sum_tads = 0
    sum_inter = 0
    for bed in bed_h:
        start_bin, end_bin = hicma.getRegionBinRange(bed.chromosome, bed.start,
                                                     bed.end)
        if prev_chrom is None or bed.chromosome != prev_chrom:
            # first region of a chromosome: no previous region to pair with
            sum_tads += hicma.matrix[start_bin:end_bin,
                                     start_bin:end_bin].sum()
            prev_chrom = bed.chromosome
            prev_start = start_bin
            continue

        # NOTE(review): prev_start is only set at the first region of a
        # chromosome and is not advanced here, so the rows always start at
        # that first region - confirm this is intended.
        sum_inter += hicma.matrix[prev_start:start_bin,
                                  start_bin:end_bin].sum()
        sum_tads += hicma.matrix[start_bin:end_bin, start_bin:end_bin].sum()

    # float() avoids integer division on Python 2; without any TAD contacts
    # (e.g. an empty BED file) the ratio is reported as nan instead of
    # failing with a division by zero.
    ratio = float(sum_inter) / sum_tads if sum_tads else float('nan')
    print("sum tads\t{}\nsum inter\t{}\nratio inter/tads\t{:.3f}".format(
        sum_tads, sum_inter, ratio))
Exemple #29
0
def test_filterUnwantedChr():
    """
    Check filterUnwantedChr() without and with the chromosome argument.

    Without an argument the three 'Het' chromosomes checked below must
    disappear from the matrix; with chromosome='chrX' only 'chrX' may
    remain.
    """
    hic = hm.hiCMatrix(ROOT + 'small_test_matrix.h5')
    het_chromosomes = ('chr2RHet', 'chr3LHet', 'chr3RHet')

    # present before filtering ...
    for name in het_chromosomes:
        assert name in hic.getChrNames()

    hic.filterUnwantedChr()

    # ... and gone afterwards
    for name in het_chromosomes:
        assert name not in hic.getChrNames()

    remaining = list(hic.getChrNames())

    # make sure there are any other chromosomes than 'chrX'
    assert any(name != 'chrX' for name in remaining)

    # then filter for 'chrX'
    hic.filterUnwantedChr(chromosome='chrX')

    remaining = list(hic.getChrNames())

    # and check that only 'chrX' is left in the matrix
    assert all(name == 'chrX' for name in remaining)
Exemple #30
0
def test_keepOnlyTheseChr():
    """
    Check that keepOnlyTheseChr() leaves exactly the requested chromosomes.

    The chromosome names are compared independent of their order.
    """
    chromosome_list = ['chrX', 'chr2RHet']

    hic = hm.hiCMatrix(ROOT + 'small_test_matrix.h5')

    hic.keepOnlyTheseChr(chromosome_list)

    # sorted() returns the sorted list; list.sort() returns None, so the
    # previous comparison of two .sort() results could never fail.
    nt.assert_equal(sorted(hic.getChrNames()), sorted(chromosome_list))