Example #1
0
def infer_structure(contactMat,
                    structure,
                    alpha,
                    num_threads,
                    classical=False):
    """Embed a single structure in 3D from its contact matrix.

    ``contactMat`` is passed through ``at.makeSymmetric`` (the return value
    is ignored, so the helper is expected to work in place), converted to a
    distance matrix with ``at.contactToDist(contactMat, alpha)``, and then
    embedded in three dimensions. The coordinates are handed to
    ``structure.setCoords``; nothing is returned.

    Args:
        contactMat: square contact matrix with one row per nonzero locus
            of ``structure``.
        structure: object exposing ``nonzero_abs_indices()`` and
            ``setCoords(coords)``.
        alpha: forwarded to ``at.contactToDist``.
        num_threads: forwarded to ``manifold.MDS`` as ``n_jobs``.
        classical: when true use ``la.cmds``; otherwise metric MDS.

    Raises:
        AssertionError: if the matrix size does not match the number of
            nonzero loci, or if any row of the matrix sums to zero.
    """
    locus_count = len(structure.nonzero_abs_indices())
    assert locus_count == len(contactMat)

    at.makeSymmetric(contactMat)
    row_totals = np.array([sum(values) for values in contactMat])
    empty_rows = np.where(row_totals == 0)[0]
    assert len(empty_rows) == 0

    dists = at.contactToDist(contactMat, alpha)
    at.makeSymmetric(dists)

    if not classical:
        # Metric MDS on the precomputed distances, freshly seeded each call.
        embedder = manifold.MDS(n_components=3,
                                metric=True,
                                random_state=np.random.RandomState(),
                                verbose=0,
                                dissimilarity="precomputed",
                                n_jobs=num_threads)
        xyz = embedder.fit_transform(dists)
    else:
        xyz = la.cmds(dists)

    structure.setCoords(xyz)
Example #2
0
def infer_structure(contactMat,
                    structure,
                    alpha,
                    num_threads,
                    weight,
                    classical=False):
    """Embed a single structure in 3D from a contact matrix blended with
    its distance-dependent expectation.

    Every below-diagonal entry ``contactMat[i, j]`` (``j < i``) is replaced
    in place by ``(1 - weight) * contactMat[i, j] + weight *
    expected[i - j - 1]``, where ``expected = get_expected(contactMat)``.
    The matrix is then passed through ``at.makeSymmetric``, converted to
    distances with ``at.contactToDist(contactMat, alpha)``, divided by its
    mean, and embedded. Coordinates are handed to ``structure.setCoords``;
    nothing is returned.

    Args:
        contactMat: square NumPy contact matrix; modified in place.
        structure: object exposing ``nonzero_abs_indices()`` and
            ``setCoords(coords)``.
        alpha: forwarded to ``at.contactToDist``.
        num_threads: forwarded to ``manifold.MDS`` as ``n_jobs``.
        weight: blending factor applied to the expected values.
        classical: when true use ``la.cmds``; otherwise metric MDS.

    Raises:
        AssertionError: if the matrix size does not match the number of
            nonzero loci, or if any row of the blended matrix sums to zero.
    """
    assert len(structure.nonzero_abs_indices()) == len(contactMat)

    expected = np.asarray(get_expected(contactMat))
    for i in range(1, len(contactMat)):
        # Columns 0..i-1 of row i sit at separations i, i-1, ..., 1, i.e.
        # expected[i-1], ..., expected[0]: the reversed prefix of length i.
        contactMat[i, :i] = ((1 - weight) * contactMat[i, :i] +
                             weight * expected[:i][::-1])

    at.makeSymmetric(contactMat)
    rowsums = np.array([sum(row) for row in contactMat])
    assert len(np.where(rowsums == 0)[0]) == 0

    distMat = at.contactToDist(contactMat, alpha)
    at.makeSymmetric(distMat)

    distMat = distMat / np.mean(distMat)  # normalize to unit mean distance

    if classical:  # classical MDS
        coords = la.cmds(distMat)
    else:
        coords = manifold.MDS(n_components=3,
                              metric=True,
                              random_state=np.random.RandomState(),
                              verbose=0,
                              dissimilarity="precomputed",
                              n_jobs=num_threads).fit_transform(distMat)

    structure.setCoords(coords)
Example #3
0
def matFromBed(path, structure=None):
    """Build a contact matrix from a BED-style contact file.

    Each non-blank line is split on whitespace. Fields 1 and 4 (0-based)
    are parsed as the two integer locus positions and field 6 as the
    contact value. A contact is kept only when ``structure.get_rel_index``
    returns a non-None index for both positions; values are accumulated
    into the lower triangle and the matrix is then passed through
    ``at.makeSymmetric``.

    Args:
        path: path of the contact file.
        structure: object providing ``nonzero_abs_indices()``, ``offset``,
            ``points`` and ``get_rel_index(loc)``. When None it is built
            with ``structureFromBed(path, None, None)``.

    Returns:
        A ``numpoints x numpoints`` float array, where ``numpoints`` is the
        number of nonzero loci in ``structure``.

    Raises:
        AssertionError: if the largest absolute index does not fit in
            ``structure.points`` or if any row of the matrix sums to zero.
    """
    if structure is None:
        structure = structureFromBed(path, None, None)

    abs_indices = structure.nonzero_abs_indices()
    numpoints = len(abs_indices)
    mat = np.zeros((numpoints, numpoints))

    assert max(abs_indices) - structure.offset < len(structure.points)

    # The with-block closes the file; no explicit close() is needed.
    with open(path) as infile:
        for line in infile:
            fields = line.split()
            if not fields:  # tolerate blank lines
                continue
            loc1 = int(fields[1])
            loc2 = int(fields[4])
            index1 = structure.get_rel_index(loc1)
            index2 = structure.get_rel_index(loc2)
            if index1 is None or index2 is None:
                continue
            # Accumulate below (or on) the diagonal only.
            row, col = max(index1, index2), min(index1, index2)
            mat[row, col] += float(fields[6])

    at.makeSymmetric(mat)
    assert not np.any(mat.sum(axis=1) == 0), "contact matrix has an empty row"

    return mat
Example #4
0
def infer_clusters(contactMat, clusters, offsets, classical=False):
    """Embed several clusters that share one contact matrix.

    The matrix is passed through ``at.makeSymmetric``, converted to
    distances with ``at.contactToDist`` and embedded in 3D. For each
    ``(offset, cluster)`` pair, point ``i`` of the cluster receives row
    ``i + offset`` of the embedding as its ``pos`` attribute. Nothing is
    returned.

    Args:
        contactMat: square contact matrix whose size equals the total
            number of point numbers over all clusters.
        clusters: objects exposing ``getPointNums()`` and ``getPoints()``.
        offsets: per-cluster starting row in the embedding.
        classical: when true use ``st.cmds``; otherwise metric MDS with a
            fixed seed of 3.

    Raises:
        AssertionError: on a size mismatch or if any row sums to zero.
    """
    total_points = sum([len(c.getPointNums()) for c in clusters])
    assert total_points == len(contactMat)

    at.makeSymmetric(contactMat)
    row_totals = np.array([sum(values) for values in contactMat])
    empty_rows = np.where(row_totals == 0)[0]
    assert len(empty_rows) == 0

    dists = at.contactToDist(contactMat)
    at.makeSymmetric(dists)

    if not classical:
        embedder = manifold.MDS(n_components=3,
                                metric=True,
                                random_state=np.random.RandomState(seed=3),
                                verbose=0,
                                dissimilarity="precomputed",
                                n_jobs=-1)
        xyz = embedder.fit_transform(dists)
    else:
        xyz = st.cmds(dists)

    for offset, cluster in zip(offsets, clusters):
        for i, point in enumerate(cluster.getPoints()):
            point.pos = xyz[i + offset]
Example #5
0
def get_compartments(mat, enrichments=None, active=True):
    """Compute compartment scores, from Lieberman-Aiden et al (2009).

    ``mat`` is transformed with ``oe`` and then ``cor`` (each result is
    passed through ``at.makeSymmetric``), and the first principal
    component of the result is taken as the raw score vector.

    Args:
        mat: contact matrix handed to ``oe``.
        enrichments: optional per-locus values. When given, the sign of
            the scores is chosen from their Pearson correlation with
            ``enrichments``.
        active: if true, scores are flipped so the correlation with
            ``enrichments`` is non-negative; if false, so that it is
            non-positive.

    Returns:
        1D array of scores. Positive scores are divided by the largest
        score and the rest by the magnitude of the smallest score, so
        values lie in [-1, 1].
    """
    oe_mat = oe(mat)
    at.makeSymmetric(oe_mat)
    cor_mat = cor(oe_mat)
    at.makeSymmetric(cor_mat)

    # fit_transform fits the model itself; a separate fit() call beforehand
    # would only repeat the decomposition.
    scores = PCA(n_components=1).fit_transform(cor_mat)[:, 0]

    # enforce positive score = active chromatin
    if enrichments is not None:
        r, _ = st.pearsonr(scores, enrichments)
        if (active and r < 0) or (not active and r > 0):
            scores = -scores

    # normalize each sign separately
    max_val = max(scores)
    min_val = -min(scores)
    positive = scores > 0
    scores[positive] /= max_val
    if min_val > 0:
        scores[~positive] /= min_val
    # When min_val == 0 every non-positive score is exactly 0; leaving those
    # untouched avoids turning them into NaN through 0/0.

    return scores
Example #6
0
def removeInfinite(mat):
	"""Return a copy of ``mat`` with non-finite entries zeroed.

	Only entries on or below the diagonal are inspected; any value for
	which ``np.isfinite`` is false (infinities and NaN) is set to 0. The
	copy is then passed through ``at.makeSymmetric`` and returned. The
	input matrix itself is not modified.
	"""
	cleaned = np.copy(mat)
	size = len(mat)
	for row in range(size):
		for col in range(row + 1):
			if np.isfinite(cleaned[row, col]):
				continue
			cleaned[row, col] = 0
	at.makeSymmetric(cleaned)
	return cleaned
Example #7
0
def infer_structures(contactMat, structures, offsets, alpha, num_threads, classical=False):
	"""Embed several structures that share one contact matrix.

	The matrix is passed through ``at.makeSymmetric``, converted to
	distances with ``at.contactToDist(contactMat, alpha)`` and embedded in
	3D. Each structure then receives, via ``setCoords``, the slice of the
	embedding that starts at its offset and spans as many rows as the
	structure has points. Nothing is returned.

	Raises AssertionError if the matrix size differs from the total number
	of point numbers over all structures, or if any row sums to zero.
	"""
	total_points = sum([len(s.getPointNums()) for s in structures])
	assert total_points == len(contactMat)

	at.makeSymmetric(contactMat)
	row_totals = np.array([sum(values) for values in contactMat])
	empty_rows = np.where(row_totals == 0)[0]
	assert len(empty_rows) == 0

	dists = at.contactToDist(contactMat, alpha)
	at.makeSymmetric(dists)

	if not classical:
		embedder = manifold.MDS(n_components=3, metric=True, random_state=np.random.RandomState(), verbose=0, dissimilarity="precomputed", n_jobs=num_threads)
		xyz = embedder.fit_transform(dists)
	else:
		xyz = la.cmds(dists)

	for start, structure in zip(offsets, structures):
		stop = start + len(structure.getPoints())
		structure.setCoords(xyz[start:stop])
Example #8
0
def distmat(contactMat, structure, alpha, weight, num_threads):
    """Return a mean-normalized distance matrix from a contact matrix
    blended with its distance-dependent expectation.

    Every below-diagonal entry ``contactMat[i, j]`` (``j < i``) is replaced
    in place by ``(1 - weight) * contactMat[i, j] + weight *
    expected[i - j - 1]``, where ``expected = get_expected(contactMat)``.
    The matrix is then passed through ``at.makeSymmetric``, converted with
    ``at.contactToDist(contactMat, alpha)`` and divided by its mean.

    Args:
        contactMat: square NumPy contact matrix; modified in place.
        structure: object exposing ``nonzero_abs_indices()``; used only
            for the size check.
        alpha: forwarded to ``at.contactToDist``.
        weight: blending factor applied to the expected values.
        num_threads: unused here; kept for interface compatibility.

    Returns:
        The distance matrix divided by its mean value.

    Raises:
        AssertionError: if the matrix size does not match the number of
            nonzero loci, or if any row of the blended matrix sums to zero.
    """
    assert len(structure.nonzero_abs_indices()) == len(contactMat)

    expected = np.asarray(get_expected(contactMat))
    for i in range(1, len(contactMat)):
        # Columns 0..i-1 of row i sit at separations i, i-1, ..., 1, i.e.
        # expected[i-1], ..., expected[0]: the reversed prefix of length i.
        contactMat[i, :i] = ((1 - weight) * contactMat[i, :i] +
                             weight * expected[:i][::-1])

    at.makeSymmetric(contactMat)
    rowsums = np.array([sum(row) for row in contactMat])
    assert len(np.where(rowsums == 0)[0]) == 0

    distMat = at.contactToDist(contactMat, alpha)
    at.makeSymmetric(distMat)

    return distMat / np.mean(distMat)  # normalize to unit mean distance
Example #9
0
def matFromBed(path, cluster, interpolate=True):
    """Build a contact matrix from a BED-style contact file.

    Each non-blank line is split on whitespace. Fields 1 and 4 (0-based)
    are parsed as the two integer locus positions and field 6 as the
    contact value. A contact is kept only when ``cluster.getIndex``
    returns a non-None index for both positions; values are accumulated
    into the lower triangle and the matrix is then passed through
    ``at.makeSymmetric``.

    Args:
        path: path of the contact file.
        cluster: object providing ``indexPoints()``, ``getPointNums()``,
            ``offset``, ``points`` and ``getIndex(loc)``.
        interpolate: currently has no effect (the interpolation step is
            disabled); kept for interface compatibility.

    Returns:
        A ``numpoints x numpoints`` float array, where ``numpoints`` is the
        number of point numbers in ``cluster``.

    Raises:
        AssertionError: if the largest point number does not fit in
            ``cluster.points`` or if any row of the matrix sums to zero.
    """
    cluster.indexPoints()
    pointNums = cluster.getPointNums()

    numpoints = len(pointNums)
    mat = np.zeros((numpoints, numpoints))

    assert max(pointNums) - cluster.offset < len(cluster.points)

    # The with-block closes the file; no explicit close() is needed.
    with open(path) as infile:
        for line in infile:
            fields = line.split()
            if not fields:  # tolerate blank lines
                continue
            loc1 = int(fields[1])
            loc2 = int(fields[4])
            index1 = cluster.getIndex(loc1)
            index2 = cluster.getIndex(loc2)
            if index1 is None or index2 is None:
                continue
            # Accumulate below (or on) the diagonal only.
            row, col = max(index1, index2), min(index1, index2)
            mat[row, col] += float(fields[6])

    at.makeSymmetric(mat)
    rowsums = np.array([sum(row) for row in mat])
    assert len(np.where(rowsums == 0)[0]) == 0

    # NOTE(review): second symmetrization kept from the original; it looks
    # redundant unless at.makeSymmetric is not idempotent -- confirm.
    at.makeSymmetric(mat)

    return mat
Example #10
0
def fullMatFromBed(path, chrom):
    """Build a full-chromosome contact matrix from a BED-style contact file.

    The matrix has ``(chrom.maxPos - chrom.minPos) // chrom.res + 1`` rows
    and columns. Each non-blank line is split on whitespace. Fields 1 and
    4 (0-based) are parsed as integer positions and mapped to matrix
    indices with ``chrom.getAbsoluteIndex``; field 6 is the contact value.
    Values are accumulated into the lower triangle and the matrix is then
    passed through ``at.makeSymmetric``.

    Args:
        path: path of the contact file.
        chrom: object providing ``minPos``, ``maxPos``, ``res`` and
            ``getAbsoluteIndex(loc)``.

    Returns:
        A square float array covering every bin of the chromosome.
    """
    # Floor division: np.zeros needs an integer size, and "/" yields a float
    # on Python 3. On Python 2 with integer inputs the result is unchanged.
    numpoints = int((chrom.maxPos - chrom.minPos) // chrom.res) + 1
    mat = np.zeros((numpoints, numpoints))

    # The with-block closes the file; no explicit close() is needed.
    with open(path) as infile:
        for line in infile:
            fields = line.split()
            if not fields:  # tolerate blank lines
                continue
            loc1 = int(fields[1])
            loc2 = int(fields[4])
            index1 = chrom.getAbsoluteIndex(loc1)
            index2 = chrom.getAbsoluteIndex(loc2)
            # Accumulate below (or on) the diagonal only.
            row, col = max(index1, index2), min(index1, index2)
            mat[row, col] += float(fields[6])

    at.makeSymmetric(mat)

    return mat
Example #11
0
def matFromBed(path, cluster):
    """Build a contact matrix from a BED-style contact file.

    Each non-blank line is split on whitespace. Fields 1 and 4 (0-based)
    are parsed as the two integer locus positions and field 6 as the
    contact value. A contact is kept only when ``cluster.getIndex``
    returns a non-None index for both positions; values are accumulated
    into the lower triangle and the matrix is then passed through
    ``at.makeSymmetric``.

    Args:
        path: path of the contact file.
        cluster: object providing ``indexPoints()``, ``getPointNums()``,
            ``offset``, ``points`` and ``getIndex(loc)``. When None it is
            built with ``clusterFromBed(path, None, None)``.

    Returns:
        A ``numpoints x numpoints`` float array, where ``numpoints`` is the
        number of point numbers in ``cluster``.

    Raises:
        AssertionError: if the largest point number does not fit in
            ``cluster.points`` or if any row of the matrix sums to zero.
    """
    if cluster is None:
        cluster = clusterFromBed(path, None, None)

    cluster.indexPoints()
    pointNums = cluster.getPointNums()

    numpoints = len(pointNums)
    mat = np.zeros((numpoints, numpoints))

    assert max(pointNums) - cluster.offset < len(cluster.points)

    # The with-block closes the file; no explicit close() is needed.
    with open(path) as infile:
        for line in infile:
            fields = line.split()
            if not fields:  # tolerate blank lines
                continue
            loc1 = int(fields[1])
            loc2 = int(fields[4])
            index1 = cluster.getIndex(loc1)
            index2 = cluster.getIndex(loc2)
            if index1 is None or index2 is None:
                continue
            # Accumulate below (or on) the diagonal only.
            row, col = max(index1, index2), min(index1, index2)
            mat[row, col] += float(fields[6])

    at.makeSymmetric(mat)
    assert not np.any(mat.sum(axis=1) == 0), "contact matrix has an empty row"

    return mat
Example #12
0
def infer_cluster(contactMat, cluster, classical=False):
    """Embed a single cluster in 3D from its contact matrix.

    The matrix is passed through ``at.makeSymmetric``, converted to
    distances with ``at.contactToDist`` and embedded. Point ``i`` of the
    cluster then receives row ``i`` of the embedding as its ``pos``
    attribute. Nothing is returned.

    Args:
        contactMat: square contact matrix with one row per point number
            of ``cluster``.
        cluster: object exposing ``getPointNums()`` and ``getPoints()``.
        classical: when true use ``st.cmds``; otherwise metric MDS with a
            fixed seed of 3.

    Raises:
        AssertionError: on a size mismatch or if any row sums to zero.
    """
    point_count = len(cluster.getPointNums())
    assert point_count == len(contactMat)

    at.makeSymmetric(contactMat)
    row_totals = np.array([sum(values) for values in contactMat])
    empty_rows = np.where(row_totals == 0)[0]
    assert len(empty_rows) == 0

    dists = at.contactToDist(contactMat)
    at.makeSymmetric(dists)

    if not classical:
        embedder = manifold.MDS(n_components=3,
                                metric=True,
                                random_state=np.random.RandomState(seed=3),
                                verbose=0,
                                dissimilarity="precomputed",
                                n_jobs=-1)
        xyz = embedder.fit(dists).embedding_
    else:
        xyz = st.cmds(dists)

    for i, point in enumerate(cluster.getPoints()):
        point.pos = xyz[i]
Example #13
0
import array_tools as at
import os
import numpy as np

# Resolution is read from the first command-line argument.
res = int(sys.argv[1])
# Floor division keeps res_kb an integer on both Python 2 and Python 3, so
# the output file name reads e.g. "..._100kb.bed" rather than "..._100.0kb.bed".
res_kb = res // 1000

# Remove any output left over from a previous run; the file is opened in
# append mode further down in this script.
out_path = "A_compartment_{}kb.bed".format(res_kb)
if os.path.isfile(out_path):
    os.remove(out_path)

# Process each listed autosome in turn (9 is absent from this list).
for chrom in (1, 2, 3, 4, 5, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
              20, 21, 22):
    # Load the structure and contact matrix for this chromosome.
    path = "hic_data/GM12878_combined_{}_100kb.bed".format(chrom)
    structure = dt.structureFromBed(path)
    contacts = dt.matFromBed(path, structure)
    at.makeSymmetric(contacts)
    # Field 6 of every row of the coverage file, converted to float.
    enrichments = np.array(np.loadtxt(
        "binding_data/Gm12878_{}_100kb_active_coverage.bed".format(chrom),
        dtype=object)[:, 6],
                           dtype=float)
    # Shift the structure's nonzero indices by minPos / res and use them to
    # select the matching enrichment values.
    # NOTE(review): "/" yields floats on Python 3, which cannot be used as
    # array indices -- confirm the target interpreter.
    bin_nums = structure.nonzero_abs_indices(
    ) + structure.chrom.minPos / structure.chrom.res
    enrichments = enrichments[bin_nums]
    compartments = np.array(ca.get_compartments(contacts, enrichments))
    # Keep the genomic coordinates of loci with a positive compartment score.
    gen_coords = np.array(structure.getGenCoords())
    a_gen_coords = gen_coords[np.where(compartments > 0)]
    # Append the selected loci to the shared output file.
    with open("A_compartment_{}kb.bed".format(res_kb), "a") as out:
        for a_gen_coord in a_gen_coords:
            # NOTE(review): range() needs an int; "100 / res_kb" is a float
            # on Python 3 -- confirm the target interpreter.
            for i in range(100 / res_kb):
                out.write("\t".join(
                    (structure.chrom.name,
Example #14
0
from matplotlib import pyplot as plt
import numpy as np
import sys
sys.path.append("..")
import data_tools as dt
import array_tools as at
import misc

# "True" distance matrix: contacts read from the BED file, converted to
# distances and symmetrized.
# NOTE(review): bedpath is not defined in the visible part of this script --
# confirm where it is set.
cluster = dt.clusterFromBed(bedpath, None, None)
contactMat = dt.matFromBed(bedpath, cluster)
distMat = at.contactToDist(contactMat)
at.makeSymmetric(distMat)
for j in range(len(distMat)):  #remove diagonal
    distMat[j, j] = 0

# Each section below builds a distance matrix for one method's output and
# scores it against the true matrix with misc.pearson.

# Chromosome3D: distances computed from a coordinates file.
chromthreed_distMat = misc.distsFromCoords(
    "Chromosome3D/output/chr22_100kb/chr22_100kb_coords.tsv")
chromthreed_r = misc.pearson(distMat, chromthreed_distMat)

# mmds coordinates file.
mmds_distMat = dt.clusterFromFile(
    "hic_data/GM12878_combined_22_10kb_mmds_coords.tsv").distMat()
mmds_r = misc.pearson(distMat, mmds_distMat)

# cmds coordinates file.
cmds_distMat = dt.clusterFromFile(
    "hic_data/GM12878_combined_22_10kb_cmds_coords.tsv").distMat()
cmds_r = misc.pearson(distMat, cmds_distMat)

# minimds coordinates file.
minimds_distMat = dt.clusterFromFile(
    "hic_data/GM12878_combined_22_10kb_minimds_coords.tsv").distMat()
minimds_r = misc.pearson(distMat, minimds_distMat)
Example #15
0
def heatMapFromMat(mat, maxvalue, tads, outpath, colors=None):
    """Render a heatmap of ``mat`` through ``createHeatmap``.

    ``mat`` is first passed through ``at.makeSymmetric``. When ``maxvalue``
    is not None, ``threshold(mat, maxvalue)`` is applied before plotting.
    ``tads``, ``outpath`` and ``colors`` are forwarded unchanged to
    ``createHeatmap``. Nothing is returned.
    """
    at.makeSymmetric(mat)
    cap_requested = maxvalue is not None
    if cap_requested:
        threshold(mat, maxvalue)
    createHeatmap(mat, tads, outpath, colors)
Example #16
0
def normalized_dist_mat(path, structure, alpha=4):
    """Standard processing for creating a distance matrix.

    Reads the contact matrix with ``matFromBed(path, structure)``, converts
    it with ``at.contactToDist(contacts, alpha)``, passes the result
    through ``at.makeSymmetric`` and divides it by its mean.

    Args:
        path: path of the contact file handed to ``matFromBed``.
        structure: structure handed to ``matFromBed``.
        alpha: second argument of ``at.contactToDist``. Defaults to 4, the
            value that used to be hard-coded, so existing callers are
            unaffected.

    Returns:
        The distance matrix divided by its mean value.
    """
    contacts = matFromBed(path, structure)
    dists = at.contactToDist(contacts, alpha)
    at.makeSymmetric(dists)
    return dists / np.mean(dists)  # normalize to unit mean distance
Example #17
0
        # Run multimds on both input files through the shell, then load the
        # two structure files from hic_data/.
        # NOTE(review): the paths are interpolated into a shell command
        # unquoted -- confirm they never contain spaces or shell
        # metacharacters.
        os.system("python ~/git/multimds/multimds.py --full {} {}".format(
            path1, path2))
        structure1 = dt.structure_from_file(
            "hic_data/{}_{}_{}kb_structure.tsv".format(cell_type1, chrom,
                                                       res_kb))
        structure2 = dt.structure_from_file(
            "hic_data/{}_{}_{}kb_structure.tsv".format(cell_type2, chrom,
                                                       res_kb))

        #plot.plot_structures_interactive((structure1, structure2))

        # Compartments: one score vector per input, computed from its
        # contact matrix.
        contacts1 = dt.matFromBed(path1, structure1)
        contacts2 = dt.matFromBed(path2, structure2)

        at.makeSymmetric(contacts1)
        at.makeSymmetric(contacts2)

        compartments1 = np.array(ca.get_compartments(contacts1))
        compartments2 = np.array(ca.get_compartments(contacts2))

        # Flip the second score vector when the two are negatively
        # correlated, so both share the same sign convention.
        r, p = st.pearsonr(compartments1, compartments2)
        if r < 0:
            compartments2 = -compartments2

        # SVR: stack coordinates and scores from both structures and create
        # a linear support-vector regressor.
        coords1 = structure1.getCoords()
        coords2 = structure2.getCoords()
        coords = np.concatenate((coords1, coords2))
        compartments = np.concatenate((compartments1, compartments2))
        clf = svm.LinearSVR()
Example #18
0
          20, 21, 22, "X")
n = len(chroms)

# One correlation value per chromosome for each method.
mmds_rs = np.zeros(n)
cmds_rs = np.zeros(n)
minimds_rs = np.zeros(n)
mogen_rs = np.zeros(n)

for i, chrom in enumerate(chroms):
    bedpath = "hic_data/GM12878_combined_{}_10kb.bed".format(chrom)

    # mmds: build the distance matrix from the contact file (restricted to
    # the loaded structure) and score the structure's own distance matrix
    # against it with misc.pearson.
    mmds_structure = dt.structure_from_file(
        "hic_data/GM12878_combined_{}_10kb_mmds_coords.tsv".format(chrom))
    contactMat = dt.matFromBed(bedpath, mmds_structure)
    mmds_true_mat = at.contactToDist(contactMat)
    at.makeSymmetric(mmds_true_mat)
    for j in range(len(mmds_true_mat)):  #remove diagonal
        mmds_true_mat[j, j] = 0
    mmds_distMat = misc.distMat(mmds_structure)
    mmds_rs[i] = misc.pearson(mmds_true_mat, mmds_distMat)

    # cmds: same procedure with the cmds coordinates file.
    cmds_structure = dt.structure_from_file(
        "hic_data/GM12878_combined_{}_10kb_cmds_coords.tsv".format(chrom))
    contactMat = dt.matFromBed(bedpath, cmds_structure)
    cmds_true_mat = at.contactToDist(contactMat)
    at.makeSymmetric(cmds_true_mat)
    for j in range(len(cmds_true_mat)):  #remove diagonal
        cmds_true_mat[j, j] = 0
    cmds_distMat = misc.distMat(cmds_structure)
    cmds_rs[i] = misc.pearson(cmds_true_mat, cmds_distMat)