Exemple #1
0
def infer_structure(contactMat,
                    structure,
                    alpha,
                    num_threads,
                    classical=False):
    """Compute 3D coordinates for a single structure and store them on it.

    The contact matrix is passed through ``at.makeSymmetric``, converted to
    a distance matrix with ``at.contactToDist`` and embedded in three
    dimensions. The embedding is handed to ``structure.setCoords``; nothing
    is returned.

    Args:
        contactMat: Contact matrix with one row per entry of
            ``structure.nonzero_abs_indices()``. It is passed to
            ``at.makeSymmetric`` whose return value is ignored, so it is
            presumably modified in place.
        structure: Object receiving the coordinates via ``setCoords``.
        alpha: Forwarded unchanged to ``at.contactToDist``.
        num_threads: Used as ``n_jobs`` for ``manifold.MDS`` (ignored when
            ``classical`` is true).
        classical: If true use ``la.cmds``; otherwise metric MDS from
            ``manifold.MDS``.

    Raises:
        AssertionError: If the matrix size does not match the structure or
            if any row of the symmetrized contact matrix sums to zero.
    """
    assert len(structure.nonzero_abs_indices()) == len(contactMat)

    at.makeSymmetric(contactMat)
    # Every locus must have at least some contact signal.
    row_totals = np.array([sum(row) for row in contactMat])
    assert not np.any(row_totals == 0)

    distances = at.contactToDist(contactMat, alpha)
    at.makeSymmetric(distances)

    if classical:
        embedding = la.cmds(distances)
    else:
        # Unseeded RandomState: each call may yield a different embedding.
        solver = manifold.MDS(n_components=3,
                              metric=True,
                              random_state=np.random.RandomState(),
                              verbose=0,
                              dissimilarity="precomputed",
                              n_jobs=num_threads)
        embedding = solver.fit_transform(distances)

    structure.setCoords(embedding)
Exemple #2
0
def fullMDS(path, classical=False):
    """Run MDS on the whole dataset at ``path`` without partitioning.

    Args:
        path: Path of the BED file, passed to ``dt.clusterFromBed`` and
            ``dt.matFromBed``.
        classical: Forwarded to ``infer_cluster`` to choose classical MDS
            instead of metric MDS.

    Returns:
        The cluster created by ``dt.clusterFromBed``, after
        ``infer_cluster`` has assigned positions to its points.
    """
    cluster = dt.clusterFromBed(path, None, None)
    contactMat = dt.matFromBed(path, cluster)
    # No distance matrix is built here: infer_cluster derives its own from
    # the contact matrix, so computing one in this function would be
    # discarded work.
    infer_cluster(contactMat, cluster, classical)
    return cluster
Exemple #3
0
def infer_clusters(contactMat, clusters, offsets, classical=False):
    """Infer 3D coordinates for several clusters sharing one contact matrix.

    The contact matrix is passed through ``at.makeSymmetric``, converted to
    a distance matrix, and embedded in 3D in one go. Row ``i + offset`` of
    the embedding is then assigned to the ``pos`` attribute of the ``i``-th
    point of the corresponding cluster.

    Args:
        contactMat: Contact matrix whose length equals the total number of
            point numbers over all ``clusters``. Passed to
            ``at.makeSymmetric`` (return value ignored, so presumably
            modified in place).
        clusters: Iterable of cluster objects exposing ``getPointNums()``
            and ``getPoints()``.
        offsets: For each cluster, the row of the embedding at which its
            points start. Paired with ``clusters`` via ``zip``.
        classical: If true use ``st.cmds``; otherwise metric MDS from
            ``manifold.MDS`` with a fixed seed.

    Raises:
        AssertionError: If the sizes do not add up or any row of the
            symmetrized contact matrix sums to zero.
    """
    assert sum(len(cluster.getPointNums())
               for cluster in clusters) == len(contactMat)

    at.makeSymmetric(contactMat)
    rowsums = np.array([sum(row) for row in contactMat])
    assert len(np.where(rowsums == 0)[0]) == 0

    distMat = at.contactToDist(contactMat)
    at.makeSymmetric(distMat)

    if classical:  # classical MDS
        coords = st.cmds(distMat)
    else:
        # Fixed seed keeps the metric-MDS embedding reproducible.
        mds = manifold.MDS(n_components=3,
                           metric=True,
                           random_state=np.random.RandomState(seed=3),
                           verbose=0,
                           dissimilarity="precomputed",
                           n_jobs=-1)
        coords = mds.fit_transform(distMat)

    for offset, cluster in zip(offsets, clusters):
        # Fetch the point list once per cluster instead of on every
        # iteration of the inner loop.
        for i, point in enumerate(cluster.getPoints()):
            point.pos = coords[i + offset]
Exemple #4
0
def infer_structure(contactMat,
                    structure,
                    alpha,
                    num_threads,
                    weight,
                    classical=False):
    """Compute 3D coordinates for one structure from blended contacts.

    Each lower-triangle entry of ``contactMat`` is first replaced by a
    weighted mix of itself and a value taken from ``get_expected``. The
    matrix is then passed through ``at.makeSymmetric``, converted to
    distances, normalized by the mean distance and embedded in 3D. The
    embedding is stored via ``structure.setCoords``; nothing is returned.

    Args:
        contactMat: Contact matrix with one row per entry of
            ``structure.nonzero_abs_indices()``; its entries are
            overwritten by this function.
        structure: Object receiving the coordinates via ``setCoords``.
        alpha: Forwarded unchanged to ``at.contactToDist``.
        num_threads: Used as ``n_jobs`` for ``manifold.MDS`` (ignored when
            ``classical`` is true).
        weight: Mixing factor; an entry becomes
            ``(1 - weight) * observed + weight * expected``.
        classical: If true use ``la.cmds``; otherwise metric MDS from
            ``manifold.MDS``.

    Raises:
        AssertionError: If the matrix size does not match the structure or
            if any row of the symmetrized contact matrix sums to zero.
    """
    assert len(structure.nonzero_abs_indices()) == len(contactMat)

    # expected is indexed by (row - col - 1); presumably the expected
    # contact level at that separation -- confirm against get_expected.
    expected = get_expected(contactMat)
    size = len(contactMat)
    for row in range(size):
        for col in range(row):
            observed = contactMat[row, col]
            background = expected[row - col - 1]
            contactMat[row, col] = (1 - weight) * observed + weight * background

    # Only the lower triangle was rewritten above; makeSymmetric is called
    # before the matrix is used any further.
    at.makeSymmetric(contactMat)
    row_totals = np.array([sum(row) for row in contactMat])
    assert not np.any(row_totals == 0)

    distances = at.contactToDist(contactMat, alpha)
    at.makeSymmetric(distances)

    # Normalize so that the mean distance is 1.
    distances = distances / np.mean(distances)

    if classical:
        embedding = la.cmds(distances)
    else:
        solver = manifold.MDS(n_components=3,
                              metric=True,
                              random_state=np.random.RandomState(),
                              verbose=0,
                              dissimilarity="precomputed",
                              n_jobs=num_threads)
        embedding = solver.fit_transform(distances)

    structure.setCoords(embedding)
Exemple #5
0
def infer_structures(contactMat, structures, offsets, alpha, num_threads, classical=False):
    """Compute 3D coordinates for several structures sharing one contact matrix.

    The contact matrix is passed through ``at.makeSymmetric``, converted to
    distances and embedded once. Each structure then receives the slice of
    the embedding that starts at its offset and spans as many rows as it
    has points.

    Args:
        contactMat: Contact matrix whose length equals the total number of
            point numbers over all ``structures``. Passed to
            ``at.makeSymmetric`` (return value ignored, so presumably
            modified in place).
        structures: Iterable of objects exposing ``getPointNums()``,
            ``getPoints()`` and ``setCoords()``.
        offsets: Starting row of each structure in the embedding, paired
            with ``structures`` via ``zip``.
        alpha: Forwarded unchanged to ``at.contactToDist``.
        num_threads: Used as ``n_jobs`` for ``manifold.MDS`` (ignored when
            ``classical`` is true).
        classical: If true use ``la.cmds``; otherwise metric MDS from
            ``manifold.MDS``.

    Raises:
        AssertionError: If the sizes do not add up or any row of the
            symmetrized contact matrix sums to zero.
    """
    total_points = sum([len(structure.getPointNums()) for structure in structures])
    assert total_points == len(contactMat)

    at.makeSymmetric(contactMat)
    row_totals = np.array([sum(row) for row in contactMat])
    assert not np.any(row_totals == 0)

    distances = at.contactToDist(contactMat, alpha)
    at.makeSymmetric(distances)

    if classical:
        embedding = la.cmds(distances)
    else:
        solver = manifold.MDS(
            n_components=3,
            metric=True,
            random_state=np.random.RandomState(),
            verbose=0,
            dissimilarity="precomputed",
            n_jobs=num_threads,
        )
        embedding = solver.fit_transform(distances)

    for start, structure in zip(offsets, structures):
        stop = start + len(structure.getPoints())
        structure.setCoords(embedding[start:stop])
Exemple #6
0
def distmat(contactMat, structure, alpha, weight, num_threads):
    """Build a mean-normalized distance matrix from blended contacts.

    Every off-diagonal pair of ``contactMat`` is overwritten (in both
    triangles) with ``(1 - weight) * observed + weight * expected``, where
    the expected value comes from ``get_expected``. The result is converted
    with ``at.contactToDist`` and divided by its mean.

    Args:
        contactMat: Contact matrix with one row per entry of
            ``structure.nonzero_abs_indices()``; its entries are
            overwritten by this function.
        structure: Only used for the size check.
        alpha: Forwarded unchanged to ``at.contactToDist``.
        weight: Mixing factor between observed and expected contacts.
        num_threads: Not used by this function.

    Returns:
        The distance matrix divided by its mean.

    Raises:
        AssertionError: If the matrix size does not match the structure or
            if any row of the blended contact matrix sums to zero.
    """
    assert len(structure.nonzero_abs_indices()) == len(contactMat)

    # Read the lower-triangle value, blend it, and mirror the result so the
    # matrix stays symmetric without a separate symmetrization step.
    expected = get_expected(contactMat)
    size = len(contactMat)
    for row in range(size):
        for col in range(row):
            observed = contactMat[row, col]
            blended = (1 - weight) * observed + weight * expected[row - col - 1]
            contactMat[row, col] = contactMat[col, row] = blended

    row_totals = np.array([sum(row) for row in contactMat])
    assert not np.any(row_totals == 0)

    distances = at.contactToDist(contactMat, alpha)

    # Normalize so that the mean distance is 1.
    return distances / np.mean(distances)
Exemple #7
0
def infer_cluster(contactMat, cluster, classical=False):
    """Infer 3D coordinates for one cluster and assign them to its points.

    The contact matrix is passed through ``at.makeSymmetric``, converted to
    a distance matrix and embedded in 3D. Row ``i`` of the embedding is
    assigned to the ``pos`` attribute of the ``i``-th point returned by
    ``cluster.getPoints()``. Nothing is returned.

    Args:
        contactMat: Contact matrix with one row per entry of
            ``cluster.getPointNums()``. Passed to ``at.makeSymmetric``
            (return value ignored, so presumably modified in place).
        cluster: Object exposing ``getPointNums()`` and ``getPoints()``.
        classical: If true use ``st.cmds``; otherwise metric MDS from
            ``manifold.MDS`` with a fixed seed.

    Raises:
        AssertionError: If the matrix size does not match the cluster or if
            any row of the symmetrized contact matrix sums to zero.
    """
    assert len(cluster.getPointNums()) == len(contactMat)

    at.makeSymmetric(contactMat)
    rowsums = np.array([sum(row) for row in contactMat])
    assert len(np.where(rowsums == 0)[0]) == 0

    distMat = at.contactToDist(contactMat)
    at.makeSymmetric(distMat)

    if classical:  # classical MDS
        coords = st.cmds(distMat)
    else:
        # Fixed seed keeps the metric-MDS embedding reproducible.
        mds = manifold.MDS(n_components=3,
                           metric=True,
                           random_state=np.random.RandomState(seed=3),
                           verbose=0,
                           dissimilarity="precomputed",
                           n_jobs=-1)
        coords = mds.fit(distMat).embedding_

    # Fetch the point list once rather than calling getPoints() on every
    # iteration; indexing coords keeps the IndexError if it is too short.
    for i, point in enumerate(cluster.getPoints()):
        point.pos = coords[i]
Exemple #8
0
from matplotlib import pyplot as plt
import numpy as np
import sys
sys.path.append("..")
import data_tools as dt
import array_tools as at
import misc

#"true" distance matrix
# Build the reference distance matrix from the BED file at `bedpath`.
# NOTE(review): `bedpath` is not defined in this section -- presumably it is
# set elsewhere in the script; confirm.
cluster = dt.clusterFromBed(bedpath, None, None)
contactMat = dt.matFromBed(bedpath, cluster)
distMat = at.contactToDist(contactMat)
# Return value is ignored, so makeSymmetric presumably works in place.
at.makeSymmetric(distMat)
for j in range(len(distMat)):  # zero the diagonal entries
    distMat[j, j] = 0

# For each method below: obtain a distance matrix from that method's output
# coordinates and compare it with the reference via misc.pearson
# (presumably a Pearson correlation -- confirm).

# NOTE(review): this path says 100kb while the other inputs say 10kb --
# confirm the resolutions are meant to differ.
chromthreed_distMat = misc.distsFromCoords(
    "Chromosome3D/output/chr22_100kb/chr22_100kb_coords.tsv")
chromthreed_r = misc.pearson(distMat, chromthreed_distMat)

# "mmds" coordinates file.
mmds_distMat = dt.clusterFromFile(
    "hic_data/GM12878_combined_22_10kb_mmds_coords.tsv").distMat()
mmds_r = misc.pearson(distMat, mmds_distMat)

# "cmds" coordinates file.
cmds_distMat = dt.clusterFromFile(
    "hic_data/GM12878_combined_22_10kb_cmds_coords.tsv").distMat()
cmds_r = misc.pearson(distMat, cmds_distMat)

# "minimds" coordinates file.
minimds_distMat = dt.clusterFromFile(
    "hic_data/GM12878_combined_22_10kb_minimds_coords.tsv").distMat()
minimds_r = misc.pearson(distMat, minimds_distMat)
Exemple #9
0
def normalized_dist_mat(path, structure, alpha=4):
    """Create a symmetric, mean-normalized distance matrix from a BED file.

    Args:
        path: Path of the BED file, passed to ``matFromBed``.
        structure: Structure passed to ``matFromBed`` alongside ``path``.
        alpha: Second argument of ``at.contactToDist``. Defaults to 4, the
            value that was previously hard-coded, so existing callers are
            unaffected.

    Returns:
        The distance matrix divided by its mean.
    """
    contacts = matFromBed(path, structure)
    dists = at.contactToDist(contacts, alpha)
    # Return value is ignored, so makeSymmetric presumably works in place.
    at.makeSymmetric(dists)
    return dists / np.mean(dists)  # normalize so the mean distance is 1
Exemple #10
0
# Chromosome labels to evaluate: 1-22 plus "X".
chroms = (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
          20, 21, 22, "X")
n = len(chroms)

# One result slot per chromosome for each method.
# NOTE(review): minimds_rs and mogen_rs are allocated here but not filled in
# the lines visible in this section -- presumably filled further down.
mmds_rs = np.zeros(n)
cmds_rs = np.zeros(n)
minimds_rs = np.zeros(n)
mogen_rs = np.zeros(n)

for i, chrom in enumerate(chroms):
    bedpath = "hic_data/GM12878_combined_{}_10kb.bed".format(chrom)

    # mmds: load the inferred structure, rebuild the reference distance
    # matrix from the BED contacts for that structure, and compare the two
    # with misc.pearson.
    mmds_structure = dt.structure_from_file(
        "hic_data/GM12878_combined_{}_10kb_mmds_coords.tsv".format(chrom))
    contactMat = dt.matFromBed(bedpath, mmds_structure)
    mmds_true_mat = at.contactToDist(contactMat)
    at.makeSymmetric(mmds_true_mat)
    for j in range(len(mmds_true_mat)):  # zero the diagonal entries
        mmds_true_mat[j, j] = 0
    mmds_distMat = misc.distMat(mmds_structure)
    mmds_rs[i] = misc.pearson(mmds_true_mat, mmds_distMat)

    # cmds: same procedure, using the cmds coordinates file. The contact
    # matrix is re-read with the cmds structure rather than reused.
    cmds_structure = dt.structure_from_file(
        "hic_data/GM12878_combined_{}_10kb_cmds_coords.tsv".format(chrom))
    contactMat = dt.matFromBed(bedpath, cmds_structure)
    cmds_true_mat = at.contactToDist(contactMat)
    at.makeSymmetric(cmds_true_mat)
    for j in range(len(cmds_true_mat)):  # zero the diagonal entries
        cmds_true_mat[j, j] = 0
    cmds_distMat = misc.distMat(cmds_structure)
    cmds_rs[i] = misc.pearson(cmds_true_mat, cmds_distMat)