Beispiel #1
0
import os
import numpy as np
import sys
sys.path.append("..")
import data_tools as dt
import plotting as plot

# Prefixes of the two Hi-C datasets being compared. Both the input BED names
# and the structure files loaded afterwards are derived from these, so the
# structures that get plotted are the ones this run was asked to produce.
# (The original ran multimds on the "Scer" inputs but loaded "Suva" structure
# files, i.e. files this command is not expected to write.)
# NOTE(review): assumes multimds.py writes "<input prefix>_structure.tsv" into
# the working directory -- confirm against multimds.py.
prefixes = ("ctrl_Scer_13_32kb", "galactose_Scer_13_32kb")

os.system("python ../multimds.py -P 0.1 -w 0 {}.bed {}.bed".format(*prefixes))

struct1 = dt.structure_from_file("{}_structure.tsv".format(prefixes[0]))
struct2 = dt.structure_from_file("{}_structure.tsv".format(prefixes[1]))

# One integer colour per point: 0 everywhere, 1 at the bin containing
# genomic position 852000.
colors = np.zeros_like(struct1.getPoints(), dtype=int)
colors[struct1.get_rel_index(852000)] = 1

# The same colour array is used for both structures.
plot.plot_structures_interactive((struct1, struct2), (colors, colors))
Beispiel #2
0
import sys

sys.path.append("..")
import data_tools as dt
import numpy as np
import plotting as plot

# Command-line arguments: dataset label and resolution (kb) used in the
# structure file name.
cell_type = sys.argv[1]
res_kb = int(sys.argv[2])

structure_path = "{}_21_{}kb_structure.tsv".format(cell_type, res_kb)
struct = dt.structure_from_file(structure_path)

# Truncate the structure so that it begins at genomic position `start`:
# drop the leading points, move the chromosome's minimum position, and shift
# the absolute index of every remaining non-zero entry by the number of
# points removed.
start = 45000000
index = struct.chrom.getAbsoluteIndex(start)
struct.points = struct.points[index:]
struct.chrom.minPos = start
for point in struct.points:
    if point != 0:
        point.absolute_index -= index
struct.set_rel_indices()

# Colour codes: 0 by default, 2 for the 46.90-46.95 Mb window, 1 for the
# single bin at 47.475 Mb.
colors = np.zeros_like(struct.getPoints(), dtype=int)
window_begin = struct.get_rel_index(46900000)
window_end = struct.get_rel_index(46950000)
colors[window_begin:window_end] = 2
colors[struct.get_rel_index(47475000)] = 1

plot.plot_structure_interactive(struct, colors, colormap="brg")
Beispiel #3
0
prefix2 = os.path.basename(path2.split(".")[0])

# Number of repeated multimds runs performed for each value in `ps`.
n = 10

all_r_sq = []

# Values passed to multimds.py through its -P option, one sweep step each.
ps = np.arange(0, 0.1, 0.01)

for p in ps:
    # One array of per-point distances between the two structures per run.
    all_changes = []
    for i in range(n):
        print(i)
        # Re-run multimds on the two input files with the current -P value.
        os.system("python ../multimds.py -P {} --full {} {}".format(
            p, path1, path2))

        # Load the structure files named after each input prefix.
        structure1 = dt.structure_from_file("{}_structure.tsv".format(
            os.path.basename(prefix1)))
        structure2 = dt.structure_from_file("{}_structure.tsv".format(
            os.path.basename(prefix2)))

        # Only for p == 0, structure1 is explicitly transformed using the
        # (r, t) pair computed from the two structures.
        # NOTE(review): presumably the structures are not already aligned
        # when -P is 0 -- confirm against multimds.py.
        if p == 0:
            r, t = la.getTransformation(structure1, structure2)
            structure1.transform(r, t)

        # Distance between corresponding coordinates of the two structures.
        all_changes.append(
            np.array([
                la.calcDistance(coord1, coord2) for coord1, coord2 in zip(
                    structure1.getCoords(), structure2.getCoords())
            ]))

    r_sq = []
    for i in range(n):
Beispiel #4
0
    return misc.pearson(dists1, dists2)


# Methods compared in this run; rs holds one replicate correlation per label,
# in label order.
# Chromosome3D and ChromSDE are currently left out. The Chromosome3D inputs
# were rep1_coords.tsv / rep2_coords.tsv under
# Chromosome3D/output_models/chr22_10kb_rep1/ (read with np.loadtxt);
# re-enabling it as the first label shifts every index into rs up by one.
labels = ("mMDS", "miniMDS", "MOGEN", "HSA")
n = len(labels)
rs = np.zeros(n)

# mMDS: correlation between the two replicate structures.
mmds_rep1_path = "hic_data/GM12878_combined_22_10kb_mmds_rep1.tsv"
mmds_rep2_path = "hic_data/GM12878_combined_22_10kb_mmds_rep2.tsv"
coords1 = dt.structure_from_file(mmds_rep1_path).getCoords()
coords2 = dt.structure_from_file(mmds_rep2_path).getCoords()
rs[0] = rep_correlation(coords1, coords2)

# miniMDS: correlation between the two replicate structures.
minimds_rep1_path = "hic_data/GM12878_combined_22_10kb_minimds_rep1.tsv"
minimds_rep2_path = "hic_data/GM12878_combined_22_10kb_minimds_rep2.tsv"
coords1 = dt.structure_from_file(minimds_rep1_path).getCoords()
coords2 = dt.structure_from_file(minimds_rep2_path).getCoords()
rs[1] = rep_correlation(coords1, coords2)

#MOGEN
coords1 = np.loadtxt(
Beispiel #5
0
    chromatinDiameter = 30  #diameter of heterochromatin (nm)

    totDist = 0
    count = 0
    n = len(coords)
    for i in range(1, n):
        totDist += la.calcDistance(coords[i - 1], coords[i])
        count += 1
    avgDist = totDist / count  #average distance between neighboring loci
    physicalDist = kl * (res / bpPerKL)**(
        1. / 2)  #physical distance between neighboring loci (nm)
    conversionFactor = avgDist / physicalDist
    return chromatinDiameter / 2 * conversionFactor


# Load the structure produced by each method for the same dataset.
mmds_path = "hic_data/GM12878_combined_22_10kb_mmds_coords.tsv"
cmds_path = "hic_data/GM12878_combined_22_10kb_cmds_coords.tsv"
minimds_path = "hic_data/GM12878_combined_22_10kb_minimds_coords.tsv"

mmds_structure = dt.structure_from_file(mmds_path)
cmds_structure = dt.structure_from_file(cmds_path)
minimds_structure = dt.structure_from_file(minimds_path)

# All three structures must share one resolution; it is kept in `res`.
mmds_res, cmds_res, minimds_res = (
    structure.chrom.res
    for structure in (mmds_structure, cmds_structure, minimds_structure)
)

assert mmds_res == cmds_res == minimds_res

res = mmds_res

# Render the mMDS and cMDS structures to image files.
plot.plot_structure_interactive(mmds_structure, out_path="Fig9A.png")
plot.plot_structure_interactive(cmds_structure, out_path="Fig9B.png")
Beispiel #6
0
import sys
sys.path.append("..")
import data_tools as dt
import plotting as plot

# Structure files for the two datasets shown together.
gm12878_path = "GM12878_combined_21_100kb_structure.tsv"
k562_path = "K562_21_100kb_structure.tsv"

struct1 = dt.structure_from_file(gm12878_path)
struct2 = dt.structure_from_file(k562_path)

# Display both structures in one interactive view.
structure_pair = (struct1, struct2)
plot.plot_structures_interactive(structure_pair)
Beispiel #7
0
from scipy import stats as st
import misc

# Chromosomes processed: 1-22 plus "X".
chroms = (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
          20, 21, 22, "X")
n = len(chroms)

# One result slot per chromosome for each method.
mmds_rs = np.zeros(n)
cmds_rs = np.zeros(n)
minimds_rs = np.zeros(n)
mogen_rs = np.zeros(n)

for i, chrom in enumerate(chroms):
    # The same BED file is the reference for every method's structure.
    bedpath = "hic_data/GM12878_combined_{}_10kb.bed".format(chrom)

    # mMDS: matrix derived from the BED contacts vs. matrix derived from the
    # loaded structure, compared with misc.pearson.
    mmds_structure = dt.structure_from_file(
        "hic_data/GM12878_combined_{}_10kb_mmds_coords.tsv".format(chrom))
    contactMat = dt.matFromBed(bedpath, mmds_structure)
    mmds_true_mat = at.contactToDist(contactMat)
    at.makeSymmetric(mmds_true_mat)
    for j in range(len(mmds_true_mat)):  # zero out the diagonal
        mmds_true_mat[j, j] = 0
    mmds_distMat = misc.distMat(mmds_structure)
    mmds_rs[i] = misc.pearson(mmds_true_mat, mmds_distMat)

    # cMDS: same preparation of the reference matrix as above.
    cmds_structure = dt.structure_from_file(
        "hic_data/GM12878_combined_{}_10kb_cmds_coords.tsv".format(chrom))
    contactMat = dt.matFromBed(bedpath, cmds_structure)
    cmds_true_mat = at.contactToDist(contactMat)
    at.makeSymmetric(cmds_true_mat)
    for j in range(len(cmds_true_mat)):  # zero out the diagonal
        cmds_true_mat[j, j] = 0
import linear_algebra as la
from matplotlib import pyplot as plt
import numpy as np

# Command-line arguments.
gene_name = sys.argv[1]
chrom_num = sys.argv[2]
gene_loc = int(sys.argv[3])
strain = sys.argv[4]
prefix = sys.argv[5]

res_kb = 32
chrom_name = "{}_{}".format(strain, chrom_num)

# One list of per-point distances for each of the iterations 1..10.
all_dists = []

for iteration in range(1, 11):
    ctrl_path = "{}_{}ctrl_{}_{}kb_structure.tsv".format(
        iteration, prefix, chrom_name, res_kb)
    galactose_path = "{}_{}galactose_{}_{}kb_structure.tsv".format(
        iteration, prefix, chrom_name, res_kb)

    struct1 = dt.structure_from_file(ctrl_path)
    struct2 = dt.structure_from_file(galactose_path)

    # The structures are compared as loaded; an explicit alignment step
    # (dt.make_compatible, rescale on both structures, la.getTransformation
    # followed by struct1.transform) is left disabled here.

    iteration_dists = []
    for coord1, coord2 in zip(struct1.getCoords(), struct2.getCoords()):
        iteration_dists.append(la.calcDistance(coord1, coord2))
    all_dists.append(iteration_dists)
Beispiel #9
0

# Datasets compared and the resolution used in their file names.
chrom = 21
res_kb = 100
prefix1 = "GM12878_combined"
prefix2 = "K562"

path1 = "hic_data/{}_{}_{}kb.bed".format(prefix1, chrom, res_kb)
path2 = "hic_data/{}_{}_{}kb.bed".format(prefix2, chrom, res_kb)

# Values passed to multimds.py via -P; errors[i] is the result for ps[i].
ps = np.arange(0, 0.6, 0.1)
errors = np.zeros_like(ps)

for i, p in enumerate(ps):
    # Re-run multimds with the current -P value, then load both structures.
    os.system("python ../multimds.py -P {} {} {}".format(p, path1, path2))
    structure1 = dt.structure_from_file("{}_{}_{}kb_structure.tsv".format(
        prefix1, chrom, res_kb))
    structure2 = dt.structure_from_file("{}_{}_{}kb_structure.tsv".format(
        prefix2, chrom, res_kb))

    dists1 = dt.normalized_dist_mat(path1, structure1)
    dists2 = dt.normalized_dist_mat(path2, structure2)

    # Average the error of BOTH structures. The original evaluated the first
    # structure's error twice and never used dists2 / structure2, so the
    # second dataset had no influence on the reported value.
    errors[i] = np.mean((
        error(dists1, structure1.getCoords()),
        error(dists2, structure2.getCoords()),
    ))

xs = ps
x_int_size = 0.1
ys = errors
y_int_size = 0.05
x_start = min(xs) - x_int_size / 4.
Beispiel #10
0
# For every comparison, read its design file (one "prefix1 prefix2" pair per
# line), run multimds on each chromosome for which both BED files exist, and
# record the mean per-point distance between the two resulting structures in
# boxes[i].
for i, comparison in enumerate(comparisons):
    # Function-call form of print works on both Python 2 and 3 for a single
    # argument, and matches the print(...) style used elsewhere.
    print(comparison)
    # The with-statement closes the file; no explicit close() is needed.
    with open("{}_design.txt".format(comparison)) as infile:
        for line in infile:
            prefix1, prefix2 = line.strip().split()
            for chrom in range(1, 23):
                path1 = "hic_data/{}_{}_100kb.bed".format(prefix1, chrom)
                path2 = "hic_data/{}_{}_100kb.bed".format(prefix2, chrom)

                # Skip chromosomes missing either input file.
                if not (os.path.isfile(path1) and os.path.isfile(path2)):
                    continue

                os.system("python ../multimds.py {} {}".format(
                    path1, path2))

                # Load the structures named after each input prefix.
                structure1 = dt.structure_from_file(
                    "{}_{}_100kb_structure.tsv".format(prefix1, chrom))
                structure2 = dt.structure_from_file(
                    "{}_{}_100kb_structure.tsv".format(prefix2, chrom))

                dists = [
                    la.calcDistance(coord1, coord2)
                    for coord1, coord2 in zip(structure1.getCoords(),
                                              structure2.getCoords())
                ]

                boxes[i].append(np.mean(dists))

#start with a frameless plot (extra room on the left)
plt.subplot2grid((10, 10), (0, 0), 9, 10, frameon=False)
Beispiel #11
0
import sys
sys.path.append("..")
import data_tools as dt

# Resolution (kb) used in both the input structure names and the output name.
res_kb = int(sys.argv[1])

# Chromosomes 1-22 followed by "X", in output order.
chroms = list(range(1, 23)) + ["X"]

# Write one line per chromosome: the number of points in its structure.
# The with-statement closes the file on exit, so the trailing explicit
# close() from the original is dropped as redundant.
with open("chrom_sizes_{}kb.txt".format(res_kb), "w") as out:
    for chrom in chroms:
        structure = dt.structure_from_file(
            "hic_data/GM12878_combined_chr{}_{}kb_structure.tsv".format(
                chrom, res_kb))
        out.write("{}\n".format(len(structure.getPoints())))
Beispiel #12
0
import sys
sys.path.append("../..")
import data_tools as dt
from matplotlib import pyplot as plt
import linear_algebra as la

# The two simulated structures being compared.
struct1 = dt.structure_from_file("sim1_chr21_100kb_structure.tsv")
struct2 = dt.structure_from_file("sim2_chr21_100kb_structure.tsv")

# The structures are compared as loaded; an explicit alignment step
# (dt.make_compatible, rescale on both structures, la.getTransformation
# followed by struct1.transform) is left disabled here.

# Genomic coordinate of each point, and the distance between the two
# structures at that point.
gen_coords = struct1.getGenCoords()
dists = []
for coord1, coord2 in zip(struct1.getCoords(), struct2.getCoords()):
    dists.append(la.calcDistance(coord1, coord2))

plt.subplot2grid((10, 10), (0, 0), rowspan=9, colspan=10, frameon=False)
plt.plot(gen_coords, dists, lw=2)

# Axis offsets: pad the x-range by 1/25 of its width on each side.
xmin, xmax = min(gen_coords), max(gen_coords)
x_range = xmax - xmin
x_margin = x_range / 25.
x_start = xmin - x_margin
x_end = xmax + x_margin
Beispiel #13
0
import sys
sys.path.append("..")
import plotting as plot
import data_tools as dt

# Chromosomes 1-22 followed by "X".
chroms = list(range(1, 23)) + ["X"]

# Load one structure per chromosome, in chromosome order.
structures = []
for chrom in chroms:
    structure_path = "hic_data/GM12878_combined_chr{}_10kb_structure.tsv".format(chrom)
    structures.append(dt.structure_from_file(structure_path))

# Render all structures together into a single image file.
plot.plot_structures_interactive(structures, out_path="Fig10.png")
Beispiel #14
0
x_means = []
y_means = []
z_means = []
x_lengths = []
y_lengths = []
z_lengths = []

for chrom in chroms:
    path1 = "hic_data/{}_{}_{}kb.bed".format(cell_type1, chrom, res_kb)
    path2 = "hic_data/{}_{}_{}kb.bed".format(cell_type2, chrom, res_kb)

    if os.path.isfile(path1) and os.path.isfile(path2):
        os.system("python ~/git/multimds/multimds.py --full {} {}".format(
            path1, path2))
        structure1 = dt.structure_from_file(
            "hic_data/{}_{}_{}kb_structure.tsv".format(cell_type1, chrom,
                                                       res_kb))
        structure2 = dt.structure_from_file(
            "hic_data/{}_{}_{}kb_structure.tsv".format(cell_type2, chrom,
                                                       res_kb))

        #plot.plot_structures_interactive((structure1, structure2))

        #compartments
        contacts1 = dt.matFromBed(path1, structure1)
        contacts2 = dt.matFromBed(path2, structure2)

        at.makeSymmetric(contacts1)
        at.makeSymmetric(contacts2)

        compartments1 = np.array(ca.get_compartments(contacts1))
Beispiel #15
0
import sys
sys.path.append("..")
import data_tools as dt
import linear_algebra as la
from matplotlib import pyplot as plt
import numpy as np
import compartment_analysis as ca
from scipy import stats as st

# Command-line arguments: the two dataset labels and the resolution (kb).
cell_type1 = sys.argv[1]
cell_type2 = sys.argv[2]
res_kb = int(sys.argv[3])

structure_path1 = "{}_21_{}kb_structure.tsv".format(cell_type1, res_kb)
struct1 = dt.structure_from_file(structure_path1)
structure_path2 = "{}_21_{}kb_structure.tsv".format(cell_type2, res_kb)
struct2 = dt.structure_from_file(structure_path2)

# Genomic coordinate of each point and the distance between the two
# structures at that point.
gen_coords = np.array(struct1.getGenCoords())
pairwise = []
for coord1, coord2 in zip(struct1.getCoords(), struct2.getCoords()):
    pairwise.append(la.calcDistance(coord1, coord2))
dists = np.array(pairwise)

# Compartment values for each dataset, from its contact matrix and structure.
bed_path1 = "hic_data/{}_21_{}kb.bed".format(cell_type1, res_kb)
mat1 = dt.matFromBed(bed_path1, struct1)
comps1 = ca.get_compartments(mat1, struct1)

bed_path2 = "hic_data/{}_21_{}kb.bed".format(cell_type2, res_kb)
mat2 = dt.matFromBed(bed_path2, struct2)
comps2 = ca.get_compartments(mat2, struct2)

# Pearson correlation (coefficient and p-value) between the two compartment
# vectors.
r, p = st.pearsonr(comps1, comps2)
Beispiel #16
0
import sys
sys.path.append("..")
import plotting as plot
import data_tools as dt

# Chromosomes 1-22 followed by "X".
chroms = list(range(1, 23))
chroms.append("X")

# Load one structure per chromosome, in chromosome order.
structures = []
for chrom in chroms:
    structure_path = "hic_data/GM12878_combined_{}_10kb_structure.tsv".format(
        chrom)
    structures.append(dt.structure_from_file(structure_path))

# Render all structures together into a single image file.
plot.plot_structures_interactive(structures, out_path="Fig10.png")
Beispiel #17
0
import sys

sys.path.append("..")
import data_tools as dt
import numpy as np
from mayavi import mlab

# Command-line arguments.
gene_name = sys.argv[1]
chrom_num = sys.argv[2]
gene_loc = int(sys.argv[3])
strain = sys.argv[4]

res_kb = 32
chrom_name = "{}_{}".format(strain, chrom_num)

# Load the control and galactose structures for the selected chromosome.
ctrl_path = "ctrl_{}_{}kb_structure.tsv".format(chrom_name, res_kb)
struct1 = dt.structure_from_file(ctrl_path)
galactose_path = "galactose_{}_{}kb_structure.tsv".format(chrom_name, res_kb)
struct2 = dt.structure_from_file(galactose_path)

coords1 = np.array(struct1.getCoords())
coords2 = np.array(struct2.getCoords())

# Scalar per point: 0 everywhere, 1 at the bin containing the gene location.
colors = np.zeros_like(struct1.getPoints(), dtype=int)
colors[struct1.get_rel_index(gene_loc)] = 1

mlab.figure(bgcolor=(1, 1, 1))

# First structure: draw it, then overwrite its lookup table so the first
# entry is half-transparent blue and every other entry half-transparent red.
xs1, ys1, zs1 = coords1[:, 0], coords1[:, 1], coords1[:, 2]
line = mlab.plot3d(xs1, ys1, zs1, colors)
lut_manager = line.module_manager.scalar_lut_manager
lut = lut_manager.lut.table.to_array()
lut[0] = (0, 0, 255, 128)
lut[1:] = (255, 0, 0, 128)
lut_manager.lut.table = lut

# Second structure, drawn with the same scalar array.
xs2, ys2, zs2 = coords2[:, 0], coords2[:, 1], coords2[:, 2]
line = mlab.plot3d(xs2, ys2, zs2, colors)