# Accumulators; nothing in this excerpt fills them yet (presumably appended
# to further down, outside this view -- confirm).
y_means = []
z_means = []
x_lengths = []
y_lengths = []
z_lengths = []

# Each line of the design file is split on whitespace into exactly two
# fields, unpacked as the pair of cell types to compare.
with open(design_file) as infile:
    for line in infile:
        cell_type1, cell_type2 = line.strip().split()
        for chrom in chroms:
            path1 = "hic_data/{}_{}_{}kb.bed".format(cell_type1, chrom, res_kb)
            path2 = "hic_data/{}_{}_{}kb.bed".format(cell_type2, chrom, res_kb)

            # Skip chromosomes for which either input file is missing.
            if os.path.isfile(path1) and os.path.isfile(path2):
                structure1, structure2 = multimds.full_mds(path1, path2, penalty=penalty)

                structure1.rescale()
                structure2.rescale()
                # Apply to structure1 the (r, t) pair computed from both
                # structures (presumably a rotation and translation aligning
                # structure1 to structure2 -- confirm in linear_algebra).
                r, t = la.getTransformation(structure1, structure2)
                structure1.transform(r, t)

                #compartments
                contacts1 = dt.matFromBed(path1, structure=structure1)
                contacts2 = dt.matFromBed(path2, structure=structure2)

                # Converted to arrays so later code can do element-wise math.
                compartments1 = np.array(
                    ca.get_compartments(contacts1, structure1))
                compartments2 = np.array(
                    ca.get_compartments(contacts2, structure2))
from multimds import compartment_analysis as ca
from multimds import data_tools as dt
from scipy import stats as st
from matplotlib import pyplot as plt
import numpy as np
from multimds import linear_algebra as la
from scipy import signal as sg
from multimds import multimds as mm

# Chromosome and resolution used for every input path below. The original
# script hard-coded "19"/"100" in path1/path2 but then referenced undefined
# names `chrom` and `res_kb` when loading the matrices (NameError).
chrom = 19
res_kb = 100

path1 = "hic_data/GM12878_combined_{}_{}kb.bed".format(chrom, res_kb)
path2 = "hic_data/K562_{}_{}kb.bed".format(chrom, res_kb)

struct1, struct2 = mm.full_mds(path1, path2, prefix="test_")

# Load the contact matrices from the same files the structures were built from.
# np.array(...) keeps the negation and subtraction below valid even if
# get_compartments returns a plain sequence (NOTE(review): return type not
# visible here).
mat1 = dt.matFromBed(path1, struct1)
comps1 = np.array(ca.get_compartments(mat1, struct1))
mat2 = dt.matFromBed(path2, struct2)
comps2 = np.array(ca.get_compartments(mat2, struct2))

# Flip the sign of comps1 when the two compartment vectors are
# anti-correlated, so that the difference below compares like with like.
r, p = st.pearsonr(comps1, comps2)
if r < 0:
    comps1 = -comps1

# Per-locus absolute compartment change.
comp_diffs = np.abs(comps1 - comps2)

# Per-locus distance between corresponding coordinates of the two structures.
dists = np.array([
    la.calcDistance(coord1, coord2)
    for coord1, coord2 in zip(struct1.getCoords(), struct2.getCoords())
])
import sys

from multimds import multimds

# The chromosome identifier is the only command-line argument.
chrom = sys.argv[1]

# Build both input paths first, then run MultiMDS on the pair.
gm12878_path = "hic_data/GM12878_combined_{}_100kb.bed".format(chrom)
k562_path = "hic_data/K562_{}_100kb.bed".format(chrom)
multimds.full_mds(gm12878_path, k562_path)
import sys

from multimds import multimds

# Command line: <iteration> <chrom> <strain>
iteration = sys.argv[1]
chrom = sys.argv[2]
strain = sys.argv[3]

# Control and galactose inputs for the requested strain/chromosome.
ctrl_path = "hic_data/ctrl_{}_{}_32kb.bed".format(strain, chrom)
galactose_path = "hic_data/galactose_{}_{}_32kb.bed".format(strain, chrom)

# The iteration label (plus "_") is handed to full_mds as `prefix`.
multimds.full_mds(
    ctrl_path,
    galactose_path,
    weight=0,
    penalty=0.1,
    prefix=iteration + "_",
)
import numpy as np
from multimds import plotting as plot
from multimds import multimds as mm

# Per-bin mappability scores, read from column 3 of the bed file.
mappability = np.loadtxt("mappability_21_5kb.bed", usecols=3)

#struct1 = dt.structure_from_file("GM12878_combined_21_5kb_structure.tsv")
#struct2 = dt.structure_from_file("K562_21_5kb_structure.tsv")
struct1, struct2 = mm.full_mds("hic_data/GM12878_combined_21_5kb.bed",
                               "hic_data/K562_21_5kb.bed")

# Bin offset of the structure's start. It must be an integer: with true
# division the original produced a float index array, which NumPy rejects
# when used for indexing.
first_bin = int(struct1.chrom.minPos / struct1.chrom.res)
mappability = mappability[first_bin + struct1.nonzero_abs_indices()]    #only loci in structures

# Keep only points whose mappability exceeds 0.75. The same index set is
# applied to both structures (assumes both hold the same loci in the same
# order -- TODO confirm).
mappable = np.where(mappability > 0.75)
struct1.points = struct1.points[mappable]
struct2.points = struct2.points[mappable]

plot.plot_structures_interactive((struct1, struct2))
import sys

from multimds import multimds

# Command line: <chrom> <strain>
chrom = sys.argv[1]
strain = sys.argv[2]

# Control and galactose inputs for the requested strain/chromosome.
ctrl_path = "hic_data/ctrl_{}_{}_32kb.bed".format(strain, chrom)
galactose_path = "hic_data/galactose_{}_{}_32kb.bed".format(strain, chrom)

multimds.full_mds(ctrl_path, galactose_path, weight=0, penalty=0.1)
import sys

from multimds import multimds

# Command line: <celltype1> <celltype2>
celltype1 = sys.argv[1]
celltype2 = sys.argv[2]

# Both inputs follow the same naming pattern and differ only in cell type.
first_path = "hic_data/{}_21_100kb.bed".format(celltype1)
second_path = "hic_data/{}_21_100kb.bed".format(celltype2)

multimds.full_mds(first_path, second_path)
from multimds import plotting as plot
from multimds import multimds as mm

# One row per figure panel: (control input, galactose input, output image).
# Rows are processed in order, so struct1/struct2 end up holding the
# structures of the last (downstream) comparison.
comparisons = (
    ("ctrl_Scer_12_32kb.bed",
     "galactose_Scer_12_32kb.bed",
     "sup6a.png"),
    ("ctrl_Scer_12-upstream_32kb.bed",
     "galactose_Scer_12-upstream_32kb.bed",
     "sup6b_upstream.png"),
    ("ctrl_Scer_12-downstream_32kb.bed",
     "galactose_Scer_12-downstream_32kb.bed",
     "sup6b_downstream.png"),
)

for ctrl_path, galactose_path, image_path in comparisons:
    struct1, struct2 = mm.full_mds(ctrl_path, galactose_path, weight=0, penalty=0.1)
    plot.plot_structures_interactive((struct1, struct2), out_path=image_path)
# Command line: <path1> <path2>
path1 = sys.argv[1]
path2 = sys.argv[2]

# File name without directory or extension. Take the basename first: the
# original split the whole path on "." and so produced an empty prefix for
# inputs such as "../data/x.bed".
# NOTE(review): assumes full_mds names its output "<prefix>_structure.tsv"
# from the same stem -- confirm against multimds.
prefix1 = os.path.splitext(os.path.basename(path1))[0]
prefix2 = os.path.splitext(os.path.basename(path2))[0]

n = 10    # number of repeated runs per penalty value
all_r_sq = []
ps = np.arange(0, 0.1, 0.01)

for p in ps:
    # Per-locus distances between the two structures, one array per run.
    all_changes = []
    for i in range(n):
        multimds.full_mds(path1, path2, penalty=p)
        structure1 = dt.structure_from_file("{}_structure.tsv".format(prefix1))
        structure2 = dt.structure_from_file("{}_structure.tsv".format(prefix2))

        # Only with a zero penalty is the explicit transformation applied
        # before distances are measured.
        if p == 0:
            r, t = la.getTransformation(structure1, structure2)
            structure1.transform(r, t)

        all_changes.append(np.array([
            la.calcDistance(coord1, coord2)
            for coord1, coord2 in zip(structure1.getCoords(), structure2.getCoords())
        ]))

    # Squared Pearson correlation for every unordered pair of runs.
    r_sq = []
    for i in range(n):
        for j in range(i):
            # The p-value gets its own name so it no longer overwrites the
            # loop's penalty variable `p`.
            r, p_value = st.pearsonr(all_changes[i], all_changes[j])
            r_sq.append(r**2)
import matplotlib
from matplotlib import pyplot as plt
from multimds import multimds
from multimds import linear_algebra as la

struct1, struct2 = multimds.full_mds("sim1_chr21_100kb.bed", "sim2_chr21_100kb.bed")

# x values: genomic coordinates of struct1; y values: distance between the
# corresponding coordinates of the two structures.
gen_coords = struct1.getGenCoords()
dists = [
    la.calcDistance(coord1, coord2)
    for coord1, coord2 in zip(struct1.getCoords(), struct2.getCoords())
]

plt.subplot2grid((10,10), (0,0), 9, 10, frameon=False)
plt.plot(gen_coords, dists, lw=2)

#define offsets
xmin = min(gen_coords)
xmax = max(gen_coords)
x_range = xmax - xmin
x_start = xmin - x_range/25.
x_end = xmax + x_range/25.

ymin = 0
ymax = max(dists)
y_range = ymax - ymin
y_start = ymin - y_range/25.
y_end = ymax + y_range/25.

#define axes with offsets
# `frameon` is not an axis() option and raises TypeError on current
# Matplotlib; the frame is already disabled by subplot2grid above.
plt.axis([x_start, x_end, y_start, y_end])

#plot axes (black with line width of 4)
plt.axvline(x=x_start, color="k", lw=4)
res_kb = 100
prefix1 = "GM12878_combined"
prefix2 = "K562"

# `chrom` and `error` are defined outside this excerpt.
path1 = "hic_data/{}_{}_{}kb.bed".format(prefix1, chrom, res_kb)
path2 = "hic_data/{}_{}_{}kb.bed".format(prefix2, chrom, res_kb)
size1 = dt.size_from_bed(path1)
size2 = dt.size_from_bed(path2)

# Penalty values to sweep and the mean embedding error measured at each one.
ps = np.arange(0, 0.6, 0.1)
errors = np.zeros_like(ps)

for i, p in enumerate(ps):
    #os.system("python ../multimds.py -P {} {} {}".format(p, path1, path2))
    structure1, structure2 = mm.full_mds(path1, path2, penalty=p)
    #structure1 = dt.structure_from_file("{}_{}_{}kb_structure.tsv".format(prefix1, chrom, res_kb))
    #structure2 = dt.structure_from_file("{}_{}_{}kb_structure.tsv".format(prefix2, chrom, res_kb))

    dists1 = dt.distmat(path1, structure1, size1)
    dists2 = dt.distmat(path2, structure2, size2)

    # Average the error of BOTH structures. The original evaluated the
    # first structure twice, so dists2/structure2 never contributed.
    errors[i] = np.mean((
        error(dists1, structure1.getCoords()),
        error(dists2, structure2.getCoords()),
    ))

# Plot data and axis tick spacing.
xs = ps
x_int_size = 0.1
ys = errors
y_int_size = 0.05
x_start = min(xs) - x_int_size / 4.
from multimds import plotting as plot
from multimds import multimds as mm

# Inputs for the two cell types (chromosome 21 files at 10 kb, per the names).
gm12878_path = "hic_data/GM12878_combined_21_10kb.bed"
k562_path = "hic_data/K562_21_10kb.bed"

# Infer both structures (weight=0 is passed through to full_mds) and show
# them together in the interactive viewer.
struct1, struct2 = mm.full_mds(gm12878_path, k562_path, weight=0)
structure_pair = (struct1, struct2)
plot.plot_structures_interactive(structure_pair)
import numpy as np
from mayavi import mlab
from sklearn import svm

from multimds import compartment_analysis as ca
from multimds import data_tools as dt
from multimds import linear_algebra as la
from multimds import multimds as mm

path1 = "hic_data/GM12878_combined_21_100kb.bed"
path2 = "hic_data/K562_21_100kb.bed"

struct1, struct2 = mm.full_mds(path1, path2)

# --- first structure (GM12878 input) ---
contacts1 = dt.matFromBed(path1, struct1)
coverage1 = np.loadtxt("binding_data/GM12878_21_100kb_active_coverage.bed", usecols=6)
# Bin index of the structure's start, added to the structure's own
# indices to select the matching rows of the coverage column.
first_bin1 = int(struct1.chrom.minPos / struct1.chrom.res)
bin_nums1 = struct1.nonzero_abs_indices() + first_bin1
enrichments1 = coverage1[bin_nums1]
comps1 = np.array(ca.get_compartments(contacts1, struct1, enrichments1))

# --- second structure (K562 input) ---
contacts2 = dt.matFromBed(path2, struct2)
coverage2 = np.loadtxt("binding_data/K562_21_100kb_active_coverage.bed", usecols=6)
first_bin2 = int(struct2.chrom.minPos / struct2.chrom.res)
bin_nums2 = struct2.nonzero_abs_indices() + first_bin2
enrichments2 = coverage2[bin_nums2]
comps2 = np.array(ca.get_compartments(contacts2, struct2, enrichments2))

# Coordinates of both structures for the downstream analysis.
coords1 = struct1.getCoords()
coords2 = struct2.getCoords()