def rmsd(struct1, struct2):
    """Return the root-mean-square deviation between two structures.

    The coordinates returned by ``struct1.getCoords()`` and
    ``struct2.getCoords()`` are paired position by position, each pair is
    measured with ``la.calcDistance``, and the square root of the mean of
    the squared distances is returned.

    Args:
        struct1: object exposing ``getCoords()``.
        struct2: object exposing ``getCoords()``; must yield as many
            coordinates as ``struct1``.

    Returns:
        The RMSD over all paired coordinates.

    Raises:
        AssertionError: if the two structures have different numbers of
            coordinates.
        ZeroDivisionError: if the structures have no coordinates.
    """
    coords1 = struct1.getCoords()
    coords2 = struct2.getCoords()
    assert len(coords1) == len(coords2)

    # Sum of *squared* distances. The previous version summed the raw
    # distances, which yields sqrt(mean distance) rather than an RMSD.
    # NOTE(review): assumes la.calcDistance returns an unsquared Euclidean
    # distance -- confirm against linear_algebra.
    ssd = sum(
        la.calcDistance(coord1, coord2) ** 2
        for coord1, coord2 in zip(coords1, coords2)
    )
    msd = ssd / len(coords1)
    return msd ** (1. / 2)
def error(dists, coords):
    """Return the RMSE between embedded and target pairwise distances.

    For every pair of points ``(row, col)`` with ``col < row``, the distance
    between ``coords[row]`` and ``coords[col]`` (via ``la.calcDistance``) is
    compared with the target value ``dists[row, col]``.

    Args:
        dists: square table of target distances, indexed as
            ``dists[row, col]``.
        coords: sequence of coordinates, one per row of ``dists``.

    Returns:
        The square root of the mean squared difference over all pairs.

    Raises:
        AssertionError: if ``dists`` and ``coords`` differ in length.
        ZeroDivisionError: if there are fewer than two points (no pairs).
    """
    assert len(dists) == len(coords)
    num_points = len(dists)

    squared_error_total = 0
    num_pairs = 0
    # Row 0 has no partner with a smaller index, so start from row 1.
    for row in range(1, num_points):
        for col in range(row):
            residual = la.calcDistance(coords[row], coords[col]) - dists[row, col]
            squared_error_total += residual**2
            num_pairs += 1

    mean_squared_error = squared_error_total / num_pairs
    return mean_squared_error**(1. / 2)
# NOTE(review): the line below is a whitespace-collapsed script fragment whose
# final statement (`plt.plot(gen_coords, dists / max(dists), lw=2,`) is cut
# off mid-call, so the code is left exactly as found.
#
# Steps visible in the fragment, in order:
#   1. mat1 / mat2: built with dt.matFromBed from the GM12878 and K562 BED
#      files for `chrom` at `res_kb` kb, using struct1 / struct2.
#   2. comps1 / comps2: ca.get_compartments(mat, struct) for each matrix.
#   3. If st.pearsonr(comps1, comps2) gives r < 0, comps1 is negated.
#      (The extent of the `if` body is not recoverable from the collapsed
#      text; presumably only the negation belongs to it.)
#   4. comp_diffs: element-wise absolute difference of comps1 and comps2.
#   5. dists: la.calcDistance for each pair of coordinates taken in lockstep
#      from struct1.getCoords() and struct2.getCoords().
#   6. dist_peaks: sg.find_peaks_cwt(dists, np.arange(1, 10)).
#   7. Plotting against struct1.getGenCoords(): comp_diffs and dists are each
#      divided by their own maximum before being drawn.
mat1 = dt.matFromBed( "hic_data/GM12878_combined_{}_{}kb.bed".format(chrom, res_kb), struct1) comps1 = ca.get_compartments(mat1, struct1) mat2 = dt.matFromBed("hic_data/K562_{}_{}kb.bed".format(chrom, res_kb), struct2) comps2 = ca.get_compartments(mat2, struct2) r, p = st.pearsonr(comps1, comps2) if r < 0: comps1 = -comps1 comp_diffs = np.abs(comps1 - comps2) dists = np.array([ la.calcDistance(coord1, coord2) for coord1, coord2 in zip(struct1.getCoords(), struct2.getCoords()) ]) dist_peaks = sg.find_peaks_cwt(dists, np.arange(1, 10)) plt.subplot2grid((10, 10), (0, 0), 9, 10, frameon=False) gen_coords = struct1.getGenCoords() plt.plot(gen_coords, comp_diffs / max(comp_diffs), lw=2, color=(0.75, 0, 0), label="Compartment score change", zorder=1) plt.plot(gen_coords, dists / max(dists), lw=2,
# Load the structure file for each cell type.
structure1 = dt.structure_from_file(
    "hic_data/{}_{}_{}kb_structure.tsv".format(cell_type1, chrom, res_kb)
)
structure2 = dt.structure_from_file(
    "hic_data/{}_{}_{}kb_structure.tsv".format(cell_type2, chrom, res_kb)
)

# Rescale each structure, then pass the pair to make_compatible.
for _structure in (structure1, structure2):
    _structure.rescale()
dt.make_compatible((structure1, structure2))

# Distance between the two structures at each paired coordinate.
coords1 = np.array(structure1.getCoords())
coords2 = np.array(structure2.getCoords())
dists = [la.calcDistance(*coord_pair) for coord_pair in zip(coords1, coords2)]

# Compartments are computed from separate, coarser structures.
chrom1 = dt.chromFromBed(path1)
chrom2 = dt.chromFromBed(path2)
for _low_res_chrom in (chrom1, chrom2):
    # Coarsen the resolution to reduce RAM usage in the compartment
    # calculation.
    _low_res_chrom.res = 100000
    # Round the range outward to whole multiples of the new resolution:
    # floor for the minimum, ceil for the maximum.
    _low_res_chrom.minPos = (
        int(np.floor(float(_low_res_chrom.minPos) / _low_res_chrom.res))
        * _low_res_chrom.res
    )
    _low_res_chrom.maxPos = (
        int(np.ceil(float(_low_res_chrom.maxPos) / _low_res_chrom.res))
        * _low_res_chrom.res
    )

low_struct1 = dt.structureFromBed(path1, chrom1)
low_struct2 = dt.structureFromBed(path2, chrom2)
dt.make_compatible((low_struct1, low_struct2))

contacts1 = dt.matFromBed(path1, low_struct1)
# Penalty values to sweep: from 0 up to (but excluding) 0.1 in steps of 0.01.
ps = np.arange(0, 0.1, 0.01)

for p in ps:
    # One vector of per-point distances between the two structures per run.
    all_changes = []
    for i in range(n):
        multimds.full_mds(path1, path2, penalty=p)

        # Read back the structure files named after the input prefixes.
        structure1_path = "{}_structure.tsv".format(os.path.basename(prefix1))
        structure1 = dt.structure_from_file(structure1_path)
        structure2_path = "{}_structure.tsv".format(os.path.basename(prefix2))
        structure2 = dt.structure_from_file(structure2_path)

        if p == 0:
            # Only for a zero penalty: transform structure1 with the
            # transformation computed between the two structures.
            r, t = la.getTransformation(structure1, structure2)
            structure1.transform(r, t)

        changes = np.array([
            la.calcDistance(coord1, coord2)
            for coord1, coord2 in zip(structure1.getCoords(), structure2.getCoords())
        ])
        all_changes.append(changes)

    # Squared Pearson correlation between every pair of runs (j < i).
    # NOTE(review): `r, p` below rebinds the penalty loop variable `p` (and
    # the `r` used for the transformation above). Harmless today because
    # neither is read again before being reassigned, but easy to trip over.
    r_sq = []
    for i in range(n):
        for j in range(i):
            r, p = st.pearsonr(all_changes[i], all_changes[j])
            r_sq.append(r**2)
    all_r_sq.append(r_sq)

ys = all_r_sq

# Start with a frameless plot (extra room on the left).
plt.subplot2grid((10,10), (0,0), 9, 10, frameon=False)

#label axes
import matplotlib
from matplotlib import pyplot as plt
from multimds import multimds
from multimds import linear_algebra as la

# Run full_mds on the two simulated chr21 datasets; it returns one structure
# per input file.
struct1, struct2 = multimds.full_mds("sim1_chr21_100kb.bed", "sim2_chr21_100kb.bed")

# Distance between the two structures at each paired coordinate, plotted
# against struct1's genomic coordinates.
gen_coords = struct1.getGenCoords()
dists = [
    la.calcDistance(coord1, coord2)
    for coord1, coord2 in zip(struct1.getCoords(), struct2.getCoords())
]

# Frameless axes: the axis lines are drawn by hand further down.
plt.subplot2grid((10,10), (0,0), 9, 10, frameon=False)
plt.plot(gen_coords, dists, lw=2)

# Define offsets: pad each axis by 1/25 of its data range on both sides.
xmin = min(gen_coords)
xmax = max(gen_coords)
x_range = xmax - xmin
x_start = xmin - x_range/25.
x_end = xmax + x_range/25.

ymin = 0
ymax = max(dists)
y_range = ymax - ymin
y_start = ymin - y_range/25.
y_end = ymax + y_range/25.

# Define axes with offsets.
# `frameon` is not a keyword that axis() accepts: recent Matplotlib raises
# TypeError for it and older releases silently ignored it. The frame is
# already disabled by the subplot2grid(..., frameon=False) call above.
plt.axis([x_start, x_end, y_start, y_end])

# Plot axes (black with line width of 4).
plt.axvline(x=x_start, color="k", lw=4)