# Day labels of the real Hi-C maps and the frame indices read from each
# generated structure file.
# NOTE(review): the two lists look index-aligned (day <-> frame) -- confirm.
days = ['B', 'D2', 'D4', 'D6', 'D8', 'ES']
struc_times = [0, 4, 8, 12, 16, 20]

# Result containers. real_map / struc_map are keyed by day / frame only, so
# every pass of the chromosome loop overwrites the previous pass's entries.
struc_map, real_map = {}, {}
real_o_e, struc_o_e = {}, {}
real_pearson, struc_pearson = {}, {}
real_vec, struc_vec = {}, {}

for chro in chros:
    # --- experimental contact maps -------------------------------------
    for day in days:
        print(str(chro), str(day))
        real_path = (
            "../Real_Data/iPluripotent/day_" + str(day)
            + "_rep_1_chro_" + str(chro)
        )
        real_map[day] = ut.loadConstraintAsMat(real_path)
        # Two leading singleton axes are added before pooling.
        # Assumes the loaded matrix is 2-D -- TODO confirm.
        real_map_torch = (
            torch.from_numpy(real_map[day]).unsqueeze(0).unsqueeze(0)
        )
        # kernel_size=2 average pooling, then drop the singleton axes again.
        resized_real_map = torch.nn.functional.avg_pool2d(
            real_map_torch, kernel_size=2
        ).squeeze()
        real_map[day] = resized_real_map.numpy()

    # --- contact maps derived from the generated structure --------------
    struc_path = (
        "../Generated_Structures/ipsc_full_rep_1_eta_1000_alpha_0.6_lr_0.0001_epoch_400_res_50000_step_21_chro_"
        + str(chro) + ".npy"
    )
    struc = np.load(struc_path)
    for struc_time in struc_times:
        print(str(chro), str(struc_time))
        struc_map[struc_time] = ut.struc2contacts(struc[struc_time])
        struc_map_torch = (
            torch.from_numpy(struc_map[struc_time]).unsqueeze(0).unsqueeze(0)
        )
        # NOTE(review): unlike the real maps above, the pooled result is not
        # written back into struc_map in the visible part of this loop.
        resized_struc_map = torch.nn.functional.avg_pool2d(
            struc_map_torch, kernel_size=2
        ).squeeze()
from sklearn.decomposition import PCA
import sys
import numpy as np
import numpy.ma as ma
import matplotlib.pyplot as plt
import pdb

# Make the repository root importable so the project's Utils package resolves.
sys.path.insert(0, "../")
from Utils import util as ut

# Chromosome under inspection.
chro = "13"

# Generated structure file and the real Hi-C map it is compared against.
struc_path = (
    "../Generated_Structures/ipsc_missing_2_rep_1_eta_1000_alpha_0.6_lr_0.0001_epoch_400_res_50000_step_21_chro_"
    + str(chro) + ".npy"
)
real_path = "../Real_Data/iPluripotent/day_D2_rep_1_chro_" + str(chro)

struc = np.load(struc_path)
real_map = ut.loadConstraintAsMat(real_path)

# Contact map of frame 4 of the structure; both maps are clipped to a fixed
# range (0-10 for the structure, 0-30 for the real data).
struc_map = np.clip(ut.struc2contacts(struc[4]), 0, 10)
real_map = np.clip(real_map, 0, 30)

# Row-wise Pearson correlation matrices, with invalid (NaN/inf) entries masked.
real_pear = ma.corrcoef(ma.masked_invalid(real_map))
struc_pear = ma.corrcoef(ma.masked_invalid(struc_map))

# First principal component of each correlation matrix.
# NOTE(review): the inputs are masked arrays -- confirm how PCA treats the
# masked entries.
pca_real = PCA(n_components=1)
pca_struc = PCA(n_components=1)
ab_real = pca_real.fit_transform(real_pear)
ab_struc = pca_struc.fit_transform(struc_pear)
real_vec, struc_vec = np.squeeze(ab_real), np.squeeze(ab_struc)

# 3x2 panel figure; the top-left panel shows the structure-derived map.
fig, ax = plt.subplots(3, 2)
ax[0, 0].imshow(np.clip(struc_map, 0, 10), cmap="Reds")
# One figure per chromosome: rows are replicates, columns are
# Hi-C / missing-day model / full model.
fig, ax = plt.subplots(ncols=3, nrows=2)
fig.suptitle("Chro " + str(chro))

# Part of the structure file name that does not depend on the replicate.
_struc_suffix = (
    "_eta_1000_alpha_0.6_lr_0.0001_epoch_400_res_1_step_15_chro_"
    + str(chro) + ".npy"
)

# Load the three matrices for every replicate.
for r, rep in enumerate(reps):
    FULL_STRUC_STRING = (
        "Generated_Structures/cardio_full_rep_" + str(rep) + _struc_suffix
    )
    MIS_STRUC_STRING = (
        "Generated_Structures/cardio_missing_2_rep_" + str(rep) + _struc_suffix
    )
    # The contact file name has a variable prefix, so it is found with a glob
    # and the first match is used.
    _contact_pattern = (
        "Real_Data/Cardiomyocyte/RUES2/By_Chros/*_MES_Rep" + str(rep)
        + "_500KB_" + str(chro)
    )
    CONTACT_STRING = glob.glob(_contact_pattern)[0]
    mat_contacts[r] = ut.loadConstraintAsMat(CONTACT_STRING, res=1)
    mat_mis_struc[r] = ut.loadStrucAtTimeAsMat(MIS_STRUC_STRING, time)
    mat_full_struc[r] = ut.loadStrucAtTimeAsMat(FULL_STRUC_STRING, time)

# Draw one row of panels per replicate.
for r, rep in enumerate(reps):
    _hic_ax, _mis_ax, _full_ax = ax[r]
    _hic_ax.set_ylabel("Rep " + str(rep))
    _hic_ax.imshow(np.clip(mat_contacts[r], 0, 30), cmap="Reds")
    _mis_ax.imshow(np.clip(mat_mis_struc[r], 0, 10), cmap="Reds")
    _full_ax.imshow(np.clip(mat_full_struc[r], 0, 10), cmap="Reds")

# NOTE(review): column 1 holds the "missing_2" model output but is labelled
# "Recon", and column 2 holds the full model but is labelled "Interp" --
# confirm the two labels are not swapped.
for _col, _label in enumerate(("Hi-C", "Recon", "Interp")):
    ax[1, _col].set_xlabel(_label)

print("CHRO" + str(chro))
# Spearman correlation of replicate 0's Hi-C map against, in order: itself,
# the full model and the missing-day model.
# NOTE(review): the first comparison is a map against itself -- confirm this
# is an intended sanity check rather than a typo for another replicate.
for _other in (mat_contacts[0], mat_full_struc[0], mat_mis_struc[0]):
    print(spearmanr(mat_contacts[0], _other, axis=None))
# Frame indices read from the generated structure file and the day labels of
# the Hi-C maps; the loading loop pairs them position by position.
_TIMES = [0, 4, 8, 12, 16, 20]
_DAYS = ['B', 'D2', 'D4', 'D6', 'D8', 'ES']

mat_contacts, mat_mis_struc, mat_full_struc = {}, {}, {}
rep = 1

# Path of the full-model structure file, assembled from the run parameters.
FULL_STRUC_STRING = "".join([
    "Generated_Structures/ipsc_full_rep_", str(rep),
    "_eta_", str(eta),
    "_alpha_", str(alpha),
    "_lr_", str(lr),
    "_epoch_", str(epoch),
    "_res_", str(res),
    "_step_", str(step),
    "_chro_", str(chro),
    ".npy",
])

# Load the structure-derived matrix and the matching Hi-C map for each slot.
for t, (time, day) in enumerate(zip(_TIMES, _DAYS)):
    mat_full_struc[t] = ut.loadStrucAtTimeAsMat(FULL_STRUC_STRING, time)
    CONTACT_STRING = (
        "Real_Data/iPluripotent/day_" + str(day)
        + "_rep_" + str(rep)
        + "_chro_" + str(chro)
    )
    mat_contacts[t] = ut.loadConstraintAsMat(CONTACT_STRING)

# Print the Spearman coefficient for every (day, frame) combination.
for d, day in enumerate(_DAYS):
    for t, time in enumerate(_TIMES):
        _rho = spearmanr(mat_full_struc[t], mat_contacts[d], axis=None)[0]
        print(str(day) + "/" + str(time) + ":" + str(_rho))

# 2x6 grid without gaps: structure-derived maps on top, Hi-C maps below.
fig, ax = plt.subplots(2, 6, gridspec_kw=dict(wspace=0.0, hspace=0.0))
for t, time in enumerate(_TIMES):
    _top, _bottom = ax[0, t], ax[1, t]
    _top.imshow(np.clip(mat_full_struc[t], 0, 10), cmap="PuBuGn")
    _bottom.imshow(np.clip(mat_contacts[t], 0, 30), cmap="YlOrRd")
    _top.set_xticks([])
    _bottom.set_xticks([])
    # NOTE(review): only the top row's y ticks are cleared in the visible code.
    _top.set_yticks([])
# Export the original contact map of one sample as a plain-text matrix.
# Per the original header, the output is meant for HiC_Tool TAD calling;
# the TAD calling itself is not done here.
#
# Usage: <script> <line> <day> <rep> <chro> <out_file>
#   line      cell-line directory under ../Real_Data/
#   day       day label used in the file name (e.g. D2)
#   rep       replicate number
#   chro      chromosome identifier
#   out_file  path of the text file to write
import pdb
import numpy as np
import sys
import matplotlib.pyplot as plt

# Make the repository root importable so the project's Utils package resolves.
sys.path.insert(0, "../")
from Utils import util as ut

TIME = 4

# Fail with a readable usage message instead of a bare IndexError when
# arguments are missing. Extra trailing arguments are still tolerated.
if len(sys.argv) < 6:
    sys.exit(
        "usage: " + sys.argv[0] + " <line> <day> <rep> <chro> <out_file>"
    )

line = sys.argv[1]
day = sys.argv[2]
rep = sys.argv[3]
chro = sys.argv[4]
out_file = sys.argv[5]

# Example of a resulting path: "../Real_Data/iPluripotent/day_D2_rep_1_chro_15"
CONTACT_STRING = (
    "../Real_Data/" + str(line)
    + "/day_" + str(day)
    + "_rep_" + str(rep)
    + "_chro_" + str(chro)
)

mat = ut.loadConstraintAsMat(CONTACT_STRING)
# Space-delimited values with two decimals.
np.savetxt(out_file, mat, fmt='%0.2f', delimiter=' ')
# Extract an A/B compartment vector from a single Hi-C contact map.
#
# Usage: <script> <hic_file> <out_file>
#   hic_file  contact map readable by ut.loadConstraintAsMat
#   out_file  destination passed to np.save (NumPy appends ".npy" when the
#             name does not already end with it)
import time
import numpy as np
import numpy.ma as ma
import matplotlib.pyplot as plt
import pdb
from sklearn.decomposition import PCA
import sys

# Make the repository root importable so the project's Utils package resolves.
sys.path.insert(0, "../")
from Utils import util as ut

# Fail with a readable usage message instead of a bare IndexError when
# arguments are missing. Extra trailing arguments are still tolerated.
if len(sys.argv) < 3:
    sys.exit("usage: " + sys.argv[0] + " <hic_file> <out_file>")

hic_file = sys.argv[1]
out_file = sys.argv[2]

# NOTE(review): start_time is recorded but never read in the visible code.
start_time = time.time()

# Load the contact map and clip it to the range 0-30.
mat = ut.loadConstraintAsMat(hic_file)
mat = np.clip(mat, 0, 30)

# Row-wise Pearson correlation matrix, with invalid (NaN/inf) entries masked.
pear = ma.corrcoef(ma.masked_invalid(mat))

# The first principal component of the correlation matrix is saved as the
# compartment vector.
# NOTE(review): pear is a masked array -- confirm how PCA treats the masked
# entries.
pca = PCA(n_components=1)
AB = pca.fit_transform(pear)
AB_VEC = np.squeeze(AB)
np.save(out_file, AB_VEC)
import pdb from scipy.stats import pearsonr from scipy.stats import spearmanr import numpy as np import matplotlib.pyplot as plt from Utils import util as ut fig, ax = plt.subplots(6, 6) for day, time in zip([0, 1, 2, 3, 4, 5], [0, 4, 8, 12, 16, 20]): real1 = 1 / ut.loadConstraintAsMat( "Synthetic_Data/Synthetic_Contact_Maps/struc1_" + str(day) + ".txt", res=100000) real2 = 1 / ut.loadConstraintAsMat( "Synthetic_Data/Synthetic_Contact_Maps/struc2_" + str(day) + ".txt", res=100000) full1 = 1 / ut.loadStrucAtTimeAsMat( "Generated_Structures/synthetic_full_rep_1_eta_10_alpha_1.0_lr_0.01_epoch_1000_res_100000_step_21_chro_all.npy", time) full2 = 1 / ut.loadStrucAtTimeAsMat( "Generated_Structures/synthetic_full_rep_2_eta_10_alpha_1.0_lr_0.01_epoch_1000_res_100000_step_21_chro_all.npy", time) if day == 0 or day == 5: struc1 = 1 / ut.loadStrucAtTimeAsMat( "Generated_Structures/synthetic_full_rep_1_eta_10_alpha_1.0_lr_0.01_epoch_1000_res_100000_step_21_chro_all.npy", time) struc2 = 1 / ut.loadStrucAtTimeAsMat( "Generated_Structures/synthetic_full_rep_2_eta_10_alpha_1.0_lr_0.01_epoch_1000_res_100000_step_21_chro_all.npy", time) else: struc1 = 1 / ut.loadStrucAtTimeAsMat( "Generated_Structures/synthetic_missing_" + str(day) + "_rep_1_eta_10_alpha_1.0_lr_0.01_epoch_1000_res_100000_step_21_chro_all.npy",