def run(profile, tndiff, tstart, tend, left=None, right=None):
    """Simulate replication for ``profile`` and score it against experiments.

    The simulation is delegated to ``get_fast_MRT_RFDs``. Its MRT and RFD
    outputs are then handed to ``compare`` together with the "MRT" and
    "OKSeq" tracks of the module-level ``cell`` / ``ch`` between ``tstart``
    and ``tend``.

    Args:
        profile: Passed to the simulator as its second positional argument.
        tndiff: Passed to the simulator as its third positional argument.
        tstart: ``start`` coordinate given to ``compare``.
        tend: ``end`` coordinate given to ``compare``.
        left: Optional term; ``1.5 * left`` is added in place to the
            simulated RFD before scoring.
        right: Optional term, treated exactly like ``left``.

    Returns:
        A 6-tuple ``(score, MRTp, MRT, RFD, RFDs, wRFD)``. ``score`` is the
        second value returned by the RFD comparison when ``args.RFDo`` is
        truthy, otherwise the sum of the second values returned by the MRT
        and RFD comparisons. ``MRT`` and ``RFD`` are the third values
        returned by ``compare`` (called with ``return_exp=True``).
    """
    # Only three of the eight simulator outputs are used here.
    (sim_mrt, _mrt_s, sim_rfd, _rep_time, _single_mol, _ori_events, _it,
     whole_rfd) = get_fast_MRT_RFDs(
         nsim, profile, tndiff,
         kon=kon,
         fork_speed=fork_speed,
         dori=20 * 5 / resolution,
         single_mol_exp=False,
         continuous=args.continuous,
         wholeRFD=True)

    # Optional additive terms, applied in place (left first, then right).
    for flank in (left, right):
        if flank is not None:
            sim_rfd += 1.5 * flank

    span = dict(ch=ch, start=tstart, end=tend, return_exp=True)

    if cell == "Cerevisae":
        _, mrt_score, exp_mrt = compare(sim_mrt, "MRT", cell, res=1, **span)
        _, rfd_score, exp_rfd = compare(sim_rfd, "OKSeq", cell, res=1,
                                        rescale=1, nanpolate=True, smoothf=2,
                                        **span)
        # NOTE(review): the original indentation was lost; this smoothing is
        # assumed to belong to the Cerevisae branch only - confirm.
        sim_rfd = smooth(sim_rfd, 2)
    else:
        # MRT is sub-sampled with a stride of 10 // resolution and compared
        # with res=10; RFD is compared at the native resolution.
        _, mrt_score, exp_mrt = compare(sim_mrt[::10 // resolution], "MRT",
                                        cell, res=10, **span)
        _, rfd_score, exp_rfd = compare(sim_rfd, "OKSeq", cell,
                                        res=resolution,
                                        rescale=1 / resolution, **span)

    score = rfd_score if args.RFDo else mrt_score + rfd_score
    return score, sim_mrt, exp_mrt, exp_rfd, sim_rfd, whole_rfd
#if len(weights)== 0 print(weights) #print(ast.literal_eval(weights)) try: weights = ast.literal_eval(weights) except: if weights not in ["submed"]: print("Warning unrecon option %s" % weights) pass if type(weights) == list: d3p = np.zeros_like(raw) #print(weights) for smoothv, weightv in weights: d3p += smooth(raw, int(smoothv)) * weightv raw = d3p else: #print(type(weights),np.nanmean(raw)) if weights == "submed": raw -= np.nanmedian(raw) d3p = raw #print(weights, "here") if args.nan0 and "Exp" not in signal: d3p[np.isnan(d3p)] = 0 btosee.append([x, d3p, signal]) if sup_sig != None: btosee.append(sup_sig)
def fun(x, alpha):
    """Score the candidate values ``x`` with the external command and update them.

    Steps performed:

    1. ``x`` is written into ``init_x0[where]`` and the full signal is dumped
       to ``root + "/tmp.csv"``.
    2. ``command`` is run through the shell with ``--signal`` / ``--name``
       pointing at that file and at ``root + "/tmp"``.
    3. ``root + "/tmpglobal_corre.csv"`` is read and the score is computed as
       ``2 - c1 - c2`` (``c1`` is forced to 0, see below).
    4. The best score so far is tracked in the global ``gscore``; on
       improvement the signal table is saved to ``root + "_<iter>.csv"``.
    5. ``root + "/tmpglobal_profiles.csv"`` is read and ``x`` is updated in
       place from the smoothed difference between the first differences of
       its ``RFDs`` and ``RFDe`` columns.

    Args:
        x: Array of candidate values; modified in place.
        alpha: Step size multiplying the normalised update direction.

    Returns:
        ``2`` when ``x`` contains a negative entry, otherwise the tuple
        ``(score, x)``.
        NOTE(review): the two return shapes differ; callers that unpack the
        result will fail on the early return - confirm this is intended.
    """
    global iter
    global gscore

    # NOTE: no copy is taken, so this also overwrites the module-level
    # ``init_x0`` at the ``where`` positions.
    signal = init_x0
    signal[where] = x
    if np.any(x < 0):
        return 2

    filen = root + "/tmp.csv"
    d = pd.DataFrame({
        "chrom": whole_info.chrom,
        "chromStart": whole_info.chromStart,
        # NOTE(review): chromEnd is filled from chromStart - confirm that
        # this is intended and not a typo for whole_info.chromEnd.
        "chromEnd": whole_info.chromStart,
        "signalValue": signal
    })
    d.to_csv(filen, index=False)

    process = subprocess.Popen(command + " --signal %s --name %s" %
                               (filen, root + "/tmp"),
                               shell=True,
                               stdout=subprocess.PIPE)
    # communicate() drains stdout while waiting. A bare wait() with
    # stdout=PIPE can deadlock once the child fills the pipe buffer.
    process.communicate()
    if process.returncode != 0:
        # The result files read below may be stale if the command failed.
        print("Warning: command exited with status %i" % process.returncode)

    scored = pd.read_csv(root + "/tmpglobal_corre.csv")
    # Each cell is parsed as: first comma-separated field, minus its first
    # character. The MRT value is parsed and then overridden with 0, so the
    # score depends only on the RFD value.
    c1 = float(scored["MRTp"][0].split(",")[0][1:])
    c1 = 0
    c2 = float(scored["RFDp"][0].split(",")[0][1:])
    print(scored)
    if iter % 10 == 0:
        print("every10", c1, c2)

    score = 2 - c1 - c2
    if iter == 0:
        print("Initial value", gscore)
        gscore = score
    if score < gscore:
        # Fixed: the format string was previously passed to print() without
        # being applied to its arguments.
        print("New minimum %.3f , old %.3f" % (score, gscore))
        print(c1, c2)
        d.to_csv(root + "_%i.csv" % iter, index=False)
        gscore = score
    iter += 1

    scored = pd.read_csv(root + "/tmpglobal_profiles.csv")

    def delta(s):
        # First difference: s[i + 1] - s[i].
        values = np.array(s)
        return values[1:] - values[:-1]

    deltas = smooth(
        delta(scored["RFDs"]) - delta(scored["RFDe"]), args.extension)
    direction = deltas[where]
    direction /= np.mean(np.abs(direction))

    # Multiplicative update, then clip negatives to zero.
    x -= alpha * direction * x
    x[x < 0] = 0
    return score, x
start=start, end=end, resolution=resolution, raw=False) x, CNV = replication_data(cell, "CNV", chromosome=ch, start=start, end=end, resolution=resolution, raw=False) CNV[CNV == 0] = 2 DNaseI[np.isnan(DNaseI)] = 0 DNaseI /= CNV DNaseIsm = smooth(DNaseI, 100) DNaseIsm /= np.mean(DNaseIsm) d3p *= DNaseIsm d3p[np.isnan(d3p)] = 0 d3p[np.isinf(d3p)] = 0 print(np.sum(np.isnan(d3p))) # pylab.plot(d3p) # pylab.show() elif args.signal == "ARSpeak": x, d3p = replication_data(cell, "ARS", chromosome=ch,
start=start, end=end, resolution=resolution, raw=False) x, CNV = replication_data(cell, "CNV", chromosome=ch, start=start, end=end, resolution=resolution, raw=False) CNV[CNV == 0] = 2 DNaseI[np.isnan(DNaseI)] = 0 DNaseI /= CNV DNaseIsm = smooth(DNaseI, 100) DNaseIsm /= np.mean(DNaseIsm) d3p *= DNaseIsm d3p[np.isnan(d3p)] = 0 d3p[np.isinf(d3p)] = 0 print(np.sum(np.isnan(d3p))) # pylab.plot(d3p) # pylab.show() else: x, d3p = replication_data(cell, args.signal, chromosome=ch, start=start,
def load_signal(name,
                marks=[
                    'H2az', 'H3k27ac', 'H3k79me2', 'H3k27me3', 'H3k9ac',
                    'H3k4me2', 'H3k4me3', 'H3k9me3', 'H3k4me1', 'H3k36me3',
                    "H4k20me1"
                ],
                targets=["initiation"],
                t_norm=None,
                smm=None,
                wig=True,
                augment=None):
    """Load a CSV of tracks and split it into normalised features and targets.

    Args:
        name: Path (or anything ``pandas.read_csv`` accepts) of the CSV file.
        marks: Names of the feature columns. The default lists are never
            mutated by this function.
        targets: Names of the target columns; they are removed from the
            feature frame.
        t_norm: Callable applied to every feature column that is not one of
            the specially handled ones. When ``None`` the module-level
            ``transform_norm`` is used (assumed to exist - TODO confirm).
        smm: Optional smoothing window applied to columns whose name
            contains "RFD".
        wig: When true, ``"wig"`` is appended to every mark name that is not
            one of the specially handled names.
        augment: When equal to ``"test"``, smoothed and standardised copies
            (windows 10, 50 and 200) of each "MRT" column are added.

    Returns:
        ``(df, yinit, notnan)``: the feature DataFrame, an array holding one
        column per target with NaN replaced by 0, and the ``"notnan"`` column
        of the file (an empty list when the file has no such column).

    Raises:
        ValueError: If a feature column still contains NaN after its
            transformation, or if a target name is not handled.
    """
    df = pd.read_csv(name)

    # A "signal" column, when present, is used as the "initiation" track.
    if "signal" in df.columns:
        df["initiation"] = df["signal"]

    if wig:
        lm = [
            "DNaseI", "initiation", "Meth", "Meth450", "RFDs", "MRTs", "RFDe",
            "MRTe", "AT_20"
        ]
        # Marks outside ``lm`` get the "wig" suffix; marks in ``lm`` keep
        # their name and are appended afterwards, in ``lm`` order.
        marks0 = [m + "wig" for m in marks if m not in lm]
        marks0 += [sm for sm in lm if sm in marks]
        assert (len(marks) == len(marks0))
        marks = marks0

    if "notnan" in df.columns:
        print("Found notnan")
        notnan = df["notnan"]
    else:
        notnan = []

    # Explicit copy so the column assignments below do not act on a slice
    # of the original frame (avoids pandas chained-assignment warnings).
    df = df[targets + marks].copy()
    print(df.describe())

    yinit = [df.pop(target) for target in targets]

    not_generic = [
        "DNaseI", "initiation", "Meth", "Meth450", "RFDe", "MRTe", "RFDs",
        "MRTs"
    ]
    # Iterating the Index captured here, so columns added by the
    # augmentation below are not visited.
    for col in df.columns:
        if col not in not_generic:
            # Resolved lazily so the module-level default is only looked up
            # when needed. The original rebound the local name
            # ``transform_norm``, which raised UnboundLocalError whenever
            # ``t_norm`` was None.
            normalise = t_norm if t_norm is not None else transform_norm
            df[col] = normalise(df[col])
        elif col == "DNaseI":
            df[col] = transform_DNase(df[col])
        elif col in ["initiation", "Stall"]:
            df[col] = df[col] / np.max(df[col])
        elif "Meth" in col:
            df[col] = transform_norm_meth(df[col])
        elif "RFD" in col:
            df[col] = nan_polate(df[col])
            if smm is not None:
                df[col] = smooth(df[col], smm)
            # Affine map v -> (v + 1) / 2.
            df[col] = (df[col] + 1) / 2
        elif "MRT" in col:
            df[col] = nan_polate(df[col])
            if augment == "test":
                for asm in [10, 50, 200]:
                    extra = col + f"_sm_{asm}"
                    df[extra] = smooth(nan_polate(df[col]), asm)
                    df[extra] -= np.mean(df[extra])
                    df[extra] /= np.std(df[extra])

        if np.isnan(df[col]).any():
            # Raising a plain string is itself a TypeError in Python 3.
            raise ValueError(f"NanVal: NaN values in column {col!r}")

    print(np.max(yinit[0]), "max")
    print(df.describe())

    yinit0 = []
    for y, t in zip(yinit, targets):
        if t in ["initiation", "Stall"]:
            yinit0.append(y / np.max(y))
        elif t == "DNaseI":
            yinit0.append(transform_DNase(y))
        elif t == "OKSeq":
            yinit0.append((y + 1) / 2)
        else:
            raise ValueError(f"Undefined target: {t!r}")

    # One column per target; NaN targets are replaced by 0.
    yinit = np.array(yinit0).T
    yinit[np.isnan(yinit)] = 0
    return df, yinit, notnan
def run(profile, tndiff, tstart, tend, actualProfile=None):
    """Simulate replication and score it, optionally inside a larger profile.

    When ``actualProfile`` is given, a copy of it is taken and its slice
    ``[tstart - start:tend - start]`` is replaced by ``profile`` rescaled so
    that the slice keeps its original sum; that modified copy is simulated.
    Otherwise ``profile`` itself is simulated. The simulated MRT / RFD are
    compared with the "MRT" and "OKSeq" tracks over the module-level
    ``start`` / ``end`` and, when ``actualProfile`` is given, additionally
    over ``tstart`` / ``tend``.

    Args:
        profile: Candidate profile (for the window when ``actualProfile`` is
            given, otherwise for the whole simulated region).
        tndiff: Passed to the simulator as its third positional argument.
        tstart: Window start coordinate.
        tend: Window end coordinate.
        actualProfile: Optional whole-region profile into which ``profile``
            is inserted.

    Returns:
        A 6-tuple ``(score, MRTp, MRT, RFD, RFDs, wRFD)``. With a truthy
        ``args.RFDo`` the score is the second value of the whole-region RFD
        comparison plus the second value of the window RFD comparison (0
        without ``actualProfile``); otherwise it is the sum of the second
        values of the whole-region MRT and RFD comparisons.
    """
    res = args.resolution
    windowed = actualProfile is not None

    if windowed:
        sim_input = actualProfile.copy()
        # NOTE(review): this slice is not divided by ``res`` while the
        # MRT/RFD windows below are - confirm the units are consistent.
        window = slice(tstart - start, tend - start)
        window_total = np.sum(sim_input[window])
        sim_input[window] = window_total * np.array(profile) / np.sum(profile)
    else:
        sim_input = profile

    (sim_mrt, _mrt_s, sim_rfd, _rep_time, _single_mol, _ori_events, _it,
     whole_rfd) = get_fast_MRT_RFDs(
         nsim, sim_input, tndiff,
         kon=kon,
         fork_speed=fork_speed,
         dori=20 * 5 / resolution,
         single_mol_exp=False,
         continuous=args.continuous,
         wholeRFD=True)

    # Per-organism comparison settings.
    # NOTE(review): both ``args.resolution`` (as ``res``) and the
    # module-level ``resolution`` are used - presumably the same value.
    yeast = cell == "Cerevisae"
    if yeast:
        mrt_kw = dict(res=1)
        rfd_kw = dict(res=1, rescale=1, nanpolate=True, smoothf=2)
    else:
        mrt_kw = dict(res=10)
        rfd_kw = dict(res=resolution, rescale=1 / resolution)

    # Window-restricted comparison (only with an actualProfile).
    local_rfd_std = 0
    if windowed:
        lo = int((tstart - start) / res)
        hi = int((tend - start) / res)
        local_span = dict(ch=ch, start=tstart, end=tend, return_exp=True)

        mrt_window = sim_mrt[lo:hi]
        if not yeast:
            mrt_window = mrt_window[::10 // resolution]
        # The window MRT comparison is run but its results are not used.
        _, _, _ = compare(mrt_window, "MRT", cell, **mrt_kw, **local_span)
        _, local_rfd_std, _ = compare(sim_rfd[lo:hi], "OKSeq", cell,
                                      **rfd_kw, **local_span)

    # Whole-region comparison.
    whole_span = dict(ch=ch, start=start, end=end, return_exp=True)
    mrt_whole = sim_mrt if yeast else sim_mrt[::10 // resolution]
    _, mrt_std, exp_mrt = compare(mrt_whole, "MRT", cell, **mrt_kw,
                                  **whole_span)
    _, rfd_std, exp_rfd = compare(sim_rfd, "OKSeq", cell, **rfd_kw,
                                  **whole_span)

    if yeast:
        # NOTE(review): the original indentation was lost; this smoothing is
        # assumed to belong to the Cerevisae branch only - confirm.
        sim_rfd = smooth(sim_rfd, 2)

    if args.RFDo:
        print(rfd_std, local_rfd_std)
        return (rfd_std + local_rfd_std, sim_mrt, exp_mrt, exp_rfd, sim_rfd,
                whole_rfd)
    return mrt_std + rfd_std, sim_mrt, exp_mrt, exp_rfd, sim_rfd, whole_rfd