コード例 #1
0
def run(profile, tndiff, tstart, tend, left=None, right=None):
    """Simulate replication for ``profile`` and score it against experiment.

    The simulation is delegated to ``get_fast_MRT_RFDs`` and the scoring to
    ``compare``; both, along with ``nsim``, ``kon``, ``fork_speed``,
    ``resolution``, ``cell``, ``ch`` and ``args``, are read from the
    enclosing/module scope.

    Args:
        profile: Initiation profile handed to the simulator.
        tndiff: Forwarded unchanged to ``get_fast_MRT_RFDs``.
        tstart: Start coordinate forwarded to ``compare``.
        tend: End coordinate forwarded to ``compare``.
        left: Optional term; ``1.5 * left`` is added to the simulated RFD.
        right: Optional term; ``1.5 * right`` is added to the simulated RFD.

    Returns:
        A 6-tuple ``(score, MRTp, MRT, RFD, RFDs, wRFD)``. ``score`` is the
        RFD term alone when ``args.RFDo`` is truthy, otherwise the sum of the
        MRT and RFD terms. ``MRT`` and ``RFD`` are the third values returned
        by ``compare`` (presumably the experimental tracks, since
        ``return_exp=True`` is requested -- confirm against ``compare``).
    """
    # The simulator returns eight values; only three are needed here.
    MRTp, _, RFDs, _, _, _, _, wRFD = get_fast_MRT_RFDs(
        nsim, profile, tndiff,
        kon=kon,
        fork_speed=fork_speed,
        dori=20 * 5 / resolution,
        single_mol_exp=False,
        continuous=args.continuous,
        wholeRFD=True)

    # Optional corrections are accumulated in place on the simulated RFD.
    for flank in (left, right):
        if flank is not None:
            RFDs += 1.5 * flank

    # Keywords shared by every comparison against the experimental data.
    window = dict(ch=ch, start=tstart, end=tend, return_exp=True)

    if cell != "Cerevisae":
        # MRT is compared at res=10, so the simulated track is sub-sampled.
        _, MRTstd, MRT = compare(
            MRTp[::10 // resolution], "MRT", cell, res=10, **window)
        _, RFDstd, RFD = compare(
            RFDs, "OKSeq", cell, res=resolution,
            rescale=1 / resolution, **window)
    else:
        _, MRTstd, MRT = compare(MRTp, "MRT", cell, res=1, **window)
        _, RFDstd, RFD = compare(
            RFDs, "OKSeq", cell, res=1,
            rescale=1, nanpolate=True, smoothf=2, **window)
        # The returned RFD is smoothed only after it has been scored.
        RFDs = smooth(RFDs, 2)

    score = RFDstd if args.RFDo else MRTstd + RFDstd
    return score, MRTp, MRT, RFD, RFDs, wRFD
コード例 #2
0
                    #if len(weights)== 0
                    print(weights)
                    #print(ast.literal_eval(weights))
                    try:
                        weights = ast.literal_eval(weights)
                    except:
                        if weights not in ["submed"]:
                            print("Warning unrecon option %s" % weights)
                        pass
                    if type(weights) == list:

                        d3p = np.zeros_like(raw)
                        #print(weights)
                        for smoothv, weightv in weights:
                            d3p += smooth(raw, int(smoothv)) * weightv

                        raw = d3p
                    else:
                        #print(type(weights),np.nanmean(raw))
                        if weights == "submed":
                            raw -= np.nanmedian(raw)
                        d3p = raw
                #print(weights, "here")

            if args.nan0 and "Exp" not in signal:

                d3p[np.isnan(d3p)] = 0
            btosee.append([x, d3p, signal])
            if sup_sig != None:
                btosee.append(sup_sig)
コード例 #3
0
    def fun(x, alpha):
        """Score the candidate values ``x`` and apply one update step to them.

        The candidate is written into the full signal, dumped to
        ``root + "/tmp.csv"`` and evaluated by running ``command`` through the
        shell. The resulting correlation and profile CSV files are read back
        to compute the score and the update direction.

        Relies on names from the enclosing/module scope: ``init_x0``,
        ``where``, ``root``, ``whole_info``, ``command``, ``smooth``, ``args``
        and the globals ``iter`` and ``gscore``.

        Args:
            x: Array of signal values at the positions selected by ``where``.
                It is modified in place by the update step.
            alpha: Step size of the multiplicative update.

        Returns:
            ``(score, x)`` where ``score = 2 - c1 - c2`` and ``x`` is the
            updated array. If any entry of ``x`` is negative the function
            returns the bare scalar ``2`` instead of a tuple.
        """
        global iter
        global gscore
        # NOTE: this aliases init_x0 (no copy), so init_x0 itself is updated
        # on every call, including calls rejected just below.
        signal = init_x0
        signal[where] = x
        if np.sum(x < 0) > 0:
            # NOTE(review): scalar here vs. (score, x) on the normal path;
            # left unchanged because callers may depend on it.
            return 2
        filen = root + "/tmp.csv"
        d = pd.DataFrame({
            "chrom": whole_info.chrom,
            "chromStart": whole_info.chromStart,
            # NOTE(review): chromEnd is filled from chromStart -- confirm
            # whether whole_info.chromEnd was intended.
            "chromEnd": whole_info.chromStart,
            "signalValue": signal
        })
        d.to_csv(filen, index=False)
        process = subprocess.Popen(command + " --signal %s --name %s" %
                                   (filen, root + "/tmp"),
                                   shell=True,
                                   stdout=subprocess.PIPE)

        # communicate() drains stdout while waiting. A plain wait() with
        # stdout=PIPE can deadlock once the child fills the pipe buffer, and
        # it leaves the pipe open afterwards.
        process.communicate()

        scored = pd.read_csv(root + "/tmpglobal_corre.csv")
        c1 = float(scored["MRTp"][0].split(",")[0][1:])
        # The MRT correlation is overwritten with 0 right after being parsed,
        # so only the RFD correlation contributes to the score.
        c1 = 0
        c2 = float(scored["RFDp"][0].split(",")[0][1:])

        print(scored)

        if iter % 10 == 0:
            print("every10", c1, c2)

        score = 2 - c1 - c2  # + 0.01 * (np.sum(x)-1)**2

        if iter == 0:
            # Report the score of the first evaluation, not the placeholder
            # that gscore held before being initialised.
            print("Initial value", score)
            gscore = score

        if score < gscore:
            # Apply the format string; passing the values as extra print()
            # arguments left the "%.3f" placeholders unformatted.
            print("New minimum %.3f , old %.3f" % (score, gscore))
            print(c1, c2)
            d.to_csv(root + "_%i.csv" % iter, index=False)
            gscore = score

        iter += 1

        profiles = pd.read_csv(root + "/tmpglobal_profiles.csv")

        def delta(s):
            """Return the first difference of ``s``."""
            values = np.array(s)
            return values[1:] - values[:-1]

        # Smoothed mismatch between the slopes of the "RFDs" and "RFDe"
        # columns of the profile file.
        deltas = smooth(
            delta(profiles["RFDs"]) - delta(profiles["RFDe"]), args.extension)

        direction = deltas[where]
        direction /= np.mean(np.abs(direction))

        # Multiplicative update, clipped so the signal stays non-negative.
        x -= alpha * direction * x
        x[x < 0] = 0

        return score, x
コード例 #4
0
                                         start=start,
                                         end=end,
                                         resolution=resolution,
                                         raw=False)
            x, CNV = replication_data(cell,
                                      "CNV",
                                      chromosome=ch,
                                      start=start,
                                      end=end,
                                      resolution=resolution,
                                      raw=False)
            CNV[CNV == 0] = 2
            DNaseI[np.isnan(DNaseI)] = 0
            DNaseI /= CNV

            DNaseIsm = smooth(DNaseI, 100)
            DNaseIsm /= np.mean(DNaseIsm)

            d3p *= DNaseIsm

            d3p[np.isnan(d3p)] = 0
            d3p[np.isinf(d3p)] = 0
            print(np.sum(np.isnan(d3p)))
            # pylab.plot(d3p)
            # pylab.show()

    elif args.signal == "ARSpeak":

        x, d3p = replication_data(cell,
                                  "ARS",
                                  chromosome=ch,
コード例 #5
0
                                     start=start,
                                     end=end,
                                     resolution=resolution,
                                     raw=False)
        x, CNV = replication_data(cell,
                                  "CNV",
                                  chromosome=ch,
                                  start=start,
                                  end=end,
                                  resolution=resolution,
                                  raw=False)
        CNV[CNV == 0] = 2
        DNaseI[np.isnan(DNaseI)] = 0
        DNaseI /= CNV

        DNaseIsm = smooth(DNaseI, 100)
        DNaseIsm /= np.mean(DNaseIsm)

        d3p *= DNaseIsm

        d3p[np.isnan(d3p)] = 0
        d3p[np.isinf(d3p)] = 0
        print(np.sum(np.isnan(d3p)))
        # pylab.plot(d3p)
        # pylab.show()

else:
    x, d3p = replication_data(cell,
                              args.signal,
                              chromosome=ch,
                              start=start,
コード例 #6
0
ファイル: nn.py プロジェクト: organic-chemistry/repli1D
def load_signal(name,
                marks=[
                    'H2az', 'H3k27ac', 'H3k79me2', 'H3k27me3', 'H3k9ac',
                    'H3k4me2', 'H3k4me3', 'H3k9me3', 'H3k4me1', 'H3k36me3',
                    "H4k20me1"
                ],
                targets=["initiation"],
                t_norm=None,
                smm=None,
                wig=True,
                augment=None):
    """Load a CSV of genomic tracks and build normalised features and targets.

    Args:
        name: Path or file-like object accepted by ``pandas.read_csv``.
        marks: Feature column names. The default list is never mutated here.
        targets: Target column names. Supported targets are "initiation",
            "Stall", "DNaseI" and "OKSeq".
        t_norm: Optional callable used to normalise generic feature columns.
            When ``None`` the module-level ``transform_norm`` is used (it is
            expected to be defined elsewhere in this file).
        smm: Optional smoothing width applied to RFD feature columns.
        wig: When true, ``"wig"`` is appended to every mark name that is not
            one of the special tracks listed in ``lm``.
        augment: When equal to ``"test"``, standardised smoothed copies of
            each MRT column are added to the features.

    Returns:
        ``(df, yinit, notnan)``: the feature DataFrame, an array with one
        column per target (NaN replaced by 0), and the ``"notnan"`` column of
        the file if present, otherwise an empty list.

    Raises:
        ValueError: If a processed feature column still contains NaN, or if a
            target name is not supported.
    """
    df = pd.read_csv(name)

    # Files that store the target under "signal" expose it as "initiation".
    if "signal" in df.columns:
        df["initiation"] = df["signal"]

    if wig:
        lm = [
            "DNaseI", "initiation", "Meth", "Meth450", "RFDs", "MRTs", "RFDe",
            "MRTe", "AT_20"
        ]
        # Suffixed marks come first, then the special tracks in ``lm`` order.
        marks0 = [m + "wig" for m in marks if m not in lm]
        marks0 += [sm for sm in lm if sm in marks]

        assert (len(marks) == len(marks0))
        marks = marks0

    if "notnan" in df.columns:
        print("Found notnan")
        notnan = df["notnan"]
    else:
        notnan = []

    df = df[targets + marks]
    print(df.describe())

    yinit = [df.pop(target) for target in targets]

    # Iterate over a snapshot of the columns: the augmentation below adds
    # new columns that must not be visited by this loop.
    for col in df.columns:
        if col not in [
                "DNaseI", "initiation", "Meth", "Meth450", "RFDe", "MRTe",
                "RFDs", "MRTs"
        ]:
            # Resolve the normaliser under a separate local name. Assigning
            # to ``transform_norm`` inside this function would make it a
            # local and hide the module-level fallback when t_norm is None.
            normalise = t_norm if t_norm is not None else transform_norm
            df[col] = normalise(df[col])
        elif col == "DNaseI":
            df[col] = transform_DNase(df[col])
        elif col in ["initiation", "Stall"]:
            df[col] = df[col] / np.max(df[col])
        elif "Meth" in col:
            df[col] = transform_norm_meth(df[col])
        elif "RFD" in col:
            df[col] = nan_polate(df[col])
            if smm is not None:
                df[col] = smooth(df[col], smm)
            # Map RFD from [-1, 1] onto [0, 1].
            df[col] = (df[col] + 1) / 2
        elif "MRT" in col:
            df[col] = nan_polate(df[col])
            if augment == "test":
                for asm in [10, 50, 200]:
                    extra = col + f"_sm_{asm}"
                    df[extra] = smooth(nan_polate(df[col]), asm)
                    df[extra] -= np.mean(df[extra])
                    df[extra] /= np.std(df[extra])

        if np.sum(np.isnan(df[col])) != 0:
            # Raising a bare string is itself a TypeError in Python 3.
            raise ValueError("NanVal")

    print(np.max(yinit[0]), "max")
    print(df.describe())

    yinit0 = []
    for y, t in zip(yinit, targets):
        if t in ["initiation", "Stall"]:
            yinit0.append(y / np.max(y))
        elif t == "DNaseI":
            yinit0.append(transform_DNase(y))
        elif t == "OKSeq":
            yinit0.append((y + 1) / 2)
        else:
            raise ValueError("Undefined target")

    yinit = np.array(yinit0).T
    yinit[np.isnan(yinit)] = 0
    return df, yinit, notnan
コード例 #7
0
def run(profile, tndiff, tstart, tend, actualProfile=None):
    """Simulate replication and score it against experimental MRT/RFD data.

    When ``actualProfile`` is given, ``profile`` is spliced into a copy of it
    over ``[tstart - start, tend - start)`` (rescaled to keep the weight of
    that window), the simulation runs on the spliced profile, and an extra
    score is computed on the window alone. Otherwise ``profile`` is simulated
    as-is.

    Relies on module-level names: ``args``, ``start``, ``end``, ``cell``,
    ``ch``, ``resolution``, ``nsim``, ``kon``, ``fork_speed``,
    ``get_fast_MRT_RFDs``, ``compare`` and ``smooth``. Both
    ``args.resolution`` and the module-level ``resolution`` are used, exactly
    as in the original code.

    Args:
        profile: Initiation profile for the window being optimised.
        tndiff: Forwarded unchanged to ``get_fast_MRT_RFDs``.
        tstart: Start coordinate of the window.
        tend: End coordinate of the window.
        actualProfile: Optional full-length profile to splice ``profile`` into.

    Returns:
        A 6-tuple ``(score, MRTp, MRT, RFD, RFDs, wRFD)``. With ``args.RFDo``
        the score is the whole-region RFD term plus the window RFD term
        (0 when ``actualProfile`` is ``None``); otherwise it is the
        whole-region MRT term plus the whole-region RFD term.
    """
    res = args.resolution
    fit_on_window = actualProfile is not None

    if fit_on_window:
        tmpProfile = actualProfile.copy()
        span = slice(tstart - start, tend - start)
        # Keep the total weight of the replaced window unchanged.
        weight_t = np.sum(tmpProfile[span])
        tmpProfile[span] = weight_t * np.array(profile) / np.sum(profile)
    else:
        tmpProfile = profile

    # The simulator returns eight values; only three are needed here.
    MRTp, _, RFDs, _, _, _, _, wRFD = get_fast_MRT_RFDs(
        nsim,
        tmpProfile,
        tndiff,
        kon=kon,
        fork_speed=fork_speed,
        dori=20 * 5 / resolution,
        single_mol_exp=False,
        continuous=args.continuous,
        wholeRFD=True)

    binned = cell != "Cerevisae"
    if binned:
        # MRT is compared at res=10, so the simulated track is sub-sampled.
        stride = 10 // resolution
        mrt_kw = {"res": 10}
        rfd_kw = {"res": resolution, "rescale": 1 / resolution}
    else:
        stride = None
        mrt_kw = {"res": 1}
        rfd_kw = {"res": 1, "rescale": 1, "nanpolate": True, "smoothf": 2}

    def _score(mrt_track, rfd_track, first, last):
        """Compare one pair of simulated tracks over ``[first, last]``."""
        if stride is not None:
            mrt_track = mrt_track[::stride]
        shared = {"ch": ch, "start": first, "end": last, "return_exp": True}
        _, mrt_std, mrt_exp = compare(mrt_track, "MRT", cell,
                                      **mrt_kw, **shared)
        _, rfd_std, rfd_exp = compare(rfd_track, "OKSeq", cell,
                                      **rfd_kw, **shared)
        return mrt_std, mrt_exp, rfd_std, rfd_exp

    # Window-only score, computed before the whole-region one.
    RFDstdl = 0
    if fit_on_window:
        lo = int((tstart - start) / res)
        hi = int((tend - start) / res)
        _, _, RFDstdl, _ = _score(MRTp[lo:hi], RFDs[lo:hi], tstart, tend)

    MRTstd, MRT, RFDstd, RFD = _score(MRTp, RFDs, start, end)

    if not binned:
        # The returned RFD is smoothed only after it has been scored.
        RFDs = smooth(RFDs, 2)

    if args.RFDo:
        print(RFDstd, RFDstdl)
        return RFDstd + RFDstdl, MRTp, MRT, RFD, RFDs, wRFD
    return MRTstd + RFDstd, MRTp, MRT, RFD, RFDs, wRFD
コード例 #8
0
                                     start=start,
                                     end=end,
                                     resolution=resolution,
                                     raw=False)
        x, CNV = replication_data(cell,
                                  "CNV",
                                  chromosome=ch,
                                  start=start,
                                  end=end,
                                  resolution=resolution,
                                  raw=False)
        CNV[CNV == 0] = 2
        DNaseI[np.isnan(DNaseI)] = 0
        DNaseI /= CNV

        DNaseIsm = smooth(DNaseI, 100)
        DNaseIsm /= np.mean(DNaseIsm)

        d3p *= DNaseIsm

        d3p[np.isnan(d3p)] = 0
        d3p[np.isinf(d3p)] = 0
        print(np.sum(np.isnan(d3p)))
        # pylab.plot(d3p)
        # pylab.show()

else:
    x, d3p = replication_data(cell,
                              args.signal,
                              chromosome=ch,
                              start=start,