Esempio n. 1
0
# Copy a few options from `args` into local names
# (presumably the parsed command-line arguments -- confirm against the caller).
extra = args.extra
root = args.root
nc = args.nc
target = args.target

# Build the network and restore its weights from `weight_name`.
ntwk = model(typem=typem, base=args.base, nc=args.nc)
ntwk.load_weights(weight_name)

# NOTE: when `args.base` is truthy, the root taken from `args.root` above is
# replaced by this hard-coded absolute path.
if args.base:
    root = '/data/bioinfo@borvo/users/jarbona/deepnano5bases/data/tomb/clean_name/'

# Load the single input file, then turn it into per-read events.
X, y = load_data([filename], root=root, nc=args.nc)
# `load_events` is unpacked into three values without `extra=True` and into
# four values with it; `extra_e` falls back to an empty list in the former
# case so that it is always defined afterwards.
if not args.base:
    Xr, yr, fn = load_events(X,
                             y,
                             min_length=length_window,
                             base=args.base,
                             maxf=args.maxf)
    extra_e = []
else:
    Xr, yr, fn, extra_e = load_events(X,
                                      y,
                                      min_length=length_window,
                                      base=args.base,
                                      maxf=args.maxf,
                                      extra=True)
# Sanity check: one entry in `fn` per loaded read
# (presumably the source file names -- TODO confirm).
assert (len(fn) == len(Xr))
print("Nfiles", len(Xr))
# Convert the targets to a NumPy array for the processing that follows.
yr = np.array(yr)
Xt, yt, which_keep, NotT = transform_reads(Xr,
                                           yr,
        # print("la")
        ws = 8
    # print(ws)
    X, y = load_data(
        [t],
        root=root,
        values=[[
            "test_with_tombo_CNV_logcosh_3layers/weights.22-0.01", 0
        ], [
            "test_longueur_lstm_from_scratch_without_human_weights.25-0.02", 0
        ], ["init_B", 0], ["init_I", 1]],
        nc=args.nc)
    if not args.base:
        Xrt, yrt, fnt = load_events(X,
                                    y,
                                    min_length=10 * length_window,
                                    raw=args.raw,
                                    base=args.base,
                                    maxf=args.maxf)
        extra_e = []
    else:
        Xrt, yrt, fnt, extra_e = load_events(X,
                                             y,
                                             min_length=10 * length_window,
                                             raw=args.raw,
                                             base=args.base,
                                             maxf=args.maxf,
                                             extra=True)

    # print(Xrt[0])
    if args.raw:
        max_len = 10000
Esempio n. 3
0
def load_data_complete(dataset,
                       root,
                       per_dataset=None,
                       lenv=200,
                       shuffle=True,
                       pmix=None,
                       values=None,
                       delta=False,
                       raw=False,
                       rescale=False,
                       base=False,
                       noise=False,
                       nc=1):
    """Load several datasets, cut their reads into windows and stack them.

    For every entry of ``dataset`` the function calls ``load_data``, then
    ``load_events``, then ``transform_reads``, printing the wall-clock time
    of each stage, and finally concatenates the windows of all datasets
    along axis 0.

    Args:
        dataset: Iterable of dataset identifiers. Each one is passed to
            ``load_data`` as a one-element list. Entries containing
            ``"T-yeast"`` are loaded with ``ws=8``, all others with ``ws=5``.
        root: Forwarded to ``load_data``.
        per_dataset: Forwarded to ``load_data``.
        lenv: Forwarded to ``transform_reads``; ``lenv // 2`` is also the
            split point used when mixing samples (see ``pmix``).
        shuffle: If true, the result is passed through
            ``unison_shuffled_copies`` before being returned.
        pmix: If not ``None``, ``int(len(X) * pmix)`` synthetic samples are
            appended. Each one joins the first part of one randomly drawn
            sample to the second part of another, and its target is the
            mean of the two source targets.
        values: Extra entries placed in front of
            ``[["init_B", 0], ["init_I", 1]]`` in the ``values`` argument of
            ``load_data``. ``None`` (the default) means no extra entries.
        delta: Forwarded to ``transform_reads``.
        raw: Forwarded to ``load_events``.
        rescale: Forwarded to ``transform_reads``.
        base: Forwarded to ``load_events``. When true, ``load_events`` is
            also called with ``extra=True`` and its fourth return value is
            handed to ``transform_reads`` as ``extra_e``.
        noise: Forwarded to ``transform_reads``.
        nc: Forwarded to ``load_data``.

    Returns:
        Tuple ``(X_t, y_t)`` of NumPy arrays: the stacked windows and their
        targets (optionally augmented by mixing and shuffled).
    """
    # A mutable default ([]) would be shared between calls; use None as the
    # sentinel and materialise the empty list here instead.
    if values is None:
        values = []

    # Transition data handed to transform_reads; the path is relative to the
    # current working directory.
    Tt = np.load("data/training/T-T1-corrected-transition_iter3.npy")

    X_t, y_t = [], []
    for data in dataset:
        print("Loading", data)
        ws = 8 if "T-yeast" in data else 5

        t0 = time.time()
        X, y = load_data([data],
                         root=root,
                         per_dataset=per_dataset,
                         values=values + [["init_B", 0], ["init_I", 1]],
                         nc=nc)
        t1 = time.time()
        print(t1 - t0, "load csv")

        t0 = time.time()
        # load_events returns a fourth value only when asked for the extra
        # information, which is only done in base mode.
        if base:
            Xp, yp, _, extra_e = load_events(X,
                                             y,
                                             min_length=None,
                                             ws=ws,
                                             raw=raw,
                                             base=base,
                                             extra=True)
        else:
            Xp, yp, _ = load_events(X,
                                    y,
                                    min_length=None,
                                    ws=ws,
                                    raw=raw,
                                    base=base)
            extra_e = []
        t1 = time.time()
        print(t1 - t0, "load events")

        t0 = time.time()
        print("Mean Values", np.mean(yp, axis=0))
        print("Total cumulated read length",
              np.sum([len(xi["mean"]) for xi in Xp]))
        assert (len(Xp) == len(yp))

        Xpp, ypp, _, _ = transform_reads(Xp,
                                         np.array(yp),
                                         lenv=lenv,
                                         delta=delta,
                                         rescale=rescale,
                                         noise=noise,
                                         extra_e=extra_e,
                                         Tt=Tt)
        t1 = time.time()
        print(t1 - t0, "transform")

        t0 = time.time()
        Xpp = np.concatenate(Xpp, axis=0)
        ypp = np.concatenate(ypp, axis=0)
        t1 = time.time()
        print(t1 - t0, "concat")
        print("Total cumulated read length_after_cut", Xpp.shape[0])

        X_t.append(Xpp)
        y_t.append(ypp)

    X_t = np.concatenate(X_t, axis=0)
    y_t = np.concatenate(y_t, axis=0)

    if pmix is not None:
        print("Mixing", pmix)
        indices = np.arange(len(X_t))
        n_mixed = int(len(indices) * pmix)
        m1 = np.random.choice(indices, n_mixed)
        m2 = np.random.choice(indices, n_mixed)
        # Split at the middle of a window so that the 50/50 target average
        # below matches the share each source contributes. This was a
        # hard-coded 100, i.e. the middle only for the default lenv=200.
        # Assumes transform_reads yields windows of length lenv -- TODO confirm.
        half = lenv // 2
        nX = np.concatenate((X_t[m1, :half, ::], X_t[m2, half:, ::]), axis=1)
        ny = y_t[m1] / 2 + y_t[m2] / 2
        X_t = np.concatenate([X_t, nX], axis=0)
        y_t = np.concatenate([y_t, ny], axis=0)

    if shuffle:
        X_t, y_t = unison_shuffled_copies(X_t, y_t)
    return X_t, y_t
Esempio n. 4
0
 def fun(*args, **kwargs):
     """Call ``load_events`` with the given arguments and return its result wrapped in a one-element list."""
     loaded = load_events(*args, **kwargs)
     return [loaded]