Example #1
0
def ridgeLowRankOneKernel(tcga, y_tr, kernel, kernel_args, rank, method):
    """Fit a low-rank kernel ridge regression model using a single kernel.

    :param tcga: Training data; converted to a numpy array.
    :param y_tr: Training targets.
    :param kernel: Kernel function handed to the Kinterface.
    :param kernel_args: Keyword arguments for the kernel function.
    :param rank: Rank of the low-rank approximation.
    :param method: Approximation method name (e.g. "nystrom", "icd").
    :return: The fitted RidgeLowRank model.
    """
    kernel_interface = Kinterface(data=np.array(tcga),
                                  kernel=kernel,
                                  kernel_args=kernel_args)
    model = RidgeLowRank(lbd=1, rank=rank, method=method)
    model.fit([kernel_interface], y_tr)
    return model
Example #2
0
 def testCSIFit(self):
     """Fit a CSI-based low-rank ridge model on string data and verify
     that predictions correlate positively with the targets."""
     kernel_interface = Kinterface(kernel=string_kernel,
                                   data=self.Xa,
                                   kernel_args={"mode": SPECTRUM})
     kernel_list = [kernel_interface]
     ridge = RidgeLowRank(lbd=0.01,
                          rank=5,
                          method="csi",
                          method_init_args={"delta": 5})
     ridge.fit(kernel_list, self.y)
     predictions = ridge.predict([self.Xa] * len(kernel_list))
     corr, pval = st.spearmanr(predictions, self.y)
     self.assertGreater(corr, 0)
     self.assertLess(pval, 0.05)
Example #3
0
def variousKernelVariousMethodsOneTCGA(tcga, X_te, y_tr, y_te, method, rank):
    """Fit a multiple-kernel (RBF + linear + polynomial) low-rank ridge
    model on TCGA data and report the test mean squared error.

    :param tcga: Training data.
    :param X_te: Test data (one copy is passed per kernel).
    :param y_tr: Training targets.
    :param y_te: Test targets.
    :param method: Low-rank approximation method name.
    :param rank: Rank of the low-rank approximation.
    :return: Test mean squared error.
    """
    K_exp = Kinterface(data=tcga, kernel=rbf_kernel,
                       kernel_args={"sigma": 30})  # RBF kernel
    K_poly = Kinterface(data=tcga,
                        kernel=poly_kernel,
                        kernel_args={"degree":
                                     3})  # polynomial kernel with degree=3
    K_lin = Kinterface(data=tcga, kernel=linear_kernel)  # linear kernel

    model = RidgeLowRank(method=method, rank=rank, lbd=1)
    model.fit([K_exp, K_lin, K_poly], y_tr)
    # One feature matrix per kernel, in the same order as passed to fit().
    yp = model.predict([X_te, X_te, X_te])
    mse = mean_squared_error(y_te, yp)
    # Pre-format the message so the statement behaves identically under
    # Python 2 (print statement) and Python 3 (print function).
    print("Test MSE: %s" % mse)
    # Return the score instead of discarding it (callers that ignored the
    # previous None return are unaffected).
    return mse
Example #4
0
def process(dataset, outdir):
    """
    Run experiments with specified parameters.

    Evaluates several low-rank / multiple-kernel regression methods on an
    RNA string dataset over repeated random train/validation/test splits,
    writing one CSV row per (cv, rank, method, lambda) combination and one
    diagnostic plot per (cv, rank).

    :param dataset: Dataset key.
    :param outdir: Output directory.
    :return:
    """
    # NOTE(review): depends on module-level globals not visible in this
    # chunk (rnk, iterations, n_tr, n_val, lbd_range, delta, meth2color,
    # RNA_OPTIMAL_K, SPECTRUM) -- confirm they are defined at import time.

    # List available kernels: spectrum string kernels, k-mer lengths 1..10.
    K_range = range(1, 11)
    kargs = [{"mode": SPECTRUM, "K": kl} for kl in K_range]
    kernels = ",".join(set(map(lambda t: t["mode"], kargs)))

    # Fixed settings
    methods = ["Mklaren", "CSI", "Nystrom", "ICD"]
    rank_range = (rnk, )  # a single rank value, taken from a global
    trueK = RNA_OPTIMAL_K.get(dataset, None)  # known best k-mer length, if any

    # Fixed output
    # Create output directory
    detname = os.path.join(outdir, "_%s" % dataset)  # per-dataset plot dir
    if not os.path.exists(outdir): os.makedirs(outdir)
    if not os.path.exists(detname): os.makedirs(detname)
    fname = os.path.join(outdir, "%s.csv" % dataset)
    print("Writing to %s ..." % fname)

    # Output
    header = [
        "dataset", "n", "L", "kernels", "method", "rank", "iteration",
        "lambda", "pivots", "time", "evar_tr", "evar_va", "evar", "mse"
    ]
    # NOTE(review): buffering=0 on a text-mode file is Python 2 only, and
    # the handle is never closed explicitly.
    fp = open(fname, "w", buffering=0)
    writer = csv.DictWriter(fp,
                            fieldnames=header,
                            quotechar='"',
                            quoting=csv.QUOTE_ALL)
    writer.writeheader()

    # Load data
    data = load_rna(dataset)
    X = data["data"]
    y = st.zscore(data["target"])  # z-scored regression target
    n, L = len(X), len(X[0])  # number of sequences, sequence length

    # Load feature spaces: precomputed (sparse) spectrum features, one per K
    Ys = [
        pickle.load(gzip.open(dataset2spectrum(dataset, K))) for K in K_range
    ]

    # Generate random datasets and perform prediction
    seed = 0
    for cv in iterations:

        # Select random test/train indices
        # NOTE(review): seed is incremented once per written row (below), so
        # each cv iteration shuffles with whatever value seed has accumulated.
        np.random.seed(seed)
        inxs = np.arange(n, dtype=int)
        np.random.shuffle(inxs)
        tr = inxs[:n_tr]
        va = inxs[n_tr:n_tr + n_val]
        te = inxs[n_tr + n_val:]

        # Training / test split
        y_tr = y[tr]
        y_va = y[va]
        y_te = y[te]

        # Print after dataset generation
        dat = datetime.datetime.now()
        print("%s\tdataset=%s cv=%d (computing kernels...)" %
              (dat, dataset, cv))

        # For plotting
        X_te = X[te]
        Ks = [
            Kinterface(kernel=string_kernel, data=X[tr], kernel_args=arg)
            for arg in kargs
        ]

        # Precomputed kernel matrices: train x train, val x train, test x train
        Ls_tr = [np.array(Y[tr, :].dot(Y[tr, :].T).todense()) for Y in Ys]
        Ls_va = [np.array(Y[va, :].dot(Y[tr, :].T).todense()) for Y in Ys]
        Ls_te = [np.array(Y[te, :].dot(Y[tr, :].T).todense()) for Y in Ys]
        # Unweighted sum of all kernels, used by the non-MKL methods
        Ls_tr_sum = [sum(Ls_tr)]
        Ls_va_sum = [sum(Ls_va)]
        Ls_te_sum = [sum(Ls_te)]

        # Modeling
        for rank in rank_range:
            dat = datetime.datetime.now()
            print("\t%s\tdataset=%s cv=%d rank=%d" % (dat, dataset, cv, rank))
            best_models = {
                "True": {
                    "y": y_te,
                    "color": "black",
                    "fmt": "--",
                }
            }
            for method in methods:
                best_models[method] = {"color": meth2color[method], "fmt": "-"}
                best_evar = -np.inf

                for lbd in lbd_range:
                    t1 = time.time()
                    if method == "Mklaren":
                        # MKL sees the individual kernels and learns weights.
                        mkl = Mklaren(rank=rank, lbd=lbd, delta=delta)
                        try:
                            mkl.fit(Ls_tr, y_tr)
                            yt = mkl.predict(Xs=None, Ks=Ls_tr)
                            yv = mkl.predict(Xs=None, Ks=Ls_va)
                            yp = mkl.predict(Xs=None, Ks=Ls_te)
                            # Map the selected pivots back to k-mer lengths.
                            pivots = ",".join(
                                map(lambda pi: str(K_range[pi]),
                                    mkl.G_mask.astype(int)))
                        except Exception as e:
                            print(e)
                            continue
                    else:
                        pivots = ""
                        if method == "CSI":
                            model = RidgeLowRank(
                                rank=rank,
                                method="csi",
                                method_init_args={"delta": delta},
                                lbd=lbd)
                        else:
                            model = RidgeLowRank(rank=rank,
                                                 method=method.lower(),
                                                 lbd=lbd)
                        try:
                            # Non-MKL methods operate on the summed kernel.
                            model.fit(Ls_tr_sum, y_tr)
                            yt = model.predict(Xs=None, Ks=Ls_tr_sum)
                            yv = model.predict(Xs=None, Ks=Ls_va_sum)
                            yp = model.predict(Xs=None, Ks=Ls_te_sum)
                        except Exception as e:
                            print(e)
                            continue
                    t2 = time.time() - t1  # wall-clock fit + predict time

                    # Evaluate explained variance on the three sets
                    evar_tr = (np.var(y_tr) - np.var(yt - y_tr)) / np.var(y_tr)
                    evar_va = (np.var(y_va) - np.var(yv - y_va)) / np.var(y_va)
                    evar = (np.var(y_te) - np.var(yp - y_te)) / np.var(y_te)
                    mse = np.var(yp - y_te)  # residual variance, not strict MSE

                    # Select best lambda to plot (by validation expl. variance)
                    if evar_va > best_evar:
                        best_evar = evar_va
                        best_yp = yp
                        best_models[method]["y"] = best_yp

                    # Write to output
                    row = {
                        "L": L,
                        "n": len(X),
                        "method": method,
                        "dataset": dataset,
                        "kernels": kernels,
                        "rank": rank,
                        "iteration": cv,
                        "lambda": lbd,
                        "time": t2,
                        "evar_tr": evar_tr,
                        "evar_va": evar_va,
                        "evar": evar,
                        "mse": mse,
                        "pivots": pivots
                    }

                    writer.writerow(row)
                    seed += 1

            # Plot a function fit after selecting best lambda
            fname = os.path.join(
                detname,
                "%s.generic_plot_cv-%d_rank-%d.pdf" % (dataset, cv, rank))
            generic_function_plot(
                f_out=fname,
                Ks=Ks,
                X=X_te,
                models=best_models,
                xlabel="K-mer length",
                xnames=K_range,
                truePar=K_range.index(trueK) if trueK else None)
Example #5
0
def test(Ksum,
         Klist,
         inxs,
         X,
         Xp,
         y,
         f,
         delta=10,
         lbd=0.1,
         kappa=0.99,
         methods=("Mklaren", "ICD", "CSI", "Nystrom", "SPGP")):
    """
    Sample data from a Gaussian process and compare fits with the sum of kernels
    versus list of kernels.
    :param Ksum: Kernel interface for the sum of all kernels.
    :param Klist: List of individual kernel interfaces.
    :param inxs: Indices of the true inducing points; len(inxs) sets the rank.
    :param X: Training inputs.
    :param Xp: Inputs at which held-out predictions (``yp``) are made.
    :param y: Observed targets.
    :param f: Underlying signal, used for Pearson-correlation scoring.
    :param delta: Delta parameter forwarded to Mklaren / CSI / RFF_KMP.
    :param lbd: Regularization parameter.
    :param kappa: Kappa parameter forwarded to the CSI initialization.
    :param methods: Names of the methods to run; other names are skipped.
    :return: Dict mapping method name to a dict of results (rho, yp, time,
        evar, model and plotting attributes; contents vary by method).
    """
    def flatten(l):
        # Flatten one level of nesting: list of lists -> flat list.
        return [item for sublist in l for item in sublist]

    P = len(Klist)  # Number of kernels
    rank = len(inxs)  # Total number of inducing points over all lengthscales
    anchors = X[inxs, ]

    # True results
    results = {"True": {"anchors": anchors, "color": "black"}}

    # Fit MKL for kernel sum and list of kernels
    if "Mklaren" in methods:
        mkl = Mklaren(rank=rank, delta=delta, lbd=lbd)
        t1 = time.time()
        mkl.fit(Klist, y)
        t2 = time.time() - t1  # fit time only; prediction is not timed
        y_Klist = mkl.predict([X] * len(Klist))
        yp_Klist = mkl.predict([Xp] * len(Klist))
        # Active (pivot) indices collected across all P kernels.
        active_Klist = [
            flatten([mkl.data.get(gi, {}).get("act", []) for gi in range(P)])
        ]
        anchors_Klist = [X[ix] for ix in active_Klist]
        try:
            rho_Klist, _ = pearsonr(y_Klist, f)
        except Exception as e:
            # pearsonr can fail (e.g. constant input); fall back to 0.
            rho_Klist = 0
        evar = (np.var(y) - np.var(y - y_Klist)) / np.var(y)
        results["Mklaren"] = {
            "rho": rho_Klist,
            "active": active_Klist,
            "anchors": anchors_Klist,
            "sol_path": mkl.sol_path,
            "yp": yp_Klist,
            "time": t2,
            "evar": evar,
            "model": mkl,
            "color": meth2color["Mklaren"]
        }

    # Fit CSI
    if "CSI" in methods:
        csi = RidgeLowRank(
            rank=rank,
            lbd=lbd,
            method="csi",
            method_init_args={
                "delta": delta,
                "kappa": kappa
            },
        )
        t1 = time.time()
        csi.fit([Ksum], y)  # single-kernel methods use the summed kernel
        t2 = time.time() - t1
        y_csi = csi.predict([X])
        yp_csi = csi.predict([Xp])
        active_csi = csi.active_set_
        anchors_csi = [X[ix] for ix in active_csi]
        try:
            rho_csi, _ = pearsonr(y_csi, f)
        except Exception as e:
            rho_csi = 0
        evar = (np.var(y) - np.var(y - y_csi)) / np.var(y)
        results["CSI"] = {
            "rho": rho_csi,
            "active": active_csi,
            "anchors": anchors_csi,
            "time": t2,
            "yp": yp_csi,
            "evar": evar,
            "model": csi,
            "color": meth2color["CSI"]
        }

    # Fit RFF_KMP (stationary variant)
    # NOTE(review): not in the default `methods` tuple, so this branch only
    # runs when the caller asks for "RFF" explicitly.
    if "RFF" in methods:
        # Assumes each kernel exposes a "gamma" kernel argument.
        gamma_range = map(lambda k: k.kernel_args["gamma"], Klist)
        rff = RFF_KMP(delta=delta,
                      rank=rank,
                      lbd=lbd,
                      gamma_range=gamma_range,
                      typ=RFF_TYP_STAT)
        t1 = time.time()
        rff.fit(X, y)
        t2 = time.time() - t1
        y_rff = rff.predict(X)
        yp_rff = rff.predict(Xp)
        try:
            rho_rff, _ = pearsonr(y_rff, f)
        except Exception as e:
            rho_rff = 0
        evar = (np.var(y) - np.var(y - y_rff)) / np.var(y)
        results["RFF"] = {
            "rho": rho_rff,
            # "active": active_rff,
            # "anchors": anchors_rff,
            "time": t2,
            "yp": yp_rff,
            "evar": evar,
            "model": rff,
            "color": meth2color["RFF"]
        }

    # Fit RFF_KMP (non-stationary variant)
    if "RFF-NS" in methods:
        gamma_range = map(lambda k: k.kernel_args["gamma"], Klist)
        rff = RFF_KMP(delta=delta,
                      rank=rank,
                      lbd=lbd,
                      gamma_range=gamma_range,
                      typ=RFF_TYP_NS)
        t1 = time.time()
        rff.fit(X, y)
        t2 = time.time() - t1
        y_rff = rff.predict(X)
        yp_rff = rff.predict(Xp)
        try:
            rho_rff, _ = pearsonr(y_rff, f)
        except Exception as e:
            rho_rff = 0
        evar = (np.var(y) - np.var(y - y_rff)) / np.var(y)
        results["RFF-NS"] = {
            "rho": rho_rff,
            "time": t2,
            "yp": yp_rff,
            "evar": evar,
            "model": rff,
            "color": meth2color["RFF-NS"]
        }

    # Fit FITC (sparse GP with inducing points)
    if "SPGP" in methods:
        fitc = SPGP(rank=rank)
        t1 = time.time()
        fitc.fit(Klist, y, optimize=True, fix_kernel=False)
        t2 = time.time() - t1
        y_fitc = fitc.predict([X]).ravel()
        yp_fitc = fitc.predict([Xp]).ravel()
        try:
            # Rounding guards against numerical noise in the correlation.
            rho_fitc, _ = pearsonr(np.round(y_fitc, 4), f)
        except Exception as e:
            sys.stderr.write("FITC exception: %s\n" % e)
            rho_fitc = 0
        evar = (np.var(y) - np.var(y - y_fitc)) / np.var(y)

        # Approximate closest active index to each inducing point
        anchors = fitc.anchors_
        actives = [[np.argmin(np.sum((a - X)**2, axis=1)) for a in anchors]]

        results["SPGP"] = {
            "rho": rho_fitc,
            "active": actives,
            "anchors": anchors,
            "time": t2,
            "yp": yp_fitc,
            "evar": evar,
            "model": fitc,
            "color": meth2color["SPGP"]
        }

    # Fit Arima (relevant excerpt).
    if "Arima" in methods:
        arima = Arima(rank=rank, alpha=lbd)
        t1 = time.time()
        arima.fit(X, y)
        t2 = time.time() - t1
        y_arima = arima.predict(X).ravel()
        yp_arima = arima.predict(Xp).ravel()
        try:
            rho_arima, _ = pearsonr(np.round(y_arima, 4), f)
        except Exception as e:
            sys.stderr.write("Arima exception: %s\n" % e)
            rho_arima = 0
        evar = (np.var(y) - np.var(y - y_arima)) / np.var(y)

        results["Arima"] = {
            "rho": rho_arima,
            "time": t2,
            "yp": yp_arima,
            "evar": evar,
            "model": arima,
            "color": meth2color["Arima"]
        }

    # Fit ICD
    if "ICD" in methods:
        icd = RidgeLowRank(rank=rank, lbd=lbd, method="icd")
        t1 = time.time()
        icd.fit([Ksum], y)
        t2 = time.time() - t1
        y_icd = icd.predict([X])
        yp_icd = icd.predict([Xp])
        active_icd = icd.active_set_
        anchors_icd = [X[ix] for ix in active_icd]
        try:
            rho_icd, _ = pearsonr(y_icd, f)
        except Exception as e:
            rho_icd = 0
        evar = (np.var(y) - np.var(y - y_icd)) / np.var(y)
        results["ICD"] = {
            "rho": rho_icd,
            "active": active_icd,
            "anchors": anchors_icd,
            "yp": yp_icd,
            "time": t2,
            "evar": evar,
            "model": icd,
            "color": meth2color["ICD"]
        }

    # Fit Nystrom
    if "Nystrom" in methods:
        nystrom = RidgeLowRank(rank=rank,
                               lbd=lbd,
                               method="nystrom",
                               method_init_args={
                                   "lbd": lbd,
                                   "verbose": False
                               })
        t1 = time.time()
        nystrom.fit([Ksum], y)
        t2 = time.time() - t1
        y_nystrom = nystrom.predict([X])
        yp_nystrom = nystrom.predict([Xp])
        active_nystrom = nystrom.active_set_
        anchors_nystrom = [X[ix] for ix in active_nystrom]
        try:
            rho_nystrom, _ = pearsonr(y_nystrom, f)
        except Exception as e:
            rho_nystrom = 0
        evar = (np.var(y) - np.var(y - y_nystrom)) / np.var(y)
        results["Nystrom"] = {
            "rho": rho_nystrom,
            "active": active_nystrom,
            "anchors": anchors_nystrom,
            "yp": yp_nystrom,
            "time": t2,
            "evar": evar,
            "model": nystrom,
            "color": meth2color["Nystrom"]
        }

    # Fit MKL methods (just for time comparison); only timing is recorded.
    for method in set(RidgeMKL.mkls.keys()) & set(methods):
        model = RidgeMKL(lbd=lbd, method=method)
        t1 = time.time()
        model.fit(Klist, y)
        t2 = time.time() - t1
        results[method] = {"time": t2}

    return results
Example #6
0
def process(outdir):
    """
    Run experiments with specified parameters.

    Generates synthetic string data, builds a sparse target signal from a
    spectrum kernel, then compares Mklaren against single-kernel low-rank
    methods (CSI, Nystrom, ICD) over a lambda grid, writing one CSV row per
    (cv, method, lambda) and one diagnostic plot per cv iteration.

    :param outdir: Output directory.
    :return:
    """
    # NOTE(review): depends on module-level globals not visible in this
    # chunk (n_tr, n_val, n_te, cv_iter, L, rank, trueK, K_range,
    # lbd_range, delta, meth2color, SPECTRUM) -- confirm.

    # Fixed settings
    N = n_tr + n_val + n_te  # total number of generated sequences
    methods = ["Mklaren", "CSI", "Nystrom", "ICD"]

    # Fixed output
    # Create output directory
    if not os.path.exists(outdir): os.makedirs(outdir)
    fname = os.path.join(outdir, "results.csv")
    detname = os.path.join(outdir, "_details")  # per-iteration plots
    if not os.path.exists(outdir): os.makedirs(outdir)
    if not os.path.exists(detname): os.makedirs(detname)
    print("Writing to %s ..." % fname)

    # Output
    header = [
        "n", "L", "iteration", "method", "lambda", "rank", "sp.corr",
        "sp.pval", "evar_tr", "evar_va", "evar", "mse"
    ]
    # NOTE(review): buffering=0 with text mode is Python 2 only; fp is
    # never closed explicitly.
    fp = open(fname, "w", buffering=0)
    writer = csv.DictWriter(fp, fieldnames=header)
    writer.writeheader()

    # Training test split (initial positions; rebound inside the loop)
    tr = range(0, n_tr)
    va = range(n_tr, n_tr + n_val)
    te = range(n_tr + n_val, n_tr + n_val + n_te)

    for cv in cv_iter:

        # Random subset of N sequences of length L
        X, _ = generate_data(N=N,
                             L=L,
                             p=0.0,
                             motif="TGTG",
                             mean=0,
                             var=3,
                             seed=cv)
        X = np.array(X)

        # Split into training in test set
        # NOTE(review): tr/va/te are rebound each iteration, so from the
        # second cv onward the previous iteration's index arrays are used as
        # positions into the new shuffle; the three sets stay disjoint, but
        # verify this reuse is intended.
        inxs = np.arange(N, dtype=int)
        np.random.shuffle(inxs)
        tr = inxs[tr]
        va = inxs[va]
        te = inxs[te]
        X_tr = X[tr]
        X_va = X[va]
        X_te = X[te]

        # Generate a sparse signal based on 4-mer composion (maximum lengthscale)
        act = np.random.choice(tr, size=rank, replace=False)
        K_full = Kinterface(data=X,
                            kernel=string_kernel,
                            kernel_args={
                                "mode": SPECTRUM,
                                "K": trueK
                            },
                            row_normalize=False)
        K_act = K_full[:, act]
        # Project onto the span of the active columns, whitened by K_act[act].
        H = K_act.dot(sqrtm(np.linalg.inv(K_act[act])))
        w = st.multivariate_normal.rvs(mean=np.zeros((rank, )),
                                       cov=np.eye(rank))
        y = H.dot(w)
        y_tr = y[tr]
        y_va = y[va]
        y_te = y[te]

        # Proposal kernels: individual spectrum kernels plus their sum
        kargs = [{"mode": SPECTRUM, "K": k} for k in K_range]
        Ksum = Kinterface(data=X_tr,
                          kernel=kernel_sum,
                          row_normalize=False,
                          kernel_args={
                              "kernels": [string_kernel] * len(kargs),
                              "kernels_args": kargs
                          })
        Ks = [
            Kinterface(data=X_tr,
                       kernel=string_kernel,
                       kernel_args=a,
                       row_normalize=False) for a in kargs
        ]

        # Modeling
        best_models = {
            "True": {
                "y": y_te,
                "color": "black",
                "fmt": "--",
            }
        }

        for method in methods:
            best_models[method] = {"color": meth2color[method], "fmt": "-"}
            best_evar = -np.inf

            for lbd in lbd_range:
                if method == "Mklaren":
                    # MKL sees the individual kernels and learns the weights.
                    mkl = Mklaren(rank=rank, lbd=lbd, delta=delta)
                    try:
                        mkl.fit(Ks, y_tr)
                        yt = mkl.predict([X_tr] * len(Ks))
                        yv = mkl.predict([X_va] * len(Ks))
                        yp = mkl.predict([X_te] * len(Ks))
                    except Exception as e:
                        print(e)
                        continue
                else:
                    if method == "CSI":
                        model = RidgeLowRank(rank=rank,
                                             method="csi",
                                             method_init_args={"delta": delta},
                                             lbd=lbd)
                    else:
                        model = RidgeLowRank(rank=rank,
                                             method=method.lower(),
                                             lbd=lbd)
                    try:
                        # Non-MKL methods see only the summed kernel.
                        model.fit([Ksum], y_tr)
                        yt = model.predict([X_tr])
                        yv = model.predict([X_va])
                        yp = model.predict([X_te])
                    except Exception as e:
                        print(e)
                        continue

                # Store best *test* set prediction for a lambda
                spc = st.spearmanr(y_te, yp)
                evar_tr = (np.var(y_tr) - np.var(yt - y_tr)) / np.var(y_tr)
                evar_va = (np.var(y_va) - np.var(yv - y_va)) / np.var(y_va)
                evar = (np.var(y_te) - np.var(yp - y_te)) / np.var(y_te)
                mse = np.var(yp - y_te)  # residual variance, not strict MSE
                if evar_va > best_evar:
                    best_evar = evar_va
                    best_yp = yp
                    best_models[method]["y"] = best_yp
                    print("Best lambda for %s: %.3E, expl. var.: %.3f" %
                          (method, lbd, float(evar_va)))

                # Store row for each methods
                row = {
                    "n": N,
                    "L": L,
                    "method": method,
                    "rank": rank,
                    "iteration": cv,
                    "sp.corr": spc[0],
                    "sp.pval": spc[1],
                    "lambda": lbd,
                    "evar_tr": evar_tr,
                    "evar_va": evar_va,
                    "evar": evar,
                    "mse": mse
                }
                writer.writerow(row)

        # Plot a generic function plot for all methods, selecting best lambda
        fname = os.path.join(detname, "cv_K-%d_cv-%d.pdf" % (trueK, cv))
        generic_function_plot(f_out=fname,
                              Ks=Ks,
                              X=X_te,
                              models=best_models,
                              xlabel="K-mer length",
                              xnames=K_range,
                              truePar=K_range.index(trueK),
                              seed=0)
Example #7
0
def process(dataset=RNA_DATASETS[0], repl=0):
    """ Process one iteration of a dataset.

    Fits several low-rank / MKL regression models on string-kernel
    representations of an RNA dataset, computes explained-variance paths on a
    held-out test set, and ranks the methods by their final explained
    variance.

    :param dataset: Dataset key.
    :param repl: Replicate index (recorded in the output rows).
    :return: List of result dicts (one per method), or None on failure.
    """
    # NOTE(review): depends on module-level globals not visible here
    # (N, p_tr, rank, delta, formats, penalty) -- confirm.

    # Load data
    np.random.seed(42)  # fixed seed: the subsample/split is deterministic
    k_range = range(2, 6)  # k-mer lengths for the proposal kernels
    snr = 0  # recorded verbatim in every output row
    rows = list()

    # Load data; sort examples by z-scored target value
    data = load_rna(dataset)
    inxs = np.argsort(st.zscore(data["target"]))
    X = data["data"][inxs]
    y = st.zscore(data["target"])[inxs]

    # Ground truth kernels;
    Ks = [
        Kinterface(data=X,
                   kernel=string_kernel,
                   row_normalize=True,
                   kernel_args={
                       "mode": "1spectrum",
                       "K": k
                   }) for k in k_range
    ]

    # Training/test split of a random subsample of size N
    # NOTE(review): sample is drawn from inxs (original-order indices) but
    # used to index the already re-sorted X/y -- verify this is intended.
    sample = np.random.choice(inxs, size=int(N), replace=False)
    tr, te = np.sort(sample[:int(N * p_tr)]), np.sort(sample[int(N * p_tr):])

    # Training kernels (defined on the training subset only)
    Ks_tr = [
        Kinterface(data=X[tr],
                   kernel=string_kernel,
                   kernel_args={
                       "mode": "1spectrum",
                       "K": k
                   },
                   row_normalize=True) for k in k_range
    ]

    # Sum of all training kernels, for single-kernel methods
    Ksum_tr = Kinterface(data=X[tr],
                         kernel=kernel_sum,
                         row_normalize=True,
                         kernel_args={
                             "kernels": [string_kernel] * len(k_range),
                             "kernels_args": [{
                                 "K": k,
                                 "mode": "1spectrum"
                             } for k in k_range]
                         })

    # Collect test error paths
    results = dict()
    try:
        for m in formats.keys():
            if m.startswith("lars-"):
                model = LarsMKL(delta=delta, rank=rank, f=penalty[m])
                model.fit(Ks_tr, y[tr])
                ypath = model.predict_path_ls([X[te]] * len(Ks_tr))
            elif m == "kmp":
                model = KMP(rank=rank, delta=delta, lbd=0)
                model.fit(Ks_tr, y[tr])
                ypath = model.predict_path([X[te]] * len(Ks_tr))
            elif m == "icd":
                model = RidgeLowRank(method="icd", rank=rank, lbd=0)
                model.fit([Ksum_tr], y[tr])
                ypath = model.predict_path([X[te]])
            elif m == "nystrom":
                model = RidgeLowRank(method="nystrom", rank=rank, lbd=0)
                model.fit([Ksum_tr], y[tr])
                ypath = model.predict_path([X[te]])
            elif m == "csi":
                model = RidgeLowRank(method="csi",
                                     rank=rank,
                                     lbd=0,
                                     method_init_args={"delta": delta})
                model.fit([Ksum_tr], y[tr])
                ypath = model.predict_path([X[te]])
            elif m == "L2KRR":
                # Full-rank baseline; its single prediction is tiled so that
                # it has a "path" of the same width as the other methods.
                model = RidgeMKL(method="l2krr", lbd=0)
                model.fit(Ks=Ks, y=y, holdout=te)
                ypath = np.vstack([model.predict(te)] * rank).T
            else:
                raise ValueError(m)

            # Compute explained variance at each step of the path
            evars = np.zeros(ypath.shape[1])
            for j in range(ypath.shape[1]):
                evars[j] = (np.var(y[te]) -
                            np.var(ypath[:, j] - y[te])) / np.var(y[te])
            results[m] = evars
    except ValueError as ve:
        # Raised above for unrecognized method keys; abort this replicate.
        print("Exception", ve)
        return

    # Compute ranking
    # scores = dict([(m, np.mean(ev)) for m, ev in results.items()])
    scores = dict([(m, ev[-1]) for m, ev in results.items()])  # final-step evar
    scale = np.array(sorted(scores.values(), reverse=True)).ravel()
    for m in results.keys():
        # 1-based position of the method's score in descending order.
        ranking = 1 + np.where(scale == scores[m])[0][0]
        row = {
            "dataset": dataset,
            "repl": repl,
            "method": m,
            "N": N,
            "evar": scores[m],
            "ranking": ranking,
            "snr": snr
        }
        rows.append(row)

    return rows
Example #8
0
    def testPredictionKernPrecomp(self):
        """Predictions from a model fit on Kinterface objects must match
        predictions from a model fit on precomputed kernel matrices."""
        for t in range(self.trials):
            X = np.random.rand(self.n, self.m)
            Ks = [
                Kinterface(kernel=exponential_kernel,
                           data=X,
                           kernel_args={"gamma": 0.1}),
                Kinterface(kernel=exponential_kernel,
                           data=X,
                           kernel_args={"gamma": 0.2}),
            ]
            Ls = [K[:, :] for K in Ks]  # precomputed full kernel matrices
            y = X[:, :3].sum(axis=1)
            y = y - y.mean()  # centered target

            X_te = np.random.rand(10, self.m)
            Ls_te = [K(X_te, X) for K in Ks]  # test-vs-train kernel matrices
            for method in ["icd", "csi", "nystrom"]:
                # print() behaves the same under Python 2 and Python 3 here.
                print(method)

                # Kinterface model
                model0 = RidgeLowRank(method=method, lbd=0.01)
                model0.fit(Ks, y)
                y0 = model0.predict([X, X])
                yp0 = model0.predict([X_te, X_te])

                # Kernel matrix model.
                # Fixed: model1 was fit but never queried -- predictions were
                # taken from model0, so the precomputed-kernel path was never
                # actually exercised by this test.
                model1 = RidgeLowRank(method=method, lbd=0.01)
                model1.fit(Ls, y)
                y1 = model1.predict(Xs=None, Ks=Ls)
                yp1 = model1.predict(Xs=None, Ks=Ls_te)

                self.assertAlmostEqual(np.linalg.norm(y0 - y1), 0, places=3)
                self.assertAlmostEqual(np.linalg.norm(yp0 - yp1), 0, places=3)
Example #9
0
def process():
    """
    Run experiments with specified parameters.

    Compares ICD-based low-rank kernel ridge regression with kernel LARS on
    the "quake" KEEL dataset, then plots the two fits, LARS diagnostics and
    the estimated C_p risk curve.
    :return:
    """
    # NOTE(review): N, load_keel, lars_kernel, estimate_sigma, estimate_risk,
    # plot_residuals and plot_path come from elsewhere in the module; the
    # bare time() call implies `from time import time`.

    # Load data
    dataset = "quake"
    data = load_keel(n=N, name=dataset)

    # Parameters
    rank = 100
    delta = 10
    lbd = 1
    gamma = 10.0

    # Load data and normalize columns; sort examples by target value
    X = st.zscore(data["data"], axis=0)
    y = st.zscore(data["target"])
    inxs = np.argsort(y)
    X = X[inxs, :]
    y = y[inxs]
    K = Kinterface(data=X,
                   kernel=exponential_kernel,
                   kernel_args={"gamma": gamma})

    # Fit library models
    # model = RidgeLowRank(lbd=lbd, rank=rank, method="csi", method_init_args={"delta": delta})
    t1 = time()
    model = RidgeLowRank(lbd=lbd, rank=rank, method="icd")
    model.fit([K], y)
    yp = model.predict([X])
    t1 = time() - t1  # wall-clock fit + predict time
    print("ICD time: %f" % t1)

    # Fit Kernel-LARS
    t1 = time()
    Q, R, path, mu, act = lars_kernel(K, y, rank=rank, delta=delta)
    t1 = time() - t1
    print("LARS time: %f" % t1)

    # Compute the C_p risk estimate at each step along the LARS path
    _, sigma_est = estimate_sigma(K[:, :], y)
    Cp_est = np.zeros(path.shape[0])
    # NOTE(review): this loop rebinds `mu`, clobbering the value returned by
    # lars_kernel; the curve plotted below is the fit of the final path
    # entry -- confirm that is the intended plot.
    for i, b in enumerate(path):
        mu = Q.dot(b)
        Cp_est[i] = estimate_risk(Q[:, :i + 1], y, mu, sigma_est)

    # Plot fit
    plt.figure()
    plt.plot(y, ".")
    plt.plot(yp, "--", label="ICD")
    plt.plot(mu, "-", label="LARS")
    plt.legend()
    plt.show()

    # Diagnostic LARS plots
    plot_residuals(Q, y, path, tit="LARS")
    plot_path(path, tit="LARS")

    # Risk estimation
    plt.figure()
    plt.plot(Cp_est)
    plt.xlabel("Model capacity $\\rightarrow$")
    plt.ylabel("$C_p$")
    plt.grid()