Example 1
    def testAllKernels(self):
        X = self.X
        y = np.random.rand(X.shape[0], 1)

        Ks = [
            Kinterface(data=X,
                       kernel=exponential_kernel,
                       kernel_args={"gamma": 0.1}),
            Kinterface(data=X,
                       kernel=matern32_gpy,
                       kernel_args={"lengthscale": 3.0}),
            Kinterface(data=X,
                       kernel=matern52_gpy,
                       kernel_args={"lengthscale": 5.0}),
            # Kinterface(data=X, kernel=periodic_gpy, kernel_args={"lengthscale": 5.0, "period": 4.0}),
        ]
        Km = sum([K[:, :] for K in Ks])

        kern = GPy.kern.RBF(1, lengthscale=FITC.gamma2lengthscale(0.1)) \
               + GPy.kern.Matern32(1, lengthscale=3) \
               + GPy.kern.Matern52(1, lengthscale=5)
        # + GPy.kern.PeriodicExponential(1, lengthscale=5, period=4)

        Ky = kern.K(X, X)
        self.assertAlmostEqual(np.linalg.norm(Ky - Km[:, :]), 0, places=3)

        model = FITC()
        model.fit(Ks, y, optimize=True, fix_kernel=True)
        yp = model.predict([X])
        v1 = np.var(y.ravel())
        v2 = np.var((y - yp).ravel())
        self.assertTrue(v2 < v1)
Example 2
def ramdom_kernels(kernel_indexs, samples, classes, rbf_par, poly_par):
    kernels = []
    for indexes in kernel_indexs:
        choice = np.random.randint(3, size=1)[0]
        if choice == 0:
            kernels.append(
                Kinterface(data=x_train[:, indexes], kernel=linear_kernel))
        elif choice == 1:
            #print(rbf_par)
            length_of_param1 = len(rbf_par["gamma"])
            # print(rbf_par["gamma"])
            # print(np.random.randint(length_of_param1, size=1)[0])
            K = Kinterface(data=x_train[:, indexes],
                           kernel=rbf_kernel,
                           kernel_args={
                               "gamma":
                               rbf_par["gamma"][np.random.randint(
                                   length_of_param1, size=1)[0]]
                           })
            kernels.append(K)
        else:
            length_of_param1 = len(poly_par["degree"])
            K = Kinterface(data=x_train[:, indexes],
                           kernel=poly_kernel,
                           kernel_args={
                               "degree":
                               poly_par["degree"][np.random.randint(
                                   length_of_param1, size=1)[0]]
                           })
            kernels.append(K)
    return kernels
Example 3
def createKernelCombination(kernel_indexs,samples,classes,rbf_par,poly_par,scorer):
   kernels= []
   for indexes in kernel_indexs:
      svm = tunning_svm(samples[:,indexes],classes,rbf_par,poly_par,scorer)
      kernel = svm.get_params()["kernel"]
      if kernel=="linear":
         kernels.append( Kinterface(data=x_train[:,indexes], kernel=linear_kernel))
      elif kernel =="rbf":
         gamma=svm.get_params()["gamma"]
         K = Kinterface(data=x_train[:,indexes], kernel=rbf_kernel,kernel_args={"gamma": gamma})
         kernels.append(K)
      else:
         degree=svm.get_params()["degree"]
         coef0 = svm.get_params()["coef0"]  # note: coef0 is retrieved but not passed to poly_kernel below
         K = Kinterface(data=x_train[:,indexes], kernel=poly_kernel,kernel_args={"degree": degree})
         kernels.append(K)

   model = Alignf(typ="convex")
   model.fit(kernels, classes.values)
   model.mu  # kernel weights (convex combination)
   mu = model.mu
   print(mu)

   combined_k = lambda x,y: \
      sum([mu[i]*kernels[i](x[:,kernel_indexs[i]],y[:,kernel_indexs[i]]) for i in range(len(kernels))])
   return combined_k
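
The Alignf weighting step above does not depend on the SVM tuning; a minimal, self-contained sketch of fitting Alignf to obtain convex kernel weights is given below. The import paths are assumptions based on the usual mklaren package layout, and the data is synthetic.

import numpy as np
from mklaren.kernel.kinterface import Kinterface
from mklaren.kernel.kernel import linear_kernel, exponential_kernel
from mklaren.mkl.alignf import Alignf  # assumed module path

X = np.random.rand(60, 4)
y = np.sign(np.random.rand(60) - 0.5)            # synthetic +/-1 targets
Ks = [Kinterface(data=X, kernel=linear_kernel),
      Kinterface(data=X, kernel=exponential_kernel, kernel_args={"gamma": 0.5})]
model = Alignf(typ="convex")
model.fit(Ks, y)
print(model.mu)                                  # convex kernel weights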
Example 4
 def test_bias(self):
     """ Assert least squares solution is valid at each step. """
     n = 100
     rank = 20
     delta = 5
     bias = 20
     X = np.linspace(-10, 10, n).reshape((n, 1))
     Ks = [
         Kinterface(data=X,
                    kernel=exponential_kernel,
                    kernel_args={"gamma": 0.6}),
         Kinterface(data=X,
                    kernel=exponential_kernel,
                    kernel_args={"gamma": 0.1}),
     ]
     Kt = 1.0 + Ks[0][:, :] + 0.0 * Ks[1][:, :]
     y = mvn.rvs(mean=np.zeros(n, ), cov=Kt).reshape((n, 1))
     y = y + bias
     model = KMP(rank=rank, delta=delta, lbd=0)
     model.fit(Ks, y)
     ypath = model.predict_path([X, X])
     for i in range(model.rank):
         yp = ypath[:, i] - model.bias
         yu = y.ravel() - model.bias
         assert np.linalg.norm(yp.T.dot(yu - yp)) < 1e-3
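
The assertion above checks a generic least-squares identity rather than anything KMP-specific: the fitted values are an orthogonal projection of the (bias-corrected) targets, so the residual is orthogonal to the prediction. A small numpy sketch of that identity:

import numpy as np

A = np.random.rand(50, 5)                          # arbitrary design matrix
y = np.random.rand(50)
yp = A.dot(np.linalg.lstsq(A, y, rcond=None)[0])   # least-squares fit
print(yp.dot(y - yp))                              # ~0 up to round-off error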
Example 5
def ramdom_kernels_combination(kernel_indexs,samples,classes,rbf_par,poly_par,scorer):
   kernels= []
   for indexes in kernel_indexs:    
      choice = np.random.randint(3, size=1)[0]
      if choice ==0:
         kernels.append( Kinterface(data=x_train[:,indexes], kernel=linear_kernel))
      elif choice ==1:
         #print(rbf_par)
         length_of_param1 = len(rbf_par["gamma"])
         # print(rbf_par["gamma"])
         # print(np.random.randint(length_of_param1, size=1)[0])
         K = Kinterface(data=x_train[:,indexes], kernel=rbf_kernel,kernel_args={"gamma": rbf_par["gamma"][np.random.randint(length_of_param1, size=1)[0]]})
         kernels.append(K)
      else:
         length_of_param1 = len(poly_par["degree"])
         K = Kinterface(data=x_train[:,indexes], kernel=poly_kernel,kernel_args={"degree": poly_par["degree"][np.random.randint(length_of_param1, size=1)[0]]})
         kernels.append(K)
   #mu = [random.randrange(0,1) for i in range(40)]
   model = Alignf(typ="convex")
   model.fit(kernels, classes.values)
   model.mu  # kernel weights (convex combination)
   mu = model.mu
   #print("numbers:" +str(mu))

   combined_k = lambda x,y: \
      sum([mu[i]*kernels[i](x[:,kernel_indexs[i]],y[:,kernel_indexs[i]]) for i in range(len(kernels))])
   return combined_k
Example 6
    def testPredictionKernPrecomp(self):
        for t in range(self.trials):
            X = np.random.rand(self.n, self.m)
            Ks = [
                Kinterface(kernel=exponential_kernel,
                           data=X,
                           kernel_args={"gamma": 0.1}),
                Kinterface(kernel=exponential_kernel,
                           data=X,
                           kernel_args={"gamma": 0.2}),
            ]
            Ls = [K[:, :] for K in Ks]
            y = X[:, :3].sum(axis=1)
            y = y - y.mean()

            X_te = np.random.rand(10, self.m)
            Ls_te = [K(X_te, X) for K in Ks]
            for method in ["icd", "csi", "nystrom"]:
                print(method)

                # Kinterface model
                model0 = RidgeLowRank(method=method, lbd=0.01)
                model0.fit(Ks, y)
                y0 = model0.predict([X, X])
                yp0 = model0.predict([X_te, X_te])

                # Kernel matrix model
                model1 = RidgeLowRank(method=method, lbd=0.01)
                model1.fit(Ls, y)
                y1 = model1.predict(Xs=None, Ks=Ls)
                yp1 = model1.predict(Xs=None, Ks=Ls_te)

                self.assertAlmostEqual(np.linalg.norm(y0 - y1), 0, places=3)
                self.assertAlmostEqual(np.linalg.norm(yp0 - yp1), 0, places=3)
Example 7
 def testRowNorm(self):
     Kp = poly_kernel(self.X, self.X, degree=2)
     Kr = kernel_row_normalize(Kp)
     Ki = Kinterface(data=self.X,
                     kernel=poly_kernel,
                     kernel_args={"degree": 2},
                     row_normalize=True)
     self.assertAlmostEqual(np.linalg.norm(Ki.diag().ravel() -
                                           np.ones((self.n, ))),
                            0,
                            delta=3)
     self.assertAlmostEqual(np.linalg.norm(Ki(self.X, self.X) - Kr),
                            0,
                            delta=3)
     self.assertAlmostEqual(np.linalg.norm(Ki[:, :] - Kr), 0, delta=3)
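
Row normalization here is the usual cosine normalization K'(x, y) = K(x, y) / sqrt(K(x, x) * K(y, y)), which forces a unit diagonal. A plain numpy sketch of the same transformation (assumed to match what kernel_row_normalize computes, which is what the test above verifies):

import numpy as np

K = np.random.rand(5, 3)
K = K.dot(K.T)                           # a positive semidefinite kernel matrix
d = np.sqrt(np.diag(K))
Kn = K / np.outer(d, d)                  # cosine / row normalization
print(np.allclose(np.diag(Kn), 1.0))     # True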
Example 8
def create_kinterfce(kernel_list, type_k):
    """
    Creates a kinterface of type_k
    
    Parameters
    ----------
    kernel_list : List
        A list of all kernels
    type_k : Tran
        Type of transformation for kernels 
    
    Returns
    --------
    train_array : list
        A list of all training kernels as array 
    kinterface_kernel : Kinterface 
        Kernel of type kinterface
    """
    kinterface_kernel = []
    train_array = []
    for ker in kernel_list:
        arr = ker.toarray()
        train_array.append(arr)
        k_arr = Kinterface(data=arr, kernel=type_k)
        kinterface_kernel.append(k_arr)
    return train_array, kinterface_kernel
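
A minimal usage sketch for the helper above; the sparse inputs and the linear kernel are illustrative assumptions, and the import path follows the mklaren layout used in the other examples.

from scipy import sparse
from mklaren.kernel.kernel import linear_kernel

# Two sparse blocks standing in for the entries of kernel_list.
kernel_list = [sparse.random(20, 5, density=0.3, format="csr"),
               sparse.random(20, 8, density=0.3, format="csr")]
train_array, kinterface_kernel = create_kinterfce(kernel_list, linear_kernel)
print(len(train_array), kinterface_kernel[0][:, :].shape)   # 2 (20, 20)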
Example 9
    def testPrediction(self):
        for t in range(self.trials):
            X = np.random.rand(self.n, self.m)
            tr = np.arange(self.n // 2).astype(int)  # necessarily int 1D array
            te = np.arange(self.n // 2, self.n).astype(int)
            Ks = [
                Kinterface(data=X,
                           kernel=exponential_kernel,
                           kernel_args={"gamma": g}) for g in self.gamma_range
            ]

            inxs = np.random.choice(tr.ravel(), size=self.n // 3)
            alpha = np.zeros((self.n, 1))
            alpha[inxs] = np.random.randn(len(inxs), 1)
            mu0 = np.random.randn(len(Ks), 1)
            K0 = sum([w * K[:, :] for K, w in zip(Ks, mu0)])
            y = K0.dot(alpha).ravel()
            y = y - y.mean()  # y necessarily 1D array
            y += np.random.randn(len(K0), 1).ravel() * 0.001

            for method in RidgeMKL.mkls.keys():

                model = RidgeMKL(method=method)
                model.fit(Ks, y, holdout=te)
                yp = model.predict(te)
                expl_var = (np.var(y[te]) - np.var(y[te] - yp)) / np.var(y[te])
                self.assertGreater(expl_var, 0.5)
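
For reference, the same fit/holdout pattern as a self-contained sketch; the import path is an assumption, and the MKL method is taken from RidgeMKL.mkls as in the test above.

import numpy as np
from mklaren.kernel.kinterface import Kinterface
from mklaren.kernel.kernel import exponential_kernel
from mklaren.regression.ridge import RidgeMKL  # assumed module path

X = np.random.rand(80, 3)
y = X.sum(axis=1)
y = y - y.mean()
Ks = [Kinterface(data=X, kernel=exponential_kernel, kernel_args={"gamma": g})
      for g in (0.1, 1.0)]
te = np.arange(60, 80)                              # held-out indices
method = sorted(RidgeMKL.mkls.keys())[0]            # any registered MKL method
model = RidgeMKL(method=method)
model.fit(Ks, y, holdout=te)
print(model.predict(te))                            # predictions for the 20 held-out points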
Example 10
 def testCallOtherNorm(self):
     Ki = Kinterface(data=self.X,
                     kernel=poly_kernel,
                     kernel_args={"degree": 2},
                     row_normalize=True)
     Kr = Ki(self.X, self.Y)
     self.assertTrue(np.all(Kr < 1))
Example 11
def variousKernelVariousMethodsOneTCGA(tcga, X_te, y_tr, y_te, method, rank):

    K_exp = Kinterface(data=tcga, kernel=rbf_kernel,
                       kernel_args={"sigma": 30})  # RBF kernel
    K_poly = Kinterface(data=tcga,
                        kernel=poly_kernel,
                        kernel_args={"degree":
                                     3})  # polynomial kernel with degree=3
    K_lin = Kinterface(data=tcga, kernel=linear_kernel)  # linear kernel

    model = RidgeLowRank(method=method, rank=rank, lbd=1)
    model.fit([K_exp, K_lin, K_poly], y_tr)
    yp = model.predict([X_te, X_te,
                        X_te])  # The features passed to each kernel
    mse = mean_squared_error(y_te, yp)
    #rmse = np.var(y_tr-yp)**0.5
    print "Test MSE:", mse
Example 12
 def testCallOther(self):
     Kp = poly_kernel(self.X, self.Y, degree=2)
     Ki = Kinterface(data=self.X,
                     kernel=poly_kernel,
                     kernel_args={"degree": 2},
                     row_normalize=False)
     Kr = Ki(self.X, self.Y)
     self.assertAlmostEqual(np.linalg.norm(Kp - Kr), 0, delta=3)
Example 13
 def testCall(self):
     Kp = poly_kernel(self.X, self.X, degree=2)
     Ki = Kinterface(data=self.X,
                     kernel=poly_kernel,
                     kernel_args={"degree": 2})
     self.assertAlmostEqual(np.linalg.norm(Ki(self.X, self.X) - Kp),
                            0,
                            delta=3)
Example 14
    def testFITCfit(self):
        n = self.n
        X = self.X
        noise = 1.0

        # Construct a combined kernel
        gamma_range = [0.1, 0.3, 1.0]
        Ks = [
            Kinterface(data=X,
                       kernel=exponential_kernel,
                       kernel_args={"gamma": gm}) for gm in gamma_range
        ]
        Km = Kinterface(data=X,
                        kernel=kernel_sum,
                        kernel_args={
                            "kernels": [exponential_kernel] * len(gamma_range),
                            "kernels_args":
                            map(lambda gm: {"gamma": gm}, gamma_range)
                        })

        for seed in range(5):
            # Sample a function from a GP
            f = mvn.rvs(mean=np.zeros((n, )), cov=Km[:, :], random_state=seed)
            y = mvn.rvs(mean=f, cov=np.eye(n, n) * noise, random_state=seed)
            y = y.reshape((n, 1))

            # Fit a model
            model = FITC()
            model.fit(Ks, y, optimize=False, fix_kernel=False)

            # Compare kernels
            self.assertAlmostEqual(
                np.linalg.norm(model.kernel.K(X, X) - Km[:, :]), 0, places=3)

            # Predictions
            yp = model.predict([X])
            v1 = np.var(y.ravel())
            v2 = np.var((y - yp).ravel())
            self.assertTrue(v2 < v1)

            # Fixed model
            model_fix = FITC()
            model_fix.fit(Ks, y, optimize=False, fix_kernel=True)
            ypf = model_fix.predict([X])
            v3 = np.var((y - ypf).ravel())
            self.assertTrue(v3 < v1)
Example 15
 def variousKernel(tcga, sigmaKernel, degreeKernel, biasKernel, cKernel,
                   sigmaABSKernel, sigmaPerKernel, nuKernel):
     #Kernels
     K_exp = Kinterface(data=np.array(tcga),
                        kernel=rbf_kernel,
                        kernel_args={"sigma": sigmaKernel})  # RBF kernel
     K_poly = Kinterface(data=np.array(tcga),
                         kernel=poly_kernel,
                         kernel_args={"degree": degreeKernel
                                      })  # polynomial kernel with degree=3
     K_lin = Kinterface(data=np.array(tcga),
                        kernel=linear_kernel,
                        kernel_args={'b': biasKernel})
     K_sig = Kinterface(data=np.array(tcga),
                        kernel=sigmoid_kernel,
                        kernel_args={'c': cKernel})
     K_expoAbs = Kinterface(data=np.array(tcga),
                            kernel=exponential_absolute,
                            kernel_args={"sigma": sigmaABSKernel})
     K_perio = Kinterface(data=np.array(tcga),
                          kernel=periodic_kernel,
                          kernel_args={"sigma": sigmaPerKernel})
     K_matern = Kinterface(data=np.array(tcga),
                           kernel=matern_kernel,
                           kernel_args={"nu": nuKernel})
     return K_exp, K_poly, K_lin, K_sig, K_expoAbs, K_perio, K_matern
Example 16
 def testKernGamma(self):
     for gamma in [0.1, 1.0, 2.0, 10.0]:
         k = GPy.kern.RBF(1,
                          variance=1,
                          lengthscale=FITC.gamma2lengthscale(gamma))
         K = k.K(self.X, self.X)
         Ki = Kinterface(data=self.X,
                         kernel=exponential_kernel,
                         kernel_args={"gamma": gamma})
         self.assertAlmostEqual(np.linalg.norm(K - Ki[:, :]), 0, places=3)
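
The equivalence tested above follows from the two parameterizations of the squared-exponential kernel: exp(-gamma * ||x - y||^2) = exp(-||x - y||^2 / (2 * lengthscale^2)), hence lengthscale = sqrt(1 / (2 * gamma)). A plain sketch of that conversion (not necessarily the exact body of FITC.gamma2lengthscale, but the test above confirms the resulting kernels agree numerically):

import numpy as np

def gamma_to_lengthscale(gamma):
    # exp(-gamma * d**2) == exp(-d**2 / (2 * l**2))  =>  l = sqrt(1 / (2 * gamma))
    return np.sqrt(1.0 / (2.0 * gamma))

print(gamma_to_lengthscale(0.1))   # ~2.236, i.e. sqrt(5)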
Example 17
    def setUp(self):
        X, y = generate_data(N=100,
                             L=100,
                             p=0.5,
                             motif="TGTG",
                             mean=0,
                             var=3,
                             seed=42)
        self.Xa = np.array(X)
        self.y = y

        self.Ks = [
            Kinterface(kernel=string_kernel,
                       data=self.Xa,
                       kernel_args={"mode": SPECTRUM}),
            Kinterface(kernel=string_kernel,
                       data=self.Xa,
                       kernel_args={"mode": SPECTRUM_MISMATCH})
        ]
Example 18
def NystromOneKernelOneTCGA(tcga, kernel, kernel_args, rank):

    K = Kinterface(data=np.array(tcga), kernel=kernel, kernel_args=kernel_args)

    model = Nystrom(rank=rank)
    model.fit(K)
    G_nyst = model.G
    print "G shape:", G_nyst.shape, "Error:", np.linalg.norm(
        K[:, :] - G_nyst.dot(G_nyst.T))
    return model
Example 19
def ridgeLowRankOneKernel(tcga, y_tr, kernel, kernel_args, rank, method):
    #K = Kinterface(data=X_tr, kernel=rbf_kernel, kernel_args={"sigma": 110})
    K = Kinterface(data=np.array(tcga), kernel=kernel, kernel_args=kernel_args)

    #for method in "nystrom", "icd":
    model = RidgeLowRank(method=method, rank=rank, lbd=1)
    model.fit([K], y_tr)
    #yp = model.predict([np.array(X_te)])
    #mse = mean_squared_error(y_te, yp)
    #rmse = np.var(y_te-yp)**0.5
    #print "Method:", method, "Test MSE:", mse
    return model
Example 20
def ICDoneKernelOneTCGA(tcga, kernel, kernel_args, rank):

    #K = Kinterface(data=np.array(cnv), kernel=rfb_kernel, kernel_args={"sigma": 110})
    K = Kinterface(data=np.array(tcga), kernel=kernel, kernel_args=kernel_args)

    model = ICD(rank=rank)
    model.fit(K)
    G_icd = model.G
    #inxs = model.active_set_
    print("G shape:", G_icd.shape, "Error:",
          np.linalg.norm(K[:, :] - G_icd.dot(G_icd.T)))
    return model
Example 21
 def setUp(self):
     self.n = 100
     self.m = 3
     self.gamma_range = np.logspace(-1, 1, 5)
     self.lbd_range = [0, 1, 100, 1000]
     self.X = np.random.rand(self.n, self.m)
     self.Ks = [
         Kinterface(data=self.X,
                    kernel=exponential_kernel,
                    kernel_args={"gamma": g}) for g in self.gamma_range
     ]
     self.trials = 5
Example 22
 def test_least_squares_sol(self):
     np.random.seed(1)
     n = 100
     rank = 20
     delta = 5
     X = np.linspace(-10, 10, n).reshape((n, 1))
     Ks = [
         Kinterface(data=X,
                    kernel=exponential_kernel,
                    kernel_args={"gamma": 0.6}),
         Kinterface(data=X,
                    kernel=exponential_kernel,
                    kernel_args={"gamma": 0.1}),
     ]
     Kt = 1.0 + Ks[0][:, :] + 0.0 * Ks[1][:, :]
     y = mvn.rvs(mean=np.zeros(n, ), cov=Kt).reshape((n, 1))
     y = y - y.mean()
     model = KMP(rank=rank, delta=delta, lbd=0)
     model.fit(Ks, y)
     yp = model.predict([X, X])
     assert np.linalg.norm(yp.T.dot(y.ravel() - yp)) < 1e-2
Example 23
    def testMklarenPredict(self):
        X_tr = self.Xa[:50]
        X_te = self.Xa[50:]
        y_tr = self.y[:50]
        y_te = self.y[50:]

        Ks = [
            Kinterface(kernel=string_kernel,
                       data=X_tr,
                       kernel_args={"mode": SPECTRUM}),
            Kinterface(kernel=string_kernel,
                       data=X_tr,
                       kernel_args={"mode": SPECTRUM_MISMATCH})
        ]

        model = Mklaren(rank=10)
        model.fit(Ks, y_tr)
        yp = model.predict([X_te] * len(Ks))

        c, p = st.spearmanr(yp, y_te)
        self.assertGreater(c, 0)
        self.assertLess(p, 0.05)
Example 24
 def testCSIFit(self):
     Ks = [
         Kinterface(kernel=string_kernel,
                    data=self.Xa,
                    kernel_args={"mode": SPECTRUM})
     ]
     model = RidgeLowRank(rank=5,
                          method="csi",
                          method_init_args={"delta": 5},
                          lbd=0.01)
     model.fit(Ks, self.y)
     yp = model.predict([self.Xa] * len(Ks))
     c, p = st.spearmanr(yp, self.y)
     self.assertGreater(c, 0)
     self.assertLess(p, 0.05)
Example 25
def get_kernel_matrix(dframe, n_dim=15):
    """
    This returns a Kernel Transformation Matrix $\Theta$

    It uses kernel approximation offered by the MKlaren package
    For the sake of completeness (and for my peace of mind, I use the best possible approx.)

    :param dframe: input data as a pandas dataframe.
    :param n_dim: Number of dimensions for the kernel matrix (default=15)
    :return: $\Theta$ matrix
    """
    ker = Kinterface(data=dframe.values, kernel=linear_kernel)
    model = ICD(rank=n_dim)
    model.fit(ker)
    g_nystrom = model.G
    return g_nystrom
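
A short usage sketch for the helper above, assuming a pandas DataFrame input as stated in the docstring:

import numpy as np
import pandas as pd

df = pd.DataFrame(np.random.rand(50, 10))
theta = get_kernel_matrix(df, n_dim=5)
print(theta.shape)   # (50, 5): one low-rank column per ICD pivot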
Example 26
    def testKernelSum(self):
        Ki = Kinterface(data=self.X,
                        kernel=kernel_sum,
                        kernel_args={
                            "kernels": [poly_kernel, poly_kernel, poly_kernel],
                            "kernels_args": [{
                                "degree": 2
                            }, {
                                "degree": 3
                            }, {
                                "degree": 4
                            }]
                        },
                        row_normalize=False)

        Kc = poly_kernel(self.X, self.X, degree=2) + \
             poly_kernel(self.X, self.X, degree=3) + \
             poly_kernel(self.X, self.X, degree=4)
        self.assertAlmostEqual(np.linalg.norm(Ki[:, :] - Kc), 0, places=3)
Example 27
    def testPolySum(self):
        """
        Test expected reconstruction properties of the ICD.
        Kernels are iteratively summed.
        """
        K = np.zeros((self.n, self.n))
        for d in range(1, 6):
            K += Kinterface(data=self.X, kernel=poly_kernel,
                            kernel_args={"degree": d},
                            row_normalize=True)[:, :]
            model = ICD(rank=self.n)
            model.fit(K)

            errors = np.zeros((self.n, ))
            for i in range(self.n):
                Ki = model.G[:, :i+1].dot(model.G[:, :i+1].T)
                errors[i] = np.linalg.norm(K-Ki)

            self.assertTrue(np.all(errors[:-1] > errors[1:]))
            self.assertAlmostEqual(errors[-1], 0, delta=3)
Example 28
def process(dataset, outdir):
    """
    Run experiments with specified parameters.
    :param dataset: Dataset key.
    :param outdir: Output directory.
    :return:
    """

    # List available kernels
    K_range = range(1, 11)
    kargs = [{"mode": SPECTRUM, "K": kl} for kl in K_range]
    kernels = ",".join(set(map(lambda t: t["mode"], kargs)))

    # Fixed settings
    methods = ["Mklaren", "CSI", "Nystrom", "ICD"]
    rank_range = (rnk, )
    trueK = RNA_OPTIMAL_K.get(dataset, None)

    # Fixed output
    # Create output directory
    detname = os.path.join(outdir, "_%s" % dataset)
    if not os.path.exists(outdir): os.makedirs(outdir)
    if not os.path.exists(detname): os.makedirs(detname)
    fname = os.path.join(outdir, "%s.csv" % dataset)
    print("Writing to %s ..." % fname)

    # Output
    header = [
        "dataset", "n", "L", "kernels", "method", "rank", "iteration",
        "lambda", "pivots", "time", "evar_tr", "evar_va", "evar", "mse"
    ]
    fp = open(fname, "w", buffering=1)  # line-buffered; buffering=0 is only valid for binary files in Python 3
    writer = csv.DictWriter(fp,
                            fieldnames=header,
                            quotechar='"',
                            quoting=csv.QUOTE_ALL)
    writer.writeheader()

    # Load data
    data = load_rna(dataset)
    X = data["data"]
    y = st.zscore(data["target"])
    n, L = len(X), len(X[0])

    # Load feature spaces
    Ys = [
        pickle.load(gzip.open(dataset2spectrum(dataset, K))) for K in K_range
    ]

    # Generate random datasets and perform prediction
    seed = 0
    for cv in iterations:

        # Select random test/train indices
        np.random.seed(seed)
        inxs = np.arange(n, dtype=int)
        np.random.shuffle(inxs)
        tr = inxs[:n_tr]
        va = inxs[n_tr:n_tr + n_val]
        te = inxs[n_tr + n_val:]

        # Training / test split
        y_tr = y[tr]
        y_va = y[va]
        y_te = y[te]

        # Print after dataset generation
        dat = datetime.datetime.now()
        print("%s\tdataset=%s cv=%d (computing kernels...)" %
              (dat, dataset, cv))

        # For plotting
        X_te = X[te]
        Ks = [
            Kinterface(kernel=string_kernel, data=X[tr], kernel_args=arg)
            for arg in kargs
        ]

        # Precomputed kernel matrices
        Ls_tr = [np.array(Y[tr, :].dot(Y[tr, :].T).todense()) for Y in Ys]
        Ls_va = [np.array(Y[va, :].dot(Y[tr, :].T).todense()) for Y in Ys]
        Ls_te = [np.array(Y[te, :].dot(Y[tr, :].T).todense()) for Y in Ys]
        Ls_tr_sum = [sum(Ls_tr)]
        Ls_va_sum = [sum(Ls_va)]
        Ls_te_sum = [sum(Ls_te)]

        # Modeling
        for rank in rank_range:
            dat = datetime.datetime.now()
            print("\t%s\tdataset=%s cv=%d rank=%d" % (dat, dataset, cv, rank))
            best_models = {
                "True": {
                    "y": y_te,
                    "color": "black",
                    "fmt": "--",
                }
            }
            for method in methods:
                best_models[method] = {"color": meth2color[method], "fmt": "-"}
                best_evar = -np.inf

                for lbd in lbd_range:
                    t1 = time.time()
                    if method == "Mklaren":
                        mkl = Mklaren(rank=rank, lbd=lbd, delta=delta)
                        try:
                            mkl.fit(Ls_tr, y_tr)
                            yt = mkl.predict(Xs=None, Ks=Ls_tr)
                            yv = mkl.predict(Xs=None, Ks=Ls_va)
                            yp = mkl.predict(Xs=None, Ks=Ls_te)
                            pivots = ",".join(
                                map(lambda pi: str(K_range[pi]),
                                    mkl.G_mask.astype(int)))
                        except Exception as e:
                            print(e)
                            continue
                    else:
                        pivots = ""
                        if method == "CSI":
                            model = RidgeLowRank(
                                rank=rank,
                                method="csi",
                                method_init_args={"delta": delta},
                                lbd=lbd)
                        else:
                            model = RidgeLowRank(rank=rank,
                                                 method=method.lower(),
                                                 lbd=lbd)
                        try:
                            model.fit(Ls_tr_sum, y_tr)
                            yt = model.predict(Xs=None, Ks=Ls_tr_sum)
                            yv = model.predict(Xs=None, Ks=Ls_va_sum)
                            yp = model.predict(Xs=None, Ks=Ls_te_sum)
                        except Exception as e:
                            print(e)
                            continue
                    t2 = time.time() - t1

                    # Evaluate explained variance on the three sets
                    evar_tr = (np.var(y_tr) - np.var(yt - y_tr)) / np.var(y_tr)
                    evar_va = (np.var(y_va) - np.var(yv - y_va)) / np.var(y_va)
                    evar = (np.var(y_te) - np.var(yp - y_te)) / np.var(y_te)
                    mse = np.var(yp - y_te)

                    # Select best lambda to plot
                    if evar_va > best_evar:
                        best_evar = evar_va
                        best_yp = yp
                        best_models[method]["y"] = best_yp

                    # Write to output
                    row = {
                        "L": L,
                        "n": len(X),
                        "method": method,
                        "dataset": dataset,
                        "kernels": kernels,
                        "rank": rank,
                        "iteration": cv,
                        "lambda": lbd,
                        "time": t2,
                        "evar_tr": evar_tr,
                        "evar_va": evar_va,
                        "evar": evar,
                        "mse": mse,
                        "pivots": pivots
                    }

                    writer.writerow(row)
                    seed += 1

            # Plot a function fit after selecting best lambda
            fname = os.path.join(
                detname,
                "%s.generic_plot_cv-%d_rank-%d.pdf" % (dataset, cv, rank))
            generic_function_plot(
                f_out=fname,
                Ks=Ks,
                X=X_te,
                models=best_models,
                xlabel="K-mer length",
                xnames=K_range,
                truePar=K_range.index(trueK) if trueK else None)
Example 29
def process(dataset=RNA_DATASETS[0], repl=0):
    """ Process one iteration of a dataset. """
    dat = datetime.datetime.now()
    print("\t%s\tdataset=%s cv=%d rank=%d" % (dat, dataset, repl, rank))

    # Load data
    np.random.seed(repl)
    K_range = range(3, 8)

    # Load data
    data = load_rna(dataset)
    inxs = np.argsort(st.zscore(data["target"]))
    y = st.zscore(data["target"])[inxs]

    # Training/test; return a shuffled list
    sample = np.random.choice(inxs, size=len(inxs), replace=False)
    a, b = int(N * p_tr), int(N)
    tr, va, te = np.sort(sample[:a]), \
                 np.sort(sample[a:b]), \
                 np.sort(sample[b:])

    # Load feature spaces
    try:
        Ys = [
            pickle.load(gzip.open(dataset2spectrum(dataset, K)))
            for K in K_range
        ]
    except IOError:
        return None

    # Training kernels
    Ks_tr = [
        Kinterface(data=Y[tr], kernel=linear_kernel, row_normalize=True)
        for Y in Ys
    ]

    # Process
    results = dict()
    for m in formats.keys():
        model = LarsMKL(delta=delta, rank=rank, f=penalty[m])
        try:
            model.fit(Ks_tr, y[tr])
        except Exception as e:
            print("%s: %s" % (m, str(e)))
            continue

        y_va = y[va].reshape((len(va), 1))
        y_te = y[te].reshape((len(te), 1))

        ypath_va = model.predict_path_ls([Y[va] for Y in Ys])
        ypath_te = model.predict_path_ls([Y[te] for Y in Ys])

        scores_va = (np.var(y_va) -
                     np.var(ypath_va - y_va, axis=0)) / np.var(y_va)
        scores_te = (np.var(y_te) -
                     np.var(ypath_te - y_te, axis=0)) / np.var(y_te)

        t = np.argmax(scores_va)
        results[m] = np.round(scores_te[t], 3)

    # Compute ranking
    rows = list()
    scores = dict([(m, ev) for m, ev in results.items()])
    scale = np.array(sorted(scores.values(), reverse=True)).ravel()
    for m in results.keys():
        ranking = 1 + np.where(scale == scores[m])[0][0]
        row = {
            "dataset": dataset,
            "repl": repl,
            "method": m,
            "N_tr": len(tr),
            "N_va": len(va),
            "N_te": len(te),
            "evar": scores[m],
            "ranking": ranking
        }
        rows.append(row)

    return rows
Example 30
def generate_data(n,
                  rank,
                  inducing_mode="uniform",
                  noise=1,
                  gamma_range=(0.1, ),
                  seed=None,
                  input_dim=1,
                  signal_sampling="GP",
                  data="mesh"):
    """
    Generate an artificial dataset with imput dimension.
    :param n: Number od data points.
    :param rank: Number of inducing points.
    :param inducing_mode:   Biased or uniform distribution of data points.
    :param noise: Noise variance.
    :param gamma_range: Number of kernels and hyperparameters.
    :param seed: Random seed.
    :param input_dim: Input space dimension.
    :param signal_sampling: 'GP' or 'weights'. Weights is more efficient.
    :param data: mesh or input_dim.
    :return:
    """
    if seed is not None:
        np.random.seed(seed)

    # Generate data for arbitrary input_dim
    if data == "mesh":
        x = np.linspace(-10, 10, n).reshape((n, 1))
        M = np.meshgrid(*(input_dim * [x]))
        X = np.array(list(zip(*[m.ravel() for m in M])))
        N = X.shape[0]

        xp = np.linspace(-10, 10, 100).reshape((100, 1))
        Mp = np.meshgrid(*(input_dim * [xp]))
        Xp = np.array(list(zip(*[m.ravel() for m in Mp])))
    elif data == "random":
        # Ensure data is separated at proper lengthscales
        ls = SPGP.gamma2lengthscale(min(gamma_range)) / np.sqrt(input_dim)
        a, b = -n * ls / 2.0, n * ls / 2.0
        X = a + 2 * b * np.random.rand(n, input_dim)
        N = X.shape[0]
        Xp = np.random.rand(100, input_dim)
    else:
        raise ValueError("Unknown data mode: %s" % data)

    # Kernel sum
    Ksum = Kinterface(data=X,
                      kernel=kernel_sum,
                      kernel_args={
                          "kernels": [exponential_kernel] * len(gamma_range),
                          "kernels_args": [{
                              "gamma": g
                          } for g in gamma_range]
                      })

    # Sum of kernels
    Klist = [
        Kinterface(data=X, kernel=exponential_kernel, kernel_args={"gamma": g})
        for g in gamma_range
    ]

    a = np.arange(X.shape[0], dtype=int)
    if inducing_mode == "uniform":
        p = None
    elif inducing_mode == "biased":
        af = np.sum(X + abs(X.min(axis=0)), axis=1)
        p = (af**2 / (af**2).sum())
    else:
        raise ValueError(inducing_mode)

    inxs = np.random.choice(a, p=p, size=rank, replace=False)
    if signal_sampling == "GP":
        Kny = Ksum[:, inxs].dot(np.linalg.inv(Ksum[inxs,
                                                   inxs])).dot(Ksum[inxs, :])
        f = mvn.rvs(mean=np.zeros((N, )), cov=Kny)
        y = mvn.rvs(mean=f, cov=noise * np.eye(N, N))
    elif signal_sampling == "weights":
        L = Ksum[:,
                 inxs].dot(scipy.linalg.sqrtm(np.linalg.inv(Ksum[inxs, inxs])))
        w = mvn.rvs(mean=np.zeros(rank, ), cov=np.eye(rank, rank)).ravel()
        f = L.dot(w)
        y = f + np.random.rand(n, 1).ravel() * noise
    else:
        raise ValueError(signal_sampling)

    return Ksum, Klist, inxs, X, Xp, y, f
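
A hedged usage sketch of the generator above; it assumes the imports the function body relies on are already in scope (numpy, scipy.linalg, scipy.stats.multivariate_normal as mvn, SPGP and the mklaren kernel helpers):

Ksum, Klist, inxs, X, Xp, y, f = generate_data(n=100, rank=5, seed=0)
print(X.shape, y.shape, len(inxs), len(Klist))   # (100, 1) (100,) 5 1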