def test_kernels(self):
    """Exercise the psi-statistic checks across several kernel types.

    Covers plain ARD kernels (RBF, Linear, MLP) and additive combinations
    with Bias/White, each randomized before checking kernel parameters,
    inducing inputs Z, and variational posterior qX — both with the
    default settings and with ``psi2n=True``.
    """
    from GPy.kern import RBF, Linear, MLP, Bias, White

    Q = self.Z.shape[1]
    candidates = [
        RBF(Q, ARD=True),
        Linear(Q, ARD=True),
        MLP(Q, ARD=True),
        RBF(Q, ARD=True) + Linear(Q, ARD=True) + Bias(Q) + White(Q),
        RBF(Q, ARD=True) + Bias(Q) + White(Q),
        Linear(Q, ARD=True) + Bias(Q) + White(Q),
    ]
    for kern in candidates:
        # Randomize hyperparameters so each run checks a fresh setting.
        kern.randomize()
        self._test_kernel_param(kern)
        self._test_Z(kern)
        self._test_qX(kern)
        # Repeat the same checks with the normalized psi2 statistics.
        self._test_kernel_param(kern, psi2n=True)
        self._test_Z(kern, psi2n=True)
        self._test_qX(kern, psi2n=True)
def _create_kernel(self, V):
    """Assemble the model kernel: one RBF per input dimension, plus a
    fixed kernel built from ``tdot(V)`` and a bias term.

    Stores the components on ``self`` (``_kerns``, ``_kernf``, ``_kernb``)
    and the combined kernel on ``self.kernel``.
    """
    # One 1-d ARD RBF kernel per dimension, each restricted to its own column.
    per_dim = [RBF(1, ARD=True, active_dims=[d]) for d in range(self.n_dims)]
    self._kerns = per_dim
    # Fixed covariance from the outer product V V^T; bias captures a constant offset.
    self._kernf = Fixed(self.n_dims, tdot(V))
    self._kernb = Bias(self.n_dims)
    # np.sum folds the kernel list with `+` into a single additive kernel.
    self.kernel = np.sum(self._kerns) + self._kernf + self._kernb
def gp_on_fold(feature_sets, train, test, y, y_all, learn_options):
    """Fit a (possibly warped) GP on one cross-validation fold and predict.

    Builds an additive kernel starting from a WeightedDegree kernel over the
    guide sequences, then appends one kernel per available feature set, each
    acting on its own column range of the stacked design matrix X.

    Parameters
    ----------
    feature_sets : dict mapping feature name -> object with a ``.values``
        2-d array (presumably pandas DataFrames — confirm against caller).
    train, test : index arrays selecting the fold's rows.
    y : target array; ``y[train]`` is used for fitting.
    y_all : frame whose first index level holds the sequences.
    learn_options : dict; reads "kernel degree" and "warpedGP".

    Returns
    -------
    (y_pred, params) where y_pred is the GP predictive mean on the test rows
    and params is the optimized model's flattened parameter vector.
    """
    sequences = np.array(
        [str(x) for x in y_all.index.get_level_values(0).tolist()]
    )
    kern = WeightedDegree(
        1, sequences, d=learn_options["kernel degree"], active_dims=[0]
    )
    # Column 0 of X is the row index consumed by the WeightedDegree kernel.
    X = np.arange(len(train))[:, None]
    current_dim = 1
    if X.shape[1] != current_dim:
        raise AssertionError("incorrect number of columns")

    # Table of optional feature blocks: (feature key, kernel class,
    # kernel name, use ARD). Each present feature contributes a kernel over
    # its own contiguous column range and its columns are appended to X.
    feature_specs = [
        ("gc_count", RBF, "GC_rbf", False),
        ("drug", Linear, "drug_lin", False),
        ("gene effect", Linear, "gene_lin", False),
        ("Percent Peptide", RBF, "percent_pept", False),
        ("Nucleotide cut position", RBF, "nucleo_cut", False),
        ("Strand effect", Linear, "strand", False),
        ("NGGX", Linear, "NGGX", False),
        ("TM", RBF, "TM", True),
        ("gene features", Linear, "genefeat", True),
    ]
    for feat, kernel_cls, kernel_name, ard in feature_specs:
        if feat not in feature_sets:
            continue
        values = feature_sets[feat].values
        Q = values.shape[1]
        kwargs = {
            "active_dims": range(current_dim, current_dim + Q),
            "name": kernel_name,
        }
        if ard:
            kwargs["ARD"] = True
        kern += kernel_cls(Q, **kwargs)
        X = np.concatenate((X, values), axis=1)
        current_dim += Q
        # Sanity check: X must stay in lockstep with the claimed dimension.
        if X.shape[1] != current_dim:
            raise AssertionError("incorrect number of columns")

    kern += Bias(X.shape[1])

    if learn_options["warpedGP"]:
        m = WarpedGP(X[train], y[train], kernel=kern)
    else:
        m = GPRegression(X[train], y[train], kernel=kern)
    m.optimize_restarts(3)

    y_pred, _ = m.predict(X[test])
    # TODO add offset such that low scores are around 0 (not -4 or so)
    return y_pred, m[:]