Beispiel #1
0
    def test_kernels(self):
        """Run the kernel-parameter, Z and qX checks over a set of ARD kernels.

        Every kernel is built with input dimension ``self.Z.shape[1]``,
        randomized, and then passed to ``_test_kernel_param``, ``_test_Z``
        and ``_test_qX`` -- first with their default arguments and then
        again with ``psi2n=True``.
        """
        from GPy.kern import RBF, Linear, MLP, Bias, White

        input_dim = self.Z.shape[1]

        # Three single kernels followed by three additive combinations.
        kernels = [
            RBF(input_dim, ARD=True),
            Linear(input_dim, ARD=True),
            MLP(input_dim, ARD=True),
            (
                RBF(input_dim, ARD=True)
                + Linear(input_dim, ARD=True)
                + Bias(input_dim)
                + White(input_dim)
            ),
            RBF(input_dim, ARD=True) + Bias(input_dim) + White(input_dim),
            Linear(input_dim, ARD=True) + Bias(input_dim) + White(input_dim),
        ]

        # The same three checks are run twice per kernel, in this order.
        checks = (self._test_kernel_param, self._test_Z, self._test_qX)

        for kern in kernels:
            kern.randomize()
            for check in checks:
                check(kern)
            for check in checks:
                check(kern, psi2n=True)
Beispiel #2
0
 def _create_kernel(self, V):
     """Assemble ``self.kernel`` from per-dimension RBFs, a Fixed and a Bias.

     Stores the pieces on the instance as ``_kerns`` (one one-dimensional
     ARD ``RBF`` per index in ``range(self.n_dims)``, each restricted to
     that single active dimension), ``_kernf`` (a ``Fixed`` kernel built
     from ``tdot(V)``) and ``_kernb`` (a ``Bias`` kernel), then sets
     ``self.kernel`` to their sum.
     """
     n_dims = self.n_dims

     # One RBF per input dimension, each seeing only its own column.
     per_dim_kernels = []
     for dim in range(n_dims):
         per_dim_kernels.append(RBF(1, ARD=True, active_dims=[dim]))
     self._kerns = per_dim_kernels

     fixed_cov = tdot(V)
     self._kernf = Fixed(n_dims, fixed_cov)
     self._kernb = Bias(n_dims)

     rbf_total = np.sum(self._kerns)
     self.kernel = rbf_total + self._kernf + self._kernb
Beispiel #3
0
def gp_on_fold(feature_sets, train, test, y, y_all, learn_options):
    """Fit a Gaussian process on one fold and predict the held-out rows.

    The kernel is the sum of:

    * a ``WeightedDegree`` kernel on column 0 of the design matrix (a
      running row index), constructed with the sequence strings taken from
      level 0 of ``y_all.index``;
    * one kernel per recognised entry of ``feature_sets``, acting on the
      columns that entry contributes to the design matrix;
    * a ``Bias`` kernel over all columns.

    Args:
        feature_sets: Mapping from feature-set name to an object whose
            ``.values`` is a 2-D array (presumably pandas DataFrames --
            confirm against the caller). Names that have no entry in the
            kernel table below are ignored.
        train: Indexer selecting the training rows of the design matrix
            and of ``y``. ``len(train)`` sets the number of rows of the
            design matrix.
        test: Indexer selecting the rows to predict.
        y: Targets; indexed with ``train``.
        y_all: Object whose ``index.get_level_values(0)`` yields the
            sequences handed to the ``WeightedDegree`` kernel.
        learn_options: Mapping; the keys ``"kernel degree"`` and
            ``"warpedGP"`` are read.

    Returns:
        A tuple ``(y_pred, params)`` where ``y_pred`` is the first output
        of ``m.predict(X[test])`` and ``params`` is ``m[:]`` for the
        optimized model ``m``.

    Raises:
        AssertionError: If, after appending a feature set, the design
            matrix does not have the expected number of columns.
    """
    sequences = np.array([str(x) for x in y_all.index.get_level_values(0).tolist()])

    kern = WeightedDegree(
        1, sequences, d=learn_options["kernel degree"], active_dims=[0]
    )
    X = np.arange(len(train))[:, None]

    # Column 0 is the row index used by the WeightedDegree kernel; feature
    # columns are appended from here on.
    current_dim = 1

    # (feature-set key, kernel class, kernel name, extra kernel kwargs,
    #  fixed width or None). Order matters: it fixes both the column layout
    # of X and the order in which kernels are added.
    feature_kernels = (
        # gc_count is modelled as exactly one column.
        ("gc_count", RBF, "GC_rbf", {}, 1),
        ("drug", Linear, "drug_lin", {}, None),
        ("gene effect", Linear, "gene_lin", {}, None),
        ("Percent Peptide", RBF, "percent_pept", {}, None),
        ("Nucleotide cut position", RBF, "nucleo_cut", {}, None),
        ("Strand effect", Linear, "strand", {}, None),
        ("NGGX", Linear, "NGGX", {}, None),
        ("TM", RBF, "TM", {"ARD": True}, None),
        ("gene features", Linear, "genefeat", {"ARD": True}, None),
    )

    for key, kernel_cls, kernel_name, extra_kwargs, fixed_width in feature_kernels:
        if key not in feature_sets:
            continue

        values = feature_sets[key].values
        width = values.shape[1] if fixed_width is None else fixed_width

        kern += kernel_cls(
            width,
            active_dims=list(range(current_dim, current_dim + width)),
            name=kernel_name,
            **extra_kwargs
        )
        X = np.concatenate((X, values), axis=1)
        current_dim += width

        # Guards against a feature set that is wider than the kernel that
        # was just added for it (only possible for fixed-width entries).
        if X.shape[1] != current_dim:
            raise AssertionError("incorrect number of columns")

    kern += Bias(X.shape[1])

    if learn_options["warpedGP"]:
        m = WarpedGP(X[train], y[train], kernel=kern)
    else:
        m = GPRegression(X[train], y[train], kernel=kern)

    m.optimize_restarts(3)
    y_pred, _ = m.predict(X[test])

    # TODO add offset such that low scores are around 0 (not -4 or so)

    return y_pred, m[:]