def init_linear(X, Z, all_kernels, initialized_Zs=False):
    """
    if there are no Zs from an initialization (e.g. for warm-starting),
    all_Zs is initialized according to the Salimbeni scheme (Z should be MxD).
    otherwise the Zs obtained from the initialization are simply taken and put
    into the all_Zs array (Z should be a list of L arrays)
    """
    if initialized_Zs:
        all_Zs = Z
    else:
        all_Zs = []
    all_mean_funcs = []
    X_running = X.copy()
    if not initialized_Zs:
        Z_running = Z.copy()
    # each adjacent kernel pair defines one inner layer's input/output dims
    for kern_in, kern_out in zip(all_kernels[:-1], all_kernels[1:]):
        dim_in = kern_in.input_dim
        dim_out = kern_out.input_dim
        if dim_in == dim_out:
            mf = Identity()
        else:
            if dim_in > dim_out:  # stepping down, use the pca projection
                _, _, V = np.linalg.svd(X_running, full_matrices=False)
                W = V[:dim_out, :].T

            else:  # stepping up, use identity + padding
                W = np.concatenate(
                    [np.eye(dim_in),
                     np.zeros((dim_in, dim_out - dim_in))], 1)

            mf = Linear(W)
            mf.set_trainable(False)

        all_mean_funcs.append(mf)
        if not initialized_Zs:
            all_Zs.append(Z_running)

        if dim_in != dim_out:
            X_running = X_running.dot(W)
            if not initialized_Zs:
                Z_running = Z_running.dot(W)

    # final layer
    all_mean_funcs.append(Zero())
    if not initialized_Zs:
        all_Zs.append(Z_running)
    return all_Zs, all_mean_funcs
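
The projection W is the only nontrivial step above; here is a minimal numpy-only sketch of both branches, with illustrative shapes that are not from the original code:

import numpy as np

rng = np.random.default_rng(0)
X = rng.standard_normal((100, 5))

# stepping down (dim_in=5 -> dim_out=2): project onto the top right-singular
# vectors of X, i.e. the PCA directions
_, _, V = np.linalg.svd(X, full_matrices=False)
W_down = V[:2, :].T                                      # shape (5, 2)
assert X.dot(W_down).shape == (100, 2)

# stepping up (dim_in=5 -> dim_out=8): identity plus zero padding
W_up = np.concatenate([np.eye(5), np.zeros((5, 3))], 1)  # shape (5, 8)
assert X.dot(W_up).shape == (100, 8)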
Example #2
def init_layers_linear(X,
                       Y,
                       Z,
                       kernels,
                       num_outputs=None,
                       mean_function=Zero(),
                       Layer=SVGP_Layer,
                       white=False):
    num_outputs = num_outputs or Y.shape[1]

    layers = []

    X_running, Z_running = X.copy(), Z.copy()
    for kern_in, kern_out in zip(kernels[:-1], kernels[1:]):
        dim_in = kern_in.input_dim
        dim_out = kern_out.input_dim
        if dim_in == dim_out:
            mf = Identity()

        else:
            if dim_in > dim_out:  # stepping down, use the pca projection
                _, _, V = np.linalg.svd(X_running, full_matrices=False)
                W = V[:dim_out, :].T

            else:  # stepping up, use identity + padding
                W = np.concatenate(
                    [np.eye(dim_in),
                     np.zeros((dim_in, dim_out - dim_in))], 1)

            mf = Linear(W)
            mf.set_trainable(False)

        layers.append(Layer(kern_in, Z_running, dim_out, mf, white=white))

        if dim_in != dim_out:
            Z_running = Z_running.dot(W)
            X_running = X_running.dot(W)

    # final layer
    layers.append(
        Layer(kernels[-1], Z_running, num_outputs, mean_function, white=white))
    return layers
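
A hedged usage sketch: it assumes GPflow 1.x kernels and the SVGP_Layer class from the surrounding codebase; the data shapes and layer widths are illustrative only.

import numpy as np
from gpflow.kernels import RBF

X = np.random.randn(500, 10)
Y = np.random.randn(500, 1)
Z = X[:100].copy()                   # inducing inputs, shape (M, D)

# three layers, widths 10 -> 5 -> 5 -> 1; each kernel's input_dim fixes a width
kernels = [RBF(10), RBF(5), RBF(5)]
layers = init_layers_linear(X, Y, Z, kernels)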
Example #3
    def __init__(self, X, Y, Z, kernels, likelihood,
                 num_outputs=None,
                 mean_function=Zero(),  # the final layer mean function
                 **kwargs):
        Model.__init__(self)
        num_outputs = num_outputs or Y.shape[1]

        # init the layers
        layers = []

        # inner layers
        X_running, Z_running = X.copy(), Z.copy()
        for kern_in, kern_out in zip(kernels[:-1], kernels[1:]):
            dim_in = kern_in.input_dim
            dim_out = kern_out.input_dim

            if dim_in == dim_out:
                mf = Identity()

            else:
                if dim_in > dim_out:  # stepping down, use the pca projection
                    _, _, V = np.linalg.svd(X_running, full_matrices=False)
                    W = V[:dim_out, :].T

                else:  # pad with zeros
                    zeros = np.zeros((dim_in, dim_out - dim_in))
                    W = np.concatenate([np.eye(dim_in), zeros], 1)

                mf = Linear(W)
                mf.set_trainable(False)

            layers.append(SVGP_Layer(kern_in, Z_running, dim_out, mf))

            if dim_in != dim_out:
                Z_running = Z_running.dot(W)
                X_running = X_running.dot(W)


        # final layer
        layers.append(SVGP_Layer(kernels[-1], Z_running, num_outputs, mean_function))

        DGP_Base.__init__(self, X, Y, likelihood, layers, **kwargs)
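
A hedged usage sketch, assuming this __init__ belongs to a class named DGP (as in the Doubly Stochastic DGP reference code), GPflow 1.x kernels/likelihoods, and that **kwargs such as num_samples are forwarded to DGP_Base:

import numpy as np
from gpflow.kernels import RBF
from gpflow.likelihoods import Gaussian

X = np.random.randn(500, 4)
Y = np.random.randn(500, 1)
Z = X[:50].copy()

kernels = [RBF(4), RBF(2)]           # two layers: 4 -> 2 -> 1
# DGP is an assumed class name for the constructor shown above
model = DGP(X, Y, Z, kernels, Gaussian(), num_samples=5)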
Example #4
    def __init__(
            self,
            X,
            Y,
            Z,
            kernels,
            likelihood,
            num_outputs=None,
            mean_function=Zero(),  # the final layer mean function
            **kwargs):
        Model.__init__(self)
        num_outputs = num_outputs or Y.shape[1]

        # init the layers
        layers = []

        # inner layers
        X_running, Z_running = X.copy(), Z.copy()
        for kern_in, kern_out in zip(kernels[:-1], kernels[1:]):

            if isinstance(kern_in, Conv):
                dim_in = kern_in.basekern.input_dim
            else:
                dim_in = kern_in.input_dim
            # Note: unlike dim_in, dim_out is read from the full kernel even
            # when kern_out is a Conv (not from kern_out.basekern).
            dim_out = kern_out.input_dim

            if dim_in == dim_out:
                mf = Identity()

            else:  # stepping down, use the pca projection (assumes dim_in > dim_out)
                _, _, V = np.linalg.svd(X_running, full_matrices=False)
                W = V[:dim_out, :].T
                b = np.zeros(1, dtype=np.float32)
                mf = Linear(W, b)
                mf.set_trainable(False)

            if isinstance(kern_in, Conv):
                Z_patch = np.unique(kern_in.compute_patches(Z_running).reshape(
                    -1, kern_in.patch_len),
                                    axis=0)
                Z_patch = Z_patch[np.random.permutation(
                    (len(Z_patch)))[:Z_running.shape[0]], :]
                layers.append(svconvgp(kern_in, Z_patch, dim_out, mf))

            else:
                layers.append(SVGP_Layer(kern_in, Z_running, dim_out, mf))

            if dim_in != dim_out:
                Z_running = Z_running.dot(W)
                X_running = X_running.dot(W)

        # final layer
        if isinstance(kernels[-1], Conv):
            Z_patch = np.unique(kernels[-1].compute_patches(Z_running).reshape(
                -1, kernels[-1].patch_len),
                                axis=0)
            Z_patch = Z_patch[np.random.permutation(
                (len(Z_patch)))[:Z_running.shape[0]], :]
            layers.append(
                svconvgp(kernels[-1], Z_patch, num_outputs, mean_function))
        else:
            layers.append(
                SVGP_Layer(kernels[-1], Z_running, num_outputs, mean_function))
        DGP_Base.__init__(self, X, Y, likelihood, layers, **kwargs)
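
A minimal numpy sketch of the inducing-patch construction used for Conv kernels above: extract all patches, deduplicate, then subsample M of them. The extract_patches helper is a hypothetical stand-in for kern.compute_patches, and the image/patch sizes are illustrative assumptions.

import numpy as np

def extract_patches(Z_img, img_hw=(8, 8), patch_hw=(3, 3)):
    # all overlapping patch_hw patches of each flattened img_hw image in Z_img
    H, W = img_hw
    h, w = patch_hw
    Z_img = Z_img.reshape(-1, H, W)
    patches = [Z_img[:, i:i + h, j:j + w].reshape(len(Z_img), -1)
               for i in range(H - h + 1) for j in range(W - w + 1)]
    return np.stack(patches, 1)                # (M, num_patches, h*w)

Z_running = np.random.randn(20, 64)            # 20 inducing "images", 8x8 each
patch_len = 9
Z_patch = np.unique(extract_patches(Z_running).reshape(-1, patch_len), axis=0)
Z_patch = Z_patch[np.random.permutation(len(Z_patch))[:Z_running.shape[0]], :]
assert Z_patch.shape == (20, patch_len)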
Example #5
def init_layers_graph(X, Y, Z, kernels, gmat,
                      num_layers=2,
                      num_nodes=None,
                      dim_per_node=5,
                      dim_per_X=5, dim_per_Y=5,
                      share_Z=False,
                      nb_init=True):
    layers = []

    def pa_idx(nd, dim_per_in):
        # column indices of all parents of node nd in the flattened node layout
        res = []
        for n in range(num_nodes):
            w = gmat[nd, n]
            if w > 0:
                res = res + list(range(n * dim_per_in, (n + 1) * dim_per_in))
        res = np.asarray(res)
        return res

    X_running, Z_running = X.copy(), Z.copy()
    for l in range(num_layers - 1):
        if l == 0:
            dim_in = dim_per_X
            dim_out = dim_per_node
        else:
            dim_in = dim_per_node
            dim_out = dim_per_node
        X_running_tmp = np.zeros((X.shape[0], dim_out * num_nodes))
        Z_running_tmp = np.zeros((Z.shape[0], dim_out * num_nodes))
        mf_lst = ParamList([], trainable=False)
        for nd in range(num_nodes):
            if nb_init:
                pa = pa_idx(nd, dim_in)
            else:
                pa = np.asarray(range(nd * dim_in, (nd + 1) * dim_in))
            agg_dim_in = len(pa)

            if agg_dim_in == dim_out:
                mf = Identity()

            else:
                if agg_dim_in > dim_out:  # stepping down, use the pca projection
                    _, _, V = np.linalg.svd(X_running[:, pa], full_matrices=False)
                    W = V[:dim_out, :].T

                else:  # stepping up, use identity + padding
                    W = np.concatenate([np.eye(agg_dim_in), np.zeros((agg_dim_in, dim_out - agg_dim_in))], 1)

                mf = Linear(W)
                mf.set_trainable(False)
            mf_lst.append(mf)
            if agg_dim_in != dim_out:
                Z_running_tmp[:, nd * dim_out:(nd + 1) * dim_out] = Z_running[:, pa].dot(W)
                X_running_tmp[:, nd * dim_out:(nd + 1) * dim_out] = X_running[:, pa].dot(W)
            else:
                Z_running_tmp[:, nd * dim_out:(nd + 1) * dim_out] = Z_running[:, pa]
                X_running_tmp[:, nd * dim_out:(nd + 1) * dim_out] = X_running[:, pa]

        layers.append(
            SVGPG_Layer(kernels[l], Z_running, mf_lst, num_nodes, dim_in, dim_out, gmat, share_Z=share_Z, nb_init=nb_init))
        Z_running = Z_running_tmp
        X_running = X_running_tmp

    # final layer
    if num_layers == 1:
        fin_dim_in = dim_per_X
    else:
        fin_dim_in = dim_per_node
    layers.append(
        SVGPG_Layer(kernels[-1], Z_running, None, num_nodes, fin_dim_in, dim_per_Y, gmat, share_Z=share_Z, nb_init=nb_init))
    return layers
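
A toy illustration of the pa_idx aggregation: with a 3-node graph and dim_per_in=2 (both hypothetical), node nd gathers the input columns of every node n with gmat[nd, n] > 0.

import numpy as np

gmat = np.array([[1, 1, 0],
                 [1, 1, 1],
                 [0, 1, 1]])   # row nd marks the (weighted) parents of node nd
num_nodes, dim_per_in = 3, 2

def pa_idx(nd):
    res = []
    for n in range(num_nodes):
        if gmat[nd, n] > 0:
            res += list(range(n * dim_per_in, (n + 1) * dim_per_in))
    return np.asarray(res)

print(pa_idx(0))   # [0 1 2 3] -> columns of nodes 0 and 1
print(pa_idx(2))   # [2 3 4 5] -> columns of nodes 1 and 2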