Example #1
 def __init__(self,init_lengthscales,init_variance,input_dim=None,name="kernel"):
     with tf.name_scope(name):
         lengthscales = Param(init_lengthscales,
                                   transform=transforms.Log1pe(),
                                   name="lengthscale")
         variance     = Param(init_variance,
                                   transform=transforms.Log1pe(),
                                   name="variance")
     self.lengthscales = lengthscales()
     self.input_dim = input_dim
     self.variance = variance()
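Every one of these constructors stores an unconstrained variable and pushes it through transforms.Log1pe() so that lengthscales and variances stay strictly positive. As a point of reference, here is a minimal NumPy sketch of that transform and its inverse, assuming Log1pe is the usual softplus log(1 + e^x) as in gpflow.transforms (the Param wrapper itself is not reproduced; the function names are illustrative):

import numpy as np

def log1pe(x):
    # softplus: map an unconstrained value to a strictly positive one
    return np.log1p(np.exp(x))

def log1pe_inverse(y):
    # inverse softplus: the unconstrained value a Param would store
    return np.log(np.expm1(y))

init_lengthscale = 0.5
raw = log1pe_inverse(init_lengthscale)      # unconstrained storage value
assert np.isclose(log1pe(raw), init_lengthscale)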
Example #2
 def __init__(self,
              likelihood,
              pred_layer,
              dimX,
              dimY,
              dimW,
              latent_dim,
              Zx,
              Zw,
              num_samples=1,
              num_data=None,
              elbo='IWVI-VI',
              beta=1e-2,
              inner_dims=[100, 100, 100]):
     LGP_base.__init__(self, likelihood, pred_layer, dimX, dimY, dimW,
                       num_samples, num_data, elbo, inner_dims)
     self.latent_dim = latent_dim
     self.beta = beta
     self.variance = Param(1e-2,
                           transform=transforms.Log1pe(),
                           name="prior_var")()
     # reassign Z
     self.Zx, self.Zw = Param(Zx, name="zx")(), Param(Zw, name="zw")()
     self.Zw_latent = self.nn_W_xy.forward(self.Zw)[0]
     self.nn_X_xw = mlp_share(self.dimX + self.dimW,
                              self.latent_dim * 2,
                              inner_dims=inner_dims,
                              var=self.variance)
     Z_trans = self.nn_X_xw.forward(tf.concat([self.Zx, self.Zw_latent],
                                              -1))[0]
     self.pred_layer.initialize_Z(Z_trans)  # convert Z to tensor
Example #3
 def __init__(self, variance=1e-0, D=None, **kwargs):
     super().__init__(**kwargs)
     if D is not None:  # allow different noises for outputs
         variance = variance * np.ones((1, D))
     self.variance = Param(variance,
                           transform=transforms.Log1pe(),
                           name="noise_variance")()
Example #4
 def __init__(self, likelihood, pred_layer, latent_dim, num_samples=1,
              flow_time = 1.0, flow_nsteps = 20, num_data=None):
     GP.__init__(self, likelihood, pred_layer, num_samples, num_data)
     self.latent_dim = latent_dim
     self.flow_time, self.flow_nsteps = flow_time, flow_nsteps
     self.prior_noise = Param(1e-2 / self.flow_time, transform=transforms.Log1pe(), name="prior_var")()
     self.nn_diff = mlp_share_t(self.latent_dim+1, self.latent_dim * 2, var=self.prior_noise)
     self.sde_solver = EulerMaruyama(self.nn_diff.forward, self.flow_time, self.flow_nsteps)
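Example #4 hands the drift/diffusion network to an EulerMaruyama solver over [0, flow_time] with flow_nsteps steps. The solver itself is not shown in this snippet; the following is a generic NumPy sketch of the Euler-Maruyama scheme for dx = f(x, t) dt + g(x, t) dW, with toy drift and diffusion functions standing in for nn_diff.forward (the names and signature here are illustrative, not the repo's API):

import numpy as np

def euler_maruyama(drift, diffusion, x0, total_time, nsteps, seed=0):
    # integrate dx = drift(x, t) dt + diffusion(x, t) dW from t = 0 to total_time
    rng = np.random.RandomState(seed)
    dt = total_time / nsteps
    x, t = np.asarray(x0, dtype=float), 0.0
    for _ in range(nsteps):
        dW = rng.randn(*x.shape) * np.sqrt(dt)   # Brownian increment ~ N(0, dt)
        x = x + drift(x, t) * dt + diffusion(x, t) * dW
        t += dt
    return x

# toy Ornstein-Uhlenbeck dynamics: mean-reverting drift, constant diffusion
x_end = euler_maruyama(lambda x, t: -x, lambda x, t: 0.1 * np.ones_like(x),
                       x0=[1.0, -1.0], total_time=1.0, nsteps=20)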
Example #5
 def __init__(self, likelihood, pred_layer, dimX, latent_dim, Z, num_samples, num_data=None):
     GP.__init__(self, likelihood, pred_layer, num_samples, num_data)
     self.dimX, self.latent_dim = dimX, latent_dim
     self.prior_noise = Param(1e-1, transform=transforms.Log1pe(), name="prior_var")()
     self.nn_encoder = mlp_share(self.dimX, self.latent_dim*2, var=self.prior_noise)
     # reassign Z
     self.Z = Param(Z, name="z")()
     Z_mean = self.nn_encoder.forward(self.Z)[0]
     self.pred_layer.initialize_Z(Z_mean)
Example #6
    def __init__(self,
                 x0,
                 t,
                 Y,
                 Z0,
                 U0,
                 sn0,
                 kern,
                 jitter=jitter0,
                 summ=False,
                 whiten=True,
                 fix_Z=False,
                 fix_U=False,
                 fix_sn=False):
        """ Constructor for the NPODE model
        
        Args:
            x0: Numpy matrix of size TxD of initial values. T is the number of 
                input sequences and D is the problem dimensionality.
            t: Python array of T numpy vectors storing observation times
            Y: Python array of T numpy matrices storing observations. Observations
                 are stored in rows.
            Z0: Numpy matrix of initial inducing points of size MxD, M being the
                number of inducing points.
            U0: Numpy matrix of initial inducing vectors of size MxD, M being the
                number of inducing points.
            sn0: Numpy vector of size 1xD for initial noise variance
            kern: Kernel object for GP interpolation
            jitter: Float of jitter level
            whiten: Boolean. Currently we perform the optimization only in the 
                white domain
            summ: Boolean for Tensorflow summary
            fix_Z: Boolean - whether inducing locations are fixed or optimized
            fix_U: Boolean - whether inducing vectors are fixed or optimized
            fix_sn: Boolean - whether noise variance is fixed or optimized
        """
        self.name = 'npode'
        self.whiten = whiten
        self.kern = kern
        self.jitter = jitter
        with tf.name_scope("NPDE"):
            Z = Param(Z0, name="Z", summ=False, fixed=fix_Z)
            U = Param(U0, name="U", summ=False, fixed=fix_U)

            sn = Param(np.array(sn0),
                       name="sn",
                       summ=summ,
                       fixed=fix_sn,
                       transform=transforms.Log1pe())
        self.Z = Z()
        self.U = U()
        self.sn = sn()
        self.D = U.shape[1]
        self.x0 = x0
        self.t = t
        self.Y = Y
        self.integrator = ODERK4(self, x0, t)
Example #7
 def __init__(self,sf0,ell0,name="kernel",learning_rate=0.01,
              summ=False,fix_sf=False,fix_ell=False):
     with tf.name_scope(name):
         sf = Param(sf0,
                           transform=transforms.Log1pe(),
                           name="sf",
                           learning_rate = learning_rate,
                           summ = summ,
                           fixed = fix_sf)
         ell = Param(ell0,
                           transform=transforms.Log1pe(),
                           name="ell",
                           learning_rate = learning_rate,
                           summ = summ,
                           fixed = fix_ell)
     self.sf = sf()
     self.ell = ell()
     self.fix_sf = fix_sf
     self.fix_ell = fix_ell
Example #8
    def __init__(self,
                 Z0,
                 U0,
                 sn0,
                 kern,
                 jitter=jitter0,
                 summ=False,
                 whiten=True,
                 fix_Z=False,
                 fix_U=False,
                 fix_sn=False):
        """ Constructor for the NPODE model
        
        Args:
            Z0: Numpy matrix of initial inducing points of size MxD, M being the
                number of inducing points.
            U0: Numpy matrix of initial inducing vectors of size MxD, M being the
                number of inducing points.
            sn0: Numpy vector of size 1xD for initial noise variance
            kern: Kernel object for GP interpolation
            jitter: Float of jitter level
            whiten: Boolean. Currently we perform the optimization only in the 
                white domain
            summ: Boolean for Tensorflow summary
            fix_Z: Boolean - whether inducing locations are fixed or optimized
            fix_U: Boolean - whether inducing vectors are fixed or optimized
            fix_sn: Boolean - whether noise variance is fixed or optimized
        """
        self.name = 'npode'
        self.whiten = whiten
        self.kern = kern
        self.jitter = jitter
        with tf.name_scope("NPDE"):
            Z = Param(Z0, name="Z", summ=False, fixed=fix_Z)
            U = Param(U0, name="U", summ=False, fixed=fix_U)

            sn = Param(np.array(sn0),
                       name="sn",
                       summ=summ,
                       fixed=fix_sn,
                       transform=transforms.Log1pe())
        self.Z = Z()
        self.U = U()
        self.sn = sn()
        self.D = U.shape[1]
        self.integrator = ODERK4(self)
        self.fix_Z = fix_Z
        self.fix_sn = fix_sn
        self.fix_U = fix_U
Example #9
def onoff(Xtrain,Ytrain,Xtest,Ytest,dir):
    tf.reset_default_graph()
    parentDir = "/l/hegdep1/onoffgp/uai/experiments/pptr"
    sys.path.append(parentDir)

    from onofftf.main import Param, DataSet, GaussKL, KernSE, GPConditional, GaussKLkron
    from onofftf.utils import modelmanager
    from gpflow import transforms

    modelPath = dir
    tbPath    = dir
    logPath   = dir + 'modelsumm.log'

    logger = logging.getLogger('log')
    logger.setLevel(logging.DEBUG)
    logger.addHandler(logging.FileHandler(logPath))
    logger.info("traning size   = " + str(Xtrain.shape[0]))
    logger.info("test size   = " + str(Xtest.shape[0]))

    traindf = pd.DataFrame({'ndatehour':Xtrain[:,2].flatten()*1000,'pptr':Ytrain.flatten()})
    train_data = DataSet(Xtrain, Ytrain)

    logger.info("number of training examples:" + str(Xtrain.shape))

    # ****************************************************************
    # parameter initializations
    # ****************************************************************
    list_to_np = lambda _list : [np.array(e) for e in _list]

    num_iter = 50000
    num_inducing_f = np.array([10,100])
    num_inducing_g = np.array([10,100])
    num_data = Xtrain.shape[0]
    num_minibatch = 1000

    init_fkell = list_to_np([[8., 8.],[5./1000]])
    init_fkvar = list_to_np([[20.],[20.]])

    init_gkell = list_to_np([[8.,8.],[5./1000]])
    init_gkvar = list_to_np([[10.],[10.]])

    init_noisevar = 0.01

    q_diag = True

    init_Zf_s = kmeans(Xtrain[:,0:2],num_inducing_f[0])[0]
    init_Zf_t = np.expand_dims(np.linspace(Xtrain[:,2].min(),Xtrain[:,2].max(),num_inducing_f[1]),axis=1)

    init_Zf = [init_Zf_s,init_Zf_t]
    init_u_fm = np.random.randn(np.prod(num_inducing_f),1)*0.1
    init_u_fs_sqrt = np.ones(np.prod(num_inducing_f)).reshape(1,-1).T

    init_Zg = init_Zf.copy()
    init_u_gm = np.random.randn(np.prod(num_inducing_g),1)*0.1
    init_u_gs_sqrt = np.ones(np.prod(num_inducing_g)).reshape(1,-1).T

    kern_param_learning_rate = 1e-3
    indp_param_learning_rate = 1e-3

    # ****************************************************************
    # define tensorflow variables and placeholders
    # ****************************************************************
    X = tf.placeholder(dtype = float_type)
    Y = tf.placeholder(dtype = float_type)

    with tf.name_scope("f_kern"):
        fkell = [Param(init_fkell[i],transform=transforms.Log1pe(),
                       name="lengthscale",learning_rate = kern_param_learning_rate,summ=True)
                 for i in range(len(num_inducing_f))]

        fkvar = [Param(init_fkvar[i],transform=transforms.Log1pe(),
                       name="variance",learning_rate = kern_param_learning_rate,summ=True)
                 for i in range(len(num_inducing_f))]

    fkern_list = [KernSE(fkell[i],fkvar[i]) for i in range(len(num_inducing_f))]

    with tf.name_scope("g_kern"):
        gkell = [Param(init_gkell[i],transform=transforms.Log1pe(),
                       name="lengthscale",learning_rate = kern_param_learning_rate,summ=True)
                 for i in range(len(num_inducing_g))]

        gkvar = [Param(init_gkvar[i],transform=transforms.Log1pe(),
                       name="variance",learning_rate = kern_param_learning_rate,summ=True)
                 for i in range(len(num_inducing_g))]

    gkern_list = [KernSE(gkell[i],gkvar[i]) for i in range(len(num_inducing_g))]

    with tf.name_scope("likelihood"):
        noisevar = Param(init_noisevar,transform=transforms.Log1pe(),
                         name="variance",learning_rate = kern_param_learning_rate,summ=True)


    with tf.name_scope("f_ind"):
        Zf_list = [Param(init_Zf[i],name="z",learning_rate = indp_param_learning_rate,summ=True)
                   for i in range(len(num_inducing_f))]

        u_fm = Param(init_u_fm,name="value",learning_rate = indp_param_learning_rate,summ=True)
        if q_diag:
            u_fs_sqrt = Param(init_u_fs_sqrt,transforms.positive,
                              name="variance",learning_rate = indp_param_learning_rate,summ=True)
        else:
            u_fs_sqrt = Param(init_u_fs_sqrt,transforms.LowerTriangular(init_u_fs_sqrt.shape[0]),
                              name="variance",learning_rate = indp_param_learning_rate,summ=True)


    with tf.name_scope("g_ind"):
        Zg_list = [Param(init_Zg[i],name="z",learning_rate = indp_param_learning_rate,summ=True)
                   for i in range(len(num_inducing_g))]

        u_gm = Param(init_u_gm,name="value",learning_rate = indp_param_learning_rate,summ=True)
        if q_diag:
            u_gs_sqrt = Param(init_u_gs_sqrt,transforms.positive,
                              name="variance",learning_rate = indp_param_learning_rate,summ=True)
        else:
            u_gs_sqrt = Param(init_u_gs_sqrt,transforms.LowerTriangular(init_u_gs_sqrt.shape[0]),
                              name="variance",learning_rate = indp_param_learning_rate,summ=True)


    # ****************************************************************
    # define model support functions
    # ****************************************************************
    def build_prior_kl(u_fm, u_fs_sqrt, fkern_list, Zf_list,
                       u_gm, u_gs_sqrt, gkern_list, Zg_list, whiten=False):
        if whiten:
            raise NotImplementedError()
        else:
            Kfmm = [fkern_list[i].K(Zf_list[i].get_tfv()) + \
                    tf.eye(num_inducing_f[i], dtype=float_type) * jitter_level
                    for i in range(len(num_inducing_f))]

            Kgmm = [gkern_list[i].K(Zg_list[i].get_tfv()) + \
                    tf.eye(num_inducing_g[i], dtype=float_type) * jitter_level
                    for i in range(len(num_inducing_g))]

            KL = GaussKLkron(u_fm.get_tfv(), u_fs_sqrt.get_tfv(), Kfmm) + \
                 GaussKLkron(u_gm.get_tfv(), u_gs_sqrt.get_tfv(), Kgmm)

        return KL

    def build_predict(Xnew,u_fm,u_fs_sqrt,fkern_list,Zf_list,u_gm,u_gs_sqrt,gkern_list,Zg_list,f_mu=None):

        input_mask_f = _gen_inp_mask(Zf_list)
        input_mask_g = _gen_inp_mask(Zg_list)

        # compute fmean and fvar from the kronecker inference
        fmean,fvar = kron_inf(Xnew,fkern_list,Zf_list,u_fm,u_fs_sqrt,num_inducing_f,input_mask_f)
        if f_mu is not None:
            fmean = fmean + f_mu.get_tfv()

        # compute gmean and gvar from the kronecker inference
        gmean,gvar = kron_inf(Xnew,gkern_list,Zg_list,u_gm,u_gs_sqrt,num_inducing_g,input_mask_g)

        # compute augmented distributions
        ephi_g, ephi2_g, evar_phi_g = probit_expectations(gmean, gvar)

        # compute augmented f
        # p(f|g) = N(f| diag(ephi_g)* A*u_fm, diag(evar_phi_g)) * (Kfnn + A(u_fs - Kfmm)t(A)))
        gfmean = tf.multiply(ephi_g, fmean)
        gfvar = tf.multiply(ephi2_g, fvar)
        gfmeanu = tf.multiply(evar_phi_g, tf.square(fmean))

        # return mean and variance vectors in order
        return gfmean, gfvar, gfmeanu, fmean, fvar, gmean, gvar, ephi_g, evar_phi_g

    def kron_inf(Xnew,kern_list,Z_list,q_mu,q_sqrt,num_inducing,input_mask):
        # Compute alpha = K_mm^-1 * f_m
        Kmm = [kern_list[p].K(Z_list[p].get_tfv()) + \
               tf.eye(num_inducing[p], dtype=float_type) * jitter_level
               for p in range(len(num_inducing))]

        Kmm_inv = [tf.matrix_inverse(Kmm[p]) for p in range(len(num_inducing))]
        alpha = __kron_mv(Kmm_inv,q_mu.get_tfv())

        n_batch = tf.stack([tf.shape(Xnew)[0],np.int32(1)])
        Knn = tf.ones(n_batch, dtype=float_type)
        Kmn_kron = []

        for p in range(len(num_inducing)):
            xnew = tf.gather(Xnew, input_mask[p], axis=1)
            Knn *= tf.reshape(kern_list[p].Kdiag(xnew), n_batch)
            Kmn_kron.append(kern_list[p].K(Z_list[p].get_tfv(), xnew))

        S = tf.diag(tf.squeeze(tf.square(q_sqrt.get_tfv())))

        Kmn = tf.reshape(tf.multiply(tf.expand_dims(Kmn_kron[0],1),Kmn_kron[1]),[np.prod(num_inducing),-1])
        A  = tf.matmul(tf_kron(*Kmm_inv),Kmn)

        mu = tf.matmul(Kmn, alpha, transpose_a=True)
        var = Knn - tf.reshape(tf.matrix_diag_part(tf.matmul(Kmn, A,transpose_a=True) - \
                               tf.matmul(tf.matmul(A,S,transpose_a=True),A)),[-1,1])

        return mu , var

    def __kron_mv( As, x):
        num_inducing = [int(As[p].get_shape()[0]) for p in range(len(As))]
        N = np.prod(num_inducing)
        b = tf.reshape(x, [N,1])
        for p in range(len(As)):
            Ap = As[p]
            X = tf.reshape(b, (num_inducing[p],
                               np.round(N/num_inducing[p]).astype(np.int)))
            b = tf.matmul(X, Ap, transpose_a=True, transpose_b=True)
            b = tf.reshape(b, [N,1])
        return b

    def tf_kron(*args):
        def __tf_kron(a,b):

            a_shape = [tf.shape(a)[0],tf.shape(a)[1]]
            b_shape = [tf.shape(b)[0],tf.shape(b)[1]]

            return tf.reshape(tf.reshape(a,[a_shape[0],1,a_shape[1],1])* \
                              tf.reshape(b,[1,b_shape[0],1,b_shape[1]]),
                              [a_shape[0]*b_shape[0],a_shape[1]*b_shape[1]])

        kron_prod = tf.constant(1.,shape=[1,1],dtype=float_type)
        for Ap in args:
            kron_prod = __tf_kron(kron_prod,Ap)

        return kron_prod

    def _gen_inp_mask(Z_list):
        input_mask = []
        tmp = 0
        for p in range(len(Z_list)):
            p_dim = Z_list[p].shape[1]
            input_mask.append(np.arange(tmp, tmp + p_dim, dtype=np.int32))
            tmp += p_dim
        return input_mask

    def variational_expectations(Y, fmu, fvar, fmuvar, noisevar):
        return -0.5 * np.log(2 * np.pi) - 0.5 * tf.log(noisevar) \
               - 0.5 * (tf.square(Y - fmu) + fvar + fmuvar) / noisevar

    def probit_expectations(gmean, gvar):
        def normcdf(x):
            return 0.5 * (1.0 + tf.erf(x / np.sqrt(2.0))) * (1. - 2.e-3) + 1.e-3

        def owent(h, a):
            h = tf.abs(h)
            term1 = tf.atan(a) / (2 * np.pi)
            term2 = tf.exp((-1 / 2) * (tf.multiply(tf.square(h), (tf.square(a) + 1))))
            return tf.multiply(term1, term2)

        z = gmean / tf.sqrt(1. + gvar)
        a = 1 / tf.sqrt(1. + (2 * gvar))

        cdfz = normcdf(z)
        tz = owent(z, a)

        ephig = cdfz
        ephisqg = (cdfz - 2. * tz)
        evarphig = (cdfz - 2. * tz - tf.square(cdfz))

        # clip negative values from variance terms to zero
        ephisqg = (ephisqg + tf.abs(ephisqg)) / 2.
        evarphig = (evarphig + tf.abs(evarphig)) / 2.

        return ephig, ephisqg, evarphig

    # ****************************************************************
    # build model and define lower bound
    # ****************************************************************
    # get kl term
    with tf.name_scope("kl"):
        kl = build_prior_kl(u_fm,u_fs_sqrt,fkern_list,Zf_list,
                            u_gm,u_gs_sqrt,gkern_list,Zg_list)
        tf.summary.scalar('kl', kl)

    # get augmented functions
    with tf.name_scope("model_build"):
        gfmean, gfvar, gfmeanu, fmean, fvar, gmean, gvar, pgmean, pgvar = build_predict(X,u_fm,u_fs_sqrt,fkern_list,Zf_list,
                                                                                               u_gm,u_gs_sqrt,gkern_list,Zg_list)
        tf.summary.histogram('gfmean',gfmean)
        tf.summary.histogram('gfvar',gfvar)
        tf.summary.histogram('gfmeanu',gfmeanu)
        tf.summary.histogram('fmean',fmean)
        tf.summary.histogram('fvar',fvar)
        tf.summary.histogram('gmean',gmean)
        tf.summary.histogram('gvar',gvar)
        tf.summary.histogram('pgmean',pgmean)
        tf.summary.histogram('pgvar',pgvar)

    # compute likelihood
    with tf.name_scope("var_exp"):
        var_exp = tf.reduce_sum(variational_expectations(Y,gfmean,gfvar,gfmeanu,noisevar.get_tfv()))
        tf.summary.scalar('var_exp', var_exp)

        # mini-batch scaling
        scale =  tf.cast(num_data, float_type) / tf.cast(num_minibatch, float_type)
        var_exp_scaled = var_exp * scale
        tf.summary.scalar('var_exp_scaled', var_exp_scaled)


    # final lower bound
    with tf.name_scope("cost"):
        cost =  -(var_exp_scaled - kl)
        tf.summary.scalar('cost',cost)


    # ****************************************************************
    # define optimizer op
    # ****************************************************************
    all_var_list = tf.trainable_variables()
    all_lr_list = [var._learning_rate for var in all_var_list]

    train_opt_group = []

    for group_learning_rate in set(all_lr_list):
        _ind_bool = np.where(np.isin(np.array(all_lr_list),group_learning_rate))[0]
        group_var_list = [all_var_list[ind] for ind in _ind_bool]
        group_tf_optimizer = tf.train.AdamOptimizer(learning_rate = group_learning_rate)
        group_grad_list = tf.gradients(cost,group_var_list)
        group_grads_and_vars = list(zip(group_grad_list,group_var_list))


        group_train_op = group_tf_optimizer.apply_gradients(group_grads_and_vars)

        # Summarize all gradients
        for grad, var in group_grads_and_vars:
            tf.summary.histogram(var.name + '/gradient', grad)

        train_opt_group.append({'names':[var.name for var in group_var_list],
                                'vars':group_var_list,
                                'learning_rate':group_learning_rate,
                                'grads':group_grad_list,
                                'train_op':group_train_op})

    train_op = tf.group(*[group['train_op'] for group in train_opt_group])



    # ****************************************************************
    # define graph and run optimization
    # ****************************************************************
    sess = tf.InteractiveSession()

    # model saver
    saver = tf.train.Saver()

    # tensorboard summary
    summ_merged = tf.summary.merge_all()
    summary_writer = tf.summary.FileWriter(tbPath,
                                            graph=sess.graph)

    sess.run(tf.global_variables_initializer())


    logger.info('*******  started optimization at ' + time.strftime('%Y%m%d-%H%M') + " *******")
    optstime = time.time()
    logger.info(
        '{:>16s}'.format("iteration") + '{:>6s}'.format("time"))

    for i in range(num_iter):
        optstime = time.time()
        batch = train_data.next_batch(num_minibatch)
        try:
            summary, _ = sess.run([summ_merged,train_op],
                                 feed_dict={X : batch[0],
                                            Y : batch[1]
                                 })

            if i% 200 == 0:
                logger.info(
                    '{:>16d}'.format(i) + '{:>6.3f}'.format((time.time() - optstime)/60))
                summary_writer.add_summary(summary,i)
                summary_writer.flush()

            if i% 10000 == 0:
                modelmngr = modelmanager(saver, sess, modelPath)
                modelmngr.save()

                # ****************************************************************
                # plot inducing monitoring plots
                # ****************************************************************
                lp_u_fm = u_fm.get_tfv().eval().flatten()
                lp_u_gm = u_gm.get_tfv().eval().flatten()

                lp_zf_t = Zf_list[1].get_tfv().eval().flatten()
                lp_zg_t = Zg_list[1].get_tfv().eval().flatten()

                lp_zf_sort_ind = np.argsort(lp_zf_t)
                lp_zg_sort_ind = np.argsort(lp_zg_t)

                scale_z = 1000
                mpl.rcParams['figure.figsize'] = (16,8)
                fig, (ax1,ax2,ax3) = plt.subplots(3, 1, sharex=True)

                mean_pptr = traindf.groupby('ndatehour')['pptr'].mean()
                ax1.bar(mean_pptr.index, mean_pptr.values, align='center')

                for m in np.arange(num_inducing_f[0]):
                    u_fm_temporal = lp_u_fm[m*num_inducing_f[1]:(m+1)*num_inducing_f[1]]
                    ax2.plot(np.round(lp_zf_t[lp_zf_sort_ind] * scale_z,4),u_fm_temporal[lp_zf_sort_ind],alpha=0.7)
                ax2.scatter(np.round(lp_zf_t[lp_zf_sort_ind] * scale_z,4),np.ones([num_inducing_f[1],1])*lp_u_fm.min(),color="#514A30")

                for m in np.arange(num_inducing_g[0]):
                    u_gm_temporal = lp_u_gm[m*num_inducing_g[1]:(m+1)*num_inducing_g[1]]
                    ax3.plot(np.round(lp_zg_t[lp_zg_sort_ind] * scale_z,4),u_gm_temporal[lp_zg_sort_ind],alpha=0.7)
                ax3.scatter(np.round(lp_zg_t[lp_zg_sort_ind] * scale_z,4),np.ones([num_inducing_g[1],1])*lp_u_gm.min(),color="#514A30")

                fig.savefig(dir +"inducing_"+str(i)+".png")

        except KeyboardInterrupt as e:
            print("Stopping training")
            break

    modelmngr = modelmanager(saver, sess, modelPath)
    modelmngr.save()
    summary_writer.close()


    # ****************************************************************
    # param summary
    # ****************************************************************
    logger.info("Noise variance          = " + str(noisevar.get_tfv().eval()))
    logger.info("Kf spatial lengthscale  = " + str(fkell[0].get_tfv().eval()))
    logger.info("Kf spatial variance     = " + str(fkvar[0].get_tfv().eval()))
    logger.info("Kf temporal lengthscale = " + str(fkell[1].get_tfv().eval()))
    logger.info("Kf temporal variance    = " + str(fkvar[1].get_tfv().eval()))

    logger.info("Kg spatial lengthscale  = " + str(gkell[0].get_tfv().eval()))
    logger.info("Kg spatial variance     = " + str(gkvar[0].get_tfv().eval()))
    logger.info("Kg temporal lengthscale = " + str(gkell[1].get_tfv().eval()))
    logger.info("Kg temporal variance    = " + str(gkvar[1].get_tfv().eval()))

    # ****************************************************************
    # model predictions
    # ****************************************************************
    # get test and training predictions
    # def predict_onoff(Xtrain,Xtest):
    #     pred_train = np.maximum(gfmean.eval(feed_dict = {X:Xtrain}),0)
    #     pred_test = np.maximum(gfmean.eval(feed_dict = {X:Xtest}),0)
    #     return pred_train, pred_test
    #
    # pred_train, pred_test = predict_onoff(Xtrain,Xtest)
    #
    # train_rmse = np.sqrt(np.mean((pred_train - Ytrain)**2))
    # train_mae  = np.mean(np.abs(pred_train - Ytrain))
    # test_rmse  = np.sqrt(np.mean((pred_test - Ytest)**2))
    # test_mae   = np.mean(np.abs(pred_test - Ytest))
    #
    # logger.info("train rmse:"+str(train_rmse))
    # logger.info("train mae:"+str(train_mae))
    #
    # logger.info("test rmse:"+str(test_rmse))
    # logger.info("test mae:"+str(test_mae))
    # logger.removeHandler(logger.handlers)

    def predict_onoff(Xtest):
        pred_test = np.maximum(gfmean.eval(feed_dict = {X:Xtest}),0)
        return pred_test

    pred_test = predict_onoff(Xtest)

    test_rmse  = np.sqrt(np.mean((pred_test - Ytest)**2))
    test_mae   = np.mean(np.abs(pred_test - Ytest))

    logger.info("test rmse:"+str(test_rmse))
    logger.info("test mae:"+str(test_mae))
    for handler in list(logger.handlers):
        logger.removeHandler(handler)

    # ****************************************************************
    # return values
    # ****************************************************************
    retdict = {'Xtrain':Xtrain,'Ytrain':Ytrain,
               'Xtest':Xtest,'Ytest':Ytest,
            #    'rawpred_train':gfmean.eval(feed_dict = {X:Xtrain}),
            #    'rawpred_test':gfmean.eval(feed_dict = {X:Xtest}),
            #    'pred_train':pred_train,
            #    'pred_test':pred_test,
            #    'train_rmse':train_rmse,
            #    'train_mae':train_mae,
               'test_rmse':test_rmse,
               'test_mae':test_mae
            #    ,'train_log_evidence': -cost.eval({X : Xtrain,Y : Ytrain})
               }

    return retdict
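The probit_expectations helper in Example #9 evaluates moments of the probit link under a Gaussian: for g ~ N(m, v) it returns E[phi(g)], E[phi(g)^2] and Var[phi(g)] via the standard identities E[phi(g)] = Phi(z) with z = m / sqrt(1 + v) and E[phi(g)^2] = Phi(z) - 2 T(z, a) with a = 1 / sqrt(1 + 2v), where T is Owen's T function (the example's owent is a cheap closed-form stand-in for that term). A quick Monte Carlo sanity check of the identities, using SciPy's owens_t rather than the example's own helper:

import numpy as np
from scipy.stats import norm
from scipy.special import owens_t

m, v = 0.3, 0.5                              # mean and variance of g
z = m / np.sqrt(1.0 + v)
a = 1.0 / np.sqrt(1.0 + 2.0 * v)
e_phi = norm.cdf(z)                          # E[phi(g)]
e_phi2 = norm.cdf(z) - 2.0 * owens_t(z, a)   # E[phi(g)^2]

g = np.random.RandomState(0).randn(200000) * np.sqrt(v) + m
print(e_phi, norm.cdf(g).mean())             # agree to ~3 decimals
print(e_phi2, (norm.cdf(g) ** 2).mean())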
Example #10
 def __init__(self, variance=1.0, **kwargs):
     super().__init__(**kwargs)
     self.variance = Param(variance,
              transform=transforms.Log1pe(),
              name  = "noise_variance")()
Example #11
def main(scriptPath):
    tf.reset_default_graph()
    parentDir = '/'.join(os.path.dirname(os.path.realpath(scriptPath)).split('/')[:-1]) 
    subDir = "/" + scriptPath.split("/")[-2].split(".py")[0] + "/"
    sys.path.append(parentDir)

    from onofftf.main import Param, DataSet, GaussKL, KernSE, GPConditional, GaussKLkron
    from onofftf.utils import modelmanager
    from gpflow import transforms

    cmodelPath = parentDir + subDir + 'results_scgp.pickle'
    modelPath = parentDir + subDir + 'model_hurdle.ckpt'
    logPath   = parentDir + subDir + 'modelsumm_hurdle.log'

    logger = logging.getLogger('log')
    logger.setLevel(logging.DEBUG)
    logger.addHandler(logging.FileHandler(logPath))

    data = pickle.load(open(parentDir + subDir +"data.pickle","rb"))
    Xtrain = data['Xtrain']
    Ytrain = data['Ytrain']
    Ytrain_c = (data['Ytrain'] > 0) * 1
    Xtest = data['Xtest']
    Ytest = data['Ytest']
    Ytest_c = (data['Ytest'] > 0) * 1

    # load results from the classifier model
    cresults = pickle.load(open(cmodelPath,"rb"))
    train_pred_on_idx,_ = np.where(cresults['pred_train']['pfmean'] > 0.5)
    test_pred_on_idx,_  = np.where(cresults['pred_test']['pfmean'] > 0.5)
    Xtrain_reg_hurdle = Xtrain[train_pred_on_idx,:]
    Ytrain_reg_hurdle = Ytrain[train_pred_on_idx]
    Xtest_reg_hurdle = Xtest[test_pred_on_idx,:]
    Ytest_reg_hurdle = Ytest[test_pred_on_idx]

    traindf = pd.DataFrame({'ndatehour':Xtrain[train_pred_on_idx,2].flatten()*1000,'pptr':Ytrain[train_pred_on_idx].flatten()})
    train_data = DataSet(Xtrain_reg_hurdle,Ytrain_reg_hurdle)

    logger.info("traning size   = " + str(Xtrain.shape[0]))
    logger.info("test size   = " + str(Xtest.shape[0]))


    # ****************************************************************
    # parameter initializations
    # ****************************************************************
    list_to_np = lambda _list : [np.array(e) for e in _list]

    num_iter = 50000
    num_inducing_f = np.array([10,100])
    num_data = Xtrain.shape[0]
    num_minibatch = 500

    init_fkell = list_to_np([[5.,5.],[5./1000]])
    init_fkvar = list_to_np([[20.],[20.]])
    init_noisevar = 0.01

    q_diag = True

    init_Zf_s = kmeans(Xtrain[:,0:2],num_inducing_f[0])[0]
    init_Zf_t = np.expand_dims(np.linspace(Xtrain[:,2].min(),Xtrain[:,2].max(),num_inducing_f[1]),axis=1)
    init_Zf = [init_Zf_s,init_Zf_t]

    init_u_fm = np.random.randn(np.prod(num_inducing_f),1)*0.01
    init_u_fs_sqrt = np.ones(np.prod(num_inducing_f)).reshape(1,-1).T

    kern_param_learning_rate = 1e-3
    indp_param_learning_rate = 1e-3

    # ****************************************************************
    # define tensorflow variables and placeholders
    # ****************************************************************
    X = tf.placeholder(dtype = float_type)
    Y = tf.placeholder(dtype = float_type)

    with tf.name_scope("f_kern"):
        fkell = [Param(init_fkell[i],transform=transforms.Log1pe(),
                       name="lengthscale",learning_rate = kern_param_learning_rate,summ=True)
                 for i in range(len(num_inducing_f))]

        fkvar = [Param(init_fkvar[i],transform=transforms.Log1pe(),
                       name="variance",learning_rate = kern_param_learning_rate,summ=True)
                 for i in range(len(num_inducing_f))]

    fkern_list = [KernSE(fkell[i],fkvar[i]) for i in range(len(num_inducing_f))]


    with tf.name_scope("likelihood"):
        noisevar = Param(init_noisevar,transform=transforms.Log1pe(),
                         name="variance",learning_rate = kern_param_learning_rate,summ=True)


    with tf.name_scope("f_ind"):
        Zf_list = [Param(init_Zf[i],name="z",learning_rate = indp_param_learning_rate,summ=True)
                   for i in range(len(num_inducing_f))]

        u_fm = Param(init_u_fm,name="value",learning_rate = indp_param_learning_rate,summ=True)
        if q_diag:
            u_fs_sqrt = Param(init_u_fs_sqrt,transforms.positive,
                              name="variance",learning_rate = indp_param_learning_rate,summ=True)
        else:
            u_fs_sqrt = Param(init_u_fs_sqrt,transforms.LowerTriangular(init_u_fs_sqrt.shape[0]),
                              name="variance",learning_rate = indp_param_learning_rate,summ=True)

    # ****************************************************************
    # define model support functions
    # ****************************************************************
    def build_prior_kl(u_fm, u_fs_sqrt, fkern_list, Zf_list,whiten=False):
        if whiten:
            raise NotImplementedError()
        else:
            Kfmm = [fkern_list[i].K(Zf_list[i].get_tfv()) + \
                    tf.eye(num_inducing_f[i], dtype=float_type) * jitter_level
                    for i in range(len(num_inducing_f))]

            KL = GaussKLkron(u_fm.get_tfv(), u_fs_sqrt.get_tfv(), Kfmm)

        return KL

    def build_predict(Xnew,u_fm,u_fs_sqrt,fkern_list,Zf_list,f_mu=None):

        input_mask_f = _gen_inp_mask(Zf_list)

        # compute fmean and fvar from the kronecker inference
        fmean,fvar = kron_inf(Xnew,fkern_list,Zf_list,u_fm,u_fs_sqrt,num_inducing_f,input_mask_f)
        if f_mu is not None:
            fmean = fmean + f_mu.get_tfv()

        # return mean and variance vectors in order
        return fmean, fvar

    def kron_inf(Xnew,kern_list,Z_list,q_mu,q_sqrt,num_inducing,input_mask):
        # Compute alpha = K_mm^-1 * f_m
        Kmm = [kern_list[p].K(Z_list[p].get_tfv()) + \
               tf.eye(num_inducing[p], dtype=float_type) * jitter_level
               for p in range(len(num_inducing))]

        Kmm_inv = [tf.matrix_inverse(Kmm[p]) for p in range(len(num_inducing))]
        alpha = __kron_mv(Kmm_inv,q_mu.get_tfv())

        n_batch = tf.stack([tf.shape(Xnew)[0],np.int32(1)])
        Knn = tf.ones(n_batch, dtype=float_type)
        Kmn_kron = []

        for p in range(len(num_inducing)):
            xnew = tf.gather(Xnew, input_mask[p], axis=1)
            Knn *= tf.reshape(kern_list[p].Kdiag(xnew), n_batch)
            Kmn_kron.append(kern_list[p].K(Z_list[p].get_tfv(), xnew))

        S = tf.diag(tf.squeeze(tf.square(q_sqrt.get_tfv())))

        Kmn = tf.reshape(tf.multiply(tf.expand_dims(Kmn_kron[0],1),Kmn_kron[1]),[np.prod(num_inducing),-1])
        A  = tf.matmul(tf_kron(*Kmm_inv),Kmn)

        mu = tf.matmul(Kmn, alpha, transpose_a=True)
        var = Knn - tf.reshape(tf.matrix_diag_part(tf.matmul(Kmn, A,transpose_a=True) - \
                               tf.matmul(tf.matmul(A,S,transpose_a=True),A)),[-1,1])

        return mu , var

    def __kron_mv( As, x):
        num_inducing = [int(As[p].get_shape()[0]) for p in range(len(As))]
        N = np.prod(num_inducing)
        b = tf.reshape(x, [N,1])
        for p in range(len(As)):
            Ap = As[p]
            X = tf.reshape(b, (num_inducing[p],
                               np.round(N/num_inducing[p]).astype(np.int)))
            b = tf.matmul(X, Ap, transpose_a=True, transpose_b=True)
            b = tf.reshape(b, [N,1])
        return b

    def tf_kron(*args):
        def __tf_kron(a,b):

            a_shape = [tf.shape(a)[0],tf.shape(a)[1]]
            b_shape = [tf.shape(b)[0],tf.shape(b)[1]]

            return tf.reshape(tf.reshape(a,[a_shape[0],1,a_shape[1],1])* \
                              tf.reshape(b,[1,b_shape[0],1,b_shape[1]]),
                              [a_shape[0]*b_shape[0],a_shape[1]*b_shape[1]])

        kron_prod = tf.constant(1.,shape=[1,1],dtype=float_type)
        for Ap in args:
            kron_prod = __tf_kron(kron_prod,Ap)

        return kron_prod

    def _gen_inp_mask(Z_list):
        input_mask = []
        tmp = 0
        for p in range(len(Z_list)):
            p_dim = Z_list[p].shape[1]
            input_mask.append(np.arange(tmp, tmp + p_dim, dtype=np.int32))
            tmp += p_dim
        return input_mask

    def variational_expectations(Y, fmu, fvar, noisevar):
        return -0.5 * np.log(2 * np.pi) - 0.5 * tf.log(noisevar) \
               - 0.5 * (tf.square(Y - fmu) + fvar) / noisevar

    # ****************************************************************
    # build model and define lower bound
    # ****************************************************************
    # get kl term
    with tf.name_scope("kl"):
        kl = build_prior_kl(u_fm,u_fs_sqrt,fkern_list,Zf_list)

    # get augmented functions
    with tf.name_scope("model_build"):
        fmean, fvar = build_predict(X,u_fm,u_fs_sqrt,fkern_list,Zf_list)

    # compute likelihood
    with tf.name_scope("var_exp"):
        var_exp = tf.reduce_sum(variational_expectations(Y,fmean,fvar,noisevar.get_tfv()))
        scale =  tf.cast(num_data, float_type) / tf.cast(num_minibatch, float_type)
        var_exp_scaled = var_exp * scale

    # final lower bound
    with tf.name_scope("cost"):
        cost =  -(var_exp_scaled - kl)
    # ****************************************************************
    # define optimizer op
    # ****************************************************************
    all_var_list = tf.trainable_variables()
    all_lr_list = [var._learning_rate for var in all_var_list]

    train_opt_group = []

    for group_learning_rate in set(all_lr_list):
        _ind_bool = np.where(np.isin(np.array(all_lr_list),group_learning_rate))[0]
        group_var_list = [all_var_list[ind] for ind in _ind_bool]
        group_tf_optimizer = tf.train.AdamOptimizer(learning_rate = group_learning_rate)
        group_grad_list = tf.gradients(cost,group_var_list)
        group_grads_and_vars = list(zip(group_grad_list,group_var_list))


        group_train_op = group_tf_optimizer.apply_gradients(group_grads_and_vars)
        train_opt_group.append({'names':[var.name for var in group_var_list],
                                'vars':group_var_list,
                                'learning_rate':group_learning_rate,
                                'grads':group_grad_list,
                                'train_op':group_train_op})

    train_op = tf.group(*[group['train_op'] for group in train_opt_group])

    # ****************************************************************
    # define graph and run optimization
    # ****************************************************************
    sess = tf.InteractiveSession()
    saver = tf.train.Saver()
    sess.run(tf.global_variables_initializer())


    logger.info('*******  started optimization at ' + time.strftime('%Y%m%d-%H%M') + " *******")
    optstime = time.time()
    logger.info(
        '{:>16s}'.format("iteration") + '{:>6s}'.format("time"))

    for i in range(num_iter):
        optstime = time.time()
        batch = train_data.next_batch(num_minibatch)
        try:
            sess.run([train_op],feed_dict={X : batch[0],Y : batch[1]})
            if i% 100 == 0:
                logger.info(
                    '{:>16d}'.format(i) + '{:>6.3f}'.format((time.time() - optstime)/60))

            if i% 10000 == 0:
                modelmngr = modelmanager(saver, sess, modelPath)
                modelmngr.save()

                # ****************************************************************
                # plot inducing monitoring plots
                # ****************************************************************
                lp_u_fm = u_fm.get_tfv().eval().flatten()

                lp_zf_t = Zf_list[1].get_tfv().eval().flatten()

                lp_zf_sort_ind = np.argsort(lp_zf_t)

                scale_z = 1000
                mpl.rcParams['figure.figsize'] = (16,8)
                fig, (ax1,ax2) = plt.subplots(2, 1, sharex=True)

                mean_pptr = traindf.groupby('ndatehour')['pptr'].mean()
                ax1.bar(mean_pptr.index, mean_pptr.values, align='center')

                for m in np.arange(num_inducing_f[0]):
                    u_fm_temporal = lp_u_fm[m*num_inducing_f[1]:(m+1)*num_inducing_f[1]]
                    ax2.plot(np.round(lp_zf_t[lp_zf_sort_ind] * scale_z,4),u_fm_temporal[lp_zf_sort_ind],alpha=0.7)
                ax2.scatter(np.round(lp_zf_t[lp_zf_sort_ind] * scale_z,4),np.ones([num_inducing_f[1],1])*lp_u_fm.min(),color="#514A30")

                fig.savefig(parentDir+ subDir + "svgp_inducing_"+str(i)+".png")

        except KeyboardInterrupt as e:
            print("Stopping training")
            break

    modelmngr = modelmanager(saver, sess, modelPath)
    modelmngr.save()
    tf.reset_default_graph()

    # ****************************************************************
    # param summary
    # ****************************************************************
    logger.info("Noise variance          = " + str(noisevar.get_tfv().eval()))
    logger.info("Kf spatial lengthscale  = " + str(fkell[0].get_tfv().eval()))
    logger.info("Kf spatial variance     = " + str(fkvar[0].get_tfv().eval()))
    logger.info("Kf temporal lengthscale = " + str(fkell[1].get_tfv().eval()))
    logger.info("Kf temporal variance    = " + str(fkvar[1].get_tfv().eval()))

    # ****************************************************************
    # model predictions
    # ****************************************************************

    # get regression summary
    from onofftf.svgppred import predict_svgp
    def rmse(predict,actual):
        predict = np.maximum(predict,0)
        return np.sqrt(np.mean((actual-predict)**2))

    def mad(predict,actual):
        predict = np.maximum(predict,0)
        return np.mean(np.abs(actual-predict))

    pred_train_hurdle_svgp, pred_test_hurdle_svgp = predict_svgp(Xtrain = Xtrain_reg_hurdle,
                                                   Xtest = Xtest_reg_hurdle,
                                                   checkpointPath = modelPath)

    train_hurdle_reg_rmse = rmse(pred_train_hurdle_svgp["fmean"],Ytrain_reg_hurdle)
    logger.info("rmse on train set for hurdle svgp : "+str(train_hurdle_reg_rmse))
    train_hurdle_reg_mae = mad(pred_train_hurdle_svgp["fmean"],Ytrain_reg_hurdle)
    logger.info("mad on train set for hurdle svgp : "+str(train_hurdle_reg_mae))

    test_hurdle_reg_rmse = rmse(pred_test_hurdle_svgp["fmean"],Ytest_reg_hurdle)
    logger.info("rmse on test set for hurdle svgp  : "+str(test_hurdle_reg_rmse))
    test_hurdle_reg_mae = mad(pred_test_hurdle_svgp["fmean"],Ytest_reg_hurdle)
    logger.info("mad on test set for hurdle svgp  : "+str(test_hurdle_reg_mae))

    # combine the results from regression and classification
    train_pred_hurdle_clf = (cresults['pred_train']['pfmean'] > 0.5)*1.0
    test_pred_hurdle_clf  = (cresults['pred_test']['pfmean'] > 0.5)*1.0
    train_pred_hurdle_comb = train_pred_hurdle_clf.copy()
    train_pred_hurdle_comb[train_pred_on_idx] = pred_train_hurdle_svgp["fmean"]
    test_pred_hurdle_comb = test_pred_hurdle_clf.copy()
    test_pred_hurdle_comb[test_pred_on_idx] = pred_test_hurdle_svgp["fmean"]

    # final results
    train_hurdle_comb_rmse = rmse(train_pred_hurdle_comb,Ytrain)
    logger.info("rmse on train set for hurdle svgp : "+str(train_hurdle_comb_rmse))
    train_hurdle_comb_mae = mad(train_pred_hurdle_comb,Ytrain)
    logger.info("mad on train set for hurdle svgp : "+str(train_hurdle_comb_mae))

    test_hurdle_comb_rmse = rmse(test_pred_hurdle_comb,Ytest)
    logger.info("rmse on test set for hurdle svgp  : "+str(test_hurdle_comb_rmse))
    test_hurdle_comb_mae = mad(test_pred_hurdle_comb,Ytest)
    logger.info("mad on test set for hurdle svgp  : "+str(test_hurdle_comb_mae))

    for handler in list(logger.handlers):
        handler.close()
        logger.removeHandler(handler)

    # ****************************************************************
    # return values
    # ****************************************************************
    results = {
               'pred_train_hurdle_svgp':pred_train_hurdle_svgp,
               'pred_test_hurdle_svgp':pred_test_hurdle_svgp,
               'train_hurdle_reg_rmse':train_hurdle_reg_rmse,
               'train_hurdle_reg_mae':train_hurdle_reg_mae,
               'test_hurdle_reg_rmse':test_hurdle_reg_rmse,
               'test_hurdle_reg_mae':test_hurdle_reg_mae,
               'train_pred_hurdle_comb':train_pred_hurdle_comb,
               'test_pred_hurdle_comb':test_pred_hurdle_comb,
               'train_hurdle_comb_rmse':train_hurdle_comb_rmse,
               'train_hurdle_comb_mae':train_hurdle_comb_mae,
               'test_hurdle_comb_rmse':test_hurdle_comb_rmse,
               'test_hurdle_comb_mae':test_hurdle_comb_mae,
               'train_pred_on_idx':train_pred_on_idx,
               'test_pred_on_idx':test_pred_on_idx
               }
    pickle.dump(results,open(parentDir+ subDir +"results_hurdle.pickle","wb"))
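Example #11's variational_expectations is the closed form of E_{f ~ N(fmu, fvar)}[log N(Y | f, noisevar)], which follows from E[(Y - f)^2] = (Y - fmu)^2 + fvar. A small standalone NumPy Monte Carlo check of that identity (a sketch, not the repo's code):

import numpy as np

def var_exp_np(y, fmu, fvar, noisevar):
    # closed-form E_{f ~ N(fmu, fvar)}[log N(y | f, noisevar)]
    return (-0.5 * np.log(2 * np.pi) - 0.5 * np.log(noisevar)
            - 0.5 * ((y - fmu) ** 2 + fvar) / noisevar)

y, fmu, fvar, noisevar = 0.7, 0.2, 0.3, 0.05
f = np.random.RandomState(0).randn(200000) * np.sqrt(fvar) + fmu
mc = np.mean(-0.5 * np.log(2 * np.pi * noisevar) - 0.5 * (y - f) ** 2 / noisevar)
print(var_exp_np(y, fmu, fvar, noisevar), mc)   # should match closely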
Example #12
 def __init__(self, **kwargs):
     super().__init__(**kwargs)
     self.c = Param(1., transform=transforms.Log1pe(), name="para_c")()
Example #13
def predict_svgp(Xtrain,
                 Xtest,
                 checkpointPath,
                 num_inducing_f=np.array([10, 100]),
                 include_fmu=False):
    tf.reset_default_graph()

    # param initializations
    list_to_np = lambda _list: [np.array(e) for e in _list]

    init_fkell = list_to_np([[8., 8.], [5. / 1000]])
    init_fkvar = list_to_np([[20.], [20.]])

    init_noisevar = 0.001

    q_diag = True
    if include_fmu:
        init_f_mu = 0.

    init_Zf_s = kmeans(Xtrain[:, 0:2], num_inducing_f[0])[0]
    init_Zf_t = np.expand_dims(np.linspace(Xtrain[:, 2].min(), Xtrain[:,
                                                                      2].max(),
                                           num_inducing_f[1]),
                               axis=1)

    init_Zf = [init_Zf_s, init_Zf_t]

    init_u_fm = np.random.randn(np.prod(num_inducing_f), 1) * 0.1
    init_u_fs_sqrt = np.ones(np.prod(num_inducing_f)).reshape(1, -1).T

    kern_param_learning_rate = 1e-4
    indp_param_learning_rate = 1e-4

    # ****************************************************************
    # define tensorflow variables and placeholders
    # ****************************************************************
    X = tf.placeholder(dtype=float_type)
    Y = tf.placeholder(dtype=float_type)

    with tf.name_scope("f_kern"):
        fkell = [
            Param(init_fkell[i],
                  transform=transforms.Log1pe(),
                  name="lengthscale",
                  learning_rate=kern_param_learning_rate,
                  summ=True) for i in range(len(num_inducing_f))
        ]

        fkvar = [
            Param(init_fkvar[i],
                  transform=transforms.Log1pe(),
                  name="variance",
                  learning_rate=kern_param_learning_rate,
                  summ=True) for i in range(len(num_inducing_f))
        ]

    fkern_list = [
        KernSE(fkell[i], fkvar[i]) for i in range(len(num_inducing_f))
    ]

    with tf.name_scope("likelihood"):
        noisevar = Param(init_noisevar,
                         transform=transforms.Log1pe(),
                         name="variance",
                         learning_rate=kern_param_learning_rate,
                         summ=True)

    with tf.name_scope("f_ind"):
        Zf_list = [
            Param(init_Zf[i],
                  name="z",
                  learning_rate=indp_param_learning_rate,
                  summ=True) for i in range(len(num_inducing_f))
        ]

        u_fm = Param(init_u_fm,
                     name="value",
                     learning_rate=indp_param_learning_rate,
                     summ=True)
        if q_diag:
            u_fs_sqrt = Param(init_u_fs_sqrt,
                              transforms.positive,
                              name="variance",
                              learning_rate=indp_param_learning_rate,
                              summ=True)
        else:
            u_fs_sqrt = Param(init_u_fs_sqrt,
                              transforms.LowerTriangular(
                                  init_u_fs_sqrt.shape[0]),
                              name="variance",
                              learning_rate=indp_param_learning_rate,
                              summ=True)

    # ****************************************************************
    # define model support functions
    # ****************************************************************
    def build_predict(Xnew, u_fm, u_fs_sqrt, fkern_list, Zf_list, f_mu=None):

        input_mask_f = _gen_inp_mask(Zf_list)

        # compute fmean and fvar from the kronecker inference
        fmean, fvar = kron_inf(Xnew, fkern_list, Zf_list, u_fm, u_fs_sqrt,
                               num_inducing_f, input_mask_f)
        if f_mu is not None:
            fmean = fmean + f_mu.get_tfv()

        # return mean and variance vectors in order
        return fmean, fvar

    def kron_inf(Xnew, kern_list, Z_list, q_mu, q_sqrt, num_inducing,
                 input_mask):
        # Compute alpha = K_mm^-1 * f_m
        Kmm = [kern_list[p].K(Z_list[p].get_tfv()) + \
               tf.eye(num_inducing[p], dtype=float_type) * jitter_level
               for p in range(len(num_inducing))]

        Kmm_inv = [tf.matrix_inverse(Kmm[p]) for p in range(len(num_inducing))]
        alpha = __kron_mv(Kmm_inv, q_mu.get_tfv())

        n_batch = tf.stack([tf.shape(Xnew)[0], np.int32(1)])
        Knn = tf.ones(n_batch, dtype=float_type)
        KMN = []

        for p in range(len(num_inducing)):
            xnew = tf.gather(Xnew, input_mask[p], axis=1)
            Knn *= tf.reshape(kern_list[p].Kdiag(xnew), n_batch)
            KMN.append(kern_list[p].K(Z_list[p].get_tfv(), xnew))

        S = tf.diag(tf.squeeze(tf.square(q_sqrt.get_tfv())))

        def loop_rows(n, mu, var):
            Kmn = tf.reshape(KMN[0][:, n], [num_inducing[0], 1])
            for p in range(1, len(num_inducing)):
                Kmn = tf_kron(Kmn,
                              tf.reshape(KMN[p][:, n], [num_inducing[p], 1]))

            mu_n = tf.matmul(Kmn, alpha, transpose_a=True)
            mu = mu.write(n, mu_n)
            A = __kron_mv(Kmm_inv, Kmn)
            tmp = Knn[n] - tf.matmul(Kmn, A,transpose_a=True) + \
                           tf.matmul(tf.matmul(A,S,transpose_a=True),A)

            var = var.write(n, tmp)
            return tf.add(n, 1), mu, var

        def loop_cond(n, mu, var):
            return tf.less(n, n_batch[0])

        mu = tf.TensorArray(float_type, size=n_batch[0])
        var = tf.TensorArray(float_type, size=n_batch[0])
        _, mu, var = tf.while_loop(loop_cond, loop_rows, [0, mu, var])

        mu = tf.reshape(mu.stack(), n_batch)
        var = tf.reshape(var.stack(), n_batch)

        return mu, var

    def __kron_mv(As, x):
        num_inducing = [int(As[p].get_shape()[0]) for p in range(len(As))]
        N = np.prod(num_inducing)
        b = tf.reshape(x, [N, 1])
        for p in range(len(As)):
            Ap = As[p]
            X = tf.reshape(b, (num_inducing[p], np.round(
                N / num_inducing[p]).astype(np.int)))
            b = tf.matmul(X, Ap, transpose_a=True, transpose_b=True)
            b = tf.reshape(b, [N, 1])
        return b

    def tf_kron(*args):
        def __tf_kron(a, b):

            a_shape = [tf.shape(a)[0], tf.shape(a)[1]]
            b_shape = [tf.shape(b)[0], tf.shape(b)[1]]

            return tf.reshape(tf.reshape(a,[a_shape[0],1,a_shape[1],1])* \
                              tf.reshape(b,[1,b_shape[0],1,b_shape[1]]),
                              [a_shape[0]*b_shape[0],a_shape[1]*b_shape[1]])

        kron_prod = tf.constant(1., shape=[1, 1], dtype=float_type)
        for Ap in args:
            kron_prod = __tf_kron(kron_prod, Ap)

        return kron_prod

    def _gen_inp_mask(Z_list):
        input_mask = []
        tmp = 0
        for p in range(len(Z_list)):
            p_dim = Z_list[p].shape[1]
            input_mask.append(np.arange(tmp, tmp + p_dim, dtype=np.int32))
            tmp += p_dim
        return input_mask

    # ****************************************************************
    # build model and define lower bound
    # ****************************************************************

    # get augmented functions
    with tf.name_scope("model_build"):
        fmean, fvar = build_predict(X, u_fm, u_fs_sqrt, fkern_list, Zf_list)

    # load model
    sess = tf.InteractiveSession()
    sess.run(tf.global_variables_initializer())

    saver = tf.train.Saver()
    modelmngr = modelmanager(saver, sess, checkpointPath)
    modelmngr.load()

    # return inside a dictionary
    pred_train = {
        'fmean': fmean.eval(feed_dict={X: Xtrain}),
        'fvar': fvar.eval(feed_dict={X: Xtrain})
    }

    if Xtest is not None:
        pred_test = {
            'fmean': fmean.eval(feed_dict={X: Xtest}),
            'fvar': fvar.eval(feed_dict={X: Xtest})
        }

    sess.close()

    if Xtest is not None:
        return pred_train, pred_test
    else:
        return pred_train
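The __kron_mv helper used in Examples #9, #11 and #13 applies a Kronecker product of small square matrices to a vector without ever materializing the full product: it repeatedly reshapes the running vector into a matrix and multiplies in one factor at a time. A minimal NumPy re-implementation of the same vec trick, checked against an explicit np.kron (a sketch of the idea, not the TF code):

import numpy as np

def kron_mv(As, x):
    # compute (As[0] kron As[1] kron ...) @ x one factor at a time
    dims = [A.shape[0] for A in As]
    N = int(np.prod(dims))
    b = x.reshape(N, 1)
    for A, d in zip(As, dims):
        X = b.reshape(d, N // d)
        # mirrors tf.matmul(X, Ap, transpose_a=True, transpose_b=True)
        b = (X.T @ A.T).reshape(N, 1)
    return b

rng = np.random.RandomState(0)
As = [rng.randn(3, 3), rng.randn(4, 4)]
x = rng.randn(12, 1)
assert np.allclose(kron_mv(As, x), np.kron(As[0], As[1]) @ x)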
Example #14
def predict_onoff(Xtrain,Xtest,checkpointPath,num_inducing_f = np.array([10,100]),num_inducing_g = np.array([10,100]),include_fmu = False):
    tf.reset_default_graph()

    # param initializations
    list_to_np = lambda _list : [np.array(e) for e in _list]

    init_fkell = list_to_np([[8.,8.],[5./1000]])
    init_fkvar = list_to_np([[20.],[20.]])

    init_gkell = list_to_np([[8.,8.],[5./1000]])
    init_gkvar = list_to_np([[10.],[10.]])

    init_noisevar = 0.001

    q_diag = True
    if include_fmu:
        init_f_mu = 0.

    init_Zf_s = kmeans(Xtrain[:,0:2],num_inducing_f[0])[0]
    init_Zf_t = np.expand_dims(np.linspace(Xtrain[:,2].min(),Xtrain[:,2].max(),num_inducing_f[1]),axis=1)

    init_Zf = [init_Zf_s,init_Zf_t]
    init_Zg = init_Zf.copy()


    init_u_fm = np.random.randn(np.prod(num_inducing_f),1)*0.1
    init_u_gm = np.random.randn(np.prod(num_inducing_g),1)*0.1

    init_u_fs_sqrt = np.ones(np.prod(num_inducing_f)).reshape(1,-1).T
    init_u_gs_sqrt = np.ones(np.prod(num_inducing_g)).reshape(1,-1).T

    kern_param_learning_rate = 1e-4
    indp_param_learning_rate = 1e-4


    # tf variable declarations
    X = tf.placeholder(dtype = float_type)
    Y = tf.placeholder(dtype = float_type)

    with tf.name_scope("f_kern"):
        fkell = [Param(init_fkell[i],transform=transforms.Log1pe(),
                       name="lengthscale",learning_rate = kern_param_learning_rate,summ=True)
                 for i in range(len(num_inducing_f))]

        fkvar = [Param(init_fkvar[i],transform=transforms.Log1pe(),
                       name="variance",learning_rate = kern_param_learning_rate,summ=True)
                 for i in range(len(num_inducing_f))]

    fkern_list = [KernSE(fkell[i],fkvar[i]) for i in range(len(num_inducing_f))]

    with tf.name_scope("g_kern"):
        gkell = [Param(init_gkell[i],transform=transforms.Log1pe(),
                       name="lengthscale",learning_rate = kern_param_learning_rate,summ=True)
                 for i in range(len(num_inducing_g))]

        gkvar = [Param(init_gkvar[i],transform=transforms.Log1pe(),
                       name="variance",learning_rate = kern_param_learning_rate,summ=True)
                 for i in range(len(num_inducing_g))]

    gkern_list = [KernSE(gkell[i],gkvar[i]) for i in range(len(num_inducing_g))]

    with tf.name_scope("likelihood"):
        noisevar = Param(init_noisevar,transform=transforms.Log1pe(),
                         name="variance",learning_rate = kern_param_learning_rate,summ=True)


    with tf.name_scope("f_ind"):
        Zf_list = [Param(init_Zf[i],name="z",learning_rate = indp_param_learning_rate,summ=True)
                   for i in range(len(num_inducing_f))]

        u_fm = Param(init_u_fm,name="value",learning_rate = indp_param_learning_rate,summ=True)
        if q_diag:
            u_fs_sqrt = Param(init_u_fs_sqrt,transforms.positive,
                              name="variance",learning_rate = indp_param_learning_rate,summ=True)
        else:
            u_fs_sqrt = Param(init_u_fs_sqrt,transforms.LowerTriangular(init_u_fs_sqrt.shape[0]),
                              name="variance",learning_rate = indp_param_learning_rate,summ=True)

    #     f_mu = Param(init_f_mu,name="fmu",learning_rate = indp_param_learning_rate,summ=True)

    with tf.name_scope("g_ind"):
        Zg_list = [Param(init_Zg[i],name="z",learning_rate = indp_param_learning_rate,summ=True)
                   for i in range(len(num_inducing_g))]

        u_gm = Param(init_u_gm,name="value",learning_rate = indp_param_learning_rate,summ=True)
        if q_diag:
            u_gs_sqrt = Param(init_u_gs_sqrt,transforms.positive,
                              name="variance",learning_rate = indp_param_learning_rate,summ=True)
        else:
            u_gs_sqrt = Param(init_u_gs_sqrt,transforms.LowerTriangular(init_u_gs_sqrt.shape[0]),
                              name="variance",learning_rate = indp_param_learning_rate,summ=True)



    def build_prior_kl(u_fm, u_fs_sqrt, fkern_list, Zf_list,
                       u_gm, u_gs_sqrt, gkern_list, Zg_list, whiten=False):
        if whiten:
            raise NotImplementedError()
        else:
            Kfmm = [fkern_list[i].K(Zf_list[i].get_tfv()) + \
                    tf.eye(num_inducing_f[i], dtype=float_type) * jitter_level
                    for i in range(len(num_inducing_f))]

            Kgmm = [gkern_list[i].K(Zg_list[i].get_tfv()) + \
                    tf.eye(num_inducing_g[i], dtype=float_type) * jitter_level
                    for i in range(len(num_inducing_g))]

            KL = GaussKLkron(u_fm.get_tfv(), u_fs_sqrt.get_tfv(), Kfmm) + \
                 GaussKLkron(u_gm.get_tfv(), u_gs_sqrt.get_tfv(), Kgmm)

        return KL
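
    # GaussKLkron is assumed to evaluate KL(N(m, S) || N(0, K1 kron K2)) without
    # forming the full Kronecker product. A hedged standalone NumPy reference for
    # tiny sizes (s is the diagonal of S^(1/2)):
    #   K = np.kron(K1, K2)
    #   kl = 0.5 * (np.trace(np.linalg.solve(K, np.diag(s**2)))
    #               + m.T @ np.linalg.solve(K, m) - len(m)
    #               + np.linalg.slogdet(K)[1] - np.sum(np.log(s**2)))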

    def build_predict(Xnew,u_fm,u_fs_sqrt,fkern_list,Zf_list,u_gm,u_gs_sqrt,gkern_list,Zg_list,f_mu=None):

        input_mask_f = _gen_inp_mask(Zf_list)
        input_mask_g = _gen_inp_mask(Zg_list)

        # compute fmean and fvar from the kronecker inference
        fmean,fvar = kron_inf(Xnew,fkern_list,Zf_list,u_fm,u_fs_sqrt,num_inducing_f,input_mask_f)
        # optional constant mean offset for f
        if f_mu is not None:
            fmean = fmean + f_mu.get_tfv()


        # compute gmean and gvar from the kronecker inference
        gmean, gvar = kron_inf(Xnew, gkern_list, Zg_list, u_gm, u_gs_sqrt,
                               num_inducing_g, input_mask_g)
        # shift the gating prior mean to -1 so the gate defaults towards "off"
        gmean = gmean + tf.constant(-1.0, dtype=float_type)

        # expectations of the probit-transformed gate phi(g)
        ephi_g, ephi2_g, evar_phi_g = probit_expectations(gmean, gvar)

        # moments of the gated (augmented) process phi(g) * f
        gfmean = tf.multiply(ephi_g, fmean)
        gfvar = tf.multiply(ephi2_g, fvar)
        gfmeanu = tf.multiply(evar_phi_g, tf.square(fmean))

        # return mean and variance vectors in order
        return gfmean, gfvar, gfmeanu, fmean, fvar, gmean, gvar, ephi_g, evar_phi_g
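
    # For independent q(f) and q(g), the gated moments returned above follow from
    #   E[phi(g) f]   = E[phi(g)] E[f]
    #   Var[phi(g) f] = E[phi(g)^2] Var[f] + Var[phi(g)] E[f]^2,
    # which is why the two variance pieces gfvar and gfmeanu are kept separate.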


    def kron_inf(Xnew, kern_list, Z_list, q_mu, q_sqrt, num_inducing, input_mask):
        # compute alpha = Kmm^-1 @ q_mu factor-wise via the Kronecker structure
        Kmm = [kern_list[p].K(Z_list[p].get_tfv()) + \
               tf.eye(num_inducing[p], dtype=float_type) * jitter_level
               for p in range(len(num_inducing))]

        Kmm_inv = [tf.matrix_inverse(Kmm[p]) for p in range(len(num_inducing))]
        alpha = __kron_mv(Kmm_inv,q_mu.get_tfv(),num_inducing)

        n_batch = tf.stack([tf.shape(Xnew)[0],np.int32(1)])
        Knn = tf.ones(n_batch, dtype=float_type)
        KMN = []

        for p in range(len(num_inducing)):
            xnew = tf.gather(Xnew, input_mask[p], axis=1)
            Knn *= tf.reshape(kern_list[p].Kdiag(xnew), n_batch)
            KMN.append(kern_list[p].K(Z_list[p].get_tfv(), xnew))

        # diagonal posterior covariance S = diag(q_sqrt^2); assumes q_diag = True
        S = tf.diag(tf.squeeze(tf.square(q_sqrt.get_tfv())))

        # one test point per iteration: mean Kmn^T alpha, variance
        # Knn - Kmn^T Kmm^-1 Kmn + A^T S A with A = Kmm^-1 Kmn
        def loop_rows(n, mu, var):
            Kmn = tf.reshape(KMN[0][:,n], [num_inducing[0],1])
            for p in range(1,len(num_inducing)):
                Kmn = tf_kron(Kmn,tf.reshape(KMN[p][:,n],[num_inducing[p],1]))

            mu_n = tf.matmul(Kmn, alpha, transpose_a=True)
            mu = mu.write(n, mu_n)
            A = __kron_mv(Kmm_inv,Kmn,num_inducing)
            tmp = Knn[n] - tf.matmul(Kmn, A,transpose_a=True) + \
                           tf.matmul(tf.matmul(A,S,transpose_a=True),A)

            var = var.write(n, tmp)
            return tf.add(n,1), mu, var

        def loop_cond(n,mu,var):
            return tf.less(n, n_batch[0])

        mu = tf.TensorArray(float_type, size=n_batch[0])
        var = tf.TensorArray(float_type, size=n_batch[0])
        _, mu, var = tf.while_loop(loop_cond, loop_rows, [0, mu, var])

        mu = tf.reshape(mu.stack(), n_batch)
        var = tf.reshape(var.stack(), n_batch)

        return mu, var

    def __kron_mv(As, x, num_inducing):
        # multiply (As[0] kron As[1] kron ...) @ x without forming the full product
        N = np.prod(num_inducing)
        b = tf.reshape(x, [N,1])
        for p in range(len(As)):
            Ap = As[p]
            X = tf.reshape(b, (num_inducing[p], N // num_inducing[p]))
            b = tf.matmul(X, Ap, transpose_a=True, transpose_b=True)
            b = tf.reshape(b, [N,1])
        return b
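
    # Hedged standalone NumPy check of the vec-trick implemented above (row-major
    # reshape convention, small sizes only):
    #   A1, A2 = np.random.randn(3, 3), np.random.randn(4, 4)
    #   x = np.random.randn(12, 1)
    #   direct = np.kron(A1, A2) @ x
    #   # looping the reshape/transpose-matmul over [A1, A2] as in __kron_mv
    #   # reproduces `direct` without ever materializing the 12 x 12 product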

    def tf_kron(a, b):
        # Kronecker product of two matrices with statically known shapes
        a_shape = [a.shape[0].value, a.shape[1].value]
        b_shape = [b.shape[0].value, b.shape[1].value]
        return tf.reshape(tf.reshape(a,[a_shape[0],1,a_shape[1],1])* \
                          tf.reshape(b,[1,b_shape[0],1,b_shape[1]]),
                          [a_shape[0]*b_shape[0],a_shape[1]*b_shape[1]])

    def _gen_inp_mask(Z_list):
        # column indices of the input that belong to each Kronecker factor
        input_mask = []
        tmp = 0
        for p in range(len(Z_list)):
            p_dim = Z_list[p].shape[1]
            input_mask.append(np.arange(tmp, tmp + p_dim, dtype=np.int32))
            tmp += p_dim
        return input_mask


    def variational_expectations(Y, fmu, fvar, fmuvar, noisevar):
        return -0.5 * np.log(2 * np.pi) - 0.5 * tf.log(noisevar) \
                - 0.5 * (tf.square(Y - fmu) + fvar + fmuvar) / noisevar
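
    # This is the standard closed form
    #   E_{N(f | fmu, fvar)}[log N(Y | f, noisevar)]
    #     = -0.5*log(2*pi*noisevar) - 0.5*((Y - fmu)^2 + fvar)/noisevar,
    # with the additional fmuvar term contributed by the gating uncertainty.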

    def probit_expectations(gmean, gvar):
        def normcdf(x):
            # standard normal CDF, squashed into [1e-3, 1 - 1e-3] for stability
            return 0.5 * (1.0 + tf.erf(x / np.sqrt(2.0))) * (1. - 2.e-3) + 1.e-3

        def owent(h, a):
            # approximation of Owen's T function T(h, a)
            h = tf.abs(h)
            term1 = tf.atan(a) / (2 * np.pi)
            term2 = tf.exp(-0.5 * tf.square(h) * (tf.square(a) + 1))
            return tf.multiply(term1, term2)

        z = gmean / tf.sqrt(1. + gvar)
        a = 1 / tf.sqrt(1. + (2 * gvar))

        cdfz = normcdf(z)
        tz = owent(z, a)

        ephig = cdfz
        ephisqg = (cdfz - 2. * tz)
        evarphig = (cdfz - 2. * tz - tf.square(cdfz))

        # clip negative values from variance terms to zero
        ephisqg = (ephisqg + tf.abs(ephisqg)) / 2.
        evarphig = (evarphig + tf.abs(evarphig)) / 2.

        return ephig, ephisqg, evarphig
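
    # These follow the known probit-Gaussian identities (z = gmean/sqrt(1 + gvar),
    # a = 1/sqrt(1 + 2*gvar), T = Owen's T function):
    #   E[Phi(g)]   = Phi(z)
    #   E[Phi(g)^2] = Phi(z) - 2*T(z, a)
    #   Var[Phi(g)] = Phi(z) - 2*T(z, a) - Phi(z)^2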


    # KL between the approximate posteriors and their priors (built for
    # completeness; only the predictive quantities are evaluated below)
    kl = build_prior_kl(u_fm, u_fs_sqrt, fkern_list, Zf_list,
                        u_gm, u_gs_sqrt, gkern_list, Zg_list)
    (gfmean, gfvar, gfmeanu, fmean, fvar, gmean, gvar,
     pgmean, pgvar) = build_predict(X, u_fm, u_fs_sqrt, fkern_list, Zf_list,
                                    u_gm, u_gs_sqrt, gkern_list, Zg_list, f_mu=f_mu)

    # restore the trained parameters from the checkpoint
    sess = tf.InteractiveSession()
    sess.run(tf.global_variables_initializer())

    saver = tf.train.Saver()
    modelmngr = modelmanager(saver, sess, checkpointPath)
    modelmngr.load()

    pred_train = {'gfmean' : gfmean.eval(feed_dict = {X:Xtrain}),
                  'fmean' : fmean.eval(feed_dict = {X:Xtrain}),
                  'pgmean' : pgmean.eval(feed_dict = {X:Xtrain})}

    if Xtest is not None:
        pred_test = {'gfmean' : gfmean.eval(feed_dict = {X:Xtest}),
                     'fmean' : fmean.eval(feed_dict = {X:Xtest}),
                     'pgmean' : pgmean.eval(feed_dict = {X:Xtest})}
    sess.close()

    if Xtest is not None:
        return pred_train, pred_test
    else:
        return pred_train
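
A minimal usage sketch (hypothetical data and checkpoint path; assumes Xtrain and
Xtest carry two spatial columns plus a time column, matching the kmeans/linspace
initializations above, and that the checkpoint was saved from a matching training
graph):

import numpy as np

Xtrain = np.random.rand(500, 3)  # columns: [x, y, t]
Xtest = np.random.rand(100, 3)

pred_train, pred_test = predict_onoff(Xtrain, Xtest,
                                      checkpointPath="./checkpoints/model",
                                      include_fmu=False)
print(pred_train['gfmean'].shape)  # gated posterior mean on the training inputs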