def test_hv_with_builtin():
    iris = load_iris()
    x = tf.placeholder(tf.float32, name='x')
    y = tf.placeholder(tf.float32, name='y')
    model = LinearModel(x, 4, 3)
    net_w, net_out = vectorize_model(model.var_list, model.inp[-1])

    v = tf.constant(np.ones(net_w.tensor.get_shape()),
                    dtype=tf.float32)  # vector of ones of right shape

    ce_builtin = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=net_out, labels=y)
    )  # the built-in op TensorFlow recommends for softmax cross-entropy loss

    ce_standard = tf.reduce_mean(
        -tf.reduce_sum(y * tf.log(tf.nn.softmax(net_out)),
                       reduction_indices=[1])  # this is normal CE loss
    )

    hvp_builtin = hvp(
        ce_builtin, net_w.tensor,
        v)  # with earlier versions (r0.11) this evaluated to 0, then raised an error
    # update: works as of r1.2
    hessian_builtin = tf.hessians(ce_builtin, net_w.tensor)[0]

    hvp_standard = hvp(ce_standard, net_w.tensor, v)
    hessian_standard = tf.hessians(ce_standard, net_w.tensor)[0]

    def training_supplier():
        return {x: iris.train.data, y: iris.train.target}

    ts = tf.train.GradientDescentOptimizer(.1).minimize(
        ce_standard, var_list=model.var_list)

    with tf.Session().as_default() as ss:
        tf.global_variables_initializer().run()

        print('builtin, standard:',
              ss.run([ce_builtin, ce_standard], feed_dict=training_supplier()))

        for _ in range(2000):
            ts.run(feed_dict=training_supplier())

        print('builtin',
              ss.run([hvp_builtin, hessian_builtin],
                     feed_dict=training_supplier()))  # output is wrongly 0.

        print(
            'standard',
            ss.run([hvp_standard, hessian_standard],
                   feed_dict=training_supplier()))
Example #2
def gradient(ckpt_file_path):

    detection_graph = tf.Graph()
    with tf.Session(graph=detection_graph) as sess:
        saver = tf.train.import_meta_graph(ckpt_file_path)
        saver.restore(sess, "../model/normal_cifar/normal_Cifar-19900")
        graph = tf.get_default_graph()
        x_input = graph.get_tensor_by_name('x:0')
        y_input = graph.get_tensor_by_name('y:0')
        keep_prob = graph.get_tensor_by_name('keep_prob:0')
        logits = graph.get_tensor_by_name('logits:0')
        max_index = tf.argmax(logits[0])
        gradient_op = tf.gradients(logits[0][max_index], x_input)
        hess_2 = tf.hessians(logits[0][max_index], x_input)
        gradient = sess.run(gradient_op, {
            x_input: xx[j],
            y_input: y,
            keep_prob: 1
        })
        hess_2 = sess.run(hess_2, {x_input: xx[j], y_input: y, keep_prob: 1})
        gradient = np.array(gradient)
        gradient = gradient.reshape(1, 3072)
        hess_2 = np.array(hess_2)
        hess_2 = hess_2.reshape(3072, 3072)
        save(gradient, "(1).xlsx")
        save(hess_2, "(2).xlsx")
Example #3
 def test_natural_gradient(self):
     """
     Test random natural gradient cases.
     """
     with tf.Graph().as_default():
         with tf.Session() as sess:
             for size in range(3, 9):
                 dist = NaturalSoftmax(size, epsilon=0)
                 softmax = CategoricalSoftmax(size)
                 param_row = tf.constant(np.random.normal(size=(size, )),
                                         dtype=tf.float64)
                 params = tf.stack([param_row])
                 one_hot = np.zeros((1, size))
                 one_hot[0, 1] = 1
                 samples = tf.constant(one_hot, dtype=tf.float64)
                 kl_div = softmax.kl_divergence(tf.stop_gradient(params),
                                                params)
                 hessian = sess.run(tf.hessians(kl_div, param_row)[0])
                 gradient = sess.run(
                     tf.gradients(softmax.log_prob(params, samples),
                                  params)[0][0])
                 expected = np.matmul(np.array([gradient]),
                                      np.linalg.pinv(hessian))[0]
                 actual = sess.run(
                     tf.gradients(dist.log_prob(params, samples),
                                  params)[0][0])
                 self.assertTrue(np.allclose(actual, expected))
Example #4
    def _log_likelihood(self,
                        i_batch,
                        dsetname,
                        data_tensor,
                        batch_info,
                        omit_grads=tuple(),
                        second_order=False,
                        **params):
        # Stack the params to create a single node
        # to differentiate with respect to.
        grad_par_stack = tf.stack(
            [params[k] for k in self.param_names if k not in omit_grads])

        # Retrieve individual params from the stacked node,
        # then add back the params we do not differentiate w.r.t.
        params_unstacked = dict(
            zip([x for x in self.param_names if x not in omit_grads],
                tf.unstack(grad_par_stack)))
        for k in omit_grads:
            params_unstacked[k] = params[k]

        # Forward computation
        ll = self._log_likelihood_inner(i_batch, params_unstacked, dsetname,
                                        data_tensor, batch_info)

        # Autodifferentiation. This is why we use tensorflow:
        grad = tf.gradients(ll, grad_par_stack)[0]
        if second_order:
            return ll, grad, tf.hessians(ll, grad_par_stack)[0]
        return ll, grad, None
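
# Why the params are stacked above: a minimal sketch (toy values, TF 1.x graph mode).
# Differentiating w.r.t. a single stacked node makes tf.hessians return the full
# cross-parameter Hessian, whereas separate per-parameter calls only give diagonal blocks.
def _stacking_sketch():
    a, b = tf.constant(1.0), tf.constant(2.0)
    stacked = tf.stack([a, b])        # single node to differentiate w.r.t.
    a_, b_ = tf.unstack(stacked)      # the forward pass must use the unstacked pieces
    ll = a_ * a_ * b_ + b_ * b_       # toy "log-likelihood"
    full_hessian = tf.hessians(ll, stacked)[0]  # shape (2, 2), includes the d2ll/(da db) term
    with tf.Session() as sess:
        return sess.run(full_hessian)  # [[4., 2.], [2., 2.]]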
Example #5
    def constrained_bestfit(self, objective, constrained_mu, data, pdf,
                            init_pars, par_bounds):
        #the graph
        data = self.tb.astensor(data)

        nuis_pars = [
            self.tb.astensor([p]) for i, p in enumerate(init_pars)
            if i != pdf.config.poi_index
        ]
        poi_par = self.tb.astensor([constrained_mu])

        nuis_cat = self.tb.concatenate(nuis_pars)
        pars = self.tb.concatenate([nuis_cat[:0], poi_par, nuis_cat[0:]])
        objective = objective(pars, data, pdf)
        hessian = tf.hessians(objective, nuis_cat)[0]
        gradient = tf.gradients(objective, nuis_cat)[0]
        invhess = tf.linalg.inv(hessian)
        update = tf.transpose(
            tf.matmul(invhess, tf.transpose(tf.stack([gradient]))))[0]

        #run newton's method
        best_fit_nuis = [
            x for i, x in enumerate(init_pars) if i != pdf.config.poi_index
        ]
        for i in range(1000):
            up = self.tb.session.run(update,
                                     feed_dict={nuis_cat: best_fit_nuis})
            best_fit_nuis = best_fit_nuis - up
            if np.max(np.abs(up)) < 1e-4:
                break
        best_fit = best_fit_nuis.tolist()
        best_fit.insert(pdf.config.poi_index, constrained_mu)
        return best_fit
Example #6
  def initialize(self, *args, **kwargs):
    # Store latent variables in a temporary attribute; MAP will
    # optimize `PointMass` random variables, which subsequently
    # optimizes mean parameters of the normal approximations.
    latent_vars_normal = self.latent_vars.copy()
    self.latent_vars = {z: PointMass(params=qz.loc)
                        for z, qz in six.iteritems(latent_vars_normal)}

    super(Laplace, self).initialize(*args, **kwargs)

    hessians = tf.hessians(self.loss, list(six.itervalues(self.latent_vars)))
    self.finalize_ops = []
    for z, hessian in zip(six.iterkeys(self.latent_vars), hessians):
      qz = latent_vars_normal[z]
      if isinstance(qz, (MultivariateNormalDiag, Normal)):
        scale_var = get_variables(qz.variance())[0]
        scale = 1.0 / tf.diag_part(hessian)
      else:  # qz is MultivariateNormalTriL
        scale_var = get_variables(qz.covariance())[0]
        scale = tf.matrix_inverse(tf.cholesky(hessian))

      self.finalize_ops.append(scale_var.assign(scale))

    self.latent_vars = latent_vars_normal.copy()
    del latent_vars_normal
Example #7
def getHess(f, x_value):
    x = tf.placeholder(tf.float32, shape=len(x_value))
    f_grad = tf.hessians(f(x), x)
    sess = tf.Session()
    f_g = sess.run(f_grad, feed_dict={x: x_value})
    f_g_value = f_g[0]
    return f_g_value
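
# A possible usage of getHess above (hypothetical values): for f(x) = sum(x**2)
# the Hessian is 2 * identity, independent of x.
hess = getHess(lambda x: tf.reduce_sum(x ** 2), [1.0, 2.0, 3.0])
print(hess)  # expected: 2 * np.eye(3)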
Example #8
def G_tot_nn_unconstr(X_m,
                      X_T,
                      x_scaling_1,
                      x_scaling_2,
                      T_scaling_1,
                      T_scaling_2,
                      weights,
                      n_hidden=2):
    # Assuming convex_nn architecture
    n_weights = 7 + (n_hidden - 1) * 10 + 8
    weights_1 = weights[:n_weights]
    weights_2 = weights[-n_weights + 1:] + fixed_bias
    # Divide microstructural features into composition and fraction:
    X_1 = X_m[:, 0:1]
    X_2 = X_m[:, 1:2]
    f = X_m[:, 2:]
    # Temperature must be scaled differently for each input
    X_T_1 = (X_T - T_scaling_1["mean"]) / T_scaling_1["std"]
    X_T_2 = (X_T - T_scaling_2["mean"]) / T_scaling_2["std"]
    # Phase 2 composition is calculated from phase 1 composition, f.
    X_2 = (X_2 - x_scaling_2["mean"]) / x_scaling_2["std"]
    X_1 = (X_1 - x_scaling_1["mean"]) / x_scaling_1["std"]
    G_nn_1 = convex_nn(X_T_1, X_1, weights_1)
    G_nn_2 = convex_nn(X_T_2, X_2, weights_2)
    G_tot = G_nn_1 * (1. - f) + G_nn_2 * f
    G_tot_grad = tf.gradients(G_tot, X_m, stop_gradients=X_m)
    # tf.hessians on the batched input X_m returns shape (N, d, N, d); the einsum sums
    # out the cross-sample axis (those blocks are zero), leaving per-sample Hessians.
    G_tot_hess = tf.einsum("ijkl->ijl", tf.hessians(G_tot, X_m)[0])
    return G_tot, G_tot_grad, G_tot_hess
Example #10
def _test_loss(sample_shape_fn):
    our_loss_fn = CrossEntropyLoss()

    # treat the input as one fixed-length vector only when the sampled shape is 1-D and deterministic
    single_vector_inputs = len(sample_shape_fn()) == 1 and sample_shape_fn() == sample_shape_fn()
    length = [None] if not single_vector_inputs else [sample_shape_fn()[0]]

    targets_ph = tf.placeholder(tf.float64, length * len(sample_shape_fn()))
    logits_ph = tf.placeholder(tf.float64, length * len(sample_shape_fn()))
    loss = tf.nn.softmax_cross_entropy_with_logits(labels=targets_ph, logits=logits_ph)
    grad = tf.gradients(loss, logits_ph)
    if single_vector_inputs:
        ihvp = tf.reshape(tf.matrix_solve(tf.hessians(loss, logits_ph)[0] + tf.eye(length[0], dtype=tf.float64),
                                          tf.reshape(grad, (-1,1))), (-1,))
    session = tf.Session()
    for _ in xrange(1000):
        shape = sample_shape_fn()
        targets = np.random.rand(*shape)
        targets = targets ** 2
        targets /= np.sum(targets, axis=-1, keepdims=True)
        logits = np.random.rand(*shape)

        our_loss = our_loss_fn(targets, logits)
        our_grad = our_loss_fn.gradient(targets, logits)
        if single_vector_inputs:
            our_ihvp = our_loss_fn.ihvp(targets, logits, l2_reg=1)

        feed_dict = {targets_ph: targets, logits_ph: logits}
        true_loss = session.run(loss, feed_dict=feed_dict)
        true_gradient = session.run(grad, feed_dict=feed_dict)
        if single_vector_inputs:
            true_ihvp = session.run(ihvp, feed_dict=feed_dict)

        assert np.allclose(our_loss, true_loss) and np.allclose(our_grad, true_gradient)
        if single_vector_inputs:
            assert np.allclose(our_ihvp, true_ihvp)
Example #11
    def finetune_and_test_hessian(self, input_pts, output_pts, num_steps, test_input_pts, inp_tau):
        "This returns the Hessian at the adapted parameter value for uncertainty estimates"
        pred = self.forward_pass(input_pts, self.theta)
        loss = mse(pred, output_pts)
        grad = tf.gradients(loss, list(self.theta.values()))
        grad = dict(zip(self.theta.keys(), grad))
        phi = dict(zip(self.theta.keys(), [self.theta[key] - alpha * grad[key] for key in self.theta.keys()]))

        for _ in range(num_steps - 1):  # with num_steps == 1 this loop is never entered
            pred = self.forward_pass(input_pts, phi)
            loss = mse(pred, output_pts)

            grad = tf.gradients(loss, list(phi.values()))
            grad = dict(zip(phi.keys(), grad))

            phi = dict(zip(phi.keys(), [phi[key] - alpha * grad[key] for key in phi.keys()])) 
        
        #splice in flat_params
        keys, vals = zip(*[(k, v) for k, v in phi.items()])
        flat_params = tf.squeeze(tensors_to_column(vals))
        phi = column_to_tensors(vals, flat_params)
        phi = {keys[i]: phi[i] for i in range(len(phi))}

        adapted_pred = self.forward_pass(input_pts, phi)
        adapted_mse = mse(adapted_pred, output_pts)
        log_pr_hessian = tf.hessians(adapted_mse, flat_params)[0]
        log_prior_hessian = tf.eye(1761) * inp_tau
        hessian = tf.add(log_pr_hessian, log_prior_hessian)
        
        test_pred = self.forward_pass(test_input_pts, phi)

        return test_pred, flat_params, hessian
Example #12
def flathess(loss, var_list, clip_norm=None):

    # Pseudocode:

    # We just need to get the hessians of the policy
    # So, step 1: get all the hessians
    # Step 2: get the hessians in the namespace of the policy
    # Reshape them properly into a matrix - can probably flatten each of the first
    # n/2 dimensions and then concatenate them into a block
    # Step 3: concatenate them all blockwise
    hessians = tf.hessians(loss, var_list)
    # TODO: is this right?
    if clip_norm is not None:
        hessians = [
            tf.clip_by_norm(hessian, clip_norm=clip_norm)
            for hessian in hessians
        ]
    for i in range(len(hessians)):
        #reshape
        #TODO: write this more cleanly as a list comprehension?
        hessian = hessians[i]
        shape = [int(s) for s in hessian.shape]
        dims = int(np.sqrt(reduce(mul, shape, 1)))  #assumes symmetry
        hessians[i] = tf.reshape(
            hessian, [dims, dims
                      ])  #TODO: verify this is the correct reshaping direction

    return block_diagonal(hessians)
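
# flathess relies on a block_diagonal helper that is not shown above. A minimal sketch
# of one way to write it (assuming square 2-D blocks with static shapes): pad each block
# out to the full width with zeros, then stack the row-blocks.
def block_diagonal_sketch(matrices):
    sizes = [int(m.shape[0]) for m in matrices]
    total = sum(sizes)
    rows, offset = [], 0
    for m, size in zip(matrices, sizes):
        # zero-pad the columns so each block sits at its own offset on the diagonal
        rows.append(tf.pad(m, [[0, 0], [offset, total - offset - size]]))
        offset += size
    return tf.concat(rows, axis=0)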
Example #13
 def __init__(self, icb, train_documents, train_labels, leaf_method, weights=None):
     self.icb = icb
     if weights is None:
         self.weights_num = np.ones_like(train_labels, dtype=np.float64)
     else:
         self.weights_num = np.array(weights, dtype=np.float64)
     self.sess = tf.Session()
     self.sess.run(tf.global_variables_initializer())
     self.train_documents = train_documents
     self.train_labels = train_labels
     self.weights = tf.placeholder(tf.float64, shape=train_labels.shape, name='weights')
     self.x = tf.placeholder(tf.float64, shape=train_documents.shape, name='x')
     self.y = tf.placeholder(tf.float64, shape=train_labels.shape, name='y')
     self.approxes = []
     self.approxes.append(tf.zeros_like(train_labels, dtype=tf.float64))
     self.leaf_values = []
     self.leaf_values_grads = []
     for t in xrange(len(icb.trees)):
         a = self.approxes[-1]
         loss = tf.reduce_sum(tf.nn.sigmoid_cross_entropy_with_logits(labels=self.y, logits=a,
                                                                        name='loss_step_%s' % str(t)))
         grads = tf.gradients(loss, a)[0]
         hessians = tf.diag_part(tf.hessians(loss, a)[0])
         leaf_doc_idxs = [sorted(list(l)) for l in icb.trees[t]._document_idxs_for_leaves]
         doc_leaf_idxs = [0] * len(train_labels)
         for l, leaf_idxs in enumerate(leaf_doc_idxs):
             for i in leaf_idxs:
                 doc_leaf_idxs[i] = l
         doc_leaf_idxs = tf.constant(doc_leaf_idxs, dtype=tf.int32)
         leaf_values_lst = []
         for l in xrange(len(icb.trees[t].leaf_values)):
             leaf_mask = tf.equal(doc_leaf_idxs, l)
             leaf_gradients = tf.boolean_mask(grads, leaf_mask)
             leaf_hessians = tf.boolean_mask(hessians, leaf_mask)
             leaf_weights = tf.boolean_mask(self.weights, leaf_mask)
             if leaf_method == 'Gradient':
                 leaf_values_lst.append(-tf.divide(
                     tf.reduce_sum(tf.multiply(leaf_weights, leaf_gradients)),
                     tf.reduce_sum(leaf_weights) + icb.trees[t].l2_reg_coef
                 ) * icb.trees[t].learning_rate)
             else:
                 leaf_values_lst.append(-tf.divide(
                     tf.reduce_sum(tf.multiply(leaf_weights, leaf_gradients)),
                     tf.reduce_sum(tf.multiply(leaf_weights, leaf_hessians)) + icb.trees[t].l2_reg_coef
                 ) * icb.trees[t].learning_rate)
         leaf_values = tf.stack(leaf_values_lst)
         self.leaf_values.append(leaf_values)
         tree_predictions = tf.gather(leaf_values, doc_leaf_idxs)
         self.approxes.append(a + tree_predictions)
         leaf_value_grad = []
         for lv in leaf_values_lst:
             lvg = tf.gradients(lv, self.weights)[0]
             leaf_value_grad.append(lvg)
         self.leaf_values_grads.append(leaf_value_grad)
     self.train_idx = tf.placeholder(tf.int32, shape=[])
     train_prediction_loss = tf.reduce_sum(tf.nn.sigmoid_cross_entropy_with_logits(
         labels=self.y[self.train_idx:self.train_idx + 1], logits=self.approxes[-1][self.train_idx:self.train_idx + 1]
     ))
     self.train_prediction_loss_grad = tf.gradients(train_prediction_loss, self.weights)[0]
Example #14
    def loss_func(self, model, S_interior, t_interior, Smin_boundary, tmin_boundary, Smax_boundary,\
            tmax_boundary, S_terminal, t_terminal, use_fd_hessian, use_L2_err):
        ''' Compute total loss for training.
        Note: only geometric avg boundary condition is considered.
        Args:
            model:          DGMNet model object
            S_interior:     sampled space points in the interior of the function's domain
            t_interior:     sampled time points in the interior of the function's domain
            Smin_boundary, tmin_boundary: sampled points on the lower space boundary
            Smax_boundary, tmax_boundary: sampled points on the upper space boundary
            S_terminal:     sampled space points at terminal time
            t_terminal:     sampled time points at terminal point (vector of terminal times)
            use_fd_hessian: use the (deprecated) finite-difference Hessian instead of tf.hessians
            use_L2_err:     use squared error (rather than absolute error) for the terminal condition
        '''
        # Loss term #1: PDE
        # compute function value and derivatives at current sampled points
        V = model(S_interior, t_interior)
        V_t = tf.gradients(V, t_interior)[0]
        V_s = tf.gradients(V, S_interior)[0]
        S_mean = tf.reduce_mean(S_interior)
        if use_fd_hessian:  # deprecated
            V_ss = (fd_hessian(model, S_interior, t_interior, 1.5e-6 * S_mean) + \
                   fd_hessian(model, S_interior, t_interior, 1.5e-7 * S_mean) + \
                   fd_hessian(model, S_interior, t_interior, 1.5e-8 * S_mean)) / 3
        else:
            V_ss = tf.hessians(V, S_interior)[0]
            # the Hessian over the batched input has shape (N, d, N, d); the cross-sample
            # blocks are zero, so summing over axis 2 leaves per-sample (d, d) Hessians
            V_ss = tf.reduce_sum(V_ss, axis=2)

        cov_Vss = tf.multiply(V_ss, self.cov_mat)
        sec_ord = tf.map_fn(lambda i: tf.tensordot(S_interior[i], tf.linalg.matvec(cov_Vss[i], S_interior[i]), 1) / 2,\
                  tf.range(tf.shape(S_interior)[0]), dtype=tf.float64)
        first_ord = tf.reduce_sum(tf.multiply(tf.multiply(V_s, S_interior),
                                              self.ir - self.dividend_vec),
                                  axis=1)
        diff_V = tf.reshape(
            V_t, [-1]) + sec_ord + first_ord - self.ir * tf.reshape(V, [-1])

        # compute average L2-norm of differential operator
        L1 = tf.reduce_mean(tf.math.square(diff_V))

        # Loss term #2: boundary condition
        V_minboundary = model(Smin_boundary, tmin_boundary)
        real_minboundary = tf.map_fn(lambda x: GeometricAvg_tf(self.dim, x[:-1], self.payoff_func.strike, self.domain.T-x[-1],\
                self.ir, self.vol_vec, self.dividend_vec[0], self.corr_mat).european_option_price(), tf.concat([Smin_boundary, tmin_boundary], 1))
        L2min = tf.reduce_mean(
            tf.math.square(tf.reshape(V_minboundary, [-1]) - real_minboundary))

        V_maxboundary = model(Smax_boundary, tmax_boundary)
        real_maxboundary = tf.map_fn(lambda x: GeometricAvg_tf(self.dim, x[:-1], self.payoff_func.strike, self.domain.T-x[-1],\
                self.ir, self.vol_vec, self.dividend_vec[0], self.corr_mat).european_option_price(), tf.concat([Smax_boundary, tmax_boundary], 1))
        L2max = tf.reduce_mean(
            tf.math.square(tf.reshape(V_maxboundary, [-1]) - real_maxboundary))

        # Loss term #3: initial/terminal condition
        target_payoff = self.payoff_func(S_terminal)
        fitted_payoff = tf.reshape(model(S_terminal, t_terminal), [-1])

        if use_L2_err:
            L3 = tf.reduce_mean(tf.math.square(fitted_payoff - target_payoff))
        else:
            L3 = tf.reduce_mean(tf.math.abs(fitted_payoff - target_payoff))
        return L1, L2min, L2max, L3
Example #15
def main(args):
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        global x_test, y_test

        EPOCH = args.epoch
        BATCHSIZE = args.batch_size

        model_chosen = args.model_type
        if model_chosen == '1':
            model = model1
        elif model_chosen == '2':
            model = model2

        model.compile(optimizer=tf.train.AdamOptimizer(**ADAMPARAM),
                      loss='categorical_crossentropy',
                      metrics=['accuracy'])
        model.fit(x_train, y_train, epochs=EPOCH, batch_size=BATCHSIZE)
        test_result = model.evaluate(x_test, y_test)
        train_result = model.evaluate(x_train, y_train)
        print('training loss:%f' % train_result[0])
        print('training accuracy:%f' % train_result[1])
        print('testing loss:%f' % test_result[0])
        print('testing accuracy:%f' % test_result[1])

        x_test = tf.convert_to_tensor(x_test, dtype=tf.float32)
        y_pred = model.apply(x_test)
        #print(type(y_pred))

        y_test = tf.convert_to_tensor(y_test)
        #model_weights = tf.concat([tf.reshape(i, [-1]) for i in model.trainable_variables], axis=0)

        loss = tf.keras.losses.categorical_crossentropy(y_test, y_pred)

        #grad = tf.gradients(loss, model.trainable_variables)
        #print(grad)
        #input()
        hess = tf.hessians(loss, model.trainable_variables)

        print(len(hess))

        #print(type(hess))
        hess_norm = []
        for i in hess:
            norm = tf.norm(i, 2)
            hess_norm.append(norm)
        hess_norm = sess.run(hess_norm)
        sharpness = max(hess_norm) * (1e-8) / 2 / (1 + test_result[0])

        with open('sharpness.csv', 'a') as f:
            print(args.model_type, end=',', file=f)
            print(args.batch_size, end=',', file=f)
            print(train_result[0], end=',', file=f)
            print(train_result[1], end=',', file=f)
            print(test_result[0], end=',', file=f)
            print(test_result[1], end=',', file=f)
            print(sharpness, file=f)

    return
Example #16
 def get_hessian(self, layer_num=-2):
     self.hessian = tf.hessians(self.cost, self.params[layer_num])[0]
     shape = (self.params[layer_num].shape[0] *
              self.params[layer_num].shape[1],
              self.params[layer_num].shape[0] *
              self.params[layer_num].shape[1])
     self.hessian = tf.reshape(self.hessian, shape=shape)
     return self.hessian
Example #17
def compute_ghm(energy_op, x, params):
    """
    Computes gradients, hessians, mixed_partials in one go
    """
    grads = densify(tf.gradients(energy_op, x)[0])
    hess = densify(tf.hessians(energy_op, x)[0])
    mp = list_jacobian(grads, params)
    return grads, hess, mp
Example #18
 def eval_hess(self):
     if self.hess_op is None:
         self.hess_op = tf.hessians(self.meanloss, self.parameters)
     self.v_hess = self.sess.run(self.hess_op,
                                 feed_dict={
                                     self.images: self.datax,
                                     self.label: self.datay,
                                     self.dropout_keep_prob: self.dp
                                 })
Example #19
 def get_hession_matrix_mutiply_v(v, x):
     loss = loss_func(x, tf.stop_gradient(x))
     old_shape = v.get_shape()
     num_elements = old_shape.num_elements()
     H = tf.hessians(loss, x)[0]  # tf.hessians returns a list; take its single element
     H = tf.reshape(H, [num_elements, num_elements])
     v = tf.reshape(v, [num_elements, 1])
     out = tf.matmul(H, v)
     return tf.reshape(out, old_shape.as_list()), H
Example #20
  def test_hessian_gradient_2(self):
    dim = 10
    w1_t = tf.Variable(np.random.randn(dim).astype(np.float32))
    w2_t = tf.Variable(np.random.randn(dim).astype(np.float32))

    w1w1_t = tf.reduce_sum(w1_t * w1_t)
    w1w2_t = tf.reduce_sum(w1_t * w2_t)
    w2w2_t = tf.reduce_sum(w2_t * w2_t)

    L_t = 0.3 * w1w1_t + 0.1 * w1w2_t - 0.2 * w2w2_t \
          + 0.15 * w1w1_t * w1w1_t \
          - 0.45 * w1w1_t * w2w2_t \
          + 0.23 * w1w2_t * w1w1_t

    grad_t = tf.gradients(L_t, [w1_t, w2_t])
    H11_t = tf.hessians(L_t, w1_t)[0]
    H22_t = tf.hessians(L_t, w2_t)[0]
    H12_t = [tf.gradients(grad_t[0][i], w2_t)[0] for i in range(dim)]
    H21_t = [tf.gradients(grad_t[1][i], w1_t)[0] for i in range(dim)]

    actual_Hg_t = self._compute_hess_grad(L_t, [w1_t, w2_t])

    with tf.Session() as sess:
      sess.run(tf.global_variables_initializer())
      grads = sess.run(grad_t)
      H11 = sess.run(H11_t)
      H22 = sess.run(H22_t)
      H12 = np.stack(sess.run(H12_t))
      H21 = np.stack(sess.run(H21_t))

      H = np.zeros((2 * dim, 2 * dim))
      H[:dim, :dim] = H11
      H[dim:, dim:] = H22
      H[:dim, dim:] = H12
      H[dim:, :dim] = H21

      grad = np.zeros(2 * dim)
      grad[:dim] = grads[0]
      grad[dim:] = grads[1]

      expected_Hg = H.dot(grad)
      actual_Hg = np.concatenate(sess.run(actual_Hg_t))

      self.assertTrue(np.allclose(expected_Hg, actual_Hg, rtol=1e-3))
Example #21
 def FIM_F(g, X):
     hessian = tf.hessians(self.KL_divergence, X)[0]
     X_size = tf.size(X)
     hessian = tf.reshape(hessian, [X_size, X_size]) + epsilon * tf.eye(
         X_size
     )
     FIM = tf.linalg.inv(hessian)
     return tf.reshape(
         tf.linalg.matvec(FIM, tf.reshape(g, [-1])), tf.shape(X)
     )
Example #22
def simu_hessian():
    X = tf.svd(tf.random.normal(shape=(n, p)))[1]
    f = tf.matmul(X, beta)
    y = f + tf.random.normal(shape=(n, 1))
    loss = tf.reduce_mean((y - w[0] * tf.matmul(
        X, w[1:] / tf.math.sqrt(tf.matmul(tf.transpose(w[1:]), w[1:]))))**2)
    # dl_dv = tf.gradients(loss, v)
    # d2l_dvdg = tf.gradients(dl_dv, g)
    d2l_dvdg = tf.hessians(loss, w)
    return d2l_dvdg
Example #23
def hessians(ys, xs, no_backprop=False):
    if not no_backprop:
        return tf.squeeze(tf.hessians(ys, xs)[0], axis=[0, 2])
    grads = tf.gradients(ys, xs)[0][0]
    # Note: it is important to use parallel_iterations=None here, to avoid an
    # in graph while loop inside the jacobians computation, which itself is an
    # in graph while loop. This is more efficient since we do not have a large
    # number of parameters, but can use many samples to compute gradient estimator
    # variance.
    return tf.squeeze(jacobians(grads, xs, parallel_iterations=None), axis=1)
Example #24
def loss_fcn_gradient_hessian(video_indices, **kwargs):
    """Compute the loss function, gradient and the Hessian."""
    variable = kwargs['model_tensor']
    loss = loss_fcn_dense(**kwargs)['loss']
    g = tf.gradients(loss, variable)[0]
    g = tf.gather(g, axis=1, indices=video_indices)
    h = tf.hessians(loss, variable)[0]
    h = tf.gather(h, axis=1, indices=video_indices)
    h = tf.gather(h, axis=4, indices=video_indices)
    return {'loss': loss, 'gradient': g, 'hessian': h}
Example #25
 def split_and_hessian(self, out_node, innode):
     out_nodes = tf.split(out_node, 1, axis=1)
     hessian_node = []
     for o_node in out_nodes:
         hessian_node.append(tf.stack(tf.hessians(o_node, innode)))
     new_dim = len(hessian_node[0].shape.as_list()) + 1
     new_dim = list(range(new_dim))
     new_dim[0] = 1
     new_dim[1] = 0
     return tf.transpose(tf.stack(hessian_node), perm=new_dim)
Example #26
def hess_elemwise(F,X):
    #return the elementwise hessian of F w.r.t X in the following form
    #H = d^2F/dX, where H[i,j,k] = dF_i/dx_j dx_k
    #F needs to be a rank 1 tensor for this to work,
    #so a reshape operation is needed beforehand
    
    #turn into iterable list of tensors
    Ftemp = tf.unstack(F)
    hess = [tf.hessians(f, X) for f in Ftemp] # if F is a python list
    #convert back to rank 2 tensor with list dimension squeezed out
    H = tf.squeeze(tf.stack(hess, axis=0),axis=1)
    return H
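
# A possible usage of hess_elemwise above (hypothetical values): F must be rank 1,
# and each slice H[i] is the Hessian of F[i] with respect to X.
X = tf.constant([1.0, 2.0, 3.0])
F = tf.stack([X[0] * X[0], X[1] * X[2]])
H = hess_elemwise(F, X)  # shape (2, 3, 3)
with tf.Session() as sess:
    h = sess.run(H)
    # h[0]: Hessian of X[0]**2 -> 2.0 at entry (0, 0), zeros elsewhere
    # h[1]: Hessian of X[1]*X[2] -> 1.0 at entries (1, 2) and (2, 1)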
Example #27
    def ML_point(self, task):
        with tf.name_scope("ML_point"):
            task_phi = task
            input_pts, output_pts = sample_linear_task_pts(
                np.random.choice([5, 7, 10, 15, 18, 20, 400, 500, 630, 800]),
                task_phi,
                noise=np.random.uniform(0.1, 10.))

            phi = {}

            with tf.name_scope("train"):
                # Initialize phi with the first gradient update
                pred = self.forward_pass(input_pts, self.theta)
                loss = mse(pred, output_pts)
                loss = tf.Print(loss, [loss])
                grad = tf.gradients(loss, list(self.theta.values()))
                #phi = dict(zip(self.theta.keys(), [self.theta[key] + 0. for key in self.theta.keys()]))
                #keys, vals = zip(*[(k, v) for k, v in phi.items()])
                #og_flat_params = tf.squeeze(tensors_to_column(vals))

                grad = dict(zip(self.theta.keys(), grad))
                phi = dict(
                    zip(self.theta.keys(), [
                        self.theta[key] - alpha * grad[key]
                        for key in self.theta.keys()
                    ]))

                keys, vals = zip(*[(k, v) for k, v in phi.items()])
                flat_params = tf.squeeze(tensors_to_column(vals))
                phi = column_to_tensors(vals, flat_params)
                phi = {keys[i]: phi[i] for i in range(len(phi))}

            with tf.name_scope("test"):
                test_input_pts, test_output_pts = sample_linear_task_pts(
                    M, task_phi)
                test_pred = self.forward_pass(test_input_pts, phi)
                test_mse = mse(test_pred, test_output_pts)

                log_pr_hessian = tf.hessians(test_mse, flat_params)[0]
                log_prior_hessian = tf.eye(n_fc + 1) * tau
                hessian = tf.add(log_prior_hessian, log_pr_hessian)
                test_mse = tf.Print(
                    test_mse, [test_mse, tf.linalg.logdet(hessian)],
                    message="Sanity")
                loss = tf.cond(
                    tf.equal(self.use_hess, tf.constant(True)),
                    lambda: tf.add(test_mse, tf.linalg.logdet(hessian)),
                    lambda: test_mse)

                #test_mse = tf.Print(test_mse, [log_pr_hessian], message = "Log Pr Hessian")
                #test_mse = tf.Print(test_mse, [tf.linalg.logdet(hessian)], message = "Log det")

                return loss
Example #28
def add_hess_ops(function, inputs, graph_dictionary):
    """adds ops to calculate and diagonalize the hessian to the graph and graph_dictionary
    """
    with tf.variable_scope("hessian"):
        hessian_matrix = tf.hessians(function, inputs, name="hessians_output")[0]
        eigenvalues, eigenvectors = tf.self_adjoint_eig(hessian_matrix)

    graph_dictionary.update({
                           "hessian_matrix": hessian_matrix,
                           "eigenvalues": eigenvalues,
                           "eigenvectors": eigenvectors
                           })
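
# A possible usage of add_hess_ops above (hypothetical values): a quadratic whose
# Hessian is diag(6, 2), so the eigenvalues come out as [2., 6.] (ascending order).
x = tf.constant([1.0, 2.0])
quadratic = 3.0 * x[0] * x[0] + x[1] * x[1]
graph_dictionary = {}
add_hess_ops(quadratic, x, graph_dictionary)
with tf.Session() as sess:
    print(sess.run(graph_dictionary["eigenvalues"]))  # ~[2., 6.]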
Example #29
            def hessian(data):
                """ Helper function that computes hessian for a given gene.

                :param data: tuple (X_t, size_factors_t, params_t)
                """
                # Extract input data:
                X_t, size_factors_t, params_t = data
                size_factors = tf.transpose(
                    size_factors_t)  # observations x features
                X = tf.transpose(X_t)  # observations x features
                params = tf.transpose(params_t)  # design_params x features

                a_split, b_split = tf.split(
                    params, tf.TensorShape([p_shape_a, p_shape_b]))

                # Define the model graph based on which the likelihood is evaluated
                # and with which the hessian is computed:
                model = BasicModelGraph(X=X,
                                        design_loc=design_loc,
                                        design_scale=design_scale,
                                        constraints_loc=constraints_loc,
                                        constraints_scale=constraints_scale,
                                        a_var=a_split,
                                        b_var=b_split,
                                        dtype=dtype,
                                        size_factors=size_factors)

                # Compute the hessian of the model of the given gene:
                if self._compute_hess_a and self._compute_hess_b:
                    H = tf.hessians(model.log_likelihood, params)
                elif self._compute_hess_a and not self._compute_hess_b:
                    H = tf.hessians(model.log_likelihood, a_split)
                elif not self._compute_hess_a and self._compute_hess_b:
                    H = tf.hessians(model.log_likelihood, b_split)
                else:
                    raise ValueError("either require hess_a or hess_b")

                return H
Example #30
  def test_full_hessian(self):
    dim1 = 10
    dim2 = 15

    w1_t = tf.Variable(np.random.randn(dim1).astype(np.float32))
    w2_t = tf.Variable(np.random.randn(dim2).astype(np.float32))

    w1w1_t = tf.reduce_sum(w1_t * w1_t)
    w2w2_t = tf.reduce_sum(w2_t * w2_t)

    L_t = 0.3 * w1w1_t - 0.2 * w2w2_t \
          + 0.15 * w1w1_t * w1w1_t - 0.45 * w1w1_t * w2w2_t

    grad_t = tf.gradients(L_t, [w1_t, w2_t])
    H11_t = tf.hessians(L_t, w1_t)[0]
    H22_t = tf.hessians(L_t, w2_t)[0]
    H12_t = [tf.gradients(grad_t[0][i], w2_t)[0] for i in range(dim1)]
    H21_t = [tf.gradients(grad_t[1][i], w1_t)[0] for i in range(dim2)]

    hess_blocks_t = tfutils.hessian_tensor_blocks(L_t, [w1_t, w2_t])

    with tf.Session() as sess:
      sess.run(tf.global_variables_initializer())
      H11 = sess.run(H11_t)
      H22 = sess.run(H22_t)
      H12 = np.stack(sess.run(H12_t))
      H21 = np.stack(sess.run(H21_t))

      H = np.zeros((dim1 + dim2, dim1 + dim2))
      H[:dim1, :dim1] = H11
      H[dim1:, dim1:] = H22
      H[:dim1, dim1:] = H12
      H[dim1:, :dim1] = H21

      hess_blocks = sess.run(hess_blocks_t)

    actual_hess = tfutils.hessian_combine_blocks(hess_blocks)
    self.assertTrue(np.allclose(actual_hess, H))
Example #31
def fisher6(num_sample=1000, meanf=None, cov=None):
    if meanf is None:
        mean = var
    else:
        mean = meanf(var)
    mgd = tf.contrib.distributions.MultivariateNormalFullCovariance(
        loc=mean, covariance_matrix=cov
    )
    sample = tf.stop_gradient(mgd.sample(num_sample))
    lnp = lnunnormalp(sample, mean, cov)
    lnpoverp = idn(lnp)
    kl = tf.log(tf.reduce_mean(lnpoverp)) - tf.reduce_mean(tf.log(lnpoverp))
    fisher = tf.hessians(kl, var)
    return fisher
Example #32
  def testSecondGradient(self):
    images_placeholder = tf.placeholder(tf.float32, shape=(3, 2))
    labels_placeholder = tf.placeholder(tf.int32, shape=(3))
    weights = tf.Variable(tf.truncated_normal([2], stddev=1.0))
    weights_with_zeros = tf.pack([tf.zeros([2]), weights], axis=1)
    logits = tf.matmul(images_placeholder, weights_with_zeros)
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits, labels_placeholder)
    loss = tf.reduce_mean(cross_entropy)

    # Taking the second gradient should fail, since it is not
    # yet supported.
    with self.assertRaisesRegexp(LookupError,
                                 ".*No gradient defined.*PreventGradient.*"):
      _ = tf.hessians(loss, [weights])
Example #33
  def testSecondGradient(self):
    with self.test_session():
      l = tf.constant([0.0, 0.0, 1.0, 0.0,
                       1.0, 0.0, 0.0, 0.0,
                       0.0, 0.5, 0.0, 0.5], shape=[12],
                      dtype=tf.float64, name="l")
      f = tf.constant([0.1, 0.2, 0.3, 0.4,
                       0.1, 0.4, 0.9, 1.6,
                       0.1, 0.8, 2.7, 6.4], shape=[12],
                      dtype=tf.float64, name="f")
      x = tf.nn.softmax_cross_entropy_with_logits(f, l, name="xent")
      loss = tf.reduce_mean(x)

    # Taking the second gradient should fail, since it is not
    # yet supported.
    with self.assertRaisesRegexp(LookupError,
                                 ".*No gradient defined.*PreventGradient.*"):
      _ = tf.hessians(loss, [f])