def auto4check2(input, dataset):
    a = theano.shared(value=dataset[0], name="a")
    b = theano.shared(value=dataset[1], name="b")
    c = theano.shared(value=dataset[2], name="c")
    x = T.vector('x')

    u = x[0] - 0.8
    v = x[1] - (a[0] + a[1] * u ** 2 * (1 - u) ** 0.5 - a[2] * u)
    alpha = -b[0] + b[1] * u ** 2 * (1 + u) ** 0.5 + b[2] * u
    beta = c[0] * v ** 2 * (1 - c[1] * v) / (1 + c[2] * u ** 2)
    fx = alpha * np.e ** (-beta)

    g_f_x = T.jacobian(fx, x)
    grad = theano.function([x], g_f_x)
    Hessian = theano.function([x], T.hessian(fx, x))
    H_alpha_x = theano.function([x], T.hessian(alpha, x))
    H_beta_x = theano.function([x], T.hessian(beta, x))
    J_f_alpha = theano.function([x], T.grad(fx, alpha))
    J_f_beta = theano.function([x], T.grad(fx, beta))
    J_alpha_x = theano.function([x], T.grad(alpha, x))
    J_beta_x = theano.function([x], T.grad(beta, x))

    J_f_y = [J_f_alpha(input), J_f_beta(input)]
    J_y_x = [J_alpha_x(input), J_beta_x(input)]
    # print "H_alpha_x"
    # print H_alpha_x(input)
    # print "H_beta_x"
    # print H_beta_x(input)
    # print "J_f_y"
    # print J_f_y
    # print "J_y_x"
    # print J_y_x
    # print grad(input)
    return Hessian(input)
def test004_hessian():
    x = tensor.vector()
    y = tensor.sum(x ** 2)
    Hx = tensor.hessian(y, x)
    f = theano.function([x], Hx)
    vx = numpy.arange(10).astype(theano.config.floatX)
    assert numpy.allclose(f(vx), numpy.eye(10) * 2)
def test_DownsampleFactorMax_hessian(self):
    # Example provided by Frans Cronje, see
    # https://groups.google.com/d/msg/theano-users/qpqUy_3glhw/JMwIvlN5wX4J
    x_vec = tensor.vector('x')
    z = tensor.dot(x_vec.dimshuffle(0, 'x'), x_vec.dimshuffle('x', 0))
    y = max_pool_2d(input=z, ds=(2, 2), ignore_border=True)
    C = tensor.exp(tensor.sum(y))
    grad_hess = tensor.hessian(cost=C, wrt=x_vec)
    fn_hess = function(inputs=[x_vec], outputs=grad_hess)
    # The value has been manually computed from the theoretical gradient,
    # and confirmed by the implementation.
    assert numpy.allclose(fn_hess([1, 2]), [[0., 0.], [0., 982.7667]])
def test_DownsampleFactorMax_hessian(self):
    # Example provided by Frans Cronje, see
    # https://groups.google.com/d/msg/theano-users/qpqUy_3glhw/JMwIvlN5wX4J
    x_vec = tensor.vector("x")
    z = tensor.dot(x_vec.dimshuffle(0, "x"), x_vec.dimshuffle("x", 0))
    y = max_pool_2d(input=z, ds=(2, 2))
    C = tensor.exp(tensor.sum(y))
    grad_hess = tensor.hessian(cost=C, wrt=x_vec)
    fn_hess = function(inputs=[x_vec], outputs=grad_hess)
    # The value has been manually computed from the theoretical gradient,
    # and confirmed by the implementation.
    assert numpy.allclose(fn_hess([1, 2]), [[0.0, 0.0], [0.0, 982.7667]])
def hessian_vector(expr, wrt):
    """Computes the Hessian of a vector expression with respect to variables.

    Args:
        expr: Vector Theano tensor expression.
        wrt: List of Theano variables.

    Returns:
        Theano tensor.
    """
    try:
        return _tensor_map(lambda f: hessian_scalar(f, wrt), expr)
    except ValueError:
        # Fallback for wider support.
        return T.stack([T.hessian(expr, wrt, disconnected_inputs="ignore")])
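# A minimal sketch of what the fallback branch above computes, assuming a scalar
# cost and a single wrt variable (an assumption, not part of the original code):
# T.stack adds a leading axis, so the result has shape (1, n, n).
import numpy as np
import theano
import theano.tensor as T

x = T.dvector("x")
cost = T.sum(x ** 2)
H = T.stack([T.hessian(cost, x, disconnected_inputs="ignore")])
f = theano.function([x], H)
assert f(np.arange(3.0)).shape == (1, 3, 3)  # each slice is 2 * identity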
def get_hessians(self, y):
    """Return a list of Hessians with respect to the model parameters.

    Args:
        y (theano.tensor.TensorVariable): corresponds to a vector that gives
            for each example the correct label.

    Returns:
        list(TensorSharedVariable): a list of Hessian matrices.
    """
    hessians = []
    for param in [self.beta_flat, self.asc]:
        cost = self.negative_log_likelihood(y)
        h = T.hessian(cost, param, disconnected_inputs='ignore')
        hessians.append(h)
    return hessians
def auto4check(dataset, x, tol=1e-9, maxiter=1000):
    # Least-squares fit of two rate constants k = (k0, k1) by Newton's method,
    # using Theano to build the symbolic gradient and Hessian of the loss.
    t0 = theano.shared(value=dataset[0], name="t0")
    a0 = theano.shared(value=dataset[1], name="a0")
    b0 = theano.shared(value=dataset[2], name="b0")
    c0 = theano.shared(value=dataset[3], name="c0")
    k = T.vector('k')

    a_t = np.e ** (-(k[0] + k[1]) * t0)
    b_t = k[0] / (k[0] + k[1]) * (1 - a_t)
    c_t = k[1] / (k[0] + k[1]) * (1 - a_t)
    f = T.sum((a0 - a_t) ** 2 + (b0 - b_t) ** 2 + (c0 - c_t) ** 2)

    F = theano.function([k], f)
    g_f_k = T.jacobian(f, k)
    j_f_k = theano.function([k], g_f_k)
    H_f_k = T.hessian(f, k)
    Hessian = theano.function([k], H_f_k)

    track, f_val = [], []
    track.append(array(x))
    f_val.append(F(x))
    g = j_f_k(x)
    i = 0
    print "Step =", i, "g=", g, "x=", x, "loss=", F(x)
    while norm(g) > tol:
        i += 1
        if i > maxiter:
            break
        # Newton step: solve G * s = -g, then update the parameter vector.
        G = Hessian(x)
        s = -np.linalg.solve(G, g)
        x += s
        track.append(array(x))
        f_val.append(F(x))
        g = j_f_k(x)
        print "step =", i, "g=", g, "x=", x, "loss=", F(x), "G=", G
    return x, F(x), track, f_val
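# A minimal call sketch with synthetic data (the array values are assumptions,
# not from the source): dataset packs the time grid and observed a(t), b(t),
# c(t) curves for first-order kinetics with k0 = 0.2, k1 = 0.1; x is the
# initial guess for the two rate constants.
import numpy as np

t = np.linspace(0.0, 10.0, 20)
a_obs = np.exp(-0.3 * t)
b_obs = 0.2 / 0.3 * (1.0 - a_obs)
c_obs = 0.1 / 0.3 * (1.0 - a_obs)
x_opt, loss, track, f_val = auto4check([t, a_obs, b_obs, c_obs],
                                       x=np.array([0.05, 0.05]))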
def get_expr_rff_feature_map_component_third_order_tensor(x, omega, u):
    grad = get_expr_rff_feature_map_component_grad(x, omega, u)
    G3, updates = theano.scan(lambda i, grad, x: T.hessian(grad[i], x),
                              sequences=T.arange(grad.shape[0]),
                              non_sequences=[grad, x])
    return G3, updates
def get_expr_gaussian_kernel_hessian(x, y, sigma):
    return T.hessian(get_expr_gaussian_kernel(x, y, sigma), x)
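# A minimal compilation sketch (assumes get_expr_gaussian_kernel from this
# module builds the symbolic Gaussian kernel value for two vector inputs and a
# scalar bandwidth sigma; the concrete inputs below are illustrative only):
import numpy as np
import theano
import theano.tensor as T

x = T.dvector("x")
y = T.dvector("y")
hess_fn = theano.function([x, y],
                          get_expr_gaussian_kernel_hessian(x, y, sigma=1.0))
hess_fn(np.zeros(2), np.ones(2))  # d x d Hessian of k(x, y) with respect to x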
def run_crbm():
    """ Discrete choice model estimation with Theano

    Setup
    -----
    step 1: Load variables from csv file
    step 2: Define hyperparameters used in the computation
    step 3: define symbolic Theano tensors
    step 4: build model and define cost function
    step 5: define gradient calculation algorithm
    step 6: define Theano symbolic functions
    step 7: run main estimation loop for n iterations
    step 8: perform analytics and model statistics
    """
    # compile and import dataset from csv
    d_x_ng, d_x_g, d_y, avail, d_ind = extractdata(csvString)
    data_x_ng = shared(np.asarray(d_x_ng, dtype=floatX), borrow=True)
    data_x_g = shared(np.asarray(d_x_g, dtype=floatX), borrow=True)
    data_y = T.cast(shared(np.asarray(d_y - 1, dtype=floatX), borrow=True),
                    'int32')
    data_av = shared(np.asarray(avail, dtype=floatX), borrow=True)
    data_ind = shared(np.asarray(d_ind, dtype=floatX), borrow=True)

    sz_n = d_x_g.shape[0]   # number of samples
    sz_k = d_x_g.shape[1]   # number of generic variables
    sz_m = d_x_ng.shape[2]  # number of non-generic variables
    sz_i = d_x_ng.shape[1]  # number of alternatives
    sz_z = d_ind.shape[1]   # number of indicators
    sz_minibatch = sz_n

    # model hyperparameters
    learning_rate = 0.1
    gen_rate = 1.0
    momentum = 0.9
    n_hidden = 3  # latent variable model parameters

    x_ng = T.tensor3('data_x_ng')  # symbolic theano tensors
    x_g = T.matrix('data_x_g')
    y = T.ivector('data_y')
    av = T.matrix('data_av')
    index = T.lscalar('index')
    z = T.matrix('data_ind')

    # construct model
    model = CRBM(sz_i, av,
                 n_in=[(sz_m, ), (sz_k, n_hidden)],
                 n_hid=[(n_hidden, ), (n_hidden, sz_i), (n_hidden, sz_z)],
                 n_ind=(sz_z, ),
                 input=[x_ng, x_g], output=y, inds=z)
    cost, error, chain_end, updates = model.gibbs_sampling(y, x_ng, x_g, av,
                                                           alts=6, steps=25)
    grads = T.grad(cost=cost - model.loglikelihood(y), wrt=model.params,
                   consider_constant=[chain_end])
    cost2 = -(model.loglikelihood(y) + 0.1 * model.cross_entropy(z))
    grads2 = T.grad(cost=cost2, wrt=model.params2)

    opt = optimizers.adadelta(model.params, model.masks, momentum)
    opt2 = optimizers.adadelta(model.params2, model.masks2, momentum)
    # opt = optimizers.sgd(model.params, model.masks)
    updates.update(opt.updates(model.params, grads, learning_rate))
    updates2 = opt2.updates(model.params2, grads2, learning_rate)

    # null loglikelihood function
    fn_null = function(inputs=[],
                       outputs=model.loglikelihood(y),
                       givens={
                           x_ng: data_x_ng,
                           x_g: data_x_g,
                           y: data_y,
                           av: data_av
                       },
                       on_unused_input='ignore')

    # compile the theano functions
    fn_estimate = function(
        name='estimate',
        inputs=[index],
        outputs=[model.loglikelihood(y), cost],
        updates=updates,
        givens={
            x_ng: data_x_ng[index * sz_minibatch:
                            T.min(((index + 1) * sz_minibatch, sz_n))],
            x_g: data_x_g[index * sz_minibatch:
                          T.min(((index + 1) * sz_minibatch, sz_n))],
            y: data_y[index * sz_minibatch:
                      T.min(((index + 1) * sz_minibatch, sz_n))],
            av: data_av[index * sz_minibatch:
                        T.min(((index + 1) * sz_minibatch, sz_n))]
        },
        allow_input_downcast=True,
        on_unused_input='ignore',
    )
    fn_optimize = function(
        name='optimize',
        inputs=[index],
        outputs=[model.loglikelihood(y)],
        updates=updates2,
        givens={
            x_ng: data_x_ng[index * sz_minibatch:
                            T.min(((index + 1) * sz_minibatch, sz_n))],
            x_g: data_x_g[index * sz_minibatch:
                          T.min(((index + 1) * sz_minibatch, sz_n))],
            y: data_y[index * sz_minibatch:
                      T.min(((index + 1) * sz_minibatch, sz_n))],
            av: data_av[index * sz_minibatch:
                        T.min(((index + 1) * sz_minibatch, sz_n))],
            z: data_ind[index * sz_minibatch:
                        T.min(((index + 1) * sz_minibatch, sz_n))]
        },
        allow_input_downcast=True,
        on_unused_input='ignore',
    )
    fn_pred = function(inputs=[],
                       outputs=model.y_pred,
                       givens={
                           x_ng: data_x_ng,
                           x_g: data_x_g,
                           y: data_y,
                           av: data_av
                       },
                       on_unused_input='ignore')

    """ Main estimation process loop """
    print('Begin estimation...')
    epoch = 0

    # process loop parameters
    sz_epoches = 2000
    sz_batches = np.ceil(sz_n / sz_minibatch).astype(np.int32)
    done_looping = False
    patience = 300
    patience_inc = 10
    best_loglikelihood = -np.inf
    null_Loglikelihood = fn_null()
    start_time = timeit.default_timer()

    while epoch < sz_epoches and done_looping is False:
        epoch_cost = []
        epoch_loglikelihood = []
        for i in range(sz_batches):
            (batch_loglikelihood, batch_cost) = fn_estimate(i)
            epoch_cost.append(batch_cost)
            epoch_loglikelihood.append(batch_loglikelihood)
        this_loglikelihood = np.sum(epoch_loglikelihood)
        this_cost = np.sum(epoch_cost)
        print('@ iteration %d/%d loglikelihood: %.3f' %
              (epoch, patience, this_loglikelihood))
        print(' cost %.3f' % this_cost)
        print(fn_pred())
        print(data_y.eval())
        if this_loglikelihood > best_loglikelihood:
            if this_loglikelihood > 0.998 * best_loglikelihood:
                patience += patience_inc
            best_loglikelihood = this_loglikelihood
            best_model = model
        if (epoch > patience
                or this_loglikelihood < 1.01 * best_loglikelihood):
            done_looping = True
        epoch += 1

    epoch = 0
    patience = 900
    done_looping = False
    best_loglikelihood = -np.inf
    # done_looping = True
    while epoch < sz_epoches and done_looping is False:
        epoch_cost = []
        epoch_loglikelihood = []
        for i in range(sz_batches):
            (batch_loglikelihood) = fn_optimize(i)
            epoch_loglikelihood.append(batch_loglikelihood)
        this_loglikelihood = np.sum(epoch_loglikelihood)
        this_cost = np.sum(epoch_cost)
        print('@ iteration %d/%d loglikelihood: %.3f' %
              (epoch, patience, this_loglikelihood))
        print(fn_pred())
        print(data_y.eval())
        if this_loglikelihood > best_loglikelihood:
            if this_loglikelihood > 0.999 * best_loglikelihood:
                patience += patience_inc
            best_loglikelihood = this_loglikelihood
            best_model = model
        if (epoch > patience
                or this_loglikelihood < 1.01 * best_loglikelihood):
            done_looping = True
        epoch += 1

    final_Loglikelihood = best_loglikelihood
    rho_square = 1. - (final_Loglikelihood / null_Loglikelihood)
    with open('best_model.pkl', 'wb') as f:
        pickle.dump(best_model, f)
    end_time = timeit.default_timer()

    """ Analytics and model statistics """
    with open('best_model.pkl', 'rb') as f:
        best_model = pickle.load(f)

    print('... solving Hessians')
    # hessian function
    fn_hessian = function(
        inputs=[best_model.x_ng, best_model.x_g, best_model.av],
        outputs=T.hessian(
            cost=-(best_model.loglikelihood(y) + best_model.cross_entropy(z)),
            wrt=best_model.params2),
        givens={
            y: data_y,
            z: data_ind
        },
        on_unused_input='ignore')
    h = np.hstack([
        np.diagonal(mat)
        for mat in fn_hessian(data_x_ng.eval(), data_x_g.eval(),
                              data_av.eval())
    ])
    n_est_params = np.count_nonzero(h)
    aic = 2 * n_est_params - 2 * final_Loglikelihood
    bic = np.log(sz_n) * n_est_params - 2 * final_Loglikelihood

    print('@iteration %d, run time %.3f ' % (epoch, end_time - start_time))
    print('Null Loglikelihood: %.3f' % null_Loglikelihood)
    print('Final Loglikelihood: %.3f' % final_Loglikelihood)
    print('rho square %.3f' % rho_square)
    print('AIC %.3f' % aic)
    print('BIC %.3f' % bic)
    run_analytics(best_model, h, n_hidden)
def sym_hes(*args, **kwargs):
    return T.hessian(*args, disconnected_inputs='warn', **kwargs)
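# A minimal usage sketch for the wrapper above (the toy loss is illustrative):
# it forwards its arguments to T.hessian with disconnected_inputs preset to
# 'warn'.
import theano
import theano.tensor as T

w = T.dvector("w")
loss = T.sum(w ** 4)
hess_fn = theano.function([w], sym_hes(loss, w))  # evaluates to 12 * diag(w**2)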
def get_generative_cost_updates(self, k=1, lr=1e-3):
    """
    get_generative_cost_updates func updates weights for W^(1), W^(2), a, c and d
    """
    # prepare visible samples from x input and y outputs
    v0_samples = self.input + self.output
    labels = self.label

    # perform positive Gibbs sampling phase
    # one step Gibbs sampling p(h|v1,v2,...) = p(h|v1)+p(h|v2)+...
    h1_pre, h1_means, h1_samples = self.sample_h_given_v(v0_samples)

    # start of Gibbs sampling chain
    # we only want the samples generated from the Gibbs sampling phase
    chain_start = h1_samples
    scan_out = 3 * len(v0_samples) * [None] + [None, None, chain_start]

    # theano scan function to loop over all Gibbs steps k
    # [v1_pre[], v1_means[], v1_samples[], h1_pre, h1_means, h1_samples]
    # outputs are given by outputs_info
    # [[t,t+1,t+2,...], [t,t+1,t+2,...], ], gibbs_updates
    # NOTE: scan returns a dictionary of updates
    gibbs_output, gibbs_updates = theano.scan(fn=self.gibbs_hvh,
                                              outputs_info=scan_out,
                                              n_steps=k,
                                              name='gibbs_hvh')

    # note that we only need the visible samples at the end of the chain
    chain_end = []
    a = self.hyperparameters['alpha']
    for output in gibbs_output:
        chain_end.append(output[-1])
    gibbs_pre = chain_end[:len(v0_samples)]
    gibbs_means = chain_end[len(v0_samples):2 * len(v0_samples)]
    gibbs_samples = chain_end[2 * len(v0_samples):3 * len(v0_samples)]

    # calculate the model cost
    ginitial_cost = self.free_energy(self.input)
    gfinal_cost = self.free_energy(gibbs_samples[:len(self.input)])
    gcost = a * (T.mean(ginitial_cost) - T.mean(gfinal_cost))

    dinitial_cost = self.discriminative_free_energy()
    dfinal_cost = self.discriminative_free_energy(gibbs_samples)
    dgcost = T.mean(dinitial_cost) - T.mean(dfinal_cost)

    g_params = self.vbias_f + self.V_params_f + self.hbias + self.vsigmas_f
    dg_params = self.B_params_f + self.U_params_f + self.cbias_f
    dg_masks = self.B_params_m + self.U_params_m + self.cbias_m

    # conditional probability
    dcost = 0.
    sigmas = []
    for i, (logit, label) in enumerate(zip(dinitial_cost, labels)):
        p_y_given_x = T.nnet.softmax(logit)
        dcost += Metric.loglikelihood(p_y_given_x, label)
        pred = T.argmax(p_y_given_x, axis=-1)
        errors = T.neq(pred, label)

        # calculate the Hessians
        hessians = T.hessian(cost=Metric.loglikelihood(p_y_given_x, label),
                             wrt=dg_params,
                             disconnected_inputs='ignore')
        sigma = [T.sqrt(s) for s in [T.diag(2. / h) for h in hessians]]
        sigmas.extend(sigma)

    # calculate the gradients
    g_grads = T.grad(cost=gcost,
                     wrt=g_params,
                     consider_constant=gibbs_samples,
                     disconnected_inputs='ignore')
    dg_grads = T.grad(cost=dgcost + dcost,
                      wrt=dg_params,
                      consider_constant=gibbs_samples,
                      disconnected_inputs='ignore')
    for i, m in enumerate(dg_masks):
        dg_grads[i] = dg_grads[i] * m

    # update Gibbs chain with update expressions from updates list[]
    g_updates = self.update_opt(g_params, g_grads, lr)
    dg_updates = self.update_opt(dg_params, dg_grads, lr)
    for variable, expression in g_updates:
        gibbs_updates[variable] = expression
    for variable, expression in dg_updates:
        gibbs_updates[variable] = expression

    # pseudo loglikelihood to track the quality of the hidden units
    # on input variables ONLY
    monitoring_cost = self.pseudo_loglikelihood(
        inputs=self.input, preactivation=gibbs_pre[:len(self.input)])

    return monitoring_cost, dcost, errors, gibbs_updates, [
        ginitial_cost, gfinal_cost
    ], [dinitial_cost, dfinal_cost], sigmas
def get_expr_rff_feature_map_component_hessian(x, omega, u):
    expr = get_expr_rff_feature_map_component(x, omega, u)
    return T.hessian(expr, x)
def get_expr_gaussian_kernel_third_order_tensor(x, y, sigma):
    grad = get_expr_gaussian_kernel_grad(x, y, sigma)
    G3, updates = theano.scan(lambda i, grad, x: T.hessian(grad[i], x),
                              sequences=T.arange(grad.shape[0]),
                              non_sequences=[grad, x])
    return G3, updates
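# A minimal compilation sketch for the scan-based third-order tensor above
# (assumes get_expr_gaussian_kernel_grad from this module returns the kernel
# gradient with respect to x; concrete inputs are illustrative). The scan
# updates are passed through to theano.function:
import numpy as np
import theano
import theano.tensor as T

x = T.dvector("x")
y = T.dvector("y")
G3, updates = get_expr_gaussian_kernel_third_order_tensor(x, y, sigma=1.0)
g3_fn = theano.function([x, y], G3, updates=updates)
g3_fn(np.zeros(2), np.ones(2))  # shape (d, d, d): Hessian of each gradient entry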
def main(data):
    # optimizer
    opt = Optimizers()

    # sampler
    theano_rng = RandomStreams(999)

    # import dataset
    n_samples = data.attrs['n_rows']
    lr = 1e-3
    batch_size = 128
    x_data = [
        data['purpose'], data['avg_speed'], data['duration'], data['trip_km'],
        data['n_coord'], data['interval'], data['dow'], data['startdistrict'],
        data['enddistrict']
    ]
    y_data = [data['mode']]

    params = OrderedDict()
    params_shp = OrderedDict()
    output = []
    input = []
    asc_params = []
    asc_params_m = []
    beta_params_f = []
    beta_params_s = []
    beta_params_sf = []
    beta_params = []
    beta_params_m = []

    for var in y_data:
        name = 'asc_' + var.name.strip('/')
        asc_shp = var['data'][:].squeeze().shape[1:]
        print('y', name, asc_shp)
        output.append(init_tensor((), name))
        mask = np.ones(asc_shp, DTYPE_FLOATX)
        mask[-1] = 0.
        asc_value = np.zeros(asc_shp, DTYPE_FLOATX) * mask
        asc_params.append(shared(asc_value, name))
        asc_params_m.append(shared(mask, name + '_mask'))
        params[name] = asc_params[-1]
        params_shp[name] = asc_shp

    for var in x_data:
        name = 'beta_' + var.name.strip('/')
        shp = var['data'].shape[1:] + asc_shp
        print('x', name, shp)
        input.append(init_tensor(var['data'].shape[1:], name))
        mask = np.ones(shp, DTYPE_FLOATX)
        mask[..., -1] = 0.
        mask = mask.flatten()
        beta_value = np.zeros(np.prod(shp), DTYPE_FLOATX) * mask
        sigma_value = np.ones(np.prod(shp), DTYPE_FLOATX) * mask
        beta_params_f.append(shared(beta_value, name))
        beta_params_sf.append(shared(sigma_value, name + '_sigma'))
        beta_params.append(T.reshape(beta_params_f[-1], shp))
        beta_params_s.append(T.reshape(beta_params_sf[-1], shp))
        beta_params_m.append(shared(mask, name + '_mask'))
        params[name] = beta_params_f[-1]
        params[name + '_sigma'] = beta_params_sf[-1]
        params_shp[name] = shp
        params_shp[name + '_sigma'] = shp

    # compute the utility function
    utility = 0.
    h_utility = 0.
    for x, b, s in zip(input, beta_params, beta_params_s):
        normal_sample = b[..., None] + T.sqr(s)[..., None] * theano_rng.normal(
            size=b.eval().shape + (1, ), avg=0., std=1., dtype=DTYPE_FLOATX)
        ax = [np.arange(x.ndim)[1:], np.arange(b.ndim)[:-1]]
        utility += T.tensordot(x, normal_sample, axes=ax)
        if x.ndim > 2:
            h_utility += T.tensordot(x, b + T.sqr(s), axes=[[1, 2], [0, 1]])
        else:
            h_utility += T.tensordot(x, b + T.sqr(s), axes=[[1], [0]])

    for y, asc in zip(output, asc_params):
        utility += asc[None, ..., None]
        h_utility += asc

    (d1, d2, d3) = utility.shape
    utility = utility.reshape((d1 * d3, d2))
    p_y_given_x = T.nnet.softmax(utility)
    hessian_prob = T.nnet.softmax(h_utility)  #!
    hessian_nll = T.log(hessian_prob)
    hessian_cr = hessian_nll[T.arange(y.shape[0]), y]
    hessian_cost = -T.sum(hessian_cr)

    nll = T.log(p_y_given_x).reshape((d3, d1, d2))
    nll = nll[:, T.arange(y.shape[0]), y]
    cost = -T.sum(T.mean(nll, axis=0))

    gparams = asc_params + beta_params_f + beta_params_sf
    grads = T.grad(cost, gparams)

    # mask gradient updates
    mask = asc_params_m + beta_params_m + beta_params_m
    for j, g in enumerate(grads):
        grads[j] = g * mask[j]

    # create list of updates to iterate over
    updates = opt.sgd_updates(gparams, grads, lr)

    # symbolic equation for the Hessian function
    stderrs = []
    hessian = T.hessian(cost=hessian_cost, wrt=gparams)
    stderr = [T.sqrt(f) for f in [T.diag(2. / h) for h in hessian]]
    stderrs.extend(stderr)

    tensors = input + output
    shared_x = [shared(var['data'][:], borrow=True) for var in x_data]
    shared_y = [T.cast(shared(var['label'][:]), 'int32') for var in y_data]
    shared_variables = shared_x + shared_y

    i = T.lscalar('index')
    start_idx = i * batch_size
    end_idx = (i + 1) * batch_size

    print('constructing Theano computational graph...')
    train = theano.function(
        inputs=[i],
        outputs=cost,
        updates=updates,
        givens={
            key: val[start_idx:end_idx]
            for key, val in zip(tensors, shared_variables)
        },
        name='train',
        allow_input_downcast=True,
    )
    std_err = theano.function(
        inputs=[],
        outputs=stderrs,
        givens={key: val[:] for key, val in zip(tensors, shared_variables)},
        name='std errors',
        allow_input_downcast=True,
    )

    # train model
    print('training the model...')
    curves = []
    n_batches = n_samples // batch_size
    epochs = 100
    epoch = 0
    t0 = time.time()
    while epoch < epochs:
        epoch += 1
        cost = []
        for i in range(n_batches):
            cost_items = train(i)
            cost.append(cost_items)
        epoch_cost = np.sum(cost)
        curves.append((epoch, epoch_cost))
        minutes, seconds = divmod(time.time() - t0, 60.)
        hours, minutes = divmod(minutes, 60.)
        print(("epoch {0:d} loglikelihood "
               "{1:.3f} time {hh:02d}:{mm:02d}:{ss:05.2f}").format(
                   epoch, epoch_cost,
                   hh=int(hours), mm=int(minutes), ss=seconds))
        if (epoch % 5) == 0:
            print('checkpoint')
            param_values = {}
            for name, param in params.items():
                param_shp = params_shp[name]
                param_values[name] = param.eval().reshape(param_shp)
                np.savetxt('params/{}.csv'.format(name),
                           param_values[name].squeeze(),
                           fmt='%.3f',
                           delimiter=',')
            to_file = param_values, curves
            path = 'params/epoch_{0:d}.params'.format(epoch)
            with open(path, 'wb') as f:
                pickle.dump(to_file, f, protocol=pickle.HIGHEST_PROTOCOL)

    # save parameters and stderrs to .csv
    stderrs = std_err()
    params_list = [p for p in asc_params + beta_params_f + beta_params_sf]
    param_names = [p.name for p in asc_params + beta_params_f + beta_params_sf]
    for se, param, name in zip(stderrs, params_list, param_names):
        v = param.eval().squeeze()
        shp = v.shape
        path = 'params/stderrs_{}.csv'.format(name)
        np.savetxt(path, se.reshape(shp), fmt='%.3f', delimiter=',')
        path = 'params/tstat_{}.csv'.format(name)
        np.savetxt(path, v / se.reshape(shp), fmt='%.3f', delimiter=',')
def test_mlp(learning_rate=0.1, L1_reg=0.01, L2_reg=0.0001, n_epochs=1000,
             batch_size=200, n_hidden=10, n_in=40, n_out=6):
    """
    Demonstrate stochastic gradient descent optimization for a multilayer
    perceptron.

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type L1_reg: float
    :param L1_reg: L1-norm's weight when added to the cost (see regularization)

    :type L2_reg: float
    :param L2_reg: L2-norm's weight when added to the cost (see regularization)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: the path of the MNIST dataset file from
                    http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz
    """
    # datasets = load_data(dataset)
    train_set, test_set, valid_set = load_from_file("processed_dataset.pkl")
    # train_set, test_set, valid_set = prepareData.get_data()
    # temp1, temp2 = test_set
    # print temp1.shape

    test_set_x, test_set_y = shared_dataset(test_set)
    valid_set_x, valid_set_y = shared_dataset(valid_set)
    train_set_x, train_set_y = shared_dataset(train_set)

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')    # the data is presented as rasterized images
    y = T.ivector('y')   # the labels are presented as 1D vector of [int] labels

    rng = np.random.RandomState(1234)

    # construct the MLP class
    classifier = MLP(
        rng=rng,
        input=x,
        n_in=n_in,
        n_hidden=n_hidden,
        n_out=n_out
    )

    # start-snippet-4
    # the cost we minimize during training is the negative log likelihood of
    # the model plus the regularization terms (L1 and L2); cost is expressed
    # here symbolically
    cost = (
        classifier.negative_log_likelihood(y)
        + L1_reg * classifier.L1
        + L2_reg * classifier.L2_sqr
    )
    # end-snippet-4

    # compiling a Theano function that computes the mistakes that are made
    # by the model on a minibatch
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    # start-snippet-5
    # compute the gradient of cost with respect to theta (stored in params)
    # the resulting gradients will be stored in a list gparams
    gparams = [T.grad(cost, param) for param in classifier.params]

    # specify how to update the parameters of the model as a list of
    # (variable, update expression) pairs
    # given two lists A = [a1, a2, a3, a4] and B = [b1, b2, b3, b4] of the
    # same length, zip generates a list C of the same size, where each
    # element is a pair formed from the two lists:
    # C = [(a1, b1), (a2, b2), (a3, b3), (a4, b4)]
    updates = [
        (param, param - learning_rate * gparam)
        for param, gparam in zip(classifier.params, gparams)
    ]

    # compiling a Theano function `train_model` that returns the cost, but
    # at the same time updates the parameters of the model based on the rules
    # defined in `updates`
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )
    # end-snippet-5

    ###############
    # TRAIN MODEL #
    ###############
    print '... training'

    # early-stopping parameters
    patience = 10000  # look at this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is
                           # found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(n_train_batches, patience / 2)
                                  # go through this many minibatches before
                                  # checking the network on the validation
                                  # set; in this case we check every epoch

    best_validation_loss = np.inf
    best_iter = 0
    test_score = 0.
    start_time = time.clock()

    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):
            minibatch_avg_cost = theano.shared(train_model(minibatch_index))
            test_W_flat = theano.shared(
                classifier.hiddenLayer.W.get_value().flatten())
            w1 = test_W_flat.reshape((40, 10))
            test = theano.shared(
                classifier.hiddenLayer.W.get_value().flatten())
            hessianMatrix = T.hessian(cost=minibatch_avg_cost, wrt=test)
            f = theano.function(inputs=[], outputs=hessianMatrix)
            print f()
            pause()

            iter = (epoch - 1) * n_train_batches + minibatch_index

            if (iter + 1) % validation_frequency == 0:
                # compute zero-one loss on validation set
                validation_losses = [validate_model(i)
                                     for i in xrange(n_valid_batches)]
                this_validation_loss = np.mean(validation_losses)

                print(
                    'epoch %i, minibatch %i/%i, validation error %f %%' %
                    (
                        epoch,
                        minibatch_index + 1,
                        n_train_batches,
                        this_validation_loss * 100.
                    )
                )

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    # improve patience if loss improvement is good enough
                    if (
                        this_validation_loss <
                        best_validation_loss * improvement_threshold
                    ):
                        patience = max(patience, iter * patience_increase)

                    best_validation_loss = this_validation_loss
                    best_iter = iter
                    best_weights = classifier.hiddenLayer.W.get_value()

                    # test it on the test set
                    test_losses = [test_model(i)
                                   for i in xrange(n_test_batches)]
                    test_score = np.mean(test_losses)

                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iter:
                done_looping = True
                break

    end_time = time.clock()
    print(('Optimization complete. Best validation score of %f %% '
           'obtained at iteration %i, with test performance %f %%') %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print("Final weights of the hidden layer:")
    print(best_weights)
    print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
    return best_validation_loss, best_iter, best_weights
def run_mxl():
    """ Discrete choice model estimation by Mixed Logit (MxL) formulation
    with Theano

    Setup
    -----
    step 1: Load variables from csv file
    step 2: Define hyperparameters used in the computation
    step 3: define symbolic Theano tensors
    step 4: build model and define cost function
    step 5: define gradient calculation algorithm
    step 6: define Theano symbolic functions
    step 7: run main estimation loop for n iterations
    step 8: perform analytics and model statistics
    """
    # compile and import dataset from csv
    d_x_ng, d_x_g, d_y, avail, d_ind = extractdata(csvString)
    data_x_ng = shared(np.asarray(d_x_ng, dtype=floatX), borrow=True)
    data_x_g = shared(np.asarray(d_x_g, dtype=floatX), borrow=True)
    data_y = T.cast(shared(np.asarray(d_y - 1, dtype=floatX), borrow=True),
                    'int32')
    data_av = shared(np.asarray(avail, dtype=floatX), borrow=True)

    sz_n = d_x_g.shape[0]   # number of samples
    sz_k = d_x_g.shape[1]   # number of generic variables
    sz_m = d_x_ng.shape[2]  # number of non-generic variables
    sz_i = d_x_ng.shape[1]  # number of alternatives
    sz_minibatch = sz_n

    # model hyperparameters
    sz_draw = 50
    learning_rate = 0.3
    momentum = 0.9
    srng = RandomStreams(1234)

    # random draws
    rng = srng.normal((sz_n, sz_draw, sz_m))

    x_ng = T.tensor3('data_x_ng')  # symbolic theano tensors
    x_g = T.matrix('data_x_g')
    y = T.ivector('data_y')
    av = T.matrix('data_av')
    index = T.lscalar('index')
    draws = T.tensor3('normal_draws')

    # construct model
    model = MixedLogit(sz_i, av, input=[x_ng, x_g],
                       n_in=[(sz_m), (sz_k, sz_i)], draws=draws)
    cost = -model.loglikelihood(y)

    # calculate the gradients with respect to the loss function
    grads = T.grad(cost=cost, wrt=model.params)
    opt = optimizers.adadelta(model.params, model.masks, momentum)
    updates = opt.updates(model.params, grads, learning_rate)

    # returns the distribution of the draws at iteration
    fn_checkdraw = function(inputs=[], outputs=model.draws,
                            givens={draws: rng})

    # hessian function
    fn_hessian = function(inputs=[],
                          outputs=T.hessian(cost=cost, wrt=model.params),
                          givens={
                              x_ng: data_x_ng,
                              x_g: data_x_g,
                              y: data_y,
                              av: data_av,
                              draws: rng
                          },
                          on_unused_input='ignore')

    # null loglikelihood function
    fn_null = function(inputs=[],
                       outputs=model.loglikelihood(y),
                       givens={
                           x_ng: data_x_ng,
                           x_g: data_x_g,
                           y: data_y,
                           av: data_av,
                           draws: rng
                       },
                       on_unused_input='ignore')

    # compile the theano functions
    fn_estimate = function(
        name='estimate',
        inputs=[index],
        outputs=[model.loglikelihood(y), model.errors(y)],
        updates=updates,
        givens={
            x_ng: data_x_ng[index * sz_minibatch:
                            T.min(((index + 1) * sz_minibatch, sz_n))],
            x_g: data_x_g[index * sz_minibatch:
                          T.min(((index + 1) * sz_minibatch, sz_n))],
            y: data_y[index * sz_minibatch:
                      T.min(((index + 1) * sz_minibatch, sz_n))],
            av: data_av[index * sz_minibatch:
                        T.min(((index + 1) * sz_minibatch, sz_n))],
            draws: rng[index * sz_minibatch:
                       T.min(((index + 1) * sz_minibatch, sz_n))]
        },
        allow_input_downcast=True,
        on_unused_input='ignore',
    )

    """ Main estimation process loop """
    print('Begin estimation...')
    epoch = 0

    # process loop parameters
    sz_epoches = 9999
    sz_batches = np.ceil(sz_n / sz_minibatch).astype(np.int32)
    done_looping = False
    patience = 300
    patience_inc = 10
    best_loglikelihood = -np.inf
    null_Loglikelihood = fn_null()
    start_time = timeit.default_timer()

    while epoch < sz_epoches and done_looping is False:
        epoch_error = []
        epoch_loglikelihood = []
        for i in range(sz_batches):
            (batch_loglikelihood, batch_error) = fn_estimate(i)
            epoch_error.append(batch_error)
            epoch_loglikelihood.append(batch_loglikelihood)
        this_loglikelihood = np.sum(epoch_loglikelihood)
        print('@ iteration %d loglikelihood: %.3f' %
              (epoch, this_loglikelihood))
        if this_loglikelihood > best_loglikelihood:
            if this_loglikelihood > 0.997 * best_loglikelihood:
                patience += patience_inc
            best_loglikelihood = this_loglikelihood
            with open('best_model.pkl', 'wb') as f:
                pickle.dump(model, f)
        if epoch > patience:
            done_looping = True
        epoch += 1

    # final loglikelihood and fit statistics (as in run_crbm above)
    final_Loglikelihood = best_loglikelihood
    rho_square = 1. - (final_Loglikelihood / null_Loglikelihood)
    end_time = timeit.default_timer()

    """ Analytics and model statistics """
    print('... solving Hessians')
    h = np.hstack([np.diagonal(mat) for mat in fn_hessian()])
    n_est_params = np.count_nonzero(h)
    aic = 2 * n_est_params - 2 * final_Loglikelihood
    bic = np.log(sz_n) * n_est_params - 2 * final_Loglikelihood

    print('@iteration %d, run time %.3f ' % (epoch, end_time - start_time))
    print('Null Loglikelihood: %.3f' % null_Loglikelihood)
    print('Final Loglikelihood: %.3f' % final_Loglikelihood)
    print('rho square %.3f' % rho_square)
    print('AIC %.3f' % aic)
    print('BIC %.3f' % bic)

    with open('best_model.pkl', 'rb') as f:
        best_model = pickle.load(f)
    run_analytics(best_model, h)