Example #1
def shift_Xy_to_matrices(shift_X, shift_y=None, weights=False):

    flatten = lambda ls: [l_i for l in ls for l_i in l]

    source_X_l = []
    target_X_l = []
    source_y_l = []
    try:
        for (source_X_elt, target_X_elt, source_y_elt) in zip(*list(zip(*shift_X))[0:3]):
            if len(target_X_elt) > 0:
                target_X_l.append(target_X_elt)
            source_X_l.append(source_X_elt)
            source_y_l.append(source_y_elt)
    except:
        pdb.set_trace()

    if shift_y is None:
        if not weights:
            return np.array(source_X_l), np.vstack(tuple(flatten(target_X_l))), np.array(source_y_l)
        else:
            assert len(next(iter(shift_X))) == 4
            return np.array(source_X_l), np.vstack(tuple(flatten(target_X_l))), np.array(source_y_l), np.array([shift_X_elt[-1] for shift_X_elt in shift_X])
    else:
        target_y_l = []
        for (source_X_elt, target_X_elt, source_y_elt), target_y_elt in zip(zip(*list(zip(*shift_X))[0:3]), shift_y):
            if len(target_X_elt) > 0:
                target_y_l.append(target_y_elt)
        if not weights:
            return np.array(source_X_l), np.vstack(tuple(flatten(target_X_l))), np.array(source_y_l), np.hstack(tuple(flatten(target_y_l)))
        else:
            return np.array(source_X_l), np.vstack(tuple(flatten(target_X_l))), np.array(source_y_l), np.hstack(tuple(flatten(target_y_l))), np.array([shift_X_elt[-1] for shift_X_elt in shift_X])
Example #2
def PhotometricError(iref, inew, R, T, points, D):
    # points is a tuple ([y], [x]); convert to homogeneous
    siz = iref.shape
    npoints = len(points[0])
    f = siz[1]  # focal length, FIXME
    Xref = np.vstack(((points[1] - siz[1]*0.5) / f,  # x
                      (siz[0]*0.5 - points[0]) / f,  # y (left->right hand)
                      np.ones(npoints)))             # z = 1
    # this is confusingly written -- i am broadcasting the translation T to
    # every column, but numpy broadcasting only works if it's rows, hence all
    # the transposes
    # print D * Xref
    Xnew = (np.dot(so3.exp(R), (D * Xref)).T + T).T
    # print Xnew
    # right -> left hand projection
    proj = Xnew[0:2] / Xnew[2]
    p = (-proj[1]*f + siz[0]*0.5, proj[0]*f + siz[1]*0.5)
    margin = 10  # int(siz[0] / 5)
    inwindow_mask = ((p[0] >= margin) & (p[0] < siz[0]-margin-1) &
                     (p[1] >= margin) & (p[1] < siz[1]-margin-1))
    npts_inw = sum(inwindow_mask)
    if npts_inw < 10:
        return 1e6, np.zeros(6 + npoints)
    # todo: filter points which are now out of the window
    oldpointidxs = (points[0][inwindow_mask],
                    points[1][inwindow_mask])
    newpointidxs = (p[0][inwindow_mask], p[1][inwindow_mask])
    origpointidxs = np.nonzero(inwindow_mask)[0]
    E = InterpolatedValues(inew, newpointidxs) - iref[oldpointidxs]
    # dE/dk ->
    # d/dk r_p^2 = d/dk (Inew(w(r, T, D, p)) - Iref(p))^2
    # = -2r_p dInew/dp dp/dw dw/dX dX/dk
    # = -2r_p * g(w(r, T, D, p)) * dw(r, T, D, p)
    # intensity gradients for each point
    Ig = InterpolatedGradients(inew, newpointidxs)
    # TODO: use tensors for this
    # gradients for R, T, and D
    gradient = np.zeros(6 + npoints)
    for i in range(npts_inw):
        # print 'newidx (y,x) = ', newpointidxs[0][i], newpointidxs[1][i]
        # Jacobian of w
        oi = origpointidxs[i]
        Jw = dw(Xref[0][oi], Xref[1][oi], D[oi], R, T)
        # scale back up into pixel space, right->left hand coords to get
        # Jacobian of p
        Jp = f * np.vstack((-Jw[1], Jw[0]))
        # print origpointidxs[i], 'Xref', Xref[:, i], 'Ig', Ig[:, i], \
        #     'dwdRz', Jw[:, 2], 'dpdRz', Jp[:, 2]
        # full Jacobian = 2*E + Ig * Jp
        J = np.sign(E[i]) * np.dot(Ig[:, i], Jp)
        # print '2 E[i]', 2*E[i], 'Ig*Jp', np.dot(Ig[:, i], Jp)
        gradient[:6] += J[:6]
        # print J[:6]
        gradient[6+origpointidxs[i]] += J[6]

    print(R, T, np.sum(np.abs(E)), npts_inw)
    # return ((0.2*(npoints - npts_inw) + np.dot(E, E)), gradient)
    return np.sum(np.abs(E)) / (npts_inw), gradient / (npts_inw)
Example #3
def get_dL_dp_thru_xopt(lin_solver, df_dx, d_dp_df_dx, d_dx_df_dx, dL_dxopt, A, b, xopt, p, L_args=None, f_args=None):
    # assumes L(x_opt), x_opt = argmin_x f(x,p) subject to Ax<=b
    # L_args is for arguments to L besides x_opt

    # first, get dL/dws to calculate the gradient at ws1
    if L_args is not None:
        pass
        # print('L_args len:', len(L_args))
    else:
        print('NONE')
    if L_args is None:
        dL_dxopt_anal_val1 = dL_dxopt(xopt) #
    else:
#        pdb.set_trace()
        dL_dxopt_anal_val1 = dL_dxopt(xopt, L_args)
    
    # get tight constraints
    A_tight, b_tight = get_tight_constraints(A, b, xopt)
    num_tight = A_tight.shape[0]

    # make C matrix
#    pdb.set_trace()
    if f_args is None:
        C_corner = d_dx_df_dx(xopt, p)
    else:
        C_corner = d_dx_df_dx(xopt, p, f_args)
    C = np.vstack((np.hstack((C_corner,-A_tight.T)), np.hstack((A_tight,np.zeros((num_tight,num_tight))))))
#    print 'C', C
#    print 'C rank', np.linalg.matrix_rank(C), C.shape
#    print 'C corner rank', np.linalg.matrix_rank(C_corner), C_corner.shape
    
    # make d vector
    d = np.hstack((dL_dxopt_anal_val1, np.zeros(num_tight)))

    # solve Cv=d for x
    v = lin_solver(C, d)
#    print 'v', v
    #print C
    #print d
    print('solver error:', np.linalg.norm(np.dot(C, v) - d))

    # make D
    if f_args is None:
        d_dp_df_dx_anal_val1 = d_dp_df_dx(p, xopt)
    else:
        d_dp_df_dx_anal_val1 = d_dp_df_dx(p, xopt, f_args)
    D = np.vstack((-d_dp_df_dx_anal_val1, np.zeros((num_tight,)+p.shape)))
#    print 'D', D[0:10]

    return np.sum(D.T * v[tuple([np.newaxis for i in range(len(p.shape))])+(slice(None),)], axis=-1).T
Example #4
def get_dxopt_delta_p(lin_solver, df_dx, d_dp_df_dx, d_dx_df_dx, A, b, xopt, p, delta_p_direction):
    
    # f(x, p) should be convex
    x_len = A.shape[1]

    # get tight constraints
    A_tight, b_tight = get_tight_constraints(A, b, xopt)
    num_tight = A_tight.shape[0]

    # get d
    p_dim = len(delta_p_direction.shape)
    delta_p_direction_broadcasted = np.tile(delta_p_direction, tuple([x_len] + [1 for i in range(p_dim)]))
    d_top = -np.sum(d_dp_df_dx(p, xopt) * delta_p_direction_broadcasted, axis=tuple(range(1,1+p_dim)))
    d_bottom = np.zeros(num_tight)
    d = np.hstack((d_top,d_bottom))

    # get C
    C = np.vstack((np.hstack((d_dx_df_dx(xopt, p), -A_tight.T)), np.hstack((A_tight, np.zeros((num_tight, num_tight))))))

    # get deriv
    deriv = lin_solver(C, d)
    
#    print 'solver error:', np.linalg.norm(np.dot(C,deriv) - d)

    return deriv
Example #5
    def evaluate_trajectory_cost(self, x_array, u_array):
        #Note x_array contains X_T, so a dummy u is appended to keep the arrays
        #a consistent length
        u_array_sup = np.vstack([u_array, np.zeros(len(u_array[0]))])

        J_array = [self.cost(x, u, t, self.aux) for t, (x, u) in enumerate(zip(x_array, u_array_sup))]

        return np.sum(J_array)
Example #6
File: gmm.py Project: mattjj/svae
 def plot_ellipse(ax, alpha, mean, cov, line=None):
     t = np.linspace(0, 2*np.pi, 100) % (2*np.pi)
     circle = np.vstack((np.sin(t), np.cos(t)))
     ellipse = 2.*np.dot(np.linalg.cholesky(cov), circle) + mean[:,None]
     if line:
         line.set_data(ellipse)
         line.set_alpha(alpha)
     else:
         ax.plot(ellipse[0], ellipse[1], alpha=alpha, linestyle='-', linewidth=2)
Example #7
def get_KMM_ineq_constraints(num_train, B_max, eps):
    G_gt_0 = -np.eye(num_train)
    h_gt_0 = np.zeros(num_train)
    G_lt_B_max = np.eye(num_train)
    h_lt_B_max = np.ones(num_train) * B_max
    G_B_sum_lt = np.ones(num_train, dtype=float)
    h_B_sum_lt = (1+eps) * float(num_train) * np.ones(1)
    G_B_sum_gt = -np.ones(num_train, dtype=float)
    h_B_sum_gt = -(1-eps) * float(num_train) * np.ones(1)
    G = np.vstack((G_gt_0,G_lt_B_max,G_B_sum_lt,G_B_sum_gt))
    h = np.hstack((h_gt_0,h_lt_B_max,h_B_sum_lt,h_B_sum_gt))    
    return G,h
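A minimal usage sketch of the stacked constraints above (it assumes get_KMM_ineq_constraints from this example is in scope; the sample size and parameters are made up):

import numpy as np

G, h = get_KMM_ineq_constraints(num_train=4, B_max=5.0, eps=0.1)
print(G.shape, h.shape)    # (10, 4) (10,): 4 + 4 + 1 + 1 stacked rows
B = np.ones(4)             # uniform candidate weights
print(np.all(G @ B <= h))  # True: 0 <= B_i <= B_max and both sum bounds hold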
Example #8
def test_jacobian_against_stacked_grads():
    scalar_funs = [
        lambda x: np.sum(x ** 3),
        lambda x: np.prod(np.sin(x) + np.sin(x)),
        lambda x: grad(lambda y: np.exp(y) * np.tanh(x[0]))(x[1]),
    ]

    vector_fun = lambda x: np.array([f(x) for f in scalar_funs])

    x = npr.randn(5)
    jac = jacobian(vector_fun)(x)
    grads = [grad(f)(x) for f in scalar_funs]

    assert np.allclose(jac, np.vstack(grads))
Example #9
def plot_trace(ps, ttl):
    x = np.linspace(-5, 5, 100)
    y = np.linspace(-5, 5, 100)
    X, Y = np.meshgrid(x, y)
    Z = rosen(np.vstack([X.ravel(), Y.ravel()])).reshape((100,100))
    ps = np.array(ps)
    plt.figure(figsize=(12,4))
    plt.subplot(121)
    plt.contour(X, Y, Z, np.arange(10)**5)
    plt.plot(ps[:, 0], ps[:, 1], '-o')
    plt.plot(1, 1, 'r*', markersize=12) # global minimum
    plt.subplot(122)
    plt.semilogy(range(len(ps)), rosen(ps.T))
    plt.title(ttl)
Example #10
    def kstep_mse(self, obs, act, horizon=1, stoch=True, infer='viterbi'):
        from sklearn.metrics import mean_squared_error, explained_variance_score

        mse, norm_mse = [], []
        for _obs, _act in zip(obs, act):
            _hist_obs, _hist_act, _nxt_act = [], [], []
            _target, _prediction = [], []

            _nb_steps = _obs.shape[0] - horizon
            for t in range(_nb_steps):
                _hist_obs.append(_obs[:t + 1, :])
                _hist_act.append(_act[:t + 1, :])
                _nxt_act.append(_act[t:t + horizon, :])

            _hr = [horizon for _ in range(_nb_steps)]
            _, _obs_hat = self.forcast(hist_obs=_hist_obs,
                                       hist_act=_hist_act,
                                       nxt_act=_nxt_act,
                                       horizon=_hr,
                                       stoch=stoch,
                                       infer=infer)

            for t in range(_nb_steps):
                _target.append(_obs[t + horizon, :])
                _prediction.append(_obs_hat[t][-1, :])

            _target = np.vstack(_target)
            _prediction = np.vstack(_prediction)

            _mse = mean_squared_error(_target, _prediction)
            _norm_mse = explained_variance_score(
                _target, _prediction, multioutput='variance_weighted')

            mse.append(_mse)
            norm_mse.append(_norm_mse)

        return np.mean(mse), np.mean(norm_mse)
Example #11
 def m_step(self,
            discrete_expectations,
            continuous_expectations,
            datas,
            inputs,
            masks,
            tags,
            optimizer="bfgs",
            maxiter=100,
            **kwargs):
     if self.single_subspace:
         # Return exact m-step updates for C, F, d, and inv_etas
         # stack across all datas
         x = np.vstack(continuous_expectations)
         u = np.vstack(inputs)
         y = np.vstack(datas)
         T, D = np.shape(x)
         xb = np.hstack((np.ones((T, 1)), x, u))  # design matrix
         params = np.linalg.lstsq(xb.T @ xb, xb.T @ y, rcond=None)[0].T
         self.ds = params[:, 0].reshape((1, self.N))
         self.Cs = params[:, 1:D + 1].reshape((1, self.N, self.D))
         if self.M > 0:
             self.Fs = params[:, D + 1:].reshape((1, self.N, self.M))
         mu = np.dot(xb, params.T)
         Sigma = (y - mu).T @ (y - mu) / T
         self.inv_etas = np.log(np.diag(Sigma)).reshape((1, self.N))
     else:
         Emissions.m_step(self,
                          discrete_expectations,
                          continuous_expectations,
                          datas,
                          inputs,
                          masks,
                          tags,
                          optimizer=optimizer,
                          maxiter=maxiter,
                          **kwargs)
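Below is a small standalone sketch, with made-up data, of the exact least-squares emission update performed in the single-subspace branch above: regress y on the design matrix [1, x, u] and read d, C, F off the stacked coefficients.

import numpy as np

rng = np.random.default_rng(0)
T, D, M, N = 200, 3, 2, 5
x, u = rng.normal(size=(T, D)), rng.normal(size=(T, M))
C_true = rng.normal(size=(N, D))
F_true = rng.normal(size=(N, M))
d_true = rng.normal(size=N)
y = x @ C_true.T + u @ F_true.T + d_true + 0.01 * rng.normal(size=(T, N))

xb = np.hstack((np.ones((T, 1)), x, u))                         # design matrix
params = np.linalg.lstsq(xb.T @ xb, xb.T @ y, rcond=None)[0].T  # (N, 1 + D + M)
d_hat, C_hat, F_hat = params[:, 0], params[:, 1:D + 1], params[:, D + 1:]
print(np.allclose(C_hat, C_true, atol=0.05))                    # True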
Example #12
def cp_mds_reg(X, D, lam=1.0, v=1, maxiter=1000):
    """Version of MDS in which "signs" are also an optimization parameter.

    Rather than performing a full optimization and then resetting the
    sign matrix, here we treat the signs as a parameter `A = [a_ij]` and
    minimize the cost function
        F(X,A) = ||W*(X^H(A*X) - cos(D))||^2 + lambda*||A - X^HX/|X^HX| ||^2
    Lambda is a regularization parameter we can experiment with. The
    collection of data, `X`, is treated as a point on the `Oblique`
    manifold, consisting of `k*n` matrices with unit-norm columns. Since
    we are working on a sphere in complex space we require `k` to be
    even. The first `k/2` entries of each column are the real components
    and the last `k/2` entries are the imaginary parts.

    Parameters
    ----------
    X : ndarray (k, n)
        Initial guess for data.
    D : ndarray (k, k)
        Goal distance matrix.
    lam : float, optional
        Weight to give regularization term.
    v : int, optional
        Verbosity

    Returns
    -------
    X_opt : ndarray (k, n)
        Collection of points optimizing cost.

    """

    dim = X.shape[0]
    num_points = X.shape[1]
    W = distance_to_weights(D)
    Sreal, Simag = norm_rotations(X)
    A = np.vstack(
        (np.reshape(Sreal,
                    (1, num_points**2)), np.reshape(Simag, num_points**2)))
    cp_manifold = Oblique(dim, num_points)
    a_manifold = Oblique(2, num_points**2)
    manifold = Product((cp_manifold, a_manifold))
    solver = ConjugateGradient(maxiter=maxiter, maxtime=float('inf'))
    cost = setup_reg_autograd_cost(D, int(dim / 2), num_points, lam=lam)
    problem = pymanopt.Problem(cost=cost, manifold=manifold)
    Xopt, Aopt = solver.solve(problem, x=(X, A))
    Areal = np.reshape(Aopt[0, :], (num_points, num_points))
    Aimag = np.reshape(Aopt[1, :], (num_points, num_points))
    return Xopt, Areal, Aimag
Example #13
 def p_log_prob(self, idx, z):
     x = self.data[idx]        
     mu, tau, pi = z['mu'], softplus(z['tau']), stick_breaking(z['pi'])
     matrix = []  
     log_prior = 0.
     log_prior += np.sum(gamma_logpdf(tau, 1e-5, 1e-5) + np.log(jacobian_softplus(z['tau'])))        
     log_prior += np.sum(norm.logpdf(mu, 0, 1.))
     log_prior += dirichlet.logpdf(pi, 1e3 * np.ones(self.clusters)) + np.log(jacobian_stick_breaking(z['pi']))
     for k in range(self.clusters):
         matrix.append(np.log(pi[k]) + np.sum(norm.logpdf(x, mu[(k * self.D):((k + 1) * self.D)],
                             np.full([self.D], 1./np.sqrt(tau[k]))), 1))
     matrix  = np.vstack(matrix)
     vector = logsumexp(matrix, axis=0)
     log_lik = np.sum(vector)        
     return self.scale * log_lik + log_prior        
Example #14
def get_x0():

    min_dist = -np.inf
    count = 0
    while min_dist < param.get('min_dist'):

        for i in range(param.get('ni')):
            p_i = param.get('plim')*np.random.rand(param.get('nd'),1) \
             - param.get('plim')/2.
            v_i = param.get('vlim')*np.random.rand(param.get('nd'),1) \
             - param.get('vlim')/2.
            try:
                x0 = np.vstack((x0, p_i, v_i))
            except NameError:  # x0 does not exist yet on the first pass
                x0 = np.vstack((p_i, v_i))

        count += 1
        min_dist = get_min_dist(x0)

        if count > 100:
            print('Error: Incompatible Initial Conditions')
            return

    param['x0'] = x0
Example #15
 def _make_A(self, eps_vec):
     C = - 1 / MU_0 * self.Dxf.dot(self.Dxb) \
         - 1 / MU_0 * self.Dyf.dot(self.Dyb)
     # print('C size', C.shape)
     print('CC',C)
     entries_c, indices_c = get_entries_indices(C)
     # indices into the diagonal of a sparse matrix
     entries_diag = - EPSILON_0 * self.omega**2 * eps_vec
     # print('eps_vec size', eps_vec.shape)
     indices_diag = npa.vstack((npa.arange(self.N), npa.arange(self.N)))
     print('EE',entries_diag)
     entries_a = npa.hstack((entries_diag, entries_c))
     indices_a = npa.hstack((indices_diag, indices_c))
     print('AA',entries_a)
     return entries_a, indices_a
Example #16
def propagate(m, s, plant, dynmodel, policy):
    angi = plant.angi
    poli = plant.poli
    dyni = plant.dyni
    difi = plant.difi

    D0 = len(m)
    D1 = D0 + 2 * len(angi)
    D2 = D1 + len(policy.max_u)
    M = np.array(m)
    S = s

    i, j = np.arange(D0), np.arange(D0, D1)
    m, s, c = gaussian_trig(M[i], S[np.ix_(i, i)], angi)
    q = np.matmul(S[np.ix_(i, i)], c)
    M = np.hstack([M, m])
    S = np.vstack([np.hstack([S, q]), np.hstack([q.T, s])])

    i, j = poli, np.arange(D1)
    m, s, c = policy.fcn(M[i], S[np.ix_(i, i)])
    q = np.matmul(S[np.ix_(j, i)], c)
    M = np.hstack([M, m])
    S = np.vstack([np.hstack([S, q]), np.hstack([q.T, s])])

    i, j = np.hstack([dyni, np.arange(D1, D2)]), np.arange(D2)
    m, s, c = dynmodel.fcn(M[i], S[np.ix_(i, i)])
    q = np.matmul(S[np.ix_(j, i)], c)
    M = np.hstack([M, m])
    S = np.vstack([np.hstack([S, q]), np.hstack([q.T, s])])

    P = np.hstack([np.zeros((D0, D2)), np.eye(D0)])
    P = fill_mat(np.eye(len(difi)), P, difi, difi)
    M_next = np.matmul(P, M[:, np.newaxis]).flatten()
    S_next = P @ S @ P.T
    S_next = (S_next + S_next.T) / 2
    return M_next, S_next
Example #17
def pendulum_dynamics(xu):
    dt = 0.05
    m = 1.0
    l = 1.0
    d = 1e-2  # damping
    g = 9.80665
    u_mx = 2.0
    x, u = xu[:, :2], xu[:, 2:]
    u = np.clip(u, -u_mx, u_mx)
    th_dot_dot = -3.0 * g / (2 * l) * np.sin(x[:, 0] + np.pi) - d * x[:, 1]
    th_dot_dot += 3.0 / (m * l ** 2) * u.squeeze()
    x_dot = x[:, 1] + th_dot_dot * dt
    x_pos = x[:, 0] + x_dot * dt
    x2 = np.vstack((x_pos, x_dot)).T
    return x2
Example #18
    def run_hmc(self, check_point, position_init, momentum_init):
        ### Initialize position and momentum
        position_current = position_init
        momentum_current = momentum_init

        ### Perform multiple HMC steps
        for i in range(self.params['total_samples']):
            self.iterations += 1
            ### output accept rate at check point iterations
            if i % check_point == 0 and i > 0:
                accept_rate = self.accepts * 100. / i
                print('HMC {}: accept rate of {}'.format(i, accept_rate))

            position_current, momentum_current = self.hmc(
                position_current, momentum_current)

            # add sample to trace
            if i % self.params['thinning_factor'] == 0:
                self.trace = np.vstack((self.trace, position_current))
                self.potential_energy_trace = np.vstack(
                    (self.potential_energy_trace,
                     self.potential_energy(position_current)))

        self.trace = self.trace[1:]
Example #19
    def _make_A(self, eps_vec, delta_matrix, phi_matrix):
        """ Builds the multi-frequency electromagnetic operator A in Ax = b """
        M = 2*self.Nsb + 1
        N = self.Nx * self.Ny
        W = self.omega + npa.arange(-self.Nsb,self.Nsb+1)*self.omega_mod

        C = sp.kron(sp.eye(M), - 1 / MU_0 * self.Dxf.dot(self.Dxb) - 1 / MU_0 * self.Dyf.dot(self.Dyb))
        entries_c, indices_c = get_entries_indices(C)

        # diagonal entries representing static refractive index
        # this part is just a block diagonal version of the single frequency fdfd_ez
        entries_diag = - EPSILON_0 * npa.kron(W**2, eps_vec)
        indices_diag = npa.vstack((npa.arange(M*N), npa.arange(M*N)))

        entries_a = npa.hstack((entries_diag, entries_c))
        indices_a = npa.hstack((indices_diag, indices_c))

        # off-diagonal entries representing dynamic modulation
        # this part couples different frequencies due to modulation
        # for a derivation of these entries, see Y. Shi, W. Shin, and S. Fan. Optica 3(11), 2016.
        Nfreq = npa.shape(delta_matrix)[0]
        for k in npa.arange(Nfreq):
            # super-diagonal entries (note the +1j phase)
            mod_p = - 0.5 * EPSILON_0 * delta_matrix[k,:] * npa.exp(1j*phi_matrix[k,:])
            entries_p = npa.kron(W[:-k-1]**2, mod_p)
            indices_p = npa.vstack((npa.arange((M-k-1)*N), npa.arange((k+1)*N, M*N)))
            entries_a = npa.hstack((entries_p, entries_a))
            indices_a = npa.hstack((indices_p,indices_a))
            # sub-diagonal entries (note the -1j phase)
            mod_m = - 0.5 * EPSILON_0 * delta_matrix[k,:] * npa.exp(-1j*phi_matrix[k,:]) 
            entries_m = npa.kron(W[k+1:]**2, mod_m)
            indices_m = npa.vstack((npa.arange((k+1)*N, M*N), npa.arange((M-k-1)*N)))
            entries_a = npa.hstack((entries_m, entries_a))
            indices_a = npa.hstack((indices_m,indices_a))

        return entries_a, indices_a
Example #20
    def predict(self, X=None, y=None):
        '''
        Function to make predictions
        '''
        X = np.vstack([np.ones((1,X.shape[1])), X])
        AL = self.softmax((np.dot(self.trained_theta, X)))
        
        y_hat = AL.argmax(axis=0)
        y = np.argmax(y, axis=0)

        acc = (y_hat == y).mean()
        print("Accuracy:", acc)
        # print(y_hat.shape, y.shape)
        # print(y_hat)
        return y_hat, y
Example #21
def train_test_split(T1, X1, T2, X2, train_rate=0.8):
    """
    :param train_rate: fraction of data used for training
    :param parameters: specification for the data generation of two scenarios
    :return:training and testing data for C2ST, note each is a combination of data from two samples
    """

    # %% Data Preprocessing
    # interpolate
    T1, X1 = interpolate(T1, X1)
    T2, X2 = interpolate(T2, X2)
    dataX1 = np.zeros((X1.shape[0], X1.shape[1], 2))
    dataX2 = np.zeros((X2.shape[0], X2.shape[1], 2))

    # Dataset build
    for i in range(len(X1)):
        dataX1[i, :, :] = np.hstack((X1[i, np.newaxis].T, T1[i, np.newaxis].T))
        dataX2[i, :, :] = np.hstack((X2[i, np.newaxis].T, T2[i, np.newaxis].T))

    dataY1 = np.random.choice([0], size=(len(dataX1), ))
    dataY2 = np.random.choice([1], size=(len(dataX2), ))
    dataY1 = dataY1[:, np.newaxis]
    dataY2 = dataY2[:, np.newaxis]

    dataX = Permute(np.vstack((dataX1, dataX2)))
    dataY = Permute(np.vstack((dataY1, dataY2)))

    # %% Train / Test Division
    train_size = int(len(dataX) * train_rate)

    trainX, testX = np.array(dataX[0:train_size]), np.array(
        dataX[train_size:len(dataX)])
    trainY, testY = np.array(dataY[0:train_size]), np.array(
        dataY[train_size:len(dataX)])

    return trainX, trainY, testX, testY
Example #22
 def log_prior(self, x, w):
     ''' 
         Returns log(p(Y|x,w))
     '''
     n = x.shape[0]
     if self.prior_model == "logistic_regression":
         negative_energy = np.dot(x, w)
         return np.vstack(
             (-np.log1p(np.exp(negative_energy)) * np.ones(x.shape[0]),
              negative_energy - np.log1p(np.exp(negative_energy)))).T
     elif self.prior_model == "mlp":
         cur_idx = self.n_features * self.hidden_layer_sizes[0]
         wi = w[:cur_idx].reshape(
             (self.n_features, self.hidden_layer_sizes[0]))
         bi = w[cur_idx:cur_idx + self.hidden_layer_sizes[0]]
         ho = np.dot(x, wi) + bi
         hi = np.tanh(ho)
         cur_idx += self.hidden_layer_sizes[0]
         for i in range(len(self.hidden_layer_sizes) - 1):
             wi = w[cur_idx:cur_idx + self.hidden_layer_sizes[i] *
                    self.hidden_layer_sizes[i + 1]].reshape(
                        (self.hidden_layer_sizes[i],
                         self.hidden_layer_sizes[i + 1]))
             cur_idx += self.hidden_layer_sizes[
                 i] * self.hidden_layer_sizes[i + 1]
             bi = w[cur_idx:cur_idx + self.hidden_layer_sizes[i + 1]]
             cur_idx += self.hidden_layer_sizes[i + 1]
             ho = np.dot(hi, wi) + bi
             hi = np.tanh(ho)
             # cur_idx = cur_idx+self.n_hidden_units**2
         negative_energy = np.dot(hi, w[cur_idx:-1]) + w[-1]
         negative_energy = np.vstack((np.zeros(n), negative_energy)).T
         return negative_energy - logsumexp(negative_energy,
                                            axis=1).reshape((n, 1))
     else:
         raise ValueError("Invalid prior model: %s" % self.prior_model)
Example #23
 def obj_noise_space(sqrt_gwidth, z):
     zp = z[:J]
     zq = z[J:]
     torch_zp = to_torch_variable(zp, shape=(-1, zp.shape[1], 1, 1))
     torch_zq = to_torch_variable(zq, shape=(-1, zq.shape[1], 1, 1))
     # need preprocessing probably
     global model_input_size
     s = model_input_size
     upsample = nn.Upsample(size=(s, s), mode='bilinear')
     fp = model(upsample(gen_p(torch_zp))).cpu().data.numpy()
     fp = fp.reshape((J, -1))
     fq = model(upsample(gen_q(torch_zq))).cpu().data.numpy()
     fq = fq.reshape((J, -1))
     F = np.vstack([fp, fq])
     return obj_feat_space(sqrt_gwidth, F)
Example #24
    def initialize(self, x, u, **kwargs):
        localize = kwargs.get('localize', False)

        Ts = [_x.shape[0] for _x in x]
        if localize:
            from sklearn.cluster import KMeans
            km = KMeans(self.nb_states, random_state=1)
            km.fit((np.vstack(x)))
            zs = np.split(km.labels_, np.cumsum(Ts)[:-1])
            zs = [z[:-1] for z in zs]
        else:
            zs = [npr.choice(self.nb_states, size=T - 1) for T in Ts]

        _cov = np.zeros((self.nb_states, self.dm_obs, self.dm_obs))
        for k in range(self.nb_states):
            ## Select the transformation
            si = int(self.rot_lds[k, 0])
            sj = int(self.rot_lds[k, 1])
            T = self.T[sj, ...]

            ts = [np.where(z == k)[0] for z in zs]

            xs = []
            ys = []
            for i in range(len(ts)):
                _x = x[i][ts[i], :]
                _x = np.dot(T, _x.T).T
                _y = x[i][ts[i] + 1, :]
                _y = np.dot(T, _y.T).T

                xs.append(_x)
                ys.append(_y)

            ## FIXME: this should not be done this way; it does not work if several transformations are used
            coef_, intercept_, sigma = linear_regression(xs, ys)
            self.A[si, ...] = coef_[:, :self.dm_obs]
            #self.B[k, ...] = coef_[:, self.dm_obs:]
            self.c[si, :] = intercept_
            _cov[si, ...] = sigma

            self.cov = _cov

        self.covt = np.zeros([self.nb_states, self.dm_obs, self.dm_obs])
        for k in range(self.nb_states):
            i = int(self.rot_lds[k, 0])
            j = int(self.rot_lds[k, 1])
            T_inv = self.T_inv[j, ...]
            self.covt[k, ...] = np.dot(T_inv, self.cov[i, ...])
Example #25
 def _ntied_transmat_prior(self, transmat_val):  # TODO: document choices
     transmat = np.empty((0, self.n_components))
     for r in range(self.n_unique):
         row = np.empty((self.n_chain, 0))
         for c in range(self.n_unique):
             if r == c:
                 subm = np.array(sp.diags([transmat_val[r, c],
                                 1.0], [0, 1],
                     shape=(self.n_chain, self.n_chain)).todense())
             else:
                 lower_left = np.zeros((self.n_chain, self.n_chain))
                 lower_left[self.n_tied, 0] = 1.0
                 subm = np.kron(transmat_val[r, c], lower_left)
             row = np.hstack((row, subm))
         transmat = np.vstack((transmat, row))
     return transmat
Example #26
    def forward_pass(self, wb, inputs, nodes_rows, nodes_cols, graph_idxs):
        """
        Parameters:
        ===========
        - inputs: (np.array) the output from the previous layer, of shape
                  (n_all_nodes, n_features)
        - graphs: (list of nx.Graphs)
        """

        fingerprints = []
        for g, idxs in sorted(graph_idxs.items()):
            fp = np.sum(inputs[idxs], axis=0)
            fingerprints.append(fp)

        assert len(fingerprints) == len(graph_idxs)
        return np.vstack(fingerprints)
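A tiny standalone sketch of the per-graph fingerprint sum performed above; the feature matrix and the graph_idxs mapping are made up:

import numpy as np

inputs = np.arange(12, dtype=float).reshape(6, 2)   # 6 nodes, 2 features each
graph_idxs = {0: [0, 1, 2], 1: [3, 4, 5]}           # node indices per graph
fingerprints = np.vstack([np.sum(inputs[idxs], axis=0)
                          for _, idxs in sorted(graph_idxs.items())])
print(fingerprints)   # one row per graph: [[ 6.  9.] [24. 27.]]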
Example #27
    def inference(self, x, return_std=False):
        self.likelihood(self.params)
        c_c = self.rho * self.RBF(self.theta_c, self.Xc, x)
        c_e = self.rho**2 * self.RBF(self.theta_c, self.Xe, x) + self.RBF(
            self.theta_e, self.Xe, x)
        c = np.vstack((c_c, c_e))

        mean = np.matmul(c.T, self.alpha)
        v = np.linalg.solve(self.L.T, np.linalg.solve(self.L, c))
        var = self.rho**2 * self.RBF(self.theta_c, x) + self.RBF(
            self.theta_e, x) - np.matmul(c.T, v)
        std = np.sqrt(np.diag(var))
        if return_std is False:
            return mean, var
        else:
            return mean, std
Example #28
def chebyshev_centre(A, b, gamma):
    rows, cols = A.shape
    c = np.zeros(cols + 1)
    c[-1] = -1
    A_ = np.hstack([A, np.sqrt(np.sum(np.power(A, 2), axis=1)).reshape(-1, 1)])
    A_ = np.vstack([A_, -c.reshape(1, -1)])
    b_ = np.append(b, 100).reshape(-1, 1)

    # l2 norm minimisation of w
    P = gamma * np.eye(cols + 1)
    P[:, -1] = P[-1, :] = 0

    res = solve_qp(P=P, q=c, G=A_, h=b_)
    x_c = np.array(res[:-1])
    R = float(res[-1])
    return x_c, R
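For reference, a standalone sketch of the same constraint stacking solved as a plain LP with scipy.optimize.linprog rather than the QP solver used above (the QP additionally shrinks ||w||); the unit box is a made-up test case:

import numpy as np
from scipy.optimize import linprog

A = np.array([[1.0, 0.0], [-1.0, 0.0], [0.0, 1.0], [0.0, -1.0]])  # unit box
b = np.array([1.0, 1.0, 1.0, 1.0])
norms = np.sqrt(np.sum(A**2, axis=1)).reshape(-1, 1)
A_ = np.hstack([A, norms])     # rows [a_i, ||a_i||] acting on (centre, R)
c = np.zeros(3)
c[-1] = -1.0                   # maximise the radius R
res = linprog(c, A_ub=A_, b_ub=b,
              bounds=[(None, None), (None, None), (0, None)])
print(res.x)                   # centre ~(0, 0), radius ~1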
Example #29
 def _ntied_transmat_prior(self, transmat_val):  # TODO: document choices
     transmat = np.empty((0, self.n_components))
     for r in range(self.n_unique):
         row = np.empty((self.n_chain, 0))
         for c in range(self.n_unique):
             if r == c:
                 subm = np.array(sp.diags([transmat_val[r, c],
                                 1.0], [0, 1],
                     shape=(self.n_chain, self.n_chain)).todense())
             else:
                 lower_left = np.zeros((self.n_chain, self.n_chain))
                 lower_left[self.n_tied, 0] = 1.0
                 subm = np.kron(transmat_val[r, c], lower_left)
             row = np.hstack((row, subm))
         transmat = np.vstack((transmat, row))
     return transmat
Example #30
    def get_untilted_draws(self, num_draws):
        v = self.mix_par.values
        z = sp.stats.multinomial.rvs(n=num_draws, p=v['w'][0, :], size=1)[0]
        covs = [ np.linalg.inv(v['info'][k, :, :]) \
                 for k in range(self.num_components)]
        means = [ v['loc'][k, :] for k in range(self.num_components) ]

        draws = [ np.array(sp.stats.multivariate_normal.rvs(
                      means[k], cov=covs[k], size=z[k])) \
                  for k in range(self.num_components)]

        # Oh, numpy.  :(
        if self.dim == 1:
            draws = [ np.expand_dims(d, 1) for d in draws]

        return np.vstack(draws)
Example #31
    def mixture_log_density(var_mixture_params, x):
        """Returns a weighted average over component densities."""
        log_weights, var_params = unpack_mixture_params(var_mixture_params)
        component_log_densities = np.vstack(
            [component_log_density(params_k, x) for params_k in var_params]).T

        # print ((component_log_densities).shape)
        # print ((component_log_densities + log_weights).shape)

        # fasdfa
        # print (logsumexp(component_log_densities + log_weights, axis=1, keepdims=False).shape)
        # fsafda

        return logsumexp(component_log_densities + log_weights,
                         axis=1,
                         keepdims=False)  #over clusters
Example #32
    def forward_pass(self, wb, inputs, nodes_rows, nodes_cols, graph_idxs):
        """
        Parameters:
        ===========
        - inputs: (np.array) the output from the previous layer, of shape
                  (n_all_nodes, n_features)
        - graphs: (list of nx.Graphs)
        """

        fingerprints = []
        for g, idxs in sorted(graph_idxs.items()):
            fp = np.sum(inputs[idxs], axis=0)
            fingerprints.append(fp)

        assert len(fingerprints) == len(graph_idxs)
        return np.vstack(fingerprints)
Example #33
    def draw_legacy(self,
                    show=True,
                    fig_to_plot_on=None,
                    ax_to_plot_on=None
                    ):
        # Draws the airplane using matplotlib.
        # This method is deprecated (superseded by draw() ) and will be removed in a future release.

        # Setup
        if fig_to_plot_on is None or ax_to_plot_on is None:
            fig, ax = fig3d()
            fig.set_size_inches(12, 9)
        else:
            fig = fig_to_plot_on
            ax = ax_to_plot_on

        # TODO plot bodies

        # Plot wings
        for wing in self.wings:

            for i in range(len(wing.sections) - 1):
                le_start = wing.sections[i].xyz_le + wing.xyz_le
                le_end = wing.sections[i + 1].xyz_le + wing.xyz_le
                te_start = wing.sections[i].xyz_te() + wing.xyz_le
                te_end = wing.sections[i + 1].xyz_te() + wing.xyz_le

                points = np.vstack((le_start, le_end, te_end, te_start, le_start))
                x = points[:, 0]
                y = points[:, 1]
                z = points[:, 2]

                ax.plot(x, y, z, color='#cc0039')

                if wing.symmetric:
                    ax.plot(x, -1 * y, z, color='#cc0039')

        # Plot reference point
        x = self.xyz_ref[0]
        y = self.xyz_ref[1]
        z = self.xyz_ref[2]
        ax.scatter(x, y, z)

        set_axes_equal(ax)
        plt.tight_layout()
        if show:
            plt.show()
Example #34
    def step(self, policy_new, sample_dict={}):
        """
        compute one step of the update
        Args:
            policy_new: policy instance to make the step for
            sample_dict: dict with samples (currently not used)

        Returns:

        """

        self.sample(None, None, None)
        reward = np.vstack(self._reward_q)

        res = optimize.minimize(
            self.dual_function,
            1.0,
            method='SLSQP',
            # method='L-BFGS-B',
            jac=grad(self.dual_function),
            args=(reward, ),
            bounds=((1e-8, 1e8), ))
        eta = res.x

        kl_samples = self.kl_divergence(eta, reward)

        r = np.asarray(self._reward_q)
        x = np.stack(self._sample_q)
        self.update_policy(policy_new, eta=eta, x=x, r=r)

        # maintain old policy to sample from for later use
        self.policy.set_params(policy_new.params())

        return {
            'epsilon':
            self.epsilon,
            #     'beta': self.beta,
            'eta':
            eta.item(),
            'kl':
            kl_samples.item(),
            # 'entropy_diff': entropy_diff,
            'entropy':
            self.policy.entropy(),
            'reward':
            (self.objective(policy_new.mean) - self.objective.f_opt).item(),
        }
Example #35
def visualizeLatentState(X, rs, gen_params, rec_params):
    q_means, q_log_stds = nn_predict_gaussian(rec_params, X)
    latents = sample_diag_gaussian(q_means, q_log_stds, rs)
    gen = sigmoid(neural_net_predict(gen_params, latents))
    gen = gen[:, :gen.shape[1] // 2]
    print(gen.shape)
    print(X.shape)
    #yTrain =y[:genTrain.shape[0],:]
    #yTest = y[genTrain.shape[0]:,:]
    #pdb.set_trace
    y = tsne(np.vstack((X,gen*10)))
    plt.figure()
    plt.clf()
    plt.scatter(y[:gen.shape[0],0],y[:gen.shape[0],1],color='red')
    plt.scatter(y[gen.shape[0]:,0],y[gen.shape[0]:,1],color='blue')
    plt.legend(['X', 'Xdecoded'],)
    plt.savefig('hidden.jpg')
Example #36
def generate_random_features_ind(data1, data2, num_feat):
    '''
    This functions generates random features for a set of paired inputs, such as times and observations
    Inputs:
    - data:  (n x d) array
    - num_feat: number of random features to be generated
    Output:
    - features in fixed dimensional space (n x num_feat) array
    '''
    # find length-scale for random features using median heuristic
    sig = meddistance(np.vstack((data1, data2)), subsample=1000)
    random_parameters = rp_ind(num_feat, sig, 1)
    rff1 = np.array(
        [f1(row[:, np.newaxis], random_parameters) for row in data1])
    rff2 = np.array(
        [f1(row[:, np.newaxis], random_parameters) for row in data2])
    return rff1, rff2
Example #37
    def mixture_elbo(var_mixture_params, t):
        # We need to only sample the continuous component parameters,
        # and integrate over the discrete component choice

        def mixture_lower_bound(params):
            """Provides a stochastic estimate of the variational lower bound."""
            samples = component_sample(params, num_samples, rs)
            log_qs = mixture_log_density(var_mixture_params, samples)
            log_ps = logprob(samples, t)
            log_ps = np.reshape(log_ps, (num_samples, -1))
            log_qs = np.reshape(log_qs, (num_samples, -1))
            return np.mean(log_ps - log_qs)

        log_weights, var_params = unpack_mixture_params(var_mixture_params)
        component_elbos = np.vstack(
            [mixture_lower_bound(params_k) for params_k in var_params])
        return np.sum(component_elbos + log_weights)
Example #38
    def mixture_elbo(var_mixture_params, t):
        # We need to only sample the continuous component parameters,
        # and integrate over the discrete component choice

        def mixture_lower_bound(params):
            """Provides a stochastic estimate of the variational lower bound."""
            samples = component_sample(params, num_samples, rs)
            log_qs = mixture_log_density(var_mixture_params, samples)
            log_ps = logprob(samples, t)
            log_ps = np.reshape(log_ps, (num_samples, -1))
            log_qs = np.reshape(log_qs, (num_samples, -1))
            return np.mean(log_ps - log_qs)

        log_weights, var_params = unpack_mixture_params(var_mixture_params)
        component_elbos = np.vstack(
            [mixture_lower_bound(params_k) for params_k in var_params])
        return np.sum(component_elbos + log_weights)
Example #39
def Gardner_Krauth_Mezard(N, patterns, weights, biases, sc, lr, k, maxiter):
    '''
    Gardner rule proposed in Krauth & Mezard (1987), "Learning algorithms with
    optimal stability in neural networks", combined with the Krauth-Mezard
    update strategy
    '''
    Z = np.array(patterns).T
    M = 0
    p = Z.shape[-1]
    Z_ = np.vstack([Z, np.ones(p)])
    w_and_b = deepcopy(np.hstack([weights, biases.reshape(N, 1)]))
    y_global = ((w_and_b @ Z_).T /
                (np.sqrt(np.sum(w_and_b**2, axis=1)))) * Z.T  #
    while (np.any(y_global < k) and M < maxiter):
        for i in range(N):  # for each neuron independently
            # compute normalised stability measure (h_i, sigma_i)/|w_i|^2_2
            sum_of_squares = np.sum(weights[i, :]**2 + biases[i]**2)
            ys = ((weights[i, :] @ Z + biases[i]) /
                  (np.sqrt(sum_of_squares))) * Z[i, :]  #
            #pick the pattern with the weakest y
            ind_min = np.argmin(ys)
            weakest_pattern = np.array(
                deepcopy(patterns[ind_min].reshape(1, N)))
            h_i = (weights[i, :].reshape(1, N) @ weakest_pattern.T +
                   biases[i]).squeeze()
            # if the new weakest pattern is not yet stable with the margin k
            y = (h_i * weakest_pattern[0, i]) / (np.sqrt(sum_of_squares))  #
            while (y < k):
                weights[i, :] = deepcopy(
                    weights[i, :] + lr *
                    (weakest_pattern[0, i] * weakest_pattern).squeeze())
                #set diagonal elements to zero
                if sc == True:
                    weights[i, i] = 0
                biases[i] = biases[i] + lr * weakest_pattern[0, i]
                sum_of_squares = np.sum(weights[i, :]**2 + biases[i]**2)
                h_i = (weights[i, :].reshape(1, N) @ weakest_pattern.T +
                       biases[i]).squeeze()
                y = (h_i * weakest_pattern[0, i]) / (np.sqrt(sum_of_squares)
                                                     )  #
        w_and_b = deepcopy(np.hstack([weights, biases.reshape(N, 1)]))
        y_global = ((w_and_b @ Z_).T /
                    (np.sqrt(np.sum(w_and_b**2, axis=1)))) * Z.T  #
        M += 1
        if M >= maxiter:
            print('Maximum number of iterations has been exceeded')
    return weights, biases
Example #40
    def mixture_elbo(var_mixture_params, t):
        # We need to only sample the continuous component parameters,
        # and integrate over the discrete component choice

        # sample_sum = 0
        # for i in range(num_samples):

        def mixture_lower_bound(params):
            """Provides a stochastic estimate of the variational lower bound."""
            samples = component_sample(params, num_samples, rs)
            # print (samples.shape)
            log_qs = mixture_log_density(var_mixture_params, samples)
            # print (log_qs.shape)
            log_ps = logprob(samples, t)
            log_ps = np.reshape(log_ps, (num_samples, -1))
            log_qs = np.reshape(log_qs, (num_samples, -1))
            # print (log_qs.shape)

            log_w = log_ps - log_qs
            elbo = logmeanexp(log_w)


            # w_log_w = np.exp(log_w) * log_w

            # print (w_log_w.shape)
            # dfasd

            

            # w_log_w = softmax(log_w) * log_w

            # w_log_w = np.square(np.exp(log_w))


            # elbo = np.mean(w_log_w)
            # elbo = np.sum(w_log_w)

            

            return elbo

        log_weights, var_params = unpack_mixture_params(var_mixture_params)
        component_elbos = np.vstack(
            [mixture_lower_bound(params_k) for params_k in var_params])
        return np.sum(component_elbos + log_weights)
Example #41
def sig_all():
    """
    This method returns a numpy array of shape=(2, 2, 3) which contains the
    3 Pauli matrices in it. sigx = sig_all[:, :, 0], sigy = sig_all[:, :,
    1], sigz = sig_all[:, :, 2],

    Returns
    -------
    np.ndarray
        shape = (2, 2, 3)

    """
    sigx = np.array([[0, 1], [1, 0]])
    sigy = np.array([[0, -1j], [1j, 0]])
    sigz = np.array([[1, 0], [0, -1]])
    all_paulis = np.vstack([sigx, sigy, sigz])
    all_paulis = np.reshape(all_paulis, (3, 2, 2)).transpose(1, 2, 0)
    return all_paulis
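A quick sanity check of the layout described in the docstring (it assumes sig_all from this example is in scope):

import numpy as np

paulis = sig_all()
print(paulis.shape)                                # (2, 2, 3)
print(np.allclose(paulis[:, :, 1],
                  np.array([[0, -1j], [1j, 0]])))  # True: the sigy slice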
Example #42
 def _loss(self, params):
     WtW = self.WtW
     B = self.base * params[:, None]
     p, n = B.shape
     scale = 1.0 + np.sum(B, axis=0)
     B = B / np.max(scale)
     diag = 1.0 - np.sum(B, axis=0)
     # A = [diag; B] - always has sensitivity 1
     #TODO(ryan): use woodbury identity to make more efficient
     A = np.vstack([np.diag(diag), B])
     AtA1 = np.linalg.inv(np.dot(A.T, A))
     #D2 = 1.0 / diag**2
     #C = np.eye(p) + np.dot(B, B.T)
     #C1 = np.linalg.inv(C)
     #X = np.dot(B.T, np.dot(C1, B))
     # inverse calculated using woodbury identity
     #AtA1 = np.diag(D2) - X*D2*D2[:,None]
     return np.trace(np.dot(WtW, AtA1))
Example #43
    def back_propagation(self, x_array, u_array):
        """
        Back propagation along the given state and control trajectories to solve
        the Riccati equations for the error system (delta_x, delta_u, t)
        Need to approximate the dynamics/costs/constraints along the given trajectory
        dynamics needs a time-varying first-order approximation
        costs and constraints need time-varying second-order approximation
        """
        #Note x_array contains X_T, so a dummy u is appended to keep the arrays
        #a consistent length
        u_array_sup = np.vstack([u_array, np.zeros(len(u_array[0]))])
        lqr_sys = self.build_lqr_system(x_array, u_array_sup)

        #k and K
        fdfwd = [None] * self.T
        fdbck_gain = [None] * self.T

        #initialize with the terminal cost parameters to prepare the backpropagation
        Vxx = lqr_sys['dldxx'][-1]
        Vx = lqr_sys['dldx'][-1]

        for t in reversed(range(self.T)):
            # note: double-check whether the transpose is needed here
            Qx = lqr_sys['dldx'][t] + lqr_sys['dfdx'][t].T.dot(Vx)
            Qu = lqr_sys['dldu'][t] + lqr_sys['dfdu'][t].T.dot(Vx)
            Qxx = lqr_sys['dldxx'][t] + lqr_sys['dfdx'][t].T.dot(Vxx).dot(lqr_sys['dfdx'][t])
            Qux = lqr_sys['dldux'][t] + lqr_sys['dfdu'][t].T.dot(Vxx).dot(lqr_sys['dfdx'][t])
            Quu = lqr_sys['dlduu'][t] + lqr_sys['dfdu'][t].T.dot(Vxx).dot(lqr_sys['dfdu'][t])

            #use regularized inverse for numerical stability
            inv_Quu = self.regularized_persudo_inverse_(Quu, reg=self.reg)

            #get k and K
            fdfwd[t] = -inv_Quu.dot(Qu)
            fdbck_gain[t] = -inv_Quu.dot(Qux)

            #update value function for the previous time step
            Vxx = Qxx - fdbck_gain[t].T.dot(Quu).dot(fdbck_gain[t])
            Vx = Qx - fdbck_gain[t].T.dot(Quu).dot(fdfwd[t])

        return fdfwd, fdbck_gain
Example #44
    def _ntied_transmat(self, transmat_val):  # TODO: document choices

#                        +-----------------+
#                        |a|1|0|0|0|0|0|0|0|
#                        +-----------------+
#                        |0|a|1|0|0|0|0|0|0|
#                        +-----------------+
#   +---+---+---+        |0|0|a|b|0|0|c|0|0|
#   | a | b | c |        +-----------------+
#   +-----------+        |0|0|0|e|1|0|0|0|0|
#   | d | e | f | +----> +-----------------+
#   +-----------+        |0|0|0|0|e|1|0|0|0|
#   | g | h | i |        +-----------------+
#   +---+---+---+        |d|0|0|0|0|e|f|0|0|
#                        +-----------------+
#                        |0|0|0|0|0|0|i|1|0|
#                        +-----------------+
#                        |0|0|0|0|0|0|0|i|1|
#                        +-----------------+
#                        |g|0|0|h|0|0|0|0|i|
#                        +-----------------+
# for a model with n_unique = 3 and n_tied = 2


        transmat = np.empty((0, self.n_components))
        for r in range(self.n_unique):
            row = np.empty((self.n_chain, 0))
            for c in range(self.n_unique):
                if r == c:
                    subm = np.array(sp.diags([transmat_val[r, c],
                                    1 - transmat_val[r, c]], [0, 1],
                                    shape=(self.n_chain,
                                           self.n_chain)).todense())
                else:
                    lower_left = np.zeros((self.n_chain, self.n_chain))
                    lower_left[self.n_tied, 0] = 1.0
                    subm = np.kron(transmat_val[r, c], lower_left)
                row = np.hstack((row, subm))
            transmat = np.vstack((transmat, row))
        return transmat
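The diagram above shows how an n_unique x n_unique matrix expands into an n_components x n_components tied-chain matrix. Here is a minimal standalone sketch of the same expansion for n_unique = 2 and n_tied = 1; all names below are local to the sketch:

import numpy as np
import scipy.sparse as sparse

transmat_val = np.array([[0.9, 0.1],
                         [0.2, 0.8]])
n_unique, n_tied = 2, 1
n_chain = n_tied + 1
n_components = n_unique * n_chain

transmat = np.empty((0, n_components))
for r in range(n_unique):
    row = np.empty((n_chain, 0))
    for c in range(n_unique):
        if r == c:
            # self block: stay probability on the diagonal, advance on the superdiagonal
            subm = np.array(sparse.diags([transmat_val[r, c],
                                          1 - transmat_val[r, c]], [0, 1],
                                         shape=(n_chain, n_chain)).todense())
        else:
            # cross block: only the last state of the chain can jump to another chain
            lower_left = np.zeros((n_chain, n_chain))
            lower_left[n_tied, 0] = 1.0
            subm = np.kron(transmat_val[r, c], lower_left)
        row = np.hstack((row, subm))
    transmat = np.vstack((transmat, row))
print(transmat)   # the expanded 4 x 4 transition matrix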
Example #45
def job_lin_mmd(sample_source, tr, te, r):
    """Linear mmd with grid search to choose the best Gaussian width."""
    # should be completely deterministic

    # If n is too large, pairwise median computation can cause a memory error.
    with util.ContextTimer() as t:
        X, Y = tr.xy()
        Xr = X[:min(X.shape[0], 1000), :]
        Yr = Y[:min(Y.shape[0], 1000), :]
        
        med = util.meddistance(np.vstack((Xr, Yr)) )
        widths = [ (med*f) for f in 2.0**np.linspace(-1, 4, 40)]
        list_kernels = [kernel.KGauss( w**2 ) for w in widths]
        # grid search to choose the best Gaussian width
        besti, powers = tst.LinearMMDTest.grid_search_kernel(tr, list_kernels, alpha)
        # perform test 
        best_ker = list_kernels[besti]
        lin_mmd_test = tst.LinearMMDTest(best_ker, alpha)
        test_result = lin_mmd_test.perform_test(te)

    result = {'test_method': lin_mmd_test, 'test_result': test_result, 'time_secs': t.secs}
    return result
Example #46
    def interpolate_basis(self, basis, dt, dt_max,
                          norm=True):
        # Interpolate basis at the resolution of the data
        L,B = basis.shape
        t_int = np.arange(0.0, dt_max, step=dt)
        t_bas = np.linspace(0.0, dt_max, L)

        ibasis = np.zeros((len(t_int), B))
        for b in np.arange(B):
            ibasis[:,b] = np.interp(t_int, t_bas, basis[:,b])

        # Normalize so that the interpolated basis has volume 1
        if norm:
            # ibasis /= np.trapz(ibasis,t_int,axis=0)
            ibasis /= (dt * np.sum(ibasis, axis=0))

        if not self.allow_instantaneous:
            # Typically, the impulse responses are applied to times
            # (t+1:t+R). That means we need to prepend a row of zeros to make
            # sure the basis remains causal
            ibasis = np.vstack((np.zeros((1,B)), ibasis))

        return ibasis
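A standalone sketch of the normalization step above: after dividing by dt times the column sums, each interpolated basis function integrates to (approximately) one. The toy basis here is made up:

import numpy as np

dt, dt_max, L, B = 0.01, 1.0, 10, 2
t_int = np.arange(0.0, dt_max, step=dt)
t_bas = np.linspace(0.0, dt_max, L)
basis = np.column_stack([np.sin(np.pi * t_bas), np.cos(np.pi * t_bas / 2)])

ibasis = np.zeros((len(t_int), B))
for b in np.arange(B):
    ibasis[:, b] = np.interp(t_int, t_bas, basis[:, b])
ibasis /= (dt * np.sum(ibasis, axis=0))
print(dt * np.sum(ibasis, axis=0))   # ~[1. 1.]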
Example #47
def InterpolatedGradients(im, pts):
    """ return linearly interpolated intensity values in im
    for all points pts

    pts is assumed to be ([1xn], [1xn]) tuple of y and x indices
    """
    ptsi = tuple(p.astype(np.int32) for p in pts)  # truncated integer part
    ptsf = (pts[0] - ptsi[0], pts[1] - ptsi[1])  # fractional part

    # bilinear weights
    up = 1 - ptsf[0]
    down = ptsf[0]
    left = 1 - ptsf[1]
    right = ptsf[1]

    # image components
    ul = im[ptsi]
    ur = im[(ptsi[0], 1+ptsi[1])]
    dl = im[(1+ptsi[0], ptsi[1])]
    dr = im[(1+ptsi[0], 1+ptsi[1])]
    return np.vstack((
        left*(dl - ul) + right*(dr - ur),  # y gradient
        up*(ur - ul) + down*(dr - dl)))    # x gradient
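A quick check of the bilinear gradient above on a horizontal ramp image, where the y-gradient is 0 and the x-gradient is 1 everywhere (it assumes InterpolatedGradients from this example is in scope; the query points are made up):

import numpy as np

im = np.tile(np.arange(10.0), (10, 1))               # intensity grows left -> right
pts = (np.array([2.3, 5.7]), np.array([3.1, 6.4]))   # (y, x) query points
print(InterpolatedGradients(im, pts))                # row 0 ~0 (y), row 1 ~1 (x)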
Example #48
def im2col(img, block_size = (5, 5), skip = 1):
    """ stretches block_size size'd patches centered skip distance 
        away in both row/column space, stacks into columns (and stacks)
        bands into rows

        Use-case is for storing images for quick matrix multiplies
           - blows up memory usage by quite a bit (factor of 10!)

        motivated by implementation discussion here: 
            http://cs231n.github.io/convolutional-networks/

        edited from snippet here:
            http://stackoverflow.com/questions/30109068/implement-matlabs-im2col-sliding-in-python
    """
    # stack depth bands (colors)
    if len(img.shape) == 3:
        return np.vstack([ im2col(img[:,:,k], block_size, skip)
                           for k in range(img.shape[2]) ])

    # input array and block size
    A = img
    B = block_size

    # Parameters
    M,N = A.shape
    col_extent = N - B[1] + 1
    row_extent = M - B[0] + 1

    # Get Starting block indices
    start_idx = np.arange(B[0])[:,None]*N + np.arange(B[1])

    # Get offsetted indices across the height and width of input array
    offset_idx = np.arange(0, row_extent, skip)[:,None]*N + np.arange(0, col_extent, skip)

    # Get all actual indices & index into input array for final output
    out = np.take(A,start_idx.ravel()[:,None] + offset_idx.ravel())
    return out
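A minimal check of the patch-stacking behaviour described in the docstring (it assumes im2col from this example is in scope):

import numpy as np

img = np.arange(36, dtype=float).reshape(6, 6)
cols = im2col(img, block_size=(3, 3), skip=1)
print(cols.shape)   # (9, 16): one flattened 3x3 patch per column, 4 x 4 valid positions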
Example #49
    def forward_pass(self, graphs):
        """
        Returns the nodes' features stacked together, along with a dictionary
        of nodes and their neighbors.

        An example structure is:
        - {1: [1, 2 , 4],
           2: [2, 1],
           ...
           }
        """
        # First off, we label each node with the index of each node's data.
        features = []
        i = 0
        for g in graphs:
            for n, d in g.nodes(data=True):
                features.append(d['features'])
                g.node[n]['idx'] = i
                i += 1

        # We then do a second pass over the graphs, and record each node and
        # their neighbors' indices in the stacked features array.
        #
        # We also record the indices corresponding to each graph.
        nodes_nbrs = defaultdict(list)
        graph_idxs = defaultdict(list)
        for idx, g in enumerate(graphs):
            g.graph['idx'] = idx  # set the graph's index attribute.
            for n, d in g.nodes(data=True):
                nodes_nbrs[d['idx']].append(d['idx'])
                graph_idxs[idx].append(d['idx'])  # append node index to list
                                                  # of graph's nodes indices.
                for nbr in g.neighbors(n):
                    nodes_nbrs[d['idx']].append(g.node[nbr]['idx'])

        return np.vstack(features), nodes_nbrs, graph_idxs
Example #50
 def mixture_log_density(var_mixture_params, x):
     """Returns a weighted average over component densities."""
     log_weights, var_params = unpack_mixture_params(var_mixture_params)
     component_log_densities = np.vstack([component_log_density(params_k, x)
                                          for params_k in var_params]).T
     return logsumexp(component_log_densities + log_weights, axis=1, keepdims=False)
Example #51
            Nsamps = th_samples.shape[0]
            # discard first half and randomly permute
            th_samples = th_samples[Nsamps // 2:, :]
            ll_samps   = ll_samps[Nsamps // 2:]
            chain_perm = np.random.permutation(th_samples.shape[0])[0:2500]
            chain_perm = np.arange(2500)
            # assemble a few thousand samples
            B0 = parser.get(th_samples[0], 'betas')
            B_samps = np.zeros((len(chain_perm), B0.shape[0], B0.shape[1]))
            for i, idx in enumerate(chain_perm):
                betas = K_chol.dot(parser.get(th_samples[idx, :], 'betas').T).T
                B_samp = np.exp(betas)
                B_samp /= np.sum(B_samp * lam0_delta, axis=1, keepdims=True)
                B_samps[i, :, :] = B_samp
            B_chains.append(B_samps)
        B_samps = np.vstack(B_chains)
        B_samps = B_samps[npr.permutation(B_samps.shape[0]), :, :]

    B_mle = load_basis(num_bases     = NUM_BASES,
                       split_type    = SPLIT_TYPE,
                       lam_subsample = LAM_SUBSAMPLE)
    lam0, lam0_delta = ru.get_lam0(lam_subsample=LAM_SUBSAMPLE)
    def get_basis_sample(idx, mle = False): 
        """ Method to return a basis sample to condition on 
        (or the MLE if specified) """
        if mle: 
            return B_mle
        else:
            return B_samps[idx]

    ##########################################################################
Example #52
    Nr = TrackNormal(rx)

    # psie is sort of backwards: higher angles go to the left
    return np.angle(Nx) - np.angle(Nr)


if __name__ == '__main__':
    TRACK_SPACING = 19.8  # cm
    x = SVGPathToTrackPoints("oakwarehouse.path", TRACK_SPACING)[:-1]

    xm = np.array(x)[:, 0] / 50  # 50 pixels / meter
    track_k = TrackCurvature(xm)
    Nx = TrackNormal(xm)
    u = 1j * Nx
    np.savetxt("track_x.txt",
               np.vstack([np.real(xm), np.imag(xm)]).T.reshape(-1),
               newline=",\n")
    np.savetxt("track_u.txt",
               np.vstack([np.real(u), np.imag(u)]).T.reshape(-1),
               newline=",\n")
    np.savetxt("track_k.txt", track_k, newline=",\n")

    ye, val, stuff = OptimizeTrack(xm, 1.4, 0.1)
    psie = RelativePsie(ye, xm)

    rx = u*ye + xm
    raceline_k = TrackCurvature(rx)

    np.savetxt("raceline_k.txt", raceline_k, newline=",\n")
    np.savetxt("raceline_ye.txt", ye, newline=",\n")
    np.savetxt("raceline_psie.txt", psie, newline=",\n")
Example #53
def plot_ellipse(ax, mean, cov_sqrt, alpha, num_points=100):
    angles = np.linspace(0, 2*np.pi, num_points)
    circle_pts = np.vstack([np.cos(angles), np.sin(angles)]).T * 2.0
    cur_pts = mean + np.dot(circle_pts, cov_sqrt)
    ax.plot(cur_pts[:, 0], cur_pts[:, 1], '-', alpha=alpha)
Example #54
def gmm_log_likelihood(params, data):
    cluster_lls = []
    for log_proportion, mean, cov_sqrt in zip(*unpack_gmm_params(params)):
        cov = np.dot(cov_sqrt.T, cov_sqrt)
        cluster_lls.append(log_proportion + mvn.logpdf(data, mean, cov))
    return np.sum(logsumexp(np.vstack(cluster_lls), axis=0))
Example #55
def genConstraints(prng, label, alpha, beta, num_ML, num_CL, start_expert = 0, \
        flag_same=False):
    """ This function generates pairwise constraints (ML/CL) using groud-truth
    cluster label and noise parameters
    Parameters
    ----------
    label: shape(n_sample, )
        cluster label of all the samples
    alpha: shape(n_expert, )
        sensitivity parameters of experts
    beta: shape(n_expert, )
        specificity parameters of experts
    num_ML: int
    num_CL: int
    flag_same: True if different experts provide constraints for the same set
    of sample pairs, False if different experts provide constraints for
    different set of sample pairs
    
    Returns
    -------
    S: shape(n_con, 4)
        The first column -> expert id
        The second and third column -> (row, column) indices of two samples
        The fourth column -> constraint values (1 for ML and 0 for CL)
    """
    n_sample = len(label)
    tp = np.tile(label, (n_sample,1))
    label_mat = (tp == tp.T).astype(int)
    
    ML_set = []
    CL_set = []
    # get indices of upper-triangle matrix
    [row, col] = np.triu_indices(n_sample, k=1)
    # n_sample * (n_sample-1)/2
    for idx in range(len(row)):
        if label_mat[row[idx],col[idx]] == 1:
            ML_set.append([row[idx], col[idx]])
        elif label_mat[row[idx],col[idx]] == 0:
            CL_set.append([row[idx], col[idx]])
        else:
            print "Invalid matrix entry values"

    ML_set = np.array(ML_set)
    CL_set = np.array(CL_set)

    assert num_ML < ML_set.shape[0]
    assert num_CL < CL_set.shape[0]
    
    # generate noisy constraints for each expert
    assert len(alpha) == len(beta)
    n_expert = len(alpha)
    
    # initialize the constraint matrix
    S = np.zeros((0, 4))
    
    # different experts provide constraint for the same set of sample pairs
    if flag_same == True:
        idx_ML = prng.choice(ML_set.shape[0], num_ML, replace=False)
        idx_CL = prng.choice(CL_set.shape[0], num_CL, replace=False)
        ML = ML_set[idx_ML, :]
        CL = CL_set[idx_CL, :]
        for m in range(n_expert):
            val_ML = prng.binomial(1, alpha[m], num_ML)
            val_CL = prng.binomial(1, 1-beta[m], num_CL)
            Sm_ML = np.hstack((np.ones((num_ML,1))*(m+start_expert), ML, \
                    val_ML.reshape(val_ML.size,1) ))
            Sm_CL = np.hstack((np.ones((num_CL,1))*(m+start_expert), CL, \
                    val_CL.reshape(val_CL.size,1) ))
            S = np.vstack((S, Sm_ML, Sm_CL)).astype(int)
    # different experts provide constraints for different sets of sample pairs
    else:
        for m in range(n_expert):
            # note: this re-seeds per expert, overriding the prng argument, so
            # each expert draws a deterministic, expert-specific pair set
            prng = np.random.RandomState(1000 + m)
            idx_ML = prng.choice(ML_set.shape[0], num_ML, replace=False)
            idx_CL = prng.choice(CL_set.shape[0], num_CL, replace=False)
            ML = ML_set[idx_ML, :]
            CL = CL_set[idx_CL, :]
            val_ML = prng.binomial(1, alpha[m], num_ML)
            val_CL = prng.binomial(1, 1-beta[m], num_CL)
            Sm_ML = np.hstack((np.ones((num_ML,1))*(m+start_expert), ML, \
                    val_ML.reshape(val_ML.size,1) ))
            Sm_CL = np.hstack((np.ones((num_CL,1))*(m+start_expert), CL, \
                    val_CL.reshape(val_CL.size,1) ))
            S = np.vstack((S, Sm_ML, Sm_CL)).astype(int)

    return S
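A minimal usage sketch for genConstraints with a tiny made-up label vector and two simulated experts:

import numpy as np

prng = np.random.RandomState(0)
label = np.array([0, 0, 0, 1, 1, 1, 2, 2, 2])   # ground-truth cluster labels
alpha = np.array([0.9, 0.7])                     # per-expert sensitivity
beta = np.array([0.9, 0.7])                      # per-expert specificity

S = genConstraints(prng, label, alpha, beta, num_ML=5, num_CL=5, flag_same=True)
# each row of S is [expert_id, row_idx, col_idx, constraint_value]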
Ejemplo n.º 56
0
def polyinterp(points, doPlot=None, xminBound=None, xmaxBound=None):
    """ polynomial interpolation
    Parameters
    ----------
    points: shape(pointNum, 3), three columns represents x, f, g
    doPolot: set to 1 to plot, default 0
    xmin: min value that brackets minimum (default: min of points)
    xmax: max value that brackets maximum (default: max of points)
    
    set f or g to sqrt(-1)=1j if they are not known
    the order of the polynomial is the number of known f and g values minus 1

    Returns
    -------
    minPos:
    fmin:
    """
    
    if doPlot is None:
        doPlot = 0

    nPoints = points.shape[0]
    order = np.sum(np.imag(points[:, 1:3]) == 0) -1
    
    # code for most common case: cubic interpolation of 2 points
    if nPoints == 2 and order == 3 and doPlot == 0:
        [minVal, minPos] = [np.min(points[:,0]), np.argmin(points[:,0])]
        notMinPos = 1 - minPos
        d1 = points[minPos,2] + points[notMinPos,2] - 3*(points[minPos,1]-\
                points[notMinPos,1])/(points[minPos,0]-points[notMinPos,0])

        t_d2 =  d1**2 - points[minPos,2]*points[notMinPos,2]
        if t_d2 > 0:
            d2 = np.sqrt(t_d2)
        else:
            d2 = np.sqrt(-t_d2) * np.complex(0,1)
        if np.isreal(d2):
            t = points[notMinPos,0] - (points[notMinPos,0]-points[minPos,0])*\
                    ((points[notMinPos,2]+d2-d1)/(points[notMinPos,2]-\
                    points[minPos,2]+2*d2))
            minPos = np.min([np.max([t,points[minPos,0]]), points[notMinPos,0]])
        else:
            minPos = np.mean(points[:,0])
        fmin = minVal
        return (minPos, fmin)
    
    xmin = np.min(points[:,0])
    xmax = np.max(points[:,0])

    # compute bounds of interpolation area
    if xminBound is None:
        xminBound = xmin
    if xmaxBound is None:
        xmaxBound = xmax

    # constraints based on available function values
    A = np.zeros((0, order+1))
    b = np.zeros((0, 1))
    for i in range(nPoints):
        if np.imag(points[i,1]) == 0:
            constraint = np.zeros(order+1)
            for j in np.arange(order,-1,-1):
                constraint[order-j] = points[i,0]**j
            A = np.vstack((A, constraint))
            b = np.append(b, points[i,1])
    
    # constraints based on available derivatives
    for i in range(nPoints):
        if np.isreal(points[i,2]):
            constraint = np.zeros(order+1)
            for j in range(1,order+1):
                constraint[j-1] = (order-j+1)* points[i,0]**(order-j)
            A = np.vstack((A, constraint))
            b = np.append(b,points[i,2])
    
    # find interpolating polynomial
    params = np.linalg.solve(A, b)

    # compute critical points
    dParams = np.zeros(order)
    for i in range(params.size-1):
        dParams[i] = params[i] * (order-i)
    
    if np.any(np.isinf(dParams)):
        cp = np.concatenate((np.array([xminBound, xmaxBound]), points[:,0]))
    else:
        cp = np.concatenate((np.array([xminBound, xmaxBound]), points[:,0], \
                np.roots(dParams)))
    
    # test critical points
    fmin = np.infty
    minPos = (xminBound + xmaxBound)/2.
    for xCP in cp:
        if np.imag(xCP) == 0 and xCP >= xminBound and xCP <= xmaxBound:
            fCP = np.polyval(params, xCP)
            if np.imag(fCP) == 0 and fCP < fmin:
                minPos = np.double(np.real(xCP))
                fmin = np.double(np.real(fCP))
    
    # plot situation (omit this part for now since we are not going to use it
    # anyway)

    return (minPos, fmin)
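A small usage sketch for the common case handled first above: cubic interpolation of two points with known f and g. The test function here, f(x) = (x - 1)**2, is made up; its true minimizer is x = 1:

import numpy as np

# point format: [x, f(x), f'(x)]
points = np.array([[0.0, 1.0, -2.0],
                   [2.0, 1.0,  2.0]])
minPos, fmin = polyinterp(points)
# cubic interpolation of a quadratic recovers the minimizer, so minPos == 1.0 here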
Ejemplo n.º 57
0
            feat_list.append(d['features'])
            idx += 1

        # Now loop over each node again and figure out its neighbors.
        for n, d in p.nodes(data=True):
            graph_idxs[project['title']].append(d['idx'])
            nodes_nbrs[d['idx']].append(d['idx'])
            graph_nodes[project['title']][d['idx']] = n
            for nbr in p.neighbors(n):
                nodes_nbrs[d['idx']].append(p.node[nbr]['idx'])
            # print(nodes_nbrs[d['idx']])
    except:
        print('Did not make graph for {0}'.format(project['code']))

# Save the data to disk:
# The array...
feat_array = np.vstack(feat_list)
np.save('../data/feat_array.npy', feat_array)

# The node idxs and their neighbor idxs...
with open('../data/nodes_nbrs.pkl', 'wb') as f:
    pkl.dump(nodes_nbrs, f)

# The graphs' seqids and their node idxs...
with open('../data/graph_idxs.pkl', 'wb') as f:
    pkl.dump(graph_idxs, f)

# The graphs': {'SeqID1':{1:'A51SER',...},...}
with open('../data/graph_nodes.pkl', 'wb') as f:
    pkl.dump(graph_nodes, f)
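A short sketch for loading the saved arrays and pickles back; paths are taken from the np.save / pkl.dump calls above, and standard pickle is assumed for the pkl alias whose import sits in the omitted header of this snippet:

import numpy as np
import pickle as pkl

feat_array = np.load('../data/feat_array.npy')   # one feature row per node

with open('../data/nodes_nbrs.pkl', 'rb') as f:
    nodes_nbrs = pkl.load(f)                      # node idx -> neighbor idxs

with open('../data/graph_idxs.pkl', 'rb') as f:
    graph_idxs = pkl.load(f)                      # graph title -> node idxs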
Ejemplo n.º 58
0
    #examine output
    ####################################################################
    print "Final Loss: ", loss_fun(best_w, train_images, samps)
    params    = pred_fun(best_w, train_images)
    means     = params[:, :8]
    variances = params[:, -8:]

    i = 10
    def compare_moments(i):
        print "samp comparison, idx = %d "%i
        print " {0:5} | {1:6} | {2:6} | {3:6} | {4:6} ".format(
                "dim", "mod_m", "sam_m", "mod_v", "sam_v")
        smean = samps[i].mean(axis=0)
        svar  = samps[i].var(axis=0)
        # use a separate loop variable so the sample index i is not shadowed
        for d, (mm, mv, m, v) in enumerate(zip(means[i, :], variances[i, :], smean, svar)):
            print " {0:5} | {1:6} | {2:6} | {3:6} | {4:6} ".format(
                    d, "%2.2f"%mm, "%2.2f"%m, "%2.2f"%mv, "%2.2f"%v)

    compare_moments(0)
    compare_moments(10)
    compare_moments(80)


    ######### exploratory stuff - look at the scaling of each distribution
    svals = []
    for i in range(len(samps)):
        u, s, v = np.linalg.svd(samps[i])
        svals.append(s)
    svals = np.vstack(svals)
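One hedged follow-up to the exploratory block above: summarize how much of each sample matrix's spectrum is carried by its leading singular value (uses only svals from the loop above).

    # fraction of squared "energy" captured by the leading singular value
    top_frac = svals[:, 0]**2 / np.sum(svals**2, axis=1)
    print "leading singular value: %2.2f +/- %2.2f of the energy" \
            % (top_frac.mean(), top_frac.std())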

Ejemplo n.º 59
0
def minConf_PQN(funObj, x, funProj, options=None):
    """
    The problems are of the form
                min funObj(x) s.t. x in C
    The projected quasi-Newton sub-problems are solved using the spectral
    projected gradient algorithm

    Parameters
    ----------
    funObj: function to minimize, return objective value as the first argument
            and gradient as the second argument
    funProj: function that returns projection of x onto C
    options:
        1) verbose: level of verbosity (0: no output, 1: final, 2: iter
        (default), 3: debug)
        2) optTol: tolerance used to check for optimality (default: 1e-5)
        3) progTol: tolerance used to check for progress (default: 1e-9)
        4) maxIter: maximum number of calls to funObj (default: 500)
        5) maxProject: maximum number of calls to funProj (default: 100000)
        6) numDiff: compute derivatives numerically (0: use user-supplied
            derivatives (default), 1: use finite differences, 2: use complex
            differentials)
        7) suffDec: sufficient decrease parameter in Armijo condition (default:
            1e-4)
        8) corrections: number of lbfgs corrections to store (default: 10)
        9) adjustStep: use quadratic initialization of line search (default: 0)
        10) bbInit: initialize sub-problem with Barzilai-Borwein step (default:
            0)
        11) SPGoptTol: optimality tolerance for SPG direction finding (default:
            1e-6)
        12) SPGiters: maximum number of iterations for SPG direction finding
            (default: 10)

    Returns
    -------
    x: optimal parameter values
    f: optimal objective value
    funEvals: number of function evaluations 
    """
    
    # number of variables/parameters
    nVars = len(x)
    
    # set default optimization settings
    options_default = {'verbose':2, 'numDiff':0, 'optTol':1e-5, 'progTol':1e-9, \
                'maxIter':500, 'maxProject':100000, 'suffDec':1e-4, \
                'corrections':10, 'adjustStep':0, 'bbInit':0, 'SPGoptTol':1e-6,\
                'SPGprogTol':1e-10, 'SPGiters':10, 'SPGtestOpt':0}
    options = setDefaultOptions(options, options_default)
    
    if options['verbose'] == 3:
        print 'Running PQN...'
        print 'Number of L-BFGS Corrections to store: ' + \
                str(options['corrections'])
        print 'Spectral initialization of SPG: ' + str(options['bbInit'])
        print 'Maximum number of SPG iterations: ' + str(options['SPGiters'])
        print 'SPG optimality tolerance: ' + str(options['SPGoptTol'])
        print 'SPG progress tolerance: ' + str(options['SPGprogTol'])
        print 'PQN optimality tolerance: ' + str(options['optTol'])
        print 'PQN progress tolerance: ' + str(options['progTol'])
        print 'Quadratic initialization of line search: ' + \
                str(options['adjustStep'])
        print 'Maximum number of function evaluations: ' + \
                str(options['maxIter'])
        print 'Maximum number of projections: ' + str(options['maxProject'])

    if options['verbose'] >= 2:
        print '{:10s}'.format('Iteration') + \
                '{:10s}'.format('FunEvals') + \
                '{:10s}'.format('Projections') + \
                '{:15s}'.format('StepLength') + \
                '{:15s}'.format('FunctionVal') + \
                '{:15s}'.format('OptCond')
    
    funEvalMultiplier = 1
    # project initial parameter vector
    # translate this function (Done!)
    x = funProj(x)
    projects = 1

    # evaluate initial parameters
    # translate this function (Done!)
    [f, g] = funObj(x)
    funEvals = 1

    # check optimality of initial point
    projects = projects + 1
    if np.max(np.abs(funProj(x-g)-x)) < options['optTol']:
        if options['verbose'] >= 1:
            print "First-Order Optimality Conditions Below optTol at Initial Point"
        return (x, f, funEvals)
    
    i = 1
    while funEvals <= options['maxIter']:
        # compute step direction
        # this is for initialization
        if i == 1:
            p = funProj(x-g)
            projects = projects + 1
            S = np.zeros((nVars, 0))
            Y = np.zeros((nVars, 0))
            Hdiag = 1
        else:
            y = g - g_old
            s = x - x_old

            # translate this function (Done!)
            [S, Y, Hdiag] = lbfgsUpdate(y, s, options['corrections'], \
                    options['verbose']==3, S, Y, Hdiag)

            # make compact representation
            k = Y.shape[1]
            L = np.zeros((k,k))
            for j in range(k):
                L[j+1:,j] = np.dot(np.transpose(S[:,j+1:]), Y[:,j])
            N = np.hstack((S/Hdiag, Y.reshape(Y.shape[0], Y.size/Y.shape[0])))
            M1 = np.hstack((np.dot(S.T,S)/Hdiag, L))
            M2 = np.hstack((L.T, -np.diag(np.diag(np.dot(S.T,Y)))))
            M = np.vstack((M1, M2))
            
            # translate this function (Done!)
            HvFunc = lambda v: v/Hdiag - np.dot(N,np.linalg.solve(M,np.dot(N.T,v)))
            
            if options['bbInit'] == True:
                # use Barzilai-Borwein step to initialize sub-problem
                alpha = np.dot(s,s)/np.dot(s,y)
                if alpha <= 1e-10 or alpha > 1e10:
                    alpha = min(1., 1./np.sum(np.abs(g)))
                # solve sub-problem
                xSubInit = x - alpha*g
                feasibleInit = 0
            else:
                xSubInit = x
                feasibleInit = 1

            # solve Sub-problem
            # translate this function (Done!)
            [p, subProjects] = solveSubProblem(x, g, HvFunc, funProj, \
                    options['SPGoptTol'], options['SPGprogTol'], \
                    options['SPGiters'], options['SPGtestOpt'], feasibleInit,\
                    xSubInit)
            projects = projects + subProjects

        d = p - x
        g_old = g
        x_old = x

        # check that progress can be made along this direction
        gtd = np.dot(g,d)
        if gtd > -options['progTol']:
            if options['verbose'] >= 1:
                print "Directional Derivative below progTol"
            break
        
        # select initial guess to step length
        if i == 1 or options['adjustStep'] == 0:
            t = 1.
        else:
            t = min(1., 2.*(f-f_old)/gtd)
        
        # bound step length on first iteration
        if i == 1:
            t = min(1., 1./np.sum(np.abs(g)))

        # evaluate the objective and gradient at the initial step length
        if t == 1:
            x_new = p
        else:
            x_new = x + t*d
        [f_new, g_new] = funObj(x_new)
        funEvals = funEvals + 1

        # backtracking line search
        f_old = f
        # translate isLegal (Done!)
        while f_new > f + options['suffDec']*np.dot(g,x_new-x) or \
                not isLegal(f_new):
            temp = t
            # backtrack to next trial value
            if not isLegal(f_new) or not isLegal(g_new):
                if options['verbose'] == 3:
                    print "Halving step size"
                t = t/2.
            else:
                if options['verbose'] == 3:
                    print "Cubic backtracking"
                # translate polyinterp (Done!)
                t = polyinterp(np.array([[0.,f,gtd],\
                                        [t,f_new,np.dot(g_new,d)]]))[0]

            # adjust if change is too small/large
            if t < temp*1e-3:
                if options['verbose'] == 3:
                    print "Interpolated value too small, Adjusting"
                t = temp*1e-3
            elif t > temp*0.6:
                if options['verbose'] == 3:
                    print "Interpolated value too large, Adjusting"
                t = temp*0.6

            # check whether step has become too small
            if np.sum(np.abs(t*d)) < options['progTol'] or t == 0:
                if options['verbose'] == 3:
                    print "Line search failed"
                t = 0
                f_new = f
                g_new = g
                break

            # evaluate new point
            f_prev = f_new
            t_prev = temp
            x_new = x + t*d
            [f_new, g_new] = funObj(x_new)
            funEvals = funEvals + 1

        # take step
        x = x_new
        f = f_new
        g = g_new

        optCond = np.max(np.abs(funProj(x-g)-x))
        projects = projects + 1

        # output log
        if options['verbose'] >= 2:
            print '{:10d}'.format(i) + \
                  '{:10d}'.format(funEvals*funEvalMultiplier) + \
                  '{:10d}'.format(projects) + \
                  '{:15.5e}'.format(t) + \
                  '{:15.5e}'.format(f) + \
                  '{:15.5e}'.format(optCond)

        # check optimality
        if optCond < options['optTol']:
            if options['verbose'] >= 1:
                print "First-order optimality conditions below optTol"
            break
        
        if np.max(np.abs(t*d)) < options['progTol']:
            if options['verbose'] >= 1:
                print "Step size below progTol"
            break

        if np.abs(f-f_old) < options['progTol']:
            if options['verbose'] >= 1:
                print "Function value changing by less than progTol"
            break

        if funEvals > options['maxIter']:
            if options['verbose'] >= 1:
                print "Function evaluation exceeds maxIter"
            break

        if projects > options['maxProject']:
            if options['verbose'] >= 1:
                print "Number of projections exceeds maxProject"
            break
        i = i + 1

    return (x, f, funEvals)
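A minimal usage sketch for minConf_PQN: minimize a made-up quadratic over the non-negative orthant, with the projection just clipping at zero. Assumes the helpers this function calls (setDefaultOptions, lbfgsUpdate, solveSubProblem, isLegal, polyinterp) are available in the same module:

import numpy as np

def funObj(w):
    # f(w) = 0.5 * ||w - c||^2 for an illustrative target c; returns (value, gradient)
    c = np.array([1.0, -2.0, 3.0])
    diff = w - c
    return 0.5 * np.dot(diff, diff), diff

def funProj(w):
    # projection onto the non-negative orthant
    return np.maximum(w, 0.0)

x0 = np.zeros(3)
x_opt, f_opt, n_evals = minConf_PQN(funObj, x0, funProj, options={'verbose': 0})
# the constrained minimizer is the clipped target, roughly [1, 0, 3]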