Example #1
    def fit(self):
        f = self.f
        X = self.X
        tol = self.tol
        d = self.d
        n = self.n

        current_best_residual = np.inf
        for r in range(self.restarts):
            print('restart %d' % r)
            M0 = np.linalg.qr(np.random.randn(self.d, self.n))[0]
            my_params = [Parameter(order=self.order, distribution='uniform', lower=-5, upper=5) for _ in range(n)]
            my_basis = Basis('total-order')
            my_poly_init = Poly(parameters=my_params, basis=my_basis, method='least-squares',
                                sampling_args={'mesh': 'user-defined',
                                               'sample-points': X @ M0,
                                               'sample-outputs': f})
            my_poly_init.set_model()
            c0 = my_poly_init.coefficients.copy()

            residual = self.cost(f, X, M0, c0)

            cauchy_length = self.cauchy_length
            residual_history = []
            iter_ind = 0
            M = M0.copy()
            c = c0.copy()
            while residual > tol:
                if self.verbosity == 2:
                    print(residual)
                residual_history.append(residual)
                # Minimize over M
                func_M = lambda M_var: self.cost(f, X, M_var, c)
                grad_M = lambda M_var: self.dcostdM(f, X, M_var, c)

                manifold = Stiefel(d, n)
                solver = ConjugateGradient(maxiter=self.max_M_iters)

                problem = Problem(manifold=manifold, cost=func_M, egrad=grad_M, verbosity=0)

                M = solver.solve(problem, x=M)

                # Minimize over c
                func_c = lambda c_var: self.cost(f, X, M, c_var)
                grad_c = lambda c_var: self.dcostdc(f, X, M, c_var)

                res = minimize(func_c, x0=c, method='CG', jac=grad_c)
                c = res.x
                residual = self.cost(f, X, M, c)
                if iter_ind < cauchy_length:
                    iter_ind += 1
                elif np.abs(np.mean(residual_history[-cauchy_length:]) - residual)/residual < self.cauchy_tol:
                    break

            if self.verbosity > 0:
                print('final residual on training data: %f' % self.cost(f, X, M, c))
            if residual < current_best_residual:
                self.M = M
                self.c = c
                current_best_residual = residual
Example #2
def CGmanopt(X, objective_function, A, **kwargs):
    '''
    Minimizes the objective function subject to the constraint that X.T * X = I_k using the
    conjugate gradient method

    Args:
        X: Initial 2D array of shape (n, k) such that X.T * X = I_k
        objective_function: Objective function F(X, A) to minimize.
        A: Additional parameters for the objective function F(X, A)

    Keyword Args:
        None

    Returns:
        Xopt: Value of X that minimizes the objective subject to the constraint.
    '''

    manifold = Stiefel(X.shape[0], X.shape[1])

    def cost(X):
        c, _ = objective_function(X, A)
        return c

    problem = Problem(manifold=manifold, cost=cost, verbosity=0)
    solver = ConjugateGradient(logverbosity=0)
    Xopt = solver.solve(problem)
    return Xopt, None
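A minimal usage sketch (hypothetical objective and test data, not from the original source; it assumes pymanopt's autograd backend, so the objective is written with autograd.numpy): CGmanopt expects objective_function(X, A) to return a (cost, aux) pair.

import autograd.numpy as anp
import numpy as np

# Hypothetical objective: minimize -tr(X^T A X) over the Stiefel manifold,
# i.e. recover a dominant invariant subspace of the symmetric matrix A.
def neg_trace_objective(X, A):
    return -anp.trace(anp.dot(anp.dot(X.T, A), X)), None

rng = np.random.RandomState(0)
S = rng.randn(10, 10)
A = S + S.T                                # symmetric test matrix
X0 = np.linalg.qr(rng.randn(10, 3))[0]     # orthonormal initial guess

Xopt, _ = CGmanopt(X0, neg_trace_objective, A)
print(np.allclose(Xopt.T @ Xopt, np.eye(3), atol=1e-6))  # the solution stays on the manifold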
Example #3
def run(backend=SUPPORTED_BACKENDS[0], quiet=True):
    dimension = 3  # Dimension of the embedding space, i.e. R^k
    num_points = 24  # Points on the sphere
    # This value should be as close to 0 as affordable. If it is too close to
    # zero, optimization first becomes much slower, then simply stops working
    # because of floating point overflow errors (NaN's and Inf's start to
    # appear). If it is too large, then log-sum-exp is a poor approximation of
    # the max function, and the spread will be less uniform. An okay value
    # seems to be 0.01 or 0.001, for example. Note that a better strategy than
    # using a small epsilon straightaway is to reduce epsilon bit by bit and to
    # warm-start subsequent optimizations in that way. Trust-regions will be
    # more appropriate for this fine-tuning.
    epsilon = 0.0015

    cost = create_cost(backend, dimension, num_points, epsilon)
    manifold = Elliptope(num_points, dimension)
    problem = pymanopt.Problem(manifold, cost)
    if quiet:
        problem.verbosity = 0

    solver = ConjugateGradient(mingradnorm=1e-8, maxiter=1e5)
    Yopt = solver.solve(problem)

    if quiet:
        return

    Xopt = Yopt @ Yopt.T
    maxdot = np.triu(Xopt, 1).max()
    print("Maximum angle between any two points:", maxdot)
Example #4
def run(backend=SUPPORTED_BACKENDS[0], quiet=True):
    m, n, rank = 5, 4, 2
    matrix = rnd.randn(m, n)

    cost, egrad = create_cost_egrad(backend, matrix, rank)
    manifold = FixedRankEmbedded(m, n, rank)
    problem = pymanopt.Problem(manifold, cost=cost, egrad=egrad)
    if quiet:
        problem.verbosity = 0

    solver = ConjugateGradient()
    left_singular_vectors, singular_values, right_singular_vectors = \
        solver.solve(problem)
    low_rank_approximation = (left_singular_vectors @ np.diag(singular_values)
                              @ right_singular_vectors)

    if not quiet:
        u, s, vt = la.svd(matrix, full_matrices=False)
        indices = np.argsort(s)[-rank:]
        low_rank_solution = (
            u[:, indices] @ np.diag(s[indices]) @ vt[indices, :])
        print("Analytic low-rank solution:")
        print()
        print(low_rank_solution)
        print()
        print("Rank-{} approximation:".format(rank))
        print()
        print(low_rank_approximation)
        print()
        print("Frobenius norm error:",
              la.norm(low_rank_approximation - low_rank_solution))
        print()
Example #5
def fixedrank(A, YT, r):
    """Solves the AX = YT problem on the manifold of rank-r matrices."""

    # Instantiate a manifold
    manifold = FixedRankEmbedded(N, r, r)

    # Define the cost function (here using autograd.numpy)
    def cost(X):
        U = X[0]
        cst = 0
        for n in range(N):
            cst = cst + huber(U[n, :])
        Mat = np.matmul(np.matmul(X[0], np.diag(X[1])), X[2])
        fidelity = LA.norm(np.subtract(np.matmul(A, Mat), YT))
        return cst + lambd * fidelity**2

    problem = Problem(manifold=manifold, cost=cost)
    solver = ConjugateGradient(maxiter=maxiter)

    # Let Pymanopt do the rest
    Xopt = solver.solve(problem)

    #Solve
    Sol = np.dot(np.dot(Xopt[0], np.diag(Xopt[1])), Xopt[2])

    return Sol
Example #6
def solve_manopt(X, d, cost, egrad, Wo=None):

    D = X.shape[1]
    manifold = Grassmann(height=D, width=d)
    problem = Problem(manifold=manifold, cost=cost, egrad=egrad, verbosity=0)

    solver = ConjugateGradient(mingradnorm=1e-3)
    W = solver.solve(problem, x=Wo)

    return W
Example #7
def closest_unit_norm_column_approximation(A):
    """
    Returns the matrix with unit-norm columns that is closest to A w.r.t. the
    Frobenius norm.
    """
    m, n = A.shape

    manifold = Oblique(m, n)
    solver = ConjugateGradient()
    X = T.matrix()
    cost = 0.5 * T.sum((X - A)**2)

    problem = Problem(manifold=manifold, cost=cost, arg=X)
    return solver.solve(problem)
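The analytic optimum is just A with each column rescaled to unit norm (the later Oblique example verifies exactly this), so the result can be sanity-checked against it. A hypothetical check, assuming the Theano/pymanopt stack used above is available:

import numpy as np

A = np.random.randn(5, 8)
X = closest_unit_norm_column_approximation(A)

# Closed-form solution: normalize each column of A to unit length.
X_expected = A / np.linalg.norm(A, axis=0)[np.newaxis, :]
print(np.allclose(X, X_expected, rtol=1e-3))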
Example #9
def cp_mds_reg(X, D, lam=1.0, v=1, maxiter=1000):
    """Version of MDS in which "signs" are also an optimization parameter.

    Rather than performing a full optimization and then resetting the
    sign matrix, here we treat the signs as a parameter `A = [a_ij]` and
    minimize the cost function
        F(X,A) = ||W*(X^H(A*X) - cos(D))||^2 + lambda*||A - X^HX/|X^HX| ||^2
    Lambda is a regularization parameter we can experiment with. The
    collection of data, `X`, is treated as a point on the `Oblique`
    manifold, consisting of `k*n` matrices with unit-norm columns. Since
    we are working on a sphere in complex space we require `k` to be
    even. The first `k/2` entries of each column are the real components
    and the last `k/2` entries are the imaginary parts.

    Parameters
    ----------
    X : ndarray (k, n)
        Initial guess for data.
    D : ndarray (k, k)
        Goal distance matrix.
    lam : float, optional
        Weight to give regularization term.
    v : int, optional
        Verbosity

    Returns
    -------
    X_opt : ndarray (k, n)
        Collection of points optimizing cost.

    """

    dim = X.shape[0]
    num_points = X.shape[1]
    W = distance_to_weights(D)
    Sreal, Simag = norm_rotations(X)
    A = np.vstack(
        (np.reshape(Sreal,
                    (1, num_points**2)), np.reshape(Simag, num_points**2)))
    cp_manifold = Oblique(dim, num_points)
    a_manifold = Oblique(2, num_points**2)
    manifold = Product((cp_manifold, a_manifold))
    solver = ConjugateGradient(maxiter=maxiter, maxtime=float('inf'))
    cost = setup_reg_autograd_cost(D, int(dim / 2), num_points, lam=lam)
    problem = pymanopt.Problem(cost=cost, manifold=manifold)
    Xopt, Aopt = solver.solve(problem, x=(X, A))
    Areal = np.reshape(Aopt[0, :], (num_points, num_points))
    Aimag = np.reshape(Aopt[1, :], (num_points, num_points))
    return Xopt, Areal, Aimag
Example #10
def solve_manopt(X, d, cost, egrad):

    D = X.shape[1]
    manifold = Grassmann(height=D, width=d)
    problem = Problem(manifold=manifold, cost=cost, egrad=egrad, verbosity=0)

    solver = ConjugateGradient(mingradnorm=1e-3)

    M = mean_riemann(X)
    w, v = np.linalg.eig(M)
    idx = w.argsort()[::-1]
    v_ = v[:, idx]
    Wo = v_[:, :d]
    W = solver.solve(problem, x=Wo)
    return W
Example #11
def packing_on_the_sphere(n, k, epsilon):
    manifold = Elliptope(n, k)
    solver = ConjugateGradient(mingradnorm=1e-8, maxiter=1e5)
    
    def cost(X):
        Y = np.dot(X, X.T)
        # Shift the exponentials by the maximum value to reduce numerical
        # trouble due to possible overflows.
        s = np.triu(Y, 1).max()
        expY = np.exp((Y - s) / epsilon)
        # Zero out the diagonal
        expY -= np.diag(np.diag(expY))
        u = np.triu(expY, 1).sum()
        return s + epsilon * np.log(u)

    problem = Problem(manifold, cost)
    return solver.solve(problem)
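As in the Elliptope example earlier, the returned point is a factor of the Gram matrix, so the packing quality can be read off from the largest off-diagonal inner product (a short usage sketch mirroring Example #3):

Y = packing_on_the_sphere(n=24, k=3, epsilon=0.0015)
gram = np.dot(Y, Y.T)
max_inner_product = np.triu(gram, 1).max()  # cosine of the smallest pairwise angle
print("Smallest pairwise angle (radians):", np.arccos(max_inner_product))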
Example #13
    def _optimize(self, max_opt_time, max_opt_iter, verbosity):
        """Optimize the GeoIMC optimization problem

        Args: The args of `solve`
        """
        residual_global = np.zeros(self.Y.data.shape)

        solver = ConjugateGradient(maxtime=max_opt_time,
                                   maxiter=max_opt_iter,
                                   linesearch=LineSearchBackTracking())
        prb = Problem(manifold=self.manifold,
                      cost=lambda x: self._cost(x, residual_global),
                      egrad=lambda z: self._egrad(z, residual_global),
                      verbosity=verbosity)
        solution = solver.solve(prb, x=self.W)
        self.W = [solution[0], solution[1], solution[2]]

        return self._cost(self.W, residual_global)
Example #14
def dominant_eigenvector(A):
    """
    Returns the dominant eigenvector of the symmetric matrix A.

    Note: For the same A, this should yield the same as the dominant invariant
    subspace example with p = 1.
    """
    m, n = A.shape
    assert m == n, "matrix must be square"
    assert np.allclose(np.sum(A - A.T), 0), "matrix must be symmetric"

    manifold = Sphere(n)
    solver = ConjugateGradient(maxiter=500, minstepsize=1e-6)
    x = T.matrix()
    cost = -x.T.dot(T.dot(A, x)).trace()

    problem = Problem(man=manifold, ad_cost=cost, ad_arg=x)
    xopt = solver.solve(problem)
    return xopt.squeeze()
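A quick sanity check against numpy's symmetric eigensolver (hypothetical test data; the two vectors should agree up to sign, assuming the Theano/pymanopt stack used above is available):

import numpy as np

rng = np.random.RandomState(0)
B = rng.randn(6, 6)
A = B + B.T                      # symmetric test matrix

x = dominant_eigenvector(A)
w, V = np.linalg.eigh(A)
v = V[:, np.argmax(w)]           # eigenvector of the largest eigenvalue

print(min(np.linalg.norm(x - v), np.linalg.norm(x + v)) < 1e-5)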
Example #15
def _bootstrap_problem(A, k, minstepsize=1e-9, man_type='fixed'):
    m, n = A.shape
    if man_type == 'fixed':
        manifold = FixedRankEmbeeded(m, n, k)
    elif man_type == 'fixed2':
        manifold = FixedRankEmbeeded2Factors(m, n, k)
    elif man_type == 'simple':
        manifold = Simple(m, n, k)
        manifold = Simple(m, n, k)
    #solver = TrustRegions(maxiter=500, minstepsize=1e-6)
    solver = ConjugateGradient(maxiter=500, minstepsize=minstepsize)
    return manifold, solver
Example #16
def main():
    r"""Main entry point in the graph embedding procedure."""
    args = config_parser().parse_args()

    g_pdists = load_pdists(args)
    n = g_pdists.shape[0]
    d = args.manifold_dim

    # we are actually using only the upper diagonal part
    g_pdists = g_pdists[np.triu_indices(n, 1)]
    g_sq_pdists = g_pdists**2

    # read the graph
    # the distortion cost
    def distortion_cost(X):
        man_sq_pdists = manifold_pdists(X, squared=True)

        return np.sum(np.abs(man_sq_pdists / g_sq_pdists - 1))

    # the manifold, problem, and solver
    manifold = PositiveDefinite(d, k=n)
    problem = Problem(manifold=manifold, cost=distortion_cost, verbosity=2)
    linesearch = ReduceLROnPlateau(start_lr=2e-2,
                                   patience=10,
                                   threshold=1e-4,
                                   factor=0.1,
                                   verbose=1)
    solver = ConjugateGradient(linesearch=linesearch, maxiter=1000)

    # solve it
    with Timer('training') as t:
        X_opt = solver.solve(problem, x=sample_init_points(n, d))

    # the distortion achieved
    man_pdists = manifold_pdists(X_opt)
    print('Average distortion: ', average_distortion(g_pdists, man_pdists))
    man_pdists_sym = pdists_vec_to_sym(man_pdists, n, 1e12)
    print('MAP: ', mean_average_precision(g,
                                          man_pdists_sym,
                                          diag_adjusted=True))
Example #17
def main_mds(D, dim=3, X=None, space='real'):
    """MDS via gradient descent with the chordal metric.

    Parameters
    ----------
    D : ndarray (n, n)
        Goal distance matrix.
    dim : int, optional
        Goal dimension (of ambient Euclidean space). Default is `dim = 3`.
    X : ndarray (dim, n), optional
        Initial value for gradient descent. `n` points in dimension `dim`. If
        both a dimension and an initial condition are specified, the initial
        condition overrides the dimension.
    space : str
        Choice of real or complex version. Options 'real', 'complex'. If
        'complex' dim must be even.

    """

    n = D.shape[0]
    max_d = np.max(D)
    if max_d > 1:
        print('WARNING: maximum value in distance matrix exceeds diameter of '
              'projective space. Max distance = %2.4f.' % max_d)
    manifold = Oblique(dim, n)
    solver = ConjugateGradient()
    if space == 'real':
        cost = setup_cost(D)
    elif space == 'complex':
        cost = setup_CPn_cost(D, int(dim/2))
    problem = pymanopt.Problem(manifold=manifold, cost=cost)
    if X is None:
        X_out = solver.solve(problem)
    else:
        if X.shape[0] != dim:
            print('WARNING: initial condition does not match specified goal '\
                'dimension. Finding optimum in dimension %d' %X.shape[0])
        X_out = solver.solve(problem, x=X)
    return X_out
Example #18
def run(backend=SUPPORTED_BACKENDS[0], quiet=True):
    m = 5
    n = 8
    matrix = rnd.randn(m, n)

    cost, egrad = create_cost_egrad(backend, matrix)
    manifold = Oblique(m, n)
    problem = pymanopt.Problem(manifold, cost=cost, egrad=egrad)
    if quiet:
        problem.verbosity = 0

    solver = ConjugateGradient()
    Xopt = solver.solve(problem)

    if quiet:
        return

    # Calculate the actual solution by normalizing the columns of A.
    X = matrix / la.norm(matrix, axis=0)[np.newaxis, :]

    # Print information about the solution.
    print("Solution found: %s" % np.allclose(X, Xopt, rtol=1e-3))
    print("Frobenius-error: %f" % la.norm(X - Xopt))
Example #19
def test_doublystochastic(N, M, K):
    rnd.seed(21)

    ns = [N] * K
    ms = [M] * K
    batch = len(ns)

    p = []
    q = []
    A = []
    for i in range(batch):
        n, m = ns[i], ms[i]
        p0 = np.random.rand(n)
        q0 = np.random.rand(m)
        p.append(p0 / np.sum(p0))
        q.append(q0 / np.sum(q0))
        A0 = rnd.rand(n, m)
        A0 = A0[np.newaxis, :]
        A0 = SKnopp(A0, p[i], q[i], n+m)
        A.append(A0)
    A = np.vstack(A)

    def _cost(x):
        return 0.5 * (np.linalg.norm(np.array(x) - np.array(A))**2)

    def _egrad(x):
        return x - A

    def _ehess(x, u):
        return u

    manf = DoublyStochastic(n, m, p, q)
    solver = ConjugateGradient(maxiter=3, maxtime=100000)
    prblm = Problem(manifold=manf,
                    cost=lambda x: _cost(x),
                    egrad=lambda x: _egrad(x),
                    ehess=lambda x, u: _ehess(x, u),
                    verbosity=3)

    U = manf.rand()
    Uopt = solver.solve(prblm, x=U)
Example #20
    def optimize_on_manifold(self, options, optmeth):
        if optmeth not in ['bo13', 'wen12', 'ManOpt']:
            print("Chosen optimization method", optmeth,
                  "has not been implemented, using 'ManOpt' ")
            optmeth = 'ManOpt'

        if optmeth == 'ManOpt':
            # This is hardcoding it to the two-dimensional case..
            manifold_one = Stiefel(
                np.shape(self.rotations[0])[0],
                np.shape(self.rotations[0])[1])
            manifold_two = Stiefel(
                np.shape(self.rotations[0])[0],
                np.shape(self.rotations[0])[1])
            manifold = Product((manifold_one, manifold_two))
            optimization_variable = tf.Variable(tf.placeholder(tf.float32))
            problem = Problem(manifold=manifold,
                              cost=self.my_cost(),
                              arg=optimization_variable)
            solver = ConjugateGradient(problem, optimization_variable, options)

            return solver
Example #21
def main():
    # Parse command line arguments
    parser = argparse.ArgumentParser(description='Map the source embeddings into the target embedding space')
    parser.add_argument('src_input', help='the input source embeddings')
    parser.add_argument('trg_input', help='the input target embeddings')
    parser.add_argument('--model_path', default=None, type=str, help='directory to save the model')
    parser.add_argument('--geomm_embeddings_path', default=None, type=str, help='directory to save the output GeoMM latent space embeddings. The output embeddings are normalized.')
    parser.add_argument('--encoding', default='utf-8', help='the character encoding for input/output (defaults to utf-8)')
    parser.add_argument('--max_vocab', default=0,type=int, help='Maximum vocabulary to be loaded, 0 allows complete vocabulary')
    parser.add_argument('--verbose', default=0,type=int, help='Verbose')
    mapping_group = parser.add_argument_group('mapping arguments', 'Basic embedding mapping arguments')
    mapping_group.add_argument('-dtrain', '--dictionary_train', default=sys.stdin.fileno(), help='the training dictionary file (defaults to stdin)')
    mapping_group.add_argument('-dtest', '--dictionary_test', default=sys.stdin.fileno(), help='the test dictionary file (defaults to stdin)')
    mapping_group.add_argument('--normalize', choices=['unit', 'center', 'unitdim', 'centeremb'], nargs='*', default=[], help='the normalization actions to perform in order')

    geomm_group = parser.add_argument_group('GeoMM arguments', 'Arguments for GeoMM method')
    geomm_group.add_argument('--l2_reg', type=float,default=1e2, help='Lambda for L2 Regularization')
    geomm_group.add_argument('--max_opt_time', type=int,default=5000, help='Maximum time limit for optimization in seconds')
    geomm_group.add_argument('--max_opt_iter', type=int,default=150, help='Maximum number of iterations for optimization')

    eval_group = parser.add_argument_group('evaluation arguments', 'Arguments for evaluation')
    eval_group.add_argument('--normalize_eval', action='store_true', help='Normalize the embeddings at test time')
    eval_group.add_argument('--eval_batch_size', type=int,default=1000, help='Batch size for evaluation')
    eval_group.add_argument('--csls_neighbourhood', type=int,default=10, help='Neighbourhood size for CSLS')

    args = parser.parse_args()
    BATCH_SIZE = args.eval_batch_size

    ## Logging
    #method_name = os.path.join('logs','geomm')
    #directory = os.path.join(os.path.join(os.getcwd(),method_name), datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S'))
    #if not os.path.exists(directory):
    #    os.makedirs(directory)
    #log_file_name, file_extension = os.path.splitext(os.path.basename(args.dictionary_train))
    #log_file_name = log_file_name + '.log'
    #class Logger(object):
    #    def __init__(self):
    #        self.terminal = sys.stdout
    #        self.log = open(os.path.join(directory,log_file_name), "a")

    #    def write(self, message):
    #        self.terminal.write(message)
    #        self.log.write(message)

    #    def flush(self):
    #        #this flush method is needed for python 3 compatibility.
    #        #this handles the flush command by doing nothing.
    #        #you might want to specify some extra behavior here.
    #        pass
    #sys.stdout = Logger()
    if args.verbose:
        print('Current arguments: {0}'.format(args))

    dtype = 'float32'
    if args.verbose:
        print('Loading train data...')
    # Read input embeddings
    srcfile = open(args.src_input, encoding=args.encoding, errors='surrogateescape')
    trgfile = open(args.trg_input, encoding=args.encoding, errors='surrogateescape')
    src_words, x = embeddings.read(srcfile,max_voc=args.max_vocab, dtype=dtype)
    trg_words, z = embeddings.read(trgfile,max_voc=args.max_vocab, dtype=dtype)

    # Build word to index map
    src_word2ind = {word: i for i, word in enumerate(src_words)}
    trg_word2ind = {word: i for i, word in enumerate(trg_words)}

    # Build training dictionary
    noov=0
    src_indices = []
    trg_indices = []
    f = open(args.dictionary_train, encoding=args.encoding, errors='surrogateescape')
    for line in f:
        src,trg = line.split()
        if args.max_vocab:
            src=src.lower()
            trg=trg.lower()
        try:
            src_ind = src_word2ind[src]
            trg_ind = trg_word2ind[trg]
            src_indices.append(src_ind)
            trg_indices.append(trg_ind)
        except KeyError:
            noov+=1
            if args.verbose:
                print('WARNING: OOV dictionary entry ({0} - {1})'.format(src, trg)) #, file=sys.stderr
    f.close()
    if args.verbose:
        print('Number of training pairs having at least one OOV: {}'.format(noov))
    src_indices = src_indices
    trg_indices = trg_indices
    if args.verbose:
        print('Normalizing embeddings...')
    # STEP 0: Normalization
    for action in args.normalize:
        if action == 'unit':
            x = embeddings.length_normalize(x)
            z = embeddings.length_normalize(z)
        elif action == 'center':
            x = embeddings.mean_center(x)
            z = embeddings.mean_center(z)
        elif action == 'unitdim':
            x = embeddings.length_normalize_dimensionwise(x)
            z = embeddings.length_normalize_dimensionwise(z)
        elif action == 'centeremb':
            x = embeddings.mean_center_embeddingwise(x)
            z = embeddings.mean_center_embeddingwise(z)


    # Step 1: Optimization
    if args.verbose:
        print('Beginning Optimization')
    start_time = time.time()
    x_count = len(set(src_indices))
    z_count = len(set(trg_indices))
    A = np.zeros((x_count,z_count))

    # Creating dictionary matrix from training set
    map_dict_src={}
    map_dict_trg={}
    I=0
    uniq_src=[]
    uniq_trg=[]
    for i in range(len(src_indices)):
        if src_indices[i] not in map_dict_src.keys():
            map_dict_src[src_indices[i]]=I
            I+=1
            uniq_src.append(src_indices[i])
    J=0
    for j in range(len(trg_indices)):
        if trg_indices[j] not in map_dict_trg.keys():
            map_dict_trg[trg_indices[j]]=J
            J+=1
            uniq_trg.append(trg_indices[j])

    for i in range(len(src_indices)):
        A[map_dict_src[src_indices[i]],map_dict_trg[trg_indices[i]]]=1

    np.random.seed(0)
    Lambda=args.l2_reg

    U1 = TT.matrix()
    U2 = TT.matrix()
    B  = TT.matrix()

    Kx, Kz = x[uniq_src], z[uniq_trg]
    XtAZ = Kx.T.dot(A.dot(Kz))
    XtX = Kx.T.dot(Kx)
    ZtZ = Kz.T.dot(Kz)
    # AA = np.sum(A*A) # this can be added if cost needs to be compared to original geomm

    W = (U1.dot(B)).dot(U2.T)
    regularizer = 0.5*Lambda*(TT.sum(B**2))
    sXtX = shared(XtX)
    sZtZ = shared(ZtZ)
    sXtAZ = shared(XtAZ)

    cost = regularizer
    wtxtxw = W.T.dot(sXtX.dot(W))
    wtxtxwztz = wtxtxw.dot(sZtZ)
    cost += TT.nlinalg.trace(wtxtxwztz)
    cost += -2 * TT.sum(W * sXtAZ)
    # cost += shared(AA) # this can be added if cost needs to be compared with original geomm

    solver = ConjugateGradient(maxtime=args.max_opt_time,maxiter=args.max_opt_iter)

    manifold =Product([Stiefel(x.shape[1], x.shape[1]),Stiefel(z.shape[1], x.shape[1]),PositiveDefinite(x.shape[1])])
    #manifold =Product([Stiefel(x.shape[1], 200),Stiefel(z.shape[1], 200),PositiveDefinite(200)])
    problem = Problem(manifold=manifold, cost=cost, arg=[U1,U2,B], verbosity=3)
    wopt = solver.solve(problem)

    w= wopt
    U1 = w[0]
    U2 = w[1]
    B = w[2]

    ### Save the models if requested
    if args.model_path is not None:
        os.makedirs(args.model_path,exist_ok=True)
        np.savetxt('{}/U_src.csv'.format(args.model_path),U1)
        np.savetxt('{}/U_tgt.csv'.format(args.model_path),U2)
        np.savetxt('{}/B.csv'.format(args.model_path),B)

    # Step 2: Transformation
    xw = x.dot(U1).dot(scipy.linalg.sqrtm(B))
    zw = z.dot(U2).dot(scipy.linalg.sqrtm(B))

    end_time = time.time()
    if args.verbose:
        print('Completed training in {0:.2f} seconds'.format(end_time-start_time))
    gc.collect()

    ### Save the GeoMM embeddings if requested
    xw_n = embeddings.length_normalize(xw)
    zw_n = embeddings.length_normalize(zw)
    if args.geomm_embeddings_path is not None:
        os.makedirs(args.geomm_embeddings_path,exist_ok=True)

        out_emb_fname=os.path.join(args.geomm_embeddings_path,'src.vec')
        with open(out_emb_fname,'w',encoding=args.encoding) as outfile:
            embeddings.write(src_words,xw_n,outfile)

        out_emb_fname=os.path.join(args.geomm_embeddings_path,'trg.vec')
        with open(out_emb_fname,'w',encoding=args.encoding) as outfile:
            embeddings.write(trg_words,zw_n,outfile)

    # Step 3: Evaluation
    if args.normalize_eval:
        xw = xw_n
        zw = zw_n

    X = xw[src_indices]
    Z = zw[trg_indices]

    # Loading test dictionary
    f = open(args.dictionary_test, encoding=args.encoding, errors='surrogateescape')
    src2trg = collections.defaultdict(set)
    trg2src = collections.defaultdict(set)
    oov = set()
    vocab = set()
    for line in f:
        src, trg = line.split()
        if args.max_vocab:
            src=src.lower()
            trg=trg.lower()
        try:
            src_ind = src_word2ind[src]
            trg_ind = trg_word2ind[trg]
            src2trg[src_ind].add(trg_ind)
            trg2src[trg_ind].add(src_ind)
            vocab.add(src)
        except KeyError:
            oov.add(src)
    src = list(src2trg.keys())
    trgt = list(trg2src.keys())

    oov -= vocab  # If one of the translation options is in the vocabulary, then the entry is not an oov
    coverage = len(src2trg) / (len(src2trg) + len(oov))
    f.close()

    translation = collections.defaultdict(int)
    translation5 = collections.defaultdict(list)
    translation10 = collections.defaultdict(list)

    ### compute nearest neighbours of x in z
    t=time.time()
    nbrhood_x=np.zeros(xw.shape[0])

    for i in range(0, len(src), BATCH_SIZE):
        j = min(i + BATCH_SIZE, len(src))
        similarities = xw[src[i:j]].dot(zw.T)
        similarities_x = -1*np.partition(-1*similarities,args.csls_neighbourhood-1 ,axis=1)
        nbrhood_x[src[i:j]]=np.mean(similarities_x[:,:args.csls_neighbourhood],axis=1)

    ### compute nearest neighbours of z in x (GPU version)
    nbrhood_z=np.zeros(zw.shape[0])
    with cp.cuda.Device(0):
        nbrhood_z2=cp.zeros(zw.shape[0])
        batch_num=1
        for i in range(0, zw.shape[0], BATCH_SIZE):
            j = min(i + BATCH_SIZE, zw.shape[0])
            similarities = -1*cp.partition(-1*cp.dot(cp.asarray(zw[i:j]),cp.transpose(cp.asarray(xw))),args.csls_neighbourhood-1 ,axis=1)[:,:args.csls_neighbourhood]
            nbrhood_z2[i:j]=(cp.mean(similarities[:,:args.csls_neighbourhood],axis=1))
            batch_num+=1
        nbrhood_z=cp.asnumpy(nbrhood_z2)

    #### compute nearest neigbours of z in x (CPU version)
    #nbrhood_z=np.zeros(zw.shape[0])
    #for i in range(0, len(zw.shape[0]), BATCH_SIZE):
    #    j = min(i + BATCH_SIZE, len(zw.shape[0]))
    #    similarities = zw[i:j].dot(xw.T)
    #    similarities_z = -1*np.partition(-1*similarities,args.csls_neighbourhood-1 ,axis=1)
    #    nbrhood_z[i:j]=np.mean(similarities_z[:,:args.csls_neighbourhood],axis=1)

    #### find translation
    #for i in range(0, len(src), BATCH_SIZE):
    #    j = min(i + BATCH_SIZE, len(src))
    #    similarities = xw[src[i:j]].dot(zw.T)
    #    similarities = np.transpose(np.transpose(2*similarities) - nbrhood_x[src[i:j]]) - nbrhood_z
    #    nn = similarities.argmax(axis=1).tolist()
    #    similarities = np.argsort((similarities),axis=1)

    #    nn5 = (similarities[:,-5:])
    #    nn10 = (similarities[:,-10:])
    #    for k in range(j-i):
    #        translation[src[i+k]] = nn[k]
    #        translation5[src[i+k]] = nn5[k]
    #        translation10[src[i+k]] = nn10[k]


    #if args.geomm_embeddings_path is not None:
    #    delim=','
    #    os.makedirs(args.geomm_embeddings_path,exist_ok=True)

    #    translations_fname=os.path.join(args.geomm_embeddings_path,'translations.csv')
    #    with open(translations_fname,'w',encoding=args.encoding) as translations_file:
    #        for src_id in src:
    #            src_word = src_words[src_id]
    #            all_trg_words = [ trg_words[trg_id] for trg_id in src2trg[src_id] ]
    #            trgout_words = [ trg_words[j] for j in translation10[src_id] ]
    #            ss = list(nn10[src_id,:])
    #
    #            p1 = ':'.join(all_trg_words)
    #            p2 = delim.join( [ '{}{}{}'.format(w,delim,s) for w,s in zip(trgout_words,ss) ] )
    #            translations_file.write( '{s}{delim}{p1}{delim}{p2}\n'.format(s=src_word, delim=delim, p1=p1, p2=p2) )

    ### find translation  (and write to file if output requested)
    delim=','
    translations_file =None
    if args.geomm_embeddings_path is not None:
        os.makedirs(args.geomm_embeddings_path,exist_ok=True)
        translations_fname=os.path.join(args.geomm_embeddings_path,'translations.csv')
        translations_file = open(translations_fname,'w',encoding=args.encoding)

    for i in range(0, len(src), BATCH_SIZE):
        j = min(i + BATCH_SIZE, len(src))
        similarities = xw[src[i:j]].dot(zw.T)
        similarities = np.transpose(np.transpose(2*similarities) - nbrhood_x[src[i:j]]) - nbrhood_z
        nn = similarities.argmax(axis=1).tolist()
        similarities = np.argsort((similarities),axis=1)

        nn5 = (similarities[:,-5:])
        nn10 = (similarities[:,-10:])
        for k in range(j-i):
            translation[src[i+k]] = nn[k]
            translation5[src[i+k]] = nn5[k]
            translation10[src[i+k]] = nn10[k]


            if args.geomm_embeddings_path is not None:
                src_id=src[i+k]
                src_word = src_words[src_id]
                all_trg_words = [ trg_words[trg_id] for trg_id in src2trg[src_id] ]
                trgout_words = [ trg_words[j] for j in translation10[src_id] ]
                #ss = list(nn10[src_id,:])

                p1 = ':'.join(all_trg_words)
                p2 = ':'.join(trgout_words)
                #p2 = delim.join( [ '{}{}{}'.format(w,delim,s) for w,s in zip(trgout_words,ss) ] )
                translations_file.write( '{s}{delim}{p1}{delim}{p2}\n'.format(s=src_word, p1=p1, p2=p2, delim=delim) )

    if args.geomm_embeddings_path is not None:
        translations_file.close()

    accuracy = np.mean([1 if translation[i] in src2trg[i] else 0 for i in src])
    mean=0
    for i in src:
        for k in translation5[i]:
            if k in src2trg[i]:
                mean+=1
                break

    mean/=len(src)
    accuracy5 = mean

    mean=0
    for i in src:
        for k in translation10[i]:
            if k in src2trg[i]:
                mean+=1
                break

    mean/=len(src)
    accuracy10 = mean
    message = args.src_input.split(".")[-2] + "-->" + args.trg_input.split(".")[-2] + ":"
    print(message, 'Coverage:{0:7.2%}  Accuracy:{1:7.2%}'.format(coverage, accuracy))

Example #22
# Take a step further and use Pymanopt to solve the optimization as a constrained problem
from pymanopt import Problem
from pymanopt.solvers import ConjugateGradient
from pymanopt.manifolds import Stiefel


# define objective function
def pca_objective(U):
    return -np.trace(np.dot(U.T, np.dot(cov, U)))


# set up Pymanopt problem and solve.
solver = ConjugateGradient(maxiter=1000)
manifold = Stiefel(D, components)
problem = Problem(manifold=manifold, cost=pca_objective, verbosity=0)
Uopt = solver.solve(problem)

print('Solution found using Pymanopt = \n', Uopt, end='\n\n')

# Find the matrix T such that UA = Uopt
T = np.linalg.lstsq(U, Uopt, rcond=None)[0]

# Assuming this is a rotation matrix, compute the angle and confirm this is indeed a rotation
angle = np.arccos(T[0, 0])
rotation = np.array([[np.cos(angle), np.sin(angle)],
                     [-np.sin(angle), np.cos(angle)]])
print('T matrix found as: \n', T)
print('rotation matrix associated with an angle of ', np.round(angle, 4),
      rotation,
      end='\n\n')
Example #23
    def _bootstrap_problem(self):
        self.manifold = FixedRankEmbeeded(self.num_users, self.num_items, self.num_factors + 1)
        self.solver = ConjugateGradient(maxiter=self.iterations, minstepsize=self.minstepsize)
Example #24
class UsvRiemannianLogisticMF():
    def __init__(self, counts, num_factors, reg_param=0.6, gamma=1.0,
                 iterations=30, minstepsize=1e-9):
        self.counts = counts
        self.num_users = self.counts.shape[0]
        self.num_items = self.counts.shape[1]
        self.num_factors = num_factors
        self.iterations = iterations
        self.minstepsize = minstepsize
        self.reg_param = reg_param
        self.gamma = gamma
        self._bootstrap_problem()

    def _bootstrap_problem(self):
        self.manifold = FixedRankEmbeeded(self.num_users, self.num_items, self.num_factors + 1)
        self.solver = ConjugateGradient(maxiter=self.iterations, minstepsize=self.minstepsize)

    def train_model(self, x0=None):
        self.U = T.matrix('U')
        self.S = T.matrix('S')
        self.V = T.matrix('V')
        problem = Problem(man=self.manifold,
                          theano_cost=self.log_likelihood(),
                          theano_arg=[self.U, self.S, self.V])

        if x0 is None:
            user_vectors = np.random.normal(size=(self.num_users,
                                                       self.num_factors + 1))
            item_vectors = np.random.normal(size=(self.num_items,
                                                       self.num_factors + 1))
            s = rnd.random(self.num_factors + 1)
            s[:-1] = np.sort(s[:-1])[::-1]

            x0 = (user_vectors, np.diag(s), item_vectors.T)
        else:
            x0 = x0
        (left, middle, right), self.loss_history = self.solver.solve(problem, x=x0)
        right = right.T

        s_mid = np.diag(np.sqrt(np.diag(middle)[:-1]))
        self.middle = s_mid


        print('U norm: {}'.format(la.norm(left[:, :-1])))
        print('V norm: {}'.format(la.norm(right[:, :-1])))
        self.user_vectors = left[:, :-1].dot(s_mid)
        self.item_vectors = right[:, :-1].dot(s_mid)
        self.user_biases = left[:, -1] * np.sqrt(middle[-1, -1])
        self.item_biases = right[:, -1] * np.sqrt(middle[-1, -1])
        print('U norm: {}'.format(la.norm(self.user_vectors)))
        print('V norm: {}'.format(la.norm(self.item_vectors)))
        print('LL: {}'.format(self._log_likelihood()))

    def _log_likelihood(self):
        loglik = 0
        A = np.dot(self.user_vectors, self.item_vectors.T)
        A += self.user_biases
        A += self.item_biases.T
        B = A * self.counts
        loglik += np.sum(B)

        A = np.exp(A)
        A += 1

        A = np.log(A)
        A = (self.counts + 1) * A
        loglik -= np.sum(A)

        # L2 regularization
        loglik -= 0.5 * self.reg_param * np.sum(np.square(np.diag(self.middle)))
        return loglik

    def log_likelihood(self):
        Users = self.U[:, :-1]
        Middle = self.S
        Items = self.V[:-1, :]
        UserBiases = self.U[:, -1].reshape((-1, 1))
        ItemBiases = self.V[-1, :].reshape((-1, 1))

        A = T.dot(T.dot(self.U[:, :-1], self.S[:-1, :-1]), self.V[:-1, :])
        A = T.inc_subtensor(A[:, :], UserBiases * T.sqrt(self.S[-1, -1]))
        A = T.inc_subtensor(A[:, :], ItemBiases.T * T.sqrt(self.S[-1, -1]))
        B = A * self.counts
        loglik = T.sum(B)

        A = T.exp(A)
        A += 1
        A = T.log(A)

        A = (self.counts + 1) * A
        loglik -= T.sum(A)

        # L2 regularization
        loglik -= 0.5 * self.reg_param * T.sum(T.square(T.diag(self.S)[:-1]))

        # Return the negation of the log-likelihood because we will minimize the cost
        return -loglik

    def print_vectors(self):
        user_vecs_file = open('logmf-user-vecs-%i' % self.num_factors, 'w')
        for i in range(self.num_users):
            vec = ' '.join(map(str, self.user_vectors[i]))
            line = '%i\t%s\n' % (i, vec)
            user_vecs_file.write(line)
        user_vecs_file.close()
        item_vecs_file = open('logmf-item-vecs-%i' % self.num_factors, 'w')
        for i in range(self.num_items):
            vec = ' '.join(map(str, self.item_vectors[i]))
            line = '%i\t%s\n' % (i, vec)
            item_vecs_file.write(line)
        item_vecs_file.close()
Example #25
class WildLogisticMF():
    def __init__(self, counts, num_factors, reg_param=0.6, gamma=1.0,
                 iterations=30, minstepsize=1e-10):
        self.counts = counts
        self.num_users = self.counts.shape[0]
        self.num_items = self.counts.shape[1]
        self.num_factors = num_factors
        self.iterations = iterations
        self.minstepsize = minstepsize
        self.reg_param = reg_param
        self.gamma = gamma
        self._bootstrap_problem()

    def _bootstrap_problem(self):
        self.manifold = FixedRankEmbeeded2Factors(self.num_users, self.num_items, self.num_factors + 1)
        self.solver = ConjugateGradient(maxiter=self.iterations, minstepsize=self.minstepsize)

    def train_model(self, x0=None):
        self.L = T.matrix('L')
        self.R = T.matrix('R')
        problem = Problem(man=self.manifold,
                          theano_cost=self.log_likelihood(),
                          theano_arg=[self.L, self.R])

        if x0 is None:
            user_vectors = np.random.normal(size=(self.num_users,
                                                       self.num_factors))
            item_vectors = np.random.normal(size=(self.num_items,
                                                       self.num_factors))
            user_biases = np.random.normal(size=(self.num_users, 1)) / SCONST
            item_biases = np.random.normal(size=(self.num_items, 1)) / SCONST
            x0 = (np.hstack((user_vectors, user_biases)),
                  np.hstack((item_vectors, item_biases)))
        else:
            x0 = x0
        (left, right), self.loss_history = self.solver.solve(problem, x=x0)

        self.user_vectors = left[:, :-1]
        self.item_vectors = right[:, :-1]
        self.user_biases = left[:, -1].reshape((self.num_users, 1))
        self.item_biases = right[:, -1].reshape((self.num_items, 1))
        print('U norm: {}'.format(la.norm(self.user_vectors)))
        print('V norm: {}'.format(la.norm(self.item_vectors)))

    def log_likelihood(self):
        Users = self.L[:, :-1]
        Items = self.R[:, :-1]
        UserBiases = self.L[:, -1].reshape((-1, 1))
        ItemBiases = self.R[:, -1].reshape((-1, 1))

        A = T.dot(self.L[:, :-1], (self.R[:, :-1]).T)
        A = T.inc_subtensor(A[:, :], UserBiases)
        A = T.inc_subtensor(A[:, :], ItemBiases.T)
        B = A * self.counts
        loglik = T.sum(B)

        A = T.exp(A)
        A += 1
        A = T.log(A)

        A = (self.counts + 1) * A
        loglik -= T.sum(A)

        # L2 regularization
        loglik -= 0.5 * self.reg_param * T.sum(T.square(self.L[:, :-1]))
        loglik -= 0.5 * self.reg_param * T.sum(T.square(self.R[:, :-1]))

        # Return the negation of the log-likelihood because we will minimize the cost
        return -loglik

    def print_vectors(self):
        user_vecs_file = open('logmf-user-vecs-%i' % self.num_factors, 'w')
        for i in range(self.num_users):
            vec = ' '.join(map(str, self.user_vectors[i]))
            line = '%i\t%s\n' % (i, vec)
            user_vecs_file.write(line)
        user_vecs_file.close()
        item_vecs_file = open('logmf-item-vecs-%i' % self.num_factors, 'w')
        for i in range(self.num_items):
            vec = ' '.join(map(str, self.item_vectors[i]))
            line = '%i\t%s\n' % (i, vec)
            item_vecs_file.write(line)
        item_vecs_file.close()
Example #26
    def _bootstrap_problem(self):
        self.manifold = FixedRankEmbeeded(self.num_users, self.num_items,
                                          self.num_factors + 1)
        self.solver = ConjugateGradient(maxiter=self.iterations,
                                        minstepsize=self.minstepsize)
Example #27
class LogisticMF():
    def __init__(self,
                 counts,
                 num_factors,
                 reg_param=0.6,
                 gamma=1.0,
                 iterations=30,
                 minstepsize=1e-9):
        self.counts = counts
        N = 20000
        self.counts = counts[:N, :N]
        self.num_users = self.counts.shape[0]
        self.num_items = self.counts.shape[1]
        self.num_factors = num_factors + 2
        self.iterations = iterations
        self.minstepsize = minstepsize
        self.reg_param = reg_param
        self.gamma = gamma
        self._bootstrap_problem()

    def _bootstrap_problem(self):
        self.manifold = FixedRankEmbeeded2Factors(self.num_users,
                                                  self.num_items,
                                                  self.num_factors)
        self.solver = ConjugateGradient(maxiter=self.iterations,
                                        minstepsize=self.minstepsize)

    def train_model(self):
        self.L = T.matrix('L')
        self.R = T.matrix('R')
        problem = Problem(man=self.manifold,
                          theano_cost=self.log_likelihood(),
                          theano_arg=[self.L, self.R])
        left, right = self.solver.solve(problem)
        self.user_vectors = left[:, :-2]
        self.item_vectors = right[:, :-2]
        self.user_biases = left[:, -1]
        self.item_biases = right[:, -2]
        print('U norm: {}'.format(la.norm(self.user_vectors)))
        print('V norm: {}'.format(la.norm(self.item_vectors)))
        print("how much user outer? {}".format(
            np.average(np.isclose(left[:, -2], 1))))
        print("how much item outer? {}".format(
            np.average(np.isclose(right[:, -1], 1))))
        print('user delta: {} in norm, {} in max abs'.format(
            la.norm(left[:, -2] - 1), np.max(np.abs(left[:, -2] - 1))))
        print('item delta: {} in norm, {} in max abs'.format(
            la.norm(right[:, -1] - 1), np.max(np.abs(right[:, -1] - 1))))

    def evaluate_lowrank(self, U, V, item, fast=False):
        if hasattr(item, '__len__') and len(item) == 2 and len(item[0]) == len(
                item[1]):
            if fast:
                rows = U[item[0], :]
                cols = V[item[1], :]
                data = (rows * cols).sum(1)
                return data
            else:
                idx_argsort = item[0].argsort()
                item = (item[0][idx_argsort], item[1][idx_argsort])

                vals, idxs, counts = [theano.shared(it) for it in\
                                      np.unique(item[0], return_index=True, return_counts=True)]
                output = T.zeros(int(np.max(counts.get_value())))
                it1 = theano.shared(item[1])

                def process_partial_dot(row_idx, out, U, V, item):
                    partial_dot = T.dot(
                        U[vals[row_idx], :],
                        V[item[idxs[row_idx]:idxs[row_idx] +
                               counts[row_idx]], :].T)
                    return T.set_subtensor(out[:counts[row_idx]], partial_dot)

                parts, updates = theano.scan(fn=process_partial_dot,
                                             outputs_info=output,
                                             sequences=T.arange(vals.size),
                                             non_sequences=[U, V, it1])
                mask = np.ones(
                    (vals.get_value().size, int(np.max(counts.get_value()))))
                for i, count in enumerate(counts.get_value()):
                    mask[i, count:] = 0
                return parts[theano.shared(mask).nonzero()].ravel()
        else:
            raise ValueError('__getitem__ now supports only indices set')

    def log_likelihood(self):
        Users = self.L[:, :-2]
        Items = self.R[:, :-2]
        UserBiases = self.L[:, -1]
        ItemBiases = self.R[:, -2]
        UserOuter = self.L[:, -2]
        ItemOuter = self.R[:, -1]

        ## A = T.dot(Users, Items.T)
        ## A += UserBiases
        ## A += ItemBiases.T
        ## B = A * self.counts
        ## loglik = T.sum(B)

        # A implicitly stored as self.L @ self.R.T
        # loglik = T.sum(A * self.counts) => sum over nonzeros only
        print('nnz size: {}'.format(self.counts.nonzero()[0].size))
        loglik = T.dot(
            self.evaluate_lowrank(self.L,
                                  self.R,
                                  self.counts.nonzero(),
                                  fast=False),
            np.array(self.counts[self.counts.nonzero()]).ravel())

        ## A = T.exp(A)
        ## A += 1
        ## A = T.log(A)
        # There we use Taylor series ln(exp(x) + 1) = ln(2) + x/2 + x^2/8 + O(x^4) at x=0
        # ln(2)
        const_term = (T.ones(
            (self.num_users, 1)) * np.log(2), T.ones((self.num_items, 1)))
        # x/2
        first_order_term = (0.5 * self.L, 0.5 * self.R)
        # x^2/8
        second_order_term = hadamard((self.L, self.R), (self.L, self.R),
                                     self.num_factors)
        second_order_term = tuple(factor / 8.0 for factor in second_order_term)

        grouped_factors = list(
            zip(const_term, first_order_term, second_order_term))
        A = (T.concatenate(grouped_factors[0],
                           axis=1), T.concatenate(grouped_factors[1], axis=1))

        ## A = (self.counts + 1) * A
        ## loglik -= T.sum(A)
        loglik -= sum_lowrank(A)
        loglik -= T.dot(
            self.evaluate_lowrank(A[0],
                                  A[1],
                                  self.counts.nonzero(),
                                  fast=False),
            np.array(self.counts[self.counts.nonzero()]).ravel())

        # L2 regularization
        loglik -= 0.5 * self.reg_param * T.sum(T.square(Users))
        loglik -= 0.5 * self.reg_param * T.sum(T.square(Items))

        # We need to keep UserOuter and ItemOuter strictly equal to ones, so that
        # the biases enter as proper outer products.
        loglik -= self.num_users * T.sum(T.square(UserOuter - 1))
        loglik -= self.num_items * T.sum(T.square(ItemOuter - 1))

        # Return the negation of the log-likelihood because we will minimize the cost
        return -loglik

    def print_vectors(self):
        user_vecs_file = open('logmf-user-vecs-%i' % self.num_factors, 'w')
        for i in range(self.num_users):
            vec = ' '.join(map(str, self.user_vectors[i]))
            line = '%i\t%s\n' % (i, vec)
            user_vecs_file.write(line)
        user_vecs_file.close()
        item_vecs_file = open('logmf-item-vecs-%i' % self.num_factors, 'w')
        for i in range(self.num_items):
            vec = ' '.join(map(str, self.item_vectors[i]))
            line = '%i\t%s\n' % (i, vec)
            item_vecs_file.write(line)
        item_vecs_file.close()
Example #28
# (a) Instantiation of a manifold
# points on the manifold are parameterized via their singular value
# decomposition (u, s, vt) where
# u is a 5 x 2 matrix with orthonormal columns,
# s is a vector of length 2,
# vt is a 2 x 4 matrix with orthonormal rows,
# so that u*diag(s)*vt is a 5 x 4 matrix of rank 2.
manifold = FixedRankEmbedded(A.shape[0], A.shape[1], k)


# (b) Definition of a cost function (here using autograd.numpy)
#       Note that the cost must be defined in terms of u, s and vt, where
#       X = u * diag(s) * vt.
def cost(usv):
    delta = .5
    u = usv[0]
    s = usv[1]
    vt = usv[2]
    X = np.dot(np.dot(u, np.diag(s)), vt)
    return np.sum(np.sqrt((X - A)**2 + delta**2) - delta)


# define the Pymanopt problem
problem = Problem(manifold=manifold, cost=cost)
# (c) Instantiation of a Pymanopt solver
solver = ConjugateGradient()

# let Pymanopt do the rest
X = solver.solve(problem)
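Since points on FixedRankEmbedded are returned as the (u, s, vt) triple described in the comments above, the rank-k matrix itself is recovered by multiplying the factors back together (a short follow-up sketch):

u, s, vt = X
X_lowrank = np.dot(np.dot(u, np.diag(s)), vt)  # the rank-k approximation of A
print("Robust-loss reconstruction error:", np.linalg.norm(X_lowrank - A))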
Example #29
def RELMM(data, A_init, S0, lambda_S, lambda_S0):

    [L, N] = data.shape

    [L, P] = S0.shape

    V = P * np.eye(P) - np.outer(np.ones(P), np.transpose(np.ones(P)))

    def cost(X):

        data_fit = np.zeros(N)

        for n in np.arange(N):
            data_fit[n] = np.linalg.norm(
                S[:, :, n] - np.dot(X, np.diag(psi[:, n])), 'fro')**2

        cost = lambda_S / 2 * np.sum(data_fit,
                                     axis=0) + lambda_S0 / 2 * np.trace(
                                         np.dot(np.dot(X, V), np.transpose(X)))

        return cost

    def egrad(X):

        partial_grad = np.zeros([L, P, N])

        for n in np.arange(N):
            partial_grad[:, :, n] = np.dot(X, np.diag(psi[:, n])) - np.dot(
                S[:, :, n], np.diag(psi[:, n]))

        egrad = lambda_S * np.sum(partial_grad, axis=2) + lambda_S0 * np.dot(
            X, V)

        return egrad

    A = A_init
    S = np.zeros([L, P, N])
    psi = np.ones([P, N])

    for n in np.arange(N):
        S[:, :, n] = S0

    maxiter = 200

    U = A  # split variable
    D = np.zeros(A.shape)  # Lagrange multipliers

    rho = 1

    maxiter_ADMM = 100
    tol_A_ADMM = 10**-3
    tol_A = 10**-3
    tol_S = 10**-3
    tol_psi = 10**-3
    tol_S0 = 10**-3

    I = np.identity(P)

    for i in np.arange(maxiter):

        A_old = np.copy(A)
        psi_old = np.copy(psi)
        S_old = np.copy(S)
        S0_old = np.copy(S0)

        # A update

        for j in np.arange(maxiter_ADMM):

            A_old_ADMM = np.copy(A)

            for n in np.arange(N):
                A[:, n] = np.dot(
                    np.linalg.inv(
                        np.dot(np.transpose(S[:, :, n]), S[:, :, n]) +
                        rho * I),
                    np.dot(np.transpose(S[:, :, n]), data[:, n]) + rho *
                    (U[:, n] - D[:, n]))

            U = proj_simplex(A + D)

            D = D + A - U

            if j > 0:
                rel_A_ADMM = np.abs((np.linalg.norm(A, 'fro') - np.linalg.norm(
                    A_old_ADMM, 'fro'))) / np.linalg.norm(A_old_ADMM, 'fro')

                print("iteration ", j, " of ", maxiter_ADMM, ", rel_A_ADMM =",
                      rel_A_ADMM)

                if rel_A_ADMM < tol_A_ADMM:
                    break

        # psi update

        for n in np.arange(N):
            for p in np.arange(P):
                psi[p,
                    n] = np.dot(np.transpose(S0[:, p]), S[:, p, n]) / np.dot(
                        np.transpose(S0[:, p]), S0[:, p])

        # S update

        for n in np.arange(N):
            S[:, :, n] = np.dot(
                np.outer(data[:, n], np.transpose(A[:, n])) +
                lambda_S * np.dot(S0, np.diag(psi[:, n])),
                np.linalg.inv(
                    np.outer(A[:, n], np.transpose(A[:, n])) + lambda_S * I))

        # S0 update

        manifold = Oblique(L, P)
        solver = ConjugateGradient()
        problem = Problem(manifold=manifold, cost=cost, egrad=egrad)
        S0 = solver.solve(problem)

        # termination checks

        if i > 0:

            S_vec = np.hstack(S)

            rel_A = np.abs(
                np.linalg.norm(A, 'fro') -
                np.linalg.norm(A_old, 'fro')) / np.linalg.norm(A_old, 'fro')
            rel_psi = np.abs(
                np.linalg.norm(psi, 'fro') -
                np.linalg.norm(psi_old, 'fro')) / np.linalg.norm(
                    psi_old, 'fro')
            rel_S = np.abs(
                np.linalg.norm(S_vec) -
                np.linalg.norm(np.hstack(S_old))) / np.linalg.norm(S_old)
            rel_S0 = np.abs(
                np.linalg.norm(S0, 'fro') -
                np.linalg.norm(S0_old, 'fro')) / np.linalg.norm(S0_old, 'fro')

            print("iteration ", i, " of ", maxiter, ", rel_A =", rel_A,
                  ", rel_psi =", rel_psi, "rel_S =", rel_S, "rel_S0 =", rel_S0)

            if rel_A < tol_A and rel_psi < tol_psi and rel_S < tol_S and rel_S0 < tol_S0 and i > 1:
                break

    return A, psi, S, S0
Example #30
    def _update_classifier(self, data, labels, w, classes):
        """Update the classifier parameters theta and bias

        Parameters
        ----------

        data : list of 2D arrays, element i has shape=[voxels_i, samples_i]
            Each element in the list contains the fMRI data of one subject for
            the classification task.

        labels : list of arrays of int, element i has shape=[samples_i]
            Each element in the list contains the labels for the data samples
            in data_sup.

        w : list of 2D array, element i has shape=[voxels_i, features]
            The orthogonal transforms (mappings) :math:`W_i` for each subject.

        classes : int
            The number of classes in the classifier.


        Returns
        -------

        theta : array, shape=[features, classes]
            The MLR parameter for the class planes.

        bias : array shape=[classes,]
            The MLR parameter for class biases.
        """

        # Stack the data and labels for training the classifier
        data_stacked, labels_stacked, weights = \
            SSSRM._stack_list(data, labels, w)

        features = w[0].shape[1]
        total_samples = weights.size

        data_th = S.shared(data_stacked.astype(theano.config.floatX))
        val_ = S.shared(labels_stacked)
        total_samples_S = S.shared(total_samples)
        theta_th = T.matrix(name='theta', dtype=theano.config.floatX)
        bias_th = T.col(name='bias', dtype=theano.config.floatX)
        constf2 = S.shared(self.alpha / self.gamma, allow_downcast=True)
        weights_th = S.shared(weights)

        log_p_y_given_x = \
            T.log(T.nnet.softmax((theta_th.T.dot(data_th.T)).T + bias_th.T))
        f = -constf2 * T.sum((log_p_y_given_x[T.arange(total_samples_S), val_])
                             / weights_th) + 0.5 * T.sum(theta_th ** 2)

        manifold = Product((Euclidean(features, classes),
                            Euclidean(classes, 1)))
        problem = Problem(manifold=manifold, cost=f, arg=[theta_th, bias_th],
                          verbosity=0)
        solver = ConjugateGradient(mingradnorm=1e-6)
        solution = solver.solve(problem)
        theta = solution[0]
        bias = solution[1]

        del constf2
        del theta_th
        del bias_th
        del data_th
        del val_
        del solver
        del solution

        return theta, bias
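The Theano graph above encodes a weighted multinomial logistic regression objective optimized over the product manifold Euclidean(features, classes) x Euclidean(classes, 1). For readers less familiar with Theano, an equivalent plain-NumPy sketch of the same cost (the function and argument names are assumptions, not part of the original code):

import numpy as np

def mlr_cost(theta, bias, data_stacked, labels_stacked, weights, constf2):
    # data_stacked: (samples, features); theta: (features, classes); bias: (classes, 1)
    logits = data_stacked @ theta + bias.T                  # (samples, classes)
    logits -= logits.max(axis=1, keepdims=True)             # numerical stability
    log_softmax = logits - np.log(np.exp(logits).sum(axis=1, keepdims=True))
    log_p = log_softmax[np.arange(len(labels_stacked)), labels_stacked]
    return -constf2 * np.sum(log_p / weights) + 0.5 * np.sum(theta ** 2)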
Example #31
0
    def _update_w(self, data_align, data_sup, labels, w, s, theta, bias):
        """

        Parameters
        ----------
        data_align : list of 2D arrays, element i has shape=[voxels_i, n_align]
            Each element in the list contains the fMRI data for alignment of
            one subject. There are n_align samples for each subject.

        data_sup : list of 2D arrays, element i has shape=[voxels_i, samples_i]
            Each element in the list contains the fMRI data of one subject for
            the classification task.

        labels : list of arrays of int, element i has shape=[samples_i]
            Each element in the list contains the labels for the data samples
            in data_sup.

        w : list of array, element i has shape=[voxels_i, features]
            The orthogonal transforms (mappings) :math:`W_i` for each subject.

        s : array, shape=[features, samples]
            The shared response.

        theta : array, shape=[classes, features]
            The MLR class plane parameters.

        bias : array, shape=[classes]
            The MLR class biases.

        Returns
        -------

        w : list of 2D array, element i has shape=[voxels_i, features]
            The updated orthogonal transforms (mappings).
        """
        subjects = len(data_align)

        s_th = S.shared(s.astype(theano.config.floatX))
        theta_th = S.shared(theta.T.astype(theano.config.floatX))
        bias_th = S.shared(bias.T.astype(theano.config.floatX),
                           broadcastable=(True, False))

        for subject in range(subjects):
            logger.info('Subject Wi %d' % subject)
            # Solve for subject i
            # Create the theano function
            w_th = T.matrix(name='W', dtype=theano.config.floatX)
            data_srm_subject = \
                S.shared(data_align[subject].astype(theano.config.floatX))
            constf1 = \
                S.shared((1 - self.alpha) * 0.5 / data_align[subject].shape[1],
                         allow_downcast=True)
            f1 = constf1 * T.sum((data_srm_subject - w_th.dot(s_th))**2)

            if data_sup[subject] is not None:
                lr_samples_S = S.shared(data_sup[subject].shape[1])
                data_sup_subject = \
                    S.shared(data_sup[subject].astype(theano.config.floatX))
                labels_S = S.shared(labels[subject])
                constf2 = S.shared(-self.alpha / self.gamma
                                   / data_sup[subject].shape[1],
                                   allow_downcast=True)

                log_p_y_given_x = T.log(T.nnet.softmax((theta_th.dot(
                    w_th.T.dot(data_sup_subject))).T + bias_th))
                f2 = constf2 * T.sum(
                    log_p_y_given_x[T.arange(lr_samples_S), labels_S])
                f = f1 + f2
            else:
                f = f1

            # Define the problem and solve
            f_subject = self._objective_function_subject(data_align[subject],
                                                         data_sup[subject],
                                                         labels[subject],
                                                         w[subject],
                                                         s, theta, bias)
            minstep = np.min((10**-np.floor(np.log10(f_subject)), 1e-1))
            manifold = Stiefel(w[subject].shape[0], w[subject].shape[1])
            problem = Problem(manifold=manifold, cost=f, arg=w_th, verbosity=0)
            solver = ConjugateGradient(mingradnorm=1e-2, minstepsize=minstep)
            w[subject] = np.array(solver.solve(
                problem, x=w[subject].astype(theano.config.floatX)))
            if data_sup[subject] is not None:
                del f2
                del log_p_y_given_x
                del data_sup_subject
                del labels_S
            del solver
            del problem
            del manifold
            del f
            del f1
            del data_srm_subject
            del w_th
        del theta_th
        del bias_th
        del s_th

        # Run garbage collector to avoid filling up the memory
        gc.collect()
        return w
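The minstepsize handed to the solver above is scaled to the magnitude of the per-subject objective, 10 ** -floor(log10(f_subject)), capped at 0.1, so larger objective values get a smaller minimum step. A short illustration with a made-up value:

import numpy as np

f_subject = 3.2e4   # hypothetical objective value for one subject
minstep = np.amin((10 ** -np.floor(np.log10(f_subject)), 1e-1))
print(minstep)      # 0.0001 -- four orders of magnitude in the objective gives a 1e-4 floor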
Example #33
0
class UsvRiemannianLogisticMF():
    def __init__(self,
                 counts,
                 num_factors,
                 reg_param=0.6,
                 gamma=1.0,
                 iterations=30,
                 minstepsize=1e-9):
        self.counts = counts
        self.num_users = self.counts.shape[0]
        self.num_items = self.counts.shape[1]
        self.num_factors = num_factors
        self.iterations = iterations
        self.minstepsize = minstepsize
        self.reg_param = reg_param
        self.gamma = gamma
        self._bootstrap_problem()

    def _bootstrap_problem(self):
        self.manifold = FixedRankEmbeeded(self.num_users, self.num_items,
                                          self.num_factors + 1)
        self.solver = ConjugateGradient(maxiter=self.iterations,
                                        minstepsize=self.minstepsize)

    def train_model(self, x0=None):
        self.U = T.matrix('U')
        self.S = T.matrix('S')
        self.V = T.matrix('V')
        problem = Problem(man=self.manifold,
                          theano_cost=self.log_likelihood(),
                          theano_arg=[self.U, self.S, self.V])

        if x0 is None:
            user_vectors = np.random.normal(size=(self.num_users,
                                                  self.num_factors + 1))
            item_vectors = np.random.normal(size=(self.num_items,
                                                  self.num_factors + 1))
            s = rnd.random(self.num_factors + 1)
            s[:-1] = np.sort(s[:-1])[::-1]

            x0 = (user_vectors, np.diag(s), item_vectors.T)
        else:
            x0 = x0
        (left, middle, right), self.loss_history = self.solver.solve(problem,
                                                                     x=x0)
        right = right.T

        s_mid = np.diag(np.sqrt(np.diag(middle)[:-1]))
        self.middle = s_mid

        print('U norm: {}'.format(la.norm(left[:, :-1])))
        print('V norm: {}'.format(la.norm(right[:, :-1])))
        self.user_vectors = left[:, :-1].dot(s_mid)
        self.item_vectors = right[:, :-1].dot(s_mid)
        self.user_biases = left[:, -1] * np.sqrt(middle[-1, -1])
        self.item_biases = right[:, -1] * np.sqrt(middle[-1, -1])
        print('U norm: {}'.format(la.norm(self.user_vectors)))
        print('V norm: {}'.format(la.norm(self.item_vectors)))
        print('LL: {}'.format(self._log_likelihood()))

    def _log_likelihood(self):
        loglik = 0
        A = np.dot(self.user_vectors, self.item_vectors.T)
        A += self.user_biases
        A += self.item_biases.T
        B = A * self.counts
        loglik += np.sum(B)

        A = np.exp(A)
        A += 1

        A = np.log(A)
        A = (self.counts + 1) * A
        loglik -= np.sum(A)

        # L2 regularization
        loglik -= 0.5 * self.reg_param * np.sum(np.square(np.diag(
            self.middle)))
        return loglik
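For reference, the factors and biases unpacked in train_model above recombine into the predicted log-odds matrix that _log_likelihood evaluates; a small sketch (the helper name is an assumption) that makes the bias broadcasting explicit:

import numpy as np

def predicted_logits(user_vectors, item_vectors, user_biases, item_biases):
    # A ~= U V^T + user bias (per row) + item bias (per column)
    return (user_vectors @ item_vectors.T
            + np.asarray(user_biases).reshape(-1, 1)
            + np.asarray(item_biases).reshape(1, -1))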

    def log_likelihood(self):
        Users = self.U[:, :-1]
        Middle = self.S
        Items = self.V[:-1, :]
        UserBiases = self.U[:, -1].reshape((-1, 1))
        ItemBiases = self.V[-1, :].reshape((-1, 1))

        A = T.dot(T.dot(self.U[:, :-1], self.S[:-1, :-1]), self.V[:-1, :])
        A = T.inc_subtensor(A[:, :], UserBiases * T.sqrt(self.S[-1, -1]))
        A = T.inc_subtensor(A[:, :], ItemBiases.T * T.sqrt(self.S[-1, -1]))
        B = A * self.counts
        loglik = T.sum(B)

        A = T.exp(A)
        A += 1
        A = T.log(A)

        A = (self.counts + 1) * A
        loglik -= T.sum(A)

        # L2 regularization
        loglik -= 0.5 * self.reg_param * T.sum(T.square(T.diag(self.S)[:-1]))

        # Return the negated log-likelihood because the solver minimizes the cost
        return -loglik

    def print_vectors(self):
        user_vecs_file = open('logmf-user-vecs-%i' % self.num_factors, 'w')
        for i in range(self.num_users):
            vec = ' '.join(map(str, self.user_vectors[i]))
            line = '%i\t%s\n' % (i, vec)
            user_vecs_file.write(line)
        user_vecs_file.close()
        item_vecs_file = open('logmf-item-vecs-%i' % self.num_factors, 'w')
        for i in range(self.num_items):
            vec = ' '.join(map(str, self.item_vectors[i]))
            line = '%i\t%s\n' % (i, vec)
            item_vecs_file.write(line)
        item_vecs_file.close()
class LogisticMF():
    def __init__(self, counts, num_factors, reg_param=0.6, gamma=1.0,
                 iterations=30, minstepsize=1e-9):
        self.counts = counts
        N = 20000
        self.counts = counts[:N, :N]
        self.num_users = self.counts.shape[0]
        self.num_items = self.counts.shape[1]
        self.num_factors = num_factors + 2
        self.iterations = iterations
        self.minstepsize = minstepsize
        self.reg_param = reg_param
        self.gamma = gamma
        self._bootstrap_problem()

    def _bootstrap_problem(self):
        self.manifold = FixedRankEmbeeded2Factors(self.num_users, self.num_items, self.num_factors)
        self.solver = ConjugateGradient(maxiter=self.iterations, minstepsize=self.minstepsize)

    def train_model(self):
        self.L = T.matrix('L')
        self.R = T.matrix('R')
        problem = Problem(man=self.manifold,
                          theano_cost=self.log_likelihood(),
                          theano_arg=[self.L, self.R])
        left, right = self.solver.solve(problem)
        self.user_vectors = left[:, :-2]
        self.item_vectors = right[:, :-2]
        self.user_biases = left[:, -1]
        self.item_biases = right[:, -2]
        print('U norm: {}'.format(la.norm(self.user_vectors)))
        print('V norm: {}'.format(la.norm(self.item_vectors)))
        print("how much user outer? {}".format(np.average(np.isclose(left[:, -2], 1))))
        print("how much item outer? {}".format(np.average(np.isclose(right[:, -1], 1))))
        print('user delta: {} in norm, {} in max abs'.format(la.norm(left[:, -2] - 1), np.max(np.abs(left[:, -2] - 1))))
        print('item delta: {} in norm, {} in max abs'.format(la.norm(right[:, -1] - 1), np.max(np.abs(right[:, -1] - 1))))

    def evaluate_lowrank(self, U, V, item, fast=False):
        if hasattr(item, '__len__') and len(item) == 2 and len(item[0]) == len(item[1]):
            if fast:
                rows = U[item[0], :]
                cols = V[item[1], :]
                data = (rows * cols).sum(1)
                return data
            else:
                idx_argsort = item[0].argsort()
                item = (item[0][idx_argsort], item[1][idx_argsort])

                vals, idxs, counts = [theano.shared(it) for it in\
                                      np.unique(item[0], return_index=True, return_counts=True)]
                output = T.zeros(int(np.max(counts.get_value())))
                it1 = theano.shared(item[1])

                def process_partial_dot(row_idx, out, U, V, item):
                    partial_dot = T.dot(U[vals[row_idx], :], V[item[idxs[row_idx]: idxs[row_idx] + counts[row_idx]], :].T)
                    return T.set_subtensor(out[:counts[row_idx]], partial_dot)
                parts, updates = theano.scan(fn=process_partial_dot,
                                             outputs_info=output,
                                             sequences=T.arange(vals.size),
                                             non_sequences=[U, V, it1])
                mask = np.ones((vals.get_value().size, int(np.max(counts.get_value()))))
                for i, count in enumerate(counts.get_value()):
                    mask[i, count:] = 0
                return parts[theano.shared(mask).nonzero()].ravel()
        else:
            raise ValueError('__getitem__ now supports only indices set')

    def log_likelihood(self):
        Users = self.L[:, :-2]
        Items = self.R[:, :-2]
        UserBiases = self.L[:, -1]
        ItemBiases = self.R[:, -2]
        UserOuter = self.L[:, -2]
        ItemOuter = self.R[:, -1]

        ## A = T.dot(Users, Items.T)
        ## A += UserBiases
        ## A += ItemBiases.T
        ## B = A * self.counts
        ## loglik = T.sum(B)

        # A implicitly stored as self.L @ self.R.T
        # loglik = T.sum(A * self.counts) => sum over nonzeros only
        print('nnz size: {}'.format(self.counts.nonzero()[0].size))
        loglik = T.dot(self.evaluate_lowrank(self.L, self.R, self.counts.nonzero(), fast=False),
                  np.array(self.counts[self.counts.nonzero()]).ravel())

        ## A = T.exp(A)
        ## A += 1
        ## A = T.log(A)
        # Here we use the Taylor series ln(exp(x) + 1) = ln(2) + x/2 + x^2/8 + O(x^4) around x = 0
        # ln(2)
        const_term = (T.ones((self.num_users, 1)) * np.log(2), T.ones((self.num_items, 1)))
        # x/2
        first_order_term = (0.5 * self.L, 0.5 * self.R)
        # x^2/8
        second_order_term = hadamard((self.L, self.R), (self.L, self.R), self.num_factors)
        second_order_term = tuple(factor / 8.0 for factor in second_order_term)

        grouped_factors = list(zip(const_term, first_order_term, second_order_term))
        A = (T.concatenate(grouped_factors[0], axis=1), T.concatenate(grouped_factors[1], axis=1))

        ## A = (self.counts + 1) * A
        ## loglik -= T.sum(A)
        loglik -= sum_lowrank(A)
        loglik -= T.dot(self.evaluate_lowrank(A[0], A[1], self.counts.nonzero(), fast=False),
                  np.array(self.counts[self.counts.nonzero()]).ravel())


        # L2 regularization
        loglik -= 0.5 * self.reg_param * T.sum(T.square(Users))
        loglik -= 0.5 * self.reg_param * T.sum(T.square(Items))

        # UserOuter and ItemOuter must be kept strictly at one so that the biases
        # enter as proper outer products
        loglik -= self.num_users * T.sum(T.square(UserOuter - 1))
        loglik -= self.num_items * T.sum(T.square(ItemOuter - 1))

        # Return the negated log-likelihood because the solver minimizes the cost
        return -loglik

    def print_vectors(self):
        user_vecs_file = open('logmf-user-vecs-%i' % self.num_factors, 'w')
        for i in range(self.num_users):
            vec = ' '.join(map(str, self.user_vectors[i]))
            line = '%i\t%s\n' % (i, vec)
            user_vecs_file.write(line)
        user_vecs_file.close()
        item_vecs_file = open('logmf-item-vecs-%i' % self.num_factors, 'w')
        for i in range(self.num_items):
            vec = ' '.join(map(str, self.item_vectors[i]))
            line = '%i\t%s\n' % (i, vec)
            item_vecs_file.write(line)
        item_vecs_file.close()
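The quadratic Taylor expansion used in LogisticMF.log_likelihood above, ln(exp(x) + 1) ~= ln(2) + x/2 + x^2/8, is accurate only near x = 0. A quick numerical sanity check, illustrative and not part of the original code:

import numpy as np

x = np.linspace(-0.5, 0.5, 101)
exact = np.log(np.exp(x) + 1)
approx = np.log(2) + x / 2 + x ** 2 / 8
print(np.max(np.abs(exact - approx)))   # roughly 3e-4 at the interval ends, since the error is O(x^4)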
Example #36
0
class WildLogisticMF():
    def __init__(self,
                 counts,
                 num_factors,
                 reg_param=0.6,
                 gamma=1.0,
                 iterations=30,
                 minstepsize=1e-10):
        self.counts = counts
        self.num_users = self.counts.shape[0]
        self.num_items = self.counts.shape[1]
        self.num_factors = num_factors
        self.iterations = iterations
        self.minstepsize = minstepsize
        self.reg_param = reg_param
        self.gamma = gamma
        self._bootstrap_problem()

    def _bootstrap_problem(self):
        self.manifold = FixedRankEmbeeded2Factors(self.num_users,
                                                  self.num_items,
                                                  self.num_factors + 1)
        self.solver = ConjugateGradient(maxiter=self.iterations,
                                        minstepsize=self.minstepsize)

    def train_model(self, x0=None):
        self.L = T.matrix('L')
        self.R = T.matrix('R')
        problem = Problem(man=self.manifold,
                          theano_cost=self.log_likelihood(),
                          theano_arg=[self.L, self.R])

        if x0 is None:
            user_vectors = np.random.normal(size=(self.num_users,
                                                  self.num_factors))
            item_vectors = np.random.normal(size=(self.num_items,
                                                  self.num_factors))
            user_biases = np.random.normal(size=(self.num_users, 1)) / SCONST
            item_biases = np.random.normal(size=(self.num_items, 1)) / SCONST
            x0 = (np.hstack((user_vectors, user_biases)),
                  np.hstack((item_vectors, item_biases)))
        else:
            x0 = x0
        (left, right), self.loss_history = self.solver.solve(problem, x=x0)

        self.user_vectors = left[:, :-1]
        self.item_vectors = right[:, :-1]
        self.user_biases = left[:, -1].reshape((self.num_users, 1))
        self.item_biases = right[:, -1].reshape((self.num_items, 1))
        print('U norm: {}'.format(la.norm(self.user_vectors)))
        print('V norm: {}'.format(la.norm(self.item_vectors)))

    def log_likelihood(self):
        Users = self.L[:, :-1]
        Items = self.R[:, :-1]
        UserBiases = self.L[:, -1].reshape((-1, 1))
        ItemBiases = self.R[:, -1].reshape((-1, 1))

        A = T.dot(self.L[:, :-1], (self.R[:, :-1]).T)
        A = T.inc_subtensor(A[:, :], UserBiases)
        A = T.inc_subtensor(A[:, :], ItemBiases.T)
        B = A * self.counts
        loglik = T.sum(B)

        A = T.exp(A)
        A += 1
        A = T.log(A)

        A = (self.counts + 1) * A
        loglik -= T.sum(A)

        # L2 regularization
        loglik -= 0.5 * self.reg_param * T.sum(T.square(self.L[:, :-1]))
        loglik -= 0.5 * self.reg_param * T.sum(T.square(self.R[:, :-1]))

        # Return the negated log-likelihood because the solver minimizes the cost
        return -loglik

    def print_vectors(self):
        user_vecs_file = open('logmf-user-vecs-%i' % self.num_factors, 'w')
        for i in range(self.num_users):
            vec = ' '.join(map(str, self.user_vectors[i]))
            line = '%i\t%s\n' % (i, vec)
            user_vecs_file.write(line)
        user_vecs_file.close()
        item_vecs_file = open('logmf-item-vecs-%i' % self.num_factors, 'w')
        for i in range(self.num_items):
            vec = ' '.join(map(str, self.item_vectors[i]))
            line = '%i\t%s\n' % (i, vec)
            item_vecs_file.write(line)
        item_vecs_file.close()
    loss = loss

    # compute 1,2 norm of K
    norm12 = 0.
    for i in range(K.shape[0]):
        norm12 += np.linalg.norm(K[i])

    loss = loss / len(S) + lam12 * norm12
    return loss


# create the problems, defined over the manifold
problem_L1 = Problem(manifold=manifold, cost=costL1)
problem_L12 = Problem(manifold=manifold, cost=costL12)
# Instantiate a pymanopt solver
solver = ConjugateGradient(maxiter=100)

# solve each problem:

# Lasso method:
print("Beginning test with L_1")
ts = time()
Khat_L1 = solver.solve(problem_L1)
print(np.shape(Khat_L1))
tot_time = time() - ts
emp_loss, log_loss = lossK(Khat_L1, X, S)
print("L_1 regularization: Time=%f, emp_loss=%f, log_loss=%f" %
      (tot_time, emp_loss, log_loss))

# L_{1,2} regularized method
print("Beginning test with L_{1,2}")
Example #38
0
    XtYZ = torch.matmul(X.T, C)
    BtA = XtYZ.T
    DtC = XtYZ

    f = 0.5 * (torch.norm(AtA.flatten())**2 + normBtB**2) - torch.norm(
        BtA.flatten())**2

    f += 0.5 * (torch.norm(CtC.flatten())**2 + normDtD**2) - torch.norm(
        DtC.flatten())**2

    f += 0.5 * regularizer * torch.norm(Y, 'fro')**2
    return f


problem = pymanopt.Problem(manifold, cost=cost)
solver = ConjugateGradient(maxiter=200)
print("Starting optimization...\n")
Yopt = solver.solve(problem)
print("Optimization finished\n")
print(Yopt.sum(0))
print(Yopt.sum(1))

X, Z = X.numpy(), Z.numpy()

W = uf(X.T @ (Yopt @ Z))

# Alignment of rows / Node correspondences
YZ = Yopt @ Z
YX = Yopt.T @ X

print("||X - YZ||_fro", np.linalg.norm((X - YZ), 'fro'))