Example #1
def NG_dr1(X, verbosity = 0):
    """
    X: array of N points on Gr(n, p); N x n x p array
    Aim: represent X by X_hat (N points on Gr(n-1, p)),
    where X_hat_i = A^T X_i with A \in St(n, n-1),
    minimizing the projection error (projection F-norm).
    """
    N, n, p = X.shape
    cpx = np.iscomplex(X).any() # true if X is complex-valued

    if cpx:
        man = Product([ComplexGrassmann(n, 1), Euclidean(p, 2)])
        
    else:
        man = Product([Grassmann(n, 1), Euclidean(p)])
    
    X_ = torch.from_numpy(X)
    
    @pymanopt.function.PyTorch
    def cost(v, b):
        vvT = torch.matmul(v, v.conj().t()) # n x n
        if cpx:
            b_ = b[:,0] + b[:,1]*1j
            b_ = torch.unsqueeze(b_, axis=1)
        else:
            b_ = torch.unsqueeze(b, axis=1)
        vbt = torch.matmul(v, b_.t()) # n x p
        IvvT = torch.eye(n, dtype=X_.dtype) - vvT
        d2 = 0
        for i in range(N):
            d2 = d2 + dist_proj(X_[i], torch.matmul(IvvT, X_[i]) + vbt)**2/N
            #d2 = d2 + dist_proj(X_[i], torch.matmul(AAT, X_[i]))**2/N
        return d2
    
    solver = SteepestDescent()
    problem = Problem(manifold=man, cost=cost, verbosity=verbosity)
    theta = solver.solve(problem)
    v = theta[0]
    b_ = theta[1]
    
    if cpx:
        b = b_[:,0] + b_[:,1]*1j
        b = np.expand_dims(b, axis=1)
    else:
        b = np.expand_dims(b_, axis=1)
    
    R = ortho_complement(v)
    tmp = np.array([R.conj().T for i in range(N)])
    X_low = multiprod(tmp, X)
    X_low = np.array([qr(X_low[i])[0] for i in range(N)])

    return X_low, R, v, b
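
Note: dist_proj, ortho_complement and multiprod are helpers from this example's source project and are not shown here. As a rough sketch only, a projection F-norm distance of the kind the cost above appears to use might look like the following (the QR orthonormalization step and the 1/sqrt(2) scaling are assumptions):

import torch

def dist_proj(X, Y):
    # Hypothetical helper: projection (chordal) distance between the column
    # spans of X and Y, both n x p, possibly complex-valued.
    Qx, _ = torch.linalg.qr(X)              # orthonormalize the bases
    Qy, _ = torch.linalg.qr(Y)
    Px = Qx @ Qx.conj().transpose(-2, -1)   # projector onto span(X)
    Py = Qy @ Qy.conj().transpose(-2, -1)   # projector onto span(Y)
    return torch.linalg.norm(Px - Py) / (2 ** 0.5)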
Example #2
    def setUp(self):
        self.m = m = 100
        self.n = n = 50
        self.euclidean = Euclidean(m, n)
        self.sphere = Sphere(n)
        self.manifold = Product([self.euclidean, self.sphere])

        point = self.manifold.random_point()

        @pymanopt.function.autograd(self.manifold)
        def cost(*x):
            return np.sum([np.linalg.norm(a - b)**2 for a, b in zip(x, point)])

        self.cost = cost
Example #3
def NG_dr(X, m, verbosity=0, *args, **kwargs):
    """
    X: array of N points on Gr(n, p); N x n x p array
    Aim: represent X by X_hat (N points on Gr(m, p), m < n),
    where X_hat_i = R^T X_i with R \in St(n, m),
    minimizing the projection error (projection F-norm).
    """
    N, n, p = X.shape
    cpx = np.iscomplex(X).any() # true if X is complex-valued

    if cpx:
        man = Product([ComplexGrassmann(n, m), Euclidean(n, p, 2)])
        
    else:
        man = Product([Grassmann(n, m), Euclidean(n, p)])
    
    X_ = torch.from_numpy(X)
    
    @pymanopt.function.PyTorch
    def cost(A, B):
        AAT = torch.matmul(A, A.conj().t()) # n x n
        if cpx:
            B_ = B[:,:,0] + B[:,:,1]*1j
        else:
            B_ = B
        IAATB = torch.matmul(torch.eye(n, dtype=X_.dtype) - AAT, B_) # n x p
        d2 = 0
        for i in range(N):
            d2 = d2 + dist_proj(X_[i], torch.matmul(AAT, X_[i]) + IAATB)**2/N
            #d2 = d2 + dist_proj(X_[i], torch.matmul(AAT, X_[i]))**2/N
        return d2

    #solver = ConjugateGradient()
    solver = SteepestDescent()
    problem = Problem(manifold=man, cost=cost, verbosity=verbosity)
    theta = solver.solve(problem)
    A = theta[0]
    B = theta[1]
    
    if cpx:
        B_ = B[:,:,0] + B[:,:,1]*1j
    else:
        B_ = B

    #tmp = np.array([A.T for i in range(N)])
    tmp = np.array([A.conj().T for i in range(N)])
    X_low = multiprod(tmp, X)
    X_low = np.array([qr(X_low[i])[0] for i in range(N)])

    return X_low, A, B_
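
A hedged usage sketch for NG_dr (the sizes and the QR-based construction of random Grassmann points below are illustrative assumptions, not taken from the example's project):

import numpy as np

N, n, p, m = 50, 10, 2, 4
rng = np.random.default_rng(0)
# Each point of X is an orthonormal n x p basis, i.e. a representative of a point on Gr(n, p).
X = np.stack([np.linalg.qr(rng.standard_normal((n, p)))[0] for _ in range(N)])

X_low, A, B = NG_dr(X, m, verbosity=1)
print(X_low.shape)  # expected (N, m, p): the reduced bases on Gr(m, p)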
Example #4
    def __init__(self, dataPtr, lambda1=1e-2, rank=10):
        """Initialize parameters

        Args:
            dataPtr (DataPtr): An object which contains the X and Z side features and the target matrix Y.
            lambda1 (float): Regularization weight.
            rank (int): Rank of the U, B, V parametrization.
        """

        self.dataset = dataPtr
        self.X = self.dataset.get_entity("row")
        self.Z = self.dataset.get_entity("col")
        self.rank = rank
        self._loadTarget()
        self.shape = (self.X.shape[0], self.Z.shape[0])
        self.lambda1 = lambda1
        self.nSamples = self.Y.data.shape[0]

        self.W = None
        self.optima_reached = False
        self.manifold = Product([
            Stiefel(self.X.shape[1], self.rank),
            SymmetricPositiveDefinite(self.rank),
            Stiefel(self.Z.shape[1], self.rank),
        ])
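
The product manifold above parametrizes a low-rank target matrix of the form W = U B V^T, with U and V on Stiefel manifolds and B symmetric positive definite. A minimal sketch of assembling such a W from a random point on the manifold (written against the current pymanopt API; the dimensions and the assembly itself are illustrative assumptions about how the class uses this parametrization):

import numpy as np
from pymanopt.manifolds import Product, Stiefel, SymmetricPositiveDefinite

d_row, d_col, rank = 30, 20, 10
manifold = Product([
    Stiefel(d_row, rank),
    SymmetricPositiveDefinite(rank),
    Stiefel(d_col, rank),
])
U, B, V = manifold.random_point()
W = U @ B @ V.T   # hypothetical low-rank parametrization of the target matrix
print(W.shape)    # (30, 20)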
Example #5
    def setUp(self):
        self.m = m = 20
        self.n = n = 10
        self.rank = rank = 3

        A = np.random.randn(m, n)

        @pymanopt.function.Autograd
        def cost(u, s, vt, x):
            return np.linalg.norm(((u * s) @ vt - A) @ x)**2

        self.cost = cost
        self.gradient = self.cost.compute_gradient()
        self.hvp = self.cost.compute_hessian_vector_product()

        self.manifold = Product([FixedRankEmbedded(m, n, rank), Euclidean(n)])
        self.problem = pymanopt.Problem(self.manifold, self.cost)
Example #6
    def setUp(self):
        self.m = m = 20
        self.n = n = 10
        self.rank = rank = 3

        A = np.random.normal(size=(m, n))
        self.manifold = Product([FixedRankEmbedded(m, n, rank), Euclidean(n)])

        @pymanopt.function.autograd(self.manifold)
        def cost(u, s, vt, x):
            return np.linalg.norm(((u * s) @ vt - A) @ x) ** 2

        self.cost = cost
        self.gradient = self.cost.get_gradient_operator()
        self.hessian = self.cost.get_hessian_operator()

        self.problem = pymanopt.Problem(self.manifold, self.cost)
Example #7
    def __init__(self, Xs, Xt, device=-1):
        self.Xs = Xs
        self.Xt = Xt
        assert isinstance(self.Xs, torch.Tensor)
        assert isinstance(self.Xt, torch.Tensor)
        d1 = self.Xs.size(1)
        d2 = self.Xt.size(1)
        self.device = device
        assert d1 == d2, f"Error. Found different dims {d1}, {d2}"
        self.manifold = Product([Stiefel(d1, d2)])
Example #8
    def test_vararg_cost_on_product(self):
        shape = (3, 3)
        manifold = Product([Stiefel(*shape)] * 2)

        @pymanopt.function.tensorflow(manifold)
        def cost(*args):
            X, Y = args
            return tf.reduce_sum(X) + tf.reduce_sum(Y)

        problem = pymanopt.Problem(manifold, cost)
        optimizer = TrustRegions(max_iterations=1)
        Xopt, Yopt = optimizer.run(problem).point
        self.assertEqual(Xopt.shape, (3, 3))
        self.assertEqual(Yopt.shape, (3, 3))
Example #9
def cp_mds_reg(X, D, lam=1.0, v=1, maxiter=1000):
    """Version of MDS in which "signs" are also an optimization parameter.

    Rather than performing a full optimization and then resetting the
    sign matrix, here we treat the signs as a parameter `A = [a_ij]` and
    minimize the cost function
        F(X,A) = ||W*(X^H(A*X) - cos(D))||^2 + lambda*||A - X^HX/|X^HX| ||^2
    Lambda is a regularization parameter we can experiment with. The
    collection of data, `X`, is treated as a point on the `Oblique`
    manifold of `k x n` matrices with unit-norm columns. Since we are
    working on a sphere in complex space, `k` must be even: the first
    `k/2` entries of each column are the real components and the last
    `k/2` entries are the imaginary parts.

    Parameters
    ----------
    X : ndarray (k, n)
        Initial guess for data.
    D : ndarray (n, n)
        Goal distance matrix between the n points.
    lam : float, optional
        Weight to give regularization term.
    v : int, optional
        Verbosity level.
    maxiter : int, optional
        Maximum number of iterations for the conjugate gradient solver.

    Returns
    -------
    X_opt : ndarray (k, n)
        Collection of points optimizing the cost.
    Areal, Aimag : ndarray (n, n)
        Real and imaginary parts of the optimized sign matrix.

    """

    dim = X.shape[0]
    num_points = X.shape[1]
    W = distance_to_weights(D)
    Sreal, Simag = norm_rotations(X)
    A = np.vstack(
        (np.reshape(Sreal,
                    (1, num_points**2)), np.reshape(Simag, num_points**2)))
    cp_manifold = Oblique(dim, num_points)
    a_manifold = Oblique(2, num_points**2)
    manifold = Product((cp_manifold, a_manifold))
    solver = ConjugateGradient(maxiter=maxiter, maxtime=float('inf'))
    cost = setup_reg_autograd_cost(D, int(dim / 2), num_points, lam=lam)
    problem = pymanopt.Problem(cost=cost, manifold=manifold)
    Xopt, Aopt = solver.solve(problem, x=(X, A))
    Areal = np.reshape(Aopt[0, :], (num_points, num_points))
    Aimag = np.reshape(Aopt[1, :], (num_points, num_points))
    return Xopt, Areal, Aimag
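
A hedged usage sketch for cp_mds_reg (it assumes the module's own helpers such as distance_to_weights, norm_rotations and setup_reg_autograd_cost are available; the sizes and the random goal distances are purely illustrative):

import numpy as np

k, n = 4, 30                                   # k must be even (complex packing)
X0 = np.random.randn(k, n)
X0 /= np.linalg.norm(X0, axis=0)               # unit-norm columns: a point on Oblique(k, n)
D = np.random.uniform(0.1, np.pi / 2, (n, n))
D = (D + D.T) / 2
np.fill_diagonal(D, 0)                         # symmetric goal distance matrix

X_opt, Areal, Aimag = cp_mds_reg(X0, D, lam=1.0, v=1)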
Example #10
    def fit(self, T, Y, init, maxIter=100):
        self.init_fit(T, Y, None)

        D = self.D + self.L
        K = self.K

        # (1) Instantiate the manifold
        manifold = Product([PositiveDefinite(D + 1, k=K), Euclidean(K - 1)])

        cost = self.get_cost_function(T, Y)

        problem = Problem(manifold=manifold, cost=cost, verbosity=1)

        # (3) Instantiate a Pymanopt solver
        solver = SteepestDescent(maxiter=3 * maxIter)

        # let Pymanopt do the rest
        Xopt = solver.solve(problem)
        self.Xopt_to_theta(Xopt)
Example #11
    def __init__(self, Xs, Xt, A, lbda, rank, device=-1):
        self.Xs = Xs
        self.Xt = Xt
        self.A = A
        self.rank = rank
        self.lbda = lbda
        assert isinstance(self.Xs, torch.Tensor)
        assert isinstance(self.Xt, torch.Tensor)
        assert isinstance(self.A, torch.Tensor)
        self.device = device

        d1 = self.Xs.size(1)
        d2 = self.Xt.size(1)

        assert (d1 == rank == d2), f"Found dimensions {d1}, {rank}, {d2}"
        d = d1
        self.manifold = Product(
            [Stiefel(d, d), PositiveDefinite(d),
             Stiefel(d, d)])
Example #12
    def optimize_on_manifold(self, options, optmeth):
        if optmeth not in ['bo13', 'wen12', 'ManOpt']:
            print("Chosen optimization method", optmeth,
                  "has not been implemented, using 'ManOpt' ")
            optmeth = 'ManOpt'

        if optmeth == 'ManOpt':
            # This is hardcoded to the two-dimensional case.
            manifold_one = Stiefel(
                np.shape(self.rotations[0])[0],
                np.shape(self.rotations[0])[1])
            manifold_two = Stiefel(
                np.shape(self.rotations[0])[0],
                np.shape(self.rotations[0])[1])
            manifold = Product((manifold_one, manifold_two))
            optimization_variable = tf.Variable(tf.placeholder(tf.float32))
            problem = Problem(manifold=manifold,
                              cost=self.my_cost(),
                              arg=optimization_variable)
            solver = ConjugateGradient(problem, optimization_variable, options)

            return solver
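
The snippet above mixes TensorFlow 1.x placeholders with an older pymanopt interface and will not run against current pymanopt releases. For orientation only, here is a sketch of the same product-of-two-Stiefel setup using the current pymanopt API with a placeholder Procrustes-style objective (the real objective would come from my_cost, which is not shown):

import autograd.numpy as anp
import pymanopt
from pymanopt.manifolds import Product, Stiefel
from pymanopt.optimizers import ConjugateGradient

manifold = Product([Stiefel(5, 3), Stiefel(5, 3)])
A = anp.random.normal(size=(5, 3))
B = anp.random.normal(size=(5, 3))

@pymanopt.function.autograd(manifold)
def cost(R1, R2):
    # Placeholder cost: align the two frames to fixed targets.
    return anp.linalg.norm(R1 - A) ** 2 + anp.linalg.norm(R2 - B) ** 2

problem = pymanopt.Problem(manifold, cost)
result = ConjugateGradient(verbosity=0).run(problem)
R1_opt, R2_opt = result.point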
Example #13
def cross_validation(odom_1,
                     aligned_1,
                     odom_2,
                     aligned_2,
                     type_1,
                     type_2,
                     K=10):
    """Function to run cross-validation to run nonlinear optimization for optimal
    pose estimation and evaluation.  Performs cross-validation K times and splits
    the dataset into K (approximately) even splits, to be used for in-sample
    training and out-of-sample evaluation.

    This function estimates a relative transformation between two lidar frames
    using nonlinear optimization, and evaluates the robustness of this estimate
    through K-fold cross-validation performance of our framework.  Though this
    function does not return any values, it saves all results in the
    'results' relative path.

    Parameters:
        odom_1 (pd.DataFrame):  DataFrame corresponding to odometry data for the
            pose we wish to transform into the odom_2 frame of reference.  See
            data/main_odometry.csv for an example of the headers/columns/data
            types this function expects this DataFrame to have.

        aligned_1 (pd.DataFrame): DataFrame corresponding to aligned odometry
            data given the 3 sets of odometry data for the 3 lidar sensors.  This
            data corresponds to the odom_1 sensor frame.

        odom_2 (pd.DataFrame):  DataFrame corresponding to odometry data for the
            pose we wish to transform the odom_1 frame of reference into.  See
            data/main_odometry.csv for an example of the headers/columns/data
            types this function expects this DataFrame to have.

        aligned_2 (pd.DataFrame): DataFrame corresponding to aligned odometry
            data given the 3 sets of odometry data for the 3 lidar sensors.  This
            data corresponds to the odom_2 sensor frame.

        type_1 (str):  String denoting the lidar type.  Should be in the set
            {'main', 'front', 'rear'}.  This type corresponds to the data type
            for the odom_1 frame.

        type_2 (str):  String denoting the lidar type.  Should be in the set
            {'main', 'front', 'rear'}.  This type corresponds to the data type
            for the odom_2 frame.

        K (int):  The number of folds to be used for cross-validation.  Defaults
            to 10.
    """
    # Get ICP covariance matrices
    # Odom 1 lidar odometry
    odom1_icp, odom1_trans_cov, odom1_trans_cov_max, \
    odom1_trans_cov_avg, odom1_rot_cov, odom1_rot_cov_max, \
    odom1_rot_cov_avg, odom1_reject = parse_icp_cov(odom_1, type=type_1,
                                                  reject_thr=REJECT_THR)

    # Odom 2 lidar odometry
    odom2_icp, odom2_trans_cov, odom2_trans_cov_max, \
    odom2_trans_cov_avg, odom2_rot_cov, odom2_rot_cov_max, \
    odom2_rot_cov_avg, odom2_reject = parse_icp_cov(odom_2, type=type_2,
                                                    reject_thr=REJECT_THR)
    # Calculate relative poses
    (odom1_aligned,
     odom1_rel_poses) = relative_pose_processing.calc_rel_poses(aligned_1)
    (odom2_aligned,
     odom2_rel_poses) = relative_pose_processing.calc_rel_poses(aligned_2)

    # Compute weights for weighted estimate
    cov_t_odom1, cov_R_odom1 = compute_weights_euler(odom1_aligned)
    cov_t_odom2, cov_R_odom2 = compute_weights_euler(odom2_aligned)

    # Extract a single scalar using the maximum value from rotation and translation
    var_t_odom1 = extract_variance(cov_t_odom1, mode="max")
    var_R_odom1 = extract_variance(cov_R_odom1, mode="max")
    var_t_odom2 = extract_variance(cov_t_odom2, mode="max")
    var_R_odom2 = extract_variance(cov_R_odom2, mode="max")

    # Optimization (1) Instantiate a manifold
    translation_manifold = Euclidean(3)  # Translation vector
    so3 = Rotations(3)  # Rotation matrix
    manifold = Product((so3, translation_manifold))  # Instantiate manifold

    # Get initial guesses for our estimations
    if os.path.exists(PKL_POSES_PATH):  # Check to make sure path exists
        transforms_dict = load_transforms(
            PKL_POSES_PATH)  # Relative transforms

    # Map types to sensor names to access initial estimate relative transforms
    types2sensors = {"main": "velodyne", "front": "front", "rear": "rear"}

    # Now get initial guesses from the relative poses
    initial_guess_odom1_odom2 = transforms_dict["{}_{}".format(
        types2sensors[type_1], types2sensors[type_2])]
    # Print out all the initial estimates as poses
    print("INITIAL GUESS {} {}: \n {} \n".format(types2sensors[type_1],
                                                 types2sensors[type_2],
                                                 initial_guess_odom1_odom2))

    # Get rotation matrices for initial guesses
    R0_odom1_odom2, t0_odom1_odom2 = initial_guess_odom1_odom2[:3, :3], \
                                     initial_guess_odom1_odom2[:3, 3]
    X0_odom1_odom2 = (R0_odom1_odom2, t0_odom1_odom2)  # Pymanopt estimate
    print("INITIAL GUESS {} {}: \n R0: \n {} \n\n t0: \n {} \n".format(
        types2sensors[type_1], types2sensors[type_2], R0_odom1_odom2,
        t0_odom1_odom2))

    # Create KFold xval object to get training/validation indices
    kf = KFold(n_splits=K, random_state=None, shuffle=False)
    k = 0  # Set fold counter to 0

    # Dataset
    A = np.array(odom2_rel_poses)  # First set of poses
    B = np.array(odom1_rel_poses)  # Second set of poses
    N = len(A)
    assert len(A) == len(B)  # Sanity check to ensure odometry data matches
    r = np.logical_or(np.array(odom1_reject)[:N],
                      np.array(odom2_reject)[:N])  # Outlier rejection

    print("NUMBER OF CROSS-VALIDATION FOLDS: {}".format(K))

    # Iterate over the K cross-validation folds of the poses
    for train_index, test_index in kf.split(
            A):  # Perform K-fold cross-validation

        # Path for results from manifold optimization
        analysis_results_path = os.path.join(ANALYSIS_RESULTS_PATH,
                                             "k={}".format(k))
        final_estimates_path = os.path.join(FINAL_ESTIMATES_PATH,
                                            "k={}".format(k))
        odometry_plots_path = os.path.join(ODOMETRY_PLOTS_PATH,
                                           "k={}".format(k))

        # Make sure all paths exist - if they don't create them
        for path in [
                analysis_results_path, final_estimates_path,
                odometry_plots_path
        ]:
            check_dir(path)

        # Get training data
        A_train = A[train_index]
        B_train = B[train_index]
        N_train = min(A_train.shape[0], B_train.shape[0])
        r_train = r[train_index]
        print("FOLD NUMBER: {}, NUMBER OF TRAINING SAMPLES: {}".format(
            k, N_train))

        omega = np.max([var_R_odom1, var_R_odom2
                        ])  # Take maximum across different odometries
        rho = np.max([var_t_odom1,
                      var_t_odom2])  # Take maximum across different odometries

        cost_lambda = lambda x: cost(x, A_train, B_train, r_train, rho, omega,
                                     WEIGHTED)  # Create cost function
        problem = Problem(manifold=manifold,
                          cost=cost_lambda)  # Create problem
        solver = CustomSteepestDescent()  # Create custom solver
        X_opt = solver.solve(problem, x=X0_odom1_odom2)  # Solve problem
        print("Initial Guess for Main-Front Transformation: \n {}".format(
            initial_guess_odom1_odom2))
        print("Optimal solution between {} and {} "
              "reference frames: \n {}".format(types2sensors[type_1],
                                               types2sensors[type_2], X_opt))

        # Take intermediate values for plotting
        estimates_x = solver.estimates
        errors = solver.errors
        iters = solver.iterations

        # Metrics dictionary
        estimates_dict = {i: T for i, T in zip(iters, estimates_x)}
        error_dict = {i: e for i, e in zip(iters, errors)}

        # Save intermediate results to a pkl file
        estimates_fname = os.path.join(
            analysis_results_path,
            "estimates_{}_{}.pkl".format(types2sensors[type_1],
                                         types2sensors[type_2]))
        error_fname = os.path.join(
            analysis_results_path,
            "error_{}_{}.pkl".format(types2sensors[type_1],
                                     types2sensors[type_2]))

        # Save estimates to pickle file
        with open(estimates_fname, "wb") as pkl_estimates:
            pickle.dump(estimates_dict, pkl_estimates)
            pkl_estimates.close()

        # Save error to pickle file
        with open(error_fname, "wb") as pkl_error:
            pickle.dump(error_dict, pkl_error)
            pkl_error.close()

        # Calculate difference between initial guess and final
        X_opt_T = construct_pose(X_opt[0], X_opt[1].reshape((3, 1)))
        print("DIFFERENCE IN MATRICES: \n {}".format(
            np.subtract(X_opt_T, initial_guess_odom1_odom2)))

        # Compute the weighted RMSE (training/in-sample)
        train_rmse_init_weighted, train_rmse_final_weighted, train_rmse_init_R_weighted, \
        train_rmse_init_t_weighted, train_rmse_final_R_weighted, \
        train_rmse_final_t_weighted = compute_rmse_weighted(
            initial_guess_odom1_odom2, X_opt_T, A_train, B_train, rho, omega)

        # Compute the unweighted RMSE (training/in-sample)
        train_rmse_init_unweighted, train_rmse_final_unweighted, train_rmse_init_R_unweighted, \
        train_rmse_init_t_unweighted, train_rmse_final_R_unweighted, \
        train_rmse_final_t_unweighted = compute_rmse_unweighted(
            initial_guess_odom1_odom2, X_opt_T, A_train, B_train)

        # Concatenate all RMSE values for training/in-sample
        train_rmses = [
            train_rmse_init_unweighted, train_rmse_final_unweighted,
            train_rmse_init_weighted, train_rmse_final_weighted,
            train_rmse_init_R_unweighted, train_rmse_init_t_unweighted,
            train_rmse_final_R_unweighted, train_rmse_final_t_unweighted,
            train_rmse_init_R_weighted, train_rmse_init_t_weighted,
            train_rmse_final_R_weighted, train_rmse_final_t_weighted
        ]

        # Display and save RMSEs
        outpath = os.path.join(
            analysis_results_path,
            "train_rmse_{}_{}.txt".format(types2sensors[type_1],
                                          types2sensors[type_2]))
        display_and_save_rmse(train_rmses, outpath)

        # Get test data
        A_test = A[test_index]
        B_test = B[test_index]
        N_test = min(A_test.shape[0], B_test.shape[0])
        print("NUMBER OF TEST SAMPLES: {}".format(N_test))

        # Compute the weighted RMSE (testing/out-of-sample)
        test_rmse_init_weighted, test_rmse_final_weighted, test_rmse_init_R_weighted, \
        test_rmse_init_t_weighted, test_rmse_final_R_weighted, \
        test_rmse_final_t_weighted = compute_rmse_weighted(initial_guess_odom1_odom2,
                                                            X_opt_T, A_test, B_test, rho, omega)

        # Compute the unweighted RMSE (testing/out-of-sample)
        test_rmse_init_unweighted, test_rmse_final_unweighted, test_rmse_init_R_unweighted, \
        test_rmse_init_t_unweighted, test_rmse_final_R_unweighted, \
        test_rmse_final_t_unweighted = compute_rmse_unweighted(initial_guess_odom1_odom2,
                                                                X_opt_T, A_test, B_test)

        # Concatenate all RMSE values for testing/out-of-sample
        test_rmses = [
            test_rmse_init_unweighted, test_rmse_final_unweighted,
            test_rmse_init_weighted, test_rmse_final_weighted,
            test_rmse_init_R_unweighted, test_rmse_init_t_unweighted,
            test_rmse_final_R_unweighted, test_rmse_final_t_unweighted,
            test_rmse_init_R_weighted, test_rmse_init_t_weighted,
            test_rmse_final_R_weighted, test_rmse_final_t_weighted
        ]

        # Display and save RMSEs
        outpath = os.path.join(
            analysis_results_path,
            "test_rmse_{}_{}.txt".format(types2sensors[type_1],
                                         types2sensors[type_2]))
        display_and_save_rmse(test_rmses, outpath)

        # Save final estimates
        final_estimate_outpath = os.path.join(
            final_estimates_path, "{}_{}.txt".format(types2sensors[type_1],
                                                     types2sensors[type_2]))
        np.savetxt(final_estimate_outpath, X_opt_T)

        # Finally, increment k
        k += 1
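
The cost function referenced above (cost(x, A_train, B_train, r_train, rho, omega, WEIGHTED)) is defined elsewhere in the project and is not shown. For orientation only, here is a minimal sketch of a relative-pose residual over the same rotation-plus-translation product manifold, written against the current pymanopt API and assuming an AX = XB style alignment between the two streams of 4 x 4 relative poses (the weighting and outlier rejection used above are ignored):

import autograd.numpy as anp
import pymanopt
from pymanopt.manifolds import Euclidean, Product, SpecialOrthogonalGroup

manifold = Product([SpecialOrthogonalGroup(3), Euclidean(3)])

def make_cost(A_rel, B_rel):
    # A_rel, B_rel: (N, 4, 4) arrays of relative poses from the two odometry streams.

    @pymanopt.function.autograd(manifold)
    def cost(R, t):
        # Assemble the candidate 4 x 4 transform X from the manifold point (R, t).
        X = anp.vstack([anp.hstack([R, t.reshape(3, 1)]),
                        anp.array([[0.0, 0.0, 0.0, 1.0]])])
        total = 0.0
        for Ai, Bi in zip(A_rel, B_rel):
            total = total + anp.sum((anp.matmul(Ai, X) - anp.matmul(X, Bi)) ** 2)
        return total / len(A_rel)

    return cost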
Example #14
def fit_gpytorch_manifold(
    mll: MarginalLogLikelihood,
    bounds: Optional[ParameterBounds] = None,
    solver: Solver = pyman_solvers.ConjugateGradient(maxiter=500),
    nb_init_candidates: int = 200,
    last_x_as_candidate_prob: float = 0.9,
    options: Optional[Dict[str, Any]] = None,
    track_iterations: bool = True,
    approx_mll: bool = False,
    module_to_array_func: TModToArray = module_to_list_of_array,
    module_from_array_func: TArrayToMod = set_params_with_list_of_array,
) -> Tuple[MarginalLogLikelihood, Dict[str, Union[
        float, List[OptimizationIteration]]]]:
    """
    This function fits a gpytorch model by maximizing MLL with a pymanopt optimizer.

    The model and likelihood in mll must already be in train mode.
    This method requires that the model has `train_inputs` and `train_targets`.

    Parameters
    ----------
    :param mll: MarginalLogLikelihood to be maximized.

    Optional parameters
    -------------------
    :param nb_init_candidates: number of random initial candidates for the GP parameters
    :param last_x_as_candidate_prob: probability that the last set of parameters is among the initial candidates
    :param bounds: A dictionary mapping parameter names to tuples of lower and upper bounds.
    :param solver: Pymanopt solver.
    :param options: Dictionary of additional options (the "exclude" entry lists parameter names to exclude from optimization).
    :param track_iterations: Track the function values and wall time for each iteration.
    :param approx_mll: If True, use gpytorch's approximate MLL computation. This is disabled by default since the
        stochasticity is an issue for deterministic optimizers. Enabling this is only recommended when working with
        large training data sets (n > 2000).

    Returns
    -------
    :return: 2-element tuple containing
        - MarginalLogLikelihood with parameters optimized in-place.
        - Dictionary with the following key/values:
            "fopt": Best mll value.
            "wall_time": Wall time of fitting.
            "iterations": List of OptimizationIteration objects with information on each iteration.
                If track_iterations is False, will be empty.

    Example:
    gp = SingleTaskGP(train_X, train_Y)
    mll = ExactMarginalLogLikelihood(gp.likelihood, gp)
    mll.train()
    fit_gpytorch_manifold(mll)
    mll.eval()
    """
    options = options or {}
    # Current parameters
    x0, property_dict, bounds = module_to_array_func(module=mll,
                                                     bounds=bounds,
                                                     exclude=options.pop(
                                                         "exclude", None))
    x0 = [x0i.astype(np.float64) for x0i in x0]
    if bounds is not None:
        warnings.warn(
            'Bounds handling not supported yet in fit_gpytorch_manifold')
        # bounds = Bounds(lb=bounds[0], ub=bounds[1], keep_feasible=True)

    t1 = time.time()

    # Define cost function
    def cost(x):
        param_dict = OrderedDict(mll.named_parameters())
        idx = 0
        for p_name, attrs in property_dict.items():
            # Construct the new tensor
            if len(attrs.shape) == 0:  # deal with scalar tensors
                # new_data = torch.tensor(x[0], dtype=attrs.dtype, device=attrs.device)
                new_data = torch.tensor(x[idx][0],
                                        dtype=attrs.dtype,
                                        device=attrs.device)
            else:
                # new_data = torch.tensor(x, dtype=attrs.dtype, device=attrs.device).view(*attrs.shape)
                new_data = torch.tensor(x[idx],
                                        dtype=attrs.dtype,
                                        device=attrs.device).view(*attrs.shape)
            param_dict[p_name].data = new_data
            idx += 1
        # mllx = set_params_with_array(mll, x, property_dict)
        train_inputs, train_targets = mll.model.train_inputs, mll.model.train_targets
        mll.zero_grad()
        output = mll.model(*train_inputs)
        args = [output, train_targets] + _get_extra_mll_args(mll)
        loss = -mll(*args).sum()
        return loss

    def egrad(x):
        loss = cost(x)
        loss.backward()
        param_dict = OrderedDict(mll.named_parameters())
        grad = []
        for p_name in property_dict:
            t = param_dict[p_name].grad
            if t is None:
                # this deals with parameters that do not affect the loss
                if len(property_dict[p_name].shape
                       ) > 1 and property_dict[p_name].shape[0] > 1:
                    # if the variable is a matrix, keep its shape
                    grad.append(np.zeros(property_dict[p_name].shape))
                else:
                    grad.append(np.zeros(property_dict[p_name].shape))
            else:
                if t.ndim > 1 and t.shape[
                        0] > 1:  # if the variable is a matrix, keep its shape
                    grad.append(t.detach().cpu().double().clone().numpy())
                else:  # Vector case
                    grad.append(
                        t.detach().view(-1).cpu().double().clone().numpy())
        return grad

    # Define the manifold (product of manifolds)
    manifolds_list = []
    for p_name, t in mll.named_parameters():
        try:
            # If a manifold is given add it
            manifolds_list.append(attrgetter(p_name + "_manifold")(mll))
        except AttributeError:
            # Otherwise, default: Euclidean
            manifolds_list.append(
                Euclidean(int(np.prod(property_dict[p_name].shape))))
    # Product of manifolds
    manifold = Product(manifolds_list)

    # Instantiate the problem on the manifold
    if track_iterations:
        verbosity = 2
    else:
        verbosity = 0

    problem = Problem(manifold=manifold,
                      cost=cost,
                      egrad=egrad,
                      verbosity=verbosity,
                      arg=torch.Tensor())  #, precon=precon)

    # For cases where the Hessian is hard/long to compute, we approximate it with finite differences of the gradient.
    # Typical cases: the Hessian can be hard to compute due to the 2nd derivative of the eigenvalue decomposition,
    # e.g. in the SPD affine-invariant distance.
    problem._hess = types.MethodType(get_hessianfd, problem)

    # Choose initial parameters
    # Do not always consider x0, to encourage variations of the parameters.
    if np.random.rand() < last_x_as_candidate_prob:
        x0_candidates = [x0]
        x0_candidates += [
            manifold.rand() for i in range(nb_init_candidates - 1)
        ]
    else:
        x0_candidates = []
        x0_candidates += [manifold.rand() for i in range(nb_init_candidates)]
    for i in range(int(3 * nb_init_candidates / 4)):
        x0_candidates[i][0:4] = x0[0:4]  #TODO remove hard-coding
    y0_candidates = [cost(x0_candidates[i]) for i in range(nb_init_candidates)]

    y_init, x_init_idx = torch.Tensor(y0_candidates).min(0)
    x_init = x0_candidates[x_init_idx]

    with gpt_settings.fast_computations(log_prob=approx_mll):
        # Logverbosity of the solver to 1
        solver._logverbosity = 1
        # Solve
        opt_x, opt_log = solver.solve(problem, x=x_init)

    # Construct info dict
    info_dict = {
        "fopt": float(cost(opt_x).detach().numpy()),
        "wall_time": time.time() - t1,
        "opt_log": opt_log,
    }
    # if not res.success:  # TODO update
    #     try:
    #         # Some res.message are bytes
    #         msg = res.message.decode("ascii")
    #     except AttributeError:
    #         # Others are str
    #         msg = res.message
    #     warnings.warn(
    #         f"Fitting failed with the optimizer reporting '{msg}'", OptimizationWarning
    #     )
    # Set to optimum
    mll = module_from_array_func(mll, opt_x, property_dict)
    return mll, info_dict
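
The manifold lookup in the loop above resolves an attribute named "<parameter name>_manifold" on the mll via attrgetter and falls back to a Euclidean manifold otherwise. A hedged sketch of what attaching such an attribute could look like (the module path, parameter name, and manifold choice below are purely illustrative, not part of this function's API):

from pymanopt.manifolds import SymmetricPositiveDefinite

# Hypothetical: a (3, 3) parameter registered as mll.model.covar_module.raw_covariance
# would then be optimized on the SPD manifold instead of the Euclidean default, because
# attrgetter("model.covar_module.raw_covariance_manifold")(mll) now resolves.
mll.model.covar_module.raw_covariance_manifold = SymmetricPositiveDefinite(3)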
Example #15
      accuracy_summary =tf.summary.scalar("accuracy", accuracy)

    #with tf.name_scope("e_test") as scope:
        #e_correct_prediction = tf.equal(tf.argmax(e_y, 1), tf.argmax(y_, 1))
        #e_accuracy = tf.reduce_mean(tf.cast(e_correct_prediction, "float"))
        #e_accuracy_summary =tf.summary.scalar("e_accuracy", e_accuracy)

    summaries = tf.summary.merge_all()



    manifold_b = Euclidean(1,10)
    if use_parameterization:
        manifold_A = Euclidean(784, k)
        manifold_B = Euclidean(k, 10)
        manifold_W = Product([manifold_A,manifold_B])
        arg = [A, B, b]
    else:
        manifold_W = FixedRankEmbedded(784, 10, k)
        #manifold_W = Product([Stiefel(784,k), Euclidean(k), Stiefel(k,10)])
        arg = [A, M, B, b]

    manifold = Product([manifold_W, manifold_b])

    e_manifold_W = Euclidean(784, 10)
    e_arg = [eW, b]

    e_manifold = Product([e_manifold_W, manifold_b])

    problem = Problem(manifold=manifold, cost=loss, accuracy=accuracy, summary=summaries, arg=arg, data=[x,y_], verbosity=1)
    e_problem = Problem(manifold=e_manifold, cost=e_loss, accuracy=accuracy, summary=summaries, arg=e_arg, data=[x, y_],
Example #16
def main():
    # Parse command line arguments
    parser = argparse.ArgumentParser(
        description='Generate latent space embeddings')
    parser.add_argument('emb1', help='path to embedding 1')
    parser.add_argument('emb2', help='path to embedding 2')
    parser.add_argument(
        '--geomm_embeddings_path',
        default=None,
        type=str,
        help=
        'directory to save the output GeoMM latent space embeddings. The output embeddings are normalized.'
    )
    parser.add_argument(
        '--encoding',
        default='utf-8',
        help='the character encoding for input/output (defaults to utf-8)')
    parser.add_argument('--verbose', default=0, type=int, help='Verbose')
    mapping_group = parser.add_argument_group(
        'mapping arguments', 'Basic embedding mapping arguments')
    mapping_group.add_argument('--dictionary',
                               default=sys.stdin.fileno(),
                               help='the dictionary file (defaults to stdin)')
    mapping_group.add_argument(
        '--normalize',
        choices=['unit', 'center', 'unitdim', 'centeremb', 'no'],
        nargs=2,
        default=[],
        help=
        'the normalization actions performed in sequence for embeddings 1 and 2'
    )

    geomm_group = parser.add_argument_group('GeoMM arguments',
                                            'Arguments for GeoMM method')
    geomm_group.add_argument('--l2_reg',
                             type=float,
                             default=1e2,
                             help='Lambda for L2 Regularization')
    geomm_group.add_argument(
        '--max_opt_time',
        type=int,
        default=5000,
        help='Maximum time limit for optimization in seconds')
    geomm_group.add_argument(
        '--max_opt_iter',
        type=int,
        default=150,
        help='Maximum number of iterations for optimization')

    args = parser.parse_args()

    if args.verbose:
        print('Current arguments: {0}'.format(args))

    dtype = 'float32'
    if args.verbose:
        print('Loading embeddings data...')

    # Read input embeddings
    emb1file = open(args.emb1,
                    encoding=args.encoding,
                    errors='surrogateescape')
    emb2file = open(args.emb2,
                    encoding=args.encoding,
                    errors='surrogateescape')
    emb1_words, x = embeddings.read(emb1file, max_voc=0, dtype=dtype)
    emb2_words, z = embeddings.read(emb2file, max_voc=0, dtype=dtype)

    # Build word to index map
    emb1_word2ind = {word: i for i, word in enumerate(emb1_words)}
    emb2_word2ind = {word: i for i, word in enumerate(emb2_words)}

    noov = 0
    emb1_indices = []
    emb2_indices = []
    f = open(args.dictionary, encoding=args.encoding, errors='surrogateescape')
    for line in f:
        emb1, emb2 = line.split()
        try:
            emb1_ind = emb1_word2ind[emb1]
            emb2_ind = emb2_word2ind[emb2]
            emb1_indices.append(emb1_ind)
            emb2_indices.append(emb2_ind)
        except KeyError:
            noov += 1
            if args.verbose:
                print('WARNING: OOV dictionary entry ({0} - {1})'.format(
                    emb1, emb2))  #, file=sys.stderr
    f.close()
    if args.verbose:
        print('Number of embedding pairs having at least one OOV: {}'.format(
            noov))
    emb1_indices = emb1_indices
    emb2_indices = emb2_indices
    if args.verbose:
        print('Normalizing embeddings...')

    # STEP 0: Normalization
    if len(args.normalize) > 0:
        x = normalize_emb(x, args.normalize[0])
        z = normalize_emb(z, args.normalize[1])

    # Step 1: Optimization
    if args.verbose:
        print('Beginning Optimization')
    start_time = time.time()
    x_count = len(set(emb1_indices))
    z_count = len(set(emb2_indices))

    # Filter out uniq values
    map_dict_emb1 = {}
    map_dict_emb2 = {}
    I = 0
    uniq_emb1 = []
    uniq_emb2 = []
    for i in range(len(emb1_indices)):
        if emb1_indices[i] not in map_dict_emb1.keys():
            map_dict_emb1[emb1_indices[i]] = I
            I += 1
            uniq_emb1.append(emb1_indices[i])
    J = 0
    for j in range(len(emb2_indices)):
        if emb2_indices[j] not in map_dict_emb2.keys():
            map_dict_emb2[emb2_indices[j]] = J
            J += 1
            uniq_emb2.append(emb2_indices[j])

    # Creating dictionary matrix
    row = list(range(0, x_count))
    col = list(range(0, x_count))
    data = [1 for i in range(0, x_count)]
    print(f"Counts: {x_count}, {z_count}")
    A = coo_matrix((data, (row, col)), shape=(x_count, z_count))

    np.random.seed(0)
    Lambda = args.l2_reg

    U1 = TT.matrix()
    U2 = TT.matrix()
    B = TT.matrix()

    Xemb1 = x[uniq_emb1]
    Zemb2 = z[uniq_emb2]
    del x, z
    gc.collect()

    Kx, Kz = Xemb1, Zemb2
    XtAZ = Kx.T.dot(A.dot(Kz))
    XtX = Kx.T.dot(Kx)
    ZtZ = Kz.T.dot(Kz)
    AA = np.sum(A * A)

    W = (U1.dot(B)).dot(U2.T)
    regularizer = 0.5 * Lambda * (TT.sum(B**2))
    sXtX = shared(XtX)
    sZtZ = shared(ZtZ)
    sXtAZ = shared(XtAZ)

    cost = regularizer
    wtxtxw = W.T.dot(sXtX.dot(W))
    wtxtxwztz = wtxtxw.dot(sZtZ)
    cost += TT.nlinalg.trace(wtxtxwztz)
    cost += -2 * TT.sum(W * sXtAZ)
    cost += shared(AA)

    solver = ConjugateGradient(maxtime=args.max_opt_time,
                               maxiter=args.max_opt_iter)

    manifold = Product([
        Stiefel(Kx.shape[1], Kx.shape[1]),
        Stiefel(Kz.shape[1], Kz.shape[1]),
        PositiveDefinite(Kx.shape[1])
    ])
    problem = Problem(manifold=manifold,
                      cost=cost,
                      arg=[U1, U2, B],
                      verbosity=3)
    wopt = solver.solve(problem)
    print(f"Problem solved ...")

    w = wopt
    U1 = w[0]
    U2 = w[1]
    B = w[2]

    print(f"Model copied ...")

    gc.collect()

    # Step 2: Transformation
    xw = Kx.dot(U1).dot(scipy.linalg.sqrtm(B))
    zw = Kz.dot(U2).dot(scipy.linalg.sqrtm(B))
    print(f"Transformation done ...")

    end_time = time.time()
    if args.verbose:
        print('Completed training in {0:.2f} seconds'.format(end_time -
                                                             start_time))

    del Kx, Kz, B, U1, U2
    gc.collect()

    ### Save the GeoMM embeddings if requested
    xw_n = embeddings.length_normalize(xw)
    zw_n = embeddings.length_normalize(zw)

    del xw, zw
    gc.collect()

    if args.geomm_embeddings_path is not None:
        os.makedirs(args.geomm_embeddings_path, exist_ok=True)

        out_emb_fname = os.path.join(args.geomm_embeddings_path, 'emb1.vec')
        new_emb1_words = []
        for id in uniq_emb1:
            new_emb1_words.append(emb1_words[id])
        with open(out_emb_fname, 'w', encoding=args.encoding) as outfile:
            embeddings.write(new_emb1_words, xw_n, outfile)

        new_emb2_words = []
        for id in uniq_emb2:
            new_emb2_words.append(emb2_words[id])
        out_emb_fname = os.path.join(args.geomm_embeddings_path, 'emb2.vec')
        with open(out_emb_fname, 'w', encoding=args.encoding) as outfile:
            embeddings.write(new_emb2_words, zw_n, outfile)

    exit(0)
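
For reference, the Theano expressions above assemble the GeoMM objective. Writing W = U1 B U2^T and letting X = Kx, Z = Kz denote the unique source and target embedding blocks, the sum of the regularizer, trace and inner-product terms (plus the constant ||A||_F^2 added as shared(AA)) equals

    \frac{\lambda}{2}\|B\|_F^2 + \operatorname{tr}(W^\top X^\top X\, W\, Z^\top Z) - 2\,\operatorname{tr}(W^\top X^\top A Z) + \|A\|_F^2
        = \frac{\lambda}{2}\|B\|_F^2 + \|A - X W Z^\top\|_F^2, \qquad W = U_1 B U_2^\top,

so the solver looks for orthogonal factors U1, U2 and an SPD core B such that X W Z^T best reconstructs the dictionary alignment matrix A, with an L2 penalty on B.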
Example #17
    def setUp(self):
        self.m = m = 100
        self.n = n = 50
        self.euclidean = Euclidean(m, n)
        self.sphere = Sphere(n)
        self.man = Product([self.euclidean, self.sphere])
Example #18
def main():
    # Parse command line arguments
    parser = argparse.ArgumentParser(description='Map the source embeddings into the target embedding space')
    parser.add_argument('src_input', help='the input source embeddings')
    parser.add_argument('trg_input', help='the input target embeddings')
    parser.add_argument('--model_path', default=None, type=str, help='directory to save the model')
    parser.add_argument('--geomm_embeddings_path', default=None, type=str, help='directory to save the output GeoMM latent space embeddings. The output embeddings are normalized.')
    parser.add_argument('--encoding', default='utf-8', help='the character encoding for input/output (defaults to utf-8)')
    parser.add_argument('--max_vocab', default=0,type=int, help='Maximum vocabulary to be loaded, 0 allows complete vocabulary')
    parser.add_argument('--verbose', default=0,type=int, help='Verbose')
    mapping_group = parser.add_argument_group('mapping arguments', 'Basic embedding mapping arguments')
    mapping_group.add_argument('-dtrain', '--dictionary_train', default=sys.stdin.fileno(), help='the training dictionary file (defaults to stdin)')
    mapping_group.add_argument('-dtest', '--dictionary_test', default=sys.stdin.fileno(), help='the test dictionary file (defaults to stdin)')
    mapping_group.add_argument('--normalize', choices=['unit', 'center', 'unitdim', 'centeremb'], nargs='*', default=[], help='the normalization actions to perform in order')

    geomm_group = parser.add_argument_group('GeoMM arguments', 'Arguments for GeoMM method')
    geomm_group.add_argument('--l2_reg', type=float,default=1e2, help='Lambda for L2 Regularization')
    geomm_group.add_argument('--max_opt_time', type=int,default=5000, help='Maximum time limit for optimization in seconds')
    geomm_group.add_argument('--max_opt_iter', type=int,default=150, help='Maximum number of iterations for optimization')

    eval_group = parser.add_argument_group('evaluation arguments', 'Arguments for evaluation')
    eval_group.add_argument('--normalize_eval', action='store_true', help='Normalize the embeddings at test time')
    eval_group.add_argument('--eval_batch_size', type=int,default=1000, help='Batch size for evaluation')
    eval_group.add_argument('--csls_neighbourhood', type=int,default=10, help='Neighbourhood size for CSLS')

    args = parser.parse_args()
    BATCH_SIZE = args.eval_batch_size

    ## Logging
    #method_name = os.path.join('logs','geomm')
    #directory = os.path.join(os.path.join(os.getcwd(),method_name), datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S'))
    #if not os.path.exists(directory):
    #    os.makedirs(directory)
    #log_file_name, file_extension = os.path.splitext(os.path.basename(args.dictionary_train))
    #log_file_name = log_file_name + '.log'
    #class Logger(object):
    #    def __init__(self):
    #        self.terminal = sys.stdout
    #        self.log = open(os.path.join(directory,log_file_name), "a")

    #    def write(self, message):
    #        self.terminal.write(message)
    #        self.log.write(message)

    #    def flush(self):
    #        #this flush method is needed for python 3 compatibility.
    #        #this handles the flush command by doing nothing.
    #        #you might want to specify some extra behavior here.
    #        pass
    #sys.stdout = Logger()
    if args.verbose:
        print('Current arguments: {0}'.format(args))

    dtype = 'float32'
    if args.verbose:
        print('Loading train data...')
    # Read input embeddings
    srcfile = open(args.src_input, encoding=args.encoding, errors='surrogateescape')
    trgfile = open(args.trg_input, encoding=args.encoding, errors='surrogateescape')
    src_words, x = embeddings.read(srcfile,max_voc=args.max_vocab, dtype=dtype)
    trg_words, z = embeddings.read(trgfile,max_voc=args.max_vocab, dtype=dtype)

    # Build word to index map
    src_word2ind = {word: i for i, word in enumerate(src_words)}
    trg_word2ind = {word: i for i, word in enumerate(trg_words)}

    # Build training dictionary
    noov=0
    src_indices = []
    trg_indices = []
    f = open(args.dictionary_train, encoding=args.encoding, errors='surrogateescape')
    for line in f:
        src,trg = line.split()
        if args.max_vocab:
            src=src.lower()
            trg=trg.lower()
        try:
            src_ind = src_word2ind[src]
            trg_ind = trg_word2ind[trg]
            src_indices.append(src_ind)
            trg_indices.append(trg_ind)
        except KeyError:
            noov+=1
            if args.verbose:
                print('WARNING: OOV dictionary entry ({0} - {1})'.format(src, trg)) #, file=sys.stderr
    f.close()
    if args.verbose:
        print('Number of training pairs having at least one OOV: {}'.format(noov))
    src_indices = src_indices
    trg_indices = trg_indices
    if args.verbose:
        print('Normalizing embeddings...')
    # STEP 0: Normalization
    for action in args.normalize:
        if action == 'unit':
            x = embeddings.length_normalize(x)
            z = embeddings.length_normalize(z)
        elif action == 'center':
            x = embeddings.mean_center(x)
            z = embeddings.mean_center(z)
        elif action == 'unitdim':
            x = embeddings.length_normalize_dimensionwise(x)
            z = embeddings.length_normalize_dimensionwise(z)
        elif action == 'centeremb':
            x = embeddings.mean_center_embeddingwise(x)
            z = embeddings.mean_center_embeddingwise(z)


    # Step 1: Optimization
    if args.verbose:
        print('Beginning Optimization')
    start_time = time.time()
    x_count = len(set(src_indices))
    z_count = len(set(trg_indices))
    A = np.zeros((x_count,z_count))

    # Creating dictionary matrix from training set
    map_dict_src={}
    map_dict_trg={}
    I=0
    uniq_src=[]
    uniq_trg=[]
    for i in range(len(src_indices)):
        if src_indices[i] not in map_dict_src.keys():
            map_dict_src[src_indices[i]]=I
            I+=1
            uniq_src.append(src_indices[i])
    J=0
    for j in range(len(trg_indices)):
        if trg_indices[j] not in map_dict_trg.keys():
            map_dict_trg[trg_indices[j]]=J
            J+=1
            uniq_trg.append(trg_indices[j])

    for i in range(len(src_indices)):
        A[map_dict_src[src_indices[i]],map_dict_trg[trg_indices[i]]]=1

    np.random.seed(0)
    Lambda=args.l2_reg

    U1 = TT.matrix()
    U2 = TT.matrix()
    B  = TT.matrix()

    Kx, Kz = x[uniq_src], z[uniq_trg]
    XtAZ = Kx.T.dot(A.dot(Kz))
    XtX = Kx.T.dot(Kx)
    ZtZ = Kz.T.dot(Kz)
    # AA = np.sum(A*A) # this can be added if cost needs to be compared to original geomm

    W = (U1.dot(B)).dot(U2.T)
    regularizer = 0.5*Lambda*(TT.sum(B**2))
    sXtX = shared(XtX)
    sZtZ = shared(ZtZ)
    sXtAZ = shared(XtAZ)

    cost = regularizer
    wtxtxw = W.T.dot(sXtX.dot(W))
    wtxtxwztz = wtxtxw.dot(sZtZ)
    cost += TT.nlinalg.trace(wtxtxwztz)
    cost += -2 * TT.sum(W * sXtAZ)
    # cost += shared(AA) # this can be added if cost needs to be compared with original geomm

    solver = ConjugateGradient(maxtime=args.max_opt_time,maxiter=args.max_opt_iter)

    manifold =Product([Stiefel(x.shape[1], x.shape[1]),Stiefel(z.shape[1], x.shape[1]),PositiveDefinite(x.shape[1])])
    #manifold =Product([Stiefel(x.shape[1], 200),Stiefel(z.shape[1], 200),PositiveDefinite(200)])
    problem = Problem(manifold=manifold, cost=cost, arg=[U1,U2,B], verbosity=3)
    wopt = solver.solve(problem)

    w= wopt
    U1 = w[0]
    U2 = w[1]
    B = w[2]

    ### Save the models if requested
    if args.model_path is not None:
        os.makedirs(args.model_path,exist_ok=True)
        np.savetxt('{}/U_src.csv'.format(args.model_path),U1)
        np.savetxt('{}/U_tgt.csv'.format(args.model_path),U2)
        np.savetxt('{}/B.csv'.format(args.model_path),B)

    # Step 2: Transformation
    xw = x.dot(U1).dot(scipy.linalg.sqrtm(B))
    zw = z.dot(U2).dot(scipy.linalg.sqrtm(B))

    end_time = time.time()
    if args.verbose:
        print('Completed training in {0:.2f} seconds'.format(end_time-start_time))
    gc.collect()

    ### Save the GeoMM embeddings if requested
    xw_n = embeddings.length_normalize(xw)
    zw_n = embeddings.length_normalize(zw)
    if args.geomm_embeddings_path is not None:
        os.makedirs(args.geomm_embeddings_path,exist_ok=True)

        out_emb_fname=os.path.join(args.geomm_embeddings_path,'src.vec')
        with open(out_emb_fname,'w',encoding=args.encoding) as outfile:
            embeddings.write(src_words,xw_n,outfile)

        out_emb_fname=os.path.join(args.geomm_embeddings_path,'trg.vec')
        with open(out_emb_fname,'w',encoding=args.encoding) as outfile:
            embeddings.write(trg_words,zw_n,outfile)

    # Step 3: Evaluation
    if args.normalize_eval:
        xw = xw_n
        zw = zw_n

    X = xw[src_indices]
    Z = zw[trg_indices]

    # Loading test dictionary
    f = open(args.dictionary_test, encoding=args.encoding, errors='surrogateescape')
    src2trg = collections.defaultdict(set)
    trg2src = collections.defaultdict(set)
    oov = set()
    vocab = set()
    for line in f:
        src, trg = line.split()
        if args.max_vocab:
            src=src.lower()
            trg=trg.lower()
        try:
            src_ind = src_word2ind[src]
            trg_ind = trg_word2ind[trg]
            src2trg[src_ind].add(trg_ind)
            trg2src[trg_ind].add(src_ind)
            vocab.add(src)
        except KeyError:
            oov.add(src)
    src = list(src2trg.keys())
    trgt = list(trg2src.keys())

    oov -= vocab  # If one of the translation options is in the vocabulary, then the entry is not an oov
    coverage = len(src2trg) / (len(src2trg) + len(oov))
    f.close()

    translation = collections.defaultdict(int)
    translation5 = collections.defaultdict(list)
    translation10 = collections.defaultdict(list)

    ### compute nearest neighbours of x in z
    t=time.time()
    nbrhood_x=np.zeros(xw.shape[0])

    for i in range(0, len(src), BATCH_SIZE):
        j = min(i + BATCH_SIZE, len(src))
        similarities = xw[src[i:j]].dot(zw.T)
        similarities_x = -1*np.partition(-1*similarities,args.csls_neighbourhood-1 ,axis=1)
        nbrhood_x[src[i:j]]=np.mean(similarities_x[:,:args.csls_neighbourhood],axis=1)
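    # CSLS scoring used below: score(x, z) = 2*sim(x, z) - r_Z(x) - r_X(z), where
    # r_Z(x) (nbrhood_x) and r_X(z) (nbrhood_z) are the mean similarities to the
    # csls_neighbourhood nearest neighbours in the other embedding space.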

    ### compute nearest neighbours of z in x (GPU version)
    nbrhood_z=np.zeros(zw.shape[0])
    with cp.cuda.Device(0):
        nbrhood_z2=cp.zeros(zw.shape[0])
        batch_num=1
        for i in range(0, zw.shape[0], BATCH_SIZE):
            j = min(i + BATCH_SIZE, zw.shape[0])
            similarities = -1*cp.partition(-1*cp.dot(cp.asarray(zw[i:j]),cp.transpose(cp.asarray(xw))),args.csls_neighbourhood-1 ,axis=1)[:,:args.csls_neighbourhood]
            nbrhood_z2[i:j]=(cp.mean(similarities[:,:args.csls_neighbourhood],axis=1))
            batch_num+=1
        nbrhood_z=cp.asnumpy(nbrhood_z2)

    #### compute nearest neigbours of z in x (CPU version)
    #nbrhood_z=np.zeros(zw.shape[0])
    #for i in range(0, len(zw.shape[0]), BATCH_SIZE):
    #    j = min(i + BATCH_SIZE, len(zw.shape[0]))
    #    similarities = zw[i:j].dot(xw.T)
    #    similarities_z = -1*np.partition(-1*similarities,args.csls_neighbourhood-1 ,axis=1)
    #    nbrhood_z[i:j]=np.mean(similarities_z[:,:args.csls_neighbourhood],axis=1)

    #### find translation
    #for i in range(0, len(src), BATCH_SIZE):
    #    j = min(i + BATCH_SIZE, len(src))
    #    similarities = xw[src[i:j]].dot(zw.T)
    #    similarities = np.transpose(np.transpose(2*similarities) - nbrhood_x[src[i:j]]) - nbrhood_z
    #    nn = similarities.argmax(axis=1).tolist()
    #    similarities = np.argsort((similarities),axis=1)

    #    nn5 = (similarities[:,-5:])
    #    nn10 = (similarities[:,-10:])
    #    for k in range(j-i):
    #        translation[src[i+k]] = nn[k]
    #        translation5[src[i+k]] = nn5[k]
    #        translation10[src[i+k]] = nn10[k]


    #if args.geomm_embeddings_path is not None:
    #    delim=','
    #    os.makedirs(args.geomm_embeddings_path,exist_ok=True)

    #    translations_fname=os.path.join(args.geomm_embeddings_path,'translations.csv')
    #    with open(translations_fname,'w',encoding=args.encoding) as translations_file:
    #        for src_id in src:
    #            src_word = src_words[src_id]
    #            all_trg_words = [ trg_words[trg_id] for trg_id in src2trg[src_id] ]
    #            trgout_words = [ trg_words[j] for j in translation10[src_id] ]
    #            ss = list(nn10[src_id,:])
    #
    #            p1 = ':'.join(all_trg_words)
    #            p2 = delim.join( [ '{}{}{}'.format(w,delim,s) for w,s in zip(trgout_words,ss) ] )
    #            translations_file.write( '{s}{delim}{p1}{delim}{p2}\n'.format(s=src_word, delim=delim, p1=p1, p2=p2) )

    ### find translation  (and write to file if output requested)
    delim=','
    translations_file =None
    if args.geomm_embeddings_path is not None:
        os.makedirs(args.geomm_embeddings_path,exist_ok=True)
        translations_fname=os.path.join(args.geomm_embeddings_path,'translations.csv')
        translations_file = open(translations_fname,'w',encoding=args.encoding)

    for i in range(0, len(src), BATCH_SIZE):
        j = min(i + BATCH_SIZE, len(src))
        similarities = xw[src[i:j]].dot(zw.T)
        similarities = np.transpose(np.transpose(2*similarities) - nbrhood_x[src[i:j]]) - nbrhood_z
        nn = similarities.argmax(axis=1).tolist()
        similarities = np.argsort((similarities),axis=1)

        nn5 = (similarities[:,-5:])
        nn10 = (similarities[:,-10:])
        for k in range(j-i):
            translation[src[i+k]] = nn[k]
            translation5[src[i+k]] = nn5[k]
            translation10[src[i+k]] = nn10[k]


            if args.geomm_embeddings_path is not None:
                src_id=src[i+k]
                src_word = src_words[src_id]
                all_trg_words = [ trg_words[trg_id] for trg_id in src2trg[src_id] ]
                trgout_words = [ trg_words[j] for j in translation10[src_id] ]
                #ss = list(nn10[src_id,:])

                p1 = ':'.join(all_trg_words)
                p2 = ':'.join(trgout_words)
                #p2 = delim.join( [ '{}{}{}'.format(w,delim,s) for w,s in zip(trgout_words,ss) ] )
                translations_file.write( '{s}{delim}{p1}{delim}{p2}\n'.format(s=src_word, p1=p1, p2=p2, delim=delim) )

    if args.geomm_embeddings_path is not None:
        translations_file.close()

    accuracy = np.mean([1 if translation[i] in src2trg[i] else 0 for i in src])
    mean=0
    for i in src:
        for k in translation5[i]:
            if k in src2trg[i]:
                mean+=1
                break

    mean/=len(src)
    accuracy5 = mean

    mean=0
    for i in src:
        for k in translation10[i]:
            if k in src2trg[i]:
                mean+=1
                break

    mean/=len(src)
    accuracy10 = mean
    message = args.src_input.split(".")[-2] + "-->" + args.trg_input.split(".")[-2] + ": " + \
        'Coverage:{0:7.2%}  Accuracy:{1:7.2%}'.format(coverage, accuracy)
    print(message)
Example #19
class TestProductManifold(unittest.TestCase):
    def setUp(self):
        self.m = m = 100
        self.n = n = 50
        self.euclidean = Euclidean(m, n)
        self.sphere = Sphere(n)
        self.man = Product([self.euclidean, self.sphere])

    def test_dim(self):
        np_testing.assert_equal(self.man.dim, self.m * self.n + self.n - 1)

    def test_typicaldist(self):
        np_testing.assert_equal(self.man.typicaldist,
                                np.sqrt((self.m * self.n) + np.pi**2))

    def test_dist(self):
        X = self.man.rand()
        Y = self.man.rand()
        np_testing.assert_equal(
            self.man.dist(X, Y),
            np.sqrt(
                self.euclidean.dist(X[0], Y[0])**2 +
                self.sphere.dist(X[1], Y[1])**2))

    # def test_inner(self):

    # def test_proj(self):

    # def test_ehess2rhess(self):

    # def test_retr(self):

    # def test_egrad2rgrad(self):

    # def test_norm(self):

    # def test_rand(self):

    # def test_randvec(self):

    # def test_transp(self):

    def test_exp_log_inverse(self):
        s = self.man
        X = s.rand()
        Y = s.rand()
        Yexplog = s.exp(X, s.log(X, Y))
        np_testing.assert_almost_equal(s.dist(Y, Yexplog), 0)

    def test_log_exp_inverse(self):
        s = self.man
        X = s.rand()
        U = s.randvec(X)
        Ulogexp = s.log(X, s.exp(X, U))
        np_testing.assert_array_almost_equal(U[0], Ulogexp[0])
        np_testing.assert_array_almost_equal(U[1], Ulogexp[1])

    def test_pairmean(self):
        s = self.man
        X = s.rand()
        Y = s.rand()
        Z = s.pairmean(X, Y)
        np_testing.assert_array_almost_equal(s.dist(X, Z), s.dist(Y, Z))
if __name__ == "__main__":
    # Generate random data
    X = np.random.randn(3, 100)
    Y = X[0:1, :] - 2 * X[1:2, :] + np.random.randn(1, 100) + 5

    # Cost function is the squared test error
    w = T.matrix()
    b = T.matrix()
    cost = T.sum((Y - w.T.dot(X) - b[0, 0])**2)

    # A solver that involves the hessian
    solver = TrustRegions()

    # R^3 x R^1
    manifold = Product([Euclidean(3, 1), Euclidean(1, 1)])

    # Solve the problem with pymanopt
    problem = Problem(manifold=manifold, cost=cost, arg=[w, b], verbosity=0)
    wopt = solver.solve(problem)

    print('Weights found by pymanopt (top) / ' 'closed form solution (bottom)')

    print(wopt[0].T)
    print(wopt[1])

    X1 = np.concatenate((X, np.ones((1, 100))), axis=0)
    wclosed = np.linalg.inv(X1.dot(X1.T)).dot(X1).dot(Y.T)
    print(wclosed[0:3].T)
    print(wclosed[3])
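
# --- Illustrative addition (not part of the original example) ------------------------
# The same regression written against the decorator-based autograd backend used in other
# examples on this page.  This assumes pymanopt 2.x, where optimizers live in
# pymanopt.optimizers and are invoked with .run():
import autograd.numpy as np
import pymanopt
from pymanopt.manifolds import Euclidean, Product
from pymanopt.optimizers import TrustRegions

X = np.random.randn(3, 100)
Y = X[0:1, :] - 2 * X[1:2, :] + np.random.randn(1, 100) + 5

manifold = Product([Euclidean(3, 1), Euclidean(1, 1)])

@pymanopt.function.autograd(manifold)
def cost(w, b):
    return np.sum((Y - w.T @ X - b[0, 0]) ** 2)

problem = pymanopt.Problem(manifold, cost)
result = TrustRegions(verbosity=0).run(problem)
print(result.point[0].T, result.point[1])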
Beispiel #21
0
class TestProblemBackendInterface(TestCase):
    def setUp(self):
        self.m = m = 20
        self.n = n = 10
        self.rank = rank = 3

        A = np.random.normal(size=(m, n))
        self.manifold = Product([FixedRankEmbedded(m, n, rank), Euclidean(n)])

        @pymanopt.function.autograd(self.manifold)
        def cost(u, s, vt, x):
            return np.linalg.norm(((u * s) @ vt - A) @ x) ** 2

        self.cost = cost
        self.gradient = self.cost.get_gradient_operator()
        self.hessian = self.cost.get_hessian_operator()

        self.problem = pymanopt.Problem(self.manifold, self.cost)

    def test_cost_function(self):
        (u, s, vt), x = self.manifold.random_point()
        self.cost(u, s, vt, x)

    def test_gradient_operator_shapes(self):
        (u, s, vt), x = self.manifold.random_point()
        gu, gs, gvt, gx = self.gradient(u, s, vt, x)
        self.assertEqual(gu.shape, (self.m, self.rank))
        self.assertEqual(gs.shape, (self.rank,))
        self.assertEqual(gvt.shape, (self.rank, self.n))
        self.assertEqual(gx.shape, (self.n,))

    def test_hessian_operator_shapes(self):
        (u, s, vt), x = self.manifold.random_point()
        (a, b, c), d = self.manifold.random_point()
        hu, hs, hvt, hx = self.hessian(u, s, vt, x, a, b, c, d)
        self.assertEqual(hu.shape, (self.m, self.rank))
        self.assertEqual(hs.shape, (self.rank,))
        self.assertEqual(hvt.shape, (self.rank, self.n))
        self.assertEqual(hx.shape, (self.n,))

    def test_problem_cost(self):
        cost = self.problem.cost
        X = self.manifold.random_point()
        (u, s, vt), x = X
        np_testing.assert_allclose(cost(X), self.cost(u, s, vt, x))

    def test_problem_gradient_operator(self):
        X = self.manifold.random_point()
        (u, s, vt), x = X
        G = self.problem.euclidean_gradient(X)
        (gu, gs, gvt), gx = G
        for ga, gb in zip((gu, gs, gvt, gx), self.gradient(u, s, vt, x)):
            np_testing.assert_allclose(ga, gb)

    def test_problem_hessian_operator(self):
        ehess = self.problem.euclidean_hessian
        X = self.manifold.random_point()
        U = self.manifold.random_point()
        H = ehess(X, U)

        (u, s, vt), x = X
        (a, b, c), d = U

        (hu, hs, hvt), hx = H
        for ha, hb in zip(
            (hu, hs, hvt, hx), self.hessian(u, s, vt, x, a, b, c, d)
        ):
            np_testing.assert_allclose(ha, hb)
Beispiel #22
0
    def setUp(self):
        self.m = m = 100
        self.n = n = 50
        self.euclidean = Euclidean(m, n)
        self.sphere = Sphere(n)
        self.man = Product([self.euclidean, self.sphere])
Beispiel #23
0
def main():
    # Parse command line arguments
    parser = argparse.ArgumentParser(
        description='Map the source embeddings into the target embedding space'
    )
    parser.add_argument('src_input', help='the input source embeddings')
    parser.add_argument('mid_input', help='the input pivot embeddings')
    parser.add_argument('trg_input', help='the input target embeddings')
    parser.add_argument(
        '--encoding',
        default='utf-8',
        help='the character encoding for input/output (defaults to utf-8)')
    parser.add_argument(
        '--max_vocab',
        default=0,
        type=int,
        help='Maximum vocabulary to be loaded, 0 allows complete vocabulary')
    parser.add_argument('--verbose', default=0, type=int, help='Verbose')
    mapping_group = parser.add_argument_group(
        'mapping arguments', 'Basic embedding mapping arguments')
    mapping_group.add_argument(
        '-dtrain1',
        '--dictionary_train1',
        default=sys.stdin.fileno(),
        help='the first training dictionary file (defaults to stdin)')
    mapping_group.add_argument(
        '-dtrain2',
        '--dictionary_train2',
        default=sys.stdin.fileno(),
        help='the second training dictionary file (defaults to stdin)')
    mapping_group.add_argument(
        '-dtest',
        '--dictionary_test',
        default=sys.stdin.fileno(),
        help='the test dictionary file (defaults to stdin)')
    mapping_group.add_argument(
        '--normalize',
        choices=['unit', 'center', 'unitdim', 'centeremb'],
        nargs='*',
        default=[],
        help='the normalization actions to perform in order')

    geomm_group = parser.add_argument_group('GeoMM arguments',
                                            'Arguments for GeoMM method')
    geomm_group.add_argument('--l2_reg',
                             type=float,
                             default=1e2,
                             help='Lambda for L2 Regularization')
    geomm_group.add_argument(
        '--max_opt_time',
        type=int,
        default=5000,
        help='Maximum time limit for optimization in seconds')
    geomm_group.add_argument(
        '--max_opt_iter',
        type=int,
        default=150,
        help='Maximum number of iterations for optimization')

    eval_group = parser.add_argument_group('evaluation arguments',
                                           'Arguments for evaluation')
    eval_group.add_argument('--normalize_eval',
                            action='store_true',
                            help='Normalize the embeddings at test time')
    eval_group.add_argument('--eval_batch_size',
                            type=int,
                            default=1000,
                            help='Batch size for evaluation')
    eval_group.add_argument('--csls_neighbourhood',
                            type=int,
                            default=10,
                            help='Neighbourhood size for CSLS')

    args = parser.parse_args()
    BATCH_SIZE = args.eval_batch_size

    # Logging
    method_name = os.path.join('logs', 'geomm_cmp_pip')
    directory = os.path.join(
        os.path.join(os.getcwd(), method_name),
        datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S'))
    if not os.path.exists(directory):
        os.makedirs(directory)
    log_file_name, file_extension = os.path.splitext(
        os.path.basename(args.dictionary_test))
    log_file_name = log_file_name + '.log'

    class Logger(object):
        def __init__(self):
            self.terminal = sys.stdout
            self.log = open(os.path.join(directory, log_file_name), "a")

        def write(self, message):
            self.terminal.write(message)
            self.log.write(message)

        def flush(self):
            #this flush method is needed for python 3 compatibility.
            #this handles the flush command by doing nothing.
            #you might want to specify some extra behavior here.
            pass

    sys.stdout = Logger()
    if args.verbose:
        print('Current arguments: {0}'.format(args))

    dtype = 'float32'

    if args.verbose:
        print('Loading train data...')
    # Read input embeddings
    srcfile = open(args.src_input,
                   encoding=args.encoding,
                   errors='surrogateescape')
    midfile = open(args.mid_input,
                   encoding=args.encoding,
                   errors='surrogateescape')
    trgfile = open(args.trg_input,
                   encoding=args.encoding,
                   errors='surrogateescape')

    src_words, x = embeddings.read(srcfile,
                                   max_voc=args.max_vocab,
                                   dtype=dtype)
    mid_words, y = embeddings.read(midfile,
                                   max_voc=args.max_vocab,
                                   dtype=dtype)
    trg_words, z = embeddings.read(trgfile,
                                   max_voc=args.max_vocab,
                                   dtype=dtype)

    # Build word to index map
    src_word2ind = {word: i for i, word in enumerate(src_words)}
    mid_word2ind = {word: i for i, word in enumerate(mid_words)}
    trg_word2ind = {word: i for i, word in enumerate(trg_words)}

    # Build training dictionary-1
    src_indices12 = []
    trg_indices12 = []
    f = open(args.dictionary_train1,
             encoding=args.encoding,
             errors='surrogateescape')
    for line in f:
        src, trg = line.split()
        if args.max_vocab:
            src = src.lower()
            trg = trg.lower()
        try:
            src_ind = src_word2ind[src]
            trg_ind = mid_word2ind[trg]
            src_indices12.append(src_ind)
            trg_indices12.append(trg_ind)
        except KeyError:
            if args.verbose:
                print('WARNING: OOV dictionary entry ({0} - {1})'.format(
                    src, trg),
                      file=sys.stderr)
    f.close()

    # Build training dictionary-2
    src_indices23 = []
    trg_indices23 = []
    f = open(args.dictionary_train2,
             encoding=args.encoding,
             errors='surrogateescape')
    for line in f:
        src, trg = line.split()
        if args.max_vocab:
            src = src.lower()
            trg = trg.lower()
        try:
            src_ind = mid_word2ind[src]
            trg_ind = trg_word2ind[trg]
            src_indices23.append(src_ind)
            trg_indices23.append(trg_ind)
        except KeyError:
            if args.verbose:
                print('WARNING: OOV dictionary entry ({0} - {1})'.format(
                    src, trg),
                      file=sys.stderr)
    f.close()

    if args.verbose:
        print('Normalizing embeddings...')
    # STEP 0: Normalization
    for action in args.normalize:
        if action == 'unit':
            x = embeddings.length_normalize(x)
            y = embeddings.length_normalize(y)
            z = embeddings.length_normalize(z)
        elif action == 'center':
            x = embeddings.mean_center(x)
            y = embeddings.mean_center(y)
            z = embeddings.mean_center(z)
        elif action == 'unitdim':
            x = embeddings.length_normalize_dimensionwise(x)
            y = embeddings.length_normalize_dimensionwise(y)
            z = embeddings.length_normalize_dimensionwise(z)
        elif action == 'centeremb':
            x = embeddings.mean_center_embeddingwise(x)
            y = embeddings.mean_center_embeddingwise(y)
            z = embeddings.mean_center_embeddingwise(z)

    # Step 1.1: Optimization-1
    if args.verbose:
        print('Beginning Optimization-1')
    start_time = time.time()

    x_count = len(set(src_indices12))
    y_count = len(set(trg_indices12))
    A = np.zeros((x_count, y_count))

    # Creating dictionary matrix from training set
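    # A[i, j] = 1 iff the i-th unique source word and the j-th unique target word form a
    # training pair; map_dict_src / map_dict_trg map embedding indices to rows / columns of A.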
    map_dict_src = {}
    map_dict_trg = {}
    I = 0
    uniq_src = []
    uniq_trg = []
    for i in range(len(src_indices12)):
        if src_indices12[i] not in map_dict_src.keys():
            map_dict_src[src_indices12[i]] = I
            I += 1
            uniq_src.append(src_indices12[i])
    J = 0
    for j in range(len(trg_indices12)):
        if trg_indices12[j] not in map_dict_trg.keys():
            map_dict_trg[trg_indices12[j]] = J
            J += 1
            uniq_trg.append(trg_indices12[j])

    for i in range(len(src_indices12)):
        A[map_dict_src[src_indices12[i]], map_dict_trg[trg_indices12[i]]] = 1

    np.random.seed(0)
    Lambda = args.l2_reg
    U1 = TT.matrix()
    U2 = TT.matrix()
    B = TT.matrix()
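    # GeoMM objective (see the cost below): ||X_s (U1 B U2^T) Y_t^T - A||_F^2
    # + (Lambda / 2) * ||B||_F^2, where A is the binary dictionary matrix built above
    # and U1, U2 are Stiefel factors with B positive definite.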
    cost = TT.sum(((shared(x[uniq_src]).dot(U1.dot(B.dot(U2.T)))).dot(
        shared(y[uniq_trg]).T) - A)**2) + 0.5 * Lambda * (TT.sum(B**2))

    solver = ConjugateGradient(maxtime=args.max_opt_time,
                               maxiter=args.max_opt_iter)

    low_rank = 300
    manifold = Product([
        Stiefel(x.shape[1], low_rank),
        Stiefel(y.shape[1], low_rank),
        PositiveDefinite(low_rank)
    ])
    problem = Problem(manifold=manifold,
                      cost=cost,
                      arg=[U1, U2, B],
                      verbosity=3)
    wopt = solver.solve(problem)

    w = wopt
    U1 = w[0]
    U2 = w[1]
    B = w[2]
    w12 = U1.dot(B).dot(U2.T)
    u11 = U1
    u21 = U2
    b1 = B

    # Step 1.2: Optimization-2
    if args.verbose:
        print('Beginning Optimization-2')
    y_count = len(set(src_indices23))
    z_count = len(set(trg_indices23))
    A = np.zeros((y_count, z_count))

    # Creating dictionary matrix from training set
    map_dict_src = {}
    map_dict_trg = {}
    I = 0
    uniq_src = []
    uniq_trg = []
    for i in range(len(src_indices23)):
        if src_indices23[i] not in map_dict_src.keys():
            map_dict_src[src_indices23[i]] = I
            I += 1
            uniq_src.append(src_indices23[i])
    J = 0
    for j in range(len(trg_indices23)):
        if trg_indices23[j] not in map_dict_trg.keys():
            map_dict_trg[trg_indices23[j]] = J
            J += 1
            uniq_trg.append(trg_indices23[j])

    for i in range(len(src_indices23)):
        A[map_dict_src[src_indices23[i]], map_dict_trg[trg_indices23[i]]] = 1

    np.random.seed(0)
    U1 = TT.matrix()
    U2 = TT.matrix()
    B = TT.matrix()
    cost = TT.sum(((shared(y[uniq_src]).dot(U1.dot(B.dot(U2.T)))).dot(
        shared(z[uniq_trg]).T) - A)**2) + 0.5 * Lambda * (TT.sum(B**2))
    solver = ConjugateGradient(maxtime=args.max_opt_time,
                               maxiter=args.max_opt_iter)

    low_rank = 300
    manifold = Product([
        Stiefel(y.shape[1], low_rank),
        Stiefel(z.shape[1], low_rank),
        PositiveDefinite(low_rank)
    ])
    problem = Problem(manifold=manifold,
                      cost=cost,
                      arg=[U1, U2, B],
                      verbosity=3)
    wopt = solver.solve(problem)

    w = wopt
    U1 = w[0]
    U2 = w[1]
    B = w[2]
    w23 = U1.dot(B).dot(U2.T)
    u22 = U1
    u32 = U2
    b2 = B

    # Step 2: Transformation
    w12_1 = u11.dot(scipy.linalg.sqrtm(b1))
    w12_2 = u21.dot(scipy.linalg.sqrtm(b1))
    w23_1 = u22.dot(scipy.linalg.sqrtm(b2))
    w23_2 = u32.dot(scipy.linalg.sqrtm(b2))

    end_time = time.time()
    if args.verbose:
        print('Completed training in {0:.2f} seconds'.format(end_time -
                                                             start_time))
    gc.collect()

    # Step 3: Evaluation
    # Loading test dictionary
    f = open(args.dictionary_test,
             encoding=args.encoding,
             errors='surrogateescape')
    src2trg = collections.defaultdict(set)
    trg2src = collections.defaultdict(set)
    oov = set()
    vocab = set()
    for line in f:
        src, trg = line.split()
        if args.max_vocab:
            src = src.lower()
            trg = trg.lower()
        try:
            src_ind = src_word2ind[src]
            trg_ind = trg_word2ind[trg]
            src2trg[src_ind].add(trg_ind)
            trg2src[trg_ind].add(src_ind)
            vocab.add(src)
        except KeyError:
            oov.add(src)
    src = list(src2trg.keys())
    trgt = list(trg2src.keys())

    oov -= vocab  # If one of the translation options is in the vocabulary, then the entry is not an oov
    coverage = len(src2trg) / (len(src2trg) + len(oov))
    f.close()

    # Composition (CMP)
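    # CMP composes the two learned maps: source embeddings are sent straight to the target
    # space via w12.dot(w23) and compared against the unmapped target embeddings.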
    xw = x.dot(w12).dot(w23)
    zw = z
    if args.normalize_eval:
        xw = embeddings.length_normalize(xw)
        zw = embeddings.length_normalize(zw)

    translation = collections.defaultdict(int)
    translation5 = collections.defaultdict(list)
    translation10 = collections.defaultdict(list)

    t = time.time()
    nbrhood_x = np.zeros(xw.shape[0])
    nbrhood_z = np.zeros(zw.shape[0])
    nbrhood_z2 = cp.zeros(zw.shape[0])
    for i in range(0, len(src), BATCH_SIZE):
        j = min(i + BATCH_SIZE, len(src))
        similarities = xw[src[i:j]].dot(zw.T)
        similarities_x = -1 * np.partition(
            -1 * similarities, args.csls_neighbourhood - 1, axis=1)
        nbrhood_x[src[i:j]] = np.mean(
            similarities_x[:, :args.csls_neighbourhood], axis=1)

    batch_num = 1
    for i in range(0, zw.shape[0], BATCH_SIZE):
        j = min(i + BATCH_SIZE, zw.shape[0])
        similarities = -1 * cp.partition(
            -1 * cp.dot(cp.asarray(zw[i:j]), cp.transpose(cp.asarray(xw))),
            args.csls_neighbourhood - 1,
            axis=1)[:, :args.csls_neighbourhood]
        nbrhood_z2[i:j] = (cp.mean(similarities[:, :args.csls_neighbourhood],
                                   axis=1))
        batch_num += 1
    nbrhood_z = cp.asnumpy(nbrhood_z2)
    for i in range(0, len(src), BATCH_SIZE):
        j = min(i + BATCH_SIZE, len(src))
        similarities = xw[src[i:j]].dot(zw.T)
        similarities = np.transpose(
            np.transpose(2 * similarities) - nbrhood_x[src[i:j]]) - nbrhood_z
        nn = similarities.argmax(axis=1).tolist()
        similarities = np.argsort((similarities), axis=1)

        nn5 = (similarities[:, -5:])
        nn10 = (similarities[:, -10:])
        for k in range(j - i):
            translation[src[i + k]] = nn[k]
            translation5[src[i + k]] = nn5[k]
            translation10[src[i + k]] = nn10[k]
    accuracy = np.mean([1 if translation[i] in src2trg[i] else 0 for i in src])
    mean = 0
    for i in src:
        for k in translation5[i]:
            if k in src2trg[i]:
                mean += 1
                break

    mean /= len(src)
    accuracy5 = mean

    mean = 0
    for i in src:
        for k in translation10[i]:
            if k in src2trg[i]:
                mean += 1
                break

    mean /= len(src)
    accuracy10 = mean
    print(
        'CMP: Coverage:{0:7.2%}  Accuracy:{1:7.2%}  Accuracy(Top 5):{2:7.2%}  Accuracy(Top 10):{3:7.2%}'
        .format(coverage, accuracy, accuracy5, accuracy10))

    # Pipeline (PIP)
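    # PIP translates in two hops: Stage 1 maps source (w12_1) and pivot (w12_2) embeddings
    # into a common space and retrieves a pivot translation for every test word; Stage 2
    # maps pivot (w23_1) and target (w23_2) embeddings and translates those pivot words
    # into the target language.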
    xw = x.dot(w12_1)
    zw = y.dot(w12_2)
    if args.normalize_eval:
        xw = embeddings.length_normalize(xw)
        zw = embeddings.length_normalize(zw)

    translation12 = collections.defaultdict(int)
    # PIP-Stage 1
    t = time.time()
    nbrhood_x = np.zeros(xw.shape[0])
    nbrhood_z = np.zeros(zw.shape[0])
    nbrhood_z2 = cp.zeros(zw.shape[0])
    for i in range(0, len(src), BATCH_SIZE):
        j = min(i + BATCH_SIZE, len(src))
        similarities = xw[src[i:j]].dot(zw.T)
        similarities_x = -1 * np.partition(
            -1 * similarities, args.csls_neighbourhood - 1, axis=1)
        nbrhood_x[src[i:j]] = np.mean(
            similarities_x[:, :args.csls_neighbourhood], axis=1)

    batch_num = 1
    for i in range(0, zw.shape[0], BATCH_SIZE):
        j = min(i + BATCH_SIZE, zw.shape[0])
        similarities = -1 * cp.partition(
            -1 * cp.dot(cp.asarray(zw[i:j]), cp.transpose(cp.asarray(xw))),
            args.csls_neighbourhood - 1,
            axis=1)[:, :args.csls_neighbourhood]
        nbrhood_z2[i:j] = (cp.mean(similarities[:, :args.csls_neighbourhood],
                                   axis=1))
        batch_num += 1
    nbrhood_z = cp.asnumpy(nbrhood_z2)
    for i in range(0, len(src), BATCH_SIZE):
        j = min(i + BATCH_SIZE, len(src))
        similarities = xw[src[i:j]].dot(zw.T)
        similarities = np.transpose(
            np.transpose(2 * similarities) - nbrhood_x[src[i:j]]) - nbrhood_z
        nn = similarities.argmax(axis=1).tolist()
        for k in range(j - i):
            translation[src[i + k]] = nn[k]

    # PIP-Stage 2
    mid = [translation[sr] for sr in src]
    xw = y.dot(w23_1)
    zw = z.dot(w23_2)
    if args.normalize_eval:
        xw = embeddings.length_normalize(xw)
        zw = embeddings.length_normalize(zw)

    translation = collections.defaultdict(int)
    translation5 = collections.defaultdict(list)
    translation10 = collections.defaultdict(list)

    t = time.time()
    nbrhood_x = np.zeros(xw.shape[0])
    nbrhood_z = np.zeros(zw.shape[0])
    nbrhood_z2 = cp.zeros(zw.shape[0])
    for i in range(0, len(mid), BATCH_SIZE):
        j = min(i + BATCH_SIZE, len(mid))
        similarities = xw[mid[i:j]].dot(zw.T)
        # similarities_x = np.sort(similarities, axis=1)
        similarities_x = -1 * np.partition(
            -1 * similarities, args.csls_neighbourhood - 1, axis=1)
        nbrhood_x[mid[i:j]] = np.mean(
            similarities_x[:, :args.csls_neighbourhood], axis=1)

    batch_num = 1
    for i in range(0, zw.shape[0], BATCH_SIZE):
        j = min(i + BATCH_SIZE, zw.shape[0])
        similarities = -1 * cp.partition(
            -1 * cp.dot(cp.asarray(zw[i:j]), cp.transpose(cp.asarray(xw))),
            args.csls_neighbourhood - 1,
            axis=1)[:, :args.csls_neighbourhood]
        nbrhood_z2[i:j] = (cp.mean(similarities[:, :args.csls_neighbourhood],
                                   axis=1))
        batch_num += 1
    nbrhood_z = cp.asnumpy(nbrhood_z2)
    for i in range(0, len(mid), BATCH_SIZE):
        j = min(i + BATCH_SIZE, len(mid))
        similarities = xw[mid[i:j]].dot(zw.T)
        similarities = np.transpose(
            np.transpose(2 * similarities) - nbrhood_x[mid[i:j]]) - nbrhood_z
        nn = similarities.argmax(axis=1).tolist()
        similarities = np.argsort((similarities), axis=1)

        nn5 = (similarities[:, -5:])
        nn10 = (similarities[:, -10:])
        for k in range(j - i):
            translation[src[i + k]] = nn[k]
            translation5[src[i + k]] = nn5[k]
            translation10[src[i + k]] = nn10[k]

    accuracy = np.mean([1 if translation[i] in src2trg[i] else 0 for i in src])
    mean = 0
    for i in src:
        for k in translation5[i]:
            if k in src2trg[i]:
                mean += 1
                break

    mean /= len(src)
    accuracy5 = mean

    mean = 0
    for i in src:
        for k in translation10[i]:
            if k in src2trg[i]:
                mean += 1
                break

    mean /= len(src)
    accuracy10 = mean
    print(
        'PIP: Coverage:{0:7.2%}  Accuracy:{1:7.2%}  Accuracy(Top 5):{2:7.2%}  Accuracy(Top 10):{3:7.2%}'
        .format(coverage, accuracy, accuracy5, accuracy10))
Beispiel #24
0
    def evaluate(self):
        # Perform reconstruction
        if self.recon_type == 'tp':  # Estimate theta and phi
            manifold = Sphere(3)

            def cost(X, data=self.data):
                ll = self.multiframe.noise_model.loglikelihood(
                    util.xyz2tp(*X), data)
                return -ll

            problem = Problem(manifold=manifold, cost=cost, verbosity=0)
            start_pts = [
                np.array(util.tp2xyz(*x)) for x in util.sphere_profile(20)
            ]
            solver = ParticleSwarm(maxcostevals=200)
            Xopt = solver.solve(problem, x=start_pts)
            self.estimated_fluorophore = fluorophore.Fluorophore(*util.xyz2tp(
                *Xopt))
        elif self.recon_type == 'tpc':  # Estimate theta, phi, constant
            # Create manifold and cost function
            manifold = Product((Sphere(3), Euclidean(1)))

            def cost(X, data=self.data):
                estimate = np.hstack([util.xyz2tp(*X[0]), X[1]])
                ll = self.multiframe.noise_model.loglikelihood(estimate, data)
                return -ll

            problem = Problem(manifold=manifold, cost=cost, verbosity=0)

            # Generate start_pts and format
            xyz_start_pts = 3 * [
                np.array(util.tp2xyz(*x)) for x in util.sphere_profile(10)
            ]
            c_start_pts = np.expand_dims(np.hstack(
                (10 * [0.1], 10 * [2], 10 * [10])),
                                         axis=1)
            start_pts = np.hstack((xyz_start_pts, c_start_pts))
            pts = []
            for start_pt in start_pts:
                pts.append([np.array(start_pt[0:3]), np.array(start_pt[3:5])])

            # Solve
            solver = ParticleSwarm(maxcostevals=250)
            Xopt = solver.solve(problem, x=pts)

            self.estimated_fluorophore = fluorophore.Fluorophore(
                *np.hstack([util.xyz2tp(*Xopt[0]), Xopt[1]]).flatten())

        elif self.recon_type == 'tpck':  # Estimate theta, phi, constant, kappa
            # Create manifold and cost function
            manifold = Product((Sphere(3), Euclidean(2)))

            def cost(X, data=self.data):
                estimate = np.array([
                    util.xyz2tp(*X[0]), X[1]
                ]).flatten()  # Reshape data for loglikelihood function
                ll = self.multiframe.noise_model.loglikelihood(estimate, data)
                print(estimate, ll)
                return -ll

            problem = Problem(manifold=manifold, cost=cost, verbosity=0)

            # Generate start_pts and format
            xyz_start_pts = 3 * [
                np.array(util.tp2xyz(*x)) for x in util.sphere_profile(10)
            ]
            k_start_pts = np.expand_dims(np.hstack(
                (10 * [-100], 10 * [0], 10 * [100])),
                                         axis=1)
            c_start_pts = np.expand_dims(np.hstack(
                (10 * [0.1], 10 * [1], 10 * [10])),
                                         axis=1)
            start_pts = np.hstack((xyz_start_pts, c_start_pts, k_start_pts))
            pts = []
            for start_pt in start_pts:
                pts.append([np.array(start_pt[0:3]), np.array(start_pt[3:5])])

            # Solve
            solver = ParticleSwarm(maxcostevals=200)
            Xopt = solver.solve(problem, x=pts)

            self.estimated_fluorophore = fluorophore.Fluorophore(
                *np.array([util.xyz2tp(*Xopt[0]), Xopt[1]]).flatten())
# +
import sys


sys.path.insert(0, "../..")

# autograd's numpy is needed for the np.* calls in the cost below
import autograd.numpy as np

from autograd.scipy.special import logsumexp

import pymanopt
from pymanopt import Problem
from pymanopt.manifolds import Euclidean, Product, SymmetricPositiveDefinite
from pymanopt.optimizers import SteepestDescent


# (1) Instantiate the manifold
manifold = Product([SymmetricPositiveDefinite(D + 1, k=K), Euclidean(K - 1)])

# (2) Define cost function
# The parameters must be contained in a list theta.
@pymanopt.function.autograd(manifold)
def cost(S, v):
    # Unpack parameters
    nu = np.append(v, 0)

    logdetS = np.expand_dims(np.linalg.slogdet(S)[1], 1)
    y = np.concatenate([samples.T, np.ones((1, N))], axis=0)

    # Calculate log_q
    y = np.expand_dims(y, 0)

    # 'Probability' of y belonging to each cluster
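
# --- Illustrative addition (not part of the original example) ------------------------
# The excerpt above breaks off before the mixture log-likelihood is assembled.  As a
# self-contained stand-in (a made-up cost, not the original one), the snippet below shows
# the same pattern on a small Product manifold: an autograd cost decorated for the
# manifold and a SteepestDescent run.
import autograd.numpy as np
import pymanopt
from pymanopt.manifolds import Euclidean, Product, SymmetricPositiveDefinite
from pymanopt.optimizers import SteepestDescent

np.random.seed(0)
samples = np.random.randn(500, 3)                        # hypothetical data
target_cov = samples.T @ samples / len(samples)

toy_manifold = Product([SymmetricPositiveDefinite(3), Euclidean(3)])

@pymanopt.function.autograd(toy_manifold)
def toy_cost(S, mu):
    # Fit an SPD matrix to the empirical covariance and a mean vector to the samples.
    return np.sum((S - target_cov) ** 2) + np.mean(np.sum((samples - mu) ** 2, axis=1))

toy_problem = pymanopt.Problem(toy_manifold, toy_cost)
result = SteepestDescent(verbosity=0).run(toy_problem)
S_hat, mu_hat = result.point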
Beispiel #26
0
    def manifold_fit(self, Y, X):
        from pymanopt.manifolds import Rotations, Euclidean, Product
        from pymanopt import Problem
        from pymanopt.solvers import TrustRegions
        # from minimal_varx import make_normalized_G

        cov_res, cov_xlag, cov_y_xlag = calc_extended_covariance(Y, X, self.p)
        self.set_covs(cov_res, cov_xlag)
        # m = X.shape[0]

        with_c = (self.mm_degree > self.agg_rnk) and (self.m - self.agg_rnk)
        C = Euclidean(self.mm_degree - self.agg_rnk, self.m - self.agg_rnk)
        RG = Rotations(self.m)
        if with_c:
            raise (ValueError(
                "This option is implemented only for self.agg_rnk == m"))
        if with_c:
            manifold = Product([RG, C])
        else:
            manifold = RG
        if not with_c:
            c_null = np.zeros(
                (self.mm_degree - self.agg_rnk, self.m - self.agg_rnk))
        else:
            c_null = None

        if with_c:

            def cost(x):
                o, c = x
                G = make_normalized_G(self, self.m, o, c)
                self.calc_states(G)
                return self.neg_log_llk
        else:

            def cost(x):
                G = make_normalized_G(self, self.m, x, c_null)
                self.calc_states(G)
                return self.neg_log_llk

        if with_c:

            def egrad(x):
                o, c = x
                grad_o, grad_c = self.map_o_c_grad(self._gradient_tensor, o, c)
                return [grad_o, grad_c]
        else:

            def egrad(x):
                grad_o, grad_c = self.map_o_c_grad(self._gradient_tensor, x,
                                                   c_null)

                return grad_o

        if with_c:

            def ehess(x, Heta):
                o, c = x
                eta = make_normalized_G(self, self.m, Heta[0], Heta[1])
                hess_raw = self.hessian_prod(eta)
                hess_o, hess_c = self.map_o_c_grad(hess_raw, o, c)
                return [hess_o, hess_c]

        else:

            def ehess(x, Heta):
                eta = make_normalized_G(self, self.m, Heta, c_null)
                hess_raw = self.hessian_prod(eta)
                hess_o, hess_c = self.map_o_c_grad(hess_raw, x, c_null)
                return hess_o

        if with_c:
            min_mle = Problem(manifold, cost, egrad=egrad, ehess=ehess)
        else:
            min_mle = Problem(manifold, cost, egrad=egrad, ehess=ehess)

        solver = TrustRegions()
        opt = solver.solve(min_mle)
        if with_c:
            G_opt = make_normalized_G(self, self.m, opt[0], opt[1])
        else:
            G_opt = make_normalized_G(self, self.m, opt, c_null)

        self.calc_H_F_Phi(G_opt, cov_y_xlag)
        return opt
Beispiel #27
0
def main():
    # Parse command line arguments
    parser = argparse.ArgumentParser(
        description='Map the source embeddings into the target embedding space'
    )
    parser.add_argument('src_input', help='the input source embeddings')
    parser.add_argument('trg_input', help='the input target embeddings')
    parser.add_argument(
        '--encoding',
        default='utf-8',
        help='the character encoding for input/output (defaults to utf-8)')
    parser.add_argument(
        '--max_vocab',
        default=0,
        type=int,
        help='Maximum vocabulary to be loaded, 0 allows complete vocabulary')
    parser.add_argument('--verbose', default=0, type=int, help='Verbose')
    mapping_group = parser.add_argument_group(
        'mapping arguments', 'Basic embedding mapping arguments')
    mapping_group.add_argument(
        '-dtrain',
        '--dictionary_train',
        default=sys.stdin.fileno(),
        help='the training dictionary file (defaults to stdin)')
    mapping_group.add_argument(
        '-dtest',
        '--dictionary_test',
        default=sys.stdin.fileno(),
        help='the test dictionary file (defaults to stdin)')
    mapping_group.add_argument(
        '-dtrainspl',
        '--dictionary_trainspl',
        default=sys.stdin.fileno(),
        help='the training dictionary split file (defaults to stdin)')
    mapping_group.add_argument(
        '-dvalspl',
        '--dictionary_valspl',
        default=sys.stdin.fileno(),
        help='the validation dictionary split file (defaults to stdin)')
    mapping_group.add_argument(
        '--normalize',
        choices=['unit', 'center', 'unitdim', 'centeremb'],
        nargs='*',
        default=[],
        help='the normalization actions to perform in order')

    geomm_group = parser.add_argument_group('GeoMM arguments',
                                            'Arguments for GeoMM method')
    geomm_group.add_argument('--l2_reg',
                             type=float,
                             default=1e-1,
                             help='Lambda for L2 Regularization')
    geomm_group.add_argument(
        '--max_opt_time',
        type=int,
        default=5000,
        help='Maximum time limit for optimization in seconds')
    geomm_group.add_argument(
        '--max_opt_iter',
        type=int,
        default=150,
        help='Maximum number of iterations for optimization')
    geomm_group.add_argument(
        '--x_cutoff',
        type=int,
        default=25000,
        help='Vocabulary cutoff for first language for bootstrapping')
    geomm_group.add_argument(
        '--z_cutoff',
        type=int,
        default=25000,
        help='Vocabulary cutoff for second language for bootstrapping')
    geomm_group.add_argument(
        '--patience',
        type=int,
        default=1,
        help=
        'Number of iterations with a decrease in validation accuracy permissible during bootstrapping'
    )

    eval_group = parser.add_argument_group('evaluation arguments',
                                           'Arguments for evaluation')
    eval_group.add_argument('--normalize_eval',
                            action='store_true',
                            help='Normalize the embeddings at test time')
    eval_group.add_argument('--eval_batch_size',
                            type=int,
                            default=500,
                            help='Batch size for evaluation')
    eval_group.add_argument('--csls_neighbourhood',
                            type=int,
                            default=10,
                            help='Neighbourhood size for CSLS')

    args = parser.parse_args()
    BATCH_SIZE = args.eval_batch_size

    # Logging
    method_name = os.path.join('logs', 'geomm_semi')
    directory = os.path.join(
        os.path.join(os.getcwd(), method_name),
        datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S'))
    if not os.path.exists(directory):
        os.makedirs(directory)
    log_file_name, file_extension = os.path.splitext(
        os.path.basename(args.dictionary_train))
    log_file_name = log_file_name + '.log'

    class Logger(object):
        def __init__(self):
            self.terminal = sys.stdout
            self.log = open(os.path.join(directory, log_file_name), "a")

        def write(self, message):
            self.terminal.write(message)
            self.log.write(message)

        def flush(self):
            #this flush method is needed for python 3 compatibility.
            #this handles the flush command by doing nothing.
            #you might want to specify some extra behavior here.
            pass

    sys.stdout = Logger()
    if args.verbose:
        print('Current arguments: {0}'.format(args))

    dtype = 'float32'

    if args.verbose:
        print('Loading train data...')
    # Read input embeddings
    srcfile = open(args.src_input,
                   encoding=args.encoding,
                   errors='surrogateescape')
    trgfile = open(args.trg_input,
                   encoding=args.encoding,
                   errors='surrogateescape')
    src_words, x = embeddings.read(srcfile,
                                   max_voc=args.max_vocab,
                                   dtype=dtype)
    trg_words, z = embeddings.read(trgfile,
                                   max_voc=args.max_vocab,
                                   dtype=dtype)

    # Build word to index map
    src_word2ind = {word: i for i, word in enumerate(src_words)}
    trg_word2ind = {word: i for i, word in enumerate(trg_words)}

    # Build training dictionary
    src_indices = []
    trg_indices = []
    f = open(args.dictionary_train,
             encoding=args.encoding,
             errors='surrogateescape')
    for line in f:
        src, trg = line.split()
        if args.max_vocab:
            src = src.lower()
            trg = trg.lower()
        try:
            src_ind = src_word2ind[src]
            trg_ind = trg_word2ind[trg]
            src_indices.append(src_ind)
            trg_indices.append(trg_ind)
        except KeyError:
            if args.verbose:
                print('WARNING: OOV dictionary entry ({0} - {1})'.format(
                    src, trg),
                      file=sys.stderr)
    f.close()
    src_indices = src_indices
    trg_indices = trg_indices
    src_indices_train = list(src_indices)
    trg_indices_train = list(trg_indices)
    src_indices = []
    trg_indices = []

    # Loading train-split dictionary
    f = open(args.dictionary_trainspl,
             encoding=args.encoding,
             errors='surrogateescape')
    for line in f:
        src, trg = line.split()
        if args.max_vocab:
            src = src.lower()
            trg = trg.lower()
        try:
            src_ind = src_word2ind[src]
            trg_ind = trg_word2ind[trg]
            src_indices.append(src_ind)
            trg_indices.append(trg_ind)
        except KeyError:
            if args.verbose:
                print('WARNING: OOV dictionary entry ({0} - {1})'.format(
                    src, trg),
                      file=sys.stderr)
    f.close()

    if args.verbose:
        print('Normalizing embeddings...')
    # STEP 0: Normalization
    for action in args.normalize:
        if action == 'unit':
            x = embeddings.length_normalize(x)
            z = embeddings.length_normalize(z)
        elif action == 'center':
            x = embeddings.mean_center(x)
            z = embeddings.mean_center(z)
        elif action == 'unitdim':
            x = embeddings.length_normalize_dimensionwise(x)
            z = embeddings.length_normalize_dimensionwise(z)
        elif action == 'centeremb':
            x = embeddings.mean_center_embeddingwise(x)
            z = embeddings.mean_center_embeddingwise(z)
    orig_src = src_indices
    orig_trg = trg_indices
    best_val_acc = 0
    best_add_src = []
    best_add_trg = []
    add_src = []
    add_trg = []

    if args.verbose:
        print('Beginning Optimization')
    start_time = time.time()
    it_count = 0
    drop_count = 0

    # Bootstrap loop
    while True:
        if args.verbose:
            print('Starting bootstrap iteration {0}'.format(it_count + 1))
        # Step 1.1: Optimization
        x_count = len(set(src_indices))
        z_count = len(set(trg_indices))

        # Creating dictionary matrix from training set
        map_dict_src = {}
        map_dict_trg = {}
        I = 0
        uniq_src = []
        uniq_trg = []
        for i in range(len(src_indices)):
            if src_indices[i] not in map_dict_src.keys():
                map_dict_src[src_indices[i]] = I
                I += 1
                uniq_src.append(src_indices[i])
        J = 0
        for j in range(len(trg_indices)):
            if trg_indices[j] not in map_dict_trg.keys():
                map_dict_trg[trg_indices[j]] = J
                J += 1
                uniq_trg.append(trg_indices[j])

        np.random.seed(0)
        Lambda = args.l2_reg
        U1 = TT.matrix()
        U2 = TT.matrix()
        B = TT.matrix()
        X_tot = x[uniq_src].T.dot(x[uniq_src])
        Z_tot = z[uniq_trg].T.dot(z[uniq_trg])
        W = U1.dot(B.dot(U2.T))
        cost = (TT.nlinalg.trace(
            U2.dot(
                B.dot(
                    U1.T.dot(
                        shared(X_tot).dot(
                            U1.dot(B.dot(U2.T.dot(shared(Z_tot))))))))) -
                2 * TT.sum(
                    (shared(x[src_indices]).dot(W)) * shared(z[trg_indices]))
                ) / (len(src_indices)) + 0.5 * Lambda * (TT.sum(B**2))
        solver = ConjugateGradient(maxtime=args.max_opt_time,
                                   maxiter=args.max_opt_iter,
                                   mingradnorm=1e-15)

        low_rank = 300
        manifold = Product([
            Stiefel(x.shape[1], low_rank),
            Stiefel(z.shape[1], low_rank),
            PositiveDefinite(low_rank)
        ])
        problem = Problem(manifold=manifold,
                          cost=cost,
                          arg=[U1, U2, B],
                          verbosity=3)
        wopt = solver.solve(problem)
        w = wopt
        U1 = w[0]
        U2 = w[1]
        B = w[2]

        # Step 1.2: Transformation
        xw = x.dot(U1).dot(scipy.linalg.sqrtm(B))
        zw = z.dot(U2).dot(scipy.linalg.sqrtm(B))

        it_count += 1

        # Step 1.3: Compute Validation Accuracy
        if args.normalize_eval:
            xw = embeddings.length_normalize(xw)
            zw = embeddings.length_normalize(zw)

        # Loading validation dictionary
        f = open(args.dictionary_valspl,
                 encoding=args.encoding,
                 errors='surrogateescape')
        src2trg = collections.defaultdict(set)
        trg2src = collections.defaultdict(set)
        oov = set()
        vocab = set()
        for line in f:
            src, trg = line.split()
            if args.max_vocab:
                src = src.lower()
                trg = trg.lower()
            try:
                src_ind = src_word2ind[src]
                trg_ind = trg_word2ind[trg]
                src2trg[src_ind].add(trg_ind)
                trg2src[trg_ind].add(src_ind)
                vocab.add(src)
            except KeyError:
                oov.add(src)
        src = list(src2trg.keys())
        trgt = list(trg2src.keys())

        oov -= vocab  # If one of the translation options is in the vocabulary, then the entry is not an oov
        coverage = len(src2trg) / (len(src2trg) + len(oov))
        f.close()

        translation = collections.defaultdict(int)
        translation5 = collections.defaultdict(list)
        translation10 = collections.defaultdict(list)

        t = time.time()
        nbrhood_x = cp.zeros(xw.shape[0])
        nbrhood_z = cp.zeros(zw.shape[0])
        for i in range(0, len(src), BATCH_SIZE):
            j = min(i + BATCH_SIZE, len(src))
            similarities = -1 * cp.partition(
                -1 *
                cp.dot(cp.asarray(xw[src[i:j]]), cp.transpose(cp.asarray(zw))),
                args.csls_neighbourhood - 1,
                axis=1)[:, :args.csls_neighbourhood]
            nbrhood_x[src[i:j]] = (cp.mean(similarities, axis=1))

        for i in range(0, zw.shape[0], BATCH_SIZE):
            j = min(i + BATCH_SIZE, zw.shape[0])
            similarities = -1 * cp.partition(
                -1 * cp.dot(cp.asarray(zw[i:j]), cp.transpose(cp.asarray(xw))),
                args.csls_neighbourhood - 1,
                axis=1)[:, :args.csls_neighbourhood]
            nbrhood_z[i:j] = (cp.mean(similarities, axis=1))

        for i in range(0, len(src), BATCH_SIZE):
            j = min(i + BATCH_SIZE, len(src))
            similarities = cp.transpose(
                cp.transpose(2 * cp.asarray(xw[src[i:j]]).dot(
                    cp.transpose(cp.asarray(zw)))) -
                nbrhood_x[src[i:j]]) - nbrhood_z
            nn = cp.argmax(similarities, axis=1).tolist()
            similarities = cp.argsort((similarities), axis=1)

            nn5 = (similarities[:, -5:])
            nn10 = (similarities[:, -10:])
            for k in range(j - i):
                translation[src[i + k]] = nn[k]
                translation5[src[i + k]] = nn5[k].tolist()
                translation10[src[i + k]] = nn10[k].tolist()
        accuracy = np.mean(
            [1 if translation[i] in src2trg[i] else 0 for i in src])
        mean = 0
        for i in src:
            for k in translation5[i]:
                if k in src2trg[i]:
                    mean += 1
                    break

        mean /= len(src)
        accuracy5 = mean

        mean = 0
        for i in src:
            for k in translation10[i]:
                if k in src2trg[i]:
                    mean += 1
                    break

        mean /= len(src)
        accuracy10 = mean
        drop_count += 1
        if accuracy > best_val_acc:
            if args.verbose:
                print('Improvement of {0}%  over best validation accuracy!'.
                      format((accuracy - best_val_acc) * 100))
            best_val_acc = accuracy
            best_add_src = list(add_src)
            best_add_trg = list(add_trg)
            drop_count = 0

        if args.verbose:
            print(
                'Val Set:- Coverage:{0:7.2%}  Accuracy:{1:7.2%}  Accuracy(Top 5):{2:7.2%}  Accuracy(Top 10):{3:7.2%}'
                .format(coverage, accuracy, accuracy5, accuracy10))
        if drop_count >= args.patience:
            if args.verbose:
                print('Training ended')
            break

        # Step 1.4: Dictionary Induction Stage (Bootstrap)
        # x_cutoff and z_cutoff define the vocabulary subsets used for induction (the first
        # k words of standard word embeddings are the most frequent ones).  CSLS inference is
        # run on these subsets, bootstrapping in both directions: dictionary entries are
        # induced for the first x_cutoff words of language 1 and the first z_cutoff words of
        # language 2, and the original training dictionary is added back, so the total
        # dictionary size is x_cutoff + z_cutoff + size(train_set).
        # A compact numpy sketch of this induction step is given after this function.
        if args.normalize_eval:
            xw = embeddings.length_normalize(xw)
            zw = embeddings.length_normalize(zw)

        x_vocab_size = min(xw.shape[0], args.x_cutoff)
        z_vocab_size = min(zw.shape[0], args.z_cutoff)
        t = time.time()
        nbrhood_x = cp.zeros(x_vocab_size)
        best_sim_x = cp.zeros(x_vocab_size)
        best_sim_x_csls = cp.zeros(x_vocab_size)
        nbrhood_z = cp.zeros(z_vocab_size)

        batch_num = 1
        for i in range(0, x_vocab_size, BATCH_SIZE):
            j = min(i + BATCH_SIZE, x_vocab_size)
            similarities = -1 * cp.partition(
                -1 * cp.dot(cp.asarray(xw[i:j]),
                            cp.transpose(cp.asarray(zw[:z_vocab_size]))),
                args.csls_neighbourhood - 1,
                axis=1)[:, :args.csls_neighbourhood]
            nbrhood_x[i:j] = (cp.mean(similarities, axis=1))
            best_sim_x[i:j] = (cp.max(similarities, axis=1))
            batch_num += 1

        batch_num = 1
        for i in range(0, z_vocab_size, BATCH_SIZE):
            j = min(i + BATCH_SIZE, z_vocab_size)
            similarities = -1 * cp.partition(
                -1 * cp.dot(cp.asarray(zw[i:j]),
                            cp.transpose(cp.asarray(xw[:x_vocab_size]))),
                args.csls_neighbourhood - 1,
                axis=1)[:, :args.csls_neighbourhood]
            nbrhood_z[i:j] = (cp.mean(similarities, axis=1))
            batch_num += 1

        src_indices = list(range(0, x_vocab_size))
        trg_indices = []
        batch_num = 1
        for i in range(0, x_vocab_size, BATCH_SIZE):
            j = min(i + BATCH_SIZE, x_vocab_size)
            similarities = cp.transpose(
                cp.transpose(2 * cp.asarray(xw[i:j]).dot(
                    cp.transpose(cp.asarray(zw[:z_vocab_size])))) -
                nbrhood_x[i:j]) - nbrhood_z
            nn = cp.argmax(similarities, axis=1).tolist()
            trg_indices.append(nn)
            batch_num += 1

        src_indices2 = []
        trg_indices2 = list(range(0, z_vocab_size))
        batch_num = 1
        for i in range(0, z_vocab_size, BATCH_SIZE):
            j = min(i + BATCH_SIZE, z_vocab_size)
            similarities = cp.transpose(
                cp.transpose(2 * cp.asarray(zw[i:j]).dot(
                    cp.transpose(cp.asarray(xw[:x_vocab_size])))) -
                nbrhood_z[i:j]) - nbrhood_x
            nn = cp.argmax(similarities, axis=1).tolist()
            src_indices2.append(nn)
            batch_num += 1
        trg_indices = [item for sublist in trg_indices for item in sublist]
        src_indices2 = [item for sublist in src_indices2 for item in sublist]

        add_src = list(src_indices + src_indices2)
        add_trg = list(trg_indices + trg_indices2)
        src_indices = src_indices + src_indices2 + orig_src
        trg_indices = trg_indices + trg_indices2 + orig_trg

    end_time = time.time()
    if args.verbose:
        print('Completed bootstrapping in {0:.2f} seconds'.format(end_time -
                                                                  start_time))

    # Step 2: Final Training with bootstrapped dictionary
    if args.verbose:
        print('Training final model')
    src_indices = best_add_src + src_indices_train
    trg_indices = best_add_trg + trg_indices_train
    x_count = len(set(src_indices))
    z_count = len(set(trg_indices))

    # Creating dictionary matrix from training set
    map_dict_src = {}
    map_dict_trg = {}
    I = 0
    uniq_src = []
    uniq_trg = []
    for i in range(len(src_indices)):
        if src_indices[i] not in map_dict_src.keys():
            map_dict_src[src_indices[i]] = I
            I += 1
            uniq_src.append(src_indices[i])
    J = 0
    for j in range(len(trg_indices)):
        if trg_indices[j] not in map_dict_trg.keys():
            map_dict_trg[trg_indices[j]] = J
            J += 1
            uniq_trg.append(trg_indices[j])

    np.random.seed(0)
    Lambda = args.l2_reg
    U1 = TT.matrix()
    U2 = TT.matrix()
    B = TT.matrix()
    X_tot = x[uniq_src].T.dot(x[uniq_src])
    Z_tot = z[uniq_trg].T.dot(z[uniq_trg])
    W = U1.dot(B.dot(U2.T))
    cost = (TT.nlinalg.trace(
        U2.dot(
            B.dot(
                U1.T.dot(
                    shared(X_tot).dot(U1.dot(B.dot(U2.T.dot(shared(Z_tot)))))))
        )) - 2 * TT.sum(
            (shared(x[src_indices]).dot(W)) * shared(z[trg_indices]))
            ) / len(src_indices) + 0.5 * Lambda * (TT.sum(B**2))
    solver = ConjugateGradient(maxtime=args.max_opt_time,
                               maxiter=args.max_opt_iter)

    low_rank = 300
    manifold = Product([
        Stiefel(x.shape[1], low_rank),
        Stiefel(z.shape[1], low_rank),
        PositiveDefinite(low_rank)
    ])
    problem = Problem(manifold=manifold,
                      cost=cost,
                      arg=[U1, U2, B],
                      verbosity=3)
    wopt = solver.solve(problem)

    w = wopt
    U1 = w[0]
    U2 = w[1]
    B = w[2]

    xw = x.dot(U1).dot(scipy.linalg.sqrtm(B))
    zw = z.dot(U2).dot(scipy.linalg.sqrtm(B))

    gc.collect()

    # Step 3: Evaluation
    if args.verbose:
        print('Beginning Evaluation')

    if args.normalize_eval:
        xw = embeddings.length_normalize(xw)
        zw = embeddings.length_normalize(zw)
    # Loading test dictionary
    f = open(args.dictionary_test,
             encoding=args.encoding,
             errors='surrogateescape')
    src2trg = collections.defaultdict(set)
    trg2src = collections.defaultdict(set)
    oov = set()
    vocab = set()
    for line in f:
        src, trg = line.split()
        if args.max_vocab:
            src = src.lower()
            trg = trg.lower()
        try:
            src_ind = src_word2ind[src]
            trg_ind = trg_word2ind[trg]
            src2trg[src_ind].add(trg_ind)
            trg2src[trg_ind].add(src_ind)
            vocab.add(src)
        except KeyError:
            oov.add(src)
    src = list(src2trg.keys())
    trgt = list(trg2src.keys())

    oov -= vocab  # If one of the translation options is in the vocabulary, then the entry is not an oov
    coverage = len(src2trg) / (len(src2trg) + len(oov))
    f.close()

    translation = collections.defaultdict(int)
    translation5 = collections.defaultdict(list)
    translation10 = collections.defaultdict(list)

    t = time.time()
    nbrhood_x = np.zeros(xw.shape[0])
    nbrhood_z = np.zeros(zw.shape[0])
    nbrhood_z2 = cp.zeros(zw.shape[0])
    for i in range(0, len(src), BATCH_SIZE):
        j = min(i + BATCH_SIZE, len(src))
        similarities = xw[src[i:j]].dot(zw.T)
        similarities_x = -1 * np.partition(
            -1 * similarities, args.csls_neighbourhood - 1, axis=1)
        nbrhood_x[src[i:j]] = np.mean(
            similarities_x[:, :args.csls_neighbourhood], axis=1)

    batch_num = 1
    for i in range(0, zw.shape[0], BATCH_SIZE):
        j = min(i + BATCH_SIZE, zw.shape[0])
        similarities = -1 * cp.partition(
            -1 * cp.dot(cp.asarray(zw[i:j]), cp.transpose(cp.asarray(xw))),
            args.csls_neighbourhood - 1,
            axis=1)[:, :args.csls_neighbourhood]
        nbrhood_z2[i:j] = (cp.mean(similarities, axis=1))
        batch_num += 1
    nbrhood_z = cp.asnumpy(nbrhood_z2)
    for i in range(0, len(src), BATCH_SIZE):
        j = min(i + BATCH_SIZE, len(src))
        similarities = xw[src[i:j]].dot(zw.T)
        similarities = np.transpose(
            np.transpose(2 * similarities) - nbrhood_x[src[i:j]]) - nbrhood_z
        nn = similarities.argmax(axis=1).tolist()
        similarities = np.argsort((similarities), axis=1)

        nn5 = (similarities[:, -5:])
        nn10 = (similarities[:, -10:])
        for k in range(j - i):
            translation[src[i + k]] = nn[k]
            translation5[src[i + k]] = nn5[k]
            translation10[src[i + k]] = nn10[k]
    accuracy = np.mean([1 if translation[i] in src2trg[i] else 0 for i in src])
    mean = 0
    for i in src:
        for k in translation5[i]:
            if k in src2trg[i]:
                mean += 1
                break

    mean /= len(src)
    accuracy5 = mean

    mean = 0
    for i in src:
        for k in translation10[i]:
            if k in src2trg[i]:
                mean += 1
                break

    mean /= len(src)
    accuracy10 = mean
    print(
        'Coverage:{0:7.2%}  Accuracy:{1:7.2%}  Accuracy(Top 5):{2:7.2%}  Accuracy(Top 10):{3:7.2%}'
        .format(coverage, accuracy, accuracy5, accuracy10))
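
# --- Illustrative addition (not part of the original script) ------------------------
# Compact, unbatched numpy sketch of one bidirectional CSLS dictionary-induction step
# from the bootstrap loop above.  xw and zw are the mapped, length-normalized source and
# target embeddings restricted to the x_cutoff / z_cutoff vocabularies; knn is the CSLS
# neighbourhood size.  The original training pairs are appended to the result in the
# script itself.
import numpy as np

def induce_dictionary(xw, zw, knn=10):
    sims = xw @ zw.T                                              # source-target similarities
    r_x = -np.partition(-sims, knn - 1, axis=1)[:, :knn].mean(axis=1)
    r_z = -np.partition(-sims.T, knn - 1, axis=1)[:, :knn].mean(axis=1)
    csls = 2 * sims - r_x[:, None] - r_z[None, :]
    fwd = csls.argmax(axis=1)                                     # best target per source word
    bwd = csls.argmax(axis=0)                                     # best source per target word
    src_indices = list(range(xw.shape[0])) + bwd.tolist()
    trg_indices = fwd.tolist() + list(range(zw.shape[0]))
    return src_indices, trg_indices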
class TestProductManifold(unittest.TestCase):
    def setUp(self):
        self.m = m = 100
        self.n = n = 50
        self.euclidean = Euclidean(m, n)
        self.sphere = Sphere(n)
        self.man = Product([self.euclidean, self.sphere])

    def test_dim(self):
        np_testing.assert_equal(self.man.dim, self.m*self.n+self.n-1)

    def test_typicaldist(self):
        np_testing.assert_equal(self.man.typicaldist,
                                np.sqrt((self.m*self.n)+np.pi**2))

    def test_dist(self):
        X = self.man.rand()
        Y = self.man.rand()
        np_testing.assert_equal(self.man.dist(X, Y),
                                np.sqrt(
                                    self.euclidean.dist(X[0], Y[0])**2 +
                                    self.sphere.dist(X[1], Y[1])**2))

    # def test_inner(self):

    # def test_proj(self):

    # def test_ehess2rhess(self):

    # def test_retr(self):

    # def test_egrad2rgrad(self):

    # def test_norm(self):

    # def test_rand(self):

    # def test_randvec(self):

    # def test_transp(self):

    def test_exp_log_inverse(self):
        s = self.man
        X = s.rand()
        Y = s.rand()
        Yexplog = s.exp(X, s.log(X, Y))
        np_testing.assert_almost_equal(s.dist(Y, Yexplog), 0)

    def test_log_exp_inverse(self):
        s = self.man
        X = s.rand()
        U = s.randvec(X)
        Ulogexp = s.log(X, s.exp(X, U))
        np_testing.assert_array_almost_equal(U[0], Ulogexp[0])
        np_testing.assert_array_almost_equal(U[1], Ulogexp[1])

    def test_pairmean(self):
        s = self.man
        X = s.rand()
        Y = s.rand()
        Z = s.pairmean(X, Y)
        np_testing.assert_array_almost_equal(s.dist(X, Z), s.dist(Y, Z))
Beispiel #29
0
    def run(self,
            S,
            F,
            v=None,
            C=None,
            fs=None,
            omega=None,
            maxiter=500,
            tol=1e-10,
            variant='bp'):
        '''
        Run MERLiN algorithm.
        Whether to run a scalar variant, i.e. S -> C -> w'F, or a
        timeseries variant, i.e. S -> C -> bp(w'F) is determined by the
        dimensionality of the input F.

        Input (default)
            - S
                (m x 1) np.array that contains the samples of S
            - F
                either a (d x m) np.array that contains the linear mixture
                samples or a (d x m x n) np.array that contains the linearly
                mixed timeseries of length n (d channels, m trials)
            - v
                (d x 1) np.array holding the linear combination that
                extracts middle node C from F
            - C
                (m x 1) np.array that contains the samples of the middle
                node C
            - fs
                sampling rate in Hz
            - omega
                tuple of (low, high) cut-off of desired frequency band
            - maxiter (500)
                maximum iterations to run the optimisation algorithm for
            - tol (1e-10)
                terminate optimisation if step size < tol or grad norm < tol
            - variant ('bp')
                determines which MERLiN variant to use on timeseries data
                ('bp' = MERLiNbp algorithm ([1], Algorithm 4),
                 'bpicoh' = MERLiNbpicoh algorithm ([1], Algorithm 5),
                 'nlbp' = MERLiNnlbp)

        Output
            - w
                linear combination that was found and should extract the
                effect of C from F
            - converged
                boolean that indicates whether the stopping criterion was
                met before the maximum number of iterations was performed
            - curob
                objective function value at w
        '''
        self._S = S
        self._Forig = F
        self._fs = fs
        self._omega = omega
        self._d = F.shape[0]
        self._m = F.shape[1]

        # scalar or timeseries mode
        if F.ndim == 3:
            self._mode = 'timeseries'
            self._n = F.shape[2]
            if not (fs and omega):
                raise ValueError('Both the optional arguments fs and omega '
                                 'need to be provided.')
            if self._verbosity:
                print('Launching MERLiN' + variant + ' for iid sampled '
                      'timeseries chunks.')
        elif F.ndim == 2:
            self._mode = 'scalar'
            if self._verbosity:
                print('Launching MERLiN for iid sampled scalars.')
        else:
            raise ValueError('F needs to be a 2-dimensional numpy array '
                             '(iid sampled scalars) or a 3-dimensional '
                             'numpy array (iid sampled timeseries chunks).')

        self._prepare(v, C)

        if self._mode == 'scalar':
            problem = self._problem_MERLiN()
        elif variant == 'bp':
            problem = self._problem_MERLiNbp()
        elif variant == 'bpicoh':
            problem = self._problem_MERLiNbpicoh()
        elif variant == 'nlbp':
            problem = self._problem_MERLiNnlbp()
        else:
            raise NotImplementedError

        if variant != 'nlbp':
            problem.manifold = Sphere(self._d, 1)
        elif variant == 'nlbp':
            problem.manifold = Product(
                [Sphere(self._d, 1),
                 Euclidean(1, 1),
                 Euclidean(1, 1)])

        # choose best out of ten 10-step runs as initialisation
        solver = SteepestDescent(maxiter=10, logverbosity=1)
        res = [solver.solve(problem) for k in range(0, 10)]
        obs = [-r[1]['final_values']['f(x)'] for r in res]
        w0 = res[obs.index(max(obs))][0]

        solver = SteepestDescent(maxtime=float('inf'),
                                 maxiter=maxiter,
                                 mingradnorm=tol,
                                 minstepsize=tol,
                                 logverbosity=1)
        if self._verbosity:
            print('Running optimisation algorithm.')
        w, info = solver.solve(problem, x=w0)
        if variant == 'nlbp':
            w = w[0]
        converged = maxiter != info['final_values']['iterations']
        curob = -float(info['final_values']['f(x)'])
        if self._verbosity:
            print('DONE.')
        return self._P.T.dot(w), converged, curob
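
A hypothetical call of this method in the scalar case; the merlin instance, array shapes, and values below are illustrative assumptions only:

import numpy as np

m, d = 200, 16                     # trials, channels (assumed sizes)
S = np.random.randn(m, 1)          # samples of S
F = np.random.randn(d, m)          # 2-D input selects the scalar variant
v = np.random.randn(d, 1)          # linear combination extracting C

# merlin is assumed to be an already constructed MERLiN object
w, converged, curob = merlin.run(S, F, v=v, maxiter=500, tol=1e-10)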
Beispiel #30
0
    def fit(self, RLRMCdata, verbosity=0, _evaluate=False):
        """The underlying fit method for RLRMC

        Args:
            RLRMCdata (RLRMCdataset): the RLRMCdataset object.
            verbosity (int): verbosity of Pymanopt. Possible values are 0 (least verbose), 1, or 2 (most verbose). 
            _evaluate (bool): flag to compute the per iteration statistics in train (and validation) datasets.
        """
        # initialize the model
        W0 = self._init_train(RLRMCdata.train)
        self.user2id = RLRMCdata.user2id
        self.item2id = RLRMCdata.item2id
        self.id2user = RLRMCdata.id2user
        self.id2item = RLRMCdata.id2item

        # residual variable
        residual_global = np.zeros(RLRMCdata.train.data.shape, dtype=np.float64)

        ###################Riemannian first-order algorithm######################

        solver = ConjugateGradientMS(
            maxtime=self.max_time,
            maxiter=self.maxiter,
            linesearch=LineSearchBackTracking(),
        )  # , logverbosity=2)
        # construction of manifold
        manifold = Product(
            [
                Stiefel(self.model_param.get("num_row"), self.rank),
                Stiefel(self.model_param.get("num_col"), self.rank),
                PositiveDefinite(self.rank),
            ]
        )
        problem = Problem(
            manifold=manifold,
            cost=lambda x: self._cost(
                x,
                RLRMCdata.train.data,
                RLRMCdata.train.indices,
                RLRMCdata.train.indptr,
                residual_global,
            ),
            egrad=lambda z: self._egrad(
                z, RLRMCdata.train.indices, RLRMCdata.train.indptr, residual_global
            ),
            verbosity=verbosity,
        )

        if _evaluate:
            residual_validation_global = np.zeros(
                RLRMCdata.validation.data.shape, dtype=np.float64
            )
            Wopt, self.stats = solver.solve(
                problem,
                x=W0,
                compute_stats=lambda x, y, z: self._my_stats(
                    x,
                    y,
                    z,
                    residual_global,
                    RLRMCdata.validation.data,
                    RLRMCdata.validation.indices,
                    RLRMCdata.validation.indptr,
                    residual_validation_global,
                ),
            )
        else:
            Wopt, self.stats = solver.solve(problem, x=W0)
        self.L = np.dot(Wopt[0], Wopt[2])
        self.R = Wopt[1]
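
The solver returns a point Wopt = [U, V, B] on Stiefel x Stiefel x PositiveDefinite, from which the stored factors L and R are built. A self-contained sketch of how those factors yield rating predictions; the shapes and the random placeholders for U, V, B are assumptions for illustration:

import numpy as np

num_row, num_col, rank = 1000, 500, 10
U, _ = np.linalg.qr(np.random.randn(num_row, rank))   # stands in for Wopt[0]
V, _ = np.linalg.qr(np.random.randn(num_col, rank))   # stands in for Wopt[1]
B = np.eye(rank)                                      # stands in for Wopt[2] (positive definite)

L = U.dot(B)        # mirrors self.L = np.dot(Wopt[0], Wopt[2]) above
R = V               # mirrors self.R = Wopt[1]
X_hat = L.dot(R.T)  # low-rank reconstruction of the rating matrix
pred = X_hat[3, 7]  # predicted rating for user index 3, item index 7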
Beispiel #31
0
    def _update_classifier(self, data, labels, w, classes):
        """Update the classifier parameters theta and bias

        Parameters
        ----------

        data : list of 2D arrays, element i has shape=[voxels_i, samples_i]
            Each element in the list contains the fMRI data of one subject for
            the classification task.

        labels : list of arrays of int, element i has shape=[samples_i]
            Each element in the list contains the labels for the data samples
            in data.

        w : list of 2D array, element i has shape=[voxels_i, features]
            The orthogonal transforms (mappings) :math:`W_i` for each subject.

        classes : int
            The number of classes in the classifier.


        Returns
        -------

        theta : array, shape=[features, classes]
            The MLR parameter for the class planes.

        bias : array shape=[classes,]
            The MLR parameter for class biases.
        """

        # Stack the data and labels for training the classifier
        data_stacked, labels_stacked, weights = \
            SSSRM._stack_list(data, labels, w)

        features = w[0].shape[1]
        total_samples = weights.size

        data_th = S.shared(data_stacked.astype(theano.config.floatX))
        val_ = S.shared(labels_stacked)
        total_samples_S = S.shared(total_samples)
        theta_th = T.matrix(name='theta', dtype=theano.config.floatX)
        bias_th = T.col(name='bias', dtype=theano.config.floatX)
        constf2 = S.shared(self.alpha / self.gamma, allow_downcast=True)
        weights_th = S.shared(weights)

        log_p_y_given_x = \
            T.log(T.nnet.softmax((theta_th.T.dot(data_th.T)).T + bias_th.T))
        f = -constf2 * T.sum(
            (log_p_y_given_x[T.arange(total_samples_S), val_]) /
            weights_th) + 0.5 * T.sum(theta_th**2)

        manifold = Product((Euclidean(features,
                                      classes), Euclidean(classes, 1)))
        problem = Problem(manifold=manifold,
                          cost=f,
                          arg=[theta_th, bias_th],
                          verbosity=0)
        solver = ConjugateGradient(mingradnorm=1e-6)
        solution = solver.solve(problem)
        theta = solution[0]
        bias = solution[1]

        del constf2
        del theta_th
        del bias_th
        del data_th
        del val_
        del solver
        del solution

        return theta, bias
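
For readers less familiar with Theano, a plain-numpy sketch of the weighted multinomial-logistic objective built above (the same quantity that pymanopt's ConjugateGradient minimises over Product([Euclidean(features, classes), Euclidean(classes, 1)])); shapes and names are assumptions:

import numpy as np

def mlr_objective(theta, bias, X, y, weights, constf2):
    # X: samples x features, theta: features x classes, bias: (classes,)
    # y: integer labels, weights: per-sample weights, constf2: alpha / gamma
    logits = X.dot(theta) + bias
    logits -= logits.max(axis=1, keepdims=True)        # numerical stability
    log_p = logits - np.log(np.exp(logits).sum(axis=1, keepdims=True))
    data_term = np.sum(log_p[np.arange(len(y)), y] / weights)
    return -constf2 * data_term + 0.5 * np.sum(theta ** 2)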
Beispiel #32
0
def main():
    # Parse command line arguments
    parser = argparse.ArgumentParser(description='Map the source embeddings into the target embedding space')
    parser.add_argument('emb_file', help='the input target embeddings')
    parser.add_argument('--encoding', default='utf-8', help='the character encoding for input/output (defaults to utf-8)')
    parser.add_argument('--max_vocab', default=0,type=int, help='Maximum vocabulary to be loaded, 0 allows complete vocabulary')
    parser.add_argument('--verbose', default=0,type=int, help='Verbose')
  
    mapping_group = parser.add_argument_group('mapping arguments', 'Basic embedding mapping arguments')
    mapping_group.add_argument('-dtrain_file', '--dictionary_train_file', default=sys.stdin.fileno(), help='the training dictionary file (defaults to stdin)')
    mapping_group.add_argument('-dtest_file', '--dictionary_test_file', default=sys.stdin.fileno(), help='the test dictionary file (defaults to stdin)')
    mapping_group.add_argument('--normalize', choices=['unit', 'center', 'unitdim', 'centeremb'], nargs='*', default=[], help='the normalization actions to perform in order')
    
    geomm_group = parser.add_argument_group('GeoMM Multi arguments', 'Arguments for GeoMM Multi method')
    geomm_group.add_argument('--l2_reg', type=float,default=1e3, help='Lambda for L2 Regularization')
    geomm_group.add_argument('--max_opt_time', type=int,default=5000, help='Maximum time limit for optimization in seconds')
    geomm_group.add_argument('--max_opt_iter', type=int,default=150, help='Maximum number of iterations for optimization')
   
    eval_group = parser.add_argument_group('evaluation arguments', 'Arguments for evaluation')
    eval_group.add_argument('--normalize_eval', action='store_true', help='Normalize the embeddings at test time')
    eval_group.add_argument('--eval_batch_size', type=int,default=1000, help='Batch size for evaluation')
    eval_group.add_argument('--csls_neighbourhood', type=int,default=10, help='Neighbourhood size for CSLS')

    args = parser.parse_args()
    BATCH_SIZE = args.eval_batch_size

    # Logging
    method_name = os.path.join('logs','geomm_multi')
    directory = os.path.join(os.path.join(os.getcwd(),method_name), datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S'))
    if not os.path.exists(directory):
        os.makedirs(directory)
    log_file_name, file_extension = os.path.splitext(os.path.basename(args.dictionary_train_file))
    log_file_name = log_file_name + '.log'
    class Logger(object):
        def __init__(self):
            self.terminal = sys.stdout
            self.log = open(os.path.join(directory,log_file_name), "a")

        def write(self, message):
            self.terminal.write(message)
            self.log.write(message)  

        def flush(self):
            #this flush method is needed for python 3 compatibility.
            #this handles the flush command by doing nothing.
            #you might want to specify some extra behavior here.
            pass    
    sys.stdout = Logger()
    if args.verbose:
        print('Current arguments: {0}'.format(args))

    dtype = 'float32'

    if args.verbose:
        print('Loading train data...')
    words = []
    emb = []
    with open(args.emb_file, encoding=args.encoding, errors='surrogateescape') as f:
        for line in f:
            srcfile = open(line.strip(), encoding=args.encoding, errors='surrogateescape')
            words_temp, x_temp = embeddings.read(srcfile,max_voc=args.max_vocab, dtype=dtype)
            words.append(words_temp)
            emb.append(x_temp)


    # Build word to index map
    word2ind = []
    for lang in words:
        word2ind.append({word: i for i, word in enumerate(lang)})

    # Build training dictionary
    train_pairs = []
    with open(args.dictionary_train_file, encoding=args.encoding, errors='surrogateescape') as ff:
        for line in ff:
            vals = line.split(',')
            curr_dict=[int(vals[0].strip()),int(vals[1].strip())]
            src_indices = []
            trg_indices = []
            with open(vals[2].strip(), encoding=args.encoding, errors='surrogateescape') as f:
                for line in f:
                    src,trg = line.split()
                    if args.max_vocab:
                        src=src.lower()
                        trg=trg.lower()
                    try:
                        src_ind = word2ind[curr_dict[0]][src]
                        trg_ind = word2ind[curr_dict[1]][trg]
                        src_indices.append(src_ind)
                        trg_indices.append(trg_ind)
                    except KeyError:
                        if args.verbose:
                            print('WARNING: OOV dictionary entry ({0} - {1})'.format(src, trg), file=sys.stderr)
            curr_dict.append(src_indices)
            curr_dict.append(trg_indices)
            train_pairs.append(curr_dict)
    if args.verbose:
        print('Normalizing embeddings...')
    # Step 0: Normalization
    for action in args.normalize:
        if action == 'unit':
            for i in range(len(emb)):
                emb[i] = embeddings.length_normalize(emb[i])
        elif action == 'center':
            for i in range(len(emb)):
                emb[i] = embeddings.mean_center(emb[i])
        elif action == 'unitdim':
            for i in range(len(emb)):
                emb[i] = embeddings.length_normalize_dimensionwise(emb[i])
        elif action == 'centeremb':
            for i in range(len(emb)):
                emb[i] = embeddings.mean_center_embeddingwise(emb[i])


    # Step 1: Optimization
    if args.verbose:
        print('Beginning Optimization')
    start_time = time.time()
    mean_size=0
    for tp in range(len(train_pairs)):
        src_indices = train_pairs[tp][2]
        trg_indices = train_pairs[tp][3]
        x_count = len(set(src_indices))
        z_count = len(set(trg_indices))
        A = np.zeros((x_count,z_count))
        
        # Creating dictionary matrix from training set
        map_dict_src={}
        map_dict_trg={}
        I=0
        uniq_src=[]
        uniq_trg=[]
        for i in range(len(src_indices)):
            if src_indices[i] not in map_dict_src.keys():
                map_dict_src[src_indices[i]]=I
                I+=1
                uniq_src.append(src_indices[i])
        J=0
        for j in range(len(trg_indices)):
            if trg_indices[j] not in map_dict_trg.keys():
                map_dict_trg[trg_indices[j]]=J
                J+=1
                uniq_trg.append(trg_indices[j])

        for i in range(len(src_indices)):
            A[map_dict_src[src_indices[i]],map_dict_trg[trg_indices[i]]]=1
        train_pairs[tp].append(uniq_src)
        train_pairs[tp].append(uniq_trg)
        train_pairs[tp].append(A)
        mean_size+= (len(uniq_src)*len(uniq_trg))
    mean_size = mean_size/len(train_pairs)
    np.random.seed(0)
    Lambda=args.l2_reg

    variables=[]
    manif = []
    low_rank=emb[0].shape[1]
    for i in range(len(emb)):
        variables.append(TT.matrix())
        manif.append(Stiefel(emb[i].shape[1],low_rank))
    variables.append(TT.matrix())
    manif.append(PositiveDefinite(low_rank))
    B = variables[-1]
    cost = 0.5*Lambda*(TT.sum(B**2))
    for i in range(len(train_pairs)):
        x = emb[train_pairs[i][0]]
        z = emb[train_pairs[i][1]]
        U1 = variables[train_pairs[i][0]]
        U2 = variables[train_pairs[i][1]]
        cost = cost + TT.sum(((shared(x[train_pairs[i][4]]).dot(U1.dot(B.dot(U2.T)))).dot(shared(z[train_pairs[i][5]]).T)-shared(train_pairs[i][6]))**2)/float(len(train_pairs[i][2]))
    solver = ConjugateGradient(maxtime=args.max_opt_time,maxiter=args.max_opt_iter,mingradnorm=1e-12)
    manifold =Product(manif)
    problem = Problem(manifold=manifold, cost=cost, arg=variables, verbosity=3)
    wopt = solver.solve(problem)
    w= wopt
    U1 = w[0]
    U2 = w[1]
    B = w[2]

    # Step 2: Transformation
    Bhalf = scipy.linalg.sqrtm(wopt[-1])
    test_emb = []
    for i in range(len(emb)):
        test_emb.append(emb[i].dot(wopt[i]).dot(Bhalf))

    end_time = time.time()
    if args.verbose:
        print('Completed training in {0:.2f} seconds'.format(end_time-start_time))
    gc.collect()


    # Step 3: Evaluation
    if args.verbose:
        print('Beginning Evaluation')

    if args.normalize_eval:
        for i in range(len(test_emb)):
            test_emb[i] = embeddings.length_normalize(test_emb[i])

    # Loading test dictionary
    with open(args.dictionary_test_file, encoding=args.encoding, errors='surrogateescape') as ff:
        for line in ff:
            vals = line.split(',')
            curr_dict=[int(vals[0].strip()),int(vals[1].strip())]
            with open(vals[2].strip(), encoding=args.encoding, errors='surrogateescape') as f:
                src_word2ind = word2ind[curr_dict[0]]
                trg_word2ind = word2ind[curr_dict[1]]
                xw = test_emb[curr_dict[0]]
                zw = test_emb[curr_dict[1]]
                src2trg = collections.defaultdict(set)
                trg2src = collections.defaultdict(set)
                oov = set()
                vocab = set()
                for line in f:
                    src, trg = line.split()
                    if args.max_vocab:
                        src=src.lower()
                        trg=trg.lower()
                    try:
                        src_ind = src_word2ind[src]
                        trg_ind = trg_word2ind[trg]
                        src2trg[src_ind].add(trg_ind)
                        trg2src[trg_ind].add(src_ind)
                        vocab.add(src)
                    except KeyError:
                        oov.add(src)
                src = list(src2trg.keys())
                trgt = list(trg2src.keys())

                oov -= vocab  # If one of the translation options is in the vocabulary, then the entry is not an oov
                coverage = len(src2trg) / (len(src2trg) + len(oov))
                f.close()

                translation = collections.defaultdict(int)
                translation5 = collections.defaultdict(list)
                translation10 = collections.defaultdict(list)

                t=time.time()
                nbrhood_x=np.zeros(xw.shape[0])
                nbrhood_z=np.zeros(zw.shape[0])
                nbrhood_z2=cp.zeros(zw.shape[0])
                for i in range(0, len(src), BATCH_SIZE):
                    j = min(i + BATCH_SIZE, len(src))
                    similarities = xw[src[i:j]].dot(zw.T)
                    similarities_x = -1*np.partition(-1*similarities,args.csls_neighbourhood-1 ,axis=1)
                    nbrhood_x[src[i:j]]=np.mean(similarities_x[:,:args.csls_neighbourhood],axis=1)
                batch_num=1
                for i in range(0, zw.shape[0], BATCH_SIZE):
                    j = min(i + BATCH_SIZE, zw.shape[0])
                    similarities = -1*cp.partition(-1*cp.dot(cp.asarray(zw[i:j]),cp.transpose(cp.asarray(xw))),args.csls_neighbourhood-1 ,axis=1)[:,:args.csls_neighbourhood]
                    nbrhood_z2[i:j]=(cp.mean(similarities[:,:args.csls_neighbourhood],axis=1))
                    batch_num+=1
                nbrhood_z=cp.asnumpy(nbrhood_z2)
                for i in range(0, len(src), BATCH_SIZE):
                    j = min(i + BATCH_SIZE, len(src))
                    similarities = xw[src[i:j]].dot(zw.T)
                    similarities = np.transpose(np.transpose(2*similarities) - nbrhood_x[src[i:j]])- nbrhood_z
                    nn = similarities.argmax(axis=1).tolist()
                    similarities = np.argsort((similarities),axis=1)

                    nn5 = (similarities[:,-5:])
                    nn10 = (similarities[:,-10:])
                    for k in range(j-i):
                        translation[src[i+k]] = nn[k]
                        translation5[src[i+k]] = nn5[k]
                        translation10[src[i+k]] = nn10[k]
                accuracy = np.mean([1 if translation[i] in src2trg[i] else 0 for i in src])
                mean=0
                for i in src:
                    for k in translation5[i]:
                        if k in src2trg[i]:
                            mean+=1
                            break

                mean/=len(src)
                accuracy5 = mean

                mean=0
                for i in src:
                    for k in translation10[i]:
                        if k in src2trg[i]:
                            mean+=1
                            break

                mean/=len(src)
                accuracy10 = mean
                print('Coverage:{0:7.2%}  Accuracy:{1:7.2%}  Accuracy(Top 5):{2:7.2%}  Accuracy(Top 10):{3:7.2%}'.format(coverage, accuracy, accuracy5, accuracy10))
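
The evaluation loop above implements CSLS retrieval: each source/target similarity is penalised by the mean similarity of the k nearest neighbours on either side. A compact numpy sketch of the same scoring, assuming length-normalised embeddings so that dot products are cosine similarities:

import numpy as np

def csls_scores(xw, zw, k=10):
    sims = xw.dot(zw.T)                                   # src x trg cosine matrix
    # mean similarity to the k nearest neighbours in the other language
    r_src = np.mean(-np.partition(-sims, k - 1, axis=1)[:, :k], axis=1)
    r_trg = np.mean(-np.partition(-sims.T, k - 1, axis=1)[:, :k], axis=1)
    return 2 * sims - r_src[:, None] - r_trg[None, :]     # higher = better match

# nearest target for each source word: csls_scores(xw, zw).argmax(axis=1)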