Exemple #1
0
def visualize_tree(model, tree, y_true, save_path, label_dict):
    """Draw ``tree`` over the model's leaf embeddings and save the figure.

    Args:
        model: object exposing ``normalize_embeddings`` and
            ``embeddings.weight.data`` (presumably a HypHC model -- confirm
            against callers).
        tree: tree structure forwarded unchanged to ``plot_tree_from_leaves``.
        y_true: forwarded to ``plot_tree_from_leaves`` as ``labels``.
        save_path: destination handed to ``fig.savefig``.
        label_dict: forwarded to ``plot_tree_from_leaves`` as ``label_dict``.
    """
    leaves_embeddings = model.normalize_embeddings(model.embeddings.weight.data)
    leaves_embeddings = project(leaves_embeddings).detach().cpu().numpy()
    fig = plt.figure(figsize=(5, 5))
    try:
        ax = fig.add_subplot(111)
        plot_tree_from_leaves(ax, tree, leaves_embeddings, label_dict=label_dict, labels=y_true)
        fig.savefig(save_path)
    finally:
        # pyplot keeps a reference to every figure it creates until the
        # figure is closed explicitly; without this, repeated calls pile up
        # open figures (and their memory) for the life of the process.
        plt.close(fig)
Exemple #2
0
 def decode_tree(self, fast_decoding):
     """Decode a binary tree (networkx ``DiGraph``) from the leaves' embeddings.

     The points are assumed to be normalized to the same radius.

     Args:
         fast_decoding: when truthy, use ``nn_merge_uf_fast_np`` (with
             ``partition_ratio=1.2``); otherwise use ``sl_from_embeddings``.

     Returns:
         A ``nx.DiGraph`` with one edge ``parent -> child`` for every entry
         of the decoded parent array except the last one.
     """
     points = self.normalize_embeddings(self.embeddings.weight.data)
     points = project(points).detach().cpu()

     def sim_fn(x, y):
         # Similarity = dot product along the last dimension.
         return torch.sum(x * y, dim=-1)

     if not fast_decoding:
         parents = sl_from_embeddings(points, sim_fn)
     else:
         parents = nn_merge_uf_fast_np(points, S=sim_fn, partition_ratio=1.2)

     tree = nx.DiGraph()
     # The final entry of ``parents`` is skipped (presumably the root, which
     # has no parent -- confirm against the decoding helpers).
     for child, parent in enumerate(parents[:-1]):
         tree.add_edge(parent, child)
     return tree
Exemple #3
0
 def __init__(self, n_nodes=1, rank=2, temperature=0.05, init_size=1e-3, max_scale=1. - 1e-3):
     """Set up the embedding table, the learnable scale and hyper-parameters.

     Args:
         n_nodes: number of rows in the embedding table.
         rank: embedding dimension.
         temperature: stored on the instance as ``self.temperature``.
         init_size: initial value of the learnable ``scale`` parameter; it
             also scales the random initial embeddings.
         max_scale: stored on the instance as ``self.max_scale``.
     """
     super().__init__()
     # Plain (non-learnable) hyper-parameters.
     self.n_nodes = n_nodes
     self.temperature = temperature
     self.init_size = init_size
     self.max_scale = max_scale

     # Learnable pieces: one ``rank``-dimensional vector per node and a scale.
     self.embeddings = nn.Embedding(n_nodes, rank)
     self.scale = nn.Parameter(torch.Tensor([init_size]), requires_grad=True)

     # TODO: consider changing init_size.
     # Replace the default embedding initialisation: draw uniformly from
     # [-1, 1), multiply by ``scale`` and pass the result through ``project``.
     # (The rows of ``embeddings.weight`` are presumably the data points'
     # coordinates -- to be confirmed.)
     uniform_init = 2 * torch.rand((n_nodes, rank)) - 1.0
     self.embeddings.weight.data = project(self.scale * uniform_init)
Exemple #4
0
 def __init__(self, n_nodes=1, rank=2, temperature=0.05, init_size=1e-3,
              max_scale=1. - 1e-3):
     """Create the node embeddings and the learnable scale.

     Args:
         n_nodes: number of embedded nodes (rows of the embedding table).
         rank: dimension of each embedding vector.
         temperature: kept as ``self.temperature``.
         init_size: starting value of ``self.scale`` and scale of the random
             initial embeddings.
         max_scale: kept as ``self.max_scale``.
     """
     super().__init__()
     self.n_nodes, self.temperature = n_nodes, temperature
     self.init_size, self.max_scale = init_size, max_scale

     self.embeddings = nn.Embedding(n_nodes, rank)
     initial_scale = torch.Tensor([init_size])
     self.scale = nn.Parameter(initial_scale, requires_grad=True)

     # Overwrite the default embedding weights with uniform noise in
     # [-1, 1), scaled by ``scale`` and passed through ``project``.
     noise = torch.rand((n_nodes, rank))
     self.embeddings.weight.data = project(self.scale * (2 * noise - 1.0))
Exemple #5
0
    # load dataset
    # Read the saved run configuration; the ``with`` block guarantees the
    # file handle is closed (a bare ``json.load(open(...))`` leaves it open
    # until garbage collection).
    with open(os.path.join(args.model_dir, "config.json")) as config_file:
        config = json.load(config_file)
    config_args = argparse.Namespace(**config)
    _, y_true, similarities = load_data(config_args.dataset)

    # build HypHC model
    model = HypHC(similarities.shape[0], config_args.rank,
                  config_args.temperature, config_args.init_size,
                  config_args.max_scale)
    # NOTE(review): ``torch.load`` unpickles the file, so only point
    # ``args.model_dir`` at checkpoints from a trusted source.
    params = torch.load(os.path.join(args.model_dir, f"model_{args.seed}.pkl"),
                        map_location=torch.device('cpu'))
    # ``strict=False`` tolerates missing/unexpected keys in the checkpoint.
    model.load_state_dict(params, strict=False)
    model.eval()

    # decode tree
    tree = model.decode_tree(fast_decoding=True)
    leaves_embeddings = model.normalize_embeddings(
        model.embeddings.weight.data)
    print(leaves_embeddings.shape)
    # Dump the normalized embeddings before projection ...
    with open("leave_embeddings.pkl", "wb") as f:
        pickle.dump(leaves_embeddings, f)
    leaves_embeddings = project(leaves_embeddings).detach().cpu().numpy()
    # ... and again after projection, this time as a NumPy array.
    with open("leave_embeddings_projected.pkl", "wb") as f:
        pickle.dump(leaves_embeddings, f)
    print(leaves_embeddings.shape)
    # Plot the decoded tree on top of the projected leaves and save it next
    # to the model files.
    fig = plt.figure(figsize=(5, 5))
    ax = fig.add_subplot(111)
    ax = plot_tree_from_leaves(ax, tree, leaves_embeddings, labels=y_true)
    fig.savefig(os.path.join(args.model_dir, f"embeddings_{args.seed}.png"))
Exemple #6
0
    def step(self, closure=None):
        """Performs a single optimization step.

        Arguments
        ---------
        closure : callable (optional)
            A closure that reevaluates the model
            and returns the loss.

        Returns
        -------
        The value returned by ``closure``, or ``None`` when no closure is
        given.

        Raises
        ------
        RuntimeError
            If a parameter has a sparse gradient.
        """
        loss = None
        if closure is not None:
            loss = closure()
        with torch.no_grad():
            for group in self.param_groups:
                if "step" not in group:
                    group["step"] = 0
                betas = group["betas"]
                weight_decay = group["weight_decay"]
                eps = group["eps"]
                learning_rate = group["lr"]
                amsgrad = group["amsgrad"]
                for point in group["params"]:
                    grad = point.grad
                    if grad is None:
                        continue
                    if grad.is_sparse:
                        raise RuntimeError(
                            "Riemannian Adam does not support sparse gradients yet (PR is welcome)"
                        )

                    state = self.state[point]

                    # State initialization
                    if len(state) == 0:
                        state["step"] = 0
                        # Exponential moving average of gradient values
                        state["exp_avg"] = torch.zeros_like(point)
                        # Exponential moving average of squared gradient values
                        state["exp_avg_sq"] = torch.zeros_like(point)
                        if amsgrad:
                            # Maintains max of all exp. moving avg. of sq. grad. values
                            state["max_exp_avg_sq"] = torch.zeros_like(point)
                    # make local variables for easy access
                    exp_avg = state["exp_avg"]
                    exp_avg_sq = state["exp_avg_sq"]
                    # actual step
                    # NOTE: in-place, so ``point.grad`` itself receives the
                    # weight-decay term.
                    grad.add_(point, alpha=weight_decay)
                    # Presumably converts the Euclidean gradient into a
                    # Riemannian one (helper defined elsewhere -- confirm).
                    grad = egrad2rgrad(point, grad)
                    exp_avg.mul_(betas[0]).add_(grad, alpha=1 - betas[0])
                    exp_avg_sq.mul_(betas[1]).add_(inner(point, grad),
                                                   alpha=1 - betas[1])
                    if amsgrad:
                        max_exp_avg_sq = state["max_exp_avg_sq"]
                        # Maintains the maximum of all 2nd moment running avg. till now
                        torch.max(max_exp_avg_sq,
                                  exp_avg_sq,
                                  out=max_exp_avg_sq)
                        # Use the max. for normalizing running avg. of gradient
                        denom = max_exp_avg_sq.sqrt().add_(eps)
                    else:
                        denom = exp_avg_sq.sqrt().add_(eps)
                    # Bias correction must use the number of updates applied
                    # to *this* parameter. The counter is therefore kept in
                    # the per-parameter state and advanced exactly once per
                    # update (a shared, twice-incremented group counter would
                    # skew the exponents).
                    state["step"] += 1
                    bias_correction1 = 1 - betas[0]**state["step"]
                    bias_correction2 = 1 - betas[1]**state["step"]
                    step_size = (learning_rate * bias_correction2**0.5 /
                                 bias_correction1)

                    # copy the state, we need it for retraction
                    # get the direction for ascend
                    direction = exp_avg / denom
                    # transport the exponential averaging to the new point
                    new_point = project(expmap(-step_size * direction, point))
                    exp_avg_new = ptransp(point, new_point, exp_avg)
                    # use copy only for user facing point
                    copy_or_set_(point, new_point)
                    exp_avg.set_(exp_avg_new)

                # Group-level counter kept for compatibility with code that
                # reads ``group["step"]``: one tick per ``step()`` call.
                group["step"] += 1
        return loss