def visualize_tree(model, tree, y_true, save_path, label_dict):
    """Draw ``tree`` over the model's leaf embeddings and save the figure.

    Args:
        model: object exposing ``normalize_embeddings`` and
            ``embeddings.weight.data``.
        tree: tree structure handed unchanged to ``plot_tree_from_leaves``.
        y_true: forwarded to the plotting helper as ``labels``.
        save_path: destination passed to ``Figure.savefig``.
        label_dict: forwarded to the plotting helper as ``label_dict``.
    """
    raw_weights = model.embeddings.weight.data
    # Normalize, project, then move to a NumPy array for plotting.
    points = project(model.normalize_embeddings(raw_weights))
    points = points.detach().cpu().numpy()

    figure = plt.figure(figsize=(5, 5))
    axis = figure.add_subplot(111)
    plot_tree_from_leaves(axis, tree, points, label_dict=label_dict, labels=y_true)
    # NOTE(review): the figure is not closed here; confirm whether callers
    # invoke this repeatedly and should close it after saving.
    figure.savefig(save_path)
def decode_tree(self, fast_decoding):
    """Build a binary tree (nx graph) from the leaves' embeddings.

    Assumes points are normalized to the same radius.

    Args:
        fast_decoding: when truthy, parents are computed with
            ``nn_merge_uf_fast_np`` (``partition_ratio=1.2``); otherwise
            with ``sl_from_embeddings``.

    Returns:
        An ``nx.DiGraph`` with one edge ``parents[i] -> i`` for every
        index ``i`` except the last entry of ``parents``.
    """

    def dot_similarity(x, y):
        # Similarity is the dot product along the last dimension.
        return torch.sum(x * y, dim=-1)

    normalized = self.normalize_embeddings(self.embeddings.weight.data)
    points = project(normalized).detach().cpu()

    if fast_decoding:
        parents = nn_merge_uf_fast_np(points, S=dot_similarity, partition_ratio=1.2)
    else:
        parents = sl_from_embeddings(points, dot_similarity)

    tree = nx.DiGraph()
    # The final entry of ``parents`` is skipped (presumably the root, which
    # has no parent -- confirm against the decoding helpers).
    tree.add_edges_from(
        (parent, child) for child, parent in enumerate(parents[:-1])
    )
    return tree
def __init__(self, n_nodes=1, rank=2, temperature=0.05, init_size=1e-3, max_scale=1. - 1e-3):
    """Create the embedding table and the learnable scale parameter.

    Args:
        n_nodes: number of rows in the embedding table.
        rank: number of columns of the embedding table.
        temperature: stored on the instance as ``self.temperature``.
        init_size: initial value of the learnable ``scale`` parameter.
        max_scale: stored on the instance as ``self.max_scale``.
    """
    super(HypHC, self).__init__()

    # Plain hyper-parameters kept for later use by other methods.
    self.n_nodes = n_nodes
    self.temperature = temperature
    self.init_size = init_size
    self.max_scale = max_scale

    self.embeddings = nn.Embedding(n_nodes, rank)
    # TODO: consider changing init_size
    self.scale = nn.Parameter(torch.Tensor([init_size]), requires_grad=True)

    # NOTE: embeddings.weight.data is suspected to hold the data points'
    # coordinates. The default initialisation is replaced by uniform noise
    # in [-1, 1) multiplied by ``scale`` and passed through ``project``.
    uniform_noise = 2 * torch.rand((n_nodes, rank)) - 1.0
    self.embeddings.weight.data = project(self.scale * uniform_noise)
def __init__(self, n_nodes=1, rank=2, temperature=0.05, init_size=1e-3, max_scale=1. - 1e-3):
    """Initialise the embedding table and the learnable scale.

    Args:
        n_nodes: number of rows in the embedding table.
        rank: number of columns of the embedding table.
        temperature: kept as ``self.temperature``.
        init_size: starting value of the ``scale`` parameter.
        max_scale: kept as ``self.max_scale``.
    """
    super(HypHC, self).__init__()

    # Scalar settings are stored first; they do not affect initialisation.
    self.n_nodes = n_nodes
    self.temperature = temperature
    self.init_size = init_size
    self.max_scale = max_scale

    self.embeddings = nn.Embedding(n_nodes, rank)
    self.scale = nn.Parameter(torch.Tensor([init_size]), requires_grad=True)

    # Overwrite the default embedding weights: sample uniformly in [-1, 1),
    # multiply by ``scale`` and pass the result through ``project``.
    initial_points = 2 * torch.rand((n_nodes, rank)) - 1.0
    self.embeddings.weight.data = project(self.scale * initial_points)
# load dataset
# Read the saved training configuration inside a context manager so the file
# handle is closed deterministically (the bare open() call leaked it).
with open(os.path.join(args.model_dir, "config.json")) as config_file:
    config = json.load(config_file)
config_args = argparse.Namespace(**config)
_, y_true, similarities = load_data(config_args.dataset)

# build HypHC model
model = HypHC(similarities.shape[0], config_args.rank, config_args.temperature,
              config_args.init_size, config_args.max_scale)
# NOTE(review): torch.load unpickles the checkpoint file; only point
# args.model_dir at checkpoints from a trusted source.
params = torch.load(os.path.join(args.model_dir, f"model_{args.seed}.pkl"),
                    map_location=torch.device('cpu'))
# strict=False: key mismatches between checkpoint and model are tolerated.
model.load_state_dict(params, strict=False)
model.eval()

# decode tree
tree = model.decode_tree(fast_decoding=True)
leaves_embeddings = model.normalize_embeddings(model.embeddings.weight.data)
print(leaves_embeddings.shape)
# Dump the normalized embeddings before projection ...
with open("leave_embeddings.pkl", "wb") as f:
    pickle.dump(leaves_embeddings, f)
leaves_embeddings = project(leaves_embeddings).detach().cpu().numpy()
# ... and again after projection, as a NumPy array.
with open("leave_embeddings_projected.pkl", "wb") as f:
    pickle.dump(leaves_embeddings, f)
print(leaves_embeddings.shape)

# Plot the decoded tree over the projected leaves and save it next to the model.
fig = plt.figure(figsize=(5, 5))
ax = fig.add_subplot(111)
ax = plot_tree_from_leaves(ax, tree, leaves_embeddings, labels=y_true)
fig.savefig(os.path.join(args.model_dir, f"embeddings_{args.seed}.png"))
def step(self, closure=None):
    """Apply one optimization step to every parameter that has a gradient.

    Arguments
    ---------
    closure : callable (optional)
        When given, it is called once before the update and its return
        value is returned as the loss.

    Returns
    -------
    The value returned by ``closure``, or ``None`` when no closure is given.

    Raises
    ------
    RuntimeError
        If a parameter's gradient is sparse.
    """
    loss = closure() if closure is not None else None

    with torch.no_grad():
        for group in self.param_groups:
            # The step counter lives on the parameter group.
            group.setdefault("step", 0)
            betas = group["betas"]
            decay = group["weight_decay"]
            epsilon = group["eps"]
            lr = group["lr"]
            use_amsgrad = group["amsgrad"]

            for param in group["params"]:
                grad = param.grad
                if grad is None:
                    continue
                if grad.is_sparse:
                    raise RuntimeError(
                        "Riemannian Adam does not support sparse gradients yet (PR is welcome)"
                    )

                state = self.state[param]
                if not state:
                    # Lazily create the per-parameter buffers.
                    state["step"] = 0
                    # Running average of the gradient.
                    state["exp_avg"] = torch.zeros_like(param)
                    # Running average of the squared gradient.
                    state["exp_avg_sq"] = torch.zeros_like(param)
                    if use_amsgrad:
                        # Running maximum of the squared-gradient average.
                        state["max_exp_avg_sq"] = torch.zeros_like(param)

                first_moment = state["exp_avg"]
                second_moment = state["exp_avg_sq"]

                # Weight decay is added to the gradient in place before it is
                # converted with egrad2rgrad.
                grad.add_(param, alpha=decay)
                rgrad = egrad2rgrad(param, grad)

                first_moment.mul_(betas[0])
                first_moment.add_(rgrad, alpha=1 - betas[0])
                second_moment.mul_(betas[1])
                second_moment.add_(inner(param, rgrad), alpha=1 - betas[1])

                if use_amsgrad:
                    # Keep the element-wise maximum of the second moment seen
                    # so far and normalize with it.
                    running_max = state["max_exp_avg_sq"]
                    torch.max(running_max, second_moment, out=running_max)
                    normalizer = running_max
                else:
                    normalizer = second_moment
                denom = normalizer.sqrt().add_(epsilon)

                group["step"] += 1
                bias_correction1 = 1 - betas[0] ** group["step"]
                bias_correction2 = 1 - betas[1] ** group["step"]
                step_size = lr * bias_correction2 ** 0.5 / bias_correction1

                # Move against the normalized direction, project the result,
                # then transport the first moment to the new point.
                direction = first_moment / denom
                moved = project(expmap(-step_size * direction, param))
                transported = ptransp(param, moved, first_moment)

                # Write the new point into the user-facing parameter.
                copy_or_set_(param, moved)
                first_moment.set_(transported)

                # NOTE(review): the group counter is incremented a second
                # time per parameter, exactly as in the original; its nesting
                # level was reconstructed from flattened source -- confirm.
                group["step"] += 1
    return loss