def __init__(
    self,
    in_features,
    out_features,
    bias=True,
    *,
    ball: geoopt.PoincareBall,
    ball_out=None,
    learn_origin=False,
):
    """Set up a linear layer whose bias lives on a Poincare ball.

    Parameters
    ----------
    in_features, out_features, bias
        Forwarded unchanged to the parent constructor.
    ball
        Manifold stored as ``self.ball`` and used for the source origin.
    ball_out
        Manifold stored as ``self.ball_out``; falls back to ``ball`` when
        ``None``.
    learn_origin
        When true, the source/target origins become manifold parameters;
        otherwise both are registered as empty (``None``) buffers.
    """
    # Manifolds with their own parameters (e.g. the Poincare ball) must be
    # attached to the owning Module; otherwise device placement of the
    # manifold is hard to get right.
    output_ball = ball if ball_out is None else ball_out
    super().__init__(in_features=in_features, out_features=out_features, bias=bias)
    self.ball = ball
    self.ball_out = output_ball
    if self.bias is not None:
        self.bias = geoopt.ManifoldParameter(self.bias, manifold=self.ball_out)
    if not learn_origin:
        self.register_buffer("source_origin", None)
        self.register_buffer("target_origin", None)
    else:
        self.source_origin = geoopt.ManifoldParameter(self.ball.origin(in_features))
        self.target_origin = geoopt.ManifoldParameter(
            self.ball_out.origin(out_features)
        )
    self.reset_parameters()
def __init__(self, feature_num, word_embed, label_embed, d_ball=2, hidden_size=5,
             if_gru=True, default_dtype=th.float64, **kwargs):
    """Wrap the embeddings as Poincare-ball parameters and build the layers.

    ``word_embed`` and ``label_embed`` each get their own ``PoincareBall``
    instance. The recurrent unit is ``hyperGRU`` when ``if_gru`` is truthy,
    ``hyperRNN`` otherwise. Two dense layers map ``feature_num`` inputs to
    ``int(feature_num / 2)`` and then to a single output. Extra ``kwargs``
    go to the parent constructor.
    """
    super().__init__(**kwargs)
    self.hidden_size = hidden_size
    self.d_ball = d_ball
    self.word_embed = gt.ManifoldParameter(word_embed, manifold=gt.PoincareBall())
    self.label_embed = gt.ManifoldParameter(label_embed, manifold=gt.PoincareBall())
    self.default_dtype = default_dtype

    # Both recurrent cells take the same keyword arguments.
    recurrent_cls = hyperGRU if if_gru else hyperRNN
    self.rnn = recurrent_cls(
        input_size=word_embed.shape[1],
        hidden_size=self.hidden_size,
        d_ball=self.d_ball,
        default_dtype=self.default_dtype,
    )

    half_width = int(feature_num / 2)
    self.dense_1 = nn.Linear(feature_num, half_width)
    self.dense_2 = nn.Linear(half_width, 1)
def __init__(self, num_embeddings, embedding_dim, manifold=geoopt.Euclidean(), _weight=None):
    """Create the embedding table as a manifold parameter.

    ``embedding_dim`` may be an int (promoted to a 1-tuple) or a tuple of
    trailing dimensions. When ``_weight`` is omitted a tensor of shape
    ``(num_embeddings, *embedding_dim)`` is allocated and
    ``reset_parameters`` is called; a supplied ``_weight`` must already
    have exactly that shape and is used as-is.
    """
    super(LookupEmbedding, self).__init__()
    if isinstance(embedding_dim, int):
        embedding_dim = (embedding_dim, )
    self.num_embeddings = num_embeddings
    self.embedding_dim = embedding_dim
    self.manifold = manifold

    needs_init = _weight is None
    if needs_init:
        _weight = torch.Tensor(num_embeddings, *embedding_dim)
    else:
        assert _weight.shape == (
            num_embeddings,
            *embedding_dim,
        ), "_weight MUST be of shape (num_embeddings, *embedding_dim)"
    self.weight = geoopt.ManifoldParameter(_weight, manifold=self.manifold)
    if needs_init:
        # Freshly allocated storage is uninitialised; fill it.
        self.reset_parameters()
def __init__(self, feature_num, word_embed, label_embed, hidden_size=5, if_gru=False, **kwargs):
    """Wrap the embeddings as Poincare-ball parameters and build the layers.

    ``word_embed`` and ``label_embed`` each get their own ``PoincareBall``
    instance. The recurrent unit is ``hyperGRU`` when ``if_gru`` is truthy,
    ``hyperRNN`` otherwise. Two dense layers map ``feature_num`` inputs to
    ``int(feature_num / 2)`` and then to a single output. Extra ``kwargs``
    go to the parent constructor.
    """
    super().__init__(**kwargs)
    self.hidden_size = hidden_size
    self.word_embed = gt.ManifoldParameter(word_embed, manifold=gt.PoincareBall())
    self.label_embed = gt.ManifoldParameter(label_embed, manifold=gt.PoincareBall())

    recurrent_cls = hyperGRU if if_gru else hyperRNN
    self.rnn = recurrent_cls(input_size=word_embed.shape[1],
                             hidden_size=self.hidden_size)

    half_width = int(feature_num / 2)
    self.dense_1 = nn.Linear(feature_num, half_width)
    self.dense_2 = nn.Linear(half_width, 1)
def __init__(self, c, args):
    """Build the two classifier heads and the point/tangent parameters.

    ``c`` is forwarded to the parent constructor and used as the curvature
    argument of the Poincare ball. ``args`` supplies the configuration
    (``manifold``, ``dim``, ``act``, ``dataset``, ``n_classes``, ``concat``,
    ``alpha``, ``dropout``, ``bias``, ``atten``, ``dist``, ``device``,
    ``drop_e``, ``drop_h``).
    """
    super(DualDecoder, self).__init__(c)
    self.manifold = getattr(manifolds, args.manifold)()
    self.in_features = args.dim
    activation = getattr(F, args.act)

    # The 'pubmed' dataset uses 8 heads without concatenation; every other
    # dataset uses a single head and the configured concat flag.
    if args.dataset == 'pubmed':
        n_heads, concat = 8, False
    else:
        n_heads, concat = 1, args.concat
    self.cls_e = GATConv(self.in_features, args.n_classes, n_heads, concat,
                         args.alpha, args.dropout, args.bias, lambda x: x)
    self.cls_h = HGATConv(self.manifold, self.in_features, args.dim, n_heads,
                          concat, args.alpha, args.dropout, args.bias,
                          activation, atten=args.atten, dist=args.dist)

    self.out_features = args.n_classes
    self.c = c
    self.ball = geoopt.PoincareBall(c=c)
    self.sphere = geoopt.manifolds.Sphere()
    self.scale = nn.Parameter(torch.zeros(self.out_features))

    shape = (self.out_features, self.in_features)
    # Random draws happen in this order: point first, then tangent.
    ball_point = pmath.expmap0((torch.randn(*shape) / 4).to(args.device), c=c)
    direction = torch.randn(*shape)
    self.point = geoopt.ManifoldParameter(ball_point, manifold=self.ball)
    with torch.no_grad():
        self.tangent = geoopt.ManifoldParameter(direction, manifold=self.sphere).proj_()
    self.decoder_name = 'DualDecoder'

    # prob weight
    self.w_e = nn.Linear(args.n_classes, 1, bias=False)
    self.w_h = nn.Linear(args.dim, 1, bias=False)
    self.drop_e = args.drop_e
    self.drop_h = args.drop_h
    self.reset_param()
def __init__(self, input_size, hidden_size):
    """Allocate the recurrent weight ``w``, input weight ``u`` and bias ``b``.

    ``w`` has shape ``(hidden_size, 2, hidden_size, 2)`` and ``u`` has shape
    ``(input_size, 2, hidden_size, 2)``; both are drawn uniformly from
    ``[-k, k]`` with ``k = sqrt(1 / hidden_size)``. ``b`` is a zero tensor
    of shape ``(hidden_size, 2)`` attached to a ``PoincareBall``.
    """
    super(hyperRNN, self).__init__()
    self.input_size = input_size
    self.hidden_size = hidden_size
    bound = (1 / hidden_size) ** 0.5

    recurrent = gt.ManifoldTensor(hidden_size, 2, hidden_size, 2)
    self.w = gt.ManifoldParameter(recurrent.uniform_(-bound, bound))

    projection = gt.ManifoldTensor(input_size, 2, hidden_size, 2)
    self.u = gt.ManifoldParameter(projection.uniform_(-bound, bound))

    offset = gt.ManifoldTensor(hidden_size, 2, manifold=gt.PoincareBall())
    self.b = gt.ManifoldParameter(offset.zero_())
def __init__(self, in_features, out_features):
    """Allocate the dense weight ``w`` and the bias ``b``.

    ``w`` has shape ``(in_features, out_features)`` and is drawn uniformly
    from ``[-k, k]`` with ``k = sqrt(1 / in_features)``; ``b`` is a zero
    vector of length ``out_features``.
    """
    super(hyperDense, self).__init__()
    self.in_features = in_features
    self.out_features = out_features
    bound = (1 / in_features) ** 0.5

    kernel = gt.ManifoldTensor(in_features, out_features)
    self.w = gt.ManifoldParameter(kernel.uniform_(-bound, bound))

    offset = gt.ManifoldTensor(out_features)
    self.b = gt.ManifoldParameter(offset.zero_())
def __init__(self, input_size, hidden_size, ball):
    """Allocate the nine gate parameters (``z``, ``r``, ``h``).

    For each gate there is a hidden-to-hidden weight ``w_*`` of shape
    ``(hidden_size, hidden_size)``, an input-to-hidden weight ``u_*`` of
    shape ``(input_size, hidden_size)`` (both uniform in ``[-k, k]`` with
    ``k = sqrt(1 / hidden_size)``) and a zero bias ``b_*`` attached to
    ``ball``.
    """
    super().__init__()
    self.input_size = input_size
    self.hidden_size = hidden_size
    self.ball = ball
    bound = (1 / hidden_size) ** 0.5
    gates = ("z", "r", "h")

    def _uniform_weight(n_rows):
        # Uniformly initialised matrix with ``hidden_size`` columns.
        raw = gt.ManifoldTensor(n_rows, hidden_size).uniform_(-bound, bound)
        return gt.ManifoldParameter(raw)

    def _zero_bias():
        # Zero vector attached to this cell's ball.
        raw = gt.ManifoldTensor(hidden_size, manifold=self.ball).zero_()
        return gt.ManifoldParameter(raw)

    # Creation order (w_*, then u_*, then b_*) fixes both the parameter
    # registration order and the order of random draws.
    for gate in gates:
        setattr(self, "w_" + gate, _uniform_weight(hidden_size))
    for gate in gates:
        setattr(self, "u_" + gate, _uniform_weight(input_size))
    for gate in gates:
        setattr(self, "b_" + gate, _zero_bias())
def __init__(self, input_size, hidden_size):
    """Allocate the nine gate parameters (``z``, ``r``, ``h``).

    For each gate there is a hidden-to-hidden weight ``w_*`` of shape
    ``(hidden_size, hidden_size)``, an input-to-hidden weight ``u_*`` of
    shape ``(input_size, hidden_size)`` (both uniform in ``[-k, k]`` with
    ``k = sqrt(1 / hidden_size)``) and a zero bias ``b_*``. Each bias is
    attached to its own freshly created ``PoincareBall``.
    """
    super(GRUCell, self).__init__()
    self.input_size = input_size
    self.hidden_size = hidden_size
    bound = (1 / hidden_size) ** 0.5
    gates = ("z", "r", "h")

    def _uniform_weight(n_rows):
        # Uniformly initialised matrix with ``hidden_size`` columns.
        raw = gt.ManifoldTensor(n_rows, hidden_size).uniform_(-bound, bound)
        return gt.ManifoldParameter(raw)

    def _zero_bias():
        # A new ball instance per bias, as in the explicit form.
        raw = gt.ManifoldTensor(hidden_size, manifold=gt.PoincareBall()).zero_()
        return gt.ManifoldParameter(raw)

    # Creation order (w_*, then u_*, then b_*) fixes both the parameter
    # registration order and the order of random draws.
    for gate in gates:
        setattr(self, "w_" + gate, _uniform_weight(hidden_size))
    for gate in gates:
        setattr(self, "u_" + gate, _uniform_weight(input_size))
    for gate in gates:
        setattr(self, "b_" + gate, _zero_bias())
def __init__(self, in_features, out_features, c=1.0):
    """Create the ``scale``, ``point`` and ``tangent`` parameters.

    ``scale`` starts at zero (one entry per output). ``point`` is a random
    normal sample divided by 4 and passed through ``pmath.expmap0`` with
    the given ``c``; it is attached to a ``PoincareBall(c=c)``. ``tangent``
    is a random normal sample projected onto a ``Sphere`` manifold.
    """
    super().__init__()
    self.in_features = in_features
    self.out_features = out_features
    self.ball = geoopt.PoincareBall(c=c)
    self.sphere = geoopt.manifolds.Sphere()
    self.scale = torch.nn.Parameter(torch.zeros(out_features))

    shape = (out_features, in_features)
    # Random draws happen in this order: point first, then tangent.
    ball_point = pmath.expmap0(torch.randn(*shape) / 4, c=c)
    direction = torch.randn(*shape)
    self.point = geoopt.ManifoldParameter(ball_point, manifold=self.ball)
    with torch.no_grad():
        self.tangent = geoopt.ManifoldParameter(direction, manifold=self.sphere).proj_()
def __init__(self, input_size, hidden_size):
    """Allocate the recurrent weight ``w``, input weight ``u`` and bias ``b``.

    Both weights use Xavier-uniform bounds ``sqrt(6 / (fan_in + fan_out))``.
    The bias is a small random normal vector (scaled by ``1e-5``) attached
    to the Euclidean manifold stored on ``self.manifold``.
    """
    super(EuclRNN, self).__init__()
    self.manifold = gt.Euclidean()
    self.input_size = input_size
    self.hidden_size = hidden_size

    # Xavier-uniform limits for the two weight matrices.
    limit_w = (6 / (hidden_size + hidden_size)) ** 0.5
    limit_u = (6 / (input_size + hidden_size)) ** 0.5

    recurrent = gt.ManifoldTensor(hidden_size, hidden_size)
    self.w = gt.ManifoldParameter(recurrent.uniform_(-limit_w, limit_w))
    projection = gt.ManifoldTensor(input_size, hidden_size)
    self.u = gt.ManifoldParameter(projection.uniform_(-limit_u, limit_u))

    small_noise = torch.randn(hidden_size) * 1e-5
    self.b = gt.ManifoldParameter(small_noise, manifold=self.manifold)
def __init__(self, input_size, hidden_size):
    """Allocate the recurrent weight ``w``, input weight ``u`` and bias ``b``.

    Both weights use Xavier-uniform bounds ``sqrt(6 / (fan_in + fan_out))``.
    The bias is a small random normal vector (scaled by ``1e-5``) mapped
    through ``pmath.expmap0`` with the ball's ``k`` and attached to the
    Poincare ball stored on ``self.ball``.
    """
    super(MobiusRNN, self).__init__()
    self.ball = gt.PoincareBall()
    self.input_size = input_size
    self.hidden_size = hidden_size

    # Xavier-uniform limits for the two weight matrices.
    limit_w = (6 / (hidden_size + hidden_size)) ** 0.5
    limit_u = (6 / (input_size + hidden_size)) ** 0.5

    recurrent = gt.ManifoldTensor(hidden_size, hidden_size)
    self.w = gt.ManifoldParameter(recurrent.uniform_(-limit_w, limit_w))
    projection = gt.ManifoldTensor(input_size, hidden_size)
    self.u = gt.ManifoldParameter(projection.uniform_(-limit_u, limit_u))

    small_noise = torch.randn(hidden_size) * 1e-5
    on_ball = pmath.expmap0(small_noise, k=self.ball.k)
    self.b = gt.ManifoldParameter(on_ball, manifold=self.ball)
def test_deepcopy():
    """A deep copy of a default-constructed manifold tensor/parameter keeps its type."""
    for manifold_cls in (geoopt.ManifoldTensor, geoopt.ManifoldParameter):
        duplicate = copy.deepcopy(manifold_cls())
        assert isinstance(duplicate, manifold_cls)
def __init__(self, input_size, hidden_size, num_layers=1, bias=True, nonlin=None):
    """Allocate per-layer GRU weights and (optionally) biases.

    Parameters
    ----------
    input_size
        Width of the input to the first layer.
    hidden_size
        Width of the hidden state; each weight stacks three gates, hence
        the ``3 * hidden_size`` leading dimension.
    num_layers
        Number of stacked layers; layers after the first take
        ``hidden_size`` inputs.
    bias
        When truthy, one ``(3, hidden_size)`` bias per layer is created as
        a manifold parameter on ``self.manifold``; otherwise ``self.bias``
        is registered as an empty (``None``) buffer.
    nonlin
        Stored as ``self.nonlin``.
    """
    super().__init__()
    self.manifold = gt.Euclidean()
    self.input_size = input_size
    self.hidden_size = hidden_size
    self.num_layers = num_layers
    # NOTE: ``self.bias`` is deliberately NOT pre-assigned the boolean flag.
    # A plain attribute named "bias" makes ``register_buffer("bias", None)``
    # raise ``KeyError("attribute 'bias' already exists")`` for bias=False.
    self.weight_ih = torch.nn.ParameterList(
        [
            torch.nn.Parameter(
                torch.Tensor(3 * hidden_size, input_size if i == 0 else hidden_size)
            )
            for i in range(num_layers)
        ]
    )
    self.weight_hh = torch.nn.ParameterList(
        [
            torch.nn.Parameter(torch.Tensor(3 * hidden_size, hidden_size))
            for _ in range(num_layers)
        ]
    )
    if bias:
        # A distinct local name keeps the ``bias`` argument from being shadowed.
        self.bias = torch.nn.ParameterList(
            [
                gt.ManifoldParameter(
                    torch.randn(3, hidden_size) * 1e-5, manifold=self.manifold
                )
                for _ in range(num_layers)
            ]
        )
    else:
        self.register_buffer("bias", None)
    self.nonlin = nonlin
    self.reset_parameters()
def __init__(self, input_size, hidden_size, num_layers=1, bias=True, nonlin=None,
             hyperbolic_input=True, hyperbolic_hidden_state0=True, c=1.0):
    """Allocate per-layer GRU weights and (optionally) Poincare-ball biases.

    Parameters
    ----------
    input_size
        Width of the input to the first layer.
    hidden_size
        Width of the hidden state; each weight stacks three gates, hence
        the ``3 * hidden_size`` leading dimension.
    num_layers
        Number of stacked layers; layers after the first take
        ``hidden_size`` inputs.
    bias
        When truthy, one ``(3, hidden_size)`` bias per layer is created by
        mapping small random noise through ``pmath.expmap0`` and attaching
        it to ``self.ball``; otherwise ``self.bias`` is registered as an
        empty (``None``) buffer.
    nonlin, hyperbolic_input, hyperbolic_hidden_state0
        Stored on the instance under the same names.
    c
        Passed to ``gt.PoincareBall(c=c)``.
    """
    super().__init__()
    self.ball = gt.PoincareBall(c=c)
    self.input_size = input_size
    self.hidden_size = hidden_size
    self.num_layers = num_layers
    # NOTE: ``self.bias`` is deliberately NOT pre-assigned the boolean flag.
    # A plain attribute named "bias" makes ``register_buffer("bias", None)``
    # raise ``KeyError("attribute 'bias' already exists")`` for bias=False.
    self.weight_ih = torch.nn.ParameterList(
        [
            torch.nn.Parameter(
                torch.Tensor(3 * hidden_size, input_size if i == 0 else hidden_size)
            )
            for i in range(num_layers)
        ]
    )
    self.weight_hh = torch.nn.ParameterList(
        [
            torch.nn.Parameter(torch.Tensor(3 * hidden_size, hidden_size))
            for _ in range(num_layers)
        ]
    )
    if bias:
        # A distinct local name keeps the ``bias`` argument from being shadowed.
        self.bias = torch.nn.ParameterList(
            [
                gt.ManifoldParameter(
                    pmath.expmap0(torch.randn(3, hidden_size) * 1e-5, k=self.ball.k),
                    manifold=self.ball,
                )
                for _ in range(num_layers)
            ]
        )
    else:
        self.register_buffer("bias", None)
    self.nonlin = nonlin
    self.hyperbolic_input = hyperbolic_input
    self.hyperbolic_hidden_state0 = hyperbolic_hidden_state0
    self.reset_parameters()
def test_adam_lorentz(params):
    """RiemannianAdam drives a Lorentz-manifold parameter onto a projected target."""
    lorentz = geoopt.manifolds.Lorentz(k=torch.Tensor([1.0]))
    torch.manual_seed(42)
    with torch.no_grad():
        point = geoopt.ManifoldParameter(torch.randn(20, 10), manifold=lorentz).proj_()
    target = torch.randn(20, 10)
    target.set_(lorentz.projx(target))

    def closure():
        # Squared Euclidean gap between the target and the current iterate.
        optim.zero_grad()
        objective = (target - point).pow(2).sum()
        objective.backward()
        return objective.item()

    optim = geoopt.optim.RiemannianAdam([point], stabilize=4500, **params)
    assert optim.param_groups[0]["stabilize"] == 4500
    for _ in range(10000):
        if (target - point).norm() < 1e-5:
            break
        optim.step(closure)
    assert point.is_contiguous()
    np.testing.assert_allclose(point.data, target, atol=1e-5, rtol=1e-5)
    # The optimizer state must round-trip and remain usable afterwards.
    optim.load_state_dict(optim.state_dict())
    optim.step(closure)
def test_compare_manifolds():
    """Wrapping a tensor in a parameter with a different manifold raises ValueError."""
    scalar_euclidean = geoopt.Euclidean()
    vector_euclidean = geoopt.Euclidean(ndim=1)
    tensor = geoopt.ManifoldTensor(10, manifold=scalar_euclidean)
    with pytest.raises(ValueError, match="Manifolds do not match"):
        geoopt.ManifoldParameter(tensor, manifold=vector_euclidean)
def test_adam_birkhoff(params):
    """RiemannianAdam drives a Birkhoff-polytope parameter onto a projected target."""
    birkhoff = geoopt.manifolds.BirkhoffPolytope(tol=1e-5)
    torch.manual_seed(42)
    with torch.no_grad():
        point = geoopt.ManifoldParameter(torch.rand(1, 5, 5), manifold=birkhoff).proj_()
    target = torch.rand(1, 5, 5)
    target.set_(birkhoff.projx(target))

    def closure():
        optim.zero_grad()
        objective = (point - target).pow(2).sum()
        # manifold constraint that makes optimization hard if violated
        penalty_rows = ((point.transpose(1, 2) @ point).sum(dim=1) - 1.0).pow(2).sum() * 100
        penalty_cols = ((point.transpose(1, 2) @ point).sum(dim=2) - 1.0).pow(2).sum() * 100
        objective += penalty_rows + penalty_cols
        objective.backward()
        return objective.item()

    optim = geoopt.optim.RiemannianAdam([point], stabilize=1000, **params)
    # The start must differ from the target, otherwise the test is vacuous.
    assert (point - target).norm() > 1e-3
    for _ in range(10000):
        if (point - target).norm() < 1e-3:
            break
        optim.step(closure)
    assert point.is_contiguous()
    np.testing.assert_allclose(point.data, target, atol=1e-3, rtol=1e-3)
def test_rsgd_spd(params):
    """Check that RiemannianSGD moves an SPD-manifold parameter onto a random target.

    ``params`` is forwarded as keyword arguments to ``RiemannianSGD``.
    After convergence the optimizer state is reloaded from its own
    ``state_dict`` and one more step is taken to confirm it still works.
    """
    manifold = geoopt.manifolds.SymmetricPositiveDefinite()
    torch.manual_seed(42)
    with torch.no_grad():
        X = geoopt.ManifoldParameter(manifold.random(2, 2), manifold=manifold).proj_()
    Xstar = manifold.random(2, 2)
    # NOTE(review): the explicit projection of Xstar
    # (``Xstar.set_(manifold.projx(Xstar))``) is disabled here; presumably
    # ``manifold.random`` already returns a valid point -- confirm.

    def closure():
        optim.zero_grad()
        loss = (X - Xstar).pow(2).sum()
        # No extra manifold-constraint penalty is added in this test.
        loss.backward()
        return loss.item()

    optim = geoopt.optim.RiemannianSGD([X], **params)
    # The start must differ from the target, otherwise the test is vacuous.
    assert (X - Xstar).norm() > 1e-5
    for i in range(10000):
        cond = (X - Xstar).norm()
        if cond < 1e-5:
            break
        optim.step(closure)
        # Progress trace: iteration index and the pre-step distance.
        print(i, cond)
    assert X.is_contiguous()
    np.testing.assert_allclose(X.data, Xstar, atol=1e-5)
    optim.load_state_dict(optim.state_dict())
    optim.step(closure)
def test_rsgd_stiefel(params):
    """RiemannianSGD drives a Stiefel-manifold parameter onto a projected target."""
    stiefel = geoopt.manifolds.Stiefel()
    torch.manual_seed(42)
    with torch.no_grad():
        point = geoopt.ManifoldParameter(torch.randn(20, 10), manifold=stiefel).proj_()
    target = torch.randn(20, 10)
    target.set_(stiefel.projx(target))

    def closure():
        optim.zero_grad()
        objective = (point - target).pow(2).sum()
        # manifold constraint that makes optimization hard if violated
        objective += (point.t() @ point - torch.eye(point.shape[1])).pow(2).sum() * 100
        objective.backward()
        return objective.item()

    optim = geoopt.optim.RiemannianSGD([point], **params)
    # The start must differ from the target, otherwise the test is vacuous.
    assert (point - target).norm() > 1e-5
    for _ in range(10000):
        if (point - target).norm() < 1e-5:
            break
        optim.step(closure)
    assert point.is_contiguous()
    np.testing.assert_allclose(point.data, target, atol=1e-5)
    # The optimizer state must round-trip and remain usable afterwards.
    optim.load_state_dict(optim.state_dict())
    optim.step(closure)
def __init__(
    self,
    input_size,
    hidden_size,
    num_layers=2,
    bias=True,
    nonlin=None,
    hyperbolic_input=True,
    hyperbolic_hidden_state0=True,
    k=-1.0,
):
    """Allocate the weights and biases of a two-layer GRU on a Poincare ball.

    Parameters
    ----------
    input_size
        Width of the input to the first layer.
    hidden_size
        Width of the hidden state; each weight stacks three gates, hence
        the ``3 * hidden_size`` leading dimension.
    num_layers
        Must be 2 (asserted).
    bias
        When truthy, one ``(3, hidden_size)`` bias per layer is created on
        ``self.ball``; otherwise ``self.bias``, ``self.bias_1`` and
        ``self.bias_2`` are all ``None``.
    nonlin, hyperbolic_input, hyperbolic_hidden_state0
        Stored on the instance under the same names.
    k
        Curvature as a number or tensor; the ball is built with
        ``c = |k|``.
    """
    super().__init__()
    # TODO: generalize to any number of layers.
    # Current problem: ParameterList doesn't get copied to multiple GPUs when
    # the model is wrapped in DataParallel.
    # Bug source: https://github.com/pytorch/pytorch/issues/36035
    assert num_layers == 2, '====[hyrnn_nets.py] current version only support 2-layer GRU===='
    # Accept plain numbers too: the default ``-1.0`` is a float and has no
    # ``.abs()`` method. A tensor argument is passed through unchanged.
    k = torch.as_tensor(k)
    self.ball = geoopt.PoincareBall(c=k.abs())
    self.input_size = input_size
    self.hidden_size = hidden_size
    self.num_layers = num_layers
    # NOTE: ``self.bias`` is deliberately NOT pre-assigned the boolean flag.
    # A plain attribute named "bias" makes ``register_buffer("bias", None)``
    # raise ``KeyError("attribute 'bias' already exists")`` for bias=False.
    weight_ih = torch.nn.ParameterList([
        torch.nn.Parameter(
            torch.Tensor(3 * hidden_size, input_size if i == 0 else hidden_size))
        for i in range(num_layers)
    ])
    weight_hh = torch.nn.ParameterList([
        torch.nn.Parameter(torch.Tensor(3 * hidden_size, hidden_size))
        for _ in range(num_layers)
    ])
    if bias:
        # ``expmap0`` takes the signed curvature ``k`` (negative for the
        # ball), so pass ``self.ball.k`` rather than the positive ``c``.
        self.bias = torch.nn.ParameterList([
            geoopt.ManifoldParameter(
                gmath.expmap0(torch.randn(3, hidden_size) * 1e-5, k=self.ball.k),
                manifold=self.ball,
            )
            for _ in range(num_layers)
        ])
    else:
        self.register_buffer("bias", None)
    # ====ONLY SUPPORT 2 LAYERS====
    # Expose each layer's tensors as flat attributes (see the TODO above).
    self.weight_ih_1 = weight_ih[0]
    self.weight_ih_2 = weight_ih[1]
    self.weight_hh_1 = weight_hh[0]
    self.weight_hh_2 = weight_hh[1]
    if self.bias is not None:
        self.bias_1 = self.bias[0]
        self.bias_2 = self.bias[1]
    else:
        # Without biases there is nothing to index.
        self.bias_1 = None
        self.bias_2 = None
    self.nonlin = nonlin
    self.hyperbolic_input = hyperbolic_input
    self.hyperbolic_hidden_state0 = hyperbolic_hidden_state0
    self.reset_parameters()
def __init__(self, in_features, out_features, k=-1.0, fp64_hyper=True):
    """Create the ``scale``, ``point`` and ``tangent`` parameters.

    ``k`` is converted to a tensor; the ball is built with ``c = |k|``.
    ``scale`` starts at zero (one entry per output). ``point`` is a random
    normal sample divided by 4 and passed through ``gmath.expmap0`` with
    ``k``. ``tangent`` is a random normal sample projected onto a
    ``Sphere`` manifold. ``fp64_hyper`` is stored on the instance.
    """
    k = torch.tensor(k)
    super().__init__()
    self.in_features = in_features
    self.out_features = out_features
    self.ball = geoopt.PoincareBall(c=k.abs())
    self.sphere = geoopt.manifolds.Sphere()
    self.scale = torch.nn.Parameter(torch.zeros(out_features))

    shape = (out_features, in_features)
    # Random draws happen in this order: point first, then tangent.
    ball_point = gmath.expmap0(torch.randn(*shape) / 4, k=k)
    direction = torch.randn(*shape)
    self.point = geoopt.ManifoldParameter(ball_point, manifold=self.ball)
    self.fp64_hyper = fp64_hyper
    with torch.no_grad():
        self.tangent = geoopt.ManifoldParameter(direction, manifold=self.sphere).proj_()
def test_rsgd_complex_manifold(params, complex_manifold):
    """Check that RiemannianSGD converges on a complex manifold.

    ``params`` is forwarded as keyword arguments to ``RiemannianSGD``;
    ``complex_manifold`` supplies the manifold used for sampling points and
    measuring distance. After the loop the optimizer state is reloaded from
    its own ``state_dict`` and one more step is taken.
    """
    manifold = complex_manifold
    torch.manual_seed(42)
    with torch.no_grad():
        X = geoopt.ManifoldParameter(manifold.random(2, 2), manifold=manifold).proj_()
    Xstar = manifold.random(2, 2)

    def closure():
        # Squared manifold distance between the iterate and the target.
        optim.zero_grad()
        loss = manifold.dist(X, Xstar).pow(2).sum()
        loss.backward()
        return loss.item()

    optim = geoopt.optim.RiemannianSGD([X], **params)
    # The start must be far from the target, otherwise the test is vacuous.
    assert manifold.dist(X, Xstar) > 1e-1
    for i in range(10000):
        distance = manifold.dist(X, Xstar)
        if distance < 1e-4:
            break
        try:
            optim.step(closure)
        except UserWarning:
            # On the first pass it raises a UserWarning due to discarding part of the
            # complex variable in a casting
            pass
        # Progress trace: iteration index and the pre-step distance.
        print(i, distance)
    # Re-measure after the loop so the final assertion sees the latest iterate.
    distance = manifold.dist(X, Xstar)
    np.testing.assert_equal(distance < 1e-4, torch.tensor(True))
    optim.load_state_dict(optim.state_dict())
    optim.step(closure)
def __init__(self, *args, hyperbolic_input=True, hyperbolic_bias=True, nonlin=None,
             k=-1.0, fp64_hyper=True, **kwargs):
    """Initialise a linear layer with an optional Poincare-ball bias.

    Positional and extra keyword arguments go to the parent constructor.
    When the parent created a bias and ``hyperbolic_bias`` is truthy, the
    bias is re-wrapped as a manifold parameter on ``PoincareBall(c=|k|)``
    and re-sampled. The weight is always re-sampled from a narrow normal
    distribution. The remaining arguments are stored on the instance.
    """
    k = torch.tensor(k)
    super().__init__(*args, **kwargs)
    if self.bias is not None and hyperbolic_bias:
        self.ball = geoopt.PoincareBall(c=k.abs())
        self.bias = geoopt.ManifoldParameter(self.bias, manifold=self.ball)
        with torch.no_grad():
            # Divisor is 400 here (an earlier variant used 4).
            tangent_noise = self.bias.normal_() / 400
            self.bias.set_(gmath.expmap0(tangent_noise, k=k))
    with torch.no_grad():
        # 1e-2 was the original value in the code. The updated one is from HNN++
        n_rows, n_cols = self.weight.shape[0], self.weight.shape[1]
        # Divided by a further 100 so that it starts really small and far
        # from the border.
        weight_std = (1 / np.sqrt(2 * n_rows * n_cols)) / 100
        self.weight.normal_(std=weight_std)
    self.hyperbolic_bias = hyperbolic_bias
    self.hyperbolic_input = hyperbolic_input
    self.nonlin = nonlin
    self.k = k
    self.fp64_hyper = fp64_hyper
def init_lut(self, weights, dim_0, dim_1):
    """Return ``weights`` as a manifold parameter on the word-embedding manifold.

    When ``weights`` is ``None`` a fresh table is produced by
    ``init_embeddings(dim_0, dim_1)`` and, for the ``cs.HY`` embedding
    metric, mapped through ``pmath.expmap0`` with the manifold's ``k``.
    Supplied weights are wrapped unchanged.
    """
    target_manifold = self.word_embed_manifold
    if weights is not None:
        return gt.ManifoldParameter(weights, manifold=target_manifold)

    with torch.no_grad():
        fresh = init_embeddings(dim_0, dim_1)
        if self.args.embedding_metric == cs.HY:
            fresh = pmath.expmap0(fresh, k=target_manifold.k)
    return gt.ManifoldParameter(fresh, manifold=target_manifold)
def test_weighted_midpoint_euclidean(lincomb):
    """With curvature 0 the midpoint over dim 0 is a plain sum (lincomb) or mean."""
    manifold = stereographic.Stereographic(0)
    points = geoopt.ManifoldParameter(manifold.random(2, 3, 10))
    midpoint = manifold.weighted_midpoint(points, reducedim=[0], lincomb=lincomb)
    assert midpoint.shape == points.shape[-2:]
    expected = points.sum(0) if lincomb else points.mean(0)
    assert torch.allclose(midpoint, expected)
def test_weighted_midpoint_reduce_dim(_k, lincomb):
    """Reducing over dim 0 gives a finite midpoint with finite, non-zero gradients."""
    manifold = stereographic.Stereographic(_k, learnable=True)
    points = geoopt.ManifoldParameter(manifold.random(2, 3, 10))
    midpoint = manifold.weighted_midpoint(points, reducedim=[0], lincomb=lincomb)
    assert midpoint.shape == points.shape[-2:]
    assert torch.isfinite(midpoint).all()
    midpoint.sum().backward()
    assert torch.isfinite(points.grad).all()
    # The learnable curvature must receive a non-zero gradient.
    curvature = manifold.k
    assert not torch.isclose(curvature.grad, curvature.new_zeros(()))
def test_init_manifold():
    """A zero-gradient step with stabilize=1 changes the Stiefel parameter only.

    The Stiefel parameter starts from an unprojected random matrix, so the
    step moves it to ``stiefel.projx`` of its old value; the Rn parameter
    stays exactly where it was.
    """
    torch.manual_seed(42)
    stiefel = geoopt.manifolds.Stiefel()
    rn = geoopt.manifolds.Rn()
    raw_stiefel = torch.randn(10, 10)
    raw_rn = torch.randn(10, 10)
    on_stiefel = geoopt.ManifoldParameter(raw_stiefel, manifold=stiefel)
    on_rn = geoopt.ManifoldParameter(raw_rn, manifold=rn)
    on_stiefel.grad = torch.zeros_like(on_stiefel)
    on_rn.grad = torch.zeros_like(on_rn)
    before_stiefel = on_stiefel.clone()
    before_rn = on_rn.clone()

    opt = geoopt.optim.RiemannianSGD([on_stiefel, on_rn], lr=1, stabilize=1)
    opt.zero_grad()
    opt.step()

    assert not np.allclose(on_stiefel.data, before_stiefel.data)
    np.testing.assert_allclose(on_rn.data, before_rn.data)
    np.testing.assert_allclose(
        on_stiefel.data, stiefel.projx(before_stiefel.data), atol=1e-4
    )
def __init__(self, *args, nonlin=None, ball=None, c=1.0, **kwargs):
    """Initialise a linear layer whose bias lives on a Poincare ball.

    Positional and extra keyword arguments go to the parent constructor.
    The ball comes from ``create_ball(ball, c)``. If the parent created a
    bias it is re-wrapped as a manifold parameter on that ball.
    ``nonlin`` is stored on the instance, and ``reset_parameters`` is
    called last.
    """
    super().__init__(*args, **kwargs)
    # Manifolds with their own parameters (e.g. the Poincare ball) must be
    # attached to the owning Module; otherwise device placement of the
    # manifold is hard to get right.
    self.ball = create_ball(ball, c)
    parent_bias = self.bias
    if parent_bias is not None:
        self.bias = geoopt.ManifoldParameter(parent_bias, manifold=self.ball)
    self.nonlin = nonlin
    self.reset_parameters()
def test_weighted_midpoint_zero(_k, lincomb):
    """All-zero weights give a zero midpoint and finite gradients."""
    manifold = stereographic.Stereographic(_k, learnable=True)
    points = geoopt.ManifoldParameter(manifold.random(2, 3, 10))
    zero_weights = torch.zeros_like(points[..., 0])
    midpoint = manifold.weighted_midpoint(
        points, reducedim=[0], lincomb=lincomb, weights=zero_weights
    )
    assert midpoint.shape == points.shape[-2:]
    assert torch.allclose(midpoint, torch.zeros_like(midpoint))
    midpoint.sum().backward()
    assert torch.isfinite(points.grad).all()
    assert torch.isfinite(manifold.k.grad).all()