def mobius_linear(
    input,
    weight,
    bias=None,
    hyperbolic_input=True,
    hyperbolic_bias=True,
    nonlin=None,
    k=-1.0,
):
    """Apply a Mobius linear map with optional bias and nonlinearity.

    Args:
        input: Input tensor. When ``hyperbolic_input`` is true it is passed
            straight to ``mobius_matvec``; otherwise it goes through
            ``torch.nn.functional.linear`` and the result is mapped with
            ``gmath.expmap0``.
        weight: Weight matrix used by either of the two paths above.
        bias: Optional bias. It is unsqueezed on dim 0 and expanded to the
            shape of the output before ``gmath.mobius_add``.
        hyperbolic_input: Selects which of the two input paths is used.
        hyperbolic_bias: When false, ``bias`` is first mapped with
            ``gmath.expmap0``.
        nonlin: Optional callable applied via ``gmath.mobius_fn_apply``.
        k: Curvature, given either as a Python number or as a tensor.

    Returns:
        The output after a final ``gmath.project``.
    """
    # Only wrap plain numbers: calling ``torch.tensor`` on an existing tensor
    # copies it, detaches it from autograd and emits a UserWarning.
    if not isinstance(k, torch.Tensor):
        k = torch.tensor(k)

    if hyperbolic_input:
        output = mobius_matvec(weight, input, k=k)
    else:
        output = torch.nn.functional.linear(input, weight)
        output = gmath.expmap0(output, k=k)

    if bias is not None:
        if not hyperbolic_bias:
            bias = gmath.expmap0(bias, k=k)
        output = gmath.mobius_add(output, bias.unsqueeze(0).expand_as(output), k=k)

    if nonlin is not None:
        output = gmath.mobius_fn_apply(nonlin, output, k=k)

    output = gmath.project(output, k=k)
    return output
def mobius_gru_loop(
    input: torch.Tensor,
    h0: torch.Tensor,
    weight_ih: torch.Tensor,
    weight_hh: torch.Tensor,
    bias: torch.Tensor,
    k: torch.Tensor,
    batch_sizes=None,
    hyperbolic_input: bool = False,
    hyperbolic_hidden_state0: bool = False,
    nonlin=None,
):
    """Run ``mobius_gru_cell`` step by step over a sequence.

    Args:
        input: Sequence data. Without ``batch_sizes`` it is unbound along
            dim 0, one slice per step; with ``batch_sizes`` it is consumed
            from the front in chunks of ``batch_sizes[t]`` rows.
        h0: Initial hidden state; mapped with ``gmath.expmap0`` unless
            ``hyperbolic_hidden_state0`` is true.
        weight_ih, weight_hh, bias, nonlin, k: Forwarded unchanged to
            ``mobius_gru_cell`` on every step.
        batch_sizes: Optional per-step row counts selecting the packed path.
        hyperbolic_input: When false, ``input`` is mapped with
            ``gmath.expmap0`` first.

    Returns:
        ``(outs, h_last)``: the per-step hidden states (stacked in the dense
        path, concatenated in the packed path) and the final hidden state.
    """
    hx = h0 if hyperbolic_hidden_state0 else gmath.expmap0(h0, k=k)
    if not hyperbolic_input:
        input = gmath.expmap0(input, k=k)

    def step(x_t, state):
        # Single recurrent update with the shared weights.
        return mobius_gru_cell(
            input=x_t,
            hx=state,
            weight_ih=weight_ih,
            weight_hh=weight_hh,
            bias=bias,
            nonlin=nonlin,
            k=k,
        )

    outs = []
    if batch_sizes is None:
        slices = input.unbind(0)
        for t in range(input.size(0)):
            hx = step(slices[t], hx)
            outs.append(hx)
        return torch.stack(outs), hx

    finished = []
    last_step = len(batch_sizes) - 1
    remaining = input
    for t in range(batch_sizes.size(0)):
        chunk, remaining = remaining[:batch_sizes[t]], remaining[batch_sizes[t]:]
        hx = step(chunk, hx)
        outs.append(hx)
        if t < last_step:
            # Rows beyond the next step's batch size are done: set their
            # state aside and keep only the rows that continue.
            hx, done = hx[:batch_sizes[t + 1]], hx[batch_sizes[t + 1]:]
            finished.append(done)
        else:
            finished.append(hx)
    # States were collected in order of completion; reverse before joining.
    finished.reverse()
    h_last = torch.cat(finished)
    outs = torch.cat(outs)
    return outs, h_last
def __init__(self, *args, hyperbolic_input=True, hyperbolic_bias=True, nonlin=None, k=-1.0, fp64_hyper=True, **kwargs):
    """Build the layer and re-initialise its bias and weight.

    ``*args`` / ``**kwargs`` go to the parent constructor. ``k`` is wrapped
    in a tensor and stored on ``self.k``; the remaining keyword options are
    stored as attributes of the same name.
    """
    k = torch.tensor(k)
    super().__init__(*args, **kwargs)

    if self.bias is not None and hyperbolic_bias:
        ball = geoopt.PoincareBall(c=k.abs())
        self.ball = ball
        self.bias = geoopt.ManifoldParameter(self.bias, manifold=ball)
        with torch.no_grad():
            # An earlier revision divided the noise by 4 instead of 400.
            small_noise = self.bias.normal_() / 400
            self.bias.set_(gmath.expmap0(small_noise, k=k))

    with torch.no_grad():
        # 1e-2 was the original value in the code. The updated one is from HNN++
        rows, cols = self.weight.shape[0], self.weight.shape[1]
        std = 1 / np.sqrt(2 * rows * cols)
        # Divided by 100 so that weights start really small and far from the border.
        self.weight.normal_(std=std / 100)

    self.hyperbolic_bias = hyperbolic_bias
    self.hyperbolic_input = hyperbolic_input
    self.nonlin = nonlin
    self.k = k
    self.fp64_hyper = fp64_hyper
def __init__(
    self,
    input_size,
    hidden_size,
    num_layers=2,
    bias=True,
    nonlin=None,
    hyperbolic_input=True,
    hyperbolic_hidden_state0=True,
    k=-1.0,
):
    """Create the parameters of a two-layer Mobius GRU.

    Args:
        input_size: Feature size of the first layer's input.
        hidden_size: Hidden size; every weight has ``3 * hidden_size`` rows.
        num_layers: Must be 2 (asserted below).
        bias: Whether to create per-layer bias parameters.
        nonlin: Stored on ``self.nonlin``.
        hyperbolic_input: Stored on ``self.hyperbolic_input``.
        hyperbolic_hidden_state0: Stored on ``self.hyperbolic_hidden_state0``.
        k: Curvature as a number or tensor; its absolute value is used as
            the ``c`` of the Poincare ball.
    """
    super().__init__()
    # TODO: generalize to any number of layers
    # current problem: ParameterList doesn't get copied to multiple GPUs when
    # model is wrapped in DataParallel
    # bug source: https://github.com/pytorch/pytorch/issues/36035
    assert num_layers == 2, '====[hyrnn_nets.py] current version only support 2-layer GRU===='

    # The default ``k`` is a Python float, which has no ``.abs()``; wrap
    # non-tensors before using tensor methods.
    if not isinstance(k, torch.Tensor):
        k = torch.tensor(k)
    self.ball = geoopt.PoincareBall(c=k.abs())
    self.input_size = input_size
    self.hidden_size = hidden_size
    self.num_layers = num_layers

    weight_ih = torch.nn.ParameterList([
        torch.nn.Parameter(
            torch.Tensor(3 * hidden_size, input_size if i == 0 else hidden_size))
        for i in range(num_layers)
    ])
    weight_hh = torch.nn.ParameterList([
        torch.nn.Parameter(torch.Tensor(3 * hidden_size, hidden_size))
        for _ in range(num_layers)
    ])

    # ``self.bias`` must not be pre-assigned with the boolean flag: a plain
    # attribute of that name makes ``register_buffer("bias", None)`` raise
    # KeyError.
    if bias:
        biases = []
        for _ in range(num_layers):
            init = torch.randn(3, hidden_size) * 1e-5
            # Use the ball's signed curvature ``k`` (not the positive ``c``),
            # matching how the sibling layers call ``expmap0``.
            point = gmath.expmap0(init, k=self.ball.k)
            biases.append(geoopt.ManifoldParameter(point, manifold=self.ball))
        self.bias = torch.nn.ParameterList(biases)
    else:
        self.register_buffer("bias", None)

    #====ONLY SUPPORT 2 LAYERS====#
    self.weight_ih_1 = weight_ih[0]
    self.weight_ih_2 = weight_ih[1]
    self.weight_hh_1 = weight_hh[0]
    self.weight_hh_2 = weight_hh[1]
    if self.bias is not None:
        self.bias_1 = self.bias[0]
        self.bias_2 = self.bias[1]
    else:
        # NOTE(review): the code that consumes bias_1 / bias_2 is not visible
        # here - confirm it accepts None when bias=False.
        self.bias_1 = None
        self.bias_2 = None

    self.nonlin = nonlin
    self.hyperbolic_input = hyperbolic_input
    self.hyperbolic_hidden_state0 = hyperbolic_hidden_state0
    self.reset_parameters()
def __init__(self, input_size, hidden_size, num_layers=1, bias=True, nonlin=None, hyperbolic_input=True, hyperbolic_hidden_state0=True, c=1.0):
    """Create the per-layer parameters of a Mobius GRU.

    Args:
        input_size: Feature size of the first layer's input.
        hidden_size: Hidden size; every weight has ``3 * hidden_size`` rows.
        num_layers: Number of stacked layers.
        bias: Whether to create per-layer bias parameters. When false,
            ``self.bias`` is registered as a ``None`` buffer.
        nonlin: Stored on ``self.nonlin``.
        hyperbolic_input: Stored on ``self.hyperbolic_input``.
        hyperbolic_hidden_state0: Stored on ``self.hyperbolic_hidden_state0``.
        c: Passed to ``gt.PoincareBall``.
    """
    super().__init__()
    self.ball = gt.PoincareBall(c=c)
    self.input_size = input_size
    self.hidden_size = hidden_size
    self.num_layers = num_layers

    self.weight_ih = torch.nn.ParameterList(
        [torch.nn.Parameter(torch.Tensor(3 * hidden_size, input_size if i == 0 else hidden_size))
         for i in range(num_layers)]
    )
    self.weight_hh = torch.nn.ParameterList(
        [torch.nn.Parameter(torch.Tensor(3 * hidden_size, hidden_size))
         for _ in range(num_layers)]
    )

    # ``self.bias`` is deliberately not pre-assigned with the boolean flag:
    # an existing plain attribute of that name makes
    # ``register_buffer("bias", None)`` raise KeyError for bias=False.
    if bias:
        biases = []
        for _ in range(num_layers):
            init = torch.randn(3, hidden_size) * 1e-5
            point = pmath.expmap0(init, k=self.ball.k)
            biases.append(gt.ManifoldParameter(point, manifold=self.ball))
        self.bias = torch.nn.ParameterList(biases)
    else:
        self.register_buffer("bias", None)

    self.nonlin = nonlin
    self.hyperbolic_input = hyperbolic_input
    self.hyperbolic_hidden_state0 = hyperbolic_hidden_state0
    self.reset_parameters()
def init_lut(self, weights, dim_0, dim_1):
    """Wrap an embedding table in a ``gt.ManifoldParameter``.

    If ``weights`` is ``None`` a fresh table is created with
    ``init_embeddings(dim_0, dim_1)`` and, when the embedding metric is
    ``cs.HY``, mapped with ``pmath.expmap0``. Weights passed in by the
    caller are wrapped unchanged.
    """
    manifold = self.word_embed_manifold
    if weights is None:
        with torch.no_grad():
            weights = init_embeddings(dim_0, dim_1)
            hyperbolic = self.args.embedding_metric == cs.HY
            if hyperbolic:
                weights = pmath.expmap0(weights, k=manifold.k)
    return gt.ManifoldParameter(weights, manifold=manifold)
def define_mapping(in_metric, out_metric, c_value):
    """Return a callable converting points from ``in_metric`` to ``out_metric``.

    Equal metrics give the identity. ``cs.HY`` to ``cs.EU`` gives
    ``pmath.logmap0`` and ``cs.EU`` to ``cs.HY`` gives ``pmath.expmap0``,
    both evaluated with the module-level ``POINCARE_K``. ``c_value`` is
    accepted but not used.

    Raises:
        ValueError: For any other combination of metrics.
    """
    if in_metric == out_metric:
        return lambda x: x

    pair = (in_metric, out_metric)
    if pair[0] == cs.HY and pair[1] == cs.EU:
        return lambda x: pmath.logmap0(x, k=POINCARE_K)
    if pair[0] == cs.EU and pair[1] == cs.HY:
        return lambda x: pmath.expmap0(x, k=POINCARE_K)

    raise ValueError(
        f"Wrong metrics: in_metric:'{in_metric}', out_metric:'{out_metric}'"
    )
def mobius_linear(
    input,
    weight,
    bias=None,
    hyperbolic_input=True,
    hyperbolic_bias=True,
    nonlin=None,
    c=1.0,
):
    """Apply a Mobius linear map with optional bias and nonlinearity.

    The curvature argument for every ``pmath`` call is obtained from
    ``to_k(c, <tensor>)`` using the tensor involved in that step.

    Args:
        input: Input tensor; used directly by ``pmath.mobius_matvec`` when
            ``hyperbolic_input`` is true, otherwise passed through an
            ordinary linear map followed by ``pmath.expmap0``.
        weight: Weight matrix.
        bias: Optional bias added with ``pmath.mobius_add``.
        hyperbolic_bias: When false, ``bias`` is first mapped with
            ``pmath.expmap0``.
        nonlin: Optional callable applied via ``pmath.mobius_fn_apply``.
        c: Value handed to ``to_k``.

    Returns:
        The result of a final ``pmath.project``.
    """
    if not hyperbolic_input:
        flat = torch.nn.functional.linear(input, weight)
        result = pmath.expmap0(flat, k=to_k(c, flat))
    else:
        result = pmath.mobius_matvec(weight, input, k=to_k(c, weight))

    if bias is not None:
        ball_bias = bias if hyperbolic_bias else pmath.expmap0(bias, k=to_k(c, bias))
        result = pmath.mobius_add(result, ball_bias, k=to_k(c, result))

    if nonlin is not None:
        result = pmath.mobius_fn_apply(nonlin, result, k=to_k(c, result))

    return pmath.project(result, k=to_k(c, result))
def __init__(self, output_dim, input_dims, second_input_dim=None, third_input_dim=None, nonlin=None):
    """Create one bias-free ``MobiusLinear`` per input plus a shared bias.

    ``lin_a`` takes ``input_dims`` features; ``lin_b`` takes
    ``second_input_dim`` (or ``input_dims`` when that is ``None``);
    ``lin_c`` is only created when ``third_input_dim`` is truthy.
    """
    super(MobiusConcat, self).__init__()

    def branch(in_dim):
        # All branches share the output size and nonlinearity.
        return MobiusLinear(in_dim, output_dim, bias=False, nonlin=nonlin)

    self.lin_a = branch(input_dims)
    self.lin_b = branch(input_dims if second_input_dim is None else second_input_dim)
    if third_input_dim:
        self.lin_c = branch(third_input_dim)

    self.ball = gt.PoincareBall()
    small_noise = torch.randn(output_dim) * 1e-5
    self.bias = gt.ManifoldParameter(
        pmath.expmap0(small_noise, k=self.ball.k), manifold=self.ball
    )
def __init__(self, input_size, hidden_size):
    """Create the recurrent weight ``w``, input weight ``u`` and bias ``b``.

    ``w`` and ``u`` are drawn uniformly within a Xavier-style bound
    ``sqrt(6 / (fan_a + fan_b))``; ``b`` is tiny Gaussian noise mapped with
    ``pmath.expmap0`` and attached to the Poincare ball.
    """
    super(MobiusRNN, self).__init__()
    self.ball = gt.PoincareBall()
    self.input_size = input_size
    self.hidden_size = hidden_size

    def xavier_bound(fan_a, fan_b):
        # xavier uniform
        return (6 / (fan_a + fan_b)) ** 0.5

    bound_w = xavier_bound(self.hidden_size, self.hidden_size)
    bound_u = xavier_bound(self.input_size, self.hidden_size)

    recurrent = gt.ManifoldTensor(hidden_size, hidden_size).uniform_(-bound_w, bound_w)
    self.w = gt.ManifoldParameter(recurrent)
    projection = gt.ManifoldTensor(input_size, hidden_size).uniform_(-bound_u, bound_u)
    self.u = gt.ManifoldParameter(projection)

    small_noise = torch.randn(hidden_size) * 1e-5
    self.b = gt.ManifoldParameter(
        pmath.expmap0(small_noise, k=self.ball.k), manifold=self.ball
    )
def __init__(self, in_features, out_features, c=1.0):
    """Create the ``scale``, ``point`` and ``tangent`` parameters.

    ``scale`` starts at zero. ``point`` is Gaussian noise divided by 4,
    mapped with ``pmath.expmap0`` and attached to a Poincare ball built from
    ``c``. ``tangent`` is Gaussian noise attached to a ``Sphere`` manifold
    and projected in place with ``proj_()``.
    """
    super().__init__()
    self.in_features = in_features
    self.out_features = out_features

    poincare = geoopt.PoincareBall(c=c)
    unit_sphere = geoopt.manifolds.Sphere()
    self.ball = poincare
    self.sphere = unit_sphere
    self.scale = torch.nn.Parameter(torch.zeros(out_features))

    shape = (out_features, in_features)
    raw_point = torch.randn(*shape) / 4
    ball_point = pmath.expmap0(raw_point, k=to_k(c, raw_point))
    raw_tangent = torch.randn(*shape)

    self.point = geoopt.ManifoldParameter(ball_point, manifold=poincare)
    with torch.no_grad():
        tangent_param = geoopt.ManifoldParameter(raw_tangent, manifold=unit_sphere)
        self.tangent = tangent_param.proj_()
def __init__(self, *args, hyperbolic_input=True, hyperbolic_bias=True, nonlin=None, c=1.0, **kwargs):
    """Build the layer, then re-initialise its bias and weight.

    ``*args`` / ``**kwargs`` go to the parent constructor. With a
    hyperbolic bias, the parent's bias is re-wrapped as a
    ``gt.ManifoldParameter`` on the ball and filled with small Gaussian
    noise mapped through ``pmath.expmap0``. The weight is always redrawn
    uniformly within a Xavier-style bound.
    """
    super().__init__(*args, **kwargs)
    self.ball = gt.PoincareBall(c=c)

    if self.bias is not None and hyperbolic_bias:
        self.bias = gt.ManifoldParameter(self.bias, manifold=self.ball)
        with torch.no_grad():
            small_noise = self.bias.normal_() * 1e-3
            self.bias.set_(pmath.expmap0(small_noise, k=self.ball.k))

    with torch.no_grad():
        dim_a, dim_b = self.weight.size()
        bound = (6 / (dim_a + dim_b)) ** 0.5  # xavier uniform
        self.weight.uniform_(-bound, bound)

    self.hyperbolic_bias = hyperbolic_bias
    self.hyperbolic_input = hyperbolic_input
    self.nonlin = nonlin
def __init__(self, in_features, out_features, k=-1.0, fp64_hyper=True):
    """Create the ``scale``, ``point`` and ``tangent`` parameters.

    ``k`` is wrapped in a tensor; its absolute value becomes the ``c`` of
    the Poincare ball while ``k`` itself is given to ``gmath.expmap0``.
    ``fp64_hyper`` is stored on the instance.
    """
    k = torch.tensor(k)
    super().__init__()
    self.in_features = in_features
    self.out_features = out_features

    poincare = geoopt.PoincareBall(c=k.abs())
    unit_sphere = geoopt.manifolds.Sphere()
    self.ball = poincare
    self.sphere = unit_sphere
    self.scale = torch.nn.Parameter(torch.zeros(out_features))

    shape = (out_features, in_features)
    ball_point = gmath.expmap0(torch.randn(*shape) / 4, k=k)
    raw_tangent = torch.randn(*shape)

    self.point = geoopt.ManifoldParameter(ball_point, manifold=poincare)
    self.fp64_hyper = fp64_hyper
    with torch.no_grad():
        tangent_param = geoopt.ManifoldParameter(raw_tangent, manifold=unit_sphere)
        self.tangent = tangent_param.proj_()
def __init__(self, in_features, out_features, c=1.0):
    """Create the per-class parameters ``p_k`` and ``a_k``.

    :param in_features: number of dimensions of the input
    :param out_features: number of classes
    :param c: value passed to ``gt.PoincareBall``
    """
    super().__init__()
    self.in_features = in_features
    self.out_features = out_features
    self.ball = gt.PoincareBall(c=c)

    shape = (out_features, in_features)

    # p_k: tiny Gaussian noise mapped through expmap0, kept on the ball.
    small_noise = torch.randn(*shape) * 1e-5
    self.p_k = gt.ManifoldParameter(
        pmath.expmap0(small_noise, k=self.ball.k), manifold=self.ball
    )

    # a_k: ordinary parameter, drawn uniformly within the bound below.
    bound = (6 / (out_features + in_features)) ** 0.5  # xavier uniform
    normals = torch.nn.init.uniform_(torch.Tensor(*shape), -bound, bound)
    self.a_k = torch.nn.Parameter(normals)
def __init__(self, args, pos_embeds_rows, input_dims):
    """Set up position embeddings and metric-dependent attention operators.

    ``args.attn_metric`` selects between a Poincare-ball and a Euclidean
    configuration: the manifold, the key/query dense layers, the addition,
    distance and midpoint operators all follow that choice.
    ``args.attn`` selects softmax (over dim 1) versus sigmoid.
    """
    super(DistanceAttention, self).__init__()
    hyperbolic = args.attn_metric == cs.HY

    # pos embeds
    self.manifold = gt.PoincareBall() if hyperbolic else gt.Euclidean()
    with torch.no_grad():
        pos_embeds = init_embeddings(pos_embeds_rows, input_dims)
        if hyperbolic:
            pos_embeds = pmath.expmap0(pos_embeds, k=self.manifold.k)
        beta = torch.Tensor(1).uniform_(-0.01, 0.01)
    self.position_embeds = gt.ManifoldParameter(pos_embeds, manifold=self.manifold)

    if hyperbolic:
        def ball_add(x, y):
            # Curvature is read from the manifold at call time.
            return pmath.mobius_add(x, y, k=self.manifold.k)

        def ball_dist(x, y):
            return pmath.dist(x, y, k=self.manifold.k)

        self.key_dense = hnn.MobiusLinear(
            input_dims, input_dims, hyperbolic_input=True, hyperbolic_bias=True
        )
        self.query_dense = hnn.MobiusLinear(
            input_dims, input_dims, hyperbolic_input=True, hyperbolic_bias=True
        )
        self.addition = ball_add
        self.distance_function = ball_dist
        self.midpoint = hnn.weighted_mobius_midpoint
    else:
        self.key_dense = nn.Linear(input_dims, input_dims)
        self.query_dense = nn.Linear(input_dims, input_dims)
        self.addition = torch.add
        self.distance_function = utils.euclidean_distance
        self.midpoint = hnn.weighted_euclidean_midpoint

    self.encoder_to_attn_map = define_mapping(
        args.encoder_metric, args.attn_metric, args.c
    )
    if args.attn == "softmax":
        self.attention_function = nn.Softmax(dim=1)
    else:
        self.attention_function = nn.Sigmoid()
    self.beta = torch.nn.Parameter(beta, requires_grad=True)
def __init__(self, *args, hyperbolic_input=True, hyperbolic_bias=True, nonlin=None, c=1.0, **kwargs):
    """Build the layer, then re-initialise its bias and weight.

    ``*args`` / ``**kwargs`` go to the parent constructor. With a
    hyperbolic bias, ``self.ball`` is created and the parent's bias is
    re-wrapped as a ``geoopt.ManifoldParameter`` filled with Gaussian noise
    (divided by 4) mapped through ``pmath.expmap0``. The weight is redrawn
    from a normal distribution with ``std=1e-2``.
    """
    super().__init__(*args, **kwargs)

    if self.bias is not None and hyperbolic_bias:
        ball = geoopt.PoincareBall(c=c)
        self.ball = ball
        self.bias = geoopt.ManifoldParameter(self.bias, manifold=ball)
        with torch.no_grad():
            noise = self.bias.normal_() / 4
            curvature = to_k(c, self.bias)
            self.bias.set_(pmath.expmap0(noise, k=curvature))

    with torch.no_grad():
        self.weight.normal_(std=1e-2)

    self.hyperbolic_bias = hyperbolic_bias
    self.hyperbolic_input = hyperbolic_input
    self.nonlin = nonlin
def __init__(self, char_vocab, args):
    """Build the mention encoder's sub-modules and character embeddings.

    ``args.encoder_metric`` selects between the Poincare-ball and Euclidean
    versions of the manifold, the word projection, the nonlinearity and the
    character RNN. ``args.attn_metric`` selects the character midpoint
    function.
    """
    super(MentionEncoder, self).__init__()
    space_dims = args.space_dims
    self.mention_output_dim = space_dims * 2
    self.char_output_dim = space_dims

    if args.encoder_metric == cs.HY:
        self.manifold = gt.PoincareBall()
        # The nonlinearity lives inside the Mobius layer here, so the
        # separate ``non_lin`` is the identity.
        self.word2space = hnn.MobiusLinear(
            args.word_emb_size,
            self.mention_output_dim,
            hyperbolic_input=True,
            hyperbolic_bias=True,
            nonlin=get_nonlin(args.men_nonlin),
        )
        self.non_lin = lambda x: x
        self.char_rnn = hnn.MobiusRNN(args.space_dims, args.space_dims)
    else:
        self.manifold = gt.Euclidean()
        self.word2space = nn.Linear(args.word_emb_size, self.mention_output_dim)
        self.non_lin = get_nonlin(args.men_nonlin)
        self.char_rnn = hnn.EuclRNN(args.space_dims, args.space_dims)

    self.input_dropout = nn.Dropout(p=args.input_dropout)
    self.mention_attn = DistanceAttention(
        args, args.mention_len + 1, self.mention_output_dim
    )
    self.char_mapping = define_mapping(
        args.encoder_metric, args.attn_metric, args.c
    )
    if args.attn_metric == cs.HY:
        self.char_midpoint = hnn.mobius_midpoint
    else:
        self.char_midpoint = hnn.euclidean_midpoint

    # char embeds
    with torch.no_grad():
        char_table = init_embeddings(char_vocab.size(), self.char_output_dim)
        if args.encoder_metric == cs.HY:
            char_table = pmath.expmap0(char_table, k=self.manifold.k)
    self.char_lut = gt.ManifoldParameter(char_table, manifold=self.manifold)
def __init__(
    self,
    vocab_size,
    embedding_dim,
    hidden_dim,
    project_dim,
    cell_type="eucl_rnn",
    embedding_type="eucl",
    decision_type="eucl",
    use_distance_as_feature=True,
    device=None,
    num_layers=1,
    num_classes=1,
    c=1.0,
):
    """Assemble embedding, recurrent cells and decision layers.

    Args:
        vocab_size, embedding_dim: Size of the lookup embedding.
        hidden_dim: Hidden size of both recurrent cells.
        project_dim: Output size of the projector layer(s).
        cell_type: ``"eucl_rnn"``, ``"eucl_gru"`` or ``"hyp_gru"``
            (case-insensitive).
        embedding_type: ``"eucl"`` or ``"hyp"`` (case-insensitive).
        decision_type: ``"eucl"`` or ``"hyp"`` (case-insensitive).
        use_distance_as_feature: Whether to create ``dist_bias``; otherwise
            it is registered as a ``None`` buffer.
        device: Stored on ``self.device``.
        num_layers: Passed to both recurrent cells.
        num_classes: Output size of the logits layer.
        c: Ball parameter shared by every hyperbolic component.

    Raises:
        NotImplementedError: For an unknown embedding, decision or cell type.
    """
    super(RNNBase, self).__init__()
    (cell_type, embedding_type, decision_type) = map(
        str.lower, [cell_type, embedding_type, decision_type])

    if embedding_type == "eucl":
        self.embedding = hyrnn.LookupEmbedding(
            vocab_size, embedding_dim, manifold=geoopt.Euclidean())
        with torch.no_grad():
            self.embedding.weight.normal_()
    elif embedding_type == "hyp":
        self.embedding = hyrnn.LookupEmbedding(
            vocab_size,
            embedding_dim,
            manifold=geoopt.PoincareBall(c=c),
        )
        with torch.no_grad():
            self.embedding.weight.set_(
                pmath.expmap0(self.embedding.weight.normal_() / 10, c=c))
    else:
        raise NotImplementedError(
            "Unsuported embedding type: {0}".format(embedding_type))
    self.embedding_type = embedding_type

    if decision_type == "eucl":
        self.projector = nn.Linear(hidden_dim * 2, project_dim)
        self.logits = nn.Linear(project_dim, num_classes)
    elif decision_type == "hyp":
        self.projector_source = hyrnn.MobiusLinear(hidden_dim, project_dim, c=c)
        self.projector_target = hyrnn.MobiusLinear(hidden_dim, project_dim, c=c)
        # Pass ``c`` here too: the projectors above produce points on the
        # ball with parameter ``c``, so the hyperplane layer must use the
        # same ball instead of silently defaulting to 1.0.
        self.logits = hyrnn.MobiusDist2Hyperplane(project_dim, num_classes, c=c)
    else:
        raise NotImplementedError(
            "Unsuported decision type: {0}".format(decision_type))

    self.ball = geoopt.PoincareBall(c)
    if use_distance_as_feature:
        if decision_type == "eucl":
            self.dist_bias = nn.Parameter(torch.zeros(project_dim))
        else:
            self.dist_bias = geoopt.ManifoldParameter(
                torch.zeros(project_dim), manifold=self.ball)
    else:
        self.register_buffer("dist_bias", None)
    self.decision_type = decision_type
    self.use_distance_as_feature = use_distance_as_feature
    self.device = device  # declaring device here due to fact we are using catalyst
    self.num_layers = num_layers
    self.hidden_dim = hidden_dim
    self.c = c

    if cell_type == "eucl_rnn":
        self.cell = nn.RNN
    elif cell_type == "eucl_gru":
        self.cell = nn.GRU
    elif cell_type == "hyp_gru":
        self.cell = functools.partial(hyrnn.MobiusGRU, c=c)
    else:
        raise NotImplementedError(
            "Unsuported cell type: {0}".format(cell_type))
    self.cell_type = cell_type

    self.cell_source = self.cell(embedding_dim, self.hidden_dim, self.num_layers)
    self.cell_target = self.cell(embedding_dim, self.hidden_dim, self.num_layers)
def exp_map_mu0_c(x: Tensor, c: Tensor) -> Tensor:
    """Return ``pm.expmap0`` of ``x``, forwarding ``c`` as its ``c`` keyword."""
    mapped = pm.expmap0(x, c=c)
    return mapped
def forward(self, input):
    """Encode a source/target pair of packed sequences and return logits.

    Args:
        input: Indexable with three items: the source sequence, the target
            sequence (each exposing ``.data`` and ``.batch_sizes``) and an
            ``alignment`` index used to reorder the target hidden states.
            ``alignment.shape[0]`` is taken as the batch size.

    Returns:
        The output of ``self.logits`` on the projected pair representation
        (CrossEntropy accepts logits).
    """
    source_seq = input[0]
    target_seq = input[1]
    alignment = input[2]
    batch_size = alignment.shape[0]

    source_data = self.embedding(source_seq.data)
    target_data = self.embedding(target_seq.data)

    # Fall back to the device of the embedded data: the packed-sequence
    # container itself does not expose a ``.device`` attribute, so reading
    # it would fail whenever ``self.device`` is unset.
    zero_hidden = torch.zeros(
        self.num_layers,
        batch_size,
        self.hidden_dim,
        device=self.device or source_data.device,
        dtype=source_data.dtype,
    )

    # Move the embeddings into the space the recurrent cell works in.
    if self.embedding_type == "eucl" and "hyp" in self.cell_type:
        source_data = pmath.expmap0(source_data, c=self.c)
        target_data = pmath.expmap0(target_data, c=self.c)
    elif self.embedding_type == "hyp" and "eucl" in self.cell_type:
        source_data = pmath.logmap0(source_data, c=self.c)
        target_data = pmath.logmap0(target_data, c=self.c)

    # ht: (num_layers * num_directions, batch, hidden_size)
    source_packed = torch.nn.utils.rnn.PackedSequence(
        source_data, source_seq.batch_sizes)
    target_packed = torch.nn.utils.rnn.PackedSequence(
        target_data, target_seq.batch_sizes)
    _, source_hidden = self.cell_source(source_packed, zero_hidden)
    _, target_hidden = self.cell_target(target_packed, zero_hidden)

    # take hiddens from the last layer
    source_hidden = source_hidden[-1]
    target_hidden = target_hidden[-1][alignment]

    if self.decision_type == "hyp":
        if "eucl" in self.cell_type:
            source_hidden = pmath.expmap0(source_hidden, c=self.c)
            target_hidden = pmath.expmap0(target_hidden, c=self.c)
        source_projected = self.projector_source(source_hidden)
        target_projected = self.projector_target(target_hidden)
        projected = pmath.mobius_add(
            source_projected, target_projected, c=self.ball.c)
        if self.use_distance_as_feature:
            dist = (pmath.dist(
                source_hidden, target_hidden,
                dim=-1, keepdim=True, c=self.ball.c) ** 2)
            bias = pmath.mobius_scalar_mul(dist, self.dist_bias, c=self.ball.c)
            projected = pmath.mobius_add(projected, bias, c=self.ball.c)
    else:
        if "hyp" in self.cell_type:
            source_hidden = pmath.logmap0(source_hidden, c=self.c)
            target_hidden = pmath.logmap0(target_hidden, c=self.c)
        projected = self.projector(
            torch.cat((source_hidden, target_hidden), dim=-1))
        if self.use_distance_as_feature:
            dist = torch.sum(
                (source_hidden - target_hidden).pow(2), dim=-1, keepdim=True)
            bias = self.dist_bias * dist
            projected = projected + bias

    logits = self.logits(projected)
    # CrossEntropy accepts logits
    return logits