Example #1
def mobius_linear(
    input,
    weight,
    bias=None,
    hyperbolic_input=True,
    hyperbolic_bias=True,
    nonlin=None,
    k=-1.0,
):
    k = torch.tensor(k)
    if hyperbolic_input:
        output = gmath.mobius_matvec(weight, input, k=k)
    else:
        output = torch.nn.functional.linear(input, weight)
        output = gmath.expmap0(output, k=k)
    if bias is not None:
        if not hyperbolic_bias:
            bias = gmath.expmap0(bias, k=k)
        output = gmath.mobius_add(output,
                                  bias.unsqueeze(0).expand_as(output),
                                  k=k)
    if nonlin is not None:
        output = gmath.mobius_fn_apply(nonlin, output, k=k)
    output = gmath.project(output, k=k)
    return output
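A minimal usage sketch for the function above (assuming `gmath` is `geoopt.manifolds.stereographic.math`; the small scale keeps points well inside the ball):

import torch
import geoopt.manifolds.stereographic.math as gmath

weight = torch.randn(3, 5) * 1e-2
x = torch.randn(8, 5) * 1e-2                                  # Euclidean features
y = mobius_linear(x, weight, hyperbolic_input=False, k=-1.0)
print(y.shape)                                                # torch.Size([8, 3])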
Example #2
def mobius_gru_loop(
    input: torch.Tensor,
    h0: torch.Tensor,
    weight_ih: torch.Tensor,
    weight_hh: torch.Tensor,
    bias: torch.Tensor,
    k: torch.Tensor,
    batch_sizes=None,
    hyperbolic_input: bool = False,
    hyperbolic_hidden_state0: bool = False,
    nonlin=None,
):
    if not hyperbolic_hidden_state0:
        hx = gmath.expmap0(h0, k=k)
    else:
        hx = h0
    if not hyperbolic_input:
        input = gmath.expmap0(input, k=k)
    outs = []
    if batch_sizes is None:
        input_unbinded = input.unbind(0)
        for t in range(input.size(0)):
            hx = mobius_gru_cell(
                input=input_unbinded[t],
                hx=hx,
                weight_ih=weight_ih,
                weight_hh=weight_hh,
                bias=bias,
                nonlin=nonlin,
                k=k,
            )
            outs.append(hx)
        outs = torch.stack(outs)
        h_last = hx
    else:
        h_last = []
        T = len(batch_sizes) - 1
        for t in range(batch_sizes.size(0)):
            ix, input = input[:batch_sizes[t]], input[batch_sizes[t]:]
            hx = mobius_gru_cell(
                input=ix,
                hx=hx,
                weight_ih=weight_ih,
                weight_hh=weight_hh,
                bias=bias,
                nonlin=nonlin,
                k=k,
            )
            outs.append(hx)
            if t < T:
                hx, ht = hx[:batch_sizes[t + 1]], hx[batch_sizes[t + 1]:]
                h_last.append(ht)
            else:
                h_last.append(hx)
        h_last.reverse()
        h_last = torch.cat(h_last)
        outs = torch.cat(outs)
    return outs, h_last
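The `batch_sizes` branch consumes a `torch.nn.utils.rnn.PackedSequence` one time step at a time. A toy sketch of just that slicing pattern on hypothetical data (no hyperbolic math involved):

import torch
from torch.nn.utils.rnn import pack_sequence

seqs = [torch.ones(3), 2 * torch.ones(2), 3 * torch.ones(1)]  # lengths 3, 2, 1
packed = pack_sequence(seqs)
data, batch_sizes = packed.data, packed.batch_sizes           # batch_sizes = tensor([3, 2, 1])
for t in range(batch_sizes.size(0)):
    step, data = data[:batch_sizes[t]], data[batch_sizes[t]:]
    print(t, step.tolist())  # the sequences still active at step t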
Example #3
 def __init__(self,
              *args,
              hyperbolic_input=True,
              hyperbolic_bias=True,
              nonlin=None,
              k=-1.0,
              fp64_hyper=True,
              **kwargs):
     k = torch.tensor(k)
     super().__init__(*args, **kwargs)
     if self.bias is not None:
         if hyperbolic_bias:
             self.ball = manifold = geoopt.PoincareBall(c=k.abs())
             self.bias = geoopt.ManifoldParameter(self.bias,
                                                  manifold=manifold)
             with torch.no_grad():
                 # self.bias.set_(gmath.expmap0(self.bias.normal_() / 4, k=k))
                 self.bias.set_(
                     gmath.expmap0(self.bias.normal_() / 400, k=k))
     with torch.no_grad():
         # 1e-2 was the original value in the code. The updated one is from HNN++
         std = 1 / np.sqrt(2 * self.weight.shape[0] * self.weight.shape[1])
         # Actually, we divide that by 100 so that it starts really small and far from the border
         std = std / 100
         self.weight.normal_(std=std)
     self.hyperbolic_bias = hyperbolic_bias
     self.hyperbolic_input = hyperbolic_input
     self.nonlin = nonlin
     self.k = k
     self.fp64_hyper = fp64_hyper
Example #4
    def __init__(
        self,
        input_size,
        hidden_size,
        num_layers=2,
        bias=True,
        nonlin=None,
        hyperbolic_input=True,
        hyperbolic_hidden_state0=True,
        k=-1.0,
    ):
        super().__init__()
        '''
        TODO: generalize to any number of layers
        current problem: ParameterList doesn't get copied to
        multiple GPUs when model is wrapped in DataParallel
        
        bug source: https://github.com/pytorch/pytorch/issues/36035
        '''
        assert num_layers == 2, '====[hyrnn_nets.py] current version only supports 2-layer GRU===='

        k = torch.tensor(k)  # k arrives as a plain float; PoincareBall needs a tensor for k.abs()
        self.ball = geoopt.PoincareBall(c=k.abs())
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        # note: do not pre-assign self.bias here; it would clash with register_buffer("bias", None) below
        weight_ih = torch.nn.ParameterList([
            torch.nn.Parameter(
                torch.Tensor(3 * hidden_size,
                             input_size if i == 0 else hidden_size))
            for i in range(num_layers)
        ])
        weight_hh = torch.nn.ParameterList([
            torch.nn.Parameter(torch.Tensor(3 * hidden_size, hidden_size))
            for _ in range(num_layers)
        ])
        if bias:
            biases = []
            for i in range(num_layers):
                bias = torch.randn(3, hidden_size) * 1e-5
                bias = geoopt.ManifoldParameter(gmath.expmap0(bias,
                                                              k=self.ball.k),
                                                manifold=self.ball)
                biases.append(bias)
            self.bias = torch.nn.ParameterList(biases)
        else:
            self.register_buffer("bias", None)

        #====ONLY SUPPORT 2 LAYERS====#
        self.weight_ih_1 = weight_ih[0]
        self.weight_ih_2 = weight_ih[1]
        self.weight_hh_1 = weight_hh[0]
        self.weight_hh_2 = weight_hh[1]
        if self.bias is not None:
            self.bias_1 = self.bias[0]
            self.bias_2 = self.bias[1]

        self.nonlin = nonlin
        self.hyperbolic_input = hyperbolic_input
        self.hyperbolic_hidden_state0 = hyperbolic_hidden_state0
        self.reset_parameters()
Example #5
 def __init__(self, input_size, hidden_size, num_layers=1, bias=True, nonlin=None, hyperbolic_input=True,
              hyperbolic_hidden_state0=True, c=1.0):
     super().__init__()
     self.ball = gt.PoincareBall(c=c)
     self.input_size = input_size
     self.hidden_size = hidden_size
     self.num_layers = num_layers
      # note: do not pre-assign self.bias here; it would clash with register_buffer("bias", None) below
     self.weight_ih = torch.nn.ParameterList(
         [torch.nn.Parameter(torch.Tensor(3 * hidden_size, input_size if i == 0 else hidden_size))
          for i in range(num_layers)]
     )
     self.weight_hh = torch.nn.ParameterList(
         [torch.nn.Parameter(torch.Tensor(3 * hidden_size, hidden_size)) for _ in range(num_layers)]
     )
     if bias:
         biases = []
         for i in range(num_layers):
             bias = torch.randn(3, hidden_size) * 1e-5
             bias = gt.ManifoldParameter(pmath.expmap0(bias, k=self.ball.k), manifold=self.ball)
             biases.append(bias)
         self.bias = torch.nn.ParameterList(biases)
     else:
         self.register_buffer("bias", None)
     self.nonlin = nonlin
     self.hyperbolic_input = hyperbolic_input
     self.hyperbolic_hidden_state0 = hyperbolic_hidden_state0
     self.reset_parameters()
Example #6
 def init_lut(self, weights, dim_0, dim_1):
     if weights is None:
         with torch.no_grad():
             weights = init_embeddings(dim_0, dim_1)
             if self.args.embedding_metric == cs.HY:
                 weights = pmath.expmap0(weights,
                                         k=self.word_embed_manifold.k)
     return gt.ManifoldParameter(weights, manifold=self.word_embed_manifold)
Example #7
def define_mapping(in_metric, out_metric, c_value):
    if in_metric == out_metric:
        return lambda x: x
    elif in_metric == cs.HY and out_metric == cs.EU:
        return lambda x: pmath.logmap0(x, k=POINCARE_K)
    elif in_metric == cs.EU and out_metric == cs.HY:
        return lambda x: pmath.expmap0(x, k=POINCARE_K)
    else:
        raise ValueError(
            f"Wrong metrics: in_metric:'{in_metric}', out_metric:'{out_metric}'"
        )
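A quick sanity check for the mapping pair above (assuming `cs.HY`/`cs.EU` are the repo's metric constants and the module-level `POINCARE_K` is a negative curvature tensor such as `torch.tensor(-1.0)`; at the origin, `logmap0` exactly inverts `expmap0`):

import torch

x = torch.randn(4, 8) * 1e-2
to_hyp = define_mapping(cs.EU, cs.HY, c_value=1.0)
to_euc = define_mapping(cs.HY, cs.EU, c_value=1.0)
assert torch.allclose(to_euc(to_hyp(x)), x, atol=1e-5)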
Example #8
def mobius_linear(
    input,
    weight,
    bias=None,
    hyperbolic_input=True,
    hyperbolic_bias=True,
    nonlin=None,
    c=1.0,
):
    if hyperbolic_input:
        output = pmath.mobius_matvec(weight, input, k=to_k(c, weight))
    else:
        output = torch.nn.functional.linear(input, weight)
        output = pmath.expmap0(output, k=to_k(c, output))
    if bias is not None:
        if not hyperbolic_bias:
            bias = pmath.expmap0(bias, k=to_k(c, bias))
        output = pmath.mobius_add(output, bias, k=to_k(c, output))
    if nonlin is not None:
        output = pmath.mobius_fn_apply(nonlin, output, k=to_k(c, output))
    output = pmath.project(output, k=to_k(c, output))
    return output
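`to_k` is not defined on this page; a plausible sketch (an assumption, not the repo's verbatim helper) converts the positive ball parameter c into the sectional curvature k = -c as a tensor matching the reference tensor's dtype and device:

import torch

def to_k(c, ref):
    # hypothetical helper: a Poincare ball with parameter c > 0 has curvature k = -c
    return torch.as_tensor(-c, dtype=ref.dtype, device=ref.device)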
Example #9
    def __init__(self, output_dim, input_dims, second_input_dim=None, third_input_dim=None, nonlin=None):
        super(MobiusConcat, self).__init__()
        b_input_dims = second_input_dim if second_input_dim is not None else input_dims

        self.lin_a = MobiusLinear(input_dims, output_dim, bias=False, nonlin=nonlin)
        self.lin_b = MobiusLinear(b_input_dims, output_dim, bias=False, nonlin=nonlin)

        if third_input_dim:
            self.lin_c = MobiusLinear(third_input_dim, output_dim, bias=False, nonlin=nonlin)

        self.ball = gt.PoincareBall()
        b = torch.randn(output_dim) * 1e-5
        self.bias = gt.ManifoldParameter(pmath.expmap0(b, k=self.ball.k), manifold=self.ball)
Example #10
    def __init__(self, input_size, hidden_size):
        super(MobiusRNN, self).__init__()

        self.ball = gt.PoincareBall()
        self.input_size = input_size
        self.hidden_size = hidden_size
        
        # k = (1 / hidden_size)**0.5
        k_w = (6 / (self.hidden_size + self.hidden_size)) ** 0.5  # xavier uniform
        k_u = (6 / (self.input_size + self.hidden_size)) ** 0.5  # xavier uniform
        self.w = gt.ManifoldParameter(gt.ManifoldTensor(hidden_size, hidden_size).uniform_(-k_w, k_w))
        self.u = gt.ManifoldParameter(gt.ManifoldTensor(input_size, hidden_size).uniform_(-k_u, k_u))
        bias = torch.randn(hidden_size) * 1e-5
        self.b = gt.ManifoldParameter(pmath.expmap0(bias, k=self.ball.k), manifold=self.ball)
Example #11
 def __init__(self, in_features, out_features, c=1.0):
     super().__init__()
     self.in_features = in_features
     self.out_features = out_features
     self.ball = ball = geoopt.PoincareBall(c=c)
     self.sphere = sphere = geoopt.manifolds.Sphere()
     self.scale = torch.nn.Parameter(torch.zeros(out_features))
     point = torch.randn(out_features, in_features) / 4
     point = pmath.expmap0(point, k=to_k(c, point))
     tangent = torch.randn(out_features, in_features)
     self.point = geoopt.ManifoldParameter(point, manifold=ball)
     with torch.no_grad():
         self.tangent = geoopt.ManifoldParameter(tangent,
                                                 manifold=sphere).proj_()
Example #12
 def __init__(self, *args, hyperbolic_input=True, hyperbolic_bias=True, nonlin=None, c=1.0, **kwargs):
     super().__init__(*args, **kwargs)
     self.ball = gt.PoincareBall(c=c)
     if self.bias is not None:
         if hyperbolic_bias:
             self.bias = gt.ManifoldParameter(self.bias, manifold=self.ball)
             with torch.no_grad():
                 self.bias.set_(pmath.expmap0(self.bias.normal_() * 1e-3, k=self.ball.k))
     with torch.no_grad():
         fin, fout = self.weight.size()
         k = (6 / (fin + fout)) ** 0.5  # xavier uniform
         self.weight.uniform_(-k, k)
     self.hyperbolic_bias = hyperbolic_bias
     self.hyperbolic_input = hyperbolic_input
     self.nonlin = nonlin
Example #13
 def __init__(self, in_features, out_features, k=-1.0, fp64_hyper=True):
     k = torch.tensor(k)
     super().__init__()
     self.in_features = in_features
     self.out_features = out_features
     self.ball = ball = geoopt.PoincareBall(c=k.abs())
     self.sphere = sphere = geoopt.manifolds.Sphere()
     self.scale = torch.nn.Parameter(torch.zeros(out_features))
     point = torch.randn(out_features, in_features) / 4
     point = gmath.expmap0(point, k=k)
     tangent = torch.randn(out_features, in_features)
     self.point = geoopt.ManifoldParameter(point, manifold=ball)
     self.fp64_hyper = fp64_hyper
     with torch.no_grad():
         self.tangent = geoopt.ManifoldParameter(tangent,
                                                 manifold=sphere).proj_()
Example #14
    def __init__(self, in_features, out_features, c=1.0):
        """
        :param in_features: number of dimensions of the input
        :param out_features: number of classes
        """
        super().__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.ball = gt.PoincareBall(c=c)
        points = torch.randn(out_features, in_features) * 1e-5
        points = pmath.expmap0(points, k=self.ball.k)
        self.p_k = gt.ManifoldParameter(points, manifold=self.ball)

        tangent = torch.Tensor(out_features, in_features)
        stdv = (6 / (out_features + in_features)) ** 0.5  # xavier uniform
        torch.nn.init.uniform_(tangent, -stdv, stdv)
        self.a_k = torch.nn.Parameter(tangent)
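A hedged sketch of the forward pass this init suggests, using geoopt's stereographic `dist2plane` (an assumption about the class's intent, not the repo's verbatim code): each logit is the signed distance from the input point to the hyperplane through `p_k` with tangent normal `a_k`.

    def forward(self, x):
        # x: (..., in_features) -> (..., out_features) signed distances to the class hyperplanes
        return pmath.dist2plane(x.unsqueeze(-2), self.p_k, self.a_k,
                                k=self.ball.k, signed=True)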
Example #15
    def __init__(self, args, pos_embeds_rows, input_dims):
        super(DistanceAttention, self).__init__()

        # pos embeds
        self.manifold = gt.PoincareBall() if args.attn_metric == cs.HY else gt.Euclidean()
        with torch.no_grad():
            pos_embeds = init_embeddings(pos_embeds_rows, input_dims)
            if args.attn_metric == cs.HY:
                pos_embeds = pmath.expmap0(pos_embeds, k=self.manifold.k)
            beta = torch.Tensor(1).uniform_(-0.01, 0.01)
        self.position_embeds = gt.ManifoldParameter(pos_embeds,
                                                    manifold=self.manifold)

        if args.attn_metric == cs.HY:
            self.key_dense = hnn.MobiusLinear(input_dims,
                                              input_dims,
                                              hyperbolic_input=True,
                                              hyperbolic_bias=True)
            self.query_dense = hnn.MobiusLinear(input_dims,
                                                input_dims,
                                                hyperbolic_input=True,
                                                hyperbolic_bias=True)
            self.addition = lambda x, y: pmath.mobius_add(
                x, y, k=self.manifold.k)
            self.distance_function = lambda x, y: pmath.dist(
                x, y, k=self.manifold.k)
            self.midpoint = hnn.weighted_mobius_midpoint
        else:
            self.key_dense = nn.Linear(input_dims, input_dims)
            self.query_dense = nn.Linear(input_dims, input_dims)
            self.addition = torch.add
            self.distance_function = utils.euclidean_distance
            self.midpoint = hnn.weighted_euclidean_midpoint

        self.encoder_to_attn_map = define_mapping(args.encoder_metric,
                                                  args.attn_metric, args.c)
        self.attention_function = nn.Softmax(
            dim=1) if args.attn == "softmax" else nn.Sigmoid()
        self.beta = torch.nn.Parameter(beta, requires_grad=True)
Example #16
 def __init__(self,
              *args,
              hyperbolic_input=True,
              hyperbolic_bias=True,
              nonlin=None,
              c=1.0,
              **kwargs):
     super().__init__(*args, **kwargs)
     if self.bias is not None:
         if hyperbolic_bias:
             self.ball = manifold = geoopt.PoincareBall(c=c)
             self.bias = geoopt.ManifoldParameter(self.bias,
                                                  manifold=manifold)
             with torch.no_grad():
                 self.bias.set_(
                     pmath.expmap0(self.bias.normal_() / 4,
                                   k=to_k(c, self.bias)))
     with torch.no_grad():
         self.weight.normal_(std=1e-2)
     self.hyperbolic_bias = hyperbolic_bias
     self.hyperbolic_input = hyperbolic_input
     self.nonlin = nonlin
Example #17
    def __init__(self, char_vocab, args):
        super(MentionEncoder, self).__init__()

        self.mention_output_dim = args.space_dims * 2
        self.char_output_dim = args.space_dims
        if args.encoder_metric == cs.HY:
            self.manifold = gt.PoincareBall()
            self.word2space = hnn.MobiusLinear(args.word_emb_size,
                                               self.mention_output_dim,
                                               hyperbolic_input=True,
                                               hyperbolic_bias=True,
                                               nonlin=get_nonlin(
                                                   args.men_nonlin))
            self.non_lin = lambda x: x
            self.char_rnn = hnn.MobiusRNN(args.space_dims, args.space_dims)
        else:
            self.manifold = gt.Euclidean()
            self.word2space = nn.Linear(args.word_emb_size,
                                        self.mention_output_dim)
            self.non_lin = get_nonlin(args.men_nonlin)
            self.char_rnn = hnn.EuclRNN(args.space_dims, args.space_dims)

        self.input_dropout = nn.Dropout(p=args.input_dropout)
        self.mention_attn = DistanceAttention(args, args.mention_len + 1,
                                              self.mention_output_dim)
        self.char_mapping = define_mapping(args.encoder_metric,
                                           args.attn_metric, args.c)
        self.char_midpoint = hnn.mobius_midpoint if args.attn_metric == cs.HY else hnn.euclidean_midpoint

        # char embeds
        with torch.no_grad():
            char_embeds = init_embeddings(char_vocab.size(),
                                          self.char_output_dim)
            if args.encoder_metric == cs.HY:
                char_embeds = pmath.expmap0(char_embeds, k=self.manifold.k)
        self.char_lut = gt.ManifoldParameter(char_embeds,
                                             manifold=self.manifold)
Example #18
    def __init__(
        self,
        vocab_size,
        embedding_dim,
        hidden_dim,
        project_dim,
        cell_type="eucl_rnn",
        embedding_type="eucl",
        decision_type="eucl",
        use_distance_as_feature=True,
        device=None,
        num_layers=1,
        num_classes=1,
        c=1.0,
    ):
        super(RNNBase, self).__init__()
        (cell_type, embedding_type,
         decision_type) = map(str.lower,
                              [cell_type, embedding_type, decision_type])
        if embedding_type == "eucl":
            self.embedding = hyrnn.LookupEmbedding(vocab_size,
                                                   embedding_dim,
                                                   manifold=geoopt.Euclidean())
            with torch.no_grad():
                self.embedding.weight.normal_()
        elif embedding_type == "hyp":
            self.embedding = hyrnn.LookupEmbedding(
                vocab_size,
                embedding_dim,
                manifold=geoopt.PoincareBall(c=c),
            )
            with torch.no_grad():
                self.embedding.weight.set_(
                    pmath.expmap0(self.embedding.weight.normal_() / 10, c=c))
        else:
            raise NotImplementedError(
                "Unsuported embedding type: {0}".format(embedding_type))
        self.embedding_type = embedding_type
        if decision_type == "eucl":
            self.projector = nn.Linear(hidden_dim * 2, project_dim)
            self.logits = nn.Linear(project_dim, num_classes)
        elif decision_type == "hyp":
            self.projector_source = hyrnn.MobiusLinear(hidden_dim,
                                                       project_dim,
                                                       c=c)
            self.projector_target = hyrnn.MobiusLinear(hidden_dim,
                                                       project_dim,
                                                       c=c)
            self.logits = hyrnn.MobiusDist2Hyperplane(project_dim, num_classes)
        else:
            raise NotImplementedError(
                "Unsuported decision type: {0}".format(decision_type))
        self.ball = geoopt.PoincareBall(c)
        if use_distance_as_feature:
            if decision_type == "eucl":
                self.dist_bias = nn.Parameter(torch.zeros(project_dim))
            else:
                self.dist_bias = geoopt.ManifoldParameter(
                    torch.zeros(project_dim), manifold=self.ball)
        else:
            self.register_buffer("dist_bias", None)
        self.decision_type = decision_type
        self.use_distance_as_feature = use_distance_as_feature
        self.device = device  # declaring device here due to fact we are using catalyst
        self.num_layers = num_layers
        self.hidden_dim = hidden_dim
        self.c = c

        if cell_type == "eucl_rnn":
            self.cell = nn.RNN
        elif cell_type == "eucl_gru":
            self.cell = nn.GRU
        elif cell_type == "hyp_gru":
            self.cell = functools.partial(hyrnn.MobiusGRU, c=c)
        else:
            raise NotImplementedError(
                "Unsuported cell type: {0}".format(cell_type))
        self.cell_type = cell_type

        self.cell_source = self.cell(embedding_dim, self.hidden_dim,
                                     self.num_layers)
        self.cell_target = self.cell(embedding_dim, self.hidden_dim,
                                     self.num_layers)
Example #19
def exp_map_mu0_c(x: Tensor, c: Tensor) -> Tensor:
    return pm.expmap0(x, c=c)
Example #20
    def forward(self, input):
        source_input = input[0]
        target_input = input[1]
        alignment = input[2]
        batch_size = alignment.shape[0]

        source_input_data = self.embedding(source_input.data)
        target_input_data = self.embedding(target_input.data)

        zero_hidden = torch.zeros(self.num_layers,
                                  batch_size,
                                  self.hidden_dim,
                                  device=self.device or source_input_data.device,
                                  dtype=source_input_data.dtype)

        if self.embedding_type == "eucl" and "hyp" in self.cell_type:
            source_input_data = pmath.expmap0(source_input_data, c=self.c)
            target_input_data = pmath.expmap0(target_input_data, c=self.c)
        elif self.embedding_type == "hyp" and "eucl" in self.cell_type:
            source_input_data = pmath.logmap0(source_input_data, c=self.c)
            target_input_data = pmath.logmap0(target_input_data, c=self.c)
        # ht: (num_layers * num_directions, batch, hidden_size)

        source_input = torch.nn.utils.rnn.PackedSequence(
            source_input_data, source_input.batch_sizes)
        target_input = torch.nn.utils.rnn.PackedSequence(
            target_input_data, target_input.batch_sizes)

        _, source_hidden = self.cell_source(source_input, zero_hidden)
        _, target_hidden = self.cell_target(target_input, zero_hidden)

        # take hiddens from the last layer
        source_hidden = source_hidden[-1]
        target_hidden = target_hidden[-1][alignment]

        if self.decision_type == "hyp":
            if "eucl" in self.cell_type:
                source_hidden = pmath.expmap0(source_hidden, c=self.c)
                target_hidden = pmath.expmap0(target_hidden, c=self.c)
            source_projected = self.projector_source(source_hidden)
            target_projected = self.projector_target(target_hidden)
            projected = pmath.mobius_add(source_projected,
                                         target_projected,
                                         c=self.ball.c)
            if self.use_distance_as_feature:
                dist = (pmath.dist(source_hidden,
                                   target_hidden,
                                   dim=-1,
                                   keepdim=True,
                                   c=self.ball.c)**2)
                bias = pmath.mobius_scalar_mul(dist,
                                               self.dist_bias,
                                               c=self.ball.c)
                projected = pmath.mobius_add(projected, bias, c=self.ball.c)
        else:
            if "hyp" in self.cell_type:
                source_hidden = pmath.logmap0(source_hidden, c=self.c)
                target_hidden = pmath.logmap0(target_hidden, c=self.c)
            projected = self.projector(
                torch.cat((source_hidden, target_hidden), dim=-1))
            if self.use_distance_as_feature:
                dist = torch.sum((source_hidden - target_hidden).pow(2),
                                 dim=-1,
                                 keepdim=True)
                bias = self.dist_bias * dist
                projected = projected + bias

        logits = self.logits(projected)
        # CrossEntropy accepts logits
        return logits