def __init__(self, edges, n, emb=16, bases=None, **kwargs):
    """
    Build the normalized sparse adjacency buffer and the relation weights of the layer.

    :param edges: Graph, in whatever format ``util.adj`` accepts.
    :param n: Number of nodes.
    :param emb: Size of the last dimension of the weight tensors.
    :param bases: Number of bases for a basis decomposition of the weights. If None, a full
        ``(r, n, emb)`` weight tensor is used instead.
    :param kwargs: Accepted but not used.
    """
    super().__init__()

    self.emb = emb

    # Indices used to compute the normalization of the edge values.
    # NOTE(review): the original comment calls this the "vertical stack", but the call passes
    # vertical=False (identical to the horizontal stack below). Sibling constructors normalize
    # with vertical=True -- confirm which normalization is intended before changing it.
    vindices, vsize = util.adj(edges, n, vertical=False)
    ih, _ = vindices.size()

    vals = torch.ones((ih, ), dtype=torch.float)
    vals = vals / util.sum_sparse(vindices, vals, vsize)

    # horizontal stack for the actual message passing
    indices, size = util.adj(edges, n, vertical=False)

    _, rn = size
    r = rn // n

    graph = torch.sparse.FloatTensor(indices=indices.t(), values=vals, size=size)  # will this get cuda'd properly?
    self.register_buffer('graph', graph)

    gain = nn.init.calculate_gain('relu')

    if bases is None:
        self.weights = nn.Parameter(torch.FloatTensor(r, n, emb))
        nn.init.xavier_uniform_(self.weights, gain=gain)

        self.bases = None
    else:
        # Initialize the parameters that actually exist in this branch. (`self.weights` is
        # never created here, so initializing it raised an AttributeError and left comps/bases
        # as uninitialized memory.)
        self.comps = nn.Parameter(torch.FloatTensor(r, bases))
        nn.init.xavier_uniform_(self.comps, gain=gain)

        self.bases = nn.Parameter(torch.FloatTensor(bases, n, emb))
        nn.init.xavier_uniform_(self.bases, gain=gain)
def forward(self):
    """
    Two-layer forward pass over the full graph, with edge values taken from ``self.edgeweights()``.

    :return: Tensor of size ``(n, numcls)`` (asserted before the output bias is added). No softmax
        is applied here; per the original note it is applied in the loss.
    """
    r, rn, n = self.r, self.rn, self.n
    c = self.numcls

    values = self.edgeweights()

    if self.normalize:
        # Normalize over the vertically stacked adjacency of size (rn, n). This uses the same
        # `ver_indices` buffer as the second layer below (the attribute was misspelled as
        # `ver_ind`, which fails with an AttributeError whenever normalize is set).
        values = values / util.sum_sparse(self.ver_indices, values, (rn, n))

    ## Layer 1

    if self.bases1 is not None:
        weights = torch.einsum('rb, bij -> rij', self.comps1, self.bases1)
    else:
        weights = self.weights1

    assert weights.size() == (r, self.emb, self.h)

    # apply weights first, so that the sparse multiplication happens in the smaller dimension h
    if self.separate_emb:
        xw = torch.einsum('rne, reh -> rnh', self.embeddings, weights).contiguous()
    else:
        xw = torch.einsum('ne, reh -> rnh', self.embeddings, weights).contiguous()

    # sparse mm with the horizontally stacked adjacency
    hidden1 = util.spmm(self.hor_indices, values, (n, n * r), xw.view(r * n, self.h))

    assert hidden1.size() == (n, self.h)

    hidden1 = F.relu(hidden1 + self.bias1)

    ## Layer 2

    if self.bases2 is not None:
        weights = torch.einsum('rb, bij -> rij', self.comps2, self.bases2)
    else:
        weights = self.weights2

    # Multiply adjacencies by hidden (sparse mm with the vertically stacked adjacency)
    hidden2 = util.spmm(self.ver_indices, values, (n * r, n), hidden1)

    hidden2 = hidden2.view(r, n, self.h)  # new dim for the relations

    # Apply weights, sum over relations
    hidden2 = torch.einsum('rhc, rnh -> nc', weights, hidden2)

    assert hidden2.size() == (n, c)

    return hidden2 + self.bias2  # -- softmax is applied in the loss
def __init__(self, edges, n, emb=16, bases=None, unify='sum', **kwargs):
    """
    Set up the normalized adjacency buffer, the (emb x emb) relation weights and the unifier.

    :param edges: Graph, in whatever format ``util.adj`` accepts.
    :param n: Number of nodes (rebound to the second entry of the size returned by ``util.adj``).
    :param emb: Input and output dimension of the per-relation weight matrices.
    :param bases: Number of bases for a basis decomposition, or None for full weights.
    :param unify: One of 'sum', 'attention' or 'mlp'; selects the module stored in ``self.unify``.
    :param kwargs: Accepted but not used.
    :raises Exception: If ``unify`` is not one of the recognized names.
    """
    super().__init__()

    adj_indices, adj_size = util.adj(edges, n)
    rn, n = adj_size
    r = rn // n

    # one value per index, divided by the sums computed by util.sum_sparse
    num_indices, _ = adj_indices.size()
    edge_values = torch.ones((num_indices, ), dtype=torch.float)
    edge_values = edge_values / util.sum_sparse(adj_indices, edge_values, adj_size)

    # will this get cuda'd properly?
    self.register_buffer(
        'graph',
        torch.sparse.FloatTensor(indices=adj_indices.t(), values=edge_values, size=adj_size))

    relu_gain = nn.init.calculate_gain('relu')

    if bases is not None:
        self.comps = nn.Parameter(torch.FloatTensor(r, bases))
        self.bases = nn.Parameter(torch.FloatTensor(bases, emb, emb))
        for param in (self.comps, self.bases):
            nn.init.xavier_uniform_(param, gain=relu_gain)
    else:
        self.weights = nn.Parameter(torch.FloatTensor(r, emb, emb))
        nn.init.xavier_uniform_(self.weights, gain=relu_gain)
        self.bases = None

    if unify == 'sum':
        unifier = SumUnify()
    elif unify == 'attention':
        unifier = AttentionUnify(r, emb)
    elif unify == 'mlp':
        unifier = MLPUnify(r, emb)
    else:
        raise Exception(f'unify {unify} not recognized')

    self.unify = unifier
def __init__(self, edges, n, numcls, emb=128, h=16, bases=None, separate_emb=False, indep=False,
             normalize=False, sample=False):
    """
    Set up a two-layer model with learned edge weights.

    :param edges: Dict mapping each relation to a ``(subjects, objects)`` pair of equally long
        sequences; also passed to ``util.adj``.
    :param n: Number of nodes.
    :param numcls: Output dimension of the second layer.
    :param emb: Dimension of the node embeddings.
    :param h: Hidden dimension (output of layer 1, input of layer 2).
    :param bases: Number of bases for the basis decomposition of both layers, or None for full
        weights.
    :param separate_emb: If True, the embeddings have size ``(r, n, emb)``, otherwise ``(n, emb)``.
    :param indep: If True, one free parameter per triple is created (``self.weights``). Otherwise
        ``sscore``/``oscore`` linear maps and a per-relation ``pscore`` are created.
    :param normalize: Stored on the module as ``self.normalize``.
    :param sample: Stored on the module as ``self.sample``.
    """
    super().__init__()

    self.emb = emb
    self.h = h
    self.bases = bases
    self.numcls = numcls
    self.separate_emb = separate_emb
    self.normalize = normalize
    self.sample = sample

    # horizontally and vertically stacked versions of the adjacency graph
    hor_ind, hor_size = util.adj(edges, n, vertical=False)
    ver_ind, ver_size = util.adj(edges, n, vertical=True)

    rn, _ = ver_size
    r = rn // n
    self.r, self.rn, self.n = r, rn, n

    # (The unused `t = len(edges[0][0])` was dropped: it had no effect other than raising for
    #  graphs that lack a relation keyed 0.)

    vals = torch.ones(ver_ind.size(0), dtype=torch.float)
    vals = vals / util.sum_sparse(ver_ind, vals, ver_size)
    # -- the values are the same for the horizontal and the vertically stacked adjacency matrices
    #    so we can just normalize them by the vertically stacked one and reuse for the horizontal

    # Only indices and values are stored; no sparse tensors are built in the constructor.
    self.register_buffer('hor_indices', hor_ind)
    self.register_buffer('ver_indices', ver_ind)
    self.register_buffer('values', vals)

    gain = nn.init.calculate_gain('relu')

    # node embeddings: one per (relation, node) pair, or a single embedding per node
    emb_shape = (r, n, emb) if separate_emb else (n, emb)
    self.embeddings = nn.Parameter(torch.FloatTensor(*emb_shape))
    nn.init.xavier_uniform_(self.embeddings, gain=gain)

    # layer 1 weights
    if bases is None:
        self.weights1 = nn.Parameter(torch.FloatTensor(r, emb, h))
        nn.init.xavier_uniform_(self.weights1, gain=gain)

        self.bases1 = None
    else:
        self.comps1 = nn.Parameter(torch.FloatTensor(r, bases))
        nn.init.xavier_uniform_(self.comps1, gain=gain)

        self.bases1 = nn.Parameter(torch.FloatTensor(bases, emb, h))
        nn.init.xavier_uniform_(self.bases1, gain=gain)

    # layer 2 weights
    if bases is None:
        self.weights2 = nn.Parameter(torch.FloatTensor(r, h, numcls))
        nn.init.xavier_uniform_(self.weights2, gain=gain)

        self.bases2 = None
    else:
        self.comps2 = nn.Parameter(torch.FloatTensor(r, bases))
        nn.init.xavier_uniform_(self.comps2, gain=gain)

        self.bases2 = nn.Parameter(torch.FloatTensor(bases, h, numcls))
        nn.init.xavier_uniform_(self.bases2, gain=gain)

    self.bias1 = nn.Parameter(torch.FloatTensor(h).zero_())
    self.bias2 = nn.Parameter(torch.FloatTensor(numcls).zero_())

    # convert the edges dict to a matrix of triples
    s, o, p = [], [], []
    for pred, (sub, obj) in edges.items():
        s.extend(sub)
        o.extend(obj)
        p.extend([pred] * len(sub))

    # graph as triples: one (s, p, o) row per edge
    self.register_buffer('indices', torch.tensor([s, p, o], dtype=torch.long).t())

    # for computing the attention weights
    self.indep = indep
    if indep:
        self.weights = nn.Parameter(torch.randn(self.indices.size(0)))
    else:
        self.sscore = nn.Linear(emb, h)
        self.pscore = nn.Parameter(torch.FloatTensor(r, h))
        nn.init.xavier_uniform_(self.pscore, gain=gain)
        self.oscore = nn.Linear(emb, h)
def __init__(self, edges, n, numcls, emb=128, h=16, bases=None, separate_emb=False):
    """
    Set up a two-layer model over learned node embeddings with fixed, normalized adjacencies.

    :param edges: Graph, in whatever format ``util.adj`` accepts.
    :param n: Number of nodes.
    :param numcls: Output dimension of the second layer.
    :param emb: Dimension of the node embeddings.
    :param h: Hidden dimension (output of layer 1, input of layer 2).
    :param bases: Number of bases for the basis decomposition of both layers, or None for full
        weights.
    :param separate_emb: If True, the embeddings have size ``(r, n, emb)``, otherwise ``(n, emb)``.
    """
    super().__init__()

    self.emb = emb
    self.h = h
    self.bases = bases
    self.numcls = numcls
    self.separate_emb = separate_emb

    # horizontally and vertically stacked versions of the adjacency graph
    hor_ind, hor_size = util.adj(edges, n, vertical=False)
    ver_ind, ver_size = util.adj(edges, n, vertical=True)

    rn, _ = ver_size
    r = rn // n

    # (The unused `t = len(edges[0][0])` was dropped: it had no effect other than raising for
    #  graphs that lack a relation keyed 0.)

    vals = torch.ones(ver_ind.size(0), dtype=torch.float)
    vals = vals / util.sum_sparse(ver_ind, vals, ver_size)
    # -- the values are the same for the horizontal and the vertically stacked adjacency matrices
    #    so we can just normalize them by the vertically stacked one and reuse for the horizontal

    hor_graph = torch.sparse.FloatTensor(indices=hor_ind.t(), values=vals, size=hor_size)
    self.register_buffer('hor_graph', hor_graph)

    ver_graph = torch.sparse.FloatTensor(indices=ver_ind.t(), values=vals, size=ver_size)
    self.register_buffer('ver_graph', ver_graph)

    gain = nn.init.calculate_gain('relu')

    # node embeddings: one per (relation, node) pair, or a single embedding per node
    emb_shape = (r, n, emb) if separate_emb else (n, emb)
    self.embeddings = nn.Parameter(torch.FloatTensor(*emb_shape))
    nn.init.xavier_uniform_(self.embeddings, gain=gain)

    # layer 1 weights
    if bases is None:
        self.weights1 = nn.Parameter(torch.FloatTensor(r, emb, h))
        nn.init.xavier_uniform_(self.weights1, gain=gain)

        self.bases1 = None
    else:
        self.comps1 = nn.Parameter(torch.FloatTensor(r, bases))
        nn.init.xavier_uniform_(self.comps1, gain=gain)

        self.bases1 = nn.Parameter(torch.FloatTensor(bases, emb, h))
        nn.init.xavier_uniform_(self.bases1, gain=gain)

    # layer 2 weights
    if bases is None:
        self.weights2 = nn.Parameter(torch.FloatTensor(r, h, numcls))
        nn.init.xavier_uniform_(self.weights2, gain=gain)

        self.bases2 = None
    else:
        self.comps2 = nn.Parameter(torch.FloatTensor(r, bases))
        nn.init.xavier_uniform_(self.comps2, gain=gain)

        self.bases2 = nn.Parameter(torch.FloatTensor(bases, h, numcls))
        nn.init.xavier_uniform_(self.bases2, gain=gain)

    self.bias1 = nn.Parameter(torch.FloatTensor(h).zero_())
    self.bias2 = nn.Parameter(torch.FloatTensor(numcls).zero_())
def forward(self):
    """
    Two-layer forward pass in which the values of the sparse adjacency matrices are computed from
    ``self.nhots`` by ``self.to_latent1`` / ``self.to_latent2``.

    Each ``to_latent*`` output is asserted to have size ``(nt, rp)``; it is softmaxed over dim 1,
    transposed and flattened into one value per sparse index.

    :return: Tensor of size ``(n, numcls)`` (asserted before the output bias is added). No softmax
        is applied here; per the original note it is applied in the loss.
    """
    rp, n, nt = self.rp, self.n, self.nt

    latents1 = self.to_latent1(self.nhots)
    assert latents1.size() == (nt, rp)

    latents1 = torch.softmax(latents1, dim=1)
    latents1 = latents1.t().reshape(-1)

    # One value per index is required. Checked before the values are used, and the message
    # reports the tensor that is actually being compared (`hindices`).
    assert self.hindices.size(0) == latents1.size(0), f'{self.hindices.size()} {latents1.size()}'

    # column normalize
    latents1 = latents1 / util.sum_sparse(self.hindices, latents1, (n, n * rp), row=False)

    ## Layer 1

    e = self.emb
    b, c = self.bases, self.numcls

    if self.bases1 is not None:
        # equivalent to einsum('rb, bij -> rij', comps1, bases1), written as a single mm
        weights = torch.mm(self.comps1, self.bases1.view(b, n * e)).view(rp, n, e)
    else:
        weights = self.weights1

    assert weights.size() == (rp, n, e)

    # Apply weights and sum over relations
    h = util.spmm(indices=self.hindices, values=latents1, size=(n, n * rp),
                  xmatrix=weights.view(rp * n, e))
    assert h.size() == (n, e)

    h = F.relu(h + self.bias1)

    ## Layer 2

    latents2 = self.to_latent2(self.nhots)
    assert latents2.size() == (nt, rp)

    latents2 = torch.softmax(latents2, dim=1)
    latents2 = latents2.t().reshape(-1)

    # row normalize
    latents2 = latents2 / util.sum_sparse(self.vindices, latents2, (n * rp, n), row=True)

    # Multiply adjacencies by hidden (sparse mm)
    h = util.spmm(indices=self.vindices, values=latents2, size=(n * rp, n), xmatrix=h)

    h = h.view(rp, n, e)  # new dim for the relations

    if self.bases2 is not None:
        # equivalent to einsum('rb, bij -> rij', comps2, bases2), written as a single mm
        weights = torch.mm(self.comps2, self.bases2.view(b, e * c)).view(rp, e, c)
    else:
        weights = self.weights2

    # Apply weights, sum over relations
    h = torch.einsum('rhc, rnh -> nc', weights, h)

    assert h.size() == (n, c)

    return h + self.bias2  # -- softmax is applied in the loss
def __init__(self, edges, n, numcls, emb=16, bases=None, softmax=False, triples=None, num_rels=None):
    """
    Set up a two-layer model with ``(r, n, emb)`` first-layer weights and fixed, normalized
    adjacencies. The graph is given either as ``edges`` or as ``triples``, never both.

    :param edges: Graph for ``util.adj``, or None if ``triples`` is given.
    :param n: Number of nodes.
    :param numcls: Output dimension of the second layer.
    :param emb: Hidden dimension (output of layer 1, input of layer 2).
    :param bases: Number of bases for the basis decomposition of both layers, or None for full
        weights.
    :param softmax: Stored on the module as ``self.softmax``.
    :param triples: Graph for ``util.adj_triples``, or None if ``edges`` is given.
    :param num_rels: Forwarded to ``util.adj_triples`` when ``triples`` is used.
    """
    super().__init__()

    self.emb, self.bases = emb, bases
    self.numcls, self.softmax = numcls, softmax

    has_edges, has_triples = edges is not None, triples is not None
    assert not (has_edges and has_triples), 'Pass graph as edges or triples, not both.'
    assert has_edges or has_triples, 'No graph passed.'

    def stacked(vertical):
        # adjacency indices and size for the requested stacking, from whichever input was given
        if has_edges:
            return util.adj(edges, n, vertical=vertical)
        return util.adj_triples(triples, n, num_rels=num_rels, vertical=vertical)

    # horizontally and vertically stacked versions of the adjacency graph
    hor_ind, hor_size = stacked(False)
    ver_ind, ver_size = stacked(True)

    _, rn = hor_size
    r = rn // n

    # -- the values are the same for the horizontal and the vertically stacked adjacency matrices
    #    so we can just normalize them by the vertically stacked one and reuse for the horizontal
    vals = torch.ones(ver_ind.size(0), dtype=torch.float)
    vals = vals / util.sum_sparse(ver_ind, vals, ver_size)

    for name, ind, size in (('hor_graph', hor_ind, hor_size), ('ver_graph', ver_ind, ver_size)):
        self.register_buffer(name, torch.sparse.FloatTensor(indices=ind.t(), values=vals, size=size))

    relu_gain = nn.init.calculate_gain('relu')

    if bases is None:
        # full weights for layer 1, then layer 2
        self.weights1 = nn.Parameter(torch.FloatTensor(r, n, emb))
        nn.init.xavier_uniform_(self.weights1, gain=relu_gain)
        self.bases1 = None

        self.weights2 = nn.Parameter(torch.FloatTensor(r, emb, numcls))
        nn.init.xavier_uniform_(self.weights2, gain=relu_gain)
        self.bases2 = None
    else:
        # basis decomposition for layer 1, then layer 2
        self.comps1 = nn.Parameter(torch.FloatTensor(r, bases))
        nn.init.xavier_uniform_(self.comps1, gain=relu_gain)
        self.bases1 = nn.Parameter(torch.FloatTensor(bases, n, emb))
        nn.init.xavier_uniform_(self.bases1, gain=relu_gain)

        self.comps2 = nn.Parameter(torch.FloatTensor(r, bases))
        nn.init.xavier_uniform_(self.comps2, gain=relu_gain)
        self.bases2 = nn.Parameter(torch.FloatTensor(bases, emb, numcls))
        nn.init.xavier_uniform_(self.bases2, gain=relu_gain)

    self.bias1 = nn.Parameter(torch.FloatTensor(emb).zero_())
    self.bias2 = nn.Parameter(torch.FloatTensor(numcls).zero_())
def forward(self, triples, nodes=None):
    """
    Build the normalized adjacency matrix from `triples` and perform one round of message passing.

    The adjacency matrix is stored on the module as ``self.adj`` as a side effect.

    :param triples: Tensor of triples, passed to ``util.adj_triples_tensor``.
    :param nodes: Input node representations, indexed as ``(n, insize)``. Must be None exactly
        when ``self.insize`` is None, in which case the input is treated as the identity matrix.
    :return: Tensor of size ``(n, outsize)`` (asserted before the bias is added).
    :raises ValueError: If ``self.decomp`` is not None, 'basis' or 'block'.
    """
    n, r = self.n, self.r

    ## Construct the graph

    # horizontally and vertically stacked versions of the adjacency graph
    # (the vertical is always necessary to normalize the adjacencies)
    if self.hor:
        hor_ind, hor_size = util.adj_triples_tensor(triples, n, r, vertical=False)

    ver_ind, ver_size = util.adj_triples_tensor(triples, n, r, vertical=True)

    # compute values of row-normalized adjacency matrices (same for hor and ver)
    vals = torch.ones(ver_ind.size(0), dtype=torch.float, device=d(triples))
    vals = vals / util.sum_sparse(ver_ind, vals, ver_size)

    if self.hor:
        self.adj = torch.sparse.FloatTensor(indices=hor_ind.t(), values=vals, size=hor_size)
    else:
        self.adj = torch.sparse.FloatTensor(indices=ver_ind.t(), values=vals, size=ver_size)

    if triples.is_cuda:
        # Follow the device of the input rather than the default 'cuda' device, so that the
        # adjacency lands on the same GPU as `triples` when more than one is in use.
        self.adj = self.adj.to(triples.device)

    ## Perform message passing

    assert (nodes is None) == (self.insize is None)

    h0 = n if self.insize is None else self.insize
    h1 = self.outsize

    if self.decomp is None:
        weights = self.weights
    elif self.decomp == 'basis':
        weights = torch.einsum('rb, bij -> rij', self.comps, self.bases)
    elif self.decomp == 'block':
        weights = util.block_diag(self.blocks)
        # TODO: multiply in block form (more efficient, but implementation differs per layer type)
    else:
        # previously fell through and failed below with an unbound `weights`
        raise ValueError(f'decomp {self.decomp} not recognized')

    assert weights.size() == (r, h0, h1)

    if self.insize is None:
        # -- input is the identity matrix, just multiply the weights by the adjacencies
        out = torch.mm(self.adj, weights.view(r * h0, h1))

    elif self.hor:
        # -- input is high-dim and output is low dim, multiply h0 x weights first
        nodes = nodes[None, :, :].expand(r, n, h0)
        nw = torch.einsum('rni, rio -> rno', nodes, weights).contiguous()
        out = torch.mm(self.adj, nw.view(r * n, h1))

    else:
        # -- adj x h0 first, then weights
        out = torch.mm(self.adj, nodes)  # sparse mm
        out = out.view(r, n, h0)  # new dim for the relations
        out = torch.einsum('rio, rni -> no', weights, out)

    assert out.size() == (n, h1)

    return out + self.bias
def forward(self, nodes=None):
    """
    Perform one round of message passing over the stored graph (``self.indices``), with optional
    edge dropout during training.

    :param nodes: Input node representations, indexed as ``(n, insize)``. Must be None exactly
        when ``self.insize`` is None, in which case the input is treated as the identity matrix.
    :return: Tensor of size ``(n, outsize)`` (asserted before the bias is added).
    :raises ValueError: If ``self.decomp`` is not None, 'basis' or 'block'.
    """
    n, r = self.n, self.r

    ## Perform message passing

    assert (nodes is None) == (self.insize is None)

    h0 = n if self.insize is None else self.insize
    h1 = self.outsize

    if self.decomp is None:
        weights = self.weights
    elif self.decomp == 'basis':
        weights = torch.einsum('rb, bij -> rij', self.comps, self.bases)
    elif self.decomp == 'block':
        weights = util.block_diag(self.blocks)
        # TODO: multiply in block form (more efficient, but implementation differs per layer type)
    else:
        # previously fell through and failed below with an unbound `weights`
        raise ValueError(f'decomp {self.decomp} not recognized')

    assert weights.size() == (r, h0, h1)

    if self.edo is not None and self.training:
        # apply edge dropout: a separate keep-probability for the first `nt` entries and for the
        # last `n` entries of `self.indices`
        # NOTE(review): presumably the last n entries are the self-loops -- confirm against the
        # constructor.
        p, pid = self.edo

        nt = self.indices.size(0) - n

        mask = torch.bernoulli(
            torch.empty(size=(nt, ), dtype=torch.float, device=d(self.bias)).fill_(1.0 - p))
        maskid = torch.bernoulli(
            torch.empty(size=(n, ), dtype=torch.float, device=d(self.bias)).fill_(1.0 - pid))

        vals = torch.cat([mask, maskid], dim=0)
    else:
        vals = torch.ones(self.indices.size(0), dtype=torch.float, device=d(self.bias))

    # Row- or column normalize the values of the adjacency matrix
    # NOTE(review): with edge dropout a whole row/column can be masked out; whether
    # util.sum_sparse guards against the resulting zero sum is not visible here -- confirm.
    vals = vals / util.sum_sparse(self.indices, vals, self.adjsize, row=not self.hor)

    adj = torch.sparse.FloatTensor(indices=self.indices.t(), values=vals, size=self.adjsize)

    if self.bias.is_cuda:
        # Follow the device of the parameters rather than the default 'cuda' device, so that
        # the adjacency lands on the same GPU as the weights when more than one is in use.
        adj = adj.to(self.bias.device)

    if self.insize is None:
        # -- input is the identity matrix, just multiply the weights by the adjacencies
        out = torch.mm(adj, weights.view(r * h0, h1))

    elif self.hor:
        # -- input is high-dim and output is low dim, multiply h0 x weights first
        nodes = nodes[None, :, :].expand(r, n, h0)
        nw = torch.einsum('rni, rio -> rno', nodes, weights).contiguous()
        out = torch.mm(adj, nw.view(r * n, h1))

    else:
        # -- adj x h0 first, then weights
        out = torch.mm(adj, nodes)  # sparse mm
        out = out.view(r, n, h0)  # new dim for the relations
        out = torch.einsum('rio, rni -> no', weights, out)

    assert out.size() == (n, h1)

    return out + self.bias
def forward(self, triples, depth=2):
    """
    Score a batch of triples, using representations computed on a sampled subgraph.

    :param triples: Integer tensor whose last dimension has size 3. Column 1 is used to index
        ``self.relations``; columns 0 and 2 are treated as subject and object.
    :param depth: Number of sampling / message passing steps applied (0, 1, or 2 and up).
    :return: Tensor of scores with the shape of `triples` without its last dimension.
    """
    assert triples.size(-1) == 3

    r = self.r

    dims = triples.size()[:-1]
    triples = triples.reshape(-1, 3)

    batch = Batch(triples=triples, graph=self.graph, inv_graph=self.inv_graph)

    # Sample
    if depth > 0:
        batch = self.sample0(batch)

    if depth > 1:
        batch = self.sample1(batch)

    # extract batch node embeddings
    bind = batch.indices()
    nodes = self.embeddings[flatten(bind), :]

    if self.dropout is not None:
        nodes = self.dropout(nodes)

    # Message passing
    if depth > 0:
        # compute the edge weights
        dtriples = torch.tensor(list(batch.edges()), device=d(), dtype=torch.long)
        btriples = torch.tensor(batch.batch_triples(), device=d(), dtype=torch.long)

        # adjacency matrix indices
        # -- repeats R times, vertically
        bn = batch.num_nodes()
        fr = btriples[:, 0] + bn * btriples[:, 1]
        to = btriples[:, 2]

        indices = torch.cat([fr[:, None], to[:, None]], dim=1)

        si, pi, oi = dtriples[:, 0], dtriples[:, 1], dtriples[:, 2]
        semb, pemb, oemb = self.embeddings[si, :], self.relations[pi, :], self.embeddings[oi, :]

        # compute the score (bilinear dot product)
        # NOTE(review): `dots` is currently not used below (the lines that consumed it are
        # disabled), so the edge values are plain normalized ones. It is kept so that the
        # tokeys/toqueries modules are still invoked exactly as before -- confirm whether it
        # can be removed.
        semb = self.tokeys(semb)
        oemb = self.toqueries(oemb)

        dots = (semb * pemb * oemb).sum(dim=1)

        values = torch.ones((indices.size(0), ), device=d(), dtype=torch.float)
        values = values / util.sum_sparse(indices, values, (r * bn, bn))

        nodes = nodes + self.rgcn0(nodes, indices, values)

        if depth > 1:
            nodes = nodes + self.rgcn1(nodes, indices, values)

    _, tind = batch.target_indices(bind)  # -- indices of the target nodes in the list `bind`
    subjects, objects = [t[0] for t in tind], [t[1] for t in tind]

    assert len(subjects) == len(objects) == triples.size(0)

    # extract embeddings for target nodes
    try:
        s = nodes[subjects, :]
        o = nodes[objects, :]
        p = self.relations[triples[:, 1], :]
    except Exception:
        # dump some diagnostics, then re-raise with the original traceback intact
        print(triples.size())
        print(batch.size())
        print(nodes.size())
        print(len(batch.indices()))
        print(batch.entities)

        raise

    scores = self.decoder(s, p, o)

    assert scores.size() == (util.prod(dims), )

    return scores.view(*dims)