Example #1
def combine_matrices_flat(full_relation, a_pos_heads, a_pos_tails, a_neg_heads,
                          a_neg_tails, ids, b_matrix, device):
    '''
    inputs:
        a_pos_heads: a dict of ID : head indices for positive examples
        a_pos_tails: a dict of ID : tail indices for positive examples
        a_neg_heads: a dict of ID : head indices for negative examples
        a_neg_tails: a dict of ID : tail indices for negative examples
        ids: IDs with which to access the indices of A
        b_matrix: a matrix whose indices we want to include in the output
        device: torch device on which to place the output

    returns:
        out_matrix: matrix with the indices & values of A as well as the
            indices of B
        masks: a dict of ID : indices that correspond to the indices for
            each of the relations in A
    '''
    full_heads = np.array([], dtype=np.int32)
    full_tails = np.array([], dtype=np.int32)
    for rel_id in ids:
        full_heads = np.concatenate((full_heads, a_pos_heads[rel_id]))
        full_heads = np.concatenate((full_heads, a_neg_heads[rel_id]))
        full_tails = np.concatenate((full_tails, a_pos_tails[rel_id]))
        full_tails = np.concatenate((full_tails, a_neg_tails[rel_id]))
    indices = torch.LongTensor(np.vstack((full_heads, full_tails)))
    values = torch.zeros((indices.shape[1], 1))
    shape = (full_relation.entities[0].n_instances,
             full_relation.entities[1].n_instances, 1)
    full_a_matrix = SparseMatrix(indices=indices, values=values, shape=shape)
    full_a_matrix = full_a_matrix.to(device).coalesce_()

    b_idx_matrix = SparseMatrix.from_other_sparse_matrix(b_matrix, 1)
    b_idx_matrix.values += 1

    out_idx_matrix = b_idx_matrix + full_a_matrix
    out_matrix = SparseMatrix.from_other_sparse_matrix(out_idx_matrix, 0)

    for rel_id in ids:
        rel_matrix = make_target_matrix(full_relation, a_pos_heads[rel_id],
                                        a_pos_tails[rel_id],
                                        a_neg_heads[rel_id],
                                        a_neg_tails[rel_id], device)

        rel_full_matrix = SparseMatrix.from_other_sparse_matrix(
            out_idx_matrix, 1) + rel_matrix
        out_matrix.values = torch.cat(
            [out_matrix.values, rel_full_matrix.values], 1)
        out_matrix.n_channels += 1

        rel_idx_matrix = SparseMatrix.from_other_sparse_matrix(rel_matrix, 1)
        rel_idx_matrix.values += 1
        rel_idx_full_matrix = SparseMatrix.from_other_sparse_matrix(
            out_idx_matrix, 1) + rel_idx_matrix
        out_idx_matrix.values = torch.cat(
            [out_idx_matrix.values, rel_idx_full_matrix.values], 1)
        out_idx_matrix.n_channels += 1

    masks = {}
    for channel_i, rel_id in enumerate(ids):
        # Channel 0 marks the combined indices, so per-relation channels
        # start at channel 1
        masks[rel_id] = out_idx_matrix.values[:, channel_i + 1].nonzero().squeeze()
    return out_matrix, masks
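
A minimal usage sketch (hypothetical inputs; full_relation and b_matrix are
assumed to come from the surrounding module, as does make_target_matrix):

a_pos_heads = {0: np.array([0, 1], dtype=np.int32)}
a_pos_tails = {0: np.array([2, 3], dtype=np.int32)}
a_neg_heads = {0: np.array([1], dtype=np.int32)}
a_neg_tails = {0: np.array([0], dtype=np.int32)}
out_matrix, masks = combine_matrices_flat(full_relation, a_pos_heads,
                                          a_pos_tails, a_neg_heads,
                                          a_neg_tails, ids=[0],
                                          b_matrix=b_matrix, device='cpu')
# masks[0] selects the rows of out_matrix.values that belong to relation 0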
def make_flat_target_matrix(full_relation, rel_ids, pos_heads, pos_tails,
                            neg_heads, neg_tails, device):
    '''
    Build a single sparse target matrix with one channel per relation in
    rel_ids, stacking each relation's positive and negative example indices.
    '''
    full_heads = np.array([], dtype=np.int32)
    full_tails = np.array([], dtype=np.int32)
    for rel_id in rel_ids:
        full_heads = np.concatenate((full_heads, pos_heads[rel_id]))
        full_heads = np.concatenate((full_heads, neg_heads[rel_id]))
        full_tails = np.concatenate((full_tails, pos_tails[rel_id]))
        full_tails = np.concatenate((full_tails, neg_tails[rel_id]))
    n_rels = len(rel_ids)
    indices = torch.LongTensor(np.vstack((full_heads, full_tails)))
    values = torch.zeros((indices.shape[1], n_rels))
    shape = (full_relation.entities[0].n_instances,
             full_relation.entities[1].n_instances, n_rels)
    full_matrix = SparseMatrix(indices=indices, values=values, shape=shape)
    full_matrix = full_matrix.to(device).coalesce_()
    matrix_out = SparseMatrix.from_other_sparse_matrix(full_matrix, 0)

    for rel_id in rel_ids:
        rel_matrix = make_target_matrix(full_relation, pos_heads[rel_id],
                                        pos_tails[rel_id], neg_heads[rel_id],
                                        neg_tails[rel_id], device)

        rel_matrix_full = SparseMatrix.from_other_sparse_matrix(full_matrix, 1) + rel_matrix
        matrix_out.values = torch.cat([matrix_out.values, rel_matrix_full.values], 1)
        matrix_out.n_channels += 1
    return matrix_out
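
A usage sketch with two relations (hypothetical index dicts; full_relation is
assumed to be defined as above):

pos_heads = {0: np.array([0], dtype=np.int32), 1: np.array([2], dtype=np.int32)}
pos_tails = {0: np.array([1], dtype=np.int32), 1: np.array([3], dtype=np.int32)}
neg_heads = {0: np.array([1], dtype=np.int32), 1: np.array([3], dtype=np.int32)}
neg_tails = {0: np.array([0], dtype=np.int32), 1: np.array([2], dtype=np.int32)}
target = make_flat_target_matrix(full_relation, [0, 1], pos_heads, pos_tails,
                                 neg_heads, neg_tails, device='cpu')
# target has one channel per relation id, aligned on the union of indices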
 def test_broadcast_all(self):
     zero_matrix = SparseMatrix.from_other_sparse_matrix(self.X,
                                                         n_channels=1)
     out = zero_matrix.broadcast(torch.Tensor([5.]), "all")
     '''
     5ooo
     5o5o
     oooo
     5oo5
     '''
     self.assertSameValues(out.values, np.array([[5, 5, 5, 5, 5]]).T)
 def test_broadcast_row(self):
     zero_matrix = SparseMatrix.from_other_sparse_matrix(self.X,
                                                         n_channels=1)
     out = zero_matrix.broadcast(self.pooled, "row")
     '''
     1ooo
     1o3o
     oooo
     1oo4
     '''
     self.assertSameValues(out.values, np.array([[1, 1, 3, 1, 4]]).T)
 def test_broadcast_col(self):
     zero_matrix = SparseMatrix.from_other_sparse_matrix(self.X,
                                                         n_channels=1)
     out = zero_matrix.broadcast(self.pooled, "col")
     '''
     1ooo
     2o2o
     oooo
     4oo4
     '''
     self.assertSameValues(out.values, np.array([[1, 2, 2, 4, 4]]).T)
 def test_broadcast_diag(self):
     zero_matrix = SparseMatrix.from_other_sparse_matrix(self.X,
                                                         n_channels=1)
     out = zero_matrix.broadcast(torch.Tensor([5., 2]), "diag")
     '''
     5ooo
     0o0o
     oooo
     0oo5
     '''
     self.assertSameValues(out.values,
                           np.array([[5, 0, 0, 0, 5], [2, 0, 0, 0, 2]]).T)
Example #7
 def forward(self, X_in, X_out, indices_identity, indices_trans):
     '''
     X_in: source sparse tensor
     X_out: corresponding sparse tensor for the target relation
     indices_identity: indices for the identity op (input mask, output mask)
     indices_trans: indices for the transpose op (input mask, output mask)
     '''
     self.logger.info("n_params: {}".format(self.n_params))
     if isinstance(X_out, SparseMatrix):
         Y = SparseMatrix.from_other_sparse_matrix(X_out, self.out_dim)
     else:
         Y = X_out.clone()
     #TODO: can add a cache for input operations here
     for i in range(self.n_params):
         op_inp, op_out = self.all_ops[i]
         weight = self.weights[i]
         device = weight.device
         if op_inp is None:
             X_mul = torch.matmul(X_in, weight)
             X_op_out = self.output_op(op_out, X_out, X_mul, device)
         elif op_out is None:
             X_op_inp = self.input_op(op_inp, X_in, device)
             X_mul = torch.matmul(X_op_inp, weight)
             X_op_out = X_mul
         elif op_out[0] == "i":
             # Identity
             X_intersection_vals = X_in.gather_mask(indices_identity[0])
             X_mul = X_intersection_vals @ weight
             X_op_out = X_out.broadcast_from_mask(X_mul,
                                                  indices_identity[1],
                                                  device)
         elif op_out[0] == "t":
             # Transpose
             X_T_intersection_vals = X_in.gather_transpose(indices_trans[0])
             X_mul = X_T_intersection_vals @ weight
             X_op_out = X_out.broadcast_from_mask(X_mul, indices_trans[1],
                                                  device)
         else:
             # Pool or Gather or Do Nothing
             X_op_inp = self.input_op(op_inp, X_in, device)
             # Multiply values by weight
             X_mul = torch.matmul(X_op_inp, weight)
             # Broadcast or Embed Diag or Transpose
             X_op_out = self.output_op(op_out, X_out, X_mul, device)
         #assert X_op_out.nnz() == X_out.nnz()
         #assert Y.nnz() == X_out.nnz(), "Y: {}, X_out: {}".format(Y.nnz(), X_out.nnz())
         #assert Y.nnz() == X_op_out.nnz(), "Y: {}, X_op_out: {}".format(Y.nnz(), X_op_out.nnz())
         Y = Y + X_op_out
     return Y
def select_features(data, schema, feats_type, target_ent):
    '''
    Select features for nodes.
    TODO: IMPLEMENT THIS fully; feats_type 2 and 3 are still disabled in the
    quoted-out block below.
    '''
    # Select features for nodes
    in_dims = {}
    num_relations = len(schema.relations) - len(schema.entities)

    if feats_type == 0:
        # Keep all node attributes
        pass
    elif feats_type == 1:
        # Set all non-target node attributes to zero
        for ent_i in schema.entities:
            if ent_i.id != target_ent:
                # Use 10 dimensions to match the non-LGNN model
                # (see load_data_flat below)
                n_dim = 10
                rel_id = num_relations + ent_i.id
                data[rel_id] = SparseMatrix.from_other_sparse_matrix(
                    data[rel_id], n_dim)
    '''
    elif feats_type == 2:
        # Set all non-target node attributes to one-hot vector
        for i in range(0, len(features_list)):
            if i != target_ent:
                dim = features_list[i].shape[0]
                indices = torch.arange(n_instances).unsqueeze(0).repeat(2, 1)
                values = torch.FloatTensor(np.ones(dim))
                features_list[i] = torch.sparse.FloatTensor(indices, values, torch.Size([dim, dim])).to(device)
    elif feats_type == 3:
        in_dims = [features.shape[0] for features in features_list]
        for i in range(len(features_list)):
            dim = features_list[i].shape[0]
            indices = np.vstack((np.arange(dim), np.arange(dim)))
            indices = torch.LongTensor(indices)
            values = np.ones(dim)
            features_list[i] = torch.sparse.FloatTensor(indices, values, torch.Size([dim, dim])).to(device)
    '''
    for rel_id in schema.relations:
        in_dims[rel_id] = data[rel_id].n_channels
    return data, in_dims
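
A usage sketch (data and schema as produced by the loaders below; with
feats_type=1, attributes of all non-target entities are zeroed out):

data, in_dims = select_features(data, schema, feats_type=1, target_ent=0)
# in_dims maps each relation id to its remaining channel count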
Example #9
def load_data_flat(prefix,
                   use_node_attrs=True,
                   use_edge_data=True,
                   node_val='one'):
    '''
    Load data into one matrix containing all relations, reproducing Maron 2019.
    The first [# relation types] channels are adjacency matrices, while the
    next [sum of feature dimensions per entity type] channels hold node
    attributes on the relevant segment of their diagonals if
    use_node_attrs=True. If node features aren't included, node_val is used
    instead.
    '''
    dl = data_loader(DATA_FILE_DIR + prefix)
    total_n_nodes = dl.nodes['total']
    entities = [Entity(0, total_n_nodes)]
    relations = {0: Relation(0, [entities[0], entities[0]])}
    schema = DataSchema(entities, relations)

    # Sparse Matrix containing all data
    data_full = sum(dl.links['data'].values()).tocoo()
    data_diag = scipy.sparse.coo_matrix(
        (np.ones(total_n_nodes),
         (np.arange(total_n_nodes), np.arange(total_n_nodes))),
        (total_n_nodes, total_n_nodes))
    data_full += data_diag
    data_full = SparseMatrix.from_scipy_sparse(data_full.tocoo()).zero_()
    data_out = SparseMatrix.from_other_sparse_matrix(data_full, 0)
    # Load up all edge data
    for rel_id in sorted(dl.links['data'].keys()):
        data_matrix = dl.links['data'][rel_id]
        data_rel = SparseMatrix.from_scipy_sparse(data_matrix.tocoo())
        if not use_edge_data:
            # Use only adjacency information
            data_rel.values = torch.ones(data_rel.values.shape)
        data_rel_full = SparseMatrix.from_other_sparse_matrix(data_full,
                                                              1) + data_rel
        data_out.values = torch.cat([data_out.values, data_rel_full.values], 1)
        data_out.n_channels += 1

    if use_node_attrs:
        for ent_id, attr_matrix in dl.nodes['attr'].items():
            start_i = dl.nodes['shift'][ent_id]
            n_instances = dl.nodes['count'][ent_id]
            if attr_matrix is None:
                if node_val == 'zero':
                    attr_matrix = np.zeros((n_instances, 1))
                elif node_val == 'rand':
                    attr_matrix = np.random.randn(n_instances, 1)
                else:
                    attr_matrix = np.ones((n_instances, 1))
            n_channels = attr_matrix.shape[1]
            indices = torch.arange(start_i,
                                   start_i + n_instances).unsqueeze(0).repeat(
                                       2, 1)
            data_rel = SparseMatrix(
                indices=indices,
                values=torch.FloatTensor(attr_matrix),
                shape=np.array([total_n_nodes, total_n_nodes, n_channels]),
                is_set=True)
            data_rel_full = SparseMatrix.from_other_sparse_matrix(
                data_full, n_channels) + data_rel
            data_out.values = torch.cat(
                [data_out.values, data_rel_full.values], 1)
            data_out.n_channels += n_channels

    data = SparseMatrixData(schema)
    data[0] = data_out

    return schema, data, dl
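
Usage sketch (assumes an 'IMDB' dataset directory under DATA_FILE_DIR):

schema, data, dl = load_data_flat('IMDB', use_node_attrs=True,
                                  use_edge_data=True, node_val='one')
flat = data[0]  # adjacency channels first, then diagonal attribute channels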
Example #10
def load_data_flat(prefix,
                   use_node_attrs=True,
                   use_edge_data=True,
                   node_val='zero',
                   feats_type=0):
    '''
    Load data into one matrix containing all relations, reproducing Maron 2019.
    The first [# relation types] channels are adjacency matrices, while the
    next [sum of feature dimensions per entity type] channels hold node
    attributes on the relevant segment of their diagonals if
    use_node_attrs=True. If node features aren't included, node_val is used
    instead.
    '''
    dl = data_loader(DATA_FILE_DIR + prefix)
    total_n_nodes = dl.nodes['total']
    entities = [Entity(0, total_n_nodes)]
    relations = {0: Relation(0, [entities[0], entities[0]])}
    schema = DataSchema(entities, relations)

    # Sparse Matrix containing all data
    data_full = sum(dl.links['data'].values()).tocoo()
    data_diag = scipy.sparse.coo_matrix(
        (np.ones(total_n_nodes),
         (np.arange(total_n_nodes), np.arange(total_n_nodes))),
        (total_n_nodes, total_n_nodes))
    data_full += data_diag
    data_full = SparseMatrix.from_scipy_sparse(data_full.tocoo()).zero_()
    data_out = SparseMatrix.from_other_sparse_matrix(data_full, 0)
    # Load up all edge data
    for rel_id in sorted(dl.links['data'].keys()):
        data_matrix = dl.links['data'][rel_id]
        data_rel = SparseMatrix.from_scipy_sparse(data_matrix.tocoo())
        if not use_edge_data:
            # Use only adjacency information
            data_rel.values = torch.ones(data_rel.values.shape)
        data_rel_full = SparseMatrix.from_other_sparse_matrix(data_full,
                                                              1) + data_rel
        data_out.values = torch.cat([data_out.values, data_rel_full.values], 1)
        data_out.n_channels += 1

    target_entity = 0

    if use_node_attrs:
        for ent_id, attr_matrix in dl.nodes['attr'].items():
            start_i = dl.nodes['shift'][ent_id]
            n_instances = dl.nodes['count'][ent_id]
            if attr_matrix is None:
                if node_val == 'zero':
                    attr_matrix = np.zeros((n_instances, 1))
                elif node_val == 'rand':
                    attr_matrix = np.random.randn(n_instances, 1)
                else:
                    attr_matrix = np.ones((n_instances, 1))
            if feats_type == 1 and ent_id != target_entity:
                # To keep same behaviour as non-LGNN model, use 10 dimensions
                attr_matrix = np.zeros((n_instances, 10))
            n_channels = attr_matrix.shape[1]
            indices = torch.arange(start_i,
                                   start_i + n_instances).unsqueeze(0).repeat(
                                       2, 1)
            data_rel = SparseMatrix(
                indices=indices,
                values=torch.FloatTensor(attr_matrix),
                shape=np.array([total_n_nodes, total_n_nodes, n_channels]),
                is_set=True)
            data_rel_full = SparseMatrix.from_other_sparse_matrix(
                data_full, n_channels) + data_rel
            data_out.values = torch.cat(
                [data_out.values, data_rel_full.values], 1)
            data_out.n_channels += n_channels

    data = SparseMatrixData(schema)
    data[0] = data_out

    n_outputs = total_n_nodes
    n_output_classes = dl.labels_train['num_classes']
    schema_out = DataSchema([entities[target_entity]], [
        Relation(0, [entities[target_entity], entities[target_entity]],
                 is_set=True)
    ])
    data_target = SparseMatrixData(schema_out)
    data_target[0] = SparseMatrix(
        indices=torch.arange(n_outputs, dtype=torch.int64).repeat(2, 1),
        values=torch.zeros([n_outputs, n_output_classes]),
        shape=(n_outputs, n_outputs, n_output_classes),
        is_set=True)
    labels = np.zeros((dl.nodes['count'][0], dl.labels_train['num_classes']),
                      dtype=int)
    val_ratio = 0.2
    train_idx = np.nonzero(dl.labels_train['mask'])[0]
    np.random.shuffle(train_idx)
    split = int(train_idx.shape[0] * val_ratio)
    val_idx = train_idx[:split]
    train_idx = train_idx[split:]
    train_idx = np.sort(train_idx)
    val_idx = np.sort(val_idx)
    test_idx = np.nonzero(dl.labels_test['mask'])[0]
    labels[train_idx] = dl.labels_train['data'][train_idx]
    labels[val_idx] = dl.labels_train['data'][val_idx]
    # IMDB is multi-label, so keep the full label matrix; otherwise take the
    # argmax to get single-label targets
    if prefix != 'IMDB':
        labels = labels.argmax(axis=1)
    train_val_test_idx = {}
    train_val_test_idx['train_idx'] = train_idx
    train_val_test_idx['val_idx'] = val_idx
    train_val_test_idx['test_idx'] = test_idx

    return (schema, schema_out, data, data_target, labels,
            train_val_test_idx, dl)
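
Usage sketch of this extended loader (again assuming an 'IMDB' dataset
directory under DATA_FILE_DIR):

(schema, schema_out, data, data_target, labels,
 train_val_test_idx, dl) = load_data_flat('IMDB', feats_type=0)
train_idx = train_val_test_idx['train_idx']
# data_target[0] is a diagonal set matrix ready to receive per-node outputs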