Example #1
def load_tissue(params=None):
    random_seed = params.random_seed
    dense_dim = params.dense_dim 
    set_seed(random_seed)
    # apparently dense_dim vs. the resulting PCA explained variance ratio (see below):
    # 400 0.7895
    # 200 0.5117
    # 100 0.3203
    #  50 0.2083
    # alternative dataset paths, kept commented out:
    """
    root = '../data/mammary_gland'
    num = 2915
    data_path = f'{root}/mouse_Mammary_gland{num}_data.csv'
    type_path = f'{root}/mouse_Mammary_gland{num}_celltype.csv'
    """
    data_path = '../data/mouse_data/mouse_brain_2915_data.csv'
    type_path = '../data/mouse_data/mouse_brain_2915_celltype.csv'

    # load celltype file then update labels accordingly
    cell2type = pd.read_csv(type_path, index_col=0)
    cell2type.columns = ['cell', 'type']

    id2label = cell2type['type'].drop_duplicates(keep='first').tolist()
    label2id = {label: idx for idx, label in enumerate(id2label)}
    print(f'{len(id2label)} classes in total')
        
    cell2type['id'] = cell2type['type'].map(label2id)
    assert not cell2type['id'].isnull().any(), 'something is wrong with the celltype file.'

    # load data file
    data = pd.read_csv(data_path, index_col=0)
    data = data.transpose(copy=True)
    assert cell2type['cell'].tolist() == data.index.tolist()
    print(f'{data.shape[0]} cells, {data.shape[1]} genes.')
    # genes
    id2gene = data.columns.tolist()
    gene2id = {gene: idx for idx, gene in enumerate(id2gene)}

    # construct graph and add nodes and edges
    graph = dgl.DGLGraph()
    start = time()
    # 1. add all genes as nodes
    num_genes = len(id2gene)
    graph.add_nodes(num_genes)
    # sparse (cell, gene) indices of the nonzero expression entries
    row_idx, col_idx = data.to_numpy().nonzero()
    row_idx = row_idx + num_genes  # offset cell node ids past the gene nodes
    # 2. add cell nodes and edges
    num_cells = data.shape[0]
    graph.add_nodes(num_cells)
    graph.add_edges(row_idx, col_idx)
    graph.add_edges(col_idx, row_idx)
    print(f'Added {num_cells} cell nodes and {2 * len(row_idx)} directed edges.')
    print(f'#Nodes: {graph.number_of_nodes()}, #Edges: {graph.number_of_edges()}.')
    print(data.head())

    # reduce sparse features to dense features
    cell_pca = PCA(n_components=dense_dim, random_state=random_seed)
    cell_pca.fit(data.values)
    cell_feat = cell_pca.transform(data.values)
    cell_feat = torch.FloatTensor(cell_feat)

    gene_pca = PCA(n_components=dense_dim, random_state=random_seed)
    gene_pca.fit(data.T.values)
    gene_feat = gene_pca.transform(data.T.values)
    gene_feat = torch.FloatTensor(gene_feat)

    feat = torch.cat([gene_feat, cell_feat], dim=0)
    # feat = torch.zeros(graph.number_of_nodes(), dense_dim).normal_()

    cell_evr = sum(cell_pca.explained_variance_ratio_) * 100
    gene_evr = sum(gene_pca.explained_variance_ratio_) * 100
    print(f'[PCA] Cell EVR: {cell_evr:.2f}%. Gene EVR: {gene_evr:.2f} %.')
    # generate labels for training and testing
    labels = torch.LongTensor(cell2type['id'].tolist())
    train_mask = torch.zeros(num_cells, dtype=torch.bool)
    train_randidx = torch.randperm(num_cells)[:int(num_cells * 0.8)]
    # generate mask
    train_mask[train_randidx] = True
    test_mask = ~train_mask
    return num_cells, num_genes, graph, feat, labels, train_mask, test_mask
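A hypothetical invocation of load_tissue, assuming params is any object exposing random_seed and dense_dim (a SimpleNamespace stands in here; the real caller presumably passes an argparse-style namespace):

from types import SimpleNamespace

params = SimpleNamespace(random_seed=42, dense_dim=400)
num_cells, num_genes, graph, feat, labels, train_mask, test_mask = load_tissue(params)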
Example #2
In this tutorial, you learn how to create a graph and how to read and write node and edge representations.
"""

###############################################################################
# Creating a graph
# ----------------
# The design of :class:`DGLGraph` was influenced by other graph libraries. You
# can create a graph with networkx, convert it into a :class:`DGLGraph`, and
# vice versa.

import networkx as nx
import dgl

g_nx = nx.petersen_graph()
g_dgl = dgl.DGLGraph(g_nx)

import matplotlib.pyplot as plt
plt.subplot(121)
nx.draw(g_nx, with_labels=True)
plt.subplot(122)
nx.draw(g_dgl.to_networkx(), with_labels=True)

plt.show()

###############################################################################
# There are many ways to construct a :class:`DGLGraph`. Below are the allowed
# data types ordered by our recommendation.
#
# * A pair of arrays ``(u, v)`` storing the source and destination nodes respectively.
#   They can be numpy arrays or tensor objects from the backend framework.
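#
# A minimal sketch of that first option, with illustrative arrays; the node
# count is inferred from the largest index used.

import numpy as np

u = np.array([0, 0, 1])
v = np.array([1, 2, 2])
g_pair = dgl.DGLGraph((u, v))  # 3 nodes, 3 edges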
Example #3
def generate_rand_graph(n):
    arr = (sp.sparse.random(n, n, density=0.1, format='coo') != 0).astype(
        np.int64)
    return dgl.DGLGraph(arr, readonly=True)
Example #4
        else:
            return batch_graph.edges[tuple(zip(*batch_readout_edge_list))].data['pred'], \
                   batch_graph.nodes[batch_h_node_list].data['alpha'], \
                   batch_graph.nodes[batch_h_node_list].data['alpha_lang']


if __name__ == "__main__":
    model = AGRNN()

    node_num = 3
    edge_list = []
    for src in range(node_num):
        for dst in range(node_num):
            edge_list.append((src, dst))
    src, dst = tuple(zip(*edge_list))
    g = dgl.DGLGraph()
    g.add_nodes(node_num)
    g.add_edges(src, dst)
    import ipdb
    ipdb.set_trace()
    e_data = torch.eye(9)  # one feature row per edge (3 * 3 = 9 edges)
    n_data = torch.arange(node_num)  # ndata needs one entry per node
    g.edata['feat'] = e_data
    g.ndata['x'] = n_data

    # @staticmethod
    # def _build_graph(node_num, roi_label, node_space):

    #     graph = dgl.DGLGraph()
    #     graph.add_nodes(node_num)
Example #5
def test_rgcn_sorted(O):
    ctx = F.ctx()
    etype = []
    g = dgl.DGLGraph(sp.sparse.random(100, 100, density=0.1), readonly=True)
    g = g.to(F.ctx())
    # 5 etypes
    R = 5
    etype = [200, 200, 200, 200, 200]
    B = 2
    I = 10

    rgc_basis = nn.RelGraphConv(I, O, R, "basis", B).to(ctx)
    rgc_basis_low = nn.RelGraphConv(I, O, R, "basis", B, low_mem=True).to(ctx)
    rgc_basis_low.weight = rgc_basis.weight
    rgc_basis_low.w_comp = rgc_basis.w_comp
    rgc_basis_low.loop_weight = rgc_basis.loop_weight
    h = th.randn((100, I)).to(ctx)
    r = etype
    h_new = rgc_basis(g, h, r)
    h_new_low = rgc_basis_low(g, h, r)
    assert list(h_new.shape) == [100, O]
    assert list(h_new_low.shape) == [100, O]
    assert F.allclose(h_new, h_new_low)

    if O % B == 0:
        rgc_bdd = nn.RelGraphConv(I, O, R, "bdd", B).to(ctx)
        rgc_bdd_low = nn.RelGraphConv(I, O, R, "bdd", B, low_mem=True).to(ctx)
        rgc_bdd_low.weight = rgc_bdd.weight
        rgc_bdd_low.loop_weight = rgc_bdd.loop_weight
        h = th.randn((100, I)).to(ctx)
        r = etype
        h_new = rgc_bdd(g, h, r)
        h_new_low = rgc_bdd_low(g, h, r)
        assert list(h_new.shape) == [100, O]
        assert list(h_new_low.shape) == [100, O]
        assert F.allclose(h_new, h_new_low)

    # with norm
    norm = th.rand((g.number_of_edges(), 1)).to(ctx)

    rgc_basis = nn.RelGraphConv(I, O, R, "basis", B).to(ctx)
    rgc_basis_low = nn.RelGraphConv(I, O, R, "basis", B, low_mem=True).to(ctx)
    rgc_basis_low.weight = rgc_basis.weight
    rgc_basis_low.w_comp = rgc_basis.w_comp
    rgc_basis_low.loop_weight = rgc_basis.loop_weight
    h = th.randn((100, I)).to(ctx)
    r = etype
    h_new = rgc_basis(g, h, r, norm)
    h_new_low = rgc_basis_low(g, h, r, norm)
    assert list(h_new.shape) == [100, O]
    assert list(h_new_low.shape) == [100, O]
    assert F.allclose(h_new, h_new_low)

    if O % B == 0:
        rgc_bdd = nn.RelGraphConv(I, O, R, "bdd", B).to(ctx)
        rgc_bdd_low = nn.RelGraphConv(I, O, R, "bdd", B, low_mem=True).to(ctx)
        rgc_bdd_low.weight = rgc_bdd.weight
        rgc_bdd_low.loop_weight = rgc_bdd.loop_weight
        h = th.randn((100, I)).to(ctx)
        r = etype
        h_new = rgc_bdd(g, h, r, norm)
        h_new_low = rgc_bdd_low(g, h, r, norm)
        assert list(h_new.shape) == [100, O]
        assert list(h_new_low.shape) == [100, O]
        assert F.allclose(h_new, h_new_low)

    # id input
    rgc_basis = nn.RelGraphConv(I, O, R, "basis", B).to(ctx)
    rgc_basis_low = nn.RelGraphConv(I, O, R, "basis", B, low_mem=True).to(ctx)
    rgc_basis_low.weight = rgc_basis.weight
    rgc_basis_low.w_comp = rgc_basis.w_comp
    rgc_basis_low.loop_weight = rgc_basis.loop_weight
    h = th.randint(0, I, (100, )).to(ctx)
    r = etype
    h_new = rgc_basis(g, h, r)
    h_new_low = rgc_basis_low(g, h, r)
    assert list(h_new.shape) == [100, O]
    assert list(h_new_low.shape) == [100, O]
    assert F.allclose(h_new, h_new_low)
Example #6
    def __getitem__(self, idx):
        # Returns a tuple.
        # SMILES must be in the first column of the csv!

        row = self.df.iloc[idx, :]

        smiles = row.smiles  # needed anyway to build graph
        m = Chem.MolFromSmiles(smiles)

        if self.compute_selfies:
            Chem.Kekulize(m)
            k = Chem.MolToSmiles(m, isomericSmiles=False,
                                 kekuleSmiles=True)  # kekuleSmiles
            selfie = encoder(k)
            """
            if selfie != row.selfies:
                print('new selfie:', selfie)
                print('prev : ', row.selfies)
            """

        else:
            selfie = row.selfies

        # 1 - Graph building
        if m is not None:
            graph = smiles_to_nx(smiles)
        else:
            return None, 0, 0, 0

        one_hot = {
            edge: torch.tensor(self.edge_map[label])
            for edge, label in (
                nx.get_edge_attributes(graph, 'bond_type')).items()
        }
        nx.set_edge_attributes(graph, name='one_hot', values=one_hot)

        try:
            at_type = {
                a: oh_tensor(self.at_map[label], self.num_atom_types)
                for a, label in (
                    nx.get_node_attributes(graph, 'atomic_num')).items()
            }
            nx.set_node_attributes(graph, name='atomic_num', values=at_type)
        except KeyError:
            print('!!!! Atom type to one-hot error for input ', smiles,
                  ' ignored')
            return None, 0, 0, 0

        at_charge = {
            a: oh_tensor(self.charges_map[label], self.num_charges)
            for a, label in (
                nx.get_node_attributes(graph, 'formal_charge')).items()
        }
        nx.set_node_attributes(graph, name='formal_charge', values=at_charge)

        try:
            hydrogens = {
                a: torch.tensor(self.chi_map[label], dtype=torch.float)
                for a, label in (
                    nx.get_node_attributes(graph, 'num_explicit_hs')).items()
            }
            nx.set_node_attributes(graph,
                                   name='num_explicit_hs',
                                   values=hydrogens)
        except KeyError:
            print(
                '!!!! Number of explicit hydrogens to one-hot error for input ',
                smiles, ' ignored')
            return None, 0, 0, 0

        aromatic = {
            a: torch.tensor(self.chi_map[label], dtype=torch.float)
            for a, label in (
                nx.get_node_attributes(graph, 'is_aromatic')).items()
        }
        nx.set_node_attributes(graph, name='is_aromatic', values=aromatic)

        at_chir = {
            a: torch.tensor(self.chi_map[label], dtype=torch.float)
            for a, label in (
                nx.get_node_attributes(graph, 'chiral_tag')).items()
        }
        nx.set_node_attributes(graph, name='chiral_tag', values=at_chir)

        # to dgl
        g_dgl = dgl.DGLGraph()
        node_features = [
            'atomic_num', 'formal_charge', 'num_explicit_hs', 'is_aromatic',
            'chiral_tag'
        ]
        g_dgl.from_networkx(nx_graph=graph,
                            node_attrs=node_features,
                            edge_attrs=['one_hot'])

        N = g_dgl.number_of_nodes()

        g_dgl.ndata['h'] = torch.cat(
            [g_dgl.ndata[f].view(N, -1) for f in node_features], dim=1)

        if self.graph_only:  # give only the graph (to encode in latent space)
            return g_dgl, 0, 0, 0

        # 2 - Smiles / selfies to integer indices array

        if self.language == 'selfies':

            a, valid_flag = self.selfies_to_hot(selfie)
            if valid_flag == 0:  # no one hot encoding for this selfie, ignore
                print('!!! Selfie to one-hot failed with current alphabet')
                return None, 0, 0, 0

        else:
            a = np.zeros(self.max_len)
            idces = [self.char_to_index[c] for c in smiles]
            a[:len(idces)] = idces

        # 3 - Optional props and affinities

        props, targets = 0, 0
        if len(self.props) > 0:
            props = np.array(row[self.props], dtype=np.float32)

        if len(self.targets) > 0 and self.binned_scores:
            targets = np.array(row[self.targets],
                               dtype=np.int64)  # for torch.long class labels
        elif len(self.targets) > 0:
            targets = np.array(row[self.targets],
                               dtype=np.float32)  # for torch.float values

        targets = np.nan_to_num(targets)  # if nan somewhere, change to 0.

        return g_dgl, a, props, targets
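A hypothetical use of this __getitem__, assuming dataset is an instance of the surrounding dataset class:

g_dgl, a, props, targets = dataset[0]  # graph, index-encoded sequence, optional props/targets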
Example #7
def main(args):
    column_headers = [
        "dataset", "setting", "model", "pretraining", "epoch", "accuracy"
    ]
    use_cuda = args.use_cuda and torch.cuda.is_available()
    print("Using CUDA:", use_cuda)

    results_df = pd.DataFrame(columns=column_headers)

    data = load_data(args)
    features = torch.FloatTensor(data.features)
    labels = torch.LongTensor(data.labels)
    train_mask = torch.ByteTensor(data.train_mask)
    val_mask = torch.ByteTensor(data.val_mask)
    test_mask = torch.ByteTensor(data.test_mask)
    in_feats = features.shape[1]
    n_classes = data.num_labels

    # We don't use a validation set
    train_mask = train_mask | val_mask

    if args.invert:
        # This is different from swapping the train and test masks
        # because train | test does not cover the whole dataset
        train_mask, test_mask = ~train_mask, train_mask
        setting = 'B'
    else:
        setting = 'A'

    g = dgl.DGLGraph(data.graph)
    # Suppress warning
    g.set_n_initializer(dgl.init.zero_initializer)
    # add self loop
    g.add_edges(g.nodes(), g.nodes())

    # g_train, g = split_graph(g, train_mask)
    # # Select train nodes..
    train_nodes = torch.arange(g.number_of_nodes())[train_mask]
    if use_cuda:
        features, labels = features.cuda(), labels.cuda()
        train_mask, test_mask = train_mask.cuda(), test_mask.cuda()
        train_nodes = train_nodes.cuda()

    # .. to induce subgraph
    g_train = g.subgraph(train_nodes)
    g_train.set_n_initializer(dgl.init.zero_initializer)
    features_train = features[train_mask]
    labels_train = labels[train_mask]

    # Verify sizes of train set
    assert int(train_mask.sum().item()) == features_train.size(0)\
        == labels_train.size(0) == g_train.number_of_nodes()

    # Random Restarts
    for __ in range(args.runs):
        # Init net
        net = build_model(args.model, args.dataset, g_train, in_feats,
                          n_classes)
        if use_cuda:
            net = net.cuda()
        print(net)

        # Init optimizers
        # optimizer = torch.optim.Adam(net.parameters(),
        #                              **training_optimizer_params)
        optimizer = build_optimizer(net.parameters(),
                                    args.model,
                                    args.dataset,
                                    inference=False)
        print("Optimizer", optimizer)

        # Pre-training
        for epoch in range(args.epochs):
            train_epoch(
                epoch + 1,
                net,
                optimizer,
                features_train,
                labels_train,
                train_mask=None  # Use all labels of the *train* subgraph
            )

        print("=== INFERENCE ===")
        net.set_graph(g)
        # Eval without inference epochs
        accuracy_score = eval_inference(0, net, features, labels, test_mask)
        results_df = results_df.append(pd.DataFrame([[
            args.dataset, setting, args.model, args.epochs, 0, accuracy_score
        ]],
                                                    columns=column_headers),
                                       ignore_index=True)

        # Fresh optimizer for up-training at inference time
        # optimizer = torch.optim.Adam(net.parameters(),
        #                              **inference_optimizer_params)
        del optimizer
        optimizer = build_optimizer(net.parameters(),
                                    args.model,
                                    args.dataset,
                                    inference=True)

        print("Fresh inference optimizer", optimizer)
        for i in range(args.inference):
            train_epoch(i + 1,
                        net,
                        optimizer,
                        features,
                        labels,
                        train_mask=train_mask)

            accuracy_score = eval_inference(i + 1, net, features, labels,
                                            test_mask)
            results_df = results_df.append(pd.DataFrame(
                [[
                    args.dataset, setting, args.model, args.epochs, i + 1,
                    accuracy_score
                ]],
                columns=column_headers),
                                           ignore_index=True)
        del net
        del optimizer
        torch.cuda.empty_cache()  # don't leak here

    print(args)
    for i in range(args.inference + 1):
        # Print results to command line
        rbi = results_df[results_df['epoch'] == i]['accuracy']
        print(
            "Avg accuracy over {} runs after {} inference epochs: {:.4f} ({:.4f})"
            .format(args.runs, i, rbi.mean(), rbi.std()))

    if args.outfile is not None:
        # And store them to csv file
        appendDFToCSV_void(results_df, args.outfile, sep=",")
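A hypothetical CLI wiring for this entry point, assuming argparse flags that match the attributes read above (dataset, model, epochs, inference, runs, invert, use_cuda, outfile); the defaults are illustrative, not from the source:

if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset', default='cora')
    parser.add_argument('--model', default='gcn')
    parser.add_argument('--epochs', type=int, default=200)
    parser.add_argument('--inference', type=int, default=0)
    parser.add_argument('--runs', type=int, default=1)
    parser.add_argument('--invert', action='store_true')
    parser.add_argument('--use_cuda', action='store_true')
    parser.add_argument('--outfile', default=None)
    main(parser.parse_args())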
Example #8
    def __init__(self, dataset, args):
        src = [dataset.train[0]]
        etype_id = [dataset.train[1]]
        dst = [dataset.train[2]]
        self.num_train = len(dataset.train[0])
        if args.dataset == "wikikg90M":
            self.valid_dict = dataset.valid
            self.num_valid = len(self.valid_dict['h,r->t']['hr'])
        elif dataset.valid is not None:
            src.append(dataset.valid[0])
            etype_id.append(dataset.valid[1])
            dst.append(dataset.valid[2])
            self.num_valid = len(dataset.valid[0])
        else:
            self.num_valid = 0
        if args.dataset == "wikikg90M":
            self.test_dict = dataset.test
            self.num_test = len(self.test_dict['h,r->t']['hr'])
        elif dataset.test is not None:
            src.append(dataset.test[0])
            etype_id.append(dataset.test[1])
            dst.append(dataset.test[2])
            self.num_test = len(dataset.test[0])
        else:
            self.num_test = 0

        if args.dataset == "wikikg90M":
            print('|valid|:', self.num_valid)
            print('|test|:', self.num_test)
            return

        assert len(
            src) > 1, "we need at least a validation set or a test set."

        src = np.concatenate(src)
        etype_id = np.concatenate(etype_id)
        dst = np.concatenate(dst)

        coo = sp.sparse.coo_matrix(
            (np.ones(len(src)), (src, dst)),
            shape=[dataset.n_entities, dataset.n_entities])
        g = dgl.DGLGraph(coo, readonly=True, multigraph=True, sort_csr=True)
        g.edata['tid'] = F.tensor(etype_id, F.int64)
        self.g = g

        if args.eval_percent < 1:
            self.valid = np.random.randint(
                0,
                self.num_valid,
                size=(int(
                    self.num_valid * args.eval_percent), )) + self.num_train
        else:
            self.valid = np.arange(self.num_train,
                                   self.num_train + self.num_valid)
        print('|valid|:', len(self.valid))

        if args.eval_percent < 1:
            self.test = np.random.randint(
                0,
                self.num_test,
                size=(int(self.num_test * args.eval_percent, )))
            self.test += self.num_train + self.num_valid
        else:
            self.test = np.arange(self.num_train + self.num_valid,
                                  self.g.number_of_edges())
        print('|test|:', len(self.test))
Example #9
    def generate_batch_G(self, target_bg=None, x=None, batch_size=1, style=None):

        # init graph
        k = self.k
        m = self.m
        n = self.n
        ajr = self.ajr
        if style is None:
            style = self.style
        if target_bg is not None:
            bg = dgl.batch(np.random.choice(target_bg, batch_size, replace=True))
        else:
            if style.startswith('er'):
                p = float(style.split('-')[1])
                G = [nx.erdos_renyi_graph(n, p) for _ in range(batch_size)]
                adj_matrices = torch.cat([torch.tensor(nx.adjacency_matrix(g).todense()).float() for g in G])

            elif style.startswith('ba'):
                _m = int(style.split('-')[1])
                G = [nx.barabasi_albert_graph(n, _m) for _ in range(batch_size)]
                adj_matrices = torch.cat([torch.tensor(nx.adjacency_matrix(g).todense()).float() for g in G])

            # init batch graphs
            gs = [dgl.DGLGraph() for _ in range(batch_size)]
            _ = [(g.add_nodes(n), g.add_edges(self.src, self.dst)) for g in gs]

            bg = dgl.batch(gs)

            # 2-d coordinates 'x'
            if x is None:
                if style == 'plain':
                    bg.ndata['x'] = torch.rand((batch_size * n, 2))
                elif style == 'shift':
                    bg.ndata['x'] = torch.rand((batch_size * n, 2)) * 10 + 5
                elif style.startswith('cluster'):
                    _h = 2
                    cluster_style = int(style.split('-')[1])
                    if cluster_style == 0:
                        center = torch.rand((batch_size * k, 1, _h)).repeat(1, m, 1) * 6
                    elif cluster_style == 1:  # k=4
                        mask = torch.tensor([[[0, 0]], [[0, 5]], [[5, 0]], [[5, 5]]]).repeat(batch_size, 1, 1)
                        center = torch.rand((batch_size * k, 1, _h)) * 3 + mask
                        center = center.repeat(1, m, 1)
                    elif cluster_style == 2:  # k=4
                        mask = torch.tensor([[[0, 0]], [[0, 0]], [[5, 5]], [[5, 5]]]).repeat(batch_size, 1, 1)
                        center = torch.rand((batch_size * k, 1, _h)) * 3 + mask
                        center = center.repeat(1, m, 1)

                    bg.ndata['x'] = (center + torch.rand((batch_size * k, m, _h))).view(batch_size * n, _h)
            else:
                bg.ndata['x'] = x

        # label
        if self.cut == 'equal':
            label = torch.tensor(range(k)).unsqueeze(1).repeat(batch_size, m).view(-1)
        else:
            label = torch.tensor(self.init_label).repeat(batch_size)
        batch_mask = torch.tensor(range(0, n * batch_size, n)).unsqueeze(1).expand(batch_size, n).flatten()
        perm_idx = torch.cat([torch.randperm(n) for _ in range(batch_size)]) + batch_mask
        label = label[perm_idx].view(batch_size, n)
        bg.ndata['label'] = torch.nn.functional.one_hot(label, k).float().view(batch_size * n, k)

        # calculate edges
        if target_bg is not None:
            # permute the dist matrix
            # TODO: add ndata['adj']
            bg.edata['d'] *= F.relu(torch.ones(bg.edata['d'].shape).cuda() + 0.1 * torch.randn(bg.edata['d'].shape).cuda())
        else:
            if style.startswith('er') or style.startswith('ba'):
                # TODO: add ndata['adj']
                bg.edata['d'] = adj_matrices.view(batch_size, -1, 1)[:, self.nonzero_idx, :].view(-1, 1)
            else:
                _, neighbor_idx, square_dist_matrix = dgl.transform.knn_graph(bg.ndata['x'].view(batch_size, n, -1), ajr + 1, extend_info=True)
                square_dist_matrix = F.relu(square_dist_matrix, inplace=True)  # clamp tiny negatives from numerical error that would give NaN under sqrt
                bg.ndata['adj'] = torch.sqrt(square_dist_matrix).view(bg.number_of_nodes(), -1)
                # scale d (maintain avg=0.5):
                if style != 'plain':
                    bg.ndata['adj'] /= (bg.ndata['adj'].sum() / (bg.ndata['adj'].shape[0]**2) / 0.5)
                bg.edata['d'] = bg.ndata['adj'].view(batch_size, -1, 1)[:, self.nonzero_idx, :].view(-1, 1)

        group_matrix = torch.bmm(bg.ndata['label'].view(batch_size, n, -1), bg.ndata['label'].view(batch_size, n, -1).transpose(1, 2)).view(batch_size, -1)[:, self.nonzero_idx].view(-1, 1)

        if target_bg is not None:
            bg.edata['e_type'][:, 1:] = group_matrix
        else:
            if style.startswith('er') or style.startswith('ba'):
                bg.edata['e_type'] = torch.cat([bg.edata['d'], group_matrix], dim=1)
            else:
                neighbor_idx -= torch.tensor(range(0, batch_size * n, n)).view(batch_size, 1, 1).repeat(1, n, ajr + 1) \
                                - torch.tensor(range(0, batch_size * n * n, n * n)).view(batch_size, 1, 1).repeat(1, n,
                                                                                                                  ajr + 1)
                adjacent_matrix = torch.zeros((batch_size * n * n, 1))
                adjacent_matrix[neighbor_idx + self.adj_mask.repeat(batch_size, 1, 1)] = 1
                adjacent_matrix = adjacent_matrix.view(batch_size, n * n, 1)[:, self.nonzero_idx, :].view(-1, 1)
                bg.edata['e_type'] = torch.cat([adjacent_matrix, group_matrix], dim=1)

        return bg
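A hypothetical call, assuming env is an instance of the surrounding class with k, m, n, ajr, src, dst, nonzero_idx, and adj_mask already configured:

bg = env.generate_batch_G(batch_size=32, style='er-0.15')  # Erdos-Renyi graphs with p = 0.15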
Example #10
def test_pickling_graph():
    # graph structures and frames are pickled
    g = dgl.DGLGraph()
    g.add_nodes(3)
    src = F.tensor([0, 0])
    dst = F.tensor([1, 2])
    g.add_edges(src, dst)

    x = F.randn((3, 7))
    y = F.randn((3, 5))
    a = F.randn((2, 6))
    b = F.randn((2, 4))

    g.ndata['x'] = x
    g.ndata['y'] = y
    g.edata['a'] = a
    g.edata['b'] = b

    # registered functions are pickled
    g.register_message_func(_global_message_func)
    reduce_func = fn.sum('x', 'x')
    g.register_reduce_func(reduce_func)

    # custom attributes should be pickled
    g.foo = 2

    new_g = _reconstruct_pickle(g)

    _assert_is_identical(g, new_g)
    assert new_g.foo == 2
    assert new_g._message_func == _global_message_func
    assert isinstance(new_g._reduce_func, type(reduce_func))
    assert new_g._reduce_func._name == 'sum'
    assert new_g._reduce_func.reduce_op == F.sum
    assert new_g._reduce_func.msg_field == 'x'
    assert new_g._reduce_func.out_field == 'x'

    # test batched graph with partial set case
    g2 = dgl.DGLGraph()
    g2.add_nodes(4)
    src2 = F.tensor([0, 1])
    dst2 = F.tensor([2, 3])
    g2.add_edges(src2, dst2)

    x2 = F.randn((4, 7))
    y2 = F.randn((3, 5))
    a2 = F.randn((2, 6))
    b2 = F.randn((2, 4))

    g2.ndata['x'] = x2
    g2.nodes[[0, 1, 3]].data['y'] = y2
    g2.edata['a'] = a2
    g2.edata['b'] = b2

    bg = dgl.batch([g, g2])

    bg2 = _reconstruct_pickle(bg)

    _assert_is_identical(bg, bg2)
    new_g, new_g2 = dgl.unbatch(bg2)
    _assert_is_identical(g, new_g)
    _assert_is_identical(g2, new_g2)

    # readonly graph
    g = dgl.DGLGraph([(0, 1), (1, 2)], readonly=True)
    new_g = _reconstruct_pickle(g)
    _assert_is_identical(g, new_g)

    # multigraph
    g = dgl.DGLGraph([(0, 1), (0, 1), (1, 2)], multigraph=True)
    new_g = _reconstruct_pickle(g)
    _assert_is_identical(g, new_g)

    # readonly multigraph
    g = dgl.DGLGraph([(0, 1), (0, 1), (1, 2)], multigraph=True, readonly=True)
    new_g = _reconstruct_pickle(g)
    _assert_is_identical(g, new_g)
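The helper _reconstruct_pickle is not shown in the example; a plausible minimal version (an assumption about the test utilities, not their actual code) round-trips the object through an in-memory pickle:

import io
import pickle

def _reconstruct_pickle(obj):
    f = io.BytesIO()
    pickle.dump(obj, f)    # serializes graph structure, frames, and custom attributes
    f.seek(0)
    return pickle.load(f)  # rebuilds an equivalent object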
Example #11
def ProcessImage(batch_itr):

    X, y = batch_itr

    ybar = np.ones([64, 64]) * -2.
    Y = np.stack([y[0], y[1], y[2], y[3], y[4], y[5], ybar], axis=0)

    test1, test2, test3, test4, test5, test6, testT = (
        MakeLayer(X[0], Y[0], 0), MakeLayer(X[1], Y[1], 1),
        MakeLayer(X[2], Y[2], 2), MakeLayer(X[3], Y[3], 3),
        MakeLayer(X[4], Y[4], 4), MakeLayer(X[5], Y[5], 5),
        MakeLayer(X[6], Y[6], 6))

    energy_val = np.concatenate(
        [testT[0], test1[0], test2[0], test3[0], test4[0], test5[0], test6[0]])
    x_val = np.concatenate(
        [testT[1], test1[1], test2[1], test3[1], test4[1], test5[1], test6[1]])
    y_val = np.concatenate(
        [testT[2], test1[2], test2[2], test3[2], test4[2], test5[2], test6[2]])
    z_val = np.concatenate(
        [testT[3], test1[3], test2[3], test3[3], test4[3], test5[3], test6[3]])
    x_idx = np.concatenate(
        [testT[4], test1[4], test2[4], test3[4], test4[4], test5[4], test6[4]])
    y_idx = np.concatenate(
        [testT[5], test1[5], test2[5], test3[5], test4[5], test5[5], test6[5]])
    z_idx = np.concatenate(
        [testT[6], test1[6], test2[6], test3[6], test4[6], test5[6], test6[6]])
    target_val = np.concatenate(
        [testT[7], test1[7], test2[7], test3[7], test4[7], test5[7], test6[7]])

    point_indx = np.array([z_idx, x_idx, y_idx], dtype=int)
    point_indx = np.transpose(point_indx)

    point = np.array([x_val, y_val, z_val])
    point = np.transpose(point)

    point = np.reshape(point, (1, point.shape[0], point.shape[1]))
    point = torch.FloatTensor(point)

    #         x_red = x[loc]
    #         y_red = y[loc]

    graph = KNNGraph(graph_size)

    npoints = energy_val.shape[0]

    if npoints < graph_size:
        g = dgl.DGLGraph()
        g.add_nodes(2)
    else:
        g = graph(point)
        g = dgl.transform.remove_self_loop(g)

    sample = {
        'Input': X,
        'seq_length': len(energy_val),
        'point_xyz': point,
        'target': torch.FloatTensor(y),
        'point_idx_zxy': point_indx,
        'energy': torch.FloatTensor(energy_val),
        'gr': g
    }

    return sample
Example #12
def main():

    g_nx = nx.petersen_graph()
    g_dgl = dgl.DGLGraph(g_nx)

    plt.figure()
Example #13
def gen_from_data(data, readonly, sort):
    return dgl.DGLGraph(data, readonly=readonly, sort_csr=sort)
Example #14
def gen_by_mutation():
    g = dgl.DGLGraph()
    src, dst = edge_pair_input()
    g.add_nodes(10)
    g.add_edges(src, dst)
    return g
Example #15
def plot_tree_graph(jet_graph, ax):

    ax.set_axis_off()

    vtxlist, vtxdict, hadron_list, additional_vtx_dict = compute_jet_vtx(
        jet_graph)

    pt = jet_graph.jet_pt
    eta = jet_graph.jet_eta
    flav = jet_graph.jet_DoubleHadLabel

    ax.set_title('Flavour : ' + str(flav) + '   pt: ' +
                 '{0:.2f}'.format(pt / 1000.0) + '   eta: ' +
                 '{0:.2f}'.format(eta),
                 fontsize=20)

    g = dgl.DGLGraph()
    n_nodes = len(jet_graph['trk_node_index']) + len(
        jet_graph['jf_node_index']) + len(jet_graph['particle_node_index'])
    g.add_nodes(n_nodes)

    edge_list = np.dstack([jet_graph.edge_start, jet_graph.edge_end])[0]

    labels = {}

    for edge in edge_list:
        s, e = edge

        g.add_edge(int(s), int(e))

    pv_x, pv_y, pv_z = jet_graph.truth_PVx, jet_graph.truth_PVy, jet_graph.truth_PVz

    g.add_nodes(1)

    particles_in_primary = []
    for idx, pdgid, x0, y0, z0, x, y, z, stat, injet in zip(
            jet_graph['particle_node_index'], jet_graph['particle_node_pdgid'],
            jet_graph.particle_node_prod_x, jet_graph.particle_node_prod_y,
            jet_graph.particle_node_prod_z, jet_graph.particle_node_decay_x,
            jet_graph.particle_node_decay_y, jet_graph.particle_node_decay_z,
            jet_graph.particle_node_status, jet_graph.particle_node_inJet):
        if np.linalg.norm([x0 - pv_x, y0 - pv_y, z0 - pv_z]) < 0.01:

            particles_in_primary.append(idx)

    for p_in_primary in particles_in_primary:
        has_parent = False
        # loop over the other children of the vtx to see if one of them is the parent
        for p_in_primary_j in particles_in_primary:
            for edge in edge_list:
                s, e = edge
                if p_in_primary_j == s and p_in_primary == e:
                    has_parent = True
        if not has_parent:
            g.add_edge(n_nodes, int(p_in_primary))

    G = g.to_networkx()

    node_colors = []
    for idx, pdgid, charge in zip(jet_graph['particle_node_index'],
                                   jet_graph['particle_node_pdgid'],
                                   jet_graph.particle_node_charge):
        if abs(pdgid) in [6, 24]:
            node_colors.append('lightgreen')
        elif charge == 0:
            node_colors.append('khaki')
        else:
            node_colors.append('lightsalmon')

    pos = nx.nx_agraph.graphviz_layout(G, prog='dot')

    min_max_x = list(pos[0])
    y_min = -10

    for key_i, key in enumerate(jet_graph['particle_node_index']):
        if key_i == 0:
            min_max_x = list(pos[key])
        x, y = pos[key]
        if x < min_max_x[0]:
            min_max_x[0] = x
        if x > min_max_x[1]:
            min_max_x[1] = x
        if y < y_min:
            y_min = y - 10
    x_range = min_max_x[1] - min_max_x[0]

    n_tracks = len(jet_graph['trk_node_index'])

    track_x_positions = []

    for track_i, idx in enumerate(jet_graph['trk_node_index']):
        x_orig, y_orig = pos[idx]
        if idx not in jet_graph.edge_end:
            track_x_positions.append(
                (min_max_x[0] + track_i * x_range / n_tracks, idx))
        else:
            track_x_positions.append((x_orig, idx))

    track_x_positions = sorted(track_x_positions, key=lambda x: x[0])

    spacing = 50
    for track_i in range(1, len(track_x_positions)):
        previous_pos = track_x_positions[track_i - 1][0]
        current_pos = track_x_positions[track_i][0]

        if current_pos < previous_pos + spacing:
            track_x_positions[track_i] = (previous_pos + spacing,
                                          track_x_positions[track_i][1])

    for track_x, idx in track_x_positions:
        pos[idx] = (track_x, y_min)

    n_jf_vtx = len(jet_graph['jf_node_index'])

    for idx, vtx_i in zip(jet_graph['jf_node_index'],
                          range(len(jet_graph['jf_node_index']))):
        pos[idx] = (min_max_x[0] + x_range / 2 +
                    (vtx_i) * x_range / n_jf_vtx / 2., y_min - 80)
        labels[idx] = 'JF' + str(vtx_i)

    nx.draw_networkx_nodes(G,
                           pos,
                           node_color='orchid',
                           node_size=1200,
                           ax=ax,
                           nodelist=jet_graph['jf_node_index'])
    nx.draw_networkx_nodes(G,
                           pos,
                           node_color='lightskyblue',
                           node_size=300,
                           ax=ax,
                           nodelist=jet_graph['trk_node_index'])
    nx.draw_networkx_nodes(G,
                           pos,
                           node_color=node_colors,
                           node_size=800,
                           ax=ax,
                           nodelist=jet_graph['particle_node_index'])
    nx.draw_networkx_edges(G, pos, ax=ax)

    for idx, pdgid, x0, y0, z0, x, y, z, stat, injet in zip(
            jet_graph['particle_node_index'], jet_graph['particle_node_pdgid'],
            jet_graph.particle_node_prod_x, jet_graph.particle_node_prod_y,
            jet_graph.particle_node_prod_z, jet_graph.particle_node_decay_x,
            jet_graph.particle_node_decay_y, jet_graph.particle_node_decay_z,
            jet_graph.particle_node_status, jet_graph.particle_node_inJet):
        #labels[idx] = str(idx)
        #labels[idx] = str(stat)+' '+str(injet)
        #labels[idx] = '{0:.2f}'.format(x0)+'\n'+ '{0:.2f}'.format(y0)+'\n'+ '{0:.2f}'.format(z0)
        #labels[idx] = '{0:.4f}'.format(np.linalg.norm(np.array([pv_x,pv_y,pv_z])-np.array([x0,y0,z0])))
        if pdgid in pdg_id_dict:
            labels[idx] = pdg_id_dict[pdgid]
        else:
            labels[idx] = str(pdgid)

    nx.draw_networkx_labels(G, pos, labels, ax=ax)
Example #16
from scipy.special import softmax

from tensorboardX import SummaryWriter

macrostep = 10
DEVICE = "cuda:0"

with open("stoppedEdges.pkl", 'rb') as f:
    stoppedEdges = pickle.load(f)

with open("amatrix_edges.pkl", 'rb') as f:
    A = pickle.load(f)

indices = {c: i for i, c in enumerate(list(A.columns))}
invertedIndices = {i: c for i, c in enumerate(list(A.columns))}
g = dgl.DGLGraph(np.eye(A.values.shape[0]) + A.values)  # adjacency plus identity, so every node gets a self-loop
#g = dgl.DGLGraph(A.values)
N = g.number_of_nodes()
embedding_n = 32

g.ndata['entered'] = torch.zeros((g.number_of_nodes(), 1)).to(DEVICE)


class PredictParkingModule(nn.Module):
    def __init__(self, in_feats, embedding_n):
        super(PredictParkingModule, self).__init__()
        self.embed = nn.Embedding(in_feats, embedding_n)
        self.L2 = nn.Linear(embedding_n + 1, 1)
        self.A3 = F.relu

    def forward(self, node):
Example #17
def test_edge_softmax():
    # Basic
    g = dgl.DGLGraph(nx.path_graph(3))
    edata = F.ones((g.number_of_edges(), 1))
    a = nn.edge_softmax(g, edata)
    assert len(g.ndata) == 0
    assert len(g.edata) == 0
    assert F.allclose(a, uniform_attention(g, a.shape))

    # Test higher dimension case
    edata = F.ones((g.number_of_edges(), 3, 1))
    a = nn.edge_softmax(g, edata)
    assert len(g.ndata) == 0
    assert len(g.edata) == 0
    assert F.allclose(a, uniform_attention(g, a.shape))

    # Test both forward and backward with PyTorch built-in softmax.
    g = dgl.DGLGraph()
    g.add_nodes(30)
    # build a complete graph
    for i in range(30):
        for j in range(30):
            g.add_edge(i, j)

    score = F.randn((900, 1))
    score.requires_grad_()
    grad = F.randn((900, 1))
    y = F.softmax(score.view(30, 30), dim=0).view(-1, 1)
    y.backward(grad)
    grad_score = score.grad
    score.grad.zero_()
    y_dgl = nn.edge_softmax(g, score)
    assert len(g.ndata) == 0
    assert len(g.edata) == 0
    # check forward
    assert F.allclose(y_dgl, y)
    y_dgl.backward(grad)
    # check gradient
    assert F.allclose(score.grad, grad_score)
    print(score.grad[:10], grad_score[:10])
    
    # Test 2
    def generate_rand_graph(n, m=None, ctor=dgl.DGLGraph):
        if m is None:
            m = n
        arr = (sp.sparse.random(m, n, density=0.1, format='coo') != 0).astype(np.int64)
        return ctor(arr, readonly=True)

    for g in [generate_rand_graph(50),
              generate_rand_graph(50, ctor=dgl.graph),
              generate_rand_graph(100, 50, ctor=dgl.bipartite)]:
        a1 = F.randn((g.number_of_edges(), 1)).requires_grad_()
        a2 = a1.clone().detach().requires_grad_()
        g.edata['s'] = a1
        g.group_apply_edges('dst', lambda edges: {'ss':F.softmax(edges.data['s'], 1)})
        g.edata['ss'].sum().backward()
        
        builtin_sm = nn.edge_softmax(g, a2)
        builtin_sm.sum().backward()
        print(a1.grad - a2.grad)
        assert len(g.srcdata) == 0
        assert len(g.dstdata) == 0
        assert len(g.edata) == 2
        assert F.allclose(a1.grad, a2.grad, rtol=1e-4, atol=1e-4) # Follow tolerance in unittest backend
Example #18
def test_nx_conversion():
    # check conversion between networkx and DGLGraph

    def _check_nx_feature(nxg, nf, ef):
        # check node and edge feature of nxg
        # this is used to check to_networkx
        num_nodes = len(nxg)
        num_edges = nxg.size()
        if num_nodes > 0:
            node_feat = ddict(list)
            for nid, attr in nxg.nodes(data=True):
                assert len(attr) == len(nf)
                for k in nxg.nodes[nid]:
                    node_feat[k].append(F.unsqueeze(attr[k], 0))
            for k in node_feat:
                feat = F.cat(node_feat[k], 0)
                assert F.allclose(feat, nf[k])
        else:
            assert len(nf) == 0
        if num_edges > 0:
            edge_feat = ddict(lambda: [0] * num_edges)
            for u, v, attr in nxg.edges(data=True):
                assert len(attr) == len(ef) + 1  # extra id
                eid = attr['id']
                for k in ef:
                    edge_feat[k][eid] = F.unsqueeze(attr[k], 0)
            for k in edge_feat:
                feat = F.cat(edge_feat[k], 0)
                assert F.allclose(feat, ef[k])
        else:
            assert len(ef) == 0

    n1 = F.randn((5, 3))
    n2 = F.randn((5, 10))
    n3 = F.randn((5, 4))
    e1 = F.randn((4, 5))
    e2 = F.randn((4, 7))
    g = DGLGraph(multigraph=True)
    g.add_nodes(5)
    g.add_edges([0, 1, 3, 4], [2, 4, 0, 3])
    g.ndata.update({'n1': n1, 'n2': n2, 'n3': n3})
    g.edata.update({'e1': e1, 'e2': e2})

    # convert to networkx
    nxg = g.to_networkx(node_attrs=['n1', 'n3'], edge_attrs=['e1', 'e2'])
    assert len(nxg) == 5
    assert nxg.size() == 4
    _check_nx_feature(nxg, {'n1': n1, 'n3': n3}, {'e1': e1, 'e2': e2})

    # convert to DGLGraph, nx graph has id in edge feature
    # use id feature to test non-tensor copy
    g.from_networkx(nxg, node_attrs=['n1'], edge_attrs=['e1', 'id'])
    # check graph size
    assert g.number_of_nodes() == 5
    assert g.number_of_edges() == 4
    # check number of features
    # test with existing dglgraph (so existing features should be cleared)
    assert len(g.ndata) == 1
    assert len(g.edata) == 2
    # check feature values
    assert F.allclose(g.ndata['n1'], n1)
    # with id in nx edge feature, e1 should follow original order
    assert F.allclose(g.edata['e1'], e1)
    assert F.array_equal(g.get_e_repr()['id'],
                         F.copy_to(F.arange(0, 4), F.cpu()))

    # test conversion after modifying DGLGraph
    g.pop_e_repr(
        'id')  # pop id so we don't need to provide id when adding edges
    new_n = F.randn((2, 3))
    new_e = F.randn((3, 5))
    g.add_nodes(2, data={'n1': new_n})
    # add three edges, one is a multi-edge
    g.add_edges([3, 6, 0], [4, 5, 2], data={'e1': new_e})
    n1 = F.cat((n1, new_n), 0)
    e1 = F.cat((e1, new_e), 0)
    # convert to networkx again
    nxg = g.to_networkx(node_attrs=['n1'], edge_attrs=['e1'])
    assert len(nxg) == 7
    assert nxg.size() == 7
    _check_nx_feature(nxg, {'n1': n1}, {'e1': e1})

    # now test convert from networkx without id in edge feature
    # first pop id in edge feature
    for _, _, attr in nxg.edges(data=True):
        attr.pop('id')
    # test with a new graph
    g = DGLGraph(multigraph=True)
    g.from_networkx(nxg, node_attrs=['n1'], edge_attrs=['e1'])
    # check graph size
    assert g.number_of_nodes() == 7
    assert g.number_of_edges() == 7
    # check number of features
    assert len(g.ndata) == 1
    assert len(g.edata) == 1
    # check feature values
    assert F.allclose(g.ndata['n1'], n1)
    # edge feature order follows nxg.edges()
    edge_feat = []
    for _, _, attr in nxg.edges(data=True):
        edge_feat.append(F.unsqueeze(attr['e1'], 0))
    edge_feat = F.cat(edge_feat, 0)
    assert F.allclose(g.edata['e1'], edge_feat)

    # Test converting from a networkx graph whose nodes are
    # not labeled with consecutive-integers.
    nxg = nx.cycle_graph(5)
    nxg.remove_nodes_from([0, 4])
    for u in nxg.nodes():
        nxg.nodes[u]['h'] = F.tensor([u])
    for u, v, d in nxg.edges(data=True):
        d['h'] = F.tensor([u, v])

    g = dgl.DGLGraph()
    g.from_networkx(nxg, node_attrs=['h'], edge_attrs=['h'])
    assert g.number_of_nodes() == 3
    assert g.number_of_edges() == 4
    assert g.has_edge_between(0, 1)
    assert g.has_edge_between(1, 2)
    assert F.allclose(g.ndata['h'], F.tensor([[1.], [2.], [3.]]))
    assert F.allclose(g.edata['h'],
                      F.tensor([[1., 2.], [1., 2.], [2., 3.], [2., 3.]]))
Example #19
            read_out = dgl.mean_nodes(g, 'features')
            # output = F.softmax(self.fc(read_out))
            output = self.fc(read_out)
            return output


# Source (src) and destination (dst) nodes of the graph (pointing relations between human-body keypoints)
src = np.array([
    17, 15, 18, 16, 0, 2, 3, 4, 5, 6, 7, 8, 9, 12, 10, 13, 11, 14, 23, 22, 24,
    20, 19, 21
])
dst = np.array([
    15, 0, 16, 0, 1, 1, 2, 3, 1, 5, 6, 1, 8, 8, 9, 12, 10, 13, 22, 11, 11, 19,
    14, 14
])
graph = dgl.DGLGraph((src, dst))  # build the graph
graph = dgl.add_self_loop(graph)

model = GCN(3, 20, 2)

# Train on just one graph per class to check that the model runs correctly
inputs = [body_graph_sit[0][0], body_graph_stand[1][0]]
label = [body_graph_sit[0][1], body_graph_stand[1][1]]

optimizer = torch.optim.Adam(model.parameters(), lr=0.0005)
for epoch in range(50):
    for i in range(2):
        output = model(graph, inputs[i].ndata['coordinate'].float())
        pred = torch.argmax(output, axis=1)
        criterion = nn.BCEWithLogitsLoss()
        loss = criterion(output, label[i])
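        # The snippet ends before the optimization step; a minimal sketch of
        # the usual completion of the loop body (standard PyTorch, assumed
        # rather than taken from the source):
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()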
Example #20
import networkx as nx
import matplotlib.pyplot as plt
import torch
import dgl

N = 100  # number of nodes
DAMP = 0.85  # damping factor
K = 10  # number of iterations
g = nx.erdos_renyi_graph(N, 0.1)  # random graph generator: build an nx graph
g = dgl.DGLGraph(g)  # convert to a DGL graph
# nx.draw(g.to_networkx(), node_size=50, node_color=[[.5, .5, .5, ]])  # draw with nx: set node size and gray level
# plt.show()

g.ndata['pv'] = torch.ones(N) / N  # initialize the PageRank values (batch processing)
g.ndata['deg'] = g.out_degrees(g.nodes()).float()  # initialize the out-degree node feature
print(g.ndata)


# Define the message function: it divides each node's PageRank value by its out-degree and passes the result as a message to its neighbors:
def pagerank_message_func(edges):
    pv = edges.src['pv']
    deg = edges.src['deg']
    return {'pv': pv / deg}


# Define the reduce function: it removes and aggregates the messages from the mailbox and computes the node's new PageRank value:
def pagerank_reduce_func(nodes):
    mail_box = nodes.mailbox['pv']
    msgs = torch.sum(mail_box, dim=1)
    pv = (1 - DAMP) / N + DAMP * msgs
    return {'pv': pv}
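# A minimal sketch of applying the two functions, assuming the classic
# DGLGraph message-passing API: each update_all call sends messages along
# every edge and reduces them at the destination nodes, so K calls run K
# PageRank iterations.
for k in range(K):
    g.update_all(pagerank_message_func, pagerank_reduce_func)
print(g.ndata['pv'])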
Example #21
    def load(self):
        print('loading data')
        # Edge features
        # adjs = []
        edge_attr_name = []

        g = nx.readwrite.edgelist.read_edgelist(
            self.edges_dir,
            delimiter=',',
            data=[
                ('GO_ID', float),
                ('Gene_Family_Name', float),
                ('chebi', float),
                ('chemogenomics', float),
                ('cid', float),
                ('drug', float),
                ('expression', float),
                ('gene', float),
                ('hprd', float),
                ('protein', float),
                ('substructure', float),
                ('tissue', float)
            ],
            comments='#',
            create_using=nx.DiGraph)

        v_map = pd.read_csv(self.vertex_map_path,
                            delimiter=',',
                            header=None,
                            dtype={
                                'node': str,
                                'id': int
                            })
        v_map[1] = v_map[1].astype(int)
        mapping = pd.Series(v_map[1].values, index=v_map[0]).to_dict()
        g = nx.relabel.relabel_nodes(g, mapping)

        print('number of connected components: ',
              nx.algorithms.components.number_weakly_connected_components(g))
        # Node Features
        if self.node_features_path is None:
            print('!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!')
            print('!!! No node features given, using dummy features !!!')
            print('!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!')
            features = np.ones((g.number_of_nodes(), 10))
        else:
            features = pd.read_csv(self.node_features_path,
                                   delimiter=',').values

        # Ground Truth label
        labels = pd.read_csv(self.label_path, delimiter=',')

        # convert label to one-hot format
        one_hot_labels = pd.get_dummies(
            data=labels, dummy_na=True,
            columns=['label']).set_index('id')  # N X (#edge attr)  # one hot
        # print(labels.columns)
        one_hot_labels = one_hot_labels.drop(['label_nan'], axis=1)

        size = features.shape[0]

        train_id = set()
        test_id = set()
        train_mask = np.zeros((size, )).astype(bool)
        val_mask = np.zeros((size, )).astype(bool)
        test_mask = np.zeros((size, )).astype(bool)

        train_ratio = 0.8
        np.random.seed(1)
        for column in one_hot_labels.columns:
            set_of_key = set(
                one_hot_labels[(one_hot_labels[column] == 1)].index)
            train_key_set = set(
                np.random.choice(list(set_of_key),
                                 size=int(len(set_of_key) * train_ratio),
                                 replace=False))
            test_key_set = set_of_key - train_key_set
            train_id = train_id.union(train_key_set)
            test_id = test_id.union(test_key_set)
        train_mask[list(train_id)] = 1
        val_mask[list(test_id)] = 1
        test_mask[list(test_id)] = 1

        # one_hot_labels = one_hot_labels.values[:,:-1]  # convert to numpy format and remove the nan column
        y = np.zeros(size)
        y[one_hot_labels.index] = np.argmax(one_hot_labels.values, 1)

        y_train = np.zeros((size, one_hot_labels.shape[1]))  # one hot format
        y_val = np.zeros((size, one_hot_labels.shape[1]))
        y_test = np.zeros((size, one_hot_labels.shape[1]))
        y_train[train_mask, :] = one_hot_labels.loc[sorted(train_id)]
        y_val[val_mask, :] = one_hot_labels.loc[sorted(test_id)]
        y_test[test_mask, :] = one_hot_labels.loc[sorted(test_id)]

        # print('adjs length: ', len(adjs))
        print('features shape: ', features.shape)
        print('y_train shape: ', y_train.shape)
        print('y_val shape: ', y_val.shape)
        print('y_test shape: ', y_test.shape)
        print('train_mask shape: ', train_mask.shape)
        print('val_mask shape: ', val_mask.shape)
        print('test_mask shape: ', test_mask.shape)

        # self.adj = adjs[0]
        self.graph = dgl.DGLGraph()
        self.graph.from_networkx(nx_graph=g,
                                 edge_attrs=[
                                     'GO_ID', 'Gene_Family_Name', 'chebi',
                                     'chemogenomics', 'cid', 'drug',
                                     'expression', 'gene', 'hprd', 'protein',
                                     'substructure', 'tissue'
                                 ])
        self.num_edge_feats = len(self.graph.edge_attr_schemes())
        # standardize edge attrs
        for attr in self.graph.edge_attr_schemes().keys():
            self.graph.edata[attr] = (self.graph.edata[attr] - torch.mean(
                self.graph.edata[attr])) / torch.std(self.graph.edata[attr])
        # concatenate edge attrs
        self.graph.edata['e'] = torch.cat([
            self.graph.edata[attr][:, None]
            for attr in self.graph.edge_attr_schemes().keys()
        ],
                                          dim=1)
        print(self.graph.edge_attr_schemes())
        # self.graph.from_scipy_sparse_matrix(spmat=self.adj)
        self.labels = y
        self.num_labels = one_hot_labels.shape[1]
        # self.edge_attr_adjs = adjs[1:]
        self.features = features
        self.y_train = y_train
        self.y_val = y_val
        self.y_test = y_test
        self.train_mask = train_mask.astype(int)
        self.val_mask = val_mask.astype(int)
        self.test_mask = test_mask.astype(int)
        self.edge_attr_name = edge_attr_name
Example #22
def process_game_state_to_dgl(game_state: GameState,
                              use_absolute_pos=False,
                              edge_ally_to_enemy=False):
    # TODO 1 : Find a better way for managing input features and related constants!

    units = game_state.units

    ally_units = units.owned
    enemy_units = units.enemy

    num_allies = len(ally_units)
    num_enemies = len(enemy_units)

    exist_allies = False
    node_types = []

    g = dgl.DGLGraph(multigraph=True)
    g.set_e_initializer(dgl.init.zero_initializer)

    # using curie_initializer for the node features matters a lot!
    # It works as a mask for computing action probs later.
    g.set_n_initializer(curie_initializer)

    node_features = []

    allies_health = 0
    allies_health_percentage = 0
    allies_mineral_cost = 0
    allies_vespene_cost = 0
    allies_food_cost = 0
    ally_indices = []

    tags = [unit.tag for unit in ally_units + enemy_units]
    tags_tensor = torch.LongTensor(tags)

    tag2unit_dict = dict()

    if num_allies >= 1:
        exist_allies = True
        allies_center_pos = ally_units.center
        allies_unit_dict = dict()
        allies_index_dict = dict()
        for i, allies_unit in enumerate(ally_units):
            tag2unit_dict[allies_unit.tag] = allies_unit
            ally_indices.append(i)
            node_feature = list()
            one_hot_type_id = get_one_hot_unit_type(allies_unit.type_id.value)
            node_feature.extend(one_hot_type_id)
            node_feature.extend(list(allies_center_pos - allies_unit.position))
            if use_absolute_pos:
                node_feature.extend(list(allies_unit.position))
            node_feature.append(allies_unit.health_max)
            node_feature.append(allies_unit.health_percentage)
            node_feature.append(allies_unit.weapon_cooldown)
            node_feature.append(allies_unit.ground_dps)
            one_hot_node_type = get_one_hot_node_type(NODE_ALLY)
            node_feature.extend(one_hot_node_type)
            node_features.append(node_feature)
            allies_unit_dict[allies_unit] = i
            allies_index_dict[i] = allies_unit
            node_types.append(NODE_ALLY)
            allies_health += allies_unit.health
            allies_health_percentage += allies_unit.health_percentage
            allies_mineral_cost += type2cost[allies_unit.name][0]
            allies_vespene_cost += type2cost[allies_unit.name][1]
            allies_food_cost += type2cost[allies_unit.name][2]

    enemies_health = 0
    enemies_health_percentage = 0
    enemies_mineral_cost = 0
    enemies_vespene_cost = 0
    enemies_food_cost = 0
    enemies_indices = []

    if num_enemies >= 1:
        enemy_center_pos = enemy_units.center
        enemy_unit_dict = dict()
        enemy_index_dict = dict()
        for j, enemy_unit in enumerate(enemy_units):
            tag2unit_dict[enemy_unit.tag] = enemy_unit
            enemies_indices.append(num_allies + j)
            node_feature = list()
            one_hot_type_id = get_one_hot_unit_type(enemy_unit.type_id.value)
            node_feature.extend(one_hot_type_id)
            node_feature.extend(list(enemy_center_pos - enemy_unit.position))
            if use_absolute_pos:
                node_feature.extend(list(enemy_unit.position))
            node_feature.append(enemy_unit.health_max)
            node_feature.append(enemy_unit.health_percentage)
            node_feature.append(enemy_unit.weapon_cooldown)
            node_feature.append(enemy_unit.ground_dps)
            one_hot_node_type = get_one_hot_node_type(NODE_ENEMY)
            node_feature.extend(one_hot_node_type)
            node_features.append(node_feature)
            enemy_unit_dict[enemy_unit] = j + num_allies
            enemy_index_dict[j + num_allies] = enemy_unit
            node_types.append(NODE_ENEMY)
            enemies_health += enemy_unit.health
            enemies_health_percentage += enemy_unit.health_percentage
            enemies_mineral_cost += type2cost[enemy_unit.name][0]
            enemies_vespene_cost += type2cost[enemy_unit.name][1]
            enemies_food_cost += type2cost[enemy_unit.name][2]

    if num_allies + num_enemies >= 1:
        node_features = np.stack(
            node_features)  # [Num total units x Num features]
        node_features = torch.Tensor(node_features)

        node_types = torch.Tensor(node_types).reshape(-1)

        unit_indices = torch.Tensor(ally_indices +
                                    enemies_indices).reshape(-1).int()
        num_nodes = node_features.size(0)

    if exist_allies:
        # Add Node features: allies + enemies
        g.add_nodes(
            num_nodes, {
                'node_feature': node_features,
                'node_type': node_types,
                'tag': tags_tensor,
                'node_index': unit_indices,
                'init_node_feature': node_features
            })

        if num_allies >= 2:
            # Add allies edges
            allies_edge_indices = cartesian_product(ally_indices,
                                                    ally_indices,
                                                    return_1d=True)

            # To support hyper network encoder, we keep two edge_types
            allies_edge_type = torch.Tensor(data=(EDGE_ALLY, ))
            allies_edge_type_one_hot = torch.Tensor(
                data=get_one_hot_edge_type(EDGE_ALLY))
            num_allies_edges = len(allies_edge_indices[0])

            g.add_edges(
                allies_edge_indices[0], allies_edge_indices[1], {
                    'edge_type_one_hot':
                    allies_edge_type_one_hot.repeat(num_allies_edges, 1),
                    'edge_type':
                    allies_edge_type.repeat(num_allies_edges)
                })

        if num_allies >= 1 and num_enemies >= 1:
            # Construct a bipartite graph (enemies -> allies) used to compute the primitive attack action

            bipartite_edges = cartesian_product(enemies_indices,
                                                ally_indices,
                                                return_1d=True)

            # the edges from enemies to the allies
            # To support hyper network encoder, we keep two edge_types
            inter_army_edge_type = torch.Tensor(data=(EDGE_ENEMY, ))
            inter_army_edge_type_one_hot = torch.Tensor(
                data=get_one_hot_edge_type(EDGE_ENEMY))
            num_inter_army_edges = len(bipartite_edges[0])

            g.add_edges(
                bipartite_edges[0], bipartite_edges[1], {
                    'edge_type_one_hot':
                    inter_army_edge_type_one_hot.repeat(
                        num_inter_army_edges, 1),
                    'edge_type':
                    inter_army_edge_type.repeat(num_inter_army_edges)
                })

            if edge_ally_to_enemy:

                # the edges from allies to the enemies
                inter_army_edge_type = torch.Tensor(
                    data=(EDGE_ALLY_TO_ENEMY, ))
                inter_army_edge_type_one_hot = torch.Tensor(
                    data=get_one_hot_edge_type(EDGE_ALLY_TO_ENEMY))
                num_inter_army_edges = len(bipartite_edges[0])

                g.add_edges(
                    bipartite_edges[1], bipartite_edges[0], {
                        'edge_type_one_hot':
                        inter_army_edge_type_one_hot.repeat(
                            num_inter_army_edges, 1),
                        'edge_type':
                        inter_army_edge_type.repeat(num_inter_army_edges)
                    })

            for ally_unit in ally_units:
                # Get all enemy units within this ally's attack range.
                in_range_units = enemy_units.in_attack_range_of(ally_unit)
                if in_range_units:  # when in-attack-range units exist
                    allies_index = allies_unit_dict[ally_unit]
                    for in_range_unit in in_range_units:
                        enemy_index = enemy_unit_dict[in_range_unit]
                        # Expected bottleneck (2) -> Doubled assignment of edges
                        edge_in_attack_range = torch.Tensor(
                            data=(EDGE_IN_ATTACK_RANGE, ))
                        edge_in_attack_range_one_hot = torch.Tensor(
                            data=get_one_hot_edge_type(EDGE_IN_ATTACK_RANGE))
                        edge_in_attack_range = edge_in_attack_range.reshape(-1)
                        # dist = np.linalg.norm(ally_unit.position - in_range_unit.position)
                        # dist = torch.Tensor(data=(dist,))
                        # dist = dist.reshape(1, -1)
                        # damage = edge_total_damage(ally_unit, in_range_unit)
                        # damage = torch.Tensor(data=(damage,)).reshape(1, -1)
                        g.add_edge(enemy_index, allies_index,
                                   {'edge_type': edge_in_attack_range})

    ret_dict = dict()
    ret_dict['g'] = g

    # For interfacing nn action args with sc2 action commands.
    ret_dict['tag2unit_dict'] = tag2unit_dict
    ret_dict['units'] = units

    _gf = [
        allies_mineral_cost, allies_vespene_cost, allies_food_cost,
        enemies_mineral_cost, enemies_vespene_cost, enemies_food_cost
    ]
    global_feature = torch.Tensor(data=_gf).view(1, -1)

    ret_dict['global_feature'] = global_feature

    return ret_dict
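A hedged sketch of how downstream code might consume this dictionary; `state_to_graph` and its inputs are hypothetical stand-ins for the enclosing builder defined earlier in the file:

# Hypothetical consumer of the returned dictionary (names invented).
out = state_to_graph(units, use_absolute_pos=True, edge_ally_to_enemy=True)
g = out['g']                          # DGLGraph with node/edge features
node_feat = g.ndata['node_feature']   # [num units x num features]
tag2unit = out['tag2unit_dict']       # SC2 unit tag -> unit object
global_feat = out['global_feature']   # [1 x 6] army cost summary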
Example #23
0
def test_simple_pool():
    ctx = F.ctx()
    g = dgl.DGLGraph(nx.path_graph(15)).to(ctx)

    sum_pool = nn.SumPooling()
    avg_pool = nn.AvgPooling()
    max_pool = nn.MaxPooling()
    sort_pool = nn.SortPooling(10)  # k = 10
    print(sum_pool, avg_pool, max_pool, sort_pool)

    # test#1: basic
    h0 = F.randn((g.number_of_nodes(), 5))
    sum_pool = sum_pool.to(ctx)
    avg_pool = avg_pool.to(ctx)
    max_pool = max_pool.to(ctx)
    sort_pool = sort_pool.to(ctx)
    h1 = sum_pool(g, h0)
    assert F.allclose(F.squeeze(h1, 0), F.sum(h0, 0))
    h1 = avg_pool(g, h0)
    assert F.allclose(F.squeeze(h1, 0), F.mean(h0, 0))
    h1 = max_pool(g, h0)
    assert F.allclose(F.squeeze(h1, 0), F.max(h0, 0))
    h1 = sort_pool(g, h0)
    assert h1.shape[0] == 1 and h1.shape[1] == 10 * 5 and h1.dim() == 2

    # test#2: batched graph
    g_ = dgl.DGLGraph(nx.path_graph(5)).to(F.ctx())
    bg = dgl.batch([g, g_, g, g_, g])
    h0 = F.randn((bg.number_of_nodes(), 5))
    h1 = sum_pool(bg, h0)
    truth = th.stack([
        F.sum(h0[:15], 0),
        F.sum(h0[15:20], 0),
        F.sum(h0[20:35], 0),
        F.sum(h0[35:40], 0),
        F.sum(h0[40:55], 0)
    ], 0)
    assert F.allclose(h1, truth)

    h1 = avg_pool(bg, h0)
    truth = th.stack([
        F.mean(h0[:15], 0),
        F.mean(h0[15:20], 0),
        F.mean(h0[20:35], 0),
        F.mean(h0[35:40], 0),
        F.mean(h0[40:55], 0)
    ], 0)
    assert F.allclose(h1, truth)

    h1 = max_pool(bg, h0)
    truth = th.stack([
        F.max(h0[:15], 0),
        F.max(h0[15:20], 0),
        F.max(h0[20:35], 0),
        F.max(h0[35:40], 0),
        F.max(h0[40:55], 0)
    ], 0)
    assert F.allclose(h1, truth)

    h1 = sort_pool(bg, h0)
    assert h1.shape[0] == 5 and h1.shape[1] == 10 * 5 and h1.dim() == 2
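The segment boundaries asserted above come from dgl.batch, which concatenates node features graph by graph. A minimal, self-contained sketch of the same readout behavior (assuming the PyTorch backend and the legacy DGLGraph API used throughout these examples):

import dgl
import networkx as nx
import torch
from dgl.nn.pytorch.glob import SumPooling

# Two path graphs with 3 and 2 nodes; features are just node indices.
bg = dgl.batch([dgl.DGLGraph(nx.path_graph(3)),
                dgl.DGLGraph(nx.path_graph(2))])
h = torch.arange(5, dtype=torch.float32).view(5, 1)
print(SumPooling()(bg, h))  # one row per graph: [[3.], [7.]]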
Example #24
0
    def to_graph(self, threshold=None, format='edge_list', split=True,
                 frac=[0.7, 0.1, 0.2], seed=42, order='descending'):
        """Add a method description here.

        Parameters
        ----------
        threshold :
            Add a variable description here.

        format :
            Add a variable description here.

        split :
            Add a variable description here.

        frac : list, optional (default=frac=[0.7, 0.1, 0.2])
            Train/val/test split fractions.

        seed : int
            Add a variable description here.

        order :
            Add a variable description here.

        Returns
        -------

        """
        '''
        Arguments:
            format: edge_list / dgl / pyg df object
        '''

        df = self.get_data(format='df')

        if len(np.unique(self.raw_y)) > 2:
            if threshold is None:
                raise AttributeError(
                    "Please specify the threshold to binarize the data by "
                    "'to_graph(threshold = N)'!")
            print("The dataset labels are continuous affinity scores. "
                  "Binarization with threshold " + str(threshold) +
                  " is used to construct the positive edges in the network. "
                  "Adjust it via to_graph(threshold=X).",
                  flush=True, file=sys.stderr)
            df['label_binary'] = label_transform(self.raw_y, True, threshold,
                                                 False, verbose=False,
                                                 order=order)
        else:
            # already binary
            df['label_binary'] = df['Y']

        df[self.entity1_name + '_ID'] = df[self.entity1_name + '_ID'].astype(str)
        df[self.entity2_name + '_ID'] = df[self.entity2_name + '_ID'].astype(str)
        df_pos = df[df.label_binary == 1]
        df_neg = df[df.label_binary == 0]

        return_dict = {}

        pos_edges = df_pos[
            [self.entity1_name + '_ID', self.entity2_name + '_ID']].values
        neg_edges = df_neg[
            [self.entity1_name + '_ID', self.entity2_name + '_ID']].values
        edges = df[
            [self.entity1_name + '_ID', self.entity2_name + '_ID']].values

        if format == 'edge_list':
            return_dict['edge_list'] = pos_edges
            return_dict['neg_edges'] = neg_edges
        elif format == 'dgl':
            try:
                import dgl
            except ImportError:
                install("dgl")
                import dgl
            unique_entities = np.unique(pos_edges.T.flatten()).tolist()
            index = list(range(len(unique_entities)))
            dict_ = dict(zip(unique_entities, index))
            edge_list1 = np.array([dict_[i] for i in pos_edges.T[0]])
            edge_list2 = np.array([dict_[i] for i in pos_edges.T[1]])
            return_dict['dgl_graph'] = dgl.DGLGraph((edge_list1, edge_list2))
            return_dict['index_to_entities'] = dict_

        elif format == 'pyg':
            try:
                import torch
                from torch_geometric.data import Data
            except ImportError:
                raise ImportError(
                    "Please see https://pytorch-geometric.readthedocs.io/en/latest/notes/installation.html to install pytorch geometric!")

            unique_entities = np.unique(pos_edges.T.flatten()).tolist()
            index = list(range(len(unique_entities)))
            dict_ = dict(zip(unique_entities, index))
            edge_list1 = np.array([dict_[i] for i in pos_edges.T[0]])
            edge_list2 = np.array([dict_[i] for i in pos_edges.T[1]])

            edge_index = torch.tensor([edge_list1, edge_list2],
                                      dtype=torch.long)
            x = torch.tensor(np.array(index), dtype=torch.float)
            data = Data(x=x, edge_index=edge_index)
            return_dict['pyg_graph'] = data
            return_dict['index_to_entities'] = dict_

        elif format == 'df':
            return_dict['df'] = df

        if split:
            return_dict['split'] = create_fold(df, seed, frac)

        return return_dict
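A hedged usage sketch; `data` stands for any dataset object exposing this method, and the threshold value is invented:

# Hypothetical call requesting a DGL graph plus data splits.
out = data.to_graph(threshold=30, format='dgl', split=True,
                    frac=[0.7, 0.1, 0.2], seed=42, order='descending')
g = out['dgl_graph']               # graph over the positive edges
id2entity = out['index_to_entities']
folds = out['split']               # train/val/test folds from create_fold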
Example #25
0
    def prep(self, obs, hiddens, hiddens_u, with_acts=False, add_acts=None):
        graph_list = []
        num_agents = [num_agent[0] for num_agent in obs["num_player"]]
        prev_num_agents = [(a != -1).sum() for a in obs["player_filter"]]
        unc_complete_filter = [
            ob[:n_p_agent]
            for ob, n_p_agent in zip(obs["player_filter"], prev_num_agents)
        ]
        complete_filter = np.concatenate(unc_complete_filter, axis=-1)
        # Create graphs inputted to GNN.
        for num_agent in num_agents:
            num_agent = int(num_agent)
            graph_ob = dgl.DGLGraph()
            graph_ob.add_nodes(num_agent)
            edge_pairs = [(a, b) for a in range(num_agent)
                          for b in range(num_agent) if a != b]
            if edge_pairs:
                src, dst = zip(*edge_pairs)
                graph_ob.add_edges(src, dst)
            graph_list.append(graph_ob)

        graph_batch = dgl.batch(graph_list)

        # Parse inputs into node inputs.
        num_nodes = graph_batch.batch_num_nodes
        n_ob = torch.cat([
            torch.Tensor([obs['player_info'][id][3 * idx:3 * idx + 3]
                          ]).float() for id, num_node in enumerate(num_nodes)
            for idx in range(num_node)
        ],
                         dim=0)

        u_ob = torch.Tensor(obs["food_info"])

        # Create filters to decide which hidden vectors to maintain.
        # Newly added agents get zero-initialized hiddens; remaining agents
        # carry their hiddens over from the previous timestep.
        node_filter_np = np.where(complete_filter == 1)[0]
        node_filter = torch.Tensor(node_filter_np).long()

        current_node_offsets, offset = [0], 0
        for cur_num_node in num_nodes[:-1]:
            offset += cur_num_node
            current_node_offsets.append(offset)

        new_indices = []
        filter_idxes = [
            np.arange((filter == 1).sum()) for filter in unc_complete_filter
        ]
        for offset, filter in zip(current_node_offsets, filter_idxes):
            new_indices.append(torch.Tensor(offset + filter).long())
        complete_new_filter = torch.cat(new_indices, dim=-1)

        # Create action vectors for opponent modelling.
        if with_acts:
            acts = []
            for first_act, last_act, prev_node in zip(add_acts,
                                                      obs["prev_actions"],
                                                      prev_num_agents):
                acts.append(first_act)
                acts.extend(last_act[:prev_node - 1])

        # Filter hidden vectors for remaining agents.
        # Add zero vectors for newly added agents.
        n_hid = (torch.zeros([
            1, graph_batch.number_of_nodes(), self.dim_lstm_out
        ]), torch.zeros([1,
                         graph_batch.number_of_nodes(), self.dim_lstm_out]))

        # Only reuse previous hidden states when they exist.
        if not (hiddens is None):
            collected_hiddens = (hiddens[0][:, node_filter, :],
                                 hiddens[1][:, node_filter, :])
            n_hid[0][:, complete_new_filter, :] = collected_hiddens[0]
            n_hid[1][:, complete_new_filter, :] = collected_hiddens[1]

        n_hid_u = (torch.zeros([
            1, graph_batch.number_of_nodes(), self.dim_lstm_out
        ]), torch.zeros([1,
                         graph_batch.number_of_nodes(), self.dim_lstm_out]))

        if not (hiddens_u is None):
            collected_hiddens = (hiddens_u[0][:, node_filter, :],
                                 hiddens_u[1][:, node_filter, :])
            n_hid_u[0][:, complete_new_filter, :] = collected_hiddens[0]
            n_hid_u[1][:, complete_new_filter, :] = collected_hiddens[1]

        if with_acts:
            return graph_batch, n_ob, u_ob, n_hid, n_hid_u, acts

        return graph_batch, n_ob, u_ob, n_hid, n_hid_u
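A toy illustration (all shapes and numbers invented) of the hidden-state carry-over implemented above: survivors keep their previous hiddens, newcomers start from zeros.

import torch

prev_h = torch.arange(6.).view(1, 3, 2)   # [1, prev num agents, lstm dim]
node_filter = torch.tensor([0, 2])        # survivors, in previous indexing
new_slots = torch.tensor([0, 1])          # their rows in the new batch
n_hid = torch.zeros(1, 3, 2)              # 3 current agents; newcomer stays zero
n_hid[:, new_slots, :] = prev_h[:, node_filter, :]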
Example #26
0
def graphClassification(datasets_folder, EdgeLists_folder,
                        NodesEmbedding_folder, number_of_epochs,
                        embedding_size, num_classes, clustering_measure,
                        labels_file):
    files_name = sorted(os.listdir(datasets_folder), key=lambda s: s.lower())
    print(len(files_name))
    dict_nodes_embedding = get_node_embedding(NodesEmbedding_folder)
    graphs = retrive_graphs(dict_nodes_embedding, EdgeLists_folder,
                            datasets_folder, labels_file)
    loo = LeaveOneOut()
    splits = loo.split(graphs)
    embedding_dir = "Embeddings_" + clustering_measure
    for train_index, test_index in splits:
        # train_index = np.insert(train_index, 0,test_index[0])
        train_set = itemgetter(*train_index)(graphs)
        test_set = [itemgetter(*test_index)(graphs)]

        data_loader = DataLoader(train_set,
                                 batch_size=batch_size,
                                 shuffle=False,
                                 collate_fn=collate)
        # Create model
        for i in range(0, batch_size - 1):
            c = dgl.DGLGraph()
            c.add_nodes(1)
            test_set.append((c, 0, torch.FloatTensor(torch.zeros(1, 64))))
        model = Classifier(node_embedding_dim, embedding_size, num_classes)
        loss_func = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=0.006)
        model.train()
        epoch_losses = []

        for epoch in range(number_of_epochs):
            epoch_loss = 0
            counter = 0

            for iter, (bg, label, embedding) in enumerate(data_loader):
                embedding_array = []
                embedding_array = initiate_feat(embedding)
                prediction, hg = model(bg, embedding_array)

                # On the last epoch, save the graph embeddings to disk.
                if epoch == number_of_epochs - 1:
                    if not os.path.exists(embedding_dir):
                        os.mkdir(embedding_dir)
                    for hidden in hg.detach().numpy():
                        dataset_hidden_name = files_name[
                            train_index[counter]].split(".")[0]
                        hidden.tofile(embedding_dir + "/" +
                                      dataset_hidden_name + ".csv",
                                      sep=',')
                        counter = counter + 1

                loss = loss_func(prediction, label)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                epoch_loss += loss.detach().item()
            epoch_loss /= (iter + 1)
            print('Epoch {}, loss {:.4f}'.format(epoch, epoch_loss))
            epoch_losses.append(epoch_loss)
        model.eval()
        # Convert a list of tuples to two lists
        test_X, test_Y, embedding_test = map(list, zip(*test_set))
        test_bg = dgl.batch(test_X)
        true_label = test_Y[0]
        dataset_test_name = files_name[test_index[0]].split(".")[0]
        print(dataset_test_name)
        test_Y = torch.tensor(test_Y).float().view(-1, 1)
        probs_Y, hidden_layer = model(test_bg, initiate_feat(embedding_test))
        probs_Y = torch.softmax(probs_Y, 1)
        sampled_Y = torch.multinomial(probs_Y, 1)
        argmax_Y = torch.max(probs_Y, 1)[1].view(-1, 1)
        print(
            'Accuracy of sampled predictions on the test set: {:.4f}%'.format(
                (test_Y == sampled_Y.float()).sum().item() / len(test_Y) *
                100))
        print('Accuracy of argmax predictions on the test set: {:.4f}%'.format(
            (test_Y == argmax_Y.float()).sum().item() / len(test_Y) * 100))
        break
    return embedding_dir + "/"
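As an aside, a minimal sketch of what LeaveOneOut yields per fold; note that the loop above breaks after the first fold, so only one split is actually trained and evaluated:

from sklearn.model_selection import LeaveOneOut

X = [[0], [1], [2], [3]]  # stand-in for the list of graphs
for train_idx, test_idx in LeaveOneOut().split(X):
    print(train_idx, test_idx)  # [1 2 3] [0], then [0 2 3] [1], ...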
Example #27
0
def test_edge_softmax():
    # Basic
    g = dgl.DGLGraph(nx.path_graph(3)).to(F.ctx())
    edata = F.ones((g.number_of_edges(), 1))
    a = nn.edge_softmax(g, edata)
    assert len(g.ndata) == 0
    assert len(g.edata) == 0
    assert F.allclose(a, uniform_attention(g, a.shape))

    # Test higher dimension case
    edata = F.ones((g.number_of_edges(), 3, 1))
    a = nn.edge_softmax(g, edata)
    assert len(g.ndata) == 0
    assert len(g.edata) == 0
    assert F.allclose(a, uniform_attention(g, a.shape))

    # Test both forward and backward with Tensorflow built-in softmax.
    g = dgl.DGLGraph().to(F.ctx())
    g.add_nodes(30)
    # build a complete graph
    for i in range(30):
        for j in range(30):
            g.add_edge(i, j)

    score = F.randn((900, 1))
    with tf.GradientTape() as tape:
        tape.watch(score)
        y = tf.reshape(F.softmax(tf.reshape(score, (30, 30)), dim=0), (-1, 1))
        grads = tape.gradient(y, [score])
        grad_score = grads[0]

    with tf.GradientTape() as tape:
        tape.watch(score)
        y_dgl = nn.edge_softmax(g, score)
        assert len(g.ndata) == 0
        assert len(g.edata) == 0
        # check forward
        assert F.allclose(y_dgl, y)
        grads = tape.gradient(y_dgl, [score])
    # check gradient
    assert F.allclose(grads[0], grad_score)
    print(grads[0][:10], grad_score[:10])

    # Test 2
    def generate_rand_graph(n):
        arr = (sp.sparse.random(n, n, density=0.1, format='coo') != 0).astype(
            np.int64)
        return dgl.DGLGraph(arr, readonly=True)

    g = generate_rand_graph(50).to(F.ctx())
    a1 = F.randn((g.number_of_edges(), 1))
    a2 = tf.identity(a1)
    with tf.GradientTape() as tape:
        tape.watch(a1)
        g.edata['s'] = a1
        g.group_apply_edges(
            'dst', lambda edges: {'ss': F.softmax(edges.data['s'], 1)})
        loss = tf.reduce_sum(g.edata['ss'])
        a1_grad = tape.gradient(loss, [a1])[0]

    with tf.GradientTape() as tape:
        tape.watch(a2)
        builtin_sm = nn.edge_softmax(g, a2)
        loss = tf.reduce_sum(builtin_sm)
        a2_grad = tape.gradient(loss, [a2])[0]
    print(a1_grad - a2_grad)
    assert len(g.ndata) == 0
    assert len(g.edata) == 2
    assert F.allclose(a1_grad, a2_grad, rtol=1e-4,
                      atol=1e-4)  # Follow tolerance in unittest backend
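For intuition, a hand-checkable sketch of what edge_softmax normalizes over, assuming the PyTorch build of DGL (where the same op is exposed as dgl.nn.pytorch.edge_softmax):

import dgl
import networkx as nx
import torch
from dgl.nn.pytorch import edge_softmax

g = dgl.DGLGraph(nx.path_graph(3))  # 4 directed edges: 0->1, 1->0, 1->2, 2->1
a = edge_softmax(g, torch.ones(g.number_of_edges(), 1))
# Scores are normalized over each node's incoming edges: node 1 has two
# in-edges (0.5 each), nodes 0 and 2 have one in-edge (1.0 each).
print(a.view(-1))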
Example #28
0
def plot_locations(graph, ax):

    had_list = graph.hadron_list
    node_list = graph.nodes

    vertices = list(set([node.vertex_idx for node in node_list]))

    if 0 not in vertices:
        vertices.append(0)
    if 1 not in vertices:
        vertices.append(1)

    n_vertices = len(vertices)

    locations_g = dgl.DGLGraph()
    locations_g.add_nodes(n_vertices)
    loc_labels = {}
    loc_spacing = 500

    x_range = n_vertices * loc_spacing

    loc_labels[0] = 'pileup/\nfakes'
    loc_labels[1] = 'primary'

    G = locations_g.to_networkx()
    pos = {}

    for pos_i in range(n_vertices):
        pos[pos_i] = (loc_spacing * pos_i, 0)
        if pos_i < 2:
            continue
        for node in node_list:
            if node.vertex_idx == pos_i:
                loc_labels[pos_i] = '{0:.2f}'.format(
                    np.linalg.norm(node.origin))
                break

    nx.draw_networkx_nodes(G,
                           pos,
                           node_color='mediumaquamarine',
                           node_size=1800,
                           ax=ax)
    nx.draw_networkx_edges(G, pos, ax=ax)
    nx.draw_networkx_labels(G, pos, loc_labels, ax=ax)

    # fake/pileup vertex
    center_point = pos[0]
    sub_g = dgl.DGLGraph()
    sub_g.add_nodes(1)
    sub_g_pos = {0: center_point}
    n_children = 0
    r_x = loc_spacing / 3.5
    r_y = 30
    for node in node_list:
        if node.vertex_idx == 0:
            n_children += 1
            sub_g.add_nodes(1)
            sub_g.add_edge(0, n_children)

    child_idx = 0
    n_tracks = 0
    for node in node_list:
        if node.vertex_idx == 0:
            child_idx += 1

            sub_g_pos[child_idx] = (
                center_point[0] + r_x * np.cos(
                    ((child_idx - 1) / float(n_children)) * 2 * np.pi),
                center_point[1] + r_y * np.sin(
                    ((child_idx - 1) / float(n_children)) * 2 * np.pi))
    G = sub_g.to_networkx()

    nx.draw_networkx_nodes(G,
                           sub_g_pos,
                           node_color='skyblue',
                           node_size=800,
                           ax=ax,
                           nodelist=range(1, n_children + 1))
    nx.draw_networkx_edges(G, sub_g_pos, ax=ax)

    for vtx_i in range(1, n_vertices):

        center_point = pos[vtx_i]

        sub_g = dgl.DGLGraph()
        sub_g.add_nodes(1)
        sub_g_labels = {}

        sub_g_pos = {0: center_point}

        n_children = 0
        for node in node_list:
            if node.vertex_idx == vtx_i:
                n_children += 1
                sub_g.add_nodes(1)
                sub_g.add_edge(0, n_children)

                if node.pdgid in pdg_id_dict:
                    sub_g_labels[n_children] = pdg_id_dict[node.pdgid]
                else:
                    sub_g_labels[n_children] = str(node.pdgid)

        r_x = loc_spacing / 3.5
        r_y = 30

        child_idx = 0
        n_tracks = 0
        for node in node_list:
            if node.vertex_idx == vtx_i:
                child_idx += 1

                sub_g_pos[child_idx] = (
                    center_point[0] + r_x * np.cos(
                        ((child_idx - 1) / float(n_children)) * 2 * np.pi),
                    center_point[1] + r_y * np.sin(
                        ((child_idx - 1) / float(n_children)) * 2 * np.pi))

                if node.reconstructed:
                    sub_g.add_nodes(1)
                    n_tracks += 1
                    sub_g.add_edge(child_idx, n_children + n_tracks)

                    sub_g_pos[n_children + n_tracks] = (
                        center_point[0] + 1.5 * r_x * np.cos(
                            ((child_idx - 1) / float(n_children)) * 2 * np.pi),
                        center_point[1] + 1.5 * r_y * np.sin(
                            ((child_idx - 1) / float(n_children)) * 2 * np.pi))

        G = sub_g.to_networkx()

        nx.draw_networkx_nodes(G,
                               sub_g_pos,
                               node_color='darksalmon',
                               node_size=800,
                               ax=ax,
                               nodelist=range(1, n_children + 1))
        nx.draw_networkx_nodes(G,
                               sub_g_pos,
                               node_color='skyblue',
                               node_size=300,
                               ax=ax,
                               nodelist=range(n_children + 1,
                                              n_children + n_tracks + 1))
        nx.draw_networkx_edges(G, sub_g_pos, ax=ax)
        nx.draw_networkx_labels(G,
                                sub_g_pos,
                                sub_g_labels,
                                ax=ax,
                                nodelist=range(1, n_children + 1))
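A hypothetical driver for the helper above; `event_graph` is assumed to expose the .hadron_list, .nodes, .vertex_idx, .origin, .pdgid and .reconstructed attributes used in the function body:

import matplotlib.pyplot as plt

fig, ax = plt.subplots(figsize=(12, 4))
plot_locations(event_graph, ax)  # event_graph: hypothetical decay-graph object
ax.set_axis_off()
plt.show()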
Example #29
0
def test_rgcn():
    etype = []
    g = dgl.DGLGraph(sp.sparse.random(100, 100, density=0.1),
                     readonly=True).to(F.ctx())
    # 5 etypes
    R = 5
    for i in range(g.number_of_edges()):
        etype.append(i % 5)
    B = 2
    I = 10
    O = 8

    rgc_basis = nn.RelGraphConv(I, O, R, "basis", B)
    rgc_basis_low = nn.RelGraphConv(I, O, R, "basis", B, low_mem=True)
    rgc_basis_low.weight = rgc_basis.weight
    rgc_basis_low.w_comp = rgc_basis.w_comp
    h = tf.random.normal((100, I))
    r = tf.constant(etype)
    h_new = rgc_basis(g, h, r)
    h_new_low = rgc_basis_low(g, h, r)
    assert list(h_new.shape) == [100, O]
    assert list(h_new_low.shape) == [100, O]
    assert F.allclose(h_new, h_new_low)

    rgc_bdd = nn.RelGraphConv(I, O, R, "bdd", B)
    rgc_bdd_low = nn.RelGraphConv(I, O, R, "bdd", B, low_mem=True)
    rgc_bdd_low.weight = rgc_bdd.weight
    h = tf.random.normal((100, I))
    r = tf.constant(etype)
    h_new = rgc_bdd(g, h, r)
    h_new_low = rgc_bdd_low(g, h, r)
    assert list(h_new.shape) == [100, O]
    assert list(h_new_low.shape) == [100, O]
    assert F.allclose(h_new, h_new_low)

    # with norm
    norm = tf.zeros((g.number_of_edges(), 1))

    rgc_basis = nn.RelGraphConv(I, O, R, "basis", B)
    rgc_basis_low = nn.RelGraphConv(I, O, R, "basis", B, low_mem=True)
    rgc_basis_low.weight = rgc_basis.weight
    rgc_basis_low.w_comp = rgc_basis.w_comp
    h = tf.random.normal((100, I))
    r = tf.constant(etype)
    h_new = rgc_basis(g, h, r, norm)
    h_new_low = rgc_basis_low(g, h, r, norm)
    assert list(h_new.shape) == [100, O]
    assert list(h_new_low.shape) == [100, O]
    assert F.allclose(h_new, h_new_low)

    rgc_bdd = nn.RelGraphConv(I, O, R, "bdd", B)
    rgc_bdd_low = nn.RelGraphConv(I, O, R, "bdd", B, low_mem=True)
    rgc_bdd_low.weight = rgc_bdd.weight
    h = tf.random.normal((100, I))
    r = tf.constant(etype)
    h_new = rgc_bdd(g, h, r, norm)
    h_new_low = rgc_bdd_low(g, h, r, norm)
    assert list(h_new.shape) == [100, O]
    assert list(h_new_low.shape) == [100, O]
    assert F.allclose(h_new, h_new_low)

    # id input
    rgc_basis = nn.RelGraphConv(I, O, R, "basis", B)
    rgc_basis_low = nn.RelGraphConv(I, O, R, "basis", B, low_mem=True)
    rgc_basis_low.weight = rgc_basis.weight
    rgc_basis_low.w_comp = rgc_basis.w_comp
    h = tf.constant(np.random.randint(0, I, (100, ))) * 1
    r = tf.constant(etype) * 1
    h_new = rgc_basis(g, h, r)
    h_new_low = rgc_basis_low(g, h, r)
    assert list(h_new.shape) == [100, O]
    assert list(h_new_low.shape) == [100, O]
    assert F.allclose(h_new, h_new_low)
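The weight/w_comp copying above works because of the basis decomposition behind "basis" regularization: each relation's weight matrix is a mixture of B shared bases, W_r = sum_b w_comp[r, b] * weight[b]. A sketch of that construction (shapes follow the test):

import tensorflow as tf

B, R, I, O = 2, 5, 10, 8
weight = tf.random.normal((B, I, O))          # shared basis matrices
w_comp = tf.random.normal((R, B))             # per-relation coefficients
W = tf.einsum('rb,bio->rio', w_comp, weight)  # [R, I, O] relation weights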
Example #30
0
def processing_amr(amr_dir, tokens_list):
    amr_list = torch.load(amr_dir)

    node_idx_list, edge_type_list, node_idx_offset_list, node_idx_offset_whole = [], [], [], []
    list_of_align_dict = []
    list_of_exist_dict = []

    total_edge_num = 0
    covered_edge_num = 0
    order_list = []
    for i, amr in enumerate(amr_list):
        amr_split_list = amr.split('\n')
        # print(amr_split_list)
        node_to_idx, node_to_offset, node_to_offset_whole = {}, {}, {}
        node_num = 0
        # first to fill in the node list
        for line in amr_split_list:
            if line.startswith('# ::node'):
                node_split = line.split('\t')
                # print(node_split)
                if len(node_split) != 4:
                    # check if the alignment text spans exist
                    continue
                else:
                    align_span = node_split[3].split('-')
                    if not align_span[0].isdigit():
                        continue
                    head_word_idx = int(align_span[1]) - 1
                    try:
                        start = int(align_span[0])
                    except ValueError:
                        raise ValueError(
                            'Malformed alignment span: ' + node_split[3])
                    end = int(align_span[1])
                    if (start, end) not in list(node_to_offset_whole.values()):
                        node_to_offset.update({node_split[1]: head_word_idx})
                        node_to_offset_whole.update(
                            {node_split[1]: (start, end)})
                        node_to_idx.update({node_split[1]: node_num})
                        node_num += 1
            else:
                continue

        node_idx_list.append(node_to_idx)
        # change str2offset to idx2offset
        node_idx_to_offset = {}
        for key in node_to_idx.keys():
            node_idx_to_offset.update({node_to_idx[key]: node_to_offset[key]})

        node_idx_to_offset_whole = {}
        for key in node_to_idx.keys():
            node_idx_to_offset_whole.update(
                {node_to_idx[key]: node_to_offset_whole[key]})

        node_idx_offset_list.append(node_idx_to_offset)
        node_idx_offset_whole.append(node_idx_to_offset_whole)
        edge_type_dict = {}

        for line in amr_split_list:
            if line.startswith('# ::root'):
                root_split = line.split('\t')
                root = root_split[1]
        prior_dict = {root: []}

        start_list = []
        end_list = []

        for line in amr_split_list:
            if line.startswith('# ::edge'):
                edge_split = line.split('\t')
                amr_edge_type = edge_split[2]
                edge_start = edge_split[4]
                edge_end = edge_split[5]
                # check if the start and end nodes exist
                if (edge_start in node_to_idx) and (edge_end in node_to_idx):
                    # check if the edge type is "ARGx-of", if so, reverse the direction of the edge
                    if amr_edge_type.startswith(
                            "ARG") and amr_edge_type.endswith("-of"):
                        edge_start, edge_end = edge_end, edge_start
                        amr_edge_type = amr_edge_type[0:4]
                    # deal with this edge here
                    edge_idx = get_amr_edge_idx(amr_edge_type)
                    total_edge_num += 1
                    if edge_idx == 11:
                        covered_edge_num += 1
                    start_idx = node_to_idx[edge_start]
                    end_idx = node_to_idx[edge_end]
                    edge_type_dict.update({(start_idx, end_idx): edge_idx})

                else:
                    continue
                # print(edge_start, edge_end)
                if edge_end != root and (not ((edge_start in end_list) and
                                              (edge_end in start_list))):
                    start_list.append(edge_start)
                    end_list.append(edge_end)
                if edge_start not in prior_dict:
                    prior_dict.update({edge_start: [edge_end]})
                else:
                    prior_dict[edge_start].append(edge_end)
            else:
                continue
        edge_type_list.append(edge_type_dict)
        # generating priority list for decoding
        final_order_list = []
        # output orders
        candidate_nodes = node_to_idx.copy()
        while len(candidate_nodes) != 0:
            current_level_nodes = []
            for key in candidate_nodes:
                if key not in end_list:
                    final_order_list.append(candidate_nodes[key])
                    current_level_nodes.append(key)
            # Remove current level nodes from the dictionary
            for node in current_level_nodes:
                candidate_nodes.pop(node)

            # deleting from start lists the current level nodes
            for node in current_level_nodes:
                indices_list = [
                    i for i, x in enumerate(start_list) if x == node
                ]
                start_list = [x for x in start_list if x != node]
                new_end_list = []
                for i in range(len(end_list)):
                    if i not in indices_list:
                        new_end_list.append(end_list[i])
                end_list = new_end_list

        order_list.append(final_order_list.copy())
    # feed into dgl graphs
    graphs_list = []

    for i in range(len(node_idx_list)):
        graph_i = dgl.DGLGraph()

        edge2type = edge_type_list[i]
        node2offset = node_idx_offset_list[i]
        node2offset_whole = node_idx_offset_whole[i]

        nodes_num = len(node2offset)

        graph_i.add_nodes(nodes_num)
        graph_i.ndata['token_pos'] = torch.zeros(nodes_num,
                                                 1,
                                                 dtype=torch.long)
        graph_i.ndata['token_span'] = torch.zeros(nodes_num,
                                                  2,
                                                  dtype=torch.long)

        # fill in token positions
        for key in node2offset:
            graph_i.ndata['token_pos'][key][0] = node2offset[key]
        for key in node2offset:
            graph_i.ndata['token_span'][key][0] = node2offset_whole[key][0]
            graph_i.ndata['token_span'][key][1] = node2offset_whole[key][1]
        # add nodes priorities
        node_prior_tensor = torch.zeros(nodes_num, 1, dtype=torch.long)
        for j in range(nodes_num):
            node_prior_tensor[j][0] = order_list[i].index(j)
        graph_i.ndata['priority'] = node_prior_tensor
        # add edges
        edge_num = len(edge2type)

        edge_iter = 0
        # Bi-directional edges: each edge is added in both directions.
        edge_type_tensor = torch.zeros(2 * edge_num, 1, dtype=torch.long)
        for key in edge2type:
            graph_i.add_edges(key[0], key[1])
            edge_type_tensor[edge_iter][0] = edge2type[key]
            edge_iter += 1

        for key in edge2type:
            graph_i.add_edges(key[1], key[0])
            edge_type_tensor[edge_iter][0] = edge2type[key]
            edge_iter += 1

        graph_i.edata['type'] = edge_type_tensor
        graphs_list.append(graph_i)

        align_dict = {}
        exist_dict = {}

        span_list = graph_i.ndata["token_span"].tolist()

        for p in range(len(tokens_list[i])):
            min_dis = 2 * len(tokens_list[i])
            min_dis_idx = -1

            if_found = 0

            for q in range(len(span_list)):
                if p >= span_list[q][0] and p < span_list[q][1]:
                    if_found = 1
                    align_dict.update({p: q})
                    exist_dict.update({p: 1})
                    break
                else:
                    new_dis_1 = abs(p - span_list[q][0])
                    new_dis_2 = abs(p - (span_list[q][1] - 1))
                    new_dis = min(new_dis_1, new_dis_2)
                    if new_dis < min_dis:
                        min_dis = new_dis
                        min_dis_idx = q

            if not if_found:
                align_dict.update({p: min_dis_idx})
                exist_dict.update({p: 0})

        list_of_align_dict.append(align_dict)
        list_of_exist_dict.append(exist_dict)

    return graphs_list, list_of_align_dict, list_of_exist_dict
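A hedged usage sketch; the file name and `tokens_list` contents are invented:

# Hypothetical call: the .pt file is assumed to hold a list of AMR strings
# with '# ::node', '# ::edge' and '# ::root' alignment lines, and
# tokens_list the matching tokenized sentences.
graphs, align_dicts, exist_dicts = processing_amr('amr_parses.pt', tokens_list)
g0 = graphs[0]
print(g0.ndata['token_pos'].shape, g0.edata['type'].shape)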