Example #1
 def setup_class(self):
     # pytest hook: load Cora once and cache its basic statistics
     self.dataset = build_dataset_from_name("cora")
     self.data = Data.from_pyg_data(self.dataset[0])
     self.num_nodes = self.data.num_nodes
     self.num_edges = self.data.num_edges
     self.num_features = self.data.num_features
     print("Call Setup")
Example #2
 def get_dataset(self, dataset_name, transform=None):
     assert dataset_name in ("bio", "chem", "test_bio")
     if dataset_name == "bio":
         dataset = BioDataset(self.data_type, transform=transform)  # BioDataset
         opt = {
             "input_layer": 2,
             "edge_encode": 9,
             "self_loop_index": 7,
             "self_loop_type": 1,
             "concat": True,
         }
     elif dataset_name == "chem":
         # ChemDataset is not supported yet: the options below are kept for
         # reference, but the branch raises before they are used.
         opt = {
             "edge_emb": [6, 3],
             "num_atom_type": 120,
             "num_chirality_tag": 3,
             "self_loop_index": 0,
             "self_loop_type": 4,
             "concat": False,
         }
         raise NotImplementedError  # ChemDataset
     elif dataset_name == "test_bio":
         dataset = TestBioDataset(data_type=self.data_type, transform=transform)
         opt = {
             "input_layer": 2,
             "edge_encode": 9,
             "self_loop_index": 0,
             "self_loop_type": 1,
             "concat": True,
         }
     else:
         dataset = build_dataset_from_name(self.dataset_name)
         opt = dict()
     return dataset, opt
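A hypothetical call site for this helper, written as a sibling test method on the same class; the option values checked are the ones returned above:

 def test_bio_options(self):
     # hypothetical companion test; get_dataset is the method defined above
     dataset, opt = self.get_dataset("test_bio")
     assert opt["input_layer"] == 2 and opt["concat"] is True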
Example #3
    def _call(self, dataset=(), **kwargs):
        # accept a single name or a sequence of names (mutable default avoided)
        if isinstance(dataset, str):
            dataset = [dataset]
        tab_data = []
        col_names = [
            "Dataset",
            "#nodes",
            "#edges",
            "#features",
            "#classes",
            "#labeled data",
        ]
        for name in dataset:
            # bind to a fresh name so the iterable `dataset` is not shadowed
            ds = build_dataset_from_name(name)
            data = ds[0]

            tab_data.append(
                [
                    name,
                    data.x.shape[0],
                    data.edge_index[0].shape[0],
                    data.x.shape[1],
                    len(set(data.y.numpy())),
                    sum(data.train_mask.numpy()),
                ]
            )
        print(tabulate(tab_data, headers=col_names, tablefmt="psql"))

        return tab_data
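A hypothetical invocation, with `stats` standing in for whatever object exposes this _call:

stats._call(dataset=["cora", "citeseer"])
# prints a psql-style table and returns rows of
# [name, #nodes, #edges, #features, #classes, #labeled data]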
Example #4
def download_datasets(args):
    # normalize a single dataset name into a list before downloading each
    if not isinstance(args.dataset, list):
        args.dataset = [args.dataset]

    for name in args.dataset:
        dataset = build_dataset_from_name(name)
        print(dataset[0])
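A sketch of the argparse wiring this function assumes; only the args.dataset attribute is required, the flag name is illustrative:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--dataset", nargs="+", default="cora")  # list or str both work
download_datasets(parser.parse_args())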
Example #5
    def __init__(self, app: str, model: str, **kwargs):
        super(RecommendationPipepline, self).__init__(app, model=model, **kwargs)

        if "data" in kwargs:
            data = kwargs["data"]
            # reuse the last 100 rows as both the validation and test split
            val_data = test_data = data[-100:, :]
            data = build_recommendation_data("custom", data, val_data, test_data)
            self.data_path = kwargs.get("data_path", "tmp_data.pt")
            torch.save(data, self.data_path)
            self.dataset = NodeDataset(path=self.data_path, scale_feat=False)
        elif "dataset" in kwargs:
            dataset = kwargs.pop("dataset")
            self.dataset = build_dataset_from_name(dataset)
        else:
            raise ValueError("Please provide recommendation data via `data=` or `dataset=`!")

        self.batch_size = kwargs.get("batch_size", 2048)
        self.n_items = self.dataset[0].n_params["n_items"]

        args = get_default_args(task="recommendation", dataset="ali", model=model, **kwargs)
        args.model = args.model[0]

        # task = build_task(args, dataset=self.dataset)
        # task.train()

        # self.model = task.model
        self.model = build_model(args)
        self.model.eval()

        self.user_emb, self.item_emb = self.model.generate()
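A hypothetical construction of this pipeline from a raw interaction matrix; the app string, model name, and matrix shape are assumptions, not taken from the source:

# Hypothetical usage sketch. `data` must be a 2-D tensor, since the
# constructor slices data[-100:, :] for the validation/test split.
interactions = torch.randint(0, 1000, (5000, 2))  # assumed (user, item) pairs
pipe = RecommendationPipepline("recommendation", model="lightgcn", data=interactions)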
Example #6
def plot_graph(args):
    if not isinstance(args.dataset, list):
        args.dataset = [args.dataset]

    for name in args.dataset:
        dataset = build_dataset_from_name(name)
        data = dataset[0]

        depth = args.depth
        pic_file = osp.join(args.save_dir, f'display_{name}.png')

        col_names = [
            'Dataset', '#nodes', '#edges', '#features', '#classes',
            '#labeled data'
        ]
        tab_data = [[
            name, data.x.shape[0], data.edge_index.shape[1], data.x.shape[1],
            len(set(data.y.numpy())),
            sum(data.train_mask.numpy())
        ]]
        print(tabulate(tab_data, headers=col_names, tablefmt='psql'))

        G = nx.Graph()
        G.add_edges_from([
            tuple(data.edge_index[:, i].numpy())
            for i in range(data.edge_index.shape[1])
        ])

        # BFS from a random seed node, recording each node's hop distance
        s = random.choice(list(G.nodes()))
        q = [s]
        node_set = set([s])
        node_index = {s: 0}
        max_index = 1
        for _ in range(depth):
            nq = []
            for x in q:
                for key in G[x].keys():
                    if key not in node_set:
                        nq.append(key)
                        node_set.add(key)
                        node_index[key] = node_index[x] + 1
            if len(nq) > 0:
                max_index += 1
            q = nq

        cmap = cm.rainbow(np.linspace(0.0, 1.0, max_index))

        # color nodes by hop distance; nearer nodes are drawn larger
        for node, index in node_index.items():
            G.nodes[node]['color'] = cmap[index]
            G.nodes[node]['size'] = (max_index - index) * 50

        fig, ax = plt.subplots()
        plot_network(G.subgraph(list(node_set)), node_style=use_attributes())
        plt.savefig(pic_file)
        print(f'Sampled ego network saved to {pic_file} .')
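A sketch of the argparse wiring plot_graph reads (args.dataset, args.depth, args.save_dir); flag names are illustrative:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--dataset", nargs="+", default="cora")
parser.add_argument("--depth", type=int, default=3)
parser.add_argument("--save-dir", dest="save_dir", default=".")
plot_graph(parser.parse_args())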
Example #7
    def _call(self, dataset="cora", seed=-1, depth=3, **kwargs):
        if isinstance(dataset, list):
            dataset = dataset[0]
        name = dataset
        dataset = build_dataset_from_name(name)
        data = dataset[0]

        G = nx.Graph()
        edge_index = torch.stack(data.edge_index)
        G.add_edges_from([
            tuple(edge_index[:, i].numpy()) for i in range(edge_index.shape[1])
        ])

        # BFS from the seed (chosen at random if seed == -1), recording hop distances
        if seed == -1:
            seed = random.choice(list(G.nodes()))
        q = [seed]
        node_set = set([seed])
        node_index = {seed: 0}
        max_index = 1
        for _ in range(depth):
            nq = []
            for x in q:
                for key in G[x].keys():
                    if key not in node_set:
                        nq.append(key)
                        node_set.add(key)
                        node_index[key] = node_index[x] + 1
            if len(nq) > 0:
                max_index += 1
            q = nq

        cmap = cm.rainbow(np.linspace(0.0, 1.0, max_index))

        for node, index in node_index.items():
            G.nodes[node]["color"] = cmap[index]
            G.nodes[node]["size"] = (max_index - index) * 50

        pic_file = f"{name}.png"
        plt.subplots()
        plot_network(G.subgraph(list(node_set)), node_style=use_attributes())
        plt.savefig(pic_file)
        print(f"Sampled ego network saved to {pic_file}")

        return q
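A hypothetical invocation; note the return value is the final BFS frontier q, i.e. the outermost ring of the sampled ego network:

frontier = visual._call(dataset="cora", depth=2)  # `visual` owns this _call
print(len(frontier), "nodes on the outermost ring")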
Example #8
    def __init__(self, in_feats, hidden_size, out_feats, dropout):
        super(Gnn, self).__init__()
        self.conv1 = GCNLayer(in_feats, hidden_size)
        self.conv2 = GCNLayer(hidden_size, out_feats)
        self.dropout = nn.Dropout(dropout)

    def forward(self, graph):
        graph.sym_norm()
        h = graph.x
        h = F.relu(self.conv1(graph, self.dropout(h)))
        h = self.conv2(graph, self.dropout(h))
        return F.log_softmax(h, dim=1)


if __name__ == "__main__":
    # the dataset's single Graph: Cora node features, labels, and masks
    dataset = build_dataset_from_name("cora")[0]
    model = Gnn(in_feats=dataset.num_features,
                hidden_size=64,
                out_feats=dataset.num_classes,
                dropout=0.1)
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=0.001,
                                 weight_decay=5e-3)
    model.train()
    for epoch in range(300):
        optimizer.zero_grad()
        out = model(dataset)
        loss = F.nll_loss(out[dataset.train_mask],
                          dataset.y[dataset.train_mask])
        loss.backward()
        optimizer.step()
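A minimal evaluation sketch to close the loop above, assuming Cora's test_mask (a sibling of the train_mask already used):

    # evaluation sketch: accuracy on Cora's held-out test split
    model.eval()
    with torch.no_grad():
        pred = model(dataset).argmax(dim=1)
    correct = pred[dataset.test_mask] == dataset.y[dataset.test_mask]
    print(f"test accuracy: {correct.float().mean().item():.4f}")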
Example #9
def test_customized_dataset():
    dataset = build_dataset_from_name("mydataset")
    assert isinstance(dataset[0], Graph)
    assert dataset[0].x.shape[0] == 100
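A hedged sketch of the fixture this test presupposes: a 100-node Graph exposed as "mydataset". How the name gets registered with build_dataset_from_name varies across cogdl versions, so only the Graph construction is shown:

# Sketch only: builds and saves a 100-node Graph like the one the test expects.
import torch
from cogdl.data import Graph

x = torch.randn(100, 16)                      # 100 nodes, 16 features each
edge_index = torch.randint(0, 100, (2, 500))  # 500 random edges
graph = Graph(x=x, edge_index=edge_index)
torch.save(graph, "mydataset.pt")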