def setup_class(self):
    self.dataset = build_dataset_from_name("cora")
    self.data = Data.from_pyg_data(self.dataset[0])
    self.num_nodes = self.data.num_nodes
    self.num_edges = self.data.num_edges
    self.num_features = self.data.num_features
    print("Call Setup")
def get_dataset(self, dataset_name, transform=None):
    assert dataset_name in ("bio", "chem", "test_bio")
    if dataset_name == "bio":
        dataset = BioDataset(self.data_type, transform=transform)  # BioDataset
        opt = {
            "input_layer": 2,
            "edge_encode": 9,
            "self_loop_index": 7,
            "self_loop_type": 1,
            "concat": True,
        }
    elif dataset_name == "chem":
        # ChemDataset is not implemented yet; the options below only document
        # the intended configuration.
        opt = {
            "edge_emb": [6, 3],
            "num_atom_type": 120,
            "num_chirality_tag": 3,
            "self_loop_index": 0,
            "self_loop_type": 4,
            "concat": False,
        }
        raise NotImplementedError  # ChemDataset
    elif dataset_name == "test_bio":
        dataset = TestBioDataset(data_type=self.data_type, transform=transform)
        opt = {
            "input_layer": 2,
            "edge_encode": 9,
            "self_loop_index": 0,
            "self_loop_type": 1,
            "concat": True,
        }
    else:
        # Unreachable while the assert above holds; kept as a defensive fallback.
        dataset = build_dataset_from_name(dataset_name)
        opt = dict()
    return dataset, opt
def _call(self, dataset=[], **kwargs):
    if isinstance(dataset, str):
        dataset = [dataset]
    tab_data = []
    col_names = [
        "Dataset",
        "#nodes",
        "#edges",
        "#features",
        "#classes",
        "#labeled data",
    ]
    for name in dataset:
        # Avoid rebinding the `dataset` argument while iterating over it.
        ds = build_dataset_from_name(name)
        data = ds[0]
        tab_data.append(
            [
                name,
                data.x.shape[0],
                data.edge_index[0].shape[0],
                data.x.shape[1],
                len(set(data.y.numpy())),
                sum(data.train_mask.numpy()),
            ]
        )
    print(tabulate(tab_data, headers=col_names, tablefmt="psql"))
    return tab_data
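# Hedged usage sketch: a `_call` like the one above typically backs CogDL's
# "dataset-stats" pipeline. The app name is an assumption based on CogDL's
# public `pipeline` entry point, not guaranteed by this snippet.
from cogdl import pipeline

stats = pipeline("dataset-stats")
stats(["cora", "citeseer"])  # prints a psql-style table with one row per dataset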
def download_datasets(args):
    if not isinstance(args.dataset, list):
        args.dataset = [args.dataset]
    for name in args.dataset:
        dataset = build_dataset_from_name(name)
        print(dataset[0])
def __init__(self, app: str, model: str, **kwargs):
    super(RecommendationPipepline, self).__init__(app, model=model, **kwargs)
    if "data" in kwargs:
        data = kwargs["data"]
        # Use the last 100 interactions as both validation and test splits.
        val_data = test_data = data[-100:, :]
        data = build_recommendation_data("custom", data, val_data, test_data)
        self.data_path = kwargs.get("data_path", "tmp_data.pt")
        self.batch_size = kwargs.get("batch_size", 128)
        torch.save(data, self.data_path)
        self.dataset = NodeDataset(path=self.data_path, scale_feat=False)
    elif "dataset" in kwargs:
        dataset = kwargs.pop("dataset")
        self.dataset = build_dataset_from_name(dataset)
    else:
        print("Please provide recommendation data!")
        exit(0)
    # Note: this overrides the batch size set in the "data" branch above.
    self.batch_size = kwargs.get("batch_size", 2048)
    self.n_items = self.dataset[0].n_params["n_items"]
    args = get_default_args(task="recommendation", dataset="ali", model=model, **kwargs)
    args.model = args.model[0]
    # task = build_task(args, dataset=self.dataset)
    # task.train()
    # self.model = task.model
    self.model = build_model(args)
    self.model.eval()
    self.user_emb, self.item_emb = self.model.generate()
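# Hedged usage sketch for the pipeline above, via CogDL's `pipeline` entry
# point. The model name "lightgcn", the dataset key "ali", and the call
# signature of the resulting object are assumptions; check the registered
# recommendation models and this class's __call__ before relying on them.
from cogdl import pipeline

rec = pipeline("recommendation", model="lightgcn", dataset="ali")
# rec([0])  # assumed: returns ranked item ids for user 0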
def plot_graph(args):
    if not isinstance(args.dataset, list):
        args.dataset = [args.dataset]
    for name in args.dataset:
        dataset = build_dataset_from_name(name)
        data = dataset[0]
        depth = args.depth
        pic_file = osp.join(args.save_dir, f'display_{name}.png')

        col_names = [
            'Dataset', '#nodes', '#edges', '#features', '#classes',
            '#labeled data'
        ]
        tab_data = [[
            name,
            data.x.shape[0],
            data.edge_index.shape[1],
            data.x.shape[1],
            len(set(data.y.numpy())),
            sum(data.train_mask.numpy())
        ]]
        print(tabulate(tab_data, headers=col_names, tablefmt='psql'))

        # Build an undirected NetworkX graph from the edge index.
        G = nx.Graph()
        G.add_edges_from([
            tuple(data.edge_index[:, i].numpy())
            for i in range(data.edge_index.shape[1])
        ])

        # Breadth-first expansion from a random start node, up to `depth` hops;
        # node_index records each node's hop distance from the start.
        s = random.choice(list(G.nodes()))
        q = [s]
        node_set = set([s])
        node_index = {s: 0}
        max_index = 1
        for _ in range(depth):
            nq = []
            for x in q:
                for key in G[x].keys():
                    if key not in node_set:
                        nq.append(key)
                        node_set.add(key)
                        node_index[key] = node_index[x] + 1
            if len(nq) > 0:
                max_index += 1
            q = nq

        # Color nodes by hop distance; nodes closer to the start are drawn larger.
        cmap = cm.rainbow(np.linspace(0.0, 1.0, max_index))
        for node, index in node_index.items():
            G.nodes[node]['color'] = cmap[index]
            G.nodes[node]['size'] = (max_index - index) * 50

        fig, ax = plt.subplots()
        plot_network(G.subgraph(list(node_set)), node_style=use_attributes())
        plt.savefig(pic_file)
        print(f'Sampled ego network saved to {pic_file}.')
def _call(self, dataset="cora", seed=-1, depth=3, **kwargs):
    if isinstance(dataset, list):
        dataset = dataset[0]
    name = dataset
    dataset = build_dataset_from_name(name)
    data = dataset[0]

    # CogDL stores edge_index as a (row, col) pair; stack it into a 2 x E tensor.
    G = nx.Graph()
    edge_index = torch.stack(data.edge_index)
    G.add_edges_from([
        tuple(edge_index[:, i].numpy()) for i in range(edge_index.shape[1])
    ])

    if seed == -1:
        seed = random.choice(list(G.nodes()))

    # Breadth-first expansion from the seed node, up to `depth` hops.
    q = [seed]
    node_set = set([seed])
    node_index = {seed: 0}
    max_index = 1
    for _ in range(depth):
        nq = []
        for x in q:
            for key in G[x].keys():
                if key not in node_set:
                    nq.append(key)
                    node_set.add(key)
                    node_index[key] = node_index[x] + 1
        if len(nq) > 0:
            max_index += 1
        q = nq

    # Color by hop distance; nodes closer to the seed are drawn larger.
    cmap = cm.rainbow(np.linspace(0.0, 1.0, max_index))
    for node, index in node_index.items():
        G.nodes[node]["color"] = cmap[index]
        G.nodes[node]["size"] = (max_index - index) * 50

    pic_file = f"{name}.png"
    plt.subplots()
    plot_network(G.subgraph(list(node_set)), node_style=use_attributes())
    plt.savefig(pic_file)
    print(f"Sampled ego network saved to {pic_file}")
    return q
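# Hedged usage sketch: this `_call` matches the shape of CogDL's
# "dataset-visual" pipeline. The app name is an assumption based on the
# public `pipeline` API; adjust it if the registry uses a different key.
from cogdl import pipeline

visual = pipeline("dataset-visual")
visual("cora", depth=3)  # saves cora.png; returns the outermost BFS frontier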
import torch
import torch.nn as nn
import torch.nn.functional as F

from cogdl.datasets import build_dataset_from_name
from cogdl.layers import GCNLayer


class Gnn(nn.Module):
    def __init__(self, in_feats, hidden_size, out_feats, dropout):
        super(Gnn, self).__init__()
        self.conv1 = GCNLayer(in_feats, hidden_size)
        self.conv2 = GCNLayer(hidden_size, out_feats)
        self.dropout = nn.Dropout(dropout)

    def forward(self, graph):
        graph.sym_norm()  # symmetric normalization of the adjacency matrix
        h = graph.x
        h = F.relu(self.conv1(graph, self.dropout(h)))
        h = self.conv2(graph, self.dropout(h))
        return F.log_softmax(h, dim=1)


if __name__ == "__main__":
    dataset = build_dataset_from_name("cora")[0]
    model = Gnn(in_feats=dataset.num_features, hidden_size=64, out_feats=dataset.num_classes, dropout=0.1)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=5e-3)
    model.train()
    for epoch in range(300):
        optimizer.zero_grad()
        out = model(dataset)
        loss = F.nll_loss(out[dataset.train_mask], dataset.y[dataset.train_mask])
        loss.backward()
        optimizer.step()
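# A minimal evaluation sketch, not part of the training script above. It
# assumes the CogDL graph exposes `test_mask` alongside `train_mask`, which
# holds for the built-in Cora split.
    model.eval()
    with torch.no_grad():
        pred = model(dataset).argmax(dim=1)
        correct = int((pred[dataset.test_mask] == dataset.y[dataset.test_mask]).sum())
        acc = correct / int(dataset.test_mask.sum())
    print(f"Test accuracy: {acc:.4f}")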
def test_customized_dataset():
    dataset = build_dataset_from_name("mydataset")
    assert isinstance(dataset[0], Graph)
    assert dataset[0].x.shape[0] == 100
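# For "mydataset" to resolve, it must be registered with CogDL first. A
# minimal sketch, assuming a NodeDataset subclass and a `register_dataset`
# decorator; both names are assumptions, and the registration mechanism
# differs between CogDL versions.
import torch
from cogdl.data import Graph
from cogdl.datasets import NodeDataset, register_dataset  # assumed API

@register_dataset("mydataset")
class MyNodeDataset(NodeDataset):
    def __init__(self, path="mydata.pt"):
        self.path = path
        super(MyNodeDataset, self).__init__(path)

    def process(self):
        # 100 nodes with random features and labels, matching the test above.
        num_nodes, num_feats, num_edges = 100, 16, 300
        x = torch.randn(num_nodes, num_feats)
        y = torch.randint(0, 2, (num_nodes,))
        edge_index = torch.randint(0, num_nodes, (2, num_edges))
        return Graph(x=x, y=y, edge_index=edge_index)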