def main():
    """Train a link-prediction model on the WN18 knowledge graph.

    Loads the pickled WordNet graph, wraps it in a deepsnap ``GraphDataset``
    (link prediction task), splits it transductively 80/10/10, and trains
    the model returned by ``Net`` with Adam.
    """
    args = arg_parse()
    edge_train_mode = args.mode
    print('edge train mode: {}'.format(edge_train_mode))

    WN_graph = nx.read_gpickle(args.data_path)
    print('Each node has node ID (n_id). Example: ', WN_graph.nodes[0])
    print(
        'Each edge has edge ID (id) and categorical label (e_label). Example: ',
        WN_graph[0][5871])

    # Since both feature and label are relation types,
    # only the disjoint mode would make sense.
    node_feature = torch.ones(WN_graph.number_of_nodes(), 5)
    edge_index = torch.LongTensor(list(WN_graph.edges())).permute(1, 0)
    graph = Graph(node_feature=node_feature, edge_index=edge_index,
                  directed=True)
    dataset = GraphDataset(
        [graph],
        task='link_pred',
        edge_train_mode=edge_train_mode,
        edge_message_ratio=args.edge_message_ratio,
        edge_negative_sampling_ratio=args.neg_sampling_ratio)

    # Find the number of distinct edge types.  A list with an equality
    # membership test is kept (rather than a set) because `e_label` values
    # may not hash by value (e.g. tensors) — TODO confirm.
    labels = []
    for u, v, edge_key in WN_graph.edges:
        label = WN_graph[u][v][edge_key]['e_label']
        if label not in labels:  # fixed idiom: was `if not l in labels`
            labels.append(label)
    # labels are consecutive (0-17)
    num_edge_types = len(labels)

    print('Pre-transform: ', dataset[0])
    dataset = dataset.apply_transform(WN_transform, update_tensor=False,
                                      G=WN_graph,
                                      num_edge_types=num_edge_types,
                                      deep_copy=False)
    print('Post-transform: ', dataset[0])
    print('Initial data: {} nodes; {} edges.'.format(dataset[0].num_nodes,
                                                     dataset[0].num_edges))
    print('Number of node features: {}'.format(dataset.num_node_features))

    # split dataset
    datasets = {}
    datasets['train'], datasets['val'], datasets['test'] = dataset.split(
        transductive=True, split_ratio=[0.8, 0.1, 0.1])
    print('After split:')
    print('Train message-passing graph: {} nodes; {} edges.'.format(
        datasets['train'][0].num_nodes, datasets['train'][0].num_edges))
    print('Val message-passing graph: {} nodes; {} edges.'.format(
        datasets['val'][0].num_nodes, datasets['val'][0].num_edges))
    print('Test message-passing graph: {} nodes; {} edges.'.format(
        datasets['test'][0].num_nodes, datasets['test'][0].num_edges))

    # node feature dimension
    input_dim = datasets['train'].num_node_features
    edge_feat_dim = datasets['train'].num_edge_features
    num_classes = datasets['train'].num_edge_labels
    print(
        'Node feature dim: {}; edge feature dim: {}; num classes: {}.'.format(
            input_dim, edge_feat_dim, num_classes))

    # relation type is both used for edge features and edge labels
    model = Net(input_dim, edge_feat_dim, num_classes, args).to(args.device)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001,
                                 weight_decay=5e-3)

    follow_batch = []  # e.g., follow_batch = ['edge_index']
    dataloaders = {
        split: DataLoader(ds, collate_fn=Batch.collate(follow_batch),
                          batch_size=1, shuffle=(split == 'train'))
        for split, ds in datasets.items()
    }
    print('Graphs after split: ')
    for key, dataloader in dataloaders.items():
        for batch in dataloader:
            print(key, ': ', batch)

    train(model, dataloaders, optimizer, args)
# The edges in the graph have the features: edge_type ("cora_edge" or "citeseer_edge") print("The edges in the concatenated heterogeneous graph have the following features:") for edge in G.edges(data=True): print(edge[2]) break hete = HeteroGraph(G) print(f"Heterogeneous graph {hete.num_nodes()} nodes, {hete.num_edges()} edges") dataset = GraphDataset([hete], task='node') dataset_train, dataset_val, dataset_test = dataset.split( transductive=True, split_ratio=[0.8, 0.1, 0.1] ) train_loader = DataLoader( dataset_train, collate_fn=Batch.collate(), batch_size=16 ) val_loader = DataLoader( dataset_val, collate_fn=Batch.collate(), batch_size=16 ) test_loader = DataLoader( dataset_test, collate_fn=Batch.collate(), batch_size=16 ) loaders = [train_loader, val_loader, test_loader] hidden_size = 32 model = HeteroNet(hete, hidden_size, 0.5).to(device) optimizer = torch.optim.Adam( model.parameters(), lr=0.01, weight_decay=5e-3 ) num_epochs = 100
def main():
    """Cora link-prediction example.

    Builds one (or, with --multigraph, ten copies of) the Cora graph,
    splits for link prediction, and trains with SGD + cosine annealing.
    """
    args = arg_parse()

    pyg_dataset = Planetoid('./cora', 'Cora', transform=T.TargetIndegree())

    # the input that we assume users have
    edge_train_mode = args.mode
    print('edge train mode: {}'.format(edge_train_mode))

    graphs = GraphDataset.pyg_to_graphs(pyg_dataset, tensor_backend=True)
    if args.multigraph:
        # Inductive setting: ten independent copies of the same graph.
        graphs = [copy.deepcopy(graphs[0]) for _ in range(10)]

    dataset = GraphDataset(
        graphs,
        task='link_pred',
        edge_message_ratio=args.edge_message_ratio,
        edge_train_mode=edge_train_mode,
    )
    print('Initial dataset: {}'.format(dataset))

    # split dataset (transductive unless we replicated into many graphs)
    datasets = {}
    datasets['train'], datasets['val'], datasets['test'] = dataset.split(
        transductive=not args.multigraph, split_ratio=[0.85, 0.05, 0.1])

    print('after split')
    train_graph = datasets['train'][0]
    val_graph = datasets['val'][0]
    test_graph = datasets['test'][0]
    print('Train message-passing graph: {} nodes; {} edges.'.format(
        train_graph.num_nodes, train_graph.num_edges))
    print('Val message-passing graph: {} nodes; {} edges.'.format(
        val_graph.num_nodes, val_graph.num_edges))
    print('Test message-passing graph: {} nodes; {} edges.'.format(
        test_graph.num_nodes, test_graph.num_edges))

    # node feature dimension
    input_dim = datasets['train'].num_node_features
    # link prediction needs 2 classes (0, 1)
    num_classes = datasets['train'].num_edge_labels

    model = Net(input_dim, num_classes, args).to(args.device)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9,
                                weight_decay=5e-4)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, T_max=args.epochs)

    follow_batch = []  # e.g., follow_batch = ['edge_index']
    dataloaders = {}
    for split, ds in datasets.items():
        dataloaders[split] = DataLoader(
            ds, collate_fn=Batch.collate(follow_batch),
            batch_size=args.batch_size, shuffle=(split == 'train'))

    print('Graphs after split: ')
    for split, loader in dataloaders.items():
        for batch in loader:
            print(split, ': ', batch)

    train(model, dataloaders, optimizer, args, scheduler=scheduler)
def _cmap_out_edges(graph, node):
    """Return all outgoing CMap edges of ``node`` as (u, v, key) triples.

    CMap edges are identified by attribute ``edge_type == 1``; other edge
    types (e.g. PPI edges in 'mixed' mode) are excluded.
    """
    return [(node, v, edge_key)
            for v in graph.successors(node)
            for edge_key in graph[node][v]
            if graph[node][v][edge_key]['edge_type'] == 1]


def main():
    """Train a disjoint-mode link-prediction model on the CMap(+PPI) graph.

    Builds a message-passing graph (CMap edges, optionally merged with a
    PPI backbone), custom-splits CMap edges of knockout nodes into
    train/val/disjoint sets, and trains the ``Net`` model.
    """
    writer = SummaryWriter()
    args = arg_parse()

    edge_train_mode = args.mode
    print('edge train mode: {}'.format(edge_train_mode))

    ppi_graph = read_ppi_data(args.ppi_path)
    # 'mixed' merges CMap edges into the PPI graph; 'ppi' keeps them apart.
    mode = 'mixed'
    if mode == 'ppi':
        message_passing_graph = ppi_graph
        cmap_graph, knockout_nodes = read_cmap_data(args.data_path)
    elif mode == 'mixed':
        message_passing_graph, knockout_nodes = (read_cmap_data(
            args.data_path, ppi_graph))

    print('Each node has gene ID. Example: ',
          message_passing_graph.nodes['ADPGK'])
    print('Each edge has de direction. Example',
          message_passing_graph['ADPGK']['IL1B'])
    print('Total num edges: ', message_passing_graph.number_of_edges())

    # disjoint edge label: per knockout node, route ALL of its CMap edges
    # into disjoint (10%), val (10%), or train (80%).
    disjoint_split_ratio = 0.1
    val_ratio = 0.1
    disjoint_edge_label_index = []
    val_edges = []
    train_edges = []
    for u in knockout_nodes:
        rand_num = np.random.rand()
        # Compute the node's CMap edges once (the original repeated the
        # same comprehension in every branch).
        edges = _cmap_out_edges(message_passing_graph, u)
        if rand_num < disjoint_split_ratio:
            # add all edges (cmap only) into edge label index;
            # cmap is not a multigraph.  Disjoint edges also stay in train.
            disjoint_edge_label_index.extend(edges)
            train_edges.extend(edges)
        elif rand_num < disjoint_split_ratio + val_ratio:
            val_edges.extend(edges)
        else:
            train_edges.extend(edges)

    print('Num edges to predict: ', len(disjoint_edge_label_index))
    print('Num edges in val: ', len(val_edges))
    print('Num edges in train: ', len(train_edges))

    graph = Graph(
        message_passing_graph,
        custom={
            "general_splits": [train_edges, val_edges],
            # "disjoint_split": disjoint_edge_label_index,
            "task": "link_pred"
        })
    graphs = [graph]
    graphDataset = GraphDataset(graphs,
                                task="link_pred",
                                edge_train_mode="disjoint",
                                resample_disjoint=True,
                                resample_disjoint_period=100)

    # Transform dataset
    # de direction (currently using homogeneous graph)
    num_edge_types = 2
    graphDataset = graphDataset.apply_transform(cmap_transform,
                                                num_edge_types=num_edge_types,
                                                deep_copy=False)
    print('Number of node features: {}'.format(graphDataset.num_node_features))

    # split dataset (custom general_splits above define train/val)
    dataset = {}
    dataset['train'], dataset['val'] = graphDataset.split(transductive=True)

    # sanity check
    print(
        f"dataset['train'][0].edge_label_index.shape[1]: {dataset['train'][0].edge_label_index.shape[1]}"
    )
    print(
        f"dataset['val'][0].edge_label_index.shape[1]: {dataset['val'][0].edge_label_index.shape[1]}"
    )
    print(
        f"len(list(dataset['train'][0].G.edges)): {len(list(dataset['train'][0].G.edges))}"
    )
    print(
        f"len(list(dataset['val'][0].G.edges)): {len(list(dataset['val'][0].G.edges))}"
    )
    print(
        f"list(dataset['train'][0].G.edges)[:10]: {list(dataset['train'][0].G.edges)[:10]}"
    )
    print(
        f"list(dataset['val'][0].G.edges)[:10]: {list(dataset['val'][0].G.edges)[:10]}"
    )

    # node feature dimension
    input_dim = dataset['train'].num_node_features
    edge_feat_dim = dataset['train'].num_edge_features
    num_classes = dataset['train'].num_edge_labels
    print(
        'Node feature dim: {}; edge feature dim: {}; num classes: {}.'.format(
            input_dim, edge_feat_dim, num_classes))

    # relation type is both used for edge features and edge labels
    model = Net(input_dim, edge_feat_dim, num_classes, args).to(args.device)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001,
                                 weight_decay=5e-3)

    follow_batch = []  # e.g., follow_batch = ['edge_index']
    dataloaders = {
        split: DataLoader(ds, collate_fn=Batch.collate(follow_batch),
                          batch_size=1, shuffle=(split == 'train'))
        for split, ds in dataset.items()
    }
    print("Graphs after split: ")
    for key, dataloader in dataloaders.items():
        for batch in dataloader:
            print(key, ": ", batch)

    train(model, dataloaders, optimizer, args, writer=writer)
# NOTE(review): this chunk begins mid-expression — the leading
# `directed=directed)` closes a Graph(...) call whose opening lies outside
# this view (presumably the graph_val construction).  graph_train, graph_val,
# x, y, edge_index, test_label_index, args, and train come from that
# surrounding code.
directed=directed)
graph_test = Graph(node_feature=x,
                   node_label=y,
                   edge_index=edge_index,
                   node_label_index=test_label_index,
                   directed=directed)

# One single-graph dataset per split; node-classification task.
graphs_train = [graph_train]
graphs_val = [graph_val]
graphs_test = [graph_test]
dataset_train, dataset_val, dataset_test = \
    GraphDataset(graphs_train, task='node'), GraphDataset(graphs_val, task='node'), \
    GraphDataset(graphs_test, task='node')

train_loader = DataLoader(dataset_train, collate_fn=Batch.collate(),
                          batch_size=16)  # basic data loader
val_loader = DataLoader(dataset_val, collate_fn=Batch.collate(),
                        batch_size=16)  # basic data loader
test_loader = DataLoader(dataset_test, collate_fn=Batch.collate(),
                         batch_size=16)  # basic data loader

# Feature/label dimensions are read off the training split.
num_node_features = dataset_train.num_node_features
num_classes = dataset_train.num_node_labels
train(train_loader, val_loader, test_loader, args, num_node_features,
      num_classes, args.device)