def main():
    """Link prediction on the WordNet (WN18) relation graph.

    Loads the pickled graph, wraps it in a DeepSNAP ``GraphDataset`` in
    ``link_pred`` mode, attaches relation-type edge features/labels via
    ``WN_transform``, splits transductively 80/10/10, and trains the model.
    """
    args = arg_parse()

    edge_train_mode = args.mode
    print('edge train mode: {}'.format(edge_train_mode))

    WN_graph = nx.read_gpickle(args.data_path)
    print('Each node has node ID (n_id). Example: ', WN_graph.nodes[0])
    print(
        'Each edge has edge ID (id) and categorical label (e_label). Example: ',
        WN_graph[0][5871])

    # Since both feature and label are relation types,
    # only the disjoint mode would make sense.
    # Constant (all-ones) node features: the model can only exploit graph
    # structure and relation types.
    node_feature = torch.ones(WN_graph.number_of_nodes(), 5)
    edge_index = torch.LongTensor(list(WN_graph.edges())).permute(1, 0)
    graph = Graph(node_feature=node_feature,
                  edge_index=edge_index,
                  directed=True)

    dataset = GraphDataset(
        [graph],
        task='link_pred',
        edge_train_mode=edge_train_mode,
        edge_message_ratio=args.edge_message_ratio,
        edge_negative_sampling_ratio=args.neg_sampling_ratio)

    # Count distinct relation types (number of edge classes).
    # Fixed: removed the unused `max_label` accumulator and replaced the
    # non-idiomatic `not l in labels` with `label not in labels`.
    labels = []
    for u, v, edge_key in WN_graph.edges:
        label = WN_graph[u][v][edge_key]['e_label']
        if label not in labels:
            labels.append(label)
    # labels are consecutive (0-17)
    num_edge_types = len(labels)

    print('Pre-transform: ', dataset[0])
    dataset = dataset.apply_transform(WN_transform,
                                      update_tensor=False,
                                      G=WN_graph,
                                      num_edge_types=num_edge_types,
                                      deep_copy=False)
    print('Post-transform: ', dataset[0])
    print('Initial data: {} nodes; {} edges.'.format(dataset[0].num_nodes,
                                                     dataset[0].num_edges))
    print('Number of node features: {}'.format(dataset.num_node_features))

    # Transductive split over the single graph's edges.
    datasets = {}
    datasets['train'], datasets['val'], datasets['test'] = dataset.split(
        transductive=True, split_ratio=[0.8, 0.1, 0.1])

    print('After split:')
    print('Train message-passing graph: {} nodes; {} edges.'.format(
        datasets['train'][0].num_nodes, datasets['train'][0].num_edges))
    print('Val message-passing graph: {} nodes; {} edges.'.format(
        datasets['val'][0].num_nodes, datasets['val'][0].num_edges))
    print('Test message-passing graph: {} nodes; {} edges.'.format(
        datasets['test'][0].num_nodes, datasets['test'][0].num_edges))

    # node feature dimension
    input_dim = datasets['train'].num_node_features
    edge_feat_dim = datasets['train'].num_edge_features
    num_classes = datasets['train'].num_edge_labels
    print(
        'Node feature dim: {}; edge feature dim: {}; num classes: {}.'.format(
            input_dim, edge_feat_dim, num_classes))

    # relation type is both used for edge features and edge labels
    model = Net(input_dim, edge_feat_dim, num_classes, args).to(args.device)
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=0.001,
                                 weight_decay=5e-3)
    follow_batch = []  # e.g., follow_batch = ['edge_index']

    # One graph per batch; shuffling only matters for the training split.
    dataloaders = {
        split: DataLoader(ds,
                          collate_fn=Batch.collate(follow_batch),
                          batch_size=1,
                          shuffle=(split == 'train'))
        for split, ds in datasets.items()
    }
    print('Graphs after split: ')
    for key, dataloader in dataloaders.items():
        for batch in dataloader:
            print(key, ': ', batch)

    train(model, dataloaders, optimizer, args)
# Example #2
# 0
    # NOTE(review): fragment — the enclosing function header and the graph
    # `G` (a concatenation of Cora and Citeseer per the comment below) are
    # defined outside this excerpt; left byte-identical, comments only.
    # The edges in the graph have the features: edge_type ("cora_edge" or "citeseer_edge")
    print("The edges in the concatenated heterogeneous graph have the following features:")
    for edge in G.edges(data=True):
        # Print a single representative edge-attribute dict, then stop.
        print(edge[2])
        break

    # Wrap the NetworkX graph as a DeepSNAP heterogeneous graph.
    hete = HeteroGraph(G)
    print(f"Heterogeneous graph {hete.num_nodes()} nodes, {hete.num_edges()} edges")

    # Transductive 80/10/10 node-classification split.
    dataset = GraphDataset([hete], task='node')
    dataset_train, dataset_val, dataset_test = dataset.split(
        transductive=True,
        split_ratio=[0.8, 0.1, 0.1]
    )
    train_loader = DataLoader(
        dataset_train, collate_fn=Batch.collate(), batch_size=16
    )
    val_loader = DataLoader(
        dataset_val, collate_fn=Batch.collate(), batch_size=16
    )
    test_loader = DataLoader(
        dataset_test, collate_fn=Batch.collate(), batch_size=16
    )
    loaders = [train_loader, val_loader, test_loader]

    hidden_size = 32
    # 0.5 is presumably a dropout probability — TODO confirm against HeteroNet.
    model = HeteroNet(hete, hidden_size, 0.5).to(device)
    optimizer = torch.optim.Adam(
        model.parameters(), lr=0.01, weight_decay=5e-3
    )
    num_epochs = 100
def main():
    """Link prediction on Cora, optionally replicated as a multi-graph corpus."""
    args = arg_parse()

    pyg_dataset = Planetoid('./cora', 'Cora', transform=T.TargetIndegree())

    # The edge-training mode is the input we assume users provide.
    edge_train_mode = args.mode
    print('edge train mode: {}'.format(edge_train_mode))

    graphs = GraphDataset.pyg_to_graphs(pyg_dataset, tensor_backend=True)
    if args.multigraph:
        # Emulate a multi-graph dataset by cloning the single graph ten times.
        graphs = [copy.deepcopy(graphs[0]) for _ in range(10)]

    dataset = GraphDataset(
        graphs,
        task='link_pred',
        edge_message_ratio=args.edge_message_ratio,
        edge_train_mode=edge_train_mode
    )
    print('Initial dataset: {}'.format(dataset))

    # Transductive split for the single-graph case, inductive for multigraph.
    splits = dataset.split(
        transductive=not args.multigraph, split_ratio=[0.85, 0.05, 0.1])
    datasets = dict(zip(('train', 'val', 'test'), splits))

    print('after split')
    train_graph = datasets['train'][0]
    val_graph = datasets['val'][0]
    test_graph = datasets['test'][0]
    print(f'Train message-passing graph: {train_graph.num_nodes} nodes; '
          f'{train_graph.num_edges} edges.')
    print(f'Val message-passing graph: {val_graph.num_nodes} nodes; '
          f'{val_graph.num_edges} edges.')
    print(f'Test message-passing graph: {test_graph.num_nodes} nodes; '
          f'{test_graph.num_edges} edges.')

    # Node feature dimension; link prediction needs 2 classes (0, 1).
    input_dim = datasets['train'].num_node_features
    num_classes = datasets['train'].num_edge_labels

    model = Net(input_dim, num_classes, args).to(args.device)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9,
                                weight_decay=5e-4)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, T_max=args.epochs)
    follow_batch = []  # e.g. ['edge_index'] to track per-graph batch vectors

    dataloaders = {}
    for split_name, split_dataset in datasets.items():
        dataloaders[split_name] = DataLoader(
            split_dataset,
            collate_fn=Batch.collate(follow_batch),
            batch_size=args.batch_size,
            shuffle=(split_name == 'train'))

    print('Graphs after split: ')
    for split_name, loader in dataloaders.items():
        for batch in loader:
            print(split_name, ': ', batch)

    train(model, dataloaders, optimizer, args, scheduler=scheduler)
# Example #4
# 0
def main():
    """Link prediction on a CMap/PPI gene-interaction graph.

    Builds a message-passing graph, assigns each knockout node's outgoing
    CMap edges to disjoint-supervision / validation / training sets at
    random, then trains with a custom DeepSNAP split.
    """
    writer = SummaryWriter()
    args = arg_parse()

    edge_train_mode = args.mode
    print('edge train mode: {}'.format(edge_train_mode))

    ppi_graph = read_ppi_data(args.ppi_path)

    # 'ppi': message passing over the PPI graph only;
    # 'mixed': PPI merged with the CMap graph.
    mode = 'mixed'
    if mode == 'ppi':
        message_passing_graph = ppi_graph
        cmap_graph, knockout_nodes = read_cmap_data(args.data_path)
    elif mode == 'mixed':
        message_passing_graph, knockout_nodes = (read_cmap_data(
            args.data_path, ppi_graph))
    else:
        # Fixed: previously an unknown mode left message_passing_graph
        # unbound and crashed later with a confusing NameError.
        raise ValueError('unknown mode: {}'.format(mode))

    print('Each node has gene ID. Example: ',
          message_passing_graph.nodes['ADPGK'])
    print('Each edge has de direction. Example',
          message_passing_graph['ADPGK']['IL1B'])
    print('Total num edges: ', message_passing_graph.number_of_edges())

    def cmap_out_edges(u):
        # All outgoing CMap edges (edge_type == 1) of knockout node `u`.
        # The CMap portion is not a multigraph, so each (u, v) pair carries
        # at most one matching edge key.
        return [
            (u, v, edge_key) for v in message_passing_graph.successors(u)
            for edge_key in message_passing_graph[u][v]
            if message_passing_graph[u][v][edge_key]['edge_type'] == 1
        ]

    # Per-knockout-node random assignment: ~10% disjoint supervision edges
    # (also kept in train), ~10% validation, remainder train.
    disjoint_split_ratio = 0.1
    val_ratio = 0.1
    disjoint_edge_label_index = []
    val_edges = []
    train_edges = []
    for u in knockout_nodes:
        rand_num = np.random.rand()
        if rand_num < disjoint_split_ratio:
            # Fixed: build the edge list once instead of evaluating the
            # identical triple comprehension twice.
            edges = cmap_out_edges(u)
            disjoint_edge_label_index.extend(edges)
            train_edges.extend(edges)
        elif rand_num < disjoint_split_ratio + val_ratio:
            val_edges.extend(cmap_out_edges(u))
        else:
            train_edges.extend(cmap_out_edges(u))

    print('Num edges to predict: ', len(disjoint_edge_label_index))
    print('Num edges in val: ', len(val_edges))
    print('Num edges in train: ', len(train_edges))

    # Custom splits hand DeepSNAP the exact train/val edge partition.
    graph = Graph(
        message_passing_graph,
        custom={
            "general_splits": [train_edges, val_edges],
            # "disjoint_split": disjoint_edge_label_index,
            "task": "link_pred"
        })
    graphs = [graph]
    graphDataset = GraphDataset(graphs,
                                task="link_pred",
                                edge_train_mode="disjoint",
                                resample_disjoint=True,
                                resample_disjoint_period=100)

    # Transform dataset
    # de direction (currently using homogeneous graph)
    num_edge_types = 2

    graphDataset = graphDataset.apply_transform(cmap_transform,
                                                num_edge_types=num_edge_types,
                                                deep_copy=False)
    print('Number of node features: {}'.format(graphDataset.num_node_features))

    # split dataset (uses the custom general_splits supplied above)
    dataset = {}
    dataset['train'], dataset['val'] = graphDataset.split(transductive=True)

    # sanity check
    print(
        f"dataset['train'][0].edge_label_index.shape[1]: {dataset['train'][0].edge_label_index.shape[1]}"
    )
    print(
        f"dataset['val'][0].edge_label_index.shape[1]: {dataset['val'][0].edge_label_index.shape[1]}"
    )
    print(
        f"len(list(dataset['train'][0].G.edges)): {len(list(dataset['train'][0].G.edges))}"
    )
    print(
        f"len(list(dataset['val'][0].G.edges)): {len(list(dataset['val'][0].G.edges))}"
    )
    print(
        f"list(dataset['train'][0].G.edges)[:10]: {list(dataset['train'][0].G.edges)[:10]}"
    )
    print(
        f"list(dataset['val'][0].G.edges)[:10]: {list(dataset['val'][0].G.edges)[:10]}"
    )

    # node feature dimension
    input_dim = dataset['train'].num_node_features
    edge_feat_dim = dataset['train'].num_edge_features
    num_classes = dataset['train'].num_edge_labels
    print(
        'Node feature dim: {}; edge feature dim: {}; num classes: {}.'.format(
            input_dim, edge_feat_dim, num_classes))

    # relation type is both used for edge features and edge labels
    model = Net(input_dim, edge_feat_dim, num_classes, args).to(args.device)
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=0.001,
                                 weight_decay=5e-3)
    follow_batch = []  # e.g., follow_batch = ['edge_index']

    dataloaders = {
        split: DataLoader(ds,
                          collate_fn=Batch.collate(follow_batch),
                          batch_size=1,
                          shuffle=(split == 'train'))
        for split, ds in dataset.items()
    }
    print("Graphs after split: ")
    for key, dataloader in dataloaders.items():
        for batch in dataloader:
            print(key, ": ", batch)

    train(model, dataloaders, optimizer, args, writer=writer)
# Example #5
# 0
                          directed=directed)
        # NOTE(review): fragment — the enclosing function, the construction
        # of graph_train/graph_val, and the tensors x/y/edge_index begin
        # outside this excerpt; code left byte-identical, comments only.
        graph_test = Graph(node_feature=x,
                           node_label=y,
                           edge_index=edge_index,
                           node_label_index=test_label_index,
                           directed=directed)

        # One single-graph list per split for transductive node classification.
        graphs_train = [graph_train]
        graphs_val = [graph_val]
        graphs_test = [graph_test]

        dataset_train, dataset_val, dataset_test = \
            GraphDataset(graphs_train, task='node'), GraphDataset(graphs_val,task='node'), \
            GraphDataset(graphs_test, task='node')

    # Batch size 16 is a ceiling; each dataset here holds a single graph.
    train_loader = DataLoader(dataset_train,
                              collate_fn=Batch.collate(),
                              batch_size=16)  # basic data loader
    val_loader = DataLoader(dataset_val,
                            collate_fn=Batch.collate(),
                            batch_size=16)  # basic data loader
    test_loader = DataLoader(dataset_test,
                             collate_fn=Batch.collate(),
                             batch_size=16)  # basic data loader

    # Model dimensions taken from the training split.
    num_node_features = dataset_train.num_node_features
    num_classes = dataset_train.num_node_labels

    train(train_loader, val_loader, test_loader, args, num_node_features,
          num_classes, args.device)