Example #1
0
def deepsnap_pagerank(args, pyg_dataset):
    """Benchmark DeepSNAP's PageRank transform, averaged over several runs.

    Args:
        args: parsed CLI arguments; reads ``num_runs``, ``print_run`` and
            ``netlib`` (the resolved backend network library).
        pyg_dataset: a PyTorch Geometric dataset converted to DeepSNAP
            graphs before every timed run.

    Only the ``apply_transform`` call is timed; the pyg->DeepSNAP
    conversion is redone each iteration so every run starts from a
    fresh, unmodified dataset.
    """
    avg_time = 0
    task = 'graph'
    for i in range(args.num_runs):
        if args.print_run:
            print("Run {}".format(i + 1))
        # Fix: the original passed a bare `netlib`, which is undefined in
        # this scope — the resolved library is carried on `args.netlib`
        # (as the apply_transform call below already assumes).
        # NOTE(review): if a module-level `netlib` global exists outside
        # this excerpt, confirm the two refer to the same object.
        graphs = GraphDataset.pyg_to_graphs(pyg_dataset,
                                            verbose=True,
                                            fixed_split=False,
                                            netlib=args.netlib)
        dataset = GraphDataset(graphs, task=task)
        s = time.time()
        dataset.apply_transform(page_fun, update_tensor=False, lib=args.netlib)
        avg_time += (time.time() - s)
    print("DeepSNAP has average time: {}".format(avg_time / args.num_runs))
Example #2
0
def main():
    """Train a link-prediction model on the WordNet graph.

    Loads a pickled networkx graph, wraps it in a DeepSNAP
    ``GraphDataset`` (disjoint edge-train mode makes sense here because
    the relation type serves as both edge feature and edge label),
    applies the WN transform, splits transductively, and trains.
    """
    args = arg_parse()

    edge_train_mode = args.mode
    print('edge train mode: {}'.format(edge_train_mode))

    WN_graph = nx.read_gpickle(args.data_path)
    print('Each node has node ID (n_id). Example: ', WN_graph.nodes[0])
    print(
        'Each edge has edge ID (id) and categorical label (e_label). Example: ',
        WN_graph[0][5871])

    # Since both feature and label are relation types,
    # Only the disjoint mode would make sense
    dataset = GraphDataset(
        [WN_graph],
        task='link_pred',
        edge_train_mode=edge_train_mode,
        edge_message_ratio=args.edge_message_ratio,
        edge_negative_sampling_ratio=args.neg_sampling_ratio)

    # Count distinct edge labels; a set gives O(1) membership instead of
    # the original O(n) list scan per edge. (The unused `max_label`
    # accumulator was dropped.)
    labels = set()
    for u, v, edge_key in WN_graph.edges:
        labels.add(WN_graph[u][v][edge_key]['e_label'])
    # labels are consecutive (0-17)
    num_edge_types = len(labels)

    print('Pre-transform: ', dataset[0])
    dataset = dataset.apply_transform(WN_transform,
                                      num_edge_types=num_edge_types,
                                      deep_copy=False)
    print('Post-transform: ', dataset[0])
    print('Initial data: {} nodes; {} edges.'.format(
        dataset[0].G.number_of_nodes(), dataset[0].G.number_of_edges()))
    print('Number of node features: {}'.format(dataset.num_node_features))

    # split dataset
    datasets = {}
    datasets['train'], datasets['val'], datasets['test'] = dataset.split(
        transductive=True, split_ratio=[0.8, 0.1, 0.1])

    print('After split:')
    print('Train message-passing graph: {} nodes; {} edges.'.format(
        datasets['train'][0].G.number_of_nodes(),
        datasets['train'][0].G.number_of_edges()))
    print('Val message-passing graph: {} nodes; {} edges.'.format(
        datasets['val'][0].G.number_of_nodes(),
        datasets['val'][0].G.number_of_edges()))
    print('Test message-passing graph: {} nodes; {} edges.'.format(
        datasets['test'][0].G.number_of_nodes(),
        datasets['test'][0].G.number_of_edges()))

    # node feature dimension
    input_dim = datasets['train'].num_node_features
    edge_feat_dim = datasets['train'].num_edge_features
    num_classes = datasets['train'].num_edge_labels
    print(
        'Node feature dim: {}; edge feature dim: {}; num classes: {}.'.format(
            input_dim, edge_feat_dim, num_classes))

    # relation type is both used for edge features and edge labels
    model = Net(input_dim, edge_feat_dim, num_classes, args).to(args.device)
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=0.001,
                                 weight_decay=5e-3)
    follow_batch = []  # e.g., follow_batch = ['edge_index']

    # batch_size=1: each "batch" is the single WordNet graph; shuffle
    # only the training loader.
    dataloaders = {
        split: DataLoader(ds,
                          collate_fn=Batch.collate(follow_batch),
                          batch_size=1,
                          shuffle=(split == 'train'))
        for split, ds in datasets.items()
    }
    print('Graphs after split: ')
    for key, dataloader in dataloaders.items():
        for batch in dataloader:
            print(key, ': ', batch)

    train(model, dataloaders, optimizer, args)
Example #3
0
        # NOTE(review): this excerpt starts mid-branch — the matching
        # `if`/`elif` arms that resolve `netlib` from `args.netlib` are
        # above this fragment and not visible here.
        print("Use SnapX as the backend network library.")
    else:
        raise ValueError("{} network library is not supported.".format(args.netlib))

    # Replace the string option with the resolved library module so all
    # downstream calls receive the module itself.
    args.netlib = netlib

    graphs = GraphDataset.pyg_to_graphs(pyg_dataset, netlib=args.netlib)

    # Graph-level task; inductive 80/10/10 split (whole graphs per split).
    dataset = GraphDataset(graphs, task="graph")
    datasets = {}
    datasets['train'], datasets['val'], datasets['test'] = dataset.split(
            transductive=False, split_ratio = [0.8, 0.1, 0.1])

    # Optionally apply a dataset transform (e.g. ego-net extraction with
    # the given radius) to every split in place.
    if args.transform_dataset is not None:
        trans_func = get_transform(args.transform_dataset)
        for _, dataset in datasets.items():
            dataset.apply_transform(trans_func, radius=args.radius, netlib=args.netlib)

    dataloaders = {
        split: DataLoader(
            dataset, collate_fn=Batch.collate(), 
            batch_size=args.batch_size, shuffle=True
        ) for split, dataset in datasets.items()
    }

    num_classes = datasets['train'].num_graph_labels
    num_node_features = datasets['train'].num_node_features

    train(dataloaders['train'], dataloaders['val'], dataloaders['test'], 
            args, num_node_features, num_classes, args.device)
Example #4
0
def _cmap_out_edges(graph, u):
    """Return the outgoing cmap edges of node *u* as (u, v, edge_key) triples.

    Only edges whose ``edge_type`` attribute equals 1 (cmap edges, as
    opposed to PPI edges) are kept; *graph* is a directed multigraph.
    """
    return [
        (u, v, edge_key)
        for v in graph.successors(u)
        for edge_key in graph[u][v]
        if graph[u][v][edge_key]['edge_type'] == 1
    ]


def main():
    """Set up disjoint link prediction on a PPI + cmap heterogeneous graph.

    Builds the message-passing graph, randomly assigns each knockout
    node's cmap edges to the disjoint / val / train custom splits, wraps
    everything in a DeepSNAP ``GraphDataset`` and (after a sanity-check
    ``exit()``) would train the model.
    """
    writer = SummaryWriter()
    args = arg_parse()

    edge_train_mode = args.mode
    print('edge train mode: {}'.format(edge_train_mode))

    ppi_graph = read_ppi_data(args.ppi_path)

    mode = 'mixed'
    if mode == 'ppi':
        message_passing_graph = ppi_graph
        cmap_graph, knockout_nodes = read_cmap_data(args.data_path)
    elif mode == 'mixed':
        message_passing_graph, knockout_nodes = (
            read_cmap_data(args.data_path, ppi_graph)
        )

    print('Each node has gene ID. Example: ', message_passing_graph.nodes['ADPGK'])
    print('Each edge has de direction. Example', message_passing_graph['ADPGK']['IL1B'])
    print('Total num edges: ', message_passing_graph.number_of_edges())

    # disjoint edge label
    disjoint_split_ratio = 0.1
    val_ratio = 0.1
    disjoint_edge_label_index = []
    val_edges = []

    # newly edited
    train_edges = []
    # Assign each knockout node's cmap edges to a split at random.
    # The edge list is computed once per node; the original duplicated
    # the same comprehension three times (twice for the disjoint branch).
    for u in knockout_nodes:
        u_edges = _cmap_out_edges(message_passing_graph, u)
        rand_num = np.random.rand()
        if rand_num < disjoint_split_ratio:
            # Disjoint supervision edges also remain in the train split.
            disjoint_edge_label_index.extend(u_edges)
            train_edges.extend(u_edges)
        elif rand_num < disjoint_split_ratio + val_ratio:
            val_edges.extend(u_edges)
        else:
            train_edges.extend(u_edges)
    # add default node types for message_passing_graph
    for node in message_passing_graph.nodes:
        message_passing_graph.nodes[node]['node_type'] = 0

    print('Num edges to predict: ', len(disjoint_edge_label_index))
    print('Num edges in val: ', len(val_edges))
    print('Num edges in train: ', len(train_edges))

    graph = HeteroGraph(
        message_passing_graph,
        custom={
            "general_splits": [
                train_edges,
                val_edges
            ],
            "disjoint_split": disjoint_edge_label_index,
            "task": "link_pred"
        }
    )

    graphs = [graph]
    graphDataset = GraphDataset(
        graphs,
        task="link_pred",
        edge_train_mode="disjoint"
    )

    # Transform dataset
    # de direction (currently using homogeneous graph)
    num_edge_types = 2

    graphDataset = graphDataset.apply_transform(
        cmap_transform, num_edge_types=num_edge_types, deep_copy=False
    )
    print('Number of node features: ', graphDataset.num_node_features())

    # split dataset
    dataset = {}
    dataset['train'], dataset['val'] = graphDataset.split(transductive=True)

    # sanity check
    print(f"dataset['train'][0].edge_label_index.keys(): {dataset['train'][0].edge_label_index.keys()}")
    print(f"dataset['train'][0].edge_label_index[(0, 1, 0)].shape[1]: {dataset['train'][0].edge_label_index[(0, 1, 0)].shape[1]}")
    print(f"dataset['val'][0].edge_label_index.keys(): {dataset['val'][0].edge_label_index.keys()}")
    print(f"dataset['val'][0].edge_label_index[(0, 1, 0)].shape[1]: {dataset['val'][0].edge_label_index[(0, 1, 0)].shape[1]}")
    print(f"len(list(dataset['train'][0].G.edges)): {len(list(dataset['train'][0].G.edges))}")
    print(f"len(list(dataset['val'][0].G.edges)): {len(list(dataset['val'][0].G.edges))}")
    print(f"list(dataset['train'][0].G.edges)[:10]: {list(dataset['train'][0].G.edges)[:10]}")
    print(f"list(dataset['val'][0].G.edges)[:10]: {list(dataset['val'][0].G.edges)[:10]}")


    # node feature dimension
    input_dim = dataset['train'].num_node_features()
    edge_feat_dim = dataset['train'].num_edge_features()
    num_classes = dataset['train'].num_edge_labels()
    print(
        'Node feature dim: {}; edge feature dim: {}; num classes: {}.'.format(
            input_dim, edge_feat_dim, num_classes
        )
    )
    # NOTE(review): debug leftover — everything below is unreachable.
    # Kept to preserve behavior; remove once the sanity checks pass.
    exit()

    # relation type is both used for edge features and edge labels
    model = Net(input_dim, edge_feat_dim, num_classes, args).to(args.device)
    optimizer = torch.optim.Adam(
        model.parameters(), lr=0.001, weight_decay=5e-3
    )
    follow_batch = []  # e.g., follow_batch = ['edge_index']

    dataloaders = {
        split: DataLoader(
            ds, collate_fn=Batch.collate(follow_batch),
            batch_size=1, shuffle=(split == 'train')
        )
        for split, ds in dataset.items()
    }
    print('Graphs after split: ')
    for key, dataloader in dataloaders.items():
        for batch in dataloader:
            print(key, ': ', batch)

    train(model, dataloaders, optimizer, args, writer=writer)
Example #5
0
    # NOTE(review): this excerpt starts mid-`elif` chain — earlier dataset
    # options are above this fragment and not visible here.
    elif args.dataset == 'dd':
        pyg_dataset = TUDataset('./dd', 'DD')
    else:
        raise ValueError("Unsupported dataset.")

    graphs = GraphDataset.pyg_to_graphs(pyg_dataset)

    # Graph-level task; inductive 80/10/10 split (whole graphs per split).
    dataset = GraphDataset(graphs, task="graph")
    datasets = {}
    datasets['train'], datasets['val'], datasets['test'] = dataset.split(
        transductive=False, split_ratio=[0.8, 0.1, 0.1])

    # Optionally apply a dataset transform (e.g. ego-net extraction with
    # the given radius) to every split in place.
    if args.transform_dataset is not None:
        trans_func = get_transform(args.transform_dataset)
        for _, dataset in datasets.items():
            dataset.apply_transform(trans_func, radius=args.radius)

    dataloaders = {
        split: DataLoader(dataset,
                          collate_fn=Batch.collate(),
                          batch_size=args.batch_size,
                          shuffle=True)
        for split, dataset in datasets.items()
    }

    num_classes = datasets['train'].num_graph_labels
    num_node_features = datasets['train'].num_node_features

    train(dataloaders['train'], dataloaders['val'], dataloaders['test'], args,
          num_node_features, num_classes, args.device)