Code Example #1
def run(dataset_name):
    args = default_parameter().__dict__
    results = experiment(task="heterogeneous_node_classification",
                         dataset=dataset_name,
                         model="han",
                         **args)
    return results
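For reference, a hypothetical invocation of this wrapper; "han-acm" is an assumed dataset name and must match a heterogeneous dataset registered in CogDL:

if __name__ == "__main__":
    # "han-acm" is an assumption; substitute any registered heterogeneous dataset.
    print(run("han-acm"))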
Code Example #2
import copy
import time

from cogdl import experiment


def train():
    args = build_default_args()

    combinations = get_parameters()
    best_parameters = None
    best_result = None
    best_val_acc = 0

    print(f"===== Start At: {get_time()} ===========")
    start = time.time()

    for item in combinations:
        for key, val in item.items():
            args[key] = val

        print(f"### -- Parameters: {args}")

        res = experiment(dataset="cora", model="gcn", **args)
        result_list = list(res.values())[0]

        val_acc = [x["val_acc"] for x in result_list]
        test_acc = [x["test_acc"] for x in result_list]
        val_acc = sum(val_acc) / len(val_acc)
        print(f"###    Result: {val_acc}")
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            best_parameters = copy.deepcopy(args)
            best_result = dict(Acc=sum(test_acc) / len(test_acc), ValAcc=val_acc)
    print(f"Best Parameters: {best_parameters}")
    print(f"Best result: {best_result}")

    end = time.time()
    print(f"===== End At: {get_time()} ===========")
    print("Time cost:", end - start)
Code Example #3
def run(dataset_name):
    args = default_parameter()
    args = DATASET_REGISTRY[dataset_name](args).__dict__
    results = experiment(task="multiplex_node_classification",
                         dataset=dataset_name,
                         model="pte",
                         **args)
    return results
Code Example #4
def run(dataset_name):
    args = default_parameter()
    args = DATASET_REGISTRY[dataset_name](args).__dict__
    results = experiment(task="unsupervised_node_classification",
                         dataset=dataset_name,
                         model="grarep",
                         **args)
    return results
Code Example #5
def run(dataset_name):
    args = default_parameter()
    args = DATASET_REGISTRY[dataset_name](args).__dict__
    results = experiment(task="multiplex_link_prediction",
                         dataset=dataset_name,
                         model="gatne",
                         **args)
    return results
Code Example #6
def run(dataset_name):
    unsup = False
    if unsup:
        task = "unsupervised_node_classification"
        model = "unsup_graphsage"
        args = default_parameter_unsup()
    else:
        task = "node_classification"
        model = "graphsage"
        args = default_parameters_sup()
    args = DATASET_REGISTRY[dataset_name](args).__dict__
    results = experiment(task=task, dataset=dataset_name, model=model, **args)
    return results
Code Example #7
File: unet.py Project: znsoftm/cogdl
        self.num_layers = num_layers
        self.dropout = dropout
        self.num_nodes = 0

        self.unet = GraphUNet(self.in_feats,
                              self.hidden_size,
                              self.out_feats,
                              depth=3,
                              pool_ratios=[2000 / num_nodes, 0.5],
                              act=F.elu)

    def forward(self, x, edge_index):
        edge_index, _ = dropout_adj(edge_index,
                                    p=0.2,
                                    force_undirected=True,
                                    num_nodes=x.shape[0],
                                    training=self.training)
        x = F.dropout(x, p=self.dropout, training=self.training)

        x = self.unet(x, edge_index)
        return x

    def predict(self, data):
        return self.forward(data.x, data.edge_index)


if __name__ == "__main__":
    ret = experiment(task="node_classification",
                     dataset="cora",
                     model="pyg_unet")
Code Example #8
        self.num_layers = num_layers
        self.dropout = dropout
        shapes = [num_features] + [hidden_size] * (num_layers - 1) + [num_classes]
        self.convs = nn.ModuleList([
            GCNConv(shapes[layer], shapes[layer + 1], cached=False)
            for layer in range(num_layers)
        ])

    def forward(self, graph):
        x = graph.x
        edge_index = torch.stack(graph.edge_index)
        edge_weight = graph.edge_weight
        for conv in self.convs[:-1]:
            x = F.relu(conv(x, edge_index, edge_weight))
            x = F.dropout(x, p=self.dropout, training=self.training)
        x = self.convs[-1](x, edge_index, edge_weight)
        return F.log_softmax(x, dim=1)


if __name__ == "__main__":
    cora = CoraDataset()
    model = GCN(
        num_features=cora.num_features,
        hidden_size=64,
        num_classes=cora.num_classes,
        num_layers=2,
        dropout=0.5,
    )
    ret = experiment(dataset=cora, model=model)
Code Example #9
        val_mask[int(0.3 * num_nodes):int(0.7 * num_nodes)] = True
        test_mask = torch.zeros(num_nodes).bool()
        test_mask[int(0.7 * num_nodes):] = True
        data = Graph(x=x,
                     edge_index=edge_index,
                     y=y,
                     train_mask=train_mask,
                     val_mask=val_mask,
                     test_mask=test_mask)
        return data


if __name__ == "__main__":
    # Train a customized dataset by defining a new class
    dataset = MyNodeDataset()
    experiment(dataset=dataset, model="gcn")

    # Train a customized dataset by feeding the graph data to NodeDataset
    data = generate_random_graph(num_nodes=100, num_edges=300, num_feats=30)
    dataset = NodeDataset(data=data)
    experiment(dataset=dataset, model="gcn")

# %%
# Dataset for graph_classification
# ---------------------------------
from cogdl.data import Graph
from cogdl.datasets import GraphDataset


class MyGraphDataset(GraphDataset):
    def __init__(self, path="data.pt"):
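        # The original snippet is cut off at this point. A minimal sketch of
        # how the subclass might continue, following CogDL's documented
        # GraphDataset pattern; the exact body below is an assumption.
        self.path = path
        super(MyGraphDataset, self).__init__(path, metric="accuracy")

    def process(self):
        # Randomly generate ten small graphs with binary labels for simulation.
        graphs = []
        for _ in range(10):
            edges = torch.randint(0, 20, (2, 30))
            label = torch.randint(0, 2, (1,))
            graphs.append(Graph(edge_index=edges, y=label))
        return graphs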
Code Example #10
File: custom_dataset.py Project: Aliang-CN/cogdl
        num_edges = 300
        feat_dim = 30

        # load or generate your dataset
        edge_index = torch.randint(0, num_nodes, (2, num_edges))
        x = torch.randn(num_nodes, feat_dim)
        y = torch.randint(0, 2, (num_nodes, ))

        # set train/val/test mask in node_classification task
        train_mask = torch.zeros(num_nodes).bool()
        train_mask[0:int(0.3 * num_nodes)] = True
        val_mask = torch.zeros(num_nodes).bool()
        val_mask[int(0.3 * num_nodes):int(0.7 * num_nodes)] = True
        test_mask = torch.zeros(num_nodes).bool()
        test_mask[int(0.7 * num_nodes):] = True
        data = Data(x=x,
                    edge_index=edge_index,
                    y=y,
                    train_mask=train_mask,
                    val_mask=val_mask,
                    test_mask=test_mask)
        torch.save(data, "mydata.pt")
        return data


if __name__ == "__main__":
    # Run with self-loaded dataset
    experiment(task="node_classification", dataset="mydataset", model="gcn")
    # Run with a given data path
    experiment(task="node_classification", dataset="./mydata.pt", model="gcn")
Code Example #11
File: train.py Project: znsoftm/cogdl
from cogdl import options, experiment

if __name__ == "__main__":
    parser = options.get_training_parser()
    args, _ = parser.parse_known_args()
    args = options.parse_args_and_arch(parser, args)
    assert len(args.device_id) == 1

    experiment(task=args.task,
               dataset=args.dataset,
               model=args.model,
               args=args)
Code Example #12
File: custom_gcn.py Project: znsoftm/cogdl
    @classmethod
    def build_model_from_args(cls, args):
        return cls(args.num_features, args.hidden_size, args.num_classes,
                   args.dropout)

    def __init__(self, in_feats, hidden_size, out_feats, dropout):
        super(GCN, self).__init__()

        self.gc1 = GraphConvolution(in_feats, hidden_size)
        self.gc2 = GraphConvolution(hidden_size, out_feats)
        self.dropout = dropout

    def forward(self, x, edge_index):
        edge_index, edge_attr = add_remaining_self_loops(edge_index,
                                                         num_nodes=x.shape[0])
        edge_attr = symmetric_normalization(x.shape[0], edge_index, edge_attr)

        x = F.dropout(x, self.dropout, training=self.training)
        x = F.relu(self.gc1(x, edge_index, edge_attr))
        x = F.dropout(x, self.dropout, training=self.training)
        x = self.gc2(x, edge_index, edge_attr)
        return x

    def predict(self, data):
        return self.forward(data.x, data.edge_index)


if __name__ == "__main__":
    experiment(task="node_classification", dataset="cora", model="mygcn")
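Note that this excerpt omits the step that makes model="mygcn" resolvable: the class is presumably registered under that name earlier in custom_gcn.py (older CogDL versions used a @register_model("mygcn") decorator for this).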
Code Example #13
from cogdl import experiment

# basic usage
experiment(dataset="cora", model="gcn")

# set other hyper-parameters
experiment(dataset="cora", model="gcn", hidden_size=32, epochs=200)

# run over multiple models on different seeds
experiment(dataset="cora", model=["gcn", "gat"], seed=[0, 1])

# run on different splits
experiment(dataset="chameleon", model="gcn", seed=[0, 1], split=[0, 1])


def search_space(trial):
    return {
        "lr": trial.suggest_categorical("lr", [1e-3, 5e-3, 1e-2]),
        "hidden_size": trial.suggest_categorical("hidden_size", [32, 64, 128]),
        "dropout": trial.suggest_uniform("dropout", 0.5, 0.8),
    }


experiment(dataset="cora", model="gcn", seed=[1, 2], search_space=search_space, n_trials=3)
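The search_space function above follows the Optuna trial API that CogDL uses for hyperparameter search: suggest_categorical picks from a fixed set of values, suggest_uniform samples a float from a range, and n_trials bounds how many configurations are tried.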
Code Example #14
File: train.py Project: rpatil524/cogdl
from cogdl import options, experiment

if __name__ == "__main__":
    parser = options.get_training_parser()
    args, _ = parser.parse_known_args()
    args = options.parse_args_and_arch(parser, args)

    experiment(dataset=args.dataset, model=args.model, args=args)
Code Example #15
File: run.py Project: huaxz1986/cogdl
        logits, _ = self.gat_layers[-1](self.g, h, self.e_feat, res_attn=None)
        logits = logits.mean(1)
        # This is an equivalent replacement for tf.l2_normalize, see https://www.tensorflow.org/versions/r1.15/api_docs/python/tf/math/l2_normalize for more information.
        logits = logits / (torch.max(torch.norm(logits, dim=1, keepdim=True),
                                     self.epsilon))
        y = logits[target_x]
        loss = self.cross_entropy_loss(y, target)
        return loss, y, None

    def loss(self, data):
        loss, y, _ = self.forward(data.adj, data.x, data.train_node,
                                  data.train_target)
        return loss

    def evaluate(self, data, nodes, targets):
        loss, y, _ = self.forward(data.adj, data.x, nodes, targets)
        f1 = accuracy(y, targets)
        return loss.item(), f1


if __name__ == "__main__":
    # CUDA_VISIBLE_DEVICES=0 python custom_gcn.py --seed 0 1 2 3 4 -t heterogeneous_node_classification -dt gtn-acm -m simple_hgn --lr 0.001
    parser = options.get_training_parser()
    args, _ = parser.parse_known_args()
    args = options.parse_args_and_arch(parser, args)
    experiment(task="heterogeneous_node_classification",
               dataset="gtn-acm",
               model="simple_hgn",
               args=args)
    # experiment(task="node_classification", dataset="cora", model="mygcn")
Code Example #16
File: gat.py Project: rpatil524/cogdl
import torch
import torch.nn.functional as F

from cogdl import experiment
from cogdl.models import BaseModel
from cogdl.datasets.planetoid_data import CoraDataset
# GATConv is assumed here to be PyTorch Geometric's implementation,
# matching the heads/dropout keywords used below.
from torch_geometric.nn import GATConv


class GAT(BaseModel):
    def __init__(self, in_feats, hidden_size, out_feats, num_heads, dropout):
        super(GAT, self).__init__()
        self.in_feats = in_feats
        self.out_feats = out_feats
        self.hidden_size = hidden_size
        self.num_heads = num_heads
        self.dropout = dropout
        self.conv1 = GATConv(in_feats, hidden_size, heads=num_heads, dropout=dropout)
        self.conv2 = GATConv(hidden_size * num_heads, out_feats, dropout=dropout)

    def forward(self, graph):
        x = graph.x
        edge_index = torch.stack(graph.edge_index)
        x = F.dropout(x, p=self.dropout, training=self.training)
        x = F.elu(self.conv1(x, edge_index))
        x = F.dropout(x, p=self.dropout, training=self.training)
        x = F.elu(self.conv2(x, edge_index))
        return x


if __name__ == "__main__":
    cora = CoraDataset()
    model = GAT(in_feats=cora.num_features, hidden_size=64, out_feats=cora.num_classes, num_heads=2, dropout=0.1)
    ret = experiment(dataset=cora, model=model, dw="node_classification_dw", mw="node_classification_mw")
Code Example #17
            nn.Dropout(dropout),
            nn.Linear(512, 256),
            nn.BatchNorm1d(256),
            nn.Dropout(dropout),
            nn.Linear(256, out_feats),
        )

    def forward(self, batch):
        h = batch.x
        h1 = self.conv1(h, batch.batch)
        h2 = self.conv2(h1, batch.batch)
        h = self.linear(torch.cat([h1, h2], dim=1))
        h = global_max_pool(h, batch.batch)
        out = self.final_mlp(h)
        return out


if __name__ == "__main__":
    mutag = MUTAGDataset()
    model = DGCNN(
        in_feats=mutag.num_features,
        hidden_size=64,
        out_feats=mutag.num_classes,
        k=20,
        dropout=0.5,
    )
    ret = experiment(dataset=mutag,
                     model=model,
                     dw="graph_classification_dw",
                     mw="graph_classification_mw")
Code Example #18
File: gnn.py Project: rpatil524/cogdl
    parser.add_argument("--eval-step", type=int, default=10)
    parser.add_argument("--patience", type=int, default=10)
    parser.add_argument("--logger", type=str, default=None)
    parser.add_argument("--runs", type=int, default=10)
    args = parser.parse_args()

    dataset = OGBProductsDataset()
    gnn = SAGE(
        in_feats=dataset.num_features,
        hidden_size=args.hidden_size,
        out_feats=dataset.num_classes,
        num_layers=args.num_layers,
        dropout=args.dropout,
    )

    experiment(
        model=gnn,
        dataset=dataset,
        lr=args.lr,
        weight_decay=args.weight_decay,
        epochs=args.epochs,
        seed=list(range(args.runs)),
        dw="cluster_dw",
        batch_size=args.batch_size,
        n_cluster=args.n_cluster,
        cpu_inference=True,
        eval_step=args.eval_step,
        logger=args.logger,
        patience=args.patience,
    )
Code Example #19
File: 2training_cn.py Project: rpatil524/cogdl
        optimizer.step()
    model.eval()
    _, pred = model(dataset).max(dim=1)
    correct = float(pred[dataset.test_mask].eq(
        dataset.y[dataset.test_mask]).sum().item())
    acc = correct / dataset.test_mask.sum().item()
    print(
        'The accuracy rate obtained by running the experiment with the custom training logic: {:.6f}'
        .format(acc))

# %%
# Experiment API
# --------------
# CogDL provides an easier-to-use API for training: experiment
from cogdl import experiment
experiment(model="gcn", dataset="cora")
# Alternatively, you can build each component separately and run the process
# manually with build_dataset and build_model from CogDL.

from cogdl import experiment
from cogdl.datasets import build_dataset
from cogdl.models import build_model
from cogdl.options import get_default_args

args = get_default_args(model="gcn", dataset="cora")
dataset = build_dataset(args)
model = build_model(args)
experiment(model=model, dataset=dataset)

# %%
# How to save a trained model?
# --------------------------
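# The excerpt ends here. A minimal sketch of the usual answer, assuming the
# checkpoint_path keyword accepted by CogDL's experiment API (treat the exact
# keywords below as assumptions if your CogDL version differs):
experiment(model="gcn", dataset="cora", checkpoint_path="gcn_cora.pt")
# When training stops, the best weights are saved to gcn_cora.pt. To continue
# training from that checkpoint (resume_training is likewise an assumption):
experiment(model="gcn", dataset="cora", checkpoint_path="gcn_cora.pt", resume_training=True)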
Code Example #20
File: 4custom_gnn.py Project: rpatil524/cogdl
    """
    Args:
        in_feats: int
            Input feature size
        out_feats: int
            Output feature size
    """
    def __init__(self, in_feats, out_feats):
        super(GCNLayer, self).__init__()
        self.fc = torch.nn.Linear(in_feats, out_feats)

    def forward(self, graph, x):
        h = self.fc(x)
        h = spmm(graph, h)
        return h


# %%
# Using your custom GNN model with CogDL
# ------------------------------------
# Now that you have defined your own GNN, you can use the datasets/tasks in
# CogDL to train and evaluate the model's performance right away.

from cogdl import experiment
from cogdl.datasets import build_dataset_from_name
data = build_dataset_from_name("cora")[0]
# Use the JKNet model as defined above (its definition is omitted from this excerpt)
model = JKNet(data.num_features, data.num_classes, 32, 4)
experiment(model=model,
           dataset="cora",
           mw="node_classification_mw",
           dw="node_classification_dw")
Code Example #21
        # output projection
        logits, _ = self.gat_layers[-1](self.g, h, res_attn=None)
        # This is an equivalent replacement for tf.l2_normalize, see https://www.tensorflow.org/versions/r1.15/api_docs/python/tf/math/l2_normalize for more information.
        logits = logits / (torch.max(torch.norm(logits, dim=1, keepdim=True),
                                     self.epsilon))

        return logits


if __name__ == "__main__":
    parser = options.get_training_parser()
    args, _ = parser.parse_known_args()
    args.mw = "heterogeneous_gnn_mw"
    args.dw = "heterogeneous_gnn_dw"
    args = options.parse_args_and_arch(parser, args)
    if args.dataset[0] == "gtn-acm":
        dataset = ACM_GTNDataset()
    elif args.dataset[0] == "gtn-dblp":
        dataset = DBLP_GTNDataset()
    elif args.dataset[0] == "gtn-imdb":
        dataset = IMDB_GTNDataset()
    else:
        raise NotImplementedError
    hgn = SimpleHGN(in_dims=dataset.num_features,
                    num_classes=dataset.num_classes)
    experiment(dataset=dataset,
               model=hgn,
               dw="heterogeneous_gnn_dw",
               mw="heterogeneous_gnn_mw",
               args=args)