class GAT(torch.nn.Module): """ Graph Attention Networks <https://arxiv.org/abs/1710.10903> """ def __init__(self): super(GAT, self).__init__() self.conv1 = GATConv(75, 8, heads=8, dropout=0.6) self.conv2 = GATConv(8 * 8, 128, heads=1, concat=True, dropout=0.6) self.gather_layer = nn.Linear(128, 1) def reset_parameters(self): self.conv1.reset_parameters() self.conv2.reset_parameters() def forward(self, data): x, edge_index, batch = data.x, data.edge_index, data.batch x1 = F.dropout(x, p=0.6, training=self.training) x2 = F.elu(self.conv1(x1, edge_index)) x3 = F.dropout(x2, p=0.6, training=self.training) x4 = self.conv2(x3, edge_index) y_molecules = global_add_pool(x4, batch) z_molecules = self.gather_layer(y_molecules) return z_molecules def __call__(self, data): target = torch.unsqueeze(data.y, 1) out = self.forward(data) loss = F.mse_loss(out, target) z = out.to('cpu').data.numpy() t = target.to('cpu').data.numpy() return loss, z, t
class GATNet(nn.Module):
    def __init__(self, dataset):
        super(GATNet, self).__init__()
        self.conv1 = GATConv(dataset.num_features, 8, heads=8, dropout=0.6)
        # OUTPUT_HEADS is a module-level constant in the original code.
        self.conv2 = GATConv(8 * 8, dataset.num_classes, heads=OUTPUT_HEADS,
                             concat=False, dropout=0.6)

    def reset_parameters(self):
        self.conv1.reset_parameters()
        self.conv2.reset_parameters()

    def forward(self, x, edge_index, training=None):
        training = self.training if training is None else training
        x = F.dropout(x, p=0.6, training=training)
        x = F.elu(self.conv1(x, edge_index))
        x = F.dropout(x, p=0.6, training=training)
        x = self.conv2(x, edge_index)
        return x
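# The explicit `training` flag above lets the F.dropout calls stay active at
# inference, e.g. for Monte-Carlo dropout uncertainty estimates. A sketch,
# assuming `dataset`/`data` are a Planetoid-style dataset and its graph:
OUTPUT_HEADS = 1
net = GATNet(dataset)
net.eval()
with torch.no_grad():
    # Average several stochastic forward passes with feature dropout enabled.
    probs = torch.stack([
        F.softmax(net(data.x, data.edge_index, training=True), dim=-1)
        for _ in range(10)
    ]).mean(0)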
class GAT_Net(torch.nn.Module):
    def __init__(self, features_num, num_class, hidden, heads, output_heads,
                 concat, dropout):
        super(GAT_Net, self).__init__()
        self.dropout = dropout
        self.first_lin = Linear(features_num, hidden)
        self.conv1 = GATConv(in_channels=hidden, out_channels=hidden,
                             concat=concat, heads=heads, dropout=dropout)
        # Note: conv2's in_channels of `hidden * heads` assumes concat=True
        # in conv1; with concat=False, conv1 outputs only `hidden` channels.
        self.conv2 = GATConv(in_channels=hidden * heads,
                             out_channels=num_class, concat=concat,
                             heads=output_heads, dropout=dropout)

    def reset_parameters(self):
        self.first_lin.reset_parameters()
        self.conv1.reset_parameters()
        self.conv2.reset_parameters()

    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        x = F.relu(self.first_lin(x))
        x = F.dropout(x, p=self.dropout, training=self.training)
        x = F.elu(self.conv1(x, edge_index))
        x = F.dropout(x, p=self.dropout, training=self.training)
        x = self.conv2(x, edge_index)
        return F.log_softmax(x, dim=-1)

    def __repr__(self):
        return self.__class__.__name__
class GATNet(nn.Module):
    def __init__(self, num_feature, num_class, num_layers=2, hidden=64,
                 drop=0.5, use_edge_weight=True):
        super(GATNet, self).__init__()
        self.conv0 = GATConv(num_feature, hidden, heads=8, dropout=drop,
                             concat=False)
        self.conv1 = GATConv(hidden, hidden, heads=8, dropout=drop,
                             concat=False)
        self.linear = Linear(hidden, num_class)
        self.n_layer = num_layers
        self.use_edge_weight = use_edge_weight
        self.drop = drop

    def reset_parameters(self):
        self.conv0.reset_parameters()
        self.conv1.reset_parameters()
        nn.init.normal_(self.linear.weight)
        nn.init.normal_(self.linear.bias)

    def forward(self, data):
        # TODO: edge weight
        x, edge_index, edge_weight = (data.x, data.edge_index,
                                      data.edge_attr.squeeze(1))
        for i in range(self.n_layer):
            conv = self.conv0 if i == 0 else self.conv1
            x = conv(x, edge_index)
            x = F.relu(x)
            x = F.dropout(x, p=self.drop, training=self.training)
        x = self.linear(x)
        return F.log_softmax(x, dim=1)
class GAT(nn.Module):
    def __init__(self, dataset, nhid, first_heads, output_heads, dropout):
        super(GAT, self).__init__()
        self.gc1 = GATConv(dataset.num_features, nhid, heads=first_heads,
                           dropout=dropout)
        self.gc2 = GATConv(nhid * first_heads, dataset.num_classes,
                           heads=output_heads, dropout=dropout)
        self.dropout = dropout

    def reset_parameters(self):
        self.gc1.reset_parameters()
        self.gc2.reset_parameters()

    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        x = F.dropout(x, p=self.dropout, training=self.training)
        x = self.gc1(x, edge_index)
        x = F.elu(x)
        x = F.dropout(x, p=self.dropout, training=self.training)
        x = self.gc2(x, edge_index)
        return F.log_softmax(x, dim=1)
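# A minimal full-batch training loop for the node-classification GATs in this
# section, assuming a Planetoid-style dataset; lr=0.005 and weight_decay=5e-4
# follow the original GAT paper's Cora setup.
from torch_geometric.datasets import Planetoid

dataset = Planetoid(root='/tmp/Cora', name='Cora')
data = dataset[0]
model = GAT(dataset, nhid=8, first_heads=8, output_heads=1, dropout=0.6)
optimizer = torch.optim.Adam(model.parameters(), lr=0.005, weight_decay=5e-4)

for epoch in range(200):
    model.train()
    optimizer.zero_grad()
    loss = F.nll_loss(model(data)[data.train_mask], data.y[data.train_mask])
    loss.backward()
    optimizer.step()

model.eval()
pred = model(data).argmax(dim=1)
acc = (pred[data.test_mask] == data.y[data.test_mask]).float().mean()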
class Net(torch.nn.Module):
    def __init__(self, dataset):
        super(Net, self).__init__()
        self.conv1 = GATConv(dataset.num_features, args.hidden,
                             heads=args.heads, dropout=args.dropout)
        self.conv2 = GATConv(args.hidden * args.heads, dataset.num_classes,
                             heads=args.output_heads, concat=False,
                             dropout=args.dropout)

    def reset_parameters(self):
        self.conv1.reset_parameters()
        self.conv2.reset_parameters()

    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        x = F.dropout(x, p=args.dropout, training=self.training)
        x = F.elu(self.conv1(x, edge_index))
        x = F.dropout(x, p=args.dropout, training=self.training)
        x = self.conv2(x, edge_index)
        return F.log_softmax(x, dim=1)
class PGATNetEx(ExKGNet):
    def __init__(self, num_nodes, num_relations, hidden_size, emb_dim, heads,
                 repr_dim):
        super(PGATNetEx, self).__init__(emb_dim, repr_dim, num_nodes,
                                        num_relations)
        self.emb_dim = emb_dim
        self.repr_dim = repr_dim
        self.node_emb = torch.nn.Embedding(num_nodes, emb_dim, max_norm=1,
                                           norm_type=2.0)
        self.r_emb = torch.nn.Embedding(num_relations, repr_dim, max_norm=1,
                                        norm_type=2.0)
        self.r_proj = torch.nn.Embedding(num_relations, emb_dim * repr_dim,
                                         max_norm=1, norm_type=2.0)
        self.kg_loss_func = torch.nn.MSELoss()
        self.conv1 = GATConv(emb_dim, int(hidden_size // heads), heads=heads,
                             dropout=0.6)
        self.conv2 = PAConv(int(hidden_size // heads) * heads, repr_dim,
                            heads=1, dropout=0.6)
        # self.conv1 = ChebConv(data.num_features, 16, K=2)
        # self.conv2 = ChebConv(16, data.num_features, K=2)

    def reset_parameters(self):
        self.conv1.reset_parameters()
        self.conv2.reset_parameters()

    def forward_(self, edge_index, sec_order_edge_index):
        return self.forward(self.node_emb.weight, edge_index,
                            sec_order_edge_index)

    def forward(self, x, edge_index, sec_order_edge_index):
        """
        :param edge_index: np.array, [2, N]
        :param sec_order_edge_index: [3, M]
        :return: node representations
        """
        # Note: the `x` argument is ignored; the stored node embeddings are
        # always used as input, as in the original code.
        x = F.relu(self.conv1(self.node_emb.weight, edge_index))
        x = F.dropout(x, training=self.training)
        x = self.conv2(x, sec_order_edge_index)
        return x
class GATNet(torch.nn.Module):
    def __init__(self, input_size, output_size, hidden_size=512, heads=1):
        super(GATNet, self).__init__()
        self.conv1 = GATConv(input_size, hidden_size, heads=heads)
        self.conv2 = GATConv(hidden_size * heads, output_size)

    def reset_parameters(self):
        self.conv1.reset_parameters()
        self.conv2.reset_parameters()

    def forward(self, feature, edge_index):
        x = F.dropout(feature, p=0.5, training=self.training)
        x = F.elu(self.conv1(x, edge_index))
        x = F.dropout(x, p=0.5, training=self.training)
        x = self.conv2(x, edge_index)
        return x
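# GATConv concatenates its head outputs by default, which is why conv2's
# input size is hidden_size * heads. A quick shape check with toy sizes:
net = GATNet(input_size=32, output_size=7, hidden_size=64, heads=4)
x = torch.randn(10, 32)
edge_index = torch.tensor([[0, 1, 2], [1, 2, 0]])
out = net(x, edge_index)             # -> [10, 7]
h = F.elu(net.conv1(x, edge_index))  # -> [10, 256] == [10, hidden_size * heads]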
class GATRecsysModel(GraphRecsysModel):
    def __init__(self, **kwargs):
        super(GATRecsysModel, self).__init__(**kwargs)

    def _init(self, **kwargs):
        self.if_use_features = kwargs['if_use_features']
        self.dropout = kwargs['dropout']

        if not self.if_use_features:
            self.x = torch.nn.Embedding(kwargs['dataset']['num_nodes'],
                                        kwargs['emb_dim'], max_norm=1).weight
        else:
            raise NotImplementedError('Feature not implemented!')
        self.edge_index = self.update_graph_input(kwargs['dataset'])

        self.conv1 = GATConv(kwargs['emb_dim'], kwargs['hidden_size'],
                             heads=kwargs['num_heads'],
                             dropout=kwargs['dropout'])
        self.conv2 = GATConv(kwargs['hidden_size'] * kwargs['num_heads'],
                             kwargs['repr_dim'], heads=1,
                             dropout=kwargs['dropout'])

        self.fc1 = torch.nn.Linear(2 * kwargs['repr_dim'], kwargs['repr_dim'])
        self.fc2 = torch.nn.Linear(kwargs['repr_dim'], 1)

    def reset_parameters(self):
        if not self.if_use_features:
            torch.nn.init.uniform_(self.x, -1.0, 1.0)
        self.conv1.reset_parameters()
        self.conv2.reset_parameters()
        torch.nn.init.uniform_(self.fc1.weight, -1.0, 1.0)
        torch.nn.init.uniform_(self.fc2.weight, -1.0, 1.0)

    def forward(self):
        x = F.relu(self.conv1(self.x, self.edge_index))
        x = F.dropout(x, p=self.dropout, training=self.training)
        x = self.conv2(x, self.edge_index)
        x = F.normalize(x)
        return x
class GATRecsysModel(GraphRecsysModel):
    def __init__(self, **kwargs):
        super(GATRecsysModel, self).__init__(**kwargs)

    def _init(self, **kwargs):
        self.if_use_features = kwargs['if_use_features']
        self.dropout = kwargs['dropout']

        if not self.if_use_features:
            self.x = torch.nn.Embedding(kwargs['num_nodes'],
                                        kwargs['emb_dim'], max_norm=1)

        self.conv1 = GATConv(kwargs['emb_dim'], kwargs['hidden_size'],
                             heads=kwargs['num_heads'],
                             dropout=kwargs['dropout'])
        self.conv2 = GATConv(kwargs['hidden_size'] * kwargs['num_heads'],
                             kwargs['repr_dim'], heads=1,
                             dropout=kwargs['dropout'])

        self.reset_parameters()

    def reset_parameters(self):
        if not self.if_use_features:
            torch.nn.init.uniform_(self.x.weight, -1.0, 1.0)
        self.conv1.reset_parameters()
        self.conv2.reset_parameters()

    def forward(self, edge_index, x=None):
        if not self.if_use_features:
            x = self.x.weight
        x = F.relu(self.conv1(x, edge_index))
        x = F.dropout(x, p=self.dropout, training=self.training)
        x = self.conv2(x, edge_index)
        x = F.normalize(x)
        return x
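# Both recommender variants above return L2-normalized node embeddings, so a
# user-item score can be read off as a dot product; the first variant also
# carries an MLP head (fc1/fc2) for scoring concatenated pair embeddings.
# A sketch, with `user_idx` / `item_idx` as illustrative index tensors:
emb = model(edge_index)                               # [num_nodes, repr_dim]
scores = (emb[user_idx] * emb[item_idx]).sum(dim=-1)  # cosine similarity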
class AttentiveFP(torch.nn.Module):
    r"""The Attentive FP model for molecular representation learning from the
    `"Pushing the Boundaries of Molecular Representation for Drug Discovery
    with the Graph Attention Mechanism"
    <https://pubs.acs.org/doi/10.1021/acs.jmedchem.9b00959>`_ paper, based on
    graph attention mechanisms.

    Args:
        in_channels (int): Size of each input sample.
        hidden_channels (int): Hidden node feature dimensionality.
        out_channels (int): Size of each output sample.
        edge_dim (int): Edge feature dimensionality.
        num_layers (int): Number of GNN layers.
        num_timesteps (int): Number of iterative refinement steps for global
            readout.
        dropout (float, optional): Dropout probability. (default: :obj:`0.0`)
    """
    def __init__(self, in_channels: int, hidden_channels: int,
                 out_channels: int, edge_dim: int, num_layers: int,
                 num_timesteps: int, dropout: float = 0.0):
        super().__init__()

        self.num_layers = num_layers
        self.num_timesteps = num_timesteps
        self.dropout = dropout

        self.lin1 = Linear(in_channels, hidden_channels)

        conv = GATEConv(hidden_channels, hidden_channels, edge_dim, dropout)
        gru = GRUCell(hidden_channels, hidden_channels)
        self.atom_convs = torch.nn.ModuleList([conv])
        self.atom_grus = torch.nn.ModuleList([gru])
        for _ in range(num_layers - 1):
            conv = GATConv(hidden_channels, hidden_channels, dropout=dropout,
                           add_self_loops=False, negative_slope=0.01)
            self.atom_convs.append(conv)
            self.atom_grus.append(GRUCell(hidden_channels, hidden_channels))

        self.mol_conv = GATConv(hidden_channels, hidden_channels,
                                dropout=dropout, add_self_loops=False,
                                negative_slope=0.01)
        self.mol_gru = GRUCell(hidden_channels, hidden_channels)

        self.lin2 = Linear(hidden_channels, out_channels)

        self.reset_parameters()

    def reset_parameters(self):
        self.lin1.reset_parameters()
        for conv, gru in zip(self.atom_convs, self.atom_grus):
            conv.reset_parameters()
            gru.reset_parameters()
        self.mol_conv.reset_parameters()
        self.mol_gru.reset_parameters()
        self.lin2.reset_parameters()

    def forward(self, x, edge_index, edge_attr, batch):
        """"""
        # Atom Embedding:
        x = F.leaky_relu_(self.lin1(x))

        h = F.elu_(self.atom_convs[0](x, edge_index, edge_attr))
        h = F.dropout(h, p=self.dropout, training=self.training)
        x = self.atom_grus[0](h, x).relu_()

        for conv, gru in zip(self.atom_convs[1:], self.atom_grus[1:]):
            h = F.elu_(conv(x, edge_index))
            h = F.dropout(h, p=self.dropout, training=self.training)
            x = gru(h, x).relu_()

        # Molecule Embedding:
        row = torch.arange(batch.size(0), device=batch.device)
        edge_index = torch.stack([row, batch], dim=0)

        out = global_add_pool(x, batch).relu_()
        for t in range(self.num_timesteps):
            h = F.elu_(self.mol_conv((x, out), edge_index))
            h = F.dropout(h, p=self.dropout, training=self.training)
            out = self.mol_gru(h, out).relu_()

        # Predictor:
        out = F.dropout(out, p=self.dropout, training=self.training)
        return self.lin2(out)
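# Shape check for AttentiveFP on a toy molecular batch, assuming the GATEConv
# layer referenced above is in scope (in PyG it lives alongside the bundled
# AttentiveFP implementation). Atom states are refined by per-atom GRUs, then
# a virtual "molecule" node is refined for num_timesteps steps. Sizes below
# are illustrative.
model = AttentiveFP(in_channels=16, hidden_channels=32, out_channels=1,
                    edge_dim=8, num_layers=2, num_timesteps=2, dropout=0.2)
x = torch.randn(5, 16)                        # 5 atoms
edge_index = torch.tensor([[0, 1, 2, 3], [1, 2, 3, 4]])
edge_attr = torch.randn(4, 8)                 # one feature vector per bond
batch = torch.zeros(5, dtype=torch.long)      # a single molecule
out = model(x, edge_index, edge_attr, batch)  # -> [1, 1]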
class GAT(nn.Module): """ 2 Layer Graph Attention Network based on pytorch geometric. Parameters ---------- nfeat : int size of input feature dimension nhid : int number of hidden units nclass : int size of output dimension heads: int number of attention heads output_heads: int number of attention output heads dropout : float dropout rate for GAT lr : float learning rate for GAT weight_decay : float weight decay coefficient (l2 normalization) for GCN. When `with_relu` is True, `weight_decay` will be set to 0. with_bias: bool whether to include bias term in GAT weights. device: str 'cpu' or 'cuda'. Examples -------- We can first load dataset and then train GAT. >>> from deeprobust.graph.data import Dataset >>> from deeprobust.graph.defense import GAT >>> data = Dataset(root='/tmp/', name='cora') >>> adj, features, labels = data.adj, data.features, data.labels >>> idx_train, idx_val, idx_test = data.idx_train, data.idx_val, data.idx_test >>> gat = GAT(nfeat=features.shape[1], nhid=8, heads=8, nclass=labels.max().item() + 1, dropout=0.5, device='cpu') >>> gat = gat.to('cpu') >>> pyg_data = Dpr2Pyg(data) # convert deeprobust dataset to pyg dataset >>> gat.fit(pyg_data, patience=100, verbose=True) # train with earlystopping """ def __init__(self, nfeat, nhid, nclass, heads=8, output_heads=1, dropout=0.5, lr=0.01, weight_decay=5e-4, with_bias=True, device=None): super(GAT, self).__init__() assert device is not None, "Please specify 'device'!" self.device = device self.conv1 = GATConv(nfeat, nhid, heads=heads, dropout=dropout, bias=with_bias) self.conv2 = GATConv(nhid * heads, nclass, heads=output_heads, concat=False, dropout=dropout, bias=with_bias) self.dropout = dropout self.weight_decay = weight_decay self.lr = lr self.output = None self.best_model = None self.best_output = None def forward(self, data): x, edge_index = data.x, data.edge_index x = F.dropout(x, p=self.dropout, training=self.training) x = F.elu(self.conv1(x, edge_index)) x = F.dropout(x, p=self.dropout, training=self.training) x = self.conv2(x, edge_index) return F.log_softmax(x, dim=1) def initialize(self): """Initialize parameters of GAT. """ self.conv1.reset_parameters() self.conv2.reset_parameters() def fit(self, pyg_data, train_iters=1000, initialize=True, verbose=False, patience=100, **kwargs): """Train the GAT model, when idx_val is not None, pick the best model according to the validation loss. Parameters ---------- pyg_data : pytorch geometric dataset object train_iters : int number of training epochs initialize : bool whether to initialize parameters before training verbose : bool whether to show verbose logs patience : int patience for early stopping, only valid when `idx_val` is given """ if initialize: self.initialize() self.data = pyg_data[0].to(self.device) # By default, it is trained with early stopping on validation self.train_with_early_stopping(train_iters, patience, verbose) def fit1(self, pyg_data, train_iters=1000, initialize=True, verbose=False, patience=100, **kwargs): """Train the GAT model, when idx_val is not None, pick the best model according to the validation loss. 
Parameters ---------- pyg_data : pytorch geometric dataset object train_iters : int number of training epochs initialize : bool whether to initialize parameters before training verbose : bool whether to show verbose logs patience : int patience for early stopping, only valid when `idx_val` is given """ if initialize: self.initialize() self.data = pyg_data.to(self.device) # By default, it is trained with early stopping on validation self.train_with_early_stopping(train_iters, patience, verbose) def train_with_early_stopping(self, train_iters, patience, verbose): """early stopping based on the validation loss """ if verbose: print('=== training GAT model ===') optimizer = optim.Adam(self.parameters(), lr=self.lr, weight_decay=self.weight_decay) labels = self.data.y train_mask, val_mask = self.data.train_mask, self.data.val_mask early_stopping = patience best_loss_val = 100 for i in range(train_iters): self.train() optimizer.zero_grad() output = self.forward(self.data) loss_train = F.nll_loss(output[train_mask], labels[train_mask]) loss_train.backward() optimizer.step() if verbose and i % 10 == 0: print('Epoch {}, training loss: {}'.format( i, loss_train.item())) self.eval() output = self.forward(self.data) loss_val = F.nll_loss(output[val_mask], labels[val_mask]) if best_loss_val > loss_val: best_loss_val = loss_val self.output = output weights = deepcopy(self.state_dict()) patience = early_stopping else: patience -= 1 if i > early_stopping and patience <= 0: break if verbose: print('=== early stopping at {0}, loss_val = {1} ==='.format( i, best_loss_val)) self.load_state_dict(weights) def test(self): """Evaluate GAT performance on test set. Parameters ---------- idx_test : node testing indices """ self.eval() test_mask = self.data.test_mask labels = self.data.y output = self.forward(self.data) # output = self.output loss_test = F.nll_loss(output[test_mask], labels[test_mask]) acc_test = utils.accuracy(output[test_mask], labels[test_mask]) print("Test set results:", "loss= {:.4f}".format(loss_test.item()), "accuracy= {:.4f}".format(acc_test.item())) return acc_test.item() def predict(self): """ Returns ------- torch.FloatTensor output (log probabilities) of GAT """ self.eval() return self.forward(self.data)
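# Continuing the docstring example above: fit() restores the best weights
# found by early stopping, after which test() reports accuracy on
# data.test_mask and predict() returns the log-probabilities.
acc = gat.test()
log_probs = gat.predict()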
class GAT(torch.nn.Module):
    def __init__(self, args):
        super(GAT, self).__init__()
        self.args = set_default(args, {
            'hidden': 64,
            'hidden2': 32,
            'dropout': 0.5,
            'lr': 0.005,
            'epoches': 300,
            'weight_decay': 5e-4,
            'agg': 'self',
            'act': 'leaky_relu',
            'withbn': True,
        })
        self.timer = self.args['timer']
        self.dropout = self.args['dropout']
        self.agg = self.args['agg']
        self.withbn = self.args['withbn']

        self.conv1 = GATConv(self.args['hidden'], self.args['hidden'],
                             self.args['heads'], dropout=self.args['dropout'])
        self.conv2 = GATConv(self.args['hidden'] * self.args['heads'],
                             self.args['hidden2'],
                             dropout=self.args['dropout'])
        hd = [self.args['hidden'], self.args['hidden'] * self.args['heads'],
              self.args['hidden2']]
        if self.withbn:
            self.bn1 = BatchNorm1d(self.args['hidden'] * self.args['heads'])
            self.bn2 = BatchNorm1d(self.args['hidden2'])

        if self.args['agg'] == 'concat':
            outdim = sum(hd)
        elif self.args['agg'] == 'self':
            outdim = hd[-1]

        if self.args['act'] == 'leaky_relu':
            self.act = F.leaky_relu
        elif self.args['act'] == 'tanh':
            self.act = torch.tanh
        else:
            self.act = lambda x: x

        self.lin2 = Linear(outdim, self.args['num_class'])
        self.first_lin = Linear(self.args['features_num'],
                                self.args['hidden'])

    def reset_parameters(self):
        self.conv1.reset_parameters()
        self.conv2.reset_parameters()
        self.lin2.reset_parameters()

    def forward(self, data):
        x, edge_index, edge_weight = data.x, data.edge_index, data.edge_weight
        x = self.act(self.first_lin(x))
        xs = [x]
        x = self.act(self.conv1(x, edge_index))
        if self.withbn:
            x = self.bn1(x)
        xs.append(x)
        x = self.act(self.conv2(x, edge_index))
        if self.withbn:
            x = self.bn2(x)
        xs.append(x)
        if self.agg == 'concat':
            x = torch.cat(xs, dim=1)
        elif self.agg == 'self':
            x = xs[-1]
        x = self.lin2(x)
        return F.log_softmax(x, dim=-1)

    def train_predict(self, data, train_mask=None, val_mask=None,
                      return_out=True):
        if train_mask is None:
            train_mask = data.train_mask
        optimizer = torch.optim.Adam(self.parameters(), lr=self.args['lr'],
                                     weight_decay=self.args['weight_decay'])
        flag_end = False
        st = time.time()
        for epoch in range(1, self.args['epoches']):
            self.train()
            optimizer.zero_grad()
            res = self.forward(data)
            loss = F.nll_loss(res[train_mask], data.y[train_mask])
            loss.backward()
            optimizer.step()
            if epoch % 50 == 0:
                cost = (time.time() - st) / epoch * 50
                if max(cost * 10, 5) > self.timer.remain_time():
                    flag_end = True
                    break

        test_mask = data.test_mask
        self.eval()
        with torch.no_grad():
            res = self.forward(data)
        if return_out:
            pred = res
        else:
            pred = res[test_mask]
        if val_mask is not None:
            return pred, res[val_mask], flag_end
        return pred, flag_end

    def __repr__(self):
        return self.__class__.__name__
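# train_predict() above budgets its epochs: every 50 epochs it projects the
# cost of the next 50 and stops early if the remaining time budget looks too
# small. A sketch of the timer contract it relies on; the real timer and the
# set_default helper live in the original AutoML repo, and the args below are
# illustrative.
class BudgetTimer:
    def __init__(self, budget_s):
        self.t0, self.budget = time.time(), budget_s

    def remain_time(self):
        return self.budget - (time.time() - self.t0)

args = {'features_num': dataset.num_features,
        'num_class': dataset.num_classes,
        'heads': 8, 'timer': BudgetTimer(budget_s=60)}
model = GAT(args)
pred, flag_end = model.train_predict(data)  # flag_end: stopped on time budget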
class AttentiveFP(torch.nn.Module):
    r"""The Attentive FP model for molecular representation learning from the
    `"Pushing the Boundaries of Molecular Representation for Drug Discovery
    with the Graph Attention Mechanism"
    <https://pubs.acs.org/doi/10.1021/acs.jmedchem.9b00959>`_ paper, based on
    graph attention mechanisms.

    Args:
        emb_dim (int): Hidden node feature dimensionality.
        num_tasks (int): Size of each output sample.
        num_layers (int): Number of GNN layers.
        num_timesteps (int): Number of iterative refinement steps for global
            readout.
        drop_ratio (float, optional): Dropout probability. (default: :obj:`0.0`)
    """
    def __init__(self, num_timesteps=4, emb_dim=300, num_layers=5,
                 drop_ratio=0, num_tasks=1, **args):
        super(AttentiveFP, self).__init__()

        self.num_layers = num_layers
        self.num_timesteps = num_timesteps
        self.drop_ratio = drop_ratio

        self.atom_encoder = AtomEncoder(emb_dim)
        self.bond_encoder = BondEncoder(emb_dim=emb_dim)

        conv = GATEConv(emb_dim, emb_dim, emb_dim, drop_ratio)
        gru = GRUCell(emb_dim, emb_dim)
        self.atom_convs = torch.nn.ModuleList([conv])
        self.atom_grus = torch.nn.ModuleList([gru])
        for _ in range(num_layers - 1):
            conv = GATConv(emb_dim, emb_dim, dropout=drop_ratio,
                           add_self_loops=False, negative_slope=0.01)
            self.atom_convs.append(conv)
            self.atom_grus.append(GRUCell(emb_dim, emb_dim))

        self.mol_conv = GATConv(emb_dim, emb_dim, dropout=drop_ratio,
                                add_self_loops=False, negative_slope=0.01)
        self.mol_gru = GRUCell(emb_dim, emb_dim)

        self.graph_pred_linear = Linear(emb_dim, num_tasks)

        self.reset_parameters()

    def reset_parameters(self):
        # self.atom_encoder.reset_parameters()  # reset in init()
        # self.bond_encoder.reset_parameters()  # reset in init()
        for conv, gru in zip(self.atom_convs, self.atom_grus):
            conv.reset_parameters()
            gru.reset_parameters()
        self.mol_conv.reset_parameters()
        self.mol_gru.reset_parameters()
        self.graph_pred_linear.reset_parameters()

    def forward(self, batched_data):
        """"""
        x, edge_index, edge_attr, batch = (batched_data.x,
                                           batched_data.edge_index,
                                           batched_data.edge_attr,
                                           batched_data.batch)

        # Atom Embedding:
        x = F.leaky_relu_(self.atom_encoder(x))
        edge_attr = self.bond_encoder(edge_attr)

        h = F.elu_(self.atom_convs[0](x, edge_index, edge_attr))
        h = F.dropout(h, p=self.drop_ratio, training=self.training)
        x = self.atom_grus[0](h, x).relu_()

        for conv, gru in zip(self.atom_convs[1:], self.atom_grus[1:]):
            h = F.elu_(conv(x, edge_index))
            h = F.dropout(h, p=self.drop_ratio, training=self.training)
            x = gru(h, x).relu_()

        # Molecule Embedding:
        row = torch.arange(batch.size(0), device=batch.device)
        edge_index = torch.stack([row, batch], dim=0)

        out = global_add_pool(x, batch).relu_()
        for t in range(self.num_timesteps):
            h = F.elu_(self.mol_conv((x, out), edge_index))
            h = F.dropout(h, p=self.drop_ratio, training=self.training)
            out = self.mol_gru(h, out).relu_()

        # Predictor:
        out = F.dropout(out, p=self.drop_ratio, training=self.training)
        return self.graph_pred_linear(out)
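# Usage sketch for the OGB variant, assuming ogb is installed; AtomEncoder and
# BondEncoder (from ogb.graphproppred.mol_encoder) expect the integer-coded
# molecule features that OGB datasets provide.
from ogb.graphproppred import PygGraphPropPredDataset
from torch_geometric.loader import DataLoader

dataset = PygGraphPropPredDataset(name='ogbg-molhiv')
loader = DataLoader(dataset, batch_size=32, shuffle=True)
model = AttentiveFP(num_timesteps=4, emb_dim=300, num_layers=5,
                    drop_ratio=0.2, num_tasks=dataset.num_tasks)
for batched_data in loader:
    out = model(batched_data)  # -> [batch_size, num_tasks]
    break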
class GATNet(torch.nn.Module):
    def __init__(self, num_layers, num_input_features, hidden):
        super(GATNet, self).__init__()
        self.conv1 = GATConv(num_input_features, hidden)  # first GATConv layer
        self.convs = torch.nn.ModuleList()
        for i in range(num_layers - 1):
            self.convs.append(GATConv(hidden, hidden))  # remaining GATConv layers
        self.lin1 = Linear(3 * hidden, hidden)  # linear layer
        self.lin2 = Linear(hidden, 2)  # output layer, 2 classes

    def reset_parameters(self):
        # reset all conv and linear layers
        self.conv1.reset_parameters()
        for conv in self.convs:
            # .reset_parameters() is a method of torch_geometric.nn.GATConv
            conv.reset_parameters()
        # .reset_parameters() is a method of torch.nn.Linear
        self.lin1.reset_parameters()
        self.lin2.reset_parameters()

    def forward(self, data):
        # data: Batch(batch=[num_nodes_in_batch],
        #             edge_attr=[2*num_nodes_in_batch, num_edge_features_per_edge],
        #             edge_index=[2, 2*num_nodes_in_batch],
        #             pos=[num_nodes_in_batch, 2],
        #             x=[num_nodes_in_batch, num_input_features_per_node],
        #             y=[num_graphs_in_batch, num_classes])
        # example: Batch(batch=[2490], edge_attr=[4980,1], edge_index=[2,4980],
        #                pos=[2490,2], x=[2490,33], y=[32,2])
        x, edge_index, batch = data.x, data.edge_index, data.batch
        # x.shape: torch.Size([num_nodes_in_batch, num_input_features_per_node])
        # edge_index.shape: torch.Size([2, 2*num_nodes_in_batch])
        # batch.shape: torch.Size([num_nodes_in_batch])
        # example: x.shape = torch.Size([2490, 33])
        #          edge_index.shape = torch.Size([2, 4980])
        #          batch.shape = torch.Size([2490])

        # graph convolutions and relu activation
        x = F.relu(self.conv1(x, edge_index))
        # x.shape: torch.Size([num_nodes_in_batch, hidden])
        # example: x.shape = torch.Size([2490, 66])
        for conv in self.convs:
            x = F.relu(conv(x, edge_index))
            # x.shape: torch.Size([num_nodes_in_batch, hidden])

        # concatenate three global pooling readouts -> 3 * hidden features
        x = torch.cat([
            global_add_pool(x, batch),
            global_mean_pool(x, batch),
            global_max_pool(x, batch)
        ], dim=1)
        # x.shape: torch.Size([num_graphs_in_batch, 3 * hidden])
        # example: x.shape = torch.Size([32, 198])

        # linear layers, activation function, dropout
        x = F.relu(self.lin1(x))
        # x.shape: torch.Size([num_graphs_in_batch, hidden])
        # example: x.shape = torch.Size([32, 66])
        x = F.dropout(x, p=0.5, training=self.training)
        x = self.lin2(x)
        # x.shape: torch.Size([num_graphs_in_batch, num_classes])
        # example: x.shape = torch.Size([32, 2])
        output = F.log_softmax(x, dim=-1)
        return output

    def __repr__(self):
        # printable representation of the object
        return self.__class__.__name__
class GAT(nn.Module):
    def __init__(self, nfeat, nhid, nclass, dropout=0.5, lr=0.01,
                 weight_decay=5e-4, n_edge=1, with_relu=True, drop=False,
                 with_bias=True, device=None):
        super(GAT, self).__init__()
        assert device is not None, "Please specify 'device'!"
        self.device = device
        self.nfeat = nfeat
        self.hidden_sizes = [nhid]
        self.nclass = int(nclass)
        self.dropout = dropout
        self.lr = lr
        self.drop = drop
        if not with_relu:
            self.weight_decay = 0
        else:
            self.weight_decay = weight_decay
        self.with_relu = with_relu
        self.with_bias = with_bias
        self.n_edge = n_edge
        self.output = None
        self.best_model = None
        self.best_output = None
        self.adj_norm = None
        self.features = None
        self.gate = Parameter(torch.rand(1))  # learnable gate in [0, 1]
        # self.beta = Parameter(torch.Tensor(self.n_edge))
        self.bns = torch.nn.BatchNorm1d(nhid)
        nclass = int(nclass)

        """define the networks: deeprobust"""
        # self.gc1 = GraphConvolution(nfeat, nhid, with_bias=with_bias)
        # self.gc2 = GraphConvolution(nhid, nclass, with_bias=with_bias)

        """GCN from torch-geometric"""
        # self.gc1 = GCNConv(nfeat, nhid, bias=True)
        # self.gc2 = GCNConv(nhid, nclass, bias=True)

        """GAT from torch-geometric"""
        self.gc1 = GATConv(nfeat, nhid, heads=8, dropout=0.6)
        self.gc2 = GATConv(nhid * 8, nclass, heads=1, concat=True, dropout=0.6)

        """GIN from torch-geometric"""
        # num_features = nfeat
        # dim = 32
        # nn1 = Sequential(Linear(num_features, dim), ReLU())
        # self.gc1 = GINConv(nn1)
        # self.bn1 = torch.nn.BatchNorm1d(dim)
        # nn2 = Sequential(Linear(dim, dim), ReLU())
        # self.gc2 = GINConv(nn2)
        # self.jump = JumpingKnowledge(mode='cat')
        # self.bn2 = torch.nn.BatchNorm1d(dim)
        # self.fc2 = Linear(dim, nclass)

    def forward(self, x, adj):
        """We don't change the edge_index, just update the edge_weight;
        an edge is regarded as removed if its weight equals zero.

        Note: passing `edge_weight` to GATConv relies on the modified GATConv
        used in the original repository; the stock PyG GATConv does not
        accept this argument.
        """
        x = x.to_dense()
        edge_index = adj._indices()

        """GCN and GAT"""
        if self.attention:
            adj = self.att_coef(x, adj, i=0)
        x = self.gc1(x, edge_index, edge_weight=adj._values())
        x = F.relu(x)
        if self.attention:  # if attention=True, use attention mechanism
            adj_2 = self.att_coef(x, adj, i=1)
            adj_values = self.gate * adj._values() + \
                (1 - self.gate) * adj_2._values()
        else:
            adj_values = adj._values()
        x = F.dropout(x, self.dropout, training=self.training)
        x = self.gc2(x, edge_index, edge_weight=adj_values)

        # """GIN"""
        # x = F.relu(self.gc1(x, adj, edge_weight=edge_weight))
        # if self.attention:  # if attention=True, use attention mechanism
        #     adj, edge_weight_2 = self.att_coef_2(x, adj)  # update the attention by L2
        #     try:
        #         # involve the last layer's attention
        #         edge_weight = self.gate * edge_weight_2 + (1 - self.gate) * edge_weight
        #     except:
        #         edge_weight = edge_weight_2
        #         print('the gate is not ok')
        # x = F.dropout(x, p=0.2, training=self.training)
        # x = F.relu(self.gc2(x, adj, edge_weight=edge_weight))
        # x = [x]  # add Jumping Knowledge
        # x = self.jump(x)
        # x = F.dropout(x, p=0.2, training=self.training)
        # x = self.fc2(x)

        return F.log_softmax(x, dim=1)

    def initialize(self):
        self.gc1.reset_parameters()
        self.gc2.reset_parameters()

    def att_coef(self, fea, edge_index, is_lil=False, i=0):
        if is_lil == False:
            edge_index = edge_index._indices()
        else:
            edge_index = edge_index.tocoo()
        n_node = fea.shape[0]
        row, col = (edge_index[0].cpu().data.numpy()[:],
                    edge_index[1].cpu().data.numpy()[:])

        fea_copy = fea.cpu().data.numpy()
        sim_matrix = cosine_similarity(X=fea_copy, Y=fea_copy)  # cosine similarity
        sim = sim_matrix[row, col]
        sim[sim < 0.1] = 0
        # print('dropped {} edges'.format(1 - sim.nonzero()[0].shape[0] / len(sim)))

        # """use jaccard for binary features and cosine for numeric features"""
        # fea_start, fea_end = fea[edge_index[0]], fea[edge_index[1]]
        # isbinray = np.array_equal(fea_copy, fea_copy.astype(bool))  # check if features are binary
        # np.seterr(divide='ignore', invalid='ignore')
        # if isbinray:
        #     fea_start, fea_end = fea_start.T, fea_end.T
        #     sim = jaccard_score(fea_start, fea_end, average=None)  # similarity score of each edge
        # else:
        #     fea_copy[np.isinf(fea_copy)] = 0
        #     fea_copy[np.isnan(fea_copy)] = 0
        #     sim_matrix = cosine_similarity(X=fea_copy, Y=fea_copy)
        #     sim = sim_matrix[edge_index[0], edge_index[1]]
        #     sim[sim < 0.01] = 0

        """build an attention matrix"""
        att_dense = lil_matrix((n_node, n_node), dtype=np.float32)
        att_dense[row, col] = sim
        if att_dense[0, 0] == 1:
            att_dense = att_dense - sp.diags(att_dense.diagonal(), offsets=0,
                                             format="lil")
        # normalization: make the sum of each row equal to 1
        att_dense_norm = normalize(att_dense, axis=1, norm='l1')

        """add learnable dropout, make character vector"""
        # Note: `self.drop_learn_1` is defined elsewhere in the original repo.
        if self.drop:
            character = np.vstack((att_dense_norm[row, col].A1,
                                   att_dense_norm[col, row].A1))
            character = torch.from_numpy(character.T)
            drop_score = self.drop_learn_1(character)
            # do not use softmax since we only have one element
            drop_score = torch.sigmoid(drop_score)
            mm = torch.nn.Threshold(0.5, 0)
            drop_score = mm(drop_score)
            mm_2 = torch.nn.Threshold(-0.49, 1)
            drop_score = mm_2(-drop_score)
            drop_decision = drop_score.clone().requires_grad_()
            # print('rate of left edges', drop_decision.sum().data / drop_decision.shape[0])
            drop_matrix = lil_matrix((n_node, n_node), dtype=np.float32)
            drop_matrix[row, col] = drop_decision.cpu().data.numpy().squeeze(-1)
            # update: remove the 0 edges
            att_dense_norm = att_dense_norm.multiply(drop_matrix.tocsr())

        if att_dense_norm[0, 0] == 0:
            # add self-loop weights; only add self-loops at the first layer
            degree = (att_dense_norm != 0).sum(1).A1
            lam = 1 / (degree + 1)  # degree + 1 is to add itself
            self_weight = sp.diags(np.array(lam), offsets=0, format="lil")
            att = att_dense_norm + self_weight  # add the self loop
        else:
            att = att_dense_norm

        att_adj = edge_index
        att_edge_weight = att[row, col]
        att_edge_weight = np.exp(att_edge_weight)  # exponent, kind of softmax
        att_edge_weight = torch.tensor(np.array(att_edge_weight)[0],
                                       dtype=torch.float32).cuda()
        shape = (n_node, n_node)
        new_adj = torch.sparse.FloatTensor(att_adj, att_edge_weight, shape)
        return new_adj

    def add_loop_sparse(self, adj, fill_value=1):
        # build a sparse identity matrix and add it to adj
        row = torch.arange(adj.shape[0], dtype=torch.int64)
        i = torch.stack((row, row), dim=0)
        v = torch.ones(adj.shape[0], dtype=torch.float32)
        shape = adj.shape
        I_n = torch.sparse.FloatTensor(i, v, shape)
        return adj + I_n.to(self.device)

    def fit(self, features, adj, labels, idx_train, idx_val=None,
            idx_test=None, train_iters=81, att_0=None, attention=False,
            model_name=None, initialize=True, verbose=False, normalize=False,
            patience=500):
        """Train the model; when idx_val is not None, pick the best model
        according to the validation loss."""
        self.sim = None
        self.attention = attention
        self.idx_test = idx_test
        # self.device = self.gc1.weight.device

        if initialize:
            self.initialize()

        if type(adj) is not torch.Tensor:
            features, adj, labels = utils.to_tensor(features, adj, labels,
                                                    device=self.device)
        else:
            features = features.to(self.device)
            adj = adj.to(self.device)
            labels = labels.to(self.device)

        # normalization is not needed here; it is handled inside the model
        # if normalize:
        #     if utils.is_sparse_tensor(adj):
        #         adj_norm = utils.normalize_adj_tensor(adj, sparse=True)
        #     else:
        #         adj_norm = utils.normalize_adj_tensor(adj)
        # else:
        #     adj_norm = adj

        adj = self.add_loop_sparse(adj)

        self.adj_norm = adj
        self.features = features
        self.labels = labels

        if idx_val is None:
            self._train_without_val(labels, idx_train, train_iters, verbose)
        else:
            if patience < train_iters:
                self._train_with_early_stopping(labels, idx_train, idx_val,
                                                train_iters, patience,
                                                verbose)
            else:
                self._train_with_val(labels, idx_train, idx_val, train_iters,
                                     verbose)

    def _train_without_val(self, labels, idx_train, train_iters, verbose):
        self.train()
        optimizer = optim.Adam(self.parameters(), lr=self.lr,
                               weight_decay=self.weight_decay)
        for i in range(train_iters):
            optimizer.zero_grad()
            output = self.forward(self.features, self.adj_norm)
            # `weight` would set per-node weights for the training nodes
            loss_train = F.nll_loss(output[idx_train], labels[idx_train],
                                    weight=None)
            loss_train.backward()
            optimizer.step()
            if verbose and i % 10 == 0:
                print('Epoch {}, training loss: {}'.format(
                    i, loss_train.item()))

        self.eval()
        output = self.forward(self.features, self.adj_norm)
        self.output = output

    def _train_with_val(self, labels, idx_train, idx_val, train_iters,
                        verbose):
        if verbose:
            print('=== training gcn model ===')
        optimizer = optim.Adam(self.parameters(), lr=self.lr,
                               weight_decay=self.weight_decay)

        best_loss_val = 100
        best_acc_val = 0

        for i in range(train_iters):
            self.train()
            optimizer.zero_grad()
            output = self.forward(self.features, self.adj_norm)
            loss_train = F.nll_loss(output[idx_train], labels[idx_train])
            loss_train.backward()
            optimizer.step()
            # acc_test = accuracy(output[self.idx_test], labels[self.idx_test])

            self.eval()
            output = self.forward(self.features, self.adj_norm)
            loss_val = F.nll_loss(output[idx_val], labels[idx_val])
            acc_val = utils.accuracy(output[idx_val], labels[idx_val])
            # if verbose and i % 20 == 0:
            #     print('Epoch {}, training loss: {}, val acc: {}'.format(
            #         i, loss_train.item(), acc_val))

            if best_loss_val > loss_val:
                best_loss_val = loss_val
                self.output = output
                weights = deepcopy(self.state_dict())

            if acc_val > best_acc_val:
                best_acc_val = acc_val
                self.output = output
                weights = deepcopy(self.state_dict())

        if verbose:
            print('=== picking the best model according to the performance '
                  'on validation ===')
        self.load_state_dict(weights)

    def _train_with_early_stopping(self, labels, idx_train, idx_val,
                                   train_iters, patience, verbose):
        if verbose:
            print('=== training gcn model ===')
        optimizer = optim.Adam(self.parameters(), lr=self.lr,
                               weight_decay=self.weight_decay)

        early_stopping = patience
        best_loss_val = 100

        for i in range(train_iters):
            self.train()
            optimizer.zero_grad()
            output = self.forward(self.features, self.adj_norm)
            loss_train = F.nll_loss(output[idx_train], labels[idx_train])
            loss_train.backward()
            optimizer.step()

            self.eval()
            output = self.forward(self.features, self.adj_norm)

            if verbose and i % 10 == 0:
                print('Epoch {}, training loss: {}'.format(
                    i, loss_train.item()))

            loss_val = F.nll_loss(output[idx_val], labels[idx_val])

            if best_loss_val > loss_val:
                best_loss_val = loss_val
                self.output = output
                weights = deepcopy(self.state_dict())
                patience = early_stopping
            else:
                patience -= 1

            if i > early_stopping and patience <= 0:
                break

        if verbose:
            print('=== early stopping at {0}, loss_val = {1} ==='.format(
                i, best_loss_val))
        self.load_state_dict(weights)

    def test(self, idx_test, model_name=None):
        # self.model_name = model_name
        self.eval()
        output = self.predict()
        # output = self.output
        loss_test = F.nll_loss(output[idx_test], self.labels[idx_test])
        acc_test = utils.accuracy(output[idx_test], self.labels[idx_test])
        # print("Test set results:",
        #       "loss= {:.4f}".format(loss_test.item()),
        #       "accuracy= {:.4f}".format(acc_test.item()))
        return acc_test, output

    def _set_parameters(self):
        # TODO
        pass

    def predict(self, features=None, adj=None):
        """By default, inputs are unnormalized data."""
        # self.eval()
        if features is None and adj is None:
            return self.forward(self.features, self.adj_norm)
        else:
            if type(adj) is not torch.Tensor:
                features, adj = utils.to_tensor(features, adj,
                                                device=self.device)
            self.features = features
            if utils.is_sparse_tensor(adj):
                self.adj_norm = utils.normalize_adj_tensor(adj, sparse=True)
            else:
                self.adj_norm = utils.normalize_adj_tensor(adj)
            return self.forward(self.features, self.adj_norm)
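# The att_coef routine above re-weights edges by feature cosine similarity and
# prunes low-similarity pairs, in the spirit of GNNGuard-style defenses. A
# condensed sketch of the core idea, independent of the sparse bookkeeping in
# the full method (names below are illustrative):
def cosine_edge_weights(x, edge_index, threshold=0.1):
    row, col = edge_index.cpu().numpy()
    sim = cosine_similarity(x.detach().cpu().numpy())  # [N, N] similarity
    w = sim[row, col]                                  # one score per edge
    w[w < threshold] = 0                               # drop dissimilar pairs
    return torch.from_numpy(w).float()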