Example 1
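All of the snippets below are excerpts and omit their import statements; several also rely on repository-specific helpers (e.g. GATEConv, ExKGNet, GraphRecsysModel, utils, or a module-level args namespace) that are not shown. As a rough, assumed preamble, the core layers used throughout would typically come from PyTorch and PyTorch Geometric:

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn import Linear, GRUCell, Parameter, BatchNorm1d
from torch_geometric.nn import (GATConv, global_add_pool, global_mean_pool,
                                global_max_pool)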
class GAT(torch.nn.Module):
    """
    Graph Attention Networks
    <https://arxiv.org/abs/1710.10903>
    """
    def __init__(self):
        super(GAT, self).__init__()
        self.conv1 = GATConv(75, 8, heads=8, dropout=0.6)
        self.conv2 = GATConv(8 * 8, 128, heads=1, concat=True, dropout=0.6)

        self.gather_layer = nn.Linear(128, 1)

    def reset_parameters(self):
        self.conv1.reset_parameters()
        self.conv2.reset_parameters()

    def forward(self, data):
        x, edge_index, batch = data.x, data.edge_index, data.batch
        x1 = F.dropout(x, p=0.6, training=self.training)
        x2 = F.elu(self.conv1(x1, edge_index))
        x3 = F.dropout(x2, p=0.6, training=self.training)
        x4 = self.conv2(x3, edge_index)

        y_molecules = global_add_pool(x4, batch)
        z_molecules = self.gather_layer(y_molecules)
        return z_molecules

    def __call__(self, data):  # note: overriding __call__ bypasses nn.Module's hook machinery
        target = torch.unsqueeze(data.y, 1)
        out = self.forward(data)
        loss = F.mse_loss(out, target)
        z = out.detach().cpu().numpy()
        t = target.detach().cpu().numpy()
        return loss, z, t
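A minimal, hypothetical smoke test for the class above (shapes and values are illustrative, not from the original source); it assumes the preamble imports plus torch_geometric.data:

from torch_geometric.data import Data, Batch

model = GAT()
x = torch.randn(4, 75)                        # 4 nodes with the 75 features conv1 expects
edge_index = torch.tensor([[0, 1, 2, 3],
                           [1, 0, 3, 2]])     # 4 directed edges
y = torch.tensor([0.5])                       # one regression target for the single graph
data = Batch.from_data_list([Data(x=x, edge_index=edge_index, y=y)])
loss, z, t = model(data)                      # the custom __call__ returns (loss, prediction, target)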
Example 2
class GATNet(nn.Module):
    def __init__(self, dataset):
        super(GATNet, self).__init__()

        self.conv1 = GATConv(
            dataset.num_features,
            8,
            heads=8,
            dropout=0.6)

        self.conv2 = GATConv(
            8 * 8,
            dataset.num_classes,
            heads=OUTPUT_HEADS,  # module-level constant assumed to be defined elsewhere
            concat=False,
            dropout=0.6)

    def reset_parameters(self):
        self.conv1.reset_parameters()
        self.conv2.reset_parameters()

    def forward(self, x, edge_index, training=None):
        training = self.training if training is None else training
        x = F.dropout(x, p=0.6, training=training)
        x = F.elu(self.conv1(x, edge_index))
        x = F.dropout(x, p=0.6, training=training)
        x = self.conv2(x, edge_index)
        return x
Example 3
class GAT_Net(torch.nn.Module):
    def __init__(self, features_num, num_class, hidden, heads, output_heads,
                 concat, dropout):
        super(GAT_Net, self).__init__()
        self.dropout = dropout
        self.first_lin = Linear(features_num, hidden)
        self.conv1 = GATConv(in_channels=hidden,
                             out_channels=hidden,
                             concat=concat,
                             heads=heads,
                             dropout=dropout)
        # in_channels assumes concat=True for conv1, whose output is then hidden * heads
        self.conv2 = GATConv(in_channels=hidden * heads,
                             out_channels=num_class,
                             concat=concat,
                             heads=output_heads,
                             dropout=dropout)

    def reset_parameters(self):
        self.first_lin.reset_parameters()
        self.conv1.reset_parameters()
        self.conv2.reset_parameters()

    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        x = F.relu(self.first_lin(x))
        x = F.dropout(x, p=self.dropout, training=self.training)
        x = F.elu(self.conv1(x, edge_index))
        x = F.dropout(x, p=self.dropout, training=self.training)
        x = self.conv2(x, edge_index)
        return F.log_softmax(x, dim=-1)

    def __repr__(self):
        return self.__class__.__name__
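A hypothetical node-classification forward pass for GAT_Net (argument values are illustrative only):

from torch_geometric.data import Data

model = GAT_Net(features_num=16, num_class=3, hidden=8, heads=4,
                output_heads=1, concat=True, dropout=0.5)
data = Data(x=torch.randn(5, 16),
            edge_index=torch.tensor([[0, 1, 2, 3, 4],
                                     [1, 2, 3, 4, 0]]))   # a 5-node cycle
log_probs = model(data)                                   # shape [5, 3], per-node log-probabilities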
Example 4
class GATNet(nn.Module):
    def __init__(self, num_feature, num_class, num_layers=2, hidden=64, drop=0.5, use_edge_weight=True):
        super(GATNet, self).__init__()
        self.conv0 = GATConv(num_feature, hidden, heads=8, dropout=drop, concat=False)
        self.conv1 = GATConv(hidden, hidden, heads=8, dropout=drop, concat=False)
        self.linear = Linear(hidden, num_class)
        self.n_layer = num_layers
        self.use_edge_weight = use_edge_weight
        self.drop = drop

    def reset_parameters(self):
        self.conv0.reset_parameters()
        self.conv1.reset_parameters()
        nn.init.normal_(self.linear.weight)
        nn.init.normal_(self.linear.bias)

    def forward(self, data):  # TODO: edge weight
        # edge_weight is extracted here but never used: the GATConv calls below do not receive it (hence the TODO)
        x, edge_index, edge_weight = data.x, data.edge_index, data.edge_attr.squeeze(1)

        for i in range(self.n_layer):
            conv = self.conv0 if i == 0 else self.conv1
            x = conv(x, edge_index)
            x = F.relu(x)
            x = F.dropout(x, p=self.drop, training=self.training)

        x = self.linear(x)

        return F.log_softmax(x, dim=1)
Example 5
class GAT(nn.Module):
    def __init__(self, dataset, nhid, first_heads, output_heads, dropout):
        super(GAT, self).__init__()
        self.gc1 = GATConv(dataset.num_features,
                           nhid,
                           heads=first_heads,
                           dropout=dropout)
        self.gc2 = GATConv(nhid * first_heads,
                           dataset.num_classes,
                           heads=output_heads,
                           dropout=dropout)
        self.dropout = dropout

    def reset_parameters(self):
        self.gc1.reset_parameters()
        self.gc2.reset_parameters()

    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        x = F.dropout(x, p=self.dropout, training=self.training)
        x = self.gc1(x, edge_index)
        x = F.elu(x)
        x = F.dropout(x, p=self.dropout, training=self.training)
        x = self.gc2(x, edge_index)
        return F.log_softmax(x, dim=1)
Example 6
class Net(torch.nn.Module):
    def __init__(self, dataset):
        super(Net, self).__init__()
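        # `args` is an external, module-level namespace (e.g. parsed command-line
        # arguments) assumed to provide hidden, heads, output_heads and dropout.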
        self.conv1 = GATConv(
            dataset.num_features,
            args.hidden,
            heads=args.heads,
            dropout=args.dropout)
        self.conv2 = GATConv(
            args.hidden * args.heads,
            dataset.num_classes,
            heads=args.output_heads,
            concat=False,
            dropout=args.dropout)

    def reset_parameters(self):
        self.conv1.reset_parameters()
        self.conv2.reset_parameters()

    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        x = F.dropout(x, p=args.dropout, training=self.training)
        x = F.elu(self.conv1(x, edge_index))
        x = F.dropout(x, p=args.dropout, training=self.training)
        x = self.conv2(x, edge_index)
        return F.log_softmax(x, dim=1)
Example 7
class PGATNetEx(ExKGNet):
    def __init__(self, num_nodes, num_relations, hidden_size, emb_dim, heads,
                 repr_dim):
        super(PGATNetEx, self).__init__(emb_dim, repr_dim, num_nodes,
                                        num_relations)
        self.emb_dim = emb_dim
        self.repr_dim = repr_dim

        self.node_emb = torch.nn.Embedding(num_nodes,
                                           emb_dim,
                                           max_norm=1,
                                           norm_type=2.0)
        self.r_emb = torch.nn.Embedding(num_relations,
                                        repr_dim,
                                        max_norm=1,
                                        norm_type=2.0)
        self.r_proj = torch.nn.Embedding(num_relations,
                                         emb_dim * repr_dim,
                                         max_norm=1,
                                         norm_type=2.0)

        self.kg_loss_func = torch.nn.MSELoss()

        self.conv1 = GATConv(emb_dim,
                             int(hidden_size // heads),
                             heads=heads,
                             dropout=0.6)
        self.conv2 = PAConv(int(hidden_size // heads) * heads,
                            repr_dim,
                            heads=1,
                            dropout=0.6)
        # self.conv1 = ChebConv(data.num_features, 16, K=2)
        # self.conv2 = ChebConv(16, data.num_features, K=2)

    def reset_parameters(self):
        self.conv1.reset_parameters()
        self.conv2.reset_parameters()

    def forward_(self, edge_index, sec_order_edge_index):
        return self.forward(self.node_emb.weight, edge_index,
                            sec_order_edge_index)

    def forward(self, x, edge_index, sec_order_edge_index):
        '''
        :param x: node feature matrix, [N, emb_dim]
        :param edge_index: np.array, [2, N]
        :param sec_order_edge_index: [3, M]
        :return:
        '''
        x = F.relu(self.conv1(x, edge_index))
        x = F.dropout(x, training=self.training)
        x = self.conv2(x, sec_order_edge_index)
        return x
Example 8
class GATNet(torch.nn.Module):
    def __init__(self, input_size, output_size, hidden_size=512, heads=1):
        super(GATNet, self).__init__()
        self.conv1 = GATConv(input_size, hidden_size, heads=heads)
        self.conv2 = GATConv(hidden_size * heads, output_size)

    def reset_parameters(self):
        self.conv1.reset_parameters()
        self.conv2.reset_parameters()

    def forward(self, feature, edge_index):
        x = F.dropout(feature, p=0.5, training=self.training)
        x = F.elu(self.conv1(x, edge_index))
        x = F.dropout(x, p=0.5, training=self.training)
        x = self.conv2(x, edge_index)
        return x
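A hypothetical forward-pass sketch for this GATNet (values are illustrative only):

model = GATNet(input_size=16, output_size=3, hidden_size=32, heads=4)
x = torch.randn(5, 16)                          # 5 nodes, 16 features each
edge_index = torch.tensor([[0, 1, 2, 3, 4],
                           [1, 2, 3, 4, 0]])    # a 5-node cycle
out = model(x, edge_index)                      # shape [5, 3], unnormalized scores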
Example 9
class GATRecsysModel(GraphRecsysModel):
    def __init__(self, **kwargs):
        super(GATRecsysModel, self).__init__(**kwargs)

    def _init(self, **kwargs):
        self.if_use_features = kwargs['if_use_features']
        self.dropout = kwargs['dropout']

        if not self.if_use_features:
            self.x = torch.nn.Embedding(kwargs['dataset']['num_nodes'],
                                        kwargs['emb_dim'],
                                        max_norm=1).weight
        else:
            raise NotImplementedError('Feature not implemented!')
        self.edge_index = self.update_graph_input(kwargs['dataset'])

        self.conv1 = GATConv(kwargs['emb_dim'],
                             kwargs['hidden_size'],
                             heads=kwargs['num_heads'],
                             dropout=kwargs['dropout'])
        self.conv2 = GATConv(kwargs['hidden_size'] * kwargs['num_heads'],
                             kwargs['repr_dim'],
                             heads=1,
                             dropout=kwargs['dropout'])

        self.fc1 = torch.nn.Linear(2 * kwargs['repr_dim'], kwargs['repr_dim'])
        self.fc2 = torch.nn.Linear(kwargs['repr_dim'], 1)

    def reset_parameters(self):
        if not self.if_use_features:
            torch.nn.init.uniform_(self.x, -1.0, 1.0)
        self.conv1.reset_parameters()
        self.conv2.reset_parameters()
        torch.nn.init.uniform_(self.fc1.weight, -1.0, 1.0)
        torch.nn.init.uniform_(self.fc2.weight, -1.0, 1.0)

    def forward(self):
        x = F.relu(self.conv1(self.x, self.edge_index))
        x = F.dropout(x, p=self.dropout, training=self.training)
        x = self.conv2(x, self.edge_index)
        x = F.normalize(x)
        return x
Example 10
class GATRecsysModel(GraphRecsysModel):
    def __init__(self, **kwargs):
        super(GATRecsysModel, self).__init__(**kwargs)

    def _init(self, **kwargs):
        self.if_use_features = kwargs['if_use_features']
        self.dropout = kwargs['dropout']

        if not self.if_use_features:
            self.x = torch.nn.Embedding(kwargs['num_nodes'],
                                        kwargs['emb_dim'],
                                        max_norm=1)

        self.conv1 = GATConv(kwargs['emb_dim'],
                             kwargs['hidden_size'],
                             heads=kwargs['num_heads'],
                             dropout=kwargs['dropout'])
        self.conv2 = GATConv(kwargs['hidden_size'] * kwargs['num_heads'],
                             kwargs['repr_dim'],
                             heads=1,
                             dropout=kwargs['dropout'])

        self.reset_parameters()

    def reset_parameters(self):
        if not self.if_use_features:
            torch.nn.init.uniform_(self.x.weight, -1.0, 1.0)
        self.conv1.reset_parameters()
        self.conv2.reset_parameters()

    def forward(self, edge_index, x=None):
        if not self.if_use_features:
            x = self.x.weight
        x = F.relu(self.conv1(x, edge_index))
        x = F.dropout(x, p=self.dropout, training=self.training)
        x = self.conv2(x, edge_index)
        x = F.normalize(x)
        return x
Example 11
class AttentiveFP(torch.nn.Module):
    r"""The Attentive FP model for molecular representation learning from the
    `"Pushing the Boundaries of Molecular Representation for Drug Discovery
    with the Graph Attention Mechanism"
    <https://pubs.acs.org/doi/10.1021/acs.jmedchem.9b00959>`_ paper, based on
    graph attention mechanisms.

    Args:
        in_channels (int): Size of each input sample.
        hidden_channels (int): Hidden node feature dimensionality.
        out_channels (int): Size of each output sample.
        edge_dim (int): Edge feature dimensionality.
        num_layers (int): Number of GNN layers.
        num_timesteps (int): Number of iterative refinement steps for global
            readout.
        dropout (float, optional): Dropout probability. (default: :obj:`0.0`)

    """
    def __init__(self,
                 in_channels: int,
                 hidden_channels: int,
                 out_channels: int,
                 edge_dim: int,
                 num_layers: int,
                 num_timesteps: int,
                 dropout: float = 0.0):
        super().__init__()

        self.num_layers = num_layers
        self.num_timesteps = num_timesteps
        self.dropout = dropout

        self.lin1 = Linear(in_channels, hidden_channels)

        conv = GATEConv(hidden_channels, hidden_channels, edge_dim, dropout)
        gru = GRUCell(hidden_channels, hidden_channels)
        self.atom_convs = torch.nn.ModuleList([conv])
        self.atom_grus = torch.nn.ModuleList([gru])
        for _ in range(num_layers - 1):
            conv = GATConv(hidden_channels,
                           hidden_channels,
                           dropout=dropout,
                           add_self_loops=False,
                           negative_slope=0.01)
            self.atom_convs.append(conv)
            self.atom_grus.append(GRUCell(hidden_channels, hidden_channels))

        self.mol_conv = GATConv(hidden_channels,
                                hidden_channels,
                                dropout=dropout,
                                add_self_loops=False,
                                negative_slope=0.01)
        self.mol_gru = GRUCell(hidden_channels, hidden_channels)

        self.lin2 = Linear(hidden_channels, out_channels)

        self.reset_parameters()

    def reset_parameters(self):
        self.lin1.reset_parameters()
        for conv, gru in zip(self.atom_convs, self.atom_grus):
            conv.reset_parameters()
            gru.reset_parameters()
        self.mol_conv.reset_parameters()
        self.mol_gru.reset_parameters()
        self.lin2.reset_parameters()

    def forward(self, x, edge_index, edge_attr, batch):
        """"""
        # Atom Embedding:
        x = F.leaky_relu_(self.lin1(x))

        h = F.elu_(self.atom_convs[0](x, edge_index, edge_attr))
        h = F.dropout(h, p=self.dropout, training=self.training)
        x = self.atom_grus[0](h, x).relu_()

        for conv, gru in zip(self.atom_convs[1:], self.atom_grus[1:]):
            h = F.elu_(conv(x, edge_index))
            h = F.dropout(h, p=self.dropout, training=self.training)
            x = gru(h, x).relu_()

        # Molecule Embedding:
        row = torch.arange(batch.size(0), device=batch.device)
        edge_index = torch.stack([row, batch], dim=0)

        out = global_add_pool(x, batch).relu_()
        for t in range(self.num_timesteps):
            h = F.elu_(self.mol_conv((x, out), edge_index))
            h = F.dropout(h, p=self.dropout, training=self.training)
            out = self.mol_gru(h, out).relu_()

        # Predictor:
        out = F.dropout(out, p=self.dropout, training=self.training)
        return self.lin2(out)
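This class appears to mirror the AttentiveFP reference model that ships with PyTorch Geometric; note that it depends on a GATEConv layer that is not shown in this excerpt. A hypothetical instantiation, assuming GATEConv is available (argument values are illustrative only):

model = AttentiveFP(in_channels=39, hidden_channels=200, out_channels=1,
                    edge_dim=10, num_layers=2, num_timesteps=2, dropout=0.2)
x = torch.randn(6, 39)                           # 6 atoms, 39 features each
edge_index = torch.tensor([[0, 1, 2, 3, 4, 5],
                           [1, 2, 3, 4, 5, 0]])  # a 6-atom ring
edge_attr = torch.randn(6, 10)                   # one 10-dimensional feature vector per bond
batch = torch.zeros(6, dtype=torch.long)         # all atoms belong to graph 0
out = model(x, edge_index, edge_attr, batch)     # shape [1, 1]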
Example 12
class GAT(nn.Module):
    """ 2 Layer Graph Attention Network based on pytorch geometric.

    Parameters
    ----------
    nfeat : int
        size of input feature dimension
    nhid : int
        number of hidden units
    nclass : int
        size of output dimension
    heads: int
        number of attention heads
    output_heads: int
        number of attention output heads
    dropout : float
        dropout rate for GAT
    lr : float
        learning rate for GAT
    weight_decay : float
        weight decay coefficient (L2 regularization) for the optimizer.
    with_bias: bool
        whether to include bias term in GAT weights.
    device: str
        'cpu' or 'cuda'.

    Examples
    --------
    We can first load the dataset and then train GAT.

    >>> from deeprobust.graph.data import Dataset
    >>> from deeprobust.graph.defense import GAT
    >>> data = Dataset(root='/tmp/', name='cora')
    >>> adj, features, labels = data.adj, data.features, data.labels
    >>> idx_train, idx_val, idx_test = data.idx_train, data.idx_val, data.idx_test
    >>> gat = GAT(nfeat=features.shape[1],
              nhid=8, heads=8,
              nclass=labels.max().item() + 1,
              dropout=0.5, device='cpu')
    >>> gat = gat.to('cpu')
    >>> pyg_data = Dpr2Pyg(data) # convert deeprobust dataset to pyg dataset
    >>> gat.fit(pyg_data, patience=100, verbose=True) # train with earlystopping
    """
    def __init__(self,
                 nfeat,
                 nhid,
                 nclass,
                 heads=8,
                 output_heads=1,
                 dropout=0.5,
                 lr=0.01,
                 weight_decay=5e-4,
                 with_bias=True,
                 device=None):

        super(GAT, self).__init__()

        assert device is not None, "Please specify 'device'!"
        self.device = device

        self.conv1 = GATConv(nfeat,
                             nhid,
                             heads=heads,
                             dropout=dropout,
                             bias=with_bias)

        self.conv2 = GATConv(nhid * heads,
                             nclass,
                             heads=output_heads,
                             concat=False,
                             dropout=dropout,
                             bias=with_bias)

        self.dropout = dropout
        self.weight_decay = weight_decay
        self.lr = lr
        self.output = None
        self.best_model = None
        self.best_output = None

    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        x = F.dropout(x, p=self.dropout, training=self.training)
        x = F.elu(self.conv1(x, edge_index))
        x = F.dropout(x, p=self.dropout, training=self.training)
        x = self.conv2(x, edge_index)
        return F.log_softmax(x, dim=1)

    def initialize(self):
        """Initialize parameters of GAT.
        """
        self.conv1.reset_parameters()
        self.conv2.reset_parameters()

    def fit(self,
            pyg_data,
            train_iters=1000,
            initialize=True,
            verbose=False,
            patience=100,
            **kwargs):
        """Train the GAT model, when idx_val is not None, pick the best model
        according to the validation loss.

        Parameters
        ----------
        pyg_data :
            pytorch geometric dataset object
        train_iters : int
            number of training epochs
        initialize : bool
            whether to initialize parameters before training
        verbose : bool
            whether to show verbose logs
        patience : int
            patience for early stopping, only valid when `idx_val` is given
        """

        if initialize:
            self.initialize()

        self.data = pyg_data[0].to(self.device)
        # By default, it is trained with early stopping on validation
        self.train_with_early_stopping(train_iters, patience, verbose)

    def fit1(self,
             pyg_data,
             train_iters=1000,
             initialize=True,
             verbose=False,
             patience=100,
             **kwargs):
        """Train the GAT model, when idx_val is not None, pick the best model
        according to the validation loss.

        Parameters
        ----------
        pyg_data :
            pytorch geometric dataset object
        train_iters : int
            number of training epochs
        initialize : bool
            whether to initialize parameters before training
        verbose : bool
            whether to show verbose logs
        patience : int
            patience for early stopping, only valid when `idx_val` is given
        """

        if initialize:
            self.initialize()

        self.data = pyg_data.to(self.device)
        # By default, it is trained with early stopping on validation
        self.train_with_early_stopping(train_iters, patience, verbose)

    def train_with_early_stopping(self, train_iters, patience, verbose):
        """early stopping based on the validation loss
        """
        if verbose:
            print('=== training GAT model ===')
        optimizer = optim.Adam(self.parameters(),
                               lr=self.lr,
                               weight_decay=self.weight_decay)

        labels = self.data.y
        train_mask, val_mask = self.data.train_mask, self.data.val_mask

        early_stopping = patience
        best_loss_val = 100

        for i in range(train_iters):
            self.train()
            optimizer.zero_grad()
            output = self.forward(self.data)

            loss_train = F.nll_loss(output[train_mask], labels[train_mask])
            loss_train.backward()
            optimizer.step()

            if verbose and i % 10 == 0:
                print('Epoch {}, training loss: {}'.format(
                    i, loss_train.item()))

            self.eval()
            output = self.forward(self.data)
            loss_val = F.nll_loss(output[val_mask], labels[val_mask])

            if best_loss_val > loss_val:
                best_loss_val = loss_val
                self.output = output
                weights = deepcopy(self.state_dict())
                patience = early_stopping
            else:
                patience -= 1
            if i > early_stopping and patience <= 0:
                break

        if verbose:
            print('=== early stopping at {0}, loss_val = {1} ==='.format(
                i, best_loss_val))
        self.load_state_dict(weights)

    def test(self):
        """Evaluate GAT performance on test set.

        Parameters
        ----------
        idx_test :
            node testing indices
        """
        self.eval()
        test_mask = self.data.test_mask
        labels = self.data.y
        output = self.forward(self.data)
        # output = self.output
        loss_test = F.nll_loss(output[test_mask], labels[test_mask])
        acc_test = utils.accuracy(output[test_mask], labels[test_mask])
        print("Test set results:", "loss= {:.4f}".format(loss_test.item()),
              "accuracy= {:.4f}".format(acc_test.item()))
        return acc_test.item()

    def predict(self):
        """
        Returns
        -------
        torch.FloatTensor
            output (log probabilities) of GAT
        """

        self.eval()
        return self.forward(self.data)
Example 13
class GAT(torch.nn.Module):
    def __init__(self, args):
        super(GAT, self).__init__()
        self.args = set_default(args, {
                    'hidden': 64,
                    'hidden2': 32,
                    'dropout': 0.5,
                    'lr': 0.005,
                    'epoches': 300,
                    'weight_decay': 5e-4,
                    'agg': 'self',
                    'act': 'leaky_relu',
                    'withbn': True,
                        })
        self.timer = self.args['timer']  # 'timer', 'heads', 'num_class' and 'features_num' must also be supplied in args
        self.dropout = self.args['dropout']
        self.agg = self.args['agg']
        self.withbn = self.args['withbn']
        self.conv1 = GATConv(self.args['hidden'], self.args['hidden'], self.args['heads'], dropout=self.args['dropout'])
        self.conv2 = GATConv(self.args['hidden']*self.args['heads'], self.args['hidden2'], dropout=self.args['dropout'])
        hd = [self.args['hidden'], self.args['hidden']*self.args['heads'], self.args['hidden2']]
        if self.withbn:
            self.bn1 = BatchNorm1d(self.args['hidden']*self.args['heads'])
            self.bn2 = BatchNorm1d(self.args['hidden2'])
        if self.args['agg'] == 'concat':
            outdim = sum(hd)
        elif self.args['agg'] == 'self':
            outdim = hd[-1]
        if self.args['act'] == 'leaky_relu':
            self.act = F.leaky_relu
        elif self.args['act'] == 'tanh':
            self.act = torch.tanh
        else:
            self.act = lambda x: x
        self.lin2 = Linear(outdim, self.args['num_class'])
        self.first_lin = Linear(self.args['features_num'], self.args['hidden'])

    def reset_parameters(self):
        self.conv1.reset_parameters()
        self.conv2.reset_parameters()
        self.lin2.reset_parameters()

    def forward(self, data):
        x, edge_index, edge_weight = data.x, data.edge_index, data.edge_weight
        x = self.act(self.first_lin(x))
        xs = [x]
        x = self.act(self.conv1(x, edge_index))
        if self.withbn:
            x = self.bn1(x)
        xs.append(x)
        x = self.act(self.conv2(x, edge_index))
        if self.withbn:
            x = self.bn2(x)
        xs.append(x)
        if self.agg == 'concat':
            x = torch.cat(xs, dim=1)
        elif self.agg == 'self':
            x = xs[-1]
        x = self.lin2(x)
        return F.log_softmax(x, dim=-1)

    def train_predict(self, data, train_mask=None, val_mask=None, return_out=True):
        if train_mask is None:
            train_mask = data.train_mask
        optimizer = torch.optim.Adam(self.parameters(), lr=self.args['lr'], weight_decay=self.args['weight_decay'])
        flag_end = False
        st = time.time()
        for epoch in range(1, self.args['epoches']):
            self.train()
            optimizer.zero_grad()
            res = self.forward(data)
            loss = F.nll_loss(res[train_mask], data.y[train_mask])
            loss.backward()
            optimizer.step()
            if epoch%50 == 0:
                cost = (time.time()-st)/epoch*50
                if max(cost*10, 5) > self.timer.remain_time():
                    flag_end = True
                    break

        test_mask = data.test_mask
        self.eval()
        with torch.no_grad():
            res = self.forward(data)
            if return_out:
                pred = res
            else:
                pred = res[test_mask]
            if val_mask is not None:
                return pred, res[val_mask], flag_end
        return pred, flag_end
 
    def __repr__(self):
        return self.__class__.__name__
Example 14
class AttentiveFP(torch.nn.Module):
    r"""The Attentive FP model for molecular representation learning from the
    `"Pushing the Boundaries of Molecular Representation for Drug Discovery
    with the Graph Attention Mechanism"
    <https://pubs.acs.org/doi/10.1021/acs.jmedchem.9b00959>`_ paper, based on
    graph attention mechanisms.

    Args:
        emb_dim (int): Hidden node feature dimensionality.
        num_tasks (int): Size of each output sample.
        num_layers (int): Number of GNN layers.
        num_timesteps (int): Number of iterative refinement steps for global
            readout.
        drop_ratio (float, optional): Dropout probability. (default: :obj:`0.0`)

    """
    def __init__(self,
                 num_timesteps=4,
                 emb_dim=300,
                 num_layers=5,
                 drop_ratio=0,
                 num_tasks=1,
                 **args):
        super(AttentiveFP, self).__init__()

        self.num_layers = num_layers
        self.num_timesteps = num_timesteps
        self.drop_ratio = drop_ratio

        self.atom_encoder = AtomEncoder(emb_dim)
        self.bond_encoder = BondEncoder(emb_dim=emb_dim)

        conv = GATEConv(emb_dim, emb_dim, emb_dim, drop_ratio)
        gru = GRUCell(emb_dim, emb_dim)
        self.atom_convs = torch.nn.ModuleList([conv])
        self.atom_grus = torch.nn.ModuleList([gru])
        for _ in range(num_layers - 1):
            conv = GATConv(emb_dim,
                           emb_dim,
                           dropout=drop_ratio,
                           add_self_loops=False,
                           negative_slope=0.01)
            self.atom_convs.append(conv)
            self.atom_grus.append(GRUCell(emb_dim, emb_dim))

        self.mol_conv = GATConv(emb_dim,
                                emb_dim,
                                dropout=drop_ratio,
                                add_self_loops=False,
                                negative_slope=0.01)
        self.mol_gru = GRUCell(emb_dim, emb_dim)

        self.graph_pred_linear = Linear(emb_dim, num_tasks)

        self.reset_parameters()

    def reset_parameters(self):
        # self.atom_encoder.reset_parameters() # reset in init()
        # self.bond_encoder.reset_parameters() # reset in init()
        for conv, gru in zip(self.atom_convs, self.atom_grus):
            conv.reset_parameters()
            gru.reset_parameters()
        self.mol_conv.reset_parameters()
        self.mol_gru.reset_parameters()
        self.graph_pred_linear.reset_parameters()

    def forward(self, batched_data):
        """"""
        x, edge_index, edge_attr, batch = batched_data.x, batched_data.edge_index, batched_data.edge_attr, batched_data.batch
        # Atom Embedding:
        x = F.leaky_relu_(self.atom_encoder(x))
        edge_attr = self.bond_encoder(edge_attr)

        h = F.elu_(self.atom_convs[0](x, edge_index, edge_attr))
        h = F.dropout(h, p=self.drop_ratio, training=self.training)
        x = self.atom_grus[0](h, x).relu_()

        for conv, gru in zip(self.atom_convs[1:], self.atom_grus[1:]):
            h = F.elu_(conv(x, edge_index))
            h = F.dropout(h, p=self.drop_ratio, training=self.training)
            x = gru(h, x).relu_()

        # Molecule Embedding:
        row = torch.arange(batch.size(0), device=batch.device)
        edge_index = torch.stack([row, batch], dim=0)

        out = global_add_pool(x, batch).relu_()
        for t in range(self.num_timesteps):
            h = F.elu_(self.mol_conv((x, out), edge_index))
            h = F.dropout(h, p=self.drop_ratio, training=self.training)
            out = self.mol_gru(h, out).relu_()

        # Predictor:
        out = F.dropout(out, p=self.drop_ratio, training=self.training)
        return self.graph_pred_linear(out)
Example 15
class GATNet(torch.nn.Module):
    def __init__(self, num_layers, num_input_features, hidden):
        super(GATNet, self).__init__()
        self.conv1 = GATConv(num_input_features, hidden)  # GATconv layer
        self.convs = torch.nn.ModuleList()
        for i in range(num_layers - 1):
            self.convs.append(GATConv(hidden,
                                      hidden))  # remaining GATconv layers
        self.lin1 = Linear(3 * hidden, hidden)  # linear layer
        self.lin2 = Linear(hidden, 2)  # linear layer, output layer, 2 classes

    def reset_parameters(self):  # reset all conv and linear layers
        self.conv1.reset_parameters()
        for conv in self.convs:
            conv.reset_parameters()  # method of the torch_geometric.nn.GATConv class
        self.lin1.reset_parameters()  # method of the torch.nn.Linear class
        self.lin2.reset_parameters()

    def forward(self, data):
        # data: Batch(batch=[num_nodes_in_batch],
        #               edge_attr=[2*num_nodes_in_batch,num_edge_features_per_edge],
        #               edge_index=[2,2*num_nodes_in_batch],
        #               pos=[num_nodes_in_batch,2],
        #               x=[num_nodes_in_batch, num_input_features_per_node],
        #               y=[num_graphs_in_batch, num_classes]
        # example: Batch(batch=[2490], edge_attr=[4980,1], edge_index=[2,4980], pos=[2490,2], x=[2490,33], y=[32,2])

        x, edge_index, batch = data.x, data.edge_index, data.batch
        # x.shape: torch.Size([num_nodes_in_batch, num_input_features_per_node])
        # edge_index.shape: torch.Size([2, 2*num_nodes_in_batch])
        # batch.shape: torch.Size([num_nodes_in_batch])
        # example:  x.shape = torch.Size([2490,33])
        #           edge_index.shape = torch.Size([2,4980])
        #           batch.shape = torch.Size([2490])

        # graph convolutions and relu activation
        x = F.relu(self.conv1(x, edge_index))
        # x.shape:  torch.Size([num_nodes_in_batch, hidden])
        # example:  x.shape = torch.Size([2490, 66])

        for conv in self.convs:
            x = F.relu(conv(x, edge_index))
        # x.shape:  torch.Size([num_nodes_in_batch, hidden])
        # example:  x.shape = torch.Size([2490, 66])

        x = torch.cat([
            global_add_pool(x, batch),
            global_mean_pool(x, batch),
            global_max_pool(x, batch),
        ], dim=1)
        # x.shape:  torch.Size([num_graphs_in_batch, 3 * hidden])
        # example:  x.shape = torch.Size([32, 198])

        # linear layers, activation function, dropout
        x = F.relu(self.lin1(x))
        # x.shape:  torch.Size([num_graphs_in_batch, hidden])
        # example:  x.shape = torch.Size([32, 66])
        x = F.dropout(x, p=0.5, training=self.training)
        x = self.lin2(x)
        # x.shape:  torch.Size([num_graphs_in_batch, num_classes])
        # example:  x.shape = torch.Size([32, 2])

        output = F.log_softmax(x, dim=-1)
        return output

    def __repr__(self):
        #for getting a printable representation of an object
        return self.__class__.__name__
Example 16
class GAT(nn.Module):
    def __init__(self,
                 nfeat,
                 nhid,
                 nclass,
                 dropout=0.5,
                 lr=0.01,
                 weight_decay=5e-4,
                 n_edge=1,
                 with_relu=True,
                 drop=False,
                 with_bias=True,
                 device=None):

        super(GAT, self).__init__()

        assert device is not None, "Please specify 'device'!"
        self.device = device
        self.nfeat = nfeat
        self.hidden_sizes = [nhid]
        self.nclass = int(nclass)
        self.dropout = dropout
        self.lr = lr
        self.drop = drop
        if not with_relu:
            self.weight_decay = 0
        else:
            self.weight_decay = weight_decay
        self.with_relu = with_relu
        self.with_bias = with_bias
        self.n_edge = n_edge
        self.output = None
        self.best_model = None
        self.best_output = None
        self.adj_norm = None
        self.features = None
        self.gate = Parameter(torch.rand(1))  # gate parameter initialized uniformly in [0, 1]
        # self.beta = Parameter(torch.Tensor(self.n_edge))
        self.bns = torch.nn.BatchNorm1d(nhid)
        nclass = int(nclass)
        """define the networks: deeprobust"""
        # self.gc1 = GraphConvolution(nfeat, nhid, with_bias=with_bias)
        # self.gc2 = GraphConvolution(nhid, nclass, with_bias=with_bias)
        """GCN from geometric"""
        """network from torch-geometric, """
        # self.gc1 = GCNConv(nfeat, nhid, bias=True,)
        # self.gc2 = GCNConv(nhid, nclass, bias=True, )
        """GAT from torch-geometric"""
        self.gc1 = GATConv(nfeat, nhid, heads=8, dropout=0.6)
        self.gc2 = GATConv(nhid * 8, nclass, heads=1, concat=True, dropout=0.6)
        """GIN from torch-geometric"""
        # num_features = nfeat
        # dim = 32
        # nn1 = Sequential(Linear(num_features, dim), ReLU(), )
        # self.gc1 = GINConv(nn1)
        # # self.bn1 = torch.nn.BatchNorm1d(dim)
        #
        # nn2 = Sequential(Linear(dim, dim), ReLU(), )
        # self.gc2 = GINConv(nn2)
        # self.jump = JumpingKnowledge(mode='cat')
        # # self.bn2 = torch.nn.BatchNorm1d(dim)
        # self.fc2 = Linear(dim, nclass)

    def forward(self, x, adj):
        """we don't change the edge_index, just update the edge_weight;
        some edge_weight are regarded as removed if it equals to zero"""
        x = x.to_dense()
        edge_index = adj._indices()
        """GCN and GAT"""
        if self.attention:
            adj = self.att_coef(x, adj, i=0)
        # note: passing edge_weight requires a GATConv variant that accepts it;
        # the stock torch_geometric GATConv has no edge_weight argument
        x = self.gc1(x, edge_index, edge_weight=adj._values())
        x = F.relu(x)
        if self.attention:  # if attention=True, use attention mechanism
            adj_2 = self.att_coef(x, adj, i=1)
            adj_values = self.gate * adj._values() + (
                1 - self.gate) * adj_2._values()
        else:
            adj_values = adj._values()

        x = F.dropout(x, self.dropout, training=self.training)
        x = self.gc2(x, edge_index, edge_weight=adj_values)

        # """GIN"""
        # x = F.relu(self.gc1(x, adj, edge_weight=edge_weight))
        # if self.attention:  # if attention=True, use attention mechanism
        #     adj, edge_weight_2 = self.att_coef_2(x, adj) # update the attention by L2
        #     try:
        #         edge_weight = self.gate* edge_weight_2 + (1-self.gate)* edge_weight # involve the last layer's attention
        #     except:
        #         edge_weight = edge_weight_2
        #         print('the gate is not ok')
        # x = F.dropout(x, p=0.2, training=self.training)
        # x = F.relu(self.gc2(x, adj, edge_weight=edge_weight))
        # # x = [x] ### Add Jumping        # x = self.jump(x)
        # x = F.dropout(x, p=0.2,training=self.training)
        # x = self.fc2(x)
        return F.log_softmax(x, dim=1)

    def initialize(self):
        self.gc1.reset_parameters()
        self.gc2.reset_parameters()

    def att_coef(self, fea, edge_index, is_lil=False, i=0):
        if not is_lil:
            edge_index = edge_index._indices()
        else:
            edge_index = edge_index.tocoo()

        n_node = fea.shape[0]
        row = edge_index[0].cpu().data.numpy()
        col = edge_index[1].cpu().data.numpy()
        # row, col = edge_index[0], edge_index[1]

        fea_copy = fea.cpu().data.numpy()
        sim_matrix = cosine_similarity(X=fea_copy,
                                       Y=fea_copy)  # try cosine similarity
        # sim_matrix = torch.from_numpy(sim_matrix)
        sim = sim_matrix[row, col]
        sim[sim < 0.1] = 0
        # print('dropped {} edges'.format(1-sim.nonzero()[0].shape[0]/len(sim)))

        # """use jaccard for binary features and cosine for numeric features"""
        # fea_start, fea_end = fea[edge_index[0]], fea[edge_index[1]]
        # isbinray = np.array_equal(fea_copy, fea_copy.astype(bool))  # check is the fea are binary
        # np.seterr(divide='ignore', invalid='ignore')
        # if isbinray:
        #     fea_start, fea_end = fea_start.T, fea_end.T
        #     sim = jaccard_score(fea_start, fea_end, average=None)  # similarity scores of each edge
        # else:
        #     fea_copy[np.isinf(fea_copy)] = 0
        #     fea_copy[np.isnan(fea_copy)] = 0
        #     sim_matrix = cosine_similarity(X=fea_copy, Y=fea_copy)  # try cosine similarity
        #     sim = sim_matrix[edge_index[0], edge_index[1]]
        #     sim[sim < 0.01] = 0
        """build a attention matrix"""
        att_dense = lil_matrix((n_node, n_node), dtype=np.float32)
        att_dense[row, col] = sim
        if att_dense[0, 0] == 1:
            att_dense = att_dense - sp.diags(
                att_dense.diagonal(), offsets=0, format="lil")
        # normalization, make the sum of each row is 1
        att_dense_norm = normalize(att_dense, axis=1, norm='l1')
        """add learnable dropout, make character vector"""
        if self.drop:
            character = np.vstack(
                (att_dense_norm[row, col].A1, att_dense_norm[col, row].A1))
            character = torch.from_numpy(character.T)
            drop_score = self.drop_learn_1(character)  # drop_learn_1 is assumed to be defined elsewhere; it is not created in __init__ above
            drop_score = torch.sigmoid(
                drop_score
            )  # do not use softmax since we only have one element
            mm = torch.nn.Threshold(0.5, 0)
            drop_score = mm(drop_score)
            mm_2 = torch.nn.Threshold(-0.49, 1)
            drop_score = mm_2(-drop_score)
            drop_decision = drop_score.clone().requires_grad_()
            # print('rate of left edges', drop_decision.sum().data/drop_decision.shape[0])
            drop_matrix = lil_matrix((n_node, n_node), dtype=np.float32)
            drop_matrix[row,
                        col] = drop_decision.cpu().data.numpy().squeeze(-1)
            att_dense_norm = att_dense_norm.multiply(
                drop_matrix.tocsr())  # update, remove the 0 edges

        if att_dense_norm[0, 0] == 0:
            # add self-loop weights; self-loops only need to be added at the first layer
            degree = (att_dense_norm != 0).sum(1).A1
            # degree = degree.squeeze(-1).squeeze(-1)
            lam = 1 / (degree + 1)  # degree +1 is to add itself
            self_weight = sp.diags(np.array(lam), offsets=0, format="lil")
            att = att_dense_norm + self_weight  # add the self loop
        else:
            att = att_dense_norm

        att_adj = edge_index
        att_edge_weight = att[row, col]
        att_edge_weight = np.exp(att_edge_weight)  # exponent, kind of softmax
        att_edge_weight = torch.tensor(np.array(att_edge_weight)[0],
                                       dtype=torch.float32).to(self.device)

        shape = (n_node, n_node)
        new_adj = torch.sparse.FloatTensor(att_adj, att_edge_weight, shape)
        return new_adj

    def add_loop_sparse(self, adj, fill_value=1):
        # build an identity sparse tensor (torch.arange replaces the deprecated torch.range)
        row = torch.arange(adj.shape[0], dtype=torch.int64)
        i = torch.stack((row, row), dim=0)
        v = torch.ones(adj.shape[0], dtype=torch.float32)
        shape = adj.shape
        I_n = torch.sparse.FloatTensor(i, v, shape)
        return adj + I_n.to(self.device)

    def fit(
        self,
        features,
        adj,
        labels,
        idx_train,
        idx_val=None,
        idx_test=None,
        train_iters=81,
        att_0=None,
        attention=False,
        model_name=None,
        initialize=True,
        verbose=False,
        normalize=False,
        patience=500,
    ):
        '''
        Train the GAT model; when idx_val is not None, pick the best model
        according to the validation loss.
        '''
        self.sim = None
        self.attention = attention
        self.idx_test = idx_test

        # self.device = self.gc1.weight.device
        if initialize:
            self.initialize()

        if type(adj) is not torch.Tensor:
            features, adj, labels = utils.to_tensor(features,
                                                    adj,
                                                    labels,
                                                    device=self.device)
        else:
            features = features.to(self.device)
            adj = adj.to(self.device)
            labels = labels.to(self.device)

        # normalize = False # we don't need normalize here, the norm is conducted in the GCN (self.gcn1) model
        # if normalize:
        #     if utils.is_sparse_tensor(adj):
        #         adj_norm = utils.normalize_adj_tensor(adj, sparse=True)
        #     else:
        #         adj_norm = utils.normalize_adj_tensor(adj)
        # else:
        #     adj_norm = adj
        adj = self.add_loop_sparse(adj)

        self.adj_norm = adj
        self.features = features
        self.labels = labels

        if idx_val is None:
            self._train_without_val(labels, idx_train, train_iters, verbose)
        else:
            if patience < train_iters:
                self._train_with_early_stopping(labels, idx_train, idx_val,
                                                train_iters, patience, verbose)
            else:
                self._train_with_val(labels, idx_train, idx_val, train_iters,
                                     verbose)

    def _train_without_val(self, labels, idx_train, train_iters, verbose):
        self.train()
        optimizer = optim.Adam(self.parameters(),
                               lr=self.lr,
                               weight_decay=self.weight_decay)
        for i in range(train_iters):
            optimizer.zero_grad()
            output = self.forward(self.features, self.adj_norm)
            loss_train = F.nll_loss(
                output[idx_train], labels[idx_train], weight=None
            )  # this weight is the weight of each training nodes
            loss_train.backward()
            optimizer.step()
            if verbose and i % 10 == 0:
                print('Epoch {}, training loss: {}'.format(
                    i, loss_train.item()))

        self.eval()
        output = self.forward(self.features, self.adj_norm)
        self.output = output

    def _train_with_val(self, labels, idx_train, idx_val, train_iters,
                        verbose):
        if verbose:
            print('=== training gcn model ===')
        optimizer = optim.Adam(self.parameters(),
                               lr=self.lr,
                               weight_decay=self.weight_decay)

        best_loss_val = 100
        best_acc_val = 0

        for i in range(train_iters):
            self.train()
            optimizer.zero_grad()
            output = self.forward(self.features, self.adj_norm)
            loss_train = F.nll_loss(output[idx_train], labels[idx_train])
            loss_train.backward()
            optimizer.step()

            # acc_test =accuracy(output[self.idx_test], labels[self.idx_test])

            self.eval()
            output = self.forward(self.features, self.adj_norm)
            loss_val = F.nll_loss(output[idx_val], labels[idx_val])
            acc_val = utils.accuracy(output[idx_val], labels[idx_val])

            # if verbose and i % 20 == 0:
            #     print('Epoch {}, training loss: {}, val acc: {}'.format(i, loss_train.item(), acc_val))

            if best_loss_val > loss_val:
                best_loss_val = loss_val
                self.output = output
                weights = deepcopy(self.state_dict())

            if acc_val > best_acc_val:
                best_acc_val = acc_val
                self.output = output
                weights = deepcopy(self.state_dict())

        if verbose:
            print(
                '=== picking the best model according to the performance on validation ==='
            )
        self.load_state_dict(weights)

    def _train_with_early_stopping(self, labels, idx_train, idx_val,
                                   train_iters, patience, verbose):
        if verbose:
            print('=== training gcn model ===')
        optimizer = optim.Adam(self.parameters(),
                               lr=self.lr,
                               weight_decay=self.weight_decay)

        early_stopping = patience
        best_loss_val = 100

        for i in range(train_iters):
            self.train()
            optimizer.zero_grad()
            output = self.forward(self.features, self.adj_norm)
            loss_train = F.nll_loss(output[idx_train], labels[idx_train])
            loss_train.backward()
            optimizer.step()

            self.eval()
            output = self.forward(self.features, self.adj_norm)

            if verbose and i % 10 == 0:
                print('Epoch {}, training loss: {}'.format(
                    i, loss_train.item()))

            loss_val = F.nll_loss(output[idx_val], labels[idx_val])

            if best_loss_val > loss_val:
                best_loss_val = loss_val
                self.output = output
                weights = deepcopy(self.state_dict())
                patience = early_stopping
            else:
                patience -= 1
            if i > early_stopping and patience <= 0:
                break

        if verbose:
            print('=== early stopping at {0}, loss_val = {1} ==='.format(
                i, best_loss_val))
        self.load_state_dict(weights)

    def test(self, idx_test, model_name=None):
        # self.model_name = model_name
        self.eval()
        output = self.predict()
        # output = self.output
        loss_test = F.nll_loss(output[idx_test], self.labels[idx_test])
        acc_test = utils.accuracy(output[idx_test], self.labels[idx_test])
        # print("Test set results:",
        #       "loss= {:.4f}".format(loss_test.item()),
        #       "accuracy= {:.4f}".format(acc_test.item()))
        return acc_test, output

    def _set_parameters(self):
        # TODO
        pass

    def predict(self, features=None, adj=None):
        '''By default, inputs are unnormalized data'''

        # self.eval()
        if features is None and adj is None:
            return self.forward(self.features, self.adj_norm)
        else:
            if type(adj) is not torch.Tensor:
                features, adj = utils.to_tensor(features,
                                                adj,
                                                device=self.device)

            self.features = features
            if utils.is_sparse_tensor(adj):
                self.adj_norm = utils.normalize_adj_tensor(adj, sparse=True)
            else:
                self.adj_norm = utils.normalize_adj_tensor(adj)
            return self.forward(self.features, self.adj_norm)