# Example #1
def test_compute_charges_forward_batched(dgl_carboxylate):
    """Charges computed over a batch should be neutralized per molecule, and
    the two symmetry-equivalent carboxylate oxygens should match."""

    hcl = DGLMolecule.from_smiles("[H]Cl", [], [])
    batch = DGLMoleculeBatch(dgl_carboxylate, hcl)

    # One (electronegativity, hardness) row per atom, covering both
    # resonance forms of the carboxylate followed by HCl.
    parameter_rows = [
        # [H]C(=O)O- form 1
        (30.0, 80.0),
        (35.0, 75.0),
        (40.0, 70.0),
        (50.0, 65.0),
        # [H]C(=O)O- form 2
        (30.0, 80.0),
        (35.0, 75.0),
        (50.0, 65.0),
        (40.0, 70.0),
        # [H]Cl
        (55.0, 60.0),
        (60.0, 55.0),
    ]
    inputs = torch.tensor(parameter_rows)

    partial_charges = ComputePartialCharges().forward(batch, inputs)

    assert partial_charges.shape == (6, 1)
    assert numpy.isclose(partial_charges.sum(), -1.0)
    # The carboxylate oxygen charges should be identical.
    assert numpy.allclose(partial_charges[2], partial_charges[3])
# Example #2
def test_compute_charges_forward(dgl_methane):
    """A neutral methane should yield charges summing to zero, with all four
    equivalent hydrogens receiving the same charge."""

    carbon_row = [30.8, 78.4]
    hydrogen_row = [27.4, 73.9]
    inputs = torch.tensor([carbon_row] + [hydrogen_row] * 4)

    partial_charges = ComputePartialCharges().forward(dgl_methane, inputs)

    assert numpy.isclose(partial_charges.sum(), 0.0)
    # The hydrogens are all equivalent, so their charges must agree.
    assert numpy.allclose(partial_charges[1:], partial_charges[1])
# Example #3
def test_atomic_parameters_to_charges_neutral():
    """Converting per-atom electronegativity / hardness parameters at zero
    total charge should give a neutral set of charges, identical for the
    four equivalent atoms."""

    electronegativity = torch.tensor([30.8] + [27.4] * 4)
    hardness = torch.tensor([78.4] + [73.9] * 4)

    partial_charges = ComputePartialCharges.atomic_parameters_to_charges(
        electronegativity=electronegativity,
        hardness=hardness,
        total_charge=0.0,
    ).numpy()

    assert numpy.isclose(partial_charges.sum(), 0.0)
    assert numpy.allclose(partial_charges[1:], partial_charges[1])
# Example #4
def mock_atom_model() -> DGLMoleculeLightningModel:
    """Build a minimal lightning model with a single atom-readout head that
    post-processes into partial charges."""

    atom_readout = ReadoutModule(
        pooling_layer=PoolAtomFeatures(),
        readout_layers=SequentialLayers(in_feats=4, hidden_feats=[2]),
        postprocess_layer=ComputePartialCharges(),
    )

    return DGLMoleculeLightningModel(
        convolution_module=ConvolutionModule(
            "SAGEConv", in_feats=4, hidden_feats=[4]
        ),
        readout_modules={"atom": atom_readout},
        learning_rate=0.01,
    )
# Example #5
    def test_forward(self, dgl_methane):
        """The forward pass should return one predicted charge per methane
        atom under the "atom" readout key."""

        atom_readout = ReadoutModule(
            pooling_layer=PoolAtomFeatures(),
            readout_layers=SequentialLayers(in_feats=4, hidden_feats=[2]),
            postprocess_layer=ComputePartialCharges(),
        )
        model = MoleculeGCNModel(
            convolution_module=ConvolutionModule(
                "SAGEConv", in_feats=4, hidden_feats=[4]
            ),
            readout_modules={"atom": atom_readout},
        )

        output = model.forward(dgl_methane)

        assert "atom" in output
        # Methane has five atoms, each with a single predicted value.
        assert output["atom"].shape == (5, 1)
# Example #6
    def test_init(self):
        """Constructing the lightning model should wire up the convolution
        module and both readout heads exactly as provided."""

        atom_readout = ReadoutModule(
            pooling_layer=PoolAtomFeatures(),
            readout_layers=SequentialLayers(
                in_feats=2, hidden_feats=[2], activation=["Identity"]
            ),
            postprocess_layer=ComputePartialCharges(),
        )
        bond_readout = ReadoutModule(
            pooling_layer=PoolBondFeatures(
                layers=SequentialLayers(in_feats=4, hidden_feats=[4])
            ),
            readout_layers=SequentialLayers(in_feats=4, hidden_feats=[8]),
        )

        model = DGLMoleculeLightningModel(
            convolution_module=ConvolutionModule(
                "SAGEConv", in_feats=1, hidden_feats=[2, 2]
            ),
            readout_modules={"atom": atom_readout, "bond": bond_readout},
            learning_rate=0.01,
        )

        assert model.convolution_module is not None
        assert isinstance(model.convolution_module, ConvolutionModule)

        # Two hidden feature sizes -> a stack of two GCN layers.
        assert isinstance(model.convolution_module.gcn_layers, GCNStack)
        assert len(model.convolution_module.gcn_layers) == 2

        assert all(key in model.readout_modules for key in ("atom", "bond"))

        assert isinstance(
            model.readout_modules["atom"].pooling_layer, PoolAtomFeatures
        )
        assert isinstance(
            model.readout_modules["bond"].pooling_layer, PoolBondFeatures
        )

        assert numpy.isclose(model.learning_rate, 0.01)
def main():
    """Train a small partial-charge model on a handful of alcohols and
    evaluate it on a longer-chain alcohol."""

    print(torch.seed())

    # Featurization scheme: atomic element + connectivity per atom, bond
    # order per bond.
    atom_features = [
        AtomicElement(["C", "O", "H"]),
        AtomConnectivity(),
    ]
    bond_features = [
        BondOrder(),
    ]

    # Length of the per-atom input feature vector.
    n_atom_features = sum(len(feature) for feature in atom_features)

    # Featurize the training molecules and load them as a single batch.
    training_smiles = ["CO", "CCO", "CCCO", "CCCCO"]
    training_loader = DGLMoleculeDataLoader(
        DGLMoleculeDataset.from_smiles(
            training_smiles,
            atom_features,
            bond_features,
            label_function,
        ),
        batch_size=len(training_smiles),
        shuffle=False,
    )

    # Likewise for the held-out test molecule.
    test_smiles = [
        "CCCCCCCCCO",
    ]
    test_loader = DGLMoleculeDataLoader(
        DGLMoleculeDataset.from_smiles(
            test_smiles,
            atom_features,
            bond_features,
            label_function,
        ),
        batch_size=len(test_smiles),
        shuffle=False,
    )

    # Model hyper-parameters.
    n_gcn_layers = 5
    n_gcn_hidden_features = 128
    n_am1_layers = 2
    n_am1_hidden_features = 64
    learning_rate = 0.001

    # An atom-wise readout head that maps GCN features to per-atom
    # (electronegativity, hardness) pairs and converts them to charges.
    charge_readout = ReadoutModule(
        pooling_layer=PoolAtomFeatures(),
        readout_layers=SequentialLayers(
            in_feats=n_gcn_hidden_features,
            hidden_feats=[n_am1_hidden_features] * n_am1_layers + [2],
            activation=["ReLU"] * n_am1_layers + ["Identity"],
        ),
        postprocess_layer=ComputePartialCharges(),
    )

    model = DGLMoleculeLightningModel(
        convolution_module=ConvolutionModule(
            architecture="SAGEConv",
            in_feats=n_atom_features,
            hidden_feats=[n_gcn_hidden_features] * n_gcn_layers,
        ),
        # The keys of the readout modules should correspond to keys in the
        # label dictionary.
        readout_modules={"am1-charges": charge_readout},
        learning_rate=learning_rate,
    )
    print(model)

    # Train and then evaluate the model.
    n_epochs = 100

    n_gpus = 0 if not torch.cuda.is_available() else 1
    print(f"Using {n_gpus} GPUs")

    trainer = pl.Trainer(gpus=n_gpus, min_epochs=n_epochs, max_epochs=n_epochs)

    trainer.fit(model, train_dataloaders=training_loader)
    trainer.test(model, test_dataloaders=test_loader)
# Example #8
 def test_init(self):
     """The constructor should store each supplied layer on the matching
     attribute of the readout module."""
     readout = ReadoutModule(
         PoolAtomFeatures(),
         SequentialLayers(1, [1]),
         ComputePartialCharges(),
     )
     assert isinstance(readout.pooling_layer, PoolAtomFeatures)
     assert isinstance(readout.readout_layers, SequentialLayers)
     assert isinstance(readout.postprocess_layer, ComputePartialCharges)
# Example #9
def main(
    train_set_path,
    train_batch_size,
    val_set_path,
    test_set_path,
    n_gcn_layers,
    n_gcn_hidden_features,
    n_am1_layers,
    n_am1_hidden_features,
    learning_rate,
    n_epochs,
):
    """Train an AM1 partial-charge model from pre-processed data sets and
    evaluate it on the held-out test set."""

    pprint(locals())

    # pl.seed_everything(3992210414)  # h-parameter sweep v1

    # Featurization scheme for the molecule graphs.
    atom_features = [
        AtomicElement(["C", "O", "H", "N", "S", "F", "Br", "Cl", "I", "P"]),
        AtomConnectivity(),
        AtomAverageFormalCharge(),
    ]
    bond_features = [
        # BondIsInRing(),
        # BondOrder()
    ]

    # Load the pre-processed train / val / test molecules into featurized
    # graphs, re-using any cached featurization.
    data_module = DGLMoleculeDataModule(
        atom_features,
        bond_features,
        partial_charge_method="am1",
        bond_order_method=None,
        train_set_path=train_set_path,
        train_batch_size=train_batch_size,
        val_set_path=val_set_path,
        val_batch_size=None,
        test_set_path=test_set_path,
        test_batch_size=None,
        use_cached_data=True,
    )
    n_atom_features = data_module.n_atom_features

    # Atom-wise readout that maps the final GCN features to two values per
    # atom and post-processes them into partial charges.
    charge_readout = ReadoutModule(
        pooling_layer=PoolAtomFeatures(),
        readout_layers=SequentialLayers(
            in_feats=n_gcn_hidden_features,
            hidden_feats=[n_am1_hidden_features] * n_am1_layers + [2],
            activation=["ReLU"] * n_am1_layers + ["Identity"],
        ),
        postprocess_layer=ComputePartialCharges(),
    )

    model = DGLMoleculeLightningModel(
        convolution_module=ConvolutionModule(
            architecture="SAGEConv",
            in_feats=n_atom_features,
            hidden_feats=[n_gcn_hidden_features] * n_gcn_layers,
        ),
        readout_modules={"am1-charges": charge_readout},
        learning_rate=learning_rate,
    )
    print(model)

    n_gpus = 0 if not torch.cuda.is_available() else 1
    print(f"Using {n_gpus} GPUs")

    # Tag the run with its hyper-parameters so sweep runs are comparable.
    run_version = (
        f"{train_batch_size}-"
        f"{n_gcn_layers}-"
        f"{n_gcn_hidden_features}-"
        f"{n_am1_layers}-"
        f"{n_am1_hidden_features}-"
        f"{learning_rate}"
    )
    logger = TensorBoardLogger("lightning-logs", version=run_version)

    trainer = pl.Trainer(
        gpus=n_gpus, min_epochs=n_epochs, max_epochs=n_epochs, logger=logger
    )

    trainer.fit(model, datamodule=data_module)
    trainer.test(model, data_module)