Ejemplo n.º 1
0
def loadmodel(path, eval: bool = True) -> nn.ModuleDict:
    """
    Load an AffinityModel from a checkpoint file.

    Parameters
    ----------
    path:
        Path of the saved checkpoint
    eval: bool
        If ``True``, put the model in evaluation mode

    Returns
    -------
    nn.ModuleDict
        Restored model

    Notes
    -----
    Evaluation mode switches off the dropout layers, which is required when
    the model is used for inference.
    """
    checkpoint = torch.load(path)

    # Rebuild the model from saved constructor arguments, then restore weights
    model = models.AffinityModel(**checkpoint["args"])
    model.load_state_dict(checkpoint["state_dict"])

    # train(True) == training mode, train(False) == evaluation mode
    model.train(mode=not eval)

    return model
Ejemplo n.º 2
0
def test_predict(testdata, testdir):
    """Predictions are returned as arrays with one entry per system."""

    # Distance 0.0 produces a segmentation fault (see MDAnalysis#2656)
    data = loaders.PDBData(testdata, 0.1, testdir)

    batch_size = 2

    # Map atomic numbers to species indices
    amap = loaders.anummap(data.species)
    data.atomicnums_to_idxs(amap)

    n_species = len(amap)

    loader = torch.utils.data.DataLoader(
        data,
        batch_size=batch_size,
        shuffle=False,
        collate_fn=loaders.pad_collate,
    )

    # Define AEVComputer
    AEVC = torchani.AEVComputer(RcR, RcA, EtaR, RsR, EtaA, Zeta, RsA, TsA, n_species)

    # With 1 radial function, 1 angular function and 5 species:
    # AEV: 1 * 5 + 1 * 5 * (5 + 1) // 2 = 5 (R) + 15 (A) = 20
    assert AEVC.aev_length == 20

    model = models.AffinityModel(n_species, AEVC.aev_length)

    ids, true, predicted = predict.predict(model, AEVC, loader)

    # Both outputs are numpy arrays with one value per system in the batch
    for result in (true, predicted):
        assert isinstance(result, np.ndarray)
        assert len(result) == batch_size
Ejemplo n.º 3
0
def test_savemodel_loadmodel(tmpdir, eval, dropp):
    """
    Check that a saved AffinityModel is restored with identical
    hyperparameters and weights.
    """
    n_species = 10

    # Define AEVComputer
    AEVC = torchani.AEVComputer(RcR, RcA, EtaR, RsR, EtaA, Zeta, RsA, TsA,
                                n_species)

    # Radial functions: 1
    # Angular functions: 1
    # Number of species: 10
    # AEV: 1 * 10 + 1 * 10 * (10 + 1) // 2 = 10 (R) + 55 (A) = 65
    assert AEVC.aev_length == 65

    model = models.AffinityModel(n_species, AEVC.aev_length, dropp=dropp)

    path = os.path.join(tmpdir, "model-tmp.pth")

    utils.savemodel(model, path)

    model_loaded = utils.loadmodel(path, eval=eval)

    assert model.aev_length == model_loaded.aev_length == 65
    assert model.n_species == model_loaded.n_species == n_species
    # FIX: original asserted model.dropp == model.dropp (always true);
    # compare against the loaded model instead
    assert model.dropp == model_loaded.dropp
    assert model.layers_sizes == model_loaded.layers_sizes

    # Check that corresponding linear layers carry identical weights
    for ANN, ANNl in zip(model.modules(), model_loaded.modules()):
        for layer, layerl in zip(ANN.modules(), ANNl.modules()):
            # isinstance is the idiomatic type check (type(x) == T is fragile)
            if isinstance(layer, nn.Linear):
                assert torch.allclose(layer.weight, layerl.weight)
                assert torch.allclose(layer.bias, layerl.bias)
Ejemplo n.º 4
0
def test_train_small_save(testdata, testdir, modelidx, tmpdir):
    """Train a tiny model and check that the best checkpoint is saved."""

    with mlflow.start_run():

        # Distance 0.0 produces a segmentation fault (see MDAnalysis#2656)
        data = loaders.PDBData(testdata, 0.1, testdir)

        batch_size = 2

        # Map atomic numbers to species indices
        amap = loaders.anummap(data.species)
        data.atomicnums_to_idxs(amap)

        n_species = len(amap)

        loader = torch.utils.data.DataLoader(
            data,
            batch_size=batch_size,
            shuffle=False,
            collate_fn=loaders.pad_collate,
        )

        # Define AEVComputer
        AEVC = torchani.AEVComputer(RcR, RcA, EtaR, RsR, EtaA, Zeta, RsA, TsA,
                                    n_species)

        # With 1 radial function, 1 angular function and 5 species:
        # AEV: 1 * 5 + 1 * 5 * (5 + 1) // 2 = 5 (R) + 15 (A) = 20
        assert AEVC.aev_length == 20

        # Single-output network to keep training fast
        model = models.AffinityModel(n_species,
                                     AEVC.aev_length,
                                     layers_sizes=[1])
        optimizer = optim.SGD(model.parameters(), lr=0.01)
        mse = nn.MSELoss()

        # One atomic neural network per species
        assert len(model) == n_species

        train_losses, valid_losses = train.train(
            model,
            optimizer,
            mse,
            AEVC,
            loader,
            loader,
            epochs=15,
            savepath=tmpdir,
            idx=modelidx,
        )

        # Best-model checkpoint name carries the model index, if one was given
        fname = "best.pth" if modelidx is None else f"best_{modelidx}.pth"
        assert os.path.isfile(os.path.join(tmpdir, fname))

        # Validation loss is shifted when trainloader and testloader are the same
        assert np.allclose(train_losses[1:], valid_losses[:-1])
Ejemplo n.º 5
0
def test_train_small_cmap(testdata, testdir):
    """Train a tiny model with an element map (cmap) applied to the data."""

    # Collapse N and O onto C; P and S are left as distinct species
    cmap = {"C": ["N", "O"]}  # Map N and O to C, leave P and S

    with mlflow.start_run():

        # Distance 0.0 produces a segmentation fault (see MDAnalysis#2656)
        data = loaders.PDBData(testdata, 0.1, testdir, cmap)

        batch_size = 2

        # Map atomic numbers to species indices
        amap = loaders.anummap(data.species)
        data.atomicnums_to_idxs(amap)

        n_species = len(amap)

        # cmap merges N and O into C, so only three species remain (C, P, S)
        assert n_species == 3

        loader = torch.utils.data.DataLoader(
            data,
            batch_size=batch_size,
            shuffle=False,
            collate_fn=loaders.pad_collate,
        )

        # Define AEVComputer
        AEVC = torchani.AEVComputer(RcR, RcA, EtaR, RsR, EtaA, Zeta, RsA, TsA,
                                    n_species)

        # With 1 radial function, 1 angular function and 3 species:
        # AEV: 1 * 3 + 1 * 3 * (3 + 1) // 2 = 3 (R) + 6 (A) = 9
        assert AEVC.aev_length == 9

        model = models.AffinityModel(n_species, AEVC.aev_length)
        optimizer = optim.SGD(model.parameters(), lr=0.0001)
        mse = nn.MSELoss()

        # One atomic neural network per species
        assert len(model) == n_species

        train_losses, valid_losses = train.train(
            model,
            optimizer,
            mse,
            AEVC,
            loader,
            loader,
            epochs=15,
        )

        # Validation loss is shifted when trainloader and testloader are the same
        assert np.allclose(train_losses[1:], valid_losses[:-1])
Ejemplo n.º 6
0
def test_forward_atomic(testdata, testdir):
    """Per-atom contributions must sum to the model's total output."""

    # Distance 0.0 produces a segmentation fault (see MDAnalysis#2656)
    data = loaders.PDBData(testdata, 0.1, testdir)

    batch_size = 2

    # Map atomic numbers to species indices
    amap = loaders.anummap(data.species)
    data.atomicnums_to_idxs(amap)

    n_species = len(amap)

    loader = torch.utils.data.DataLoader(
        data,
        batch_size=batch_size,
        shuffle=False,
        collate_fn=loaders.pad_collate,
    )

    # Grab a single batch
    _, labels, (species, coordinates) = next(iter(loader))

    # Move everything to device
    labels = labels.to(device)
    species = species.to(device)
    coordinates = coordinates.to(device)

    AEVC = torchani.AEVComputer(RcR, RcA, EtaR, RsR, EtaA, Zeta, RsA, TsA,
                                n_species)

    # With 1 radial function, 1 angular function and 5 species:
    # AEV: 1 * 5 + 1 * 5 * (5 + 1) // 2 = 5 (R) + 15 (A) = 20
    assert AEVC.aev_length == 20

    aev = AEVC.forward((species, coordinates))

    assert aev.species.shape == species.shape
    assert aev.aevs.shape == (batch_size, 42, 20)

    model = models.AffinityModel(n_species, AEVC.aev_length)

    # Move model to device
    model.to(device)

    output = model(aev.species, aev.aevs)
    assert output.shape == (batch_size, )

    # One contribution per atom (same shape as the species tensor)
    atomic_contributions = model._forward_atomic(aev.species, aev.aevs)
    assert atomic_contributions.shape == species.shape

    # Summing contributions over atoms reproduces the total output
    summed = torch.sum(atomic_contributions, dim=1)
    assert np.allclose(output.cpu().detach().numpy(),
                       summed.cpu().detach().numpy())
Ejemplo n.º 7
0
def test_affinitymodel_parameters():
    """Constructor arguments are stored as attributes on the model."""
    n_inputs = 256
    dropp = 0.5
    n_species = 10
    layers_sizes = [128, 64, 1]

    model = models.AffinityModel(n_species, n_inputs, layers_sizes, dropp)

    assert model.n_species == n_species
    assert model.aev_length == n_inputs
    # The AEV (input) size is prepended to the hidden/output layer sizes
    assert model.layers_sizes == [n_inputs] + layers_sizes
    assert model.dropp == pytest.approx(dropp)
Ejemplo n.º 8
0
def test_predict_baseline(testdata, testdir):
    """
    Check delta-learning predictions: baselines must be added to the
    correct system IDs.
    """

    # Distance 0.0 produces a segmentation fault (see MDAnalysis#2656)
    data = loaders.PDBData(testdata, 0.1, testdir)

    batch_size = 2

    # Transform atomic numbers to species
    amap = loaders.anummap(data.species)
    data.atomicnums_to_idxs(amap)

    n_species = len(amap)

    loader = torch.utils.data.DataLoader(
        data, batch_size=batch_size, shuffle=False, collate_fn=loaders.pad_collate
    )

    # Define AEVComputer
    AEVC = torchani.AEVComputer(RcR, RcA, EtaR, RsR, EtaA, Zeta, RsA, TsA, n_species)

    # Radial functions: 1
    # Angular functions: 1
    # Number of species: 5
    # AEV: 1 * 5 + 1 * 5 * (5 + 1) // 2 = 5 (R) + 15 (A) = 20
    assert AEVC.aev_length == 20

    model = models.AffinityModel(n_species, AEVC.aev_length)

    ids, true, predicted = predict.predict(model, AEVC, loader)

    assert isinstance(true, np.ndarray)
    assert len(true) == batch_size

    assert isinstance(predicted, np.ndarray)
    assert len(predicted) == batch_size

    # Systems are the other way around with respect to file order
    # This is to test that deltas are added to the correct ID
    delta_ids = np.array(["1a4w", "1a4r"])
    delta_baseline = np.array([500, 600])
    delta = np.array([5.92, 6.66])
    s = np.argsort(delta_ids)

    ids_b, true_b, predicted_b = predict.predict(
        model, AEVC, loader, baseline=(delta_ids, delta_baseline, delta)
    )

    sort = np.argsort(ids)
    bsort = np.argsort(ids_b)

    assert (ids[sort] == ids_b[bsort]).all()
    # FIX: original compared true against itself (true[bsort]); the intent
    # is to compare the baseline run's true labels with the plain run's
    assert np.allclose(true[sort], true_b[bsort])
    assert np.allclose(predicted[sort], predicted_b[bsort] - delta_baseline[s])
Ejemplo n.º 9
0
def test_atomic(testdata, testdir):
    """Per-atom contributions from grad.atomic sum to the model prediction."""

    with mlflow.start_run():

        # Distance 0.0 produces a segmentation fault (see MDAnalysis#2656)
        data = loaders.PDBData(testdata, 0.1, testdir)

        n_systems = len(data)
        assert n_systems == 2

        # Map atomic numbers to species indices
        amap = loaders.anummap(data.species)
        data.atomicnums_to_idxs(amap)

        n_species = len(amap)

        # Define AEVComputer
        AEVC = torchani.AEVComputer(RcR, RcA, EtaR, RsR, EtaA, Zeta, RsA, TsA,
                                    n_species)

        # With 1 radial function, 1 angular function and 5 species:
        # AEV: 1 * 5 + 1 * 5 * (5 + 1) // 2 = 5 (R) + 15 (A) = 20
        assert AEVC.aev_length == 20

        model = models.AffinityModel(n_species, AEVC.aev_length)

        # Move model and AEVComputer to device
        model.to(device)
        AEVC.to(device)

        # Evaluation mode switches off dropout
        model.eval()

        for pdbid, _, (species, coordinates) in data:

            atomic = grad.atomic(species, coordinates, model, AEVC, device)

            # Add fictitious batch dimension
            species = species.unsqueeze(0)
            coordinates = coordinates.unsqueeze(0)

            assert atomic.shape == species.shape

            aevs = AEVC.forward((species, coordinates)).aevs
            prediction = model(species, aevs)

            # Summing over atoms reproduces the model's total prediction
            assert np.allclose(
                torch.sum(atomic, dim=1).cpu().detach().numpy(),
                prediction.cpu().detach().numpy(),
            )
Ejemplo n.º 10
0
def test_evaluate(testdata, testdir, tmpdir):
    """Consensus evaluation writes predictions and regression plots."""

    # Distance 0.0 produces a segmentation fault (see MDAnalysis#2656)
    data = loaders.PDBData(testdata, 0.1, testdir)

    batch_size = 2

    # Map atomic numbers to species indices
    amap = loaders.anummap(data.species)
    data.atomicnums_to_idxs(amap)

    n_species = len(amap)

    loader = torch.utils.data.DataLoader(
        data,
        batch_size=batch_size,
        shuffle=False,
        collate_fn=loaders.pad_collate,
    )

    # Define AEVComputer
    AEVC = torchani.AEVComputer(RcR, RcA, EtaR, RsR, EtaA, Zeta, RsA, TsA, n_species)

    # With 1 radial function, 1 angular function and 5 species:
    # AEV: 1 * 5 + 1 * 5 * (5 + 1) // 2 = 5 (R) + 15 (A) = 20
    assert AEVC.aev_length == 20

    # Two-model consensus
    mods = [models.AffinityModel(n_species, AEVC.aev_length) for _ in range(2)]

    with mlflow.start_run():
        predict.evaluate(mods, loader, AEVC, outpath=tmpdir)

        # Predictions CSV and both regression-plot formats are written
        for fname in ("predict.csv", "regplot-predict.pdf", "regplot-predict.png"):
            assert os.path.isfile(os.path.join(tmpdir, fname))
Ejemplo n.º 11
0
def test_predict_scaling(testdata, testdir):
    """A scaler maps predictions back to the original label range."""

    # Distance 0.0 produces a segmentation fault (see MDAnalysis#2656)
    data = loaders.PDBData(testdata, 0.1, testdir)

    original_labels = data.labels.copy()

    # Scale labels in place
    scaler = utils.labels_scaler(data)
    assert np.allclose(data.labels, [1.0, -1.0])

    batch_size = 2

    # Map atomic numbers to species indices
    amap = loaders.anummap(data.species)
    data.atomicnums_to_idxs(amap)

    n_species = len(amap)

    loader = torch.utils.data.DataLoader(
        data,
        batch_size=batch_size,
        shuffle=False,
        collate_fn=loaders.pad_collate,
    )

    # Define AEVComputer
    AEVC = torchani.AEVComputer(RcR, RcA, EtaR, RsR, EtaA, Zeta, RsA, TsA, n_species)

    # With 1 radial function, 1 angular function and 5 species:
    # AEV: 1 * 5 + 1 * 5 * (5 + 1) // 2 = 5 (R) + 15 (A) = 20
    assert AEVC.aev_length == 20

    model = models.AffinityModel(n_species, AEVC.aev_length)

    # Without a scaler, scaled labels are returned as-is (within [-1, 1])
    ids, true_scaled, predicted_scaled = predict.predict(model, AEVC, loader)
    assert np.allclose(true_scaled, data.labels)
    assert (-1 <= true_scaled).all() and (true_scaled <= 1).all()

    # With a scaler, labels and predictions are transformed back
    ids, true, predicted = predict.predict(model, AEVC, loader, scaler=scaler)
    assert np.allclose(true, original_labels)
    assert np.allclose(predicted, scaler.inverse_transform(predicted_scaled))
Ejemplo n.º 12
0
def test_grad(testdata, testdir):
    """Gradient w.r.t. coordinates has the same shape as the coordinates."""

    with mlflow.start_run():

        # Distance 0.0 produces a segmentation fault (see MDAnalysis#2656)
        data = loaders.PDBData(testdata, 0.1, testdir)

        n_systems = len(data)
        assert n_systems == 2

        # Map atomic numbers to species indices
        amap = loaders.anummap(data.species)
        data.atomicnums_to_idxs(amap)

        n_species = len(amap)

        # Define AEVComputer
        AEVC = torchani.AEVComputer(RcR, RcA, EtaR, RsR, EtaA, Zeta, RsA, TsA,
                                    n_species)

        # With 1 radial function, 1 angular function and 5 species:
        # AEV: 1 * 5 + 1 * 5 * (5 + 1) // 2 = 5 (R) + 15 (A) = 20
        assert AEVC.aev_length == 20

        model = models.AffinityModel(n_species, AEVC.aev_length)
        loss = nn.MSELoss()

        # Move model and AEVComputer to device
        model.to(device)
        AEVC.to(device)

        # Evaluation mode switches off dropout
        model.eval()

        for idx in range(n_systems):
            pdbid, label, (species, coordinates) = data[idx]

            gradient = grad.gradient(species, coordinates, label, model, AEVC,
                                     loss, device)

            # One gradient component per atomic coordinate
            assert gradient.shape == coordinates.shape
Ejemplo n.º 13
0
        AEVC = torchani.AEVComputer(args.RcR, args.RcA, EtaR, RsR, EtaA, Zeta,
                                    RsA, TsA, n_species)

        # Save AEVComputer
        utils.saveAEVC(AEVC,
                       n_species,
                       path=os.path.join(args.outpath, "aevc.pth"))

        # Define models
        models_list = []
        optimizers_list = []
        for idx in range(args.consensus):
            models_list.append(
                models.AffinityModel(
                    n_species,
                    AEVC.aev_length,
                    layers_sizes=args.layers,
                    dropp=args.dropout,
                ))

            # Define optimizer
            optimizers_list.append(
                optim.Adam(models_list[-1].parameters(), lr=args.lr))

            # Define loss
            mse = nn.MSELoss()

            # Train model
            train_losses, valid_losses = train(
                models_list[-1],
                optimizers_list[-1],
                mse,