Example #1
def test_pdbloader_ligand_coordinates(testdata, testdir):
    # Distance 0.0 produces a segmentation fault (see MDAnalysis#2656)
    data = loaders.PDBData(testdata, 0.1, testdir)

    batch_size = 2

    # Transform atomic numbers to species
    amap = loaders.anummap(data.species)
    data.atomicnums_to_idxs(amap)

    loader = torch.utils.data.DataLoader(data,
                                         batch_size=batch_size,
                                         shuffle=False,
                                         collate_fn=loaders.pad_collate)
    iloader = iter(loader)

    ids, labels, (species, coordinates) = next(iloader)

    assert (ids == np.array(["1a4r", "1a4w"])).all()

    assert species.shape == (batch_size, 42)  # Ligand 1a4w is the largest
    assert coordinates.shape == (batch_size, 42, 3)  # Ligand 1a4w is the largest

    assert np.allclose(coordinates[0, 0], [102.486, 24.870, -2.909])
    assert np.allclose(coordinates[0, -1], [0.0, 0.0, 0.0])  # 1a4r is padded

    assert np.allclose(coordinates[1, 0], [17.735, -17.178, 22.612])
    assert np.allclose(coordinates[1, -1], [18.049, -13.554, 14.106])
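
The assertions above rely on how batches are padded: every system is brought up to the size of the largest ligand in the batch, with zero-padded coordinates (and, as later examples show, species padded with -1). A minimal collate sketch of that behaviour, for illustration only and not the actual loaders.pad_collate:

import numpy as np
import torch


def pad_collate_sketch(batch):
    # batch: list of (id, label, (species, coordinates)) items
    ids, labels, systems = zip(*batch)

    max_atoms = max(len(species) for species, _ in systems)

    # Species padded with -1, coordinates padded with zeros
    padded_species = torch.full((len(batch), max_atoms), -1, dtype=torch.long)
    padded_coordinates = torch.zeros(len(batch), max_atoms, 3)

    for i, (species, coordinates) in enumerate(systems):
        n = len(species)
        padded_species[i, :n] = torch.as_tensor(species, dtype=torch.long)
        padded_coordinates[i, :n] = torch.as_tensor(coordinates,
                                                    dtype=torch.float)

    return np.asarray(ids), torch.tensor(labels), (padded_species,
                                                   padded_coordinates)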
Example #2
def test_pdbloader_batch(testdata, testdir, distance, n1_atoms, n2_atoms):

    data = loaders.PDBData(testdata, distance, testdir)

    batch_size = 2

    # Transform atomic numbers to species
    amap = loaders.anummap(data.species)
    data.atomicnums_to_idxs(amap)

    loader = torch.utils.data.DataLoader(data,
                                         batch_size=batch_size,
                                         shuffle=False,
                                         collate_fn=loaders.pad_collate)
    iloader = iter(loader)

    ids, labels, (species, coordinates) = next(iloader)

    assert isinstance(ids, np.ndarray)
    assert ids.shape == (batch_size, )

    assert isinstance(labels, torch.Tensor)
    assert labels.shape == (batch_size, )

    assert isinstance(species, torch.Tensor)
    assert species.shape == (batch_size, max(n1_atoms, n2_atoms))

    assert isinstance(coordinates, torch.Tensor)
    assert coordinates.shape == (batch_size, max(n1_atoms, n2_atoms), 3)
Example #3
def test_pdbloader_removeHs(testdata, testdir, distance, n1_atoms, n2_atoms):

    data = loaders.PDBData(testdata, distance, testdir, removeHs=True)

    batch_size = 1

    # Transform atomic numbers to species
    amap = loaders.anummap(data.species)
    data.atomicnums_to_idxs(amap)

    loader = torch.utils.data.DataLoader(data,
                                         batch_size=batch_size,
                                         shuffle=False,
                                         collate_fn=loaders.pad_collate)
    iloader = iter(loader)

    n_atoms_iter = iter([n1_atoms, n2_atoms])

    for ids, label, (species, coordinates) in iloader:

        n_atoms = next(n_atoms_iter)

        assert isinstance(ids, np.ndarray)
        assert ids.shape == (batch_size, )

        assert isinstance(label, torch.Tensor)
        assert label.shape == (batch_size, )

        assert isinstance(species, torch.Tensor)
        assert species.shape == (batch_size, n_atoms)

        assert isinstance(coordinates, torch.Tensor)
        assert coordinates.shape == (batch_size, n_atoms, 3)
Example #4
def test_predict(testdata, testdir):

    # Distance 0.0 produces a segmentation fault (see MDAnalysis#2656)
    data = loaders.PDBData(testdata, 0.1, testdir)

    batch_size = 2

    # Transform atomic numbers to species
    amap = loaders.anummap(data.species)
    data.atomicnums_to_idxs(amap)

    n_species = len(amap)

    loader = torch.utils.data.DataLoader(
        data, batch_size=batch_size, shuffle=False, collate_fn=loaders.pad_collate
    )

    # Define AEVComputer
    AEVC = torchani.AEVComputer(RcR, RcA, EtaR, RsR, EtaA, Zeta, RsA, TsA, n_species)

    # Radial functions: 1
    # Angular functions: 1
    # Number of species: 5
    # AEV: 1 * 5 + 1 * 5 * (5 + 1) // 2 = 5 (R) + 15 (A) = 20
    assert AEVC.aev_length == 20

    model = models.AffinityModel(n_species, AEVC.aev_length)

    ids, true, predicted = predict.predict(model, AEVC, loader)

    assert isinstance(true, np.ndarray)
    assert len(true) == batch_size

    assert isinstance(predicted, np.ndarray)
    assert len(predicted) == batch_size
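
The AEV-length arithmetic spelled out in the comments recurs throughout these examples; the helper below (a sketch, not part of aescore or torchani) simply restates that formula.

def expected_aev_length(n_radial, n_angular, n_species):
    # n_radial radial functions give n_radial * n_species terms; n_angular
    # angular functions give one term per unordered species pair
    return (n_radial * n_species +
            n_angular * n_species * (n_species + 1) // 2)


assert expected_aev_length(1, 1, 5) == 20  # value asserted above
assert expected_aev_length(1, 1, 3) == 9  # value asserted in the cmap example below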
Example #5
def test_train_small_save(testdata, testdir, modelidx, tmpdir):

    with mlflow.start_run():

        # Distance 0.0 produces a segmentation fault (see MDAnalysis#2656)
        data = loaders.PDBData(testdata, 0.1, testdir)

        batch_size = 2

        # Transform atomic numbers to species
        amap = loaders.anummap(data.species)
        data.atomicnums_to_idxs(amap)

        n_species = len(amap)

        loader = torch.utils.data.DataLoader(data,
                                             batch_size=batch_size,
                                             shuffle=False,
                                             collate_fn=loaders.pad_collate)

        # Define AEVComputer
        AEVC = torchani.AEVComputer(RcR, RcA, EtaR, RsR, EtaA, Zeta, RsA, TsA,
                                    n_species)

        # Radial functions: 1
        # Angular functions: 1
        # Number of species: 5
        # AEV: 1 * 5 + 1 * 5 * (5 + 1) // 2 = 5 (R) + 15 (A) = 20
        assert AEVC.aev_length == 20

        model = models.AffinityModel(n_species,
                                     AEVC.aev_length,
                                     layers_sizes=[1])
        optimizer = optim.SGD(model.parameters(), lr=0.01)
        mse = nn.MSELoss()

        # Check number of ANNs
        assert len(model) == n_species

        train_losses, valid_losses = train.train(
            model,
            optimizer,
            mse,
            AEVC,  # torchani.AEVComputer
            loader,
            loader,
            epochs=15,
            savepath=tmpdir,
            idx=modelidx,
        )

        assert os.path.isfile(
            os.path.join(
                tmpdir,
                "best.pth" if modelidx is None else f"best_{modelidx}.pth"))

        # Validation loss is shifted when trainloader and testloader are the same
        assert np.allclose(train_losses[1:], valid_losses[:-1])
Example #6
def test_atomicnum_map(testdata, testdir):

    data = loaders.PDBData(testdata, 2.0, testdir)

    amap = loaders.anummap(data.species)

    # Elements: H, C, N, O, P, S
    assert len(amap) == 6
    assert [1, 6, 7, 8, 15, 16] == list(amap.keys())
    assert list(range(6)) == list(amap.values())
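
The assertions above pin down the shape of the atomic-number map; the sketch below reproduces that behaviour under the assumption that loaders.anummap sorts the atomic numbers it encounters and assigns them consecutive 0-based indices (the real implementation may differ).

def anummap_sketch(*species_lists):
    # Collect every atomic number, sort, and enumerate from 0
    atomicnums = sorted({int(a) for species in species_lists
                         for mol in species for a in mol})
    return {a: i for i, a in enumerate(atomicnums)}


assert anummap_sketch([[1, 6, 7, 8, 15, 16]]) == {1: 0, 6: 1, 7: 2, 8: 3, 15: 4, 16: 5}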
Example #7
def test_forward_atomic(testdata, testdir):

    # Distance 0.0 produces a segmentation fault (see MDAnalysis#2656)
    data = loaders.PDBData(testdata, 0.1, testdir)

    batch_size = 2

    # Transform atomic numbers to species
    amap = loaders.anummap(data.species)
    data.atomicnums_to_idxs(amap)

    n_species = len(amap)

    loader = torch.utils.data.DataLoader(data,
                                         batch_size=batch_size,
                                         shuffle=False,
                                         collate_fn=loaders.pad_collate)
    iloader = iter(loader)

    _, labels, (species, coordinates) = next(iloader)

    # Move everything to device
    labels = labels.to(device)
    species = species.to(device)
    coordinates = coordinates.to(device)

    AEVC = torchani.AEVComputer(RcR, RcA, EtaR, RsR, EtaA, Zeta, RsA, TsA,
                                n_species)

    # Radial functions: 1
    # Angular functions: 1
    # Number of species: 5
    # AEV: 1 * 5 + 1 * 5 * (5 + 1) // 2 = 5 (R) + 15 (A) = 20
    assert AEVC.aev_length == 20

    aev = AEVC.forward((species, coordinates))

    assert aev.species.shape == species.shape
    assert aev.aevs.shape == (batch_size, 42, 20)

    model = models.AffinityModel(n_species, AEVC.aev_length)

    # Move model to device
    model.to(device)

    output = model(aev.species, aev.aevs)
    assert output.shape == (batch_size, )

    atomic_contributions = model._forward_atomic(aev.species, aev.aevs)

    assert atomic_contributions.shape == species.shape

    o = torch.sum(atomic_contributions, dim=1)

    assert np.allclose(output.cpu().detach().numpy(), o.cpu().detach().numpy())
Example #8
def test_train_small_cmap(testdata, testdir):

    # Map N and O to C, leave P and S unchanged
    cmap = {"C": ["N", "O"]}

    with mlflow.start_run():

        # Distance 0.0 produces a segmentation fault (see MDAnalysis#2656)
        data = loaders.PDBData(testdata, 0.1, testdir, cmap)

        batch_size = 2

        # Transform atomic numbers to species
        amap = loaders.anummap(data.species)
        data.atomicnums_to_idxs(amap)

        n_species = len(amap)

        # cmap collapses N and O into C, leaving C, P and S
        assert n_species == 3

        loader = torch.utils.data.DataLoader(data,
                                             batch_size=batch_size,
                                             shuffle=False,
                                             collate_fn=loaders.pad_collate)

        # Define AEVComputer
        AEVC = torchani.AEVComputer(RcR, RcA, EtaR, RsR, EtaA, Zeta, RsA, TsA,
                                    n_species)

        # Radial functions: 1
        # Angular functions: 1
        # Number of species: 3
        # AEV: 1 * 3 + 1 * 3 * (3 + 1) // 2 = 3 (R) + 6 (A) = 9
        assert AEVC.aev_length == 9

        model = models.AffinityModel(n_species, AEVC.aev_length)
        optimizer = optim.SGD(model.parameters(), lr=0.0001)
        mse = nn.MSELoss()

        # Check number of ANNs
        assert len(model) == n_species

        train_losses, valid_losses = train.train(
            model,
            optimizer,
            mse,
            AEVC,  # torchani.AEVComputer
            loader,
            loader,
            epochs=15,
        )

        # Validation loss is shifted when trainloader and testloader are the same
        assert np.allclose(train_losses[1:], valid_losses[:-1])
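
The drop from five species to three in this example comes entirely from the cmap; the sketch below illustrates the remapping, assuming that each cmap entry new: old (a string or a list of strings) replaces the old element symbols with new before the species map is built.

def apply_cmap_sketch(elements, cmap):
    # Reverse lookup: old symbol -> new symbol
    reverse = {}
    for new, olds in cmap.items():
        for old in ([olds] if isinstance(olds, str) else olds):
            reverse[old] = new
    return [reverse.get(e, e) for e in elements]


# C, N, O, P, S collapse to C, P, S, hence n_species == 3 above
assert set(apply_cmap_sketch(list("CNOPS"), {"C": ["N", "O"]})) == {"C", "P", "S"}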
Example #9
def test_predict_baseline(testdata, testdir):

    # Distance 0.0 produces a segmentation fault (see MDAnalysis#2656)
    data = loaders.PDBData(testdata, 0.1, testdir)

    batch_size = 2

    # Transform atomic numbers to species
    amap = loaders.anummap(data.species)
    data.atomicnums_to_idxs(amap)

    n_species = len(amap)

    loader = torch.utils.data.DataLoader(
        data, batch_size=batch_size, shuffle=False, collate_fn=loaders.pad_collate
    )

    # Define AEVComputer
    AEVC = torchani.AEVComputer(RcR, RcA, EtaR, RsR, EtaA, Zeta, RsA, TsA, n_species)

    # Radial functions: 1
    # Angular functions: 1
    # Number of species: 5
    # AEV: 1 * 5 + 1 * 5 * (5 + 1) // 2 = 5 (R) + 15 (A) = 20
    assert AEVC.aev_length == 20

    model = models.AffinityModel(n_species, AEVC.aev_length)

    ids, true, predicted = predict.predict(model, AEVC, loader)

    assert isinstance(true, np.ndarray)
    assert len(true) == batch_size

    assert isinstance(predicted, np.ndarray)
    assert len(predicted) == batch_size

    # Systems are the other way around with respect to file order
    # This is to test that deltas are added to the correct ID
    delta_ids = np.array(["1a4w", "1a4r"])
    delta_baseline = np.array([500, 600])
    delta = np.array([5.92, 6.66])
    s = np.argsort(delta_ids)

    ids_b, true_b, predicted_b = predict.predict(
        model, AEVC, loader, baseline=(delta_ids, delta_baseline, delta)
    )

    sort = np.argsort(ids)
    bsort = np.argsort(ids_b)

    assert (ids[sort] == ids_b[bsort]).all()
    assert np.allclose(true[sort], true_b[bsort])
    assert np.allclose(predicted[sort], predicted_b[bsort] - delta_baseline[s])
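
The last assertion only holds if baseline values are matched to predictions by system ID rather than by array position. A minimal sketch of that bookkeeping, assuming the delta-learning prediction is the model output plus the per-ID baseline (the hypothetical add_baseline_by_id below is not part of the library):

import numpy as np


def add_baseline_by_id(ids, predicted, baseline_ids, baseline_values):
    # Match baselines to predictions by ID, not by position
    lookup = dict(zip(baseline_ids, baseline_values))
    return np.array([p + lookup[i] for i, p in zip(ids, predicted)])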
Example #10
File: test_grad.py  Project: RMeli/aescore
def test_atomic(testdata, testdir):

    with mlflow.start_run():

        # Distance 0.0 produces a segmentation fault (see MDAnalysis#2656)
        data = loaders.PDBData(testdata, 0.1, testdir)

        n_systems = len(data)

        assert n_systems == 2

        # Transform atomic numbers to species
        amap = loaders.anummap(data.species)
        data.atomicnums_to_idxs(amap)

        n_species = len(amap)

        # Define AEVComputer
        AEVC = torchani.AEVComputer(RcR, RcA, EtaR, RsR, EtaA, Zeta, RsA, TsA,
                                    n_species)

        # Radial functions: 1
        # Angular functions: 1
        # Number of species: 5
        # AEV: 1 * 5 + 1 * 5 * (5 + 1) // 2 = 5 (R) + 15 (A) = 20
        assert AEVC.aev_length == 20

        model = models.AffinityModel(n_species, AEVC.aev_length)

        # Move model and AEVComputer to device
        model.to(device)
        AEVC.to(device)

        # Model in evaluation mode
        model.eval()

        for pdbid, _, (species, coordinates) in data:

            atomic = grad.atomic(species, coordinates, model, AEVC, device)

            # Add fictitious batch dimension
            species = species.unsqueeze(0)
            coordinates = coordinates.unsqueeze(0)

            assert atomic.shape == species.shape

            aevs = AEVC.forward((species, coordinates)).aevs
            prediction = model(species, aevs)

            assert np.allclose(
                torch.sum(atomic, dim=1).cpu().detach().numpy(),
                prediction.cpu().detach().numpy(),
            )
Example #11
def test_pdbloader_species_cmap_toX(testdata, testdir):

    # Map all elements to dummy atom
    cmap = {"X": ["C", "N", "O", "S", "P"]}

    # Distance 0.0 produces a segmentation fault (see MDAnalysis#2656)
    data = loaders.PDBData(testdata, 0.1, testdir, cmap)

    # TODO: Access to data loader is quite ugly... NamedTuple?
    assert np.allclose(
        data[0][2][0],  # Species for first ligand
        np.zeros(28),  # Element X maps to 0
    )

    assert np.allclose(
        data[1][2][0],  # Species for second ligand
        np.zeros(42),  # Element X maps to 0
    )

    batch_size = 2

    # Transform atomic numbers to species
    amap = loaders.anummap(data.species)
    data.atomicnums_to_idxs(amap)

    assert len(amap) == 1

    loader = torch.utils.data.DataLoader(data,
                                         batch_size=batch_size,
                                         shuffle=False,
                                         collate_fn=loaders.pad_collate)
    iloader = iter(loader)

    ids, labels, (species, coordinates) = next(iloader)

    assert (ids == np.array(["1a4r", "1a4w"])).all()

    assert species.shape == (batch_size, 42)  # Ligand 1a4w is the largest

    # Test ligand 1a4r (padded with -1)
    assert torch.allclose(
        species[0, :],
        torch.tensor([0] * 28 + 14 * [-1]),
    )

    # Test ligand 1a4w (no padding)
    assert torch.allclose(
        species[1, :],
        torch.zeros(42, dtype=int),
    )
Example #12
def test_pdbloader_species_cmap_OtoS(testdata, testdir):

    # Map O to S
    cmap = {"S": "O"}

    # Distance 0.0 produces a segmentation fault (see MDAnalysis#2656)
    data = loaders.PDBData(testdata, 0.1, testdir, cmap)

    batch_size = 2

    # Transform atomic numbers to species
    amap = loaders.anummap(data.species)
    data.atomicnums_to_idxs(amap)

    # Check O is not in amap
    with pytest.raises(KeyError):
        amap[8]

    loader = torch.utils.data.DataLoader(data,
                                         batch_size=batch_size,
                                         shuffle=False,
                                         collate_fn=loaders.pad_collate)
    iloader = iter(loader)

    ids, labels, (species, coordinates) = next(iloader)

    assert (ids == np.array(["1a4r", "1a4w"])).all()

    assert species.shape == (batch_size, 42)  # Ligand 1a4w is the largest

    # Test ligand 1a4r (padded with -1)
    assert torch.allclose(
        species[0, :],
        torch.tensor(
            elements_to_idxs("NPSSSPSSSCCSCSCSCNCNCCSNCNNC", amap) +
            14 * [-1]),
    )

    # Test ligand 1a4w (no padding)
    assert torch.allclose(
        species[1, :],
        torch.tensor(
            elements_to_idxs("CCCCCCCCCCNCCSSSNCCSCCCNCNNNCCCCCCCSSCCCCN",
                             amap)),
    )
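
elements_to_idxs is a helper presumably defined in (or imported by) the test module; a hypothetical sketch of what it does, for readability: each one-letter element symbol is converted to its atomic number and then looked up in amap.

_SYMBOL_TO_ANUM = {"H": 1, "C": 6, "N": 7, "O": 8, "P": 15, "S": 16}


def elements_to_idxs_sketch(elements, amap):
    # One-letter element symbols -> atomic numbers -> 0-based species indices
    return [amap[_SYMBOL_TO_ANUM[e]] for e in elements]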
Example #13
def test_predict_scaling(testdata, testdir):

    # Distance 0.0 produces a segmentation fault (see MDAnalysis#2656)
    data = loaders.PDBData(testdata, 0.1, testdir)

    original_labels = data.labels.copy()

    # Scale labels
    scaler = utils.labels_scaler(data)

    assert np.allclose(data.labels, [1.0, -1.0])

    batch_size = 2

    # Transform atomic numbers to species
    amap = loaders.anummap(data.species)
    data.atomicnums_to_idxs(amap)

    n_species = len(amap)

    loader = torch.utils.data.DataLoader(
        data, batch_size=batch_size, shuffle=False, collate_fn=loaders.pad_collate
    )

    # Define AEVComputer
    AEVC = torchani.AEVComputer(RcR, RcA, EtaR, RsR, EtaA, Zeta, RsA, TsA, n_species)

    # Radial functions: 1
    # Angular functions: 1
    # Number of species: 5
    # AEV: 1 * 5 + 1 * 5 * (5 + 1) // 2 = 5 (R) + 15 (A) = 20
    assert AEVC.aev_length == 20

    model = models.AffinityModel(n_species, AEVC.aev_length)

    ids, true_scaled, predicted_scaled = predict.predict(model, AEVC, loader)

    assert np.allclose(true_scaled, data.labels)
    assert (-1 <= true_scaled).all() and (true_scaled <= 1).all()

    ids, true, predicted = predict.predict(model, AEVC, loader, scaler=scaler)

    assert np.allclose(true, original_labels)
    assert np.allclose(predicted, scaler.inverse_transform(predicted_scaled))
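
The scaling assertions above are consistent with a min-max scaler onto [-1, 1]; utils.labels_scaler may well be implemented differently (e.g. with scikit-learn), so the arithmetic below is only a sketch of the behaviour being tested.

import numpy as np

labels = np.array([6.66, 5.92])  # labels asserted in test_pdbloader_labels
lo, hi = labels.min(), labels.max()

scaled = 2 * (labels - lo) / (hi - lo) - 1
assert np.allclose(scaled, [1.0, -1.0])  # matches the assertion on data.labels

unscaled = (scaled + 1) * (hi - lo) / 2 + lo  # what inverse_transform undoes
assert np.allclose(unscaled, labels)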
Example #14
def test_grad(testdata, testdir):

    with mlflow.start_run():

        # Distance 0.0 produces a segmentation fault (see MDAnalysis#2656)
        data = loaders.PDBData(testdata, 0.1, testdir)

        n_systems = len(data)

        assert n_systems == 2

        # Transform atomic numbers to species
        amap = loaders.anummap(data.species)
        data.atomicnums_to_idxs(amap)

        n_species = len(amap)

        # Define AEVComputer
        AEVC = torchani.AEVComputer(RcR, RcA, EtaR, RsR, EtaA, Zeta, RsA, TsA,
                                    n_species)

        # Radial functions: 1
        # Angular functions: 1
        # Number of species: 5
        # AEV: 1 * 5 + 1 * 5 * (5 + 1) // 2 = 5 (R) + 15 (A) = 20
        assert AEVC.aev_length == 20

        model = models.AffinityModel(n_species, AEVC.aev_length)
        loss = nn.MSELoss()

        # Move model and AEVComputer to device
        model.to(device)
        AEVC.to(device)

        # Model in evaluation mode
        model.eval()

        for i in range(n_systems):
            pdbid, label, (species, coordinates) = data[i]

            gradient = grad.gradient(species, coordinates, label, model, AEVC,
                                     loss, device)

            assert gradient.shape == coordinates.shape
Example #15
def test_vsloader(testvsdata, testdir, distance, n_atoms, f_label, l_label):

    data = loaders.VSData(testvsdata, distance, testdir, labelspath=testdir)

    # One batch here corresponds to one target
    batch_size = 10

    # Transform atomic numbers to species
    amap = loaders.anummap(data.species)
    data.atomicnums_to_idxs(amap)

    loader = torch.utils.data.DataLoader(data,
                                         batch_size=batch_size,
                                         shuffle=False,
                                         collate_fn=loaders.pad_collate)
    iloader = iter(loader)

    n_atoms_iter = iter(n_atoms)
    f_label_iter = iter(f_label)  # Iterator over first label (in batch)
    l_label_iter = iter(l_label)  # Iterator over last label (in batch)

    for ids, label, (species, coordinates) in iloader:

        n_atoms = next(n_atoms_iter)
        f_label = next(f_label_iter)
        l_label = next(l_label_iter)

        assert isinstance(ids, np.ndarray)
        assert ids.shape == (batch_size, )
        assert ids[0][4:] == "_pose_1"
        assert ids[-2][4:] == "_pose_9"
        assert ids[-1][4:] == "_ligand"

        assert isinstance(label, torch.Tensor)
        assert label.shape == (batch_size, )
        assert label[0].item() == pytest.approx(f_label)
        assert label[-1].item() == pytest.approx(l_label)

        assert isinstance(species, torch.Tensor)
        assert species.shape == (batch_size, n_atoms)

        assert isinstance(coordinates, torch.Tensor)
        assert coordinates.shape == (batch_size, n_atoms, 3)
Example #16
def test_pdbloader_labels(testdata, testdir):
    # Distance 0.0 produces a segmentation fault (see MDAnalysis#2656)
    data = loaders.PDBData(testdata, 0.1, testdir)

    batch_size = 2

    # Transform atomic numbers to species
    amap = loaders.anummap(data.species)
    data.atomicnums_to_idxs(amap)

    loader = torch.utils.data.DataLoader(data,
                                         batch_size=batch_size,
                                         shuffle=False,
                                         collate_fn=loaders.pad_collate)
    iloader = iter(loader)

    ids, labels, (species, coordinates) = next(iloader)

    assert ids.shape == (batch_size, )
    assert (ids == np.array(["1a4r", "1a4w"])).all()
    assert labels.shape == (batch_size, )
    assert torch.allclose(labels, torch.tensor([6.66, 5.92]))
Example #17
def test_aev_from_loader(testdata, testdir):

    # Distance 0.0 produces a segmentation fault (see MDAnalysis#2656)
    data = loaders.PDBData(testdata, 0.1, testdir)

    batch_size = 2

    # Compute map of atomic numbers to indices from species
    amap = loaders.anummap(data.species)

    # Transform atomic number to species in data
    data.atomicnums_to_idxs(amap)

    n_species = len(amap)

    loader = torch.utils.data.DataLoader(
        data, batch_size=batch_size, shuffle=False, collate_fn=loaders.pad_collate
    )
    iloader = iter(loader)

    _, labels, (species, coordinates) = next(iloader)

    # Move everything to device
    labels = labels.to(device)
    species = species.to(device)
    coordinates = coordinates.to(device)

    AEVC = torchani.AEVComputer(RcR, RcA, EtaR, RsR, EtaA, Zeta, RsA, TsA, n_species)

    # Radial functions: 1
    # Angular functions: 1
    # Number of species: 5
    # AEV: 1 * 5 + 1 * 5 * (5 + 1) // 2 = 5 (R) + 15 (A) = 20
    assert AEVC.aev_length == 20

    aev = AEVC.forward((species, coordinates))

    assert aev.species.shape == species.shape
    assert aev.aevs.shape == (batch_size, 42, 20)
Example #18
def test_pdbloader_ligand_species(testdata, testdir):

    # Distance 0.0 produces a segmentation fault (see MDAnalysis#2656)
    data = loaders.PDBData(testdata, 0.1, testdir)

    batch_size = 2

    # Transform atomic numbers to species
    amap = loaders.anummap(data.species)
    data.atomicnums_to_idxs(amap)

    loader = torch.utils.data.DataLoader(data,
                                         batch_size=batch_size,
                                         shuffle=False,
                                         collate_fn=loaders.pad_collate)
    iloader = iter(loader)

    ids, labels, (species, coordinates) = next(iloader)

    assert (ids == np.array(["1a4r", "1a4w"])).all()

    assert species.shape == (batch_size, 42)  # Ligand 1a4w is the largest

    # Test ligand 1a4r (padded with -1)
    assert torch.allclose(
        species[0, :],
        torch.tensor(
            elements_to_idxs("NPOOOPOOOCCOCOCOCNCNCCONCNNC", amap) +
            14 * [-1]),
    )

    # Test ligand 1a4w (no padding)
    assert torch.allclose(
        species[1, :],
        torch.tensor(
            elements_to_idxs("CCCCCCCCCCNCCSOONCCOCCCNCNNNCCCCCCCSOCCCCN",
                             amap)),
    )
Example #19
def test_evaluate(testdata, testdir, tmpdir):

    # Distance 0.0 produces a segmentation fault (see MDAnalysis#2656)
    data = loaders.PDBData(testdata, 0.1, testdir)

    batch_size = 2

    # Transform atomic numbers to species
    amap = loaders.anummap(data.species)
    data.atomicnums_to_idxs(amap)

    n_species = len(amap)

    loader = torch.utils.data.DataLoader(
        data, batch_size=batch_size, shuffle=False, collate_fn=loaders.pad_collate
    )

    # Define AEVComputer
    AEVC = torchani.AEVComputer(RcR, RcA, EtaR, RsR, EtaA, Zeta, RsA, TsA, n_species)

    # Radial functions: 1
    # Angular functions: 1
    # Number of species: 5
    # AEV: 1 * 5 + 1 * 5 * (5 + 1) // 2 = 5 (R) + 15 (A) = 20
    assert AEVC.aev_length == 20

    mods = [
        models.AffinityModel(n_species, AEVC.aev_length),
        models.AffinityModel(n_species, AEVC.aev_length),
    ]

    with mlflow.start_run():
        predict.evaluate(mods, loader, AEVC, outpath=tmpdir)

        assert os.path.isfile(os.path.join(tmpdir, "predict.csv"))
        assert os.path.isfile(os.path.join(tmpdir, "regplot-predict.pdf"))
        assert os.path.isfile(os.path.join(tmpdir, "regplot-predict.png"))
Example #20
                                    cmap,
                                    desc="Test set",
                                    removeHs=args.removeHs,
                                )
            else:
                testdata = loaders.VSData(
                    args.testfile,
                    args.distance,
                    args.datapaths,
                    cmap,
                    desc="Test set",
                    removeHs=args.removeHs,
                    labelspath=args.vscreening,
                )

            amap = loaders.anummap(traindata.species, validdata.species,
                                   testdata.species)
        else:
            amap = loaders.anummap(traindata.species, validdata.species)

        n_species = len(amap)

        if args.scale:
            if args.testfile is None:
                scaler = utils.labels_scaler(traindata, validdata)
            else:
                scaler = utils.labels_scaler(traindata, validdata, testdata)
        else:
            scaler = None

        # Transform atomic numbers to 0-based indices
        traindata.atomicnums_to_idxs(amap)