def loadmodel(path, eval: bool = True) -> nn.ModuleDict:
    """
    Load an AffinityModel from a checkpoint file.

    Parameters
    ----------
    path:
        Save path
    eval: bool
        Flag to put model in evaluation mode

    Returns
    -------
    nn.ModuleDict
        Model

    Notes
    -----
    Evaluation mode is needed to switch off the dropout layers when using
    the model for inference.
    """
    checkpoint = torch.load(path)

    # Rebuild the model from the saved constructor arguments, then
    # restore the trained weights
    model = models.AffinityModel(**checkpoint["args"])
    model.load_state_dict(checkpoint["state_dict"])

    # train(mode=...) covers both cases: mode=False is equivalent to
    # model.eval(), mode=True to model.train()
    model.train(mode=not eval)

    return model
def test_predict(testdata, testdir):
    """Predictions for a small dataset have the expected type and length."""
    # Distance 0.0 produces a segmentation fault (see MDAnalysis#2656)
    dataset = loaders.PDBData(testdata, 0.1, testdir)

    batch_size = 2

    # Map atomic numbers to contiguous species indices
    species_map = loaders.anummap(dataset.species)
    dataset.atomicnums_to_idxs(species_map)

    n_species = len(species_map)

    dataloader = torch.utils.data.DataLoader(
        dataset,
        batch_size=batch_size,
        shuffle=False,
        collate_fn=loaders.pad_collate,
    )

    # Define AEVComputer
    aevc = torchani.AEVComputer(RcR, RcA, EtaR, RsR, EtaA, Zeta, RsA, TsA, n_species)

    # Radial functions: 1
    # Angular functions: 1
    # Number of species: 5
    # AEV: 1 * 5 + 1 * 5 * (5 + 1) // 2 = 5 (R) + 15 (A) = 20
    assert aevc.aev_length == 20

    model = models.AffinityModel(n_species, aevc.aev_length)

    ids, true, predicted = predict.predict(model, aevc, dataloader)

    assert isinstance(true, np.ndarray)
    assert len(true) == batch_size
    assert isinstance(predicted, np.ndarray)
    assert len(predicted) == batch_size
def test_savemodel_loadmodel(tmpdir, eval, dropp):
    """Model hyperparameters and weights survive a save/load round trip."""
    n_species = 10

    # Define AEVComputer
    AEVC = torchani.AEVComputer(RcR, RcA, EtaR, RsR, EtaA, Zeta, RsA, TsA, n_species)

    # Radial functions: 1
    # Angular functions: 1
    # Number of species: 10
    # AEV: 1 * 10 + 1 * 10 * (10 + 1) // 2 = 10 (R) + 55 (A) = 65
    assert AEVC.aev_length == 65

    model = models.AffinityModel(n_species, AEVC.aev_length, dropp=dropp)

    path = os.path.join(tmpdir, "model-tmp.pth")

    utils.savemodel(model, path)
    model_loaded = utils.loadmodel(path, eval=eval)

    assert model.aev_length == model_loaded.aev_length == 65
    assert model.n_species == model_loaded.n_species == n_species
    # BUG FIX: the original asserted model.dropp == model.dropp, comparing
    # the value to itself; compare against the loaded model instead
    assert model.dropp == model_loaded.dropp
    assert model.layers_sizes == model_loaded.layers_sizes

    # Check that the weights of corresponding linear layers are identical
    for ANN, ANNl in zip(model.modules(), model_loaded.modules()):
        for layer, layerl in zip(ANN.modules(), ANNl.modules()):
            # isinstance is the idiomatic type check (was: type(layer) == nn.Linear)
            if isinstance(layer, nn.Linear):
                assert torch.allclose(layer.weight, layerl.weight)
                assert torch.allclose(layer.bias, layerl.bias)
def test_train_small_save(testdata, testdir, modelidx, tmpdir):
    """Training saves the best model to disk under the expected file name."""
    with mlflow.start_run():
        # Distance 0.0 produces a segmentation fault (see MDAnalysis#2656)
        dataset = loaders.PDBData(testdata, 0.1, testdir)

        batch_size = 2

        # Map atomic numbers to contiguous species indices
        species_map = loaders.anummap(dataset.species)
        dataset.atomicnums_to_idxs(species_map)

        n_species = len(species_map)

        dataloader = torch.utils.data.DataLoader(
            dataset,
            batch_size=batch_size,
            shuffle=False,
            collate_fn=loaders.pad_collate,
        )

        # Define AEVComputer
        aevc = torchani.AEVComputer(
            RcR, RcA, EtaR, RsR, EtaA, Zeta, RsA, TsA, n_species
        )

        # Radial functions: 1
        # Angular functions: 1
        # Number of species: 5
        # AEV: 1 * 5 + 1 * 5 * (5 + 1) // 2 = 5 (R) + 15 (A) = 20
        assert aevc.aev_length == 20

        model = models.AffinityModel(n_species, aevc.aev_length, layers_sizes=[1])
        optimizer = optim.SGD(model.parameters(), lr=0.01)
        mse = nn.MSELoss()

        # One atomic neural network per species
        assert len(model) == n_species

        train_losses, valid_losses = train.train(
            model,
            optimizer,
            mse,
            aevc,  # torchani.AEVComputer
            dataloader,
            dataloader,
            epochs=15,
            savepath=tmpdir,
            idx=modelidx,
        )

        best_name = "best.pth" if modelidx is None else f"best_{modelidx}.pth"
        assert os.path.isfile(os.path.join(tmpdir, best_name))

        # Validation loss is shifted when trainloader and testloader are the same
        assert np.allclose(train_losses[1:], valid_losses[:-1])
def test_train_small_cmap(testdata, testdir):
    """Training works when an element map collapses species together."""
    # Element map: N and O are treated as C; P and S remain distinct
    cmap = {"C": ["N", "O"]}

    with mlflow.start_run():
        # Distance 0.0 produces a segmentation fault (see MDAnalysis#2656)
        dataset = loaders.PDBData(testdata, 0.1, testdir, cmap)

        batch_size = 2

        # Map atomic numbers to contiguous species indices
        species_map = loaders.anummap(dataset.species)
        dataset.atomicnums_to_idxs(species_map)

        n_species = len(species_map)

        # cmap collapses C/N/O into a single species; P and S remain
        assert n_species == 3

        dataloader = torch.utils.data.DataLoader(
            dataset,
            batch_size=batch_size,
            shuffle=False,
            collate_fn=loaders.pad_collate,
        )

        # Define AEVComputer
        aevc = torchani.AEVComputer(
            RcR, RcA, EtaR, RsR, EtaA, Zeta, RsA, TsA, n_species
        )

        # Radial functions: 1
        # Angular functions: 1
        # Number of species: 3
        # AEV: 1 * 3 + 1 * 3 * (3 + 1) // 2 = 3 (R) + 6 (A) = 9
        assert aevc.aev_length == 9

        model = models.AffinityModel(n_species, aevc.aev_length)
        optimizer = optim.SGD(model.parameters(), lr=0.0001)
        mse = nn.MSELoss()

        # One atomic neural network per species
        assert len(model) == n_species

        train_losses, valid_losses = train.train(
            model,
            optimizer,
            mse,
            aevc,  # torchani.AEVComputer
            dataloader,
            dataloader,
            epochs=15,
        )

        # Validation loss is shifted when trainloader and testloader are the same
        assert np.allclose(train_losses[1:], valid_losses[:-1])
def test_forward_atomic(testdata, testdir):
    """Per-atom contributions sum to the model's total prediction."""
    # Distance 0.0 produces a segmentation fault (see MDAnalysis#2656)
    dataset = loaders.PDBData(testdata, 0.1, testdir)

    batch_size = 2

    # Map atomic numbers to contiguous species indices
    species_map = loaders.anummap(dataset.species)
    dataset.atomicnums_to_idxs(species_map)

    n_species = len(species_map)

    dataloader = torch.utils.data.DataLoader(
        dataset,
        batch_size=batch_size,
        shuffle=False,
        collate_fn=loaders.pad_collate,
    )

    # Grab a single batch from the loader
    _, labels, (species, coordinates) = next(iter(dataloader))

    # Move everything to device
    labels = labels.to(device)
    species = species.to(device)
    coordinates = coordinates.to(device)

    aevc = torchani.AEVComputer(RcR, RcA, EtaR, RsR, EtaA, Zeta, RsA, TsA, n_species)

    # Radial functions: 1
    # Angular functions: 1
    # Number of species: 5
    # AEV: 1 * 5 + 1 * 5 * (5 + 1) // 2 = 5 (R) + 15 (A) = 20
    assert aevc.aev_length == 20

    aev = aevc.forward((species, coordinates))
    assert aev.species.shape == species.shape
    assert aev.aevs.shape == (batch_size, 42, 20)

    model = models.AffinityModel(n_species, aevc.aev_length)

    # Move model to device
    model.to(device)

    output = model(aev.species, aev.aevs)
    assert output.shape == (batch_size,)

    atomic_contributions = model._forward_atomic(aev.species, aev.aevs)
    assert atomic_contributions.shape == species.shape

    # Summing atomic contributions over atoms recovers the total output
    summed = torch.sum(atomic_contributions, dim=1)
    assert np.allclose(output.cpu().detach().numpy(), summed.cpu().detach().numpy())
def test_affinitymodel_parameters():
    """Constructor arguments are exposed as model attributes."""
    num_inputs = 256
    dropout_probability = 0.5
    num_species = 10
    hidden_layers = [128, 64, 1]

    model = models.AffinityModel(
        num_species, num_inputs, hidden_layers, dropout_probability
    )

    assert model.n_species == num_species
    assert model.aev_length == num_inputs
    # The input size is prepended to the list of layer sizes
    assert model.layers_sizes == [num_inputs] + hidden_layers
    assert model.dropp == pytest.approx(dropout_probability)
def test_predict_baseline(testdata, testdir):
    """Baseline deltas are matched to the correct system IDs."""
    # Distance 0.0 produces a segmentation fault (see MDAnalysis#2656)
    data = loaders.PDBData(testdata, 0.1, testdir)

    batch_size = 2

    # Transform atomic numbers to species
    amap = loaders.anummap(data.species)
    data.atomicnums_to_idxs(amap)
    n_species = len(amap)

    loader = torch.utils.data.DataLoader(
        data, batch_size=batch_size, shuffle=False, collate_fn=loaders.pad_collate
    )

    # Define AEVComputer
    AEVC = torchani.AEVComputer(RcR, RcA, EtaR, RsR, EtaA, Zeta, RsA, TsA, n_species)

    # Radial functions: 1
    # Angular functions: 1
    # Number of species: 5
    # AEV: 1 * 5 + 1 * 5 * (5 + 1) // 2 = 5 (R) + 15 (A) = 20
    assert AEVC.aev_length == 20

    model = models.AffinityModel(n_species, AEVC.aev_length)

    ids, true, predicted = predict.predict(model, AEVC, loader)

    assert isinstance(true, np.ndarray)
    assert len(true) == batch_size
    assert isinstance(predicted, np.ndarray)
    assert len(predicted) == batch_size

    # Systems are the other way around with respect to file order
    # This is to test that deltas are added to the correct ID
    delta_ids = np.array(["1a4w", "1a4r"])
    delta_baseline = np.array([500, 600])
    delta = np.array([5.92, 6.66])

    s = np.argsort(delta_ids)

    ids_b, true_b, predicted_b = predict.predict(
        model, AEVC, loader, baseline=(delta_ids, delta_baseline, delta)
    )

    sort = np.argsort(ids)
    bsort = np.argsort(ids_b)

    assert (ids[sort] == ids_b[bsort]).all()
    # BUG FIX: the original compared true[sort] with true[bsort], i.e. the
    # same array against itself; the labels from the baseline run (true_b)
    # must be compared instead
    assert np.allclose(true[sort], true_b[bsort])
    assert np.allclose(predicted[sort], predicted_b[bsort] - delta_baseline[s])
def test_atomic(testdata, testdir):
    """grad.atomic contributions sum to the model prediction per system."""
    with mlflow.start_run():
        # Distance 0.0 produces a segmentation fault (see MDAnalysis#2656)
        dataset = loaders.PDBData(testdata, 0.1, testdir)

        assert len(dataset) == 2

        # Map atomic numbers to contiguous species indices
        species_map = loaders.anummap(dataset.species)
        dataset.atomicnums_to_idxs(species_map)

        n_species = len(species_map)

        # Define AEVComputer
        aevc = torchani.AEVComputer(
            RcR, RcA, EtaR, RsR, EtaA, Zeta, RsA, TsA, n_species
        )

        # Radial functions: 1
        # Angular functions: 1
        # Number of species: 5
        # AEV: 1 * 5 + 1 * 5 * (5 + 1) // 2 = 5 (R) + 15 (A) = 20
        assert aevc.aev_length == 20

        model = models.AffinityModel(n_species, aevc.aev_length)

        # Move model and AEVComputer to device
        model.to(device)
        aevc.to(device)

        # Evaluation mode switches dropout off for deterministic output
        model.eval()

        for pdbid, _, (species, coordinates) in dataset:
            atomic = grad.atomic(species, coordinates, model, aevc, device)

            # Add fictitious batch dimension
            species = species.unsqueeze(0)
            coordinates = coordinates.unsqueeze(0)

            assert atomic.shape == species.shape

            aevs = aevc.forward((species, coordinates)).aevs
            prediction = model(species, aevs)

            assert np.allclose(
                torch.sum(atomic, dim=1).cpu().detach().numpy(),
                prediction.cpu().detach().numpy(),
            )
def test_evaluate(testdata, testdir, tmpdir):
    """Consensus evaluation writes predictions and regression plots."""
    # Distance 0.0 produces a segmentation fault (see MDAnalysis#2656)
    dataset = loaders.PDBData(testdata, 0.1, testdir)

    batch_size = 2

    # Map atomic numbers to contiguous species indices
    species_map = loaders.anummap(dataset.species)
    dataset.atomicnums_to_idxs(species_map)

    n_species = len(species_map)

    dataloader = torch.utils.data.DataLoader(
        dataset,
        batch_size=batch_size,
        shuffle=False,
        collate_fn=loaders.pad_collate,
    )

    # Define AEVComputer
    aevc = torchani.AEVComputer(RcR, RcA, EtaR, RsR, EtaA, Zeta, RsA, TsA, n_species)

    # Radial functions: 1
    # Angular functions: 1
    # Number of species: 5
    # AEV: 1 * 5 + 1 * 5 * (5 + 1) // 2 = 5 (R) + 15 (A) = 20
    assert aevc.aev_length == 20

    # Two-model consensus
    mods = [models.AffinityModel(n_species, aevc.aev_length) for _ in range(2)]

    with mlflow.start_run():
        predict.evaluate(mods, dataloader, aevc, outpath=tmpdir)

    # All output artifacts must exist
    for fname in ("predict.csv", "regplot-predict.pdf", "regplot-predict.png"):
        assert os.path.isfile(os.path.join(tmpdir, fname))
def test_predict_scaling(testdata, testdir):
    """A label scaler maps predictions back to the original label range."""
    # Distance 0.0 produces a segmentation fault (see MDAnalysis#2656)
    dataset = loaders.PDBData(testdata, 0.1, testdir)

    original_labels = dataset.labels.copy()

    # Scale labels in place; with two systems they map exactly to +/- 1
    scaler = utils.labels_scaler(dataset)
    assert np.allclose(dataset.labels, [1.0, -1.0])

    batch_size = 2

    # Map atomic numbers to contiguous species indices
    species_map = loaders.anummap(dataset.species)
    dataset.atomicnums_to_idxs(species_map)

    n_species = len(species_map)

    dataloader = torch.utils.data.DataLoader(
        dataset,
        batch_size=batch_size,
        shuffle=False,
        collate_fn=loaders.pad_collate,
    )

    # Define AEVComputer
    aevc = torchani.AEVComputer(RcR, RcA, EtaR, RsR, EtaA, Zeta, RsA, TsA, n_species)

    # Radial functions: 1
    # Angular functions: 1
    # Number of species: 5
    # AEV: 1 * 5 + 1 * 5 * (5 + 1) // 2 = 5 (R) + 15 (A) = 20
    assert aevc.aev_length == 20

    model = models.AffinityModel(n_species, aevc.aev_length)

    # Without a scaler, predict returns labels in the scaled range
    ids, true_scaled, predicted_scaled = predict.predict(model, aevc, dataloader)
    assert np.allclose(true_scaled, dataset.labels)
    assert (-1 <= true_scaled).all() and (true_scaled <= 1).all()

    # With the scaler, labels and predictions are inverse-transformed
    ids, true, predicted = predict.predict(model, aevc, dataloader, scaler=scaler)
    assert np.allclose(true, original_labels)
    assert np.allclose(predicted, scaler.inverse_transform(predicted_scaled))
def test_grad(testdata, testdir):
    """grad.gradient returns one gradient vector per atomic coordinate."""
    with mlflow.start_run():
        # Distance 0.0 produces a segmentation fault (see MDAnalysis#2656)
        dataset = loaders.PDBData(testdata, 0.1, testdir)

        n_systems = len(dataset)
        assert n_systems == 2

        # Map atomic numbers to contiguous species indices
        species_map = loaders.anummap(dataset.species)
        dataset.atomicnums_to_idxs(species_map)

        n_species = len(species_map)

        # Define AEVComputer
        aevc = torchani.AEVComputer(
            RcR, RcA, EtaR, RsR, EtaA, Zeta, RsA, TsA, n_species
        )

        # Radial functions: 1
        # Angular functions: 1
        # Number of species: 5
        # AEV: 1 * 5 + 1 * 5 * (5 + 1) // 2 = 5 (R) + 15 (A) = 20
        assert aevc.aev_length == 20

        model = models.AffinityModel(n_species, aevc.aev_length)
        loss = nn.MSELoss()

        # Move model and AEVComputer to device
        model.to(device)
        aevc.to(device)

        # Evaluation mode switches dropout off for deterministic gradients
        model.eval()

        for idx in range(n_systems):
            pdbid, label, (species, coordinates) = dataset[idx]

            gradient = grad.gradient(
                species, coordinates, label, model, aevc, loss, device
            )

            # Gradient w.r.t. coordinates matches the coordinates' shape
            assert gradient.shape == coordinates.shape
AEVC = torchani.AEVComputer(args.RcR, args.RcA, EtaR, RsR, EtaA, Zeta, RsA, TsA, n_species) # Save AEVComputer utils.saveAEVC(AEVC, n_species, path=os.path.join(args.outpath, "aevc.pth")) # Define models models_list = [] optimizers_list = [] for idx in range(args.consensus): models_list.append( models.AffinityModel( n_species, AEVC.aev_length, layers_sizes=args.layers, dropp=args.dropout, )) # Define optimizer optimizers_list.append( optim.Adam(models_list[-1].parameters(), lr=args.lr)) # Define loss mse = nn.MSELoss() # Train model train_losses, valid_losses = train( models_list[-1], optimizers_list[-1], mse,