def test_predict(testdata, testdir):
    """Check the types and lengths of the arrays returned by ``predict.predict``."""
    n_per_batch = 2

    # A distance of 0.0 produces a segmentation fault (see MDAnalysis#2656)
    dataset = loaders.PDBData(testdata, 0.1, testdir)

    # Replace atomic numbers with species indices
    anum_to_idx = loaders.anummap(dataset.species)
    dataset.atomicnums_to_idxs(anum_to_idx)
    n_species = len(anum_to_idx)

    batches = torch.utils.data.DataLoader(
        dataset,
        batch_size=n_per_batch,
        shuffle=False,
        collate_fn=loaders.pad_collate,
    )

    aev_computer = torchani.AEVComputer(
        RcR, RcA, EtaR, RsR, EtaA, Zeta, RsA, TsA, n_species
    )

    # One radial and one angular function with 5 species:
    #   radial:  1 * 5 = 5
    #   angular: 1 * 5 * (5 + 1) // 2 = 15
    assert aev_computer.aev_length == 20

    model = models.AffinityModel(n_species, aev_computer.aev_length)

    _, true, predicted = predict.predict(model, aev_computer, batches)

    # Both outputs must be arrays with one entry per system in the batch
    for values in (true, predicted):
        assert isinstance(values, np.ndarray)
        assert len(values) == n_per_batch
def setUp(self):
    """Build the AEV computer from ``const_file`` on CUDA when available, else on CPU."""
    device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
    self.device = torch.device(device_name)

    constants = torchani.neurochem.Constants(const_file)
    computer = torchani.AEVComputer(**constants).to(self.device)

    self.aev_computer = computer
    self.species_to_tensor = constants.species_to_tensor
    # Keep the computer's Rcr / Rca attributes at hand for the tests
    self.rcr = computer.Rcr
    self.rca = computer.Rca
def setUp(self):
    """Create a reference AEV computer, its cuaev counterpart and a linear model."""
    self.tolerance = 5e-5
    self.device = 'cuda'
    on_device = {'device': self.device}

    Rcr = 5.2000e+00
    Rca = 3.5000e+00
    EtaR = torch.tensor([1.6000000e+01], **on_device)
    ShfR = torch.tensor(
        [
            9.0000000e-01, 1.1687500e+00, 1.4375000e+00, 1.7062500e+00,
            1.9750000e+00, 2.2437500e+00, 2.5125000e+00, 2.7812500e+00,
            3.0500000e+00, 3.3187500e+00, 3.5875000e+00, 3.8562500e+00,
            4.1250000e+00, 4.3937500e+00, 4.6625000e+00, 4.9312500e+00,
        ],
        **on_device,
    )
    Zeta = torch.tensor([3.2000000e+01], **on_device)
    ShfZ = torch.tensor(
        [
            1.9634954e-01, 5.8904862e-01, 9.8174770e-01, 1.3744468e+00,
            1.7671459e+00, 2.1598449e+00, 2.5525440e+00, 2.9452431e+00,
        ],
        **on_device,
    )
    EtaA = torch.tensor([8.0000000e+00], **on_device)
    ShfA = torch.tensor(
        [9.0000000e-01, 1.5500000e+00, 2.2000000e+00, 2.8500000e+00],
        **on_device,
    )
    num_species = 4

    # Both computers share exactly the same parameters; only the backend differs
    aev_args = (Rcr, Rca, EtaR, ShfR, EtaA, Zeta, ShfA, ShfZ, num_species)
    self.aev_computer = torchani.AEVComputer(*aev_args)
    self.cuaev_computer = torchani.AEVComputer(*aev_args, use_cuda_extension=True)

    linear = torch.nn.Linear(384, 1, False)
    self.nn = torch.nn.Sequential(linear).to(self.device)
    self.radial_length = self.aev_computer.radial_length
def test_savemodel_loadmodel(tmpdir, eval, dropp):
    """Check that a model saved with ``utils.savemodel`` is restored by ``utils.loadmodel``.

    The hyper-parameters (AEV length, number of species, dropout probability
    and layer sizes) and the weights and biases of every linear layer are
    compared between the original and the reloaded model.
    """
    n_species = 10

    # Define AEVComputer
    AEVC = torchani.AEVComputer(RcR, RcA, EtaR, RsR, EtaA, Zeta, RsA, TsA, n_species)

    # Radial functions: 1
    # Angular functions: 1
    # Number of species: 10
    # AEV: 1 * 10 + 1 * 10 * (10 + 1) // 2 = 10 (R) + 55 (A) = 65
    assert AEVC.aev_length == 65

    model = models.AffinityModel(n_species, AEVC.aev_length, dropp=dropp)

    path = os.path.join(tmpdir, "model-tmp.pth")

    utils.savemodel(model, path)

    model_loaded = utils.loadmodel(path, eval=eval)

    assert model.aev_length == model_loaded.aev_length == 65
    assert model.n_species == model_loaded.n_species == n_species
    # Compare against the reloaded model (the original compared model.dropp with itself)
    assert model.dropp == model_loaded.dropp
    assert model.layers_sizes == model_loaded.layers_sizes

    # Check weights
    for ANN, ANNl in zip(model.modules(), model_loaded.modules()):
        for layer, layerl in zip(ANN.modules(), ANNl.modules()):
            if isinstance(layer, nn.Linear):
                assert torch.allclose(layer.weight, layerl.weight)
                assert torch.allclose(layer.bias, layerl.bias)
def get_aev_params(device):
    """Build the AEV computer for H, C, N and O together with an energy shifter.

    Parameters
    ----------
    device
        Device on which the AEV parameter tensors are created (forwarded to
        ``torch.tensor(..., device=device)``).

    Returns
    -------
    tuple
        ``(energy_shifter, aev_computer)`` where ``energy_shifter`` is
        ``torchani.utils.EnergyShifter(None)`` and ``aev_computer`` is the
        ``torchani.AEVComputer`` built from the parameters below.
    """
    Rcr = 5.2000e+00
    Rca = 3.5000e+00
    EtaR = torch.tensor([1.6000000e+01], device=device)
    ShfR = torch.tensor([
        9.0000000e-01, 1.1687500e+00, 1.4375000e+00, 1.7062500e+00,
        1.9750000e+00, 2.2437500e+00, 2.5125000e+00, 2.7812500e+00,
        3.0500000e+00, 3.3187500e+00, 3.5875000e+00, 3.8562500e+00,
        4.1250000e+00, 4.3937500e+00, 4.6625000e+00, 4.9312500e+00
    ], device=device)
    Zeta = torch.tensor([3.2000000e+01], device=device)
    ShfZ = torch.tensor([
        1.9634954e-01, 5.8904862e-01, 9.8174770e-01, 1.3744468e+00,
        1.7671459e+00, 2.1598449e+00, 2.5525440e+00, 2.9452431e+00
    ], device=device)
    EtaA = torch.tensor([8.0000000e+00], device=device)
    ShfA = torch.tensor(
        [9.0000000e-01, 1.5500000e+00, 2.2000000e+00, 2.8500000e+00],
        device=device)

    # The species list only determines how many species the AEV computer handles
    species_order = ['H', 'C', 'N', 'O']
    num_species = len(species_order)

    aev_computer = torchani.AEVComputer(Rcr, Rca, EtaR, ShfR, EtaA, Zeta, ShfA, ShfZ, num_species)
    energy_shifter = torchani.utils.EnergyShifter(None)
    return energy_shifter, aev_computer
def test_train_small_save(testdata, testdir, modelidx, tmpdir):
    """Train a tiny model and check that the best checkpoint is written to ``tmpdir``."""
    # The checkpoint name carries the model index when one is given
    checkpoint_name = "best.pth" if modelidx is None else f"best_{modelidx}.pth"

    with mlflow.start_run():
        # A distance of 0.0 produces a segmentation fault (see MDAnalysis#2656)
        dataset = loaders.PDBData(testdata, 0.1, testdir)

        # Replace atomic numbers with species indices
        anum_to_idx = loaders.anummap(dataset.species)
        dataset.atomicnums_to_idxs(anum_to_idx)
        n_species = len(anum_to_idx)

        batches = torch.utils.data.DataLoader(
            dataset,
            batch_size=2,
            shuffle=False,
            collate_fn=loaders.pad_collate,
        )

        aev_computer = torchani.AEVComputer(
            RcR, RcA, EtaR, RsR, EtaA, Zeta, RsA, TsA, n_species
        )

        # One radial and one angular function with 5 species:
        #   radial:  1 * 5 = 5
        #   angular: 1 * 5 * (5 + 1) // 2 = 15
        assert aev_computer.aev_length == 20

        model = models.AffinityModel(
            n_species, aev_computer.aev_length, layers_sizes=[1]
        )
        optimizer = optim.SGD(model.parameters(), lr=0.01)
        loss_function = nn.MSELoss()

        # Check number of ANNs
        assert len(model) == n_species

        train_losses, valid_losses = train.train(
            model,
            optimizer,
            loss_function,
            aev_computer,  # torchani.AEVComputer
            batches,  # training loader
            batches,  # validation loader (same data)
            epochs=15,
            savepath=tmpdir,
            idx=modelidx,
        )

        assert os.path.isfile(os.path.join(tmpdir, checkpoint_name))

        # Validation loss is shifted when trainloader and testloader are the same
        assert np.allclose(train_losses[1:], valid_losses[:-1])
def testAEVComputer(self):
    """Check that the scripted cuaev AEV computer dispatches to ``cuaev::run``."""
    here = os.path.dirname(os.path.realpath(__file__))
    params_file = os.path.join(here, '../torchani/resources/ani-1x_8x/rHCNO-5.2R_16-3.5A_a4-8.params')  # noqa: E501
    constants = torchani.neurochem.Constants(params_file)
    computer = torchani.AEVComputer(**constants, use_cuda_extension=True)
    scripted = torch.jit.script(computer)

    # Computing AEVs with cuaev for molecules without atoms does not require
    # CUDA, so this can run on a machine without a GPU
    species = make_tensor((8, 0), 'cpu', torch.int64, low=-1, high=4)
    coordinates = make_tensor((8, 0, 3), 'cpu', torch.float32, low=-5, high=5)

    graph_text = str(scripted.graph_for((species, coordinates)))
    self.assertIn("cuaev::run", graph_text)
def setUp(self):
    """Build the AEV computer from the ani-1x_8x parameter file next to this test."""
    here = os.path.dirname(os.path.realpath(__file__))
    params_file = os.path.join(here, '../torchani/resources/ani-1x_8x/rHCNO-5.2R_16-3.5A_a4-8.params')  # noqa: E501

    constants = torchani.neurochem.Constants(params_file)
    computer = torchani.AEVComputer(**constants)

    self.aev_computer = computer
    self.radial_length = computer.radial_length
    self.debug = False
def test_forward_atomic(testdata, testdir):
    """Check that the atomic contributions sum to the output of the model."""
    n_per_batch = 2

    # A distance of 0.0 produces a segmentation fault (see MDAnalysis#2656)
    dataset = loaders.PDBData(testdata, 0.1, testdir)

    # Replace atomic numbers with species indices
    anum_to_idx = loaders.anummap(dataset.species)
    dataset.atomicnums_to_idxs(anum_to_idx)
    n_species = len(anum_to_idx)

    batches = torch.utils.data.DataLoader(
        dataset,
        batch_size=n_per_batch,
        shuffle=False,
        collate_fn=loaders.pad_collate,
    )

    # Only the first batch is needed
    _, labels, (species, coordinates) = next(iter(batches))

    # Move everything to device
    labels = labels.to(device)
    species = species.to(device)
    coordinates = coordinates.to(device)

    aev_computer = torchani.AEVComputer(
        RcR, RcA, EtaR, RsR, EtaA, Zeta, RsA, TsA, n_species
    )

    # One radial and one angular function with 5 species:
    #   radial:  1 * 5 = 5
    #   angular: 1 * 5 * (5 + 1) // 2 = 15
    assert aev_computer.aev_length == 20

    aev = aev_computer.forward((species, coordinates))

    assert aev.species.shape == species.shape
    assert aev.aevs.shape == (n_per_batch, 42, 20)

    model = models.AffinityModel(n_species, aev_computer.aev_length)

    # Move model to device
    model.to(device)

    output = model(aev.species, aev.aevs)
    assert output.shape == (n_per_batch,)

    atomic_contributions = model._forward_atomic(aev.species, aev.aevs)
    assert atomic_contributions.shape == species.shape

    # Summing over the atoms must reproduce the model output
    summed = torch.sum(atomic_contributions, dim=1)
    expected = output.cpu().detach().numpy()
    assert np.allclose(expected, summed.cpu().detach().numpy())
def test_train_small_cmap(testdata, testdir):
    """Train a small model on data whose elements are remapped through ``cmap``."""
    # Map N and O to C, leave P and S
    cmap = {"C": ["N", "O"]}

    with mlflow.start_run():
        # A distance of 0.0 produces a segmentation fault (see MDAnalysis#2656)
        dataset = loaders.PDBData(testdata, 0.1, testdir, cmap)

        # Replace atomic numbers with species indices
        anum_to_idx = loaders.anummap(dataset.species)
        dataset.atomicnums_to_idxs(anum_to_idx)
        n_species = len(anum_to_idx)

        # With N and O collapsed onto C, three distinct species are expected
        assert n_species == 3

        batches = torch.utils.data.DataLoader(
            dataset,
            batch_size=2,
            shuffle=False,
            collate_fn=loaders.pad_collate,
        )

        aev_computer = torchani.AEVComputer(
            RcR, RcA, EtaR, RsR, EtaA, Zeta, RsA, TsA, n_species
        )

        # One radial and one angular function with 3 species:
        #   radial:  1 * 3 = 3
        #   angular: 1 * 3 * (3 + 1) // 2 = 6
        assert aev_computer.aev_length == 9

        model = models.AffinityModel(n_species, aev_computer.aev_length)
        optimizer = optim.SGD(model.parameters(), lr=0.0001)
        loss_function = nn.MSELoss()

        # Check number of ANNs
        assert len(model) == n_species

        train_losses, valid_losses = train.train(
            model,
            optimizer,
            loss_function,
            aev_computer,  # torchani.AEVComputer
            batches,  # training loader
            batches,  # validation loader (same data)
            epochs=15,
        )

        # Validation loss is shifted when trainloader and testloader are the same
        assert np.allclose(train_losses[1:], valid_losses[:-1])
def setUp(self):
    """Read the benzene structure and compute its AEV with cell and PBC."""
    constants = torchani.neurochem.Constants(const_file)
    self.aev_computer = torchani.AEVComputer(**constants)

    benzene_file = os.path.join(path, '../tools/generate-unit-test-expect/others/Benzene.json')
    benzene = ase.io.read(benzene_file)

    # Periodic cell information taken from the structure read above
    self.cell = torch.tensor(benzene.get_cell(complete=True)).float()
    self.pbc = torch.tensor(benzene.get_pbc(), dtype=torch.bool)

    # Species and coordinates get a leading dimension of size one
    symbols_to_ints = torchani.utils.ChemicalSymbolsToInts(['H', 'C', 'N', 'O'])
    symbols = benzene.get_chemical_symbols()
    self.species = symbols_to_ints(symbols).unsqueeze(0)
    positions = torch.tensor(benzene.get_positions())
    self.coordinates = positions.unsqueeze(0).float()

    result = self.aev_computer(
        (self.species, self.coordinates), cell=self.cell, pbc=self.pbc
    )
    _, self.aev = result
    self.natoms = self.aev.shape[1]
def test_predict_baseline(testdata, testdir):
    """Check that baseline values are added to the prediction of the right system."""
    n_per_batch = 2

    # A distance of 0.0 produces a segmentation fault (see MDAnalysis#2656)
    dataset = loaders.PDBData(testdata, 0.1, testdir)

    # Replace atomic numbers with species indices
    anum_to_idx = loaders.anummap(dataset.species)
    dataset.atomicnums_to_idxs(anum_to_idx)
    n_species = len(anum_to_idx)

    batches = torch.utils.data.DataLoader(
        dataset,
        batch_size=n_per_batch,
        shuffle=False,
        collate_fn=loaders.pad_collate,
    )

    aev_computer = torchani.AEVComputer(
        RcR, RcA, EtaR, RsR, EtaA, Zeta, RsA, TsA, n_species
    )

    # One radial and one angular function with 5 species:
    #   radial:  1 * 5 = 5
    #   angular: 1 * 5 * (5 + 1) // 2 = 15
    assert aev_computer.aev_length == 20

    model = models.AffinityModel(n_species, aev_computer.aev_length)

    # Reference prediction without baseline
    ids, true, predicted = predict.predict(model, aev_computer, batches)

    for values in (true, predicted):
        assert isinstance(values, np.ndarray)
        assert len(values) == n_per_batch

    # Systems are the other way around with respect to file order
    # This is to test that deltas are added to the correct ID
    delta_ids = np.array(["1a4w", "1a4r"])
    delta_baseline = np.array([500, 600])
    delta = np.array([5.92, 6.66])

    s = np.argsort(delta_ids)

    ids_b, true_b, predicted_b = predict.predict(
        model,
        aev_computer,
        batches,
        baseline=(delta_ids, delta_baseline, delta),
    )

    # Bring both results into the same (sorted by ID) order before comparing
    order = np.argsort(ids)
    order_b = np.argsort(ids_b)

    assert (ids[order] == ids_b[order_b]).all()
    # NOTE(review): this compares ``true`` with itself under two orderings and
    # ``true_b`` is otherwise unused; confirm whether ``true_b[order_b]`` was
    # intended here.
    assert np.allclose(true[order], true[order_b])
    assert np.allclose(predicted[order], predicted_b[order_b] - delta_baseline[s])
def test_atomic(testdata, testdir):
    """Check that ``grad.atomic`` contributions sum to the model prediction."""
    with mlflow.start_run():
        # A distance of 0.0 produces a segmentation fault (see MDAnalysis#2656)
        dataset = loaders.PDBData(testdata, 0.1, testdir)

        assert len(dataset) == 2

        # Replace atomic numbers with species indices
        anum_to_idx = loaders.anummap(dataset.species)
        dataset.atomicnums_to_idxs(anum_to_idx)
        n_species = len(anum_to_idx)

        aev_computer = torchani.AEVComputer(
            RcR, RcA, EtaR, RsR, EtaA, Zeta, RsA, TsA, n_species
        )

        # One radial and one angular function with 5 species:
        #   radial:  1 * 5 = 5
        #   angular: 1 * 5 * (5 + 1) // 2 = 15
        assert aev_computer.aev_length == 20

        model = models.AffinityModel(n_species, aev_computer.aev_length)

        # Move model and AEVComputer to device
        model.to(device)
        aev_computer.to(device)

        # Model in evaluation mode
        model.eval()

        for _, _, (species, coordinates) in dataset:
            atomic = grad.atomic(species, coordinates, model, aev_computer, device)

            # Add fictitious batch dimension
            batched_species = species.unsqueeze(0)
            batched_coordinates = coordinates.unsqueeze(0)

            assert atomic.shape == batched_species.shape

            aevs = aev_computer.forward((batched_species, batched_coordinates)).aevs
            prediction = model(batched_species, aevs)

            # The per-atom values must add up to the model prediction
            summed = torch.sum(atomic, dim=1).cpu().detach().numpy()
            assert np.allclose(summed, prediction.cpu().detach().numpy())
def test_h2():
    """
    Test TorchANI AEV for H2.
    """
    n_species = 1  # H

    aev_computer = torchani.AEVComputer(
        RcR, RcA, EtaR, RsR, EtaA, Zeta, RsA, TsA, n_species
    )

    # One radial and one angular function with a single species:
    #   radial (H):   1 * 1 = 1
    #   angular (HH): 1 * 1 * (1 + 1) // 2 = 1
    assert aev_computer.aev_length == 2

    # Converts species (atomic numbers) to indices
    converter = torchani.SpeciesConverter("H")

    # Elements not present in SpeciesConverter are assigned -1
    for atomicnum in range(2, 6):
        unknown = converter(
            (torch.tensor([[atomicnum]]), torch.tensor([[0.0, 0.0, 0.0]]))
        )
        assert unknown.species.item() == -1

    # Define H2 with both atoms on the x axis, a distance R apart
    R = 1.0
    atomicnums = torch.tensor([[1, 1]], device=device)
    coordinates = torch.tensor([[[0.0, 0.0, 0.0], [R, 0.0, 0.0]]], device=device)

    # Map atomic numbers to index
    sc = converter((atomicnums, coordinates))

    # Species is returned on CPU, see TorchANI#461
    assert torch.allclose(sc.species, torch.tensor([0, 0]))
    assert torch.allclose(sc.coordinates, coordinates)

    # Species is returned on CPU, see TorchANI#461
    aev = aev_computer.forward((sc.species.to(device), sc.coordinates))

    # Species is returned on CPU, see TorchANI#461
    assert torch.allclose(sc.species, aev.species.cpu())

    # Remove batch dimension and store as numpy array
    aevs = aev.aevs.squeeze(0).cpu().numpy()

    gr = GR(torch.tensor(R, device=device), RcR, EtaR, RsR).cpu().numpy()

    # Both atoms have the same radial term and no angular term
    for atom_index in (0, 1):
        assert np.allclose(aevs[atom_index, :], [gr, 0.0])
def testPickle(self):
    """Check that an AEV computer using the CUDA extension survives a pickle round trip.

    The pickle is written to a private temporary directory rather than a fixed
    ``/tmp`` path, so the test is portable, does not collide with concurrent
    runs and always cleans up, even when pickling fails.
    """
    # Imported locally so that this block does not rely on a module-level import
    import tempfile

    path = os.path.dirname(os.path.realpath(__file__))
    const_file = os.path.join(
        path, '../torchani/resources/ani-1x_8x/rHCNO-5.2R_16-3.5A_a4-8.params'
    )  # noqa: E501
    consts = torchani.neurochem.Constants(const_file)
    aev_computer = torchani.AEVComputer(**consts, use_cuda_extension=True)

    # The directory and its content are removed when the context exits
    with tempfile.TemporaryDirectory() as tmpdir:
        tmpfile = os.path.join(tmpdir, 'cuaev.pkl')
        with open(tmpfile, 'wb') as file:
            pickle.dump(aev_computer, file)
        with open(tmpfile, 'rb') as file:
            aev_computer = pickle.load(file)
def testCoverLinearly(self):
    """Check that ``cover_linearly`` reproduces the constants read from ``const_file``."""
    reference = torchani.AEVComputer(**torchani.neurochem.Constants(const_file))

    # Same parameters, expressed as cutoffs and numbers of divisions
    generated = torchani.AEVComputer.cover_linearly(
        radial_cutoff=5.2,
        angular_cutoff=3.5,
        radial_eta=16.0,
        angular_eta=8.0,
        radial_dist_divisions=16,
        angular_dist_divisions=4,
        zeta=32.0,
        angle_sections=8,
        num_species=4,
    )

    pairs = zip(reference.constants(), generated.constants())
    for expected, actual in pairs:
        self.assertEqual(expected, actual)
def setUp(self):
    """Set up a periodic cell, five atoms and the AEV of the shifted coordinates."""
    self.eps = 1e-9

    cell_parameters = [100, 100, 100 * math.sqrt(2), 90, 45, 90]
    cell = ase.geometry.cellpar_to_cell(cell_parameters)
    self.cell = torch.tensor(ase.geometry.complete_cell(cell), dtype=torch.double)
    self.inv_cell = torch.inverse(self.cell)

    positions = [
        [0.0, 0.0, 0.0],
        [1.0, -0.1, -0.1],
        [-0.1, 1.0, -0.1],
        [-0.1, -0.1, 1.0],
        [-1.0, -1.0, -1.0],
    ]
    self.coordinates = torch.tensor([positions], dtype=torch.double)
    self.species = torch.tensor([[1, 0, 0, 0, 0]])
    self.pbc = torch.ones(3, dtype=torch.bool)

    # Shift all atoms by half of the sum of the three cell vectors
    self.v1, self.v2, self.v3 = self.cell
    half_diagonal = 0.5 * (self.v1 + self.v2 + self.v3)
    self.center_coordinates = self.coordinates + half_diagonal

    constants = torchani.neurochem.Constants(const_file)
    self.aev_computer = torchani.AEVComputer(**constants).to(torch.double)

    result = self.aev_computer(
        (self.species, self.center_coordinates), cell=self.cell, pbc=self.pbc
    )
    _, self.aev = result
def test_grad(testdata, testdir):
    """Check that ``grad.gradient`` returns one gradient entry per coordinate."""
    with mlflow.start_run():
        # A distance of 0.0 produces a segmentation fault (see MDAnalysis#2656)
        dataset = loaders.PDBData(testdata, 0.1, testdir)

        n_systems = len(dataset)
        assert n_systems == 2

        # Replace atomic numbers with species indices
        anum_to_idx = loaders.anummap(dataset.species)
        dataset.atomicnums_to_idxs(anum_to_idx)
        n_species = len(anum_to_idx)

        aev_computer = torchani.AEVComputer(
            RcR, RcA, EtaR, RsR, EtaA, Zeta, RsA, TsA, n_species
        )

        # One radial and one angular function with 5 species:
        #   radial:  1 * 5 = 5
        #   angular: 1 * 5 * (5 + 1) // 2 = 15
        assert aev_computer.aev_length == 20

        model = models.AffinityModel(n_species, aev_computer.aev_length)
        loss_function = nn.MSELoss()

        # Move model and AEVComputer to device
        model.to(device)
        aev_computer.to(device)

        # Model in evaluation mode
        model.eval()

        for system_index in range(n_systems):
            _, label, (species, coordinates) = dataset[system_index]

            gradient = grad.gradient(
                species,
                coordinates,
                label,
                model,
                aev_computer,
                loss_function,
                device,
            )

            assert gradient.shape == coordinates.shape
def test_predict_scaling(testdata, testdir):
    """Check that passing ``scaler`` to ``predict.predict`` undoes the label scaling."""
    n_per_batch = 2

    # A distance of 0.0 produces a segmentation fault (see MDAnalysis#2656)
    dataset = loaders.PDBData(testdata, 0.1, testdir)

    # Keep a copy of the labels before they are scaled
    unscaled_labels = dataset.labels.copy()

    # Scale labels
    scaler = utils.labels_scaler(dataset)
    assert np.allclose(dataset.labels, [1.0, -1.0])

    # Replace atomic numbers with species indices
    anum_to_idx = loaders.anummap(dataset.species)
    dataset.atomicnums_to_idxs(anum_to_idx)
    n_species = len(anum_to_idx)

    batches = torch.utils.data.DataLoader(
        dataset,
        batch_size=n_per_batch,
        shuffle=False,
        collate_fn=loaders.pad_collate,
    )

    aev_computer = torchani.AEVComputer(
        RcR, RcA, EtaR, RsR, EtaA, Zeta, RsA, TsA, n_species
    )

    # One radial and one angular function with 5 species:
    #   radial:  1 * 5 = 5
    #   angular: 1 * 5 * (5 + 1) // 2 = 15
    assert aev_computer.aev_length == 20

    model = models.AffinityModel(n_species, aev_computer.aev_length)

    # Without a scaler the labels come back as stored in the dataset (scaled)
    ids, true_scaled, predicted_scaled = predict.predict(model, aev_computer, batches)

    assert np.allclose(true_scaled, dataset.labels)
    assert (-1 <= true_scaled).all() and (true_scaled <= 1).all()

    # With the scaler both labels and predictions are transformed back
    ids, true, predicted = predict.predict(
        model, aev_computer, batches, scaler=scaler
    )

    assert np.allclose(true, unscaled_labels)
    assert np.allclose(predicted, scaler.inverse_transform(predicted_scaled))
def loadAEVC(path) -> torchani.AEVComputer:
    """
    Load AEVComputer.

    Parameters
    ----------
    path:
        Location of the saved AEVComputer, passed to ``torch.load``; the file
        must contain a dictionary with the keys ``"args"`` and ``"state_dict"``

    Returns
    -------
    torchani.AEVComputer
        AEVComputer

    Notes
    -----
    When CUDA is not available the stored tensors are mapped to the CPU, so a
    file saved on a GPU machine can still be loaded on a CPU-only machine.
    """
    # map_location=None is the torch.load default (keep the saved device)
    map_location = None if torch.cuda.is_available() else torch.device("cpu")
    d = torch.load(path, map_location=map_location)

    # Rebuild the computer from its constructor arguments, then restore its state
    AEVC = torchani.AEVComputer(**d["args"])
    AEVC.load_state_dict(d["state_dict"])

    return AEVC
def test_saveAEVC_loadAEVC(tmpdir):
    """Check that an AEVComputer saved with ``utils.saveAEVC`` is restored by ``utils.loadAEVC``.

    The AEV length, number of species, all AEV parameters and the radial and
    angular (sub)lengths of the reloaded computer are compared with the original.
    """
    n_species = 10

    # Define AEVComputer
    AEVC = torchani.AEVComputer(RcR, RcA, EtaR, RsR, EtaA, Zeta, RsA, TsA, n_species)

    # Radial functions: 1
    # Angular functions: 1
    # Number of species: 10
    # AEV: 1 * 10 + 1 * 10 * (10 + 1) // 2 = 10 (R) + 55 (A) = 65
    assert AEVC.aev_length == 65

    path = os.path.join(tmpdir, "aevc-tmp.pth")

    utils.saveAEVC(AEVC, n_species, path)

    AEVC_loaded = utils.loadAEVC(path)

    assert AEVC.aev_length == AEVC_loaded.aev_length == 65

    # Ints
    assert AEVC.num_species == AEVC_loaded.num_species == n_species

    # Floats
    assert np.allclose(AEVC_loaded.Rcr, RcR)
    assert np.allclose(AEVC_loaded.Rca, RcA)
    assert torch.allclose(AEVC_loaded.EtaR, EtaR)
    assert torch.allclose(AEVC_loaded.EtaA, EtaA)
    assert torch.allclose(AEVC_loaded.ShfR, RsR)
    assert torch.allclose(AEVC_loaded.ShfA, RsA)
    assert torch.allclose(AEVC_loaded.Zeta, Zeta)
    assert torch.allclose(AEVC_loaded.ShfZ, TsA)

    # Lengths: compare against the reloaded computer (the original compared the
    # angular lengths of AEVC with themselves, which can never fail)
    assert AEVC.radial_sublength == AEVC_loaded.radial_sublength
    assert AEVC.radial_length == AEVC_loaded.radial_length
    assert AEVC.angular_sublength == AEVC_loaded.angular_sublength
    assert AEVC.angular_length == AEVC_loaded.angular_length
def loadAEVC(path) -> torchani.AEVComputer:
    """
    Load AEVComputer.

    Parameters
    ----------
    path:
        Location of the saved AEVComputer, passed to ``torch.load``; the file
        must contain a dictionary with the keys ``"args"`` and ``"state_dict"``

    Returns
    -------
    torchani.AEVComputer
        AEVComputer
    """
    # Without CUDA the stored tensors are mapped to the CPU while loading
    load_options = {}
    if not torch.cuda.is_available():
        load_options["map_location"] = torch.device("cpu")

    checkpoint = torch.load(path, **load_options)

    # Rebuild the computer from its constructor arguments, then restore its state
    computer = torchani.AEVComputer(**checkpoint["args"])
    computer.load_state_dict(checkpoint["state_dict"])

    return computer
def test_aev_from_loader(testdata, testdir):
    """Check the shape of the AEVs computed for the first batch of the loader."""
    n_per_batch = 2

    # A distance of 0.0 produces a segmentation fault (see MDAnalysis#2656)
    dataset = loaders.PDBData(testdata, 0.1, testdir)

    # Compute map of atomic numbers to indices from species
    anum_to_idx = loaders.anummap(dataset.species)

    # Transform atomic number to species in data
    dataset.atomicnums_to_idxs(anum_to_idx)
    n_species = len(anum_to_idx)

    batches = torch.utils.data.DataLoader(
        dataset,
        batch_size=n_per_batch,
        shuffle=False,
        collate_fn=loaders.pad_collate,
    )

    # Only the first batch is needed
    _, labels, (species, coordinates) = next(iter(batches))

    # Move everything to device
    labels = labels.to(device)
    species = species.to(device)
    coordinates = coordinates.to(device)

    aev_computer = torchani.AEVComputer(
        RcR, RcA, EtaR, RsR, EtaA, Zeta, RsA, TsA, n_species
    )

    # One radial and one angular function with 5 species:
    #   radial:  1 * 5 = 5
    #   angular: 1 * 5 * (5 + 1) // 2 = 15
    assert aev_computer.aev_length == 20

    aev = aev_computer.forward((species, coordinates))

    assert aev.species.shape == species.shape
    assert aev.aevs.shape == (n_per_batch, 42, 20)
def test_evaluate(testdata, testdir, tmpdir):
    """Check that ``predict.evaluate`` writes its CSV and plot files to ``tmpdir``."""
    expected_files = ("predict.csv", "regplot-predict.pdf", "regplot-predict.png")

    # A distance of 0.0 produces a segmentation fault (see MDAnalysis#2656)
    dataset = loaders.PDBData(testdata, 0.1, testdir)

    # Replace atomic numbers with species indices
    anum_to_idx = loaders.anummap(dataset.species)
    dataset.atomicnums_to_idxs(anum_to_idx)
    n_species = len(anum_to_idx)

    batches = torch.utils.data.DataLoader(
        dataset,
        batch_size=2,
        shuffle=False,
        collate_fn=loaders.pad_collate,
    )

    aev_computer = torchani.AEVComputer(
        RcR, RcA, EtaR, RsR, EtaA, Zeta, RsA, TsA, n_species
    )

    # One radial and one angular function with 5 species:
    #   radial:  1 * 5 = 5
    #   angular: 1 * 5 * (5 + 1) // 2 = 15
    assert aev_computer.aev_length == 20

    # Two independently initialised models are evaluated together
    mods = [
        models.AffinityModel(n_species, aev_computer.aev_length)
        for _ in range(2)
    ]

    with mlflow.start_run():
        predict.evaluate(mods, batches, aev_computer, outpath=tmpdir)

        for filename in expected_files:
            assert os.path.isfile(os.path.join(tmpdir, filename))
def torchani_calculator(ani_model, numb_networks):
    """Return a calculator from a model.

    Parameters
    ----------
    ani_model
        Key of the model to load; valid choices are the keys of the
        module-level ``models`` mapping, whose values are paths to model
        description files.
    numb_networks : int
        Number of networks of the ensemble to load. Must not exceed the number
        of networks listed in the model description file.

    Returns
    -------
    torchani.ase.Calculator
        Calculator whose AEV computer and networks are in double precision.

    Raises
    ------
    AssertionError
        If ``numb_networks`` is larger than the number of networks in the
        ensemble.
    """
    model_file = models[ani_model]
    wkdir = model_file.rsplit('/', 1)[0] + '/'

    # The model description file lists file names relative to its own directory
    data = np.loadtxt(model_file, dtype=str)
    cnstfile = wkdir + data[0]  # AEV parameters
    saefile = wkdir + data[1]  # Atomic shifts
    nnfdir = wkdir + data[2]  # network prefix
    Nn = int(data[3])  # Number of networks in the ensemble
    assert numb_networks <= Nn, (
        "numb_networks (%s) exceeds the %d networks in the ensemble"
        % (numb_networks, Nn))

    constants = torchani.neurochem.Constants(cnstfile)
    energy_shifter = torchani.neurochem.load_sae(saefile)
    aev_computer = torchani.AEVComputer(**constants)
    aev_computer.to(torch.double)
    nn_models = torchani.neurochem.load_model_ensemble(
        constants.species, nnfdir, numb_networks)

    # go to double precision
    for model in nn_models:
        model.to(torch.double)

    calculator = torchani.ase.Calculator(
        constants.species, aev_computer, nn_models, energy_shifter)
    return calculator
4.9312500e+00 ], device=device) Zeta = torch.tensor([3.2000000e+01], device=device) ShfZ = torch.tensor([ 1.9634954e-01, 5.8904862e-01, 9.8174770e-01, 1.3744468e+00, 1.7671459e+00, 2.1598449e+00, 2.5525440e+00, 2.9452431e+00 ], device=device) EtaA = torch.tensor([8.0000000e+00], device=device) ShfA = torch.tensor( [9.0000000e-01, 1.5500000e+00, 2.2000000e+00, 2.8500000e+00], device=device) species_order = ['H', 'C', 'N', 'O'] num_species = len(species_order) aev_computer = torchani.AEVComputer(Rcr, Rca, EtaR, ShfR, EtaA, Zeta, ShfA, ShfZ, num_species) energy_shifter = torchani.utils.EnergyShifter(None) ############################################################################### # Now let's set up the datasets. These paths assume the user runs this script under # the ``examples`` directory of TorchANI's repository. If you download this # script, you should manually set the paths of these files on your system before # this script can run successfully. # # Also note that we need to subtract the self energies of all atoms from the # energy of each molecule. This brings the energies into a reasonable # range. The second argument defines how to convert species given as a list of strings # to a tensor, that is, for all supported chemical symbols, which one corresponds to # ``0``, which one corresponds to ``1``, etc. try:
# Let's now load the built-in ANI-1 models. The builtin ANI-1ccx contains 8 # models trained with diffrent initialization. Predicting the energy and force # using the average of the 8 models outperform using a single model, so it is # always recommended to use an ensemble, unless the speed of computation is an # issue in your application. # (not sure if these need the level shifters...) # ani1x = torchani.models.ANI1x() # ani1ccx = torchani.models.ANI1ccx() # this whole pile is to load ANI-2x (since it's not public yet) # path to ANI-2x subdirectories path = '/Users/ghutchis/Devel/torchani/torchani/resources/ani-2x' const_file = path + '/rHCNOSFCl-5.1R_16-3.5A_a8-4.params' # noqa: E501 consts = torchani.neurochem.Constants(const_file) aev_computer = torchani.AEVComputer(**consts) sae_file = path + '/sae_linfit.dat' # noqa: E501 energy_shifter = torchani.neurochem.load_sae(sae_file) model_prefix = path + '/train' # noqa: E501 ensemble = torchani.neurochem.load_model_ensemble(consts.species, model_prefix, 8) # noqa: E501 model = torch.nn.Sequential(aev_computer, ensemble, energy_shifter) # done (that's ANI-2x) ############################################################################### # Now let's define the coordinate and species. If you just want to compute the # energy and force for a single structure like in this example, you need to # make the coordinate tensor has shape ``(1, Na, 3)`` and species has shape # ``(1, Na)``, where ``Na`` is the number of atoms in the molecule, the # preceding ``1`` in the shape is here to support batch processing like in # training. If you have ``N`` different structures to compute, then make it
def benchmark(parser, dataset, use_cuda_extension, force_inference=False):
    """Train for ``parser.num_epochs`` epochs and print per-epoch timings.

    Several ``torchani.aev`` functions, the ``forward`` methods of the AEV
    computer and of the network, and ``optimizer.step`` are replaced by timing
    wrappers; the accumulated times are divided by the number of epochs and
    printed at the end.

    Parameters
    ----------
    parser
        Object providing ``device`` and ``num_epochs``.
    dataset
        Iterable of batches; each batch is indexed with ``'species'``,
        ``'coordinates'``, ``'energies'`` and, when ``force_inference`` is
        true, ``'forces'``.
    use_cuda_extension
        Forwarded to ``torchani.AEVComputer`` as its tenth positional argument.
    force_inference : bool
        When true, forces are computed with ``torch.autograd.grad`` and added
        to the loss, and the backward pass and optimizer step are skipped.

    Returns
    -------
    None
        The function also returns early (after calling ``alert``) when the
        force computation raises.

    Notes
    -----
    NOTE(review): the source of this function was available without
    indentation; the nesting below was reconstructed and should be confirmed,
    in particular the extent of the ``if not force_inference`` block and the
    position of ``checkgpu()``.
    """
    synchronize = True
    timers = {}

    def time_func(key, func):
        # Register the timer and return a wrapper that accumulates the wall
        # time of every call of ``func`` under ``key``
        timers[key] = 0

        def wrapper(*args, **kwargs):
            start = timeit.default_timer()
            ret = func(*args, **kwargs)
            # presumably waits for queued CUDA work so that it is included in
            # the measurement -- confirm against sync_cuda
            sync_cuda(synchronize)
            end = timeit.default_timer()
            timers[key] += end - start
            return ret

        return wrapper

    # AEV parameters
    Rcr = 5.2000e+00
    Rca = 3.5000e+00
    EtaR = torch.tensor([1.6000000e+01], device=parser.device)
    ShfR = torch.tensor([
        9.0000000e-01, 1.1687500e+00, 1.4375000e+00, 1.7062500e+00,
        1.9750000e+00, 2.2437500e+00, 2.5125000e+00, 2.7812500e+00,
        3.0500000e+00, 3.3187500e+00, 3.5875000e+00, 3.8562500e+00,
        4.1250000e+00, 4.3937500e+00, 4.6625000e+00, 4.9312500e+00
    ], device=parser.device)
    Zeta = torch.tensor([3.2000000e+01], device=parser.device)
    ShfZ = torch.tensor([
        1.9634954e-01, 5.8904862e-01, 9.8174770e-01, 1.3744468e+00,
        1.7671459e+00, 2.1598449e+00, 2.5525440e+00, 2.9452431e+00
    ], device=parser.device)
    EtaA = torch.tensor([8.0000000e+00], device=parser.device)
    ShfA = torch.tensor(
        [9.0000000e-01, 1.5500000e+00, 2.2000000e+00, 2.8500000e+00],
        device=parser.device)
    num_species = 4
    aev_computer = torchani.AEVComputer(Rcr, Rca, EtaR, ShfR, EtaA, Zeta, ShfA, ShfZ, num_species, use_cuda_extension)

    nn = torchani.ANIModel(build_network())
    # model[0] is the AEV computer, model[1] the network
    model = torch.nn.Sequential(aev_computer, nn).to(parser.device)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.000001)
    # reduction='none' keeps one squared error per element
    mse = torch.nn.MSELoss(reduction='none')

    # enable timers
    # NOTE: these assignments replace attributes of the torchani.aev module
    # itself, so they stay in effect after this function returns
    torchani.aev.cutoff_cosine = time_func('torchani.aev.cutoff_cosine', torchani.aev.cutoff_cosine)
    torchani.aev.radial_terms = time_func('torchani.aev.radial_terms', torchani.aev.radial_terms)
    torchani.aev.angular_terms = time_func('torchani.aev.angular_terms', torchani.aev.angular_terms)
    torchani.aev.compute_shifts = time_func('torchani.aev.compute_shifts', torchani.aev.compute_shifts)
    torchani.aev.neighbor_pairs = time_func('torchani.aev.neighbor_pairs', torchani.aev.neighbor_pairs)
    torchani.aev.neighbor_pairs_nopbc = time_func(
        'torchani.aev.neighbor_pairs_nopbc', torchani.aev.neighbor_pairs_nopbc)
    torchani.aev.triu_index = time_func('torchani.aev.triu_index', torchani.aev.triu_index)
    torchani.aev.cumsum_from_zero = time_func('torchani.aev.cumsum_from_zero', torchani.aev.cumsum_from_zero)
    torchani.aev.triple_by_molecule = time_func(
        'torchani.aev.triple_by_molecule', torchani.aev.triple_by_molecule)
    torchani.aev.compute_aev = time_func('torchani.aev.compute_aev', torchani.aev.compute_aev)
    # 'total' times the AEV computer, 'forward' the network
    model[0].forward = time_func('total', model[0].forward)
    model[1].forward = time_func('forward', model[1].forward)
    optimizer.step = time_func('optimizer.step', optimizer.step)

    print('=> start training')
    start = time.time()
    # Accumulated time of the backward pass and of the force computation
    loss_time = 0
    force_time = 0

    for epoch in range(0, parser.num_epochs):

        print('Epoch: %d/%d' % (epoch + 1, parser.num_epochs))
        progbar = pkbar.Kbar(target=len(dataset) - 1, width=8)

        for i, properties in enumerate(dataset):
            species = properties['species'].to(parser.device)
            # Gradients with respect to the coordinates are only needed for forces
            coordinates = properties['coordinates'].to(
                parser.device).float().requires_grad_(force_inference)
            true_energies = properties['energies'].to(parser.device).float()
            # Entries with species >= 0 are counted as atoms
            num_atoms = (species >= 0).sum(dim=1, dtype=true_energies.dtype)
            _, predicted_energies = model((species, coordinates))
            # TODO add sync after aev is done
            sync_cuda(synchronize)
            energy_loss = (mse(predicted_energies, true_energies) / num_atoms.sqrt()).mean()
            if force_inference:
                sync_cuda(synchronize)
                force_coefficient = 0.1
                true_forces = properties['forces'].to(parser.device).float()
                force_start = time.time()
                try:
                    sync_cuda(synchronize)
                    # Forces are the negative gradient of the summed energies
                    forces = -torch.autograd.grad(predicted_energies.sum(), coordinates, create_graph=True, retain_graph=True)[0]
                    sync_cuda(synchronize)
                except Exception as e:
                    # Any failure aborts the whole benchmark
                    alert('Error: {}'.format(e))
                    return
                force_time += time.time() - force_start
                force_loss = (mse(true_forces, forces).sum(dim=(1, 2)) / num_atoms).mean()
                loss = energy_loss + force_coefficient * force_loss
                sync_cuda(synchronize)
            else:
                loss = energy_loss
            # NOTE(review): no square root is taken here although the value is
            # reported as "rmse" -- confirm whether this is intended
            rmse = hartree2kcalmol(
                (mse(predicted_energies, true_energies)).mean()).detach().cpu().numpy()
            progbar.update(i, values=[("rmse", rmse)])
            # NOTE(review): reconstructed nesting -- backward pass and optimizer
            # step are assumed to run only when forces are not computed
            if not force_inference:
                sync_cuda(synchronize)
                loss_start = time.time()
                loss.backward()
                # print('2', coordinates.grad)
                sync_cuda(synchronize)
                loss_stop = time.time()
                loss_time += loss_stop - loss_start
                optimizer.step()
                sync_cuda(synchronize)

        # NOTE(review): reconstructed nesting -- assumed to run once per epoch
        checkgpu()
    sync_cuda(synchronize)
    stop = time.time()

    # All reported values are averages over the epochs
    print('=> More detail about benchmark PER EPOCH')
    total_time = (stop - start) / parser.num_epochs
    loss_time = loss_time / parser.num_epochs
    force_time = force_time / parser.num_epochs
    opti_time = timers['optimizer.step'] / parser.num_epochs
    forward_time = timers['forward'] / parser.num_epochs
    aev_time = timers['total'] / parser.num_epochs
    print_timer(' Total AEV', aev_time)
    print_timer(' Forward', forward_time)
    print_timer(' Backward', loss_time)
    print_timer(' Force', force_time)
    print_timer(' Optimizer', opti_time)
    # Whatever part of the epoch is not covered by the timers above
    print_timer(
        ' Others', total_time - loss_time - aev_time - forward_time -
        opti_time - force_time)
    print_timer(' Epoch time', total_time)
mlflow.log_param("RsR", args.RsR) # Angular coefficients RsA = torch.tensor(args.RsA, device=device) EtaA = torch.tensor([args.EtaA], device=device) TsA = torch.tensor(args.TsA, device=device) Zeta = torch.tensor([args.Zeta], device=device) mlflow.log_param("RcA", args.RcA) mlflow.log_param("RsA", args.RsA) mlflow.log_param("EtaA", args.EtaA) mlflow.log_param("TsA", args.TsA) mlflow.log_param("Zeta", args.Zeta) # Define AEVComputer AEVC = torchani.AEVComputer(args.RcR, args.RcA, EtaR, RsR, EtaA, Zeta, RsA, TsA, n_species) # Save AEVComputer utils.saveAEVC(AEVC, n_species, path=os.path.join(args.outpath, "aevc.pth")) # Define models models_list = [] optimizers_list = [] for idx in range(args.consensus): models_list.append( models.AffinityModel( n_species, AEVC.aev_length, layers_sizes=args.layers,
def setUp(self):
    """Create a double-precision AEV computer from ``const_file``."""
    constants = torchani.neurochem.Constants(const_file)
    computer = torchani.AEVComputer(**constants)
    self.aev_computer = computer.to(torch.double)