def test_pack(device):
    """Sanity test of batch packing operation."""
    # Generate matrix list
    sizes = torch.randint(2, 8, (10,))
    matrices = [torch.rand(i, i, device=device) for i in sizes]

    # Pack matrices into a single tensor
    packed = batch.pack(matrices)

    # Construct a numpy equivalent
    max_size = max(packed.shape[1:])
    ref = np.stack(
        np.array([np.pad(i.sft(), (0, max_size - len(i))) for i in matrices]))

    equivalent = np.all(np.abs(packed.sft() - ref) < 1E-12)
    same_device = packed.device == device

    assert equivalent, 'Check pack method against numpy'
    assert same_device, 'Device persistence check (packed tensor)'

    # Check that the mask is correct
    *_, mask = batch.pack([
        torch.rand(1, device=device),
        torch.rand(2, device=device),
        torch.rand(3, device=device)
    ], return_mask=True)

    ref_mask = torch.tensor([[1, 0, 0], [1, 1, 0], [1, 1, 1]],
                            dtype=torch.bool, device=device)

    same_device_mask = mask.device == device
    eq = torch.all(torch.eq(mask, ref_mask))

    assert eq, 'Mask yielded an unexpected result'
    assert same_device_mask, 'Device persistence check (mask)'

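# A minimal usage sketch of `batch.pack`; illustrative only and not collected
# by pytest. It assumes, consistent with the test above, that packing
# right-pads with zeros and that `return_mask=True` yields a boolean mask
# whose True entries mark genuine (non-padding) data.
def _example_pack_usage():
    matrices = [torch.rand(2, 2), torch.rand(3, 3)]
    packed, mask = batch.pack(matrices, return_mask=True)
    assert packed.shape == (2, 3, 3)  # padded to the largest matrix
    assert mask.sum() == 2 * 2 + 3 * 3  # one True per real element
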
def test_eighb_general_grad(device):
    """eighb gradient stability on general eigenvalue problems."""

    def eigen_proxy(m, n, target_scheme, size_data=None):
        m, n = maths.sym(m), maths.sym(n)
        if size_data is not None:
            m = clean_zero_padding(m, size_data)
            n = clean_zero_padding(n, size_data)

        return maths.eighb(m, n, scheme=target_scheme)

    # Generate a single generalised eigenvalue test instance
    a1 = maths.sym(torch.rand(8, 8, device=device))
    b1 = maths.sym(torch.eye(8, device=device)
                   * torch.rand(8, device=device))
    a1.requires_grad, b1.requires_grad = True, True

    schemes = ['chol', 'lowd']
    for scheme in schemes:
        grad_is_safe = gradcheck(eigen_proxy, (a1, b1, scheme),
                                 raise_exception=False)
        assert grad_is_safe, f'Non-degenerate single test failed on {scheme}'

    # Generate a batch of generalised eigenvalue test instances
    sizes = torch.randint(3, 8, (5,), device=device)
    a2 = batch.pack(
        [maths.sym(torch.rand(s, s, device=device)) for s in sizes])
    b2 = batch.pack([
        maths.sym(torch.eye(s, device=device) * torch.rand(s, device=device))
        for s in sizes
    ])
    a2.requires_grad, b2.requires_grad = True, True

    for scheme in schemes:
        grad_is_safe = gradcheck(eigen_proxy, (a2, b2, scheme, sizes),
                                 raise_exception=False)
        assert grad_is_safe, f'Non-degenerate batch test failed on {scheme}'

def test_eighb_general_batch(device):
    """eighb accuracy on a batch of general eigenvalue problems."""
    sizes = torch.randint(2, 10, (11,), device=device)
    a = [maths.sym(torch.rand(s, s, device=device)) for s in sizes]
    b = [
        maths.sym(torch.eye(s, device=device) * torch.rand(s, device=device))
        for s in sizes
    ]
    a_batch, b_batch = batch.pack(a), batch.pack(b)

    w_ref = batch.pack(
        [torch.tensor(linalg.eigh(i.sft(), j.sft())[0])
         for i, j in zip(a, b)])

    aux_settings = [True, False]
    schemes = ['chol', 'lowd']
    for scheme in schemes:
        for aux in aux_settings:
            w_calc = maths.eighb(a_batch, b_batch, scheme=scheme, aux=aux)[0]

            mae_w = torch.max(torch.abs(w_calc.cpu() - w_ref))
            same_device = w_calc.device == device

            assert mae_w < 1E-12, f'Eigenvalue tolerance test {scheme}'
            assert same_device, 'Device persistence check'

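# A sketch of the Cholesky reduction presumably underpinning the 'chol'
# scheme; illustrative only and not collected by pytest. The generalised
# problem A v = w B v is reduced to the standard form C u = w u with
# C = L^-1 A L^-T, where B = L L^T is the Cholesky factorisation of B.
def _example_cholesky_reduction():
    a = torch.rand(6, 6, dtype=torch.float64)
    a = (a + a.T) / 2  # symmetrise A
    b = torch.diag(torch.rand(6, dtype=torch.float64) + 1)  # SPD B
    l = torch.linalg.cholesky(b)
    c = torch.linalg.solve(l, a) @ torch.linalg.inv(l).T
    # Standard eigenvalues of C match the generalised eigenvalues of (A, B)
    w_std = torch.linalg.eigvalsh(c)
    w_gen = torch.from_numpy(linalg.eigh(a.numpy(), b.numpy())[0])
    assert torch.allclose(w_std, w_gen)
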
def __init__(self, atomic_numbers: Union[Tensor, List[Tensor]],
             positions: Union[Tensor, List[Tensor]],
             units: str = 'bohr'):

    if isinstance(atomic_numbers, Tensor):
        self.atomic_numbers = atomic_numbers
        # Mask for clearing padding values in the distance matrix.
        self._mask_dist: Union[Tensor, bool] = False
        self.positions: Tensor = positions
    else:
        self.atomic_numbers, _mask = pack(atomic_numbers, return_mask=True)
        self._mask_dist: Union[Tensor, bool] = ~(
            _mask.unsqueeze(-2) * _mask.unsqueeze(-1))
        self.positions: Tensor = pack(positions)

    self.n_atoms: Tensor = self.atomic_numbers.count_nonzero(-1)

    # Number of systems, if in batch mode (for internal use only)
    self._n_batch: Optional[int] = (None if self.atomic_numbers.dim() == 1
                                    else len(atomic_numbers))

    # Ensure the positions are in atomic units (bohr)
    if units != 'bohr':
        self.positions: Tensor = self.positions * length_units[units]

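# A usage sketch for the constructor above; illustrative only, with arbitrary
# atomic numbers and positions. Single systems take plain tensors, while
# batches take lists of tensors that are packed (zero-padded) internally.
def _example_geometry_construction():
    # Single system; positions are interpreted as bohr by default.
    geom = Geometry(torch.tensor([1, 1, 8]), torch.rand(3, 3))

    # Batch mode; `units` triggers a conversion to bohr on instantiation.
    geom_batch = Geometry(
        [torch.tensor([1, 1]), torch.tensor([1, 1, 8])],
        [torch.rand(2, 3), torch.rand(3, 3)],
        units='angstrom')
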
def geometry_basic_helper(device, positions, atomic_numbers):
    """Function to reduce code duplication when testing basic functionality."""
    # Pack the reference data, if multiple systems are provided
    batch = isinstance(atomic_numbers, list)
    if batch:
        atomic_numbers_ref = pack(atomic_numbers)
        positions_ref = pack(positions)
        positions_angstrom = [i / length_units['angstrom'] for i in positions]
    else:
        atomic_numbers_ref = atomic_numbers
        positions_ref = positions
        positions_angstrom = positions / length_units['angstrom']

    # Check 1: Ensure the geometry entity is correctly constructed
    geom_1 = Geometry(atomic_numbers, positions)
    check_1 = (torch.allclose(geom_1.atomic_numbers, atomic_numbers_ref)
               and torch.allclose(geom_1.positions, positions_ref))
    assert check_1, 'Geometry was not instantiated correctly'

    # Check 2: Check unit conversion proceeds as anticipated.
    geom_2 = Geometry(atomic_numbers, positions_angstrom, units='angstrom')
    check_2 = torch.allclose(geom_1.positions, geom_2.positions)
    assert check_2, 'Geometry failed to correctly convert length units'

    # Check 3: Check that __repr__ does not crash when called. No assert is
    # needed here as a failure will result in an exception being raised.
    _t = repr(geom_1)

    # Test with a larger number of systems to ensure the string gets
    # truncated. This is only applicable to batched Geometry instances.
    if batch:
        geom_3 = Geometry([atomic_numbers[0] for _ in range(10)],
                          [positions[0] for _ in range(10)])
        _t2 = repr(geom_3)
        check_3 = '...' in _t2
        assert check_3, 'String representation was not correctly truncated'

    # Check 4: Verify that the `.chemical_symbols` property returns the
    # correct values
    check_4 = all([chemical_symbols[int(j)] == i if isinstance(i, str)
                   else [chemical_symbols[int(k)] for k in j] == i
                   for i, j in zip(geom_1.chemical_symbols, atomic_numbers)])
    assert check_4, 'The ".chemical_symbols" property is incorrect'

    # Check 5: Test that the device on which the Geometry's tensors are
    # located can be changed via the `.to()` method. Note that this check
    # will only be performed if a cuda device is present.
    if torch.cuda.device_count():
        # Select a device to move to
        new_device = {'cuda': torch.device('cpu'),
                      'cpu': torch.device('cuda:0')}[device.type]
        geom_1.to(new_device)
        check_5 = (geom_1.atomic_numbers.device == new_device
                   and geom_1.positions.device == new_device)
        assert check_5, '".to" method failed to set the correct device'

def geometry_hdf5_helper(path, atomic_numbers, positions):
    """Function to reduce code duplication when testing HDF5 functionality."""
    # Ensure any test hdf5 database is erased before running
    if os.path.exists(path):
        os.remove(path)

    # Pack the reference data, if multiple systems are provided
    batch = isinstance(atomic_numbers, list)
    atomic_numbers_ref = pack(atomic_numbers) if batch else atomic_numbers
    positions_ref = pack(positions) if batch else positions

    # Construct a geometry instance
    geom_1 = Geometry(atomic_numbers, positions)

    # Infer target device
    device = geom_1.positions.device

    # Open the database
    with h5py.File(path, 'w') as db:
        # Check 1: Write to the database and check that the written data
        # matches the reference data.
        geom_1.to_hdf5(db)
        check_1 = (np.allclose(db['atomic_numbers'][()],
                               atomic_numbers_ref.sft())
                   and np.allclose(db['positions'][()], positions_ref.sft()))
        assert check_1, 'Geometry was not saved to the database correctly'

        # Check 2: Ensure geometries are correctly constructed from hdf5 data
        geom_2 = Geometry.from_hdf5(db, device=device)
        check_2 = (torch.allclose(geom_2.positions, geom_1.positions)
                   and torch.allclose(geom_2.atomic_numbers,
                                      geom_1.atomic_numbers))
        assert check_2, 'Geometry could not be loaded from hdf5 data'

        # Check 3: Make sure that the tensors were placed on the correct
        # device
        check_3 = (geom_2.positions.device == device
                   and geom_2.atomic_numbers.device == device)
        assert check_3, 'Tensors not placed on the correct device'

    # If this is a batch test then repeat test 2, but pass in a list of HDF5
    # groups rather than one batch HDF5 group.
    if batch:
        os.remove(path)
        with h5py.File(path, 'w') as db:
            for n, (an, pos) in enumerate(zip(atomic_numbers, positions)):
                Geometry(an, pos).to_hdf5(db.create_group(f'geom_{n + 1}'))
            geom_3 = Geometry.from_hdf5(
                [db[f'geom_{i}']
                 for i in range(1, len(atomic_numbers) + 1)])
            check_4 = torch.allclose(geom_3.positions.to(device),
                                     geom_1.positions)
            assert check_4, \
                'Instance could not be loaded from hdf5 data (batch)'

    # Remove the test database
    os.remove(path)

def test_eighb_standard_batch(device):
    """eighb accuracy on a batch of standard eigenvalue problems."""
    sizes = torch.randint(2, 10, (11,), device=device)
    a = [maths.sym(torch.rand(s, s, device=device)) for s in sizes]
    a_batch = batch.pack(a)

    w_ref = batch.pack([torch.tensor(linalg.eigh(i.cpu())[0]) for i in a])

    w_calc = maths.eighb(a_batch)[0]

    mae_w = torch.max(torch.abs(w_calc.cpu() - w_ref))
    same_device = w_calc.device == device

    assert mae_w < 1E-12, 'Eigenvalue tolerance test'
    assert same_device, 'Device persistence check'

def batch_chemical_symbols(
        atomic_numbers: Union[Tensor, List[Tensor]]) -> list:
    """Converts atomic numbers to their chemical symbols.

    This function allows for en-masse conversion of atomic numbers to
    chemical symbols.

    Arguments:
        atomic_numbers: Atomic numbers of the elements.

    Returns:
        symbols: The corresponding chemical symbols.

    Notes:
        Padding values, i.e. zeros, will be ignored.
    """
    a_nums = atomic_numbers

    # Catch for lists of tensors (still faster doing it this way)
    if isinstance(a_nums, list) and isinstance(a_nums[0], Tensor):
        a_nums = pack(a_nums, value=0)

    # Convert from atomic numbers to chemical symbols via an itemgetter
    symbols = np.array(  # numpy must be used as torch can't handle strings
        itemgetter(*a_nums.flatten())(chemical_symbols)
    ).reshape(a_nums.shape)

    # Mask out element "X", aka padding values
    mask = symbols != 'X'
    if symbols.ndim == 1:
        return symbols[mask].tolist()
    else:
        return [s[m].tolist() for s, m in zip(symbols, mask)]

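# A usage sketch for `batch_chemical_symbols`; illustrative only and not
# collected by pytest. Symbol indices follow the `chemical_symbols` lookup
# table used above, in which index 1 is 'H', 6 is 'C' and 8 is 'O'.
def _example_batch_chemical_symbols():
    # Single system: a flat tensor maps to a flat list of symbols.
    assert batch_chemical_symbols(torch.tensor([1, 6, 8])) == ['H', 'C', 'O']

    # Batch mode: ragged inputs are zero-padded internally and the padding
    # symbols ('X') are masked out of the result.
    symbols = batch_chemical_symbols(
        [torch.tensor([1, 1, 8]), torch.tensor([6, 8])])
    assert symbols == [['H', 'H', 'O'], ['C', 'O']]
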
def test_eighb_broadening_grad(device):
    """eighb gradient stability on standard, broadened, eigenvalue problems.

    There is no separate test for the standard eigenvalue problem without
    broadening as this would result in a direct call to torch.symeig, which
    is unnecessary. However, it is important to note that conditional
    broadening is technically never tested, i.e. the lines:

    .. code-block:: python

        ...
        if ctx.bm == 'cond':  # <- Conditional broadening
            deltas = 1 / torch.where(torch.abs(deltas) > bf,
                                     deltas, bf) * torch.sign(deltas)
        ...

    of `_SymEigB` are never actually run. This is because the conditional
    branch only activates when there are true eigenvalue degeneracies; and
    degenerate eigenvalue problems do not "play well" with the gradcheck
    operation.
    """

    def eigen_proxy(m, target_method, size_data=None):
        m = maths.sym(m)
        if size_data is not None:
            m = clean_zero_padding(m, size_data)
        if target_method is None:
            return torch.symeig(m, True)
        else:
            return maths.eighb(m, broadening_method=target_method)

    # Generate a single standard eigenvalue test instance
    a1 = maths.sym(torch.rand(8, 8, device=device))
    a1.requires_grad = True

    broadening_methods = [None, 'none', 'cond', 'lorn']
    for method in broadening_methods:
        grad_is_safe = gradcheck(eigen_proxy, (a1, method),
                                 raise_exception=False)
        assert grad_is_safe, f'Non-degenerate single test failed on {method}'

    # Generate a batch of standard eigenvalue test instances
    sizes = torch.randint(3, 8, (5,), device=device)
    a2 = batch.pack(
        [maths.sym(torch.rand(s, s, device=device)) for s in sizes])
    a2.requires_grad = True

    for method in broadening_methods[2:]:
        grad_is_safe = gradcheck(eigen_proxy, (a2, method, sizes),
                                 raise_exception=False)
        assert grad_is_safe, f'Non-degenerate batch test failed on {method}'

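# A sketch of the capping behaviour of the conditional broadening quoted in
# the docstring above; illustrative only and not collected by pytest. The
# assumption here is that eigen-gradient terms scale with 1/delta between
# eigenvalue pairs, so near-degenerate gaps are clamped to a magnitude of at
# most 1/bf while their sign is preserved.
def _example_conditional_broadening():
    bf = torch.tensor(1E-12)
    # Two near-degenerate gaps and one safe gap
    deltas = torch.tensor([1E-16, -1E-16, 0.5])
    capped = 1 / torch.where(
        torch.abs(deltas) > bf, deltas, bf) * torch.sign(deltas)
    assert torch.all(torch.abs(capped) <= 1 / bf)
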
def geometry_distance_vectors_helper(atomic_numbers, positions):
    """Function to reduce code duplication when checking .distance_vectors."""
    geom = Geometry(atomic_numbers, positions)

    # Check 1: Ensure distance vectors are within tolerance thresholds
    if isinstance(positions, torch.Tensor):
        ref_d_vec = positions.unsqueeze(1) - positions
    else:
        ref_d_vec = pack([i.unsqueeze(1) - i for i in positions])
    d_vec = geom.distance_vectors
    check_1 = torch.allclose(d_vec, ref_d_vec)
    assert check_1, 'Distance vectors are outside of tolerance thresholds'

    # Check 2: Device persistence check
    check_2 = d_vec.device == geom.positions.device
    assert check_2, 'Distance vectors were not returned on the correct device'

def test_geometry_from_ase_atoms_batch(device):
    """Check batch instances can be instantiated from ase.Atoms objects."""
    # Create a list of ase.Atoms objects
    atoms = [molecule('CH4'), molecule('H2O')]
    ref_pos = pack([torch.tensor(i.positions) for i in atoms]).sft()
    ref_pos = ref_pos * length_units['angstrom']

    # Check 1: Ensure that the from_ase_atoms method correctly constructs
    # a geometry instance. This includes the unit conversion operation.
    geom_1 = Geometry.from_ase_atoms(atoms, device=device)
    check_1 = np.allclose(geom_1.positions.sft(), ref_pos)
    assert check_1, 'from_ase_atoms did not correctly parse the positions'

    # Check 2: Check the tensors were placed on the correct device
    check_2 = (geom_1.positions.device == device
               and geom_1.atomic_numbers.device == device)
    assert check_2, 'from_ase_atoms did not place tensors on the correct device'

def geometry_distance_helper(geom):
    """Function to reduce code duplication when checking .distances."""
    # Infer target device
    device = geom.positions.device

    # Calculate the distance matrix and its reference
    dmat = geom.distances
    if geom.atomic_numbers.dim() == 1:
        dmat_ref = distance_matrix(geom.positions.sft(),
                                   geom.positions.sft())
    else:
        pos = [i[:j.count_nonzero()].sft() for i, j in
               zip(geom.positions, geom.atomic_numbers)]
        dmat_ref = pack([torch.tensor(distance_matrix(i, i))
                         for i in pos]).sft()

    # Ensure distances are within tolerance thresholds.
    check_1 = np.allclose(dmat.sft(), dmat_ref)
    assert check_1, 'Distances are not within tolerance thresholds'

    # Confirm that the results are on the correct device
    check_2 = dmat.device == device
    assert check_2, 'Distances were not returned on the correct device'

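# A sketch of the broadcasting pattern used by the reference implementations
# above; illustrative only and not collected by pytest. `torch.cdist` serves
# purely as an independent cross-check of the pairwise Euclidean norms.
def _example_distance_computation():
    pos = torch.rand(5, 3, dtype=torch.float64)
    d_vec = pos.unsqueeze(1) - pos  # pairwise difference vectors, (5, 5, 3)
    dmat = torch.linalg.norm(d_vec, dim=-1)  # distance matrix, (5, 5)
    assert torch.allclose(dmat, torch.cdist(pos, pos))
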
def proxy(*args):
    # Proxy function is used to prevent an undiagnosed error from occurring.
    return batch.pack(list(args))