Ejemplo n.º 1
0
def test_pack(device):
    """Sanity test of the batch packing operation.

    A list of randomly sized square matrices is packed into one tensor and
    compared against a zero-padded reference assembled with numpy. The mask
    returned by ``batch.pack(..., return_mask=True)`` is then compared with a
    hand-written reference. Both results must be located on ``device``.

    Arguments:
        device: Device on which the test tensors are created.
    """
    # Generate matrix list
    sizes = torch.randint(2, 8, (10, ))
    matrices = [torch.rand(i, i, device=device) for i in sizes]
    # Pack matrices into a single tensor
    packed = batch.pack(matrices)
    # Construct a numpy equivalent; each matrix is zero-padded at its trailing
    # edges up to the largest size present in the packed tensor.
    # NOTE(review): `.sft()` is presumably a project helper that yields a
    # numpy-compatible array -- confirm against its definition.
    max_size = max(packed.shape[1:])
    ref = np.stack(
        np.array([np.pad(i.sft(), (0, max_size - len(i))) for i in matrices]))
    # The absolute deviation must be tested: a signed comparison would let an
    # arbitrarily large negative error slip under the tolerance.
    equivalent = np.all(np.abs(packed.sft() - ref) < 1E-12)
    same_device = packed.device == device

    assert equivalent, 'Check pack method against numpy'
    assert same_device, 'Device persistence check (packed tensor)'

    # Check that the mask is correct
    *_, mask = batch.pack([
        torch.rand(1, device=device),
        torch.rand(2, device=device),
        torch.rand(3, device=device)
    ],
                          return_mask=True)

    # Rows correspond to the three vectors above; True marks a real entry and
    # False marks a padding entry.
    ref_mask = torch.tensor([[1, 0, 0], [1, 1, 0], [1, 1, 1]],
                            dtype=torch.bool,
                            device=device)

    same_device_mask = mask.device == device
    eq = torch.all(torch.eq(mask, ref_mask))

    assert eq, 'Mask yielded an unexpected result'
    assert same_device_mask, 'Device persistence check (mask)'
Ejemplo n.º 2
0
def test_eighb_general_grad(device):
    """Gradient stability of eighb for generalised eigenvalue problems.

    ``gradcheck`` is run for every scheme, first on a single pair of
    matrices and then on a packed batch of differently sized pairs.
    """
    def eigen_proxy(m, n, target_scheme, size_data=None):
        # Both operands are symmetrised before being handed to the solver.
        m = maths.sym(m)
        n = maths.sym(n)
        # When size information is supplied the inputs are a packed batch and
        # are passed through `clean_zero_padding` first.
        if size_data is not None:
            m, n = (clean_zero_padding(x, size_data) for x in (m, n))

        return maths.eighb(m, n, scheme=target_scheme)

    schemes = ['chol', 'lowd']

    # Single generalised eigenvalue problem
    a1 = maths.sym(torch.rand(8, 8, device=device))
    b1 = maths.sym(torch.eye(8, device=device) * torch.rand(8, device=device))
    a1.requires_grad = True
    b1.requires_grad = True

    for scheme in schemes:
        single_inputs = (a1, b1, scheme)
        grad_is_safe = gradcheck(eigen_proxy, single_inputs,
                                 raise_exception=False)
        assert grad_is_safe, f'Non-degenerate single test failed on {scheme}'

    # Batch of generalised eigenvalue problems of differing sizes
    sizes = torch.randint(3, 8, (5, ), device=device)
    a_list = [maths.sym(torch.rand(s, s, device=device)) for s in sizes]
    a2 = batch.pack(a_list)
    b_list = [
        maths.sym(torch.eye(s, device=device) * torch.rand(s, device=device))
        for s in sizes
    ]
    b2 = batch.pack(b_list)
    a2.requires_grad = True
    b2.requires_grad = True

    for scheme in schemes:
        batch_inputs = (a2, b2, scheme, sizes)
        grad_is_safe = gradcheck(eigen_proxy, batch_inputs,
                                 raise_exception=False)
        assert grad_is_safe, f'Non-degenerate batch test failed on {scheme}'
Ejemplo n.º 3
0
def test_eighb_general_batch(device):
    """Accuracy of eighb on a batch of generalised eigenvalue problems.

    Eigenvalues from ``maths.eighb`` are compared with those from
    ``linalg.eigh`` for every combination of scheme and ``aux`` setting.
    """
    sizes = torch.randint(2, 10, (11, ), device=device)
    a = [maths.sym(torch.rand(s, s, device=device)) for s in sizes]
    b = [
        maths.sym(torch.eye(s, device=device) * torch.rand(s, device=device))
        for s in sizes
    ]
    a_batch = batch.pack(a)
    b_batch = batch.pack(b)

    # Reference eigenvalues, solved one system at a time and then packed
    reference_values = []
    for i, j in zip(a, b):
        reference_values.append(torch.tensor(linalg.eigh(i.sft(), j.sft())[0]))
    w_ref = batch.pack(reference_values)

    # Every scheme is exercised with and without the `aux` option
    combinations = [(scheme, aux)
                    for scheme in ['chol', 'lowd']
                    for aux in [True, False]]
    for scheme, aux in combinations:
        result = maths.eighb(a_batch, b_batch, scheme=scheme, aux=aux)
        w_calc = result[0]

        # Largest absolute deviation from the reference eigenvalues
        mae_w = (w_calc.cpu() - w_ref).abs().max()
        same_device = w_calc.device == device

        assert mae_w < 1E-12, f'Eigenvalue tolerance test {scheme}'
        assert same_device, 'Device persistence check'
Ejemplo n.º 4
0
    def __init__(self,
                 atomic_numbers: Union[Tensor, List[Tensor]],
                 positions: Union[Tensor, List[Tensor]],
                 units: Optional[str] = 'bohr'):
        """Set up the instance from atomic numbers and positions.

        Arguments:
            atomic_numbers: A single tensor, or a list of tensors which will
                be packed into one tensor.
            positions: A single tensor, or a list of tensors which will be
                packed into one tensor.
            units: Key into ``length_units`` naming the unit in which
                ``positions`` is given. Anything other than 'bohr' triggers
                a rescale of the stored positions. [DEFAULT='bohr']
        """
        if not isinstance(atomic_numbers, Tensor):
            # A list was supplied: pack it and keep the mask so that padding
            # entries of the distance matrix can be cleared later.
            packed_numbers, valid = pack(atomic_numbers, return_mask=True)
            self.atomic_numbers = packed_numbers
            pair_is_valid = valid.unsqueeze(-2) * valid.unsqueeze(-1)
            self._mask_dist: Union[Tensor, bool] = ~pair_is_valid
            self.positions: Tensor = pack(positions)
        else:
            self.atomic_numbers = atomic_numbers
            # No packing was performed here so no mask is stored.
            self._mask_dist: Union[Tensor, bool] = False
            self.positions: Tensor = positions

        # Count of non-zero atomic numbers along the last dimension
        self.n_atoms: Tensor = self.atomic_numbers.count_nonzero(-1)

        # Number of batches if in batch mode (for internal use only)
        if self.atomic_numbers.dim() == 1:
            self._n_batch: Optional[int] = None
        else:
            self._n_batch: Optional[int] = len(atomic_numbers)

        # Ensure the distances are in atomic units (bohr)
        if units != 'bohr':
            scale = length_units[units]
            self.positions: Tensor = self.positions * scale
Ejemplo n.º 5
0
def geometry_basic_helper(device, positions, atomic_numbers):
    """Shared checks for the basic functionality of `Geometry`.

    Arguments:
        device: Device on which the supplied tensors are located; only used
            to select the target of the `.to()` check.
        positions: A tensor, or a list of tensors for a batch.
        atomic_numbers: A tensor, or a list of tensors for a batch.
    """
    scale = length_units['angstrom']

    # Build the reference data; lists are treated as batches and packed
    is_batch = isinstance(atomic_numbers, list)
    if not is_batch:
        atomic_numbers_ref, positions_ref = atomic_numbers, positions
        positions_angstrom = positions / scale
    else:
        atomic_numbers_ref = pack(atomic_numbers)
        positions_ref = pack(positions)
        positions_angstrom = [p / scale for p in positions]

    # Check 1: the instance must hold the data it was constructed from
    geom_1 = Geometry(atomic_numbers, positions)
    numbers_match = torch.allclose(geom_1.atomic_numbers, atomic_numbers_ref)
    check_1 = numbers_match and torch.allclose(geom_1.positions,
                                               positions_ref)
    assert check_1, 'Geometry was not instantiated correctly'

    # Check 2: positions given in angstrom must convert back to the originals
    geom_2 = Geometry(atomic_numbers, positions_angstrom, units='angstrom')
    check_2 = torch.allclose(geom_1.positions, geom_2.positions)
    assert check_2, 'Geometry failed to correctly convert length units'

    # Check 3: __repr__ only has to run without raising, so nothing is
    # asserted here.
    repr(geom_1)

    # For batches, repeat with ten systems so that the string representation
    # is expected to be truncated.
    if is_batch:
        repeated_numbers = [atomic_numbers[0]] * 10
        repeated_positions = [positions[0]] * 10
        geom_3 = Geometry(repeated_numbers, repeated_positions)
        check_3 = '...' in repr(geom_3)
        assert check_3, 'String representation was not correctly truncated'

    # Check 4: `.chemical_symbols` must agree with the atomic numbers. Entries
    # are either a single symbol or a list of symbols.
    symbol_checks = []
    for i, j in zip(geom_1.chemical_symbols, atomic_numbers):
        if isinstance(i, str):
            symbol_checks.append(chemical_symbols[int(j)] == i)
        else:
            symbol_checks.append([chemical_symbols[int(k)] for k in j] == i)
    check_4 = all(symbol_checks)
    assert check_4, 'The ".chemical_symbols" property is incorrect'

    # Check 5: `.to()` must relocate the tensors. This is skipped when no
    # cuda device is available.
    if torch.cuda.device_count():
        # Move to whichever device type the data is not currently on
        opposite = {'cuda': torch.device('cpu'),
                    'cpu': torch.device('cuda:0')}
        new_device = opposite[device.type]
        geom_1.to(new_device)
        numbers_moved = geom_1.atomic_numbers.device == new_device
        check_5 = numbers_moved and geom_1.positions.device == new_device

        assert check_5, '".to" method failed to set the correct device'
Ejemplo n.º 6
0
def geometry_hdf5_helper(path, atomic_numbers, positions):
    """Function to reduce code duplication when testing the HDF5 functionality.

    A `Geometry` instance is written to an HDF5 file at ``path``, the stored
    data is compared to the reference data, and the instance is then read
    back. For batches the read is repeated from a list of per-system groups.

    Arguments:
        path: Location of the temporary HDF5 file. Any existing file at this
            location is deleted, and the file is removed again at the end.
        atomic_numbers: A tensor, or a list of tensors for a batch.
        positions: A tensor, or a list of tensors for a batch.
    """
    # Ensure any test hdf5 database is erased before running
    if os.path.exists(path):
        os.remove(path)

    # Pack the reference data, if multiple systems provided
    batch = isinstance(atomic_numbers, list)
    atomic_numbers_ref = pack(atomic_numbers) if batch else atomic_numbers
    positions_ref = pack(positions) if batch else positions

    # Construct a geometry instance
    geom_1 = Geometry(atomic_numbers, positions)

    # Infer target device
    device = geom_1.positions.device

    # Open the database
    with h5py.File(path, 'w') as db:
        # Check 1: Write to the database and check that the written data
        # matches the reference data.
        geom_1.to_hdf5(db)
        check_1 = (np.allclose(db['atomic_numbers'][()], atomic_numbers_ref.sft())
                   and np.allclose(db['positions'][()], positions_ref.sft()))
        assert check_1, 'Geometry not saved the database correctly'

        # Check 2: Ensure geometries are correctly constructed from hdf5 data
        geom_2 = Geometry.from_hdf5(db, device=device)
        check_2 = (torch.allclose(geom_2.positions, geom_1.positions)
                   and torch.allclose(geom_2.atomic_numbers, geom_1.atomic_numbers))
        assert check_2, 'Geometry could not be loaded from hdf5 data'

        # Check 3: Make sure that the tensors were placed on the correct device
        check_3 = (geom_2.positions.device == device
                   and geom_2.atomic_numbers.device == device)
        assert check_3, 'Tensors not placed on the correct device'

    # If this is a batch test then repeat test 2 but pass in a list of HDF5
    # groups rather than one batch HDF5 group.
    if batch:
        os.remove(path)
        with h5py.File(path, 'w') as db:
            # One group per system, named geom_1, geom_2, ...
            group_names = []
            for n, (an, pos) in enumerate(zip(atomic_numbers, positions)):
                group_names.append(f'geom_{n + 1}')
                Geometry(an, pos).to_hdf5(db.create_group(group_names[-1]))
            # Read back exactly the groups that were written, rather than
            # assuming that the batch always holds three systems.
            geom_3 = Geometry.from_hdf5([db[name] for name in group_names])
            check_4 = torch.allclose(geom_3.positions.to(device), geom_1.positions)
            assert check_4, 'Instance could not be loaded from hdf5 data (batch)'

    # Remove the test database
    os.remove(path)
Ejemplo n.º 7
0
def test_eighb_standard_batch(device):
    """Accuracy of eighb on a batch of standard eigenvalue problems.

    Eigenvalues returned by ``maths.eighb`` for a packed batch are compared
    with those from ``linalg.eigh`` evaluated one matrix at a time.
    """
    sizes = torch.randint(2, 10, (11, ), device=device)
    a = [maths.sym(torch.rand(s, s, device=device)) for s in sizes]
    a_batch = batch.pack(a)

    # Reference eigenvalues, solved per matrix on the cpu and then packed
    reference_values = []
    for i in a:
        reference_values.append(torch.tensor(linalg.eigh(i.cpu())[0]))
    w_ref = batch.pack(reference_values)

    result = maths.eighb(a_batch)
    w_calc = result[0]

    # Largest absolute deviation from the reference eigenvalues
    mae_w = (w_calc.cpu() - w_ref).abs().max()
    same_device = w_calc.device == device

    assert mae_w < 1E-12, 'Eigenvalue tolerance test'
    assert same_device, 'Device persistence check'
Ejemplo n.º 8
0
def batch_chemical_symbols(
        atomic_numbers: Union[Tensor, List[Tensor]]) -> list:
    """Converts atomic numbers to their chemical symbols.

    This function allows for en-masse conversion of atomic numbers to chemical
    symbols.

    Arguments:
        atomic_numbers: Atomic numbers of the elements. Either a tensor or a
            list of tensors; a list of tensors is packed, using zero as the
            padding value, before conversion.

    Returns:
        symbols: The corresponding chemical symbols. A flat list of strings
            is returned for one-dimensional input, otherwise one list of
            strings per row. An empty list is returned for an empty list.

    Notes:
        Padding values, i.e. zeros, will be ignored. Empty inputs are
        supported and yield empty results.

    """
    a_nums = atomic_numbers

    if isinstance(a_nums, list):
        # An empty list holds no systems; return early rather than failing
        # when the first element is inspected.
        if not a_nums:
            return []
        # Catch for list tensors (still faster doing it this way)
        if isinstance(a_nums[0], Tensor):
            a_nums = pack(a_nums, value=0)

    # Look up every symbol from plain python integers. The dtype is fixed to
    # `str` so that an empty input still produces a string array. Numpy must
    # be used as torch cannot handle strings.
    symbols = np.array(
        [chemical_symbols[z] for z in a_nums.flatten().tolist()],
        dtype=str).reshape(a_nums.shape)
    # Mask out element "X", aka padding values
    # NOTE(review): presumably "X" is the entry at index zero of
    # `chemical_symbols` -- confirm against its definition.
    mask = symbols != 'X'
    if symbols.ndim == 1:
        return symbols[mask].tolist()
    return [s[m].tolist() for s, m in zip(symbols, mask)]
Ejemplo n.º 9
0
def test_eighb_broadening_grad(device):
    """Gradient stability of eighb on standard, broadened, eigenvalue problems.

    The standard eigenvalue problem without broadening is not given its own
    test, as that would amount to a direct call to torch.symeig. Note,
    however, that conditional broadening is technically never exercised,
    i.e. the lines:

    .. code-block:: python
        ...
        if ctx.bm == 'cond':  # <- Conditional broadening
            deltas = 1 / torch.where(torch.abs(deltas) > bf,
                                     deltas, bf) * torch.sign(deltas)
        ...

    of `_SymEigB` are never actually run. That path only activates when there
    are true eigenvalue degeneracies, and degenerate eigenvalue problems do
    not "play well" with the gradcheck operation.
    """
    def eigen_proxy(m, target_method, size_data=None):
        m = maths.sym(m)
        # Size information is only supplied for packed batches
        if size_data is not None:
            m = clean_zero_padding(m, size_data)
        # A method of `None` selects the plain torch solver as a baseline
        if target_method is not None:
            return maths.eighb(m, broadening_method=target_method)
        return torch.symeig(m, True)

    broadening_methods = [None, 'none', 'cond', 'lorn']

    # Single standard eigenvalue problem, checked with every method
    a1 = maths.sym(torch.rand(8, 8, device=device))
    a1.requires_grad = True

    for method in broadening_methods:
        single_inputs = (a1, method)
        grad_is_safe = gradcheck(eigen_proxy, single_inputs,
                                 raise_exception=False)
        assert grad_is_safe, f'Non-degenerate single test failed on {method}'

    # Batch of standard eigenvalue problems of differing sizes
    sizes = torch.randint(3, 8, (5, ), device=device)
    a_list = [maths.sym(torch.rand(s, s, device=device)) for s in sizes]
    a2 = batch.pack(a_list)
    a2.requires_grad = True

    # Only the last two methods ('cond' and 'lorn') are run on the batch
    batch_methods = broadening_methods[2:]
    for method in batch_methods:
        batch_inputs = (a2, method, sizes)
        grad_is_safe = gradcheck(eigen_proxy, batch_inputs,
                                 raise_exception=False)
        assert grad_is_safe, f'Non-degenerate batch test failed on {method}'
Ejemplo n.º 10
0
def geometry_distance_vectors_helper(atomic_numbers, positions):
    """Shared checks for the `.distance_vectors` property of `Geometry`.

    Arguments:
        atomic_numbers: A tensor, or a list of tensors for a batch.
        positions: A tensor, or a list of tensors for a batch.
    """
    geom = Geometry(atomic_numbers, positions)

    # Reference distance vectors; lists are handled per system and packed
    if not isinstance(positions, torch.Tensor):
        per_system = []
        for p in positions:
            per_system.append(p.unsqueeze(1) - p)
        ref_d_vec = pack(per_system)
    else:
        ref_d_vec = positions.unsqueeze(1) - positions

    # Check 1: the property must reproduce the reference within tolerance
    d_vec = geom.distance_vectors
    within_tolerance = torch.allclose(d_vec, ref_d_vec)
    assert within_tolerance, 'Distance vectors are outside of tolerance thresholds'

    # Check 2: the result must be on the same device as the positions
    on_same_device = d_vec.device == geom.positions.device
    assert on_same_device, 'Distance vectors were not returned on the correct device'
Ejemplo n.º 11
0
def test_geometry_from_ase_atoms_batch(device):
    """Check batch instances can be instantiated from ase.Atoms objects.

    Arguments:
        device: Device on which the resulting tensors are expected to be
            placed.
    """

    # Create a list of ase.Atoms objects and the matching reference
    # positions, scaled by the 'angstrom' entry of `length_units`.
    atoms = [molecule('CH4'), molecule('H2O')]
    ref_pos = pack([torch.tensor(i.positions) for i in atoms]).sft()
    ref_pos = ref_pos * length_units['angstrom']

    # Check 1: Ensure that the from_ase_atoms method correctly constructs
    # a geometry instance. This includes the unit conversion operation.
    geom_1 = Geometry.from_ase_atoms(atoms, device=device)
    # This must be a plain boolean; a trailing comma here would create a
    # one-element tuple, which is always truthy and so could never fail.
    check_1 = np.allclose(geom_1.positions.sft(), ref_pos)

    assert check_1, 'from_ase_atoms did not correctly parse the positions'

    # Check 2: Check the tensors were placed on the correct device
    check_2 = (geom_1.positions.device == device
               and geom_1.atomic_numbers.device == device)

    assert check_2, 'from_ase_atoms did not place tensors on the correct device'
Ejemplo n.º 12
0
def geometry_distance_helper(geom):
    """Shared checks for the `.distances` property of `Geometry`.

    Arguments:
        geom: The instance whose distance matrix is to be checked.
    """
    # Infer target device
    device = geom.positions.device

    dmat = geom.distances

    # Build the reference distance matrix with `distance_matrix`
    if geom.atomic_numbers.dim() != 1:
        # Batch: trim each system to its count of non-zero atomic numbers,
        # evaluate it separately and then pack the results.
        pos = []
        for i, j in zip(geom.positions, geom.atomic_numbers):
            pos.append(i[:j.count_nonzero()].sft())
        per_system = [torch.tensor(distance_matrix(p, p)) for p in pos]
        dmat_ref = pack(per_system).sft()
    else:
        dmat_ref = distance_matrix(geom.positions.sft(), geom.positions.sft())

    # Ensure distances are within tolerance thresholds.
    within_tolerance = np.allclose(dmat.sft(), dmat_ref)
    assert within_tolerance, 'Distances are not within tolerance thresholds'

    # Confirm that results are on the correct device
    on_same_device = dmat.device == device
    assert on_same_device, 'Distances were not returned on the correct device'
Ejemplo n.º 13
0
 def proxy(*args):
     # This indirection exists to avoid an error whose cause has not been
     # diagnosed; the positional arguments are gathered into a list and
     # packed together.
     tensors = list(args)
     return batch.pack(tensors)