Example #1
0
    def extract_from_ase(self, filename, init_config):
        """
        Load an ASE trajectory as a gt Trajectory by extracting positions

        A copy of init_config is appended to this trajectory for every frame
        in the ASE trajectory, with the energy and forces reset to None and
        the atomic coordinates replaced by those of the frame. If a file
        called 'tmp_energies.txt' exists in the current working directory the
        first item on line i is set as the energy of self[i], and the file is
        deleted afterwards.

        :param filename: (str) Passed to ase_traj.Trajectory
        :param init_config: Configuration copied for every frame, must not be
                            None
        """
        assert init_config is not None

        traj = ase_traj.Trajectory(filename)

        # Iterate through each frame (set of atoms) in the trajectory
        for atoms in traj:
            config = init_config.copy()
            config.energy = None
            config.forces = None

            # Set the coordinate of every atom in the configuration
            for i, position in enumerate(atoms.get_positions()):
                config.atoms[i].coord = position

            self._list.append(config)

        # Set the energies from the trajectory if possible
        energies_filename = 'tmp_energies.txt'

        if os.path.exists(energies_filename):
            # The handle must be closed before the file is removed, which the
            # context manager guarantees even if a line cannot be parsed
            with open(energies_filename, 'r') as energies_file:
                for i, line in enumerate(energies_file):
                    try:
                        self[i].energy = float(line.split()[0])

                    except (TypeError, ValueError, IndexError):
                        logger.warning('Could not set the energy')

            os.remove(energies_filename)

        return None
Example #2
0
    def _set_soap(self, atom_symbols):
        """
        Populate self.soap with SOAP parameters for each unique non-hydrogen
        symbol in atom_symbols

        :param atom_symbols: (list(str)) Atomic symbols, duplicates included
        """
        pairs_done = []
        unique_symbols = set(atom_symbols)

        for symbol in unique_symbols:

            if symbol == 'H':
                logger.warning('Not adding SOAP on H')
                continue

            params = GTConfig.gap_default_soap_params.copy()

            # Neighbour species are every symbol for which the pair with
            # this one has not been added yet, in either order
            neighbours = []
            for other in unique_symbols:
                if other + symbol in pairs_done or symbol + other in pairs_done:
                    continue
                neighbours.append(other)

            params["other"] = neighbours

            # A symbol that appears only once has no self pair
            if atom_symbols.count(symbol) == 1:
                neighbours.remove(symbol)

            pairs_done.extend(symbol + other for other in neighbours)

            if not neighbours:
                logger.info(f'Not adding SOAP to {symbol} - should be covered')
                continue

            self.soap[symbol] = params

        return None
Example #3
0
    def sub_sampled_data(self, data, gap=None, random=True):
        """
        Select a portion of the data to train a GAP on as an nth of the full
        training data where n is the number of GAPs in this ensemble

        :param data: (gaptrain.data.Data)
        :param gap: (gaptrain.gap.GAP | None) If given, every value in
                    gap.params.n_sparses() must be no larger than the number
                    of sub-sampled data
        :param random: (bool) Whether to take a random sample
        :return: Copy of data after remove_random(remainder=n_data), where
                 n_data = len(data) // n_gaps. The original data is not
                 modified by this function
        :raises RuntimeError: If n_data is zero, or is smaller than any of the
                              numbers of sparse points of the GAP
        :raises NotImplementedError: If random is False
        """
        # Size of an n-th of the full training data
        n_data = len(data) // self.n_gaps()

        if n_data == 0:
            raise RuntimeError('Insufficient configurations to sub-sample')

        if gap is not None:
            if any(n_sparse > n_data for n_sparse in gap.params.n_sparses()):
                raise RuntimeError('Number of sub-sampled data must be greater'
                                   ' than or equal to the number of sparse '
                                   'points')
        else:
            logger.warning('Cannot check that the number of data is larger '
                           'than the number of sparse points')

        if not random:
            raise NotImplementedError

        # Only copy once all the checks have passed
        sub_sampled_data = data.copy()
        sub_sampled_data.remove_random(remainder=n_data)

        return sub_sampled_data
Example #4
0
    def loss(self, configs_a, configs_b, attr):
        """
        Evaluate self.function on the differences of one attribute between
        two equally sized sets of configurations, paired in order

        -----------------------------------------------------------------------
        :param configs_a: (gaptrain.configurations.ConfigurationSet)

        :param configs_b: (gaptrain.configurations.ConfigurationSet)

        :param attr: (str) Attribute of a configuration to use in the loss
                           function e.g. energy or forces

        :return: Result of self.function on the array of (a - b) differences,
                 or None if the attribute is None on any configuration
        """
        assert len(configs_a) == len(configs_b)

        differences = []

        for conf_a, conf_b in zip(configs_a, configs_b):
            value_a = getattr(conf_a, attr)
            value_b = getattr(conf_b, attr)

            # Both values are needed to form a difference
            if value_a is None or value_b is None:
                logger.warning(f'Cannot calculate loss for {attr} at least '
                               f'one value was None')
                return None

            differences.append(value_a - value_b)

        delta_array = np.array(differences)
        return self.function(delta_array)
Example #5
0
def simulation_steps(dt, kwargs):
    """Calculate the number of simulation steps

    The total simulation time is taken from the first of the keys 'ps', 'fs'
    or 'ns' (checked in that order) found in kwargs and is converted to fs.

    :param dt: (float) Timestep in fs
    :param kwargs: (dict) Must contain one of the keys 'ps', 'fs' or 'ns'
    :return: (int) Number of steps, which is at least 1
    :raises ValueError: If kwargs contains no simulation time
    """
    import math

    if dt < 0.09 or dt > 5:
        logger.warning('Unexpectedly small or large timestep - is it in fs?')

    if 'ps' in kwargs:
        time_fs = 1E3 * kwargs['ps']

    elif 'fs' in kwargs:
        time_fs = kwargs['fs']

    elif 'ns' in kwargs:
        time_fs = 1E6 * kwargs['ns']

    else:
        raise ValueError('Simulation time not found')

    # Truncating the quotient directly loses a step whenever floating point
    # error puts it just below a whole number e.g. 0.3 / 0.1 = 2.9999...,
    # so snap to the nearest integer when within rounding error of it
    ratio = time_fs / dt
    nearest = int(round(ratio))

    if math.isclose(ratio, nearest, rel_tol=1E-9):
        n_steps = nearest
    else:
        n_steps = int(ratio)

    # Run at least one step
    n_steps = max(n_steps, 1)

    # Log the number of steps that will actually be run
    logger.info(f'Running {n_steps} steps with a timestep of {dt} fs')
    return n_steps
Example #6
0
    def _calculate_single(self, init_config, gap, method_name):
        """
        Calculate a single τ_acc from one configuration

        GAP-MD is run in blocks, each started from the last frame of the
        previous one. The energy error above self.e_l is accumulated over
        the frames, and the current time is returned once the accumulated
        error exceeds self.e_t.

        :param init_config: (gt.Configuration)
        :param gap: (gt.GAP)
        :param method_name: (str) Ground truth method e.g. dftb, orca, gpaw
        :return: Time in fs: that at which the threshold was exceeded, that
                 reached before a failed ground truth calculation, or
                 self.max_time
        """
        error_sum = 0
        elapsed = 0

        steps_per_frame = self.interval_time // self.dt
        md_block_fs = self.interval_time * gt.GTConfig.n_cores

        while elapsed < self.max_time:

            md_kwargs = dict(gap=gap,
                             temp=self.temp,
                             dt=self.dt,
                             interval=steps_per_frame,
                             fs=md_block_fs,
                             n_cores=min(gt.GTConfig.n_cores, 4))
            traj = gt.md.run_gapmd(init_config, **md_kwargs)

            # Ground truth energies on every frame of this block
            try:
                traj.single_point(method_name=method_name)

            except ValueError:
                logger.warning('Failed to calculate single point energies with'
                               f' {method_name}. τ_acc will be underestimated '
                               f'by <{md_block_fs}')
                return elapsed

            # GAP predictions on a copy of the same frames
            pred = traj.copy()
            pred.parallel_gap(gap=gap)

            logger.info('      ___ |E_true - E_GAP|/eV ___')
            logger.info(' t/fs      err      cumul(err)')

            for idx in range(len(traj)):
                true_energy = traj[idx].energy
                frame_error = np.abs(true_energy - pred[idx].energy)

                # Only the error above the lower threshold is accumulated
                error_sum += max(frame_error - self.e_l, 0)
                elapsed += self.dt * steps_per_frame

                logger.info(f'{elapsed:5.0f}     '
                            f'{frame_error:6.4f}     '
                            f'{error_sum:6.4f}')

                if error_sum > self.e_t:
                    return elapsed

            # Continue the next block from where this one finished
            init_config = traj[-1]

        logger.info(f'Reached max(τ_acc) = {self.max_time} fs')
        return self.max_time
Example #7
0
def get_init_configs(system, init_configs=None, n=10, method_name=None):
    """
    Generate a set of initial configurations to use for active learning

    :param system: System providing atom_symbols() and random()
    :param init_configs: Configurations returned unchanged if every one of
                         them has an energy, otherwise new ones are generated
    :param n: (int) Number of configurations to generate
    :param method_name: (str | None) Name of the method used to evaluate the
                        generated configurations, which are saved afterwards.
                        If None they are returned without energies
    :return: The initial configurations
    """
    all_have_energy = (init_configs is not None
                       and all(cfg.energy is not None for cfg in init_configs))

    if all_have_energy:
        logger.info(f'Initialised with {len(init_configs)} configurations '
                    f'all with defined energy')
        return init_configs

    # No usable configurations, so generate random ones with the largest
    # possible minimum distance between molecules
    largest_vdw = max(get_vdw_radius(symbol)
                      for symbol in system.atom_symbols())
    ideal_dist = 2*largest_vdw - 0.5    # Desired minimum distance in Å

    # Lower the distance threshold in 0.2 Å steps until at least one in ten
    # attempts at generating a random configuration succeeds
    p_acc, dist = 0, ideal_dist+0.2

    while p_acc < 0.1:
        dist -= 0.2
        n_successes = 0

        for _ in range(10):
            try:
                system.random(min_dist_threshold=dist)
            except ex.RandomiseFailed:
                continue

            n_successes += 1

        p_acc = n_successes / 10
        logger.info(f'Generated configurations with p={p_acc:.2f} with a '
                    f'minimum distance of {dist:.2f}')

    # Generate the configurations using the distance found above
    init_configs = gt.Data(name='init_configs')

    while len(init_configs) < n:
        try:
            init_configs += system.random(min_dist_threshold=dist,
                                          with_intra=True)
        except ex.RandomiseFailed:
            continue

    logger.info(f'Added {len(init_configs)} configurations with min dist = '
                f'{dist:.3f} Å')

    if method_name is None:
        logger.warning('Have no method - not evaluating energies')
        return init_configs

    # Run the requested method in parallel over all the configurations
    getattr(init_configs, f'parallel_{method_name.lower()}')()

    init_configs.save()
    return init_configs
Example #8
0
    def save(self, filename=None, append=False):
        """
        Write this configuration as an extended xyz file. Every atom line has
        the atom label then x, y, z and, if this configuration has forces,
        the x, y, z components of the force on that atom as three further
        columns. The comment line holds the box lengths as a diagonal
        lattice, the properties and the energy if there is one.

        -----------------------------------------------------------------------
        :param filename: (str) Defaults to "<name>.xyz". A .xyz extension is
                         added if it is missing

        :param append: (bool) Append to the end of this exyz file?
        """
        if filename is None:
            filename = f'{self.name}.xyz'
            logger.info(f'Saving configuration as {filename}')

        a, b, c = self.box.size

        if self.energy is None:
            energy_str = ''
        else:
            energy_str = f'dft_energy={self.energy:.8f}'

        has_forces = self.forces is not None
        prop_str = ('Properties=species:S:1:pos:R:3'
                    + (':dft_forces:R:3' if has_forces else ''))

        if not filename.endswith('.xyz'):
            logger.warning('Filename had no .xyz extension - adding')
            filename += '.xyz'

        mode = 'a' if append else 'w'

        with open(filename, mode) as exyz_file:
            lattice = (f'{a:.6f} 0.000000 0.000000 '
                       f'0.000000 {b:.6f} 0.000000 '
                       f'0.000000 0.000000 {c:.6f}')
            header = (f'{len(self.atoms)}\n'
                      f'Lattice="{lattice}" '
                      f'{prop_str} '
                      f'{energy_str}')
            exyz_file.write(header + '\n')

            for idx, atom in enumerate(self.atoms):
                x, y, z = atom.coord
                atom_line = f'{atom.label} {x:.5f} {y:.5f} {z:.5f} '

                if has_forces:
                    fx, fy, fz = self.forces[idx]
                    atom_line += f'{fx:.5f} {fy:.5f} {fz:.5f}'

                exyz_file.write(atom_line + '\n')

        return None
Example #9
0
    def radius(self):
        """
        Calculate the radius of this species. For a single atom this is the
        van der Waals radius of that atom, otherwise it is half the maximum
        distance between any two atoms plus the van der Waals radius of H

        :return: (float) Radius in Å
        """
        if self.n_atoms != 1:
            positions = self.coordinates
            half_max_dist = np.max(distance_matrix(positions, positions)) / 2.0

            logger.warning('Assuming hydrogen on the exterior in calculating '
                           f'the radius of {self.name}')
            return half_max_dist + get_vdw_radius('H')

        return get_vdw_radius(atom_label=self.atoms[0].label)
Example #10
0
    def __init__(self, filename, init_configuration=None, charge=None,
                 mult=None, box=None):
        """
        Populate this trajectory from a file, with the loader chosen from the
        filename. In order of priority: 'geo_end.xyz' is extracted as DFTB
        output, a .traj file as an ASE trajectory, any other file is loaded
        as xyz if the charge, multiplicity and box are all given, and a .gro
        file is extracted as GROMACS output. Nothing is loaded otherwise.

        :param filename: (str)
        :param init_configuration: Configuration passed to the extract_from_*
                                   methods
        :param charge: (int | None)
        :param mult: (int | None)
        :param box: Box passed on to load, or None
        """
        super().__init__()

        fully_defined = all(prm is not None for prm in (charge, mult, box))

        if filename == 'geo_end.xyz':
            self.extract_from_dftb(init_config=init_configuration)

        elif filename.endswith('.traj'):
            self.extract_from_ase(filename, init_config=init_configuration)

        elif fully_defined:
            self.load(filename, box=box, charge=charge, mult=mult)

        elif filename.endswith('.gro'):
            self.extract_from_gmx(filename, init_configuration)

        if not len(self):
            logger.warning('Loaded an empty trajectory')
Example #11
0
    def __init__(self, name, system=None, default_params=True):
        """
        A Gaussian Approximation Potential

        :param name: (str)
        :param system: (gaptrain.systems.System | None)
        :param default_params: (bool) Whether to build the parameters from
                               the atomic symbols of the system. If False, or
                               if there is no system, the parameters are
                               built from an empty list of symbols
        """
        self.name = name

        if system is None or not default_params:
            self.params = Parameters(atom_symbols=[])
            logger.warning('Initialised a GAP with no parameters. '
                           'gap.train not available')

        else:
            symbols = system.atom_symbols()
            self.params = Parameters(atom_symbols=symbols)

        self.training_data = None
Example #12
0
    def load(self,
             filename=None,
             system=None,
             box=None,
             charge=None,
             mult=None):
        """
        Load a set of configurations from an extended xyz file - needs to have
        a system to be able to assign a charge, multiplicity and box size.
        Will set the *true* values

        ----------------------------------------------------------------------
        :param system: (gaptrain.systems.System |
                        gaptrain.configuration.Configuration) If its box,
                        charge and multiplicity are all defined they override
                        the box, charge and mult arguments

        :param filename: (str) Filename to load configurations from if
                         None defaults to "name.xyz"

        :param box: (gaptrain.box.Box)

        :param charge: (int)

        :param mult: (int)

        :raises ex.LoadingFailed: If the file does not exist or the number of
                                  atoms of a configuration cannot be read
        """
        filename = f'{self.name}.xyz' if filename is None else filename

        if not os.path.exists(filename):
            raise ex.LoadingFailed(f'XYZ file for {filename} did not exist')

        if system is not None:
            system_prms = (system.box, system.charge, system.mult)

            # Compare to None rather than using truthiness, as a charge of 0
            # is a defined value
            if all(prm is not None for prm in system_prms):
                logger.info('Setting box, charge and multiplicity from a conf')
                box, charge, mult = system_prms

        logger.info(f'Loading configuration set from {filename}')
        with open(filename, 'r') as xyz_file:
            lines = xyz_file.readlines()

        # Stride through the file and add configuration for each
        i = 0
        while i < len(lines):

            # Configurations may have different numbers of atoms
            try:
                n_atoms = int(lines[i].split()[0])

            # int() raises a ValueError for a non-integer string
            except (TypeError, ValueError, IndexError):
                raise ex.LoadingFailed('Could not read the number of atoms in '
                                       f'{filename}')

            # Number of atoms line + comment line + one line per atom
            stride = n_atoms + 2

            configuration = Configuration()
            configuration.load(file_lines=lines[i:i + stride],
                               box=box,
                               charge=charge,
                               mult=mult)

            self._list.append(configuration)
            i += stride

        if self.name is None or self.name == 'data':
            # str.rstrip removes a set of characters rather than a suffix
            # e.g. 'box.xyz'.rstrip('.xyz') is 'bo', so slice the extension
            if filename.endswith('.xyz'):
                self.name = filename[:-len('.xyz')]
            else:
                self.name = filename

            logger.warning(f'Set self.name to {self.name}')

        return None
Example #13
0
    def load(self,
             filename=None,
             file_lines=None,
             box=None,
             charge=None,
             mult=None):
        """
        Load a configuration from a file or a list of file lines. The first
        line is the number of atoms and the second may hold the energy as
        dft_energy=<value> and the box as Lattice="<9 components>". Every
        following line is an atom label and x, y, z, optionally followed by
        the three components of the force on that atom

        ----------------------------------------------------------------------
        :param filename: (str) Filename to load configurations from if
                         None defaults to "name.xyz"

        :param file_lines: (list(str)) List of extended xyz file lines to read
                           from. Takes priority over filename

        :param box: (gaptrain.box.Box) If None the box is read from the file

        :param charge: (int)

        :param mult: (int)

        :raises ex.LoadingFailed: If there is nothing to read from, the number
                                  of atoms or the box is malformed, or the
                                  number of atoms found is not that declared
        """
        if filename is None and file_lines is None:
            try:
                with open(f'{self.name}.xyz', 'r') as xyz_file:
                    file_lines = xyz_file.readlines()
            except IOError:
                raise ex.LoadingFailed('Could not load no file or file lines')

        if filename is not None and file_lines is None:
            with open(filename, 'r') as xyz_file:
                file_lines = xyz_file.readlines()

        self.charge = charge if charge is not None else self.charge
        self.mult = mult if mult is not None else self.mult
        self.box = box if box is not None else self.box

        # Atoms, true forces and energy
        n_atoms, atoms, forces = None, [], []

        # Grab the coordinates, energy and forces 0->n_atoms + 2 inclusive
        for j, line in enumerate(file_lines):
            items = line.split()

            if j == 0:
                # First thing should be the number of atoms, where int()
                # raises a ValueError for a non-integer string
                try:
                    n_atoms = int(items[0])
                except (IndexError, TypeError, ValueError):
                    raise ex.LoadingFailed('Line 1 of the xyz file '
                                           'malformatted')

            elif j == 1:
                if 'dft_energy' in line:

                    # Grab the energy, which may be in any position in the line
                    for item in items:
                        if 'dft_energy=' in item:
                            self.energy = float(item.split('=')[-1])
                            break

                # Try and load the box
                if 'Lattice="' in line and box is None:
                    try:
                        # Everything between the first pair of quotes
                        vec_string = line.split('"')[1]
                        components = [float(val) for val in vec_string.split()]
                        # Expecting all the components of the lattice
                        # vectors, so for an orthorhombic box take the
                        # diagonal elements of the a, b, c vectors
                        self.box = gt.Box(
                            size=[components[0], components[4], components[8]])

                    except (TypeError, ValueError, IndexError):
                        raise ex.LoadingFailed('Failed to load the box')

            elif len(items) < 4:
                logger.warning('Unexpected line break, assuming end of atoms')
                break

            else:
                atom_label, x, y, z = items[:4]
                atoms.append(Atom(atom_label, x=x, y=y, z=z))

                # Forces are only present on lines with exactly 7 columns
                if len(items) == 7:
                    forces.append(np.array([float(val) for val in items[4:]]))

        # Default charge and multiplicity if there is a box but no charge
        if self.box is not None:
            if charge is None and self.charge is None:
                logger.warning('Found a box but no charge, defaulting to 0')
                self.charge = 0
            if mult is None and self.mult is None:
                logger.warning('Found a box but no multiplicity, '
                               'defaulting to 1')
                self.mult = 1

        if len(atoms) == 0 or n_atoms is None:
            raise ex.LoadingFailed('Found no atoms in the file')

        if len(atoms) != n_atoms:
            raise ex.LoadingFailed(f'Number of atoms declared {n_atoms} not '
                                   f'equal to the number found {len(atoms)}')

        self.set_atoms(atoms=atoms)

        # Set the  forces if there are some
        if len(forces) > 0:
            self.forces = np.array(forces)

        return None
Example #14
0
def get_solvent(name):
    """
    Get the first solvent in the module-level solvent list with this name

    :param name: (str)
    :return: The matching solvent, or None if there is no match
    """
    matches = (solvent for solvent in solvents if solvent.name == name)
    return next(matches, None)


# Generate Solvent objects for all molecules in solvent_lib. Every .xyz file
# in solvent_dir that has a .itp file of the same name alongside it is added
solvents = []

for filename in os.listdir(solvent_dir):
    if not filename.endswith('.xyz'):
        continue

    # Swap only the extension - str.replace would also change any '.xyz'
    # that appears earlier in the filename
    itp_filename = filename[:-len('.xyz')] + '.itp'
    itp_filepath = os.path.join(solvent_dir, itp_filename)

    if not os.path.exists(itp_filepath):
        logger.warning(f'Found solvent xyz file without associated '
                       f'itp {filename}')
        continue

    solvent = Solvent(xyz_filename=os.path.join(solvent_dir, filename),
                      gmx_itp_filename=itp_filepath)

    # str.rstrip('.itp') removes any trailing '.', 'i', 't' or 'p' characters
    # rather than the suffix, which truncates names ending in those letters,
    # so split the extension off instead
    solvent.name = os.path.splitext(itp_filename)[0]

    solvents.append(solvent)
Example #15
0
def get_active_config_diff(config, gap, temp, e_thresh, max_time_fs,
                           ref_method_name='dftb', curr_time_fs=0, n_calls=0,
                           extra_time_fs=0, **kwargs):
    """
    Given a configuration run MD with a GAP until the absolute error between
    the predicted and true values is above a threshold

    Each call runs 2 + n_calls^3 + extra_time_fs femtoseconds of GAP-MD from
    config. If the error on the final frame is within the threshold and the
    maximum time has not been exceeded the function calls itself with
    n_calls + 1, again starting from config. extra_time_fs is not passed on
    to the recursive call so it only applies to the call it was given to.

    --------------------------------------------------------------------------
    :param config: (gt.Configuration)

    :param gap: (gt.GAP)

    :param e_thresh: (float) Threshold energy error (eV) above which the
                     configuration is returned

    :param temp: (float) Temperature to propagate GAP-MD

    :param max_time_fs: (float) Once curr_time_fs plus the MD time of this
                        call exceeds this value None is returned

    :param ref_method_name: (str) Name of the reference method passed to
                            calc_error and single_point

    :param curr_time_fs: (float) Total MD time of the previous calls

    :param n_calls: (int) Number of times this function has been called

    :param extra_time_fs: (float) Some extra time to run initially e.g. as the
                          GAP is already likely to get to e.g. 100 fs, so run
                          that initially and don't run ground truth evaluations

    :param kwargs: Passed on to gt.md.run_gapmd and to the recursive call

    :return: (gt.Configuration | None) A frame with the n_evals attribute
             set, or None if the maximum time was reached

    :raises ValueError: If temp or e_thresh is negative
    """
    if float(temp) < 0:
        raise ValueError('Cannot run MD with a negative temperature')

    if float(e_thresh) < 0:
        raise ValueError(f'Error threshold {e_thresh} must be positive (eV)')

    if extra_time_fs > 0:
        logger.info(f'Running an extra {extra_time_fs:.1f} fs of MD before '
                    f'calculating an error')

    # Simulation time grows with the cube of the number of previous calls
    md_time_fs = 2 + n_calls**3 + float(extra_time_fs)
    gap_traj = gt.md.run_gapmd(config,
                               gap=gap,
                               temp=float(temp),
                               dt=0.5,
                               interval=4,
                               fs=md_time_fs,
                               n_cores=1,
                               **kwargs)

    # Actual initial time, given this function can be called multiple times
    for frame in gap_traj:
        frame.t0 = curr_time_fs + extra_time_fs

    # Evaluate the error on the final frame
    error = calc_error(frame=gap_traj[-1], gap=gap, method_name=ref_method_name)

    # And the number of ground truth evaluations for this configuration
    n_evals = n_calls + 1

    if error > 100 * e_thresh:
        logger.error('Huge error: 100x threshold, returning the first frame')
        # The first frame needs one more reference calculation, hence the + 1
        gap_traj[0].single_point(method_name=ref_method_name, n_cores=1)
        gap_traj[0].n_evals = n_evals + 1
        return gap_traj[0]

    if error > 10 * e_thresh:
        logger.warning('Error 10 x threshold! Taking the last frame less than '
                       '10x the threshold')
        # Stride through roughly 10 frames, from the last strided frame
        # backwards, to prevent very slow backtracking
        # NOTE(review): error is reassigned in this loop, so if no frame
        # matches, the checks below use the error of the last frame visited
        # (the first frame of the trajectory) rather than that of
        # gap_traj[-1] - confirm this is intended
        for frame in reversed(gap_traj[::max(1, len(gap_traj)//10)]):
            error = calc_error(frame, gap=gap, method_name=ref_method_name)
            n_evals += 1

            if e_thresh < error < 10 * e_thresh:
                frame.n_evals = n_evals
                return frame

    if error > e_thresh:
        gap_traj[-1].n_evals = n_evals
        return gap_traj[-1]

    if curr_time_fs + md_time_fs > max_time_fs:
        logger.info(f'Reached the maximum time {max_time_fs} fs, returning '
                    f'None')
        return None

    # Increment t_0 to the new time
    curr_time_fs += md_time_fs

    # If the prediction is within the threshold then call this function again
    # NOTE(review): MD is restarted from config, not from the final frame of
    # this trajectory - presumably deliberate, verify
    return get_active_config_diff(config, gap, temp, e_thresh, max_time_fs,
                                  curr_time_fs=curr_time_fs,
                                  ref_method_name=ref_method_name,
                                  n_calls=n_calls+1,
                                  **kwargs)