Esempio n. 1
0
def test_element_to_Z():
    """Check element_to_Z on integer inputs and on a few chemical symbols."""
    # Integers in [0, 120) are expected to be returned unchanged.
    for atomic_number in range(120):
        assert element_to_Z(atomic_number) == atomic_number

    # Each symbol is expected to map to the atomic number paired with it.
    expected_by_symbol = {'H': 1, 'C': 6, 'O': 8, 'Og': 118}
    for symbol, expected in expected_by_symbol.items():
        assert element_to_Z(symbol) == expected
Esempio n. 2
0
def test_element_to_Z():
    """Check element_to_Z on integers, a numeric string and element symbols."""
    # Integers in [0, 120) are expected to be returned unchanged.
    for i in range(120):
        assert element_to_Z(i) == i

    # A numeric string is expected to be read as an atomic number.
    assert element_to_Z('1') == 1
    # ``np.int`` was only an alias of the builtin ``int``; the alias was
    # deprecated in NumPy 1.20 and removed in 1.24, so call ``int`` directly.
    assert element_to_Z(int(1.0)) == 1

    # Each symbol is expected to map to the atomic number paired with it.
    for symbol, expected in zip(['H', 'C', 'O', 'Og'], [1, 6, 8, 118]):
        assert element_to_Z(symbol) == expected
Esempio n. 3
0
    def __init__(self,
                 cell: 'ndarray',
                 species: Union[List[str], List[int]],
                 positions: 'ndarray',
                 mass_dict: dict = None,
                 prev_positions: 'ndarray' = None,
                 species_labels: List[str] = None,
                 forces=None,
                 stds=None):
        """
        Build a structure from a cell, a species list and atomic positions.

        :param cell: Cell matrix (converted with ``np.array``). Rows 0, 1
            and 2 are stored as the lattice vectors ``vec1``, ``vec2`` and
            ``vec3``; its transpose must be invertible.
        :param species: One entry per atom; each entry is passed through
            ``element_to_Z`` to build ``coded_species``.
        :param positions: Atomic positions (converted with ``np.array``).
        :param mass_dict: Stored unchanged as ``self.mass_dict``.
        :param prev_positions: Previous positions. When omitted, a copy of
            ``positions`` is used; otherwise it must have the same length
            as ``positions`` and is stored without conversion.
        :param species_labels: Labels stored as ``self.species_labels``;
            defaults to ``species`` itself.
        :param forces: Optional forces (converted with ``np.array``);
            defaults to zeros of shape ``(len(positions), 3)``.
        :param stds: Optional uncertainties (converted with ``np.array``);
            defaults to zeros of shape ``(len(positions), 3)``.
        :raises AssertionError: If ``prev_positions`` is given and its
            length differs from that of ``positions``.
        """

        # Set up individual Bravais lattice vectors
        self.cell = np.array(cell)
        self.vec1 = self.cell[0, :]
        self.vec2 = self.cell[1, :]
        self.vec3 = self.cell[2, :]

        # get cell matrices for wrapping coordinates
        self.cell_transpose = self.cell.transpose()
        self.cell_transpose_inverse = np.linalg.inv(self.cell_transpose)
        self.cell_dot = self.get_cell_dot()
        self.cell_dot_inverse = np.linalg.inv(self.cell_dot)

        # set positions; the wrapped copy is requested without modifying
        # self.positions (in_place=False)
        self.positions = np.array(positions)
        self.wrapped_positions = self.wrap_positions(in_place=False)

        # If species are strings, convert species to integers by atomic number
        if species_labels is None:
            self.species_labels = species
        else:
            self.species_labels = species_labels
        self.coded_species = np.array([element_to_Z(spec) for spec in species])
        self.nat = len(species)

        # Default: atoms have no velocity
        if prev_positions is None:
            self.prev_positions = np.copy(self.positions)
        else:
            # The original message lacked a space between "not" and "same".
            assert len(positions) == len(prev_positions), \
                'Previous positions and positions are not same length'
            self.prev_positions = prev_positions

        # Energy and stress are unknown until something fills them in.
        self.energy = None
        self.stress = None

        if forces is not None:
            self.forces = np.array(forces)
        else:
            self.forces = np.zeros((len(positions), 3))

        if stds is not None:
            self.stds = np.array(stds)
        else:
            self.stds = np.zeros((len(positions), 3))

        self.mass_dict = mass_dict
Esempio n. 4
0
    def __init__(self,
                 cell,
                 species,
                 positions,
                 mass_dict=None,
                 prev_positions=None,
                 species_labels=None):
        """
        Build a structure from a cell, a species list and atomic positions.

        :param cell: Cell matrix, stored as given (no copy or conversion).
            It is indexed as ``cell[i, :]`` and transposed, so it must
            support that interface (e.g. a NumPy array); rows 0, 1 and 2
            become ``vec1``, ``vec2`` and ``vec3``. Its transpose must be
            invertible.
        :param species: One entry per atom; each entry is passed through
            ``element_to_Z`` to build ``coded_species``.
        :param positions: Atomic positions (converted with ``np.array``).
        :param mass_dict: Stored unchanged as ``self.mass_dict``.
        :param prev_positions: Previous positions. When omitted, a copy of
            ``self.positions`` is used; otherwise it must have the same
            length as ``positions`` and is stored without conversion.
        :param species_labels: Labels stored as ``self.species_labels``;
            defaults to ``species`` itself.
        :raises AssertionError: If ``prev_positions`` is given and its
            length differs from that of ``positions``.
        """
        self.cell = cell
        self.vec1 = cell[0, :]
        self.vec2 = cell[1, :]
        self.vec3 = cell[2, :]

        # get cell matrices for wrapping coordinates
        self.cell_transpose = self.cell.transpose()
        self.cell_transpose_inverse = np.linalg.inv(self.cell_transpose)
        self.cell_dot = self.get_cell_dot()
        self.cell_dot_inverse = np.linalg.inv(self.cell_dot)

        # set positions
        self.positions = np.array(positions)
        # Return value is ignored; presumably this updates instance state --
        # TODO confirm against wrap_positions.
        self.wrap_positions()

        # If species are strings, convert species to integers by atomic number
        if species_labels is None:
            self.species_labels = species
        else:
            self.species_labels = species_labels
        self.coded_species = np.array([element_to_Z(spec) for spec in species])
        self.nat = len(species)

        # Default: atoms have no velocity
        if prev_positions is None:
            self.prev_positions = np.copy(self.positions)
        else:
            # The original message lacked a space between "not" and "same".
            assert len(positions) == len(prev_positions), \
                'Previous positions and positions are not same length'
            self.prev_positions = prev_positions

        # Energy and stress are unknown at construction; forces and stds
        # start as zeros with one row of three components per position.
        self.energy = None
        self.stress = None
        self.forces = np.zeros((len(positions), 3))
        self.stds = np.zeros((len(positions), 3))
        self.mass_dict = mass_dict
Esempio n. 5
0
    def __init__(self,
                 frames: List[Structure],
                 gp: Union[GaussianProcess, MappedGaussianProcess],
                 rel_std_tolerance: float = 4,
                 abs_std_tolerance: float = 1,
                 abs_force_tolerance: float = 0,
                 max_force_error: float = inf,
                 parallel: bool = False,
                 n_cpus: int = 1,
                 skip: int = 1,
                 validate_ratio: float = 0.0,
                 calculate_energy: bool = False,
                 output_name: str = 'gp_from_aimd',
                 pre_train_max_iter: int = 50,
                 max_atoms_from_frame: int = np.inf,
                 max_trains: int = np.inf,
                 min_atoms_per_train: int = 1,
                 shuffle_frames: bool = False,
                 verbose: int = 1,
                 pre_train_on_skips: int = -1,
                 pre_train_seed_frames: List[Structure] = None,
                 pre_train_seed_envs: List[Tuple[AtomicEnvironment,
                                                 'np.array']] = None,
                 pre_train_atoms_per_element: dict = None,
                 train_atoms_per_element: dict = None,
                 predict_atoms_per_element: dict = None,
                 train_checkpoint_interval: int = 1,
                 checkpoint_interval: int = 1,
                 atom_checkpoint_interval: int = 100,
                 model_format: str = 'json'):
        """
        Class which trains a GP off of an AIMD trajectory, and generates
        error statistics between the DFT and GP calls.

        There are a variety of options which can give you a finer control
        over the training process.

        :param frames: List of structures to evaluate / train GP on. If
            shuffle_frames is set, this list is shuffled in place.
        :param gp: Gaussian Process object (plain or mapped)
        :param rel_std_tolerance: Train if uncertainty is above this *
            noise variance hyperparameter
        :param abs_std_tolerance: Train if uncertainty is above this
        :param abs_force_tolerance: Add atom if force error exceeds this
        :param max_force_error: Don't add atom if force error exceeds this
        :param parallel: Deprecated flag; passing True only emits a
            DeprecationWarning. Use n_cpus instead.
        :param validate_ratio: Fraction of frames used for validation;
            must lie in [0, 1]
        :param skip: Skip through frames; must be a positive integer
        :param calculate_energy: Use local energy kernel or not (ignored
            when gp is a MappedGaussianProcess)
        :param output_name: Write output of training to this file
        :param max_atoms_from_frame: Largest # of atoms added from one frame
        :param min_atoms_per_train: Only train when this many atoms have been
            added
        :param max_trains: Stop training GP after this many calls to train
        :param n_cpus: Number of CPUs to parallelize over for parallelization
                over atoms
        :param shuffle_frames: Randomize order of frames for better training
        :param verbose: 0: Silent, NO output written or printed at all.
                        1: Minimal,
                        2: Lots of information
        :param pre_train_on_skips: Train model on every n frames before running
        :param pre_train_seed_frames: Frames to train on before running
        :param pre_train_seed_envs: Environments to train on before running
        :param pre_train_atoms_per_element: Max # of environments to add from
            each species in the seed pre-training steps. The dict passed in
            is extended in place with keys converted by element_to_Z.
        :param train_atoms_per_element: Max # of environments to add from
            each species in the training steps
        :param predict_atoms_per_element: Choose a random subset of N random
            atoms from each specified element to predict on. For instance,
            {"H":5} will only predict the forces and uncertainties
            associated with 5 Hydrogen atoms per frame. Elements not
            specified will be predicted as normal. This is useful for
            systems where you are most interested in a subset of elements.
            This will result in a faster but less exhaustive learning process.
        :param checkpoint_interval: Will be deprecated. Only used as a
            fallback when train_checkpoint_interval is falsy.
        :param train_checkpoint_interval: How often to write model after
                        trainings
        :param atom_checkpoint_interval: How often to write model after atoms
            are added (since atoms may be added without training)
        :param model_format: Format to write GP model to
        :raises AssertionError: If skip is not a positive integer or
            validate_ratio is outside [0, 1].
        """

        # Set up parameters
        self.frames = frames
        if shuffle_frames:
            # In-place shuffle: the caller's list is reordered as well.
            np.random.shuffle(frames)

        # GP Training and Execution parameters
        self.gp = gp
        # Check to see if GP is MGP for later flagging
        self.mgp = isinstance(gp, MappedGaussianProcess)
        self.rel_std_tolerance = rel_std_tolerance
        self.abs_std_tolerance = abs_std_tolerance
        self.abs_force_tolerance = abs_force_tolerance
        self.max_force_error = max_force_error
        self.max_trains = max_trains
        self.max_atoms_from_frame = max_atoms_from_frame
        self.min_atoms_per_train = min_atoms_per_train
        self.predict_atoms_per_element = predict_atoms_per_element
        self.verbose = verbose
        self.calculate_energy = calculate_energy
        self.n_cpus = n_cpus

        if parallel is True:
            # The two literals are concatenated, so the separating space
            # after the semicolon has to be written explicitly.
            warnings.warn(
                "Parallel flag will be deprecated; "
                "we will instead use n_cpu alone.", DeprecationWarning)

        # Set prediction function based on if forces or energies are
        # desired; a mapped GP always uses its dedicated predictor
        if self.mgp:
            self.pred_func = predict_on_structure_mgp
        elif calculate_energy:
            self.pred_func = predict_on_structure_par_en
        else:
            self.pred_func = predict_on_structure_par

        # Parameters for negotiating with the training frames
        self.skip = skip
        assert (isinstance(skip, int) and skip >= 1), "Skip needs to be a " \
                                                      "positive integer."
        self.validate_ratio = validate_ratio
        assert (0 <= validate_ratio <= 1), \
            "validate_ratio needs to be [0,1]"

        # Set up for pretraining
        self.pre_train_max_iter = pre_train_max_iter
        self.pre_train_on_skips = pre_train_on_skips
        self.seed_envs = [] if pre_train_seed_envs is None else \
            pre_train_seed_envs
        self.seed_frames = [] if pre_train_seed_frames is None \
            else pre_train_seed_frames

        self.pre_train_env_per_species = {} if pre_train_atoms_per_element \
            is None else pre_train_atoms_per_element
        self.train_env_per_species = {} if train_atoms_per_element \
            is None else train_atoms_per_element

        # Convert to Coded Species: every existing key gains a twin entry
        # under its element_to_Z code (the original key is kept). The key
        # list is snapshotted first because the dict grows during the loop.
        # NOTE(review): train_env_per_species is not converted here --
        # confirm that is intended.
        if self.pre_train_env_per_species:
            pre_train_species = list(self.pre_train_env_per_species.keys())
            for key in pre_train_species:
                self.pre_train_env_per_species[element_to_Z(key)] = \
                    self.pre_train_env_per_species[key]

        # Output parameters: no Output object is created when verbose is 0
        if self.verbose:
            self.output = Output(output_name, always_flush=True)
        else:
            self.output = None
        # Deprecated checkpoint_interval only matters when the new
        # parameter is falsy (e.g. 0 or None).
        self.train_checkpoint_interval = train_checkpoint_interval or \
            checkpoint_interval
        self.atom_checkpoint_interval = atom_checkpoint_interval

        self.model_format = model_format
        self.output_name = output_name

        # Defining variables to be used later
        self.curr_step = 0
        self.train_count = 0
        # To later be filled in using the time library
        self.start_time = None
Esempio n. 6
0
    def __init__(self, frames: List[Structure],
                 gp: GaussianProcess,
                 rel_std_tolerance: float = 4,
                 abs_std_tolerance: float = 1,
                 abs_force_tolerance: float = 0,
                 max_force_error: float = inf,
                 parallel: bool = False,
                 n_cpus: int = None,
                 skip: int = 1,
                 validate_ratio: float = 0.1,
                 calculate_energy: bool = False,
                 output_name: str = 'gp_from_aimd',
                 pre_train_max_iter: int = 50,
                 max_atoms_from_frame: int = np.inf,
                 max_trains: int = np.inf,
                 min_atoms_per_train: int = 1,
                 shuffle_frames: bool = False,
                 verbose: int = 0,
                 pre_train_on_skips: int = -1,
                 pre_train_seed_frames: List[Structure] = None,
                 pre_train_seed_envs: List[Tuple[AtomicEnvironment,
                                                 'np.array']] = None,
                 pre_train_atoms_per_element: dict = None,
                 train_atoms_per_element: dict = None,
                 checkpoint_interval: int = None,
                 model_format: str = 'json'):
        """
        Class which trains a GP off of an AIMD trajectory, and generates
        error statistics between the DFT and GP calls.

        There are a variety of options which can give you a finer control
        over the training process.

        :param frames: List of structures to evaluate / train GP on. If
            shuffle_frames is set, this list is shuffled in place.
        :param gp: Gaussian Process object
        :param rel_std_tolerance: Train if uncertainty is above this *
            noise variance hyperparameter
        :param abs_std_tolerance: Train if uncertainty is above this
        :param abs_force_tolerance: Add atom if force error exceeds this
        :param max_force_error: Don't add atom if force error exceeds this
        :param parallel: Use parallel functions or not. The parallel
            predictors are only selected when gp.par and gp.per_atom_par
            are also truthy.
        :param validate_ratio: Fraction of frames used for validation;
            must lie in [0, 1]
        :param n_cpus: Number of CPUs to parallelize over
        :param skip: Skip through frames; must be a positive integer
        :param calculate_energy: Use local energy kernel or not
        :param output_name: Write output of training to this file
        :param max_atoms_from_frame: Largest # of atoms added from one frame
        :param min_atoms_per_train: Only train when this many atoms have been
            added
        :param max_trains: Stop training GP after this many calls to train
        :param shuffle_frames: Randomize order of frames for better training
        :param verbose: 0: Silent, 1: Minimal, 2: Lots of information
        :param pre_train_on_skips: Train model on every n frames before running
        :param pre_train_seed_frames: Frames to train on before running
        :param pre_train_seed_envs: Environments to train on before running
        :param pre_train_atoms_per_element: Max # of environments to add from
            each species in the seed pre-training steps. The dict passed in
            is extended in place with keys converted by element_to_Z.
        :param train_atoms_per_element: Max # of environments to add from
            each species in the training steps
        :param checkpoint_interval: How often to write model after trainings
        :param model_format: Format to write GP model to
        :raises AssertionError: If skip is not a positive integer or
            validate_ratio is outside [0, 1].
        """

        # Set up parameters
        self.frames = frames
        if shuffle_frames:
            # In-place shuffle: the caller's list is reordered as well.
            np.random.shuffle(frames)

        # GP Training and Execution parameters
        self.gp = gp
        self.rel_std_tolerance = rel_std_tolerance
        self.abs_std_tolerance = abs_std_tolerance
        self.abs_force_tolerance = abs_force_tolerance
        self.max_force_error = max_force_error
        self.max_trains = max_trains
        self.max_atoms_from_frame = max_atoms_from_frame
        self.min_atoms_per_train = min_atoms_per_train

        self.parallel = parallel
        self.n_cpus = n_cpus
        # Set prediction function based on if forces or energies are
        # desired, and parallelization accordingly
        if (parallel and gp.par and gp.per_atom_par):
            if calculate_energy:
                self.pred_func = predict_on_structure_par_en
            else:
                self.pred_func = predict_on_structure_par
        else:
            if calculate_energy:
                self.pred_func = predict_on_structure_en
            else:
                self.pred_func = predict_on_structure

        # Parameters for negotiating with the training frames
        self.skip = skip
        assert (isinstance(skip, int) and skip >= 1), "Skip needs to be a " \
                                                      "positive integer."
        self.validate_ratio = validate_ratio
        assert (0 <= validate_ratio <= 1), \
            "validate_ratio needs to be [0,1]"

        # Set up for pretraining
        self.pre_train_max_iter = pre_train_max_iter
        self.pre_train_on_skips = pre_train_on_skips
        self.seed_envs = [] if pre_train_seed_envs is None else \
            pre_train_seed_envs
        self.seed_frames = [] if pre_train_seed_frames is None \
            else pre_train_seed_frames

        self.pre_train_env_per_species = {} if pre_train_atoms_per_element \
            is None else pre_train_atoms_per_element
        self.train_env_per_species = {} if train_atoms_per_element \
            is None else train_atoms_per_element

        # Convert to Coded Species: every existing key gains a twin entry
        # under its element_to_Z code (the original key is kept). The key
        # list is snapshotted first because the dict grows during the loop.
        if self.pre_train_env_per_species:
            pre_train_species = list(self.pre_train_env_per_species.keys())
            for key in pre_train_species:
                self.pre_train_env_per_species[element_to_Z(key)] = \
                    self.pre_train_env_per_species[key]

        # Output parameters. The Output object is built exactly once, and
        # only after the arguments above have been validated; previously a
        # second, identical object was created and the first one discarded.
        self.output = Output(output_name, always_flush=True)
        self.verbose = verbose
        self.checkpoint_interval = checkpoint_interval
        self.model_format = model_format
        self.output_name = output_name

        # Defining variables to be used later
        self.curr_step = 0
        self.train_count = 0
        # To later be filled in using the time library
        self.start_time = None
Esempio n. 7
0
def test_elt_warning():
    """Check that element_to_Z emits a warning for the input 'Fe2'."""
    unrecognised_label = 'Fe2'
    # pytest.warns fails the test if no Warning (or subclass) is raised
    # inside the block.
    with pytest.warns(Warning):
        element_to_Z(unrecognised_label)
Esempio n. 8
0
    def __init__(self,
                 frames: List[Structure],
                 gp: GaussianProcess,
                 rel_std_tolerance: float = 4,
                 abs_std_tolerance: float = 1,
                 abs_force_tolerance: float = 0,
                 max_force_error: float = inf,
                 parallel: bool = False,
                 n_cpus: int = None,
                 skip: int = 1,
                 validate_ratio: float = 0.0,
                 calculate_energy: bool = False,
                 output_name: str = 'gp_from_aimd',
                 pre_train_max_iter: int = 50,
                 max_atoms_from_frame: int = inf,
                 max_trains: int = inf,
                 min_atoms_per_train: int = 1,
                 shuffle_frames: bool = False,
                 verbose: int = 0,
                 pre_train_on_skips: int = -1,
                 pre_train_seed_frames: List[Structure] = None,
                 pre_train_seed_envs: List[Tuple[AtomicEnvironment,
                                                 'np.array']] = None,
                 pre_train_atoms_per_element: dict = None,
                 train_atoms_per_element: dict = None,
                 checkpoint_interval: int = None,
                 model_format: str = 'json'):
        """
        Store the training configuration and pick the prediction function.

        Every argument is kept on the instance, mostly under its own name.
        Exceptions: ``min_atoms_per_train`` is stored as
        ``min_atoms_added``, the ``pre_train_seed_*`` arguments as
        ``seed_envs`` / ``seed_frames``, and the ``*_atoms_per_element``
        dicts as ``pre_train_env_per_species`` / ``train_env_per_species``.
        ``shuffle_frames`` and ``calculate_energy`` are only consulted here.

        :param frames: Structures to work through; shuffled in place when
            ``shuffle_frames`` is truthy.
        :param gp: Gaussian process object, stored as ``self.gp``.
        :param parallel: Selects the ``*_par`` prediction functions.
        :param calculate_energy: Selects the ``*_en`` prediction functions.
        :param skip: Must be an ``int`` that is at least 1.
        :param output_name: Passed to ``Output`` and stored.
        :param pre_train_seed_frames: ``None`` becomes an empty list.
        :param pre_train_seed_envs: ``None`` becomes an empty list.
        :param pre_train_atoms_per_element: ``None`` becomes an empty dict;
            otherwise the dict passed in gains, for each existing key, an
            extra entry keyed by ``element_to_Z(key)``.
        :param train_atoms_per_element: ``None`` becomes an empty dict.
        :raises AssertionError: If ``skip`` is not a positive integer.
        """

        # --- frames ---------------------------------------------------
        self.frames = frames
        if shuffle_frames:
            np.random.shuffle(frames)

        # --- GP training / execution settings -------------------------
        self.gp = gp
        self.max_trains = max_trains
        self.max_force_error = max_force_error
        self.abs_force_tolerance = abs_force_tolerance
        self.abs_std_tolerance = abs_std_tolerance
        self.rel_std_tolerance = rel_std_tolerance
        self.n_cpus = n_cpus
        self.parallel = parallel

        # Energy vs. force prediction, each in a parallel or serial flavour.
        if calculate_energy:
            self.pred_func = predict_on_structure_par_en if parallel \
                else predict_on_structure_en
        else:
            self.pred_func = predict_on_structure_par if parallel \
                else predict_on_structure

        # --- frame traversal settings ---------------------------------
        skip_is_valid = isinstance(skip, int) and skip >= 1
        assert skip_is_valid, "Skip needs to be a positive integer."
        self.skip = skip
        self.validate_ratio = validate_ratio
        self.max_atoms_from_frame = max_atoms_from_frame
        self.min_atoms_added = min_atoms_per_train

        # --- pre-training settings ------------------------------------
        self.pre_train_on_skips = pre_train_on_skips
        self.pre_train_max_iter = pre_train_max_iter

        if pre_train_seed_envs is None:
            self.seed_envs = []
        else:
            self.seed_envs = pre_train_seed_envs

        if pre_train_seed_frames is None:
            self.seed_frames = []
        else:
            self.seed_frames = pre_train_seed_frames

        if pre_train_atoms_per_element is None:
            self.pre_train_env_per_species = {}
        else:
            self.pre_train_env_per_species = pre_train_atoms_per_element

        if train_atoms_per_element is None:
            self.train_env_per_species = {}
        else:
            self.train_env_per_species = train_atoms_per_element

        # Add a coded-species entry next to every existing key. The keys
        # are snapshotted first because the dict is extended while looping.
        per_species = self.pre_train_env_per_species
        if per_species:
            for label in list(per_species.keys()):
                limit = per_species[label]
                per_species[element_to_Z(label)] = limit

        # --- output settings ------------------------------------------
        self.output = Output(output_name, always_flush=True)
        self.output_name = output_name
        self.model_format = model_format
        self.checkpoint_interval = checkpoint_interval
        self.verbose = verbose

        # --- run-time bookkeeping, filled in later --------------------
        self.start_time = None
        self.train_count = 0
        self.curr_step = 0