def test_element_to_Z():
    """Check element_to_Z on integer atomic numbers and on element symbols."""
    # Integer input is expected to come back unchanged.
    for atomic_number in range(120):
        assert element_to_Z(atomic_number) == atomic_number

    # Chemical symbols are expected to map onto their atomic numbers.
    expected_by_symbol = (('H', 1), ('C', 6), ('O', 8), ('Og', 118))
    for symbol, atomic_number in expected_by_symbol:
        assert element_to_Z(symbol) == atomic_number
def test_element_to_Z():
    """Check element_to_Z on integers, numeric strings and element symbols."""
    # Integer input is expected to come back unchanged.
    for atomic_number in range(120):
        assert element_to_Z(atomic_number) == atomic_number

    # A numeric string is expected to be converted to the matching integer.
    assert element_to_Z('1') == 1

    # ``np.int`` was only an alias of the builtin ``int``; it was deprecated
    # in NumPy 1.20 and removed in NumPy 1.24, so the builtin is called
    # directly. The value passed to element_to_Z is unchanged (the int 1).
    assert element_to_Z(int(1.0)) == 1

    # Chemical symbols are expected to map onto their atomic numbers.
    for symbol, atomic_number in zip(['H', 'C', 'O', 'Og'], [1, 6, 8, 118]):
        assert element_to_Z(symbol) == atomic_number
def __init__(self, cell: 'ndarray', species: Union[List[str], List[int]],
             positions: 'ndarray', mass_dict: dict = None,
             prev_positions: 'ndarray' = None,
             species_labels: List[str] = None, forces=None,
             stds=None):
    """Store the cell, species, positions and per-atom arrays.

    :param cell: Cell matrix; it is copied into an array and its first
        three rows are stored as vec1, vec2 and vec3.
    :param species: One entry per atom, either element symbols or
        integers. Each entry is passed through element_to_Z to build
        coded_species.
    :param positions: Atomic positions; copied into an array.
    :param mass_dict: Stored as given on the instance.
    :param prev_positions: Previous positions. When omitted, a copy of
        the current positions is used (atoms have no velocity).
    :param species_labels: Labels stored as species_labels. When
        omitted, ``species`` itself is stored.
    :param forces: Optional per-atom forces, copied into an array.
        Defaults to zeros of shape (len(positions), 3).
    :param stds: Optional per-atom uncertainties, copied into an array.
        Defaults to zeros of shape (len(positions), 3).
    :raises AssertionError: If prev_positions is given and its length
        differs from that of positions.
    """
    # Set up individual Bravais lattice vectors
    self.cell = np.array(cell)
    self.vec1 = self.cell[0, :]
    self.vec2 = self.cell[1, :]
    self.vec3 = self.cell[2, :]

    # Get cell matrices for wrapping coordinates
    self.cell_transpose = self.cell.transpose()
    self.cell_transpose_inverse = np.linalg.inv(self.cell_transpose)
    self.cell_dot = self.get_cell_dot()
    self.cell_dot_inverse = np.linalg.inv(self.cell_dot)

    # Set positions; the wrapped copy is requested with in_place=False
    self.positions = np.array(positions)
    self.wrapped_positions = self.wrap_positions(in_place=False)

    # If species are strings, convert species to integers by atomic number
    if species_labels is None:
        self.species_labels = species
    else:
        self.species_labels = species_labels
    self.coded_species = np.array([element_to_Z(spec) for spec in species])
    self.nat = len(species)

    # Default: atoms have no velocity
    if prev_positions is None:
        self.prev_positions = np.copy(self.positions)
    else:
        # The original message was assembled from adjacent literals with
        # a missing space ("are notsame length"); spelled out here.
        assert len(positions) == len(prev_positions), \
            'Previous positions and positions are not same length'
        self.prev_positions = prev_positions

    self.energy = None
    self.stress = None

    if forces is not None:
        self.forces = np.array(forces)
    else:
        self.forces = np.zeros((len(positions), 3))

    if stds is not None:
        self.stds = np.array(stds)
    else:
        self.stds = np.zeros((len(positions), 3))

    self.mass_dict = mass_dict
def __init__(self, cell, species, positions, mass_dict=None,
             prev_positions=None, species_labels=None):
    """Store the cell, species and positions, and zero the per-atom arrays.

    :param cell: Cell matrix, indexed as ``cell[i, :]``; its first three
        rows are stored as vec1, vec2 and vec3.
    :param species: One entry per atom. Each entry is passed through
        element_to_Z to build coded_species.
    :param positions: Atomic positions; copied into an array.
    :param mass_dict: Stored as given on the instance.
    :param prev_positions: Previous positions. When omitted, a copy of
        the current positions is used (atoms have no velocity).
    :param species_labels: Labels stored as species_labels. When
        omitted, ``species`` itself is stored.
    :raises AssertionError: If prev_positions is given and its length
        differs from that of positions.
    """
    self.cell = cell
    self.vec1 = cell[0, :]
    self.vec2 = cell[1, :]
    self.vec3 = cell[2, :]

    # Get cell matrices for wrapping coordinates
    self.cell_transpose = self.cell.transpose()
    self.cell_transpose_inverse = np.linalg.inv(self.cell_transpose)
    self.cell_dot = self.get_cell_dot()
    self.cell_dot_inverse = np.linalg.inv(self.cell_dot)

    # Set positions
    self.positions = np.array(positions)
    # NOTE(review): called with no arguments and the result is not
    # stored; presumably it updates state on the instance - confirm
    # against wrap_positions.
    self.wrap_positions()

    # If species are strings, convert species to integers by atomic number
    if species_labels is None:
        self.species_labels = species
    else:
        self.species_labels = species_labels
    self.coded_species = np.array([element_to_Z(spec) for spec in species])
    self.nat = len(species)

    # Default: atoms have no velocity
    if prev_positions is None:
        self.prev_positions = np.copy(self.positions)
    else:
        # The original message was assembled from adjacent literals with
        # a missing space ("are notsame length"); spelled out here.
        assert len(positions) == len(prev_positions), \
            'Previous positions and positions are not same length'
        self.prev_positions = prev_positions

    self.energy = None
    self.stress = None
    self.forces = np.zeros((len(positions), 3))
    self.stds = np.zeros((len(positions), 3))
    self.mass_dict = mass_dict
def __init__(self, frames: List[Structure],
             gp: Union[GaussianProcess, MappedGaussianProcess],
             rel_std_tolerance: float = 4,
             abs_std_tolerance: float = 1,
             abs_force_tolerance: float = 0,
             max_force_error: float = inf,
             parallel: bool = False,
             n_cpus: int = 1,
             skip: int = 1,
             validate_ratio: float = 0.0,
             calculate_energy: bool = False,
             output_name: str = 'gp_from_aimd',
             pre_train_max_iter: int = 50,
             max_atoms_from_frame: int = np.inf,
             max_trains: int = np.inf,
             min_atoms_per_train: int = 1,
             shuffle_frames: bool = False,
             verbose: int = 1,
             pre_train_on_skips: int = -1,
             pre_train_seed_frames: List[Structure] = None,
             pre_train_seed_envs: List[Tuple[AtomicEnvironment,
                                             'np.array']] = None,
             pre_train_atoms_per_element: dict = None,
             train_atoms_per_element: dict = None,
             predict_atoms_per_element: dict = None,
             train_checkpoint_interval: int = 1,
             checkpoint_interval: int = 1,
             atom_checkpoint_interval: int = 100,
             model_format: str = 'json'):
    """
    Class which trains a GP off of an AIMD trajectory, and generates
    error statistics between the DFT and GP calls.

    There are a variety of options which can give you a finer control
    over the training process.

    :param frames: List of structures to evaluate / train GP on. The
        list is shuffled in place when shuffle_frames is set.
    :param gp: Gaussian Process object (or mapped Gaussian process)
    :param rel_std_tolerance: Train if uncertainty is above this *
        noise variance hyperparameter
    :param abs_std_tolerance: Train if uncertainty is above this
    :param abs_force_tolerance: Add atom if force error exceeds this
    :param max_force_error: Don't add atom if force error exceeds this
    :param parallel: Deprecated flag; passing True only emits a
        DeprecationWarning here
    :param validate_ratio: Fraction of frames used for validation;
        must lie in [0, 1]
    :param skip: Skip through frames; must be a positive integer
    :param calculate_energy: Use local energy kernel or not
    :param output_name: Write output of training to this file
    :param pre_train_max_iter: Stored for the pre-training step
    :param max_atoms_from_frame: Largest # of atoms added from one frame
    :param min_atoms_per_train: Only train when this many atoms have
        been added
    :param max_trains: Stop training GP after this many calls to train
    :param n_cpus: Number of CPUs to parallelize over for
        parallelization over atoms
    :param shuffle_frames: Randomize order of frames for better training
    :param verbose: 0: Silent, NO output written or printed at all.
        1: Minimal, 2: Lots of information
    :param pre_train_on_skips: Train model on every n frames before
        running
    :param pre_train_seed_frames: Frames to train on before running
    :param pre_train_seed_envs: Environments to train on before running
    :param pre_train_atoms_per_element: Max # of environments to add
        from each species in the seed pre-training steps. Entries keyed
        by atomic number are added to this dict in place.
    :param train_atoms_per_element: Max # of environments to add from
        each species in the training steps
    :param predict_atoms_per_element: Choose a random subset of N
        random atoms from each specified element to predict on. For
        instance, {"H":5} will only predict the forces and
        uncertainties associated with 5 Hydrogen atoms per frame.
        Elements not specified will be predicted as normal. This is
        useful for systems where you are most interested in a subset
        of elements. This will result in a faster but less exhaustive
        learning process.
    :param checkpoint_interval: Will be deprecated. Same as
        train_checkpoint_interval; only used when
        train_checkpoint_interval is falsy.
    :param train_checkpoint_interval: How often to write model after
        trainings
    :param atom_checkpoint_interval: How often to write model after
        atoms are added (since atoms may be added without training)
    :param model_format: Format to write GP model to
    :raises AssertionError: If skip is not a positive integer or
        validate_ratio is outside [0, 1].
    """

    # Set up parameters
    self.frames = frames
    if shuffle_frames:
        np.random.shuffle(frames)

    # GP Training and Execution parameters
    self.gp = gp
    # Check to see if GP is MGP for later flagging
    self.mgp = isinstance(gp, MappedGaussianProcess)
    self.rel_std_tolerance = rel_std_tolerance
    self.abs_std_tolerance = abs_std_tolerance
    self.abs_force_tolerance = abs_force_tolerance
    self.max_force_error = max_force_error
    self.max_trains = max_trains
    self.max_atoms_from_frame = max_atoms_from_frame
    self.min_atoms_per_train = min_atoms_per_train
    self.predict_atoms_per_element = predict_atoms_per_element
    self.verbose = verbose
    self.calculate_energy = calculate_energy
    self.n_cpus = n_cpus

    if parallel is True:
        # The message previously ran its two halves together without a
        # space and referred to "n_cpu" rather than the actual keyword.
        warnings.warn(
            "Parallel flag will be deprecated; "
            "we will instead use n_cpus alone.", DeprecationWarning)

    # Set prediction function based on if forces or energies are
    # desired, and on whether the model is a mapped GP
    if self.mgp:
        self.pred_func = predict_on_structure_mgp
    elif calculate_energy:
        self.pred_func = predict_on_structure_par_en
    else:
        self.pred_func = predict_on_structure_par

    # Parameters for negotiating with the training frames
    self.skip = skip
    assert (isinstance(skip, int) and skip >= 1), "Skip needs to be a " \
                                                  "positive integer."
    self.validate_ratio = validate_ratio
    assert (0 <= validate_ratio <= 1), \
        "validate_ratio needs to be [0,1]"

    # Set up for pretraining
    self.pre_train_max_iter = pre_train_max_iter
    self.pre_train_on_skips = pre_train_on_skips
    self.seed_envs = [] if pre_train_seed_envs is None else \
        pre_train_seed_envs
    self.seed_frames = [] if pre_train_seed_frames is None \
        else pre_train_seed_frames

    self.pre_train_env_per_species = {} if pre_train_atoms_per_element \
        is None else pre_train_atoms_per_element
    self.train_env_per_species = {} if train_atoms_per_element \
        is None else train_atoms_per_element

    # Convert to Coded Species: every existing key also gets an entry
    # under its atomic number. The original keys are kept, and keys are
    # snapshotted first because the dict grows during the loop.
    if self.pre_train_env_per_species:
        pre_train_species = list(self.pre_train_env_per_species.keys())
        for key in pre_train_species:
            self.pre_train_env_per_species[element_to_Z(key)] = \
                self.pre_train_env_per_species[key]

    # Output parameters: no Output object is created when verbose is 0
    if self.verbose:
        self.output = Output(output_name, always_flush=True)
    else:
        self.output = None
    self.train_checkpoint_interval = train_checkpoint_interval or \
        checkpoint_interval
    self.atom_checkpoint_interval = atom_checkpoint_interval

    self.model_format = model_format
    self.output_name = output_name

    # Defining variables to be used later
    self.curr_step = 0
    self.train_count = 0
    # To later be filled in using the time library
    self.start_time = None
def __init__(self, frames: List[Structure],
             gp: GaussianProcess,
             rel_std_tolerance: float = 4,
             abs_std_tolerance: float = 1,
             abs_force_tolerance: float = 0,
             max_force_error: float = inf,
             parallel: bool = False,
             n_cpus: int = None,
             skip: int = 1,
             validate_ratio: float = 0.1,
             calculate_energy: bool = False,
             output_name: str = 'gp_from_aimd',
             pre_train_max_iter: int = 50,
             max_atoms_from_frame: int = np.inf,
             max_trains: int = np.inf,
             min_atoms_per_train: int = 1,
             shuffle_frames: bool = False,
             verbose: int = 0,
             pre_train_on_skips: int = -1,
             pre_train_seed_frames: List[Structure] = None,
             pre_train_seed_envs: List[Tuple[AtomicEnvironment,
                                             'np.array']] = None,
             pre_train_atoms_per_element: dict = None,
             train_atoms_per_element: dict = None,
             checkpoint_interval: int = None,
             model_format: str = 'json'):
    """
    Class which trains a GP off of an AIMD trajectory, and generates
    error statistics between the DFT and GP calls.

    There are a variety of options which can give you a finer control
    over the training process.

    :param frames: List of structures to evaluate / train GP on. The
        list is shuffled in place when shuffle_frames is set.
    :param gp: Gaussian Process object
    :param rel_std_tolerance: Train if uncertainty is above this *
        noise variance hyperparameter
    :param abs_std_tolerance: Train if uncertainty is above this
    :param abs_force_tolerance: Add atom if force error exceeds this
    :param max_force_error: Don't add atom if force error exceeds this
    :param parallel: Use parallel functions or not. The parallel
        prediction functions are only selected when gp.par and
        gp.per_atom_par are also truthy.
    :param validate_ratio: Fraction of frames used for validation;
        must lie in [0, 1]
    :param n_cpus: Number of CPUs to parallelize over
    :param skip: Skip through frames; must be a positive integer
    :param calculate_energy: Use local energy kernel or not
    :param output_name: Write output of training to this file
    :param pre_train_max_iter: Stored for the pre-training step
    :param max_atoms_from_frame: Largest # of atoms added from one frame
    :param min_atoms_per_train: Only train when this many atoms have
        been added
    :param max_trains: Stop training GP after this many calls to train
    :param shuffle_frames: Randomize order of frames for better training
    :param verbose: 0: Silent, 1: Minimal, 2: Lots of information
    :param pre_train_on_skips: Train model on every n frames before
        running
    :param pre_train_seed_frames: Frames to train on before running
    :param pre_train_seed_envs: Environments to train on before running
    :param pre_train_atoms_per_element: Max # of environments to add
        from each species in the seed pre-training steps. Entries keyed
        by atomic number are added to this dict in place.
    :param train_atoms_per_element: Max # of environments to add from
        each species in the training steps
    :param checkpoint_interval: How often to write model after trainings
    :param model_format: Format to write GP model to
    :raises AssertionError: If skip is not a positive integer or
        validate_ratio is outside [0, 1].
    """

    # Set up parameters
    self.frames = frames
    if shuffle_frames:
        np.random.shuffle(frames)

    # GP Training and Execution parameters
    self.gp = gp
    self.rel_std_tolerance = rel_std_tolerance
    self.abs_std_tolerance = abs_std_tolerance
    self.abs_force_tolerance = abs_force_tolerance
    self.max_force_error = max_force_error
    self.max_trains = max_trains
    self.max_atoms_from_frame = max_atoms_from_frame
    self.min_atoms_per_train = min_atoms_per_train
    self.verbose = verbose
    self.parallel = parallel
    self.n_cpus = n_cpus

    # Set prediction function based on if forces or energies are
    # desired, and parallelization accordingly
    if parallel and gp.par and gp.per_atom_par:
        if calculate_energy:
            self.pred_func = predict_on_structure_par_en
        else:
            self.pred_func = predict_on_structure_par
    else:
        if calculate_energy:
            self.pred_func = predict_on_structure_en
        else:
            self.pred_func = predict_on_structure

    # Output parameters. The Output object used to be constructed a
    # second time further down with identical arguments, discarding
    # this first instance; it is now built exactly once, at the point
    # where the first construction happened.
    self.output = Output(output_name, always_flush=True)

    # Parameters for negotiating with the training frames
    self.skip = skip
    assert (isinstance(skip, int) and skip >= 1), "Skip needs to be a " \
                                                  "positive integer."
    self.validate_ratio = validate_ratio
    assert (0 <= validate_ratio <= 1), \
        "validate_ratio needs to be [0,1]"

    # Set up for pretraining
    self.pre_train_max_iter = pre_train_max_iter
    self.pre_train_on_skips = pre_train_on_skips
    self.seed_envs = [] if pre_train_seed_envs is None else \
        pre_train_seed_envs
    self.seed_frames = [] if pre_train_seed_frames is None \
        else pre_train_seed_frames
    self.pre_train_env_per_species = {} if pre_train_atoms_per_element \
        is None else pre_train_atoms_per_element
    self.train_env_per_species = {} if train_atoms_per_element \
        is None else train_atoms_per_element

    # Convert to Coded Species: every existing key also gets an entry
    # under its atomic number. The original keys are kept, and keys are
    # snapshotted first because the dict grows during the loop.
    if self.pre_train_env_per_species:
        pre_train_species = list(self.pre_train_env_per_species.keys())
        for key in pre_train_species:
            self.pre_train_env_per_species[element_to_Z(key)] = \
                self.pre_train_env_per_species[key]

    self.checkpoint_interval = checkpoint_interval
    self.model_format = model_format
    self.output_name = output_name

    # Defining variables to be used later
    self.curr_step = 0
    self.train_count = 0
    # To later be filled in using the time library
    self.start_time = None
def test_elt_warning():
    """Check that element_to_Z emits a warning for the input 'Fe2'."""
    unrecognized_symbol = 'Fe2'
    with pytest.warns(Warning):
        element_to_Z(unrecognized_symbol)
def __init__(self, frames: List[Structure],
             gp: GaussianProcess,
             rel_std_tolerance: float = 4,
             abs_std_tolerance: float = 1,
             abs_force_tolerance: float = 0,
             max_force_error: float = inf,
             parallel: bool = False,
             n_cpus: int = None,
             skip: int = 1,
             validate_ratio: float = 0.0,
             calculate_energy: bool = False,
             output_name: str = 'gp_from_aimd',
             pre_train_max_iter: int = 50,
             max_atoms_from_frame: int = inf,
             max_trains: int = inf,
             min_atoms_per_train: int = 1,
             shuffle_frames: bool = False,
             verbose: int = 0,
             pre_train_on_skips: int = -1,
             pre_train_seed_frames: List[Structure] = None,
             pre_train_seed_envs: List[Tuple[AtomicEnvironment,
                                             'np.array']] = None,
             pre_train_atoms_per_element: dict = None,
             train_atoms_per_element: dict = None,
             checkpoint_interval: int = None,
             model_format: str = 'json'):
    """
    Store the settings used to train a GP on a list of frames.

    :param frames: Structures to work through; shuffled in place when
        shuffle_frames is set.
    :param gp: Gaussian Process object, stored as given.
    :param rel_std_tolerance: Stored as given.
    :param abs_std_tolerance: Stored as given.
    :param abs_force_tolerance: Stored as given.
    :param max_force_error: Stored as given.
    :param parallel: Selects the parallel prediction functions when
        truthy, the serial ones otherwise.
    :param n_cpus: Stored as given.
    :param skip: Must be an integer >= 1.
    :param validate_ratio: Stored as given.
    :param calculate_energy: Selects the energy-predicting variant of
        the prediction function when truthy.
    :param output_name: Name handed to the Output object and stored.
    :param pre_train_max_iter: Stored as given.
    :param max_atoms_from_frame: Stored as given.
    :param max_trains: Stored as given.
    :param min_atoms_per_train: Stored on the instance as
        min_atoms_added.
    :param shuffle_frames: Shuffle ``frames`` before use.
    :param verbose: Stored as given.
    :param pre_train_on_skips: Stored as given.
    :param pre_train_seed_frames: Stored as seed_frames ([] if None).
    :param pre_train_seed_envs: Stored as seed_envs ([] if None).
    :param pre_train_atoms_per_element: Stored as
        pre_train_env_per_species ({} if None). Entries keyed by
        atomic number are added to this dict in place.
    :param train_atoms_per_element: Stored as train_env_per_species
        ({} if None).
    :param checkpoint_interval: Stored as given.
    :param model_format: Stored as given.
    :raises AssertionError: If skip is not an integer >= 1.
    """

    # Frames to work through, optionally in random order
    if shuffle_frames:
        np.random.shuffle(frames)
    self.frames = frames

    # GP model and the thresholds used while training
    self.gp = gp
    self.rel_std_tolerance = rel_std_tolerance
    self.abs_std_tolerance = abs_std_tolerance
    self.abs_force_tolerance = abs_force_tolerance
    self.max_force_error = max_force_error
    self.max_trains = max_trains
    self.parallel = parallel
    self.n_cpus = n_cpus

    # Pick the prediction routine: parallel or serial, with or without
    # energies
    if parallel:
        chosen_predictor = predict_on_structure_par_en \
            if calculate_energy else predict_on_structure_par
    else:
        chosen_predictor = predict_on_structure_en \
            if calculate_energy else predict_on_structure
    self.pred_func = chosen_predictor

    # Frame-handling settings
    skip_is_valid = isinstance(skip, int) and skip >= 1
    assert skip_is_valid, "Skip needs to be a positive integer."
    self.skip = skip
    self.max_atoms_from_frame = max_atoms_from_frame
    self.min_atoms_added = min_atoms_per_train

    # Pre-training settings; missing containers default to empty ones
    self.pre_train_max_iter = pre_train_max_iter
    self.pre_train_on_skips = pre_train_on_skips

    if pre_train_seed_envs is None:
        pre_train_seed_envs = []
    self.seed_envs = pre_train_seed_envs

    if pre_train_seed_frames is None:
        pre_train_seed_frames = []
    self.seed_frames = pre_train_seed_frames

    if pre_train_atoms_per_element is None:
        pre_train_atoms_per_element = {}
    self.pre_train_env_per_species = pre_train_atoms_per_element

    if train_atoms_per_element is None:
        train_atoms_per_element = {}
    self.train_env_per_species = train_atoms_per_element

    self.validate_ratio = validate_ratio

    # Add an atomic-number entry for every existing key. The keys are
    # snapshotted first because the dict grows during the loop, and the
    # entries are written one at a time in the original key order.
    limits = self.pre_train_env_per_species
    if limits:
        for label in list(limits.keys()):
            limits[element_to_Z(label)] = limits[label]

    # Output settings
    self.output = Output(output_name, always_flush=True)
    self.verbose = verbose
    self.checkpoint_interval = checkpoint_interval
    self.model_format = model_format
    self.output_name = output_name

    # Counters and timer filled in later
    self.curr_step = 0
    self.train_count = 0
    self.start_time = None