def test_ase_api(self):
    """Test the ase api."""
    gadb = DataConnection('{}/data/gadb.db'.format(wkdir))
    all_cand = gadb.get_all_relaxed_candidates()

    cf = all_cand[0].get_chemical_formula()
    extend_atoms_class(all_cand[0])
    self.assertTrue(isinstance(all_cand[0], type(all_cand[1])))

    f = FeatureGenerator()
    fp = f.composition_vec(all_cand[0])
    all_cand[0].set_features(fp)

    self.assertTrue(np.allclose(all_cand[0].get_features(), fp))
    self.assertTrue(all_cand[0].get_chemical_formula() == cf)

    extend_atoms_class(all_cand[1])
    self.assertTrue(all_cand[1].get_features() is None)

    g = ase_to_networkx(all_cand[2])
    all_cand[2].set_graph(g)
    self.assertTrue(all_cand[2].get_graph() == g)
    self.assertTrue(all_cand[1].get_graph() is None)
def test_create_database():
    from ase.ga.data import PrepareDB
    from ase.ga.data import DataConnection
    import os
    import numpy as np

    db_file = 'gadb.db'
    if os.path.isfile(db_file):
        os.remove(db_file)

    from ase.build import fcc111

    atom_numbers = np.array([78, 78, 79, 79])
    slab = fcc111('Ag', size=(4, 4, 2), vacuum=10.)
    PrepareDB(db_file_name=db_file,
              simulation_cell=slab,
              stoichiometry=atom_numbers)

    assert os.path.isfile(db_file)

    dc = DataConnection(db_file)

    slab_get = dc.get_slab()
    an_get = dc.get_atom_numbers_to_optimize()

    assert len(slab) == len(slab_get)
    assert np.all(slab.numbers == slab_get.numbers)
    assert np.all(slab.get_positions() == slab_get.get_positions())
    assert np.all(an_get == atom_numbers)

    os.remove(db_file)
def test_networkx_api(self):
    """Test the networkx api."""
    gadb = DataConnection('{}/data/gadb.db'.format(wkdir))
    all_cand = gadb.get_all_relaxed_candidates()

    g = ase_to_networkx(all_cand[1])
    self.assertEqual(len(g), len(all_cand[1]))

    matrix = networkx_to_adjacency(g)
    self.assertEqual(np.shape(matrix), (len(all_cand[1]), len(all_cand[1])))
def test_feature_base(self):
    """Test the base feature generator."""
    gadb = DataConnection('{}/data/gadb.db'.format(wkdir))
    all_cand = gadb.get_all_relaxed_candidates()

    f = BaseGenerator()

    nl = ase_neighborlist(all_cand[0])
    assert f.get_neighborlist(all_cand[0]) == nl

    pos = all_cand[0].get_positions()
    assert np.allclose(f.get_positions(all_cand[0]), pos)
def test_ase_nl(self):
    """Function to test the ase wrapper."""
    # Connect database generated by a GA search.
    gadb = DataConnection('{}/data/gadb.db'.format(wkdir))

    # Get all relaxed candidates from the db file.
    all_cand = gadb.get_all_relaxed_candidates(use_extinct=False)

    nl = ase_neighborlist(all_cand[0])
    self.assertEqual(len(all_cand[0]), len(nl))
def test_pdf(self):
    gadb = DataConnection('{}/data/gadb.db'.format(wkdir))
    all_cand = gadb.get_all_relaxed_candidates(use_extinct=False)

    cutoff_dictionary = {}
    for z in range(1, 92):
        cutoff_dictionary[z] = covalent_radii[z]

    pdf, x1 = pair_distribution(all_cand)

    # Get bond length deviations from touching spheres.
    dev, x2 = pair_deviation(all_cand, cutoffs=cutoff_dictionary)
def test_add_candidates():
    import pytest
    from ase.build import fcc111
    from ase.ga.data import PrepareDB
    from ase.ga.data import DataConnection
    from ase.ga.offspring_creator import OffspringCreator
    from ase.ga import set_raw_score
    import os

    db_file = 'gadb.db'
    if os.path.isfile(db_file):
        os.remove(db_file)

    db = PrepareDB(db_file)

    slab1 = fcc111('Ag', size=(2, 2, 2))
    db.add_unrelaxed_candidate(slab1)

    slab2 = fcc111('Cu', size=(2, 2, 2))
    set_raw_score(slab2, 4)
    db.add_relaxed_candidate(slab2)
    assert slab2.info['confid'] == 3

    db = DataConnection(db_file)
    assert db.get_number_of_unrelaxed_candidates() == 1

    slab3 = db.get_an_unrelaxed_candidate()
    old_confid = slab3.info['confid']
    slab3[0].symbol = 'Au'
    db.add_unrelaxed_candidate(slab3, 'mutated: Parent {0}'.format(old_confid))
    new_confid = slab3.info['confid']
    # confid should update when using add_unrelaxed_candidate
    assert old_confid != new_confid

    slab3[1].symbol = 'Au'
    db.add_unrelaxed_step(slab3, 'mutated: Parent {0}'.format(new_confid))
    # confid should not change when using add_unrelaxed_step
    assert slab3.info['confid'] == new_confid

    with pytest.raises(AssertionError):
        db.add_relaxed_step(slab3)
    set_raw_score(slab3, 3)
    db.add_relaxed_step(slab3)

    slab4 = OffspringCreator.initialize_individual(slab1,
                                                   fcc111('Au', size=(2, 2, 2)))
    set_raw_score(slab4, 67)
    db.add_relaxed_candidate(slab4)
    assert slab4.info['confid'] == 7

    more_slabs = []
    for m in ['Ni', 'Pd', 'Pt']:
        slab = fcc111(m, size=(2, 2, 2))
        slab = OffspringCreator.initialize_individual(slab1, slab)
        set_raw_score(slab, sum(slab.get_masses()))
        more_slabs.append(slab)
    db.add_more_relaxed_candidates(more_slabs)
    assert more_slabs[1].info['confid'] == 9

    os.remove(db_file)
def test_catlearn_nl(self):
    """Function to test the catlearn neighborlist wrapper."""
    # Connect database generated by a GA search.
    gadb = DataConnection('{}/data/gadb.db'.format(wkdir))

    # Get all relaxed candidates from the db file.
    all_cand = gadb.get_all_relaxed_candidates(use_extinct=False)

    nl1 = catlearn_neighborlist(all_cand[0], max_neighbor=1)
    self.assertEqual((len(all_cand[0]), len(all_cand[0])), np.shape(nl1))

    nl4 = catlearn_neighborlist(all_cand[0], max_neighbor=4)
    self.assertFalse(np.allclose(nl1, nl4))

    nl5 = catlearn_neighborlist(all_cand[0], max_neighbor=5)
    nlfull = catlearn_neighborlist(all_cand[0], max_neighbor='full')
    self.assertFalse(np.allclose(nl4, nl5))
    self.assertTrue(np.allclose(nl5, nlfull))
def prep_db(self):
    """Put scores on each parent."""
    db = connect(self.db_name)
    rows = db.count() - 1
    # The first row of the database contains the surface/framework
    # without the adsorbate.
    da = DataConnection(self.db_name)
    struct = da.get_all_unrelaxed_candidates()
    for i in range(1, rows):
        a = struct[i - 1]
        # Update the DB with pertinent information.
        da.c.update(a.info['confid'],
                    atoms=None,
                    origin='StartingCandidateRelaxed',
                    raw_score=-a.get_potential_energy(),
                    relaxed=True)
def test_create_database(tmp_path):
    db_file = tmp_path / 'gadb.db'

    atom_numbers = np.array([78, 78, 79, 79])
    slab = fcc111('Ag', size=(4, 4, 2), vacuum=10.)
    PrepareDB(db_file_name=db_file,
              simulation_cell=slab,
              stoichiometry=atom_numbers)

    assert os.path.isfile(db_file)

    dc = DataConnection(db_file)

    slab_get = dc.get_slab()
    an_get = dc.get_atom_numbers_to_optimize()

    assert len(slab) == len(slab_get)
    assert np.all(slab.numbers == slab_get.numbers)
    assert np.all(slab.get_positions() == slab_get.get_positions())
    assert np.all(an_get == atom_numbers)
def get_data(self):
    """Generate features from atoms objects."""
    # Connect database generated by a GA search.
    gadb = DataConnection('{}/data/gadb.db'.format(wkdir))

    # Get all relaxed candidates from the db file.
    print('Getting candidates from the database')
    all_cand = gadb.get_all_relaxed_candidates(use_extinct=False)

    # Setup the test and training datasets.
    testset = get_unique(atoms=all_cand, size=test_size, key='raw_score')
    trainset = get_train(atoms=all_cand, size=train_size,
                         taken=testset['taken'], key='raw_score')

    # Clear out some old saved data.
    for i in trainset['atoms']:
        del i.info['data']['nnmat']

    # Initiate the fingerprint generators with relevant input variables.
    print('Getting the fingerprints')
    f = FeatureGenerator()

    train_features = f.return_vec(trainset['atoms'], [f.nearestneighbour_vec])
    test_features = f.return_vec(testset['atoms'], [f.nearestneighbour_vec])

    train_targets = []
    for a in trainset['atoms']:
        train_targets.append(a.info['key_value_pairs']['raw_score'])
    test_targets = []
    for a in testset['atoms']:
        test_targets.append(a.info['key_value_pairs']['raw_score'])

    return train_features, train_targets, trainset['atoms'], \
        test_features, test_targets, testset['atoms']
def ga_init(self, ddiff, dmax, dE):
    self.ddiff = ddiff
    self.dmax = dmax
    self.dE = dE

    da = DataConnection(self.db_name)
    # Adsorbate atom numbers to optimize.
    atom_numbers_to_optimize = da.get_atom_numbers_to_optimize()
    # Number of atoms to optimize.
    n_to_optimize = len(atom_numbers_to_optimize)
    slab = da.get_slab()
    all_atom_types = get_all_atom_types(slab, atom_numbers_to_optimize)
    # Closest distance atoms can be to each other.
    blmin = closest_distances_generator(all_atom_types,
                                        ratio_of_covalent_radii=.7)
    # Comparator to determine whether parents should make children.
    comp = InteratomicDistanceComparator(n_top=None,
                                         pair_cor_cum_diff=self.ddiff,
                                         pair_cor_max=dmax,
                                         dE=dE,
                                         mic=True)
    # How children are generated (make sure your adsorbates are
    # uniquely tagged).
    pairing = CutAndSplicePairing(blmin, None, use_tags=True, p1=.2)

    population = Population(data_connection=da,
                            population_size=self.pop,
                            comparator=comp)

    for i in range(self.n_to_test):
        print('Now starting configuration number {0}'.format(i))
        a1, a2 = population.get_two_candidates()
        a3, desc = pairing.get_new_individual([a1, a2])
        # print(a3.info)
        # view(a3)
        if a3 is None:
            continue
        da.add_unrelaxed_candidate(a3, description=desc)
print(str_out)


def countdown(t):
    while t:
        mins, secs = divmod(t, 60)
        timer = '{:02d}:{:02d}'.format(mins, secs)
        print(timer, end="\r")
        sleep(1)
        t -= 1


ga_print(3, 'MOTION| output file is created: {}'.format(f_name_out))

da = DataConnection(database_filename)
if not da:
    ga_print(3, 'MOTION| database {} successfully opened.'.format(database_filename))

parallel_local_run = ParallelLocalRun(data_connection=da,
                                      tmp_folder=outdir,
                                      n_simul=n_parallel,
                                      calc_script=external_calc)
if not parallel_local_run:
    ga_print(3, 'MOTION| local parallel job running manager successfully loaded.')

atom_numbers_to_optimize = da.get_atom_numbers_to_optimize()
n_to_optimize = len(atom_numbers_to_optimize)
    # calculated structures to make a good fit)
    if weights is None:
        return False
    regression_energy = sum(p * q for p, q in zip(weights, parameters))
    # Skip with 90% likelihood if energy appears to go up 5 eV or more
    if (regression_energy - comparison_energy) > 5 and random() < 0.9:
        return True
    else:
        return False


population_size = 20
mutation_probability = 0.3

# Initialize the different components of the GA
da = DataConnection('gadb.db')
tmp_folder = 'work_folder/'
# The PBS queueing interface is created
pbs_run = PBSQueueRun(da,
                      tmp_folder=tmp_folder,
                      job_prefix='Ag2Au2_opt',
                      n_simul=5,
                      job_template_generator=jtg,
                      find_neighbors=get_neighborlist,
                      perform_parametrization=combine_parameters)

atom_numbers_to_optimize = da.get_atom_numbers_to_optimize()
n_to_optimize = len(atom_numbers_to_optimize)
slab = da.get_slab()
all_atom_types = get_all_atom_types(slab, atom_numbers_to_optimize)
blmin = closest_distances_generator(all_atom_types,
from ase.ga.standard_comparators import InteratomicDistanceComparator
from ase.ga.cutandsplicepairing import CutAndSplicePairing
from ase.ga.utilities import closest_distances_generator
from ase.ga.utilities import get_all_atom_types
from ase.ga.offspring_creator import OperationSelector
from ase.ga.standardmutations import MirrorMutation
from ase.ga.standardmutations import RattleMutation
from ase.ga.standardmutations import PermutationMutation

# Change the following three parameters to suit your needs
population_size = 5
mutation_probability = 0.3
n_to_test = 5

# Initialize the different components of the GA
da = DataConnection("gadb.db")
atom_numbers_to_optimize = da.get_atom_numbers_to_optimize()
n_to_optimize = len(atom_numbers_to_optimize)
slab = da.get_slab()
all_atom_types = get_all_atom_types(slab, atom_numbers_to_optimize)
blmin = closest_distances_generator(all_atom_types,
                                    ratio_of_covalent_radii=0.7)

comp = InteratomicDistanceComparator(n_top=n_to_optimize,
                                     pair_cor_cum_diff=0.015,
                                     pair_cor_max=0.7,
                                     dE=0.02,
                                     mic=False)

pairing = CutAndSplicePairing(slab, n_to_optimize, blmin)
mutations = OperationSelector(
    [1.0, 1.0, 1.0],
    [MirrorMutation(blmin, n_to_optimize),
     RattleMutation(blmin, n_to_optimize),
     PermutationMutation(n_to_optimize)],
)

# Relax all unrelaxed structures (e.g. the starting population)
from ase.ga.data import DataConnection
from ase.ga.element_mutations import RandomElementMutation
from ase.ga.element_crossovers import OnePointElementCrossover
from ase.ga.offspring_creator import OperationSelector
from ase.ga.population import Population
from ase.ga.convergence import GenerationRepetitionConvergence

from ga_fcc_alloys_relax import relax

# Specify the number of generations this script will run
num_gens = 40

db = DataConnection('fcc_alloys.db')
ref_db = 'refs.db'

# Retrieve saved parameters
population_size = db.get_param('population_size')
metals = db.get_param('metals')

# Specify the procreation operators for the algorithm
# Try and play with the mutation operators that move to nearby
# places in the periodic table
oclist = ([1, 1], [RandomElementMutation(metals),
                   OnePointElementCrossover(metals)])
operation_selector = OperationSelector(*oclist)

# Pass parameters to the population instance
pop = Population(data_connection=db,
                 population_size=population_size)

# We form generations in this algorithm run and can therefore set
db_file = 'gadb.db'
if os.path.isfile(db_file):
    os.remove(db_file)

db = PrepareDB(db_file)

slab1 = fcc111('Ag', size=(2, 2, 2))
db.add_unrelaxed_candidate(slab1)

slab2 = fcc111('Cu', size=(2, 2, 2))
set_raw_score(slab2, 4)
db.add_relaxed_candidate(slab2)
assert slab2.info['confid'] == 3

db = DataConnection(db_file)
assert db.get_number_of_unrelaxed_candidates() == 1

slab3 = db.get_an_unrelaxed_candidate()
old_confid = slab3.info['confid']
slab3[0].symbol = 'Au'
db.add_unrelaxed_candidate(slab3, 'mutated: Parent {0}'.format(old_confid))
new_confid = slab3.info['confid']
# confid should update when using add_unrelaxed_candidate
assert old_confid != new_confid

slab3[1].symbol = 'Au'
db.add_unrelaxed_step(slab3, 'mutated: Parent {0}'.format(new_confid))
# confid should not change when using add_unrelaxed_step
assert slab3.info['confid'] == new_confid

with must_raise(AssertionError):
                    atom_numbers=atom_numbers,
                    closest_allowed_distances=cd,
                    box_to_place_in=[p0, [v1, v2, v3]])

# generate the starting population
starting_population = [sg.get_new_candidate() for i in range(20)]

d = PrepareDB(db_file_name=db_file,
              simulation_cell=slab,
              stoichiometry=atom_numbers)

for a in starting_population:
    d.add_unrelaxed_candidate(a)

# and now for the actual test
dc = DataConnection(db_file)

slab_get = dc.get_slab()
an_get = dc.get_atom_numbers_to_optimize()

assert dc.get_number_of_unrelaxed_candidates() == 20

a1 = dc.get_an_unrelaxed_candidate()
dc.mark_as_queued(a1)
assert dc.get_number_of_unrelaxed_candidates() == 19
assert len(dc.get_all_candidates_in_queue()) == 1

a1.set_raw_score(0.0)
dc.add_relaxed_step(a1)
from ase.ga.standard_comparators import InteratomicDistanceComparator
from ase.ga.cutandsplicepairing import CutAndSplicePairing
from ase.ga.utilities import closest_distances_generator
from ase.ga.utilities import get_all_atom_types
from ase.ga.offspring_creator import OperationSelector
from ase.ga.standardmutations import MirrorMutation
from ase.ga.standardmutations import RattleMutation
from ase.ga.standardmutations import PermutationMutation

# Change the following three parameters to suit your needs
population_size = 20
mutation_probability = 0.3
n_to_test = 20

# Initialize the different components of the GA
da = DataConnection('gadb.db')
atom_numbers_to_optimize = da.get_atom_numbers_to_optimize()
n_to_optimize = len(atom_numbers_to_optimize)
slab = da.get_slab()
all_atom_types = get_all_atom_types(slab, atom_numbers_to_optimize)
blmin = closest_distances_generator(all_atom_types,
                                    ratio_of_covalent_radii=0.7)

comp = InteratomicDistanceComparator(n_top=n_to_optimize,
                                     pair_cor_cum_diff=0.015,
                                     pair_cor_max=0.7,
                                     dE=0.02,
                                     mic=False)

pairing = CutAndSplicePairing(slab, n_to_optimize, blmin)
mutations = OperationSelector([1., 1., 1.], [
from ase.io import write
from ase.ga import get_raw_score
from ase.ga.data import DataConnection
from ase.ga.population import Population
from ase.ga.utilities import closest_distances_generator, CellBounds
from ase.ga.ofp_comparator import OFPComparator
from ase.ga.offspring_creator import OperationSelector
from ase.ga.standardmutations import StrainMutation
from ase.ga.soft_mutation import SoftMutation
from ase.ga.cutandsplicepairing import CutAndSplicePairing

from ga_bulk_relax import relax

# Connect to the database and retrieve some information
da = DataConnection('gadb.db')
slab = da.get_slab()
atom_numbers_to_optimize = da.get_atom_numbers_to_optimize()
n_top = len(atom_numbers_to_optimize)

# Use Oganov's fingerprint functions to decide whether
# two structures are identical or not
comp = OFPComparator(n_top=n_top, dE=1.0,
                     cos_dist_max=1e-3, rcut=10., binwidth=0.05,
                     pbc=[True, True, True], sigma=0.05, nsigma=4,
                     recalculate=False)

# Define the cell and interatomic distance bounds
from ase.ga.data import DataConnection

from catlearn.api.ase_data_setup import get_unique, get_train
from catlearn.featurize.setup import FeatureGenerator
from catlearn.regression import GaussianProcess
from catlearn.preprocess.feature_engineering import single_transform
from catlearn.ga import GeneticAlgorithm


# ## Data Generation
#
# To start with we import some data. For this tutorial, the data for
# alloyed nanoparticles are used.

# In[2]:


# Connect ase atoms database.
gadb = DataConnection('../../data/gadb.db')

# Get all relaxed candidates from the db file.
all_cand = gadb.get_all_relaxed_candidates(use_extinct=False)


# We then split this data into some training data and a holdout test set.

# In[3]:


testset = get_unique(atoms=all_cand, size=100, key='raw_score')

trainset = get_train(atoms=all_cand, size=500, taken=testset['taken'],
                     key='raw_score')
def run_ga(n_to_test, kptdensity=None):
    ''' This method specifies how to run the GA once the
    initial random structures have been stored in godb.db.
    '''
    # Various initializations:
    population_size = 10
    da = DataConnection('godb.db')
    atom_numbers_to_optimize = da.get_atom_numbers_to_optimize()
    n_to_optimize = len(atom_numbers_to_optimize)
    slab = da.get_slab()
    all_atom_types = get_all_atom_types(slab, atom_numbers_to_optimize)
    blmin = closest_distances_generator(all_atom_types,
                                        ratio_of_covalent_radii=0.05)

    # Defining the mix of genetic operators:
    mutation_probability = 0.3333
    pairing = CutAndSplicePairing(slab, n_to_optimize, blmin)
    rattlemut = RattleMutation(blmin, n_to_optimize,
                               rattle_prop=0.8, rattle_strength=1.5)
    mirrormut = MirrorMutation(blmin, n_to_optimize)
    mutations = OperationSelector([1., 1.], [rattlemut, mirrormut])

    if True:
        # Recalculate raw scores of any relaxed candidates
        # present in the godb.db database (only applies to
        # iter007).
        structures = da.get_all_relaxed_candidates()
        for atoms in structures:
            atoms = singlepoint(atoms)
            da.c.delete([atoms.info['relax_id']])
            if 'data' not in atoms.info:
                atoms.info['data'] = {}
            da.add_relaxed_step(atoms)
        print('Finished recalculating raw scores')

    # Relax the randomly generated initial candidates:
    while da.get_number_of_unrelaxed_candidates() > 0:
        a = da.get_an_unrelaxed_candidate()
        a.wrap()
        a = relax_one(a)
        da.add_relaxed_step(a)

    # Create the population
    population = Population(data_connection=da,
                            population_size=population_size,
                            comparator=comparator,
                            logfile='log.txt')
    current_pop = population.get_current_population()

    # Test n_to_test new candidates
    ga_raw_scores = []
    step = 0
    for step in range(n_to_test):
        print('Starting configuration number %d' % step, flush=True)

        clock = time()
        a3 = None
        r = random()
        if r > mutation_probability:
            while a3 is None:
                a1, a2 = population.get_two_candidates()
                a3, desc = pairing.get_new_individual([a1, a2])
        else:
            while a3 is None:
                a1 = population.get_one_candidate()
                a3, desc = mutations.get_new_individual([a1])
        dt = time() - clock
        op = 'pairing' if r > mutation_probability else 'mutating'
        print('Time for %s candidate(s): %.3f' % (op, dt), flush=True)

        a3.wrap()
        da.add_unrelaxed_candidate(a3, description=desc)

        a3 = relax_one(a3)
        da.add_relaxed_step(a3)

        # Various updates:
        population.update()
        current_pop = population.get_current_population()
        write('current_population.traj', current_pop)

        # Print out information for easy analysis/plotting afterwards:
        if r > mutation_probability:
            print('Step %d %s %.3f %.3f %.3f' % (step, desc,
                  get_raw_score(a1), get_raw_score(a2), get_raw_score(a3)))
        else:
            print('Step %d %s %.3f %.3f' % (step, desc,
                  get_raw_score(a1), get_raw_score(a3)))

        print('Step %d highest raw score in pop: %.3f' %
              (step, get_raw_score(current_pop[0])))
        ga_raw_scores.append(get_raw_score(a3))
        print('Step %d highest raw score generated by GA: %.3f' %
              (step, max(ga_raw_scores)))

    emin = population.pop[0].get_potential_energy()
    print('GA finished after step %d' % step)
    print('Lowest energy = %8.3f eV' % emin, flush=True)

    write('all_candidates.traj', da.get_all_relaxed_candidates())
    write('current_population.traj', population.get_current_population())
def test_basic_example_main_run(seed, testdir):
    # set up the random number generator
    rng = np.random.RandomState(seed)

    # create the surface
    slab = fcc111('Au', size=(4, 4, 1), vacuum=10.0, orthogonal=True)
    slab.set_constraint(FixAtoms(mask=len(slab) * [True]))

    # define the volume in which the adsorbed cluster is optimized
    # the volume is defined by a corner position (p0)
    # and three spanning vectors (v1, v2, v3)
    pos = slab.get_positions()
    cell = slab.get_cell()
    p0 = np.array([0., 0., max(pos[:, 2]) + 2.])
    v1 = cell[0, :] * 0.8
    v2 = cell[1, :] * 0.8
    v3 = cell[2, :]
    v3[2] = 3.

    # Define the composition of the atoms to optimize
    atom_numbers = 2 * [47] + 2 * [79]

    # define the closest distance two atoms of a given species can be
    # to each other
    unique_atom_types = get_all_atom_types(slab, atom_numbers)
    blmin = closest_distances_generator(atom_numbers=unique_atom_types,
                                        ratio_of_covalent_radii=0.7)

    # create the starting population
    sg = StartGenerator(slab=slab,
                        blocks=atom_numbers,
                        blmin=blmin,
                        box_to_place_in=[p0, [v1, v2, v3]],
                        rng=rng)

    # generate the starting population
    population_size = 5
    starting_population = [sg.get_new_candidate()
                           for i in range(population_size)]

    # from ase.visualize import view  # uncomment these lines
    # view(starting_population)       # to see the starting population

    # create the database to store information in
    d = PrepareDB(db_file_name=db_file,
                  simulation_cell=slab,
                  stoichiometry=atom_numbers)

    for a in starting_population:
        d.add_unrelaxed_candidate(a)

    # XXXXXXXXXX This should be the beginning of a new test,
    # but we are using some resources from the previous part.
    # Maybe refactor those things as (module-level?) fixtures.

    # Change the following three parameters to suit your needs
    population_size = 5
    mutation_probability = 0.3
    n_to_test = 5

    # Initialize the different components of the GA
    da = DataConnection('gadb.db')
    atom_numbers_to_optimize = da.get_atom_numbers_to_optimize()
    n_to_optimize = len(atom_numbers_to_optimize)
    slab = da.get_slab()
    all_atom_types = get_all_atom_types(slab, atom_numbers_to_optimize)
    blmin = closest_distances_generator(all_atom_types,
                                        ratio_of_covalent_radii=0.7)

    comp = InteratomicDistanceComparator(n_top=n_to_optimize,
                                         pair_cor_cum_diff=0.015,
                                         pair_cor_max=0.7,
                                         dE=0.02,
                                         mic=False)

    pairing = CutAndSplicePairing(slab, n_to_optimize, blmin, rng=rng)
    mutations = OperationSelector(
        [1., 1., 1.],
        [MirrorMutation(blmin, n_to_optimize, rng=rng),
         RattleMutation(blmin, n_to_optimize, rng=rng),
         PermutationMutation(n_to_optimize, rng=rng)],
        rng=rng)

    # Relax all unrelaxed structures (e.g. the starting population)
    while da.get_number_of_unrelaxed_candidates() > 0:
        a = da.get_an_unrelaxed_candidate()
        a.calc = EMT()
        print('Relaxing starting candidate {0}'.format(a.info['confid']))
        dyn = BFGS(a, trajectory=None, logfile=None)
        dyn.run(fmax=0.05, steps=100)
        set_raw_score(a, -a.get_potential_energy())
        da.add_relaxed_step(a)

    # create the population
    population = Population(data_connection=da,
                            population_size=population_size,
                            comparator=comp,
                            rng=rng)

    # test n_to_test new candidates
    for i in range(n_to_test):
        print('Now starting configuration number {0}'.format(i))
        a1, a2 = population.get_two_candidates()
        a3, desc = pairing.get_new_individual([a1, a2])
        if a3 is None:
            continue
        da.add_unrelaxed_candidate(a3, description=desc)

        # Check if we want to do a mutation
        if rng.rand() < mutation_probability:
            a3_mut, desc = mutations.get_new_individual([a3])
            if a3_mut is not None:
                da.add_unrelaxed_step(a3_mut, desc)
                a3 = a3_mut

        # Relax the new candidate
        a3.calc = EMT()
        dyn = BFGS(a3, trajectory=None, logfile=None)
        dyn.run(fmax=0.05, steps=100)
        set_raw_score(a3, -a3.get_potential_energy())
        da.add_relaxed_step(a3)
        population.update()

    write('all_candidates.traj', da.get_all_relaxed_candidates())
    s += 'python {} {}\n'.format(calc_config, traj_file)
    return s


def redirect_print(strinfo):
    ''' redirect print function to file '''
    f_std_redirect_f.writelines(strinfo + '\n')


population_size = 20
mutation_probability = 0.3
n_to_test = 20

da = DataConnection(database_name)
job_prefix = database_name[5:-3]
pbs_run = PBSQueueRun(da,
                      tmp_folder=tmp_folder,
                      job_prefix=job_prefix,
                      n_simul=10,
                      job_template_generator=jtg,
                      qsub_command='sbatch',
                      qstat_command='squeue')
time_to_wait = 180

atom_numbers_to_optimize = da.get_atom_numbers_to_optimize()
n_to_optimize = len(atom_numbers_to_optimize)
slab = da.get_slab()
all_atom_types = get_all_atom_types(slab, atom_numbers_to_optimize)
def test_generators(self):
    """Generate features from atoms objects."""
    # Test generic features for Pt then both Pt and Au.
    get_mendeleev_params(atomic_number=78)
    get_mendeleev_params(atomic_number=[78, 79],
                         params=default_params + ['en_ghosh'])

    # Connect database generated by a GA search.
    gadb = DataConnection('{}/data/gadb.db'.format(wkdir))

    # Get all relaxed candidates from the db file.
    print('Getting candidates from the database')
    all_cand = gadb.get_all_relaxed_candidates(use_extinct=False)

    # Setup the test and training datasets.
    testset = get_unique(atoms=all_cand, size=test_size, key='raw_score')
    self.assertTrue(len(testset['atoms']) == test_size)
    self.assertTrue(len(testset['taken']) == test_size)

    trainset = get_train(atoms=all_cand, size=train_size,
                         taken=testset['taken'], key='raw_score')
    self.assertTrue(len(trainset['atoms']) == train_size)
    self.assertTrue(len(trainset['target']) == train_size)

    # Initiate the fingerprint generators with relevant input variables.
    print('Getting the fingerprints')
    f = FeatureGenerator(element_parameters='atomic_radius', nprocs=1)
    f.normalize_features(trainset['atoms'], testset['atoms'])

    data = f.return_vec(trainset['atoms'], [f.nearestneighbour_vec])
    n, d = np.shape(data)
    self.assertTrue(n == train_size and d == 4)
    self.assertTrue(len(f.return_names([f.nearestneighbour_vec])) == d)
    print('passed nearestneighbour_vec')

    train_fp = f.return_vec(trainset['atoms'], [f.bond_count_vec])
    n, d = np.shape(train_fp)
    data = np.concatenate((data, train_fp), axis=1)
    self.assertTrue(n == train_size and d == 52)
    print('passed bond_count_vec')

    train_fp = f.return_vec(trainset['atoms'], [f.distribution_vec])
    n, d = np.shape(train_fp)
    data = np.concatenate((data, train_fp), axis=1)
    self.assertTrue(n == train_size and d == 10)
    print('passed distribution_vec')

    # EXPENSIVE to calculate. Not included in training data.
    train_fp = f.return_vec(testset['atoms'], [f.connections_vec])
    n, d = np.shape(train_fp)
    self.assertTrue(n == test_size and d == 26)
    print('passed connections_vec')

    train_fp = f.return_vec(trainset['atoms'], [f.rdf_vec])
    n, d = np.shape(train_fp)
    data = np.concatenate((data, train_fp), axis=1)
    self.assertTrue(n == train_size and d == 20)
    print('passed rdf_vec')

    # Start testing the standard fingerprint vector generators.
    train_fp = f.return_vec(trainset['atoms'], [f.element_mass_vec])
    n, d = np.shape(train_fp)
    data = np.concatenate((data, train_fp), axis=1)
    self.assertTrue(n == train_size and d == 1)
    self.assertTrue(len(f.return_names([f.element_mass_vec])) == d)
    print('passed element_mass_vec')

    train_fp = f.return_vec(trainset['atoms'], [f.element_parameter_vec])
    n, d = np.shape(train_fp)
    data = np.concatenate((data, train_fp), axis=1)
    # print(f.return_names([f.element_parameter_vec]))
    self.assertTrue(n == train_size and d == 4)
    self.assertTrue(len(f.return_names([f.element_parameter_vec])) == d)
    print('passed element_parameter_vec')

    train_fp = f.return_vec(trainset['atoms'], [f.composition_vec])
    n, d = np.shape(train_fp)
    data = np.concatenate((data, train_fp), axis=1)
    self.assertTrue(n == train_size and d == 2)
    self.assertTrue(len(f.return_names([f.composition_vec])) == d)
    print('passed composition_vec')

    train_fp = f.return_vec(trainset['atoms'], [f.eigenspectrum_vec])
    n, d = np.shape(train_fp)
    data = np.concatenate((data, train_fp), axis=1)
    self.assertTrue(n == train_size and d == 147)
    self.assertTrue(len(f.return_names([f.eigenspectrum_vec])) == d)
    print('passed eigenspectrum_vec')

    train_fp = f.return_vec(trainset['atoms'], [f.distance_vec])
    n, d = np.shape(train_fp)
    data = np.concatenate((data, train_fp), axis=1)
    self.assertTrue(n == train_size and d == 2)
    self.assertTrue(len(f.return_names([f.distance_vec])) == d)
    print('passed distance_vec')

    train_fp = f.return_vec(
        trainset['atoms'],
        [f.eigenspectrum_vec, f.element_mass_vec, f.composition_vec])
    n, d = np.shape(train_fp)
    self.assertTrue(n == train_size and d == 150)
    self.assertTrue(
        len(f.return_names([f.eigenspectrum_vec, f.element_mass_vec,
                            f.composition_vec])) == d)
    print('passed combined generation')

    train_fp = f.return_vec(trainset['atoms'], [f.neighbor_sum_vec])
    n, d = np.shape(train_fp)
    self.assertTrue(n == train_size and d == len(trainset['atoms'][0]))
    # self.assertTrue(len(f.return_names([f.distance_vec])) == d)
    print('passed neighbor_sum_vec')

    train_fp = f.return_vec(trainset['atoms'], [f.neighbor_mean_vec])
    n, d = np.shape(train_fp)
    self.assertTrue(n == train_size and d == len(trainset['atoms'][0]))
    # self.assertTrue(len(f.return_names([f.distance_vec])) == d)
    print('passed neighbor_mean_vec')

    f = FeatureGenerator(element_parameters='atomic_radius',
                         max_neighbors='full', nprocs=1)
    f.normalize_features(trainset['atoms'], testset['atoms'])

    train_fp = f.return_vec(trainset['atoms'], [f.neighbor_sum_vec])
    n, d = np.shape(train_fp)
    self.assertTrue(n == train_size and d == len(trainset['atoms'][0]))
    print('passed neighbor_sum_vec all neighbors')

    train_fp = f.return_vec(trainset['atoms'], [f.neighbor_mean_vec])
    n, d = np.shape(train_fp)
    self.assertTrue(n == train_size and d == len(trainset['atoms'][0]))
    print('passed neighbor_mean_vec all neighbors')

    # Do basic check for atomic properties.
    no_prop = []
    an_prop = []
    # EXPENSIVE to calculate. Not included in training data.
    for atoms in testset['atoms']:
        no_prop.append(neighbor_features(atoms=atoms))
        an_prop.append(
            neighbor_features(atoms=atoms, property=['atomic_number']))
    self.assertTrue(np.shape(no_prop) == (test_size, 15))
    self.assertTrue(np.shape(an_prop) == (test_size, 30))
    print('passed graph_vec')

    self.__class__.all_cand = all_cand
    self.__class__.data = data
import numpy as np

from ase.ga.population import RankFitnessPopulation
from ase.ga.data import DataConnection
from ase.ga.offspring_creator import OperationSelector
from ase.ga.slab_operators import (CutSpliceSlabCrossover,
                                   RandomSlabPermutation,
                                   RandomCompositionMutation)
from ase.ga import set_raw_score

from ase.calculators.emt import EMT

# Connect to the database containing all candidates
db = DataConnection('hull.db')

# Retrieve saved parameters
pop_size = db.get_param('population_size')
refs = db.get_param('reference_energies')
metals = db.get_param('metals')
lattice_constants = db.get_param('lattice_constants')


def get_mixing_energy(atoms):
    # Set the correct cell size from the lattice constant
    new_a = get_avg_lattice_constant(atoms.get_chemical_symbols())
    # Use the orthogonal fcc cell to find the current lattice constant
    current_a = atoms.cell[0][0] / np.sqrt(2)
    atoms.set_cell(atoms.cell * new_a / current_a, scale_atoms=True)

    # Calculate the energy
    atoms.set_calculator(EMT())
    e = atoms.get_potential_energy()
import os
import numpy as np

db_file = 'gadb.db'
if os.path.isfile(db_file):
    os.remove(db_file)

from ase.build import fcc111

atom_numbers = np.array([78, 78, 79, 79])
slab = fcc111('Ag', size=(4, 4, 2), vacuum=10.)
d = PrepareDB(db_file_name=db_file,
              simulation_cell=slab,
              stoichiometry=atom_numbers)

assert os.path.isfile(db_file)

dc = DataConnection(db_file)

slab_get = dc.get_slab()
an_get = dc.get_atom_numbers_to_optimize()

assert len(slab) == len(slab_get)
assert np.all(slab.numbers == slab_get.numbers)
assert np.all(slab.get_positions() == slab_get.get_positions())
assert np.all(an_get == atom_numbers)

os.remove(db_file)
from ase.ga.data import DataConnection
import os
import numpy as np

db_file = 'gadb.db'
if os.path.isfile(db_file):
    os.remove(db_file)

from ase.lattice.surface import fcc111

atom_numbers = np.array([78, 78, 79, 79])
slab = fcc111('Ag', size=(4, 4, 2), vacuum=10.)
d = PrepareDB(db_file_name=db_file,
              simulation_cell=slab,
              stoichiometry=atom_numbers)

assert os.path.isfile(db_file)

dc = DataConnection(db_file)

slab_get = dc.get_slab()
an_get = dc.get_atom_numbers_to_optimize()

assert len(slab) == len(slab_get)
assert np.all(slab.numbers == slab_get.numbers)
assert np.all(slab.get_positions() == slab_get.get_positions())
assert np.all(an_get == atom_numbers)

os.remove(db_file)
def run_ga(n_to_test):
    """ This method specifies how to run the GA once the
    initial random structures have been stored in godb.db.
    """
    # Various initializations:
    population_size = 10  # maximal size of the population
    da = DataConnection('godb.db')
    atom_numbers_to_optimize = da.get_atom_numbers_to_optimize()  # = [14] * 7
    n_to_optimize = len(atom_numbers_to_optimize)  # = 7
    # This defines how close the Si atoms are allowed to get
    # in candidate structures generated by the genetic operators:
    blmin = closest_distances_generator(atom_numbers_to_optimize,
                                        ratio_of_covalent_radii=0.4)
    # This is our OFPComparator instance which will be
    # used to judge whether or not two structures are identical:
    comparator = OFPComparator(n_top=None, dE=1.0,
                               cos_dist_max=1e-3, rcut=10., binwidth=0.05,
                               pbc=[False] * 3, sigma=0.1, nsigma=4,
                               recalculate=False)

    # Defining a typical combination of genetic operators:
    pairing = CutAndSplicePairing(da.get_slab(), n_to_optimize, blmin)
    rattlemut = RattleMutation(blmin, n_to_optimize,
                               rattle_prop=0.8, rattle_strength=1.5)
    operators = OperationSelector([2., 1.], [pairing, rattlemut])

    # Relax the randomly generated initial candidates:
    while da.get_number_of_unrelaxed_candidates() > 0:
        a = da.get_an_unrelaxed_candidate()
        a = relax_one(a)
        da.add_relaxed_step(a)

    # Create the population
    population = Population(data_connection=da,
                            population_size=population_size,
                            comparator=comparator,
                            logfile='log.txt')
    current_pop = population.get_current_population()

    # Test n_to_test new candidates
    for step in range(n_to_test):
        print('Starting configuration number %d' % step, flush=True)

        a3 = None
        while a3 is None:
            a1, a2 = population.get_two_candidates()
            a3, description = operators.get_new_individual([a1, a2])

        da.add_unrelaxed_candidate(a3, description=description)

        a3 = relax_one(a3)
        da.add_relaxed_step(a3)

        population.update()
        best = population.get_current_population()[0]
        print('Highest raw score at this point: %.3f' % get_raw_score(best))

    print('GA finished after step %d' % step)
    write('all_candidates.traj', da.get_all_relaxed_candidates())
    write('current_population.traj', population.get_current_population())
def test_database_logic(seed, testdir):
    from ase.ga.data import PrepareDB
    from ase.ga.data import DataConnection
    from ase.ga.startgenerator import StartGenerator
    from ase.ga.utilities import closest_distances_generator
    from ase.ga import set_raw_score
    import numpy as np
    from ase.build import fcc111
    from ase.constraints import FixAtoms

    # set up the random number generator
    rng = np.random.RandomState(seed)

    slab = fcc111('Au', size=(4, 4, 2), vacuum=10.0, orthogonal=True)
    slab.set_constraint(FixAtoms(mask=slab.positions[:, 2] <= 10.))

    # define the volume in which the adsorbed cluster is optimized
    # the volume is defined by a corner position (p0)
    # and three spanning vectors (v1, v2, v3)
    pos = slab.get_positions()
    cell = slab.get_cell()
    p0 = np.array([0., 0., max(pos[:, 2]) + 2.])
    v1 = cell[0, :] * 0.8
    v2 = cell[1, :] * 0.8
    v3 = cell[2, :]
    v3[2] = 3.

    # define the closest distance between two atoms of a given species
    blmin = closest_distances_generator(atom_numbers=[47, 79],
                                        ratio_of_covalent_radii=0.7)

    # Define the composition of the atoms to optimize
    atom_numbers = 2 * [47] + 2 * [79]

    # create the starting population
    sg = StartGenerator(slab=slab,
                        blocks=atom_numbers,
                        blmin=blmin,
                        box_to_place_in=[p0, [v1, v2, v3]],
                        rng=rng)

    # generate the starting population
    starting_population = [sg.get_new_candidate() for i in range(20)]

    d = PrepareDB(db_file_name=db_file,
                  simulation_cell=slab,
                  stoichiometry=atom_numbers)

    for a in starting_population:
        d.add_unrelaxed_candidate(a)

    # and now for the actual test
    dc = DataConnection(db_file)

    dc.get_slab()
    dc.get_atom_numbers_to_optimize()

    assert dc.get_number_of_unrelaxed_candidates() == 20

    a1 = dc.get_an_unrelaxed_candidate()
    dc.mark_as_queued(a1)
    assert dc.get_number_of_unrelaxed_candidates() == 19
    assert len(dc.get_all_candidates_in_queue()) == 1

    set_raw_score(a1, 0.0)
    dc.add_relaxed_step(a1)
    assert dc.get_number_of_unrelaxed_candidates() == 19
    assert len(dc.get_all_candidates_in_queue()) == 0
    assert len(dc.get_all_relaxed_candidates()) == 1

    a2 = dc.get_an_unrelaxed_candidate()
    dc.mark_as_queued(a2)
    confid = a2.info['confid']
    assert dc.get_all_candidates_in_queue()[0] == confid

    dc.remove_from_queue(confid)
    assert len(dc.get_all_candidates_in_queue()) == 0
def run_ga(n_to_test, kptdensity=3.5):
    population_size = 20
    da = DataConnection('godb.db')
    atom_numbers_to_optimize = da.get_atom_numbers_to_optimize()
    n_to_optimize = len(atom_numbers_to_optimize)
    slab = da.get_slab()
    all_atom_types = get_all_atom_types(slab, atom_numbers_to_optimize)
    blmin = closest_distances_generator(all_atom_types, 0.05)  # 0.5

    # defining genetic operators:
    mutation_probability = 0.75
    pairing = CutAndSplicePairing(blmin, p1=1., p2=0., minfrac=0.15,
                                  use_tags=False)
    cellbounds = CellBounds(bounds={'phi': [0.2 * 180., 0.8 * 180.],
                                    'chi': [0.2 * 180., 0.8 * 180.],
                                    'psi': [0.2 * 180., 0.8 * 180.]})
    strainmut = StrainMutation(blmin, stddev=0.7, cellbounds=cellbounds,
                               use_tags=False)
    blmin_soft = closest_distances_generator(all_atom_types, 0.1)
    softmut = SoftMutation(blmin_soft, bounds=[2., 5.], use_tags=False)
    rattlemut = RattleMutation(blmin, n_to_optimize, rattle_prop=0.8,
                               rattle_strength=2.5, use_tags=False)
    mutations = OperationSelector([4., 4., 2],
                                  [softmut, strainmut, rattlemut])

    if True:
        # recalculate raw scores
        structures = da.get_all_relaxed_candidates()
        for atoms in structures:
            atoms = singlepoint(atoms, kptdensity=kptdensity)
            da.c.delete([atoms.info['relax_id']])
            if 'data' not in atoms.info:
                atoms.info['data'] = {}
            da.add_relaxed_step(atoms)
        print('Finished recalculating raw scores')

    # relaxing the initial candidates:
    while da.get_number_of_unrelaxed_candidates() > 0:
        a = da.get_an_unrelaxed_candidate()
        a.wrap()
        a = relax_one(a, kptdensity=kptdensity)
        da.add_relaxed_step(a)

    # create the population
    population = Population(data_connection=da,
                            population_size=population_size,
                            comparator=comparator,
                            logfile='log.txt')

    current_pop = population.get_current_population()
    strainmut.update_scaling_volume(current_pop, w_adapt=0.5, n_adapt=4)
    pairing.update_scaling_volume(current_pop, w_adapt=0.5, n_adapt=4)

    # Test n_to_test new candidates
    ga_raw_scores = []
    step = 0
    for step in range(n_to_test):
        print('Starting configuration number %d' % step, flush=True)

        clock = time()
        a3 = None
        r = random()
        if r > mutation_probability:
            while a3 is None:
                a1, a2 = population.get_two_candidates()
                a3, desc = pairing.get_new_individual([a1, a2])
        else:
            while a3 is None:
                a1 = population.get_one_candidate()
                a3, desc = mutations.get_new_individual([a1])
        dt = time() - clock
        op = 'pairing' if r > mutation_probability else 'mutating'
        print('Time for %s candidate(s): %.3f' % (op, dt), flush=True)

        a3.wrap()
        da.add_unrelaxed_candidate(a3, description=desc)

        a3 = relax_one(a3, kptdensity=kptdensity)
        da.add_relaxed_step(a3)

        # Various updates:
        population.update()
        current_pop = population.get_current_population()
        if step % 10 == 0:
            strainmut.update_scaling_volume(current_pop, w_adapt=0.5,
                                            n_adapt=4)
            pairing.update_scaling_volume(current_pop, w_adapt=0.5, n_adapt=4)
        write('current_population.traj', current_pop)

        # Print out information for easy analysis/plotting afterwards:
        if r > mutation_probability:
            print('Step %d %s %.3f %.3f %.3f' % (step, desc,
                  get_raw_score(a1), get_raw_score(a2), get_raw_score(a3)))
        else:
            print('Step %d %s %.3f %.3f' % (step, desc,
                  get_raw_score(a1), get_raw_score(a3)))

        print('Step %d highest raw score in pop: %.3f' %
              (step, get_raw_score(current_pop[0])))
        ga_raw_scores.append(get_raw_score(a3))
        print('Step %d highest raw score generated by GA: %.3f' %
              (step, max(ga_raw_scores)))

    emin = population.pop[0].get_potential_energy()
    print('GA finished after step %d' % step)
    print('Lowest energy = %8.3f eV' % emin, flush=True)

    write('all_candidates.traj', da.get_all_relaxed_candidates())
    write('current_population.traj', population.get_current_population())