def __add_candidate__(self, a):
    """ Adds a single candidate to the population. """
    # check if the structure is too low in raw score
    raw_score_a = get_raw_score(a)
    raw_score_worst = get_raw_score(self.pop[-1])
    if raw_score_a < raw_score_worst \
            and len(self.pop) == self.pop_size:
        return

    # check if the new candidate should
    # replace a similar structure in the population
    for (i, b) in enumerate(self.pop):
        if self.comparator.looks_like(a, b):
            if get_raw_score(b) < raw_score_a:
                del self.pop[i]
                a.info['looks_like'] = count_looks_like(a, self.all_cand,
                                                        self.comparator)
                self.pop.append(a)
                self.pop.sort(key=lambda x: get_raw_score(x), reverse=True)
            return

    # the new candidate needs to be added, so remove the highest
    # energy one
    if len(self.pop) == self.pop_size:
        del self.pop[-1]

    # add the new candidate
    a.info['looks_like'] = count_looks_like(a, self.all_cand,
                                            self.comparator)
    self.pop.append(a)
    self.pop.sort(key=lambda x: get_raw_score(x), reverse=True)
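# Illustrative sketch (not from the original source): the raw-score
# bookkeeping that __add_candidate__ relies on, shown with ase.ga's helpers on
# plain Atoms objects. The Si2 geometry and the energies are made up.
from ase import Atoms
from ase.ga import set_raw_score, get_raw_score

pop = []
for energy in (-3.2, -4.1, -3.7):
    cand = Atoms('Si2', positions=[[0., 0., 0.], [0., 0., 2.3]])
    cand.info['key_value_pairs'] = {}   # raw scores are stored in this dict
    set_raw_score(cand, -energy)        # convention: raw_score = -energy
    pop.append(cand)

pop.sort(key=lambda x: get_raw_score(x), reverse=True)
print([get_raw_score(c) for c in pop])  # best (highest raw score) first: [4.1, 3.7, 3.2]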
def __get_fitness__(self, candidates):
    """Input should be sorted according to raw_score."""
    max_s = get_raw_score(candidates[0])
    min_s = get_raw_score(candidates[-1])
    T = min_s - max_s

    shared_fit = []
    for c in candidates:
        sc = get_raw_score(c)
        obj_fit = 0.5 * (1. - tanh(2. * (sc - max_s) / T - 1.))
        m = 1.
        ck = c.info['key_value_pairs'][self.comp_key]
        for other in candidates:
            if other != c:
                name = tuple(sorted([c.info['confid'],
                                     other.info['confid']]))
                if name not in self.sh_cache:
                    ok = other.info['key_value_pairs'][self.comp_key]
                    d = abs(ck - ok)
                    if d < self.dt:
                        v = 1 - (d / self.dt)**self.alpha_sh
                        self.sh_cache[name] = v
                    else:
                        self.sh_cache[name] = 0
                m += self.sh_cache[name]
        shf = (obj_fit ** self.fit_scaling) / m
        shared_fit.append(shf)
    return shared_fit
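# Illustrative standalone sketch (not from the original source): the
# fitness-sharing scheme above rewritten without the Population machinery.
# The niche radius dt, alpha_sh and the comparison values are made up; the
# point is that candidates whose comparison values lie within dt of each
# other split their objective fitness, while isolated candidates keep it.
from math import tanh

def shared_fitness(raw_scores, comp_values, dt=0.5, alpha_sh=1., fit_scaling=1.):
    max_s, min_s = max(raw_scores), min(raw_scores)
    T = min_s - max_s
    fitness = []
    for i, (sc, ck) in enumerate(zip(raw_scores, comp_values)):
        obj_fit = 0.5 * (1. - tanh(2. * (sc - max_s) / T - 1.))
        m = 1.  # niche count: starts at 1 for the candidate itself
        for j, ok in enumerate(comp_values):
            if j != i and abs(ck - ok) < dt:
                m += 1. - (abs(ck - ok) / dt) ** alpha_sh
        fitness.append(obj_fit ** fit_scaling / m)
    return fitness

# the first two candidates share a niche, the third does not:
print(shared_fitness([-1.0, -1.1, -2.0], [0.10, 0.12, 0.80]))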
def __initialize_pop__(self):
    # Get all relaxed candidates from the database
    ue = self.use_extinct
    all_cand = self.dc.get_all_relaxed_candidates(use_extinct=ue)
    all_cand.sort(key=lambda x: get_raw_score(x), reverse=True)

    if len(all_cand) > 0:
        shared_fit = self.__get_fitness__(all_cand)
        all_sorted = list(zip(*sorted(zip(shared_fit, all_cand),
                                      reverse=True)))[1]

        # Fill up the population with the self.pop_size most stable
        # unique candidates.
        i = 0
        while i < len(all_sorted) and len(self.pop) < self.pop_size:
            c = all_sorted[i]
            i += 1
            eq = False
            for a in self.pop:
                if self.comparator.looks_like(a, c):
                    eq = True
                    break
            if not eq:
                self.pop.append(c)

        for a in self.pop:
            a.info['looks_like'] = count_looks_like(a, all_cand,
                                                    self.comparator)

    self.all_cand = all_cand
def penalize(t):
    # penalize explosion:
    raw_score = get_raw_score(t)
    max_volume_per_atom = 50.
    if t.get_volume() / len(t) >= max_volume_per_atom:
        raw_score -= 1e9
    set_raw_score(t, raw_score)
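# Illustrative sketch (not from the original source): applying penalize() to a
# candidate whose raw score has already been set (raw_score = -energy by
# convention). It assumes penalize() above is in scope; the cell and the score
# are arbitrary values chosen only so that the volume penalty triggers.
from ase import Atoms
from ase.ga import set_raw_score, get_raw_score

t = Atoms('Si2', positions=[[0., 0., 0.], [0., 0., 2.3]],
          cell=[12., 12., 12.], pbc=True)   # 1728 A^3 / 2 atoms > 50
t.info['key_value_pairs'] = {}              # ensure the dict exists on a plain Atoms
set_raw_score(t, 7.5)                       # e.g. -energy from a relaxation
penalize(t)
print(get_raw_score(t))                     # ~ -1e9: the candidate is effectively ruled out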
def run_ga(n_to_test):
    """ This method specifies how to run the GA once the
    initial random structures have been stored in godb.db.
    """
    # Various initializations:
    population_size = 10  # maximal size of the population
    da = DataConnection('godb.db')
    atom_numbers_to_optimize = da.get_atom_numbers_to_optimize()  # = [14] * 7
    n_to_optimize = len(atom_numbers_to_optimize)  # = 7
    # This defines how close the Si atoms are allowed to get
    # in candidate structures generated by the genetic operators:
    blmin = closest_distances_generator(atom_numbers_to_optimize,
                                        ratio_of_covalent_radii=0.4)
    # This is our OFPComparator instance which will be
    # used to judge whether or not two structures are identical:
    comparator = OFPComparator(n_top=None, dE=1.0, cos_dist_max=1e-3,
                               rcut=10., binwidth=0.05, pbc=[False] * 3,
                               sigma=0.1, nsigma=4, recalculate=False)

    # Defining a typical combination of genetic operators:
    pairing = CutAndSplicePairing(da.get_slab(), n_to_optimize, blmin)
    rattlemut = RattleMutation(blmin, n_to_optimize,
                               rattle_prop=0.8, rattle_strength=1.5)
    operators = OperationSelector([2., 1.], [pairing, rattlemut])

    # Relax the randomly generated initial candidates:
    while da.get_number_of_unrelaxed_candidates() > 0:
        a = da.get_an_unrelaxed_candidate()
        a = relax_one(a)
        da.add_relaxed_step(a)

    # Create the population
    population = Population(data_connection=da,
                            population_size=population_size,
                            comparator=comparator,
                            logfile='log.txt')
    current_pop = population.get_current_population()

    # Test n_to_test new candidates
    for step in range(n_to_test):
        print('Starting configuration number %d' % step, flush=True)

        a3 = None
        while a3 is None:
            a1, a2 = population.get_two_candidates()
            a3, description = operators.get_new_individual([a1, a2])

        da.add_unrelaxed_candidate(a3, description=description)

        a3 = relax_one(a3)
        da.add_relaxed_step(a3)

        population.update()
        best = population.get_current_population()[0]
        print('Highest raw score at this point: %.3f' % get_raw_score(best))

    print('GA finished after step %d' % step)
    write('all_candidates.traj', da.get_all_relaxed_candidates())
    write('current_population.traj', population.get_current_population())
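# The relax_one() helper used above is not shown in this collection. A minimal
# sketch of what it might look like, under stated assumptions: a Lennard-Jones
# calculator stands in for whatever code the real script attaches (e.g. DFTB+),
# and a plain BFGS local optimization is used. The essential part is the ASE GA
# convention of storing raw_score = -energy on the relaxed candidate.
from ase.calculators.lj import LennardJones
from ase.optimize import BFGS
from ase.ga import set_raw_score

def relax_one(atoms, fmax=0.05, steps=200):
    atoms.calc = LennardJones()   # stand-in calculator, assumption only
    BFGS(atoms, logfile=None).run(fmax=fmax, steps=steps)
    # maximize raw_score <=> minimize energy
    set_raw_score(atoms, -atoms.get_potential_energy())
    return atoms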
def __initialize_pop__(self):
    """ Private method that initializes the population when
        the population is created. """
    # Get all relaxed candidates from the database
    ue = self.use_extinct
    all_cand = self.dc.get_all_relaxed_candidates(use_extinct=ue)
    all_cand.sort(key=lambda x: get_raw_score(x), reverse=True)
    # all_cand.sort(key=lambda x: x.get_potential_energy())

    if len(all_cand) > 0:
        # Fill up the population with the self.pop_size most stable
        # unique candidates.
        ratings = []
        best_raw = get_raw_score(all_cand[0])
        i = 0
        while i < len(all_cand):
            c = all_cand[i]
            i += 1
            eq = False
            for a in self.pop:
                if self.comparator.looks_like(a, c):
                    eq = True
                    break
            if not eq:
                if len(self.pop) < self.pop_size - self.bad_candidates:
                    self.pop.append(c)
                else:
                    exp_fact = exp(get_raw_score(c) / best_raw)
                    ratings.append([c, (exp_fact - 1) * random()])
        ratings.sort(key=itemgetter(1), reverse=True)

        for i in range(self.bad_candidates):
            self.pop.append(ratings[i][0])

        for a in self.pop:
            a.info['looks_like'] = count_looks_like(a, all_cand,
                                                    self.comparator)

    self.all_cand = all_cand
    self.__calc_participation__()
def singlepoint(t, kptdensity=1.5):
    if get_raw_score(t) < -1e5:
        return t
    try:
        calc = DftbPlusCalc(t, kpts=kptdensity, use_spline=True,
                            read_chg=True)
        t.set_calculator(calc)
        E = t.get_potential_energy()
        F = t.get_forces()
        S = t.get_stress()
        finalize(t, energy=E, forces=F, stress=S)
        penalize(t)
    except (RuntimeError, IOError):
        print('Warning: problems with singlepoint recalculation')
        finalize(t, energy=1e9, forces=None, stress=None)
    return t
def get_all_relaxed_candidates_after_generation(self, gen):
    """ Returns all candidates that have been relaxed up to
    and including the specified generation
    """
    q = 'relaxed=1,extinct=0,generation<={0}'
    entries = self.c.select(q.format(gen))

    trajs = []
    for v in entries:
        t = self.get_atoms(id=v.id)
        t.info['confid'] = v.gaid
        t.info['relax_id'] = v.id
        trajs.append(t)
    trajs.sort(key=lambda x: get_raw_score(x), reverse=True)
    return trajs
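# Illustrative usage sketch (not from the original source): listing the raw
# scores of everything relaxed during the first three generations. It assumes
# a DataConnection attached to the godb.db database used in the run scripts
# and exposing the method shown above.
from ase.ga import get_raw_score
from ase.ga.data import DataConnection

dc = DataConnection('godb.db')
for atoms in dc.get_all_relaxed_candidates_after_generation(3):
    print(atoms.info['confid'], get_raw_score(atoms))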
def looks_like(self, a1, a2):
    # Energy criterion
    try:
        dE = abs(a1.get_potential_energy() - a2.get_potential_energy())
    except Exception:
        # Fall back on the raw scores if no energies are available
        dE = abs(get_raw_score(a1) - get_raw_score(a2))
    if dE >= self.dE:
        return False

    # Structure criterion
    f1 = self.get_features(a1)
    f2 = self.get_features(a2)
    d1 = sum(f1.values(), [])
    d2 = sum(f2.values(), [])
    max_d = max(np.abs(np.array(d1) - np.array(d2)))
    s = self.get_similarity(f1, f2)
    # print(s, max_d)
    if s > self.pair_cor_cum_diff or max_d > self.pair_cor_max:
        return False
    else:
        return True
def singlepoint(t, kptdensity=3.5):
    if get_raw_score(t) < -1e5:
        return t
    try:
        calc = DftbPlusCalculator(t, kpts=kptdensity, use_spline=True,
                                  maximum_angular_momenta={'Pd': 2, 'H': 0,
                                                           'O': 1})
        t.set_calculator(calc)
        E = t.get_potential_energy()
        F = t.get_forces()
        S = t.get_stress()
        finalize(t, energy=E, forces=F, stress=S)
        penalize(t)
    except (IOError, TypeError, RuntimeError, UnboundLocalError) as err:
        print(err)
        print('Warning: problems with singlepoint recalculation')
        finalize(t, energy=1e9, forces=None, stress=None)
    return t
def __initialize_pop__(self):
    # Get all relaxed candidates from the database
    ue = self.use_extinct
    all_cand = self.dc.get_all_relaxed_candidates(use_extinct=ue)
    all_cand.sort(key=lambda x: get_raw_score(x), reverse=True)

    if len(all_cand) > 0:
        fitf = self.__get_fitness__(all_cand)
        all_sorted = list(zip(fitf, all_cand))
        all_sorted.sort(key=itemgetter(0), reverse=True)
        sort_cand = []
        for _, t2 in all_sorted:
            sort_cand.append(t2)
        all_sorted = sort_cand

        # Fill up the population with the self.pop_size most stable
        # unique candidates.
        i = 0
        while i < len(all_sorted) and len(self.pop) < self.pop_size:
            c = all_sorted[i]
            # Use variable_function to decide whether to run comparator
            # if the function has been defined by the user. This does not
            # need to be dependent on using the rank_data function.
            if self.vf is not None:
                c_vf = self.vf(c)
            i += 1
            eq = False
            for a in self.pop:
                if self.vf is not None:
                    a_vf = self.vf(a)
                    # Only run comparator if the variable_function
                    # (self.vf) returns the same. If it returns something
                    # different the candidates are inherently different.
                    # This is done to speed up.
                    if a_vf == c_vf:
                        if self.comparator.looks_like(a, c):
                            eq = True
                            break
                else:
                    if self.comparator.looks_like(a, c):
                        eq = True
                        break
            if not eq:
                self.pop.append(c)

    self.all_cand = all_cand
def __get_fitness__(self, indecies, with_history=True):
    """Calculates the fitness using the formula from
    L.B. Vilhelmsen et al., JACS, 2012, 134 (30), pp 12807-12816

    Sign change on the fitness compared to the formulation in the
    abovementioned paper due to maximizing raw_score instead of
    minimizing energy. (Set raw_score=-energy to optimize the energy)
    """
    scores = [get_raw_score(x) for x in self.pop]
    min_s = min(scores)
    max_s = max(scores)
    T = min_s - max_s
    if isinstance(indecies, int):
        indecies = [indecies]

    f = [0.5 * (1. - tanh(2. * (scores[i] - max_s) / T - 1.))
         for i in indecies]
    if with_history:
        M = [float(self.pop[i].info['n_paired']) for i in indecies]
        L = [float(self.pop[i].info['looks_like']) for i in indecies]
        f = [f[i] * 1. / sqrt(1. + M[i]) * 1. / sqrt(1. + L[i])
             for i in range(len(f))]
    return f
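# Illustrative worked example (not from the original source): the hyperbolic
# tangent scaling above maps raw scores onto the interval (0, 1) independently
# of their absolute scale, with the best candidate near 0.88 and the worst
# near 0.12. The scores below are made up.
from math import tanh

scores = [-10.0, -10.5, -12.0]   # toy raw scores (= -energies)
max_s, min_s = max(scores), min(scores)
T = min_s - max_s                # note: T is negative
f = [0.5 * (1. - tanh(2. * (s - max_s) / T - 1.)) for s in scores]
print(f)                         # roughly [0.88, 0.73, 0.12]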
def converged(self):
    cur_pop = self.pop.get_current_population()
    if abs(get_raw_score(cur_pop[0]) - self.max_raw_score) <= self.eps:
        return True
    return False
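# Illustrative sketch (hypothetical, not from the original source): wiring a
# raw-score convergence check like the one above into a GA driver loop. The
# class name RawScoreStop and the max_raw_score value are placeholders.
from ase.ga import get_raw_score

class RawScoreStop:
    def __init__(self, population, max_raw_score, eps=1e-3):
        self.pop = population
        self.max_raw_score = max_raw_score
        self.eps = eps

    def converged(self):
        cur_pop = self.pop.get_current_population()
        return abs(get_raw_score(cur_pop[0]) - self.max_raw_score) <= self.eps

# inside a GA driver:
#     cc = RawScoreStop(population, max_raw_score=12.345)
#     if cc.converged():
#         break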
# (continuation of an element-operator test; a2, cations and the preceding
#  MoveDownMutation step are defined earlier, cf. the rng-seeded version below)
syms = a2.get_chemical_symbols()
assert 'Ba' in syms
assert len(set(syms)) == 3

op = MoveUpMutation(cations, 1, 1.)
a3, desc = op.get_new_individual([a2])
syms = a3.get_chemical_symbols()
assert 'Ba' not in syms
assert len(set(syms)) == 2

cations = ['Co', 'Ni', 'Cu']
a1 = Atoms('NiNiBrBr')
a1.info['confid'] = 1
op = MoveRightMutation(cations, 1, 1.)
a2, desc = op.get_new_individual([a1])
a2.info['confid'] = 2
syms = a2.get_chemical_symbols()
assert len(set(syms)) == 2
assert len([i for i in syms if i == 'Cu']) == 2

op = MoveLeftMutation(cations, 2, .5)
a3, desc = op.get_new_individual([a2])
syms = a3.get_chemical_symbols()

from ase.ga import set_raw_score, get_raw_score
assert len(set(syms)) == 3
set_raw_score(a3, 5.0)
assert get_raw_score(a3) == 5.0
def looks_like(self, a1, a2):
    d = abs(get_raw_score(a1) - get_raw_score(a2))
    return d < self.dist
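# Illustrative sketch (not from the original source): under this comparator,
# two candidates count as "identical" as soon as their raw scores differ by
# less than self.dist, regardless of geometry. A quick toy check using a
# minimal wrapper class around the looks_like() shown above:
from ase import Atoms
from ase.ga import set_raw_score, get_raw_score

class ScoreOnlyComparator:   # hypothetical minimal wrapper, for illustration
    def __init__(self, dist=0.02):
        self.dist = dist

    def looks_like(self, a1, a2):
        return abs(get_raw_score(a1) - get_raw_score(a2)) < self.dist

a, b = Atoms('Cu'), Atoms('Cu')
for atoms, score in ((a, -3.100), (b, -3.095)):
    atoms.info['key_value_pairs'] = {}   # plain Atoms objects need this dict
    set_raw_score(atoms, score)
print(ScoreOnlyComparator(dist=0.02).looks_like(a, b))   # True: 0.005 < 0.02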
def run_ga(n_to_test, kptdensity=3.5):
    population_size = 20
    da = DataConnection('godb.db')
    atom_numbers_to_optimize = da.get_atom_numbers_to_optimize()
    n_to_optimize = len(atom_numbers_to_optimize)
    slab = da.get_slab()
    all_atom_types = get_all_atom_types(slab, atom_numbers_to_optimize)
    blmin = closest_distances_generator(all_atom_types, 0.05)  # 0.5

    # defining genetic operators:
    mutation_probability = 0.75
    pairing = CutAndSplicePairing(blmin, p1=1., p2=0., minfrac=0.15,
                                  use_tags=False)
    cellbounds = CellBounds(bounds={'phi': [0.2 * 180., 0.8 * 180.],
                                    'chi': [0.2 * 180., 0.8 * 180.],
                                    'psi': [0.2 * 180., 0.8 * 180.]})
    strainmut = StrainMutation(blmin, stddev=0.7, cellbounds=cellbounds,
                               use_tags=False)
    blmin_soft = closest_distances_generator(all_atom_types, 0.1)
    softmut = SoftMutation(blmin_soft, bounds=[2., 5.], use_tags=False)
    rattlemut = RattleMutation(blmin, n_to_optimize, rattle_prop=0.8,
                               rattle_strength=2.5, use_tags=False)
    mutations = OperationSelector([4., 4., 2.],
                                  [softmut, strainmut, rattlemut])

    if True:
        # recalculate raw scores
        structures = da.get_all_relaxed_candidates()
        for atoms in structures:
            atoms = singlepoint(atoms, kptdensity=kptdensity)
            da.c.delete([atoms.info['relax_id']])
            if 'data' not in atoms.info:
                atoms.info['data'] = {}
            da.add_relaxed_step(atoms)
        print('Finished recalculating raw scores')

    # relaxing the initial candidates:
    while da.get_number_of_unrelaxed_candidates() > 0:
        a = da.get_an_unrelaxed_candidate()
        a.wrap()
        a = relax_one(a, kptdensity=kptdensity)
        da.add_relaxed_step(a)

    # create the population
    population = Population(data_connection=da,
                            population_size=population_size,
                            comparator=comparator,
                            logfile='log.txt')
    current_pop = population.get_current_population()
    strainmut.update_scaling_volume(current_pop, w_adapt=0.5, n_adapt=4)
    pairing.update_scaling_volume(current_pop, w_adapt=0.5, n_adapt=4)

    # Test n_to_test new candidates
    ga_raw_scores = []
    step = 0
    for step in range(n_to_test):
        print('Starting configuration number %d' % step, flush=True)
        clock = time()

        a3 = None
        r = random()
        if r > mutation_probability:
            while a3 is None:
                a1, a2 = population.get_two_candidates()
                a3, desc = pairing.get_new_individual([a1, a2])
        else:
            while a3 is None:
                a1 = population.get_one_candidate()
                a3, desc = mutations.get_new_individual([a1])

        dt = time() - clock
        op = 'pairing' if r > mutation_probability else 'mutating'
        print('Time for %s candidate(s): %.3f' % (op, dt), flush=True)

        a3.wrap()
        da.add_unrelaxed_candidate(a3, description=desc)

        a3 = relax_one(a3, kptdensity=kptdensity)
        da.add_relaxed_step(a3)

        # Various updates:
        population.update()
        current_pop = population.get_current_population()
        if step % 10 == 0:
            strainmut.update_scaling_volume(current_pop, w_adapt=0.5,
                                            n_adapt=4)
            pairing.update_scaling_volume(current_pop, w_adapt=0.5,
                                          n_adapt=4)
            write('current_population.traj', current_pop)

        # Print out information for easy analysis/plotting afterwards:
        if r > mutation_probability:
            print('Step %d %s %.3f %.3f %.3f' % (step, desc,
                  get_raw_score(a1), get_raw_score(a2), get_raw_score(a3)))
        else:
            print('Step %d %s %.3f %.3f' % (step, desc,
                  get_raw_score(a1), get_raw_score(a3)))
        print('Step %d highest raw score in pop: %.3f' %
              (step, get_raw_score(current_pop[0])))
        ga_raw_scores.append(get_raw_score(a3))
        print('Step %d highest raw score generated by GA: %.3f' %
              (step, max(ga_raw_scores)))

    emin = population.pop[0].get_potential_energy()
    print('GA finished after step %d' % step)
    print('Lowest energy = %8.3f eV' % emin, flush=True)

    write('all_candidates.traj', da.get_all_relaxed_candidates())
    write('current_population.traj', population.get_current_population())
def run_ga(n_to_test, kptdensity=None):
    ''' This method specifies how to run the GA once the
    initial random structures have been stored in godb.db.
    '''
    # Various initializations:
    population_size = 10
    da = DataConnection('godb.db')
    atom_numbers_to_optimize = da.get_atom_numbers_to_optimize()
    n_to_optimize = len(atom_numbers_to_optimize)
    slab = da.get_slab()
    all_atom_types = get_all_atom_types(slab, atom_numbers_to_optimize)
    blmin = closest_distances_generator(all_atom_types,
                                        ratio_of_covalent_radii=0.05)

    # Defining the mix of genetic operators:
    mutation_probability = 0.3333
    pairing = CutAndSplicePairing(slab, n_to_optimize, blmin)
    rattlemut = RattleMutation(blmin, n_to_optimize,
                               rattle_prop=0.8, rattle_strength=1.5)
    mirrormut = MirrorMutation(blmin, n_to_optimize)
    mutations = OperationSelector([1., 1.], [rattlemut, mirrormut])

    if True:
        # Recalculate raw scores of any relaxed candidates
        # present in the godb.db database (only applies to
        # iter007).
        structures = da.get_all_relaxed_candidates()
        for atoms in structures:
            atoms = singlepoint(atoms)
            da.c.delete([atoms.info['relax_id']])
            if 'data' not in atoms.info:
                atoms.info['data'] = {}
            da.add_relaxed_step(atoms)
        print('Finished recalculating raw scores')

    # Relax the randomly generated initial candidates:
    while da.get_number_of_unrelaxed_candidates() > 0:
        a = da.get_an_unrelaxed_candidate()
        a.wrap()
        a = relax_one(a)
        da.add_relaxed_step(a)

    # Create the population
    population = Population(data_connection=da,
                            population_size=population_size,
                            comparator=comparator,
                            logfile='log.txt')
    current_pop = population.get_current_population()

    # Test n_to_test new candidates
    ga_raw_scores = []
    step = 0
    for step in range(n_to_test):
        print('Starting configuration number %d' % step, flush=True)
        clock = time()

        a3 = None
        r = random()
        if r > mutation_probability:
            while a3 is None:
                a1, a2 = population.get_two_candidates()
                a3, desc = pairing.get_new_individual([a1, a2])
        else:
            while a3 is None:
                a1 = population.get_one_candidate()
                a3, desc = mutations.get_new_individual([a1])

        dt = time() - clock
        op = 'pairing' if r > mutation_probability else 'mutating'
        print('Time for %s candidate(s): %.3f' % (op, dt), flush=True)

        a3.wrap()
        da.add_unrelaxed_candidate(a3, description=desc)

        a3 = relax_one(a3)
        da.add_relaxed_step(a3)

        # Various updates:
        population.update()
        current_pop = population.get_current_population()
        write('current_population.traj', current_pop)

        # Print out information for easy analysis/plotting afterwards:
        if r > mutation_probability:
            print('Step %d %s %.3f %.3f %.3f' % (step, desc,
                  get_raw_score(a1), get_raw_score(a2), get_raw_score(a3)))
        else:
            print('Step %d %s %.3f %.3f' % (step, desc,
                  get_raw_score(a1), get_raw_score(a3)))
        print('Step %d highest raw score in pop: %.3f' %
              (step, get_raw_score(current_pop[0])))
        ga_raw_scores.append(get_raw_score(a3))
        print('Step %d highest raw score generated by GA: %.3f' %
              (step, max(ga_raw_scores)))

    emin = population.pop[0].get_potential_energy()
    print('GA finished after step %d' % step)
    print('Lowest energy = %8.3f eV' % emin, flush=True)

    write('all_candidates.traj', da.get_all_relaxed_candidates())
    write('current_population.traj', population.get_current_population())
def test_element_operators(seed):
    import numpy as np
    from ase import Atoms
    from ase.ga.element_crossovers import OnePointElementCrossover

    # set up the random number generator
    rng = np.random.RandomState(seed)

    a1 = Atoms('SrSrSrBaClClClClBrBrBrBr')
    a1.info['confid'] = 1
    a2 = Atoms('CaCaMgBaFFFFFFFF')
    a2.info['confid'] = 2

    cations = ['Sr', 'Ba', 'Ca', 'Mg']
    anions = ['Cl', 'F', 'Br']
    op = OnePointElementCrossover([cations, anions], [3, 2],
                                  [.25, .5], rng=rng)

    a3, desc = op.get_new_individual([a1, a2])

    syms = a3.get_chemical_symbols()
    assert len(set([i for i in syms if i in cations])) < 4
    assert len(set([i for i in syms if i in anions])) < 3

    from ase.ga.element_mutations import RandomElementMutation

    op = RandomElementMutation([cations, anions], [3, 2], [.25, .5], rng=rng)
    a4, desc = op.get_new_individual([a1])
    syms = a4.get_chemical_symbols()
    assert len(set([i for i in syms if i in cations])) < 4
    assert len(set([i for i in syms if i in anions])) < 3

    op = RandomElementMutation(anions, 2, .5, rng=rng)
    a4, desc = op.get_new_individual([a2])
    syms = a4.get_chemical_symbols()
    assert len(set([i for i in syms if i in anions])) == 2

    from ase.ga.element_mutations import MoveDownMutation
    from ase.ga.element_mutations import MoveUpMutation
    from ase.ga.element_mutations import MoveRightMutation
    from ase.ga.element_mutations import MoveLeftMutation

    a1 = Atoms('SrSrClClClCl')
    a1.info['confid'] = 1
    op = MoveDownMutation(cations, 2, .5, rng=rng)
    a2, desc = op.get_new_individual([a1])
    a2.info['confid'] = 2

    syms = a2.get_chemical_symbols()
    assert 'Ba' in syms
    assert len(set(syms)) == 3

    op = MoveUpMutation(cations, 1, 1., rng=rng)
    a3, desc = op.get_new_individual([a2])
    syms = a3.get_chemical_symbols()
    assert 'Ba' not in syms
    assert len(set(syms)) == 2

    cations = ['Co', 'Ni', 'Cu']
    a1 = Atoms('NiNiBrBr')
    a1.info['confid'] = 1
    op = MoveRightMutation(cations, 1, 1., rng=rng)
    a2, desc = op.get_new_individual([a1])
    a2.info['confid'] = 2
    syms = a2.get_chemical_symbols()
    assert len(set(syms)) == 2
    assert len([i for i in syms if i == 'Cu']) == 2

    op = MoveLeftMutation(cations, 2, .5, rng=rng)
    a3, desc = op.get_new_individual([a2])
    syms = a3.get_chemical_symbols()

    from ase.ga import set_raw_score, get_raw_score
    assert len(set(syms)) == 3
    set_raw_score(a3, 5.0)
    assert get_raw_score(a3) == 5.0
        # (tail of the main GA loop; a1 and a2 were drawn from the population
        #  and `step` is the loop counter of the enclosing for-loop)
        a3, desc = operators.get_new_individual([a1, a2])

        # Relax it and add to database
        da.add_unrelaxed_candidate(a3, description=desc)
        relax(a3)
        da.add_relaxed_step(a3)

        # Update the population
        population.update()
        current_pop = population.get_current_population()
        write('current_population.traj', current_pop)

        # Update the strain mutation and pairing operators
        if step % 10 == 0:
            strainmut.update_scaling_volume(current_pop, w_adapt=0.5,
                                            n_adapt=4)
            pairing.update_scaling_volume(current_pop, w_adapt=0.5,
                                          n_adapt=4)

        # Print out information for easier follow-up/analysis/plotting:
        print('Step %d %s %.3f %.3f %.3f' % (step, desc, get_raw_score(a1),
                                             get_raw_score(a2),
                                             get_raw_score(a3)))
        print('Step %d highest raw score in pop: %.3f' %
              (step, get_raw_score(current_pop[0])))

    print('GA finished after step %d' % step)
    hiscore = get_raw_score(current_pop[0])
    print('Highest raw score = %8.4f eV' % hiscore)

    write('all_candidates.traj', da.get_all_relaxed_candidates())
    write('current_population.traj', current_pop)
def extract_best_unique(comparator, max_select=None, num_stddev=None,
                        score_limit=None, dbfile='best_unique.db'):
    ''' Writes a database containing the best unique structures
    from a set of global optimization runs in the current working
    directory. These runs must have written an 'all_candidates.traj'
    file containing all the structures with their raw scores.

    comparator: a class instance with suitable _compare_structure_
                and looks_like methods for comparing two structures

    max_select: upper bound on the number of best unique structures
                to select. If None (default), no bound is enforced.

    num_stddev: number of standard deviations relative to the average
                score of all candidates, which is used to pre-select
                only the more stable structures. Setting it to zero
                means all better-than-average structures are considered
                for further selection. Three standard deviations around
                the mean is used as cutoff in determining the average,
                to exclude very low-score outliers.

    score_limit: as an alternative to num_stddev, this argument sets
                 the minimal raw score for structures to be included.

    dbfile: name of the database where the final selection will
            be saved.
    '''
    db = connect(dbfile)

    all_candidates = []
    all_cand_dict = {}
    for (dirpath, dirnames, filenames) in os.walk('.'):
        if 'all_candidates.traj' in filenames and 'run' in dirpath:
            print('Found run directory', dirpath)
            candidates = read(dirpath + '/all_candidates.traj@:')
            all_candidates.extend(candidates)
            all_cand_dict[dirpath] = candidates

    all_candidates.sort(key=lambda x: get_raw_score(x), reverse=True)
    raw_scores = np.array([get_raw_score(atoms) for atoms in all_candidates])

    # Use the median of the sorted scores as a robust center and discard
    # candidates more than three standard deviations below it:
    std = np.std(raw_scores)
    mean = raw_scores[len(raw_scores) // 2]
    min_score = mean - 3 * std
    izero = np.argmax(raw_scores < min_score)
    if izero != 0:
        raw_scores = raw_scores[:izero]
        all_candidates = all_candidates[:izero]

    average = np.mean(raw_scores)
    std = np.std(raw_scores)
    max_score = np.max(raw_scores)
    min_score = np.min(raw_scores)

    if num_stddev is not None:
        cut_score = average - num_stddev * std
    elif score_limit is not None:
        cut_score = max_score - score_limit
    else:
        cut_score = min_score

    print('Average = %.3f, Std. dev = %.3f' % (average, std))
    print('N = %d before selecting unique structures' % len(all_candidates))
    print('Max score = %.3f, min score = %.3f, cut score = %.3f' %
          (max_score, min_score, cut_score), flush=True)

    args = []
    for key, val in all_cand_dict.items():
        raw_scores = np.array([get_raw_score(atoms) for atoms in val])
        izero = np.argmax(raw_scores < cut_score)
        if izero != 0:
            val = val[:izero]
        args.append([comparator, val, False])

    po = mp.Pool(processes=None)
    harvest = po.map(get_unique, args, chunksize=1)
    po.close()
    po.join()

    all_candidates = [atoms for allcand in harvest for atoms in allcand]
    all_candidates.sort(key=lambda x: get_raw_score(x), reverse=True)
    raw_scores = [get_raw_score(atoms) for atoms in all_candidates]

    print('N_unique = %d before next selection round' % len(all_candidates),
          flush=True)
    best_unique = get_unique([comparator, all_candidates, True])
    best_unique.sort(key=lambda x: get_raw_score(x), reverse=True)

    N = len(best_unique)
    print('N_unique = %d before further refinement' % N, flush=True)
    if max_select is None or max_select >= N:
        selection = range(N)
    else:
        selection = range(max_select)

    print('Selected indices:', selection)
    for i in selection:
        atoms = best_unique[i]
        raw_score = get_raw_score(atoms)
        db.write(atoms, raw_score_from_ga=raw_score, gaid=i, relaxed=0)

    print('N_unique = %d after final refinement' % len(selection))
    return
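# Illustrative usage sketch (not from the original source): how
# extract_best_unique() might be driven from a small analysis script. The
# OFPComparator settings are placeholders and would need to match the system;
# it also assumes the module defining extract_best_unique (and its get_unique
# helper) is in scope. The __main__ guard matters because the function spawns
# a multiprocessing pool.
from ase.ga.ofp_comparator import OFPComparator

if __name__ == '__main__':
    comparator = OFPComparator(dE=0.5, cos_dist_max=5e-3, rcut=10.,
                               binwidth=0.05, sigma=0.1, nsigma=4,
                               pbc=[True] * 3)
    extract_best_unique(comparator, max_select=50, num_stddev=1.,
                        dbfile='best_unique.db')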
        # (tail of the main GA loop in the bulk-crystal example; a3 is the
        #  newly generated candidate and `step` the enclosing loop counter)
        relax(a3, cellbounds=cellbounds)
        da.add_relaxed_step(a3)

        # If the relaxation has changed the cell parameters
        # beyond the bounds we disregard it in the population
        cell = a3.get_cell()
        if not cellbounds.is_within_bounds(cell):
            da.kill_candidate(a3.info['confid'])

        # Update the population
        population.update()

        if step % 10 == 0:
            # Update the scaling volumes of the strain mutation
            # and the pairing operator based on the current
            # best structures contained in the population
            current_pop = population.get_current_population()
            strainmut.update_scaling_volume(current_pop, w_adapt=0.5,
                                            n_adapt=4)
            pairing.update_scaling_volume(current_pop, w_adapt=0.5,
                                          n_adapt=4)
            write('current_population.traj', current_pop)

    print('GA finished after step %d' % step)
    hiscore = get_raw_score(current_pop[0])
    print('Highest raw score = %8.4f eV' % hiscore)

    all_candidates = da.get_all_relaxed_candidates()
    write('all_candidates.traj', all_candidates)

    current_pop = population.get_current_population()
    write('current_population.traj', current_pop)