Esempio n. 1
0
    def test_ase_api(self):
        """Check the feature and graph accessors added to atoms objects."""
        db_path = '{}/data/gadb.db'.format(wkdir)
        candidates = DataConnection(db_path).get_all_relaxed_candidates()

        first = candidates[0]
        formula_before = first.get_chemical_formula()

        # Extending one candidate must leave it type-compatible with the rest.
        extend_atoms_class(first)
        second = candidates[1]
        self.assertIsInstance(first, type(second))

        # Features stored on the atoms object must be returned unchanged.
        generator = FeatureGenerator()
        composition = generator.composition_vec(first)
        first.set_features(composition)

        self.assertTrue(np.allclose(first.get_features(), composition))
        self.assertEqual(first.get_chemical_formula(), formula_before)

        # A candidate that never had features assigned reports None.
        extend_atoms_class(second)
        self.assertIsNone(second.get_features())

        # Same round trip for the graph representation.
        third = candidates[2]
        graph = ase_to_networkx(third)
        third.set_graph(graph)

        self.assertEqual(third.get_graph(), graph)
        self.assertIsNone(second.get_graph())
def run_ga(n_to_test):
    """
    This method specifies how to run the GA once the
    initial random structures have been stored in godb.db.

    Every unrelaxed candidate in the database is relaxed first, then
    ``n_to_test`` new candidates are generated by the genetic operators,
    relaxed and stored. Finally all relaxed candidates and the current
    population are written to 'all_candidates.traj' and
    'current_population.traj'.

    n_to_test: number of new candidates to generate and relax. With
        ``n_to_test <= 0`` no candidate is generated and the summary
        line reports step 0.
    """
    # Various initializations:
    population_size = 10  # maximal size of the population
    da = DataConnection('godb.db')
    atom_numbers_to_optimize = da.get_atom_numbers_to_optimize()  # = [14] * 7
    n_to_optimize = len(atom_numbers_to_optimize)  # = 7
    # This defines how close the Si atoms are allowed to get
    # in candidate structures generated by the genetic operators:
    blmin = closest_distances_generator(atom_numbers_to_optimize,
                                        ratio_of_covalent_radii=0.4)
    # This is our OFPComparator instance which will be
    # used to judge whether or not two structures are identical:
    comparator = OFPComparator(n_top=None, dE=1.0, cos_dist_max=1e-3,
                               rcut=10., binwidth=0.05, pbc=[False]*3,
                               sigma=0.1, nsigma=4, recalculate=False)

    # Defining a typical combination of genetic operators:
    pairing = CutAndSplicePairing(da.get_slab(), n_to_optimize, blmin)
    rattlemut = RattleMutation(blmin, n_to_optimize, rattle_prop=0.8,
                               rattle_strength=1.5)
    operators = OperationSelector([2., 1.], [pairing, rattlemut])

    # Relax the randomly generated initial candidates:
    while da.get_number_of_unrelaxed_candidates() > 0:
        a = da.get_an_unrelaxed_candidate()
        a = relax_one(a)
        da.add_relaxed_step(a)

    # Create the population
    population = Population(data_connection=da,
                            population_size=population_size,
                            comparator=comparator,
                            logfile='log.txt')
    # The return value is not used here; the call is kept in case it has
    # side effects on the population -- TODO confirm.
    population.get_current_population()

    # Test n_to_test new candidates.
    # 'step' is bound up front so that the summary print below does not
    # raise NameError when the loop body never runs (n_to_test <= 0).
    step = 0
    for step in range(n_to_test):
        print('Starting configuration number %d' % step, flush=True)

        # Keep drawing parents until an operator produces an offspring.
        a3 = None
        while a3 is None:
            a1, a2 = population.get_two_candidates()
            a3, description = operators.get_new_individual([a1, a2])

        da.add_unrelaxed_candidate(a3, description=description)
        a3 = relax_one(a3)
        da.add_relaxed_step(a3)

        population.update()
        best = population.get_current_population()[0]
        print('Highest raw score at this point: %.3f' % get_raw_score(best))

    print('GA finished after step %d' % step)
    write('all_candidates.traj', da.get_all_relaxed_candidates())
    write('current_population.traj', population.get_current_population())
Esempio n. 3
0
    def test_networkx_api(self):
        """Test the networkx graph and adjacency matrix conversions."""
        db_path = '{}/data/gadb.db'.format(wkdir)
        candidate = DataConnection(db_path).get_all_relaxed_candidates()[1]

        # The graph is compared against the atoms object by length.
        graph = ase_to_networkx(candidate)
        self.assertEqual(len(graph), len(candidate))

        # The adjacency matrix must be square with one row per atom.
        adjacency = networkx_to_adjacency(graph)
        n_atoms = len(candidate)
        self.assertEqual(np.shape(adjacency), (n_atoms, n_atoms))
Esempio n. 4
0
    def test_feature_base(self):
        """Test the base feature generator.

        The neighbor list and positions returned by BaseGenerator are
        compared with the values obtained directly from the atoms object.
        """
        gadb = DataConnection('{}/data/gadb.db'.format(wkdir))
        all_cand = gadb.get_all_relaxed_candidates()

        f = BaseGenerator()
        nl = ase_neighborlist(all_cand[0])
        # unittest assertions are used instead of bare ``assert`` so the
        # checks still run when Python is started with -O.
        self.assertTrue(f.get_neighborlist(all_cand[0]) == nl)

        pos = all_cand[0].get_positions()
        self.assertTrue(np.allclose(f.get_positions(all_cand[0]), pos))
Esempio n. 5
0
    def test_ase_nl(self):
        """Check the ase neighbor list has the same length as the atoms."""
        # Open the database generated by a GA search and load the relaxed,
        # non-extinct candidates.
        db_path = '{}/data/gadb.db'.format(wkdir)
        candidates = DataConnection(db_path).get_all_relaxed_candidates(
            use_extinct=False)
        atoms = candidates[0]

        neighbors = ase_neighborlist(atoms)

        self.assertEqual(len(atoms), len(neighbors))
Esempio n. 6
0
    def test_pdf(self):
        """Run the pair distribution and pair deviation functions.

        No assertion is made on the returned values; the test only checks
        that both calls complete and return two values each.
        """
        db_path = '{}/data/gadb.db'.format(wkdir)
        candidates = DataConnection(db_path).get_all_relaxed_candidates(
            use_extinct=False)

        # Covalent radius for every atomic number from 1 to 91.
        radii_by_number = {z: covalent_radii[z] for z in range(1, 92)}

        pdf, x1 = pair_distribution(candidates)

        # Get bond length deviations from touching spheres.
        dev, x2 = pair_deviation(candidates, cutoffs=radii_by_number)
Esempio n. 7
0
    def test_catlearn_nl(self):
        """Test the catlearn neighbor list for several neighbor shells."""
        # Open the database generated by a GA search and load the relaxed,
        # non-extinct candidates.
        db_path = '{}/data/gadb.db'.format(wkdir)
        candidates = DataConnection(db_path).get_all_relaxed_candidates(
            use_extinct=False)
        atoms = candidates[0]
        n_atoms = len(atoms)

        def neighbors(shells):
            """Return the neighbor matrix of ``atoms`` up to ``shells``."""
            return catlearn_neighborlist(atoms, max_neighbor=shells)

        # The result is a square matrix with one row per atom.
        first_shell = neighbors(1)
        self.assertEqual((n_atoms, n_atoms), np.shape(first_shell))

        # Including more shells must change the matrix ...
        fourth_shell = neighbors(4)
        self.assertFalse(np.allclose(first_shell, fourth_shell))

        # ... and five shells are expected to match the 'full' setting.
        fifth_shell = neighbors(5)
        all_shells = neighbors('full')
        self.assertFalse(np.allclose(fourth_shell, fifth_shell))
        self.assertTrue(np.allclose(fifth_shell, all_shells))
Esempio n. 8
0
    def get_data(self):
        """Build train and test features and targets from atoms objects.

        Returns a 6-tuple: train features, train targets, train atoms,
        test features, test targets, test atoms. Targets are the
        'raw_score' values stored in each atoms object's key_value_pairs.
        """
        # Open the database generated by a GA search.
        db = DataConnection('{}/data/gadb.db'.format(wkdir))

        # Load the relaxed, non-extinct candidates.
        print('Getting candidates from the database')
        candidates = db.get_all_relaxed_candidates(use_extinct=False)

        # Split into a test set and a training set that avoids the
        # entries already taken for testing.
        testset = get_unique(atoms=candidates, size=test_size,
                             key='raw_score')
        trainset = get_train(atoms=candidates, size=train_size,
                             taken=testset['taken'], key='raw_score')
        train_atoms = trainset['atoms']
        test_atoms = testset['atoms']

        # Drop the saved 'nnmat' entry from every training structure.
        for atoms in train_atoms:
            del atoms.info['data']['nnmat']

        # Generate the nearest neighbour fingerprints.
        print('Getting the fingerprints')
        generator = FeatureGenerator()
        train_features = generator.return_vec(
            train_atoms, [generator.nearestneighbour_vec])
        test_features = generator.return_vec(
            test_atoms, [generator.nearestneighbour_vec])

        train_targets = [
            atoms.info['key_value_pairs']['raw_score']
            for atoms in train_atoms
        ]
        test_targets = [
            atoms.info['key_value_pairs']['raw_score']
            for atoms in test_atoms
        ]

        return (train_features, train_targets, train_atoms,
                test_features, test_targets, test_atoms)
    da.add_relaxed_step(a)

# Create the population from the candidates stored in the database.
population = Population(data_connection=da, population_size=population_size, comparator=comp)

# Test n_to_test new candidates: pair two parents, optionally mutate the
# offspring, relax it and store the result.
# range() is used instead of the Python 2 only xrange() so the script runs
# on both Python 2 and Python 3.
for i in range(n_to_test):
    print("Now starting configuration number {0}".format(i))
    a1, a2 = population.get_two_candidates()
    a3, desc = pairing.get_new_individual([a1, a2])
    # Identity test: '== None' would dispatch to the object's __eq__.
    if a3 is None:
        # The pairing produced no offspring; try again with the next step.
        continue
    da.add_unrelaxed_candidate(a3, description=desc)

    # Check if we want to do a mutation
    if random() < mutation_probability:
        a3_mut, desc = mutations.get_new_individual([a3])
        if a3_mut is not None:
            da.add_unrelaxed_step(a3_mut, desc)
            a3 = a3_mut

    # Relax the new candidate; the raw score is the negated potential energy.
    a3.set_calculator(EMT())
    dyn = BFGS(a3, trajectory=None, logfile=None)
    dyn.run(fmax=0.05, steps=100)
    a3.set_raw_score(-a3.get_potential_energy())
    da.add_relaxed_step(a3)
    population.update()

write("all_candidates.traj", da.get_all_relaxed_candidates())
Esempio n. 10
0
# Create the population from the candidates stored in the database.
population = Population(data_connection=da,
                        population_size=population_size,
                        comparator=comp)

# Test n_to_test new candidates.
# range() is used instead of the Python 2 only xrange() so the script runs
# on both Python 2 and Python 3.
for i in range(n_to_test):
    print('Now starting configuration number {0}'.format(i))
    a1, a2 = population.get_two_candidates()
    a3, desc = pairing.get_new_individual([a1, a2])
    if a3 is None:
        # The pairing produced no offspring; try again with the next step.
        continue
    da.add_unrelaxed_candidate(a3, description=desc)

    # Check if we want to do a mutation
    if random() < mutation_probability:
        a3_mut, desc = mutations.get_new_individual([a3])
        if a3_mut is not None:
            da.add_unrelaxed_step(a3_mut, desc)
            a3 = a3_mut

    # Hand the new candidate to the parallel runner for relaxation.
    parallel_local_run.relax(a3)
    population.update()

# Wait until the last candidates are relaxed, polling every 5 seconds.
while parallel_local_run.get_number_of_jobs_running() > 0:
    time.sleep(5.)

write('all_candidates.traj', da.get_all_relaxed_candidates())
Esempio n. 11
0
# Exercise the queue bookkeeping of DataConnection on the database at
# db_file, then delete the file.
dc = DataConnection(db_file)

# Read back the stored slab and atom numbers (the values are not checked).
slab_get = dc.get_slab()
an_get = dc.get_atom_numbers_to_optimize()

# The database is expected to start with 20 unrelaxed candidates.
assert dc.get_number_of_unrelaxed_candidates() == 20

# Queueing a candidate removes it from the unrelaxed count ...
a1 = dc.get_an_unrelaxed_candidate()
dc.mark_as_queued(a1)

# ... and makes it appear in the queue.
assert dc.get_number_of_unrelaxed_candidates() == 19
assert len(dc.get_all_candidates_in_queue()) == 1

# Storing the relaxed step is expected to take the candidate out of the
# queue without changing the unrelaxed count.
a1.set_raw_score(0.0)
dc.add_relaxed_step(a1)

assert dc.get_number_of_unrelaxed_candidates() == 19
assert len(dc.get_all_candidates_in_queue()) == 0

assert len(dc.get_all_relaxed_candidates()) == 1

# The queue is expected to list candidates by their 'confid'.
a2 = dc.get_an_unrelaxed_candidate()
dc.mark_as_queued(a2)
confid = a2.info['confid']
assert dc.get_all_candidates_in_queue()[0] == confid

# A queued candidate can be removed again by its confid.
dc.remove_from_queue(confid)
assert len(dc.get_all_candidates_in_queue()) == 0

# Clean up the database file used by this script.
os.remove(db_file)
Esempio n. 12
0
            .format(time_to_wait))
        sleep(time_to_wait)

    pbs_run.relax(a3)
    redirect_print('-(sub-motion)-> New job submitted.')

# Announce once that jobs are still running before entering the wait loop.
if pbs_run.number_of_jobs_running() != 0:

    redirect_print('MUTATION| Wait! There are still jobs unfinished.')
# Poll every time_to_wait seconds until no job is reported as running.
while pbs_run.number_of_jobs_running() != 0:

    redirect_print('MUTATION| Wait for {} seconds and check again...'.format(
        time_to_wait))
    sleep(time_to_wait)

# Dump every relaxed candidate to a trajectory file.
coords_to_write = da.get_all_relaxed_candidates()
write('all_candidates.traj', coords_to_write)
# No relaxed candidate at all is treated as a fatal error: the jobs on the
# 'amd_256' partition are cancelled through a shell command and the script
# exits.
if len(coords_to_write) == 0:

    redirect_print(
        'ERROR| Something still goes wrong, no structures are generated, exit.'
    )
    # NOTE(review): "execuating" in the message below is a typo for
    # "executing"; left as is because it is runtime output.
    redirect_print('ERROR| execuating command scancel -p amd_256...')
    os.system('scancel -p amd_256')
    exit()
redirect_print('=' * 50)
redirect_print(
    'POST-PROCESSING| Now all individuals are saved in the same database {}'.
    format(database_name))
# Counter initialised for the per-structure loop that follows.
idx_coord = 0
for icoord in coords_to_write:
# Create the population from the candidates stored in the database.
population = Population(data_connection=da,
                        population_size=population_size,
                        comparator=comp)

# Test n_to_test new candidates: pair two parents, optionally mutate the
# offspring, relax it with EMT/BFGS and store the result.
for i in range(n_to_test):
    print('Now starting configuration number {0}'.format(i))
    a1, a2 = population.get_two_candidates()
    a3, desc = pairing.get_new_individual([a1, a2])
    if a3 is None:
        # The pairing produced no offspring; try again with the next step.
        continue
    da.add_unrelaxed_candidate(a3, description=desc)

    # Check if we want to do a mutation
    if random() < mutation_probability:
        a3_mut, desc = mutations.get_new_individual([a3])
        if a3_mut is not None:
            # Record the mutated structure and continue with it.
            da.add_unrelaxed_step(a3_mut, desc)
            a3 = a3_mut

    # Relax the new candidate (at most 100 BFGS steps, fmax=0.05).
    a3.set_calculator(EMT())
    dyn = BFGS(a3, trajectory=None, logfile=None)
    dyn.run(fmax=0.05, steps=100)
    # The raw score is the negated potential energy.
    a3.info['key_value_pairs']['raw_score'] = -a3.get_potential_energy()
    da.add_relaxed_step(a3)
    population.update()

# Save every relaxed candidate found during the run.
write('all_candidates.traj', da.get_all_relaxed_candidates())
Esempio n. 14
0
def run_ga(n_to_test, kptdensity=3.5):
    """Run the GA on the structures stored in godb.db.

    Relaxed candidates already in the database are re-scored with
    ``singlepoint``, the remaining unrelaxed candidates are relaxed with
    ``relax_one``, and then ``n_to_test`` offspring are created by pairing
    or mutation, relaxed and stored.

    n_to_test: number of new candidates to generate.
    kptdensity: forwarded to ``singlepoint`` and ``relax_one``.
    """
    population_size = 20
    db = DataConnection('godb.db')
    atom_numbers_to_optimize = db.get_atom_numbers_to_optimize()
    n_to_optimize = len(atom_numbers_to_optimize)
    slab = db.get_slab()
    all_atom_types = get_all_atom_types(slab, atom_numbers_to_optimize)
    blmin = closest_distances_generator(all_atom_types, 0.05)  # 0.5

    # Genetic operators. A mutation is applied when the random draw is at
    # most mutation_probability, a pairing otherwise.
    mutation_probability = 0.75
    pairing = CutAndSplicePairing(blmin, p1=1., p2=0., minfrac=0.15,
                                  use_tags=False)
    # Every cell angle gets the same bounds.
    cellbounds = CellBounds(bounds={
        angle: [0.2 * 180., 0.8 * 180.] for angle in ('phi', 'chi', 'psi')
    })
    strainmut = StrainMutation(blmin, stddev=0.7, cellbounds=cellbounds,
                               use_tags=False)
    blmin_soft = closest_distances_generator(all_atom_types, 0.1)
    softmut = SoftMutation(blmin_soft, bounds=[2., 5.], use_tags=False)
    rattlemut = RattleMutation(blmin, n_to_optimize, rattle_prop=0.8,
                               rattle_strength=2.5, use_tags=False)
    mutations = OperationSelector([4., 4., 2],
                                  [softmut, strainmut, rattlemut])

    # Recalculate the raw scores of the relaxed candidates: each old row
    # is deleted and the re-scored structure is stored again.
    for relaxed in db.get_all_relaxed_candidates():
        relaxed = singlepoint(relaxed, kptdensity=kptdensity)
        db.c.delete([relaxed.info['relax_id']])
        relaxed.info.setdefault('data', {})
        db.add_relaxed_step(relaxed)
    print('Finished recalculating raw scores')

    # Relax the initial candidates.
    while db.get_number_of_unrelaxed_candidates() > 0:
        cand = db.get_an_unrelaxed_candidate()
        cand.wrap()
        cand = relax_one(cand, kptdensity=kptdensity)
        db.add_relaxed_step(cand)

    # Create the population.
    population = Population(data_connection=db,
                            population_size=population_size,
                            comparator=comparator,
                            logfile='log.txt')

    current_pop = population.get_current_population()
    strainmut.update_scaling_volume(current_pop, w_adapt=0.5, n_adapt=4)
    pairing.update_scaling_volume(current_pop, w_adapt=0.5, n_adapt=4)

    # Test n_to_test new candidates.
    ga_raw_scores = []
    step = 0
    for step in range(n_to_test):
        print('Starting configuration number %d' % step, flush=True)

        t_start = time()
        child = None
        use_pairing = random() > mutation_probability
        if use_pairing:
            while child is None:
                a1, a2 = population.get_two_candidates()
                child, desc = pairing.get_new_individual([a1, a2])
        else:
            while child is None:
                a1 = population.get_one_candidate()
                child, desc = mutations.get_new_individual([a1])

        elapsed = time() - t_start
        op_name = 'pairing' if use_pairing else 'mutating'
        print('Time for %s candidate(s): %.3f' % (op_name, elapsed),
              flush=True)

        child.wrap()
        db.add_unrelaxed_candidate(child, description=desc)

        child = relax_one(child, kptdensity=kptdensity)
        db.add_relaxed_step(child)

        # Various updates:
        population.update()
        current_pop = population.get_current_population()

        # Refresh the scaling volumes and the snapshot every tenth step.
        if step % 10 == 0:
            strainmut.update_scaling_volume(current_pop, w_adapt=0.5,
                                            n_adapt=4)
            pairing.update_scaling_volume(current_pop, w_adapt=0.5,
                                          n_adapt=4)
            write('current_population.traj', current_pop)

        # Print out information for easy analysis/plotting afterwards:
        if use_pairing:
            print('Step %d %s %.3f %.3f %.3f' % (
                step, desc, get_raw_score(a1), get_raw_score(a2),
                get_raw_score(child)))
        else:
            print('Step %d %s %.3f %.3f' % (
                step, desc, get_raw_score(a1), get_raw_score(child)))

        print('Step %d highest raw score in pop: %.3f' % (
            step, get_raw_score(current_pop[0])))
        ga_raw_scores.append(get_raw_score(child))
        print('Step %d highest raw score generated by GA: %.3f' % (
            step, max(ga_raw_scores)))

    emin = population.pop[0].get_potential_energy()
    print('GA finished after step %d' % step)
    print('Lowest energy = %8.3f eV' % emin, flush=True)
    write('all_candidates.traj', db.get_all_relaxed_candidates())
    write('current_population.traj', population.get_current_population())
Esempio n. 15
0
def run_ga(n_to_test, kptdensity=None):
    ''' Run the GA once the initial random structures have been
    stored in godb.db.

    n_to_test: number of new candidates to generate and relax.
    kptdensity: accepted for interface compatibility; it is not used
        anywhere in this function body.
    '''
    # Various initializations:
    population_size = 10
    db = DataConnection('godb.db')
    atom_numbers_to_optimize = db.get_atom_numbers_to_optimize()
    n_to_optimize = len(atom_numbers_to_optimize)
    slab = db.get_slab()
    all_atom_types = get_all_atom_types(slab, atom_numbers_to_optimize)
    blmin = closest_distances_generator(all_atom_types,
                                        ratio_of_covalent_radii=0.05)

    # The mix of genetic operators: a mutation is applied when the random
    # draw is at most mutation_probability, a pairing otherwise.
    mutation_probability = 0.3333
    pairing = CutAndSplicePairing(slab, n_to_optimize, blmin)
    rattlemut = RattleMutation(blmin, n_to_optimize,
                               rattle_prop=0.8, rattle_strength=1.5)
    mirrormut = MirrorMutation(blmin, n_to_optimize)
    mutations = OperationSelector([1., 1.], [rattlemut, mirrormut])

    # Recalculate raw scores of any relaxed candidates
    # present in the godb.db database (only applies to
    # iter007).
    for relaxed in db.get_all_relaxed_candidates():
        relaxed = singlepoint(relaxed)
        db.c.delete([relaxed.info['relax_id']])
        relaxed.info.setdefault('data', {})
        db.add_relaxed_step(relaxed)
    print('Finished recalculating raw scores')

    # Relax the randomly generated initial candidates:
    while db.get_number_of_unrelaxed_candidates() > 0:
        start = db.get_an_unrelaxed_candidate()
        start.wrap()
        start = relax_one(start)
        db.add_relaxed_step(start)

    # Create the population
    population = Population(data_connection=db,
                            population_size=population_size,
                            comparator=comparator,
                            logfile='log.txt')
    current_pop = population.get_current_population()

    # Test n_to_test new candidates
    ga_raw_scores = []
    step = 0
    for step in range(n_to_test):
        print('Starting configuration number %d' % step, flush=True)

        started = time()
        offspring = None
        paired = random() > mutation_probability
        if paired:
            while offspring is None:
                a1, a2 = population.get_two_candidates()
                offspring, desc = pairing.get_new_individual([a1, a2])
        else:
            while offspring is None:
                a1 = population.get_one_candidate()
                offspring, desc = mutations.get_new_individual([a1])

        duration = time() - started
        label = 'pairing' if paired else 'mutating'
        print('Time for %s candidate(s): %.3f' % (label, duration),
              flush=True)

        offspring.wrap()
        db.add_unrelaxed_candidate(offspring, description=desc)

        offspring = relax_one(offspring)
        db.add_relaxed_step(offspring)

        # Various updates; the population snapshot is rewritten each step.
        population.update()
        current_pop = population.get_current_population()
        write('current_population.traj', current_pop)

        # Print out information for easy analysis/plotting afterwards:
        if paired:
            print('Step %d %s %.3f %.3f %.3f' % (
                step, desc, get_raw_score(a1), get_raw_score(a2),
                get_raw_score(offspring)))
        else:
            print('Step %d %s %.3f %.3f' % (
                step, desc, get_raw_score(a1), get_raw_score(offspring)))

        print('Step %d highest raw score in pop: %.3f' % (
            step, get_raw_score(current_pop[0])))
        ga_raw_scores.append(get_raw_score(offspring))
        print('Step %d highest raw score generated by GA: %.3f' % (
            step, max(ga_raw_scores)))

    emin = population.pop[0].get_potential_energy()
    print('GA finished after step %d' % step)
    print('Lowest energy = %8.3f eV' % emin, flush=True)
    write('all_candidates.traj', db.get_all_relaxed_candidates())
    write('current_population.traj', population.get_current_population())
Esempio n. 16
0
def test_basic_example_main_run(seed, testdir):
    """Run the basic GA example end to end with the EMT calculator.

    The first half builds a starting population on an Au(111) slab and
    stores it in the database at ``db_file``; the second half reopens that
    database, relaxes the starting candidates and runs a short GA loop.

    seed: seed for the random number generator shared by all GA components.
    testdir: not used in the body; presumably a fixture requested for its
        side effect -- TODO confirm.
    """
    # set up the random number generator
    rng = np.random.RandomState(seed)

    # create the surface
    slab = fcc111('Au', size=(4, 4, 1), vacuum=10.0, orthogonal=True)
    slab.set_constraint(FixAtoms(mask=len(slab) * [True]))

    # define the volume in which the adsorbed cluster is optimized
    # the volume is defined by a corner position (p0)
    # and three spanning vectors (v1, v2, v3)
    pos = slab.get_positions()
    cell = slab.get_cell()
    p0 = np.array([0., 0., max(pos[:, 2]) + 2.])
    v1 = cell[0, :] * 0.8
    v2 = cell[1, :] * 0.8
    v3 = cell[2, :]
    v3[2] = 3.

    # Define the composition of the atoms to optimize
    atom_numbers = 2 * [47] + 2 * [79]

    # define the closest distance two atoms of a given species can be to
    # each other
    unique_atom_types = get_all_atom_types(slab, atom_numbers)
    blmin = closest_distances_generator(atom_numbers=unique_atom_types,
                                        ratio_of_covalent_radii=0.7)

    # create the starting population
    sg = StartGenerator(slab=slab,
                        blocks=atom_numbers,
                        blmin=blmin,
                        box_to_place_in=[p0, [v1, v2, v3]],
                        rng=rng)

    # generate the starting population
    population_size = 5
    starting_population = [sg.get_new_candidate()
                           for _ in range(population_size)]

    # from ase.visualize import view   # uncomment these lines
    # view(starting_population)        # to see the starting population

    # create the database to store information in
    d = PrepareDB(db_file_name=db_file,
                  simulation_cell=slab,
                  stoichiometry=atom_numbers)

    for a in starting_population:
        d.add_unrelaxed_candidate(a)

    # XXXXXXXXXX This should be the beginning of a new test,
    # but we are using some resources from the previous part.
    # Maybe refactor those things as (module-level?) fixtures.

    # Change the following three parameters to suit your needs
    population_size = 5
    mutation_probability = 0.3
    n_to_test = 5

    # Initialize the different components of the GA.
    # Reopen the very database that was prepared above: the path comes
    # from db_file rather than a second hard-coded file name, so both
    # halves of the test always work on the same file.
    da = DataConnection(db_file)
    atom_numbers_to_optimize = da.get_atom_numbers_to_optimize()
    n_to_optimize = len(atom_numbers_to_optimize)
    slab = da.get_slab()
    all_atom_types = get_all_atom_types(slab, atom_numbers_to_optimize)
    blmin = closest_distances_generator(all_atom_types,
                                        ratio_of_covalent_radii=0.7)

    comp = InteratomicDistanceComparator(n_top=n_to_optimize,
                                         pair_cor_cum_diff=0.015,
                                         pair_cor_max=0.7,
                                         dE=0.02,
                                         mic=False)

    pairing = CutAndSplicePairing(slab, n_to_optimize, blmin, rng=rng)
    mutations = OperationSelector(
        [1., 1., 1.],
        [MirrorMutation(blmin, n_to_optimize, rng=rng),
         RattleMutation(blmin, n_to_optimize, rng=rng),
         PermutationMutation(n_to_optimize, rng=rng)],
        rng=rng)

    # Relax all unrelaxed structures (e.g. the starting population)
    while da.get_number_of_unrelaxed_candidates() > 0:
        a = da.get_an_unrelaxed_candidate()
        a.calc = EMT()
        print('Relaxing starting candidate {0}'.format(a.info['confid']))
        dyn = BFGS(a, trajectory=None, logfile=None)
        dyn.run(fmax=0.05, steps=100)
        # The raw score is the negated potential energy.
        set_raw_score(a, -a.get_potential_energy())
        da.add_relaxed_step(a)

    # create the population
    population = Population(data_connection=da,
                            population_size=population_size,
                            comparator=comp,
                            rng=rng)

    # test n_to_test new candidates
    for i in range(n_to_test):
        print('Now starting configuration number {0}'.format(i))
        a1, a2 = population.get_two_candidates()
        a3, desc = pairing.get_new_individual([a1, a2])
        if a3 is None:
            # The pairing produced no offspring; go on with the next step.
            continue
        da.add_unrelaxed_candidate(a3, description=desc)

        # Check if we want to do a mutation
        if rng.rand() < mutation_probability:
            a3_mut, desc = mutations.get_new_individual([a3])
            if a3_mut is not None:
                da.add_unrelaxed_step(a3_mut, desc)
                a3 = a3_mut

        # Relax the new candidate
        a3.calc = EMT()
        dyn = BFGS(a3, trajectory=None, logfile=None)
        dyn.run(fmax=0.05, steps=100)
        set_raw_score(a3, -a3.get_potential_energy())
        da.add_relaxed_step(a3)
        population.update()

    write('all_candidates.traj', da.get_all_relaxed_candidates())
Esempio n. 17
0
    def test_generators(self):
        """Generate features from atoms objects and check their shapes.

        The candidate list and the stacked training features are stored
        on the test class at the end.
        """
        # Test generic features for Pt then both Pt and Au.
        get_mendeleev_params(atomic_number=78)
        get_mendeleev_params(atomic_number=[78, 79],
                             params=default_params + ['en_ghosh'])

        # Connect database generated by a GA search.
        gadb = DataConnection('{}/data/gadb.db'.format(wkdir))

        # Get all relaxed candidates from the db file.
        print('Getting candidates from the database')
        all_cand = gadb.get_all_relaxed_candidates(use_extinct=False)

        # Setup the test and training datasets.
        testset = get_unique(atoms=all_cand, size=test_size, key='raw_score')
        test_atoms = testset['atoms']
        self.assertEqual(len(test_atoms), test_size)
        self.assertEqual(len(testset['taken']), test_size)

        trainset = get_train(atoms=all_cand, size=train_size,
                             taken=testset['taken'], key='raw_score')
        train_atoms = trainset['atoms']
        self.assertEqual(len(train_atoms), train_size)
        self.assertEqual(len(trainset['target']), train_size)

        # Initiate the fingerprint generators with relevant input variables.
        print('Getting the fingerprints')
        gen = FeatureGenerator(element_parameters='atomic_radius', nprocs=1)
        gen.normalize_features(train_atoms, test_atoms)

        # 'stacked' collects the training feature blocks column-wise.
        stacked = gen.return_vec(train_atoms, [gen.nearestneighbour_vec])
        rows, cols = np.shape(stacked)
        self.assertEqual((rows, cols), (train_size, 4))
        self.assertEqual(
            len(gen.return_names([gen.nearestneighbour_vec])), cols)
        print('passed nearestneighbour_vec')

        block = gen.return_vec(train_atoms, [gen.bond_count_vec])
        rows, cols = np.shape(block)
        stacked = np.concatenate((stacked, block), axis=1)
        self.assertEqual((rows, cols), (train_size, 52))
        print('passed bond_count_vec')

        block = gen.return_vec(train_atoms, [gen.distribution_vec])
        rows, cols = np.shape(block)
        stacked = np.concatenate((stacked, block), axis=1)
        self.assertEqual((rows, cols), (train_size, 10))
        print('passed distribution_vec')

        # EXPENSIVE to calculate. Not included in training data.
        block = gen.return_vec(test_atoms, [gen.connections_vec])
        rows, cols = np.shape(block)
        self.assertEqual((rows, cols), (test_size, 26))
        print('passed connections_vec')

        block = gen.return_vec(train_atoms, [gen.rdf_vec])
        rows, cols = np.shape(block)
        stacked = np.concatenate((stacked, block), axis=1)
        self.assertEqual((rows, cols), (train_size, 20))
        print('passed rdf_vec')

        # Start testing the standard fingerprint vector generators.
        block = gen.return_vec(train_atoms, [gen.element_mass_vec])
        rows, cols = np.shape(block)
        stacked = np.concatenate((stacked, block), axis=1)
        self.assertEqual((rows, cols), (train_size, 1))
        self.assertEqual(len(gen.return_names([gen.element_mass_vec])), cols)
        print('passed element_mass_vec')

        block = gen.return_vec(train_atoms, [gen.element_parameter_vec])
        rows, cols = np.shape(block)
        stacked = np.concatenate((stacked, block), axis=1)
        self.assertEqual((rows, cols), (train_size, 4))
        self.assertEqual(
            len(gen.return_names([gen.element_parameter_vec])), cols)
        print('passed element_parameter_vec')

        block = gen.return_vec(train_atoms, [gen.composition_vec])
        rows, cols = np.shape(block)
        stacked = np.concatenate((stacked, block), axis=1)
        self.assertEqual((rows, cols), (train_size, 2))
        self.assertEqual(len(gen.return_names([gen.composition_vec])), cols)
        print('passed composition_vec')

        block = gen.return_vec(train_atoms, [gen.eigenspectrum_vec])
        rows, cols = np.shape(block)
        stacked = np.concatenate((stacked, block), axis=1)
        self.assertEqual((rows, cols), (train_size, 147))
        self.assertEqual(len(gen.return_names([gen.eigenspectrum_vec])), cols)
        print('passed eigenspectrum_vec')

        block = gen.return_vec(train_atoms, [gen.distance_vec])
        rows, cols = np.shape(block)
        stacked = np.concatenate((stacked, block), axis=1)
        self.assertEqual((rows, cols), (train_size, 2))
        self.assertEqual(len(gen.return_names([gen.distance_vec])), cols)
        print('passed distance_vec')

        # Several generators requested in one call (not added to 'stacked').
        block = gen.return_vec(
            train_atoms,
            [gen.eigenspectrum_vec, gen.element_mass_vec,
             gen.composition_vec])
        rows, cols = np.shape(block)
        self.assertEqual((rows, cols), (train_size, 150))
        combined_names = gen.return_names(
            [gen.eigenspectrum_vec, gen.element_mass_vec,
             gen.composition_vec])
        self.assertEqual(len(combined_names), cols)
        print('passed combined generation')

        # The neighbor vectors have one column per atom of the first
        # training structure.
        block = gen.return_vec(train_atoms, [gen.neighbor_sum_vec])
        rows, cols = np.shape(block)
        self.assertEqual((rows, cols), (train_size, len(train_atoms[0])))
        print('passed neighbor_sum_vec')

        block = gen.return_vec(train_atoms, [gen.neighbor_mean_vec])
        rows, cols = np.shape(block)
        self.assertEqual((rows, cols), (train_size, len(train_atoms[0])))
        print('passed neighbor_mean_vec')

        # Repeat the neighbor vectors with max_neighbors='full'.
        gen = FeatureGenerator(element_parameters='atomic_radius',
                               max_neighbors='full',
                               nprocs=1)
        gen.normalize_features(train_atoms, test_atoms)

        block = gen.return_vec(train_atoms, [gen.neighbor_sum_vec])
        rows, cols = np.shape(block)
        self.assertEqual((rows, cols), (train_size, len(train_atoms[0])))
        print('passed neighbor_sum_vec all neighbors')

        block = gen.return_vec(train_atoms, [gen.neighbor_mean_vec])
        rows, cols = np.shape(block)
        self.assertEqual((rows, cols), (train_size, len(train_atoms[0])))
        print('passed neighbor_mean_vec all neighbors')

        # Do basic check for atomic properties.
        plain_features = []
        number_features = []
        # EXPENSIVE to calculate. Not included in training data.
        for atoms in test_atoms:
            plain_features.append(neighbor_features(atoms=atoms))
            number_features.append(
                neighbor_features(atoms=atoms, property=['atomic_number']))
        self.assertEqual(np.shape(plain_features), (test_size, 15))
        self.assertEqual(np.shape(number_features), (test_size, 30))
        print('passed graph_vec')

        # Store the results on the class.
        self.__class__.all_cand = all_cand
        self.__class__.data = stacked
Esempio n. 18
0
from catlearn.featurize.setup import FeatureGenerator
from catlearn.regression import GaussianProcess
from catlearn.preprocess.feature_engineering import single_transform
from catlearn.ga import GeneticAlgorithm

# ## Data Generation
#
# To start with we import some data. For this tutorial, the data for alloyed nanoparticles are used.

# In[2]:

# Connect to the ase atoms database; the path is relative to the directory
# this script is run from.
gadb = DataConnection('../../data/gadb.db')

# Get all relaxed candidates from the db file.
# NOTE(review): use_extinct=False presumably leaves out candidates flagged as
# extinct -- confirm against the DataConnection documentation.
all_cand = gadb.get_all_relaxed_candidates(use_extinct=False)

# We then split this data into some training data and a holdout test set.

# In[3]:

# Build the holdout test set from the relaxed candidates, keyed on
# 'raw_score'. The result is indexed below with 'taken' and 'target'.
# NOTE(review): size=100 presumably is the number of candidates drawn --
# confirm against get_unique.
testset = get_unique(atoms=all_cand, size=100, key='raw_score')

# Build the training set from the same candidates. testset['taken'] is passed
# along, presumably so that held-out candidates are not drawn again -- confirm
# against get_train.
trainset = get_train(atoms=all_cand,
                     size=500,
                     taken=testset['taken'],
                     key='raw_score')

# Keep the 'target' entries of both sets under short names for later use.
trainval = trainset['target']
testval = testset['target']
Esempio n. 19
0
def test_database_logic(seed, testdir):
    """Check the candidate bookkeeping of the GA database.

    A start population of 20 unrelaxed candidates (two Ag and two Au atoms
    placed above a two-layer Au(111) slab) is written to ``db_file``. One
    candidate is then queued and stored as relaxed, a second one is queued
    and removed from the queue again, and the counts reported by
    ``DataConnection`` are verified after every transition.

    ``db_file`` is not defined in this function; it has to be supplied by
    the enclosing module.

    seed -- value used to seed ``numpy.random.RandomState``.
    testdir -- not referenced in the body; presumably a fixture requested
        for its side effect (TODO confirm).
    """
    import numpy as np
    from ase.build import fcc111
    from ase.constraints import FixAtoms
    from ase.ga import set_raw_score
    from ase.ga.data import DataConnection, PrepareDB
    from ase.ga.startgenerator import StartGenerator
    from ase.ga.utilities import closest_distances_generator

    # Size of the start population; the assertions below count against it.
    n_start = 20

    random_state = np.random.RandomState(seed)

    # Substrate, with every atom at z <= 10 held fixed.
    slab = fcc111('Au', size=(4, 4, 2), vacuum=10.0, orthogonal=True)
    fixed_mask = slab.positions[:, 2] <= 10.
    slab.set_constraint(FixAtoms(mask=fixed_mask))

    # Box in which the adsorbed cluster is placed: a corner point plus
    # three spanning vectors. The corner sits 2 above the topmost atom,
    # the in-plane vectors are 80 % of the cell vectors and the third
    # vector gets its z component set to 3.
    cell = slab.get_cell()
    top_z = slab.get_positions()[:, 2].max()
    origin = np.array([0., 0., top_z + 2.])
    span_a = cell[0, :] * 0.8
    span_b = cell[1, :] * 0.8
    span_c = cell[2, :]
    span_c[2] = 3.

    # Minimum allowed distances between atoms of the given species.
    min_distances = closest_distances_generator(atom_numbers=[47, 79],
                                                ratio_of_covalent_radii=0.7)

    # Composition of the cluster to optimize: two Ag and two Au atoms.
    stoichiometry = [47, 47, 79, 79]

    generator = StartGenerator(slab=slab,
                               blocks=stoichiometry,
                               blmin=min_distances,
                               box_to_place_in=[origin,
                                                [span_a, span_b, span_c]],
                               rng=random_state)
    candidates = [generator.get_new_candidate() for _ in range(n_start)]

    # Create the database and store the whole start population in it.
    writer = PrepareDB(db_file_name=db_file,
                       simulation_cell=slab,
                       stoichiometry=stoichiometry)
    for candidate in candidates:
        writer.add_unrelaxed_candidate(candidate)

    # The actual test starts here, on a fresh connection to the same file.
    conn = DataConnection(db_file)

    # Called only to make sure they run; the results are not inspected.
    conn.get_slab()
    conn.get_atom_numbers_to_optimize()

    assert conn.get_number_of_unrelaxed_candidates() == n_start

    # Queueing a candidate takes it out of the unrelaxed count.
    first = conn.get_an_unrelaxed_candidate()
    conn.mark_as_queued(first)
    assert conn.get_number_of_unrelaxed_candidates() == n_start - 1
    assert len(conn.get_all_candidates_in_queue()) == 1

    # Storing the relaxed step empties the queue and adds one relaxed entry.
    set_raw_score(first, 0.0)
    conn.add_relaxed_step(first)
    assert conn.get_number_of_unrelaxed_candidates() == n_start - 1
    assert len(conn.get_all_candidates_in_queue()) == 0
    assert len(conn.get_all_relaxed_candidates()) == 1

    # A queued candidate is listed by its confid and can be removed again.
    second = conn.get_an_unrelaxed_candidate()
    conn.mark_as_queued(second)
    confid = second.info['confid']
    assert conn.get_all_candidates_in_queue()[0] == confid

    conn.remove_from_queue(confid)
    assert len(conn.get_all_candidates_in_queue()) == 0
Esempio n. 20
0
    relax(a3, cellbounds=cellbounds)
    da.add_relaxed_step(a3)

    # If the relaxation has changed the cell parameters
    # beyond the bounds we disregard it in the population
    cell = a3.get_cell()
    if not cellbounds.is_within_bounds(cell):
        da.kill_candidate(a3.info['confid'])

    # Update the population
    population.update()

    if step % 10 == 0:
        # Update the scaling volumes of the strain mutation
        # and the pairing operator based on the current
        # best structures contained in the population
        current_pop = population.get_current_population()
        strainmut.update_scaling_volume(current_pop, w_adapt=0.5, n_adapt=4)
        pairing.update_scaling_volume(current_pop, w_adapt=0.5, n_adapt=4)
        write('current_population.traj', current_pop)

# Final report of the run.
print('GA finished after step %d' % step)

# Re-read the population before reporting the best score: the loop above only
# refreshes `current_pop` on steps where `step % 10 == 0`, while the
# population itself is updated on every step, so the loop's copy may be out
# of date once the loop has finished.
current_pop = population.get_current_population()

# The first entry is taken as the best candidate.
# NOTE(review): this relies on the population being ordered best-first --
# confirm against the Population class in use.
hiscore = get_raw_score(current_pop[0])
print('Highest raw score = %8.4f eV' % hiscore)

# Save every relaxed candidate found during the run.
all_candidates = da.get_all_relaxed_candidates()
write('all_candidates.traj', all_candidates)

# Save the final population (the same one the score above was read from).
write('current_population.traj', current_pop)