def test_setup(self):
    """Check that a threshold PoolGA starts queueing jobs once its threads run.

    Builds an on-disk database and a 13-particle "LJ" system, starts the GA
    threads, waits briefly, and asserts that the job queue is no longer empty.
    The Pyro4 daemon is always shut down, even when the assertion fails.
    """
    nMOL = 13
    compare = SimpleEnergeticCharacterizer(accuracy=5e-7)

    db_name = f"thresh_lj{nMOL}.db"
    # Recorded on self.files -- presumably so the fixture can delete it afterwards.
    self.files.append(db_name)
    database = Database(db=db_name, new_database=True, compare_clusters=compare)

    system = DefineSystem(
        molecules=[Molecule(coordinates=np.array([[0.0, 0.0, 0.0]]), particle_names=["LJ"])],
        numbers_of_molecules=[nMOL],
        box_length=15)

    # Define a mutation scheme.
    # The probabilities are normalised inside Mutate, so we can just pass relative probabilities.
    mutation = Mutate(mutations=[RandomSingleTranslation(), RandomMultipleTranslations()],
                      relative_probabilities=[1, 1])

    daemon = Pyro4.Daemon()
    try:
        ga_args = dict(database=database,
                       min_pool_size=10,
                       max_generations=1000,
                       system=system,
                       convergence_steps=500,
                       mutate=mutation,
                       daemon=daemon,
                       mutation_rate=0.3,
                       max_queue_size=10,)

        thresh_GA = PoolGA(threshold=-41.326801, initial_pool=database, **ga_args)
        thresh_GA.start_threads()

        # Give the GA thread a moment to produce its first job.
        time.sleep(0.15)
        self.assertFalse(thresh_GA.jobs.empty())
    finally:
        # Previously skipped whenever anything above raised, leaking the daemon.
        daemon.shutdown()
def setup_database(self, database: Union[str, Database], *args, **kwargs) -> None:
    """Validate ``database`` and store a usable Database on ``self.database``.

    Args:
        database: Union[str, Database], required, either a ready Database
            object (stored as-is) or the path of a database on disk.
        *args: extra positional arguments forwarded to ``Database`` when a
            path is given.
        **kwargs: extra keyword arguments forwarded to ``Database`` when a
            path is given.

    Returns:
        None. Sets self.database

    Raises:
        AssertionError when database is neither a str nor a Database
    """
    if isinstance(database, Database):
        self.database = database

    elif isinstance(database, str):
        # noinspection PyTypeChecker
        try:
            # First attempt: open with the caller's arguments only.
            opened = Database(database, *args, **kwargs)
        except IOError:
            # Fall back to creating a new database at that path and warn,
            # because it is created with default attributes.
            opened = Database(database, new_database=True, *args, **kwargs)
            warning = ("You are using a database with the default attributes saved at: {} "
                       "If this was not your intention you should explicitly create the database "
                       "and pass it to the GA").format(database)
            self.log.warning(warning)
        self.database = opened

    else:
        problem = AssertionError(
            "Database must be an instance of str or Database! Got {}:{}".format(type(database), database))
        # Raised inside a try block so that log.exception records a traceback.
        try:
            raise problem
        except AssertionError as error:
            self.log.exception(error)
            raise

    self.log.info("Using database: {}\n".format(self.database))
def __init__(self,
             n_quench: int = 100,
             database: Database = None,
             n_processes: int = 2,
             *args, **kwargs) -> None:
    """Store the quench settings and initialise the parent class.

    Args:
        n_quench: int, optional, stored on ``self.n_quench`` (default 100)
        database: Database, optional, stored on ``self.database``. When
            omitted, a new ``Database(new_database=True,
            compare_clusters=SimpleGeometricCharacterizer())`` is created
            for this instance.
        n_processes: int, optional, stored on ``self.n_processes`` (default 2)
        *args: forwarded to the parent ``__init__``
        **kwargs: forwarded to the parent ``__init__``
    """
    # The default used to be a Database built once at definition time, so
    # every instance created without an explicit database shared the same
    # object (and a database was created merely by importing the module).
    # Build it lazily, per instance, instead.
    if database is None:
        database = Database(new_database=True,
                            compare_clusters=SimpleGeometricCharacterizer())

    self.n_quench = n_quench
    self.database = database
    self.n_processes = n_processes

    self.quench = None
    self.results_dict = {}

    super().__init__(*args, **kwargs)
def setUpClass(cls) -> None:
    """Build the shared fixtures (database, selector, crossover, mocks, GA) used by the PoolGA tests"""
    cls.log = logging.getLogger(__name__)

    energetic_characterizer = SimpleEnergeticCharacterizer()
    cls.database = Database(new_database=True, compare_clusters=energetic_characterizer)

    shared_selector = BaseSelector()
    shared_mating = DeavenHoCrossover()
    system_stub = unittest.mock.Mock(spec=DefineSystem)
    cluster_stub = unittest.mock.MagicMock(spec=Cluster)

    cls.selector = shared_selector
    cls.mating = shared_mating
    cls.mock_system = system_stub
    # Five references to the *same* mock object.
    cls.mock_pool = [cluster_stub for _ in range(5)]

    cls.log.info(str(system_stub))

    cls.GA = PoolGA(
        database=cls.database,
        min_pool_size=10,
        system=system_stub,
        crossover=shared_mating,
        selector=shared_selector,
    )

    set_numpy_seed()

    def make_ga(cls,
                database=":memory:",
                min_pool_size=10,
                selector=shared_selector,
                mating=shared_mating,
                system=system_stub) -> PoolGA:
        """Returns a PoolGA instance built from the shared fixtures unless overridden"""
        return PoolGA(
            database=database,
            min_pool_size=min_pool_size,
            selector=selector,
            crossover=mating,
            system=system,
        )

    # Attached as a plain class attribute; the defaults above are bound to
    # the fixture objects created in this call.
    cls.make_ga = make_ga
class PoolGA(BaseGA):
    """Pool-based genetic algorithm server.

    Two daemon threads drive the optimisation:

    * ``run_GA`` fills ``self.jobs`` with serialised ``["minimize", cluster]``
      jobs (random clusters while the pool is below ``min_pool_size``,
      mated/mutated clusters afterwards) and culls the pool when it exceeds
      ``max_pool_size``.
    * ``process_returned_jobs`` drains ``self.returned_results``, stores each
      result in the database, adds accepted clusters to the pool and tracks
      convergence.

    Workers interact through ``get_job`` and ``return_result``.
    """

    # NOTE(review): the object-valued defaults below (crossover, mutate,
    # selector, compareClusters, writer) are created once at definition time
    # and shared by every PoolGA that does not override them. They are left
    # untouched here to keep the signature stable.
    # noinspection PyPep8Naming
    def __init__(self,
                 database: Union[Database, str],
                 min_pool_size: int,
                 system: Union[Cluster, DefineSystem, str, Database],
                 mutate_rate: float = 0.2,  # 20% default mutation rate
                 crossover: BaseMate = DeavenHoCrossover(),
                 mutate: Mutate = Mutate(),
                 selector: BaseSelector = RouletteWheelSelection(),  # RouletteWheel by default
                 # Compare clusters based on energy:
                 compareClusters: BaseCharacterizer = SimpleEnergeticCharacterizer(1e-9),
                 writer: BaseWriter = XYZWriter(),
                 file_name: str = "pool.xyz",
                 convergence_steps: int = 100,
                 generations: int = 100,
                 pool_unique=True,
                 max_queue_size: int = 20,
                 update_freq: int = None,
                 set_initial_pool=None,
                 daemon: Pyro4.Daemon = None,
                 log: Union[logging.Logger, None] = None,
                 max_pool_size: Union[int, None] = None,
                 *args, **kwargs) -> None:
        """Configure the GA, validate the configuration and create the worker threads.

        Args:
            database: Database object or path, handled by ``setup_database``
            min_pool_size: pool size at which normal GA steps begin
            system: template for random clusters, handled by ``check_system``
            mutate_rate: probability that a GA step is a mutation rather than a mating
            crossover: callable taking the selected parents and returning a child
            mutate: object whose ``.mutate(cluster)`` produces a mutated cluster
            selector: object whose ``.get_parents`` selects clusters from the pool
            compareClusters: callable returning truthy when two clusters are the same
            writer: object whose ``.write`` dumps a pool to a file
            file_name: file the final pool is written to by ``tear_down``
            convergence_steps: steps without improvement before convergence is declared
            generations: maximum number of generations (steps / min_pool_size)
            pool_unique: when true, duplicates (per ``compareClusters``) are rejected
            max_queue_size: capacity of the job and result queues
            update_freq: steps between ``update`` calls; defaults to a tenth of
                ``min_pool_size * generations`` (at least 1)
            set_initial_pool: optional initial pool passed to ``set_initial_pool``
            daemon: optional Pyro4 daemon shut down by ``tear_down``
            log: optional logger; defaults to this module's logger
            max_pool_size: pool size that triggers culling; defaults to ``2 * min_pool_size``
            *args: forwarded to ``BaseGA.__init__``
            **kwargs: forwarded to ``BaseGA.__init__``

        Raises:
            AssertionError: if the database, system or configuration is invalid
        """
        self.log = log or logging.getLogger(__name__)

        self.daemon = daemon

        self.database = None
        self.setup_database(database)

        self.system = self.check_system(system)

        self.min_pool_size = min_pool_size
        self.max_pool_size = max_pool_size or min_pool_size*2
        self.mutate_rate = mutate_rate
        self.max_generations = generations  # A generation is one*min_pool_size number of children
        self.pool_unique = pool_unique

        self.mating = crossover
        self.mutate = mutate
        self.selector = selector
        self.compareClusters = compareClusters
        self.writer = writer
        self.file_name = file_name

        self.convergence_steps = convergence_steps
        # Clamp to 1: the computed default is 0 when min_pool_size*generations < 10,
        # which would make ``n_steps % update_freq`` divide by zero.
        self.update_freq = update_freq or max(1, int((self.min_pool_size*self.max_generations)/10))

        super().__init__(*args, **kwargs)

        self._validate()

        self.pool = []

        self.jobs = Queue(maxsize=max_queue_size)
        self.returned_results = Queue(maxsize=max_queue_size)

        self.registered_workers = []

        self.converged = False
        self.shutdown = False
        self.n_steps = 0
        self.best_energies = []
        self.all_energies = []

        if set_initial_pool is not None:
            self.set_initial_pool(set_initial_pool)

        # Setup thread to handle incoming results
        self.result_thread = Thread(target=self.process_returned_jobs, daemon=True)
        self.GA_thread = Thread(target=self.run_GA, daemon=True)

        self.start_time = time.time()

        self.processes = []

        # noinspection SpellCheckingInspection
        self.log.debug(f"Server running on Host: {os.uname()}\nWith pid: {os.getpid()}")

    def set_initial_pool(self, initial_pool: Union[List, Database, str]) -> None:
        """Set the starting pool.

        Only a non-empty list whose first element is a Cluster is supported.

        Raises:
            ValueError: if ``initial_pool`` is an empty list
            NotImplementedError: for any other kind of input
        """
        if isinstance(initial_pool, list):
            if not initial_pool:
                # Previously an IndexError from initial_pool[0].
                raise ValueError("initial_pool is an empty list; at least one Cluster is required")
            if isinstance(initial_pool[0], Cluster):
                self.pool = initial_pool
                self.log.debug(f"intial pool = {self.pool}\n{self.pool[0].molecules}")
                return

        raise NotImplementedError("This method of setting the initial pool is not yet supported")

    # noinspection PyPep8Naming
    def take_GA_step(self) -> Cluster:
        """Produce one candidate cluster by mutation or by mating.

        With probability ``mutate_rate`` a single selected cluster is mutated;
        otherwise pairs of parents are mated until the crossover returns a Cluster.
        """
        # noinspection PyArgumentEqualDefault
        if np.random.uniform(0, 1.0) < self.mutate_rate:
            mutated_cluster = self.mutate.mutate(self.select_clusters(n_clusters=1)[0])
            self.log.debug("Mutated cluster to be minimised {}".format(mutated_cluster))
            return mutated_cluster

        while True:
            parents = self.select_clusters(n_clusters=2)
            self.log.debug("Parents: {} & {} selected for mating".format(parents[0], parents[1]))
            child = self.mating(parents)
            self.log.debug("Child: {}".format(child))
            # The crossover may return something that is not a Cluster; retry with new parents.
            if isinstance(child, Cluster):
                return child

    def start_threads(self) -> None:
        """Start the result-processing thread and the GA thread."""
        self.log.debug("Starting GA thread and result thread")
        self.result_thread.start()
        self.GA_thread.start()
        self.log.info("Starting the server event loop")
        self.start_time = time.time()

    # noinspection PyPep8Naming
    def run_GA(self) -> None:
        """Main GA loop: queue jobs until converged, out of generations or shut down.

        Calls ``tear_down`` when the loop ends.
        """
        # TODO: find some way to unittest the main run_GA method?
        self.log.info(f"Beginning global optimisation of {self.system}. "
                      f"At {self.start_time} Using version {bmpga.__version__} of BMPGA")

        # while max_generations is not exceeded and optimisation is not converged:
        while (self.generations() <= self.max_generations) and not self.converged and not self.shutdown:
            time.sleep(0.05)

            # Read the size once: the result thread appends to the pool
            # concurrently, and a single snapshot guarantees that exactly one
            # branch below is taken (so ``job`` is always bound before use).
            pool_size = len(self.pool)

            if pool_size < self.min_pool_size:
                # If the pool is not initialised, generate and minimise a random cluster
                self.log.debug("Pool not full: {} < {}. Generating random cluster."
                               .format(pool_size, self.min_pool_size))
                job = dumps(["minimize", self.system.get_random_cluster()])
            elif pool_size <= self.max_pool_size:
                # If min_pool_size <= pool_size <= max_pool_size, perform normal GA operation
                job = dumps(["minimize", self.take_GA_step()])
            else:
                # If the pool is too large, cull back to min_pool_size
                self.cull_pool()
                continue

            # If the job queue is full, wait until there is space and then put the job there.
            # Note: this will mean that the main GA will pause here until some jobs
            # have been taken away by workers
            # noinspection PyArgumentEqualDefault
            self.jobs.put(job, block=True, timeout=None)

        self.tear_down()

    def get_job(self, worker_id: int) -> Union[List, bytes]:
        """Method called by remote workers to retrieve jobs

        Registers unknown workers, then polls the job queue. The wait between
        polls starts at a random value below one second and grows by 10% each
        time; once it reaches five seconds a ``["NoJob", None]`` message is
        returned. A ``["Shutdown", None]`` message is returned when the GA is
        shutting down.
        """
        if self.shutdown:
            return dumps(["Shutdown", None])

        if worker_id not in self.registered_workers:
            self.registered_workers.append(worker_id)
            self.log.info("New worker {} registered".format(worker_id))

        # noinspection PyArgumentEqualDefault
        t = np.random.uniform(0.0, 1.0)
        while t < 5:
            if not self.jobs.empty():
                job = self.jobs.get()  # block=True
                self.log.debug("Job assigned to worker: {id}".format(id=worker_id))
                return job

            self.log.debug(f"Queue empty, waiting {t} seconds for a new job")
            time.sleep(t)
            t *= 1.1

        return dumps(["NoJob", None])

    def return_result(self, result, worker_id: int) -> None:
        """Method called by workers to hand back a minimised cluster.

        Args:
            result: mapping whose ``"data"`` entry is the base64-encoded, serialised Cluster
            worker_id: identifier of the worker returning the result

        Raises:
            AssertionError: if the decoded object is not a Cluster
        """
        # NOTE(review): ``loads`` presumably unpickles data supplied by a remote
        # worker; deserialising untrusted input can execute arbitrary code.
        # Confirm that workers are trusted.
        result = loads(b64decode(result["data"]))

        # The original wrapped ``assert`` in ``except AttributeError``, so the
        # failure was never logged and the check disappeared under ``python -O``.
        try:
            if not isinstance(result, Cluster):
                raise AssertionError("expected a Cluster instance")
        except AssertionError as error:
            message = "Cluster object not returned by worker {}! Received: {} Type({})\n{}"
            self.log.exception(message.format(worker_id, result, type(result), error))
            raise

        result.step = self.n_steps
        result.minimum = True

        self.returned_results.put([result, worker_id])  # block=True

    def update(self) -> None:
        """Write the pool and energy histories to disk and reset the in-memory histories.

        Appends to ``avg_energies.txt``, ``all_energies.txt`` and
        ``best_energies.txt`` in the current working directory.
        """
        generation = self.generations()

        # Tolerate being called before any best energy exists (e.g. from
        # tear_down when no result was ever returned).
        best_so_far = self.best_energies[-1] if self.best_energies else None
        message = f"At generation {generation}. Best energy so far: {best_so_far}"
        self.log.info(message)

        self.write_pool_to_file(filename=f"pool_generation_{generation}.xyz")

        self.log.debug(str(self.all_energies) + str(self.best_energies))

        with open("avg_energies.txt", "a+") as f:
            recent_costs = [entry[1] for entry in self.all_energies[-self.min_pool_size:]]
            f.write(f"{generation},{np.mean(recent_costs)}\n")

        with open("all_energies.txt", "a+") as f:
            for line in self.all_energies:
                f.write(f"{generation},{','.join([str(x) for x in line])}\n")

        if len(self.best_energies) >= 1:
            # Entry 0 is skipped because after the first update it is the entry
            # carried over from the previous call.
            # NOTE(review): on the very first update this also skips the first
            # best energy ever recorded -- confirm whether that is intended.
            with open("best_energies.txt", "a+") as f:
                for line in self.best_energies[1:]:
                    f.write(f"{','.join([str(x) for x in line])}\n")

        self.all_energies = []
        # Keep only the latest best entry (or nothing if there is none).
        self.best_energies = self.best_energies[-1:]

    def process_returned_jobs(self) -> None:
        """Result-thread loop: store returned clusters, update the pool and check convergence."""
        self.log.debug("started thread in process_returned_jobs")

        while self.generations() <= self.max_generations and not self.converged and not self.shutdown:
            time.sleep(0.01)

            if not self.returned_results.empty():
                result, worker_id = self.returned_results.get()  # block=True
                self.n_steps += 1

                # Call insert first to generate cluster._id etc.
                result.step = self.n_steps
                result = self.database.insert_cluster(result)

                self.log.debug(f"Processing: {result} from worker: {worker_id}")

                if self.check_cluster(result):
                    self.pool.append(result)
                    self.log.info(f"{result} accepted. Adding to pool")

                # sort the new pool
                self.pool = sorted(self.pool, key=lambda x: x.cost)

                self.check_converged(result)

                if self.n_steps % self.update_freq == 0:
                    self.update()

        if self.converged:
            message = "Convergence reached. result_thread shutting down."
            self.log.info(message)
        elif self.generations() > self.max_generations:
            # The comparison was inverted (<=), so this branch fired for a
            # shutdown request instead, and ``message.format`` (the bound
            # method) was logged rather than the message itself.
            message = (f"Max generations reached: {self.generations()} >= {self.max_generations} "
                       f"GA server will shut down shortly.")
            self.log.info(message)
        elif self.shutdown:
            message = "Main thread has requested shutdown. result_thread exiting."
            self.log.info(message)
        else:
            message = f"process_returned_jobs shutting down for an unknown reason!\n{self.__dict__}"
            self.log.error(message)

    def check_cluster(self, new_cluster) -> bool:
        """Return True if ``new_cluster`` may be added to the pool.

        Always True when ``pool_unique`` is false or the pool is empty;
        otherwise True only if ``compareClusters`` matches no pool member.
        """
        if not self.pool_unique or len(self.pool) < 1:
            return True

        for member in self.pool:
            # Cluster is not unique if it is the same as any other cluster
            if self.compareClusters(new_cluster, member):
                return False

        self.log.debug(f"Cluster {new_cluster} unique, adding to pool")
        return True

    def check_converged(self, cluster) -> None:
        """Record ``cluster``'s energy and update ``self.converged``.

        Convergence is declared when the cluster does not improve on the best
        energy and at least ``convergence_steps`` steps have passed since that
        best energy was found.
        """
        try:
            best_step, best_en = self.best_energies[-1]
        except IndexError:
            # Nothing recorded yet: any finite energy must count as the new best.
            # The old sentinel of 0.0 meant a first cluster with cost >= 0 was
            # never recorded (and update() later failed on the empty list).
            best_step, best_en = 0, float("inf")

        self.all_energies.append([self.n_steps, cluster.cost])

        if cluster.cost >= best_en:
            if (self.n_steps - best_step) >= self.convergence_steps:
                message = "Convergence reached at step: {0} (generation = {gen}). Best energy: {1} was found at step{2}"
                self.log.info(message.format(self.n_steps, best_en, best_step, gen=self.generations()))
                self.converged = True
            else:
                self.converged = False

        elif cluster.cost < best_en:
            self.best_energies.append([self.n_steps, cluster.cost])
            message = "New best energy: {} found at step: {}".format(cluster.cost, self.n_steps)
            self.log.info(message)
            self.converged = False

    def generations(self) -> float:
        """Convenience function to calculate the current generation

        Generation = current step / minimum population size
        """
        return float(self.n_steps) / float(self.min_pool_size)

    def cull_pool(self) -> None:
        """Reduce the pool to the ``min_pool_size`` lowest-cost clusters (as deep copies)."""
        tmp_pool = copy.deepcopy(self.pool)
        self.pool = sorted(tmp_pool, key=lambda x: x.cost)[:self.min_pool_size]

    def select_clusters(self, n_clusters) -> List[Cluster]:
        """Invokes self.selector.get_parents() to select ``n_clusters`` clusters from the pool"""
        return self.selector.get_parents(self.pool, number_of_parents=n_clusters)

    def write_pool_to_file(self, filename: str = None, pool: list = None) -> None:
        """Write a deep copy of ``pool`` (default: the current pool) to ``filename``, overwriting it."""
        message = "Writing pool to file: {}".format(filename)
        self.log.info(message)

        pool = copy.deepcopy(self.pool if pool is None else pool)

        self.writer.write(structures=pool, filename=filename, file_mode="w")

    def check_system(self, system: Union[Database, str, Cluster, DefineSystem]
                     ) -> Union[DefineSystem, RandomClusterGenerator]:
        """Turn ``system`` into an object that can generate random clusters.

        Args:
            system: Database or str or Cluster or DefineSystem, required.
                A DefineSystem is returned unchanged. A Cluster is wrapped in
                a RandomClusterGenerator. For a str or Database, the global
                minimum of ``self.database`` is used as the template cluster.

        Returns:
            A DefineSystem or RandomClusterGenerator; ``run_GA`` calls its
            ``.get_random_cluster()`` method to fill the pool.

        Raises:
            AssertionError: if no valid template cluster or system description
                can be obtained
        """
        if isinstance(system, (str, Database)):
            # NOTE(review): as in the original, the template is read from
            # self.database rather than from the database passed as ``system``
            # -- confirm this is intended.
            try:
                template = self.database.get_global_minimum()
                if not isinstance(template, Cluster):
                    raise AssertionError("global minimum is {}, not a Cluster".format(type(template)))
            except AssertionError as error:
                message = ("If passing str or database as system this must point to a valid database "
                           "containing at least one valid cluster. "
                           "{} does not meet this requirement\n{}").format(system, error)
                self.log.exception(message)
                raise
            # Previously this path fell through and returned None, leaving
            # self.system unusable in run_GA.
            return RandomClusterGenerator(cluster=template)

        try:
            if not isinstance(system, (Cluster, DefineSystem)):
                raise AssertionError("got {}".format(type(system)))
        except AssertionError as error:
            msg = ("Must pass valid database or either a description of system as either "
                   "Cluster or DefineSystem instance\n{}").format(error)
            self.log.exception(msg)
            raise

        if isinstance(system, DefineSystem):
            return system
        return RandomClusterGenerator(cluster=system)

    def setup_database(self, database: Union[str, Database], *args, **kwargs) -> None:
        """Provides error checking and if everything checks out, creates a database at self.database

        Args:
            database: Union[str, Database], required, the Database object or file path to a database on disk
            *args: forwarded to ``Database`` when a path is given
            **kwargs: forwarded to ``Database`` when a path is given

        Returns:
            None. Sets self.database

        Raises:
            AssertionError when database is not valid
        """
        if isinstance(database, str):
            # noinspection PyTypeChecker
            try:
                self.database = Database(database, *args, **kwargs)  # new_database=False,
            except IOError:
                # Could not open it: create a new database at that path and warn.
                self.database = Database(database, new_database=True, *args, **kwargs)
                message = ("You are using a database with the default attributes saved at: {} "
                           "If this was not your intention you should explicitly create the database "
                           "and pass it to the GA").format(database)
                self.log.warning(message)

        elif isinstance(database, Database):
            self.database = database

        else:
            try:
                message = "Database must be an instance of str or Database! Got {}:{}".format(type(database), database)
                raise AssertionError(message)
            except AssertionError as error:
                self.log.exception(error)
                raise

        self.log.info("Using database: {}\n".format(self.database))

    def _validate(self) -> None:
        """Basic checks to ensure that that this instance of PoolGA is valid

        Checks that ``min_pool_size < max_pool_size``, that ``selector`` is a
        BaseSelector and that ``mating`` is a BaseMate.

        Raises:
            AssertionError: if any check fails (the failure is logged first)
        """
        # Explicit raises are used instead of ``assert`` so the checks are
        # not stripped under ``python -O``.

        # Check the sizes of min_ and max_pool_size are valid
        try:
            if not self.min_pool_size < self.max_pool_size:
                raise AssertionError("min_pool_size must be smaller than max_pool_size")
        except AssertionError as error:
            self.log.exception("min_pool_size >= max_pool_size: {} >= {}.\n{}".format(self.min_pool_size,
                                                                                      self.max_pool_size,
                                                                                      error))
            raise

        try:
            # noinspection PyTypeChecker
            if not isinstance(self.selector, BaseSelector):
                raise AssertionError("selector is not a BaseSelector")
        except AssertionError as error:
            # Report the selector itself; the original message printed self.mating here.
            message = "Must pass a subclass of bmpga.mating.BaseSelector as selector. Got {}:{}!\n{}"\
                .format(self.selector, type(self.selector), error)
            self.log.exception(message)
            raise

        try:
            # Also covers mating being None.
            # noinspection PyTypeChecker
            if not isinstance(self.mating, BaseMate):
                raise AssertionError("mating is not a BaseMate")
        except AssertionError as error:
            message = "Must pass a subclass of bmpga.mating.BaseMate as mating. Got {}:{}!\n{}"\
                .format(self.mating, type(self.mating), error)
            self.log.exception(message)
            raise

    def tear_down(self) -> None:
        """Cull the pool, flag shutdown, write final results and stop the daemon (if any)."""
        self.cull_pool()
        self.shutdown = True

        message = "GA is shutting down. Best cluster found is: {}".format(self.database.get_global_minimum())
        self.log.info(message)

        self.log.info("{pool_len} structures from pool to be written to {fn}"
                      .format(pool_len=len(self.pool), fn=self.file_name))

        self.update()
        self.write_pool_to_file(filename=self.file_name)
        self.log.info("Written best energies and all energies to disk")

        # ``daemon`` defaults to None; calling shutdown() on it raised AttributeError.
        if self.daemon is not None:
            self.log.info("Shutting down daemon")
            self.daemon.shutdown()

        self.log.info("GA server was running for {}s".format(time.time() - self.start_time))
# ``logging`` is used below but was not among the visible imports.
import logging

import Pyro4
import numpy as np

from bmpga.mating.selectors import BaseSelector
from bmpga.characterization import SimpleEnergeticCharacterizer
from bmpga.storage import Database, Cluster, Molecule
from bmpga.systems import DefineSystem
from bmpga.optimisation import PoolGA
from bmpga.mutation import Mutate, RandomSingleTranslation, RandomMultipleTranslations

log = logging.getLogger(__name__)

compare = SimpleEnergeticCharacterizer()

# On-disk database "test.db", created new, comparing clusters by energy.
database = Database(
    db="test.db",
    new_database=True,
    compare_clusters=SimpleEnergeticCharacterizer(accuracy=5e-7))

# A single-particle "LJ" molecule at the origin; the system holds 38 of them.
lj = Molecule(coordinates=np.array([[0.0, 0.0, 0.0]]), particle_names=["LJ"])
system = DefineSystem(numbers_of_molecules=[38], molecules=[lj], log=log, box_length=6)

# Define a mutation scheme.
# The probabilities are normalised inside Mutate, so we can just pass relative probabilities.
mutation = Mutate(
    mutations=[RandomSingleTranslation(), RandomMultipleTranslations()],
    relative_probabilities=[1, 1])