def __init__(self, initial_parameters, swarm=None):
    """
    Create a particle at the given starting position.

    initial_parameters: data accepted by ParameterSet, the particle's
                        starting position in parameter space.
    swarm: optional collection of existing particles; when given, the
           starting velocities are scaled to match the swarm's.
    """
    # Position must be set before velocities can be derived from it.
    self.parameters = ParameterSet(initial_parameters)
    self.age        = 0
    self.best       = None
    self.best_score = None
    self.initialize_velocities(swarm)
    self.lock       = False
def _parse(self, string):
    """
    Reconstruct this experiment from its section of the lab report.

    Recognized fields: "Notes:", "Mod...:" lines, "Hash:", "Journal:",
    "Attempts:" and "Scores:".  The parameters are rebuilt from the
    laboratory's defaults plus the listed modifications.
    """
    # Everything after "Notes:" is free-form user text; keep it separate.
    if "Notes:" in string:
        string, _, self.notes = string.partition('Notes:')
    # Reconstruct the parameters.
    self.modifications = []
    for change in re.findall(r"^[Mm]od.*:(.*)$", string, re.MULTILINE):
        path, eq, value = change.partition('=')
        self.modifications.append((path, value))
    self.parameters = ParameterSet(self.lab.default_parameters)
    for path, value in self.modifications:
        self.parameters.apply(path, value)
    if "Hash: " in string:
        # Override hash(self) with whats on file since this is reconstructed
        # from defaults + modifications, and the defaults might have changed.
        self._hash = int(re.search("Hash: (.*)", string).groups()[0], base=16)
    if "Journal: " in string:
        self.journal = re.search("Journal: (.*)", string).groups()[0]
    if "Attempts: " in string:
        self.attempts = int( re.search("Attempts: (.*)", string).groups()[0])
    if "Scores: " in string:
        # Scores are stored as a comma-separated list of floats.
        self.scores = re.search("Scores: (.*)", string).groups()[0].strip()
        self.scores = [ float(s.strip()) for s in self.scores.split(',') if s.strip() ]
        assert (len(self.scores) <= self.attempts ) # Attempts may fail and not return a score.
def initialize_velocities(self, swarm=None):
    """
    Assign a starting velocity to every parameter of this particle.

    With a swarm given, each velocity is sampled from a normal
    distribution fitted to the other particles' velocities for that
    parameter.  Without one, velocities are random and proportional
    to the parameter's current value.
    """
    # Velocities mirror the tree structure of the parameters.
    self.velocities = ParameterSet(self.parameters)
    for path in self.parameters.enumerate():
        current = self.parameters.get(path)
        if swarm is None:
            # Fresh swarm: random velocity, roughly +/-10% of the value.
            max_percent_change = .10
            jitter = 2 * random.random() - 1
            if isinstance(current, float):
                new_velocity = current * jitter * max_percent_change
            elif isinstance(current, int):
                if abs(current) < 1. / max_percent_change:
                    # Small integers: unit-scale velocity, so rounding
                    # still has about a 50% chance of mutating the value.
                    new_velocity = jitter
                else:
                    new_velocity = current * jitter * max_percent_change
            else:
                raise NotImplementedError()
        else:
            # Match the scale of the established swarm's velocities.
            peers = [p.velocities.get(path) for p in swarm if p is not self]
            new_velocity = np.random.normal(np.mean(peers), np.std(peers))
        self.velocities.apply(path, new_velocity)
def collect_results(self, parameters, score):
    """
    Accept the result of evaluating one particle's parameters.

    Argument parameters must match one of the swarm's particles.
    Argument score is a float, or an Exception / NaN when the evaluation
    crashed, in which case the particle is re-seeded instead of updated.

    Raises Exception when parameters match no particle in the swarm.
    """
    # Get the particle for these parameters.
    for particle in self.swarm:
        if particle.parameters == parameters:
            break
    else:
        raise Exception("Unrecognized parameters!")
    if isinstance(score, Exception) or math.isnan(score):
        # Program crashed, replace this particle.
        # Re-seed from the best known position: the particle's own best,
        # else the global best, else the default parameters.
        if particle.best is not None:
            particle.parameters = ParameterSet(particle.best)
        elif self.best is not None:
            particle.parameters = ParameterSet(self.best)
        else:
            particle.parameters = ParameterSet(self.lab.default_parameters)
        particle.initialize_velocities(self.swarm)
        particle.update_position()
    else:
        # Update with results of this particles evaluation.
        # NOTE(review): score_decay_rate is a module-level constant not
        # visible in this block — confirm it is in scope.
        if self.best_score is not None:
            self.best_score *= 1 - score_decay_rate / len(self.swarm)
        if self.best is None or score > self.best_score:
            self.best = ParameterSet(particle.parameters)
            self.best_score = score
            print("New global best score %g." % score)
        particle.update(score, self.best)
    # Release the particle so the optimizer may suggest it again.
    particle.lock = False
    self.save()
def _parse(self, report):
    """
    Reload laboratory state from the text of an existing lab report.

    The report is split on the section divider into: header, default
    parameters + command line invocation, sorted P-value table, and one
    section per experiment.  If the defaults or the invocation have
    changed since the report was written, the user is asked whether to
    keep the old values, overwrite the file, or abort.

    Raises ValueError when the report file is empty.
    """
    if not report.strip():
        raise ValueError("Empty lab report file!")
    sections = report.split(self.section_divider)
    self.header = sections[0]
    # Section 1 holds a title line, the default parameters, and finally
    # the "$ ..." command line on its last line.
    default_parameters = '\n'.join(sections[1].split('\n')[1:-1])
    cli = sections[1].split('\n')[-1].strip('$ ').split()
    sorted_pval_table = sections[2]  # Regenerated on save; not parsed.
    experiment_sections = sections[3:]
    file_defaults = ParameterSet(default_parameters)
    # Consistency check for parameters & experiment argv.
    if file_defaults != self.default_parameters or cli != self.argv:
        while True:
            q = input("Default parameters or invocation have changed, options:\n" +
                      " old - Ignore the new/given, use what's on file.\n" +
                      " new - Use the new/given, overwrites the old file!\n" +
                      " abort.\n" +
                      ">>> ")
            q = q.strip().lower()
            if q == 'old':
                self.default_parameters = file_defaults
                self.argv = cli
                break
            elif q == 'new':
                # Keep a backup of the old report before it is overwritten.
                shutil.copy(self.lab_report, self.lab_report + '.backup')
                break
            # BUG FIX: the original tested `q in 'aeq'`, which is substring
            # matching — it is True for the empty string (bare Enter) and
            # for 'ae'/'eq', silently aborting.  Match the single-letter
            # shortcuts explicitly instead.
            elif q in ('abort', 'exit', 'quit', 'a', 'e', 'q'):
                sys.exit()
    # Instantiate every experiment; each registers itself with this lab.
    # (Plain loop: the original built a throwaway list for side effects.)
    for section in experiment_sections:
        if section.strip():
            Experiment(self, section)
def _load_experiment_module(self, experiment_module):
    """
    Argument experiment_module is command line argument 0, specifying the
    file path to the experiment module.

    Sets attributes: path, name, module_reload, module,
    default_parameters, structure.

    Raises ValueError when the path is not a python (.py) file.
    """
    self.path, experiment_module = os.path.split(experiment_module)
    self.name, extension = os.path.splitext(experiment_module)
    # Validate with a real exception: `assert` is stripped under -O and
    # gives callers nothing meaningful to report.
    if extension != '.py':
        raise ValueError("Expected a python file, got %r" % experiment_module)
    # Save the import statement so worker processes can re-import the
    # experiment module for themselves.
    self.module_reload = 'import sys; sys.path.append("%s"); '%self.path
    self.module_reload += 'import %s; '%self.name
    exec_globals = {}
    # NOTE: exec of a constructed import string runs arbitrary user code;
    # the module path is supplied on the command line by design.
    exec(self.module_reload, exec_globals)
    self.module = exec_globals[self.name]
    self.default_parameters = ParameterSet(self.module.default_parameters)
    self.structure = self.default_parameters.get_types()
def __init__( self, lab, string=None, modifications=None, parameters=None, ):
    """
    Create or reload an experiment and register it with the laboratory.

    Argument lab is the Laboratory instance which owns this experiment.
    Exactly one of the following data sources must be given:
      string        - text of a lab report section, parsed by _parse.
      modifications - list of (path, value) pairs applied to the defaults.
      parameters    - a complete parameter set.

    Raises TypeError when no data source is given, ValueError when an
    experiment with identical parameters is already registered, and
    SystemExit on a hash collision between different parameter sets.
    """
    self.lab = lab
    self.attempts = 0
    self.scores = []
    self.notes = ' '
    # Load or create this experiment's data.
    if string is not None:
        self._parse(string)
    elif modifications is not None:
        self.parameters = ParameterSet(self.lab.default_parameters)
        for path, value in modifications:
            self.parameters.apply(path, value)
    elif parameters is not None:
        self.parameters = ParameterSet(parameters)
    else:
        raise TypeError("Not enough arguments to Experiment.__init__()")
    # Canonicalize the parameters and record how they differ from defaults.
    self.parameters = self.parameters.typecast(self.lab.structure)
    self.modifications = self.lab.default_parameters.diff(self.parameters)
    # Register with the laboratory, keyed by this experiment's hash.
    if hash(self) not in self.lab.experiment_ids:
        self.lab.experiments.append(self)
        self.lab.experiment_ids[hash(self)] = self
    else:
        existing = self.lab.experiment_ids[hash(self)]
        if existing.parameters == self.parameters:
            raise ValueError("Duplicate Parameters, Hash %X" % hash(self))
        else:
            raise SystemExit("Hash Collision!")
    # Start a journal file for this experiment.
    if not hasattr(self, 'journal'):
        self.journal = os.path.join(self.lab.ae_directory, "%X.journal" % hash(self))
        with open(self.journal, 'a') as file:
            file.write('Experiment Journal For Parameters:\n')
            file.write(str(self.parameters) + '\n')
            file.write('Hash: %X\n' % hash(self))
            file.write('Command Line Invocation: $ ' + ' '.join(self.lab.argv) + '\n')
def merge(self, lab, ideas):
    """ Take several experiments and return the best combination of them. """
    # Walk the experiments best-first; the first experiment to touch a
    # parameter path wins, so higher scores take precedence.
    ranked = sorted(ideas, key=lambda x: -x.mean())
    chosen = {}
    for experiment in ranked:
        for path, value in experiment.modifications:
            if path not in chosen:
                chosen[path] = value
    # Apply the winning modifications on top of the defaults.
    params = ParameterSet(lab.default_parameters)
    for path, value in chosen.items():
        params.apply(path, value)
    return lab.get_experiment(params)
def update(self, score, global_best):
    """
    Record the result of one evaluation, then move the particle.

    Argument score is this particle's latest evaluation result.
    Argument global_best is the swarm-wide best ParameterSet.
    """
    self.age += 1
    # Decay the remembered best so that stale records eventually expire.
    if self.best_score is not None:
        self.best_score = self.best_score * (1 - score_decay_rate)
    improved = self.best is None or score > self.best_score
    if improved:
        self.best_score = score
        self.best = ParameterSet(self.parameters)
        print("New particle best score %g." % self.best_score)
    self.update_position()
    self.update_velocity(global_best)
def get_experiment(self, parameters):
    """
    Returns Experiment instance for the given parameters.
    If one does not already exist for these parameter then it is created.
    """
    # Experiments pass straight through.
    if isinstance(parameters, Experiment):
        return parameters
    canonical = ParameterSet(parameters).typecast(self.structure)
    try:
        return self.experiment_ids[hash(canonical)]
    except KeyError:
        # New parameters: the Experiment registers itself with this lab.
        return Experiment(self, parameters=canonical)
def __init__(self, laboratory, args): self.lab = laboratory # Get a list of every parameter to experiment with. target_parameters = [] for start in args.grid_search.split(','): node = self.lab.default_parameters.get(start) subtree = ParameterSet.enumerate(node) target_parameters.extend(start + end for end in subtree) # Suggest modifications to each parameter. self.experiments = [] for path in target_parameters: value = self.lab.default_parameters.get(path) for mod in self.mod_funcs: params = ParameterSet(self.lab.default_parameters) params.apply(path, mod(value)) X = self.lab.get_experiment(params) if not X.notes.strip(): X.notes += "Suggested by Grid Search.\n" self.experiments.append(X) self.lab.save( ) # Write all of the new grid-search experiments to the lab report.
class Laboratory:
    """
    Main class of the AE program.

    Attributes:
        lab.module         - Users Experiment python module
        lab.name           - Name of experiment module
        lab.path           - Directory containing experiment module
        lab.structure      - Types of parameters
        lab.default_parameters - lab.module.default_parameters
        lab.argv           - Command line invocation of experiment program
        lab.tag            - Optional, identifier string for this Laboratory
        lab.ae_directory   - Directory containing all files created by this program
        lab.lab_report     - File path of Lab Report
        lab.experiments    - List of Experiment instances
        lab.experiment_ids - Experiments accessed by their unique hash
    """
    # Appended to the experiment module's name to form the AE directory name.
    default_extension = '_ae'
    # Separates the major sections of the lab report text file.
    section_divider = '\n' + ('=' * 80) + '\n'

    def __init__(self, experiment_argv, method=None, tag='', verbose=False):
        """
        Argument experiment_argv is the command line invocation of the
        experiment program, a string or list; argument 0 is the path to
        the experiment module.
        Argument method is the parameter-optimization method instance.
        Argument tag is an optional identifier appended to the AE
        directory name.

        Raises ValueError when experiment_argv is empty.
        """
        if not experiment_argv:
            raise ValueError('Missing arguments for the experiment to run!')
        if isinstance(experiment_argv, str):
            experiment_argv = experiment_argv.split()
        self.argv = experiment_argv
        self.method = method
        self.tag = tag
        self.verbose = verbose
        self._load_experiment_module(experiment_argv[0])
        self.ae_directory = os.path.join(self.path, self.name) + self.default_extension
        if self.tag:
            self.ae_directory = self.ae_directory + '_' + self.tag
        self.lab_report = os.path.join(self.ae_directory, 'lab_report.txt')
        self.experiments = []
        self.experiment_ids = {}
        if os.path.isdir(self.ae_directory):
            # Resume: reload all prior state from the existing lab report.
            with open(self.lab_report, 'r') as file:
                report = file.read()
            self._parse(report)
        else:
            # Initialize the Lab Reports attributes and write the skeleton of it
            # to file.
            self.init_header()
            os.mkdir(self.ae_directory)
        # Always have an experiment for the default parameters.
        try:
            Experiment(self, parameters = self.default_parameters)
        except ValueError:
            # Already registered (e.g. reloaded from the report); ignore.
            pass

    def init_header(self):
        """
        Sets attribute lab.header containing the initial text in the
        Notes section at the top of the lab-report.
        """
        self.header = str(self.name)
        if self.tag:
            self.header += ' - ' + self.tag
        self.header += ' - Automatic Experiments\n'
        self.header += time.asctime( time.localtime(time.time()) ) + '\n'

    def _load_experiment_module(self, experiment_module):
        """
        Argument experiment_module is command line argument 0, specifying the
        file path to the experiment module.

        Sets attributes: path, name, module_reload, module,
        default_parameters, structure.
        """
        self.path, experiment_module = os.path.split(experiment_module)
        self.name, dot_py = os.path.splitext(experiment_module)
        assert(dot_py == '.py')
        # Save the import statement so worker processes can re-import the
        # experiment module for themselves.
        self.module_reload = 'import sys; sys.path.append("%s"); '%self.path
        self.module_reload += 'import %s; '%self.name
        exec_globals = {}
        # NOTE(review): exec of a constructed import string runs arbitrary
        # user code; the module path comes from the command line by design.
        exec(self.module_reload, exec_globals)
        self.module = exec_globals[self.name]
        self.default_parameters = ParameterSet(self.module.default_parameters)
        self.structure = self.default_parameters.get_types()

    def _parse(self, report):
        """
        Reload laboratory state from the text of an existing lab report.

        Raises ValueError when the report file is empty.
        """
        if not report.strip():
            raise ValueError("Empty lab report file!")
        sections = report.split(self.section_divider)
        self.header = sections[0]
        # Section 1 holds a title line, the default parameters, and the
        # "$ ..." command line invocation on its final line.
        default_parameters = '\n'.join( sections[1].split('\n')[1:-1] )
        cli = sections[1].split('\n')[-1].strip('$ ').split()
        sorted_pval_table = sections[2]  # Regenerated on save; not parsed.
        experiment_sections = sections[3:]
        file_defaults = ParameterSet(default_parameters)
        # Consistency check for parameters & experiment argv.
        if file_defaults != self.default_parameters or cli != self.argv:
            while True:
                q = input("Default parameters or invocation have changed, options:\n" +
                          " old - Ignore the new/given, use what's on file.\n" +
                          " new - Use the new/given, overwrites the old file!\n" +
                          " abort.\n" +
                          ">>> ")
                q = q.strip().lower()
                if q == 'old':
                    self.default_parameters = file_defaults
                    self.argv = cli
                    break
                elif q == 'new':
                    # Back up the old report before it gets overwritten.
                    shutil.copy(self.lab_report, self.lab_report + '.backup')
                    break
                # NOTE(review): `q in 'aeq'` is substring matching, so the
                # empty string (bare Enter) also aborts here — confirm
                # this is intended.
                elif q in ('abort', 'exit', 'quit') or q in 'aeq':
                    sys.exit()
        # Each Experiment registers itself with this Laboratory.
        [Experiment(self, s) for s in experiment_sections if s.strip()]

    def get_experiment(self, parameters):
        """
        Returns Experiment instance for the given parameters.
        If one does not already exist for these parameter then it is created.
        """
        if isinstance( parameters, Experiment ):
            return parameters
        p = ParameterSet( parameters ).typecast( self.structure )
        h = hash(p)
        if h in self.experiment_ids:
            return self.experiment_ids[h]
        else:
            return Experiment(self, parameters=p)

    def significant_experiments_table(self):
        """
        Returns string: a leader board of the top experiments sorted by
        mean score, always including the default parameters.
        """
        ex = sorted(self.experiments, key = lambda x: (-x.mean(), -x.attempts))
        ex = ex[:20]
        # Always keep the default parameters on the leader board.
        if self.default_parameters not in (X.parameters for X in ex):
            ex.pop()
            ex.append( self.get_experiment( self.default_parameters))
        s = ' Hash | N | Score | P-Value | Modifications\n'
        fmt = '%8X | %3d | % 10g | % 9.3g | '
        for x in ex:
            # P-values are computed against the current leader, ex[0].
            s += fmt%(hash(x), len(x.scores), x.mean(), x.significance(ex[0]))
            if not x.modifications:
                s += 'Default Parameters\n'
            else:
                for idx, mod in enumerate(x.modifications):
                    param, value = mod
                    if idx > 0:
                        # Indent continuation lines to the Modifications column.
                        s += ' ' * 42
                    s += '%s = %s\n'%(param, str(value))
        return s

    def __str__(self):
        """ Returns the lab report. """
        s = self.header
        s += self.section_divider
        s += 'Default Parameter Values = \n'
        s += str(self.default_parameters)
        s += '\n$ ' + ' '.join(self.argv)
        s += self.section_divider
        s += self.significant_experiments_table().rstrip()
        s += '\n\nFailed Experiments: '
        for x in self.experiments:
            # An experiment failed when some attempts returned no score.
            if x.attempts > len(x.scores):
                s += '%X '%hash(x)
        s += self.section_divider
        s += self.section_divider.join(str(s) for s in self.experiments)
        return s

    def save(self):
        """ Write the lab report to file via a temp file, then rename. """
        with open(self.lab_report + '.tmp', 'w') as file:
            file.write( str(self) )
        os.rename(self.lab_report + '.tmp', self.lab_report)

    def run(self, processes, time_limit = None, memory_limit = None,):
        """
        Main loop of the AE program.

        Argument processes is the number of Worker processes to keep busy.
        Arguments time_limit and memory_limit are per-worker resource
        limits, passed through to Worker.  Runs forever.
        """
        pool = []
        while True:
            # Start running new experiments
            while len(pool) < processes:
                X = self.get_experiment( self.method.suggest_parameters() )
                trial = Worker(self, X.parameters, time_limit, memory_limit)
                trial.start()
                pool.append(trial)
            # Wait for experiments to complete.
            time.sleep(2)
            # Check for jobs which have finished.  Iterate backwards so
            # that pop(idx) does not disturb the remaining indices.
            for idx in range(len(pool)-1, -1, -1):
                if not pool[idx].is_alive():
                    trial = pool.pop( idx )
                    X = self.get_experiment( trial.parameters )
                    trial.collect_journal( X )
                    trial.collect_score( X )
                    # Notify the parameter optimization method that the
                    # parameters which it suggested have finished evaluating.
                    self.method.collect_results( X.parameters, trial.score )
                    self.save() # Write the updated Lab Report to file.
class Experiment:
    """
    An experiment represents a unique ParameterSet.
    This class primarily deals with bookkeeping.

    Attributes:
        parameters - ParameterSet
        lab        - Circular reference to Laboratory instance.
        attempts   - Number of times attempted to evaluate.
        scores     - List of float
        notes      - string
        journal    - File path to log file for this experiment.
    """
    def __init__(self, lab, string=None, modifications=None, parameters=None,):
        """
        Create or reload an experiment and register it with the laboratory.

        Argument lab is the Laboratory which owns this experiment.
        Exactly one of the following data sources must be given:
          string        - a lab report section to parse (see _parse).
          modifications - list of (path, value) pairs applied to defaults.
          parameters    - a complete parameter set.

        Raises TypeError when no data source is given, ValueError for
        duplicate parameters, and SystemExit on a hash collision.
        """
        self.lab = lab
        self.attempts = 0
        self.scores = []
        self.notes = ' '
        # Load or create this experiment's data.
        if string is not None:
            self._parse( string )
        elif modifications is not None:
            self.parameters = ParameterSet( self.lab.default_parameters )
            for path, value in modifications:
                self.parameters.apply( path, value )
        elif parameters is not None:
            self.parameters = ParameterSet( parameters )
        else:
            raise TypeError("Not enough arguments to Experiment.__init__()")
        # Canonicalize and record how the parameters differ from defaults.
        self.parameters = self.parameters.typecast( self.lab.structure )
        self.modifications = self.lab.default_parameters.diff( self.parameters )
        # Register with the laboratory, keyed by this experiment's hash.
        if hash(self) not in self.lab.experiment_ids:
            self.lab.experiments.append(self)
            self.lab.experiment_ids[hash(self)] = self
        else:
            existing = self.lab.experiment_ids[hash(self)]
            if existing.parameters == self.parameters:
                raise ValueError("Duplicate Parameters, Hash %X"%hash(self))
            else:
                raise SystemExit("Hash Collision!")
        # Start a journal file for this experiment.
        if not hasattr(self, 'journal'):
            self.journal = os.path.join(self.lab.ae_directory, "%X.journal"%hash(self))
            with open(self.journal, 'a') as file:
                file.write('Experiment Journal For Parameters:\n')
                file.write( str(self.parameters) + '\n')
                file.write('Hash: %X\n'%hash(self))
                file.write('Command Line Invocation: $ ' + ' '.join(self.lab.argv) + '\n')

    def _parse(self, string):
        """
        Reconstruct this experiment from its section of the lab report.
        """
        # Everything after "Notes:" is free-form user text.
        if "Notes:" in string:
            string, _, self.notes = string.partition('Notes:')
        # Reconstruct the parameters.
        self.modifications = []
        for change in re.findall(r"^[Mm]od.*:(.*)$", string, re.MULTILINE):
            path, eq, value = change.partition('=')
            self.modifications.append((path, value))
        self.parameters = ParameterSet(self.lab.default_parameters)
        for path, value in self.modifications:
            self.parameters.apply(path, value)
        if "Hash: " in string:
            # Override hash(self) with whats on file since this is reconstructed
            # from defaults + modifications, and the defaults might have changed.
            self._hash = int(re.search("Hash: (.*)", string).groups()[0], base=16)
        if "Journal: " in string:
            self.journal = re.search("Journal: (.*)", string).groups()[0]
        if "Attempts: " in string:
            self.attempts = int(re.search("Attempts: (.*)", string).groups()[0])
        if "Scores: " in string:
            # Scores are stored as a comma-separated list of floats.
            self.scores = re.search("Scores: (.*)", string).groups()[0].strip()
            self.scores = [float(s.strip()) for s in self.scores.split(',') if s.strip()]
            assert( len(self.scores) <= self.attempts ) # Attempts may fail and not return a score.

    def significance(self, baseline=None):
        """
        Returns the P-Value of the Null-Hypothesis test, the probability
        that this experiment and the given experiment have the same
        distribution of scores, meaning that the change in scores is
        merely by chance.

        Argument baseline is an Experiment, optional, defaults to the
        default_parameters experiment.  Returns NaN when either
        experiment has no scores.
        """
        if baseline is None:
            baseline = self.lab.default_parameters
        baseline = self.lab.get_experiment( baseline )
        if not self.scores or not baseline.scores:
            # Not enough data to compare.
            return float('nan')
        if len(self.scores) == 1:
            pass # TODO: How to pass probabilities & statistics?
        stat, pval = scipy.stats.ttest_ind(
            baseline.scores, self.scores, axis=None,
            # Since both samples come from the same experimental setup they
            # should have the same variance.
            equal_var=True,)
        return pval

    def mean(self):
        """
        Returns the average score, or -inf when never scored (so unscored
        experiments sort last).
        """
        return np.mean(self.scores) if self.scores else float('-inf')

    def __str__(self):
        """ Render this experiment's section of the lab report. """
        s = ''
        if not self.modifications:
            s += "Default Parameters\n"
        for mod, value in self.modifications:
            s += "Modification: %s = %s\n"%(mod, str(value))
        s += 'Hash: %X\n'%hash(self)
        s += 'Journal: %s\n'%self.journal
        s += 'Attempts: %d\n'%self.attempts
        if self.scores:
            s += 'Scores: %s\n'%', '.join(str(s) for s in sorted(self.scores))
            mean = np.mean(self.scores)
            std = np.std(self.scores)
            s += 'Mean & Standard Deviation: %g & %g\n'%(mean, std)
            s += 'P-Value: %g\n'%self.significance()
        s += 'Notes:' + self.notes
        return s

    def __hash__(self):
        # Cache the hash; _parse may have preloaded self._hash from the
        # report file, overriding the hash of the parameters.
        if not hasattr(self, '_hash'):
            self._hash = hash(self.parameters)
        return self._hash
class ParticleData:
    """
    State of one particle in a particle swarm optimization.

    Attributes:
        p.parameters - ParameterSet, the particle's current position.
        p.velocities - ParameterSet full of float.
        p.best       - ParameterSet, best position found by this particle.
        p.best_score - float, score of p.best (decays over time).
        p.age        - Number of times this particle has been evaluated/updated.
        p.lock       - Is this particle currently being evaluated?
    """
    def __init__(self, initial_parameters, swarm=None):
        # Current position in parameter space.
        self.parameters = ParameterSet(initial_parameters)
        self.best = None
        self.best_score = None
        self.age = 0
        self.initialize_velocities(swarm)
        self.lock = False

    def initialize_velocities(self, swarm=None):
        """
        Assign a starting velocity to every parameter.
        Argument swarm is optional; when given, velocities are sampled
        from the distribution of the other particles' velocities.
        """
        # Make a new parameter structure for the velocity data.
        self.velocities = ParameterSet(self.parameters)
        # Iterate through every field in the structure.
        for path in self.parameters.enumerate():
            value = self.parameters.get(path)
            if swarm is not None:
                # Analyse the other particle velocities, so that the new
                # velocity is not too large or too small.
                data = [p.velocities.get(path) for p in swarm if p is not self]
                velocity = np.random.normal(np.mean(data), np.std(data))
            else:
                # New swarm, start with a large random velocity.
                max_percent_change = .10
                uniform = 2 * random.random() - 1
                if isinstance(value, float):
                    velocity = value * uniform * max_percent_change
                elif isinstance(value, int):
                    if abs(value) < 1. / max_percent_change:
                        velocity = uniform # Parameters are rounded, so 50% chance this will mutate.
                    else:
                        velocity = value * uniform * max_percent_change
                else:
                    raise NotImplementedError()
            self.velocities.apply(path, velocity)

    def update_position(self):
        """ Move the particle one step along its velocity vector. """
        for path in self.parameters.enumerate():
            position = self.parameters.get(path)
            velocity = self.velocities.get(path)
            self.parameters.apply(path, position + velocity)

    def update_velocity(self, global_best):
        """
        Pull each velocity toward this particle's best and the global best.
        Argument global_best is a ParameterSet, or None.

        NOTE(review): particle_strength, global_strength and
        velocity_strength are module-level constants not defined in this
        class — confirm they are in scope.
        """
        for path in self.parameters.enumerate():
            postition = self.parameters.get(path)
            velocity = self.velocities.get(path)
            # Fall back to the current position (zero bias) when a best
            # position is not available yet.
            particle_best = self.best.get( path) if self.best is not None else postition
            global_best_x = global_best.get( path) if global_best is not None else postition
            # Update velocity.
            particle_bias = (particle_best - postition) * particle_strength * random.random()
            global_bias = (global_best_x - postition) * global_strength * random.random()
            velocity = velocity * velocity_strength + particle_bias + global_bias
            self.velocities.apply(path, velocity)

    def update(self, score, global_best):
        """
        Record the result of one evaluation, then move the particle.
        Argument score is this particle's latest evaluation result.
        Argument global_best is the swarm-wide best ParameterSet.
        """
        self.age += 1
        # Decay the remembered best so that stale records eventually expire.
        if self.best_score is not None:
            self.best_score *= 1 - score_decay_rate
        if self.best is None or score > self.best_score:
            self.best = ParameterSet(self.parameters)
            self.best_score = score
            print("New particle best score %g." % self.best_score)
        self.update_position()
        self.update_velocity(global_best)