Example #1
0
 def __init__(self, initial_parameters, swarm=None):
     """
     Set up a particle located at ``initial_parameters``.

     Argument swarm is optional and is handed unchanged to
     ``initialize_velocities``.
     """
     # Nothing has been evaluated yet: zero age and no recorded best.
     self.age = 0
     self.best_score = None
     self.best = None
     # The position must exist before the velocities are derived from it.
     self.parameters = ParameterSet(initial_parameters)
     self.initialize_velocities(swarm)
     # Starts out unlocked.
     self.lock = False
Example #2
0
    def _parse(self, string):
        """
        Fill in this experiment's attributes from its text representation.

        Reads the optional "Notes:", "Hash: ", "Journal: ", "Attempts: " and
        "Scores: " fields, and rebuilds ``self.parameters`` by applying every
        "Modification:" line on top of the lab's default parameters.
        """
        # Everything following the notes marker is kept verbatim as the notes;
        # only the text before it is searched for the other fields.
        if "Notes:" in string:
            string, _, self.notes = string.partition('Notes:')

        # Reconstruct the parameters.
        self.modifications = []
        for entry in re.findall(r"^[Mm]od.*:(.*)$", string, re.MULTILINE):
            path, _, value = entry.partition('=')
            self.modifications.append((path, value))
        self.parameters = ParameterSet(self.lab.default_parameters)
        for path, value in self.modifications:
            self.parameters.apply(path, value)

        if "Hash: " in string:
            # Override hash(self) with whats on file since this is reconstructed
            # from defaults + modifications, and the defaults might have changed.
            hash_text = re.search("Hash: (.*)", string).group(1)
            self._hash = int(hash_text, base=16)

        if "Journal: " in string:
            self.journal = re.search("Journal: (.*)", string).group(1)

        if "Attempts: " in string:
            attempts_text = re.search("Attempts: (.*)", string).group(1)
            self.attempts = int(attempts_text)

        if "Scores: " in string:
            scores_text = re.search("Scores: (.*)", string).group(1).strip()
            parsed_scores = []
            for field in scores_text.split(','):
                field = field.strip()
                if field:
                    parsed_scores.append(float(field))
            self.scores = parsed_scores
            # Attempts may fail and not return a score.
            assert len(self.scores) <= self.attempts
Example #3
0
 def initialize_velocities(self, swarm=None):
     """
     Assign a fresh random velocity to every parameter of this particle.

     Argument swarm is optional, an iterable of particles.  When it contains
     particles other than this one, each velocity is drawn from a normal
     distribution with the mean and standard deviation of the other
     particles' velocities for the same parameter.

     Otherwise (no swarm given, or no other particle in it) the velocity is
     uniformly random, at most 10% of the parameter's value.  Small integer
     parameters get a velocity in [-1, 1) instead.

     Raises NotImplementedError on that fallback path when a parameter is
     neither a float nor an int.
     """
     # Make a new parameter structure for the velocity data.
     self.velocities = ParameterSet(self.parameters)
     max_percent_change = .10
     # Iterate through every field in the structure.
     for path in self.parameters.enumerate():
         value = self.parameters.get(path)
         if swarm is not None:
             # Analyse the other particle velocities, so that the new
             # velocity is not too large or too small.
             peers = [p.velocities.get(path) for p in swarm if p is not self]
         else:
             peers = []
         if peers:
             velocity = np.random.normal(np.mean(peers), np.std(peers))
         else:
             # New swarm, or a swarm with no other particle in it.  The mean
             # and deviation of an empty sample are NaN, which would make the
             # velocity NaN, so start with a large random velocity instead.
             uniform = 2 * random.random() - 1
             if isinstance(value, float):
                 velocity = value * uniform * max_percent_change
             elif isinstance(value, int):
                 if abs(value) < 1. / max_percent_change:
                     # Parameters are rounded, so 50% chance this will mutate.
                     velocity = uniform
                 else:
                     velocity = value * uniform * max_percent_change
             else:
                 raise NotImplementedError()
         self.velocities.apply(path, velocity)
Example #4
0
    def collect_results(self, parameters, score):
        """
        Record the outcome of evaluating ``parameters``.

        Argument score is either a number, or an Exception / NaN when the
        evaluation failed.  A failed particle is restarted from the best known
        position; a successful one updates the global best and then itself.
        The particle is unlocked and the state is saved in both cases.

        Raises Exception if no particle in the swarm has these parameters.
        """
        # Get the particle for these parameters.
        not_found = object()
        particle = next(
            (p for p in self.swarm if p.parameters == parameters), not_found)
        if particle is not_found:
            raise Exception("Unrecognized parameters!")

        succeeded = not isinstance(score, Exception) and not math.isnan(score)
        if succeeded:
            # Update with results of this particles evaluation.
            if self.best_score is not None:
                decay = 1 - score_decay_rate / len(self.swarm)
                self.best_score = self.best_score * decay
            if self.best is None or score > self.best_score:
                self.best = ParameterSet(particle.parameters)
                self.best_score = score
                print("New global best score %g." % score)
            particle.update(score, self.best)
        else:
            # Program crashed, replace this particle.  Restart from the
            # particle's own best, then the global best, then the defaults.
            restart_from = particle.best
            if restart_from is None:
                restart_from = self.best
            if restart_from is None:
                restart_from = self.lab.default_parameters
            particle.parameters = ParameterSet(restart_from)
            particle.initialize_velocities(self.swarm)
            particle.update_position()

        particle.lock = False
        self.save()
Example #5
0
    def _parse(self, report):
        """
        Load the state of this laboratory from the text of a lab report.

        Argument report is the full contents of the lab report file.  It is
        split on ``self.section_divider`` into: the header, the default
        parameters followed by the command line, the table of significant
        experiments (not read back), and one section per experiment.

        If the parameters or command line on file differ from the current
        ones, the user is asked interactively which to keep, or to abort.

        Raises ValueError if the report is empty or only whitespace.
        """
        if not report.strip():
            raise ValueError("Empty lab report file!")
        sections            = report.split(self.section_divider)
        self.header         = sections[0]
        parameter_lines     = sections[1].split('\n')
        # First line is a title and the last line is the command line.
        default_parameters  = '\n'.join( parameter_lines[1:-1] )
        cli                 = parameter_lines[-1].strip('$ ').split()
        experiment_sections = sections[3:]
        file_defaults       = ParameterSet(default_parameters)
        # Consistency check for parameters & experiment argv.
        if file_defaults != self.default_parameters or cli != self.argv:
            while True:
                q = input("Default parameters or invocation have changed, options:\n" + 
                          "  old - Ignore the new/given, use what's on file.\n" +
                          "  new - Use the new/given, overwrites the old file!\n" +
                          "  abort.\n" +
                          ">>> ")
                q = q.strip().lower()
                if q == 'old':
                    self.default_parameters = file_defaults
                    self.argv               = cli
                    break
                elif q == 'new':
                    shutil.copy(self.lab_report, self.lab_report + '.backup')
                    break
                # Exact matches only.  A substring test against 'aeq' would
                # also accept an empty answer and abort on a bare Enter.
                elif q in ('abort', 'exit', 'quit', 'a', 'e', 'q'):
                    sys.exit()

        # Constructing an Experiment registers it with this laboratory.
        for section in experiment_sections:
            if section.strip():
                Experiment(self, section)
Example #6
0
    def _load_experiment_module(self, experiment_module):
        """
        Argument experiment_module is command line argument 0, specifying the
        file path to the experiment module.

        Sets the attributes path, name, module_reload, module,
        default_parameters and structure.  Attribute module_reload is a string
        of python statements which imports the experiment module; it is
        executed here to obtain the module.

        Raises AssertionError if the file name does not end in '.py'.
        """
        self.path, experiment_module = os.path.split(experiment_module)
        self.name, dot_py = os.path.splitext(experiment_module)
        assert(dot_py == '.py')
        # %r writes the path as a correctly quoted and escaped string literal,
        # so quotes or backslashes in the path cannot break the statement.
        self.module_reload  = 'import sys; sys.path.append(%r); '%self.path
        self.module_reload += 'import %s; '%self.name
        exec_globals = {}
        exec(self.module_reload, exec_globals)
        self.module = exec_globals[self.name]

        self.default_parameters = ParameterSet(self.module.default_parameters)
        self.structure = self.default_parameters.get_types()
Example #7
0
    def __init__(
        self,
        lab,
        string=None,
        modifications=None,
        parameters=None,
    ):
        """
        Create an experiment and register it with ``lab``.

        Exactly one source is used, checked in this order:
            string        - Text representation, handed to self._parse.
            modifications - Pairs of (path, value) applied to the defaults.
            parameters    - A complete set of parameters.

        Raises TypeError if none of the three is given, ValueError if the lab
        already has an experiment with equal parameters, and SystemExit if a
        different experiment already uses the same hash.
        """
        self.lab = lab
        self.attempts = 0
        self.scores = []
        self.notes = ' '

        # Load or create this experiment's data.
        if string is not None:
            self._parse(string)
        elif modifications is not None:
            self.parameters = ParameterSet(self.lab.default_parameters)
            for path, value in modifications:
                self.parameters.apply(path, value)
        elif parameters is not None:
            self.parameters = ParameterSet(parameters)
        else:
            raise TypeError("Not enough arguments to Experiment.__init__()")

        self.parameters = self.parameters.typecast(self.lab.structure)
        self.modifications = self.lab.default_parameters.diff(self.parameters)

        # Register with the lab, refusing duplicates and hash collisions.
        key = hash(self)
        registry = self.lab.experiment_ids
        if key in registry:
            if registry[key].parameters == self.parameters:
                raise ValueError("Duplicate Parameters, Hash %X" % key)
            raise SystemExit("Hash Collision!")
        self.lab.experiments.append(self)
        registry[key] = self

        # Start a journal file for this experiment, unless parsing the string
        # already supplied one.
        if hasattr(self, 'journal'):
            return
        self.journal = os.path.join(self.lab.ae_directory,
                                    "%X.journal" % key)
        with open(self.journal, 'a') as file:
            file.write('Experiment Journal For Parameters:\n')
            file.write(str(self.parameters) + '\n')
            file.write('Hash: %X\n' % key)
            invocation = ' '.join(self.lab.argv)
            file.write('Command Line Invocation: $ ' + invocation + '\n')
Example #8
0
 def merge(self, lab, ideas):
     """
     Combine the modifications of several experiments into one experiment.

     Argument ideas is an iterable of experiments.  They are ranked by mean
     score, best first, and for each parameter path the value from the best
     ranked experiment which modifies it is used.

     Returns the lab's experiment for the combined parameters.
     """
     ranked = sorted(ideas, key=lambda idea: -idea.mean())
     # Marshal all of the modifications together.
     chosen_paths = []
     chosen_values = []
     for idea in ranked:
         for path, value in idea.modifications:
             # Higher scoring experiments take precedence.
             if path not in chosen_paths:
                 chosen_paths.append(path)
                 chosen_values.append(value)
     # Create and get the experiment object.
     merged = ParameterSet(lab.default_parameters)
     for index, path in enumerate(chosen_paths):
         merged.apply(path, chosen_values[index])
     return lab.get_experiment(merged)
Example #9
0
 def update(self, score, global_best):
     """
     Record the score of this particle's current position, then move it.

     The stored best score is first scaled by (1 - score_decay_rate).  If
     there is no best yet, or score beats the scaled value, the current
     position becomes the new best.  The position is then advanced and the
     velocity recomputed using global_best.
     """
     self.age = self.age + 1
     if self.best_score is not None:
         self.best_score = self.best_score * (1 - score_decay_rate)
     is_new_best = self.best is None or score > self.best_score
     if is_new_best:
         self.best = ParameterSet(self.parameters)
         self.best_score = score
         print("New particle best score %g." % self.best_score)
     # Move with the current velocity before computing the next one.
     self.update_position()
     self.update_velocity(global_best)
Example #10
0
    def get_experiment(self, parameters):
        """
        Look up the Experiment instance for the given parameters, creating it
        if this laboratory does not have one yet.

        An argument which already is an Experiment is returned unchanged.
        """
        if isinstance(parameters, Experiment):
            return parameters

        # Experiments are indexed by the hash of their typecast parameters.
        typed = ParameterSet(parameters).typecast(self.structure)
        key = hash(typed)
        if key not in self.experiment_ids:
            return Experiment(self, parameters=typed)
        return self.experiment_ids[key]
Example #11
0
    def __init__(self, laboratory, args):
        """
        Queue one experiment per (parameter, modification function) pair.

        Argument args.grid_search is a comma separated list of parameter
        paths; every parameter enumerated beneath each path is targeted.
        Each function in self.mod_funcs is applied to the default value of
        each targeted parameter.  The lab is saved once at the end.
        """
        self.lab = laboratory
        defaults = self.lab.default_parameters

        # Get a list of every parameter to experiment with.
        target_parameters = [
            start + end
            for start in args.grid_search.split(',')
            for end in ParameterSet.enumerate(defaults.get(start))
        ]

        # Suggest modifications to each parameter.
        self.experiments = []
        for path in target_parameters:
            value = defaults.get(path)
            for mod in self.mod_funcs:
                params = ParameterSet(defaults)
                params.apply(path, mod(value))
                experiment = self.lab.get_experiment(params)
                # Only annotate experiments which have no notes yet.
                if not experiment.notes.strip():
                    experiment.notes += "Suggested by Grid Search.\n"
                self.experiments.append(experiment)

        # Write all of the new grid-search experiments to the lab report.
        self.lab.save()
Example #12
0
class Laboratory:
    """
    Main class of the AE program.

    Attributes:
        lab.module             - Users Experiment python module
        lab.name               - Name of experiment module
        lab.path               - Directory containing experiment module
        lab.structure          - Types of parameters
        lab.default_parameters - lab.module.default_parameters
        lab.argv               - Command line invocation of experiment program
        lab.tag                - Optional, identifier string for this Laboratory
        lab.ae_directory       - Directory containing all files created by this program
        lab.lab_report         - File path of Lab Report
        lab.experiments        - List of Experiment instances
        lab.experiment_ids     - Experiments accessed by their unique hash
    """
    default_extension = '_ae'
    section_divider = '\n' + ('=' * 80) + '\n'

    def __init__(self, experiment_argv, method=None, tag='', verbose=False):
        """
        Argument experiment_argv is the command line of the experiment, as a
        list or a whitespace separated string.  Its first item is the file
        path of the experiment module.

        Loads the existing lab report if there is one, otherwise starts a new
        one and creates the directory for it.

        Raises ValueError if experiment_argv is empty.
        """
        if not experiment_argv:
            raise ValueError('Missing arguments for the experiment to run!')
        if isinstance(experiment_argv, str):
            experiment_argv = experiment_argv.split()
        self.argv    = experiment_argv
        self.method  = method
        self.tag     = tag
        self.verbose = verbose
        self._load_experiment_module(experiment_argv[0])
        self.ae_directory = os.path.join(self.path, self.name) + self.default_extension
        if self.tag:
            self.ae_directory = self.ae_directory + '_' + self.tag
        self.lab_report   = os.path.join(self.ae_directory, 'lab_report.txt')
        self.experiments    = []
        self.experiment_ids = {}
        # Test for the report itself rather than its directory: the directory
        # can exist without a report if no save has happened yet.
        if os.path.isfile(self.lab_report):
            with open(self.lab_report, 'r') as file:
                report = file.read()
            self._parse(report)
        else:
            # Initialize the Lab Reports attributes.  The report file itself
            # is first written by save().
            self.init_header()
            os.makedirs(self.ae_directory, exist_ok=True)
        # Always have an experiment for the default parameters.
        try:
            Experiment(self,  parameters = self.default_parameters)
        except ValueError:
            # Experiment raises ValueError for duplicate parameters, meaning
            # the default experiment was already loaded from the report.
            pass

    def init_header(self):
        """
        Sets attribute lab.header containing the initial text in the Notes
            section at the top of the lab-report.
        """
        self.header = str(self.name)
        if self.tag:
            self.header += ' - ' + self.tag
        self.header += ' - Automatic Experiments\n'
        self.header += time.asctime( time.localtime(time.time()) ) + '\n'

    def _load_experiment_module(self, experiment_module):
        """
        Argument experiment_module is command line argument 0, specifying the
        file path to the experiment module.

        Sets the attributes path, name, module_reload, module,
        default_parameters and structure.  Attribute module_reload is a string
        of python statements which imports the experiment module; it is
        executed here to obtain the module.

        Raises AssertionError if the file name does not end in '.py'.
        """
        self.path, experiment_module = os.path.split(experiment_module)
        self.name, dot_py = os.path.splitext(experiment_module)
        assert(dot_py == '.py')
        # %r writes the path as a correctly quoted and escaped string literal,
        # so quotes or backslashes in the path cannot break the statement.
        self.module_reload  = 'import sys; sys.path.append(%r); '%self.path
        self.module_reload += 'import %s; '%self.name
        exec_globals = {}
        exec(self.module_reload, exec_globals)
        self.module = exec_globals[self.name]

        self.default_parameters = ParameterSet(self.module.default_parameters)
        self.structure = self.default_parameters.get_types()

    def _parse(self, report):
        """
        Load the state of this laboratory from the text of a lab report.

        Argument report is the full contents of the lab report file, in the
        layout written by __str__: the header, the default parameters followed
        by the command line, the table of significant experiments (not read
        back), and one section per experiment.

        If the parameters or command line on file differ from the current
        ones, the user is asked interactively which to keep, or to abort.

        Raises ValueError if the report is empty or only whitespace.
        """
        if not report.strip():
            raise ValueError("Empty lab report file!")
        sections            = report.split(self.section_divider)
        self.header         = sections[0]
        parameter_lines     = sections[1].split('\n')
        # First line is a title and the last line is the command line.
        default_parameters  = '\n'.join( parameter_lines[1:-1] )
        cli                 = parameter_lines[-1].strip('$ ').split()
        experiment_sections = sections[3:]
        file_defaults       = ParameterSet(default_parameters)
        # Consistency check for parameters & experiment argv.
        if file_defaults != self.default_parameters or cli != self.argv:
            while True:
                q = input("Default parameters or invocation have changed, options:\n" + 
                          "  old - Ignore the new/given, use what's on file.\n" +
                          "  new - Use the new/given, overwrites the old file!\n" +
                          "  abort.\n" +
                          ">>> ")
                q = q.strip().lower()
                if q == 'old':
                    self.default_parameters = file_defaults
                    self.argv               = cli
                    break
                elif q == 'new':
                    shutil.copy(self.lab_report, self.lab_report + '.backup')
                    break
                # Exact matches only.  A substring test against 'aeq' would
                # also accept an empty answer and abort on a bare Enter.
                elif q in ('abort', 'exit', 'quit', 'a', 'e', 'q'):
                    sys.exit()

        # Constructing an Experiment registers it with this laboratory.
        for section in experiment_sections:
            if section.strip():
                Experiment(self, section)

    def get_experiment(self, parameters):
        """
        Returns Experiment instance for the given parameters.  If one does not
        already exist for these parameter then it is created.
        """
        if isinstance( parameters, Experiment ):
            return parameters

        p = ParameterSet( parameters ).typecast( self.structure )
        h = hash(p)
        if h in self.experiment_ids:
            return self.experiment_ids[h]
        else:
            return Experiment(self, parameters=p)

    def significant_experiments_table(self):
        """
        Returns string, a table of the 20 experiments with the highest mean
        score.  The P-Value column compares each row against the first row.
        """
        ex = sorted(self.experiments, key = lambda x: (-x.mean(), -x.attempts))
        ex = ex[:20]
        # Always keep the default parameters on the leader board.
        if self.default_parameters not in (X.parameters for X in ex):
            ex.pop()
            ex.append( self.get_experiment( self.default_parameters))
        s = '    Hash |   N |      Score |   P-Value | Modifications\n'
        fmt = '%8X | %3d | % 10g | % 9.3g | '
        for x in ex:
            s += fmt%(hash(x), len(x.scores), x.mean(), x.significance(ex[0]))
            if not x.modifications:
                s += 'Default Parameters\n'
            else:
                for idx, mod in enumerate(x.modifications):
                    param, value = mod
                    if idx > 0:
                        # Indent continuation lines to the last column.
                        s += ' ' * 42
                    s += '%s = %s\n'%(param, str(value))
        return s

    def __str__(self):
        """ Returns the lab report. """
        s  = self.header
        s += self.section_divider
        s += 'Default Parameter Values = \n'
        s += str(self.default_parameters)
        s += '\n$ ' + ' '.join(self.argv)
        s += self.section_divider
        s += self.significant_experiments_table().rstrip()
        s += '\n\nFailed Experiments: '
        for x in self.experiments:
            # More attempts than scores means at least one attempt failed.
            if x.attempts > len(x.scores):
                s += '%X '%hash(x)
        s += self.section_divider
        s += self.section_divider.join(str(x) for x in self.experiments)
        return s

    def save(self):
        """
        Writes the lab report to file.  The text is written to a temporary
        file which then replaces the report, so that a failure while writing
        leaves the previous report intact.
        """
        with open(self.lab_report + '.tmp', 'w') as file:
            file.write( str(self) )
        # os.replace overwrites an existing destination on every platform.
        os.replace(self.lab_report + '.tmp', self.lab_report)

    def run(self, processes,
        time_limit   = None,
        memory_limit = None,):
        """
        Main loop of the AE program.  Does not return.

        Argument processes is the number of experiments to run at once.
        Arguments time_limit and memory_limit are passed on to each Worker.
        """
        pool = []
        while True:
            # Start running new experiments
            while len(pool) < processes:
                X = self.get_experiment( self.method.suggest_parameters() )
                trial = Worker(self, X.parameters, time_limit, memory_limit)
                trial.start()
                pool.append(trial)

            # Wait for experiments to complete.
            time.sleep(2)

            # Check for jobs which have finished.  Iterate backwards so that
            # popping an item does not shift the indexes still to be visited.
            for idx in range(len(pool)-1, -1, -1):
                if not pool[idx].is_alive():
                    trial = pool.pop( idx )
                    X = self.get_experiment( trial.parameters )
                    trial.collect_journal( X )
                    trial.collect_score( X )
                    # Notify the parameter optimization method that the
                    # parameters which it suggested have finished evaluating.
                    self.method.collect_results( X.parameters, trial.score )
                    self.save()     # Write the updated Lab Report to file.
Example #13
0
class Experiment:
    """
    An experiment represents a unique ParameterSet.
    This class primarily deals with bookkeeping.

    Attributes:
        parameters    - ParameterSet
        lab           - Circular reference to Laboratory instance.
        attempts      - Number of times attempted to evaluate.
        scores        - List of float
        notes         - string
        journal       - File path to log file for this experiment.
    """
    def __init__(self, lab,
        string=None,
        modifications=None,
        parameters=None,):
        """
        Create an experiment and register it with the lab.

        Exactly one source is used, checked in this order:
            string        - Text representation, handed to self._parse.
            modifications - Pairs of (path, value) applied to the defaults.
            parameters    - A complete set of parameters.

        Raises TypeError if none of the three is given, ValueError if the lab
        already has an experiment with equal parameters, and SystemExit if a
        different experiment already uses the same hash.
        """
        self.lab      = lab
        self.attempts = 0
        self.scores   = []
        self.notes    = ' '

        # Load or create this experiment's data.
        if string is not None:
            self._parse(string)
        elif modifications is not None:
            self.parameters = ParameterSet(self.lab.default_parameters)
            for path, value in modifications:
                self.parameters.apply(path, value)
        elif parameters is not None:
            self.parameters = ParameterSet(parameters)
        else:
            raise TypeError("Not enough arguments to Experiment.__init__()")

        self.parameters    = self.parameters.typecast(self.lab.structure)
        self.modifications = self.lab.default_parameters.diff(self.parameters)

        # Register with the lab, refusing duplicates and hash collisions.
        key      = hash(self)
        registry = self.lab.experiment_ids
        if key in registry:
            if registry[key].parameters == self.parameters:
                raise ValueError("Duplicate Parameters, Hash %X" % key)
            raise SystemExit("Hash Collision!")
        self.lab.experiments.append(self)
        registry[key] = self

        # Start a journal file for this experiment, unless parsing the string
        # already supplied one.
        if hasattr(self, 'journal'):
            return
        self.journal = os.path.join(self.lab.ae_directory, "%X.journal" % key)
        with open(self.journal, 'a') as file:
            file.write('Experiment Journal For Parameters:\n')
            file.write(str(self.parameters) + '\n')
            file.write('Hash: %X\n' % key)
            invocation = ' '.join(self.lab.argv)
            file.write('Command Line Invocation: $ ' + invocation + '\n')

    def _parse(self, string):
        """
        Fill in this experiment's attributes from its text representation, as
        written by __str__.
        """
        # Everything following the notes marker is kept verbatim as the notes;
        # only the text before it is searched for the other fields.
        if "Notes:" in string:
            string, _, self.notes = string.partition('Notes:')

        # Reconstruct the parameters.
        self.modifications = []
        for entry in re.findall(r"^[Mm]od.*:(.*)$", string, re.MULTILINE):
            path, _, value = entry.partition('=')
            self.modifications.append((path, value))
        self.parameters = ParameterSet(self.lab.default_parameters)
        for path, value in self.modifications:
            self.parameters.apply(path, value)

        if "Hash: " in string:
            # Override hash(self) with whats on file since this is reconstructed
            # from defaults + modifications, and the defaults might have changed.
            hash_text  = re.search("Hash: (.*)", string).group(1)
            self._hash = int(hash_text, base=16)

        if "Journal: " in string:
            self.journal = re.search("Journal: (.*)", string).group(1)

        if "Attempts: " in string:
            attempts_text = re.search("Attempts: (.*)", string).group(1)
            self.attempts = int(attempts_text)

        if "Scores: " in string:
            scores_text   = re.search("Scores: (.*)", string).group(1).strip()
            parsed_scores = []
            for field in scores_text.split(','):
                field = field.strip()
                if field:
                    parsed_scores.append(float(field))
            self.scores = parsed_scores
            # Attempts may fail and not return a score.
            assert len(self.scores) <= self.attempts

    def significance(self, baseline=None):
        """
        Returns the P-Value of the Null-Hypothesis test, the probability that
        this experiment and the given experiment have the same distribution of
        scores, meaning that the change in scores is merely by chance.

        Argument baseline is an Experiment, optional defaults to default_parameters

        Returns NaN if either experiment has no scores.
        """
        if baseline is None:
            baseline = self.lab.default_parameters
        baseline = self.lab.get_experiment(baseline)

        if not (self.scores and baseline.scores):
            return float('nan')
        # TODO: How to pass probabilities & statistics?  (An experiment with a
        # single score currently gets no special treatment.)
        _, pval = scipy.stats.ttest_ind(
            baseline.scores, self.scores, axis=None,
            # Since both samples come from the same experimental setup  they
            # should have the same variance.
            equal_var=True,)
        return pval

    def mean(self):
        """ Returns the average score, or negative infinity without scores. """
        if self.scores:
            return np.mean(self.scores)
        return float('-inf')

    def __str__(self):
        """ Returns the text representation which _parse reads back. """
        parts = []
        if not self.modifications:
            parts.append("Default Parameters\n")
        for mod, value in self.modifications:
            parts.append("Modification: %s = %s\n" % (mod, str(value)))
        parts.append('Hash: %X\n' % hash(self))
        parts.append('Journal: %s\n' % self.journal)
        parts.append('Attempts: %d\n' % self.attempts)
        if self.scores:
            score_list = ', '.join(str(score) for score in sorted(self.scores))
            parts.append('Scores: %s\n' % score_list)
            mean = np.mean(self.scores)
            std  = np.std(self.scores)
            parts.append('Mean & Standard Deviation: %g & %g\n' % (mean, std))
            parts.append('P-Value: %g\n' % self.significance())
        parts.append('Notes:' + self.notes)
        return ''.join(parts)

    def __hash__(self):
        """ Returns the hash of the parameters, computed once and cached. """
        try:
            return self._hash
        except AttributeError:
            self._hash = hash(self.parameters)
            return self._hash
Example #14
0
class ParticleData:
    """
    Attributes:
        p.parameters - ParameterSet
        p.velocities - ParameterSet full of float
        p.best       - ParameterSet, or None before the first evaluation
        p.best_score - float, or None before the first evaluation
        p.age        - Number of times this particle has been evaluated/updated.
        p.lock       - Is this particle currently being evaluated?
    """
    def __init__(self, initial_parameters, swarm=None):
        """
        Argument swarm is optional and is passed on to initialize_velocities.
        """
        self.parameters = ParameterSet(initial_parameters)
        self.best = None
        self.best_score = None
        self.age = 0
        self.initialize_velocities(swarm)
        self.lock = False

    def initialize_velocities(self, swarm=None):
        """
        Assign a fresh random velocity to every parameter of this particle.

        Argument swarm is optional, an iterable of particles.  When it
        contains particles other than this one, each velocity is drawn from a
        normal distribution with the mean and standard deviation of the other
        particles' velocities for the same parameter.

        Otherwise (no swarm given, or no other particle in it) the velocity is
        uniformly random, at most 10% of the parameter's value.  Small integer
        parameters get a velocity in [-1, 1) instead.

        Raises NotImplementedError on that fallback path when a parameter is
        neither a float nor an int.
        """
        # Make a new parameter structure for the velocity data.
        self.velocities = ParameterSet(self.parameters)
        max_percent_change = .10
        # Iterate through every field in the structure.
        for path in self.parameters.enumerate():
            value = self.parameters.get(path)
            if swarm is not None:
                # Analyse the other particle velocities, so that the new
                # velocity is not too large or too small.
                peers = [p.velocities.get(path) for p in swarm if p is not self]
            else:
                peers = []
            if peers:
                velocity = np.random.normal(np.mean(peers), np.std(peers))
            else:
                # New swarm, or a swarm with no other particle in it.  The
                # mean and deviation of an empty sample are NaN, which would
                # make the velocity NaN, so start with a large random
                # velocity instead.
                uniform = 2 * random.random() - 1
                if isinstance(value, float):
                    velocity = value * uniform * max_percent_change
                elif isinstance(value, int):
                    if abs(value) < 1. / max_percent_change:
                        # Parameters are rounded, so 50% chance this will mutate.
                        velocity = uniform
                    else:
                        velocity = value * uniform * max_percent_change
                else:
                    raise NotImplementedError()
            self.velocities.apply(path, velocity)

    def update_position(self):
        """ Add each parameter's velocity to its current value. """
        for path in self.parameters.enumerate():
            position = self.parameters.get(path)
            velocity = self.velocities.get(path)
            self.parameters.apply(path, position + velocity)

    def update_velocity(self, global_best):
        """
        Recompute every velocity from the previous velocity, the pull towards
        this particle's best position and the pull towards global_best.

        A missing best (None) is treated as the current position, so it
        contributes no pull.
        """
        for path in self.parameters.enumerate():
            position = self.parameters.get(path)
            velocity = self.velocities.get(path)
            particle_best = self.best.get(
                path) if self.best is not None else position
            global_best_x = global_best.get(
                path) if global_best is not None else position

            # Update velocity.
            particle_bias = (particle_best -
                             position) * particle_strength * random.random()
            global_bias = (global_best_x -
                           position) * global_strength * random.random()
            velocity = velocity * velocity_strength + particle_bias + global_bias
            self.velocities.apply(path, velocity)

    def update(self, score, global_best):
        """
        Record the score of this particle's current position, then move it.

        The stored best score is first scaled by (1 - score_decay_rate).  If
        there is no best yet, or score beats the scaled value, the current
        position becomes the new best.
        """
        self.age += 1
        if self.best_score is not None:
            self.best_score *= 1 - score_decay_rate
        if self.best is None or score > self.best_score:
            self.best = ParameterSet(self.parameters)
            self.best_score = score
            print("New particle best score %g." % self.best_score)
        self.update_position()
        self.update_velocity(global_best)