Example #1
    def merge(self, lab, ideas):
        """ Take several experiments and return the best combination of them. """
        # Marshal all of the modifications together.
        ideas = sorted(ideas, key=lambda x: -x.mean())
        paths = []
        values = []
        for x in ideas:
            for path, value in x.modifications:
                if path in paths:
                    continue  # Higher scoring experiments take precedence.
                paths.append(path)
                values.append(value)
        # Create and get the experiment object.
        params = ParameterSet(lab.default_parameters)
        for p, v in zip(paths, values):
            params.apply(p, v)
        return lab.get_experiment(params)
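The merge above relies on Experiment, ParameterSet, and Laboratory from the surrounding module. The standalone sketch below reproduces only the precedence rule with plain tuples and a dict; the parameter paths and scores are made up for illustration.

# Standalone sketch of merge()'s precedence rule, using plain data instead of
# Experiment objects. All names and numbers here are illustrative only.
ideas = [
    (0.92, [("net.lr", 0.01), ("net.depth", 6)]),    # higher scoring experiment
    (0.85, [("net.lr", 0.05), ("net.width", 128)]),  # lower scoring experiment
]
ideas.sort(key=lambda x: -x[0])            # highest mean score first
merged = {}
for _, modifications in ideas:
    for path, value in modifications:
        merged.setdefault(path, value)     # earlier (better) experiments win conflicts
print(merged)   # {'net.lr': 0.01, 'net.depth': 6, 'net.width': 128}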
Example #2
    def __init__(self, laboratory, args):
        self.lab = laboratory

        # Get a list of every parameter to experiment with.
        target_parameters = []
        for start in args.grid_search.split(','):
            node = self.lab.default_parameters.get(start)
            subtree = ParameterSet.enumerate(node)
            target_parameters.extend(start + end for end in subtree)

        # Suggest modifications to each parameter.
        self.experiments = []
        for path in target_parameters:
            value = self.lab.default_parameters.get(path)
            for mod in self.mod_funcs:
                params = ParameterSet(self.lab.default_parameters)
                params.apply(path, mod(value))
                X = self.lab.get_experiment(params)
                if not X.notes.strip():
                    X.notes += "Suggested by Grid Search.\n"
                self.experiments.append(X)

        # Write all of the new grid-search experiments to the lab report.
        self.lab.save()
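The snippet above references self.mod_funcs without defining it; a plausible shape is a list of callables that each perturb a parameter value. The sketch below is a self-contained approximation of the same suggestion loop over a flat dict of defaults; the parameter names and perturbation factors are assumptions, not taken from the source.

# Hypothetical sketch of the grid-search suggestion loop on a flat dict.
default_parameters = {"optimizer.lr": 0.01, "model.layers": 4}   # illustrative defaults
mod_funcs = [lambda v: v * 0.5, lambda v: v * 1.5]               # assumed perturbations

suggestions = []
for path, value in default_parameters.items():
    for mod in mod_funcs:
        params = dict(default_parameters)   # copy the defaults
        params[path] = mod(value)           # perturb exactly one parameter
        suggestions.append(params)

print(len(suggestions))   # 2 parameters * 2 mod_funcs = 4 suggested experiments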
Example #3
class Experiment:
    """
    An experiment represents a unique ParameterSet.
    This class primarily deals with bookkeeping.

    Attributes:
        parameters    - ParameterSet
        lab           - Circular reference to Laboratory instance.
        attempts      - Number of times attempted to evaluate.
        scores        - List of float
        notes         - string
        journal       - File path to log file for this experiment.
    """
    def __init__(self, lab,
                 string=None,
                 modifications=None,
                 parameters=None,):
        """
        Create an Experiment from one of the optional arguments: a string
        parsed from the lab report, a list of (path, value) modifications,
        or a complete ParameterSet.
        """
        self.lab      = lab
        self.attempts = 0
        self.scores   = []
        self.notes    = ' '
        # Load or create this experiment's data.
        if string is not None:
            self._parse( string )
        elif modifications is not None:
            self.parameters = ParameterSet( self.lab.default_parameters )
            for path, value in modifications:
                self.parameters.apply( path, value )
        elif parameters is not None:
            self.parameters = ParameterSet( parameters )
        else:
            raise TypeError("Not enough arguments to Experiment.__init__()")

        self.parameters    = self.parameters.typecast( self.lab.structure )
        self.modifications = self.lab.default_parameters.diff( self.parameters )

        if hash(self) not in self.lab.experiment_ids:
            self.lab.experiments.append(self)
            self.lab.experiment_ids[hash(self)] = self
        else:
            existing = self.lab.experiment_ids[hash(self)]
            if existing.parameters == self.parameters:
                raise ValueError("Duplicate Parameters, Hash %X"%hash(self))
            else:
                raise SystemExit("Hash Collision!")

        # Start a journal file for this experiment.
        if not hasattr(self, 'journal'):
            self.journal = os.path.join(self.lab.ae_directory, "%X.journal"%hash(self))
            with open(self.journal, 'a') as file:
                file.write('Experiment Journal For Parameters:\n')
                file.write( str(self.parameters) + '\n')
                file.write('Hash: %X\n'%hash(self))
                file.write('Command Line Invocation: $ ' + ' '.join(self.lab.argv) + '\n')

    def _parse(self, string):
        if "Notes:" in string:
            string, _, self.notes = string.partition('Notes:')
        # Reconstruct the parameters.
        self.modifications = []
        for change in re.findall(r"^[Mm]od.*:(.*)$", string, re.MULTILINE):
            path, eq, value = change.partition('=')
            # Strip the whitespace that __str__ puts around the '=' sign.
            self.modifications.append((path.strip(), value.strip()))
        self.parameters = ParameterSet(self.lab.default_parameters)
        for path, value in self.modifications:
            self.parameters.apply(path, value)

        if "Hash: " in string:
            # Override hash(self) with whats on file since this is reconstructed
            # from defaults + modifications, and the defaults might have changed.
            self._hash    = int(re.search("Hash: (.*)", string).groups()[0], base=16)
        if "Journal: " in string:
            self.journal  = re.search("Journal: (.*)", string).groups()[0]
        if "Attempts: " in string:
            self.attempts = int(re.search("Attempts: (.*)", string).groups()[0])
        if "Scores: " in string:
            self.scores = re.search("Scores: (.*)", string).groups()[0].strip()
            self.scores = [float(s.strip()) for s in self.scores.split(',') if s.strip()]
            assert( len(self.scores) <= self.attempts ) # Attempts may fail and not return a score.

    def significance(self, baseline=None):
        """
        Returns the p-value of a two-sample t-test: the probability of seeing
        a difference in scores at least this large if this experiment and the
        baseline actually have the same distribution of scores, i.e. if the
        change in scores is merely due to chance.

        Argument baseline is an optional Experiment; defaults to the experiment
        for the lab's default_parameters. (A standalone sketch of this test
        follows the class.)
        """
        if baseline is None:
            baseline = self.lab.default_parameters
        baseline = self.lab.get_experiment( baseline )

        if not self.scores or not baseline.scores:
            return float('nan')
        if len(self.scores) == 1:
            pass # TODO: How to pass probabilities & statistics?
        stat, pval = scipy.stats.ttest_ind(
            baseline.scores, self.scores, axis=None,
            # Both samples come from the same experimental setup, so they
            # should have the same variance.
            equal_var=True)
        return pval

    def mean(self):
        """ Returns the average score. """
        return np.mean(self.scores) if self.scores else float('-inf')

    def __str__(self):
        s = ''
        if not self.modifications:
            s += "Default Parameters\n"
        for mod, value in self.modifications:
            s += "Modification: %s = %s\n"%(mod, str(value))
        s += 'Hash: %X\n'%hash(self)
        s += 'Journal: %s\n'%self.journal
        s += 'Attempts: %d\n'%self.attempts
        if self.scores:
            s += 'Scores: %s\n' % ', '.join(str(x) for x in sorted(self.scores))
            mean = np.mean(self.scores)
            std  = np.std(self.scores)
            s += 'Mean & Standard Deviation: %g & %g\n'%(mean, std)
            s += 'P-Value: %g\n'%self.significance()
        s += 'Notes:' + self.notes
        return s

    def __hash__(self):
        if not hasattr(self, '_hash'):
            self._hash  = hash(self.parameters)
        return self._hash
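Experiment.significance() wraps a two-sample t-test from scipy. The snippet below shows just that statistic on two hand-made score lists (the numbers are invented for illustration): a small p-value means the difference between the score samples is unlikely to be due to chance.

import scipy.stats

baseline_scores   = [0.71, 0.69, 0.73, 0.70]   # e.g. scores of the default parameters
experiment_scores = [0.78, 0.80, 0.77, 0.79]   # e.g. scores of a modified experiment

stat, pval = scipy.stats.ttest_ind(
    baseline_scores, experiment_scores, axis=None, equal_var=True)
print("p-value: %g" % pval)   # small value -> the improvement is probably not chance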
Example #4
class ParticleData:
    """
    Attributes:
        p.parameters - ParameterSet
        p.velocities - ParameterSet full of float
        p.best       - ParameterSet
        p.best_score - float
        p.age        - Number of times this particle has been evaluated/updated.
        p.lock       - Is this particle currently being evaluated?

    A standalone sketch of the velocity and position update rule follows this class.
    """
    def __init__(self, initial_parameters, swarm=None):
        self.parameters = ParameterSet(initial_parameters)
        self.best = None
        self.best_score = None
        self.age = 0
        self.initialize_velocities(swarm)
        self.lock = False

    def initialize_velocities(self, swarm=None):
        # Make a new parameter structure for the velocity data.
        self.velocities = ParameterSet(self.parameters)
        # Iterate through every field in the structure.
        for path in self.parameters.enumerate():
            value = self.parameters.get(path)
            if swarm is not None:
                # Analyse the other particle velocities, so that the new
                # velocity is not too large or too small.
                data = [p.velocities.get(path) for p in swarm if p is not self]
                velocity = np.random.normal(np.mean(data), np.std(data))
            else:
                # New swarm, start with a large random velocity.
                max_percent_change = .10
                uniform = 2 * random.random() - 1
                if isinstance(value, float):
                    velocity = value * uniform * max_percent_change
                elif isinstance(value, int):
                    if abs(value) < 1. / max_percent_change:
                        velocity = uniform  # Parameters are rounded, so 50% chance this will mutate.
                    else:
                        velocity = value * uniform * max_percent_change
                else:
                    raise NotImplementedError()
            self.velocities.apply(path, velocity)

    def update_position(self):
        for path in self.parameters.enumerate():
            position = self.parameters.get(path)
            velocity = self.velocities.get(path)
            self.parameters.apply(path, position + velocity)

    def update_velocity(self, global_best):
        for path in self.parameters.enumerate():
            position = self.parameters.get(path)
            velocity = self.velocities.get(path)
            particle_best = self.best.get(path) if self.best is not None else position
            global_best_x = global_best.get(path) if global_best is not None else position

            # Update velocity: inertia plus random pulls toward the particle's
            # own best position and the swarm's global best position.
            particle_bias = (particle_best - position) * particle_strength * random.random()
            global_bias   = (global_best_x - position) * global_strength * random.random()
            velocity = velocity * velocity_strength + particle_bias + global_bias
            self.velocities.apply(path, velocity)

    def update(self, score, global_best):
        self.age += 1
        if self.best_score is not None:
            self.best_score *= 1 - score_decay_rate
        if self.best is None or score > self.best_score:
            self.best = ParameterSet(self.parameters)
            self.best_score = score
            print("New particle best score %g." % self.best_score)
        self.update_position()
        self.update_velocity(global_best)
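update_velocity() is the standard particle swarm rule: keep a fraction of the current velocity (inertia) and add random pulls toward the particle's own best and the swarm's global best. The module-level constants particle_strength, global_strength, velocity_strength, and score_decay_rate are not shown in this excerpt; the values below are assumptions chosen only to make the sketch run, and the sketch optimizes a single float instead of a ParameterSet.

# Minimal, self-contained particle swarm on one float parameter. The strength
# constants are assumed values, not taken from the source module.
import random

velocity_strength = 0.7   # inertia: how much of the old velocity is kept
particle_strength = 1.5   # pull toward this particle's own best position
global_strength   = 1.5   # pull toward the swarm's best position

def objective(x):
    return -(x - 3.0) ** 2          # toy objective, maximum at x = 3

positions   = [random.uniform(-10, 10) for _ in range(5)]
velocities  = [random.uniform(-1, 1)   for _ in range(5)]
best        = list(positions)       # each particle's best position so far
global_best = max(best, key=objective)

for _ in range(100):
    for i in range(len(positions)):
        x, v = positions[i], velocities[i]
        particle_bias = (best[i] - x) * particle_strength * random.random()
        global_bias   = (global_best - x) * global_strength * random.random()
        velocities[i] = v * velocity_strength + particle_bias + global_bias
        positions[i]  = x + velocities[i]
        if objective(positions[i]) > objective(best[i]):
            best[i] = positions[i]
            if objective(best[i]) > objective(global_best):
                global_best = best[i]

print("Best position found: %.3f" % global_best)   # should end up close to 3.0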