def __init__(self, config):
    """Create an empty history bound to ``config``.

    Resets the evaluation counter and the min/max trackers, creates a
    score cache unless one is already present on the instance or class,
    and records which attributes make up the checkpointable state.

    :param config: configuration object; ``config.printEvery`` is read
                   here to decide how often a generation report is
                   printed
    """
    super(History, self).__init__()
    self.config = config
    self.evals = 0
    self.minSolution = None
    self.minScore = np.inf
    self.maxSolution = None
    self.maxScore = -np.inf
    self._empty = True
    # Keep a cache supplied by a subclass (class attribute) or set earlier.
    if not hasattr(self, 'cache'):
        self.cache = LRUCache()  # 10,000 items by default
    self.updates = 0
    # How often to print the generation report.  A falsy ``printEvery``
    # falls back to a number of updates that is never reached in practice.
    # A plain integer literal is used (no ``L`` suffix) so the line is
    # valid on Python 2 and Python 3 alike.
    self.printEvery = config.printEvery or 1000000000000
    # Names of the attributes copied by __getstate__ / __setstate__.
    self.attrs = set([
        "evals",
        "minSolution",
        "minScore",
        "maxSolution",
        "maxScore",
        "attrs",
        "_empty",
        "cache",
        "updates",
        "printEvery",
        "useCache",
    ])
def __init__(self, config):
    """Create an empty history bound to ``config``.

    Resets the evaluation counter and the min/max trackers, creates a
    score cache unless one is already present on the instance or class,
    and records which attributes make up the checkpointable state.

    :param config: configuration object; ``config.printEvery`` is read
                   here to decide how often a generation report is
                   printed
    """
    super(History, self).__init__()
    self.config = config
    self.evals = 0
    self.minSolution = None
    self.minScore = np.inf
    self.maxSolution = None
    self.maxScore = -np.inf
    self._empty = True
    # Keep a cache supplied by a subclass (class attribute) or set earlier.
    if not hasattr(self, 'cache'):
        self.cache = LRUCache()  # 10,000 items by default
    self.updates = 0
    # How often to print the generation report.  A falsy ``printEvery``
    # falls back to a number of updates that is never reached in practice.
    # A plain integer literal is used (no ``L`` suffix) so the line is
    # valid on Python 2 and Python 3 alike.
    self.printEvery = config.printEvery or 1000000000000
    # Names of the attributes copied by __getstate__ / __setstate__.
    self.attrs = set([
        "evals",
        "minSolution",
        "minScore",
        "maxSolution",
        "maxScore",
        "attrs",
        "_empty",
        "cache",
        "updates",
        "printEvery",
        "useCache",
    ])
class BayesianDirichletScorer(object):
    """Callable that scores a network against data using log-gamma terms.

    Per-variable contributions are memoised in ``varCache`` under a key
    built from the variable's index and the string form of its parents.
    """

    def __init__(self):
        self.cache = {}
        self.varCache = LRUCache()

    def matchesPrior(self, data, configuration):
        """Return the prior count for ``configuration``; always ``1.0``."""
        return 1.0

    def matches(self, data, config):
        """Count the number of instances of ``config`` in ``data``.

        An item ``x`` counts as an instance when ``config <= x`` holds.

        :param data: iterable of observations
        :param config: configuration compared against each observation
        :returns: the number of matching observations (``0`` if none)
        """
        return sum(1 for x in data if config <= x)

    def __call__(self, network, data):
        """Score ``network`` on ``data``.

        Starts from a penalty of ``edgeRatio * len(data) * 10`` and adds,
        for each variable, a sum of ``gammaln`` terms over the variable's
        configurations and values.  The result is divided by
        ``len(data)``, so an empty ``data`` raises ``ZeroDivisionError``.

        :param network: object providing ``computeEdgeStatistics()``,
                        ``edgeRatio`` and ``variables``
        :param data: sized iterable of observations
        :returns: the accumulated score divided by ``len(data)``
        """
        network.computeEdgeStatistics()
        total = 0.0
        total -= network.edgeRatio * len(data) * 10
        for variable in network.variables:
            varKey = str(variable.index) + str(variable.parents)
            if self.varCache.has_key(varKey):
                # Reuse the contribution computed earlier for this
                # variable/parent combination.
                total += self.varCache[varKey]
                continue
            start = total
            for configuration in variable.configurations():
                prior = self.matchesPrior(data, configuration)
                total += gammaln(prior)
                total -= gammaln(prior + self.matches(data, configuration))
                for val in variable.values():
                    extended = configuration + val
                    priorVal = self.matchesPrior(data, extended)
                    total -= gammaln(priorVal)
                    total += gammaln(priorVal + self.matches(data, extended))
            # Cache only what this variable added to the running total.
            self.varCache[varKey] = total - start
        return total / len(data)
def __init__(self):
    """Give the scorer an LRU cache for per-variable entries and an empty dict cache."""
    self.varCache = LRUCache()
    self.cache = dict()
class BayesNet(Distribution):
    """Bayesian network distribution built from per-variable node objects.

    ``self.variables`` holds the node objects; each exposes ``index`` and a
    ``parents`` mapping keyed by parent index.  Edge lists, the edge ratio
    and the per-variable cache keys are derived from the parent maps by
    :meth:`computeEdgeStatistics` whenever ``self.dirty`` is set.

    Densities and fitted variable states are memoised in class-level caches
    that are shared by every instance.

    Most methods are wrapped by ``checkDeferred`` / ``checkDeferredWeights``,
    which are defined outside this class.
    NOTE(review): presumably these call :meth:`initialize` / :meth:`estimate`
    when ``deferred`` / ``deferredWeights`` is set -- confirm against the
    decorator definitions.
    """
    counter = 0   # incremented in __init__, decremented in __del__
    created = 0   # incremented in __init__ only
    config = Config(numVariables=None,
                    branchFactor=10,
                    variableGenerator=BayesVariable,
                    structureGenerator=GreedyStructureSearch(
                        10, BayesianInformationCriterion()),
                    sampler=DAGSampler(),
                    randomizer=lambda net: None)
    # Class-level caches shared by all networks.
    densityCache = LRUCache()
    weightCache = LRUCache()
    likelihoodCache = LRUCache()

    def __init__(self, **kwargs):
        """Build a network with no edges.

        :param kwargs: overrides merged on top of ``BayesNet.config``
        """
        config = BayesNet.config.merge(Config(**kwargs))
        super(BayesNet, self).__init__(**config.__properties__)
        self.numVariables = self.config.numVariables
        self.variableGenerator = self.config.variableGenerator
        self.structureGenerator = self.config.structureGenerator
        self.randomizer = self.config.randomizer
        self.sampler = self.config.sampler
        self.variables = [self.variableGenerator(i, self.config)
                          for i in xrange(self.numVariables)]
        self.decay = 1
        self.dirty = False
        self.acyclic = True
        self.edges = []
        self.edgeRatio = 0.0
        self.edgeTuples = None
        self.cacheKeys = dict((v.index, v.cacheKey) for v in self.variables)
        self.edgeMap = {}
        self.binary = zeros(len(self.variables) ** 2)
        self.deferred = False
        self.deferredWeights = False
        self.edgeRep = None
        self.densityStored = None
        self.cacheHits = 0
        self.cacheTries = 0
        self.changed = {}
        self.last = {}
        self.__class__.counter += 1
        self.__class__.created += 1

    def __del__(self):
        # The loop only unbinds the loop name on each pass; it is kept so
        # the finaliser behaves exactly as before.
        for variable in self.variables:
            del variable
        self.__class__.counter -= 1

    def __copy__(self):
        """Copy the network by round-tripping through ``str`` / ``parse``."""
        return self.__class__.parse(str(self), self.config)

    def _variableByIndex(self, index):
        """Return the first variable whose ``index`` matches, else ``None``."""
        for var in self.variables:
            if var.index == index:
                return var
        return None

    def _cachedDensity(self, variable, x):
        """Return ``variable.density(x)``, memoised in ``densityCache``.

        The cache key is the variable's cache key followed by the string
        form of ``x[variable.index]``.
        """
        key = self.cacheKeys[variable.index] + str(x[variable.index])
        if not BayesNet.densityCache.has_key(key):
            BayesNet.densityCache[key] = variable.density(x)
        return BayesNet.densityCache[key]

    @checkDeferred
    def get(self, index):
        """Return the variable with the given ``index`` or ``None``."""
        return self._variableByIndex(index)

    @checkDeferred
    def sort(self):
        """Reorder ``self.variables`` so parents come before their children.

        Variables are first ordered by number of parents.  A variable with
        a parent that has not been placed yet is moved to the end of the
        list; the pass stops once it reaches a variable that was already
        moved without any progress in between.

        :returns: ``True`` when no variable remained skipped at the end
                  (the value stored as ``self.acyclic`` by
                  :meth:`computeEdgeStatistics`), ``False`` otherwise
        """
        self.variables = sorted(self.variables, key=lambda x: len(x.parents))
        visited = {}
        skipped = {}
        ordering = LinkedList()
        for var in self.variables:
            ordering.append(var)

        current = ordering.first
        while current is not None:
            v = current.value
            advance = True
            for idx, p in v.parents.iteritems():
                if idx not in visited:
                    # A parent has not been placed yet: push this variable
                    # to the back and continue with its successor.
                    before = current.before
                    ordering.remove(current)
                    ordering.append(v)
                    if before is not None:
                        current = before.after
                    advance = False
                    skipped[v.index] = True
                    break
            if not advance and current.value.index in skipped:
                # Back at a variable already pushed to the end with no
                # placement in between: no further progress is possible.
                break
            if advance:
                current = current.after
                skipped = {}
                visited[v.index] = True

        current = ordering.first
        self.variables = []
        while current is not None:
            self.variables.append(current.value)
            current = current.after
        return len(skipped) == 0

    @checkDeferred
    def decompose(self):
        """Group variable indices into clusters linked by parent edges.

        After sorting, each variable joins the first cluster that contains
        one of its parents; otherwise it starts a new cluster.

        :returns: a list of lists of variable indices
        """
        self.sort()
        # self.variables is now ordered in a way to allow sampling
        clusters = []
        for var in self.variables:
            # is the parent in one of the clusters?
            member = False
            for c in clusters:
                for v in c:
                    if var.parents.has_key(v):
                        member = True
                        break
                if member:
                    c.append(var.index)
                    break
            if not member:
                clusters.append([var.index])
        return clusters

    @checkDeferredWeights
    def distribution(self, index, x):
        """Return ``distribution(x)`` of the variable with ``index``."""
        return self._variableByIndex(index).distribution(x)

    @checkDeferredWeights
    def randomize(self):
        """Apply the configured randomizer to this network."""
        return self.randomizer(self)

    @checkDeferredWeights
    def conditionalLikelihood(self, index, data):
        """Sum the log density of one variable over ``data``.

        :param index: index of the variable to evaluate
        :param data: iterable of observations
        :returns: the sum of ``log`` of the cached densities
        """
        var = self._variableByIndex(index)
        total = 0.0
        for x in data:
            total += log(self._cachedDensity(var, x))
        return total

    @checkDeferredWeights
    def likelihood(self, data):
        """Sum the log density of every variable over ``data``.

        Stores each variable's partial sum in ``self.last`` and the grand
        total in ``self.densityStored``.

        :returns: the total log likelihood
        """
        prod = 0.0
        for v in self.variables:
            vprod = 0.0
            for x in data:
                vprod += log(self._cachedDensity(v, x))
            if isinstance(vprod, np.ndarray):
                vprod = vprod[0]
            self.last[v.index] = vprod
            prod += vprod
        self.densityStored = prod
        return prod

    @checkDeferredWeights
    def likelihoodChanged(self, data, storeChange=False):
        """Recompute the log likelihood only for variables marked changed.

        Variables with a truthy entry in ``self.changed`` are re-evaluated
        on ``data``; all others contribute their value from ``self.last``.

        :param storeChange: when true, write the recomputed partial sums
                            back into ``self.last``
        :returns: the total log likelihood
        """
        total = 0.0
        for v in self.variables:
            if self.changed.get(v.index):
                inner = 0.0
                for x in data:
                    inner += log(self._cachedDensity(v, x))
                total += inner
                if storeChange:
                    self.last[v.index] = inner
            else:
                total += self.last[v.index]
        return total

    @checkDeferredWeights
    def marginal(self, cmpIdx, data):
        """Average ``marginalDensity`` of variable ``cmpIdx`` over ``data``."""
        var = self._variableByIndex(cmpIdx)
        total = 0.0
        for t in data:
            total += var.marginalDensity(t, self)
        return total / len(data)

    @checkDeferredWeights
    def map(self, cmpIdx, data):
        """Fraction of ``data`` where ``var.map(t)`` matches ``t`` at ``cmpIdx``."""
        var = self._variableByIndex(cmpIdx)
        total = 0.0
        for t in data:
            z = var.map(t)
            if z[cmpIdx] == t[cmpIdx]:
                total += 1.0
        return total / len(data)

    @checkDeferredWeights
    def density(self, x):
        """Return the product of the cached variable densities at ``x``."""
        self.computeEdgeStatistics()
        prod = 1.0
        for variable in self.variables:
            prod *= self._cachedDensity(variable, x)
        return prod

    @checkDeferredWeights
    def __call__(self):
        """sample the network"""
        return self.sampler(self)

    @checkDeferredWeights
    def sample(self):
        """Draw one sample; same as calling the network."""
        return self.__call__()

    @checkDeferredWeights
    def batch(self, num):
        """Return a list of ``num`` samples."""
        return [self.__call__() for i in xrange(num)]

    @checkDeferred
    def numFreeParameters(self):
        """Return the sum of ``numFreeParameters()`` over all variables."""
        return sum(variable.numFreeParameters()
                   for variable in self.variables)

    @checkDeferred
    def update(self, epoch, data):
        """Refit every variable on ``data``; ``epoch`` is not used here."""
        self.computeEdgeStatistics()
        for variable in self.variables:
            self.updateVar(variable, data)

    @checkDeferred
    def updateVar(self, variable, data):
        """Fit ``variable`` on ``data`` or restore its cached fitted state.

        The fitted state is shared through ``weightCache`` under the
        variable's cache key.
        """
        self.computeEdgeStatistics()
        key = self.cacheKeys[variable.index]
        if not BayesNet.weightCache.has_key(key):
            variable.update(data)
            BayesNet.weightCache[key] = variable.getComputedState()
        else:
            variable.restoreComputedState(BayesNet.weightCache[key])

    @checkDeferred
    def merge(self, other, data):
        """Delegate to ``structureGenerator.merge``."""
        return self.structureGenerator.merge(self, other, data)

    @checkDeferred
    def cross(self, other, data):
        """Delegate to ``structureGenerator.cross``."""
        return self.structureGenerator.cross(self, other, data)

    @checkDeferred
    def hasEdge(self, frm, t):
        """Whether the network has an edge from the parent with index 'from'
           to the child with index 'to'
        """
        # NOTE(review): self.edges holds (parent, child) tuples of variable
        # objects, while self.edgeTuples holds the index pairs.  If callers
        # pass indices, as the docstring suggests, this test only succeeds
        # when variables compare equal to their index -- confirm against
        # callers before changing.
        return (frm, t) in self.edges

    @checkDeferred
    def isAcyclic(self):
        """Is the network a DAG?"""
        if self.dirty:
            self.computeEdgeStatistics()
        return self.acyclic

    @checkDeferred
    def structureSearch(self, data):
        """Delegate to ``structureGenerator.search``."""
        return self.structureGenerator.search(self, data)

    @checkDeferredWeights
    def getComputedState(self):
        """Snapshot the derived edge data and every variable's state.

        :returns: a dict with keys ``acyclic``, ``edges``, ``edgeRatio``,
                  ``edgeTuples``, ``cacheKeys``, ``varstate`` (variable
                  index -> variable state) and ``varorder`` (position in
                  ``self.variables`` -> variable index)
        """
        state = {}
        self.computeEdgeStatistics()
        state['acyclic'] = self.acyclic
        state['edges'] = self.edges
        state['edgeRatio'] = self.edgeRatio
        state['edgeTuples'] = self.edgeTuples
        state['cacheKeys'] = self.cacheKeys
        varstate = {}
        varorder = {}
        for i, v in enumerate(self.variables):
            varstate[v.index] = v.getComputedState()
            varorder[i] = v.index
        state['varstate'] = varstate
        state['varorder'] = varorder
        return state

    def restoreComputedState(self, state):
        """Restore a snapshot produced by :meth:`getComputedState`.

        :param state: dict as returned by :meth:`getComputedState`
        """
        self.dirty = False
        self.acyclic = state['acyclic']
        self.edges = state['edges']
        self.edgeRatio = state['edgeRatio']
        self.edgeTuples = state['edgeTuples']
        self.cacheKeys = state['cacheKeys']
        varstate = state['varstate']
        # ``varorder`` maps position -> variable index.  Sorting needs the
        # opposite direction, so invert it; looking it up by index directly
        # would apply the inverse permutation instead of the saved order.
        position = dict((idx, pos)
                        for pos, idx in state['varorder'].iteritems())
        self.variables = sorted(self.variables,
                                key=lambda v: position[v.index])
        for v in self.variables:
            v.restoreComputedState(varstate[v.index])

    @checkDeferred
    def computeEdgeStatistics(self):
        """Rebuild the derived edge data if the network is marked dirty.

        Re-sorts the variables, then recomputes ``edges``, ``edgeRatio``,
        ``edgeTuples`` and ``cacheKeys`` and clears ``densityStored``.
        """
        if not self.dirty:
            return
        self.acyclic = self.sort()
        if self.edges is not None:
            del self.edges
        self.edges = []
        for variable in self.variables:
            for l, variable2 in variable.parents.iteritems():
                self.edges.append((variable2, variable))
        self.edges = sorted(self.edges,
                            key=lambda e: (e[0].index, e[1].index))
        # The small constant keeps the ratio defined for an empty network.
        self.edgeRatio = len(self.edges) / (1e-10 + (len(self.variables) ** 2))
        self.edgeTuples = [(frm.index, to.index) for frm, to in self.edges]
        self.cacheKeys = dict((v.index, v.cacheKey) for v in self.variables)
        self.dirty = False
        self.densityStored = None

    @checkDeferred
    def getChildren(self, variable):
        """Return the variables that list ``variable.index`` as a parent."""
        self.computeEdgeStatistics()
        return [candidate for candidate in self.variables
                if candidate.parents.has_key(variable.index)]

    @checkDeferred
    def updateVariables(self, data):
        """Call ``update(data)`` on every variable, bypassing the cache."""
        for variable in self.variables:
            variable.update(data)

    @checkDeferred
    def __getitem__(self, index):
        """Return whether the edge encoded by the flat ``index`` exists.

        ``index`` is decoded as ``frm = index % numVariables`` and
        ``to = index // numVariables``; results are memoised in
        ``self.edgeMap``.
        """
        if index in self.edgeMap:
            return self.edgeMap[index]
        frmidx = index % self.numVariables
        # Floor division keeps the list index an integer.
        toidx = index // self.numVariables
        for l, v in self.variables[toidx].parents.iteritems():
            if v.index == frmidx:
                self.edgeMap[index] = True
                return True
        self.edgeMap[index] = False
        return False

    def __len__(self):
        """Number of possible directed edges, ``numVariables ** 2``."""
        return self.numVariables ** 2

    def __getstate__(self):
        """Return the picklable state: variables and the three strategies."""
        return {
            'v': self.variables,
            'r': self.randomizer,
            's': self.sampler,
            'sg': self.structureGenerator,
        }

    def __setstate__(self, state):
        """Rebuild the network from :meth:`__getstate__` output.

        Parent links are re-created from each variable's ``parentIndices``
        and the derived edge data is recomputed.
        """
        self.dirty = True
        self.variables = state['v']
        self.numVariables = len(self.variables)
        self.randomizer = state['r']
        self.structureGenerator = state['sg']
        self.sampler = state['s']
        indexMap = {}
        for variable in self.variables:
            indexMap[variable.index] = variable
        for variable in self.variables:
            variable.parents = {}
            for i in variable.parentIndices:
                variable.addParent(indexMap[i])
        self.changed = {}
        self.last = {}
        self.deferred = False
        self.deferredWeights = True
        self.edgeRep = None
        self.edgeMap = {}
        self.edgeTuples = None
        self.edges = None
        self.binary = None
        self.densityStored = None
        self.computeEdgeStatistics()

    @checkDeferred
    def __str__(self):
        """Pickle the variable count followed by the edge index tuples."""
        self.computeEdgeStatistics()
        return cPickle.dumps(len(self.variables)) + cPickle.dumps(
            self.edgeTuples)

    def initialize(net):
        """Materialise the edges held in ``net.edgeRep``.

        Adds each ``(frm, to)`` pair as a parent link, clears the deferred
        flag and recomputes the edge statistics.
        """
        for frm, to in net.edgeRep:
            net.variables[to].addParent(net.variables[frm])
        net.dirty = True
        net.deferred = False
        net.edgeRep = None
        net.computeEdgeStatistics()

    @checkDeferred
    def estimate(net):
        """Fit every variable on ``net.config.data`` and clear ``deferredWeights``."""
        for variable in net.variables:
            net.updateVar(variable, net.config.data)
        net.deferredWeights = False

    @classmethod
    def parse(cls, rep, cfg):
        """Build a deferred network from the string made by :meth:`__str__`.

        SECURITY: ``cPickle.load`` can execute arbitrary code; only pass
        strings produced by :meth:`__str__` from a trusted source.

        :param rep: string holding the pickled variable count and edges
        :param cfg: configuration to build with; ``None`` selects
                    ``BayesNet.config``
        :returns: a new network with ``edgeRep`` set and both deferred
                  flags raised
        """
        stream = StringIO(rep)
        # The variable count comes first in the stream; it must be read to
        # reach the edge list even though its value is not needed.
        cPickle.load(stream)
        if cfg is None:
            cfg = BayesNet.config
        net = cls(**cfg.__properties__)
        net.edgeRep = cPickle.load(stream)
        net.deferred = True
        net.deferredWeights = True
        return net
class History(object):
    """A History used to track the progress of an optimization algorithm.

    Different algorithms should extend this class in order to define the
    minimal amount of history that needs to be stored in order for the
    algorithm to operate.
    """
    useCache = True
    attrs = set()
    sorted = False
    # whether the current history is the top level root for
    # all histories of this algorithm
    root = True

    def __init__(self, config):
        """Create an empty history bound to ``config``.

        :param config: configuration object; ``printEvery``, ``minimize``
                       and ``observer`` are read by this class
        """
        super(History, self).__init__()
        self.config = config
        self.evals = 0
        self.minSolution = None
        self.minScore = np.inf
        self.maxSolution = None
        self.maxScore = -np.inf
        self._empty = True
        # Keep a cache supplied by a subclass or set before construction.
        if not hasattr(self, 'cache'):
            self.cache = LRUCache()  # 10,000 items by default
        self.updates = 0
        # How often to print the generation report.  A falsy ``printEvery``
        # falls back to a count that is never reached in practice.  A plain
        # integer literal keeps this valid on Python 2 and Python 3.
        self.printEvery = config.printEvery or 1000000000000
        # Names of the attributes copied by __getstate__ / __setstate__.
        self.attrs = set([
            "evals",
            "minSolution",
            "minScore",
            "maxSolution",
            "maxScore",
            "attrs",
            "_empty",
            "cache",
            "updates",
            "printEvery",
            "useCache",
        ])

    def __getstate__(self):
        """Used by :class:`CheckpointedHistory` and
        :class:`CheckpointedMultipleHistory` to checkpoint a history
        so it can be rolled back after updates. Should return all
        objects in the history in a dictionary, sensitive to the fact that
        object references may need to be copied.

        List-valued attributes are shallow-copied; everything else is
        stored by reference.

        :returns: A dictionary with the state of the history
        """
        state = {}
        for attr in self.attrs:
            val = getattr(self, attr)
            if isinstance(val, list):
                val = list(val)
            state[attr] = val
        state['cfg'] = self.config.__properties__
        return state

    def __setstate__(self, state):
        """Restore the attributes named in ``self.attrs`` from ``state``
        and rebuild ``self.config`` from ``state['cfg']``.

        :param state: dictionary produced by :meth:`__getstate__`
        """
        for attr in self.attrs:
            setattr(self, attr, state[attr])
        import pyec.config
        self.config = pyec.config.Config(**state['cfg'])

    def empty(self):
        """Whether the history has been used or not."""
        return self._empty

    def better(self, score1, score2):
        """Return whether one score is better than another.

        Uses ``config.minimize`` to decide whether lesser or greater
        numbers are better.

        :param score1: the score in question (floating point number)
        :type score1: ``float``
        :param score2: the score being compared
        :type score2: ``float``
        :returns: whether ``score1`` is better than ``score2``
        """
        if self.config.minimize:
            return score1 < score2
        return score1 > score2

    def best(self):
        """Get the best solution, whether minimizing or maximizing.
        Same as ``optimal``
        """
        if self.config.minimize:
            return self.minimal()
        return self.maximal()

    optimal = best

    def minimal(self):
        """Get the minimal solution and its score.

        :returns: A tuple of two items with the solution object first
                  and the score of that item second
        """
        return self.minSolution, self.minScore

    def maximal(self):
        """Get the maximal solution and its score.

        :returns: A tuple of two items with the solution object first
                  and the score of that item second
        """
        return self.maxSolution, self.maxScore

    def num_evaluations(self):
        """Get the number of function evaluations performed in this
        history object.

        :returns: An integer representing the number of times the fitness
                  function or objective has been evaluated
        """
        return self.evals

    def update(self, population, fitness, space, opt):
        """Update the state of the :class:`History` with the latest
        population and its fitness scores. Subclasses should probably
        override ``internalUpdate`` rather than ``update``, unless they
        want to change how the min/max are tracked.

        If ``population`` is ``None``, this method does nothing and
        returns ``None``; this is so that you can set up a loop to run
        an optimizer like::

            p = None
            f = lambda x: ...
            t = History()
            o = some_optimizer
            s = o.config.space
            for i in xrange(generations):
                p = some_optimizer[t.update(p,f,s,0), f]()
            t.update(p,f,s,o)

        :param population: The previous population.
        :type population: list of points in the search domain
        :param fitness: The fitness / cost / objective function
        :type fitness: Any callable object
        :param space: The search domain
        :type space: :class:`Space`
        :param opt: The optimizer reporting this population
        :type opt: :class:`PopulationDistribution`
        :returns: The history (``self``), for continuations, or ``None``
                  when ``population`` is ``None``
        """
        if population is None:
            return

        self._empty = False
        self.evals += len(population)
        self.updates += 1

        # score the sample
        scored = [(x, self.score(x, fitness, space)) for x in population]

        if self.root and self.config.observer is not None:
            self.config.observer.report(opt, scored)

        for x, s in scored:
            if s > self.maxScore:
                self.maxScore = s
                self.maxSolution = x
            if s < self.minScore:
                self.minScore = s
                self.minSolution = x

        if not (self.updates % self.printEvery):
            import sys
            scores = [s for x, s in scored]
            genmin = min(scores)
            genmax = max(scores)
            genavg = np.average(scores)
            # Same text the two chained Python 2 ``print`` statements
            # produced: every item separated by one space, one newline.
            report = [self.updates, ": min", self.minScore,
                      " max", self.maxScore,
                      " this generation (min, avg, max): ",
                      genmin, genavg, genmax]
            sys.stdout.write(" ".join(str(item) for item in report) + "\n")

        self.internalUpdate(scored)
        return self

    def internalUpdate(self, population):
        """Update the state of the :class:`History` with the latest
        population and its fitness scores. This is an internal call
        intended to be overridden by subclasses. One of the important
        functions is to delete points no longer needed by the history.

        :param population: The previous population with its fitness scores.
        :type population: list of (point, score) tuples
        """
        pass

    def score(self, point, fitness, space):
        """Get the fitness score, caching where possible.

        :param point: A valid point in the space
        :type point: Must match ``space.type``
        :param fitness: The fitness function
        :type fitness: Any callable
        :param space: The space to which the point belongs
        :type space: :class:`Space`
        :returns: The fitness value, cached if possible; ``None`` when
                  ``fitness`` is ``None``; NaN when the point is out of
                  bounds or ``fitness`` raises ``ValueError``
        """
        if fitness is None:
            return None

        if self.useCache:
            try:
                hashed = space.hash(point)
                if self.cache.has_key(hashed):
                    return self.cache[hashed]
            except Exception:
                # Caching is best effort: a point that cannot be hashed or
                # looked up is simply evaluated.
                pass

        if not space.in_bounds(point):
            # use NaN so that the result is neither less than nor greater
            # than any other score, and therefore NEVER optimal
            s = np.nan
        else:
            try:
                s = fitness(space.convert(point))
            except ValueError:
                s = np.nan

        if self.useCache:
            try:
                self.cache[space.hash(point)] = s
            except Exception:
                # Best effort, as above: failing to store is not an error.
                pass

        return s

    def setCache(self, cache):
        """Replace the score cache with ``cache``."""
        self.cache = cache
class History(object):
    """A History used to track the progress of an optimization algorithm.

    Different algorithms should extend this class in order to define the
    minimal amount of history that needs to be stored in order for the
    algorithm to operate.
    """
    useCache = True
    attrs = set()
    sorted = False
    # whether the current history is the top level root for
    # all histories of this algorithm
    root = True

    def __init__(self, config):
        """Create an empty history bound to ``config``.

        :param config: configuration object; ``printEvery``, ``minimize``
                       and ``observer`` are read by this class
        """
        super(History, self).__init__()
        self.config = config
        self.evals = 0
        self.minSolution = None
        self.minScore = np.inf
        self.maxSolution = None
        self.maxScore = -np.inf
        self._empty = True
        # Keep a cache supplied by a subclass or set before construction.
        if not hasattr(self, 'cache'):
            self.cache = LRUCache()  # 10,000 items by default
        self.updates = 0
        # How often to print the generation report.  A falsy ``printEvery``
        # falls back to a count that is never reached in practice.  A plain
        # integer literal keeps this valid on Python 2 and Python 3.
        self.printEvery = config.printEvery or 1000000000000
        # Names of the attributes copied by __getstate__ / __setstate__.
        self.attrs = set([
            "evals",
            "minSolution",
            "minScore",
            "maxSolution",
            "maxScore",
            "attrs",
            "_empty",
            "cache",
            "updates",
            "printEvery",
            "useCache",
        ])

    def __getstate__(self):
        """Used by :class:`CheckpointedHistory` and
        :class:`CheckpointedMultipleHistory` to checkpoint a history
        so it can be rolled back after updates. Should return all
        objects in the history in a dictionary, sensitive to the fact that
        object references may need to be copied.

        List-valued attributes are shallow-copied; everything else is
        stored by reference.

        :returns: A dictionary with the state of the history
        """
        state = {}
        for attr in self.attrs:
            val = getattr(self, attr)
            if isinstance(val, list):
                val = list(val)
            state[attr] = val
        state['cfg'] = self.config.__properties__
        return state

    def __setstate__(self, state):
        """Restore the attributes named in ``self.attrs`` from ``state``
        and rebuild ``self.config`` from ``state['cfg']``.

        :param state: dictionary produced by :meth:`__getstate__`
        """
        for attr in self.attrs:
            setattr(self, attr, state[attr])
        import pyec.config
        self.config = pyec.config.Config(**state['cfg'])

    def empty(self):
        """Whether the history has been used or not."""
        return self._empty

    def better(self, score1, score2):
        """Return whether one score is better than another.

        Uses ``config.minimize`` to decide whether lesser or greater
        numbers are better.

        :param score1: the score in question (floating point number)
        :type score1: ``float``
        :param score2: the score being compared
        :type score2: ``float``
        :returns: whether ``score1`` is better than ``score2``
        """
        if self.config.minimize:
            return score1 < score2
        return score1 > score2

    def best(self):
        """Get the best solution, whether minimizing or maximizing.
        Same as ``optimal``
        """
        if self.config.minimize:
            return self.minimal()
        return self.maximal()

    optimal = best

    def minimal(self):
        """Get the minimal solution and its score.

        :returns: A tuple of two items with the solution object first
                  and the score of that item second
        """
        return self.minSolution, self.minScore

    def maximal(self):
        """Get the maximal solution and its score.

        :returns: A tuple of two items with the solution object first
                  and the score of that item second
        """
        return self.maxSolution, self.maxScore

    def num_evaluations(self):
        """Get the number of function evaluations performed in this
        history object.

        :returns: An integer representing the number of times the fitness
                  function or objective has been evaluated
        """
        return self.evals

    def update(self, population, fitness, space, opt):
        """Update the state of the :class:`History` with the latest
        population and its fitness scores. Subclasses should probably
        override ``internalUpdate`` rather than ``update``, unless they
        want to change how the min/max are tracked.

        If ``population`` is ``None``, this method does nothing and
        returns ``None``; this is so that you can set up a loop to run
        an optimizer like::

            p = None
            f = lambda x: ...
            t = History()
            o = some_optimizer
            s = o.config.space
            for i in xrange(generations):
                p = some_optimizer[t.update(p,f,s,0), f]()
            t.update(p,f,s,o)

        :param population: The previous population.
        :type population: list of points in the search domain
        :param fitness: The fitness / cost / objective function
        :type fitness: Any callable object
        :param space: The search domain
        :type space: :class:`Space`
        :param opt: The optimizer reporting this population
        :type opt: :class:`PopulationDistribution`
        :returns: The history (``self``), for continuations, or ``None``
                  when ``population`` is ``None``
        """
        if population is None:
            return

        self._empty = False
        self.evals += len(population)
        self.updates += 1

        # score the sample
        scored = [(x, self.score(x, fitness, space)) for x in population]

        if self.root and self.config.observer is not None:
            self.config.observer.report(opt, scored)

        for x, s in scored:
            if s > self.maxScore:
                self.maxScore = s
                self.maxSolution = x
            if s < self.minScore:
                self.minScore = s
                self.minSolution = x

        if not (self.updates % self.printEvery):
            import sys
            scores = [s for x, s in scored]
            genmin = min(scores)
            genmax = max(scores)
            genavg = np.average(scores)
            # Same text the two chained Python 2 ``print`` statements
            # produced: every item separated by one space, one newline.
            report = [self.updates, ": min", self.minScore,
                      " max", self.maxScore,
                      " this generation (min, avg, max): ",
                      genmin, genavg, genmax]
            sys.stdout.write(" ".join(str(item) for item in report) + "\n")

        self.internalUpdate(scored)
        return self

    def internalUpdate(self, population):
        """Update the state of the :class:`History` with the latest
        population and its fitness scores. This is an internal call
        intended to be overridden by subclasses. One of the important
        functions is to delete points no longer needed by the history.

        :param population: The previous population with its fitness scores.
        :type population: list of (point, score) tuples
        """
        pass

    def score(self, point, fitness, space):
        """Get the fitness score, caching where possible.

        :param point: A valid point in the space
        :type point: Must match ``space.type``
        :param fitness: The fitness function
        :type fitness: Any callable
        :param space: The space to which the point belongs
        :type space: :class:`Space`
        :returns: The fitness value, cached if possible; ``None`` when
                  ``fitness`` is ``None``; NaN when the point is out of
                  bounds or ``fitness`` raises ``ValueError``
        """
        if fitness is None:
            return None

        if self.useCache:
            try:
                hashed = space.hash(point)
                if self.cache.has_key(hashed):
                    return self.cache[hashed]
            except Exception:
                # Caching is best effort: a point that cannot be hashed or
                # looked up is simply evaluated.
                pass

        if not space.in_bounds(point):
            # use NaN so that the result is neither less than nor greater
            # than any other score, and therefore NEVER optimal
            s = np.nan
        else:
            try:
                s = fitness(space.convert(point))
            except ValueError:
                s = np.nan

        if self.useCache:
            try:
                self.cache[space.hash(point)] = s
            except Exception:
                # Best effort, as above: failing to store is not an error.
                pass

        return s

    def setCache(self, cache):
        """Replace the score cache with ``cache``."""
        self.cache = cache