def _prepare(self): self.watch.tag("preparing optimization", verbose=self.verbose) if self.verbose: bar = ProgressBar(width=100, steps=len(self.dbs), color="green") if self.multicore: pool = Pool(maxtasksperchild=1) try: for i, (_, d_) in enumerate( pool.imap(with_tracing(_methodcaller("_prepare", sideeffects=True)), self.learners) ): checkmem() self.learners[i].__dict__ = d_ if self.verbose: bar.inc() except Exception as e: logger.error("Error in child process. Terminating pool...") pool.close() raise e finally: pool.terminate() pool.join() else: for learner in self.learners: checkmem() learner._prepare() if self.verbose: bar.inc()
def _itergroundings(self, simplify=True, unsatfailure=True): # generate all groundings if not self.formulas: return global global_fastConjGrounding global_fastConjGrounding = self batches = list(rndbatches(self.formulas, 20)) batchsizes = [len(b) for b in batches] if self.verbose: bar = ProgressBar(width=100, steps=sum(batchsizes), color='green') i = 0 if self.multicore: pool = Pool() for gfs in pool.imap(with_tracing(create_formula_groundings), batches): if self.verbose: bar.inc(batchsizes[i]) bar.label(str(cumsum(batchsizes, i + 1))) i += 1 for gf in gfs: yield gf pool.terminate() pool.join() else: for gfs in imap(create_formula_groundings, batches): if self.verbose: bar.inc(batchsizes[i]) bar.label(str(cumsum(batchsizes, i + 1))) i += 1 for gf in gfs: yield gf
def _itergroundings(self, simplify=True, unsatfailure=True): # generate all groundings if not self.formulas: return global global_fastConjGrounding global_fastConjGrounding = self batches = list(rndbatches(self.formulas, 20)) batchsizes = [len(b) for b in batches] if self.verbose: bar = ProgressBar(width=100, steps=sum(batchsizes), color='green') i = 0 if self.multicore: pool = Pool() try: for gfs in pool.imap(with_tracing(create_formula_groundings), batches): if self.verbose: bar.inc(batchsizes[i]) bar.label(str(cumsum(batchsizes, i + 1))) i += 1 for gf in gfs: yield gf except Exception as e: logger.error('Error in child process. Terminating pool...') pool.close() raise e finally: pool.terminate() pool.join() else: for gfs in imap(create_formula_groundings, batches): if self.verbose: bar.inc(batchsizes[i]) bar.label(str(cumsum(batchsizes, i + 1))) i += 1 for gf in gfs: yield gf
def _itergroundings(self, simplify=False, unsatfailure=False): if self.verbose: bar = ProgressBar(width=100, color='green') for i, formula in enumerate(self.formulas): if self.verbose: bar.update((i+1) / float(len(self.formulas))) for gndformula in formula.itergroundings(self.mrf, simplify=simplify): if unsatfailure and gndformula.weight == HARD and gndformula(self.mrf.evidence) != 1: print gndformula.print_structure(self.mrf.evidence) raise SatisfiabilityException('MLN is unsatisfiable due to hard constraint violation %s (see above)' % self.mrf.formulas[gndformula.idx]) yield gndformula
def _prepare(self): self.watch.tag('preparing optimization', verbose=self.verbose) if self.verbose: bar = ProgressBar(width=100, steps=len(self.dbs), color='green') if self.multicore: for i, (_, d_) in enumerate(Pool(maxtasksperchild=1).imap(with_tracing(_methodcaller('_prepare', sideeffects=True)), self.learners)): checkmem() self.learners[i].__dict__ = d_ if self.verbose: bar.inc() else: for learner in self.learners: checkmem() learner._prepare() if self.verbose: bar.inc()
def __init__(self, mln_, dbs, method, **params): ''' :param dbs: list of :class:`mln.database.Database` objects to be used for learning. :param mln_: the MLN object to be used for learning :param method: the algorithm to be used for learning. Must be a class provided by :class:`mln.methods.LearningMethods`. :param **params: additional parameters handed over to the base learners. ''' self.dbs = dbs self._params = edict(params) if not mln_._materialized: self.mln = mln_.materialize(*dbs) else: self.mln = mln_ self.watch = StopWatch() self.learners = [None] * len(dbs) self.watch.tag('setup learners', verbose=self.verbose) if self.verbose: bar = ProgressBar(width=100, steps=len(dbs), color='green') if self.multicore: pool = Pool(maxtasksperchild=1) logger.debug('Setting up multi-core processing for {} cores'.format(pool._processes)) try: for i, learner in pool.imap(with_tracing(_setup_learner), self._iterdbs(method)): self.learners[i] = learner if self.verbose: bar.label('Database %d, %s' % ((i + 1), learner.name)) bar.inc() except Exception as e: logger.error('Error in child process. Terminating pool...') pool.close() raise e finally: pool.terminate() pool.join() else: for i, db in enumerate(self.dbs): _, learner = _setup_learner((i, self.mln, db, method, self._params + {'multicore': False})) self.learners[i] = learner if self.verbose: bar.label('Database %d, %s' % ((i + 1), learner.name)) bar.inc() if self.verbose: print 'set up', self.name self.watch.finish('setup learners')
def _itergroundings(self, simplify=False, unsatfailure=False): if self.verbose: bar = ProgressBar(width=100, color='green') for i, formula in enumerate(self.formulas): if self.verbose: bar.update((i + 1) / float(len(self.formulas))) for gndformula in formula.itergroundings(self.mrf, simplify=simplify): if unsatfailure and gndformula.weight == HARD and gndformula( self.mrf.evidence) != 1: print gndformula.print_structure(self.mrf.evidence) raise SatisfiabilityException( 'MLN is unsatisfiable due to hard constraint violation %s (see above)' % self.mrf.formulas[gndformula.idx]) yield gndformula
def _run(self): i = 0 i_max = self.maxsteps thr = self.thr if self.verbose: bar = ProgressBar(width=100, steps=i_max, color='green') while i < i_max and self.sum > self.thr: # randomly choose a variable to modify var = self.mrf.variables[random.randint(0, len(self.mrf.variables)-1)] evdict = var.value2dict(var.evidence_value(self.mrf.evidence)) valuecount = var.valuecount(evdict) if valuecount == 1: # this is evidence continue # compute the sum of relevant gf weights before the modification sum_before = 0 for gf in self.var2gf[var.idx]: sum_before += (self.hardw if gf.weight == HARD else gf.weight) * (1 - gf(self.state)) # modify the state validx = random.randint(0, valuecount - 1) value = [v for _, v in var.itervalues(evdict)][validx] oldstate = list(self.state) var.setval(value, self.state) # compute the sum after the modification sum_after = 0 for gf in self.var2gf[var.idx]: sum_after += (self.hardw if gf.weight == HARD else gf.weight) * (1 - gf(self.state)) # determine whether to keep the new state keep = False improvement = sum_after - sum_before if improvement < 0 or sum_after <= thr: prob = 1.0 keep = True else: prob = (1.0 - min(1.0, abs(improvement / self.sum))) * (1 - (float(i) / i_max)) keep = random.uniform(0.0, 1.0) <= prob # keep = False # !!! no annealing # apply new objective value if keep: self.sum += improvement else: self.state = oldstate # next iteration i += 1 if self.verbose: bar.label('sum = %f' % self.sum) bar.inc() if self.verbose: print "SAMaxWalkSAT: %d iterations, sum=%f, threshold=%f" % (i, self.sum, self.thr) self.mrf.mln.weights = self.weights return dict([(str(q), self.state[q.gndatom.idx]) for q in self.queries])
def _compute_statistics(self):
    """Collect, for every possible world, the summed truth per formula index.

    ``self._stat`` becomes a list with one dict per world (in the order
    produced by ``self.mrf.iterallworlds()``), mapping ``gf.idx`` to the sum
    of the non-zero truth values of its groundings in that world.  While
    iterating, the index of the first world equal to the evidence is stored
    in ``self._eworld_idx`` if that attribute is still ``None``.
    """
    self._stat = []
    grounder = DefaultGroundingFactory(self.mrf)
    evidence_world = list(self.mrf.evidence)
    if self.verbose:
        bar = ProgressBar(width=100, steps=self.mrf.countworlds(), color='green')
    for world_idx, world in self.mrf.iterallworlds():
        if self.verbose:
            bar.label(str(world_idx))
            bar.inc()
        counts = {}
        self._stat.append(counts)
        if self._eworld_idx is None and world == evidence_world:
            self._eworld_idx = world_idx
        for gf in grounder.itergroundings():
            truth = gf(world)
            if truth == 0:
                continue
            formula_idx = gf.idx
            counts[formula_idx] = counts.get(formula_idx, 0) + truth
def __init__(self, mln_, dbs, method, **params): """ :param dbs: list of :class:`mln.database.Database` objects to be used for learning. :param mln_: the MLN object to be used for learning :param method: the algorithm to be used for learning. Must be a class provided by :class:`mln.methods.LearningMethods`. :param **params: additional parameters handed over to the base learners. """ self.dbs = dbs self._params = edict(params) if not mln_._materialized: self.mln = mln_.materialize(*dbs) else: self.mln = mln_ self.watch = StopWatch() self.learners = [None] * len(dbs) self.watch.tag("setup learners", verbose=self.verbose) if self.verbose: bar = ProgressBar(width=100, steps=len(dbs), color="green") if self.multicore: pool = Pool(maxtasksperchild=1) logger.debug("Setting up multi-core processing for {} cores".format(pool._processes)) try: for i, learner in pool.imap(with_tracing(_setup_learner), self._iterdbs(method)): self.learners[i] = learner if self.verbose: bar.label("Database %d, %s" % ((i + 1), learner.name)) bar.inc() except Exception as e: logger.error("Error in child process. Terminating pool...") pool.close() raise e finally: pool.terminate() pool.join() else: for i, db in enumerate(self.dbs): _, learner = _setup_learner((i, self.mln, db, method, self._params + {"multicore": False})) self.learners[i] = learner if self.verbose: bar.label("Database %d, %s" % ((i + 1), learner.name)) bar.inc() if self.verbose: print "set up", self.name self.watch.finish("setup learners")
def _run(self): i = 0 i_max = self.maxsteps thr = self.thr if self.verbose: bar = ProgressBar(width=100, steps=i_max, color='green') while i < i_max and self.sum > self.thr: # randomly choose a variable to modify var = self.mrf.variables[random.randint( 0, len(self.mrf.variables) - 1)] evdict = var.value2dict(var.evidence_value(self.mrf.evidence)) valuecount = var.valuecount(evdict) if valuecount == 1: # this is evidence continue # compute the sum of relevant gf weights before the modification sum_before = 0 for gf in self.var2gf[var.idx]: sum_before += (self.hardw if gf.weight == HARD else gf.weight) * (1 - gf(self.state)) # modify the state validx = random.randint(0, valuecount - 1) value = [v for _, v in var.itervalues(evdict)][validx] oldstate = list(self.state) var.setval(value, self.state) # compute the sum after the modification sum_after = 0 for gf in self.var2gf[var.idx]: sum_after += (self.hardw if gf.weight == HARD else gf.weight) * (1 - gf(self.state)) # determine whether to keep the new state keep = False improvement = sum_after - sum_before if improvement < 0 or sum_after <= thr: prob = 1.0 keep = True else: prob = (1.0 - min(1.0, abs(improvement / self.sum))) * ( 1 - (float(i) / i_max)) keep = random.uniform(0.0, 1.0) <= prob # keep = False # !!! no annealing # apply new objective value if keep: self.sum += improvement else: self.state = oldstate # next iteration i += 1 if self.verbose: bar.label('sum = %f' % self.sum) bar.inc() if self.verbose: print "SAMaxWalkSAT: %d iterations, sum=%f, threshold=%f" % ( i, self.sum, self.thr) self.mrf.mln.weights = self.weights return dict([(str(q), self.state[q.gndatom.idx]) for q in self.queries])
def __init__(self, mln_, dbs, method, **params): ''' :param dbs: list of :class:`mln.database.Database` objects to be used for learning. :param mln_: the MLN object to be used for learning :param method: the algorithm to be used for learning. Must be a class provided by :class:`mln.methods.LearningMethods`. :param **params: additional parameters handed over to the base learners. ''' self.dbs = dbs self._params = edict(params) if not mln_._materialized: self.mln = mln_.materialize(*dbs) else: self.mln = mln_ self.watch = StopWatch() self.learners = [None] * len(dbs) self.watch.tag('setup learners', verbose=self.verbose) if self.verbose: bar = ProgressBar(width=100, steps=len(dbs), color='green') if self.multicore: pool = Pool(maxtasksperchild=1) logger.debug('Setting up multi-core processing for %d cores' % pool._processes) for i, learner in pool.imap(with_tracing(_setup_learner), self._iterdbs(method)): self.learners[i] = learner if self.verbose: bar.label('Database %d, %s' % ((i+1), learner.name)) bar.inc() pool.close() pool.join() else: for i, db in enumerate(self.dbs): _, learner = _setup_learner((i, self.mln, db, method, self._params + {'multicore': False})) self.learners[i] = learner if self.verbose: bar.label('Database %d, %s' % ((i+1), learner.name)) bar.inc() if self.verbose: print 'set up', self.name self.watch.finish('setup learners')
def _prepare(self): self.watch.tag('preparing optimization', verbose=self.verbose) if self.verbose: bar = ProgressBar(width=100, steps=len(self.dbs), color='green') if self.multicore: for i, (_, d_) in enumerate( Pool(maxtasksperchild=1).imap( with_tracing( _methodcaller('_prepare', sideeffects=True)), self.learners)): checkmem() self.learners[i].__dict__ = d_ if self.verbose: bar.inc() else: for learner in self.learners: checkmem() learner._prepare() if self.verbose: bar.inc()
def _run(self, **params):
    '''
    Run Gibbs sampling on ``self.chains`` chains and return the first
    element of the chain group's results.

    ``params`` is accepted but not read in this method.  Sampling stops
    after ``self.maxsteps`` steps; the convergence counter is reset on every
    step and never incremented here, so it only ends the loop early when
    ``self.chains`` is 0.
    '''
    # initialize chains
    group = MCMCInference.ChainGroup(self)
    for _ in range(self.chains):
        group.chain(GibbsSampler.Chain(self, self.queries))

    # do Gibbs sampling
    converged = 0
    steps = 0
    if self.verbose:
        bar = ProgressBar(width=100, color='green', steps=self.maxsteps)
    while True:
        if converged == self.chains or steps >= self.maxsteps:
            break
        converged = 0
        steps += 1
        for sampler in group.chains:
            sampler.step()
        if self.verbose:
            bar.inc()
            bar.label('%d / %d' % (steps, self.maxsteps))

    # get the results
    return group.results()[0]
def _prepare(self): self.watch.tag('preparing optimization', verbose=self.verbose) if self.verbose: bar = ProgressBar(width=100, steps=len(self.dbs), color='green') if self.multicore: pool = Pool(maxtasksperchild=1) try: for i, (_, d_) in enumerate(pool.imap(with_tracing(_methodcaller('_prepare', sideeffects=True)), self.learners)): checkmem() self.learners[i].__dict__ = d_ if self.verbose: bar.inc() except Exception as e: logger.error('Error in child process. Terminating pool...') pool.close() raise e finally: pool.terminate() pool.join() else: for learner in self.learners: checkmem() learner._prepare() if self.verbose: bar.inc()
def _run(self): """ verbose: whether to print results (or anything at all, in fact) details: (given that verbose is true) whether to output additional status information debug: (given that verbose is true) if true, outputs debug information, in particular the distribution over possible worlds debugLevel: level of detail for debug mode """ # check consistency with hard constraints: self._watch.tag('check hard constraints', verbose=self.verbose) hcgrounder = FastConjunctionGrounding(self.mrf, simplify=False, unsatfailure=True, formulas=[f for f in self.mrf.formulas if f.weight == HARD], **(self._params + {'multicore': False, 'verbose': False})) for gf in hcgrounder.itergroundings(): if isinstance(gf, Logic.TrueFalse) and gf.truth() == .0: raise SatisfiabilityException('MLN is unsatisfiable due to hard constraint violation by evidence: {} ({})'.format(str(gf), str(self.mln.formula(gf.idx)))) self._watch.finish('check hard constraints') # compute number of possible worlds worlds = 1 for variable in self.mrf.variables: values = variable.valuecount(self.mrf.evidence) worlds *= values numerators = [0.0 for i in range(len(self.queries))] denominator = 0. # start summing logger.debug("Summing over %d possible worlds..." % worlds) if worlds > 500000 and self.verbose: print colorize('!!! %d WORLDS WILL BE ENUMERATED !!!' % worlds, (None, 'red', True), True) k = 0 self._watch.tag('enumerating worlds', verbose=self.verbose) global global_enumAsk global_enumAsk = self bar = None if self.verbose: bar = ProgressBar(width=100, steps=worlds, color='green') if self.multicore: pool = Pool() logger.debug('Using multiprocessing on {} core(s)...'.format(pool._processes)) try: for num, denum in pool.imap(with_tracing(eval_queries), self.mrf.worlds()): denominator += denum k += 1 for i, v in enumerate(num): numerators[i] += v if self.verbose: bar.inc() except Exception as e: logger.error('Error in child process. 
Terminating pool...') pool.close() raise e finally: pool.terminate() pool.join() else: # do it single core for world in self.mrf.worlds(): # compute exp. sum of weights for this world num, denom = eval_queries(world) denominator += denom for i, _ in enumerate(self.queries): numerators[i] += num[i] k += 1 if self.verbose: bar.update(float(k) / worlds) logger.debug("%d worlds enumerated" % k) self._watch.finish('enumerating worlds') if 'grounding' in self.grounder.watch.tags: self._watch.tags['grounding'] = self.grounder.watch['grounding'] if denominator == 0: raise SatisfiabilityException( 'MLN is unsatisfiable. All probability masses returned 0.') # normalize answers dist = map(lambda x: float(x) / denominator, numerators) result = {} for q, p in zip(self.queries, dist): result[str(q)] = p return result
def _run(self):
    '''
    Run MC-SAT and return the first element of the chain group's results.

    The method takes no arguments; it reads its settings from the instance.
    Settings read here:

    p:            passed to SampleSAT as ``p`` (documented previously as the
                  probability of a greedy (WalkSAT) move)
    chains:       number of chains to create
    maxsteps:     number of MC-SAT steps to perform
    rndseed:      if not None, used to seed the ``random`` module
    softevidence: only logged here
    verbose:      whether to show a progress bar

    Earlier versions of this docstring listed further options (initAlgo,
    details, infoInterval, resultsInterval, debug, debugLevel,
    keepResultsHistory, referenceResults, saveHistoryFile, sampleCallback,
    handleSoftEvidence); none of them is read in this method.
    '''
    logger.debug("starting MC-SAT with maxsteps=%d, softevidence=%s" % (self.maxsteps, self.softevidence))
    # initialize the KB and gather required info
    self._initkb()
    # print CNF KB
    logger.debug("CNF KB:")
    for gf in self.gndformulas:
        logger.debug("%7.3f %s" % (gf.weight, str(gf)))
    # bare Python 2 print statement: emits an empty line on stdout
    print
    # set the random seed if it was given
    if self.rndseed is not None:
        random.seed(self.rndseed)
    # create chains
    chaingroup = MCMCInference.ChainGroup(self)
    self.chaingroup = chaingroup
    for i in range(self.chains):
        chain = MCMCInference.Chain(self, self.queries)
        chaingroup.chain(chain)
        # satisfy hard constraints using initialization algorithm
        M = []    # clause indices of logical hard ground formulas
        NLC = []  # hard ground formulas for which islogical() is false
        # NOTE(review): this loop reuses the name ``i`` of the chain loop
        for i, gf in enumerate(self.gndformulas):
            if gf.weight == HARD:
                if gf.islogical():
                    # gf2clauseidx[i] is unpacked as the arguments of range()
                    clause_range = self.gf2clauseidx[i]
                    M.extend(range(*clause_range))
                else:
                    NLC.append(gf)
        if M or NLC:
            logger.debug('Running SampleSAT')
            chain.state = SampleSAT(self.mrf, chain.state, M, NLC, self, p=self.p).run() # Note: can't use p=1.0 because there is a chance of getting into an oscillating state
    # NOTE(review): ``chain`` is the last chain created above; it is
    # undefined when self.chains is 0 -- confirm this cannot happen.
    if praclog.level == praclog.DEBUG:
        self.mrf.print_world_vars(chain.state)
    self.step = 1
    logger.debug('running MC-SAT with %d chains' % len(chaingroup.chains))
    self._watch.tag('running MC-SAT', self.verbose)
    if self.verbose:
        bar = ProgressBar(width=100, steps=self.maxsteps, color='green')
    while self.step <= self.maxsteps:
        # take one step in each chain
        for chain in chaingroup.chains:
            # choose a subset of the satisfied formulas and sample a state that satisfies them
            state = self._satisfy_subset(chain)
            # update chain counts
            chain.update(state)
        if self.verbose:
            bar.inc()
            bar.label('%d / %d' % (self.step, self.maxsteps))
        # intermediate results
        self.step += 1
    # get results
    # the loop leaves self.step at maxsteps + 1; undo the last increment
    self.step -= 1
    results = chaingroup.results()
    return results[0]