def _itergroundings(self, simplify=False, unsatfailure=False):
    global global_bpll_grounding
    global_bpll_grounding = self
    if self.multicore:
        pool = Pool(maxtasksperchild=1)
        try:
            for gndresult in pool.imap(with_tracing(create_formula_groundings), self.formulas):
                for fidx, stat in gndresult:
                    for (varidx, validx, val) in stat:
                        self._varidx2fidx[varidx].add(fidx)
                        self._addstat(fidx, varidx, validx, val)
                    checkmem()
                yield None
        except CtrlCException as e:
            pool.terminate()
            raise e
        pool.close()
        pool.join()
    else:
        for gndresult in imap(create_formula_groundings, self.formulas):
            for fidx, stat in gndresult:
                for (varidx, validx, val) in stat:
                    self._varidx2fidx[varidx].add(fidx)
                    self._addstat(fidx, varidx, validx, val)
            yield None
def _grad(self, w):
    grad = numpy.zeros(len(self.mln.formulas), numpy.float64)
    if False:  # self.multicore:
        # it turned out that it doesn't pay off to evaluate the gradient
        # in separate processes, so we turn it off
        pool = Pool()
        try:
            for i, (grad_, d_) in enumerate(pool.imap(with_tracing(_methodcaller('_grad', sideeffects=True)),
                                                      map(lambda l: (l, w), self.learners))):
                self.learners[i].__dict__ = d_
                grad += grad_
        except Exception as e:
            logger.error('Error in child process. Terminating pool...')
            pool.close()
            raise e
        finally:
            pool.terminate()
            pool.join()
    else:
        for learner in self.learners:
            grad += learner._grad(w)
    return grad
def _itergroundings(self, simplify=False, unsatfailure=False):
    global global_bpll_grounding
    global_bpll_grounding = self
    if self.multicore:
        pool = Pool(maxtasksperchild=1)
        try:
            for gndresult in pool.imap(with_tracing(create_formula_groundings), self.formulas):
                for fidx, stat in gndresult:
                    for (varidx, validx, val) in stat:
                        self._varidx2fidx[varidx].add(fidx)
                        self._addstat(fidx, varidx, validx, val)
                    checkmem()
                yield None
        except Exception as e:
            logger.error('Error in child process. Terminating pool...')
            pool.close()
            raise e
        finally:
            pool.terminate()
            pool.join()
    else:
        for gndresult in imap(create_formula_groundings, self.formulas):
            for fidx, stat in gndresult:
                for (varidx, validx, val) in stat:
                    self._varidx2fidx[varidx].add(fidx)
                    self._addstat(fidx, varidx, validx, val)
            yield None
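# `with_tracing` wraps every function handed to a multiprocessing Pool above. By
# default, an exception raised in a pool worker arrives in the parent without its
# child-side traceback, which makes grounding bugs hard to locate. A minimal
# sketch of such a wrapper, stated as an assumption about its behavior rather
# than pracmln's actual implementation; it is written as a class instead of a
# closure so that it stays picklable for Pool.imap:
import traceback

class with_tracing(object):

    def __init__(self, func):
        # func must be a module-level function so the wrapper remains picklable
        self.func = func

    def __call__(self, *args, **kwargs):
        try:
            return self.func(*args, **kwargs)
        except Exception:
            # print the traceback while still inside the child process,
            # where the full stack information is available
            traceback.print_exc()
            raise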
def _itergroundings(self, simplify=True, unsatfailure=True):
    # generate all groundings
    if not self.formulas:
        return
    global global_fastConjGrounding
    global_fastConjGrounding = self
    batches = list(rndbatches(self.formulas, 20))
    batchsizes = [len(b) for b in batches]
    if self.verbose:
        bar = ProgressBar(width=100, steps=sum(batchsizes), color='green')
        i = 0
    if self.multicore:
        pool = Pool()
        try:
            for gfs in pool.imap(with_tracing(create_formula_groundings), batches):
                if self.verbose:
                    bar.inc(batchsizes[i])
                    bar.label(str(cumsum(batchsizes, i + 1)))
                    i += 1
                for gf in gfs:
                    yield gf
        except Exception as e:
            logger.error('Error in child process. Terminating pool...')
            pool.close()
            raise e
        finally:
            pool.terminate()
            pool.join()
    else:
        for gfs in imap(create_formula_groundings, batches):
            if self.verbose:
                bar.inc(batchsizes[i])
                bar.label(str(cumsum(batchsizes, i + 1)))
                i += 1
            for gf in gfs:
                yield gf
def _prepare(self):
    self.watch.tag('preparing optimization', verbose=self.verbose)
    if self.verbose:
        bar = ProgressBar(width=100, steps=len(self.dbs), color='green')
    if self.multicore:
        pool = Pool(maxtasksperchild=1)
        try:
            for i, (_, d_) in enumerate(pool.imap(with_tracing(_methodcaller('_prepare', sideeffects=True)),
                                                  self.learners)):
                checkmem()
                self.learners[i].__dict__ = d_
                if self.verbose:
                    bar.inc()
        except Exception as e:
            logger.error('Error in child process. Terminating pool...')
            pool.close()
            raise e
        finally:
            pool.terminate()
            pool.join()
    else:
        for learner in self.learners:
            checkmem()
            learner._prepare()
            if self.verbose:
                bar.inc()
def _itergroundings(self, simplify=True, unsatfailure=True):
    # generate all groundings
    if not self.formulas:
        return
    global global_fastConjGrounding
    global_fastConjGrounding = self
    batches = list(rndbatches(self.formulas, 20))
    batchsizes = [len(b) for b in batches]
    if self.verbose:
        bar = ProgressBar(width=100, steps=sum(batchsizes), color='green')
        i = 0
    if self.multicore:
        pool = Pool()
        for gfs in pool.imap(with_tracing(create_formula_groundings), batches):
            if self.verbose:
                bar.inc(batchsizes[i])
                bar.label(str(cumsum(batchsizes, i + 1)))
                i += 1
            for gf in gfs:
                yield gf
        pool.terminate()
        pool.join()
    else:
        for gfs in imap(create_formula_groundings, batches):
            if self.verbose:
                bar.inc(batchsizes[i])
                bar.label(str(cumsum(batchsizes, i + 1)))
                i += 1
            for gf in gfs:
                yield gf
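# The fast-conjunction grounder above depends on two small helpers: `rndbatches`,
# which splits the formulas into randomly shuffled batches of a given size, and
# `cumsum`, which computes how many formulas the first `i` batches contain for
# the progress label. Their semantics are inferred from the call sites; a sketch
# under those assumptions:
import random

def rndbatches(items, size):
    '''Shuffle `items` and partition them into consecutive batches of at most `size` elements.'''
    items = list(items)
    random.shuffle(items)
    return [items[i:i + size] for i in range(0, len(items), size)]

def cumsum(values, upto):
    '''Sum of the first `upto` entries of `values`.'''
    return sum(values[:upto])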
def _prepare(self):
    self.watch.tag('preparing optimization', verbose=self.verbose)
    if self.verbose:
        bar = ProgressBar(steps=len(self.dbs), color='green')
    if self.multicore:
        pool = Pool(maxtasksperchild=1)
        try:
            for i, (_, d_) in enumerate(pool.imap(with_tracing(_methodcaller('_prepare', sideeffects=True)),
                                                  self.learners)):
                checkmem()
                self.learners[i].__dict__ = d_
                if self.verbose:
                    bar.inc()
        except Exception as e:
            logger.error('Error in child process. Terminating pool...')
            pool.close()
            raise e
        finally:
            pool.terminate()
            pool.join()
    else:
        for learner in self.learners:
            checkmem()
            learner._prepare()
            if self.verbose:
                bar.inc()
def __init__(self, mln_, dbs, method, **params):
    '''
    :param dbs:      list of :class:`mln.database.Database` objects to be used for learning.
    :param mln_:     the MLN object to be used for learning
    :param method:   the algorithm to be used for learning. Must be a class provided by
                     :class:`mln.methods.LearningMethods`.
    :param **params: additional parameters handed over to the base learners.
    '''
    self.dbs = dbs
    self._params = edict(params)
    if not mln_._materialized:
        self.mln = mln_.materialize(*dbs)
    else:
        self.mln = mln_
    self.watch = StopWatch()
    self.learners = [None] * len(dbs)
    self.watch.tag('setup learners', verbose=self.verbose)
    if self.verbose:
        bar = ProgressBar(steps=len(dbs), color='green')
    if self.multicore:
        pool = Pool(maxtasksperchild=1)
        logger.debug('Setting up multi-core processing for {} cores'.format(pool._processes))
        try:
            for i, learner in pool.imap(with_tracing(_setup_learner), self._iterdbs(method)):
                self.learners[i] = learner
                if self.verbose:
                    bar.label('Database %d, %s' % ((i + 1), learner.name))
                    bar.inc()
        except Exception as e:
            logger.error('Error in child process. Terminating pool...')
            pool.close()
            raise e
        finally:
            pool.terminate()
            pool.join()
        # as MLNs and formulas have been copied to the separate processes,
        # the mln pointers of the formulas now point to the MLNs in these child processes
        # we have to copy the materialized weight back to our parent process
        self.mln.weights = list(first(self.learners).mrf.mln.weights)
    else:
        for i, db in enumerate(self.dbs):
            _, learner = _setup_learner((i, self.mln, db, method, self._params + {'multicore': False}))
            self.learners[i] = learner
            if self.verbose:
                bar.label('Database %d, %s' % ((i + 1), learner.name))
                bar.inc()
    if self.verbose:
        print 'set up', self.name
    self.watch.finish('setup learners')
def _hessian(self, w):
    N = len(self.mln.formulas)
    hessian = numpy.matrix(numpy.zeros((N, N)))
    if self.multicore:
        pool = Pool()
        for h in pool.imap(with_tracing(_methodcaller('_hessian')), map(lambda l: (l, w), self.learners)):
            hessian += h
        pool.terminate()
        pool.join()
    else:
        for learner in self.learners:
            hessian += learner._hessian(w)
    return hessian
def _prepare(self):
    self.watch.tag('preparing optimization', verbose=self.verbose)
    if self.verbose:
        bar = ProgressBar(width=100, steps=len(self.dbs), color='green')
    if self.multicore:
        for i, (_, d_) in enumerate(Pool(maxtasksperchild=1).imap(with_tracing(_methodcaller('_prepare', sideeffects=True)),
                                                                  self.learners)):
            checkmem()
            self.learners[i].__dict__ = d_
            if self.verbose:
                bar.inc()
    else:
        for learner in self.learners:
            checkmem()
            learner._prepare()
            if self.verbose:
                bar.inc()
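# `checkmem` is called inside the tight loops above. A plausible reading, stated
# here as an assumption: it polls current memory usage and raises before the
# machine starts swapping, so a runaway grounding aborts cleanly instead of
# freezing the system. A minimal version using psutil (the real helper may use
# a different threshold or mechanism):
import psutil

class OutOfMemoryError(Exception):
    pass

def checkmem(maxpercent=90.0):
    '''Raise OutOfMemoryError if system memory usage exceeds `maxpercent` percent.'''
    if psutil.virtual_memory().percent > maxpercent:
        raise OutOfMemoryError('aborting: memory usage exceeds %.0f%%' % maxpercent)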
def _grad(self, w):
    grad = numpy.zeros(len(self.mln.formulas), numpy.float64)
    if False:  # self.multicore:
        # it turned out that it doesn't pay off to evaluate the gradient
        # in separate processes, so we turn it off
        pool = Pool()
        for i, (grad_, d_) in enumerate(pool.imap(with_tracing(_methodcaller('_grad', sideeffects=True)),
                                                  map(lambda l: (l, w), self.learners))):
            self.learners[i].__dict__ = d_
            grad += grad_
        pool.terminate()
        pool.join()
    else:
        for learner in self.learners:
            grad += learner._grad(w)
    return grad
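# `_methodcaller(name, sideeffects=True)` evidently builds a picklable callable
# that invokes the named method on an object shipped to the worker process.
# Because pool workers operate on copies of the learners, the `sideeffects`
# variant also returns the object's `__dict__`, which is what allows the parent
# to write mutated state back via `self.learners[i].__dict__ = d_`. A sketch
# under those assumptions:
class _methodcaller(object):

    def __init__(self, method, sideeffects=False):
        self.method = method
        self.sideeffects = sideeffects

    def __call__(self, args):
        # call sites pass either a bare object (_prepare) or an
        # (object, argument) tuple (_f, _grad, _hessian)
        if isinstance(args, tuple):
            obj, params = args[0], args[1:]
        else:
            obj, params = args, ()
        result = getattr(obj, self.method)(*params)
        if self.sideeffects:
            # hand the (possibly modified) instance state back to the parent
            return result, obj.__dict__
        return result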
def _f(self, w):
    # it turned out that it doesn't pay off to evaluate the function
    # in separate processes, so we turn it off
    if False:  # self.multicore:
        likelihood = 0
        pool = Pool()
        for i, (f_, d_) in enumerate(pool.imap(with_tracing(_methodcaller('_f', sideeffects=True)),
                                               map(lambda l: (l, w), self.learners))):
            self.learners[i].__dict__ = d_
            likelihood += f_
        pool.terminate()
        pool.join()
        return likelihood
    else:
        return sum(map(lambda l: l._f(w), self.learners))
def __init__(self, mln_, dbs, method, **params):
    '''
    :param dbs:      list of :class:`mln.database.Database` objects to be used for learning.
    :param mln_:     the MLN object to be used for learning
    :param method:   the algorithm to be used for learning. Must be a class provided by
                     :class:`mln.methods.LearningMethods`.
    :param **params: additional parameters handed over to the base learners.
    '''
    self.dbs = dbs
    self._params = edict(params)
    if not mln_._materialized:
        self.mln = mln_.materialize(*dbs)
    else:
        self.mln = mln_
    self.watch = StopWatch()
    self.learners = [None] * len(dbs)
    self.watch.tag('setup learners', verbose=self.verbose)
    if self.verbose:
        bar = ProgressBar(steps=len(dbs), color='green')
    if self.multicore:
        pool = Pool(maxtasksperchild=1)
        logger.debug('Setting up multi-core processing for {} cores'.format(pool._processes))
        try:
            for i, learner in pool.imap(with_tracing(_setup_learner), self._iterdbs(method)):
                self.learners[i] = learner
                if self.verbose:
                    bar.label('Database %d, %s' % ((i + 1), learner.name))
                    bar.inc()
        except Exception as e:
            logger.error('Error in child process. Terminating pool...')
            pool.close()
            raise e
        finally:
            pool.terminate()
            pool.join()
    else:
        for i, db in enumerate(self.dbs):
            _, learner = _setup_learner((i, self.mln, db, method, self._params + {'multicore': False}))
            self.learners[i] = learner
            if self.verbose:
                bar.label('Database %d, %s' % ((i + 1), learner.name))
                bar.inc()
    if self.verbose:
        print 'set up', self.name
    self.watch.finish('setup learners')
def _hessian(self, w):
    N = len(self.mln.formulas)
    hessian = numpy.matrix(numpy.zeros((N, N)))
    if self.multicore:
        pool = Pool()
        try:
            for h in pool.imap(with_tracing(_methodcaller('_hessian')), map(lambda l: (l, w), self.learners)):
                hessian += h
        except Exception as e:
            logger.error('Error in child process. Terminating pool...')
            pool.close()
            raise e
        finally:
            pool.terminate()
            pool.join()
    else:
        for learner in self.learners:
            hessian += learner._hessian(w)
    return hessian
def __init__(self, mln_, dbs, method, **params):
    '''
    :param dbs:      list of :class:`mln.database.Database` objects to be used for learning.
    :param mln_:     the MLN object to be used for learning
    :param method:   the algorithm to be used for learning. Must be a class provided by
                     :class:`mln.methods.LearningMethods`.
    :param **params: additional parameters handed over to the base learners.
    '''
    self.dbs = dbs
    self._params = edict(params)
    if not mln_._materialized:
        self.mln = mln_.materialize(*dbs)
    else:
        self.mln = mln_
    self.watch = StopWatch()
    self.learners = [None] * len(dbs)
    self.watch.tag('setup learners', verbose=self.verbose)
    if self.verbose:
        bar = ProgressBar(width=100, steps=len(dbs), color='green')
    if self.multicore:
        pool = Pool(maxtasksperchild=1)
        logger.debug('Setting up multi-core processing for %d cores' % pool._processes)
        for i, learner in pool.imap(with_tracing(_setup_learner), self._iterdbs(method)):
            self.learners[i] = learner
            if self.verbose:
                bar.label('Database %d, %s' % ((i + 1), learner.name))
                bar.inc()
        pool.close()
        pool.join()
    else:
        for i, db in enumerate(self.dbs):
            _, learner = _setup_learner((i, self.mln, db, method, self._params + {'multicore': False}))
            self.learners[i] = learner
            if self.verbose:
                bar.label('Database %d, %s' % ((i + 1), learner.name))
                bar.inc()
    if self.verbose:
        print 'set up', self.name
    self.watch.finish('setup learners')
def _f(self, w):
    # it turned out that it doesn't pay off to evaluate the function
    # in separate processes, so we turn it off
    if False:  # self.multicore:
        likelihood = 0
        pool = Pool()
        try:
            for i, (f_, d_) in enumerate(pool.imap(with_tracing(_methodcaller('_f', sideeffects=True)),
                                                   map(lambda l: (l, w), self.learners))):
                self.learners[i].__dict__ = d_
                likelihood += f_
        except Exception as e:
            logger.error('Error in child process. Terminating pool...')
            pool.close()
            raise e
        finally:
            pool.terminate()
            pool.join()
        return likelihood
    else:
        return sum(map(lambda l: l._f(w), self.learners))
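# Taken together, `_f`, `_grad` and `_hessian` expose the learning objective
# summed over all per-database learners, which is exactly the interface a
# gradient-based optimizer consumes. A hypothetical usage sketch with scipy
# (pracmln ships its own optimizers; `learner` and `nformulas` are assumed
# to be given):
import numpy
from scipy.optimize import minimize

def fit_weights(learner, nformulas):
    w0 = numpy.zeros(nformulas)
    # maximizing the objective is minimizing its negation
    result = minimize(lambda w: -learner._f(w), w0,
                      jac=lambda w: -learner._grad(w),
                      method='BFGS')
    return result.x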
def _itergroundings(self, simplify=True, unsatfailure=True):
    # generate all groundings
    if not self.formulas:
        return
    global global_fastConjGrounding
    global_fastConjGrounding = self
    batches = list(rndbatches(self.formulas, 20))
    batchsizes = [len(b) for b in batches]
    if self.verbose:
        bar = ProgressBar(steps=sum(batchsizes), color='green')
        i = 0
    if self.multicore:
        pool = Pool()
        try:
            for gfs in pool.imap(with_tracing(create_formula_groundings), batches):
                if self.verbose:
                    bar.inc(batchsizes[i])
                    bar.label(str(cumsum(batchsizes, i + 1)))
                    i += 1
                for gf in gfs:
                    yield gf
        except Exception as e:
            logger.error('Error in child process. Terminating pool...')
            pool.close()
            raise e
        finally:
            pool.terminate()
            pool.join()
    else:
        for gfs in imap(create_formula_groundings, batches):
            if self.verbose:
                bar.inc(batchsizes[i])
                bar.label(str(cumsum(batchsizes, i + 1)))
                i += 1
            for gf in gfs:
                yield gf
def _run(self):
    """
    verbose:    whether to print results (or anything at all, in fact)
    details:    (given that verbose is true) whether to output additional
                status information
    debug:      (given that verbose is true) if true, outputs debug information,
                in particular the distribution over possible worlds
    debugLevel: level of detail for debug mode
    """
    # check consistency with hard constraints:
    self._watch.tag('check hard constraints', verbose=self.verbose)
    hcgrounder = FastConjunctionGrounding(self.mrf, simplify=False, unsatfailure=True,
                                          formulas=[f for f in self.mrf.formulas if f.weight == HARD],
                                          **(self._params + {'multicore': False, 'verbose': False}))
    for gf in hcgrounder.itergroundings():
        if isinstance(gf, Logic.TrueFalse) and gf.truth() == .0:
            raise SatisfiabilityException('MLN is unsatisfiable due to hard constraint violation by '
                                          'evidence: {} ({})'.format(str(gf), str(self.mln.formula(gf.idx))))
    self._watch.finish('check hard constraints')
    # compute number of possible worlds
    worlds = 1
    for variable in self.mrf.variables:
        values = variable.valuecount(self.mrf.evidence)
        worlds *= values
    numerators = [0.0 for i in range(len(self.queries))]
    denominator = 0.
    # start summing
    logger.debug('Summing over %d possible worlds...' % worlds)
    if worlds > 500000 and self.verbose:
        print colorize('!!! %d WORLDS WILL BE ENUMERATED !!!' % worlds, (None, 'red', True), True)
    k = 0
    self._watch.tag('enumerating worlds', verbose=self.verbose)
    global global_enumAsk
    global_enumAsk = self
    bar = None
    if self.verbose:
        bar = ProgressBar(width=100, steps=worlds, color='green')
    if self.multicore:
        pool = Pool()
        logger.debug('Using multiprocessing on {} core(s)...'.format(pool._processes))
        try:
            for num, denum in pool.imap(with_tracing(eval_queries), self.mrf.worlds()):
                denominator += denum
                k += 1
                for i, v in enumerate(num):
                    numerators[i] += v
                if self.verbose:
                    bar.inc()
        except Exception as e:
            logger.error('Error in child process. Terminating pool...')
            pool.close()
            raise e
        finally:
            pool.terminate()
            pool.join()
    else:  # do it single core
        for world in self.mrf.worlds():
            # compute exp. sum of weights for this world
            num, denom = eval_queries(world)
            denominator += denom
            for i, _ in enumerate(self.queries):
                numerators[i] += num[i]
            k += 1
            if self.verbose:
                bar.update(float(k) / worlds)
    logger.debug('%d worlds enumerated' % k)
    self._watch.finish('enumerating worlds')
    if 'grounding' in self.grounder.watch.tags:
        self._watch.tags['grounding'] = self.grounder.watch['grounding']
    if denominator == 0:
        raise SatisfiabilityException('MLN is unsatisfiable. All probability masses returned 0.')
    # normalize answers
    dist = map(lambda x: float(x) / denominator, numerators)
    result = {}
    for q, p in zip(self.queries, dist):
        result[str(q)] = p
    return result
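# A recurring idiom in all of the multicore paths above: `self` is stashed in a
# module-level global (`global_bpll_grounding`, `global_fastConjGrounding`,
# `global_enumAsk`) immediately before the Pool is created. On POSIX, pool
# workers are forked from the parent and inherit its module state, so the
# heavyweight MRF reaches the children without being pickled for every task;
# the worker function simply reads the global. A stripped-down illustration of
# the pattern (all names here are hypothetical):
from multiprocessing import Pool

global_state = None

def _worker(item):
    # runs in the child process and sees the forked copy of global_state
    return global_state.process(item)

class ParallelJob(object):

    def __init__(self, state):
        self.state = state

    def run(self, items):
        global global_state
        global_state = self.state  # must be set before the pool forks
        pool = Pool()
        try:
            return pool.map(_worker, items)
        finally:
            pool.terminate()
            pool.join()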
def main():
    # ===========================================================================
    # Parse command line arguments
    # ===========================================================================
    usage = 'PRAC Tell'
    parser = argparse.ArgumentParser(description=usage)
    parser.add_argument('--howto', '-H', type=str,
                        help='Title of the howto, e.g. "Make pancakes"')
    parser.add_argument('--steps', '-s', nargs='+', type=str, dest='steps',
                        help='A list of instruction steps in natural language. If set, this option must be '
                             'the last in the list of options, followed by the list of instructions.')
    parser.add_argument('--batch', '-b', dest='batch', default=False, action='store_true',
                        help='Import a list of howtos in batch processing, whose filenames are given by the '
                             'respective howto title, e.g. "Make a pancake." The file content must then be '
                             'given by the single instruction steps, one per line.')
    parser.add_argument('--recursive', '-r', dest='recursive', default=False, action='store_true',
                        help='Apply the import of instructions recursively to subdirectories.')
    parser.add_argument('--verbose', '-v', dest='verbose', default=1, type=int, action='store',
                        help='Set verbosity level {0..3}. Default is 1.')
    parser.add_argument('--quiet', '-q', dest='quiet', action='store_true', default=False,
                        help='Do not print any status messages.')
    parser.add_argument('--multicore', '-m', dest='multicore', action='store_true', default=False,
                        help='Perform information extraction in multicore mode.')
    parser.add_argument('--save', action='store_true', default=False,
                        help='Store the howto in the PRAC database permanently.')
    # NOTE: the original code iterated over the argparse Namespace itself, which is
    # not iterable; a positional argument holding the input files/directories is
    # assumed here as the intended behavior.
    parser.add_argument('input', nargs='*', default=[],
                        help='Files or directories to import howtos from (batch/file mode).')
    args = parser.parse_args()
    if args.quiet:
        args.verbose = 0
    if args.verbose:
        print(prac_heading('Telling PRAC, how to {}'.format(args.howto)))
    # ===========================================================================
    # If the 'steps' flag is set, take all arguments as the list of instructions
    # ===========================================================================
    howtos = []
    if args.steps:
        howtos = [{args.howto: args.steps}]
    elif args.batch:
        for path in args.input:
            if args.recursive:
                for loc, dirs, files in os.walk(path):
                    for filename in files:
                        with open(os.path.join(loc, filename)) as f:
                            howtos.append({' '.join(filename.split('-')):
                                           [_f for _f in (line.strip() for line in f) if _f]})
            else:
                for filename in os.listdir(path):
                    if os.path.isdir(filename):
                        continue
                    with open(os.path.join(path, filename)) as f:
                        howtos.append({' '.join(filename.split('-')):
                                       [_f for _f in (line.strip() for line in f) if _f]})
    else:
        for filename in args.input:
            with open(filename) as f:
                howtos.append({' '.join(filename.split('-')):
                               [_f for _f in (line.strip() for line in f) if _f]})
    # ===========================================================================
    # start the import
    # ===========================================================================
    try:
        cpu_count = multiprocessing.cpu_count() if args.multicore else 1
        pool = multicore.NonDaemonicPool(cpu_count)
        pool.map(multicore.with_tracing(import_howto),
                 list(zip(howtos, itertools.repeat(args.verbose), itertools.repeat(args.save))))
    except KeyboardInterrupt:
        traceback.print_exc()
        pool.terminate()
    else:
        # =======================================================================
        # finished
        # =======================================================================
        if args.verbose:
            print('Done. Imported %d howtos' % len(howtos))
    finally:
        pool.close()
        pool.join()
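# `multicore.NonDaemonicPool` is needed above because the pool workers run
# `import_howto`, which may itself spawn processes (e.g. for multicore
# inference), and Python forbids daemonic processes from having children.
# The standard Python 2 recipe for such a pool, sketched here; prac's actual
# implementation may differ in details:
import multiprocessing
import multiprocessing.pool

class NonDaemonicProcess(multiprocessing.Process):
    # make the daemon flag read-only False so workers may fork children

    def _get_daemon(self):
        return False

    def _set_daemon(self, value):
        pass

    daemon = property(_get_daemon, _set_daemon)

class NonDaemonicPool(multiprocessing.pool.Pool):
    Process = NonDaemonicProcess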