Exemple #1
0
    def __init__(self, mln_, dbs, method, **params):
        '''
        :param dbs:         list of :class:`mln.database.Database` objects to
                            be used for learning.
        :param mln_:        the MLN object to be used for learning
        :param method:      the algorithm to be used for learning. Must be a
                            class provided by
                            :class:`mln.methods.LearningMethods`.
        :param **params:    additional parameters handed over to the base
                            learners.
        '''

        self.dbs = dbs
        self._params = edict(params)
        if not mln_._materialized:
            self.mln = mln_.materialize(*dbs)
        else:
            self.mln = mln_
        self.watch = StopWatch()
        self.learners = [None] * len(dbs)
        self.watch.tag('setup learners', verbose=self.verbose)
        if self.verbose:
            bar = ProgressBar(steps=len(dbs), color='green')
        if self.multicore:
            pool = Pool(maxtasksperchild=1)
            logger.debug(
                'Setting up multi-core processing for {} cores'.format(
                    pool._processes))
            try:
                for i, learner in pool.imap(with_tracing(_setup_learner),
                                            self._iterdbs(method)):
                    self.learners[i] = learner
                    if self.verbose:
                        bar.label('Database %d, %s' % ((i + 1), learner.name))
                        bar.inc()
            except Exception as e:
                logger.error('Error in child process. Terminating pool...')
                pool.close()
                raise e
            finally:
                pool.terminate()
                pool.join()
            # as MLNs and formulas have been copied to the separate processes,
            # the mln pointers of the formulas now point to the MLNs in these child processes
            # we have to copy the materialized weight back to our parent process
            self.mln.weights = list(first(self.learners).mrf.mln.weights)
        else:
            for i, db in enumerate(self.dbs):
                _, learner = _setup_learner(
                    (i, self.mln, db, method, self._params + {
                        'multicore': False
                    }))
                self.learners[i] = learner
                if self.verbose:
                    bar.label('Database %d, %s' % ((i + 1), learner.name))
                    bar.inc()
        if self.verbose:
            print 'set up', self.name
        self.watch.finish('setup learners')
Exemple #2
0
 def __init__(self, mrf, queries=ALL, **params):
     '''
     Prepares an inference run on a ground MRF: stores the parameters,
     expands the queries and completes the evidence of the MRF.

     :param mrf:      the ground MRF to run inference on; its evidence is
                      modified in place.
     :param queries:  a single query or a list of queries. If empty, every
                      ground atom without evidence is queried.
     :param **params: additional parameters, kept in `self._params`.
     '''
     self.mrf = mrf
     self.mln = mrf.mln
     self._params = edict(params)
     if queries:
         # accept a single query as well as a list of queries
         if type(queries) is not list:
             queries = [queries]
         self.queries = self._expand_queries(queries)
     else:
         # no query given: ask for all ground atoms that have no evidence
         open_literals = []
         for ga in self.mrf.gndatoms:
             if self.mrf.evidence[ga.idx] is None:
                 open_literals.append(
                     self.mln.logic.gnd_lit(ga, negated=False, mln=self.mln))
         self.queries = open_literals
     # variables with exactly one remaining value are fully determined by
     # the evidence, so that value is written into the evidence
     for variable in self.mrf.variables:
         if variable.valuecount(self.mrf.evidence_dicti()) != 1:
             continue
         # pick the first value produced by the iterator
         for _, value in variable.itervalues(self.mrf.evidence):
             break
         self.mrf.set_evidence(variable.value2dict(value), erase=False)
     # closed world assumption for the explicitly specified predicates
     if self.cwpreds:
         for pred in self.cwpreds:
             if isinstance(self.mln.predicate(pred),
                           SoftFunctionalPredicate):
                 if self.verbose:
                     logger.warning(
                         'Closed world assumption will be applied to soft functional predicate %s'
                         % pred)
             elif isinstance(self.mln.predicate(pred), FunctionalPredicate):
                 raise Exception(
                     'Closed world assumption is inapplicable to functional predicate %s'
                     % pred)
             # every ground atom of this predicate without evidence is false
             for gndatom in self.mrf.gndatoms:
                 if (gndatom.predname == pred
                         and self.mrf.evidence[gndatom.idx] is None):
                     self.mrf.evidence[gndatom.idx] = 0
     # closed world assumption for all remaining ground atoms whose
     # predicate does not occur in the queries
     if self.closedworld:
         qpreds = {name for q in self.queries for name in q.prednames()}
         for gndatom in self.mrf.gndatoms:
             # (soft) functional predicates are left untouched
             functional = (
                 isinstance(self.mln.predicate(gndatom.predname),
                            FunctionalPredicate)
                 or isinstance(self.mln.predicate(gndatom.predname),
                               SoftFunctionalPredicate))
             if (not functional and gndatom.predname not in qpreds
                     and self.mrf.evidence[gndatom.idx] is None):
                 self.mrf.evidence[gndatom.idx] = 0
     # strict consistency check of the evidence for fuzzy variables
     for var in self.mrf.variables:
         if isinstance(var, FuzzyVariable):
             var.consistent(self.mrf.evidence, strict=True)
     self._watch = StopWatch()
Exemple #3
0
 def __init__(self,
              logic='FirstOrderLogic',
              grammar='PRACGrammar',
              mlnfile=None):
     '''
     Creates an empty MLN or, if `mlnfile` is given, populates this
     instance from that file via `MLN.load`.

     :param logic:    name of the logic class to instantiate. It is
                      resolved with `eval`, so it must name a class that
                      is visible in this module.
     :param grammar:  name of the grammar; handed to the logic
                      constructor as a string.
     :param mlnfile:  optional MLN file to load into this instance.
     '''
     # instantiate the logic and grammar
     # NOTE(review): `eval` executes whatever `logic` and `grammar`
     # contain -- never pass untrusted strings to this constructor.
     logic_str = '%s("%s", self)' % (logic, grammar)
     self.logic = eval(logic_str)
     logger.debug('Creating MLN with %s syntax and %s semantics' %
                  (grammar, logic))
     self._predicates = {
     }  # maps from predicate name to the predicate instance
     self.domains = {}  # maps from domain names to list of values
     self._formulas = []  # list of MLNFormula instances
     self.domain_decls = []
     self.weights = []
     self.fixweights = []
     self.vars = {}
     self._unique_templvars = []
     self._probreqs = []
     self._materialized = False
     self.fuzzypreds = [
     ]  # for saving fuzzy predicates that have been converted to binary preds
     # The remaining attributes are created *before* loading, so that they
     # also exist on instances that are constructed from a file.
     self.closedWorldPreds = []
     self.formulaGroups = []
     self.templateIdx2GroupIdx = {}
     self.posteriorProbReqs = []
     self.watch = StopWatch()
     if mlnfile is not None:
         MLN.load(mlnfile, logic=logic, grammar=grammar, mln=self)
Exemple #4
0
    def __init__(self, logic='FirstOrderLogic', grammar='PRACGrammar', mlnfile=None):
        '''
        Sets up an empty MLN and optionally fills it from `mlnfile` by
        calling `MLN.load` with this instance as the target.

        :param logic:    name of the logic class; the class is looked up and
                         instantiated through `eval`.
        :param grammar:  grammar name that is passed as a string to the
                         logic constructor.
        :param mlnfile:  MLN file to load, or `None` for an empty MLN.
        '''
        # instantiate the logic and grammar
        # NOTE(review): the expression is built from the arguments and run
        # through `eval`; only call this with trusted `logic`/`grammar`.
        logic_str = '%s("%s", self)' % (logic, grammar)
        self.logic = eval(logic_str)
        logger.debug('Creating MLN with %s syntax and %s semantics' % (grammar, logic))

        self._predicates = {}  # maps from predicate name to the predicate instance
        self.domains = {}  # maps from domain names to list of values
        self._formulas = []  # list of MLNFormula instances
        self.domain_decls = []
        self.weights = []
        self.fixweights = []
        self.vars = {}
        self._unique_templvars = []
        self._probreqs = []
        self._materialized = False
        self.fuzzypreds = []  # for saving fuzzy predicates that have been converted to binary preds

        # Initialize every attribute before a file is loaded; returning
        # early after the load used to leave the attributes below (and the
        # stop watch) undefined on file-based instances.
        self.closedWorldPreds = []
        self.formulaGroups = []
        self.templateIdx2GroupIdx = {}
        self.posteriorProbReqs = []
        self.watch = StopWatch()

        if mlnfile is not None:
            MLN.load(mlnfile, logic=logic, grammar=grammar, mln=self)
Exemple #5
0
    def __init__(self, mln_, dbs, method, **params):
        '''
        :param dbs:         list of :class:`mln.database.Database` objects to be used for learning.
        :param mln_:        the MLN object to be used for learning
        :param method:      the algorithm to be used for learning. Must be a class provided by :class:`mln.methods.LearningMethods`.
        :param **params:    additional parameters handed over to the base learners.
        '''

        self.dbs = dbs
        self._params = edict(params)
        if not mln_._materialized:
            self.mln = mln_.materialize(*dbs)
        else:
            self.mln = mln_
        self.watch = StopWatch()
        self.learners = [None] * len(dbs)
        self.watch.tag('setup learners', verbose=self.verbose)
        if self.verbose:
            bar = ProgressBar(width=100, steps=len(dbs), color='green')
        if self.multicore:
            pool = Pool(maxtasksperchild=1)
            logger.debug('Setting up multi-core processing for %d cores' %
                         pool._processes)
            for i, learner in pool.imap(with_tracing(_setup_learner),
                                        self._iterdbs(method)):
                self.learners[i] = learner
                if self.verbose:
                    bar.label('Database %d, %s' % ((i + 1), learner.name))
                    bar.inc()
            pool.close()
            pool.join()
        else:
            for i, db in enumerate(self.dbs):
                _, learner = _setup_learner(
                    (i, self.mln, db, method, self._params + {
                        'multicore': False
                    }))
                self.learners[i] = learner
                if self.verbose:
                    bar.label('Database %d, %s' % ((i + 1), learner.name))
                    bar.inc()
        if self.verbose:
            print 'set up', self.name
        self.watch.finish('setup learners')
Exemple #6
0
 def __init__(self,
              mrf,
              simplify=False,
              unsatfailure=False,
              formulas=None,
              cache=auto,
              **params):
     '''
     Stores the grounding configuration and counts the groundings of the
     formulas to be grounded.

     :param mrf:           the MRF the formulas are grounded in.
     :param simplify:      stored in `self.simplify`.
     :param unsatfailure:  stored in `self.unsatfailure`.
     :param formulas:      the formulas to ground; defaults to the formulas
                           of `mrf`.
     :param cache:         the cache size; `auto` selects `CACHE_SIZE`.
     :param **params:      additional parameters, kept in `self._params`.
     '''
     self.mrf = mrf
     self.formulas = ifNone(formulas, list(self.mrf.formulas))
     # sum of `countgroundings` over all formulas
     self.total_gf = sum(f.countgroundings(self.mrf) for f in self.formulas)
     self.grounder = None
     if cache is auto:
         self._cachesize = CACHE_SIZE
     else:
         self._cachesize = cache
     self._cache = None
     self.__cacheinit = False
     self.__cachecomplete = False
     self._params = params
     self.watch = StopWatch()
     self.simplify = simplify
     self.unsatfailure = unsatfailure
Exemple #7
0
    def __init__(self, mln_, dbs, method, **params):
        '''
        :param dbs:         list of :class:`mln.database.Database` objects to
                            be used for learning.
        :param mln_:        the MLN object to be used for learning
        :param method:      the algorithm to be used for learning. Must be a
                            class provided by
                            :class:`mln.methods.LearningMethods`.
        :param **params:    additional parameters handed over to the base
                            learners.
        '''

        self.dbs = dbs
        self._params = edict(params)
        if not mln_._materialized:
            self.mln = mln_.materialize(*dbs)
        else:
            self.mln = mln_
        self.watch = StopWatch()
        self.learners = [None] * len(dbs)
        self.watch.tag('setup learners', verbose=self.verbose)
        if self.verbose:
            bar = ProgressBar(steps=len(dbs), color='green')
        if self.multicore:
            pool = Pool(maxtasksperchild=1)
            logger.debug('Setting up multi-core processing for {} cores'.format(pool._processes))
            try:
                for i, learner in pool.imap(with_tracing(_setup_learner), self._iterdbs(method)):
                    self.learners[i] = learner
                    if self.verbose:
                        bar.label('Database %d, %s' % ((i + 1), learner.name))
                        bar.inc()
            except Exception as e:
                logger.error('Error in child process. Terminating pool...')
                pool.close()
                raise e
            finally:
                pool.terminate()
                pool.join()
        else:
            for i, db in enumerate(self.dbs):
                _, learner = _setup_learner((i, self.mln, db, method, self._params + {'multicore': False}))
                self.learners[i] = learner
                if self.verbose:
                    bar.label('Database %d, %s' % ((i + 1), learner.name))
                    bar.inc()
        if self.verbose:
            print 'set up', self.name
        self.watch.finish('setup learners')
Exemple #8
0
 def __init__(self, mrf, simplify=False, unsatfailure=False, formulas=None, cache=auto, **params):
     '''
     Initializes the grounding state for the given MRF.

     :param mrf:           the MRF the formulas are grounded in.
     :param simplify:      stored in `self.simplify`.
     :param unsatfailure:  stored in `self.unsatfailure`.
     :param formulas:      formulas to ground; the formulas of `mrf` are
                           used if this is `None`.
     :param cache:         cache size, or `auto` to use `CACHE_SIZE`.
     :param **params:      further parameters, stored in `self._params`.
     '''
     self.mrf = mrf
     self.formulas = ifNone(formulas, list(self.mrf.formulas))
     # accumulate the grounding counts of all formulas
     groundings = 0
     for formula in self.formulas:
         groundings = groundings + formula.countgroundings(self.mrf)
     self.total_gf = groundings
     self.grounder = None
     use_default_size = cache is auto
     self._cachesize = CACHE_SIZE if use_default_size else cache
     # cache bookkeeping starts out empty
     self._cache = None
     self.__cacheinit = False
     self.__cachecomplete = False
     self._params = params
     self.watch = StopWatch()
     self.simplify = simplify
     self.unsatfailure = unsatfailure
Exemple #9
0
 def __init__(self, mrf, queries=ALL, **params):
     '''
     Sets up inference on the ground MRF `mrf`.

     The queries are expanded, evidence that is already determined is
     filled in, the closed world assumptions are applied and the evidence
     of fuzzy variables is checked for consistency.

     :param mrf:      the ground MRF; its evidence is updated in place.
     :param queries:  one query or a list of queries; if empty, all ground
                      atoms without evidence become queries.
     :param **params: further parameters, stored in `self._params`.
     '''
     self.mrf = mrf
     self.mln = mrf.mln
     self._params = edict(params)
     if not queries:
         self.queries = [
             self.mln.logic.gnd_lit(atom, negated=False, mln=self.mln)
             for atom in self.mrf.gndatoms
             if self.mrf.evidence[atom.idx] is None
         ]
     else:
         # a single query is wrapped in a list before it is expanded
         query_list = queries if type(queries) is list else [queries]
         self.queries = self._expand_queries(query_list)
     # fill in the truth values of variables with one remaining value
     for variable in self.mrf.variables:
         remaining = variable.valuecount(self.mrf.evidence_dicti())
         if remaining == 1:  # the var is fully determined by the evidence
             for _, value in variable.itervalues(self.mrf.evidence):
                 break
             self.mrf.set_evidence(variable.value2dict(value), erase=False)
     # apply the closed world assumption to the specified predicates
     if self.cwpreds:
         for pred in self.cwpreds:
             if isinstance(self.mln.predicate(pred), SoftFunctionalPredicate):
                 if self.verbose:
                     message = 'Closed world assumption will be applied to soft functional predicate %s' % pred
                     logger.warning(message)
             elif isinstance(self.mln.predicate(pred), FunctionalPredicate):
                 message = 'Closed world assumption is inapplicable to functional predicate %s' % pred
                 raise Exception(message)
             for gndatom in self.mrf.gndatoms:
                 if gndatom.predname == pred and self.mrf.evidence[gndatom.idx] is None:
                     self.mrf.evidence[gndatom.idx] = 0
     # apply the closed world assumption to all remaining ground atoms
     # whose predicate is not part of the queries
     if self.closedworld:
         qpreds = set()
         for query in self.queries:
             qpreds.update(query.prednames())
         for gndatom in self.mrf.gndatoms:
             # ground atoms of (soft) functional predicates are skipped
             if isinstance(self.mln.predicate(gndatom.predname), FunctionalPredicate) \
                     or isinstance(self.mln.predicate(gndatom.predname), SoftFunctionalPredicate):
                 continue
             if gndatom.predname in qpreds:
                 continue
             if self.mrf.evidence[gndatom.idx] is None:
                 self.mrf.evidence[gndatom.idx] = 0
     # the evidence of fuzzy variables must be strictly consistent
     for var in self.mrf.variables:
         if not isinstance(var, FuzzyVariable):
             continue
         var.consistent(self.mrf.evidence, strict=True)
     self._watch = StopWatch()
Exemple #10
0
class MultipleDatabaseLearner(AbstractLearner):
    '''
    Learns from multiple databases using an arbitrary sub-learning method for
    each database, assuming independence between individual databases.
    '''
    def __init__(self, mln_, dbs, method, **params):
        '''
        :param dbs:         list of :class:`mln.database.Database` objects to
                            be used for learning.
        :param mln_:        the MLN object to be used for learning
        :param method:      the algorithm to be used for learning. Must be a
                            class provided by
                            :class:`mln.methods.LearningMethods`.
        :param **params:    additional parameters handed over to the base
                            learners.
        '''

        self.dbs = dbs
        self._params = edict(params)
        if not mln_._materialized:
            self.mln = mln_.materialize(*dbs)
        else:
            self.mln = mln_
        self.watch = StopWatch()
        self.learners = [None] * len(dbs)
        self.watch.tag('setup learners', verbose=self.verbose)
        if self.verbose:
            bar = ProgressBar(steps=len(dbs), color='green')
        if self.multicore:
            pool = Pool(maxtasksperchild=1)
            logger.debug(
                'Setting up multi-core processing for {} cores'.format(
                    pool._processes))
            try:
                for i, learner in pool.imap(with_tracing(_setup_learner),
                                            self._iterdbs(method)):
                    self.learners[i] = learner
                    if self.verbose:
                        bar.label('Database %d, %s' % ((i + 1), learner.name))
                        bar.inc()
            except Exception as e:
                logger.error('Error in child process. Terminating pool...')
                pool.close()
                raise e
            finally:
                pool.terminate()
                pool.join()
            # as MLNs and formulas have been copied to the separate processes,
            # the mln pointers of the formulas now point to the MLNs in these child processes
            # we have to copy the materialized weight back to our parent process
            self.mln.weights = list(first(self.learners).mrf.mln.weights)
        else:
            for i, db in enumerate(self.dbs):
                _, learner = _setup_learner(
                    (i, self.mln, db, method, self._params + {
                        'multicore': False
                    }))
                self.learners[i] = learner
                if self.verbose:
                    bar.label('Database %d, %s' % ((i + 1), learner.name))
                    bar.inc()
        if self.verbose:
            print 'set up', self.name
        self.watch.finish('setup learners')

    def _iterdbs(self, method):
        for i, db in enumerate(self.dbs):
            yield i, self.mln, db, method, self._params + {
                'verbose': not self.multicore,
                'multicore': False
            }

    @property
    def name(self):
        return "MultipleDatabaseLearner [{} x {}]".format(
            len(self.learners), self.learners[0].name)

    def _f(self, w):
        # it turned out that it doesn't pay off to evaluate the function
        # in separate processes, so we turn it off
        if False:  # self.multicore:
            likelihood = 0
            pool = Pool()
            try:
                for i, (f_, d_) in enumerate(
                        pool.imap(
                            with_tracing(_methodcaller('_f',
                                                       sideeffects=True)),
                            map(lambda l: (l, w), self.learners))):
                    self.learners[i].__dict__ = d_
                    likelihood += f_
            except Exception as e:
                logger.error('Error in child process. Terminating pool...')
                pool.close()
                raise e
            finally:
                pool.terminate()
                pool.join()
            return likelihood
        else:
            return sum(map(lambda l: l._f(w), self.learners))

    def _grad(self, w):
        grad = numpy.zeros(len(self.mln.formulas), numpy.float64)
        if False:  # self.multicore:
            # it turned out that it doesn't pay off to evaluate the gradient
            # in separate processes, so we turn it off
            pool = Pool()
            try:
                for i, (grad_, d_) in enumerate(
                        pool.imap(
                            with_tracing(
                                _methodcaller('_grad', sideeffects=True)),
                            map(lambda l: (l, w), self.learners))):
                    self.learners[i].__dict__ = d_
                    grad += grad_
            except Exception as e:
                logger.error('Error in child process. Terminating pool...')
                pool.close()
                raise e
            finally:
                pool.terminate()
                pool.join()
        else:
            for learner in self.learners:
                grad += learner._grad(w)
        return grad

    def _hessian(self, w):
        N = len(self.mln.formulas)
        hessian = numpy.matrix(numpy.zeros((N, N)))
        if self.multicore:
            pool = Pool()
            try:
                for h in pool.imap(with_tracing(_methodcaller('_hessian')),
                                   map(lambda l: (l, w), self.learners)):
                    hessian += h
            except Exception as e:
                logger.error('Error in child process. Terminating pool...')
                pool.close()
                raise e
            finally:
                pool.terminate()
                pool.join()
        else:
            for learner in self.learners:
                hessian += learner._hessian(w)
        return hessian

    def _prepare(self):
        self.watch.tag('preparing optimization', verbose=self.verbose)
        if self.verbose:
            bar = ProgressBar(steps=len(self.dbs), color='green')
        if self.multicore:
            pool = Pool(maxtasksperchild=1)
            try:
                for i, (_, d_) in enumerate(
                        pool.imap(
                            with_tracing(
                                _methodcaller('_prepare', sideeffects=True)),
                            self.learners)):
                    checkmem()
                    self.learners[i].__dict__ = d_
                    if self.verbose: bar.inc()
            except Exception as e:
                logger.error('Error in child process. Terminating pool...')
                pool.close()
                raise e
            finally:
                pool.terminate()
                pool.join()
        else:
            for learner in self.learners:
                checkmem()
                learner._prepare()
                if self.verbose: bar.inc()

    def _filter_fixweights(self, v):
        '''
        Removes from the vector `v` all elements at indices that correspond to
        a fixed weight formula index.
        '''
        if len(v) != len(self.mln.formulas):
            raise Exception('Vector must have same length as formula weights')
        return [
            v[i] for i in range(len(self.mln.formulas))
            if not self.mln.fixweights[i] and self.mln.weights[i] != HARD
        ]

    def _add_fixweights(self, w):
        i = 0
        w_ = []
        for f in self.mln.formulas:
            if self.mln.fixweights[f.idx] or f.weight == HARD:
                w_.append(self._w[f.idx])
            else:
                w_.append(w[i])
                i += 1
        return w_

    def run(self, **params):
        if 'scipy' not in sys.modules:
            raise Exception("Scipy was not imported! Install numpy and scipy "
                            "if you want to use weight learning.")
        runs = 0
        self._w = [0] * len(self.mln.formulas)
        while runs < self.maxrepeat:
            self._prepare()
            # initial parameter vector: all zeros or weights from formulas
            for f in self.mln.formulas:
                if self.mln.fixweights[
                        f.idx] or self.use_init_weights or f.ishard:
                    self._w[f.idx] = f.weight
            self._optimize(**self._params)
            self._cleanup()
            runs += 1
            if not any([l.repeat() for l in self.learners]): break
        return self.weights
Exemple #11
0
    def run(self):
        '''
        Runs the configured inference method on the MLN and the database of
        this object.

        :returns:   the value returned by the `run()` method of the
                    inference object, or `None` if inference was cancelled
                    by a `SystemExit`.
        :raises Exception:  if no MLN is specified, if the database has an
                    invalid format or if more than one database is given.
        '''
        watch = StopWatch()
        watch.tag('inference', self.verbose)
        # load the MLN
        if isinstance(self.mln, MLN):
            mln = self.mln
        else:
            raise Exception('No MLN specified')

        if self.use_emln and self.emln is not None:
            # serialize the MLN, append the extension and parse both together
            mlnstrio = io.StringIO()
            mln.write(mlnstrio)
            mlnstr = mlnstrio.getvalue()
            mlnstrio.close()
            emln = self.emln
            mln = parse_mln(mlnstr + emln,
                            grammar=self.grammar,
                            logic=self.logic)

        # load the database
        if isinstance(self.db, Database):
            db = self.db
        elif isinstance(self.db, list) and len(self.db) == 1:
            db = self.db[0]
        elif isinstance(self.db, list) and len(self.db) == 0:
            db = Database(mln)
        elif isinstance(self.db, list):
            raise Exception(
                'Got {} dbs. Can only handle one for inference.'.format(
                    len(self.db)))
        else:
            raise Exception('DB of invalid format {}'.format(type(self.db)))

        # expand the parameters
        params = dict(self._config)
        if 'params' in params:
            # NOTE(review): the 'params' entry of the configuration is
            # evaluated as Python code; it must come from a trusted source.
            params.update(eval("dict(%s)" % params['params']))
            del params['params']
        params['verbose'] = self.verbose
        if self.verbose:
            print((tabulate(sorted(list(params.items()),
                                   key=lambda k_v: str(k_v[0])),
                            headers=('Parameter:', 'Value:'))))
        if type(db) is list and len(db) > 1:
            raise Exception('Inference can only handle one database at a time')
        elif type(db) is list:
            db = db[0]
        params['cw_preds'] = [x for x in self.cw_preds if bool(x)]
        # extract and remove all non-algorithm
        for s in GUI_SETTINGS:
            if s in params: del params[s]

        if self.profile:
            prof = Profile()
            print('starting profiler...')
            prof.enable()
        # set the debug level; the level constant is looked up by name on
        # the `logs` module instead of evaluating a string as code
        olddebug = logger.level
        logger.level = getattr(logs, params.get('debug', 'WARNING').upper())
        result = None
        try:
            mln_ = mln.materialize(db)
            mrf = mln_.ground(db)
            inference = self.method(mrf, self.queries, **params)
            if self.verbose:
                print()
                print((headline('EVIDENCE VARIABLES')))
                print()
                mrf.print_evidence_vars()

            result = inference.run()
            if self.verbose:
                print()
                print((headline('INFERENCE RESULTS')))
                print()
                inference.write()
            if self.verbose:
                print()
                inference.write_elapsed_time()
        except SystemExit:
            # a cancelled inference is reported, not propagated
            traceback.print_exc()
            print('Cancelled...')
        finally:
            if self.profile:
                prof.disable()
                print((headline('PROFILER STATISTICS')))
                ps = pstats.Stats(prof,
                                  stream=sys.stdout).sort_stats('cumulative')
                ps.print_stats()
            # reset the debug level
            logger.level = olddebug
        if self.verbose:
            print()
            watch.finish()
            watch.printSteps()
        return result
Exemple #12
0
class MultipleDatabaseLearner(AbstractLearner):
    '''
    Learns from multiple databases using an arbitrary sub-learning method for
    each database, assuming independence between individual databases.

    One sub-learner is set up per database; the objective value, the gradient
    and the Hessian of this learner are the sums of the respective quantities
    of all sub-learners.
    '''


    def __init__(self, mln_, dbs, method, **params):
        '''
        :param dbs:         list of :class:`mln.database.Database` objects to
                            be used for learning.
        :param mln_:        the MLN object to be used for learning
        :param method:      the algorithm to be used for learning. Must be a
                            class provided by
                            :class:`mln.methods.LearningMethods`.
        :param **params:    additional parameters handed over to the base
                            learners.
        '''

        self.dbs = dbs
        self._params = edict(params)
        # only materialize the MLN if that has not been done by the caller
        if not mln_._materialized:
            self.mln = mln_.materialize(*dbs)
        else:
            self.mln = mln_
        self.watch = StopWatch()
        self.learners = [None] * len(dbs)
        self.watch.tag('setup learners', verbose=self.verbose)
        if self.verbose:
            bar = ProgressBar(steps=len(dbs), color='green')
        if self.multicore:
            pool = Pool(maxtasksperchild=1)
            logger.debug('Setting up multi-core processing for {} cores'.format(pool._processes))
            try:
                for i, learner in pool.imap(with_tracing(_setup_learner), self._iterdbs(method)):
                    self.learners[i] = learner
                    if self.verbose:
                        bar.label('Database %d, %s' % ((i + 1), learner.name))
                        bar.inc()
            except Exception:
                logger.error('Error in child process. Terminating pool...')
                pool.close()
                # bare raise keeps the original traceback intact
                raise
            finally:
                pool.terminate()
                pool.join()
        else:
            for i, db in enumerate(self.dbs):
                _, learner = _setup_learner((i, self.mln, db, method, self._params + {'multicore': False}))
                self.learners[i] = learner
                if self.verbose:
                    bar.label('Database %d, %s' % ((i + 1), learner.name))
                    bar.inc()
        if self.verbose:
            # single-argument form prints the same text on Python 2 and 3
            print('set up %s' % self.name)
        self.watch.finish('setup learners')


    def _iterdbs(self, method):
        '''
        Yields one argument tuple ``(index, mln, db, method, params)`` per
        database, in the form expected by `_setup_learner`. The sub-learners
        never run multi-core themselves and are verbose only if this learner
        does not run multi-core.
        '''
        for i, db in enumerate(self.dbs):
            yield i, self.mln, db, method, self._params + {
                'verbose': not self.multicore, 'multicore': False}


    @property
    def name(self):
        '''
        Display name composed of the number of sub-learners and the name of
        the first sub-learner.
        '''
        return "MultipleDatabaseLearner [{} x {}]".format(len(self.learners), self.learners[0].name)


    def _f(self, w):
        '''
        Returns the sum of the objective values ``_f(w)`` of all sub-learners.
        '''
        # it turned out that it doesn't pay off to evaluate the function
        # in separate processes, so the sub-learners are always evaluated
        # sequentially, regardless of the `multicore` setting
        return sum(learner._f(w) for learner in self.learners)


    def _grad(self, w):
        '''
        Returns the sum of the gradients ``_grad(w)`` of all sub-learners as
        a float64 vector with one entry per formula of the MLN.
        '''
        grad = numpy.zeros(len(self.mln.formulas), numpy.float64)
        # it turned out that it doesn't pay off to evaluate the gradient
        # in separate processes, so the sub-learners are always evaluated
        # sequentially, regardless of the `multicore` setting
        for learner in self.learners:
            grad += learner._grad(w)
        return grad


    def _hessian(self, w):
        '''
        Returns the sum of the Hessians ``_hessian(w)`` of all sub-learners
        as an N x N matrix, N being the number of formulas of the MLN. In
        multi-core mode the sub-learners are evaluated in a process pool.
        '''
        N = len(self.mln.formulas)
        hessian = numpy.matrix(numpy.zeros((N, N)))
        if self.multicore:
            pool = Pool()
            try:
                for h in pool.imap(with_tracing(_methodcaller('_hessian')), [(l, w) for l in self.learners]):
                    hessian += h
            except Exception:
                logger.error('Error in child process. Terminating pool...')
                pool.close()
                raise
            finally:
                pool.terminate()
                pool.join()
        else:
            for learner in self.learners:
                hessian += learner._hessian(w)
        return hessian


    def _prepare(self):
        '''
        Calls ``_prepare()`` on every sub-learner. In multi-core mode this
        happens in child processes and the resulting attribute dictionary of
        each sub-learner is copied back into the local instance.
        '''
        self.watch.tag('preparing optimization', verbose=self.verbose)
        if self.verbose:
            bar = ProgressBar(steps=len(self.dbs), color='green')
        if self.multicore:
            pool = Pool(maxtasksperchild=1)
            try:
                for i, (_, d_) in enumerate(pool.imap(with_tracing(_methodcaller('_prepare', sideeffects=True)), self.learners)):
                    checkmem()
                    # take over the state the child process has computed
                    self.learners[i].__dict__ = d_
                    if self.verbose: bar.inc()
            except Exception:
                logger.error('Error in child process. Terminating pool...')
                pool.close()
                raise
            finally:
                pool.terminate()
                pool.join()
        else:
            for learner in self.learners:
                checkmem()
                learner._prepare()
                if self.verbose: bar.inc()


    def _filter_fixweights(self, v):
        '''
        Removes from the vector `v` all elements at indices that correspond to
        a fixed weight formula index or to a hard formula.

        :raises Exception:  if `v` does not have one element per formula.
        '''
        if len(v) != len(self.mln.formulas):
            raise Exception('Vector must have same length as formula weights')
        fixweights = self.mln.fixweights
        weights = self.mln.weights
        return [x for i, x in enumerate(v) if not fixweights[i] and weights[i] != HARD]


    def _add_fixweights(self, w):
        '''
        Expands the reduced vector `w` to one entry per formula: entries of
        fixed-weight and hard formulas are taken from ``self._w``, all others
        are consumed from `w` in formula order.
        '''
        i = 0
        w_ = []
        for f in self.mln.formulas:
            if self.mln.fixweights[f.idx] or f.weight == HARD:
                w_.append(self._w[f.idx])
            else:
                w_.append(w[i])
                i += 1
        return w_


    def run(self, **params):
        '''
        Runs the learning process: prepares and optimizes at most
        ``self.maxrepeat`` times and stops early as soon as no sub-learner
        requests a repetition.

        :returns:           ``self.weights``
        :raises Exception:  if scipy has not been imported.
        '''
        if 'scipy' not in sys.modules:
            raise Exception("Scipy was not imported! Install numpy and scipy "
                            "if you want to use weight learning.")
        # initial parameter vector: all zeros or weights from formulas
        self._w = [0] * len(self.mln.formulas)
        for f in self.mln.formulas:
            if self.mln.fixweights[f.idx] or self.use_init_weights or f.ishard:
                self._w[f.idx] = f.weight
        runs = 0
        while runs < self.maxrepeat:
            self._prepare()

            self._optimize(**self._params)
            self._cleanup()
            runs += 1
            # the list (instead of a generator) is intentional: repeat() is
            # called on every learner, without short-circuiting
            if not any([l.repeat() for l in self.learners]): break
        return self.weights
Exemple #13
0
class DefaultGroundingFactory:
    '''
    Implementation of the default grounding algorithm, which
    creates ALL ground atoms and ALL ground formulas.

    :param mrf:             the MRF whose formulas are to be grounded.
    :param simplify:        if `True`, the formula will be simplified according to the
                            evidence given.
    :param unsatfailure:    raises a :class:`mln.errors.SatisfiabilityException` if a 
                            hard logical constraint is violated by the evidence.
    :param formulas:        the formulas to be grounded; handed to `ifNone`
                            with the formulas of `mrf` as the fallback.
    :param cache:           the cache size; `auto` selects `CACHE_SIZE`,
                            `None` or a non-positive value disables caching.
    :param **params:        further settings; `verbose` and `multicore` are
                            read from here.
    '''
    def __init__(self,
                 mrf,
                 simplify=False,
                 unsatfailure=False,
                 formulas=None,
                 cache=auto,
                 **params):
        self.mrf = mrf
        self.formulas = ifNone(formulas, list(self.mrf.formulas))
        # total number of groundings over all formulas
        self.total_gf = sum(
            f.countgroundings(self.mrf) for f in self.formulas)
        self.grounder = None
        self._cachesize = CACHE_SIZE if cache is auto else cache
        self._cache = None
        self.__cacheinit = False
        self.__cachecomplete = False
        self._params = params
        self.watch = StopWatch()
        self.simplify = simplify
        self.unsatfailure = unsatfailure

    @property
    def verbose(self):
        '''The `verbose` setting, `False` if not given.'''
        return self._params.get('verbose', False)

    @property
    def multicore(self):
        '''The `multicore` setting, `False` if not given.'''
        return self._params.get('multicore', False)

    @property
    def iscached(self):
        '''`True` if the cache has been initialized and exists.'''
        return self._cache is not None and self.__cacheinit

    @property
    def usecache(self):
        '''`True` if a positive cache size is configured.'''
        return self._cachesize is not None and self._cachesize > 0

    def _cacheinit(self):
        '''Creates the (empty) cache and marks it as initialized.'''
        # NOTE: the check that disabled caching when the number of formula
        # groundings (self.total_gf) exceeds the cache size is deliberately
        # switched off, so the cache is always created here.
        self._cache = []
        self.__cacheinit = True

    def itergroundings(self):
        '''
        Iterates over all formula groundings.

        Groundings that are already in the cache are served from there; the
        remaining ones are pulled from the underlying grounder and appended
        to the cache (if there is one) before being yielded.
        '''
        self.watch.tag('grounding', verbose=self.verbose)
        if self.grounder is None:
            self.grounder = iter(
                self._itergroundings(simplify=self.simplify,
                                     unsatfailure=self.unsatfailure))
        if self.usecache and not self.iscached:
            self._cacheinit()
        counter = -1
        while True:
            counter += 1
            if self.iscached and len(self._cache) > counter:
                yield self._cache[counter]
            elif not self.__cachecomplete:
                try:
                    gf = next(self.grounder)
                except StopIteration:
                    self.__cachecomplete = True
                    break
                else:
                    if self._cache is not None:
                        self._cache.append(gf)
                    yield gf
            else:
                break
        # the loop is left with `break` (not `return`), so that this
        # bookkeeping is actually executed when the iteration is exhausted
        self.watch.finish('grounding')
        if self.verbose: print('')

    def _itergroundings(self, simplify=False, unsatfailure=False):
        '''
        Yields the groundings of all formulas, one formula after the other.

        :param simplify:        passed on to the `itergroundings` method of
                                each formula.
        :param unsatfailure:    if `True`, a hard ground formula that does
                                not evaluate to 1 under the evidence raises
                                a `SatisfiabilityException`.
        '''
        if self.verbose:
            bar = ProgressBar(width=100, color='green')
        for i, formula in enumerate(self.formulas):
            if self.verbose: bar.update((i + 1) / float(len(self.formulas)))
            for gndformula in formula.itergroundings(self.mrf,
                                                     simplify=simplify):
                if unsatfailure and gndformula.weight == HARD and gndformula(
                        self.mrf.evidence) != 1:
                    print('')
                    gndformula.print_structure(self.mrf.evidence)
                    raise SatisfiabilityException(
                        'MLN is unsatisfiable due to hard constraint violation %s (see above)'
                        % self.mrf.formulas[gndformula.idx])
                yield gndformula
Exemple #14
0
class DefaultGroundingFactory:
    '''
    Implementation of the default grounding algorithm, which
    creates ALL ground atoms and ALL ground formulas.

    :param mrf:             the MRF whose formulas are to be grounded.
    :param simplify:        if `True`, the formula will be simplified according to the
                            evidence given.
    :param unsatfailure:    raises a :class:`mln.errors.SatisfiabilityException` if a 
                            hard logical constraint is violated by the evidence.
    :param formulas:        the formulas to be grounded; handed to `ifNone`
                            with the formulas of `mrf` as the fallback.
    :param cache:           the cache size; `auto` selects `CACHE_SIZE`,
                            `None` or a non-positive value disables caching.
    :param **params:        further settings; `verbose` and `multicore` are
                            read from here.
    '''
    
    def __init__(self, mrf, simplify=False, unsatfailure=False, formulas=None, cache=auto, **params):
        self.mrf = mrf
        self.formulas = ifNone(formulas, list(self.mrf.formulas))
        # total number of groundings over all formulas
        self.total_gf = sum(f.countgroundings(self.mrf) for f in self.formulas)
        self.grounder = None
        self._cachesize = CACHE_SIZE if cache is auto else cache
        self._cache = None
        self.__cacheinit = False
        self.__cachecomplete = False
        self._params = params
        self.watch = StopWatch()
        self.simplify = simplify
        self.unsatfailure = unsatfailure
        
        
    @property
    def verbose(self):
        '''The `verbose` setting, `False` if not given.'''
        return self._params.get('verbose', False)
    
    
    @property
    def multicore(self):
        '''The `multicore` setting, `False` if not given.'''
        return self._params.get('multicore', False)
    
    
    @property
    def iscached(self):
        '''`True` if the cache has been initialized and exists.'''
        return self._cache is not None and self.__cacheinit

    
    @property
    def usecache(self):
        '''`True` if a positive cache size is configured.'''
        return self._cachesize is not None and self._cachesize > 0
    
    
    def _cacheinit(self):
        '''Creates the (empty) cache and marks it as initialized.'''
        # NOTE: the check that disabled caching when the number of formula
        # groundings (self.total_gf) exceeds the cache size is deliberately
        # switched off, so the cache is always created here.
        self._cache = []
        self.__cacheinit = True
    
    
    def itergroundings(self):
        '''
        Iterates over all formula groundings.

        Groundings that are already in the cache are served from there; the
        remaining ones are pulled from the underlying grounder and appended
        to the cache (if there is one) before being yielded.
        '''
        self.watch.tag('grounding', verbose=self.verbose)
        if self.grounder is None:
            self.grounder = iter(self._itergroundings(simplify=self.simplify, unsatfailure=self.unsatfailure))
        if self.usecache and not self.iscached:
            self._cacheinit()
        counter = -1
        while True:
            counter += 1
            if self.iscached and len(self._cache) > counter:
                yield self._cache[counter]
            elif not self.__cachecomplete:
                try:
                    gf = next(self.grounder)
                except StopIteration:
                    self.__cachecomplete = True
                    break
                else:
                    if self._cache is not None:
                        self._cache.append(gf)
                    yield gf
            else:
                break
        # the loop is left with `break` (not `return`), so that this
        # bookkeeping is actually executed when the iteration is exhausted
        self.watch.finish('grounding')
        if self.verbose: print('')
            
            
    def _itergroundings(self, simplify=False, unsatfailure=False):
        '''
        Yields the groundings of all formulas, one formula after the other.

        :param simplify:        passed on to the `itergroundings` method of
                                each formula.
        :param unsatfailure:    if `True`, a hard ground formula that does
                                not evaluate to 1 under the evidence raises
                                a `SatisfiabilityException`.
        '''
        if self.verbose: 
            bar = ProgressBar(width=100, color='green')
        for i, formula in enumerate(self.formulas):
            if self.verbose: bar.update((i+1) / float(len(self.formulas)))
            for gndformula in formula.itergroundings(self.mrf, simplify=simplify):
                if unsatfailure and gndformula.weight == HARD and gndformula(self.mrf.evidence) != 1:
                    print('')
                    gndformula.print_structure(self.mrf.evidence)
                    raise SatisfiabilityException('MLN is unsatisfiable due to hard constraint violation %s (see above)' % self.mrf.formulas[gndformula.idx])
                yield gndformula
Exemple #15
0
class Inference(object):
    '''
    Represents a super class for all inference methods.
    Also provides some convenience methods for collecting statistics
    about the inference process and nicely outputting results.
    
    :param mrf:        the MRF inference is being applied to.
    :param queries:    a query or list of queries, can be either instances of
                       :class:`pracmln.logic.common.Logic` or string representations of them,
                       or predicate names that get expanded to all of their ground atoms.
                       If `ALL`, all ground atoms are subject to inference.
                       
    Additional keyword parameters:
    
    :param cw:         (bool) if `True`, the closed-world assumption will be applied 
                       to all but the query atoms.
    :param cw_preds:   list of predicate names the closed-world assumption
                       is applied to explicitly.
    :param verbose:    (bool) if `True`, additional output is printed.
    '''
    
    def __init__(self, mrf, queries=ALL, **params):
        self.mrf = mrf
        self.mln = mrf.mln 
        self._params = edict(params)
        if not queries:
            # no explicit queries: query all ground atoms without evidence
            self.queries = [self.mln.logic.gnd_lit(ga, negated=False, mln=self.mln)
                            for ga in self.mrf.gndatoms if self.mrf.evidence[ga.idx] is None]
        else:
            # check for single/multiple query and expand
            if type(queries) is not list:
                queries = [queries]
            self.queries = self._expand_queries(queries)
        # fill in the missing truth values of variables that have only one remaining value
        for variable in self.mrf.variables:
            if variable.valuecount(self.mrf.evidence_dicti()) == 1: # the var is fully determined by the evidence
                # pick the first value the variable iterates over
                for _, value in variable.itervalues(self.mrf.evidence):
                    break
                self.mrf.set_evidence(variable.value2dict(value), erase=False)
        # apply the closed world assumptions to the explicitly specified predicates
        if self.cwpreds:
            for pred in self.cwpreds:
                predicate = self.mln.predicate(pred)
                if isinstance(predicate, SoftFunctionalPredicate):
                    if self.verbose: logger.warning('Closed world assumption will be applied to soft functional predicate %s' % pred)
                elif isinstance(predicate, FunctionalPredicate):
                    raise Exception('Closed world assumption is inapplicable to functional predicate %s' % pred)
                for gndatom in self.mrf.gndatoms:
                    if gndatom.predname != pred: continue
                    if self.mrf.evidence[gndatom.idx] is None:
                        self.mrf.evidence[gndatom.idx] = 0
        # apply the closed world assumption to all remaining ground atoms that are not in the queries
        if self.closedworld:
            qpreds = set()
            for q in self.queries:
                qpreds.update(q.prednames())
            for gndatom in self.mrf.gndatoms:
                # (soft) functional predicates are exempt from the assumption
                if isinstance(self.mln.predicate(gndatom.predname), (FunctionalPredicate, SoftFunctionalPredicate)):
                    continue
                if gndatom.predname not in qpreds and self.mrf.evidence[gndatom.idx] is None:
                    self.mrf.evidence[gndatom.idx] = 0
        for var in self.mrf.variables:
            if isinstance(var, FuzzyVariable):
                var.consistent(self.mrf.evidence, strict=True)
        self._watch = StopWatch()
    
    
    @property
    def verbose(self):
        '''The `verbose` setting, `False` if not given.'''
        return self._params.get('verbose', False)
    
    
    @property
    def results(self):
        '''
        The results of the last call to :meth:`run`.

        :raises Exception:  if the inference has not been run yet.
        '''
        # `_results` is only assigned by run(); getattr makes sure that the
        # intended exception (not an AttributeError) is raised before that
        results = getattr(self, '_results', None)
        if results is None:
            raise Exception('No results available. Run the inference first.')
        return results
        
        
    @property
    def elapsedtime(self):
        '''The elapsed time recorded under the 'inference' stopwatch tag.'''
        return self._watch['inference'].elapsedtime
        
        
    @property
    def multicore(self):
        '''The `multicore` setting, `None` if not given.'''
        return self._params.get('multicore')
    
    
    @property
    def resultdb(self):
        '''
        A new :class:`Database` holding the inference results, keyed by the
        string representation of each result atom.
        '''
        db = Database(self.mrf.mln)
        for atom in sorted(self.results, key=str):
            db[str(atom)] = self.results[atom]
        return db
    

    @property
    def closedworld(self):
        '''The `cw` setting, `False` if not given.'''
        return self._params.get('cw', False)
        
        
    @property
    def cwpreds(self):
        '''The `cw_preds` setting, an empty list if not given.'''
        return self._params.get('cw_preds', [])
        

    def _expand_queries(self, queries):
        ''' 
        Expands the list of queries where necessary, e.g. queries that are 
        just predicate names are expanded to the corresponding list of atoms.

        :raises NoSuchPredicateError:   if a predicate name is not known to
                                        the MLN.
        :raises Exception:              if a string query yields no
                                        expansion or a query is neither a
                                        string nor a formula.
        '''
        equeries = []
        for query in queries:
            if isinstance(query, str):
                prevLen = len(equeries)
                if '(' in query: # a fully or partially grounded formula
                    f = self.mln.logic.parse_formula(query)
                    equeries.extend(f.itergroundings(self.mrf))
                else: # just a predicate name
                    if query not in self.mln.prednames:
                        raise NoSuchPredicateError('Unsupported query: %s is not among the admissible predicates.' % (query))
                    for gndatom in self.mln.predicate(query).groundatoms(self.mln, self.mrf.domains):
                        equeries.append(self.mln.logic.gnd_lit(self.mrf.gndatom(gndatom), negated=False, mln=self.mln))
                if len(equeries) == prevLen:
                    raise Exception("String query '%s' could not be expanded." % query)
            elif isinstance(query, Logic.Formula):
                equeries.append(query)
            else:
                raise Exception("Received query of unsupported type '%s'" % str(type(query)))
        return equeries
    
    
    def _run(self):
        '''
        Performs the actual inference; to be implemented by subclasses.
        '''
        raise Exception('%s does not implement _run()' % self.__class__.__name__)


    def run(self):
        '''
        Starts the inference process.

        The weights of the MLN are backed up before and restored after the
        call to :meth:`_run`, also if :meth:`_run` raises.

        :returns:   this inference object.
        '''
        
        # perform actual inference (polymorphic)
        if self.verbose: print('Inference engine: %s' % self.__class__.__name__)
        self._watch.tag('inference', verbose=self.verbose)
        _weights_backup = list(self.mln.weights)
        try:
            self._results = self._run()
        finally:
            # never leave the MLN with modified weights or the watch running
            self.mln.weights = _weights_backup
            self._watch.finish('inference')
        return self
    
    
    def write(self, stream=sys.stdout, color=None, sort='prob', group=True, reverse=True):
        '''
        Writes the inference results as bar charts to `stream`.

        :param stream:      the stream to write to.
        :param color:       the color of the bars; defaults to 'yellow' if
                            `tty(stream)` holds.
        :param sort:        'prob' or 'alpha'; only evaluated in grouped
                            output.
        :param group:       if `True`, the results are grouped by the
                            variables of the MRF.
        :param reverse:     the sorting direction for sorting by 'prob'.
        :raises Exception:  if `sort` is neither 'alpha' nor 'prob'.
        '''
        barwidth = 30
        if tty(stream) and color is None:
            color = 'yellow'
        if sort not in ('alpha', 'prob'):
            raise Exception('Unknown sorting: %s' % sort)
        results = dict(self.results)
        if group:
            for var in sorted(self.mrf.variables, key=str):
                # computed once per variable instead of once per result entry
                atomnames = list(map(str, var.gndatoms))
                res = {atom: prob for atom, prob in results.items() if atom in atomnames}
                if not res: continue
                if isinstance(var, (MutexVariable, SoftMutexVariable)):
                    stream.write('%s:\n' % var)
                if sort == 'prob':
                    res = sorted(res, key=self.results.__getitem__, reverse=reverse)
                elif sort == 'alpha':
                    res = sorted(res, key=str)
                for atom in res:
                    stream.write('%s %s\n' % (barstr(barwidth, self.results[atom], color=color), atom))
            return
        # NOTE(review): the ungrouped output ignores `sort`; the stable sort
        # by string below determines the final order -- confirm the intent
        # first sort wrt to probability
        results = sorted(results, key=self.results.__getitem__, reverse=reverse)
        # then wrt gnd atoms
        results = sorted(results, key=str)
        for q in results:
            stream.write('%s %s\n' % (barstr(barwidth, self.results[q], color=color), q))
        self._watch.printSteps()
    
    
    def write_elapsed_time(self, stream=sys.stdout, color=None):
        '''
        Writes the runtime statistics of all stopwatch tags to `stream`,
        longest first, each relative to the time of the 'inference' tag.

        :param stream:  the stream to write to.
        :param color:   whether to use colored bars; defaults to `True` if
                        `stream` is `sys.stdout`, `False` otherwise.
        '''
        if color is None:
            color = stream is sys.stdout
        col = 'blue' if color else None
        total = float(self._watch['inference'].elapsedtime)
        stream.write(headline('INFERENCE RUNTIME STATISTICS'))
        # the newline belongs to the same stream as the headline
        stream.write('\n')
        self._watch.finish()
        for t in sorted(self._watch.tags.values(), key=lambda t: t.elapsedtime, reverse=True):
            # guard against a total time of zero
            percent = t.elapsedtime / total if total else 0.0
            stream.write('%s %s %s\n' % (barstr(width=30, percent=percent, color=col), elapsed_time_str(t.elapsedtime), t.label))
Exemple #16
0
    def run(self):
        '''
        Runs inference with the configured MLN, database and method.

        The MLN is (optionally) extended by `self.emln`, materialized and
        grounded with the database, and the inference method is run on the
        resulting MRF. With `self.verbose` set, the parameters, evidence
        variables, results and runtime statistics are printed; with
        `self.profile` set, the run is profiled.

        :returns:           the return value of the inference object's
                            `run()`, or `None` if the inference has been
                            cancelled by a `SystemExit`.
        :raises Exception:  if no MLN is specified or the database is not
                            exactly one :class:`Database`.
        '''
        watch = StopWatch()
        watch.tag('inference', self.verbose)
        # load the MLN
        if isinstance(self.mln, MLN):
            mln = self.mln
        else:
            raise Exception('No MLN specified')

        if self.use_emln and self.emln is not None:
            # serialize the MLN and append the extension; the contents must
            # be fetched with getvalue() *before* the buffer is closed
            # (str() of the buffer only yields its object representation)
            mlnbuf = StringIO.StringIO()
            mln.write(mlnbuf)
            mlnstr = mlnbuf.getvalue()
            mlnbuf.close()
            mln = parse_mln(mlnstr + self.emln,
                            grammar=self.grammar,
                            logic=self.logic)

        # load the database
        if isinstance(self.db, Database):
            db = self.db
        elif isinstance(self.db, list) and len(self.db) == 1:
            db = self.db[0]
        elif isinstance(self.db, list):
            raise Exception(
                'Got {} dbs. Can only handle one for inference.'.format(
                    len(self.db)))
        else:
            raise Exception('DB of invalid format {}'.format(type(self.db)))

        # expand the parameters
        params = dict(self._config)
        if 'params' in params:
            # NOTE(security): the 'params' string is evaluated as Python
            # code; this is only acceptable for trusted configurations
            params.update(eval("dict(%s)" % params['params']))
            del params['params']
        if self.verbose:
            print(tabulate(sorted(params.items(),
                                  key=lambda k_v: str(k_v[0])),
                           headers=('Parameter:', 'Value:')))
        if type(db) is list and len(db) > 1:
            raise Exception('Inference can only handle one database at a time')
        elif type(db) is list:
            db = db[0]
        # drop empty entries from the closed-world predicates
        params['cw_preds'] = [x for x in self.cw_preds if bool(x)]
        # extract and remove all non-algorithm
        for s in GUI_SETTINGS:
            if s in params: del params[s]

        if self.profile:
            prof = Profile()
            print('starting profiler...')
            prof.enable()
        # set the debug level
        olddebug = praclog.level()
        praclog.level(getattr(logging, params.get('debug', 'WARNING').upper()))
        result = None
        try:
            mln_ = mln.materialize(db)
            mrf = mln_.ground(db)
            inference = self.method(mrf, self.queries, **params)
            if self.verbose:
                print('')
                print(headline('EVIDENCE VARIABLES'))
                print('')
                mrf.print_evidence_vars()

            result = inference.run()
            if self.verbose:
                print('')
                print(headline('INFERENCE RESULTS'))
                print('')
                inference.write()
                print('')
                inference.write_elapsed_time()
        except SystemExit:
            print('Cancelled...')
        finally:
            if self.profile:
                prof.disable()
                print(headline('PROFILER STATISTICS'))
                ps = pstats.Stats(prof,
                                  stream=sys.stdout).sort_stats('cumulative')
                ps.print_stats()
            # reset the debug level
            praclog.level(olddebug)
        if self.verbose:
            print('')
            watch.finish()
            watch.printSteps()
        return result
Exemple #17
0
class Inference(object):
    '''
    Represents a super class for all inference methods.
    Also provides some convenience methods for collecting statistics
    about the inference process and nicely outputting results.

    Subclasses implement :meth:`_run`, which must return the results that
    are subsequently exposed through :attr:`results` (used by this class as
    a mapping from query atom to value).

    :param mrf:        the MRF inference is being applied to.
    :param queries:    a query or list of queries, can be either instances of
                       :class:`pracmln.logic.common.Logic` or string representations of them,
                       or predicate names that get expanded to all of their ground atoms.
                       If empty/`ALL`, all ground atoms without evidence are
                       subject to inference.

    Additional keyword parameters:

    :param cw:         (bool) if `True`, the closed-world assumption will be applied
                       to all but the query atoms.
    :param cw_preds:   names of predicates the closed-world assumption is
                       applied to explicitly.
    :param verbose:    (bool) if `True`, progress information is printed.
    :param multicore:  stored and exposed via :attr:`multicore`; not
                       interpreted by this base class.
    '''
    def __init__(self, mrf, queries=ALL, **params):
        self.mrf = mrf
        self.mln = mrf.mln
        self._params = edict(params)
        # initialised up front so that `results` can report "not run yet"
        # instead of failing with an AttributeError
        self._results = None
        if not queries:
            # no explicit queries: query every ground atom without evidence
            self.queries = [
                self.mln.logic.gnd_lit(ga, negated=False, mln=self.mln)
                for ga in self.mrf.gndatoms
                if self.mrf.evidence[ga.idx] is None
            ]
        else:
            # check for single/multiple query and expand
            if type(queries) is not list:
                queries = [queries]
            self.queries = self._expand_queries(queries)
        # fill in the missing truth values of variables that have only one remaining value
        for variable in self.mrf.variables:
            if variable.valuecount(self.mrf.evidence_dicti()) == 1:
                # the var is fully determined by the evidence: take its
                # first value and write it back as evidence
                for _, value in variable.itervalues(self.mrf.evidence):
                    break
                self.mrf.set_evidence(variable.value2dict(value), erase=False)
        # apply the closed world assumptions to the explicitly specified predicates
        cwpreds = self.cwpreds
        if cwpreds:
            for pred in cwpreds:
                predicate = self.mln.predicate(pred)
                if isinstance(predicate, SoftFunctionalPredicate):
                    if self.verbose:
                        logger.warning(
                            'Closed world assumption will be applied to soft functional predicate %s'
                            % pred)
                elif isinstance(predicate, FunctionalPredicate):
                    raise Exception(
                        'Closed world assumption is inapplicable to functional predicate %s'
                        % pred)
                for gndatom in self.mrf.gndatoms:
                    if gndatom.predname != pred:
                        continue
                    if self.mrf.evidence[gndatom.idx] is None:
                        self.mrf.evidence[gndatom.idx] = 0
        # apply the closed world assumption to all remaining ground atoms that are not in the queries
        if self.closedworld:
            qpreds = set()
            for q in self.queries:
                qpreds.update(q.prednames())
            for gndatom in self.mrf.gndatoms:
                # (soft) functional predicates are exempt from the
                # closed-world assumption
                if isinstance(self.mln.predicate(gndatom.predname),
                              (FunctionalPredicate, SoftFunctionalPredicate)):
                    continue
                if gndatom.predname not in qpreds and self.mrf.evidence[
                        gndatom.idx] is None:
                    self.mrf.evidence[gndatom.idx] = 0
        for var in self.mrf.variables:
            if isinstance(var, FuzzyVariable):
                var.consistent(self.mrf.evidence, strict=True)
        self._watch = StopWatch()

    @property
    def verbose(self):
        '''The `verbose` parameter (defaults to `False`).'''
        return self._params.get('verbose', False)

    @property
    def results(self):
        '''
        The results of the last call to :meth:`run`.

        :raises Exception: if no results are available yet.
        '''
        # getattr keeps this robust for instances that were created without
        # going through __init__ (e.g. via __new__ in subclasses)
        results = getattr(self, '_results', None)
        if results is None:
            raise Exception('No results available. Run the inference first.')
        return results

    @property
    def elapsedtime(self):
        '''The elapsed time recorded under the 'inference' stop watch tag.'''
        return self._watch['inference'].elapsedtime

    @property
    def multicore(self):
        '''The `multicore` parameter (`None` if it was not given).'''
        return self._params.get('multicore')

    @property
    def resultdb(self):
        '''
        The results as a :class:`Database`; a previously stored `_resultdb`
        attribute takes precedence if the instance has one.
        '''
        if '_resultdb' in self.__dict__:
            return self._resultdb
        db = Database(self.mrf.mln)
        for atom in sorted(self.results, key=str):
            db[str(atom)] = self.results[atom]
        return db

    @property
    def closedworld(self):
        '''The `cw` parameter (defaults to `False`).'''
        return self._params.get('cw', False)

    @property
    def cwpreds(self):
        '''The `cw_preds` parameter (defaults to an empty list).'''
        return self._params.get('cw_preds', [])

    def _expand_queries(self, queries):
        '''
        Expands the list of queries where necessary, e.g. queries that are
        just predicate names are expanded to the corresponding list of atoms.

        :param queries:   list of query strings and/or formula objects.
        :returns:         the list of expanded queries.
        :raises NoSuchPredicateError: if a predicate name is not declared
                          in the MLN.
        :raises Exception: if a string query yields no expansion or a query
                          has an unsupported type.
        '''
        equeries = []
        for query in queries:
            if isinstance(query, str):
                prevLen = len(equeries)
                if '(' in query:  # a fully or partially grounded formula
                    f = self.mln.logic.parse_formula(query)
                    equeries.extend(f.itergroundings(self.mrf))
                else:  # just a predicate name
                    if query not in self.mln.prednames:
                        raise NoSuchPredicateError(
                            'Unsupported query: %s is not among the admissible predicates.'
                            % (query))
                    for gndatom in self.mln.predicate(query).groundatoms(
                            self.mln, self.mrf.domains):
                        equeries.append(
                            self.mln.logic.gnd_lit(self.mrf.gndatom(gndatom),
                                                   negated=False,
                                                   mln=self.mln))
                if len(equeries) == prevLen:
                    raise Exception(
                        "String query '%s' could not be expanded." % query)
            elif isinstance(query, Logic.Formula):
                equeries.append(query)
            else:
                raise Exception("Received query of unsupported type '%s'" %
                                str(type(query)))
        return equeries

    def _run(self):
        '''
        Performs the actual inference; must be overridden by subclasses.
        '''
        raise Exception('%s does not implement _run()' %
                        self.__class__.__name__)

    def run(self):
        '''
        Starts the inference process.

        The MLN weights are backed up before :meth:`_run` is invoked and
        restored afterwards, also if :meth:`_run` raises.

        :returns:   this inference object.
        '''
        # perform actual inference (polymorphic)
        if self.verbose:
            print('Inference engine: %s' % self.__class__.__name__)
        self._watch.tag('inference', verbose=self.verbose)
        _weights_backup = list(self.mln.weights)
        try:
            self._results = self._run()
        finally:
            # never leave the MLN with weights modified by a failed run
            self.mln.weights = _weights_backup
        self._watch.finish('inference')
        return self

    def write(self,
              stream=sys.stdout,
              color=None,
              sort='prob',
              group=True,
              reverse=True):
        '''
        Writes the results to `stream`, one line per atom, each prefixed
        with a bar rendered by `barstr`.

        :param stream:    file-like object to write to.
        :param color:     bar color; defaults to 'yellow' if `tty(stream)`
                          is true.
        :param sort:      'prob' or 'alpha'; ordering of the atoms within a
                          group.
        :param group:     if `True`, the atoms are grouped by the variables
                          of the MRF.
        :param reverse:   passed on to the sorting by probability.
        :raises Exception: if `sort` is not one of the admissible values.
        '''
        barwidth = 30
        if tty(stream) and color is None:
            color = 'yellow'
        if sort not in ('alpha', 'prob'):
            raise Exception('Unknown sorting: %s' % sort)
        results = dict(self.results)
        if group:
            for var in sorted(self.mrf.variables, key=str):
                # computed once per variable instead of once per result atom
                atomnames = list(map(str, var.gndatoms))
                res = dict((atom, prob) for atom, prob in results.items()
                           if atom in atomnames)
                if not res:
                    continue
                if isinstance(var, (MutexVariable, SoftMutexVariable)):
                    stream.write('%s:\n' % var)
                if sort == 'prob':
                    res = sorted(res,
                                 key=results.__getitem__,
                                 reverse=reverse)
                elif sort == 'alpha':
                    res = sorted(res, key=str)
                for atom in res:
                    stream.write('%s %s\n' % (barstr(
                        barwidth, results[atom], color=color), atom))
            return
        # NOTE: in the ungrouped case `sort` is validated but not used:
        # the stable second sort makes the order alphabetical, the order by
        # probability only breaks ties.
        # first sort wrt to probability
        ordered = sorted(results,
                         key=results.__getitem__,
                         reverse=reverse)
        # then wrt gnd atoms
        ordered = sorted(ordered, key=str)
        for q in ordered:
            stream.write('%s %s\n' %
                         (barstr(barwidth, results[q], color=color), q))
        self._watch.printSteps()

    def write_elapsed_time(self, stream=sys.stdout, color=None):
        '''
        Writes the elapsed times of all stop watch tags to `stream`, sorted
        by elapsed time in descending order, each with a bar showing its
        share of the 'inference' tag's elapsed time.

        :param stream:    file-like object to write to.
        :param color:     whether to color the bars; defaults to `True` if
                          `stream` is `sys.stdout` and to `False` otherwise.
        '''
        if color is None:
            color = stream is sys.stdout
        col = 'blue' if color else None
        total = float(self._watch['inference'].elapsedtime)
        stream.write(headline('INFERENCE RUNTIME STATISTICS'))
        # the separating blank line belongs to the same stream as the rest
        stream.write('\n')
        self._watch.finish()
        for t in sorted(self._watch.tags.values(),
                        key=lambda t: t.elapsedtime,
                        reverse=True):
            # guard against a zero total to avoid a ZeroDivisionError
            share = t.elapsedtime / total if total else 0.0
            stream.write(
                '%s %s %s\n' %
                (barstr(width=30, percent=share,
                        color=col), elapsed_time_str(t.elapsedtime), t.label))
Exemple #18
0
    def run(self):
        '''
        Run the MLN learning with the given parameters.

        The training data is taken from ``self.db``, which may be a list of
        ``Database`` objects, a single ``Database`` or the name of a
        database file located in the ``db`` folder below ``self.directory``.

        :returns:   the object returned by ``mln.learn``, or ``None`` if
                    learning was cancelled through a ``SystemExit``.
        :raises Exception: if ``self.mln`` is not an ``MLN``, if the
                    database name is empty or if ``self.db`` is of an
                    unsupported type.
        '''
        # the model has to be an MLN instance already
        if not isinstance(self.mln, MLN):
            raise Exception('No MLN specified')
        mln = self.mln

        # resolve the training databases
        if type(self.db) is list and all(
                isinstance(e, Database) for e in self.db):
            dbs = self.db
        elif isinstance(self.db, Database):
            dbs = [self.db]
        elif isinstance(self.db, basestring):
            dbname = self.db
            if not dbname:
                raise Exception('no trainig data given!')
            dbpath = os.path.join(self.directory, 'db', dbname)
            dbs = list(Database.load(mln, dbpath, self.ignore_unknown_preds))
        else:
            raise Exception(
                'Unexpected type of training databases: %s' % type(self.db))
        if self.verbose:
            print('loaded %d database(s).' % len(dbs))

        watch = StopWatch()

        # show the effective configuration as a table
        if self.verbose:
            confg = dict(self._config)
            # NOTE(review): eval on a configuration string -- only safe as
            # long as the configuration comes from a trusted source
            confg.update(eval("dict(%s)" % self.params))
            if type(confg.get('db')) is list:
                confg['db'] = '%d Databases' % len(confg['db'])
            rows = sorted(confg.items(), key=lambda item: str(item[0]))
            print(tabulate(rows, headers=('Parameter:', 'Value:')))

        # parameters that are copied verbatim from the tool settings
        params = {
            k: getattr(self, k)
            for k in ('multicore', 'verbose', 'profile',
                      'ignore_zero_weight_formulas')
        }

        # for discriminative learning
        if issubclass(self.method, DiscriminativeLearner):
            discr_preds = self.discr_preds
            if discr_preds == QUERY_PREDS:  # use query preds
                params['qpreds'] = self.qpreds
            elif discr_preds == EVIDENCE_PREDS:  # use evidence preds
                params['epreds'] = self.epreds

        # gaussian prior settings
        if self.use_prior:
            params['prior_mean'] = self.prior_mean
            params['prior_stdev'] = self.prior_stdev
        # the user-supplied parameters override everything set so far
        params.update(self.params)

        prof = None
        if self.profile:
            prof = Profile()
            print('starting profiler...')
            prof.enable()
        # set the debug level (restored in the finally clause below)
        olddebug = praclog.level()
        levelname = params.get('debug', 'WARNING').upper()
        praclog.level(eval('logging.%s' % levelname))
        mlnlearnt = None
        try:
            # run the learner
            mlnlearnt = mln.learn(dbs, self.method, **params)
            if self.verbose:
                print('')
                print(headline('LEARNT MARKOV LOGIC NETWORK'))
                print('')
                mlnlearnt.write()
        except SystemExit:
            print('Cancelled...')
        finally:
            if self.profile:
                prof.disable()
                print(headline('PROFILER STATISTICS'))
                stats = pstats.Stats(prof, stream=sys.stdout)
                stats.sort_stats('cumulative').print_stats()
            # reset the debug level
            praclog.level(olddebug)
        print('')
        watch.finish()
        watch.printSteps()
        return mlnlearnt
Exemple #19
0
    def run(self):
        '''
        Run the MLN learning with the given parameters.

        The training data is taken from ``self.db``, which may be a list of
        ``Database`` objects, a single ``Database`` or the name of a
        database file located in the ``db`` folder below ``self.directory``.

        :returns:   the object returned by ``mln.learn``, or ``None`` if
                    learning was cancelled through a ``SystemExit``.
        :raises Exception: if ``self.mln`` is not an ``MLN``, if the
                    database name is empty or if ``self.db`` is of an
                    unsupported type.
        '''
        # the model has to be an MLN instance already
        if not isinstance(self.mln, MLN):
            raise Exception('No MLN specified')
        mln = self.mln

        # resolve the training databases
        if type(self.db) is list and all(
                isinstance(e, Database) for e in self.db):
            dbs = self.db
        elif isinstance(self.db, Database):
            dbs = [self.db]
        elif isinstance(self.db, str):
            dbname = self.db
            if not dbname:
                raise Exception('no trainig data given!')
            dbpath = os.path.join(self.directory, 'db', dbname)
            dbs = list(Database.load(mln, dbpath, self.ignore_unknown_preds))
        else:
            raise Exception(
                'Unexpected type of training databases: %s' % type(self.db))
        if self.verbose:
            print('loaded %d database(s).' % len(dbs))

        watch = StopWatch()

        # show the effective configuration as a table
        if self.verbose:
            confg = dict(self._config)
            # NOTE(review): eval on a configuration string -- only safe as
            # long as the configuration comes from a trusted source
            confg.update(eval("dict(%s)" % self.params))
            if type(confg.get('db')) is list:
                confg['db'] = '%d Databases' % len(confg['db'])
            rows = sorted(confg.items(), key=lambda item: str(item[0]))
            print(tabulate(rows, headers=('Parameter:', 'Value:')))

        # parameters that are copied verbatim from the tool settings
        params = {
            k: getattr(self, k)
            for k in ('multicore', 'verbose', 'profile',
                      'ignore_zero_weight_formulas')
        }

        # for discriminative learning
        if issubclass(self.method, DiscriminativeLearner):
            discr_preds = self.discr_preds
            if discr_preds == QUERY_PREDS:  # use query preds
                params['qpreds'] = self.qpreds
            elif discr_preds == EVIDENCE_PREDS:  # use evidence preds
                params['epreds'] = self.epreds

        # gaussian prior settings
        if self.use_prior:
            params['prior_mean'] = self.prior_mean
            params['prior_stdev'] = self.prior_stdev
        # the user-supplied parameters override everything set so far
        params.update(self.params)

        prof = None
        if self.profile:
            prof = Profile()
            print('starting profiler...')
            prof.enable()
        # set the debug level (restored in the finally clause below)
        olddebug = logger.level
        levelname = params.get('debug', 'WARNING').upper()
        logger.level = eval('logs.%s' % levelname)
        mlnlearnt = None
        try:
            # run the learner
            mlnlearnt = mln.learn(dbs, self.method, **params)
            if self.verbose:
                print()
                print(headline('LEARNT MARKOV LOGIC NETWORK'))
                print()
                mlnlearnt.write()
        except SystemExit:
            print('Cancelled...')
        finally:
            if self.profile:
                prof.disable()
                print(headline('PROFILER STATISTICS'))
                stats = pstats.Stats(prof, stream=sys.stdout)
                stats.sort_stats('cumulative').print_stats()
            # reset the debug level
            logger.level = olddebug
        print()
        watch.finish()
        watch.printSteps()
        return mlnlearnt
Exemple #20
0
    def run(self):
        '''
        Run the inference with the given parameters.

        Materializes and grounds ``self.mln`` with the single evidence
        database in ``self.db`` and runs ``self.method`` on the resulting
        MRF for ``self.queries``. If ``self.use_emln`` is set and
        ``self.emln`` is given, the MLN is first serialized, extended by the
        EMLN text and parsed again.

        :returns:   the object returned by the inference's ``run()``, or
                    ``None`` if inference was cancelled through a
                    ``SystemExit``.
        :raises Exception: if ``self.mln`` is not an ``MLN`` or if
                    ``self.db`` is not exactly one database.
        '''
        watch = StopWatch()
        watch.tag('inference', self.verbose)
        # load the MLN
        if isinstance(self.mln, MLN):
            mln = self.mln
        else:
            raise Exception('No MLN specified')

        if self.use_emln and self.emln is not None:
            mlnstr = StringIO.StringIO()
            try:
                mln.write(mlnstr)
                # the content must be fetched *before* closing the buffer;
                # str() of the buffer object would only yield its repr
                mlntext = mlnstr.getvalue()
            finally:
                mlnstr.close()
            mln = parse_mln(mlntext + self.emln, grammar=self.grammar,
                            logic=self.logic)

        # load the database
        if isinstance(self.db, Database):
            db = self.db
        elif isinstance(self.db, list) and len(self.db) == 1:
            db = self.db[0]
        elif isinstance(self.db, list):
            raise Exception(
                'Got {} dbs. Can only handle one for inference.'.format(
                    len(self.db)))
        else:
            raise Exception('DB of invalid format {}'.format(type(self.db)))

        # expand the parameters
        params = dict(self._config)
        if 'params' in params:
            # NOTE(review): eval on a configuration string -- only safe as
            # long as the configuration comes from a trusted source
            params.update(eval("dict(%s)" % params['params']))
            del params['params']
        if self.verbose:
            rows = sorted(params.items(), key=lambda item: str(item[0]))
            print(tabulate(rows, headers=('Parameter:', 'Value:')))
        if type(db) is list and len(db) > 1:
            raise Exception('Inference can only handle one database at a time')
        elif type(db) is list:
            db = db[0]
        # drop empty entries; a real list keeps the emptiness check of the
        # inference meaningful
        params['cw_preds'] = [pred for pred in self.cw_preds if pred]
        # extract and remove all non-algorithm
        for s in GUI_SETTINGS:
            if s in params:
                del params[s]

        if self.profile:
            prof = Profile()
            print('starting profiler...')
            prof.enable()
        # set the debug level (restored in the finally clause below)
        olddebug = praclog.level()
        praclog.level(eval('logging.%s' % params.get('debug', 'WARNING').upper()))
        result = None
        try:
            mln_ = mln.materialize(db)
            mrf = mln_.ground(db)
            inference = self.method(mrf, self.queries, **params)
            if self.verbose:
                print('')
                print(headline('EVIDENCE VARIABLES'))
                print('')
                mrf.print_evidence_vars()

            result = inference.run()
            if self.verbose:
                print('')
                print(headline('INFERENCE RESULTS'))
                print('')
                inference.write()
                print('')
                inference.write_elapsed_time()
        except SystemExit:
            print('Cancelled...')
        finally:
            if self.profile:
                prof.disable()
                print(headline('PROFILER STATISTICS'))
                ps = pstats.Stats(prof, stream=sys.stdout).sort_stats('cumulative')
                ps.print_stats()
            # reset the debug level
            praclog.level(olddebug)
        if self.verbose:
            print('')
            watch.finish()
            watch.printSteps()
        return result