Example #1
    def __init__(self, mln, noisyStringDomains, verbose=True):
        self.mln = mln
        self.noisyStringDomains = noisyStringDomains
        self.verbose = verbose
        self.clusters = {}  # maps domain name -> list of clusters
        self.noisyDomains = {}
        self.log = logs.getlogger('NoisyString')
Example #2
def runFold(fold):
    log = logs.getlogger(fold.fold_id)
    try:
        fold.run()
    except:
        # re-raise with the formatted traceback so that the full error details
        # survive the trip back from a multiprocessing worker
        raise Exception(''.join(traceback.format_exception(*sys.exc_info())))
    return fold
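
A minimal sketch of how a fold runner like this is typically dispatched; foldRunnables is assumed to be a list of picklable fold objects (Example #11 below uses a non-daemonic pool variant instead of the stock Pool):

from multiprocessing import Pool

with Pool() as pool:
    # each fold runs in its own process; a failing fold re-raises in the parent
    # with the child traceback embedded in the exception message
    results = pool.map(runFold, foldRunnables)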
Example #3
    def run(self):

        optimizer = self.optimizer
        p = self.problem
        f = p.f
        grad = p.grad
        
        # coerce return types
        f = lambda wt: numpy.float64(p.f(wt))
        grad = lambda wt: numpy.array(list(map(numpy.float64, p.grad(wt))))
        
        # negate for minimization
        neg_f = lambda wt: -f(wt)
        neg_grad = lambda wt: -grad(wt)
        #if not useGrad or not p.useGrad(): neg_grad = None
        if not p.usef: 
            neg_f = lambda wt: -p._fDummy(wt)
        log = logs.getlogger(self.__class__.__name__)
        if optimizer == "bfgs":
            params = {k: v for k, v in self.optParams.items() if k in ("gtol", "epsilon", "maxiter")}
            if self.verbose: print("starting optimization with %s... %s\n" % (optimizer, params))
            wt, f_opt, grad_opt, Hopt, func_calls, grad_calls, warn_flags = fmin_bfgs(neg_f, self.wt, fprime=neg_grad, full_output=True, **params)
            if self.verbose: 
                print("optimization done with %s..." % optimizer)
                print("f-opt: %.16f\nfunction evaluations: %d\nwarning flags: %d\n" % (-f_opt, func_calls, warn_flags))
        elif optimizer == "cg":            
            params = {k: v for k, v in self.optParams.items() if k in ("gtol", "epsilon", "maxiter")}
            log.info("starting optimization with %s... %s" % (optimizer, params))
            wt, f_opt, func_calls, grad_calls, warn_flags = fmin_cg(neg_f, self.wt, fprime=neg_grad, args=(), full_output=True, **params)
            log.info("optimization done with %s..." % optimizer)
            log.info("f-opt: %.16f\nfunction evaluations: %d\nwarning flags: %d\n" % (-f_opt, func_calls, warn_flags))
        elif optimizer == "ncg":            
            params = {k: v for k, v in self.optParams.items() if k in ("avextol", "epsilon", "maxiter")}
            log.info("starting optimization with %s... %s" % (optimizer, params))
            wt, f_opt, func_calls, grad_calls, warn_flags = fmin_ncg(neg_f, self.wt, fprime=neg_grad, args=(), full_output=True, **params)
            log.info("optimization done with %s..." % optimizer)
            log.info("f-opt: %.16f\nfunction evaluations: %d\nwarning flags: %d\n" % (-f_opt, func_calls, warn_flags))
        elif optimizer == "fmin":
            params = {k: v for k, v in self.optParams.items() if k in ("xtol", "ftol", "maxiter")}
            log.info("starting optimization with %s... %s" % (optimizer, params))
            wt = fmin(neg_f, self.wt, args=(), full_output=True, **params)[0]  # full_output returns a tuple; keep only the optimum
            log.info("optimization done with %s..." % optimizer)
        elif optimizer == "powell":
            params = {k: v for k, v in self.optParams.items() if k in ("xtol", "ftol", "maxiter")}
            log.info("starting optimization with %s... %s" % (optimizer, params))
            wt = fmin_powell(neg_f, self.wt, args=(), full_output=True, **params)[0]  # full_output returns a tuple; keep only the optimum
            log.info("optimization done with %s..." % optimizer)
        elif optimizer == 'l-bfgs-b':
            params = {k: v for k, v in self.optParams.items() if k in ("gtol", "epsilon", "maxiter", "bounds")}
            log.info("starting optimization with %s... %s" % (optimizer, params))
            if 'bounds' in params:
                params['bounds'] = (params['bounds'],) * len(self.wt)
            wt, f_opt, d = fmin_l_bfgs_b(neg_f, self.wt, fprime=neg_grad, **params)
            log.info("optimization done with %s..." % optimizer)
            log.info("f-opt: %.16f\n" % (-f_opt))
        else:
            raise Exception("Unknown optimizer '%s'" % optimizer)
        
        return wt
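
A toy illustration (not part of the project) of the negate-for-minimization trick used above: SciPy's fmin_bfgs minimizes, so the objective to be maximized and its gradient are wrapped in negating lambdas.

import numpy
from scipy.optimize import fmin_bfgs

f = lambda w: -float((w[0] - 3.0) ** 2)              # objective to maximize; optimum at w = 3
grad = lambda w: numpy.array([-2.0 * (w[0] - 3.0)])

neg_f = lambda w: -f(w)                              # minimize the negative ...
neg_grad = lambda w: -grad(w)                        # ... and negate the gradient to match

w_opt = fmin_bfgs(neg_f, numpy.array([0.0]), fprime=neg_grad, disp=False)
print(w_opt)                                         # approximately [3.]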
Example #4
    def run(self):

        optimizer = self.optimizer
        p = self.problem
        f = p.f
        grad = p.grad
        
        # coerce return types
        f = lambda wt: numpy.float64(p.f(wt))
        grad = lambda wt: numpy.array(list(map(numpy.float64, p.grad(wt))))
        
        # negate for minimization
        neg_f = lambda wt: -f(wt)
        neg_grad = lambda wt: -grad(wt)
        #if not useGrad or not p.useGrad(): neg_grad = None
        if not p.usef: 
            neg_f = lambda wt: -p._fDummy(wt)
        log = logs.getlogger(self.__class__.__name__)
        if optimizer == "bfgs":
            params = {k: v for k, v in self.optParams.items() if k in ("gtol", "epsilon", "maxiter")}
            if self.verbose: print("starting optimization with %s... %s" % (optimizer, params))
            wt, f_opt, grad_opt, Hopt, func_calls, grad_calls, warn_flags = fmin_bfgs(neg_f, self.wt, fprime=neg_grad, full_output=True, **params)
            if self.verbose: 
                print("optimization done with %s..." % optimizer)
                print("f-opt: %.16f\nfunction evaluations: %d\nwarning flags: %d\n" % (-f_opt, func_calls, warn_flags))
        elif optimizer == "cg":            
            params = {k: v for k, v in self.optParams.items() if k in ("gtol", "epsilon", "maxiter")}
            log.info("starting optimization with %s... %s" % (optimizer, params))
            wt, f_opt, func_calls, grad_calls, warn_flags = fmin_cg(neg_f, self.wt, fprime=neg_grad, args=(), full_output=True, **params)
            log.info("optimization done with %s..." % optimizer)
            log.info("f-opt: %.16f\nfunction evaluations: %d\nwarning flags: %d\n" % (-f_opt, func_calls, warn_flags))
        elif optimizer == "ncg":            
            params = {k: v for k, v in self.optParams.items() if k in ("avextol", "epsilon", "maxiter")}
            log.info("starting optimization with %s... %s" % (optimizer, params))
            wt, f_opt, func_calls, grad_calls, warn_flags = fmin_ncg(neg_f, self.wt, fprime=neg_grad, args=(), full_output=True, **params)
            log.info("optimization done with %s..." % optimizer)
            log.info("f-opt: %.16f\nfunction evaluations: %d\nwarning flags: %d\n" % (-f_opt, func_calls, warn_flags))
        elif optimizer == "fmin":
            params = {k: v for k, v in self.optParams.items() if k in ("xtol", "ftol", "maxiter")}
            log.info("starting optimization with %s... %s" % (optimizer, params))
            wt = fmin(neg_f, self.wt, args=(), full_output=True, **params)[0]  # full_output returns a tuple; keep only the optimum
            log.info("optimization done with %s..." % optimizer)
        elif optimizer == "powell":
            params = {k: v for k, v in self.optParams.items() if k in ("xtol", "ftol", "maxiter")}
            log.info("starting optimization with %s... %s" % (optimizer, params))
            wt = fmin_powell(neg_f, self.wt, args=(), full_output=True, **params)[0]  # full_output returns a tuple; keep only the optimum
            log.info("optimization done with %s..." % optimizer)
        elif optimizer == 'l-bfgs-b':
            params = {k: v for k, v in self.optParams.items() if k in ("gtol", "epsilon", "maxiter", "bounds")}
            log.info("starting optimization with %s... %s" % (optimizer, params))
            if 'bounds' in params:
                params['bounds'] = (params['bounds'],) * len(self.wt)
            wt, f_opt, d = fmin_l_bfgs_b(neg_f, self.wt, fprime=neg_grad, **params)
            log.info("optimization done with %s..." % optimizer)
            log.info("f-opt: %.16f\n" % (-f_opt))
        else:
            raise Exception("Unknown optimizer '%s'" % optimizer)
        
        return wt
Example #5
    def evalMLN(self, mln, dbs, module):
        '''
        Evaluates the given (learned) MLN on the databases in dbs and adds
        the classification results to this fold's confusion matrix.
        '''

        log = logs.getlogger(self.fold_id)

        queryPred = self.params.queryPred
        queryDom = self.params.queryDom

        sig = [
            '?arg%d' % i
            for i, _ in enumerate(self.params.altMLN.predicates[queryPred])
        ]
        querytempl = '%s(%s)' % (queryPred, ','.join(sig))

        dbs = [db.duplicate() for db in dbs]

        infer = PRACInference(module.prac, [])
        inferenceStep = PRACInferenceStep(infer, self)

        for db in dbs:
            # save and remove the query predicates from the evidence
            trueDB = Database(self.params.altMLN)
            for bindings in db.query(querytempl):
                atom = querytempl
                for binding in bindings:
                    atom = atom.replace(binding, bindings[binding])
                trueDB.addGroundAtom(atom)
                db.retractGndAtom(atom)
            try:
                inferenceStep.output_dbs = [db]
                infer.inference_steps = [inferenceStep]
                module.prac.run(infer, module, mln=mln)
                resultDB = infer.inference_steps[-1].output_dbs[-1]

                sig2 = list(sig)
                entityIdx = mln.predicates[queryPred].index(queryDom)
                for entity in db.domains[queryDom]:
                    sig2[entityIdx] = entity
                    query = '%s(%s)' % (queryPred, ','.join(sig2))
                    for truth in trueDB.query(query):
                        truth = list(truth.values()).pop()
                    for pred in resultDB.query(query):
                        pred = list(pred.values()).pop()
                    self.confMatrix.addClassificationResult(pred, truth)
                for e, v in trueDB.evidence.items():
                    if v is not None:
                        db.addGroundAtom('%s%s' %
                                         ('' if v is True else '!', e))
            except:
                log.critical(''.join(
                    traceback.format_exception(*sys.exc_info())))
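
The confMatrix object used above is not shown here; the following is a hypothetical stand-in (not the project's ConfusionMatrix class) illustrating the bookkeeping that addClassificationResult(pred, truth) performs, with purely illustrative labels:

from collections import Counter

class TinyConfusionMatrix:
    '''Counts (ground truth, prediction) pairs; hypothetical stand-in only.'''
    def __init__(self):
        self.counts = Counter()

    def addClassificationResult(self, prediction, truth):
        self.counts[(truth, prediction)] += 1

cm = TinyConfusionMatrix()
cm.addClassificationResult('cup.n.01', 'cup.n.01')   # correct prediction
cm.addClassificationResult('cup.n.01', 'bowl.n.01')  # misclassification
print(cm.counts)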
Example #6
    def run(self):
        log = logs.getlogger(self.__class__.__name__)
        norm = 1
        alpha = 1.0
        step = 1
        log.info('starting optimization with %s... (alpha=%f)' %
                 (self.__class__.__name__, alpha))
        f_ = None
        while True:
            grad = self.learner.grad(self.wt)
            norm = numpy.linalg.norm(grad)
            f_ = self.learner.f(self.wt)
            print()
            print('|grad| =', norm)
            if norm < self.gtol or (self.maxiter is not None
                                    and step > self.maxiter):
                break
            exitNow = False
            w_ = None
            smaller = False
            bigger = False
            f_opt = f_
            while not exitNow:
                w = self.wt + grad * alpha
                print()
                f = self.learner.f(w, verbose=True)
                if f_ < f:
                    #                     if smaller:
                    if f_opt < f:
                        self.wt = numpy.array(list(w))
                        f_ = f
                        alpha *= (1 + self.learningRate)
                        exitNow = True
#                     else:
                    bigger = True
                    w_ = numpy.array(list(w))
                elif f_ > f:
                    if bigger:
                        if f_opt < f:
                            self.wt = w_
                            f_ = f
                        exitNow = True
                    alpha *= (1.0 - self.learningRate)
                    smaller = True
                else:
                    exitNow = True
                f_ = f
            print()
            print('alpha =', alpha)
            step += 1  # advance the iteration counter so that maxiter takes effect
        return self.wt
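
A simplified toy sketch of the adaptive step-size idea implemented above: widen alpha after a step that improves the objective and shrink it otherwise (concave toy objective and gradient are illustrative, not the learner's actual f):

import numpy

def ascend(f, grad, wt, alpha=1.0, rate=0.1, gtol=1e-6, maxiter=100):
    # adaptive step size: accept only improving steps
    for _ in range(maxiter):
        g = grad(wt)
        if numpy.linalg.norm(g) < gtol:
            break
        w_new = wt + alpha * g
        if f(w_new) > f(wt):
            wt = w_new
            alpha *= (1 + rate)   # accept the step and become bolder
        else:
            alpha *= (1 - rate)   # reject the step and become more cautious
    return wt

f = lambda w: -numpy.sum((w - 2.0) ** 2)   # concave toy objective, maximum at w == 2
grad = lambda w: -2.0 * (w - 2.0)
print(ascend(f, grad, numpy.zeros(3)))     # approaches [2. 2. 2.]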
Example #7
    def run(self):
        '''
        Runs the respective fold of the cross-validation.
        '''
        log = logs.getlogger(self.fold_id)
        log.info('Running fold %d of %d...' %
                 (self.params.foldIdx + 1, self.params.foldCount))
        directory = self.params.directory
        try:
            # Apply noisy string clustering
            log.debug('Transforming noisy strings...')
            if self.params.noisyStringDomains is not None:
                noisyStrTrans = NoisyStringTransformer(
                    self.params.altMLN, self.params.noisyStringDomains, True)
                learnDBs_ = noisyStrTrans.materializeNoisyDomains(
                    self.params.learnDBs)
                testDBs_ = noisyStrTrans.transformDBs(self.params.testDBs)
            else:
                learnDBs_ = self.params.learnDBs
                testDBs_ = self.params.testDBs

            # train the MLN
            log.debug('Starting learning...')

            module = self.params.module
            praclearn = PRACLearning(module.prac)
            praclearn.otherParams['mln'] = self.params.mlnFileName
            praclearn.otherParams['logic'] = self.params.logic
            praclearn.otherParams['onthefly'] = self.params.onthefly
            praclearn.training_dbs = learnDBs_

            learnedMLN = module.train(praclearn)

            # store the learned MLN in a file
            learnedMLN.writeToFile(
                os.path.join(directory, 'run_%d.mln' % self.params.foldIdx))
            log.debug('Finished learning.')

            # evaluate the MLN
            log.debug('Evaluating.')
            self.evalMLN(learnedMLN, testDBs_, module)
            self.confMatrix.toFile(
                os.path.join(directory,
                             'conf_matrix_%d.cm' % self.params.foldIdx))
            log.debug('Evaluation finished.')
        except (KeyboardInterrupt, SystemExit):
            log.critical("Exiting...")
            return None
Example #8
    def run(self):
        log = logs.getlogger(self.__class__.__name__)
        norm = 1
        alpha = 1.0
        step = 1
        log.info('starting optimization with %s... (alpha=%f)' % (self.__class__.__name__, alpha))
        f_ = None
        while True:
            grad = self.learner.grad(self.wt)
            norm = numpy.linalg.norm(grad)
            f_ = self.learner.f(self.wt)
            print()
            print('|grad| =', norm)
            if norm < self.gtol or (self.maxiter is not None and step > self.maxiter):
                break
            exitNow = False
            w_ = None
            smaller = False
            bigger = False
            f_opt = f_
            while not exitNow:
                w = self.wt + grad * alpha
                print()
                f = self.learner.f(w, verbose=True)
                if f_ < f:
#                     if smaller:
                    if f_opt < f: 
                        self.wt = numpy.array(list(w))
                        f_ = f
                        alpha *= (1 + self.learningRate)
                        exitNow = True
#                     else:
                    bigger = True
                    w_ = numpy.array(list(w))
                elif f_ > f:
                    if bigger:
                        if f_opt < f: 
                            self.wt = w_
                            f_ = f 
                        exitNow = True
                    alpha *= (1.0 - self.learningRate)
                    smaller = True
                else:
                    exitNow = True
                f_ = f
            print()
            print('alpha =', alpha)
            step += 1  # advance the iteration counter so that maxiter takes effect
        return self.wt
Example #9
    def __init__(self, prac, instr):
        '''
        PRAC inference initialization.
        :param prac:     reference to the PRAC instance.
        :param instr:    (str/iterable) list of natural-language sentences subject to
                         inference.
        '''
        self._logger = logs.getlogger(self.__class__.__name__, level=logs.DEBUG)
        self.prac = prac
        prac.deinit_modules()
        self.watch = StopWatch()
        if type(instr) in {list, tuple}:
            instr_ = instr
        elif isinstance(instr, str):
            instr_ = [instr]
        self.fringe = []
        pred = None
        for i in instr_:
            self.fringe.append(NLInstruction(self, i, pred=pred))
            pred = self.fringe[-1]
        self.root = list(self.fringe)
        self.lastnode = None
Example #10
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 
IN THE SOFTWARE.
"""

import os
import tempfile
import base64

from dnutils import logs  # provides the getlogger() used for the module-level logger below
from PIL import Image

logger = logs.getlogger(__name__, logs.DEBUG)

# Default packages to use when generating output
default_packages = [
        'amsmath',
        'amsthm',
        'amssymb',
        'bm'
        ]


def __build_preamble(packages, declarations):
    preamble = '\\documentclass{article}\n'
    for p in packages:
        preamble += "\\usepackage{{{}}}\n".format(p)
Example #11
def main():
    headline("Running xfold...")
    usage = 'PRAC Crossfold validation on pracmodules'

    parser = argparse.ArgumentParser(description=usage)
    parser.add_argument("-k",
                        "--folds",
                        dest="folds",
                        type=int,
                        default=10,
                        help="Number of folds for k-fold Cross Validation")
    parser.add_argument("-p",
                        "--percent",
                        dest="percent",
                        type=int,
                        default=100,
                        help="Use only PERCENT%% of the data. (default=100)")
    parser.add_argument("-v",
                        "--verbose",
                        dest="verbose",
                        action='store_true',
                        default=False,
                        help="Verbose mode.")
    parser.add_argument("-c",
                        "--multicore",
                        dest="multicore",
                        action='store_true',
                        default=False,
                        help="Use all available CPU cores (multiprocessing).")
    parser.add_argument('-n',
                        '--noisy',
                        dest='noisy',
                        type=str,
                        default=None,
                        help='-nDOMAIN defines DOMAIN as a noisy string.')
    parser.add_argument(
        '-f',
        '--folder',
        dest='folder',
        type=str,
        default=None,
        help='-f <folder> the folder in which the results shall be saved.')
    parser.add_argument('-m',
                        '--module',
                        dest='module',
                        type=str,
                        default=None,
                        help='-m <module> the module for the mln to xfold.')
    parser.add_argument('-d',
                        '--dbs',
                        dest='dbs',
                        type=str,
                        nargs='+',
                        help='One or more database files to use.')
    parser.add_argument('--predicate',
                        dest='predicate',
                        type=str,
                        default=None,
                        help='The query predicate.')
    parser.add_argument('--domain',
                        dest='domain',
                        type=str,
                        default=None,
                        help='The query domain.')
    parser.add_argument('--mln',
                        dest='mln',
                        type=str,
                        default=None,
                        help='The mln needed for training and inference.')
    parser.add_argument(
        '--altMLN',
        dest='altMLN',
        type=str,
        default=None,
        help='Alternative mln for loading the database files. Optional.')
    parser.add_argument('--logic',
                        dest='logic',
                        type=str,
                        default='FuzzyLogic',
                        help='The logic to load the mln with.')
    parser.add_argument("--onthefly",
                        dest="onthefly",
                        default=False,
                        action='store_true',
                        help="Generate MLN on the fly")

    args = parser.parse_args()
    opts_ = vars(args)

    folds = args.folds
    percent = args.percent
    verbose = args.verbose
    multicore = args.multicore
    dirname = args.folder
    moduleName = args.module
    noisy = ['text']  # noisy string domains; note that the -n/--noisy option is not used here
    predName = args.predicate
    domain = args.domain
    dbfiles = args.dbs
    mlnFileName = args.mln
    altMLNFileName = args.altMLN or args.mln  # equal to mlnFileName if no alternative mln given
    logic = args.logic
    onthefly = args.onthefly

    startTime = time.time()

    # set up the directory
    timestamp = time.strftime("%Y-%b-%d-%H-%M-%S", time.localtime())
    if dirname is None:
        idx = 1
        while True:
            dirname = '%s-%d' % (moduleName, idx)
            idx += 1
            if not os.path.exists(dirname): break
        dirname += '-' + timestamp

    expdir = os.getenv('PRAC_EXPERIMENTS', '.')
    expdir = os.path.join(expdir, dirname)
    if os.path.exists(expdir):
        print('Directory "%s" exists. Overwrite? ([y]/n)' % expdir, end=' ')
        answer = sys.stdin.read(1)
        if answer not in ('y', '\n'):
            exit(0)
        else:
            shutil.rmtree(expdir)
    os.mkdir(expdir)
    # set up the logger
    logs.getlogger().setLevel(logs.INFO)
    log = logs.getlogger()
    fileLogger = FileHandler(os.path.join(expdir, 'xval.log'))
    fileLogger.setFormatter(logs.formatter)
    log.addHandler(fileLogger)

    log.info('Log for %d-fold cross-validation of %s using %s' %
             (folds, moduleName, dbfiles))
    log.info('Date: %s' % timestamp)
    log.info('Results will be written into %s' % expdir)

    # load module
    prac = PRAC()
    module = prac.module(moduleName)

    # read MLN and dbs
    mln_ = readMLNFromFile(mlnFileName, logic=logic)
    altMLN = readMLNFromFile(altMLNFileName, logic=logic)

    dbs = []
    for dbfile in dbfiles:
        db = readDBFromFile(altMLN, dbfile)
        if type(db) is list:
            dbs.extend(db)
        else:
            dbs.append(db)
    log.info('Read %d databases.' % len(dbs))

    cwpreds = [pred for pred in mln_.predicates if pred != predName]

    # create the partition of data
    subsetLen = int(math.ceil(len(dbs) * percent / 100.0))
    if subsetLen < len(dbs):
        log.info('Using only %d of %d DBs' % (subsetLen, len(dbs)))
    dbs = sample(dbs, subsetLen)

    if len(dbs) < folds:
        log.error(
            'Cannot do %d-fold cross validation with only %d databases.' %
            (folds, len(dbs)))
        exit(0)

    shuffle(dbs)
    partSize = int(math.ceil(len(dbs) / float(folds)))
    partition = []
    for i in range(folds):
        partition.append(dbs[i * partSize:(i + 1) * partSize])

    foldRunnables = []
    for foldIdx in range(folds):
        params = XValFoldParams()
        params.mln = mln_.duplicate()
        params.altMLN = altMLN.duplicate()
        params.learnDBs = []
        for part in [d for i, d in enumerate(partition) if i != foldIdx]:
            params.learnDBs.extend(part)
        params.testDBs = partition[foldIdx]
        params.foldIdx = foldIdx
        params.foldCount = folds
        params.noisyStringDomains = noisy
        params.directory = expdir
        params.queryPred = predName
        params.queryDom = domain
        params.module = module
        params.logic = logic
        params.mlnFileName = mlnFileName
        params.altMLNFileName = altMLNFileName
        params.onthefly = onthefly
        foldRunnables.append(XValFold(params))
        log.info('Params for fold %d:\n%s' % (foldIdx, str(params)))

    if multicore:
        # set up a pool of (non-daemon!!) worker processes
        try:
            workerPool = NDPool()
            log.info('Starting %d-fold Cross-Validation in %d processes.' %
                     (folds, workerPool._processes))
            result = workerPool.map_async(runFold, foldRunnables).get()
            workerPool.close()
            workerPool.join()
            cm = ConfusionMatrixSim()
            for r in result:
                cm.combine(r.confMatrix)
            elapsedTime = time.time() - startTime
            cm.toFile(os.path.join(expdir, 'conf_matrix.cm'))

            cm.precisionsToFile(os.path.join(expdir, 'precisions.txt'))
            cm.precisionsToFile(os.path.join(expdir, 'precisions_sim.txt'),
                                sim=True)

            pdfname = 'conf_matrix'
            pdfnameSim = 'conf_matrix_sim'
            log.info('creating pdf of confusion matrix...')
            cm.toPDF(pdfname)
            cm.toPDF(pdfnameSim, sim=True)
            os.rename('{}.pdf'.format(pdfname),
                      os.path.join(expdir, '{}.pdf'.format(pdfname)))
            os.rename('{}_sim.pdf'.format(pdfname),
                      os.path.join(expdir, '{}_sim.pdf'.format(pdfname)))
        except (KeyboardInterrupt, SystemExit, SystemError):
            log.critical("Caught KeyboardInterrupt, terminating workers")
            workerPool.terminate()
            workerPool.join()
            exit(1)
        except:
            log.error('\n' +
                      ''.join(traceback.format_exception(*sys.exc_info())))
            exit(1)
    else:
        log.info(
            'Starting {}-fold Cross-Validation in 1 process.'.format(folds))
        cm = ConfusionMatrixSim()
        for fold in foldRunnables:
            cm.combine(runFold(fold).confMatrix)
        elapsedTime = time.time() - startTime
        cm.toFile(os.path.join(expdir, 'conf_matrix.cm'))

        cm.precisionsToFile(os.path.join(expdir, 'precisions.txt'))
        cm.precisionsToFile(os.path.join(expdir, 'precisions_sim.txt'),
                            sim=True)

        pdfname = 'conf_matrix'
        pdfnameSim = 'conf_matrix_sim'
        log.info('creating pdf of confusion matrix...')
        cm.toPDF(pdfname)
        cm.toPDF(pdfnameSim, sim=True)
        os.rename('{}.pdf'.format(pdfname),
                  os.path.join(expdir, '{}.pdf'.format(pdfname)))
        os.rename('{}_sim.pdf'.format(pdfname),
                  os.path.join(expdir, '{}_sim.pdf'.format(pdfname)))

    log.info('{}-fold cross-validation {} took {:.2f} min'.format(
        folds, '(MP)' if multicore else '(SP)', elapsedTime / 60.0))
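
Given the options defined above, a typical invocation might look like the following sketch; module, MLN, predicate, domain, and database names are purely illustrative:

import sys

# illustrative arguments only; substitute your own module, MLN and databases
sys.argv = ['xfold.py', '-k', '5', '-v',
            '-m', 'wn_senses',
            '--mln', 'model.mln',
            '--predicate', 'has_sense',
            '--domain', 'sense',
            '-d', 'train1.db', 'train2.db']
main()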
Example #12
def parse_db(mln, content, ignore_unknown_preds=False, db=None, dirs=['.'], projectpath=None):
    '''
    Reads one or more databases in a string representation and returns
    the respective Database objects.
    
    :param mln:                      the MLN object which should be used to load the database.
    :param content:                  the string representation of one or multiple ('---'-separated)
                                     databases
    :param ignore_unknown_preds:     by default this function raises an Exception when it encounters
                                     a predicate in the DB that has not been declared in the associated
                                     MLN. ignore_unknown_preds=True simply ignores such predicates.
    :param db:                       the Database object that shall receive the facts stored in the new DB.
                                     If None, a new `Database` object will be created.
    '''
    log = logs.getlogger('db')
    content = stripComments(content)
    allow_multiple = True
    if db is None:
        allow_multiple = True
        db = Database(mln, ignore_unknown_preds=ignore_unknown_preds)
    dbs = []
    # expand domains with dbtext constants and save evidence
    for line, l in enumerate(content.split("\n")):
        l = l.strip()
        if l == '':
            continue
        # separator between independent databases
        elif l == '---' and not db.isempty():
            dbs.append(db)
            db = Database(mln)
            continue
        # domain declaration
        elif "{" in l:
            domname, constants = db.mln.logic.parse_domain(l)
            domnames = [domname for _ in constants]
        # include
        elif l.startswith('#include'):
            filename = l[len("#include "):].strip()
            m = re.match(r'"(?P<filename>.+)"', filename)
            if m is not None:
                filename = m.group('filename')
                # if the path is relative, look for the respective file
                # relative to all paths specified; take the first matching file.
                if not mlnpath(filename).exists:
                    includefilename = None
                    for d in dirs:
                        mlnp = '/'.join([d, filename])
                        if mlnpath(mlnp).exists:
                            includefilename = mlnp
                            break
                    if includefilename is None:
                        raise Exception('File not found: %s' % filename)
                else:
                    includefilename = filename
            else:
                m = re.match(r'<(?P<filename>.+)>', filename)
                if m is not None:
                    filename = m.group('filename')
                else:
                    raise MLNParsingError('Malformed #include statement: %s' % line)
                if projectpath is None:
                    raise MLNParsingError('No project specified: Cannot locate import from project: %s' % filename)
                includefilename = ':'.join([projectpath, filename])
            logger.debug('Including file: "%s"' % includefilename)
            p = mlnpath(includefilename)
            dbs.extend(parse_db(content=mlnpath(includefilename).content, ignore_unknown_preds=ignore_unknown_preds, dirs=[p.resolve_path()]+dirs, 
                      projectpath=ifnone(p.project, projectpath, lambda x: '/'.join(p.path+[x])), mln=mln)) 
            continue
        # valued evidence
        elif l[0] in "0123456789":
            s = l.find(" ")
            gndatom = l[s + 1:].replace(" ", "")
            value = float(l[:s])
            if value < 0 or value > 1:
                raise Exception('Valued evidence must be in [0,1]') 
            if gndatom in db.evidence:
                raise Exception("Duplicate soft evidence for '%s'" % gndatom)
            try:
                _, predname, constants = mln.logic.parse_literal(gndatom)  # TODO Should we allow soft evidence on non-atoms here? (This assumes atoms)
            except NoSuchPredicateError as e:
                if ignore_unknown_preds: continue
                else: raise e
            domnames = mln.predicate(predname).argdoms
            db << (gndatom, value)
        # literal
        else:
            if l[0] == "?":
                raise Exception("Unknown literals not supported (%s)" % l) # this is an Alchemy feature
            try:
                true, predname, constants = mln.logic.parse_literal(l)
            except NoSuchPredicateError as e:
                if ignore_unknown_preds: continue
                else: raise e
            except Exception as e:
                traceback.print_exc()
                raise MLNParsingError('Error parsing line %d: %s (%s)' % (line+1, l, e))
Example #13
    def __init__(self, master, prac, node, gconf, directory='.'):
        self.logger = logs.getlogger(self.__class__.__name__, level=logs.DEBUG)
        self.master = master

        self.initialized = False

        self.master.bind('<Return>', self.start)
        self.master.bind('<Escape>', lambda a: self.master.quit())
        self.master.protocol('WM_DELETE_WINDOW', self.quit)

        self.prac = prac
        self.prac_inference = node.pracinfer
        self.infStep = None

        self.module_dir = os.path.join(locations.pracmodules, 'wnsenses')

        self.frame = Frame(master)
        self.frame.pack(fill=BOTH, expand=1)
        self.frame.columnconfigure(1, weight=1)

        # module selection
        row = 0
        Label(self.frame, text="Module: ").grid(row=row, column=0, sticky="E")
        modules = sorted([module for module in self.prac._manifests_by_name])
        self.selected_module = StringVar(master)
        self.selected_module.trace("w", self.select_module)
        self.list_modules = OptionMenu(*(self.frame, self.selected_module) +
                                       tuple(modules))
        self.list_modules.grid(row=row, column=1, sticky="NWE")

        # Project selection
        row += 1
        Label(self.frame, text="Project: ").grid(row=row, column=0, sticky="E")
        saveprojectcontainer = Frame(self.frame)
        saveprojectcontainer.grid(row=row, column=1, sticky="NEWS")
        saveprojectcontainer.columnconfigure(0, weight=1)

        self.selected_project = StringVar(master)
        projectfiles = ['']
        self.list_projects = OptionMenu(
            *(saveprojectcontainer, self.selected_project) +
            tuple(projectfiles))
        self.list_projects.grid(row=0, column=0, sticky="NWES")
        self.selected_project.trace("w", self.select_project)

        # save proj file
        self.btn_saveproj = Button(saveprojectcontainer,
                                   text='Save Project...',
                                   command=self.noask_save_project)
        self.btn_saveproj.grid(row=0, column=1, sticky="E")

        # save proj file as...
        self.btn_saveproj = Button(saveprojectcontainer,
                                   text='Save Project as...',
                                   command=self.ask_save_project)
        self.btn_saveproj.grid(row=0, column=2, sticky="E")

        # logic selection
        row += 1
        Label(self.frame, text='Logic: ').grid(row=row, column=0, sticky='E')
        logics = ['FirstOrderLogic', 'FuzzyLogic']
        self.selected_logic = StringVar(master)
        self.selected_logic.trace('w', self.settings_setdirty)
        l = OptionMenu(*(self.frame, self.selected_logic) + tuple(logics))
        l.grid(row=row, column=1, sticky='NWE')

        # mln section
        row += 1
        Label(self.frame, text="MLN: ").grid(row=row, column=0, sticky='NE')
        self.mln_container = FileEditBar(self.frame,
                                         dir=self.module_dir,
                                         filesettings={
                                             'extension': '.mln',
                                             'ftypes': [('MLN files', '.mln')]
                                         },
                                         defaultname='*unknown{}',
                                         importhook=self.import_mln,
                                         deletehook=self.delete_mln,
                                         projecthook=self.save_proj,
                                         filecontenthook=self.mlnfilecontent,
                                         fileslisthook=self.mlnfiles,
                                         updatehook=self.update_mln,
                                         onchangehook=self.project_setdirty)
        self.mln_container.grid(row=row, column=1, sticky="NEWS")
        self.mln_container.columnconfigure(1, weight=2)
        self.frame.rowconfigure(row, weight=1)

        row += 1
        self.use_emln = IntVar()
        self.use_emln.set(0)
        self.cb_use_emln = Checkbutton(self.frame,
                                       text="use model extension",
                                       variable=self.use_emln,
                                       command=self.onchange_use_emln)
        self.cb_use_emln.grid(row=row, column=1, sticky="W")

        # mln extension section
        row += 1
        self.emlncontainerrow = row
        self.emln_label = Label(self.frame, text="EMLN: ")
        self.emln_label.grid(row=self.emlncontainerrow, column=0, sticky='NE')
        self.emln_container = FileEditBar(self.frame,
                                          dir=self.module_dir,
                                          filesettings={
                                              'extension':
                                              '.emln',
                                              'ftypes':
                                              [('MLN extension files', '.emln')
                                               ]
                                          },
                                          defaultname='*unknown{}',
                                          importhook=self.import_emln,
                                          deletehook=self.delete_emln,
                                          projecthook=self.save_proj,
                                          filecontenthook=self.emlnfilecontent,
                                          fileslisthook=self.emlnfiles,
                                          updatehook=self.update_emln,
                                          onchangehook=self.project_setdirty)
        self.emln_container.grid(row=self.emlncontainerrow,
                                 column=1,
                                 sticky="NEWS")
        self.emln_container.columnconfigure(1, weight=2)
        self.onchange_use_emln(dirty=False)
        self.frame.rowconfigure(row, weight=1)

        # db section
        row += 1
        Label(self.frame, text="Evidence: ").grid(row=row,
                                                  column=0,
                                                  sticky='NE')
        self.db_container = FileEditBar(self.frame,
                                        dir=self.module_dir,
                                        filesettings={
                                            'extension': '.db',
                                            'ftypes':
                                            [('Database files', '.db')]
                                        },
                                        defaultname='*unknown{}',
                                        importhook=self.import_db,
                                        deletehook=self.delete_db,
                                        projecthook=self.save_proj,
                                        filecontenthook=self.dbfilecontent,
                                        fileslisthook=self.dbfiles,
                                        updatehook=self.update_db,
                                        onchangehook=self.project_setdirty)
        self.db_container.grid(row=row, column=1, sticky="NEWS")
        self.db_container.columnconfigure(1, weight=2)
        self.frame.rowconfigure(row, weight=1)

        # inference method selection
        row += 1
        self.list_methods_row = row
        Label(self.frame, text="Method: ").grid(row=row, column=0, sticky=E)
        self.selected_method = StringVar(master)
        self.selected_method.trace('w', self.settings_setdirty)
        self.list_methods = OptionMenu(self.frame, self.selected_method,
                                       *InferenceMethods.names())
        self.list_methods.grid(row=self.list_methods_row,
                               column=1,
                               sticky="NWE")

        # queries
        row += 1
        Label(self.frame, text="Queries: ").grid(row=row, column=0, sticky=E)
        self.query = StringVar(master)
        Entry(self.frame, textvariable=self.query).grid(row=row,
                                                        column=1,
                                                        sticky="NEW")

        #  parameters
        row += 1
        Label(self.frame, text="Parameters: ").grid(row=row,
                                                    column=0,
                                                    sticky="NE")
        self.params = StringVar(master)
        self.entry_params = Entry(self.frame, textvariable=self.params)
        self.entry_params.grid(row=row, column=1, sticky="NEW")

        # closed-world predicates
        row += 1
        Label(self.frame, text="CW preds: ").grid(row=row,
                                                  column=0,
                                                  sticky="NE")
        self.cwpreds = StringVar(master)
        self.entry_cw = Entry(self.frame, textvariable=self.cwpreds)
        self.entry_cw.grid(row=row, column=1, sticky="NEW")

        # all preds open-world
        cw_container = Frame(self.frame)
        cw_container.grid(row=row, column=1, sticky="NES")
        self.closed_world = IntVar()
        self.cb_closed_world = Checkbutton(
            cw_container,
            text="Apply CW assumption to all except queries",
            variable=self.closed_world)
        self.cb_closed_world.grid(row=row, column=2, sticky='E')

        # Multiprocessing and verbose
        row += 1
        options_container = Frame(self.frame)
        options_container.grid(row=row, column=1, sticky='NEWS')

        self.multicore = IntVar()
        self.cb_multicore = Checkbutton(options_container,
                                        text="Use all CPUs",
                                        variable=self.multicore)
        self.cb_multicore.grid(row=0, column=0, sticky=W)

        self.verbose = IntVar()
        self.cb_verbose = Checkbutton(options_container,
                                      text="verbose",
                                      variable=self.verbose)
        self.cb_verbose.grid(row=0, column=1, sticky=W)

        self.keep_evidence = IntVar()
        self.cb_keep_evidence = Checkbutton(options_container,
                                            text="keep result",
                                            variable=self.keep_evidence)
        self.cb_keep_evidence.grid(row=0, column=2, sticky=W)
        self.keep_evidence.set(True)

        # start and continue buttons
        row += 1
        self.btn_container = Frame(self.frame)
        self.btn_container.grid(row=row, column=1, sticky='EW')

        start_button = Button(self.btn_container,
                              text="Start Inference",
                              command=self.start)
        start_button.grid(row=0, column=1, sticky='E')

        continue_button = Button(self.btn_container,
                                 text="Continue >",
                                 command=self.oncontinue)
        continue_button.grid(row=0, column=2, sticky='W')

        self.settings_dirty = IntVar()
        self.project_dirty = IntVar()

        self.gconf = gconf
        self.project = None
        self.dir = os.path.abspath(
            ifnone(gconf['prev_query_path'], DEFAULT_CONFIG))
        if gconf['prev_query_project':self.dir] is not None:
            self.load_project(
                os.path.join(self.dir, gconf['prev_query_project':self.dir]))
        else:
            self.new_project()

        self.config = self.project.queryconf
        self.project.addlistener(self.project_setdirty)

        self.selected_module.set(self.gconf.get("module", modules[0]))
        self.update_dbeditor_from_result(node.outdbs)
        self.mln_container.dirty = False
        self.emln_container.dirty = False
        self.db_container.dirty = False
        self.project_setdirty(dirty=False)

        self.master.geometry(gconf['window_loc_query'])

        self.initialized = True
Example #14
from dnutils import logs

from recognize import Voice
from microphone import MicLevelController
from threading import Timer

logger = logs.getlogger(__name__ + 'REC', logs.DEBUG)

class VoiceRecorder:
    
    def __init__(self):
        self.voices = []
        self.voip = 0
        self.counter = 0
        self.recording = False

    def startListenerThread(self):
        # maintain a rolling pair of Voice recorders: the older one is stopped
        # and replaced while the newer one keeps recording
        newVoice = Voice(str(self.counter))
        if len(self.voices) < 2:
            self.voices.append(newVoice)
        else:
            if self.recording:
                Timer(0.6, self.startListenerThread).start()
                return
            self.voices[self.voip].stopRecording()
            self.voices[self.voip] = newVoice
            self.voip = (self.voip + 1) % 2
        newVoice.startRecording()
        self.counter += 1
        if self.counter < 20:
Example #15
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

import os
from collections import defaultdict

from dnutils import logs
from nltk.corpus.reader.wordnet import Synset

from prac.core.base import PRACModule, PRACPIPE, DB_TRANSFORM
from prac.core.inference import PRACInferenceStep
from prac.core.wordnet import WordNet, POS_MAP

from pracmln import MLN, Database
from pracmln.mln.util import colorize, mergedom

logger = logs.getlogger(__name__, logs.INFO)


class WNSenses(PRACModule):
    '''
    Extracts possible word senses from WordNet given the part of speech
    of a word. Depends on the 'syntax' feature extractor.
    '''
    def initialize(self):
        self.mln = MLN(mlnfile=os.path.join(self.module_path, 'mln',
                                            'predicates.mln'),
                       logic='FuzzyLogic',
                       grammar='PRACGrammar')
        self.wordnetKBs = {}

    @DB_TRANSFORM
Example #16
def parse_db(mln,
             content,
             ignore_unknown_preds=False,
             db=None,
             dirs=['.'],
             projectpath=None):
    """
    Reads one or more databases in a string representation and returns
    the respective Database objects.
    
    :param mln:                     the MLN object which should be used to load
                                    the database.
    :param content:                 the string representation of one or
                                    multiple ('---'-separated) databases
    :param ignore_unknown_preds:    by default this function raises an
                                    Exception when it encounters a predicate
                                    in the DB that has not been declared in
                                    the associated MLN.
                                    ignore_unknown_preds=True simply ignores
                                    such predicates.
    :param db:                      The Database object that shall receive
                                    the facts stored in the new DB. If None,
                                    a new `Database` object will be created.
    :return:                        a list of databases
    """
    log = logs.getlogger('db')
    content = stripComments(content)
    allow_multiple = True
    if db is None:
        allow_multiple = True
        db = Database(mln, ignore_unknown_preds=ignore_unknown_preds)
    dbs = []
    # expand domains with dbtext constants and save evidence
    for line, l in enumerate(content.split("\n")):
        l = l.strip()
        if l == '':
            continue
        # separator between independent databases
        elif l == '---' and not db.isempty():
            dbs.append(db)
            db = Database(mln)
            continue
        # domain declaration
        elif "{" in l:
            domname, constants = db.mln.logic.parse_domain(l)
            domnames = [domname for _ in constants]
        # include
        elif l.startswith('#include'):
            filename = l[len("#include "):].strip()
            m = re.match(r'"(?P<filename>.+)"', filename)
            if m is not None:
                filename = m.group('filename')
                # if the path is relative, look for the respective file
                # relative to all paths specified; take the first matching file.
                if not mlnpath(filename).exists:
                    includefilename = None
                    for d in dirs:
                        mlnp = '/'.join([d, filename])
                        if mlnpath(mlnp).exists:
                            includefilename = mlnp
                            break
                    if includefilename is None:
                        raise Exception('File not found: %s' % filename)
                else:
                    includefilename = filename
            else:
                m = re.match(r'<(?P<filename>.+)>', filename)
                if m is not None:
                    filename = m.group('filename')
                else:
                    raise MLNParsingError('Malformed #include statement: %s' %
                                          line)
                if projectpath is None:
                    raise MLNParsingError(
                        'No project specified: Cannot locate import from project: %s'
                        % filename)
                includefilename = ':'.join([projectpath, filename])
            logger.debug('Including file: "%s"' % includefilename)
            p = mlnpath(includefilename)
            dbs.extend(
                parse_db(content=mlnpath(includefilename).content,
                         ignore_unknown_preds=ignore_unknown_preds,
                         dirs=[p.resolve_path()] + dirs,
                         projectpath=ifnone(p.project, projectpath,
                                            lambda x: '/'.join(p.path + [x])),
                         mln=mln))
            continue
        # valued evidence
        elif l[0] in "0123456789":
            s = l.find(" ")
            gndatom = l[s + 1:].replace(" ", "")
            value = float(l[:s])
            if value < 0 or value > 1:
                raise Exception('Valued evidence must be in [0,1]')
            if gndatom in db.evidence:
                raise Exception("Duplicate soft evidence for '%s'" % gndatom)
            try:
                _, predname, constants = mln.logic.parse_literal(
                    gndatom
                )  # TODO Should we allow soft evidence on non-atoms here? (This assumes atoms)
            except NoSuchPredicateError as e:
                if ignore_unknown_preds:
                    continue
                else:
                    raise e
            domnames = mln.predicate(predname).argdoms
            db << (gndatom, value)
        # literal
        else:
            if l[0] == "?":
                raise Exception("Unknown literals not supported (%s)" %
                                l)  # this is an Alchemy feature
            try:
                true, predname, constants = mln.logic.parse_literal(l)
            except NoSuchPredicateError as e:
                if ignore_unknown_preds:
                    continue
                else:
                    raise e
            except Exception as e:
                traceback.print_exc()
                raise MLNParsingError('Error parsing line %d: %s (%s)' %
                                      (line + 1, l, e))
            if mln.predicate(predname) is None and ignore_unknown_preds:
                log.debug('Predicate "%s" is undefined.' % predname)
                continue
            elif mln.predicate(predname) is None:
                raise NoSuchPredicateError(predname)
            domnames = mln.predicate(predname).argdoms
            # save evidence
            true = 1 if true else 0
            db << ("%s(%s)" % (predname, ",".join(constants)), true)

        # expand domains
        if len(domnames) != len(constants):
            raise Exception(
                "Ground atom %s in database %d has wrong number of parameters"
                % (l, len(dbs)))

        for i, c in enumerate(constants):
            db.domain({domnames[i]: c})

    if not db.isempty(): dbs.append(db)
    if len(dbs) > 1 and not allow_multiple:
        raise Exception(
            'Only a single database is permitted when loading via the '
            'constructor. Use Database.load() for loading multiple DBs.'
        )
    return dbs
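
A minimal usage sketch, assuming mln is an already-constructed pracmln MLN whose predicate declarations match the atoms below; predicate and constant names are purely illustrative, and ignore_unknown_preds=True keeps the sketch tolerant of mismatches:

content = '''
is_a(w1, Cup)
0.8 has_sense(w1, s1)
---
is_a(w2, Bowl)
'''
dbs = parse_db(mln, content, ignore_unknown_preds=True)
print(len(dbs))        # 2 databases, split at the '---' separator
for db in dbs:
    print(db.evidence)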
Example #17
from collections import defaultdict

import numpy
from dnutils import logs
from dnutils.console import barstr
from numpy.ma.core import sqrt, log

from ..constants import HARD
from ..errors import SatisfiabilityException
from ..grounding.bpll import BPLLGroundingFactory
from ..grounding.default import DefaultGroundingFactory
from .common import DiscriminativeLearner, AbstractLearner
from ..util import fsum, temporary_evidence

logger = logs.getlogger(__name__)


class BPLL(AbstractLearner):
    """
    Pseudo-log-likelihood learning with blocking, i.e. a generalization
    of PLL which takes into consideration the fact that the truth value of a
    blocked atom cannot be inverted without changing a further atom's truth
    value from the same block.
    This learner is fairly efficient, as it computes f and grad based only
    on a sufficient statistic.
    """    
    
    def __init__(self, mrf, **params):
        AbstractLearner.__init__(self, mrf, **params)
        self._pls = None
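
For reference, the pseudo-log-likelihood that this family of learners maximizes can be written, in the standard MLN formulation (not taken from this file), as

    \log \mathrm{PLL}(w) = \sum_i \log P_w\bigl(X_i = x_i \mid MB(X_i)\bigr)

where MB(X_i) is the Markov blanket of ground atom X_i. With blocking, the sum runs over mutually exclusive blocks of ground atoms, and each term conditions on the joint assignment of a whole block rather than on a single atom flip.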
Example #18
import nltk

from configparser import ConfigParser
from dnutils import logs

from . import locations as praclocations

from .inference import PRACInferenceStep, PRACInference
from .wordnet import WordNet, VERB_TAGS
from ..db.ies.models import constants
from ..db.ies.models import Word
from ..db.ies.extraction import HowtoImport
from pracmln import Database, MLN
from pracmln import MLNQuery
from pracmln.mln import NoSuchPredicateError
from pracmln.mln.util import mergedom
from collections import defaultdict

nltk.data.path = [praclocations.nltk_data]

logger = logs.getlogger(__name__, logs.INFO)
praclogger = logs.getlogger('PRAC', logs.INFO)
aclogger = logs.getlogger('actioncores', logs.INFO)


class PRACConfig(ConfigParser):
    '''
    Global configuration data structure for PRAC.

    Wraps around a ConfigParser
    '''
    DEFAULTS = {
        'mongodb': {
            'host': 'localhost',
            'port': 27017,
            'user': '',