Ejemplo n.º 1
0
 def addPossibleWordSensesToDBs(self, *dbs):
     '''
     Extends each database in dbs with candidate word senses.

     For every has_pos(word, pos) result whose POS tag has an entry in
     POS_MAP, one sense id '<word>-NN' is generated per WordNet synset of
     the word, and a fuzzy is_a(sense, concept) atom is added for every
     concept of the MLN's 'concept' domain. Afterwards each word is
     explicitly denied the senses generated for all *other* words.
     The databases are modified in place; nothing is returned.
     '''
     wn = WordNet()
     for db in dbs:
         senses_by_word = defaultdict(list)
         logger.info(db.mln.domains['concept'])
         for match in db.query('has_pos(?word,?pos)'):
             token = match['?word']
             wn_pos = POS_MAP.get(match['?pos'], None)
             if wn_pos is None:
                 # POS tag without a WordNet counterpart: nothing to add
                 continue
             # word constants look like '<lemma>-<suffix>'; only the part
             # before the first dash is looked up in WordNet
             lemma = token.split('-')[0]
             for number, synset in enumerate(wn.synsets(lemma, wn_pos), 1):
                 sense_id = '%s-%.2d' % (token, number)
                 senses_by_word[token].append(sense_id)
                 for concept in db.mln.domains['concept']:
                     score = wn.semilarity(synset, concept)
                     db << ('is_a(%s,%s)' % (sense_id, concept), score)
         # a word cannot have a sense that was generated for another word
         for token in senses_by_word:
             for other in senses_by_word:
                 if other == token:
                     continue
                 for foreign_sense in senses_by_word[other]:
                     db << ('!has_sense(%s,%s)' % (token, foreign_sense))
Ejemplo n.º 2
0
 def addFuzzyEvidenceToDBs(self, *dbs):
     '''
     Adds fuzzy 'is_a' evidence to every database in dbs.

     The concept domain is the 'concept' entry of the domains merged
     (via mergedom) from the first database and all databases in dbs.
     For each is_a(sense, concept) result of a database, an
     is_a(sense, c) atom is written for every concept c of that merged
     domain, using the WordNet score of (concept, c) as truth value.

     The databases are modified in place and returned as passed in.
     '''
     merged_domains = mergedom(dbs[0].domains,
                               *[db.domains for db in dbs])
     all_concepts = merged_domains['concept']
     wn = WordNet()
     for db in dbs:
         for match in db.query('is_a(?sense, ?concept)'):
             sense, known = match['?sense'], match['?concept']
             for candidate in all_concepts:
                 score = wn.semilarity(known, candidate)
                 message = '{} ~ {} = {:.2f}'.format(known, candidate, score)
                 logger.info(message)
                 db << ('is_a({},{})'.format(sense, candidate), score)
     return dbs
Ejemplo n.º 3
0
 def __init__(self, configfile='pracconf'):
     '''
     Loads the configuration, the action cores and the manifests of all
     modules found below praclocations.pracmodules, then creates the
     WordNet instance, the global MLN and the MongoDB client.

     :param configfile: name of the configuration passed to PRACConfig
     '''
     # read all the manifest files.
     sys.path.insert(0, locations.code_base)
     self.config = PRACConfig(configfile)
     self.actioncores = ActionCore.load(
         os.path.join(praclocations.models, 'actioncores.yaml'))
     self._manifests = []
     self._manifests_by_name = {}
     self.logger = praclogger
     self._verbose = 1
     for module_path in os.listdir(praclocations.pracmodules):
         module_dir = os.path.join(praclocations.pracmodules, module_path)
         if not os.path.isdir(module_dir):
             continue
         manifest_file_name = os.path.join(module_dir, 'pracmodule.yaml')
         if not os.path.exists(manifest_file_name):
             self.logger.warning(
                 'No module manifest file in path "{}".'.format(
                     module_path))
             continue
         # make the module's sources importable before its manifest is
         # read (presumably the manifest refers to classes defined there)
         modulessrc = os.path.abspath(os.path.join(module_dir, 'src'))
         sys.path.insert(0, modulessrc)
         # the context manager guarantees the manifest file is closed
         # again, even if reading it raises
         with open(manifest_file_name, 'r') as manifest_file:
             module = PRACModuleManifest.read(manifest_file)
         module.module_path = module_dir
         self._manifests.append(module)
         self._manifests_by_name[module.name] = module
         self.logger.debug('Read manifest file for module "{}".'.format(
             module.name))
     self._module_by_name = {}
     self._modules = []
     # TODO: replace this by real action core definitions
     self.wordnet = WordNet()
     self.mln = self.construct_global_mln()
     self.mongodb = MongoClient(host=self.config.get('mongodb', 'host'),
                                port=self.config.getint('mongodb', 'port'))
Ejemplo n.º 4
0
 def __init__(self):
     '''
     Runs the base-class initializer and attaches a fresh WordNet
     instance as self.wordnet.
     '''
     super(ConfusionMatrixSim, self).__init__()
     self.wordnet = WordNet()
Ejemplo n.º 5
0
class ConfusionMatrixSim(ConfusionMatrix):
    '''
    Subclass of ConfusionMatrix incorporating similarities
    between concepts into the precisions calculations.

    Whenever similarity weighting is requested, class labels are resolved
    through self.wordnet.synset(), so they have to be names that lookup
    accepts.
    '''

    def __init__(self):
        super(ConfusionMatrixSim, self).__init__()
        self.wordnet = WordNet()

    def _truthLabels(self):
        '''
        Returns the sorted list of all labels that occur as second-level
        keys of self.matrix (each label once).
        '''
        labels = set()
        for row in self.matrix.values():
            labels.update(row)
        return sorted(labels)

    def countClassifications(self, classname, sim=False):
        '''
        Returns the true positive, true negative, false positive, false negative
        classification counts (in this order).

        If sim is True, every false positive/negative count is weighted by
        (1 - similarity) of the two classes involved; otherwise the plain
        counts are used and WordNet is not consulted at all.
        '''
        tp = self.matrix.get(classname, {}).get(classname, 0)
        others = [c for c in self.matrix.keys() if c != classname]

        fp = 0.
        fn = 0.
        if sim:
            wn = self.wordnet
            classSyn = wn.synset(classname)
            for c in others:
                # the same weight applies to both directions of confusion
                weight = 1 - wn.similarity(classSyn, wn.synset(c))
                fp += self.getMatrixEntry(classname, c) * weight
                fn += self.getMatrixEntry(c, classname) * weight
        else:
            for c in others:
                fp += self.getMatrixEntry(classname, c)
                fn += self.getMatrixEntry(c, classname)

        # everything that involves classname neither as row nor as column
        tn = 0.
        for c in others:
            for c2 in others:
                tn += self.getMatrixEntry(c, c2)

        if not sim:
            # unweighted counts must add up to the number of instances
            assert sum([tp, tn, fp, fn]) == self.instanceCount
        return tp, tn, fp, fn

    def getMetrics(self, classname, sim=False):
        '''
        Returns the classifier evaluation metrices in the following order:
        Accuracy, Precision, Recall, F1-Score.

        Accuracy is None if all four classification counts are zero;
        the other metrics default to 0.0 when their denominator is zero.
        '''
        tp, tn, fp, fn = self.countClassifications(classname, sim)

        acc = None
        if tp + tn + fp + fn > 0:
            acc = (tp + tn) / float(tp + tn + fp + fn)

        pre = 0.0
        if tp + fp > 0:
            pre = tp / float(tp + fp)

        rec = 0.0
        if tp + fn > 0:
            rec = tp / float(tp + fn)

        f1 = 0.0
        if pre + rec > 0:
            f1 = (2.0 * pre * rec) / (pre + rec)

        return acc, pre, rec, f1

    def getLatexTable(self, sim=False):
        '''
        Returns LaTex code for the confusion matrix.

        If sim is True, each cell count is multiplied by the similarity of
        its row and column class. The cell color intensity is the count
        relative to tp + fp + fn of the row class (0 if that sum is zero).
        '''
        labels = sorted(self.labels)
        endl = '\n'
        grid = '|l|' + 'l|' * len(labels)

        headerRow = r"Prediction/Ground Truth"
        for cl in labels:
            headerRow += (r" & \begin{turn}{90}" + cl.replace('_', r'\_') +
                          r'\end{turn}')

        # count number of actual instances per class label
        examplesPerClass = {}
        for label in self.labels:
            tp, _, fp, fn = self.countClassifications(label)
            examplesPerClass[label] = sum([tp, fp, fn])

        lines = [r'\footnotesize',
                 r'\begin{tabular}{' + grid + '}',
                 r'\hline',
                 headerRow + r'\\ \hline']

        # one table row per class, one cell per class
        for clazz in labels:
            total = examplesPerClass[clazz]
            classSyn = self.wordnet.synset(clazz) if sim else None
            values = []
            for cl2 in labels:
                counts = self.getMatrixEntry(clazz, cl2)
                if sim:
                    cl2Syn = self.wordnet.synset(cl2)
                    counts *= self.wordnet.similarity(classSyn, cl2Syn)
                # a label without any instance would divide by zero
                intensity = int(round(counts / total * 100)) if total else 0
                cellFormat = r'\textbf{{{:g}}}' if clazz == cl2 else '{:g}'
                cellText = cellFormat.format(float('{:.2f}'.format(counts)))
                values.append(r'\cellcolor{{cfmcolor!{0}}}{1}'.format(
                    intensity, cellText))
            lines.append(clazz.replace('_', r'\_') + ' & ' +
                         ' & '.join(values) + r'\\ \hline')

        lines.append(r"\end{tabular}")
        return endl.join(lines) + endl

    def printPrecisions(self, sim=False):
        '''
        Prints to the standard out a table of the class-specific error measures accurracy, precision, recall, F score.

        Returns a dict mapping each class label to its formatted measures.
        '''
        precs = {}
        for cf in self._truthLabels():
            acc, pre, rec, f1 = self.getMetrics(cf, sim)
            print('{}: - Acc={:2f}, Pre={:2f}, Rec={:2f}, F1={:2f}\n'.format(cf, acc, pre, rec, f1))
            precs[cf] = 'Acc={:2f}, Pre={:2f}, Rec={:2f}, F1={:2f}'.format(acc, pre, rec, f1)
        return precs

    def precisionsToFile(self, filename, sim=False):
        '''
        Writes the class-specific error measures, the total accuracy and
        the averaged measures to the file filename (which is overwritten).
        The measures are also printed to the standard out as a side effect
        of printPrecisions/printAveragePrecision.
        '''
        precisions = self.printPrecisions(sim=sim)
        # the context manager closes (and thereby flushes) the file
        with open(filename, 'w+') as f:
            for prec in precisions:
                f.write('{}: {}\n'.format(prec, precisions[prec]))
            f.write('Total Accuracy: {}\n'.format(self.getTotalAccuracy()))
            f.write('Average Precision: Acc={0[0]}, Pre={0[1]}, Rec={0[2]}, F1={0[3]}\n'.format(self.printAveragePrecision(sim=sim)))

    def printAveragePrecision(self, sim=False):
        '''
        Prints and returns accuracy, precision, recall and F1 score (in
        this order) averaged over all class labels.

        Raises ZeroDivisionError if the matrix contains no labels.
        '''
        classes = self._truthLabels()
        aAcc = 0.0
        aPre = 0.0
        aRec = 0.0
        aF1 = 0.0

        for cf in classes:
            acc, pre, rec, f1 = self.getMetrics(cf, sim)
            aAcc += acc
            aPre += pre
            aRec += rec
            aF1 += f1

        numClasses = len(classes)
        averages = (aAcc / numClasses, aPre / numClasses,
                    aRec / numClasses, aF1 / numClasses)
        print('{}: - Acc={:2f}, Pre={:2f}, Rec={:2f} F1={:2f}\n'.format('Average: ', *averages))
        print("")
        return averages

    def __str__(self):
        '''
        Returns an ASCII table of the matrix in which every entry is
        multiplied by the similarity of its row and column class.
        '''
        largest = max(max(row.values()) for row in self.matrix.values())
        maxNumDigits = len(str(largest))
        maxClassLabelLength = max(map(len, self.matrix.keys()))
        padding = 1
        numLabels = len(self.matrix.keys())
        cellwidth = max(maxClassLabelLength, maxNumDigits, 3) + 2 * padding
        # create an horizontal line
        hline = '|' + '-' * cellwidth + '+'
        hline += '+'.join(['-' * cellwidth] * numLabels) + '|'
        sep = '|'
        outerHLine = '-' * len(hline)

        def createTableRow(args):
            cells = [str(a).rjust(cellwidth - padding) + ' ' * padding
                     for a in args]
            return sep + sep.join(cells) + sep

        endl = '\n'
        # draw the table
        table = outerHLine + endl
        table += createTableRow([r'P\C'] + sorted(self.matrix.keys())) + endl
        table += hline + endl
        for i, clazz in enumerate(sorted(self.labels)):
            classSyn = self.wordnet.synset(clazz)
            cells = ['{:g}'.format(
                self.getMatrixEntry(clazz, x) *
                self.wordnet.similarity(classSyn, self.wordnet.synset(x)))
                for x in sorted(self.labels)]
            table += createTableRow([clazz] + cells) + endl
            if i < len(self.matrix.keys()) - 1:
                table += hline + endl
        table += outerHLine
        return table

    def toPDF(self, filename, sim=False):
        '''
        Creates a PDF file of this matrix. Requires 'pdflatex' and 'pdfcrop' installed.

        Writes filename + '.tex', compiles it with pdflatex and crops
        filename + '.pdf' in place. Raises Exception if either external
        program exits with a non-zero status.
        '''
        texFileName = filename + '.tex'
        document = r'''
        \documentclass[10pt]{{article}}
        \usepackage{{color}}
        \usepackage{{rotating}}
        \usepackage[table]{{xcolor}}
        \definecolor{{cfmcolor}}{{rgb}}{{0.2,0.4,0.6}}
        \begin{{document}}
        \pagenumbering{{gobble}}
        \resizebox{{\columnwidth}}{{!}}{{{}}}
        \end{{document}}
        '''.format(self.getLatexTable(sim))
        with open(texFileName, 'w+') as texFile:
            texFile.write(document)

        # argument lists (no shell) keep file names with spaces or shell
        # metacharacters intact
        if Popen(['pdflatex', '-halt-on-error', texFileName]).wait() != 0:
            raise Exception('Couldn\'t compile LaTex.')
        pdfFileName = '{}.pdf'.format(filename)
        if Popen(['pdfcrop', pdfFileName, pdfFileName]).wait() != 0:
            raise Exception('Couldn\'t crop pdf')
Ejemplo n.º 6
0
def main():
    '''
    Command-line entry point of the PRAC object recognition tool.

    Parses the command line, runs NL parsing on the given sentences (if
    any) and then either opens the interactive GUI, trains the object
    recognition module (exits the process afterwards), or runs the regular
    property-extraction + object-recognition pipeline. Unless training was
    requested, the inferred properties and concepts of the last inference
    step are printed.
    '''
    headline("Running main...")

    usage = 'PRAC Object Recognition tool'
    argparser = argparse.ArgumentParser(description=usage)
    argparser.add_argument(
        "-i",
        "--interactive",
        dest="interactive",
        default=False,
        action='store_true',
        help="Starts PRAC object recognition with an interactive "
        "GUI tool.")
    argparser.add_argument(
        "-t",
        "--train",
        dest="trainMLN",
        nargs=1,
        default=None,
        help=
        "Train given MLN with inference results from argument. Example: pracobjrec -t orange.n.01 'It is a yellow "
        "or orange fruit.'")
    argparser.add_argument(
        "-r",
        "--regular",
        dest="regular",
        default=False,
        action='store_true',
        help="Runs regular inference pipeline. Arguments: mlnName")
    argparser.add_argument("-f",
                           "--onthefly",
                           dest="onthefly",
                           default=False,
                           action='store_true',
                           help="Generate MLN on the fly")
    argparser.add_argument(
        "-m",
        "--mln",
        nargs=2,
        dest='mln',
        default=None,
        help="MLN and logic used for training. Arguments: mlnName logic")
    # the natural-language object description(s); without this positional
    # argparse rejects any sentence given on the command line
    argparser.add_argument(
        "sentences",
        nargs='*',
        default=[],
        help="Natural-language description(s) of the object.")

    args = argparser.parse_args()

    interactive = args.interactive
    sentences = args.sentences

    prac = PRAC()
    prac.wordnet = WordNet(concepts=None)
    infer = PRACInference(prac, sentences)

    # in case we have natural-language parameters, parse them
    if len(infer.instructions) > 0:
        nlparser = prac.module('nl_parsing')
        prac.run(infer, nlparser)

    if interactive:  # use the GUI
        logger.info('Entering interactive mode')
        gui = PRACQueryGUI(infer)
        gui.open()
    elif args.trainMLN:  # training with property inference output
        if args.mln is None:
            # training reads the MLN name and logic from -m/--mln
            argparser.error('-t/--train requires -m/--mln mlnName logic')
        logger.info(
            'Training MLN {} with result from property inference'.format(
                args.trainMLN))

        # property inference from parsed input
        propExtract = prac.module('prop_extraction')
        prac.run(infer, propExtract)

        objRecog = prac.module('obj_recognition')

        praclearn = PRACLearning(prac)
        praclearn.otherParams['mln'] = args.mln[0]
        praclearn.otherParams['logic'] = args.mln[1]
        praclearn.otherParams['concept'] = args.trainMLN
        praclearn.otherParams['onthefly'] = args.onthefly
        praclearn.training_dbs = infer.inference_steps[-1].output_dbs

        objRecog.train(praclearn)
        sys.exit(0)

    else:  # regular PRAC pipeline
        logger.info('Entering regular inference pipeline')

        # property inference from parsed input
        propExtract = prac.module('prop_extraction')
        prac.run(infer, propExtract)

        objRecog = prac.module('obj_recognition')

        # object inference based on inferred properties
        prac.run(infer, objRecog)

    step = infer.inference_steps[-1]
    print()
    print(prac_heading('PRAC INFERENCE RESULTS'))
    print()
    print('Object description: {}'.format(
        colorize(''.join(sentences), (None, 'white', True), True)))
    print()

    # evidence atoms of these predicates are reported as properties
    property_prefixes = ('color', 'size', 'shape', 'hypernym', 'hasa',
                         'dimension', 'consistency', 'material')
    for db in step.output_dbs:
        print('Inferred properties:')
        for ek in sorted(db.evidence):
            e = db.evidence[ek]
            if e == 1.0 and ek.startswith(property_prefixes):
                predname, arguments = ek.split('(')[0], ek.split('(')[1]
                # the second argument still carries the atom's closing
                # parenthesis, which completes the printed expression
                print('{}({}, {}'.format(
                    colorize(predname, (None, 'white', True), True),
                    colorize(arguments.split(',')[0],
                             (None, 'magenta', True), True),
                    colorize(arguments.split(',')[1],
                             (None, 'green', True), True)))

    for db in step.output_dbs:
        print()
        print('Inferred possible concepts:')
        # most probable concepts first
        for ek in sorted(db.evidence, key=db.evidence.get, reverse=True):
            e = db.evidence[ek]
            if e > 0.001 and ek.startswith('object'):
                fields = ek.split(',')
                print('{} {}({}, {})'.format(
                    colorize('{:.4f}'.format(e), (None, 'cyan', True), True),
                    colorize('object', (None, 'white', True), True),
                    colorize(fields[0].split('(')[1],
                             (None, 'magenta', True), True),
                    colorize(fields[1].split(')')[0],
                             (None, 'yellow', True), True)))
Ejemplo n.º 7
0
from nltk import word_tokenize
from nltk.corpus import wordnet as wn
from pracmln import MLN
from pracmln.mln.database import parse_db
from pracmln.mln.util import colorize
from pracmln.utils.visualization import get_cond_prob_png

from prac.core import locations
from prac.core.base import PRACModule
from prac.core.errors import ParserError
from prac.core.inference import PRACInferenceStep, NLInstruction
from prac.core.wordnet import WordNet
from prac.pracutils.utils import prac_heading

# Module-level logger, requested from logs.getlogger with logs.INFO.
logger = logs.getlogger(__name__, logs.INFO)
# WordNet instance shared by this module; it is created at import time
# with concepts=None.
wordnet = WordNet(concepts=None)


class StanfordParser(object):
    '''
    Python Wrapper for the Java implementation of the Stanford
    natural-language parser.
    '''
    def __init__(self, pcfg_model_fname=None):
        self.pcfg_model_fname = pcfg_model_fname
        self.package_lexparser = jpype.JPackage(
            "edu.stanford.nlp.parser.lexparser")
        self.package_trees = jpype.JPackage('edu.stanford.nlp.trees')
        self.package = jpype.JPackage("edu.stanford.nlp")
        self.parser = self.package_lexparser.LexicalizedParser.loadModel(
            self.pcfg_model_fname,
from prac.core.wordnet import WordNet

# WordNet instance shared by the helper functions of this module; it is
# created once, at import time.
__word_net__ = WordNet()


def determine_path_similarity_between_two_concepts(a, b):
    '''
    Returns the similarity of the concepts a and b as computed by the
    module-level WordNet instance with the 'path' measure.

    :param a: first concept, passed unchanged to WordNet.similarity
    :param b: second concept, passed unchanged to WordNet.similarity
    :return: whatever WordNet.similarity(a, b, 'path') returns
    '''
    return __word_net__.similarity(a, b, 'path')
Ejemplo n.º 9
0
    def _role_of_word(self, db, actioncore, word):
        '''
        Returns the first role of actioncore for which db holds a role atom
        with word among its arguments, or None if there is no such role.

        :param db:          database that is queried for the role atoms
        :param actioncore:  name of the action core whose roles are checked
        :param word:        the word constant to look for
        '''
        for role in self.prac.actioncores[actioncore].roles:
            # one variable per argument except the last, which is fixed to
            # the action core
            variables = [
                '?v{}'.format(i)
                for i in range(len(db.mln.predicate(role).argdoms) - 1)
            ]
            query = '{}({},{})'.format(role, ','.join(variables), actioncore)
            for match in db.query(query):
                if any(match[v] == word for v in variables):
                    return role
        return None

    def role_distributions(self, step):
        '''
        Renders, for the roles filled by words of the step's output
        databases, a graph of the inferred sense distribution.

        For every word, action core and (non-null) sense of the word, the
        sense is retracted, has_sense is inferred anew with the action
        core's MLN project, and the nodes of the WordNet concept graph are
        colored by the inferred probability relative to the most probable
        sense. A later result for the same role overwrites an earlier one.

        :param step:    inference step whose output_dbs are processed
        :return:        dict mapping role name to the result of render_gv
        '''
        distrs = {}
        for db_ in step.output_dbs:
            for word in db_.domains['word']:
                for q in db_.query('action_core(?w,?ac)'):

                    # ==========================================================
                    # Initializaton
                    # ==========================================================

                    actioncore = q['?ac']
                    projectpath = os.path.join(self.module_path,
                                               '{}.pracmln'.format(actioncore))
                    project = MLNProject.open(projectpath)
                    mlntext = project.mlns.get(project.queryconf['mln'], None)
                    mln = parse_mln(
                        mlntext,
                        searchpaths=[self.module_path],
                        projectpath=projectpath,
                        logic=project.queryconf.get('logic',
                                                    'FirstOrderLogic'),
                        grammar=project.queryconf.get('grammar',
                                                      'PRACGrammar'))

                    # ==========================================================
                    # Preprocessing
                    # ==========================================================

                    # add inferred concepts to known_concepts to display
                    # them in the graph. Ignore verbs and adjectives,
                    # as they do not have hypernym relations to nouns.
                    # The configured list is copied so that extending it
                    # cannot leak into the configuration (in case getlist
                    # hands out a shared list).
                    concepts = list(
                        self.prac.config.getlist('wordnet', 'concepts'))
                    for con in db_.query('has_sense(?w,?s)'):
                        sense_parts = con['?s'].split('.')
                        # senses without a POS part (e.g. 'null') are not
                        # WordNet concepts and must not be indexed blindly
                        if len(sense_parts) < 2:
                            continue
                        if sense_parts[1] in ['a', 's', 'v']:
                            continue
                        concepts.append(con['?s'])
                    wn = WordNet(concepts=concepts)

                    db = db_.copy(mln=mln)
                    for qs in db_.query('!(EXIST ?w (has_sense(?w,?s)))'):
                        db.rmval('sense', qs['?s'])
                    for concept in db_.domains['concept']:
                        if concept not in mln.domains['concept']:
                            db.rmval('concept', concept)
                    for res in db_.query('has_sense({}, ?s)'.format(word)):
                        sense = res['?s']
                        if sense == 'null':
                            continue
                        role = self._role_of_word(db_, actioncore, word)
                        if role is None:
                            continue
                        db.retract('has_sense({},{})'.format(word, sense))
                        add_all_wordnet_similarities(db, wn)

                        # ======================================================
                        # Inference
                        # ======================================================

                        infer = self.mlnquery(method='EnumerationAsk',
                                              mln=mln,
                                              db=db,
                                              queries='has_sense',
                                              cw=True,
                                              multicore=True,
                                              verbose=self.prac.verbose > 2)

                        result = infer.resultdb

                        if self.prac.verbose == 2:
                            print('')
                            print(prac_heading('INFERENCE RESULTS'))
                            print('')
                            infer.write()

                        # ======================================================
                        # Graph generation
                        # ======================================================

                        g = wn.to_dot()
                        # (concept, probability) of every non-null sense
                        # inferred for the current word
                        sense_probs = []
                        for atom, truth in result.gndatoms():
                            _, predname, args = db.mln.logic.parse_literal(
                                atom)
                            if (predname == 'has_sense' and args[0] == word
                                    and args[1] != 'null'):
                                sense_probs.append((args[1], truth))

                        maxprob = max([0.] + [p for _, p in sense_probs])
                        for concept, truth in sense_probs:
                            if concept in concepts:
                                # if no sense has a positive probability,
                                # normalizing would divide by zero
                                ratio = truth / maxprob if maxprob > 0 else 0.
                                g.node(concept,
                                       fillcolor=get_prob_color(ratio))
                        distrs[role] = render_gv(g)
        return distrs
Ejemplo n.º 10
0
 def set_known_concepts(self, concepts):
     '''
     Replaces self.wordnet by a new WordNet instance created from the
     given concepts.

     :param concepts: passed unchanged as the first argument of WordNet
     '''
     self.wordnet = WordNet(concepts)