def addPossibleWordSensesToDBs(self, *dbs):
    '''
    Adds to the databases dbs all possible word senses (and fuzzy meanings)
    based on their part of speech.
    '''
    wordnet = WordNet()
    for db in dbs:
        word2senses = defaultdict(list)
        logger.info(db.mln.domains['concept'])
        for res in db.query('has_pos(?word,?pos)'):
            word_const = res['?word']
            pos = POS_MAP.get(res['?pos'], None)
            if pos is None:
                continue
            word = word_const.split('-')[0]
            # enumerate all WordNet senses of the word and attach a fuzzy
            # is_a evidence atom for every known concept
            for i, synset in enumerate(wordnet.synsets(word, pos)):
                sense_id = '%s-%.2d' % (word_const, i + 1)
                word2senses[word_const].append(sense_id)
                for concept in db.mln.domains['concept']:
                    sim = wordnet.semilarity(synset, concept)
                    db << ('is_a(%s,%s)' % (sense_id, concept), sim)
        # a sense generated for one word cannot be the sense of another word
        for word in word2senses:
            for word2, senses in word2senses.iteritems():
                if word2 == word:
                    continue
                else:
                    for s in senses:
                        db << ('!has_sense(%s,%s)' % (word, s))
def addFuzzyEvidenceToDBs(self, *dbs):
    '''
    Adds to the databases dbs all fuzzy 'is_a' relationships for all senses
    contained in the DB and in the MLN. (Has side effects on the original
    databases.)
    '''
    mln_domains = dbs[0].domains
    domains_full = mergedom(mln_domains, *[db.domains for db in dbs])
    concepts = domains_full['concept']
    wordnet = WordNet()
    for db in dbs:
        for res in db.query('is_a(?sense, ?concept)'):
            sense = res['?sense']
            concept = res['?concept']
            for c in concepts:
                similarity = wordnet.semilarity(concept, c)
                logger.info('{} ~ {} = {:.2f}'.format(concept, c, similarity))
                db << ('is_a({},{})'.format(sense, c), similarity)
    return dbs
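
# Hedged usage sketch (not part of the original sources): how the two helpers
# above might be driven. The module instance 'wsd', the evidence string and
# the word constant 'cup-3' are illustrative assumptions; parse_db is the
# pracmln database parser already imported elsewhere in this codebase.
from pracmln.mln.database import parse_db

EXAMPLE_EVIDENCE = '''
has_pos(cup-3,NN)
'''

def annotate_senses(wsd):
    # parse the evidence into databases bound to the module's MLN (assumed
    # to be available as wsd.mln)
    dbs = parse_db(wsd.mln, EXAMPLE_EVIDENCE)
    # attach fuzzy is_a evidence for every possible WordNet sense of each word
    wsd.addPossibleWordSensesToDBs(*dbs)
    # propagate fuzzy is_a evidence for the senses already present in the DBs
    return wsd.addFuzzyEvidenceToDBs(*dbs)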
def __init__(self, configfile='pracconf'):
    # read all the manifest files.
    sys.path.insert(0, locations.code_base)
    self.config = PRACConfig(configfile)
    self.actioncores = ActionCore.load(
        os.path.join(praclocations.models, 'actioncores.yaml'))
    self._manifests = []
    self._manifests_by_name = {}
    self.logger = praclogger
    self._verbose = 1
    for module_path in os.listdir(praclocations.pracmodules):
        if not os.path.isdir(
                os.path.join(praclocations.pracmodules, module_path)):
            continue
        manifest_file_name = os.path.join(praclocations.pracmodules,
                                          module_path, 'pracmodule.yaml')
        if not os.path.exists(manifest_file_name):
            self.logger.warning(
                'No module manifest file in path "{}".'.format(module_path))
            continue
        manifest_file = open(manifest_file_name, 'r')
        modulessrc = os.path.abspath(
            os.path.join(praclocations.pracmodules, module_path, 'src'))
        sys.path.insert(0, modulessrc)
        module = PRACModuleManifest.read(manifest_file)
        module.module_path = os.path.join(praclocations.pracmodules,
                                          module_path)
        self._manifests.append(module)
        self._manifests_by_name[module.name] = module
        self.logger.debug(
            'Read manifest file for module "{}".'.format(module.name))
    self._module_by_name = {}
    self._modules = []
    # TODO: replace this by real action core definitions
    self.wordnet = WordNet()
    self.mln = self.construct_global_mln()
    self.mongodb = MongoClient(host=self.config.get('mongodb', 'host'),
                               port=self.config.getint('mongodb', 'port'))
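
# Hedged usage sketch (assumption, not from the original sources): constructing
# the PRAC system and looking up one of the modules discovered from its
# pracmodule.yaml manifest, in the same way the pipeline code further below
# calls prac.module('nl_parsing').
def build_prac():
    prac = PRAC()                        # loads config, action cores and module manifests
    parser = prac.module('nl_parsing')   # retrieve a module by its manifest name
    return prac, parser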
class ConfusionMatrixSim(ConfusionMatrix):
    '''
    Subclass of ConfusionMatrix incorporating similarities between concepts
    into the precision calculations.
    '''

    def __init__(self):
        super(ConfusionMatrixSim, self).__init__()
        self.wordnet = WordNet()

    def countClassifications(self, classname, sim=False):
        '''
        Returns the true positive, true negative, false positive and false
        negative classification counts (in this order). If sim is True,
        false positives and false negatives are weighted by concept
        dissimilarity.
        '''
        tp = self.matrix.get(classname, {}).get(classname, 0)
        classes = self.matrix.keys()
        wn = self.wordnet
        classSyn = wn.synset(classname)
        fp = 0.
        for c in classes:
            if c != classname:
                if sim:
                    cSyn = wn.synset(c)
                    fp += (self.getMatrixEntry(classname, c) *
                           (1 - wn.similarity(classSyn, cSyn)))
                else:
                    fp += self.getMatrixEntry(classname, c)
        fn = 0.
        for c in classes:
            if c != classname:
                if sim:
                    cSyn = wn.synset(c)
                    fn += (self.getMatrixEntry(c, classname) *
                           (1 - wn.similarity(classSyn, cSyn)))
                else:
                    fn += self.getMatrixEntry(c, classname)
        tn = 0.
        for c in classes:
            if c != classname:
                for c2 in classes:
                    if c2 != classname:
                        tn += self.getMatrixEntry(c, c2)
        if not sim:
            assert sum([tp, tn, fp, fn]) == self.instanceCount
        return tp, tn, fp, fn

    def getMetrics(self, classname, sim=False):
        '''
        Returns the classifier evaluation metrics in the following order:
        Accuracy, Precision, Recall, F1-Score.
        '''
        classes = []
        for classification in self.matrix:
            for truth in self.matrix.get(classification, {}):
                if truth not in classes:
                    classes.append(truth)
        classes = sorted(classes)
        tp, tn, fp, fn = self.countClassifications(classname, sim)
        acc = None
        if tp + tn + fp + fn > 0:
            acc = (tp + tn) / float(tp + tn + fp + fn)
        pre = 0.0
        if tp + fp > 0:
            pre = tp / float(tp + fp)
        rec = 0.0
        if tp + fn > 0:
            rec = tp / float(tp + fn)
        f1 = 0.0
        if pre + rec > 0:
            f1 = (2.0 * pre * rec) / (pre + rec)
        return acc, pre, rec, f1

    def getLatexTable(self, sim=False):
        '''
        Returns LaTeX code for the confusion matrix.
        '''
        grid = "|l|"
        for cl in sorted(self.labels):
            grid += "l|"
        endl = '\n'
        result = ''
        result += r'\footnotesize' + endl
        result += r'\begin{tabular}{' + grid + '}' + endl
        headerRow = r"Prediction/Ground Truth"
        for cl in sorted(self.labels):
            headerRow += r" & \begin{turn}{90}" + cl.replace('_', r'\_') + r'\end{turn}'
        # count the number of actual instances per class label
        examplesPerClass = {}
        for label in self.labels:
            tp, tn, fp, fn = self.countClassifications(label)
            examplesPerClass[label] = sum([tp, fp, fn])
        result += r'\hline' + endl
        result += headerRow + r'\\ \hline' + endl
        # create one row per class
        for clazz in sorted(self.labels):
            values = []
            # fill one column per class
            for cl2 in sorted(self.labels):
                counts = self.getMatrixEntry(clazz, cl2)
                if sim:
                    classSyn = self.wordnet.synset(clazz)
                    cl2Syn = self.wordnet.synset(cl2)
                    counts *= self.wordnet.similarity(classSyn, cl2Syn)
                values.append(r'\cellcolor{{cfmcolor!{0}}}{1}'.format(
                    int(round(counts / examplesPerClass[clazz] * 100)),
                    ('\\textbf{{{:g}}}' if clazz == cl2 else '{:g}').format(
                        float('{:.2f}'.format(counts)))))
            result += clazz.replace('_', r'\_') + ' & ' + ' & '.join(values) + r'\\ \hline' + endl
        result += r"\end{tabular}" + endl
        return result

    def printPrecisions(self, sim=False):
        '''
        Prints to the standard output a table of the class-specific error
        measures accuracy, precision, recall and F1 score.
        '''
        classes = []
        for classification in self.matrix:
            for truth in self.matrix.get(classification, {}):
                if truth not in classes:
                    classes.append(truth)
        classes = sorted(classes)
        precs = {}
        for cf in classes:
            acc, pre, rec, f1 = self.getMetrics(cf, sim)
            print '{}: - Acc={:.2f}, Pre={:.2f}, Rec={:.2f}, F1={:.2f}\n'.format(
                cf, acc, pre, rec, f1)
            precs[cf] = 'Acc={:.2f}, Pre={:.2f}, Rec={:.2f}, F1={:.2f}'.format(
                acc, pre, rec, f1)
        return precs

    def precisionsToFile(self, filename, sim=False):
        '''
        Writes to the given file a table of the class-specific error measures
        accuracy, precision, recall and F1 score.
        '''
        precisions = self.printPrecisions(sim=sim)
        f = open(filename, 'w+')
        for prec in precisions:
            f.write('{}: {}\n'.format(prec, precisions[prec]))
        f.write('Total Accuracy: {}\n'.format(self.getTotalAccuracy()))
        f.write('Average Precision: Acc={0[0]}, Pre={0[1]}, Rec={0[2]}, F1={0[3]}\n'.format(
            self.printAveragePrecision(sim=sim)))

    def printAveragePrecision(self, sim=False):
        classes = []
        for classification in self.matrix:
            for truth in self.matrix.get(classification, {}):
                if truth not in classes:
                    classes.append(truth)
        classes = sorted(classes)
        aAcc = 0.0
        aPre = 0.0
        aRec = 0.0
        aF1 = 0.0
        for cf in classes:
            acc, pre, rec, f1 = self.getMetrics(cf, sim)
            aAcc += acc
            aPre += pre
            aRec += rec
            aF1 += f1
        print '{}: - Acc={:.2f}, Pre={:.2f}, Rec={:.2f}, F1={:.2f}\n'.format(
            'Average', aAcc / len(classes), aPre / len(classes),
            aRec / len(classes), aF1 / len(classes))
        print ""
        return (aAcc / len(classes), aPre / len(classes),
                aRec / len(classes), aF1 / len(classes))

    def __str__(self):
        maxNumDigits = max(max(map(lambda x: x.values(), self.matrix.values()), key=max))
        maxNumDigits = len(str(maxNumDigits))
        maxClassLabelLength = max(map(len, self.matrix.keys()))
        padding = 1
        numLabels = len(self.matrix.keys())
        cellwidth = max(maxClassLabelLength, maxNumDigits, 3) + 2 * padding
        # create a horizontal line
        hline = '|' + '-' * cellwidth + '+'
        hline += '+'.join(['-' * cellwidth] * numLabels) + '|'
        sep = '|'
        outerHLine = '-' * len(hline)

        def createTableRow(args):
            return sep + sep.join(map(lambda a: str(a).rjust(cellwidth - padding) + ' ' * padding, args)) + sep

        endl = '\n'
        # draw the table
        table = outerHLine + endl
        table += createTableRow(['P\\C'] + sorted(self.matrix.keys())) + endl
        table += hline + endl
        for i, clazz in enumerate(sorted(self.labels)):
            classSyn = self.wordnet.synset(clazz)
            table += createTableRow([clazz] + map(
                lambda x: '{:g}'.format(
                    self.getMatrixEntry(clazz, x) *
                    self.wordnet.similarity(classSyn, self.wordnet.synset(x))),
                sorted(self.labels))) + endl
            if i < len(self.matrix.keys()) - 1:
                table += hline + endl
        table += outerHLine
        return table

    def toPDF(self, filename, sim=False):
        '''
        Creates a PDF file of this matrix. Requires 'pdflatex' and 'pdfcrop'
        to be installed.
        '''
        texFileName = filename + '.tex'
        texFile = open(texFileName, 'w+')
        texFile.write(r'''
\documentclass[10pt]{{article}}
\usepackage{{color}}
\usepackage{{rotating}}
\usepackage[table]{{xcolor}}
\definecolor{{cfmcolor}}{{rgb}}{{0.2,0.4,0.6}}
\begin{{document}}
\pagenumbering{{gobble}}
\resizebox{{\columnwidth}}{{!}}{{{}}}
\end{{document}}
'''.format(self.getLatexTable(sim)))
        texFile.close()
        cmd = 'pdflatex -halt-on-error {}'.format(texFileName)
        p = Popen(cmd, shell=True)
        if p.wait() != 0:
            raise Exception('Could not compile LaTeX.')
        else:
            cmd = 'pdfcrop {}.pdf {}.pdf'.format(filename, filename)
            p = Popen(cmd, shell=True)
            if p.wait() != 0:
                raise Exception('Could not crop PDF.')
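
# Hedged usage sketch (not part of the original module). It assumes the parent
# ConfusionMatrix class offers a method for registering ground-truth/prediction
# pairs; the name addClassificationResult used below is an assumption, as are
# the example concept labels.
def evaluate_example():
    cm = ConfusionMatrixSim()
    for truth, pred in [('cup.n.01', 'cup.n.01'),
                        ('cup.n.01', 'mug.n.01'),
                        ('bowl.n.01', 'bowl.n.01')]:
        cm.addClassificationResult(truth, pred)   # assumed ConfusionMatrix API
    cm.printPrecisions(sim=True)                  # similarity-weighted metrics
    cm.toPDF('confusion_matrix', sim=True)        # requires pdflatex/pdfcrop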
def main():
    headline("Running main...")

    usage = 'PRAC Object Recognition tool'

    parser = argparse.ArgumentParser(description=usage)
    parser.add_argument("-i", "--interactive", dest="interactive",
                        default=False, action='store_true',
                        help="Starts PRAC object recognition with an "
                             "interactive GUI tool.")
    parser.add_argument("-t", "--train", dest="trainMLN", nargs=1,
                        default=None,
                        help="Train given MLN with inference results from "
                             "argument. Example: pracobjrec -t orange.n.01 "
                             "'It is a yellow or orange fruit.'")
    parser.add_argument("-r", "--regular", dest="regular", default=False,
                        action='store_true',
                        help="Runs regular inference pipeline. "
                             "Arguments: mlnName")
    parser.add_argument("-f", "--onthefly", dest="onthefly", default=False,
                        action='store_true', help="Generate MLN on the fly")
    parser.add_argument("-m", "--mln", nargs=2, dest='mln', default=None,
                        help="Runs regular inference pipeline. "
                             "Arguments: mlnName")
    # the natural-language object description is given as positional
    # argument(s)
    parser.add_argument("sentences", nargs='*',
                        help="Natural-language description of the object.")

    args = parser.parse_args()
    opts_ = vars(args)

    interactive = args.interactive
    regular = args.regular
    sentences = args.sentences

    prac = PRAC()
    prac.wordnet = WordNet(concepts=None)
    infer = PRACInference(prac, sentences)

    # in case we have natural-language parameters, parse them
    if len(infer.instructions) > 0:
        parser = prac.module('nl_parsing')
        prac.run(infer, parser)

    if interactive:  # use the GUI
        logger.info('Entering interactive mode')
        gui = PRACQueryGUI(infer)
        gui.open()
    elif args.trainMLN:  # training with property inference output
        logger.info('Training MLN {} with result from property '
                    'inference'.format(args.trainMLN))

        # property inference from parsed input
        propExtract = prac.module('prop_extraction')
        prac.run(infer, propExtract)

        objRecog = prac.module('obj_recognition')

        praclearn = PRACLearning(prac)
        praclearn.otherParams['mln'] = args.mln[0]
        praclearn.otherParams['logic'] = args.mln[1]
        praclearn.otherParams['concept'] = args.trainMLN
        praclearn.otherParams['onthefly'] = args.onthefly
        praclearn.training_dbs = infer.inference_steps[-1].output_dbs

        objRecog.train(praclearn)
        sys.exit(0)
    else:  # regular PRAC pipeline
        logger.info('Entering regular inference pipeline')

        # property inference from parsed input
        propExtract = prac.module('prop_extraction')
        prac.run(infer, propExtract)

        objRecog = prac.module('obj_recognition')
        # object inference based on inferred properties
        prac.run(infer, objRecog)

        step = infer.inference_steps[-1]
        print()
        print(prac_heading('PRAC INFERENCE RESULTS'))
        print()
        print('Object description: {}'.format(
            colorize(''.join(sentences), (None, 'white', True), True)))
        print()

        for db in step.output_dbs:
            print('Inferred properties:')
            for ek in sorted(db.evidence):
                e = db.evidence[ek]
                if e == 1.0 and any(ek.startswith(p) for p in
                                    ['color', 'size', 'shape', 'hypernym',
                                     'hasa', 'dimension', 'consistency',
                                     'material']):
                    print('{}({}, {})'.format(
                        colorize(ek.split('(')[0],
                                 (None, 'white', True), True),
                        colorize(ek.split('(')[1].split(',')[0],
                                 (None, 'magenta', True), True),
                        colorize(ek.split('(')[1].split(',')[1],
                                 (None, 'green', True), True)))

        for db in step.output_dbs:
            print()
            print('Inferred possible concepts:')
            for ek in sorted(db.evidence, key=db.evidence.get, reverse=True):
                e = db.evidence[ek]
                if e > 0.001 and ek.startswith('object'):
                    print('{} {}({}, {})'.format(
                        colorize('{:.4f}'.format(e),
                                 (None, 'cyan', True), True),
                        colorize('object', (None, 'white', True), True),
                        colorize(ek.split(',')[0].split('(')[1],
                                 (None, 'magenta', True), True),
                        colorize(ek.split(',')[1].split(')')[0],
                                 (None, 'yellow', True), True)))
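
# Hedged invocation examples (assuming this main() is exposed as the
# 'pracobjrec' entry point, as the help text of -t suggests; the MLN name and
# logic passed to -m are illustrative):
#
#   pracobjrec 'It is a yellow or orange fruit.'
#       run the regular property extraction + object recognition pipeline
#
#   pracobjrec -i 'It is a yellow or orange fruit.'
#       same input, but opened in the interactive query GUI
#
#   pracobjrec -t orange.n.01 -m objrec FirstOrderLogic 'It is a yellow or orange fruit.'
#       train the object recognition MLN for the concept orange.n.01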
import jpype

from dnutils import logs  # assumed source of the 'logs' helper used below
from nltk import word_tokenize
from nltk.corpus import wordnet as wn

from pracmln import MLN
from pracmln.mln.database import parse_db
from pracmln.mln.util import colorize
from pracmln.utils.visualization import get_cond_prob_png

from prac.core import locations
from prac.core.base import PRACModule
from prac.core.errors import ParserError
from prac.core.inference import PRACInferenceStep, NLInstruction
from prac.core.wordnet import WordNet
from prac.pracutils.utils import prac_heading

logger = logs.getlogger(__name__, logs.INFO)
wordnet = WordNet(concepts=None)


class StanfordParser(object):
    '''
    Python wrapper for the Java implementation of the Stanford
    natural-language parser.
    '''

    def __init__(self, pcfg_model_fname=None):
        self.pcfg_model_fname = pcfg_model_fname
        self.package_lexparser = jpype.JPackage("edu.stanford.nlp.parser.lexparser")
        self.package_trees = jpype.JPackage('edu.stanford.nlp.trees')
        self.package = jpype.JPackage("edu.stanford.nlp")
        self.parser = self.package_lexparser.LexicalizedParser.loadModel(
            self.pcfg_model_fname,
from prac.core.wordnet import WordNet

__word_net__ = WordNet()


def determine_path_similarity_between_two_concepts(a, b):
    return __word_net__.similarity(a, b, 'path')
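
# Hedged usage example (the synset identifiers are illustrative WordNet
# concept names, not taken from the original module):
#
#     sim = determine_path_similarity_between_two_concepts('cup.n.01', 'mug.n.01')
#     # path-based similarity lies in (0, 1]; identical concepts yield 1.0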
def role_distributions(self, step):
    '''
    TODO
    :param step:
    :return:
    '''
    distrs = {}
    for db_ in step.output_dbs:
        for word in db_.domains['word']:
            for q in db_.query('action_core(?w,?ac)'):
                # ==============================================================
                # Initialization
                # ==============================================================
                actioncore = q['?ac']
                projectpath = os.path.join(self.module_path,
                                           '{}.pracmln'.format(actioncore))
                project = MLNProject.open(projectpath)
                mlntext = project.mlns.get(project.queryconf['mln'], None)
                mln = parse_mln(mlntext,
                                searchpaths=[self.module_path],
                                projectpath=projectpath,
                                logic=project.queryconf.get('logic', 'FirstOrderLogic'),
                                grammar=project.queryconf.get('grammar', 'PRACGrammar'))

                # ==============================================================
                # Preprocessing
                # ==============================================================

                # add inferred concepts to known_concepts to display them in
                # the graph. Ignore verbs and adjectives, as they do not have
                # hypernym relations to nouns
                concepts = self.prac.config.getlist('wordnet', 'concepts')
                for con in db_.query('has_sense(?w,?s)'):
                    if con['?s'].split('.')[1] in ['a', 's', 'v']:
                        continue
                    concepts.append(con['?s'])
                wn = WordNet(concepts=concepts)

                db = db_.copy(mln=mln)
                for qs in db_.query('!(EXIST ?w (has_sense(?w,?s)))'):
                    db.rmval('sense', qs['?s'])
                for concept in db_.domains['concept']:
                    if concept not in mln.domains['concept']:
                        db.rmval('concept', concept)

                for res in db_.query('has_sense({}, ?s)'.format(word)):
                    sense = res['?s']
                    if sense == 'null':
                        continue
                    roles = self.prac.actioncores[actioncore].roles
                    role = None
                    for r in roles:
                        vars = ['?v{}'.format(i) for i in
                                range(len(db_.mln.predicate(r).argdoms) - 1)]
                        br = False
                        for qr in db_.query('{}({},{})'.format(r, ','.join(vars), actioncore)):
                            for v in vars:
                                if qr[v] == word:
                                    role = r
                                    br = True
                                    break
                            if br:
                                break
                        if br:
                            break
                    if role is None:
                        continue
                    db.retract('has_sense({},{})'.format(word, sense))
                    add_all_wordnet_similarities(db, wn)

                    # ==========================================================
                    # Inference
                    # ==========================================================
                    infer = self.mlnquery(method='EnumerationAsk',
                                          mln=mln,
                                          db=db,
                                          queries='has_sense',
                                          cw=True,
                                          multicore=True,
                                          verbose=self.prac.verbose > 2)
                    result = infer.resultdb

                    if self.prac.verbose == 2:
                        print
                        print prac_heading('INFERENCE RESULTS')
                        print
                        infer.write()

                    # ==========================================================
                    # Graph generation
                    # ==========================================================
                    g = wn.to_dot()
                    maxprob = 0.
                    for atom, truth in result.gndatoms():
                        _, predname, args = db.mln.logic.parse_literal(atom)
                        concept = args[1]
                        if predname == 'has_sense' and args[0] == word and concept != 'null':
                            maxprob = max(maxprob, truth)

                    for atom, truth in result.gndatoms():
                        _, predname, args = db.mln.logic.parse_literal(atom)
                        concept = args[1]
                        if predname == 'has_sense' and args[0] == word and concept != 'null':
                            if concept in concepts:
                                g.node(concept,
                                       fillcolor=get_prob_color(truth / maxprob))
                    distrs[role] = render_gv(g)
    return distrs
def set_known_concepts(self, concepts):
    self.wordnet = WordNet(concepts)