def extractTopNEsdcsFromSentence(self, sentence, n):
    if sentence.strip() == "":
        return [ExtendedSdcGroup([], entireText=sentence)]
    topNDependencies = self.parser.parseToTopNStanfordDependencies(sentence, n)
    result = list()
    idx = 0
    for i, dependencies in enumerate(topNDependencies):
        esdc_group = self.esdcGroupFromDependencies(sentence, dependencies)
        assert esdc_group.entireText == sentence
        candidates = esdcCandidates.makeCandidatesForEsdcGroup(esdc_group)
        for c in candidates:
            c.idx = idx
            idx += 1
        result.extend(candidates)
    # Deduplicate the candidates while preserving their original order.
    candidateSet = set()
    newResult = []
    for r in result:
        if r not in candidateSet:
            newResult.append(r)
            candidateSet.add(r)
    #result = list(sorted(result, key=lambda x: x.idx))
    return newResult

def fromYaml(entireText, esdcsYaml, use_ids=False):
    """
    Reads a list of esdcs as yaml (maps, lists, strings, as recovered
    from yaml.load), and makes them into proper ESDCs.  It makes the
    following conversions:

    * Landmarks that are strings are converted to OBJECT esdcs
      automatically.

    * It parses tokens with ranges, to build standoff tags.

    * Tokens without ranges are automatically converted to standoffs.
      An error is raised if this cannot be done uniquely.  Then the
      token must be specified with a range.
    """
    esdcsYaml = wrapValueInList(esdcsYaml)
    esdcs = []
    for esdcYaml in esdcsYaml:
        try:
            esdcType, outputDict = handleEsdc(esdcYaml, entireText)
            esdcs.append(ExtendedSdc(esdcType, **outputDict))
        except:
            print "Trouble with", esdcYaml
            print "entire text", entireText
            raise
    objectifyLandmarks(esdcs)
    objectifyFiguresOfEvents(esdcs)
    addEmptyFigures(esdcs)
    return ExtendedSdcGroup(esdcs, entireText, use_ids=use_ids)

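# Usage sketch: round-trip an ESDC group through yaml, mirroring
# testNestedRepeatedStrings below.  toYaml returns an (entireText,
# yamlData) pair, and fromYaml reconstructs an equal ExtendedSdcGroup
# from that pair:
#
#   entireText, yamlData = esdcIo.toYaml(esdcGroup)
#   reread = esdcIo.fromYaml(entireText, yamlData)
#   assert list(reread) == list(esdcGroup)
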
def extractTopNEsdcs(self, command, n):
    command = normalizeWhitespace(command)
    esdcList = []
    for sentenceStandoff in self.sentenceTokenizer.tokenize(command):
        esdcs = self.extractTopNEsdcsFromSentence(sentenceStandoff.text,
                                                  n + 10)
        for esdc in esdcs:
            correctStandoffs(sentenceStandoff, esdc)
        esdcList.append(esdcs)
    esdcList = [e for e in esdcList if len(e) != 0]

    indices = [0 for esdcGroups in esdcList]
    results = []
    for iteration in range(0, n):
        lst = [esdcGroups[i] for i, esdcGroups in zip(indices, esdcList)]
        metadata = [e.metadata for e in lst]
        score = sum(e.score for e in lst)
        results.append(ExtendedSdcGroup(list(chain(*lst)), command,
                                        score=score, metadata=metadata))
        # Greedily advance one sentence's parse index per iteration: pick
        # the sentence whose next-best parse has the highest score.
        next_indices = [min(index + 1, len(esdcList[i]) - 1)
                        for i, index in enumerate(indices)]
        best_idx = na.argmax([esdcGroups[min(i, len(esdcGroups) - 1)].score
                              for i, esdcGroups in zip(next_indices,
                                                       esdcList)])
        indices[best_idx] = next_indices[best_idx]
    return results

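# Usage sketch: pull the n highest-scoring parses for a whole command.
# `extractor` stands for an instance of this extractor class (the name
# is illustrative); each returned ExtendedSdcGroup carries a score:
#
#   groups = extractor.extractTopNEsdcs("Go to the pallet.", 5)
#   for g in groups:
#       print g.score, g.entireText
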
def testCandidate(self):
    cmd = "turn and move to the truck on the right"
    esdcg = ExtendedSdcGroup(
        [ExtendedSdc('EVENT',
                     r=[TextStandoff(cmd, (0, 4))],
                     l2=[],
                     l=[ExtendedSdc('PATH',
                                    r=[TextStandoff(cmd, (14, 16))],
                                    l2=[],
                                    l=[ExtendedSdc('OBJECT',
                                                   r=[TextStandoff(cmd, (27, 29))],
                                                   l2=[],
                                                   l=[TextStandoff(cmd, (30, 33)),
                                                      TextStandoff(cmd, (34, 39))],
                                                   f=[TextStandoff(cmd, (17, 20)),
                                                      TextStandoff(cmd, (21, 26))])],
                                    f=[])],
                     f=[ExtendedSdc('OBJECT', r=[], l2=[], l=[], f=[],
                                    entireText=cmd)]),
         ExtendedSdc('EVENT',
                     r=[TextStandoff(cmd, (9, 13))],
                     l2=[],
                     l=[],
                     f=[ExtendedSdc('OBJECT', r=[], l2=[], l=[], f=[],
                                    entireText=cmd)])],
        cmd)
    candidates = esdcCandidates.makeCandidatesForEsdcGroup(esdcg)
    self.assertEqual(len(candidates), 5)

def correctStandoffsOffset(esdcs, new_entire_text, offset):
    """
    Rebases the standoffs in esdcs onto new_entire_text, offsetting each
    standoff's range.  Returns a new ExtendedSdcGroup.
    """
    modified_esdcs = ExtendedSdcGroup.copy(esdcs)
    update_rep_esdcs = []

    def callback(esdc):
        esdc.entireText = new_entire_text
        update_rep_esdcs.append(esdc)
        for fieldName in esdc.fieldNames:
            if esdc.childIsListOfWords(fieldName):
                new_standoffs = [correctStandoffOffset(s, new_entire_text,
                                                       offset)
                                 for s in esdc[fieldName]]
                esdc.fields[fieldName] = new_standoffs

    breadthFirstTraverse(modified_esdcs, callback)
    for e in reversed(update_rep_esdcs):
        e.updateRep()
    return ExtendedSdcGroup(modified_esdcs)

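# Usage sketch, assuming `offset` is the number of characters by which
# the text shifted (an assumption; see correctStandoffOffset for the
# exact convention), e.g. after prepending a prefix to the command:
#
#   prefix = "Please "
#   shifted = correctStandoffsOffset(esdcs, prefix + esdcs.entireText,
#                                    len(prefix))
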
def correctStandoffsImmutable(sentenceStandoff, esdcs):
    """
    Corrects the standoffs in the esdcs to be relative to a larger
    standoff object.  Returns a new ExtendedSdcGroup; the input is not
    modified.
    """
    modified_esdcs = ExtendedSdcGroup.copy(esdcs)

    def callback(esdc):
        for fieldName in esdc.fieldNames:
            if esdc.childIsListOfWords(fieldName):
                new_standoffs = [correctStandoffImmutable(sentenceStandoff, s)
                                 for s in esdc[fieldName]]
                esdc.fields[fieldName] = new_standoffs
        esdc.entireText = sentenceStandoff.entireText

    breadthFirstTraverse(modified_esdcs, callback)
    for e in modified_esdcs:
        e.updateRep()
    return ExtendedSdcGroup(modified_esdcs)

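# Usage sketch (mirrors testCorrectStandoffsImmutable below): rebase an
# ESDC's standoffs into a longer text without mutating the original.
#
#   sentence_standoff = TextStandoff(new_entire_text, esdc.range)
#   corrected = correctStandoffsImmutable(sentence_standoff,
#                                         ExtendedSdcGroup([esdc]))
#   assert corrected.entireText == new_entire_text
#   assert esdc.entireText == old_entire_text  # original untouched
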
def esdcGroupFromDependencies(self, sentence, dependencies):
    if len(dependencies.dependencies) == 0:
        return ExtendedSdcGroup(
            [ExtendedSdc("EVENT", r=dependencies.tokens[0])],
            entireText=sentence,
            score=dependencies.score,
            metadata=dependencies)
    else:
        esdcList = self.extractEsdcList(sentence, dependencies)
        hierarchicalEsdcs = self.extractHierarchy(dependencies, esdcList)
        objectifyLandmarks(hierarchicalEsdcs)
        objectifyFiguresOfEvents(hierarchicalEsdcs)
        addEmptyFigures(hierarchicalEsdcs)
        hierarchicalEsdcs.sort(key=lambda e: e.startIdx)
        return ExtendedSdcGroup(hierarchicalEsdcs,
                                entireText=sentence,
                                score=dependencies.score,
                                metadata=dependencies)

def testRep(self):
    esdc = ExtendedSdc('OBJECT', r=[], l2=[], l=[], f=[],
                       entireText="Robots are awesome.")
    self.assertEqual(eval(repr(esdc)), esdc)
    esdcGroup = ExtendedSdcGroup([esdc], esdc.entireText)
    print repr(esdcGroup)

def extractEsdcs(self, command):
    command = normalizeWhitespace(command)
    esdcList = []
    score = 0.0
    for sentenceStandoff in self.sentenceTokenizer.tokenize(command):
        esdcs = self.extractEsdcsFromSentence(sentenceStandoff.text)
        assert sentenceStandoff.entireText == command
        correctStandoffs(sentenceStandoff, esdcs)
        esdcList.extend(esdcs)
        score += esdcs.score
    return ExtendedSdcGroup(esdcList, command, score=score)

def testNestedRepeatedStrings(self):
    from esdcs.dataStructures import ExtendedSdc, ExtendedSdcGroup
    from standoff import TextStandoff
    txt = "Move to the right side of the trailer of the trailer on the right and wait."
    esdcs = [ExtendedSdc('EVENT',
                         r=[TextStandoff(txt, (0, 4))],
                         l2=[],
                         l=[ExtendedSdc('PATH',
                                        r=[TextStandoff(txt, (5, 7))],
                                        l2=[],
                                        l=[ExtendedSdc('OBJECT',
                                                       r=[TextStandoff(txt, (23, 25))],
                                                       l2=[ExtendedSdc('OBJECT', r=[], l2=[], l=[],
                                                                       f=[TextStandoff(txt, (41, 44)),
                                                                          TextStandoff(txt, (45, 52))])],
                                                       l=[ExtendedSdc('OBJECT', r=[], l2=[], l=[],
                                                                      f=[TextStandoff(txt, (26, 29)),
                                                                         TextStandoff(txt, (30, 37))])],
                                                       f=[ExtendedSdc('OBJECT', r=[], l2=[], l=[],
                                                                      f=[TextStandoff(txt, (8, 11)),
                                                                         TextStandoff(txt, (12, 17)),
                                                                         TextStandoff(txt, (18, 22))])])],
                                        f=[])],
                         f=[ExtendedSdc('OBJECT', r=[], l2=[], l=[], f=[],
                                        entireText=txt)]),
             ExtendedSdc('OBJECT',
                         r=[TextStandoff(txt, (53, 55))],
                         l2=[],
                         l=[ExtendedSdc('OBJECT', r=[], l2=[], l=[],
                                        f=[TextStandoff(txt, (56, 59)),
                                           TextStandoff(txt, (60, 65))])],
                         f=[ExtendedSdc('OBJECT', r=[], l2=[], l=[],
                                        f=[TextStandoff(txt, (41, 44)),
                                           TextStandoff(txt, (45, 52))])])]
    entireText, yamlData = esdcIo.toYaml(ExtendedSdcGroup(esdcs))
    rereadAnnotations = esdcIo.fromYaml(entireText, yamlData)
    try:
        self.assertEqual(list(rereadAnnotations), esdcs)
    except:
        print "start with", [e.asPrettyMap() for e in esdcs]
        print "ended with", [e.asPrettyMap() for e in rereadAnnotations]
        raise

def testCorrectStandoffsImmutable(self):
    annotations = yamlReader.load("data/forklift_open_ended.yaml")
    esdc1 = annotations[0].esdcs[0]
    esdc2 = annotations[1].esdcs[0]
    old_entire_text = esdc1.entireText
    new_entire_text = esdc1.entireText + " " + esdc2.entireText
    sentence_standoff = TextStandoff(new_entire_text, esdc1.range)
    correctedEsdc1 = correctStandoffsImmutable(sentence_standoff,
                                               ExtendedSdcGroup([esdc1]))
    self.assertEqual(esdc1.entireText, old_entire_text)
    self.assertEqual(correctedEsdc1.entireText, new_entire_text)
    print str(correctedEsdc1[0])
    self.assertEqual(" ".join(x.text for x in correctedEsdc1[0]["f"]),
                     "Forklift")

def evaluateAnnotation(self, annotation, state_cls, dirToUse,
                       numToReturn=None, runMultiEsdcCommands=False):
    print 'annotation has', len(annotation.esdcs), 'esdcs'
    esdcs = self.esdc_extractor_func(annotation)
    entries = []
    if runMultiEsdcCommands:
        print "multiesdcs"
        print 'finding plan for', ' '.join([esdc.text for esdc in esdcs])
        if self.use_merging:
            from coreference.merge_coreferences import merge_coreferences
            from coreference.bag_of_words_resolver import BagOfWordsResolver
            # resolver = BagOfWordsResolver("%s/tools/coreference/models/coref_1.5.pck" % SLU_HOME)
            from coreference.oracle_resolver import OracleResolver
            # resolver = OracleResolver("%s/tools/forklift/dataAnnotation/data/forklift_ambiguous_revised_context_with_answers.prthaker.yaml" % SLU_HOME)
            resolver = OracleResolver(
                "%s/tools/forklift/dataAnnotation/data/forklift_ambiguous_larger_corpus_with_answers.yaml"
                % SLU_HOME)
            gggs = merge_coreferences(esdcs, resolver)
        else:
            gggs = gggs_from_esdc_group(esdcs)
        start_state, plans = self.getSearchResults(annotation, state_cls,
                                                   gggs,
                                                   numToReturn=numToReturn)
        esdcNum = 0
        entries.append((gggs,
                        self.writeResults(dirToUse, annotation, esdcs,
                                          esdcNum, start_state, gggs,
                                          plans)))
    else:
        print "not multiesdcs"
        if self.use_merging:
            # from coreference.bag_of_words_resolver import BagOfWordsResolver
            from coreference.merge_coreferences import merge_coreferences
            from coreference.oracle_resolver import OracleResolver
            # resolver = OracleResolver("%s/tools/forklift/dataAnnotation/data/forklift_ambiguous_revised_context_with_answers.prthaker.yaml" % SLU_HOME)
            resolver = OracleResolver(
                "%s/tools/forklift/dataAnnotation/data/forklift_ambiguous_larger_corpus_with_answers.yaml"
                % SLU_HOME)
            # resolver = BagOfWordsResolver("%s/tools/coreference/models/coref_1.5.pck" % SLU_HOME)
            gggs = merge_coreferences(esdcs, resolver)
        else:
            gggs = []
            for esdc in esdcs:
                gggs.append(ggg_from_esdc(esdc))
        # Run each ESDC's grounding graph through the planner separately.
        for esdcNum, ggg in enumerate(gggs):
            esdcGroup = ExtendedSdcGroup([esdcs[esdcNum]])
            esdc_gggs = [ggg]
            start_state, plans = self.getSearchResults(annotation, state_cls,
                                                       [ggg], numToReturn)
            entries.append((esdc_gggs,
                            self.writeResults(dirToUse, annotation,
                                              esdcGroup, esdcNum,
                                              start_state, esdc_gggs,
                                              plans)))
    return entries

def initial_annotation(state, esdc):
    esdcs = ExtendedSdcGroup([esdc])
    ax, ay = state.getPosition()
    agent_prism = prism_from_point(ax, ay, 0, 1)
    agent_object = PhysicalObject(agent_prism, [],
                                  Path([1], [[ax], [ay], [0],
                                             [state.orientation]]),
                                  lcmId=state.getAgentId())
    context = state.to_context()
    context.agent = agent_object
    annotation = Annotation(0, esdcs, context=context, agent=agent_object)

    # peg figure of event to agent
    fig = esdc.f[0]
    annotation.setGrounding(fig, agent_object)
    return annotation

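# Usage sketch: seed the search with an annotation whose event figure is
# already grounded to the agent.  `state` is assumed to expose
# getPosition(), orientation, getAgentId(), and to_context() as used
# above, and `esdc` is a top-level EVENT ESDC with a figure in esdc.f:
#
#   annotation = initial_annotation(state, esdc)
#   # annotation now grounds esdc.f[0] to the agent's PhysicalObject.
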
def followCommand(self, command=None, state=None, esdcs=None,
                  first_esdc_only=False, verbose=True, input_gggs=None):
    if command != None:
        self.updateCommand(command)
    if state != None:
        self.state = state

    command = str(self.commandEdit.toPlainText())
    if input_gggs != None:
        self.esdcs = input_gggs[0].esdcs
    elif esdcs != None and command == self.command:
        self.esdcs = esdcs
    else:
        class AnnotationStub:
            def __init__(self, command):
                self.entireText = command
        self.esdcs = self.extractor(AnnotationStub(command))

    if len(self.esdcs) == 0:
        return [], []
    if first_esdc_only:
        self.esdcs = ExtendedSdcGroup([self.esdcs[0]])
    self.flattenedEsdcs = self.esdcs.flattenedEsdcs
    print "esdcs", esdcIo.toYaml(self.esdcs)
    #print "flattened esdcs", self.flattenedEsdcs
    self.esdcModel.setData(self.esdcs)
    self.esdcView.expandAll()

    # convert to a list of plans
    if input_gggs != None:
        gggs = input_gggs
    elif self.gggs != None:
        gggs = self.gggs
    else:
        if self.merging_mode == "merge_coreferences":
            from coreference.bag_of_words_resolver import BagOfWordsResolver
            from coreference.merge_coreferences import merge_coreferences
            resolver = BagOfWordsResolver(
                "%s/tools/coreference/models/coref_1.5.pck" % SLU_HOME)
            gggs = merge_coreferences(self.esdcs, resolver)
            if len(gggs) > 1:
                gggs = [gggs[0]]
        elif self.merging_mode == "merge_events":
            from coreference.event_resolver import EventResolver
            from coreference.merge_coreferences import merge_coreferences
            resolver = EventResolver()
            gggs = merge_coreferences(self.esdcs, resolver)
            gggs = gggs[0:1]
            for i, ggg in enumerate(gggs):
                #ggg.to_latex("ggg_%d.pdf" % i)
                pass

            # export parses, when saving ground truth parses from asr
            # evaluation run -- stefie10 7/19/2012
            #from esdcs.esdcIo import toYaml
            #import yaml
            #import os
            #with open("esdcs_%d.txt" % os.getpid(), "a") as f:
            #    f.write(yaml.dump([toYaml(self.esdcs)]))

            if len(self.esdcs) != 1:
                raise ValueError("ESDCs didn't parse right: " +
                                 repr(self.esdcs))
            # print "merging events", self.esdcs.entireText
            # from coreference.merge_coreferences import merge_toplevel_events
            # #resolver = EventResolver()
            # #gggs = merge_coreferences(self.esdcs, resolver,
            # #                          verbose=False)
            # gggs = gggs_from_esdc_group(self.esdcs)
            # gggs = [merge_toplevel_events(gggs)]
            assert len(gggs) == 1, (len(gggs), self.esdcs.text)
        elif self.merging_mode == "merge_none":
            gggs = gggs_from_esdc_group(self.esdcs)
        else:
            raise ValueError("Bad merging mode: " + repr(self.merging_mode))

    if gggs[0].context == None:
        gggs[0].context = self.state.to_context()
    assert gggs[0].context.agent != None

    def run():
        try:
            assert gggs[0].context.agent != None
            self.plansList = self.taskPlanner.find_plan(
                self.state, gggs,
                beam_width=self.beamWidthBox.value(),
                beam_width_sequence=self.seqBeamWidthBox.value(),
                search_depth_event=self.searchDepthBox.value(),
                beam_width_event=self.beamWidthEventBox.value(),
                save_state_tree=self.save_state_tree,
                allow_null_action=self.allow_null_action)
        except:
            print "exception on gggs"
            #for i, ggg in enumerate(gggs):
            #    ggg.to_latex("ggg_%d.pdf" % i)
            raise

    start = time.time()
    cProfile.runctx("run()", globals(), locals(), "out.prof")
    end = time.time()
    if verbose:
        print "Cost Function Browser took", (end - start), "seconds."
    plansList = self.plansList
    if len(plansList) == 0:
        return [], []
    else:
        self.plans = plansModel.Plan.from_inference_result(
            self.taskPlanner, plansList)
        self.plansModel.setData(self.plans)
        self.nodeFeatureWeights.load(self.taskPlanner.cf_obj, gggs,
                                     self.plans,
                                     context=self.state.to_context())
        self.plansView.selectRow(0)
        self.gggWindow.load(plansList[0][2],
                            groundingSpace=self.state.objects)
    return self.esdcs, self.plans

class GGG:
    """
    A grounding graph, together with bindings for the variables.  This
    class is the top-level, public-facing API.
    """

    @staticmethod
    def copy(ggg):
        return GGG(ggg._graph,
                   Evidences.copy(ggg.evidences),
                   ggg._factor_id_to_esdc,
                   context=ggg.context,
                   parent_esdc_group=ggg.esdcs,
                   factor_id_to_cost=dict(ggg._factor_id_to_cost),
                   esdc_to_node_id=ggg.esdc_to_node_id)

    @staticmethod
    def from_ggg_and_evidence(ggg, new_evidence):
        return GGG(ggg._graph, new_evidence, ggg._factor_id_to_esdc,
                   context=ggg.context,
                   parent_esdc_group=ggg.esdcs,
                   # factor_id_to_cost=dict(ggg._factor_id_to_cost))
                   factor_id_to_cost=None,
                   esdc_to_node_id=ggg.esdc_to_node_id)

    @staticmethod
    def from_ggg_and_evidence_history(ggg, new_evidence, gggs=None):
        if gggs == None:
            gggs = ggg.gggs
        return GGG(ggg._graph, new_evidence, ggg._factor_id_to_esdc, gggs,
                   context=ggg.context,
                   factor_id_to_cost=dict(ggg._factor_id_to_cost),
                   esdc_to_node_id=ggg.esdc_to_node_id)

    @staticmethod
    def from_ggg_and_parent_esdc_group(ggg, parent_esdcs):
        return GGG(ggg._graph, ggg.evidences, ggg._factor_id_to_esdc,
                   context=ggg.context,
                   parent_esdc_group=parent_esdcs,
                   factor_id_to_cost=dict(ggg._factor_id_to_cost),
                   esdc_to_node_id=ggg.esdc_to_node_id)

    @staticmethod
    def unlabeled_ggg(labeled_ggg, context):
        new_evidences = labeled_ggg.evidences_without_groundings()
        for factor in labeled_ggg.factors:
            new_evidences[factor.phi_node.id] = True
        ggg = GGG(labeled_ggg.graph, new_evidences,
                  labeled_ggg._factor_id_to_esdc, context=context,
                  esdc_to_node_id=labeled_ggg.esdc_to_node_id)
        return ggg

    def __init__(self, graph, evidences, factor_id_to_esdc, gggs=[],
                 context=None, parent_esdc_group=None,
                 factor_id_to_cost=None, esdc_to_node_id=None):
        self._graph = graph
        self._graph.ggg = self
        self.evidences = evidences
        self._factor_id_to_esdc = factor_id_to_esdc
        if factor_id_to_cost == None:
            self._factor_id_to_cost = {}
        else:
            self._factor_id_to_cost = factor_id_to_cost

        self._esdc_to_factor_id = dict(
            (esdc, fid)
            for fid, esdc in self._factor_id_to_esdc.iteritems())
        self.flattened_esdcs = self._esdc_to_factor_id.keys()
        if len(self.flattened_esdcs) == 0:
            self.esdcs = None
            self.entireText = None
            self.text = None
        else:
            if parent_esdc_group != None:
                self.esdcs = parent_esdc_group
            else:
                # Find the single ESDC that contains all the others.
                parent = None
                for e in self.flattened_esdcs:
                    if parent == None or e.contains(parent):
                        parent = e
                if parent == None:
                    raise ValueError("can't find container: " +
                                     repr(self.flattened_esdcs))
                self.esdcs = ExtendedSdcGroup([parent])
            self.entireText = self.esdcs.entireText
            self.text = self.esdcs.text

        self.gggs = gggs
        # print self.evidences
        # print [n for n in self.nodes if "OBJECT" in n.type]
        self.gamma_nodes = self.graph.gamma_nodes
        self.lambda_nodes = self.graph.lambda_nodes
        self.phi_nodes = self.graph.phi_nodes

        # Sort nodes by the text range of their lambda evidence, unless
        # any lambda node has fake or missing evidence.
        should_sort = True
        for l in self.lambda_nodes:
            from standoff import FakeStandoff
            if any([isinstance(x, FakeStandoff)
                    for x in self.evidence_for_node(l)]) or \
                    len(self.evidence_for_node(l)) == 0:
                should_sort = False
                break
        if should_sort:
            def key(node):
                if node.is_lambda:
                    return self.evidence_for_node(node)[0].range
                else:
                    return node.id
            self.graph.nodes = sorted(self.graph.nodes, key=key)

        for n in self.nodes:
            self.evidences.setdefault(n.id, [])

        self.context = context

        if esdc_to_node_id != None:
            self.esdc_to_node_id = dict(
                (key, value)
                for key, value in esdc_to_node_id.iteritems()
                if key != None)
        else:
            self.esdc_to_node_id = dict()
        # for esdc in self.flattened_esdcs:
        #     self.esdc_to_node_id.setdefault(esdc, None)
        for node in self.nodes:
            self.esdc_to_node_id.setdefault(self.node_to_top_esdc(node),
                                            node.id)

        #self.check_rep()

    def with_cropped_range(self, start_t, end_t):
        new_evidences = Evidences.copy(self.evidences)
        for n in self.gamma_nodes:
            evidence = [e.withCroppedRange(start_t, end_t)
                        for e in self.evidence_for_node(n)]
            new_evidences[n.id] = evidence
        result = GGG(self._graph, new_evidences, self._factor_id_to_esdc,
                     context=self.context.withCroppedRange(start_t, end_t),
                     parent_esdc_group=self.esdcs)
        return result

    @property
    def object_groundings(self):
        return list(
            chain(*[self.evidence_for_node(n, [])
                    for n in self.nodes if "OBJECT" in n.type]))

    @property
    def graph(self):
        return self._graph

    def evidences_without_groundings(self):
        evidences = Evidences.copy(self.evidences)
        for n in self.gamma_nodes:
            if not n.is_bound:
                evidences[n.id] = []
        return evidences

    def remove_cost_entries(self):
        """
        Remove saved cost entries.  These are useful for debugging since
        they contain the feature vectors, but use a huge amount of
        memory when pickling things.
        """
        self._factor_id_to_cost = dict(
            (key, (cost, None))
            for key, (cost, ce) in self._factor_id_to_cost.iteritems())

    @property
    def object_nodes(self):
        return [n for n in self.gamma_nodes if n.is_object]

    @property
    def lexicalized_object_nodes(self):
        return [n for n in self.gamma_nodes
                if (n.is_object and
                    not node_is_empty_figure_of_event(self, n))]

    @property
    def event_nodes(self):
        return [n for n in self.gamma_nodes if n.is_event]

    def factor_is_grounded(self, factor):
        for node in factor.gamma_nodes:
            if not self.is_grounded(node):
                return False
        return True

    def is_grounded(self, node):
        return not self.is_ungrounded(node)

    def is_ungrounded(self, node):
        evidence = self.evidence_for_node(node)
        assert evidence != None
        if len(evidence) == 0:
            return True
        else:
            return False

    @property
    def has_ungrounded_lambda_nodes(self):
        for node in self.lambda_nodes:
            if self.is_ungrounded(node):
                return True
        return False

    def has_ungrounded_nodes(self, factor):
        for node in factor.gamma_nodes:
            if self.is_ungrounded(node):
                return True
        return False

    @property
    def all_grounded(self):
        return all(self.is_grounded(n) for n in self.gamma_nodes)

    def check_rep(self):
        for e in self.flattened_esdcs:
            assert e in self._esdc_to_factor_id
            # assert self.esdcs.contains(e), e.text  # not true with merging.
        for factor_id, esdc in self._factor_id_to_esdc.iteritems():
            assert factor_id in self.factor_ids, (factor_id, str(esdc))
            assert self.factor_from_id(factor_id) != None
        for esdc, nid in self.esdc_to_node_id.iteritems():
            if nid != None:
                if nid not in self.node_ids:
                    print self.node_ids
                    assert False, nid

    def esdc_to_factor_id(self, esdc):
        if esdc in self._esdc_to_factor_id:
            fid = self._esdc_to_factor_id[esdc]
            return fid
        else:
            return None

    def esdc_to_factor(self, esdc):
        fid = self.esdc_to_factor_id(esdc)
        if fid != None:
            return self.factor_from_id(fid)
        else:
            return None

    def factor_to_esdc(self, factor):
        return self.factor_id_to_esdc(factor.id)

    def node_to_esdcs(self, node):
        """
        Returns all ESDCs directly connected to this node.
        """
        return [self.factor_to_esdc(f) for f in node.factors]

    def node_to_top_esdc(self, node):
        """
        Return the ESDC that is "for" this node.  The text of this esdc
        will best describe what this node is supposed to be.
""" for esdc, e_node_id in self.esdc_to_node_id.iteritems(): if e_node_id == node.id and self.esdcs.contains(esdc): return esdc for factor in self.factors_for_node(node): e = self.factor_to_esdc(factor) if self.esdcs.contains(e): return e # factor = node.factor_for_link("top") # if factor != None: # return self.factor_to_esdc(factor) # else: # return None return None def factor_id_to_esdc(self, factor_id): return self._factor_id_to_esdc[factor_id] def node_id_to_esdcs(self, node_id): return self.node_to_esdcs(self.node_from_id(node_id)) def factor_from_id(self, factor_id): return self._graph.factor_from_id(factor_id) def node_from_id(self, node_id): return self._graph.node_from_id(node_id) def set_evidence_for_node_id(self, node_id, value): self.evidences[node_id] = value def set_evidence_for_node(self, node, value): self.set_evidence_for_node_id(node.id, value) def clear_evidence_for_node_id(self, node_id): self.set_evidence_for_node_id(node_id, []) def clear_evidence_for_node(self, node): self.set_evidence_for_node(node, []) def evidence_for_node(self, node, default_value=None): return self.evidence_for_node_id(node.id, default_value=default_value) def evidence_for_node_id(self, node_id, default_value=None): if default_value == None: return self.evidences[node_id] else: return self.evidences.get(node_id, default_value) @property def groundings(self): return list( chain(*[ self.evidences[node.id] for node in self.nodes if (isinstance(self.evidences[node.id], list) and node.type != "lambda") ])) def groundings_for_factor(self, factor): return list( chain(*[ self.evidences[node.id] for node in factor.nodes if node.is_gamma ])) def toContext(self): return context.Context.from_groundings(self.groundings) @property def factor_ids(self): return self._graph.factor_ids @property def factors(self): return self._graph.factors @property def nodes(self): return self._graph.nodes @property def node_ids(self): return self._graph.node_ids @property def max_id(self): return max([int(x.id) for x in self.nodes + self.factors]) def nodes_for_link(self, name): """ Return nodes in the graph with a particular link type. (Link types are in Factor.link_names) """ nodes = [] for factor in self.factors: nodes.extend(factor.nodes_for_link(name)) return nodes def nodes_with_type(self, node_type): """ Return nodes in the graph with a particular type. (Type names are in Nodes.type) """ nodes = set() for factor in self.factors: nodes.update(factor.nodes_with_type(node_type)) return nodes def factors_for_node(self, target_node, links='top'): """ Find the factors in the GGG which have the given node at the given link. Returns a list of factors. Returns [] if none found. Can also take in a list of possible link names. 
""" if not isinstance(links, list): links = [links] target_factors = [] for factor in self.factors: if any(target_node in factor.nodes_for_link(l) for l in links): target_factors.append(factor) return target_factors def set_node_for_esdc(self, original_esdc, node): self.esdc_to_node_id[original_esdc] = node.id def inferred_context(self): return MergedContext(self.context, self.groundings) def node_id_for_esdc(self, esdc): if esdc in self.esdc_to_node_id.keys(): return self.esdc_to_node_id[esdc] else: return None def node_for_esdc(self, esdc): return self.node_id_to_node(self.node_id_for_esdc(esdc)) def node_id_to_node(self, node_id): return self._graph.node_id_to_node[node_id] @staticmethod def fromYaml(yml): return GGG(GroundingGraphStructure(yml), Evidences(yml)) def attach_ggg(self, ggg): self._graph = self._graph.attach_graph(ggg.graph) self.evidences = self.evidences.update_evidences(ggg.evidences) return self def factors_depth_first(self, start_node_id): return self._graph.factors_depth_first(start_node_id) node = self.node_from_id(start_node_id) active_factors = [] active_factors.extend(node.factors) results = [] while len(active_factors) != 0: new_active_factors = [] for factor in active_factors: if not factor in results: results.append(factor) for node in factor.nodes: new_active_factors.extend(node.factors) active_factors = new_active_factors return results def cost(self): return sum(self.costs) @property def costs(self): return [t[0] for t in self._factor_id_to_cost.values()] def cost_for_factor(self, factor): return self.cost_for_factor_id(factor.id) def cost_for_factor_id(self, factor_id): return self._factor_id_to_cost[factor_id][0] def entry_for_factor_id(self, factor_id): return self._factor_id_to_cost[factor_id][1] def entry_for_factor(self, factor): return self.entry_for_factor_id(factor.id) def set_cost_for_factor_id(self, factor_id, cost, entry): self._factor_id_to_cost[factor_id] = (cost, entry) def set_cost_for_factor(self, factor, cost, entry): return self.set_cost_for_factor_id(factor.id, cost, entry) def has_cost_for_factor_id(self, factor_id): return factor_id in self._factor_id_to_cost def has_cost_for_factor(self, factor): return self.has_cost_for_factor_id(factor.id) def costs_for_node(self, node): return [ self.cost_for_factor(f) for f in node.factors if self.has_cost_for_factor(f) ] def cost_for_node(self, node): return sum(self.costs_for_node(node)) def null_costs(self): for f in self.factors: self.null_cost_for_factor(f) def null_costs_for_node(self, node): for f in node.factors: self.null_cost_for_factor(f) def null_cost_for_factor(self, factor): self.null_cost_for_factor_id(factor.id) def null_cost_for_factor_id(self, factor_id): if factor_id in self._factor_id_to_cost: del self._factor_id_to_cost[factor_id] def to_latex(self, fname): graph_to_tex.to_tex_file(self, fname) def dot_name(self, node): if node.type == 'lambda': return node.id + '_' + "'" + self.evidence_for_node( node)[0].text + "'" else: return node.id + '_' + node.type def to_file(self, fname, use_edge_labels=True): """ Convert this GGG to dot language and, optionally, run dot to convert it to a PDF. 
""" basename = os.path.basename(fname) tmpname = "/tmp/" + basename f = open(tmpname, "w") print >> f, self._to_dot() f.close() basename, extension = os.path.splitext(fname) if extension == ".pdf": cmd = "dot %s -Tpdf -o %s" % (tmpname, fname) print "calling", cmd subprocess.call(cmd, shell=True) elif extension == ".dot": subprocess.call("cp %s %s" % (tmpname, fname), shell=True) else: raise ValueError("Unexpected extension: " + ` fname `) def lambda_nodes_to_text(self): results = [] for n in self.lambda_nodes: evidence = self.evidence_for_node(n) results.extend([str(e) for e in evidence]) return " ".join(results) def _to_dot(self): """ Convert this GGG to graphviz dot language. """ dot = "digraph esdcparse {\n" gev = self.evidences def node_label(node): if gev.has_key(node.id) and not gev.get_string(node.id) == '': result = re.escape(gev.get_string(node.id)) else: result = node.type.replace("gamma_", "") result = str(node.id) + ": " + result return result def enquote(string): return '"' + string + '"' edges = [] for factor in self._graph.factors: factor_name = node_label(factor) # dot += ' ' + factor.id + ' [shape=box label="" style=filled fillcolor="#000000" fixedsize="true" height="0.3" width="0.3"];\n' dot += ' ' + factor.id + ' [shape=box label="' + factor_name + '"];\n' for link, ids in factor.link_name_to_node_ids.iteritems(): for nid in ids: node = self.node_from_id(nid) # import pdb; pdb.set_trace() if link == 'top': edge = [node.id, factor.id, link] else: edge = [factor.id, node.id, link] if link == 'phi': dot += ' subgraph { rank=same; ' + node.id + '; ' + factor.id + ';}\n' dot += ' ' + node.id + ' [label=' + node.id + ' shape=diamond width=0.5 height=0.5 fixedsize=true];\n' else: dot += ' ' + node.id + ' [label=' + enquote( node_label(node)) + '];\n' edges.append(edge) for edge in edges: dot += ' %s -> %s [dir=none label=%s]\n' % (edge[0], edge[1], edge[2]) # dot += ' ' + edge[0] + ' -> ' + edge[1] + '[dir=none]\n' dot += "}\n" return dot @property def top_event_node(self): return find_top_node(self) @property def top_event_ggg(self): return self.node_to_top_esdc(find_top_node(self)) def create_esdc_tree(self): queue = [] queue.append([1, "x", self.top_event_node]) visited = set([]) full = "" yaml = "" while len(queue) > 0: depth, link, candidate = queue.pop(0) if isinstance(candidate, Node): if candidate.is_gamma: if len(set([f.id for f in candidate.factors]) & visited) != len( candidate.factors): yaml += " " * 2 * depth + link + ":\n" yaml += " " * 2 * (depth + 1) + candidate.type[6:] + ":\n" factors = candidate.factors for factor in factors: if factor.id not in visited: visited = visited | set([factor.id]) queue.insert(0, [depth + 1, "factor", factor]) if candidate.is_lambda: text = " ".join( [ts.text for ts in self.evidence_for_node(candidate)]) yaml += " " * 2 * depth + link + ":\n" yaml += " " * 2 * depth + "- - " + text + "\n" yaml += " " * 2 * (depth + 1) + "- [" + str( len(full)) + ", " + str(len(full + text)) + "]\n" full += text + " " if isinstance(candidate, Factor): for link in ["l2", "l", "r", "f"]: nodes = candidate.nodes_for_link(link) for node in nodes: if node.id not in visited: visited = visited | set([node.id]) queue.insert(0, [depth + 1, link, node]) return "- '" + full[:-1] + "'\n- - " + yaml[9:-1]