def test_normalize_filter_with_location(self):
    """Normalize decomposition of a nested $filter (type + location
    predicates) should attach at least one child to the root alist."""
    query = {
        tt.ID: '1',
        tt.SUBJECT: '$y',
        tt.PROPERTY: 'P1082',
        tt.OBJECT: '?x',
        tt.TIME: '2010',
        tt.OPVAR: '?x',
        tt.COST: 1,
        '$y': {
            "$filter": [
                {"p": "type", "o": "country"},
                {"p": "location", "o": "Africa"},
            ]
        },
    }
    root = Alist(**query)
    graph = InferenceGraph()
    graph.add_alist(root)
    Normalize().decompose(root, graph)
    self.assertTrue(len(graph.child_alists(root.id)) > 0)
def test_inject_context_inference(self):
    """Temporal decomposition of a query with user/session context attached
    should produce a 'regress' operation node."""
    G = InferenceGraph()
    a = Alist(**{tt.ID: '1', tt.SUBJECT: 'Ghana', tt.PROPERTY: 'P1082',
                 tt.TIME: '2023', tt.OBJECT: '', tt.OPVAR: '?x', tt.COST: 1})
    # context triple: [user context, environment/session context, query context]
    ctx1 = [{ctx.nationality: 'United Kingdom', ctx.accuracy: 'low',
             ctx.speed: 'low'},
            {ctx.place: 'United Kingdom', ctx.device: 'phone',
             ctx.datetime: '2020-07-27 11:00:00'},
            {}]
    a.set(tt.CONTEXT, ctx1)
    G.add_alist(a)
    op_alist = Temporal().decompose(a, G)
    # query time (2023) lies beyond the context datetime (2020), so the
    # decomposition presumably must extrapolate via regression -- TODO confirm
    self.assertEqual(op_alist.get(tt.OP), 'regress')
def test_graph_add_nodes(self):
    """add_alists_from should register every supplied alist as a graph node."""
    graph = InferenceGraph()
    alist1 = Alist(**{tt.ID: '1', tt.SUBJECT: '$y', tt.PROPERTY: 'P1082',
                      tt.OBJECT: '?x', tt.TIME: '2010', tt.OPVAR: '?x',
                      tt.COST: 1, '$y': 'Ghana'})
    alist2 = Alist(**{tt.ID: '101', tt.SUBJECT: 'Africa', tt.PROPERTY: 'P1082',
                      tt.OBJECT: '?x', tt.TIME: '2010', tt.OPVAR: '?x',
                      tt.COST: 1})
    graph.add_alists_from([alist1, alist2])
    nodes = graph.nodes()
    # removed leftover debug print(nodes); assertEqual reports the actual
    # node count on failure, unlike assertTrue(len(...) == 2)
    self.assertEqual(len(nodes), 2)
def test_context_composition(self):
    """Injecting query context should compose missing context values (e.g.
    accuracy inferred from device) and steer Temporal decomposition to a
    'gpregress' op with the expected number of children."""
    G = InferenceGraph()
    a = Alist(**{tt.ID: '1', tt.SUBJECT: 'Ghana', tt.PROPERTY: 'P1082',
                 tt.TIME: '2023', tt.OBJECT: '', tt.OPVAR: '?x', tt.COST: 1})
    # context triple: [user context, environment/session context, query context]
    ctx1 = [{ctx.nationality: 'United Kingdom'},
            {ctx.place: 'United Kingdom', ctx.device: 'computer',
             ctx.datetime: '2010-07-27 11:00:00'},
            {}]
    a.set(tt.CONTEXT, ctx1)
    G.add_alist(a)
    query_ctx = frank.context.inject_query_context
    # query context should infer the ctx.accuracy from ctx.device
    op_alist = Temporal().decompose(query_ctx(a), G)
    self.assertEqual(
        (op_alist.get(tt.OP), len(G.child_alists(op_alist.id))),
        ('gpregress', 19))
def decompose(self, alist: A, G: InferenceGraph):
    """Comparison decomposition: for an eq/lt/gt/lte/gte operation over
    several operation variables, create one 'value' child node per variable
    under a new OR-branch operation node.

    Returns the new operation alist, or None when the alist is not a
    multi-variable comparison.
    """
    # only comparison operations qualify (checked first so OPVAR is not
    # split unless the op matches, mirroring the original short-circuit)
    if alist.get(tt.OP).lower() not in ['eq', 'lt', 'gt', 'lte', 'gte']:
        return None
    operands = alist.get(tt.OPVAR).split(' ')
    if len(operands) <= 1:
        return None

    op_alist = alist.copy()
    # higher cost makes this decomposition more expensive
    op_alist.cost = alist.cost + 1
    op_alist.branch_type = br.OR
    op_alist.parent_decomposition = 'comparison'
    op_alist.node_type = nt.HNODE
    G.link(alist, op_alist, op_alist.parent_decomposition)

    # one child node per operand variable, each holding its current value
    for operand in operands:
        child = Alist()
        child.set(tt.OP, "value")
        child.set(tt.OPVAR, operand)
        child.set(operand, alist.get(operand))
        child.cost = op_alist.cost + 1
        child.node_type = nt.ZNODE
        child.set(tt.CONTEXT, op_alist.get(tt.CONTEXT))
        G.link(op_alist, child, op_alist.parent_decomposition)
    return op_alist
def test_temporal(self):
    """Temporal decomposition of a dated query should add child nodes
    (values at other time points) under the root alist."""
    alist = Alist(**{tt.ID: '0', tt.SUBJECT: 'Ghana', tt.PROPERTY: 'P1082',
                     tt.OBJECT: '', tt.TIME: '2010', tt.OPVAR: '?x',
                     tt.COST: 1})
    G = InferenceGraph()
    G.add_alist(alist)
    temporal = Temporal()
    results = temporal.decompose(alist, G)
    self.assertTrue(
        len(G.child_alists(alist.id)) > 0,
        "should have more than one element")
def test_geospatial(self):
    """Geospatial decomposition of a continent-level query should add one
    child per constituent sub-location."""
    alist = Alist(**{tt.ID: '1', tt.SUBJECT: 'Africa', tt.PROPERTY: 'P1082',
                     tt.OBJECT: '', tt.TIME: '2010', tt.OPVAR: '?x',
                     tt.COST: 1})
    G = InferenceGraph()
    G.add_alist(alist)
    geospatial = Geospatial()
    results = geospatial.decompose(alist, G)
    self.assertTrue(
        len(G.child_alists(alist.id)) > 0,
        "geospatial decomp should return more than one child")
def test_normalize_is(self):
    """A nested $is assignment should be decomposed by Normalize into at
    least one child of the root alist."""
    query = {
        tt.ID: '1',
        tt.SUBJECT: '$y',
        tt.PROPERTY: 'P1082',
        tt.OBJECT: '?x',
        tt.TIME: '2010',
        tt.OPVAR: '?x',
        tt.COST: 1,
        '$y': {"$is": "Ghana"},
    }
    root = Alist(**query)
    graph = InferenceGraph()
    graph.add_alist(root)
    Normalize().decompose(root, graph)
    self.assertTrue(len(graph.child_alists(root.id)) > 0)
def descendant_explanation(self, G: InferenceGraph, alist: Alist, summary,
                           max_length, length):
    """Recursively append each descendant's 'how' and 'what' explanation
    text to `summary`, down to `max_length` levels below `alist`.

    Args:
        G: inference graph to traverse.
        alist: node whose descendants are being summarized.
        summary: accumulated explanation text so far.
        max_length: maximum descendant depth to include.
        length: current recursion depth (callers start at 1).

    Returns:
        The accumulated summary string.
    """
    if length <= max_length:
        # for child in alist.children:
        for child in G.child_alists(alist.id):
            # NOTE(review): .strip() binds only to the second f-string, not
            # the whole concatenation -- confirm whether stripping the full
            # string was intended.
            summary = f"{summary}{' ' + child.get('how') if 'how' in child.attributes else ''}" + \
                f"{' ' + child.get('what') if 'what' in child.attributes else ''}".strip()
            summary = self.descendant_explanation(
                G, child, summary, max_length, length + 1)
    return summary
def ancestor_explanation(self, G: InferenceGraph, alist: Alist, summary,
                         max_length, length):
    """Recursively prepend each ancestor's 'why' explanation text to
    `summary`, up to `max_length` levels above `alist`.

    Returns the accumulated summary string.
    """
    if length > max_length:
        return summary
    for ancestor in G.parent_alists(alist.id):
        why_text = ancestor.get('why') if 'why' in ancestor.attributes else ''
        summary = f"{why_text} {summary}".strip()
        summary = self.ancestor_explanation(
            G, ancestor, summary, max_length, length + 1)
    return summary
def why(self, G: InferenceGraph, alist: Alist, decomp_op, in_place=True):
    '''Explain a decomposition of this alist.
    Assumes a failed instantiation of this alist following KB searches.

    Args:
        G: inference graph containing the alist and its children.
        alist: node whose decomposition is being explained.
        decomp_op: decomposition name ('temporal', 'geospatial',
            'normalize' or 'comparison').
        in_place: when True, store the text under the alist's 'why'
            attribute and write the alist back to G.
    '''
    expl = ""
    time = ""
    children = G.child_alists(alist.id)
    if alist.get(tt.TIME):
        time = f" in {alist.get(tt.TIME)}"
    if decomp_op == 'temporal':
        expl = f"Could not find the {alist.get(tt.PROPERTY)} of {alist.instantiation_value(tt.SUBJECT)}{time}. "
        decomp_items = []
        # for c in alist.children[0].children:
        for c in children:
            decomp_items.append(c.get(tt.TIME))
        if len(decomp_items) >= 2:
            expl += f"Attempted to infer the required value{time} by finding the {alist.get(tt.PROPERTY)} of {alist.instantiation_value(tt.SUBJECT)} " + \
                f"at other times between {min(decomp_items)} and {max(decomp_items)}."
    elif decomp_op == 'geospatial':
        expl = f"Could not find the {alist.get(tt.PROPERTY)} of {alist.instantiation_value(tt.SUBJECT)}{time}. "
        decomp_items = []
        # guard: the decomposition op node may be missing its children
        # (previously children[0] raised IndexError when children was empty)
        if children:
            for c in G.child_alists(children[0].id):
                decomp_items.append(c.instantiation_value(tt.SUBJECT))
        if decomp_items:
            # build the entity listing only when at least one item exists
            # (previously decomp_items[-1] raised IndexError on an empty list)
            if len(decomp_items) > 8:
                entities = f"{', '.join(decomp_items[0:8])} etc"
            else:
                entities = f"{', '.join(decomp_items[0:len(decomp_items)-1])} and {decomp_items[-1]}"
            expl += f"Finding the {alist.get(tt.PROPERTY)}{time} for the constituent parts of " + \
                f" {alist.instantiation_value(tt.SUBJECT)}: {entities}."
    elif decomp_op == 'normalize':
        expl = f"Need to solve the sub-queries before determining the {alist.get(tt.PROPERTY)}{time}."
    elif decomp_op == 'comparison':
        expl = f"Need to solve the sub-queries to determine the items to compare."
    if in_place:
        alist.set("why", expl)
        G.add_alist(alist)
def start(self, alist: Alist, session_id, inference_graphs):
    ''' Create new inference graph to infer answer'''
    # a fresh graph per query keeps concurrent sessions isolated
    G = InferenceGraph()
    self.frank_infer = Infer(G)
    self.frank_infer.session_id = session_id
    self.inference_graphs = inference_graphs
    self.start_time = time.time()
    self.frank_infer.last_heartbeat = time.time()
    # enrich the root query with user/session context before inference
    alist = frank.context.inject_query_context(alist)
    self.frank_infer.enqueue_root(alist)
    # -1 presumably signals scheduling from the root -- TODO confirm
    self.schedule(-1)
def setUp(self):
    """Create an Infer instance over a fresh graph, plus a parent alist
    and a reducible, already-instantiated child used by the tests."""
    G = InferenceGraph()
    self.infer = Infer(G)
    self.alist = Alist(**{tt.ID: '1', tt.SUBJECT: 'Ghana',
                          tt.PROPERTY: 'P1082', tt.OBJECT: '?x',
                          tt.TIME: '2010', tt.OPVAR: '?x', tt.COST: 1})
    self.c1 = Alist(**{tt.ID: '2', tt.SUBJECT: 'Ghana',
                       tt.PROPERTY: 'P1082', tt.OBJECT: '?x',
                       tt.TIME: '2010', tt.OPVAR: '?x', tt.COST: 1})
    # give the child a value for ?x and mark it ready for reduction
    self.c1.attributes['?x'] = ''
    self.c1.instantiate_variable('?x', '120')
    self.c1.state = states.REDUCIBLE
    self.c1.data_sources = ['wikidata', 'worldbank']
def decompose(self, alist: A, G: InferenceGraph):
    """Geospatial decomposition: split the subject into its sub-locations
    and aggregate per-location 'value' children under a 'sum' op node.

    Returns the new operation alist, or None when the subject is empty,
    is a variable, or has no known sub-locations.
    """
    subject = alist.get(tt.SUBJECT)
    # skip when the subject is empty or is a variable reference
    if not subject or subject.startswith((vx.PROJECTION, vx.AUXILLIARY)):
        return None

    # get the sub locations of the subject
    # TODO: perform geospatial decomp on OBJECT attribute
    sub_locations = sparqlEndpoint.find_sub_location(subject.strip())
    if not sub_locations:
        return None

    alist.data_sources.add('geonames')
    op_alist = alist.copy()
    op_alist.set(tt.OP, 'sum')
    # higher cost makes this decomposition more expensive
    op_alist.cost = alist.cost + 4
    op_alist.branch_type = br.AND
    op_alist.parent_decomposition = 'geospatial'
    op_alist.node_type = nt.HNODE
    G.link(alist, op_alist, op_alist.parent_decomposition)

    # one 'value' child per sub-location
    for location in sub_locations:
        child = alist.copy()
        child.set(tt.SUBJECT, location)
        child.set(tt.OP, 'value')
        child.cost = op_alist.cost + 1
        child.node_type = nt.ZNODE
        child.set(tt.CONTEXT, op_alist.get(tt.CONTEXT))
        G.link(op_alist, child, op_alist.parent_decomposition)
    return op_alist
def test_gpregress_2(self):
    """GP regression over two past data points should predict the value
    for 2020 within one decimal place of the expected result."""
    alist = Alist(**{tt.ID: '101', tt.SUBJECT: 'Ghana', tt.PROPERTY: 'P1082',
                     tt.OBJECT: '?x', tt.TIME: '2020', tt.OPVAR: '?x',
                     tt.COST: 1})
    c1 = Alist(**{tt.ID: '21011', tt.SUBJECT: 'Ghana', tt.PROPERTY: 'P1082',
                  tt.OBJECT: '?x', tt.TIME: '2019.0', tt.OPVAR: '?x',
                  tt.COST: 1, '?x': 1839758040765.62})
    c2 = Alist(**{tt.ID: '21012', tt.SUBJECT: 'Ghana', tt.PROPERTY: 'P1082',
                  tt.OBJECT: '?x', tt.TIME: '2018.0', tt.OPVAR: '?x',
                  tt.COST: 1, '?x': 1885482534238.33})
    G = InferenceGraph()
    G.add_alist(alist)
    G.link(alist, c1)
    G.link(alist, c2)
    a = frank.reduce.gpregress.reduce(alist, G.child_alists(alist.id), G)
    # removed leftover debug print(a)
    self.assertAlmostEqual(a.instantiation_value(tt.OPVAR),
                           1792866444829.7, places=1)
def test_eq(self):
    """eq-reduce over two children holding equal values should set the
    '?_eq_' flag to the string 'true'."""
    a = Alist(**{
        tt.ID: '1',
        tt.OPVAR: '$x $y',
        '$x': '?x1',
        '$y': '?y1',
        '?_eq_': ''
    })
    b = Alist(**{tt.ID: '2', tt.OPVAR: '?x1', '?x1': 20})
    c = Alist(**{tt.ID: '3', tt.OPVAR: '?y1', '?y1': 20})
    G = InferenceGraph()
    G.add_alist(a)
    G.link(a, b)
    G.link(a, c)
    result = frank.reduce.eq.reduce(a, [b, c], G)
    # assert the flag directly; the previous `True if ... else False`
    # ternary inside assertTrue was redundant and hid the actual value
    self.assertEqual(result.instantiation_value('?_eq_'), 'true')
def create_graph(self):
    """Build a small test graph: one root node with two children attached
    via 'TP' (temporal) and 'GS' (geospatial) labelled edges."""
    graph = InferenceGraph()
    root = Alist(**{tt.ID: '1', tt.SUBJECT: '$y', tt.PROPERTY: 'P1082',
                    tt.OBJECT: '?x', tt.TIME: '2010', tt.OPVAR: '?x',
                    tt.COST: 1, '$y': 'Ghana'})
    child_a = Alist(**{tt.ID: '101', tt.SUBJECT: 'Africa',
                       tt.PROPERTY: 'P1082', tt.OBJECT: '?x',
                       tt.TIME: '2010', tt.OPVAR: '?x', tt.COST: 1})
    child_b = Alist(**{tt.ID: '102', tt.SUBJECT: 'Africa',
                       tt.PROPERTY: 'P1082', tt.OBJECT: '?x',
                       tt.TIME: '2010', tt.OPVAR: '?x', tt.COST: 1})
    graph.add_alists_from([root])
    graph.link(root, child_a, edge_label='TP')
    graph.link(root, child_b, edge_label='GS')
    return graph
def test_comp(self):
    """Normalize a nested $is query, then comp-reduce the resulting op
    node; the reduction must produce a non-None result."""
    a = Alist(**{
        tt.ID: '1',
        tt.SUBJECT: '$y',
        tt.PROPERTY: 'P1082',
        tt.OBJECT: '?x',
        tt.TIME: '2010',
        tt.OPVAR: '?x',
        tt.COST: 1,
        '$y': {"$is": "Ghana"}
    })
    G = InferenceGraph()
    G.add_alist(a)
    normalize.Normalize().decompose(a, G)
    child1 = G.child_alists(a.id)[0]
    result = frank.reduce.comp.reduce(child1, G.child_alists(child1.id), G)
    # removed dead commented-out fixture code; assertIsNotNone replaces
    # the non-idiomatic `assertTrue(result != None)`
    self.assertIsNotNone(result)
def decompose(self, alist: A, G: InferenceGraph):
    """Normalize decomposition: rewrite each uninstantiated nested variable
    ($filter / $in / $is / nested operation) of the alist into a 'comp'
    operation node whose children are the sub-queries, linked into G.

    Returns the operation alist created for the first nested variable that
    matches a rule, or None when no nested variable matches.
    """
    nest_vars = alist.uninstantiated_nesting_variables()
    for nest_attr, v in nest_vars.items():
        if NormalizeFn.FILTER in v:
            # --- $filter: constrain entities by predicate/object pairs ---
            op_alist = alist.copy()
            op_alist.set(tt.OPVAR, nest_attr)
            op_alist.set(tt.OP, 'comp')
            del op_alist.attributes[nest_attr]
            op_alist.cost = alist.cost + 1
            op_alist.branch_type = br.AND
            op_alist.state = states.EXPLORED
            op_alist.parent_decomposition = 'normalize'
            op_alist.node_type = nt.HNODE
            # alist.link_child(op_alist)
            G.link(alist, op_alist, op_alist.parent_decomposition)

            # check for filters that heuristics apply to
            # e.g type==country and location==Europs
            filter_patterns = {}
            geo_class = ''
            for x in v[NormalizeFn.FILTER]:
                prop = str(x['p'])
                obj = str(x['o'])
                if prop == 'type' and (obj == 'country' or obj == 'continent'):
                    filter_patterns['geopolitical'] = obj
                elif prop == 'location':
                    filter_patterns['location'] = obj

            if {'geopolitical', 'location'} <= set(filter_patterns):
                # use heuristics to create a single alist containing the
                # conjunction to find the X located in Y
                child = A(**{})
                child.set(tt.OP, 'values')
                child.set(tt.OPVAR, nest_attr)
                child.set(tt.SUBJECT, nest_attr)
                child.set(
                    tt.PROPERTY,
                    '__geopolitical:' + filter_patterns['geopolitical'])
                child.set(tt.OBJECT, filter_patterns['location'])
                child.cost = op_alist.cost + 1
                child.state = states.UNEXPLORED
                child.node_type = nt.ZNODE
                child.set(tt.CONTEXT, op_alist.get(tt.CONTEXT))
                child = frank.context.inject_query_context(child)
                G.link(op_alist, child, op_alist.parent_decomposition)
                return op_alist
            else:
                # no heuristic match: one child per filter condition
                for x in v[NormalizeFn.FILTER]:
                    child = A(**{})
                    child.set(tt.OP, 'values')
                    child.set(tt.OPVAR, nest_attr)
                    child.set(tt.SUBJECT, nest_attr)
                    for attr, attrval in x.items():
                        child.set(attr, attrval)
                    child.cost = op_alist.cost + 1
                    child.state = states.UNEXPLORED
                    child.node_type = nt.ZNODE
                    child.set(tt.CONTEXT, op_alist.get(tt.CONTEXT))
                    child = frank.context.inject_query_context(child)
                    G.link(op_alist, child, op_alist.parent_decomposition)
                return op_alist
        elif NormalizeFn.IN in v:
            # --- $in: enumerate the listed values as child 'value' nodes ---
            op_alist = alist.copy()
            op_alist.set(tt.OPVAR, nest_attr)
            op_alist.set(tt.OP, 'comp')
            del op_alist.attributes[nest_attr]
            op_alist.cost = alist.cost + 1
            op_alist.state = states.EXPLORED
            op_alist.parent_decomposition = 'normalize'
            op_alist.node_type = nt.HNODE
            # alist.link_child(op_alist)
            G.link(alist, op_alist, op_alist.parent_decomposition)
            # accept either a list or a ';'-separated string of items
            listed_items = []
            if isinstance(v[NormalizeFn.IN], list):
                for x in v[NormalizeFn.IN]:
                    listed_items.append(str(x))
            elif isinstance(v[NormalizeFn.IN], str):
                for x in str(v[NormalizeFn.IN]).split(';'):
                    listed_items.append(str(x).strip())
            for x in listed_items:
                child = A(**{})
                child.set(tt.OP, 'value')
                if nest_attr[0] in [
                        vx.AUXILLIARY, vx.PROJECTION, vx.NESTING
                ]:
                    child.set(tt.OPVAR, nest_attr)
                    child.set(nest_attr, x)
                else:
                    # introduce a projection variable for non-variable attrs
                    new_var = vx.PROJECTION + '_x' + \
                        str(len(op_alist.attributes))
                    child.set(tt.OPVAR, new_var)
                    child.set(nest_attr, new_var)
                    child.set(new_var, x)
                child.state = states.UNEXPLORED
                child.node_type = nt.ZNODE
                child.cost = op_alist.cost + 1
                child.set(tt.CONTEXT, op_alist.get(tt.CONTEXT))
                child = frank.context.inject_query_context(child)
                G.link(op_alist, child, op_alist.parent_decomposition)
            return op_alist
        elif NormalizeFn.IS in v:
            # --- $is: bind the nested variable to a single value ---
            op_alist = alist.copy()
            op_alist.set(tt.OPVAR, nest_attr)
            op_alist.set(tt.OP, 'comp')
            del op_alist.attributes[nest_attr]
            op_alist.cost = alist.cost + 1
            op_alist.state = states.EXPLORED
            op_alist.parent_decomposition = 'normalize'
            op_alist.node_type = nt.HNODE
            # alist.link_child(op_alist)
            G.link(alist, op_alist, op_alist.parent_decomposition)
            child = A(**{})
            child.set(tt.OP, 'value')
            new_var = vx.PROJECTION + '_x' + str(len(op_alist.attributes))
            child.set(tt.OPVAR, new_var)
            child.set(new_var, v[NormalizeFn.IS])
            child.state = states.REDUCIBLE
            child.cost = op_alist.cost + 1
            child.node_type = nt.ZNODE
            child.set(tt.CONTEXT, op_alist.get(tt.CONTEXT))
            child = frank.context.inject_query_context(child)
            G.link(op_alist, child, op_alist.parent_decomposition)
            if v[NormalizeFn.IS].startswith(
                    (vx.AUXILLIARY, vx.NESTING, vx.PROJECTION)) == False:
                # this is an instantiation, so a pseudo leaf node should be created
                leaf = A(**{})
                leaf.set(tt.OP, 'value')
                new_var = vx.PROJECTION + '_x' + \
                    str(len(op_alist.attributes))
                leaf.set(tt.OPVAR, new_var)
                leaf.set(new_var, v[NormalizeFn.IS])
                leaf.state = states.REDUCIBLE
                leaf.cost = op_alist.cost + 1
                leaf.node_type = nt.ZNODE
                leaf.set(tt.CONTEXT, op_alist.get(tt.CONTEXT))
                leaf = frank.context.inject_query_context(leaf)
                G.link(child, leaf, op_alist.parent_decomposition)
            return op_alist
        elif tt.OP in v:
            # --- nested operation: wrap it as a child query, replacing any
            # non-string nested values with fresh nesting variables ---
            op_alist = alist.copy()
            op_alist.set(tt.OPVAR, nest_attr)
            op_alist.set(tt.OP, 'comp')
            # del op_alist.attributes[nest_attr]
            op_alist.set(nest_attr, '')
            op_alist.cost = alist.cost + 1
            op_alist.parent_decomposition = 'normalize'
            op_alist.node_type = nt.HNODE
            # alist.link_child(op_alist)
            G.link(alist, op_alist, op_alist.parent_decomposition)
            var_ctr = 200
            child = A(**{})
            for ak, av in v.items():
                if isinstance(av, str):
                    child.set(ak, av.strip())
                elif ak == tt.CONTEXT:
                    child.set(ak, av)
                else:
                    new_var = vx.NESTING + str(var_ctr)
                    child.set(ak, new_var)
                    child.set(new_var, av)
                    var_ctr = var_ctr + 1
            child.cost = op_alist.cost + 1
            child.node_type = nt.ZNODE
            child.set(tt.CONTEXT, op_alist.get(tt.CONTEXT))
            child = frank.context.inject_query_context(child)
            G.link(op_alist, child, op_alist.parent_decomposition)
            return op_alist
    return None
def generateExplanation(self, G: InferenceGraph, node_id,
                        descendant_blanket_length=1,
                        ancestor_blanket_length=1):
    ''' Generate explanation of a node given its blanket'''
    ''' Saliency ordering:
        Some decompositions and aggregation operations are not important
        given their distance from the node being explained.
        VALUE nodes based on a LOOKUP decomposition do not need to be
        explained. State the fact that the value was retrieved.
        Multiple VALUE nodes from LOOKUPs of a node can be simplified as
        "values looked up from DISTINCT(sources).
        Any VALUE child node that was not retrieved and hence decomposed
        for further lookup should be highlighted; but no need to specify
        the details of that operation if it falls outside the explanation
        blanket.
        Any VALUE node for which instantiation failed should be
        highlighted in the explanation.
        Synonyms for "enclosures" or "scope" to replace "blanket".

        Repetition rule:
        Should not repeat the same explanation for child nodes with the
        same operation. Instead summarise as one explanation and only
        highlight differences: e.g. failed instantiations, high
        uncertainties.

        General procedure for generating the explanation for a node.
        FRANK generates an explanation of length 1 for each node as part
        of inference. To generate an explanation with length > 1,
        1. ancestors of n*: recursively propagate explanations from
           parents to their children by appending. This explanation
           should provide a "causal explanation" or justification of the
           decomposition operation at n*.
        2. descendants of n* : recursively propagate and append
           explantions from child nodes to their parents. This provides a
           justification of the aggregation operation at n*.
        3. at n*: a detailed explanation of any aggregation operation
           performed at n* and the decomposition performed

        Structure of the explanation template:
        <at n*> by <descendants of n*> in the context of <ancestors of n*>
        E.g: The predicted population of Ghana in 2017 using regression
        function based on values from past years is 26,500,000 with an
        error margin of +/-35,000. Retrieved the population values
        between 1990 and 2010 from the World Bank and Wikidata. Had to
        predict the population of Ghana in 2017 since I needed to
        calculate the total population of Africa in 2017 but could not
        find any value for the population of Africa in 2017. After
        decomposing Africa into its parts, I also could not find the
        population of Ghana in 2017.

        Return an object containing a fully composed explanation as well
        as the partial explanations for WHAT, WHY and HOW.
    '''
    explanation = {"all": "", "what": "", "how": "", "why": ""}
    # get n_star node
    n_star: Alist = G.alist(node_id)
    if not n_star:
        return ''
    # "why" comes from ancestors, "how" from descendants, "what" from n*
    ancestors = self.ancestor_explanation(
        G, n_star, "", int(ancestor_blanket_length), 1).strip()
    descendants = self.descendant_explanation(
        G, n_star, "", int(descendant_blanket_length), 1).strip()
    self_exp = f"{n_star.get('what') if 'what' in n_star.attributes else ''} {n_star.get('how') if 'how' in n_star.attributes else ''} "
    sources = self.sources(n_star)
    explanation = {
        # "all":f"{n_star_exp} {n_dsc} {n_asc}",
        "what": self_exp,
        "how": descendants,
        "why": ancestors,
        "sources": sources
    }
    return explanation


#
def summarizeChildren(self, alist: Alist, summary, max_distance, distance):
    """Legacy summarizer that walks `alist.children` directly (older
    parent/children API rather than the InferenceGraph accessors).

    Recursively appends a natural-language summary of the children's
    decomposition results to `summary`, up to `max_distance` levels deep.
    """
    if max_distance < distance:
        return ''
    if not alist.children:
        return ''
    # pick the attribute ("feature") that distinguishes the children,
    # depending on how this node was decomposed
    feat = ''
    if alist.parent_decomposition.lower() == 'temporal':
        feat = tt.TIME
    elif alist.parent_decomposition.lower() == 'geospatial':
        feat = tt.SUBJECT
    elif alist.parent_decomposition.lower() == 'lookup':
        feat = tt.OBJECT
    elif alist.parent_decomposition.lower() in ['normalize', 'comp']:
        feat = alist.get(tt.OPVAR)
    elif alist.parent_decomposition.lower() == 'comparison':
        feat = f"?{alist.get(tt.OP)}"
    elif not alist.parent:
        feat = alist.get(tt.OPVAR)
    decomps = []
    ops = []
    properties = []
    for child in alist.children:
        # if alist.parent_decomposition.lower() == 'temporal':
        feats = feat.split(' ')
        for ft in feats:
            if ft in child.attributes:
                decomps.append(child.get(ft))
        # elif alist.parent_decomposition.lower() in ['normalize','comp']:
        #     decomps.append(child.get(feat))
        ops.append(child.get(tt.OP))
        properties.append(child.get(tt.PROPERTY))
    # human-readable list of data sources used by this node
    sources = ''
    if len(alist.data_sources) == 1:
        sources = f" from {list(alist.data_sources)[0]}"
    elif len(alist.data_sources) > 1:
        sources = f" from {', '.join(list(alist.data_sources)[0:len(alist.data_sources)-1])} and {list(alist.data_sources)[-1]}"
    if alist.parent_decomposition.lower() == 'temporal':
        data_range = f" between {min(decomps)} and {max(decomps)}"
        if min(decomps) == max(decomps):
            data_range = f" in {min(decomps)}"
        if alist.instantiation_value(alist.get(tt.OPVAR)):
            # summarize as successful instantiation
            summary += f" Found {properties[0]} values{data_range}{sources} to predict the {properties[0]} of " + \
                f"{alist.instantiation_value(tt.SUBJECT)} in {alist.get(tt.TIME)}. "
        else:
            # summarize as unsuccessful instantiation
            summary += f" Failed to find the {properties[0]} values{data_range}. "
    elif alist.parent_decomposition.lower() in ['normalize', 'comp']:
        # todo: be specific about sub-query
        time = f" in {alist.get(tt.TIME)}" if alist.get(tt.TIME) else ""
        if alist.get(tt.OP) in ['min', 'max', 'avg', 'mode', 'mean']:
            summary += f" Solved the sub-query and calculated the {self.ops_text[alist.get(tt.OP)]} of the {alist.get(tt.PROPERTY)}{time}."
        elif alist.get(tt.OP) in ['comp'] and decomps:
            listed_str = ''
            for dc in decomps:
                listed = dc.split(',')
                if len(listed) > 8:
                    listed_str += f"{', '.join(listed[0:8])}, etc. "
                else:
                    listed_str += ', '.join(listed)
            summary += f" Found these as solutions to the sub-query: {listed_str}."
        else:
            if alist.get(tt.PROPERTY):
                summary += f" Solved the sub-query and calculated the {self.ops_text[alist.get(tt.OP)]} of the {alist.get(tt.PROPERTY)} of " + \
                    f"{alist.instantiation_value(tt.SUBJECT)}{time}."
            else:
                summary += ' ' + alist.get(tt.EXPLAIN)
    elif not alist.parent:
        # root node: summarize each child node individually
        for child in alist.children:
            summary += self.summarizeNode(child, False)
    # recurse into children, keeping the previous summary when a child
    # produces nothing
    for child in alist.children:
        csumm = self.summarizeChildren(child, summary,
                                       max_distance, distance + 1)
        summary = csumm if csumm else summary
    return summary


# def summarizeParents(self, alist: Alist, summary, max_distance, distance):
#     if distance <= max_distance:
#         for parent in alist.parent:
#             time = f" in {parent.get(tt.TIME)}" if parent.get(
#                 tt.TIME) else ""
#             if alist.parent_decomposition.lower() == 'temporal':
#                 if alist.get(tt.OP).lower() == 'regress':
#                     summary = f" Had to predict the {parent.get(tt.PROPERTY)} of {parent.instantiation_value(tt.SUBJECT)}{time} " + \
#                         f"since the required {parent.get(tt.PROPERTY)} value was not found " + \
#                         f"in the knowledge bases searched.{summary}"
#                 else:
#                     summary += f" Tried to find the {parent.get(tt.PROPERTY)} of {parent.instantiation_value(tt.SUBJECT)} " + \
#                         f"in other times from which to extrapolate."
#             elif alist.parent_decomposition.lower() == 'geospatial':
#                 summary += f" The {parent.get(tt.PROPERTY)} of {parent.instantiation_value(tt.SUBJECT)}{time} was not found " + \
#                     f"so we had find the {parent.get(tt.PROPERTY)} for its constituents.{summary}"
#             elif alist.parent_decomposition.lower() == 'comparison':
#                 summary += f" Had to solve for the values of the items to be compared."
#             else:
#                 if parent.get(tt.OP).lower() in ['eq', 'lt', 'gt']:
#                     summary += f" Had to decompose the query in order to determine if the first sub-query " + \
#                         f"is {self.ops_text[parent.get(tt.OP)]} the second.{summary}"
#                 elif parent.get(tt.OP).lower() in ['min', 'max', 'avg', 'mode', 'mean']:
#                     summary += f" The required {self.ops_text[parent.get(tt.OP)]} of the {parent.get(tt.PROPERTY)}{time} " + \
#                         f"was not found in the knowledge bases searched.{summary}"
#                 else:
#                     summary += f" The {self.ops_text[parent.get(tt.OP)]} of the {parent.get(tt.PROPERTY)} of {parent.instantiation_value(tt.SUBJECT)}{time} " + \
#                         f"was not found in the knowledge bases searched.{summary}"
#             summary = self.summarizeParents(
#                 parent, summary, max_distance, distance+1)
#     return summary


#
def summarizeNode(self, alist: Alist, in_place=True):
    ''' generate explanation for node, assign to alist attribute in place and return the explanation'''
    summary = ''
    try:
        # NOTE(review): `sources` is only bound when data_sources is
        # non-empty; a later reference with empty data_sources raises
        # NameError, which the broad except below swallows -- confirm
        # whether that is intended.
        if len(alist.data_sources) == 1:
            sources = f"{list(alist.data_sources)[0]}".strip()
        elif len(alist.data_sources) > 1:
            sources = f"{', '.join(list(alist.data_sources)[0:len(alist.data_sources)-1])} and {list(alist.data_sources)[-1]}".strip(
            )
        comp_in_child = False
        # --- explain the decomposition that produced this node ---
        if alist.parent_decomposition.lower(
        ) == 'temporal' and alist.instantiation_value(alist.get(tt.OPVAR)):
            summary = f"The predicted {alist.get(tt.PROPERTY)} of {alist.instantiation_value(tt.SUBJECT)} " + \
                f"in {alist.get(tt.TIME)} using a regression function based on {alist.get(tt.PROPERTY)} " + \
                f"data from past times is {alist.instantiation_value(alist.get(tt.OPVAR))}."
            # f"data from past times is {list(alist.projection_variables().values())[0]}."
        elif alist.parent_decomposition.lower(
        ) == 'geospatial' and alist.instantiation_value(tt.SUBJECT):
            summary = f"Found the constituents of {alist.instantiation_value(tt.SUBJECT)}."
        elif alist.parent_decomposition.lower() == 'lookup' and sources:
            # if len(alist.data_sources) ==1:
            #     sources = f" from {list(alist.data_sources)[0]}"
            # elif len(alist.data_sources) > 1:
            #     sources = f" from {', '.join(list(alist.data_sources)[0:len(alist.data_sources)-1])} and {list(alist.data_sources)[-1]}"
            summary = f"Facts were retrieved from the {sources} knowledge base(s)."
        elif alist.parent_decomposition.lower() in ['normalize', 'comp']:
            # sources = ' '.join(alist.data_sources)
            # build a description of the sub-query each child solves
            filter_exp = ""
            counter = 0
            for child in alist.children:
                if child.get(tt.PROPERTY).startswith("__geopolitical"):
                    if len(filter_exp) > 0:
                        filter_exp = filter_exp + " and "
                    ctype = child.get(tt.PROPERTY).split(":")[1]
                    filter_exp = filter_exp + f"to find entities that have type {ctype}" + \
                        f" and are located in {child.instantiation_value(tt.OBJECT)}"
                elif child.get(tt.OP).lower() == "values":
                    if len(filter_exp) > 0:
                        filter_exp = filter_exp + " and "
                    # NOTE(review): both placeholders use tt.PROPERTY; the
                    # second looks like it should be the object value --
                    # confirm before changing.
                    filter_exp = filter_exp + f"to find entities that have a {child.get(tt.PROPERTY)} of " + \
                        f"'{child.get(tt.PROPERTY)}'"
                else:
                    if child.instantiation_value(tt.OBJECT):
                        if counter == 0:
                            if len(filter_exp) > 0:
                                filter_exp = filter_exp + " and "
                            filter_exp = filter_exp + f"to find the {child.get(tt.PROPERTY)} of " + \
                                f"{child.instantiation_value(tt.SUBJECT)} ({child.instantiation_value(tt.OBJECT)})"
                        elif counter > 0 and counter <= 5:
                            filter_exp = filter_exp + \
                                f", {child.instantiation_value(tt.SUBJECT)} ({child.instantiation_value(tt.OBJECT)})"
                        elif counter == 6:
                            filter_exp = filter_exp + ", etc"
                    else:
                        if counter == 0:
                            if len(filter_exp) > 0:
                                filter_exp = filter_exp + " and "
                            filter_exp = filter_exp + f"to find the {child.get(tt.PROPERTY)} of" + \
                                f"{child.instantiation_value(tt.SUBJECT)}"
                        elif counter > 0 and counter <= 5:
                            filter_exp = filter_exp + \
                                f", {child.instantiation_value(tt.SUBJECT)}"
                        elif counter == 6:
                            filter_exp = filter_exp + ", etc"
                counter += 1
            if filter_exp.strip():
                summary = f"Evaluated the sub-query {filter_exp}."
        # * generate explanation for reduce operation.
        reduce_exp = ""
        # * explain any subqueries set comps in children nodes
        time = "" if alist.get(
            tt.TIME) == '' else "in " + alist.get(tt.TIME)
        opDesc = ""
        if alist.get(tt.OP).lower() not in [
                "value", "values", "comp", "regress", "nnpredict", "gt",
                "gte", "lt", "lte", "eq"
        ]:
            # aggregation op: prefer the projection variable's value
            inferred_value = alist.instantiation_value(alist.get(tt.OPVAR))
            proj_var = alist.projection_variables()
            if len(proj_var) > 0 and alist.get(list(proj_var.keys())[0]):
                inferred_value = alist.instantiation_value(
                    list(proj_var.keys())[0])
            opDesc = f" Calculated the {self.ops_text[alist.get(tt.OP)]} of the {alist.get(tt.PROPERTY)} {time} " + \
                f"for the entities. Inferred value is {inferred_value}."
            if opDesc not in reduce_exp:
                reduce_exp = reduce_exp.strip() + opDesc
        elif alist.get(tt.OP).lower() == "value":
            if alist.get(tt.PROPERTY) and alist.instantiation_value(
                    tt.SUBJECT):
                opDesc = f" The {self.ops_text[alist.get(tt.OP)]} of the {alist.get(tt.PROPERTY)} of " + \
                    f"{alist.instantiation_value(tt.SUBJECT)} {time} is {alist.instantiation_value(tt.OBJECT)}."
                if opDesc not in reduce_exp:
                    reduce_exp = reduce_exp.strip() + opDesc
        elif alist.get(tt.OP).lower() in ["gt", "gte", "lt", "lte", "eq"]:
            # comparison ops store their result in a '?<op>' variable
            proj_var = f"?{alist.get(tt.OP).lower()}"
            cmp_vars = alist.get(tt.OPVAR).split(' ')
            if alist.is_instantiated(proj_var) and bool(
                    alist.instantiation_value(proj_var)):
                opDesc = f"The comparison returned True since {alist.instantiation_value(cmp_vars[0])} is {self.ops_text[alist.get(tt.OP)]} {alist.instantiation_value(cmp_vars[1])}."
                if opDesc not in reduce_exp:
                    reduce_exp = reduce_exp.strip() + opDesc
        summary = (reduce_exp + " " + summary).strip()
        # assign explanation to node (and to its parent).
        if in_place:
            alist.set(tt.EXPLAIN, summary)
            # NOTE(review): this propagates to the parent only when the
            # summary is EMPTY (`not len(summary.strip()) > 0`) -- confirm
            # whether the condition is inverted.
            if len(alist.parent) > 0 and \
                    not len(summary.strip()) > 0 and \
                    reduce_exp not in alist.parent[0].get(tt.EXPLAIN):
                alist.parent[0].set(tt.EXPLAIN, summary)
    except Exception as ex:
        print("error generating explanation: " + str(ex))
    return summary
def test_graph_add_nodes_and_edges(self):
    """Linking two children to a root node should create labelled edges."""
    graph = InferenceGraph()
    alist1 = Alist(**{tt.ID: '1', tt.SUBJECT: '$y', tt.PROPERTY: 'P1082',
                      tt.OBJECT: '?x', tt.TIME: '2010', tt.OPVAR: '?x',
                      tt.COST: 1, '$y': 'Ghana'})
    alist2 = Alist(**{tt.ID: '101', tt.SUBJECT: 'Africa', tt.PROPERTY: 'P1082',
                      tt.OBJECT: '?x', tt.TIME: '2010', tt.OPVAR: '?x',
                      tt.COST: 1})
    alist3 = Alist(**{tt.ID: '102', tt.SUBJECT: 'Africa', tt.PROPERTY: 'P1082',
                      tt.OBJECT: '?x', tt.TIME: '2010', tt.OPVAR: '?x',
                      tt.COST: 1})
    graph.add_alists_from([alist1])
    # removed dead commented-out matplotlib plotting experiments; the
    # plotly rendering calls are kept as in the original test
    graph.plot_plotly("Graph 1")
    graph.link(alist1, alist2, edge_label='TP')
    graph.link(alist1, alist3, edge_label='GS')
    edges = graph.edges()
    graph.plot_plotly("Graph 2")
    self.assertTrue(len(edges) > 0)
def reduce(alist: Alist, children: List[Alist], G: InferenceGraph):
    """Reduce a set-comprehension ('comp') node.

    Intersects the values inferred by the children's sub-branches, prunes
    the node's existing siblings, and grafts a new aggregation branch onto
    the parent with one 'value' lookup child per common item.

    Args:
        alist: the comp alist being reduced.
        children: the child alists of `alist` in G.
        G: the inference graph, extended in place.

    Returns:
        The mutated `alist` (state set to IGNORE, with the nodes to enqueue
        attached), or None if no common items could be determined.
    """
    if not children:
        return None
    nodes_enqueue = []
    nodes_enqueue_process = []
    # Get intersection of child values across all children's sub-branches.
    common_items = set()
    head, *tail = children
    has_head_children = False
    has_tail_children = False
    for c in G.child_alists(head.id):
        has_head_children = True
        if c.get(tt.OP) != 'comp':
            if c.get(tt.OPVAR).startswith(vx.NESTING):
                common_items.add(str(c.instantiation_value(c.get(tt.OPVAR))))
            else:
                projVars = c.projection_variables()
                if projVars is not None:
                    for pvkey, pvval in projVars.items():
                        common_items.add(c.instantiation_value(pvkey))
    for t in tail:
        c_items = set()
        for tc in G.child_alists(t.id):
            has_tail_children = True
            if tc.get(tt.OPVAR).startswith(vx.NESTING):
                # BUG FIX: read the value from `tc` (the child currently
                # being visited), not the leftover head-loop variable `c`.
                c_items.add(str(tc.instantiation_value(tc.get(tt.OPVAR))))
            projVars = tc.projection_variables()
            if projVars is not None:
                for pvkey, pvval in projVars.items():
                    c_items.add(tc.instantiation_value(pvkey))
        common_items = common_items.intersection(c_items)
    # If no child had sub-branches at all, fall back to the children's
    # own (projected) values.
    if not common_items and not has_head_children and not has_tail_children:
        for c in children:
            if c.get(tt.OP) != 'comp':
                if c.get(tt.OPVAR).startswith(vx.NESTING):
                    common_items.add(
                        str(c.instantiation_value(c.get(tt.OPVAR))))
                else:
                    projVars = c.projection_variables()
                    if projVars is not None:
                        for pvkey, pvval in projVars.items():
                            common_items.add(c.instantiation_value(pvkey))
    if not common_items:
        return None
    else:
        # Common items found: prune existing siblings before creating the
        # new sibling branch.
        sibs = G.child_alists(G.parent_alists(alist.id)[0].id)
        for x in sibs:
            if x.id != alist.id:
                G.prune(x.id)
                print(
                    f'{pcol.RED}sibling pruned {x.id}{pcol.RESET} {x}{pcol.RESETALL}'
                )
    # Set up the new sibling aggregation branch.
    parent = G.parent_alists(alist.id)[0]
    op_alist = parent.copy()
    op_alist.set(alist.get(tt.OPVAR), '')
    op_alist.set(tt.OP, parent.get(tt.OP))
    op_alist.set(tt.OPVAR, parent.get(tt.OPVAR))
    op_alist.set(op_alist.get(tt.OPVAR), '')
    op_alist.state = states.EXPLORED
    # Set as an aggregation node to help with display rendering.
    op_alist.node_type = nt.HNODE
    G.link(parent, op_alist, 'comp')
    G.link(alist, op_alist, 'set-comp', create_new_id=False)
    nodes_enqueue.append((op_alist, parent, False, 'comp'))
    print(
        f'{pcol.BLUE}set-comp >> {op_alist.id}{pcol.RESET} {op_alist}{pcol.RESETALL}'
    )
    if alist.children:
        nodes_enqueue.append((op_alist, alist, False, 'setcomp'))
    # Create the children of the new branch. Copy first to avoid using a
    # different version from another thread inside the loop.
    op_alist_copy = op_alist.copy()
    for ff in common_items:
        new_sibling: Alist = op_alist_copy.copy()
        new_sibling.set(tt.OP, 'value')
        new_sibling.set(tt.OPVAR, op_alist_copy.get(tt.OPVAR))
        new_sibling.set(alist.get(tt.OPVAR), ff)
        new_sibling.instantiate_variable(alist.get(tt.OPVAR), ff)
        # Propagate the item into every non-OPVAR reference of the variable.
        for ref in new_sibling.variable_references(alist.get(tt.OPVAR)):
            if ref not in [tt.OPVAR]:
                new_sibling.set(ref, ff)
        new_sibling.node_type = nt.ZNODE
        G.link(op_alist, new_sibling, 'comp_lookup')
        nodes_enqueue_process.append(
            (new_sibling, op_alist, True, 'comp_lookup'))
        print(
            f'{pcol.BLUE} set-comp-child >>> {new_sibling.id}{pcol.RESET} {new_sibling}{pcol.RESETALL}'
        )
    alist.state = states.IGNORE
    alist.nodes_to_enqueue_only = nodes_enqueue
    alist.nodes_to_enqueue_and_process = nodes_enqueue_process
    return alist
def create_graph2(self):
    """Build a 4-level inference graph fixture (parent -> ... -> ggrandchild)."""
    def make_node(subject, cost):
        # All fixture nodes differ only in subject and cost.
        return Alist(
            **{
                tt.ID: '1',
                tt.SUBJECT: subject,
                tt.PROPERTY: 'P1082',
                tt.OBJECT: '',
                tt.TIME: '2010',
                tt.OPVAR: '?x',
                tt.COST: cost
            })

    graph = InferenceGraph()
    parent = make_node('Africa', 1)
    child = make_node('Ghana', 2)
    child2 = make_node('a_Ghana', 5)
    grandchild = make_node('b_Ghana', 3)
    ggrandchild = make_node('c_Ghana', 4)
    # ggrandchild.state = states.EXPLORED
    graph.add_alists_from([parent])
    graph.link(parent, child, edge_label='TP')
    graph.link(parent, child2, edge_label='GS')
    graph.link(child, grandchild, edge_label='GS')
    graph.link(grandchild, ggrandchild, edge_label='GS')
    return graph
def setUp(self):
    """Create two fixture graphs: G with numeric child values, G2 with strings."""
    def make_child(id_, time_, value):
        # Each child differs only in id, time and the instantiated '?x' value.
        a = Alist(
            **{
                tt.ID: id_,
                tt.SUBJECT: 'Ghana',
                tt.PROPERTY: 'P1082',
                tt.OBJECT: '?x',
                tt.TIME: time_,
                tt.OPVAR: '?x',
                tt.COST: 1,
                '?x': ''
            })
        a.instantiate_variable('?x', value)
        return a

    # Graph G: root plus seven children holding numeric values c1..c7.
    self.G = InferenceGraph()
    self.alist = Alist(
        **{
            tt.ID: '1',
            tt.SUBJECT: 'Ghana',
            tt.PROPERTY: 'P1082',
            tt.OBJECT: '?x',
            tt.TIME: '2020',
            tt.OPVAR: '?x',
            tt.COST: 1
        })
    self.G.add_alist(self.alist)
    numeric_specs = [('2', '2010', '120'), ('3', '2011', '122'),
                     ('4', '2012', '126'), ('5', '2013', '125'),
                     ('5', '2014', '126'), ('6', '2015', '128'),
                     ('7', '2016', '129')]
    for n, (id_, time_, value) in enumerate(numeric_specs, start=1):
        child = make_child(id_, time_, value)
        setattr(self, f'c{n}', child)
        self.G.link(self.alist, child)

    # Graph G2: root plus five children holding string values c21..c25.
    self.G2 = InferenceGraph()
    self.alist2 = Alist(
        **{
            tt.ID: '1',
            tt.SUBJECT: 'Ghana',
            tt.PROPERTY: 'P1082',
            tt.OBJECT: '?x',
            tt.TIME: '2020',
            tt.OPVAR: '?x',
            tt.COST: 1
        })
    self.G2.add_alist(self.alist2)
    string_specs = [('2', '2010', 'a'), ('3', '2011', 'b'),
                    ('4', '2012', 'c'), ('5', '2013', 'd'),
                    ('5', '2014', 'a')]
    for n, (id_, time_, value) in enumerate(string_specs, start=21):
        child = make_child(id_, time_, value)
        setattr(self, f'c{n}', child)
        self.G2.link(self.alist2, child)
def what(self, G: InferenceGraph, alist: Alist, is_reduced: bool,
         in_place=True):
    '''Explain a reduction of this alist.

    Builds a human-readable "what" (the inferred fact) and "how" (the
    method used) explanation for `alist` and, when `in_place` is True,
    stores them on the alist and persists it back into G.

    Args:
        G: the inference graph containing the alist.
        alist: the alist whose reduction is being explained.
        is_reduced: whether the reduction succeeded.
        in_place: if True, write "what"/"how" onto the alist and update G.
    '''
    what = ''
    how = ''
    time = ""
    if alist.get(tt.TIME):
        time = f" in {alist.get(tt.TIME)}"
    if not is_reduced:
        # Failure messages, chosen by the operation type.
        if alist.get(tt.OP) in ['eq', 'gt', 'gte', 'lt', 'lte']:
            # BUG FIX: "being compare" -> "being compared" in the
            # user-facing explanation text.
            what = f"Failed to compare the values since the values of all items being compared are not known. "
        elif alist.get(tt.OP) in ['comp']:
            what = f"Failed to solve the sub-problem. "
        elif alist.get(tt.OP) in ['value', 'values']:
            what = f"Failed to determine the {self.ops_text[alist.get(tt.OP)]} of {alist.get(tt.PROPERTY)}{time}."
        else:
            what = f"Failed to calculate the {self.ops_text[alist.get(tt.OP)]} of {alist.get(tt.PROPERTY)}{time}."
    else:
        if alist.get(tt.OP) in ['eq', 'gt', 'gte', 'lt', 'lte']:
            # Comparison ops: OPVAR holds the two compared variables.
            vars_compared = alist.get(tt.OPVAR).split(' ')
            if len(vars_compared) > 1:
                what = f"Inferred value is '{alist.instantiation_value('?'+ alist.get(tt.OP))}'."
                how = f"Did a comparison to determine if {alist.instantiation_value(vars_compared[0])} is " + \
                    f"{self.ops_text[alist.get(tt.OP)]} {alist.instantiation_value(vars_compared[1])}."
        elif alist.get(tt.OP) in ['comp']:
            # Sub-query: list at most 8 of the values found.
            listed_str = ''
            listed = alist.instantiation_value(alist.get(tt.OPVAR))
            if listed:
                listed = listed.split(',')
                if len(listed) > 8:
                    listed_str += f"{', '.join(listed[0:8])}, etc"
                else:
                    listed_str += ', '.join(listed)
            if listed_str:
                what = f"Solved the sub-query and found the following values: {listed_str}."
        else:
            # Prefer the projected value as the inferred value; fall back
            # to the instantiated operation variable.
            inferred_value = ''
            projected = alist.projection_variables()
            if projected:
                inferred_value = list(projected.values())[0]
            if not inferred_value:
                inferred_value = alist.instantiation_value(
                    alist.get(tt.OPVAR))
            if inferred_value:
                if ':' in alist.get(tt.PROPERTY):
                    # Namespaced property (e.g. "ns:prop"): report values.
                    listed_str = ''
                    listed = alist.instantiation_value(alist.get(
                        tt.OPVAR)).split(',')
                    if len(listed) > 8:
                        listed_str += f"{', '.join(listed[0:8])}, etc"
                    else:
                        listed_str += ', '.join(listed)
                    what = f"The {alist.get(tt.PROPERTY).split(':')[1]} values found for the sub-query include: {listed_str}."
                elif (projected or inferred_value) and not alist.get(tt.PROPERTY):
                    # For alists with just a projected value but no property.
                    what = f"An input value for operation is {inferred_value}."
                elif projected and alist.get(
                        tt.OPVAR) not in projected and alist.get(
                            tt.OP) in ['max', 'min']:
                    what = f"The entity whose {alist.get(tt.PROPERTY)}{time} has the {self.ops_text[alist.get(tt.OP)]} of {alist.instantiation_value(alist.get(tt.OPVAR))} is {inferred_value}."
                elif projected and alist.get(
                        tt.OPVAR) not in projected and alist.get(
                            tt.OP) not in ['max', 'min']:
                    what = f"The {self.ops_text[alist.get(tt.OP)]} of the {alist.get(tt.PROPERTY)}{time} of {inferred_value} is {alist.instantiation_value(alist.get(tt.OPVAR))}."
                else:
                    what = f"The {self.ops_text[alist.get(tt.OP)]} of the {alist.get(tt.PROPERTY)} of {alist.instantiation_value(tt.SUBJECT)}{time} is {inferred_value}."
        if alist.get(tt.OP) in [
                'regress', 'nnpredict', 'linregress', 'gpregress', 'nnregress'
        ]:
            # Explain the regression by the time span of the grandchildren.
            decomp_items = []
            children = G.child_alists(alist.id)
            # for c in alist.children[0].children:
            for c in G.child_alists(children[0].id):
                decomp_items.append(c.get(tt.TIME))
            if len(decomp_items) > 0:
                how = f"Generated a regression function from times between {min(decomp_items)} and {max(decomp_items)}."
    if in_place:
        alist.set("what", what)
        alist.set("how", how)
        G.add_alist(alist)
def decompose(self, alist: A, G: InferenceGraph):
    """Temporal decomposition: expand an alist with a concrete year into a
    regression node whose children look up the property value in nearby years.

    Returns the new regression op_alist linked under `alist` in G, or None
    when the alist's time is a nested variable or empty.
    """
    current_year = datetime.datetime.now().year
    branch_factor = config.config["temporal_branching_factor"]
    parent_year = None
    # Only decompose concrete (non-nested, non-empty) year values.
    if alist.get(tt.TIME).startswith(vx.NESTING) or \
            not alist.get(tt.TIME):
        return None
    else:
        parent_year = datetime.datetime.strptime(alist.get(tt.TIME), '%Y')
    count = 0
    op_alist = alist.copy()
    op = "regress"
    context = op_alist.get(tt.CONTEXT)
    if context:
        if context[0]:
            # High-accuracy context upgrades to Gaussian-process regression.
            if ctx.accuracy in context[0] and context[0][ctx.accuracy] == 'high':
                op = 'gpregress'
                if branch_factor <= 10:
                    # increase number of data points for regression
                    branch_factor = 20
    # if context[1] and ctx.datetime in context[1]:
    #     # use the ctx.datetime as current year if specified in context
    #     current_year = datetime.datetime.strptime(context[1][ctx.datetime], '%Y-%m-%d %H:%M:%S').year
    # flush context: needed to clear any query time context value
    # whose corresponding alist attribute (t) has been modified
    frank.context.flush(op_alist, [tt.TIME])
    op_alist.set(tt.OP, op)
    op_alist.cost = alist.cost + 2.0
    op_alist.branch_type = br.AND
    op_alist.state = states.EXPLORED
    op_alist.parent_decomposition = 'temporal'
    op_alist.node_type = nt.HNODE
    # alist.link_child(op_alist)
    G.link(alist, op_alist, op_alist.parent_decomposition)
    if (current_year - parent_year.year) > branch_factor/2:
        # Year far in the past: sample symmetrically around it
        # (one child per side per iteration).
        for i in range(1, math.ceil(branch_factor/2)):
            child_a = alist.copy()
            child_a.set(tt.TIME, str(parent_year.year + i))
            child_a.set(tt.OP, "value")
            child_a.cost = op_alist.cost + 1
            child_a.node_type = nt.ZNODE
            child_a.set(tt.CONTEXT, op_alist.get(tt.CONTEXT))
            frank.context.flush(child_a, [tt.TIME])
            # op_alist.link_child(child_a)
            G.link(op_alist, child_a, 'value')
            child_b = alist.copy()
            child_b.set(tt.TIME, str(parent_year.year - i))
            child_b.set(tt.OP, "value")
            child_b.cost = op_alist.cost + 1
            child_b.set(tt.CONTEXT, op_alist.get(tt.CONTEXT))
            frank.context.flush(child_b, [tt.TIME])
            child_b.node_type = nt.ZNODE
            # op_alist.link_child(child_b)
            G.link(op_alist, child_b, 'value')
            count = count + 2
    elif parent_year.year >= current_year:
        # Year is current/future: sample the years just before the present.
        for i in range(1, math.ceil(branch_factor)):
            child_a = alist.copy()
            child_a.set(tt.TIME, str(current_year - i))
            child_a.set(tt.OP, "value")
            child_a.cost = op_alist.cost + 1
            child_a.node_type = nt.ZNODE
            child_a.set(tt.CONTEXT, op_alist.get(tt.CONTEXT))
            frank.context.flush(child_a, [tt.TIME])
            # op_alist.link_child(child_a)
            G.link(op_alist, child_a, 'value')
            count = count + 1
    # Top up with earlier years until the branching factor is reached.
    for i in range(1, (branch_factor - count)):
        child_a = alist.copy()
        child_a.set(tt.TIME, str(parent_year.year - (count + i)))
        child_a.set(tt.OP, "value")
        child_a.cost = op_alist.cost + 1
        child_a.node_type = nt.ZNODE
        child_a.set(tt.CONTEXT, op_alist.get(tt.CONTEXT))
        frank.context.flush(child_a, [tt.TIME])
        # op_alist.link_child(child_a)
        G.link(op_alist, child_a, 'value')
    return op_alist
class TestReduce(unittest.TestCase):
    """Tests for the frank.reduce.* aggregation and comparison operators."""

    def setUp(self):
        # Graph G: root alist plus seven children (c1..c7) whose '?x'
        # variables are instantiated with numeric strings for the
        # arithmetic reducers (sum, max, min, regress, ...).
        self.G = InferenceGraph()
        self.alist = Alist(
            **{
                tt.ID: '1',
                tt.SUBJECT: 'Ghana',
                tt.PROPERTY: 'P1082',
                tt.OBJECT: '?x',
                tt.TIME: '2020',
                tt.OPVAR: '?x',
                tt.COST: 1
            })
        self.c1 = Alist(
            **{
                tt.ID: '2',
                tt.SUBJECT: 'Ghana',
                tt.PROPERTY: 'P1082',
                tt.OBJECT: '?x',
                tt.TIME: '2010',
                tt.OPVAR: '?x',
                tt.COST: 1,
                '?x': ''
            })
        self.c1.instantiate_variable('?x', '120')
        self.c2 = Alist(
            **{
                tt.ID: '3',
                tt.SUBJECT: 'Ghana',
                tt.PROPERTY: 'P1082',
                tt.OBJECT: '?x',
                tt.TIME: '2011',
                tt.OPVAR: '?x',
                tt.COST: 1,
                '?x': ''
            })
        self.c2.instantiate_variable('?x', '122')
        self.c3 = Alist(
            **{
                tt.ID: '4',
                tt.SUBJECT: 'Ghana',
                tt.PROPERTY: 'P1082',
                tt.OBJECT: '?x',
                tt.TIME: '2012',
                tt.OPVAR: '?x',
                tt.COST: 1,
                '?x': ''
            })
        self.c3.instantiate_variable('?x', '126')
        self.c4 = Alist(
            **{
                tt.ID: '5',
                tt.SUBJECT: 'Ghana',
                tt.PROPERTY: 'P1082',
                tt.OBJECT: '?x',
                tt.TIME: '2013',
                tt.OPVAR: '?x',
                tt.COST: 1,
                '?x': ''
            })
        self.c4.instantiate_variable('?x', '125')
        self.c5 = Alist(
            **{
                tt.ID: '5',
                tt.SUBJECT: 'Ghana',
                tt.PROPERTY: 'P1082',
                tt.OBJECT: '?x',
                tt.TIME: '2014',
                tt.OPVAR: '?x',
                tt.COST: 1,
                '?x': ''
            })
        self.c5.instantiate_variable('?x', '126')
        self.c6 = Alist(
            **{
                tt.ID: '6',
                tt.SUBJECT: 'Ghana',
                tt.PROPERTY: 'P1082',
                tt.OBJECT: '?x',
                tt.TIME: '2015',
                tt.OPVAR: '?x',
                tt.COST: 1,
                '?x': ''
            })
        self.c6.instantiate_variable('?x', '128')
        self.c7 = Alist(
            **{
                tt.ID: '7',
                tt.SUBJECT: 'Ghana',
                tt.PROPERTY: 'P1082',
                tt.OBJECT: '?x',
                tt.TIME: '2016',
                tt.OPVAR: '?x',
                tt.COST: 1,
                '?x': ''
            })
        self.c7.instantiate_variable('?x', '129')
        self.G.add_alist(self.alist)
        self.G.link(self.alist, self.c1)
        self.G.link(self.alist, self.c2)
        self.G.link(self.alist, self.c3)
        self.G.link(self.alist, self.c4)
        self.G.link(self.alist, self.c5)
        self.G.link(self.alist, self.c6)
        self.G.link(self.alist, self.c7)
        # Graph G2: root alist plus five children (c21..c25) with string
        # values, for the non-numeric value reducers.
        self.G2 = InferenceGraph()
        self.alist2 = Alist(
            **{
                tt.ID: '1',
                tt.SUBJECT: 'Ghana',
                tt.PROPERTY: 'P1082',
                tt.OBJECT: '?x',
                tt.TIME: '2020',
                tt.OPVAR: '?x',
                tt.COST: 1
            })
        self.c21 = Alist(
            **{
                tt.ID: '2',
                tt.SUBJECT: 'Ghana',
                tt.PROPERTY: 'P1082',
                tt.OBJECT: '?x',
                tt.TIME: '2010',
                tt.OPVAR: '?x',
                tt.COST: 1,
                '?x': ''
            })
        self.c21.instantiate_variable('?x', 'a')
        self.c22 = Alist(
            **{
                tt.ID: '3',
                tt.SUBJECT: 'Ghana',
                tt.PROPERTY: 'P1082',
                tt.OBJECT: '?x',
                tt.TIME: '2011',
                tt.OPVAR: '?x',
                tt.COST: 1,
                '?x': ''
            })
        self.c22.instantiate_variable('?x', 'b')
        self.c23 = Alist(
            **{
                tt.ID: '4',
                tt.SUBJECT: 'Ghana',
                tt.PROPERTY: 'P1082',
                tt.OBJECT: '?x',
                tt.TIME: '2012',
                tt.OPVAR: '?x',
                tt.COST: 1,
                '?x': ''
            })
        self.c23.instantiate_variable('?x', 'c')
        self.c24 = Alist(
            **{
                tt.ID: '5',
                tt.SUBJECT: 'Ghana',
                tt.PROPERTY: 'P1082',
                tt.OBJECT: '?x',
                tt.TIME: '2013',
                tt.OPVAR: '?x',
                tt.COST: 1,
                '?x': ''
            })
        self.c24.instantiate_variable('?x', 'd')
        self.c25 = Alist(
            **{
                tt.ID: '5',
                tt.SUBJECT: 'Ghana',
                tt.PROPERTY: 'P1082',
                tt.OBJECT: '?x',
                tt.TIME: '2014',
                tt.OPVAR: '?x',
                tt.COST: 1,
                '?x': ''
            })
        self.c25.instantiate_variable('?x', 'a')
        self.G2.add_alist(self.alist2)
        self.G2.link(self.alist2, self.c21)
        self.G2.link(self.alist2, self.c22)
        self.G2.link(self.alist2, self.c23)
        self.G2.link(self.alist2, self.c24)
        self.G2.link(self.alist2, self.c25)

    def test_value(self):
        # NOTE(review): assertTrue with two args treats '124' as the failure
        # message, so the value is never actually compared — presumably this
        # was meant to be assertEqual; confirm before changing.
        a = frank.reduce.value.reduce(self.alist,
                                      self.G.child_alists(self.alist.id),
                                      self.G)
        self.assertTrue(a.instantiation_value(tt.OBJECT), '124')

    def test_value2(self):
        # NOTE(review): same assertTrue-with-message pattern as test_value.
        a = frank.reduce.value.reduce(self.alist2,
                                      self.G2.child_alists(self.alist2.id),
                                      self.G2)
        self.assertTrue(a.instantiation_value(tt.OBJECT), 'a')

    def test_values(self):
        # 'values' concatenates the children's values in insertion order.
        a = frank.reduce.values.reduce(self.alist,
                                       self.G.child_alists(self.alist.id),
                                       self.G)
        self.assertEqual(a.instantiation_value(tt.OBJECT),
                         '120,122,126,125,126,128,129')

    def test_sum(self):
        a = frank.reduce.sum.reduce(self.alist,
                                    self.G.child_alists(self.alist.id),
                                    self.G)
        self.assertEqual(float(a.instantiation_value(tt.OPVAR)), 876.0)

    def test_max(self):
        a = frank.reduce.max.reduce(self.alist,
                                    self.G.child_alists(self.alist.id),
                                    self.G)
        self.assertEqual(int(a.instantiation_value(tt.OPVAR)), 129)

    def test_min(self):
        a = frank.reduce.min.reduce(self.alist,
                                    self.G.child_alists(self.alist.id),
                                    self.G)
        self.assertEqual(a.instantiation_value(tt.OPVAR), '120')

    def test_count(self):
        a = frank.reduce.count.reduce(self.alist,
                                      self.G.child_alists(self.alist.id),
                                      self.G)
        self.assertEqual(a.instantiation_value(tt.OPVAR), 7)

    def test_product(self):
        a = frank.reduce.product.reduce(self.alist,
                                        self.G.child_alists(self.alist.id),
                                        self.G)
        self.assertEqual(a.instantiation_value(tt.OPVAR), 479724456960000.0)

    def test_regress(self):
        # Linear regression extrapolates the 2010-2016 series to 2020.
        a = frank.reduce.regress.reduce(self.alist,
                                        self.G.child_alists(self.alist.id),
                                        self.G)
        print(a)
        self.assertAlmostEqual(a.instantiation_value(tt.OPVAR),
                               134.89,
                               places=2)

    def test_gpregress(self):
        # Gaussian-process regression on the same series.
        a = frank.reduce.gpregress.reduce(self.alist,
                                          self.G.child_alists(self.alist.id),
                                          self.G)
        print(a)
        self.assertAlmostEqual(a.instantiation_value(tt.OPVAR),
                               134,
                               places=0)

    def test_gpregress_2(self):
        # GP regression with only two (large-magnitude GDP) data points.
        alist = Alist(
            **{
                tt.ID: '101',
                tt.SUBJECT: 'Ghana',
                tt.PROPERTY: 'P1082',
                tt.OBJECT: '?x',
                tt.TIME: '2020',
                tt.OPVAR: '?x',
                tt.COST: 1
            })
        c1 = Alist(
            **{
                tt.ID: '21011',
                tt.SUBJECT: 'Ghana',
                tt.PROPERTY: 'P1082',
                tt.OBJECT: '?x',
                tt.TIME: '2019.0',
                tt.OPVAR: '?x',
                tt.COST: 1,
                '?x': 1839758040765.62
            })
        c2 = Alist(
            **{
                tt.ID: '21012',
                tt.SUBJECT: 'Ghana',
                tt.PROPERTY: 'P1082',
                tt.OBJECT: '?x',
                tt.TIME: '2018.0',
                tt.OPVAR: '?x',
                tt.COST: 1,
                '?x': 1885482534238.33
            })
        G = InferenceGraph()
        G.add_alist(alist)
        G.link(alist, c1)
        G.link(alist, c2)
        a = frank.reduce.gpregress.reduce(alist, G.child_alists(alist.id), G)
        print(a)
        self.assertAlmostEqual(a.instantiation_value(tt.OPVAR),
                               1792866444829.7,
                               places=1)

    def test_do_gpregress(self):
        # Exercise the underlying do_gpregress helper directly on a
        # 14-point [year, value] series, predicting the value at 2022.
        data = [[2019.0, 1839758040765.62], [2018.0, 1885482534238.33],
                [2017.0, 2055505502224.73], [2016.0, 1793989048409.29],
                [2015.0, 1802214373741.32], [2014.0, 2455993625159.37],
                [2013.0, 2472806919901.67], [2012.0, 2465188674415.03],
                [2011.0, 2616201578192.25], [2010.0, 2208871646202.82],
                [2009.0, 1667019780934.28], [2008.0, 1695824571927.15],
                [2007.0, 1397084345950.39], [2006.0, 1107640297889.95]]
        X, y = [], []
        for d in data:
            X.append([d[0]])
            y.append(d[1])
        X = np.array(X)
        y = np.array(y)
        predict = frank.reduce.gpregress.do_gpregress(X, y, np.array(
            [2022.]), (np.max(y) - np.min(y))**2, 1)
        y_predict = predict[0]['y']
        self.assertAlmostEqual(y_predict, 1324535292167, places=0)

    @unittest.skip
    def test_nnpredict(self):
        a = frank.reduce.nnpredict.reduce(self.alist,
                                          self.G.child_alists(self.alist.id),
                                          self.G)
        self.assertAlmostEqual(a.instantiation_value(tt.OPVAR),
                               158.97,
                               places=2)

    def test_comp(self):
        # root = Alist(**{tt.ID: '1', tt.SUBJECT: '$y', tt.PROPERTY: 'P1082',
        #                 tt.OBJECT: '?x', tt.TIME: '2016', tt.OPVAR: '?x', tt.COST: 1})
        # node101 = Alist(**{tt.OP:'comp', tt.ID: '1', tt.SUBJECT: '$y', tt.PROPERTY: 'P1082',
        #                 tt.OBJECT: '?x', tt.TIME: '2016', tt.OPVAR: '?x', tt.COST: 1})
        a = Alist(
            **{
                tt.ID: '1',
                tt.SUBJECT: '$y',
                tt.PROPERTY: 'P1082',
                tt.OBJECT: '?x',
                tt.TIME: '2010',
                tt.OPVAR: '?x',
                tt.COST: 1,
                '$y': {
                    "$is": "Ghana"
                }
            })
        G = InferenceGraph()
        G.add_alist(a)
        # Normalize creates the comp sub-branch that comp.reduce consumes.
        normalize.Normalize().decompose(a, G)
        child1 = G.child_alists(a.id)[0]
        result = frank.reduce.comp.reduce(child1, G.child_alists(child1.id),
                                          G)
        self.assertTrue(result != None)

    def test_eq(self):
        a = Alist(**{
            tt.ID: '1',
            tt.OPVAR: '$x $y',
            '$x': '?x1',
            '$y': '?y1',
            '?_eq_': ''
        })
        b = Alist(**{tt.ID: '2', tt.OPVAR: '?x1', '?x1': 20})
        c = Alist(**{tt.ID: '3', tt.OPVAR: '?y1', '?y1': 20})
        G = InferenceGraph()
        G.add_alist(a)
        G.link(a, b)
        G.link(a, c)
        result = frank.reduce.eq.reduce(a, [b, c], G)
        self.assertTrue(True if result.instantiation_value('?_eq_') ==
                        'true' else False)

    def test_gt(self):
        a = Alist(**{
            tt.ID: '1',
            tt.OPVAR: '$x $y',
            '$x': '?x1',
            '$y': '?y1',
            '?_gt_': ''
        })
        b = Alist(**{tt.ID: '2', tt.OPVAR: '?x1', '?x1': 36})
        c = Alist(**{tt.ID: '3', tt.OPVAR: '?y1', '?y1': 33})
        G = InferenceGraph()
        G.add_alist(a)
        G.link(a, b)
        G.link(a, c)
        result = frank.reduce.gt.reduce(a, [b, c], G)
        self.assertTrue(True if result.instantiation_value('?_gt_') ==
                        'true' else False)

    def test_gte(self):
        a = Alist(**{
            tt.ID: '1',
            tt.OPVAR: '$x $y',
            '$x': '?x1',
            '$y': '?y1',
            '?_gte_': ''
        })
        b = Alist(**{tt.ID: '2', tt.OPVAR: '?x1', '?x1': 33})
        c = Alist(**{tt.ID: '3', tt.OPVAR: '?y1', '?y1': 33})
        G = InferenceGraph()
        G.add_alist(a)
        G.link(a, b)
        G.link(a, c)
        result = frank.reduce.gte.reduce(a, [b, c], G)
        self.assertTrue(True if result.instantiation_value('?_gte_') ==
                        'true' else False)

    def test_lt(self):
        a = Alist(**{
            tt.ID: '1',
            tt.OPVAR: '$x $y',
            '$x': '?x1',
            '$y': '?y1',
            '?_lt_': ''
        })
        b = Alist(**{tt.ID: '2', tt.OPVAR: '?x1', '?x1': 20})
        c = Alist(**{tt.ID: '3', tt.OPVAR: '?y1', '?y1': 30})
        G = InferenceGraph()
        G.add_alist(a)
        G.link(a, b)
        G.link(a, c)
        result = frank.reduce.lt.reduce(a, [b, c], G)
        self.assertTrue(True if result.instantiation_value('?_lt_') ==
                        'true' else False)

    def test_lte(self):
        a = Alist(**{
            tt.ID: '1',
            tt.OPVAR: '$x $y',
            '$x': '?x1',
            '$y': '?y1',
            '?_lte_': ''
        })
        b = Alist(**{tt.ID: '2', tt.OPVAR: '?x1', '?x1': 30})
        c = Alist(**{tt.ID: '3', tt.OPVAR: '?y1', '?y1': 30})
        G = InferenceGraph()
        G.add_alist(a)
        G.link(a, b)
        G.link(a, c)
        result = frank.reduce.lte.reduce(a, [b, c], G)
        self.assertTrue(True if result.instantiation_value('?_lte_') ==
                        'true' else False)