def reduce(alist: Alist, children: List[Alist], G: InferenceGraph):
    """Reduce a less-than-or-equal comparison over the alist's opvar variables.

    Sets the response variable ``?_lte_`` to ``"true"`` iff the first
    variable's instantiation is numeric and <= every other variable's
    numeric instantiation; otherwise ``"false"``.

    Args:
        alist: parent alist whose OPVAR holds space-separated variable names.
        children: child alists whose projections are propagated to the parent.
        G: inference graph (unused here; kept for the reduce interface).

    Returns:
        The (mutated) parent alist.
    """
    # variables to compare are space-separated in the operation variable
    vars_to_compare = alist.get(tt.OPVAR).split(' ')
    # propagate projection vars to parent
    propagate.projections(alist, tuple(children))
    response_var = "?_lte_"
    if len(vars_to_compare) == 0:
        alist.set(response_var, "false")
        return alist

    result = True
    first_value = alist.instantiation_value(vars_to_compare[0])
    if len(vars_to_compare) > 1 and utils.is_numeric(first_value):
        for x in vars_to_compare[1:]:
            x_value = alist.instantiation_value(x)
            # BUG FIX: the original tested `utils.is_numeric(x_value)` twice
            # (duplicated operand in an `and`); a single check suffices since
            # the first variable is already validated above.
            if utils.is_numeric(x_value):
                result = (utils.get_number(first_value, 0) <=
                          utils.get_number(x_value, 0)) and result
            else:
                result = False
                break
    else:
        result = False
    alist.set(response_var, str(result).lower())
    return alist
def reduce(alist: Alist, children: List[Alist], G: InferenceGraph):
    """Predict the parent's opvar value at the parent's TIME by fitting a
    linear regression over the (time, value) pairs contributed by children.

    Also stores the fitted function, data points and prediction in FNPLOT
    and an uncertainty estimate in COV.

    Args:
        alist: parent alist; its TIME attribute is the regression input.
        children: child alists supplying numeric (TIME, opvar-value) samples.
        G: inference graph (unused here; kept for the reduce interface).

    Returns:
        The mutated parent alist, or None when no numeric samples exist.
    """
    X = []
    y = []
    data_pts = []
    for c in children:
        opVarValue = c.instantiation_value(c.get(tt.OPVAR))
        # only children with numeric value AND numeric time contribute
        if utils.is_numeric(opVarValue) and utils.is_numeric(c.get(tt.TIME)):
            x_val = utils.get_number(c.get(tt.TIME), None)
            y_val = utils.get_number(opVarValue, None)
            X.append([x_val])
            y.append(y_val)
            data_pts.append([x_val, y_val])

    # ROBUSTNESS: fitting with zero samples would raise inside sklearn
    if not data_pts:
        return None

    reg = LinearRegression().fit(np.array(X), np.array(y))
    x_predict = utils.get_number(alist.get(tt.TIME), None)
    y_predict = reg.predict(np.array([[x_predict]]))[0]
    prediction = [x_predict, y_predict]
    # intercept first, then the slope coefficient(s)
    coeffs = [reg.intercept_] + list(reg.coef_)
    # (removed unused local `fnStr` — dead code in the original)
    fnAndData = \
        """{{"function":{coeffs}, "data":{data_pts}, "prediction":{prediction}}}""".format(
            coeffs=coeffs, data_pts=data_pts, prediction=prediction)

    alist.instantiate_variable(alist.get(tt.OPVAR), y_predict)
    alist.set(tt.FNPLOT, fnAndData)
    alist.instantiate_variable(
        tt.COV, estimate_uncertainty(children, len(data_pts) == len(children),
                                     alist.get(tt.OP), len(children)))
    return alist
def reduce(alist: Alist, children: List[Alist], G: InferenceGraph):
    """Aggregate children's opvar values into the parent alist.

    When numeric values dominate, instantiates the parent's opvar with the
    mean; otherwise with the modal (most frequent) non-numeric value(s),
    joined by the delimiter. Returns None if no child contributed a value.
    """
    # multiple values may be packed into a single string separated by ';;'
    delimiter = ';;'
    total = 0.0
    numList = []     # numeric values gathered from children
    nonNumList = []  # string values considered for the modal answer
    inst_vars = alist.instantiated_attributes().keys()
    for c in children:
        # push down to the child any attribute value the parent shares but
        # has not itself instantiated (except the operation attribute)
        for k, v in c.instantiated_attributes().items():
            if k not in inst_vars and k in alist.attributes and k != tt.OP:
                c.instantiate_variable(k, v)

        opVarValue = c.get(c.get(tt.OPVAR))
        if isinstance(opVarValue, str):
            opVarValue = list(map(str, opVarValue.split(delimiter)))
        else:
            opVarValue = [opVarValue]
        for opval in opVarValue:
            if utils.is_numeric(opval):
                total += float(opval)
                numList.append(float(opval))
                # numeric values also feed the modal tally unless they are
                # nested-variable references
                if not str(opval).startswith(vx.NESTING):
                    nonNumList.append(str(opval))
            else:
                # if not c.get(c.get(tt.OPVAR)).startswith(vx.NESTING):
                #     nonNumList.append(c.get(c.get(tt.OPVAR)))
                nonNumList.append(opval)

    if numList or nonNumList:
        if len(numList) >= len(nonNumList):
            opVar = alist.get(tt.OPVAR)
            # NOTE(review): mean divides by len(children), not len(numList);
            # confirm this is intended when some children are non-numeric
            valueToReturn = total / len(children)
            if opVar == alist.get(tt.TIME):
                # time-valued answers are reported as integer strings
                valueToReturn = str(int(valueToReturn))
            alist.instantiate_variable(opVar, valueToReturn)
        else:
            # # get modal value
            # valueToReturn = max(nonNumList, key=nonNumList.count)
            counts = dict(Counter(nonNumList))
            counts_set = set(counts.values())
            max_val = max(counts_set)
            # all values tied at the maximum frequency are returned
            items = [x for x, y in counts.items() if y == max_val]
            valueToReturn = f'{delimiter} '.join(map(str, set(items)))
            # if len(nonNumList) == 1:
            #     valueToReturn = nonNumList[0]
            # else:
            #     # return list of different values
            #     valueToReturn = ', '.join(map(str,set(nonNumList)))
            alist.instantiate_variable(alist.get(tt.OPVAR), valueToReturn)
    else:
        return None

    alist.instantiate_variable(
        tt.COV, estimate_uncertainty(children, len(numList) == len(children),
                                     alist.get(tt.OP), len(children)))
    return alist
def reduce(alist: Alist, children: List[Alist], G: InferenceGraph):
    """Predict the parent's opvar value at the parent's TIME using
    Gaussian-process regression over the children's (time, value) samples.

    Args:
        alist: parent alist; its TIME attribute is the prediction input.
        children: child alists supplying numeric (TIME, opvar-value) samples.
        G: inference graph (unused here; kept for the reduce interface).

    Returns:
        The mutated parent alist, or None when the prediction time is
        missing or the GP regression fails.
    """
    allNumeric = True
    X = []
    y = []
    data_pts = []
    for c in children:
        opVarValue = c.instantiation_value(c.get(tt.OPVAR))
        if utils.is_numeric(opVarValue) and utils.is_numeric(c.get(tt.TIME)):
            x_val = utils.get_number(c.get(tt.TIME), None)
            y_val = utils.get_number(opVarValue, None)
            X.append([x_val])
            y.append(y_val)
            data_pts.append([x_val, y_val])
        else:
            allNumeric = False

    X = np.array(X)
    y = np.array(y)
    x_to_predict = utils.get_number(alist.get(tt.TIME), None)
    # BUG FIX: `if not x_to_predict` also rejected the legitimate time value
    # 0; only a failed numeric conversion (None) should abort the reduction.
    if x_to_predict is None:
        return None
    x_to_predict = np.array([x_to_predict])

    # signal-variance prior: squared range of the observed y values
    gp_prediction = do_gpregress(X, y, x_to_predict,
                                 (np.max(y) - np.min(y))**2, 1)
    if gp_prediction is None:
        return None

    y_predict = gp_prediction[0]['y']
    try:
        alist.instantiate_variable(alist.get(tt.OPVAR), y_predict)
        # NOTE(review): COV is instantiated twice here; the second call
        # overwrites the GP stdev-based value — confirm which is intended.
        alist.instantiate_variable(tt.COV,
                                   gp_prediction[0]['stdev'] / y_predict)
        alist.instantiate_variable(
            tt.COV, estimate_uncertainty(children, allNumeric,
                                         alist.get(tt.OP), len(children)))
    except Exception as ex:
        print(ex)
        return None
    return alist
def test_is_number(self):
    """is_numeric accepts integer/float/scientific strings, rejects mixed text."""
    accepted = (
        "12.34",
        "1234",
        "1234748248726347234",
        "-1234748248726347234",
        "-1234748248726347234e-7",
    )
    for candidate in accepted:
        self.assertTrue(utils.is_numeric(candidate))
    self.assertFalse(utils.is_numeric("some12.34"))
def cache_and_print_answer(self, isFinal=False):
    """Cache the latest inferred answer in ``self.inference_graphs`` and,
    when `isFinal`, print the answer alist as JSON.

    Args:
        isFinal: True when inference has finished; stores the answer under
            the 'answer' key (otherwise only 'intermediate_answer') and
            prints it.
    """
    elapsed_time = time.time() - self.start_time
    answer = 'No answer found'
    if self.frank_infer.propagated_alists:
        latest_root = self.frank_infer.propagated_alists[-1]
        # get projection variables from the alist
        # only one projection variable can be used as an alist
        projVars = latest_root.projection_variables()
        if projVars:
            for pvkey, pv in projVars.items():
                answer = latest_root.instantiation_value(pvkey)
        # if no projection variables exist, then use aggregation variable as answer
        else:
            answer = latest_root.instantiation_value(
                latest_root.get(tt.OPVAR))
        try:
            if utils.is_numeric(answer):
                answer = utils.to_precision(
                    answer, int(config.config["answer_sigdig"]))
        except Exception:
            pass

        # format error bar
        errorbar = 0.0
        # BUG FIX: errorbar_sigdig was unbound (NameError in the f-string
        # below) whenever the try block raised before assigning it.
        errorbar_sigdig = "0.0"
        try:
            errorbar = utils.get_number(latest_root.get(
                tt.COV), 0) * utils.get_number(answer, 0)
            errorbar_sigdig = utils.to_precision(
                errorbar, int(config.config["errorbar_sigdig"]))
        except Exception:
            pass

        ans_obj = {"answer": f"{answer}",
                   "error_bar": f"{errorbar_sigdig}",
                   "sources": f"{','.join(list(latest_root.data_sources))}",
                   "elapsed_time": f"{round(elapsed_time)}s",
                   "alist": self.frank_infer.propagated_alists[-1].attributes
                   }
        self.inference_graphs[self.frank_infer.session_id] = {
            'graph': self.frank_infer.G,
            'intermediate_answer': ans_obj,
            'answer': ans_obj if isFinal else None,
        }
        if isFinal:
            print(f"\n{pcol.CYAN}Answer alist{pcol.RESETALL} \n" +
                  json.dumps(ans_obj, indent=2))
def estimate_uncertainty(nodes: list, all_numeric: bool, operation: str,
                         child_count: float) -> float:
    """Estimate the combined coefficient of variation of reduced nodes.

    Each node contributes a variance of (COV * value)^2 for numeric objects
    (or COV^2 otherwise). The combined figure is inflated by the fraction of
    children that produced no node.

    Args:
        nodes: alist nodes whose OBJECT/COV attributes are aggregated.
        all_numeric: currently unused; kept for interface compatibility.
        operation: aggregation operation name; mean-like ops normalize the
            variance by n.
        child_count: number of children expected (for the miss ratio).

    Returns:
        The combined confidence value, or 0.0 if it cannot be computed.
    """
    combined_confidence = 0.0
    # ROBUSTNESS: empty nodes / zero child_count previously triggered a
    # ZeroDivisionError that was swallowed below (with a noisy print) and
    # yielded the same 0.0 — return it directly instead.
    if not nodes or not child_count:
        return combined_confidence
    try:
        variance_values = []
        sum_variance = 0.0
        sum_mean = 0.0
        n = len(nodes)
        # todo: for now assume the real-valued objects are being estimated
        for r in nodes:
            node_variance = 0.0
            objValue = r.instantiation_value(tt.OBJECT)
            if utils.is_numeric(objValue):
                numeric_value = utils.get_number(objValue, 0)
                node_variance = math.pow(r.get(tt.COV) * numeric_value, 2)
                sum_mean += numeric_value
            else:
                # todo: may not work as expected for non-real-valued objects
                node_variance = math.pow(r.get(tt.COV), 2)
                sum_mean += 1.0
            variance_values.append(node_variance)
            sum_variance += node_variance

        # fraction of expected children that yielded no node
        missRatio = 1 - (len(nodes) / child_count)
        if operation.lower() in ["value", "mean", "avg", "regress", "product"]:
            combined_confidence = math.sqrt(sum_variance / n) / (sum_mean / n)
        else:
            combined_confidence = math.sqrt(sum_variance) / (sum_mean / n)
        if not utils.is_numeric(combined_confidence):
            combined_confidence = 0.0
        # inflate uncertainty proportionally to the missing-children ratio
        combined_confidence = combined_confidence + \
            (combined_confidence * missRatio)
    except Exception as e:
        print("Uncertainty aggregate error: " + str(e))
    return combined_confidence
def reduce(alist: Alist, children: List[Alist], G: InferenceGraph):
    """Reduce children to a mean: instantiate the parent's opvar with the
    average of the children's opvar values.

    Args:
        alist: parent alist receiving the mean and propagated attributes.
        children: child alists contributing opvar values.
        G: inference graph (unused here; kept for the reduce interface).

    Returns:
        The mutated parent alist, or None when there are no children.
    """
    # ROBUSTNESS: guard the division below against an empty child list
    if not children:
        return None

    total = 0.0  # renamed from `sum`, which shadowed the builtin
    allNumeric = True
    for c in children:
        # propagate any instantiated attribute the parent also declares
        for k, v in c.instantiated_attributes().items():
            if k in alist.attributes:
                alist.instantiate_variable(k, v)

        opVarValue = c.get(c.get(tt.OPVAR))
        if utils.is_numeric(opVarValue):
            total += float(opVarValue)
        else:
            allNumeric = False

    alist.instantiate_variable(alist.get(tt.OPVAR), total / len(children))
    alist.instantiate_variable(
        tt.COV, estimate_uncertainty(children, allNumeric,
                                     alist.get(tt.OP), len(children)))
    return alist
def search_kb(self, alist: Alist):
    """ Search knowledge bases to instantiate variables in alist.

    Iterates over the registered KB sources (wikidata, worldbank,
    musicbrainz), resolves the alist's property to source-specific property
    IDs, retrieves matching facts, and links each fact into the inference
    graph as a REDUCIBLE child of `alist`.

    Args
    ----
    alist: Alist

    Return
    ------
    Returns `True` if variable instantiation is successful from a KB search.
    """
    self.last_heartbeat = time.time()
    prop_refs = []
    found_facts = []
    # cannot search if alist has uninstantiated nested variables
    if alist.uninstantiated_nesting_variables():
        return found_facts

    self.write_trace(
        f"{pcol.MAGENTA}search {alist.id}{pcol.RESET} {alist}{pcol.RESETALL}"
    )
    if alist.state == states.EXPLORED:
        # NOTE(review): this copy is never linked into the graph before
        # returning; presumably a short-circuit for already-explored nodes
        # — confirm the copy is intentional.
        new_alist = alist.copy()
        new_alist.state = states.EXPLORED
        new_alist.set(tt.OPVAR, alist.get(tt.OPVAR))
        return True

    prop_string = alist.get(tt.PROPERTY)
    # registry of KB adapters and their trust levels
    sources = {
        'wikidata': {'fn': wikidata, 'trust': 'low'},
        'worldbank': {'fn': worldbank, 'trust': 'high'},
        'musicbrainz': {'fn': musicbrainz, 'trust': 'high'}
    }
    # ,
    # 'gregbrimblecom!': {'fn': jsonld.JSONLD.from_url('gregbrimblecom!', 'https://gregbrimble.com'), 'trust': 'high'},
    # 'mozilla': {'fn': jsonld.JSONLD.from_url('mozilla', 'https://www.mozilla.org/en-GB/'), 'trust': 'high'}
    # }
    context = alist.get(tt.CONTEXT)
    # context is assumed to be a 3-element sequence of dicts when present
    context_store = {}
    context_store = {
        **context[0], **context[1], **context[2]
    } if context else {}
    for source_name, source in sources.items():
        # check context for trust: a 'high' trust requirement in the query
        # context skips low-trust sources
        if ctx.trust in context_store:
            if context_store[
                    ctx.trust] == 'high' and source['trust'] != 'high':
                continue
        # for source_name, source in {'worldbank':worldbank}.items():
        search_alist = alist.copy()
        # inject context into IR
        search_alist = frank.context.inject_retrieval_context(
            search_alist, source_name)

        # if the property_refs does not contain an entry for the property in this alist
        # search KB for a ref for the property
        prop_sources = []
        if prop_string in self.property_refs:
            prop_sources = [x[1] for x in self.property_refs[prop_string]]
        if (prop_string not in self.property_refs and not prop_string.startswith('__')) \
                or (prop_string in self.property_refs and source_name not in prop_sources):
            props = source['fn'].search_properties(prop_string)
            if len(props) > 0:
                # keep only the top-scoring property candidates; `props` is
                # assumed to be sorted by score descending — TODO confirm
                maxScore = 0
                for p in props:
                    if p[2] >= maxScore:
                        prop_refs.append((p, source_name))
                        self.reverse_property_refs[p[0]] = prop_string
                        maxScore = p[2]
                    else:
                        break
                self.property_refs[prop_string] = prop_refs

        # decide which attribute to instantiate: first uninstantiated one
        # among SUBJECT, OBJECT, TIME (defaults to SUBJECT)
        search_attr = tt.SUBJECT
        uninstantiated_variables = search_alist.uninstantiated_attributes()
        if tt.SUBJECT in uninstantiated_variables:
            search_attr = tt.SUBJECT
        elif tt.OBJECT in uninstantiated_variables:
            search_attr = tt.OBJECT
        elif tt.TIME in uninstantiated_variables:
            search_attr = tt.TIME

        cache_found_flag = False
        if config.config['use_cache']:
            searchable_attr = list(
                filter(lambda x: x != search_attr,
                       [tt.SUBJECT, tt.PROPERTY, tt.OBJECT, tt.TIME]))
            # search with original property name
            # NOTE(review): the cache lookup is currently disabled — the
            # flag/results are hard-wired to (False, []) below.
            (cache_found_flag, results) = (False, [])
            # (cache_found_flag, results) = frank.cache.neo4j.search_cache(alist_to_instantiate=search_alist,
            #                                             attribute_to_instantiate=search_attr,
            #                                             search_attributes=searchable_attr)
            if cache_found_flag == True:
                found_facts.append(results[0])
            # search with source-specific property IDs
            for (propid, _source_name) in self.property_refs[prop_string]:
                self.last_heartbeat = time.time()
                search_alist.set(tt.PROPERTY, propid[0])
                (cache_found_flag, results) = (False, [])
                # = frank.cache.neo4j.search_cache(alist_to_instantiate=search_alist,
                #                                             attribute_to_instantiate=search_attr,
                #                                             search_attributes=searchable_attr)
                if cache_found_flag == True:
                    found_facts.append(results[0])
                    self.write_trace(
                        f'{pcol.MAGENTA}found: cache{pcol.RESETALL}')
        # if not found_facts:
        #     self.write_trace('found:>>> cache')
        if not cache_found_flag and prop_string in self.property_refs:
            # search for data for each property reference source
            for propid_label, _source_name in self.property_refs[
                    prop_string]:
                self.last_heartbeat = time.time()
                try:
                    if _source_name == source_name:
                        search_alist.set(tt.PROPERTY, propid_label[0])
                        found_facts.extend(
                            source['fn'].find_property_values(
                                search_alist, search_attr))
                        # TODO: handle location search in less adhoc manner
                        if alist.get(tt.PROPERTY).lower() == "location":
                            if search_attr == tt.SUBJECT:
                                found_facts.extend(
                                    wikidata.part_of_relation_subject(
                                        search_alist))
                            elif search_attr == tt.OBJECT:
                                found_facts.extend(
                                    wikidata.part_of_relation_object(
                                        search_alist))
                        break
                except Exception as ex:
                    self.write_trace(
                        f"{pcol.RED}Search Error{pcol.RESETALL}",
                        processLog.LogLevel.ERROR)
                    print(str(ex))
        # fallback for synthetic geopolitical properties
        if not found_facts and alist.get(
                tt.PROPERTY).startswith('__geopolitical:'):
            if search_attr == tt.SUBJECT:
                found_facts.extend(
                    wikidata.part_of_geopolitical_subject(search_alist))

    # TODO: save facts found to cache if caching is enabled
    # if foundFacts and config.config['use_cache']:
    #     for ff in foundFacts:
    #         cache().save(ff, ff.dataSources[0])
    if found_facts:
        self.last_heartbeat = time.time()
        all_numeric = True
        non_numeric_data_items = []
        numeric_data_items = []
        for ff in found_facts:
            self.last_heartbeat = time.time()
            if utils.is_numeric(ff.get(search_attr)):
                numeric_data_items.append(
                    utils.get_number(ff.get(search_attr), 0.0))
            else:
                all_numeric = False
                non_numeric_data_items.append(ff.get(search_attr))
            # copy the parent's opvar onto the fact and bind it to the
            # retrieved attribute value
            ff.set(tt.OPVAR, alist.get(tt.OPVAR))
            ff.set(ff.get(tt.OPVAR), ff.get(search_attr))
            # prior coefficient of variation for the fact's data source
            sourceCov = sourcePrior().get_prior(
                source=list(ff.data_sources)[0]).cov
            ff.set(tt.COV, sourceCov)
            ff.state = states.REDUCIBLE
            ff.set(tt.EXPLAIN, '')
            ff.node_type = nt.FACT
            # map the source-specific property ID back to the original name
            if ff.get(tt.PROPERTY) in self.reverse_property_refs:
                ff.set(tt.PROPERTY,
                       self.reverse_property_refs[ff.get(tt.PROPERTY)])
            alist.parent_decomposition = "Lookup"
            self.G.add_alist(alist)
            self.G.link(alist, ff, alist.parent_decomposition)
            # fact is considered reduced
            self.write_trace(
                f'  {pcol.MAGENTA}found:{pcol.RESET} {str(ff)}{pcol.RESETALL}'
            )
    return len(found_facts) > 0