Example #1
def reduce(alist: Alist, children: List[Alist], G: InferenceGraph):
    # do comparisons
    vars_to_compare = alist.get(tt.OPVAR).split(' ')

    # propagate projection vars to parent
    propagate.projections(alist, tuple(children))
    response_var = "?_lte_"
    if len(vars_to_compare) == 0:
        alist.set(response_var, "false")
        return alist

    result = True
    if len(vars_to_compare) > 1 and utils.is_numeric(
            alist.instantiation_value(vars_to_compare[0])):
        first_value = utils.get_number(
            alist.instantiation_value(vars_to_compare[0]), 0)
        for x in vars_to_compare[1:]:
            if utils.is_numeric(alist.instantiation_value(x)):
                result = (first_value <= utils.get_number(
                    alist.instantiation_value(x), 0)) and result
            else:
                result = False
                break
    else:
        result = False
    alist.set(response_var, str(result).lower())

    # alist.instantiate_variable(tt.COV, estimate_uncertainty(
    #   children, True, alist.get(tt.OP), len(children)
    # ))
    return alist
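
The heart of this reducer is a pairwise `<=` test of the first operand against the rest. A minimal standalone sketch of that logic, with a hypothetical `to_number` helper standing in for `utils.is_numeric`/`utils.get_number`:

def to_number(value):
    # hypothetical stand-in for utils.is_numeric + utils.get_number
    try:
        return float(value)
    except (TypeError, ValueError):
        return None

def lte_all(values):
    """True only if values[0] <= v for every later, numeric v."""
    numbers = [to_number(v) for v in values]
    if len(numbers) < 2 or any(n is None for n in numbers):
        return False
    return all(numbers[0] <= n for n in numbers[1:])

assert lte_all(["3", "4.5", "10"]) is True
assert lte_all(["3", "2"]) is False
assert lte_all(["3", "abc"]) is False   # non-numeric operand => false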
Example #2
def reduce(alist: Alist, children: List[Alist], G: InferenceGraph):
    X = []
    y = []
    data_pts = []
    for c in children:
        opVarValue = c.instantiation_value(c.get(tt.OPVAR))
        if utils.is_numeric(opVarValue) and utils.is_numeric(c.get(tt.TIME)):
            x_val = utils.get_number(c.get(tt.TIME), None)
            y_val = utils.get_number(opVarValue, None)
            X.append([x_val])
            y.append(y_val)
            data_pts.append([x_val, y_val])
    X = np.array(X)
    y = np.array(y)
    reg = LinearRegression().fit(X, y)
    x_predict = utils.get_number(alist.get(tt.TIME), None)
    y_predict = reg.predict(np.array([[x_predict]]))[0]
    prediction = [x_predict, float(y_predict)]
    # intercept first, then slope coefficients
    coeffs = [float(reg.intercept_)] + [float(v) for v in reg.coef_]
    fnAndData = \
        """{{"function":{coeffs}, "data":{data_pts}, "prediction":{prediction}}}""".format(
            coeffs=coeffs, data_pts=data_pts, prediction=prediction)

    alist.instantiate_variable(alist.get(tt.OPVAR), y_predict)
    alist.set(tt.FNPLOT, fnAndData)

    alist.instantiate_variable(
        tt.COV,
        estimate_uncertainty(children,
                             len(data_pts) == len(children), alist.get(tt.OP),
                             len(children)))
    return alist
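
The regression step is scikit-learn's standard fit/predict pattern over (time, value) pairs. A self-contained sketch with toy data (the numbers are illustrative only):

import numpy as np
from sklearn.linear_model import LinearRegression

# toy (year, value) observations; illustrative only
data_pts = [[2018, 10.0], [2019, 12.0], [2020, 14.1], [2021, 15.9]]
X = np.array([[x] for x, _ in data_pts])   # shape (n_samples, 1)
y = np.array([v for _, v in data_pts])

reg = LinearRegression().fit(X, y)
y_2025 = reg.predict(np.array([[2025]]))[0]

# intercept first, then slope(s), matching the reducer's `coeffs` layout
coeffs = [float(reg.intercept_)] + [float(c) for c in reg.coef_]
print(coeffs, y_2025)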
Example #3
def reduce(alist: Alist, children: List[Alist], G: InferenceGraph):
    delimiter = ';;'
    total = 0.0
    numList = []
    nonNumList = []
    inst_vars = alist.instantiated_attributes().keys()
    for c in children:
        for k, v in c.instantiated_attributes().items():
            if k not in inst_vars and k in alist.attributes and k != tt.OP:
                c.instantiate_variable(k, v)

        opVarValue = c.get(c.get(tt.OPVAR))
        if isinstance(opVarValue, str):
            opVarValue = list(map(str, opVarValue.split(delimiter)))
        else:
            opVarValue = [opVarValue]
        for opval in opVarValue:
            if utils.is_numeric(opval):
                total += float(opval)
                numList.append(float(opval))
            elif not str(opval).startswith(vx.NESTING):
                # collect non-numeric values, skipping nested-variable
                # placeholders
                nonNumList.append(str(opval))

    if numList or nonNumList:
        if len(numList) >= len(nonNumList):
            opVar = alist.get(tt.OPVAR)
            # average over all children, including non-numeric ones
            valueToReturn = total / len(children)
            if opVar == alist.get(tt.TIME):
                valueToReturn = str(int(valueToReturn))
            alist.instantiate_variable(opVar, valueToReturn)
        else:
            # get the modal value(s); ties are joined with the delimiter
            counts = Counter(nonNumList)
            max_count = max(counts.values())
            modal_items = [x for x, y in counts.items() if y == max_count]
            valueToReturn = f'{delimiter} '.join(map(str, set(modal_items)))

            alist.instantiate_variable(alist.get(tt.OPVAR), valueToReturn)
    else:
        return None

    alist.instantiate_variable(
        tt.COV,
        estimate_uncertainty(children,
                             len(numList) == len(children), alist.get(tt.OP),
                             len(children)))
    return alist
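
When non-numeric values dominate, the reducer falls back to the modal value, using `collections.Counter` and joining ties with the delimiter. The same selection in isolation:

from collections import Counter

def modal_values(items, delimiter=';;'):
    """Return the most frequent item(s); ties joined by the delimiter."""
    counts = Counter(items)
    max_count = max(counts.values())
    modal = [x for x, n in counts.items() if n == max_count]
    return f'{delimiter} '.join(map(str, set(modal)))

print(modal_values(['UK', 'UK', 'France']))   # UK
print(modal_values(['UK', 'France']))         # both values (tie), set order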
Example #4
def reduce(alist: Alist, children: List[Alist], G: InferenceGraph):
    allNumeric = True
    X = []
    y = []
    data_pts = []
    for c in children:
        opVarValue = c.instantiation_value(c.get(tt.OPVAR))
        if utils.is_numeric(opVarValue) and utils.is_numeric(c.get(tt.TIME)):
            x_val = utils.get_number(c.get(tt.TIME), None)
            y_val = utils.get_number(opVarValue, None)
            X.append([x_val])
            y.append(y_val)
            data_pts.append([x_val, y_val])
        else:
            allNumeric = False
    X = np.array(X)
    y = np.array(y)

    x_to_predict = utils.get_number(alist.get(tt.TIME), None)
    # compare against None so that a legitimate time of 0 is still accepted
    if x_to_predict is None:
        return None
    x_to_predict = np.array([x_to_predict])

    gp_prediction = do_gpregress(X, y, x_to_predict,
                                 (np.max(y) - np.min(y))**2, 1)

    if gp_prediction is None:
        return None

    y_predict = gp_prediction[0]['y']
    try:
        alist.instantiate_variable(alist.get(tt.OPVAR), y_predict)
        # the GP's own estimate (gp_prediction[0]['stdev'] / y_predict)
        # would be overwritten immediately, so only the aggregated child
        # uncertainty is kept
        alist.instantiate_variable(
            tt.COV,
            estimate_uncertainty(children, allNumeric, alist.get(tt.OP),
                                 len(children)))
    except Exception as ex:
        print(ex)
        return None

    return alist
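
`do_gpregress` is frank's own helper, so its internals are not shown here; a rough standalone equivalent of the predict-with-uncertainty step can be sketched with scikit-learn's GaussianProcessRegressor (the kernel choice below is an assumption, though the signal variance is seeded from the data range just as `(np.max(y) - np.min(y))**2` is above):

import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, ConstantKernel

X = np.array([[2018.], [2019.], [2020.], [2021.]])
y = np.array([10.0, 12.0, 14.1, 15.9])

# signal variance seeded from the data range, echoing (max(y)-min(y))**2
kernel = ConstantKernel((y.max() - y.min()) ** 2) * RBF(length_scale=1.0)
gp = GaussianProcessRegressor(kernel=kernel, normalize_y=True).fit(X, y)

y_mean, y_std = gp.predict(np.array([[2025.]]), return_std=True)
# coefficient of variation, as the reducer computes stdev / y_predict
print(y_mean[0], y_std[0] / y_mean[0])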
Example #5
    def test_is_number(self):
        self.assertTrue(utils.is_numeric("12.34"))
        self.assertTrue(utils.is_numeric("1234"))
        self.assertTrue(utils.is_numeric("1234748248726347234"))
        self.assertTrue(utils.is_numeric("-1234748248726347234"))
        self.assertTrue(utils.is_numeric("-1234748248726347234e-7"))
        self.assertFalse(utils.is_numeric("some12.34"))
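
All of these cases hinge on a float() round-trip, so a minimal `is_numeric` that satisfies the test (an assumption, not necessarily frank's actual implementation) is:

def is_numeric(value) -> bool:
    """True if value parses as a float (ints, signs, exponents included)."""
    try:
        float(value)
        return True
    except (TypeError, ValueError):
        return False

assert is_numeric("-1234748248726347234e-7")
assert not is_numeric("some12.34")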
Example #6
    def cache_and_print_answer(self, isFinal=False):
        elapsed_time = time.time() - self.start_time
        answer = 'No answer found'

        if self.frank_infer.propagated_alists:
            latest_root = self.frank_infer.propagated_alists[-1]

            # get projection variables from the alist;
            # only one projection variable is expected per alist
            projVars = latest_root.projection_variables()
            if projVars:
                for pvkey, pv in projVars.items():
                    answer = latest_root.instantiation_value(pvkey)

            # if no projection variables exist, then use aggregation variable as answer
            else:
                answer = latest_root.instantiation_value(
                    latest_root.get(tt.OPVAR))

            try:
                if utils.is_numeric(answer):
                    answer = utils.to_precision(
                        answer, int(config.config["answer_sigdig"]))
            except Exception:
                pass

            # format error bar; initialise the fallback so a failed
            # conversion cannot leave errorbar_sigdig undefined below
            errorbar = 0.0
            errorbar_sigdig = "0.0"
            try:
                errorbar = utils.get_number(latest_root.get(
                    tt.COV), 0) * utils.get_number(answer, 0)
                errorbar_sigdig = utils.to_precision(
                    errorbar, int(config.config["errorbar_sigdig"]))
            except Exception:
                pass
            ans_obj = {"answer": f"{answer}",
                       "error_bar": f"{errorbar_sigdig}",
                       "sources": f"{','.join(list(latest_root.data_sources))}",
                       "elapsed_time": f"{round(elapsed_time)}s",
                       "alist": self.frank_infer.propagated_alists[-1].attributes
                       }

            self.inference_graphs[self.frank_infer.session_id] = {
                'graph': self.frank_infer.G,
                'intermediate_answer': ans_obj,
                'answer': ans_obj if isFinal else None,
            }

            if isFinal:
                print(f"\n{pcol.CYAN}Answer alist{pcol.RESETALL} \n" +
                      json.dumps(ans_obj, indent=2))
Example #7
def estimate_uncertainty(nodes: list, all_numeric: bool, operation: str,
                         child_count: int) -> float:
    combined_confidence = 0.0
    try:
        variance_values = []
        sum_variance = 0.0
        sum_mean = 0.0
        n = len(nodes)
        # todo: for now, assume real-valued objects are being estimated
        for r in nodes:
            node_variance = 0.0
            objValue = r.instantiation_value(tt.OBJECT)
            if utils.is_numeric(objValue):
                numeric_value = utils.get_number(objValue, 0)
                node_variance = math.pow(r.get(tt.COV) * numeric_value, 2)
                sum_mean += numeric_value
            else:
                # todo: work on this later; may not work as expected for non-real-valued objects
                node_variance = math.pow(r.get(tt.COV), 2)
                sum_mean += 1.0
            variance_values.append(node_variance)
            sum_variance += node_variance

        missRatio = 1 - (len(nodes) / child_count)
        if operation.lower() in ["value", "mean", "avg", "regress", "product"]:
            combined_confidence = math.sqrt(sum_variance / n) / (sum_mean / n)
        else:
            combined_confidence = math.sqrt(sum_variance) / (sum_mean / n)

        if not utils.is_numeric(combined_confidence):
            combined_confidence = 0.0
        combined_confidence = combined_confidence + \
            (combined_confidence * missRatio)

    except Exception as e:
        print("Uncertainty aggregate error: " + str(e))

    return combined_confidence
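
Numerically, each node contributes the variance (COV × value)²; for mean-like operations the combined coefficient of variation is sqrt(sum_variance / n) / (sum_mean / n), then inflated by the miss ratio. A worked sketch with illustrative numbers:

import math

values = [100.0, 110.0, 90.0]        # child object values (illustrative)
covs = [0.05, 0.10, 0.08]            # per-node coefficients of variation

variances = [(cv * v) ** 2 for cv, v in zip(covs, values)]
n = len(values)
mean = sum(values) / n

# mean-like ops: sqrt(sum_variance / n) / (sum_mean / n)
combined = math.sqrt(sum(variances) / n) / mean
# sum-like ops would use math.sqrt(sum(variances)) / mean instead

child_count = 4                      # one child returned no value
miss_ratio = 1 - n / child_count
combined *= 1 + miss_ratio
print(round(combined, 4))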
Example #8
def reduce(alist: Alist, children: List[Alist], G: InferenceGraph):
    total = 0.0
    allNumeric = True
    for c in children:
        # propagate the children's instantiations to the parent alist
        for k, v in c.instantiated_attributes().items():
            if k in alist.attributes:
                alist.instantiate_variable(k, v)

        opVarValue = c.get(c.get(tt.OPVAR))
        if utils.is_numeric(opVarValue):
            total += float(opVarValue)
        else:
            allNumeric = False

    # average over all children, including non-numeric ones
    alist.instantiate_variable(alist.get(tt.OPVAR), total / len(children))

    alist.instantiate_variable(
        tt.COV,
        estimate_uncertainty(children, allNumeric, alist.get(tt.OP),
                             len(children)))
    return alist
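
Note that the denominator is `len(children)`, not the count of numeric values, so non-numeric children pull the average down rather than being skipped. A tiny sketch of that behavior:

def try_float(v):
    try:
        return float(v)
    except (TypeError, ValueError):
        return None

values = ["10", "20", "n/a"]      # one non-numeric child
nums = [x for x in (try_float(v) for v in values) if x is not None]
# mirrors the reducer: total / len(children), not total / len(nums)
print(sum(nums) / len(values))    # 10.0, not 15.0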
Example #9
    def search_kb(self, alist: Alist):
        """ Search knowledge bases to instantiate variables in alist.

        Args
        ----
        alist: Alist

        Return
        ------
        Returns `True` if variable instantiation is successful from a KB search.

        """
        self.last_heartbeat = time.time()
        prop_refs = []
        found_facts = []
        # cannot search if alist has uninstantiated nested variables
        if alist.uninstantiated_nesting_variables():
            return False

        self.write_trace(
            f"{pcol.MAGENTA}search {alist.id}{pcol.RESET} {alist}{pcol.RESETALL}"
        )
        if alist.state == states.EXPLORED:
            new_alist = alist.copy()
            new_alist.state = states.EXPLORED
            new_alist.set(tt.OPVAR, alist.get(tt.OPVAR))
            return True

        prop_string = alist.get(tt.PROPERTY)
        sources = {
            'wikidata': {
                'fn': wikidata,
                'trust': 'low'
            },
            'worldbank': {
                'fn': worldbank,
                'trust': 'high'
            },
            'musicbrainz': {
                'fn': musicbrainz,
                'trust': 'high'
            }
        }
        # ,
        #     'gregbrimblecom!': {'fn': jsonld.JSONLD.from_url('gregbrimblecom!', 'https://gregbrimble.com'), 'trust': 'high'},
        #     'mozilla': {'fn': jsonld.JSONLD.from_url('mozilla', 'https://www.mozilla.org/en-GB/'), 'trust': 'high'}
        # }
        context = alist.get(tt.CONTEXT)
        context_store = {}
        context_store = {
            **context[0],
            **context[1],
            **context[2]
        } if context else {}
        for source_name, source in sources.items():
            # skip low-trust sources when the context demands high trust
            if (context_store.get(ctx.trust) == 'high'
                    and source['trust'] != 'high'):
                continue

            search_alist = alist.copy()
            # inject context into IR
            search_alist = frank.context.inject_retrieval_context(
                search_alist, source_name)

            # if the property_refs does not contain an entry for the property in this alist
            # search KB for a ref for the property
            prop_sources = []
            if prop_string in self.property_refs:
                prop_sources = [x[1] for x in self.property_refs[prop_string]]

            if (prop_string not in self.property_refs and not prop_string.startswith('__')) \
                    or (prop_string in self.property_refs and source_name not in prop_sources):

                props = source['fn'].search_properties(prop_string)

                if len(props) > 0:
                    # props are assumed sorted by score (descending);
                    # keep every property tied for the top score
                    maxScore = 0
                    for p in props:
                        if p[2] >= maxScore:
                            prop_refs.append((p, source_name))
                            self.reverse_property_refs[p[0]] = prop_string
                            maxScore = p[2]
                        else:
                            break
                self.property_refs[prop_string] = prop_refs

            search_attr = tt.SUBJECT
            uninstantiated_variables = search_alist.uninstantiated_attributes()
            if tt.SUBJECT in uninstantiated_variables:
                search_attr = tt.SUBJECT
            elif tt.OBJECT in uninstantiated_variables:
                search_attr = tt.OBJECT
            elif tt.TIME in uninstantiated_variables:
                search_attr = tt.TIME

            cache_found_flag = False
            if config.config['use_cache']:
                searchable_attr = list(
                    filter(lambda x: x != search_attr,
                           [tt.SUBJECT, tt.PROPERTY, tt.OBJECT, tt.TIME]))
                # search with original property name
                # (cache lookup currently disabled: always a miss)
                (cache_found_flag, results) = (False, [])
                # (cache_found_flag, results) = frank.cache.neo4j.search_cache(alist_to_instantiate=search_alist,
                #                                                         attribute_to_instantiate=search_attr,
                #                                                         search_attributes=searchable_attr)
                if cache_found_flag:
                    found_facts.append(results[0])
                # search with source-specific property IDs

                for (propid, _source_name) in self.property_refs.get(
                        prop_string, []):
                    self.last_heartbeat = time.time()
                    search_alist.set(tt.PROPERTY, propid[0])
                    (cache_found_flag, results) = (False, [])
                    #  = frank.cache.neo4j.search_cache(alist_to_instantiate=search_alist,
                    #                                                         attribute_to_instantiate=search_attr,
                    #                                                         search_attributes=searchable_attr)
                    if cache_found_flag:
                        found_facts.append(results[0])
                        self.write_trace(
                            f'{pcol.MAGENTA}found: cache{pcol.RESETALL}')
                # if not found_facts:
                #     self.write_trace('found:>>> cache')
            if not cache_found_flag and prop_string in self.property_refs:
                # search for data for each property reference source
                for propid_label, _source_name in self.property_refs[
                        prop_string]:
                    self.last_heartbeat = time.time()

                    try:
                        if _source_name == source_name:
                            search_alist.set(tt.PROPERTY, propid_label[0])
                            found_facts.extend(
                                source['fn'].find_property_values(
                                    search_alist, search_attr))
                            # TODO: handle location search in less adhoc manner
                            if alist.get(tt.PROPERTY).lower() == "location":
                                if search_attr == tt.SUBJECT:
                                    found_facts.extend(
                                        wikidata.part_of_relation_subject(
                                            search_alist))
                                elif search_attr == tt.OBJECT:
                                    found_facts.extend(
                                        wikidata.part_of_relation_object(
                                            search_alist))
                            break
                    except Exception as ex:
                        self.write_trace(
                            f"{pcol.RED}Search Error{pcol.RESETALL}",
                            processLog.LogLevel.ERROR)
                        print(str(ex))
            if not found_facts and alist.get(
                    tt.PROPERTY).startswith('__geopolitical:'):
                if search_attr == tt.SUBJECT:
                    found_facts.extend(
                        wikidata.part_of_geopolitical_subject(search_alist))
            # TODO: save facts found to cache if caching is enabled
            # if foundFacts and config.config['use_cache']:
            #     for ff in foundFacts:
            #         cache().save(ff, ff.dataSources[0])

        if found_facts:
            self.last_heartbeat = time.time()
            all_numeric = True
            non_numeric_data_items = []
            numeric_data_items = []

            for ff in found_facts:
                self.last_heartbeat = time.time()
                if utils.is_numeric(ff.get(search_attr)):
                    numeric_data_items.append(
                        utils.get_number(ff.get(search_attr), 0.0))
                else:
                    all_numeric = False
                    non_numeric_data_items.append(ff.get(search_attr))
                ff.set(tt.OPVAR, alist.get(tt.OPVAR))
                ff.set(ff.get(tt.OPVAR), ff.get(search_attr))
                sourceCov = sourcePrior().get_prior(
                    source=list(ff.data_sources)[0]).cov
                ff.set(tt.COV, sourceCov)
                ff.state = states.REDUCIBLE
                ff.set(tt.EXPLAIN, '')
                ff.node_type = nt.FACT
                if ff.get(tt.PROPERTY) in self.reverse_property_refs:
                    ff.set(tt.PROPERTY,
                           self.reverse_property_refs[ff.get(tt.PROPERTY)])

                alist.parent_decomposition = "Lookup"
                self.G.add_alist(alist)
                self.G.link(alist, ff, alist.parent_decomposition)

                # fact is considered reduced
                self.write_trace(
                    f'  {pcol.MAGENTA}found:{pcol.RESET} {str(ff)}{pcol.RESETALL}'
                )
        return len(found_facts) > 0
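
The method's control flow rests on a registry mapping each source name to a handler and a trust level, with low-trust sources skipped when the context demands high trust. A stripped-down sketch of that pattern (the handlers below are stubs, not frank's wikidata/worldbank/musicbrainz modules):

def wikidata_stub(alist):
    return []                                      # stub: no facts

def worldbank_stub(alist):
    return [('Ghana', 'population', '31072940')]   # stub fact

SOURCES = {
    'wikidata':  {'fn': wikidata_stub,  'trust': 'low'},
    'worldbank': {'fn': worldbank_stub, 'trust': 'high'},
}

def search_all(alist, required_trust=None):
    facts = []
    for name, source in SOURCES.items():
        # skip low-trust sources when the context demands high trust
        if required_trust == 'high' and source['trust'] != 'high':
            continue
        facts.extend(source['fn'](alist))
    return facts

print(search_all({}, required_trust='high'))  # only worldbank is queried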