Example #1
    def enumerate_TTAs(self, query, evidence=None):
        """Enumerates the total truth assignments computed for the given query.
        
        Keyword arguments:
        query -- pysmt formula encoding the query
        evidence -- pysmt formula encoding the evidence (optional, default: None)

        """
        msg = "Enumerating TTAs for P(Q|E), Q: {},E: {}".format(
            serialize(query),
            serialize(evidence) if evidence != None else "None")
        self.logger.debug(msg)
        query_labels = set()

        if evidence:
            if contains_labels(evidence):
                msg = "The evidence contains variables with reserved names."
                self.logger.error(msg)
                raise WMIRuntimeException(msg)

            # label LRA-atoms in the evidence
            bool_evidence = WMIInference._query_labelling(
                evidence, query_labels)
            f_e = And(self.support, bool_evidence)
        else:
            f_e = self.support

        if contains_labels(query):
            msg = "The query contains variables with reserved names."
            self.logger.error(msg)
            raise WMIRuntimeException(msg)

        # label LRA-atoms in the query
        bool_query = WMIInference._query_labelling(query, query_labels)
        f_e_q = And(f_e, bool_query)

        # extract the domain of integration according to the model,
        # query and evidence
        domX = set(get_real_variables(f_e_q))
        domA = {x for x in get_boolean_variables(f_e_q) if not is_label(x)}

        n_ttas_e_q = self.wmi.enumerate_TTAs(f_e_q, self.weights, domA, domX)
        if n_ttas_e_q > 0:
            n_ttas_e = self.wmi.enumerate_TTAs(f_e, self.weights, domA, domX)
            if n_ttas_e == 0:
                msg = "(Knowledge base & Evidence) is inconsistent."
                self.logger.error(msg)
                raise WMIRuntimeException(msg)

            return n_ttas_e_q + n_ttas_e
        else:
            return 0
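
A hypothetical usage sketch (support and weights are illustrative; WMIInference is the class whose constructor appears in Example #4):

# Hypothetical usage: count TTAs for a query conditioned on evidence.
from pysmt.shortcuts import Symbol, And, Ite, LE, Real
from pysmt.typing import REAL

x = Symbol("x", REAL)
A = Symbol("A")
support = And(LE(Real(0), x), LE(x, Real(1)))
weights = Ite(A, x, Real(1))

inference = WMIInference(support, weights)
print(inference.enumerate_TTAs(LE(Real(0.5), x), evidence=A))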
Example #2
def learn_supports_adaptive(dataset, seed, bg_knowledge=None, timeout=None, initial=None, mult=None,
                            hops=None, max_mult=None, negative_bootstrap=None):

    if timeout is None:
        timeout = DEF_TIMEOUT

    if initial is None:
        initial = DEF_INITIAL

    if mult is None:
        mult = DEF_MULT

    if hops is None:
        hops = DEF_HOPS

    if max_mult is None:
        max_mult = DEF_MAX_MULT

    results = []
    discovered = set()
    t_mults = set()
    
    last = initial
    i = 0

    msg = "Adaptive support learning. timeout = {}, init = {}, mult = {}, hops = {}"
    logger.info(msg.format(timeout, initial, mult, hops))
    while i < hops and last < max_mult:
        logger.debug("i: {} last: {}".format(i, last))
        t_mults.add(last)
        res = learn_support(dataset, seed, last, timeout=timeout, bg_knowledge=bg_knowledge,
                            symmetry_breaking="mvn",
                            negative_bootstrap=negative_bootstrap)
        
        if res is not None:
            chi, k, h, thresholds = res
            chistr = serialize(chi)            
            smaller = {t for t in t_mults if t < last}
            
            if chistr not in discovered:
                discovered.add(chistr)
                results.append(res + (last,))

            if len(smaller) > 0:
                last = (last + max(smaller)) / 2
                i += 1
            else:
                last = last / mult

        else: # last t_mult timed out
            larger = {t for t in t_mults if t > last}
            if len(larger) > 0:
                last = (last + min(larger)) / 2
                i += 1
            else:
                last = last * mult

    return results
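
The loop above bisects toward the nearest multiplier already tried: downward after a success, upward after a timeout. A standalone toy of just that update rule (the succeeded flag stands in for learn_support returning a result versus timing out; hop counting is omitted):

# Toy illustration of the adaptive update rule used above.
def next_mult(last, tried, mult, succeeded):
    if succeeded:
        smaller = {t for t in tried if t < last}
        return (last + max(smaller)) / 2 if smaller else last / mult
    larger = {t for t in tried if t > last}
    return (last + min(larger)) / 2 if larger else last * mult

tried = {1.0}
print(next_mult(1.0, tried, 2, succeeded=False))  # no larger tried: 2.0
tried.add(2.0)
print(next_mult(2.0, tried, 2, succeeded=True))   # bisect toward 1.0: 1.5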
Example #3
def get_canonical_form(expression):
    """Given a pysmt formula representing a polynomial, rewrites it in canonical
    form.

    Keyword arguments:
    expression -- pysmt formula

    Raises:
    WMIParsingError -- If it fails to parse back the formula after converting it

    """
    canonical = sympy2pysmt(expand(serialize(expression)))
    return canonical
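
For intuition, the canonical form is whatever sympy's expand() produces on the serialized polynomial before sympy2pysmt parses it back; a sympy-only sketch (the input polynomial is illustrative):

# What expand() does to the serialized polynomial.
from sympy import expand, sympify

poly = sympify("(x + 1)*(x - 1)")
print(expand(poly))  # x**2 - 1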
Example #4
    def __init__(self, support, weights, check_consistency=False):
        """Default constructor.

        Keyword arguments:
        support -- pysmt formula encoding the support
        weights -- pysmt formula encoding the FIUC weight function
        check_consistency -- if True, raises a WMIRuntimeException if
            the model is inconsistent (default: False)

        """
        self.init_sublogger(__name__)

        # check if the support and weight function contain reserved names
        if contains_labels(support):
            msg = "The support contains variables with reserved names."
            self.logger.error(msg)
            raise WMIRuntimeException(msg)

        if contains_labels(weights):
            msg = "The weight function contains variables with reserved names."
            self.logger.error(msg)
            raise WMIRuntimeException(msg)

        # labelling the weight function conditions
        self.weights = Weights(weights)
        self.support = And(support, self.weights.labelling)

        self.logger.debug("Support: {}".format(serialize(support)))
        self.logger.debug("Weights: {}".format(serialize(weights)))

        # initialize the WMI engine
        self.wmi = WMI()

        # check support consistency if requested
        if check_consistency and not WMI.check_consistency(support):
            raise WMIRuntimeException(WMIInference.MSG_INCONSISTENT_SUPPORT)
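
A hypothetical sketch of the consistency check, assuming a constant weight function is accepted (the unsatisfiable support is illustrative):

# Hypothetical: an unsatisfiable support trips the consistency check.
from pysmt.shortcuts import Symbol, And, LE, Real
from pysmt.typing import REAL

x = Symbol("x", REAL)
unsat_support = And(LE(x, Real(0)), LE(Real(1), x))  # x <= 0 and 1 <= x
try:
    WMIInference(unsat_support, Real(1), check_consistency=True)
except WMIRuntimeException as e:
    print("inconsistent model rejected:", e)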
Example #5
def pysmt2sympy(expression):
    """Converts a pysmt formula representing a polynomial into a string.
        The string can then be read and modified by sympy.
    
    Args:
        formula (FNode): The pysmt formula to convert.
    
    Returns:
        str: The string representing the formula.
        
    Raises:
        WMIParsingException: If the method fails to parse the formula.
        
    """
    serialize_formula = serialize(expression)
    try:
        sympy_formula = sympify(serialize_formula)
    except SympifyError:
        raise WMIParsingException(WMIParsingException.CANNOT_CONVERT_PYSMT_FORMULA_TO_SYMPY, expression)
    return sympy_formula
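
A minimal round-trip sketch (the polynomial is illustrative; the exact sympy rendering may vary):

# Convert a pysmt polynomial to its sympy counterpart.
from pysmt.shortcuts import Symbol, Plus, Times, Real
from pysmt.typing import REAL

x = Symbol("x", REAL)
poly = Times(Plus(x, Real(1)), Plus(x, Real(1)))  # (x + 1)*(x + 1)
print(pysmt2sympy(poly))  # e.g. (x + 1.0)**2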
Example #6
def compute_print(method, query, evidence):
    print("query: ", serialize(query))
    print("evidence: ", serialize(evidence) if evidence else "-")
    prob = method.compute_normalized_probability(query, evidence)
    print("normalized: ", prob)
    print("--------------------------------------------------")
Example #7
            else:
                raise WMIParsingError("Unhandled formula format", formula)


if __name__ == "__main__":
    from pysmt.shortcuts import Symbol, Ite, And, LE, LT, Real, Times, serialize
    from pysmt.typing import REAL

    def compute_print(method, query, evidence):
        print("query: ", serialize(query))
        print("evidence: ", serialize(evidence) if evidence else "-")
        prob = method.compute_normalized_probability(query, evidence)
        print("normalized: ", prob)
        print("--------------------------------------------------")

    x = Symbol("x", REAL)
    A = Symbol("A")
    support = And(LE(Real(-1), x), LE(x, Real(1)))
    weights = Ite(LT(Real(0), x), Ite(A, Times(Real(2), x), x),
                  Ite(A, Times(Real(-2), x), Times(Real(-1), x)))

    praise = PRAiSEInference(support, weights)
    print("support: ", serialize(support))
    print("weights: ", serialize(weights))
    print("==================================================")

    suite = [(A, None), (And(A, LE(Real(0), x)), None), (LE(Real(0), x), A)]

    for query, evidence in suite:
        compute_print(praise, query, evidence)
Example #8
def run_problem(problem,
                learner,
                seed,
                n_samples,
                timeout,
                global_norm,
                use_lariat=True):

    ground_truth = problem.model
    evaluation = dict()

    train = problem.datasets['train']
    valid = problem.datasets['valid']

    train_valid = Dataset(train.features, train.data + valid.data,
                          train.constraints)

    if problem.learned_supports is not None:
        prior_supports = {
            problem.metadata['supports_metadata'][i]['support_threshold_mult']:
            chi
            for i, chi in enumerate(problem.learned_supports)
        }
    else:
        logger.warning("Couldn't find any learned support.")
        prior_supports = dict()

    prior_supports['None'] = None
    prior_supports['gt-renorm'] = ground_truth.support

    t_0 = time()
    learner.estimate_density(train, validation_data=valid)
    t_f = time() - t_0
    logger.info("training time: {}".format(t_f))
    evaluation['training_time'] = t_f

    learned_models = []
    cached_models = dict()
    max_ll = None
    best = None

    logger.info("Evaluating:\n {}".format("\n".join(
        map(str, prior_supports.keys()))))

    for t_mult, prior_support in prior_supports.items():

        if t_mult != 'None' and not use_lariat:
            continue

        evaluation[t_mult] = dict()
        ps_str = t_mult if isinstance(t_mult, str) else serialize(prior_support)

        if ps_str in cached_models:
            learned_model, evaluation[t_mult] = cached_models[ps_str]
        else:
            try:
                logger.info(
                    "--------------------------------------------------")
                logger.info("Support: {}".format(t_mult))

                mode = RENORM_FULL if prior_support is not None else RENORM_OFF
                t_0 = time()
                learned_model, renormd = learner.renormalize(
                    train,
                    seed,
                    mode=mode,
                    support=prior_support,
                    timeout=timeout,
                    global_norm=global_norm)
                t_f = time() - t_0
                if not renormd and prior_support is not None:
                    continue

                evaluation[t_mult]['renorm_time'] = t_f

            except CalledProcessError as e:
                logger.warning("XADD error: {}".format(e))
                continue

            except ModelException as e:
                logger.warning("Model error: {}".format(e))
                continue

            logger.debug("Computing approx-IAE")
            iae = approx_IAE(learned_model, ground_truth, seed, n_samples)
            evaluation[t_mult]['approx-iae'] = iae

            logger.debug("Computing train-LL")
            train_ll, train_out = learned_model.log_likelihood(train)
            evaluation[t_mult]['train-ll'] = train_ll
            evaluation[t_mult]['train-out'] = train_out
            logger.debug("Computing valid-LL")
            valid_ll, valid_out = learned_model.log_likelihood(valid)
            evaluation[t_mult]['valid-ll'] = valid_ll
            evaluation[t_mult]['valid-out'] = valid_out
            train_valid_ll, train_valid_out = learned_model.log_likelihood(
                train_valid)
            evaluation[t_mult]['train-valid-ll'] = train_valid_ll
            evaluation[t_mult]['train-valid-out'] = train_valid_out

            if t_mult not in ['None','gt-renorm'] \
               and (max_ll is None or valid_ll > max_ll):
                max_ll = valid_ll
                best = t_mult

            logger.debug("Computing volume difference")
            poly1 = Model(learned_model.support, None, ground_truth.get_vars(),
                          ground_truth.bounds)
            poly2 = Model(ground_truth.support, None, ground_truth.get_vars(),
                          ground_truth.bounds)
            vol_diff = ISE(poly1, poly2, seed, n_samples, engine='rej')

            evaluation[t_mult]['vol-diff'] = vol_diff

            cached_models[ps_str] = (learned_model, evaluation[t_mult])

            domain = Domain.make(
                map(lambda v: v.symbol_name(), ground_truth.boolean_vars),
                learned_model.bounds)
            eval_falses = evaluate(domain, learned_model.support,
                                   np.asarray(train.data))

        learned_models.append((t_mult, learned_model))

    evaluation['best'] = best

    tmuls = sorted([
        key for key in evaluation
        if key not in ['None', 'gt-renorm', 'training_time', 'best']
    ])

    eval_msg = """RESULTS:
Training time: {}
No renorm: {}
GT renorm: {}
Best chi : {}

All chis:
{}
""".format(evaluation['training_time'], evaluation['None'],
           evaluation['gt-renorm'], (best, evaluation.get(best)),
           "\n".join([str((tmul, evaluation[tmul])) for tmul in tmuls]))

    logger.info(eval_msg)

    return learned_models, evaluation
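
The returned evaluation dictionary is keyed by threshold multiplier, plus the special entries 'None', 'gt-renorm', 'training_time' and 'best'. A hypothetical sketch of reading it (problem and learner are assumed to exist; all values are placeholders):

# Hypothetical post-processing of run_problem's output.
learned_models, evaluation = run_problem(problem, learner, seed=42,
                                         n_samples=10000, timeout=600,
                                         global_norm=False)
best = evaluation['best']
if best is not None:
    print("best multiplier:", best)
    print("valid-LL:", evaluation[best]['valid-ll'])
    print("approx IAE:", evaluation[best]['approx-iae'])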
Example #9
def generate_experiment(seed, n_problems, n_train, n_valid, n_reals, n_bools,
                        depth, bias, k, literals, h, ratio, errors):

    logger.info("Generating experiment:\n" +
                "seed: {}\n".format(seed) +
                "n_problems: {}\n".format(n_problems) +
                "n_train: {}\n".format(n_train) +
                "n_valid: {}\n".format(n_valid) +
                "n_reals: {}\n".format(n_reals) +
                "n_bools: {}\n".format(n_bools) +
                "bias: {}\n".format(bias) +
                "k: {}\n".format(k) +
                "literals: {}\n".format(literals) +
                "h: {}\n".format(h) +
                "ratio: {}\n".format(ratio) +
                "errors: {}\n".format(errors))
                
    model_generator = ModelGenerator(n_reals, n_bools, seed,
                                     templ_bools="b{}",
                                     templ_reals="r{}",
                                     initial_bounds=[0, 1])

    problems = []
    while len(problems) < n_problems:
        try:
            # generating the ground truth model
            # not complex enough
            #chi = model_generator.generate_support_tree(depth)
            sample_count = 1000
            chi = support_generator(1, n_bools, n_reals, bias, k, literals, h,
                                    sample_count, ratio, errors, seed)[0]

            w = model_generator.generate_weights_tree(depth, nonnegative=True,
                                                      splits_only=True)

            boolean_vars = list(set(v for v in chi.get_free_variables()
                                    if v.symbol_type() == BOOL).union(
                                            set(model_generator.bools)))
            
            real_vars = list(set(v for v in chi.get_free_variables()
                                    if v.symbol_type() == REAL).union(
                                            set(model_generator.reals)))
            
            bounds = {v.symbol_name() : model_generator.initial_bounds
                      for v in real_vars}

            fbounds = And([And(LE(Real(bounds[var.symbol_name()][0]), var),
                               LE(var, Real(bounds[var.symbol_name()][1])))
                           for var in real_vars])
            model = Model(And(fbounds, chi), w, boolean_vars + real_vars, bounds)

            # use exact inference to normalize the ground truth
            sample_count = None
            normalize(model, seed, sample_count, engine='pa')

            logger.debug("model generator reals: {}".format(model_generator.reals))
            logger.debug("model generator IDs: {}".format(list(map(id, model_generator.reals))))

            logger.debug("model reals: {}".format(model.continuous_vars))
            logger.debug("model IDs: {}".format(list(map(id, model.continuous_vars))))

            # sampling the dataset from the ground truth model
            datasets = {}
            datasets['train'] = sample_dataset(model, n_train)
            datasets['valid'] = sample_dataset(model, n_valid)

        except ModelException as e:
            logger.debug(e.msg)
            continue
        
        logger.debug("Model {}\n".format(len(problems)+1) +
                     "chi: {}\n".format(serialize(model.support)) +
                     "w: {}\n".format(serialize(model.weightfun)))

        problem = Problem(model,
                          datasets,
                          bounds=bounds)

        problems.append(problem)

    # better safe than sorry?
    metadata = {'n_reals' : n_reals, 'n_bools' : n_bools, 'depth' : depth,
                'n_train' : n_train, 'n_valid' : n_valid, 'seed' : seed}
        

    return Experiment(problems, metadata=metadata)
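
An illustrative invocation (all parameter values are placeholders, chosen only to match the signature):

# Hypothetical call; the returned Experiment wraps the generated
# problems together with their metadata.
experiment = generate_experiment(seed=666, n_problems=10, n_train=1000,
                                 n_valid=500, n_reals=3, n_bools=3,
                                 depth=4, bias=0.5, k=3, literals=3,
                                 h=7, ratio=0.9, errors=0.05)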
Example #10
            else:
                raise WMIParsingError("Unhandled formula format", formula)


if __name__ == "__main__":
    from pysmt.shortcuts import Symbol, Ite, And, LE, LT, Real, Times, serialize
    from pysmt.typing import REAL

    def compute_print(method, query, evidence):
        print "query: ", serialize(query)
        print "evidence: ", serialize(evidence) if evidence else "-"
        prob = method.compute_normalized_probability(query, evidence)
        print "normalized: ", prob
        print "--------------------------------------------------"

    x = Symbol("x", REAL)
    A = Symbol("A")
    support = And(LE(Real(-1), x), LE(x, Real(1)))
    weights = Ite(LT(Real(0), x), Ite(A, Times(Real(2), x), x),
                  Ite(A, Times(Real(-2), x), Times(Real(-1), x)))

    praise = PRAiSEInference(support, weights)
    print "support: ", serialize(support)
    print "weights: ", serialize(weights)
    print "=================================================="

    suite = [(A, None), (And(A, LE(Real(0), x)), None), (LE(Real(0), x), A)]

    for query, evidence in suite:
        compute_print(praise, query, evidence)
Example #11
    def perform_query(self,
                      query,
                      evidence=None,
                      mode=None,
                      non_negative=True):
        """Performs a query P(Q). Optional evidence can be specified, performing
        P(Q|E). Returns the probability of the query, calculated as:

            P(Q|E) = WMI(Q & E & kb) / WMI(E & kb)

        as well as the number of integrations performed.
        
        Keyword arguments:
        query -- pysmt formula encoding the query
        evidence -- pysmt formula encoding the evidence (default: None)
        mode -- string in WMI.MODES to select the method (optional)
        non_negative -- if True, negative WMI results raise an exception (default: True)
        """
        mode = mode or WMIInference.DEF_MODE
        evstr = (serialize(evidence) if evidence is not None else "None")
        msg = "Computing P(Q|E), Q: {}, E: {}".format(serialize(query), evstr)
        self.logger.debug(msg)
        query_labels = set()

        if evidence:
            # check if evidence contains reserved variable names
            if contains_labels(evidence):
                msg = "The evidence contains variables with reserved names."
                self.logger.error(msg)
                raise WMIRuntimeException(msg)

            # label LRA-atoms in the evidence
            bool_evidence = WMIInference._query_labelling(
                evidence, query_labels)
            f_e = And(self.support, bool_evidence)
        else:
            f_e = self.support

        if contains_labels(query):
            msg = "The query contains variables with reserved names."
            self.logger.error(msg)
            raise WMIRuntimeException(msg)

        # label LRA-atoms in the query
        bool_query = WMIInference._query_labelling(query, query_labels)
        f_e_q = And(f_e, bool_query)

        # extract the domain of integration according to the model,
        # query and evidence
        domX = set(get_real_variables(f_e_q))
        domA = {x for x in get_boolean_variables(f_e_q) if not is_label(x)}
        self.logger.debug("domX: {}, domA: {}".format(domX, domA))

        # compute WMI(Q & E & kb)
        wmi_e_q, n_e_q = self.wmi.compute(f_e_q, self.weights, mode, domA,
                                          domX)
        if wmi_e_q > 0 or (wmi_e_q < 0 and not non_negative):
            # compute WMI(E & kb)
            wmi_e, n_e = self.wmi.compute(f_e, self.weights, mode, domA, domX)
            if wmi_e == 0:
                msg = "(Knowledge base & Evidence) is inconsistent."
                self.logger.error(msg)
                raise WMIRuntimeException(msg)
            elif wmi_e < 0 and non_negative:
                msg = self.MSG_NEGATIVE_RES.format(wmi_e)
                self.logger.error(msg)
                raise WMIRuntimeException(msg)

            normalized_p = wmi_e_q / wmi_e
            n_integrations = n_e_q + n_e

        elif wmi_e_q == 0:
            normalized_p = 0.0
            n_integrations = n_e_q

        else:
            msg = self.MSG_NEGATIVE_RES.format(wmi_e_q)
            self.logger.error(msg)
            raise WMIRuntimeException(msg)

        msg = "Norm. P(Q|E): {}, n_integrations: {}"
        self.logger.debug(msg.format(normalized_p, n_integrations))
        return normalized_p, n_integrations
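
A usage sketch, assuming a WMIInference instance built as in the earlier sketches (query and evidence are illustrative):

# Hypothetical query, following the docstring's P(Q|E) semantics.
from pysmt.shortcuts import Symbol, LE, Real
from pysmt.typing import REAL

x = Symbol("x", REAL)
A = Symbol("A")
p, n_int = inference.perform_query(LE(Real(0), x), evidence=A)
print("P(Q|E) = {} ({} integrations)".format(p, n_int))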