def enumerate_TTAs(self, query, evidence=None):
    """Enumerates the total truth assignments computed for the given query.

    Two enumerations are performed through self.wmi.enumerate_TTAs: one on
    (support & evidence & query) and, only if that one is non-empty, one on
    (support & evidence).

    Keyword arguments:
    query -- pysmt formula encoding the query
    evidence -- pysmt formula encoding the evidence (optional, default: None)

    Returns:
    0 if no TTA is enumerated for (support & evidence & query), otherwise
    the sum of the two counts returned by self.wmi.enumerate_TTAs.

    Raises:
    WMIRuntimeException -- if the query or the evidence contain variables
        with reserved names, or if (support & evidence) yields no TTA while
        (support & evidence & query) yields some.

    """
    # "No evidence" is exactly `None`: test it explicitly once, so that the
    # debug message and the branch below can never disagree.
    has_evidence = evidence is not None

    msg = "Enumerating TTAs for P(Q|E), Q: {},E: {}".format(
        serialize(query),
        serialize(evidence) if has_evidence else "None")
    self.logger.debug(msg)

    # shared by the evidence and query labelling calls
    query_labels = set()

    if has_evidence:
        if contains_labels(evidence):
            msg = "The evidence contains variables with reserved names."
            self.logger.error(msg)
            raise WMIRuntimeException(msg)

        # label LRA-atoms in the evidence
        bool_evidence = WMIInference._query_labelling(
            evidence, query_labels)
        f_e = And(self.support, bool_evidence)
    else:
        f_e = self.support

    if contains_labels(query):
        msg = "The query contains variables with reserved names."
        self.logger.error(msg)
        raise WMIRuntimeException(msg)

    # label LRA-atoms in the query
    bool_query = WMIInference._query_labelling(query, query_labels)
    f_e_q = And(f_e, bool_query)

    # extract the domain of integration according to the model,
    # query and evidence
    domX = set(get_real_variables(f_e_q))
    domA = {x for x in get_boolean_variables(f_e_q) if not is_label(x)}

    n_ttas_e_q = self.wmi.enumerate_TTAs(f_e_q, self.weights, domA, domX)

    if n_ttas_e_q > 0:
        # the same domains are reused for the evidence-only enumeration
        n_ttas_e = self.wmi.enumerate_TTAs(f_e, self.weights, domA, domX)

        if n_ttas_e == 0:
            msg = "(Knowledge base & Evidence) is inconsistent."
            self.logger.error(msg)
            raise WMIRuntimeException(msg)

        return n_ttas_e_q + n_ttas_e

    else:
        return 0
def learn_supports_adaptive(dataset, seed, bg_knowledge=None, timeout=None,
                            initial=None, mult=None, hops=None,
                            max_mult=None, negative_bootstrap=None):
    """Learn several supports by adaptively moving the threshold multiplier.

    Starting from `initial`, `learn_support` is called repeatedly. After a
    successful call the multiplier moves down (halfway towards the largest
    smaller multiplier already tried, or divided by `mult` if there is
    none); after a `None` result it moves up (halfway towards the smallest
    larger multiplier already tried, or multiplied by `mult`). Only the
    "halfway" moves are counted against `hops`.

    Keyword arguments:
    dataset -- forwarded to learn_support
    seed -- forwarded to learn_support
    bg_knowledge -- forwarded to learn_support (default: None)
    timeout -- forwarded to learn_support (default: DEF_TIMEOUT)
    initial -- first multiplier tried (default: DEF_INITIAL)
    mult -- factor used while no bracketing multiplier exists
        (default: DEF_MULT)
    hops -- maximum number of halving moves (default: DEF_HOPS)
    max_mult -- the search stops once the multiplier reaches this value
        (default: DEF_MAX_MULT)
    negative_bootstrap -- forwarded to learn_support (default: None)

    Returns:
    A list with one entry per distinct support found (distinct by its
    serialization): the tuple returned by learn_support extended with the
    multiplier that produced it.

    """
    timeout = DEF_TIMEOUT if timeout is None else timeout
    initial = DEF_INITIAL if initial is None else initial
    mult = DEF_MULT if mult is None else mult
    hops = DEF_HOPS if hops is None else hops
    max_mult = DEF_MAX_MULT if max_mult is None else max_mult

    results = []
    seen_supports = set()
    tried = set()
    current = initial
    n_hops = 0

    msg = "Adaptive support learning. timeout = {}, init = {}, mult = {}, hops = {}"
    logger.info(msg.format(timeout, initial, mult, hops))

    while n_hops < hops and current < max_mult:
        logger.debug("i: {} last: {}".format(n_hops, current))
        tried.add(current)
        outcome = learn_support(dataset, seed, current, timeout=timeout,
                                bg_knowledge=bg_knowledge,
                                symmetry_breaking="mvn",
                                negative_bootstrap=negative_bootstrap)

        if outcome is None:
            # last t_mult timed out
            above = {t for t in tried if t > current}
            if above:
                current = (current + min(above)) / 2
                n_hops += 1
            else:
                current = current * mult
            continue

        # only the formula is needed here; the 4-way unpacking is kept so a
        # result of unexpected shape still fails at this point
        chi, _k, _h, _thresholds = outcome
        support_key = serialize(chi)
        below = {t for t in tried if t < current}

        if support_key not in seen_supports:
            seen_supports.add(support_key)
            results.append(outcome + (current,))

        # NOTE(review): the multiplier is updated whether or not the support
        # was new -- confirm against the original layout of this function.
        if below:
            current = (current + max(below)) / 2
            n_hops += 1
        else:
            current = current / mult

    return results
def get_canonical_form(expression):
    """Rewrites a pysmt polynomial in canonical form.

    The formula is serialized, passed through `expand` and the result is
    converted back with `sympy2pysmt`.

    Keyword arguments:
    expression -- pysmt formula representing a polynomial

    Raises:
    WMIParsingError -- If it fails to parse back the formula after
        converting it (raised by the helpers called here, not by this
        function directly)

    """
    serialized = serialize(expression)
    expanded = expand(serialized)
    return sympy2pysmt(expanded)
def __init__(self, support, weights, check_consistency=False):
    """Builds the inference object from a support and a weight function.

    Keyword arguments:
    support -- pysmt formula encoding the support
    weights -- pysmt formula encoding the FIUC weight function
    check_consistency -- if True, raises a WMIRuntimeException if
        the model is inconsistent (default: False)

    Raises:
    WMIRuntimeException -- if the support or the weight function contain
        variables with reserved names, or if the consistency check is
        requested and fails.

    """
    self.init_sublogger(__name__)

    # neither input may already use the reserved label names;
    # the support is checked first, then the weight function
    reserved_name_checks = (
        (support,
         "The support contains variables with reserved names."),
        (weights,
         "The weight function contains variables with reserved names."),
    )
    for formula, msg in reserved_name_checks:
        if contains_labels(formula):
            self.logger.error(msg)
            raise WMIRuntimeException(msg)

    # label the weight function conditions and conjoin the labelling
    # with the user-provided support
    self.weights = Weights(weights)
    self.support = And(support, self.weights.labelling)

    # the *original* formulas are the ones logged
    self.logger.debug("Support: {}".format(serialize(support)))
    self.logger.debug("Weights: {}".format(serialize(weights)))

    # initialize the WMI engine
    self.wmi = WMI()

    # optional consistency check, performed on the original support
    if check_consistency:
        if not WMI.check_consistency(support):
            raise WMIRuntimeException(WMIInference.MSG_INCONSISTENT_SUPPORT)
def pysmt2sympy(expression):
    """Converts a pysmt formula representing a polynomial for use with sympy.

    The formula is serialized to a string and that string is handed to
    `sympify`; whatever `sympify` returns is returned to the caller.

    Args:
        expression (FNode): The pysmt formula to convert.

    Returns:
        The result of `sympify` applied to the serialized formula.

    Raises:
        WMIParsingException: If `sympify` fails with a SympifyError.

    """
    serialized = serialize(expression)
    try:
        return sympify(serialized)
    except SympifyError:
        raise WMIParsingException(
            WMIParsingException.CANNOT_CONVERT_PYSMT_FORMULA_TO_SYMPY,
            expression)
def compute_print(method, query, evidence):
    """Prints a query, its evidence and the normalized probability.

    Keyword arguments:
    method -- object exposing compute_normalized_probability(query, evidence)
    query -- formula, printed through serialize
    evidence -- formula or a falsy value; a falsy value is printed as "-"

    """
    print("query: ", serialize(query))

    evidence_text = serialize(evidence) if evidence else "-"
    print("evidence: ", evidence_text)

    print("normalized: ",
          method.compute_normalized_probability(query, evidence))
    print("--------------------------------------------------")
# Tail of a definition that starts above the visible chunk; left untouched.
else:
    raise WMIParsingError("Unhandled formula format", formula)


# Small demo, executed only when the module is run as a script: builds a toy
# model and prints the normalized probability of a few queries.
if __name__ == "__main__":
    from pysmt.shortcuts import Symbol, Ite, And, LE, LT, Real, Times, serialize
    from pysmt.typing import REAL

    # Prints the query, the evidence ("-" when falsy) and the value returned
    # by method.compute_normalized_probability(query, evidence).
    def compute_print(method, query, evidence):
        print("query: ", serialize(query))
        print("evidence: ", serialize(evidence) if evidence else "-")
        prob = method.compute_normalized_probability(query, evidence)
        print("normalized: ", prob)
        print("--------------------------------------------------")

    # one real variable and one Boolean variable
    x = Symbol("x", REAL)
    A = Symbol("A")
    # support: -1 <= x <= 1
    support = And(LE(Real(-1), x), LE(x, Real(1)))
    # weights: for 0 < x, 2*x if A else x; otherwise -2*x if A else -1*x
    weights = Ite(LT(Real(0), x),
                  Ite(A, Times(Real(2), x), x),
                  Ite(A, Times(Real(-2), x), Times(Real(-1), x)))

    praise = PRAiSEInference(support, weights)
    print("support: ", serialize(support))
    print("weights: ", serialize(weights))
    print("==================================================")

    # (query, evidence) pairs; None means no evidence
    suite = [(A, None),
             (And(A, LE(Real(0), x)), None),
             (LE(Real(0), x), A)]

    for query, evidence in suite:
        compute_print(praise, query, evidence)
def run_problem(problem, learner, seed, n_samples, timeout, global_norm,
                use_lariat=True):
    """Trains `learner` on a problem and evaluates it under each prior support.

    The learner is first trained with `estimate_density`. It is then
    renormalized once per candidate support: the learned supports attached
    to the problem (keyed by their 'support_threshold_mult' metadata), no
    support at all (key 'None') and the ground-truth support (key
    'gt-renorm'). Each resulting model is scored and the scores are stored
    under the candidate's key. Models are cached by the serialized support,
    so two candidates with the same support are evaluated once.

    Keyword arguments:
    problem -- object exposing `model`, `datasets` (with 'train' and
        'valid'), `learned_supports` and `metadata`
    learner -- object exposing `estimate_density` and `renormalize`
    seed -- forwarded to renormalize, approx_IAE and ISE
    n_samples -- forwarded to approx_IAE and ISE
    timeout -- forwarded to learner.renormalize
    global_norm -- forwarded to learner.renormalize
    use_lariat -- if False, only the 'None' candidate is evaluated
        (default: True)

    Returns:
    A pair (learned_models, evaluation): the list of (key, model) pairs
    that were evaluated, and a dict with one entry per candidate key plus
    'training_time' and 'best' (the learned-support key with the highest
    validation log-likelihood, or None).

    """
    ground_truth = problem.model
    evaluation = dict()

    train = problem.datasets['train']
    valid = problem.datasets['valid']
    train_valid = Dataset(train.features,
                          train.data + valid.data,
                          train.constraints)

    if problem.learned_supports is not None:
        prior_supports = {
            problem.metadata['supports_metadata'][i]['support_threshold_mult']: chi
            for i, chi in enumerate(problem.learned_supports)
        }
    else:
        logger.warning("Couldn't find any learned support.")
        prior_supports = dict()

    # string keys mark the two baselines; learned supports use their
    # threshold multiplier as key
    prior_supports['None'] = None
    prior_supports['gt-renorm'] = ground_truth.support

    t_0 = time()
    learner.estimate_density(train, validation_data=valid)
    t_f = time() - t_0
    logger.info("training time: {}".format(t_f))
    evaluation['training_time'] = t_f

    learned_models = []
    cached_models = dict()
    max_ll = None
    best = None

    logger.info("Evaluating:\n {}".format("\n".join(
        map(str, prior_supports.keys()))))

    for t_mult, prior_support in prior_supports.items():

        if t_mult != 'None' and not use_lariat:
            continue

        evaluation[t_mult] = dict()
        # baselines are cached under their own key, learned supports under
        # the serialized formula
        ps_str = serialize(prior_support) if not isinstance(t_mult, str) else t_mult

        if ps_str in cached_models:
            learned_model, evaluation[t_mult] = cached_models[ps_str]

        else:
            try:
                logger.info(
                    "--------------------------------------------------")
                logger.info("Support: {}".format(t_mult))

                mode = RENORM_FULL if prior_support is not None else RENORM_OFF
                t_0 = time()
                learned_model, renormd = learner.renormalize(
                    train,
                    seed,
                    mode=mode,
                    support=prior_support,
                    timeout=timeout,
                    global_norm=global_norm)
                t_f = time() - t_0
                # a support was given but the renormalization did not happen:
                # skip this candidate (its evaluation entry stays empty)
                if not renormd and prior_support is not None:
                    continue

                evaluation[t_mult]['renorm_time'] = t_f

            except CalledProcessError as e:
                logger.warning("XADD error: {}".format(e))
                continue

            except ModelException as e:
                logger.warning("Model error: {}".format(e))
                continue

            logger.debug("Computing approx-IAE")
            iae = approx_IAE(learned_model, ground_truth, seed, n_samples)
            evaluation[t_mult]['approx-iae'] = iae

            logger.debug("Computing train-LL")
            train_ll, train_out = learned_model.log_likelihood(train)
            evaluation[t_mult]['train-ll'] = train_ll
            evaluation[t_mult]['train-out'] = train_out

            logger.debug("Computing valid-LL")
            valid_ll, valid_out = learned_model.log_likelihood(valid)
            evaluation[t_mult]['valid-ll'] = valid_ll
            evaluation[t_mult]['valid-out'] = valid_out

            train_valid_ll, train_valid_out = learned_model.log_likelihood(
                train_valid)
            evaluation[t_mult]['train-valid-ll'] = train_valid_ll
            evaluation[t_mult]['train-valid-out'] = train_valid_out

            # only learned supports compete for "best"
            if t_mult not in ['None', 'gt-renorm'] \
               and (max_ll is None or valid_ll > max_ll):
                max_ll = valid_ll
                best = t_mult

            logger.debug("Computing volume difference")
            poly1 = Model(learned_model.support, None,
                          ground_truth.get_vars(), ground_truth.bounds)
            poly2 = Model(ground_truth.support, None,
                          ground_truth.get_vars(), ground_truth.bounds)
            vol_diff = ISE(poly1, poly2, seed, n_samples, engine='rej')
            evaluation[t_mult]['vol-diff'] = vol_diff

            cached_models[ps_str] = (learned_model, evaluation[t_mult])

        # NOTE(review): the result of this evaluation is never used; the call
        # is kept in case it is relied upon for validation -- confirm and
        # remove if it is a debugging leftover.
        domain = Domain.make(
            map(lambda v: v.symbol_name(), ground_truth.boolean_vars),
            learned_model.bounds)
        eval_falses = evaluate(domain, learned_model.support,
                               np.asarray(train.data))

        learned_models.append((t_mult, learned_model))

    evaluation['best'] = best

    tmuls = sorted([
        key for key in evaluation
        if key not in ['None', 'gt-renorm', 'training_time', 'best']
    ])

    # With use_lariat=False the 'gt-renorm' candidate is skipped and has no
    # entry: use .get so the summary does not fail with a KeyError.
    eval_msg = """RESULTS:
    Training time: {}
    No renorm: {}
    GT renorm: {}
    Best chi : {}

    All chis:
    {}
    """.format(evaluation['training_time'],
               evaluation.get('None'),
               evaluation.get('gt-renorm'),
               (best, evaluation.get(best)),
               "\n".join([str((tmul, evaluation[tmul])) for tmul in tmuls]))

    logger.info(eval_msg)

    return learned_models, evaluation
def generate_experiment(seed, n_problems, n_train, n_valid, n_reals, n_bools,
                        depth, bias, k, literals, h, ratio, errors):
    """Generates an Experiment made of `n_problems` synthetic problems.

    For each problem a support is drawn with `support_generator`, a weight
    function with `ModelGenerator.generate_weights_tree`, the resulting
    model is normalized and a training and a validation dataset are sampled
    from it. An attempt that raises ModelException is discarded and
    retried.

    Keyword arguments:
    seed -- forwarded to ModelGenerator, support_generator and normalize
    n_problems -- number of problems to generate
    n_train -- size passed to sample_dataset for the 'train' dataset
    n_valid -- size passed to sample_dataset for the 'valid' dataset
    n_reals -- forwarded to ModelGenerator and support_generator
    n_bools -- forwarded to ModelGenerator and support_generator
    depth -- depth passed to generate_weights_tree
    bias, k, literals, h, ratio, errors -- forwarded to support_generator

    Returns:
    An Experiment holding the generated problems and a metadata dict with
    n_reals, n_bools, depth, n_train, n_valid and seed.

    """
    banner = [
        "Generating experiment:\n",
        "seed: {}\n".format(seed),
        "n_problems: {}\n".format(n_problems),
        "n_train: {}\n".format(n_train),
        "n_valid: {}\n".format(n_valid),
        "n_reals: {}\n".format(n_reals),
        "n_bools: {}\n".format(n_bools),
        "bias: {}\n".format(bias),
        "k: {}\n".format(k),
        "literals: {}\n".format(literals),
        "h: {}\n".format(h),
        "ratio: {}\n".format(ratio),
        "errors: {}\n".format(errors),
    ]
    logger.info("".join(banner))

    model_generator = ModelGenerator(n_reals, n_bools, seed,
                                     templ_bools="b{}",
                                     templ_reals="r{}",
                                     initial_bounds=[0, 1])

    problems = []
    while len(problems) < n_problems:
        try:
            # generating the ground truth model
            # (model_generator.generate_support_tree(depth) was judged
            # "not complex enough" by the original author)
            sample_count = 1000
            generated_supports = support_generator(
                1, n_bools, n_reals, bias, k, literals, h,
                sample_count, ratio, errors, seed)
            chi = generated_supports[0]
            w = model_generator.generate_weights_tree(depth,
                                                      nonnegative=True,
                                                      splits_only=True)

            # variables of the model: those occurring in the support plus
            # those known to the generator
            chi_bools = set(v for v in chi.get_free_variables()
                            if v.symbol_type() == BOOL)
            boolean_vars = list(chi_bools.union(set(model_generator.bools)))

            chi_reals = set(v for v in chi.get_free_variables()
                            if v.symbol_type() == REAL)
            real_vars = list(chi_reals.union(set(model_generator.reals)))

            # every real variable gets the generator's initial bounds
            bounds = {}
            for v in real_vars:
                bounds[v.symbol_name()] = model_generator.initial_bounds

            interval_constraints = []
            for var in real_vars:
                lower_ok = LE(Real(bounds[var.symbol_name()][0]), var)
                upper_ok = LE(var, Real(bounds[var.symbol_name()][1]))
                interval_constraints.append(And(lower_ok, upper_ok))
            fbounds = And(interval_constraints)

            model = Model(And(fbounds, chi), w, boolean_vars + real_vars, bounds)

            # use exact inference to normalize the ground truth
            sample_count = None
            normalize(model, seed, sample_count, engine='pa')

            logger.debug("model generator reals: {}".format(model_generator.reals))
            logger.debug("model generator IDs: {}".format(list(map(id, model_generator.reals))))

            logger.debug("model reals: {}".format(model.continuous_vars))
            logger.debug("model IDs: {}".format(list(map(id, model.continuous_vars))))

            # sampling the dataset from the ground truth model
            datasets = {
                'train': sample_dataset(model, n_train),
                'valid': sample_dataset(model, n_valid),
            }

        except ModelException as e:
            # discard this attempt and try again
            logger.debug(e.msg)
            continue

        summary = ("Model {}\n".format(len(problems)+1) +
                   "chi: {}\n".format(serialize(model.support)) +
                   "w: {}\n".format(serialize(model.weightfun)))
        logger.debug(summary)

        problems.append(Problem(model, datasets, bounds=bounds))

    # better safe than sorry?
    metadata = {
        'n_reals': n_reals,
        'n_bools': n_bools,
        'depth': depth,
        'n_train': n_train,
        'n_valid': n_valid,
        'seed': seed,
    }

    return Experiment(problems, metadata=metadata)
else: raise WMIParsingError("Unhandled formula format", formula) if __name__ == "__main__": from pysmt.shortcuts import Symbol, Ite, And, LE, LT, Real, Times, serialize from pysmt.typing import REAL def compute_print(method, query, evidence): print "query: ", serialize(query) print "evidence: ", serialize(evidence) if evidence else "-" prob = method.compute_normalized_probability(query, evidence) print "normalized: ", prob print "--------------------------------------------------" x = Symbol("x", REAL) A = Symbol("A") support = And(LE(Real(-1), x), LE(x, Real(1))) weights = Ite(LT(Real(0), x), Ite(A, Times(Real(2), x), x), Ite(A, Times(Real(-2), x), Times(Real(-1), x))) praise = PRAiSEInference(support, weights) print "support: ", serialize(support) print "weights: ", serialize(weights) print "==================================================" suite = [(A, None), (And(A, LE(Real(0), x)), None), (LE(Real(0), x), A)] for query, evidence in suite: compute_print(praise, query, evidence)
def perform_query(self, query, evidence=None, mode=None, non_negative=True):
    """Performs a query P(Q). Optional evidence can be specified, performing
    P(Q|E). Returns the probability of the query, calculated as:

        P(Q|E) = WMI(Q & E & kb) / WMI(E & kb)

    as well as the number of integrations performed.

    Keyword arguments:
    query -- pysmt formula encoding the query
    evidence -- pysmt formula encoding the evidence (default: None)
    mode -- string in WMI.MODES to select the method (optional,
        default: WMIInference.DEF_MODE)
    non_negative -- if True, negative WMI results raise an exception
        (default: True)

    Returns:
    A pair (normalized_p, n_integrations). When WMI(Q & E & kb) is 0 the
    probability is 0.0 and WMI(E & kb) is not computed.

    Raises:
    WMIRuntimeException -- if the query or the evidence contain variables
        with reserved names, if WMI(E & kb) is 0 while WMI(Q & E & kb) is
        not, or if a WMI result is negative and non_negative is True.

    """
    mode = mode or WMIInference.DEF_MODE

    # "No evidence" is exactly `None`: test it explicitly once, so that the
    # debug message and the branch below can never disagree.
    has_evidence = evidence is not None

    evstr = serialize(evidence) if has_evidence else "None"
    msg = "Computing P(Q|E), Q: {}, E: {}".format(serialize(query), evstr)
    self.logger.debug(msg)

    # shared by the evidence and query labelling calls
    query_labels = set()

    if has_evidence:
        # check if evidence contains reserved variable names
        if contains_labels(evidence):
            msg = "The evidence contains variables with reserved names."
            self.logger.error(msg)
            raise WMIRuntimeException(msg)

        # label LRA-atoms in the evidence
        bool_evidence = WMIInference._query_labelling(
            evidence, query_labels)
        f_e = And(self.support, bool_evidence)
    else:
        f_e = self.support

    if contains_labels(query):
        msg = "The query contains variables with reserved names."
        self.logger.error(msg)
        raise WMIRuntimeException(msg)

    # label LRA-atoms in the query
    bool_query = WMIInference._query_labelling(query, query_labels)
    f_e_q = And(f_e, bool_query)

    # extract the domain of integration according to the model,
    # query and evidence
    domX = set(get_real_variables(f_e_q))
    domA = {x for x in get_boolean_variables(f_e_q) if not is_label(x)}
    self.logger.debug("domX: {}, domA: {}".format(domX, domA))

    # compute WMI(Q & E & kb)
    wmi_e_q, n_e_q = self.wmi.compute(f_e_q, self.weights, mode, domA, domX)

    if wmi_e_q > 0 or (wmi_e_q < 0 and not non_negative):
        # compute WMI(E & kb)
        wmi_e, n_e = self.wmi.compute(f_e, self.weights, mode, domA, domX)

        if wmi_e == 0:
            msg = "(Knowledge base & Evidence) is inconsistent."
            self.logger.error(msg)
            raise WMIRuntimeException(msg)
        elif wmi_e < 0 and non_negative:
            # report the value that is actually negative, i.e. WMI(E & kb)
            msg = self.MSG_NEGATIVE_RES.format(wmi_e)
            self.logger.error(msg)
            raise WMIRuntimeException(msg)

        normalized_p = wmi_e_q / wmi_e
        n_integrations = n_e_q + n_e

    elif wmi_e_q == 0:
        # nothing to normalize: the denominator is not computed
        normalized_p = 0.0
        n_integrations = n_e_q

    else:
        # wmi_e_q < 0 and non_negative is True
        msg = self.MSG_NEGATIVE_RES.format(wmi_e_q)
        self.logger.error(msg)
        raise WMIRuntimeException(msg)

    msg = "Norm. P(Q|E): {}, n_integrations: {}"
    self.logger.debug(msg.format(normalized_p, n_integrations))
    return normalized_p, n_integrations