Example #1
def learn_supports_adaptive(dataset, seed, bg_knowledge=None, timeout=None, initial=None, mult=None,
                            hops=None, max_mult=None, negative_bootstrap=None):

    if timeout is None:
        timeout = DEF_TIMEOUT

    if initial is None:
        initial = DEF_INITIAL

    if mult is None:
        mult = DEF_MULT

    if hops is None:
        hops = DEF_HOPS

    if max_mult is None:
        max_mult = DEF_MAX_MULT

    results = []
    discovered = set()
    t_mults = set()
    
    last = initial
    i = 0

    msg = "Adaptive support learning. timeout = {}, init = {}, mult = {}, hops = {}"
    logger.info(msg.format(timeout, initial, mult, hops))
    while i < hops and last < max_mult:
        logger.debug("i: {} last: {}".format(i, last))
        t_mults.add(last)
        res = learn_support(dataset, seed, last, timeout=timeout, bg_knowledge=bg_knowledge,
                            symmetry_breaking="mvn",
                            negative_bootstrap=negative_bootstrap)
        
        if res is not None:
            chi, k, h, thresholds = res
            chistr = serialize(chi)            
            smaller = {t for t in t_mults if t < last}
            
            if chistr not in discovered:
                discovered.add(chistr)
                results.append(res + (last,))

            if len(smaller) > 0:
                last = (last + max(smaller)) / 2
                i += 1
            else:
                last = last / mult

        else: # last t_mult timed out
            larger = {t for t in t_mults if t > last}
            if len(larger) > 0:
                last = (last + min(larger)) / 2
                i += 1
            else:
                last = last * mult

    return results
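A usage sketch follows; dataset and seed are assumed to be provided by the caller, and each result unpacks into the tuple appended above (the learned formula plus the threshold multiplier that produced it).

# Hypothetical driver code for learn_supports_adaptive.
results = learn_supports_adaptive(dataset, seed, timeout=600)
for chi, k, h, thresholds, t_mult in results:
    logger.info("t_mult={}: learned CNF(k={}, h={})".format(t_mult, k, h))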
Example #2
def learn_wrap(data, labels, learn_inc, queue):
    res = learn_bottom_up(data, labels, learn_inc, 1, 1, 1, 1, None, None)
    (new_data, new_labels, formula), k, h = res
    msg = "Learned CNF(k={}, h={})"
    logger.debug(msg.format(k, h))
    msg = "Data-set grew from {} to {} entries"
    logger.debug(msg.format(len(labels), len(new_labels)))

    queue.put((formula, k, h))
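learn_wrap is designed to run in a separate process that reports back through a queue; a sketch of the timed invocation, mirroring the pattern used in learn_support below (data, labels and learn_inc are assumed to be prepared by the caller):

from multiprocessing import Process, Queue

queue = Queue()
proc = Process(target=learn_wrap, args=(data, labels, learn_inc, queue))
proc.start()
proc.join(timeout)
if proc.is_alive():
    # learning timed out: discard the run
    proc.terminate()
    proc.join()
else:
    formula, k, h = queue.get()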
Example #3
    def dump(self, problem_path):
        if problem_path is None and self.original_path is None:
            raise IOError("Unspecified path")
        elif problem_path is not None and exists(problem_path):
            raise IOError("File exists: {}".format(problem_path))
        elif problem_path is None and self.original_path is not None:
            msg = "Dumping the problem with no specified path, using {}"
            logger.debug(msg.format(self.original_path))
            problem_path = self.original_path

        problem_name = basename(problem_path)
        folder = abspath(dirname(problem_path))

        model_filename = Problem.MODEL_TEMPL.format(problem_name)
        model_path = join(folder, model_filename)

        if self.original_path is None:
            self.model.dump(model_path)

        index = {
            'model_path': relpath(model_path, folder),
            'dataset_paths': {}
        }

        for dataset_name, dataset in self.datasets.items():

            dataset_filename = Problem.DATASET_TEMPL.format(
                problem_name, dataset_name)
            dataset_path = join(folder, dataset_filename)

            if self.original_path is None:
                dataset.dump(dataset_path)

            index['dataset_paths'][dataset_name] = relpath(
                dataset_path, folder)

        if len(self.learned_supports) > 0:

            index['support_paths'] = []

            for i, chi in enumerate(self.learned_supports):
                support_filename = Problem.SUPPORT_TEMPL.format(
                    problem_name, i)
                support_path = join(folder, support_filename)
                logger.debug("Writing support file: {}".format(support_path))
                write_smtlib(chi, support_path)
                index['support_paths'].append(relpath(support_path, folder))

        if self.bounds is not None:
            index['bounds'] = self.bounds

        if self.metadata is not None:
            index['metadata'] = self.metadata

        with open(problem_path, 'wb') as f:
            pickle.dump(index, f)
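The index written by dump is a plain pickled dict, so it can be read back directly; a sketch with a hypothetical path (dump raises IOError rather than overwrite an existing file):

import pickle

problem.dump("experiment.problem")

with open("experiment.problem", "rb") as f:
    index = pickle.load(f)
# 'model_path' and 'dataset_paths' are always present; 'support_paths',
# 'bounds' and 'metadata' appear only when the problem carries them.
print(index["model_path"], sorted(index["dataset_paths"]))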
Example #4
def renorm_wrap(inst, support, support_path, weight_path):
    try:
        inst.renormalize(support)
        support = inst.tree_to_WMI_support()
        weight = inst.tree_to_WMI_weightfun()
        msg = "Writing result to files:\n{}\n{}"
        logger.debug(msg.format(support_path, weight_path))
        write_smtlib(support, support_path)
        write_smtlib(weight, weight_path)
        logger.debug("Done.")

    except ModelException as e:
        logger.error("Couldn't renormalize the DET: {}".format(e))
Example #5
def normalize(model, seed, sample_count, engine='pa'):
    
    if engine == 'pa':
        solver = PredicateAbstractionEngine(model.domain, model.support, model.weightfun)
    elif engine == 'rej':
        solver = RejectionEngine(model.domain, model.support, model.weightfun,
                                 sample_count=sample_count, seed=seed)
    else:
        raise NotImplementedError()

    Z = solver.compute_volume()

    assert Z > 0, "Z must be strictly positive"

    if not np.isclose(Z, 1.0):
        logger.debug("Normalizing w with Z: {}".format(Z))
        model.weightfun = Times(Real(1.0/Z), model.weightfun)

    return Z
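A usage sketch, assuming model is already built: with engine='pa' the volume is computed exactly and sample_count is unused, while engine='rej' needs both sample_count and seed.

# Exact normalization via predicate abstraction ...
Z = normalize(model, seed, sample_count=None, engine='pa')
# ... or an approximate one via rejection sampling.
Z = normalize(model, seed, sample_count=100000, engine='rej')
logger.info("Z before rescaling: {}".format(Z))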
Example #6
def check_Z_normalize(model, seed, sample_count):
    """Tests whether the model is normalized. If not, updates the weight
    function accordingly."""

    logger.debug("Approximating Z")
    solver = RejectionEngine(model.domain, model.support, model.weightfun,
                             sample_count=sample_count, seed=seed)
    all_ohes = dict()
    for var in model.domain.bool_vars:
        print("VAR:", var)
        if "_OHE_" in var:
            prefix = var.partition("_OHE_")[0]
            if prefix not in all_ohes:
                all_ohes[prefix] = []

            all_ohes[prefix].append(var)
    ohe_variables = list(all_ohes.values()) if len(all_ohes) > 0 else None
    Z_approx = solver.compute_volume(ohe_variables=ohe_variables)
    logger.debug("Z_approx: {}".format(Z_approx))
    if Z_approx <= 0:
        raise ModelException("Partition function is <= 0")
    
    if abs(Z_approx - 1.0) > DEF_CLOSE_ENOUGH:
        model.weightfun = Times(Real(float(1.0/Z_approx)), model.weightfun)
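The loop above groups one-hot encoded Booleans by the prefix preceding the "_OHE_" marker; a self-contained sketch of the same convention (variable names are made up):

bool_vars = ["color_OHE_red", "color_OHE_blue", "size_OHE_small", "flag"]
all_ohes = dict()
for var in bool_vars:
    if "_OHE_" in var:
        prefix = var.partition("_OHE_")[0]
        all_ohes.setdefault(prefix, []).append(var)
# -> {'color': ['color_OHE_red', 'color_OHE_blue'], 'size': ['size_OHE_small']}
print(all_ohes)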
Example #7
    def renormalize(self, support):
        assert (support is not None), "Can't renormalize with support = None"
        self.support = support

        # mark the tree
        queue = []
        for leaf in self.root.get_leaves():
            domA = [
                var.symbol_name() for var in leaf.bounds
                if var.symbol_type() == BOOL
            ]
            domX = []
            bs = []
            for var, b in leaf.bounds.items():
                if var.symbol_type() == REAL:
                    domX.append(var.symbol_name())
                    bs.append(tuple(b))

            domain = Domain.make(domA, domX, bs)
            intersection = And(support, leaf.bounds_to_SMT())
            engine = PredicateAbstractionEngine(domain, intersection, Real(1))
            intervol = engine.compute_volume()
            leaf.marked = intervol <= 0
            if leaf.marked:
                logger.debug("Marked a leaf")
                queue.append(leaf)

        while len(queue) > 0:
            n = queue.pop(0)
            # propagate marks upwards: a parent is marked when both children are
            if n.parent is not None and not n.parent.marked:
                if n.parent.pos.marked and n.parent.neg.marked:
                    n.parent.marked = True
                    queue.append(n.parent)

        self.root.merge_marked()
        self.root.renormalize_node(support)
Example #8
def approx_IAE(model1, model2, seed, sample_count):
    assert(set(model1.get_vars()) == set(model2.get_vars())),\
        "M1 vars: {}\n M2 vars: {}".format(model1.get_vars(),model2.get_vars())

    domain, bounds = merged_domain(model1, model2)

    samples, pos_ratio = positive(sample_count, domain,
                                  Or(model1.support, model2.support),
                                  weight=None)
    samples_m1 = samples[evaluate(domain,
                                  And(model1.support, Not(model2.support)),
                                  samples)]
    samples_m2 = samples[evaluate(domain,
                                  And(Not(model1.support), model2.support),
                                  samples)]
    samples_inter = samples[evaluate(domain, And(model1.support, model2.support),
                                  samples)]

    weights_m1 = sum(evaluate(domain, model1.weightfun, samples_m1))
    weights_m2 = sum(evaluate(domain, model2.weightfun, samples_m2))
    weights_inter = sum(abs(evaluate(domain, model1.weightfun, samples_inter) -
                        evaluate(domain, model2.weightfun, samples_inter)))

    n_m1 = len(samples_m1)
    n_m2 = len(samples_m2)
    n_inter = len(samples_inter)

    norm_m1 = weights_m1 / sample_count
    norm_m2 = weights_m2 / sample_count
    norm_inter = weights_inter / sample_count
    
    logger.debug(f"[ S1 ~S2] len: {n_m1}, sum: {weights_m1}, norm: {norm_m1}")
    logger.debug(f"[ S1 ~S2] len: {n_m2}, sum: {weights_m2}, norm: {norm_m2}")
    logger.debug(f"[ S1 ~S2] len: {n_inter}, sum: {weights_inter}, norm: {norm_inter}")

    approx_vol = pos_ratio * 2**len(domain.bool_vars)
    for lb, ub in bounds.values():
        approx_vol *= (ub - lb)

    return approx_vol*(weights_m1 + weights_m2 + weights_inter) / sample_count
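approx_IAE estimates the integrated absolute error between two densities by Monte Carlo: samples drawn uniformly over the union of the two supports are split into the region where only model1 holds, the region where only model2 holds, and their intersection, and the accumulated |p1 - p2| mass is rescaled by the approximate volume of the union. A self-contained sketch of the underlying identity on a hypothetical one-dimensional domain:

import numpy as np

# IAE = volume * mean(|p1(x) - p2(x)|) over uniform samples of the domain.
rng = np.random.default_rng(42)
xs = rng.uniform(0.0, 1.0, size=100000)  # hypothetical domain [0, 1]
p1 = np.where(xs < 0.5, 2.0, 0.0)        # uniform density on [0, 0.5]
p2 = np.ones_like(xs)                    # uniform density on [0, 1]
iae = 1.0 * np.mean(np.abs(p1 - p2))     # exact value is 1.0
print(iae)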
Example #9
def run_problem(problem,
                learner,
                seed,
                n_samples,
                timeout,
                global_norm,
                use_lariat=True):

    ground_truth = problem.model
    evaluation = dict()

    train = problem.datasets['train']
    valid = problem.datasets['valid']

    train_valid = Dataset(train.features, train.data + valid.data,
                          train.constraints)

    if problem.learned_supports is not None:
        prior_supports = {
            problem.metadata['supports_metadata'][i]['support_threshold_mult']:
            chi
            for i, chi in enumerate(problem.learned_supports)
        }
    else:
        logger.warning("Couldn't find any learned support.")
        prior_supports = dict()

    prior_supports['None'] = None
    prior_supports['gt-renorm'] = ground_truth.support

    t_0 = time()
    learner.estimate_density(train, validation_data=valid)
    t_f = time() - t_0
    logger.info("training time: {}".format(t_f))
    evaluation['training_time'] = t_f

    learned_models = []
    cached_models = dict()
    max_ll = None
    best = None

    logger.info("Evaluating:\n {}".format("\n".join(
        map(str, prior_supports.keys()))))

    for t_mult, prior_support in prior_supports.items():

        if t_mult != 'None' and not use_lariat:
            continue

        evaluation[t_mult] = dict()
        ps_str = t_mult if isinstance(t_mult, str) else serialize(prior_support)

        if ps_str in cached_models:
            learned_model, evaluation[t_mult] = cached_models[ps_str]
        else:
            try:
                logger.info(
                    "--------------------------------------------------")
                logger.info("Support: {}".format(t_mult))

                mode = RENORM_FULL if prior_support is not None else RENORM_OFF
                t_0 = time()
                learned_model, renormd = learner.renormalize(
                    train,
                    seed,
                    mode=mode,
                    support=prior_support,
                    timeout=timeout,
                    global_norm=global_norm)
                t_f = time() - t_0
                if not renormd and prior_support is not None:
                    continue

                evaluation[t_mult]['renorm_time'] = t_f

            except CalledProcessError as e:
                logger.warning("XADD error: {}".format(e))
                continue

            except ModelException as e:
                logger.warning("Model error: {}".format(e))
                continue

            logger.debug("Computing approx-IAE")
            iae = approx_IAE(learned_model, ground_truth, seed, n_samples)
            evaluation[t_mult]['approx-iae'] = iae

            logger.debug("Computing train-LL")
            train_ll, train_out = learned_model.log_likelihood(train)
            evaluation[t_mult]['train-ll'] = train_ll
            evaluation[t_mult]['train-out'] = train_out
            logger.debug("Computing valid-LL")
            valid_ll, valid_out = learned_model.log_likelihood(valid)
            evaluation[t_mult]['valid-ll'] = valid_ll
            evaluation[t_mult]['valid-out'] = valid_out
            train_valid_ll, train_valid_out = learned_model.log_likelihood(
                train_valid)
            evaluation[t_mult]['train-valid-ll'] = train_valid_ll
            evaluation[t_mult]['train-valid-out'] = train_valid_out

            if t_mult not in ['None','gt-renorm'] \
               and (max_ll is None or valid_ll > max_ll):
                max_ll = valid_ll
                best = t_mult

            logger.debug("Computing volume difference")
            poly1 = Model(learned_model.support, None, ground_truth.get_vars(),
                          ground_truth.bounds)
            poly2 = Model(ground_truth.support, None, ground_truth.get_vars(),
                          ground_truth.bounds)
            vol_diff = ISE(poly1, poly2, seed, n_samples, engine='rej')

            evaluation[t_mult]['vol-diff'] = vol_diff

            cached_models[ps_str] = (learned_model, evaluation[t_mult])

            domain = Domain.make(
                map(lambda v: v.symbol_name(), ground_truth.boolean_vars),
                learned_model.bounds)
            eval_falses = evaluate(domain, learned_model.support,
                                   np.asarray(train.data))

        learned_models.append((t_mult, learned_model))

    evaluation['best'] = best

    tmuls = sorted([
        key for key in evaluation
        if key not in ['None', 'gt-renorm', 'training_time', 'best']
    ])

    eval_msg = """RESULTS:
Training time: {}
No renorm: {}
GT renorm: {}
Best chi : {}

All chis:
{}
""".format(evaluation['training_time'], evaluation['None'],
           evaluation['gt-renorm'], (best, evaluation.get(best)),
           "\n".join([str((tmul, evaluation[tmul])) for tmul in tmuls]))

    logger.info(eval_msg)

    return learned_models, evaluation
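A call sketch with hypothetical problem/learner objects and parameter values:

models, evaluation = run_problem(problem, learner, seed=666,
                                 n_samples=10000, timeout=1200,
                                 global_norm=False, use_lariat=True)
logger.info("Best support: {}".format(evaluation['best']))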
Example #10
def generate_experiment(seed, n_problems, n_train, n_valid, n_reals, n_bools,
                        depth, bias, k, literals, h, ratio, errors):

    logger.info("Generating experiment:\n" +
                "seed: {}\n".format(seed) +
                "n_problems: {}\n".format(n_problems) +
                "n_train: {}\n".format(n_train) +
                "n_valid: {}\n".format(n_valid) +
                "n_reals: {}\n".format(n_reals) +
                "n_bools: {}\n".format(n_bools) +
                "bias: {}\n".format(bias) +
                "k: {}\n".format(k) +
                "literals: {}\n".format(literals) +
                "h: {}\n".format(h) +
                "ratio: {}\n".format(ratio) +
                "errors: {}\n".format(errors))
                
    model_generator = ModelGenerator(n_reals, n_bools, seed,
                                     templ_bools="b{}",
                                     templ_reals="r{}",
                                     initial_bounds=[0, 1])

    problems = []
    while len(problems) < n_problems:
        try:
            # generating the ground truth model
            # not complex enough
            #chi = model_generator.generate_support_tree(depth)
            sample_count = 1000
            chi = support_generator(1, n_bools, n_reals, bias, k, literals, h,
                                    sample_count, ratio, errors, seed)[0]

            w = model_generator.generate_weights_tree(depth, nonnegative=True,
                                                      splits_only=True)

            boolean_vars = list(set(v for v in chi.get_free_variables()
                                    if v.symbol_type() == BOOL).union(
                                            set(model_generator.bools)))
            
            real_vars = list(set(v for v in chi.get_free_variables()
                                    if v.symbol_type() == REAL).union(
                                            set(model_generator.reals)))
            
            bounds = {v.symbol_name() : list(model_generator.initial_bounds)
                      for v in real_vars}  # copy, don't share one list

            fbounds = And([And(LE(Real(bounds[var.symbol_name()][0]), var),
                               LE(var, Real(bounds[var.symbol_name()][1])))
                           for var in real_vars])
            model = Model(And(fbounds, chi), w, boolean_vars + real_vars, bounds)

            # use exact inference to normalize the ground truth
            sample_count = None
            normalize(model, seed, sample_count, engine='pa')

            logger.debug("model generator reals: {}".format(model_generator.reals))
            logger.debug("model generator IDs: {}".format(list(map(id, model_generator.reals))))

            logger.debug("model reals: {}".format(model.continuous_vars))
            logger.debug("model IDs: {}".format(list(map(id, model.continuous_vars))))

            # sampling the dataset from the ground truth model
            datasets = {}
            datasets['train'] = sample_dataset(model, n_train)
            datasets['valid'] = sample_dataset(model, n_valid)

        except ModelException as e:
            logger.debug(e.msg)
            continue
        
        logger.debug("Model {}\n".format(len(problems)+1) +
                     "chi: {}\n".format(serialize(model.support)) +
                     "w: {}\n".format(serialize(model.weightfun)))

        problem = Problem(model,
                          datasets,
                          bounds=bounds)

        problems.append(problem)

    # better safe than sorry?
    metadata = {'n_reals' : n_reals, 'n_bools' : n_bools, 'depth' : depth,
                'n_train' : n_train, 'n_valid' : n_valid, 'seed' : seed}
        

    return Experiment(problems, metadata=metadata)
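A call sketch with hypothetical parameter values; bias, k, literals, h, ratio and errors are passed straight through to support_generator:

experiment = generate_experiment(seed=666, n_problems=5, n_train=500,
                                 n_valid=100, n_reals=2, n_bools=2, depth=3,
                                 bias=0.5, k=3, literals=3, h=5,
                                 ratio=0.9, errors=0)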
Example #11
    def renormalize(self,
                    training_data,
                    seed,
                    mode=RENORM_OFF,
                    support=None,
                    timeout=None,
                    global_norm=False):

        if timeout is None:
            timeout = DEF_RENORM_TIMEOUT

        detcopy = self.det.copy()

        model_support = detcopy.tree_to_WMI_support()
        model_weight = detcopy.tree_to_WMI_weightfun()

        bounds = {
            v.symbol_name(): b
            for v, b in detcopy.root.bounds.items() if v.symbol_type() == REAL
        }

        renorm_support = None
        if mode == RENORM_BG_ONLY and training_data.constraints is not None:
            renorm_support = training_data.constraints
        elif mode == RENORM_FULL:
            if training_data.constraints is not None and support is not None:
                renorm_support = training_data.constraints & support
            elif training_data.constraints is not None:
                renorm_support = training_data.constraints
            elif support is not None:
                renorm_support = support

        renormalized = False
        if renorm_support is not None:

            if global_norm:
                logger.debug("Global renormalization")
                model_support = model_support & renorm_support
                renormalized = True
            else:
                logger.debug("Local renormalization")

                def renorm_wrap(inst, support, support_path, weight_path):
                    try:
                        inst.renormalize(support)
                        support = inst.tree_to_WMI_support()
                        weight = inst.tree_to_WMI_weightfun()
                        msg = "Writing result to files:\n{}\n{}"
                        logger.debug(msg.format(support_path, weight_path))
                        write_smtlib(support, support_path)
                        write_smtlib(weight, weight_path)
                        logger.debug("Done.")

                    except ModelException as e:
                        logger.error(
                            "Couldn't renormalize the DET: {}".format(e))

                # communication with wrapper process through file
                # NEVER use multiprocessing.Queue with huge pysmt formulas
                rndstr = ''.join(choice(TMP_CHARS) for _ in range(TMP_LEN))
                support_path = "{}.support".format(rndstr)
                weight_path = "{}.weight".format(rndstr)
                timed_proc = Process(target=renorm_wrap,
                                     args=(detcopy, renorm_support,
                                           support_path, weight_path))

                logger.debug(
                    "Starting renormalization with timeout: {}".format(
                        timeout))
                timed_proc.start()
                logger.debug("Timed proc started")
                timed_proc.join(timeout)
                logger.debug("Timed proc joined")

                if timed_proc.is_alive():
                    logger.warning("Renormalization timed out")
                    pid = timed_proc.pid
                    logger.warning(
                        "Killing process {} and its children".format(pid))
                    kill_recursive(pid)

                else:
                    try:
                        model_support = read_smtlib(support_path)
                        remove(support_path)
                    except FileNotFoundError:
                        model_support = None
                    try:
                        model_weight = read_smtlib(weight_path)
                        remove(weight_path)
                    except FileNotFoundError:
                        model_weight = None

                    if model_support is None or model_weight is None:
                        raise ModelException("Couldn't renormalize the DET")

                    logger.debug("Renormalization done")
                    renormalized = True

        model = Model(model_support,
                      model_weight,
                      list(map(lambda x: x[0], training_data.features)),
                      bounds,
                      metadata=self.learner_args)

        # is Z = 1?
        if renormalized:
            check_Z_normalize(model, seed, TEST_AND_NORM_SAMPLES)

        elif not global_norm:
            # fallback strategy for local: to global
            model, renormalized = self.renormalize(training_data,
                                                   seed,
                                                   mode=mode,
                                                   support=support,
                                                   timeout=timeout,
                                                   global_norm=True)

        return model, renormalized
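A call sketch mirroring run_problem above: attempt a local renormalization against a prior support chi and rely on the built-in fallback to global renormalization on failure.

model, renormd = learner.renormalize(train, seed,
                                     mode=RENORM_FULL,
                                     support=chi,
                                     timeout=1200,
                                     global_norm=False)
if not renormd:
    logger.warning("Renormalization was not applied")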
Example #12
    def renormalize(self,
                    training_data,
                    seed,
                    mode=RENORM_OFF,
                    support=None,
                    timeout=None,
                    global_norm=True):

        if timeout is None:
            timeout = DEF_RENORM_TIMEOUT

        feature_dict = {
            var.symbol_name(): var
            for var, _ in training_data.features
        }

        model_weightfun, model_support = SPN_to_WMI(self.spn.root,
                                                    feature_dict)

        bounds = {}
        for i, feat in enumerate(training_data.features):
            var = feat[0]
            if var.symbol_type() == REAL:
                xi = list(map(lambda row: row[i], training_data.data))
                bounds[var.symbol_name()] = [min(xi), max(xi)]

        renorm_support = None
        if mode == RENORM_BG_ONLY and training_data.constraints is not None:
            renorm_support = training_data.constraints
        elif mode == RENORM_FULL:
            if training_data.constraints is not None and support is not None:
                renorm_support = training_data.constraints & support
            elif training_data.constraints is not None:
                renorm_support = training_data.constraints
            elif support is not None:
                renorm_support = support

        renormalized = False
        if renorm_support is not None:
            if global_norm:
                logger.debug("Global renormalization")
                model_support = model_support & renorm_support
                renormalized = True

            else:
                logger.debug("Local renormalization")
                domain = Domain.make([
                    v.symbol_name() for v, _ in training_data.features
                    if v.symbol_type() == BOOL
                ], bounds)

                nc_model_support = normalize_formula(model_support)
                nc_model_weightfun = normalize_formula(model_weightfun)
                nc_renorm_support = normalize_formula(renorm_support)

                t_0 = time()
                xaddsolver = XaddEngine(domain,
                                        nc_model_support,
                                        nc_model_weightfun,
                                        mode="original",
                                        timeout=timeout)

                t_init = time() - t_0
                logger.debug("XADDEngine t_init: {}".format(t_init))
                try:
                    t_1 = time()
                    res = xaddsolver.normalize(renorm_support)
                    t_norm = time() - t_1
                except CalledProcessError as e:
                    raise ModelException("XADD normalization failed: {}".format(e))

                if res is None:
                    logger.warning("Timeout.")
                else:
                    logger.debug("XADDEngine t_norm: {}".format(t_norm))
                    model_weightfun = get_env().formula_manager.normalize(res)
                    model_support = get_env().formula_manager.normalize(
                        And(model_support, renorm_support))
                    renormalized = True

        model = Model(model_support,
                      model_weightfun,
                      list(map(lambda x: x[0], training_data.features)),
                      bounds,
                      metadata=self.learner_args)

        if renormalized:
            check_Z_normalize(model, seed, TEST_AND_NORM_SAMPLES)

        elif not global_norm:
            # fallback strategy for local: to global
            model, renormalized = self.renormalize(training_data,
                                                   seed,
                                                   mode=mode,
                                                   support=support,
                                                   timeout=timeout,
                                                   global_norm=True)

        return model, renormalized
Example #13
def learn_support(dataset, seed, threshold_mult, timeout=None, bg_knowledge=None,
                  symmetry_breaking=None,
                  negative_bootstrap=None):

    logger.info(f"Running INCAL+. Symmetry breaking = {symmetry_breaking} negative_bootstrap = {negative_bootstrap}")    

    # default might become symmetry_breaking = "mvn"
    if symmetry_breaking is None:
        symmetry_breaking = ""

    if negative_bootstrap is None:
        negative_bootstrap = 0
    else:
        try:
            # absolute count is specified with an integer
            negative_bootstrap = int(negative_bootstrap)
        except ValueError:
            # relative count (wrt |D|) is specified with a float
            negative_bootstrap = int(len(dataset) * float(negative_bootstrap))
            
    # compute bounds and add positive labels to the data
    bounds = {}
    for row in dataset.data:
        for i, feat in enumerate(dataset.features):
            var = feat[0]

            if var.symbol_type() == BOOL:
                continue

            varname = var.symbol_name()
            if varname not in bounds:
                bounds[varname] = [row[i], row[i]]
            else:
                if row[i] < bounds[varname][0]:
                    bounds[varname][0] = row[i]
                elif row[i] > bounds[varname][1]:
                    bounds[varname][1] = row[i]

    data = np.array(dataset.data)
    labels = np.ones(data.shape[0])

    # create a Domain instance
    varnames = []
    vartypes = {}
    for v, _ in dataset.features:
        varnames.append(v.symbol_name())
        vartypes[v.symbol_name()] = v.symbol_type()

    domain = Domain(varnames, vartypes, bounds)
    distance = Distance(domain, Distance.l_inf)

    max_closest = None
    for i1 in range(len(data)):
        min_distance = None
        for i2 in range(len(data)):
            if i1 != i2:
                p1, p2 = dataset.data[i1], dataset.data[i2]
                d = distance.between(p1, p2)
                min_distance = d if min_distance is None else min(min_distance, d)
        if min_distance is not None and min_distance < 1:
            max_closest = min_distance if max_closest is None else max(max_closest, min_distance)

    logger.debug("Maximum distance between closest neighbors: {}".format(max_closest))

    threshold = threshold_mult * max_closest
    logger.debug("Overall threshold: {}".format(threshold))

    thresholds = {r: threshold * domain.domain_size(r) for r in domain.real_vars}
    logger.debug("Thresholds per dimension: {}".format(thresholds))

    def learn_inc(_data, _labels, _i, _k, _h):
        strategy = OneClassStrategy(RandomViolationsStrategy(10), thresholds,
                                    background_knowledge=bg_knowledge)
        if negative_bootstrap > 0:
            _data, _labels = OneClassStrategy.add_negatives(domain, _data, _labels, thresholds, negative_bootstrap)

        learner = KCnfSmtLearner(_k, _h, strategy, symmetry_breaking)

        random.seed(seed)        
        initial_indices = LearnOptions.initial_random(20)(list(range(len(_data))))
        res = learner.learn(domain, _data, _labels, initial_indices)
        return res


    # wrapping INCAL+ into a timed process
    def learn_wrap(data, labels, learn_inc, queue):
        res = learn_bottom_up(data, labels, learn_inc, 1, 1, 1, 1, None, None)
        (new_data, new_labels, formula), k, h = res
        msg = "Learned CNF(k={}, h={})"
        logger.debug(msg.format(k, h))
        msg = "Data-set grew from {} to {} entries"
        logger.debug(msg.format(len(labels), len(new_labels)))
        
        queue.put((formula, k, h))

    queue = Queue()
    timed_proc = Process(target=learn_wrap, args=(data, labels, learn_inc, queue))
    timed_proc.start()
    timed_proc.join(timeout)
    if timed_proc.is_alive():
        # timed process didn't complete the job
        timed_proc.terminate()
        timed_proc.join()
        return None
    else:
        # get the learned formula, (k,h)
        chi, k, h = queue.get()
        return chi, k, h, thresholds
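A single-threshold call sketch, mirroring the invocation in Example #1:

res = learn_support(dataset, seed, 1.0, timeout=600,
                    symmetry_breaking="mvn")
if res is None:
    logger.warning("INCAL+ timed out")
else:
    chi, k, h, thresholds = res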
Example #14
def SPN_to_WMI(node, feature_dict):

    w_children = []
    chi_children = []
    for child in node.children:
        subw, subchi = SPN_to_WMI(child, feature_dict)
        w_children.append(subw)
        if subchi is not None:
            chi_children.append(subchi)

    if isinstance(node, SumNode):
        wmi_weights = list(map(lambda w: Real(float(w)), node.weights))
        weighted_sum = [
            Times(wmi_weights[i], w_children[i])
            for i in range(len(wmi_weights))
        ]
        w_node = Plus(weighted_sum)
        chi_node = Or(chi_children)

    elif isinstance(node, ProductNode):
        w_node = Times(w_children)
        chi_node = And(chi_children)

    else:  # it's a leaf
        wmi_var = feature_dict[node.featureName]

        if isinstance(node, BernoulliNode):
            assert 0 <= node.p <= 1
            w_node = boolean_leaf(wmi_var, node.p, 1 - node.p)
            chi_node = None

        elif isinstance(node, CategoricalNode):
            # I think this is never going to be used
            assert (node.values == 2), "Not a Boolean variable"
            w_node = boolean_leaf(wmi_var, node.probs[0], node.probs[1])
            chi_node = None

        elif isinstance(node, PiecewiseLinearPDFNodeOld):
            # I think this is never going to be used
            logger.debug("Var: {}".format(wmi_var.symbol_name()) +
                         " x_range: {}".format(node.x_range) +
                         " y_range: {}".format(node.y_range) +
                         " dom: {}".format(node.domain))

            if wmi_var.symbol_type() == REAL:
                w_node = datapoints_to_piecewise_linear(
                    wmi_var, node.x_range, node.y_range)
                chi_node = And(LE(Real(float(node.domain[0])), wmi_var),
                               LE(wmi_var, Real(float(node.domain[-1]))))
            else:
                w_node = boolean_leaf(wmi_var, node.y_range[2],
                                      node.y_range[1])
                chi_node = None

            logger.debug("Leaf: {}".format(w_node))

        elif isinstance(node, (PiecewiseLinearPDFNode,
                               IsotonicUnimodalPDFNode)):
            logger.debug("Var: {}".format(wmi_var.symbol_name()) +
                         " x_range: {}".format(node.x_range) +
                         " y_range: {}".format(node.y_range) +
                         " dom: {}".format(node.domain))

            if wmi_var.symbol_type() == REAL:
                actual_prob = datapoints_to_piecewise_linear(
                    wmi_var, node.x_range, node.y_range)
                w_node = Plus(
                    Times(Real(float(1 - node.prior_weight)), actual_prob),
                    Times(Real(float(node.prior_weight)),
                          Real(float(node.prior_density))))
                chi_node = And(LE(Real(float(node.domain[0])), wmi_var),
                               LE(wmi_var, Real(float(node.domain[-1]))))
            else:
                p_true = node.y_range[list(node.x_range).index(True)]
                p_false = node.y_range[list(node.x_range).index(False)]
                print("p_true", p_true, "p_false", p_false)
                w_node = boolean_leaf(wmi_var, p_true, p_false)
                """
                if isclose(p_true, 1.0):
                    chi_node = wmi_var
                elif isclose(p_false, 1.0):
                    chi_node = Not(wmi_var)
                else:
                    chi_node = None
                """
                chi_node = None

            logger.debug("Leaf: {}".format(w_node))

        elif isinstance(node, HistNode):
            actual_prob = hist_to_piecewise_constant(wmi_var, node.breaks,
                                                     node.densities)
            w_node = Plus(
                Times(Real(float(1 - node.prior_weight)), actual_prob),
                Times(Real(float(node.prior_weight)),
                      Real(float(node.prior_density))))
            chi_node = And(LE(Real(float(node.domain[0])), wmi_var),
                           LE(wmi_var, Real(float(node.domain[-1]))))

        elif isinstance(node, KernelDensityEstimatorNode):
            raise NotImplementedError()
        else:
            raise NotImplementedError("Node type {} not supported".format(
                type(node)))

    return w_node, chi_node
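A conversion sketch mirroring the call in Example #12, where feature_dict maps SPN feature names to pysmt variables:

feature_dict = {var.symbol_name(): var
                for var, _ in training_data.features}
weightfun, support = SPN_to_WMI(spn.root, feature_dict)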