    def verify_numpy(self, evidence, marginals1):

        size = batch_size = d.evd_size(evidence)

        u.show(
            f'\nVerifying against classical AC (numpy arrays, batch_size {batch_size})'
        )

        # split lambdas into scalars (with batch)
        evidence = self.split_evidence(evidence)

        eval_time = 0  # pure evaluation time (add/mul/div)
        for start in range(0, size, batch_size):
            u.show(f'{int(100*start/size):4d}%\r', end='', flush=True)
            stop = start + batch_size
            evidence_batch = d.evd_slice(evidence, start, stop)
            marginals_batch, et = self.evaluate_numpy(evidence_batch)
            marginals_batch = self.gather_marginals(marginals_batch)
            if start == 0:
                marginals2 = marginals_batch
            else:
                marginals2 = np.concatenate((marginals2, marginals_batch),
                                            axis=0)
            eval_time += et  # accumulate evaluation time of this batch

        u.equal(marginals1, marginals2, tolerance=True)

        size = d.evd_size(evidence)
        u.show(
            f'Evaluation Time: {eval_time:.3f} sec ({1000*eval_time/size:.0f} ms per example)'
        )
        return eval_time, batch_size

    def fit(self,
            evidence,
            marginals,
            loss_type,
            metric_type,
            *,
            batch_size=32):
        evd_size = data.evd_size(evidence)  # number of examples
        batch_size = min(evd_size, batch_size)  # used batch size

        u.input_check(self.trainable, f'TAC is not trainable')
        u.input_check(data.is_evidence(evidence), f'evidence is ill formatted')
        u.input_check(
            data.evd_is_hard(evidence, self.input_nodes,
                             self.hard_input_nodes), f'evidence must be hard')
        u.input_check(data.evd_matches_input(evidence, self.input_nodes),
                      f'evidence must match evidence nodes of tbn')
        u.input_check(data.is_marginals(marginals), f'marginals ill formatted')
        u.input_check(data.mar_matches_output(marginals, self.output_node),
                      f'marginals must match query node of tbn')
        u.input_check(loss_type in self.loss_types,
                      f'loss {loss_type} is not supported')
        u.input_check(metric_type in self.metric_types,
                      f'metric {metric_type} is not supported')
        u.input_check(
            data.evd_size(evidence) == len(marginals),
            f'evidence size must match marginals size')

        u.show(f'\nTraining {self.circuit_type}:')
        start_training_time = time.perf_counter()

        epoch_count = self.trainer.train(evidence, marginals, loss_type,
                                         metric_type, batch_size)

        training_time = time.perf_counter() - start_training_time
        time_per_epoch = training_time / epoch_count

        u.show(
            f'Training Time: {training_time:.3f} sec ({time_per_epoch:.3f} sec per epoch)'
        )
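
    # Usage sketch for fit() (hypothetical setup): `tac` is a trainable TAC,
    # `loss` is any member of tac.loss_types (the loss names themselves are
    # not shown in this module; 'CA' is the one metric name that appears):
    #
    #   evidence  = ...  # hard evidence matching tac.input_nodes
    #   marginals = ...  # one marginal per example for tac.output_node
    #   tac.fit(evidence, marginals, loss, 'CA', batch_size=32)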
    def verify_tf_graph(self, evidence, marginals1):
        assert self.tf_ac is not None

        size = batch_size = d.evd_size(evidence)

        u.show(
            f'\nVerifying against classical AC (tf graph, batch_size {batch_size})'
        )

        # split lambdas into scalars (with batch)
        evidence = self.split_evidence(evidence)

        # tf graph accepts only tensors as input
        evidence = tuple(tf.constant(e, dtype=tf.float32) for e in evidence)

        start_eval_time = time.perf_counter()
        for start in range(0, size, batch_size):
            u.show(f'{int(100*start/size):4d}%\r', end='', flush=True)
            stop = start + batch_size
            evidence_batch = d.evd_slice(evidence, start, stop)
            marginals_batch = self.tf_ac(
                *evidence_batch)  # evaluating tf graph
            marginals_batch = self.gather_marginals(marginals_batch)
            if start == 0:
                marginals2 = marginals_batch
            else:
                marginals2 = np.concatenate((marginals2, marginals_batch),
                                            axis=0)
        eval_time = time.perf_counter() - start_eval_time

        u.equal(marginals1, marginals2, tolerance=True)

        size = d.evd_size(evidence)
        u.show(
            f'Evaluation Time: {eval_time:.3f} sec ({1000*eval_time/size:.0f} ms per example)'
        )
        return eval_time, batch_size
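
    # Note on verify_tf_graph: self.tf_ac is assumed to be a compiled tf
    # graph (e.g. a tf.function) that takes one float32 lambda tensor per
    # evidence node, hence the tf.constant wrapping above; the full evidence
    # set is fed as a single batch (batch_size == size).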

    def evaluate(self, evidence, *, batch_size=64, report_time=False):
        evd_size = data.evd_size(evidence)  # number of examples
        batch_size = min(evd_size, batch_size)  # used batch size

        u.input_check(data.is_evidence(evidence),
                      f'TAC evidence is ill formatted')
        u.input_check(
            data.evd_is_hard(evidence, self.input_nodes,
                             self.hard_input_nodes),
            f'TAC evidence must be hard')
        u.input_check(data.evd_matches_input(evidence, self.input_nodes),
                      f'TAC evidence must match evidence tbn nodes')

        u.show(f'\nEvaluating {self.circuit_type}: evidence size {evd_size}, '
               f'batch size {batch_size}')

        marginals = None
        eval_time = 0
        for i, evd_batch in enumerate(data.evd_batches(evidence, batch_size)):
            u.show(f'{int(100*i*batch_size/evd_size):4d}%\r', end='', flush=True)
            start_time = time.perf_counter()
            mar_batch = self.tac_graph.evaluate(evd_batch)
            eval_time += time.perf_counter() - start_time
            if marginals is None:
                marginals = mar_batch
            else:
                marginals = np.concatenate((marginals, mar_batch), axis=0)

        time_per_example = eval_time / evd_size
        time_per_million = time_per_example / (self.size / 1000000)

        u.show(f'\rEvaluation Time: {eval_time:.3f} sec '
               f'({1000*time_per_example:.1f} ms per example,'
               f' {1000*time_per_million:.1f} ms per 1M tac nodes)')

        assert data.mar_matches_output(marginals, self.output_node)
        assert data.mar_is_predictions(marginals)

        if report_time:
            return marginals, eval_time, batch_size
        return marginals
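
    # Usage sketch for evaluate() (hypothetical `tac` and `evidence`):
    #
    #   marginals = tac.evaluate(evidence)  # predictions only
    #   marginals, secs, bsize = tac.evaluate(evidence, batch_size=128,
    #                                         report_time=True)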
    def verify_array(self, evidence, marginals1):
        u.show(f'\nVerifying against classical AC (array)...')

        size = d.evd_size(evidence)
        rows = d.evd_col2row(evidence)
        marginals2 = []

        # evaluation time excludes assertion of evidence
        eval_time = 0  # pure evaluation time (add/mul/div)
        for lambdas in rows:
            self.assert_evidence_array(lambdas)
            marginal, et = self.evaluate_array()  # np array
            marginals2.append(marginal)
            eval_time += et

        marginals2 = np.array(marginals2, dtype=np.float32)
        u.equal(marginals1, marginals2, tolerance=True)

        u.show(
            f'Evaluation Time: {eval_time:.3f} sec ({1000*eval_time/size:.0f} ms per example)'
        )
        return eval_time, 1
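
    # The verify_* methods above cross-check marginals against a classical AC
    # in three ways: verify_numpy and verify_tf_graph evaluate the whole
    # evidence set as a single batch (batch_size == evidence size) using numpy
    # operations and a compiled tf graph respectively, while verify_array
    # asserts evidence and evaluates one example at a time (hence the
    # effective batch size of 1 in its return value).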

    def metric(self, evidence, labels, metric_type, *, batch_size=64):
        evd_size = data.evd_size(evidence)  # number of examples
        batch_size = min(evd_size, batch_size)  # used batch size

        u.input_check(data.is_evidence(evidence), f'evidence is ill formatted')
        u.input_check(
            data.evd_is_hard(evidence, self.input_nodes,
                             self.hard_input_nodes), f'evidence must be hard')
        u.input_check(data.evd_matches_input(evidence, self.input_nodes),
                      f'evidence must match evidence nodes of tbn')
        u.input_check(data.is_marginals(labels, one_hot=(metric_type == 'CA')),
                      f'labels ill formatted')
        u.input_check(data.mar_matches_output(labels, self.output_node),
                      f'labels must match query node of tbn')
        u.input_check(metric_type in self.metric_types,
                      f'metric {metric_type} is not supported')

        u.show(f'\nComputing {metric_type}: evidence size {evd_size}, '
               f'batch size {batch_size}')

        start_eval_time = time.perf_counter()

        batches, _ = data.data_batches(evidence, labels, batch_size)
        result = 0
        for evd_batch, lab_batch in batches:
            bresult = self.tac_graph.compute_metric(metric_type, evd_batch,
                                                    lab_batch)
            result += bresult * len(lab_batch)
        result /= evd_size  # average weighted by batch size (last batch may be smaller)

        evaluation_time = time.perf_counter() - start_eval_time
        time_per_example = evaluation_time / evd_size

        u.show(f'{metric_type} Time: {evaluation_time:.3f} sec '
               f'({time_per_example:.4f} sec per example)')

        return result
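
    # Usage sketch for metric() (hypothetical names; 'CA' is presumably a
    # classification-accuracy metric, given the one-hot check above):
    #
    #   accuracy = tac.metric(evidence, one_hot_labels, 'CA', batch_size=64)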

def posteriors(bn, inputs, output, evidence):
    u.show('\nRunning VE...', end='', flush=True)
    assert not bn.testing and len(inputs) == len(evidence)

    # we will perform elimination only on nodes that are connected to output
    qnode = bn.node(output)  # query node
    nodes = qnode.connected_nodes()  # set
    assert qnode in nodes

    # identify inputs and evidence connected to query node
    evidence_ = evidence
    enodes, evidence = [], []
    for i, e in zip(inputs, evidence_):
        n = bn.node(i)  # evidence node
        if n in nodes:  # connected to query
            enodes.append(n)
            evidence.append(e)  # e is a batch of lambdas for node n
    assert enodes and evidence  # output must be connected to some input

    # maps bn node to Var
    node2var = {n: Var(bn_node=n) for n in nodes}
    nodes2vars = lambda nodes_: tuple(node2var[n] for n in nodes_)

    # construct batch Var
    batch_size = data.evd_size(evidence)
    batch_var = Var(batch_size=batch_size)

    # get elimination order
    order, _, _, _ = bn.elm_order('minfill')
    elm_order = tuple(node2var[n] for n in order if n != qnode and n in nodes)

    # bn factors
    evd_factor = lambda evd, node: Factor(evd, (batch_var, node2var[node]))
    cpt_factor = lambda cpt, node: Factor(
        cpt, nodes2vars(node.family), sort=True)
    indicators = tuple(evd_factor(evd, n) for evd, n in zip(evidence, enodes))
    cpts = tuple(cpt_factor(n.tabular_cpt(), n) for n in nodes)
    query_var = node2var[qnode]

    # indexing factors for lookup during elimination
    # factor.tvars exclude the batch var
    scalars = set()  # scalar factors (have no vars)
    # maps var to factors containing var
    var2factors = {var: set() for var in elm_order}
    var2factors[query_var] = set()

    def index(factor):  # add factor to pool
        if factor.is_scalar:
            scalars.add(factor)
        else:
            for var in factor.tvars:
                var2factors[var].add(factor)

    def remove(factors):  # remove factors from pool
        # iterate over a copy: factors may be the same set as some var2factors[var]
        for f in set(factors):
            assert not f.is_scalar
            for var in f.tvars:
                var2factors[var].remove(f)

    def get_factors(var):  # returns factors that contain var
        factors = var2factors[var]
        assert factors
        return factors

    def verify_elm(f):  # verify pool at end of elimination
        assert all(not factors for var, factors in var2factors.items()
                   if var != query_var)
        # scalar factors can arise only when the network is disconnected
        assert not scalars or not bn.is_connected()

    # we are about to start eliminating vars: index them first
    for factor in indicators:
        index(factor)
    for factor in cpts:
        index(factor)

    # eliminate vars
    one = Factor.one()  # identity factor for multiplication
    for var in elm_order:
        factors = get_factors(var)  # factors that contain var
        factor = one
        for f in factors:
            factor = factor.multiply(f)
        factor = factor.sumout(var)
        remove(factors)
        index(factor)

    verify_elm(factor)

    factor = one
    for f in get_factors(query_var):
        factor = factor.multiply(f)
    for f in scalars:
        factor = factor.multiply(f)
    assert factor.has_batch and factor.tvars == (query_var, )

    factor = factor.normalize()
    u.show('done.')
    return factor.table  # ndarray
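
# Usage sketch for posteriors() (hypothetical `bn` and `evidence`): computes
# exact marginals for `output` by variable elimination, batched over examples,
# e.g. to cross-check circuit marginals.
#
#   inputs   = ('A', 'B')  # names of evidence nodes in bn
#   evidence = ...         # one batch of lambdas per input node
#   table = posteriors(bn, inputs, 'Q', evidence)
#   # table: ndarray of shape (batch size, cardinality of node 'Q'),
#   # rows normalized to sum to one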