Example #1
def set_separators_and_clusters(view, trainable, verbose):

    has_replicas = lambda: any(len(nodes) > 1 for nodes in view.fcpts.values())

    # identify functional cpts (fcpts) and associate them with functional vars
    __set_fcpts(view, trainable)

    # replicate fcpts if not already replicated by decouple.py
    if view.fcpts and not has_replicas():
        __replicate_fcpts(view, verbose)

    # compute separators
    __set_classical_cls_and_sep(view)

    # remove functional vars from separators, then remove dead fcpts
    if has_replicas():
        __shrink_separators(view)
        __remove_dead_fcpts(view, verbose)

    # remove clamped vars from separators (if any)
    for i, p, _, _, _ in view.bottom_up():
        sep = view.sep(i)
        sep -= set(var for var in sep if var._clamped)

    # compute clusters (cluster of host computed earlier in view.py)
    for i, _, c1, c2, _ in view.bottom_up():
        if not c1: view.cls_set(i, set(i.var.family))  # leaf node i
        else: view.cls_set(i, view.sep(c1) | view.sep(c2))

    if verbose:
        u.show('  View ranks : ' + view.ranks_str())
def trace(query_var, evidence_vars, tbn, jt, og):
    assert tbn._for_inference
    assert tbn == jt.tbn
    # the following need to be relaxed
    assert not query_var.has_pruned_values()
    assert not any(var.has_pruned_values() for var in evidence_vars)

    ops = og.add_evidence_ops(
        evidence_vars)  # ops that construct tensors for evidence
    jt.declare_evidence(evidence_vars,
                        ops)  # save ops in jointree for later lookup

    # qcontext: captures the pruned tbn used to compute posterior on query_var
    qcontext = prune.for_node_posterior(query_var, evidence_vars, tbn)

    # add ops that will create tensors for selected cpts (if any)
    for var in qcontext.testing_nodes:  # top-down
        __selected_cpt(var, qcontext, jt, og)  # also prunes

    # add ops that will create tensor for the posterior over query_node
    __node_posterior(query_var, qcontext, jt, og)

    hit_rate = jt.hits * 100 / jt.lookups if jt.lookups > 0 else 0
    all_count = len(qcontext.testing_nodes)
    live_count = qcontext.live_count
    sval_count = sum(1 for n in tbn.nodes if len(n.values) == 1)
    pruned_count = len(tbn.nodes) - len(qcontext.nodes)
    pruned_perct = pruned_count * 100 / len(tbn.nodes)
    u.show(
        f'  Tracing posterior for \'{query_var.name}\':\n'
        f'    og-cache lookups {jt.lookups}, hits {jt.hits}, rate {hit_rate:.1f}%\n'
        f'    selected cpts: all {all_count}, live {live_count}\n'
        f'    single-value nodes: {sval_count}\n'
        f'    pruned nodes: {pruned_count}, percentage {pruned_perct:.1f}%')
def train(size,output,data_size,testing,use_bk,tie_parameters):
    circuit_type = 'TAC' if testing else 'AC'
    u.show(f'\n===Training {circuit_type} for rectangle {output} in {size}x{size} images, use_bk {use_bk}, tie {tie_parameters}')

    # get training and testing data (labels are one-hot)
    t_evidence, t_labels = rdata.get(size,output,noisy_image_count=size,noise_count=size)
    v_evidence, v_labels = rdata.get(size,output,noisy_image_count=2*size,noise_count=2*size)
    
    # get model
    bn, inputs = rmodel.get(size,output,testing,use_bk,tie_parameters)
    
    # compile model to circuit
    circuit = tac.TAC(bn,inputs,output,trainable=True,profile=False)
    
    # use a random subset of the generated data
    t_percentage = data_size / len(t_labels)
    v_percentage = max(1000,data_size)/len(v_labels) # no less than 1000
    t_evidence, t_labels = data.random_subset(t_evidence,t_labels,t_percentage)
    v_evidence, v_labels = data.random_subset(v_evidence,v_labels,v_percentage)
  
    # train AC
    circuit.fit(t_evidence,t_labels,loss_type='CE',metric_type='CA')
    
    # compute accuracy
    accuracy = circuit.metric(v_evidence,v_labels,metric_type='CA')
    u.show(f'\n{circuit_type} accuracy {100*accuracy:.2f}')
    
    return (100*accuracy, circuit)
Example #4
def generate_samples(model, valid_loader):
    model.load_state_dict(torch.load(args.save_path))
    model.eval()
    (x, _) = next(iter(valid_loader))
    x = x.to(device)

    x_prime, vq_loss, perplexity = model(x)

    utils.show(x_prime, "results/valid_recon.png")
    utils.show(x, "results/valid_originals.png")
Example #5
def train(epoch):
    epoch_loss = 0
    epoch_loss_indiv = [0 for x in range(len(target_channels))]
    epoch_ssim_indiv = [0 for x in range(len(target_channels))]
    for iteration, batch in enumerate(training_data_loader, 1):
        inp, target = batch[0].to(device), batch[1].to(device)

        optimizer.zero_grad()
        prediction = model(inp)

        loss = 0
        for x in range(len(target_channels)):
            loss_x = criterion(prediction[:, x, :, :], target[:, x, :, :])
            ssim_x = calc_ssim(
                prediction[:, x, :, :].view(len(batch[0]), 1, 240, 240),
                target[:, x, :, :].view(len(batch[0]), 1, 240, 240))
            epoch_loss_indiv[x] += loss_x.item()
            epoch_ssim_indiv[x] += ssim_x
            loss += loss_x

        epoch_loss += loss.item()
        loss.backward()
        optimizer.step()

        print("===> Epoch[{}]({}/{}): Loss: {:.4f}".format(
            epoch, iteration, len(training_data_loader), loss.item()))

    epochLoss = epoch_loss / len(training_data_loader)

    for x in range(len(target_channels)):
        epoch_loss_indiv[x] /= len(training_data_loader)
        epoch_ssim_indiv[x] /= len(training_data_loader)

    psnr_indiv = list(map(calc_psnr, epoch_loss_indiv))
    psnr = calc_psnr(epochLoss)
    print("psnr_indiv= ", psnr_indiv, "global_psnr= ", psnr)

    print("===> Epoch {} Complete: Avg. Loss: {:.4f}".format(epoch, epochLoss))
    pred = prediction.cpu()[0]
    target = target.cpu()[0]

    for p, t, c_name in zip(pred, target, target_channels):
        show(epoch,
             p,
             t,
             criterion,
             images_path,
             args.model_name,
             title="train_" + c_name)
    return psnr, psnr_indiv, epoch_ssim_indiv
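
The per-channel MSE values above are converted to PSNR via calc_psnr, which is not included in these snippets. Based on the formula used in test() below (10 * log10(1 / mse), for intensities scaled to [0, 1]), a minimal sketch of such a helper might look as follows; the name and signature are assumptions:

from math import log10

def calc_psnr(mse):
    # PSNR in dB for intensities normalized to [0, 1]; guard against a zero MSE
    return 10 * log10(1 / mse) if mse > 0 else float('inf')

# e.g. calc_psnr(0.01) == 20.0
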
def train(size,
          digits,
          data_size,
          testing,
          use_bk,
          tie_parameters,
          remove_common=False):
    assert size >= 7
    assert all(d in range(10) for d in digits)

    circuit_type = 'TAC' if testing else 'AC'
    u.show(
        f'\n===Training {circuit_type} for digits {digits} in {size}x{size} images, use_bk {use_bk}, tie {tie_parameters}'
    )

    # get model
    net, inputs, output = dmodel.get(size, digits, testing, use_bk,
                                     tie_parameters, remove_common)

    # get data (ground truth)
    t_evidence, t_labels = ddata.get(size,
                                     digits,
                                     noisy_image_count=100,
                                     noise_count=size)
    v_evidence, v_labels = ddata.get(size,
                                     digits,
                                     noisy_image_count=200,
                                     noise_count=size)

    # compile model into circuit
    circuit = tac.TAC(net, inputs, output, trainable=True, profile=False)

    # get random subset of data
    t_percentage = data_size / len(t_labels)
    v_percentage = max(1000, data_size) / len(v_labels)  # no less than 1000
    t_evidence, t_labels = data.random_subset(t_evidence, t_labels,
                                              t_percentage)
    v_evidence, v_labels = data.random_subset(v_evidence, v_labels,
                                              v_percentage)

    # fit circuit
    circuit.fit(t_evidence, t_labels, loss_type='CE', metric_type='CA')

    # compute accuracy
    accuracy = circuit.metric(v_evidence, v_labels, metric_type='CA')
    u.show(f'\n{circuit_type} accuracy {100*accuracy:.2f}')
    def fit(self,
            evidence,
            marginals,
            loss_type,
            metric_type,
            *,
            batch_size=32):
        evd_size = data.evd_size(evidence)  # number of examples
        batch_size = min(evd_size, batch_size)  # used batch size

        u.input_check(self.trainable, f'TAC is not trainable')
        u.input_check(data.is_evidence(evidence), f'evidence is ill formatted')
        u.input_check(
            data.evd_is_hard(evidence, self.input_nodes,
                             self.hard_input_nodes), f'evidence must be hard')
        u.input_check(data.evd_matches_input(evidence, self.input_nodes),
                      f'evidence must match evidence nodes of tbn')
        u.input_check(data.is_marginals(marginals), f'marginals ill formatted')
        u.input_check(data.mar_matches_output(marginals, self.output_node),
                      f'marginals must match query node of tbn')
        u.input_check(loss_type in self.loss_types,
                      f'loss {loss_type} is not supported')
        u.input_check(metric_type in self.metric_types,
                      f'metric {metric_type} is not supported')
        u.input_check(
            data.evd_size(evidence) == len(marginals),
            f'evidence size must match marginals size')

        u.show(f'\nTraining {self.circuit_type}:')
        start_training_time = time.perf_counter()

        epoch_count = self.trainer.train(evidence, marginals, loss_type,
                                         metric_type, batch_size)

        training_time = time.perf_counter() - start_training_time
        time_per_epoch = training_time / epoch_count

        u.show(
            f'Training Time: {training_time:.3f} sec ({time_per_epoch:.3f} sec per epoch)'
        )
def validate(size,output,testing,elm_method='minfill',elm_wait=30):
    
    circuit_type = 'TAC' if testing else 'AC'
    
    # get data (ground truth)
    evidence, labels = rdata.get(size,output)
    
    u.show(f'\n===Checking {circuit_type} for rectangle {output} in {size}x{size} images: {len(labels)} total')
    
    # get model
    bn, inputs = rmodel.get(size,output,testing=testing,use_bk=True,tie_parameters=False)
    
    # compile model
    AC = tac.TAC(bn,inputs,output,trainable=False,profile=False,
            elm_method=elm_method,elm_wait=elm_wait)

    # evaluate TAC on evidence to get predictions
    predictions = AC.evaluate(evidence)

    # verify that predictions match one_hot_marginals
    if u.equal(predictions,labels): 
        u.show('\n===All good!')
    else:
        u.show('***bumper!!!')
        quit()
Example #9
    def verify_numpy(self, evidence, marginals1):

        size = batch_size = d.evd_size(evidence)

        u.show(
            f'\nVerifying against classical AC (numpy arrays, batch_size {batch_size})'
        )

        # split lambdas into scalars (with batch)
        evidence = self.split_evidence(evidence)

        eval_time = 0  # pure evaluation time (add/mul/div)
        for start in range(0, size, batch_size):
            u.show(f'{int(100*start/size):4d}%\r', end='', flush=True)
            stop = start + batch_size
            evidence_batch = d.evd_slice(evidence, start, stop)
            marginals_batch, et = self.evaluate_numpy(evidence_batch)
            marginals_batch = self.gather_marginals(marginals_batch)
            if start == 0:
                marginals2 = marginals_batch
            else:
                marginals2 = np.concatenate((marginals2, marginals_batch),
                                            axis=0)
            eval_time += et  # accumulate evaluation time for this batch

        u.equal(marginals1, marginals2, tolerance=True)

        size = d.evd_size(evidence)
        u.show(
            f'Evaluation Time: {eval_time:.3f} sec ({1000*eval_time/size:.0f} ms per example)'
        )
        return eval_time, batch_size
Example #10
    def verify_array(self, evidence, marginals1):
        u.show(f'\nVerifying against classical AC (array)...')

        size = d.evd_size(evidence)
        rows = d.evd_col2row(evidence)
        marginals2 = []

        # evaluation time excludes assertion of evidence
        eval_time = 0  # pure evaluation time (add/mul/div)
        for lambdas in rows:
            self.assert_evidence_array(lambdas)
            marginal, et = self.evaluate_array()  # np array
            marginals2.append(marginal)
            eval_time += et

        marginals2 = np.array(marginals2, dtype=np.float32)
        u.equal(marginals1, marginals2, tolerance=True)

        u.show(
            f'Evaluation Time: {eval_time:.3f} sec ({1000*eval_time/size:.0f} ms per example)'
        )
        return eval_time, 1
    def metric(self, evidence, labels, metric_type, *, batch_size=64):
        evd_size = data.evd_size(evidence)  # number of examples
        batch_size = min(evd_size, batch_size)  # used batch size

        u.input_check(data.is_evidence(evidence), f'evidence is ill formatted')
        u.input_check(
            data.evd_is_hard(evidence, self.input_nodes,
                             self.hard_input_nodes), f'evidence must be hard')
        u.input_check(data.evd_matches_input(evidence, self.input_nodes),
                      f'evidence must match evidence nodes of tbn')
        u.input_check(data.is_marginals(labels, one_hot=(metric_type == 'CA')),
                      f'labels ill formatted')
        u.input_check(data.mar_matches_output(labels, self.output_node),
                      f'labels must match query node of tbn')
        u.input_check(metric_type in self.metric_types,
                      f'metric {metric_type} is not supported')

        u.show(f'\nComputing {metric_type}: evidence size {evd_size}, '
               f'batch size {batch_size}')

        start_eval_time = time.perf_counter()

        batches, _ = data.data_batches(evidence, labels, batch_size)
        result = 0
        for evd_batch, lab_batch in batches:
            bresult = self.tac_graph.compute_metric(metric_type, evd_batch,
                                                    lab_batch)
            result += bresult * len(lab_batch)
        result /= evd_size  # average weighted by batch size (last batch may be smaller)

        evaluation_time = time.perf_counter() - start_eval_time
        time_per_example = evaluation_time / evd_size

        u.show(f'{metric_type} Time: {evaluation_time:.3f} sec '
               f'({time_per_example:.4f} sec per example)')

        return result
Example #12
def test(epoch):
    avg_psnr = 0
    epoch_loss_indiv = [0 for x in range(len(target_channels))]
    epoch_ssim_indiv = [0 for x in range(len(target_channels))]
    with torch.no_grad():
        for _, batch in enumerate(testing_data_loader):
            inp, target = batch[0].to(device), batch[1].to(device)

            prediction = model(inp)
            mse = criterion(prediction, target)

            for x in range(len(target_channels)):
                loss_x = criterion(prediction[:, x, :, :], target[:, x, :, :])
                ssim_x = calc_ssim(
                    prediction[:, x, :, :].view(len(batch[0]), 1, 240, 240),
                    target[:, x, :, :].view(len(batch[0]), 1, 240, 240))
                epoch_loss_indiv[x] += loss_x.item()
                epoch_ssim_indiv[x] += ssim_x

            psnr = 10 * log10(1 / mse.item())
            avg_psnr += psnr

        print("===> Avg. PSNR: {:.4f} dB".format(avg_psnr /
                                                 len(testing_data_loader)))
        pred = prediction.cpu()[0]
        target = target.cpu()[0]

        for p, t, c_name in zip(pred, target, target_channels):
            show(epoch, p, t, criterion, images_path, args.model_name,
                 " " + c_name)

        for x in range(len(target_channels)):
            epoch_loss_indiv[x] /= len(testing_data_loader)
            epoch_ssim_indiv[x] /= len(testing_data_loader)

        psnr_indiv = list(map(calc_psnr, epoch_loss_indiv))
    return avg_psnr / len(testing_data_loader), psnr_indiv, epoch_ssim_indiv
Example #13
    def train(self, evidence, marginals, loss_type, metric_type, batch_size):
        assert loss_type in self.loss_types and metric_type in self.metric_types

        # split data into training and validation (after randomly shuffling it)
        t_data, v_data = data.random_split(evidence, marginals,
                                           self.split_ratio)

        t_evidence, t_marginals, t_size = t_data
        v_evidence, v_marginals, v_size = v_data

        # batch memory is based on tf graphs for tac, optimizer and metrics
        tac_graph = self.tac.tac_graph
        circuit_type = self.tac.circuit_type
        parameter_count = tac_graph.parameter_count
        fixed_zeros_count = tac_graph.fixed_zeros_count
        batch_count = ceil(t_size / batch_size)

        u.show(f'  loss: {loss_type}, metric: {metric_type}\n'
               f'  data: training {t_size}, validation {v_size}\n'
               f'  batch: size {batch_size}, count {batch_count}\n'
               f'  trainable parameters  {parameter_count}\n'
               f'  fixed zero parameters {fixed_zeros_count}')

        # initialize trainer and tac_graph optimizer
        batch_size = self.__init_training(loss_type, metric_type, t_size,
                                          batch_size)

        # initialize the tac weights (try a few random weights and pick best)
        weights_epochs = self.__find_initial_weights(t_evidence, t_marginals,
                                                     v_evidence, v_marginals,
                                                     batch_size)
        # train
        for epoch in range(self.epochs_count):

            # optimize loss on training data, compute metric on validation data
            t_loss, lr = self.__optimize_loss(loss_type, t_evidence,
                                              t_marginals, batch_size, epoch)
            v_metric = self.__compute_metric(metric_type, v_evidence,
                                             v_marginals, batch_size)

            # logging for tensorboard
            tac_graph.log(epoch, t_loss, v_metric, lr)

            # main control
            stop, save, event = self.__analyze_epoch(v_metric, epoch)
            if stop or event:
                u.show((f'\r  epoch {epoch:5d}: t_loss {t_loss:.8f}, '
                        f'v_metric {v_metric:.8f}, lr {lr:.4f}{event}'))
            if save: tac_graph.save_current_weights()
            if stop: break

        # restore learned weights and write them to file
        fname = paths.cpts / f'{circuit_type}.txt'
        u.show(f'  writing learned CPTs to {fname}')
        tac_graph.end_training(fname)

        return weights_epochs + epoch + 1  # total number of epochs we performed
Example #14
    def __init__(self, opsgraph):
        assert not opsgraph.trainable and not opsgraph.testing

        u.show(f'\nConstructing classical AC...')
        start_compile_time = time.perf_counter()

        # list of add/mul/div nodes, topologically sorted (bottom up)
        self.nodes = None
        # list of AC nodes representing evidence (one per var/value)
        self.evd_nodes = []
        # list of AC nodes representing parameters (cpt entries)
        self.parameter_nodes = []
        # list of evidence lambdas
        self.lambdas = []  # each lambda is a tuple of evidence nodes (lambda per var)
        # factor that contains output AC nodes (marginal)
        self.output_factor = None
        # size of AC (number of nodes)
        self.size = None

        # maps ops.op into its factor (result of executing operation)
        op2factor = {}

        # execution will populate the nodes, lambdas and roots fields
        Node.instances = []  # created add/mul/div nodes will be added to this list
        for op in opsgraph.ops:  # bottom up
            factor = self.execute(op, op2factor)
            if type(op) == ops.EvidenceOp:
                self.lambdas.append(factor)
                self.evd_nodes.extend(factor.nodes())
            elif type(op) == ops.FixedCptOp:
                self.parameter_nodes.extend(factor.nodes())
        self.nodes = Node.instances  # add/mul/div nodes

        # order of lambdas should match order of opsgraph inputs
        assert opsgraph.evidence_vars == tuple(f.vars[0] for f in self.lambdas)
        self.lambdas = tuple(f.nodes() for f in self.lambdas)

        # saving output nodes of AC
        output_op = opsgraph.ops[-1]
        self.output_factor = op2factor[output_op]

        # computing AC size
        self.size = len(self.nodes) + len(self.parameter_nodes) + len(
            self.evd_nodes)

        compile_time = time.perf_counter() - start_compile_time

        u.show(f'  AC size {self.size:,}')
        u.show(f'Compile Time: {compile_time:.3f} sec')
 def elm_order(self, solver, wait):
     u.show(f'    calling {solver}...', end='')
     graph_fname = 'decompose/tmp/graph.gr'
     tree_fname = 'decompose/tmp/tree.td'
     if solver == 'flow cutter':
         program = 'flow_cutter_pace17'
         cmd = [f'./decompose/solvers/{program}']
         online = True
     elif solver == 'tamaki heuristic':
         program = 'tamaki/tw-heuristic'
         cmd = [f'./decompose/solvers/{program}']
         online = True
     elif solver == 'tamaki exact':
         program = 'tamaki/tw-exact'
         cmd = [f'./decompose/solvers/{program}']
         online = False
     # write graph to file
     self.write(graph_fname)
     # call tree decomposition program
     with open(f'{graph_fname}', "r") as input, open(f'{tree_fname}',
                                                     "w") as output:
         process = subprocess.Popen(cmd, stdin=input, stdout=output)
         if online:
             u.show(f'waiting {wait} sec...', end='', flush=True)
             sleep(wait)
             process.send_signal(signal.SIGTERM)
         else:
             process.wait()  # blocks python until process returns
     code = process.returncode
     _, error = process.communicate()
     process.kill()
     u.check(code != 0, f'failed to execute {solver} because\n  {error}',
             f'using treewidth solver')
     u.show('done')
     # read decomposition tree from file
     tree = TreeD(tree_fname)
     # convert decomposition tree to elimination order (vertices)
     vertex_order = tree.elm_order()
     # return elimination order of tbn nodes
     stats = f'elm order: cls max {tree.width}'
     return self.vertices2nodes(vertex_order), tree.width, stats
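
The anytime ('online') solvers above are launched with stdin/stdout redirected to files and stopped with SIGTERM after a fixed wait. A minimal standalone illustration of that pattern, using the Unix 'sleep' command as a stand-in for the solver binary:

import signal
import subprocess
from time import sleep

with open('solver_output.txt', 'w') as output:
    process = subprocess.Popen(['sleep', '100'], stdout=output)  # stand-in for the solver command
    sleep(2)                             # let the anytime solver run for a while
    process.send_signal(signal.SIGTERM)  # stop it, as elm_order does for online solvers
process.wait()
print('return code:', process.returncode)  # negative (-SIGTERM) when terminated this way
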
    def __compile(self, net, inputs, output, hard_inputs, trainable,
                  elm_method, elm_wait, profile):
        if profile: u.show('\n***PROFILER ON***')
        u.show(f'\nCompiling {self.network_type} into {self.circuit_type}')
        start_compile_time = time.time()

        # net1 and net2 have nodes corresponding to inputs and output (same names)
        net1 = net.copy_for_inference()
        self.tbn = net1
        self.input_nodes = u.map(net1.node, inputs)
        self.output_node = net1.node(output)
        self.hard_input_nodes = u.map(net1.node, hard_inputs)

        # decouple net1 for more efficient compilation
        # net2 is only used to build jointree (duplicate functional cpts)
        net2, elm_order, _ = decouple.get(net1, self.hard_input_nodes,
                                          trainable, elm_method, elm_wait)
        # net2 may be equal to net1 (no decoupling)
        # if net2 != net1 (decoupling happened), then net2._decoupling_of = net1

        # compile tbn into an ops_graph
        jt = jointree.Jointree(net2, elm_order, self.hard_input_nodes,
                               trainable)
        ops_graph = og.OpsGraph(trainable, net.testing)  # empty

        # inference will populate ops_graph with operations that construct tac_graph
        inference.trace(self.output_node, self.input_nodes, net1, jt,
                        ops_graph)
        if u.verbose: ops_graph.print_stats()

        # construct tac_graph by executing operations of ops_graph
        self.ops_graph = ops_graph
        self.tac_graph = tg.TacGraph(ops_graph, profile)
        self.size = self.tac_graph.size
        self.rank = self.tac_graph.rank
        self.binary_rank = self.tac_graph.binary_rank
        self.parameter_count = self.tac_graph.parameter_count

        compile_time = time.time() - start_compile_time
        u.show(f'Compile Time: {compile_time:.3f} sec')
def validate(size, digits, testing, elm_method='minfill', elm_wait=30):
    assert size >= 7
    assert all(d in range(10) for d in digits)

    # get data (ground truth)
    evidence, labels = ddata.get(size, digits)
    data_size = len(labels)

    circuit_type = 'TAC' if testing else 'AC'
    u.show(
        f'\n===Checking {circuit_type} for digits {digits} in {size}x{size} images: {data_size} total'
    )

    # get model
    net, inputs, output = dmodel.get(size,
                                     digits,
                                     testing,
                                     use_bk=True,
                                     tie_parameters=False,
                                     remove_common=False)

    # compile model into circuit
    circuit = tac.TAC(net,
                      inputs,
                      output,
                      trainable=False,
                      profile=False,
                      elm_method=elm_method,
                      elm_wait=elm_wait)

    # evaluate circuit on evidence to get predictions
    predictions = circuit.evaluate(evidence)

    # verify that predictions match labels
    if u.equal(predictions, labels):
        u.show('\n===All good!\n')
    else:
        u.show('***bumper!!!')
        quit()
    def evaluate(self, evidence, *, batch_size=64, report_time=False):
        evd_size = data.evd_size(evidence)  # number of examples
        batch_size = min(evd_size, batch_size)  # used batch size

        u.input_check(data.is_evidence(evidence),
                      f'TAC evidence is ill formatted')
        u.input_check(
            data.evd_is_hard(evidence, self.input_nodes,
                             self.hard_input_nodes),
            f'TAC evidence must be hard')
        u.input_check(data.evd_matches_input(evidence, self.input_nodes),
                      f'TAC evidence must match evidence tbn nodes')

        u.show(f'\nEvaluating {self.circuit_type}: evidence size {evd_size}, '
               f'batch size {batch_size}')

        marginals = None
        eval_time = 0
        for i, evd_batch in enumerate(data.evd_batches(evidence, batch_size)):
            u.show(f'{int(100*i*batch_size/evd_size):4d}%\r', end='', flush=True)
            start_time = time.perf_counter()
            mar_batch = self.tac_graph.evaluate(evd_batch)
            eval_time += time.perf_counter() - start_time
            if marginals is None: marginals = mar_batch
            else: marginals = np.concatenate((marginals, mar_batch), axis=0)

        time_per_example = eval_time / evd_size
        time_per_million = time_per_example / (self.size / 1000000)

        u.show(f'\rEvaluation Time: {eval_time:.3f} sec '
               f'({1000*time_per_example:.1f} ms per example,'
               f' {1000*time_per_million:.1f} ms per 1M tac nodes)')

        assert data.mar_matches_output(marginals, self.output_node)
        assert data.mar_is_predictions(marginals)

        if report_time:
            return marginals, eval_time, batch_size
        return marginals
Example #19
    def verify_tf_graph(self, evidence, marginals1):
        assert self.tf_ac is not None

        size = batch_size = d.evd_size(evidence)

        u.show(
            f'\nVerifying against classical AC (tf graph, batch_size {batch_size})'
        )

        # split lambdas into scalars (with batch)
        evidence = self.split_evidence(evidence)

        # tf graph accepts only tensors as input
        evidence = tuple(tf.constant(e, dtype=tf.float32) for e in evidence)

        start_eval_time = time.perf_counter()
        for start in range(0, size, batch_size):
            u.show(f'{int(100*start/size):4d}%\r', end='', flush=True)
            stop = start + batch_size
            evidence_batch = d.evd_slice(evidence, start, stop)
            marginals_batch = self.tf_ac(
                *evidence_batch)  # evaluating tf graph
            marginals_batch = self.gather_marginals(marginals_batch)
            if start == 0:
                marginals2 = marginals_batch
            else:
                marginals2 = np.concatenate((marginals2, marginals_batch),
                                            axis=0)
        eval_time = time.perf_counter() - start_eval_time

        u.equal(marginals1, marginals2, tolerance=True)

        size = d.evd_size(evidence)
        u.show(
            f'Evaluation Time: {eval_time:.3f} sec ({1000*eval_time/size:.0f} ms per example)'
        )
        return eval_time, batch_size
Example #20
    def __find_initial_weights(self, t_evidence, t_marginals, v_evidence,
                               v_marginals, batch_size):
        #u.show(f'  optimizer warming up...\r',end='',flush=True)
        u.show(f'  finding initial weights (starting with uniform):',
               end='',
               flush=True)

        loss_type = self.loss_type
        metric_type = self.metric_type
        tac_graph = self.tac.tac_graph
        best_loss = None
        best_metric = self.metric_best_value  # initialized to worst possible value
        epochs = 0  # number of epochs we will try

        # weights already set to uniform in ops.py so we will try these first
        for i in range(self.weight_tries):
            epochs += 2  # we do a two-step lookahead

            # loss and metric before trying to improve current weights
            pre_loss = self.__compute_metric(loss_type, t_evidence,
                                             t_marginals, batch_size)
            pre_metric = self.__compute_metric(metric_type, v_evidence,
                                               v_marginals, batch_size)

            # loss and metric after improving current weights using GD
            for _ in range(2):  # two-step lookahead
                loss, _ = self.__optimize_loss(loss_type, t_evidence,
                                               t_marginals, batch_size, None)
            metric = self.__compute_metric(metric_type, v_evidence,
                                           v_marginals, batch_size)

            # printing details that are helpful to sanity check behavior
            u.show(f'\n    t_loss {pre_loss:11.8f} -> {loss:11.8f}, ',
                   f'v_metric {pre_metric:11.8f} -> {metric:11.8f}',
                   end='',
                   flush=True)

            # reset optimizer before quitting in case we decide we are done
            tac_graph.reset_optimizer()

            # see if the current weights improve on the previous ones
            assert i != 0 or self.metric_comp(metric, best_metric)
            if self.metric_comp(metric, best_metric):
                best_loss = loss
                best_metric = metric
                tac_graph.save_current_weights()
                if self.metric_comp(metric, self.metric_target):
                    break  # found good-enough initial weights

            # try a new set of random weights if this is not the last iteration
            if i < self.weight_tries - 1:
                tac_graph.assign_random_weights()

        u.show(
            f'\n  starting at: t_loss {best_loss:.8f}, v_metric {best_metric:.8f}, '
            f'found after {epochs} epochs',
            flush=True)

        # use the best found weights
        self.metric_best_value = best_metric
        tac_graph.restore_saved_weights()
        return epochs
Example #21
def main():
    # Configs
    args = get_args()
    cfg = Config(args.config)
    pose_kwargs = cfg.POSE
    clf_kwargs = cfg.CLASSIFIER
    tracker_kwargs = cfg.TRACKER

    # Initiate video/webcam
    source = args.source if args.source else 0
    video = Video(source)

    ## Initiate trtpose, deepsort and action classifier
    pose_estimator = get_pose_estimator(**pose_kwargs)
    if args.task != 'pose':
        tracker = get_tracker(**tracker_kwargs)
        if args.task == 'action':
            action_classifier = get_classifier(**clf_kwargs)

    ## initiate drawer and text for visualization
    drawer = Drawer(draw_numbers=args.draw_kp_numbers)
    user_text = {
        'text_color': 'green',
        'add_blank': True,
        'Mode': args.task,
        # MaxDist: cfg.TRACKER.max_dist,
        # MaxIoU: cfg.TRACKER.max_iou_distance,
    }

    # loop over the video frames
    for bgr_frame in video:
        rgb_frame = cv2.cvtColor(bgr_frame, cv2.COLOR_BGR2RGB)
        # predict pose estimation
        start_pose = time.time()
        predictions = pose_estimator.predict(rgb_frame, get_bbox=True) # return predictions which include keypoints in trtpose order, bboxes (x,y,w,h)
        # if no keypoints, update the tracker's memory and its age
        if len(predictions) == 0 and args.task != 'pose':
            debug_img = bgr_frame
            tracker.increment_ages()
        else:
            # if task is 'pose', only keypoints are drawn; otherwise run tracking (and action recognition)
            if args.task != 'pose':
                # Tracking
                # start_track = time.time()
                predictions = utils.convert_to_openpose_skeletons(predictions)
                predictions, debug_img = tracker.predict(rgb_frame, predictions,
                                                                debug=args.debug_track)
                # end_track = time.time() - start_track

                # Action Recognition
                if len(predictions) > 0 and args.task == 'action':
                    predictions = action_classifier.classify(predictions)

        end_pipeline = time.time() - start_pose
        # add user's desired text on render image
        user_text.update({
            'Frame': video.frame_cnt,
            'Speed': '{:.1f}ms'.format(end_pipeline*1000),
        })

        # draw predicted results on bgr_img with frame info
        render_image = drawer.render_frame(bgr_frame, predictions, **user_text)

        if video.frame_cnt == 1 and args.save_folder:
            # initiate writer for saving rendered video.
            output_suffix = get_suffix(args, cfg)
            output_path = video.get_output_file_path(
                args.save_folder, suffix=output_suffix)
            writer = video.get_writer(render_image, output_path, fps=30)

            if args.debug_track and args.task != 'pose':
                debug_output_path = output_path[:-4] + '_debug.avi'
                debug_writer = video.get_writer(debug_img, debug_output_path)
            print(f'[INFO] Saving video to : {output_path}')
        # show frames
        try:
            if args.debug_track and args.task != 'pose':
                debug_writer.write(debug_img)
                utils.show(debug_img, window='debug_tracking')
            if args.save_folder:
                writer.write(render_image)
            utils.show(render_image, window='webcam' if isinstance(source, int) else osp.basename(source))
        except StopIteration:
            break
    if args.debug_track and args.task != 'pose':
        debug_writer.release()
    if args.save_folder and len(predictions) > 0:
        writer.release()
    video.stop()
def main():
    t0 = time.time()
    # Settings
    cfg = Config(config_file='../configs/train_action_recogn_pipeline.yaml')
    cfg.merge_from_file('../configs/infer_trtpose_deepsort_dnn.yaml')
    cfg_stage = cfg[os.path.basename(__file__)]
    img_format = cfg.img_format

    ## IO folders
    get_path = lambda x: os.path.join(*x) if isinstance(x, (list, tuple)) else x
    src_imgs_folder = get_path(cfg_stage.input.imgs_folder)
    src_valid_imgs = get_path(cfg_stage.input.valid_imgs)
    dst_skeletons_folder = get_path(cfg_stage.output.skeletons_folder)
    dst_imgs_folder = get_path(cfg_stage.output.imgs_folder)
    dst_imgs_info_txt = get_path(cfg_stage.output.imgs_info_txt)

    # initiate pose estimator
    pose_estimator = get_pose_estimator(**cfg.POSE)
    drawer = Drawer(draw_numbers=True)

    # Init output path
    print(
        f"[INFO] Creating output folder -> {os.path.dirname(dst_skeletons_folder)}"
    )
    os.makedirs(dst_imgs_folder, exist_ok=True)
    os.makedirs(dst_skeletons_folder, exist_ok=True)
    os.makedirs(os.path.dirname(dst_imgs_info_txt), exist_ok=True)

    # train val images reader
    images_loader = ReadValidImagesAndActionTypesByTxt(src_imgs_folder,
                                                       src_valid_imgs,
                                                       img_format)
    images_loader.save_images_info(dst_imgs_info_txt)
    print(f'[INFO] Total Images -> {len(images_loader)}')

    # Read images and process
    loop = tqdm(range(len(images_loader)), total=len(images_loader))
    for i in loop:
        img_bgr, label, img_info = images_loader.read_image()
        img_disp = img_bgr.copy()
        img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)

        # predict trtpose skeleton and save to file as openpose format
        predictions = pose_estimator.predict(img_rgb, get_bbox=False)

        if len(predictions) == 0: continue
        predictions = utils.convert_to_openpose_skeletons(predictions)

        # save predicted image
        save_name = img_format.format(i)
        img_name = os.path.join(dst_imgs_folder, save_name)

        img_disp = drawer.render_frame(img_disp, predictions)
        cv2.imwrite(img_name, img_disp)
        try:
            utils.show(img_disp, wait=1)
        except StopIteration:
            break

        # save skeletons in text file
        skeleton_txt = os.path.join(dst_skeletons_folder,
                                    save_name[:-4] + '.txt')
        save_data = [img_info + pred.flatten_keypoints for pred in predictions]
        with open(skeleton_txt, 'w') as f:
            json.dump(save_data, f)

        # update progress bar descriptions
        loop.set_description(f'action -> {label}')
        loop.set_postfix(num_of_person=len(predictions))

    loop.close()
    cv2.destroyAllWindows()
    t1 = time.gmtime(time.time() - t0)
    total_time = time.strftime("%H:%M:%S", t1)

    print('Total Extraction Time', total_time)
    print(
        tabulate([list(images_loader.labels_info.values())],
                 list(images_loader.labels_info.keys()), 'grid'))
Example #23
 def print_cache_info():
     u.show('  restructure', Dims.restructure_into.cache_info())
     u.show('  re-multiply', Dims.restructure_for_multiply.cache_info())
     u.show('  re-mulpro  ', Dims.restructure_for_mulpro.cache_info())
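
The cache_info() calls above suggest that these Dims restructuring methods are memoized with functools.lru_cache; a minimal standalone illustration of what such a report looks like (the Dims internals are not part of these snippets):

from functools import lru_cache

@lru_cache(maxsize=None)
def restructure_demo(dims):  # stand-in for Dims.restructure_into
    return tuple(sorted(dims))

restructure_demo((2, 3))
restructure_demo((2, 3))  # second call is a cache hit
print(restructure_demo.cache_info())  # CacheInfo(hits=1, misses=1, maxsize=None, currsize=1)
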
Example #24
def main(argv):

    try:
        opts, args = getopt.getopt(argv, 'dfhs', [])
    except getopt.GetoptError:
        print('usage: main.py -d -f -h -s')
        sys.exit(2)

    for opt, arg in opts:
        if opt == '-f':
            tacgraph.force_profile = True
        elif opt == '-d':
            p.set_double_precision()
        elif opt == '-s':
            u.set_silent()
        else:  # covers -h
            print('usage: main.py -d -f -h -s')
            sys.exit()


if __name__ == '__main__':
    main(sys.argv[1:])

    ram = u.system_RAM_GB()

    u.show('\nPyTac Version 1.2.2, 2020 © Adnan Darwiche')
    u.show(f'RAM {ram:.2f} GB, processors {cpu_count()}')
    u.show(f'TF {tf.__version__}, {p.precision} precision')

    play.play()
def posteriors(bn, inputs, output, evidence):
    u.show('\nRunning VE...', end='', flush=True)
    assert not bn.testing and len(inputs) == len(evidence)

    # we will perform elimination only on nodes that are connected to output
    qnode = bn.node(output)  # query node
    nodes = qnode.connected_nodes()  # set
    assert qnode in nodes

    # identify inputs and evidence connected to query node
    evidence_ = evidence
    enodes, evidence = [], []
    for i, e in zip(inputs, evidence_):
        n = bn.node(i)  # evidence node
        if n in nodes:  # connected to query
            enodes.append(n)
            evidence.append(e)  # e is a batch of lambdas for node n
    assert enodes and evidence  # output must be connected to some input

    # maps bn node to Var
    node2var = {n: Var(bn_node=n) for n in nodes}
    nodes2vars = lambda nodes_: tuple(node2var[n] for n in nodes_)

    # construct batch Var
    batch_size = data.evd_size(evidence)
    batch_var = Var(batch_size=batch_size)

    # get elimination order
    order, _, _, _ = bn.elm_order('minfill')
    elm_order = tuple(node2var[n] for n in order if n != qnode and n in nodes)

    # bn factors
    evd_factor = lambda evd, node: Factor(evd, (batch_var, node2var[node]))
    cpt_factor = lambda cpt, node: Factor(
        cpt, nodes2vars(node.family), sort=True)
    indicators = tuple(evd_factor(evd, n) for evd, n in zip(evidence, enodes))
    cpts = tuple(cpt_factor(n.tabular_cpt(), n) for n in nodes)
    query_var = node2var[qnode]

    # indexing factors for lookup during elimination
    # factor.tvars exclude the batch var
    scalars = set()  # scalar factors (have no vars)
    var2factors = {var: set() for var in elm_order}  # maps var to factors containing var
    var2factors[query_var] = set()

    def index(factor):  # add factor to pool
        if factor.is_scalar:
            scalars.add(factor)
        else:
            for var in factor.tvars:
                var2factors[var].add(factor)

    def remove(factors):  # remove factors from pool
        # iterate over a copy since factors may be equal to some var2factors[var]
        for f in set(factors):
            assert not f.is_scalar
            for var in f.tvars:
                var2factors[var].remove(f)

    def get_factors(var):  # returns factors that contain var
        factors = var2factors[var]
        assert factors
        return factors

    def verify_elm(f):  # verify pool at end of elimination
        assert all(not factors for var, factors in var2factors.items()
                   if var != query_var)
        assert (not scalars) == bn.is_connected()  # scalar factors arise only if bn is disconnected

    # we are about to start eliminating vars: index them first
    for factor in indicators:
        index(factor)
    for factor in cpts:
        index(factor)

    # eliminate vars
    one = Factor.one()  # identity factor for multiplication
    for var in elm_order:
        factors = get_factors(var)  # factors that contain var
        factor = one
        for f in factors:
            factor = factor.multiply(f)
        factor = factor.sumout(var)
        remove(factors)
        index(factor)

    verify_elm(factor)

    factor = one
    for f in get_factors(query_var):
        factor = factor.multiply(f)
    for f in scalars:
        factor = factor.multiply(f)
    assert factor.has_batch and factor.tvars == (query_var, )

    factor = factor.normalize()
    u.show('done.')
    return factor.table  # ndarray
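
Since posteriors() returns a plain ndarray of marginals, it can serve as a reference for the compiled circuits used elsewhere in these examples. A sketch of such a cross-check, reusing only calls that appear in the snippets above (bn, inputs, output and evidence are assumed to be set up as in those snippets):

marginals_ve = posteriors(bn, inputs, output, evidence)  # variable elimination
circuit = tac.TAC(bn, inputs, output, trainable=False, profile=False)
marginals_ac = circuit.evaluate(evidence)                # compiled AC
u.equal(marginals_ve, marginals_ac, tolerance=True)      # compare within tolerance
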
Example #26
def __remove_dead_fcpts(view, verbose):

    pre_replica_count = sum(len(leaves) - 1 for leaves in view.fcpts.values())

    #view.dot('pre.gv')
    #u.pause()

    # dead basically means: not contributing to shrinking separators
    # leaf node i is dead if it has a functional cpt whose var is summed out
    # host cannot be dead (contains query var which is never summed out)
    # root cannot be dead (would have been pruned if it was dead)
    def dead(i):
        return i != view.host and view.has_fcpt(i) and i.var not in view.sep(i)

    # add dead fcpts to dropped and clear their separators
    dropped = set()

    def drop_dead_fcpts():
        key = lambda i: (dead(i), not i.is_host, len(i.var.parents))
        for fvar, leaves in view.fcpts.items():
            leaves.sort(key=key)  # we prefer to keep hosts
            new_leaves = []
            for index, i in enumerate(leaves):
                if index != 0 and dead(i):  # we need to keep one fcpt per fvar
                    view.sep_set(i, set())  # clear separator
                    dropped.add(i)
                else:
                    new_leaves.append(i)
            view.fcpts[fvar] = new_leaves

    # shrink separators further due to removing dead fcpts
    def shrink_separators():
        for i, _, c1, c2, _ in view.bottom_up():
            if c1:
                assert i not in dropped
                view.sep_intersect(i, view.sep(c1) | view.sep(c2))
        for i, p, _, _, s in view.top_down():
            if s and i not in dropped:
                view.sep_intersect(i, view.sep(s) | view.sep(p))
                view.cls_set(p, view.sep(i) | view.sep(p))

    # whether the view still has dead fcpts
    def more_dead():
        for leaves in view.fcpts.values():
            count = sum(dead(i) for i in leaves)
            if count > 1 or (count == 1 and len(leaves) > 1):
                return True
        return False

    # identify dead fcpts
    while True:
        drop_dead_fcpts()
        shrink_separators()
        if not more_dead(): break
    assert all(view.fcpts.values())  # at least one fcpt for each fvar

    # remove dead fcpts from view
    for i in dropped:
        __remove_leaf(view, i)
    __reconstruct(view)

    if verbose:
        replica_count = sum(len(leaves) - 1 for leaves in view.fcpts.values())
        distinct_count = sum(1 for leaves in view.fcpts.values()
                             if len(leaves) >= 2)
        u.show(
            f'   kept fcpts: {replica_count}/{pre_replica_count}, distinct {distinct_count}'
        )
Example #27
 def __print_progress(self, e, i, n, lr):
     p = 100 * (i + 1) // n
     u.show(f'  epoch {e:5d}:{p:4d}%   lr {lr:.5f}', end='', flush=True)
     # emit backspaces so the next progress update overwrites this line in place
     u.show((b'\x08' * 32).decode(), end='', flush=True)
Example #28
def __replicate_fcpts(view, verbose):

    __set_classical_cls_and_sep(view)

    if verbose:
        replica_count = sum(len(leaves) - 1 for leaves in view.fcpts.values())
        distinct_count = sum(1 for leaves in view.fcpts.values()
                             if len(leaves) >= 2)
        u.show(f'  added fcpts: {replica_count}, distinct {distinct_count}')
        u.show('  View ranks : ' + view.ranks_str())

    # compute vars that have fcpt at/below each view node
    vars = {}  # maps view node i to vars at/below node i
    ovars = {}  # maps view node i to vars outside node i
    fvars = {}  # maps view node i to vars with functional cpt at/below node i
    ofvars = {}  # maps view node i to vars with functional cpt outside node i

    def set_vars():
        nonlocal vars, ovars, fvars, ofvars
        vars, ovars, fvars, ofvars = {}, {}, {}, {}
        for i, _, c1, c2, _ in view.bottom_up():
            if not c1:  # leaf node i
                fvars[i] = set([i.var]) if view.has_fcpt(i) else set()
                vars[i] = set(i.var.family)
            else:
                fvars[i] = fvars[c1] | fvars[c2]
                vars[i] = vars[c1] | vars[c2]
        for i, p, _, _, s in view.top_down():
            if not s:  # root node i
                ofvars[i] = set([p.var]) if view.has_fcpt(p) else set()
                ovars[i] = set(p.var.family)
            else:
                ofvars[i] = fvars[s] | ofvars[p]
                ovars[i] = vars[s] | ovars[p]

    set_vars()

    fvar_order = list(view.fcpts)
    fvar_order.sort(key=lambda var: len(var.parents))

    additions = []
    for i, p, c1, c2, s in view.bottom_up():
        if not s: continue
        if c1:
            fvars[i] = fvars[c1] | fvars[c2]
            vars[i] = vars[c1] | vars[c2]
        ovars[i] = vars[s] | ovars[p]
        ofvars[i] = fvars[s] | ofvars[p]
        #        sepi = view.sep(i)
        sepi = vars[i] & ovars[i]  # outside loop
        for fvar in fvar_order:  # all functional variables
            parents = set(fvar.parents)
            cond1 = len(parents - sepi) <= 1
            cond2 = parents <= fvars[i]
            cond3 = fvar in fvars[s] and fvar not in fvars[i]
            if (cond1 or cond2) and cond3:
                additions.append((i, fvar))
                fvars[i].add(fvar)
                vars[i] |= set(fvar.family)

    # replicate fcpts in view
    for i, fvar in additions:
        leaf = __add_leaf(view, fvar, i)
        view.fcpts[fvar].append(leaf)
    __reconstruct(view)
    __set_classical_cls_and_sep(view)

    if verbose:
        replica_count = sum(len(leaves) - 1 for leaves in view.fcpts.values())
        distinct_count = sum(1 for leaves in view.fcpts.values()
                             if len(leaves) >= 2)
        u.show(f'  added fcpts: {replica_count}, distinct {distinct_count}')
        u.show('  View ranks : ' + view.ranks_str())
Example #29
def get(net1, hard_evd_nodes, trainable_tbn, elm_method, elm_wait):
    assert net1._for_inference

    #net1.dot(fname='tbn_pre_decouple.gv', view=True)
    u.show(f'  Decoupling tbn:')
    elm_order, cliques1, max_binary_rank1, stats = net1.elm_order(
        elm_method, elm_wait)
    u.show('   ', stats)

    # cutting edges outgoing from hard evidence nodes
    cut_edges = lambda n: len(n.children) >= 1 and n in hard_evd_nodes and \
                              (n.parents or len(n.children) >= 2)
    cut_edges_set = set(n for n in net1.nodes if cut_edges(n))

    # replicating functional cpts
    # if both duplicate and cut_edges trigger, use cut_edges as it is more effective
    duplicate = lambda n: n not in cut_edges_set and len(n.children) >= 2 \
                            and n.is_functional(trainable_tbn)

    duplicate_set = set(n for n in net1.nodes if duplicate(n))

    # perhaps decoupling does nothing
    if not duplicate_set and not cut_edges_set:
        u.show('    nothing to decouple')
        return net1, elm_order, (max_binary_rank1, max_binary_rank1)  # no decoupling possible

    # we will decouple
    net2 = TBN(f'{net1.name}__decoupled')
    net2._decoupling_of = net1

    # -when creating a clone c(n) in net2 for node n in net1, we need to look up the
    #  parents of c(n) in net2.
    # -this is done by calling get_image(p) on each parent p of node n
    # -the length of images[p] equals the number of times get_image(p) will be called
    # -members of images[p] may not be distinct depending on the replication strategy
    images = {}

    def get_image(n):
        return images[n].pop()

    # -when we have hard evidence on node n (net1), we create a replica r (net2) of n
    # for each child of n, which copies evidence on n into the cpts of its children.
    # -maps node r (net2) to node n (net1) that it is copying evidence from
    evidence_from = {}

    # maps node n (net1) to a tuple (c_1,...,c_k) where k is the number of clones that
    # node n will have in net2, and c_i is the number of children for clone i in net2
    ccounts = {}

    # fully replicated(i): one i-replica for each c-replica, where c is child of i in net1
    # partially replicated(i): one i-replica for each child c of i in net1
    fully_replicated = lambda i: all(ccount == 1 for ccount in ccounts[i])
    replicas_count = lambda i: len(ccounts[i])  # number of replicas node i has in net2

    # compute the number of replicas in net2 for each node in net1 (fill ccounts)
    for n in reversed(net1.nodes):  # bottom up
        ccounts[n] = []
        cparents = set()
        for c in n.children:
            cparents |= set(c.parents)
        #replicate_node = any(cparents <= clique for clique in cliques1)
        #replicate_node = all(p in duplicate_set for p in n.parents)
        replicate_node = True
        if n in duplicate_set and replicate_node:
            # replicate node n
            for c in n.children:
                if True:  #not fully_replicated(c):
                    # replicate node n for each replica of child c
                    ccounts[n].extend([1] * replicas_count(c))
                else:
                    # replicate node n for each child c
                    ccounts[n].append(replicas_count(c))
        else:  # do not replicate node n
            # n could be in cut_edges_set, but ccounts will not be used in that case
            duplicate_set.discard(n)
            children_replicas_count = sum(
                replicas_count(c) for c in n.children)
            ccounts[n].append(children_replicas_count)

    # cutting edges takes priority over decoupling as it is more effective
    for n in net1.nodes:  # visiting parents before children
        if n in cut_edges_set:  # disconnect n from its children
            assert n not in duplicate_set
            n._clamped = True  # flag set in original network (net1)
            parents = [get_image(p) for p in n.parents]
            master = clone_node(n, n.name, parents)
            net2.add(master)
            images[n] = []
            # master not added to images as it will not be a parent of any node in net2
            for i, c in enumerate(n.children):
                for j in range(replicas_count(c)):  # j iterates over replicas of child c
                    # these clones will be removed after elimination order is computed
                    # clones are not testing even if master is testing
                    clone = Node(f'{n.name}_evd{i}_{j}',
                                 values=master.values,
                                 parents=[])
                    net2.add(clone)
                    evidence_from[clone] = master
                    # children of n will reference clones, not master
                    images[n].append(clone)
        elif n in duplicate_set:  # duplicate node n and its functional cpt
            images[n] = []
            for i, ccount in enumerate(ccounts[n]):
                assert ccount > 0  # number of children each clone will have in net2
                parents = [get_image(p) for p in n.parents]
                clone = clone_node(n, f'{n.name}_fcpt{i}', parents)
                if i > 0: clone._master = False  # clone() sets this to True
                net2.add(clone)
                images[n].extend([clone] * ccount)
        else:  # just copy node n from net1 to net2
            (ccount,) = ccounts[n]  # number of children the clone will have in net2
            parents = [get_image(p) for p in n.parents]
            clone = clone_node(n, n.name, parents)
            net2.add(clone)
            images[n] = [clone] * ccount

    assert not net2._for_inference
    assert len(images) == len(net1.nodes)
    assert len(images) <= len(net2.nodes)
    assert all(v == [] for v in images.values())

    #net2.dot(fname='tbn_post_decouple.gv', view=True)
    elm_order, _, max_binary_rank2, stats = net2.elm_order(
        elm_method, elm_wait)
    u.show('   ', stats)

    if not duplicate_set:
        elm_order = [n._original for n in elm_order if n not in evidence_from]
        # only clamping took place, so we only care about elimination order
        # return original network with _clamped flag set for some nodes
        return net1, elm_order, (max_binary_rank1, max_binary_rank2)

    if cut_edges_set:  # some variables were clamped
        # get rid of auxiliary evidence nodes from elimination order
        elm_order = [n for n in elm_order if n not in evidence_from]
        # need to restore net2 by getting rid of auxiliary evidence nodes
        # and restoring children of clamped nodes
        net2.nodes = [n for n in net2.nodes if n not in evidence_from]
        replace = lambda n: evidence_from[n] if n in evidence_from else n
        for n in net2.nodes:
            n._parents = tuple(replace(p) for p in n.parents)
            n._family = tuple(replace(f) for f in n.family)

    u.show(f'    node growth: {len(net2.nodes)/len(net1.nodes):.1f}')

    return net2, elm_order, (max_binary_rank1, max_binary_rank2)
    def __build_tac_graph(self, ops_graph):
        assert not self.finalized
        u.show('  Constructing TacGraph: tac...', end='', flush=True)

        ### compiling tf graph for tac and another tf graph for its trainable cpts

        # step 1: create tac inputs (variables) by executing corresponding ops of OpsGraph
        self.__create_variables(
            ops_graph)  # must be done outside of @tf.functions

        # step 2: create remaining tensors of tac tf graph
        # self.MAR() returns the tac marginals
        espec = [
            tf.TensorSpec(shape=e.shape, dtype=e.dtype)
            for e in self.evidence_variables
        ]
        self.MAR = self.__marginals.get_concrete_function(ops_graph, *espec)

        # trainable cpts are created twice: initially as part of the tac tf graph (above),
        # then in their own tf graph (so we can save them without evaluating tac graph)
        # self.TCPTS() returns trainable cpts (for saving)
        self.TCPTS = self.__trainable_cpts.get_concrete_function(ops_graph)

        ### compiling tf graphs for computing metrics and losses (three tf graphs)

        u.show('metrics...', end='', flush=True)
        shape, dtype = self.marginal_spec
        mspec = tf.TensorSpec(shape=shape, dtype=dtype)
        self.CA = self.__classification_accuracy.get_concrete_function(
            mspec, mspec)
        self.CE = self.__cross_entropy.get_concrete_function(mspec, mspec)
        self.MSE = self.__mean_squared_error.get_concrete_function(
            mspec, mspec)

        ### compiling tf graph for optimizer (to minimize loss)

        if self.trainable:
            u.show('optimizer...', end='', flush=True)
            self.optimizer_lr = tf.Variable(.1,
                                            dtype=p.float)  # will be updated
            self.optimizer = tf.optimizers.Adam(
                learning_rate=self.optimizer_lr)
            rspec = tf.TensorSpec(shape=(), dtype=p.float)
            lspec = tf.TensorSpec(shape=(), dtype=tf.string)
            self.OPT = self.__optimize_loss.get_concrete_function(
                rspec, lspec, mspec, *espec)
            self.optimizer_state = self.optimizer.get_weights()  # after self.OPT is set

        # some bookkeeping
        self.loss_fns = {'CE': self.CE, 'MSE': self.MSE}
        self.metric_fns = {'CA': self.CA, 'CE': self.CE, 'MSE': self.MSE}

        self.parameter_count = sum([t.shape[0] for t in self.weight_variables])

        # compute sizes of compiled tf graphs
        graph_size = lambda fn: self.__graph_size(fn.graph)[0]
        self.size, self.binary_rank, self.rank = self.__graph_size(
            self.MAR.graph)
        concrete_fns = (self.TCPTS, self.CE, self.CA, self.MSE)
        metrics_size = sum(graph_size(fn) for fn in concrete_fns)
        self.total_size = self.size + metrics_size

        # printing statistics of compiled tf graphs
        if u.verbose:  # do computations below only if we will print results
            stats = self.__graph_stats(self.MAR.graph)
            u.show(stats)
        u.show(
            f'      binary rank {self.binary_rank:.1f}, rank {self.rank} (for separators)'
        )
        u.show(f'    metrics size {metrics_size:,}')
        if self.trainable:
            opt_size = graph_size(self.OPT)
            u.show(f'    optimizer size {opt_size:,}')
            self.total_size += opt_size

        assert not self.trainable or self.weight_variables
        assert not self.weight_variables or self.trainable
        assert self.trainable or self.fixed_cpt_tensors

        self.finalized = True
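
The get_concrete_function and TensorSpec calls above follow the standard tf.function tracing pattern; a minimal standalone illustration of freezing a concrete graph for a fixed input signature (independent of the TacGraph internals):

import tensorflow as tf

@tf.function
def square(x):
    return x * x

spec = tf.TensorSpec(shape=(None,), dtype=tf.float32)
square_concrete = square.get_concrete_function(spec)   # trace once for this signature
print(square_concrete(tf.constant([1.0, 2.0, 3.0])))   # tf.Tensor([1. 4. 9.], shape=(3,), dtype=float32)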