def set_separators_and_clusters(view, trainable, verbose):
    """Compute the separator and cluster of every node in `view`.

    Functional cpts are identified first (and replicated when no replicas
    exist yet), classical separators are computed, then separators are
    shrunk and cleaned of clamped vars before clusters are derived from them.
    """

    def replicated():
        # true when some entry of view.fcpts holds more than one node
        return any(len(nodes) > 1 for nodes in view.fcpts.values())

    # identify functional cpts (fcpts) and associate them with functional vars
    __set_fcpts(view, trainable)

    # replicate fcpts if not already replicated by decouple.py
    if view.fcpts and not replicated():
        __replicate_fcpts(view, verbose)

    # compute separators
    __set_classical_cls_and_sep(view)

    # remove functional vars from separators, then remove dead fcpts
    if replicated():
        __shrink_separators(view)
        __remove_dead_fcpts(view, verbose)

    # remove clamped vars from separators (if any); separators are updated in place
    for node, _, _, _, _ in view.bottom_up():
        separator = view.sep(node)
        separator -= {var for var in separator if var._clamped}

    # compute clusters (cluster of host computed earlier in view.py)
    for node, _, child1, child2, _ in view.bottom_up():
        if child1:
            cluster = view.sep(child1) | view.sep(child2)
        else:
            cluster = set(node.var.family)  # leaf node
        view.cls_set(node, cluster)

    if verbose:
        u.show(' View ranks : ' + view.ranks_str())
def trace(query_var, evidence_vars, tbn, jt, og):
    """Add to `og` the ops that compute the posterior on `query_var`.

    Evidence ops are created and declared to the jointree `jt`, the tbn is
    pruned for the query, ops for the selected cpts of testing nodes are
    added, and finally the ops for the node posterior. Tracing statistics
    are shown at the end.
    """
    assert tbn._for_inference
    assert tbn == jt.tbn
    # the following need to be relaxed
    assert not query_var.has_pruned_values()
    assert not any(var.has_pruned_values() for var in evidence_vars)

    # ops that construct tensors for evidence; saved in jointree for later lookup
    evidence_ops = og.add_evidence_ops(evidence_vars)
    jt.declare_evidence(evidence_vars, evidence_ops)

    # qcontext: captures the pruned tbn used to compute posterior on query_var
    qcontext = prune.for_node_posterior(query_var, evidence_vars, tbn)

    # add ops that will create tensors for selected cpts (if any)
    for node in qcontext.testing_nodes:  # top-down
        __selected_cpt(node, qcontext, jt, og)  # also prunes

    # add ops that will create tensor for the posterior over query_node
    __node_posterior(query_var, qcontext, jt, og)

    # statistics
    if jt.lookups > 0:
        hit_rate = jt.hits * 100 / jt.lookups
    else:
        hit_rate = 0
    node_count = len(tbn.nodes)
    all_count = len(qcontext.testing_nodes)
    live_count = qcontext.live_count
    sval_count = sum(1 for n in tbn.nodes if len(n.values) == 1)
    pruned_count = node_count - len(qcontext.nodes)
    pruned_perct = pruned_count * 100 / node_count

    report = (
        f" Tracing posterior for '{query_var.name}':\n"
        f' og-cache lookups {jt.lookups}, hits {jt.hits}, rate {hit_rate:.1f}%\n'
        f' selected cpts: all {all_count}, live {live_count}\n'
        f' single-value nodes: {sval_count}\n'
        f' pruned nodes: {pruned_count}, percentage {pruned_perct:.1f}%')
    u.show(report)
def train(size,output,data_size,testing,use_bk,tie_parameters):
    """Train a circuit for the rectangle task and report its accuracy.

    Returns a pair (accuracy in percent, trained circuit).
    """
    circuit_type = 'TAC' if testing else 'AC'
    u.show(f'\n===Training {circuit_type} for rectangle {output} in {size}x{size} images, use_bk {use_bk}, tie {tie_parameters}')

    # training and validation data (labels are one-hot)
    train_evd, train_lab = rdata.get(size, output,
                                     noisy_image_count=size,
                                     noise_count=size)
    valid_evd, valid_lab = rdata.get(size, output,
                                     noisy_image_count=2 * size,
                                     noise_count=2 * size)

    # model, then its compilation into a circuit
    bn, inputs = rmodel.get(size, output, testing, use_bk, tie_parameters)
    circuit = tac.TAC(bn, inputs, output, trainable=True, profile=False)

    # keep only a random subset of the generated data
    train_fraction = data_size / len(train_lab)
    valid_fraction = max(1000, data_size) / len(valid_lab)  # no less than 1000
    train_evd, train_lab = data.random_subset(train_evd, train_lab,
                                              train_fraction)
    valid_evd, valid_lab = data.random_subset(valid_evd, valid_lab,
                                              valid_fraction)

    # fit on the training subset, then measure accuracy on the validation subset
    circuit.fit(train_evd, train_lab, loss_type='CE', metric_type='CA')
    accuracy = circuit.metric(valid_evd, valid_lab, metric_type='CA')

    percent = 100 * accuracy
    u.show(f'\n{circuit_type} accuracy {percent:.2f}')
    return (percent, circuit)
def generate_samples(model, valid_loader):
    """Save reconstructions of one validation batch next to the originals.

    Loads the weights stored at `args.save_path` into `model`, runs the
    first batch of `valid_loader` through it and passes the reconstruction
    and the original batch to `utils.show`.
    """
    # map_location lets a checkpoint saved on another device load onto `device`
    model.load_state_dict(torch.load(args.save_path, map_location=device))
    model.eval()

    x, _ = next(iter(valid_loader))
    x = x.to(device)

    # pure inference: no autograd graph is needed
    with torch.no_grad():
        x_prime, _vq_loss, _perplexity = model(x)

    utils.show(x_prime, "results/valid_recon.png")
    utils.show(x, "results/valid_originals.png")
def train(epoch):
    """Run one training epoch and report loss, PSNR and SSIM.

    The loss optimized for each batch is the sum of the per-channel
    criterion values. Returns a triple (global psnr, per-channel psnr list,
    per-channel average ssim list). The last batch of the epoch is passed
    to `show`, one call per target channel.
    """
    n_channels = len(target_channels)
    n_batches = len(training_data_loader)

    epoch_loss = 0
    epoch_loss_indiv = [0] * n_channels
    epoch_ssim_indiv = [0] * n_channels

    for iteration, batch in enumerate(training_data_loader, 1):
        inp, target = batch[0].to(device), batch[1].to(device)

        optimizer.zero_grad()
        prediction = model(inp)

        loss = 0
        for x in range(n_channels):
            loss_x = criterion(prediction[:, x, :, :], target[:, x, :, :])
            # slicing x:x+1 keeps the channel axis, so no image size is hard-coded
            ssim_x = calc_ssim(prediction[:, x:x + 1, :, :],
                               target[:, x:x + 1, :, :])
            epoch_loss_indiv[x] += loss_x.item()
            epoch_ssim_indiv[x] += ssim_x
            loss += loss_x

        epoch_loss += loss.item()
        loss.backward()
        optimizer.step()

        print("===> Epoch[{}]({}/{}): Loss: {:.4f}".format(
            epoch, iteration, n_batches, loss.item()))

    # averages over the batches of this epoch
    epochLoss = epoch_loss / n_batches
    for x in range(n_channels):
        epoch_loss_indiv[x] /= n_batches
        epoch_ssim_indiv[x] /= n_batches

    psnr_indiv = list(map(calc_psnr, epoch_loss_indiv))
    psnr = calc_psnr(epochLoss)
    print("psnr_indiv= ", psnr_indiv, "global_psnr= ", psnr)
    print("===> Epoch {} Complete: Avg. Loss: {:.4f}".format(epoch, epochLoss))

    # visualize the first example of the last batch, one image per channel
    pred = prediction.cpu()[0]
    target = target.cpu()[0]
    for p, t, c_name in zip(pred, target, target_channels):
        show(epoch,
             p,
             t,
             criterion,
             images_path,
             args.model_name,
             title="train_" + c_name)

    return psnr, psnr_indiv, epoch_ssim_indiv
def train(size,
          digits,
          data_size,
          testing,
          use_bk,
          tie_parameters,
          remove_common=False):
    """Train a circuit for the digits task and show its accuracy."""
    assert size >= 7
    assert all(d in range(10) for d in digits)

    circuit_type = 'TAC' if testing else 'AC'
    u.show(
        f'\n===Training {circuit_type} for digits {digits} in {size}x{size} images, use_bk {use_bk}, tie {tie_parameters}'
    )

    # model
    net, inputs, output = dmodel.get(size, digits, testing, use_bk,
                                     tie_parameters, remove_common)

    # data (ground truth): training set first, then validation set
    train_evd, train_lab = ddata.get(size, digits,
                                     noisy_image_count=100,
                                     noise_count=size)
    valid_evd, valid_lab = ddata.get(size, digits,
                                     noisy_image_count=200,
                                     noise_count=size)

    # compile model into circuit
    circuit = tac.TAC(net, inputs, output, trainable=True, profile=False)

    # random subsets of the data
    train_fraction = data_size / len(train_lab)
    valid_fraction = max(1000, data_size) / len(valid_lab)  # no less than 1000
    train_evd, train_lab = data.random_subset(train_evd, train_lab,
                                              train_fraction)
    valid_evd, valid_lab = data.random_subset(valid_evd, valid_lab,
                                              valid_fraction)

    # fit circuit, then compute accuracy on the validation subset
    circuit.fit(train_evd, train_lab, loss_type='CE', metric_type='CA')
    accuracy = circuit.metric(valid_evd, valid_lab, metric_type='CA')

    u.show(f'\n{circuit_type} accuracy {100*accuracy:.2f}')
def fit(self, evidence, marginals, loss_type, metric_type, *, batch_size=32):
    """Train the circuit on (evidence, marginals) and show the training time.

    Inputs are validated with u.input_check before training is delegated
    to self.trainer. The batch size used is capped by the number of examples.
    """
    example_count = data.evd_size(evidence)  # number of examples
    batch_size = min(example_count, batch_size)  # used batch size

    # input validation, in order
    u.input_check(self.trainable, 'TAC is not trainable')
    u.input_check(data.is_evidence(evidence), 'evidence is ill formatted')
    is_hard = data.evd_is_hard(evidence, self.input_nodes,
                               self.hard_input_nodes)
    u.input_check(is_hard, 'evidence must be hard')
    u.input_check(data.evd_matches_input(evidence, self.input_nodes),
                  'evidence must match evidence nodes of tbn')
    u.input_check(data.is_marginals(marginals), 'marginals ill formatted')
    u.input_check(data.mar_matches_output(marginals, self.output_node),
                  'marginals must match query node of tbn')
    u.input_check(loss_type in self.loss_types,
                  f'loss {loss_type} is not supported')
    u.input_check(metric_type in self.metric_types,
                  f'metric {metric_type} is not supported')
    u.input_check(data.evd_size(evidence) == len(marginals),
                  'evidence size must match marginals size')

    u.show(f'\nTraining {self.circuit_type}:')

    # timed training
    started = time.perf_counter()
    epoch_count = self.trainer.train(evidence, marginals, loss_type,
                                     metric_type, batch_size)
    training_time = time.perf_counter() - started

    time_per_epoch = training_time / epoch_count
    u.show(
        f'Training Time: {training_time:.3f} sec ({time_per_epoch:.3f} sec per epoch)'
    )
def validate(size,output,testing,elm_method='minfill',elm_wait=30):
    """Check that a compiled rectangle circuit reproduces the ground-truth labels.

    Quits the interpreter when predictions and labels differ.
    """
    circuit_type = 'TAC' if testing else 'AC'

    # data (ground truth)
    evidence, labels = rdata.get(size, output)
    u.show(f'\n===Checking {circuit_type} for rectangle {output} in {size}x{size} images: {len(labels)} total')

    # model and its compilation
    bn, inputs = rmodel.get(size, output,
                            testing=testing,
                            use_bk=True,
                            tie_parameters=False)
    circuit = tac.TAC(bn, inputs, output,
                      trainable=False,
                      profile=False,
                      elm_method=elm_method,
                      elm_wait=elm_wait)

    # evaluate circuit on evidence to get predictions
    predictions = circuit.evaluate(evidence)

    # verify that predictions match the labels
    if not u.equal(predictions, labels):
        u.show('***bumper!!!')
        quit()
    u.show('\n===All good!')
def verify_numpy(self, evidence, marginals1):
    """Evaluate the classical AC with numpy and compare against `marginals1`.

    Returns a pair (pure evaluation time, batch size used).
    """
    size = d.evd_size(evidence)
    batch_size = size  # batch size is set to the evidence size
    u.show(
        f'\nVerifying against classical AC (numpy arrays, batch_size {batch_size})'
    )

    # split lambdas into scalars (with batch)
    evidence = self.split_evidence(evidence)

    eval_time = 0  # pure evaluation time (add/mul/div)
    for start in range(0, size, batch_size):
        u.show(f'{int(100*start/size):4d}%\r', end='', flush=True)
        batch = d.evd_slice(evidence, start, start + batch_size)
        raw_marginals, elapsed = self.evaluate_numpy(batch)
        batch_marginals = self.gather_marginals(raw_marginals)
        # first batch starts the result; later ones are appended along axis 0
        marginals2 = (batch_marginals if start == 0 else np.concatenate(
            (marginals2, batch_marginals), axis=0))
        eval_time += elapsed

    u.equal(marginals1, marginals2, tolerance=True)

    size = d.evd_size(evidence)
    u.show(
        f'Evaluation Time: {eval_time:.3f} sec ({1000*eval_time/size:.0f} ms per example)'
    )
    return eval_time, batch_size
def verify_array(self, evidence, marginals1):
    """Evaluate the classical AC one example at a time and compare against `marginals1`.

    Returns a pair (pure evaluation time, 1), where 1 is the batch size used.
    """
    u.show('\nVerifying against classical AC (array)...')

    size = d.evd_size(evidence)
    rows = d.evd_col2row(evidence)

    # evaluation time excludes assertion of evidence
    eval_time = 0  # pure evaluation time (add/mul/div)
    collected = []
    for lambdas in rows:
        self.assert_evidence_array(lambdas)
        marginal, elapsed = self.evaluate_array()  # np array
        collected.append(marginal)
        eval_time += elapsed

    marginals2 = np.array(collected, dtype=np.float32)
    u.equal(marginals1, marginals2, tolerance=True)

    u.show(
        f'Evaluation Time: {eval_time:.3f} sec ({1000*eval_time/size:.0f} ms per example)'
    )
    return eval_time, 1
def metric(self, evidence, labels, metric_type, *, batch_size=64):
    """Compute `metric_type` of the circuit on (evidence, labels).

    The result is the average of the per-batch metric values, weighted by
    batch size. Inputs are validated with u.input_check first.
    """
    example_count = data.evd_size(evidence)  # number of examples
    batch_size = min(example_count, batch_size)  # used batch size

    # input validation, in order
    u.input_check(data.is_evidence(evidence), 'evidence is ill formatted')
    is_hard = data.evd_is_hard(evidence, self.input_nodes,
                               self.hard_input_nodes)
    u.input_check(is_hard, 'evidence must be hard')
    u.input_check(data.evd_matches_input(evidence, self.input_nodes),
                  'evidence must match evidence nodes of tbn')
    u.input_check(data.is_marginals(labels, one_hot=(metric_type == 'CA')),
                  'labels ill formatted')
    u.input_check(data.mar_matches_output(labels, self.output_node),
                  'labels must match query node of tbn')
    u.input_check(metric_type in self.metric_types,
                  f'metric {metric_type} is not supported')

    u.show(f'\nComputing {metric_type}: evidence size {example_count}, '
           f'batch size {batch_size}')

    started = time.perf_counter()

    batches, _ = data.data_batches(evidence, labels, batch_size)
    weighted_sum = 0
    for evd_batch, lab_batch in batches:
        batch_value = self.tac_graph.compute_metric(metric_type, evd_batch,
                                                    lab_batch)
        weighted_sum += batch_value * len(lab_batch)
    # average weighted by batch size (last batch may be smaller)
    result = weighted_sum / example_count

    evaluation_time = time.perf_counter() - started
    time_per_example = evaluation_time / example_count

    u.show(f'{metric_type} Time: {evaluation_time:.3f} sec '
           f'({time_per_example:.4f} sec per example)')

    return result
def test(epoch):
    """Evaluate the model on the testing data without tracking gradients.

    Returns a triple (average psnr over batches, per-channel psnr list,
    per-channel average ssim list). The last batch is passed to `show`,
    one call per target channel.
    """
    n_channels = len(target_channels)
    n_batches = len(testing_data_loader)

    avg_psnr = 0
    epoch_loss_indiv = [0] * n_channels
    epoch_ssim_indiv = [0] * n_channels

    with torch.no_grad():
        for batch in testing_data_loader:
            inp, target = batch[0].to(device), batch[1].to(device)

            prediction = model(inp)
            mse = criterion(prediction, target)

            for x in range(n_channels):
                loss_x = criterion(prediction[:, x, :, :], target[:, x, :, :])
                # slicing x:x+1 keeps the channel axis, so no image size is hard-coded
                ssim_x = calc_ssim(prediction[:, x:x + 1, :, :],
                                   target[:, x:x + 1, :, :])
                epoch_loss_indiv[x] += loss_x.item()
                epoch_ssim_indiv[x] += ssim_x

            psnr = 10 * log10(1 / mse.item())
            avg_psnr += psnr

    print("===> Avg. PSNR: {:.4f} dB".format(avg_psnr / n_batches))

    # visualize the first example of the last batch, one image per channel
    pred = prediction.cpu()[0]
    target = target.cpu()[0]
    for p, t, c_name in zip(pred, target, target_channels):
        show(epoch, p, t, criterion, images_path, args.model_name,
             " " + c_name)

    # averages over the batches
    for x in range(n_channels):
        epoch_loss_indiv[x] /= n_batches
        epoch_ssim_indiv[x] /= n_batches
    psnr_indiv = list(map(calc_psnr, epoch_loss_indiv))

    return avg_psnr / n_batches, psnr_indiv, epoch_ssim_indiv
def train(self, evidence, marginals, loss_type, metric_type, batch_size):
    """Train the tac on (evidence, marginals) and write the learned CPTs to file.

    The data is split into training and validation parts, initial weights
    are searched for, and then up to self.epochs_count epochs are run; each
    epoch optimizes the loss on the training part and computes the metric
    on the validation part.

    Returns the total number of epochs performed: those spent finding the
    initial weights plus the training epochs that were run.
    """
    assert loss_type in self.loss_types and metric_type in self.metric_types

    # split data into training and validation (after randomly shuffling it)
    t_data, v_data = data.random_split(evidence, marginals, self.split_ratio)
    t_evidence, t_marginals, t_size = t_data
    v_evidence, v_marginals, v_size = v_data

    # batch memory is based on tf graphs for tac, optimizer and metrics
    tac_graph = self.tac.tac_graph
    circuit_type = self.tac.circuit_type
    parameter_count = tac_graph.parameter_count
    fixed_zeros_count = tac_graph.fixed_zeros_count
    batch_count = ceil(t_size / batch_size)

    u.show(f' loss: {loss_type}, metric: {metric_type}\n'
           f' data: training {t_size}, validation {v_size}\n'
           f' batch: size {batch_size}, count {batch_count}\n'
           f' trainable parameters {parameter_count}\n'
           f' fixed zero parameters {fixed_zeros_count}')

    # initialize trainer and tac_graph optimizer
    batch_size = self.__init_training(loss_type, metric_type, t_size,
                                      batch_size)

    # initialize the tac weights (try a few random weights and pick best)
    weights_epochs = self.__find_initial_weights(t_evidence, t_marginals,
                                                 v_evidence, v_marginals,
                                                 batch_size)

    # counted explicitly so the return value is defined even when
    # self.epochs_count is 0 and the loop body never runs
    epochs_run = 0

    # train
    for epoch in range(self.epochs_count):
        epochs_run = epoch + 1

        # optimize loss on training data, compute metric on validation data
        t_loss, lr = self.__optimize_loss(loss_type, t_evidence, t_marginals,
                                          batch_size, epoch)
        v_metric = self.__compute_metric(metric_type, v_evidence, v_marginals,
                                         batch_size)

        # logging for tensorboard
        tac_graph.log(epoch, t_loss, v_metric, lr)

        # main control
        stop, save, event = self.__analyze_epoch(v_metric, epoch)

        if stop or event:
            u.show((f'\r epoch {epoch:5d}: t_loss {t_loss:.8f}, '
                    f'v_metric {v_metric:.8f}, lr {lr:.4f}{event}'))
        if save:
            tac_graph.save_current_weights()
        if stop:
            break

    # restore learned weights and write them to file
    fname = paths.cpts / f'{circuit_type}.txt'
    u.show(f' writing learned CPTs to {fname}')
    tac_graph.end_training(fname)

    # total number of epochs we performed
    return weights_epochs + epochs_run
def __init__(self, opsgraph):
    """Construct a classical AC by executing the operations of `opsgraph`.

    The opsgraph must be neither trainable nor testing. Executing its ops
    (bottom up) populates the AC nodes, the evidence lambdas and the
    parameter nodes; the factor of the last op is kept as the output factor.
    """
    assert not opsgraph.trainable and not opsgraph.testing
    u.show('\nConstructing classical AC...')
    started = time.perf_counter()

    # list of add/mul/div nodes, topologically sorted (bottom up)
    self.nodes = None
    # list of AC nodes representing evidence (one per var/value)
    self.evd_nodes = []
    # list of AC nodes representing parameters (cpt entries)
    self.parameter_nodes = []
    # list of evidence lambdas:
    # each lambda is a tuple of evidence nodes (lambda per var)
    self.lambdas = []
    # factor that contains output AC nodes (marginal)
    self.output_factor = None
    # size of AC (number of nodes)
    self.size = None

    # maps ops.op into its factor (result of executing operation)
    op2factor = {}

    # execution will populate the nodes, lambdas and roots fields
    Node.instances = []  # created add/mul/div will be added to this list
    for op in opsgraph.ops:  # bottom up
        factor = self.execute(op, op2factor)
        op_type = type(op)
        if op_type == ops.EvidenceOp:
            self.lambdas.append(factor)
            self.evd_nodes.extend(factor.nodes())
        elif op_type == ops.FixedCptOp:
            self.parameter_nodes.extend(factor.nodes())
    self.nodes = Node.instances  # add/mul/div nodes

    # order of lambdas should match order of opsgraph inputs
    lambda_vars = tuple(f.vars[0] for f in self.lambdas)
    assert opsgraph.evidence_vars == lambda_vars
    self.lambdas = tuple(f.nodes() for f in self.lambdas)

    # saving output nodes of AC (factor of the last op)
    self.output_factor = op2factor[opsgraph.ops[-1]]

    # computing AC size
    self.size = (len(self.nodes) + len(self.parameter_nodes) +
                 len(self.evd_nodes))

    compile_time = time.perf_counter() - started
    u.show(f' AC size {self.size:,}')
    u.show(f'Compile Time: {compile_time:.3f} sec')
def elm_order(self, solver, wait):
    """Compute an elimination order by calling an external treewidth solver.

    The graph is written to a temporary file and fed to the solver program,
    whose output (a tree decomposition) is read back and converted into an
    elimination order.

    solver: one of 'flow cutter', 'tamaki heuristic', 'tamaki exact'.
    wait:   seconds to let an online solver run before sending it SIGTERM
            (ignored by the exact solver, which is waited for).

    Returns a triple (elimination order of nodes, tree width, stats string).
    Raises ValueError if `solver` is not a known solver name.
    """
    # solver name -> (program path under decompose/solvers, online flag)
    solvers = {
        'flow cutter': ('flow_cutter_pace17', True),
        'tamaki heuristic': ('tamaki/tw-heuristic', True),
        'tamaki exact': ('tamaki/tw-exact', False),
    }
    if solver not in solvers:
        known = ', '.join(repr(name) for name in solvers)
        raise ValueError(
            f'unknown treewidth solver {solver!r} (expected one of {known})')
    program, online = solvers[solver]
    cmd = [f'./decompose/solvers/{program}']

    u.show(f' calling {solver}...', end='')

    graph_fname = 'decompose/tmp/graph.gr'
    tree_fname = 'decompose/tmp/tree.td'

    # write graph to file
    self.write(graph_fname)

    # call tree decomposition program
    with open(graph_fname, "r") as graph_file, \
            open(tree_fname, "w") as tree_file:
        process = subprocess.Popen(cmd, stdin=graph_file, stdout=tree_file)
        if online:
            # online solvers keep improving until told to stop
            u.show(f'waiting {wait} sec...', end='', flush=True)
            sleep(wait)
            process.send_signal(signal.SIGTERM)
        else:
            process.wait()  # blocks python until process returns
        # read before communicate(), as in the original flow: for an online
        # solver the process has not been waited on yet, so this is None
        code = process.returncode
        # NOTE(review): stderr is not piped, so `error` is always None
        _, error = process.communicate()
        process.kill()

    # NOTE(review): semantics of u.check are not visible here; the
    # condition is kept exactly as it was -- confirm it is the intended one
    u.check(code != 0, f'failed to execute {solver} because\n {error}',
            f'using treewidth solver')
    u.show('done')

    # read decomposition tree from file
    tree = TreeD(tree_fname)
    # convert decomposition tree to elimination order (vertices)
    vertex_order = tree.elm_order()

    # return elimination order of tbn nodes
    stats = f'elm order: cls max {tree.width}'
    return self.vertices2nodes(vertex_order), tree.width, stats
def __compile(self, net, inputs, output, hard_inputs, trainable, elm_method,
              elm_wait, profile):
    """Compile `net` into a tac graph and record it, with its statistics, on self.

    A copy of the network made for inference is traced into an ops graph
    using a jointree built from a (possibly decoupled) version of that copy;
    the ops graph is then executed to construct the tac graph.
    """
    if profile:
        u.show('\n***PROFILER ON***')
    u.show(f'\nCompiling {self.network_type} into {self.circuit_type}')
    started = time.time()

    # net1 and net2 have nodes corresponding to inputs and output (same names)
    net1 = net.copy_for_inference()
    self.tbn = net1
    self.input_nodes = u.map(net1.node, inputs)
    self.output_node = net1.node(output)
    self.hard_input_nodes = u.map(net1.node, hard_inputs)

    # decouple net1 for more efficient compilation
    # net2 is only used to build jointree (duplicate functional cpts)
    # net2 may be equal to net1 (no decoupling)
    # if net2 != net1 (decoupling happened), then net2._decoupling_of = net1
    net2, elm_order, _ = decouple.get(net1, self.hard_input_nodes, trainable,
                                      elm_method, elm_wait)

    # compile tbn into an ops_graph
    jt = jointree.Jointree(net2, elm_order, self.hard_input_nodes, trainable)
    ops_graph = og.OpsGraph(trainable, net.testing)  # empty

    # inference will populate ops_graph with operations that construct tac_graph
    inference.trace(self.output_node, self.input_nodes, net1, jt, ops_graph)
    if u.verbose:
        ops_graph.print_stats()

    # construct tac_graph by executing operations of ops_graph
    tac_graph = tg.TacGraph(ops_graph, profile)
    self.ops_graph = ops_graph
    self.tac_graph = tac_graph
    self.size = tac_graph.size
    self.rank = tac_graph.rank
    self.binary_rank = tac_graph.binary_rank
    self.parameter_count = tac_graph.parameter_count

    compile_time = time.time() - started
    u.show(f'Compile Time: {compile_time:.3f} sec')
def validate(size, digits, testing, elm_method='minfill', elm_wait=30):
    """Check that a compiled digits circuit reproduces the ground-truth labels.

    Quits the interpreter when predictions and labels differ.
    """
    assert size >= 7
    assert all(d in range(10) for d in digits)

    # data (ground truth)
    evidence, labels = ddata.get(size, digits)
    data_size = len(labels)

    circuit_type = 'TAC' if testing else 'AC'
    u.show(
        f'\n===Checking {circuit_type} for digits {digits} in {size}x{size} images: {data_size} total'
    )

    # model
    net, inputs, output = dmodel.get(size,
                                     digits,
                                     testing,
                                     use_bk=True,
                                     tie_parameters=False,
                                     remove_common=False)

    # compile model into circuit
    circuit = tac.TAC(net,
                      inputs,
                      output,
                      trainable=False,
                      profile=False,
                      elm_method=elm_method,
                      elm_wait=elm_wait)

    # evaluate circuit on evidence to get predictions
    predictions = circuit.evaluate(evidence)

    # verify that predictions match labels
    if not u.equal(predictions, labels):
        u.show('***bumper!!!')
        quit()
    u.show('\n===All good!\n')
def evaluate(self, evidence, *, batch_size=64, report_time=False):
    """Evaluate the circuit on `evidence`, batch by batch.

    evidence:    validated with u.input_check before evaluation.
    batch_size:  requested batch size; capped by the number of examples.
    report_time: when true, timing information is returned as well.

    Returns the marginals of all batches concatenated along axis 0, or the
    triple (marginals, evaluation time, used batch size) if `report_time`.
    """
    evd_size = data.evd_size(evidence)  # number of examples
    batch_size = min(evd_size, batch_size)  # used batch size

    u.input_check(data.is_evidence(evidence),
                  f'TAC evidence is ill formatted')
    u.input_check(
        data.evd_is_hard(evidence, self.input_nodes, self.hard_input_nodes),
        f'TAC evidence must be hard')
    u.input_check(data.evd_matches_input(evidence, self.input_nodes),
                  f'TAC evidence must match evidence tbn nodes')

    u.show(f'\nEvaluating {self.circuit_type}: evidence size {evd_size}, '
           f'batch size {batch_size}')

    marginals = None
    eval_time = 0  # time spent inside tac_graph.evaluate only
    for i, evd_batch in enumerate(data.evd_batches(evidence, batch_size)):
        # progress in examples, not batches: i batches have been completed
        # (assumes evd_batches yields batches of batch_size examples)
        done = i * batch_size
        u.show(f'{int(100*done/evd_size):4d}%\r', end='', flush=True)

        start_time = time.perf_counter()
        mar_batch = self.tac_graph.evaluate(evd_batch)
        eval_time += time.perf_counter() - start_time

        if marginals is None:
            marginals = mar_batch
        else:
            marginals = np.concatenate((marginals, mar_batch), axis=0)

    time_per_example = eval_time / evd_size
    time_per_million = time_per_example / (self.size / 1000000)

    u.show(f'\rEvaluation Time: {eval_time:.3f} sec '
           f'({1000*time_per_example:.1f} ms per example,'
           f' {1000*time_per_million:.1f} ms per 1M tac nodes)')

    assert data.mar_matches_output(marginals, self.output_node)
    assert data.mar_is_predictions(marginals)

    if report_time:
        return marginals, eval_time, batch_size
    return marginals
def verify_tf_graph(self, evidence, marginals1):
    """Evaluate the classical AC as a tf graph and compare against `marginals1`.

    Requires self.tf_ac to be set. Returns a pair
    (evaluation time, batch size used).
    """
    assert self.tf_ac is not None

    size = batch_size = d.evd_size(evidence)
    # message previously ended with an unbalanced '))'
    u.show(
        f'\nVerifying against classical AC (tf graph, batch_size {batch_size})'
    )

    # split lambdas into scalars (with batch)
    evidence = self.split_evidence(evidence)
    # tf graph accepts only tensors as input
    evidence = tuple(tf.constant(e, dtype=tf.float32) for e in evidence)

    start_eval_time = time.perf_counter()
    for start in range(0, size, batch_size):
        u.show(f'{int(100*start/size):4d}%\r', end='', flush=True)
        stop = start + batch_size
        evidence_batch = d.evd_slice(evidence, start, stop)
        marginals_batch = self.tf_ac(*evidence_batch)  # evaluating tf graph
        marginals_batch = self.gather_marginals(marginals_batch)
        if start == 0:
            marginals2 = marginals_batch
        else:
            marginals2 = np.concatenate((marginals2, marginals_batch), axis=0)
    eval_time = time.perf_counter() - start_eval_time

    u.equal(marginals1, marginals2, tolerance=True)

    size = d.evd_size(evidence)
    u.show(
        f'Evaluation Time: {eval_time:.3f} sec ({1000*eval_time/size:.0f} ms per example)'
    )
    return eval_time, batch_size
def __find_initial_weights(self, t_evidence, t_marginals, v_evidence,
                           v_marginals, batch_size):
    """Try a few weight settings and keep the one with the best validation metric.

    Each try applies a two-step lookahead: the loss is optimized twice from
    the current weights and the validation metric is then computed. The
    best weights found are restored before returning.

    Returns the number of epochs spent (two per try).
    """
    u.show(' finding initial weights (starting with uniform):',
           end='',
           flush=True)

    loss_type = self.loss_type
    metric_type = self.metric_type
    tac_graph = self.tac.tac_graph
    last_try = self.weight_tries - 1

    best_loss = None
    best_metric = self.metric_best_value  # initialized to worst possible value
    epochs = 0  # number of epochs we will try

    # weights already set to uniform in ops.py so we will try these first
    for attempt in range(self.weight_tries):
        epochs += 2  # we do a two-step lookahead

        # loss and metric before trying to improve current weights
        pre_loss = self.__compute_metric(loss_type, t_evidence, t_marginals,
                                         batch_size)
        pre_metric = self.__compute_metric(metric_type, v_evidence,
                                           v_marginals, batch_size)

        # loss and metric after improving current weights using GD
        for _step in range(2):  # two-step lookahead
            loss, _ = self.__optimize_loss(loss_type, t_evidence, t_marginals,
                                           batch_size, None)
        metric = self.__compute_metric(metric_type, v_evidence, v_marginals,
                                       batch_size)

        # printing details that are helpful to sanity check behavior
        u.show(f'\n t_loss {pre_loss:11.8f} -> {loss:11.8f}, ',
               f'v_metric {pre_metric:11.8f} -> {metric:11.8f}',
               end='',
               flush=True)

        # reset optimizer before quitting in case we decide we are done
        tac_graph.reset_optimizer()

        # the first try must improve on the initial (worst) metric value
        assert attempt != 0 or self.metric_comp(metric, best_metric)

        # see if the current weights improve on the previous ones
        if self.metric_comp(metric, best_metric):
            best_loss, best_metric = loss, metric
            tac_graph.save_current_weights()

        if self.metric_comp(metric, self.metric_target):
            break  # found good-enough initial weights

        # try a new set of random weights if this is not the last iteration
        if attempt < last_try:
            tac_graph.assign_random_weights()

    u.show(
        f'\n starting at: t_loss {best_loss:.8f}, v_metric {best_metric:.8f}, '
        f'found after {epochs} epochs',
        flush=True)

    # use the best found weights
    self.metric_best_value = best_metric
    tac_graph.restore_saved_weights()

    return epochs
def main():
    """Run pose estimation on a video/webcam, optionally with tracking and action recognition.

    Depending on args.task, frames go through pose estimation only
    ('pose'), pose estimation plus tracking, or additionally action
    classification ('action'). Rendered frames are shown and, when
    args.save_folder is set, written to a video file. Video writers and
    the video source are always released, whatever the last frame held.
    """
    # Configs
    args = get_args()
    cfg = Config(args.config)
    pose_kwargs = cfg.POSE
    clf_kwargs = cfg.CLASSIFIER
    tracker_kwargs = cfg.TRACKER

    # Initiate video/webcam
    source = args.source if args.source else 0
    video = Video(source)

    use_tracker = args.task != 'pose'
    debug_tracking = args.debug_track and use_tracker

    ## Initiate trtpose, deepsort and action classifier
    pose_estimator = get_pose_estimator(**pose_kwargs)
    if use_tracker:
        tracker = get_tracker(**tracker_kwargs)
        if args.task == 'action':
            action_classifier = get_classifier(**clf_kwargs)

    ## initiate drawer and text for visualization
    drawer = Drawer(draw_numbers=args.draw_kp_numbers)
    user_text = {
        'text_color': 'green',
        'add_blank': True,
        'Mode': args.task,
        # MaxDist: cfg.TRACKER.max_dist,
        # MaxIoU: cfg.TRACKER.max_iou_distance,
    }

    # writers are created on the first rendered frame; None means "not created"
    writer = None
    debug_writer = None

    try:
        # loop over the video frames
        for bgr_frame in video:
            rgb_frame = cv2.cvtColor(bgr_frame, cv2.COLOR_BGR2RGB)

            # predict pose estimation
            start_pose = time.time()
            # predictions include keypoints in trtpose order, bboxes (x,y,w,h)
            predictions = pose_estimator.predict(rgb_frame, get_bbox=True)

            if len(predictions) == 0 and use_tracker:
                # no keypoints: update tracker's memory and its age
                debug_img = bgr_frame
                tracker.increment_ages()
            elif use_tracker:
                # Tracking
                predictions = utils.convert_to_openpose_skeletons(predictions)
                predictions, debug_img = tracker.predict(
                    rgb_frame, predictions, debug=args.debug_track)
                # Action Recognition
                if len(predictions) > 0 and args.task == 'action':
                    predictions = action_classifier.classify(predictions)

            end_pipeline = time.time() - start_pose

            # add user's desired text on render image
            user_text.update({
                'Frame': video.frame_cnt,
                'Speed': '{:.1f}ms'.format(end_pipeline * 1000),
            })

            # draw predicted results on bgr_img with frame info
            render_image = drawer.render_frame(bgr_frame, predictions,
                                               **user_text)

            if args.save_folder and writer is None:
                # initiate writer for saving rendered video.
                output_suffix = get_suffix(args, cfg)
                output_path = video.get_output_file_path(
                    args.save_folder, suffix=output_suffix)
                writer = video.get_writer(render_image, output_path, fps=30)

                if debug_tracking:
                    debug_output_path = output_path[:-4] + '_debug.avi'
                    debug_writer = video.get_writer(debug_img,
                                                    debug_output_path)
                print(f'[INFO] Saving video to : {output_path}')

            # show frames
            try:
                if debug_tracking:
                    # the debug writer only exists when a save folder is given
                    if debug_writer is not None:
                        debug_writer.write(debug_img)
                    utils.show(debug_img, window='debug_tracking')

                if writer is not None:
                    writer.write(render_image)
                utils.show(render_image,
                           window='webcam' if isinstance(source, int) else
                           osp.basename(source))
            except StopIteration:
                break
    finally:
        # release whatever was created, independently of the last frame
        if debug_writer is not None:
            debug_writer.release()
        if writer is not None:
            writer.release()
        video.stop()
def main():
    """Extract skeletons from the valid training images and save them to disk.

    For each image with at least one detection, the rendered image is
    written to the output image folder and the skeletons (image info plus
    flattened keypoints of each prediction) are dumped as json into a text
    file. A summary of the labels is printed at the end.
    """
    started = time.time()

    # Settings
    cfg = Config(config_file='../configs/train_action_recogn_pipeline.yaml')
    cfg.merge_from_file('../configs/infer_trtpose_deepsort_dnn.yaml')
    cfg_stage = cfg[os.path.basename(__file__)]
    img_format = cfg.img_format

    ## IO folders
    def get_path(x):
        # a list/tuple of path components is joined; anything else is kept as is
        if isinstance(x, (list, tuple)):
            return os.path.join(*x)
        return x

    src_imgs_folder = get_path(cfg_stage.input.imgs_folder)
    src_valid_imgs = get_path(cfg_stage.input.valid_imgs)
    dst_skeletons_folder = get_path(cfg_stage.output.skeletons_folder)
    dst_imgs_folder = get_path(cfg_stage.output.imgs_folder)
    dst_imgs_info_txt = get_path(cfg_stage.output.imgs_info_txt)

    # initiate pose estimator
    pose_estimator = get_pose_estimator(**cfg.POSE)
    drawer = Drawer(draw_numbers=True)

    # Init output path
    print(
        f"[INFO] Creating output folder -> {os.path.dirname(dst_skeletons_folder)}"
    )
    for folder in (dst_imgs_folder, dst_skeletons_folder,
                   os.path.dirname(dst_imgs_info_txt)):
        os.makedirs(folder, exist_ok=True)

    # train val images reader
    images_loader = ReadValidImagesAndActionTypesByTxt(src_imgs_folder,
                                                       src_valid_imgs,
                                                       img_format)
    images_loader.save_images_info(dst_imgs_info_txt)
    print(f'[INFO] Total Images -> {len(images_loader)}')

    # Read images and process
    progress = tqdm(range(len(images_loader)), total=len(images_loader))
    for index in progress:
        img_bgr, label, img_info = images_loader.read_image()
        img_disp = img_bgr.copy()
        img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)

        # predict trtpose skeleton and save to file as openpose format
        predictions = pose_estimator.predict(img_rgb, get_bbox=False)
        if len(predictions) == 0:
            continue
        predictions = utils.convert_to_openpose_skeletons(predictions)

        # save predicted image
        save_name = img_format.format(index)
        img_disp = drawer.render_frame(img_disp, predictions)
        cv2.imwrite(os.path.join(dst_imgs_folder, save_name), img_disp)
        try:
            utils.show(img_disp, wait=1)
        except StopIteration:
            break

        # save skeletons in text file (image extension replaced by .txt)
        skeleton_txt = os.path.join(dst_skeletons_folder,
                                    save_name[:-4] + '.txt')
        skeletons = [
            img_info + pred.flatten_keypoints for pred in predictions
        ]
        with open(skeleton_txt, 'w') as f:
            json.dump(skeletons, f)

        # update progress bar descriptions
        progress.set_description(f'action -> {label}')
        progress.set_postfix(num_of_person=len(predictions))

    progress.close()
    cv2.destroyAllWindows()

    elapsed = time.gmtime(time.time() - started)
    print('Total Extraction Time', time.strftime("%H:%M:%S", elapsed))

    labels_info = images_loader.labels_info
    print(tabulate([list(labels_info.values())], list(labels_info.keys()),
                   'grid'))
def print_cache_info():
    """Show the cache statistics of the cached restructuring functions of Dims."""
    cached_functions = (
        (' restructure', Dims.restructure_into),
        (' re-multiply', Dims.restructure_for_multiply),
        (' re-mulpro ', Dims.restructure_for_mulpro),
    )
    for label, function in cached_functions:
        u.show(label, function.cache_info())
def main(argv):
    """Parse command-line flags and apply the corresponding settings.

    -f sets tacgraph.force_profile, -d switches to double precision and
    -s sets silent mode. -h prints the usage and exits; an unrecognized
    option prints the usage and exits with status 2.
    """
    usage = 'usage: main.py -d -f -h -s'

    try:
        options, _ = getopt.getopt(argv, 'dfhs', [])
    except getopt.GetoptError:
        print(usage)
        sys.exit(2)

    for flag, _ in options:
        if flag == '-f':
            tacgraph.force_profile = True
            continue
        if flag == '-d':
            p.set_double_precision()
            continue
        if flag == '-s':
            u.set_silent()
            continue
        # only -h remains
        print(usage)
        sys.exit()


if __name__ == '__main__':
    # apply command-line flags, show the banner, then start play
    main(sys.argv[1:])

    ram = u.system_RAM_GB()
    u.show('\nPyTac Version 1.2.2, 2020 © Adnan Darwiche')
    u.show(f'RAM {ram:.2f} GB, processors {cpu_count()}')
    u.show(f'TF {tf.__version__}, {p.precision} precision')

    play.play()
def posteriors(bn, inputs, output, evidence):
    """Compute the posterior over node `output` by variable elimination (VE).

    `inputs` names the evidence nodes and `evidence` holds one batch of
    lambdas per input (same length as `inputs`). Only the nodes connected to
    the query node take part in the elimination. Returns the table of the
    normalized factor over the query variable.
    """
    u.show('\nRunning VE...', end='', flush=True)
    assert not bn.testing and len(inputs) == len(evidence)

    # elimination is restricted to the nodes connected to the query node
    qnode = bn.node(output)
    nodes = qnode.connected_nodes()  # set
    assert qnode in nodes

    # keep only the evidence whose node is connected to the query node
    enodes, connected_evd = [], []
    for name, lambdas in zip(inputs, evidence):
        enode = bn.node(name)
        if enode in nodes:
            enodes.append(enode)
            connected_evd.append(lambdas)  # a batch of lambdas for enode
    assert enodes and connected_evd  # output must be connected to some input

    # one Var per bn node, plus a Var for the batch
    node2var = {n: Var(bn_node=n) for n in nodes}
    batch_var = Var(batch_size=data.evd_size(connected_evd))

    # elimination order over everything but the query node
    order, _, _, _ = bn.elm_order('minfill')
    elm_order = tuple(node2var[n] for n in order if n != qnode and n in nodes)

    # bn factors: evidence indicators first, then cpts
    indicators = tuple(
        Factor(evd, (batch_var, node2var[n]))
        for evd, n in zip(connected_evd, enodes))
    cpts = tuple(
        Factor(n.tabular_cpt(), tuple(node2var[m] for m in n.family), sort=True)
        for n in nodes)
    query_var = node2var[qnode]

    # pool of factors, indexed for lookup during elimination
    # (factor.tvars exclude the batch var)
    scalars = set()  # scalar factors (have no vars)
    var2factors = {var: set() for var in elm_order}  # var -> factors with var
    var2factors[query_var] = set()

    def index(factor):
        # add factor to pool
        if factor.is_scalar:
            scalars.add(factor)
            return
        for var in factor.tvars:
            var2factors[var].add(factor)

    def remove(factors):
        # remove factors from pool; iterate over a copy since `factors` may
        # be one of the sets stored in var2factors
        for f in set(factors):
            assert not f.is_scalar
            for var in f.tvars:
                var2factors[var].remove(f)

    def get_factors(var):
        # factors that contain var (there must be at least one)
        factors = var2factors[var]
        assert factors
        return factors

    def product(factors):
        # multiply factors together, starting from the identity factor
        result = one
        for f in factors:
            result = result.multiply(f)
        return result

    # we are about to start eliminating vars: index the factors first
    for factor in indicators + cpts:
        index(factor)

    # eliminate vars
    one = Factor.one()  # identity factor for multiplication
    for var in elm_order:
        factors = get_factors(var)
        summed = product(factors).sumout(var)
        remove(factors)
        index(summed)

    # verify pool at end of elimination: only the query var has factors left
    assert all(not pool for var, pool in var2factors.items()
               if var != query_var)
    # NOTE(review): this parses as `not (scalars == bn.is_connected())`, i.e.
    # it compares the set itself with the result -- confirm that this is the
    # intended check
    assert not scalars == bn.is_connected()

    # combine what is left into a factor over the query var
    posterior = product(get_factors(query_var))
    for f in scalars:
        posterior = posterior.multiply(f)
    assert posterior.has_batch and posterior.tvars == (query_var, )

    posterior = posterior.normalize()
    u.show('done.')
    return posterior.table  # ndarray
def __remove_dead_fcpts(view, verbose):
    """Drop functional-cpt leaves that are dead and remove them from `view`.

    Repeatedly marks dead fcpt leaves as dropped (always keeping one leaf per
    functional var), shrinks separators accordingly, and stops once no more
    dead fcpts can be dropped. The dropped leaves are then removed from the
    view, which is reconstructed. When `verbose` is true, the number of
    replicas kept is reported.
    """
    # number of replicas before removal (used only for reporting)
    pre_replica_count = sum(len(leaves) - 1 for leaves in view.fcpts.values())
    #view.dot('pre.gv')
    #u.pause()

    # dead basically means: not contributing to shrinking separators
    # leaf node i is dead if it has a functional cpt whose var is summed out
    # host cannot be dead (contains query var which is never summed out)
    # root cannot be dead (would have been pruned if it was dead)
    def dead(i):
        return i != view.host and view.has_fcpt(i) and i.var not in view.sep(i)

    # add dead fcpts to dropped and clear their separators
    dropped = set()

    def drop_dead_fcpts():
        # sort order: live leaves first, then hosts, then fewer parents
        key = lambda i: (dead(i), not i.is_host, len(i.var.parents))
        for fvar, leaves in view.fcpts.items():
            leaves.sort(key=key)  # we prefer to keep hosts
            new_leaves = []
            for index, i in enumerate(leaves):
                # the leaf at index 0 is always kept, even if dead
                if index != 0 and dead(i):  # we need to keep one fcpt per fvar
                    view.sep_set(i, set())  # clear separator
                    dropped.add(i)
                else:
                    new_leaves.append(i)
            # replaces the value of an existing key, so the dict size is
            # unchanged while it is being iterated
            view.fcpts[fvar] = new_leaves

    # shrink separators further due to removing dead fcpts
    def shrink_separators():
        # bottom-up: the separator of an internal node is restricted to the
        # vars appearing in the separators of its children
        for i, _, c1, c2, _ in view.bottom_up():
            if c1:
                assert i not in dropped
                view.sep_intersect(i, view.sep(c1) | view.sep(c2))
        # top-down: the separator of a node is restricted to the vars
        # appearing in the separators of its sibling and its parent
        for i, p, _, _, s in view.top_down():
            if s and i not in dropped:
                view.sep_intersect(i, view.sep(s) | view.sep(p))
                # NOTE(review): this also resets the cluster of the parent;
                # confirm the intended nesting of this statement
                view.cls_set(p, view.sep(i) | view.sep(p))

    # whether view still have dead fcpts
    def more_dead():
        for leaves in view.fcpts.values():
            count = sum(dead(i) for i in leaves)
            # a single dead leaf is tolerated only if it is the sole leaf
            if count > 1 or (count == 1 and len(leaves) > 1):
                return True
        return False

    # identify dead fcpts
    while True:
        drop_dead_fcpts()
        shrink_separators()
        if not more_dead():
            break
    assert all(view.fcpts.values())  # at least one fcpt for each fvar

    # remove dead fcpts from view
    for i in dropped:
        __remove_leaf(view, i)
    __reconstruct(view)

    if verbose:
        replica_count = sum(len(leaves) - 1 for leaves in view.fcpts.values())
        distinct_count = sum(1 for leaves in view.fcpts.values()
                             if len(leaves) >= 2)
        u.show(
            f' kept fcpts: {replica_count}/{pre_replica_count}, distinct {distinct_count}'
        )
def __print_progress(self, e, i, n, lr):
    """Show a one-line progress status, then move the cursor back over it.

    The status reports epoch `e`, the percentage 100*(i+1)//n and the
    learning rate `lr`. It is followed by 32 backspace characters so that
    the next status overwrites this one.
    """
    percent = (i + 1) * 100 // n
    status = f' epoch {e:5d}:{percent:4d}% lr {lr:.5f}'
    rewind = (b'\x08' * 32).decode()  # 32 backspaces
    u.show(status, end='', flush=True)
    u.show(rewind, end='', flush=True)
def __replicate_fcpts(view, verbose):
    """Add replicas of functional cpts (fcpts) to `view`.

    Visits the view bottom-up and, for every non-root node, decides which
    functional vars should get an additional fcpt leaf at that node. The
    chosen leaves are then added, the view is reconstructed and the
    classical clusters/separators are recomputed. When `verbose` is true,
    replica counts and view ranks are reported before and after.
    """

    def report():
        # replica statistics and ranks of the view in its current state
        if not verbose:
            return
        replica_count = sum(len(leaves) - 1 for leaves in view.fcpts.values())
        distinct_count = sum(1 for leaves in view.fcpts.values()
                             if len(leaves) >= 2)
        u.show(f' added fcpts: {replica_count}, distinct {distinct_count}')
        u.show(' View ranks : ' + view.ranks_str())

    # NOTE(review): this first report runs before any fcpt is added, yet it
    # is labelled 'added fcpts' -- confirm that it is intended
    __set_classical_cls_and_sep(view)
    report()

    # for each view node i:
    below = {}  # vars at/below node i
    outside = {}  # vars outside node i
    fbelow = {}  # vars with functional cpt at/below node i
    foutside = {}  # vars with functional cpt outside node i

    for i, _, c1, c2, _ in view.bottom_up():
        if c1:
            fbelow[i] = fbelow[c1] | fbelow[c2]
            below[i] = below[c1] | below[c2]
        else:  # leaf node i
            fbelow[i] = {i.var} if view.has_fcpt(i) else set()
            below[i] = set(i.var.family)
    for i, p, _, _, s in view.top_down():
        if s:
            foutside[i] = fbelow[s] | foutside[p]
            outside[i] = below[s] | outside[p]
        else:  # root node i
            foutside[i] = {p.var} if view.has_fcpt(p) else set()
            outside[i] = set(p.var.family)

    # functional vars, those with fewer parents first
    fvar_order = sorted(view.fcpts, key=lambda var: len(var.parents))

    additions = []  # pairs (view node, functional var) to replicate
    for i, p, c1, c2, s in view.bottom_up():
        if not s:  # root node
            continue
        # refresh the entries of node i: the entries of nodes visited
        # earlier may have grown due to additions
        if c1:
            fbelow[i] = fbelow[c1] | fbelow[c2]
            below[i] = below[c1] | below[c2]
        outside[i] = below[s] | outside[p]
        foutside[i] = fbelow[s] | foutside[p]

        # computed once per node: not updated by the additions made below
        sepi = below[i] & outside[i]
        for fvar in fvar_order:  # all functional variables
            parents = set(fvar.parents)
            few_missing = len(parents - sepi) <= 1
            parents_functional = parents <= fbelow[i]
            only_at_sibling = fvar in fbelow[s] and fvar not in fbelow[i]
            if only_at_sibling and (few_missing or parents_functional):
                additions.append((i, fvar))
                fbelow[i].add(fvar)
                below[i] |= set(fvar.family)

    # replicate fcpts in view
    for i, fvar in additions:
        new_leaf = __add_leaf(view, fvar, i)
        view.fcpts[fvar].append(new_leaf)
    __reconstruct(view)

    __set_classical_cls_and_sep(view)
    report()
def get(net1, hard_evd_nodes, trainable_tbn, elm_method, elm_wait):
    """Decouple network `net1` and compute an elimination order for it.

    Two transformations are considered:
      * cutting the outgoing edges of nodes in `hard_evd_nodes` (clamping);
      * replicating functional nodes that have two or more children.

    Returns a triple (network, elimination order, (rank1, rank2)), where
    rank1/rank2 are the max binary ranks reported by `elm_order` before and
    after decoupling:
      * nothing to decouple: `net1`, its elimination order, equal ranks;
      * only clamping took place: `net1` (with `_clamped` set on the clamped
        nodes) and an elimination order over the original nodes;
      * otherwise: the decoupled network `net2` and its elimination order.
    """
    assert net1._for_inference
    #net1.dot(fname='tbn_pre_decouple.gv', view=True)

    u.show(f' Decoupling tbn:')
    # NOTE(review): cliques1 is referenced only by commented-out code below
    elm_order, cliques1, max_binary_rank1, stats = net1.elm_order(
        elm_method, elm_wait)
    u.show(' ', stats)

    # cutting edges outgoing from hard evidence nodes
    cut_edges = lambda n: len(n.children) >= 1 and n in hard_evd_nodes and \
                          (n.parents or len(n.children) >= 2)
    cut_edges_set = set(n for n in net1.nodes if cut_edges(n))

    # replicating functional cpts
    # if both duplicate and cut_edges trigger, use cut_edges as it is more effective
    duplicate = lambda n: n not in cut_edges_set and len(n.children) >= 2 \
                          and n.is_functional(trainable_tbn)
    duplicate_set = set(n for n in net1.nodes if duplicate(n))

    # perhaps decoupling does nothing
    if not duplicate_set and not cut_edges_set:
        u.show(' nothing to decouple')
        return net1, elm_order, (max_binary_rank1, max_binary_rank1
                                 )  # no decoupling possible

    # we will decouple
    net2 = TBN(f'{net1.name}__decoupled')
    net2._decoupling_of = net1

    # -when creating a clone c(n) in net2 for node n in net1, we need to look up the
    #  parents of c(n) in net2.
    # -this is done by calling get_image(p) on each parent p of node n
    # -the length of images[p] equals the number of times get_image(p) will be called
    # -members of images[p] may not be distinct depending on the replication strategy
    images = {}

    def get_image(n):
        # each call consumes one entry of images[n]
        return images[n].pop()

    # -when we have hard evidence on node n (net1), we create a replica r (net2) of n
    #  for each child of n, which copies evidence on n into the cpts of its children.
    # -maps node r (net2) to node n (net1) that it is copying evidence from
    # NOTE(review): the values stored below are the net2 clones ('master') of
    # the clamped nodes -- confirm the description above
    evidence_from = {}

    # maps node n (net1) to a tuple (c_1,...,c_k) where k is the number of clones that
    # node n will have in net2, and c_i is the number of children for clone i in net2
    ccounts = {}

    # fully replicated(i): one i-replica for each c-replica, where c is child of i in net1
    # partial replicated(i): one i-replica for each child c of i in net1
    # (fully_replicated is referenced only by commented-out code below)
    fully_replicated = lambda i: all(ccount == 1 for ccount in ccounts[i])
    replicas_count = lambda i: len(ccounts[i]
                                   )  # number of replicas node i has in net2

    # compute the number of replicas in net2 for each node in net1 (fill ccounts)
    for n in reversed(net1.nodes):  # bottom up
        ccounts[n] = []
        # cparents is used only by the commented-out alternatives below
        cparents = set()
        for c in n.children:
            cparents |= set(c.parents)
        #replicate_node = any(cparents <= clique for clique in cliques1)
        #replicate_node = all(p in duplicate_set for p in n.parents)
        replicate_node = True
        if n in duplicate_set and replicate_node:
            # replicate node n
            for c in n.children:
                if True:  #not fully_replicated(c):
                    # replicate node n for each replica of child c
                    ccounts[n].extend([1] * replicas_count(c))
                else:
                    # replicate node n for each child c
                    ccounts[n].append(replicas_count(c))
        else:  # do not replicate node n
            # n could be in cut_edges_set, but ccounts will not be used in that case
            duplicate_set.discard(n)
            children_replicas_count = sum(
                replicas_count(c) for c in n.children)
            ccounts[n].append(children_replicas_count)

    # cutting edges takes priority over decoupling as it is more effective
    for n in net1.nodes:  # visiting parents before children
        if n in cut_edges_set:  # disconnect n from its children
            assert n not in duplicate_set
            n._clamped = True  # flag set in original network (net1)
            parents = [get_image(p) for p in n.parents]
            master = clone_node(n, n.name, parents)
            net2.add(master)
            images[n] = []
            # master not added to images as it will not be a parent of any node in net2
            for i, c in enumerate(n.children):
                for j in range(replicas_count(
                        c)):  # j iterates over replicas of child c
                    # these clones will be removed after elimination order is computed
                    # clones are not testing even if master is testing
                    clone = Node(f'{n.name}_evd{i}_{j}',
                                 values=master.values,
                                 parents=[])
                    net2.add(clone)
                    evidence_from[clone] = master
                    images[n].append(
                        clone
                    )  # children of n will reference clones, not master
        elif n in duplicate_set:  # duplicate node n and its functional cpt
            images[n] = []
            for i, ccount in enumerate(ccounts[n]):
                assert ccount > 0  # number of children each clone will have in net2
                parents = [get_image(p) for p in n.parents]
                clone = clone_node(n, f'{n.name}_fcpt{i}', parents)
                if i > 0:
                    clone._master = False  # clone() sets this to True
                net2.add(clone)
                images[n].extend([clone] * ccount)
        else:  # just copy node n from net1 to net2
            (ccount, ) = ccounts[n]  # number of children clone will have in net2
            parents = [get_image(p) for p in n.parents]
            clone = clone_node(n, n.name, parents)
            net2.add(clone)
            images[n] = [clone] * ccount

    assert not net2._for_inference
    assert len(images) == len(net1.nodes)
    assert len(images) <= len(net2.nodes)
    # every image must have been consumed by exactly one child clone
    assert all(v == [] for v in images.values())

    #net2.dot(fname='tbn_post_decouple.gv', view=True)

    elm_order, _, max_binary_rank2, stats = net2.elm_order(
        elm_method, elm_wait)
    u.show(' ', stats)

    if not duplicate_set:
        # map the order back to nodes of net1, skipping auxiliary evidence nodes
        elm_order = [n._original for n in elm_order if n not in evidence_from]
        # only clamping took place, so we only care about elimination order
        # return original network with _clamped flag set for some nodes
        return net1, elm_order, (max_binary_rank1, max_binary_rank2)

    if cut_edges_set:  # some variables were clamped
        # get rid of auxiliary evidence nodes from elimination order
        elm_order = [n for n in elm_order if n not in evidence_from]
        # need to restore net2 by getting rid of auxiliary evidence nodes
        # and restoring children of clamped nodes
        net2.nodes = [n for n in net2.nodes if n not in evidence_from]
        # an auxiliary evidence node is replaced by the master it copies from
        replace = lambda n: evidence_from[n] if n in evidence_from else n
        for n in net2.nodes:
            n._parents = tuple(replace(p) for p in n.parents)
            n._family = tuple(replace(f) for f in n.family)

    u.show(f' node growth: {len(net2.nodes)/len(net1.nodes):.1f}')

    return net2, elm_order, (max_binary_rank1, max_binary_rank2)
def __build_tac_graph(self, ops_graph):
    """Compile the tf graphs of this TacGraph from `ops_graph` and finalize it.

    Sets the concrete functions `MAR` (marginals), `TCPTS` (trainable cpts),
    `CA`, `CE`, `MSE` (metrics/losses) and, when trainable, `OPT` together
    with the optimizer and its state. It also records `parameter_count`, the
    graph statistics `size`, `binary_rank`, `rank` and `total_size`, and
    finally sets `finalized`.

    `total_size` covers the tac graph, the metric graphs and, when trainable,
    the optimizer graph. It no longer depends on whether output is verbose.
    """
    assert not self.finalized

    u.show(' Constructing TacGraph: tac...', end='', flush=True)

    ### compiling tf graph for tac and another tf graph for its trainable cpts

    # step 1: create tac inputs (variables) by executing corresponding ops of OpsGraph
    self.__create_variables(
        ops_graph)  # must be done outside of @tf.functions

    # step 2: create remaining tensors of tac tf graph
    # self.MAR() returns the tac marginals
    espec = [
        tf.TensorSpec(shape=e.shape, dtype=e.dtype)
        for e in self.evidence_variables
    ]
    self.MAR = self.__marginals.get_concrete_function(ops_graph, *espec)

    # trainable cpts are created twice: initially as part of the tac tf graph (above),
    # then in their own tf graph (so we can save them without evaluating tac graph)
    # self.TCPTS() returns trainable cpts (for saving)
    self.TCPTS = self.__trainable_cpts.get_concrete_function(ops_graph)

    ### compiling tf graphs for computing metrics and losses (three tf graphs)
    u.show('metrics...', end='', flush=True)
    shape, dtype = self.marginal_spec
    mspec = tf.TensorSpec(shape=shape, dtype=dtype)
    self.CA = self.__classification_accuracy.get_concrete_function(
        mspec, mspec)
    self.CE = self.__cross_entropy.get_concrete_function(mspec, mspec)
    self.MSE = self.__mean_squared_error.get_concrete_function(
        mspec, mspec)

    ### compiling tf graph for optimizer (to minimize loss)
    if self.trainable:
        u.show('optimizer...', end='', flush=True)
        self.optimizer_lr = tf.Variable(.1, dtype=p.float)  # will be updated
        self.optimizer = tf.optimizers.Adam(
            learning_rate=self.optimizer_lr)
        rspec = tf.TensorSpec(shape=(), dtype=p.float)
        lspec = tf.TensorSpec(shape=(), dtype=tf.string)
        self.OPT = self.__optimize_loss.get_concrete_function(
            rspec, lspec, mspec, *espec)
        self.optimizer_state = self.optimizer.get_weights(
        )  # after self.OPT is set

    # some book keeping
    self.loss_fns = {'CE': self.CE, 'MSE': self.MSE}
    self.metric_fns = {'CA': self.CA, 'CE': self.CE, 'MSE': self.MSE}
    self.parameter_count = sum(t.shape[0] for t in self.weight_variables)

    # compute sizes of compiled tf graphs
    graph_size = lambda fn: self.__graph_size(fn.graph)[0]
    self.size, self.binary_rank, self.rank = self.__graph_size(
        self.MAR.graph)
    concrete_fns = (self.TCPTS, self.CE, self.CA, self.MSE)
    metrics_size = sum(graph_size(fn) for fn in concrete_fns)
    # the optimizer graph counts towards the total whether or not we print,
    # so that total_size does not change with verbosity
    opt_size = graph_size(self.OPT) if self.trainable else 0
    self.total_size = self.size + metrics_size + opt_size

    # printing statistics of compiled tf graphs
    if u.verbose:  # do computations below only if we will print results
        stats = self.__graph_stats(self.MAR.graph)
        u.show(stats)
        u.show(
            f' binary rank {self.binary_rank:.1f}, rank {self.rank} (for separators)'
        )
        u.show(f' metrics size {metrics_size:,}')
        if self.trainable:
            u.show(f' optimizer size {opt_size:,}')

    # a tac is trainable iff it has weight variables; a tac that is not
    # trainable must have fixed cpt tensors
    assert not self.trainable or self.weight_variables
    assert not self.weight_variables or self.trainable
    assert self.trainable or self.fixed_cpt_tensors

    self.finalized = True