def getHMM(size, card, param=False, transition=None, emission=None):
    # define an HMM model with `size` time steps and `card` hidden states
    assert size >= 2 and card >= 2
    if param:
        assert transition is not None and emission is not None
        # check sizes of transition and emission matrices
        u.input_check(np.array(transition).shape == (card, card),
                      'wrong size for transition matrix')
        u.input_check(np.array(emission).shape == (card, card),
                      'wrong size for emission matrix')

    hmm = TBN(f'hmm_{size}')
    values = ['v' + str(i) for i in range(card)]
    hidden_nodes = []  # store list of created hidden nodes
    for i in range(size):
        # add hidden node
        if i == 0:
            uniform_cpt = [1./card] * card
            hidden_i = Node('h_0', values=values, parents=[], cpt=uniform_cpt)
            hmm.add(hidden_i)  # h_0 stays uniform even when param=True
            hidden_nodes.append(hidden_i)
        else:
            hidden_i = Node('h_' + str(i), values=values,
                            parents=[hidden_nodes[i - 1]],
                            cpt_tie='transition', cpt=transition)
            hmm.add(hidden_i)
            hidden_nodes.append(hidden_i)
        # add evidence node
        evidence_i = Node('e_' + str(i), values=values,
                          parents=[hidden_nodes[i]],
                          cpt_tie='emission', cpt=emission)
        hmm.add(evidence_i)
    # finished creating the hmm
    # hmm.dot(view=True)
    print(f'Finished creating HMM_{size} with cardinality {card}')
    return hmm
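# Usage sketch (illustrative, not part of the library): build an HMM with
# random tied CPTs, then a parametrized one. The transition/emission values
# below are made up for the example.
def _example_getHMM():
    hmm1 = getHMM(5, 2)  # 5 time steps, binary states, random tied CPTs
    transition = [[0.9, 0.1], [0.2, 0.8]]  # rows: state of h_{i-1}, cols: h_i
    emission = [[0.7, 0.3], [0.4, 0.6]]    # rows: state of h_i, cols: e_i
    hmm2 = getHMM(5, 2, param=True, transition=transition, emission=emission)
    return hmm1, hmm2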
def add(self, node):
    u.input_check(type(node) == Node, f'{node} is not a TBN node object')
    # conditional message avoids dereferencing node.tbn when it is None
    u.input_check(node.tbn is None, '' if node.tbn is None else
        f'node {node.name} is already in a different TBN {node.tbn.name}')
    u.input_check(node.name not in self._n2o,
        f'a node with name {node.name} already exists in TBN {self.name}')
    for p in node.parents:  # parents must have already been added
        u.input_check(p.tbn is self and p.name in self._n2o,
            f'parent {p.name} of node {node.name} has not been added to TBN {self.name}')
    assert self._for_inference == node._for_inference

    # check if node is tied and process accordingly
    tie_id = node.cpt_tie
    if tie_id:  # node is tied to another
        assert not node.fixed_cpt  # only trainable cpts can be tied
        if tie_id in self._cpt_ties:
            # a node tied to this one has already been added
            tied_nodes = self._cpt_ties[tie_id]
            tied_node = tied_nodes[0]
            assert node.shape() == tied_node.shape()  # tied cpts should have same shape
            tied_nodes.append(node)
        else:  # no other node tied to this one has been added yet
            self._cpt_ties[tie_id] = [node]

    # connect node to parents
    for p in node.parents:
        p._children.append(node)

    # add node
    node._tbn = self
    self.testing |= node.testing
    self._n2o[node.name] = node
    self._add_order.append(node)
    self.nodes.append(node)
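# Usage sketch (illustrative): add order matters, since a node's parents must
# already be in the network; cpt_tie shares one trainable CPT across nodes
# whose CPTs have the same shape.
def _example_add():
    net = TBN('tiny')
    a = Node('a')                                 # root, random CPT over (True, False)
    b = Node('b', parents=[a], cpt_tie='shared')  # tied, random CPT
    c = Node('c', parents=[a], cpt_tie='shared')  # shares b's CPT (same shape)
    for n in (a, b, c):
        net.add(n)                                # parents before children
    return net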
def __init__(self, tbn, inputs, output, *, hard_inputs=[], trainable=False,
             elm_method='minfill', elm_wait=30, profile=False):
    # inputs are names of tbn nodes
    # output is name of tbn node
    u.input_check(all(tbn.is_node_name(i) for i in inputs),
        'TAC inputs must be names of tbn nodes')
    u.input_check(tbn.is_node_name(output),
        'TAC output must be a name of a tbn node')
    u.input_check(set(hard_inputs) <= set(inputs),
        'TAC hard inputs must be a subset of its inputs')
    u.input_check(inputs,
        'TAC inputs cannot be empty')
    u.input_check(output not in inputs,
        'TAC output cannot be one of its inputs')

    self.trainable = trainable    # whether tac parameters can be trained
    self.profile = profile        # saves tac and profiles time
    self.tbn = None               # copy prepared for inference
    self.input_nodes = None       # tac inputs (tbn nodes)
    self.output_node = None       # tac output (tbn node)
    self.hard_input_nodes = None  # tbn nodes whose evidence will always be hard
    self.ops_graph = None         # ops graph representing tac
    self.tac_graph = None         # tensor graph representing tac
    self.size = None              # size of tensor graph
    self.rank = None              # max rank of any tensor
    self.binary_rank = None       # max rank if tensor dimensions were binary
    self.parameter_count = None   # number of trainable parameters

    self.loss_types = ('CE', 'MSE')
    self.metric_types = ('CE', 'CA', 'MSE')

    self.circuit_type = 'TAC' if tbn.testing else 'AC'
    self.network_type = 'TBN' if tbn.testing else 'BN'

    # compiling the tbn
    self.__compile(tbn, inputs, output, hard_inputs, trainable,
                   elm_method, elm_wait, profile)

    # construct trainer for fitting tac (after compiling tbn)
    if trainable:
        self.trainer = train.Trainer(self)
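# Usage sketch (illustrative): compile a circuit for an HMM query, with the
# evidence nodes as inputs and the last hidden node as output. Assumes this
# __init__ belongs to a class named TAC, as the rest of the module suggests.
def _example_tac():
    hmm = getHMM(4, 2)
    inputs = [f'e_{i}' for i in range(4)]
    return TAC(hmm, inputs, 'h_3', trainable=True)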
def simulate(self, size, evidence_type, *, hard_evidence=False):
    u.input_check(evidence_type in ('grid', 'random'),
                  f'evidence type {evidence_type} not supported')
    cards = u.map('card', self.input_nodes)
    if evidence_type == 'grid':
        assert len(cards) == 2 and all(card == 2 for card in cards)
        assert not hard_evidence
        evidence = data.evd_grid(size)
    else:
        evidence = data.evd_random(size, cards, hard_evidence)
    marginals = self.tac_graph.evaluate(evidence)
    return (evidence, marginals)
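# Usage sketch (illustrative): 'random' works for inputs of any cardinality,
# while 'grid' requires exactly two binary inputs and soft evidence.
def _example_simulate(circuit):
    evidence, marginals = circuit.simulate(256, 'random', hard_evidence=True)
    return evidence, marginals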
def getNthOrderHMM(size, card, N, param=False, transition=None, emission=None):
    # define an N-th order HMM of length `size` with `card` hidden states
    assert size >= 2 and card >= 2 and N >= 1
    if param:
        # check the size of transition and emission probabilities
        u.input_check(np.array(transition).shape == (card,) * (N + 1),
                      'wrong size for transition matrix')
        u.input_check(np.array(emission).shape == (card, card),
                      'wrong size for emission matrix')

    hmm = TBN(f'hmm_{N}_{size}')
    values = ['v' + str(i) for i in range(card)]
    hidden_nodes = []  # store list of hidden nodes

    # add first N hidden nodes, each with a uniform conditional cpt
    for i in range(N):
        name = 'h_' + str(i)
        parents = hidden_nodes[:i]
        cpt = (1./card) * np.ones(shape=(card,)*(i+1))
        hidden_i = Node(name, values=values, parents=parents, cpt=cpt)
        hmm.add(hidden_i)
        hidden_nodes.append(hidden_i)

    # add the subsequent hidden nodes, each conditioned on its N predecessors
    for i in range(N, size):
        name = 'h_' + str(i)
        parents = hidden_nodes[i-N:i]
        hidden_i = Node(name, values=values, parents=parents,
                        cpt=transition, cpt_tie='transition')
        hmm.add(hidden_i)
        hidden_nodes.append(hidden_i)

    # add evidence nodes
    for i in range(size):
        name = 'e_' + str(i)
        parents = [hidden_nodes[i]]
        evidence_i = Node(name, values=values, parents=parents,
                          cpt=emission, cpt_tie='emission')
        hmm.add(evidence_i)

    # finished defining the hmm
    # hmm.dot(view=True)
    print(f'Finished creating a {N}-th order HMM of length {size} and cardinality {card}')
    return hmm
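# Usage sketch (illustrative): a second-order HMM; each hidden node past the
# first two conditions on its two predecessors, so the tied transition CPT has
# shape (card, card, card).
def _example_second_order():
    return getNthOrderHMM(6, 2, 2)  # length 6, binary states, N = 2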
def evaluate(self, evidence, *, batch_size=64, report_time=False):
    evd_size = data.evd_size(evidence)      # number of examples
    batch_size = min(evd_size, batch_size)  # used batch size
    u.input_check(data.is_evidence(evidence),
        'TAC evidence is ill formatted')
    u.input_check(data.evd_is_hard(evidence, self.input_nodes, self.hard_input_nodes),
        'TAC evidence must be hard')
    u.input_check(data.evd_matches_input(evidence, self.input_nodes),
        'TAC evidence must match evidence tbn nodes')

    u.show(f'\nEvaluating {self.circuit_type}: evidence size {evd_size}, '
           f'batch size {batch_size}')

    marginals = None
    eval_time = 0
    for i, evd_batch in enumerate(data.evd_batches(evidence, batch_size)):
        # i indexes batches, so scale by batch_size to report percent of examples
        u.show(f'{int(100*i*batch_size/evd_size):4d}%\r', end='', flush=True)
        start_time = time.perf_counter()
        mar_batch = self.tac_graph.evaluate(evd_batch)
        eval_time += time.perf_counter() - start_time
        if marginals is None:
            marginals = mar_batch
        else:
            marginals = np.concatenate((marginals, mar_batch), axis=0)

    time_per_example = eval_time / evd_size
    time_per_million = time_per_example / (self.size / 1000000)
    u.show(f'\rEvaluation Time: {eval_time:.3f} sec '
           f'({1000*time_per_example:.1f} ms per example,'
           f' {1000*time_per_million:.1f} ms per 1M tac nodes)')

    assert data.mar_matches_output(marginals, self.output_node)
    assert data.mar_is_predictions(marginals)
    if report_time:
        return marginals, eval_time, batch_size
    return marginals
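# Usage sketch (illustrative): evaluate a compiled circuit on simulated
# evidence; report_time=True additionally returns timing information.
def _example_evaluate(circuit):
    evidence, _ = circuit.simulate(128, 'random', hard_evidence=True)
    marginals = circuit.evaluate(evidence, batch_size=32)
    return marginals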
def __init__(self, name, *, values=(True, False), parents=[], functional=None,
             fixed_cpt=False, fixed_zeros=False, testing=None, cpt_tie=None,
             cpt=None, cpt1=None, cpt2=None):
    # copy potentially mutable arguments in case they get changed by the user
    values, parents, cpt, cpt1, cpt2 = \
        copy(values), copy(parents), copy(cpt), copy(cpt1), copy(cpt2)
    # other arguments are immutable so no need to copy them

    # check integrity of arguments
    u.input_check(type(name) is str and name != '',
        'node name must be a nonempty string')
    u.input_check(isinstance(values, Sequence),
        'node values must be a python sequence')
    u.input_check(len(values) >= 1,
        'node must have at least one value')
    u.input_check(len(values) == len(set(values)),
        'node values must be unique')
    u.input_check(type(parents) is list,
        'node parents must be a list')
    u.input_check(len(parents) == len(set(parents)),
        'node parents must be unique')
    u.input_check(all(type(p) is Node for p in parents),
        'node parents must be TBN nodes')
    u.input_check(functional in (True, False, None),
        'functional flag must be True, False or None')
    u.input_check(fixed_cpt in (True, False),
        'fixed_cpt flag must be True or False')
    u.input_check(fixed_zeros in (True, False),
        'fixed_zeros flag must be True or False')
    u.input_check(testing in (True, False, None),
        'testing flag must be True, False or None')
    u.input_check(testing != False or (cpt1 is None and cpt2 is None),
        'node cannot have cpt1/cpt2 if it is not testing')
    u.input_check(cpt_tie is None or (type(cpt_tie) is str and cpt_tie != ''),
        'node flag cpt_tie must be a non-empty string')
    u.input_check(not (fixed_cpt and fixed_zeros),
        'node flags fixed_cpt and fixed_zeros cannot both be True')
    u.input_check(not (fixed_cpt and cpt_tie),
        'node cpt cannot be tied if it is also fixed')
    u.input_check(not (fixed_zeros and cpt_tie),
        'node cpt cannot be tied if it has fixed zeros')
    u.input_check(cpt is None or (cpt1 is None and cpt2 is None),
        'node cannot have both cpt and cpt1/cpt2')
    u.input_check((cpt1 is None) == (cpt2 is None),
        'node cpt1 and cpt2 must both be specified if the node is testing')

    # shortcut for specifying equal cpt1/cpt2
    if testing and cpt is not None:
        assert cpt1 is None and cpt2 is None
        cpt1 = cpt2 = cpt
        cpt = None

    # infer testing flag if needed (flag is optional)
    if testing is None:
        testing = cpt1 is not None and cpt2 is not None
    u.input_check(not testing or parents,
        'testing node must have parents')

    # use random cpts if not specified (used usually for testing)
    assert testing in (True, False)
    card = len(values)
    cards = tuple(p.card for p in parents)
    if testing and cpt1 is None:
        cpt1 = tbn.cpt.random(card, cards)
        cpt2 = tbn.cpt.random(card, cards)
    if not testing and cpt is None:
        cpt = tbn.cpt.random(card, cards)

    # populate node attributes
    self._id = next(Node.ID)         # need not be unique (clones have same id)
    self._name = name                # a unique string identifier of node
    self._testing = testing          # whether node is testing
    self._fixed_cpt = fixed_cpt      # cpt cannot be trained
    self._fixed_zeros = fixed_zeros  # zero probabilities in cpt will not be trained
    self._functional = functional    # whether node is functional
    # -the following attributes may change when preparing network for inference
    # -node values may be pruned, network edges may be pruned and cpts may
    #  be expanded to tabular form and/or pruned due to edge/value pruning
    self._values = values            # becomes a tuple if values are pruned
    self._parents = parents          # becomes a tuple (must match cpt order)
    self._cpt = cpt                  # becomes np array
    self._cpt1 = cpt1                # becomes np array
    self._cpt2 = cpt2                # becomes np array
    self._cpt_tie = cpt_tie          # tied cpts may have different shapes after pruning

    # derived attributes that may also change when preparing for inference
    family = [*parents, self]
    self._card = card
    self._family = family            # becomes a tuple (must match cpt order)
    self._children = []              # updated when children added to network

    # further attributes that are set later
    self._for_inference = False      # set when preparing for inference
    self._tbn = None                 # set when node added to a tbn
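# Usage sketch (illustrative): a regular node with an explicit CPT, and two
# testing nodes; passing cpt with testing=True is the documented shortcut for
# cpt1 = cpt2 = cpt.
def _example_nodes():
    p = Node('p', cpt=[0.3, 0.7])             # root over (True, False)
    q = Node('q', parents=[p], testing=True)  # random cpt1/cpt2
    r = Node('r', parents=[p], testing=True, cpt=[[0.6, 0.4], [0.1, 0.9]])
    return p, q, r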
def node(self, name):
    node = self._n2o.get(name, None)
    u.input_check(node, f'node {name} does not exist in TBN {self.name}')
    return node
def metric(self, evidence, labels, metric_type, *, batch_size=64):
    evd_size = data.evd_size(evidence)      # number of examples
    batch_size = min(evd_size, batch_size)  # used batch size
    u.input_check(data.is_evidence(evidence),
        'evidence is ill formatted')
    u.input_check(data.evd_is_hard(evidence, self.input_nodes, self.hard_input_nodes),
        'evidence must be hard')
    u.input_check(data.evd_matches_input(evidence, self.input_nodes),
        'evidence must match evidence nodes of tbn')
    u.input_check(data.is_marginals(labels, one_hot=(metric_type == 'CA')),
        'labels ill formatted')
    u.input_check(data.mar_matches_output(labels, self.output_node),
        'labels must match query node of tbn')
    u.input_check(metric_type in self.metric_types,
        f'metric {metric_type} is not supported')

    u.show(f'\nComputing {metric_type}: evidence size {evd_size}, '
           f'batch size {batch_size}')

    start_eval_time = time.perf_counter()
    batches, _ = data.data_batches(evidence, labels, batch_size)
    result = 0
    for evd_batch, lab_batch in batches:
        bresult = self.tac_graph.compute_metric(metric_type, evd_batch, lab_batch)
        result += bresult * len(lab_batch)
    result /= evd_size  # average weighted by batch size (last batch may be smaller)
    evaluation_time = time.perf_counter() - start_eval_time
    time_per_example = evaluation_time / evd_size

    u.show(f'{metric_type} Time: {evaluation_time:.3f} sec '
           f'({time_per_example:.4f} sec per example)')
    return result
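# Usage sketch (illustrative): 'CA' expects one-hot labels, while 'CE' and
# 'MSE' accept soft marginals as labels.
def _example_metric(circuit, evidence, labels):
    return circuit.metric(evidence, labels, 'CA', batch_size=128)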
def fit(self, evidence, marginals, loss_type, metric_type, *, batch_size=32):
    evd_size = data.evd_size(evidence)      # number of examples
    batch_size = min(evd_size, batch_size)  # used batch size
    u.input_check(self.trainable,
        'TAC is not trainable')
    u.input_check(data.is_evidence(evidence),
        'evidence is ill formatted')
    u.input_check(data.evd_is_hard(evidence, self.input_nodes, self.hard_input_nodes),
        'evidence must be hard')
    u.input_check(data.evd_matches_input(evidence, self.input_nodes),
        'evidence must match evidence nodes of tbn')
    u.input_check(data.is_marginals(marginals),
        'marginals ill formatted')
    u.input_check(data.mar_matches_output(marginals, self.output_node),
        'marginals must match query node of tbn')
    u.input_check(loss_type in self.loss_types,
        f'loss {loss_type} is not supported')
    u.input_check(metric_type in self.metric_types,
        f'metric {metric_type} is not supported')
    u.input_check(data.evd_size(evidence) == len(marginals),
        'evidence size must match marginals size')

    u.show(f'\nTraining {self.circuit_type}:')
    start_training_time = time.perf_counter()
    epoch_count = self.trainer.train(evidence, marginals, loss_type,
                                     metric_type, batch_size)
    training_time = time.perf_counter() - start_training_time
    time_per_epoch = training_time / epoch_count

    u.show(f'Training Time: {training_time:.3f} sec '
           f'({time_per_epoch:.3f} sec per epoch)')
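# Usage sketch (illustrative): train on simulated labels; the loss must be one
# of loss_types ('CE' or 'MSE') and the metric one of metric_types
# ('CE', 'CA' or 'MSE'). Assumes the circuit was compiled with trainable=True.
def _example_fit(circuit):
    evidence, marginals = circuit.simulate(512, 'random', hard_evidence=True)
    circuit.fit(evidence, marginals, 'CE', 'CE', batch_size=32)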