Exemple #1
0
def getHMM(size, card, param=False, transition=None, emission=None):
    """Build a first-order HMM as a TBN with `size` time steps.

    Step i contributes a hidden node 'h_i' and an evidence node 'e_i', both
    over the values 'v0' ... 'v{card-1}'.  'h_0' has no parents and is given a
    uniform cpt; each later hidden node has the previous hidden node as its
    only parent and is tied under "transition"; each evidence node has the
    hidden node of the same step as its only parent and is tied under
    "emission".

    Args:
        size: number of time steps (asserted to be at least 2).
        card: number of values of every node (asserted to be at least 2).
        param: when True, `transition` and `emission` are required and are
            checked to have shape (card, card).
        transition: cpt passed as given to every hidden node after 'h_0'.
        emission: cpt passed as given to every evidence node.

    Returns:
        The constructed TBN, named 'hmm_{size}'.
    """
    assert size >= 2 and card >= 2
    if param:
        assert transition is not None and emission is not None
        # both matrices must be card x card
        u.input_check(np.array(transition).shape == (card, card),
                      'wrong size for transition matrix')
        u.input_check(np.array(emission).shape == (card, card),
                      'wrong size for emission matrix')

    hmm = TBN(f'hmm_{size}')
    values = ['v' + str(i) for i in range(card)]
    hidden_nodes = []  # hidden nodes created so far, indexed by time step
    for i in range(size):
        # hidden node of step i
        if i == 0:
            # the initial hidden node is uniform whether or not param is set
            hidden_i = Node('h_0', values=values, parents=[],
                            cpt=[1. / card] * card)
        else:
            hidden_i = Node('h_' + str(i), values=values,
                            parents=[hidden_nodes[i - 1]],
                            cpt_tie="transition", cpt=transition)
        hmm.add(hidden_i)
        hidden_nodes.append(hidden_i)

        # evidence node of step i, child of the hidden node just added
        evidence_i = Node('e_' + str(i), values=values,
                          parents=[hidden_nodes[i]],
                          cpt_tie="emission", cpt=emission)
        hmm.add(evidence_i)

    print("Finish creating HMM_{} with cardinality {}".format(size, card))
    return hmm
 def add(self,node):
     """Validate `node` and register it in this TBN.

     The node must be of type Node, must not yet belong to a TBN, must have
     a name that is unused in this TBN, and all of its parents must already
     have been added here.  A node with a cpt tie is recorded in
     self._cpt_ties under its tie id; its shape is asserted to equal that of
     the first node recorded for that tie.
     """
     u.input_check(type(node)==Node,
         f'{node} is not a TBN node object')
     # the message can only mention the owning TBN when there is one
     owned_msg = '' if node.tbn is None else \
         f'node {node.name} is already in a different TBN {node.tbn.name}'
     u.input_check(node.tbn is None, owned_msg)
     u.input_check(node.name not in self._n2o,
         f'a node with name {node.name} already exists in TBN {self.name}')
     # every parent has to be in this TBN before its child is added
     for parent in node.parents:
         u.input_check(parent.tbn is self and parent.name in self._n2o,
             f'parent {parent.name} of node {node.name} has not been added to TBN {self.name}')
     assert self._for_inference == node._for_inference

     # bookkeeping for tied cpts
     tie_id = node.cpt_tie
     if tie_id:
         # a tied cpt must be trainable
         assert not node.fixed_cpt
         if tie_id not in self._cpt_ties:
             # first node seen for this tie
             self._cpt_ties[tie_id] = [node]
         else:
             peers = self._cpt_ties[tie_id]
             first_peer = peers[0]
             # nodes sharing a tie must agree on shape
             assert node.shape() == first_peer.shape()
             peers.append(node)

     # let each parent know about its new child
     for parent in node.parents:
         parent._children.append(node)

     # register the node itself
     node._tbn = self
     self.testing |= node.testing
     self._n2o[node.name] = node
     self._add_order.append(node)
     self.nodes.append(node)
    def __init__(self,
                 tbn,
                 inputs,
                 output,
                 *,
                 hard_inputs=[],
                 trainable=False,
                 elm_method='minfill',
                 elm_wait=30,
                 profile=False):
        """Validate the arguments, initialise attributes and compile `tbn`.

        Args:
            tbn: network to compile; must provide is_node_name() and testing.
            inputs: non-empty collection of names of tbn nodes.
            output: name of a tbn node that is not one of the inputs.
            hard_inputs: subset of `inputs`.
            trainable: whether tac parameters can be trained; when True a
                train.Trainer is attached as self.trainer after compiling.
            elm_method: forwarded unchanged to the compile step.
            elm_wait: forwarded unchanged to the compile step.
            profile: saves tac and profiles time.
        """
        # --- argument validation (order determines which error is reported)
        u.input_check(all(map(tbn.is_node_name, inputs)),
                      'TAC inputs must be names of tbn nodes)')
        u.input_check(tbn.is_node_name(output),
                      'TAC output must be a name of a tbn node)')
        u.input_check(
            set(hard_inputs) <= set(inputs),
            'TAC hard inputs must be a subset of its inputs')
        u.input_check(inputs, 'TAC inputs cannot be empty')
        u.input_check(output not in inputs,
                      'TAC output cannot be one of its inputs')

        # --- settings supplied by the caller
        self.trainable = trainable  # can tac parameters be trained?
        self.profile = profile  # saves tac and profiles time

        # --- attributes that start out as None
        self.tbn = None  # copy prepared for inference
        self.input_nodes = None  # tac input (tbn nodes)
        self.output_node = None  # tac output (tbn node)
        self.hard_input_nodes = None  # whether evidence will always be hard
        self.ops_graph = None  # ops graph representing tac
        self.tac_graph = None  # tensor graph representing tac
        self.size = None  # size of tensor graph
        self.rank = None  # max rank of any tensor
        self.binary_rank = None  # max rank if tensor dimensions were binary
        self.parameter_count = None  # number of trainable parameters

        # --- supported loss and metric identifiers
        self.loss_types = ('CE', 'MSE')
        self.metric_types = ('CE', 'CA', 'MSE')

        # --- labels depend on whether the network is testing
        if tbn.testing:
            self.circuit_type, self.network_type = 'TAC', 'TBN'
        else:
            self.circuit_type, self.network_type = 'AC', 'BN'

        # --- compile the tbn
        self.__compile(tbn, inputs, output, hard_inputs, trainable, elm_method,
                       elm_wait, profile)

        # --- the trainer is built only after compilation
        if trainable:
            self.trainer = train.Trainer(self)
    def simulate(self, size, evidence_type, *, hard_evidence=False):
        """Generate evidence and evaluate the tac graph on it.

        Args:
            size: passed to the evidence generator (data.evd_grid or
                data.evd_random).
            evidence_type: 'grid' or 'random'.  'grid' is only allowed when
                there are exactly two input nodes, both of cardinality 2,
                and hard_evidence is False (enforced by assertions).
            hard_evidence: forwarded to data.evd_random for 'random'
                evidence.

        Returns:
            The tuple (evidence, marginals), where marginals is the result of
            evaluating self.tac_graph on the generated evidence.
        """
        # compare strings by value: identity ('is') only works by accident
        # when both strings happen to be interned
        u.input_check(evidence_type in ('grid', 'random'),
                      f'evidence type {evidence_type} not supported')

        cards = u.map('card', self.input_nodes)

        if evidence_type == 'grid':
            assert len(cards) == 2 and all(card == 2 for card in cards)
            assert not hard_evidence
            evidence = data.evd_grid(size)
        else:
            evidence = data.evd_random(size, cards, hard_evidence)

        marginals = self.tac_graph.evaluate(evidence)

        return (evidence, marginals)
Exemple #5
0
def getNthOrderHMM(size, card, N, param=False, transition=None, emission=None):
    """Build an N-th order HMM as a TBN.

    All nodes range over the values 'v0' ... 'v{card-1}'.  The first N hidden
    nodes 'h_0' ... 'h_{N-1}' each have all earlier hidden nodes as parents
    and a cpt whose entries all equal 1/card.  Every later hidden node 'h_i'
    has the N preceding hidden nodes as parents and is tied under
    "transition".  Each evidence node 'e_i' (i < size) has 'h_i' as its only
    parent and is tied under "emission".

    Args:
        size: length of the model (asserted to be at least 2).
        card: number of values of every node (asserted to be at least 2).
        N: order of the model (asserted to be at least 1).
        param: when True, `transition` must have shape (card,) * (N + 1) and
            `emission` must have shape (card, card).
        transition: cpt passed as given to hidden nodes N ... size-1.
        emission: cpt passed as given to every evidence node.

    Returns:
        The constructed TBN, named 'hmm_{N}_{size}'.
    """
    assert size >= 2 and card >= 2 and N >= 1
    if param:
        u.input_check(np.array(transition).shape == (card,) * (N + 1), "wrong size for transition matrix")
        # an evidence node has card values and one parent with card values,
        # so the emission matrix is card x card (not a fixed 2 x 2)
        u.input_check(np.array(emission).shape == (card, card), "wrong size for emission matrix")

    hmm = TBN(f'hmm_{N}_{size}')
    values = ['v' + str(i) for i in range(card)]
    hidden_nodes = []  # hidden nodes created so far, in order

    # first N hidden nodes: too few predecessors for the transition cpt
    for i in range(N):
        name = 'h_' + str(i)
        parents = [hidden_nodes[j] for j in range(i)]
        # one axis per parent plus one for the node, every entry 1/card
        cpt = (1./card) * np.ones(shape=(card,)*(i+1))
        hidden_i = Node(name, values=values, parents=parents, cpt=cpt)
        hmm.add(hidden_i)
        hidden_nodes.append(hidden_i)

    # remaining hidden nodes depend on the N previous hidden nodes
    for i in range(N, size):
        name = 'h_' + str(i)
        parents = [hidden_nodes[j] for j in range(i-N, i)]
        hidden_i = Node(name, values=values, parents=parents, cpt=transition, cpt_tie="transition")
        hmm.add(hidden_i)
        hidden_nodes.append(hidden_i)

    # one evidence node per step, attached to the hidden node of that step
    for i in range(size):
        name = 'e_' + str(i)
        parents = [hidden_nodes[i]]
        evidence_i = Node(name, values=values, parents=parents, cpt=emission, cpt_tie="emission")
        hmm.add(evidence_i)

    print("Finish creating a {}-order hmm of length {} and cardinality {}".format(N, size, card))
    return hmm
    def evaluate(self, evidence, *, batch_size=64, report_time=False):
        """Evaluate the tac graph on `evidence` in batches.

        Args:
            evidence: evidence to evaluate; it is input-checked with
                data.is_evidence, data.evd_is_hard and
                data.evd_matches_input.
            batch_size: upper bound on the batch size; the size actually used
                is min(number of examples, batch_size).
            report_time: when True, timing information is returned as well.

        Returns:
            The marginals of all batches concatenated along axis 0, or the
            tuple (marginals, eval_time, batch_size) when report_time is True,
            where eval_time is the time in seconds spent inside
            self.tac_graph.evaluate and batch_size is the size actually used.
        """
        evd_size = data.evd_size(evidence)  # number of examples
        batch_size = min(evd_size, batch_size)  # used batch size

        u.input_check(data.is_evidence(evidence),
                      'TAC evidence is ill formatted')
        u.input_check(
            data.evd_is_hard(evidence, self.input_nodes,
                             self.hard_input_nodes),
            'TAC evidence must be hard')
        u.input_check(data.evd_matches_input(evidence, self.input_nodes),
                      'TAC evidence must match evidence tbn nodes')

        u.show(f'\nEvaluating {self.circuit_type}: evidence size {evd_size}, '
               f'batch size {batch_size}')

        mar_batches = []  # per-batch results, joined once after the loop
        eval_time = 0
        for i, evd_batch in enumerate(data.evd_batches(evidence, batch_size)):
            # i counts batches, so i*batch_size examples are already done
            done = i * batch_size
            u.show(f'{int(100*done/evd_size):4d}%\r', end='', flush=True)
            # only the graph evaluation itself is timed
            start_time = time.perf_counter()
            mar_batch = self.tac_graph.evaluate(evd_batch)
            eval_time += time.perf_counter() - start_time
            mar_batches.append(mar_batch)

        if not mar_batches:
            marginals = None
        elif len(mar_batches) == 1:
            marginals = mar_batches[0]
        else:
            marginals = np.concatenate(mar_batches, axis=0)

        time_per_example = eval_time / evd_size
        time_per_million = time_per_example / (self.size / 1000000)

        u.show(f'\rEvaluation Time: {eval_time:.3f} sec '
               f'({1000*time_per_example:.1f} ms per example,'
               f' {1000*time_per_million:.1f} ms per 1M tac nodes)')

        assert data.mar_matches_output(marginals, self.output_node)
        assert data.mar_is_predictions(marginals)

        if report_time:
            return marginals, eval_time, batch_size
        return marginals
    def __init__(self,
                 name,
                 *,
                 values=(True, False),
                 parents=[],
                 functional=None,
                 fixed_cpt=False,
                 fixed_zeros=False,
                 testing=None,
                 cpt_tie=None,
                 cpt=None,
                 cpt1=None,
                 cpt2=None):
        """Validate the arguments and initialise a node.

        Args:
            name: nonempty string identifying the node.
            values: sequence of at least one value, all distinct.
            parents: list of distinct Node objects.
            functional: True, False or None.
            fixed_cpt: if True the cpt cannot be trained; incompatible with
                fixed_zeros and with cpt_tie.
            fixed_zeros: if True zero probabilities in the cpt will not be
                trained; incompatible with fixed_cpt and with cpt_tie.
            testing: True, False or None.  When None it is inferred: the node
                is testing exactly when cpt1 and cpt2 are both given.  A
                testing node must have parents.
            cpt_tie: None or a non-empty string.
            cpt: cpt of a non-testing node; for a testing node it is a
                shortcut that sets both cpt1 and cpt2.  Cannot be combined
                with cpt1/cpt2.
            cpt1: first cpt of a testing node (given together with cpt2).
            cpt2: second cpt of a testing node (given together with cpt1).

        Cpts that are not specified are generated with tbn.cpt.random.
        All arguments are validated through u.input_check.
        """

        # copy potentially mutable arguments in case they get changed by the user
        # (this also makes the shared default list for parents harmless)
        values, parents, cpt, cpt1, cpt2 = \
            copy(values), copy(parents), copy(cpt), copy(cpt1), copy(cpt2)
        # other arguments are immutable so no need to copy them

        # check integrity of arguments
        u.input_check(
            type(name) is str and name != '',
            'node name must be a nonempty string')
        u.input_check(isinstance(values, Sequence),
                      'node values must be a python sequence')
        u.input_check(len(values) >= 1, 'node must have at least one value')
        u.input_check(
            len(values) == len(set(values)), 'node values must be unique')
        u.input_check(type(parents) is list, 'node parents must be a list')
        u.input_check(
            len(parents) == len(set(parents)), 'node parents must be unique')
        u.input_check(all(type(p) is Node for p in parents),
                      'node parents must be TBN nodes')
        u.input_check(functional in (True, False, None),
                      'functional flag must be True or False')
        u.input_check(fixed_cpt in (True, False),
                      'fixed_cpt flag must be True or False')
        u.input_check(fixed_zeros in (True, False),
                      'fixed_zeros flag must be True or False')
        u.input_check(testing in (True, False, None),
                      'testing flag must be True or False')
        u.input_check(testing != False or (cpt1 is None and cpt2 is None),
                      'node cannot have cpt1/cpt2 if it is not testing')
        u.input_check(
            cpt_tie is None or (type(cpt_tie) is str and cpt_tie != ''),
            'node flag cpt_tie must be a non-empty string')
        u.input_check(
            not (fixed_cpt and fixed_zeros),
            'node flags fixed_cpt and fixed_zeros cannot be both True')
        u.input_check(not (fixed_cpt and cpt_tie),
                      'node cpt cannot be tied if it is also fixed')
        u.input_check(not (fixed_zeros and cpt_tie),
                      'node cpt cannot be tied if it has fixed zeros')
        u.input_check(cpt is None or (cpt1 is None and cpt2 is None),
                      'node cannot have both cpt and cpt1/cpt2')
        u.input_check(
            (cpt1 is None) == (cpt2 is None),
            'node cpt1 and cpt2 must both be specified if the node is testing'
        )

        # shortcut for specifying equal cpt1/cpt2
        if testing and cpt is not None:
            assert cpt1 is None and cpt2 is None
            cpt1 = cpt2 = cpt
            cpt = None

        # infer testing flag if needed (flag is optional)
        if testing is None:
            testing = cpt1 is not None and cpt2 is not None

        u.input_check(not testing or parents,
                      'testing node must have parents')

        # use random cpts if not specified (used usually for testing)
        assert testing in (True, False)
        card = len(values)
        cards = tuple(p.card for p in parents)

        if testing and cpt1 is None:
            cpt1 = tbn.cpt.random(card, cards)
            cpt2 = tbn.cpt.random(card, cards)
        if not testing and cpt is None:
            cpt = tbn.cpt.random(card, cards)

        # populate node attributes
        self._id = next(Node.ID)  # need not be unique (clones have same id)
        self._name = name  # a unique string identifier of node
        self._testing = testing  # whether node is testing
        self._fixed_cpt = fixed_cpt  # cpt cannot be trained
        self._fixed_zeros = fixed_zeros  # zero probabilities in cpt will not be trained
        self._functional = functional  # whether node is functional

        # -the following attributes may change when preparing network for inference
        # -node values may be pruned, network edges may be pruned and cpts
        #  may be expanded to tabular form and/or pruned due to edge/value pruning
        self._values = values  # becomes a tuple if values are pruned
        self._parents = parents  # becomes a tuple (must match cpt order)
        self._cpt = cpt  # becomes np array
        self._cpt1 = cpt1  # becomes np array
        self._cpt2 = cpt2  # becomes np array
        self._cpt_tie = cpt_tie  # tied cpts may have different shapes after pruning

        # derived attributes that may also change when preparing for inference
        family = [*parents, self]
        self._card = card
        self._family = family  # becomes a tuple (must match cpt order)
        self._children = []  # updated when children added to network

        # further attributes that are set later
        self._for_inference = False  # set when preparing for inference
        self._tbn = None  # set when node added to a tbn
 def node(self,name):
     """Return the node stored under `name` in this TBN.

     The lookup result is passed to u.input_check, so a missing name is
     reported as an input error.
     """
     found = self._n2o.get(name)  # None when the name is unknown
     u.input_check(found,f'node {name} does not exist in TBN {self.name}')
     return found
    def metric(self, evidence, labels, metric_type, *, batch_size=64):
        """Compute `metric_type` for the tac on (evidence, labels).

        The metric is computed per batch by self.tac_graph.compute_metric and
        the per-batch values are averaged, weighted by batch length.

        Args:
            evidence: evidence, input-checked for format, hardness and match
                with the input nodes.
            labels: marginals matching the output node; required to be
                one-hot when metric_type is 'CA'.
            metric_type: one of self.metric_types.
            batch_size: upper bound on the batch size; the size actually used
                is min(number of examples, batch_size).

        Returns:
            The weighted average of the per-batch metric values.
        """
        example_count = data.evd_size(evidence)  # number of examples
        batch_size = min(example_count, batch_size)  # batch size in use
        needs_one_hot = (metric_type == 'CA')

        u.input_check(data.is_evidence(evidence), 'evidence is ill formatted')
        u.input_check(
            data.evd_is_hard(evidence, self.input_nodes,
                             self.hard_input_nodes), 'evidence must be hard')
        u.input_check(data.evd_matches_input(evidence, self.input_nodes),
                      'evidence must match evidence nodes of tbn')
        u.input_check(data.is_marginals(labels, one_hot=needs_one_hot),
                      'labels ill formatted')
        u.input_check(data.mar_matches_output(labels, self.output_node),
                      'labels must match query node of tbn')
        u.input_check(metric_type in self.metric_types,
                      f'metric {metric_type} is not supported')

        u.show(f'\nComputing {metric_type}: evidence size {example_count}, '
               f'batch size {batch_size}')

        clock_start = time.perf_counter()

        batches, _ = data.data_batches(evidence, labels, batch_size)
        result = 0
        for evd_batch, lab_batch in batches:
            batch_value = self.tac_graph.compute_metric(
                metric_type, evd_batch, lab_batch)
            # weight by batch length since the last batch may be smaller
            result += batch_value * len(lab_batch)
        result /= example_count

        elapsed = time.perf_counter() - clock_start
        per_example = elapsed / example_count

        u.show(f'{metric_type} Time: {elapsed:.3f} sec '
               f'({per_example:.4f} sec per example)')

        return result
    def fit(self,
            evidence,
            marginals,
            loss_type,
            metric_type,
            *,
            batch_size=32):
        """Train the tac on (evidence, marginals) using self.trainer.

        Args:
            evidence: evidence, input-checked for format, hardness and match
                with the input nodes.
            marginals: marginals matching the output node; their length must
                equal the evidence size.
            loss_type: one of self.loss_types.
            metric_type: one of self.metric_types.
            batch_size: upper bound on the batch size; the size actually used
                is min(number of examples, batch_size).

        The tac must be trainable.  Nothing is returned; the training time is
        reported through u.show.
        """
        example_count = data.evd_size(evidence)  # number of examples
        batch_size = min(example_count, batch_size)  # batch size in use

        u.input_check(self.trainable, 'TAC is not trainable')
        u.input_check(data.is_evidence(evidence), 'evidence is ill formatted')
        u.input_check(
            data.evd_is_hard(evidence, self.input_nodes,
                             self.hard_input_nodes), 'evidence must be hard')
        u.input_check(data.evd_matches_input(evidence, self.input_nodes),
                      'evidence must match evidence nodes of tbn')
        u.input_check(data.is_marginals(marginals), 'marginals ill formatted')
        u.input_check(data.mar_matches_output(marginals, self.output_node),
                      'marginals must match query node of tbn')
        u.input_check(loss_type in self.loss_types,
                      f'loss {loss_type} is not supported')
        u.input_check(metric_type in self.metric_types,
                      f'metric {metric_type} is not supported')
        u.input_check(
            data.evd_size(evidence) == len(marginals),
            'evidence size must match marginals size')

        u.show(f'\nTraining {self.circuit_type}:')
        clock_start = time.perf_counter()

        # the trainer returns the number of epochs it ran
        epochs_run = self.trainer.train(evidence, marginals, loss_type,
                                        metric_type, batch_size)

        elapsed = time.perf_counter() - clock_start
        per_epoch = elapsed / epochs_run

        u.show(
            f'Training Time: {elapsed:.3f} sec ({per_epoch:.3f} sec per epoch)'
        )