Example #1
def test_list_of_dicts_large_predict_proba():
    obs = [{'large_monty_friend': True, 'large_monty_guest': 'A',
            'large_monty_prize': 'A', 'large_monty': 'C'}]
    y1 = DiscreteDistribution({0: 0.0472, 1: 0.781, 2: 0.17167})
    y2 = DiscreteDistribution({True: 0.8562, False: 0.143776})
    y_hat = large_monty_network.predict_proba(obs)

    assert_equal(y_hat[0][0], True)
    assert_equal(y_hat[0][1], 'A')
    assert_equal(y_hat[0][2], 'A')
    assert_equal(y_hat[0][3], 'C')
    assert_discrete_equal(y_hat[0][4], y1, 3)
    assert_discrete_equal(y_hat[0][5], y2, 3)

    obs = [{'large_monty_friend': True, 'large_monty_prize': 'A',
            'large_monty': 'C', 'large_monty_remaining': 2}]
    y1 = DiscreteDistribution({'A': 0.5, 'B': 0.5, 'C': 0.0})
    y2 = DiscreteDistribution({True: 0.75, False: 0.25})
    y_hat = large_monty_network.predict_proba(obs)

    assert_equal(y_hat[0][0], True)
    assert_equal(y_hat[0][2], 'A')
    assert_equal(y_hat[0][3], 'C')
    assert_equal(y_hat[0][4], 2)
    assert_discrete_equal(y_hat[0][1], y1)
    assert_discrete_equal(y_hat[0][5], y2)
Example #2
    def get_bayesnet(self):
        door_lock = DiscreteDistribution({'d1': 0.7, 'd2': 0.3})

        clock_alarm = DiscreteDistribution({'a1': 0.8, 'a2': 0.2})

        light = ConditionalProbabilityTable(
            [['d1', 'a1', 'l1', 0.96],
             ['d1', 'a1', 'l2', 0.04],
             ['d1', 'a2', 'l1', 0.89],
             ['d1', 'a2', 'l2', 0.11],
             ['d2', 'a1', 'l1', 0.96],
             ['d2', 'a1', 'l2', 0.04],
             ['d2', 'a2', 'l1', 0.89],
             ['d2', 'a2', 'l2', 0.11]], [door_lock, clock_alarm])

        coffee_maker = ConditionalProbabilityTable(
            [['a1', 'c1', 0.92],
             ['a1', 'c2', 0.08],
             ['a2', 'c1', 0.03],
             ['a2', 'c2', 0.97]], [clock_alarm])

        s_door_lock = State(door_lock, name="door_lock")
        s_clock_alarm = State(clock_alarm, name="clock_alarm")
        s_light = State(light, name="light")
        s_coffee_maker = State(coffee_maker, name="coffee_maker")
        network = BayesianNetwork("User_pref")
        network.add_nodes(s_door_lock, s_clock_alarm, s_light, s_coffee_maker)

        network.add_edge(s_door_lock, s_light)
        network.add_edge(s_clock_alarm, s_coffee_maker)
        network.add_edge(s_clock_alarm, s_light)
        network.bake()
        return network
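A minimal usage sketch, not part of the original example: assuming network holds the BayesianNetwork returned by get_bayesnet(), observing the alarm setting updates beliefs about every other node.

# Hedged usage sketch: `network` is assumed to hold the return value of
# get_bayesnet(); observing the alarm updates beliefs about the other nodes.
beliefs = network.predict_proba({'clock_alarm': 'a1'})
for state, belief in zip(network.states, beliefs):
    print(state.name, belief)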
Example #3
def build_an_hmm_example():
    # Each DiscreteDistribution is the emission distribution for one state:
    # the probability of seeing each character while the system is in that state.
    d1 = DiscreteDistribution({'A': 0.35, 'C': 0.20, 'G': 0.05, 'T': 0.40})
    d2 = DiscreteDistribution({'A': 0.25, 'C': 0.25, 'G': 0.25, 'T': 0.25})
    d3 = DiscreteDistribution({'A': 0.10, 'C': 0.40, 'G': 0.40, 'T': 0.10})

    s1 = State(d1, name="s1")
    s2 = State(d2, name="s2")
    s3 = State(d3, name="s3")

    model = HiddenMarkovModel('example')
    model.add_states([s1, s2, s3])
    model.add_transition(model.start, s1, 0.90)
    model.add_transition(model.start, s2, 0.10)
    model.add_transition(s1, s1, 0.80)
    model.add_transition(s1, s2, 0.20)
    model.add_transition(s2, s2, 0.90)
    model.add_transition(s2, s3, 0.10)
    model.add_transition(s3, s3, 0.70)
    model.add_transition(s3, model.end, 0.30)
    model.bake()

    for state in model.states:
        print(state.name)
    model.plot()
    #print(model.log_probability(list('ACGACTATTCGAT')))

    #print(", ".join(state.name for i, state in model.viterbi(list('ACGACTATTCGAT'))[1]))

    print("forward:", model.forward(list('ACG')))
Example #4
def test_check_input_list():
    obs = ['A', None, None]
    _check_input(obs, monty_network)

    obs = ['A', numpy.nan, numpy.nan]
    _check_input(obs, monty_network)

    obs = numpy.array(['A', None, None])
    _check_input(obs, monty_network)

    obs = numpy.array(['A', numpy.nan, numpy.nan])
    _check_input(obs, monty_network)

    obs = numpy.array(['A', 'B', 'C'])
    _check_input(obs, monty_network)

    obs = numpy.array(['NaN', numpy.nan, numpy.nan])
    assert_raises(ValueError, _check_input, obs, monty_network)

    obs = numpy.array(['A', 'B', 'D'])
    assert_raises(ValueError, _check_input, obs, monty_network)

    obs = ['A']
    assert_raises(ValueError, _check_input, obs, monty_network)

    obs = ['A', 'C', 'E', 'F']
    assert_raises(ValueError, _check_input, obs, monty_network)

    d = DiscreteDistribution({'A': 0.25, 'B': 0.25, 'C': 0.25})
    obs = [d, None, None]
    _check_input(obs, monty_network)

    d = DiscreteDistribution({'A': 0.25, 'B': 0.25, 'D': 0.25})
    obs = [d, None, None]
    assert_raises(ValueError, _check_input, obs, monty_network)
Example #5
def test_monty():
    a = monty_network.predict_proba({'monty': 'A'})

    assert discrete_equality(a[monty_index], DiscreteDistribution(
        {'A': 1.0, 'B': 0.0, 'C': 0.0}))
    assert discrete_equality(a[guest_index], a[prize_index])
    assert discrete_equality(a[guest_index], DiscreteDistribution(
        {'A': 0.0, 'B': 1. / 2, 'C': 1. / 2}))
Example #6
def hmmer2pom(hmm):
    # set up environment
    from math import exp
    from pomegranate import DiscreteDistribution, HiddenMarkovModel, State

    tags = dict()
    header = 0
    alphabet = None
    hmmlines = list()

    # parse HMMER file
    for line in hmm.splitlines():
        l = line.strip()
        if len(l) == 0 or l[0] == '#':
            continue
        elif header == 0:
            if l.startswith('HMM') and l[3] != 'E':  # beginning of actual HMM
                header = 1
                alphabet = l.split()[1:]
            else:
                parts = l.strip().split()
                if parts[0] in tags:
                    if not isinstance(tags[parts[0]], list):
                        tags[parts[0]] = [tags[parts[0]]]
                    tags[parts[0]].append(' '.join(parts[1:]))
                else:
                    tags[parts[0]] = ' '.join(parts[1:])
        elif header == 1:
            header = 2
        else:
            if l.startswith('COMPO'):
                parts = l.strip().split()
                tags[parts[0]] = ' '.join(parts[1:])
            else:
                hmmlines.append(l)

    # create all states
    model = HiddenMarkovModel(tags['NAME'])
    tmpstates = list()
    K = 0
    i_emit = hmmlines[0].split()
    tmpstates.append(State(DiscreteDistribution({alphabet[i]: exp(-1 * float(i_emit[i])) for i in range(len(alphabet))}), name="I0"))  # insertion state
    for l in range(2, len(hmmlines), 3):
        m_emit, i_emit, state_trans = [hmmlines[l + i].split() for i in range(0, 3)]
        K = int(m_emit[0])
        tmpstates.append(State(DiscreteDistribution({alphabet[i]: exp(-1 * float(m_emit[i + 1])) for i in range(len(alphabet))}), name="M%d" % K))  # match state
        tmpstates.append(State(DiscreteDistribution({alphabet[i]: exp(-1 * float(i_emit[i])) for i in range(len(alphabet))}), name="I%d" % K))  # insertion state
        tmpstates.append(State(None, name="D%d" % K))  # deletion state
    assert K != 0, "No match states in profile HMM"
    model.add_states(tmpstates)
    name2state = {state.name: state for state in tmpstates}
    name2state["M0"] = model.start
    name2state["M%d" % (K + 1)] = model.end

    # create all transitions
    for l in range(1, len(hmmlines), 3):
        k = int(l / 3)
        parts = hmmlines[l].split()
        model.add_transition(name2state["M%d"%k], name2state["M%d"%(k+1)], exp(-1*float(parts[0])))     # 0: M_k -> M_k+1
        model.add_transition(name2state["M%d"%k], name2state["I%d"%k],     exp(-1*float(parts[1])))     # 1: M_k -> I_k
        if parts[2] != '*': # no D_k+1 in last row
            model.add_transition(name2state["M%d"%k], name2state["D%d"%(k+1)], exp(-1*float(parts[2]))) # 2: M_k -> D_k+1
        model.add_transition(name2state["I%d"%k], name2state["M%d"%(k+1)], exp(-1*float(parts[3])))     # 3: I_k -> M_k+1
        model.add_transition(name2state["I%d"%k], name2state["I%d"%k],     exp(-1*float(parts[4])))     # 4: I_k -> I_k
        if k != 0: # no D0 state
            model.add_transition(name2state["D%d"%k], name2state["M%d"%(k+1)], exp(-1*float(parts[5]))) # 5: D_k -> M_k+1
        if parts[6] != '*': # no D0 state and no D_k+1 in last row
            model.add_transition(name2state["D%d"%k], name2state["D%d"%(k+1)], exp(-1*float(parts[6]))) # 6: D_k -> D_k+1
    model.bake()
    return model.to_json()
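A hedged usage sketch: hmmer2pom() consumes the text of a HMMER3 profile and returns pomegranate JSON, so round-tripping through HiddenMarkovModel.from_json yields a usable model. The file path below is hypothetical.

from pomegranate import HiddenMarkovModel

with open('profile.hmm') as f:  # hypothetical path to a HMMER3 profile
    model = HiddenMarkovModel.from_json(hmmer2pom(f.read()))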
Example #7
def setup_monty():
    # Build a model of the Monty Hall Problem
    global monty_network, monty_index, prize_index, guest_index

    random.seed(0)

    # The guest's initial door choice is completely random
    guest = DiscreteDistribution({'A': 1. / 3, 'B': 1. / 3, 'C': 1. / 3})

    # The actual prize is independent of the other distributions
    prize = DiscreteDistribution({'A': 1. / 3, 'B': 1. / 3, 'C': 1. / 3})
    # Monty is dependent on both the guest and the prize; each row reads
    # [guest, prize, monty, P(monty | guest, prize)].
    monty = ConditionalProbabilityTable(
        [['A', 'A', 'A', 0.0],
         ['A', 'A', 'B', 0.5],
         ['A', 'A', 'C', 0.5],
         ['A', 'B', 'A', 0.0],
         ['A', 'B', 'B', 0.0],
         ['A', 'B', 'C', 1.0],
         ['A', 'C', 'A', 0.0],
         ['A', 'C', 'B', 1.0],
         ['A', 'C', 'C', 0.0],
         ['B', 'A', 'A', 0.0],
         ['B', 'A', 'B', 0.0],
         ['B', 'A', 'C', 1.0],
         ['B', 'B', 'A', 0.5],
         ['B', 'B', 'B', 0.0],
         ['B', 'B', 'C', 0.5],
         ['B', 'C', 'A', 1.0],
         ['B', 'C', 'B', 0.0],
         ['B', 'C', 'C', 0.0],
         ['C', 'A', 'A', 0.0],
         ['C', 'A', 'B', 1.0],
         ['C', 'A', 'C', 0.0],
         ['C', 'B', 'A', 1.0],
         ['C', 'B', 'B', 0.0],
         ['C', 'B', 'C', 0.0],
         ['C', 'C', 'A', 0.5],
         ['C', 'C', 'B', 0.5],
         ['C', 'C', 'C', 0.0]], [guest, prize])

    # Make the states
    s1 = State(guest, name="guest")
    s2 = State(prize, name="prize")
    s3 = State(monty, name="monty")

    # Make the bayes net, add the states, and the conditional dependencies.
    monty_network = BayesianNetwork("test")
    monty_network.add_nodes(s1, s2, s3)
    monty_network.add_edge(s1, s3)
    monty_network.add_edge(s2, s3)
    monty_network.bake()

    monty_index = monty_network.states.index(s3)
    prize_index = monty_network.states.index(s2)
    guest_index = monty_network.states.index(s1)
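Once baked, conditioning this network on the guest's pick and Monty's reveal shifts 2/3 of the prize probability onto the remaining door, the classic Monty Hall result that test_guest_with_monty below asserts.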
Example #8
def test_conditional():
	phditis = DiscreteDistribution({True: 0.01, False: 0.99})
	test_result = ConditionalProbabilityTable(
		[[True,  True,  0.95],
		 [True,  False, 0.05],
		 [False, True,  0.05],
		 [False, False, 0.95]], [phditis])

	assert discrete_equality(test_result.marginal(),
							 DiscreteDistribution({False: 0.941, True: 0.059}))
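The expected marginal follows from the law of total probability: P(test positive) = 0.01 * 0.95 + 0.99 * 0.05 = 0.059, matching the distribution asserted above.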
Example #9
    def __init__(self):
        Pollution = DiscreteDistribution({'F': 0.9, 'T': 0.1})
        Smoker = DiscreteDistribution({'T': 0.3, 'F': 0.7})
        print(Smoker)
        Cancer = ConditionalProbabilityTable([
            ['T', 'T', 'T', 0.05],
            ['T', 'T', 'F', 0.95],
            ['T', 'F', 'T', 0.02],
            ['T', 'F', 'F', 0.98],
            ['F', 'T', 'T', 0.03],
            ['F', 'T', 'F', 0.97],
            ['F', 'F', 'T', 0.001],
            ['F', 'F', 'F', 0.999],
        ], [Pollution, Smoker])
        print(Cancer)
        XRay = ConditionalProbabilityTable([
            ['T', 'T', 0.9],
            ['T', 'F', 0.1],
            ['F', 'T', 0.2],
            ['F', 'F', 0.8],
        ], [Cancer])
        Dyspnoea = ConditionalProbabilityTable([
            ['T', 'T', 0.65],
            ['T', 'F', 0.35],
            ['F', 'T', 0.3],
            ['F', 'F', 0.7],
        ], [Cancer])
        s1 = Node(Pollution, name="Pollution")
        s2 = Node(Smoker, name="Smoker")
        s3 = Node(Cancer, name="Cancer")
        s4 = Node(XRay, name="XRay")
        s5 = Node(Dyspnoea, name="Dyspnoea")

        model = BayesianNetwork("Lung Cancer")
        model.add_states(s1, s2, s3, s4, s5)
        model.add_edge(s1, s3)
        model.add_edge(s2, s3)
        model.add_edge(s3, s4)
        model.add_edge(s3, s5)
        model.bake()
        self.model = model

        meta = []
        name_mapper = ["Pollution", "Smoker", "Cancer", "XRay", "Dyspnoea"]
        for i in range(self.model.node_count()):
            meta.append({
                "name": name_mapper[i],
                "type": "categorical",
                "size": 2,
                "i2s": ['T', 'F']
            })
        self.meta = meta
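A hedged usage sketch (the wrapper class name is hypothetical): querying the posterior over Cancer for an observed smoker.

net = LungCancerModel()  # hypothetical name for the class defined above
beliefs = net.model.predict_proba({'Smoker': 'T'})
print(beliefs[2])  # posterior over Cancer, the third node added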
Example #10
def test_guest_with_monty():
    b = monty_network.predict_proba({'guest': 'A', 'monty': 'B'})
    c = monty_network.predict_proba({'guest': 'A', 'monty': 'C'})

    assert_equal(b[guest_index], 'A')
    assert_equal(b[monty_index], 'B')
    assert_discrete_equal(b[prize_index], DiscreteDistribution(
        {'A': 1. / 3, 'B': 0.0, 'C': 2. / 3}))

    assert_equal(c[guest_index], 'A')
    assert_equal(c[monty_index], 'C')
    assert_discrete_equal(c[prize_index], DiscreteDistribution(
        {'A': 1. / 3, 'B': 2. / 3, 'C': 0.0}))
Example #11
def test_io_fit():
    d1 = DiscreteDistribution({True: 0.6, False: 0.4})
    d2 = ConditionalProbabilityTable([
        [True, 'A', 0.2],
        [True, 'B', 0.8],
        [False, 'A', 0.3],
        [False, 'B', 0.7]], [d1])
    d3 = ConditionalProbabilityTable([
        ['A', 0, 0.3],
        ['A', 1, 0.7],
        ['B', 0, 0.8],
        ['B', 1, 0.2]], [d2])

    n1 = Node(d1)
    n2 = Node(d2)
    n3 = Node(d3)

    model1 = BayesianNetwork()
    model1.add_nodes(n1, n2, n3)
    model1.add_edge(n1, n2)
    model1.add_edge(n2, n3)
    model1.bake()
    model1.fit(X, weights=weights)

    d1 = DiscreteDistribution({True: 0.2, False: 0.8})
    d2 = ConditionalProbabilityTable([
        [True, 'A', 0.7],
        [True, 'B', 0.2],
        [False, 'A', 0.4],
        [False, 'B', 0.6]], [d1])
    d3 = ConditionalProbabilityTable([
        ['A', 0, 0.9],
        ['A', 1, 0.1],
        ['B', 0, 0.0],
        ['B', 1, 1.0]], [d2])

    n1 = Node(d1)
    n2 = Node(d2)
    n3 = Node(d3)

    model2 = BayesianNetwork()
    model2.add_nodes(n1, n2, n3)
    model2.add_edge(n1, n2)
    model2.add_edge(n2, n3)
    model2.bake()
    model2.fit(data_generator)

    logp1 = model1.log_probability(X)
    logp2 = model2.log_probability(X)

    assert_array_almost_equal(logp1, logp2)
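The point of the test: fitting model1 from the raw arrays (X plus weights) and fitting model2 from data_generator, which presumably wraps the same data, should produce identical parameters and therefore identical log-probabilities.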
Example #12
def test_check_input_list_of_dicts():
    obs = {'guest': 'A'}
    _check_input([obs], monty_network)

    obs = {'guest': 'NaN'}
    assert_raises(ValueError, _check_input, [obs], monty_network)

    obs = {'guest': None}
    assert_raises(ValueError, _check_input, [obs], monty_network)

    obs = {'guest': numpy.nan}
    assert_raises(ValueError, _check_input, [obs], monty_network)

    obs = {'guest': 'NaN', 'prize': 'B'}
    assert_raises(ValueError, _check_input, [obs], monty_network)

    obs = {'guest': 'A', 'prize': 'C'}
    _check_input([obs], monty_network)

    obs = {'guest': 'A', 'prize': 'C', 'monty': 'C'}
    _check_input([obs], monty_network)

    obs = {'guest': DiscreteDistribution({'A': 0.25, 'B': 0.25, 'C': 0.50})}
    _check_input([obs], monty_network)

    obs = {'hello': 'A', 'prize': 'B'}
    assert_raises(ValueError, _check_input, [obs], monty_network)

    obs = [{'guest': 'A'},
           {'guest': 'A', 'prize': 'C'},
           {'guest': 'A', 'prize': 'C', 'monty': 'C'},
           {'guest': DiscreteDistribution({'A': 0.25, 'B': 0.25, 'C': 0.50})}]
    _check_input(obs, monty_network)

    obs.append({'guest': 'NaN', 'prize': 'B'})
    assert_raises(ValueError, _check_input, obs, monty_network)
Example #13
    def update_hmm(self):
        num_states = self.num_states
        start_prob = self.start_prob
        num_emissions = self.num_emissions

        hmm = HiddenMarkovModel('hmm')
        dist = [
            DiscreteDistribution(
                dict(zip(range(num_emissions), self.emissions[i])))
            for i in range(num_states)
        ]
        states = [
            State(dist[i], 's' + str(i).zfill(2)) for i in range(num_states)
        ]
        hmm.add_states(states)
        for i in range(num_states):
            s_i = states[i]
            hmm.add_transition(hmm.start, s_i, start_prob[i])
            for j in range(num_states):
                s_j = states[j]
                p = self.transitions[i, j]
                hmm.add_transition(s_i, s_j, p)

        self.hmm = hmm
        self.hmm.bake()
Example #14
def train_model(data: np.ndarray,
                clusters: int = 5,
                init_nodes: list = None) -> BayesianNetwork:

    bn = BayesNet()
    # Cluster the initial data in order to fill in a hidden variable based on the distribution of clusters
    kmeans = KMeans(n_clusters=clusters, random_state=0).fit(data)
    labels = kmeans.labels_
    hidden_dist = DiscreteDistribution.from_samples(labels)
    hidden_var = np.array(hidden_dist.sample(data.shape[0]))
    new_data = np.column_stack((data, hidden_var))
    latent = (new_data.shape[1]) - 1

    # Train the network structure on the data, taking the hidden variable into account
    bn = hc_rr(new_data, latent=latent, init_nodes=init_nodes)
    structure = []
    nodes = sorted(list(bn.nodes()))
    for rv in nodes:
        structure.append(tuple(bn.F[rv]['parents']))
    structure = tuple(structure)
    bn = BayesianNetwork.from_structure(new_data, structure)
    bn.bake()
    # Learn the hidden variable
    hidden_var = np.array([np.nan] * (data.shape[0]))
    new_data = np.column_stack((data, hidden_var))
    bn.predict(new_data)
    bn.fit(new_data)
    bn.bake()
    return bn
Example #15
def make_insert(zone, name):
    emission = {}
    total = 0
    for column in zone['columns']:
        for el in column.elements:
            if el != '-':
                if el not in emission:
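                    # seeding the first occurrence at 2 acts as a crude
                    # pseudocount, smoothing the emission estimate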
                    emission[el] = 2
                    total += 2
                else:
                    emission[el] += 1
                    total += 1
    for key in emission:
        emission[key] = emission[key] / total
    # print(emission)
    return {
        'type': 'insert',
        'emission': emission,
        'zone': zone,
        'insert_state': State(DiscreteDistribution(emission), name='insert ' + name)
    }
Example #16
    def get_insert_dist(self, n_features, initial_seq):
        if isinstance(initial_seq[0], int) \
                or np.issubdtype(initial_seq[0], np.integer):  # equal distribution
            return DiscreteDistribution.from_samples(range(n_features))
        else:  # distribution based on initial sequence
            return MultivariateGaussianDistribution.from_samples(
                np.array(initial_seq))
Example #17
def test_list_of_dicts_predict_proba_parallel():
    obs = [{'guest': 'A', 'monty': 'B'},
           {'guest': 'B', 'prize': 'A'},
           {'monty': 'C', 'prize': 'B'},
           {'monty': 'B'},
           {'prize': 'A'}]
    y = DiscreteDistribution({'A': 1. / 3, 'B': 0., 'C': 2. / 3})
    y_hat = monty_network.predict_proba(obs, n_jobs=2)

    assert_equal(y_hat[0][0], 'A')
    assert_equal(y_hat[0][2], 'B')
    assert_discrete_equal(y_hat[0][1], y)

    assert_equal(y_hat[1][0], 'B')
    assert_equal(y_hat[1][1], 'A')

    assert_equal(y_hat[3][2], 'B')
    assert_equal(y_hat[4][1], 'A')
Example #18
    def train(self):
        logger.info("Building tossing graphs...")
        start_time = time.time()

        tossing_path_collection = self._c['tossing']
        logger.info("Found %d paths" % len(tossing_path_collection))

        target_dict = self._c['target_dict']

        logger.info('length of tossing collection is %d' %
                    len(tossing_path_collection))

        train_ratio = 0.8
        train_border = int(len(tossing_path_collection) * train_ratio)
        logger.info(
            'taking %d data to train, %d to test' %
            (train_border, len(tossing_path_collection) - train_border))

        total = target_dict['__total']
        distribution = {k: v / total for k, v in target_dict.items()}

        distribution.pop('__total', None)

        paths = [t.get_assignee_path() for t in tossing_path_collection]

        # get discrete distribution
        zeroth_dist = DiscreteDistribution(distribution)

        first_chain = MarkovChain.from_samples(paths)  # ([zeroth_dist])
        first_chain.fit(paths)

        logger.info('Fitting the paths took {} seconds'.format(time.time() -
                                                               start_time))

        return self._c
Example #19
def test_cpd_sampling():
    d1 = DiscreteDistribution({"A": 0.1, "B": 0.9})
    d2 = ConditionalProbabilityTable(
        [["A", "A", 0.1], ["A", "B", 0.9], ["B", "A", 0.7], ["B", "B", 0.3]],
        [d1])

    # P(A) = 0.1*0.1 + 0.9*0.7 = 0.64
    # P(B) = 0.1*0.9 + 0.9*0.3 = 0.36
    true = [0.64, 0.36]
    est = numpy.bincount([0 if d2.sample() == "A" else 1
                          for i in range(1000)]) / 1000.0
    assert_almost_equal(est[0], true[0], 1)
    assert_almost_equal(est[1], true[1], 1)

    # when A is observed, it reduces to [0.1, 0.9]
    true1 = [0.1, 0.9]
    par_val = {}
    par_val[d1] = "A"
    est = numpy.bincount([
        0 if d2.sample(parent_values=par_val) == "A" else 1
        for i in range(1000)
    ]) / 1000.0
    assert_almost_equal(est[0], true1[0], 1)
    assert_almost_equal(est[1], true1[1], 1)

    true2 = [0.7, 0.3]
    par_val = {}
    par_val[d1] = "B"
    est = numpy.bincount([
        0 if d2.sample(parent_values=par_val) == "A" else 1
        for i in range(1000)
    ]) / 1000.0
    assert_almost_equal(est[0], true2[0], 1)
    assert_almost_equal(est[1], true2[1], 1)
Example #20
def make_main(zone, name):
    emission = {}
    total = 0
    for el in zone['column'].elements:
        if el != '-':
            if el not in emission:
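                # as in make_insert, starting the count at 2 acts as a
                # crude pseudocount smoothing the emission estimate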
                emission[el] = 2
                total += 2
            else:
                emission[el] += 1
                total += 1

    for key in emission:
        emission[key] = emission[key] / total
    # print('main', emission)
    return {
        'type': 'main',
        'emission': emission,
        'zone': zone,
        'main_state': State(DiscreteDistribution(emission), name='main ' + name),
        'delete_state': State(None, name='none delete ' + name) if zone['delete'] else None
    }
Example #21
def setup_titanic():
    # Build a model of the titanic disaster
    global titanic_network, passenger, gender, tclass

    # Passengers on the Titanic either survive or perish
    passenger = DiscreteDistribution({'survive': 0.6, 'perish': 0.4})

    # Gender, given survival data
    gender = ConditionalProbabilityTable(
        [['survive', 'male', 0.0], ['survive', 'female', 1.0],
         ['perish', 'male', 1.0], ['perish', 'female', 0.0]], [passenger])

    # Class of travel, given survival data
    tclass = ConditionalProbabilityTable(
        [['survive', 'first', 0.0], ['survive', 'second', 1.0],
         ['survive', 'third', 0.0], ['perish', 'first', 1.0],
         ['perish', 'second', 0.0], ['perish', 'third', 0.0]], [passenger])

    # State objects hold both the distribution, and a high level name.
    s1 = State(passenger, name="passenger")
    s2 = State(gender, name="gender")
    s3 = State(tclass, name="class")

    # Create the Bayesian network object with a useful name
    titanic_network = BayesianNetwork("Titanic Disaster")

    # Add the three nodes to the network
    titanic_network.add_nodes(s1, s2, s3)

    # Add edges which represent conditional dependencies, where the
    # second node is conditionally dependent on the first node (both gender
    # and class of travel depend on survival)
    titanic_network.add_edge(s1, s2)
    titanic_network.add_edge(s1, s3)
    titanic_network.bake()
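A hedged usage sketch: after setup_titanic(), conditioning on survival pins down gender and class, since the toy CPTs above are deterministic given survival.

setup_titanic()
beliefs = titanic_network.predict_proba({'passenger': 'survive'})
# with these toy CPTs, gender is 'female' and class is 'second' w.p. 1.0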
Example #22
def sequence_state_factory(states_data, name):
    states = []
    for index, data in enumerate(states_data):
        state = State(DiscreteDistribution(data.states_distribution),
                      name=name + str(index))
        states.append(state)
    return states
Example #23
def test_single_list_predict_proba():
    obs = ['A', None, 'B']
    y = DiscreteDistribution({'A': 1. / 3, 'B': 0., 'C': 2. / 3})
    y_hat = monty_network.predict_proba(obs)

    assert_equal(y_hat[0], 'A')
    assert_equal(y_hat[2], 'B')
    assert_discrete_equal(y_hat[1], y)
Example #24
def test_list_of_dicts_predict_proba():
    obs = [{'guest': 'A',  'monty': 'B'}]
    y = DiscreteDistribution({'A': 1./3, 'B': 0., 'C': 2./3})
    y_hat = monty_network.predict_proba(obs)

    assert_equal(y_hat[0][0], 'A')
    assert_equal(y_hat[0][2], 'B')
    assert_discrete_equal(y_hat[0][1], y)
Example #25
def bake_model(tags_sequence, words_sequence):
    """
    'tags' are the time-demand labels that generate the emitted demand level.
    Demand level are represented by 'words'
    """
    # rdemand
    words = list(chain(*words_sequence))
    tag_unigrams = unigram_counts(words)
    tag_bigrams = bigram_counts(words)

    # Uniform distribution for starting and ending labels
    all_labels = list(set(words))
    tag_starts = starting_counts(all_labels)
    tag_ends = ending_counts(all_labels)

    basic_model = HiddenMarkovModel(name="base-hmm-tagger")

    # Emission count
    label_train = tags_sequence
    rdemand_train = words_sequence
    emission_count = pair_counts(rdemand_train, label_train)

    # States with emission probability distributions P(word | tag)
    states = []
    for rdemand, label_dict in emission_count.items():
        dist_tag = DiscreteDistribution({
            label: cn / tag_unigrams[rdemand]
            for label, cn in label_dict.items()
        })
        states.append(State(dist_tag, name=rdemand))

    basic_model.add_states(states)
    state_names = [s.name for s in states]
    state_index = {tag: num for num, tag in enumerate(state_names)}

    # Start transition
    total_start = sum(tag_starts.values())
    for tag, cn in tag_starts.items():
        # sname = state_index[tag]
        basic_model.add_transition(basic_model.start, states[state_index[tag]],
                                   cn / total_start)

    # End transition
    total_end = sum(tag_ends.values())
    for tag, cn in tag_ends.items():
        basic_model.add_transition(states[state_index[tag]], basic_model.end,
                                   cn / total_end)

    # Edges between states for the observed transition frequencies P(tag_i | tag_i-1)
    for key, value in tag_bigrams.items():
        basic_model.add_transition(states[state_index[key[0]]],
                                   states[state_index[key[1]]],
                                   value / tag_unigrams[key[0]])

    # Finalize the model
    basic_model.bake()

    return basic_model
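A hedged usage sketch; the two training sequences and the observation below are hypothetical stand-ins for this project's data pipeline.

model = bake_model(tags_sequence, words_sequence)  # hypothetical inputs
logp, path = model.viterbi(observed_levels)        # hypothetical observation
print([state.name for idx, state in path[1:-1]])   # strip start/end states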
Example #26
def state_sequence_from(emissions, name):
    states = []
    for index, emission in enumerate(emissions):
        distribution = DiscreteDistribution(emission)
        state_name = name + '_' + str(index)
        print('created state', state_name)
        state = State(distribution, name=state_name)
        states.append(state)
    return states, [1] * (len(states) - 1)
Example #27
def test_guest_monty():
    a = monty_network.predict_proba({'guest': 'A'})
    b = monty_network.predict_proba({'guest': 'B'})
    c = monty_network.predict_proba({'guest': 'C'})

    prize_correct = DiscreteDistribution(
        {'A': 1. / 3, 'B': 1. / 3, 'C': 1. / 3})

    assert discrete_equality(a[prize_index], b[prize_index])
    assert discrete_equality(a[prize_index], c[prize_index])
    assert discrete_equality(a[prize_index], prize_correct)

    assert discrete_equality(a[monty_index], DiscreteDistribution(
        {'A': 0.0, 'B': 1. / 2, 'C': 1. / 2}))
    assert discrete_equality(b[monty_index], DiscreteDistribution(
        {'A': 1. / 2, 'B': 0.0, 'C': 1. / 2}))
    assert discrete_equality(c[monty_index], DiscreteDistribution(
        {'A': 1. / 2, 'B': 1. / 2, 'C': 0.0}))
Example #28
    def get_match_dist(self, index, n_features, initial_seq):
        if isinstance(initial_seq[index], int):
            return DiscreteDistribution.from_samples(range(n_features))
            # return DiscreteDistribution.from_samples(np.concatenate(
            #     (np.repeat(index, INITIAL_EMPHASIS), range(n_features))))
        else:
            return MultivariateGaussianDistribution.from_samples(
                np.concatenate(
                    (np.tile(index, (INITIAL_EMPHASIS, 1)),
                     np.array(initial_seq))))
Example #29
def buildHmm(minAmpliconLength, maxGap, windowSize):
    b_bkgd_1 = 0.1
    a_interstate = b_bkgd_1**(2 * minAmpliconLength / windowSize)
    b_amp_0 = (a_interstate)**(0.5 * windowSize / maxGap)
    b_amp_1 = 1 - b_amp_0
    b_bkgd_0 = 1 - b_bkgd_1
    bkgdDist = DiscreteDistribution({0: b_bkgd_0, 1: b_bkgd_1})
    ampDist = DiscreteDistribution({0: b_amp_0, 1: b_amp_1})
    s_bkgd = State(bkgdDist, name='background')
    s_amp = State(ampDist, name='amplicon')
    hmm = HiddenMarkovModel()
    hmm.add_states(s_bkgd, s_amp)
    hmm.add_transition(hmm.start, s_bkgd, 1 - a_interstate)
    hmm.add_transition(hmm.start, s_amp, a_interstate)
    hmm.add_transition(s_bkgd, s_bkgd, 1 - a_interstate)
    hmm.add_transition(s_bkgd, s_amp, a_interstate)
    hmm.add_transition(s_amp, s_bkgd, a_interstate)
    hmm.add_transition(s_amp, s_amp, 1 - a_interstate)
    hmm.bake()
    return hmm
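A hedged usage sketch with hypothetical window parameters: labeling a binary per-window coverage track with the two-state HMM.

hmm = buildHmm(minAmpliconLength=1000, maxGap=500, windowSize=100)
track = [0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0]  # hypothetical input
print(hmm.predict(track))  # per-window state indices (background vs. amplicon)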
Example #30
def with_variations(dist, name):
    st = State(dist, name=name)
    sti = State(DiscreteDistribution({'a': 0.25, 'c': 0.25,
                                      'g': 0.25, 't': 0.25}),
                name='i_' + name)
    std = State(None, name='d_' + name)
    return st, sti, std
Example #31
def test_discrete():
	d = DiscreteDistribution({'A': 0.25, 'C': 0.25, 'G': 0.25, 'T': 0.25})

	assert_equal(d.log_probability('C'), -1.3862943611198906)
	assert_equal(d.log_probability('A'), d.log_probability('C'))
	assert_equal(d.log_probability('G'), d.log_probability('T'))
	assert_equal(d.log_probability('a'), float('-inf'))

	seq = "ACGTACGTTGCATGCACGCGCTCTCGCGC"
	d.fit(list(seq))

	assert_equal(d.log_probability('C'), -0.9694005571881036)
	assert_equal(d.log_probability('A'), -1.9810014688665833)
	assert_equal(d.log_probability('T'), -1.575536360758419)

	seq = "ACGTGTG"
	d.fit(list(seq), weights=[0., 1., 2., 3., 4., 5., 6.])

	assert_equal(d.log_probability('A'), float('-inf'))
	assert_equal(d.log_probability('C'), -3.044522437723423)
	assert_equal(d.log_probability('G'), -0.5596157879354228)

	d.summarize(list("ACG"), weights=[0., 1., 2.])
	d.summarize(list("TGT"), weights=[3., 4., 5.])
	d.summarize(list("G"), weights=[6.])
	d.from_summaries()

	assert_equal(d.log_probability('A'), float('-inf'))
	assert_equal(round(d.log_probability('C'), 4), -3.0445)
	assert_equal(round(d.log_probability('G'), 4), -0.5596)

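	# with inertia i, the update is i * old + (1 - i) * MLE; the summaries
	# below hold 15 As and 15 Bs, so the MLE is {A: 0.5, B: 0.5} and
	# inertia=0.75 gives {A: 0.125, B: 0.875}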
	d = DiscreteDistribution({'A': 0.0, 'B': 1.0})
	d.summarize(list("ABABABAB"))
	d.summarize(list("ABAB"))
	d.summarize(list("BABABABABABABABABA"))
	d.from_summaries(inertia=0.75)
	assert_equal(d.parameters[0], {'A': 0.125, 'B': 0.875})

	d = DiscreteDistribution({'A': 0.0, 'B': 1.0})
	d.summarize(list("ABABABAB"))
	d.summarize(list("ABAB"))
	d.summarize(list("BABABABABABABABABA"))
	d.from_summaries(inertia=0.5)
	assert_equal(d.parameters[0], {'A': 0.25, 'B': 0.75})

	d.freeze()
	d.fit(list('ABAABBAAAAAAAAAAAAAAAAAA'))
	assert_equal(d.parameters[0], {'A': 0.25, 'B': 0.75})

	d = DiscreteDistribution.from_samples(['A', 'B', 'A', 'A'])
	assert_equal(d.parameters[0], {'A': 0.75, 'B': 0.25})

	d = DiscreteDistribution.from_samples(['A', 'B', 'A', 'A'], pseudocount=0.5)
	assert_equal(d.parameters[0], {'A': 0.70, 'B': 0.30})

	d = DiscreteDistribution.from_samples(['A', 'B', 'A', 'A'], pseudocount=6)
	assert_equal(d.parameters[0], {'A': 0.5625, 'B': 0.4375})

	e = Distribution.from_json(d.to_json())
	assert_equal(e.name, "DiscreteDistribution")
	assert_equal(e.parameters[0], {'A': 0.5625, 'B': 0.4375})

	f = pickle.loads(pickle.dumps(e))
	assert_equal(f.name, "DiscreteDistribution")
	assert_equal(f.parameters[0], {'A': 0.5625, 'B': 0.4375})