Example #1
0
    def initHMM(self, length):
        a = 1.0 / length

        # Transition probabilities
        trans = np.array([[1-a,   a,   0,   0],   # Pre ->
                          [  0, 1-a, a/2, a/2],   # HQ  ->
                          [  0,   0,   1,   0],   # PostQuiet ->
                          [  0,   0,   0,   1] ]) # PostActive ->

        # Emission probabilities
        eps = 1e-4
        emit = np.array([[ 0.25, 0.25, 0.50 ],    # Emit | Pre
                         [ 0.16, 0.84-eps, eps ], # Emit | HQ
                         [ 0.90, 0.10-eps, eps ], # Emit | PostQuiet
                         [ 0.25, 0.25, 0.50 ] ])  # Emit | PostActive
        #                   A0    A1    A2

        # Start state distribution
        start = np.array([0.34, 0.33, 0.33, 0])

        nStates = 4  # four states: Pre, HQ, PostQuiet, PostActive
        hmm = MultinomialHMM(n_components=nStates)
        hmm.transmat_ = trans
        hmm.startprob_ = start
        hmm.emissionprob_ = emit
        return hmm
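A minimal usage sketch (assuming the pre-0.3 hmmlearn API, where MultinomialHMM is the categorical model and decode expects a column vector of integer symbols; the instance name and toy sequence are hypothetical):

    import numpy as np

    # Hypothetical three-symbol observations (A0/A1/A2) as a column vector
    obs = np.array([2, 2, 1, 1, 1, 1, 0, 0]).reshape(-1, 1)
    hmm = detector.initHMM(length=100)  # 'detector' is the enclosing object
    logprob, states = hmm.decode(obs)   # Viterbi path through the four states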
Example #2
0
    def initHMM(self):
        # self._hmm = MultinomialHMM(n_components=self._N, startprob_prior=None, transmat_prior=None, 
        #     algorithm='viterbi', random_state=None, n_iter=self._maxIters, tol=0.01, 
        #     verbose=True, params='ste', init_params='ste')

        self._hmm = MultinomialHMM(n_components=self._N, n_iter=self._maxIters, 
            verbose=True, params='ste', init_params='ste')
Example #3
0
    def __init__(self, model_file=None, components=None):

        if os.path.exists(model_file):
            self.model = joblib.load(model_file)
        else:
            alu_file = 'Alu_sequence.pkl'
            if os.path.exists(alu_file):
                locis = joblib.load(alu_file)
            else:
                locis = read_sequence('hg19_Alu.bed', 0)
                locis = random.sample(locis, 100000)
                for l in tqdm(locis):
                    l.init_seq()
                    l.decode_seq()
                locis = list(filter(lambda l: l.seq is not None, locis))
                joblib.dump(locis, alu_file)

            print('Alu Loaded')
            locis = locis[0:5000]
            model = MultinomialHMM(n_components=components,
                                   verbose=True,
                                   n_iter=50)
            x = np.concatenate(list(map(attrgetter('seq'), locis)))
            x = np.reshape(x, [x.shape[0], 1])
            length = list(map(attrgetter('length'), locis))
            model.fit(x, length)
            self.model = model
            joblib.dump(self.model, model_file)
Example #4
0
    def train(self, obs_seq_list: list, state_seq_list: list, obs_set: list,
              state_set: list, file):
        """
        :param obs_seq_list: observation sequence list [[o1, o2, o3], [o1, o2, o3]...]
        :param state_seq_list: state sequence list [[s1, s2, s3], [s1, s2, s3]...]
        :param obs_set: all possible observations
        :param state_set: all possible states
        :param file: optional path to pickle the trained model to
        """
        self.obs_seq_list = obs_seq_list
        self.state_seq_list = state_seq_list
        self.obs_set = obs_set
        self.state_set = state_set
        self.counter = Counter(''.join(state_seq_list))

        self.hmm = MultinomialHMM(n_components=len(self.state_set))

        self.startprob, self.transmat, self.emissionprob = \
            self._init_state(), self._trans_state(), self._emit_state()
        self.hmm.startprob_ = self.startprob
        self.hmm.transmat_ = self.transmat
        self.hmm.emissionprob_ = self.emissionprob

        if file is not None:
            with open(file, 'wb') as f:
                pickle.dump(self, f)
Example #5
0
def buildHMM(HMMFactory):

    model = MultinomialHMM(n_components=2, n_iter=200)
    model.startprob_ = HMMFactory.hiddenProb()
    model.transmat_ = HMMFactory.transMatrix()
    model.emissionprob_ = HMMFactory.emissionMatrix()
    return model
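buildHMM assumes a factory object exposing hiddenProb/transMatrix/emissionMatrix; a minimal hypothetical factory for a two-state, two-symbol chain might look like:

    import numpy as np

    class CoinFactory:  # hypothetical; any object with these three methods works
        def hiddenProb(self):
            return np.array([0.6, 0.4])
        def transMatrix(self):
            return np.array([[0.7, 0.3],
                             [0.4, 0.6]])
        def emissionMatrix(self):
            return np.array([[0.9, 0.1],
                             [0.2, 0.8]])

    model = buildHMM(CoinFactory())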
Example #6
0
def main():
    rand_p_matrix = np.random.rand(4, 4)
    rand_b_matrix = np.random.rand(4, 3)

    print("\nGernerating p matrix...............")
    p_matrix = normalization(rand_p_matrix)
    print(p_matrix)

    print("\nGernerating b matrix...............")
    b_matrix = normalization(rand_b_matrix)
    print(b_matrix)

    # Generate 1000 observations
    O, _ = generate_observation(1000, p_matrix, b_matrix)

    # training the selection of number of states
    aic = []
    bic = []
    likelihood = []
    m = 3
    print("\nTraining the HMM for selection of number of states........")
    for n in range(2, 30):
        observations = LabelEncoder().fit_transform(O)
        model = MultinomialHMM(n_components=n, random_state=200263453)
        # hmmlearn expects a column vector of integer symbols
        model.fit(observations.reshape(-1, 1))
        logL = model.score(observations.reshape(-1, 1))
        p = compute_p(n, m)
        a = AIC(logL, p)
        b = BIC(logL, observations, p)
        likelihood.append(logL)
        aic.append(a)
        bic.append(b)
    plot(aic, 'AIC')
    plot(bic, 'BIC')
    plot(likelihood, 'Log likelihood')
Example #7
0
	def __init__(self, n_iter=100):
		MultinomialHMM.__init__(self, n_components=len(self.states), n_iter=n_iter)

		self.voca = dict()
		self.word_freq = defaultdict(int)
		self.max_num_segs = 0
		self.n_training = 0
Example #8
0
 def get_model(self):
     """ returns a multinomial hmm"""
     model = MultinomialHMM(n_components=self.get_max(),
                            params='e',
                            init_params='')
     model.startprob_ = self.get_start()
     model.transmat_ = self.get_transition()
     model.emissionprob_ = self.get_emission()
     return model
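A minimal sketch (hypothetical data): with params='e' and init_params='', hmmlearn's EM re-estimates only emissionprob_ and re-initializes nothing, so the hand-set start and transition matrices survive a call to fit.

    import numpy as np

    model = builder.get_model()  # 'builder' is the enclosing object (hypothetical)
    X = np.array([0, 1, 1, 0, 2, 1]).reshape(-1, 1)
    model.fit(X)                 # EM updates the emission matrix only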
Example #9
0
 def get_model(self):
     """
     Initialize the HMM model
     """
     model = MultinomialHMM(n_components=len(self.states))
     model.startprob_ = self.init_p
     model.transmat_ = self.trans_p
     model.emissionprob_ = self.emit_p
     return model
Example #10
0
 def get_model(self):
     """
     Initialize the HMM model
     """
     model = MultinomialHMM(n_components=len(self.states))
     model.startprob_ = self.init_p
     model.transmat_ = self.trans_p
     model.emissionprob_ = self.emit_p
     return model
Example #11
0
def fit_hmm_learn(X, n_states):
    samples = np.concatenate(X).reshape(-1, 1)  # hmmlearn wants a column vector
    lengths = [len(x) for x in X]

    hmm_learn_model = MultinomialHMM(n_components=n_states)
    hmm_learn_model.fit(samples, lengths)

    # Label data using hmmlearn model
    return hmm_learn_model.predict(samples, lengths)
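A minimal usage sketch (hypothetical integer-coded sequences):

    import numpy as np

    X = [np.array([0, 1, 1, 2]),
         np.array([2, 2, 0]),
         np.array([1, 0, 0, 1, 2])]
    labels = fit_hmm_learn(X, n_states=2)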
Example #12
0
def detect_events_hmm(mahal_timeseries, c_timeseries, global_pace_timeseries, threshold_quant=.95):
    #Sort the keys of the timeseries chronologically    
    sorted_dates = sorted(mahal_timeseries)
    
    
    (expected_pace_timeseries, sd_pace_timeseries) = getExpectedPace(global_pace_timeseries)    

    #Generate the list of values of R(t)
    mahal_list = [mahal_timeseries[d] for d in sorted_dates]
    c_list = [c_timeseries[d] for d in sorted_dates]
    global_pace_list = [global_pace_timeseries[d] for d in sorted_dates]
    expected_pace_list = [expected_pace_timeseries[d] for d in sorted_dates]

    
    #Use the quantile to determine the threshold
    sorted_mahal = sorted(mahal_list)
    threshold = getQuantile(sorted_mahal, threshold_quant)
    
    
    # The symbols array contains "1" if there is an outlier, "0" if there is not
    symbols = []
    for i in range(len(mahal_list)):
        if(mahal_list[i] > threshold or c_list[i]==1):
            symbols.append(1)
        else:
            symbols.append(0)
    
    
    # Set up the hidden markov model.  We are modeling the non-event states as "0"
    # and event states as "1"
    
    # Transition matrix with heavy weight on the diagonals ensures that the model
    # is likely to stick in the same state rather than rapidly switching.  In other
    # words, the predictions will be relatively "smooth"
    trans_matrix = array([[.999, .001],
                      [.001,.999]])

    # Emission matrix - state 0 is likely to emit symbol 0, and vice versa
    # In other words, events are likely to be outliers
    emission_matrix = array([[.95, .05],
                             [.4, .6]])
    
    # Actually set up the hmm.  Newer hmmlearn takes these as attributes
    # rather than constructor arguments; start uniformly since no initial
    # state distribution is given.
    model = MultinomialHMM(n_components=2)
    model.startprob_ = array([.5, .5])
    model.transmat_ = trans_matrix
    model.emissionprob_ = emission_matrix

    # Make the predictions (decode expects a column vector of symbols)
    lnl, predictions = model.decode(array(symbols).reshape(-1, 1))
    
    events = get_all_events(predictions, sorted_dates, mahal_list, global_pace_list,
                            expected_pace_list)
    
    # Sort events by duration, starting with the long events
    events.sort(key = lambda x: x[2], reverse=True)
    return events, predictions
Example #13
0
 def predict(self, x, init_prob=None, method='hmmlearn', window=-1):
     """Predict result based on HMM
     """
     if init_prob is None:
         init_prob = np.array(
             [1 / self.num_states for i in range(self.num_states)])
     if method == 'hmmlearn':
         model = MultinomialHMM(self.num_states, n_iter=100)
         model.n_features = self.num_observations
         model.startprob_ = init_prob
         model.emissionprob_ = self.B
         model.transmat_ = self.A
         if window == -1:
             result = model.predict(x)
         else:
             result = np.zeros(x.shape[0], dtype=int)
             result[0:window] = model.predict(x[0:window])
             for i in range(window, x.shape[0]):
                 result[i] = model.predict(x[i - window + 1:i + 1])[-1]
     else:
         if window == -1:
             result = self.decode(x, init_prob)
         else:
             result = np.zeros(x.shape[0], dtype=int)
             result[0:window] = self.decode(x[0:window], init_prob)
             for i in range(window, x.shape[0]):
                 result[i] = self.decode(x[i - window + 1:i + 1],
                                         init_prob)[-1]
     return result
Example #14
0
def train_hmm():
    """
    HMM for sequence learning.
    """
    print "Loading training data..."
    train_sequence, num_classes = get_sequence("./train_data/*")

    print "Build HMM..."
    model = MultinomialHMM(n_components=2)

    print "Train HMM..."
    model.fit([train_sequence])
Example #15
0
def build_hmm(trans, emis, seed=1):
    """Builds and returns hmm_model given the transition and emission probabilities matrices"""
    hmm = MultinomialHMM(n_components=trans.shape[0],
                         algorithm="viterbi",
                         random_state=seed)
    hmm.__setattr__("n_features", emis.shape[1])
    hmm.__setattr__("emissionprob_", emis)
    hmm.__setattr__("startprob_", np.array(
        [1] + [0] *
        (trans.shape[0] - 1)))  # We will always start at the first state
    hmm.__setattr__("transmat_", trans)
    return hmm
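A usage sketch with toy matrices (rows sum to one; the values are illustrative only):

    import numpy as np

    trans = np.array([[0.9, 0.1],
                      [0.2, 0.8]])
    emis = np.array([[0.7, 0.2, 0.1],
                     [0.1, 0.3, 0.6]])
    hmm = build_hmm(trans, emis)
    logprob, path = hmm.decode(np.array([0, 1, 2, 2]).reshape(-1, 1))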
Example #16
0
def train_hmm():
    """
    HMM for sequence learning.
    """
    print("Loading training data...")
    train_sequence, num_classes = get_sequence("./train_data/*")

    print("Build HMM...")
    model = MultinomialHMM(n_components=2)

    print("Train HMM...")
    # Newer hmmlearn fits on a single 2-D column of integer symbols
    model.fit(np.asarray(train_sequence).reshape(-1, 1))
Example #17
0
    def __init__(self, observed):
        """
        Initializes the object and sets the internal state.

        Args:
            observed: array-like, shape (n_samples, n_features)
        """
        self.observed = np.array(observed)

        if len(self.observed.shape) == 1:
            self.observed = self.observed.reshape(-1, 1)
        # TODO: Check other parameters to this constructor
        self.model = MultinomialHMM(n_components=2, n_iter=100)
Example #18
0
class BKT:
    """
    Implements the Bayesian Knowledge Tracing model. This only
    implements the Viterbi and EM algorithms. These may be used
    together to implement an Intelligent Tutoring System.
    """
    def __init__(self, observed):
        """
        Initializes the object and sets the internal state.

        Args:
            observed: array-like, shape (n_samples, n_features)
        """
        self.observed = np.array(observed)

        if len(self.observed.shape) == 1:
            self.observed = self.observed.reshape(-1, 1)
        # TODO: Check other parameters to this constructor
        self.model = MultinomialHMM(n_components=2, n_iter=100)

    def fit(self) -> None:
        """
        Fits the model to the observed states. Uses the EM algorithm
        to estimate model parameters.
        """
        self.model.fit(self.observed)

    def get_model_params(self) -> tuple:
        """
        Returns the model parameters. This must be run only after
        calling the `fit` function.

        Returns:
            (pi, A, B): The start probabilities, the transition
                        probabilities, and the emission probabilities.
        """
        return np.round_(self.model.startprob_, 2), np.round_(self.model.transmat_, 2), \
            np.round_(self.model.emissionprob_, 2)

    def predict(self, sequence) -> np.array:
        """
        Returns the most likely hidden state sequence corresponding to
        `sequence`.

        Args:
            sequence: List of observable states

        Returns:
            state_sequence: Array
        """
        return self.model.predict(sequence)
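A minimal usage sketch (hypothetical right/wrong answer sequence, coded 1/0):

    import numpy as np

    responses = [0, 0, 1, 0, 1, 1, 1, 1]
    bkt = BKT(responses)
    bkt.fit()
    pi, A, B = bkt.get_model_params()
    path = bkt.predict(np.array(responses).reshape(-1, 1))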
Example #19
0
def test_HMM():
    np.random.seed(12345)
    np.set_printoptions(precision=5, suppress=True)

    P = default_hmm()
    ls, obs = P["latent_states"], P["obs_types"]

    # generate a new sequence
    O = generate_training_data(P, n_steps=30, n_examples=25)

    tol = 1e-5
    n_runs = 5
    best, best_theirs = (-np.inf, []), (-np.inf, [])
    for _ in range(n_runs):
        hmm = MultinomialHMM()
        A_, B_, pi_ = hmm.fit(O, ls, obs, tol=tol, verbose=True)

        theirs = MHMM(
            tol=tol,
            verbose=True,
            n_iter=int(1e9),
            transmat_prior=1,
            startprob_prior=1,
            algorithm="viterbi",
            n_components=len(ls),
        )

        O_flat = O.reshape(1, -1).flatten().reshape(-1, 1)
        theirs = theirs.fit(O_flat, lengths=[O.shape[1]] * O.shape[0])

        hmm2 = MultinomialHMM(A=A_, B=B_, pi=pi_)
        like = np.sum([hmm2.log_likelihood(obs) for obs in O])
        like_theirs = theirs.score(O_flat, lengths=[O.shape[1]] * O.shape[0])

        if like > best[0]:
            best = (like, {"A": A_, "B": B_, "pi": pi_})

        if like_theirs > best_theirs[0]:
            best_theirs = (
                like_theirs,
                {
                    "A": theirs.transmat_,
                    "B": theirs.emissionprob_,
                    "pi": theirs.startprob_,
                },
            )
    print("Final log likelihood of sequence: {:.5f}".format(best[0]))
    print("Final log likelihood of sequence (theirs): {:.5f}".format(
        best_theirs[0]))
    plot_matrices(P, best, best_theirs)
Example #20
0
def rolling_score(
    record: SeqRecord,
    model: hmm.MultinomialHMM,
    metadata: pd.DataFrame,
    window_size: int = 1000,
    overlap: int = 950,
) -> List[Dict[str, Union[str, float, int]]]:
    scores = []

    enc = {"A": 0, "C": 1, "G": 2, "T": 3}
    sequence = np.array([enc.get(c, 0) for c in str(record.seq)])

    for start, end in rolling.window_idx(len(sequence), window_size, overlap):
        subsequence = sequence[start:end].reshape(-1, 1)

        score = model.score(subsequence) / (end - start)
        scores.append({
            "id": record.id,
            "start": start,
            "score": score,
            "relative_start": start / len(sequence),
            **{
                k: list(v.values())[0]
                for k, v in metadata[metadata.aid == record.id].to_dict().items(
                )
            },
        })

    return scores
Example #21
0
    def __init__(self):
        self.states = list(CapabilityBehaviourState)
        self.observations = list(InteractionObservation)

        self.hmm = MultinomialHMM(n_components=len(self.states))

        self.state_history = []

        # When many similar capabilities are being used, it is quite often
        # the case that they will be in the same state. This means agent
        # capabilities will have the same results as each other.
        # This can lead to bad cases, for example, where VeryGood behaviours
        # all fail at the same time.
        # To prevent this synchronisation, each behaviour is given their own
        # different initial seed to mix with the seed provided for an interaction.
        self.individual_seed = 0
Example #22
0
def hmm():
    """
    vocabulary-acc = 0.9369
    :return: the HMM model
    """
    model = MultinomialHMM(n_components=2, n_iter=100, algorithm="viterbi")
    return model
Example #23
0
def get_hmm_model(state):
    """Creates an instance of MultinomialHMM, which follows sklearn interface
    Input:
    - state: dictionnary
        where the keys are HiddenMarkovModelProbability choices
        where the values are the probabilities matrices or arrays which
        describes the according hidden markov model state
    Returns: an instance of a trained MultinomialHMM
    """
    hmm_model = MultinomialHMM(n_components=len(SleepStage))

    hmm_model.emissionprob_ = state[HiddenMarkovModelProbability.emission.name]
    hmm_model.startprob_ = state[HiddenMarkovModelProbability.start.name]
    hmm_model.transmat_ = state[HiddenMarkovModelProbability.transition.name]

    return hmm_model
Example #24
0
def detect_events_hmm(mahal_timeseries, c_timeseries, global_pace_timeseries,
                      threshold_quant=.95, trans_matrix = DEFAULT_TRANS_MATRIX,
                      emission_matrix=DEFAULT_EMISSION_MATRIX, initial_state=None):
            
    #Sort the keys of the timeseries chronologically    
    sorted_dates = sorted(mahal_timeseries)
    
    
    (expected_pace_timeseries, sd_pace_timeseries) = getExpectedPace(global_pace_timeseries)    

    #Generate the list of values of R(t)
    mahal_list = [mahal_timeseries[d] for d in sorted_dates]
    c_list = [c_timeseries[d] for d in sorted_dates]
    global_pace_list = [global_pace_timeseries[d] for d in sorted_dates]
    expected_pace_list = [expected_pace_timeseries[d] for d in sorted_dates]

    
    #Use the quantile to determine the threshold
    sorted_mahal = sorted(mahal_list)
    threshold = getQuantile(sorted_mahal, threshold_quant)
    
    
    # The symbols array contains "1" if there is an outlier, "0" if there is not
    symbols = []
    for i in range(len(mahal_list)):
        if(mahal_list[i] > threshold or c_list[i]==1):
            symbols.append(1)
        else:
            symbols.append(0)

    # Actually set up the hmm.  Newer hmmlearn takes these as attributes
    # rather than constructor arguments; default to a uniform start when
    # no initial state distribution is given.
    model = MultinomialHMM(n_components=2)
    model.startprob_ = np.array([.5, .5]) if initial_state is None else initial_state
    model.transmat_ = trans_matrix
    model.emissionprob_ = emission_matrix

    # Make the predictions (decode expects a column vector of symbols)
    lnl, predictions = model.decode(np.array(symbols).reshape(-1, 1))
    
    events = get_all_events(predictions, sorted_dates, mahal_list, global_pace_list,
                            expected_pace_list)
    
    # Sort events by duration, starting with the long events
    events.sort(key = lambda x: x[2], reverse=True)
    return events, predictions
Example #25
0
def detect_events_hmm(mahal_timeseries,
                      c_timeseries,
                      global_pace_timeseries,
                      threshold_quant=.95,
                      trans_matrix=DEFAULT_TRANS_MATRIX,
                      emission_matrix=DEFAULT_EMISSION_MATRIX,
                      initial_state=None):

    #Sort the keys of the timeseries chronologically
    sorted_dates = sorted(mahal_timeseries)

    (expected_pace_timeseries,
     sd_pace_timeseries) = getExpectedPace(global_pace_timeseries)

    #Generate the list of values of R(t)
    mahal_list = [mahal_timeseries[d] for d in sorted_dates]
    c_list = [c_timeseries[d] for d in sorted_dates]
    global_pace_list = [global_pace_timeseries[d] for d in sorted_dates]
    expected_pace_list = [expected_pace_timeseries[d] for d in sorted_dates]

    #Use the quantile to determine the threshold
    sorted_mahal = sorted(mahal_list)
    threshold = getQuantile(sorted_mahal, threshold_quant)

    # The symbols array contains "1" if there is an outlier, "0" if there is not
    symbols = []
    for i in range(len(mahal_list)):
        if (mahal_list[i] > threshold or c_list[i] == 1):
            symbols.append(1)
        else:
            symbols.append(0)

    # Actually set up the hmm.  Newer hmmlearn takes these as attributes
    # rather than constructor arguments; default to a uniform start when
    # no initial state distribution is given.
    model = MultinomialHMM(n_components=2)
    model.startprob_ = np.array([.5, .5]) if initial_state is None else initial_state
    model.transmat_ = trans_matrix
    model.emissionprob_ = emission_matrix

    # Make the predictions (decode expects a column vector of symbols)
    lnl, predictions = model.decode(np.array(symbols).reshape(-1, 1))

    events = get_all_events(predictions, sorted_dates, mahal_list,
                            global_pace_list, expected_pace_list)

    # Sort events by duration, starting with the long events
    events.sort(key=lambda x: x[2], reverse=True)
    return events, predictions
Example #26
0
def test_DiscreteHMM_decode(cases: str) -> None:
    np.random.seed(12346)
    cases = int(cases)
    i = 1
    N_decimal = 4
    while i < cases:
        tol=1e-3
        n_samples = np.random.randint(10, 50)
        hidden_states = np.random.randint(3, 6)
        # symbols is the number of unique observation types.
        symbols = np.random.randint(4, 9)
        X = []
        lengths = []
        for _ in range(n_samples):
            # the actual length is seq_length + 1
            seq_length = symbols
            this_x = np.random.choice(range(symbols), size=seq_length, replace=False)
            X.append(this_x)
            lengths.append(seq_length)
        max_iter = 100


        hmm_gold = MultinomialHMM(n_components=hidden_states, n_iter=100, tol=tol)
        X_gold = np.concatenate(X).reshape((-1,1))
        hmm_gold.fit(X_gold, lengths)
        gold_A = hmm_gold.transmat_
        gold_B = hmm_gold.emissionprob_
        gold_pi = hmm_gold.startprob_
        gold_logprob, gold_state_sequence = hmm_gold.decode(X_gold, lengths)
        hmm_mine = DiscreteHMM(hidden_states=hidden_states,
                               symbols=symbols,
                               A=gold_A,
                               B=gold_B,
                               pi=gold_pi)
        mine_logprob_list = []
        mine_state_sequence = []
        for this_x in X:
            this_mine_logprob, this_mine_state_sequence = hmm_mine.decode(this_x)
            mine_logprob_list.append(this_mine_logprob)
            mine_state_sequence.append(this_mine_state_sequence)
        mine_state_sequence = np.concatenate(mine_state_sequence)
        mine_logprob = sum(mine_logprob_list)
        assert_almost_equal(mine_logprob, gold_logprob, decimal=N_decimal)
        assert_almost_equal(mine_state_sequence, gold_state_sequence, decimal=N_decimal)
        i+=1
    print('Successfully testing the function of computing decodes in discrete HMM!')
Example #27
0
 def __init__(self,
              t,
              theta,
              rho,
              algorithm='viterbi',
              random_state=None,
              n_iter=20, tol=0,
              verbose=False):
     MultinomialHMM.__init__(self, n_components=len(t)+1,
                             algorithm=algorithm,
                             random_state=random_state,
                             n_iter=n_iter, tol=tol,
                             verbose=verbose)
     self.t = np.append(np.append([0], t), [np.inf])
     self.tau = np.diff(self.t)
     self.theta = theta
     self.rho = rho
Example #28
0
def main():
    hmm = MultinomialHMM(n_components=5)
    
    T = np.random.random(size=(5, 5))
    T = T/T.sum(axis=1).reshape((5, 1))
    hmm.transmat_ = T

    pi = np.random.random(size=(5,))
    pi = pi/pi.sum()
    hmm.startprob_ = pi

    emit = np.random.random(size=(5, 10))
    emit = emit/emit.sum(axis=1).reshape((5, 1))
    hmm.emissionprob_ = emit

    X = np.zeros((20, 25)).astype(int)
    for i in range(20):
        x, _ = hmm.sample(n_samples=25)
        X[i] = x.reshape((25,))

    # load the PyTorch HMM
    phmm = HMM(z_dim=5, x_dim=10)
    phmm.T = torch.Tensor(T.T)
    phmm.pi = torch.Tensor(pi)
    phmm.emit = torch.Tensor(emit.T)

    # compute PyTorch HMM forward-backward
    my_marginals = phmm.log_marginal(torch.Tensor(X.T))

    # compute hmmlearn version
    true_marginals = np.zeros(20)
    for i in range(20):
        true_marginals[i] = hmm.score(X[i].reshape((-1, 1)))

    assert np.abs(true_marginals - my_marginals.numpy()).max() < 1e-4
Example #29
0
class CapabilityBehaviour:
    def __init__(self):
        self.states = list(CapabilityBehaviourState)
        self.observations = list(InteractionObservation)

        self.hmm = MultinomialHMM(n_components=len(self.states))

        self.state_history = []

        # When many similar capabilities are being used, it is quite often
        # the case that they will be in the same state. This means agent
        # capabilities will have the same results as each other.
        # This can lead to bad cases, for example, where VeryGood behaviours
        # all fail at the same time.
        # To prevent this synchronisation, each behaviour is given their own
        # different initial seed to mix with the seed provided for an interaction.
        self.individual_seed = 0

    def next_interaction(self, seed: int, t: float):
        (x, state_sequence) = self.hmm.sample(1, random_state=seed ^ self.individual_seed)

        assert len(state_sequence) == 1
        assert len(x) == 1
        assert len(x[0]) == 1

        chosen_state = np.array([
            1 if state_sequence[0] == n else 0
            for n in range(len(self.states))
        ])

        # Update the state of where the HMM is
        self.hmm.startprob_ = chosen_state @ self.hmm.transmat_

        self.state_history.append((t, self.states[state_sequence[0]]))

        return self.observations[x[0][0]]

    def peek_interaction(self, seed: int):
        (x, state_sequence) = self.hmm.sample(1, random_state=seed ^ self.individual_seed)

        assert len(state_sequence) == 1
        assert len(x) == 1
        assert len(x[0]) == 1

        return self.observations[x[0][0]]
Example #30
0
def main():
    rand_p_matrix = np.random.rand(4, 4)
    rand_b_matrix = np.random.rand(4, 3)

    print("\nGernerating p matrix...............")
    p_matrix = normalization(rand_p_matrix)
    print(p_matrix)

    print("\nGernerating b matrix...............")
    b_matrix = normalization(rand_b_matrix)
    print(b_matrix)

    # Generate 1000 observations
    O, Q = generate_observation(1000, p_matrix, b_matrix)

    O_seq = [1, 2, 3, 3, 1, 2, 3, 3, 1, 2, 3, 3]
    pi = (1, 0, 0, 0)
    print("\nThe Orginal Observation Sequence O: {}".format(O[:12]))
    print("The probability 𝑝(𝑂|𝜆) is {} with O: {}".format(
        forward(O_seq, p_matrix, b_matrix, pi)[-1].sum(), O_seq))

    print("\nThe Orginal Sequence Q: {}".format(Q[:12]))
    print("The Most Probable Sequence Q: {} with O: {}".format(
        list(viterbi(O_seq, p_matrix, b_matrix, pi)), O_seq))

    observations = LabelEncoder().fit_transform(O)
    model = MultinomialHMM(n_components=4)
    # hmmlearn expects a column vector of integer symbols
    model.fit(observations.reshape(-1, 1))
    est_pi = model.startprob_
    est_p = model.transmat_
    est_b = model.emissionprob_
    print("\nThe estimated transition matrix P:\n {}".format(est_p))
    print("\nThe estimated event matrix B:\n {}".format(est_b))
    print("\nThe estimated start probability pi:\n {}".format(est_pi))

    _, p = chisquare(p_matrix, est_p, axis=None)
    print("\np-value of transition matrix P: {}".format(p))

    _, p = chisquare(b_matrix, est_b, axis=None)
    print("p-value of event matrix B: {}".format(p))

    _, p = chisquare(pi, est_pi, axis=None)
    print("p-value of start probability pi: {}".format(p))
Example #31
0
def fit(seqs, n_components=1):
    model = MultinomialHMM(n_components=n_components,
                           startprob_prior=1.0,
                           transmat_prior=1.0,
                           algorithm='viterbi',
                           random_state=1,
                           n_iter=100,
                           tol=0.01,
                           verbose=True,
                           params='ste',
                           init_params='ste')
    # The original discarded the model without fitting; train on the
    # concatenated sequences and return it.
    model.fit(np.concatenate(seqs).reshape(-1, 1),
              lengths=[len(s) for s in seqs])
    return model
Example #32
0
def create_hmm_data(N, seq_len, x_dim, z_dim, params=None):
    from hmmlearn.hmm import MultinomialHMM  # introduces a lot of dependencies
    hmm = MultinomialHMM(n_components=z_dim)

    if params is None:
        T = np.random.random(size=(z_dim, z_dim))
        T = T/T.sum(axis=1).reshape((z_dim, 1))

        pi = np.random.random(size=(z_dim,))
        pi = pi/pi.sum()

        emit = np.random.random(size=(z_dim, x_dim))
        emit = emit/emit.sum(axis=1).reshape((z_dim, 1))
    else:
        T, pi, emit = params

    hmm.transmat_ = T
    hmm.startprob_ = pi
    hmm.emissionprob_ = emit

    X = np.zeros((N, seq_len)).astype(int)
    for i in range(N):
        x, _ = hmm.sample(n_samples=seq_len)
        X[i] = x.reshape((seq_len,))

    return (T, pi, emit), HMMData(X)
Example #33
0
    def test_viterbi_case_random(self):
        for i in range(1000):
            # init
            self.n_state = np.random.randint(1,10)
            self.n_output = np.random.randint(1,10)
            self.step = np.random.randint(1,200)
            p = np.random.random(self.n_state)
            startprob = p/p.sum()
            p = np.random.random((self.n_state,self.n_state))
            transmat = p/p.sum(axis=1).reshape(-1,1)
            p = np.random.random((self.n_state,self.n_output))
            emissionprob = p/p.sum(axis=1).reshape(-1,1)
            X = np.random.choice(self.n_output,self.step).reshape(-1,1)

            # hmmlearn
            model = MultinomialHMM(n_components=self.n_state)
            model.startprob_ = startprob
            model.transmat_ = transmat
            model.emissionprob_ = emissionprob
            y = model.predict(X)

            # my hmm
            hmm = HMM()
            pred = hmm.viterbi(startprob, transmat, emissionprob, X)
            self.assertTrue(np.array_equal(y, pred))
Example #34
0
 def predict_prob(self, x, init_prob=None, window=-1):
     """Predict the probability
     """
     if init_prob is None:
         init_prob = np.array(
             [1 / self.num_states for i in range(self.num_states)])
     model = MultinomialHMM(self.num_states)
     model.n_features = self.num_observations
     model.startprob_ = init_prob
     model.emissionprob_ = self.B
     model.transmat_ = self.A
     return model.predict_proba(x)
Example #35
0
    def predict(self, day_to_predict):
        # Get records of 30 days before day_to_predict
        previous_thirty_days = get_previous_month(self.time_series, day_to_predict)
        binary_crime_sequence = previous_thirty_days['Violent Crime Committed?'].values.tolist()

        # Unsupervised HMM can't account for string of identical emissions.
        # If we see such a string, just predict the same emission for the following day.
        if binary_crime_sequence == [1]*30:
            return True
        if binary_crime_sequence == [0]*30:
            return False

        votes = []
        # Train three HMMs. They are initialized randomly, so we take "votes"
        # from several models; an odd number of voters precludes ties and is
        # a decent tradeoff between runtime and getting bad results by chance.
        for _ in range(3):
            # Train HMM (hmmlearn expects a column vector of symbols)
            obs = np.array(binary_crime_sequence).reshape(-1, 1)
            model = MultinomialHMM(n_components=3, n_iter=10000)
            model.fit(obs)

            # Determine the most likely state of the last day in the sequence
            last_state_probs = model.predict_proba(obs)[-1]
            current_state = self.get_most_likely(last_state_probs)

            # Determine the most likely state of the day we're trying to predict
            transition_probs = model.transmat_[current_state]
            next_state = self.get_most_likely(transition_probs)

            # Determine the most likely emission (crime/no crime) from a day in that state
            emissions = model.emissionprob_[next_state]
            vote = self.get_most_likely(emissions)

            # Record this HMM's vote
            votes.append(vote)

        # Votes are 1 for crime, 0 for no crime. Return True if majority votes for crime.
        return sum(votes) > 1
Example #36
0
 def __init__(self,
              n_components=1,
              startprob_prior=1.0,
              transmat_prior=1.0,
              algorithm="viterbi",
              random_state=None,
              n_iter=10,
              tol=1e-2,
              verbose=False,
              params="ste",
              init_params="ste"):
     MultinomialHMM.__init__(self,
                             n_components=n_components,
                             startprob_prior=startprob_prior,
                             transmat_prior=transmat_prior,
                             algorithm=algorithm,
                             random_state=random_state,
                             n_iter=n_iter,
                             tol=tol,
                             verbose=verbose,
                             params=params,
                             init_params=init_params)
     return
Example #37
0
def calculate_hmm_m(training_set, test_set, taxonomy, cursor, connection, settings):
    da_id_taxonomy = find_da_id(taxonomy, cursor)
    states, start_probability, transition_probability = start_transition_probability_extraction(training_set, taxonomy)
    n_states = len(states)

    feature_list, emissions = extract_features_training_set(training_set, taxonomy, states, settings)

    # print model.transmat_
    con_pathes, test_obs, emissions = extract_features_test_set(test_set, taxonomy, feature_list, emissions, settings)

    model = MultinomialHMM(n_components=n_states)
    # The private _set_* helpers no longer exist in hmmlearn; assign the
    # probability attributes directly.
    model.startprob_ = start_probability
    model.transmat_ = transition_probability
    model.emissionprob_ = emissions
    da_predictions(test_obs, model, con_pathes, states, da_id_taxonomy, taxonomy, cursor, connection)
Example #38
0
def run_hmm_model(input_df, n_unique, A_df, Eta, n_iter = 10000, 
                        tol=1e-2, verbose = False, params = 'e', init_params = ''):
    '''
        Runs the hmm model and returns the predicted results, score and model

            input_df : The dataframe of keypresses

            n_unique : number of unique chars

            A_df : Dataframe of the transition matrix

            Eta : Emissions matrix

            n_iter : Max number of iterations for hmm

            tol : The value to stop the hmm model if score does not improve by more than this

            verbose : Whether or not to print progress

            params : Parameters to tune

            init_params : Parameters to initialize
    '''
    # Proportion of characters starting words in English
    char_counts = get_char_counts()

    # Construct model 
    hmm = MultinomialHMM(n_components=n_unique, startprob_prior=np.append(0, char_counts.values), 
               transmat_prior=A_df.values, algorithm='viterbi', 
               random_state=None, n_iter=n_iter, tol=tol, 
               verbose=verbose, params=params, init_params=init_params)
    
    # Set values 
    hmm.emissionprob_ = Eta
    hmm.transmat_ = A_df.values
    hmm.startprob_ = np.append(0, char_counts.values)

    # Feed in the clusters as the expected output
    model_input = input_df['cluster'].values
    
    # Reshape    
    if len(model_input.shape) == 1:
        model_input = model_input.reshape((len(model_input), 1))
    
    # Fit the model
    hmm = hmm.fit(model_input)

    # Score model
    score, results = hmm.decode(model_input)

    return score, results, hmm  
Example #39
0
def buildHMM(num_states, n_iter=10, tol=0.01):
    model = MultinomialHMM(n_components=num_states, n_iter=n_iter, tol=tol)
    model.n_features = 3
    return model
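A minimal sketch (assuming hmmlearn 0.2.x, which respects a preset n_features as long as it covers every observed symbol): fixing n_features keeps the alphabet at {0, 1, 2} even when a training batch happens to miss a symbol.

    import numpy as np

    model = buildHMM(num_states=2)
    X = np.array([0, 1, 0, 0, 1, 1]).reshape(-1, 1)  # symbol 2 never appears
    model.fit(X)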
Example #40
0
smoothing_tolerance = 1 #number of indices
sampling_interval = 3600 #seconds

discrete_obs, delta_hws, delta_fas = [], [], []
for idx in mice:
    d = _data_on_mouse(data, idx, smoothing_time_radius,
                       smoothing_amplitude_radius, smoothing_tolerance, 
                       sampling_interval, bins)
    discrete_obs.append(d[0])
    delta_hws.append(d[1])
    delta_fas.append(d[2])

X = np.array(discrete_obs)


model = MultinomialHMM(n_components=n_components)
predictions = []
for i in range(7):
    # Hold out mouse i and train on the remaining sequences; hmmlearn wants
    # one flat column of symbols plus the per-sequence lengths.
    held_out_X = np.vstack((X[:i], X[i+1:]))
    model.fit(held_out_X.reshape(-1, 1),
              lengths=[held_out_X.shape[1]] * held_out_X.shape[0])
    predictions.append(model.decode(X[i].reshape(X[i].shape[0], 1)))

f, axarr = plt.subplots(7, 1)
yranges = np.arange(n_components+1, dtype=float)/n_components
colors = plt.cm.rainbow(np.linspace(0, 1, n_components))
for i in range(7):
    states, indices = _axvspan_maker(predictions[i][1])
    for s, idxs in zip(states, indices): 
        axarr[i].axvspan(idxs[0], idxs[1], ymin=yranges[s], ymax=yranges[s+1], color=colors[s])
plt.show()