def initHMM(self, length):
    a = 1.0 / length

    # Transition probabilities
    trans = np.array([[1 - a,     a,     0,     0],   # Pre ->
                      [    0, 1 - a, a / 2, a / 2],   # HQ ->
                      [    0,     0,     1,     0],   # PostQuiet ->
                      [    0,     0,     0,     1]])  # PostActive ->

    # Emission probabilities
    eps = 1e-4
    emit = np.array([[0.25, 0.25,       0.50],   # Emit | Pre
                     [0.16, 0.84 - eps, eps ],   # Emit | HQ
                     [0.90, 0.10 - eps, eps ],   # Emit | PostQuiet
                     [0.25, 0.25,       0.50]])  # Emit | PostActive
    #                 A0    A1          A2

    # Start state distribution
    start = np.array([0.34, 0.33, 0.33, 0])

    hmm = MultinomialHMM(n_components=4)  # four hidden states, as above
    hmm.transmat_ = trans
    hmm.startprob_ = start
    hmm.emissionprob_ = emit
    return hmm
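A minimal driver for the model above: a sketch assuming hmmlearn's pre-0.3 MultinomialHMM (where observations are integer category codes; later releases renamed this behaviour CategoricalHMM), and a hypothetical `detector` instance that exposes initHMM.

import numpy as np

# Hypothetical usage: decode a run of emission codes (the A0/A1/A2 columns
# above map to symbols 0/1/2) into the four hidden states.
obs = np.array([2, 2, 1, 1, 1, 1, 0, 0]).reshape(-1, 1)
hmm = detector.initHMM(length=len(obs))  # `detector` is assumed, not from the source
logprob, states = hmm.decode(obs, algorithm="viterbi")
# states[i] indexes into (Pre, HQ, PostQuiet, PostActive)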
def initHMM(self):
    # Full signature for reference:
    # self._hmm = MultinomialHMM(n_components=self._N, startprob_prior=None, transmat_prior=None,
    #                            algorithm='viterbi', random_state=None, n_iter=self._maxIters, tol=0.01,
    #                            verbose=True, params='ste', init_params='ste')
    self._hmm = MultinomialHMM(n_components=self._N, n_iter=self._maxIters,
                               verbose=True, params='ste', init_params='ste')
def __init__(self, model_file=None, components=None):
    if os.path.exists(model_file):
        self.model = joblib.load(model_file)
    else:
        alu_file = 'Alu_sequence.pkl'
        if os.path.exists(alu_file):
            locis = joblib.load(alu_file)
        else:
            locis = read_sequence('hg19_Alu.bed', 0)
            locis = random.sample(locis, 100000)
            for l in tqdm(locis):
                l.init_seq()
                l.decode_seq()
            locis = list(filter(lambda l: l.seq is not None, locis))
            joblib.dump(locis, alu_file)
        print('Alu Loaded')
        locis = locis[0:5000]
        model = MultinomialHMM(n_components=components, verbose=True, n_iter=50)
        x = np.concatenate(list(map(attrgetter('seq'), locis)))
        x = np.reshape(x, [x.shape[0], 1])
        length = list(map(attrgetter('length'), locis))
        model.fit(x, length)
        self.model = model
        joblib.dump(self.model, model_file)
def train(self, obs_seq_list: list, state_seq_list: list,
          obs_set: list, state_set: list, file):
    """
    :param obs_seq_list: observation sequence list [[o1, o2, o3], [o1, o2, o3], ...]
    :param state_seq_list: state sequence list [[s1, s2, s3], [s1, s2, s3], ...]
    :param obs_set: all possible observation states
    :param state_set: all possible hidden states
    :param file: optional path to pickle the trained model to
    """
    self.obs_seq_list = obs_seq_list
    self.state_seq_list = state_seq_list
    self.obs_set = obs_set
    self.state_set = state_set
    self.counter = Counter(''.join(state_seq_list))
    self.hmm = MultinomialHMM(n_components=len(self.state_set))
    self.startprob, self.transmat, self.emissionprob = \
        self._init_state(), self._trans_state(), self._emit_state()
    self.hmm.startprob_ = self.startprob
    self.hmm.transmat_ = self.transmat
    self.hmm.emissionprob_ = self.emissionprob
    if file is not None:
        with open(file, 'wb') as f:
            pickle.dump(self, f)
def buildHMM(HMMFactory):
    model = MultinomialHMM(n_components=2, n_iter=200)
    model.startprob_ = HMMFactory.hiddenProb()
    model.transmat_ = HMMFactory.transMatrix()
    model.emissionprob_ = HMMFactory.emissionMatrix()
    return model
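buildHMM only assumes its argument exposes three probability getters; a hypothetical two-state factory (illustrative numbers, not from the source) could look like this:

import numpy as np

class CoinFactory:
    # Illustrative stand-in for the factory object; each row below sums to 1.
    def hiddenProb(self):
        return np.array([0.5, 0.5])

    def transMatrix(self):
        return np.array([[0.9, 0.1],
                         [0.1, 0.9]])

    def emissionMatrix(self):
        return np.array([[0.8, 0.2],
                         [0.3, 0.7]])

model = buildHMM(CoinFactory())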
def main():
    rand_p_matrix = np.random.rand(4, 4)
    rand_b_matrix = np.random.rand(4, 3)

    print("\nGenerating p matrix...............")
    p_matrix = normalization(rand_p_matrix)
    print(p_matrix)

    print("\nGenerating b matrix...............")
    b_matrix = normalization(rand_b_matrix)
    print(b_matrix)

    # Generate 1000 observations
    O, _ = generate_observation(1000, p_matrix, b_matrix)

    # Model selection over the number of states
    aic = []
    bic = []
    likelihood = []
    m = 3
    print("\nTraining the HMM for selection of number of states........")
    for n in range(2, 30):
        observations = LabelEncoder().fit_transform(O)
        # hmmlearn expects a column vector of symbol indices
        X = observations.reshape(-1, 1)
        model = MultinomialHMM(n_components=n, random_state=200263453)
        model.fit(X)
        logL = model.score(X)
        p = compute_p(n, m)
        a = AIC(logL, p)
        b = BIC(logL, observations, p)
        likelihood.append(logL)
        aic.append(a)
        bic.append(b)

    plot(aic, 'AIC')
    plot(bic, 'BIC')
    plot(likelihood, 'Log likelihood')
def __init__(self, n_iter=100):
    MultinomialHMM.__init__(self, n_components=len(self.states), n_iter=n_iter)
    self.voca = dict()
    self.word_freq = defaultdict(int)
    self.max_num_segs = 0
    self.n_training = 0
def get_model(self):
    """Returns a multinomial HMM."""
    model = MultinomialHMM(n_components=self.get_max(), params='e', init_params='')
    model.startprob_ = self.get_start()
    model.transmat_ = self.get_transition()
    model.emissionprob_ = self.get_emission()
    return model
def get_model(self):
    """Initialize the HMM model."""
    model = MultinomialHMM(n_components=len(self.states))
    model.startprob_ = self.init_p
    model.transmat_ = self.trans_p
    model.emissionprob_ = self.emit_p
    return model
def fit_hmm_learn(X, n_states):
    samples = np.concatenate(X)
    lengths = [len(x) for x in X]

    hmm_learn_model = MultinomialHMM(n_components=n_states)
    hmm_learn_model.fit(samples, lengths)

    # Label data using hmmlearn model
    return hmm_learn_model.predict(samples, lengths)
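A toy invocation of fit_hmm_learn, assuming each element of X is a column vector of integer symbols (the (n_samples, 1) shape hmmlearn expects):

import numpy as np

# Two short sequences over the alphabet {0, 1, 2}; the return value is one
# label array aligned with np.concatenate(X).
X = [np.array([0, 1, 2, 1, 0]).reshape(-1, 1),
     np.array([2, 2, 0, 1]).reshape(-1, 1)]
labels = fit_hmm_learn(X, n_states=2)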
def detect_events_hmm(mahal_timeseries, c_timeseries, global_pace_timeseries,
                      threshold_quant=.95):
    # Sort the keys of the timeseries chronologically
    sorted_dates = sorted(mahal_timeseries)

    (expected_pace_timeseries, sd_pace_timeseries) = getExpectedPace(global_pace_timeseries)

    # Generate the list of values of R(t)
    mahal_list = [mahal_timeseries[d] for d in sorted_dates]
    c_list = [c_timeseries[d] for d in sorted_dates]
    global_pace_list = [global_pace_timeseries[d] for d in sorted_dates]
    expected_pace_list = [expected_pace_timeseries[d] for d in sorted_dates]

    # Use the quantile to determine the threshold
    sorted_mahal = sorted(mahal_list)
    threshold = getQuantile(sorted_mahal, threshold_quant)

    # The symbols array contains "1" if there is an outlier, "0" if there is not
    symbols = []
    for i in range(len(mahal_list)):
        if mahal_list[i] > threshold or c_list[i] == 1:
            symbols.append(1)
        else:
            symbols.append(0)

    # Set up the hidden markov model. We are modeling the non-event states as "0"
    # and event states as "1".
    # A transition matrix with heavy weight on the diagonals ensures that the model
    # is likely to stick in the same state rather than rapidly switching. In other
    # words, the predictions will be relatively "smooth".
    trans_matrix = array([[.999, .001],
                          [.001, .999]])

    # Emission matrix - state 0 is likely to emit symbol 0, and vice versa.
    # In other words, events are likely to be outliers.
    emission_matrix = array([[.95, .05],
                             [.4, .6]])

    # Actually set up the hmm
    model = MultinomialHMM(n_components=2)
    model.transmat_ = trans_matrix
    model.startprob_ = array([.5, .5])  # uniform start, matching the old constructor default
    model.emissionprob_ = emission_matrix

    # Make the predictions
    lnl, predictions = model.decode(array(symbols).reshape(-1, 1))

    events = get_all_events(predictions, sorted_dates, mahal_list,
                            global_pace_list, expected_pace_list)

    # Sort events by duration, starting with the long events
    events.sort(key=lambda x: x[2], reverse=True)
    return events, predictions
def predict(self, x, init_prob=None, method='hmmlearn', window=-1):
    """Predict result based on HMM"""
    if init_prob is None:
        init_prob = np.array(
            [1 / self.num_states for i in range(self.num_states)])

    if method == 'hmmlearn':
        model = MultinomialHMM(self.num_states, n_iter=100)
        model.n_features = self.num_observations
        model.startprob_ = init_prob
        model.emissionprob_ = self.B
        model.transmat_ = self.A

        if window == -1:
            result = model.predict(x)
        else:
            result = np.zeros(x.shape[0], dtype=int)
            result[0:window] = model.predict(x[0:window])
            for i in range(window, x.shape[0]):
                result[i] = model.predict(x[i - window + 1:i + 1])[-1]
    else:
        if window == -1:
            result = self.decode(x, init_prob)
        else:
            result = np.zeros(x.shape[0], dtype=int)
            result[0:window] = self.decode(x[0:window], init_prob)
            for i in range(window, x.shape[0]):
                result[i] = self.decode(x[i - window + 1:i + 1], init_prob)[-1]

    return result
def train_hmm():
    """HMM for sequence learning."""
    print("Loading training data...")
    train_sequence, num_classes = get_sequence("./train_data/*")

    print("Build HMM...")
    model = MultinomialHMM(n_components=2)

    print("Train HMM...")
    # hmmlearn expects a column vector of symbols rather than a list of sequences
    model.fit(np.array(train_sequence).reshape(-1, 1))
def build_hmm(trans, emis, seed=1):
    """Builds and returns hmm_model given the transition and emission
    probabilities matrices."""
    hmm = MultinomialHMM(n_components=trans.shape[0],
                         algorithm="viterbi", random_state=seed)
    hmm.n_features = emis.shape[1]
    hmm.emissionprob_ = emis
    # We will always start at the first state
    hmm.startprob_ = np.array([1] + [0] * (trans.shape[0] - 1))
    hmm.transmat_ = trans
    return hmm
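A sketch of driving build_hmm with a two-state toy model (values illustrative only; rows of each matrix must sum to 1):

import numpy as np

trans = np.array([[0.7, 0.3],
                  [0.4, 0.6]])
emis = np.array([[0.9, 0.1],   # state 0 mostly emits symbol 0
                 [0.2, 0.8]])  # state 1 mostly emits symbol 1
hmm = build_hmm(trans, emis)
logprob, path = hmm.decode(np.array([0, 0, 1, 1, 0]).reshape(-1, 1))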
class BKT:
    """
    Implements the Bayesian Knowledge Tracing model.

    This only implements the Viterbi and EM algorithms.
    These may be used together to implement an Intelligent Tutoring System.
    """

    def __init__(self, observed):
        """
        Initializes the object and sets the internal state.

        Args:
            observed: array-like, shape (n_samples, n_features)
        """
        self.observed = np.array(observed)
        if len(self.observed.shape) == 1:
            self.observed = self.observed.reshape(-1, 1)
        # TODO: Check other parameters to this constructor
        self.model = MultinomialHMM(n_components=2, n_iter=100)

    def fit(self) -> None:
        """
        Fits the model to the observed states.

        Uses the EM algorithm to estimate model parameters.
        """
        self.model.fit(self.observed)

    def get_model_params(self) -> tuple:
        """
        Returns the model parameters.

        This must be run only after calling the `fit` function.

        Returns:
            (pi, A, B): The start probabilities, the transition probabilities,
            and the emission probabilities.
        """
        return np.round(self.model.startprob_, 2), np.round(self.model.transmat_, 2), \
            np.round(self.model.emissionprob_, 2)

    def predict(self, sequence) -> np.array:
        """
        Returns the most likely hidden state sequence corresponding to `sequence`.

        Args:
            sequence: List of observable states

        Returns:
            state_sequence: Array
        """
        return self.model.predict(sequence)
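A hypothetical end-to-end run of BKT on one student's binary answer history (0 = incorrect, 1 = correct); a sketch only, since EM from a random start can land in a local optimum and may need restarts:

import numpy as np

observations = [0, 0, 1, 0, 1, 1, 1, 1]    # illustrative answer history
bkt = BKT(observations)
bkt.fit()                                   # EM parameter estimation
pi, A, B = bkt.get_model_params()           # rounded start/transition/emission
states = bkt.predict(np.array(observations).reshape(-1, 1))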
def test_HMM():
    np.random.seed(12345)
    np.set_printoptions(precision=5, suppress=True)

    P = default_hmm()
    ls, obs = P["latent_states"], P["obs_types"]

    # generate a new sequence
    O = generate_training_data(P, n_steps=30, n_examples=25)

    tol = 1e-5
    n_runs = 5
    best, best_theirs = (-np.inf, []), (-np.inf, [])
    for _ in range(n_runs):
        hmm = MultinomialHMM()
        A_, B_, pi_ = hmm.fit(O, ls, obs, tol=tol, verbose=True)

        theirs = MHMM(
            tol=tol,
            verbose=True,
            n_iter=int(1e9),
            transmat_prior=1,
            startprob_prior=1,
            algorithm="viterbi",
            n_components=len(ls),
        )

        O_flat = O.reshape(1, -1).flatten().reshape(-1, 1)
        theirs = theirs.fit(O_flat, lengths=[O.shape[1]] * O.shape[0])

        hmm2 = MultinomialHMM(A=A_, B=B_, pi=pi_)
        # avoid shadowing `obs` (the observation types) above
        like = np.sum([hmm2.log_likelihood(o) for o in O])
        like_theirs = theirs.score(O_flat, lengths=[O.shape[1]] * O.shape[0])

        if like > best[0]:
            best = (like, {"A": A_, "B": B_, "pi": pi_})

        if like_theirs > best_theirs[0]:
            best_theirs = (
                like_theirs,
                {
                    "A": theirs.transmat_,
                    "B": theirs.emissionprob_,
                    "pi": theirs.startprob_,
                },
            )

    print("Final log likelihood of sequence: {:.5f}".format(best[0]))
    print("Final log likelihood of sequence (theirs): {:.5f}".format(best_theirs[0]))
    plot_matrices(P, best, best_theirs)
def rolling_score(
    record: SeqRecord,
    model: hmm.MultinomialHMM,
    metadata: pd.DataFrame,
    window_size: int = 1000,
    overlap: int = 950,
) -> List[Dict[str, Union[str, float, int]]]:
    scores = []
    enc = {"A": 0, "C": 1, "G": 2, "T": 3}
    sequence = np.array([enc.get(c, 0) for c in str(record.seq)])
    for start, end in rolling.window_idx(len(sequence), window_size, overlap):
        subsequence = sequence[start:end].reshape(-1, 1)
        score = model.score(subsequence) / (end - start)
        scores.append({
            "id": record.id,
            "start": start,
            "score": score,
            "relative_start": start / len(sequence),
            **{
                k: list(v.values())[0]
                for k, v in metadata[metadata.aid == record.id].to_dict().items()
            },
        })
    return scores
def hmm():
    """
    vocabulary-acc = 0.9369
    :return: the HMM model
    """
    model = MultinomialHMM(n_components=2, n_iter=100, algorithm="viterbi")
    return model
def get_hmm_model(state):
    """Creates an instance of MultinomialHMM, which follows the sklearn interface.

    Input:
        - state: dictionary whose keys are HiddenMarkovModelProbability choices
          and whose values are the probability matrices or arrays that describe
          the corresponding hidden markov model state

    Returns:
        an instance of a trained MultinomialHMM
    """
    hmm_model = MultinomialHMM(n_components=len(SleepStage))
    hmm_model.emissionprob_ = state[HiddenMarkovModelProbability.emission.name]
    hmm_model.startprob_ = state[HiddenMarkovModelProbability.start.name]
    hmm_model.transmat_ = state[HiddenMarkovModelProbability.transition.name]
    return hmm_model
def detect_events_hmm(mahal_timeseries, c_timeseries, global_pace_timeseries,
                      threshold_quant=.95, trans_matrix=DEFAULT_TRANS_MATRIX,
                      emission_matrix=DEFAULT_EMISSION_MATRIX, initial_state=None):
    # Sort the keys of the timeseries chronologically
    sorted_dates = sorted(mahal_timeseries)

    (expected_pace_timeseries, sd_pace_timeseries) = getExpectedPace(global_pace_timeseries)

    # Generate the list of values of R(t)
    mahal_list = [mahal_timeseries[d] for d in sorted_dates]
    c_list = [c_timeseries[d] for d in sorted_dates]
    global_pace_list = [global_pace_timeseries[d] for d in sorted_dates]
    expected_pace_list = [expected_pace_timeseries[d] for d in sorted_dates]

    # Use the quantile to determine the threshold
    sorted_mahal = sorted(mahal_list)
    threshold = getQuantile(sorted_mahal, threshold_quant)

    # The symbols array contains "1" if there is an outlier, "0" if there is not
    symbols = []
    for i in range(len(mahal_list)):
        if mahal_list[i] > threshold or c_list[i] == 1:
            symbols.append(1)
        else:
            symbols.append(0)

    # Actually set up the hmm
    if initial_state is None:
        initial_state = array([.5, .5])  # uniform start, matching the old constructor default
    model = MultinomialHMM(n_components=2)
    model.transmat_ = trans_matrix
    model.startprob_ = initial_state
    model.emissionprob_ = emission_matrix

    # Make the predictions
    lnl, predictions = model.decode(array(symbols).reshape(-1, 1))

    events = get_all_events(predictions, sorted_dates, mahal_list,
                            global_pace_list, expected_pace_list)

    # Sort events by duration, starting with the long events
    events.sort(key=lambda x: x[2], reverse=True)
    return events, predictions
def test_DiscreteHMM_decode(cases: str) -> None:
    np.random.seed(12346)
    cases = int(cases)
    i = 1
    N_decimal = 4
    while i < cases:
        tol = 1e-3
        n_samples = np.random.randint(10, 50)
        hidden_states = np.random.randint(3, 6)
        # `symbols` is the number of unique observation types.
        symbols = np.random.randint(4, 9)
        X = []
        lengths = []
        for _ in range(n_samples):
            # the actual length is seq_length + 1
            seq_length = symbols
            this_x = np.random.choice(range(symbols), size=seq_length, replace=False)
            X.append(this_x)
            lengths.append(seq_length)

        max_iter = 100
        hmm_gold = MultinomialHMM(n_components=hidden_states, n_iter=max_iter, tol=tol)

        X_gold = np.concatenate(X).reshape((-1, 1))
        hmm_gold.fit(X_gold, lengths)

        gold_A = hmm_gold.transmat_
        gold_B = hmm_gold.emissionprob_
        gold_pi = hmm_gold.startprob_
        gold_logprob, gold_state_sequence = hmm_gold.decode(X_gold, lengths)

        hmm_mine = DiscreteHMM(hidden_states=hidden_states,
                               symbols=symbols,
                               A=gold_A,
                               B=gold_B,
                               pi=gold_pi)
        mine_logprob_list = []
        mine_state_sequence = []
        for this_x in X:
            this_mine_logprob, this_mine_state_sequence = hmm_mine.decode(this_x)
            mine_logprob_list.append(this_mine_logprob)
            mine_state_sequence.append(this_mine_state_sequence)
        mine_state_sequence = np.concatenate(mine_state_sequence)
        mine_logprob = sum(mine_logprob_list)

        assert_almost_equal(mine_logprob, gold_logprob, decimal=N_decimal)
        assert_almost_equal(mine_state_sequence, gold_state_sequence, decimal=N_decimal)
        i += 1

    print('Successfully tested the decoding function of the discrete HMM!')
def __init__(self, t, theta, rho, algorithm='viterbi', random_state=None,
             n_iter=20, tol=0, verbose=False):
    MultinomialHMM.__init__(self, n_components=len(t) + 1, algorithm=algorithm,
                            random_state=random_state, n_iter=n_iter, tol=tol,
                            verbose=verbose)
    self.t = np.append(np.append([0], t), [np.inf])
    self.tau = np.diff(self.t)
    self.theta = theta
    self.rho = rho
def main():
    hmm = MultinomialHMM(n_components=5)
    T = np.random.random(size=(5, 5))
    T = T / T.sum(axis=1).reshape((5, 1))
    hmm.transmat_ = T
    pi = np.random.random(size=(5,))
    pi = pi / pi.sum()
    hmm.startprob_ = pi
    emit = np.random.random(size=(5, 10))
    emit = emit / emit.sum(axis=1).reshape((5, 1))
    hmm.emissionprob_ = emit

    X = np.zeros((20, 25)).astype(int)
    for i in range(20):
        x, _ = hmm.sample(n_samples=25)
        X[i] = x.reshape((25,))

    # load the PyTorch HMM
    phmm = HMM(z_dim=5, x_dim=10)
    phmm.T = torch.Tensor(T.T)
    phmm.pi = torch.Tensor(pi)
    phmm.emit = torch.Tensor(emit.T)

    # compute PyTorch HMM forward-backward
    my_marginals = phmm.log_marginal(torch.Tensor(X.T))

    # compute hmmlearn version
    true_marginals = np.zeros(20)
    for i in range(20):
        true_marginals[i] = hmm.score(X[i].reshape((-1, 1)))

    assert np.abs(true_marginals - my_marginals.numpy()).max() < 1e-4
class CapabilityBehaviour:
    def __init__(self):
        self.states = list(CapabilityBehaviourState)
        self.observations = list(InteractionObservation)

        self.hmm = MultinomialHMM(n_components=len(self.states))

        self.state_history = []

        # When many similar capabilities are being used, it is quite often
        # the case that they will be in the same state. This means agent
        # capabilities will have the same results as each other.
        # This can lead to bad cases, for example, where VeryGood behaviours
        # all fail at the same time.
        # To prevent this synchronisation, each behaviour is given its own
        # initial seed to mix with the seed provided for an interaction.
        self.individual_seed = 0

    def next_interaction(self, seed: int, t: float):
        (x, state_sequence) = self.hmm.sample(1, random_state=seed ^ self.individual_seed)
        assert len(state_sequence) == 1
        assert len(x) == 1
        assert len(x[0]) == 1

        # One-hot vector for the state the HMM just visited
        chosen_state = np.array([
            1 if state_sequence[0] == n else 0 for n in range(len(self.states))
        ])

        # Update the state of where the HMM is
        self.hmm.startprob_ = chosen_state @ self.hmm.transmat_

        self.state_history.append((t, self.states[state_sequence[0]]))

        return self.observations[x[0][0]]

    def peek_interaction(self, seed: int):
        (x, state_sequence) = self.hmm.sample(1, random_state=seed ^ self.individual_seed)
        assert len(state_sequence) == 1
        assert len(x) == 1
        assert len(x[0]) == 1

        return self.observations[x[0][0]]
def main():
    rand_p_matrix = np.random.rand(4, 4)
    rand_b_matrix = np.random.rand(4, 3)

    print("\nGenerating p matrix...............")
    p_matrix = normalization(rand_p_matrix)
    print(p_matrix)

    print("\nGenerating b matrix...............")
    b_matrix = normalization(rand_b_matrix)
    print(b_matrix)

    # Generate 1000 observations
    O, Q = generate_observation(1000, p_matrix, b_matrix)

    O_seq = [1, 2, 3, 3, 1, 2, 3, 3, 1, 2, 3, 3]
    pi = (1, 0, 0, 0)
    print("\nThe Original Observation Sequence O: {}".format(O[:12]))
    print("The probability 𝑝(𝑂|𝜆) is {} with O: {}".format(
        forward(O_seq, p_matrix, b_matrix, pi)[-1].sum(), O_seq))

    print("\nThe Original Sequence Q: {}".format(Q[:12]))
    print("The Most Probable Sequence Q: {} with O: {}".format(
        list(viterbi(O_seq, p_matrix, b_matrix, pi)), O_seq))

    observations = LabelEncoder().fit_transform(O)
    model = MultinomialHMM(n_components=4)
    # hmmlearn expects a column vector of symbol indices
    model.fit(observations.reshape(-1, 1))
    est_pi = model.startprob_
    est_p = model.transmat_
    est_b = model.emissionprob_

    print("\nThe estimated transition matrix P:\n {}".format(est_p))
    print("\nThe estimated event matrix B:\n {}".format(est_b))
    print("\nThe estimated start probability pi:\n {}".format(est_pi))

    _, p = chisquare(p_matrix, est_p, axis=None)
    print("\np-value of transition matrix P: {}".format(p))
    _, p = chisquare(b_matrix, est_b, axis=None)
    print("p-value of event matrix B: {}".format(p))
    _, p = chisquare(pi, est_pi, axis=None)
    print("p-value of start probability pi: {}".format(p))
def fit(seqs, n_components=1):
    model = MultinomialHMM(n_components=n_components, startprob_prior=1.0,
                           transmat_prior=1.0, algorithm='viterbi',
                           random_state=1, n_iter=100, tol=0.01,
                           verbose=True, params='ste', init_params='ste')
    # The original snippet built the model but never trained it; concatenate
    # the sequences and pass per-sequence lengths, as hmmlearn expects.
    X = np.concatenate(seqs).reshape(-1, 1)
    lengths = [len(s) for s in seqs]
    model.fit(X, lengths)
    return model
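Assuming seqs is a list of 1-D integer symbol arrays, a call might look like:

import numpy as np

seqs = [np.array([0, 1, 1, 0, 2]), np.array([2, 1, 0])]
model = fit(seqs, n_components=2)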
def create_hmm_data(N, seq_len, x_dim, z_dim, params=None):
    from hmmlearn.hmm import MultinomialHMM  # introduces a lot of dependencies

    hmm = MultinomialHMM(n_components=z_dim)
    if params is None:
        T = np.random.random(size=(z_dim, z_dim))
        T = T / T.sum(axis=1).reshape((z_dim, 1))
        pi = np.random.random(size=(z_dim,))
        pi = pi / pi.sum()
        emit = np.random.random(size=(z_dim, x_dim))
        emit = emit / emit.sum(axis=1).reshape((z_dim, 1))
    else:
        T, pi, emit = params

    hmm.transmat_ = T
    hmm.startprob_ = pi
    hmm.emissionprob_ = emit

    X = np.zeros((N, seq_len)).astype(int)
    for i in range(N):
        x, _ = hmm.sample(n_samples=seq_len)
        X[i] = x.reshape((seq_len,))

    return (T, pi, emit), HMMData(X)
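A hypothetical call (HMMData is assumed to be a thin wrapper over the sampled array, defined elsewhere in the source):

# Draw 10 sequences of length 25 over 6 symbols from a random 3-state HMM.
(T, pi, emit), data = create_hmm_data(N=10, seq_len=25, x_dim=6, z_dim=3)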
def test_viterbi_case_random(self):
    for i in range(1000):
        # init
        self.n_state = np.random.randint(1, 10)
        self.n_output = np.random.randint(1, 10)
        self.step = np.random.randint(1, 200)

        p = np.random.random(self.n_state)
        startprob = p / p.sum()
        p = np.random.random((self.n_state, self.n_state))
        transmat = p / p.sum(axis=1).reshape(-1, 1)
        p = np.random.random((self.n_state, self.n_output))
        emissionprob = p / p.sum(axis=1).reshape(-1, 1)
        X = np.random.choice(self.n_output, self.step).reshape(-1, 1)

        # hmmlearn
        model = MultinomialHMM(n_components=self.n_state)
        model.startprob_ = startprob
        model.transmat_ = transmat
        model.emissionprob_ = emissionprob
        y = model.predict(X)

        # my hmm
        hmm = HMM()
        pred = hmm.viterbi(startprob, transmat, emissionprob, X)
        self.assertTrue(np.array_equal(y, pred))
def predict_prob(self, x, init_prob=None, window=-1):
    """Predict the probability"""
    if init_prob is None:
        init_prob = np.array(
            [1 / self.num_states for i in range(self.num_states)])

    model = MultinomialHMM(self.num_states)
    model.n_features = self.num_observations
    model.startprob_ = init_prob
    model.emissionprob_ = self.B
    model.transmat_ = self.A

    return model.predict_proba(x)
def predict(self, day_to_predict):
    # Get records of 30 days before day_to_predict
    previous_thirty_days = get_previous_month(self.time_series, day_to_predict)
    binary_crime_sequence = previous_thirty_days['Violent Crime Committed?'].values.tolist()

    # An unsupervised HMM can't account for a string of identical emissions.
    # If we see such a string, just predict the same emission for the following day.
    if binary_crime_sequence == [1] * 30:
        return True
    if binary_crime_sequence == [0] * 30:
        return False

    votes = []

    # Train three HMMs. They are initialized randomly, so we take "votes" from them.
    # Why an odd number? Odd numbers preclude ties.
    # And three is a decent tradeoff between performance and getting bad results by chance.
    for _ in range(3):
        # Train HMM
        model = MultinomialHMM(n_components=3, n_iter=10000)
        sequence = np.array(binary_crime_sequence).reshape(-1, 1)
        model.fit(sequence)

        # Determine the most likely state of the last day in the sequence
        last_state_probs = model.predict_proba(sequence)[-1]
        current_state = self.get_most_likely(last_state_probs)

        # Determine the most likely state of the day we're trying to predict
        transition_probs = model.transmat_[current_state]
        next_state = self.get_most_likely(transition_probs)

        # Determine the most likely emission (crime/no crime) from a day in that state
        emissions = model.emissionprob_[next_state]
        vote = self.get_most_likely(emissions)

        # Record this HMM's vote
        votes.append(vote)

    # Votes are 1 for crime, 0 for no crime. Return True if the majority votes for crime.
    return sum(votes) > 1
def __init__(self, n_components=1, startprob_prior=1.0, transmat_prior=1.0,
             algorithm="viterbi", random_state=None, n_iter=10, tol=1e-2,
             verbose=False, params="ste", init_params="ste"):
    MultinomialHMM.__init__(self, n_components=n_components,
                            startprob_prior=startprob_prior,
                            transmat_prior=transmat_prior,
                            algorithm=algorithm,
                            random_state=random_state,
                            n_iter=n_iter, tol=tol, verbose=verbose,
                            params=params, init_params=init_params)
def calculate_hmm_m(training_set, test_set, taxonomy, cursor, connection, settings):
    da_id_taxonomy = find_da_id(taxonomy, cursor)

    states, start_probability, transition_probability = \
        start_transition_probability_extraction(training_set, taxonomy)
    n_states = len(states)

    feature_list, emissions = extract_features_training_set(
        training_set, taxonomy, states, settings)

    con_pathes, test_obs, emissions = extract_features_test_set(
        test_set, taxonomy, feature_list, emissions, settings)

    model = MultinomialHMM(n_components=n_states)
    model.startprob_ = start_probability
    model.transmat_ = transition_probability
    model.emissionprob_ = emissions

    da_predictions(test_obs, model, con_pathes, states, da_id_taxonomy,
                   taxonomy, cursor, connection)
def run_hmm_model(input_df, n_unique, A_df, Eta, n_iter=10000,
                  tol=1e-2, verbose=False, params='e', init_params=''):
    '''
    Runs the HMM model and returns the predicted results, score and model

    input_df    : The dataframe of keypresses
    n_unique    : Number of unique chars
    A_df        : Dataframe of the transition matrix
    Eta         : Emissions matrix
    n_iter      : Max number of iterations for the hmm
    tol         : Stop the hmm if the score does not improve by more than this
    verbose     : Whether or not to print out
    params      : Parameters to tune
    init_params : Parameters to initialize
    '''
    # Proportion of characters starting words in English
    char_counts = get_char_counts()

    # Construct model
    hmm = MultinomialHMM(n_components=n_unique,
                         startprob_prior=np.append(0, char_counts.values),
                         transmat_prior=A_df.values,
                         algorithm='viterbi',
                         random_state=None,
                         n_iter=n_iter,
                         tol=tol,
                         verbose=verbose,
                         params=params,
                         init_params=init_params)

    # Set values
    hmm.emissionprob_ = Eta
    hmm.transmat_ = A_df.values
    hmm.startprob_ = np.append(0, char_counts.values)

    # Feed in the clusters as the expected output
    model_input = input_df['cluster'].values

    # Reshape
    if len(model_input.shape) == 1:
        model_input = model_input.reshape((len(model_input), 1))

    # Fit the model
    hmm = hmm.fit(model_input)

    # Score model
    score, results = hmm.decode(model_input)

    return score, results, hmm
def buildHMM(num_states, n_iter=10, tol=0.01):
    model = MultinomialHMM(n_components=num_states, n_iter=n_iter, tol=tol)
    model.n_features = 3  # observations are symbols in {0, 1, 2}
    return model
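A minimal sketch of fitting the returned model on synthetic symbols in {0, 1, 2}, matching the hard-coded n_features of 3:

import numpy as np

model = buildHMM(num_states=4, n_iter=50)
X = np.random.randint(0, 3, size=(200, 1))   # one long synthetic sequence
model.fit(X)
states = model.predict(X)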
smoothing_tolerance = 1     # number of indices
sampling_interval = 3600    # seconds

discrete_obs, delta_hws, delta_fas = [], [], []
for idx in mice:
    d = _data_on_mouse(data, idx, smoothing_time_radius,
                       smoothing_amplitude_radius, smoothing_tolerance,
                       sampling_interval, bins)
    discrete_obs.append(d[0])
    delta_hws.append(d[1])
    delta_fas.append(d[2])

X = np.array(discrete_obs)
model = MultinomialHMM(n_components=n_components)

# Leave-one-out: fit on the other six mice, then decode the held-out mouse
predictions = []
for i in range(7):
    held_out_X = np.vstack((X[:i], X[i+1:]))
    model.fit(held_out_X)
    predictions.append(model.decode(X[i].reshape(X[i].shape[0], 1)))

f, axarr = plt.subplots(7, 1)
yranges = np.arange(n_components + 1, dtype=float) / n_components
colors = plt.cm.rainbow(np.linspace(0, 1, n_components))
for i in range(7):
    states, indices = _axvspan_maker(predictions[i][1])
    for s, idxs in zip(states, indices):
        axarr[i].axvspan(idxs[0], idxs[1], ymin=yranges[s],
                         ymax=yranges[s+1], color=colors[s])
plt.show()