def hmm():
    """Build the unfitted two-state multinomial HMM.

    The original author recorded ``vocabulary-acc = 0.9369`` for this
    configuration.

    :return: a ``MultinomialHMM`` created with ``n_components=2``,
        ``n_iter=100`` and ``algorithm="viterbi"``
    """
    return MultinomialHMM(algorithm="viterbi", n_iter=100, n_components=2)
def main():
    """Compare the PyTorch ``HMM`` scores with hmmlearn on random data.

    Draws random row-normalised transition, start and emission parameters
    for a 5-state / 10-symbol model, samples 20 sequences of length 25 from
    the hmmlearn model, copies the same parameters into the PyTorch ``HMM``
    and asserts that ``phmm.log_marginal`` and ``hmm.score`` differ by less
    than 1e-4 on every sequence.
    """
    hmm = MultinomialHMM(n_components=5)

    T = np.random.random(size=(5, 5))
    T = T / T.sum(axis=1).reshape((5, 1))
    hmm.transmat_ = T

    pi = np.random.random(size=(5,))
    pi = pi / pi.sum()
    hmm.startprob_ = pi

    emit = np.random.random(size=(5, 10))
    emit = emit / emit.sum(axis=1).reshape((5, 1))
    hmm.emissionprob_ = emit

    # ``np.int`` was removed in NumPy 1.24; the builtin ``int`` yields the
    # same dtype and works on every NumPy version.
    X = np.zeros((20, 25), dtype=int)
    for i in range(20):
        x, _ = hmm.sample(n_samples=25)
        X[i] = x.reshape((25,))

    # load the PyTorch HMM (it is given the transposed matrices)
    phmm = HMM(z_dim=5, x_dim=10)
    phmm.T = torch.Tensor(T.T)
    phmm.pi = torch.Tensor(pi)
    phmm.emit = torch.Tensor(emit.T)

    # compute PyTorch HMM forward-backward
    my_marginals = phmm.log_marginal(torch.Tensor(X.T))

    # compute hmmlearn version, one sequence at a time
    true_marginals = np.zeros(20)
    for i in range(20):
        true_marginals[i] = hmm.score(X[i].reshape((-1, 1)))

    assert np.abs(true_marginals - my_marginals.numpy()).max() < 1e-4
def main():
    """Plot AIC, BIC and log-likelihood for HMMs with 2..29 hidden states.

    Generates random ``p`` (4x4) and ``b`` (4x3) matrices, normalises them,
    draws 1000 observations from them and then fits one ``MultinomialHMM``
    per candidate state count, collecting the three model-selection scores
    before plotting each series.
    """
    rand_p_matrix = np.random.rand(4, 4)
    rand_b_matrix = np.random.rand(4, 3)

    print("\nGernerating p matrix...............")
    p_matrix = normalization(rand_p_matrix)
    print(p_matrix)

    print("\nGernerating b matrix...............")
    b_matrix = normalization(rand_b_matrix)
    print(b_matrix)

    # Generate 1000 observations
    O, _ = generate_observation(1000, p_matrix, b_matrix)

    # The encoded observations do not depend on the number of states, so
    # they are computed once instead of on every loop iteration.
    observations = LabelEncoder().fit_transform(O)
    observations_2d = np.atleast_2d(observations)

    # training the selection of number of states
    aic = []
    bic = []
    likelihood = []
    m = 3
    print("\nTraining the HMM for selection of number of states........")
    for n in range(2, 30):
        model = MultinomialHMM(n_components=n, random_state=200263453)
        model.fit(observations_2d)
        logL = model.score(observations_2d)
        p = compute_p(n, m)
        likelihood.append(logL)
        aic.append(AIC(logL, p))
        bic.append(BIC(logL, observations, p))

    plot(aic, 'AIC')
    plot(bic, 'BIC')
    plot(likelihood, 'Log likelihood')
def initHMM(self):
    """Create the multinomial HMM and store it on ``self._hmm``.

    The model gets ``self._N`` components and ``self._maxIters`` iterations,
    is verbose, and uses ``'ste'`` for both ``params`` and ``init_params``.
    """
    hmm_options = dict(
        n_components=self._N,
        n_iter=self._maxIters,
        verbose=True,
        params='ste',
        init_params='ste',
    )
    self._hmm = MultinomialHMM(**hmm_options)
def train(self, obs_seq_list: list, state_seq_list: list, obs_set: list, state_set: list, file):
    """Store the training data, build the HMM and optionally pickle ``self``.

    :param obs_seq_list: observation sequence list [[o1, o2, o3], [o1, o2, o3]...]
    :param state_seq_list: state sequence list [[s1, s2, s3], [s1, s2, s3]...]
    :param obs_set: all possible observation state
    :param state_set: all possible state
    :param file: path the trained object is pickled to; nothing is written
        when it is ``None``
    """
    self.obs_seq_list = obs_seq_list
    self.state_seq_list = state_seq_list
    self.obs_set = obs_set
    self.state_set = state_set

    # NOTE(review): ''.join requires every element of state_seq_list to be
    # a string -- confirm against the callers.
    self.counter = Counter(''.join(state_seq_list))
    self.hmm = MultinomialHMM(n_components=len(self.state_set))

    # Compute all three parameter sets before storing any of them.
    start = self._init_state()
    trans = self._trans_state()
    emit = self._emit_state()
    self.startprob, self.transmat, self.emissionprob = start, trans, emit

    self.hmm.startprob_ = self.startprob
    self.hmm.transmat_ = self.transmat
    self.hmm.emissionprob_ = self.emissionprob

    if file is None:
        return
    with open(file, 'wb') as f:
        pickle.dump(self, f)
def initHMM(self, length):
    """Return a four-state multinomial HMM with hand-set parameters.

    States, in row order: Pre, HQ, PostQuiet, PostActive. The probability of
    leaving Pre (and of leaving HQ) is ``1 / length``; both Post states are
    absorbing. Emission columns are A0, A1, A2.
    """
    leave = 1.0 / length
    stay = 1 - leave

    # Transition probabilities (row = source state)
    pre_row = [stay, leave, 0, 0]
    hq_row = [0, stay, leave / 2, leave / 2]
    post_quiet_row = [0, 0, 1, 0]
    post_active_row = [0, 0, 0, 1]
    trans = np.array([pre_row, hq_row, post_quiet_row, post_active_row])

    # Emission probabilities (row = state, columns = A0, A1, A2)
    eps = 1e-4
    emit = np.array([
        [0.25, 0.25, 0.50],        # Emit | Pre
        [0.16, 0.84 - eps, eps],   # Emit | HQ
        [0.90, 0.10 - eps, eps],   # Emit | PostQuiet
        [0.25, 0.25, 0.50],        # Emit | PostActive
    ])

    # Start state distribution (never starts in PostActive)
    start = np.array([0.34, 0.33, 0.33, 0])

    # NOTE(review): nStates is not defined in this function; it is expected
    # to come from an enclosing scope and to match the 4 rows above.
    model = MultinomialHMM(n_components=nStates)
    model.transmat_ = trans
    model.startprob_ = start
    model.emissionprob_ = emit
    return model
def test_viterbi_case_random(self):
    """Check ``HMM.viterbi`` against hmmlearn on 1000 random models."""

    def _row_normalised(shape):
        # Random non-negative matrix whose rows each sum to one.
        raw = np.random.random(shape)
        return raw / raw.sum(axis=1).reshape(-1, 1)

    for _ in range(1000):
        # init: random problem size
        self.n_state = np.random.randint(1, 10)
        self.n_output = np.random.randint(1, 10)
        self.step = np.random.randint(1, 200)

        raw_start = np.random.random(self.n_state)
        startprob = raw_start / raw_start.sum()
        transmat = _row_normalised((self.n_state, self.n_state))
        emissionprob = _row_normalised((self.n_state, self.n_output))

        X = np.random.choice(self.n_output, self.step).reshape(-1, 1)

        # hmmlearn reference
        reference = MultinomialHMM(n_components=self.n_state)
        reference.startprob_ = startprob
        reference.transmat_ = transmat
        reference.emissionprob_ = emissionprob
        expected = reference.predict(X)

        # implementation under test
        actual = HMM().viterbi(startprob, transmat, emissionprob, X)
        self.assertTrue(np.array_equal(expected, actual))
def __init__(self, model_file=None, components=None):
    """Load a fitted HMM from ``model_file`` or train one on Alu sequences.

    :param model_file: path of a joblib dump of the model. When it exists
        the model is loaded from it; otherwise a new model is trained and,
        if a path was given, saved there. ``None`` (the default) means
        "train and do not persist".
    :param components: number of hidden states for a newly trained model.
    """
    # Guard against the default ``None``: os.path.exists(None) raises
    # TypeError instead of returning False.
    # NOTE(review): joblib.load unpickles the file -- only use trusted paths.
    if model_file is not None and os.path.exists(model_file):
        self.model = joblib.load(model_file)
        return

    alu_file = 'Alu_sequence.pkl'
    if os.path.exists(alu_file):
        locis = joblib.load(alu_file)
    else:
        # Build the cache: sample 100000 loci, decode them and keep only
        # those that produced a sequence.
        locis = read_sequence('hg19_Alu.bed', 0)
        locis = random.sample(locis, 100000)
        for loci in tqdm(locis):
            loci.init_seq()
            loci.decode_seq()
        locis = [loci for loci in locis if loci.seq is not None]
        joblib.dump(locis, alu_file)
    print('Alu Loaded')

    # Train on the first 5000 loci only.
    locis = locis[0:5000]
    model = MultinomialHMM(n_components=components, verbose=True, n_iter=50)
    x = np.concatenate([loci.seq for loci in locis])
    x = np.reshape(x, [x.shape[0], 1])
    lengths = [loci.length for loci in locis]
    model.fit(x, lengths)
    self.model = model

    # Persist only when the caller supplied a destination.
    if model_file is not None:
        joblib.dump(self.model, model_file)
def create_hmm_data(N, seq_len, x_dim, z_dim, params=None):
    """Sample ``N`` discrete sequences of length ``seq_len`` from an HMM.

    :param N: number of sequences to draw
    :param seq_len: length of every sequence
    :param x_dim: number of observation symbols (used for random emissions)
    :param z_dim: number of hidden states
    :param params: optional ``(T, pi, emit)`` tuple; when ``None`` random
        row-normalised parameters are generated
    :return: ``((T, pi, emit), HMMData(X))`` where ``X`` is an integer array
        of shape ``(N, seq_len)``
    """
    from hmmlearn.hmm import MultinomialHMM  # introduces a lot of dependencies

    hmm = MultinomialHMM(n_components=z_dim)

    if params is None:
        T = np.random.random(size=(z_dim, z_dim))
        T = T / T.sum(axis=1).reshape((z_dim, 1))

        pi = np.random.random(size=(z_dim,))
        pi = pi / pi.sum()

        emit = np.random.random(size=(z_dim, x_dim))
        emit = emit / emit.sum(axis=1).reshape((z_dim, 1))
    else:
        T, pi, emit = params

    hmm.transmat_ = T
    hmm.startprob_ = pi
    hmm.emissionprob_ = emit

    # ``np.int`` was removed in NumPy 1.24; the builtin ``int`` yields the
    # same dtype and works on every NumPy version.
    X = np.zeros((N, seq_len), dtype=int)
    for i in range(N):
        x, _ = hmm.sample(n_samples=seq_len)
        X[i] = x.reshape((seq_len,))

    return (T, pi, emit), HMMData(X)
def buildHMM(HMMFactory):
    """Return a two-state multinomial HMM parameterised by ``HMMFactory``.

    The start, transition and emission probabilities are taken from
    ``hiddenProb()``, ``transMatrix()`` and ``emissionMatrix()`` in that
    order; the model is created with ``n_iter=200``.
    """
    hmm = MultinomialHMM(n_iter=200, n_components=2)
    hmm.startprob_ = HMMFactory.hiddenProb()
    hmm.transmat_ = HMMFactory.transMatrix()
    hmm.emissionprob_ = HMMFactory.emissionMatrix()
    return hmm
def predict(self, x, init_prob=None, method='hmmlearn', window=-1):
    """Predict the hidden-state sequence for ``x``.

    :param x: observation sequence; it is sliced along its first axis and
        passed to the decoder (assumes the layout the decoder expects --
        TODO confirm against callers)
    :param init_prob: initial state distribution; uniform over
        ``self.num_states`` when ``None``
    :param method: ``'hmmlearn'`` decodes with a ``MultinomialHMM`` loaded
        with ``self.A`` / ``self.B``; any other value uses ``self.decode``
    :param window: ``-1`` decodes the whole sequence at once; otherwise the
        first ``window`` steps are decoded together and every later step is
        the last state of a decode over the trailing ``window`` observations
    :return: the decoder output for ``window == -1``, else an integer array
        with one state per row of ``x``
    """
    if init_prob is None:
        init_prob = np.array(
            [1 / self.num_states for _ in range(self.num_states)])

    # Pick the decoder once so the windowing logic below is shared.
    if method == 'hmmlearn':
        model = MultinomialHMM(self.num_states, n_iter=100)
        model.n_features = self.num_observations
        model.startprob_ = init_prob
        model.emissionprob_ = self.B
        model.transmat_ = self.A
        decode = model.predict
    else:
        def decode(segment):
            return self.decode(segment, init_prob)

    if window == -1:
        return decode(x)

    # ``np.int`` was removed in NumPy 1.24; the builtin ``int`` yields the
    # same dtype and works on every NumPy version.
    result = np.zeros(x.shape[0], dtype=int)
    result[0:window] = decode(x[0:window])
    for i in range(window, x.shape[0]):
        result[i] = decode(x[i - window + 1:i + 1])[-1]
    return result
def fit_hmm_learn(X, n_states):
    """Fit a multinomial HMM on the sequences in ``X`` and label them.

    The sequences are concatenated into one sample array and their
    individual lengths are passed alongside it to both ``fit`` and
    ``predict``. Returns the output of ``predict`` on the training data.
    """
    seq_lengths = [len(seq) for seq in X]
    stacked = np.concatenate(X)

    model = MultinomialHMM(n_components=n_states)
    model.fit(stacked, seq_lengths)

    # Label data using the hmmlearn model
    return model.predict(stacked, seq_lengths)
def get_model(self):
    """Initialise the HMM model.

    Returns a ``MultinomialHMM`` with one component per entry of
    ``self.states`` and with ``self.init_p``, ``self.trans_p`` and
    ``self.emit_p`` as start, transition and emission probabilities.
    """
    hmm = MultinomialHMM(n_components=len(self.states))
    hmm.startprob_ = self.init_p
    hmm.transmat_ = self.trans_p
    hmm.emissionprob_ = self.emit_p
    return hmm
def get_model(self):
    """Return a multinomial HMM built from this object's parameters.

    The model has ``self.get_max()`` components, is created with
    ``params='e'`` and ``init_params=''``, and receives its start,
    transition and emission probabilities from ``get_start()``,
    ``get_transition()`` and ``get_emission()``.
    """
    hmm = MultinomialHMM(
        n_components=self.get_max(),
        params='e',
        init_params='',
    )
    hmm.startprob_ = self.get_start()
    hmm.transmat_ = self.get_transition()
    hmm.emissionprob_ = self.get_emission()
    return hmm
def get_hmm(df, n_components, n_features):
    """Build a multinomial HMM from the ``label`` and ``pred`` columns of ``df``.

    The transition matrix comes from the state list derived from
    ``df["label"]``, the emission matrix from the predictions in
    ``df["pred"]`` together with that state list, and the start
    distribution is the fixed vector ``[0.5, 0.05, 0.4, 0.05]``.
    """
    state_list = get_ubie_label(df["label"])[1]
    pred_list = get_pred_for_hmm(df["pred"])

    model = MultinomialHMM(n_components=n_components)
    model.n_features = n_features
    model.transmat_ = get_transmat(state_list)
    model.emissionprob_ = get_emission(pred_list, state_list)
    # NOTE(review): the start vector has four entries regardless of
    # n_components -- confirm callers always use four states.
    model.startprob_ = np.array([0.5, 0.05, 0.4, 0.05])
    return model
def fit(seqs, n_components=1):
    """Fit a multinomial HMM on ``seqs`` and return it.

    Previously the model was constructed and immediately discarded, so the
    function neither used ``seqs`` nor produced anything.

    :param seqs: iterable of observation sequences (assumes each one is a
        1-D sequence of integer symbols or an ``(n, 1)`` array -- TODO
        confirm against callers)
    :param n_components: number of hidden states
    :return: the fitted ``MultinomialHMM``
    """
    import numpy as np

    model = MultinomialHMM(n_components=n_components,
                           startprob_prior=1.0,
                           transmat_prior=1.0,
                           algorithm='viterbi',
                           random_state=1,
                           n_iter=100,
                           tol=0.01,
                           verbose=True,
                           params='ste',
                           init_params='ste')

    # hmmlearn takes all sequences stacked into one column plus the length
    # of each individual sequence.
    columns = [np.asarray(seq).reshape(-1, 1) for seq in seqs]
    lengths = [len(column) for column in columns]
    model.fit(np.concatenate(columns), lengths)
    return model
def build_hmm(trans, emis, seed=1):
    """Build and return an HMM from transition and emission matrices.

    The number of states is ``trans.shape[0]`` and the number of features
    is ``emis.shape[1]``. The start distribution puts all mass on the first
    state.
    """
    n_states = trans.shape[0]
    model = MultinomialHMM(n_components=n_states,
                           algorithm="viterbi",
                           random_state=seed)
    model.n_features = emis.shape[1]
    model.emissionprob_ = emis
    # We will always start at the first state
    model.startprob_ = np.array([1] + [0] * (n_states - 1))
    model.transmat_ = trans
    return model
def predict_prob(self, x, init_prob=None, window=-1):
    """Return ``predict_proba(x)`` from an HMM loaded with ``self.A``/``self.B``.

    ``init_prob`` defaults to a uniform distribution over
    ``self.num_states``. ``window`` is accepted but not used by this method.
    """
    if init_prob is None:
        init_prob = np.array(
            [1 / self.num_states for _ in range(self.num_states)])

    hmm = MultinomialHMM(self.num_states)
    hmm.n_features = self.num_observations
    hmm.startprob_ = init_prob
    hmm.emissionprob_ = self.B
    hmm.transmat_ = self.A

    posteriors = hmm.predict_proba(x)
    return posteriors
def train_hmm():
    """HMM for sequence learning.

    Loads the training sequence from ``./train_data/*``, fits a two-state
    ``MultinomialHMM`` on it and returns the fitted model (previously the
    model was discarded).
    """
    # print() with a single string argument works on Python 2 and 3 alike;
    # the former print statements were a SyntaxError on Python 3.
    print("Loading training data...")
    train_sequence, _ = get_sequence("./train_data/*")

    print("Build HMM...")
    model = MultinomialHMM(n_components=2)

    print("Train HMM...")
    # NOTE(review): passing a list of sequences is the legacy fit() calling
    # convention -- confirm it matches the installed hmmlearn version.
    model.fit([train_sequence])
    return model
def __init__(self, observed):
    """Store the observations and create an unfitted two-state HMM.

    Args:
        observed: array-like, shape (n_samples, n_features). One-dimensional
            input is turned into a single column.
    """
    samples = np.array(observed)
    if len(samples.shape) == 1:
        samples = samples.reshape(-1, 1)
    self.observed = samples

    # TODO: Check other parameters to this constructor
    self.model = MultinomialHMM(n_iter=100, n_components=2)
def __init__(self, M):
    """Create the two ``M``-state HMMs and the label lookup tables.

    ``daID`` maps each of the 15 labels to its index in the list below and
    ``da_choose_n`` iterates over every 4-label combination of that list.
    """
    da_labels = [
        'ass', 'bck', 'be.neg', 'be.pos', 'el.ass',
        'el.inf', 'el.sug', 'el.und', 'fra', 'inf',
        'off', 'oth', 'stl', 'sug', 'und',
    ]

    self.con = MultinomialHMM(n_components=M)
    self.incon = MultinomialHMM(n_components=M)
    self.daID = {label: index for index, label in enumerate(da_labels)}
    # itertools.combinations returns an iterator, so this can be consumed
    # only once.
    self.da_choose_n = itertools.combinations(da_labels, 4)
def __init__(self):
    """Set up the state/observation lists, the HMM and the bookkeeping fields."""
    self.states = list(CapabilityBehaviourState)
    self.observations = list(InteractionObservation)

    n_states = len(self.states)
    self.hmm = MultinomialHMM(n_components=n_states)

    self.state_history = []

    # Similar capabilities tend to sit in the same state at the same time,
    # so they would otherwise all produce the same results (for example
    # every VeryGood behaviour failing together). To avoid that
    # synchronisation each behaviour carries its own initial seed, which is
    # mixed with the seed provided for an interaction.
    self.individual_seed = 0
def get_hmm_model(state):
    """Create a ``MultinomialHMM`` from stored probability tables.

    Input:
    - state: dictionary keyed by the names of the
        ``HiddenMarkovModelProbability`` members (``emission``, ``start``,
        ``transition``); each value is the matching probability matrix or
        array.
    Returns: a ``MultinomialHMM`` with ``len(SleepStage)`` components whose
        probabilities are taken from ``state``.
    """
    emission = state[HiddenMarkovModelProbability.emission.name]
    start = state[HiddenMarkovModelProbability.start.name]
    transition = state[HiddenMarkovModelProbability.transition.name]

    model = MultinomialHMM(n_components=len(SleepStage))
    model.emissionprob_ = emission
    model.startprob_ = start
    model.transmat_ = transition
    return model
def detect_events_hmm(mahal_timeseries, c_timeseries, global_pace_timeseries,
                      threshold_quant=.95, trans_matrix=DEFAULT_TRANS_MATRIX,
                      emission_matrix=DEFAULT_EMISSION_MATRIX, initial_state=None):
    """Detect events by decoding an outlier indicator series with a 2-state HMM.

    Every date gets symbol 1 when its Mahalanobis value exceeds the
    ``threshold_quant`` quantile or its ``c`` value equals 1, and symbol 0
    otherwise. The symbols are decoded with the HMM and the decoded states
    are turned into events by ``get_all_events``.

    Returns ``(events, predictions)`` with events sorted by their element at
    index 2, largest first.
    """
    # Process the timeseries in chronological key order
    dates = sorted(mahal_timeseries)
    expected_pace_timeseries, sd_pace_timeseries = getExpectedPace(global_pace_timeseries)

    # Per-date value lists, aligned with ``dates``
    mahal_list = [mahal_timeseries[day] for day in dates]
    c_list = [c_timeseries[day] for day in dates]
    global_pace_list = [global_pace_timeseries[day] for day in dates]
    expected_pace_list = [expected_pace_timeseries[day] for day in dates]

    # Use the quantile to determine the threshold
    threshold = getQuantile(sorted(mahal_list), threshold_quant)

    # "1" marks an outlier, "0" a normal observation
    symbols = [
        1 if (mahal > threshold or flag == 1) else 0
        for mahal, flag in zip(mahal_list, c_list)
    ]

    # Actually set up the hmm
    model = MultinomialHMM(n_components=2, transmat=trans_matrix, startprob=initial_state)
    model.emissionprob_ = emission_matrix

    # Make the predictions
    _, predictions = model.decode(symbols)

    events = get_all_events(predictions, dates, mahal_list, global_pace_list,
                            expected_pace_list)

    # Sort events by duration, starting with the long events
    events.sort(key=lambda event: event[2], reverse=True)
    return events, predictions
def test_DiscreteHMM_decode(cases: str) -> None:
    """Compare ``DiscreteHMM.decode`` with hmmlearn on random sequences.

    For each case a ``MultinomialHMM`` is fitted on random data, its learned
    parameters are copied into a ``DiscreteHMM``, and both the total decode
    log-probability and the concatenated state sequence must match to four
    decimals. The loop counter starts at 1, so ``int(cases) - 1`` cases run.
    """
    np.random.seed(12346)
    N_decimal = 4
    tol = 1e-3

    for _ in range(1, int(cases)):
        n_samples = np.random.randint(10, 50)
        hidden_states = np.random.randint(3, 6)
        # symbols is the number of unique observation types.
        symbols = np.random.randint(4, 9)

        # Every sequence is a random permutation of all symbols.
        X = [
            np.random.choice(range(symbols), size=symbols, replace=False)
            for _ in range(n_samples)
        ]
        lengths = [symbols] * n_samples

        hmm_gold = MultinomialHMM(n_components=hidden_states, n_iter=100, tol=tol)
        X_gold = np.concatenate(X).reshape((-1, 1))
        hmm_gold.fit(X_gold, lengths)
        gold_logprob, gold_state_sequence = hmm_gold.decode(X_gold, lengths)

        hmm_mine = DiscreteHMM(hidden_states=hidden_states,
                               symbols=symbols,
                               A=hmm_gold.transmat_,
                               B=hmm_gold.emissionprob_,
                               pi=hmm_gold.startprob_)

        # Decode sequence by sequence, then combine like hmmlearn does.
        decoded = [hmm_mine.decode(sequence) for sequence in X]
        mine_logprob = sum([logprob for logprob, _ in decoded])
        mine_state_sequence = np.concatenate([states for _, states in decoded])

        assert_almost_equal(mine_logprob, gold_logprob, decimal=N_decimal)
        assert_almost_equal(mine_state_sequence, gold_state_sequence, decimal=N_decimal)

    print('Successfully testing the function of computing decodes in discrete HMM!')
def _init_HMM(self):
    """Create the two-state slip / no-slip HMM with fixed probabilities."""
    rospy.loginfo('[slip_detector] Instantiating HMM...')
    self.possible_states = ['slip', 'no_slip']

    # Instantiate the model with one component per possible state:
    model = MultinomialHMM(n_components=len(self.possible_states))

    # Initial state, state transition, and observation probabilities
    # (rows follow the order of ``possible_states``):
    model.startprob_ = np.array([0.2, 0.8])
    model.transmat_ = np.array([[0.5, 0.5],
                                [0.3, 0.7]])
    model.emissionprob_ = np.array([[0.1, 0.1, 0.8, 0.0],
                                    [0.1, 0.1, 0.2, 0.6]])
    self.HMM = model
def main():
    """Generate an HMM, run forward/Viterbi on a fixed sequence and re-estimate it.

    Random ``p`` (4x4) and ``b`` (4x3) matrices are normalised and used to
    draw 1000 observations. The forward probability and Viterbi path are
    printed for a fixed 12-symbol sequence, then a four-state
    ``MultinomialHMM`` is fitted on the observations and its estimated
    parameters are compared with the originals via ``chisquare``.
    """
    rand_p_matrix = np.random.rand(4, 4)
    rand_b_matrix = np.random.rand(4, 3)

    print("\nGernerating p matrix...............")
    p_matrix = normalization(rand_p_matrix)
    print(p_matrix)

    print("\nGernerating b matrix...............")
    b_matrix = normalization(rand_b_matrix)
    print(b_matrix)

    # Generate 1000 observations
    O, Q = generate_observation(1000, p_matrix, b_matrix)

    O_seq = [1, 2, 3, 3, 1, 2, 3, 3, 1, 2, 3, 3]
    pi = (1, 0, 0, 0)

    print("\nThe Orginal Observation Sequence O: {}".format(O[:12]))
    sequence_prob = forward(O_seq, p_matrix, b_matrix, pi)[-1].sum()
    print("The probability 𝑝(𝑂|𝜆) is {} with O: {}".format(sequence_prob, O_seq))

    print("\nThe Orginal Sequence Q: {}".format(Q[:12]))
    best_path = list(viterbi(O_seq, p_matrix, b_matrix, pi))
    print("The Most Probable Sequence Q: {} with O: {}".format(best_path, O_seq))

    # Re-estimate the parameters from the encoded observations
    encoded = LabelEncoder().fit_transform(O)
    model = MultinomialHMM(n_components=4)
    model.fit(np.atleast_2d(encoded))

    est_pi = model.startprob_
    est_p = model.transmat_
    est_b = model.emissionprob_

    print("\nThe estimated transition matrix P:\n {}".format(est_p))
    print("\nThe estimated event matrix B:\n {}".format(est_b))
    print("\nThe estimated start probability pi:\n {}".format(est_pi))

    # Chi-square comparison of each original parameter with its estimate
    comparisons = (
        ("\np-value of transition matrix P: {}", p_matrix, est_p),
        ("p-value of event matrix B: {}", b_matrix, est_b),
        ("p-value of start probability pi: {}", pi, est_pi),
    )
    for message, original, estimate in comparisons:
        p_value = chisquare(original, estimate, axis=None)[1]
        print(message.format(p_value))
def test_viterbi_case_handcraft(self):
    """Check ``HMM.viterbi`` against hmmlearn on one hand-written model."""
    # init: two states, three output symbols, nine observations
    startprob = np.array([0.6, 0.4])
    transmat = np.array([[0.7, 0.3],
                         [0.4, 0.6]])
    emissionprob = np.array([[0.1, 0.4, 0.5],
                             [0.6, 0.3, 0.1]])
    X = np.array([1, 0, 2, 0, 2, 1, 0, 1, 1]).reshape(-1, 1)

    # hmmlearn reference
    reference = MultinomialHMM(n_components=2)
    reference.startprob_ = startprob
    reference.transmat_ = transmat
    reference.emissionprob_ = emissionprob
    expected = reference.predict(X)

    # implementation under test
    actual = HMM().viterbi(startprob, transmat, emissionprob, X)
    self.assertTrue(np.array_equal(expected, actual))
def fit(self, data):
    """
    Estimates model parameters by fitting one HMM (Gaussian or multinomial,
    depending on ``self.model_type``) per class label.

    :param data: matrix with the dimensions [number of datapoints][2][1 or 2]
    In the first matrix dimension, each datapoint will be stored. In the
    second dimension, at index 0, the veracity label of a given rumour will
    be stored. At index 1, the features will be stored. The third dimension
    will be of size 1 or 2, depending on whether only SDQC labels are used
    for the prediction, or timestamps are also included as features.
    :return: the HMM model, with sub-models fitted for each data label
    """
    # Read the feature count from the first datapoint; indexing data[1]
    # raised IndexError when only a single datapoint was supplied.
    feature_count = len(data[0][1][0])

    # partition data in labels
    classes = dict()
    for datapoint in data:
        classes.setdefault(datapoint[0], []).append(datapoint[1])

    # Make and fit model for each label
    for veracity_label, sdqc_labels in classes.items():
        # Labels that already have a model are left untouched, so skip the
        # flattening work for them.
        if veracity_label in self.models:
            continue

        lengths = [len(x) for x in sdqc_labels]
        thread_flat = np.array(flatten(sdqc_labels)).reshape(
            -1, feature_count)

        if self.model_type == 'gaussian':
            self.models[veracity_label] = GaussianHMM(
                n_components=self.components).fit(thread_flat, lengths=lengths)
        elif self.model_type == 'multinomial':
            # If timestamps are used, the MultinomialHMM ignores these,
            # as it does not support float values
            thread_flat = [[int(x[0])] for x in thread_flat]
            self.models[veracity_label] = MultinomialHMM(
                n_components=self.components).fit(thread_flat, lengths=lengths)

    return self
high = high + 1 elif percent >= .50: highMid = highMid + 1 elif percent >= .25: lowMid = lowMid + 1 else: low = low + 1 matrix[1, 0] = low / len(wins) matrix[1, 1] = lowMid / len(wins) matrix[1, 2] = highMid / len(wins) matrix[1, 3] = high / len(wins) return matrix # Load Data filename = 'data.csv' X = np.loadtxt(filename, delimiter=',') player1 = X[:, 0] player2 = X[:, 1] record = X[:, 2] print "stateProbs(record)", stateProbs(record) print "eProbs(player1, record", eProbs(player1, record) clf = MultinomialHMM(n_components=2) clf.transmat_ = stateProbs(record) clf.emissionprob_ = eProbs(player1, record) print "here" clf.fit(clf.transmat_, clf.emissionprob_) clf.predict(player1)