Example #1
def hmm():
    """
    vocabulary-acc = 0.9369
    :return: the MultinomialHMM model
    """
    model = MultinomialHMM(n_components=2, n_iter=100, algorithm="viterbi")
    return model
Example #2
def main():
    hmm = MultinomialHMM(n_components=5)
    
    T = np.random.random(size=(5, 5))
    T = T/T.sum(axis=1).reshape((5, 1))
    hmm.transmat_ = T

    pi = np.random.random(size=(5,))
    pi = pi/pi.sum()
    hmm.startprob_ = pi

    emit = np.random.random(size=(5, 10))
    emit = emit/emit.sum(axis=1).reshape((5, 1))
    hmm.emissionprob_ = emit

    X = np.zeros((20, 25)).astype(int)
    for i in range(20):
        x, _ = hmm.sample(n_samples=25)
        X[i] = x.reshape((25,))

    # load the PyTorch HMM
    phmm = HMM(z_dim=5, x_dim=10)
    phmm.T = torch.Tensor(T.T)
    phmm.pi = torch.Tensor(pi)
    phmm.emit = torch.Tensor(emit.T)

    # compute PyTorch HMM forward-backward
    my_marginals = phmm.log_marginal(torch.Tensor(X.T))

    # compute hmmlearn version
    true_marginals = np.zeros(20)
    for i in range(20):
        true_marginals[i] = hmm.score(X[i].reshape((-1, 1)))

    assert np.abs(true_marginals - my_marginals.numpy()).max() < 1e-4
Example #3
def main():
    rand_p_matrix = np.random.rand(4, 4)
    rand_b_matrix = np.random.rand(4, 3)

    print("\nGernerating p matrix...............")
    p_matrix = normalization(rand_p_matrix)
    print(p_matrix)

    print("\nGernerating b matrix...............")
    b_matrix = normalization(rand_b_matrix)
    print(b_matrix)

    # Generate 1000 observations
    O, _ = generate_observation(1000, p_matrix, b_matrix)

    # training the selection of number of states
    aic = []
    bic = []
    likelihood = []
    m = 3
    print("\nTraining the HMM for selection of number of states........")
    for n in range(2, 30):
        observations = LabelEncoder().fit_transform(O).reshape(-1, 1)
        model = MultinomialHMM(n_components=n, random_state=200263453)
        model.fit(observations)
        logL = model.score(observations)
        p = compute_p(n, m)
        a = AIC(logL, p)
        b = BIC(logL, observations, p)
        likelihood.append(logL)
        aic.append(a)
        bic.append(b)
    plot(aic, 'AIC')
    plot(bic, 'BIC')
    plot(likelihood, 'Log likelihood')
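The `compute_p`, `AIC`, and `BIC` helpers are not part of this excerpt. A minimal sketch under the standard definitions, assuming the usual free-parameter count for an n-state discrete HMM over m symbols:

import numpy as np

def compute_p(n, m):
    # Free parameters of an n-state discrete HMM over m symbols:
    # (n - 1) start probs + n * (n - 1) transition probs + n * (m - 1) emission probs
    return (n - 1) + n * (n - 1) + n * (m - 1)

def AIC(logL, p):
    # Akaike information criterion: 2p - 2 log L
    return 2 * p - 2 * logL

def BIC(logL, observations, p):
    # Bayesian information criterion: p log N - 2 log L, with N observations
    return p * np.log(len(observations)) - 2 * logL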
Example #4
    def initHMM(self):
        # self._hmm = MultinomialHMM(n_components=self._N, startprob_prior=None, transmat_prior=None, 
        #     algorithm='viterbi', random_state=None, n_iter=self._maxIters, tol=0.01, 
        #     verbose=True, params='ste', init_params='ste')

        self._hmm = MultinomialHMM(n_components=self._N, n_iter=self._maxIters, 
            verbose=True, params='ste', init_params='ste')
Example #5
    def train(self, obs_seq_list: list, state_seq_list: list, obs_set: list,
              state_set: list, file):
        """
        :param obs_seq_list: observation sequence list [[o1, o2, o3], [o1, o2, o3]...]
        :param state_seq_list: state sequence list [[s1, s2, s3], [s1, s2, s3]...]
        :param obs_set: all possible observation state
        :param state_set: all possible state
        """
        self.obs_seq_list = obs_seq_list
        self.state_seq_list = state_seq_list
        self.obs_set = obs_set
        self.state_set = state_set
        self.counter = Counter(''.join(state_seq_list))

        self.hmm = MultinomialHMM(n_components=len(self.state_set))

        self.startprob, self.transmat, self.emissionprob = \
            self._init_state(), self._trans_state(), self._emit_state()
        self.hmm.startprob_ = self.startprob
        self.hmm.transmat_ = self.transmat
        self.hmm.emissionprob_ = self.emissionprob

        if file is not None:
            with open(file, 'wb') as f:
                pickle.dump(self, f)
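The `_init_state`, `_trans_state`, and `_emit_state` helpers are not shown; they presumably estimate the three probability tables by relative frequency. A sketch of `_init_state` under that assumption:

    def _init_state(self):
        # Relative frequency of each state at the start of a sequence
        # (a sketch; the real helper is not included in this excerpt)
        counts = Counter(seq[0] for seq in self.state_seq_list)
        total = len(self.state_seq_list)
        return np.array([counts[s] / total for s in self.state_set])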
Example #6
    def initHMM(self, length):
        a = 1.0 / length

        # Transition probabilities
        trans = np.array([[1-a,   a,   0,   0],   # Pre ->
                          [  0, 1-a, a/2, a/2],   # HQ  ->
                          [  0,   0,   1,   0],   # PostQuiet ->
                          [  0,   0,   0,   1] ]) # PostActive ->

        # emission probabilities
        eps = 1e-4
        emit = np.array([[ 0.25, 0.25, 0.50 ],    # Emit | Pre
                         [ 0.16, 0.84-eps, eps ], # Emit | HQ
                         [ 0.90, 0.10-eps, eps ], # Emit | PostQuiet
                         [ 0.25, 0.25, 0.50 ] ])  # Emit | PostActive
        #                   A0    A1    A2

        # Start state distribution
        start = np.array([0.34, 0.33, 0.33, 0])

        hmm = MultinomialHMM(n_components=4)  # four states: Pre, HQ, PostQuiet, PostActive
        hmm.transmat_ = trans
        hmm.startprob_ = start
        hmm.emissionprob_ = emit
        return hmm
Example #7
    def test_viterbi_case_random(self):
        for i in range(1000):
            # init
            self.n_state = np.random.randint(1,10)
            self.n_output = np.random.randint(1,10)
            self.step = np.random.randint(1,200)
            p = np.random.random(self.n_state)
            startprob = p/p.sum()
            p = np.random.random((self.n_state,self.n_state))
            transmat = p/p.sum(axis=1).reshape(-1,1)
            p = np.random.random((self.n_state,self.n_output))
            emissionprob = p/p.sum(axis=1).reshape(-1,1)
            X = np.random.choice(self.n_output,self.step).reshape(-1,1)

            # hmmlearn
            model = MultinomialHMM(n_components=self.n_state)
            model.startprob_ = startprob
            model.transmat_ = transmat
            model.emissionprob_ = emissionprob
            y = model.predict(X)

            # my hmm
            hmm = HMM()
            pred = hmm.viterbi(startprob, transmat, emissionprob, X)
            self.assertTrue(np.array_equal(y, pred))
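The `HMM.viterbi` implementation under test is not shown here. A minimal log-space NumPy sketch with the same signature, assuming `X` is a column vector of integer symbols, could be:

import numpy as np

class HMM:
    def viterbi(self, startprob, transmat, emissionprob, X):
        obs = X.ravel()
        T, n = len(obs), len(startprob)
        with np.errstate(divide="ignore"):  # allow log(0) -> -inf
            log_pi = np.log(startprob)
            log_A = np.log(transmat)
            log_B = np.log(emissionprob)
        delta = np.zeros((T, n))            # best log-score ending in each state
        psi = np.zeros((T, n), dtype=int)   # backpointers
        delta[0] = log_pi + log_B[:, obs[0]]
        for t in range(1, T):
            scores = delta[t - 1][:, None] + log_A   # scores[i, j]: state i -> state j
            psi[t] = scores.argmax(axis=0)
            delta[t] = scores.max(axis=0) + log_B[:, obs[t]]
        path = np.zeros(T, dtype=int)
        path[-1] = delta[-1].argmax()
        for t in range(T - 2, -1, -1):       # trace the best path backwards
            path[t] = psi[t + 1, path[t + 1]]
        return path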
Example #8
    def __init__(self, model_file=None, components=None):

        if os.path.exists(model_file):
            self.model = joblib.load(model_file)
        else:
            alu_file = 'Alu_sequence.pkl'
            if os.path.exists(alu_file):
                locis = joblib.load(alu_file)
            else:
                locis = read_sequence('hg19_Alu.bed', 0)
                locis = random.sample(locis, 100000)
                for l in tqdm(locis):
                    l.init_seq()
                    l.decode_seq()
                locis = list(filter(lambda l: l.seq is not None, locis))
                joblib.dump(locis, alu_file)

            print('Alu Loaded')
            locis = locis[0:5000]
            model = MultinomialHMM(n_components=components,
                                   verbose=True,
                                   n_iter=50)
            x = np.concatenate(list(map(attrgetter('seq'), locis)))
            x = np.reshape(x, [x.shape[0], 1])
            length = list(map(attrgetter('length'), locis))
            model.fit(x, length)
            self.model = model
            joblib.dump(self.model, model_file)
Example #9
def create_hmm_data(N, seq_len, x_dim, z_dim, params=None):
    from hmmlearn.hmm import MultinomialHMM  # introduces a lot of dependencies
    hmm = MultinomialHMM(n_components=z_dim)

    if params is None:
        T = np.random.random(size=(z_dim, z_dim))
        T = T/T.sum(axis=1).reshape((z_dim, 1))

        pi = np.random.random(size=(z_dim,))
        pi = pi/pi.sum()

        emit = np.random.random(size=(z_dim, x_dim))
        emit = emit/emit.sum(axis=1).reshape((z_dim, 1))
    else:
        T, pi, emit = params

    hmm.transmat_ = T
    hmm.startprob_ = pi
    hmm.emissionprob_ = emit

    X = np.zeros((N, seq_len)).astype(int)
    for i in range(N):
        x, _ = hmm.sample(n_samples=seq_len)
        X[i] = x.reshape((seq_len,))

    return (T, pi, emit), HMMData(X)
Example #10
def buildHMM(HMMFactory):

    model = MultinomialHMM(n_components=2, n_iter=200)
    model.startprob_ = HMMFactory.hiddenProb()
    model.transmat_ = HMMFactory.transMatrix()
    model.emissionprob_ = HMMFactory.emissionMatrix()
    return model
Example #11
 def predict(self, x, init_prob=None, method='hmmlearn', window=-1):
     """Predict result based on HMM
     """
     if init_prob is None:
         init_prob = np.array(
             [1 / self.num_states for i in range(self.num_states)])
     if method == 'hmmlearn':
         model = MultinomialHMM(self.num_states, n_iter=100)
         model.n_features = self.num_observations
         model.startprob_ = init_prob
         model.emissionprob_ = self.B
         model.transmat_ = self.A
         if window == -1:
             result = model.predict(x)
         else:
             result = np.zeros(x.shape[0], dtype=int)
             result[0:window] = model.predict(x[0:window])
             for i in range(window, x.shape[0]):
                 result[i] = model.predict(x[i - window + 1:i + 1])[-1]
     else:
         if window == -1:
             result = self.decode(x, init_prob)
         else:
             result = np.zeros(x.shape[0], dtype=int)
             result[0:window] = self.decode(x[0:window], init_prob)
             for i in range(window, x.shape[0]):
                 result[i] = self.decode(x[i - window + 1:i + 1],
                                         init_prob)[-1]
     return result
Example #12
def fit_hmm_learn(X, n_states):
    samples = np.concatenate(X).reshape(-1, 1)  # column vector of integer symbols
    lengths = [len(x) for x in X]

    hmm_learn_model = MultinomialHMM(n_components=n_states)
    hmm_learn_model.fit(samples, lengths)

    # Label data using hmmlearn model
    return hmm_learn_model.predict(samples, lengths)
Example #13
 def get_model(self):
     """
     Initialize the HMM model.
     """
     model = MultinomialHMM(n_components=len(self.states))
     model.startprob_ = self.init_p
     model.transmat_ = self.trans_p
     model.emissionprob_ = self.emit_p
     return model
Example #14
 def get_model(self):
     """ returns a multinomial hmm"""
     model = MultinomialHMM(n_components=self.get_max(),
                            params='e',
                            init_params='')
     model.startprob_ = self.get_start()
     model.transmat_ = self.get_transition()
     model.emissionprob_ = self.get_emission()
     return model
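In hmmlearn, `params='e'` means `fit` re-estimates only the emission matrix, and `init_params=''` prevents `fit` from re-initializing any parameter, so the hand-set start and transition probabilities survive training. A hypothetical usage sketch (`wrapper` and `obs` are placeholders, not names from the source):

model = wrapper.get_model()
# fit() updates emissionprob_ only; startprob_ and transmat_ keep the values set in get_model()
model.fit(obs.reshape(-1, 1))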
Example #15
File: hmm.py Project: SmartDataLab/EML
def get_hmm(df, n_components, n_features):
    _, state_list = get_ubie_label(df["label"])
    pred_list = get_pred_for_hmm(df["pred"])
    clf = MultinomialHMM(n_components=n_components)
    clf.n_features = n_features
    clf.transmat_ = get_transmat(state_list)
    clf.emissionprob_ = get_emission(pred_list, state_list)
    clf.startprob_ = np.array([0.5, 0.05, 0.4, 0.05])
    return clf
Example #16
def fit(seqs, n_components=1):
    model = MultinomialHMM(n_components=n_components,
                           startprob_prior=1.0,
                           transmat_prior=1.0,
                           algorithm='viterbi',
                           random_state=1,
                           n_iter=100,
                           tol=0.01,
                           verbose=True,
                           params='ste',
                           init_params='ste')
    # Assumes seqs is a column vector of integer symbols, shape (n_samples, 1)
    model.fit(seqs)
    return model
Example #17
def build_hmm(trans, emis, seed=1):
    """Builds and returns an HMM model given the transition and emission probability matrices"""
    hmm = MultinomialHMM(n_components=trans.shape[0],
                         algorithm="viterbi",
                         random_state=seed)
    hmm.n_features = emis.shape[1]
    hmm.emissionprob_ = emis
    # We will always start at the first state
    hmm.startprob_ = np.array([1] + [0] * (trans.shape[0] - 1))
    hmm.transmat_ = trans
    return hmm
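A quick usage sketch with hypothetical two-state matrices:

trans = np.array([[0.9, 0.1],
                  [0.0, 1.0]])
emis = np.array([[0.7, 0.3],
                 [0.1, 0.9]])
hmm = build_hmm(trans, emis)
logprob, states = hmm.decode(np.array([[0], [1], [1]]))  # Viterbi decode of a short symbol sequence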
Example #18
 def predict_prob(self, x, init_prob=None, window=-1):
     """Predict the probability
     """
     if init_prob is None:
         init_prob = np.array(
             [1 / self.num_states for i in range(self.num_states)])
     model = MultinomialHMM(self.num_states)
     model.n_features = self.num_observations
     model.startprob_ = init_prob
     model.emissionprob_ = self.B
     model.transmat_ = self.A
     return model.predict_proba(x)
Example #19
def train_hmm():
    """
    HMM for sequence learning.
    """
    print "Loading training data..."
    train_sequence, num_classes = get_sequence("./train_data/*")

    print "Build HMM..."
    model = MultinomialHMM(n_components=2)

    print "Train HMM..."
    model.fit([train_sequence])
Example #20
    def __init__(self, observed):
        """
        Initializes the object and sets the internal state.

        Args:
            observed: array-like, shape (n_samples, n_features)
        """
        self.observed = np.array(observed)

        if len(self.observed.shape) == 1:
            self.observed = self.observed.reshape(-1, 1)
        # TODO: Check other parameters to this constructor
        self.model = MultinomialHMM(n_components=2, n_iter=100)
Example #21
 def __init__(self, M):
     self.con = MultinomialHMM(n_components=M)
     self.incon = MultinomialHMM(n_components=M)
     self.daID = {
         'ass': 0,
         'bck': 1,
         'be.neg': 2,
         'be.pos': 3,
         'el.ass': 4,
         'el.inf': 5,
         'el.sug': 6,
         'el.und': 7,
         'fra': 8,
         'inf': 9,
         'off': 10,
         'oth': 11,
         'stl': 12,
         'sug': 13,
         'und': 14
     }
     self.da_choose_n = itertools.combinations([
         'ass', 'bck', 'be.neg', 'be.pos', 'el.ass', 'el.inf', 'el.sug',
         'el.und', 'fra', 'inf', 'off', 'oth', 'stl', 'sug', 'und'
     ], 4)
Example #22
    def __init__(self):
        self.states = list(CapabilityBehaviourState)
        self.observations = list(InteractionObservation)

        self.hmm = MultinomialHMM(n_components=len(self.states))

        self.state_history = []

        # When many similar capabilities are being used, it is quite often
        # the case that they will be in the same state. This means agent
        # capabilities will have the same results as each other.
        # This can lead to bad cases, for example, where VeryGood behaviours
        # all fail at the same time.
        # To prevent this synchronisation, each behaviour is given their own
        # different initial seed to mix with the seed provided for an interaction.
        self.individual_seed = 0
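How the per-behaviour seed is mixed with the interaction seed is not shown in this excerpt; one plausible sketch (the method name and mixing scheme are assumptions):

    def _rng_for_interaction(self, interaction_seed):
        # Combine the shared interaction seed with this behaviour's own seed
        # so otherwise-identical capabilities do not draw identical state sequences
        return np.random.default_rng([interaction_seed, self.individual_seed])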
Example #23
def get_hmm_model(state):
    """Creates an instance of MultinomialHMM, which follows sklearn interface
    Input:
    - state: dictionary
        where the keys are HiddenMarkovModelProbability choices
        and the values are the probability matrices or arrays that
        describe the corresponding hidden Markov model state
    Returns: an instance of a trained MultinomialHMM
    """
    hmm_model = MultinomialHMM(n_components=len(SleepStage))

    hmm_model.emissionprob_ = state[HiddenMarkovModelProbability.emission.name]
    hmm_model.startprob_ = state[HiddenMarkovModelProbability.start.name]
    hmm_model.transmat_ = state[HiddenMarkovModelProbability.transition.name]

    return hmm_model
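A hypothetical usage sketch, assuming for illustration that SleepStage has two members (the probability values below are placeholders):

state = {
    HiddenMarkovModelProbability.emission.name: np.array([[0.8, 0.2],
                                                          [0.3, 0.7]]),
    HiddenMarkovModelProbability.start.name: np.array([0.6, 0.4]),
    HiddenMarkovModelProbability.transition.name: np.array([[0.9, 0.1],
                                                            [0.2, 0.8]]),
}
hmm_model = get_hmm_model(state)
labels = hmm_model.predict(np.array([[0], [1], [1], [0]]))  # decode a short observation column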
Example #24
def detect_events_hmm(mahal_timeseries,
                      c_timeseries,
                      global_pace_timeseries,
                      threshold_quant=.95,
                      trans_matrix=DEFAULT_TRANS_MATRIX,
                      emission_matrix=DEFAULT_EMISSION_MATRIX,
                      initial_state=None):

    #Sort the keys of the timeseries chronologically
    sorted_dates = sorted(mahal_timeseries)

    (expected_pace_timeseries,
     sd_pace_timeseries) = getExpectedPace(global_pace_timeseries)

    #Generate the list of values of R(t)
    mahal_list = [mahal_timeseries[d] for d in sorted_dates]
    c_list = [c_timeseries[d] for d in sorted_dates]
    global_pace_list = [global_pace_timeseries[d] for d in sorted_dates]
    expected_pace_list = [expected_pace_timeseries[d] for d in sorted_dates]

    #Use the quantile to determine the threshold
    sorted_mahal = sorted(mahal_list)
    threshold = getQuantile(sorted_mahal, threshold_quant)

    # The symbols array contains "1" if there is an outlier, "0" if there is not
    symbols = []
    for i in range(len(mahal_list)):
        if (mahal_list[i] > threshold or c_list[i] == 1):
            symbols.append(1)
        else:
            symbols.append(0)

    # Actually set up the hmm
    model = MultinomialHMM(n_components=2)
    model.transmat_ = trans_matrix
    if initial_state is not None:
        model.startprob_ = initial_state
    else:
        model.startprob_ = np.array([0.5, 0.5])  # assume a uniform start when unspecified
    model.emissionprob_ = emission_matrix

    # Make the predictions
    lnl, predictions = model.decode(np.array(symbols).reshape(-1, 1))

    events = get_all_events(predictions, sorted_dates, mahal_list,
                            global_pace_list, expected_pace_list)

    # Sort events by duration, starting with the long events
    events.sort(key=lambda x: x[2], reverse=True)
    return events, predictions
Example #25
def test_DiscreteHMM_decode(cases: str) -> None:
    np.random.seed(12346)
    cases = int(cases)
    i = 1
    N_decimal = 4
    while i < cases:
        tol=1e-3
        n_samples = np.random.randint(10, 50)
        hidden_states = np.random.randint(3, 6)
        # symbols is the number of unique observation types.
        symbols = np.random.randint(4, 9)
        X = []
        lengths = []
        for _ in range(n_samples):
            # each sequence is a random permutation of all the symbols
            seq_length = symbols
            this_x = np.random.choice(range(symbols), size=seq_length, replace=False)
            X.append(this_x)
            lengths.append(seq_length)
        max_iter = 100

        hmm_gold = MultinomialHMM(n_components=hidden_states, n_iter=max_iter, tol=tol)
        X_gold = np.concatenate(X).reshape((-1,1))
        hmm_gold.fit(X_gold, lengths)
        gold_A = hmm_gold.transmat_
        gold_B = hmm_gold.emissionprob_
        gold_pi = hmm_gold.startprob_
        gold_logprob, gold_state_sequence = hmm_gold.decode(X_gold, lengths)
        hmm_mine = DiscreteHMM(hidden_states=hidden_states,
                               symbols=symbols,
                               A=gold_A,
                               B=gold_B,
                               pi=gold_pi)
        mine_logprob_list = []
        mine_state_sequence = []
        for this_x in X:
            this_mine_logprob, this_mine_state_sequence = hmm_mine.decode(this_x)
            mine_logprob_list.append(this_mine_logprob)
            mine_state_sequence.append(this_mine_state_sequence)
        mine_state_sequence = np.concatenate(mine_state_sequence)
        mine_logprob = sum(mine_logprob_list)
        assert_almost_equal(mine_logprob, gold_logprob, decimal=N_decimal)
        assert_almost_equal(mine_state_sequence, gold_state_sequence, decimal=N_decimal)
        i+=1
    print('Successfully tested decoding in the discrete HMM!')
Example #26
    def _init_HMM(self):
        rospy.loginfo('[slip_detector] Instantiating HMM...')
        self.possible_states = ['slip', 'no_slip']

        # Define initial state, state transition, and observation probabilities:
        initial_state_dist = np.array([0.2, 0.8])

        state_trans_probs = np.array([[0.5, 0.5], [0.3, 0.7]])

        observation_probs = np.array([[0.1, 0.1, 0.8, 0.0],
                                      [0.1, 0.1, 0.2, 0.6]])

        # Instantiate the model:
        self.HMM = MultinomialHMM(n_components=len(self.possible_states))
        self.HMM.startprob_ = initial_state_dist
        self.HMM.transmat_ = state_trans_probs
        self.HMM.emissionprob_ = observation_probs
Example #27
def main():
    rand_p_matrix = np.random.rand(4, 4)
    rand_b_matrix = np.random.rand(4, 3)

    print("\nGernerating p matrix...............")
    p_matrix = normalization(rand_p_matrix)
    print(p_matrix)

    print("\nGernerating b matrix...............")
    b_matrix = normalization(rand_b_matrix)
    print(b_matrix)

    # Generate 1000 observations
    O, Q = generate_observation(1000, p_matrix, b_matrix)

    O_seq = [1, 2, 3, 3, 1, 2, 3, 3, 1, 2, 3, 3]
    pi = (1, 0, 0, 0)
    print("\nThe Orginal Observation Sequence O: {}".format(O[:12]))
    print("The probability 𝑝(𝑂|𝜆) is {} with O: {}".format(
        forward(O_seq, p_matrix, b_matrix, pi)[-1].sum(), O_seq))

    print("\nThe Orginal Sequence Q: {}".format(Q[:12]))
    print("The Most Probable Sequence Q: {} with O: {}".format(
        list(viterbi(O_seq, p_matrix, b_matrix, pi)), O_seq))

    observations = LabelEncoder().fit_transform(O).reshape(-1, 1)
    model = MultinomialHMM(n_components=4)
    model.fit(observations)
    est_pi = model.startprob_
    est_p = model.transmat_
    est_b = model.emissionprob_
    print("\nThe estimated transition matrix P:\n {}".format(est_p))
    print("\nThe estimated event matrix B:\n {}".format(est_b))
    print("\nThe estimated start probability pi:\n {}".format(est_pi))

    _, p = chisquare(p_matrix, est_p, axis=None)
    print("\np-value of transition matrix P: {}".format(p))

    _, p = chisquare(b_matrix, est_b, axis=None)
    print("p-value of event matrix B: {}".format(p))

    _, p = chisquare(pi, est_pi, axis=None)
    print("p-value of start probability pi: {}".format(p))
Example #28
    def test_viterbi_case_handcraft(self):
        # init
        startprob = np.array([0.6, 0.4])
        transmat = np.array([[0.7, 0.3],
                             [0.4, 0.6]])
        emissionprob = np.array([[0.1, 0.4, 0.5],
                                 [0.6, 0.3, 0.1]])
        X = np.array([1, 0, 2, 0, 2, 1, 0, 1, 1]).reshape(-1, 1)

        # hmmlearn
        model = MultinomialHMM(n_components=2)
        model.startprob_ = startprob
        model.transmat_ = transmat
        model.emissionprob_ = emissionprob
        y = model.predict(X)

        # my hmm
        hmm = HMM()
        pred = hmm.viterbi(startprob, transmat, emissionprob, X)
        self.assertTrue(np.array_equal(y, pred))
Example #29
    def fit(self, data):
        """
        Estimates model parameters by initializing an HMM for each class label and fitting the data for that model

        :param data: matrix with the dimensions [number of datapoints][2][1 or 2]
        In the first matrix dimension, each datapoint will be stored. In the second dimension, at index 0, the veracity
        label of a given rumour will be stored. At index 1, the features will be stored. The third dimension will be of
        size 1 or 2, depending on whether only SDQC labels are used for the prediction, or timestamps are also included
        as features.
        :return: the HMM model, with sub-models fitted for each data label
        """
        classes = dict()

        feature_count = len(data[1][1][0])

        # partition data by label
        for datapoint in data:
            if datapoint[0] not in classes:
                classes[datapoint[0]] = []
            classes[datapoint[0]].append(datapoint[1])

        # Make and fit model for each label
        for veracity_label, sdqc_labels in classes.items():
            lengths = [len(x) for x in sdqc_labels]
            thread_flat = np.array(flatten(sdqc_labels)).reshape(
                -1, feature_count)
            if veracity_label not in self.models:
                if self.model_type == 'gaussian':
                    self.models[veracity_label] = GaussianHMM(
                        n_components=self.components).fit(thread_flat,
                                                          lengths=lengths)
                elif self.model_type == 'multinomial':
                    # If timestamps are used, the MultinomialHMM ignores these, as it does not support float values
                    thread_flat = [[int(x[0])] for x in thread_flat]
                    self.models[veracity_label] = MultinomialHMM(
                        n_components=self.components).fit(thread_flat,
                                                          lengths=lengths)
        return self
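For concreteness, a hypothetical `data` value matching the documented layout (veracity label at index 0, one SDQC feature per post at index 1; the values are placeholders):

data = [
    (1, [[0], [2], [1]]),  # rumour labelled true, three posts with SDQC labels
    (0, [[3], [1]]),       # rumour labelled false, two posts
]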
Example #30
            high = high + 1
        elif percent >= .50:
            highMid = highMid + 1
        elif percent >= .25:
            lowMid = lowMid + 1
        else:
            low = low + 1
    matrix[1, 0] = low / len(wins)
    matrix[1, 1] = lowMid / len(wins)
    matrix[1, 2] = highMid / len(wins)
    matrix[1, 3] = high / len(wins)
    return matrix


# Load Data
filename = 'data.csv'
X = np.loadtxt(filename, delimiter=',')

player1 = X[:, 0]
player2 = X[:, 1]
record = X[:, 2]

print "stateProbs(record)", stateProbs(record)
print "eProbs(player1, record", eProbs(player1, record)
clf = MultinomialHMM(n_components=2)
clf.transmat_ = stateProbs(record)
clf.emissionprob_ = eProbs(player1, record)
print "here"
clf.fit(clf.transmat_, clf.emissionprob_)
clf.predict(player1)