Example #1
    def fit_model(self, observations):
        '''
        fits (MLE) the parameters to the sequences (with values in
        np.arange(self.n_obs_states)) of observations using EM iterations

        @param observations List of observations ({0,...,n_obs_states-1} valued list)
        '''
        
        hmm.baum_welch(self.model, np.array(observations),
                       epochs=self.epochs, graph=False)

        print('trans matrix (self.model.A) is', self.model.A)
        print('markov to observed matrix (self.model.B) is', self.model.B)
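The method assumes a wrapper class carrying self.model, self.n_obs_states and self.epochs. A minimal usage sketch (the DiscreteHMMWrapper name and its constructor are hypothetical, for illustration only):

# Hypothetical wrapper class; only fit_model above is from the original.
wrapper = DiscreteHMMWrapper(n_hidden_states=2, n_obs_states=3, epochs=20)
wrapper.fit_model([[0, 1, 2, 2, 1, 0],
                   [2, 2, 0, 1, 1, 0]])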
Example #2
def learning_grid(args: argparse.Namespace) -> HMM:
    grid = get_grids()[args.idx]

    samples = []
    for s in range(args.num_samples):
        obs, _ = grid.get_sequence(length=np.random.randint(5, 11))
        samples += [obs]

    hmm = baum_welch(N=grid.states_no, M=len(Grid.COLORS), samples=samples)

    return hmm
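A sketch of driving this from the command line; learning_grid only reads args.idx and args.num_samples, so a minimal parser (the flag names are an assumption) suffices:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--idx", type=int, default=0)
parser.add_argument("--num_samples", type=int, default=100)
trained = learning_grid(parser.parse_args())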
Example #3
    def test_train_model(self):
        '''Dishonest Casino Example - EM algorithm.'''
        # Create transition probability matrix
        A = np.array([[0.99, 0.01],
                      [0.01, 0.99]])
        # Create observable probability distribution matrix.
        # Casino biased toward "6" in state "1".
        B = statutil.scale_row_sums(np.array([[1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
                                              [1.0, 1.0, 1.0, 1.0, 1.0, 5.0]]))
        # Create set of all observable symbols
        V = [1, 2, 3, 4, 5, 6]

        # Instantiate an HMM; note Pi is a uniform probability distribution by default
        m = hmm.HMM(2, A=A, B=B, V=V)

        Obs = [1, 2, 3, 4, 5, 2, 1, 6, 6, 6, 5, 6]
        c = [Obs]
        hmm.baum_welch(m, c, epochs=15, graph=False)
        TestHmm.assert_model_matrices_almost_equal(
            m,
            ([[0.856658708052639, 0.14334129194736125],
              [2.454940916925095e-16, 1.0]],
             [[0.28329354031233306, 0.2866825838637413, 0.14334129194736112,
               0.14334129194736112, 0.14334129192821368, 9.896623857864685e-13],
              [0.004706380704415612, 4.3023359620169447e-11, 3.2510873580469717e-111,
               1.2201233032249015e-54, 0.19905872387205914, 0.7962348953805019]],
             [1.0, 4.364785210913299e-122]))
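statutil.scale_row_sums presumably normalizes each row to sum to 1, so the biased state emits a 6 half the time. A quick numpy equivalent of that normalization (a sketch, not the project's code):

import numpy as np

B_raw = np.array([[1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
                  [1.0, 1.0, 1.0, 1.0, 1.0, 5.0]])
B = B_raw / B_raw.sum(axis=1, keepdims=True)
# B[0] is uniform (all 1/6); B[1] is [0.1, 0.1, 0.1, 0.1, 0.1, 0.5]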
Example #4
def learn_simple(args: argparse.Namespace) -> HMM:
    true_hmm = get_simple_models()[args.idx]

    samples = []
    for s in range(args.num_samples):
        obs = true_hmm.sample_sequence(length=10)
        samples += [obs]

    hmm = baum_welch(true_hmm.N,
                     true_hmm.M,
                     samples,
                     num_it=args.num_it,
                     plot=args.plot)

    diff(true_hmm, hmm)

    return hmm
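diff is defined elsewhere in that project; a plausible stand-in (an assumption, including the A and B attribute names) reports the largest parameter discrepancy. Note that hidden states are only identifiable up to permutation, so a faithful comparison would minimize over state relabelings:

import numpy as np

def diff(true_hmm, learned_hmm):
    # Hypothetical stand-in for the project's diff() helper.
    print("max |A_true - A_est|:", np.abs(true_hmm.A - learned_hmm.A).max())
    print("max |B_true - B_est|:", np.abs(true_hmm.B - learned_hmm.B).max())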
Example #5
gamma = hmm.gammas(data_test, states, alpha_scaled,
                   beta_scaled, scale_alpha)

for i in states:
    y = np.zeros(100)
    for t in range(100):
        y[t] = gamma[t][i]
    plt.figure()
    plt.plot(y)
    plt.title("State %i" % (i+1))

# 4
(start_proba1, transition_proba1, means1, covariances1,
 logllh1, iteration1) = hmm.baum_welch(data_train, states, start_proba_init,
                                       transition_proba_init,
                                       means_init, covariances_init,
                                       delta=1e-4)
# 5
plt.figure()
plt.plot(logllh1)

logllh2 = []
for i in range(iteration1+1):
    alpha_scaled2, scale_alpha2 = hmm.forward(data_test, states,
                                              start_proba1[i],
                                              transition_proba1[i],
                                              means1[i],
                                              covariances1[i])
    logllh2.append(hmm.loglike(states, alpha_scaled2, scale_alpha2))
plt.figure()
plt.plot(logllh2)
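The second curve doubles as a model-selection tool: the iteration where the held-out log-likelihood peaks is a natural stopping point, since later EM iterations fit data_train at the expense of data_test. A minimal sketch, reusing logllh2 from above:

best_it = int(np.argmax(logllh2))
print("held-out log-likelihood peaks at iteration", best_it,
      "with value", logllh2[best_it])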
Example #6
def train():
    symbols = 100
    states = 30
    e, q, d, bins = preprocess(symbols)

    model = hmm.random_model(states, symbols)
    gen = hmm.synthetic(model)
    sampl = [next(gen) for _ in range(1000)]

    def show_model(model, sampl):
        # Plot the transition matrix, the emission matrix and a synthetic
        # sample drawn from the current model.
        plt.clf()
        plt.subplot(311)
        plt.imshow(model.transitions, interpolation='nearest')
        plt.subplot(312)
        plt.imshow(model.emissions, interpolation='nearest')
        plt.subplot(313)
        plt.plot(sampl)
        plt.show()
        plt.pause(0.001)

    plt.ion()
    show_model(model, sampl)

    i = 0
    plt.savefig("out{}.png".format(i))

    step = 10000
    sig = q
    length = len(sig)
    fro = 0
    to = step

    print("\nIteration {}".format(i))
    try:
        while True:
            # Checkpoint the model so a long run can be resumed.
            with open("db_ecg.pickle", "wb") as pfile:
                pickle.dump(model, pfile)

            print("batch from {} to {}".format(fro, to), end="\r")

            i += 1
            if to >= length - 9 * step:
                # Wrap around to the start of the signal.
                print("\nIteration {}".format(i))
                fro = 0
                to = step

            o = sig[fro:to]

            fro += step
            to += step

            # Re-estimate on the current batch; use `_` so the pass counter
            # `i` is not clobbered.
            for _ in range(100):
                model = hmm.baum_welch(o, model)

            gen = hmm.synthetic(model)
            sampl = [next(gen) for _ in range(1000)]

            show_model(model, sampl)
            plt.savefig("out{}.png".format(i))
    except KeyboardInterrupt:
        # Ctrl-C stops training and falls through to the final plots.
        pass

    plt.ioff()
    show_model(model, sampl)

    return model, bins
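Because the loop pickles the model before every batch, an interrupted run can be resumed from the checkpoint. A sketch, assuming the same hmm module is importable when unpickling:

import pickle

with open("db_ecg.pickle", "rb") as pfile:
    model = pickle.load(pfile)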
Example #7
def coalhmm(args):
    """
  Trains and tests a Coal-HMM
  @param args   (argparse.Namespace)   Arguments provided by user: filename,
                                       sample, rounds
  """
    # from table 2 in Hobolth et al.
    # mean_fragment_length_HC1 = 1684
    # mean_fragment_length_others = 65
    # probability_leaving_HC1 = 3 * s = 1 / 1684
    s = 1.0 / (1684 * 3)
    # probability_leaving_others = 1 / 65 = u + 2 * v
    # u + 2 * v = 1 / 65
    stationary = (0.49, 0.17, 0.17, 0.17)
    # stationary = np.array([psi, (1 - psi) / 3, (1 - psi) / 3, (1 - psi) / 3])
    psi = 0.49
    # psi = 1 / (1 + 3 * s / u)
    # 1 + 3 * s / u = 1 / psi
    # u + 3 * s = u / psi
    # 3 * s = (1 / psi - 1) * u
    u = 3 * s / (1 / psi - 1)
    v = (1 / 65.0 - u) / 2

    # Transition probability: HC1, HC2, HG, CG
    transition = np.array([[1 - 3 * s, s, s, s], [u, 1 - (u + 2 * v), v, v],
                           [u, v, 1 - (u + 2 * v), v],
                           [u, v, v, 1 - (u + 2 * v)]])

    print "Reading alignments"
    original_alignments = [np.array(a) for a in utils.read_maf(args.filename)]
    print "done"
    for j in range(args.rounds):
        print "ROUND {}".format(j)

        print "sampling"
        if args.sample is not None:
            alignments = [
                original_alignments[i] for i in np.random.choice(
                    np.arange(len(original_alignments)), args.sample, False)
            ]
        else:
            alignments = original_alignments
        print "Number of alignments: {}".format(len(alignments))
        print "done"

        print "felsenstein"
        groupings = {}
        emission = np.zeros((4, 5**4))
        for alignment in alignments:
            _, len_alignment = alignment.shape
            for i in range(len_alignment):
                column = "".join(alignment[:, i])
                if column not in groupings:
                    groupings[column] = len(groupings)

                    trees = utils.generate_trees(alignment[:, i])

                    # Felsenstein to get the emission probability of this
                    # column under each of the four genealogies
                    for state, t in enumerate(trees):
                        emission[state, groupings[column]] = math.exp(
                            felsenstein.felsensteins(t))
        print("done")

        print "BW"
        initial = np.array([0.25, 0.25, 0.25, 0.25])
        # Baum welsh to update matrices
        emission, transition = hmm.baum_welch(initial, emission, transition,
                                              alignments, groupings)
        print "done"

        print "viterbi"
        # use Viterbi to find which state is occupied the longest
        hidden_states = []
        for alignment in alignments:
            hidden_states.append(
                hmm.viterbi(initial, emission, transition, alignment,
                            groupings))
        print("done")

        # calculate time spent in a state
        counts = Counter([s for states in hidden_states for s in states])
        print "Number of bases in each state: ", counts
Example #8
        output += symbols[str(a)]
    output += ","
    
print(output)


timeData = [8, 8.5, 8.7, 9, 9.5, 12, 13, 17, 17.5, 19, 20,
            8.5, 8.7, 9, 9.5, 10, 12, 13, 17, 17.5, 19, 20]
hours = [int(t) for t in timeData]
minutes = [10 * (timeData[i] - hours[i]) for i in range(len(hours))]

# Create an HMM

model = hmm.HMM(n_states=5, V=[1, 2, 3, 4, 5, 6, 7, 8, 9])

model = hmm.baum_welch(model, data, epochs=100)

plt.imshow(model.A)
plt.show()
print(model.A)
print(model.B)
print(model.F)
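The baum_welch call above consumes a data variable this snippet never defines. One plausible encoding, purely an assumption, quantizes the observation times into the nine symbols of V:

import numpy as np

# Hypothetical: bin the times in [8, 20] into the nine symbols of V.
data = [list(1 + np.digitize(timeData, bins=np.linspace(8, 20, 8)))]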

# TODO
# Before moving forward with all this, first define some standard format for
# reading the data. Also consider an HMM mixing discrete and continuous
# observations; it may not be such a crazy idea after all, and perhaps not
# even hard to implement.


Example #9
  n_A = 2
  n_B = len(dic)

  A = np.random.rand(n_A, n_A) + 3
  A /= A.sum(axis=1)[:, np.newaxis]
  B = np.random.rand(n_A, n_B) + 3
  B /= B.sum(axis=1)[:, np.newaxis]
  start = np.random.rand(n_A)
  start /= start.sum()

  np.set_printoptions(precision=5, suppress=True)
############################################################

  begin = time.time()
  print('begin =', begin)
  hmm.baum_welch(A, B, start, O, 5000, eps=1e-10, verbose=False)
  print('cost =', time.time() - begin)

  print(A)
  print(start)

  print('emission prob:')
  for i in range(n_B):
    print(inv_dic[i], B[:, i])

  pl.style.use('ggplot')
  fig, ax = pl.subplots(2, 1)
  pl.setp(ax, xticks=range(len(vocab)),
          xticklabels=vocab,
          xlim=(-1, len(vocab)),
          ylim=(0, 1))
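The snippet ends while styling the two axes. A plausible continuation (an assumption; the original is truncated here) draws each hidden state's learned emission distribution as a bar chart:

  # Assumed continuation: one bar chart of emission probabilities per state.
  for i in range(n_A):
    ax[i].bar(range(n_B), B[i])
    ax[i].set_title('state {}'.format(i))
  pl.show()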