def fit_model(self, observations):
    '''
    Fit the model parameters to an observation sequence by maximum
    likelihood, using EM (Baum-Welch) iterations.

    The fit is delegated to hmm.baum_welch, run for self.epochs epochs on
    self.model; self.model.A and self.model.B are printed afterwards.
    NOTE(review): the return value of hmm.baum_welch is discarded, so the
    model is presumably updated in place -- confirm against the library.

    @param observations List of observations; per the original note the
                        values lie in np.arange(self.n_obs_states)
    '''
    hmm.baum_welch(self.model, np.array(observations),
                   epochs=self.epochs, graph=False)
    # Each message is formatted into a single string before printing so the
    # call is valid, and emits the same text, under both the Python 2 print
    # statement and the Python 3 print function.
    print('%s %s' % ('trans matrix (self.model.A) is ', self.model.A))
    print('%s %s' % ('markov to observed matrix (self.model.B) is ',
                     self.model.B))
def fit_model(self, observations):
    '''
    Fit the model parameters to an observation sequence by maximum
    likelihood, using EM (Baum-Welch) iterations.

    The fit is delegated to hmm.baum_welch, run for self.epochs epochs on
    self.model; self.model.A and self.model.B are printed afterwards.
    NOTE(review): the return value of hmm.baum_welch is discarded, so the
    model is presumably updated in place -- confirm against the library.

    @param observations List of observations; per the original note the
                        values lie in np.arange(self.n_obs_states)
    '''
    hmm.baum_welch(self.model, np.array(observations),
                   epochs=self.epochs, graph=False)
    # Each message is formatted into a single string before printing so the
    # call is valid, and emits the same text, under both the Python 2 print
    # statement and the Python 3 print function.
    print('%s %s' % ('trans matrix (self.model.A) is ', self.model.A))
    print('%s %s' % ('markov to observed matrix (self.model.B) is ',
                     self.model.B))
def learning_grid(args: argparse.Namespace) -> HMM:
    """Fit an HMM to observation sequences sampled from a predefined grid.

    The grid is selected with ``args.idx`` from ``get_grids()``.
    ``args.num_samples`` sequences are drawn from it, each with a length
    obtained from ``np.random.randint(5, 11)``, and passed to ``baum_welch``
    together with the grid's state count and the number of grid colours.
    """
    grid = get_grids()[args.idx]

    samples = []
    for _ in range(args.num_samples):
        seq_len = np.random.randint(5, 11)
        # get_sequence returns a pair; only the first item (the
        # observations) is used for training.
        observed, _second = grid.get_sequence(length=seq_len)
        samples.append(observed)

    return baum_welch(N=grid.states_no, M=len(Grid.COLORS), samples=samples)
def test_train_model(self):
    """Dishonest Casino Example - EM algorithm."""
    # Set of all observable symbols (faces of the die).
    symbols = [1, 2, 3, 4, 5, 6]

    # Transition probability matrix.
    transition = np.array([[0.99, 0.01],
                           [0.01, 0.99]])

    # Observable probability distribution matrix, rows rescaled by
    # statutil.scale_row_sums. Casino biased toward "6" in state "1".
    raw_emission = np.array([[1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
                             [1.0, 1.0, 1.0, 1.0, 1.0, 5.0]])
    emission = statutil.scale_row_sums(raw_emission)

    # Two-state HMM; note Pi is the uniform probability distribution by
    # default.
    model = hmm.HMM(2, A=transition, B=emission, V=symbols)

    # A single training sequence, wrapped in a list of sequences.
    rolls = [1, 2, 3, 4, 5, 2, 1, 6, 6, 6, 5, 6]
    hmm.baum_welch(model, [rolls], epochs=15, graph=False)

    # Reference values after 15 epochs.
    # NOTE(review): presumably ordered (A, B, Pi) -- confirm against
    # TestHmm.assert_model_matrices_almost_equal.
    expected_first = [[0.856658708052639, 0.14334129194736125],
                      [2.454940916925095e-16, 1.0]]
    expected_second = [[0.28329354031233306, 0.2866825838637413,
                        0.14334129194736112, 0.14334129194736112,
                        0.14334129192821368, 9.896623857864685e-13],
                       [0.004706380704415612, 4.3023359620169447e-11,
                        3.2510873580469717e-111, 1.2201233032249015e-54,
                        0.19905872387205914, 0.7962348953805019]]
    expected_third = [1.0, 4.364785210913299e-122]

    TestHmm.assert_model_matrices_almost_equal(
        model, (expected_first, expected_second, expected_third))
def learn_simple(args: argparse.Namespace) -> HMM:
    """Re-learn one of the simple reference HMMs from its own samples.

    The reference model is selected with ``args.idx`` from
    ``get_simple_models()``. ``args.num_samples`` sequences of length 10 are
    sampled from it and passed to ``baum_welch`` along with ``args.num_it``
    and ``args.plot``. The learned model is compared with the reference via
    ``diff`` and then returned.
    """
    reference = get_simple_models()[args.idx]

    samples = [
        reference.sample_sequence(length=10)
        for _ in range(args.num_samples)
    ]

    learned = baum_welch(
        reference.N,
        reference.M,
        samples,
        num_it=args.num_it,
        plot=args.plot,
    )
    diff(reference, learned)
    return learned
# Script fragment: plot per-state gamma values on the test data, run
# Baum-Welch on the training data, then plot the log-likelihood curves.
# NOTE(review): gamma is presumably the per-time-step state posterior --
# confirm against hmm.gammas.
gamma = hmm.gammas(data_test, states, alpha_scaled, beta_scaled, scale_alpha)
for i in states:
    # Collect gamma[t][i] for the first 100 time steps of state i.
    y = np.zeros(100)
    for t in range(100):
        y[t] = gamma[t][i]
    # One figure per state; titles are numbered from 1.
    plt.figure()
    plt.plot(y)
    plt.title("State %i" % (i+1))
# 4
# Run Baum-Welch on the training data from the initial parameters.
# NOTE(review): the returned parameter objects are indexed by iteration
# further down, so they presumably hold one entry per EM iteration.
(start_proba1, transition_proba1, means1, covariances1, logllh1, iteration1) = hmm.baum_welch(data_train, states, start_proba_init, transition_proba_init, means_init, covariances_init, delta=1e-4)
# 5
# Log-likelihood values returned by baum_welch.
plt.figure()
plt.plot(logllh1)
# Re-evaluate the log-likelihood on the test data with the parameters of
# each iteration 0..iteration1.
logllh2 = []
for i in range(iteration1+1):
    alpha_scaled2, scale_alpha2 = hmm.forward(data_test, states, start_proba1[i], transition_proba1[i], means1[i], covariances1[i])
    logllh2.append(hmm.loglike(states, alpha_scaled2, scale_alpha2))
plt.figure()
plt.plot(logllh2)
def train():
    """Repeatedly re-fit a random HMM on windows of a preprocessed signal.

    A random model with 30 states and 100 symbols is created, then an
    endless loop pickles the current model to "db_ecg.pickle", takes the
    next window of 10000 samples from the signal ``q`` returned by
    ``preprocess``, applies ``hmm.baum_welch`` 100 times on that window and
    redraws/saves a three-panel figure (transitions, emissions, 1000
    synthetic samples).

    NOTE(review): the loop is ``while True`` with no ``break`` and the
    surrounding try/except is commented out, so the statements after the
    loop (including the return) are only reached if that handler is
    restored.

    Returns:
        (model, bins) -- the model and the ``bins`` value from
        ``preprocess``.
    """
    symbols = 100
    states = 30
    # Only q (the training signal) and bins (returned) are used below.
    e, q, d, bins = preprocess(symbols)
    model = hmm.random_model(states,symbols)
    gen = hmm.synthetic(model)
    sampl = [next(gen) for _ in range(1000)]
    # Interactive mode so the figure can be refreshed inside the loop.
    plt.ion()
    plt.clf()
    plt.subplot(311)
    plt.imshow(model.transitions,interpolation='nearest', shape=model.transitions.shape)
    plt.subplot(312)
    plt.imshow(model.emissions,interpolation='nearest', shape=model.emissions.shape)
    plt.subplot(313)
    plt.plot(sampl)
    plt.show()
    plt.pause(0.001)
    i = 0
    plt.savefig("out{}.png".format(i))
    # try:
    step = 10000
    sig = q
    length = len(sig)
    # [fro, to) is the current training window into sig.
    fro = 0
    to = step
    print("\nIteration {}".format(i))
    while True:
        # Checkpoint the current model at the start of every pass.
        with open("db_ecg.pickle","wb") as pfile:
            pickle.dump(model,pfile)
        print("batch from {} to {}".format(fro,to),end="\r")
        i+=1
        # Wrap back to the start of the signal when fewer than 9 windows
        # remain past the current one.
        if to >= length - 9*step:
            print("\nIteration {}".format(i))
            fro = 0
            to = step
        obs = [ ]
        # tmp_fro = fro
        # tmp_to = to
        # for x in range(8):
        #     obs.append(sig[tmp_fro:tmp_to])
        #     tmp_fro += step
        #     tmp_to += step
        o = sig[fro:to]
        fro += step
        to += step
        # for o in obs:
        #     model = hmm.baum_welch(o,model)
        # NOTE(review): this loop rebinds ``i``, the counter incremented
        # above and used in the savefig file name below -- confirm whether
        # that is intended.
        for i in range(100):
            model = hmm.baum_welch(o,model)
        gen = hmm.synthetic(model)
        sampl = [next(gen) for _ in range(1000)]
        # model = hmm.batch_baum_welch(obs,model)
        plt.clf()
        plt.subplot(311)
        plt.imshow(model.transitions,interpolation='nearest', shape=model.transitions.shape)
        plt.subplot(312)
        plt.imshow(model.emissions,interpolation='nearest', shape=model.emissions.shape)
        plt.subplot(313)
        plt.plot(sampl)
        plt.show()
        plt.pause(0.001)
        plt.savefig("out{}.png".format(i))
    # except:
    #     pass
    # Final, non-interactive rendering of the model.
    plt.ioff()
    plt.subplot(311)
    plt.imshow(model.transitions,interpolation='nearest', shape=model.transitions.shape)
    plt.subplot(312)
    plt.imshow(model.emissions,interpolation='nearest', shape=model.emissions.shape)
    plt.subplot(313)
    plt.plot(sampl)
    plt.show()
    return model, bins
def coalhmm(args):
    """
    Trains and tests a Coal-HMM

    For each of args.rounds rounds: optionally subsample the alignments,
    fill the emission matrix from Felsenstein likelihoods of the distinct
    alignment columns, re-estimate emission/transition with Baum-Welch,
    decode every alignment with Viterbi and print how many positions fall
    in each hidden state. The transition matrix estimated in one round is
    the starting point of the next.

    @param args (argparse.Namespace) Arguments provided by user:
                filename, sample, rounds
    """
    # from table 2 in Hobolth et al.
    # mean_fragment_length_HC1 = 1684
    # mean_fragment_length_others = 65
    # probability_leaving_HC1 = 3 * s = 1 / 1684
    s = 1.0 / (1684 * 3)

    # probability_leaving_others = 1 / 65 = u + 2 * v
    # u + 2 * v = 1 / 65
    # stationary distribution: (psi, (1 - psi) / 3, (1 - psi) / 3,
    # (1 - psi) / 3), i.e. roughly (0.49, 0.17, 0.17, 0.17)
    psi = 0.49
    # psi = 1 / (1 + 3 * s / u)
    # 1 + 3 * s / u = 1 / psi
    # u + 3 * s = u / psi
    # 3 * s = (1 / psi - 1) * u
    u = 3 * s / (1 / psi - 1)
    v = (1 / 65.0 - u) / 2

    # Transition probability: HC1, HC2, HG, CG
    stay = 1 - (u + 2 * v)
    transition = np.array([[1 - 3 * s, s, s, s],
                           [u, stay, v, v],
                           [u, v, stay, v],
                           [u, v, v, stay]])

    # print is always called with one pre-formatted string so every call is
    # valid, and emits the same text, on both Python 2 and Python 3.
    print("Reading alignments")
    original_alignments = [np.array(a) for a in utils.read_maf(args.filename)]
    print("done")

    for j in range(args.rounds):
        print("ROUND {}".format(j))

        print("sampling")
        if args.sample is not None:
            # Draw args.sample distinct alignments for this round.
            chosen = np.random.choice(np.arange(len(original_alignments)),
                                      size=args.sample, replace=False)
            alignments = [original_alignments[idx] for idx in chosen]
        else:
            alignments = original_alignments
        print("Number of alignments: {}".format(len(alignments)))
        print("done")

        print("felsenstein")
        # groupings maps each distinct alignment column (as a string) to
        # its column index in the emission matrix.
        groupings = {}
        emission = np.zeros((4, 5**4))
        for alignment in alignments:
            _, len_alignment = alignment.shape
            for i in range(len_alignment):
                column = "".join(alignment[:, i])
                if column not in groupings:
                    groupings[column] = len(groupings)
                    trees = utils.generate_trees(alignment[:, i])
                    # Felsenstein to get emission: one tree per hidden
                    # state. A dedicated index name keeps the column
                    # index ``i`` intact.
                    for state_idx, tree in enumerate(trees):
                        emission[state_idx, groupings[column]] = math.exp(
                            felsenstein.felsensteins(tree))
        print("done")

        print("BW")
        initial = np.array([0.25, 0.25, 0.25, 0.25])
        # Baum-Welch to update matrices
        emission, transition = hmm.baum_welch(initial, emission, transition,
                                              alignments, groupings)
        print("done")

        print("viterbi")
        # use viterbi to see which state we are in the longest
        hidden_states = [
            hmm.viterbi(initial, emission, transition, alignment, groupings)
            for alignment in alignments
        ]
        print("done")

        # calculate time spent in a state
        counts = Counter(state
                         for states in hidden_states
                         for state in states)
        print("%s %s" % ("Number of bases in each state: ", counts))
# NOTE(review): this excerpt starts mid-script; the two ``output`` updates
# below presumably belong to a loop over ``a`` whose header lies above the
# visible part -- confirm before relying on their indentation.
output+=symbols[str(a)]
output+=","
print(output)
# Time stamps as decimal numbers.
timeData=[8,8.5,8.7,9,9.5,12,13,17,17.5,19,20, 8.5,8.7,9,9.5,10,12,13,17,17.5,19,20 ]
# Integer part of each time stamp.
hours=[int(i) for i in timeData]
# Fractional part of each time stamp, scaled by 10.
# NOTE(review): confirm the intended unit (8.5 yields 5 here).
minutes=[ 10*(timeData[i]-hours[i]) for i in range(len(hours))]
# Create an HMM with 5 states over the symbols 1..9
model=hmm.HMM(n_states=5,V=[1,2,3,4,5,6,7,8,9])
# Train on ``data`` (defined above the visible excerpt) for 100 epochs.
model=hmm.baum_welch(model, data, epochs=100)
# Show model.A as an image, then print model.A, model.B and model.F.
plt.imshow(model.A)
plt.show()
print(model.A)
print(model.B)
print(model.F)
# TODO
# Before moving forward with all this, first define some standard format to
# read all this data. Maybe also think about the idea of having an HMM with
# discrete and continuous data mixed; maybe it is not such a crazy idea
# after all, and it may not even be hard to implement.
# Script fragment: random initialisation of the HMM parameters, a timed
# Baum-Welch run, and a dump of the resulting parameters.
# n_A sizes the square matrix A; n_B is the size of ``dic``.
n_A = 2
n_B = len(dic)
# Random matrix with every entry shifted by 3, then each row divided by its
# row sum so that rows sum to 1.
A = np.random.rand(n_A,n_A) + 3
A /= A.sum(axis=1)[:,np.newaxis]
# Same construction for B, shape (n_A, n_B).
B = np.random.rand(n_A,n_B) + 3
B /= B.sum(axis=1)[:,np.newaxis]
# Random vector of length n_A, normalised to sum to 1.
start = np.random.rand(n_A)
start /= start.sum()
np.set_printoptions(precision=5, suppress=True)
############################################################
# Time the Baum-Welch run on the observations O.
begin = time.time()
print('begin =', begin)
# NOTE(review): the return value is discarded and A, B, start are printed
# afterwards, so baum_welch presumably updates them in place; 5000 is
# presumably an iteration cap -- confirm against hmm.baum_welch.
hmm.baum_welch(A, B, start, O, 5000, eps=1e-10, verbose=False)
print('cost =', time.time() - begin)
print(A)
print(start)
print('emission prob:')
# One line per column of B, labelled with inv_dic[i].
for i in range(n_B):
    print(inv_dic[i], B[:,i])
# Two stacked axes sharing the same tick/limit settings: one tick per
# entry of ``vocab`` on x, y limited to [0, 1].
pl.style.use('ggplot')
fig, ax = pl.subplots(2,1)
pl.setp(ax, xticks=range(len(vocab)), xticklabels=vocab, xlim=(-1, len(vocab)), ylim=(0,1))