예제 #1
0
def plotter(s_list, z_list, prms, figtitle, msg):
    """Draw a 4x2 diagnostic figure for the sequences and parameters.

    Panels, addressed as (row, column):
      (0, 0), (0, 1): bar charts of ``prms[0]`` (pi) and ``prms[1]`` (phi).
      (1, 0), (1, 1): row-normalised histogram of every entry of ``s_list``
                      and the concatenation of ``s_list``.
      (2, 0), (2, 1): the same two views for ``z_list``.
      (3, 0), (3, 1): ``prms[1][b]`` tiled along the length of the b-th
                      entry of ``s_list``, drawn stacked and individually.

    Args:
        s_list: sequence of arrays; histogrammed with ``n_states`` unit bins
            (presumably integer state labels -- confirm with the caller).
        z_list: sequence of arrays; histogrammed with ``n_cat`` unit bins.
        prms: indexable; ``prms[0]`` must be 2-D since its ``shape`` is
            unpacked into ``(n_states, n_cat)``.
        figtitle: passed to ``PlotModels`` and used in the super-title.
        msg: free text appended to the log line and the super-title.
    """
    from ml_utils.plot_models import PlotModels

    def _row_normalised_hist(seqs, n_bins):
        # One histogram per sequence, each row rescaled to sum to one.
        counts = arr(
            [histogram(seq, bins=range(n_bins + 1))[0] for seq in seqs]
        ).astype(float)
        return counts / counts.sum(1)[:, newaxis]

    logger.info('plotting %s %s' % (str(figtitle), msg))
    pi, phi = prms[0], arr(prms[1])
    pm = PlotModels(4, 2, figtitle)
    pm.multi_bar((0, 0), pi, title=r'$\pi$', xlbl='cat each state')
    pm.multi_bar((0, 1), phi, title=r'$\phi$', xlbl='cat each batch')
    n_states, n_cat = pi.shape

    # --- rows 1 and 2: histogram on the left, raw sequence on the right
    panels = ((1, 'S', s_list, n_states), (2, 'Z', z_list, n_cat))
    for row, label, seqs, n_bins in panels:
        hist = _row_normalised_hist(seqs, n_bins)
        pm.plot_states_indv(
            (row, 0), hist.T, ymax=hist.max(), title=label, cspan=1)
        pm.plot_states_indv(
            (row, 1), concatenate(seqs, 0), title=label, cspan=1)

    # --- row 3: phi of each batch repeated over that batch's length
    phi_seq = concatenate(
        [tile(prms[1][b], (x.shape[0], 1)).T for b, x in enumerate(s_list)],
        1)
    pm.plot_states_stack((3, 0), phi_seq, title='Phi', cspan=1)
    pm.plot_states_indv((3, 1), phi_seq, title='Phi', cspan=1)
    pm.sup_title('%s %s' % (str(figtitle), msg))
    pm.tight_layout()
예제 #2
0
 def update(self, Y, max_em_itr=20):
     """Run up to ``max_em_itr`` additional EM sweeps on ``Y``.

     ``self.vbs`` is created (first call) or extended (later calls) by
     ``max_em_itr`` zero slots, and the iteration index continues from the
     previous length. Each sweep walks ``self.update_order``: ``'E'``
     updates ``self.qs``, ``'M'`` updates ``self.theta``; any other entry
     is only logged as an error. The loop stops early as soon as
     ``self._update_vb(i)`` returns a truthy value.

     Args:
         Y: 2-D array; its shape is unpacked as ``(data_dim, data_len)``.
         max_em_itr: maximum number of sweeps for this call.

     NOTE(review): this method never trims the zero slots that remain
     after an early stop -- confirm whether ``_update_vb`` expects that.
     """
     # --- create or grow the per-iteration VB record
     if self.vbs is not None:
         ibgn = len(self.vbs)
         self.vbs = append(self.vbs, zeros(max_em_itr))
     else:
         ibgn = 0
         self.vbs = zeros(max_em_itr)
     iend = ibgn + max_em_itr

     # --- data size and initial expectation
     self.data_dim, data_len = Y.shape
     self.init_expt_s(data_len)

     # --- outer product of every column of Y with itself: (d, e, t)
     YY = einsum('dt,et->det', Y, Y)

     logger.info('Update order: %s' % self.update_order)
     for itr in range(ibgn, iend):
         self.log_info_update_itr(iend, itr, interval_digit=1)
         for step in self.update_order:
             if step == 'E':
                 self.qs.update(Y, self.theta, YY)
             elif step == 'M':
                 self.theta.update(Y, self.qs.expt, self.qs.expt2, YY)
             else:
                 logger.error('%s is not supported' % step)
         # --- early stop
         if self._update_vb(itr):
             break
     self.expt_s = self.qs.expt
예제 #3
0
 def load_params(self, file_name):
     """Load pickled parameters from ``file_name`` into this object.

     Args:
         file_name: path of the pickle file to read.

     Returns:
         True when the file existed and its content was handed to
         ``self.set_params``; False when the path does not exist.
     """
     if not os.path.exists(file_name):
         return False
     # NOTE: unpickling can execute arbitrary code -- only load files that
     # come from a trusted source.
     with open(file_name, 'rb') as fin:
         params = pickle.load(fin)
     self.set_params(params)
     logger.info('Loaded: %s' % file_name)
     return True
예제 #4
0
 def log_info_update_itr(cls, max_itr, itr, msg='', interval_digit=1):
     """Log iteration progress, at INFO level only every ``10**k`` steps.

     Args:
         max_itr: total number of iterations (shown in the message).
         itr: zero-based index of the current iteration.
         msg: prefix placed in front of the log text.
         interval_digit: exponent ``k`` of the INFO interval; reset to 0
             when it exceeds the number of characters of ``max_itr``.

     An INFO line (showing ``itr + 1``) is written when ``itr`` is a
     multiple of the interval or is the last iteration; a DEBUG line
     (showing ``itr``) is written on every call.
     """
     n_digits = len('%d' % max_itr)
     if n_digits < interval_digit:
         interval_digit = 0
     step = 10**interval_digit
     on_interval = itr % step == 0
     if on_interval or itr == (max_itr - 1):
         info_text = '%s update :%5d / %5d (interval %d)' % (
             msg, itr + 1, max_itr, step)
         logger.info(info_text)
     logger.debug('%s update :%5d / %5d' % (msg, itr, max_itr))
예제 #5
0
 def save_params(self, file_name, by_posterior=True):
     """Pickle the result of ``self.get_params`` into ``file_name``.

     Args:
         file_name: destination path, opened in binary write mode.
         by_posterior: forwarded unchanged to ``self.get_params``.

     Returns:
         True when the file was written; False when writing raised, in
         which case the exception is logged as an error (best effort, the
         exception is not propagated).
     """
     params = self.get_params(by_posterior)
     succeeded = True
     try:
         with open(file_name, 'wb') as fout:
             pickle.dump(params, fout)
         logger.info('Saved: %s' % file_name)
     except Exception as err:
         logger.error(err)
         succeeded = False
     return succeeded
예제 #6
0
    def is_conversed(cls, vbs, i, th):
        """Tell whether the VB value stopped improving at iteration ``i``.

        The values at ``i - 1`` and ``i`` are rounded to ``cls.decimals``
        decimals and the iteration counts as converged when their
        difference (current minus previous) is below ``th``.

        Args:
            vbs: indexable record of VB values, one per iteration.
            i: index of the current iteration.
            th: threshold on the rounded difference.

        Returns:
            True when converged, otherwise False. Always False while
            ``i < cls.min_itr`` or when there is no previous iteration
            (``i < 1``).
        """
        # A previous value must really exist: with i == 0 the index i - 1
        # is -1, which would silently wrap around to the LAST slot of vbs
        # and could report a bogus convergence.
        if i < 1 or i < cls.min_itr:
            return False

        vb_prv = nround(vbs[i - 1], decimals=cls.decimals)
        vb_cur = nround(vbs[i], decimals=cls.decimals)
        vb_diff = vb_cur - vb_prv
        if not vb_diff < th:
            return False

        logger.info(' '.join([
            'Conversed.',
            'iteration at %d.' % i,
            'VB diff %f < %f' % (vb_diff, th),
            '%3d: %f' % (i - 1, vbs[i - 1]),
            '%3d: %f' % (i, vbs[i]),
        ]))
        return True
예제 #7
0
 def update(self, Y, max_em_itr):
     """Run ``max_em_itr`` EM sweeps on ``Y``.

     Each sweep walks ``self.update_order``: ``'E'`` calls
     ``self.zs.update(Y, self.theta)`` and ``'M'`` calls
     ``self.theta.update(Y, self.zs)``. Any other entry is logged as an
     error and terminates the process via ``sys.exit(-1)``.

     Args:
         Y: observed data; per the original note an array of shape
             (data_dim, data_len) -- not checked here.
         max_em_itr: number of sweeps to run.
     """
     logger.info('update order %s, in Theta %s' %
                 (self.update_order, self.theta.update_order))
     for itr in range(max_em_itr):
         # progress is reported on every 10th sweep, once per step
         report = itr % 10 == 0
         for step in self.update_order:
             if report:
                 logger.info('iteration %3d (%s)' % (itr, step))
             if step == 'E':
                 self.zs.update(Y, self.theta)
             elif step == 'M':
                 self.theta.update(Y, self.zs)
             else:
                 logger.error('%s is not supported' % step)
                 sys.exit(-1)
예제 #8
0
    def __init__(self, fa_dim, data_dim, n_states, **args):
        """Store the model sizes and build the ``qZS`` and ``Theta`` parts.

        Args:
            fa_dim: factor dimension; ``aug_dim`` is stored as
                ``fa_dim + 1``.
            data_dim: dimension of the data, forwarded to ``Theta``.
            n_states: number of states, forwarded to ``qZS`` and ``Theta``.
            **args: optional settings, read with ``args.get``:
                update_order -- defaults to ``['E', 'M']``.
                expt_init_mode -- defaults to ``'random'`` and is
                forwarded to ``qZS``.
        """
        self.data_dim = data_dim
        self.fa_dim = fa_dim
        self.aug_dim = fa_dim + 1
        self.n_states = n_states
        self.update_order = args.get('update_order', ['E', 'M'])
        self._expt_init_mode = args.get('expt_init_mode', 'random')

        # --- one-off summary of the configuration (leading blank line
        # keeps the table on its own lines in the log)
        summary = ['']
        for label in ('data_dim', 'fa_dim', 'aug_dim', 'n_states'):
            summary.append('%10s: %2d' % (label, getattr(self, label)))
        summary.append('%10s: %s' % ('update', self.update_order))
        summary.append('%10s: %s' % ('init_mode', self._expt_init_mode))
        logger.info('\n'.join(summary))

        # --- theta and zs
        self.zs = qZS(fa_dim, n_states, expt_init_mode=self._expt_init_mode)
        self.theta = Theta(self.fa_dim, self.data_dim, self.n_states)
예제 #9
0
def init_expt(data_len, n_states, obs=None, mode='random'):
    """Build an initial ``(n_states, data_len)`` expectation array.

    Args:
        data_len: number of columns of the result.
        n_states: number of rows of the result.
        obs: observations used by the k-means mode; clustered as ``obs.T``
            (presumably one sample per column -- confirm with the caller).
        mode: ``'random'``, ``'kmeans'`` or anything else:
            random -- every column is one draw from a flat Dirichlet.
            kmeans -- needs ``obs``; the average of the flat array and a
                near one-hot array built from the k-means labels.
            other (or ``'kmeans'`` without ``obs``) -- the flat array with
                every entry ``1 / n_states``.

    Returns:
        The expectation array for the selected mode.
    """
    flat = ones((n_states, data_len)) / n_states

    if mode == 'kmeans' and obs is not None:
        logger.info('mode=kmeans')
        n_obs = obs.shape[-1]
        if data_len != n_obs:
            logger.warning('data_len is different from obs.shape[-1]')
        km = KMeans(n_clusters=n_states)
        km.fit(obs.T)
        eps = 1e-2
        # assigned cluster gets the remaining mass, all others get eps
        peak = 1 - (eps * (n_states - 1))
        expt_by_km = zeros((n_states, n_obs))
        for k in range(n_states):
            expt_by_km[k, km.labels_ == k] = peak
        expt_by_km[expt_by_km == 0] = eps
        return (flat + expt_by_km) / 2.0

    if mode == 'random':
        logger.info('mode=random')
        return dirichlet(ones(n_states), size=data_len).T

    logger.info('mode=flat (%s not supported)' % mode)
    return flat
예제 #10
0
 def save_params(self, file_name):
     """Save the parameters held by ``self.theta`` to ``file_name``.

     The work is delegated to ``self.theta.save_params``. The success
     message is logged only when that call reports success; a falsy result
     is logged as a warning instead of being announced as saved.

     Args:
         file_name: destination path, forwarded unchanged.

     Returns:
         The value returned by ``self.theta.save_params``.
     """
     was_saved = self.theta.save_params(file_name)
     if was_saved:
         logger.info('saved %s' % file_name)
     else:
         logger.warning('not saved %s' % file_name)
     return was_saved