Ejemplo n.º 1
0
 def _help_do_mstep(self, stats):
     """Re-estimate both entries of ``self.p`` from the accumulated statistics.

     For each index ``k`` in ``(0, 1)``, ``self.p[k]`` is set to
     ``stats['post_emission'][k] / (self.n[k] * d)``, where ``d`` is
     ``stats['post']`` passed through ``_add_pseudo_counts``. Both results
     are then passed through ``_add_pseudo_counts`` themselves, and finally
     ``self.merge_distr()`` is called.

     :param stats: mapping providing the ``'post'`` and ``'post_emission'``
         entries read above.
     """
     # Smooth the shared denominator once; pseudo counts guard against nan entries.
     smoothed_post = _add_pseudo_counts(stats['post'])

     # First pass: raw ratio for both indices.
     for k in (0, 1):
         self.p[k] = stats['post_emission'][k] / (self.n[k] * smoothed_post)

     # Second pass: smooth the freshly computed values, in the same order.
     for k in (0, 1):
         self.p[k] = _add_pseudo_counts(self.p[k])

     self.merge_distr()
Ejemplo n.º 2
0
    def _help_do_mstep(self, stats):
        """Update ``self.p[0]`` and ``self.p[1]`` and merge the distributions.

        The denominator shared by both updates is ``stats['post']`` after
        ``_add_pseudo_counts``. Each ``self.p[idx]`` is first assigned
        ``stats['post_emission'][idx] / (self.n[idx] * denominator)`` and is
        afterwards replaced by its ``_add_pseudo_counts`` value. The method
        ends by calling ``self.merge_distr()``.

        :param stats: mapping with the keys ``'post'`` and
            ``'post_emission'``.
        """
        # Pseudo counts are added to avoid nan entries in the division below.
        denominator = _add_pseudo_counts(stats['post'])
        emission = stats['post_emission']

        for idx in range(2):
            self.p[idx] = emission[idx] / (self.n[idx] * denominator)

        for idx in range(2):
            self.p[idx] = _add_pseudo_counts(self.p[idx])

        self.merge_distr()
Ejemplo n.º 3
0
 def _help_do_mstep(self, stats):
     """Re-estimate ``self.c`` and ``self.p`` for every (dim, comp, state).

     ``self.c[d][k][s]`` becomes ``stats['post_sum_l'][d][k][s]`` divided by
     the pseudo-counted ``stats['post'][s]``.

     ``self.p[d][k][s]`` is estimated directly only for component ``k == 0``
     (``stats['post_sum_l_emisson']`` over the pseudo-counted
     ``stats['post_sum_l_factor']``, then pseudo-counted again). For every
     other component it is ``self.factors[k] * self.p[d][0][s]``, i.e. it is
     derived from the value just computed for component 0.

     Finally ``self.merge_distr`` is called with ``stats['weights']``.

     :param stats: mapping providing ``'post'``, ``'post_sum_l'``,
         ``'post_sum_l_emisson'``, ``'post_sum_l_factor'`` and ``'weights'``.
     """
     for d in range(self.n_features):
         # Component 0 is visited first, so self.p[d][0][s] is already
         # up to date when the later components read it.
         for k in range(self.distr_magnitude):
             for s in range(self.n_components):
                 weight_mass = stats['post_sum_l'][d][k][s]
                 self.c[d][k][s] = weight_mass / _add_pseudo_counts(
                     stats['post'][s])

                 if k != 0:
                     self.p[d][k][s] = self.factors[k] * self.p[d][0][s]
                     continue

                 emission_mass = stats['post_sum_l_emisson'][d][k][s]
                 factor_mass = _add_pseudo_counts(
                     stats['post_sum_l_factor'][d][k][s])
                 self.p[d][k][s] = emission_mass / factor_mass
                 self.p[d][k][s] = _add_pseudo_counts(self.p[d][k][s])

     self.merge_distr(stats['weights'])
Ejemplo n.º 4
0
 def _help_do_mstep(self, stats):
     """Re-estimate ``self.p`` and ``self.c`` for every (dim, state, comp).

     ``self.p[dim][state][comp]`` is set to
     ``stats['post_sum_l_emisson'][dim][state][comp]`` divided by
     ``self.n[dim]`` times the pseudo-counted
     ``stats['post_sum_l'][dim][state][comp]``; the result is then passed
     through ``_add_pseudo_counts``.

     ``self.c[dim][state][comp]`` is set to
     ``stats['post_sum_l'][dim][state][comp]`` divided by the pseudo-counted
     ``stats['post'][state]``.

     :param stats: mapping providing ``'post'``, ``'post_sum_l'`` and
         ``'post_sum_l_emisson'``.
     """
     for dim in range(self.n_features):
         for state in range(self.n_components):
             # The state's posterior mass does not depend on comp. It is
             # smoothed like every other denominator here, so that a state
             # with zero posterior mass cannot produce nan weights or a
             # division error.
             state_mass = _add_pseudo_counts(stats['post'][state])
             for comp in range(self.distr_magnitude):
                 weight_mass = stats['post_sum_l'][dim][state][comp]
                 self.p[dim][state][comp] = stats['post_sum_l_emisson'][dim][state][comp] / (
                     self.n[dim] * _add_pseudo_counts(weight_mass))
                 self.p[dim][state][comp] = _add_pseudo_counts(self.p[dim][state][comp])
                 self.c[dim][state][comp] = weight_mass / state_mass
Ejemplo n.º 5
0
 def _help_do_mstep(self, stats):
     """Update ``self.p`` and ``self.c`` from the accumulated statistics.

     For each feature ``dim``, state ``state`` and component ``comp``:

     * ``self.p[dim][state][comp]`` is
       ``stats['post_sum_l_emisson'][dim][state][comp]`` divided by
       ``self.n[dim]`` times the pseudo-counted
       ``stats['post_sum_l'][dim][state][comp]``, and is then passed through
       ``_add_pseudo_counts``.
     * ``self.c[dim][state][comp]`` is
       ``stats['post_sum_l'][dim][state][comp]`` divided by the
       pseudo-counted ``stats['post'][state]``.

     :param stats: mapping providing ``'post'``, ``'post_sum_l'`` and
         ``'post_sum_l_emisson'``.
     """
     for dim in range(self.n_features):
         for state in range(self.n_components):
             # Smooth the per-state denominator as well: dividing by the raw
             # posterior mass yields nan (or raises) for a state that
             # received no posterior mass.
             state_mass = _add_pseudo_counts(stats['post'][state])
             for comp in range(self.distr_magnitude):
                 weight_mass = stats['post_sum_l'][dim][state][comp]
                 self.p[dim][state][comp] = stats['post_sum_l_emisson'][dim][state][comp] / (
                     self.n[dim] * _add_pseudo_counts(weight_mass))
                 self.p[dim][state][comp] = _add_pseudo_counts(self.p[dim][state][comp])
                 self.c[dim][state][comp] = weight_mass / state_mass
Ejemplo n.º 6
0
    def _help_accumulate_sufficient_statistics(self, obs, stats, posteriors):
        """Accumulate the statistics of one observation sequence into ``stats``.

        ``posteriors`` is first replaced by ``_valid_posteriors(posteriors, obs)``.
        Then, for every position ``t`` with observation ``symbol``:

        * ``stats['post']`` is increased by ``posteriors[t]``;
        * for every feature ``dim`` and mixture component ``comp`` a
          per-state contribution
          ``posteriors[t] * self.c[dim][comp] * h / denum`` is computed
          (``h`` holds ``self._get_poisson`` for every state, ``denum`` holds
          ``self._get_value`` for every state, smoothed with
          ``_add_pseudo_counts``) and added to ``stats['post_sum_l']``,
          ``stats['post_sum_l_emisson']`` (times ``symbol[dim]``) and
          ``stats['post_sum_l_factor']`` (times ``self.factors[comp]``);
        * ``stats['weights']`` counters are incremented depending on which of
          ``posteriors[t][1]`` / ``posteriors[t][2]`` dominates.

        At the end a copy of the validated posteriors is stored in
        ``stats['posterior']``.

        :param obs: sequence of observations; each one is indexed by ``dim``.
        :param stats: mapping of accumulators, updated in place.
        :param posteriors: per-position state posteriors; the code reads
            states 1 and 2 explicitly.
        """
        posteriors = _valid_posteriors(posteriors, obs)
        i = 0  # number of (t, dim, comp) iterations so far; diagnostics only
        print("run...! start at " + str(time()), file=sys.stderr)
        for t, symbol in enumerate(obs):
            stats['post'] += posteriors[t]
            for dim in range(self.n_features):
                for comp in range(self.distr_magnitude):
                    # The per-state Poisson values are memoised in the
                    # module-level lookup_poisson_state, keyed by the
                    # observed value and the current per-state parameters.
                    index = (symbol[dim],
                             tuple([
                                 self.p[dim][comp][state]
                                 for state in range(self.n_components)
                             ]))
                    if index not in lookup_poisson_state:
                        tmp = np.array([
                            self._get_poisson(symbol[dim],
                                              self.p[dim][comp][state])
                            for state in range(self.n_components)
                        ])
                        lookup_poisson_state[index] = tmp
                    h = lookup_poisson_state[index]
                    enum = self.c[dim][comp] * h
                    denum = np.array([
                        self._get_value(state, symbol, dim)
                        for state in range(self.n_components)
                    ])

                    i += 1
                    try:
                        contrib = posteriors[t] * enum / _add_pseudo_counts(denum)
                    except Exception:
                        # Best effort: dump the operands and fall back to a
                        # uniform contribution. Only ordinary errors are
                        # caught, so KeyboardInterrupt / SystemExit still
                        # propagate.
                        print("%s \n" % i, file=sys.stderr)
                        print("%s %s %s \n" % (denum, symbol, dim),
                              file=sys.stderr)
                        print("%s \n" % (self.c), file=sys.stderr)
                        print("%s \n" % (self.p), file=sys.stderr)
                        print("%s \n" % (posteriors[t]), file=sys.stderr)
                        print("%s \n" % (enum), file=sys.stderr)
                        # NOTE(review): the fallback has exactly three
                        # entries, i.e. it assumes three states - confirm.
                        contrib = np.array([
                            1.0 / self.distr_magnitude,
                            1.0 / self.distr_magnitude,
                            1.0 / self.distr_magnitude
                        ])
                    stats['post_sum_l'][dim][comp] += contrib
                    stats['post_sum_l_emisson'][dim][
                        comp] += contrib * symbol[dim]
                    stats['post_sum_l_factor'][dim][
                        comp] += contrib * self.factors[comp]

                    # NOTE(review): `state` is only bound inside the
                    # comprehensions above. On Python 3 comprehension
                    # variables do not leak, so this lookup relies on a name
                    # defined outside this method (or on Python 2 leaking the
                    # last comprehension value). Confirm the intended index
                    # before changing it.
                    if posteriors[t][1] > 0.5 or posteriors[t][2] > 0.5:
                        if posteriors[t][1] >= posteriors[t][2]:
                            stats['weights'][dim][state][0] += 1
                        if posteriors[t][2] > posteriors[t][1]:
                            stats['weights'][dim][state][1] += 1

        stats['posterior'] = np.copy(posteriors)
Ejemplo n.º 7
0
    def _help_do_mstep(self, stats):
        """Re-estimate ``self.c`` and ``self.p`` for every (dim, comp, state).

        ``self.c[d][k][s]`` becomes ``stats['post_sum_l'][d][k][s]`` divided
        by the pseudo-counted ``stats['post'][s]``.

        ``self.p[d][k][s]`` becomes ``stats['post_sum_l_emisson'][d][k][s]``
        divided by the pseudo-counted ``stats['post_sum_l_factor'][d][k][s]``
        and is then passed through ``_add_pseudo_counts``. This is done for
        every component alike.

        Finally ``self.merge_distr`` is called with ``stats['weights']``.

        :param stats: mapping providing ``'post'``, ``'post_sum_l'``,
            ``'post_sum_l_emisson'``, ``'post_sum_l_factor'`` and
            ``'weights'``.
        """
        # NOTE: a variant that only estimated component 0 and set the other
        # components to self.factors[comp] * self.p[dim][0][state] used to
        # live here as commented-out code; it is not applied.
        for d in range(self.n_features):
            for k in range(self.distr_magnitude):
                for s in range(self.n_components):
                    weight_mass = stats['post_sum_l'][d][k][s]
                    self.c[d][k][s] = weight_mass / _add_pseudo_counts(
                        stats['post'][s])

                    emission_mass = stats['post_sum_l_emisson'][d][k][s]
                    factor_mass = _add_pseudo_counts(
                        stats['post_sum_l_factor'][d][k][s])
                    self.p[d][k][s] = emission_mass / factor_mass
                    self.p[d][k][s] = _add_pseudo_counts(self.p[d][k][s])

        self.merge_distr(stats['weights'])
Ejemplo n.º 8
0
 def _help_accumulate_sufficient_statistics(self, obs, stats, posteriors):
     """Fold one observation sequence into the accumulators in ``stats``.

     After replacing ``posteriors`` with ``_valid_posteriors(posteriors, obs)``
     the method walks over every position ``t`` / observation ``symbol``:

     * ``stats['post']`` grows by ``posteriors[t]``;
     * for each feature ``dim`` and mixture component ``comp`` it computes a
       per-state contribution
       ``posteriors[t] * self.c[dim][comp] * h / denum`` (``h``: values of
       ``self._get_poisson`` per state; ``denum``: values of
       ``self._get_value`` per state, smoothed by ``_add_pseudo_counts``) and
       adds it to ``stats['post_sum_l']``, to ``stats['post_sum_l_emisson']``
       (scaled by ``symbol[dim]``) and to ``stats['post_sum_l_factor']``
       (scaled by ``self.factors[comp]``);
     * counters in ``stats['weights']`` are incremented according to which
       of ``posteriors[t][1]`` and ``posteriors[t][2]`` is larger.

     A copy of the validated posteriors is finally stored under
     ``stats['posterior']``.

     :param obs: sequence of observations; each one is indexed by ``dim``.
     :param stats: mapping of accumulators, updated in place.
     :param posteriors: per-position state posteriors; states 1 and 2 are
         read explicitly.
     """
     posteriors = _valid_posteriors(posteriors, obs)
     i = 0  # number of (t, dim, comp) iterations so far; diagnostics only
     print("run...! start at " + str(time()), file=sys.stderr)
     for t, symbol in enumerate(obs):
         stats['post'] += posteriors[t]
         for dim in range(self.n_features):
             for comp in range(self.distr_magnitude):
                 # Per-state Poisson values are memoised in the module-level
                 # lookup_poisson_state, keyed by the observed value and the
                 # current per-state parameters.
                 index = (symbol[dim], tuple([self.p[dim][comp][state] for state in range(self.n_components)]))
                 if index not in lookup_poisson_state:
                     tmp = np.array([self._get_poisson(symbol[dim], self.p[dim][comp][state]) for state in range(self.n_components)])
                     lookup_poisson_state[index] = tmp
                 h = lookup_poisson_state[index]
                 enum = self.c[dim][comp] * h
                 denum = np.array([self._get_value(state, symbol, dim) for state in range(self.n_components)])

                 i += 1
                 try:
                     contrib = posteriors[t] * enum / _add_pseudo_counts(denum)
                 except Exception:
                     # Best effort: dump the operands and fall back to a
                     # uniform contribution. Only ordinary errors are caught,
                     # so KeyboardInterrupt / SystemExit still propagate.
                     print("%s \n" %i, file=sys.stderr)
                     print("%s %s %s \n" %(denum, symbol, dim), file=sys.stderr)
                     print("%s \n" %(self.c), file=sys.stderr)
                     print("%s \n" %(self.p), file=sys.stderr)
                     print("%s \n" %(posteriors[t]), file=sys.stderr)
                     print("%s \n" %(enum), file=sys.stderr)
                     # NOTE(review): the fallback has exactly three entries,
                     # i.e. it assumes three states - confirm.
                     contrib = np.array([1.0/self.distr_magnitude, 1.0/self.distr_magnitude, 1.0/self.distr_magnitude])
                 stats['post_sum_l'][dim][comp] += contrib
                 stats['post_sum_l_emisson'][dim][comp] += contrib * symbol[dim]
                 stats['post_sum_l_factor'][dim][comp] += contrib * self.factors[comp]

                 # NOTE(review): `state` is only bound inside the
                 # comprehensions above. On Python 3 comprehension variables
                 # do not leak, so this lookup relies on a name defined
                 # outside this method (or on Python 2 leaking the last
                 # comprehension value). Confirm the intended index before
                 # changing it.
                 if posteriors[t][1] > 0.5 or posteriors[t][2] > 0.5:
                     if posteriors[t][1] >= posteriors[t][2]:
                         stats['weights'][dim][state][0] += 1
                     if posteriors[t][2] > posteriors[t][1]:
                         stats['weights'][dim][state][1] += 1

     stats['posterior'] = np.copy(posteriors)