def __call__(self, data):
    # empirically, (and interestingly): this (near 16-bit) shift factor lets
    # the exposed ref property be 1 while giving occasional 0 dB output on a
    # Mac with "Use ambient noise reduction" set to on and "Input volume"
    # slider set to the minimum (settings which give about the lowest level
    # signal data you can get on the mac)
    ref_factor = N.float32(1.0 / (1 << 18))

    # this should be an adequate range of dBs
    clip_lo, clip_hi = 0, 140

    assert data.shape[-1] == self.data_length
    band = data[self.select]
    assert len(band) >= 1
    ref = N.float32(self.ref * len(band) * ref_factor)
    band *= band
    bandsum = band.sum()
    # XXX not ndarrays....
    sum = float(bandsum)
    if False:
        # slight smoothing; bizarre: causes Python ncurses to bog down!
        sum = (self.prev + sum) / 2
        self.prev = sum
    # space is 10 * log10(energy) dB
    dB = (10 * math.log10(sum / ref)) if sum > ref else 0
    dB = max(clip_lo, dB)
    dB = min(clip_hi, dB)
    dcheck(vumeterlog) and dprint(vumeterlog, 'dB', N.float32(dB), 'channels', len(band),
                                  'ref', self.ref, 'scaled_ref', ref, 'energy', bandsum)
    # we return the dB in decibel scaling
    return dB
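# A minimal, self-contained sketch of the dB computation above, with the
# channel selection, reference scaling, and debug logging stripped away;
# energy_to_db is a hypothetical stand-in name, not part of the module.
import math

def energy_to_db(energy, ref, clip_lo=0, clip_hi=140):
    # 10 * log10(energy / ref), floored at 0 dB when energy is at or below
    # the reference, then clipped into [clip_lo, clip_hi]
    db = 10 * math.log10(energy / ref) if energy > ref else 0
    return min(clip_hi, max(clip_lo, db))

# e.g. an energy 1000x the reference reads as 30 dB
assert energy_to_db(1000.0, 1.0) == 30.0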
def safe_log_divide(x, y):
    """
    Compute x - y where x and y are either scalars, Numpy arrays of the same
    shape, or Numpy arrays that can be projected into the same shape, and
    make sure that (-Inf) - (-Inf) is handled correctly and quietly.
    Complain if the second operand is -Inf but the first is not.

    >>> zero = quiet_log(0.0)
    >>> result = safe_log_divide(zero, 1.0)
    >>> result == zero
    True
    >>> safe_log_divide(1.0, zero)
    Traceback (most recent call last):
    ValueError: log division by zero
    >>> result = safe_log_divide(zero, zero)
    >>> result == zero
    True
    >>> x = numpy.zeros((3,2), dtype=float) - 3.2
    >>> x[:,1] = quiet_log(0.0)
    >>> x
    array([[-3.2, -Inf],
           [-3.2, -Inf],
           [-3.2, -Inf]])
    >>> safe_log_divide(x, x)
    array([[ 0., -Inf],
           [ 0., -Inf],
           [ 0., -Inf]])
    >>> y = numpy.zeros((3,2), dtype=float) - 3.2
    >>> safe_log_divide(x, y)
    array([[ 0., -Inf],
           [ 0., -Inf],
           [ 0., -Inf]])
    >>> safe_log_divide(y, x)
    Traceback (most recent call last):
    ValueError: log division by zero
    """
    with numpy.errstate(invalid='ignore'):
        POS_INF = 1.0 - LOG_ZERO
        ret = numpy.subtract(x, y)
        if isinstance(ret, numpy.ndarray):
            if (ret == POS_INF).any():
                dc = dcheck("math_raise")
                dc and dc("Arguments: numerator = \n%s\ndenominator = \n%s" % (x, y))
                raise ValueError("log division by zero")
            numpy.place(ret, numpy.isnan(ret), LOG_ZERO)
        elif numpy.isnan(ret):
            ret = LOG_ZERO
        elif ret == POS_INF:
            dc = dcheck("math_raise")
            dc and dc("Arguments: numerator = \n%s\ndenominator = \n%s" % (x, y))
            raise ValueError("log division by zero")
        return ret
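# The doctest above relies on quiet_log and LOG_ZERO from elsewhere in this
# module; a plausible minimal version (an assumption about their behavior,
# not the module's actual definitions) is:
import numpy

LOG_ZERO = float('-inf')

def quiet_log(x):
    # log that maps 0 to LOG_ZERO without emitting a divide-by-zero warning
    with numpy.errstate(divide='ignore'):
        return numpy.log(x)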
def _init_models_rand(self, primer):
    k = self.num_components
    d = self.dimension
    self._weights = np.ones(k, dtype=float) / k
    dc1 = dcheck("gaussian_priming")
    if primer is None:
        means = np.array([self.rand.normalvariate(0, 1) for x in range(d) for y in range(k)])
        self.set_means(np.reshape(means, (k, d)))
        if self.covariance_type is GaussianModelBase.DIAGONAL_COVARIANCE:
            vars = np.array([0.7] * d * k)
            # vars = np.array([self.rand.normalvariate(0,1) ** 2 for x in range(d) for y in range(k)])
            self.set_vars(np.reshape(vars, (k, d)))
        else:
            assert self.covariance_type is GaussianModelBase.FULL_COVARIANCE
            self.set_vars(np.resize(np.eye(d) * 0.7, (k, d, d)))
    else:
        # use primer
        means = []
        vars = []
        dc1 and dc1("priming with model = %s" % (primer,))
        mean_primer = primer.copy()
        mean_primer.set_vars(0.25 * primer.vars)
        dc1 and dc1("priming means with model = %s" % (mean_primer,))
        for y in xrange(k):
            # loop over components
            means.append(mean_primer.sample())
            vars.append(primer.vars)
        means = np.array(means).reshape(k, d)
        dc1 and dc1("primed means = %s" % (means,))
        self.set_means(means)
        if self.covariance_type is GaussianModelBase.DIAGONAL_COVARIANCE:
            vars = np.array(vars).reshape(k, d)
        else:
            vars = np.array(vars).reshape(k, d, d)
        dc1 and dc1("primed vars = %s" % (vars,))
        self.set_vars(vars)
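# For intuition about the primer path: each component mean is drawn from the
# primer with its variances shrunk by 0.25, and the primer's variances are
# reused unchanged. A rough standalone sketch of the diagonal case (pure
# NumPy, hypothetical helper name, no primer object):
import numpy as np

def prime_diagonal(primer_mean, primer_vars, k, rng=np.random):
    # draw k component means around the primer mean with variance shrunk
    # by 0.25 so the components start close together; reuse the primer vars
    d = len(primer_mean)
    means = primer_mean + rng.standard_normal((k, d)) * np.sqrt(0.25 * primer_vars)
    vars = np.tile(primer_vars, (k, 1))
    return means, vars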
def process(self, event):
    label, scores = event
    self.send(event)
    # We try to do as little as possible unless we're going to do printing, so
    # there are many early outs here.
    if label is None:
        return
    self._long_term_count += 1
    self._short_term_count += 1
    if label == scores[0][1]:
        self._long_term_correct += 1
        self._short_term_correct += 1
    if self._long_term_count % self._interval != 0:
        return
    dc = dcheck("acc_track")
    if not dc:
        return
    to_print = list(self._line_template)
    lt_acc = int(100 * self._long_term_correct / self._long_term_count)
    assert 0 <= lt_acc <= 100
    to_print[lt_acc] = '+'
    st_acc = int(100 * self._short_term_correct / self._short_term_count)
    assert 0 <= st_acc <= 100
    to_print[st_acc] = 'x'
    if lt_acc == st_acc:
        to_print[st_acc] = '*'
    if self._short_term_count > self._window_size:
        self._short_term_count = self._short_term_correct = 0
    dc(DebugPrint.NO_PREFIX, ''.join(to_print))
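# The display logic implies self._line_template has at least 101 slots, so a
# percent accuracy can index directly into the line. A toy rendering of the
# same idea (hypothetical values; '+' long-term, 'x' short-term, '*' both):
line = [' '] * 101
lt_acc, st_acc = 87, 92
line[lt_acc] = '+'
line[st_acc] = '*' if lt_acc == st_acc else 'x'
print(''.join(line))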
def read_htk_mmf_file(file, log_domain=False):
    """
    Read HTK mmf files into our model structure.

    file should be a filestream opened on an HTK MMF file.  hmm_mgr and
    gmm_mgr must be HmmMgr and GmmMgr instances, respectively, and both must
    be in the NOT_ADAPTING state.  The return is a dict with the names of
    the models as keys and the model indices in hmm_mgr as values.
    """
    # in general these imports should be at module scope, but doing so causes an
    # import circularity as hmm_mgr.py needs to import from *this* module
    # (htkmmf), so we delay things and do it in the function that needs these
    # symbols
    from onyx.am.gaussian import GaussianMixtureModel
    from onyx.am.modelmgr import GmmMgr
    from onyx.am.hmm import Hmm
    from onyx.am.hmm_mgr import HmmMgr
    dc = dcheck("htkmmf_read")
    covar_map = {'diagc': GaussianMixtureModel.DIAGONAL_COVARIANCE,
                 'fullc': GaussianMixtureModel.FULL_COVARIANCE}
    contents = file.read()
    dc and dc("contents = \n%s" % (contents,))
    try:
        result = mmf.parse('top', contents)
    except Exception, any:
        print "HTK MMF parse error: " + str(any)
        return None
def __eq__(self, other):
    dc = dcheck("gmm_eq")
    if (self.dimension, self.covariance_type) != (other.dimension, other.covariance_type):
        dc and dc("dimensions or covariance_types are not eq")
        return False
    if (self.means != other.means).any():
        dc and dc("means are not eq: self.means = %s; other.means = %s, cmp = %s" %
                  (self.means, other.means, self.means == other.means))
        return False
    if (self.vars != other.vars).any():
        dc and dc("vars are not eq: self.vars = %s; other.vars = %s" % (self.vars, other.vars))
        return False
    return True
def accum_sequence(self, mi, gamma, comp_scores, seq):
    """
    Accumulate for a sequence of datapoints.  mi is a model index; gamma is
    a Numpy array of shape (N+1,) where N = len(seq).  comp_scores is a
    list of length N containing .  seq is an iterable of observations in
    the form of Numpy arrays.  This call can only be made when the
    adaptation state is ACCUMULATING.
    """
    dc = dcheck("mm_as")
    self._verify_index_for_accum(mi)
    self._require_state("ACCUMULATING")
    seq = tuple(seq)
    assert len(seq) == len(gamma) - 1
    assert len(seq) == len(comp_scores)
    dc and dc("Accumulating sequence: %s\n gamma = %s\n comp_scores = %s" %
              (seq, gamma, comp_scores))
    self._accums[mi].accum_sequence(gamma, comp_scores, seq)
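# A miniature shape check of the documented contract: gamma carries one more
# (boundary) entry than there are observations, and comp_scores has one
# entry per observation (toy values only):
import numpy as np

seq = [np.zeros(2), np.zeros(2), np.zeros(2)]
gamma = np.ones(len(seq) + 1)        # shape (N+1,)
comp_scores = [None] * len(seq)      # one entry per frame
assert len(seq) == len(gamma) - 1 == len(comp_scores)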
def __call__(self, labeled_data):
    label, data = labeled_data
    assert label in self.labels
    assert all(len(datum) == self.nfeatures for datum in data)
    index = self.labels.index(label)
    relevance = self.samples_seen[index] / len(data)
    self.samples_seen[index] += len(data)
    self.models[index].set_relevances((relevance, relevance))
    self.models[index].adapt(data)
    dc = dcheck('SimpleGaussianTrainer')
    if dc:
        dc('samples_seen', tuple(self.samples_seen))
        for label, model in izip(self.labels, self.models):
            dc(' ', '%-6s:' % label, model)
    return label
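# relevance is the ratio of previously seen samples to the size of the new
# batch. One common use of such a factor (an assumption about what
# set_relevances/adapt do internally, not their actual code) is to weight
# the old estimate in a running mean:
import numpy as np

def relevance_adapt_mean(old_mean, data, relevance):
    # the old mean summarizes relevance * len(data) samples, so the exact
    # pooled mean is (relevance * old + batch) / (relevance + 1)
    batch_mean = np.mean(data, axis=0)
    return (relevance * old_mean + batch_mean) / (relevance + 1.0)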
def set_vars(self, v, rel_factor=1.0):
    dc = dcheck("gaussian_numeric_error")
    self._verify_reasonable(v)
    assert 0.0 <= rel_factor <= 1.0
    self._score_tuple = self._cholesky = None
    if self.covariance_type is GaussianModelBase.FULL_COVARIANCE:
        assert all(len(x) == self.dimension for x in v)
        est_vars = np.array(v, dtype=float)
        self._vars = (est_vars if rel_factor == 1.0
                      else rel_factor * est_vars + (1 - rel_factor) * self.vars)
        try:
            self._var_recip_scaled = None
            self._var_inv = np.linalg.inv(self._vars)
            var_det = np.linalg.det(self._vars)
            self._denominator = np.sqrt(pow((2 * np.pi), self.dimension) * var_det)
        except np.linalg.LinAlgError, e:
            dc and dc("Error setting full covar matrix: %s\nself._vars = %s" % (e, self._vars))
            raise
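# The cached inverse and denominator are exactly the pieces of the full
# covariance Gaussian density this model scores with; a self-contained
# sketch of that density (hypothetical helper name):
import numpy as np

def gaussian_density(x, mean, covar):
    # N(x; mean, covar) for a full covariance matrix
    d = len(mean)
    diff = x - mean
    denominator = np.sqrt((2 * np.pi) ** d * np.linalg.det(covar))
    exponent = -0.5 * np.dot(diff, np.dot(np.linalg.inv(covar), diff))
    return np.exp(exponent) / denominator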
def process(data):
    db = vumeter(data)
    db_queue.append(db)
    tag_queue.extend(utt_tagger(db))
    if tag_queue and db_queue:
        tags = list()
        #while tag_queue and db_queue:
        if True:
            # XXX the tracker_range info is diagnostic
            tag, tracker_range = tag_queue.popleft()
            db = db_queue.popleft()
            tags.append(tag)
            state_change = (tag != last_tag[0])
            if state_change:
                last_tag[0] = tag
            dc = dcheck(endpointdisplay)
            if dc:
                toggle[0] ^= 0x1
                tog = toggle[0]
                line = list(blank_line)
                dbindex = min(vu_top, int(db * utt.scale))
                if utt.fill and not state_change:
                    nfill = (dbindex + 1) // 2
                    line[tog:dbindex+tog:2] = pixel * nfill
                line[dbindex] = pixel
                if utt.trak:
                    lo, high = tuple(min(vu_top, int(x * utt.scale)) for x in tracker_range)
                    line[lo] = pixel if not utt.fill else ' '
                    line[high] = pixel
                if tag and tog:
                    line[-bar_length:] = bar
                if state_change:
                    line[:bar_length] = bar
                    ## if not utt.fill:
                    ##     line[2:bar_length+2] = bar
                    ## else:
                    ##     line[2:bar_length+2] = barx
                dc(DebugPrint.NO_PREFIX, ' ', ''.join(line))
        return tags
    else:
        return ()
def _apply_one_accum_set(self, mi):
    dc = dcheck("mm_aoas")
    m, a = self._models[mi], self._accums[mi]
    dc and dc("For model %d, accums are %s" % (mi, a))
    if a.num_frames_accumulated == 0:
        return
    est_weights = a.zeroth_accum / a.norm_accum
    est_means = a.first_accum / a.zeroth_accum[:, newaxis]
    mu_sq = numpy.zeros_like(a.second_accum)
    for ci in xrange(a.num_components):
        if self._covariance_type is GaussianModelBase.FULL_COVARIANCE:
            mu_sq[ci] = numpy.outer(est_means[ci], est_means[ci])
        else:
            assert (self._covariance_type is GaussianModelBase.DIAGONAL_COVARIANCE
                    or self._covariance_type is GaussianModelBase.DUMMY_COVARIANCE)
            mu_sq[ci] = est_means[ci] ** 2
    if self._covariance_type is GaussianModelBase.FULL_COVARIANCE:
        divisor = a.zeroth_accum[:, newaxis, newaxis]
    else:
        assert (self._covariance_type is GaussianModelBase.DIAGONAL_COVARIANCE
                or self._covariance_type is GaussianModelBase.DUMMY_COVARIANCE)
        divisor = a.zeroth_accum[:, newaxis]
    dc and dc("mu_sq = %s" % (mu_sq,))
    div = a.second_accum / divisor
    dc and dc("div = %s" % (div,))
    est_vars = div - mu_sq
    # XXX I don't know what the right thing to do here is for the full covariance case.
    # Also, we may want to move this clipping to gaussian.py
    if self._covariance_type is GaussianModelBase.DIAGONAL_COVARIANCE:
        MIN_DEV = 2.0e-20
        MAX_DEV = 2.0e+20
        est_vars.clip(min=MIN_DEV, max=MAX_DEV, out=est_vars)
    m.set_weights(est_weights)
    m.set_means(est_means)
    dc and dc("Estimated vars = %s" % (est_vars,))
    m.set_vars(est_vars)
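# The variance step is the second-moment identity var = E[x^2] - (E[x])^2
# applied per component with posterior-weighted accumulators. A toy
# diagonal-case check of the accumulator algebra for one component:
import numpy as np

x = np.array([[1.0, 2.0], [3.0, 4.0], [5.0, 0.0]])   # three 2-d frames
gamma = np.array([0.2, 0.5, 0.3])                    # per-frame posteriors
zeroth = gamma.sum()
first = (gamma[:, np.newaxis] * x).sum(axis=0)
second = (gamma[:, np.newaxis] * x ** 2).sum(axis=0)
mean = first / zeroth
var = second / zeroth - mean ** 2                    # E[x^2] - mu^2
assert (var >= 0).all()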
def __call__(self, value):
    # XXX gap semantics
    tag, data = value
    result = self.result
    ret = ()
    if tag != self.prev_tag:
        if result:
            # careful: return a one-item sequence of (tag, (events...))
            rettag = self.prev_tag
            retseq = tuple(result)
            ret = ((rettag, retseq),)
            dc = dcheck(runslog)
            if dc:
                dc(ret[0])
                dc(rettag)
                for index, item in enumerate(retseq):
                    dc(' ', index)
                    for val in item:
                        dc(' ', ' ', val)
            del result[:]
        self.prev_tag = tag
    result.append(data)
    return ret
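# Fed a stream of (tag, data) pairs, the element above buffers consecutive
# same-tag data and emits one (tag, (events...)) item at each tag boundary,
# holding the final run until the next boundary arrives. A hypothetical
# standalone equivalent, without the debug output:
def runs(pairs):
    # group consecutive same-tag data into (tag, tuple_of_data) items
    prev_tag, result = None, []
    for tag, data in pairs:
        if tag != prev_tag and result:
            yield prev_tag, tuple(result)
            del result[:]
        prev_tag = tag
        result.append(data)

stream = [(False, 1), (False, 2), (True, 3), (True, 4), (False, 5)]
assert list(runs(stream)) == [(False, (1, 2)), (True, (3, 4))]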
def __call__(self, value):
    dc = dcheck('UtteranceTagger')
    low_factor, high_factor, min_range, start_window, start_count, stop_window, stop_count = self.data
    state, count, queue = self.state
    # update the range trackers
    range_min, range_max = tracker_range = self.tracker(value)
    dc and dc('state', state, ' count', count, ' queue', len(queue),
              ' value', value, ' range', range_min, range_max)
    if state is BAC:
        # in background state, so we're looking for enough speechy stuff;
        # we require a range so as not to trigger in noise
        range_max = max(range_max, range_min + min_range)
        high_t = range_min + (range_max - range_min) * high_factor
        if value > high_t:
            queue.append((1, tracker_range))
            count += 1
            dc and dc('new_count', count)
        else:
            queue.append((0, tracker_range))
        if len(queue) < start_window:
            self.state = state, count, queue
            return ()
        assert len(queue) == start_window
        if count >= start_count:
            ret = tuple((True, outrange) for inc, outrange in queue)
            assert len(ret) == start_window
            state = UTT
            dc and dc('new_state', state)
            count = 0
            queue.clear()
            self.state = state, count, queue
            return ret
        tag = False
    else:
        assert state is UTT
        # in utterance state, so we're looking for enough backgroundy stuff;
        # we don't limit the range here, thus we don't get trapped when the
        # background level undergoes a sudden and persistent rise in level
        low_t = range_min + (range_max - range_min) * low_factor
        if value <= low_t:
            queue.append((1, tracker_range))
            count += 1
            dc and dc('new_count', count)
        else:
            queue.append((0, tracker_range))
        if len(queue) < stop_window:
            self.state = state, count, queue
            return ()
        assert len(queue) == stop_window
        if count >= stop_count:
            ret = tuple((True, outrange) for inc, outrange in queue)
            assert len(ret) == stop_window
            state = BAC
            dc and dc('new_state', state)
            count = 0
            queue.clear()
            self.state = state, count, queue
            return ret
        tag = True
    # queue is full, but no state change
    dc and dc('tag', tag)
    inc, outrange = queue.popleft()
    count -= inc
    assert 0 <= count <= len(queue)
    self.state = state, count, queue
    return ((tag, outrange),)
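# The tagger is a two-state hysteresis machine: in background it waits for
# start_count of the last start_window values to exceed a high threshold;
# in utterance it waits for stop_count below a low threshold. A stripped
# down, fixed-threshold sketch of the windowed-count trigger at its core
# (hypothetical helper, no adaptive range tracking):
from collections import deque

def windowed_trigger(values, threshold, window, need):
    # fire once 'need' of the last 'window' values exceed 'threshold'
    queue = deque(maxlen=window)
    for v in values:
        queue.append(1 if v > threshold else 0)
        if len(queue) == window and sum(queue) >= need:
            return True
    return False

assert windowed_trigger([0, 9, 9, 0, 9], threshold=5, window=3, need=2)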
def _get_EM_updates(self, data):
    dc1 = dcheck("gaussian")
    dc2 = dcheck("gaussian_pt")
    n = len(data)
    k = self.num_components
    d = self.dimension
    norm_sum = np.zeros((k,), dtype=float)
    mean_sum = np.zeros((k, d), dtype=float)
    colon_slice = slice(None)
    # There's some Numpy cleverness here to allow us to treat the two
    # covariance cases with the same code.  In the diagonal case, things are
    # pretty much simple operations on vectors, although we do have to
    # broadcast the data across components.  In the full case, we have to
    # take outer products of the data at one point and of the means at
    # another, and we have to broadcast the norm across two dimensions when
    # dividing by it in the var calculation.  Some of this is accomplished
    # by using variables which are tuples of slice objects.  Note that if s0
    # and s1 are slice objects, then arr[s0, s1] == arr[(s0, s1)], and that
    # the colon_slice constructed here is the equivalent of using a ':' in
    # square brackets.
    if self.covariance_type is GaussianModelBase.DIAGONAL_COVARIANCE:
        var_sum = np.zeros((k, d), dtype=float)
        square_data_op = np.multiply
        norm_slices = (colon_slice, np.newaxis)
        square_mean_slices1 = (colon_slice,)
        square_mean_slices2 = (colon_slice,)
    else:
        assert self.covariance_type is GaussianModelBase.FULL_COVARIANCE
        var_sum = np.zeros((k, d, d), dtype=float)
        square_data_op = np.outer
        norm_slices = (colon_slice, np.newaxis, np.newaxis)
        square_mean_slices1 = (colon_slice, colon_slice, np.newaxis)
        square_mean_slices2 = (colon_slice, np.newaxis, colon_slice)
    for x in data:
        dc2 and dc2("Processing frame %s" % (x,))
        norm, raw, ssum = self.get_estimate(x)
        dc2 and dc2("norm = %s, raw = %s, ssum = %s" % (norm, raw, ssum))
        norm_sum += norm
        mean_sum += norm[:, np.newaxis] * x
        var_sum += norm[norm_slices] * square_data_op(x, x)
    w = norm_sum / n
    assert (norm_sum != 0).all()
    mu = mean_sum[:, :] / norm_sum[:, np.newaxis]
    dc1 and dc1("w update = %s" % (w,))
    dc1 and dc1("norm_sum = %s" % (norm_sum,))
    dc1 and dc1("mean_sum = %s" % (mean_sum,))
    dc1 and dc1("var_sum = %s" % (var_sum,))
    dc1 and dc1("mu.shape = %s" % (mu.shape,))
    dc1 and dc1("mean_sum.shape = %s" % (mean_sum.shape,))
    dc1 and dc1("norm_sum.shape = %s" % (norm_sum.shape,))
    assert (norm_sum != 0).all()
    var = var_sum / norm_sum[norm_slices] - (mu[square_mean_slices1] * mu[square_mean_slices2])
    if self.covariance_type is GaussianModelBase.DIAGONAL_COVARIANCE:
        # XXX I don't know what the right thing to do here is for the full covariance case.
        # Also, we may want to move this clipping to the set_vars function rather than here;
        # note that this code was copied from modelmgr.py which has a similar issue.
        MIN_DEV = 2.0e-20
        MAX_DEV = 2.0e+20
        var.clip(min=MIN_DEV, max=MAX_DEV, out=var)
    assert (var != 0).all()
    return w, mu, var, norm
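# get_estimate presumably returns, per frame, the vector of posterior
# component responsibilities as norm; the loop then accumulates weighted
# zeroth, first, and second moments. A compact diagonal-case equivalent in
# matrix form (hypothetical, assuming responsibilities resp of shape (n, k)):
import numpy as np

def em_updates_diag(data, resp):
    # data: (n, d) frames; resp: (n, k) per-frame component posteriors
    n = len(data)
    norm_sum = resp.sum(axis=0)                           # (k,)
    w = norm_sum / n                                      # mixture weights
    mu = np.dot(resp.T, data) / norm_sum[:, np.newaxis]   # (k, d) means
    second = np.dot(resp.T, data ** 2) / norm_sum[:, np.newaxis]
    var = second - mu ** 2                                # E[x^2] - mu^2
    return w, mu, var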