def check_valid_prior(filename):
    import numpy
    from Util import load_txt_vector
    v = load_txt_vector(filename)
    v = numpy.array(v)
    assert v.ndim == 1
    assert numpy.all(v < 0.0), "log space assumed"
    v = numpy.exp(v)  # convert from log space to standard prob space
    tot = numpy.sum(v)
    assert numpy.isclose(tot, 1.0, atol=1e-4), "prior should sum to 1"
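For reference, a minimal round-trip sketch of how check_valid_prior might be exercised. The filename, class count, and uniform prior are illustrative assumptions, and it presumes that load_txt_vector can parse the whitespace-separated floats which numpy.savetxt writes:

import numpy

# Hypothetical round trip: write a uniform prior in +log space, then validate it.
# Assumes load_txt_vector reads newline-separated floats, as numpy.savetxt writes them.
n_classes = 10
uniform_log_prior = numpy.log(numpy.full((n_classes,), 1.0 / n_classes))
numpy.savetxt("prior.txt", uniform_log_prior)  # one float per line
check_valid_prior("prior.txt")  # all entries < 0, exp-sum is 1.0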
Example #2
  def _load_priors(self):
    """
    This will optionally initialize self.priors of shape (self.output_dim,), in -log space,
    already multiplied by any prior scale.

    :return: nothing
    """
    import os
    import numpy
    from Util import load_txt_vector
    scale = float(self.sprint_opts["prior_scale"])
    if not scale:
      return
    filename = self.sprint_opts["prior_file"]
    # We expect a filename pointing to the priors, stored as txt, in +log space.
    assert isinstance(filename, str)
    assert os.path.exists(filename)
    prior = load_txt_vector(filename)  # +log space
    self.priors = -numpy.array(prior, dtype="float32") * numpy.float32(scale)  # -log space
    assert self.priors.shape == (self.output_dim,), "dim mismatch: %r != %i" % (self.priors.shape, self.output_dim)
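To make the sign and scale handling concrete, a small numeric sketch (all values invented; how self.priors is consumed afterwards is an assumption, since the snippet does not show it): the file stores log p(c), _load_priors keeps -prior_scale * log p(c), and adding that to a log-posterior amounts to dividing by the scaled prior, the usual posterior-to-scaled-likelihood step in hybrid HMM decoding.

import numpy

log_p = numpy.log(0.25)  # prior of class c in +log space, as stored on disk: ~ -1.386
scale = 0.7              # prior_scale from sprint_opts
stored = -numpy.float32(log_p) * numpy.float32(scale)  # -log space: ~ +0.970
# Adding `stored` to a log-posterior yields log(p(c|x) / p(c)**scale):
log_posterior = numpy.log(0.6)  # hypothetical network output for class c
scaled_log_likelihood = log_posterior + stored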
Example #3
  def __init__(self, prior_scale=0.0, log_prior=None, use_label_priors=0,
               compute_priors_via_baum_welch=False,
               ce_smoothing=0.0, ce_target_layer_align=None,
               exp_normalize=True,
               am_scale=1, gamma=1, bw_norm_class_avg=False,
               sigmoid_outputs=False, exp_outputs=False, gauss_outputs=False,
               log_score_penalty=0,
               loss_with_softmax_prob=False,
               loss_like_ce=False, trained_softmax_prior=False,
               sprint_opts=None, warp_ctc_lib=None,
               **kwargs):
    import os
    import numpy
    import theano
    import theano.tensor as T
    super(SequenceOutputLayer, self).__init__(**kwargs)
    self.prior_scale = prior_scale
    # Non-default options are recorded via set_attr so they end up in the layer's stored attributes.
    if use_label_priors:
      self.set_attr("use_label_priors", use_label_priors)
    if prior_scale:
      self.set_attr("prior_scale", prior_scale)
    if log_prior is not None:
      # We expect a filename pointing to the priors, stored as txt, in +log space.
      assert isinstance(log_prior, str)
      self.set_attr("log_prior", log_prior)
      from Util import load_txt_vector
      assert os.path.exists(log_prior)
      log_prior = load_txt_vector(log_prior)
      assert len(log_prior) == self.attrs['n_out'], "dim mismatch: %i != %i" % (len(log_prior), self.attrs['n_out'])
      log_prior = numpy.array(log_prior, dtype="float32")
    if compute_priors_via_baum_welch:
      self.set_attr("compute_priors_via_baum_welch", compute_priors_via_baum_welch)
      assert self.attrs.get("compute_priors", False)
    self.log_prior = log_prior
    self.ce_smoothing = ce_smoothing
    if ce_smoothing:
      self.set_attr("ce_smoothing", ce_smoothing)
    if ce_target_layer_align:
      self.set_attr("ce_target_layer_align", ce_target_layer_align)
    self.exp_normalize = exp_normalize
    if not exp_normalize:
      self.set_attr("exp_normalize", exp_normalize)
    if sigmoid_outputs:
      self.set_attr("sigmoid_outputs", sigmoid_outputs)
    if exp_outputs:
      self.set_attr("exp_outputs", exp_outputs)
    if gauss_outputs:
      self.set_attr("gauss_outputs", gauss_outputs)
    if log_score_penalty:
      self.set_attr("log_score_penalty", log_score_penalty)
    if loss_with_softmax_prob:
      self.set_attr("loss_with_softmax_prob", loss_with_softmax_prob)
    if am_scale != 1:
      self.set_attr("am_scale", am_scale)
    if gamma != 1:
      self.set_attr("gamma", gamma)
    if bw_norm_class_avg:
      self.set_attr("bw_norm_class_avg", bw_norm_class_avg)
    self.loss_like_ce = loss_like_ce
    if loss_like_ce:
      self.set_attr("loss_like_ce", loss_like_ce)
    if trained_softmax_prior:
      # Learn the prior as a free parameter, parameterized via softmax so it
      # always stays a valid probability distribution.
      self.set_attr('trained_softmax_prior', trained_softmax_prior)
      assert not self.attrs.get('compute_priors', False)
      initialization = numpy.zeros((self.attrs['n_out'],), 'float32')
      if self.log_prior is not None:
        # Use the loaded log-prior as initialization.
        assert self.log_prior.shape == initialization.shape
        initialization = self.log_prior
      self.trained_softmax_prior_p = self.add_param(theano.shared(initialization, 'trained_softmax_prior_p'))
      self.priors = T.nnet.softmax(self.trained_softmax_prior_p).reshape((self.attrs['n_out'],))
      self.log_prior = T.log(self.priors)
    self.sprint_opts = sprint_opts
    if sprint_opts:
      self.set_attr("sprint_opts", sprint_opts)
    if warp_ctc_lib:
      self.set_attr("warp_ctc_lib", warp_ctc_lib)
    self.initialize()
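For orientation, a hypothetical set of keyword arguments for this constructor. The base-layer kwargs (name, sources, n_out, ...) that flow into **kwargs come from the surrounding network and are omitted; all values below are illustrative only, and the sprint_opts keys are taken from _load_priors above:

seq_output_layer_kwargs = dict(
    prior_scale=0.7,        # recorded via set_attr; scales the log-prior
    log_prior="prior.txt",  # filename of a +log-space prior vector of length n_out
    ce_smoothing=0.1,       # blend a cross-entropy term into the loss
    am_scale=0.3,           # acoustic model scale (only stored if != 1)
    sprint_opts={"prior_scale": 0.7, "prior_file": "prior.txt"},
)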