def sample(self, eta, count=1):
    # Probability of the spike at x = 0 versus mass under the base model.
    exp_x0 = safe_exp(eta[0])
    prob_x0 = exp_x0 / (exp_x0 +
                        safe_exp(self.base_model.log_partition(eta[1:])))
    results = np.zeros(count)
    # Draw from the base model only for the samples not pinned to zero.
    nonzero = np.random.random(size=count) > prob_x0
    results[nonzero] = self.base_model.sample(eta[1:], count=nonzero.sum())
    return results

def grad_log_partition(self, eta):
    exp_base_log_partition = safe_exp(self.base_model.log_partition(eta[1:]))
    exp_x0 = safe_exp(eta[0])
    denominator = exp_base_log_partition + exp_x0
    # Weight on the base model's gradient: the probability that x != 0.
    w = exp_base_log_partition / denominator
    return np.concatenate(
        ((exp_x0 / denominator)[:, np.newaxis].T,
         self.base_model.grad_log_partition(eta[1:]) * w),
        axis=0)

def diagonal_hessian_log_partition(self, eta):
    base_log_partition = self.base_model.log_partition(eta[1:])
    exp_base_log_partition = safe_exp(base_log_partition)
    exp_x0 = safe_exp(eta[0])
    exp_sum = safe_exp(eta[0] + base_log_partition)
    sum_exp = exp_base_log_partition + exp_x0
    sq_sum_exp = safe_sq(sum_exp)
    diag_hess_base = self.base_model.diagonal_hessian_log_partition(eta[1:])
    sq_grad_base = safe_sq(self.base_model.grad_log_partition(eta[1:]))
    # Fill in only the columns whose squared denominator is still finite;
    # where it has saturated to inf, the ratio is left at zero.
    numerator = np.zeros(diag_hess_base.shape)
    finite = sq_sum_exp != np.inf
    numerator[:, finite] = (
        sum_exp[finite] * diag_hess_base[:, finite] +
        exp_x0[finite] * sq_grad_base[:, finite]) / sq_sum_exp[finite]
    return np.concatenate(((exp_sum / sq_sum_exp)[:, np.newaxis].T,
                           exp_base_log_partition * numerator),
                          axis=0)

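# A hypothetical sanity check for the gradients above: compare
# grad_log_partition against central finite differences of log_partition.
# The helper name is made up, and it assumes eta is a 1-D vector for which
# log_partition returns a scalar; the batched shapes used above may differ.
import numpy as np

def check_grad_log_partition(model, eta, eps=1e-5):
    grad = np.ravel(model.grad_log_partition(eta))
    approx = np.zeros(len(eta))
    for i in range(len(eta)):
        eta_hi = np.array(eta, dtype=float)
        eta_lo = np.array(eta, dtype=float)
        eta_hi[i] += eps
        eta_lo[i] -= eps
        approx[i] = (model.log_partition(eta_hi) -
                     model.log_partition(eta_lo)) / (2 * eps)
    # Largest absolute gap between analytic and numerical gradients.
    return np.max(np.abs(grad - approx))
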
def test_safe_exp(self):
    """Tests safe_exp."""
    a = safe_exp(0)
    self.assertEqual(int(a), 1)
    max_float = sys.float_info.max
    log_max = np.log(max_float)
    a = safe_exp(log_max)
    self.assertLess(a, float("inf"))
    # NumPy overflow warnings travel through the warnings machinery (and
    # print to stderr), not stdout, so record them directly instead of
    # capturing stdout (requires `import warnings`).
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        safe_exp(log_max * 10)
    self.assertFalse(any(issubclass(w.category, RuntimeWarning)
                         for w in caught))

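# safe_exp itself does not appear in these snippets. A minimal sketch that
# would satisfy the test above, assuming a NumPy-based implementation: clip
# the exponent at log(max float) so np.exp stays finite and warning-free.
import sys
import numpy as np

def safe_exp(value):
    # float64 exp overflows (and warns) above ~709.78; clipping first caps
    # the result at the largest finite float instead.
    return np.exp(np.minimum(value, np.log(sys.float_info.max)))
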
def compute_perplexity(model, sess, name):
    """Compute perplexity of the output of the model.

    Args:
      model: model for which to compute perplexity.
      sess: tensorflow session to use.
      name: name of the batch.

    Returns:
      The perplexity of the eval outputs.
    """
    total_loss = 0
    total_predict_count = 0
    start_time = time.time()
    step = 0
    while True:
        try:
            loss, predict_count, batch_size = model.eval(sess)
            total_loss += loss * batch_size
            total_predict_count += predict_count
            step += 1
            # Report running perplexity every 500 steps.
            if step % 500 == 0:
                ls = total_loss / total_predict_count
                ppl = safe_exp(ls)
                print_out(" ## After %d steps, loss %.2f - ppl %.3f" %
                          (step, ls, ppl))
        except tf.errors.OutOfRangeError:
            break

    perplexity = safe_exp(total_loss / total_predict_count)
    print_time(" eval %s: perplexity %.2f" % (name, perplexity), start_time)
    return perplexity

def compute_perplexity(model, sess, name):
    """Compute perplexity of the output of the model.

    Args:
      model: model for which to compute perplexity.
      sess: tensorflow session to use.
      name: name of the batch.

    Returns:
      The perplexity of the eval outputs.
    """
    total_loss = 0
    total_predict_count = 0
    start_time = time.time()
    while True:
        try:
            output_tuple = model.eval(sess)
            total_loss += output_tuple.eval_loss * output_tuple.batch_size
            total_predict_count += output_tuple.predict_count
        except tf.errors.OutOfRangeError:
            break

    perplexity = utils.safe_exp(total_loss / total_predict_count)
    utils.print_time(" eval %s: perplexity %.2f" % (name, perplexity),
                     start_time)
    return perplexity

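# Both compute_perplexity variants reduce to ppl = exp(total loss / total
# predicted tokens). A toy check with made-up numbers: 1200 nats of loss
# over 400 predictions gives exp(3.0) ~ 20.09.
import math

total_loss, total_predict_count = 1200.0, 400
assert abs(math.exp(total_loss / total_predict_count) - 20.0855) < 1e-3
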
def rvs(self, n=None):
    """ Returns independent observations of this random variable. """
    mu = self.__mu
    S = self.__S
    if n is None:
        # One observation: exponentiate a single multivariate-normal draw.
        normal_values = np.array(mnormal.rvs(mu, S), ndmin=1)
        return np.array([safe_exp(v) for v in normal_values])
    all_lognormal_values = []
    for _ in range(n):
        normal_values = mnormal.rvs(mu, S)
        all_lognormal_values.append([safe_exp(v) for v in normal_values])
    return np.array(all_lognormal_values)

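# mnormal is presumably scipy.stats.multivariate_normal; if so, the loop in
# rvs can be vectorized with the size argument (a sketch under that
# assumption, with made-up mu and S):
import numpy as np
from scipy.stats import multivariate_normal as mnormal

mu, S = np.zeros(2), np.eye(2)
samples = np.exp(mnormal.rvs(mu, S, size=5))  # 5 lognormal observations
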
def process_stats(hparams, stats, info, global_step, steps_per_stats, log_f):
    """Update info and check for overflow."""
    # Per-step info
    info["avg_step_time"] = stats["step_time"] / steps_per_stats
    info["avg_grad_norm"] = stats["grad_norm"] / steps_per_stats
    info["avg_sequence_count"] = stats["sequence_count"] / steps_per_stats
    info["speed"] = stats["word_count"] / (1000 * stats["step_time"])

    # Check for overflow
    is_overflow = False
    if hparams.task == "joint":
        # Per-predict info
        info["train_ppl"] = utils.safe_exp(
            stats["slot_train_loss"] / stats["predict_count"])
        train_ppl = info["train_ppl"]
        if math.isnan(train_ppl) or math.isinf(train_ppl) or train_ppl > 1e20:
            utils.print_out(" step %d overflow, stop early" % global_step,
                            log_f)
            is_overflow = True

    return is_overflow

def log_partition(self, eta):
    return np.log(1 + safe_exp(eta))

def log_partition(self, eta):
    return np.log(safe_exp(eta[0]) +
                  safe_exp(self.base_model.log_partition(eta[1:])))

def sample(self, eta, count=1):
    exp_eta = safe_exp(eta)
    p = exp_eta / (1 + exp_eta)
    return np.random.random(size=count) < p

def hessian_log_partition(self, eta):
    # d^2/d eta^2 of log(1 + e^eta) is e^eta / (1 + e^eta)^2, which is
    # positive (the Bernoulli variance); the original negated it, which is
    # inconsistent with grad_log_partition below.
    exp_eta = safe_exp(eta)
    return exp_eta / safe_sq(exp_eta + 1)

def grad_log_partition(self, eta):
    exp_eta = safe_exp(eta)
    return exp_eta / (exp_eta + 1.0)

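# For this logistic family, A(eta) = log(1 + e^eta): grad_log_partition is
# the sigmoid, and the second derivative of A is the Bernoulli variance
# sigmoid(eta) * (1 - sigmoid(eta)). A standalone numerical check:
import numpy as np

eta = 0.5
p = 1.0 / (1.0 + np.exp(-eta))  # sigmoid(eta)
assert np.isclose(np.exp(eta) / (np.exp(eta) + 1), p)
assert np.isclose(np.exp(eta) / (np.exp(eta) + 1) ** 2, p * (1 - p))
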
def pdf(self, x):
    """ Returns an approximate value of the probability density function
    of this random variable at point x. """
    return safe_exp(self.log_pdf(x))