def _ppl_log_single_document(self, docdata):
    """Compute per-document log-perplexity terms under the on-shell model.

    Args:
        docdata: triple ``(d, docToken, [doc_u, doc_e])`` — document id,
            token-id array, and per-reaction user/emotion id arrays.
            (Assumes docToken indexes columns of the K x V word
            distribution and doc_u/doc_e are aligned 1-D arrays —
            TODO confirm against the caller.)

    Returns:
        ``(ppl_w_log, ppl_e_log, ppl_log)``: negative log-likelihoods of
        words given emotions, emotions given words, and both jointly.

    Fix vs. previous revision: the ``try/except FloatingPointError``
    wrapper only re-raised the same exception, so it was a no-op that
    obscured the traceback; it has been removed.
    """
    d, docToken, [doc_u, doc_e] = docdata
    # Mixture of topic-word and background-word distributions, per topic k.
    prob_w_kv = (self.phiT * self.pi[1] + self.phiB * self.pi[0])
    ppl_w_k_log = -np.sum(np.log(prob_w_kv[:, docToken]), axis=1)
    # Rescale exp(-ppl_w_k_log) by a common constant to avoid underflow;
    # the dropped constant is returned separately.  (actual ppl^(-1))
    ppl_w_k_scaled, ppl_w_k_constant = expConstantIgnore(
        -ppl_w_k_log, constant_output=True)

    prob_e_mk = np.dot(self.psi[doc_u, :], self.eta)
    ppl_e_k_log = -np.sum(
        np.log(prob_e_mk[np.arange(doc_u.shape[0]), :, doc_e]), axis=0)
    ppl_e_k_scaled, ppl_e_k_constant = expConstantIgnore(
        -ppl_e_k_log, constant_output=True)  # (actual ppl^(-1))

    prob_k = self.theta

    # Emotions given words: weight each topic by its (scaled) word likelihood.
    prob_e_m = probNormalize(
        np.tensordot(prob_e_mk, np.multiply(prob_k, ppl_w_k_scaled),
                     axes=(1, 0)))
    ppl_e_log = -np.sum(np.log(prob_e_m[np.arange(doc_u.shape[0]), doc_e]))

    # Words given emotions — the same prob_w applies to every token position n.
    prob_w = probNormalize(
        np.tensordot(prob_w_kv, np.multiply(prob_k, ppl_e_k_scaled),
                     axes=(0, 0)))
    ppl_w_log = -np.sum(np.log(prob_w[docToken]))

    # Joint words & emotions: add back the constants dropped during scaling.
    ppl_log = -(np.log(
        np.inner(ppl_w_k_scaled, np.multiply(ppl_e_k_scaled, prob_k)))
        + ppl_w_k_constant + ppl_e_k_constant)
    return ppl_w_log, ppl_e_log, ppl_log
def _ppl_log_single_document_off_shell(self, docdata, display=False):
    """Compute per-document log-perplexity terms from the stored global
    latent variables (``self.GLV``) rather than the live sampler state.

    Args:
        docdata: triple ``(d, docToken, [doc_u, doc_e])`` — document id,
            token-id array, and per-reaction user/emotion id arrays.
            (Assumes docToken indexes columns of the K x V word
            distribution — TODO confirm against the caller.)
        display: when True, log intermediate quantities for debugging.

    Returns:
        ``([ppl_w_log, ppl_e_log, ppl_log], Nd, Md)`` — the three negative
        log-likelihoods plus the token count and reaction count.

    Fix vs. previous revision: the ``try/except FloatingPointError``
    wrapper only re-raised the same exception, so it was a no-op that
    obscured the traceback; it has been removed.
    """
    d, docToken, [doc_u, doc_e] = docdata
    # Mixture of topic-word and background-word distributions, per topic k.
    prob_w_kv = (self.GLV["phiT"] * self.GLV["pi"][1]
                 + self.GLV["phiB"] * self.GLV["pi"][0])
    ppl_w_k_log = -np.sum(np.log(prob_w_kv[:, docToken]), axis=1)
    # Rescale exp(-ppl_w_k_log) by a common constant to avoid underflow;
    # the dropped constant is returned separately.  (actual ppl^(-1))
    ppl_w_k_scaled, ppl_w_k_constant = expConstantIgnore(
        -ppl_w_k_log, constant_output=True)

    prob_e_mk = np.dot(self.GLV["psi"][doc_u, :], self.GLV["eta"])
    ppl_e_k_log = -np.sum(
        np.log(prob_e_mk[np.arange(doc_u.shape[0]), :, doc_e]), axis=0)
    ppl_e_k_scaled, ppl_e_k_constant = expConstantIgnore(
        -ppl_e_k_log, constant_output=True)  # (actual ppl^(-1))

    prob_k = self.GLV["theta"]

    # Emotions given words: weight each topic by its (scaled) word likelihood.
    prob_e_m = probNormalize(
        np.tensordot(prob_e_mk, np.multiply(prob_k, ppl_w_k_scaled),
                     axes=(1, 0)))
    ppl_e_log = -np.sum(np.log(prob_e_m[np.arange(doc_u.shape[0]), doc_e]))

    # Words given emotions — the same prob_w applies to every token position n.
    prob_w = probNormalize(
        np.tensordot(prob_w_kv, np.multiply(prob_k, ppl_e_k_scaled),
                     axes=(0, 0)))
    ppl_w_log = -np.sum(np.log(prob_w[docToken]))

    # Joint words & emotions: add back the constants dropped during scaling.
    ppl_log = -(np.log(
        np.inner(ppl_w_k_scaled, np.multiply(ppl_e_k_scaled, prob_k)))
        + ppl_w_k_constant + ppl_e_k_constant)

    ### test ###
    if display:
        self._log("ppl_log_single_document_off_shell for doc %d" % d)
        self._log("docToken %s" % str(docToken))
        self._log("ppl_w_k_scaled %s" % str(ppl_w_k_scaled))
        self._log("ppl_e_k_scaled %s" % str(ppl_e_k_scaled))
        self._log("prob_e_m %s" % str(prob_e_m))
        self._log("prob_g_m %s" % str(self.GLV["psi"][doc_u, :]))
    return [ppl_w_log, ppl_e_log, ppl_log], docToken.shape[0], doc_u.shape[0]
def _ppl_log_single_document_off_shell(self, docdata):
    """Compute per-document log-perplexity terms for the simpler
    (topic-only) model from stored parameters.

    Args:
        docdata: triple ``(d, docToken, [doc_u, doc_e])`` — document id,
            token-id array, and per-reaction user/emotion id arrays.
            (Assumes self.phi is K x V and self.eta is K x E —
            TODO confirm against the model definition.)

    Returns:
        ``([ppl_w_log, ppl_e_log, ppl_log], Nd, Md)``; the joint term
        ``ppl_log`` is not computed for this model and is returned as NaN.
    """
    d, docToken, [doc_u, doc_e] = docdata
    Nd = docToken.shape[0]
    # Uniform (all-ones) topic weight per token — presumably a placeholder
    # for a flat Dirichlet prior; verify against the derivation.
    prob_z_alpha = np.ones([self.K, Nd], dtype=np.float64)
    ppl_w_z = self.phi[:, docToken]
    prob_z = prob_z_alpha * ppl_w_z
    prob_z_sum = np.sum(
        prob_z, axis=1
    )  # product over dirichlet is the same of sum over dirichlet priors
    # Emotion distribution predicted from the aggregated topic weights.
    prob_e = probNormalize(np.dot(prob_z_sum, self.eta))
    ppl_e_log = -np.sum(np.log(prob_e)[doc_e])
    # Empirical emotion histogram of the document, smoothed to avoid log(0).
    doc_E = np.sum(np.identity(self.E, dtype=np.float64)[:, doc_e], axis=1)
    docE = probNormalize(doc_E + SMOOTH_FACTOR)
    # Dirichlet log-density of the empirical emotion distribution per topic.
    ppl_e_z_log = -(np.tensordot(np.log(docE), self.eta - 1.0, axes=(0, 1))
                    + np.log(self.eta_beta_inv))
    # Rescale by a common constant to avoid underflow before normalizing.
    ppl_e_z_scaled, ppl_e_z_constant = expConstantIgnore(
        -ppl_e_z_log, constant_output=True)
    # Word distribution predicted from the emotion-conditioned topic weights.
    prob_w = probNormalize(np.dot(ppl_e_z_scaled, self.phi))
    ppl_w_log = -np.sum(np.log(prob_w)[docToken])
    # Joint perplexity is not defined for this model variant.
    ppl_log = np.nan
    return [ppl_w_log, ppl_e_log, ppl_log], docToken.shape[0], doc_e.shape[0]
def _predict_single_document_off_shell(self, docdata):
    """Predict each reacting user's emotion distribution for one document
    using the stored global latent variables (``self.GLV``).

    Returns a pair ``(predicted, observed)``: the predicted per-user
    emotion probabilities and the observed emotion ids, both as lists.
    """
    glv = self.GLV
    doc_id, tokens, (users, emotions) = docdata

    # Per-topic word distribution: topic words mixed with background words.
    word_dist = glv["phiT"] * glv["pi"][1] + glv["phiB"] * glv["pi"][0]
    # Negative log-likelihood of the document's tokens under each topic.
    neg_loglik = -np.sum(np.log(word_dist[:, tokens]), axis=1)
    # Underflow-safe likelihoods (scaled by a dropped common constant).
    lik_scaled, _lik_const = expConstantIgnore(
        -neg_loglik, constant_output=True)

    topic_prior = glv["theta"]
    # Emotion distribution per (user, topic).
    emo_by_user_topic = np.dot(glv["psi"][users, :], glv["eta"])
    # Collapse the topic axis, weighting topics by prior x word likelihood.
    topic_weights = np.multiply(topic_prior, lik_scaled)
    emo_by_user = probNormalize(
        np.tensordot(emo_by_user_topic, topic_weights, axes=(1, 0)))

    return emo_by_user.tolist(), emotions.tolist()
def _ppl_log_single_document_off_shell(
        self, docdata):  ### potential underflow problem
    """Compute per-document log-perplexity terms from the stored global
    latent variables (``self.GLV``) for the background-switch model.

    Returns ``([ppl_w_log, ppl_e_log, ppl_log], Nd, Md)`` — negative
    log-likelihoods for words, emotions, and both jointly, plus the token
    and reaction counts.
    """
    glv = self.GLV
    doc_id, tokens, [users, emotions] = docdata
    n_reactions = users.shape[0]
    reaction_idx = np.arange(n_reactions)

    # --- word side: scaled per-topic likelihood of the document's tokens ---
    word_dist = glv["phi"]
    word_nll = -np.sum(np.log(word_dist[:, tokens]), axis=1)
    word_lik_scaled, word_lik_const = expConstantIgnore(
        -word_nll, constant_output=True)  # (actual ppl^(-1))

    # --- emotion side: mix foreground and background emotion channels ---
    emo_channel = (
        np.tensordot(glv["pi"], glv["eta"], axes=(1, 0)) * glv["c"][1]
        + np.tensordot(glv["piB"], glv["eta"], axes=(0, 0)) * glv["c"][0])
    emo_by_user_topic = np.dot(glv["psi"][users, :], emo_channel)
    emo_nll = -np.sum(
        np.log(emo_by_user_topic[reaction_idx, :, emotions]), axis=0)
    emo_lik_scaled, emo_lik_const = expConstantIgnore(
        -emo_nll, constant_output=True)  # (actual ppl^(-1))

    topic_prior = glv["theta"]

    # Emotions given words: topics weighted by scaled word likelihood.
    emo_pred = probNormalize(
        np.tensordot(emo_by_user_topic,
                     np.multiply(topic_prior, word_lik_scaled),
                     axes=(1, 0)))
    ppl_e_log = -np.sum(np.log(emo_pred[reaction_idx, emotions]))

    # Words given emotions — one shared word distribution for all positions.
    word_pred = probNormalize(
        np.tensordot(word_dist,
                     np.multiply(topic_prior, emo_lik_scaled),
                     axes=(0, 0)))
    ppl_w_log = -np.sum(np.log(word_pred[tokens]))

    # Joint term: restore the constants dropped during the rescaling.
    ppl_log = -(np.log(
        np.inner(word_lik_scaled,
                 np.multiply(emo_lik_scaled, topic_prior)))
        + word_lik_const + emo_lik_const)

    return [ppl_w_log, ppl_e_log, ppl_log], tokens.shape[0], users.shape[0]