def write_topic_terms(self, top_term_mat):
    """
    Write the topic x term matrix to the ``topic_term`` table.

    For every topic row, three parallel sequences -- the topic index
    repeated once per term, the term indices ``0 .. nterms-1`` and the
    row of scores -- are handed to ``generic_generator`` and the result
    is passed to ``self.dbase.executemany`` together with the INSERT
    statement.

    @param top_term_mat: topics x terms matrix (indexed ``[topic, term]``,
        must expose ``.shape``), should represent log-likelihood for
        accurate calculations
    """
    ntops = top_term_mat.shape[0]
    nterms = top_term_mat.shape[1]
    execution_str = 'INSERT INTO topic_term (id, topic, term, score) VALUES(NULL, ?, ?, ?)'

    # ``range`` instead of ``xrange``: xrange does not exist on Python 3,
    # whereas range iterates identically here on both major versions.
    for topic_no in range(ntops):
        topic = top_term_mat[topic_no, :]
        # generic_generator is defined outside this method; presumably it
        # yields one (topic, term, score) tuple per position of the three
        # sequences -- confirm against its definition.
        res = generic_generator((topic_no,) * nterms, range(nterms), topic)
        self.dbase.executemany(execution_str, res)
def write_topic_topic(self, top_term_mat):
    """
    Write pairwise topic x topic scores to the ``topic_topic`` table.

    For each topic ``i`` the score against every later topic ``j > i`` is
    ``1 / hellinger_distance(...)``, evaluated on the element-wise square
    roots of the corresponding rows. Only the upper triangle (``i < j``)
    is generated. An infinite score (i.e. a zero distance) is stored
    as ``-1``.

    @param top_term_mat: topics x terms matrix (indexed ``[topic, term]``).
        NOTE(review): rows are square-rooted element-wise before the
        distance is computed, so entries need to be non-negative; this
        docstring previously asked for log-likelihoods, which would be
        negative and turn into NaN -- confirm which representation
        callers actually pass.
    """
    # TODO make distance metric a user option
    execution_str = 'INSERT INTO topic_topic (id, topic_a, topic_b, score) VALUES(NULL, ?, ?, ?)'

    # ``range`` instead of ``xrange`` so the loop also runs on Python 3.
    for i in range(top_term_mat.shape[0]):
        distances = hellinger_distance(top_term_mat[i, :] ** 0.5,
                                       top_term_mat[i + 1:, :] ** 0.5)
        # A zero distance is an expected case that is handled right below,
        # so silence numpy's divide-by-zero warning for the reciprocal only.
        with np.errstate(divide='ignore'):
            scores = 1 / distances
        # Infinite similarity cannot be stored meaningfully; flag it as -1.
        scores[np.isinf(scores)] = -1

        n_scores = len(scores)
        res = generic_generator((i,) * n_scores,
                                range(i + 1, i + 1 + n_scores),
                                scores)
        self.dbase.executemany(execution_str, res)