Exemple #1
0
 def write_topic_terms(self, top_term_mat):
     """
     Store the topic x term matrix in the ``topic_term`` table.

     The matrix is written one topic (row) at a time: each row is handed to
     ``self.dbase.executemany`` together with the insert statement below.
     The ``id`` column is always inserted as NULL.

     @param top_term_mat: topics x terms matrix, should represent log-likelihood for accurate calculations
     """
     insert_sql = 'INSERT INTO topic_term (id, topic, term, score) VALUES(NULL, ?, ?, ?)'
     topic_count = top_term_mat.shape[0]
     term_count = top_term_mat.shape[1]
     for topic_idx in xrange(topic_count):
         score_row = top_term_mat[topic_idx, :]
         # The topic index is repeated once per term so the three sequences
         # have matching lengths.
         topic_column = (topic_idx,) * term_count
         term_column = range(term_count)
         # generic_generator is defined elsewhere; presumably it combines the
         # sequences into (topic, term, score) parameter tuples -- confirm.
         rows = generic_generator(topic_column, term_column, score_row)
         self.dbase.executemany(insert_sql, rows)
Exemple #2
0
 def write_topic_topic(self, top_term_mat):
     """
     Store pairwise topic scores in the ``topic_topic`` table.

     For each topic ``i`` the score against every later topic is computed as
     ``1 / hellinger_distance(...)`` on the element-wise square roots of the
     matrix rows. Infinite scores are replaced by -1 before being written.
     Only rows after ``i`` are compared, so each pair is visited once.

     NOTE(review): the rows are square-rooted before the distance call, which
     suggests non-negative values are expected -- confirm against callers.

     @param top_term_mat: topics x terms matrix, should represent log-likelihood for accurate calculations
     """
     # TODO make distance metric a user option
     insert_sql = 'INSERT INTO topic_topic (id, topic_a, topic_b, score) VALUES(NULL, ?, ?, ?)'
     for topic_a in xrange(top_term_mat.shape[0]):
         root_a = top_term_mat[topic_a, :]**0.5
         root_rest = top_term_mat[topic_a + 1:, :]**0.5
         scores = 1 / hellinger_distance(root_a, root_rest)
         # Infinite scores (e.g. from a zero distance) are stored as -1.
         scores[np.isinf(scores)] = -1
         pair_count = len(scores)
         first_b = topic_a + 1
         # generic_generator is defined elsewhere; presumably it combines the
         # sequences into (topic_a, topic_b, score) parameter tuples -- confirm.
         rows = generic_generator((topic_a,) * pair_count,
                                  range(first_b, first_b + pair_count),
                                  scores)
         self.dbase.executemany(insert_sql, rows)
Exemple #3
0
 def write_topic_terms(self, top_term_mat):
     """
     Write every row of the topic x term matrix to the ``topic_term`` table.

     A batch of insert parameters is built lazily for each topic and passed
     to ``self.dbase.executemany``; the ``id`` column is inserted as NULL.

     @param top_term_mat: topics x terms matrix, should represent log-likelihood for accurate calculations
     """
     n_topics = top_term_mat.shape[0]
     n_terms = top_term_mat.shape[1]
     statement = 'INSERT INTO topic_term (id, topic, term, score) VALUES(NULL, ?, ?, ?)'
     # One batch per topic, created only when the loop below asks for it.
     # generic_generator is defined elsewhere; presumably it yields
     # (topic, term, score) parameter tuples -- confirm.
     batches = (
         generic_generator((row_no, ) * n_terms, range(n_terms),
                           top_term_mat[row_no, :])
         for row_no in xrange(n_topics)
     )
     for batch in batches:
         self.dbase.executemany(statement, batch)
Exemple #4
0
 def write_topic_topic(self, top_term_mat):
     """
     Write the topic x topic score matrix to the ``topic_topic`` table.

     Each topic is compared with the topics that follow it. The score is the
     reciprocal of ``hellinger_distance`` applied to the element-wise square
     roots of the rows, and infinite scores are overwritten with -1.

     NOTE(review): taking square roots of the rows suggests non-negative
     input is expected -- confirm against callers.

     @param top_term_mat: topics x terms matrix, should represent log-likelihood for accurate calculations
     """
     # TODO make distance metric a user option
     statement = 'INSERT INTO topic_topic (id, topic_a, topic_b, score) VALUES(NULL, ?, ?, ?)'
     n_topics = top_term_mat.shape[0]
     for current in xrange(n_topics):
         following = current + 1
         distances = hellinger_distance(top_term_mat[current, :]**0.5,
                                        top_term_mat[following:, :]**0.5)
         scores = 1 / distances
         # Replace infinite scores (e.g. from a zero distance) with -1.
         infinite_at = np.where(np.isinf(scores))
         scores[infinite_at] = -1
         n_scores = len(scores)
         # generic_generator is defined elsewhere; presumably it yields
         # (topic_a, topic_b, score) parameter tuples -- confirm.
         batch = generic_generator((current, ) * n_scores,
                                   range(following, following + n_scores),
                                   scores)
         self.dbase.executemany(statement, batch)