Esempio n. 1
0
 def __init__(self, k=10, lda_model=None):
     """Store the topic model and the expansion parameter ``k``.

     Args:
         k: Value kept on ``self.k`` for later use by the instance.
         lda_model: Pre-built model to reuse. When ``None``, a new
             ``LDAmodel`` is constructed with ``n_topics=500``,
             ``n_passes=50`` and ``vocabulary='combined'``.
     """
     # Compare against None explicitly: a supplied model object that happens
     # to evaluate as falsy (e.g. it defines __len__ or __bool__) must still
     # be used instead of being silently replaced by a fresh default model.
     if lda_model is not None:
         self.lda = lda_model
     else:
         self.lda = LDAmodel(n_topics=500,
                             n_passes=50,
                             vocabulary='combined')
     # parameters
     self.k = k
Esempio n. 2
0
class LDAExpansion(QueryExpansion):
    """Query expansion that appends terms taken from a topic model.

    The model is either injected by the caller or built with fixed default
    settings; ``k`` scales how many terms are requested per topic.
    """

    def __init__(self, k=10, lda_model=None):
        """Store the topic model and the expansion parameter ``k``.

        Args:
            k: Multiplier applied to each topic's weight in ``expand`` to
                decide how many terms to request for that topic.
            lda_model: Pre-built model to reuse. When ``None``, a new
                ``LDAmodel`` is constructed with ``n_topics=500``,
                ``n_passes=50`` and ``vocabulary='combined'``.
        """
        # Compare against None explicitly: a supplied model that happens to
        # evaluate as falsy (e.g. it defines __len__ or __bool__) must still
        # be used instead of being silently replaced by a default model.
        if lda_model is not None:
            self.lda = lda_model
        else:
            self.lda = LDAmodel(n_topics=500,
                                n_passes=50,
                                vocabulary='combined')
        # parameters
        self.k = k

    def expand(self, query):
        """Return ``query`` followed by de-duplicated topic terms.

        The lower-cased query is tokenized and passed to
        ``self.lda.tokens2latent``. For every item of the result,
        ``round(self.k * item[1])`` entries are requested from
        ``self.lda.model.show_topic`` for topic ``item[0]``, and element
        ``[1]`` of each entry is collected as an extra term.

        Args:
            query: The original query string.

        Returns:
            The query with the extra terms appended, space separated, in
            first-seen order. The query is returned unchanged when no extra
            terms were produced.
        """
        tokens = tokenize(query.lower())
        # presumably an iterable of (topic id, weight) pairs -- TODO confirm
        latent = self.lda.tokens2latent(tokens)
        seen = set()
        extra_terms = []
        for topic in latent:
            # int(): round() can hand back a non-int (Python 2 floats, some
            # NumPy scalar types), while a term count has to be an integer.
            n_terms = int(round(self.k * topic[1]))
            topn = self.lda.model.show_topic(topicid=topic[0],
                                             topn=n_terms)
            # NOTE(review): assumes each entry carries the term at index 1
            # (older gensim-style (weight, term) pairs) -- confirm.
            for entry in topn:
                term = entry[1]
                # Keep first-seen order so the expanded query is the same on
                # every run; iterating a set of strings is not.
                if term not in seen:
                    seen.add(term)
                    extra_terms.append(term)
        if not extra_terms:
            # Nothing to add: avoid leaving a dangling trailing space.
            return query
        return query + " " + " ".join(extra_terms)

    def __str__(self):
        """Return the class name followed by the ``k`` setting."""
        return "%s(k=%s)" % (self.__class__.__name__, self.k)
Esempio n. 3
0
 def __init__(self, k=10, lda_model=None):
     """Initialise the instance with a topic model and the ``k`` parameter.

     Args:
         k: Value stored unchanged on ``self.k``.
         lda_model: Model to reuse. If it is ``None``, an ``LDAmodel`` is
             created with ``n_topics=500``, ``n_passes=50`` and
             ``vocabulary='combined'``.
     """
     # An explicit None test is required here: relying on truthiness would
     # discard a caller-supplied model whose __len__/__bool__ reports falsy
     # and build a new default model in its place.
     if lda_model is not None:
         self.lda = lda_model
     else:
         self.lda = LDAmodel(n_topics=500, n_passes=50, vocabulary='combined')
     # parameters
     self.k = k
Esempio n. 4
0
class LDAExpansion(QueryExpansion):
    """Expand a query string with terms suggested by a topic model.

    A model can be passed in; otherwise one is built with fixed defaults.
    ``k`` controls how many terms are requested for each topic.
    """

    def __init__(self, k=10, lda_model=None):
        """Initialise the expander with a topic model and ``k``.

        Args:
            k: Factor multiplied with each topic's weight in ``expand`` to
                obtain the number of terms requested for that topic.
            lda_model: Model to reuse. If it is ``None``, an ``LDAmodel`` is
                created with ``n_topics=500``, ``n_passes=50`` and
                ``vocabulary='combined'``.
        """
        # An explicit None test is required: relying on truthiness would
        # discard a caller-supplied model whose __len__/__bool__ reports
        # falsy and build a new default model in its place.
        if lda_model is not None:
            self.lda = lda_model
        else:
            self.lda = LDAmodel(n_topics=500, n_passes=50, vocabulary='combined')
        # parameters
        self.k = k

    def expand(self, query):
        """Append topic-derived terms to ``query`` and return the result.

        The query is lower-cased, tokenized and handed to
        ``self.lda.tokens2latent``. Each resulting item selects a topic
        (``item[0]``) and a term count (``round(self.k * item[1])``) for
        ``self.lda.model.show_topic``; element ``[1]`` of every returned
        entry is taken as an extra term.

        Args:
            query: The original query string.

        Returns:
            ``query`` plus the unique extra terms, joined by single spaces
            in the order they were first encountered. If there are no extra
            terms, ``query`` is returned as is.
        """
        tokens = tokenize(query.lower())
        # presumably yields (topic id, weight) pairs -- TODO confirm
        latent = self.lda.tokens2latent(tokens)
        already_added = set()
        extra_terms = []
        for topic in latent:
            # round() is not guaranteed to give an int (Python 2 floats, some
            # NumPy scalar types); the requested term count must be one.
            term_count = int(round(self.k * topic[1]))
            topn = self.lda.model.show_topic(topicid=topic[0], topn=term_count)
            # NOTE(review): index 1 is assumed to hold the term, as with
            # older gensim-style (weight, term) entries -- confirm.
            for entry in topn:
                term = entry[1]
                # Ordered de-duplication keeps the output reproducible;
                # list(set(...)) order changes with string hash seeding.
                if term not in already_added:
                    already_added.add(term)
                    extra_terms.append(term)
        if not extra_terms:
            # No expansion terms: do not tack on a stray trailing space.
            return query
        return query + " " + " ".join(extra_terms)

    def __str__(self):
        """Return ``ClassName(k=<k>)`` for this instance."""
        return "%s(k=%s)" % (self.__class__.__name__, self.k)