def lProbLine(self, l, threshold=None):
    """Return the summed log-probability of the tokens in line *l*.

    :param l: text line to score
    :param threshold: if given, tokens whose log-probability is below
        ``-threshold`` are skipped (treated as noise)
    :returns: sum of per-token log-probabilities (0.0 for an empty line)
    """
    total = 0.0
    for w in tokenize(l):
        lp = self.lp(w)
        # 'is None' instead of '== None': identity is the correct
        # (PEP 8 mandated) test for the missing-threshold case.
        if threshold is None or lp >= -threshold:
            total += lp
    return total
def lProbLine(self, l, threshold=None):
    """Sum of token log-probabilities for line *l*.

    :param l: text line to score
    :param threshold: optional cutoff; tokens scoring below
        ``-threshold`` are ignored
    :returns: the accumulated log-probability (0.0 when *l* has no tokens)
    """
    total = 0.0
    for w in tokenize(l):
        lp = self.lp(w)
        # Fixed '== None' to the idiomatic identity check 'is None'.
        if threshold is None or lp >= -threshold:
            total += lp
    return total
def fetchData(self, author):
    """Collect up to 5 publication titles plus up to 5 OAI keyword
    strings for *author*'s paper and tokenize each of them.

    :returns: list of ``set`` objects, one per title/keyword string,
        each holding the punctuation-filtered tokens of that string.
    """
    pubs = author.paper.publication_set.all()[:5]
    titles = [a.full_title() for a in pubs]
    for r in author.paper.oairecord_set.all()[:5]:
        if r.keywords:
            titles.append(r.keywords)
    # Comprehension instead of map(lambda ...): clearer, and always a
    # concrete re-iterable list rather than a one-shot map object on
    # Python 3.
    return [set(filter_punctuation(tokenize(t))) for t in titles]
def nlProbLine(self, l):
    """Mean per-token log-probability of line *l* (0.0 if it has no tokens)."""
    log_probs = [self.lp(token) for token in tokenize(l)]
    if not log_probs:
        return 0.0
    return sum(log_probs) / len(log_probs)
def nlProbLine(self, l):
    """Average log-probability per token of *l*; 0. when *l* is empty."""
    total, count = 0., 0
    for token in tokenize(l):
        total += self.lp(token)
        count += 1
    return total / count if count > 0 else 0.
def _normalizedWScore(self, line, researcher, explain=False):
    """Department topic-model score of *line* minus the background
    language-model score, both normalised per token.

    With ``explain=True``, also print each word's contribution under
    both models.
    """
    dept_model = self.models[researcher.department_id]
    topicScore = dept_model.nlProbLine(line)
    langScore = self.lang.nlProbLine(line)
    if explain:
        for word in dept_model and tokenize(line):
            a = dept_model.lp(word)
            b = self.lang.lp(word)
            print(' '+word+'\t'+str(a)+'-'+str(b)+' = '+str(a-b))
    return topicScore - langScore
def _normalizedWScore(self, line, researcher, explain=False):
    """Difference between the per-token topic-model and language-model
    scores of *line*; optionally prints a word-by-word breakdown when
    *explain* is set."""
    dept = self.models[researcher.department_id]
    topic = dept.nlProbLine(line)
    lang = self.lang.nlProbLine(line)
    if explain:
        for word in tokenize(line):
            ta = dept.lp(word)
            la = self.lang.lp(word)
            print(' ' + word + '\t' + str(ta) + '-' + str(la) + ' = ' + str(ta - la))
    return topic - lang
def get_distr(self, string, debug=False):
    """Return the LDA topic distribution of *string*.

    :param string: raw text to analyse
    :param debug: when True, print the ten strongest topics and their words
    :returns: the LDA model's ``(topic_id, weight)`` distribution for the text
    """
    # Tokenize
    words = tokenize(string)
    # To bag-of-words
    bow = self.dct.doc2bow(words)
    # To topics
    distr = self.lda[bow]
    if debug:
        # print() calls replace Python 2 print statements: output is
        # identical, and this matches the print() style used elsewhere
        # in the file while staying Python 3 compatible.
        for (topic_id, value) in distr[:10]:
            print("Topic id %d, value %.3f" % (topic_id, value))
            print(self._print_topic(self.lda.show_topic(topic_id)))
    return distr
def probLine(self, l):
    """Probability of line *l* as the product of its word probabilities
    (1.0 for an empty line)."""
    result = 1.0
    for token in tokenize(l):
        result = result * self.p(token)
    return result
def feedLine(self, l):
    """Feed every token of line *l* into the word counter."""
    for token in tokenize(l):
        self._countWord(token)
def test_tokenize(self):
    """tokenize splits on any whitespace run and keeps punctuation attached."""
    simple = tokenize('Hello world!')
    self.assertEqual(simple, ['Hello', 'world!'])
    messy = tokenize('99\tbottles\nof beeron \tThe Wall')
    self.assertEqual(messy, ['99', 'bottles', 'of', 'beeron', 'The', 'Wall'])
def fetchData(self, author):
    """Return the punctuation-filtered token set of all contributor
    strings attached to *author*'s paper OAI records.

    :returns: ``set`` of tokens drawn from the concatenated contributor
        strings (records with no contributors are skipped).
    """
    contributors = [r.contributors for r in author.paper.oairecord_set.all()]
    # 'is not None' (identity test) replaces '!= None'; the comprehension
    # also keeps the result a plain list on both Python 2 and Python 3,
    # unlike filter() which is lazy on Python 3.
    contributors = [c for c in contributors if c is not None]
    ta = ' '.join(contributors)
    return set(filter_punctuation(tokenize(ta)))
def fetchData(self, author):
    """Punctuation-filtered token set of the paper's title."""
    title_tokens = tokenize(author.paper.title)
    return set(filter_punctuation(title_tokens))
def probLine(self, l):
    """Line probability: product of per-word probabilities (1. if empty)."""
    word_probs = [self.p(w) for w in tokenize(l)]
    total = 1.
    for p in word_probs:
        total *= p
    return total