Esempi in Python per TextProcessor

Linguaggio di programmazione: Python

Spazio dei nomi/nome del pacchetto: textprocess

Classe/tipologia: TextProcessor

Esempi su hotexamples.com: 2

TextProcessor in Python: 2 esempi trovati. Questi sono i migliori esempi reali in Python per textprocess.TextProcessor, estratti da progetti open source. Puoi valutarli per aiutarci a migliorare la qualità dei nostri esempi.

Metodi utilizzati di frequente

Mostra Nascondi

build_tfidf_matrix(1)

calculate_idfs(1)

create_vocabulary(1)

preprocessing(1)

transformation(1)

Esempio n. 1

Mostra file

File: unittest_textprocessing.py Progetto: osmanbaskaya/acm_mak

 def setUp(self):
     """Create the TextProcessor fixture from the offline articles."""
     self.tp = TextProcessor(get_all_offline_articles())

Esempio n. 2

Mostra file

File: unittest_textprocessing.py Progetto: osmanbaskaya/acm_mak

class TextProcessingTests(unittest.TestCase):
    """Tests exercising the TextProcessor pipeline on the offline articles."""

    def setUp(self):
        """Create a TextProcessor over all offline articles."""
        all_articles = get_all_offline_articles()
        self.tp = TextProcessor(all_articles)

    def test_preprocessing(self):
        """Check that preprocessing leaves no punctuation in any article.

        After ``preprocessing()`` has run, every character of
        ``string.punctuation`` is looked up in the ``text`` of every
        article in ``self.tp.trainset``; the test fails on the first hit.
        """
        self.tp.preprocessing()
        pairs = itertools.product(string.punctuation, self.tp.trainset)
        # First punctuation character still present in some article text,
        # or None when every text is clean.
        leftover = next(
            (mark for mark, article in pairs if mark in article.text),
            None,
        )
        self.assertTrue(
            leftover is None,
            "punctuation %r survived preprocessing" % (leftover,),
        )

    def test_transformation(self):
        """Fit a CountVectorizer on a two-sentence toy corpus.

        Makes no assertions; it only checks that fitting does not raise.
        """
        # TODO: Remove this function. Useless now

        train_set = ("The sky is blue.", "The sun is bright.")
        # NOTE(review): 'english' is passed positionally; confirm which
        # CountVectorizer parameter it binds to in the installed version.
        count_vectorizer = CountVectorizer('english')
        count_vectorizer.fit_transform(train_set)

    def test_cosine(self):
        """Print the articles scoring highest and lowest against one article.

        Runs the whole pipeline (preprocessing, vocabulary, transformation,
        idfs, tf-idf matrix), multiplies the reference row of the tf-idf
        matrix with every row, keeps scores above 0.07 and prints the top
        15 and the bottom 10 of the sorted hits. Makes no assertions; the
        output is meant for manual inspection.
        """
        # Hard-coded row of the reference article (labelled "matrix" in the
        # original). Other ids previously tried: 2910 (Star Wars: A New
        # Hope), 1948 (Matrix), 3317 (When Harry Met Sally).
        mov1 = 1319

        self.tp.preprocessing()
        self.tp.create_vocabulary()
        self.tp.transformation()
        self.tp.calculate_idfs()
        self.tp.build_tfidf_matrix()

        # Row every other row is compared against.
        m1 = self.tp.tf_idf_matrix.getrow(mov1)
        sims = []
        for i, article in enumerate(self.tp.trainset):
            m2 = self.tp.tf_idf_matrix.getrow(i)
            # Row-by-column product gives a 1x1 matrix; [0, 0] extracts the
            # scalar. NOTE(review): this equals the cosine similarity only
            # if the rows are already length-normalised -- confirm.
            cos_sim = (m1 * np.transpose(m2)).todense()[0, 0]
            if cos_sim > 0.07:
                sims.append([cos_sim, article.title])

        sims.sort(key=itemgetter(0), reverse=True)
        print_m(sims[:15])
        print("\n\n")
        # sims[-10:] yields *up to* the last ten hits; the former
        # sims[leng-10:leng] wrapped around and dropped entries whenever
        # there were fewer than ten hits.
        print_m(sims[-10:])

    def test_similar_items(self):
        """Placeholder; not implemented yet."""
        pass