def test():
    """Cluster two classical Chinese texts and render the result as HTML.

    Reads ``ddj.txt`` and ``lunyu.txt`` from ``../data`` (resolved against
    the current working directory), adds both to a corpus built on a
    ``BinaryModel``, asks the corpus for two clusters and hands corpus and
    clustering to ``display.render_html`` with the target ``'output.html'``.
    """
    data_dir = os.path.join('..', 'data')

    # Load order and insertion order differ on purpose: the Dao De Jing is
    # read first but added to the corpus second.
    dao_de_jing = text.Text.from_file(os.path.join(data_dir, 'ddj.txt'))
    lunyu = text.Text.from_file(os.path.join(data_dir, 'lunyu.txt'))

    binary_model = model.BinaryModel()
    wenyan_corpus = corpus.Corpus(text_model=binary_model)
    for document in (lunyu, dao_de_jing):
        wenyan_corpus.add_text(document)

    clustering = wenyan_corpus.clustering(n_clusters=2)
    display.render_html(wenyan_corpus, clustering, 'output.html')
def setUp(self):
    """Create a corpus with ``min_occurrences=2`` holding the DDJ_START text."""
    self.corpus = corpus.Corpus(min_occurrences=2)
    opening = text.Text(DDJ_START, 'ddj_start')
    self.corpus.add_text(opening)
def setUp(self):
    """Create a default corpus holding the DDJ_START and DDJ_END texts.

    The texts are added in that order: DDJ_START first, DDJ_END second.
    """
    self.corpus = corpus.Corpus()
    fixtures = (
        (DDJ_START, 'ddj_start'),
        (DDJ_END, 'ddj_end'),
    )
    for content, name in fixtures:
        self.corpus.add_text(text.Text(content, name))
def setUp(self):
    """Create an empty corpus with default settings for each test."""
    empty_corpus = corpus.Corpus()
    self.corpus = empty_corpus
def setUp(self):
    """Create a corpus backed by a ``BagOfWordsModel`` holding DDJ_START."""
    bag_of_words = model.BagOfWordsModel()
    self.corpus = corpus.Corpus(text_model=bag_of_words)
    opening = text.Text(DDJ_START, 'ddj_start')
    self.corpus.add_text(opening)