def training(self):
    """Train sense clusters for ``self.target_word``.

    Pipeline: load the corpus and Wiktionary data, append one paragraph per
    dictionary definition (folding example-sentence tokens into the
    definition sentence), build the tf-idf matrix, draw 2-D and 3-D
    projections, optionally reduce dimensionality (SVD/PCA), and run
    K-means clustering over the result.
    """
    import os

    # os.path.join replaces the original "path + '\\Corpus\\...'" literal,
    # which contained the invalid escape sequence '\C' and was
    # Windows-only.  NOTE(review): `path` is a module-level global —
    # confirm it points at the data root without a trailing separator.
    filename = os.path.join(path, 'Corpus', '{0}_ya.txt'.format(self.target_word))

    # ----- load Wiktionary data -----
    self.worddata = WiktionaryData(self.target_word, self.language)

    # ----- load corpus -----
    self.corpus = Corpus()
    self.corpus.load_corpus(filename, self.language, self.target_word)
    count = len(self.corpus.paragraphes)
    corpus_paragraphes_count = count  # first index of the definition paragraphs

    # ----- append one paragraph per dictionary definition -----
    for d in self.worddata.definitions:
        paragraph = Paragraph(count)
        d['parnmb'] = count
        s = Sentence(d['sent'].fulltext, self.language, self.target_word)
        # Merge example-sentence tokens into the definition sentence so the
        # definition's tf-idf vector also reflects its usage examples.
        if 'ex' in d:
            for exmpl in d['ex']:
                s.stemmed_tokens += exmpl.stemmed_tokens
                s.tokens += exmpl.tokens
        paragraph.add_sentence(s)
        paragraph.add_definition(d)
        self.corpus.add_paragraph(paragraph)
        count += 1

    # Indices of the definition paragraphs appended above
    # (numpy.arange instead of numpy.array(range(...))).
    rng = numpy.arange(corpus_paragraphes_count, len(self.corpus.paragraphes))

    # ----- tf-idf matrix -----
    self.corpus.create_tf_idf_matrix()

    # ----- draw 2-D and 3-D projections -----
    for dims in (2, 3):
        drawer = Drawing(dims, self.corpus.tf_idf_matrix,
                         len(self.worddata.definitions), rng)
        drawer.draw()
        del drawer  # release the figure before creating the next one

    # ----- dimensionality reduction -----
    if self.reduction_mode == 'SVD':
        self.pca_object = myPCA(0.95, 2)
        self.pca_object.count_number_of_principal_components(self.corpus.tf_idf_matrix)
        new_X = self.pca_object.reduce_matrix_dimension(self.corpus.tf_idf_matrix)
    else:
        new_X = self.corpus.tf_idf_matrix

    # ----- clustering -----
    self.cluster_object = Cluster('K-means', len(self.worddata.definitions), new_X, rng)
    self.cluster_object.clustering()
    self.cluster_object.make_word_clusters(self.corpus)
    print('Training is done')  # fixed typo: was 'Trainig is done'
def main():
    """Composite-pattern demo: draw leaf shapes, then a composite Drawing."""
    print("Composite pattern Shapes..")

    # Leaf shapes rendered individually first.
    first_circle = Circle()
    first_circle.draw("red")

    second_circle = Circle()
    second_circle.draw("blue")

    lone_triangle = Triangle()
    lone_triangle.draw("green")

    # Composite: a Drawing groups shapes and forwards draw() to each child.
    picture = Drawing()
    picture.add(first_circle)
    picture.add(lone_triangle)
    picture.draw("yellow")

    # Growing the group re-colors every member on the next draw.
    picture.add(second_circle)
    picture.draw("orange")

    # Removal works symmetrically: the triangle no longer takes part.
    picture.remove(lone_triangle)
    picture.draw("orange")