def apply(self):
    """Extract letter n-gram features from the current data and send them on.

    When ``self.data`` is empty or unset, ``None`` is sent on the
    "Example Table" channel and nothing else happens.  Otherwise
    ``self.data`` is rebuilt from ``self.tmpDom`` / ``self.tmpData``,
    ``orngText.extractLetterNGram`` is run on it with ``self.size + 2`` as
    its second argument (presumably the n-gram length -- confirm against
    orngText), the feature-count label is refreshed, and the resulting
    table is sent on the same channel.
    """
    if not self.data:
        self.send("Example Table", None)
        return

    pb = OWGUI.ProgressBar(self, iterations=len(self.data))
    try:
        self.data = orange.ExampleTable(orange.Domain(self.tmpDom), self.tmpData)
        newdata = orngText.extractLetterNGram(
            self.data,
            self.size + 2,
            textAttribute=self.textAttributePos,
            callback=pb.advance
        )
        feature_count = len(newdata.domain.getmetas(orngText.TEXTMETAID))
        self.lblFeatureNo.setText("\nNo. of features: \n%d" % feature_count)
        self.send("Example Table", newdata)
    finally:
        # Always close the progress bar, even when the table rebuild or the
        # extraction raises, so the widget is not left with a stuck bar.
        pb.finish()
if __name__ == "__main__":
    # Stand-alone manual check: load a sample corpus, extract letter n-gram
    # features from it and hand them to an OWCorrAnalysis widget.
    import pickle
    import orngText

    application = QApplication(sys.argv)
    widget = OWCorrAnalysis()

    # Inputs come from fixed local paths.  The stop-word set and the
    # preprocessor are created here but not consumed by the active
    # extraction call below.
    corpus = orngText.loadFromXML(r'c:\test\orange\msnbc.xml')
    stopwords = orngText.loadWordSet(r'C:\tmtorange\common\en_stopwords.txt')
    preprocessor = orngText.Preprocess(language='hr')
    print('Done with loading')

    # Alternatives previously tried at this step: extractWordNGram (with
    # stopwords and measure='MI'), extractNamedEntities and bagOfWords.
    features = orngText.extractLetterNGram(corpus, 2)
    text_metas = features.domain.getmetas(orngText.TEXTMETAID)
    print(len(text_metas))
    print('Done with extracting')

    # No feature selection (orngText.FSS) or document selection
    # (orngText.DSS) is applied; only the progress message is printed.
    print('Done with feature selection')

    application.setMainWidget(widget)
    widget.dataset(features)
    print('Done')
    widget.show()
if __name__ == "__main__":
    # Manual test harness: load a sample corpus, extract letter n-gram
    # features and pass them to an OWCorrAnalysis widget.
    #
    # Progress messages use single-argument print(...) calls, which behave
    # the same under Python 2 and Python 3; the former print statements were
    # syntax errors on Python 3.  cPickle does not exist on Python 3, so the
    # portable pickle module is imported instead.
    import pickle
    import orngText

    appl = QApplication(sys.argv)
    ow = OWCorrAnalysis()

    # Inputs come from fixed local paths.  `stop` and `p` are only needed by
    # the alternative extractors listed below, not by the active call.
    t = orngText.loadFromXML(r'c:\test\orange\msnbc.xml')
    stop = orngText.loadWordSet(r'C:\tmtorange\common\en_stopwords.txt')
    p = orngText.Preprocess(language='hr')
    print('Done with loading')

    # Alternatives previously tried at this step: extractWordNGram (with
    # stopwords and measure='MI'), extractNamedEntities and bagOfWords.
    t1 = orngText.extractLetterNGram(t, 2)
    print(len(t1.domain.getmetas(orngText.TEXTMETAID)))
    print('Done with extracting')

    # No feature selection (orngText.FSS) or document selection
    # (orngText.DSS) is applied; only the progress message is printed.
    print('Done with feature selection')

    appl.setMainWidget(ow)
    ow.dataset(t1)
    print('Done')
    ow.show()
maxword = word if freq < min: min = freq minword = word sum += freq pb.advance() avg = sum / len(words) if min == (): min = 0 self.lblFeatNo.setText("No. of features: %d" % len(words)) self.lblMin.setText("Min: %d Min word = %s" % (min, minword)) self.lblMax.setText("Max: %d Max word = %s" % (max,maxword)) self.lblAvg.setText("Avg: %.3f" % avg) pb.finish() def selectionChanged(self): if self.data: self.applyButton.setDisabled(0) if __name__ == "__main__": a = QApplication(sys.argv) t = orngText.loadFromXML(r'c:\test\orange\msnbc.xml') t2 = orngText.extractLetterNGram(t) #print t2.domain.getmetas().values() ow = OWTextFeatureSelection() a.setMainWidget(ow) ow.show() ow.dataset(t2) a.exec_loop()