def loadFile(self): if self.fileIndex: fn = self.recentFiles[self.fileIndex] self.recentFiles.remove(fn) self.recentFiles.insert(0, fn) self.fileIndex = 0 else: fn = self.recentFiles[0] self.filecombo.clear() for file in self.recentFiles: self.filecombo.addItem(os.path.split(file)[1]) self.filecombo.updateGeometry() self.error() data = None try: import orngText if fn[-4:] == ".xml": data = orngText.loadFromXML(fn) elif fn[-4:] == ".sgm": data = orngText.loadReuters(os.path.split(fn)[0]) else: data = orngText.loadFromListWithCategories(fn) if not data: self.error("Unknown file format or no documents") except: self.error("Cannot read the file") self.send("Example Table", data)
self.data = orange.ExampleTable(orange.Domain(data.domain), data) self.tmpData = orange.ExampleTable(data) self.tmpDom = orange.Domain(data.domain) self.data.domain = orange.Domain(data.domain) #self.apply() else: self.data = None self.tmpData = None def apply(self): if self.data: self.data = orange.ExampleTable(orange.Domain(self.tmpDom), self.tmpData) if self.size == 3: newdata = orngText.extractNamedEntities(self.data, stopwords = self.stopwords) else: newdata = orngText.extractWordNGram(self.data, n = self.size + 2, stopwords = self.stopwords, threshold = self.threshold, measure = self.measureDict[self.measure]) self.lblFeatureNo.setText("\nNo. of features: \n%d" % len(newdata.domain.getmetas(orngText.TEXTMETAID))) self.send("Example Table", newdata) else: self.send("Example Table", None) if __name__ == "__main__": t = orngText.loadFromXML(r'c:\test\msnbc.xml') a = QApplication(sys.argv) ow = OWWordNgram() ow.data = t a.setMainWidget(ow) ow.show() a.exec_loop()
# NOTE(review): whitespace-collapsed chunk, left byte-identical below.
# It opens with the end of a method whose `def` is not visible here: that tail
# sets self.graph.radius to 100.0 and returns on one path, and otherwise to
# self.percRadius percent of the width of the QwtPlot.xBottom axis scale
# interval (maxValue() - minValue()).
# The rest is a `__main__` demo: it creates a QApplication and an
# OWCorrAnalysis widget, loads a corpus and a stop-word set from hard-coded
# Windows paths, runs orngText.extractLetterNGram(t, 2) (presumably letter
# bigrams -- TODO confirm) and prints the number of orngText.TEXTMETAID metas
# plus progress messages.
# Where each `#` comment ends cannot be told apart from live code in this
# collapsed form; verify against the properly formatted file before editing.
self.graph.radius = 100.0 return self.graph.radius = ( self.graph.axisScale(QwtPlot.xBottom).interval().maxValue() - self.graph.axisScale(QwtPlot.xBottom).interval().minValue() ) * self.percRadius / 100.0 if __name__ == "__main__": #from orngTextCorpus import * import pickle, orngText ## os.chdir("/home/mkolar/Docs/Diplomski/repository/orange/OrangeWidgets/Other/") appl = QApplication(sys.argv) ow = OWCorrAnalysis() #owb = OWBagofWords.OWBagofWords() t = orngText.loadFromXML(r'c:\test\orange\msnbc.xml') #owb.data = t #owb.show() stop = orngText.loadWordSet(r'C:\tmtorange\common\en_stopwords.txt') p = orngText.Preprocess(language='hr') print('Done with loading') t1 = orngText.extractLetterNGram(t, 2) #t1 = orngText.extractWordNGram(t, stopwords = stop, measure = 'MI', threshold = 7, n=2) #t1 = orngText.extractWordNGram(t1, stopwords = stop, measure = 'MI', threshold = 10, n=3) #t1 = orngText.extractNamedEntities(t, stopwords = stop) #t1 = orngText.bagOfWords(t1, stopwords = stop) print(len(t1.domain.getmetas(orngText.TEXTMETAID))) print('Done with extracting') #t2 = orngText.FSS(t1, 'TF', 'MIN', 0.98) #print len(t2.domain.getmetas()) print('Done with feature selection')