def loadFile(self):
    """Load the stop-word set from the currently selected recent file.

    When ``self.fileIndex`` is non-zero, the entry at that index is moved
    to the front of ``self.recentFiles`` and ``self.fileIndex`` is reset
    to 0; otherwise the first entry is used as is. The file combo box is
    then refilled with the base name of every recent file.

    ``self.error()`` is called with no arguments before the load attempt
    (presumably to clear a previously shown message -- confirm against
    the widget base class). The word set is read with
    ``orngText.loadWordSet`` and stored in ``self.stopwords``. If reading
    raises, "Cannot read the file" is reported through ``self.error`` and
    ``self.stopwords`` keeps its previous value.
    """
    if self.fileIndex:
        # Promote the chosen file to the first (most recent) position.
        fn = self.recentFiles[self.fileIndex]
        self.recentFiles.remove(fn)
        self.recentFiles.insert(0, fn)
        self.fileIndex = 0
    else:
        fn = self.recentFiles[0]

    # Rebuild the combo box so its order mirrors self.recentFiles.
    self.filecombo.clear()
    for path in self.recentFiles:
        self.filecombo.addItem(os.path.basename(path))
    self.filecombo.updateGeometry()

    self.error()
    try:
        self.stopwords = orngText.loadWordSet(fn)
    except Exception:
        # Not a bare ``except``: KeyboardInterrupt and SystemExit must
        # propagate instead of being reported as an unreadable file.
        self.error("Cannot read the file")
# NOTE(review): the next line is the tail of a definition whose opening lines
# are outside this excerpt (its closing parenthesis has no visible opener);
# it is left exactly as found.
self.graph.axisScale(QwtPlot.xBottom).interval().maxValue() - self.graph.axisScale(QwtPlot.xBottom).interval().minValue() ) * self.percRadius / 100.0

# Manual test script: runs only when this module is executed directly.
if __name__ == "__main__":
    #from orngTextCorpus import *
    # NOTE(review): pickle is not referenced in the visible part of this script.
    import pickle, orngText
    ## os.chdir("/home/mkolar/Docs/Diplomski/repository/orange/OrangeWidgets/Other/")
    # Create the Qt application and the widget under test.
    appl = QApplication(sys.argv)
    ow = OWCorrAnalysis()
    #owb = OWBagofWords.OWBagofWords()
    # Load the input from a hard-coded local XML path.
    t = orngText.loadFromXML(r'c:\test\orange\msnbc.xml')
    #owb.data = t
    #owb.show()
    # `stop` is only referenced by the commented-out extraction calls below.
    stop = orngText.loadWordSet(r'C:\tmtorange\common\en_stopwords.txt')
    # NOTE(review): `p` is not used anywhere in the visible script.
    p = orngText.Preprocess(language='hr')
    print('Done with loading')
    # Active extraction step; the alternatives below are disabled.
    # presumably the second argument is the n-gram length -- TODO confirm
    t1 = orngText.extractLetterNGram(t, 2)
    #t1 = orngText.extractWordNGram(t, stopwords = stop, measure = 'MI', threshold = 7, n=2)
    #t1 = orngText.extractWordNGram(t1, stopwords = stop, measure = 'MI', threshold = 10, n=3)
    #t1 = orngText.extractNamedEntities(t, stopwords = stop)
    #t1 = orngText.bagOfWords(t1, stopwords = stop)
    # Print how many metas getmetas returns for orngText.TEXTMETAID.
    print(len(t1.domain.getmetas(orngText.TEXTMETAID)))
    print('Done with extracting')
    #t2 = orngText.FSS(t1, 'TF', 'MIN', 0.98)
    #print len(t2.domain.getmetas())
    # This message is printed even though the FSS call above is commented out.
    print('Done with feature selection')
    # NOTE(review): nothing visible here shows the widget or starts the Qt
    # event loop; the script may continue past this excerpt.
    appl.setMainWidget(ow)
    #t3 = orngText.DSS(t2, 'WF', 'MIN', 1)
    #print 'Done with document selection'
# NOTE(review): the next line is the tail of a definition whose opening lines
# are outside this excerpt; the nesting of its statements cannot be recovered
# from here, so it is left exactly as found.
self.graph.radius = 100.0 return self.graph.radius = (self.graph.axisScale(QwtPlot.xBottom).interval().maxValue() - self.graph.axisScale(QwtPlot.xBottom).interval().minValue()) * self.percRadius / 100.0;

# Manual test script: runs only when this module is executed directly.
# NOTE(review): the print statements below use Python 2 syntax.
if __name__=="__main__":
    #from orngTextCorpus import *
    # NOTE(review): cPickle is not referenced in the visible part of this script.
    import cPickle, orngText
    ## os.chdir("/home/mkolar/Docs/Diplomski/repository/orange/OrangeWidgets/Other/")
    # Create the Qt application and the widget under test.
    appl = QApplication(sys.argv)
    ow = OWCorrAnalysis()
    #owb = OWBagofWords.OWBagofWords()
    # Load the input from a hard-coded local XML path.
    t = orngText.loadFromXML(r'c:\test\orange\msnbc.xml')
    #owb.data = t
    #owb.show()
    # `stop` is only referenced by the commented-out extraction calls below.
    stop = orngText.loadWordSet(r'C:\tmtorange\common\en_stopwords.txt')
    # NOTE(review): `p` is not used anywhere in the visible script.
    p = orngText.Preprocess(language = 'hr')
    print 'Done with loading'
    # Active extraction step; the alternatives below are disabled.
    # presumably the second argument is the n-gram length -- TODO confirm
    t1 = orngText.extractLetterNGram(t, 2)
    #t1 = orngText.extractWordNGram(t, stopwords = stop, measure = 'MI', threshold = 7, n=2)
    #t1 = orngText.extractWordNGram(t1, stopwords = stop, measure = 'MI', threshold = 10, n=3)
    #t1 = orngText.extractNamedEntities(t, stopwords = stop)
    #t1 = orngText.bagOfWords(t1, stopwords = stop)
    # Print how many metas getmetas returns for orngText.TEXTMETAID.
    print len(t1.domain.getmetas(orngText.TEXTMETAID))
    print 'Done with extracting'
    #t2 = orngText.FSS(t1, 'TF', 'MIN', 0.98)
    #print len(t2.domain.getmetas())
    # This message is printed even though the FSS call above is commented out.
    print 'Done with feature selection'
    # NOTE(review): nothing visible here shows the widget or starts the Qt
    # event loop; the script may continue past this excerpt.
    appl.setMainWidget(ow)
    #t3 = orngText.DSS(t2, 'WF', 'MIN', 1)
    #print 'Done with document selection'