def loadFile(self):
        """Load the selected recent file and send it on "Example Table".

        The entry at ``self.fileIndex`` is moved to the front of
        ``self.recentFiles`` (and ``self.fileIndex`` reset to 0), then the
        file combo box is repopulated with the base names of the recent
        files.

        The loader is chosen from the file name's last four characters:
        ``.xml`` uses ``orngText.loadFromXML``, ``.sgm`` uses
        ``orngText.loadReuters`` (called with the file's directory), and
        anything else uses ``orngText.loadFromListWithCategories``.

        Whatever was loaded (``None`` if loading raised) is sent on the
        "Example Table" channel.  When the loaded table is truthy,
        ``self.data``, ``self.tmpData`` and ``self.tmpDom`` are rebuilt from
        it; otherwise ``self.data`` and ``self.tmpData`` are set to ``None``.
        """
        if self.fileIndex:
            # Promote the chosen file to the first slot of the recent list.
            fn = self.recentFiles[self.fileIndex]
            self.recentFiles.remove(fn)
            self.recentFiles.insert(0, fn)
            self.fileIndex = 0
        else:
            fn = self.recentFiles[0]

        # Rebuild the combo box so it mirrors the (possibly reordered) list.
        self.filecombo.clear()
        for path in self.recentFiles:
            self.filecombo.addItem(os.path.split(path)[1])
        self.filecombo.updateGeometry()

        # NOTE(review): error() without arguments presumably clears a
        # previously reported error -- confirm against the widget base class.
        self.error()
        data = None
        try:
            # The import sits inside the try block, so a missing orngText
            # module is reported as a read failure as well.
            import orngText
            if fn.endswith(".xml"):
                data = orngText.loadFromXML(fn)
            elif fn.endswith(".sgm"):
                # The Reuters loader is given the directory, not the file.
                data = orngText.loadReuters(os.path.split(fn)[0])
            else:
                data = orngText.loadFromListWithCategories(fn)

            if not data:
                self.error("Unknown file format or no documents")
        except Exception:
            # Exception (not a bare except) so that KeyboardInterrupt and
            # SystemExit are not swallowed.
            self.error("Cannot read the file")

        self.send("Example Table", data)
        if data:
            self.data = orange.ExampleTable(orange.Domain(data.domain), data)
            self.tmpData = orange.ExampleTable(data)
            self.tmpDom = orange.Domain(data.domain)

            self.data.domain = orange.Domain(data.domain)
            #self.apply()
        else:
            self.data = None
            self.tmpData = None

    def apply(self):
        """Re-extract features from the stored table and send the result.

        With no data loaded, ``None`` is sent on "Example Table".  Otherwise
        ``self.data`` is recreated from ``self.tmpDom`` and ``self.tmpData``,
        features are extracted with ``orngText.extractNamedEntities`` when
        ``self.size == 3`` or with ``orngText.extractWordNGram``
        (``n = self.size + 2``) for any other size, the feature-count label
        is updated, and the resulting table is sent on "Example Table".
        """
        if not self.data:
            self.send("Example Table", None)
            return

        # Recreate the working table from the stored domain and table.
        self.data = orange.ExampleTable(orange.Domain(self.tmpDom), self.tmpData)

        if self.size != 3:
            extracted = orngText.extractWordNGram(
                self.data,
                n=self.size + 2,
                stopwords=self.stopwords,
                threshold=self.threshold,
                measure=self.measureDict[self.measure]
            )
        else:
            extracted = orngText.extractNamedEntities(self.data, stopwords=self.stopwords)

        feature_count = len(extracted.domain.getmetas(orngText.TEXTMETAID))
        self.lblFeatureNo.setText("\nNo. of features: \n%d" % feature_count)
        self.send("Example Table", extracted)
            
if __name__ == "__main__":
    # Manual run: load a sample corpus, hand it to the widget and show it.
    corpus = orngText.loadFromXML(r'c:\test\msnbc.xml')
    app = QApplication(sys.argv)
    widget = OWWordNgram()
    widget.data = corpus
    app.setMainWidget(widget)
    widget.show()
    # Enter the Qt event loop.
    app.exec_loop()
Example #3
0
        self.graph.radius = 100.0
        return
        self.graph.radius = (
            self.graph.axisScale(QwtPlot.xBottom).interval().maxValue() -
            self.graph.axisScale(QwtPlot.xBottom).interval().minValue()
        ) * self.percRadius / 100.0

if __name__ == "__main__":
    # Ad-hoc driver: creates the widget, loads a sample corpus, extracts
    # letter n-grams (second argument 2) and prints the feature count.
    # The commented-out lines are alternative extraction steps.
    #from orngTextCorpus import *
    import pickle
    import orngText
    ##    os.chdir("/home/mkolar/Docs/Diplomski/repository/orange/OrangeWidgets/Other/")
    application = QApplication(sys.argv)
    widget = OWCorrAnalysis()

    #bag_widget = OWBagofWords.OWBagofWords()
    corpus = orngText.loadFromXML(r'c:\test\orange\msnbc.xml')
    #bag_widget.data = corpus
    #bag_widget.show()
    # The stop-word set is only referenced by the commented-out alternatives
    # below; the preprocessor is not referenced again in this driver.
    stopwords = orngText.loadWordSet(r'C:\tmtorange\common\en_stopwords.txt')
    preprocessor = orngText.Preprocess(language='hr')
    print('Done with loading')
    features = orngText.extractLetterNGram(corpus, 2)
    #features = orngText.extractWordNGram(corpus, stopwords = stopwords, measure = 'MI', threshold = 7, n=2)
    #features = orngText.extractWordNGram(features, stopwords = stopwords, measure = 'MI', threshold = 10, n=3)
    #features = orngText.extractNamedEntities(corpus, stopwords = stopwords)
    #features = orngText.bagOfWords(features, stopwords = stopwords)
    print(len(features.domain.getmetas(orngText.TEXTMETAID)))
    print('Done with extracting')
    # Feature selection is currently disabled; the message below is still printed.
    #selected = orngText.FSS(features, 'TF', 'MIN', 0.98)
    #print len(selected.domain.getmetas())
    print('Done with feature selection')