Ejemplo n.º 1
0
    def loadFile(self):
        """Load the currently selected recent file into the widget.

        The entry at ``self.fileIndex`` (if non-zero) is moved to the front
        of ``self.recentFiles``, the file combo box is rebuilt from that
        list, and the file is parsed with ``readMatrix``.  ``self.matrix``,
        ``self.labels`` and ``self.data`` are reset to ``None`` first and
        only receive new values when parsing succeeds; a failure is reported
        through ``self.error``.

        Returns ``None`` in every case.
        """
        if not self.recentFiles:
            # Nothing to load; indexing the empty list would raise IndexError.
            return

        if self.fileIndex:
            # Promote the chosen file to the top of the recent-files list.
            fn = self.recentFiles[self.fileIndex]
            self.recentFiles.remove(fn)
            self.recentFiles.insert(0, fn)
            self.fileIndex = 0
        else:
            fn = self.recentFiles[0]

        # Show only the base names in the combo box.
        self.filecombo.clear()
        for path in self.recentFiles:
            self.filecombo.addItem(os.path.split(path)[1])

        self.matrix = None
        self.labels = None
        self.data = None
        pb = OWGUI.ProgressBar(self, 100)

        self.error()  # clear any previously reported error
        try:
            self.matrix, self.labels, self.data = readMatrix(fn, pb)
        except Exception as ex:
            # Close the progress bar so it is not left open after a failed
            # read (presumably readMatrix only finishes it on success --
            # TODO confirm).
            pb.finish()
            self.error("Error while reading the file: '%s'" % str(ex))
            return
Ejemplo n.º 2
0
    def run(self):
        """Run every selected estimator that has no cached result yet.

        Each entry of ``self.estimators`` is unpacked as
        ``(selected, method, count, offsets)``: ``selected`` names a boolean
        attribute on ``self``, ``method`` is called to build the estimator,
        and ``offsets`` are positions relative to the running estimate
        index, which grows by ``count`` for every scheduled estimator.

        Stores the predictions in ``self.predictions`` and, per scheduled
        estimator, a list of estimate columns in ``self.results``.  Returns
        ``None``; returns early when nothing needs to be computed.
        """
        scheduled = []
        next_slot = 0
        for position, spec in enumerate(self.estimators):
            flag_name, factory, width, offsets = spec
            if self.results[position] is not None:
                continue  # already computed
            if not getattr(self, flag_name):
                continue  # not selected
            columns = [next_slot + shift for shift in offsets]
            scheduled.append((position, factory, columns))
            next_slot += width

        if not scheduled:
            return

        estimators = [factory() for _, factory, _ in scheduled]

        progress = OWGUI.ProgressBar(self, len(self._test_data()))
        raw = self.run_estimation(estimators, progress.advance)
        progress.finish()

        self.predictions = [value for value, _ in raw]
        reliabilities = [probs.reliability_estimate for _, probs in raw]

        for position, _, columns in scheduled:
            table = []
            for column in columns:
                table.append([row[column] for row in reliabilities])
            self.results[position] = table
Ejemplo n.º 3
0
 def computeMatrix(self):
     """Compute the pairwise distance matrix for ``self.data`` and send it.

     The distance callable is built from the metric selected by
     ``self.Metrics``.  The lower triangle (diagonal included) of an
     ``orange.SymMatrix`` is filled, and the result is sent on the
     "Distance Matrix" channel.  If the matrix cannot be allocated, an
     error is shown and ``None`` is sent instead.  Does nothing when there
     is no data.
     """
     if not self.data:
         return
     data = self.data
     dist = self.metrics[self.Metrics][1](data)
     self.error(0)
     try:
         self.matrix = orange.SymMatrix(len(data))
     except orange.KernelException as ex:
         self.error(0, "Could not create distance matrix! %s" % str(ex))
         self.matrix = None
         self.send("Distance Matrix", None)
         return
     self.matrix.setattr('items', data)
     n = len(data)
     pb = OWGUI.ProgressBar(self, 100)
     # The loops below visit n * (n + 1) / 2 cells (diagonal included), so
     # the milestones are spread over that many steps.
     milestones = orngMisc.progressBarMilestones(n * (n + 1) // 2, 100)
     count = 0
     try:
         for i in range(n):
             for j in range(i + 1):
                 self.matrix[i, j] = dist(data[i], data[j])
                 if count in milestones:
                     pb.advance()
                 count += 1
     finally:
         # Close the bar even if a distance computation raises.
         pb.finish()
     self.send("Distance Matrix", self.matrix)
 def getLearningCurve(self, learners):
     """Return the learning curve of ``learners`` on ``self.data``.

     Delegates to ``orngTest.learningCurveN`` with the widget's fold count
     and curve proportions, reporting progress through a progress bar
     sized ``self.steps * self.folds``.
     """
     total_ticks = self.steps * self.folds
     progress = OWGUI.ProgressBar(self, iterations=total_ticks)
     settings = {
         "folds": self.folds,
         "proportions": self.curvePoints,
         "callback": progress.advance,
     }
     result = orngTest.learningCurveN(learners, self.data, **settings)
     progress.finish()
     return result
Ejemplo n.º 5
0
 def getLearningCurve(self, learners):
     """Return the learning curve of ``learners`` on ``self.data``.

     Calls ``Orange.evaluation.testing.learning_curve_n`` with the widget's
     fold count and curve proportions; every callback invocation advances
     a progress bar sized ``self.steps * self.folds``.
     """
     bar = OWGUI.ProgressBar(self, iterations=self.steps * self.folds)
     compute_curve = Orange.evaluation.testing.learning_curve_n
     points = compute_curve(learners,
                            self.data,
                            folds=self.folds,
                            proportions=self.curvePoints,
                            callback=bar.advance)
     bar.finish()
     return points
Ejemplo n.º 6
0
    def loadFile(self):
        """Load the selected recent file plus its companion files.

        Does nothing unless ``self.originalData`` exists and a recent file
        is available.  The chosen file is moved to the front of
        ``self.recentFiles``, the combo box is rebuilt, and the file is
        parsed with ``readMatrix``.  Two optional companions sharing the
        file's base name are then picked up:

        * ``<base>.tab`` -- loaded as an ``orange.ExampleTable`` and
          attached as ``self.matrix.items``;
        * ``<base>.res`` -- unpickled into ``self.matrix.results``.

        Missing companions are reported through ``self.warning``; any
        exception is reported through ``self.error``.  Returns ``None``.
        """
        if not hasattr(self, "originalData"):
            return

        if self.fileIndex:
            # Promote the chosen file to the top of the recent-files list.
            fn = self.recentFiles[self.fileIndex]
            self.recentFiles.remove(fn)
            self.recentFiles.insert(0, fn)
            self.fileIndex = 0
        elif self.recentFiles:
            fn = self.recentFiles[0]
        else:
            return

        self.filecombo.clear()
        for path in self.recentFiles:
            self.filecombo.addItem(os.path.split(path)[1])

        self.error()  # clear any previously reported error

        pb = None
        try:
            self.matrix = None
            self.labels = None
            self.data = None
            pb = OWGUI.ProgressBar(self, 100)
            self.matrix, self.labels, self.data = readMatrix(fn, pb)

            base = os.path.splitext(fn)[0]
            table_path = base + ".tab"
            results_path = base + ".res"
            missing = []
            self.warning()  # clear any previous warning
            if os.path.exists(table_path):
                self.data = orange.ExampleTable(table_path)
                self.matrix.items = self.data
            else:
                missing.append("ExampleTable %s not found!" % table_path)
            if os.path.exists(results_path):
                # NOTE(review): pickle.load can execute arbitrary code; only
                # open .res files that come from a trusted source.
                with open(results_path, 'rb') as stream:
                    self.matrix.results = pickle.load(stream)
            else:
                missing.append("Results pickle %s not found!" % results_path)

            self.matrix.originalData = self.originalData

            if missing:
                self.warning("\n".join(missing))

            self.relabel()
        except Exception as e:
            if pb is not None:
                # Do not leave the bar open after a failure (assumes
                # finish() may safely be called more than once -- TODO
                # confirm).
                pb.finish()
            # str(e) instead of e.message: the attribute is deprecated,
            # missing on Python 3 and empty for multi-argument exceptions.
            self.error("Error while reading the file\n\n%s" % e)
Ejemplo n.º 7
0
    def setData(self, data):
        """Store ``data`` and train the classifier on it.

        ``data`` is kept only when ``self.isDataWithClass`` accepts it as
        having a discrete class (with the missing-value check enabled) and
        it is non-empty; otherwise ``self.data`` becomes ``None`` and
        nothing is trained.  Training progress is reported through a
        progress bar sized ``self.trees``.  If training raises, the message
        is shown through ``self.error`` and ``self.classifier`` is set to
        ``None``.
        """
        usable = self.isDataWithClass(
            data, orange.VarTypes.Discrete, checkMissing=True)
        # Same result as the old ``usable and data or None`` idiom.
        self.data = data if (usable and data) else None

        if self.data:
            learner = self.constructLearner()
            pb = OWGUI.ProgressBar(self, iterations=self.trees)
            learner.callback = pb.advance
            try:
                self.classifier = learner(self.data)
                self.classifier.name = self.name
            except Exception as errValue:
                self.error(str(errValue))
                self.classifier = None
            pb.finish()
Ejemplo n.º 8
0
    def setData(self, data):
        """Store ``data`` and train the classifier on it.

        ``data`` is kept only when ``self.isDataWithClass`` accepts it as
        having a discrete class (with the missing-value check enabled) and
        it is non-empty; otherwise ``self.data`` becomes ``None`` and
        nothing is trained.  The learner's callback value ``v`` is shown as
        ``100.0 * v`` on the widget's own progress bar.  If training
        raises, the message is shown through ``self.error`` and
        ``self.classifier`` is set to ``None``.
        """
        usable = self.isDataWithClass(
            data, orange.VarTypes.Discrete, checkMissing=True)
        # Same result as the old ``usable and data or None`` idiom.
        self.data = data if (usable and data) else None

        if self.data:
            learner = self.constructLearner()
            # The previous version also built an OWGUI.ProgressBar here that
            # was never advanced or finished; progress is driven solely
            # through progressBarInit / progressBarSet / progressBarFinished.
            self.progressBarInit()
            learner.callback = lambda v: self.progressBarSet(100.0 * v)
            try:
                self.classifier = learner(self.data)
                self.classifier.name = self.name
            except Exception as errValue:
                self.error(str(errValue))
                self.classifier = None
            self.progressBarFinished()
Ejemplo n.º 9
0
    def setData(self, data):
        """Store ``data``, train the classifier and send it to the output.

        ``data`` is kept only when ``self.isDataWithClass`` accepts it as
        having a discrete class (with the missing-value check enabled) and
        it is non-empty.  With usable data a learner is constructed and
        trained while a progress bar sized ``self.trees`` advances;
        otherwise, or when training raises, ``self.classifier`` is ``None``
        and ``self.streeEnabled(False)`` is called.  The classifier (or
        ``None``) is always sent on "Random Forest Classifier".
        """
        accepted = self.isDataWithClass(
            data, orange.VarTypes.Discrete, checkMissing=True)
        self.data = data if (accepted and data) else None

        if not self.data:
            self.classifier = None
            self.streeEnabled(False)
        else:
            forest_learner = self.constructLearner()
            progress = OWGUI.ProgressBar(self, iterations=self.trees)
            forest_learner.callback = progress.advance
            try:
                self.classifier = forest_learner(self.data)
                self.classifier.name = self.name
                self.streeEnabled(True)
            except Exception as failure:
                self.error(str(failure))
                self.classifier = None
                self.streeEnabled(False)
            progress.finish()

        self.send("Random Forest Classifier", self.classifier)
Ejemplo n.º 10
0
class OWExampleDistance(OWWidget):
    """Widget that computes pairwise distances between examples.

    Receives an example table on the "Data" input and sends an
    ``orange.SymMatrix`` on the "Distances" output.

    Settings:
      * ``Metrics``   -- index into ``self.metrics`` (the selected metric);
      * ``Normalize`` -- value assigned to the distance constructor's
        ``normalize`` attribute;
      * ``Label``     -- context setting bound to the "Example Label"
        combo box.
    """
    settingsList = ["Metrics", "Normalize"]
    contextHandlers = {"": DomainContextHandler("", ["Label"])}

    def __init__(self, parent=None, signalManager = None):
        """Declare signals, load settings and build the control area."""
        OWWidget.__init__(self, parent, signalManager, 'ExampleDistance', wantMainArea = 0, resizingEnabled = 0)

        self.inputs = [("Data", ExampleTable, self.dataset)]
        self.outputs = [("Distances", orange.SymMatrix)]

        # Defaults; loadSettings() may overwrite the persisted ones.
        self.Metrics = 0
        self.Normalize = True
        self.Label = ""
        self.loadSettings()
        self.data = None
        self.matrix = None

        # (display name, distance constructor class) pairs; ``self.Metrics``
        # indexes this list.
        self.metrics = [
            ("Euclidean", orange.ExamplesDistanceConstructor_Euclidean),
            ("Pearson Correlation", orngClustering.ExamplesDistanceConstructor_PearsonR),
            ("Spearman Rank Correlation", orngClustering.ExamplesDistanceConstructor_SpearmanR),
            ("Manhattan", orange.ExamplesDistanceConstructor_Manhattan),
            ("Hamming", orange.ExamplesDistanceConstructor_Hamming),
            ("Relief", orange.ExamplesDistanceConstructor_Relief),
            ]

        cb = OWGUI.comboBox(self.controlArea, self, "Metrics", box="Distance Metrics",
            items=[x[0] for x in self.metrics],
            tooltip="Choose metrics to measure pairwise distance between examples.",
            callback=self.distMetricChanged, valueType=str)
        cb.setMinimumWidth(170)

        OWGUI.separator(self.controlArea)

        box = OWGUI.widgetBox(self.controlArea, "Normalization",
                              addSpace=True)
        self.normalizeCB = OWGUI.checkBox(box, self, "Normalize", "Normalize data",
                                          callback=self.computeMatrix)

        # The check box is enabled only for metrics 0 (Euclidean) and
        # 3 (Manhattan).
        self.normalizeCB.setEnabled(self.Metrics in [0, 3])

        self.labelCombo = OWGUI.comboBox(self.controlArea, self, "Label", box="Example Label",
            items=[],
            tooltip="Attribute used for example labels",
            callback=self.setLabel, sendSelectedValue = 1)

        self.labelCombo.setDisabled(1)

        OWGUI.rubber(self.controlArea)

    def sendReport(self):
        """Report the selected metric name, the label setting and the data."""
        self.reportSettings("Settings",
                            [("Metrics", self.metrics[self.Metrics][0]),
                             ("Label", self.Label)])
        self.reportData(self.data)

    def distMetricChanged(self):
        """Update the normalization check box and recompute the matrix."""
        self.normalizeCB.setEnabled(self.Metrics in [0, 3])
        self.computeMatrix()

    def computeMatrix(self):
        """Compute the distance matrix for ``self.data`` and send it.

        Instantiates the selected distance constructor, sets its
        ``normalize`` attribute from ``self.Normalize``, fills the lower
        triangle (diagonal included) of an ``orange.SymMatrix`` and sends
        it on "Distances".  If the matrix cannot be allocated, an error is
        shown and ``None`` is sent.  Does nothing when there is no data.
        """
        if not self.data:
            return
        data = self.data
        constructor = self.metrics[self.Metrics][1]()
        constructor.normalize = self.Normalize
        dist = constructor(data)
        self.error(0)
        try:
            self.matrix = orange.SymMatrix(len(data))
        except orange.KernelException as ex:
            self.error(0, "Could not create distance matrix! %s" % str(ex))
            self.matrix = None
            self.send("Distances", None)
            return
        self.matrix.setattr('items', data)
        n = len(data)
        pb = OWGUI.ProgressBar(self, 100)
        # The loops below visit n * (n + 1) / 2 cells (diagonal included),
        # so the milestones are spread over that many steps.
        milestones = orngMisc.progressBarMilestones(n * (n + 1) // 2, 100)
        count = 0
        try:
            for i in range(n):
                for j in range(i + 1):
                    self.matrix[i, j] = dist(data[i], data[j])
                    if count in milestones:
                        pb.advance()
                    count += 1
        finally:
            # Close the bar even if a distance computation raises.
            pb.finish()
        self.send("Distances", self.matrix)
Ejemplo n.º 11
0
    def showWidget(self):
        """Rebuild the embedded visualization widget for ``self.model``.

        Detaches the currently embedded widget (``self.ow``), then picks a
        widget class from the upper-cased value of ``self.model["model"]``,
        feeds it the data or classifier it needs and adopts its layout.
        Returns early (with nothing embedded) when there is no data, no
        model, or the model has no "model" entry.
        """
        self.information()
        
        # Detach the previously embedded widget, or drop a leftover layout.
        if self.ow is not None:
            self.ow.topWidgetPart.hide()
            self.ow.setLayout(self.layout())
        elif self.layout() is not None: 
            sip.delete(self.layout())
            
        self.ow = None
        if self.data is None: 
            self.information("No learning data given.")
            return
        if self.model is None: return
        if "model" not in self.model.domain: return
        # NOTE(review): ``attr`` is only bound when the model has a "label"
        # entry; the projection and KNN branches below use it
        # unconditionally and would raise NameError otherwise -- confirm
        # whether models without "label" can reach those branches.
        if "label" in self.model.domain:
            attr = self.model["label"].value.split(', ')
        
        modelType = self.model["model"].value.upper()
        
        # presumably self.setWidget() instantiates the class and stores it
        # in self.ow -- its definition is not visible here.
        projWidget = None
        # The triple-T spelling is accepted as well.
        if modelType == "SCATTERPLOT" or modelType == "SCATTTERPLOT": 
            projWidget = self.setWidget(OWScatterPlot.OWScatterPlot)

        if modelType == "RADVIZ":
            projWidget = self.setWidget(OWRadviz.OWRadviz) 
            
        if modelType == "POLYVIZ": 
            projWidget = self.setWidget(OWPolyviz.OWPolyviz) 
            
        # Common set-up for the three projection widgets above.
        if projWidget is not None:
            self.ow.setData(self.data)
            self.ow.setShownAttributes(attr)
            self.ow.handleNewSignals() 
        
        ################################
        ### add new model types here ###
        ################################
        
        if modelType == "SPCA" or modelType == "LINPROJ": 
            self.setWidget(OWLinProj.OWLinProj) 
            self.ow.setData(self.data)
            self.ow.setShownAttributes(attr)
            self.ow.handleNewSignals() 
            # Restore the anchor positions stored with the model.
            xAnchors, yAnchors = self.model["anchors"].value
            self.ow.updateGraph(None, setAnchors=1, XAnchors=xAnchors, YAnchors=yAnchors)
            
        if modelType == "TREE":
            self.setWidget(OWClassificationTreeGraph.OWClassificationTreeGraph)
            classifier = self.model["classifier"].value
            self.ow.ctree(classifier)
            
        if modelType == "BAYES":
            self.setWidget(OWNomogram.OWNomogram) 
            classifier = self.model["classifier"].value
            self.ow.classifier(classifier)
            
        if modelType == "KNN":
            # Keep only the labelled attributes and the class variable, then
            # build a Euclidean distance matrix for the MDS widget.
            exclude = [att for att in self.data.domain if att.name not in attr + [self.data.domain.classVar.name]]
            data2 = orange.Preprocessor_ignore(self.data, attributes = exclude)
            dist = orange.ExamplesDistanceConstructor_Euclidean(data2)
            smx = orange.SymMatrix(len(data2))
            smx.setattr('items', data2)
            pb = OWGUI.ProgressBar(self, 100)
            # NOTE(review): milestones are computed for n*(n-1)/2 steps, but
            # the loops below run n*(n+1)/2 times (diagonal included).
            milestones = orngMisc.progressBarMilestones(len(data2)*(len(data2)-1)/2, 100)
            count = 0
            for i in range(len(data2)):
                for j in range(i+1):
                    smx[i, j] = dist(data2[i], data2[j])
                    if count in milestones:
                        pb.advance()
                    count += 1
            pb.finish()
            self.setWidget(OWMDS.OWMDS)
            self.ow.cmatrix(smx)
            
        # Adopt the embedded widget's layout and route its output through
        # this widget's send().
        if self.ow is not None:
            self.ow.send = self.send
            if self.layout() is not None: sip.delete(self.layout())
            self.setLayout(self.ow.layout())
            self.ow.topWidgetPart.show()
        
        self.update()
Ejemplo n.º 12
0
    def score(self, ids):
        """Compute scores for the learners identified by ``ids``.

        Without data, the ``results`` of the listed learners are reset to
        ``None``.  Otherwise every listed learner is first tried on a small
        sample; learners that raise or predict a value of a different
        variable type than the class get empty scores.  The remaining
        learners are evaluated with the resampling scheme selected by
        ``self.resampling`` (0: cross validation, 1: leave-one-out,
        2: proportion test, 3: test on learn data, 4: test on
        ``self.testdata``).  Each statistic in ``self.stat`` is then
        evaluated and the per-learner scores are stored before
        ``self.sendResults()`` is called.

        Returns ``None``.
        """
        if not self.data:
            for learner_id in ids:
                self.learners[learner_id].results = None
            return
        # test which learners can accept the given data set
        # e.g., regressions can't deal with classification data
        learners = []
        n = len(self.data.domain.attributes) * 2
        indices = orange.MakeRandomIndices2(
            p0=min(n, len(self.data)),
            stratified=orange.MakeRandomIndices2.StratifiedIfPossible)
        new = self.data.selectref(indices(self.data))
        self.warning(0)
        for l in [self.learners[learner_id] for learner_id in ids]:
            learner = l.learner
            if self.preprocessor:
                learner = self.preprocessor.wrapLearner(learner)
            try:
                predictor = learner(new)
                if predictor(new[0]).varType == new.domain.classVar.varType:
                    learners.append(learner)
                else:
                    l.scores = []
            except Exception as ex:
                self.warning(
                    0,
                    "Learner %s ends with exception: %s" % (l.name, str(ex)))
                l.scores = []

        if not learners:
            return

        # computation of results (res, and cm if classification)
        if self.resampling == 0:
            pb = OWGUI.ProgressBar(self, iterations=self.nFolds)
            res = orngTest.crossValidation(
                learners,
                self.data,
                folds=self.nFolds,
                strat=orange.MakeRandomIndices.StratifiedIfPossible,
                callback=pb.advance,
                storeExamples=True)
            pb.finish()
        elif self.resampling == 1:
            pb = OWGUI.ProgressBar(self, iterations=len(self.data))
            res = orngTest.leaveOneOut(learners,
                                       self.data,
                                       callback=pb.advance,
                                       storeExamples=True)
            pb.finish()
        elif self.resampling == 2:
            pb = OWGUI.ProgressBar(self, iterations=self.pRepeat)
            res = orngTest.proportionTest(learners,
                                          self.data,
                                          self.pLearning / 100.,
                                          times=self.pRepeat,
                                          callback=pb.advance,
                                          storeExamples=True)
            pb.finish()
        elif self.resampling == 3:
            pb = OWGUI.ProgressBar(self, iterations=len(learners))
            res = orngTest.learnAndTestOnLearnData(learners,
                                                   self.data,
                                                   storeExamples=True,
                                                   callback=pb.advance)
            pb.finish()

        elif self.resampling == 4:
            if not self.testdata:
                # No separate test set: clear the scores of all learners.
                for l in self.learners.values():
                    l.scores = []
                return
            pb = OWGUI.ProgressBar(self, iterations=len(learners))
            res = orngTest.learnAndTestOnTestData(learners,
                                                  self.data,
                                                  self.testdata,
                                                  storeExamples=True,
                                                  callback=pb.advance)
            pb.finish()
        if self.isclassification():
            # ``cm`` is not referenced directly below; presumably the
            # eval'ed statistic expressions read it -- verify against the
            # definitions of self.stat.
            cm = orngStat.computeConfusionMatrices(res,
                                                   classIndex=self.targetClass)

        if self.preprocessor:  # Unwrap learners
            learners = [l.wrappedLearner for l in learners]

        res.learners = learners

        for l in [self.learners[learner_id] for learner_id in ids]:
            if l.learner in learners:
                l.results = res

        self.error(list(range(len(self.stat))))
        scores = []
        for i, s in enumerate(self.stat):
            try:
                # NOTE(review): ``s.f`` is evaluated with eval() and can see
                # this function's locals (e.g. ``res`` and ``cm``), so those
                # names must not be renamed and ``s.f`` must never come
                # from untrusted input.
                scores.append(eval("orngStat." + s.f))

            except Exception as ex:
                self.error(
                    i,
                    "An error occurred while evaluating orngStat.%s on %s due to %s" %
                    (s.f, " ".join([l.name for l in learners]), ex))
                scores.append([None] * len(self.learners))

        for (i, l) in enumerate(learners):
            self.learners[l.id].scores = [s[i] if s else None for s in scores]

        self.sendResults()
    def sendData(self):
        """Compute result of widget processing and send to output.

        Downloads the XML of every selected title from
        ``self.document_base_url``, wraps each document in an ``Input``,
        concatenates them when there is more than one, copies the title
        annotations onto the resulting segments and sends the segmentation
        on the 'Text data' channel.  When nothing is selected or a download
        fails, ``None`` is sent and the info box shows a warning or error.

        Returns ``None``.
        """
        # Skip if title list is empty:
        if self.titleLabels == []:
            return

        # Check that something has been selected...
        if not self.selectedTitles:
            self.infoBox.setText(u'Please select one or more titles.',
                                 'warning')
            self.send(u'Text data', None, self)
            return

        # Clear created Inputs.
        self.clearCreatedInputs()

        # Initialize progress bar.
        progressBar = OWGUI.ProgressBar(self,
                                        iterations=len(self.selectedTitles))

        # Attempt to connect to Theatre-classique and retrieve plays...
        xml_contents = list()
        annotations = list()
        try:
            for title in self.selectedTitles:
                title_annotations = self.filteredTitleSeg[title].annotations
                response = urllib2.urlopen(
                    self.document_base_url + title_annotations[u'url'])
                try:
                    xml_contents.append(unicode(response.read(), u'utf8'))
                finally:
                    # Release the connection even if reading/decoding fails.
                    response.close()
                annotations.append(title_annotations.copy())
                progressBar.advance()  # 1 tick on the progress bar...

        # If an error occurs (e.g. http error, or memory error)...
        # ``Exception`` rather than a bare except, so KeyboardInterrupt and
        # SystemExit are not swallowed.
        except Exception:

            # Do not leave the progress bar open after a failure.
            progressBar.finish()

            # Set Info box and widget to 'error' state.
            self.infoBox.setText(
                u"Couldn't download data from theatre-classique website.",
                'error')

            # Reset output channel.
            self.send(u'Text data', None, self)
            return

        # Store downloaded XML in input objects...
        for xml_content in xml_contents:
            self.createdInputs.append(Input(xml_content, self.captionTitle))

        # If there's only one play, the widget's output is the created Input.
        if len(self.createdInputs) == 1:
            self.segmentation = self.createdInputs[0]

        # Otherwise the widget's output is a concatenation...
        else:
            self.segmentation = Segmenter.concatenate(
                self.createdInputs,
                self.captionTitle,
                import_labels_as=None,
            )

        # Annotate segments...
        for idx, segment in enumerate(self.segmentation):
            segment.annotations.update(annotations[idx])
            self.segmentation[idx] = segment

        # Store imported URLs as setting.
        # NOTE(review): only the first selected title's URL is stored --
        # confirm this is intended when several titles are selected.
        self.importedURLs = [
            self.filteredTitleSeg[self.selectedTitles[0]].annotations[u'url']
        ]

        # Set status to OK and report data size...
        message = u'%i segment@p sent to output ' % len(self.segmentation)
        message = pluralize(message, len(self.segmentation))
        numChars = 0
        for segment in self.segmentation:
            numChars += len(Segmentation.get_data(segment.str_index))
        message += u'(%i character@p).' % numChars
        message = pluralize(message, numChars)
        self.infoBox.setText(message)

        # Clear progress bar.
        progressBar.finish()

        # Send token...
        self.send(u'Text data', self.segmentation, self)
        self.sendButton.resetSettingsChangedFlag()
Ejemplo n.º 14
0
    def sendData(self):
        """Tag the input segmentation with TreeTagger and send the result.

        Sends ``None`` on 'Text data' when the TreeTagger link is missing
        (``self.NoLink``) or when there is no input.  Otherwise the input
        segments are serialized inside ``<xb_tt>`` elements that carry
        their annotations, passed to ``self.tag``, re-imported, and recoded
        into ``<w lemma=... type=...>`` elements.  If ``self.activer_xml``
        is set the recoded segmentation is sent as is; otherwise it is
        segmented further on the ``w`` elements.  The TreeTagger link is
        also written to ``treetagger_link.txt``.
        """
        # TreeTagger link was not found.
        if self.NoLink:
            self.infoBox.setText(u"Sorry, TreeTagger's link not found.",
                                 "error")
            self.send('Text data', None)
        # Important: if input data is None, propagate this value to output...
        elif not self.inputData:
            self.infoBox.setText(u"Widget needs input", "warning")
            self.send('Text data', None)
        else:
            # Let the user know that something is happening...
            self.infoBox.setText(u'TreeTagger is running...', "warning")

            # Initialize progress bar.
            # NOTE(review): created with 5 iterations but advanced only
            # three times below.
            self.progressBar = OWGUI.ProgressBar(self, iterations=5)

            # Copy the segmentation, adding an annotation that serializes
            # each segment's existing annotations as XML attributes.
            copy_of_input_seg = Segmentation()
            copy_of_input_seg.label = self.inputData.label
            for segment in self.inputData:
                attr = " ".join(
                    ["%s='%s'" % item for item in segment.annotations.items()])
                segment.annotations["tt_xb"] = attr
                copy_of_input_seg.append(segment)

            # Advance the progress bar by one step.
            self.progressBar.advance()

            concatenated_text = copy_of_input_seg.to_string(
                formatting="<xb_tt %(tt_xb)s>%(__content__)s</xb_tt>",
                display_all=True,
            )

            # Advance the progress bar by one step.
            self.progressBar.advance()

            tagged_text = self.tag(concatenated_text)
            tagged_input = Input(tagged_text)
            tagged_segmentation = Segmenter.import_xml(tagged_input, "xb_tt")

            # Advance the progress bar by one step.
            self.progressBar.advance()

            # The recoding is the same whether or not the XML check box is
            # ticked, so it is done once.
            xml_segmentation = Segmenter.recode(
                tagged_segmentation,
                substitutions=[
                    (re.compile(r"<unknown>"), '[unknown]'),
                    (re.compile(r"(.+)\t(.+)\t(.+)"),
                     '<w lemma="&3" type="&2">&1</w>'),
                    (re.compile(r'"""'), '"&quot;"'),
                ],
            )
            if self.activer_xml == True:
                # XML check box ticked: output the recoded XML as is.
                final_segmentation = xml_segmentation
            else:
                # XML check box not ticked: segment on the <w> elements.
                final_segmentation = Segmenter.import_xml(
                    xml_segmentation, "w")

            self.infoBox.dataSent('')

            # Save the TreeTagger link...
            if self.system == "nt":
                link_path = "treetagger_link.txt"
            else:
                link_path = os.path.normpath("/Users/" + self.user +
                                             "/treetagger_link.txt")

            # The context manager closes the file even if the write fails.
            with open(link_path, 'w') as link_file:
                link_file.write(self.treetagger_link)

            # Clear progress bar.
            self.progressBar.finish()

            # Send the segmentation.
            self.send('Text data', final_segmentation, self)
            self.compteur += 1
            self.sendButton.resetSettingsChangedFlag()
Ejemplo n.º 15
0
    def sendData(self):
        """Compute result of widget processing and send to output.

        Fetches inputs from the service named by ``self.service`` (Twitter,
        Wikipedia or Bing), concatenates them into one segmentation,
        reports its size in the info box and sends it on 'Text data'.

        Returns ``False`` after sending ``None`` when the Twitter request
        is rejected, the search limit is exceeded, or nothing was
        retrieved; returns ``None`` after a successful send.
        """

        def give_up(text, level):
            # Show the message, reset the output channel, signal failure.
            self.infoBox.setText(text, level)
            self.send(u'Text data', None, self)
            return False

        # Clear created Inputs
        self.clearCreatedInputs()

        if self.service == u'Twitter':
            license_keys = (
                self.twitterLicenseKeysConsumerKey,
                self.twitterLicenseKeysConsumerSecret,
                (self.twitterLicenseKeysAccessToken,
                 self.twitterLicenseKeysAccessTokenSecret),
            )
            try:
                self.createdInputs = self.get_tweets(
                    self.word_to_search,
                    self.nb_tweet,
                    self.include_RT,
                    self.useTwitterLicenseKey,
                    license_keys)
            except (HTTP401Authentication, HTTP400BadRequest):
                return give_up(u'Please enter valid Twitter api keys.',
                               u'error')
            except SearchEngineLimitError:
                return give_up(u'Twitter search limit has been exceeded.',
                               u'error')
        elif self.service == u'Wikipedia':
            self.createdInputs = self.get_wiki_article(
                self.word_to_search,
                self.wiki_section,
                self.wiki_type_of_text)
        elif self.service == u'Bing':
            self.createdInputs = self.get_bing_entries(
                self.word_to_search,
                self.nb_bing_entry)

        if len(self.createdInputs) == 0:
            return give_up(u'Please try to change query or settings.',
                           u'warning')

        # Initialize progress bar
        progressBar = OWGUI.ProgressBar(self, iterations=50)

        output_segmentation = Segmenter.concatenate(self.createdInputs,
                                                    self.captionTitle,
                                                    import_labels_as=None)

        # Report the number of segments and characters.
        segment_count = len(output_segmentation)
        message = pluralize(
            u'%i segment@p sent to output ' % segment_count,
            len(output_segmentation))
        numChars = 0
        for segment in output_segmentation:
            numChars += len(Segmentation.get_data(segment.str_index))
        message = pluralize(message + u'(%i character@p).' % numChars,
                            numChars)
        self.infoBox.setText(message)

        # Run the bar through all of its 50 steps.
        for _tick in xrange(50):
            progressBar.advance()

        # Clear progress bar.
        progressBar.finish()

        self.send('Text data', output_segmentation, self)

        self.sendButton.resetSettingsChangedFlag()