def sendData(self):
        """Emit the bypassed and the displayed segmentation.

        Without input, a warning is shown and ``None`` is sent on both
        output channels.  Otherwise a bypassed copy of the input (labelled
        with the widget's caption title) is always sent; the displayed
        segmentation is only sent when no 'format' problem is currently
        reported and its first segment has non-empty content.
        """
        # Nothing on input: warn and clear both channels.
        if not self.segmentation:
            self.infoBox.setText(u'Widget needs input.', 'warning')
            for channel in ('Bypassed segmentation', 'Displayed segmentation'):
                self.send(channel, None, self)
            return

        bypassed = Segmenter.bypass(self.segmentation, self.captionTitle)
        self.send('Bypassed segmentation', bypassed, self)

        # TODO: Check if this is correct replacement for textable v1.*, v2.*
        format_problem = (
            'format' in self._currentWarningMessage
            or 'format' in self._currentErrorMessage
        )
        if format_problem:
            # NOTE: this exit neither updates the info box nor resets the
            # "settings changed" flag.
            self.send('Displayed segmentation', None, self)
            return

        first_content = self.displayedSegmentation[0].get_content()
        displayed = self.displayedSegmentation if len(first_content) > 0 else None
        self.send('Displayed segmentation', displayed, self)

        # TODO: This check differs from the one above only in capitalization
        #       ("Format" vs. 'format').  Is this intentional?
        if "Format" not in self._currentErrorMessage:
            count = len(self.segmentation)
            report = pluralize(u'%i segment@p sent to output.' % count, count)
            self.infoBox.setText(report)
        self.sendButton.resetSettingsChangedFlag()
# Example #2
0
 def test_bypass_deepcopy(self):
     """The result of bypass must not compare equal to its input."""
     source = self.letter_seg
     bypassed = Segmenter.bypass(source)
     failure = "bypass doesn't deep copy input segments!"
     self.assertNotEqual(bypassed, source, msg=failure)
# Example #3
0
 def test_bypass_copy_annotations(self):
     """Annotation 'a' of every bypassed segment must match the input's."""
     bypassed = Segmenter.bypass(self.other_letter_seg)
     produced = [segment.annotations['a'] for segment in bypassed]
     expected = [
         segment.annotations['a'] for segment in self.other_letter_seg
     ]
     self.assertEqual(
         produced, expected, msg="bypass doesn't copy annotations!"
     )
# Example #4
0
 def test_bypass_copy_segments(self):
     """Content of every bypassed segment must match the input's."""
     bypassed = Segmenter.bypass(self.letter_seg)
     produced = [segment.get_content() for segment in bypassed]
     expected = [segment.get_content() for segment in self.letter_seg]
     self.assertEqual(
         produced, expected, msg="bypass doesn't copy input segments!"
     )
    def huntTheLexic(self):
        """
            Main I/O function: stores in self.outputSeg a concatenation of
            a bypassed copy of self.inputSeg (labelled "__None__") with, for
            each selected lexical field, the first element returned by
            Segmenter.select for that field's word list (labelled with the
            field name). Labels are imported as an annotation whose key is
            self.labelName, or "Topic" when that name is empty.
        """

        # Names of the topics the user picked (by index into the titles).
        chosenNames = []
        if self.titleLabels:
            allTitles = list(self.titleLabels)
            chosenNames = [allTitles[position]
                           for position in self.selectedFields]

        # Word lists of the chosen topics, in the order of defaultDict.
        chosenLists = {}
        for topic, words in defaultDict.items():
            if topic in chosenNames:
                chosenLists[topic] = words

        # With an input, select its segments once per non-empty word list
        # and label the selection with the topic name.
        matches = []
        if self.inputSeg is not None:
            for topic, words in chosenLists.items():
                usableWords = [word for word in words if word]
                if not usableWords:
                    continue
                selection = Segmenter.select(
                    self.inputSeg,
                    self.listToRegex(usableWords),
                    label=topic,
                )
                matches.append(selection[0])

        # Annotation key under which the topic labels are imported.
        labelNameVar = "Topic" if self.labelName == "" else self.labelName

        # Output: unlabelled copy of the input merged with the labelled
        # selections.
        unlabelled = Segmenter.bypass(self.inputSeg, label="__None__")
        self.outputSeg = Segmenter.concatenate(
            [unlabelled] + matches,
            merge_duplicates=True,
            label=self.captionTitle,
            import_labels_as=labelNameVar,
        )
# Example #6
0
 def test_bypass_deepcopy(self):
     """Bypassing a segmentation must yield an object unequal to it."""
     result = Segmenter.bypass(self.letter_seg)
     self.assertNotEqual(
         result,
         self.letter_seg,
         msg="bypass doesn't deep copy input segments!",
     )
# Example #7
0
 def test_bypass_copy_annotations(self):
     """Values of annotation 'a' must survive a bypass unchanged."""

     def values_of(segments):
         # Collect annotation 'a' of each segment, in order.
         return [item.annotations['a'] for item in segments]

     result = Segmenter.bypass(self.other_letter_seg)
     self.assertEqual(
         values_of(result),
         values_of(self.other_letter_seg),
         msg="bypass doesn't copy annotations!",
     )
# Example #8
0
 def test_bypass_copy_segments(self):
     """Segment contents must survive a bypass unchanged."""

     def contents_of(segments):
         # Collect the content of each segment, in order.
         return [item.get_content() for item in segments]

     result = Segmenter.bypass(self.letter_seg)
     self.assertEqual(
         contents_of(result),
         contents_of(self.letter_seg),
         msg="bypass doesn't copy input segments!",
     )
    def sendData(self):
        """(Have LTTL.Segmenter) perform the actual selection.

        Validates the current settings, runs the selection method in effect
        (Regex, Sample or Threshold with advanced settings; always Regex
        with basic settings) and sends the outcome on the 'Selected data'
        and 'Discarded data' channels.  Whenever the input or a required
        setting is missing or invalid, a message is shown in the info box,
        ``None`` is sent on both channels and the method returns early.

        Regex flags are handed to ``re.compile`` as flag constants instead
        of being appended to the pattern as an inline ``(?...)`` group:
        Python 3.11+ rejects global inline flags that are not at the start
        of the expression, which made every flagged regex (and every
        basic-mode regex) be reported as invalid.

        The control area is re-enabled and the progress bar finished even
        if the Segmenter call raises.
        """

        def abort(message, state='warning'):
            """Show *message* and send None on both output channels."""
            self.infoBox.setText(message, state)
            self.send('Selected data', None, self)
            self.send('Discarded data', None, self)

        def key_or_none(key):
            """Map the u'(none)' placeholder (or an empty key) to None."""
            if key == u'(none)':
                return None
            return key or None

        # Check that there's something on input...
        if not self.segmentation:
            abort(u'Widget needs input.')
            return

        # TODO: remove message 'No label was provided.' from docs

        advanced = self.displayAdvancedSettings
        # Basic settings always perform a regex-based selection.
        method = self.method if advanced else u'Regex'
        autoNumberKey = None

        # If mode is Regex...
        if method == u'Regex':

            # Check that regex is not empty...
            if not self.regex:
                abort(u'Please enter a regex.')
                return

            # Prepare regex (basic settings only request unicode matching)...
            if advanced:
                flags = 0
                if self.ignoreCase:
                    flags |= re.IGNORECASE
                if self.unicodeDependent:
                    flags |= re.UNICODE
                if self.multiline:
                    flags |= re.MULTILINE
                if self.dotAll:
                    flags |= re.DOTALL
            else:
                flags = re.UNICODE
            try:
                regex = re.compile(self.regex, flags)
            except re.error as re_error:
                # Not every re.error carries a 'msg' attribute.
                try:
                    message = u'Please enter a valid regex (error: %s).' % \
                              re_error.msg
                except AttributeError:
                    message = u'Please enter a valid regex.'
                abort(message, 'error')
                return

        # Else if mode is Sample...
        elif method == u'Sample':

            # Get sample size...
            if self.sampleSizeMode == u'Proportion':
                sampleSize = iround(
                    len(self.segmentation) * (self.samplingRate / 100))
            else:
                sampleSize = self.sampleSize
            if sampleSize <= 0:
                abort('Please enter a larger sample size', 'error')
                return

        # Else if mode is Threshold...
        elif method == u'Threshold':

            # Get min and max count...
            if self.thresholdMode == u'Proportion':
                minCount = iround(
                    math.ceil(
                        len(self.segmentation) *
                        (self.minProportion / 100)))
                maxCount = iround(
                    math.floor(
                        len(self.segmentation) *
                        (self.maxProportion / 100)))
            else:
                minCount = self.minCount
                maxCount = self.maxCount
            # A threshold that is not applied is replaced by the widest
            # possible bound.
            if not self.applyMinThreshold:
                minCount = 1
            if not self.applyMaxThreshold:
                maxCount = len(self.segmentation)

        # Check that autoNumberKey is not empty (if necessary)...
        if advanced and self.autoNumber:
            if not self.autoNumberKey:
                abort(u'Please enter an annotation key for auto-numbering.')
                return
            autoNumberKey = self.autoNumberKey

        # Perform selection...
        self.infoBox.setText(u"Processing, please wait...", "warning")
        self.controlArea.setDisabled(True)
        progressBar = ProgressBar(self, iterations=len(self.segmentation))
        try:
            if method == u'Regex':
                (selected_data, discarded_data) = Segmenter.select(
                    segmentation=self.segmentation,
                    regex=regex,
                    mode=self.regexMode.lower(),
                    annotation_key=key_or_none(self.regexAnnotationKey),
                    label=self.captionTitle,
                    # Basic settings always copy annotations.
                    copy_annotations=(
                        self.copyAnnotations if advanced else True
                    ),
                    auto_number_as=autoNumberKey,
                    progress_callback=progressBar.advance,
                )
            elif method == u'Sample':
                (selected_data, discarded_data) = Segmenter.sample(
                    segmentation=self.segmentation,
                    sample_size=sampleSize,
                    mode='random',
                    label=self.captionTitle,
                    copy_annotations=self.copyAnnotations,
                    auto_number_as=autoNumberKey,
                    progress_callback=progressBar.advance,
                )
            elif method == u'Threshold':
                # Unapplied thresholds were already widened above, so
                # comparing the bounds is enough to detect a no-op filter.
                if minCount == 1 and maxCount == len(self.segmentation):
                    selected_data = Segmenter.bypass(
                        segmentation=self.segmentation,
                        label=self.captionTitle,
                    )
                    discarded_data = None
                else:
                    (selected_data, discarded_data) = Segmenter.threshold(
                        segmentation=self.segmentation,
                        annotation_key=key_or_none(
                            self.thresholdAnnotationKey),
                        min_count=minCount,
                        max_count=maxCount,
                        label=self.captionTitle,
                        copy_annotations=self.copyAnnotations,
                        auto_number_as=autoNumberKey,
                        progress_callback=progressBar.advance,
                    )
        finally:
            # Never leave the widget locked, whatever happened above.
            progressBar.finish()
            self.controlArea.setDisabled(False)

        message = u'%i segment@p sent to output.' % len(selected_data)
        message = pluralize(message, len(selected_data))
        self.infoBox.setText(message)

        self.send('Selected data', selected_data, self)
        self.send('Discarded data', discarded_data, self)
        self.sendButton.resetSettingsChangedFlag()