Exemple #1
0
 def test_sample_exception_mode(self):
     """Check that sample raises ValueError for an unknown mode."""
     failure_msg = "sample doesn't raise exception for unknown mode!"
     with self.assertRaises(ValueError, msg=failure_msg):
         Segmenter.sample(
             self.entire_text_seg,
             sample_size=3,
             mode='unknown_mode',
         )
 def test_sample_exception_mode(self):
     """Sampling with an unrecognised mode must raise ValueError."""
     # Keyword arguments of the deliberately invalid call.
     invalid_call = dict(sample_size=3, mode='unknown_mode')
     with self.assertRaises(
         ValueError,
         msg="sample doesn't raise exception for unknown mode!",
     ):
         Segmenter.sample(self.entire_text_seg, **invalid_call)
Exemple #3
0
    def test_sample_progress(self):
        """Check that sample reports progress through its callback.

        The callback increments ``self.count``; after sampling, the counter
        must equal ``len(self.char_seg)``.
        """
        def bump_counter():
            """Stand-in progress callback that tallies its calls."""
            self.count += 1

        Segmenter.sample(
            self.char_seg,
            sample_size=4,
            mode='random',
            progress_callback=bump_counter,
        )
        expected_calls = len(self.char_seg)
        self.assertEqual(
            self.count,
            expected_calls,
            msg="sample doesn't track progress!",
        )
Exemple #4
0
 def test_sample_systematic_sample(self):
     """Check the start positions selected by systematic sampling."""
     sampled, _ = Segmenter.sample(
         self.char_seg,
         sample_size=3,
         mode='systematic',
     )
     observed_starts = [segment.start for segment in sampled]
     self.assertEqual(
         observed_starts,
         [0, 2, 4],
         msg="sample doesn't systematically sample segments!",
     )
    def test_sample_progress(self):
        """The progress callback must fire len(self.char_seg) times."""

        def on_progress():
            """Count one progress notification on the test instance."""
            self.count += 1

        # Keyword arguments for the sampling call under test.
        sample_options = dict(
            sample_size=4,
            mode='random',
            progress_callback=on_progress,
        )
        Segmenter.sample(self.char_seg, **sample_options)

        self.assertEqual(
            self.count, len(self.char_seg),
            msg="sample doesn't track progress!",
        )
Exemple #6
0
 def test_sample_autonumber(self):
     """Check that sample numbers its output under the requested key."""
     sampled, _ = Segmenter.sample(
         self.char_seg,
         sample_size=4,
         mode='random',
         auto_number_as='num',
     )
     observed_numbers = [segment.annotations['num'] for segment in sampled]
     self.assertEqual(
         observed_numbers,
         [1, 2, 3, 4],
         msg="sample doesn't autonumber input segments!",
     )
Exemple #7
0
 def test_sample_import_annotations_false(self):
     """Does sample skip importing annotations?

     With ``copy_annotations=False`` the sampled segment must not carry
     the ``'b'`` annotation key.
     """
     segmentation, _ = Segmenter.sample(
         self.single_letter_seg,
         sample_size=1,
         copy_annotations=False,
     )
     # The failure message used to be copied from the positive test and
     # stated the opposite of what actually went wrong.
     self.assertNotIn(
         'b',
         segmentation[0].annotations,
         msg="sample doesn't skip importing annotations!",
     )
Exemple #8
0
 def test_sample_import_annotations(self):
     """Check that copy_annotations=True keeps the 'b' annotation."""
     sampled, _ = Segmenter.sample(
         self.single_letter_seg,
         sample_size=1,
         copy_annotations=True,
     )
     first_segment = sampled[0]
     self.assertEqual(
         first_segment.annotations['b'],
         '1',
         msg="sample doesn't import annotations!",
     )
Exemple #9
0
 def test_sample_random_sample(self):
     """Check that random sampling returns the requested number of segments."""
     sampled, _ = Segmenter.sample(
         self.char_seg,
         sample_size=4,
         mode='random',
     )
     sampled_count = len(sampled)
     self.assertEqual(
         sampled_count,
         4,
         msg="sample doesn't randomly sample segments!",
     )
Exemple #10
0
 def test_sample_neg(self):
     """Check the size of the second (complementary) output of sample."""
     _, complement = Segmenter.sample(
         self.char_seg,
         sample_size=4,
         mode='random',
     )
     leftover_count = len(complement)
     self.assertEqual(
         leftover_count,
         2,
         msg="sample doesn't output complementary segmentation!",
     )
 def test_sample_import_annotations_false(self):
     """Does sample skip importing annotations?

     Samples one segment with ``copy_annotations=False`` and checks that
     the ``'b'`` key is absent from its annotations.
     """
     segmentation, _ = Segmenter.sample(
         self.single_letter_seg,
         sample_size=1,
         copy_annotations=False,
     )
     # assertNotIn reports the offending container on failure, and the
     # message now describes this test rather than the positive one.
     self.assertNotIn(
         'b',
         segmentation[0].annotations,
         msg="sample doesn't skip importing annotations!",
     )
 def test_sample_systematic_sample(self):
     """Systematic sampling of 3 segments must start at 0, 2 and 4."""
     expected_starts = [0, 2, 4]
     picked, _ = Segmenter.sample(
         self.char_seg, sample_size=3, mode='systematic',
     )
     self.assertEqual(
         list(seg.start for seg in picked),
         expected_starts,
         msg="sample doesn't systematically sample segments!",
     )
 def test_sample_import_annotations(self):
     """The sampled segment must keep annotation 'b' when copying is on."""
     picked, _ = Segmenter.sample(
         self.single_letter_seg, sample_size=1, copy_annotations=True,
     )
     copied_value = picked[0].annotations['b']
     self.assertEqual(
         copied_value, '1',
         msg="sample doesn't import annotations!",
     )
 def test_sample_neg(self):
     """The complementary segmentation must hold the 2 unsampled segments."""
     expected_leftover = 2
     _, discarded = Segmenter.sample(
         self.char_seg, sample_size=4, mode='random',
     )
     self.assertEqual(
         len(discarded), expected_leftover,
         msg="sample doesn't output complementary segmentation!",
     )
 def test_sample_random_sample(self):
     """A random sample of size 4 must contain exactly 4 segments."""
     expected_size = 4
     picked, _ = Segmenter.sample(
         self.char_seg, sample_size=4, mode='random',
     )
     self.assertEqual(
         len(picked), expected_size,
         msg="sample doesn't randomly sample segments!",
     )
 def test_sample_autonumber(self):
     """Sampled segments must be numbered 1..4 under the 'num' key."""
     expected_numbers = [1, 2, 3, 4]
     picked, _ = Segmenter.sample(
         self.char_seg,
         sample_size=4,
         mode='random',
         auto_number_as='num',
     )
     self.assertEqual(
         list(seg.annotations['num'] for seg in picked),
         expected_numbers,
         msg="sample doesn't autonumber input segments!",
     )
    def _abortSend(self, message, state):
        """Show *message* in the info box and send None on both outputs."""
        self.infoBox.setText(message, state)
        self.send('Selected data', None, self)
        self.send('Discarded data', None, self)

    def _compileRegex(self, flags):
        """Compile self.regex with *flags*.

        Returns the compiled pattern, or None after reporting the problem
        to the user (and clearing both outputs) when the regex is invalid.
        """
        try:
            return re.compile(self.regex, flags)
        except re.error as re_error:
            # Not every re.error exposes `msg`, hence the fallback message.
            try:
                message = u'Please enter a valid regex (error: %s).' % \
                          re_error.msg
            except AttributeError:
                message = u'Please enter a valid regex.'
            self._abortSend(message, 'error')
            return None

    def sendData(self):
        """(Have LTTL.Segmenter) perform the actual selection.

        Validates the widget settings, runs the Regex, Sample or Threshold
        selection (always Regex when advanced settings are hidden) and sends
        the result on the 'Selected data' and 'Discarded data' outputs.
        Whenever validation fails, a message is displayed in the info box
        and None is sent on both outputs.
        """

        # Check that there's something on input...
        if not self.segmentation:
            self._abortSend(u'Widget needs input.', 'warning')
            return

        # TODO: remove message 'No label was provided.' from docs

        # The progress bar gets one iteration per input segment, whatever
        # the method.
        num_segments = len(self.segmentation)

        # Basic settings always mean regex selection with annotations
        # copied and no auto-numbering.
        advanced = self.displayAdvancedSettings
        if advanced:
            method = self.method
            copyAnnotations = self.copyAnnotations
        else:
            method = u'Regex'
            copyAnnotations = True

        # If mode is Regex...
        if method == u'Regex':

            # Check that regex is not empty...
            if not self.regex:
                self._abortSend(u'Please enter a regex.', 'warning')
                return

            # Prepare regex. Flags are passed to re.compile() rather than
            # appended to the pattern as '(?iu...)': global inline flags
            # that are not at the start of the expression are deprecated
            # since Python 3.6 and rejected by Python 3.11+.
            if advanced:
                flags = 0
                if self.ignoreCase:
                    flags |= re.IGNORECASE
                if self.unicodeDependent:
                    flags |= re.UNICODE
                if self.multiline:
                    flags |= re.MULTILINE
                if self.dotAll:
                    flags |= re.DOTALL
            else:
                flags = re.UNICODE
            regex = self._compileRegex(flags)
            if regex is None:
                return

        # Else if mode is Sample...
        elif method == u'Sample':

            # Get sample size...
            if self.sampleSizeMode == u'Proportion':
                sampleSize = iround(
                    num_segments * (self.samplingRate / 100))
            else:
                sampleSize = self.sampleSize
            if sampleSize <= 0:
                self._abortSend('Please enter a larger sample size', "error")
                return

        # Else if mode is Threshold...
        elif method == u'Threshold':

            # Get min and max count...
            if self.thresholdMode == u'Proportion':
                minCount = iround(
                    math.ceil(num_segments * (self.minProportion / 100)))
                maxCount = iround(
                    math.floor(num_segments * (self.maxProportion / 100)))
            else:
                minCount = self.minCount
                maxCount = self.maxCount
            # A threshold that is not applied is replaced by the loosest
            # possible bound.
            if not self.applyMinThreshold:
                minCount = 1
            if not self.applyMaxThreshold:
                maxCount = num_segments

        # Check that autoNumberKey is not empty (if necessary)...
        autoNumberKey = None
        if advanced and self.autoNumber:
            if not self.autoNumberKey:
                self._abortSend(
                    u'Please enter an annotation key for auto-numbering.',
                    'warning')
                return
            autoNumberKey = self.autoNumberKey

        # Perform selection...
        self.infoBox.setText(u"Processing, please wait...", "warning")
        self.controlArea.setDisabled(True)
        progressBar = ProgressBar(self, iterations=num_segments)
        try:
            if method == u'Regex':
                regexAnnotationKeyParam = self.regexAnnotationKey
                if regexAnnotationKeyParam == u'(none)':
                    regexAnnotationKeyParam = None
                (selected_data, discarded_data) = Segmenter.select(
                    segmentation=self.segmentation,
                    regex=regex,
                    mode=self.regexMode.lower(),
                    annotation_key=regexAnnotationKeyParam or None,
                    label=self.captionTitle,
                    copy_annotations=copyAnnotations,
                    auto_number_as=autoNumberKey,
                    progress_callback=progressBar.advance,
                )
            elif method == u'Sample':
                (selected_data, discarded_data) = Segmenter.sample(
                    segmentation=self.segmentation,
                    sample_size=sampleSize,
                    mode='random',
                    label=self.captionTitle,
                    copy_annotations=self.copyAnnotations,
                    auto_number_as=autoNumberKey,
                    progress_callback=progressBar.advance,
                )
            elif method == u'Threshold':
                # Unapplied thresholds were already replaced by the loosest
                # bounds above, so comparing the counts is enough to detect
                # that nothing can be filtered out.
                if minCount == 1 and maxCount == num_segments:
                    selected_data = Segmenter.bypass(
                        segmentation=self.segmentation,
                        label=self.captionTitle,
                    )
                    discarded_data = None
                else:
                    thresholdAnnotationKeyParam = self.thresholdAnnotationKey
                    if thresholdAnnotationKeyParam == u'(none)':
                        thresholdAnnotationKeyParam = None
                    (selected_data, discarded_data) = Segmenter.threshold(
                        segmentation=self.segmentation,
                        annotation_key=(thresholdAnnotationKeyParam or None),
                        min_count=minCount,
                        max_count=maxCount,
                        label=self.captionTitle,
                        copy_annotations=self.copyAnnotations,
                        auto_number_as=autoNumberKey,
                        progress_callback=progressBar.advance,
                    )
        finally:
            # Always restore the interface, even if the selection raised.
            progressBar.finish()
            self.controlArea.setDisabled(False)

        message = u'%i segment@p sent to output.' % len(selected_data)
        message = pluralize(message, len(selected_data))
        self.infoBox.setText(message)

        self.send('Selected data', selected_data, self)
        self.send('Discarded data', discarded_data, self)
        self.sendButton.resetSettingsChangedFlag()