def test_threshold_progress(self):
    """Check that threshold reports progress through its callback"""

    def bump_counter():
        """Stand-in progress callback: add one to the test's counter"""
        self.count += 1

    threshold_options = dict(
        min_count=2,
        max_count=2,
        progress_callback=bump_counter,
    )
    Segmenter.threshold(self.other_letter_seg, **threshold_options)

    # The counter is compared with the number of input segments.
    expected_calls = len(self.other_letter_seg)
    self.assertEqual(
        self.count,
        expected_calls,
        msg="threshold doesn't track progress!",
    )
def test_threshold_select_no_min_no_max(self):
    """Check threshold's selection when no min/max count is passed"""
    selected, _unused = Segmenter.threshold(self.other_letter_seg)

    # Gather the content of every selected segment, in order.
    contents = []
    for segment in selected:
        contents.append(segment.get_content())

    self.assertEqual(
        contents,
        ['a', 'b', 'b', 'c', 'c', 'c'],
        msg="threshold doesn't select segments (no min, no max)!",
    )
def test_threshold_progress(self):
    """Verify that threshold invokes the progress callback it is given"""

    def on_progress():
        """Fake progress callback that increments self.count"""
        self.count += 1

    source_segmentation = self.other_letter_seg
    Segmenter.threshold(
        source_segmentation,
        progress_callback=on_progress,
        max_count=2,
        min_count=2,
    )

    # After the call, the counter must equal the input's length.
    input_length = len(self.other_letter_seg)
    self.assertEqual(
        self.count,
        input_length,
        msg="threshold doesn't track progress!",
    )
def test_threshold_autonumber(self):
    """Check the 'num' annotations produced with auto_number_as='num'"""
    selected, _unused = Segmenter.threshold(
        self.other_letter_seg,
        auto_number_as='num',
        min_count=2,
        max_count=2,
    )

    # Read back the auto-number annotation of each selected segment.
    numbers = []
    for segment in selected:
        numbers.append(segment.annotations['num'])

    self.assertEqual(
        numbers,
        [1, 2],
        msg="threshold doesn't autonumber input segments!",
    )
def test_threshold_copy_annotations_false(self):
    """Check that no output segment has key 'a' if copy_annotations=False"""
    selected, _unused = Segmenter.threshold(
        self.other_letter_seg,
        copy_annotations=False,
        min_count=2,
        max_count=2,
    )

    # Collect any output segment that still has an 'a' annotation.
    still_annotated = []
    for segment in selected:
        if 'a' in segment.annotations:
            still_annotated.append(segment)

    self.assertEqual(
        still_annotated,
        [],
        msg="threshold doesn't skip copying annotations!",
    )
def test_threshold_select(self):
    """Check threshold's selection with min_count=2 and max_count=2"""
    count_bounds = dict(min_count=2, max_count=2)
    selected, _unused = Segmenter.threshold(
        self.other_letter_seg,
        **count_bounds
    )

    # Gather the content of every selected segment, in order.
    contents = []
    for segment in selected:
        contents.append(segment.get_content())

    self.assertEqual(
        contents,
        ['b', 'b'],
        msg="threshold doesn't select segments (min and max)!",
    )
def test_threshold_select_no_min_no_max(self):
    """Verify the segments threshold returns when called without bounds"""
    source_segmentation = self.other_letter_seg
    kept, _dropped = Segmenter.threshold(source_segmentation)

    expected_contents = ['a', 'b', 'b', 'c', 'c', 'c']
    observed_contents = [item.get_content() for item in kept]
    self.assertEqual(
        observed_contents,
        expected_contents,
        msg="threshold doesn't select segments (no min, no max)!",
    )
def test_threshold_copy_annotations(self):
    """Check the 'a' annotations of the output when copy_annotations=True"""
    selected, _unused = Segmenter.threshold(
        self.other_letter_seg,
        copy_annotations=True,
        min_count=2,
        max_count=2,
    )

    # Read the value stored under key 'a' in each selected segment.
    copied_values = []
    for segment in selected:
        copied_values.append(segment.annotations['a'])

    self.assertEqual(
        copied_values,
        ['1', '1'],
        msg="threshold doesn't copy annotations!",
    )
def test_threshold_select_annotations(self):
    """Check threshold's selection when annotation_key='a' is passed"""
    selected, _unused = Segmenter.threshold(
        self.other_letter_seg,
        annotation_key='a',
        min_count=2,
        max_count=2,
    )

    # Gather the content of every selected segment, in order.
    contents = []
    for segment in selected:
        contents.append(segment.get_content())

    self.assertEqual(
        contents,
        ['c', 'c'],
        msg="threshold doesn't select segments using annotations!",
    )
def test_threshold_select(self):
    """Verify the segments threshold returns for min_count=max_count=2"""
    source_segmentation = self.other_letter_seg
    kept, _dropped = Segmenter.threshold(
        source_segmentation,
        max_count=2,
        min_count=2,
    )

    expected_contents = ['b', 'b']
    observed_contents = [item.get_content() for item in kept]
    self.assertEqual(
        observed_contents,
        expected_contents,
        msg="threshold doesn't select segments (min and max)!",
    )
def test_threshold_autonumber(self):
    """Verify the values stored under 'num' when auto_number_as='num'"""
    source_segmentation = self.other_letter_seg
    kept, _dropped = Segmenter.threshold(
        source_segmentation,
        min_count=2,
        max_count=2,
        auto_number_as='num',
    )

    expected_numbers = [1, 2]
    observed_numbers = [item.annotations['num'] for item in kept]
    self.assertEqual(
        observed_numbers,
        expected_numbers,
        msg="threshold doesn't autonumber input segments!",
    )
def test_threshold_copy_annotations_false(self):
    """Verify that copy_annotations=False leaves no 'a' key in the output"""
    source_segmentation = self.other_letter_seg
    kept, _dropped = Segmenter.threshold(
        source_segmentation,
        min_count=2,
        max_count=2,
        copy_annotations=False,
    )

    # Any segment listed here still has the 'a' annotation key.
    offenders = [item for item in kept if 'a' in item.annotations]
    self.assertEqual(
        offenders,
        [],
        msg="threshold doesn't skip copying annotations!",
    )
def test_threshold_copy_annotations(self):
    """Verify the 'a' annotation values present when copy_annotations=True"""
    source_segmentation = self.other_letter_seg
    kept, _dropped = Segmenter.threshold(
        source_segmentation,
        min_count=2,
        max_count=2,
        copy_annotations=True,
    )

    expected_values = ['1', '1']
    observed_values = [item.annotations['a'] for item in kept]
    self.assertEqual(
        observed_values,
        expected_values,
        msg="threshold doesn't copy annotations!",
    )
def test_threshold_select_annotations(self):
    """Verify the segments threshold returns when keyed on annotation 'a'"""
    source_segmentation = self.other_letter_seg
    kept, _dropped = Segmenter.threshold(
        source_segmentation,
        min_count=2,
        max_count=2,
        annotation_key='a',
    )

    expected_contents = ['c', 'c']
    observed_contents = [item.get_content() for item in kept]
    self.assertEqual(
        observed_contents,
        expected_contents,
        msg="threshold doesn't select segments using annotations!",
    )
def sendData(self):
    """(Have LTTL.Segmenter) perform the actual selection

    Validates the widget settings, runs the Segmenter operation matching
    the current method (select, sample, threshold or bypass) and passes
    the results to self.send under the 'Selected data' and
    'Discarded data' labels. Whenever validation fails, a message is
    shown in the info box, None is sent on both labels and the method
    returns early.

    Regex flags are handed to re.compile() through its ``flags``
    argument instead of being appended to the pattern as a trailing
    ``(?...)`` group: global inline flags that are not at the start of
    the expression are rejected by recent Python versions.
    """

    # Check that there's something on input...
    if not self.segmentation:
        self.infoBox.setText(u'Widget needs input.', 'warning')
        self.send('Selected data', None, self)
        self.send('Discarded data', None, self)
        return

    # TODO: remove message 'No label was provided.' from docs

    # Advanced settings...
    if self.displayAdvancedSettings:

        # If mode is Regex...
        if self.method == u'Regex':

            # Check that regex is not empty...
            if not self.regex:
                self.infoBox.setText(u'Please enter a regex.', 'warning')
                self.send('Selected data', None, self)
                self.send('Discarded data', None, self)
                return

            # Prepare regex: translate the checked options into re flags
            # (passed as an argument, never appended to the pattern)...
            regex_flags = 0
            if self.ignoreCase:
                regex_flags |= re.IGNORECASE
            if self.unicodeDependent:
                regex_flags |= re.UNICODE
            if self.multiline:
                regex_flags |= re.MULTILINE
            if self.dotAll:
                regex_flags |= re.DOTALL
            try:
                regex = re.compile(self.regex, regex_flags)
            except re.error as re_error:
                # Not every re.error exposes a 'msg' attribute; fall back
                # to a generic message when it is missing.
                try:
                    message = (
                        u'Please enter a valid regex (error: %s).'
                        % re_error.msg
                    )
                except AttributeError:
                    message = u'Please enter a valid regex.'
                self.infoBox.setText(message, 'error')
                self.send('Selected data', None, self)
                self.send('Discarded data', None, self)
                return

            # Get number of iterations...
            num_iterations = len(self.segmentation)

        # Else if mode is Sample...
        elif self.method == u'Sample':

            # Get sample size...
            if self.sampleSizeMode == u'Proportion':
                sampleSize = iround(
                    len(self.segmentation) * (self.samplingRate / 100)
                )
            else:
                sampleSize = self.sampleSize
            if sampleSize <= 0:
                self.infoBox.setText(
                    message='Please enter a larger sample size',
                    state="error",
                )
                self.send('Selected data', None, self)
                self.send('Discarded data', None, self)
                return

            # Get number of iterations...
            num_iterations = len(self.segmentation)

        # Else if mode is Threshold...
        elif self.method == u'Threshold':

            # Get min and max count...
            if self.thresholdMode == u'Proportion':
                minCount = iround(
                    math.ceil(
                        len(self.segmentation) * (self.minProportion / 100)
                    )
                )
                maxCount = iround(
                    math.floor(
                        len(self.segmentation) * (self.maxProportion / 100)
                    )
                )
            else:
                minCount = self.minCount
                maxCount = self.maxCount
            # A threshold that is not applied is replaced by the widest
            # possible bound.
            if not self.applyMinThreshold:
                minCount = 1
            if not self.applyMaxThreshold:
                maxCount = len(self.segmentation)

            # Get number of iterations...
            num_iterations = len(self.segmentation)

        # Check that autoNumberKey is not empty (if necessary)...
        if self.autoNumber:
            if self.autoNumberKey:
                autoNumberKey = self.autoNumberKey
            else:
                self.infoBox.setText(
                    u'Please enter an annotation key for auto-numbering.',
                    'warning'
                )
                self.send('Selected data', None, self)
                self.send('Discarded data', None, self)
                return
        else:
            autoNumberKey = None

        # Perform selection...
        self.infoBox.setText(u"Processing, please wait...", "warning")
        self.controlArea.setDisabled(True)
        progressBar = ProgressBar(self, iterations=num_iterations)
        if self.method == u'Regex':
            # The u'(none)' entry stands for "no annotation key".
            regexAnnotationKeyParam = self.regexAnnotationKey
            if regexAnnotationKeyParam == u'(none)':
                regexAnnotationKeyParam = None
            (selected_data, discarded_data) = Segmenter.select(
                segmentation=self.segmentation,
                regex=regex,
                mode=self.regexMode.lower(),
                annotation_key=regexAnnotationKeyParam or None,
                label=self.captionTitle,
                copy_annotations=self.copyAnnotations,
                auto_number_as=autoNumberKey,
                progress_callback=progressBar.advance,
            )
        elif self.method == u'Sample':
            (selected_data, discarded_data) = Segmenter.sample(
                segmentation=self.segmentation,
                sample_size=sampleSize,
                mode='random',
                label=self.captionTitle,
                copy_annotations=self.copyAnnotations,
                auto_number_as=autoNumberKey,
                progress_callback=progressBar.advance,
            )
        elif self.method == u'Threshold':
            # With the widest possible bounds, the input is bypassed
            # instead of thresholded and nothing is discarded.
            if (
                (minCount == 1 or not self.applyMinThreshold)
                and (
                    maxCount == len(self.segmentation)
                    or not self.applyMaxThreshold
                )
            ):
                selected_data = Segmenter.bypass(
                    segmentation=self.segmentation,
                    label=self.captionTitle,
                )
                discarded_data = None
            else:
                # The u'(none)' entry stands for "no annotation key".
                thresholdAnnotationKeyParam = self.thresholdAnnotationKey
                if thresholdAnnotationKeyParam == u'(none)':
                    thresholdAnnotationKeyParam = None
                (selected_data, discarded_data) = Segmenter.threshold(
                    segmentation=self.segmentation,
                    annotation_key=(thresholdAnnotationKeyParam or None),
                    min_count=minCount,
                    max_count=maxCount,
                    label=self.captionTitle,
                    copy_annotations=self.copyAnnotations,
                    auto_number_as=autoNumberKey,
                    progress_callback=progressBar.advance,
                )

    # Basic settings:
    else:

        # Check that regex is not empty...
        if not self.regex:
            self.infoBox.setText(u'Please enter a regex.', 'warning')
            self.send('Selected data', None, self)
            self.send('Discarded data', None, self)
            return

        # Get number of iterations...
        num_iterations = len(self.segmentation)

        # Perform selection...
        self.infoBox.setText(u"Processing, please wait...", "warning")
        self.controlArea.setDisabled(True)
        progressBar = ProgressBar(self, iterations=num_iterations)
        # The u'(none)' entry stands for "no annotation key".
        regexAnnotationKeyParam = self.regexAnnotationKey
        if regexAnnotationKeyParam == u'(none)':
            regexAnnotationKeyParam = None
        try:
            (selected_data, discarded_data) = Segmenter.select(
                segmentation=self.segmentation,
                # Unicode matching is requested through the flags
                # argument rather than a trailing '(?u)' in the pattern.
                regex=re.compile(self.regex, re.UNICODE),
                mode=self.regexMode.lower(),
                annotation_key=regexAnnotationKeyParam or None,
                label=self.captionTitle,
                copy_annotations=True,
                auto_number_as=None,
                progress_callback=progressBar.advance,
            )
        except re.error as re_error:
            # Not every re.error exposes a 'msg' attribute; fall back to
            # a generic message when it is missing.
            try:
                message = (
                    u'Please enter a valid regex (error: %s).'
                    % re_error.msg
                )
            except AttributeError:
                message = u'Please enter a valid regex.'
            self.infoBox.setText(message, 'error')
            self.send('Selected data', None, self)
            self.send('Discarded data', None, self)
            # Restore the interface before leaving.
            progressBar.finish()
            self.controlArea.setDisabled(False)
            return

    # Restore the interface, report the result and send the data...
    progressBar.finish()
    self.controlArea.setDisabled(False)
    message = u'%i segment@p sent to output.' % len(selected_data)
    message = pluralize(message, len(selected_data))
    self.infoBox.setText(message)
    self.send('Selected data', selected_data, self)
    self.send('Discarded data', discarded_data, self)
    self.sendButton.resetSettingsChangedFlag()