def sendData(self):
    """Send the bypassed and the displayed segmentation to the outputs.

    When there is no input segmentation, a warning is shown and ``None``
    is sent on both channels. Otherwise a bypass of the input (labelled
    with the widget caption) is always sent on 'Bypassed segmentation'.
    'Displayed segmentation' receives ``self.displayedSegmentation`` only
    if no current warning/error message mentions 'format' and the first
    displayed segment has non-empty content; it receives ``None``
    otherwise.
    """
    bypassedChannel = 'Bypassed segmentation'
    displayedChannel = 'Displayed segmentation'

    # Guard clause: nothing on input, so clear both outputs.
    if not self.segmentation:
        self.infoBox.setText(u'Widget needs input.', 'warning')
        self.send(bypassedChannel, None, self)
        self.send(displayedChannel, None, self)
        return

    bypassed = Segmenter.bypass(self.segmentation, self.captionTitle)
    self.send(bypassedChannel, bypassed, self)

    # TODO: Check if this is correct replacement for textable v1.*, v2.*
    formatProblem = (
        'format' in self._currentWarningMessage
        or 'format' in self._currentErrorMessage
    )
    if formatProblem:
        self.send(displayedChannel, None, self)
        return

    # Only forward the displayed segmentation if its first segment
    # actually has some content.
    firstContent = self.displayedSegmentation[0].get_content()
    displayed = self.displayedSegmentation if len(firstContent) > 0 else None
    self.send(displayedChannel, displayed, self)

    # TODO: Differs only in capitalization from the check above
    #       ('format' vs "Format"). Is this intentional?
    if "Format" not in self._currentErrorMessage:
        segmentCount = len(self.segmentation)
        message = pluralize(
            u'%i segment@p sent to output.' % segmentCount,
            segmentCount,
        )
        self.infoBox.setText(message)

    self.sendButton.resetSettingsChangedFlag()
def test_bypass_deepcopy(self):
    """Check that bypass returns something unequal to its input."""
    bypassed = Segmenter.bypass(self.letter_seg)
    original = self.letter_seg
    # A result comparing equal to the input fails the test.
    self.assertNotEqual(
        bypassed,
        original,
        msg="bypass doesn't deep copy input segments!",
    )
def test_bypass_copy_annotations(self):
    """Check that the 'a' annotation values survive a bypass."""
    bypassed = Segmenter.bypass(self.other_letter_seg)
    # Compare the 'a' annotation of every segment, in order.
    actual = [segment.annotations['a'] for segment in bypassed]
    expected = [
        segment.annotations['a'] for segment in self.other_letter_seg
    ]
    self.assertEqual(
        actual,
        expected,
        msg="bypass doesn't copy annotations!",
    )
def test_bypass_copy_segments(self):
    """Check that segment contents are unchanged by a bypass."""
    bypassed = Segmenter.bypass(self.letter_seg)
    # Compare the content of every segment, in order.
    actual = [segment.get_content() for segment in bypassed]
    expected = [segment.get_content() for segment in self.letter_seg]
    self.assertEqual(
        actual,
        expected,
        msg="bypass doesn't copy input segments!",
    )
def huntTheLexic(self):
    """Filter the input with the selected lexical fields and store the result.

    For every selected topic whose word list is non-empty, the matching
    segments of ``self.inputSeg`` are selected and labelled with the
    topic name. These selections are concatenated (with duplicate
    merging) to a bypass of the input labelled "__None__", and the
    labels are imported as an annotation whose key is ``self.labelName``
    (or "Topic" when that is empty). The result is stored in
    ``self.outputSeg``.
    """
    # Names of the topics picked by the user (indices into titleLabels).
    chosenNames = []
    if self.titleLabels:
        allLabels = list(self.titleLabels)
        chosenNames = [allLabels[index] for index in self.selectedFields]

    # Keep only the entries of the module-level defaultDict that were
    # chosen (presumably topic name -> list of words; verify upstream).
    chosenLists = {}
    for topic, words in defaultDict.items():
        if topic in chosenNames:
            chosenLists[topic] = words

    # One labelled selection per topic that has at least one usable word.
    labelledSelections = []
    if self.inputSeg is not None:
        for topic, words in chosenLists.items():
            usableWords = [word for word in words if word]
            if not usableWords:
                continue
            selection = Segmenter.select(
                self.inputSeg,
                self.listToRegex(usableWords),
                label=topic,
            )
            labelledSelections.append(selection[0])

    # Annotation key under which the topic labels are imported.
    labelNameVar = "Topic" if self.labelName == "" else self.labelName

    # The output is the unlabelled copy of the input merged with the
    # labelled selections.
    unlabelledCopy = Segmenter.bypass(self.inputSeg, label="__None__")
    self.outputSeg = Segmenter.concatenate(
        [unlabelledCopy] + labelledSelections,
        merge_duplicates=True,
        label=self.captionTitle,
        import_labels_as=labelNameVar,
    )
def test_bypass_deepcopy(self):
    """Verify that the bypass result does not compare equal to the input."""
    result = Segmenter.bypass(self.letter_seg)
    failureMessage = "bypass doesn't deep copy input segments!"
    self.assertNotEqual(result, self.letter_seg, msg=failureMessage)
def test_bypass_copy_annotations(self):
    """Verify that bypass keeps each segment's 'a' annotation value."""
    result = Segmenter.bypass(self.other_letter_seg)
    failureMessage = "bypass doesn't copy annotations!"
    # Collect the 'a' annotation of each segment on both sides.
    copied = [seg.annotations['a'] for seg in result]
    source = [seg.annotations['a'] for seg in self.other_letter_seg]
    self.assertEqual(copied, source, msg=failureMessage)
def test_bypass_copy_segments(self):
    """Verify that bypass keeps the content of each input segment."""
    result = Segmenter.bypass(self.letter_seg)
    failureMessage = "bypass doesn't copy input segments!"
    # Collect the textual content of each segment on both sides.
    copied = [seg.get_content() for seg in result]
    source = [seg.get_content() for seg in self.letter_seg]
    self.assertEqual(copied, source, msg=failureMessage)
def sendData(self):
    """(Have LTTL.Segmenter) perform the actual selection.

    Validates the current settings, runs the selection, and sends the
    result on the 'Selected data' and 'Discarded data' channels. Whenever
    validation fails (no input, empty or invalid regex, non-positive
    sample size, missing auto-numbering key), a message is shown in the
    info box, ``None`` is sent on both channels, and the method returns
    early.

    With advanced settings displayed, the method is one of:

    * 'Regex': ``Segmenter.select`` with the user regex and flags;
    * 'Sample': ``Segmenter.sample`` in 'random' mode;
    * 'Threshold': ``Segmenter.threshold``, or a plain
      ``Segmenter.bypass`` when the thresholds do not restrict anything
      (in which case 'Discarded data' is ``None``).

    With basic settings, ``Segmenter.select`` is run with the user regex
    and the unicode flag, always copying annotations.

    Inline regex flags are *prepended* to the pattern. Python 3.11+
    rejects global inline flags that are not at the start of the
    expression, so appending them would make every regex invalid there.
    """
    # Check that there's something on input...
    if not self.segmentation:
        self.infoBox.setText(u'Widget needs input.', 'warning')
        self.send('Selected data', None, self)
        self.send('Discarded data', None, self)
        return

    # TODO: remove message 'No label was provided.' from docs

    # Advanced settings...
    if self.displayAdvancedSettings:

        # If mode is Regex...
        if self.method == u'Regex':

            # Check that regex is not empty...
            if not self.regex:
                self.infoBox.setText(u'Please enter a regex.', 'warning')
                self.send('Selected data', None, self)
                self.send('Discarded data', None, self)
                return

            # Prepare regex...
            regex_string = self.regex
            if (
                self.ignoreCase
                or self.unicodeDependent
                or self.multiline
                or self.dotAll
            ):
                flags = ''
                if self.ignoreCase:
                    flags += 'i'
                if self.unicodeDependent:
                    flags += 'u'
                if self.multiline:
                    flags += 'm'
                if self.dotAll:
                    flags += 's'
                # Global inline flags must come first (an error since
                # Python 3.11 otherwise).
                regex_string = ('(?%s)' % flags) + regex_string
            try:
                regex = re.compile(regex_string)
            except re.error as re_error:
                try:
                    message = u'Please enter a valid regex (error: %s).' % \
                        re_error.msg
                except AttributeError:
                    # The exception object has no ``msg`` attribute.
                    message = u'Please enter a valid regex.'
                self.infoBox.setText(message, 'error')
                self.send('Selected data', None, self)
                self.send('Discarded data', None, self)
                return

            # Get number of iterations...
            num_iterations = len(self.segmentation)

        # Else if mode is Sample...
        elif self.method == u'Sample':

            # Get sample size...
            if self.sampleSizeMode == u'Proportion':
                sampleSize = iround(
                    len(self.segmentation) * (self.samplingRate / 100)
                )
            else:
                sampleSize = self.sampleSize
            if sampleSize <= 0:
                self.infoBox.setText(
                    message='Please enter a larger sample size',
                    state="error",
                )
                self.send('Selected data', None, self)
                self.send('Discarded data', None, self)
                return

            # Get number of iterations...
            num_iterations = len(self.segmentation)

        # Else if mode is Threshold...
        elif self.method == u'Threshold':

            # Get min and max count...
            if self.thresholdMode == u'Proportion':
                minCount = iround(
                    math.ceil(
                        len(self.segmentation) * (self.minProportion / 100)
                    )
                )
                maxCount = iround(
                    math.floor(
                        len(self.segmentation) * (self.maxProportion / 100)
                    )
                )
            else:
                minCount = self.minCount
                maxCount = self.maxCount
            # A disabled threshold is replaced by its least restrictive
            # value.
            if not self.applyMinThreshold:
                minCount = 1
            if not self.applyMaxThreshold:
                maxCount = len(self.segmentation)

            # Get number of iterations...
            num_iterations = len(self.segmentation)

        # Check that autoNumberKey is not empty (if necessary)...
        if self.autoNumber:
            if self.autoNumberKey:
                autoNumberKey = self.autoNumberKey
            else:
                self.infoBox.setText(
                    u'Please enter an annotation key for auto-numbering.',
                    'warning'
                )
                self.send('Selected data', None, self)
                self.send('Discarded data', None, self)
                return
        else:
            autoNumberKey = None

        # Perform selection...
        self.infoBox.setText(u"Processing, please wait...", "warning")
        self.controlArea.setDisabled(True)
        progressBar = ProgressBar(self, iterations=num_iterations)
        if self.method == u'Regex':
            # The '(none)' entry stands for "no annotation key".
            regexAnnotationKeyParam = self.regexAnnotationKey
            if regexAnnotationKeyParam == u'(none)':
                regexAnnotationKeyParam = None
            (selected_data, discarded_data) = Segmenter.select(
                segmentation=self.segmentation,
                regex=regex,
                mode=self.regexMode.lower(),
                annotation_key=regexAnnotationKeyParam or None,
                label=self.captionTitle,
                copy_annotations=self.copyAnnotations,
                auto_number_as=autoNumberKey,
                progress_callback=progressBar.advance,
            )
        elif self.method == u'Sample':
            (selected_data, discarded_data) = Segmenter.sample(
                segmentation=self.segmentation,
                sample_size=sampleSize,
                mode='random',
                label=self.captionTitle,
                copy_annotations=self.copyAnnotations,
                auto_number_as=autoNumberKey,
                progress_callback=progressBar.advance,
            )
        elif self.method == u'Threshold':
            # When neither bound restricts anything, bypass the input
            # instead of running the threshold.
            if (
                (minCount == 1 or not self.applyMinThreshold)
                and (
                    maxCount == len(self.segmentation)
                    or not self.applyMaxThreshold
                )
            ):
                selected_data = Segmenter.bypass(
                    segmentation=self.segmentation,
                    label=self.captionTitle,
                )
                discarded_data = None
            else:
                thresholdAnnotationKeyParam = self.thresholdAnnotationKey
                if thresholdAnnotationKeyParam == u'(none)':
                    thresholdAnnotationKeyParam = None
                (selected_data, discarded_data) = Segmenter.threshold(
                    segmentation=self.segmentation,
                    annotation_key=(thresholdAnnotationKeyParam or None),
                    min_count=minCount,
                    max_count=maxCount,
                    label=self.captionTitle,
                    copy_annotations=self.copyAnnotations,
                    auto_number_as=autoNumberKey,
                    progress_callback=progressBar.advance,
                )

    # Basic settings:
    else:

        # Check that regex is not empty...
        if not self.regex:
            self.infoBox.setText(u'Please enter a regex.', 'warning')
            self.send('Selected data', None, self)
            self.send('Discarded data', None, self)
            return

        # Get number of iterations...
        num_iterations = len(self.segmentation)

        # Perform selection...
        self.infoBox.setText(u"Processing, please wait...", "warning")
        self.controlArea.setDisabled(True)
        progressBar = ProgressBar(self, iterations=num_iterations)
        regexAnnotationKeyParam = self.regexAnnotationKey
        if regexAnnotationKeyParam == u'(none)':
            regexAnnotationKeyParam = None
        try:
            (selected_data, discarded_data) = Segmenter.select(
                segmentation=self.segmentation,
                # The unicode flag goes first: global inline flags
                # elsewhere are rejected by Python 3.11+.
                regex=re.compile('(?u)' + self.regex),
                mode=self.regexMode.lower(),
                annotation_key=regexAnnotationKeyParam or None,
                label=self.captionTitle,
                copy_annotations=True,
                auto_number_as=None,
                progress_callback=progressBar.advance,
            )
        except re.error as re_error:
            try:
                message = u'Please enter a valid regex (error: %s).' % \
                    re_error.msg
            except AttributeError:
                message = u'Please enter a valid regex.'
            self.infoBox.setText(message, 'error')
            self.send('Selected data', None, self)
            self.send('Discarded data', None, self)
            # Undo the "processing" state before bailing out.
            progressBar.finish()
            self.controlArea.setDisabled(False)
            return

    progressBar.finish()
    self.controlArea.setDisabled(False)

    message = u'%i segment@p sent to output.' % len(selected_data)
    message = pluralize(message, len(selected_data))
    self.infoBox.setText(message)

    self.send('Selected data', selected_data, self)
    self.send('Discarded data', discarded_data, self)
    self.sendButton.resetSettingsChangedFlag()