Esempio n. 1
0
    def sendData(self):
        """Tag the input segmentation with TreeTagger and send the result.

        None is sent on the "Tagged data" channel (with a warning in the
        info box) when the TreeTagger path is not set, when no language is
        available, or when there is no input segmentation.

        Otherwise every input segment is wrapped in an <ax_tt> element that
        carries its annotations, the whole text is tagged in a single call,
        and each tagger output line is rewritten as a <w> element.  When
        self.outputFormat is "add XML tags" that recoded segmentation is
        sent as is; in any other case it is split into one segment per <w>
        element first.
        """
        # Drop the Inputs created by a previous run...
        self.clearCreatedInputs()

        # Check prerequisites in order; the first failure decides the warning.
        blocked = True
        warning = None
        if not self.TreetaggerPath:
            warning = self.noTreetaggerPathWarning
        elif not self.getAvailableLanguages():
            warning = self.noLanguageParameterWarning
        elif not self.segmentation:
            warning = u"Widget needs input"
        else:
            blocked = False

        if blocked:
            self.infoBox.setText(warning, "warning")
            self.send("Tagged data", None)
            return

        # Lock the interface and start the progress bar.
        self.infoBox.setText(u"Processing, please wait...", "warning")
        self.controlArea.setDisabled(True)
        self.progressBar = ProgressBar(self, iterations=5)

        # Collect the input segments, each one carrying its annotations
        # serialized as XML attributes in the temporary "tt_ax" annotation.
        # NOTE(review): the annotation is written on the segment object
        # yielded by the input segmentation -- confirm this cannot leak
        # into upstream data.
        annotated_seg = Segmentation()
        annotated_seg.label = self.segmentation.label
        for segment in self.segmentation:
            attribute_parts = []
            for key, value in segment.annotations.items():
                # Decompose the key and drop combining marks (category Mn).
                bare_key = ''.join(
                    char for char in unicodedata.normalize('NFD', key)
                    if unicodedata.category(char) != 'Mn'
                )
                attribute_parts.append(
                    "%s=%s" % (bare_key, quoteattr(str(value)))
                )
            segment.annotations["tt_ax"] = " ".join(attribute_parts)
            annotated_seg.append(segment)

        self.progressBar.advance()

        # One big string, so that TreeTagger is called only once...
        concatenated_text = annotated_seg.to_string(
            formatting="<ax_tt %(tt_ax)s>%(__content__)s</ax_tt>",
            display_all=True,
        )

        self.progressBar.advance()

        # Run the tagger on the concatenated text...
        tagger_options = '-token -lemma -sgml -quiet'
        if self.replaceUnknown:
            tagger_options += " -no-unknown"
        language_code = pycountry.languages.get(name=self.language).alpha_2
        tagger = treetaggerwrapper.TreeTagger(
            TAGLANG=language_code,
            TAGOPT=tagger_options,
            TAGDIR=self.TreetaggerPath,
        )
        tagged_lines = tagger.tag_text(
            concatenated_text,
            notagurl=True,
            notagemail=True,
            notagip=True,
            notagdns=True,
        )
        tagged_input = Input("\n".join(tagged_lines))
        self.createdInputs.append(tagged_input)

        # Replace <unknown> with [unknown] and " with &quot; then
        # re-segment to match the original segmentation structure.
        escaping_rules = [
            (re.compile(r"<unknown>"), "[unknown]"),
            (re.compile(r'"""'), '"&quot;"'),
        ]
        escaped_segmentation, _ = Segmenter.recode(
            tagged_input,
            substitutions=escaping_rules,
        )
        tagged_segmentation = Segmenter.import_xml(
            escaped_segmentation, "ax_tt"
        )

        self.progressBar.advance()

        # Turn each tab-separated tagger line into a <w> element and strip
        # a leading or trailing newline.
        wrapping_rules = [
            (re.compile(r"(.+)\t(.+)\t(.+?)(?=[\r\n])"),
             '<w lemma="&3" pos-tag="&2">&1</w>'),
            (re.compile(r'^\n|\n$'), ''),
        ]
        xml_segmentation, _ = Segmenter.recode(
            tagged_segmentation,
            substitutions=wrapping_rules,
        )

        # Unless XML output was requested, split into individual tokens...
        if self.outputFormat != "add XML tags":
            try:
                output_segmentation = Segmenter.import_xml(
                    xml_segmentation, "w")
            except ValueError:
                self.infoBox.setText(
                    "Please check that either the input contains well-formed "
                    "XML, or it doesn't contain instances of '&#60;' and '\x3e'",
                    "error")
                self.send("Tagged data", None)
                self.progressBar.finish()
                self.controlArea.setDisabled(False)
                return
        else:
            output_segmentation = xml_segmentation

        # Unlock the interface before reporting and sending.
        self.progressBar.finish()
        self.controlArea.setDisabled(False)

        output_segmentation.label = self.captionTitle
        segment_count = len(output_segmentation)
        message = pluralize(
            u'%i segment@p sent to output.' % segment_count,
            segment_count,
        )
        self.infoBox.setText(message)
        self.send('Tagged data', output_segmentation, self)
        self.sendButton.resetSettingsChangedFlag()
class TestSegmentation(unittest.TestCase):
    """Unit tests for Segmentation: to_string(), get_annotation_keys()
    and is_non_overlapping()."""

    def setUp(self):
        """Create the fixture segmentations and the expected default dump."""
        self.entire_text_seg = Input('ab cde')
        self.str_index = self.entire_text_seg[0].str_index
        idx = self.str_index

        # Two adjacent words; only the first one carries annotations.
        annotated_word = Segment(
            str_index=idx,
            start=0,
            end=2,
            annotations={'a': '1', 'bc': '20'},
        )
        plain_word = Segment(str_index=idx, start=3, end=6)
        self.word_seg = Segmentation([annotated_word, plain_word])

        # Two segments sharing the character at position 4.
        self.overlapping_seg = Segmentation([
            Segment(str_index=idx, start=3, end=5),
            Segment(str_index=idx, start=4, end=6),
        ])

        # What to_string() is expected to yield for word_seg by default.
        template = (
            'segment number 1\n'
            '\tcontent:\t"ab"\n'
            '\tstr_index:\t%i\n'
            '\tstart:\t0\n'
            '\tend:\t2\n'
            '\tannotations:\n'
            '\t\ta                    1\n'
            '\t\tbc                   20\n'
            'segment number 2\n'
            '\tcontent:\t"cde"\n'
            '\tstr_index:\t%i\n'
            '\tstart:\t3\n'
            '\tend:\t6'
        )
        self.base_output_string = template % (idx, idx)

        # Incremented by the progress callback test.
        self.count = 0

    def tearDown(self):
        """Nothing to clean up."""

    def test_creator(self):
        """Calling Segmentation() yields a Segmentation instance."""
        created = Segmentation()
        self.assertIsInstance(
            created,
            Segmentation,
            msg="creator doesn't return Segmentation object!"
        )

    def test_to_string_default_format(self):
        """to_string() without arguments yields the default dump."""
        actual = self.word_seg.to_string()
        self.assertEqual(
            actual,
            self.base_output_string,
            msg="to_string() doesn't format segmentation correctly by default!"
        )

    def test_to_string_header(self):
        """The header is placed in front of the default dump."""
        actual = self.word_seg.to_string(header='HEADER')
        expected = 'HEADER' + self.base_output_string
        self.assertEqual(
            actual,
            expected,
            msg="to_string() doesn't format header correctly!"
        )

    def test_to_string_footer(self):
        """The footer is placed after the default dump."""
        actual = self.word_seg.to_string(footer='FOOTER')
        expected = self.base_output_string + 'FOOTER'
        self.assertEqual(
            actual,
            expected,
            msg="to_string() doesn't format footer correctly!"
        )

    def test_to_string_humanize_addresses(self):
        """humanize_addresses=True shifts str_index and start by one."""
        actual = self.word_seg.to_string(humanize_addresses=True)

        # Derive the expected dump from the default one; order matters.
        replacements = (
            ('t:\t3', 't:\t4'),
            ('t:\t0', 't:\t1'),
            ('x:\t%i' % self.str_index, 'x:\t%i' % (self.str_index + 1)),
        )
        expected = self.base_output_string
        for old, new in replacements:
            expected = expected.replace(old, new)

        self.assertEqual(
            actual,
            expected,
            msg="to_string() doesn't humanize addresses!"
        )

    def test_to_string_interpolate_builtin_variables(self):
        """The builtin __xxx__ variables are interpolated in formatting."""
        line_format = (
            '%(__num__)s,%(__content__)s,'
            '%(__str_index__)s,%(__start__)s,%(__end__)s,'
            '%(__str_index_raw__)s,%(__start_raw__)s,%(__end_raw__)s'
        )
        actual = self.word_seg.to_string(formatting=line_format)
        expected = (
            '1,ab,%i,0,2,%i,0,2\n2,cde,%i,3,6,%i,3,6'
            % ((self.str_index,) * 4)
        )
        self.assertEqual(
            actual,
            expected,
            msg="to_string() doesn't interpolate builtin variables!"
        )

    def test_to_string_interpolate_annotations(self):
        """Annotation values are interpolated; missing ones give __none__."""
        actual = self.word_seg.to_string(formatting='%(a)s')
        self.assertEqual(
            actual,
            '1\n__none__',
            msg="to_string() doesn't interpolate annotations!"
        )

    def test_to_string_progress(self):
        """The progress callback is called once per segment."""

        def count_call():
            """Count one invocation of the callback."""
            self.count += 1

        self.word_seg.to_string(progress_callback=count_call)
        self.assertEqual(
            self.count,
            len(self.word_seg),
            msg="to_string doesn't track progress!"
        )

    def test_get_annotation_keys(self):
        """get_annotation_keys() lists the keys used in the segmentation."""
        found_keys = sorted(self.word_seg.get_annotation_keys())
        expected_keys = sorted(['a', 'bc'])
        self.assertEqual(
            found_keys,
            expected_keys,
            msg="get_annotation_keys() doesn't return existing annotations!"
        )

    def test_is_non_overlapping(self):
        """is_non_overlapping() is true for disjoint segments."""
        verdict = self.word_seg.is_non_overlapping()
        self.assertTrue(
            verdict,
            msg="is_non_overlapping() doesn't recognize absence of overlap!"
        )

    def test_is_overlapping(self):
        """is_non_overlapping() is false for overlapping segments."""
        verdict = self.overlapping_seg.is_non_overlapping()
        self.assertFalse(
            verdict,
            msg="is_non_overlapping() doesn't recognize presence of overlap!"
        )
Esempio n. 3
0
class TestSegmentation(unittest.TestCase):
    """Unit tests for Segmentation: to_string(), get_annotation_keys()
    and is_non_overlapping()."""

    def setUp(self):
        """Create the fixture segmentations and the expected default dump."""
        self.entire_text_seg = Input('ab cde')
        self.str_index = self.entire_text_seg[0].str_index
        idx = self.str_index

        # Two adjacent words; only the first one carries annotations.
        annotated_word = Segment(
            str_index=idx,
            start=0,
            end=2,
            annotations={'a': '1', 'bc': '20'},
        )
        plain_word = Segment(str_index=idx, start=3, end=6)
        self.word_seg = Segmentation([annotated_word, plain_word])

        # Two segments sharing the character at position 4.
        first_overlap = Segment(str_index=idx, start=3, end=5)
        second_overlap = Segment(str_index=idx, start=4, end=6)
        self.overlapping_seg = Segmentation([first_overlap, second_overlap])

        # What to_string() is expected to yield for word_seg by default.
        template = (
            'segment number 1\n'
            '\tcontent:\t"ab"\n'
            '\tstr_index:\t%i\n'
            '\tstart:\t0\n'
            '\tend:\t2\n'
            '\tannotations:\n'
            '\t\ta                    1\n'
            '\t\tbc                   20\n'
            'segment number 2\n'
            '\tcontent:\t"cde"\n'
            '\tstr_index:\t%i\n'
            '\tstart:\t3\n'
            '\tend:\t6'
        )
        self.base_output_string = template % (idx, idx)

        # Incremented by the progress callback test.
        self.count = 0

    def tearDown(self):
        """Nothing to clean up."""

    def test_creator(self):
        """Calling Segmentation() yields a Segmentation instance."""
        created = Segmentation()
        self.assertIsInstance(
            created,
            Segmentation,
            msg="creator doesn't return Segmentation object!",
        )

    def test_to_string_default_format(self):
        """to_string() without arguments yields the default dump."""
        actual = self.word_seg.to_string()
        self.assertEqual(
            actual,
            self.base_output_string,
            msg="to_string() doesn't format segmentation correctly by default!",
        )

    def test_to_string_delimiter(self):
        """The segment delimiter shows up in the output."""
        actual = self.word_seg.to_string(segment_delimiter='DELIMITER')
        self.assertIn(
            'DELIMITER',
            actual,
            msg="to_string() doesn't format segment delimiter correctly!",
        )

    def test_to_string_header(self):
        """The header is placed in front of the default dump."""
        actual = self.word_seg.to_string(header='HEADER')
        expected = 'HEADER' + self.base_output_string
        self.assertEqual(
            actual,
            expected,
            msg="to_string() doesn't format header correctly!",
        )

    def test_to_string_footer(self):
        """The footer is placed after the default dump."""
        actual = self.word_seg.to_string(footer='FOOTER')
        expected = self.base_output_string + 'FOOTER'
        self.assertEqual(
            actual,
            expected,
            msg="to_string() doesn't format footer correctly!",
        )

    def test_to_string_humanize_addresses(self):
        """humanize_addresses=True shifts str_index and start by one."""
        actual = self.word_seg.to_string(humanize_addresses=True)

        # Derive the expected dump from the default one; order matters.
        replacements = (
            ('t:\t3', 't:\t4'),
            ('t:\t0', 't:\t1'),
            ('x:\t%i' % self.str_index, 'x:\t%i' % (self.str_index + 1)),
        )
        expected = self.base_output_string
        for old, new in replacements:
            expected = expected.replace(old, new)

        self.assertEqual(
            actual,
            expected,
            msg="to_string() doesn't humanize addresses!",
        )

    def test_to_string_interpolate_builtin_variables(self):
        """The builtin __xxx__ variables are interpolated in formatting."""
        line_format = (
            '%(__num__)s,%(__content__)s,'
            '%(__str_index__)s,%(__start__)s,%(__end__)s,'
            '%(__str_index_raw__)s,%(__start_raw__)s,%(__end_raw__)s'
        )
        actual = self.word_seg.to_string(formatting=line_format)
        expected = (
            '1,ab,%i,0,2,%i,0,2\n2,cde,%i,3,6,%i,3,6'
            % ((self.str_index,) * 4)
        )
        self.assertEqual(
            actual,
            expected,
            msg="to_string() doesn't interpolate builtin variables!",
        )

    def test_to_string_interpolate_annotations(self):
        """Annotation values are interpolated; missing ones give __none__."""
        actual = self.word_seg.to_string(formatting='%(a)s')
        self.assertEqual(
            actual,
            '1\n__none__',
            msg="to_string() doesn't interpolate annotations!",
        )

    def test_to_string_progress(self):
        """The progress callback is called once per segment."""

        def count_call():
            """Count one invocation of the callback."""
            self.count += 1

        self.word_seg.to_string(progress_callback=count_call)
        self.assertEqual(
            self.count,
            len(self.word_seg),
            msg="to_string doesn't track progress!",
        )

    def test_get_annotation_keys(self):
        """get_annotation_keys() lists the keys used in the segmentation."""
        found_keys = sorted(self.word_seg.get_annotation_keys())
        expected_keys = sorted(['a', 'bc'])
        self.assertEqual(
            found_keys,
            expected_keys,
            msg="get_annotation_keys() doesn't return existing annotations!",
        )

    def test_is_non_overlapping(self):
        """is_non_overlapping() is true for disjoint segments."""
        verdict = self.word_seg.is_non_overlapping()
        self.assertTrue(
            verdict,
            msg="is_non_overlapping() doesn't recognize absence of overlap!",
        )

    def test_is_overlapping(self):
        """is_non_overlapping() is false for overlapping segments."""
        verdict = self.overlapping_seg.is_non_overlapping()
        self.assertFalse(
            verdict,
            msg="is_non_overlapping() doesn't recognize presence of overlap!",
        )
Esempio n. 4
0
    def sendData(self):
        """Tag the input with TreeTagger and send it on "Text data".

        None is sent (and nothing else is done) when self.NoLink is set or
        when there is no input data.  Otherwise the input segments are
        wrapped in <xb_tt> elements carrying their annotations, tagged in
        one call to self.tag(), and every tagger output line is rewritten
        as a <w> element.  When self.activer_xml equals True the recoded
        segmentation is sent as is; otherwise it is first split into one
        segment per <w> element.  The TreeTagger link is also saved to
        "treetagger_link.txt" before sending.

        Exceptions raised while processing propagate to the caller; the
        progress bar is finished in every case.
        """
        # Without a link to TreeTagger nothing can be tagged.
        if self.NoLink:
            self.infoBox.setText(u"Sorry, TreeTagger's link not found.",
                                 "error")
            self.send("Text data", None)
            return

        # Important: if input data is None, propagate this value to output...
        if not self.inputData:
            self.infoBox.setText(u"Widget needs input", "warning")
            self.send("Text data", None)
            return

        # Show that something is happening...
        self.infoBox.setText(u"TreeTagger is running...", "warning")

        # Initialize progress bar.
        self.progressBar = gui.ProgressBar(self, iterations=5)
        try:
            # Collect the input segments, each one carrying its annotations
            # serialized as XML attributes in the "tt_xb" annotation.
            # NOTE(review): the annotation is written on the segment object
            # yielded by the input -- confirm this cannot leak upstream.
            # NOTE(review): annotation values are not escaped here, so a
            # value containing a quote may yield malformed XML -- confirm.
            copy_of_input_seg = Segmentation()
            copy_of_input_seg.label = self.inputData.label
            for segment in self.inputData:
                attr = " ".join(
                    "%s='%s'" % item for item in segment.annotations.items())
                segment.annotations["tt_xb"] = attr
                copy_of_input_seg.append(segment)

            self.progressBar.advance()

            # One big string, so that the tagger is called only once.
            concatenated_text = copy_of_input_seg.to_string(
                formatting="<xb_tt %(tt_xb)s>%(__content__)s</xb_tt>",
                display_all=True,
            )

            self.progressBar.advance()

            tagged_input = Input(self.tag(concatenated_text))
            tagged_segmentation = Segmenter.import_xml(tagged_input, "xb_tt")

            self.progressBar.advance()

            # The recoding is the same whether or not XML output is wanted,
            # so it is done once, before branching.
            xml_segmentation, _ = Segmenter.recode(
                tagged_segmentation,
                substitutions=[
                    (re.compile(r"<unknown>"), "[unknown]"),
                    (re.compile(r"(.+)\t(.+)\t(.+?)(?=[\r\n])"),
                     "<w lemma='&3' type='&2'>&1</w>"),
                    (re.compile(r'"""'), '"&quot;"'),
                ],
            )

            # Kept as an equality test on purpose: the type of activer_xml
            # is not visible here, and a value such as 2 is truthy without
            # being equal to True.
            if self.activer_xml == True:  # noqa: E712
                final_segmentation = xml_segmentation
            else:
                final_segmentation = Segmenter.import_xml(
                    xml_segmentation, "w")

            self.infoBox.dataSent("")

            # Save the TreeTagger link...
            if self.system == "nt":
                link_path = "treetagger_link.txt"
            else:
                link_path = os.path.normpath(
                    "/Users/" + self.user + "/treetagger_link.txt")
            # The context manager closes the file even if write() raises.
            with open(link_path, "w") as link_file:
                link_file.write(self.treetagger_link)
        finally:
            # Clear progress bar, also when processing failed.
            self.progressBar.finish()

        # Send the segmentation.
        self.send("Text data", final_segmentation, self)
        self.compteur += 1
        self.sendButton.resetSettingsChangedFlag()
Esempio n. 5
0
    def sendData(self):
        """Tokenize every input segment with self.nlp and send the tokens.

        When there is no input segmentation, None is sent on the
        "Linguistically analyzed data" channel.  Otherwise one output
        segment is created per token, positioned relative to the start of
        its input segment and annotated with the input segment's
        annotations plus every RELEVANT_KEYS attribute of the token that is
        not None.
        """
        # No input: warn and propagate None.
        if self.inputSeg is None:
            self.infoBox.setText("Widget needs input", "warning")
            self.send("Linguistically analyzed data", None, self)
            return

        # Lock the interface and start the progress bar (one step per
        # input segment).
        self.infoBox.setText(u"Processing, please wait...", "warning")
        self.controlArea.setDisabled(True)
        progressBar = ProgressBar(self, iterations=len(self.inputSeg))

        tokenSegments = []

        for inputSegment in self.inputSeg:

            # What is needed from the input segment...
            content = inputSegment.get_content()
            inheritedAnnotations = inputSegment.annotations
            strIndex = inputSegment.str_index
            offset = inputSegment.start or 0

            # One output segment per token of the analyzed content...
            for token in self.nlp(content):
                annotations = inheritedAnnotations.copy()
                for key in RELEVANT_KEYS:
                    value = getattr(token, key)
                    if value is not None:
                        annotations[key] = value

                # token.idx is relative to the analyzed content.
                tokenStart = offset + token.idx
                tokenEnd = tokenStart + len(token)
                tokenSegments.append(
                    Segment(
                        str_index=strIndex,
                        start=tokenStart,
                        end=tokenEnd,
                        annotations=annotations,
                    )
                )

            progressBar.advance()

        outputSeg = Segmentation(tokenSegments, self.captionTitle)

        # Report the size of the output...
        segmentCount = len(outputSeg)
        message = pluralize(
            "%i segment@p sent to output." % segmentCount,
            segmentCount,
        )
        self.infoBox.setText(message)

        print(outputSeg.to_string())

        # Unlock the interface.
        progressBar.finish()
        self.controlArea.setDisabled(False)

        # Send the result and clear the "settings changed" flag.
        self.send("Linguistically analyzed data", outputSeg, self)
        self.sendButton.resetSettingsChangedFlag()
    def sendData(self):
        """Build one segment per PER entity of self.char_df and send them.

        Returns early, after calling self.sendNoneToOutputs(), when there is
        no model or no input segmentation.  Otherwise the positions found in
        self.char_df (which refer to the concatenated input segments, one
        separator character between consecutive segments) are converted to
        positions relative to the containing input segment, and the result
        is sent on the "Character segmentation" channel.  Each output
        segment is annotated with the entity's alias under the key "id".
        """
        # Check that there's a model...
        if not self.model:
            self.noLanguageModelWarning()
            self.sendNoneToOutputs()
            return

        # Check that there's an input...
        if self.inputSeg is None:
            self.infoBox.setText("Widget needs input.", "warning")
            self.sendNoneToOutputs()
            return

        self.infoBox.setText(
            u"Processing, please wait...",
            "warning",
        )

        # Disable control area and initialize progress bar (one step per
        # row of char_df)...
        self.controlArea.setDisabled(True)
        progressBar = ProgressBar(self, iterations=len(self.char_df))

        # Get start and end pos of concatenated input segments...
        startPositions = [0]
        endPositions = list()
        numSegments = len(self.inputSeg)
        for idx in range(1, numSegments):
            prevSegLen = len(self.inputSeg[idx-1].get_content())
            # +1 accounts for the separator between two segments.
            startPositions.append(startPositions[-1] + prevSegLen + 1)
            endPositions.append(startPositions[-1] - 1)
        # NOTE(review): the last end position is one more than the formula
        # used for the other segments would give -- confirm it is intended.
        endPositions.append(startPositions[-1] +
                            len(self.inputSeg[-1].get_content()) + 1)

        # Get or update character aliases...
        find_pairs = sys.modules['charnetto.find_pairs']
        characters = [entry.split(", ") for entry in self.characters]
        find_pairs.map_names(self.char_df, characters)

        # Initializations...
        charSegments = list()
        currentSegmentIdx = 0

        # For each character token in Charnetto's output...
        for _, charToken in self.char_df.iterrows():

            # Only PER named entities produce a segment.
            if charToken["tag"] == "PER":

                # Get index of containing segment...
                while charToken["end_pos"] > endPositions[currentSegmentIdx]:
                    currentSegmentIdx += 1

                # Create segment for char with its actual coordinates...
                segmentStart = startPositions[currentSegmentIdx]
                strIndex = self.inputSeg[currentSegmentIdx].str_index
                start = charToken["start_pos"] - segmentStart
                end = charToken["end_pos"] - segmentStart
                annotations = {"id": charToken["alias"]}
                charSegments.append(
                    Segment(strIndex, start, end, annotations)
                )

            # Advance for every row, skipped ones included: the progress bar
            # was sized with len(self.char_df).
            progressBar.advance()

        # Send output...
        outputSegmentation = Segmentation(charSegments,
                                          label=self.captionTitle)
        self.send("Character segmentation", outputSegmentation, self)
        # NOTE(review): dumps the whole output to stdout; looks like a
        # debugging leftover -- confirm before removing.
        print(outputSegmentation.to_string())

        # Set status to OK and report data size...
        message = "%i segment@p sent to output." % len(outputSegmentation)
        message = pluralize(message, len(outputSegmentation))
        self.infoBox.setText(message)

        # Clear progress bar.
        progressBar.finish()
        self.controlArea.setDisabled(False)

        self.sendButton.resetSettingsChangedFlag()
    def sendData(self):
        """Tag the input with TreeTagger and send it on 'Text data'.

        None is sent (and nothing else is done) when self.NoLink is set or
        when there is no input data.  Otherwise the input segments are
        wrapped in <xb_tt> elements carrying their annotations, tagged in
        one call to self.tag(), and every tagger output line is rewritten
        as a <w> element.  When self.activer_xml equals True the recoded
        result is sent as is; otherwise it is first split into one segment
        per <w> element.  The TreeTagger link is also saved to
        "treetagger_link.txt" before sending.

        Exceptions raised while processing propagate to the caller; the
        progress bar is finished in every case.
        """
        # Without a link to TreeTagger nothing can be tagged.
        if self.NoLink:
            self.infoBox.setText(
                u"Sorry, TreeTagger's link not found.",
                "error"
            )
            self.send('Text data', None)
            return

        # Important: if input data is None, propagate this value to output...
        if not self.inputData:
            self.infoBox.setText(
                u"Widget needs input",
                "warning"
            )
            self.send('Text data', None)
            return

        # Show that something is happening...
        self.infoBox.setText(
            u'TreeTagger is running...',
            "warning"
        )

        # Initialize progress bar.
        self.progressBar = OWGUI.ProgressBar(
            self,
            iterations=5
        )
        try:
            # Collect the input segments, each one carrying its annotations
            # serialized as XML attributes in the "tt_xb" annotation.
            # NOTE(review): the annotation is written on the segment object
            # yielded by the input -- confirm this cannot leak upstream.
            # NOTE(review): annotation values are not escaped here, so a
            # value containing a quote may yield malformed XML -- confirm.
            copy_of_input_seg = Segmentation()
            copy_of_input_seg.label = self.inputData.label
            for segment in self.inputData:
                attr = " ".join(
                    "%s='%s'" % item for item in segment.annotations.items()
                )
                segment.annotations["tt_xb"] = attr
                copy_of_input_seg.append(segment)

            self.progressBar.advance()

            # One big string, so that the tagger is called only once.
            concatenated_text = copy_of_input_seg.to_string(
                formatting="<xb_tt %(tt_xb)s>%(__content__)s</xb_tt>",
                display_all=True,
            )

            self.progressBar.advance()

            tagged_input = Input(self.tag(concatenated_text))
            tagged_segmentation = Segmenter.import_xml(tagged_input, "xb_tt")

            self.progressBar.advance()

            # The recoding is the same whether or not XML output is wanted,
            # so it is done once, before branching.
            # NOTE(review): the result of recode() is used directly here,
            # without unpacking -- confirm against the LTTL version in use.
            xml_segmentation = Segmenter.recode(
                tagged_segmentation,
                substitutions=[
                    (re.compile(r"<unknown>"), '[unknown]'),
                    (re.compile(r"(.+)\t(.+)\t(.+)"),
                     '<w lemma="&3" type="&2">&1</w>'),
                    (re.compile(r'"""'), '"&quot;"'),
                ],
            )

            # Kept as an equality test on purpose: the type of activer_xml
            # is not visible here, and a value such as 2 is truthy without
            # being equal to True.
            if self.activer_xml == True:  # noqa: E712
                final_segmentation = xml_segmentation
            else:
                final_segmentation = Segmenter.import_xml(
                    xml_segmentation,
                    "w"
                )

            self.infoBox.dataSent('')

            # Save the TreeTagger link...
            if self.system == "nt":
                link_path = "treetagger_link.txt"
            else:
                link_path = os.path.normpath(
                    "/Users/" + self.user + "/treetagger_link.txt"
                )
            # The context manager closes the file even if write() raises.
            with open(link_path, 'w') as link_file:
                link_file.write(self.treetagger_link)
        finally:
            # Clear progress bar, also when processing failed.
            self.progressBar.finish()

        # Send the segmentation.
        self.send('Text data', final_segmentation, self)
        self.compteur += 1
        self.sendButton.resetSettingsChangedFlag()