def __init__(self, *args, **kwargs):
        """Set up the widget's transient state and build its control area.

        All positional and keyword arguments are forwarded unchanged to the
        parent constructor. The method then:

        * initialises the attributes bound to the GUI controls (file list
          labels, "add file" form fields, PDF password and OCR options);
        * creates the info box, the send button and the advanced settings
          toggle;
        * lays out the basic interface (one file path and its encoding) and
          the advanced interface (file list, "add file" form, options);
        * schedules a first send attempt through a zero-delay timer.
        """
        super().__init__(*args, **kwargs)

        # Other attributes...
        self.segmentation = None
        self.createdInputs = list()
        self.fileLabels = list()
        self.selectedFileLabels = list()
        self.newFiles = u''
        self.newAnnotationKey = u''
        self.newAnnotationValue = u''
        self.pdfPassword = u''  # SuperTextFiles
        self.ocrForce = False  # SuperTextFiles
        self.ocrLanguages = u'eng'  # SuperTextFiles
        self.infoBox = InfoBox(widget=self.controlArea)
        self.sendButton = SendButton(
            widget=self.controlArea,
            master=self,
            callback=self.sendData,
            infoBoxAttribute='infoBox',
            sendIfPreCallback=self.updateGUI,
        )
        self.advancedSettings = AdvancedSettings(
            widget=self.controlArea,
            master=self,
            callback=self.sendButton.settingsChanged,
        )

        # GUI...

        # Advanced settings checkbox...
        self.advancedSettings.draw()

        # BASIC GUI...

        # Basic file box
        basicFileBox = gui.widgetBox(
            widget=self.controlArea,
            box=u'Source',
            orientation='vertical',
            addSpace=False,
        )
        basicFileBoxLine1 = gui.widgetBox(
            widget=basicFileBox,
            box=False,
            orientation='horizontal',
        )
        gui.lineEdit(
            widget=basicFileBoxLine1,
            master=self,
            value='file',
            orientation='horizontal',
            label=u'File path:',
            labelWidth=101,
            callback=self.sendButton.settingsChanged,
            tooltip=(u"The path of the file."),
        )
        gui.separator(widget=basicFileBoxLine1, width=5)
        gui.button(
            widget=basicFileBoxLine1,
            master=self,
            label=u'Browse',
            callback=self.browse,
            tooltip=(u"Open a dialog for selecting file."),
        )
        gui.separator(widget=basicFileBox, width=3)
        # Encoding selector of the basic interface; changing it goes through
        # the send button, like the basic file path above.
        basicEncodingsCombobox = gui.comboBox(
            widget=basicFileBox,
            master=self,
            value='encoding',
            items=getPredefinedEncodings(),
            sendSelectedValue=True,
            orientation='horizontal',
            label=u'Encoding:',
            labelWidth=101,
            callback=self.sendButton.settingsChanged,
            tooltip=(u"Select input file(s) encoding."),
        )
        addSeparatorAfterDefaultEncodings(basicEncodingsCombobox)
        addAutoDetectEncoding(basicEncodingsCombobox)
        gui.separator(widget=basicFileBox, width=3)
        self.advancedSettings.basicWidgets.append(basicFileBox)
        self.advancedSettings.basicWidgetsAppendSeparator()

        # ADVANCED GUI...

        # Shared label width so that the "add file" form fields line up.
        defaultLabelWidth = 120  # SuperTextFiles

        # File box
        fileBox = gui.widgetBox(
            widget=self.controlArea,
            box=u'Sources',
            orientation='vertical',
            addSpace=False,
        )
        fileBoxLine1 = gui.widgetBox(
            widget=fileBox,
            box=False,
            orientation='horizontal',
            addSpace=True,
        )
        self.fileListbox = gui.listBox(
            widget=fileBoxLine1,
            master=self,
            value='selectedFileLabels',
            labels='fileLabels',
            callback=self.updateFileBoxButtons,
            tooltip=(
                u"The list of files whose content will be imported.\n"
                u"\nIn the output segmentation, the content of each\n"
                u"file appears in the same position as in the list.\n"
                u"\nColumn 1 shows the file's name.\n"
                u"Column 2 shows the file's annotation (if any).\n"
                # Start SuperTextFiles
                u"Column 3 shows the file's password (if any).\n"
                u"Column 4 shows the file's languages (if any).\n"
                u"Column 5 shows if OCR is forced.\n"
                u"Column 6 shows the file's encoding."
                # End SuperTextFiles
            ),
        )
        # Request a Courier font for the list box, whose tooltip describes
        # its content as columns.
        font = QFont()
        font.setFamily('Courier')
        font.setStyleHint(QFont.Courier)
        font.setPixelSize(12)
        self.fileListbox.setFont(font)
        fileBoxCol2 = gui.widgetBox(
            widget=fileBoxLine1,
            orientation='vertical',
        )
        self.moveUpButton = gui.button(
            widget=fileBoxCol2,
            master=self,
            label=u'Move Up',
            callback=self.moveUp,
            tooltip=(u"Move the selected file upward in the list."),
        )
        self.moveDownButton = gui.button(
            widget=fileBoxCol2,
            master=self,
            label=u'Move Down',
            callback=self.moveDown,
            tooltip=(u"Move the selected file downward in the list."),
        )
        self.removeButton = gui.button(
            widget=fileBoxCol2,
            master=self,
            label=u'Remove',
            callback=self.remove,
            tooltip=(u"Remove the selected file from the list."),
        )
        self.clearAllButton = gui.button(
            widget=fileBoxCol2,
            master=self,
            label=u'Clear All',
            callback=self.clearAll,
            tooltip=(u"Remove all files from the list."),
        )
        self.exportButton = gui.button(
            widget=fileBoxCol2,
            master=self,
            label=u'Export List',
            callback=self.exportList,
            tooltip=(u"Open a dialog for selecting a file where the file\n"
                     u"list can be exported in JSON format."),
        )
        self.importButton = gui.button(
            widget=fileBoxCol2,
            master=self,
            label=u'Import List',
            callback=self.importList,
            tooltip=(u"Open a dialog for selecting a file list to\n"
                     u"import (in JSON format). Files from this list\n"
                     u"will be added to those already imported."),
        )
        fileBoxLine2 = gui.widgetBox(
            widget=fileBox,
            box=False,
            orientation='vertical',
        )
        # Add file box
        addFileBox = gui.widgetBox(
            widget=fileBoxLine2,
            box=True,
            orientation='vertical',
        )
        addFileBoxLine1 = gui.widgetBox(
            widget=addFileBox,
            orientation='horizontal',
        )
        gui.lineEdit(
            widget=addFileBoxLine1,
            master=self,
            value='newFiles',
            orientation='horizontal',
            label=u'File path(s):',
            labelWidth=defaultLabelWidth,
            callback=self.updateGUI,
            tooltip=(u"The paths of the files that will be added to the\n"
                     u"list when button 'Add' is clicked.\n\n"
                     u"Successive paths must be separated with ' / ' \n"
                     u"(whitespace + slash + whitespace). Their order in\n"
                     u"the list will be the same as in this field."),
        )
        gui.separator(widget=addFileBoxLine1, width=5)
        gui.button(
            widget=addFileBoxLine1,
            master=self,
            label=u'Browse',
            callback=self.browse,
            tooltip=(u"Open a dialog for selecting files.\n\n"
                     u"To select multiple files at once, either draw a\n"
                     u"selection box around them, or use shift and/or\n"
                     u"ctrl + click.\n\n"
                     u"Selected file paths will appear in the field to\n"
                     u"the left of this button afterwards, ready to be\n"
                     u"added to the list when button 'Add' is clicked."),
        )
        gui.separator(widget=addFileBox, width=3)
        # Encoding selector of the advanced "add file" form; it only refreshes
        # the GUI, the new encoding is used once 'Add' is clicked.
        advancedEncodingsCombobox = gui.comboBox(
            widget=addFileBox,
            master=self,
            value='encoding',
            items=getPredefinedEncodings(),
            sendSelectedValue=True,
            orientation='horizontal',
            label=u'Encoding:',
            labelWidth=defaultLabelWidth,
            callback=self.updateGUI,
            tooltip=(u"Select input file(s) encoding."),
        )
        addSeparatorAfterDefaultEncodings(advancedEncodingsCombobox)
        addAutoDetectEncoding(advancedEncodingsCombobox)
        # NOTE(review): self-assignment kept on purpose; it presumably
        # re-synchronises both combo boxes with the stored setting now that
        # the auto-detect entry has been added -- confirm before removing.
        self.encoding = self.encoding
        gui.separator(widget=addFileBox, width=3)
        gui.lineEdit(
            widget=addFileBox,
            master=self,
            value='newAnnotationKey',
            orientation='horizontal',
            label=u'Annotation key:',
            labelWidth=defaultLabelWidth,
            callback=self.updateGUI,
            tooltip=(u"This field lets you specify a custom annotation\n"
                     u"key associated with each file that is about to be\n"
                     u"added to the list."),
        )
        gui.separator(widget=addFileBox, width=3)
        gui.lineEdit(
            widget=addFileBox,
            master=self,
            value='newAnnotationValue',
            orientation='horizontal',
            label=u'Annotation value:',
            labelWidth=defaultLabelWidth,
            callback=self.updateGUI,
            tooltip=(u"This field lets you specify the annotation value\n"
                     u"associated with the above annotation key."),
        )

        ### Start SuperTextFiles addition
        gui.separator(widget=addFileBox, width=3)
        # Field for PDF password
        # NOTE(review): the end of this call and the beginning of the next
        # one were unreadable in the reviewed copy and have been rebuilt on
        # the model of the sibling fields; confirm the password tooltip
        # wording and the separator between the two fields.
        gui.lineEdit(
            widget=addFileBox,
            master=self,
            value='pdfPassword',
            orientation='horizontal',
            label=u'PDF password:',
            labelWidth=defaultLabelWidth,
            callback=self.updateGUI,
            tooltip=(u"This field lets you specify the password\n"
                     u"needed to open a protected PDF file."),
        )
        gui.separator(widget=addFileBox, width=3)
        # Field for OCR languages
        gui.lineEdit(
            widget=addFileBox,
            master=self,
            value='ocrLanguages',
            orientation='horizontal',
            label=u'OCR Language(s):',
            labelWidth=defaultLabelWidth,
            callback=self.updateGUI,
            tooltip=(u"This field lets you specify languages\n"
                     u"for the OCR process. Ex.: fra+ita"),
        )

        gui.checkBox(
            widget=addFileBox,
            master=self,
            value='ocrForce',
            label=u'Force OCR',
            labelWidth=defaultLabelWidth,
            callback=self.updateGUI,
            tooltip=(u"Force to use an OCR detection on this file"),
        )
        ### End SuperTextFiles addition

        gui.separator(widget=addFileBox, width=3)
        self.addButton = gui.button(
            widget=addFileBox,
            master=self,
            label=u'Add',
            callback=self.add,
            tooltip=(u"Add the file(s) currently displayed in the\n"
                     u"'Files' text field to the list.\n\n"
                     u"Each of these files will be associated with the\n"
                     u"specified encoding and annotation (if any).\n\n"
                     u"Other files may be selected afterwards and\n"
                     u"assigned a different encoding and annotation."),
        )
        self.advancedSettings.advancedWidgets.append(fileBox)
        self.advancedSettings.advancedWidgetsAppendSeparator()

        # Options box...
        optionsBox = gui.widgetBox(
            widget=self.controlArea,
            box=u'Options',
            orientation='vertical',
            addSpace=False,
        )
        optionsBoxLine1 = gui.widgetBox(
            widget=optionsBox,
            box=False,
            orientation='horizontal',
        )
        gui.checkBox(
            widget=optionsBoxLine1,
            master=self,
            value='importFilenames',
            label=u'Import file names with key:',
            labelWidth=180,
            callback=self.sendButton.settingsChanged,
            tooltip=(u"Import file names as annotations."),
        )
        self.importFilenamesKeyLineEdit = gui.lineEdit(
            widget=optionsBoxLine1,
            master=self,
            value='importFilenamesKey',
            orientation='horizontal',
            callback=self.sendButton.settingsChanged,
            tooltip=(u"Annotation key for importing file names."),
        )
        gui.separator(widget=optionsBox, width=3)
        optionsBoxLine2 = gui.widgetBox(
            widget=optionsBox,
            box=False,
            orientation='horizontal',
        )
        gui.checkBox(
            widget=optionsBoxLine2,
            master=self,
            value='autoNumber',
            label=u'Auto-number with key:',
            labelWidth=180,
            callback=self.sendButton.settingsChanged,
            tooltip=(u"Annotate files with increasing numeric indices."),
        )
        self.autoNumberKeyLineEdit = gui.lineEdit(
            widget=optionsBoxLine2,
            master=self,
            value='autoNumberKey',
            orientation='horizontal',
            callback=self.sendButton.settingsChanged,
            tooltip=(u"Annotation key for file auto-numbering."),
        )
        gui.separator(widget=optionsBox, width=3)
        self.advancedSettings.advancedWidgets.append(optionsBox)
        self.advancedSettings.advancedWidgetsAppendSeparator()

        gui.rubber(self.controlArea)

        # Send button...
        self.sendButton.draw()

        # Info box...
        self.infoBox.draw()

        self.adjustSizeWithTimer()
        # Zero-delay timer: the first send attempt is queued instead of
        # being run directly from the constructor.
        QTimer.singleShot(0, self.sendButton.sendIf)
class SuperTextFiles(OWTextableBaseWidget):
    """Textable widget to import PDF files and if necessary to do an Optical
    Character Recognition (OCR)"""

    #----------------------------------------------------------------------
    # Widget's metadata...

    name = "Super Text Files"
    description = "Import data from raw text and PDF files"
    icon = "icons/SuperTextFiles.svg"
    priority = 1  # TODO

    #----------------------------------------------------------------------
    # Channel definitions....

    # Single input channel: JSONMessage objects are routed to the method
    # named "inputMessage".
    inputs = [('Message', JSONMessage, "inputMessage", widget.Single)]
    # Single output channel: a Segmentation, or None when the widget has
    # nothing valid to send (see the error paths of inputMessage/sendData).
    outputs = [('Text data', Segmentation)]

    #----------------------------------------------------------------------
    # Layout parameters...

    # NOTE(review): presumably tells the framework not to create a main
    # area, the whole GUI being built in the control area -- confirm.
    want_main_area = False

    #----------------------------------------------------------------------
    # Settings...

    # The handler's version is __version__ stripped of its last
    # dot-separated component.
    settingsHandler = VersionedSettingsHandler(
        version=__version__.rsplit(".", 1)[0])

    # File list used when advanced settings are displayed. Each entry holds,
    # in this order: path, encoding, annotation key, annotation value,
    # PDF password, OCR languages, "force OCR" flag.
    files = settings.Setting([])
    # Encoding bound to both 'Encoding:' combo boxes (basic and advanced).
    encoding = settings.Setting('(auto-detect)')
    # Options of the 'Options' box: auto-numbering and its annotation key...
    autoNumber = settings.Setting(False)
    autoNumberKey = settings.Setting(u'num')
    # ...and file name import and its annotation key.
    importFilenames = settings.Setting(True)
    importFilenamesKey = settings.Setting(u'filename')
    # NOTE(review): presumably the directory last visited with a file
    # dialog; its use is not visible in this part of the file -- confirm.
    lastLocation = settings.Setting('.')
    # When False, sendData processes the single `file` below instead of
    # the `files` list.
    displayAdvancedSettings = settings.Setting(False)
    # Path bound to the 'File path:' field of the basic interface.
    file = settings.Setting(u'')

    def __init__(self, *args, **kwargs):
        """Set up the widget's transient state and build its control area.

        All positional and keyword arguments are forwarded unchanged to the
        parent constructor. The method then:

        * initialises the attributes bound to the GUI controls (file list
          labels, "add file" form fields, PDF password and OCR options);
        * creates the info box, the send button and the advanced settings
          toggle;
        * lays out the basic interface (one file path and its encoding) and
          the advanced interface (file list, "add file" form, options);
        * schedules a first send attempt through a zero-delay timer.
        """
        super().__init__(*args, **kwargs)

        # Other attributes...
        self.segmentation = None
        self.createdInputs = list()
        self.fileLabels = list()
        self.selectedFileLabels = list()
        self.newFiles = u''
        self.newAnnotationKey = u''
        self.newAnnotationValue = u''
        self.pdfPassword = u''  # SuperTextFiles
        self.ocrForce = False  # SuperTextFiles
        self.ocrLanguages = u'eng'  # SuperTextFiles
        self.infoBox = InfoBox(widget=self.controlArea)
        self.sendButton = SendButton(
            widget=self.controlArea,
            master=self,
            callback=self.sendData,
            infoBoxAttribute='infoBox',
            sendIfPreCallback=self.updateGUI,
        )
        self.advancedSettings = AdvancedSettings(
            widget=self.controlArea,
            master=self,
            callback=self.sendButton.settingsChanged,
        )

        # GUI...

        # Advanced settings checkbox...
        self.advancedSettings.draw()

        # BASIC GUI...

        # Basic file box
        basicFileBox = gui.widgetBox(
            widget=self.controlArea,
            box=u'Source',
            orientation='vertical',
            addSpace=False,
        )
        basicFileBoxLine1 = gui.widgetBox(
            widget=basicFileBox,
            box=False,
            orientation='horizontal',
        )
        gui.lineEdit(
            widget=basicFileBoxLine1,
            master=self,
            value='file',
            orientation='horizontal',
            label=u'File path:',
            labelWidth=101,
            callback=self.sendButton.settingsChanged,
            tooltip=(u"The path of the file."),
        )
        gui.separator(widget=basicFileBoxLine1, width=5)
        gui.button(
            widget=basicFileBoxLine1,
            master=self,
            label=u'Browse',
            callback=self.browse,
            tooltip=(u"Open a dialog for selecting file."),
        )
        gui.separator(widget=basicFileBox, width=3)
        # Encoding selector of the basic interface; changing it goes through
        # the send button, like the basic file path above.
        basicEncodingsCombobox = gui.comboBox(
            widget=basicFileBox,
            master=self,
            value='encoding',
            items=getPredefinedEncodings(),
            sendSelectedValue=True,
            orientation='horizontal',
            label=u'Encoding:',
            labelWidth=101,
            callback=self.sendButton.settingsChanged,
            tooltip=(u"Select input file(s) encoding."),
        )
        addSeparatorAfterDefaultEncodings(basicEncodingsCombobox)
        addAutoDetectEncoding(basicEncodingsCombobox)
        gui.separator(widget=basicFileBox, width=3)
        self.advancedSettings.basicWidgets.append(basicFileBox)
        self.advancedSettings.basicWidgetsAppendSeparator()

        # ADVANCED GUI...

        # Shared label width so that the "add file" form fields line up.
        defaultLabelWidth = 120  # SuperTextFiles

        # File box
        fileBox = gui.widgetBox(
            widget=self.controlArea,
            box=u'Sources',
            orientation='vertical',
            addSpace=False,
        )
        fileBoxLine1 = gui.widgetBox(
            widget=fileBox,
            box=False,
            orientation='horizontal',
            addSpace=True,
        )
        self.fileListbox = gui.listBox(
            widget=fileBoxLine1,
            master=self,
            value='selectedFileLabels',
            labels='fileLabels',
            callback=self.updateFileBoxButtons,
            tooltip=(
                u"The list of files whose content will be imported.\n"
                u"\nIn the output segmentation, the content of each\n"
                u"file appears in the same position as in the list.\n"
                u"\nColumn 1 shows the file's name.\n"
                u"Column 2 shows the file's annotation (if any).\n"
                # Start SuperTextFiles
                u"Column 3 shows the file's password (if any).\n"
                u"Column 4 shows the file's languages (if any).\n"
                u"Column 5 shows if OCR is forced.\n"
                u"Column 6 shows the file's encoding."
                # End SuperTextFiles
            ),
        )
        # Request a Courier font for the list box, whose tooltip describes
        # its content as columns.
        font = QFont()
        font.setFamily('Courier')
        font.setStyleHint(QFont.Courier)
        font.setPixelSize(12)
        self.fileListbox.setFont(font)
        fileBoxCol2 = gui.widgetBox(
            widget=fileBoxLine1,
            orientation='vertical',
        )
        self.moveUpButton = gui.button(
            widget=fileBoxCol2,
            master=self,
            label=u'Move Up',
            callback=self.moveUp,
            tooltip=(u"Move the selected file upward in the list."),
        )
        self.moveDownButton = gui.button(
            widget=fileBoxCol2,
            master=self,
            label=u'Move Down',
            callback=self.moveDown,
            tooltip=(u"Move the selected file downward in the list."),
        )
        self.removeButton = gui.button(
            widget=fileBoxCol2,
            master=self,
            label=u'Remove',
            callback=self.remove,
            tooltip=(u"Remove the selected file from the list."),
        )
        self.clearAllButton = gui.button(
            widget=fileBoxCol2,
            master=self,
            label=u'Clear All',
            callback=self.clearAll,
            tooltip=(u"Remove all files from the list."),
        )
        self.exportButton = gui.button(
            widget=fileBoxCol2,
            master=self,
            label=u'Export List',
            callback=self.exportList,
            tooltip=(u"Open a dialog for selecting a file where the file\n"
                     u"list can be exported in JSON format."),
        )
        self.importButton = gui.button(
            widget=fileBoxCol2,
            master=self,
            label=u'Import List',
            callback=self.importList,
            tooltip=(u"Open a dialog for selecting a file list to\n"
                     u"import (in JSON format). Files from this list\n"
                     u"will be added to those already imported."),
        )
        fileBoxLine2 = gui.widgetBox(
            widget=fileBox,
            box=False,
            orientation='vertical',
        )
        # Add file box
        addFileBox = gui.widgetBox(
            widget=fileBoxLine2,
            box=True,
            orientation='vertical',
        )
        addFileBoxLine1 = gui.widgetBox(
            widget=addFileBox,
            orientation='horizontal',
        )
        gui.lineEdit(
            widget=addFileBoxLine1,
            master=self,
            value='newFiles',
            orientation='horizontal',
            label=u'File path(s):',
            labelWidth=defaultLabelWidth,
            callback=self.updateGUI,
            tooltip=(u"The paths of the files that will be added to the\n"
                     u"list when button 'Add' is clicked.\n\n"
                     u"Successive paths must be separated with ' / ' \n"
                     u"(whitespace + slash + whitespace). Their order in\n"
                     u"the list will be the same as in this field."),
        )
        gui.separator(widget=addFileBoxLine1, width=5)
        gui.button(
            widget=addFileBoxLine1,
            master=self,
            label=u'Browse',
            callback=self.browse,
            tooltip=(u"Open a dialog for selecting files.\n\n"
                     u"To select multiple files at once, either draw a\n"
                     u"selection box around them, or use shift and/or\n"
                     u"ctrl + click.\n\n"
                     u"Selected file paths will appear in the field to\n"
                     u"the left of this button afterwards, ready to be\n"
                     u"added to the list when button 'Add' is clicked."),
        )
        gui.separator(widget=addFileBox, width=3)
        # Encoding selector of the advanced "add file" form; it only refreshes
        # the GUI, the new encoding is used once 'Add' is clicked.
        advancedEncodingsCombobox = gui.comboBox(
            widget=addFileBox,
            master=self,
            value='encoding',
            items=getPredefinedEncodings(),
            sendSelectedValue=True,
            orientation='horizontal',
            label=u'Encoding:',
            labelWidth=defaultLabelWidth,
            callback=self.updateGUI,
            tooltip=(u"Select input file(s) encoding."),
        )
        addSeparatorAfterDefaultEncodings(advancedEncodingsCombobox)
        addAutoDetectEncoding(advancedEncodingsCombobox)
        # NOTE(review): self-assignment kept on purpose; it presumably
        # re-synchronises both combo boxes with the stored setting now that
        # the auto-detect entry has been added -- confirm before removing.
        self.encoding = self.encoding
        gui.separator(widget=addFileBox, width=3)
        gui.lineEdit(
            widget=addFileBox,
            master=self,
            value='newAnnotationKey',
            orientation='horizontal',
            label=u'Annotation key:',
            labelWidth=defaultLabelWidth,
            callback=self.updateGUI,
            tooltip=(u"This field lets you specify a custom annotation\n"
                     u"key associated with each file that is about to be\n"
                     u"added to the list."),
        )
        gui.separator(widget=addFileBox, width=3)
        gui.lineEdit(
            widget=addFileBox,
            master=self,
            value='newAnnotationValue',
            orientation='horizontal',
            label=u'Annotation value:',
            labelWidth=defaultLabelWidth,
            callback=self.updateGUI,
            tooltip=(u"This field lets you specify the annotation value\n"
                     u"associated with the above annotation key."),
        )

        ### Start SuperTextFiles addition
        gui.separator(widget=addFileBox, width=3)
        # Field for PDF password
        # NOTE(review): the end of this call and the beginning of the next
        # one were unreadable in the reviewed copy and have been rebuilt on
        # the model of the sibling fields; confirm the password tooltip
        # wording and the separator between the two fields.
        gui.lineEdit(
            widget=addFileBox,
            master=self,
            value='pdfPassword',
            orientation='horizontal',
            label=u'PDF password:',
            labelWidth=defaultLabelWidth,
            callback=self.updateGUI,
            tooltip=(u"This field lets you specify the password\n"
                     u"needed to open a protected PDF file."),
        )
        gui.separator(widget=addFileBox, width=3)
        # Field for OCR languages
        gui.lineEdit(
            widget=addFileBox,
            master=self,
            value='ocrLanguages',
            orientation='horizontal',
            label=u'OCR Language(s):',
            labelWidth=defaultLabelWidth,
            callback=self.updateGUI,
            tooltip=(u"This field lets you specify languages\n"
                     u"for the OCR process. Ex.: fra+ita"),
        )

        gui.checkBox(
            widget=addFileBox,
            master=self,
            value='ocrForce',
            label=u'Force OCR',
            labelWidth=defaultLabelWidth,
            callback=self.updateGUI,
            tooltip=(u"Force to use an OCR detection on this file"),
        )
        ### End SuperTextFiles addition

        gui.separator(widget=addFileBox, width=3)
        self.addButton = gui.button(
            widget=addFileBox,
            master=self,
            label=u'Add',
            callback=self.add,
            tooltip=(u"Add the file(s) currently displayed in the\n"
                     u"'Files' text field to the list.\n\n"
                     u"Each of these files will be associated with the\n"
                     u"specified encoding and annotation (if any).\n\n"
                     u"Other files may be selected afterwards and\n"
                     u"assigned a different encoding and annotation."),
        )
        self.advancedSettings.advancedWidgets.append(fileBox)
        self.advancedSettings.advancedWidgetsAppendSeparator()

        # Options box...
        optionsBox = gui.widgetBox(
            widget=self.controlArea,
            box=u'Options',
            orientation='vertical',
            addSpace=False,
        )
        optionsBoxLine1 = gui.widgetBox(
            widget=optionsBox,
            box=False,
            orientation='horizontal',
        )
        gui.checkBox(
            widget=optionsBoxLine1,
            master=self,
            value='importFilenames',
            label=u'Import file names with key:',
            labelWidth=180,
            callback=self.sendButton.settingsChanged,
            tooltip=(u"Import file names as annotations."),
        )
        self.importFilenamesKeyLineEdit = gui.lineEdit(
            widget=optionsBoxLine1,
            master=self,
            value='importFilenamesKey',
            orientation='horizontal',
            callback=self.sendButton.settingsChanged,
            tooltip=(u"Annotation key for importing file names."),
        )
        gui.separator(widget=optionsBox, width=3)
        optionsBoxLine2 = gui.widgetBox(
            widget=optionsBox,
            box=False,
            orientation='horizontal',
        )
        gui.checkBox(
            widget=optionsBoxLine2,
            master=self,
            value='autoNumber',
            label=u'Auto-number with key:',
            labelWidth=180,
            callback=self.sendButton.settingsChanged,
            tooltip=(u"Annotate files with increasing numeric indices."),
        )
        self.autoNumberKeyLineEdit = gui.lineEdit(
            widget=optionsBoxLine2,
            master=self,
            value='autoNumberKey',
            orientation='horizontal',
            callback=self.sendButton.settingsChanged,
            tooltip=(u"Annotation key for file auto-numbering."),
        )
        gui.separator(widget=optionsBox, width=3)
        self.advancedSettings.advancedWidgets.append(optionsBox)
        self.advancedSettings.advancedWidgetsAppendSeparator()

        gui.rubber(self.controlArea)

        # Send button...
        self.sendButton.draw()

        # Info box...
        self.infoBox.draw()

        self.adjustSizeWithTimer()
        # Zero-delay timer: the first send attempt is queued instead of
        # being run directly from the constructor.
        QTimer.singleShot(0, self.sendButton.sendIf)

    def inputMessage(self, message):
        """Handle JSON message on input connection.

        The message content must be a JSON list of objects. Each object may
        define the keys 'path', 'encoding', 'annotation_key',
        'annotation_value', 'pdf_password', 'ocr_languages' and 'ocr_force';
        'path', 'encoding' and 'ocr_force' must be present and different
        from the empty string.

        A falsy message is ignored. Otherwise the widget switches to
        advanced settings, the current file list is cleared and, when every
        entry is valid, the described files are appended to ``self.files``
        and the send button is notified. On malformed JSON or invalid
        entries, an error is shown in the info box, None is sent on
        'Text data' and no file is added.
        """
        if not message:
            return
        self.displayAdvancedSettings = True
        self.advancedSettings.setVisible(True)
        self.clearAll()
        self.infoBox.inputChanged()

        # Only the parsing itself is guarded, so that errors raised further
        # down are not misreported as invalid JSON.
        try:
            json_data = json.loads(message.content)
        except (TypeError, ValueError):
            # ValueError: malformed JSON; TypeError: content is not text.
            self.infoBox.setText(
                u"Please make sure that incoming message is valid JSON.",
                'error')
            self.send('Text data', None, self)
            return

        # Valid JSON that is not a list of objects (e.g. a single object, a
        # string or a number) is rejected like any other ill-formed entry
        # instead of raising on ``entry.get``.
        entriesAreValid = isinstance(json_data, list)
        temp_files = list()
        if entriesAreValid:
            for entry in json_data:
                if not isinstance(entry, dict):
                    entriesAreValid = False
                    break
                path = entry.get('path', '')
                encoding = entry.get('encoding', '')
                annotationKey = entry.get('annotation_key', '')
                annotationValue = entry.get('annotation_value', '')
                pdfPassword = entry.get('pdf_password', '')  # SuperTextFiles
                ocrLanguages = entry.get('ocr_languages', '')  # SuperTextFiles
                ocrForce = entry.get('ocr_force', '')  # SuperTextFiles

                # Compare with '' (not truthiness): False is a legitimate
                # value for 'ocr_force'.
                if path == '' or encoding == '' or ocrForce == '':
                    entriesAreValid = False
                    break
                temp_files.append((
                    path,
                    encoding,
                    annotationKey,
                    annotationValue,
                    pdfPassword,  # SuperTextFiles
                    ocrLanguages,  # SuperTextFiles
                    ocrForce,  # SuperTextFiles
                ))

        if not entriesAreValid:
            self.infoBox.setText(
                u"Please verify keys and values of incoming "
                u"JSON message.", 'error')
            self.send('Text data', None, self)
            return

        # Files are only added once every entry has been validated.
        self.files.extend(temp_files)
        self.sendButton.settingsChanged()

    def sendData(self):
        """Load files, create and send segmentation.

        Each listed file is read according to its detected type (raw text,
        PDF with optional OCR, or image through OCR), normalized to NFC and
        wrapped in an Input. A single file is sent as is; several files are
        concatenated into one segmentation.

        On failure, None is sent on the 'Text data' channel, the progress bar
        is finished and the control area is re-enabled.

        NOTE(review): the per-file PDF password (index 4 of each file entry)
        is not forwarded to the PDF helpers by this method.
        """

        # Check that there's something on input...
        if ((self.displayAdvancedSettings and not self.files)
                or not (self.file or self.displayAdvancedSettings)):
            self.infoBox.setText(u'Please select input file.', 'warning')
            self.send('Text data', None, self)
            return

        # Check that autoNumberKey is not empty (if necessary)...
        if (self.displayAdvancedSettings and self.autoNumber
                and not self.autoNumberKey):
            self.infoBox.setText(
                u'Please enter an annotation key for auto-numbering.',
                'warning')
            self.send('Text data', None, self)
            return

        # Clear created Inputs...
        self.clearCreatedInputs()

        fileContents = list()
        annotations = list()
        counter = 1

        if self.displayAdvancedSettings:
            myFiles = self.files
        else:
            myFiles = [[self.file, self.encoding, "", "", "", "eng", False]]

        self.infoBox.setText(u"Processing, please wait...", "warning")
        self.controlArea.setDisabled(True)
        progressBar = ProgressBar(self, iterations=len(myFiles))

        def abort(message=None):
            """Release the GUI and reset the output after a failed load."""
            progressBar.finish()
            if message is not None:
                self.infoBox.setText(message, 'error')
            self.send('Text data', None, self)
            self.controlArea.setDisabled(False)

        # Open and process each file successively...
        for myFile in myFiles:
            filePath = myFile[0]
            encoding = re.sub(r"[ ]\(.+", "", myFile[1])
            annotation_key = myFile[2]
            annotation_value = myFile[3]
            ocr_languages = myFile[5]  # SuperTextFiles
            ocr_force = myFile[6]  # SuperTextFiles

            # Try to open the file...
            self.error()
            # Start SuperTextFiles
            try:
                # Type detection is done inside the try block so that an I/O
                # error it may raise is reported like any other read failure.
                myFiletype = filetype.guess(filePath)

                if myFiletype is None:
                    fileContent = self.extract_raw_text(filePath, encoding)
                elif myFiletype.extension == "pdf":
                    # OCR is used when forced, or when the PDF has no
                    # extractable text.
                    if (ocr_force is True
                            or not self.is_textual_pdf_file(filePath)):
                        fileContent = self.get_pdf_content(
                            filePath,
                            ocr_languages,
                        )
                    else:
                        fileContent = self.extract_text_from_pdf(filePath)
                elif myFiletype.extension in IMG_FILETYPES:
                    fileContent = self.ocrize(filePath, ocr_languages)
                else:
                    # Recognized but unsupported type: report it as
                    # unreadable instead of reusing a stale or unbound value.
                    fileContent = -1
            except IOError as e:
                if "tesseract" in str(e):
                    QMessageBox.warning(None, 'Textable', str(e),
                                        QMessageBox.Ok)
                if len(myFiles) > 1:
                    message = u"Couldn't open file '%s'." % filePath
                else:
                    message = u"Couldn't open file."
                abort(message)
                return
            except Exception:
                # Unexpected failure (e.g. raised by a PDF library): release
                # the GUI before letting the exception propagate.
                progressBar.finish()
                self.controlArea.setDisabled(False)
                raise
            # End SuperTextFiles

            if fileContent is None:
                # The reader gave up without content; keep whatever message
                # it may have displayed.
                abort()
                return
            if fileContent == -1:
                abort(u"Couldn't open file.")
                return

            # Remove utf-8 BOM if necessary...
            if encoding == u'utf-8':
                fileContent = fileContent.lstrip(
                    codecs.BOM_UTF8.decode('utf-8'))

            # Normalize text (canonical decomposition then composition)...
            fileContents.append(normalize('NFC', fileContent))

            # Annotations...
            annotation = dict()
            if self.displayAdvancedSettings:
                if annotation_key and annotation_value:
                    annotation[annotation_key] = annotation_value
                if self.importFilenames and self.importFilenamesKey:
                    filename = os.path.basename(filePath)
                    annotation[self.importFilenamesKey] = filename
                if self.autoNumber and self.autoNumberKey:
                    annotation[self.autoNumberKey] = counter
                    counter += 1
            annotations.append(annotation)
            progressBar.advance()

        # Create an LTTL.Input for each file (only a single file gets the
        # widget's caption as label)...
        label = self.captionTitle if len(fileContents) == 1 else None
        for fileContent, annotation in zip(fileContents, annotations):
            myInput = Input(fileContent, label)
            segment = myInput[0]
            segment.annotations.update(annotation)
            myInput[0] = segment
            self.createdInputs.append(myInput)

        # If there's only one file, the widget's output is the created Input.
        if len(fileContents) == 1:
            self.segmentation = self.createdInputs[0]
        # Otherwise the widget's output is a concatenation...
        else:
            self.segmentation = Segmenter.concatenate(
                segmentations=self.createdInputs,
                label=self.captionTitle,
                copy_annotations=True,
                import_labels_as=None,
                sort=False,
                auto_number_as=None,
                merge_duplicates=False,
                progress_callback=None,
            )

        # Report the size of what is being sent...
        message = u'%i segment@p sent to output ' % len(self.segmentation)
        message = pluralize(message, len(self.segmentation))
        numChars = sum(
            len(Segmentation.get_data(segment.str_index))
            for segment in self.segmentation
        )
        message += u'(%i character@p).' % numChars
        message = pluralize(message, numChars)
        self.infoBox.setText(message)
        progressBar.finish()
        self.controlArea.setDisabled(False)

        self.send('Text data', self.segmentation, self)
        self.sendButton.resetSettingsChangedFlag()

    def extract_raw_text(self, filePath, encoding):
        """Read a text file and return its content as a string.

        Args:
            filePath: path of the file to read.
            encoding: name of the encoding to use, or "(auto-detect)" to let
                UniversalDetector guess it from the raw bytes.

        Returns:
            The file content, read by blocks of CHUNK_LENGTH characters whose
            line breaks are rewritten as '\\n'; or None when the content
            cannot be decoded (the error is then shown in the info box, None
            is sent to output and the control area is re-enabled).

        Raises:
            IOError: if the file cannot be opened or read.

        NOTE(review): because each block goes through splitlines(), a line
        break that falls exactly at the end of a block is dropped -- confirm
        whether this is intended.
        """
        if encoding == "(auto-detect)":
            detector = UniversalDetector()
            with open(filePath, 'rb') as fh:
                for line in fh:
                    detector.feed(line)
                    if detector.done:
                        break
            detector.close()
            encoding = detector.result['encoding']

        try:
            # Plain 'r' already translates all newline styles to '\n'; the
            # former 'rU' mode is rejected by Python 3.11 and later.
            with open(filePath, mode='r', encoding=encoding) as fh:
                chunks = [
                    '\n'.join(chunk.splitlines())
                    for chunk in iter(lambda: fh.read(CHUNK_LENGTH), "")
                ]
            return "".join(chunks)
        except UnicodeError:
            # The file list holds more than one entry only in advanced mode;
            # in that case, tell the user which file is at fault. (Any
            # progress bar belongs to the caller and is left to it.)
            if self.displayAdvancedSettings and len(self.files) > 1:
                message = u"Please select another encoding "    \
                        + u"for file %s." % filePath
            else:
                message = u"Please select another encoding."
            self.infoBox.setText(message, 'error')
            self.send('Text data', None, self)
            self.controlArea.setDisabled(False)
            return None

    def is_textual_pdf_file(self, filePath):
        """Tell whether the first page of a PDF holds extractable text.

        The file is opened with pdfplumber, using self.pdfPassword as
        password. Only the first page is examined.

        Args:
            filePath: path of the PDF file.

        Returns:
            True if the text extracted from the first page contains at least
            one non-whitespace character; False if it is None, empty or
            whitespace only, or if the document has no page.
        """
        with pdfplumber.open(filePath, password=self.pdfPassword) as fh:
            if not fh.pages:
                return False
            text = fh.pages[0].extract_text()

        # An empty string must count as "no text": "".isspace() is False, so
        # testing isspace() alone would wrongly report such a page as textual.
        return bool(text and text.strip())

    def extract_text_from_pdf(self, filePath):
        """Extract all readable text contents of a PDF file.

        The file is opened with pdfplumber, using self.pdfPassword as
        password, and the text of every page is concatenated in page order.

        Args:
            filePath: path of the PDF file.

        Returns:
            The concatenated text of all pages; a page for which no text is
            extracted contributes an empty string.
        """
        with pdfplumber.open(filePath, password=self.pdfPassword) as fh:
            # "or ''" guards against extract_text() returning None for a page
            # without text, which would otherwise break the concatenation.
            return "".join(page.extract_text() or "" for page in fh.pages)

    def get_pdf_content(self, filePath, languages):
        """Return the text of a PDF followed by the OCR output of its images.

        The text layer of every page is collected first; then each image
        referenced by the pages is passed to self.ocrize with the given
        languages. Returns -1 as soon as OCR fails for one image.
        """
        pieces = []
        with fitz.open(filePath) as doc:
            image_refs = []
            for page in doc:
                pieces.append(page.getText("text"))
                image_refs.extend(doc.getPageImageList(page.number))

            for image_ref in image_refs:
                pixmap = fitz.Pixmap(doc, image_ref[0])

                # More than 4 components: treated as CMYK and converted to
                # RGB before export.
                if pixmap.n > 4:
                    pixmap = fitz.Pixmap(fitz.csRGB, pixmap)

                recognized = self.ocrize(
                    BytesIO(pixmap.getImageData()),
                    languages,
                )
                if recognized == -1:
                    return -1
                if recognized:
                    pieces.append(recognized)

        return "".join(pieces)

    def ocrize(self, image, languages):
        """Run OCR on an image and return the recognized text.

        Args:
            image: anything accepted by Image.open (this class passes either
                a file path or a BytesIO object).
            languages: language code(s) handed to Tesseract through the
                `lang` argument; blank means "eng".

        Returns:
            The recognized text, or -1 if Tesseract reported an error. When
            the error message contains "load", a warning dialog asking to
            install the language files is displayed first.
        """
        languages = languages.strip() or "eng"
        try:
            # The context manager releases the image's file handle once the
            # OCR is done instead of leaving it to the garbage collector.
            with Image.open(image) as picture:
                return image_to_string(picture, lang=languages)
        except TesseractError as e:
            if "load" in str(e):
                QMessageBox.warning(
                    None, 'Textable',
                    "Please make sure all Tesseract parameter files for "
                    "language(s) '%s' have been installed." % languages,
                    QMessageBox.Ok)
            return -1

    def clearCreatedInputs(self):
        """Reset the stored data of every created Input, then empty the list.

        The list object itself is kept; only its content is removed.
        """
        for createdInput in self.createdInputs:
            firstSegment = createdInput[0]
            Segmentation.set_data(firstSegment.str_index, None)
        self.createdInputs.clear()

    def importList(self):
        """Display a FileDialog and import file list.

        The selected file must contain a JSON list of objects. Each object
        needs the keys 'path', 'encoding' and 'ocr_force'; the keys
        'annotation_key', 'annotation_value', 'pdf_password' and
        'ocr_languages' are optional. Entries are appended to self.files only
        if the whole list is valid; otherwise a warning dialog is shown and
        nothing is imported.
        """
        filePath, _ = QFileDialog.getOpenFileName(self, u'Import File List',
                                                  self.lastLocation,
                                                  u'Text files (*)')
        if not filePath:
            return
        # NOTE(review): this stores the path of the imported list in
        # self.file, like browse() does for a text file -- confirm that this
        # is intended.
        self.file = os.path.normpath(filePath)
        self.lastLocation = os.path.dirname(filePath)
        self.error()

        # Read the list; a file that is not valid UTF-8 is reported like a
        # file that cannot be opened.
        try:
            with codecs.open(filePath, encoding='utf8') as fileHandle:
                fileContent = fileHandle.read()
        except (IOError, UnicodeError):
            QMessageBox.warning(None, 'Textable', "Couldn't open file.",
                                QMessageBox.Ok)
            return

        try:
            json_data = json.loads(fileContent)
        except ValueError:
            QMessageBox.warning(None, 'Textable', "JSON parsing error.",
                                QMessageBox.Ok)
            return

        # Valid JSON of the wrong shape (e.g. a single object or a list of
        # strings) is rejected here instead of failing on entry.get() below.
        wellFormed = (
            isinstance(json_data, list)
            and all(isinstance(entry, dict) for entry in json_data)
        )

        temp_files = list()
        if wellFormed:
            for entry in json_data:
                path = entry.get('path', '')
                encoding = entry.get('encoding', '')
                ocrForce = entry.get('ocr_force', '')  # SuperTextFiles
                if path == '' or encoding == '' or ocrForce == '':
                    wellFormed = False
                    break
                temp_files.append((
                    path,
                    encoding,
                    entry.get('annotation_key', ''),
                    entry.get('annotation_value', ''),
                    entry.get('pdf_password', ''),  # SuperTextFiles
                    entry.get('ocr_languages', ''),  # SuperTextFiles
                    ocrForce,  # SuperTextFiles
                ))

        if not wellFormed:
            QMessageBox.warning(
                None, 'Textable',
                "Selected JSON file doesn't have the right keys "
                "and/or values.", QMessageBox.Ok)
            return

        self.files.extend(temp_files)
        if temp_files:
            self.sendButton.settingsChanged()

    def exportList(self):
        """Display a FileDialog and export file list.

        self.files is written as an indented JSON list with sorted keys.
        Every entry gets 'path', 'encoding' and 'ocr_force'; the annotation
        pair, 'pdf_password' and 'ocr_languages' are written only when set.
        A dialog reports success, or failure if the file cannot be written.

        NOTE(review): PDF passwords are written to the file as plain text.
        """
        toDump = list()
        for myfile in self.files:
            entry = {
                'path': myfile[0],
                'encoding': myfile[1],
            }
            if myfile[2] and myfile[3]:
                entry['annotation_key'] = myfile[2]
                entry['annotation_value'] = myfile[3]
            # Start SuperTextFiles
            if myfile[4]:
                entry['pdf_password'] = myfile[4]
            if myfile[5]:
                entry['ocr_languages'] = myfile[5]
            entry['ocr_force'] = myfile[6]
            # End SuperTextFiles
            toDump.append(entry)

        filePath, _ = QFileDialog.getSaveFileName(
            self,
            u'Export File List',
            self.lastLocation,
        )
        if not filePath:
            return

        self.lastLocation = os.path.dirname(filePath)
        try:
            # The context manager closes the file even if writing fails.
            with codecs.open(
                filePath,
                encoding='utf8',
                mode='w',
                errors='xmlcharrefreplace',
            ) as outputFile:
                outputFile.write(
                    normalizeCarriageReturns(
                        json.dumps(toDump, sort_keys=True, indent=4)))
        except IOError:
            # Same reporting style as importList for an unusable file.
            QMessageBox.warning(None, 'Textable', "Couldn't save file.",
                                QMessageBox.Ok)
            return
        QMessageBox.information(None, 'Textable',
                                'File list correctly exported',
                                QMessageBox.Ok)

    def browse(self):
        """Open a file dialog and record the user's choice.

        Basic mode: one file is chosen and stored in self.file, and the
        settings are flagged as changed. Advanced mode: several files may be
        chosen; their paths are joined with ' / ' into self.newFiles.
        Cancelling the dialog changes nothing.
        """
        if not self.displayAdvancedSettings:
            chosenPath, _ = QFileDialog.getOpenFileName(
                self, u'Open Text File', self.lastLocation,
                u'Text files (*)')
            if chosenPath:
                self.file = os.path.normpath(chosenPath)
                self.lastLocation = os.path.dirname(chosenPath)
                self.updateGUI()
                self.sendButton.settingsChanged()
            return

        chosenPaths, _ = QFileDialog.getOpenFileNames(
            self, u'Select Text File(s)', self.lastLocation,
            u'Text files (*)')
        if not chosenPaths:
            return
        normalizedPaths = [os.path.normpath(p) for p in chosenPaths]
        self.newFiles = u' / '.join(normalizedPaths)
        # Remember the folder of the last selected file for the next dialog.
        self.lastLocation = os.path.dirname(normalizedPaths[-1])
        self.updateGUI()

    def moveUp(self):
        """Swap the selected file with the one above it in the file list.

        Does nothing without a selection or when the first file is selected.
        The selection follows the moved file.
        """
        if not self.selectedFileLabels:
            return
        position = self.selectedFileLabels[0]
        if position <= 0:
            return
        above = position - 1
        self.files[above], self.files[position] = (
            self.files[position], self.files[above])
        self.selectedFileLabels = [above]
        self.sendButton.settingsChanged()

    def moveDown(self):
        """Swap the selected file with the one below it in the file list.

        Does nothing without a selection or when the last file is selected.
        The selection follows the moved file.
        """
        if not self.selectedFileLabels:
            return
        position = self.selectedFileLabels[0]
        if position >= len(self.files) - 1:
            return
        below = position + 1
        self.files[below], self.files[position] = (
            self.files[position], self.files[below])
        self.selectedFileLabels = [below]
        self.sendButton.settingsChanged()

    def clearAll(self):
        """Empty the file list and the selection, in place.

        The settings are then flagged as changed.
        """
        self.files.clear()
        self.selectedFileLabels.clear()
        self.sendButton.settingsChanged()

    def remove(self):
        """Delete the selected file from the file list.

        The selection is emptied afterwards and the settings are flagged as
        changed. Does nothing without a selection.
        """
        if not self.selectedFileLabels:
            return
        del self.files[self.selectedFileLabels[0]]
        self.selectedFileLabels.clear()
        self.sendButton.settingsChanged()

    def add(self):
        """Add the files listed in self.newFiles to the file list.

        self.newFiles holds one or more paths separated by ' / '. Each path
        is appended to self.files together with the current encoding
        (without its parenthesized description), annotation key and value,
        PDF password, OCR languages and OCR flag. Blank paths are skipped.
        The settings are then flagged as changed.
        """
        # The encoding is the same for every file: clean it up only once.
        encoding = re.sub(r"[ ]\(.+", "", self.encoding)
        self.files.extend(
            (
                filePath,
                encoding,
                self.newAnnotationKey,
                self.newAnnotationValue,
                self.pdfPassword,  # SuperTextFiles
                self.ocrLanguages,  # SuperTextFiles
                self.ocrForce,  # SuperTextFiles
            )
            for filePath in re.split(r' +/ +', self.newFiles)
            # Splitting an empty string yields one empty path, which must
            # not become an entry of the file list.
            if filePath.strip()
        )
        self.sendButton.settingsChanged()

    def updateGUI(self):
        """Refresh the file list labels and the state of dependent controls.

        In basic mode, only the advanced settings are hidden. In advanced
        mode, one label per file is rebuilt as left-aligned columns
        (file name, annotation, PDF password, OCR languages, OCR flag,
        encoding), the selection is restored, and the Add button and the key
        line edits are enabled or disabled according to the current values.
        """
        if not self.displayAdvancedSettings:
            self.advancedSettings.setVisible(False)
            return

        cachedLabel = (
            self.selectedFileLabels[0] if self.selectedFileLabels else None
        )

        del self.fileLabels[:]
        if self.files:
            filenames = [os.path.basename(f[0]) for f in self.files]
            annotations = ['{%s: %s}' % (f[2], f[3]) for f in self.files]
            nameWidth = max(len(n) for n in filenames) + 2
            maxAnnoLen = max(len(a) for a in annotations)
            # Start SuperTextFiles
            passwordWidth = max(len(f[4]) for f in self.files) + 2
            languagesWidth = max(len(f[5]) for f in self.files) + 2
            # End SuperTextFiles

            for entry, filename, annotation in zip(
                    self.files, filenames, annotations):
                columns = [u'%-*s' % (nameWidth, filename)]
                # The annotation column is shown only if at least one file
                # has an annotation longer than 4 characters.
                if maxAnnoLen > 4:
                    if len(annotation) > 4:
                        columns.append(
                            u'%-*s' % (maxAnnoLen + 2, annotation))
                    else:
                        columns.append(u' ' * (maxAnnoLen + 2))

                # Start SuperTextFiles
                columns.append(u'%-*s' % (passwordWidth, entry[4]))
                columns.append(u'%-*s' % (languagesWidth, entry[5]))
                columns.append(u'%-*s' % (5 + 2, str(entry[6])))
                # End SuperTextFiles

                columns.append(entry[1])
                self.fileLabels.append(u''.join(columns))

        # NOTE(review): self-assignment kept from the original; presumably
        # it makes the framework notice the in-place change -- confirm.
        self.fileLabels = self.fileLabels

        if cachedLabel is not None:
            # The pre-send callback is unset while the selection is restored.
            self.sendButton.sendIfPreCallback = None
            self.selectedFileLabels = [cachedLabel]
            self.sendButton.sendIfPreCallback = self.updateGUI

        # Adding is possible when files are given and the annotation key and
        # value are either both filled in or both empty.
        keyGiven = bool(self.newAnnotationKey)
        valueGiven = bool(self.newAnnotationValue)
        canAdd = bool(self.newFiles) and keyGiven == valueGiven
        self.addButton.setDisabled(not canAdd)

        self.autoNumberKeyLineEdit.setDisabled(not self.autoNumber)
        self.importFilenamesKeyLineEdit.setDisabled(not self.importFilenames)
        self.updateFileBoxButtons()
        self.advancedSettings.setVisible(True)

    def updateFileBoxButtons(self):
        """Enable or disable the buttons of the file box.

        Remove needs a selection; Move Up and Move Down also need room in
        the corresponding direction; Clear All and Export need a non-empty
        file list.
        """
        if self.selectedFileLabels:
            selected = self.selectedFileLabels[0]
            canRemove = True
            canMoveUp = selected > 0
            canMoveDown = selected < len(self.files) - 1
        else:
            canRemove = canMoveUp = canMoveDown = False

        self.removeButton.setDisabled(not canRemove)
        self.moveUpButton.setDisabled(not canMoveUp)
        self.moveDownButton.setDisabled(not canMoveDown)

        listIsEmpty = not len(self.files)
        self.clearAllButton.setDisabled(listIsEmpty)
        self.exportButton.setDisabled(listIsEmpty)

    def setCaption(self, title):
        """Set the widget's caption and flag the settings when it changes.

        The comparison with the current caption is made before the parent
        class updates it, and only if the widget already has a
        'captionTitle' attribute.
        """
        titleChanged = (
            'captionTitle' in dir(self) and title != self.captionTitle
        )
        super().setCaption(title)
        if titleChanged:
            self.sendButton.settingsChanged()

    def onDeleteWidget(self):
        """Release the data of the Inputs created by this widget.

        NOTE(review): presumably called by the host framework when the
        widget is removed -- confirm against the base class.
        """
        self.clearCreatedInputs()
# Example #3
# 0
class ECP(OWTextableBaseWidget):
    """Textable widget for importing XML-TEI data from the Eighteenth Century
    Poetry website (http://www.eighteenthcenturypoetry.org/)
    """

    #----------------------------------------------------------------------
    # Widget's metadata...

    name = "18th Century Poetry"
    description = "Import XML-TEI data from ECP website"
    icon = "icons/18th_century_poetry.svg"
    priority = 10

    #----------------------------------------------------------------------
    # Channel definitions (NB: no input in this case)...

    inputs = []
    outputs = [("XML-TEI data", Segmentation)]

    #----------------------------------------------------------------------
    # Settings...

    settingsHandler = VersionedSettingsHandler(
        version=__version__.rsplit(".", 1)[0])

    autoSend = settings.Setting(False)
    selectedTitles = settings.Setting([])
    titleLabels = settings.Setting([])
    filterCriterion = settings.Setting("author")
    filterValue = settings.Setting("(all)")
    importedURLs = settings.Setting([])
    displayAdvancedSettings = settings.Setting(False)

    want_main_area = False

    def __init__(self):
        """Create the widget: state attributes, then the user interface.

        The interface is made of an advanced settings checkbox, a "Filter"
        box (advanced mode only), a "Titles" box with a list and a Refresh
        button, the Send button and the Info box. Once everything is drawn,
        the title list is loaded with getTitleSeg() and data are sent if the
        Send button decides so (sendIf).
        """

        super().__init__()

        # Other attributes...
        self.segmentation = None
        self.createdInputs = list()
        self.titleSeg = None
        self.filteredTitleSeg = None
        self.filterValues = dict()
        # Page from which the title list is fetched.
        self.base_url =     \
          u"http://www.eighteenthcenturypoetry.org/works/#genres"
        # Prefix prepended to the "url" annotation of a title to download it.
        self.document_base_url =     \
          u"http://www.eighteenthcenturypoetry.org"

        # Next two instructions are helpers from TextableUtils. Corresponding
        # interface elements are declared here and actually drawn below (at
        # their position in the UI)...
        self.infoBox = InfoBox(widget=self.controlArea)
        self.sendButton = SendButton(
            widget=self.controlArea,
            master=self,
            callback=self.sendData,
            infoBoxAttribute="infoBox",
            sendIfPreCallback=self.updateGUI,
        )

        # The AdvancedSettings class, also from TextableUtils, facilitates
        # the management of basic vs. advanced interface. An object from this
        # class (here assigned to self.advancedSettings) contains two lists
        # (basicWidgets and advancedWidgets), to which the corresponding
        # widgetBoxes must be added.
        self.advancedSettings = AdvancedSettings(
            widget=self.controlArea,
            master=self,
            callback=self.updateFilterValueList,
        )

        # User interface...

        # Advanced settings checkbox (basic/advanced interface will appear
        # immediately after it)...
        self.advancedSettings.draw()

        # Filter box (advanced settings only)
        filterBox = gui.widgetBox(
            widget=self.controlArea,
            box="Filter",
            orientation="vertical",
        )
        filterCriterionCombo = gui.comboBox(
            widget=filterBox,
            master=self,
            value="filterCriterion",
            items=["author", "genre"],
            sendSelectedValue=True,
            orientation="horizontal",
            label="Criterion:",
            labelWidth=120,
            callback=self.updateFilterValueList,
            tooltip=(
                "Please select a criterion for searching the title list\n"),
        )
        filterCriterionCombo.setMinimumWidth(120)
        gui.separator(widget=filterBox, height=3)
        # Kept as an attribute, unlike the criterion combo above.
        self.filterValueCombo = gui.comboBox(
            widget=filterBox,
            master=self,
            value="filterValue",
            sendSelectedValue=True,
            orientation="horizontal",
            label="Value:",
            labelWidth=120,
            callback=self.updateTitleList,
            tooltip=("Please select a value for the chosen criterion."),
        )
        gui.separator(widget=filterBox, height=3)

        # The following lines add filterBox (and a vertical separator) to the
        # advanced interface...
        self.advancedSettings.advancedWidgets.append(filterBox)
        self.advancedSettings.advancedWidgetsAppendSeparator()

        # Title box
        titleBox = gui.widgetBox(
            widget=self.controlArea,
            box="Titles",
            orientation="vertical",
        )
        self.titleListbox = gui.listBox(
            widget=titleBox,
            master=self,
            value="selectedTitles",  # setting (list)
            labels="titleLabels",  # setting (list)
            callback=self.sendButton.settingsChanged,
            tooltip="The list of titles whose content will be imported",
        )
        self.titleListbox.setMinimumHeight(150)
        # NOTE(review): 3 is presumably Qt's ExtendedSelection mode (several
        # titles can be selected) -- confirm.
        self.titleListbox.setSelectionMode(3)
        gui.separator(widget=titleBox, height=3)
        gui.button(
            widget=titleBox,
            master=self,
            label="Refresh",
            callback=self.refreshTitleSeg,
            tooltip="Connect to ECP website and refresh list.",
        )
        gui.separator(widget=titleBox, height=3)

        gui.separator(widget=self.controlArea, height=3)

        gui.rubber(self.controlArea)

        # Now Info box and Send button must be drawn...
        self.sendButton.draw()
        self.infoBox.draw()

        # This initialization step needs to be done after infoBox has been
        # drawn (because getTitleSeg may need to display an error message).
        self.getTitleSeg()

        # Send data if autoSend.
        self.sendButton.sendIf()

        self.setMinimumWidth(350)
        self.adjustSizeWithTimer()

    def sendData(self):
        """Compute result of widget processing and send to output.

        The XML file of every selected title is downloaded from the ECP
        website and stored in an Input. A single Input is sent as is; several
        are concatenated. The annotations of each title are copied to the
        corresponding output segment. If a download fails, an error is
        displayed and None is sent on the "XML-TEI data" channel.
        """

        # Skip if title list is empty:
        if not self.titleLabels:
            return

        # Check that something has been selected...
        if not self.selectedTitles:
            self.infoBox.setText("Please select one or more titles.",
                                 "warning")
            self.send("XML-TEI data", None, self)
            return

        # Clear created Inputs.
        self.clearCreatedInputs()

        # Initialize progress bar.
        progressBar = gui.ProgressBar(self,
                                      iterations=len(self.selectedTitles))

        # Attempt to connect to ECP and retrieve the selected documents...
        xml_contents = list()
        annotations = list()
        try:
            for title in self.selectedTitles:
                titleAnnotations = self.filteredTitleSeg[title].annotations
                doc_url = self.document_base_url + titleAnnotations["url"]
                # ".../name.shtml" becomes ".../name/name.xml".
                url = re.sub(r"/([^/]+)\.shtml", r"/\1/\1.xml", doc_url)
                # The context manager closes the connection after reading.
                with urllib.request.urlopen(url) as response:
                    xml_contents.append(response.read().decode('utf-8'))
                annotations.append(titleAnnotations.copy())
                progressBar.advance()  # 1 tick on the progress bar...

        # If an error occurs (e.g. http error, or memory error)...
        # (Exception rather than a bare except, so that KeyboardInterrupt
        # and SystemExit are not swallowed.)
        except Exception:
            # Clear progress bar, which would otherwise stay displayed.
            progressBar.finish()
            # Set Info box and widget to "error" state.
            self.infoBox.setText("Couldn't download data from ECP website.",
                                 "error")
            # Reset output channel.
            self.send("XML-TEI data", None, self)
            return

        # Store downloaded XML in input objects...
        for xml_content in xml_contents:
            self.createdInputs.append(Input(xml_content, self.captionTitle))

        # If there's only one document, the widget's output is the created
        # Input.
        if len(self.createdInputs) == 1:
            self.segmentation = self.createdInputs[0]

        # Otherwise the widget's output is a concatenation...
        else:
            self.segmentation = Segmenter.concatenate(
                self.createdInputs,
                self.captionTitle,
                import_labels_as=None,
            )

        # Annotate segments...
        for idx, segment in enumerate(self.segmentation):
            segment.annotations.update(annotations[idx])
            self.segmentation[idx] = segment

        # Store imported URLs as setting.
        # NOTE(review): only the URL of the first selected title is stored,
        # although several titles may have been imported -- confirm.
        self.importedURLs = [
            self.filteredTitleSeg[self.selectedTitles[0]].annotations["url"]
        ]

        # Set status to OK and report data size...
        message = "%i segment@p sent to output " % len(self.segmentation)
        message = pluralize(message, len(self.segmentation))
        numChars = sum(
            len(Segmentation.get_data(segment.str_index))
            for segment in self.segmentation
        )
        message += "(%i character@p)." % numChars
        message = pluralize(message, numChars)
        self.infoBox.setText(message)

        # Clear progress bar.
        progressBar.finish()

        # Send token...
        self.send("XML-TEI data", self.segmentation, self)
        self.sendButton.resetSettingsChangedFlag()

    def getTitleSeg(self):
        """Get title segmentation, either saved locally or online.

        The segmentation is first looked for in the file
        "cached_title_list_ecp" located in this module's directory. If that
        file cannot be read, the list is fetched with getTitleListFromECP(),
        which may return None. When a segmentation is available, the sorted
        author and genre values are stored in self.filterValues and the
        titles are sorted alphabetically. The filter value list is refreshed
        in all cases.
        """

        # Try to open saved file in this module's directory...
        path = os.path.dirname(
            os.path.abspath(inspect.getfile(inspect.currentframe())))
        try:
            # NOTE(review): unpickling executes code found in the file; this
            # is acceptable only as long as the cache is written by this
            # widget alone.
            cachePath = os.path.join(path, "cached_title_list_ecp")
            with open(cachePath, "rb") as cacheFile:
                self.titleSeg = pickle.load(cacheFile)
        # Else (file missing, unreadable, truncated or corrupt) try to load
        # list from ECP and build new seg...
        except (IOError, EOFError, pickle.UnpicklingError):
            self.titleSeg = self.getTitleListFromECP()

        if self.titleSeg is not None:
            # Build author and genre lists...
            for criterion in ("author", "genre"):
                values = Processor.count_in_context(
                    units={
                        "segmentation": self.titleSeg,
                        "annotation_key": criterion
                    }).col_ids
                values.sort()
                self.filterValues[criterion] = values

            # Sort the segmentation alphabetically based on titles (nasty
            # hack!). This is done only when a segmentation exists: there is
            # nothing to sort when the title list could not be obtained.
            self.titleSeg.buffer.sort(key=lambda s: s.annotations["title"])

        # Update title and filter value lists (only at init and on manual
        # refresh, therefore separate from self.updateGUI).
        self.updateFilterValueList()

    def refreshTitleSeg(self):
        """Refresh title segmentation from website.

        self.titleSeg is replaced with the result of getTitleListFromECP(),
        which is None when the website cannot be accessed; the filter value
        list is then refreshed.

        NOTE(review): unlike getTitleSeg, this method neither rebuilds
        self.filterValues nor re-sorts the titles -- confirm that this is
        intended.
        """
        self.titleSeg = self.getTitleListFromECP()
        # Update title and filter value lists (only at init and on manual
        # refresh, therefore separate from self.updateGUI).
        self.updateFilterValueList()

    def getTitleListFromECP(self):
        """Fetch the list of titles from the ECP website.

        Downloads the page at self.base_url, strips accents, then extracts
        the genre list and, within it, one segment per work. The regexes
        used attach the annotation keys "genre", "author", "url" and
        "title". The resulting segmentation is pickled (best effort) to a
        file named "cached_title_list_ecp" in this module's directory.

        Returns:
            The title segmentation, or None if the website could not be
            accessed. In the latter case the info box is put in a warning
            state, self.titleLabels is emptied and None is sent on the
            "XML-TEI data" output channel.
        """

        self.infoBox.customMessage(
            "Fetching data from ECP website, please wait")

        # Attempt to connect to ECP...
        try:
            # The context manager closes the connection even if reading or
            # decoding fails.
            with urllib.request.urlopen(self.base_url) as response:
                base_html = response.read().decode('utf-8')
            self.infoBox.customMessage("Done fetching data from ECP website.")

        # If unable to connect (somehow)... Exception (rather than a bare
        # except) so that KeyboardInterrupt/SystemExit are not swallowed.
        except Exception:

            # Set Info box and widget to "warning" state.
            self.infoBox.noDataSent(warning="Couldn't access ECP website.")

            # Empty title list box.
            self.titleLabels = list()

            # Reset output channel.
            self.send("XML-TEI data", None, self)
            return None

        # Otherwise store HTML content in LTTL Input object.
        base_html_seg = Input(base_html)

        # Remove accents from the data...
        recoded_seg, _ = Segmenter.recode(base_html_seg, remove_accents=True)

        # Extract table containing titles...
        genresListSeg = Segmenter.import_xml(
            segmentation=recoded_seg,
            element="ul",
            conditions={"id": re.compile(r"^genres-list")},
        )

        # NOTE: the patterns below need "." to match newlines. This is
        # requested with re.DOTALL rather than a trailing inline "(?s)",
        # because global inline flags that are not at the start of the
        # pattern are rejected by recent Python versions.

        # Extract genre annotation...
        genreSeg = Segmenter.tokenize(
            segmentation=genresListSeg,
            regexes=[(
                re.compile(
                    r'<a id[^>]+>(.+?)</a.+?(?=<a id|$)', re.DOTALL),
                "tokenize",
                {"genre": "&1"},
            )],
            import_annotations=False,
        )

        # Extract works...
        titleSeg = Segmenter.tokenize(
            segmentation=genreSeg,
            regexes=[(
                re.compile(r'<li class="bibl".+?</span>', re.DOTALL),
                "tokenize",
            )],
        )

        # Extract annotations...
        titleSeg = Segmenter.tokenize(
            segmentation=titleSeg,
            regexes=[
                (re.compile(r"^.*>\n(.+?)</span>.*$", re.DOTALL),
                 "tokenize", {
                     "author": "&1"
                 }),
                (re.compile(r'^.*href="(/works/.+?\.shtml)">.*$', re.DOTALL),
                 "tokenize", {
                     "url": "&1"
                 }),
                (re.compile(r'^.*shtml">(.*)</a>.*$', re.DOTALL),
                 "tokenize", {
                     "title": "&1"
                 }),
            ],
            merge_duplicates=True,
        )

        # Try to save list in this module's directory for future reference.
        # Caching is best effort: an I/O failure is deliberately ignored.
        path = os.path.dirname(
            os.path.abspath(inspect.getfile(inspect.currentframe())))
        try:
            cache_path = os.path.join(path, "cached_title_list_ecp")
            with open(cache_path, "wb") as cache_file:
                pickle.dump(titleSeg, cache_file, -1)
        except IOError:
            pass

        # Remove warning (if any)...
        self.error(0)
        self.warning(0)

        return titleSeg

    def updateFilterValueList(self):
        """Update the list of filter values, then the list of titles.

        In Advanced settings mode (and if a title segmentation is loaded),
        the filter value combo box is repopulated with "(all)" followed by
        the values stored in self.filterValues for the current criterion.
        self.filterValue is then reset to "(all)" if it is no longer one of
        the combo box entries, and self.updateTitleList() is called.
        """

        # In Advanced settings mode, populate filter value list...
        if self.titleSeg is not None and self.displayAdvancedSettings:
            self.filterValueCombo.clear()
            self.filterValueCombo.addItem("(all)")
            # The values for the criterion may not have been built for the
            # current title list; offer only "(all)" in that case instead
            # of failing with a KeyError.
            for filterValue in self.filterValues.get(self.filterCriterion, ()):
                self.filterValueCombo.addItem(filterValue)

        # Reset filterValue if needed...
        availableValues = [
            self.filterValueCombo.itemText(i)
            for i in range(self.filterValueCombo.count())
        ]
        if self.filterValue not in availableValues:
            self.filterValue = "(all)"
        else:
            # NOTE(review): self-assignment kept on purpose; presumably it
            # makes the GUI control re-sync with the attribute -- confirm.
            self.filterValue = self.filterValue

        self.updateTitleList()

    def updateTitleList(self):
        """Update the list of titles.

        Does nothing if no title segmentation is loaded. Otherwise:

        1. In Advanced settings mode with a filter value other than
           "(all)", keeps only the titles whose annotation for the current
           criterion equals the filter value.
        2. Merges entries sharing the same author and title into one entry
           whose "genre" annotation lists all their genres.
        3. Rebuilds self.filteredTitleSeg and self.titleLabels, both in
           alphabetical order of title so that positions in the two agree.
        4. Empties self.selectedTitles if some imported URL is no longer
           among the filtered titles, and notifies the send button.
        """

        # If titleSeg has not been loaded for some reason, skip.
        if self.titleSeg is None:
            return

        # In Advanced settings mode, get list of selected titles...
        if self.displayAdvancedSettings and self.filterValue != "(all)":
            # The filter value is literal text, so it is escaped before
            # being embedded in the pattern (names may contain characters
            # such as "(", "." or "?").
            self.filteredTitleSeg, _ = Segmenter.select(
                segmentation=self.titleSeg,
                regex=re.compile(r"^%s$" % re.escape(self.filterValue)),
                annotation_key=self.filterCriterion,
            )
        else:
            self.filteredTitleSeg = self.titleSeg

        # Group titles appearing under different genres: build a dictionary
        # with (author, title) as key and the matching segments as value...
        unique_titles = dict()
        for title in self.filteredTitleSeg:
            title_id = (
                title.annotations["author"],
                title.annotations["title"],
            )
            unique_titles.setdefault(title_id, []).append(title)

        # Keep the first segment of each group, with a new "genre"
        # annotation listing all genres of the group...
        new_title_segments = list()
        for equivalent_titles in unique_titles.values():
            merged_title = equivalent_titles[0]
            title_genres = {t.annotations["genre"] for t in equivalent_titles}
            # NOTE(review): this overwrites the annotation of the segment
            # obtained from the iteration above; if that object is shared
            # with self.titleSeg the merged value persists across calls --
            # confirm against the Segmentation implementation.
            merged_title.annotations["genre"] = ", ".join(sorted(title_genres))
            new_title_segments.append(merged_title)

        # Sort by title so that the n-th segment always corresponds to the
        # n-th (alphabetically sorted) label, even when self.titleSeg is
        # not itself sorted by title. The sort is stable, so an already
        # sorted list keeps its order.
        new_title_segments.sort(key=lambda s: s.annotations["title"])

        self.filteredTitleSeg = Segmentation(None)
        self.filteredTitleSeg.extend(new_title_segments)

        # Populate titleLabels list with the titles, adding specification
        # (author and genre, depending on criterion)...
        unfiltered = (not self.displayAdvancedSettings
                      or self.filterValue == "(all)")
        show_author = unfiltered or self.filterCriterion != "author"
        show_genre = unfiltered or self.filterCriterion != "genre"
        titleLabels = list()
        for segment in new_title_segments:
            specs = list()
            if show_author:
                specs.append(segment.annotations["author"])
            if show_genre:
                specs.append(segment.annotations["genre"])
            titleLabels.append(
                segment.annotations["title"] + " (%s)" % "; ".join(specs))
        self.titleLabels = titleLabels

        # Reset selectedTitles if needed...
        filtered_urls = set(
            u.annotations["url"] for u in self.filteredTitleSeg)
        if not set(self.importedURLs).issubset(filtered_urls):
            self.selectedTitles = list()
        else:
            # NOTE(review): self-assignment kept on purpose; presumably it
            # makes the list box re-sync with the attribute -- confirm.
            self.selectedTitles = self.selectedTitles

        self.sendButton.settingsChanged()

    def updateGUI(self):
        """Bring the interface in line with the current widget state.

        Shows or hides the advanced settings according to
        self.displayAdvancedSettings, then re-assigns self.selectedTitles
        to itself when at least one title label is available.
        """
        showAdvanced = bool(self.displayAdvancedSettings)
        self.advancedSettings.setVisible(showAdvanced)

        # Nothing to re-select when the title list is empty.
        if len(self.titleLabels) == 0:
            return
        self.selectedTitles = self.selectedTitles

    def clearCreatedInputs(self):
        """Discard every Input object created by this widget.

        For each created input, the data stored under the str_index of its
        first segment is set to None; the list of created inputs is then
        emptied in place.
        """
        for createdInput in self.createdInputs:
            Segmentation.set_data(createdInput[0].str_index, None)
        self.createdInputs[:] = []

    def onDeleteWidget(self):
        """Release created Input objects when the widget is deleted.

        Overridden method; it simply delegates to clearCreatedInputs().
        """
        self.clearCreatedInputs()

    # The following method needs to be copied (without any change) into
    # every Textable widget...

    def setCaption(self, title):
        """Set the widget caption and flag the settings as changed if needed.

        The parent implementation is always called with *title*. If the
        widget already has a 'captionTitle' attribute and *title* differs
        from it, the send button is additionally notified that settings
        have changed.
        """
        if 'captionTitle' in dir(self):
            # Compare before delegating to the parent implementation, so the
            # check is made against the caption as it was on entry.
            changed = title != self.captionTitle
            super().setCaption(title)
            if changed:
                self.sendButton.settingsChanged()
        else:
            super().setCaption(title)
Exemple #4
0
    def __init__(self):
        """Widget creator.

        Initializes the instance attributes, builds the user interface
        (advanced settings checkbox, filter box, title box, send button and
        info box), loads the title segmentation through self.getTitleSeg()
        and finally sends data if the send button decides so (sendIf).
        """

        super().__init__()

        # Other attributes...
        self.segmentation = None
        self.createdInputs = list()
        self.titleSeg = None
        self.filteredTitleSeg = None
        self.filterValues = dict()
        self.base_url =     \
          u"http://www.eighteenthcenturypoetry.org/works/#genres"
        self.document_base_url =     \
          u"http://www.eighteenthcenturypoetry.org"

        # Next two instructions are helpers from TextableUtils. Corresponding
        # interface elements are declared here and actually drawn below (at
        # their position in the UI)...
        self.infoBox = InfoBox(widget=self.controlArea)
        self.sendButton = SendButton(
            widget=self.controlArea,
            master=self,
            callback=self.sendData,
            infoBoxAttribute="infoBox",
            sendIfPreCallback=self.updateGUI,
        )

        # The AdvancedSettings class, also from TextableUtils, facilitates
        # the management of basic vs. advanced interface. An object from this
        # class (here assigned to self.advancedSettings) contains two lists
        # (basicWidgets and advancedWidgets), to which the corresponding
        # widgetBoxes must be added.
        self.advancedSettings = AdvancedSettings(
            widget=self.controlArea,
            master=self,
            callback=self.updateFilterValueList,
        )

        # User interface...

        # Advanced settings checkbox (basic/advanced interface will appear
        # immediately after it)...
        self.advancedSettings.draw()

        # Filter box (advanced settings only)
        filterBox = gui.widgetBox(
            widget=self.controlArea,
            box="Filter",
            orientation="vertical",
        )
        # Changing the criterion repopulates the filter value list.
        filterCriterionCombo = gui.comboBox(
            widget=filterBox,
            master=self,
            value="filterCriterion",
            items=["author", "genre"],
            sendSelectedValue=True,
            orientation="horizontal",
            label="Criterion:",
            labelWidth=120,
            callback=self.updateFilterValueList,
            tooltip=(
                "Please select a criterion for searching the title list\n"),
        )
        filterCriterionCombo.setMinimumWidth(120)
        gui.separator(widget=filterBox, height=3)
        # Changing the value only refreshes the title list. The combo box is
        # kept as an attribute because updateFilterValueList() refills it.
        self.filterValueCombo = gui.comboBox(
            widget=filterBox,
            master=self,
            value="filterValue",
            sendSelectedValue=True,
            orientation="horizontal",
            label="Value:",
            labelWidth=120,
            callback=self.updateTitleList,
            tooltip=("Please select a value for the chosen criterion."),
        )
        gui.separator(widget=filterBox, height=3)

        # The following lines add filterBox (and a vertical separator) to the
        # advanced interface...
        self.advancedSettings.advancedWidgets.append(filterBox)
        self.advancedSettings.advancedWidgetsAppendSeparator()

        # Title box
        titleBox = gui.widgetBox(
            widget=self.controlArea,
            box="Titles",
            orientation="vertical",
        )
        self.titleListbox = gui.listBox(
            widget=titleBox,
            master=self,
            value="selectedTitles",  # setting (list)
            labels="titleLabels",  # setting (list)
            callback=self.sendButton.settingsChanged,
            tooltip="The list of titles whose content will be imported",
        )
        self.titleListbox.setMinimumHeight(150)
        # NOTE(review): 3 presumably stands for Qt's extended selection
        # mode -- confirm against QAbstractItemView.SelectionMode.
        self.titleListbox.setSelectionMode(3)
        gui.separator(widget=titleBox, height=3)
        gui.button(
            widget=titleBox,
            master=self,
            label="Refresh",
            callback=self.refreshTitleSeg,
            tooltip="Connect to ECP website and refresh list.",
        )
        gui.separator(widget=titleBox, height=3)

        gui.separator(widget=self.controlArea, height=3)

        gui.rubber(self.controlArea)

        # Now Info box and Send button must be drawn...
        self.sendButton.draw()
        self.infoBox.draw()

        # This initialization step needs to be done after infoBox has been
        # drawn (because getTitleSeg may need to display an error message).
        self.getTitleSeg()

        # Send data if autoSend.
        self.sendButton.sendIf()

        self.setMinimumWidth(350)
        self.adjustSizeWithTimer()
Exemple #5
0
    def __init__(self, *args, **kwargs):
        """Initialize attributes and build the basic and advanced interface.

        All positional and keyword arguments are forwarded unchanged to the
        parent constructor. The interface consists of a basic "Source" box
        (folder path + Browse), an advanced "Sources" box (folder list with
        its buttons, plus the fields used to add a folder) and an advanced
        "Options" box (auto-numbering), followed by the send button and the
        info box. A call to sendButton.sendIf is scheduled with a
        zero-delay single-shot timer at the end.
        """
        super().__init__(*args, **kwargs)

        # Other attributes...
        self.segmentation = None
        self.operation = "no"
        self.applyInclusion = False
        self.applyExclusion = False
        self.applySampling = True
        self.samplingRate = 100
        self.createdInputs = list()
        self.folderLabels = list()
        self.selectedFolderLabels = list()
        self.rootFolderPath = u''
        self.inclusionsUser = u''
        self.exclusionsUser = u''
        self.newAnnotationKey = u''
        self.newAnnotationValue = u''

        # self.folder is a dictionary whose keys are: 'rootPath', 'maxDepth',
        # 'inclusionsUser', 'exclusionsUser', 'samplingRate' and 'fileList'
        self.folder = dict()

        # self.folders is a list of previously defined "self.folder" dictionaries
        self.folders = list()

        # self.inclusionList is the default inclusion list (used in minimal mode, ...
        # ... and in advanced mode when no inclusion has been selected)
        self.inclusionList = [".txt", ".html", ".xml", ".csv", ".rtf"]

        # self.exclusionList is the default (empty) exclusion list.
        # NOTE(review): presumably used like self.inclusionList, i.e. in
        # minimal mode and when no exclusion has been entered -- confirm.
        self.exclusionList = []

        self.infoBox = InfoBox(widget=self.controlArea)

        self.sendButton = SendButton(
            widget=self.controlArea,
            master=self,
            callback=self.sendData,
            infoBoxAttribute='infoBox',
            sendIfPreCallback=self.updateGUI,
        )
        self.advancedSettings = AdvancedSettings(
            widget=self.controlArea,
            master=self,
            callback=self.sendButton.settingsChanged,
        )

        # GUI...

        # Advanced settings checkbox...
        self.advancedSettings.draw()

        # BASIC GUI...

        # Basic folder box
        basicFolderBox = gui.widgetBox(
            widget=self.controlArea,
            box=u'Source',
            orientation='vertical',
            addSpace=False,
        )
        basicFolderBoxLine1 = gui.widgetBox(
            widget=basicFolderBox,
            box=False,
            orientation='horizontal',
        )
        # In the basic interface, editing the path calls self.add directly.
        gui.lineEdit(
            widget=basicFolderBoxLine1,
            master=self,
            value='rootFolderPath',
            orientation='horizontal',
            label=u'Folder path:',
            labelWidth=101,
            callback=self.add,
            tooltip=(u"The path of the folder."),
        )
        gui.separator(widget=basicFolderBoxLine1, width=5)
        gui.button(
            widget=basicFolderBoxLine1,
            master=self,
            label=u'Browse',
            callback=self.browse,
            tooltip=(u"Open a dialog for selecting a top folder."),
        )

        gui.separator(widget=basicFolderBox, width=3)
        self.advancedSettings.basicWidgets.append(basicFolderBox)
        self.advancedSettings.basicWidgetsAppendSeparator()

        # ADVANCED GUI...

        # folder box
        folderBox = gui.widgetBox(
            widget=self.controlArea,
            box=u'Sources',
            orientation='vertical',
            addSpace=False,
        )
        folderBoxLine1 = gui.widgetBox(
            widget=folderBox,
            box=False,
            orientation='horizontal',
            addSpace=True,
        )
        self.folderListbox = gui.listBox(
            widget=folderBoxLine1,
            master=self,
            value='selectedFolderLabels',
            labels='folderLabels',
            callback=self.updatefolderBoxButtons,
            tooltip=(u"The list of folders whose content will be imported.\n"
                     u"\nIn the output segmentation, the content of each\n"
                     u"folder appears in the same position as in the list.\n"
                     u"\nColumn 1 shows the folder's name.\n"
                     u"Column 2 shows the folder's depth.\n"
                     u"Column 3 shows the inclusions filter.\n"
                     u"Column 4 shows the exclusions filter.\n"
                     u"Column 5 shows the folder's level of sampling."),
        )
        # The list box gets a Courier font.
        # NOTE(review): presumably so the columns described in the tooltip
        # line up -- confirm.
        font = QFont()
        font.setFamily('Courier')
        font.setStyleHint(QFont.Courier)
        font.setPixelSize(12)
        self.folderListbox.setFont(font)
        # Column of buttons acting on the folder list
        folderBoxCol2 = gui.widgetBox(
            widget=folderBoxLine1,
            orientation='vertical',
        )
        self.moveUpButton = gui.button(
            widget=folderBoxCol2,
            master=self,
            label=u'Move Up',
            callback=self.moveUp,
            tooltip=(u"Move the selected folder upward in the list."),
        )
        self.moveDownButton = gui.button(
            widget=folderBoxCol2,
            master=self,
            label=u'Move Down',
            callback=self.moveDown,
            tooltip=(u"Move the selected folder downward in the list."),
        )
        self.removeButton = gui.button(
            widget=folderBoxCol2,
            master=self,
            label=u'Remove',
            callback=self.remove,
            tooltip=(u"Remove the selected folder from the list."),
        )
        self.clearAllButton = gui.button(
            widget=folderBoxCol2,
            master=self,
            label=u'Clear All',
            callback=self.clearAll,
            tooltip=(u"Remove all folders from the list."),
        )
        # The export and import buttons are created disabled, with an empty
        # label.
        self.exportButton = gui.button(
            widget=folderBoxCol2,
            master=self,
            label=u'',
            callback=self.exportList,
            disabled=True,
            tooltip=(u"Open a dialog for selecting a folder where the folder\n"
                     u"list can be exported in JSON format."),
        )
        self.importButton = gui.button(
            widget=folderBoxCol2,
            master=self,
            label=u'',
            callback=self.importList,
            disabled=True,
            tooltip=(u"Open a dialog for selecting a folder list to\n"
                     u"import (in JSON format). folders from this list\n"
                     u"will be added to those already imported."),
        )
        folderBoxLine2 = gui.widgetBox(
            widget=folderBox,
            box=False,
            orientation='vertical',
        )
        # Add folder box
        addFolderBox = gui.widgetBox(
            widget=folderBoxLine2,
            box=True,
            orientation='vertical',
        )
        addFolderBoxLine1 = gui.widgetBox(
            widget=addFolderBox,
            orientation='horizontal',
        )
        # Folder path input (bound to the same 'rootFolderPath' attribute as
        # the basic interface field, but editing it only updates the GUI)
        gui.lineEdit(
            widget=addFolderBoxLine1,
            master=self,
            value='rootFolderPath',
            orientation='horizontal',
            label=u'Folder path:',
            labelWidth=101,
            callback=self.updateGUI,
            tooltip=(u"The paths of the folders that will be added to the\n"
                     u"list when button 'Add' is clicked.\n\n"
                     u"Successive paths must be separated with ' / ' \n"
                     u"(whitespace + slash + whitespace). Their order in\n"
                     u"the list will be the same as in this field."),
        )
        gui.separator(widget=addFolderBoxLine1, width=5)
        # Button Browse
        gui.button(
            widget=addFolderBoxLine1,
            master=self,
            label=u'Browse',
            callback=self.browse,
            tooltip=(u"Open a dialog for selecting a top folder.\n\n"
                     u"Selected folder paths will appear in the field to\n"
                     u"the left of this button afterwards, ready to be\n"
                     u"added to the list when button 'Add' is clicked."),
        )
        gui.separator(widget=addFolderBox, width=10)

        # Filter box to input include
        gui.separator(widget=addFolderBox, width=3)
        includeBoxLine1 = gui.widgetBox(
            widget=addFolderBox,
            box=False,
            orientation='horizontal',
        )

        # Include box: the checkbox enables/disables the line edit created
        # just below (the lambda looks the name up only when it is called,
        # so defining includeLineEdit afterwards is fine).
        gui.checkBox(
            widget=includeBoxLine1,
            master=self,
            value='applyInclusion',
            label=u'Include',
            labelWidth=100,
            callback=lambda: includeLineEdit.setDisabled(not self.
                                                         applyInclusion),
            tooltip=(u"Choose the inclusion(s)"),
        )
        includeLineEdit = gui.lineEdit(
            widget=includeBoxLine1,
            master=self,
            value='inclusionsUser',
            orientation='horizontal',
            label=u'',
            disabled=True,
            labelWidth=101,
            tooltip=(u"This field lets you specify a custom filter\n"
                     u"to select the folders to be\n"
                     u"added to the list."),
        )

        # Filter box to exclude
        gui.separator(widget=addFolderBox, width=3)
        excludeBoxLine1 = gui.widgetBox(
            widget=addFolderBox,
            box=False,
            orientation='horizontal',
        )
        # Exclude box: same mechanism as above; despite its name,
        # includeLineEdit2 is the line edit bound to 'exclusionsUser'.
        gui.checkBox(
            widget=excludeBoxLine1,
            master=self,
            value='applyExclusion',
            label=u'Exclude',
            labelWidth=100,
            disabled=False,
            callback=lambda: includeLineEdit2.setDisabled(not self.
                                                          applyExclusion),
            tooltip=(u"Exclude the inclusion(s)"),
        )
        includeLineEdit2 = gui.lineEdit(
            widget=excludeBoxLine1,
            master=self,
            value='exclusionsUser',
            orientation='horizontal',
            label=u'',
            disabled=True,
            labelWidth=101,
            tooltip=(u"This field lets you specify a custom filter\n"
                     u"to select the folders to be\n"
                     u"added to the list."),
        )

        # Sampling box to input the level of sampling
        gui.separator(widget=addFolderBox, width=3)
        samplingBoxLine1 = gui.widgetBox(
            widget=addFolderBox,
            box=False,
            orientation='horizontal',
        )
        # Check box for sampling (enables/disables the spin box below)
        gui.checkBox(
            widget=samplingBoxLine1,
            master=self,
            value='applySampling',
            label=u'Sampling',
            labelWidth=100,
            disabled=False,
            callback=lambda: samplingSpin.setDisabled(not self.applySampling),
            tooltip=(u"Choose the sampling level"),
        )

        # Sampling rate, restricted to the range 10..100
        samplingSpin = gui.spin(
            widget=samplingBoxLine1,
            master=self,
            value='samplingRate',
            minv=10,
            maxv=100,
            labelWidth=50,
            orientation='horizontal',
            tooltip=(u"sampling level"),
        )
        gui.separator(widget=addFolderBox, width=3)
        self.addButton = gui.button(
            widget=addFolderBox,
            master=self,
            label=u'Add',
            callback=self.add,
            tooltip=(u"Add the folder(s) currently displayed in the\n"
                     u"'folders' text field to the list.\n\n"
                     u"Each of these folders will be associated with the\n"
                     u"specified encoding and annotation (if any).\n\n"
                     u"Other folders may be selected afterwards and\n"
                     u"assigned a different encoding and annotation."),
        )
        self.advancedSettings.advancedWidgets.append(folderBox)
        self.advancedSettings.advancedWidgetsAppendSeparator()

        # Options box...
        optionsBox = gui.widgetBox(
            widget=self.controlArea,
            box=u'Options',
            orientation='vertical',
            addSpace=False,
        )
        # First options line: created but left empty here.
        optionsBoxLine1 = gui.widgetBox(
            widget=optionsBox,
            box=False,
            orientation='horizontal',
        )

        gui.separator(widget=optionsBox, width=3)
        optionsBoxLine2 = gui.widgetBox(
            widget=optionsBox,
            box=False,
            orientation='horizontal',
        )
        gui.checkBox(
            widget=optionsBoxLine2,
            master=self,
            value='autoNumber',
            label=u'Auto-number with key:',
            labelWidth=180,
            callback=self.sendButton.settingsChanged,
            tooltip=(u"Annotate folders with increasing numeric indices."),
        )
        self.autoNumberKeyLineEdit = gui.lineEdit(
            widget=optionsBoxLine2,
            master=self,
            value='autoNumberKey',
            orientation='horizontal',
            callback=self.sendButton.settingsChanged,
            tooltip=(u"Annotation key for folder auto-numbering."),
        )
        gui.separator(widget=optionsBox, width=3)
        self.advancedSettings.advancedWidgets.append(optionsBox)
        self.advancedSettings.advancedWidgetsAppendSeparator()

        gui.rubber(self.controlArea)

        # Send button...
        self.sendButton.draw()

        # Info box...
        self.infoBox.draw()

        self.adjustSizeWithTimer()
        # Schedule sendIf with a zero-delay single-shot timer rather than
        # calling it directly from the constructor.
        QTimer.singleShot(0, self.sendButton.sendIf)
Exemple #6
0
class OWTextableTextTree(OWTextableBaseWidget):
    """Orange widget for loading text folders"""

    name = "Text Tree"
    description = "Import data from raw text trees"

    icon = "icons/Textfolders.png"

    icon = "icons/textTree.svg"

    priority = 2

    # Input and output channels...
    inputs = [('Message', JSONMessage, "inputMessage", widget.Single)]
    outputs = [('Text data', Segmentation)]

    settingsHandler = VersionedSettingsHandler(
        version=__version__.rsplit(".", 1)[0])

    # Settings...
    autoSend = settings.Setting(True)
    folders = settings.Setting([])
    encoding = settings.Setting('iso-8859-1')
    operation = settings.Setting('nothing')
    sampling = settings.Setting(100)
    autoNumber = settings.Setting(False)
    autoNumberKey = settings.Setting(u'num')
    importFilenames = settings.Setting(True)
    importFolderName = settings.Setting(True)

    lastLocation = settings.Setting('.')
    displayAdvancedSettings = settings.Setting(False)
    folder = settings.Setting(u'')

    want_main_area = False

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        # Other attributes...
        self.segmentation = None
        self.operation = "no"
        self.applyInclusion = False
        self.applyExclusion = False
        self.applySampling = True
        self.samplingRate = 100
        self.createdInputs = list()
        self.folderLabels = list()
        self.selectedFolderLabels = list()
        self.rootFolderPath = u''
        self.inclusionsUser = u''
        self.exclusionsUser = u''
        self.newAnnotationKey = u''
        self.newAnnotationValue = u''

        # self.folder is a dictionary whose keys are :'rootPath', 'maxDepth','inclusionsUser','exclusionsUser', ...
        # ... 'samplingRate' and 'fileList'
        self.folder = dict()

        # self.folders is a list of previously defined "self.folder" dictionaries
        self.folders = list()

        # self.inclusionList is the default inclusion list (used in minimal mode, ...
        # ... and in advanced mode when no inclusion has been selected)
        self.inclusionList = [".txt", ".html", ".xml", ".csv", ".rtf"]

        # self.inclusionList is the default null inclusion list (used in minimal mode, ...
        # ... and in advanced mode when no inclusion has been selected)
        self.exclusionList = []

        self.infoBox = InfoBox(widget=self.controlArea)

        self.sendButton = SendButton(
            widget=self.controlArea,
            master=self,
            callback=self.sendData,
            infoBoxAttribute='infoBox',
            sendIfPreCallback=self.updateGUI,
        )
        self.advancedSettings = AdvancedSettings(
            widget=self.controlArea,
            master=self,
            callback=self.sendButton.settingsChanged,
        )

        # GUI...

        # Advanced settings checkbox...
        self.advancedSettings.draw()

        # BASIC GUI...

        # Basic folder box
        basicFolderBox = gui.widgetBox(
            widget=self.controlArea,
            box=u'Source',
            orientation='vertical',
            addSpace=False,
        )
        basicFolderBoxLine1 = gui.widgetBox(
            widget=basicFolderBox,
            box=False,
            orientation='horizontal',
        )
        gui.lineEdit(
            widget=basicFolderBoxLine1,
            master=self,
            value='rootFolderPath',
            orientation='horizontal',
            label=u'Folder path:',
            labelWidth=101,
            callback=self.add,
            tooltip=(u"The path of the folder."),
        )
        gui.separator(widget=basicFolderBoxLine1, width=5)
        gui.button(
            widget=basicFolderBoxLine1,
            master=self,
            label=u'Browse',
            callback=self.browse,
            tooltip=(u"Open a dialog for selecting a top folder."),
        )

        gui.separator(widget=basicFolderBox, width=3)
        self.advancedSettings.basicWidgets.append(basicFolderBox)
        self.advancedSettings.basicWidgetsAppendSeparator()

        # ADVANCED GUI...

        # folder box
        folderBox = gui.widgetBox(
            widget=self.controlArea,
            box=u'Sources',
            orientation='vertical',
            addSpace=False,
        )
        folderBoxLine1 = gui.widgetBox(
            widget=folderBox,
            box=False,
            orientation='horizontal',
            addSpace=True,
        )
        self.folderListbox = gui.listBox(
            widget=folderBoxLine1,
            master=self,
            value='selectedFolderLabels',
            labels='folderLabels',
            callback=self.updatefolderBoxButtons,
            tooltip=(u"The list of folders whose content will be imported.\n"
                     u"\nIn the output segmentation, the content of each\n"
                     u"folder appears in the same position as in the list.\n"
                     u"\nColumn 1 shows the folder's name.\n"
                     u"Column 2 shows the folder's depth.\n"
                     u"Column 3 shows the inclusions filter.\n"
                     u"Column 4 shows the exclusions filter.\n"
                     u"Column 5 shows the folder's level of sampling."),
        )
        font = QFont()
        font.setFamily('Courier')
        font.setStyleHint(QFont.Courier)
        font.setPixelSize(12)
        self.folderListbox.setFont(font)
        folderBoxCol2 = gui.widgetBox(
            widget=folderBoxLine1,
            orientation='vertical',
        )
        self.moveUpButton = gui.button(
            widget=folderBoxCol2,
            master=self,
            label=u'Move Up',
            callback=self.moveUp,
            tooltip=(u"Move the selected folder upward in the list."),
        )
        self.moveDownButton = gui.button(
            widget=folderBoxCol2,
            master=self,
            label=u'Move Down',
            callback=self.moveDown,
            tooltip=(u"Move the selected folder downward in the list."),
        )
        self.removeButton = gui.button(
            widget=folderBoxCol2,
            master=self,
            label=u'Remove',
            callback=self.remove,
            tooltip=(u"Remove the selected folder from the list."),
        )
        self.clearAllButton = gui.button(
            widget=folderBoxCol2,
            master=self,
            label=u'Clear All',
            callback=self.clearAll,
            tooltip=(u"Remove all folders from the list."),
        )
        self.exportButton = gui.button(
            widget=folderBoxCol2,
            master=self,
            label=u'',
            callback=self.exportList,
            disabled=True,
            tooltip=(u"Open a dialog for selecting a folder where the folder\n"
                     u"list can be exported in JSON format."),
        )
        self.importButton = gui.button(
            widget=folderBoxCol2,
            master=self,
            label=u'',
            callback=self.importList,
            disabled=True,
            tooltip=(u"Open a dialog for selecting a folder list to\n"
                     u"import (in JSON format). folders from this list\n"
                     u"will be added to those already imported."),
        )
        folderBoxLine2 = gui.widgetBox(
            widget=folderBox,
            box=False,
            orientation='vertical',
        )
        # Add folder box
        addFolderBox = gui.widgetBox(
            widget=folderBoxLine2,
            box=True,
            orientation='vertical',
        )
        addFolderBoxLine1 = gui.widgetBox(
            widget=addFolderBox,
            orientation='horizontal',
        )
        # Folder path input
        gui.lineEdit(
            widget=addFolderBoxLine1,
            master=self,
            value='rootFolderPath',
            orientation='horizontal',
            label=u'Folder path:',
            labelWidth=101,
            callback=self.updateGUI,
            tooltip=(u"The paths of the folders that will be added to the\n"
                     u"list when button 'Add' is clicked.\n\n"
                     u"Successive paths must be separated with ' / ' \n"
                     u"(whitespace + slash + whitespace). Their order in\n"
                     u"the list will be the same as in this field."),
        )
        gui.separator(widget=addFolderBoxLine1, width=5)
        # Button Browse
        gui.button(
            widget=addFolderBoxLine1,
            master=self,
            label=u'Browse',
            callback=self.browse,
            tooltip=(u"Open a dialog for selecting a top folder.\n\n"
                     u"Selected folder paths will appear in the field to\n"
                     u"the left of this button afterwards, ready to be\n"
                     u"added to the list when button 'Add' is clicked."),
        )
        gui.separator(widget=addFolderBox, width=10)

        # Filter box to input include
        gui.separator(widget=addFolderBox, width=3)
        includeBoxLine1 = gui.widgetBox(
            widget=addFolderBox,
            box=False,
            orientation='horizontal',
        )

        # Include box
        gui.checkBox(
            widget=includeBoxLine1,
            master=self,
            value='applyInclusion',
            label=u'Include',
            labelWidth=100,
            callback=lambda: includeLineEdit.setDisabled(not self.
                                                         applyInclusion),
            tooltip=(u"Choose the inclusion(s)"),
        )
        includeLineEdit = gui.lineEdit(
            widget=includeBoxLine1,
            master=self,
            value='inclusionsUser',
            orientation='horizontal',
            label=u'',
            disabled=True,
            labelWidth=101,
            tooltip=(u"This field lets you specify a custom filter\n"
                     u"to select the folders to be\n"
                     u"added to the list."),
        )

        # Filter box to exclude
        gui.separator(widget=addFolderBox, width=3)
        excludeBoxLine1 = gui.widgetBox(
            widget=addFolderBox,
            box=False,
            orientation='horizontal',
        )
        # Exclude box
        gui.checkBox(
            widget=excludeBoxLine1,
            master=self,
            value='applyExclusion',
            label=u'Exclude',
            labelWidth=100,
            disabled=False,
            callback=lambda: includeLineEdit2.setDisabled(not self.
                                                          applyExclusion),
            tooltip=(u"Exclude the inclusion(s)"),
        )
        includeLineEdit2 = gui.lineEdit(
            widget=excludeBoxLine1,
            master=self,
            value='exclusionsUser',
            orientation='horizontal',
            label=u'',
            disabled=True,
            labelWidth=101,
            tooltip=(u"This field lets you specify a custom filter\n"
                     u"to select the folders to be\n"
                     u"added to the list."),
        )

        # Sampling box to input the level of sampling
        gui.separator(widget=addFolderBox, width=3)
        samplingBoxLine1 = gui.widgetBox(
            widget=addFolderBox,
            box=False,
            orientation='horizontal',
        )
        # Check box for sampling
        gui.checkBox(
            widget=samplingBoxLine1,
            master=self,
            value='applySampling',
            label=u'Sampling',
            labelWidth=100,
            disabled=False,
            callback=lambda: samplingSpin.setDisabled(not self.applySampling),
            tooltip=(u"Choose the sampling level"),
        )

        samplingSpin = gui.spin(
            widget=samplingBoxLine1,
            master=self,
            value='samplingRate',
            minv=10,
            maxv=100,
            labelWidth=50,
            orientation='horizontal',
            tooltip=(u"sampling level"),
        )
        gui.separator(widget=addFolderBox, width=3)
        self.addButton = gui.button(
            widget=addFolderBox,
            master=self,
            label=u'Add',
            callback=self.add,
            tooltip=(u"Add the folder(s) currently displayed in the\n"
                     u"'folders' text field to the list.\n\n"
                     u"Each of these folders will be associated with the\n"
                     u"specified encoding and annotation (if any).\n\n"
                     u"Other folders may be selected afterwards and\n"
                     u"assigned a different encoding and annotation."),
        )
        self.advancedSettings.advancedWidgets.append(folderBox)
        self.advancedSettings.advancedWidgetsAppendSeparator()

        # Options box...
        optionsBox = gui.widgetBox(
            widget=self.controlArea,
            box=u'Options',
            orientation='vertical',
            addSpace=False,
        )
        optionsBoxLine1 = gui.widgetBox(
            widget=optionsBox,
            box=False,
            orientation='horizontal',
        )

        gui.separator(widget=optionsBox, width=3)
        optionsBoxLine2 = gui.widgetBox(
            widget=optionsBox,
            box=False,
            orientation='horizontal',
        )
        gui.checkBox(
            widget=optionsBoxLine2,
            master=self,
            value='autoNumber',
            label=u'Auto-number with key:',
            labelWidth=180,
            callback=self.sendButton.settingsChanged,
            tooltip=(u"Annotate folders with increasing numeric indices."),
        )
        self.autoNumberKeyLineEdit = gui.lineEdit(
            widget=optionsBoxLine2,
            master=self,
            value='autoNumberKey',
            orientation='horizontal',
            callback=self.sendButton.settingsChanged,
            tooltip=(u"Annotation key for folder auto-numbering."),
        )
        gui.separator(widget=optionsBox, width=3)
        self.advancedSettings.advancedWidgets.append(optionsBox)
        self.advancedSettings.advancedWidgetsAppendSeparator()

        gui.rubber(self.controlArea)

        # Send button...
        self.sendButton.draw()

        # Info box...
        self.infoBox.draw()

        self.adjustSizeWithTimer()
        QTimer.singleShot(0, self.sendButton.sendIf)

    def inputMessage(self, message):
        """Handle JSON message on input connection.

        The message content is expected to be a JSON list of objects, each
        holding a non-empty 'path' and an 'encoding'.  On success the folder
        list is replaced by one entry per object; on failure an error is
        shown in the info box and None is sent on the 'Text data' channel.

        Args:
            message: object whose ``content`` attribute holds the JSON text;
                a falsy message is ignored.
        """
        if not message:
            return
        self.displayAdvancedSettings = True
        self.advancedSettings.setVisible(True)
        self.clearAll()
        self.infoBox.inputChanged()

        try:
            jsonData = json.loads(message.content)
        except (ValueError, TypeError):
            self.infoBox.setText(
                u"Please make sure that incoming message is valid JSON.",
                'error')
            self.send('Text data', None, self)
            return

        # Anything but a list of objects cannot describe folders.
        entries = jsonData if isinstance(jsonData, list) else [None]

        tempFolders = list()
        for entry in entries:
            path = entry.get('path', '') if isinstance(entry, dict) else ''
            encoding = (
                entry.get('encoding', '') if isinstance(entry, dict) else ''
            )
            if not isinstance(path, str) or path == '' or encoding == '':
                self.infoBox.setText(
                    u"Please verify keys and values of incoming "
                    u"JSON message.", 'error')
                self.send('Text data', None, self)
                return
            # Same keys as the folder description built by openFileList(),
            # which is what updateGUI() and sendData() read.
            # NOTE(review): the folder is not scanned here, so its file list
            # starts empty -- confirm whether a scan should be triggered.
            tempFolders.append({
                'rootPath': os.path.normpath(path),
                'maxDepth': 0,
                'inclusionsUser': u'',
                'exclusionsUser': u'',
                'samplingRate': 100,
                'fileList': list(),
            })
        self.folders.extend(tempFolders)
        self.sendButton.settingsChanged()

    def sendData(self):
        """Load folders, create and send segmentation.

        One Input is created per file of the relevant folder(s) (all folders
        of the list in advanced mode, the single selected folder otherwise)
        and annotated with the file's name, depth level, path, detected
        encoding and the folder name found at each depth.  When
        auto-numbering is enabled, each file also receives an increasing
        index under the chosen annotation key.  A single file is sent as is;
        several files are concatenated into one segmentation.
        """

        # Check that there's something on input...
        if (self.displayAdvancedSettings
                and not self.folders) or not (self.rootFolderPath
                                              or self.displayAdvancedSettings):
            self.infoBox.setText(u'Please select input folder.', 'warning')
            self.send('Text data', None, self)
            return

        # Check that autoNumberKey is not empty (if necessary)...
        if self.displayAdvancedSettings and self.autoNumber:
            if self.autoNumberKey:
                autoNumberKey = self.autoNumberKey
            else:
                self.infoBox.setText(
                    u'Please enter an annotation key for auto-numbering.',
                    'warning')
                self.send('Text data', None, self)
                return
        else:
            autoNumberKey = None

        # Clear created Inputs...
        self.clearCreatedInputs()
        annotations = list()
        counter = 1

        if self.displayAdvancedSettings:
            myFolders = self.folders
        else:
            myFolders = [self.folder]

        # Annotations...
        allFileListContent = list()
        for myFolder in myFolders:
            for myFile in myFolder['fileList']:
                annotation = dict()
                annotation['file name'] = myFile['fileName']
                annotation['file depth level'] = myFile['depthLvl']
                annotation['file path'] = myFile['absoluteFilePath']
                try:
                    annotation['file encoding, confidence'] = myFile[
                        'encoding'] + ", " + str(myFile['encodingConfidence'])
                except TypeError:
                    # No encoding was detected for this file.
                    annotation['file encoding, confidence'] = "unknown"

                # One annotation per folder level leading to the file.
                for key in myFile:
                    if key.startswith('depth_'):
                        annotation[key] = myFile[key]

                # Auto-numbering was validated above but never applied:
                # number the files in output order.
                if autoNumberKey is not None:
                    annotation[autoNumberKey] = counter
                    counter += 1

                annotations.append(annotation)
                allFileListContent.append(myFile['fileContent'])

        # Create an LTTL.Input for each files...
        if len(allFileListContent) == 1:
            label = self.captionTitle
        else:
            label = None
        for content, annotation in zip(allFileListContent, annotations):
            myInput = Input(content, label)
            segment = myInput[0]
            segment.annotations.update(annotation)
            myInput[0] = segment
            self.createdInputs.append(myInput)

        # If there's only one file, the widget's output is the created Input.
        if len(allFileListContent) == 1:
            self.segmentation = self.createdInputs[0]
        # Otherwise the widget's output is a concatenation...
        else:
            self.segmentation = Segmenter.concatenate(
                segmentations=self.createdInputs,
                label=self.captionTitle,
                copy_annotations=True,
                import_labels_as=None,
                sort=False,
                auto_number_as=None,
                merge_duplicates=False,
                progress_callback=None,
            )
        message = u'%i segment@p sent to output ' % len(self.segmentation)
        message = pluralize(message, len(self.segmentation))
        numChars = 0
        for segment in self.segmentation:
            segmentLength = len(Segmentation.get_data(segment.str_index))
            numChars += segmentLength
        message += u'(%i character@p).' % numChars
        message = pluralize(message, numChars)
        self.infoBox.setText(message)

        self.send('Text data', self.segmentation, self)
        self.sendButton.resetSettingsChangedFlag()

    def clearCreatedInputs(self):
        """Reset the stored data of every created Input, then empty the list."""
        for createdInput in self.createdInputs:
            Segmentation.set_data(createdInput[0].str_index, None)
        self.createdInputs.clear()

    def importList(self):
        """Display a file dialog and import a folder list from a JSON file.

        The file must contain a JSON list of objects with non-empty 'path'
        and 'encoding' values (and optional 'annotation_key' /
        'annotation_value').  Valid entries are appended to self.folders;
        any problem is reported in a message box and nothing is imported.
        """
        folderPath = QFileDialog.getOpenFileName(self, u'Import folder List',
                                                 self.lastLocation,
                                                 u'Text folders (*)')
        # Qt5 bindings return a (path, selectedFilter) tuple where Qt4
        # bindings returned the bare path; accept both.
        if isinstance(folderPath, tuple):
            folderPath = folderPath[0]
        if not folderPath:
            return
        # NOTE(review): this stores the path of the JSON file in the folder
        # path field -- looks like a leftover, confirm it is intended.
        self.rootFolderPath = os.path.normpath(folderPath)
        self.lastLocation = os.path.dirname(folderPath)
        self.error()
        try:
            # The context manager closes the handle even if reading fails.
            with codecs.open(folderPath, encoding='utf8') as folderHandle:
                folderContent = folderHandle.read()
        except (IOError, UnicodeError):
            QMessageBox.warning(None, 'Textable', "Couldn't open folder.",
                                QMessageBox.Ok)
            return
        try:
            jsonData = json.loads(folderContent)
        except ValueError:
            QMessageBox.warning(None, 'Textable', "JSON parsing error.",
                                QMessageBox.Ok)
            return

        # Anything but a list of objects cannot describe folders.
        entries = jsonData if isinstance(jsonData, list) else [None]

        tempFolders = list()
        for entry in entries:
            if isinstance(entry, dict):
                path = entry.get('path', '')
                encoding = entry.get('encoding', '')
                annotationKey = entry.get('annotation_key', '')
                annotationValue = entry.get('annotation_value', '')
            else:
                path = encoding = annotationKey = annotationValue = ''
            if path == '' or encoding == '':
                QMessageBox.warning(
                    None, 'Textable',
                    "Selected JSON folder doesn't have the right keys "
                    "and/or values.", QMessageBox.Ok)
                return
            # NOTE(review): entries are stored as tuples (the layout read by
            # exportList) whereas updateGUI/sendData read dict entries --
            # confirm the intended layout before enabling the import button.
            tempFolders.append((
                path,
                encoding,
                annotationKey,
                annotationValue,
            ))
        self.folders.extend(tempFolders)
        if tempFolders:
            self.sendButton.settingsChanged()

    def exportList(self):
        """Display a file dialog and export the folder list in JSON format.

        Each folder entry is dumped with its 'path' and 'encoding', plus
        'annotation_key' / 'annotation_value' when both are set.  Nothing is
        written if the dialog is cancelled.
        """
        # NOTE(review): entries are read by position (tuple layout written by
        # importList) whereas add() stores dict entries -- confirm the
        # intended layout before enabling the export button.
        toDump = list()
        for myFolder in self.folders:
            exported = {
                'path': myFolder[0],
                'encoding': myFolder[1],
            }
            if myFolder[2] and myFolder[3]:
                exported['annotation_key'] = myFolder[2]
                exported['annotation_value'] = myFolder[3]
            toDump.append(exported)

        folderPath = QFileDialog.getSaveFileName(
            self,
            u'Export folder List',
            self.lastLocation,
        )
        # Qt5 bindings return a (path, selectedFilter) tuple where Qt4
        # bindings returned the bare path; accept both.
        if isinstance(folderPath, tuple):
            folderPath = folderPath[0]

        if folderPath:
            self.lastLocation = os.path.dirname(folderPath)
            # The context manager closes the file even if writing fails.
            with codecs.open(
                folderPath,
                encoding='utf8',
                mode='w',
                errors='xmlcharrefreplace',
            ) as outputfolder:
                outputfolder.write(
                    normalizeCarriageReturns(
                        json.dumps(toDump, sort_keys=True, indent=4)))
            QMessageBox.information(None, 'Textable',
                                    'folder list correctly exported',
                                    QMessageBox.Ok)

    def getFileList(self):
        """Walk the root folder and build the filtered, sampled file list.

        Every file found under self.rootFolderPath is described by a dict
        with keys 'absoluteFilePath', 'fileName', 'depthLvl' and one
        'depth_N' key per folder level (depth_0 being the root folder
        itself).  The list is then narrowed by the user inclusion filter
        (or, when it is off, by the default extensions of
        self.inclusionList) and by the user exclusion filter.  A non-empty
        result is sampled and opened; self.fileList and self.maxDepth are
        updated in all cases.
        """
        # Work on an absolute, normalised root so that relative paths and
        # trailing separators do not skew the depth computation.  An empty
        # path is kept as is: os.walk('') yields nothing.
        rootPath = self.rootFolderPath
        if rootPath:
            rootPath = os.path.abspath(rootPath)

        # Parent of the selected folder: depth annotations are expressed
        # relative to it, so that depth_0 is the selected folder's name.
        initialRootParentPath, _ = os.path.split(rootPath)

        fileList = list()

        # fileListExt is a list of files matching default extension
        fileListExt = list()
        depthList = list()

        # str.endswith accepts a tuple: one test per file, and a file can
        # no longer be listed twice when several extensions match.
        defaultExtensions = tuple(self.inclusionList)

        progressBarZero = gui.ProgressBar(self, iterations=1)
        try:
            # os.walk yields, for each directory, its path, the names of its
            # subdirectories and the names of its files (symlinked
            # directories are not followed).
            for currPath, _dirNames, fileNames in os.walk(rootPath):

                # Folder names from the selected folder down to currPath
                currRelPath = os.path.relpath(
                    currPath, initialRootParentPath or os.curdir)
                currRelPathList = os.path.normpath(currRelPath).split(os.sep)
                currDepth = len(currRelPathList) - 1

                for fileName in fileNames:
                    # 'fileContent', 'encoding' and 'encodingConfidence' keys
                    # are added later by openFileList()
                    fileInfo = dict()
                    depthList.append(currDepth)

                    fileInfo['absoluteFilePath'] = os.path.join(
                        currPath, fileName)
                    fileInfo['fileName'] = fileName
                    fileInfo['depthLvl'] = currDepth

                    # One annotation per depth level, holding the name of
                    # the folder browsed at that level
                    for level, folderName in enumerate(currRelPathList):
                        fileInfo['depth_' + str(level)] = folderName

                    # Apply default file extension filter
                    if fileName.endswith(defaultExtensions):
                        fileListExt.append(fileInfo)

                    fileList.append(fileInfo)

            # apply inclusion filter
            if self.applyInclusion:
                fileListIncl = [
                    fileInfo for fileInfo in fileList
                    if self.match(fileInfo['fileName'],
                                  self.inclusionsUserAsList)
                ]
            else:
                fileListIncl = fileListExt

            # apply exclusion filter
            if self.applyExclusion:
                fileListExcl = [
                    fileInfo for fileInfo in fileListIncl
                    if not self.match(fileInfo['fileName'],
                                      self.exclusionsUserAsList)
                ]
            else:
                fileListExcl = fileListIncl

            # output file list
            self.fileList = fileListExcl

            if self.fileList:
                self.maxDepth = max(depthList)
                self.fileList = self.sampleFileList()
                self.openFileList()
            else:
                self.maxDepth = 0
        finally:
            # Always release the progress bar, even if the scan fails.
            progressBarZero.finish()

    # Test if file contains one of the patterns in patternList
    def match(self, file, patternList):
        """Return True if at least one pattern of patternList occurs in file.

        Patterns are tested with the ``in`` operator (plain substring test);
        an empty pattern list never matches.
        """
        return any(pattern in file for pattern in patternList)

    def openFileList(self):
        """Read and decode every file of self.fileList, then set self.folder.

        Each file is read as bytes, its encoding is guessed with chardet and
        its content decoded with the first encoding that works (the detected
        one first, then the predefined encodings).  The keys 'encoding',
        'encodingConfidence' and 'fileContent' are added to the file's dict.

        If a file cannot be read, an error is shown in the info box, None is
        sent on 'Text data' and the method returns without updating
        self.fileList or self.folder.
        """
        tempFileList = list()
        predefinedEncodings = getPredefinedEncodings()

        progressBarOpen = gui.ProgressBar(self, iterations=len(self.fileList))
        try:
            for fileInfo in self.fileList:
                try:
                    filePath = fileInfo['absoluteFilePath']
                except (TypeError, KeyError):
                    # Entry without a usable path: nothing to open.
                    progressBarOpen.advance()
                    continue

                try:
                    with open(filePath, 'rb') as openedFile:
                        rawContent = openedFile.read()
                except IOError:
                    if len(self.fileList) > 1:
                        message = u"Couldn't open file '%s'." % filePath
                    else:
                        message = u"Couldn't open file."
                    self.infoBox.setText(message, 'error')
                    self.send('Text data', None, self)
                    return

                charsetDict = chardet.detect(rawContent)
                detectedEncoding = charsetDict['encoding']
                detectedConfidence = charsetDict['confidence']

                # Chunking functionnality should be added here

                # Try the detected encoding first, then the predefined ones.
                candidates = [
                    encoding for encoding in predefinedEncodings
                    if encoding != detectedEncoding
                ]
                if detectedEncoding:
                    candidates.insert(0, detectedEncoding)

                for encoding in candidates:
                    try:
                        decodedContent = rawContent.decode(encoding)
                    except (UnicodeError, LookupError, TypeError):
                        continue
                    # Stop at the first success so that a later, more
                    # permissive encoding cannot override the detected one.
                    break
                else:
                    # No candidate worked: keep what can be read rather than
                    # reusing the content of a previous file.
                    decodedContent = rawContent.decode(
                        'utf-8', errors='replace')

                self.fileContent = decodedContent
                fileInfo['encoding'] = detectedEncoding
                fileInfo['fileContent'] = decodedContent
                fileInfo['encodingConfidence'] = detectedConfidence
                progressBarOpen.advance()
                tempFileList.append(fileInfo)

            self.fileList = tempFileList

            self.folder = {
                'rootPath': self.rootFolderPath,
                'maxDepth': self.maxDepth,
                'inclusionsUser': self.inclusionsUser,
                'exclusionsUser': self.exclusionsUser,
                'samplingRate': self.samplingRate,
                'fileList': self.fileList
            }
        finally:
            # Release the progress bar on every exit path.
            progressBarOpen.finish()

    def browse(self):
        """Let the user pick a folder in a QFileDialog and store its path.

        In basic mode the folder is scanned right away and becomes the
        current folder; in advanced mode only the path field is filled.
        """
        selectedPath = QFileDialog.getExistingDirectory(
            self,
            u'Select Folder(s)',
            self.lastLocation,
        )
        if not selectedPath:
            # Dialog cancelled
            return

        selectedPath = os.path.normpath(selectedPath)
        self.rootFolderPath = selectedPath
        self.lastLocation = selectedPath

        if not self.displayAdvancedSettings:
            self.getFileList()
            self.folder = dict(
                rootPath=self.rootFolderPath,
                maxDepth=self.maxDepth,
                fileList=self.fileList,
            )
            self.sendButton.settingsChanged()

        self.updateGUI()

    def moveUp(self):
        """Swap the selected folder with the one just above it in the list."""
        if not self.selectedFolderLabels:
            return
        position = self.selectedFolderLabels[0]
        if position <= 0:
            # Already at the top
            return
        above = position - 1
        self.folders[above], self.folders[position] = (
            self.folders[position], self.folders[above])
        # Keep the moved folder selected
        self.selectedFolderLabels.listBox.item(above).setSelected(1)
        self.sendButton.settingsChanged()

    def moveDown(self):
        """Swap the selected folder with the one just below it in the list."""
        if not self.selectedFolderLabels:
            return
        position = self.selectedFolderLabels[0]
        if position >= len(self.folders) - 1:
            # Already at the bottom
            return
        below = position + 1
        self.folders[below], self.folders[position] = (
            self.folders[position], self.folders[below])
        # Keep the moved folder selected
        self.selectedFolderLabels.listBox.item(below).setSelected(1)
        self.sendButton.settingsChanged()

    def clearAll(self):
        """Empty the folder list and the selection, then flag the change."""
        self.folders.clear()
        del self.selectedFolderLabels[:]
        self.sendButton.settingsChanged()

    def remove(self):
        """Delete the selected folder from the list and drop the selection."""
        if not self.selectedFolderLabels:
            return
        del self.folders[self.selectedFolderLabels[0]]
        del self.selectedFolderLabels[:]
        self.sendButton.settingsChanged()

    def add(self):
        """Scan the current root folder and add it to the folder list.

        The comma-separated inclusion and exclusion filters are first split
        into lists of non-empty, stripped patterns.  The folder is then
        scanned with getFileList(); it is appended to self.folders only if
        the scan produced a new folder description.
        """

        # Identify sequences separated by a comma (,) and deletes existing whitespaces
        self.inclusionsUserAsList = [
            x.strip() for x in self.inclusionsUser.split(",") if x.strip()
        ]
        self.exclusionsUserAsList = [
            x.strip() for x in self.exclusionsUser.split(",") if x.strip()
        ]

        # getFileList() only replaces self.folder (through openFileList)
        # when files were found and could all be read; remember the current
        # value to detect the case where nothing new was produced.
        previousFolder = getattr(self, 'folder', None)

        # Calling the GetFileList function returns a self.fileList list of all files corresponding to either defaults
        # or optional settings
        self.getFileList()

        if getattr(self, 'folder', None) is previousFolder:
            # Appending here would duplicate a stale folder description.
            if not self.fileList:
                self.infoBox.setText(
                    u'No matching file found in selected folder.', 'warning')
            return

        self.folders.append(self.folder)

        self.sendButton.settingsChanged()

    def sampleFileList(self):
        """Return a random sample of self.fileList.

        The sample holds self.samplingRate percent of the files (rounded
        up), drawn from a shuffled copy so that self.fileList itself is left
        untouched.
        """
        shuffledFiles = list(self.fileList)

        # Shuffling first lets files from all folders be picked randomly
        random.shuffle(shuffledFiles)

        sampleSize = int(
            math.ceil(len(shuffledFiles) * (self.samplingRate / 100.0)))
        return shuffledFiles[:sampleSize]

    def updateGUI(self):
        """Update GUI state.

        In basic mode the advanced widgets are simply hidden.  In advanced
        mode the folder labels are rebuilt (name, depth, inclusion filter,
        exclusion filter and sampling rate of each folder), the previous
        selection is restored and the buttons and line edits are enabled or
        disabled according to the current settings.
        """
        if not self.displayAdvancedSettings:
            self.advancedSettings.setVisible(False)
            return

        # Remember the selection: rebuilding the labels may lose it
        cachedLabel = (self.selectedFolderLabels[0]
                       if self.selectedFolderLabels else None)

        del self.folderLabels[:]
        folderLabels = []

        if self.folders:
            rootPaths = [f['rootPath'] for f in self.folders]
            depths = ['%s' % f['maxDepth'] for f in self.folders]
            inclusions = [f['inclusionsUser'] for f in self.folders]
            exclusions = [f['exclusionsUser'] for f in self.folders]
            rates = ['%s' % f['samplingRate'] for f in self.folders]
            names = [os.path.basename(p) for p in rootPaths]

            # Left-align names in a column two characters wider than the
            # longest one
            nameColumn = u'%-' + str(max(len(n) for n in names) + 2) + u's'

            for name, depth, included, excluded, rate in zip(
                    names, depths, inclusions, exclusions, rates):
                folderLabels.append(
                    nameColumn % name
                    + "[d]:{" + depth + "} "
                    + "[i]:{" + included + "} "
                    + "[e]:{" + excluded + "} "
                    + "[s]:{" + rate + "%}"
                )

        self.folderLabels = folderLabels

        if cachedLabel is not None:
            # Restore the selection without re-entering updateGUI through
            # the send button's pre-callback
            self.sendButton.sendIfPreCallback = None
            self.selectedFolderLabels.listBox.item(cachedLabel).setSelected(1)
            self.sendButton.sendIfPreCallback = self.updateGUI

        # A folder can be added when a path is given and the annotation key
        # and value are either both set or both empty
        canAdd = bool(self.rootFolderPath) and (
            bool(self.newAnnotationKey) == bool(self.newAnnotationValue))
        self.addButton.setDisabled(not canAdd)

        self.autoNumberKeyLineEdit.setDisabled(not self.autoNumber)

        self.updatefolderBoxButtons()
        self.advancedSettings.setVisible(True)

    def updatefolderBoxButtons(self):
        """Update state of folder box buttons.

        'Remove' needs a selection, 'Move Up' / 'Move Down' need a selection
        that is not already first / last, 'Clear All' needs a non-empty
        folder list.  The export button is disabled in every case.
        """
        selection = self.selectedFolderLabels
        if selection:
            selectedIndex = selection[0]
            lastIndex = len(self.folders) - 1
            self.removeButton.setDisabled(False)
            self.moveUpButton.setDisabled(not (selectedIndex > 0))
            self.moveDownButton.setDisabled(not (selectedIndex < lastIndex))
        else:
            for button in (self.moveUpButton, self.moveDownButton,
                           self.removeButton):
                button.setDisabled(True)

        self.clearAllButton.setDisabled(not len(self.folders))
        self.exportButton.setDisabled(True)

    def setCaption(self, title):
        """Set the widget caption and flag the settings as changed if needed.

        The change is only signalled to the send button when the widget
        already has a 'captionTitle' that differs from the new title.
        """
        titleChanged = ('captionTitle' in dir(self)
                        and title != self.captionTitle)
        super().setCaption(title)
        if titleChanged:
            self.sendButton.settingsChanged()

    def onDeleteWidget(self):
        """Clear the inputs created by this widget (see clearCreatedInputs)."""
        self.clearCreatedInputs()
# Example #7
# 0
class OWTextableTextTree(OWTextableBaseWidget):
    """Orange widget for loading text folders"""

    name = "Text Tree"
    description = "Import data from raw text trees"

    # Only one icon can be in effect; the former first assignment
    # ("icons/Textfolders.png") was immediately overwritten by this one.
    icon = "icons/textTree.svg"

    priority = 2

    # Input and output channels...
    inputs = [
        ('Message', JSONMessage, "inputMessage", widget.Single)
    ]
    outputs = [('Text data', Segmentation)]

    settingsHandler = VersionedSettingsHandler(
        version=__version__.rsplit(".", 1)[0]
    )

    # Settings...
    autoSend = settings.Setting(True)
    folders = settings.Setting([])
    encoding = settings.Setting('iso-8859-1')
    operation = settings.Setting('nothing')
    sampling = settings.Setting(100)
    autoNumber = settings.Setting(False)
    autoNumberKey = settings.Setting(u'num')
    importFilenames = settings.Setting(True)
    importFolderName = settings.Setting(True)
    importFolderNameKey = settings.Setting(u'folderName')
    importFileNameKey = settings.Setting(u'filename')
    # Annotation keys, one per folder depth. Each depth has its own
    # setting; previously 'FolderDepth2Key' was assigned three times and
    # therefore ended up holding the key meant for depth 4.
    FolderDepth1Key = settings.Setting(u'depth 1')
    FolderDepth2Key = settings.Setting(u'depth 2')
    FolderDepth3Key = settings.Setting(u'depth 3')
    FolderDepth4Key = settings.Setting(u'depth 4')
    FolderDepthLvl = settings.Setting(u'depth level')

    lastLocation = settings.Setting('.')
    displayAdvancedSettings = settings.Setting(False)
    folder = settings.Setting(u'')

    want_main_area = False

    def __init__(self, *args, **kwargs):
        """Initialize the widget state and build its interface.

        The interface is made of a basic 'Source' box (single folder path),
        an advanced 'Sources' box (folder list, filters, sampling) and an
        'Options' box, followed by the send button and the info box.
        """
        super().__init__(*args, **kwargs)

        # Other attributes...
        self.segmentation = None
        self.operation = "no"
        # State of the 'Include only', 'Exclude' and 'Sampling' check boxes.
        self.applyInclusion = False
        self.applyExclusion = False
        self.applySampling = True
        self.samplingRate = 100
        self.createdInputs = list()
        self.folderLabels = list()
        self.selectedfolderLabels = list()
        self.rootFolderPath = u''
        # Raw content of the inclusion/exclusion text fields; add() splits
        # them on commas.
        self.inclusionsUser = u''
        self.exclusionsUser = u''
        self.newAnnotationKey = u''
        self.newAnnotationValue = u''
        self.folders = list()  # list of dictionaries, one per folder added by add()
        self.inclusionList = [".txt",".html",".xml",".csv"]  # file extensions retained by getFileList()

        # self.exclusionList = [".png,",".PNG",".jpg",".JPG",".gif",".GIF",".tiff",".TIFF",".jpeg",".JPEG",".DS_Store"] # by default exclusions : img files, .DS_Store (macOS)
        self.exclusionList = []  # empty by default
        self.infoBox = InfoBox(widget=self.controlArea)
        # self.fileList = list() #output file list

        self.sendButton = SendButton(
            widget=self.controlArea,
            master=self,
            callback=self.sendData,
            infoBoxAttribute='infoBox',
            sendIfPreCallback=self.updateGUI,
        )
        self.advancedSettings = AdvancedSettings(
            widget=self.controlArea,
            master=self,
            callback=self.sendButton.settingsChanged,
        )

        # GUI...

        # Advanced settings checkbox...
        self.advancedSettings.draw()

        # BASIC GUI...

        # Basic folder box
        basicfolderBox = gui.widgetBox(
            widget=self.controlArea,
            box=u'Source',
            orientation='vertical',
            addSpace=False,
        )
        basicfolderBoxLine1 = gui.widgetBox(
            widget=basicfolderBox,
            box=False,
            orientation='horizontal',
        )
        # In the basic interface, editing the path adds the folder directly.
        gui.lineEdit(
            widget=basicfolderBoxLine1,
            master=self,
            value='rootFolderPath',
            orientation='horizontal',
            label=u'Folder path:',
            labelWidth=101,
            callback=self.add,
            tooltip=(
                u"The path of the folder."
            ),
        )
        gui.separator(widget=basicfolderBoxLine1, width=5)
        gui.button(
            widget=basicfolderBoxLine1,
            master=self,
            label=u'Browse',
            callback=self.browse,
            tooltip=(
                u"Open a dialog for selecting a top folder."
            ),
        )
        # Disabled: encoding selector of the basic interface.
        #gui.separator(widget=basicfolderBox, width=3)
        #gui.comboBox(
#            widget=basicfolderBox,
#            master=self,
#            value='encoding',
#            items=getPredefinedEncodings(),
#            sendSelectedValue=True,
#            orientation='horizontal',
#            label=u'Encoding:',
#            labelWidth=101,
#            callback=self.sendButton.settingsChanged,
#            tooltip=(
#                u"Select input folder(s) encoding."
#            ),
#        )
        gui.separator(widget=basicfolderBox, width=3)
        self.advancedSettings.basicWidgets.append(basicfolderBox)
        self.advancedSettings.basicWidgetsAppendSeparator()

        # ADVANCED GUI...

        # folder box
        folderBox = gui.widgetBox(
            widget=self.controlArea,
            box=u'Sources',
            orientation='vertical',
            addSpace=False,
        )
        folderBoxLine1 = gui.widgetBox(
            widget=folderBox,
            box=False,
            orientation='horizontal',
            addSpace=True,
        )
        # List box bound to 'folderLabels' (content) and
        # 'selectedfolderLabels' (selection).
        self.folderListbox = gui.listBox(
            widget=folderBoxLine1,
            master=self,
            value='selectedfolderLabels',
            labels='folderLabels',
            callback=self.updatefolderBoxButtons,
            tooltip=(
                u"The list of folders whose content will be imported.\n"
                u"\nIn the output segmentation, the content of each\n"
                u"folder appears in the same position as in the list.\n"
                u"\nColumn 1 shows the folder's name.\n"
                u"Column 2 shows the folder's annotation (if any).\n"
                u"Column 3 shows the folder's encoding."
            ),
        )
        # Courier font with a Courier style hint for the list box entries.
        font = QFont()
        font.setFamily('Courier')
        font.setStyleHint(QFont.Courier)
        font.setPixelSize(12)
        self.folderListbox.setFont(font)
        # Column of buttons acting on the folder list.
        folderBoxCol2 = gui.widgetBox(
            widget=folderBoxLine1,
            orientation='vertical',
        )
        self.moveUpButton = gui.button(
            widget=folderBoxCol2,
            master=self,
            label=u'Move Up',
            callback=self.moveUp,
            tooltip=(
                u"Move the selected folder upward in the list."
            ),
        )
        self.moveDownButton = gui.button(
            widget=folderBoxCol2,
            master=self,
            label=u'Move Down',
            callback=self.moveDown,
            tooltip=(
                u"Move the selected folder downward in the list."
            ),
        )
        self.removeButton = gui.button(
            widget=folderBoxCol2,
            master=self,
            label=u'Remove',
            callback=self.remove,
            tooltip=(
                u"Remove the selected folder from the list."
            ),
        )
        self.clearAllButton = gui.button(
            widget=folderBoxCol2,
            master=self,
            label=u'Clear All',
            callback=self.clearAll,
            tooltip=(
                u"Remove all folders from the list."
            ),
        )
        self.exportButton = gui.button(
            widget=folderBoxCol2,
            master=self,
            label=u'Export List',
            callback=self.exportList,
            tooltip=(
                u"Open a dialog for selecting a folder where the folder\n"
                u"list can be exported in JSON format."
            ),
        )
        self.importButton = gui.button(
            widget=folderBoxCol2,
            master=self,
            label=u'Import List',
            callback=self.importList,
            tooltip=(
                u"Open a dialog for selecting a folder list to\n"
                u"import (in JSON format). folders from this list\n"
                u"will be added to those already imported."
            ),
        )
        folderBoxLine2 = gui.widgetBox(
            widget=folderBox,
            box=False,
            orientation='vertical',
        )
        # Add folder box
        addfolderBox = gui.widgetBox(
            widget=folderBoxLine2,
            box=True,
            orientation='vertical',
        )
        addfolderBoxLine1 = gui.widgetBox(
            widget=addfolderBox,
            orientation='horizontal',
        )
        # Folder path input (same 'rootFolderPath' attribute as the basic
        # field, but here editing only refreshes the interface)
        gui.lineEdit(
            widget=addfolderBoxLine1,
            master=self,
            value='rootFolderPath',
            orientation='horizontal',
            label=u'Folder path:',
            labelWidth=101,
            callback=self.updateGUI,
            tooltip=(
                u"The paths of the folders that will be added to the\n"
                u"list when button 'Add' is clicked.\n\n"
                u"Successive paths must be separated with ' / ' \n"
                u"(whitespace + slash + whitespace). Their order in\n"
                u"the list will be the same as in this field."
            ),
        )
        gui.separator(widget=addfolderBoxLine1, width=5)
        # Button Browse
        gui.button(
            widget=addfolderBoxLine1,
            master=self,
            label=u'Browse',
            callback=self.browse,
            tooltip=(
                u"Open a dialog for selecting a top folder.\n\n"
                u"Selected folder paths will appear in the field to\n"
                u"the left of this button afterwards, ready to be\n"
                u"added to the list when button 'Add' is clicked."
            ),
        )
        gui.separator(widget=addfolderBox, width=10)

        # Filter choice to include only certain files or to exclude files
        # ------------
        # self.applyInclusion = False  (to be put in __init__)
        # gui.checkbox()
        # callback = lambda t=self.applyInclusion : includeLineEdit.setDisabled(not t)
        # includeLineEdit = gui.lineEdit()
        # ------------

        # Filter box to input include only
        gui.separator(widget=addfolderBox, width=3)
        includeBoxLine1 = gui.widgetBox(
            widget=addfolderBox,
            box=False,
            orientation='horizontal',
        )

        # Include only box: the callback enables the text field below only
        # while the box is checked (the field is created just after, which
        # is fine because the lambda looks the name up when it is called).
        gui.checkBox(
            widget=includeBoxLine1,
            master=self,
            value='applyInclusion',
            label=u'Include only',
            labelWidth=100,
            callback = lambda: includeLineEdit.setDisabled(not self.applyInclusion),
            tooltip=(
                u"Choose the inclusion"
            ),
        )
        includeLineEdit = gui.lineEdit(
            widget=includeBoxLine1,
            master=self,
            value='inclusionsUser',
            orientation='horizontal',
            label=u'',
            disabled = True,
            labelWidth=101,
            tooltip=(
                u"This field lets you specify a custom filter\n"
                u"to select the folders to be\n"
                u"added to the list."
            ),
        )

        # Filter box to exclude only
        gui.separator(widget=addfolderBox, width=3)
        excludeBoxLine1 = gui.widgetBox(
            widget=addfolderBox,
            box=False,
            orientation='horizontal',
        )
        # Exclude only box: same enabling logic as above, applied to the
        # exclusion text field ('includeLineEdit2').
        gui.checkBox(
            widget=excludeBoxLine1,
            master=self,
            value='applyExclusion',
            label=u'Exclude',
            labelWidth=100,
            disabled = False,
            callback = lambda: includeLineEdit2.setDisabled(not self.applyExclusion),
            tooltip=(
                u"Exclude the inclusion"
            ),
        )
        includeLineEdit2=gui.lineEdit(
            widget=excludeBoxLine1,
            master=self,
            value='exclusionsUser',
            orientation='horizontal',
            label=u'',
            disabled = True,
            labelWidth=101,
            tooltip=(
                u"This field lets you specify a custom filter\n"
                u"to select the folders to be\n"
                u"added to the list."
            ),
        )

        # Sampling box to input the level of sampling
        gui.separator(widget=addfolderBox, width=3)
        samplingBoxLine1 = gui.widgetBox(
            widget=addfolderBox,
            box=False,
            orientation='horizontal',
        )
        # Check box for sampling: the spin box is enabled only while the
        # box is checked.
        gui.checkBox(
            widget=samplingBoxLine1,
            master=self,
            value='applySampling',
            label=u'Sampling',
            labelWidth=100,
            disabled = False,
            callback = lambda: samplingSpin.setDisabled(not self.applySampling),
            tooltip=(
                u"Choose the sampling level"
            ),
        )
        # Spin box to input the sampling level, between 10 and 100

        # self.importFilenamesKeyLineEdit = gui.spin(

        samplingSpin = gui.spin(
            widget=samplingBoxLine1,
            master=self,
            value='samplingRate',
            minv = 10,
            maxv = 100,
            labelWidth=50,
            orientation='horizontal',
            tooltip=(
                u"sampling level"
            ),
        )
        gui.separator(widget=addfolderBox, width=3)
        self.addButton = gui.button(
            widget=addfolderBox,
            master=self,
            label=u'Add',
            callback=self.add,
            tooltip=(
                u"Add the folder(s) currently displayed in the\n"
                u"'folders' text field to the list.\n\n"
                u"Each of these folders will be associated with the\n"
                u"specified encoding and annotation (if any).\n\n"
                u"Other folders may be selected afterwards and\n"
                u"assigned a different encoding and annotation."
            ),
        )
        self.advancedSettings.advancedWidgets.append(folderBox)
        self.advancedSettings.advancedWidgetsAppendSeparator()

        # Options box...
        optionsBox = gui.widgetBox(
            widget=self.controlArea,
            box=u'Options',
            orientation='vertical',
            addSpace=False,
        )
        optionsBoxLine1 = gui.widgetBox(
            widget=optionsBox,
            box=False,
            orientation='horizontal',
        )
        # Disabled: "import folder names with key" option.
#        gui.checkBox(
#           widget=optionsBoxLine1,
#            master=self,
#           value='importFilenames',
#            label=u'Import folder names with key:',
#           labelWidth=180,
#            callback=self.sendButton.settingsChanged,
#            tooltip=(
#                u"Import folder names as annotations."
#           ),
#        )
#        self.importFilenamesKeyLineEdit = gui.lineEdit(
#            widget=optionsBoxLine1,
#            master=self,
#            value='importFilenamesKey',
#            orientation='horizontal',
#            callback=self.sendButton.settingsChanged,
#            tooltip=(
#                u"Annotation key for importing folder names."
#            ),
#        )
        gui.separator(widget=optionsBox, width=3)
        optionsBoxLine2 = gui.widgetBox(
            widget=optionsBox,
            box=False,
            orientation='horizontal',
        )
        # Auto-numbering option and its annotation key.
        gui.checkBox(
            widget=optionsBoxLine2,
            master=self,
            value='autoNumber',
            label=u'Auto-number with key:',
            labelWidth=180,
            callback=self.sendButton.settingsChanged,
            tooltip=(
                u"Annotate folders with increasing numeric indices."
            ),
        )
        self.autoNumberKeyLineEdit = gui.lineEdit(
            widget=optionsBoxLine2,
            master=self,
            value='autoNumberKey',
            orientation='horizontal',
            callback=self.sendButton.settingsChanged,
            tooltip=(
                u"Annotation key for folder auto-numbering."
            ),
        )
        gui.separator(widget=optionsBox, width=3)
        self.advancedSettings.advancedWidgets.append(optionsBox)
        self.advancedSettings.advancedWidgetsAppendSeparator()

        gui.rubber(self.controlArea)

        # Send button...
        self.sendButton.draw()

        # Info box...
        self.infoBox.draw()

        self.adjustSizeWithTimer()
        # Schedule a first call to sendIf with a zero-delay timer.
        QTimer.singleShot(0, self.sendButton.sendIf)

    def inputMessage(self, message):
        """Handle JSON message on input connection.

        The message content must be a JSON list of objects, each holding
        non-empty 'path' and 'encoding' values. The advanced interface is
        shown, the current folder list is cleared, and every path is then
        registered through add(), so that the resulting entries have the
        same structure as folders added from the interface (which is what
        sendData() reads).

        The 'encoding' value is only validated; file contents are decoded
        by openFileList().

        If the content is not valid JSON or an entry is malformed, an error
        is displayed in the info box, None is sent on the 'Text data'
        output and no folder is added.
        """
        if not message:
            return
        self.displayAdvancedSettings = True
        self.advancedSettings.setVisible(True)
        self.clearAll()
        self.infoBox.inputChanged()
        try:
            json_data = json.loads(message.content)
        except ValueError:
            self.infoBox.setText(
                u"Please make sure that incoming message is valid JSON.",
                'error'
            )
            self.send('Text data', None, self)
            return

        # Validate every entry before touching the folder list, so that a
        # malformed message never results in a partial import. A top-level
        # value that is not a list is treated as a single invalid entry.
        paths = list()
        entries = json_data if isinstance(json_data, list) else [None]
        for entry in entries:
            path = encoding = ''
            if isinstance(entry, dict):
                path = entry.get('path', '')
                encoding = entry.get('encoding', '')
            if not (isinstance(path, str) and path and encoding):
                self.infoBox.setText(
                    u"Please verify keys and values of incoming "
                    u"JSON message.",
                    'error'
                )
                self.send('Text data', None, self)
                return
            paths.append(path)

        # Register each folder exactly as if it had been added by hand.
        for path in paths:
            self.rootFolderPath = os.path.normpath(path)
            self.add()
        self.sendButton.settingsChanged()

    def sendData(self):
        """Load folders, create and send segmentation.

        One LTTL Input is created for each entry of self.fileContents and
        annotated with the matching entry of the annotation list built from
        the 'fileList' of every folder in self.folders. A single Input is
        sent as is; several Inputs are concatenated first.

        None is sent (with a warning in the info box) when no folder is
        selected, when auto-numbering is requested without a key, or when
        there is no file content to send.
        """
        # Check that there's something on input...
        if (
            (self.displayAdvancedSettings and not self.folders) or
            not (self.rootFolderPath or self.displayAdvancedSettings)
        ):
            self.infoBox.setText(u'Please select input folder.', 'warning')
            self.send('Text data', None, self)
            return

        # Check that autoNumberKey is not empty (if necessary)...
        autoNumberKey = None
        if self.displayAdvancedSettings and self.autoNumber:
            if not self.autoNumberKey:
                self.infoBox.setText(
                    u'Please enter an annotation key for auto-numbering.',
                    'warning'
                )
                self.send('Text data', None, self)
                return
            autoNumberKey = self.autoNumberKey

        # Clear created Inputs...
        self.clearCreatedInputs()

        # File contents are produced by openFileList(), which getFileList()
        # only calls when at least one file passed the filters; the
        # attribute may therefore be missing or empty.
        fileContents = getattr(self, 'fileContents', None)
        if not fileContents:
            self.infoBox.setText(
                u'No file to import from the selected folder(s).',
                'warning'
            )
            self.send('Text data', None, self)
            return

        myFolders = self.folders
        progressBar = gui.ProgressBar(
            self,
            iterations=len(myFolders)
        )
        try:
            # Annotations...
            annotations = list()
            counter = 1
            for myFolder in myFolders:
                for myFile in myFolder['fileList']:
                    annotation = dict()

                    if self.importFileNameKey:
                        annotation[self.importFileNameKey] = (
                            myFile['fileName']
                        )

                    if self.importFolderNameKey:
                        annotation[self.importFolderNameKey] = (
                            myFile['folderName']
                        )

                    if self.FolderDepth1Key:
                        annotation[self.FolderDepth1Key] = myFile['depth1']

                    if self.FolderDepth2Key:
                        annotation[self.FolderDepth2Key] = myFile['depth2']

                    if self.FolderDepthLvl:
                        annotation[self.FolderDepthLvl] = myFile['depthLvl']

                    # Apply the auto-numbering option (it used to be
                    # validated above but never written anywhere).
                    if autoNumberKey:
                        annotation[autoNumberKey] = counter
                        counter += 1

                    annotations.append(annotation)
                progressBar.advance()

            # Create an LTTL.Input for each file...
            # NOTE(review): contents and annotations are paired by index,
            # which assumes self.fileContents lines up with the folders'
            # file lists -- confirm against openFileList()/sampleFileList().
            label = self.captionTitle if len(fileContents) == 1 else None
            for index, fileContent in enumerate(fileContents):
                myInput = Input(fileContent, label)
                segment = myInput[0]
                segment.annotations.update(annotations[index])
                myInput[0] = segment
                self.createdInputs.append(myInput)

            # If there's only one file, the widget's output is the created
            # Input.
            if len(fileContents) == 1:
                self.segmentation = self.createdInputs[0]

            # Otherwise the widget's output is a concatenation...
            else:
                self.segmentation = Segmenter.concatenate(
                    segmentations=self.createdInputs,
                    label=self.captionTitle,
                    copy_annotations=True,
                    import_labels_as=None,
                    sort=False,
                    auto_number_as=None,
                    merge_duplicates=False,
                    progress_callback=None,
                )
            message = u'%i segment@p sent to output ' % len(self.segmentation)
            message = pluralize(message, len(self.segmentation))
            numChars = 0
            for segment in self.segmentation:
                segmentLength = len(Segmentation.get_data(segment.str_index))
                numChars += segmentLength
            message += u'(%i character@p).' % numChars
            message = pluralize(message, numChars)
            self.infoBox.setText(message)
        finally:
            # Never leave the progress bar running, even on error.
            progressBar.finish()

        self.send('Text data', self.segmentation, self)
        self.sendButton.resetSettingsChangedFlag()

    def clearCreatedInputs(self):
        """Reset the stored data of every Input created by this widget to
        None, then empty the list of created inputs in place.
        """
        for createdInput in self.createdInputs:
            firstSegment = createdInput[0]
            Segmentation.set_data(firstSegment.str_index, None)
        self.createdInputs[:] = []

    def importList(self):
        """Display a file dialog and import a folder list in JSON format.

        The selected file must contain a JSON list of objects, each holding
        non-empty 'path' and 'encoding' values. Every path is registered
        through add(), so that imported entries have the same structure as
        folders added from the interface. The 'encoding' value is only
        validated; file contents are decoded by openFileList().

        A message box is displayed, and nothing is imported, when the file
        cannot be read, is not valid JSON, or contains a malformed entry.
        """
        selection = QFileDialog.getOpenFileName(
            self,
            u'Import folder List',
            self.lastLocation,
            u'Text folders (*)'
        )
        # Depending on the Qt binding, the dialog returns either the path
        # or a (path, selected filter) tuple.
        if isinstance(selection, tuple):
            selection = selection[0]
        folderPath = selection
        if not folderPath:
            return
        # The list file itself is not a folder: rootFolderPath is only
        # updated below, for the folders that are actually imported.
        self.lastLocation = os.path.dirname(folderPath)
        self.error()
        try:
            with codecs.open(folderPath, encoding='utf8') as folderHandle:
                folderContent = folderHandle.read()
        except (IOError, UnicodeError):
            QMessageBox.warning(
                None,
                'Textable',
                "Couldn't open folder.",
                QMessageBox.Ok
            )
            return
        try:
            json_data = json.loads(folderContent)
        except ValueError:
            QMessageBox.warning(
                None,
                'Textable',
                "JSON parsing error.",
                QMessageBox.Ok
            )
            return

        # Validate every entry before adding anything, so that a malformed
        # file never results in a partial import. A top-level value that is
        # not a list is treated as a single invalid entry.
        paths = list()
        entries = json_data if isinstance(json_data, list) else [None]
        for entry in entries:
            path = encoding = ''
            if isinstance(entry, dict):
                path = entry.get('path', '')
                encoding = entry.get('encoding', '')
            if not (isinstance(path, str) and path and encoding):
                QMessageBox.warning(
                    None,
                    'Textable',
                    "Selected JSON folder doesn't have the right keys "
                    "and/or values.",
                    QMessageBox.Ok
                )
                return
            paths.append(path)

        # add() flags the settings as changed for each folder it registers.
        for path in paths:
            self.rootFolderPath = os.path.normpath(path)
            self.add()

    def exportList(self):
        """Display a file dialog and export the folder list in JSON format.

        Each folder is exported as an object with 'path' and 'encoding'
        keys. Folder records created by add() are dictionaries: their
        'rootPath' is exported, together with the widget's 'encoding'
        setting (the import code requires a non-empty encoding). Records
        stored as (path, encoding, annotation key, annotation value)
        sequences are still supported.
        """
        toDump = list()
        for myFolder in self.folders:
            if isinstance(myFolder, dict):
                # Record built by add(): indexing it with [0] would raise
                # a KeyError.
                toDump.append({
                    'path': myFolder['rootPath'],
                    'encoding': self.encoding,
                })
            else:
                toDump.append({
                    'path': myFolder[0],
                    'encoding': myFolder[1],
                })
                if myFolder[2] and myFolder[3]:
                    toDump[-1]['annotation_key'] = myFolder[2]
                    toDump[-1]['annotation_value'] = myFolder[3]
        selection = QFileDialog.getSaveFileName(
            self,
            u'Export folder List',
            self.lastLocation,
        )
        # Depending on the Qt binding, the dialog returns either the path
        # or a (path, selected filter) tuple.
        if isinstance(selection, tuple):
            selection = selection[0]
        folderPath = selection

        if folderPath:
            self.lastLocation = os.path.dirname(folderPath)
            # The context manager closes the file even if writing fails.
            with codecs.open(
                folderPath,
                encoding='utf8',
                mode='w',
                errors='xmlcharrefreplace',
            ) as outputfolder:
                outputfolder.write(
                    normalizeCarriageReturns(
                        json.dumps(toDump, sort_keys=True, indent=4)
                    )
                )
            QMessageBox.information(
                None,
                'Textable',
                'folder list correctly exported',
                QMessageBox.Ok
            )

    def getFileList(self):
        """Walk the root folder and build the list of files to import.

        Every file found under self.rootFolderPath is described by a
        dictionary with keys 'absoluteFilePath', 'fileName', 'folderName'
        (name of the selected folder), 'depthLvl' (number of folders from
        the selected one down to the file's own folder) and 'depth1' to
        'depth4' (names of the successive subfolders, "0" when there is no
        subfolder at that depth).

        Files are kept if their name ends with one of the extensions in
        self.inclusionList, then filtered with the user inclusion and
        exclusion patterns when the corresponding options are checked.

        Sets self.fileList and self.maxDepth, and calls openFileList() when
        at least one file was kept.
        """
        # Relative paths are computed from the parent of the selected
        # folder, so that their first component is the selected folder's
        # own name. Going through abspath/relpath (rather than slicing the
        # path string) also works for relative paths, trailing separators
        # and folders located directly under the filesystem root.
        initialRootParentPath = os.path.dirname(
            os.path.abspath(self.rootFolderPath)
        )
        defaultExtensions = tuple(self.inclusionList)
        fileListExt = list()  # files matching the default extensions
        depthList = list()    # depth of every file encountered

        progressBar = gui.ProgressBar(self, iterations=1)
        try:
            # os.walk yields, for each directory: its path (string), the
            # names of its subdirectories and the names of its files.
            # Symbolic links to directories are not followed (os.walk
            # default).
            for curr_path, dirnames, filenames in os.walk(
                    self.rootFolderPath):
                curr_rel_path = os.path.relpath(
                    curr_path, initialRootParentPath
                )
                # Successive folders from the selected one down to
                # curr_path
                annotations = os.path.normpath(curr_rel_path).split(os.sep)
                curr_depth = len(annotations)

                for filename in filenames:
                    depthList.append(curr_depth)

                    fileInfo = {
                        'absoluteFilePath': os.path.join(
                            curr_path, filename
                        ),
                        'fileName': filename,
                        'depthLvl': curr_depth,
                        'folderName': annotations[0],
                    }
                    for i in range(1, curr_depth):
                        fileInfo['depth' + str(i)] = annotations[i]
                    for i in range(curr_depth, 5):
                        fileInfo['depth' + str(i)] = "0"

                    # Apply default file extension filter; a single
                    # endswith() call keeps a file at most once even if
                    # several extensions match.
                    if filename.endswith(defaultExtensions):
                        fileListExt.append(fileInfo)

            # Apply inclusion filter (keep files matching a pattern)
            if self.applyInclusion:
                fileListIncl = [
                    fileInfo for fileInfo in fileListExt
                    if self.match(
                        fileInfo['fileName'], self.inclusionsUserAsList
                    )
                ]
            else:
                fileListIncl = fileListExt

            # Apply exclusion filter (drop files matching a pattern)
            if self.applyExclusion:
                fileListExcl = [
                    fileInfo for fileInfo in fileListIncl
                    if not self.match(
                        fileInfo['fileName'], self.exclusionsUserAsList
                    )
                ]
            else:
                fileListExcl = fileListIncl

            # Output file list
            self.fileList = fileListExcl

            if self.fileList:
                self.maxDepth = max(depthList)
                self.openFileList()
            else:
                self.maxDepth = 0
            progressBar.advance()
        finally:
            # Never leave the progress bar running, even on error.
            progressBar.finish()

    # test if file contains one of the patterns in patternList
    def match(self, file, patternList):
        """Return True if at least one pattern of patternList occurs in
        file, False otherwise (including when patternList is empty).
        """
        return any(pattern in file for pattern in patternList)

    def openFileList(self):
        """Read and decode every file of self.fileList.

        The decoded contents are stored in self.fileContents, one entry per
        file and in the same order as self.fileList. For each file, the
        encoding detected by chardet is tried first, then the predefined
        encodings; the first one that decodes the content is used. If none
        succeeds, the content is decoded as UTF-8 with undecodable bytes
        replaced, so that every file still yields exactly one entry.

        self.fileContent holds the content of the last file read.
        """
        self.fileContents = list()
        # Work on a copy so that the list returned by the helper is never
        # modified.
        predefinedEncodings = list(getPredefinedEncodings())

        for file in self.fileList:
            with open(file['absoluteFilePath'], 'rb') as opened_file:
                rawContent = opened_file.read()

            # Candidate encodings: detected one first (chardet may report
            # None), then the predefined ones.
            detected_encoding = chardet.detect(rawContent)['encoding']
            candidates = [
                encoding for encoding in predefinedEncodings
                if encoding != detected_encoding
            ]
            if detected_encoding:
                candidates.insert(0, detected_encoding)

            decodedContent = None
            for encoding in candidates:
                try:
                    decodedContent = rawContent.decode(encoding)
                except (UnicodeError, LookupError):
                    # Wrong or unknown encoding: try the next candidate.
                    continue
                # Stop at the first success; otherwise a later, more
                # permissive encoding would override the detected one.
                break
            if decodedContent is None:
                decodedContent = rawContent.decode('utf-8', errors='replace')

            self.fileContent = decodedContent
            self.fileContents.append(decodedContent)

    def browse(self):
        """Open a directory selection dialog and store the chosen folder.

        The normalized path becomes both the root folder path and the last
        visited location. When the advanced settings are not displayed, the
        folder is added right away. Nothing happens if the dialog returns
        an empty path.
        """
        chosenFolder = QFileDialog.getExistingDirectory(
            self,
            u'Select Folder(s)',
            self.lastLocation,
        )
        if chosenFolder:
            normalizedFolder = os.path.normpath(chosenFolder)
            self.rootFolderPath = normalizedFolder
            self.lastLocation = normalizedFolder

            # In advanced mode, the folder is only added when 'Add' is
            # clicked.
            if not self.displayAdvancedSettings:
                self.add()

            self.updateGUI()

    def moveUp(self):
        """Move the selected folder one position upward in the folder list.

        Nothing happens when no folder is selected or when the selected
        folder is already the first one. Otherwise the folder is swapped
        with its predecessor, the moved entry is selected in the list box
        and the settings are flagged as changed.
        """
        if not self.selectedfolderLabels:
            return
        index = self.selectedfolderLabels[0]
        if index > 0:
            self.folders[index - 1], self.folders[index] = (
                self.folders[index], self.folders[index - 1]
            )
            # Select through the list box widget created in __init__.
            # NOTE(review): the previous code went through
            # self.selectedfolderLabels.listBox, an attribute a plain list
            # does not have -- confirm nothing else relies on it.
            self.folderListbox.item(index - 1).setSelected(1)
            self.sendButton.settingsChanged()

    def moveDown(self):
        """Move the selected folder one position downward in the folder list.

        Nothing happens when no folder is selected or when the selected
        folder is already the last one. Otherwise the folder is swapped
        with its successor, the moved entry is selected in the list box and
        the settings are flagged as changed.
        """
        if not self.selectedfolderLabels:
            return
        index = self.selectedfolderLabels[0]
        if index < len(self.folders) - 1:
            self.folders[index + 1], self.folders[index] = (
                self.folders[index], self.folders[index + 1]
            )
            # Select through the list box widget created in __init__.
            # NOTE(review): the previous code went through
            # self.selectedfolderLabels.listBox, an attribute a plain list
            # does not have -- confirm nothing else relies on it.
            self.folderListbox.item(index + 1).setSelected(1)
            self.sendButton.settingsChanged()

    def clearAll(self):
        """Remove every folder and clear the current selection.

        Both lists are emptied in place (slice deletion) rather than
        rebound to new objects, then the send button is told that the
        settings changed.
        """
        del self.folders[:]
        del self.selectedfolderLabels[:]
        self.sendButton.settingsChanged()

    def remove(self):
        """Remove the selected folder from the folder list.

        Does nothing when no folder is selected. Otherwise the folder at
        the first selected index is removed, the selection is emptied in
        place and the send button is notified of the change.
        """
        if not self.selectedfolderLabels:
            return

        selected = self.selectedfolderLabels[0]
        self.folders.pop(selected)
        del self.selectedfolderLabels[:]
        self.sendButton.settingsChanged()

    def add(self):
        """Register the current root folder, with its options, in the folder list.

        The comma-separated inclusion and exclusion criteria are first
        split into lists of non-empty, stripped items. The file list is
        then (re)built through getFileList(), sampled through
        sampleFileList(), and a dict describing the folder is appended to
        self.folders. File names are printed before and after sampling.
        """

        def splitCriteria(rawCriteria):
            # Comma-separated items, stripped of surrounding whitespace;
            # items that are empty after stripping are dropped.
            stripped = [item.strip() for item in rawCriteria.split(",")]
            return [item for item in stripped if item]

        self.inclusionsUserAsList = splitCriteria(self.inclusionsUser)
        self.exclusionsUserAsList = splitCriteria(self.exclusionsUser)

        self.getFileList()
        # Show every file that was found...
        print("Files: ", [entry['fileName'] for entry in self.fileList])

        sampledFiles = self.sampleFileList()
        # ...and those that remain after sampling.
        print(
            "Files after sampling: ",
            [entry['fileName'] for entry in sampledFiles],
        )

        folderEntry = {
            'rootPath': self.rootFolderPath,
            'maxDepth': self.maxDepth,
            'inclusionsUser': self.inclusionsUser,
            'exclusionsUser': self.exclusionsUser,
            'samplingRate': self.samplingRate,
            'fileList': sampledFiles,
        }
        self.folders.append(folderEntry)
        self.sendButton.settingsChanged()

    def sampleFileList(self):
        """Return a random sample of self.fileList.

        The sample holds self.samplingRate percent of the files, rounded
        up to a whole number of files. self.fileList itself is neither
        reordered nor modified.
        """
        # Work on a copy so that shuffling leaves self.fileList untouched.
        shuffledFiles = list(self.fileList)

        # Fraction of the files to keep (samplingRate is a percentage).
        keptFraction = self.samplingRate / 100.0

        # Shuffle, then keep the leading part of the shuffled list.
        random.shuffle(shuffledFiles)

        # Number of files to keep, rounded up.
        keptCount = int(math.ceil(len(shuffledFiles) * keptFraction))
        return shuffledFiles[:keptCount]

    def updateGUI(self):
        """Bring the GUI in line with the current settings.

        When the advanced settings are hidden, only their visibility is
        updated. Otherwise the folder labels are rebuilt from self.folders,
        the previously selected row is re-selected, the add button and the
        auto-number key field are enabled or disabled, the folder box
        buttons are refreshed and the advanced settings are shown.
        """
        if not self.displayAdvancedSettings:
            self.advancedSettings.setVisible(False)
            return

        # Remember the selected row so it can be restored after the rebuild.
        if self.selectedfolderLabels:
            previousSelection = self.selectedfolderLabels[0]
        else:
            previousSelection = None

        del self.folderLabels[:]
        rebuiltLabels = []

        if self.folders:
            rootPaths = [entry['rootPath'] for entry in self.folders]
            depths = ['%s' % entry['maxDepth'] for entry in self.folders]
            inclusions = [entry['inclusionsUser'] for entry in self.folders]
            exclusions = [entry['exclusionsUser'] for entry in self.folders]
            rates = ['%s' % entry['samplingRate'] for entry in self.folders]
            names = [os.path.basename(path) for path in rootPaths]

            # Left-justify folder names in a column two characters wider
            # than the longest name.
            widestName = max([len(name) for name in names])
            nameTemplate = u'%-' + str(widestName + 2) + u's'

            for name, depth, included, excluded, rate in zip(
                names, depths, inclusions, exclusions, rates
            ):
                rebuiltLabels.append(
                    nameTemplate % name
                    + "[d]:{" + depth + "} "
                    + "[i]:{" + included + "} "
                    + "[e]:{" + excluded + "} "
                    + "[s]:{" + rate + "%}"
                )

        self.folderLabels = rebuiltLabels

        if previousSelection is not None:
            # The pre-send callback is unset while the row is re-selected
            # and restored right afterwards.
            self.sendButton.sendIfPreCallback = None
            self.selectedfolderLabels.listBox.item(
                previousSelection
            ).setSelected(1)
            self.sendButton.sendIfPreCallback = self.updateGUI

        # Adding requires a root folder, and the annotation key and value
        # must be either both filled in or both empty.
        if self.rootFolderPath:
            annotationConsistent = (
                bool(self.newAnnotationKey) == bool(self.newAnnotationValue)
            )
            self.addButton.setDisabled(not annotationConsistent)
        else:
            self.addButton.setDisabled(True)

        self.autoNumberKeyLineEdit.setDisabled(not self.autoNumber)

        self.updatefolderBoxButtons()
        self.advancedSettings.setVisible(True)

    def updatefolderBoxButtons(self):
        """Enable or disable the folder box buttons.

        The remove button needs a selection; the move buttons additionally
        need room to move in the corresponding direction. The clear-all and
        export buttons are enabled whenever at least one folder exists.
        """
        if self.selectedfolderLabels:
            selected = self.selectedfolderLabels[0]
            self.removeButton.setDisabled(False)
            self.moveUpButton.setDisabled(not (selected > 0))
            self.moveDownButton.setDisabled(
                not (selected < len(self.folders) - 1)
            )
        else:
            for button in (
                self.moveUpButton,
                self.moveDownButton,
                self.removeButton,
            ):
                button.setDisabled(True)

        noFolders = not len(self.folders)
        self.clearAllButton.setDisabled(noFolders)
        self.exportButton.setDisabled(noFolders)

    def setCaption(self, title):
        """Set the caption and flag a settings change when it differs.

        The new title is compared with the current captionTitle (when that
        attribute exists) before delegating to the parent class; the send
        button is notified only if the two differ.
        """
        captionChanged = (
            'captionTitle' in dir(self) and title != self.captionTitle
        )
        super().setCaption(title)
        if captionChanged:
            self.sendButton.settingsChanged()

    def onDeleteWidget(self):
        """Call clearCreatedInputs().

        NOTE(review): presumably invoked when the widget is deleted (going
        by the method name) -- confirm against the base class.
        """
        self.clearCreatedInputs()
Exemple #8
0
    def __init__(self):
        """Create the widget's helper objects and build its GUI.

        An info box, a send button (whose callback is sendData) and an
        advanced-settings toggle (whose callback is showAdvancedSettings)
        are created first. The control area then receives a "File
        selection" box (file path, browse button, input language) and a
        "Segmentation at pauses" box that is registered as an advanced
        widget. Statement order matters here: widgets are added to their
        parent boxes in the order in which they are created.
        """
        super().__init__()
        self.infoBox = InfoBox(widget=self.controlArea)
        self.sendButton = SendButton(
            widget=self.controlArea,
            master=self,
            callback=self.sendData,
            infoBoxAttribute="infoBox",
            #sendIfPreCallback = self.updateGUI,
        )
        self.advancedSettings = AdvancedSettings(
            widget=self.controlArea,
            master=self,
            callback=self.showAdvancedSettings,
        )

        # Start with an empty output segmentation and no created inputs
        self.segmentation = Input(text=u"")
        self.createdInputs = list()

        # Advanced settings checkbox...
        self.advancedSettings.draw()

        # Basic file box
        basicFileBox = gui.widgetBox(
            widget=self.controlArea,
            box=u"File selection",
            orientation="vertical",
            addSpace=False,
        )
        # First line of the box: file path field followed by the browse button
        basicFileBoxLine1 = gui.widgetBox(
            widget=basicFileBox,
            box=False,
            orientation="horizontal",
        )
        gui.lineEdit(
            widget=basicFileBoxLine1,
            master=self,
            value="file",
            orientation="horizontal",
            label=u"File path :",
            labelWidth=101,
            callback=self.sendButton.settingsChanged,
            tooltip=(u"The path of the file."),
        )

        # Language selector, placed in the file box itself (not in line 1).
        # NOTE(review): the languageComboBox name is never read afterwards.
        languageComboBox = gui.comboBox(
            widget=basicFileBox,
            master=self,
            value="language",
            # The items are the keys of AudioFile.dict_languages
            items=[(language) for language in AudioFile.dict_languages],
            sendSelectedValue=True,
            orientation=u"horizontal",
            label="Input language :",
            labelWidth=101,
            callback=self.sendButton.settingsChanged,
            tooltip=(u"Select the language of the input text."),
        )
        # Separator and browse button are appended to line 1
        gui.separator(widget=basicFileBoxLine1, width=3)
        gui.button(
            widget=basicFileBoxLine1,
            master=self,
            label=u"Browse",
            callback=self.browse,
            tooltip=(u"Open a dialog for selecting file."),
        )

        # Box holding the silence-based segmentation options
        OptionsBox = gui.widgetBox(
            widget=self.controlArea,
            box=u"Segmentation at pauses",
            orientation="vertical",
            addSpace=False,
        )

        # NOTE(review): nothing is added to this horizontal box below (the
        # controls use OptionsBox as their parent) -- confirm it is needed.
        OptionBoxLine1 = gui.widgetBox(
            widget=OptionsBox,
            box=False,
            orientation="horizontal",
        )
        # Spin box bound to the selected_vol setting (1 to 50)
        gui.spin(
            widget=OptionsBox,
            master=self,
            value="selected_vol",
            label="Maximum volume (in dBFS) : ",
            callback=self.sendButton.settingsChanged,
            tooltip="Select a value between 1 and 50",
            minv=1,
            maxv=50,
            step=1,
        )

        # Spin box bound to the selected_dur setting (1 to 1000)
        gui.spin(
            widget=OptionsBox,
            master=self,
            value="selected_dur",
            label="Minimum duration (in milliseconds) : ",
            callback=self.sendButton.settingsChanged,
            tooltip="Select a value between 1 and 1000",
            minv=1,
            maxv=1000,
            step=1,
        )

        # Check box bound to the selected_seg setting
        gui.checkBox(
            widget=OptionsBox,
            master=self,
            value="selected_seg",
            label="Segment the audio file with the parameters",
            box=None,
            callback=self.sendButton.settingsChanged,
            tooltip="Leave this box unchecked if you want one and only segment."
        )

        gui.separator(widget=OptionsBox, width=3)
        # Register the options box as an advanced widget
        self.advancedSettings.advancedWidgets.append(OptionsBox)
        self.advancedSettings.advancedWidgetsAppendSeparator()
        # Adding space between control area and send button
        gui.rubber(self.controlArea)
        # Send button...
        self.sendButton.draw()

        # Info box...
        self.infoBox.draw()

        # Apply the stored visibility of the advanced settings
        self.advancedSettings.setVisible(self.displayAdvancedSettings)
Exemple #9
0
class AudioFile(OWTextableBaseWidget):
    """Widget that imports an audio file, transcribes it and segments it.

    It has no inputs and a single "Text data" output of type Segmentation.
    """

    # Widget info
    name = "AudioFile"
    description = "Import an audio file, transcribe it and segment it"
    icon = "icons/audioFile.png"
    priority = 20

    inputs = []
    outputs = [("Text data", Segmentation)]

    # Settings
    language = settings.Setting("French")  # a key of dict_languages
    want_main_area = False
    resizing_enabled = True
    displayAdvancedSettings = settings.Setting(False)
    file = settings.Setting(u"")  # path of the selected audio file
    lastLocation = settings.Setting(".")  # where the file dialog opens

    # Advanced settings
    # selected_vol is passed as set_silence_threshold and selected_dur as
    # set_silence_len to get_large_audio_transcription(); selected_seg
    # selects one segment per chunk instead of a single text.
    selected_vol = settings.Setting(14)
    selected_dur = settings.Setting(500)
    selected_seg = settings.Setting(False)

    # Dictionary mapping each language name offered in the GUI to the
    # language code passed to the speech recognizer
    dict_languages = {
        "English": "en-US",
        "French": "fr-FR",
        "German": "de-DE",
        "Italian": "it-IT",
        "Japanese": "ja",
        "Mandarin Chinese": "zh-CN",
        "Portugese": "pt-PT",
        "Russian": "ru",
        "Spanish": "es-ES",
    }

    def __init__(self):
        """Set up the widget's helper objects and draw its GUI.

        Creates the info box, the send button (callback: sendData) and the
        advanced-settings toggle (callback: showAdvancedSettings), then
        fills the control area with a "File selection" box and a
        "Segmentation at pauses" box; the latter is registered as an
        advanced widget. Widgets are added to their parent boxes in
        creation order, so the statement order below defines the layout.
        """
        super().__init__()
        self.infoBox = InfoBox(widget=self.controlArea)
        self.sendButton = SendButton(
            widget=self.controlArea,
            master=self,
            callback=self.sendData,
            infoBoxAttribute="infoBox",
            #sendIfPreCallback = self.updateGUI,
        )
        self.advancedSettings = AdvancedSettings(
            widget=self.controlArea,
            master=self,
            callback=self.showAdvancedSettings,
        )

        # Initialize the output segmentation and the list of created inputs
        self.segmentation = Input(text=u"")
        self.createdInputs = list()

        # Advanced settings checkbox...
        self.advancedSettings.draw()

        # Basic file box
        basicFileBox = gui.widgetBox(
            widget=self.controlArea,
            box=u"File selection",
            orientation="vertical",
            addSpace=False,
        )
        # Horizontal line holding the file path field and the browse button
        basicFileBoxLine1 = gui.widgetBox(
            widget=basicFileBox,
            box=False,
            orientation="horizontal",
        )
        gui.lineEdit(
            widget=basicFileBoxLine1,
            master=self,
            value="file",
            orientation="horizontal",
            label=u"File path :",
            labelWidth=101,
            callback=self.sendButton.settingsChanged,
            tooltip=(u"The path of the file."),
        )

        # Language selector, added to the file box below line 1.
        # NOTE(review): the languageComboBox name is never read afterwards.
        languageComboBox = gui.comboBox(
            widget=basicFileBox,
            master=self,
            value="language",
            # Displays the keys of the dict_languages class attribute
            items=[(language) for language in AudioFile.dict_languages],
            sendSelectedValue=True,
            orientation=u"horizontal",
            label="Input language :",
            labelWidth=101,
            callback=self.sendButton.settingsChanged,
            tooltip=(u"Select the language of the input text."),
        )
        # Separator and browse button complete line 1
        gui.separator(widget=basicFileBoxLine1, width=3)
        gui.button(
            widget=basicFileBoxLine1,
            master=self,
            label=u"Browse",
            callback=self.browse,
            tooltip=(u"Open a dialog for selecting file."),
        )

        # Options controlling how the audio is split at pauses
        OptionsBox = gui.widgetBox(
            widget=self.controlArea,
            box=u"Segmentation at pauses",
            orientation="vertical",
            addSpace=False,
        )

        # NOTE(review): no control is placed in this horizontal box (the
        # controls below use OptionsBox as parent) -- confirm it is needed.
        OptionBoxLine1 = gui.widgetBox(
            widget=OptionsBox,
            box=False,
            orientation="horizontal",
        )
        # selected_vol: passed to get_large_audio_transcription() as
        # set_silence_threshold by sendData()
        gui.spin(
            widget=OptionsBox,
            master=self,
            value="selected_vol",
            label="Maximum volume (in dBFS) : ",
            callback=self.sendButton.settingsChanged,
            tooltip="Select a value between 1 and 50",
            minv=1,
            maxv=50,
            step=1,
        )

        # selected_dur: passed to get_large_audio_transcription() as
        # set_silence_len by sendData()
        gui.spin(
            widget=OptionsBox,
            master=self,
            value="selected_dur",
            label="Minimum duration (in milliseconds) : ",
            callback=self.sendButton.settingsChanged,
            tooltip="Select a value between 1 and 1000",
            minv=1,
            maxv=1000,
            step=1,
        )

        # selected_seg: one output segment per audio chunk when checked
        gui.checkBox(
            widget=OptionsBox,
            master=self,
            value="selected_seg",
            label="Segment the audio file with the parameters",
            box=None,
            callback=self.sendButton.settingsChanged,
            tooltip="Leave this box unchecked if you want one and only segment."
        )

        gui.separator(widget=OptionsBox, width=3)
        # Register the options box as an advanced widget
        self.advancedSettings.advancedWidgets.append(OptionsBox)
        self.advancedSettings.advancedWidgetsAppendSeparator()
        # Adding space between control area and send button
        gui.rubber(self.controlArea)
        # Send button...
        self.sendButton.draw()

        # Info box...
        self.infoBox.draw()

        # Apply the stored visibility of the advanced settings
        self.advancedSettings.setVisible(self.displayAdvancedSettings)

    def get_large_audio_transcription(self,
                                      path,
                                      language,
                                      set_silence_len=500,
                                      set_silence_threshold=14):
        """Split an audio file at silences and transcribe each chunk.

        Args:
            path: path of the audio file; its extension must be ``.wav``
                or ``.mp3`` (case-insensitive).
            language: a key of ``AudioFile.dict_languages`` selecting the
                recognition language.
            set_silence_len: minimum length of a silence, in milliseconds,
                for the audio to be split there.
            set_silence_threshold: number of dB below the average loudness
                of the file under which audio counts as silence.

        Returns:
            ``None`` when the file extension is not supported. Otherwise a
            list with one string per recognized chunk when
            ``self.selected_seg`` is true, or a single string with the
            whole transcription when it is false. Chunks in which no
            speech is recognized are skipped.
        """
        # Reject unsupported files before doing any work. The previous
        # test ("'wav' not in ... or 'mp3' not in ...") was true for
        # every normal file name, so the method always returned None.
        extension = os.path.splitext(path)[1].lower()
        if extension not in ('.wav', '.mp3'):
            return None

        # The chunks are written to a temporary folder that is deleted
        # when the with-block exits.
        with tempfile.TemporaryDirectory() as tempDict:

            # Initialize the recognizer
            r = sr.Recognizer()

            # Convert mp3 input to wav, which is what is loaded below
            audio_type = self.detect_format(path)
            if audio_type == "mp3":
                path = self.to_wav(path, tempDict)

            # Open the audio file using pydub
            sound = AudioSegment.from_wav(path)
            # Split where a silence lasts at least set_silence_len ms
            chunks = split_on_silence(
                sound,
                min_silence_len=set_silence_len,
                # Silence threshold, relative to the file's loudness
                silence_thresh=sound.dBFS - set_silence_threshold,
                # Silence kept around each chunk (pydub's keep_silence)
                keep_silence=500,
            )

            # Output variables; which one is returned depends on the
            # selected_seg setting
            whole_text = ""
            segments = list()

            # Alert message and progress bar
            self.infoBox.setText(u"Processing, please wait...", "warning")
            progressBar = ProgressBar(self, iterations=len(chunks))

            try:
                for i, audio_chunk in enumerate(chunks, start=1):
                    # Export the chunk to the temporary directory
                    chunk_filename = os.path.join(tempDict, f"chunk{i}.wav")
                    audio_chunk.export(chunk_filename, format="wav")
                    # Recognize the chunk
                    with sr.AudioFile(chunk_filename) as source:
                        audio_listened = r.record(source)
                        try:
                            # Use the language requested by the caller
                            text = r.recognize_google(
                                audio_listened,
                                language=AudioFile.dict_languages[language])
                        except sr.UnknownValueError as e:
                            # No intelligible speech in this chunk: skip it
                            print("Error : ", str(e))
                        else:
                            text = f"{text.capitalize()}. "
                            print(chunk_filename, " : ", text)
                            if self.selected_seg:
                                # One entry per recognized chunk
                                segments.append(text)
                            else:
                                # Accumulate a single transcription
                                whole_text += text
                    # Count every chunk, recognized or not
                    progressBar.advance()
            finally:
                # Close the progress bar even if recognition fails
                progressBar.finish()

        # Return the text for all recognized chunks
        if self.selected_seg:
            return segments
        return whole_text

    def sendData(self):
        """Transcribe the selected audio file and send the result.

        Sends ``None`` on the 'Text data' output, with a warning in the
        info box, when no file is selected, when speech recognition fails,
        or when the transcription method returns ``None`` (unsupported
        file). Otherwise one Input is created per transcribed chunk (or a
        single one for the whole text, depending on ``selected_seg``),
        each labelled with the file name, and their concatenation is sent.
        """
        if not self.file:
            self.infoBox.setText(u"Please select input file.", "warning")
            self.send('Text data', None, self)
            return

        # Clear created Inputs.
        self.clearCreatedInputs()

        # Get transcription. RequestError is what the recognizer raises
        # when the API request itself fails, which is the situation the
        # message below describes; it was previously not caught.
        try:
            transcription = self.get_large_audio_transcription(
                self.file,
                language=self.language,
                set_silence_len=self.selected_dur,
                set_silence_threshold=self.selected_vol)
        except (speech_recognition.UnknownValueError,
                speech_recognition.RequestError):
            self.infoBox.setText(
                u"You seem to have overuseed the built-in API key, refer to the documentation for further informations.",
                "warning")
            self.send('Text data', None, self)
            return

        # Checks if there is a transcription
        if transcription is None:
            self.infoBox.setText(u"You must use mp3 or wav audio files.",
                                 "warning")
            self.send('Text data', None, self)
            return

        # Label the inputs with the file name. The previous regex ended in
        # the character class "[mp3|wav]" and re.findall() returned a
        # list, so a list was passed as label instead of a string.
        inputLabel = os.path.basename(self.file)

        # NOTE(review): an empty transcription (no chunk recognized) is
        # passed on unchanged -- confirm that Input and
        # Segmenter.concatenate accept it.
        if self.selected_seg:
            for chunk in transcription:
                self.createdInputs.append(Input(chunk, label=inputLabel))
        else:
            self.createdInputs.append(Input(transcription, label=inputLabel))

        # Concatenates the segmentations in the output segmentation
        self.segmentation = Segmenter.concatenate(
            segmentations=self.createdInputs,
            label=self.captionTitle,
            copy_annotations=False,
            import_labels_as="")

        # Report the number of segments sent
        message = " Succesfully transcripted ! % i segment@p sent to output" % len(
            self.segmentation)
        message = pluralize(message, len(self.segmentation))
        # Send token...
        self.send("Text data", self.segmentation, self)
        self.infoBox.setText(message)
        self.sendButton.resetSettingsChangedFlag()

    def setCaption(self, title):
        """Update the caption; flag a settings change if it is different.

        When the widget already has a captionTitle, the new title is
        compared with it before the parent class is called, and the send
        button is notified only if they differ.
        """
        titleChanged = (
            "captionTitle" in dir(self) and title != self.captionTitle
        )
        super().setCaption(title)
        if titleChanged:
            self.sendButton.settingsChanged()

    def browse(self):
        """Open a file dialog and store the selected audio file.

        Nothing changes when the dialog returns an empty path. Otherwise
        the normalized path is stored in ``self.file``, its directory
        becomes the starting location of the next dialog, and the send
        button is notified of the change.
        """
        audioPath, _ = QFileDialog.getOpenFileName(
            self,
            # The dialog selects audio files, not text files
            u"Open Audio File",
            self.lastLocation,
            # Patterns inside one Qt name filter are separated by spaces
            u"Audio Files (*.wav *.mp3)",
        )
        if not audioPath:
            return
        normalizedPath = os.path.normpath(audioPath)
        self.file = normalizedPath
        self.lastLocation = os.path.dirname(normalizedPath)
        self.sendButton.settingsChanged()

    def showAdvancedSettings(self):
        """Show or hide the advanced settings per displayAdvancedSettings."""

        self.advancedSettings.setVisible(self.displayAdvancedSettings)

    def detect_format(self, file):
        """Return the extension guessed for *file*, or None if unknown.

        The guess is delegated to ``filetype.guess``. When it cannot
        identify the file it returns None; that case used to raise an
        AttributeError here and now yields None.
        """
        file_type = filetype.guess(file)
        if file_type is None:
            return None
        return file_type.extension

    def to_wav(self, file, tempDict):
        """Convert *file* to wav with ffmpeg and return the new file's path.

        The converted file is written as ``temp.wav`` inside *tempDict*.
        The ffmpeg binary at ``/usr/local/bin/ffmpeg`` is used when it
        exists; otherwise ffmpeg is looked up on the PATH, so the
        conversion also works where ffmpeg is installed elsewhere. The
        exit status of ffmpeg is not checked.
        """
        import shutil  # local import: only needed for the PATH lookup

        # Destination file in the temporary directory
        destination = os.path.join(tempDict, 'temp.wav')

        default_ffmpeg = '/usr/local/bin/ffmpeg'
        if os.path.isfile(default_ffmpeg):
            ffmpeg = default_ffmpeg
        else:
            # Fall back to the default path if ffmpeg is not on the PATH
            ffmpeg = shutil.which('ffmpeg') or default_ffmpeg

        subprocess.call([ffmpeg, '-i', file, destination])
        return destination

    def clearCreatedInputs(self):
        """Release the data of every created Input and empty the list.

        For each entry, Segmentation.set_data() is called with the
        str_index of its first element and None; the list is then emptied
        in place.
        """
        for createdInput in self.createdInputs:
            Segmentation.set_data(createdInput[0].str_index, None)
        self.createdInputs[:] = []

    def onDeleteWidget(self):
        """Free memory when the widget is deleted (overridden method).

        Delegates to clearCreatedInputs().
        """
        self.clearCreatedInputs()