def __init__(self):
        """Widget creator."""

        super().__init__()

        #----------------------------------------------------------------------
        # Other (non settings) attributes...
        
        self.inputSeg = None
        self.selectedCharacters = list()
        self.characters = list()
        self.mustLoad = True
        if INSTALLED_MODELS:
            self.model = INSTALLED_MODELS[0]
        else:
            self.model = ""

        #----------------------------------------------------------------------
        # Next two instructions are helpers from TextableUtils. Corresponding
        # interface elements are declared here and actually drawn below (at
        # their position in the UI)...
        
        self.infoBox = InfoBox(widget=self.controlArea)
        self.sendButton = SendButton(
            widget=self.controlArea,
            master=self,
            callback=self.sendData,
            infoBoxAttribute="infoBox",
            sendIfPreCallback=None,
        )

        #----------------------------------------------------------------------
        # User interface...

        self.characterListbox = gui.listBox(
            widget=self.controlArea,
            master=self,
            value="selectedCharacters",
            labels="characters",
            callback=None,
            tooltip="List of identified characters",
        )
        
        gui.rubber(self.controlArea)

        #----------------------------------------------------------------------
        # Draw Info box and Send button...
        
        self.sendButton.draw()
        self.infoBox.draw()
        self.infoBox.setText("Widget needs input.", "warning")
        
        # Check that there's a model...
        if not self.model:
            self.noLanguageModelWarning()
class MovieReviews(OWTextableBaseWidget):
    """An orange widget to get movie reviews from Imdb"""

    #----------------------------------------------------------------------
    # Widget's metadata...

    name = "Movie Reviews"
    description = "Get movie reviews from imdb"
    icon = ""
    priority = 15

    #----------------------------------------------------------------------
    # Channel definitions...

    inputs = []
    outputs = [("Segmentation", Segmentation)]

    #----------------------------------------------------------------------
    # Handles the futur versions settings

    settingsHandler = VersionedSettingsHandler(
        version=__version__.rsplit(".", 1)[0])

    # Settings
    autoSend = settings.Setting(True)
    myBasket = settings.Setting([])

    #----------------------------------------------------------------------
    # GUI layout parameters...

    want_main_area = False

    def __init__(self):
        super().__init__()

        # Search filters attributs
        self.newQuery = ''
        self.type_results = 'Title'
        self.filter_results = 'Popularity'
        self.nbr_results = '10'
        # Results box attributs
        self.titleLabels = list()
        self.selectedTitles = list()
        # selections box attributs
        self.myTitles = list()
        self.mytitleLabels = list()
        self.createdInputs = list()

        # Mandatory declaration of the info box and the send button
        self.infoBox = InfoBox(widget=self.controlArea)
        self.sendButton = SendButton(
            widget=self.controlArea,
            master=self,
            callback=self.sendData,
            infoBoxAttribute='infoBox',
            sendIfPreCallback=None,
        )

        #----------------------------------------------------------------------
        # User interface...

        # Creation of the different working areas
        queryBox = gui.widgetBox(
            widget=self.controlArea,
            box="Search movies",
            orientation="horizontal",
        )

        filterBox = gui.widgetBox(
            widget=self.controlArea,
            box="Filters",
            orientation="horizontal",
        )

        searchButtonBox = gui.widgetBox(
            widget=self.controlArea,
            orientation="vertical",
        )

        resultBox = gui.widgetBox(
            widget=self.controlArea,
            box="Search results",
            orientation="vertical",
        )

        resultButtonBox = gui.widgetBox(
            widget=resultBox,
            box=False,
            orientation='horizontal',
        )

        corpusBox = gui.widgetBox(
            widget=self.controlArea,
            box="Corpus",
            orientation="vertical",
        )

        corpusButtonBox = gui.widgetBox(
            widget=corpusBox,
            box=False,
            orientation='horizontal',
        )

        # Allows to enter specific text to the research
        gui.lineEdit(
            widget=queryBox,
            master=self,
            value='newQuery',
            orientation='horizontal',
            label=u"Search: ",
            labelWidth=120,
            tooltip=("Enter a string"),
        )

        # Allows to choose a type of search
        searchType = gui.comboBox(
            widget=queryBox,
            master=self,
            value="type_results",
            items=[
                "Title",
                "Actor",
                "Genre",
            ],
            sendSelectedValue=True,
            orientation="horizontal",
            label="Search Type: ",
            labelWidth=120,
            tooltip=("Please select the desired search.\n"),
        )
        # Allows to chose a filter for the search
        searchFilter = gui.comboBox(
            widget=filterBox,
            master=self,
            value="filter_results",
            items=[
                "Popularity",
                "Alphabetical",
                "Random",
            ],
            sendSelectedValue=True,
            orientation="horizontal",
            label="Search by: ",
            labelWidth=120,
            tooltip=("Please select the desired search.\n"),
        )

        # Allows to choose the wanted results numberp (10 by 10)
        searchNbr = gui.comboBox(
            widget=filterBox,
            master=self,
            value="nbr_results",
            items=[
                "10",
                "20",
                "30",
                "40",
                "50",
            ],
            sendSelectedValue=True,
            orientation="horizontal",
            label="Number of results: ",
            labelWidth=120,
            tooltip=("Please select the desired search.\n"),
        )

        # Reasearch button
        self.searchButton = gui.button(
            widget=searchButtonBox,
            master=self,
            label="Search",
            callback=self.searchMovies,
            tooltip="Connect to imdbpy and make a research",
        )

        # List Box where all the searched movies are stocked
        self.titleListbox = gui.listBox(
            widget=resultBox,
            master=self,
            value="selectedTitles",
            labels="titleLabels",
            callback=None,
            tooltip="The list of titles whose content will be imported",
        )
        self.titleListbox.setMinimumHeight(150)
        self.titleListbox.setSelectionMode(3)

        # Add movies button
        self.addButton = gui.button(
            widget=resultButtonBox,
            master=self,
            label=u'Add to corpus',
            callback=self.addToCorpus,
            tooltip=(u"Move the selected movie downward in your corpus."),
        )
        self.addButton.setDisabled(True)

        # Clear button
        self.clearButton = gui.button(
            widget=resultButtonBox,
            master=self,
            label="Clear results",
            callback=self.clearResults,
            tooltip="Clear results",
        )
        self.clearButton.setDisabled(True)
        #gui.separator(widget=queryBox, height=3)

        # Corpus where confirmed movies are moved and stocked
        self.mytitleListbox = gui.listBox(
            widget=corpusBox,
            master=self,
            value="myTitles",
            labels="mytitleLabels",
            callback=lambda: self.removeButton.setDisabled(self.myTitles ==
                                                           list()),
            tooltip="The list of titles whose content will be imported",
        )
        self.mytitleListbox.setMinimumHeight(150)
        self.mytitleListbox.setSelectionMode(3)

        # Remove movie button
        self.removeButton = gui.button(
            widget=corpusButtonBox,
            master=self,
            label=u'Remove from corpus',
            callback=self.remove,
            tooltip=(u"Remove the selected movie from your corpus."),
        )
        self.removeButton.setDisabled(True)

        # Delete all confirmed movies button
        self.clearmyBasket = gui.button(
            widget=corpusButtonBox,
            master=self,
            label=u'Clear corpus',
            callback=self.clearCorpus,
            tooltip=(u"Remove all movies from your corpus."),
        )
        self.clearmyBasket.setDisabled(True)

        gui.separator(widget=corpusBox, height=3)
        gui.rubber(self.controlArea)
        #----------------------------------------------------------------------

        # Draw Info box and Send button
        self.sendButton.draw()
        self.searchButton.setDefault(True)
        self.infoBox.draw()

        # Send data if autoSend.
        self.sendButton.sendIf()

    def searchMovies(self):
        """Search from imdb movie database"""
        result_list = {}
        query_string = self.newQuery

        if query_string != "":
            counter = 1
            counter_max = int(self.nbr_results)
            result_id = 0
            result_artist = []

            self.controlArea.setDisabled(True)

            # Initialize progress bar
            progressBar = ProgressBar(self, iterations=counter_max)

            ia = imdb.IMDb()

            # movie name
            name = query_string

            # searching the movie
            search = ia.search_movie(name)
            print(search)

            # Each result is stored in a dictionnary with its title
            # and year of publication if it is specified
            for result in search:
                if counter <= counter_max:
                    #print(counter)
                    #print(counter_max)
                    try:
                        result_id += 1
                        year = result['year']
                        movie_id = result.movieID
                        result_list[result_id] = {
                            'name': result,
                            'year': year,
                            'id': movie_id
                        }
                    except KeyError:
                        result_id += 1
                        result_list[result_id] = {
                            'name': result,
                        }

                    counter += 1
                else:
                    break

            # 1 tick on the progress bar of the widget
            progressBar.advance()
            # Stored the results list in the "result_list" variable
            self.searchResults = result_list

            # Reset and clear the visible widget list
            del self.titleLabels[:]

            # Update the results list with the search results
            # in order to display them
            for idx in self.searchResults:
                try:
                    result_string = f'{self.searchResults[idx]["name"]} - {self.searchResults[idx]["year"]}'
                    self.titleLabels.append(result_string)
                except KeyError:
                    result_string = f'{self.searchResults[idx]["name"]}'
                    self.titleLabels.append(result_string)

            self.titleLabels = self.titleLabels
            self.clearButton.setDisabled(False)
            self.addButton.setDisabled(False)

            # Clear progress bar.
            progressBar.finish()
            self.controlArea.setDisabled(False)

        else:
            self.infoBox.setText("Please enter a movie title", "warning")

    # Add movie to corpus
    def addToCorpus(self):
        """Add movies in your selection """
        for selectedTitle in self.selectedTitles:
            newMovie = self.searchResults[selectedTitle + 1]
            if newMovie not in self.myBasket:
                self.myBasket.append(newMovie)
        self.updateCorpus()
        self.sendButton.settingsChanged()

    # Make the movie appear in the corpus Listbox
    def updateCorpus(self):
        """Update the corpus box list in order to view the movies added"""
        self.mytitleLabels = list()
        for newMovie in self.myBasket:
            try:
                result_string = f'{newMovie["name"]} - {newMovie["year"]}'
                self.mytitleLabels.append(result_string)
            except KeyError:
                result_string = newMovie["name"]
                self.mytitleLabels.append(result_string)
        self.mytitleLabels = self.mytitleLabels

        self.clearmyBasket.setDisabled(self.myBasket == list())
        self.removeButton.setDisabled(self.myTitles == list())

    def remove(self):
        """Remove the selected movie in the corpus """
        self.myBasket = [
            movie for idx, movie in enumerate(self.myBasket)
            if idx not in self.myTitles
        ]
        self.updateCorpus()
        self.sendButton.settingsChanged()

    # Remove movies function
    def sendData(self):
        """Compute result of widget processing and send to output"""
        # Skip if title list is empty:
        if self.myBasket == list():
            self.infoBox.setText(
                "Your corpus is empty, please add some movies first",
                "warning")
            return

        # Clear created Inputs.
        self.clearCreatedInputs()

        self.controlArea.setDisabled(True)

        # Initialize progress bar.
        progressBar = ProgressBar(self, iterations=len(self.myBasket))

        # Attempt to connect to Genius and retrieve lyrics...
        selectedSongs = list()
        list_review = list()
        annotations = list()
        try:
            for item in self.myBasket:
                ia = imdb.IMDb()
                movie = ia.get_movie_reviews(item['id'])
                list_review.append(movie)
                # 1 tick on the progress bar of the widget
                progressBar.advance()

        # If an error occurs (e.g. http error, or memory error)...
        except:
            # Set Info box and widget to "error" state.
            self.infoBox.setText("Couldn't download data from imdb", "error")
            self.controlArea.setDisabled(False)
            return

        # Store movie critics strings in input objects...
        for movie in list_review:
            #for key, value in movie.items():
            #try:
            data = movie.get('data', "")
            reviews_data = data.get('reviews')
            for review in reviews_data:
                reviews = review.get('content')
                newInput = Input(reviews)
                self.createdInputs.append(newInput)
                new_dict = review.copy()
                annotations.append(new_dict)
        """
            except:
                self.infoBox.setText(
                "The movie has no associated reviews",
                "warning"
            )
            self.controlArea.setDisabled(False)
            return
        """

        # If there's only one item, the widget's output is the created Input.
        if len(self.createdInputs) == 1:
            self.segmentation = self.createdInputs[0]

        # Otherwise the widget's output is a concatenation...
        else:
            self.segmentation = Segmenter.concatenate(
                self.createdInputs,
                import_labels_as=None,
            )

        # Annotate segments...

        for idx, segment in enumerate(self.segmentation):
            segment.annotations.update(annotations[idx])
            self.segmentation[idx] = segment

        # Clear progress bar.
        progressBar.finish()

        self.controlArea.setDisabled(False)

        # Set status to OK and report data size...
        message = f"{len(self.segmentation)} segment@p sent to output"
        message = pluralize(message, len(self.segmentation))
        numChars = 0
        for segment in self.segmentation:
            segmentLength = len(Segmentation.get_data(segment.str_index))
            numChars += segmentLength
        message += "(%i character@p)." % numChars
        message = pluralize(message, numChars)
        self.infoBox.setText(message)

        self.send('Segmentation', self.segmentation, self)
        self.sendButton.resetSettingsChangedFlag()

    def clearResults(self):
        """Clear the results list"""
        del self.titleLabels[:]
        self.titleLabels = self.titleLabels
        self.clearButton.setDisabled(True)

    def clearCreatedInputs(self):
        """Delete all Input objects that have been created."""
        for i in self.createdInputs:
            Segmentation.set_data(i[0].str_index, None)
        del self.createdInputs[:]

    def clearCorpus(self):
        """Remove all movies in the corpus"""
        self.mytitleLabels = list()
        self.myBasket = list()
        self.sendButton.settingsChanged()
        self.clearmyBasket.setDisabled(True)
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        # Other attributes...
        self.segmentation = None
        self.createdInputs = list()
        self.fileLabels = list()
        self.selectedFileLabels = list()
        self.newFiles = u''
        self.newAnnotationKey = u''
        self.newAnnotationValue = u''
        self.pdfPassword = u''  # SuperTextFiles
        self.ocrForce = False  # SuperTextFiles
        self.ocrLanguages = u'eng'  # SuperTextFiles
        self.infoBox = InfoBox(widget=self.controlArea)
        self.sendButton = SendButton(
            widget=self.controlArea,
            master=self,
            callback=self.sendData,
            infoBoxAttribute='infoBox',
            sendIfPreCallback=self.updateGUI,
        )
        self.advancedSettings = AdvancedSettings(
            widget=self.controlArea,
            master=self,
            callback=self.sendButton.settingsChanged,
        )

        # GUI...

        # Advanced settings checkbox...
        self.advancedSettings.draw()

        # BASIC GUI...

        # Basic file box
        basicFileBox = gui.widgetBox(
            widget=self.controlArea,
            box=u'Source',
            orientation='vertical',
            addSpace=False,
        )
        basicFileBoxLine1 = gui.widgetBox(
            widget=basicFileBox,
            box=False,
            orientation='horizontal',
        )
        gui.lineEdit(
            widget=basicFileBoxLine1,
            master=self,
            value='file',
            orientation='horizontal',
            label=u'File path:',
            labelWidth=101,
            callback=self.sendButton.settingsChanged,
            tooltip=(u"The path of the file."),
        )
        gui.separator(widget=basicFileBoxLine1, width=5)
        gui.button(
            widget=basicFileBoxLine1,
            master=self,
            label=u'Browse',
            callback=self.browse,
            tooltip=(u"Open a dialog for selecting file."),
        )
        gui.separator(widget=basicFileBox, width=3)
        advancedEncodingsCombobox = gui.comboBox(
            widget=basicFileBox,
            master=self,
            value='encoding',
            items=getPredefinedEncodings(),
            sendSelectedValue=True,
            orientation='horizontal',
            label=u'Encoding:',
            labelWidth=101,
            callback=self.sendButton.settingsChanged,
            tooltip=(u"Select input file(s) encoding."),
        )
        addSeparatorAfterDefaultEncodings(advancedEncodingsCombobox)
        addAutoDetectEncoding(advancedEncodingsCombobox)
        gui.separator(widget=basicFileBox, width=3)
        self.advancedSettings.basicWidgets.append(basicFileBox)
        self.advancedSettings.basicWidgetsAppendSeparator()

        # ADVANCED GUI...

        defaultLabelWidth = 120  # SuperTextFiles

        # File box
        fileBox = gui.widgetBox(
            widget=self.controlArea,
            box=u'Sources',
            orientation='vertical',
            addSpace=False,
        )
        fileBoxLine1 = gui.widgetBox(
            widget=fileBox,
            box=False,
            orientation='horizontal',
            addSpace=True,
        )
        self.fileListbox = gui.listBox(
            widget=fileBoxLine1,
            master=self,
            value='selectedFileLabels',
            labels='fileLabels',
            callback=self.updateFileBoxButtons,
            tooltip=(
                u"The list of files whose content will be imported.\n"
                u"\nIn the output segmentation, the content of each\n"
                u"file appears in the same position as in the list.\n"
                u"\nColumn 1 shows the file's name.\n"
                u"Column 2 shows the file's annotation (if any).\n"
                # Start SuperTextFiles
                # u"Column 3 shows the file's encoding." # removed
                u"Column 3 shows the file's password (if any).\n"
                u"Column 4 shows the file's languages (if any).\n"
                u"Column 5 shows if OCR is forced.\n"
                u"Column 6 shows the file's encoding."
                # End SuperTextFiles
            ),
        )
        font = QFont()
        font.setFamily('Courier')
        font.setStyleHint(QFont.Courier)
        font.setPixelSize(12)
        self.fileListbox.setFont(font)
        fileBoxCol2 = gui.widgetBox(
            widget=fileBoxLine1,
            orientation='vertical',
        )
        self.moveUpButton = gui.button(
            widget=fileBoxCol2,
            master=self,
            label=u'Move Up',
            callback=self.moveUp,
            tooltip=(u"Move the selected file upward in the list."),
        )
        self.moveDownButton = gui.button(
            widget=fileBoxCol2,
            master=self,
            label=u'Move Down',
            callback=self.moveDown,
            tooltip=(u"Move the selected file downward in the list."),
        )
        self.removeButton = gui.button(
            widget=fileBoxCol2,
            master=self,
            label=u'Remove',
            callback=self.remove,
            tooltip=(u"Remove the selected file from the list."),
        )
        self.clearAllButton = gui.button(
            widget=fileBoxCol2,
            master=self,
            label=u'Clear All',
            callback=self.clearAll,
            tooltip=(u"Remove all files from the list."),
        )
        self.exportButton = gui.button(
            widget=fileBoxCol2,
            master=self,
            label=u'Export List',
            callback=self.exportList,
            tooltip=(u"Open a dialog for selecting a file where the file\n"
                     u"list can be exported in JSON format."),
        )
        self.importButton = gui.button(
            widget=fileBoxCol2,
            master=self,
            label=u'Import List',
            callback=self.importList,
            tooltip=(u"Open a dialog for selecting a file list to\n"
                     u"import (in JSON format). Files from this list\n"
                     u"will be added to those already imported."),
        )
        fileBoxLine2 = gui.widgetBox(
            widget=fileBox,
            box=False,
            orientation='vertical',
        )
        # Add file box
        addFileBox = gui.widgetBox(
            widget=fileBoxLine2,
            box=True,
            orientation='vertical',
        )
        addFileBoxLine1 = gui.widgetBox(
            widget=addFileBox,
            orientation='horizontal',
        )
        gui.lineEdit(
            widget=addFileBoxLine1,
            master=self,
            value='newFiles',
            orientation='horizontal',
            label=u'File path(s):',
            labelWidth=defaultLabelWidth,
            callback=self.updateGUI,
            tooltip=(u"The paths of the files that will be added to the\n"
                     u"list when button 'Add' is clicked.\n\n"
                     u"Successive paths must be separated with ' / ' \n"
                     u"(whitespace + slash + whitespace). Their order in\n"
                     u"the list will be the same as in this field."),
        )
        gui.separator(widget=addFileBoxLine1, width=5)
        gui.button(
            widget=addFileBoxLine1,
            master=self,
            label=u'Browse',
            callback=self.browse,
            tooltip=(u"Open a dialog for selecting files.\n\n"
                     u"To select multiple files at once, either draw a\n"
                     u"selection box around them, or use shift and/or\n"
                     u"ctrl + click.\n\n"
                     u"Selected file paths will appear in the field to\n"
                     u"the left of this button afterwards, ready to be\n"
                     u"added to the list when button 'Add' is clicked."),
        )
        gui.separator(widget=addFileBox, width=3)
        basicEncodingsCombobox = gui.comboBox(
            widget=addFileBox,
            master=self,
            value='encoding',
            items=getPredefinedEncodings(),
            sendSelectedValue=True,
            orientation='horizontal',
            label=u'Encoding:',
            labelWidth=defaultLabelWidth,
            callback=self.updateGUI,
            tooltip=(u"Select input file(s) encoding."),
        )
        addSeparatorAfterDefaultEncodings(basicEncodingsCombobox)
        addAutoDetectEncoding(basicEncodingsCombobox)
        self.encoding = self.encoding
        gui.separator(widget=addFileBox, width=3)
        gui.lineEdit(
            widget=addFileBox,
            master=self,
            value='newAnnotationKey',
            orientation='horizontal',
            label=u'Annotation key:',
            labelWidth=defaultLabelWidth,
            callback=self.updateGUI,
            tooltip=(u"This field lets you specify a custom annotation\n"
                     u"key associated with each file that is about to be\n"
                     u"added to the list."),
        )
        gui.separator(widget=addFileBox, width=3)
        gui.lineEdit(
            widget=addFileBox,
            master=self,
            value='newAnnotationValue',
            orientation='horizontal',
            label=u'Annotation value:',
            labelWidth=defaultLabelWidth,
            callback=self.updateGUI,
            tooltip=(u"This field lets you specify the annotation value\n"
                     u"associated with the above annotation key."),
        )

        ### Start SuperTextFiles addition
        gui.separator(widget=addFileBox, width=3)
        # Field for PDF password
        gui.lineEdit(
            widget=addFileBox,
            master=self,
            value='pdfPassword',
            orientation='horizontal',
            label=u'PDF password:'******'ocrLanguages',
            orientation='horizontal',
            label=u'OCR Language(s):',
            labelWidth=defaultLabelWidth,
            callback=self.updateGUI,
            tooltip=(u"This field lets you specify languages\n"
                     u"for the OCR process. Ex.: fra+ita"),
        )

        gui.checkBox(
            widget=addFileBox,
            master=self,
            value='ocrForce',
            label=u'Force OCR',
            labelWidth=defaultLabelWidth,
            callback=self.updateGUI,
            tooltip=(u"Force to use an OCR detection on this file"),
        )
        ### End SuperTextFiles addition

        gui.separator(widget=addFileBox, width=3)
        self.addButton = gui.button(
            widget=addFileBox,
            master=self,
            label=u'Add',
            callback=self.add,
            tooltip=(u"Add the file(s) currently displayed in the\n"
                     u"'Files' text field to the list.\n\n"
                     u"Each of these files will be associated with the\n"
                     u"specified encoding and annotation (if any).\n\n"
                     u"Other files may be selected afterwards and\n"
                     u"assigned a different encoding and annotation."),
        )
        self.advancedSettings.advancedWidgets.append(fileBox)
        self.advancedSettings.advancedWidgetsAppendSeparator()

        # Options box...
        optionsBox = gui.widgetBox(
            widget=self.controlArea,
            box=u'Options',
            orientation='vertical',
            addSpace=False,
        )
        optionsBoxLine1 = gui.widgetBox(
            widget=optionsBox,
            box=False,
            orientation='horizontal',
        )
        gui.checkBox(
            widget=optionsBoxLine1,
            master=self,
            value='importFilenames',
            label=u'Import file names with key:',
            labelWidth=180,
            callback=self.sendButton.settingsChanged,
            tooltip=(u"Import file names as annotations."),
        )
        self.importFilenamesKeyLineEdit = gui.lineEdit(
            widget=optionsBoxLine1,
            master=self,
            value='importFilenamesKey',
            orientation='horizontal',
            callback=self.sendButton.settingsChanged,
            tooltip=(u"Annotation key for importing file names."),
        )
        gui.separator(widget=optionsBox, width=3)
        optionsBoxLine2 = gui.widgetBox(
            widget=optionsBox,
            box=False,
            orientation='horizontal',
        )
        gui.checkBox(
            widget=optionsBoxLine2,
            master=self,
            value='autoNumber',
            label=u'Auto-number with key:',
            labelWidth=180,
            callback=self.sendButton.settingsChanged,
            tooltip=(u"Annotate files with increasing numeric indices."),
        )
        self.autoNumberKeyLineEdit = gui.lineEdit(
            widget=optionsBoxLine2,
            master=self,
            value='autoNumberKey',
            orientation='horizontal',
            callback=self.sendButton.settingsChanged,
            tooltip=(u"Annotation key for file auto-numbering."),
        )
        gui.separator(widget=optionsBox, width=3)
        self.advancedSettings.advancedWidgets.append(optionsBox)
        self.advancedSettings.advancedWidgetsAppendSeparator()

        gui.rubber(self.controlArea)

        # Send button...
        self.sendButton.draw()

        # Info box...
        self.infoBox.draw()

        self.adjustSizeWithTimer()
        QTimer.singleShot(0, self.sendButton.sendIf)
Ejemplo n.º 4
0
class Gutenberg(OWTextableBaseWidget):
    """Textable widget for importing clean texts from Gutenberg
    (https://www.gutenberg.org/)
    """

    #----------------------------------------------------------------------
    # Widget's metadata...

    name = "Gutenberg"
    description = "Gutenberg caching and importation"
    icon = "icons/gutenberg.png"
    priority = 10

    #----------------------------------------------------------------------
    # Channel definitions...

    inputs = []
    outputs = [("Gutenberg importation", Segmentation)]

    #----------------------------------------------------------------------
    # Layout parameters...

    want_main_area = False

    #----------------------------------------------------------------------
    # Settings...

    settingsHandler = VersionedSettingsHandler(
        version=__version__.rsplit(".", 1)[0])

    # Saved settings
    autoSend = settings.Setting(False)
    myBasket = settings.Setting([])

    def __init__(self):
        """Widget creator."""

        super().__init__()

        # ATTRIBUTS
        # searchFunction
        self.searchResults = None
        self.inputSeg = None
        # newQuery = attribut box lineEdit (search something)
        self.titleQuery = ''
        self.authorQuery = ''
        self.nbr_results = 10
        # Results box attributs
        self.titleLabels = list()
        self.selectedTitles = list()
        # selections box attributs
        self.myTitles = list()
        self.mytitleLabels = list()
        # stock all the inputs (songs) in a list
        self.createdInputs = list()

        # Next two instructions are helpers from TextableUtils. Corresponding
        # interface elements are declared here and actually drawn below (at
        # their position in the UI)...
        self.infoBox = InfoBox(widget=self.controlArea)
        self.sendButton = SendButton(
            widget=self.controlArea,
            master=self,
            callback=self.sendData,
            infoBoxAttribute="infoBox",
        )
        #----------------------------------------------------------------------
        # User interface...
        # Create the working area
        queryBox = gui.widgetBox(
            widget=self.controlArea,
            box="Search books",
            orientation="vertical",
        )

        self.cacheGenerationButton = gui.button(
            widget=queryBox,
            master=self,
            label="Generate cache",
            callback=self.generate_cache,
            tooltip="Generate the gutenberg cache, this might take a while...",
        )

        # Allows to enter specific text to the research
        #  Uses "newQuery" attribut
        gui.lineEdit(
            widget=queryBox,
            master=self,
            value='titleQuery',
            orientation='horizontal',
            label=u"Title: ",
            labelWidth=120,
            tooltip=("Enter a string"),
        )

        # gui.lineEdit(
        #     widget=queryBox,
        #     master=self,
        #     value='authorQuery',
        #     orientation='horizontal',
        #     label=u"Author: ",
        #     labelWidth=120,
        #     tooltip=("Enter a string"),
        # )

        # Allows to choose the wanted results numberp (10 by 10)
        queryNbr = gui.comboBox(
            widget=queryBox,
            master=self,
            value="nbr_results",
            items=[
                "10", "20", "30", "40", "50", "60", "70", "80", "90", "100",
                "200", "300", "400", "500", "1000"
            ],
            sendSelectedValue=True,
            orientation="horizontal",
            label="Number of results: ",
            labelWidth=120,
            tooltip=("Please select the desired search.\n"),
        )

        # Reasearch button
        # Uses "searchFunction" attribut
        self.searchButton = gui.button(
            widget=queryBox,
            master=self,
            label="Search",
            callback=self.search,
            tooltip="Connect Genius and make a research",
        )
        self.titleListbox = gui.listBox(
            widget=queryBox,
            master=self,
            value="selectedTitles",  # setting (list)
            labels="titleLabels",  # setting (list)
            callback=lambda: self.addButton.setDisabled(self.selectedTitles ==
                                                        list()),
            tooltip="The list of titles whose content will be imported",
        )
        self.titleListbox.setMinimumHeight(150)
        self.titleListbox.setSelectionMode(3)

        boxbutton = gui.widgetBox(
            widget=queryBox,
            box=False,
            orientation='horizontal',
        )
        # Add songs button
        self.addButton = gui.button(
            widget=boxbutton,
            master=self,
            label=u'Add to corpus',
            callback=self.add,
            tooltip=(u"Move the selected book downward in your corpus."),
        )
        self.addButton.setDisabled(True)

        # Clear button
        # Uses "clearResults" function
        self.clearButton = gui.button(
            widget=boxbutton,
            master=self,
            label="Clear results",
            callback=self.clearResults,
            tooltip="Clear results",
        )
        self.clearButton.setDisabled(True)
        gui.separator(widget=queryBox, height=3)

        # area where confirmed songs are moved and stocked
        mytitleBox = gui.widgetBox(
            widget=self.controlArea,
            box="Corpus",
            orientation="vertical",
        )

        self.mytitleListbox = gui.listBox(
            widget=mytitleBox,
            master=self,
            value="myTitles",
            labels="mytitleLabels",
            callback=lambda: self.removeButton.setDisabled(self.myTitles ==
                                                           list()),
            tooltip="The list of books which will be imported",
        )
        self.mytitleListbox.setMinimumHeight(150)
        self.mytitleListbox.setSelectionMode(3)

        boxbutton2 = gui.widgetBox(
            widget=mytitleBox,
            box=False,
            orientation='horizontal',
        )
        # Remove songs button
        self.removeButton = gui.button(
            widget=boxbutton2,
            master=self,
            label=u'Remove from corpus',
            callback=self.remove,
            tooltip=(u"Remove the selected book from your corpus."),
        )
        self.removeButton.setDisabled(True)

        # Delete all confirmed texts button
        self.clearmyBasketButton = gui.button(
            widget=boxbutton2,
            master=self,
            label=u'Clear corpus',
            callback=self.clearmyBasket,
            tooltip=(u"Remove all books from your corpus."),
        )
        self.clearmyBasketButton.setDisabled(True)

        gui.separator(widget=mytitleBox, height=3)
        gui.rubber(self.controlArea)
        #----------------------------------------------------------------------

        # Draw Info box and Send button
        self.sendButton.draw()
        self.searchButton.setDefault(True)
        self.infoBox.draw()

        # Update the selections list
        self.updateMytitleLabels()

        # Send data if autoSend.
        self.sendButton.sendIf()

    def generate_cache(self):
        GutenbergCache.create(refresh=True,
                              download=True,
                              unpack=True,
                              parse=True,
                              deleteTemp=True)

    def search(self):
        """ Parse a query string and do a search in the Gutenberg cache
        """

        query_string = self.titleQuery

        if query_string:
            # parse query and lookup in gutenbergcache
            cache = GutenbergCache.get_cache()
            query_results = cache.native_query(
                sql_query=
                "select * from titles where upper(name) like upper('%{query}%') limit {limit}"
                .format(query=query_string, limit=self.nbr_results))
            # get the results
            self.searchResults = list(query_results)

            # display info message
            n_results = len(self.searchResults)
            self.infoBox.setText("{n} result{s} have been found".format(
                n=n_results, s="s" if n_results > 0 else ""))

            # TODO: display results
            # Update the results list with the search results
            # in order to display them
            for idx in self.searchResults:
                result_string = str(idx[1])
                self.titleLabels.append(result_string)

                self.titleLabels = self.titleLabels
                self.clearButton.setDisabled(False)
                self.addButton.setDisabled(self.selectedTitles == list())

                self.controlArea.setDisabled(False)

        else:
            self.infoBox.setText("You didn't search anything", "warning")

    # Function clearing the results list
    def clearResults(self):
        """Clear the results list"""
        del self.titleLabels[:]
        self.titleLabels = self.titleLabels
        self.clearButton.setDisabled(True)
        self.addButton.setDisabled(self.titleLabels == list())

    # Add texts function
    def add(self):
        """Add songs in your selection """
        for selectedTitle in self.selectedTitles:
            titleData = self.searchResults[selectedTitle]
            if titleData not in self.myBasket:
                self.myBasket.append(titleData)

        self.updateMytitleLabels()
        self.sendButton.settingsChanged()

    # Update selections function
    def updateMytitleLabels(self):
        self.mytitleLabels = list()
        for titleData in self.myBasket:
            result_string = titleData[1]
            self.mytitleLabels.append(result_string)
        self.mytitleLabels = self.mytitleLabels

        self.clearmyBasketButton.setDisabled(self.myBasket == list())
        self.removeButton.setDisabled(self.myTitles == list())

    # fonction qui retire la selection de notre panier
    def remove(self):
        """Remove the selected songs in your selection """
        self.myBasket = [
            title for idx, title in enumerate(self.myBasket)
            if idx not in self.myTitles
        ]
        self.updateMytitleLabels()
        self.sendButton.settingsChanged()

    # Clear selections function
    def clearmyBasket(self):
        """Remove all songs in your selection """
        self.mytitleLabels = list()
        self.myBasket = list()
        self.sendButton.settingsChanged()
        self.clearmyBasketButton.setDisabled(True)

    # Function computing results then sending them to the widget output
    def sendData(self):
        """Compute result of widget processing and send to output"""
        # Skip if title list is empty:
        if self.myBasket == list():
            self.infoBox.setText(
                "Your corpus is empty, please add some books first", "warning")
            return

        # Clear created Inputs.
        self.clearCreatedInputs()

        self.controlArea.setDisabled(True)

        # Initialize progress bar.
        progressBar = ProgressBar(
            self,
            iterations=len(self.myBasket),
        )

        selectedTexts = list()
        text_content = list()
        annotations = list()
        # get the Gutenberg cache
        cache = GutenbergCache.get_cache()
        try:
            # TODO: Retrieve selected texts from gutenberg
            for text in self.myBasket:

                # Get the id of the text
                query_id = cache.native_query(
                    sql_query=
                    "select gutenbergbookid from books where id == {selected_id}"
                    .format(selected_id=text[2]))
                gutenberg_id = list(query_id)

                # Get the text with Gutenbergpy
                gutenberg_text = gutenbergpy.textget.strip_headers(
                    gutenbergpy.textget.get_text_by_id(gutenberg_id[0][0]))
                text_content.append(gutenberg_text)

                annotations.append(text[1])
                progressBar.advance()

        # If an error occurs (e.g. http error, or memory error)...
        except Exception:
            # Set Info box and widget to "error" state.
            self.infoBox.setText("Couldn't download data from Gutenberg",
                                 "error")
            self.controlArea.setDisabled(False)
            return

        # TODO: send gutenberg texts as output
        # Store downloaded lyrics strings in input objects...
        for text in text_content:
            newInput = Input(text, self.captionTitle)
            self.createdInputs.append(newInput)

        # If there"s only one play, the widget"s output is the created Input.
        if len(self.createdInputs) == 1:
            self.segmentation = self.createdInputs[0]

        # Otherwise the widget"s output is a concatenation...
        else:
            self.segmentation = Segmenter.concatenate(
                self.createdInputs,
                self.captionTitle,
                import_labels_as=None,
            )

        # TODO: annotate with book metadata
        # Annotate segments...
        for idx, segment in enumerate(self.segmentation):
            segment.annotations.update({"title": annotations[idx]})
            self.segmentation[idx] = segment

        # Clear progress bar.
        progressBar.finish()

        self.controlArea.setDisabled(False)

        # Set status to OK and report data size...
        message = "%i segment@p sent to output " % len(self.segmentation)
        message = pluralize(message, len(self.segmentation))
        numChars = 0
        for segment in self.segmentation:
            segmentLength = len(Segmentation.get_data(segment.str_index))
            numChars += segmentLength
        message += "(%i character@p)." % numChars
        message = pluralize(message, numChars)
        self.infoBox.setText(message)

        self.send("Gutenberg importation", self.segmentation, self)
        self.sendButton.resetSettingsChangedFlag()

    def clearCreatedInputs(self):
        """Delete all Input objects that have been created."""
        for i in self.createdInputs:
            Segmentation.set_data(i[0].str_index, None)
        del self.createdInputs[:]

    # The following method needs to be copied verbatim in
    # every Textable widget that sends a segmentation...
    def setCaption(self, title):
        if 'captionTitle' in dir(self):
            changed = title != self.captionTitle
            super().setCaption(title)
            if changed:
                self.sendButton.settingsChanged()
        else:
            super().setCaption(title)
    def __init__(self):
        """Init of the module: UI and variables definition"""
        super().__init__()

        # queryBox indexes
        self.indicesPanier = list()

        #----------------------------------------------------------------------
        # User interface...

        self.infoBox = InfoBox(widget=self.controlArea, )

        #-------------------------#
        #    Main widget boxes    #
        #-------------------------#

        sourceBox = gui.widgetBox(
            widget=self.controlArea,
            box=u'Source',
            orientation='vertical',
            addSpace=False,
        )

        self.filterBox = gui.widgetBox(
            widget=self.controlArea,
            box=u'Filters',
            orientation='vertical',
            addSpace=False,
        )

        self.includeOuterBox = gui.widgetBox(
            widget=self.controlArea,
            box=u'Include',
            orientation='vertical',
            addSpace=False,
        )

        self.requestsBox = gui.widgetBox(
            widget=self.controlArea,
            orientation='horizontal',
            addSpace=False,
        )

        panierBox = gui.widgetBox(
            widget=self.controlArea,
            orientation='vertical',
            box=u'Selection',
            addSpace=False,
        )

        #-------------------#
        #    Send button    #
        #-------------------#

        self.sendButton = SendButton(
            widget=self.controlArea,
            master=self,
            callback=self.send_data,
            infoBoxAttribute='infoBox',
        )

        #------------------------#
        #   Query box elements   #
        #------------------------#

        self.choiceBox = gui.comboBox(
            widget=sourceBox,
            master=self,
            value='mode',
            label="Mode:",
            callback=self.mode_changed,
            tooltip="Choose mode",
            orientation='horizontal',
            sendSelectedValue=True,
            items=["Subreddit", "URL", "Full text"],
            labelWidth=135,
        )

        self.modeBox = gui.widgetBox(
            widget=sourceBox,
            orientation='horizontal',
            addSpace=False,
        )

        self.urlBox = gui.widgetBox(
            widget=sourceBox,
            orientation='horizontal',
            addSpace=False,
        )

        gui.lineEdit(
            widget=self.urlBox,
            master=self,
            value='URL',
            orientation='horizontal',
            callback=self.sendButton.settingsChanged,
            label=u'Search with URL:',
            labelWidth=135,
        )

        self.subredditBox = gui.widgetBox(
            widget=sourceBox,
            orientation='horizontal',
            addSpace=False,
        )

        gui.lineEdit(
            widget=self.subredditBox,
            master=self,
            value='subreddit',
            orientation='horizontal',
            callback=self.sendButton.settingsChanged,
            label=u'reddit.com/r/...:',
            labelWidth=135,
        )
        self.fullTextBox = gui.widgetBox(
            widget=sourceBox,
            orientation='horizontal',
            addSpace=False,
        )

        gui.lineEdit(
            widget=self.fullTextBox,
            master=self,
            value='fullText',
            orientation='horizontal',
            callback=self.sendButton.settingsChanged,
            label=u'Search on reddit:',
            labelWidth=135,
        )

        #----------------------------#
        #    Filters box elements    #
        #----------------------------#

        self.subredditFilter = gui.widgetBox(
            widget=self.filterBox,
            orientation='horizontal',
            addSpace=False,
        )

        gui.comboBox(
            widget=self.subredditFilter,
            master=self,
            value='sortBy',
            label=u'Sort by:',
            tooltip="Choose mode to sort your posts",
            orientation='horizontal',
            sendSelectedValue=True,
            callback=self.checkSubredditSortMode,
            items=["Hot", "New", "Controversial", "Top", "Rising"],
            labelWidth=135,
        )

        self.fullTextFilter = gui.widgetBox(
            widget=self.filterBox,
            orientation='horizontal',
            addSpace=False,
        )

        gui.comboBox(
            widget=self.fullTextFilter,
            master=self,
            value='sortByFT',
            label="Sort by:",
            tooltip="Choose mode",
            orientation='horizontal',
            sendSelectedValue=True,
            callback=self.checkSearchSortMode,
            items=["Relevance", "Top", "New", "Comments"],
            labelWidth=135,
        )

        self.timeBox = gui.widgetBox(
            widget=self.filterBox,
            orientation='horizontal',
            addSpace=False,
        )

        gui.comboBox(
            widget=self.timeBox,
            master=self,
            value='postedAt',
            label=u'Time:',
            tooltip="Choose mode to sort your posts",
            orientation='horizontal',
            callback=self.sendButton.settingsChanged,
            sendSelectedValue=True,
            items=["All", "Past day", "Past hour", "Past month", "Past year"],
            labelWidth=135,
        )

        gui.spin(
            widget=self.filterBox,
            master=self,
            value="amount",
            minv=1,
            maxv=10000,
            label="Max amount of posts:",
            labelWidth=135,
            orientation="horizontal",
            callback=self.sendButton.settingsChanged,
            tooltip="Select the amount of posts that you want",
        )

        #--------------------------#
        #   Include box elements   #
        #--------------------------#

        self.includeBox = gui.widgetBox(
            widget=self.includeOuterBox,
            orientation='horizontal',
            addSpace=False,
        )

        gui.checkBox(
            widget=self.includeBox,
            master=self,
            value='includeImage',
            label=u'Include images',
            callback=self.mode_changed,
        )

        gui.checkBox(
            widget=self.includeBox,
            master=self,
            value='includeComments',
            label=u'Include comments',
            callback=self.mode_changed,
        )

        #--------------------------#
        #   Request box elements   #
        #--------------------------#

        self.refreshButton = gui.button(
            widget=self.requestsBox,
            master=self,
            label=u'Refresh the Data',
            callback=self.refresh_content,
        )

        if len(self.labelsPanier) == 0:
            self.refreshButton.setDisabled(True)

        self.fetchButton = gui.button(
            widget=self.requestsBox,
            master=self,
            label=u'Add Request',
            callback=self.confirm_settings,
        )

        #--------------------------#
        #   Results box elements   #
        #--------------------------#

        panier = gui.listBox(
            widget=panierBox,
            master=self,
            value="indicesPanier",
            labels="labelsPanier",
            callback=lambda: self.removeButton.setDisabled(self.indicesPanier
                                                           == list()),
            tooltip="List of imported corpora.",
        )
        panier.setMinimumHeight(120)
        panier.setSelectionMode(3)

        self.update_list(self.labelsPanier)

        removalBox = gui.widgetBox(
            widget=panierBox,
            orientation='horizontal',
            addSpace=False,
        )

        self.removeButton = gui.button(
            widget=removalBox,
            master=self,
            label="Remove from selection",
            callback=self.removePressed,
            tooltip="Remove the selected corpus.",
        )

        self.removeButton.setDisabled(True)

        self.clearButton = gui.button(
            widget=removalBox,
            master=self,
            label="Clear selection",
            callback=self.clearPressed,
            tooltip="Remove all corpora from selection.",
        )

        if len(self.labelsPanier) == 0:
            self.clearButton.setDisabled(True)

        #------------------------#
        #   End of definitions   #
        #------------------------#

        gui.separator(widget=self.controlArea, height=3)  # Spacer
        gui.rubber(self.controlArea)

        # Info box...

        self.sendButton.draw()
        self.infoBox.draw()

        self.mode_changed()
        self.sendButton.settingsChanged()

        # Send data if autoSend...
        self.sendButton.sendIf()
Ejemplo n.º 6
0
class MovieReviews(OWTextableBaseWidget):
    """An orange widget to get movie reviews from Imdb"""

    #----------------------------------------------------------------------
    # Widget's metadata...

    name = "Movie Reviews"
    description = "Get movie reviews from imdb"
    icon = ""
    priority = 15

    #----------------------------------------------------------------------
    # Channel definitions...

    inputs = []
    outputs = [("Segmentation", Segmentation)]

    #----------------------------------------------------------------------
    # Handles the futur versions settings

    settingsHandler = VersionedSettingsHandler(
        version=__version__.rsplit(".", 1)[0])

    # Settings
    autoSend = settings.Setting(False)
    myBasket = settings.Setting([])

    #----------------------------------------------------------------------
    # GUI layout parameters...

    want_main_area = False

    def __init__(self):
        super().__init__()

        # Search filters attributs
        self.newQuery = ''
        self.type_results = 'Title'
        #self.genre_searched = 'Comedy'
        self.filter_results = 'Popularity'
        self.nbr_results = '10'
        # Results box attributs
        self.titleLabels = list()
        self.selectedTitles = list()
        # selections box attributs
        self.myTitles = list()
        self.mytitleLabels = list()

        # stocks the imdbpy instance
        self.ia = imdb.IMDb()
        # stock all the inputs (movie names) in a list
        self.createdInputs = list()

        # Mandatory declaration of the info box and the send button
        self.infoBox = InfoBox(widget=self.controlArea)
        self.sendButton = SendButton(
            widget=self.controlArea,
            master=self,
            callback=self.sendData,
            infoBoxAttribute='infoBox',
            sendIfPreCallback=None,
        )

        #----------------------------------------------------------------------
        # User interface...

        # Creation of the different working areas
        self.queryBox = gui.widgetBox(
            widget=self.controlArea,
            box="Query",
            orientation="horizontal",
        )

        self.genreBox = gui.widgetBox(
            widget=self.controlArea,
            box="Query",
            orientation="horizontal",
        )

        self.filterBox = gui.widgetBox(
            widget=self.controlArea,
            box="Query options",
            orientation="horizontal",
        )

        searchButtonBox = gui.widgetBox(
            widget=self.controlArea,
            orientation="vertical",
        )

        resultBox = gui.widgetBox(
            widget=self.controlArea,
            box="Search results",
            orientation="vertical",
        )

        # List Box where all the searched movies are stocked
        self.titleListbox = gui.listBox(
            widget=resultBox,
            master=self,
            value="selectedTitles",
            labels="titleLabels",
            callback=lambda: self.addButton.setDisabled(self.selectedTitles ==
                                                        list()),
            tooltip="The list of titles whose content will be imported",
        )
        self.titleListbox.doubleClicked.connect(self.addToCorpus)
        self.titleListbox.setMinimumHeight(150)
        self.titleListbox.setSelectionMode(3)

        resultButtonBox = gui.widgetBox(
            widget=resultBox,
            box=False,
            orientation='horizontal',
        )

        corpusBox = gui.widgetBox(
            widget=self.controlArea,
            box="Corpus",
            orientation="vertical",
        )

        # Corpus where confirmed movies are moved and stocked
        self.mytitleListbox = gui.listBox(
            widget=corpusBox,
            master=self,
            value="myTitles",
            labels="mytitleLabels",
            callback=lambda: self.removeButton.setDisabled(self.myTitles ==
                                                           list()),
            tooltip="The list of titles whose content will be imported",
        )
        self.mytitleListbox.doubleClicked.connect(self.remove)
        self.mytitleListbox.setMinimumHeight(150)
        self.mytitleListbox.setSelectionMode(3)

        corpusButtonBox = gui.widgetBox(
            widget=corpusBox,
            box=False,
            orientation='horizontal',
        )

        # Allows to enter specific text to the research
        gui.lineEdit(
            widget=self.queryBox,
            master=self,
            value='newQuery',
            orientation='horizontal',
            label=u"Search: ",
            labelWidth=120,
            tooltip=("Enter a string"),
        )

        # Allows to choose a type of search
        searchType = gui.comboBox(
            widget=self.queryBox,
            master=self,
            value="type_results",
            items=[
                "Title",
                "Actor",
                #"Genre",
            ],
            sendSelectedValue=True,
            callback=self.mode_changed,
            orientation="horizontal",
            label="Search by: ",
            labelWidth=120,
            tooltip=("Please select the desired search.\n"),
        )
        """genreTypes = gui.comboBox(
            widget=self.genreBox,
            master=self,
            value="genre_searched",
            items=[
                "Comedy",
                "Action",
                "Drama",
                "Horror",
            ],
            sendSelectedValue=True,
            orientation="horizontal",
            label="Search Type: ",
            labelWidth=120,
            tooltip=(
                "Please select the desired search.\n"
            ),
        )
        """
        searchTypeGenre = gui.comboBox(
            widget=self.genreBox,
            master=self,
            value="type_results",
            items=[
                "Title",
                "Actor",
                "Genre",
            ],
            sendSelectedValue=True,
            callback=self.mode_changed,
            orientation="horizontal",
            label="Search Type: ",
            labelWidth=120,
            tooltip=("Please select the desired search.\n"),
        )

        # Allows to chose a filter for the search
        self.searchFilter = gui.comboBox(
            widget=self.filterBox,
            master=self,
            value="filter_results",
            items=[
                "Year",
                "Alphabetical",
                "Random",
            ],
            sendSelectedValue=True,
            orientation="horizontal",
            label="Sort by: ",
            labelWidth=120,
            tooltip=("Please select the desired search.\n"),
        )

        # Allows to choose the wanted results numberp (10 by 10)
        self.searchNbr = gui.comboBox(
            widget=self.filterBox,
            master=self,
            value="nbr_results",
            items=[
                "10",
                "20",
                "30",
                "40",
                "50",
            ],
            sendSelectedValue=True,
            orientation="horizontal",
            label="Results' number: ",
            labelWidth=120,
            tooltip=("Please select the desired search.\n"),
        )

        # Reasearch button
        # Uses "searchMovies" attribute
        self.searchButton = gui.button(
            widget=searchButtonBox,
            master=self,
            label="Search",
            callback=self.searchMovies,
            tooltip="Connect to imdbpy and make a research",
        )

        # Add movies button
        self.addButton = gui.button(
            widget=resultButtonBox,
            master=self,
            label=u'Add to corpus',
            callback=self.addToCorpus,
            tooltip=(u"Move the selected movie downward in your corpus."),
        )
        self.addButton.setDisabled(True)

        # Clear button
        # Uses "clearResults" function
        self.clearButton = gui.button(
            widget=resultButtonBox,
            master=self,
            label="Clear results",
            callback=self.clearResults,
            tooltip="Clear results",
        )
        self.clearButton.setDisabled(True)
        #gui.separator(widget=queryBox, height=3)

        # Remove movie button
        self.removeButton = gui.button(
            widget=corpusButtonBox,
            master=self,
            label=u'Remove from corpus',
            callback=self.remove,
            tooltip=(u"Remove the selected movie from your corpus."),
        )
        self.removeButton.setDisabled(True)

        # Delete all confirmed movies button
        self.clearmyBasket = gui.button(
            widget=corpusButtonBox,
            master=self,
            label=u'Clear corpus',
            callback=self.clearCorpus,
            tooltip=(u"Remove all movies from your corpus."),
        )

        self.clearmyBasket.setDisabled(True)

        gui.separator(widget=corpusBox, height=3)
        gui.rubber(self.controlArea)
        #----------------------------------------------------------------------

        # Draw Info box and Send button
        self.sendButton.draw()
        self.searchButton.setDefault(True)
        self.infoBox.draw()

        #self.mode_changed()
        self.updateCorpus()

        self.mode_changed()

        # Send data if autoSend.
        self.sendButton.sendIf()

    def mode_changed(self):
        self.sendButton.settingsChanged()
        if self.type_results == "Title":  # 0 = subreddit selected
            # Hide Genre box
            self.genreBox.setVisible(False)
            self.queryBox.setVisible(True)
            self.filterBox.setVisible(True)
            #self.searchFilter.setVisible(False)
            self.searchFilter.setDisabled(True)
            self.searchNbr.setVisible(True)
        elif self.type_results == "Genre":
            # Hide Title
            self.queryBox.setVisible(False)
            self.genreBox.setVisible(True)
            self.filterBox.setVisible(True)
            #self.searchFilter.setVisible(True)
            self.searchFilter.setDisabled(False)
            self.searchNbr.setVisible(True)
        elif self.type_results == "Actor":
            # searchFilter disabled
            self.queryBox.setVisible(True)
            self.genreBox.setVisible(False)
            self.filterBox.setVisible(True)
            #self.searchFilter.setVisible(True)
            self.searchFilter.setDisabled(False)
            self.searchNbr.setVisible(True)
        return

    def searchMovies(self):
        """Search from imdb movie database"""
        result_list = {}
        query_string = self.newQuery

        if query_string != "":
            counter = 1
            counter_max = int(self.nbr_results)
            result_id = 0

            self.controlArea.setDisabled(True)

            # Initialize progress bar
            progressBar = ProgressBar(self, iterations=counter_max)
            filtered_results = list()
            if self.type_results == 'Title':

                # movie name
                movie_name = query_string

                # searching the movie
                search = self.ia.search_movie(movie_name)
                for film in search:
                    if 'year' in film:
                        filtered_results.append(film)

            elif self.type_results == 'Actor':
                actor_name = query_string
                people = self.ia.search_person(actor_name)
                searched_actor = people[0].personID
                first_search = self.ia.get_person_filmography(searched_actor)

                # Checks if the user input is a valid actor/actress
                if 'actor' in first_search['data']['filmography']:
                    search = first_search['data']['filmography']['actor']
                elif 'actress' in first_search['data']['filmography']:
                    search = first_search['data']['filmography']['actress']
                else:
                    search = list()
                    self.infoBox.setText(
                        "Please enter a valid actor or actress name",
                        "warning")

                # Checks if the movie has a year associated and stores it in a list
                filtered_results = [film for film in search if 'year' in film]

            if self.filter_results == 'Random':
                random.shuffle(filtered_results)

            elif self.filter_results == 'Alphabetical':
                alpha_dict = dict()
                for result in filtered_results:
                    my_id = result.movieID
                    alpha_dict[str(result)] = my_id
                    print(alpha_dict)
                sorted_dict = sorted(alpha_dict.keys(),
                                     key=lambda x: x.lower())
                print(sorted_dict)
                filtered_results = list()
                for i in sorted_dict:
                    value = alpha_dict[i]
                    print(value)
                    print(self.ia.get_movie(value))
                    filtered_results.append(self.ia.get_movie(value))

            # Each result is stored in a dictionnary with its title
            # and year of publication if it is specified

            for result in filtered_results:
                if counter <= counter_max:
                    try:
                        result_id += 1
                        year = result['year']
                        movie_id = result.movieID
                        result_list[result_id] = {
                            'name': result,
                            'year': year,
                            'id': movie_id
                        }
                    except KeyError:
                        continue
                    counter += 1
                else:
                    break

            # 1 tick on the progress bar of the widget
            progressBar.advance()
            # Stored the results list in the "result_list" variable
            self.searchResults = result_list

            # Reset and clear the visible widget list
            del self.titleLabels[:]

            # Update the results list with the search results
            # in order to display them
            for idx in self.searchResults:
                result_string = f'{self.searchResults[idx]["name"]} - {self.searchResults[idx]["year"]}'
                self.titleLabels.append(result_string)

            self.titleLabels = self.titleLabels
            self.clearButton.setDisabled(False)
            self.addButton.setDisabled(self.selectedTitles == list())

            # Clear progress bar.
            progressBar.finish()
            self.controlArea.setDisabled(False)

        else:
            self.infoBox.setText("Please type something in the search bar",
                                 "warning")

    # Add movie to corpus
    def addToCorpus(self):
        """Add movies in your selection """
        cond_list = list()
        for selectedTitle in self.selectedTitles:
            newMovie = self.searchResults[selectedTitle + 1]
            if newMovie not in self.myBasket:
                # Test if the movie has review associated, if not it refuses to add it to corpus
                movie = self.ia.get_movie_reviews(newMovie['id'])
                cond_list.append(movie)
                for movie in cond_list:
                    data = movie.get('data', "")
                    if 'reviews' in data:
                        self.myBasket.append(newMovie)
                    else:
                        self.infoBox.setText(
                            "Cannot add to corpus. One or more selected movies have no associated reviews",
                            "warning")
                        return
        self.updateCorpus()
        self.sendButton.settingsChanged()

    # Make the movie appear in the corpus Listbox
    def updateCorpus(self):
        """Update the corpus box list in order to view the movies added"""
        self.mytitleLabels = list()
        for newMovie in self.myBasket:
            result_string = f'{newMovie["name"]} - {newMovie["year"]}'
            self.mytitleLabels.append(result_string)

        self.mytitleLabels = self.mytitleLabels

        self.clearmyBasket.setDisabled(self.myBasket == list())
        self.removeButton.setDisabled(self.myTitles == list())

    def remove(self):
        """Remove the selected movie in the corpus """
        self.myBasket = [
            movie for idx, movie in enumerate(self.myBasket)
            if idx not in self.myTitles
        ]
        self.updateCorpus()
        self.sendButton.settingsChanged()

    # Remove movies function
    def sendData(self):
        """Compute result of widget processing and send to output"""
        # Skip if title list is empty:
        if self.myBasket == list():
            self.infoBox.setText(
                "Your corpus is empty, please add some movies first",
                "warning")
            return

        # Clear created Inputs.
        self.clearCreatedInputs()

        self.controlArea.setDisabled(True)

        # Initialize progress bar.
        progressBar = ProgressBar(self, iterations=len(self.myBasket))

        # Connect to imdb and add elements in lists
        list_review = list()
        list_annotation = list()
        annotations = list()
        try:
            for item in self.myBasket:
                movie = self.ia.get_movie_reviews(item['id'])
                movie_annotations = self.ia.get_movie(item['id'])
                list_review.append(movie)
                list_annotation.append(movie_annotations)
                # 1 tick on the progress bar of the widget
                progressBar.advance()

        # If an error occurs (e.g. http error, or memory error)...
        except:
            # Set Info box and widget to "error" state.
            self.infoBox.setText("Couldn't download data from imdb", "error")
            self.controlArea.setDisabled(False)
            return

        # Store movie critics strings in input objects...
        for movie in list_review:
            data = movie.get('data', "")
            reviews_data = data.get('reviews')
            for review in reviews_data:
                reviews = review.get('content')
                newInput = Input(reviews)
                self.createdInputs.append(newInput)
        for item in list_annotation:
            print(item)
            # Store the annotation as dicts in a separate list
            annotations_dict = {"title": item, "year": item["year"]}
            annot_dict_copy = annotations_dict.copy()
            for i in range(25):
                annotations.append(annot_dict_copy)
        print(annotations)
        # If there's only one item, the widget's output is the created Input.
        if len(self.createdInputs) == 1:
            self.segmentation = self.createdInputs[0]

        # Otherwise the widget's output is a concatenation...
        else:
            self.segmentation = Segmenter.concatenate(
                self.createdInputs,
                import_labels_as=None,
            )

        # Annotate segments...
        for idx, segment in enumerate(self.segmentation):
            segment.annotations.update(annotations[idx])
            self.segmentation[idx] = segment

        # Clear progress bar.
        progressBar.finish()

        self.controlArea.setDisabled(False)

        # Set status to OK and report data size...
        message = f"{len(self.segmentation)} segment@p sent to output"
        message = pluralize(message, len(self.segmentation))
        numChars = 0
        for segment in self.segmentation:
            segmentLength = len(Segmentation.get_data(segment.str_index))
            numChars += segmentLength
        message += " (%i character@p)." % numChars
        message = pluralize(message, numChars)
        self.infoBox.setText(message)

        self.send('Segmentation', self.segmentation, self)
        self.sendButton.resetSettingsChangedFlag()

    def clearResults(self):
        """Clear the results list"""
        del self.titleLabels[:]
        self.titleLabels = self.titleLabels
        self.clearButton.setDisabled(True)
        self.addButton.setDisabled(self.titleLabels == list())

    def clearCreatedInputs(self):
        """Delete all Input objects that have been created."""
        for i in self.createdInputs:
            Segmentation.set_data(i[0].str_index, None)
        del self.createdInputs[:]

    def clearCorpus(self):
        """Remove all movies in the corpus"""
        self.mytitleLabels = list()
        self.myBasket = list()
        self.sendButton.settingsChanged()
        self.clearmyBasket.setDisabled(True)
Ejemplo n.º 7
0
class ExtractCSV(OWTextableBaseWidget):
    """Textable widget for to extract CSV usign the CSV module and Sniffer."""

    #----------------------------------------------------------------------
    # Widget's metadata...

    name = "Extract CSV"
    description = "Extract tabulated data as a Textable Segmentation"
    icon = "icons/extractcsv.png"
    priority = 21   # TODO

    #----------------------------------------------------------------------
    # Channel definitions...

    inputs = [("CSV Data", Segmentation, "inputData")]
    outputs = [("CSV Segmentation", Segmentation)]

    #----------------------------------------------------------------------
    # Layout parameters...
    
    want_main_area = False

    #----------------------------------------------------------------------
    
    # Settings...

    settingsHandler = VersionedSettingsHandler(
        version=__version__.rsplit(".", 1)[0]
    )
    
    autoSend = settings.Setting(False)

    content_column = settings.Setting(0)
    deleteQuotes = settings.Setting(False)

    def __init__(self):
        """Widget creator."""

        super().__init__()

        # Other attributes...

        self.inputSeg = None
        self.outputSeg = None
        self.dialect = None
        self.selectedHeader = None
        self.csvSeg = list()
        # list of deleted segments
        self.contentIsNone = list()
        # list for gui
        self.headerList = list()
        self.content_column = 0
        self.headerEdit = ""

        # those are for the rename function
        self.renamedHeader = None
        self.isRenamed = False
        self.dict_keys = list()

        # preprocess
        self.deleteQuotes = False

        # Next two instructions are helpers from TextableUtils. Corresponding
        # interface elements are declared here and actually drawn below (at
        # their position in the UI)...
        self.infoBox = InfoBox(widget=self.controlArea)
        self.sendButton = SendButton(
            widget=self.controlArea,
            master=self,
            callback=self.sendData,
            infoBoxAttribute="infoBox",
            sendIfPreCallback=None,
        )
        #self.header_there = False

        #----------------------------------------------------------------------
        # User interface...

        # preprocess box...
        self.preprocessBox = gui.widgetBox(
            widget=self.controlArea,
            box="Preprocess",
            orientation="vertical",
        )
        # check box...
        self.checkQuotes = gui.checkBox(
            widget=self.preprocessBox,
            master=self,
            value='deleteQuotes',
            label='delete quotation marks',
            callback=self.delete_quotes,
        )

        # main box...
        self.mainBox = gui.widgetBox(
            widget=self.controlArea,
            box="Click to select a header to modify",
            orientation="vertical",
        )

        # List of all the headers (named with numbers if None)
        self.headerListbox = gui.listBox(
            widget=self.mainBox,
            master=self,
            value="selectedHeader",
            labels="headerList",
            callback=self.update_gui,
            selectionMode=1, # can only choose one item
            tooltip="list of all your headers",
        )

        # set "rename" button (must be aside the list)
        self.renameHeader = gui.button(
            widget=self.mainBox,
            master=self,
            label="rename",
            callback=self.set_renamebox,
            tooltip="click to rename header"
        )

        # set "use as content" button (must be aside the list)
        self.iscontentHeader = gui.button(
            widget=self.mainBox,
            master=self,
            label="use as content",
            callback=self.content_changed,
            tooltip="click to select as content"
        )

        #----------------------------------------------------------------------
        # rename box...

        self.renameBox = gui.widgetBox(
            widget=self.controlArea,
            box='Rename header',
            orientation='horizontal',
            addSpace=True,
        )
        gui.separator(widget=self.renameBox, height=3)
        self.headerEditLine = gui.lineEdit(
            widget=self.renameBox,
            master=self,
            value='headerEdit',
            orientation='horizontal',
            label='New header:',
            tooltip=(
                "Rename the selected header."
            ),
            callback=lambda: self.renameButton.setDisabled(not self.headerEdit),
        )
        self.renameButton = gui.button(
            widget=self.renameBox,
            master=self,
            label="rename",
            callback=self.rename,
            tooltip="click to rename header"
        )
        self.cancelButton = gui.button(
            widget=self.renameBox,
            master=self,
            label="cancel",
            callback=self.cancel,
            tooltip="click to cancel renaming"
        )
        #----------------------------------------------------------------------
        # interface parameters...

        self.update_gui()
        self.renameBox.setVisible(False)

        gui.rubber(self.controlArea)

        # Now Info box and Send button must be drawn...
        self.sendButton.draw()
        self.infoBox.draw()
        self.infoBox.setText("Widget needs input", "warning")
        
        # Send data if autoSend.
        self.sendButton.sendIf()

    #----------------------------------------------------------------------

    def update_gui(self):
        if len(self.selectedHeader)==0:
            self.iscontentHeader.setDisabled(True)
            self.renameHeader.setDisabled(True)
        else:
            self.iscontentHeader.setDisabled(False)
            self.renameHeader.setDisabled(False)

    def content_changed(self):
        self.content_column = int(self.selectedHeader[0])
        self.treat_input()
        return

    def delete_quotes(self):
        self.treat_input()

    def set_renamebox(self):
        # take selectedHeader
        self.renamedHeader = int(self.selectedHeader[0])
        # appear rename gui
        self.renameBox.setVisible(True)
        self.renameButton.setDisabled(True)
        # disable other
        self.iscontentHeader.setDisabled(True)
        self.renameHeader.setDisabled(True)
        self.headerListbox.setDisabled(True)
        self.checkQuotes.setDisabled(True)

    def rename(self):
        # rename
        for key in self.dict_keys:
            # change my header name
            if self.dict_keys.index(key) == self.renamedHeader:
                self.dict_keys[self.dict_keys.index(key)] = self.headerEdit
        # implement check value
        self.isRenamed = True
        # and treat again
        self.treat_input()

        # here we get back to normal gui
        self.renameBox.setVisible(False)
        self.headerListbox.setDisabled(False)
        self.checkQuotes.setDisabled(False)
        self.update_gui()
        # clear value
        self.headerEdit = ""

    def cancel(self):
        # here we get back to normal gui
        self.renameBox.setVisible(False)
        self.headerListbox.setDisabled(False)
        self.update_gui()
        # clear value
        self.headerEdit = ""

    def treat_input(self):
        # Check that there's an input...
        if self.inputSeg is None:
            self.infoBox.setText("Widget needs input", "warning")
            del self.headerList[:]
            self.headerList = self.headerList
            return

        # Initialize progress bar.
        self.infoBox.setText(
            u"Processing, please wait...", 
            "warning",
        )
        self.controlArea.setDisabled(True)
        progressBar = ProgressBar(self, iterations=len(self.inputSeg))

        # clear lists
        del self.csvSeg[:]
        del self.contentIsNone[:]

        # Process each input segment...
        for segment in self.inputSeg:
        
            # Input segment attributes...
            inputContent = segment.get_content()
            if not self.deleteQuotes == False :
                inputContent = inputContent.replace('"',"")
            inputAnnotations = segment.annotations
            inputStrIdx = segment.str_index
            inputStart = segment.start or 0
            inputEnd = segment.end or len(inputContent)
            #Call data processing
            csv_stream = io.StringIO(inputContent)
            dialect = sniffer.sniff(csv_stream.readline())
            dialect.quoting=csv.QUOTE_NONE
            csv_stream.seek(0)
            my_reader = csv.reader(csv_stream, dialect)
            position = 0
            # Process each seg in inputContent
            for seg in inputContent:
                segAnnotations = inputAnnotations.copy()
            # This  will launch if sniffer detects a header in the content.
            if sniffer.has_header(inputContent) == True:
                # go back to the start otherwise we're going to start from the
                # second row
                csv_stream.seek(0)
                # the header row is defined here.
                if self.isRenamed == False :
                    self.dict_keys = next(my_reader)
                    for key in self.dict_keys:
                    # this is position of first content
                    # TODO : separator length (if not 1)
                        position += (len(key) + 1)
                else :
                    input_keys = next(my_reader)
                    for key in input_keys:
                    # this is position of first content
                    # TODO : separator length (if not 1)
                        position += (len(key) + 1)


            # This will launch if sniffer does not detect a header 
            # in the content.
            if sniffer.has_header(inputContent) == False:
                # go back to the start otherwise we're going to start from the
                # second row. we do this here even though we don't really care
                # about the first row simply because in general we consider the
                # first row to not have any missing values
                csv_stream.seek(0)
                first_row = next(my_reader)
                n_cols = len(first_row)
                if self.isRenamed == False :
                    self.dict_keys = list()
                    for item in range(1, n_cols+1):
                        self.dict_keys.append(str(item))
                csv_stream.seek(0)


            # clear the list before appending
            del self.headerList[:]

            for key in self.dict_keys:
                # appends the headers to the gui list
                if self.dict_keys.index(key) == self.content_column:
                    self.headerList.append(str(key)+"(*content)")
                    self.headerList = self.headerList
                else :
                    self.headerList.append(str(key))
                    self.headerList = self.headerList


            for idx, row in enumerate(my_reader, start=2):
                # Get old annotations in new dictionary
                oldAnnotations = inputAnnotations.copy()
                segAnnotations = dict()
                # initiate next row starting position
                next_position = position
                for key in oldAnnotations.keys():
                    segAnnotations[key] = oldAnnotations[key]

                # This is the main part where we transform our data into
                # annotations.
                for key in self.dict_keys:
                    # segAnnotations["length"] = position
                    # segAnnotations["row"] = str(row)

                    # if column is content (first column (0) by default)
                    if self.dict_keys.index(key) == self.content_column:
                        # put value as content
                        content = row[self.dict_keys.index(key)]
                    # else we put value in annotation
                    else:
                        # only if value is not None
                        if len(row[self.dict_keys.index(key)]) != 0 :
                            segAnnotations[key] = row[self.dict_keys.index(key)]
                    # implement position and next_position depending on
                    # content column
                    if self.dict_keys.index(key) < self.content_column:
                        position += len(row[self.dict_keys.index(key)]) + 1
                        next_position += len(row[self.dict_keys.index(key)]) + 1
                    if self.dict_keys.index(key) >= self.content_column:
                        next_position += len(row[self.dict_keys.index(key)]) + 1

                if len(content) != 0:
                    self.csvSeg.append(
                        Segment(
                            str_index = inputStrIdx,
                            start = position,
                            end = position + len(content),
                            annotations = segAnnotations
                            )
                        )

                else :
                    # if no content, add idx of the row and do not append
                    # TODO : something with contentIsNone
                    self.contentIsNone.append(idx)

                # initiate new row starting position
                position = next_position
                        
            progressBar.advance()

        unSeg = len(self.csvSeg)         
        # Set status to OK and report segment analyzed...
        message = "%i segment@p analyzed." % unSeg
        message = pluralize(message, unSeg)
        message += " (Ignored %i segment@p with no content)" %      \
            len(self.contentIsNone)
        message = pluralize(message, len(self.contentIsNone))
        self.infoBox.setText(message)

        # Clear progress bar.
        progressBar.finish()
        self.controlArea.setDisabled(False)

        self.sendButton.resetSettingsChangedFlag()
        self.sendButton.sendIf()

    def inputData(self, newInput):
        """Process incoming data."""
        self.inputSeg = newInput
        self.infoBox.inputChanged()

        del self.dict_keys[:]
        self.isRenamed = False

        self.sendButton.sendIf()

        self.treat_input()

    def sendData(self):
        """Compute result of widget processing and send to output"""
        
        # Check that there's an input...
        if self.inputSeg is None:
            self.infoBox.setText("Widget needs input", "warning")
            del self.headerList[:]
            self.headerList = self.headerList
            self.send("CSV Segmentation", None, self)
            return

        # Initialize progress bar.
        self.infoBox.setText(
            u"Processing, please wait...", 
            "warning",
        )
        self.controlArea.setDisabled(True)
        progressBar = ProgressBar(self, iterations=len(self.inputSeg))


        # Treat...
        for segment in self.csvSeg:
            
            pass
                        
            progressBar.advance()

                 
        # Set status to OK and report data size...
        outputSeg = Segmentation(self.csvSeg, label=self.captionTitle)
        if len(self.contentIsNone) == 0 :
            message = "%i segment@p sent to output." % len(outputSeg)
            message = pluralize(message, len(outputSeg))
            self.infoBox.setText(message)
        # message if one or more segments has no content and has been ignored
        elif len(self.contentIsNone) == 1:
            message = "%i segment@p sent to output. (ignored %i segment with \
            no content)" % (len(outputSeg), len(self.contentIsNone))
            message = pluralize(message, len(outputSeg))
            self.infoBox.setText(message)
        else :
            message = "%i segment@p sent to output. (ignored %i segments with \
            no content)" % (len(outputSeg), len(self.contentIsNone))
            message = pluralize(message, len(outputSeg))
            self.infoBox.setText(message)

        # Clear progress bar.
        progressBar.finish()
        self.controlArea.setDisabled(False)
        
        # Send data to output...
        self.send("CSV Segmentation", outputSeg, self)
        
        self.sendButton.resetSettingsChangedFlag()             

    # The following method needs to be copied verbatim in
    # every Textable widget that sends a segmentation...

    def setCaption(self, title):
        if 'captionTitle' in dir(self):
            changed = title != self.captionTitle
            super().setCaption(title)
            if changed:
                self.sendButton.settingsChanged()
        else:
            super().setCaption(title)
class Linguistica(OWTextableBaseWidget):
    """Textable widget for unsupervised morphology learning, using the
    "Crab Nebula" algorithm from John Golsdmith's Linguistica
    """

    #----------------------------------------------------------------------
    # Widget's metadata...

    name = "Linguistica"
    description = "Unupervised morphological analysis"
    icon = "icons/linguistica.svg"
    priority = 21

    #----------------------------------------------------------------------
    # Channel definitions...

    inputs = [("Word segmentation", Segmentation, "inputData")]
    outputs = [("Morphologically analyzed data", Segmentation)]

    #----------------------------------------------------------------------
    # Layout parameters...

    want_main_area = True

    #----------------------------------------------------------------------
    # Settings...

    settingsHandler = VersionedSettingsHandler(
        version=__version__.rsplit(".", 1)[0])

    autoSend = settings.Setting(False)
    minStemLen = settings.Setting(3)
    maxSuffixLen = settings.Setting(4)

    def __init__(self):
        """Widget creator."""

        super().__init__()

        # Other attributes...
        self.inputSeg = None
        self.morphology = dict()
        self.selectedMainWord = None
        self.mainWords = list()
        self.selectedParse = None
        self.parses = list()
        self.selectedStemForParse = None
        self.stemsForParse = list()
        self.selectedSuffixForParse = None
        self.suffixesForParse = list()
        self.selectedMainSignature = None
        self.mainSignatures = list()
        self.wordsForSig = list()
        self.stemsForSig = list()
        self.suffixesForSig = list()

        # Next two instructions are helpers from TextableUtils. Corresponding
        # interface elements are declared here and actually drawn below (at
        # their position in the UI)...
        self.infoBox = InfoBox(widget=self.controlArea)
        self.sendButton = SendButton(
            widget=self.controlArea,
            master=self,
            callback=self.sendData,
            infoBoxAttribute="infoBox",
            sendIfPreCallback=None,
        )

        # User interface...

        # A) Control area...

        # Options box...
        optionsBox = gui.widgetBox(
            widget=self.controlArea,
            box="Options",
            orientation="vertical",
        )
        gui.spin(
            widget=optionsBox,
            master=self,
            value='minStemLen',
            label='Minimum length of stems: ',
            callback=self.sendButton.sendIf,
            labelWidth=180,
            tooltip=(
                'Select the minimum number of required characters in stems'),
            minv=LOWER_MIN_STEM_LEN,
            maxv=MAX_MORPH_LEN,
            step=1,
        )
        gui.separator(widget=optionsBox, height=2)

        gui.rubber(self.controlArea)

        # B) Main area...

        font = QFont()
        font.setFamily('Courier')
        font.setStyleHint(QFont.Courier)
        font.setPixelSize(12)

        # Tabs...
        self.tabs = QTabWidget()
        self.wordTab = QWidget()
        self.signatureTab = QWidget()
        self.tabs.addTab(self.wordTab, "Words")
        self.tabs.addTab(self.signatureTab, "Signatures")

        # Words tab...
        wordTabBox = QHBoxLayout()
        wordBox = gui.widgetBox(
            widget=self.wordTab,
            orientation="horizontal",
            margin=5,
        )

        wordBoxRight = gui.widgetBox(widget=wordBox)

        self.mainWordListbox = gui.listBox(
            widget=wordBoxRight,
            master=self,
            value="selectedMainWord",
            labels="mainWords",
            callback=self.mainWordSelected,
            tooltip="Select a word to display its possible parses.",
        )
        self.mainWordListbox.setFont(font)

        gui.separator(widget=wordBox, width=3)

        wordBoxLeft = gui.widgetBox(widget=wordBox)

        gui.label(
            widget=wordBoxLeft,
            master=self,
            label="Parse(s):",
        )

        self.parsesListbox = gui.listBox(
            widget=wordBoxLeft,
            master=self,
            value="selectedParse",
            labels="parses",
            callback=self.parseSelected,
            tooltip="Select a parse to display the corresponding signature.",
        )
        self.parsesListbox.setFont(font)

        self.sigForParseBox = gui.widgetBox(
            widget=wordBoxLeft,
            box="Signature",
        )

        gui.label(
            widget=self.sigForParseBox,
            master=self,
            label="Stem(s):",
        )

        self.stemsForParseListbox = gui.listBox(
            widget=self.sigForParseBox,
            master=self,
            labels="stemsForParse",
            tooltip="Stems associated with the parse selected above.",
        )

        gui.separator(widget=self.sigForParseBox, height=2)

        gui.label(
            widget=self.sigForParseBox,
            master=self,
            label="Suffixes(s):",
        )

        self.suffixesForParseListbox = gui.listBox(
            widget=self.sigForParseBox,
            master=self,
            labels="suffixesForParse",
            tooltip="Suffixes associated with the parse selected above.",
        )

        wordTabBox.addWidget(wordBox)
        self.wordTab.setLayout(wordTabBox)

        # Signature tab...
        signatureTabBox = QHBoxLayout()

        signatureBox = gui.widgetBox(
            widget=self.signatureTab,
            orientation="horizontal",
            margin=5,
        )

        signatureBoxRight = gui.widgetBox(widget=signatureBox)

        self.mainSignatureListbox = gui.listBox(
            widget=signatureBoxRight,
            master=self,
            value="selectedMainSignature",
            labels="mainSignatures",
            callback=self.mainSignatureSelected,
            tooltip="Select a signature to display its contents.",
        )
        self.mainSignatureListbox.setFont(font)

        gui.separator(widget=signatureBox, width=3)

        signatureBoxLeft = gui.widgetBox(widget=signatureBox)

        gui.label(
            widget=signatureBoxLeft,
            master=self,
            label="Words:",
        )

        self.wordsForSigListbox = gui.listBox(
            widget=signatureBoxLeft,
            master=self,
            labels="wordsForSig",
            tooltip="Words associated with the selected signature.",
        )
        self.wordsForSigListbox.setFont(font)

        gui.label(
            widget=signatureBoxLeft,
            master=self,
            label="Stem(s):",
        )

        self.stemsForSigListbox = gui.listBox(
            widget=signatureBoxLeft,
            master=self,
            labels="stemsForSig",
            tooltip="Stems associated with the selected signature.",
        )
        self.stemsForSigListbox.setFont(font)

        gui.label(
            widget=signatureBoxLeft,
            master=self,
            label="Suffixes(s):",
        )

        self.suffixesForSigListbox = gui.listBox(
            widget=signatureBoxLeft,
            master=self,
            labels="suffixesForSig",
            tooltip="Suffixes associated with the selected signature.",
        )
        self.suffixesForSigListbox.setFont(font)

        signatureTabBox.addWidget(signatureBox)
        self.signatureTab.setLayout(signatureTabBox)

        self.mainArea.layout().addWidget(self.tabs)

        # Now Info box and Send button must be drawn...
        self.sendButton.draw()
        self.infoBox.draw()
        self.infoBox.setText("Widget needs input", "warning")

        self.setMinimumWidth(602)
        self.setMinimumHeight(317)
        self.adjustSizeWithTimer()

        # Send data if autoSend.
        self.sendButton.sendIf()

    def inputData(self, newInput):
        """Process incoming data."""
        self.inputSeg = newInput
        self.infoBox.inputChanged()
        self.sendButton.sendIf()

    def mainSignatureSelected(self):
        """Display selected signature and generated words."""
        # Return if no selected signature...
        if len(self.selectedMainSignature) == 0:
            self.wordsForSig = list()
            return

        # Get generated words (by decreasing frequency)...
        sigs = self.morphology["signatures"]
        if self.selectedMainSignature[0] == 0:
            words = sorted([
                w for w in self.morphology["wordCounts"].keys()
                if self.morphology["parser"][w][0].signature == 0
            ])
        else:
            su = list(sigs.keys())[self.selectedMainSignature[0] - 1]
            words = ["".join(pair) for pair in itertools.product(sigs[su], su)]
        words.sort(key=self.morphology["wordCounts"].get, reverse=True)

        # Display generated words...
        max_count = self.morphology["wordCounts"][words[0]]
        padding = len(str(max_count)) + 1
        self.wordsForSig = [
            '{num: {width}} {word}'.format(
                num=self.morphology["wordCounts"][word],
                width=padding,
                word=word,
            ) for word in words
        ]

        # Display stems and suffixes in signature...
        if self.selectedMainSignature[0] > 0:
            suffixes = list(sigs.keys())[self.selectedMainSignature[0] - 1]
            self.suffixesForSig = [suffix or "NULL" for suffix in suffixes]
            self.stemsForSig = sigs[suffixes]
        else:
            self.suffixesForSig = ["NULL"]
            self.stemsForSig = sorted(words[:])

    def mainWordSelected(self):
        """Display possible parses for selected word."""

        self.sigForParseBox.setTitle(" Signature ")

        # Return if no selected word...
        if len(self.selectedMainWord) == 0:
            self.parses = list()
            return

        # Get selected word's parses...
        words = list(self.morphology["wordCounts"].keys())
        words.sort(key=self.morphology["wordCounts"].get, reverse=True)
        parses = self.morphology["parser"][words[self.selectedMainWord[0]]]

        # Display parses...
        self.parses = [
            '{score:.2f} {stem} + {suffix}'.format(
                score=parse.score,
                stem=parse.stem,
                suffix=parse.suffix if parse.suffix else "NULL",
            ) for parse in parses
        ]
        self.selectedParse = [0]
        self.parseSelected()

    def parseSelected(self):
        """Display selected parse's signature."""
        # Return if no selected parse...
        if len(self.selectedParse) == 0:
            self.stemsForParse = list()
            self.suffixesForParse = list()
            return

        # Get selected parse's signature...
        words = list(self.morphology["wordCounts"].keys())
        words.sort(key=self.morphology["wordCounts"].get, reverse=True)
        parses = self.morphology["parser"][words[self.selectedMainWord[0]]]
        parse = parses[self.selectedParse[0]]
        sigNum = parse.signature

        # Display stems and suffixes in parse's signature...
        if sigNum > 0:
            self.sigForParseBox.setTitle(" Signature {} ".format(sigNum))
            signatures = list(self.morphology["signatures"].keys())
            self.suffixesForParse = [
                suffix or "NULL" for suffix in signatures[sigNum - 1]
            ]
            self.stemsForParse =    \
                self.morphology["signatures"][signatures[sigNum-1]]
        else:
            self.sigForParseBox.setTitle(" Signature 0 ")
            self.suffixesForParse = ["NULL"]
            self.stemsForParse = sorted([
                w for w in words
                if self.morphology["parser"][w][0].signature == 0
            ])

    def sendData(self):
        """Compute result of widget processing and send to output"""

        # Clear morphology...
        self.morphology = dict()

        # Check that there's an input...
        if self.inputSeg is None:
            self.infoBox.setText("Widget needs input", "warning")
            self.send("Morphologically analyzed data", None, self)
            self.updateGUI()
            return

        # Perform morphological analysis...

        # Initialize progress bar.
        self.infoBox.setText(
            u"Processing, please wait (word count)...",
            "warning",
        )
        self.controlArea.setDisabled(True)
        progressBar = ProgressBar(self, iterations=100)

        # Word count...
        wordCounts = collections.Counter(
            [segment.get_content() for segment in self.inputSeg])
        self.morphology["wordCounts"] = wordCounts
        self.infoBox.setText(
            u"Processing, please wait (signature extraction)...",
            "warning",
        )
        progressBar.advance(5)  # 5 ticks on the progress bar...

        # Learn signatures...
        try:
            lxa5crab.crab_nebula.MIN_STEM_LEN = self.minStemLen
            signatures, stems, suffixes = lxa5crab.find_signatures(wordCounts)
            self.morphology["signatures"] = signatures
            self.morphology["stems"] = stems
            self.morphology["suffixes"] = suffixes
        except ValueError as e:
            self.infoBox.setText(e.__str__(), "warning")
            self.send("Morphologically analyzed data", None, self)
            self.controlArea.setDisabled(False)
            progressBar.finish()  # Clear progress bar.
            self.morphology = dict()
            self.updateGUI()
            return
        self.infoBox.setText(
            u"Processing, please wait (word parsing)...",
            "warning",
        )
        progressBar.advance(80)

        # Parse words...
        parser = lxa5crab.build_parser(wordCounts, signatures, stems, suffixes)
        self.morphology["parser"] = parser
        newSegments = list()
        num_analyzed_words = 0
        for segment in self.inputSeg:
            parses = parser[segment.get_content()]
            newSegment = segment.deepcopy()
            if parses[0].signature:
                num_analyzed_words += 1
            newSegment.annotations.update(
                {
                    "stem": parses[0].stem,
                    "suffix": parses[0].suffix  \
                                if len(parses[0].suffix) else "NULL",
                    "signature": parses[0].signature
                }
            )
            newSegments.append(newSegment)
        self.send(
            "Morphologically analyzed data",
            Segmentation(newSegments, self.captionTitle),
            self,
        )
        self.updateGUI()
        progressBar.advance(15)

        # Set status to OK and report data size...
        message = "%i segment@p sent to output (%.2f%% analyzed)." % (len(
            self.inputSeg), (num_analyzed_words / len(self.inputSeg) * 100))
        message = pluralize(message, len(self.inputSeg))
        self.infoBox.setText(message)

        # Clear progress bar.
        progressBar.finish()
        self.controlArea.setDisabled(False)

        self.sendButton.resetSettingsChangedFlag()

    def updateGUI(self):
        """Update GUI state"""

        # Empty lists...
        self.mainWords = list()
        self.parses = list()
        self.stemsForParse = list()
        self.suffixesForParse = list()
        self.sigForParseBox.setTitle(" Signature ")
        self.mainSignatures = list()
        self.wordsForSig = list()
        self.stemsForSig = list()
        self.suffixesForSig = list()

        # Fill main lists if necessary...
        if len(self.morphology):

            # Main word list...
            words = list(self.morphology["wordCounts"].keys())
            words.sort(key=self.morphology["wordCounts"].get, reverse=True)
            max_count = self.morphology["wordCounts"][words[0]]
            padding = len(str(max_count)) + 1
            self.mainWords = [
                '{num: {width}} {word}'.format(
                    num=self.morphology["wordCounts"][word],
                    width=padding,
                    word=word,
                ) for word in words
            ]

            # Main signature list...
            sigs = [["NULL"]] + list(self.morphology["signatures"].keys())
            padding = len(str(len(sigs))) + 1
            self.mainSignatures = [
                '{num: {width}} {sig}'.format(
                    num=idx,
                    width=padding,
                    sig=", ".join([suff or "NULL" for suff in sig]))
                for idx, sig in enumerate(sigs)
            ]

    # The following method needs to be copied verbatim in
    # every Textable widget that sends a segmentation...

    def setCaption(self, title):
        if 'captionTitle' in dir(self):
            changed = title != self.captionTitle
            super().setCaption(title)
            if changed:
                self.sendButton.settingsChanged()
        else:
            super().setCaption(title)
Ejemplo n.º 9
0
class Treetagger(OWTextableBaseWidget):
    """Orange widget to get corpus from pattern web"""

    #----------------------------------------------------------------------
    # Widget"s metadata...

    name = "Treetagger"
    description = "..."
    icon = "icons/icon_treetagger.png"
    priority = 1

    #----------------------------------------------------------------------
    # Channel definitions...

    inputs = [("Text Input", Segmentation, "processInputData")]
    outputs = [("Text data", Segmentation)]

    #----------------------------------------------------------------------
    # Settings...

    settingsHandler = VersionedSettingsHandler(
        version=__version__.rsplit(".", 1)[0])

    autoSend = settings.Setting(False)
    unknown = settings.Setting(False)
    activer_xml = settings.Setting(False)

    want_main_area = False

    def __init__(self):
        """Widget creator."""
        super().__init__()

        # NONE BASIC SETTING
        self.inputData = None
        self.system = os.name
        self.user = os.environ.get("USER")
        self.langues = list()
        self.created_inputs = list()
        self.language = 0
        self.check_firt_use = False
        self.createdInputs = list()
        self.compteur = 0
        self.NoLink = True

        # liste des langues possible
        self.langues_possibles = {
            "French": ["french.par", "french-abbreviations"],
            "English": ["english-utf8.par", "english-abbreviations"],
            "German": ["german-utf8.par", "german-abbreviations"],
            "Italian": ["italian-utf8.par", "italian-abbreviations"],
            "Swahili": ["swahili.par", "swahili-abbreviations"],
            "Portuguese": ["portuguese.par", "portuguese-abbreviations"],
            "Russian": ["russian.par", "russian-abbreviations"],
            "Spanish":
            ["spanish-utf8.par", "spanish-abbreviations", "spanish-mwls"],
            "Slovenian": ["slovenian-utf8.par"],
            "Slovak": ["slovak2-utf8.par"],
            "Romanian": ["romanian.par"],
            "Polish": ["polish-utf8.par"],
            "Mongolian": ["mongolian.par"],
            "Latin": ["latin.par"],
            "Galician": ["galician.par"],
            "Finnish": ["finnish-utf8.par"],
            "Estonian": ["estonian.par"],
            "Bulgarian": ["bulgarian-utf8.par"],
            "Spoken French": ["spoken-french.par", "french-abbreviations"]
        }

        # Next two instructions are helpers from TextableUtils. Corresponding
        # interface elements are declared here and actually drawn below (at
        # their position in the UI)...
        self.infoBox = InfoBox(widget=self.controlArea)
        self.sendButton = SendButton(widget=self.controlArea,
                                     master=self,
                                     callback=self.sendData,
                                     infoBoxAttribute=u"infoBox",
                                     sendIfPreCallback=self.updateGUI)

        # The AdvancedSettings class, also from TextableUtils, facilitates
        # the management of basic vs. advanced interface. An object from this
        # class (here assigned to self.advancedSettings) contains two lists
        # (basicWidgets and advancedWidgets), to which the corresponding
        # widgetBoxes must be added.

        # User interface...

        # OPTION BOX
        gui.separator(widget=self.controlArea, height=5)

        self.infoBox1 = gui.widgetBox(self.controlArea,
                                      u"Option",
                                      addSpace=True)

        # definir la langue
        self.langueBox = gui.comboBox(widget=self.infoBox1,
                                      master=self,
                                      value="language",
                                      items=self.langues,
                                      orientation=u"horizontal",
                                      label="Select text language :",
                                      callback=self.settings_changed)

        self.langueBox.setMaximumWidth(100)

        gui.separator(widget=self.controlArea, height=3)

        # Checkbox pour activer output avec code xml
        self.choix_xml = gui.checkBox(widget=self.infoBox1,
                                      master=self,
                                      value="activer_xml",
                                      label=" Output with XML code",
                                      callback=self.settings_changed)

        # Checkbox pour afficher unknown si le mot est inconnu
        self.choix_unknown = gui.checkBox(widget=self.infoBox1,
                                          master=self,
                                          value="unknown",
                                          label=" Output without '[unknown]'",
                                          callback=self.settings_changed)

        # The following lines:
        # Bouton pour aller cherche le lien vers treetagger...
        self.treetagger_box = gui.widgetBox(
            self.controlArea,
            u"Please, enter a correct path to TreeTagger :",
            addSpace=True)

        gui.button(widget=self.treetagger_box,
                   master=self,
                   label="Browse",
                   callback=self.treetagger_search)

        gui.separator(widget=self.treetagger_box, height=3)

        gui.rubber(self.controlArea)

        # Now Info box and Send button must be drawn...
        self.sendButton.draw()
        self.infoBox.draw()

        # Send data if autoSend.
        self.sendButton.sendIf()

        # ajuster taille widjet
        self.adjustSizeWithTimer()

        # verifie lien treetagger
        self.treetagger_check()

    # ALL FUNCTIONS

    def treetagger_check(self):

        # liste des element que doit contenir le dossier treetagger...
        liste = list()
        tokenize = os.path.normpath("/cmd/tokenize.pl")
        tokenize_utf8 = os.path.normpath("/cmd/utf8-tokenize.perl")
        treetagger = os.path.normpath("/bin/tree-tagger")

        # definir le ce que l"on trouve dans le chemin vers treetagger
        if self.system == "nt":
            check_list = [tokenize, tokenize_utf8, treetagger + ".exe"]
        else:
            check_list = [tokenize, tokenize_utf8, treetagger]

        # definir le chemin vers treetagger automatiquement
        path = os.path.dirname(
            os.path.abspath(inspect.getfile(
                inspect.currentframe())))  # --> temporaire

        # stoquer le lien vers treetagger (windows ou autre)...
        if self.system == "nt":
            if os.path.exists("treetagger_link.txt"):
                file = open("treetagger_link.txt", "r")
                self.treetagger_link = file.read()
            else:
                self.treetagger_link = os.path.normpath("C:\TreeTagger")

        else:
            if os.path.exists(os.path.normpath("/Users/" + \
            self.user + "/treetagger_link.txt")):
                file = open(
                    os.path.normpath("/Users/" + self.user +
                                     "/treetagger_link.txt"), "r")
                self.treetagger_link = file.read()
            else:
                self.treetagger_link = os.path.normpath(
                    "/Applications/TreeTagger")

        # verifier si le chemin est correcte
        for check in check_list:
            check = os.path.exists(self.treetagger_link + check)
            liste.append(check)

        # afficher le bouton pour aller chercher le lien
        # et verouiller le reste des posibilite...
        if False in liste:
            self.NoLink = True
            # botton encore visible et les autres verouille
            self.treetagger_box.setVisible(True)
            self.infoBox1.setDisabled(True)

            # afficher les probleme s"il y en a...
            if self.check_firt_use is False:
                self.infoBox.setText(
                    u"Please click 'Browse' and select the path \
                    to TreeTagger base folder. ", "warning")
            else:
                self.infoBox.setText(
                    u"Sorry, TreeTagger's link isn't correct.", "error")

        # cacher le bouton pour aller chercher le lien
        # et deverouiller le reste des posibilite...
        else:
            if self.check_firt_use is True:
                self.infoBox.setText(
                    u"TreeTagger's link is correct !\n\n \
                    Now, Widget needs input.", "warning")
            else:
                self.infoBox.setText(u"Widget needs input.", "warning")

            # affiche les langues
            self.language_possibility()
            for langue_actualise in self.langues:
                self.langueBox.addItem(langue_actualise)

            # modification affichage de l"interface
            self.NoLink = False
            self.treetagger_box.setVisible(False)
            self.infoBox1.setDisabled(False)

        self.saveSettings()

        return liste

    def treetagger_search(self):

        # rentre un lien comme lien de base marche pas
        self.treetagger_link = os.path.normpath(
            str(
                QFileDialog.getExistingDirectory(
                    self, u"Enter a path to Treetagger")))

        # Try to save list in this module"s directory for future reference...
        if self.system == "nt":
            file = open("treetagger_link.txt", "w")
        else:
            file = open(
                os.path.normpath("/Users/" + self.user +
                                 "/treetagger_link.txt"), "w")

        file.write(self.treetagger_link)
        file.close()

        self.check_firt_use = True

        # verifie si le lien marche
        self.treetagger_check()

    def language_possibility(self):

        # initilise que les langues installees dans treetagger
        # la liste dans son dossier
        langue_verification = os.listdir(".")

        langues_presentes = list()

        # On cherche quelles langue sont installees dans l"ordinateur
        for langue in self.langues_possibles.keys():
            check = True
            for file_utile in self.langues_possibles[langue]:
                check = check and os.path.isfile(
                    os.path.normpath(self.treetagger_link + "/lib/" +
                                     file_utile))
                if not check:
                    break
            if check:
                langues_presentes.append(langue)

        self.langues = langues_presentes

        return langues_presentes

    #recoit l"input
    def processInputData(self, inputData):

        # ici on prend le input
        self.inputData = inputData

        #change l"infobox quand input change
        if self.compteur != 0:
            self.infoBox.inputChanged()

        # Send data to output.
        self.sendButton.sendIf()

    def sendData(self):

        # Si le lien vers treetagger n"est pas trouve
        if self.NoLink:
            self.infoBox.setText(u"Sorry, TreeTagger's link not found.",
                                 "error")
            self.send("Text data", None)
        # Important: if input data is None, propagate this value to output...
        elif not self.inputData:
            self.infoBox.setText(u"Widget needs input", "warning")
            self.send("Text data", None)
        # affiche que quelque chose se passe...
        else:
            self.infoBox.setText(u"TreeTagger is running...", "warning")

            # Initialisation de variables
            total_tagged_text = list()
            new_segmentations = list()
            i = 0

            # Initialize progress bar.
            self.progressBar = gui.ProgressBar(self, iterations=5)

            # Copie de la segmentation avec ajout d"une annotation...
            copy_of_input_seg = Segmentation()
            copy_of_input_seg.label = self.inputData.label
            for seg_idx, segment in enumerate(self.inputData):
                attr = " ".join(
                    ["%s='%s'" % item for item in segment.annotations.items()])
                segment.annotations["tt_xb"] = attr
                copy_of_input_seg.append(segment)

            # avancer la progressBar d"un cran
            self.progressBar.advance()

            concatenated_text = copy_of_input_seg.to_string(
                formatting="<xb_tt %(tt_xb)s>%(__content__)s</xb_tt>",
                display_all=True,
            )

            # avancer la progressBar d"un cran
            self.progressBar.advance()

            tagged_text = self.tag(concatenated_text)
            tagged_input = Input(tagged_text)
            tagged_segmentation = Segmenter.import_xml(tagged_input, "xb_tt")

            # avancer la progressBar d"un cran
            self.progressBar.advance()

            # Si checkBox xml active
            if self.activer_xml == True:
                xml_segmentation, _ = Segmenter.recode(
                    tagged_segmentation,
                    substitutions=[
                        (re.compile(r"<unknown>"), "[unknown]"),
                        (re.compile(r"(.+)\t(.+)\t(.+?)(?=[\r\n])"),
                         "<w lemma='&3' type='&2'>&1</w>"),
                        (re.compile(r'"""'), '"&quot;"'),
                    ],
                )
                final_segmentation = xml_segmentation
            # Si checkBox xml desactive
            else:
                xml_segmentation, _ = Segmenter.recode(
                    tagged_segmentation,
                    substitutions=[
                        (re.compile(r"<unknown>"), "[unknown]"),
                        (re.compile(r"(.+)\t(.+)\t(.+?)(?=[\r\n])"),
                         "<w lemma='&3' type='&2'>&1</w>"),
                        (re.compile(r'"""'), '"&quot;"'),
                    ],
                )
                final_segmentation = Segmenter.import_xml(
                    xml_segmentation, "w")

            self.infoBox.dataSent("")

            # Enregistrer le lien de treetagger...
            if self.system == "nt":
                file = open("treetagger_link.txt", "w")
            else:
                file = open(
                    os.path.normpath("/Users/" + self.user +
                                     "/treetagger_link.txt"), "w")

            file.write(self.treetagger_link)
            file.close()

            # Clear progress bar.
            self.progressBar.finish()

            # envoyer la seguementation
            self.send("Text data", final_segmentation, self)
            self.compteur += 1
            self.sendButton.resetSettingsChangedFlag()

    def tag(self, inputData):

        # fichier temporaire...
        tmp = os.path.normpath(os.path.expanduser("~/tmp_file.txt"))
        tmp2 = os.path.normpath(os.path.expanduser("~/tmp_file2.txt"))

        # ecrire dans un premier fichier le texte
        f = open(tmp, "w", encoding="utf-8")
        f.write(inputData)
        f.close()

        # liste de langue en option...
        option = str()
        if self.langues[self.language] == "French":
            option = "-f"
        elif self.langues[self.language] == "English":
            option = "-e"
        elif self.langues[self.language] == "Italian":
            option = "-i"

        # commande perle executee pour separer le texte en mot
        if option:
            commande1 = [
                "perl",
                os.path.normpath(
                    self.treetagger_link + "/cmd/utf8-tokenize.perl"
                ),
                option,
                "-a",
                os.path.normpath(
                    self.treetagger_link + "/lib/" + \
                    self.langues_possibles[self.langues[self.language]][1]
                ),
                tmp
            ]
        else:
            commande1 = [
                "perl",
                os.path.normpath(self.treetagger_link + "/cmd/tokenize.pl"),
                "-a",
                os.path.normpath(
                    self.treetagger_link + "/lib/" + \
                    self.langues_possibles[self.langues[self.language]][1]
                ),
                tmp
            ]

        # evoyer un ordre a la ligne de commande
        if self.system == "nt":
            outcom1 = sp.Popen(commande1, stdout=sp.PIPE, shell=True)
            out = outcom1.communicate()[0]\
                         .decode(encoding="utf-8", errors="ignore")\
                         .replace("\r", "")
        else:
            outcom1 = sp.Popen(commande1, stdout=sp.PIPE, shell=False)
            out = outcom1.communicate()[0]\
                         .decode(encoding="utf-8", errors="ignore")

        # avancer la progressBar d"un cran
        self.progressBar.advance()

        # ecrire dans un deuxieme fichier le texte separe en mots
        f = codecs.open(tmp2, "w", encoding="utf-8")
        f.write(out)
        f.close()

        if self.system == "nt":
            bin_treetagger = "/bin/tree-tagger.exe"
        else:
            bin_treetagger = "/bin/tree-tagger"

        # taguer le texte avec type et lemma
        if self.unknown == True:
            commande2 = [
                os.path.normpath(self.treetagger_link + bin_treetagger),
                os.path.normpath(
                    self.treetagger_link + "/lib/" + \
                    self.langues_possibles[self.langues[self.language]][0]
                ),
                "-token",
                "-lemma",
                "-sgml",
                "-no-unknown",
                "-quiet",
                tmp2
            ]

        if self.unknown == False:
            commande2 = [
                os.path.normpath(self.treetagger_link + bin_treetagger),
                os.path.normpath(
                    self.treetagger_link + "/lib/" + \
                    self.langues_possibles[self.langues[self.language]][0]
                ),
                "-token",
                "-lemma",
                "-sgml",
                "-quiet",
                tmp2
            ]

        if self.system == "nt":
            output = sp.Popen(commande2, stdout=sp.PIPE, shell=True)
            outtext = output.communicate()[0]\
                            .decode(encoding="utf-8", errors="ignore")
        else:
            output = sp.Popen(commande2, stdout=sp.PIPE, shell=False)
            outtext = output.communicate()[0]\
                            .decode(encoding="utf-8", errors="ignore")

        # supprimer ficher temporaire
        os.remove(tmp)
        os.remove(tmp2)

        # avancer la progressBar d"un cran
        self.progressBar.advance()

        return outtext

    def updateGUI(self):
        """Update GUI state"""
        pass

    def clearCreatedInputs(self):
        #Delete all Input objects that have been created.
        for i in self.createdInputs:
            Segmentation.set_data(i[0].str_index, None)
        del self.createdInputs[:]

    def settings_changed(self):
        # eviter qu"un changement arrive
        # si le widget n"a pas encore evoyer d"output...
        if self.compteur > 0:
            return self.sendButton.settingsChanged()

    def onDeleteWidget(self):
        """Free memory when widget is deleted (overriden method)"""
        self.clearCreatedInputs()

    def setCaption(self, title):
        if 'captionTitle' in dir(self):
            changed = title != self.captionTitle
            super().setCaption(title)
            if changed:
                self.sendButton.settingsChanged()
        else:
            super().setCaption(title)
Ejemplo n.º 10
0
    def __init__(self):
        """Widget creator."""

        super().__init__()

        # Other (non-setting) attributes...
        self.segmentation = None
        self.displayedFolderLabels = list()
        self.currentFolder = self.__class__.base_url
        self.database = None
        self.selectedItems = list()

        # Next two instructions are helpers from TextableUtils. Corresponding
        # interface elements are declared here and actually drawn below (at
        # their position in the UI)...
        self.infoBox = InfoBox(widget=self.controlArea)
        self.sendButton = SendButton(
            widget=self.controlArea,
            master=self,
            callback=self.sendData,
            infoBoxAttribute="infoBox",
        )

        # User interface...

        # Browse database box
        browseBox = gui.widgetBox(
            widget=self.controlArea,
            box="Browse database",
            orientation="vertical",
            addSpace=False,
        )

        upwardNavBox = gui.widgetBox(
            widget=browseBox,
            box=False,
            orientation="horizontal",
        )
        self.homeRefreshButton = gui.button(
            widget=upwardNavBox,
            master=self,
            label="Home",
            callback=self.homeRefreshPressed,
            tooltip="Return to database root.",
            # tooltip="Connect to CHILDES website and refresh corpus list.",
        )
        self.backButton = gui.button(
            widget=upwardNavBox,
            master=self,
            label="Back",
            callback=self.backPressed,
            tooltip="View parent folder.",
        )

        gui.separator(widget=browseBox, height=3)

        self.currentFolderLabel = gui.label(
            widget=browseBox,
            master=self,
            label="Current folder: /",
            tooltip="This is the currently displayed folder.",
        )

        gui.separator(widget=browseBox, height=3)

        displayedFolderListbox = gui.listBox(
            widget=browseBox,
            master=self,
            value="selectedItems",
            labels="displayedFolderLabels",
            callback=self.corpusSelected,
            tooltip="Select an item to open or import.",
        )
        displayedFolderListbox.setMinimumHeight(150)
        displayedFolderListbox.setSelectionMode(1)
        displayedFolderListbox.doubleClicked.connect(self.listBoxDoubleClicked)

        self.importedCorpusLabel = gui.label(
            widget=browseBox,
            master=self,
            label="No corpus imported yet.",
            tooltip="This is the currently imported corpus.",
        )

        gui.separator(widget=browseBox, height=3)

        downwardNavBox = gui.widgetBox(
            widget=browseBox,
            box=False,
            orientation="horizontal",
        )
        self.openButton = gui.button(
            widget=downwardNavBox,
            master=self,
            label="Open",
            callback=self.openPressed,
            tooltip="View selected folder's contents.",
        )
        self.importButton = gui.button(
            widget=downwardNavBox,
            master=self,
            label="Import",
            callback=self.importPressed,
            tooltip="Import selected item's contents.",
        )

        gui.separator(widget=browseBox, height=3)

        gui.rubber(self.controlArea)

        # Now Info box and Send button must be drawn...
        self.sendButton.draw()
        self.infoBox.draw()

        # This initialization step needs to be done after infoBox has been
        # drawn (because we may need to display an error message).
        self.loadDatabaseCache()

        # Send data if autoSend.
        self.sendButton.sendIf()

        self.setMinimumWidth(350)
        self.adjustSizeWithTimer()
Ejemplo n.º 11
0
class Childes(OWTextableBaseWidget):
    """Textable widget for importing data in XML format from the CHILDES
    database (https://childes.talkbank.org/data-xml/).
    """

    #----------------------------------------------------------------------
    # Widget's metadata...

    name = "CHILDES"
    description = "Import XML data from the CHILDES database"
    icon = "icons/CHILDES.svg"
    priority = 12

    #----------------------------------------------------------------------
    # Channel definitions (NB: no input in this case)...

    inputs = []
    outputs = [("XML data", Segmentation)]

    #----------------------------------------------------------------------
    # Settings...

    settingsHandler = VersionedSettingsHandler(
        version=__version__.rsplit(".", 1)[0])

    importedCorpus = settings.Setting(None)
    autoSend = settings.Setting(False)

    #----------------------------------------------------------------------
    # Other class variables...

    base_url = "https://childes.talkbank.org/data-xml/"
    cache_filename = "cache_childes"

    want_main_area = False

    def __init__(self):
        """Widget creator."""

        super().__init__()

        # Other (non-setting) attributes...
        self.segmentation = None
        self.displayedFolderLabels = list()
        self.currentFolder = self.__class__.base_url
        self.database = None
        self.selectedItems = list()

        # Next two instructions are helpers from TextableUtils. Corresponding
        # interface elements are declared here and actually drawn below (at
        # their position in the UI)...
        self.infoBox = InfoBox(widget=self.controlArea)
        self.sendButton = SendButton(
            widget=self.controlArea,
            master=self,
            callback=self.sendData,
            infoBoxAttribute="infoBox",
        )

        # User interface...

        # Browse database box
        browseBox = gui.widgetBox(
            widget=self.controlArea,
            box="Browse database",
            orientation="vertical",
            addSpace=False,
        )

        upwardNavBox = gui.widgetBox(
            widget=browseBox,
            box=False,
            orientation="horizontal",
        )
        self.homeRefreshButton = gui.button(
            widget=upwardNavBox,
            master=self,
            label="Home",
            callback=self.homeRefreshPressed,
            tooltip="Return to database root.",
            # tooltip="Connect to CHILDES website and refresh corpus list.",
        )
        self.backButton = gui.button(
            widget=upwardNavBox,
            master=self,
            label="Back",
            callback=self.backPressed,
            tooltip="View parent folder.",
        )

        gui.separator(widget=browseBox, height=3)

        self.currentFolderLabel = gui.label(
            widget=browseBox,
            master=self,
            label="Current folder: /",
            tooltip="This is the currently displayed folder.",
        )

        gui.separator(widget=browseBox, height=3)

        displayedFolderListbox = gui.listBox(
            widget=browseBox,
            master=self,
            value="selectedItems",
            labels="displayedFolderLabels",
            callback=self.corpusSelected,
            tooltip="Select an item to open or import.",
        )
        displayedFolderListbox.setMinimumHeight(150)
        displayedFolderListbox.setSelectionMode(1)
        displayedFolderListbox.doubleClicked.connect(self.listBoxDoubleClicked)

        self.importedCorpusLabel = gui.label(
            widget=browseBox,
            master=self,
            label="No corpus imported yet.",
            tooltip="This is the currently imported corpus.",
        )

        gui.separator(widget=browseBox, height=3)

        downwardNavBox = gui.widgetBox(
            widget=browseBox,
            box=False,
            orientation="horizontal",
        )
        self.openButton = gui.button(
            widget=downwardNavBox,
            master=self,
            label="Open",
            callback=self.openPressed,
            tooltip="View selected folder's contents.",
        )
        self.importButton = gui.button(
            widget=downwardNavBox,
            master=self,
            label="Import",
            callback=self.importPressed,
            tooltip="Import selected item's contents.",
        )

        gui.separator(widget=browseBox, height=3)

        gui.rubber(self.controlArea)

        # Now Info box and Send button must be drawn...
        self.sendButton.draw()
        self.infoBox.draw()

        # This initialization step needs to be done after infoBox has been
        # drawn (because we may need to display an error message).
        self.loadDatabaseCache()

        # Send data if autoSend.
        self.sendButton.sendIf()

        self.setMinimumWidth(350)
        self.adjustSizeWithTimer()

    def sendData(self):
        """Compute result of widget processing and send to output"""
        if not self.importedCorpus:
            self.infoBox.setText("Please select a corpus to import.",
                                 "warning")
            self.send("XML data", None, self)
            return
        progressBar = ProgressBar(self, iterations=1)
        response = requests.get(self.importedCorpus)
        myZip = zipfile.ZipFile(io.BytesIO(response.content))
        for file in myZip.infolist():
            print(file.filename, len(myZip.read(file)))
        progressBar.advance()
        progressBar.finish()

        corpus = self.displayedFolderLabels[self.selectedItems[0]]
        self.importedCorpusLabel.setText("Corpus %s correctly imported." %
                                         corpus)
        self.infoBox.setText("All good!")
        self.sendButton.resetSettingsChangedFlag()

    def homeRefreshPressed(self):
        """Refresh database file tree"""
        self.currentFolder = self.__class__.base_url
        self.updateDisplayedFolders()

    def backPressed(self):
        """Display parent folder's contents"""
        self.currentFolder = "/".join(self.currentFolder[:-1].split("/")[:-1])
        self.currentFolder += "/"
        self.updateDisplayedFolders()

    def corpusSelected(self):
        """Import selected corpus"""
        self.updateBrowseBoxButtons()

    def openPressed(self):
        """Display selected folder's contents"""
        self.currentFolder += self.displayedFolderLabels[self.selectedItems[0]]
        self.updateDisplayedFolders()

    def importPressed(self):
        """Import selected corpus"""
        # TODO: handle exceptions
        corpus = self.displayedFolderLabels[self.selectedItems[0]]
        self.importedCorpus = self.currentFolder + corpus
        self.importButton.setDisabled(True)
        self.importedCorpusLabel.setText("Corpus %s ready to import." % corpus)
        self.sendButton.settingsChanged()

    def listBoxDoubleClicked(self):
        """Reroute to 'openPressed' or 'importPressed' as needed"""
        if self.displayedFolderLabels[self.selectedItems[0]].endswith(".zip"):
            self.importPressed()
        else:
            self.openPressed()

    def loadDatabaseCache(self):
        """Load the cached database"""
        # Try to open saved file in this module"s directory...
        path = os.path.dirname(
            os.path.abspath(inspect.getfile(inspect.currentframe())))
        try:
            file = open(os.path.join(path, self.__class__.cache_filename),
                        "rb")
            self.database = pickle.load(file)
            file.close()
        # Else try to rebuild cache from CHILDES website...
        except IOError:
            self.database = self.rebuildCacheFromWebsite()

        self.currentFolder = self.__class__.base_url
        self.updateDisplayedFolders()

    def updateDisplayedFolders(self):
        """Refresh state of displayed folder listbox"""
        # Current folder label...
        currentFolder = self.currentFolder[len(self.__class__.base_url) - 1:]
        self.currentFolderLabel.setText("Current folder: " + currentFolder)

        # Populate listbox...
        folderContent = self.database[self.__class__.base_url]
        steps = currentFolder[:-1].split("/")[1:]
        for idx, _ in enumerate(steps):
            path = self.__class__.base_url + "/".join(steps[:idx + 1]) + "/"
            folderContent = folderContent[path]
        displayedFolderLabels = list()
        for item in folderContent.keys():
            if item.endswith(".zip"):
                displayedFolderLabels.append(item)
            else:
                displayedFolderLabels.append(item.split("/")[-2] + "/")
        self.displayedFolderLabels = displayedFolderLabels

        # Imported corpus label...
        if self.importedCorpus:
            self.importedCorpusLabel.setText(
                "Corpus %s ready to import." %
                self.importedCorpus.split("/")[-1])

        # Buttons.
        self.updateBrowseBoxButtons()

    def updateBrowseBoxButtons(self):
        """Refresh state of Browse box buttons"""
        currentFolder = self.currentFolder[len(self.__class__.base_url) - 1:]
        self.homeRefreshButton.setDisabled(currentFolder == "/")
        self.backButton.setDisabled(currentFolder == "/")
        self.openButton.setDisabled(
            len(self.selectedItems) == 0 or
            self.displayedFolderLabels[self.selectedItems[0]].endswith(".zip"))
        self.importButton.setDisabled(
            len(self.selectedItems) == 0
            or self.displayedFolderLabels[self.selectedItems[0]].endswith("/"))

    # The following method need to be copied (without any change) in
    # every Textable widget...

    def setCaption(self, title):
        if 'captionTitle' in dir(self):
            changed = title != self.captionTitle
            super().setCaption(title)
            if changed:
                self.sendButton.settingsChanged()
        else:
            super().setCaption(title)
Ejemplo n.º 12
0
    def __init__(self):
        """Widget creator."""

        super().__init__()

        # ATTRIBUTS
        # searchFunction
        self.searchResults = None
        self.inputSeg = None
        # newQuery = attribut box lineEdit (search something)
        self.titleQuery = ''
        self.authorQuery = ''
        self.langQuery = 'Any'
        self.nbr_results = 200
        # Results box attributs
        self.titleLabels = list()
        self.selectedTitles = list()
        # selections box attributs
        self.myTitles = list()
        self.mytitleLabels = list()
        # stock all the inputs (texts) in a list
        self.createdInputs = list()

        # Next two instructions are helpers from TextableUtils. Corresponding
        # interface elements are declared here and actually drawn below (at
        # their position in the UI)...
        self.infoBox = InfoBox(widget=self.controlArea)
        self.sendButton = SendButton(
            widget=self.controlArea,
            master=self,
            callback=self.sendData,
            infoBoxAttribute="infoBox",
        )
        #----------------------------------------------------------------------
        # User interface...
        # Create the working area
        self.cacheGenerationButton = gui.button(
            widget=self.controlArea,
            master=self,
            label="Generate cache",
            callback=self.generate_cache,
            tooltip="Generate the gutenberg cache, this might take a while...",
        )

        self.queryBox = gui.widgetBox(
            widget=self.controlArea,
            box="Search books",
            orientation="vertical",
        )

        # Allows to enter specific text to the research
        #  Uses "newQuery" attribut
        gui.lineEdit(
            widget=self.queryBox,
            master=self,
            value='titleQuery',
            orientation='horizontal',
            label=u"Title: ",
            labelWidth=120,
            tooltip=("Enter a string"),
        )

        gui.lineEdit(
            widget=self.queryBox,
            master=self,
            value='authorQuery',
            orientation='horizontal',
            label=u"Author: ",
            labelWidth=120,
            tooltip=("Enter a string"),
        )

        #ComboBox for selecting the text language
        queryLang = gui.comboBox(
            widget=self.queryBox,
            master=self,
            value='langQuery',
            items=[
                "Any", "Afrikaans", "Aleut", "Arabic", "Arapaho", "Bodo",
                "Breton", "Bulgarian", "Caló", "Catalan", "Cebuano", "Chinese",
                "Czech", "Danish", "Dutch", "English", "Esperanto", "Estonian",
                "Farsi", "Finnish", "French", "Frisian", "Friulian",
                "Gaelic, Scottish", "Galician", "Gamilaraay", "German",
                "Greek", "Greek, Ancient", "Hebrew", "Hungarian", "Icelandic",
                "Iloko", "Interlingua", "Inuktitut", "Irish", "Italian",
                "Japanese", "Kashubian", "Khasi", "Korean", "Latin",
                "Lithuanian", "Maori", "Mayan Languages", "Middle English",
                "Nahuatl", "Napoletano-Calabrese", "Navajo",
                "North American Indian", "Norwegian", "Occitan", "Ojibwa",
                "Old English", "Polish", "Portuguese", "Romanian", "Russian",
                "Sanskrit", "Serbian", "Slovenian", "Spanish", "Swedish",
                "Tagabawa", "Tagalog", "Telugu", "Welsh", "Yiddish"
            ],
            sendSelectedValue=True,
            orientation="horizontal",
            label="Language",
            labelWidth=120,
            tooltip=("Please select the desired language.\n"),
        )

        #dict to get the language code
        self.lang_dict = {
            "Any": "",
            "Afrikaans": "af",
            "Aleut": "ale",
            "Arabic": "ar",
            "Arapaho": "arp",
            "Bodo": "brx",
            "Breton": "br",
            "Bulgarian": "bg",
            "Caló": "rmr",
            "Catalan": "ca",
            "Cebuano": "ceb",
            "Chinese": "zh",
            "Czech": "cs",
            "Danish": "da",
            "Dutch": "nl",
            "English": "en",
            "Esperanto": "eo",
            "Estonian": "et",
            "Farsi": "fa",
            "Finnish": "fi",
            "French": "fr",
            "Frisian": "fy",
            "Friulian": "fur",
            "Gaelic, Scottish": "gla",
            "Galician": "gl",
            "Gamilaraay": "kld",
            "German": "de",
            "Greek": "el",
            "Greek, Ancient": "grc",
            "Hebrew": "he",
            "Hungarian": "hu",
            "Icelandic": "is",
            "Iloko": "ilo",
            "Interlingua": "ia",
            "Inuktitut": "iu",
            "Irish": "ga",
            "Italian": "it",
            "Japanese": "ja",
            "Kashubian": "csb",
            "Khasi": "kha",
            "Korean": "ko",
            "Latin": "la",
            "Lithuanian": "lt",
            "Maori": "mi",
            "Mayan Languages": "myn",
            "Middle English": "enm",
            "Nahuatl": "nah",
            "Napoletano-Calabrese": "nap",
            "Navajo": "nav",
            "North American Indian": "nai",
            "Norwegian": "no",
            "Occitan": "oc",
            "Ojibwa": "oji",
            "Old English": "ang",
            "Polish": "pl",
            "Portuguese": "pt",
            "Romanian": "ro",
            "Russian": "ru",
            "Sanskrit": "sa",
            "Serbian": "sr",
            "Slovenian": "sl",
            "Spanish": "es",
            "Swedish": "sv",
            "Tagabawa": "bgs",
            "Tagalog": "tl",
            "Telugu": "te",
            "Welsh": "cy",
            "Yiddish": "yi"
        }
        # Allows to choose the wanted results numberp (10 by 10)
        queryNbr = gui.comboBox(
            widget=self.queryBox,
            master=self,
            value="nbr_results",
            items=[
                "10", "20", "30", "40", "50", "60", "70", "80", "90", "100",
                "200", "300", "400", "500", "1000"
            ],
            sendSelectedValue=True,
            orientation="horizontal",
            label="Number of results: ",
            labelWidth=120,
            tooltip=("Please select the desired search.\n"),
        )

        # Reasearch button
        # Uses "searchFunction" attribut
        self.searchButton = gui.button(
            widget=self.queryBox,
            master=self,
            label="Search",
            callback=self.search,
            tooltip="Connect Genius and make a research",
        )
        self.titleListbox = gui.listBox(
            widget=self.queryBox,
            master=self,
            value="selectedTitles",  # setting (list)
            labels="titleLabels",  # setting (list)
            callback=lambda: self.addButton.setDisabled(self.selectedTitles ==
                                                        list()),
            tooltip="The list of titles whose content will be imported",
        )
        self.titleListbox.setMinimumHeight(150)
        self.titleListbox.setSelectionMode(3)

        boxbutton = gui.widgetBox(
            widget=self.queryBox,
            box=False,
            orientation='horizontal',
        )
        # Add text button
        self.addButton = gui.button(
            widget=boxbutton,
            master=self,
            label=u'Add to corpus',
            callback=self.add,
            tooltip=(u"Move the selected book downward in your corpus."),
        )
        self.addButton.setDisabled(True)

        # Clear button
        # Uses "clearResults" function
        self.clearButton = gui.button(
            widget=boxbutton,
            master=self,
            label="Clear results",
            callback=self.clearResults,
            tooltip="Clear results",
        )
        self.clearButton.setDisabled(True)
        gui.separator(widget=self.queryBox, height=3)

        # area where confirmed texts are moved and stocked
        mytitleBox = gui.widgetBox(
            widget=self.controlArea,
            box="Corpus",
            orientation="vertical",
        )

        self.mytitleListbox = gui.listBox(
            widget=mytitleBox,
            master=self,
            value="myTitles",
            labels="mytitleLabels",
            callback=lambda: self.removeButton.setDisabled(self.myTitles ==
                                                           list()),
            tooltip="The list of books which will be imported",
        )
        self.mytitleListbox.setMinimumHeight(150)
        self.mytitleListbox.setSelectionMode(3)

        boxbutton2 = gui.widgetBox(
            widget=mytitleBox,
            box=False,
            orientation='horizontal',
        )
        # Remove text button
        self.removeButton = gui.button(
            widget=boxbutton2,
            master=self,
            label=u'Remove from corpus',
            callback=self.remove,
            tooltip=(u"Remove the selected book from your corpus."),
        )
        self.removeButton.setDisabled(True)

        # Delete all confirmed texts button
        self.clearmyBasketButton = gui.button(
            widget=boxbutton2,
            master=self,
            label=u'Clear corpus',
            callback=self.clearmyBasket,
            tooltip=(u"Remove all books from your corpus."),
        )
        self.clearmyBasketButton.setDisabled(True)

        gui.separator(widget=mytitleBox, height=3)
        gui.rubber(self.controlArea)
        #----------------------------------------------------------------------

        # Draw Info box and Send button
        self.sendButton.draw()
        self.searchButton.setDefault(True)
        self.infoBox.draw()

        # Update the selections list
        self.updateMytitleLabels()

        # Send data if autoSend.
        self.sendButton.sendIf()

        # checks if the cache exists
        self.check_cache()
Ejemplo n.º 13
0
class Gutenberg(OWTextableBaseWidget):
    """Textable widget for importing clean texts from Gutenberg
    (https://www.gutenberg.org/)
    """

    #----------------------------------------------------------------------
    # Widget's metadata...

    name = "Gutenberg"
    description = "Gutenberg caching and importation"
    icon = "icons/gutenberg.png"
    priority = 10

    #----------------------------------------------------------------------
    # Channel definitions...

    inputs = []
    outputs = [("Gutenberg importation", Segmentation)]

    #----------------------------------------------------------------------
    # Layout parameters...

    want_main_area = False

    #----------------------------------------------------------------------
    # Settings...

    settingsHandler = VersionedSettingsHandler(
        version=__version__.rsplit(".", 1)[0])

    # Saved settings
    autoSend = settings.Setting(False)
    myBasket = settings.Setting([])

    def __init__(self):
        """Widget creator."""

        super().__init__()

        # ATTRIBUTS
        # searchFunction
        self.searchResults = None
        self.inputSeg = None
        # newQuery = attribut box lineEdit (search something)
        self.titleQuery = ''
        self.authorQuery = ''
        self.langQuery = 'Any'
        self.nbr_results = 200
        # Results box attributs
        self.titleLabels = list()
        self.selectedTitles = list()
        # selections box attributs
        self.myTitles = list()
        self.mytitleLabels = list()
        # stock all the inputs (texts) in a list
        self.createdInputs = list()

        # Next two instructions are helpers from TextableUtils. Corresponding
        # interface elements are declared here and actually drawn below (at
        # their position in the UI)...
        self.infoBox = InfoBox(widget=self.controlArea)
        self.sendButton = SendButton(
            widget=self.controlArea,
            master=self,
            callback=self.sendData,
            infoBoxAttribute="infoBox",
        )
        #----------------------------------------------------------------------
        # User interface...
        # Create the working area
        self.cacheGenerationButton = gui.button(
            widget=self.controlArea,
            master=self,
            label="Generate cache",
            callback=self.generate_cache,
            tooltip="Generate the gutenberg cache, this might take a while...",
        )

        self.queryBox = gui.widgetBox(
            widget=self.controlArea,
            box="Search books",
            orientation="vertical",
        )

        # Allows to enter specific text to the research
        #  Uses "newQuery" attribut
        gui.lineEdit(
            widget=self.queryBox,
            master=self,
            value='titleQuery',
            orientation='horizontal',
            label=u"Title: ",
            labelWidth=120,
            tooltip=("Enter a string"),
        )

        gui.lineEdit(
            widget=self.queryBox,
            master=self,
            value='authorQuery',
            orientation='horizontal',
            label=u"Author: ",
            labelWidth=120,
            tooltip=("Enter a string"),
        )

        #ComboBox for selecting the text language
        queryLang = gui.comboBox(
            widget=self.queryBox,
            master=self,
            value='langQuery',
            items=[
                "Any", "Afrikaans", "Aleut", "Arabic", "Arapaho", "Bodo",
                "Breton", "Bulgarian", "Caló", "Catalan", "Cebuano", "Chinese",
                "Czech", "Danish", "Dutch", "English", "Esperanto", "Estonian",
                "Farsi", "Finnish", "French", "Frisian", "Friulian",
                "Gaelic, Scottish", "Galician", "Gamilaraay", "German",
                "Greek", "Greek, Ancient", "Hebrew", "Hungarian", "Icelandic",
                "Iloko", "Interlingua", "Inuktitut", "Irish", "Italian",
                "Japanese", "Kashubian", "Khasi", "Korean", "Latin",
                "Lithuanian", "Maori", "Mayan Languages", "Middle English",
                "Nahuatl", "Napoletano-Calabrese", "Navajo",
                "North American Indian", "Norwegian", "Occitan", "Ojibwa",
                "Old English", "Polish", "Portuguese", "Romanian", "Russian",
                "Sanskrit", "Serbian", "Slovenian", "Spanish", "Swedish",
                "Tagabawa", "Tagalog", "Telugu", "Welsh", "Yiddish"
            ],
            sendSelectedValue=True,
            orientation="horizontal",
            label="Language",
            labelWidth=120,
            tooltip=("Please select the desired language.\n"),
        )

        #dict to get the language code
        self.lang_dict = {
            "Any": "",
            "Afrikaans": "af",
            "Aleut": "ale",
            "Arabic": "ar",
            "Arapaho": "arp",
            "Bodo": "brx",
            "Breton": "br",
            "Bulgarian": "bg",
            "Caló": "rmr",
            "Catalan": "ca",
            "Cebuano": "ceb",
            "Chinese": "zh",
            "Czech": "cs",
            "Danish": "da",
            "Dutch": "nl",
            "English": "en",
            "Esperanto": "eo",
            "Estonian": "et",
            "Farsi": "fa",
            "Finnish": "fi",
            "French": "fr",
            "Frisian": "fy",
            "Friulian": "fur",
            "Gaelic, Scottish": "gla",
            "Galician": "gl",
            "Gamilaraay": "kld",
            "German": "de",
            "Greek": "el",
            "Greek, Ancient": "grc",
            "Hebrew": "he",
            "Hungarian": "hu",
            "Icelandic": "is",
            "Iloko": "ilo",
            "Interlingua": "ia",
            "Inuktitut": "iu",
            "Irish": "ga",
            "Italian": "it",
            "Japanese": "ja",
            "Kashubian": "csb",
            "Khasi": "kha",
            "Korean": "ko",
            "Latin": "la",
            "Lithuanian": "lt",
            "Maori": "mi",
            "Mayan Languages": "myn",
            "Middle English": "enm",
            "Nahuatl": "nah",
            "Napoletano-Calabrese": "nap",
            "Navajo": "nav",
            "North American Indian": "nai",
            "Norwegian": "no",
            "Occitan": "oc",
            "Ojibwa": "oji",
            "Old English": "ang",
            "Polish": "pl",
            "Portuguese": "pt",
            "Romanian": "ro",
            "Russian": "ru",
            "Sanskrit": "sa",
            "Serbian": "sr",
            "Slovenian": "sl",
            "Spanish": "es",
            "Swedish": "sv",
            "Tagabawa": "bgs",
            "Tagalog": "tl",
            "Telugu": "te",
            "Welsh": "cy",
            "Yiddish": "yi"
        }
        # Allows to choose the wanted results numberp (10 by 10)
        queryNbr = gui.comboBox(
            widget=self.queryBox,
            master=self,
            value="nbr_results",
            items=[
                "10", "20", "30", "40", "50", "60", "70", "80", "90", "100",
                "200", "300", "400", "500", "1000"
            ],
            sendSelectedValue=True,
            orientation="horizontal",
            label="Number of results: ",
            labelWidth=120,
            tooltip=("Please select the desired search.\n"),
        )

        # Reasearch button
        # Uses "searchFunction" attribut
        self.searchButton = gui.button(
            widget=self.queryBox,
            master=self,
            label="Search",
            callback=self.search,
            tooltip="Connect Genius and make a research",
        )
        self.titleListbox = gui.listBox(
            widget=self.queryBox,
            master=self,
            value="selectedTitles",  # setting (list)
            labels="titleLabels",  # setting (list)
            callback=lambda: self.addButton.setDisabled(self.selectedTitles ==
                                                        list()),
            tooltip="The list of titles whose content will be imported",
        )
        self.titleListbox.setMinimumHeight(150)
        self.titleListbox.setSelectionMode(3)

        boxbutton = gui.widgetBox(
            widget=self.queryBox,
            box=False,
            orientation='horizontal',
        )
        # Add text button
        self.addButton = gui.button(
            widget=boxbutton,
            master=self,
            label=u'Add to corpus',
            callback=self.add,
            tooltip=(u"Move the selected book downward in your corpus."),
        )
        self.addButton.setDisabled(True)

        # Clear button
        # Uses "clearResults" function
        self.clearButton = gui.button(
            widget=boxbutton,
            master=self,
            label="Clear results",
            callback=self.clearResults,
            tooltip="Clear results",
        )
        self.clearButton.setDisabled(True)
        gui.separator(widget=self.queryBox, height=3)

        # area where confirmed texts are moved and stocked
        mytitleBox = gui.widgetBox(
            widget=self.controlArea,
            box="Corpus",
            orientation="vertical",
        )

        self.mytitleListbox = gui.listBox(
            widget=mytitleBox,
            master=self,
            value="myTitles",
            labels="mytitleLabels",
            callback=lambda: self.removeButton.setDisabled(self.myTitles ==
                                                           list()),
            tooltip="The list of books which will be imported",
        )
        self.mytitleListbox.setMinimumHeight(150)
        self.mytitleListbox.setSelectionMode(3)

        boxbutton2 = gui.widgetBox(
            widget=mytitleBox,
            box=False,
            orientation='horizontal',
        )
        # Remove text button
        self.removeButton = gui.button(
            widget=boxbutton2,
            master=self,
            label=u'Remove from corpus',
            callback=self.remove,
            tooltip=(u"Remove the selected book from your corpus."),
        )
        self.removeButton.setDisabled(True)

        # Delete all confirmed texts button
        self.clearmyBasketButton = gui.button(
            widget=boxbutton2,
            master=self,
            label=u'Clear corpus',
            callback=self.clearmyBasket,
            tooltip=(u"Remove all books from your corpus."),
        )
        self.clearmyBasketButton.setDisabled(True)

        gui.separator(widget=mytitleBox, height=3)
        gui.rubber(self.controlArea)
        #----------------------------------------------------------------------

        # Draw Info box and Send button
        self.sendButton.draw()
        self.searchButton.setDefault(True)
        self.infoBox.draw()

        # Update the selections list
        self.updateMytitleLabels()

        # Send data if autoSend.
        self.sendButton.sendIf()

        # checks if the cache exists
        self.check_cache()

    def check_cache(self):
        """changes layout according to the cache existens
        """
        # disables the search button if cache does not exists
        if not GutenbergCache.exists():
            # disables the search button if not
            self.queryBox.setDisabled(True)
            self.infoBox.setText(
                "Cache must be generated before first launch, it can take up to 10min",
                "warning")
        # disables the the cache generation button if it does exists
        else:
            self.cacheGenerationButton.setDisabled(True)

    def generate_cache(self):
        """generates the cache
        """
        if not GutenbergCache.exists():
            try:
                self.infoBox.setText(
                    "The cache is being generated. This can take up to 10min.",
                    "warning")
                GutenbergCache.create(refresh=True,
                                      download=True,
                                      unpack=True,
                                      parse=True,
                                      deleteTemp=True)
                self.infoBox.setText("Cache generated!")
                self.cacheGenerationButton.setDisabled(True)
                self.queryBox.setEnabled(True)
            except Exception as exc:
                print(exc)
                self.infoBox.setText(
                    "An error occurred while building the cache", "error")
        else:
            self.infoBox.setText("The cache already exists.")

    def search(self):
        """
            Parse a query string and do a search in the Gutenberg cache
        """
        query_string = self.titleQuery
        query_author = self.authorQuery
        language = self.lang_dict[self.langQuery]

        # informs the user that he didn't change anything
        if self.langQuery == 'Any' and query_string == '' and self.authorQuery == '':
            self.infoBox.setText(
                "You can't search only by language, if it's set to Any",
                "warning")

        else:
            # Recode author to name, first_name
            if len(query_author.split()) == 2:
                if "," not in query_author:
                    query_author = "%, ".join(query_author.split()[::-1])

            # parse query and lookup in gutenbergcache
            cache = GutenbergCache.get_cache()

            # searches the database
            try:
                query_results = cache.native_query(sql_query="""
                    /* Creates a new table with one author per book
                    by selecting the greatest author id */

                    WITH unique_book_author AS
                    (SELECT * FROM book_authors  
                    WHERE authorid IN (SELECT MAX(authorid) FROM book_authors GROUP BY bookid))

                    /* Selects title, author, gutenberg id and language */

                    SELECT titles.name, authors.name, books.gutenbergbookid, languages.name
                    FROM titles

                    /* Merges every needed table into one on shared attributes */

                    INNER JOIN books ON books.id = titles.bookid
                    INNER JOIN unique_book_author ON  books.id = unique_book_author.bookid 
                    INNER JOIN authors ON authors.id = unique_book_author.authorid
                    INNER JOIN languages ON books.languageid = languages.id

                    /* Matches users query using % wildcard for more permissive query */

                    WHERE upper(titles.name) LIKE "%{title}%"
                    AND upper(authors.name) LIKE "%{author}%"
                    AND languages.name LIKE "%{lang}%"
                    LIMIT {limit}
                    """.format(title=query_string,
                               author=query_author,
                               lang=language,
                               limit=self.nbr_results))
            except Exception as exc:
                print(exc)
                self.infoBox.setText(
                    "An error occurred while interrogating the cache.",
                    "error")
                return
            # get the results
            Results = list(query_results)

            self.searchResults = list()

            # creates better results
            for result in Results:
                result = list(result)
                # replaces all newlines types
                result[0] = re.sub(r'[\n\r]+', r', ', result[0])
                # recodes athor from: name, first_name to: fisrt_name name
                result[1] = " ".join(result[1].split(", ")[::-1])
                # gets the key from the lang_dict for the coresponding language abbreviation
                result[3] = [
                    key for key, value in self.lang_dict.items()
                    if value == result[3]
                ][0]

                self.searchResults.append(result)

            # display info message
            n_results = len(self.searchResults)
            self.infoBox.setText("{n} result{s} have been found".format(
                n=n_results, s="s" if n_results > 0 else ""))

            self.clearResults()
            # Update the results list with the search results
            # in order to display them
            for idx in self.searchResults:

                result_string = "{title} — {author} — {lang}".format(
                    title=idx[0], author=idx[1], lang=idx[3])
                self.titleLabels.append(result_string)

                self.titleLabels = self.titleLabels
                self.clearButton.setDisabled(False)
                self.addButton.setDisabled(self.selectedTitles == list())

                self.controlArea.setDisabled(False)

    # Function clearing the results list
    def clearResults(self):
        """Clear the results list"""
        del self.titleLabels[:]
        self.titleLabels = self.titleLabels
        self.clearButton.setDisabled(True)
        self.addButton.setDisabled(self.titleLabels == list())

    # Add texts function
    def add(self):
        """Add texts in your selection """
        for selectedTitle in self.selectedTitles:
            titleData = self.searchResults[selectedTitle]
            if titleData not in self.myBasket:
                self.myBasket.append(titleData)

        self.updateMytitleLabels()
        self.sendButton.settingsChanged()

    # Update selections function
    def updateMytitleLabels(self):
        self.mytitleLabels = list()

        for titleData in self.myBasket:

            result_string = "{title} — {author} — {lang}".format(
                title=titleData[0], author=titleData[1], lang=titleData[3])
            self.mytitleLabels.append(result_string)

        self.mytitleLabels = self.mytitleLabels
        self.clearmyBasketButton.setDisabled(self.myBasket == list())
        self.removeButton.setDisabled(self.myTitles == list())

    def remove(self):
        """Remove the selected books in the user's basket """
        self.myBasket = [
            title for idx, title in enumerate(self.myBasket)
            if idx not in self.myTitles
        ]
        self.updateMytitleLabels()
        self.sendButton.settingsChanged()

    # Clear selections function
    def clearmyBasket(self):
        """Remove all texts in your selection """
        self.mytitleLabels = list()
        self.myBasket = list()
        self.sendButton.settingsChanged()
        self.clearmyBasketButton.setDisabled(True)

    # Function computing results then sending them to the widget output
    def sendData(self):
        """Compute result of widget processing and send to output"""
        # Skip if title list is empty:
        if self.myBasket == list():
            self.infoBox.setText(
                "Your corpus is empty, please add some books first", "warning")
            return

        # Clear created Inputs.
        self.clearCreatedInputs()

        self.controlArea.setDisabled(True)

        # Initialize progress bar.
        progressBar = ProgressBar(
            self,
            iterations=len(self.myBasket),
        )

        text_content = list()
        annotations = list()

        try:
            # Retrieve selected texts from gutenberg
            for text in self.myBasket:

                gutenberg_id = text[2]

                # Get the text with Gutenbergpy
                gutenberg_text = gutenbergpy.textget.strip_headers(
                    gutenbergpy.textget.get_text_by_id(gutenberg_id)).decode(
                        "utf-8")

                text_content.append(gutenberg_text)
                # populate the annotation list
                annotations.append([text[0], text[1], text[3]])
                progressBar.advance()

        # If an error occurs (e.g. http error, or memory error)...
        except Exception as exc:
            # Set Info box and widget to "error" state.
            self.infoBox.setText("Couldn't download data from Gutenberg",
                                 "error")
            self.controlArea.setDisabled(False)
            print(exc)
            return

        # Store downloaded text strings in input objects...
        for text in text_content:
            newInput = Input(text, self.captionTitle)
            self.createdInputs.append(newInput)

        # If there's only one text, the widget's output is the created Input.
        if len(self.createdInputs) == 1:
            self.segmentation = self.createdInputs[0]

        # Otherwise the widget"s output is a concatenation.
        else:
            self.segmentation = Segmenter.concatenate(
                self.createdInputs,
                self.captionTitle,
                import_labels_as=None,
            )

        # Annotate segments with book metadata
        for idx, segment in enumerate(self.segmentation):
            segment.annotations.update({"title": annotations[idx][0]})
            segment.annotations.update({"author": annotations[idx][1]})
            segment.annotations.update({"language": annotations[idx][2]})
            self.segmentation[idx] = segment

        # Clear progress bar.
        progressBar.finish()

        self.controlArea.setDisabled(False)

        # Set status to OK and report data size...
        message = "%i segment@p sent to output " % len(self.segmentation)
        message = pluralize(message, len(self.segmentation))
        numChars = 0
        for segment in self.segmentation:
            segmentLength = len(Segmentation.get_data(segment.str_index))
            numChars += segmentLength
        message += "(%i character@p)." % numChars
        message = pluralize(message, numChars)
        self.infoBox.setText(message)

        self.send("Gutenberg importation", self.segmentation, self)
        self.sendButton.resetSettingsChangedFlag()

    def clearCreatedInputs(self):
        """Delete all Input objects that have been created."""
        for i in self.createdInputs:
            Segmentation.set_data(i[0].str_index, None)
        del self.createdInputs[:]

    # The following method needs to be copied verbatim in
    # every Textable widget that sends a segmentation...
    def setCaption(self, title):
        if 'captionTitle' in dir(self):
            changed = title != self.captionTitle
            super().setCaption(title)
            if changed:
                self.sendButton.settingsChanged()
        else:
            super().setCaption(title)
Ejemplo n.º 14
0
class SpaCy(OWTextableBaseWidget):
    """Textable widget for NLP using spaCy."""

    #----------------------------------------------------------------------
    # Widget's metadata...

    name = "spaCy"
    description = "Natural language processing using spaCy"
    icon = "icons/spacy.svg"
    priority = 21  # TODO

    #----------------------------------------------------------------------
    # Channel definitions...

    inputs = [("Text data", Segmentation, "inputData")]
    outputs = [("Linguistically analyzed data", Segmentation)]

    #----------------------------------------------------------------------
    # Layout parameters...

    want_main_area = False

    #----------------------------------------------------------------------
    # Settings...

    settingsHandler = VersionedSettingsHandler(
        version=__version__.rsplit(".", 1)[0])

    autoSend = settings.Setting(False)
    model = settings.Setting("fr_core_news_sm")

    def __init__(self):
        """Widget creator."""

        super().__init__()

        # Other attributes...
        self.inputSeg = None
        self.nlp = None

        # Next two instructions are helpers from TextableUtils. Corresponding
        # interface elements are declared here and actually drawn below (at
        # their position in the UI)...
        self.infoBox = InfoBox(widget=self.controlArea)
        self.sendButton = SendButton(
            widget=self.controlArea,
            master=self,
            callback=self.sendData,
            infoBoxAttribute="infoBox",
            sendIfPreCallback=None,
        )

        # User interface...

        # Tabs...
        self.tabs = QTabWidget()
        self.optionsTab = QWidget()
        self.modelManagerTab = QWidget()
        self.tabs.addTab(self.optionsTab, "Options")
        self.tabs.addTab(self.modelManagerTab, "Model manager")

        # Options tab...
        OptionsTabBox = QHBoxLayout()

        optionsBox = gui.widgetBox(widget=self.optionsTab)

        self.modelComboBox = gui.comboBox(
            widget=optionsBox,
            master=self,
            value='model',
            label='Model: ',
            tooltip='Select the spaCy language model you want to use.',
            items=INSTALLED_MODELS,
            sendSelectedValue=True,
            callback=self.modelChanged,
        )

        OptionsTabBox.addWidget(optionsBox)
        self.optionsTab.setLayout(OptionsTabBox)

        # Model manager tab...
        modelManagerTabBox = QHBoxLayout()

        modelManagerBox = gui.widgetBox(widget=self.modelManagerTab)

        # TODO: Model manager UI

        modelManagerTabBox.addWidget(modelManagerBox)
        self.modelManagerTab.setLayout(modelManagerTabBox)

        self.controlArea.layout().addWidget(self.tabs)

        gui.rubber(self.controlArea)

        # Now Info box and Send button must be drawn...
        self.sendButton.draw()
        self.infoBox.draw()
        self.infoBox.setText("Widget needs input", "warning")

        # Load spaCy language model...
        self.modelChanged()

        # Send data if autoSend.
        self.sendButton.sendIf()

    def inputData(self, newInput):
        """Process incoming data."""
        self.inputSeg = newInput
        self.infoBox.inputChanged()
        self.sendButton.sendIf()

    def modelChanged(self):
        """Respond to model change in UI."""
        self.nlp = spacy.load(self.model)
        self.sendButton.settingsChanged()

    def sendData(self):
        """Compute result of widget processing and send to output"""

        # Check that there's an input...
        if self.inputSeg is None:
            self.infoBox.setText("Widget needs input", "warning")
            self.send("Linguistically analyzed data", None, self)
            return

        # Initialize progress bar.
        self.infoBox.setText(
            u"Processing, please wait...",
            "warning",
        )
        self.controlArea.setDisabled(True)
        progressBar = ProgressBar(self, iterations=len(self.inputSeg))

        tokenizedSegments = list()

        # Process each input segment...
        for segment in self.inputSeg:

            # Input segment attributes...
            inputContent = segment.get_content()
            inputAnnotations = segment.annotations
            inputString = segment.str_index
            inputStart = segment.start or 0
            inputEnd = segment.end or len(inputContent)

            # NLP analysis...
            doc = self.nlp(inputContent)

            # Process each token in input segment...
            for token in doc:
                tokenAnnotations = inputAnnotations.copy()
                tokenAnnotations.update({
                    k: getattr(token, k)
                    for k in RELEVANT_KEYS if getattr(token, k) is not None
                })
                tokenStart = inputStart + token.idx
                tokenizedSegments.append(
                    Segment(
                        str_index=inputString,
                        start=tokenStart,
                        end=tokenStart + len(token),
                        annotations=tokenAnnotations,
                    ))

            progressBar.advance()

        outputSeg = Segmentation(tokenizedSegments, self.captionTitle)

        # Set status to OK and report data size...
        message = "%i segment@p sent to output." % len(outputSeg)
        message = pluralize(message, len(outputSeg))
        self.infoBox.setText(message)

        print(outputSeg.to_string())

        # Clear progress bar.
        progressBar.finish()
        self.controlArea.setDisabled(False)

        # Send data to output...
        self.send("Linguistically analyzed data", outputSeg, self)

        self.sendButton.resetSettingsChangedFlag()

    # The following method needs to be copied verbatim in
    # every Textable widget that sends a segmentation...

    def setCaption(self, title):
        if 'captionTitle' in dir(self):
            changed = title != self.captionTitle
            super().setCaption(title)
            if changed:
                self.sendButton.settingsChanged()
        else:
            super().setCaption(title)
Ejemplo n.º 15
0
    def __init__(self):
        """Widget creator."""

        super().__init__()

        # Other attributes...
        self.inputSeg = None
        self.nlp = None

        # Next two instructions are helpers from TextableUtils. Corresponding
        # interface elements are declared here and actually drawn below (at
        # their position in the UI)...
        self.infoBox = InfoBox(widget=self.controlArea)
        self.sendButton = SendButton(
            widget=self.controlArea,
            master=self,
            callback=self.sendData,
            infoBoxAttribute="infoBox",
            sendIfPreCallback=None,
        )

        # User interface...

        # Tabs...
        self.tabs = QTabWidget()
        self.optionsTab = QWidget()
        self.modelManagerTab = QWidget()
        self.tabs.addTab(self.optionsTab, "Options")
        self.tabs.addTab(self.modelManagerTab, "Model manager")

        # Options tab...
        OptionsTabBox = QHBoxLayout()

        optionsBox = gui.widgetBox(widget=self.optionsTab)

        self.modelComboBox = gui.comboBox(
            widget=optionsBox,
            master=self,
            value='model',
            label='Model: ',
            tooltip='Select the spaCy language model you want to use.',
            items=INSTALLED_MODELS,
            sendSelectedValue=True,
            callback=self.modelChanged,
        )

        OptionsTabBox.addWidget(optionsBox)
        self.optionsTab.setLayout(OptionsTabBox)

        # Model manager tab...
        modelManagerTabBox = QHBoxLayout()

        modelManagerBox = gui.widgetBox(widget=self.modelManagerTab)

        # TODO: Model manager UI

        modelManagerTabBox.addWidget(modelManagerBox)
        self.modelManagerTab.setLayout(modelManagerTabBox)

        self.controlArea.layout().addWidget(self.tabs)

        gui.rubber(self.controlArea)

        # Now Info box and Send button must be drawn...
        self.sendButton.draw()
        self.infoBox.draw()
        self.infoBox.setText("Widget needs input", "warning")

        # Load spaCy language model...
        self.modelChanged()

        # Send data if autoSend.
        self.sendButton.sendIf()
Ejemplo n.º 16
0
class TopicModels(OWTextableBaseWidget):
    """Textable widget for building topic models based on a term-document matrix
    """

    #----------------------------------------------------------------------
    # Widget"s metadata...

    name = "Topic Models"
    description = "Build topic models based on term-document matrices"
    icon = "icons/topic_models.svg"
    priority = 10

    #----------------------------------------------------------------------
    # Channel definitions...

    inputs = [("Textable Crosstab", PivotCrosstab, "input_data")]
    outputs = [
        ("Term-topic Textable table", PivotCrosstab, widget.Default),
        ("Document-topic Textable table", PivotCrosstab),
        ("Term-topic Orange table", Orange.data.Table, widget.Default),
        ("Document-topic Orange table", Orange.data.Table),
    ]

    #----------------------------------------------------------------------
    # Settings...

    settingsHandler = VersionedSettingsHandler(
        version=__version__.rsplit(".", 1)[0])

    autoSend = settings.Setting(False)
    method = settings.Setting("Latent semantic indexing")
    numTopics = settings.Setting(10)

    want_main_area = False

    def __init__(self):
        """Widget creator."""
        super().__init__()

        # Other attributes...
        self.inputTable = None
        self.listEntries = list()

        # Next two instructions are helpers from TextableUtils. Corresponding
        # interface elements are declared here and actually drawn below (at
        # their position in the UI)...
        self.infoBox = InfoBox(widget=self.controlArea)
        self.sendButton = SendButton(
            widget=self.controlArea,
            master=self,
            callback=self.send_data,
            infoBoxAttribute="infoBox",
            sendIfPreCallback=self.updateGUI,
        )

        # User interface...

        # Filter box (advanced settings only)
        optionsBox = gui.widgetBox(
            widget=self.controlArea,
            box="Options",
            orientation="vertical",
        )
        method_combo = gui.comboBox(
            widget=optionsBox,
            master=self,
            value="method",
            items=[
                "Latent Dirichlet allocation",
                "Latent semantic indexing",
                "Correspondence analysis",
            ],
            sendSelectedValue=True,
            orientation="horizontal",
            label="Method:",
            labelWidth=120,
            callback=self.sendButton.settingsChanged,
            tooltip=("Please select the desired topic modelling method.\n"),
        )
        method_combo.setMinimumWidth(120)
        gui.separator(widget=optionsBox, height=3)
        self.numTopicsSpin = gui.spin(
            widget=optionsBox,
            master=self,
            value='numTopics',
            minv=1,
            maxv=999,
            orientation='horizontal',
            label=u'Number of topics:',
            labelWidth=120,
            callback=self.sendButton.settingsChanged,
            keyboardTracking=False,
            tooltip=(
                u"Please select the desired number of topics in output tables."
            ),
        )
        gui.separator(widget=optionsBox, height=3)
        gui.listBox(
            widget=optionsBox,
            master=self,
            labels='listEntries',
            tooltip=(u"TODO"),
        )

        gui.separator(widget=self.controlArea, height=3)

        gui.rubber(self.controlArea)

        # Now Info box and Send button must be drawn...
        self.sendButton.draw()
        self.infoBox.draw()
        self.infoBox.setText("Widget needs input", "warning")

        # Send data if autoSend.
        self.sendButton.sendIf()

        self.setMinimumWidth(350)
        self.adjustSizeWithTimer()

    def input_data(self, newInput):
        """Process incoming data."""
        self.inputTable = newInput
        self.infoBox.inputChanged()
        self.sendButton.sendIf()

    def send_data(self):
        """Compute result of widget processing and send to output"""

        # Check that there's a table in input...
        if self.inputTable is None:
            self.infoBox.setText("Widget needs input.", "warning")
            self.send("Term-topic Textable table", None)
            self.send("Document-topic Textable table", None)
            self.send("Term-topic Orange table", None)
            self.send("Document-topic Orange table", None)
            self.listEntries = list()
            return

        # Initialize progress bar.
        progressBar = gui.ProgressBar(
            self,
            iterations=1  # TODO
        )

        # Convert input table to gensim dictionary.
        dictionary, corpus = pivot_crosstab_to_gensim(self.inputTable)

        # Apply topic modelling...

        # Case 1: LDA...
        if self.method == "Latent Dirichlet allocation":

            model = models.LdaModel(
                corpus,
                id2word=dictionary,
                num_topics=self.numTopics,
            )

            # Create segment-topic PivotCrosstab table.
            values = dict()
            terms = list()
            for topic in xrange(self.numTopics):
                topic_terms = model.get_topic_terms(
                    topic,
                    len(self.inputTable.col_ids),
                )
                for term, score in topic_terms:
                    values[(dictionary[term], topic)] = score
                terms.append(
                    list(dictionary[t]
                         for t, s in topic_terms[:MAX_NUM_DISPLAYED_TERMS]))
            segmentTopicTable = PivotCrosstab(
                row_ids=self.inputTable.col_ids[:],
                col_ids=list(range(self.numTopics)),
                values=values,
                header_row_id='__topic__',
                header_row_type='continuous',
                header_col_id='__unit__',
                header_col_type='string',
                col_type=dict((col_id, 'continuous')
                              for col_id in range(self.numTopics)),
            )

            # Fill listbox...
            newListEntries = list()
            for topicNum in range(self.numTopics):
                displayedTerms = ", ".join(terms[topicNum])
                if len(self.inputTable.col_ids) > MAX_NUM_DISPLAYED_TERMS:
                    displayedTerms += ", ..."
                listEntry = "%i. %s" % (
                    topicNum + 1,
                    displayedTerms,
                )
                newListEntries.append(listEntry)
            self.listEntries = newListEntries

            # Create context-topic PivotCrosstab table...
            corpus_lda = model[corpus]
            values = dict()
            for row_idx, row in enumerate(self.inputTable.row_ids):
                lda_doc = corpus_lda[row_idx]
                for topic, score in lda_doc:
                    values[(row, topic)] = score
            contextTopicTable = PivotCrosstab(
                row_ids=self.inputTable.row_ids[:],
                col_ids=list(range(self.numTopics)),
                values=values,
                header_row_id='__topic__',
                header_row_type='continuous',
                header_col_id='__context__',
                header_col_type='string',
                col_type=dict((col_id, 'continuous')
                              for col_id in range(self.numTopics)),
                missing=0,
            )

        # Case 2: LSI...
        if self.method == "Latent semantic indexing":

            model = models.LsiModel(
                corpus,
                id2word=dictionary,
                num_topics=self.numTopics,
            )

            # Create segment-topic PivotCrosstab table.
            segmentTopicTable = PivotCrosstab.from_numpy(
                row_ids=self.inputTable.col_ids[:],
                col_ids=list(range(self.numTopics)),
                np_array=model.projection.u,
                header_row_id='__topic__',
                header_row_type='continuous',
                header_col_id='__unit__',
                header_col_type='string',
                col_type=dict((col_id, 'continuous')
                              for col_id in range(self.numTopics)),
            )

            # Fill listbox...
            colIds = np.array(self.inputTable.col_ids)
            newListEntries = list()
            # Subtask: compute total inertia, i.e. sum of eigenvalues of
            # doc-term matrix multiplied by its transposed...
            rect_matrix = self.inputTable.to_numpy()
            matrix_dims = self.inputTable.to_numpy().shape
            if matrix_dims[0] > matrix_dims[1]:
                square_matrix = np.dot(np.transpose(rect_matrix), rect_matrix)
            else:
                square_matrix = np.dot(rect_matrix, np.transpose(rect_matrix))
            total_inertia = sum(np.linalg.eigvals(square_matrix))
            for topicNum in range(self.numTopics):
                # Proportion of inertia is SQUARE of singular value divided by
                # total inertia, because n-th singular value = square root of
                # n-th eigenvalue (cf. compute total inertia above)...
                propInertia = model.projection.s[topicNum]**2 / total_inertia
                scores = model.projection.u[:, topicNum]
                sortedTerms = colIds[scores.argsort()[::-1]]
                if len(colIds) > MAX_NUM_DISPLAYED_TERMS:
                    displayedTerms = ", ".join(
                        sortedTerms[:MAX_NUM_DISPLAYED_TERMS // 2])
                    displayedTerms += ", ..., "
                    displayedTerms += ", ".join(
                        sortedTerms[-MAX_NUM_DISPLAYED_TERMS // 2:])
                else:
                    displayedTerms = ", ".join(sortedTerms)
                listEntry = "%i. (%.2f%%) %s" % (
                    topicNum + 1,
                    propInertia * 100,
                    displayedTerms,
                )
                newListEntries.append(listEntry)
            self.listEntries = newListEntries

            # Create context-topic PivotCrosstab table...
            contextTopicMatrix = corpus2dense(
                model[corpus], len(model.projection.s)).T / model.projection.s
            values = dict()
            for row_idx, row in enumerate(contextTopicMatrix):
                for topic, val in enumerate(row):
                    values[(self.inputTable.row_ids[row_idx], topic)] = val
            contextTopicTable = PivotCrosstab(
                row_ids=self.inputTable.row_ids[:],
                col_ids=list(range(self.numTopics)),
                values=values,
                header_row_id='__topic__',
                header_row_type='continuous',
                header_col_id='__context__',
                header_col_type='string',
                col_type=dict((col_id, 'continuous')
                              for col_id in range(self.numTopics)),
                missing=0,
            )

        # Case 2: Correspondence analysis...
        elif self.method == "Correspondence analysis":

            ca = correspondence(self.inputTable.to_numpy())

            # Create segment-topic PivotCrosstab table.
            segmentTopicTable = PivotCrosstab.from_numpy(
                row_ids=self.inputTable.col_ids[:],
                col_ids=list(range(self.numTopics)),
                np_array=ca.col_factors[:, range(self.numTopics)],
                header_row_id='__topic__',
                header_row_type='continuous',
                header_col_id='__unit__',
                header_col_type='string',
                col_type=dict((col_id, 'continuous')
                              for col_id in range(self.numTopics)),
            )

            # Fill listbox...
            colIds = np.array(self.inputTable.col_ids)
            newListEntries = list()
            total_inertia = sum(ca.inertia_of_axis())
            for topicNum in range(self.numTopics):
                propInertia = ca.inertia_of_axis()[topicNum] / total_inertia
                scores = np.array(ca.col_factors[:, topicNum])
                sortedTerms = colIds[scores.argsort()[::-1]]
                if len(colIds) > MAX_NUM_DISPLAYED_TERMS:
                    displayedTerms = ", ".join(
                        sortedTerms[:MAX_NUM_DISPLAYED_TERMS // 2])
                    displayedTerms += ", ..., "
                    displayedTerms += ", ".join(
                        sortedTerms[-MAX_NUM_DISPLAYED_TERMS // 2:])
                else:
                    displayedTerms = ", ".join(sortedTerms)
                listEntry = "%i. (%.2f%%) %s" % (
                    topicNum + 1,
                    propInertia * 100,
                    displayedTerms,
                )
                newListEntries.append(listEntry)
            self.listEntries = newListEntries

            # Create context-topic PivotCrosstab table.
            contextTopicTable = PivotCrosstab.from_numpy(
                row_ids=self.inputTable.row_ids[:],
                col_ids=list(range(self.numTopics)),
                np_array=ca.row_factors[:, range(self.numTopics)],
                header_row_id='__topic__',
                header_row_type='continuous',
                header_col_id='__unit__',
                header_col_type='string',
                col_type=dict((col_id, 'continuous')
                              for col_id in range(self.numTopics)),
            )

        # Set status to OK and report...
        self.infoBox.setText("Tables correctly sent to output.")
        progressBar.finish()

        # Clear progress bar.
        progressBar.finish()

        # Send tokens...
        self.send("Term-topic Textable table", segmentTopicTable)
        self.send("Document-topic Textable table", contextTopicTable)
        self.send(
            "Term-topic Orange table",
            segmentTopicTable.to_orange_table(),
        )
        self.send(
            "Document-topic Orange table",
            contextTopicTable.to_orange_table(),
        )

        self.sendButton.resetSettingsChangedFlag()

    def updateGUI(self):
        """Update GUI state"""
        if self.inputTable is not None:
            if (self.method == "Latent semantic indexing"
                    or self.method == "Correspondence analysis"):
                maxNumTopics = min(
                    len(self.inputTable.row_ids),
                    len(self.inputTable.col_ids),
                )
                self.numTopicsSpin.setRange(1, maxNumTopics - 1)
            else:
                self.numTopicsSpin.setRange(1, 999)
        else:
            self.numTopicsSpin.setRange(1, 999)
Ejemplo n.º 17
0
    def __init__(self):
        """Widget creator."""
        super().__init__()

        # Other attributes...
        self.inputTable = None
        self.listEntries = list()

        # Next two instructions are helpers from TextableUtils. Corresponding
        # interface elements are declared here and actually drawn below (at
        # their position in the UI)...
        self.infoBox = InfoBox(widget=self.controlArea)
        self.sendButton = SendButton(
            widget=self.controlArea,
            master=self,
            callback=self.send_data,
            infoBoxAttribute="infoBox",
            sendIfPreCallback=self.updateGUI,
        )

        # User interface...

        # Filter box (advanced settings only)
        optionsBox = gui.widgetBox(
            widget=self.controlArea,
            box="Options",
            orientation="vertical",
        )
        method_combo = gui.comboBox(
            widget=optionsBox,
            master=self,
            value="method",
            items=[
                "Latent Dirichlet allocation",
                "Latent semantic indexing",
                "Correspondence analysis",
            ],
            sendSelectedValue=True,
            orientation="horizontal",
            label="Method:",
            labelWidth=120,
            callback=self.sendButton.settingsChanged,
            tooltip=("Please select the desired topic modelling method.\n"),
        )
        method_combo.setMinimumWidth(120)
        gui.separator(widget=optionsBox, height=3)
        self.numTopicsSpin = gui.spin(
            widget=optionsBox,
            master=self,
            value='numTopics',
            minv=1,
            maxv=999,
            orientation='horizontal',
            label=u'Number of topics:',
            labelWidth=120,
            callback=self.sendButton.settingsChanged,
            keyboardTracking=False,
            tooltip=(
                u"Please select the desired number of topics in output tables."
            ),
        )
        gui.separator(widget=optionsBox, height=3)
        gui.listBox(
            widget=optionsBox,
            master=self,
            labels='listEntries',
            tooltip=(u"TODO"),
        )

        gui.separator(widget=self.controlArea, height=3)

        gui.rubber(self.controlArea)

        # Now Info box and Send button must be drawn...
        self.sendButton.draw()
        self.infoBox.draw()
        self.infoBox.setText("Widget needs input", "warning")

        # Send data if autoSend.
        self.sendButton.sendIf()

        self.setMinimumWidth(350)
        self.adjustSizeWithTimer()
Ejemplo n.º 18
0
    def __init__(self):
        """Widget creator."""
        super().__init__()

        # NONE BASIC SETTING
        self.inputData = None
        self.system = os.name
        self.user = os.environ.get("USER")
        self.langues = list()
        self.created_inputs = list()
        self.language = 0
        self.check_firt_use = False
        self.createdInputs = list()
        self.compteur = 0
        self.NoLink = True

        # liste des langues possible
        self.langues_possibles = {
            "French": ["french.par", "french-abbreviations"],
            "English": ["english-utf8.par", "english-abbreviations"],
            "German": ["german-utf8.par", "german-abbreviations"],
            "Italian": ["italian-utf8.par", "italian-abbreviations"],
            "Swahili": ["swahili.par", "swahili-abbreviations"],
            "Portuguese": ["portuguese.par", "portuguese-abbreviations"],
            "Russian": ["russian.par", "russian-abbreviations"],
            "Spanish":
            ["spanish-utf8.par", "spanish-abbreviations", "spanish-mwls"],
            "Slovenian": ["slovenian-utf8.par"],
            "Slovak": ["slovak2-utf8.par"],
            "Romanian": ["romanian.par"],
            "Polish": ["polish-utf8.par"],
            "Mongolian": ["mongolian.par"],
            "Latin": ["latin.par"],
            "Galician": ["galician.par"],
            "Finnish": ["finnish-utf8.par"],
            "Estonian": ["estonian.par"],
            "Bulgarian": ["bulgarian-utf8.par"],
            "Spoken French": ["spoken-french.par", "french-abbreviations"]
        }

        # Next two instructions are helpers from TextableUtils. Corresponding
        # interface elements are declared here and actually drawn below (at
        # their position in the UI)...
        self.infoBox = InfoBox(widget=self.controlArea)
        self.sendButton = SendButton(widget=self.controlArea,
                                     master=self,
                                     callback=self.sendData,
                                     infoBoxAttribute=u"infoBox",
                                     sendIfPreCallback=self.updateGUI)

        # The AdvancedSettings class, also from TextableUtils, facilitates
        # the management of basic vs. advanced interface. An object from this
        # class (here assigned to self.advancedSettings) contains two lists
        # (basicWidgets and advancedWidgets), to which the corresponding
        # widgetBoxes must be added.

        # User interface...

        # OPTION BOX
        gui.separator(widget=self.controlArea, height=5)

        self.infoBox1 = gui.widgetBox(self.controlArea,
                                      u"Option",
                                      addSpace=True)

        # definir la langue
        self.langueBox = gui.comboBox(widget=self.infoBox1,
                                      master=self,
                                      value="language",
                                      items=self.langues,
                                      orientation=u"horizontal",
                                      label="Select text language :",
                                      callback=self.settings_changed)

        self.langueBox.setMaximumWidth(100)

        gui.separator(widget=self.controlArea, height=3)

        # Checkbox pour activer output avec code xml
        self.choix_xml = gui.checkBox(widget=self.infoBox1,
                                      master=self,
                                      value="activer_xml",
                                      label=" Output with XML code",
                                      callback=self.settings_changed)

        # Checkbox pour afficher unknown si le mot est inconnu
        self.choix_unknown = gui.checkBox(widget=self.infoBox1,
                                          master=self,
                                          value="unknown",
                                          label=" Output without '[unknown]'",
                                          callback=self.settings_changed)

        # The following lines:
        # Bouton pour aller cherche le lien vers treetagger...
        self.treetagger_box = gui.widgetBox(
            self.controlArea,
            u"Please, enter a correct path to TreeTagger :",
            addSpace=True)

        gui.button(widget=self.treetagger_box,
                   master=self,
                   label="Browse",
                   callback=self.treetagger_search)

        gui.separator(widget=self.treetagger_box, height=3)

        gui.rubber(self.controlArea)

        # Now Info box and Send button must be drawn...
        self.sendButton.draw()
        self.infoBox.draw()

        # Send data if autoSend.
        self.sendButton.sendIf()

        # ajuster taille widjet
        self.adjustSizeWithTimer()

        # verifie lien treetagger
        self.treetagger_check()
    def __init__(self):
        """Widget creator."""

        super().__init__()

        # Other attributes...
        self.inputSeg = None
        self.morphology = dict()
        self.selectedMainWord = None
        self.mainWords = list()
        self.selectedParse = None
        self.parses = list()
        self.selectedStemForParse = None
        self.stemsForParse = list()
        self.selectedSuffixForParse = None
        self.suffixesForParse = list()
        self.selectedMainSignature = None
        self.mainSignatures = list()
        self.wordsForSig = list()
        self.stemsForSig = list()
        self.suffixesForSig = list()

        # Next two instructions are helpers from TextableUtils. Corresponding
        # interface elements are declared here and actually drawn below (at
        # their position in the UI)...
        self.infoBox = InfoBox(widget=self.controlArea)
        self.sendButton = SendButton(
            widget=self.controlArea,
            master=self,
            callback=self.sendData,
            infoBoxAttribute="infoBox",
            sendIfPreCallback=None,
        )

        # User interface...

        # A) Control area...

        # Options box...
        optionsBox = gui.widgetBox(
            widget=self.controlArea,
            box="Options",
            orientation="vertical",
        )
        gui.spin(
            widget=optionsBox,
            master=self,
            value='minStemLen',
            label='Minimum length of stems: ',
            callback=self.sendButton.sendIf,
            labelWidth=180,
            tooltip=(
                'Select the minimum number of required characters in stems'),
            minv=LOWER_MIN_STEM_LEN,
            maxv=MAX_MORPH_LEN,
            step=1,
        )
        gui.separator(widget=optionsBox, height=2)

        gui.rubber(self.controlArea)

        # B) Main area...

        font = QFont()
        font.setFamily('Courier')
        font.setStyleHint(QFont.Courier)
        font.setPixelSize(12)

        # Tabs...
        self.tabs = QTabWidget()
        self.wordTab = QWidget()
        self.signatureTab = QWidget()
        self.tabs.addTab(self.wordTab, "Words")
        self.tabs.addTab(self.signatureTab, "Signatures")

        # Words tab...
        wordTabBox = QHBoxLayout()
        wordBox = gui.widgetBox(
            widget=self.wordTab,
            orientation="horizontal",
            margin=5,
        )

        wordBoxRight = gui.widgetBox(widget=wordBox)

        self.mainWordListbox = gui.listBox(
            widget=wordBoxRight,
            master=self,
            value="selectedMainWord",
            labels="mainWords",
            callback=self.mainWordSelected,
            tooltip="Select a word to display its possible parses.",
        )
        self.mainWordListbox.setFont(font)

        gui.separator(widget=wordBox, width=3)

        wordBoxLeft = gui.widgetBox(widget=wordBox)

        gui.label(
            widget=wordBoxLeft,
            master=self,
            label="Parse(s):",
        )

        self.parsesListbox = gui.listBox(
            widget=wordBoxLeft,
            master=self,
            value="selectedParse",
            labels="parses",
            callback=self.parseSelected,
            tooltip="Select a parse to display the corresponding signature.",
        )
        self.parsesListbox.setFont(font)

        self.sigForParseBox = gui.widgetBox(
            widget=wordBoxLeft,
            box="Signature",
        )

        gui.label(
            widget=self.sigForParseBox,
            master=self,
            label="Stem(s):",
        )

        self.stemsForParseListbox = gui.listBox(
            widget=self.sigForParseBox,
            master=self,
            labels="stemsForParse",
            tooltip="Stems associated with the parse selected above.",
        )

        gui.separator(widget=self.sigForParseBox, height=2)

        gui.label(
            widget=self.sigForParseBox,
            master=self,
            label="Suffixes(s):",
        )

        self.suffixesForParseListbox = gui.listBox(
            widget=self.sigForParseBox,
            master=self,
            labels="suffixesForParse",
            tooltip="Suffixes associated with the parse selected above.",
        )

        wordTabBox.addWidget(wordBox)
        self.wordTab.setLayout(wordTabBox)

        # Signature tab...
        signatureTabBox = QHBoxLayout()

        signatureBox = gui.widgetBox(
            widget=self.signatureTab,
            orientation="horizontal",
            margin=5,
        )

        signatureBoxRight = gui.widgetBox(widget=signatureBox)

        self.mainSignatureListbox = gui.listBox(
            widget=signatureBoxRight,
            master=self,
            value="selectedMainSignature",
            labels="mainSignatures",
            callback=self.mainSignatureSelected,
            tooltip="Select a signature to display its contents.",
        )
        self.mainSignatureListbox.setFont(font)

        gui.separator(widget=signatureBox, width=3)

        signatureBoxLeft = gui.widgetBox(widget=signatureBox)

        gui.label(
            widget=signatureBoxLeft,
            master=self,
            label="Words:",
        )

        self.wordsForSigListbox = gui.listBox(
            widget=signatureBoxLeft,
            master=self,
            labels="wordsForSig",
            tooltip="Words associated with the selected signature.",
        )
        self.wordsForSigListbox.setFont(font)

        gui.label(
            widget=signatureBoxLeft,
            master=self,
            label="Stem(s):",
        )

        self.stemsForSigListbox = gui.listBox(
            widget=signatureBoxLeft,
            master=self,
            labels="stemsForSig",
            tooltip="Stems associated with the selected signature.",
        )
        self.stemsForSigListbox.setFont(font)

        gui.label(
            widget=signatureBoxLeft,
            master=self,
            label="Suffixes(s):",
        )

        self.suffixesForSigListbox = gui.listBox(
            widget=signatureBoxLeft,
            master=self,
            labels="suffixesForSig",
            tooltip="Suffixes associated with the selected signature.",
        )
        self.suffixesForSigListbox.setFont(font)

        signatureTabBox.addWidget(signatureBox)
        self.signatureTab.setLayout(signatureTabBox)

        self.mainArea.layout().addWidget(self.tabs)

        # Now Info box and Send button must be drawn...
        self.sendButton.draw()
        self.infoBox.draw()
        self.infoBox.setText("Widget needs input", "warning")

        self.setMinimumWidth(602)
        self.setMinimumHeight(317)
        self.adjustSizeWithTimer()

        # Send data if autoSend.
        self.sendButton.sendIf()
class MovieTranscripts(OWTextableBaseWidget):
    """Textable widget for importing movie scripts from the 
    springfieldspringfield.co.uk website 
    (https://www.springfieldspringfield.co.uk)
    """

    #----------------------------------------------------------------------
    # Widget's metadata...

    name = "Movie Transcripts"
    description = "Import movie transcripts from www.springfieldspringfield.co.uk"
    icon = "icons/Movie_Transcripts.png"
    priority = 11

    #----------------------------------------------------------------------
    # Channel definitions...

    inputs = []
    outputs = [("Movie transcripts", Segmentation)]

    #----------------------------------------------------------------------
    # Layout parameters...

    want_main_area = False

    #----------------------------------------------------------------------
    # Settings...

    settingsHandler = VersionedSettingsHandler(
        version=__version__.rsplit(".", 1)[0]
    )

    # Saved settings
    autoSend = settings.Setting(False)
    myBasket = settings.Setting([])

    # Other class variables...

    cacheFilename = "cache_movie_transcripts"

    def __init__(self):
        """Widget creator."""

        super().__init__()

        # ATTRIBUTS
        # searchFunction
        self.searchResults = None
        self.inputSeg = None
        # newQuery = attribut box lineEdit (search something)
        self.newQuery = ''
        # Results box attributs
        self.titleLabels = list()
        self.selectedTitles = list()
        # selections box attributs
        self.myTitles = list()
        self.mytitleLabels = list()
        # stock all the inputs (scripts) in a list
        self.createdInputs = list()
        # stock the part of dictionary that will be used to access script's page
        self.path_storage = dict()
        # stock titles of movies
        self.movie_titles = list()
        # stock all the movies titles and link parts
        self.title_to_href = dict()

        # Next two instructions are helpers from TextableUtils. Corresponding
        # interface elements are declared here and actually drawn below (at
        # their position in the UI)...
        self.infoBox = InfoBox(widget=self.controlArea)
        self.sendButton = SendButton(
            widget=self.controlArea,
            master=self,
            callback=self.sendData,
            infoBoxAttribute="infoBox",
            )

        # User interface...
        # Create the working area
        queryBox = gui.widgetBox(
            widget=self.controlArea,
            box="Search movie",
            orientation="vertical",
            )

        searchBox = gui.widgetBox(
            widget=queryBox,
            orientation="horizontal",
            )

        # Allows to enter specific text to the research
        #  Uses "newQuery" attribute
        gui.lineEdit(
            widget=searchBox,
            master=self,
            value='newQuery',
            orientation='horizontal',
            labelWidth=100,
            tooltip=("Enter a movie title"),
            )

        # Research button
        # Use "searchFunction" attibute
        self.searchButton = gui.button(
            widget=searchBox,
            master=self,
            label='Search',
            callback=self.searchFunction,
            tooltip='Search for the movie',
            )
        gui.separator(widget=queryBox, height=3)

        # Button that refresh all movie titles from the website
        self.refreshButton = gui.button(
            widget=queryBox,
            master=self,
            label="Refresh database",
            callback=self.refreshTitles,
            tooltip="Update SpringfieldSpringfield database"
            )

        # Box that displays search results
        self.titleListbox = gui.listBox(
            widget=queryBox,
            master=self,
            value="selectedTitles",    # setting (list)
            labels="titleLabels",      # setting (list)
            callback=lambda: self.selectButton.setDisabled(
                self.selectedTitles == list()),
            tooltip="Select the movie transcript you want to import",
            )
        self.titleListbox.doubleClicked.connect(self.Add)
        self.titleListbox.setMinimumHeight(120)
        self.titleListbox.setSelectionMode(3)

        boxbutton = gui.widgetBox(
            widget=queryBox,
            box=False,
            orientation='horizontal',
            )

        # Add button
        # Uses "Add" function
        self.selectButton = gui.button(
            widget=boxbutton,
            master=self,
            label="Add to corpus",
            callback=self.Add,
            tooltip="Add selected movie to the corpus",
            )
        self.selectButton.setDisabled(True)

        # Clear button
        # Uses "clearResults" function
        self.clearButton = gui.button(
            widget=boxbutton,
            master=self,
            label="Clear results",
            callback=self.clearResults,
            tooltip="Clear results",
            )
        self.clearButton.setDisabled(True)

        # Area where confirmed movies are moved and stocked
        mytitleBox = gui.widgetBox(
            widget=self.controlArea,
            box="Corpus",
            orientation="vertical",
            )

        self.mytitleListbox = gui.listBox(
            widget=mytitleBox,
            master=self,
            value="myTitles",
            labels="mytitleLabels",
            callback=lambda: self.removeButton.setDisabled(
                self.myTitles == list()),
            tooltip="The list of titles whose content will be imported",
            )
        self.mytitleListbox.doubleClicked.connect(self.Remove)
        self.mytitleListbox.setMinimumHeight(150)
        self.mytitleListbox.setSelectionMode(3)

        boxbutton2 = gui.widgetBox(
            widget=mytitleBox,
            box=False,
            orientation='horizontal',
            )
        # Remove movies button
        self.removeButton = gui.button(
            widget=boxbutton2,
            master=self,
            label=u'Remove from corpus',
            callback=self.Remove,
            tooltip="Remove the selected movie from your corpus.",
            )
        self.removeButton.setDisabled(True)

        # Delete all confirmed movies button
        self.clearmyBasket = gui.button(
            widget=boxbutton2,
            master=self,
            label=u'Clear corpus',
            callback=self.ClearmyCorpus,
            tooltip="Remove all movies from your corpus.",
            )
        self.clearmyBasket.setDisabled(True)

        gui.rubber(self.controlArea)

    #----------------------------------------------------------------------

        # Draw Info box and Send button
        self.sendButton.draw()
        self.searchButton.setDefault(True)
        self.infoBox.draw()

        # This initialization step needs to be done after infoBox has been
        # drawn (because we may need to display an error message).
        self.loadDatabaseCache()

        # Make sure that whatever was in the corpus last time is deleted
        self.ClearmyCorpus()

        # Send data if autoSend.
        self.sendButton.sendIf()

    def searchFunction(self):
        self.controlArea.setDisabled(True)

        # Search from the springfieldspringfield.co.uk
        query_string = self.newQuery
        testdict = self.title_to_href

        # Reset and clear the visible widget list
        del self.titleLabels[:]
        self.titleLabels = self.titleLabels
        del self.movie_titles[:]
        self.movie_titles = self.movie_titles

        if query_string != "":
            # Initialize progress bar.
            progressBar = ProgressBar(self, iterations=1)

            self.searchResults = process.extractBests(
                query_string, 
                testdict,
                limit=100000,
                score_cutoff=80
            )

            progressBar.finish()

            progressBar = ProgressBar(self, iterations=len(self.searchResults))

            for key, score, val in self.searchResults:
                self.titleLabels.append(val)
                self.movie_titles.append(val)
                self.path_storage[val] = key
                # 1 tick on the progress bar of the widget
                progressBar.advance()

            self.titleLabels = self.titleLabels
            self.clearButton.setDisabled(False)
            self.controlArea.setDisabled(False)

            # Clear progress bar.
            progressBar.finish()

            if self.searchResults:
                self.infoBox.setText("Search complete")
            elif self.searchResults == []:
                self.infoBox.setText("No result please try again", 'warning')

        else:
            self.infoBox.setText(
                "Please, enter a query in a search bar", 
                "warning"
            )
            self.controlArea.setDisabled(False)

    def clearResults(self):
        """Clear the results list"""
        del self.titleLabels[:]
        self.titleLabels = self.titleLabels
        del self.movie_titles[:]
        self.movie_titles = self.movie_titles
        self.clearButton.setDisabled(True)

    def loadDatabaseCache(self):
        """Load the cached database"""
        # Try to open saved file in this module's directory...

        path = os.path.dirname(
            os.path.abspath(inspect.getfile(inspect.currentframe()))
        )
        try:
            file = open(os.path.join(path, self.__class__.cacheFilename), "rb")
            self.title_to_href = pickle.load(file)
            file.close()

        # Else try to rebuild cache from SpringfieldSpringfield website...
        except IOError:
            self.refreshTitles()

    def refreshTitles(self):
        """Refresh the database cache"""

        basepath = os.path.dirname(
            os.path.abspath(inspect.getfile(inspect.currentframe()))
            )
        cachedFilename = self.__class__.cacheFilename

        dialog = PyQt5.QtWidgets.QMessageBox()
        response = dialog.question(
            self,
            "springfieldspringfield",
            "Are you sure you want to refresh the database?\n"
            + "It will take several minutes",
            dialog.Yes | dialog.No
        )

        self.infoBox.setText(
            "Scraping SpringfieldSpringfield website, please wait...",
            "warning",
        )
        self.warning("Warning : it will take several minutes")
        if response == dialog.No:
            return
        else:
            try:
                self.get_all_titles()
                try:
                    path = os.path.dirname(
                        os.path.abspath(inspect.getfile(inspect.currentframe()))
                    )
                    file = open(
                        os.path.join(path, self.__class__.cacheFilename),
                        "wb",
                    )
                    pickle.dump(self.title_to_href, file)
                    file.close()
                    self.infoBox.setText(
                        "Database successfully updated",
                    )
                except IOError:
                    self.infoBox.setText(
                        "Couldn't save database to disk.",
                        "warning",
                    )
            except requests.exceptions.ConnectionError:
                self.infoBox.setText(
                    "Error while attempting to scrape the "
                    + "SpringfieldSpringfield website.",
                    "error",
                )


	# Get all movie titles from www.springfieldspringfield.co.uk
    def get_all_titles(self):
        '''php_query_string and http_query_string are the variable that will need to be changed
        if different database is used or if current database's structure undergoes changes'''
        php_query_string = '/movie_script.php?movie='
        http_query_string = 'https://www.springfieldspringfield.co.uk/' +   \
                            'movie_scripts.php?order='
        alphabet = ['0', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 
                    'K', 'L', 'M', 'N', 'O', 'P', 'K', 'R', 'S', 'T', 'U', 
                    'V', 'W', 'X', 'Y', 'Z']

        # Initialize progress bar.
        progressBar = ProgressBar(
            self,
            iterations=len(alphabet)
        )
        self.controlArea.setDisabled(True)

        try:
            for lettre in alphabet:
                page_num = 1
                # 1 tick on the progress bar of the widget
                progressBar.advance()

                # This part of code is what gets all the movie titles from each
                # page of the database
                while True:
                    page_url = http_query_string + '%s&page=%i' % (
                        lettre, 
                        page_num,
                    )
                    page = urllib.request.urlopen(page_url)
                    soup = BeautifulSoup(page, 'html.parser')
                    # script_links is a variable that may need to be changed if 
                    # another database is used or current database undergoes 
                    # change
                    script_links = soup.findAll('a', attrs={'class':
                        re.compile("^script-list-item")})
                    if not script_links:
                        break
                    links = dict()
                    for link in soup.findAll(
                        'a', attrs={'class': re.compile("^script-list-item")}
                    ):
                        links[link.text] =  \
                            link.get('href')[len(php_query_string):]
                    self.title_to_href.update(links)
                    page_num += 1

        except:
            self.infoBox.setText(
                "Couldn't download data from springfieldspringfield website.",
                "error"
            )
        # Clear progress bar.
        progressBar.finish()
        self.controlArea.setDisabled(False)
        return self.title_to_href

    # Add Movies function
    def Add(self):
        """Add movies in your selection """
        for selectedTitle in self.selectedTitles:
            movie_title = self.titleLabels[selectedTitle]
            if movie_title not in self.myBasket:
                self.myBasket.append(movie_title)
                self.mytitleLabels.append(movie_title)
                self.mytitleLabels = self.mytitleLabels
        self.clearmyBasket.setDisabled(False)
        self.sendButton.settingsChanged()

    # Remove movies function
    def Remove(self):
        """Remove the selected songs in your selection """
        self.myBasket = [
            movie for idx, movie in enumerate(self.myBasket)
            if idx not in self.myTitles
        ]
        self.updateMytitleLabels()
        self.sendButton.settingsChanged()


    def ClearmyCorpus(self):
        """Clears your selection """
        del self.mytitleLabels[:]
        del self.myBasket[:]
        self.mytitleLabels = self.mytitleLabels
        self.clearmyBasket.setDisabled(True)
        self.sendButton.settingsChanged()


    def updateMytitleLabels(self):
        """Update selections function"""
        self.mytitleLabels = list()
        for movie in self.myBasket:
            self.mytitleLabels.append(movie)
        self.mytitleLabels = self.mytitleLabels

        self.clearmyBasket.setDisabled(self.myBasket == list())
        self.removeButton.setDisabled(self.myTitles == list())


    # Create the final output with the script
    def sendData(self):
        """Send data from website springfieldspringfield"""
        # Skip if title list is empty:
        if self.myBasket == list():
            self.infoBox.setText(
                "Your corpus is empty, please add some movies first",
                "warning"
            )
            self.segmentation = None
            self.send("Movie transcripts", self.segmentation, self)
            return

        # Clear created Inputs.
        self.clearCreatedInputs()

        annotations = list()
        script_list = list()
        annotations_dict = dict()
        self.controlArea.setDisabled(True)

        # Initialize progress bar.
        progressBar = ProgressBar(self, iterations=len(self.myBasket))

        # This part of code is what fetches the actual script
        try:
            for movie in self.myBasket:
                # Each movie that is in the corpus is split into title and year
                # (rsplit makes sure to only split last occurence) which will 
                # become annotations
                b = copy.copy(movie)
                future_annotation = b.rsplit('(', 1)
                movie_title = future_annotation[0]
                movie_year = future_annotation[-1]
                movie_year = movie_year[:-1]
                annotations_dict["Movie Title"] = movie_title
                annotations_dict["Year of release"] = movie_year
                # It is important to make a copy of dictionary, otherwise each 
                # iteration will replace every element of the annotations list
                annotations.append(annotations_dict.copy())
                # link_end and page_url are the two variables that will have to
                # be changed in case scripts need to be taken from elsewhere
                link_end = self.path_storage[movie]
                page_url = "https://www.springfieldspringfield.co.uk/" +   \
                    "movie_script.php?movie=" + link_end
                page = urllib.request.urlopen(page_url)
                soup = BeautifulSoup(page, 'html.parser')

                # This is what grabs the movie script
                script = soup.find("div", {"class":"movie_script"})

                script_list.append(script.text)

                # 1 tick on the progress bar of the widget
                progressBar.advance()

        except:
            self.infoBox.setText(
                "Couldn't download data from SpringfieldSpringfield website.",
                "error"
            )
            self.controlArea.setDisabled(False)
            return

        # Store downloaded script strings in input objects...
        for script in script_list:
            newInput = Input(script, self.captionTitle)
            self.createdInputs.append(newInput)

       # If there's only one play, the widget"s output is the created Input.
        if len(self.createdInputs) == 1:
            self.segmentation = self.createdInputs[0]

        # Otherwise the widget"s output is a concatenation...
        else:
            self.segmentation = Segmenter.concatenate(
                self.createdInputs,
                self.captionTitle,
                import_labels_as=None,
            )

        # Annotate segments...
        for idx, segment in enumerate(self.segmentation):
            segment.annotations.update(annotations[idx])
            self.segmentation[idx] = segment

        # Clear progress bar.
        progressBar.finish()

        self.controlArea.setDisabled(False)

        # Set status to OK and report data size...
        message = "%i segment@p sent to output " % len(self.segmentation)
        message = pluralize(message, len(self.segmentation))
        numChars = 0
        for segment in self.segmentation:
            segmentLength = len(Segmentation.get_data(segment.str_index))
            numChars += segmentLength
        message += "(%i character@p)." % numChars
        message = pluralize(message, numChars)
        self.infoBox.setText(message)

        self.send("Movie transcripts", self.segmentation, self)
        self.sendButton.resetSettingsChangedFlag()



    def clearCreatedInputs(self):
        """Delete all Input objects that have been created."""
        del self.createdInputs[:]


    def setCaption(self, title):
        """The following method needs to be copied verbatim in
        every Textable widget that sends a segmentation"""
        if 'captionTitle' in dir(self):
            changed = title != self.captionTitle
            super().setCaption(title)
            if changed:
                self.SendButton.settingsChanged()
        else:
            super().setCaption(title)
Ejemplo n.º 21
0
    def __init__(self):
        """Widget creator."""

        super().__init__()

        # Other attributes...

        self.inputSeg = None
        self.outputSeg = None
        self.dialect = None
        self.selectedHeader = None
        self.csvSeg = list()
        # list of deleted segments
        self.contentIsNone = list()
        # list for gui
        self.headerList = list()
        self.content_column = 0
        self.headerEdit = ""

        # those are for the rename function
        self.renamedHeader = None
        self.isRenamed = False
        self.dict_keys = list()

        # preprocess
        self.deleteQuotes = False

        # Next two instructions are helpers from TextableUtils. Corresponding
        # interface elements are declared here and actually drawn below (at
        # their position in the UI)...
        self.infoBox = InfoBox(widget=self.controlArea)
        self.sendButton = SendButton(
            widget=self.controlArea,
            master=self,
            callback=self.sendData,
            infoBoxAttribute="infoBox",
            sendIfPreCallback=None,
        )
        #self.header_there = False

        #----------------------------------------------------------------------
        # User interface...

        # preprocess box...
        self.preprocessBox = gui.widgetBox(
            widget=self.controlArea,
            box="Preprocess",
            orientation="vertical",
        )
        # check box...
        self.checkQuotes = gui.checkBox(
            widget=self.preprocessBox,
            master=self,
            value='deleteQuotes',
            label='delete quotation marks',
            callback=self.delete_quotes,
        )

        # main box...
        self.mainBox = gui.widgetBox(
            widget=self.controlArea,
            box="Click to select a header to modify",
            orientation="vertical",
        )

        # List of all the headers (named with numbers if None)
        self.headerListbox = gui.listBox(
            widget=self.mainBox,
            master=self,
            value="selectedHeader",
            labels="headerList",
            callback=self.update_gui,
            selectionMode=1, # can only choose one item
            tooltip="list of all your headers",
        )

        # set "rename" button (must be aside the list)
        self.renameHeader = gui.button(
            widget=self.mainBox,
            master=self,
            label="rename",
            callback=self.set_renamebox,
            tooltip="click to rename header"
        )

        # set "use as content" button (must be aside the list)
        self.iscontentHeader = gui.button(
            widget=self.mainBox,
            master=self,
            label="use as content",
            callback=self.content_changed,
            tooltip="click to select as content"
        )

        #----------------------------------------------------------------------
        # rename box...

        self.renameBox = gui.widgetBox(
            widget=self.controlArea,
            box='Rename header',
            orientation='horizontal',
            addSpace=True,
        )
        gui.separator(widget=self.renameBox, height=3)
        self.headerEditLine = gui.lineEdit(
            widget=self.renameBox,
            master=self,
            value='headerEdit',
            orientation='horizontal',
            label='New header:',
            tooltip=(
                "Rename the selected header."
            ),
            callback=lambda: self.renameButton.setDisabled(not self.headerEdit),
        )
        self.renameButton = gui.button(
            widget=self.renameBox,
            master=self,
            label="rename",
            callback=self.rename,
            tooltip="click to rename header"
        )
        self.cancelButton = gui.button(
            widget=self.renameBox,
            master=self,
            label="cancel",
            callback=self.cancel,
            tooltip="click to cancel renaming"
        )
        #----------------------------------------------------------------------
        # interface parameters...

        self.update_gui()
        self.renameBox.setVisible(False)

        gui.rubber(self.controlArea)

        # Now Info box and Send button must be drawn...
        self.sendButton.draw()
        self.infoBox.draw()
        self.infoBox.setText("Widget needs input", "warning")
        
        # Send data if autoSend.
        self.sendButton.sendIf()
    def __init__(self):
        """Widget creator."""

        super().__init__()

        # ATTRIBUTS
        # searchFunction
        self.searchResults = None
        self.inputSeg = None
        # newQuery = attribut box lineEdit (search something)
        self.newQuery = ''
        # Results box attributs
        self.titleLabels = list()
        self.selectedTitles = list()
        # selections box attributs
        self.myTitles = list()
        self.mytitleLabels = list()
        # stock all the inputs (scripts) in a list
        self.createdInputs = list()
        # stock the part of dictionary that will be used to access script's page
        self.path_storage = dict()
        # stock titles of movies
        self.movie_titles = list()
        # stock all the movies titles and link parts
        self.title_to_href = dict()

        # Next two instructions are helpers from TextableUtils. Corresponding
        # interface elements are declared here and actually drawn below (at
        # their position in the UI)...
        self.infoBox = InfoBox(widget=self.controlArea)
        self.sendButton = SendButton(
            widget=self.controlArea,
            master=self,
            callback=self.sendData,
            infoBoxAttribute="infoBox",
            )

        # User interface...
        # Create the working area
        queryBox = gui.widgetBox(
            widget=self.controlArea,
            box="Search movie",
            orientation="vertical",
            )

        searchBox = gui.widgetBox(
            widget=queryBox,
            orientation="horizontal",
            )

        # Allows to enter specific text to the research
        #  Uses "newQuery" attribute
        gui.lineEdit(
            widget=searchBox,
            master=self,
            value='newQuery',
            orientation='horizontal',
            labelWidth=100,
            tooltip=("Enter a movie title"),
            )

        # Research button
        # Use "searchFunction" attibute
        self.searchButton = gui.button(
            widget=searchBox,
            master=self,
            label='Search',
            callback=self.searchFunction,
            tooltip='Search for the movie',
            )
        gui.separator(widget=queryBox, height=3)

        # Button that refresh all movie titles from the website
        self.refreshButton = gui.button(
            widget=queryBox,
            master=self,
            label="Refresh database",
            callback=self.refreshTitles,
            tooltip="Update SpringfieldSpringfield database"
            )

        # Box that displays search results
        self.titleListbox = gui.listBox(
            widget=queryBox,
            master=self,
            value="selectedTitles",    # setting (list)
            labels="titleLabels",      # setting (list)
            callback=lambda: self.selectButton.setDisabled(
                self.selectedTitles == list()),
            tooltip="Select the movie transcript you want to import",
            )
        self.titleListbox.doubleClicked.connect(self.Add)
        self.titleListbox.setMinimumHeight(120)
        self.titleListbox.setSelectionMode(3)

        boxbutton = gui.widgetBox(
            widget=queryBox,
            box=False,
            orientation='horizontal',
            )

        # Add button
        # Uses "Add" function
        self.selectButton = gui.button(
            widget=boxbutton,
            master=self,
            label="Add to corpus",
            callback=self.Add,
            tooltip="Add selected movie to the corpus",
            )
        self.selectButton.setDisabled(True)

        # Clear button
        # Uses "clearResults" function
        self.clearButton = gui.button(
            widget=boxbutton,
            master=self,
            label="Clear results",
            callback=self.clearResults,
            tooltip="Clear results",
            )
        self.clearButton.setDisabled(True)

        # Area where confirmed movies are moved and stocked
        mytitleBox = gui.widgetBox(
            widget=self.controlArea,
            box="Corpus",
            orientation="vertical",
            )

        self.mytitleListbox = gui.listBox(
            widget=mytitleBox,
            master=self,
            value="myTitles",
            labels="mytitleLabels",
            callback=lambda: self.removeButton.setDisabled(
                self.myTitles == list()),
            tooltip="The list of titles whose content will be imported",
            )
        self.mytitleListbox.doubleClicked.connect(self.Remove)
        self.mytitleListbox.setMinimumHeight(150)
        self.mytitleListbox.setSelectionMode(3)

        boxbutton2 = gui.widgetBox(
            widget=mytitleBox,
            box=False,
            orientation='horizontal',
            )
        # Remove movies button
        self.removeButton = gui.button(
            widget=boxbutton2,
            master=self,
            label=u'Remove from corpus',
            callback=self.Remove,
            tooltip="Remove the selected movie from your corpus.",
            )
        self.removeButton.setDisabled(True)

        # Delete all confirmed movies button
        self.clearmyBasket = gui.button(
            widget=boxbutton2,
            master=self,
            label=u'Clear corpus',
            callback=self.ClearmyCorpus,
            tooltip="Remove all movies from your corpus.",
            )
        self.clearmyBasket.setDisabled(True)

        gui.rubber(self.controlArea)

    #----------------------------------------------------------------------

        # Draw Info box and Send button
        self.sendButton.draw()
        self.searchButton.setDefault(True)
        self.infoBox.draw()

        # This initialization step needs to be done after infoBox has been
        # drawn (because we may need to display an error message).
        self.loadDatabaseCache()

        # Make sure that whatever was in the corpus last time is deleted
        self.ClearmyCorpus()

        # Send data if autoSend.
        self.sendButton.sendIf()
Ejemplo n.º 23
0
    def __init__(self):
        super().__init__()

        # Search filters attributs
        self.newQuery = ''
        self.type_results = 'Title'
        #self.genre_searched = 'Comedy'
        self.filter_results = 'Popularity'
        self.nbr_results = '10'
        # Results box attributs
        self.titleLabels = list()
        self.selectedTitles = list()
        # selections box attributs
        self.myTitles = list()
        self.mytitleLabels = list()

        # stocks the imdbpy instance
        self.ia = imdb.IMDb()
        # stock all the inputs (movie names) in a list
        self.createdInputs = list()

        # Mandatory declaration of the info box and the send button
        self.infoBox = InfoBox(widget=self.controlArea)
        self.sendButton = SendButton(
            widget=self.controlArea,
            master=self,
            callback=self.sendData,
            infoBoxAttribute='infoBox',
            sendIfPreCallback=None,
        )

        #----------------------------------------------------------------------
        # User interface...

        # Creation of the different working areas
        self.queryBox = gui.widgetBox(
            widget=self.controlArea,
            box="Query",
            orientation="horizontal",
        )

        self.genreBox = gui.widgetBox(
            widget=self.controlArea,
            box="Query",
            orientation="horizontal",
        )

        self.filterBox = gui.widgetBox(
            widget=self.controlArea,
            box="Query options",
            orientation="horizontal",
        )

        searchButtonBox = gui.widgetBox(
            widget=self.controlArea,
            orientation="vertical",
        )

        resultBox = gui.widgetBox(
            widget=self.controlArea,
            box="Search results",
            orientation="vertical",
        )

        # List Box where all the searched movies are stocked
        self.titleListbox = gui.listBox(
            widget=resultBox,
            master=self,
            value="selectedTitles",
            labels="titleLabels",
            callback=lambda: self.addButton.setDisabled(self.selectedTitles ==
                                                        list()),
            tooltip="The list of titles whose content will be imported",
        )
        self.titleListbox.doubleClicked.connect(self.addToCorpus)
        self.titleListbox.setMinimumHeight(150)
        self.titleListbox.setSelectionMode(3)

        resultButtonBox = gui.widgetBox(
            widget=resultBox,
            box=False,
            orientation='horizontal',
        )

        corpusBox = gui.widgetBox(
            widget=self.controlArea,
            box="Corpus",
            orientation="vertical",
        )

        # Corpus where confirmed movies are moved and stocked
        self.mytitleListbox = gui.listBox(
            widget=corpusBox,
            master=self,
            value="myTitles",
            labels="mytitleLabels",
            callback=lambda: self.removeButton.setDisabled(self.myTitles ==
                                                           list()),
            tooltip="The list of titles whose content will be imported",
        )
        self.mytitleListbox.doubleClicked.connect(self.remove)
        self.mytitleListbox.setMinimumHeight(150)
        self.mytitleListbox.setSelectionMode(3)

        corpusButtonBox = gui.widgetBox(
            widget=corpusBox,
            box=False,
            orientation='horizontal',
        )

        # Allows to enter specific text to the research
        gui.lineEdit(
            widget=self.queryBox,
            master=self,
            value='newQuery',
            orientation='horizontal',
            label=u"Search: ",
            labelWidth=120,
            tooltip=("Enter a string"),
        )

        # Allows to choose a type of search
        searchType = gui.comboBox(
            widget=self.queryBox,
            master=self,
            value="type_results",
            items=[
                "Title",
                "Actor",
                #"Genre",
            ],
            sendSelectedValue=True,
            callback=self.mode_changed,
            orientation="horizontal",
            label="Search by: ",
            labelWidth=120,
            tooltip=("Please select the desired search.\n"),
        )
        """genreTypes = gui.comboBox(
            widget=self.genreBox,
            master=self,
            value="genre_searched",
            items=[
                "Comedy",
                "Action",
                "Drama",
                "Horror",
            ],
            sendSelectedValue=True,
            orientation="horizontal",
            label="Search Type: ",
            labelWidth=120,
            tooltip=(
                "Please select the desired search.\n"
            ),
        )
        """
        searchTypeGenre = gui.comboBox(
            widget=self.genreBox,
            master=self,
            value="type_results",
            items=[
                "Title",
                "Actor",
                "Genre",
            ],
            sendSelectedValue=True,
            callback=self.mode_changed,
            orientation="horizontal",
            label="Search Type: ",
            labelWidth=120,
            tooltip=("Please select the desired search.\n"),
        )

        # Allows to chose a filter for the search
        self.searchFilter = gui.comboBox(
            widget=self.filterBox,
            master=self,
            value="filter_results",
            items=[
                "Year",
                "Alphabetical",
                "Random",
            ],
            sendSelectedValue=True,
            orientation="horizontal",
            label="Sort by: ",
            labelWidth=120,
            tooltip=("Please select the desired search.\n"),
        )

        # Allows to choose the wanted results numberp (10 by 10)
        self.searchNbr = gui.comboBox(
            widget=self.filterBox,
            master=self,
            value="nbr_results",
            items=[
                "10",
                "20",
                "30",
                "40",
                "50",
            ],
            sendSelectedValue=True,
            orientation="horizontal",
            label="Results' number: ",
            labelWidth=120,
            tooltip=("Please select the desired search.\n"),
        )

        # Reasearch button
        # Uses "searchMovies" attribute
        self.searchButton = gui.button(
            widget=searchButtonBox,
            master=self,
            label="Search",
            callback=self.searchMovies,
            tooltip="Connect to imdbpy and make a research",
        )

        # Add movies button
        self.addButton = gui.button(
            widget=resultButtonBox,
            master=self,
            label=u'Add to corpus',
            callback=self.addToCorpus,
            tooltip=(u"Move the selected movie downward in your corpus."),
        )
        self.addButton.setDisabled(True)

        # Clear button
        # Uses "clearResults" function
        self.clearButton = gui.button(
            widget=resultButtonBox,
            master=self,
            label="Clear results",
            callback=self.clearResults,
            tooltip="Clear results",
        )
        self.clearButton.setDisabled(True)
        #gui.separator(widget=queryBox, height=3)

        # Remove movie button
        self.removeButton = gui.button(
            widget=corpusButtonBox,
            master=self,
            label=u'Remove from corpus',
            callback=self.remove,
            tooltip=(u"Remove the selected movie from your corpus."),
        )
        self.removeButton.setDisabled(True)

        # Delete all confirmed movies button
        self.clearmyBasket = gui.button(
            widget=corpusButtonBox,
            master=self,
            label=u'Clear corpus',
            callback=self.clearCorpus,
            tooltip=(u"Remove all movies from your corpus."),
        )

        self.clearmyBasket.setDisabled(True)

        gui.separator(widget=corpusBox, height=3)
        gui.rubber(self.controlArea)
        #----------------------------------------------------------------------

        # Draw Info box and Send button
        self.sendButton.draw()
        self.searchButton.setDefault(True)
        self.infoBox.draw()

        #self.mode_changed()
        self.updateCorpus()

        self.mode_changed()

        # Send data if autoSend.
        self.sendButton.sendIf()
Ejemplo n.º 24
0
class Treetagger(OWTextableBaseWidget):
    """Orange widget for POS-tagging and lemmatization with Treetagger"""

    name = "Treetagger"
    description = "POS-tagging and lemmatization with Treetagger"
    icon = "icons/treetagger.svg"
    priority = 2003

    inputs = [("Segmentation", Segmentation, "inputData")]
    outputs = [("Tagged data", Segmentation)]

    settingsHandler = VersionedSettingsHandler(
        version=__version__.rsplit(".", 1)[0])

    language = settings.Setting(0)
    replaceUnknown = settings.Setting(False)
    outputFormat = settings.Setting("segment into words")

    want_main_area = False

    configFilePath = os.path.normpath(
        appdirs.user_data_dir("textable", "langtech") + "/treetagger_path")

    def __init__(self, *args, **kwargs):
        """Initialize a Message widget"""
        super().__init__(*args, **kwargs)

        # Other attributes...
        self.segmentation = None
        self.createdInputs = list()
        self.noLanguageParameterWarning = (
            "Please make sure that at least one language parameter "
            "file is installed in your Treetagger 'lib' directory, "
            "then click 'Reload language parameter files'.")
        self.noTreetaggerPathWarning = (
            "Please click 'Locate Treetagger' below and select the "
            "base directory of a valid Treetagger distribution.")
        self.TreetaggerPath = (treetaggerwrapper.locate_treetagger()
                               or self.lookupSavedTreetaggerPath())

        self.infoBox = InfoBox(widget=self.controlArea)

        self.sendButton = SendButton(widget=self.controlArea,
                                     master=self,
                                     callback=self.sendData,
                                     infoBoxAttribute=u"infoBox",
                                     sendIfPreCallback=self.updateGUI)

        gui.separator(self.controlArea, height=3)

        self.optionsBox = gui.widgetBox(
            self.controlArea,
            u"Options",
        )

        self.languageCombobox = gui.comboBox(
            widget=self.optionsBox,
            master=self,
            value="language",
            items=list(),
            sendSelectedValue=True,
            orientation=u"horizontal",
            label="Input language:",
            labelWidth=180,
            callback=self.sendButton.settingsChanged,
            tooltip=(u"Select the language of the input text."),
        )
        self.languageCombobox.setMinimumWidth(120)

        gui.separator(self.optionsBox, height=3)

        gui.comboBox(
            widget=self.optionsBox,
            master=self,
            value="outputFormat",
            items=[
                "segment into words",
                "add XML tags",
            ],
            sendSelectedValue=True,
            orientation=u"horizontal",
            label="Output format:",
            labelWidth=180,
            callback=self.sendButton.settingsChanged,
            tooltip=(
                u"Select the format of the output:\n\n"
                u"Segment into words: each word is in a separate segment,\n"
                u"with lemma and POS-tag as annotations.\n\n"
                u"Add XML tags: output segments correspond to input segments\n"
                u"and each word is tagged in XML as a 'w' element with\n"
                u"lemma and POS-tag as attributes."),
        )

        gui.separator(self.optionsBox, height=3)

        gui.checkBox(
            widget=self.optionsBox,
            master=self,
            value="replaceUnknown",
            label="Output token in place of [unknown] lemmas",
            callback=self.sendButton.settingsChanged,
            tooltip=(
                u"For out-of-vocabulary words, the word form is used as the\n"
                u"lemma (in place of Treetagger's default 'unknown' code)."),
        )

        gui.rubber(self.controlArea)

        self.sendButton.draw()
        self.infoBox.draw()

        self.locateTreetaggerBox = gui.widgetBox(
            self.controlArea,
            addSpace=False,
        )

        gui.separator(self.locateTreetaggerBox, height=3)

        self.treetaggerButton = gui.button(
            widget=self.locateTreetaggerBox,
            master=self,
            label="Locate Treetagger",
            callback=self.validateTreetagger,
            tooltip=(
                u"Click to select the location of the Treetagger base\n"
                u"directory (containing the 'lib' and 'bin' subdirectories)."),
        )

        self.sendButton.sendIf()

        self.adjustSizeWithTimer()

    def inputData(self, inputData):
        """Process incoming data."""
        self.segmentation = inputData
        self.infoBox.inputChanged()
        self.sendButton.sendIf()

    def sendData(self):

        # Clear created Inputs...
        self.clearCreatedInputs()

        if not self.TreetaggerPath:
            self.infoBox.setText(self.noTreetaggerPathWarning, "warning")
            self.send("Tagged data", None)
            return
        elif not self.getAvailableLanguages():
            self.infoBox.setText(self.noLanguageParameterWarning, "warning")
            self.send("Tagged data", None)
            return
        elif not self.segmentation:
            self.infoBox.setText(u"Widget needs input", "warning")
            self.send("Tagged data", None)
            return

        # Initialize progress bar.
        self.infoBox.setText(u"Processing, please wait...", "warning")
        self.controlArea.setDisabled(True)
        self.progressBar = ProgressBar(self, iterations=5)

        # Create a copy of input seg, storing annotations in temp attr...
        copy_of_input_seg = Segmentation()
        copy_of_input_seg.label = self.segmentation.label
        for seg_idx, segment in enumerate(self.segmentation):
            attr = " ".join([
                "%s=%s" % (
                    ''.join(c for c in unicodedata.normalize('NFD', item[0])
                            if unicodedata.category(c) != 'Mn'),
                    quoteattr(str(item[1])),
                ) for item in segment.annotations.items()
            ])

            segment.annotations["tt_ax"] = attr
            copy_of_input_seg.append(segment)

        self.progressBar.advance()

        # Dump segmentation in unique string to avoid multiple calls to TT...
        concatenated_text = copy_of_input_seg.to_string(
            formatting="<ax_tt %(tt_ax)s>%(__content__)s</ax_tt>",
            display_all=True,
        )

        self.progressBar.advance()

        # Tag the segmentation contents...
        tagopt = '-token -lemma -sgml -quiet'
        if self.replaceUnknown:
            tagopt += " -no-unknown"
        tagger = treetaggerwrapper.TreeTagger(
            TAGLANG=pycountry.languages.get(name=self.language).alpha_2,
            TAGOPT=tagopt,
            TAGDIR=self.TreetaggerPath,
        )
        tagged_lines = tagger.tag_text(
            concatenated_text,
            notagurl=True,
            notagemail=True,
            notagip=True,
            notagdns=True,
        )
        tagged_input = Input("\n".join(tagged_lines))
        self.createdInputs.append(tagged_input)

        # Replace <unknown> with [unknown] and " with &quot; then
        # re-segment to match the original segmentation structure.
        tagged_segmentation, _ = Segmenter.recode(
            tagged_input,
            substitutions=[
                (re.compile(r"<unknown>"), "[unknown]"),
                (re.compile(r'"""'), '"&quot;"'),
            ],
        )
        tagged_segmentation = Segmenter.import_xml(tagged_segmentation,
                                                   "ax_tt")

        self.progressBar.advance()

        # Place each output line of Treetagger in an xml tag with annotations..
        xml_segmentation, _ = Segmenter.recode(
            tagged_segmentation,
            substitutions=[
                (re.compile(r"(.+)\t(.+)\t(.+?)(?=[\r\n])"),
                 '<w lemma="&3" pos-tag="&2">&1</w>'),
                (re.compile(r'^\n|\n$'), ''),
            ],
        )
        # Segment into individual tokens if XML output option is disabled...
        if self.outputFormat == "add XML tags":
            output_segmentation = xml_segmentation
        else:
            try:
                output_segmentation = Segmenter.import_xml(
                    xml_segmentation, "w")
            except ValueError:
                self.infoBox.setText(
                    "Please check that either the input contains well-formed "
                    "XML, or it doesn't contain instances of '&#60;' and '\x3e'",
                    "error")
                self.send("Tagged data", None)
                self.progressBar.finish()
                self.controlArea.setDisabled(False)
                return

        self.progressBar.finish()
        self.controlArea.setDisabled(False)

        output_segmentation.label = self.captionTitle
        message = u'%i segment@p sent to output.' % len(output_segmentation)
        message = pluralize(message, len(output_segmentation))
        self.infoBox.setText(message)
        self.send('Tagged data', output_segmentation, self)
        self.sendButton.resetSettingsChangedFlag()

    def updateGUI(self):
        """Update GUI state"""
        if self.TreetaggerPath:
            self.optionsBox.setDisabled(False)
            self.locateTreetaggerBox.setVisible(False)
            self.languageCombobox.clear()
            languages = self.getAvailableLanguages()
            if not languages:
                self.infoBox.setText(self.noLanguageParameterWarning,
                                     "warning")
                self.optionsBox.setDisabled(True)
                self.locateTreetaggerBox.setVisible(True)
                self.treetaggerButton.setText(
                    "Reload language parameter files")
            else:
                self.language = self.language or languages[0]
        else:
            self.infoBox.setText(self.noTreetaggerPathWarning, "warning")
            self.optionsBox.setDisabled(True)
            self.locateTreetaggerBox.setVisible(True)
        self.adjustSizeWithTimer()

    def getAvailableLanguages(self):
        languages = list()
        for lang_code in sorted(treetaggerwrapper.g_langsupport):
            if lang_code.startswith("__"):
                continue
            try:
                treetaggerwrapper.TreeTagger(
                    TAGLANG=lang_code,
                    TAGDIR=self.TreetaggerPath,
                )
                language = pycountry.languages.get(alpha_2=lang_code).name
                self.languageCombobox.addItem(language)
                languages.append(language)
            except:
                pass
        return languages

    def lookupSavedTreetaggerPath(self):
        """Look for a saved Treetagger base dir path in app data"""
        if os.path.exists(self.__class__.configFilePath):
            try:
                inputFile = open(self.__class__.configFilePath, "r")
                TreetaggerSavedPath = inputFile.read()
                inputFile.close()
                if self.checkTreetaggerPath(TreetaggerSavedPath):
                    return TreetaggerSavedPath
                else:
                    os.remove(self.__class__.configFilePath)
                    return None
            except IOError:
                pass

    def validateTreetagger(self):
        """Respond to user actions needed to validate Treetagger path"""

        # If the Treetagger path is known, make sure there are language files...
        if self.TreetaggerPath:
            if self.getAvailableLanguages():
                self.sendButton.settingsChanged()
                self.updateGUI()
            else:
                QMessageBox.warning(None, 'Textable',
                                    'Language parameter files not found.',
                                    QMessageBox.Ok)
            return

        # Else if the path is not known...

        # First try to locate it automatically...
        TreetaggerPath = treetaggerwrapper.locate_treetagger()

        # If it fails, let the user locate it manually...
        if not (TreetaggerPath and self.checkTreetaggerPath(TreetaggerPath)):

            TreetaggerManualPath = os.path.normpath(
                str(
                    QFileDialog.getExistingDirectory(
                        self, u"Please locate Treetagger base directory")))

            # If user selected a dir...
            if TreetaggerManualPath:

                # Check if selected dir contains Treetagger binary...
                if self.checkTreetaggerPath(TreetaggerManualPath):
                    TreetaggerPath = TreetaggerManualPath
                else:
                    QMessageBox.warning(
                        None, 'Textable',
                        'Not a valid Treetagger base directory.',
                        QMessageBox.Ok)

        # If a valid path was found somehow, save config to app data...
        if TreetaggerPath:
            try:
                user_data_editor_dir = os.path.normpath(
                    self.__class__.configFilePath + "/../..")
                if not os.path.exists(user_data_editor_dir):
                    os.makedirs(user_data_editor_dir)
                user_data_software_dir = os.path.normpath(
                    self.__class__.configFilePath + "/..")
                if not os.path.exists(user_data_software_dir):
                    os.makedirs(user_data_software_dir)
                outputFile = open(self.__class__.configFilePath, "w")
                outputFile.write(TreetaggerPath)
                outputFile.close()
            except IOError:
                pass
            self.TreetaggerPath = TreetaggerPath

            self.sendButton.settingsChanged()

    def checkTreetaggerPath(self, path):
        """Check if path is a valid Treetagger base dir"""
        return os.path.exists(
            os.path.normpath(path + "/bin/tree-tagger" +
                             (".exe" if os.name == "nt" else "")))

    def clearCreatedInputs(self):
        for i in self.createdInputs:
            Segmentation.set_data(i[0].str_index, None)
        del self.createdInputs[:]

    def onDeleteWidget(self):
        """Free memory when widget is deleted (overriden method)"""
        self.clearCreatedInputs()

    def setCaption(self, title):
        if 'captionTitle' in dir(self):
            changed = title != self.captionTitle
            super().setCaption(title)
            if changed:
                self.sendButton.settingsChanged()
        else:
            super().setCaption(title)
class Redditor(OWTextableBaseWidget):
    """An Orange widget to scrape Reddit"""

    #----------------------------------------------------------------------
    # Widget's metadata...

    name = "Redditor"
    description = "Scrap on Reddit"
    icon = "icons/Reddit-alien.png"
    priority = 14

    #----------------------------------------------------------------------
    # Channel definitions...

    # inputs = []
    outputs = [("Segmentation", Segmentation)]

    #----------------------------------------------------------------------
    # GUI layout parameters...

    want_main_area = False
    resizing_enabled = True

    #----------------------------------------------------------------------
    # Query settings...

    mode = Setting("Subreddit")
    subreddit = Setting(u'')
    URL = Setting(u'')
    fullText = Setting(u'')

    # Filters settings...

    sortBy = Setting("Hot")
    sortByFT = Setting("Relevance")
    postedAt = Setting("All")
    amount = Setting(1)

    # Include settings...

    includeComments = Setting(True)
    includeImage = Setting(False)

    # resultBox settings...

    labelsPanier = Setting(list())

    # Data settings...

    queryList = Setting(list())
    annotList = Setting(list())

    #----------------------------------------------------------------------
    # Praw instance

    reddit = praw.Reddit(client_id="aHeP3Ub7aILvsg",
                         client_secret=None,
                         username="******",
                         password="******",
                         user_agent="Redditor by /u/RedditorApp")

    #----------------------------------------------------------------------
    # Temporary inputs and data lists

    createdInputs = list()
    listeTempAnnot = list()
    listeTempPosts = list()

    def __init__(self):
        """Init of the module: UI and variables definition"""
        super().__init__()

        # queryBox indexes
        self.indicesPanier = list()

        #----------------------------------------------------------------------
        # User interface...

        self.infoBox = InfoBox(widget=self.controlArea, )

        #-------------------------#
        #    Main widget boxes    #
        #-------------------------#

        sourceBox = gui.widgetBox(
            widget=self.controlArea,
            box=u'Source',
            orientation='vertical',
            addSpace=False,
        )

        self.filterBox = gui.widgetBox(
            widget=self.controlArea,
            box=u'Filters',
            orientation='vertical',
            addSpace=False,
        )

        self.includeOuterBox = gui.widgetBox(
            widget=self.controlArea,
            box=u'Include',
            orientation='vertical',
            addSpace=False,
        )

        self.requestsBox = gui.widgetBox(
            widget=self.controlArea,
            orientation='horizontal',
            addSpace=False,
        )

        panierBox = gui.widgetBox(
            widget=self.controlArea,
            orientation='vertical',
            box=u'Selection',
            addSpace=False,
        )

        #-------------------#
        #    Send button    #
        #-------------------#

        self.sendButton = SendButton(
            widget=self.controlArea,
            master=self,
            callback=self.send_data,
            infoBoxAttribute='infoBox',
        )

        #------------------------#
        #   Query box elements   #
        #------------------------#

        self.choiceBox = gui.comboBox(
            widget=sourceBox,
            master=self,
            value='mode',
            label="Mode:",
            callback=self.mode_changed,
            tooltip="Choose mode",
            orientation='horizontal',
            sendSelectedValue=True,
            items=["Subreddit", "URL", "Full text"],
            labelWidth=135,
        )

        self.modeBox = gui.widgetBox(
            widget=sourceBox,
            orientation='horizontal',
            addSpace=False,
        )

        self.urlBox = gui.widgetBox(
            widget=sourceBox,
            orientation='horizontal',
            addSpace=False,
        )

        gui.lineEdit(
            widget=self.urlBox,
            master=self,
            value='URL',
            orientation='horizontal',
            callback=self.sendButton.settingsChanged,
            label=u'Search with URL:',
            labelWidth=135,
        )

        self.subredditBox = gui.widgetBox(
            widget=sourceBox,
            orientation='horizontal',
            addSpace=False,
        )

        gui.lineEdit(
            widget=self.subredditBox,
            master=self,
            value='subreddit',
            orientation='horizontal',
            callback=self.sendButton.settingsChanged,
            label=u'reddit.com/r/...:',
            labelWidth=135,
        )
        self.fullTextBox = gui.widgetBox(
            widget=sourceBox,
            orientation='horizontal',
            addSpace=False,
        )

        gui.lineEdit(
            widget=self.fullTextBox,
            master=self,
            value='fullText',
            orientation='horizontal',
            callback=self.sendButton.settingsChanged,
            label=u'Search on reddit:',
            labelWidth=135,
        )

        #----------------------------#
        #    Filters box elements    #
        #----------------------------#

        self.subredditFilter = gui.widgetBox(
            widget=self.filterBox,
            orientation='horizontal',
            addSpace=False,
        )

        gui.comboBox(
            widget=self.subredditFilter,
            master=self,
            value='sortBy',
            label=u'Sort by:',
            tooltip="Choose mode to sort your posts",
            orientation='horizontal',
            sendSelectedValue=True,
            callback=self.checkSubredditSortMode,
            items=["Hot", "New", "Controversial", "Top", "Rising"],
            labelWidth=135,
        )

        self.fullTextFilter = gui.widgetBox(
            widget=self.filterBox,
            orientation='horizontal',
            addSpace=False,
        )

        gui.comboBox(
            widget=self.fullTextFilter,
            master=self,
            value='sortByFT',
            label="Sort by:",
            tooltip="Choose mode",
            orientation='horizontal',
            sendSelectedValue=True,
            callback=self.checkSearchSortMode,
            items=["Relevance", "Top", "New", "Comments"],
            labelWidth=135,
        )

        self.timeBox = gui.widgetBox(
            widget=self.filterBox,
            orientation='horizontal',
            addSpace=False,
        )

        gui.comboBox(
            widget=self.timeBox,
            master=self,
            value='postedAt',
            label=u'Time:',
            tooltip="Choose mode to sort your posts",
            orientation='horizontal',
            callback=self.sendButton.settingsChanged,
            sendSelectedValue=True,
            items=["All", "Past day", "Past hour", "Past month", "Past year"],
            labelWidth=135,
        )

        gui.spin(
            widget=self.filterBox,
            master=self,
            value="amount",
            minv=1,
            maxv=10000,
            label="Max amount of posts:",
            labelWidth=135,
            orientation="horizontal",
            callback=self.sendButton.settingsChanged,
            tooltip="Select the amount of posts that you want",
        )

        #--------------------------#
        #   Include box elements   #
        #--------------------------#

        self.includeBox = gui.widgetBox(
            widget=self.includeOuterBox,
            orientation='horizontal',
            addSpace=False,
        )

        gui.checkBox(
            widget=self.includeBox,
            master=self,
            value='includeImage',
            label=u'Include images',
            callback=self.mode_changed,
        )

        gui.checkBox(
            widget=self.includeBox,
            master=self,
            value='includeComments',
            label=u'Include comments',
            callback=self.mode_changed,
        )

        #--------------------------#
        #   Request box elements   #
        #--------------------------#

        self.refreshButton = gui.button(
            widget=self.requestsBox,
            master=self,
            label=u'Refresh the Data',
            callback=self.refresh_content,
        )

        if len(self.labelsPanier) == 0:
            self.refreshButton.setDisabled(True)

        self.fetchButton = gui.button(
            widget=self.requestsBox,
            master=self,
            label=u'Add Request',
            callback=self.confirm_settings,
        )

        #--------------------------#
        #   Results box elements   #
        #--------------------------#

        panier = gui.listBox(
            widget=panierBox,
            master=self,
            value="indicesPanier",
            labels="labelsPanier",
            callback=lambda: self.removeButton.setDisabled(self.indicesPanier
                                                           == list()),
            tooltip="List of imported corpora.",
        )
        panier.setMinimumHeight(120)
        panier.setSelectionMode(3)

        self.update_list(self.labelsPanier)

        removalBox = gui.widgetBox(
            widget=panierBox,
            orientation='horizontal',
            addSpace=False,
        )

        self.removeButton = gui.button(
            widget=removalBox,
            master=self,
            label="Remove from selection",
            callback=self.removePressed,
            tooltip="Remove the selected corpus.",
        )

        self.removeButton.setDisabled(True)

        self.clearButton = gui.button(
            widget=removalBox,
            master=self,
            label="Clear selection",
            callback=self.clearPressed,
            tooltip="Remove all corpora from selection.",
        )

        if len(self.labelsPanier) == 0:
            self.clearButton.setDisabled(True)

        #------------------------#
        #   End of definitions   #
        #------------------------#

        gui.separator(widget=self.controlArea, height=3)  # Spacer
        gui.rubber(self.controlArea)

        # Info box...

        self.sendButton.draw()
        self.infoBox.draw()

        self.mode_changed()
        self.sendButton.settingsChanged()

        # Send data if autoSend...
        self.sendButton.sendIf()

    def mode_changed(self):
        self.sendButton.settingsChanged()
        """Allows to update the interface depeding on query mode"""
        if self.mode == "Subreddit":  # 0 = subreddit selected
            # Hide URL and full text
            self.urlBox.setVisible(False)
            self.fullTextBox.setVisible(False)
            self.fullTextFilter.setVisible(False)

            # Show subreddit
            self.subredditBox.setVisible(True)
            self.filterBox.setVisible(True)
            self.subredditFilter.setVisible(True)

            self.checkSubredditSortMode()
        elif self.mode == "URL":  # self.mode ==1 => post selected
            # Hide subreddit and full text
            self.subredditBox.setVisible(False)
            self.fullTextBox.setVisible(False)
            self.filterBox.setVisible(False)

            # Show URL
            self.urlBox.setVisible(True)
        elif self.mode == "Full text":
            # Hide subreddit
            self.subredditBox.setVisible(False)
            self.urlBox.setVisible(False)
            self.subredditFilter.setVisible(False)

            # Show full text
            self.fullTextBox.setVisible(True)
            self.filterBox.setVisible(True)
            self.fullTextFilter.setVisible(True)

            self.checkSearchSortMode()

        return

    def get_content(self, m, pA, sI, uI, ftI, sTF, ftTF, iI, iC, a):
        """Fetches the content on Reddit based on the filters selected by the user
        
        
        Parameters:
        m (string): Stands for 'mode', defines which mode the query will use
        pA (string): Stands for 'posted at', defines the time scale of the query
        sI (string): Stands for 'SubReddit input', defines the SubReddit name that will be queried
        uI (string): Stands for 'URL input', defines the URL of the SubReddit post that will be queried
        ftI (string): Stands for 'full text input', defines the query parameter for a full text query
        sTF (string): Stands for 'SubReddit time filters', defines the manner of filters for a SubReddit query in 'sort by'
        ftTF (string): Stands for 'full text time filters', defines the manner of filters for a full text query
        iI (string): Stands for 'include image', defines if the images should be included
        iC (string): Stands for 'include comments', defines if comments should be included
        a (int): Stands for 'amount', amount of posts to be fetched

        """
        self.controlArea.setDisabled(True)
        self.infoBox.setText("Processing query, please wait.", "warning")

        # Check if anything was put in the input
        if ((m == "Subreddit" and len(sI) > 0) or (m == "URL" and len(uI) > 0)
                or (m == "Full text" and len(ftI) > 0)):
            if pA == "All":
                varTimeFilter = "all"
            elif pA == "Past day":
                varTimeFilter = "day"
            elif pA == "Past hour":
                varTimeFilter = "hour"
            elif pA == "Past week":
                varTimeFilter = "week"
            elif pA == "Past month":
                varTimeFilter = "month"
            elif pA == "Past year":
                varTimeFilter = "year"

            # Differenciate method depending of user selection
            if self.mode == "Subreddit":
                # Get the subreddit based on subreddit name
                try:
                    subreddit = self.reddit.subreddit(sI)
                    # Set list of posts "posts" according to filter
                    # Initiate lists without time filters applicable first
                    if sTF == "Hot":
                        posts = subreddit.hot(limit=a)
                    elif sTF == "New":
                        posts = subreddit.new(limit=a)
                    elif sTF == "Rising":
                        posts = subreddit.rising(limit=a)
                    # Initiate lists with time filters
                    elif sTF == "Controversial":
                        posts = subreddit.controversial(
                            limit=a, time_filter=varTimeFilter)
                    elif sTF == "Top":
                        posts = subreddit.top(limit=a,
                                              time_filter=varTimeFilter)
                    # Loop on the posts found
                    for post in posts:
                        # Creation of corresponding segments
                        self.create_post_segments(post, iI, iC)
                # Handle exceptions
                except prawcore.exceptions.Redirect:
                    self.infoBox.setText(
                        "Error in redirect, please make sure the subreddit name is correct.",
                        "error")
                    return
                except prawcore.exceptions.NotFound:
                    self.infoBox.setText("Subreddit not found.", "error")
                    return
            elif self.mode == "URL":
                # Get post based on URL
                try:
                    # Set list of posts "posts" according to filter
                    # Initiate lists without time filters applicable first
                    post = self.reddit.submission(url=uI)
                    # Creation of corresponding segment
                    self.create_post_segments(post, iI, iC)
                # Handle exceptions
                except prawcore.exceptions.NotFound:
                    self.infoBox.setText("No match for URL.", "error")
                    return
                except praw.exceptions.ClientException:
                    self.infoBox.setText("URL not found.", "error")
                    return
            elif self.mode == "Full text":
                # Get posts based on a full text research
                userSearch = ftI
                reddit = self.reddit.subreddit("all")

                # Get posts based on the filter chosen by the user
                if ftTF == "Relevance":
                    posts = reddit.search(
                        userSearch,
                        sort="relevance",
                        limit=a,
                        time_filter=varTimeFilter,
                    )
                elif ftTF == "Top":
                    posts = reddit.search(
                        userSearch,
                        sort="top",
                        limit=a,
                        time_filter=varTimeFilter,
                    )
                elif ftTF == "Comments":
                    posts = reddit.search(
                        userSearch,
                        sort="comments",
                        limit=a,
                        time_filter=varTimeFilter,
                    )
                elif ftTF == "New":
                    posts = reddit.search(
                        userSearch,
                        sort="new",
                        limit=a,
                    )

                for post in posts:
                    # Creation of corresponding segment
                    self.create_post_segments(post, iI, iC)

            if len(self.listeTempPosts) > 0:
                # If content was gathered, add the data to the
                # Settings' data list and label list
                self.queryList.append(self.listeTempPosts)
                self.annotList.append(self.listeTempAnnot)
                self.add_to_list(m=m,
                                 pA=pA,
                                 sI=sI,
                                 uI=uI,
                                 ftI=ftI,
                                 sTF=sTF,
                                 ftTF=ftTF,
                                 iI=iI,
                                 iC=iC,
                                 a=a)
                # Empty temporary lists and signal a change in the settings
                self.refreshButton.setDisabled(False)
                self.clearButton.setDisabled(False)
                self.listeTempPosts = list()
                self.listeTempAnnot = list()
                self.sendButton.settingsChanged()
            else:
                # If no centent is found, explain why to the user
                self.infoBox.setText(
                    "The posts found only contained images. Try to include images or comments.",
                    "warning")
        else:
            # If no input is registered, signal it to the user
            self.infoBox.setText("Please type a query.", "warning")

        self.controlArea.setDisabled(False)
        return

    def refresh_content(self):
        """Refreshes all the queries in the list of queries to update the corpus"""
        # Set a list of regex to get the information necessary
        modeReg = re.compile(r"(?<=Mode: ).+?(?=; Value)")
        valueReg = re.compile(r"(?<=Value: ).+?(?=; Settings)")
        settingsReg = re.compile(r"(?<=Settings: ).+?(?=; Include)")
        includeImageReg = re.compile(r"(?<=Include image: )\w+(?=; Include)")
        includeCommentsReg = re.compile(
            r"(?<=Include comments: )\w+(?=; Segments)")
        # Get labels
        labels = self.labelsPanier
        # Clear labels
        self.clearPressed()
        for label in labels:
            # For each labels, get data using the regex
            mode = re.search(modeReg, label).group(0)
            value = re.search(valueReg, label).group(0)
            settings = re.search(settingsReg, label).group(0).split(", ")
            incIma = re.search(includeImageReg, label).group(0)
            incCom = re.search(includeCommentsReg, label).group(0)
            subIn = ""
            urlIn = ""
            ftxtIn = ""
            if mode == "Subreddit":
                subIn = value
            elif mode == "URL":
                urlIn = value
            elif mode == "Full text":
                ftxtIn = value

            if settings[1] == "[not specified]":
                settings[1] = "All"

            # Call the get_content method with the information gathered
            self.get_content(m=mode,
                             pA=settings[1],
                             sI=subIn,
                             uI=urlIn,
                             ftI=ftxtIn,
                             sTF=settings[0],
                             ftTF=settings[0],
                             iI=incIma,
                             iC=incCom,
                             a=int(settings[2]))

    def confirm_settings(self):
        """Sets all the values for filters entered by user"""
        mode = self.mode
        timeFilt = self.postedAt
        subInput = self.subreddit
        urlInput = self.URL
        ftInput = self.fullText
        subTimeFilter = self.sortBy
        ftTimeFiler = self.sortByFT
        image = self.includeImage
        comments = self.includeComments
        amount = self.amount
        # Call the get_content method with the information gathered
        self.get_content(m=mode,
                         pA=timeFilt,
                         sI=subInput,
                         uI=urlInput,
                         ftI=ftInput,
                         sTF=subTimeFilter,
                         ftTF=ftTimeFiler,
                         iI=image,
                         iC=comments,
                         a=amount)

    def create_post_segments(self, post, includeImage, includeComments):
        """ Creation of segments from posts"""
        self.create_content_segment(post, includeImage)
        # If "Comments" is checked, we create the corresponding segments
        if includeComments:
            self.create_comments_segments(post)
            return

    def create_content_segment(self, post, includeImage=False):
        """ Creation of segments for posts"""
        progressBar = ProgressBar(self, iterations=1)
        self.infoBox.setText("Processing post content, please wait.",
                             "warning")
        # Create annotations
        annotations = dict()
        annotations["Title"] = post.title
        annotations["Id"] = post.id
        annotations["Parent"] = post.id
        annotations["Author"] = post.author
        annotations["Score"] = post.score
        annotations["Parent_type"] = "0"

        # Time annotations
        time = post.created_utc
        ts = int(time)
        date = datetime.utcfromtimestamp(ts).strftime('%Y-%m-%d %H:%M:%S')
        annotations["Posted_Unix"] = time
        annotations["Posted_at"] = date

        # author, created_utc (ou created ?) et score
        content = post.selftext
        if content == "":
            content = "[image]"

        if not (includeImage == False and content == "[image]"):
            # Create a segment is the user wishes to have a segment
            # When there is only an image
            progressBar.advance()
            self.listeTempPosts.append(content)
            self.listeTempAnnot.append(annotations)
            progressBar.finish()

        return

    def create_comments_segments(self, post):
        """ Creation of segments for each comment in the post"""
        # Get the totality of the comments under a post
        post.comments.replace_more(limit=None)
        comments = post.comments.list()

        progressBar = ProgressBar(self, iterations=len(comments))
        self.infoBox.setText(
            "Processing comments, this can take a while, please wait.",
            "warning")

        # Creation of a segment for each comment
        for comment in comments:
            annotations = dict()
            annotations["Title"] = post.title
            annotations["Id"] = comment.id
            annotations["Author"] = comment.author
            annotations["Score"] = comment.score

            # Time annotations
            time = comment.created_utc
            ts = int(time)
            date = datetime.utcfromtimestamp(ts).strftime('%Y-%m-%d %H:%M:%S')
            annotations["Posted_Unix"] = time
            annotations["Posted_at"] = date

            # author, created_utc (ou created ?) et score
            parentId = comment.parent_id.split("_")
            annotations["Parent"] = parentId[1]
            annotations["Parent_type"] = parentId[0][1]

            self.listeTempPosts.append(comment.body)
            self.listeTempAnnot.append(annotations)
            progressBar.advance()
        progressBar.finish()
        return

    def checkSubredditSortMode(self):
        """Change available settings to the user based on the SubReddit mode"""
        self.sendButton.settingsChanged()
        if self.sortBy == "Hot":
            self.timeBox.setDisabled(True)
        elif self.sortBy == "New":
            self.timeBox.setDisabled(True)
        elif self.sortBy == "Controversial":
            self.timeBox.setDisabled(False)
        elif self.sortBy == "Top":
            self.timeBox.setDisabled(False)
        elif self.sortBy == "Rising":
            self.timeBox.setDisabled(True)

    def checkSearchSortMode(self):
        """Change available settings to the user based on the full text mode"""
        self.sendButton.settingsChanged()
        if self.sortByFT == "Relevance":
            self.timeBox.setDisabled(False)
        elif self.sortByFT == "New":
            self.timeBox.setDisabled(True)
        elif self.sortByFT == "Top":
            self.timeBox.setDisabled(False)
        elif self.sortByFT == "Comments":
            self.timeBox.setDisabled(False)

    def removePressed(self):
        """Remove the selected queries from basket"""
        labelsPanier = self.labelsPanier

        # Delete the corret element from both the label list and the data list
        for idx in sorted(self.indicesPanier, reverse=True):
            del labelsPanier[idx]
            del self.queryList[idx]
            del self.annotList[idx]

        self.labelsPanier = labelsPanier
        self.sendButton.settingsChanged()

    def clearPressed(self):
        """Empty all the queries in the data list and all of labels in the label list"""
        self.labelsPanier = list()
        self.queryList = list()
        self.annotList = list()
        self.sendButton.settingsChanged()
        self.refreshButton.setDisabled(True)
        self.clearButton.setDisabled(True)

    def add_to_list(self, m, pA, sI, uI, ftI, sTF, ftTF, iI, iC, a):
        """Add a label to the basket created with the fonction information from get_content

        Parameters:
        m (string): Stands for 'mode', defines which mode the query will use
        pA (string): Stands for 'posted at', defines the time scale of the query
        sI (string): Stands for 'SubReddit input', defines the SubReddit name that will be queried
        uI (string): Stands for 'URL input', defines the URL of the SubReddit post that will be queried
        ftI (string): Stands for 'full text input', defines the query parameter for a full text query
        sTF (string): Stands for 'SubReddit time filters', defines the manner of filters for a SubReddit query in 'sort by'
        ftTF (string): Stands for 'full text time filters', defines the manner of filters for a full text query
        iI (string): Stands for 'include image', defines if the images should be included
        iC (string): Stands for 'include comments', defines if comments should be included
        a (int): Stands for 'amount', amount of posts to be fetched
        """
        labelsPanier = self.labelsPanier

        # Some gathering of data...
        if m == "Subreddit":
            valeur = sI
            sortBy = sTF
            if sortBy == "Top" or sortBy == "Controversial":
                time = pA
            else:
                time = "[not specified]"
            amount = a
        elif m == "URL":
            valeur = uI
            sortBy = "[not specified]"
            time = "[not specified]"
            amount = 1
        elif m == "Full text":
            valeur = ftI
            sortBy = ftTF
            time = pA
            amount = a

        if iI:
            image = "True"
        else:
            image = "False"

        if iC:
            comments = "True"
        else:
            comments = "False"

        # Append the label to the temporary label list
        labelsPanier.append(
            "* Mode: {}; Value: {}; Settings: {}, {}, {}; Include image: {}; Include comments: {}; Segments: {}"
            .format(m, valeur, sortBy, time, amount, image, comments,
                    len(self.queryList[len(self.queryList) - 1])))

        # Update the labels list
        self.update_list(labelsPanier)

    def update_list(self, listOfLabels):
        """Updates the list of labels in basket"""
        try:
            self.labelsPanier = listOfLabels
        except TypeError:
            self.infoBox.setText("Error !", "error")
            return

    def change_button(self):
        """Deactivates the 'remove' button if nothing is selected in basket"""
        self.removeButton.setDisabled(False)

    def send_data(self):
        """Creates the inputs based on the fetched data"""
        self.controlArea.setDisabled(True)
        self.clearCreatedInputs()
        segmentation = None

        # Goes over each queries in the data list
        for query in self.queryList:
            for text in query:
                # Create inputs
                newInput = Input(text)
                self.createdInputs.append(newInput)

        # If there is only one input, create a segmentation...
        if len(self.createdInputs) == 1:
            segmentation = self.createdInputs[0]

        # Otherwise the widget's output is a concatenation...
        else:
            segmentation = Segmenter.concatenate(
                self.createdInputs,
                import_labels_as=None,
            )

        # Annotate segments...
        annotations = list()
        for elem in self.annotList:
            for dic in elem:
                annotations.append(dic)

        for idx, segment in enumerate(segmentation):
            segment.annotations.update(annotations[idx])
            segmentation[idx] = segment

        # Calculate number of characters...
        num_chars = 0
        for segment in segmentation:
            num_chars += len(Segmentation.get_data(segment.str_index))

        # If there is data...
        if len(segmentation) != 0:
            # Inform the user of the number of segments and the number of characters...
            self.infoBox.setText(
                "{} segments sent to output ({} characters)".format(
                    len(segmentation),
                    num_chars,
                ))
            # Send the segments
            self.send("Segmentation", segmentation)
            self.controlArea.setDisabled(False)
            self.sendButton.resetSettingsChangedFlag()
        else:
            # Else, signal the user that no data is sendable...
            self.infoBox.setText(
                "There are {} segments to send to output. Please fill the query basket and click 'send' again"
                .format(len(segmentation)), "warning")
            self.sendButton.resetSettingsChangedFlag()
            self.controlArea.setDisabled(False)
            self.send("Segmentation", None)

    def clearCreatedInputs(self):
        """Delete all Input objects that have been created"""
        for i in self.createdInputs:
            Segmentation.set_data(i[0].str_index, None)
        del self.createdInputs[:]

    def onDeleteWidget(self):
        """Free memory when widget is deleted (overriden method)"""
        self.clearCreatedInputs()
Ejemplo n.º 26
0
    def __init__(self, *args, **kwargs):
        """Initialize a Message widget"""
        super().__init__(*args, **kwargs)

        # Other attributes...
        self.segmentation = None
        self.createdInputs = list()
        self.noLanguageParameterWarning = (
            "Please make sure that at least one language parameter "
            "file is installed in your Treetagger 'lib' directory, "
            "then click 'Reload language parameter files'.")
        self.noTreetaggerPathWarning = (
            "Please click 'Locate Treetagger' below and select the "
            "base directory of a valid Treetagger distribution.")
        self.TreetaggerPath = (treetaggerwrapper.locate_treetagger()
                               or self.lookupSavedTreetaggerPath())

        self.infoBox = InfoBox(widget=self.controlArea)

        self.sendButton = SendButton(widget=self.controlArea,
                                     master=self,
                                     callback=self.sendData,
                                     infoBoxAttribute=u"infoBox",
                                     sendIfPreCallback=self.updateGUI)

        gui.separator(self.controlArea, height=3)

        self.optionsBox = gui.widgetBox(
            self.controlArea,
            u"Options",
        )

        self.languageCombobox = gui.comboBox(
            widget=self.optionsBox,
            master=self,
            value="language",
            items=list(),
            sendSelectedValue=True,
            orientation=u"horizontal",
            label="Input language:",
            labelWidth=180,
            callback=self.sendButton.settingsChanged,
            tooltip=(u"Select the language of the input text."),
        )
        self.languageCombobox.setMinimumWidth(120)

        gui.separator(self.optionsBox, height=3)

        gui.comboBox(
            widget=self.optionsBox,
            master=self,
            value="outputFormat",
            items=[
                "segment into words",
                "add XML tags",
            ],
            sendSelectedValue=True,
            orientation=u"horizontal",
            label="Output format:",
            labelWidth=180,
            callback=self.sendButton.settingsChanged,
            tooltip=(
                u"Select the format of the output:\n\n"
                u"Segment into words: each word is in a separate segment,\n"
                u"with lemma and POS-tag as annotations.\n\n"
                u"Add XML tags: output segments correspond to input segments\n"
                u"and each word is tagged in XML as a 'w' element with\n"
                u"lemma and POS-tag as attributes."),
        )

        gui.separator(self.optionsBox, height=3)

        gui.checkBox(
            widget=self.optionsBox,
            master=self,
            value="replaceUnknown",
            label="Output token in place of [unknown] lemmas",
            callback=self.sendButton.settingsChanged,
            tooltip=(
                u"For out-of-vocabulary words, the word form is used as the\n"
                u"lemma (in place of Treetagger's default 'unknown' code)."),
        )

        gui.rubber(self.controlArea)

        self.sendButton.draw()
        self.infoBox.draw()

        self.locateTreetaggerBox = gui.widgetBox(
            self.controlArea,
            addSpace=False,
        )

        gui.separator(self.locateTreetaggerBox, height=3)

        self.treetaggerButton = gui.button(
            widget=self.locateTreetaggerBox,
            master=self,
            label="Locate Treetagger",
            callback=self.validateTreetagger,
            tooltip=(
                u"Click to select the location of the Treetagger base\n"
                u"directory (containing the 'lib' and 'bin' subdirectories)."),
        )

        self.sendButton.sendIf()

        self.adjustSizeWithTimer()
Ejemplo n.º 27
0
    def __init__(self):
        """Widget creator."""

        super().__init__()

        # ATTRIBUTS
        # searchFunction
        self.searchResults = None
        self.inputSeg = None
        # newQuery = attribut box lineEdit (search something)
        self.titleQuery = ''
        self.authorQuery = ''
        self.nbr_results = 10
        # Results box attributs
        self.titleLabels = list()
        self.selectedTitles = list()
        # selections box attributs
        self.myTitles = list()
        self.mytitleLabels = list()
        # stock all the inputs (songs) in a list
        self.createdInputs = list()

        # Next two instructions are helpers from TextableUtils. Corresponding
        # interface elements are declared here and actually drawn below (at
        # their position in the UI)...
        self.infoBox = InfoBox(widget=self.controlArea)
        self.sendButton = SendButton(
            widget=self.controlArea,
            master=self,
            callback=self.sendData,
            infoBoxAttribute="infoBox",
        )
        #----------------------------------------------------------------------
        # User interface...
        # Create the working area
        queryBox = gui.widgetBox(
            widget=self.controlArea,
            box="Search books",
            orientation="vertical",
        )

        self.cacheGenerationButton = gui.button(
            widget=queryBox,
            master=self,
            label="Generate cache",
            callback=self.generate_cache,
            tooltip="Generate the gutenberg cache, this might take a while...",
        )

        # Allows to enter specific text to the research
        #  Uses "newQuery" attribut
        gui.lineEdit(
            widget=queryBox,
            master=self,
            value='titleQuery',
            orientation='horizontal',
            label=u"Title: ",
            labelWidth=120,
            tooltip=("Enter a string"),
        )

        # gui.lineEdit(
        #     widget=queryBox,
        #     master=self,
        #     value='authorQuery',
        #     orientation='horizontal',
        #     label=u"Author: ",
        #     labelWidth=120,
        #     tooltip=("Enter a string"),
        # )

        # Allows to choose the wanted results numberp (10 by 10)
        queryNbr = gui.comboBox(
            widget=queryBox,
            master=self,
            value="nbr_results",
            items=[
                "10", "20", "30", "40", "50", "60", "70", "80", "90", "100",
                "200", "300", "400", "500", "1000"
            ],
            sendSelectedValue=True,
            orientation="horizontal",
            label="Number of results: ",
            labelWidth=120,
            tooltip=("Please select the desired search.\n"),
        )

        # Reasearch button
        # Uses "searchFunction" attribut
        self.searchButton = gui.button(
            widget=queryBox,
            master=self,
            label="Search",
            callback=self.search,
            tooltip="Connect Genius and make a research",
        )
        self.titleListbox = gui.listBox(
            widget=queryBox,
            master=self,
            value="selectedTitles",  # setting (list)
            labels="titleLabels",  # setting (list)
            callback=lambda: self.addButton.setDisabled(self.selectedTitles ==
                                                        list()),
            tooltip="The list of titles whose content will be imported",
        )
        self.titleListbox.setMinimumHeight(150)
        self.titleListbox.setSelectionMode(3)

        boxbutton = gui.widgetBox(
            widget=queryBox,
            box=False,
            orientation='horizontal',
        )
        # Add songs button
        self.addButton = gui.button(
            widget=boxbutton,
            master=self,
            label=u'Add to corpus',
            callback=self.add,
            tooltip=(u"Move the selected book downward in your corpus."),
        )
        self.addButton.setDisabled(True)

        # Clear button
        # Uses "clearResults" function
        self.clearButton = gui.button(
            widget=boxbutton,
            master=self,
            label="Clear results",
            callback=self.clearResults,
            tooltip="Clear results",
        )
        self.clearButton.setDisabled(True)
        gui.separator(widget=queryBox, height=3)

        # area where confirmed songs are moved and stocked
        mytitleBox = gui.widgetBox(
            widget=self.controlArea,
            box="Corpus",
            orientation="vertical",
        )

        self.mytitleListbox = gui.listBox(
            widget=mytitleBox,
            master=self,
            value="myTitles",
            labels="mytitleLabels",
            callback=lambda: self.removeButton.setDisabled(self.myTitles ==
                                                           list()),
            tooltip="The list of books which will be imported",
        )
        self.mytitleListbox.setMinimumHeight(150)
        self.mytitleListbox.setSelectionMode(3)

        boxbutton2 = gui.widgetBox(
            widget=mytitleBox,
            box=False,
            orientation='horizontal',
        )
        # Remove songs button
        self.removeButton = gui.button(
            widget=boxbutton2,
            master=self,
            label=u'Remove from corpus',
            callback=self.remove,
            tooltip=(u"Remove the selected book from your corpus."),
        )
        self.removeButton.setDisabled(True)

        # Delete all confirmed texts button
        self.clearmyBasketButton = gui.button(
            widget=boxbutton2,
            master=self,
            label=u'Clear corpus',
            callback=self.clearmyBasket,
            tooltip=(u"Remove all books from your corpus."),
        )
        self.clearmyBasketButton.setDisabled(True)

        gui.separator(widget=mytitleBox, height=3)
        gui.rubber(self.controlArea)
        #----------------------------------------------------------------------

        # Draw Info box and Send button
        self.sendButton.draw()
        self.searchButton.setDefault(True)
        self.infoBox.draw()

        # Update the selections list
        self.updateMytitleLabels()

        # Send data if autoSend.
        self.sendButton.sendIf()
Ejemplo n.º 28
0
class ECP(OWTextableBaseWidget):
    """Textable widget for importing XML-TEI data from the Eighteenth Century
    Poetry website (http://www.eighteenthcenturypoetry.org/)
    """

    #----------------------------------------------------------------------
    # Widget"s metadata...

    name = "18th Century Poetry"
    description = "Import XML-TEI data from ECP website"
    icon = "icons/18th_century_poetry.svg"
    priority = 10

    #----------------------------------------------------------------------
    # Channel definitions (NB: no input in this case)...

    inputs = []
    outputs = [("XML-TEI data", Segmentation)]

    #----------------------------------------------------------------------
    # Settings...

    settingsHandler = VersionedSettingsHandler(
        version=__version__.rsplit(".", 1)[0])

    autoSend = settings.Setting(False)
    selectedTitles = settings.Setting([])
    titleLabels = settings.Setting([])
    filterCriterion = settings.Setting("author")
    filterValue = settings.Setting("(all)")
    importedURLs = settings.Setting([])
    displayAdvancedSettings = settings.Setting(False)

    want_main_area = False

    def __init__(self):
        """Widget creator."""

        super().__init__()

        # Other attributes...
        self.segmentation = None
        self.createdInputs = list()
        self.titleSeg = None
        self.filteredTitleSeg = None
        self.filterValues = dict()
        self.base_url =     \
          u"http://www.eighteenthcenturypoetry.org/works/#genres"
        self.document_base_url =     \
          u"http://www.eighteenthcenturypoetry.org"

        # Next two instructions are helpers from TextableUtils. Corresponding
        # interface elements are declared here and actually drawn below (at
        # their position in the UI)...
        self.infoBox = InfoBox(widget=self.controlArea)
        self.sendButton = SendButton(
            widget=self.controlArea,
            master=self,
            callback=self.sendData,
            infoBoxAttribute="infoBox",
            sendIfPreCallback=self.updateGUI,
        )

        # The AdvancedSettings class, also from TextableUtils, facilitates
        # the management of basic vs. advanced interface. An object from this
        # class (here assigned to self.advancedSettings) contains two lists
        # (basicWidgets and advancedWidgets), to which the corresponding
        # widgetBoxes must be added.
        self.advancedSettings = AdvancedSettings(
            widget=self.controlArea,
            master=self,
            callback=self.updateFilterValueList,
        )

        # User interface...

        # Advanced settings checkbox (basic/advanced interface will appear
        # immediately after it...
        self.advancedSettings.draw()

        # Filter box (advanced settings only)
        filterBox = gui.widgetBox(
            widget=self.controlArea,
            box="Filter",
            orientation="vertical",
        )
        filterCriterionCombo = gui.comboBox(
            widget=filterBox,
            master=self,
            value="filterCriterion",
            items=["author", "genre"],
            sendSelectedValue=True,
            orientation="horizontal",
            label="Criterion:",
            labelWidth=120,
            callback=self.updateFilterValueList,
            tooltip=(
                "Please select a criterion for searching the title list\n"),
        )
        filterCriterionCombo.setMinimumWidth(120)
        gui.separator(widget=filterBox, height=3)
        self.filterValueCombo = gui.comboBox(
            widget=filterBox,
            master=self,
            value="filterValue",
            sendSelectedValue=True,
            orientation="horizontal",
            label="Value:",
            labelWidth=120,
            callback=self.updateTitleList,
            tooltip=("Please select a value for the chosen criterion."),
        )
        gui.separator(widget=filterBox, height=3)

        # The following lines add filterBox (and a vertical separator) to the
        # advanced interface...
        self.advancedSettings.advancedWidgets.append(filterBox)
        self.advancedSettings.advancedWidgetsAppendSeparator()

        # Title box
        titleBox = gui.widgetBox(
            widget=self.controlArea,
            box="Titles",
            orientation="vertical",
        )
        self.titleListbox = gui.listBox(
            widget=titleBox,
            master=self,
            value="selectedTitles",  # setting (list)
            labels="titleLabels",  # setting (list)
            callback=self.sendButton.settingsChanged,
            tooltip="The list of titles whose content will be imported",
        )
        self.titleListbox.setMinimumHeight(150)
        self.titleListbox.setSelectionMode(3)
        gui.separator(widget=titleBox, height=3)
        gui.button(
            widget=titleBox,
            master=self,
            label="Refresh",
            callback=self.refreshTitleSeg,
            tooltip="Connect to ECP website and refresh list.",
        )
        gui.separator(widget=titleBox, height=3)

        gui.separator(widget=self.controlArea, height=3)

        gui.rubber(self.controlArea)

        # Now Info box and Send button must be drawn...
        self.sendButton.draw()
        self.infoBox.draw()

        # This initialization step needs to be done after infoBox has been
        # drawn (because getTitleSeg may need to display an error message).
        self.getTitleSeg()

        # Send data if autoSend.
        self.sendButton.sendIf()

        self.setMinimumWidth(350)
        self.adjustSizeWithTimer()

    def sendData(self):
        """Compute result of widget processing and send to output"""

        # Skip if title list is empty:
        if self.titleLabels == list():
            return

        # Check that something has been selected...
        if len(self.selectedTitles) == 0:
            self.infoBox.setText("Please select one or more titles.",
                                 "warning")
            self.send("XML-TEI data", None, self)
            return

        # Clear created Inputs.
        self.clearCreatedInputs()

        # Initialize progress bar.
        progressBar = gui.ProgressBar(self,
                                      iterations=len(self.selectedTitles))

        # Attempt to connect to ECP and retrieve plays...
        xml_contents = list()
        annotations = list()
        try:
            for title in self.selectedTitles:
                doc_url = self.document_base_url +  \
                    self.filteredTitleSeg[title].annotations["url"]
                print(doc_url)
                url = re.sub(r"/([^/]+)\.shtml", r"/\1/\1.xml", doc_url)
                print(url)
                response = urllib.request.urlopen(url)
                xml_contents.append(response.read().decode('utf-8'))
                source_annotations = \
                self.filteredTitleSeg[title].annotations.copy()
                #source_annotations["url"] = source_annotations["href"]
                #del source_annotations["href"]
                annotations.append(source_annotations)
                progressBar.advance()  # 1 tick on the progress bar...

        # If an error occurs (e.g. http error, or memory error)...
        except:
            #Set Info box and widget to "error" state.
            self.infoBox.setText("Couldn't download data from ECP website.",
                                 "error")
            # Reset output channel.
            self.send("XML-TEI data", None, self)
            return

        # Store downloaded XML in input objects...
        for xml_content_idx in range(len(xml_contents)):
            newInput = Input(xml_contents[xml_content_idx], self.captionTitle)
            self.createdInputs.append(newInput)

        # If there"s only one play, the widget"s output is the created Input.
        if len(self.createdInputs) == 1:
            self.segmentation = self.createdInputs[0]

        # Otherwise the widget"s output is a concatenation...
        else:
            self.segmentation = Segmenter.concatenate(
                self.createdInputs,
                self.captionTitle,
                import_labels_as=None,
            )

        # Annotate segments...
        for idx, segment in enumerate(self.segmentation):
            segment.annotations.update(annotations[idx])
            self.segmentation[idx] = segment

        # Store imported URLs as setting.
        self.importedURLs = [
            self.filteredTitleSeg[self.selectedTitles[0]].annotations["url"]
        ]

        # Set status to OK and report data size...
        message = "%i segment@p sent to output " % len(self.segmentation)
        message = pluralize(message, len(self.segmentation))
        numChars = 0
        for segment in self.segmentation:
            segmentLength = len(Segmentation.get_data(segment.str_index))
            numChars += segmentLength
        message += "(%i character@p)." % numChars
        message = pluralize(message, numChars)
        self.infoBox.setText(message)
        progressBar.finish()

        # Clear progress bar.
        progressBar.finish()

        # Send token...
        self.send("XML-TEI data", self.segmentation, self)
        self.sendButton.resetSettingsChangedFlag()

    def getTitleSeg(self):
        """Get title segmentation, either saved locally or online"""

        # Try to open saved file in this module"s directory...
        path = os.path.dirname(
            os.path.abspath(inspect.getfile(inspect.currentframe())))
        try:
            file = open(os.path.join(path, "cached_title_list_ecp"), "rb")
            self.titleSeg = pickle.load(file)
            file.close()
        # Else try to load list from ECP and build new seg...
        except IOError:
            self.titleSeg = self.getTitleListFromECP()

        # Build author and genre lists...
        if self.titleSeg is not None:
            self.filterValues["author"] = Processor.count_in_context(
                units={
                    "segmentation": self.titleSeg,
                    "annotation_key": "author"
                }).col_ids
            self.filterValues["author"].sort()
            self.filterValues["genre"] = Processor.count_in_context(
                units={
                    "segmentation": self.titleSeg,
                    "annotation_key": "genre"
                }).col_ids
            self.filterValues["genre"].sort()

        # Sort the segmentation alphabetically based on titles (nasty hack!)...
        self.titleSeg.buffer.sort(key=lambda s: s.annotations["title"])

        # Update title and filter value lists (only at init and on manual
        # refresh, therefore separate from self.updateGUI).
        self.updateFilterValueList()

    def refreshTitleSeg(self):
        """Refresh title segmentation from website"""
        self.titleSeg = self.getTitleListFromECP()
        # Update title and filter value lists (only at init and on manual
        # refresh, therefore separate from self.updateGUI).
        self.updateFilterValueList()

    def getTitleListFromECP(self):
        """Fetch titles from the ECP website"""

        self.infoBox.customMessage(
            "Fetching data from ECP website, please wait")

        # Attempt to connect to ECP...
        try:
            response = urllib.request.urlopen(self.base_url)
            base_html = response.read().decode('utf-8')
            self.infoBox.customMessage("Done fetching data from ECP website.")

        # If unable to connect (somehow)...
        except:

            # Set Info box and widget to "warning" state.
            self.infoBox.noDataSent(warning="Couldn't access ECP website.")

            # Empty title list box.
            self.titleLabels = list()

            # Reset output channel.
            self.send("XML-TEI data", None, self)
            return None

        # Otherwise store HTML content in LTTL Input object.
        base_html_seg = Input(base_html)

        # Remove accents from the data...
        recoded_seg, _ = Segmenter.recode(base_html_seg, remove_accents=True)

        # Extract table containing titles...
        genresListSeg = Segmenter.import_xml(
            segmentation=recoded_seg,
            element="ul",
            conditions={"id": re.compile(r"^genres-list")},
        )

        # Extract genre annotation...
        genreSeg = Segmenter.tokenize(
            segmentation=genresListSeg,
            regexes=[(re.compile(r'<a id[^>]+>(.+?)</a.+?(?=<a id|$)(?s)'), \
            "tokenize", {"genre": "&1"})],
            import_annotations=False,
        )

        # Extract works...
        titleSeg = Segmenter.tokenize(
            segmentation=genreSeg,
            regexes=[(re.compile(r'<li class="bibl".+?</span>(?s)'), \
            "tokenize")],
        )

        # Extract annotations...
        titleSeg = Segmenter.tokenize(
            segmentation=titleSeg,
            regexes=[
                (re.compile(r"^.*>\n(.+?)</span>.*$(?s)"), "tokenize", {
                    "author": "&1"
                }),
                (re.compile(r'^.*href="(/works/.+?\.shtml)">.*$(?s)'),
                 "tokenize", {
                     "url": "&1"
                 }),
                (re.compile(r'^.*shtml">(.*)</a>.*$(?s)'), "tokenize", {
                    "title": "&1"
                }),
            ],
            merge_duplicates=True,
        )

        # Try to save list in this module"s directory for future reference...
        path = os.path.dirname(
            os.path.abspath(inspect.getfile(inspect.currentframe())))
        try:
            file = open(os.path.join(path, "cached_title_list_ecp"), "wb")
            pickle.dump(titleSeg, file, -1)
            file.close()
        except IOError:
            pass

        # Remove warning (if any)...
        self.error(0)
        self.warning(0)

        return titleSeg

    def updateFilterValueList(self):
        """Update the list of filter values"""

        # In Advanced settings mode, populate filter value list...
        if self.titleSeg is not None and self.displayAdvancedSettings:
            self.filterValueCombo.clear()
            self.filterValueCombo.addItem("(all)")
            for filterValue in self.filterValues[self.filterCriterion]:
                self.filterValueCombo.addItem(filterValue)

        # Reset filterValue if needed...
        if self.filterValue not in [
                self.filterValueCombo.itemText(i)
                for i in range(self.filterValueCombo.count())
        ]:
            self.filterValue = "(all)"
        else:
            self.filterValue = self.filterValue

        self.updateTitleList()

    def updateTitleList(self):
        """Update the list of titles"""

        # If titleSeg has not been loaded for some reason, skip.
        if self.titleSeg is None:
            return

        # In Advanced settings mode, get list of selected titles...
        if self.displayAdvancedSettings and self.filterValue != "(all)":
            self.filteredTitleSeg, _ = Segmenter.select(
                segmentation=self.titleSeg,
                regex=re.compile(r"^%s$" % self.filterValue),
                annotation_key=self.filterCriterion,
            )
        else:
            self.filteredTitleSeg = self.titleSeg

        # If criterion is not "genre" and his filter value not "all",
        # group titles with different genres...

        # Create a dictionary with "author" and "title" as key...

        unique_titles = dict()
        for title in self.filteredTitleSeg:
            title_id = (
                title.annotations["author"],
                title.annotations["title"],
            )
            try:
                unique_titles[title_id].append(title)
            except KeyError:
                unique_titles[title_id] = [title]

        # Create a list with new annotation comporting all genres...
        new_title_segments = list()
        for unique_title in unique_titles.values():
            title_genres = list()
            new_title_segments.append(unique_title[0])
            title_genres.append(unique_title[0].annotations["genre"])
            for equivalent_title in unique_title[1:]:
                title_genres.append(equivalent_title.annotations["genre"])
            new_title_segments[-1].annotations["genre"] = ", ".join(
                sorted(list(set(title_genres))))

        self.filteredTitleSeg = Segmentation(None)
        self.filteredTitleSeg.extend(new_title_segments)

        # Populate titleLabels list with the titles...
        self.titleLabels = sorted(
            [s.annotations["title"] for s in self.filteredTitleSeg])

        # Add specification (author, year and genre, depending on criterion)...
        titleLabels = self.titleLabels[:]
        for idx, titleLabel in enumerate(titleLabels):
            specs = list()
            if (self.displayAdvancedSettings == False
                    or self.filterCriterion != "author"
                    or self.filterValue == "(all)"):
                specs.append(self.filteredTitleSeg[idx].annotations["author"])
            if (self.displayAdvancedSettings == False
                    or self.filterCriterion != "genre"
                    or self.filterValue == "(all)"):
                specs.append(self.filteredTitleSeg[idx].annotations["genre"])
            titleLabels[idx] = titleLabel + " (%s)" % "; ".join(specs)
        self.titleLabels = titleLabels

        # Reset selectedTitles if needed...
        if not set(self.importedURLs).issubset(
                set(u.annotations["url"] for u in self.filteredTitleSeg)):
            self.selectedTitles = list()
        else:
            self.selectedTitles = self.selectedTitles

        self.sendButton.settingsChanged()

    def updateGUI(self):
        """Update GUI state"""
        if self.displayAdvancedSettings:
            self.advancedSettings.setVisible(True)
        else:
            self.advancedSettings.setVisible(False)

        if len(self.titleLabels) > 0:
            self.selectedTitles = self.selectedTitles

    def clearCreatedInputs(self):
        """Delete all Input objects that have been created."""
        for i in self.createdInputs:
            Segmentation.set_data(i[0].str_index, None)
        del self.createdInputs[:]

    def onDeleteWidget(self):
        """Free memory when widget is deleted (overriden method)"""
        self.clearCreatedInputs()

    # The following method need to be copied (without any change) in
    # every Textable widget...

    def setCaption(self, title):
        if 'captionTitle' in dir(self):
            changed = title != self.captionTitle
            super().setCaption(title)
            if changed:
                self.sendButton.settingsChanged()
        else:
            super().setCaption(title)
class SuperTextFiles(OWTextableBaseWidget):
    """Textable widget to import PDF files and if necessary to do an Optical
    Character Recognition (OCR)"""

    #----------------------------------------------------------------------
    # Widget's metadata...

    name = "Super Text Files"
    description = "Import data from raw text and PDF files"
    icon = "icons/SuperTextFiles.svg"
    priority = 1  # TODO

    #----------------------------------------------------------------------
    # Channel definitions....

    inputs = [('Message', JSONMessage, "inputMessage", widget.Single)]
    outputs = [('Text data', Segmentation)]

    #----------------------------------------------------------------------
    # Layout parameters...

    want_main_area = False

    #----------------------------------------------------------------------
    # Settings...

    settingsHandler = VersionedSettingsHandler(
        version=__version__.rsplit(".", 1)[0])

    files = settings.Setting([])
    encoding = settings.Setting('(auto-detect)')
    autoNumber = settings.Setting(False)
    autoNumberKey = settings.Setting(u'num')
    importFilenames = settings.Setting(True)
    importFilenamesKey = settings.Setting(u'filename')
    lastLocation = settings.Setting('.')
    displayAdvancedSettings = settings.Setting(False)
    file = settings.Setting(u'')

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        # Other attributes...
        self.segmentation = None
        self.createdInputs = list()
        self.fileLabels = list()
        self.selectedFileLabels = list()
        self.newFiles = u''
        self.newAnnotationKey = u''
        self.newAnnotationValue = u''
        self.pdfPassword = u''  # SuperTextFiles
        self.ocrForce = False  # SuperTextFiles
        self.ocrLanguages = u'eng'  # SuperTextFiles
        self.infoBox = InfoBox(widget=self.controlArea)
        self.sendButton = SendButton(
            widget=self.controlArea,
            master=self,
            callback=self.sendData,
            infoBoxAttribute='infoBox',
            sendIfPreCallback=self.updateGUI,
        )
        self.advancedSettings = AdvancedSettings(
            widget=self.controlArea,
            master=self,
            callback=self.sendButton.settingsChanged,
        )

        # GUI...

        # Advanced settings checkbox...
        self.advancedSettings.draw()

        # BASIC GUI...

        # Basic file box
        basicFileBox = gui.widgetBox(
            widget=self.controlArea,
            box=u'Source',
            orientation='vertical',
            addSpace=False,
        )
        basicFileBoxLine1 = gui.widgetBox(
            widget=basicFileBox,
            box=False,
            orientation='horizontal',
        )
        gui.lineEdit(
            widget=basicFileBoxLine1,
            master=self,
            value='file',
            orientation='horizontal',
            label=u'File path:',
            labelWidth=101,
            callback=self.sendButton.settingsChanged,
            tooltip=(u"The path of the file."),
        )
        gui.separator(widget=basicFileBoxLine1, width=5)
        gui.button(
            widget=basicFileBoxLine1,
            master=self,
            label=u'Browse',
            callback=self.browse,
            tooltip=(u"Open a dialog for selecting file."),
        )
        gui.separator(widget=basicFileBox, width=3)
        advancedEncodingsCombobox = gui.comboBox(
            widget=basicFileBox,
            master=self,
            value='encoding',
            items=getPredefinedEncodings(),
            sendSelectedValue=True,
            orientation='horizontal',
            label=u'Encoding:',
            labelWidth=101,
            callback=self.sendButton.settingsChanged,
            tooltip=(u"Select input file(s) encoding."),
        )
        addSeparatorAfterDefaultEncodings(advancedEncodingsCombobox)
        addAutoDetectEncoding(advancedEncodingsCombobox)
        gui.separator(widget=basicFileBox, width=3)
        self.advancedSettings.basicWidgets.append(basicFileBox)
        self.advancedSettings.basicWidgetsAppendSeparator()

        # ADVANCED GUI...

        defaultLabelWidth = 120  # SuperTextFiles

        # File box
        fileBox = gui.widgetBox(
            widget=self.controlArea,
            box=u'Sources',
            orientation='vertical',
            addSpace=False,
        )
        fileBoxLine1 = gui.widgetBox(
            widget=fileBox,
            box=False,
            orientation='horizontal',
            addSpace=True,
        )
        self.fileListbox = gui.listBox(
            widget=fileBoxLine1,
            master=self,
            value='selectedFileLabels',
            labels='fileLabels',
            callback=self.updateFileBoxButtons,
            tooltip=(
                u"The list of files whose content will be imported.\n"
                u"\nIn the output segmentation, the content of each\n"
                u"file appears in the same position as in the list.\n"
                u"\nColumn 1 shows the file's name.\n"
                u"Column 2 shows the file's annotation (if any).\n"
                # Start SuperTextFiles
                # u"Column 3 shows the file's encoding." # removed
                u"Column 3 shows the file's password (if any).\n"
                u"Column 4 shows the file's languages (if any).\n"
                u"Column 5 shows if OCR is forced.\n"
                u"Column 6 shows the file's encoding."
                # End SuperTextFiles
            ),
        )
        font = QFont()
        font.setFamily('Courier')
        font.setStyleHint(QFont.Courier)
        font.setPixelSize(12)
        self.fileListbox.setFont(font)
        fileBoxCol2 = gui.widgetBox(
            widget=fileBoxLine1,
            orientation='vertical',
        )
        self.moveUpButton = gui.button(
            widget=fileBoxCol2,
            master=self,
            label=u'Move Up',
            callback=self.moveUp,
            tooltip=(u"Move the selected file upward in the list."),
        )
        self.moveDownButton = gui.button(
            widget=fileBoxCol2,
            master=self,
            label=u'Move Down',
            callback=self.moveDown,
            tooltip=(u"Move the selected file downward in the list."),
        )
        self.removeButton = gui.button(
            widget=fileBoxCol2,
            master=self,
            label=u'Remove',
            callback=self.remove,
            tooltip=(u"Remove the selected file from the list."),
        )
        self.clearAllButton = gui.button(
            widget=fileBoxCol2,
            master=self,
            label=u'Clear All',
            callback=self.clearAll,
            tooltip=(u"Remove all files from the list."),
        )
        self.exportButton = gui.button(
            widget=fileBoxCol2,
            master=self,
            label=u'Export List',
            callback=self.exportList,
            tooltip=(u"Open a dialog for selecting a file where the file\n"
                     u"list can be exported in JSON format."),
        )
        self.importButton = gui.button(
            widget=fileBoxCol2,
            master=self,
            label=u'Import List',
            callback=self.importList,
            tooltip=(u"Open a dialog for selecting a file list to\n"
                     u"import (in JSON format). Files from this list\n"
                     u"will be added to those already imported."),
        )
        fileBoxLine2 = gui.widgetBox(
            widget=fileBox,
            box=False,
            orientation='vertical',
        )
        # Add file box
        addFileBox = gui.widgetBox(
            widget=fileBoxLine2,
            box=True,
            orientation='vertical',
        )
        addFileBoxLine1 = gui.widgetBox(
            widget=addFileBox,
            orientation='horizontal',
        )
        gui.lineEdit(
            widget=addFileBoxLine1,
            master=self,
            value='newFiles',
            orientation='horizontal',
            label=u'File path(s):',
            labelWidth=defaultLabelWidth,
            callback=self.updateGUI,
            tooltip=(u"The paths of the files that will be added to the\n"
                     u"list when button 'Add' is clicked.\n\n"
                     u"Successive paths must be separated with ' / ' \n"
                     u"(whitespace + slash + whitespace). Their order in\n"
                     u"the list will be the same as in this field."),
        )
        gui.separator(widget=addFileBoxLine1, width=5)
        gui.button(
            widget=addFileBoxLine1,
            master=self,
            label=u'Browse',
            callback=self.browse,
            tooltip=(u"Open a dialog for selecting files.\n\n"
                     u"To select multiple files at once, either draw a\n"
                     u"selection box around them, or use shift and/or\n"
                     u"ctrl + click.\n\n"
                     u"Selected file paths will appear in the field to\n"
                     u"the left of this button afterwards, ready to be\n"
                     u"added to the list when button 'Add' is clicked."),
        )
        gui.separator(widget=addFileBox, width=3)
        basicEncodingsCombobox = gui.comboBox(
            widget=addFileBox,
            master=self,
            value='encoding',
            items=getPredefinedEncodings(),
            sendSelectedValue=True,
            orientation='horizontal',
            label=u'Encoding:',
            labelWidth=defaultLabelWidth,
            callback=self.updateGUI,
            tooltip=(u"Select input file(s) encoding."),
        )
        addSeparatorAfterDefaultEncodings(basicEncodingsCombobox)
        addAutoDetectEncoding(basicEncodingsCombobox)
        self.encoding = self.encoding
        gui.separator(widget=addFileBox, width=3)
        gui.lineEdit(
            widget=addFileBox,
            master=self,
            value='newAnnotationKey',
            orientation='horizontal',
            label=u'Annotation key:',
            labelWidth=defaultLabelWidth,
            callback=self.updateGUI,
            tooltip=(u"This field lets you specify a custom annotation\n"
                     u"key associated with each file that is about to be\n"
                     u"added to the list."),
        )
        gui.separator(widget=addFileBox, width=3)
        gui.lineEdit(
            widget=addFileBox,
            master=self,
            value='newAnnotationValue',
            orientation='horizontal',
            label=u'Annotation value:',
            labelWidth=defaultLabelWidth,
            callback=self.updateGUI,
            tooltip=(u"This field lets you specify the annotation value\n"
                     u"associated with the above annotation key."),
        )

        ### Start SuperTextFiles addition
        gui.separator(widget=addFileBox, width=3)
        # Field for PDF password
        gui.lineEdit(
            widget=addFileBox,
            master=self,
            value='pdfPassword',
            orientation='horizontal',
            label=u'PDF password:'******'ocrLanguages',
            orientation='horizontal',
            label=u'OCR Language(s):',
            labelWidth=defaultLabelWidth,
            callback=self.updateGUI,
            tooltip=(u"This field lets you specify languages\n"
                     u"for the OCR process. Ex.: fra+ita"),
        )

        gui.checkBox(
            widget=addFileBox,
            master=self,
            value='ocrForce',
            label=u'Force OCR',
            labelWidth=defaultLabelWidth,
            callback=self.updateGUI,
            tooltip=(u"Force to use an OCR detection on this file"),
        )
        ### End SuperTextFiles addition

        gui.separator(widget=addFileBox, width=3)
        self.addButton = gui.button(
            widget=addFileBox,
            master=self,
            label=u'Add',
            callback=self.add,
            tooltip=(u"Add the file(s) currently displayed in the\n"
                     u"'Files' text field to the list.\n\n"
                     u"Each of these files will be associated with the\n"
                     u"specified encoding and annotation (if any).\n\n"
                     u"Other files may be selected afterwards and\n"
                     u"assigned a different encoding and annotation."),
        )
        self.advancedSettings.advancedWidgets.append(fileBox)
        self.advancedSettings.advancedWidgetsAppendSeparator()

        # Options box...
        optionsBox = gui.widgetBox(
            widget=self.controlArea,
            box=u'Options',
            orientation='vertical',
            addSpace=False,
        )
        optionsBoxLine1 = gui.widgetBox(
            widget=optionsBox,
            box=False,
            orientation='horizontal',
        )
        gui.checkBox(
            widget=optionsBoxLine1,
            master=self,
            value='importFilenames',
            label=u'Import file names with key:',
            labelWidth=180,
            callback=self.sendButton.settingsChanged,
            tooltip=(u"Import file names as annotations."),
        )
        self.importFilenamesKeyLineEdit = gui.lineEdit(
            widget=optionsBoxLine1,
            master=self,
            value='importFilenamesKey',
            orientation='horizontal',
            callback=self.sendButton.settingsChanged,
            tooltip=(u"Annotation key for importing file names."),
        )
        gui.separator(widget=optionsBox, width=3)
        optionsBoxLine2 = gui.widgetBox(
            widget=optionsBox,
            box=False,
            orientation='horizontal',
        )
        gui.checkBox(
            widget=optionsBoxLine2,
            master=self,
            value='autoNumber',
            label=u'Auto-number with key:',
            labelWidth=180,
            callback=self.sendButton.settingsChanged,
            tooltip=(u"Annotate files with increasing numeric indices."),
        )
        self.autoNumberKeyLineEdit = gui.lineEdit(
            widget=optionsBoxLine2,
            master=self,
            value='autoNumberKey',
            orientation='horizontal',
            callback=self.sendButton.settingsChanged,
            tooltip=(u"Annotation key for file auto-numbering."),
        )
        gui.separator(widget=optionsBox, width=3)
        self.advancedSettings.advancedWidgets.append(optionsBox)
        self.advancedSettings.advancedWidgetsAppendSeparator()

        gui.rubber(self.controlArea)

        # Send button...
        self.sendButton.draw()

        # Info box...
        self.infoBox.draw()

        self.adjustSizeWithTimer()
        QTimer.singleShot(0, self.sendButton.sendIf)

    def inputMessage(self, message):
        """Handle JSON message on input connection"""
        if not message:
            return
        self.displayAdvancedSettings = True
        self.advancedSettings.setVisible(True)
        self.clearAll()
        self.infoBox.inputChanged()
        try:
            json_data = json.loads(message.content)
            temp_files = list()
            for entry in json_data:
                path = entry.get('path', '')
                encoding = entry.get('encoding', '')
                annotationKey = entry.get('annotation_key', '')
                annotationValue = entry.get('annotation_value', '')
                pdfPassword = entry.get('pdf_password', '')  # SuperTextFiles
                ocrLanguages = entry.get('ocr_languages', '')  # SuperTextFiles
                ocrForce = entry.get('ocr_force', '')  # SuperTextFiles

                if path == '' or encoding == '' or ocrForce == '':
                    self.infoBox.setText(
                        u"Please verify keys and values of incoming "
                        u"JSON message.", 'error')
                    self.send('Text data', None, self)
                    return
                temp_files.append((
                    path,
                    encoding,
                    annotationKey,
                    annotationValue,
                    pdfPassword,  # SuperTextFiles
                    ocrLanguages,  # SuperTextFiles
                    ocrForce,  # SuperTextFiles
                ))
            self.files.extend(temp_files)
            self.sendButton.settingsChanged()
        except ValueError:
            self.infoBox.setText(
                u"Please make sure that incoming message is valid JSON.",
                'error')
            self.send('Text data', None, self)
            return

    def sendData(self):
        """Load files, create and send segmentation"""

        # Check that there's something on input...
        if ((self.displayAdvancedSettings and not self.files)
                or not (self.file or self.displayAdvancedSettings)):
            self.infoBox.setText(u'Please select input file.', 'warning')
            self.send('Text data', None, self)
            return

        # Check that autoNumberKey is not empty (if necessary)...
        if self.displayAdvancedSettings and self.autoNumber:
            if self.autoNumberKey:
                autoNumberKey = self.autoNumberKey
            else:
                self.infoBox.setText(
                    u'Please enter an annotation key for auto-numbering.',
                    'warning')
                self.send('Text data', None, self)
                return
        else:
            autoNumberKey = None

        # Clear created Inputs...
        self.clearCreatedInputs()

        fileContents = list()
        annotations = list()
        counter = 1

        if self.displayAdvancedSettings:
            myFiles = self.files
        else:
            myFiles = [[self.file, self.encoding, "", "", "", "eng", False]]

        self.infoBox.setText(u"Processing, please wait...", "warning")
        self.controlArea.setDisabled(True)
        progressBar = ProgressBar(self, iterations=len(myFiles))

        # Open and process each file successively...
        for myFile in myFiles:
            filePath = myFile[0]
            encoding = myFile[1]
            encoding = re.sub(r"[ ]\(.+", "", encoding)
            annotation_key = myFile[2]
            annotation_value = myFile[3]
            pdf_password = myFile[4]  # SuperTextFiles
            ocr_languages = myFile[5]  # SuperTextFiles
            ocr_force = myFile[6]  # SuperTextFiles

            myFiletype = filetype.guess(myFile[0])  # SuperTextFiles

            # Try to open the file...
            self.error()
            # Start SuperTextFiles
            try:
                if myFiletype is None:
                    fileContent = self.extract_raw_text(filePath, encoding)

                elif myFiletype.extension == "pdf":
                    if ocr_force is True:
                        fileContent = self.get_pdf_content(
                            filePath,
                            ocr_languages,
                        )
                    else:
                        if self.is_textual_pdf_file(filePath) is True:
                            fileContent = self.extract_text_from_pdf(filePath)
                        else:
                            fileContent = self.get_pdf_content(
                                filePath,
                                ocr_languages,
                            )

                elif myFiletype.extension in IMG_FILETYPES:
                    fileContent = self.ocrize(filePath, ocr_languages)

                if fileContent == -1:
                    message = u"Couldn't open file."
                    self.infoBox.setText(message, 'error')
                    self.send('Text data', None, self)
                    self.controlArea.setDisabled(False)
                    return

            # End SuperTextFiles

            except IOError as e:
                if "tesseract" in str(e):
                    QMessageBox.warning(None, 'Textable', str(e),
                                        QMessageBox.Ok)
                progressBar.finish()
                if len(myFiles) > 1:
                    message = u"Couldn't open file '%s'." % filePath
                else:
                    message = u"Couldn't open file."
                self.infoBox.setText(message, 'error')
                self.send('Text data', None, self)
                self.controlArea.setDisabled(False)
                return

            # Remove utf-8 BOM if necessary...
            if encoding == u'utf-8':
                fileContent = fileContent.lstrip(
                    codecs.BOM_UTF8.decode('utf-8'))

            # Normalize text (canonical decomposition then composition)...
            fileContent = normalize('NFC', fileContent)

            fileContents.append(fileContent)

            # Annotations...
            annotation = dict()
            if self.displayAdvancedSettings:
                if annotation_key and annotation_value:
                    annotation[annotation_key] = annotation_value
                if self.importFilenames and self.importFilenamesKey:
                    filename = os.path.basename(filePath)
                    annotation[self.importFilenamesKey] = filename
                if self.autoNumber and self.autoNumberKey:
                    annotation[self.autoNumberKey] = counter
                    counter += 1
            annotations.append(annotation)
            progressBar.advance()

        # Create an LTTL.Input for each file...
        if len(fileContents) == 1:
            label = self.captionTitle
        else:
            label = None
        for index in range(len(fileContents)):
            myInput = Input(fileContents[index], label)
            segment = myInput[0]
            segment.annotations.update(annotations[index])
            myInput[0] = segment
            self.createdInputs.append(myInput)

        # If there's only one file, the widget's output is the created Input.
        if len(fileContents) == 1:
            self.segmentation = self.createdInputs[0]
        # Otherwise the widget's output is a concatenation...
        else:
            self.segmentation = Segmenter.concatenate(
                segmentations=self.createdInputs,
                label=self.captionTitle,
                copy_annotations=True,
                import_labels_as=None,
                sort=False,
                auto_number_as=None,
                merge_duplicates=False,
                progress_callback=None,
            )

        message = u'%i segment@p sent to output ' % len(self.segmentation)
        message = pluralize(message, len(self.segmentation))
        numChars = 0
        for segment in self.segmentation:
            segmentLength = len(Segmentation.get_data(segment.str_index))
            numChars += segmentLength
        message += u'(%i character@p).' % numChars
        message = pluralize(message, numChars)
        self.infoBox.setText(message)
        progressBar.finish()
        self.controlArea.setDisabled(False)

        self.send('Text data', self.segmentation, self)
        self.sendButton.resetSettingsChangedFlag()

    def extract_raw_text(self, filePath, encoding):
        """This function receive a filePath and an encoding value and return a
        string with the text of the given file."""
        if encoding == "(auto-detect)":
            detector = UniversalDetector()
            fh = open(filePath, 'rb')
            for line in fh:
                detector.feed(line)
                if detector.done: break
            detector.close()
            fh.close()
            encoding = detector.result['encoding']
        fh = open(
            filePath,
            mode='rU',
            encoding=encoding,
        )
        try:
            i = 0
            fileContent = ""
            chunks = list()
            for chunk in iter(lambda: fh.read(CHUNK_LENGTH), ""):
                chunks.append('\n'.join(chunk.splitlines()))
                i += CHUNK_LENGTH
                if i % (CHUNK_NUM * CHUNK_LENGTH) == 0:
                    fileContent += "".join(chunks)
                    chunks = list()
            if len(chunks):
                fileContent += "".join(chunks)
            del chunks
            return fileContent
        except UnicodeError:
            progressBar.finish()
            if len(myFiles) > 1:
                message = u"Please select another encoding "    \
                        + u"for file %s." % filePath
            else:
                message = u"Please select another encoding."
            self.infoBox.setText(message, 'error')
            self.send('Text data', None, self)
            self.controlArea.setDisabled(False)
            return
        finally:
            fh.close()

    def is_textual_pdf_file(self, filePath):
        """Evaluate the content of the pdf file"""
        with pdfplumber.open(filePath, password=self.pdfPassword) as fh:
            first_page = fh.pages[0]
            text = first_page.extract_text()

            if text is None or text.isspace() is True:
                return False
            else:
                return True

    def extract_text_from_pdf(self, filePath):
        """Extract all readable text contents"""
        fileContent = ""
        with pdfplumber.open(filePath, password=self.pdfPassword) as fh:
            for page in fh.pages:
                fileContent += page.extract_text()

        return fileContent

    def get_pdf_content(self, filePath, languages):
        """ First this function get all texts in the file if exist. Then it
        creates a list of pictures to make the OCR method."""
        text = ""
        with fitz.open(filePath) as doc:
            images = []
            for page in doc:
                text += page.getText("text")
                images += doc.getPageImageList(page.number)

            for image in images:
                xref = image[0]
                picture = fitz.Pixmap(doc, xref)

                if picture.n > 4:  # CMYK colorspace
                    picture = fitz.Pixmap(fitz.csRGB,
                                          picture)  # convert to RGB

                bytes_img = BytesIO(picture.getImageData())

                page_text = self.ocrize(bytes_img, languages)

                if page_text == -1:
                    text = -1
                    break
                elif page_text:
                    text += page_text

        return text

    def ocrize(self, image, languages):
        """Make an OCR on a list of images or an image file"""
        languages = languages.strip()  # remove trailing spaces
        if languages == "":
            languages = "eng"
        try:
            ocrized_text = image_to_string(Image.open(image), lang=languages)
            return ocrized_text
        except TesseractError as e:
            if "load" in str(e):
                QMessageBox.warning(
                    None, 'Textable',
                    "Please make sure all Tesseract parameter files for "
                    "language(s) '%s' have been installed." % languages,
                    QMessageBox.Ok)
            return -1

    def clearCreatedInputs(self):
        for i in self.createdInputs:
            Segmentation.set_data(i[0].str_index, None)
        del self.createdInputs[:]

    def importList(self):
        """Display a FileDialog and import file list"""
        filePath, _ = QFileDialog.getOpenFileName(self, u'Import File List',
                                                  self.lastLocation,
                                                  u'Text files (*)')
        if not filePath:
            return
        self.file = os.path.normpath(filePath)
        self.lastLocation = os.path.dirname(filePath)
        self.error()
        try:
            fileHandle = codecs.open(filePath, encoding='utf8')
            fileContent = fileHandle.read()
            fileHandle.close()
        except IOError:
            QMessageBox.warning(None, 'Textable', "Couldn't open file.",
                                QMessageBox.Ok)
            return
        try:
            json_data = json.loads(fileContent)
            temp_files = list()
            for entry in json_data:
                path = entry.get('path', '')
                encoding = entry.get('encoding', '')
                annotationKey = entry.get('annotation_key', '')
                annotationValue = entry.get('annotation_value', '')
                pdfPassword = entry.get('pdf_password', '')  # SuperTextFiles
                ocrLanguages = entry.get('ocr_languages', '')  # SuperTextFiles
                ocrForce = entry.get('ocr_force', '')  # SuperTextFiles

                if path == '' or encoding == '' or ocrForce == '':
                    QMessageBox.warning(
                        None, 'Textable',
                        "Selected JSON file doesn't have the right keys "
                        "and/or values.", QMessageBox.Ok)
                    return
                temp_files.append((
                    path,
                    encoding,
                    annotationKey,
                    annotationValue,
                    pdfPassword,  # SuperTextFiles
                    ocrLanguages,  # SuperTextFiles
                    ocrForce,  # SuperTextFiles
                ))
            self.files.extend(temp_files)
            if temp_files:
                self.sendButton.settingsChanged()
        except ValueError:
            QMessageBox.warning(None, 'Textable', "JSON parsing error.",
                                QMessageBox.Ok)
            return

    def exportList(self):
        """Display a FileDialog and export file list"""
        toDump = list()
        for myfile in self.files:
            toDump.append({
                'path': myfile[0],
                'encoding': myfile[1],
            })
            if myfile[2] and myfile[3]:
                toDump[-1]['annotation_key'] = myfile[2]
                toDump[-1]['annotation_value'] = myfile[3]
            # Start SuperTextFiles
            if myfile[4]:
                toDump[-1]['pdf_password'] = myfile[4]

            if myfile[5]:
                toDump[-1]['ocr_languages'] = myfile[5]

            toDump[-1]['ocr_force'] = myfile[6]
            # End SuperTextFiles

        filePath, _ = QFileDialog.getSaveFileName(
            self,
            u'Export File List',
            self.lastLocation,
        )

        if filePath:
            self.lastLocation = os.path.dirname(filePath)
            outputFile = codecs.open(
                filePath,
                encoding='utf8',
                mode='w',
                errors='xmlcharrefreplace',
            )
            outputFile.write(
                normalizeCarriageReturns(
                    json.dumps(toDump, sort_keys=True, indent=4)))
            outputFile.close()
            QMessageBox.information(None, 'Textable',
                                    'File list correctly exported',
                                    QMessageBox.Ok)

    def browse(self):
        """Display a FileDialog and select files"""
        if self.displayAdvancedSettings:
            filePathList, _ = QFileDialog.getOpenFileNames(
                self, u'Select Text File(s)', self.lastLocation,
                u'Text files (*)')
            if not filePathList:
                return
            filePathList = [os.path.normpath(f) for f in filePathList]
            self.newFiles = u' / '.join(filePathList)
            self.lastLocation = os.path.dirname(filePathList[-1])
            self.updateGUI()
        else:
            filePath, _ = QFileDialog.getOpenFileName(self, u'Open Text File',
                                                      self.lastLocation,
                                                      u'Text files (*)')
            if not filePath:
                return
            self.file = os.path.normpath(filePath)
            self.lastLocation = os.path.dirname(filePath)
            self.updateGUI()
            self.sendButton.settingsChanged()

    def moveUp(self):
        """Move file upward in Files listbox"""
        if self.selectedFileLabels:
            index = self.selectedFileLabels[0]
            if index > 0:
                temp = self.files[index - 1]
                self.files[index - 1] = self.files[index]
                self.files[index] = temp
                self.selectedFileLabels = [index - 1]
                self.sendButton.settingsChanged()

    def moveDown(self):
        """Move file downward in Files listbox"""
        if self.selectedFileLabels:
            index = self.selectedFileLabels[0]
            if index < len(self.files) - 1:
                temp = self.files[index + 1]
                self.files[index + 1] = self.files[index]
                self.files[index] = temp
                self.selectedFileLabels = [index + 1]
                self.sendButton.settingsChanged()

    def clearAll(self):
        """Remove all files from files attr"""
        del self.files[:]
        del self.selectedFileLabels[:]
        self.sendButton.settingsChanged()

    def remove(self):
        """Remove file from files attr"""
        if self.selectedFileLabels:
            index = self.selectedFileLabels[0]
            self.files.pop(index)
            del self.selectedFileLabels[:]
            self.sendButton.settingsChanged()

    def add(self):
        """Add files to files attr"""
        filePathList = re.split(r' +/ +', self.newFiles)
        for filePath in filePathList:
            encoding = re.sub(r"[ ]\(.+", "", self.encoding)
            self.files.append((
                filePath,
                encoding,
                self.newAnnotationKey,
                self.newAnnotationValue,
                self.pdfPassword,  # SuperTextFiles
                self.ocrLanguages,  # SuperTextFiles
                self.ocrForce,  # SuperTextFiles
            ))
        self.sendButton.settingsChanged()

    def updateGUI(self):
        """Update GUI state"""
        if self.displayAdvancedSettings:
            if self.selectedFileLabels:
                cachedLabel = self.selectedFileLabels[0]
            else:
                cachedLabel = None
            del self.fileLabels[:]
            if self.files:
                filePaths = [f[0] for f in self.files]
                filenames = [os.path.basename(p) for p in filePaths]
                encodings = [f[1] for f in self.files]
                annotations = ['{%s: %s}' % (f[2], f[3]) for f in self.files]
                maxFilenameLen = max([len(n) for n in filenames])
                maxAnnoLen = max([len(a) for a in annotations])
                # Start SuperTextFiles
                pdfPassword = [f[4] for f in self.files]
                ocrLanguages = [f[5] for f in self.files]
                ocrForce = [str(f[6]) for f in self.files]
                maxPdfPasswordLen = max([len(n) for n in pdfPassword])
                maxOcrLanguagesLen = max([len(n) for n in ocrLanguages])
                # End SuperTextFiles

                for index in range(len(self.files)):
                    format = u'%-' + str(maxFilenameLen + 2) + u's'
                    fileLabel = format % filenames[index]
                    if maxAnnoLen > 4:
                        if len(annotations[index]) > 4:
                            format = u'%-' + str(maxAnnoLen + 2) + u's'
                            fileLabel += format % annotations[index]
                        else:
                            fileLabel += u' ' * (maxAnnoLen + 2)

                    # Start SuperTextFiles
                    format = u'%-' + str(maxPdfPasswordLen + 2) + u's'
                    fileLabel += format % pdfPassword[index]

                    format = u'%-' + str(maxOcrLanguagesLen + 2) + u's'
                    fileLabel += format % ocrLanguages[index]

                    format = u'%-' + str(5 + 2) + u's'
                    fileLabel += format % ocrForce[index]
                    # End SuperTextFiles

                    fileLabel += encodings[index]
                    self.fileLabels.append(fileLabel)
            self.fileLabels = self.fileLabels
            if cachedLabel is not None:
                self.sendButton.sendIfPreCallback = None
                self.selectedFileLabels = [cachedLabel]
                self.sendButton.sendIfPreCallback = self.updateGUI
            if self.newFiles:
                if ((self.newAnnotationKey and self.newAnnotationValue)
                        or (not self.newAnnotationKey
                            and not self.newAnnotationValue)):
                    self.addButton.setDisabled(False)
                else:
                    self.addButton.setDisabled(True)
            else:
                self.addButton.setDisabled(True)
            if self.autoNumber:
                self.autoNumberKeyLineEdit.setDisabled(False)
            else:
                self.autoNumberKeyLineEdit.setDisabled(True)
            if self.importFilenames:
                self.importFilenamesKeyLineEdit.setDisabled(False)
            else:
                self.importFilenamesKeyLineEdit.setDisabled(True)
            self.updateFileBoxButtons()
            self.advancedSettings.setVisible(True)
        else:
            self.advancedSettings.setVisible(False)

    def updateFileBoxButtons(self):
        """Update state of File box buttons"""
        if self.selectedFileLabels:
            self.removeButton.setDisabled(False)
            if self.selectedFileLabels[0] > 0:
                self.moveUpButton.setDisabled(False)
            else:
                self.moveUpButton.setDisabled(True)
            if self.selectedFileLabels[0] < len(self.files) - 1:
                self.moveDownButton.setDisabled(False)
            else:
                self.moveDownButton.setDisabled(True)
        else:
            self.moveUpButton.setDisabled(True)
            self.moveDownButton.setDisabled(True)
            self.removeButton.setDisabled(True)
        if len(self.files):
            self.clearAllButton.setDisabled(False)
            self.exportButton.setDisabled(False)
        else:
            self.clearAllButton.setDisabled(True)
            self.exportButton.setDisabled(True)

    def setCaption(self, title):
        if 'captionTitle' in dir(self):
            changed = title != self.captionTitle
            super().setCaption(title)
            if changed:
                self.sendButton.settingsChanged()
        else:
            super().setCaption(title)

    def onDeleteWidget(self):
        self.clearCreatedInputs()
Ejemplo n.º 30
0
    def __init__(self):
        """Widget creator."""

        super().__init__()

        # Other attributes...
        self.segmentation = None
        self.createdInputs = list()
        self.titleSeg = None
        self.filteredTitleSeg = None
        self.filterValues = dict()
        self.base_url =     \
          u"http://www.eighteenthcenturypoetry.org/works/#genres"
        self.document_base_url =     \
          u"http://www.eighteenthcenturypoetry.org"

        # Next two instructions are helpers from TextableUtils. Corresponding
        # interface elements are declared here and actually drawn below (at
        # their position in the UI)...
        self.infoBox = InfoBox(widget=self.controlArea)
        self.sendButton = SendButton(
            widget=self.controlArea,
            master=self,
            callback=self.sendData,
            infoBoxAttribute="infoBox",
            sendIfPreCallback=self.updateGUI,
        )

        # The AdvancedSettings class, also from TextableUtils, facilitates
        # the management of basic vs. advanced interface. An object from this
        # class (here assigned to self.advancedSettings) contains two lists
        # (basicWidgets and advancedWidgets), to which the corresponding
        # widgetBoxes must be added.
        self.advancedSettings = AdvancedSettings(
            widget=self.controlArea,
            master=self,
            callback=self.updateFilterValueList,
        )

        # User interface...

        # Advanced settings checkbox (basic/advanced interface will appear
        # immediately after it...
        self.advancedSettings.draw()

        # Filter box (advanced settings only)
        filterBox = gui.widgetBox(
            widget=self.controlArea,
            box="Filter",
            orientation="vertical",
        )
        filterCriterionCombo = gui.comboBox(
            widget=filterBox,
            master=self,
            value="filterCriterion",
            items=["author", "genre"],
            sendSelectedValue=True,
            orientation="horizontal",
            label="Criterion:",
            labelWidth=120,
            callback=self.updateFilterValueList,
            tooltip=(
                "Please select a criterion for searching the title list\n"),
        )
        filterCriterionCombo.setMinimumWidth(120)
        gui.separator(widget=filterBox, height=3)
        self.filterValueCombo = gui.comboBox(
            widget=filterBox,
            master=self,
            value="filterValue",
            sendSelectedValue=True,
            orientation="horizontal",
            label="Value:",
            labelWidth=120,
            callback=self.updateTitleList,
            tooltip=("Please select a value for the chosen criterion."),
        )
        gui.separator(widget=filterBox, height=3)

        # The following lines add filterBox (and a vertical separator) to the
        # advanced interface...
        self.advancedSettings.advancedWidgets.append(filterBox)
        self.advancedSettings.advancedWidgetsAppendSeparator()

        # Title box
        titleBox = gui.widgetBox(
            widget=self.controlArea,
            box="Titles",
            orientation="vertical",
        )
        self.titleListbox = gui.listBox(
            widget=titleBox,
            master=self,
            value="selectedTitles",  # setting (list)
            labels="titleLabels",  # setting (list)
            callback=self.sendButton.settingsChanged,
            tooltip="The list of titles whose content will be imported",
        )
        self.titleListbox.setMinimumHeight(150)
        self.titleListbox.setSelectionMode(3)
        gui.separator(widget=titleBox, height=3)
        gui.button(
            widget=titleBox,
            master=self,
            label="Refresh",
            callback=self.refreshTitleSeg,
            tooltip="Connect to ECP website and refresh list.",
        )
        gui.separator(widget=titleBox, height=3)

        gui.separator(widget=self.controlArea, height=3)

        gui.rubber(self.controlArea)

        # Now Info box and Send button must be drawn...
        self.sendButton.draw()
        self.infoBox.draw()

        # This initialization step needs to be done after infoBox has been
        # drawn (because getTitleSeg may need to display an error message).
        self.getTitleSeg()

        # Send data if autoSend.
        self.sendButton.sendIf()

        self.setMinimumWidth(350)
        self.adjustSizeWithTimer()