Esempio n. 1
0
class SignInForm(QDialog):
    """Dialog asking for a server, a username and a password.

    When ``connect`` succeeds, the returned object is stored in
    ``resolwe_instance``, the credentials are written to the
    ``CredentialManager`` and the dialog is accepted.
    """

    def __init__(self, flags, *args, **kwargs):
        """Build the form widgets; all arguments are forwarded to ``QDialog``."""
        super().__init__(flags, *args, **kwargs)
        self.cm: CredentialManager = CredentialManager(CREDENTIAL_MANAGER_SERVICE)

        self.setWindowTitle('Sign in')
        self.setFixedSize(400, 250)

        # Server selection: predefined URLs, but free text is allowed too.
        self.server_cb_label = QLabel('Server *')
        self.server_cb = QComboBox(self)
        self.server_cb.addItems(RESOLWE_URLS)
        self.server_cb.setEditable(True)

        self.username_label = QLabel('Username *')
        self.username_line_edit = QLineEdit(self)
        self.username_line_edit.setPlaceholderText('Enter correct username')
        self.username_line_edit.returnPressed.connect(self.sign_in)
        self.username_line_edit.textChanged.connect(self.handle_sign_in_btn)

        self.password_label = QLabel('Password *')
        self.password_line_edit = QLineEdit(self)
        self.password_line_edit.setPlaceholderText('Enter correct password')
        self.password_line_edit.returnPressed.connect(self.sign_in)
        self.password_line_edit.textChanged.connect(self.handle_sign_in_btn)
        self.password_line_edit.setEchoMode(QLineEdit.Password)

        # The button stays disabled until both text fields are non-empty
        # (see handle_sign_in_btn).
        self.sign_in_btn = QPushButton('Sign in', self)
        self.sign_in_btn.setDisabled(True)
        self.sign_in_btn.clicked.connect(self.sign_in)

        # Hidden until an authentication attempt fails.
        self.error_msg = QLabel('Unable to log in with provided credentials.')
        self.error_msg.setStyleSheet('color:red')
        self.error_msg.hide()

        layout = QVBoxLayout(self)
        for widget in (
            self.server_cb_label,
            self.server_cb,
            self.username_label,
            self.username_line_edit,
            self.password_label,
            self.password_line_edit,
            self.error_msg,
        ):
            layout.addWidget(widget)
        layout.addStretch()
        layout.addWidget(self.sign_in_btn)

        self.resolwe_instance = None

    def handle_sign_in_btn(self):
        """Enable the sign-in button only when both text fields are filled."""
        self.sign_in_btn.setEnabled(
            bool(self.username_line_edit.text() and self.password_line_edit.text())
        )

    def sign_in(self):
        """Validate the form and try to connect with the given credentials.

        A missing field is highlighted in red and the attempt is aborted.
        On ``ResolweAuthException`` the error label is shown. On success the
        credentials are stored and the dialog is accepted.
        """
        # Reset highlighting left over from a previous attempt. An empty
        # string clears the style sheet.
        self.server_cb_label.setStyleSheet('')
        self.username_label.setStyleSheet('')
        self.password_label.setStyleSheet('')
        self.error_msg.hide()

        server = self.server_cb.currentText()
        # Text typed by the user takes precedence; stored credentials are only
        # a fallback for empty fields. Preferring the stored values would make
        # it impossible to replace stale saved credentials after a failed
        # login, because the newly typed ones would be ignored.
        username = self.username_line_edit.text() or self.cm.username
        password = self.password_line_edit.text() or self.cm.password

        if not server:
            self.server_cb_label.setStyleSheet('color:red')
            return

        if not username:
            self.username_label.setStyleSheet('color:red')
            return

        if not password:
            self.password_label.setStyleSheet('color:red')
            return

        try:
            self.resolwe_instance = connect(username, password, url=server)
        except ResolweAuthException:
            self.error_msg.show()
            return

        # Persist only credentials that were accepted by the server.
        self.cm.username = username
        self.cm.password = password
        self.accept()
class OWImageEmbedding(OWWidget):
    """Widget that embeds the images referenced by an input table.

    The embedding itself runs on a single worker thread. Progress updates
    and the final result are delivered back to the widget's own thread via
    the ``__progress_set`` and ``__set_results`` slots.
    """
    name = "Image Embedding"
    description = "Image embedding through deep neural networks."
    icon = "icons/ImageEmbedding.svg"
    priority = 150

    want_main_area = False
    _auto_apply = Setting(default=True)

    class Inputs:
        images = Input('Images', Table)

    class Outputs:
        embeddings = Output('Embeddings', Table, default=True)
        skipped_images = Output('Skipped Images', Table)

    # Indices of the current selections in the two combo boxes.
    cb_image_attr_current_id = Setting(default=0)
    cb_embedder_current_id = Setting(default=0)

    _NO_DATA_INFO_TEXT = "No data on input."

    def __init__(self):
        super().__init__()
        # Embedder identifiers ordered by their 'order' entry.
        self.embedders = sorted(EMBEDDERS_INFO,
                                key=lambda k: EMBEDDERS_INFO[k]['order'])
        self._image_attributes = None
        self._input_data = None
        self._log = logging.getLogger(__name__)
        self._task = None
        self._setup_layout()
        self._image_embedder = None
        self._executor = concurrent.futures.ThreadPoolExecutor(max_workers=1)
        # Stay blocked until the embedder has been created; the creation is
        # deferred to the event loop by the zero-delay timer below.
        self.setBlocking(True)
        QTimer.singleShot(0, self._init_server_connection)

    def _setup_layout(self):
        """Create the info labels, the two combo boxes and the buttons."""
        self.controlArea.setMinimumWidth(self.controlArea.sizeHint().width())
        self.layout().setSizeConstraint(QLayout.SetFixedSize)

        widget_box = widgetBox(self.controlArea, 'Info')
        self.input_data_info = widgetLabel(widget_box, self._NO_DATA_INFO_TEXT)
        self.connection_info = widgetLabel(widget_box, "")

        widget_box = widgetBox(self.controlArea, 'Settings')
        self.cb_image_attr = comboBox(
            widget=widget_box,
            master=self,
            value='cb_image_attr_current_id',
            label='Image attribute:',
            orientation=Qt.Horizontal,
            callback=self._cb_image_attr_changed
        )

        self.cb_embedder = comboBox(
            widget=widget_box,
            master=self,
            value='cb_embedder_current_id',
            label='Embedder:',
            orientation=Qt.Horizontal,
            callback=self._cb_embedder_changed
        )
        names = [EMBEDDERS_INFO[e]['name'] +
                 (" (local)" if EMBEDDERS_INFO[e].get("is_local") else "")
                 for e in self.embedders]
        self.cb_embedder.setModel(VariableListModel(names))
        # A stored index may no longer be valid for the current embedder list.
        if self.cb_embedder_current_id >= len(self.embedders):
            self.cb_embedder_current_id = 0
        self.cb_embedder.setCurrentIndex(self.cb_embedder_current_id)

        current_embedder = self.embedders[self.cb_embedder_current_id]
        self.embedder_info = widgetLabel(
            widget_box,
            EMBEDDERS_INFO[current_embedder]['description']
        )

        self.auto_commit_widget = auto_commit(
            widget=self.controlArea,
            master=self,
            value='_auto_apply',
            label='Apply',
            commit=self.commit
        )

        self.cancel_button = QPushButton(
            'Cancel',
            icon=self.style().standardIcon(QStyle.SP_DialogCancelButton),
        )
        self.cancel_button.clicked.connect(self.cancel)
        hbox = hBox(self.controlArea)
        hbox.layout().addWidget(self.cancel_button)
        self.cancel_button.setDisabled(True)

    def _init_server_connection(self):
        """Create the embedder for the selected model and show its status."""
        self.setBlocking(False)
        self._image_embedder = ImageEmbedder(
            model=self.embedders[self.cb_embedder_current_id],
            layer='penultimate'
        )
        self._set_server_info(
            self._image_embedder.is_connected_to_server()
        )

    @Inputs.images
    def set_data(self, data):
        """Handle a new input table (or its removal) and start embedding."""
        if not data:
            self._input_data = None
            self.Outputs.embeddings.send(None)
            self.Outputs.skipped_images.send(None)
            self.input_data_info.setText(self._NO_DATA_INFO_TEXT)
            return

        self._image_attributes = ImageEmbedder.filter_image_attributes(data)
        if not self._image_attributes:
            self.input_data_info.setText(
                "Data with {:d} instances, but without image attributes."
                .format(len(data)))
            self._input_data = None
            # Nothing can be embedded from this table, so results computed
            # for a previous input must not stay on the outputs.
            self.Outputs.embeddings.send(None)
            self.Outputs.skipped_images.send(None)
            return

        # A stored index may be out of range for this table's attributes.
        if self.cb_image_attr_current_id >= len(self._image_attributes):
            self.cb_image_attr_current_id = 0

        self.cb_image_attr.setModel(VariableListModel(self._image_attributes))
        self.cb_image_attr.setCurrentIndex(self.cb_image_attr_current_id)

        self._input_data = data
        self.input_data_info.setText(
            "Data with {:d} instances.".format(len(data)))

        self._cb_image_attr_changed()

    def _cb_image_attr_changed(self):
        """Re-run the embedding after the image attribute selection changed."""
        self.commit()

    def _cb_embedder_changed(self):
        """Switch to the newly selected embedder and re-run if data exists."""
        current_embedder = self.embedders[self.cb_embedder_current_id]
        self._image_embedder = ImageEmbedder(
            model=current_embedder,
            layer='penultimate'
        )
        self.embedder_info.setText(
            EMBEDDERS_INFO[current_embedder]['description'])
        if self._input_data:
            self.input_data_info.setText(
                "Data with {:d} instances.".format(len(self._input_data)))
            self.commit()
        else:
            self.input_data_info.setText(self._NO_DATA_INFO_TEXT)
        self._set_server_info(self._image_embedder.is_connected_to_server())

    def commit(self):
        """Start an embedding task for the currently selected attribute.

        A task that is still running is cancelled first. Without an embedder
        only the connection info is updated; without usable input both
        outputs are cleared. Otherwise the controls are disabled, the image
        paths are resolved against the attribute's "origin" and the embedder
        is submitted to the executor.
        """
        if self._task is not None:
            self.cancel()

        if self._image_embedder is None:
            self._set_server_info(connected=False)
            return

        if not self._image_attributes or self._input_data is None:
            self.Outputs.embeddings.send(None)
            self.Outputs.skipped_images.send(None)
            return

        self._set_server_info(connected=True)
        self.cancel_button.setDisabled(False)
        self.cb_image_attr.setDisabled(True)
        self.cb_embedder.setDisabled(True)

        file_paths_attr = self._image_attributes[self.cb_image_attr_current_id]
        file_paths = self._input_data[:, file_paths_attr].metas.flatten()
        origin = file_paths_attr.attributes.get("origin", "")
        remote_schemes = ("http", "https", "ftp", "data")
        origin_is_remote = urlparse(origin).scheme in remote_schemes
        # urljoin replaces the last path segment unless the base ends in "/".
        if origin_is_remote and not origin.endswith("/"):
            origin += "/"

        assert file_paths_attr.is_string
        assert file_paths.dtype == np.dtype('O')

        # True where the path is missing; those rows are not embedded.
        file_paths_mask = file_paths == file_paths_attr.Unknown
        file_paths_valid = file_paths[~file_paths_mask]
        for i, path in enumerate(file_paths_valid):
            if urlparse(path).scheme in remote_schemes:
                continue  # already an absolute URL
            if origin_is_remote:
                file_paths_valid[i] = urljoin(origin, path)
            else:
                file_paths_valid[i] = os.path.join(origin, path)

        ticks = iter(np.linspace(0.0, 100.0, file_paths_valid.size))
        set_progress = qconcurrent.methodinvoke(
            self, "__progress_set", (float,))

        def advance(success=True):
            if success:
                # Default to 100 so an extra callback invocation cannot raise
                # StopIteration inside the worker.
                set_progress(next(ticks, 100.0))

        def cancel():
            task.future.cancel()
            task.cancelled = True
            task.embedder.set_canceled(True)

        embedder = self._image_embedder

        def run_embedding(paths):
            return embedder(
                file_paths=paths, image_processed_callback=advance)

        self.auto_commit_widget.setDisabled(True)
        self.progressBarInit(processEvents=None)
        self.progressBarSet(0.0, processEvents=None)
        self.setBlocking(True)

        f = self._executor.submit(run_embedding, file_paths_valid)
        f.add_done_callback(
            qconcurrent.methodinvoke(self, "__set_results", (object,)))

        # `task` is read by the `cancel` closure above, which is only called
        # after this assignment.
        task = self._task = namespace(
            file_paths_mask=file_paths_mask,
            file_paths_valid=file_paths_valid,
            file_paths=file_paths,
            embedder=embedder,
            cancelled=False,
            cancel=cancel,
            future=f,
        )
        self._log.debug("Starting embedding task for %i images",
                        file_paths.size)

    @Slot(float)
    def __progress_set(self, value):
        """Update the progress bar; ignored when no task is active."""
        assert self.thread() is QThread.currentThread()
        if self._task is not None:
            self.progressBarSet(value)

    @Slot(object)
    def __set_results(self, f):
        """Receive the finished future ``f`` and publish its result.

        Futures that do not belong to the current task are dropped. On any
        exception both outputs are cleared; a ``ConnectionError`` additionally
        switches the connection info to "not connected".
        """
        assert self.thread() is QThread.currentThread()
        if self._task is None or self._task.future is not f:
            self._log.info("Reaping stale task")
            return

        assert f.done()

        task, self._task = self._task, None
        self.auto_commit_widget.setDisabled(False)
        self.cancel_button.setDisabled(True)
        self.cb_image_attr.setDisabled(False)
        self.cb_embedder.setDisabled(False)
        self.progressBarFinished(processEvents=None)
        self.setBlocking(False)

        try:
            embeddings = f.result()
        except ConnectionError:
            self._log.exception("Error")
            self.Outputs.embeddings.send(None)
            self.Outputs.skipped_images.send(None)
            self._set_server_info(connected=False)
            return
        except Exception as err:
            self._log.exception("Error")
            self.error(
                "\n".join(traceback.format_exception_only(type(err), err)))
            self.Outputs.embeddings.send(None)
            self.Outputs.skipped_images.send(None)
            return

        assert self._input_data is not None
        assert len(self._input_data) == len(task.file_paths_mask)

        # Missing paths/urls were filtered out. Restore the full embeddings
        # array from information stored in task.file_paths_mask ...
        embeddings_all = [None] * len(task.file_paths_mask)
        for i, embedding in zip(np.flatnonzero(~task.file_paths_mask),
                                embeddings):
            embeddings_all[i] = embedding
        # NOTE(review): with a mix of None and sequence entries, recent NumPy
        # versions may refuse to build this array without dtype=object --
        # confirm against what ImageEmbedder.prepare_output_data expects.
        embeddings_all = np.array(embeddings_all)
        self._send_output_signals(embeddings_all)

    def _send_output_signals(self, embeddings):
        """Send both outputs and report the number of skipped images."""
        embedded_images, skipped_images, num_skipped =\
            ImageEmbedder.prepare_output_data(self._input_data, embeddings)
        self.Outputs.embeddings.send(embedded_images)
        self.Outputs.skipped_images.send(skipped_images)
        # Compare by value: an identity test against the literal 0 is not a
        # reliable integer comparison.
        if num_skipped != 0:
            self.input_data_info.setText(
                "Data with {:d} instances, {:d} images skipped.".format(
                    len(self._input_data), num_skipped))

    def _set_server_info(self, connected):
        """Clear widget messages and show the connection state."""
        self.clear_messages()
        if self._image_embedder is None:
            return

        if connected:
            self.connection_info.setText("Connected to server.")
        elif self._image_embedder.is_local_embedder():
            self.connection_info.setText("Using local embedder.")
        else:
            self.connection_info.setText("Not connected to server.")
            self.warning("Click Apply to try again.")

    def onDeleteWidget(self):
        """Cancel running work and release the embedder and the executor."""
        self.cancel()
        super().onDeleteWidget()
        if self._image_embedder is not None:
            self._image_embedder.__exit__(None, None, None)
        # No task can be pending after cancel(); let the worker thread end.
        self._executor.shutdown(wait=False)

    def cancel(self):
        """Cancel the running task, if any, and restore the idle GUI state."""
        if self._task is not None:
            task, self._task = self._task, None
            task.cancel()
            # wait until done
            try:
                task.future.exception()
            except qconcurrent.CancelledError:
                pass

            self.auto_commit_widget.setDisabled(False)
            self.cancel_button.setDisabled(True)
            self.progressBarFinished(processEvents=None)
            self.setBlocking(False)
            self.cb_image_attr.setDisabled(False)
            self.cb_embedder.setDisabled(False)
            self._image_embedder.set_canceled(False)
            # reset the connection.
            connected = self._image_embedder.reconnect_to_server()
            self._set_server_info(connected=connected)
Esempio n. 3
0
class OWImageEmbedding(OWWidget):
    """Widget that embeds the images referenced by an input table.

    The embedding runs through an executor limited to one thread. Progress
    updates and the final result are delivered back to the widget's own
    thread via the ``__progress_set`` and ``__set_results`` slots.
    """
    name = "Image Embedding"
    description = "Image embedding through deep neural networks."
    icon = "icons/ImageEmbedding.svg"
    priority = 150

    want_main_area = False
    _auto_apply = Setting(default=True)

    class Inputs:
        images = Input('Images', Table)

    class Outputs:
        embeddings = Output('Embeddings', Table, default=True)
        skipped_images = Output('Skipped Images', Table)

    # Indices of the current selections in the two combo boxes.
    cb_image_attr_current_id = Setting(default=0)
    cb_embedder_current_id = Setting(default=0)

    _NO_DATA_INFO_TEXT = "No data on input."

    def __init__(self):
        super().__init__()
        # Embedder identifiers ordered by their 'order' entry.
        self.embedders = sorted(EMBEDDERS_INFO,
                                key=lambda k: EMBEDDERS_INFO[k]['order'])
        self._image_attributes = None
        self._input_data = None
        self._log = logging.getLogger(__name__)
        self._task = None
        self._setup_layout()
        self._image_embedder = None
        self._executor = qconcurrent.ThreadExecutor(
            self, threadPool=QThreadPool(maxThreadCount=1)
        )
        # Stay blocked until the embedder has been created; the creation is
        # deferred to the event loop by the zero-delay timer below.
        self.setBlocking(True)
        QTimer.singleShot(0, self._init_server_connection)

    def _setup_layout(self):
        """Create the info labels, the two combo boxes and the buttons."""
        self.controlArea.setMinimumWidth(self.controlArea.sizeHint().width())
        self.layout().setSizeConstraint(QLayout.SetFixedSize)

        widget_box = widgetBox(self.controlArea, 'Info')
        self.input_data_info = widgetLabel(widget_box, self._NO_DATA_INFO_TEXT)
        self.connection_info = widgetLabel(widget_box, "")

        widget_box = widgetBox(self.controlArea, 'Settings')
        self.cb_image_attr = comboBox(
            widget=widget_box,
            master=self,
            value='cb_image_attr_current_id',
            label='Image attribute:',
            orientation=Qt.Horizontal,
            callback=self._cb_image_attr_changed
        )

        self.cb_embedder = comboBox(
            widget=widget_box,
            master=self,
            value='cb_embedder_current_id',
            label='Embedder:',
            orientation=Qt.Horizontal,
            callback=self._cb_embedder_changed
        )
        self.cb_embedder.setModel(VariableListModel(
            [EMBEDDERS_INFO[e]['name'] for e in self.embedders]))
        # A stored index may no longer be valid for the current embedder list.
        if self.cb_embedder_current_id >= len(self.embedders):
            self.cb_embedder_current_id = 0
        self.cb_embedder.setCurrentIndex(self.cb_embedder_current_id)

        current_embedder = self.embedders[self.cb_embedder_current_id]
        self.embedder_info = widgetLabel(
            widget_box,
            EMBEDDERS_INFO[current_embedder]['description']
        )

        self.auto_commit_widget = auto_commit(
            widget=self.controlArea,
            master=self,
            value='_auto_apply',
            label='Apply',
            commit=self.commit
        )

        self.cancel_button = QPushButton(
            'Cancel',
            icon=self.style().standardIcon(QStyle.SP_DialogCancelButton),
        )
        self.cancel_button.clicked.connect(self.cancel)
        hbox = hBox(self.controlArea)
        hbox.layout().addWidget(self.cancel_button)
        self.cancel_button.setDisabled(True)

    def _init_server_connection(self):
        """Create the embedder for the selected model and show its status."""
        self.setBlocking(False)
        self._image_embedder = ImageEmbedder(
            model=self.embedders[self.cb_embedder_current_id],
            layer='penultimate'
        )
        self._set_server_info(
            self._image_embedder.is_connected_to_server()
        )

    @Inputs.images
    def set_data(self, data):
        """Handle a new input table (or its removal) and start embedding."""
        if not data:
            self._input_data = None
            self.Outputs.embeddings.send(None)
            self.Outputs.skipped_images.send(None)
            self.input_data_info.setText(self._NO_DATA_INFO_TEXT)
            return

        self._image_attributes = ImageEmbedder.filter_image_attributes(data)
        if not self._image_attributes:
            self.input_data_info.setText(
                "Data with {:d} instances, but without image attributes."
                .format(len(data)))
            self._input_data = None
            # Nothing can be embedded from this table, so results computed
            # for a previous input must not stay on the outputs.
            self.Outputs.embeddings.send(None)
            self.Outputs.skipped_images.send(None)
            return

        # A stored index may be out of range for this table's attributes.
        if self.cb_image_attr_current_id >= len(self._image_attributes):
            self.cb_image_attr_current_id = 0

        self.cb_image_attr.setModel(VariableListModel(self._image_attributes))
        self.cb_image_attr.setCurrentIndex(self.cb_image_attr_current_id)

        self._input_data = data
        self.input_data_info.setText(
            "Data with {:d} instances.".format(len(data)))

        self._cb_image_attr_changed()

    def _cb_image_attr_changed(self):
        """Re-run the embedding after the image attribute selection changed."""
        self.commit()

    def _cb_embedder_changed(self):
        """Switch to the newly selected embedder and re-run if data exists."""
        current_embedder = self.embedders[self.cb_embedder_current_id]
        self._image_embedder = ImageEmbedder(
            model=current_embedder,
            layer='penultimate'
        )
        self.embedder_info.setText(
            EMBEDDERS_INFO[current_embedder]['description'])
        if self._input_data:
            self.input_data_info.setText(
                "Data with {:d} instances.".format(len(self._input_data)))
            self.commit()
        else:
            self.input_data_info.setText(self._NO_DATA_INFO_TEXT)

    def commit(self):
        """Start an embedding task for the currently selected attribute.

        A task that is still running is cancelled first. Without an embedder
        only the connection info is updated; without usable input both
        outputs are cleared. Otherwise the controls are disabled, the image
        paths are resolved against the attribute's "origin" and the embedder
        is submitted to the executor.
        """
        if self._task is not None:
            self.cancel()

        if self._image_embedder is None:
            self._set_server_info(connected=False)
            return

        if not self._image_attributes or self._input_data is None:
            self.Outputs.embeddings.send(None)
            self.Outputs.skipped_images.send(None)
            return

        self._set_server_info(connected=True)
        self.cancel_button.setDisabled(False)
        self.cb_image_attr.setDisabled(True)
        self.cb_embedder.setDisabled(True)

        file_paths_attr = self._image_attributes[self.cb_image_attr_current_id]
        file_paths = self._input_data[:, file_paths_attr].metas.flatten()
        origin = file_paths_attr.attributes.get("origin", "")
        remote_schemes = ("http", "https", "ftp", "data")
        origin_is_remote = urlparse(origin).scheme in remote_schemes
        # urljoin replaces the last path segment unless the base ends in "/".
        if origin_is_remote and not origin.endswith("/"):
            origin += "/"

        assert file_paths_attr.is_string
        assert file_paths.dtype == np.dtype('O')

        # True where the path is missing; those rows are not embedded.
        file_paths_mask = file_paths == file_paths_attr.Unknown
        file_paths_valid = file_paths[~file_paths_mask]
        for i, path in enumerate(file_paths_valid):
            if urlparse(path).scheme in remote_schemes:
                continue  # already an absolute URL
            if origin_is_remote:
                file_paths_valid[i] = urljoin(origin, path)
            else:
                file_paths_valid[i] = os.path.join(origin, path)

        ticks = iter(np.linspace(0.0, 100.0, file_paths_valid.size))
        set_progress = qconcurrent.methodinvoke(
            self, "__progress_set", (float,))

        def advance(success=True):
            if success:
                # Default to 100 so an extra callback invocation cannot raise
                # StopIteration inside the worker.
                set_progress(next(ticks, 100.0))

        def cancel():
            task.future.cancel()
            task.cancelled = True
            task.embedder.cancelled = True

        embedder = self._image_embedder

        def run_embedding(paths):
            return embedder(
                file_paths=paths, image_processed_callback=advance)

        self.auto_commit_widget.setDisabled(True)
        self.progressBarInit(processEvents=None)
        self.progressBarSet(0.0, processEvents=None)
        self.setBlocking(True)

        f = self._executor.submit(run_embedding, file_paths_valid)
        f.add_done_callback(
            qconcurrent.methodinvoke(self, "__set_results", (object,)))

        # `task` is read by the `cancel` closure above, which is only called
        # after this assignment.
        task = self._task = namespace(
            file_paths_mask=file_paths_mask,
            file_paths_valid=file_paths_valid,
            file_paths=file_paths,
            embedder=embedder,
            cancelled=False,
            cancel=cancel,
            future=f,
        )
        self._log.debug("Starting embedding task for %i images",
                        file_paths.size)

    @Slot(float)
    def __progress_set(self, value):
        """Update the progress bar; ignored when no task is active."""
        assert self.thread() is QThread.currentThread()
        if self._task is not None:
            self.progressBarSet(value)

    @Slot(object)
    def __set_results(self, f):
        """Receive the finished future ``f`` and publish its result.

        Futures that do not belong to the current task are dropped. On any
        exception both outputs are cleared; a ``ConnectionError`` additionally
        switches the connection info to the disconnected state.
        """
        assert self.thread() is QThread.currentThread()
        if self._task is None or self._task.future is not f:
            self._log.info("Reaping stale task")
            return

        assert f.done()

        task, self._task = self._task, None
        self.auto_commit_widget.setDisabled(False)
        self.cancel_button.setDisabled(True)
        self.cb_image_attr.setDisabled(False)
        self.cb_embedder.setDisabled(False)
        self.progressBarFinished(processEvents=None)
        self.setBlocking(False)

        try:
            embeddings = f.result()
        except ConnectionError:
            self._log.exception("Error")
            self.Outputs.embeddings.send(None)
            self.Outputs.skipped_images.send(None)
            self._set_server_info(connected=False)
            return
        except Exception as err:
            self._log.exception("Error")
            self.error(
                "\n".join(traceback.format_exception_only(type(err), err)))
            self.Outputs.embeddings.send(None)
            self.Outputs.skipped_images.send(None)
            return

        assert self._input_data is not None
        assert len(self._input_data) == len(task.file_paths_mask)

        # Missing paths/urls were filtered out. Restore the full embeddings
        # array from information stored in task.file_paths_mask ...
        embeddings_all = [None] * len(task.file_paths_mask)
        for i, embedding in zip(np.flatnonzero(~task.file_paths_mask),
                                embeddings):
            embeddings_all[i] = embedding
        # NOTE(review): with a mix of None and sequence entries, recent NumPy
        # versions may refuse to build this array without dtype=object --
        # confirm against what ImageEmbedder.prepare_output_data expects.
        embeddings_all = np.array(embeddings_all)
        self._send_output_signals(embeddings_all)

    def _send_output_signals(self, embeddings):
        """Send both outputs and report the number of skipped images."""
        embedded_images, skipped_images, num_skipped =\
            ImageEmbedder.prepare_output_data(self._input_data, embeddings)
        self.Outputs.embeddings.send(embedded_images)
        self.Outputs.skipped_images.send(skipped_images)
        # Compare by value: an identity test against the literal 0 is not a
        # reliable integer comparison.
        if num_skipped != 0:
            self.input_data_info.setText(
                "Data with {:d} instances, {:d} images skipped.".format(
                    len(self._input_data), num_skipped))

    def _set_server_info(self, connected):
        """Clear widget messages and show the connection state."""
        self.clear_messages()
        if connected:
            self.connection_info.setText("Connected to server.")
        else:
            self.connection_info.setText("No connection with server.")
            self.warning("Click Apply to try again.")

    def onDeleteWidget(self):
        """Cancel running work and release the embedder."""
        self.cancel()
        super().onDeleteWidget()
        if self._image_embedder is not None:
            self._image_embedder.__exit__(None, None, None)

    def cancel(self):
        """Cancel the running task, if any, and restore the idle GUI state."""
        if self._task is not None:
            task, self._task = self._task, None
            task.cancel()
            # wait until done
            try:
                task.future.exception()
            except qconcurrent.CancelledError:
                pass

            self.auto_commit_widget.setDisabled(False)
            self.cancel_button.setDisabled(True)
            self.progressBarFinished(processEvents=None)
            self.setBlocking(False)
            self.cb_image_attr.setDisabled(False)
            self.cb_embedder.setDisabled(False)
            self._image_embedder.cancelled = False
            # reset the connection.
            connected = self._image_embedder.reconnect_to_server()
            self._set_server_info(connected=connected)
Esempio n. 4
0
class OWDocumentEmbedding(OWWidget, ConcurrentWidgetMixin):
    """Widget that embeds the documents of an input corpus.

    The work is delegated to ``run_pretrained_embedder`` through
    ``ConcurrentWidgetMixin``; results and failures arrive in ``on_done``,
    ``on_partial_result`` and ``on_exception``.
    """
    name = "Document Embedding"
    description = "Document embedding using pretrained models."
    keywords = ['embedding', 'document embedding', 'text']
    icon = 'icons/TextEmbedding.svg'
    priority = 300

    want_main_area = False
    _auto_apply = Setting(default=True)

    class Inputs:
        corpus = Input('Corpus', Corpus)

    class Outputs:
        new_corpus = Output('Embeddings', Corpus, default=True)
        skipped = Output('Skipped documents', Corpus)

    class Error(OWWidget.Error):
        no_connection = Msg("No internet connection. " +
                            "Please establish a connection or " +
                            "use another vectorizer.")
        unexpected_error = Msg('Embedding error: {}')

    class Warning(OWWidget.Warning):
        unsuccessful_embeddings = Msg('Some embeddings were unsuccessful.')

    # Indices into LANGUAGES and into the aggregator list, respectively.
    language = Setting(default=LANGUAGES.index("English"))
    aggregator = Setting(default=0)

    def __init__(self):
        OWWidget.__init__(self)
        ConcurrentWidgetMixin.__init__(self)

        self.aggregators = AGGREGATORS
        self.corpus = None
        self.new_corpus = None
        self._setup_layout()

    @staticmethod
    def sizeHint():
        return QSize(300, 300)

    def _setup_layout(self):
        """Create the settings combo boxes and the apply/cancel buttons."""
        self.controlArea.setMinimumWidth(self.sizeHint().width())
        self.layout().setSizeConstraint(QLayout.SetFixedSize)

        widget_box = widgetBox(self.controlArea, 'Settings')

        self.language_cb = comboBox(widget=widget_box,
                                    master=self,
                                    value='language',
                                    label='Language: ',
                                    orientation=Qt.Horizontal,
                                    items=LANGUAGES,
                                    callback=self._option_changed,
                                    searchable=True)

        self.aggregator_cb = comboBox(widget=widget_box,
                                      master=self,
                                      value='aggregator',
                                      label='Aggregator: ',
                                      orientation=Qt.Horizontal,
                                      items=self.aggregators,
                                      callback=self._option_changed)

        self.auto_commit_widget = auto_commit(widget=self.controlArea,
                                              master=self,
                                              value='_auto_apply',
                                              label='Apply',
                                              commit=self.commit,
                                              box=False)

        self.cancel_button = QPushButton('Cancel',
                                         icon=self.style().standardIcon(
                                             QStyle.SP_DialogCancelButton))

        self.cancel_button.clicked.connect(self.cancel)

        hbox = hBox(self.controlArea)
        hbox.layout().addWidget(self.cancel_button)
        self.cancel_button.setDisabled(True)

    @Inputs.corpus
    def set_data(self, data):
        """Accept a new corpus (or its removal) and trigger the embedding."""
        self.Warning.clear()
        self.cancel()

        if not data:
            self.corpus = None
            self.clear_outputs()
            return

        self.corpus = data
        self.unconditional_commit()

    def _option_changed(self):
        """Re-run the embedding after a combo box selection changed."""
        self.commit()

    def commit(self):
        """Start the embedding task for the current corpus and settings."""
        if self.corpus is None:
            self.clear_outputs()
            return

        # Errors of a previous run no longer apply once a new run starts.
        self.Error.clear()
        self.cancel_button.setDisabled(False)

        self.start(run_pretrained_embedder, self.corpus,
                   LANGS_TO_ISO[LANGUAGES[self.language]],
                   self.aggregators[self.aggregator])

    def on_done(self, embeddings: Tuple[Corpus, Corpus]) -> None:
        """Publish the finished result: (embedded corpus, skipped corpus)."""
        self.cancel_button.setDisabled(True)
        embedded, skipped = embeddings[0], embeddings[1]
        self._send_output_signals(embedded, skipped)

    def on_partial_result(self, result: Any):
        """Cancel the task and show the no-connection error.

        The ``result`` value itself is not inspected.
        """
        self.cancel()
        self.Error.no_connection()

    def on_exception(self, ex: Exception):
        """Show an error for a failed task and clear the outputs."""
        self.cancel_button.setDisabled(True)
        if isinstance(ex, EmbeddingConnectionError):
            self.Error.no_connection()
        else:
            self.Error.unexpected_error(type(ex).__name__)
        self.cancel()
        self.clear_outputs()

    def cancel(self):
        """Disable the cancel button and cancel the running task."""
        self.cancel_button.setDisabled(True)
        super().cancel()

    def _send_output_signals(self, embeddings, skipped):
        """Send both outputs and keep the skipped-documents warning in sync."""
        self.Outputs.new_corpus.send(embeddings)
        self.Outputs.skipped.send(skipped)
        unsuccessful = len(skipped) if skipped else 0
        if unsuccessful > 0:
            self.Warning.unsuccessful_embeddings()
        else:
            # Without this the warning from an earlier run would remain
            # visible after a run in which nothing was skipped.
            self.Warning.unsuccessful_embeddings.clear()

    def clear_outputs(self):
        """Send ``None`` on both outputs."""
        self._send_output_signals(None, None)

    def onDeleteWidget(self):
        """Cancel any running task before the widget is deleted."""
        self.cancel()
        super().onDeleteWidget()
Esempio n. 5
0
class OWTweetProfiler(OWWidget, ConcurrentWidgetMixin):
    """Widget that runs a tweet emotion profiler over an input corpus.

    The chosen string meta attribute, emotion model and output mode are
    passed to ``run_profiler`` through ``self.start``; the corpus returned
    by the task is sent to the single output.
    """
    name = "Tweet Profiler"
    description = "Detect Ekman's, Plutchik's or Profile of Mood States's " \
                  "emotions in tweets."
    icon = "icons/TweetProfiler.svg"
    priority = 330
    keywords = ["Twitter"]

    class Inputs:
        corpus = Input("Corpus", Corpus)

    class Outputs:
        corpus = Output("Corpus", Corpus)

    want_main_area = False
    resizing_enabled = False

    # Persisted options. ``model_name`` and ``output_mode`` are overwritten
    # in generate_grid_layout with the first value the profiler offers.
    model_name = Setting('')
    output_mode = Setting('')
    tweet_attr = Setting(0)     # index into self.strings_attrs
    auto_commit = Setting(True)

    class Error(OWWidget.Error):
        server_down = Msg('Our servers are not responding. '
                          'Please try again later.')
        unexpected_error = Msg('Unknown error: {}')

    def __init__(self):
        """Create the profiler, the option controls and the buttons."""
        OWWidget.__init__(self)
        ConcurrentWidgetMixin.__init__(self)
        self.corpus = None
        self.last_config = None     # to avoid reruns with the same params
        self.strings_attrs = []
        self.profiler = TweetProfiler(on_server_down=self.Error.server_down)

        # Settings
        self.controlArea.layout().addWidget(self.generate_grid_layout())

        # Auto commit
        buttons_layout = QHBoxLayout()
        buttons_layout.addSpacing(15)
        buttons_layout.addWidget(
            gui.auto_commit(None, self, 'auto_commit', 'Commit', box=False)
        )
        self.controlArea.layout().addLayout(buttons_layout)

        self.cancel_button = QPushButton(
            'Cancel',
            icon=self.style()
            .standardIcon(QStyle.SP_DialogCancelButton))

        self.cancel_button.clicked.connect(self.cancel)

        hbox = gui.hBox(self.controlArea)
        hbox.layout().addWidget(self.cancel_button)
        # Enabled only while a task is running (see commit / on_done).
        self.cancel_button.setDisabled(True)

    def generate_grid_layout(self):
        """Build and return the 'Options' group box with the three combos.

        Also selects the first model name and the first output mode offered
        by the profiler, when there are any.
        """
        box = QGroupBox(title='Options')

        layout = QGridLayout()
        layout.setSpacing(10)
        row = 0

        self.tweet_attr_combo = gui.comboBox(None, self, 'tweet_attr',
                                             callback=self.apply)
        layout.addWidget(QLabel('Attribute:'))
        layout.addWidget(self.tweet_attr_combo, row, 1)

        row += 1
        self.model_name_combo = gui.comboBox(None, self, 'model_name',
                                             items=self.profiler.model_names,
                                             sendSelectedValue=True,
                                             callback=self.apply)
        if self.profiler.model_names:
            self.model_name = self.profiler.model_names[0]  # select 0th
        layout.addWidget(QLabel('Emotions:'))
        layout.addWidget(self.model_name_combo, row, 1)

        row += 1
        self.output_mode_combo = gui.comboBox(None, self, 'output_mode',
                                              items=self.profiler.output_modes,
                                              sendSelectedValue=True,
                                              callback=self.apply)
        if self.profiler.output_modes:
            self.output_mode = self.profiler.output_modes[0]    # select 0th
        layout.addWidget(QLabel('Output:'))
        layout.addWidget(self.output_mode_combo, row, 1)

        box.setLayout(layout)
        return box

    @Inputs.corpus
    def set_corpus(self, corpus):
        """Store the input corpus, refresh the attribute combo and commit.

        A running task is cancelled first and the remembered configuration
        is reset so that the next commit is never skipped.
        """
        self.cancel()
        self.corpus = corpus
        self.last_config = None

        if corpus is not None:
            self.strings_attrs = [a for a in self.corpus.domain.metas
                                  if isinstance(a, StringVariable)]
            self.tweet_attr_combo.setModel(VariableListModel(self.strings_attrs))
            self.tweet_attr_combo.currentIndexChanged.emit(self.tweet_attr)

            # select the first feature from 'text_features' if present
            ind = [self.strings_attrs.index(tf)
                   for tf in corpus.text_features
                   if tf in self.strings_attrs]
            if ind:
                self.tweet_attr = ind[0]

        self.commit()

    def apply(self):
        """Callback of the option combos; commits the current selection."""
        self.commit()

    def _get_config(self):
        """Return the options that identify a run, as a tuple."""
        return self.tweet_attr, self.model_name, self.output_mode

    def commit(self):
        """Start profiling unless the same configuration already finished.

        Sends None when there is no corpus, or when the selected attribute
        index does not refer to one of the corpus' string meta attributes.
        """
        self.Error.clear()

        if self.last_config == self._get_config():
            return

        if self.corpus is None:
            self.Outputs.corpus.send(None)
            return

        # ``tweet_attr`` is a persisted index; the current corpus may have
        # fewer string metas than the one it was saved for, or none at all.
        if (not self.strings_attrs
                or self.tweet_attr >= len(self.strings_attrs)):
            self.Outputs.corpus.send(None)
            return

        self.cancel_button.setDisabled(False)
        self.start(run_profiler, self.profiler, self.corpus,
                   self.strings_attrs[self.tweet_attr],
                   self.model_name, self.output_mode)

    def on_done(self, result):
        """Remember the finished configuration and send the result."""
        self.cancel_button.setDisabled(True)
        self.last_config = self._get_config()
        self.Outputs.corpus.send(result)

    def on_partial_result(self, result):
        """Cancel the running task; ``result`` itself is not used."""
        self.cancel()

    def on_exception(self, ex):
        """Show the exception class name as an error and cancel the task."""
        self.Error.unexpected_error(type(ex).__name__)
        self.cancel()

    def cancel(self):
        """Disable the cancel button and delegate to the parent ``cancel``."""
        self.cancel_button.setDisabled(True)
        super().cancel()

    def send_report(self):
        """Report the selected attribute, emotion model and output mode."""
        self.report_items([
            ('Attribute', self.strings_attrs[self.tweet_attr]
             if len(self.strings_attrs) > self.tweet_attr else ''),
            ('Emotions', self.model_name),
            ('Output', self.output_mode),
        ])
Esempio n. 6
0
class OWImageEmbedding(OWWidget, ConcurrentWidgetMixin):
    """Widget that embeds the images referenced by an input table.

    The embedding is started through ``self.start`` with ``run_embedding``;
    this class handles the option combos, the cancel button, the status
    summaries and the two outputs (embeddings and skipped images).
    """
    name = "Image Embedding"
    description = "Image embedding through deep neural networks."
    keywords = ["embedding", "image", "image embedding"]
    icon = "icons/ImageEmbedding.svg"
    priority = 150

    want_main_area = False
    _auto_apply = Setting(default=True)

    class Inputs:
        images = Input("Images", Table)

    class Outputs:
        embeddings = Output("Embeddings", Table, default=True)
        skipped_images = Output("Skipped Images", Table)

    class Warning(OWWidget.Warning):
        switched_local_embedder = Msg(
            "No internet connection: switched to local embedder")
        no_image_attribute = Msg(
            "Please provide data with an image attribute.")
        images_skipped = Msg("{} images are skipped.")

    class Error(OWWidget.Error):
        unexpected_error = Msg("Embedding error: {}")

    # Persisted combo selections (indices into the respective lists).
    cb_image_attr_current_id = Setting(default=0)
    cb_embedder_current_id = Setting(default=0)
    # Selections used for the last (re)computation; see _cb_changed.
    _previous_attr_id = None
    _previous_embedder_id = None

    _NO_DATA_INFO_TEXT = "No data on input."

    def __init__(self):
        """Initialise both base classes, the state and the layout."""
        OWWidget.__init__(self)
        ConcurrentWidgetMixin.__init__(self)

        # Embedder keys ordered by their "order" entry in EMBEDDERS_INFO.
        self.embedders = sorted(list(EMBEDDERS_INFO),
                                key=lambda k: EMBEDDERS_INFO[k]["order"])
        self._image_attributes = None
        self._input_data = None
        self._log = logging.getLogger(__name__)
        self._task = None
        self._setup_layout()

    def _setup_layout(self):
        """Create the settings box, the apply control and the cancel button."""
        self.controlArea.setMinimumWidth(self.controlArea.sizeHint().width())
        self.layout().setSizeConstraint(QLayout.SetFixedSize)

        widget_box = widgetBox(self.controlArea, "Settings")
        self.cb_image_attr = comboBox(
            widget=widget_box,
            master=self,
            value="cb_image_attr_current_id",
            label="Image attribute:",
            orientation=Qt.Horizontal,
            callback=self._cb_image_attr_changed,
        )

        self.cb_embedder = comboBox(
            widget=widget_box,
            master=self,
            value="cb_embedder_current_id",
            label="Embedder:",
            orientation=Qt.Horizontal,
            callback=self._cb_embedder_changed,
        )
        names = [
            EMBEDDERS_INFO[e]["name"] +
            (" (local)" if EMBEDDERS_INFO[e].get("is_local") else "")
            for e in self.embedders
        ]
        self.cb_embedder.setModel(VariableListModel(names))
        # A stored index may not fit the current list of embedders.
        if not self.cb_embedder_current_id < len(self.embedders):
            self.cb_embedder_current_id = 0
        self.cb_embedder.setCurrentIndex(self.cb_embedder_current_id)

        current_embedder = self.embedders[self.cb_embedder_current_id]
        self.embedder_info = widgetLabel(
            widget_box, EMBEDDERS_INFO[current_embedder]["description"])

        self.auto_commit_widget = auto_commit(
            widget=self.controlArea,
            master=self,
            value="_auto_apply",
            label="Apply",
            commit=self.commit,
        )

        self.cancel_button = QPushButton(
            "Cancel",
            icon=self.style().standardIcon(QStyle.SP_DialogCancelButton),
        )
        self.cancel_button.clicked.connect(self.cancel)
        hbox = hBox(self.controlArea)
        hbox.layout().addWidget(self.cancel_button)
        # Enabled only while a task is running (see commit / on_done).
        self.cancel_button.setDisabled(True)

    def set_input_data_summary(self, data):
        """Show the number of input instances, or 'no input' for None."""
        if data is None:
            self.info.set_input_summary(self.info.NoInput)
        else:
            self.info.set_input_summary(str(len(data)),
                                        f"Data have {len(data)} instances")

    def set_output_data_summary(self, data_emb, data_skip):
        """Show how many images were embedded and how many were skipped.

        When both arguments are None the 'no output' summary is shown.
        """
        if data_emb is None and data_skip is None:
            self.info.set_output_summary(self.info.NoOutput)
        else:
            success = 0 if data_emb is None else len(data_emb)
            skip = 0 if data_skip is None else len(data_skip)
            self.info.set_output_summary(
                f"{success}",
                f"{success} images successfully embedded ,\n"
                f"{skip} images skipped.",
            )

    @Inputs.images
    def set_data(self, data):
        """Accept new input data and start embedding it.

        Falsy data, or data without image attributes, leaves the widget
        without input data and with cleared outputs.
        """
        self.Warning.clear()
        # Stop a task still running on the previous input. On the early
        # returns below nothing else would cancel it, and its on_done would
        # then run with ``_input_data`` set to None.
        self.cancel()
        self.set_input_data_summary(data)
        self.clear_outputs()

        if not data:
            self._input_data = None
            return

        self._image_attributes = ImageEmbedder.filter_image_attributes(data)
        # A stored index may not fit the attributes of the new data.
        if not self.cb_image_attr_current_id < len(self._image_attributes):
            self.cb_image_attr_current_id = 0

        self.cb_image_attr.setModel(VariableListModel(self._image_attributes))
        self.cb_image_attr.setCurrentIndex(self.cb_image_attr_current_id)

        if not self._image_attributes:
            self._input_data = None
            self.Warning.no_image_attribute()
            return

        self._input_data = data
        self._previous_attr_id = self.cb_image_attr_current_id
        self._previous_embedder_id = self.cb_embedder_current_id

        self.unconditional_commit()

    def _cb_image_attr_changed(self):
        """Callback of the image attribute combo."""
        self._cb_changed()

    def _cb_embedder_changed(self):
        """Callback of the embedder combo; also updates the description."""
        self.Warning.switched_local_embedder.clear()
        current_embedder = self.embedders[self.cb_embedder_current_id]
        self.embedder_info.setText(
            EMBEDDERS_INFO[current_embedder]["description"])
        self._cb_changed()

    def _cb_changed(self):
        """Cancel and recommit when either combo selection really changed."""
        if (self._previous_embedder_id != self.cb_embedder_current_id
                or self._previous_attr_id != self.cb_image_attr_current_id):
            # recompute embeddings only when selected value in dropdown changes
            self._previous_embedder_id = self.cb_embedder_current_id
            self._previous_attr_id = self.cb_image_attr_current_id
            self.cancel()
            self.commit()

    def commit(self):
        """Start the embedding task for the current data and selections.

        Clears the outputs instead when there is no usable input.
        """
        if not self._image_attributes or self._input_data is None:
            self.clear_outputs()
            return

        self.cancel_button.setDisabled(False)

        embedder_name = self.embedders[self.cb_embedder_current_id]
        image_attribute = self._image_attributes[self.cb_image_attr_current_id]
        self.start(run_embedding, self._input_data, image_attribute,
                   embedder_name)
        self.Error.unexpected_error.clear()

    def on_done(self, result: Result) -> None:
        """
        Invoked when task is done.

        Parameters
        ----------
        result
            Embedding results.
        """
        self.cancel_button.setDisabled(True)
        # Every input row must be either embedded or reported as skipped.
        assert len(self._input_data) == len(result.embedding or []) + len(
            result.skip_images or [])
        self._send_output_signals(result)

    def on_partial_result(self, result: str) -> None:
        """Switch to the local embedder; ``result`` itself is not used."""
        self._switch_to_local_embedder()

    def on_exception(self, ex: Exception) -> None:
        """
        When an exception occurs during the calculation.

        Parameters
        ----------
        ex
            Exception occurred during the embedding.
        """
        # Log once through the widget's own logger.
        self._log.debug(ex, exc_info=ex)
        self.cancel_button.setDisabled(True)
        self.Error.unexpected_error(type(ex).__name__)
        self.clear_outputs()

    def cancel(self):
        """Disable the cancel button and delegate to the parent ``cancel``."""
        self.cancel_button.setDisabled(True)
        super().cancel()

    def _switch_to_local_embedder(self):
        """Warn the user and select the "squeezenet" embedder."""
        self.Warning.switched_local_embedder()
        self.cb_embedder_current_id = self.embedders.index("squeezenet")

    def _send_output_signals(self, result: Result) -> None:
        """Send both outputs and update the warning and output summary."""
        self.Warning.images_skipped.clear()
        self.Outputs.embeddings.send(result.embedding)
        self.Outputs.skipped_images.send(result.skip_images)
        if result.num_skipped != 0:
            self.Warning.images_skipped(result.num_skipped)
        self.set_output_data_summary(result.embedding, result.skip_images)

    def clear_outputs(self):
        """Send an empty result (None on both outputs, nothing skipped)."""
        # The field is ``skip_images`` everywhere it is read (on_done,
        # _send_output_signals), so it is set under that name here.
        self._send_output_signals(
            Result(embedding=None, skip_images=None, num_skipped=0))

    def onDeleteWidget(self):
        """Cancel the task before the parent class handles the deletion."""
        self.cancel()
        super().onDeleteWidget()
class OWImageEmbedding(OWWidget):
    """Widget that embeds the images referenced by an input table.

    This variant runs the embedder on a ``ThreadPoolExecutor`` worker and
    receives progress and results back through the ``__progress_set`` and
    ``__set_results`` slots. The widget is set blocking and its controls
    are disabled for the duration of a task.
    """
    name = "Image Embedding"
    description = "Image embedding through deep neural networks."
    keywords = ["embedding", "image", "image embedding"]
    icon = "icons/ImageEmbedding.svg"
    priority = 150

    want_main_area = False
    _auto_apply = Setting(default=True)

    class Inputs:
        images = Input('Images', Table)

    class Outputs:
        embeddings = Output('Embeddings', Table, default=True)
        skipped_images = Output('Skipped Images', Table)

    class Warning(OWWidget.Warning):
        switched_local_embedder = Msg(
            "No internet connection: switched to local embedder")
        no_image_attribute = Msg("Please provide data with an image attribute.")
        images_skipped = Msg("{} images are skipped.")

    # Persisted combo selections (indices into the respective lists).
    cb_image_attr_current_id = Setting(default=0)
    cb_embedder_current_id = Setting(default=0)

    _NO_DATA_INFO_TEXT = "No data on input."

    def __init__(self):
        """Initialise the state and build the layout."""
        super().__init__()
        # Embedder keys ordered by their 'order' entry in EMBEDDERS_INFO.
        self.embedders = sorted(list(EMBEDDERS_INFO),
                                key=lambda k: EMBEDDERS_INFO[k]['order'])
        self._image_attributes = None
        self._input_data = None
        self._log = logging.getLogger(__name__)
        # Namespace describing the running task, or None (see commit).
        self._task = None
        self._setup_layout()

    def _setup_layout(self):
        """Create the settings box, the apply control and the cancel button."""
        self.controlArea.setMinimumWidth(self.controlArea.sizeHint().width())
        self.layout().setSizeConstraint(QLayout.SetFixedSize)

        widget_box = widgetBox(self.controlArea, 'Settings')
        self.cb_image_attr = comboBox(
            widget=widget_box,
            master=self,
            value='cb_image_attr_current_id',
            label='Image attribute:',
            orientation=Qt.Horizontal,
            callback=self._cb_image_attr_changed
        )

        self.cb_embedder = comboBox(
            widget=widget_box,
            master=self,
            value='cb_embedder_current_id',
            label='Embedder:',
            orientation=Qt.Horizontal,
            callback=self._cb_embedder_changed
        )
        names = [EMBEDDERS_INFO[e]['name'] +
                 (" (local)" if EMBEDDERS_INFO[e].get("is_local") else "")
                 for e in self.embedders]
        self.cb_embedder.setModel(VariableListModel(names))
        # A stored index may not fit the current list of embedders.
        if not self.cb_embedder_current_id < len(self.embedders):
            self.cb_embedder_current_id = 0
        self.cb_embedder.setCurrentIndex(self.cb_embedder_current_id)

        current_embedder = self.embedders[self.cb_embedder_current_id]
        self.embedder_info = widgetLabel(
            widget_box,
            EMBEDDERS_INFO[current_embedder]['description']
        )

        self.auto_commit_widget = auto_commit(
            widget=self.controlArea,
            master=self,
            value='_auto_apply',
            label='Apply',
            commit=self.commit
        )

        self.cancel_button = QPushButton(
            'Cancel',
            icon=self.style().standardIcon(QStyle.SP_DialogCancelButton),
        )
        self.cancel_button.clicked.connect(self.cancel)
        hbox = hBox(self.controlArea)
        hbox.layout().addWidget(self.cancel_button)
        # Enabled only while a task is running (see commit).
        self.cancel_button.setDisabled(True)

    def set_input_data_summary(self, data):
        """Show the number of input instances, or 'no input' for None."""
        if data is None:
            self.info.set_input_summary(self.info.NoInput)
        else:
            self.info.set_input_summary(
                str(len(data)),
                f"Data have {len(data)} instances")

    def set_output_data_summary(self, data_emb, data_skip):
        """Show how many images were embedded and how many were skipped.

        When both arguments are None the 'no output' summary is shown.
        """
        if data_emb is None and data_skip is None:
            self.info.set_output_summary(self.info.NoOutput)
        else:
            success = 0 if data_emb is None else len(data_emb)
            skip = 0 if data_skip is None else len(data_skip)
            self.info.set_output_summary(
                f"{success}",
                f"{success} images successfully embedded ,\n"
                f"{skip} images skipped."
            )

    @Inputs.images
    def set_data(self, data):
        """Accept new input data and commit.

        Falsy data, or data without image attributes, leaves the widget
        without input data and with cleared outputs.
        """
        self.Warning.clear()
        self.set_input_data_summary(data)
        if not data:
            self._input_data = None
            self.clear_outputs()
            return

        self._image_attributes = ImageEmbedder.filter_image_attributes(data)
        # A stored index may not fit the attributes of the new data.
        if not self.cb_image_attr_current_id < len(self._image_attributes):
            self.cb_image_attr_current_id = 0

        self.cb_image_attr.setModel(VariableListModel(self._image_attributes))
        self.cb_image_attr.setCurrentIndex(self.cb_image_attr_current_id)

        if not self._image_attributes:
            self._input_data = None
            self.Warning.no_image_attribute()
            self.clear_outputs()
            return

        self._input_data = data

        self.commit()

    def _cb_image_attr_changed(self):
        """Callback of the image attribute combo."""
        self.commit()

    def connect(self):
        """
        Create the selected embedder. If it is not a local embedder and the
        server cannot be reached, warn the user, select the "squeezenet"
        embedder instead and return an embedder for that model.
        """
        self.Warning.switched_local_embedder.clear()

        # try to connect to current embedder
        embedder = ImageEmbedder(
            model=self.embedders[self.cb_embedder_current_id],
            layer='penultimate'
        )

        if not embedder.is_local_embedder() and \
            not embedder.is_connected_to_server(use_hyper=False):
            # there is a problem with connecting to the server
            # switching to local embedder
            self.Warning.switched_local_embedder()
            del embedder  # remove current embedder
            self.cb_embedder_current_id = self.embedders.index("squeezenet")
            embedder = ImageEmbedder(
                model=self.embedders[self.cb_embedder_current_id],
                layer='penultimate'
            )

        return embedder

    def _cb_embedder_changed(self):
        """Callback of the embedder combo; also updates the description."""
        current_embedder = self.embedders[self.cb_embedder_current_id]
        self.embedder_info.setText(
            EMBEDDERS_INFO[current_embedder]['description'])
        if self._input_data:
            self.commit()

    def commit(self):
        """Start embedding the current data on a worker thread.

        A task that is still registered is cancelled first. The outputs are
        cleared instead when there is no usable input.
        """
        if self._task is not None:
            self.cancel()

        if not self._image_attributes or self._input_data is None:
            self.clear_outputs()
            return

        embedder = self.connect()
        _executor = concurrent.futures.ThreadPoolExecutor(max_workers=1)
        self.cancel_button.setDisabled(False)
        self.cb_image_attr.setDisabled(True)
        self.cb_embedder.setDisabled(True)

        file_paths_attr = self._image_attributes[self.cb_image_attr_current_id]
        file_paths = self._input_data[:, file_paths_attr].metas.flatten()

        file_paths_mask = file_paths == file_paths_attr.Unknown
        file_paths_valid = file_paths[~file_paths_mask]

        # One progress value per image whose path is not Unknown.
        ticks = iter(np.linspace(0.0, 100.0, file_paths_valid.size))
        set_progress = qconcurrent.methodinvoke(
            self, "__progress_set", (float,))

        def advance(success=True):
            if success:
                set_progress(next(ticks))

        def cancel():
            # ``task`` is bound further down, before this can be called.
            task.future.cancel()
            task.cancelled = True
            task.embedder.set_canceled(True)

        def run_embedding():
            return embedder(
                self._input_data, col=file_paths_attr,
                image_processed_callback=advance)

        self.auto_commit_widget.setDisabled(True)
        self.progressBarInit()
        self.progressBarSet(0.0)
        self.setBlocking(True)

        f = _executor.submit(run_embedding)
        f.add_done_callback(
            qconcurrent.methodinvoke(self, "__set_results", (object,)))

        task = self._task = namespace(
            file_paths_mask=file_paths_mask,
            file_paths_valid=file_paths_valid,
            file_paths=file_paths,
            embedder=embedder,
            cancelled=False,
            cancel=cancel,
            future=f,
        )
        self._log.debug("Starting embedding task for %i images",
                        file_paths.size)

    @Slot(float)
    def __progress_set(self, value):
        """Update the progress bar; ignored when no task is registered."""
        assert self.thread() is QThread.currentThread()
        if self._task is not None:
            self.progressBarSet(value)

    @Slot(object)
    def __set_results(self, f):
        """Handle the finished future ``f`` of the embedding task.

        Futures that do not belong to the registered task are ignored. On
        any exception the outputs are cleared; exceptions other than
        ``ConnectionError`` are additionally shown as a widget error.
        """
        assert self.thread() is QThread.currentThread()
        if self._task is None or self._task.future is not f:
            self._log.info("Reaping stale task")
            return

        assert f.done()

        task, self._task = self._task, None
        self.auto_commit_widget.setDisabled(False)
        self.cancel_button.setDisabled(True)
        self.cb_image_attr.setDisabled(False)
        self.cb_embedder.setDisabled(False)
        self.progressBarFinished()
        self.setBlocking(False)

        try:
            embeddings = f.result()
        except ConnectionError:
            self._log.exception("Error", exc_info=True)
            self._send_output_signals((None, None, 0))
            return
        except Exception as err:
            self._log.exception("Error", exc_info=True)
            self.error(
                "\n".join(traceback.format_exception_only(type(err), err)))
            self._send_output_signals((None, None, 0))
            return

        assert self._input_data is not None
        assert len(self._input_data) == len(task.file_paths_mask)

        self._send_output_signals(embeddings)

    def _send_output_signals(self, embeddings):
        """Send both outputs and update the warning and output summary.

        ``embeddings`` is a 3-tuple: embedded images, skipped images and the
        number of skipped images.
        """
        self.Warning.images_skipped.clear()
        embedded_images, skipped_images, num_skipped = embeddings
        self.Outputs.embeddings.send(embedded_images)
        self.Outputs.skipped_images.send(skipped_images)
        # Compare by value: an identity test against the literal 0 is not a
        # reliable way to check a count.
        if num_skipped != 0:
            self.Warning.images_skipped(num_skipped)
        self.set_output_data_summary(embedded_images, skipped_images)

    def clear_outputs(self):
        """Send None on both outputs with zero skipped images."""
        self._send_output_signals((None, None, 0))

    def onDeleteWidget(self):
        """Cancel the task before the parent class handles the deletion."""
        self.cancel()
        super().onDeleteWidget()

    def cancel(self):
        """Cancel the registered task, if any, and re-enable the controls."""
        if self._task is not None:
            task, self._task = self._task, None
            task.cancel()
            del task.embedder
            # the process will still continue in the background - it will
            # wait current waiting response to come back but then will stop

            self.auto_commit_widget.setDisabled(False)
            self.cancel_button.setDisabled(True)
            self.progressBarFinished()
            self.setBlocking(False)
            self.cb_image_attr.setDisabled(False)
            self.cb_embedder.setDisabled(False)
Esempio n. 8
0
class OWMoleculeEmbedding(OWWidget):
    """Widget that embeds the SMILES strings of an input table.

    The embedder runs on a single-thread ``qconcurrent.ThreadExecutor`` and
    reports progress and results back through the ``__progress_set`` and
    ``__set_results`` slots. The widget is set blocking and its controls
    are disabled for the duration of a task.
    """
    name = "Molecule Embedding"
    description = "Molecule embedding through deep neural networks."
    icon = "../widgets/icons/category.svg"
    priority = 150

    want_main_area = False
    _auto_apply = Setting(default=True)

    inputs = [(_Input.SMILES, Table, 'set_data')]
    outputs = [(_Output.FINGERPRINTS, Table, Default),
               (_Output.SKIPPED_SMILES, Table)]

    # Persisted combo selections (indices into the respective lists).
    cb_smiles_attr_current_id = Setting(default=0)
    cb_embedder_current_id = Setting(default=0)

    _NO_DATA_INFO_TEXT = "No data on input."

    def __init__(self):
        """Initialise the state, build the layout and schedule the
        creation of the embedder."""
        super().__init__()
        # Embedder keys ordered by their 'order' entry in EMBEDDERS_INFO.
        self.embedders = sorted(list(EMBEDDERS_INFO),
                                key=lambda k: EMBEDDERS_INFO[k]['order'])
        self._string_attributes = None
        self._input_data = None
        self._log = logging.getLogger(__name__)
        # Namespace describing the running task, or None (see commit).
        self._task = None
        self._setup_layout()
        self._smiles_embedder = None
        self._executor = qconcurrent.ThreadExecutor(
            self, threadPool=QThreadPool(maxThreadCount=1))
        # Stay blocking until _init_server_connection has run.
        self.setBlocking(True)
        QTimer.singleShot(0, self._init_server_connection)

    def _setup_layout(self):
        """Create the info and settings boxes, the apply control and the
        cancel button."""
        self.controlArea.setMinimumWidth(self.controlArea.sizeHint().width())
        self.layout().setSizeConstraint(QLayout.SetFixedSize)

        widget_box = widgetBox(self.controlArea, 'Info')
        self.input_data_info = widgetLabel(widget_box, self._NO_DATA_INFO_TEXT)
        self.connection_info = widgetLabel(widget_box, "")

        widget_box = widgetBox(self.controlArea, 'Settings')
        self.cb_smiles_attr = comboBox(widget=widget_box,
                                       master=self,
                                       value='cb_smiles_attr_current_id',
                                       label='SMILES attribute:',
                                       orientation=Qt.Horizontal,
                                       callback=self._cb_smiles_attr_changed)

        self.cb_embedder = comboBox(widget=widget_box,
                                    master=self,
                                    value='cb_embedder_current_id',
                                    label='Embedder:',
                                    orientation=Qt.Horizontal,
                                    callback=self._cb_embedder_changed)
        self.cb_embedder.setModel(
            VariableListModel(
                [EMBEDDERS_INFO[e]['name'] for e in self.embedders]))
        # A stored index may not fit the current list of embedders.
        if not self.cb_embedder_current_id < len(self.embedders):
            self.cb_embedder_current_id = 0
        self.cb_embedder.setCurrentIndex(self.cb_embedder_current_id)

        current_embedder = self.embedders[self.cb_embedder_current_id]
        self.embedder_info = widgetLabel(
            widget_box, EMBEDDERS_INFO[current_embedder]['description'])

        self.auto_commit_widget = auto_commit(widget=self.controlArea,
                                              master=self,
                                              value='_auto_apply',
                                              label='Apply',
                                              commit=self.commit)

        self.cancel_button = QPushButton(
            'Cancel',
            icon=self.style().standardIcon(QStyle.SP_DialogCancelButton),
        )
        self.cancel_button.clicked.connect(self.cancel)
        hbox = hBox(self.controlArea)
        hbox.layout().addWidget(self.cancel_button)
        # Enabled only while a task is running (see commit).
        self.cancel_button.setDisabled(True)

    def _init_server_connection(self):
        """Unblock the widget, create the embedder and show whether it is
        connected to the server."""
        self.setBlocking(False)
        self._smiles_embedder = MoleculeEmbedder(
            model=self.embedders[self.cb_embedder_current_id],
            layer='penultimate')
        self._set_server_info(self._smiles_embedder.is_connected_to_server())

    def _clear_outputs(self):
        """Send None on both outputs."""
        self.send(_Output.FINGERPRINTS, None)
        self.send(_Output.SKIPPED_SMILES, None)

    def set_data(self, data):
        """Accept new input data and commit.

        Falsy data, or data without string attributes, leaves the widget
        without input data and with cleared outputs.
        """
        if not data:
            self._input_data = None
            self._clear_outputs()
            self.input_data_info.setText(self._NO_DATA_INFO_TEXT)
            return

        self._string_attributes = MoleculeEmbedder.filter_string_attributes(
            data)
        if not self._string_attributes:
            self.input_data_info.setText(
                "Data with {:d} instances, but without string attributes.".
                format(len(data)))
            self._input_data = None
            # Results computed for the previous input no longer apply.
            self._clear_outputs()
            return

        # A stored index may not fit the attributes of the new data.
        if not self.cb_smiles_attr_current_id < len(self._string_attributes):
            self.cb_smiles_attr_current_id = 0

        self.cb_smiles_attr.setModel(VariableListModel(
            self._string_attributes))
        self.cb_smiles_attr.setCurrentIndex(self.cb_smiles_attr_current_id)

        self._input_data = data
        self.input_data_info.setText("Data with {:d} instances.".format(
            len(data)))

        self._cb_smiles_attr_changed()

    def _cb_smiles_attr_changed(self):
        """Callback of the SMILES attribute combo."""
        self.commit()

    def _cb_embedder_changed(self):
        """Callback of the embedder combo.

        Creates an embedder for the selected model, updates the description
        and the input info, and commits when there is input data.
        """
        current_embedder = self.embedders[self.cb_embedder_current_id]
        self._smiles_embedder = MoleculeEmbedder(model=current_embedder,
                                                 layer='penultimate')
        self.embedder_info.setText(
            EMBEDDERS_INFO[current_embedder]['description'])
        if self._input_data:
            self.input_data_info.setText("Data with {:d} instances.".format(
                len(self._input_data)))
            self.commit()
        else:
            self.input_data_info.setText(self._NO_DATA_INFO_TEXT)

    def commit(self):
        """Start embedding the current data on the executor.

        A task that is still registered is cancelled first. Nothing is
        started when the embedder does not exist yet; the outputs are
        cleared when there is no usable input.
        """
        if self._task is not None:
            self.cancel()

        if self._smiles_embedder is None:
            self._set_server_info(connected=False)
            return

        if not self._string_attributes or self._input_data is None:
            self._clear_outputs()
            return

        self._set_server_info(connected=True)
        self.cancel_button.setDisabled(False)
        self.cb_smiles_attr.setDisabled(True)
        self.cb_embedder.setDisabled(True)

        smiles_attr = self._string_attributes[self.cb_smiles_attr_current_id]
        smiles = self._input_data[:, smiles_attr].metas.flatten()

        assert smiles_attr.is_string
        assert smiles.dtype == np.dtype('O')

        # One progress value per SMILES string.
        ticks = iter(np.linspace(0.0, 100.0, smiles.size))
        set_progress = qconcurrent.methodinvoke(self, "__progress_set",
                                                (float, ))

        def advance(success=True):
            if success:
                set_progress(next(ticks))

        def cancel():
            # ``task`` is bound further down, before this can be called.
            task.future.cancel()
            task.cancelled = True
            task.embedder.cancelled = True

        embedder = self._smiles_embedder

        def run_embedding(smiles_list):
            return embedder(smiles=smiles_list,
                            smiles_processed_callback=advance)

        self.auto_commit_widget.setDisabled(True)
        self.progressBarInit(processEvents=None)
        self.progressBarSet(0.0, processEvents=None)
        self.setBlocking(True)

        f = self._executor.submit(run_embedding, smiles)
        f.add_done_callback(
            qconcurrent.methodinvoke(self, "__set_results", (object, )))

        task = self._task = namespace(
            smiles=smiles,
            embedder=embedder,
            cancelled=False,
            cancel=cancel,
            future=f,
        )
        self._log.debug("Starting embedding task for %i smiles", smiles.size)

    @Slot(float)
    def __progress_set(self, value):
        """Update the progress bar; ignored when no task is registered."""
        assert self.thread() is QThread.currentThread()
        if self._task is not None:
            self.progressBarSet(value)

    @Slot(object)
    def __set_results(self, f):
        """Handle the finished future ``f`` of the embedding task.

        Futures that do not belong to the registered task are ignored. On
        any exception the outputs are cleared; a ``ConnectionError`` marks
        the server as not connected, other exceptions are shown as a widget
        error.
        """
        assert self.thread() is QThread.currentThread()
        if self._task is None or self._task.future is not f:
            self._log.info("Reaping stale task")
            return

        assert f.done()

        task, self._task = self._task, None
        self.auto_commit_widget.setDisabled(False)
        self.cancel_button.setDisabled(True)
        self.cb_smiles_attr.setDisabled(False)
        self.cb_embedder.setDisabled(False)
        self.progressBarFinished(processEvents=None)
        self.setBlocking(False)

        try:
            embeddings = f.result()
        except ConnectionError:
            self._log.exception("Error", exc_info=True)
            self._clear_outputs()
            self._set_server_info(connected=False)
            return
        except Exception as err:
            self._log.exception("Error", exc_info=True)
            self.error("\n".join(
                traceback.format_exception_only(type(err), err)))
            self._clear_outputs()
            return

        assert self._input_data is not None
        assert len(self._input_data) == len(task.smiles)

        # Positions without a returned embedding stay None.
        embeddings_all = [None] * len(task.smiles)
        for i, embedding in enumerate(embeddings):
            embeddings_all[i] = embedding
        embeddings_all = np.array(embeddings_all)
        self._send_output_signals(embeddings_all)

    def _send_output_signals(self, embeddings):
        """Send both outputs and mention skipped SMILES in the info label."""
        embedded_smiles, skipped_smiles, num_skipped =\
            MoleculeEmbedder.prepare_output_data(self._input_data, embeddings)
        self.send(_Output.SKIPPED_SMILES, skipped_smiles)
        self.send(_Output.FINGERPRINTS, embedded_smiles)
        # Compare by value: an identity test against the literal 0 is not a
        # reliable way to check a count.
        if num_skipped != 0:
            self.input_data_info.setText(
                "Data with {:d} instances, {:d} SMILES skipped.".format(
                    len(self._input_data), num_skipped))

    def _set_server_info(self, connected):
        """Clear the widget messages and show the connection state."""
        self.clear_messages()
        if connected:
            self.connection_info.setText("Connected to server.")
        else:
            self.connection_info.setText("No connection with server.")
            self.warning("Click Apply to try again.")

    def onDeleteWidget(self):
        """Cancel the task, let the parent class handle the deletion and
        exit the embedder if one was created."""
        self.cancel()
        super().onDeleteWidget()
        if self._smiles_embedder is not None:
            self._smiles_embedder.__exit__(None, None, None)

    def cancel(self):
        """Cancel the registered task, if any, wait for it to finish,
        re-enable the controls and reconnect to the server."""
        if self._task is not None:
            task, self._task = self._task, None
            task.cancel()
            # wait until done
            try:
                task.future.exception()
            except qconcurrent.CancelledError:
                pass

            self.auto_commit_widget.setDisabled(False)
            self.cancel_button.setDisabled(True)
            self.progressBarFinished(processEvents=None)
            self.setBlocking(False)
            self.cb_smiles_attr.setDisabled(False)
            self.cb_embedder.setDisabled(False)
            self._smiles_embedder.cancelled = False
            # reset the connection.
            connected = self._smiles_embedder.reconnect_to_server()
            self._set_server_info(connected=connected)
Esempio n. 9
0
class OWDocumentEmbedding(OWBaseVectorizer):
    """Widget that embeds documents using pretrained models.

    The user chooses between two embedders (see ``Methods``); the second one
    is additionally configured with a language and an aggregator. Documents
    reported as skipped by the vectorizer are sent on a separate output.
    """
    name = "Document Embedding"
    description = "Document embedding using pretrained models."
    keywords = ["embedding", "document embedding", "text", "fasttext", "bert", "sbert"]
    icon = "icons/TextEmbedding.svg"
    priority = 300

    buttons_area_orientation = Qt.Vertical
    # Version 2 stores language/aggregator as names instead of indexes;
    # older settings are converted in ``migrate_settings``.
    settings_version = 2

    # Embedder classes, indexed by the ``method`` setting.
    Methods = [SBERT, DocumentEmbedder]

    class Outputs(OWBaseVectorizer.Outputs):
        # Documents reported by the vectorizer as skipped.
        skipped = Output("Skipped documents", Corpus)

    class Error(OWWidget.Error):
        no_connection = Msg(
            "No internet connection. Please establish a connection or use "
            "another vectorizer."
        )
        # The placeholder is filled with the exception's class name.
        unexpected_error = Msg("Embedding error: {}")

    class Warning(OWWidget.Warning):
        unsuccessful_embeddings = Msg("Some embeddings were unsuccessful.")

    # Index into ``Methods``; 0 disables the language/aggregator controls.
    method: int = Setting(default=0)
    # Language name; used as a key into ``LANGS_TO_ISO``.
    language: str = Setting(default="English")
    # Aggregator name passed to the embedder.
    aggregator: str = Setting(default="Mean")

    def __init__(self):
        """Initialise the base widget and add a disabled Cancel button."""
        super().__init__()
        cancel_icon = self.style().standardIcon(QStyle.SP_DialogCancelButton)
        self.cancel_button = QPushButton("Cancel", icon=cancel_icon)
        button = self.cancel_button
        button.clicked.connect(self.cancel)
        self.buttonsArea.layout().addWidget(button)
        # Starts disabled; ``commit`` enables it for the duration of a run.
        button.setDisabled(True)

    def create_configuration_layout(self):
        """Build and return the layout holding the method selection controls.

        The layout contains a radio-button group bound to ``method`` with an
        indented box underneath holding the language and aggregator combo
        boxes (stored as ``language_cb`` and ``aggregator_cb``).
        """
        layout = QVBoxLayout()
        method_box = gui.radioButtons(
            None, self, "method", callback=self.on_change
        )
        layout.addWidget(method_box)

        for caption in ("Multilingual SBERT", "fastText:"):
            gui.appendRadioButton(method_box, caption)
        options_box = gui.indentedBox(method_box)

        # Options common to both combo boxes.
        combo_options = dict(
            sendSelectedValue=True,  # value is actual string not index
            orientation=Qt.Horizontal,
            callback=self.on_change,
            searchable=True,
        )
        self.language_cb = gui.comboBox(
            options_box,
            self,
            "language",
            items=LANGUAGES,
            label="Language:",
            **combo_options,
        )
        self.aggregator_cb = gui.comboBox(
            options_box,
            self,
            "aggregator",
            items=AGGREGATORS,
            label="Aggregator:",
            **combo_options,
        )

        return layout

    def update_method(self):
        """Sync the option combo boxes with ``method`` and rebuild the vectorizer."""
        # Method 0 takes no language/aggregator, so both controls are disabled.
        options_disabled = self.method == 0
        for combo in (self.aggregator_cb, self.language_cb):
            combo.setDisabled(options_disabled)
        embedder = self.init_method()
        self.vectorizer = EmbeddingVectorizer(embedder, self.corpus)

    def init_method(self):
        """Instantiate the embedder selected by ``method``.

        The first entry of ``Methods`` is constructed without arguments; the
        second receives the ISO code of the selected language and the
        selected aggregator.
        """
        language_options = {
            "language": LANGS_TO_ISO[self.language],
            "aggregator": self.aggregator,
        }
        # Keyword arguments per method, in the same order as ``Methods``.
        kwargs_by_method = ({}, language_options)
        chosen_kwargs = kwargs_by_method[self.method]
        embedder_cls = self.Methods[self.method]
        return embedder_cls(**chosen_kwargs)

    @gui.deferred
    def commit(self):
        """Clear previous messages, enable Cancel and run the base commit."""
        for messages in (self.Error, self.Warning):
            messages.clear()
        self.cancel_button.setDisabled(False)
        super().commit()

    def on_done(self, result):
        """Send the skipped documents and pass ``result`` to the base handler.

        The Cancel button is disabled, and a warning is shown when the
        vectorizer reports at least one skipped document.
        """
        self.cancel_button.setDisabled(True)
        skipped_docs = self.vectorizer.skipped_documents
        self.Outputs.skipped.send(skipped_docs)
        any_skipped = skipped_docs is not None and len(skipped_docs) > 0
        if any_skipped:
            self.Warning.unsuccessful_embeddings()
        super().on_done(result)

    def on_exception(self, ex: Exception):
        """Show an error for ``ex`` and cancel the run.

        Connection errors get the dedicated no-connection message; any other
        exception is reported by its class name.
        """
        self.cancel_button.setDisabled(True)
        if not isinstance(ex, EmbeddingConnectionError):
            self.Error.unexpected_error(type(ex).__name__)
        else:
            self.Error.no_connection()
        self.cancel()

    def cancel(self):
        """Clear the skipped output, disable Cancel and run the base cancel."""
        skipped_output = self.Outputs.skipped
        skipped_output.send(None)
        self.cancel_button.setDisabled(True)
        super().cancel()

    @classmethod
    def migrate_settings(cls, settings: Dict[str, Any], version: Optional[int]):
        """Convert settings stored before version 2 to the current format.

        ``settings`` is modified in place: the stored ``language`` and
        ``aggregator`` values are used as indexes into ``LANGUAGES`` and
        ``AGGREGATORS`` and replaced by the entries found there. Settings
        with ``version`` 2 or later are left unchanged.
        """
        # NOTE(review): assumes both keys are present in old settings; a
        # missing key would raise KeyError here - confirm against callers.
        if version is None or version < 2:
            # before version 2 settings were indexes now they are strings
            # with language name and selected aggregator name
            settings["language"] = LANGUAGES[settings["language"]]
            settings["aggregator"] = AGGREGATORS[settings["aggregator"]]