예제 #1
0
    def set_dataset(self, data, tid=None):
        """Set, replace or remove the dataset shown for input slot ``tid``.

        With ``data`` given, the slot's table view is created on first use
        and reused on later updates; its tab label follows the ``name``
        attribute of ``data`` (``"Data"`` when it has none). With ``data``
        set to ``None`` the slot, its view and its tab are removed.

        In every case the settings context is closed and re-opened, pending
        row/column selections are applied (or reset to empty lists) and the
        widget commits.
        """
        self.closeContext()
        if data is not None:
            title = getattr(data, "name", "Data")
            if tid in self._inputs:
                # update existing input slot
                slot = self._inputs[tid]
                view = slot.view
                # reset the (header) view state.
                view.setModel(None)
                view.horizontalHeader().setSortIndicator(-1, Qt.AscendingOrder)
            else:
                view = QTableView()
                view.setSortingEnabled(True)
                view.setHorizontalScrollMode(QTableView.ScrollPerPixel)

                if self.select_rows:
                    view.setSelectionBehavior(QTableView.SelectRows)

                header = view.horizontalHeader()
                header.setSectionsMovable(True)
                header.setSectionsClickable(True)
                header.setSortIndicatorShown(True)
                header.setSortIndicator(-1, Qt.AscendingOrder)

                # QHeaderView does not 'reset' the model sort column,
                # because there is no guarantee (requirement) that the
                # models understand the -1 sort column.
                def sort_reset(index, order):
                    if view.model() is not None and index == -1:
                        view.model().sort(index, order)

                header.sortIndicatorChanged.connect(sort_reset)

            view.dataset = data

            tab_index = self.tabs.indexOf(view)
            if tab_index == -1:
                self.tabs.addTab(view, title)
            else:
                # The view already owns a tab: refresh its label in place
                # instead of adding the same widget to the tab widget again.
                self.tabs.setTabText(tab_index, title)

            self._setup_table_view(view, data)
            slot = TableSlot(tid, data, table_summary(data), view)
            view._input_slot = slot
            self._inputs[tid] = slot

            self.tabs.setCurrentIndex(self.tabs.indexOf(view))

            self.set_info(slot.summary)

            if isinstance(slot.summary.len, concurrent.futures.Future):
                # The length is still being computed; refresh the info once
                # the future completes. The call is queued onto this object
                # rather than made directly from the done-callback.
                def update(f):
                    QMetaObject.invokeMethod(self, "_update_info",
                                             Qt.QueuedConnection)

                slot.summary.len.add_done_callback(update)

        elif tid in self._inputs:
            slot = self._inputs.pop(tid)
            view = slot.view
            view.hide()
            view.deleteLater()
            self.tabs.removeTab(self.tabs.indexOf(view))

            current = self.tabs.currentWidget()
            if current is not None:
                self.set_info(current._input_slot.summary)

        # The tab bar is only useful when there is more than one table.
        self.tabs.tabBar().setVisible(self.tabs.count() > 1)
        self.openContext(data)

        if self.__pending_selected_rows is not None:
            self.selected_rows = self.__pending_selected_rows
            self.__pending_selected_rows = None
        else:
            self.selected_rows = []

        if self.__pending_selected_cols is not None:
            self.selected_cols = self.__pending_selected_cols
            self.__pending_selected_cols = None
        else:
            self.selected_cols = []

        self.set_selection()
        self.commit()
class OWGenialisExpressions(widget.OWWidget, ConcurrentWidgetMixin):
    """Widget that lists server collections and outputs expression data.

    The main area shows a filterable, paginated table of the collections
    fetched through ``self.res``. Selecting a row fills the output-option
    combo boxes and starts ``runner`` (via ``ConcurrentWidgetMixin.start``),
    which builds the ``Table`` sent on the 'Expressions' output.
    """
    name = 'Genialis Expressions'
    priority = 30
    want_main_area = True
    want_control_area = True
    icon = '../widgets/icons/OWGenialisExpressions.svg'

    # Emitted as (next page available, previous page available).
    pagination_availability = pyqtSignal(bool, bool)

    norm_component = settings.SettingProvider(NormalizationComponent)
    pagination_component = settings.SettingProvider(PaginationComponent)
    filter_component = settings.SettingProvider(CollapsibleFilterComponent)

    # The three option settings below are indices into the lists held by
    # ``self.data_output_options`` (see ``on_selection_changed``).
    exp_type: int
    exp_type = settings.Setting(1, schema_only=True)

    proc_slug: int
    proc_slug = settings.Setting(0, schema_only=True)

    exp_source: int
    exp_source = settings.Setting(0, schema_only=True)

    append_qc_data: bool
    append_qc_data = settings.Setting(False, schema_only=True)

    auto_commit: bool
    auto_commit = settings.Setting(False, schema_only=True)

    class Outputs:
        table = Output('Expressions', Table)

    class Warning(widget.OWWidget.Warning):
        no_expressions = Msg('Expression data objects not found.')
        no_data_objects = Msg(
            'No expression data matches the selected options.')
        unexpected_feature_type = Msg(
            'Can not import expression data, unexpected feature type "{}".')
        multiple_feature_type = Msg(
            'Can not import expression data, multiple feature types found.')

    def __init__(self):
        """Build the control/main area GUI and sign in silently."""
        super().__init__()
        ConcurrentWidgetMixin.__init__(self)

        self._res: Optional[resolwe.resapi.ResolweAPI] = None

        # Store collection ID from currently selected row
        self.selected_collection_id: Optional[str] = None
        # Store data output options
        self.data_output_options: Optional[DataOutputOptions] = None
        # Cache output data table
        self.data_table: Optional[Table] = None
        # Cache clinical metadata
        self.clinical_metadata: Optional[Table] = None

        # Control area
        self.info_box = gui.widgetLabel(
            gui.widgetBox(self.controlArea, "Info", margin=3),
            'No data on output.')

        self.exp_type_combo = gui.comboBox(
            self.controlArea,
            self,
            'exp_type',
            label='Expression Type',
            callback=self.on_output_option_changed)
        self.proc_slug_combo = gui.comboBox(
            self.controlArea,
            self,
            'proc_slug',
            label='Process Name',
            callback=self.on_output_option_changed)
        self.exp_source_combo = gui.comboBox(
            self.controlArea,
            self,
            'exp_source',
            label='Expression source',
            callback=self.on_output_option_changed,
        )

        self.norm_component = NormalizationComponent(self, self.controlArea)
        self.norm_component.options_changed.connect(
            self.on_normalization_changed)

        box = gui.widgetBox(self.controlArea, 'Sample QC')
        gui.checkBox(box,
                     self,
                     'append_qc_data',
                     'Append QC data',
                     callback=self.on_output_option_changed)

        gui.rubber(self.controlArea)
        box = gui.widgetBox(self.controlArea, 'Sign in')
        self.user_info = gui.label(box, self, '')
        self.server_info = gui.label(box, self, '')

        box = gui.widgetBox(box, orientation=Qt.Horizontal)
        self.sign_in_btn = gui.button(box,
                                      self,
                                      'Sign In',
                                      callback=self.sign_in,
                                      autoDefault=False)
        self.sign_out_btn = gui.button(box,
                                       self,
                                       'Sign Out',
                                       callback=self.sign_out,
                                       autoDefault=False)

        self.commit_button = gui.auto_commit(self.controlArea,
                                             self,
                                             'auto_commit',
                                             '&Commit',
                                             box=False)
        self.commit_button.button.setAutoDefault(False)

        # Main area
        self.table_view = QTableView()
        self.table_view.setAlternatingRowColors(True)
        self.table_view.viewport().setMouseTracking(True)
        self.table_view.setShowGrid(False)
        self.table_view.verticalHeader().hide()
        self.table_view.horizontalHeader().setSectionResizeMode(
            QHeaderView.ResizeToContents)
        self.table_view.horizontalHeader().setStretchLastSection(True)
        self.table_view.setSelectionBehavior(QAbstractItemView.SelectRows)
        self.table_view.setSelectionMode(QAbstractItemView.SingleSelection)
        # self.table_view.setStyleSheet('QTableView::item:selected{background-color: palette(highlight); color: palette(highlightedText);};')

        self.model = GenialisExpressionsModel(self)
        self.model.setHorizontalHeaderLabels(TableHeader.labels())
        self.table_view.setModel(self.model)
        self.table_view.selectionModel().selectionChanged.connect(
            self.on_selection_changed)

        # The filter goes above the table and the pagination below it.
        self.filter_component = CollapsibleFilterComponent(self, self.mainArea)
        self.filter_component.options_changed.connect(self.on_filter_changed)
        self.mainArea.layout().addWidget(self.table_view)
        self.pagination_component = PaginationComponent(self, self.mainArea)
        self.pagination_component.options_changed.connect(
            self.update_collections_view)

        self.sign_in(silent=True)

    @property
    def res(self):
        """The API object currently used to talk to the server (or None)."""
        return self._res

    @res.setter
    def res(self, value: resolwe.resapi.ResolweAPI):
        """Install a new API object and refresh everything derived from it.

        Values that are not ``ResolweAPI`` instances are ignored.
        """
        if isinstance(value, resolwe.resapi.ResolweAPI):
            self._res = value
            self.update_user_status()
            self.update_collections_view()
            # __invalidate() already sends None on the output, so no
            # additional send is needed here.
            self.__invalidate()

    def __invalidate(self):
        """Drop cached state, clear the option combos, warnings and output."""
        self.data_table = None
        self.selected_collection_id = None
        self.clinical_metadata = None

        self.data_output_options = None
        self.exp_type_combo.clear()
        self.proc_slug_combo.clear()
        self.exp_source_combo.clear()

        self.Outputs.table.send(None)
        self.Warning.no_expressions.clear()
        self.Warning.multiple_feature_type.clear()
        self.Warning.unexpected_feature_type.clear()
        self.Warning.no_data_objects.clear()
        self.info.set_output_summary(StateInfo.NoOutput)
        self.update_info_box()

    def update_user_status(self):
        """Update the user/server labels and the sign in/out button states."""
        user = self.res.get_currently_logged_user()

        if user:
            user_info = f"{user[0].get('first_name', '')} {user[0].get('last_name', '')}".strip(
            )
            user_info = f"User: {user_info if user_info else user[0].get('username', '')}"
            self.sign_in_btn.setEnabled(False)
            self.sign_out_btn.setEnabled(True)
        else:
            user_info = 'User: Anonymous'
            self.sign_in_btn.setEnabled(True)
            self.sign_out_btn.setEnabled(False)

        self.user_info.setText(user_info)
        # Show the URL without its scheme. Splitting on '://' (instead of
        # slicing a fixed 8 characters) also handles non-'https://' URLs.
        server = self.res.url.split('://', 1)[-1]
        self.server_info.setText(f'Server: {server}')

    def update_info_box(self):
        """Show gene counts for the cached output table in the 'Info' box."""
        if self.data_table:
            attributes = self.data_table.domain.attributes
            total_genes = len(attributes)
            # Columns carrying a non-empty ``attributes`` mapping are the
            # ones reported as matched.
            known_genes = sum(1 for col in attributes if len(col.attributes))

            info_text = ('{} genes on output\n'
                         '{} genes match Entrez database\n'
                         '{} genes with match conflicts\n'.format(
                             total_genes, known_genes,
                             total_genes - known_genes))

        else:
            info_text = 'No data on output.'

        self.info_box.setText(info_text)

    def sign_in(self, silent=False):
        """Sign in to the server.

        With ``silent`` set, the dialog's ``sign_in`` is called without
        showing the dialog, and a connection made with default arguments is
        used when that yields no instance. Otherwise the dialog is executed
        and its instance is used only if the dialog is accepted.
        """
        dialog = SignIn(self, server_type=resolwe.RESOLWE_PLATFORM)

        if silent:
            dialog.sign_in()
            if dialog.resolwe_instance is not None:
                self.res = dialog.resolwe_instance
            else:
                self.res = resolwe.connect(
                    url=resolwe.resapi.DEFAULT_URL,
                    server_type=resolwe.RESOLWE_PLATFORM)
        elif dialog.exec_():
            self.res = dialog.resolwe_instance

    def sign_out(self):
        """Switch to the public connection and forget stored credentials."""
        # Use public credentials when user signs out
        self.res = resolwe.connect(url=resolwe.resapi.DEFAULT_URL,
                                   server_type=resolwe.RESOLWE_PLATFORM)
        # Remove username and password
        cm = get_credential_manager(resolwe.RESOLWE_PLATFORM)
        if cm.username:
            del cm.username
        if cm.password:
            del cm.password

    def on_filter_changed(self):
        """Restart pagination and reload the list after a filter change."""
        self.pagination_component.reset_pagination()
        self.update_collections_view()

    def get_query_parameters(self) -> Dict[str, str]:
        """Build the query parameters from the pagination and filter state.

        'limit', 'offset' and 'ordering' are always present; the text, name,
        contributor, owner and modification-date filters are added only when
        they are set.
        """
        params = {
            'limit':
            ItemsPerPage.values()[self.pagination_component.items_per_page],
            'offset':
            self.pagination_component.offset,
            'ordering':
            SortBy.values()[self.filter_component.sort_by],
        }

        if self.filter_component.filter_by_full_text:
            params.update({'text': self.filter_component.filter_by_full_text})

        if self.filter_component.filter_by_name:
            params.update(
                {'name__icontains': self.filter_component.filter_by_name})

        if self.filter_component.filter_by_contrib:
            params.update(
                {'contributor_name': self.filter_component.filter_by_contrib})

        if self.filter_component.filter_by_owner:
            params.update(
                {'owners_name': self.filter_component.filter_by_owner})

        last_modified = FilterByDateModified.values()[
            self.filter_component.filter_by_modified]
        if last_modified:
            params.update({'modified__gte': last_modified.isoformat()})

        return params

    def get_collections(self) -> Tuple[Dict[str, str], Dict[str, str]]:
        """Fetch the current page of collections and their species.

        Returns the raw server response together with the result of
        ``get_species`` for the ids of the collections on that page.
        """
        # Get response from the server
        collections = self.res.get_collections(**self.get_query_parameters())
        # Loop through collections and store ids
        collection_ids = [
            collection['id'] for collection in collections.get('results', [])
        ]
        # Get species by collection ids
        collection_to_species = self.res.get_species(collection_ids)

        return collections, collection_to_species

    def update_collections_view(self):
        """Reload the collections table and announce pagination state."""
        collections, collection_to_species = self.get_collections()

        # Pass the results to data model
        self.model.set_data(collections.get('results', []),
                            collection_to_species)
        self.table_view.setItemDelegateForColumn(
            TableHeader.id, gui.LinkStyledItemDelegate(self.table_view))
        self.table_view.setColumnHidden(TableHeader.slug, True)
        self.table_view.setColumnHidden(TableHeader.tags, True)

        # Check pagination parameters and emit pagination_availability signal
        next_page = bool(collections.get('next'))
        previous_page = bool(collections.get('previous'))
        self.pagination_availability.emit(next_page, previous_page)

    def normalize(self, table: Table) -> Optional[Table]:
        """Apply the normalization steps enabled in ``norm_component``.

        The steps run in a fixed order: quantile normalization, logarithmic
        scale, z-score, quantile transform. Returns ``None`` for a falsy
        ``table``.
        """
        if not table:
            return None

        if self.norm_component.quantile_norm:
            table = QuantileNormalization()(table)

        if self.norm_component.log_norm:
            table = LogarithmicScale()(table)

        if self.norm_component.z_score_norm:
            table = ZScore(axis=self.norm_component.z_score_axis)(table)

        if self.norm_component.quantile_transform:
            axis = self.norm_component.quantile_transform_axis
            # Number of quantiles is the size of the other axis of X.
            quantiles = table.X.shape[int(not axis)]
            distribution = QuantileTransformDist.values()[
                self.norm_component.quantile_transform_dist]
            table = QuantileTransform(axis=axis,
                                      n_quantiles=quantiles,
                                      output_distribution=distribution)(table)

        return table

    def commit(self):
        """Cancel any running task and start ``runner`` anew."""
        self.Warning.no_data_objects.clear()
        self.cancel()
        self.start(self.runner)

    def on_output_option_changed(self):
        """Discard the cached table (it must be rebuilt) and commit."""
        self.data_table = None
        self.commit()

    def on_clinical_data_changed(self):
        """Refresh the cached clinical metadata and commit."""
        # NOTE(review): fetch_clinical_metadata is not defined in the visible
        # part of this class — confirm it is provided elsewhere.
        self.clinical_metadata = self.fetch_clinical_metadata()
        self.commit()

    def on_normalization_changed(self):
        """Commit; the cached table is kept and only re-normalized."""
        self.commit()

    def on_selection_changed(self):
        """React to a new row selection in the collections table.

        Resets the widget state, loads the expression data objects of the
        selected collection, fills the three option combo boxes (falling
        back to index 0 when a stored index is out of range) and, unless a
        warning is raised, triggers a commit.
        """
        self.__invalidate()

        collection_id: str = self.get_selected_row_data(TableHeader.id)
        if not collection_id:
            return

        self.selected_collection_id = collection_id
        data_objects = self.res.get_expression_data_objects(collection_id)
        self.data_output_options = available_data_output_options(data_objects)

        self.exp_type_combo.addItems(
            exp_name
            for _, exp_name in self.data_output_options.expression_type)
        if self.exp_type >= len(self.data_output_options.expression_type):
            self.exp_type = 0
        self.exp_type_combo.setCurrentIndex(self.exp_type)

        self.proc_slug_combo.addItems(
            proc_name for _, proc_name in self.data_output_options.process)
        if self.proc_slug >= len(self.data_output_options.process):
            self.proc_slug = 0
        self.proc_slug_combo.setCurrentIndex(self.proc_slug)

        self.exp_source_combo.addItems(
            self.data_output_options.expression_sources)
        if self.exp_source >= len(self.data_output_options.expression_sources):
            self.exp_source = 0
        self.exp_source_combo.setCurrentIndex(self.exp_source)

        if not data_objects:
            self.Warning.no_expressions()
            return

        # Note: This here is to handle an edge case where we get
        #       different 'feature_type' data object in a collection.
        #       For now we raise a warning, but in the future we should
        #       discuss about how to properly handle different types of features.
        feature_types = {data.output['feature_type'] for data in data_objects}

        if len(feature_types) == 1 and 'gene' not in feature_types:
            self.Warning.unexpected_feature_type(feature_types.pop())
            # self.data_objects = []
            return

        if len(feature_types) > 1:
            self.Warning.multiple_feature_type()
            # self.data_objects = []
            return

        self.on_output_option_changed()

    def get_selected_row_data(self, column: int) -> Optional[str]:
        """Return the data of ``column`` in the first selected row, if any."""
        selection_model = self.table_view.selectionModel()
        rows = selection_model.selectedRows(column=column)
        if not rows:
            return None

        return rows[0].data()

    def on_done(self, table: Table):
        """Publish a finished, non-empty table and update the summaries."""
        if table:
            samples, genes = table.X.shape
            self.info.set_output_summary(f'Samples: {samples} Genes: {genes}')
            self.update_info_box()
            self.Outputs.table.send(table)

    def on_exception(self, ex):
        """Re-raise any exception coming from the task."""
        # if isinstance(ex, ResolweDataObjectsNotFound):
        #     self.Warning.no_data_objects()
        #     self.Outputs.table.send(None)
        #     self.data_table = None
        #     self.info.set_output_summary(StateInfo.NoOutput)
        #     self.update_info_box()
        # else:
        raise ex

    def on_partial_result(self, result: Any) -> None:
        """Partial results are not used by this widget."""
        pass

    def onDeleteWidget(self):
        """Shut the task machinery down before the widget is deleted."""
        self.shutdown()
        super().onDeleteWidget()

    def sizeHint(self):
        """Preferred initial size of the widget."""
        return QSize(1280, 620)

    def runner(self, state: TaskState) -> Table:
        """Build (or reuse) the expression table and normalize it.

        When no table is cached in ``self.data_table``, the expressions,
        sample metadata and (optionally) QC data of the selected collection
        are read through ``resdk.tables.RNATables``, converted to a
        ``Table``, gene-matched and cached. The cached or fresh table is
        then passed through ``normalize``. Progress and status are reported
        on ``state``.
        """
        exp_type = self.data_output_options.expression_type[self.exp_type].type
        exp_source = self.data_output_options.expression_sources[
            self.exp_source]
        proc_slug = self.data_output_options.process[self.proc_slug].slug
        collection_id = self.selected_collection_id

        table = self.data_table

        def callback(i: float, status=""):
            # ``i`` is a fraction; the task state expects a percentage.
            state.set_progress_value(i * 100)
            if status:
                state.set_status(status)
            if state.is_interruption_requested():
                raise Exception

        if not table:
            collection = self.res.get_collection_by_id(collection_id)
            coll_table = resdk.tables.RNATables(
                collection,
                expression_source=exp_source,
                expression_process_slug=proc_slug,
                # The reads below account for the first half of the progress.
                progress_callable=wrap_callback(callback, end=0.5),
            )
            species = coll_table._data[0].output['species']
            sample = coll_table._samples[0]

            state.set_status('Downloading ...')
            # A fresh event loop is made current while the table properties
            # are read; it is closed in ``finally`` so that a failing read
            # does not leak it.
            loop = asyncio.new_event_loop()
            asyncio.set_event_loop(loop)
            try:
                df_exp = coll_table.exp if exp_type != 'rc' else coll_table.rc
                df_exp = df_exp.rename(index=coll_table.readable_index)
                df_metas = coll_table.meta
                df_metas = df_metas.rename(index=coll_table.readable_index)
                df_qc = None
                if self.append_qc_data:
                    # TODO: check if there is a way to detect if collection
                    #       table contains QC data
                    try:
                        df_qc = coll_table.qc
                        df_qc = df_qc.rename(index=coll_table.readable_index)
                    except ValueError:
                        # Best effort: continue without (renamed) QC data.
                        pass
            finally:
                loop.close()

            state.set_status('To data table ...')

            # Field names (second component of 'section.field') that occur
            # in more than one meta column.
            duplicates = {
                item
                for item, count in Counter([
                    label.split('.')[1]
                    for label in df_metas.columns.to_list() if '.' in label
                ]).items() if count > 1
            }

            # what happens if there is more nested sections?
            section_name_to_label = {
                section['name']: section['label']
                for section in sample.descriptor_schema.schema
            }

            column_labels = {}
            for field_schema, fields, path in iterate_schema(
                    sample.descriptor, sample.descriptor_schema.schema,
                    path=''):
                path = path[1:]  # this is ugly, but cant go around it
                if path not in df_metas.columns:
                    continue
                label = field_schema['label']
                section_name, field_name = path.split('.')
                # Ambiguous field names are prefixed with the section label.
                column_labels[path] = (
                    label if field_name not in duplicates else
                    f'{section_name_to_label[section_name]} - {label}')

            df_exp = df_exp.reset_index(drop=True)
            df_metas = df_metas.astype('object')
            df_metas = df_metas.fillna(np.nan)
            df_metas = df_metas.replace('nan', np.nan)
            df_metas = df_metas.rename(columns=column_labels)
            if df_qc is not None:
                df_metas = pd.merge(df_metas,
                                    df_qc,
                                    left_index=True,
                                    right_index=True)

            xym, domain_metas = vars_from_df(df_metas)
            x, _, m = xym
            x_metas = np.hstack((x, m))
            attrs = [ContinuousVariable(col) for col in df_exp.columns]
            metas = domain_metas.attributes + domain_metas.metas
            domain = Domain(attrs, metas=metas)
            table = Table(domain, df_exp.to_numpy(), metas=x_metas)
            # The download half is done. (The previous code took the first
            # value of linspace(0, 50, 2), i.e. 0, which reset the progress.)
            state.set_progress_value(50)

            state.set_status('Matching genes ...')
            progress_steps_gm = iter(
                np.linspace(50, 99, len(coll_table.gene_ids)))

            def gm_callback():
                # Stay at 99 if the matcher reports more steps than planned
                # instead of raising StopIteration.
                state.set_progress_value(next(progress_steps_gm, 99))

            tax_id = species_name_to_taxid(species)
            gm = GeneMatcher(tax_id, progress_callback=gm_callback)
            table = gm.match_table_attributes(table, rename=True)
            table.attributes[TableAnnotation.tax_id] = tax_id
            table.attributes[TableAnnotation.gene_as_attr_name] = True
            table.attributes[TableAnnotation.gene_id_attribute] = 'Entrez ID'
            self.data_table = table

        state.set_status('Normalizing ...')
        table = self.normalize(table)
        state.set_progress_value(100)

        return table
class OWClusterAnalysis(OWWidget):
    name = "Cluster Analysis"
    description = "The widget displays differentially expressed genes that characterize the cluster, " \
                  "and corresponding gene terms that describe differentially expressed genes"
    icon = "../widgets/icons/OWClusterAnalysis.svg"
    priority = 100

    class Inputs:
        data_table = Input('Data', Table)
        custom_sets = Input('Custom Gene Sets', Table)

    class Outputs:
        selected_data = Output('Selected Data', Table)
        gene_scores = Output('Gene Scores', Table)
        gene_set_scores = Output('Gene Set Scores', Table)

    class Information(OWWidget.Information):
        pass

    class Warning(OWWidget.Warning):
        gene_enrichment = Msg('{}, {}.')
        no_selected_gene_sets = Msg(
            'No gene set selected, select them from Gene Sets box.')

    class Error(OWWidget.Error):
        no_cluster_indicator = Msg('No cluster indicator in the input data')
        gene_as_attributes = Msg(
            'Genes, in the input data, are expected as column names')
        organism_mismatch = Msg(
            'Organism in input data and custom gene sets does not match')
        cluster_batch_conflict = Msg(
            'Cluster and batch must not be the same variable')

    settingsHandler = ClusterAnalysisContextHandler()
    cluster_indicators = ContextSetting([])
    batch_indicator = ContextSetting(None)
    stored_gene_sets_selection = ContextSetting(tuple())

    scoring_method_selection = ContextSetting(0)
    scoring_method_design = ContextSetting(0)
    scoring_test_type = ContextSetting(0)

    # genes filter
    max_gene_count = Setting(20)
    use_gene_count_filter = Setting(True)

    max_gene_p_value = Setting(0.1)
    use_gene_pval_filter = Setting(False)

    max_gene_fdr = Setting(0.1)
    use_gene_fdr_filter = Setting(True)

    # gene sets filter
    min_gs_count = Setting(5)
    use_gs_count_filter = Setting(True)

    max_gs_p_value = Setting(0.1)
    use_gs_pval_filter = Setting(False)

    max_gs_fdr = Setting(0.1)
    use_gs_max_fdr = Setting(True)

    # auto commit results
    auto_commit = settings.Setting(False)

    custom_gene_set_indicator = settings.Setting(None)

    def __init__(self):
        """Initialise the widget state and build the control and main areas.

        Widgets are created in the order in which they are laid out, so the
        statement order below is significant.
        """
        super().__init__()

        # widget attributes
        self.input_data = None
        self.store_input_domain = None
        self.input_genes_names = []
        self.input_genes_ids = []

        self.tax_id = None
        self.use_attr_names = None
        self.gene_id_attribute = None

        # custom gene set input
        self.feature_model = itemmodels.DomainModel(
            valid_types=(DiscreteVariable, StringVariable))
        self.custom_data = None
        self.custom_tax_id = None
        self.custom_use_attr_names = None
        self.custom_gene_id_attribute = None
        self.custom_gene_id_column = None
        self.num_of_custom_sets = None

        self.rows_by_cluster = None
        self.rows_by_batch = None
        self.clusters = []
        self.new_cluster_profile = []

        # data model
        self.cluster_info_model = None

        # Info
        info_box = vBox(self.controlArea, 'Info')
        self.input_info = widgetLabel(info_box)

        # Cluster selection
        self.cluster_indicator_model = itemmodels.DomainModel(
            valid_types=(DiscreteVariable, ), separators=False)
        self.cluster_indicator_box = widgetBox(self.controlArea,
                                               'Cluster Indicator')

        # Multi-selection list bound to the 'cluster_indicators' setting.
        self.cluster_indicator_view = listView(
            self.cluster_indicator_box,
            self,
            'cluster_indicators',
            model=self.cluster_indicator_model,
            selectionMode=QListWidget.MultiSelection,
            callback=self.invalidate,
            sizeHint=QSize(256, 70))

        # Batch selection
        self.batch_indicator_model = itemmodels.DomainModel(
            valid_types=(DiscreteVariable, ), separators=False, placeholder="")
        box = widgetBox(self.controlArea, 'Batch Indicator')
        self.batch_indicator_combobox = comboBox(
            box,
            self,
            'batch_indicator',
            model=self.batch_indicator_model,
            sendSelectedValue=True,
            callback=self.batch_indicator_changed)

        # Gene scoring
        box = widgetBox(self.controlArea, 'Gene Scoring')
        self.gene_scoring = GeneScoringWidget(box, self)
        self.gene_scoring.set_method_selection_area('scoring_method_selection')
        self.gene_scoring.set_method_design_area('scoring_method_design')
        self.gene_scoring.set_test_type('scoring_test_type')

        # Gene Sets widget
        gene_sets_box = widgetBox(self.controlArea, "Gene Sets")
        self.gs_widget = GeneSetsSelection(gene_sets_box, self,
                                           'stored_gene_sets_selection')
        self.gs_widget.hierarchy_tree_widget.itemClicked.connect(
            self.__gene_sets_enrichment)

        # custom gene sets area
        box = vBox(self.controlArea, "Custom Gene Sets")

        # Drop a stored indicator that the (still empty) feature model
        # does not contain.
        if self.custom_gene_set_indicator not in self.feature_model:
            self.custom_gene_set_indicator = None

        self.gs_label_combobox = comboBox(
            box,
            self,
            "custom_gene_set_indicator",
            sendSelectedValue=True,
            model=self.feature_model,
            callback=self.handle_custom_gene_sets)
        # Starts disabled; it is not enabled anywhere in this method.
        self.gs_label_combobox.setDisabled(True)

        # main area
        splitter = QSplitter(Qt.Horizontal, self.mainArea)
        self.mainArea.layout().addWidget(splitter)

        # Gene filters: every control has a check box bound to its
        # 'use_*' setting and re-runs filter_genes on change.
        genes_filter = widgetBox(splitter,
                                 'Filter Genes',
                                 orientation=QHBoxLayout())
        # NOTE(review): the bound setting is 'max_gene_count' but the tooltip
        # reads 'Minimum genes count' — looks copy-pasted from the gene sets
        # filter below; confirm the intended wording.
        spin(genes_filter,
             self,
             'max_gene_count',
             0,
             10000,
             label='Count',
             tooltip='Minimum genes count',
             checked='use_gene_count_filter',
             callback=self.filter_genes,
             callbackOnReturn=True,
             checkCallback=self.filter_genes)

        doubleSpin(genes_filter,
                   self,
                   'max_gene_p_value',
                   0.0,
                   1.0,
                   0.0001,
                   label='p-value',
                   tooltip='Maximum p-value of the enrichment score',
                   checked='use_gene_pval_filter',
                   callback=self.filter_genes,
                   callbackOnReturn=True,
                   checkCallback=self.filter_genes)

        doubleSpin(genes_filter,
                   self,
                   'max_gene_fdr',
                   0.0,
                   1.0,
                   0.0001,
                   label='FDR',
                   tooltip='Maximum false discovery rate',
                   checked='use_gene_fdr_filter',
                   callback=self.filter_genes,
                   callbackOnReturn=True,
                   checkCallback=self.filter_genes)

        # Gene set filters: same layout as above, wired to filter_gene_sets.
        gene_sets_filter = widgetBox(splitter,
                                     'Filter Gene Sets',
                                     orientation=QHBoxLayout())
        spin(gene_sets_filter,
             self,
             'min_gs_count',
             0,
             DISPLAY_GENE_SETS_COUNT,
             label='Count',
             tooltip='Minimum genes count',
             checked='use_gs_count_filter',
             callback=self.filter_gene_sets,
             callbackOnReturn=True,
             checkCallback=self.filter_gene_sets)

        doubleSpin(gene_sets_filter,
                   self,
                   'max_gs_p_value',
                   0.0,
                   1.0,
                   0.0001,
                   label='p-value',
                   tooltip='Maximum p-value of the enrichment score',
                   checked='use_gs_pval_filter',
                   callback=self.filter_gene_sets,
                   callbackOnReturn=True,
                   checkCallback=self.filter_gene_sets)

        doubleSpin(gene_sets_filter,
                   self,
                   'max_gs_fdr',
                   0.0,
                   1.0,
                   0.0001,
                   label='FDR',
                   tooltip='Maximum false discovery rate',
                   checked='use_gs_max_fdr',
                   callback=self.filter_gene_sets,
                   callbackOnReturn=True,
                   checkCallback=self.filter_gene_sets)

        # Table for the cluster info: both headers hidden, columns stretched,
        # cells drawn by HTMLDelegate.
        self.cluster_info_view = QTableView()
        self.cluster_info_view.verticalHeader().setVisible(False)
        self.cluster_info_view.setItemDelegate(HTMLDelegate())
        self.cluster_info_view.horizontalHeader().hide()
        self.cluster_info_view.horizontalHeader().setSectionResizeMode(
            QHeaderView.Stretch)

        auto_commit(self.controlArea,
                    self,
                    "auto_commit",
                    "&Commit",
                    box=False)

        # Added to the main area layout after the splitter.
        self.mainArea.layout().addWidget(self.cluster_info_view)

    def sizeHint(self):
        """Return the preferred initial size of the widget window."""
        width, height = 800, 600
        return QSize(width, height)

    def __update_info_box(self):
        """Refresh the input summary label from the current widget state.

        The text reports the number of samples, clusters and genes of the
        data input (or that no genes are available) and, when custom gene
        sets are connected, the number of marker genes and sets.
        """
        lines = []

        if not self.input_genes_ids:
            lines.append('No genes on input.\n')
        else:
            n_samples = self.input_data.X.shape[0]
            # '?' is shown while the clusters have not been built
            n_clusters = len(self.clusters) if self.clusters else '?'
            lines.append('{} samples, {} clusters\n'.format(
                n_samples, n_clusters))
            lines.append('{:,d} unique genes\n'.format(
                len(self.input_genes_ids)))

        if self.custom_data:
            n_markers = self.custom_data.X.shape[0]
            lines.append('{} marker genes in {} sets\n'.format(
                n_markers, self.num_of_custom_sets))

        self.input_info.setText(''.join(lines))

    def __set_cluster_info_model(self):
        """Build a new ClusterModel from ``self.clusters`` and show it."""
        view = self.cluster_info_view
        # detach the previous model before the replacement is built
        view.setModel(None)

        model = ClusterModel(self)
        self.cluster_info_model = model
        model.add_rows(self.clusters)

        # add model to the view
        view.setModel(model)
        # resize rows to their content (this is what calls sizeHint)
        view.resizeRowsToContents()
        # the selection model belongs to the model that was just set, so
        # the commit handler is connected again for every new model
        view.selectionModel().selectionChanged.connect(self.commit)

    def __create_temp_class_var(self):
        """Merge several cluster indicators into one temporary variable.

        Every combination of values of the variables in
        ``self.cluster_indicators`` becomes one value of a new discrete
        variable named 'Cluster indicators'. For each combination the
        per-variable value indexes are appended to
        ``self.new_cluster_profile``. The new variable is added to the metas
        of the input table, ``self.input_data`` is replaced by the
        transformed table, and every row is assigned the combination that
        matches its indicator values.

        :return: the newly created combined variable
        """
        cluster_indicator_name = 'Cluster indicators'

        new_cluster_values = []
        # Pair each value with its index inside its own variable. A lookup
        # keyed by value name alone breaks as soon as two indicators share a
        # value name at different positions.
        indexed_values = [
            list(enumerate(var.values)) for var in self.cluster_indicators
        ]
        for comb in itertools.product(*indexed_values):
            new_cluster_values.append(', '.join(val for _, val in comb))
            self.new_cluster_profile.append([idx for idx, _ in comb])

        # index profile of a combination -> index of the combined value
        row_profile_lookup = {
            tuple(profile): indx
            for indx, profile in enumerate(self.new_cluster_profile)
        }

        # one row per indicator variable, one column per data instance
        row_profile = np.vstack([
            np.asarray(self.input_data.get_column_view(var)[0], dtype=int)
            for var in self.cluster_indicators
        ])

        ca_ind = DiscreteVariable.make(
            cluster_indicator_name,
            values=list(new_cluster_values),
            ordered=True)

        domain = Domain(self.input_data.domain.attributes,
                        self.input_data.domain.class_vars,
                        self.input_data.domain.metas + (ca_ind, ))

        table = self.input_data.transform(domain)
        table[:, ca_ind] = np.array(
            [[row_profile_lookup[tuple(row_profile[:, i])]]
             for i in range(row_profile.shape[1])])
        self.input_data = table
        return ca_ind

    def __set_clusters(self):
        """Reset the cluster state and rebuild it from the chosen indicators.

        With more than one indicator a temporary combined variable is
        created; otherwise the single selected indicator is used directly.
        """
        self.clusters = []
        self.new_cluster_profile = []
        self.cluster_var = None

        if not self.cluster_indicators or not self.input_data:
            return

        indicators = self.cluster_indicators
        if isinstance(indicators, list) and len(indicators) > 1:
            self.cluster_var = self.__create_temp_class_var()
        else:
            self.cluster_var = indicators[0]

        # cluster index of every row of the input table
        cluster_column = self.input_data.get_column_view(self.cluster_var)[0]
        self.rows_by_cluster = np.asarray(cluster_column, dtype=int)

        for position, label in enumerate(self.cluster_var.values):
            new_cluster = Cluster(label, position)
            self.clusters.append(new_cluster)
            new_cluster.set_genes(self.input_genes_names,
                                  self.input_genes_ids)

    def __set_batch(self):
        """Recompute ``self.rows_by_batch`` for the selected batch indicator.

        ``rows_by_batch`` is reset to None first. When the batch indicator is
        the same variable as the cluster variable the
        ``cluster_batch_conflict`` error is shown and ``rows_by_batch`` stays
        None.
        """
        self.Error.cluster_batch_conflict.clear()
        self.rows_by_batch = None

        if self.batch_indicator is None:
            # No batch selected: nothing to compute and nothing to conflict
            # with. Without this guard `None == None` reported a conflict
            # whenever no cluster variable was set either.
            return

        if self.batch_indicator == self.cluster_var:
            self.Error.cluster_batch_conflict()
            return

        if self.batch_indicator and self.input_data:
            batch_column = self.input_data.get_column_view(
                self.batch_indicator)[0]
            self.rows_by_batch = np.asarray(batch_column, dtype=int)

    def __set_genes(self):
        """Collect gene names and gene ids from the input table columns.

        Both lists stay empty unless ``self.use_attr_names`` is set. A column
        without the gene id attribute contributes ``str(np.nan)``.
        """
        self.input_genes_names = []
        self.input_genes_ids = []

        if not self.use_attr_names:
            return

        columns = self.input_data.domain.attributes
        self.input_genes_names.extend([str(column.name) for column in columns])
        self.input_genes_ids.extend([
            str(column.attributes.get(self.gene_id_attribute, np.nan))
            for column in columns
        ])

    def filter_genes(self):
        """Apply the enabled gene filters and refresh everything downstream.

        Does nothing while there is no cluster model.
        """
        if not self.cluster_info_model:
            return

        # a filter that is switched off is passed as None
        p_value_limit = (self.max_gene_p_value
                         if self.use_gene_pval_filter else None)
        fdr_limit = self.max_gene_fdr if self.use_gene_fdr_filter else None
        count_limit = (self.max_gene_count
                       if self.use_gene_count_filter else None)
        self.cluster_info_model.apply_gene_filters(p_value_limit, fdr_limit,
                                                   count_limit)

        # gene set enrichment depends on the filtered genes: recalculate it
        self.__gene_sets_enrichment()
        # resize rows to their content (calls sizeHint)
        self.cluster_info_view.resizeRowsToContents()

        # commit changes after filter
        self.commit()

    def filter_gene_sets(self):
        """Apply the enabled gene set filters and resize the table rows.

        Does nothing while there is no cluster model.
        """
        if not self.cluster_info_model:
            return

        # a filter that is switched off is passed as None
        p_value_limit = self.max_gs_p_value if self.use_gs_pval_filter else None
        fdr_limit = self.max_gs_fdr if self.use_gs_max_fdr else None
        count_limit = self.min_gs_count if self.use_gs_count_filter else None
        self.cluster_info_model.apply_gene_sets_filters(
            p_value_limit, fdr_limit, count_limit)

        # resize rows to their content (calls sizeHint)
        self.cluster_info_view.resizeRowsToContents()

    def __gene_enrichment(self):
        """Score the genes of every cluster with the selected settings.

        A ValueError raised by the data check or by the scoring is shown as
        the ``gene_enrichment`` warning instead of being propagated.
        """
        scoring = self.gene_scoring
        # True: cluster vs. cluster, False: cluster vs. rest
        design = bool(scoring.get_selected_desig())
        test_type = scoring.get_selected_test_type()
        method = scoring.get_selected_method()

        try:
            if method.score_function == score_hypergeometric_test:
                # this test is only run on two-valued data that contains 0
                distinct_values = set(np.unique(self.input_data.X))
                if len(distinct_values) != 2 or 0 not in distinct_values:
                    raise ValueError('Binary data expected (use Preprocess)')

            self.cluster_info_model.score_genes(
                design=design,
                table_x=self.input_data.X,
                rows_by_cluster=self.rows_by_cluster,
                rows_by_batch=self.rows_by_batch,
                method=method,
                alternative=test_type)
        except ValueError as error:
            self.Warning.gene_enrichment(str(error), 'p-values are set to 1')

    def __gene_sets_enrichment(self):
        """Run gene set enrichment for the selected hierarchies.

        Does nothing without input data. Shows the ``no_selected_gene_sets``
        warning when gene sets exist but none is selected, remembers the
        selection in ``stored_gene_sets_selection`` and re-applies the gene
        set filters afterwards. Exceptions raised by the enrichment itself
        propagate to the caller unchanged.
        """
        if not self.input_data:
            return

        self.Warning.no_selected_gene_sets.clear()
        all_sets = self.gs_widget.get_hierarchies()
        selected_sets = self.gs_widget.get_hierarchies(only_selected=True)

        if len(selected_sets) == 0 and len(all_sets) > 0:
            self.Warning.no_selected_gene_sets()

        # save setting on selected hierarchies
        self.stored_gene_sets_selection = tuple(selected_sets)
        ref_genes = set(self.input_genes_ids)

        # TODO: decide which exceptions can be handled here. The former
        # `except Exception as e: raise e` only re-raised, so it was removed.
        self.cluster_info_model.gene_sets_enrichment(
            self.gs_widget.gs_object, selected_sets, ref_genes)

        self.filter_gene_sets()

    def invalidate(self, cluster_init=True):
        """Recompute genes, clusters, batches and gene scores.

        Nothing happens unless both input data and a taxonomy id are known.

        :param cluster_init: rebuild the clusters as well; pass False when
            only the batch indicator changed
        """
        if self.input_data is None or self.tax_id is None:
            return

        self.Warning.gene_enrichment.clear()

        running_model = self.cluster_info_model
        if running_model is not None:
            running_model.cancel()

        self.__set_genes()
        if cluster_init:
            self.__set_clusters()
        self.__set_batch()
        self.__set_cluster_info_model()

        # note: gene set enrichment is calculated automatically as part of
        #       self.__gene_enrichment, so self.__gene_sets_enrichment is
        #       not called here
        self.__gene_enrichment()
        self.__update_info_box()

    def batch_indicator_changed(self):
        """Recompute the results for a newly selected batch indicator.

        The clusters are kept as they are; only the remaining steps of
        ``invalidate`` are run again.
        """
        self.invalidate(cluster_init=False)

    @Inputs.data_table
    def handle_input(self, data):
        """Handle a new table (or its removal) on the data input.

        All state derived from the previous input is reset first. When
        ``data`` is truthy the indicator models are filled from its domain,
        the taxonomy id and gene annotation settings are read from the table
        attributes, the stored context is opened and everything is
        recomputed through ``invalidate``.

        :param data: the new input table, or None when the input is removed
        """
        self.closeContext()
        self.Warning.clear()
        self.Error.clear()

        # forget everything that was derived from the previous input
        self.input_data = None
        self.store_input_domain = None
        self.stored_gene_sets_selection = tuple()
        self.input_genes_names = []
        self.input_genes_ids = []
        self.tax_id = None
        self.use_attr_names = None
        self.gene_id_attribute = None
        self.clusters = None

        self.gs_widget.clear()
        self.gs_widget.clear_gene_sets()
        self.cluster_info_view.setModel(None)

        self.cluster_indicators = []
        self.cluster_var = None
        self.batch_indicator = None
        self.cluster_indicator_model.set_domain(None)
        self.batch_indicator_model.set_domain(None)

        # show the "no input" state until the new data is processed
        self.__update_info_box()

        if data:
            self.input_data = data

            self.cluster_indicator_model.set_domain(self.input_data.domain)
            self.batch_indicator_model.set_domain(self.input_data.domain)

            # For Cluster Indicator do not use categorical variables that contain only one value.
            self.cluster_indicator_model.wrap([
                item for item in self.cluster_indicator_model
                if len(item.values) > 1
            ])
            # First value in batch indicator model is a NoneType,
            # we can skip it when we validate categorical variables
            self.batch_indicator_model.wrap(self.batch_indicator_model[:1] + [
                item for item in self.batch_indicator_model[1:]
                if len(item.values) > 1
            ])

            # gene annotation settings are carried in the table attributes
            self.tax_id = self.input_data.attributes.get(TAX_ID, None)
            self.use_attr_names = self.input_data.attributes.get(
                GENE_AS_ATTRIBUTE_NAME, None)
            self.gene_id_attribute = self.input_data.attributes.get(
                GENE_ID_ATTRIBUTE, None)

            # both errors stop processing before the context is opened
            if not self.cluster_indicator_model:
                self.Error.no_cluster_indicator()
                return
            elif not self.use_attr_names:
                self.Error.gene_as_attributes()
                return

            self.openContext(self.input_data.domain)

            self.gs_widget.load_gene_sets(self.tax_id)
            # fall back to the first available indicators when the opened
            # context did not provide a selection
            if self.cluster_indicator_model and len(
                    self.cluster_indicators) < 1:
                self.cluster_indicators = [self.cluster_indicator_model[0]]
            if self.batch_indicator_model and self.batch_indicator is None:
                self.batch_indicator = self.batch_indicator_model[0]

            self.invalidate()

            # custom gene sets that are already connected are added again
            # for the new data
            if self.custom_data:
                self.refresh_custom_gene_sets()
                self._handle_future_model()
                self.handle_custom_gene_sets()

    @Inputs.custom_sets
    def handle_custom_input(self, data):
        """Handle a new table (or its removal) on the custom gene sets input.

        The state of the previous custom input is reset, the annotation
        settings of the new table are read from its attributes, and the
        custom gene sets are rebuilt and selected.

        :param data: table with custom gene sets, or None when the input is
            removed
        """
        self.Error.clear()
        self.Warning.clear()
        self.closeContext()
        self.custom_data = None
        self.custom_tax_id = None
        self.custom_use_attr_names = None
        self.custom_gene_id_attribute = None
        self.custom_gene_id_column = None
        self.num_of_custom_sets = None
        self.feature_model.set_domain(None)

        if data:
            self.custom_data = data
            self.feature_model.set_domain(self.custom_data.domain)
            # Keep a missing taxonomy id as None. str(None) would store the
            # string 'None', which passes the `is not None` check of the
            # organism comparison and is reported as an organism mismatch.
            custom_tax_id = self.custom_data.attributes.get(TAX_ID, None)
            self.custom_tax_id = (None if custom_tax_id is None else
                                  str(custom_tax_id))
            self.custom_use_attr_names = self.custom_data.attributes.get(
                GENE_AS_ATTRIBUTE_NAME, None)
            self.custom_gene_id_attribute = self.custom_data.attributes.get(
                GENE_ID_ATTRIBUTE, None)
            self.custom_gene_id_column = self.custom_data.attributes.get(
                GENE_ID_COLUMN, None)

            self._handle_future_model()

        # the context was closed above; reopen it for the data input
        if self.input_data:
            self.openContext(self.input_data.domain)

        # stays disabled unless the custom sets are added successfully
        self.gs_label_combobox.setDisabled(True)
        self.refresh_custom_gene_sets()
        self.handle_custom_gene_sets(select_customs_flag=True)

    def __check_organism_mismatch(self):
        """ Tell whether the two inputs belong to different organisms.

        The taxonomy ids are only compared when both of them are known.

        :return: True if there is a mismatch
        """
        both_known = (self.tax_id is not None
                      and self.custom_tax_id is not None)
        return both_known and self.tax_id != self.custom_tax_id

    def _handle_future_model(self):
        """Bind ``custom_gene_set_indicator`` to an entry of the feature model.

        The current indicator is kept (as the model's own entry) when the
        model contains it; otherwise the first entry is used, or None when
        the model is empty.
        """
        model = self.feature_model
        current = self.custom_gene_set_indicator

        if current in model:
            self.custom_gene_set_indicator = model[model.indexOf(current)]
        elif model:
            self.custom_gene_set_indicator = model[0]
        else:
            self.custom_gene_set_indicator = None

    def handle_custom_gene_sets(self, select_customs_flag=False):
        """Add the custom gene sets to the gene sets widget and rescore.

        On an organism mismatch between the two inputs the error is shown,
        the hierarchy is refreshed without the custom sets and the method
        returns after recalculating the enrichment.

        :param select_customs_flag: forwarded to ``gs_widget.add_custom_sets``
        """
        indicator = self.custom_gene_set_indicator
        if indicator:
            can_build_sets = (self.custom_data is not None
                              and self.custom_gene_id_column is not None)
            if can_build_sets:
                if self.__check_organism_mismatch():
                    self.gs_label_combobox.setDisabled(True)
                    self.Error.organism_mismatch()
                    self.gs_widget.update_gs_hierarchy()
                    self.__gene_sets_enrichment()
                    return

                if isinstance(indicator, DiscreteVariable):
                    # discrete columns hold value indexes: map them to labels
                    value_labels = indicator.values
                    gene_sets_names = [
                        value_labels[int(value_index)]
                        for value_index in self.custom_data.get_column_view(
                            indicator)[0]
                    ]
                else:
                    gene_sets_names, _ = self.custom_data.get_column_view(
                        indicator)

                self.num_of_custom_sets = len(set(gene_sets_names))
                gene_names, _ = self.custom_data.get_column_view(
                    self.custom_gene_id_column)
                # the title is handed over as a one-element tuple
                hierarchy_title = (self.custom_data.name or 'Custom sets', )
                try:
                    self.gs_widget.add_custom_sets(
                        gene_sets_names,
                        gene_names,
                        hierarchy_title=hierarchy_title,
                        select_customs_flag=select_customs_flag)
                except GeneSetException:
                    # deliberately ignored, as before
                    pass
                self.gs_label_combobox.setDisabled(False)
            else:
                self.gs_widget.update_gs_hierarchy()

        self.__gene_sets_enrichment()
        self.__update_info_box()

    def refresh_custom_gene_sets(self):
        """Remove the custom gene sets from the gene sets widget.

        The hierarchy view is not refreshed here.
        """
        gene_sets_widget = self.gs_widget
        gene_sets_widget.clear_custom_sets()

    def gene_scores_output(self, selected_clusters):
        """Send a table with the filtered gene scores of the given clusters.

        Every row describes one gene of one cluster: the cluster (class
        variable), the values of the individual cluster indicators when a
        combined cluster variable is in use, and the gene name, id, rank,
        score, p-value and FDR as metas.

        :param selected_clusters: clusters whose filtered genes are exported
        """
        has_profiles = bool(len(self.new_cluster_profile))

        score_metas = [
            StringVariable('Gene'),
            StringVariable(NCBI_ID),
            StringVariable('Rank'),
            ContinuousVariable('Statistic score'),
            ContinuousVariable('P-value'),
            ContinuousVariable('FDR')
        ]
        # note: order is important
        metas = (self.cluster_indicators + score_metas
                 if has_profiles else score_metas)

        domain = Domain([], metas=metas, class_vars=self.cluster_var)

        rows = []
        for cluster in selected_clusters:
            genes = cluster.filtered_genes
            num_of_genes = len(genes)
            p_vals = [gene.p_val for gene in genes]

            # columns in domain order: class value, indicator values, metas
            columns = [[cluster.index] * num_of_genes]
            if has_profiles:
                columns.extend(
                    [value] * num_of_genes
                    for value in self.new_cluster_profile[cluster.index])
            columns.append([gene.input_name for gene in genes])
            columns.append([gene.ncbi_id for gene in genes])
            columns.append(rankdata(p_vals, method='min'))
            columns.append([gene.score for gene in genes])
            columns.append(p_vals)
            columns.append([gene.fdr for gene in genes])

            rows.extend(list(row) for row in zip(*columns))

        out_data = Table(domain, rows)
        out_data.attributes[TAX_ID] = self.tax_id
        out_data.attributes[GENE_AS_ATTRIBUTE_NAME] = False
        out_data.attributes[GENE_ID_COLUMN] = NCBI_ID
        self.Outputs.gene_scores.send(out_data)

    def gene_set_scores_output(self, selected_clusters):
        """Send a table with the filtered gene set scores of the clusters.

        Every row describes one gene set of one cluster: the cluster (class
        variable), the values of the individual cluster indicators when a
        combined cluster variable is in use, and the term name, term id,
        rank, p-value and FDR as metas.

        :param selected_clusters: clusters whose filtered gene sets are
            exported
        """
        has_profiles = bool(len(self.new_cluster_profile))

        score_metas = [
            StringVariable('Term'),
            StringVariable('Term ID'),
            StringVariable('Rank'),
            ContinuousVariable('P-value'),
            ContinuousVariable('FDR')
        ]
        # note: order is important
        metas = (self.cluster_indicators + score_metas
                 if has_profiles else score_metas)

        domain = Domain([], metas=metas, class_vars=self.cluster_var)

        rows = []
        for cluster in selected_clusters:
            gene_sets = cluster.filtered_gene_sets
            num_of_sets = len(gene_sets)
            p_vals = [gs.p_val for gs in gene_sets]

            # columns in domain order: class value, indicator values, metas
            columns = [[cluster.index] * num_of_sets]
            if has_profiles:
                columns.extend(
                    [value] * num_of_sets
                    for value in self.new_cluster_profile[cluster.index])
            columns.append([gs.name for gs in gene_sets])
            columns.append([gs.gs_id for gs in gene_sets])
            columns.append(rankdata(p_vals, method='min'))
            columns.append(p_vals)
            columns.append([gs.fdr for gs in gene_sets])

            rows.extend(list(row) for row in zip(*columns))

        self.Outputs.gene_set_scores.send(Table(domain, rows))

    def commit(self):
        """Send the outputs for the clusters selected in the table.

        With input data and a selection, three outputs are sent: the input
        rows of the selected clusters restricted to the columns of their
        filtered genes, the gene scores and the gene set scores. Without
        input data or without a selection all three outputs are cleared.
        """
        selection_model = self.cluster_info_view.selectionModel()
        # selectionModel() may be None (e.g. before any model was set on the
        # view); treat that like an empty selection instead of crashing
        selected_rows = (selection_model.selectedRows()
                         if selection_model is not None else [])

        if not self.input_data or not selected_rows:
            # clear every output so that downstream widgets do not keep the
            # scores of a previous selection
            self.Outputs.selected_data.send(None)
            self.Outputs.gene_scores.send(None)
            self.Outputs.gene_set_scores.send(None)
            return

        selected_clusters = []
        selected_cluster_indexes = set()
        selected_cluster_genes = set()
        for sel_row in selected_rows:
            cluster = sel_row.data()
            selected_clusters.append(cluster)
            selected_cluster_indexes.add(cluster.index)
            selected_cluster_genes.update(
                gene.ncbi_id for gene in cluster.filtered_genes)

        # get columns of selected clusters
        selected_columns = [
            column for column in self.input_data.domain.attributes
            if self.gene_id_attribute in column.attributes
            and str(column.attributes[
                self.gene_id_attribute]) in selected_cluster_genes
        ]

        domain = Domain(selected_columns, self.input_data.domain.class_vars,
                        self.input_data.domain.metas)
        output_data = self.input_data.from_table(domain, self.input_data)

        # get rows of selected clusters
        row_indexes = [
            row_index
            for row_index, cluster_index in enumerate(self.rows_by_cluster)
            if cluster_index in selected_cluster_indexes
        ]

        # send to output signal
        self.Outputs.selected_data.send(output_data[row_indexes])
        self.gene_scores_output(selected_clusters)
        self.gene_set_scores_output(selected_clusters)
예제 #4
0
class OWGenialisExpressions(widget.OWWidget, ConcurrentWidgetMixin):
    # Widget metadata
    name = 'Genialis Expressions'
    priority = 30
    want_main_area = True
    want_control_area = True
    icon = '../widgets/icons/OWGenialisExpressions.svg'

    # Emitted by update_collections_view with two flags: whether a next page
    # and whether a previous page of collections is available.
    pagination_availability = pyqtSignal(bool, bool)

    # Settings of the embedded components
    norm_component = settings.SettingProvider(NormalizationComponent)
    pagination_component = settings.SettingProvider(PaginationComponent)
    filter_component = settings.SettingProvider(CollapsibleFilterComponent)

    # Index of the selected radio button in the 'Expression Type' box
    exp_type: int
    exp_type = settings.Setting(None, schema_only=True)

    # Index of the selected radio button in the 'Process Name' box
    proc_type: int
    proc_type = settings.Setting(None, schema_only=True)

    # Index of the selected radio button in the 'Expression source' box
    input_annotation: int
    input_annotation = settings.Setting(None, schema_only=True)

    # State of the auto-commit checkbox next to the Commit button
    auto_commit: bool
    auto_commit = settings.Setting(False, schema_only=True)

    class Outputs:
        # the only output of the widget: a table of expressions
        table = Output('Expressions', Table)

    class Warning(widget.OWWidget.Warning):
        # warning messages the widget can show
        no_expressions = Msg('Expression data objects not found.')
        no_data_objects = Msg('No expression data matches the selected filtering options.')
        unexpected_feature_type = Msg('Can not import expression data, unexpected feature type "{}".')
        multiple_feature_type = Msg('Can not import expression data, multiple feature types found.')

    def __init__(self):
        """Build the control and main areas and sign in silently."""
        super().__init__()
        ConcurrentWidgetMixin.__init__(self)

        # backing fields of the `res` and `data_objects` properties
        self._res = None
        self._data_objects: Optional[List[Data]] = None
        self.data_output_options: Optional[DataOutputOptions] = None
        self.data_table: Optional[Table] = None

        # Control area
        box = gui.widgetBox(self.controlArea, 'Sign in')
        self.user_info = gui.label(box, self, '')
        self.server_info = gui.label(box, self, '')

        # the two buttons sit side by side inside the 'Sign in' box
        box = gui.widgetBox(box, orientation=Qt.Horizontal)
        self.sign_in_btn = gui.button(box, self, 'Sign In', callback=self.sign_in, autoDefault=False)
        self.sign_out_btn = gui.button(box, self, 'Sign Out', callback=self.sign_out, autoDefault=False)

        # The three radio button groups start empty; their buttons are
        # created by the set_*_options methods.
        self.exp_type_box = gui.widgetBox(self.controlArea, 'Expression Type')
        self.exp_type_options = gui.radioButtons(
            self.exp_type_box, self, 'exp_type', callback=self.on_data_output_option_changed
        )

        self.proc_type_box = gui.widgetBox(self.controlArea, 'Process Name')
        self.proc_type_options = gui.radioButtons(
            self.proc_type_box, self, 'proc_type', callback=self.on_data_output_option_changed
        )

        self.input_anno_box = gui.widgetBox(self.controlArea, 'Expression source')
        self.input_anno_options = gui.radioButtons(
            self.input_anno_box, self, 'input_annotation', callback=self.on_data_output_option_changed
        )

        self.norm_component = NormalizationComponent(self, self.controlArea)
        self.norm_component.options_changed.connect(self.on_normalization_changed)

        gui.rubber(self.controlArea)
        self.commit_button = gui.auto_commit(self.controlArea, self, 'auto_commit', '&Commit', box=False)
        self.commit_button.button.setAutoDefault(False)

        # Main area
        self.table_view = QTableView()
        self.table_view.setAlternatingRowColors(True)
        self.table_view.viewport().setMouseTracking(True)
        self.table_view.setShowGrid(False)
        self.table_view.verticalHeader().hide()
        self.table_view.horizontalHeader().setSectionResizeMode(QHeaderView.ResizeToContents)
        self.table_view.horizontalHeader().setStretchLastSection(True)
        # whole rows are selected, one at a time
        self.table_view.setSelectionBehavior(QAbstractItemView.SelectRows)
        self.table_view.setSelectionMode(QAbstractItemView.SingleSelection)
        # self.table_view.setStyleSheet('QTableView::item:selected{background-color: palette(highlight); color: palette(highlightedText);};')

        self.model = GenialisExpressionsModel(self)
        self.model.setHorizontalHeaderLabels(TableHeader.labels())
        self.table_view.setModel(self.model)
        self.table_view.selectionModel().selectionChanged.connect(self.on_selection_changed)

        # main area, top to bottom: filter, collections table, pagination
        self.filter_component = CollapsibleFilterComponent(self, self.mainArea)
        self.filter_component.options_changed.connect(self.on_filter_changed)
        self.mainArea.layout().addWidget(self.table_view)
        self.pagination_component = PaginationComponent(self, self.mainArea)
        self.pagination_component.options_changed.connect(self.update_collections_view)

        # sign in without showing the dialog (see sign_in)
        self.sign_in(silent=True)

    def __invalidate(self):
        """Drop the loaded data and reset the related warnings and summary."""
        # assigned through the data_objects property setter
        self.data_objects = None
        self.data_table = None

        stale_warnings = (
            self.Warning.no_expressions,
            self.Warning.multiple_feature_type,
            self.Warning.unexpected_feature_type,
        )
        for warning in stale_warnings:
            warning.clear()

        self.info.set_output_summary(StateInfo.NoOutput)

    def set_input_annotation_options(self) -> None:
        """Rebuild the input annotation radio buttons.

        The existing buttons are removed; new ones are created from
        ``data_output_options.input_annotation`` and the first one is
        selected.
        """
        group = self.input_anno_options
        for stale_button in group.buttons:
            stale_button.deleteLater()
        group.buttons = []

        options = self.data_output_options
        if not options:
            return

        for source, species, build in options.input_annotation:
            # the source is only shown in the tooltip
            gui.appendRadioButton(
                group,
                f'{species}, {build}',
                tooltip=f'{source}, {species}, {build}',
            )

        if group.buttons:
            self.input_annotation = 0

    def set_proc_type_options(self) -> None:
        """Rebuild the process type radio buttons.

        The existing buttons are removed; new ones are created from
        ``data_output_options.process`` and the first one is selected.
        """
        group = self.proc_type_options
        for stale_button in group.buttons:
            stale_button.deleteLater()
        group.buttons = []

        options = self.data_output_options
        if not options:
            return

        for process_type, process_name in options.process:
            # the button shows the name; the type is the tooltip
            gui.appendRadioButton(group, process_name, tooltip=process_type)

        if group.buttons:
            self.proc_type = 0

    def set_exp_type_options(self) -> None:
        """Rebuild the expression type radio buttons.

        The existing buttons are removed and new ones are created from
        ``data_output_options.expression``. The second button is selected
        when more than one button exists.
        """
        group = self.exp_type_options
        for stale_button in group.buttons:
            stale_button.deleteLater()
        group.buttons = []

        options = self.data_output_options
        if not options:
            return

        for _, expression_name in options.expression:
            gui.appendRadioButton(group, expression_name)

        if len(group.buttons) > 1:
            self.exp_type = 1

    @property
    def res(self):
        """The API connection currently in use (None until one is set)."""
        return self._res

    @res.setter
    def res(self, value: ResolweAPI):
        """Switch to a new connection and reset the widget state.

        Values that are not ``ResolweAPI`` instances are ignored.
        """
        if not isinstance(value, ResolweAPI):
            return

        self._res = value
        self.update_user_status()
        self.update_collections_view()
        self.__invalidate()
        self.Outputs.table.send(None)

    @property
    def data_objects(self):
        """The data objects currently held by the widget, or None."""
        return self._data_objects

    @data_objects.setter
    def data_objects(self, data_objects: Optional[List[Data]]):
        """Store the data objects and recompute ``data_output_options``."""
        self._data_objects = data_objects
        available_options = self._available_data_output_options()
        self.data_output_options = available_options

    def _available_data_output_options(self) -> Optional[DataOutputOptions]:
        """
        Collect the output options offered by the current data objects.

        The data objects are traversed for the available expression types,
        process types and input annotations (source, species, build). Each
        group is de-duplicated and sorted; 'Read Counts' is always offered
        as the first expression type.

        Returns a named tuple (`DataOutputOptions`) which is used for
        creating radio buttons in the control area, or None when there are
        no data objects.
        """
        data_objects = self.data_objects
        if not data_objects:
            return None

        exp_type_names = sorted({obj.output['exp_type'] for obj in data_objects})
        expressions = (
            Expression('rc', 'Read Counts'),
            *(Expression(name, name) for name in exp_type_names),
        )

        process_pairs = sorted({(obj.process.type, obj.process.name) for obj in data_objects})
        processes = tuple(Process(proc_type, proc_name) for proc_type, proc_name in process_pairs)

        annotation_triples = sorted(
            {(obj.output['source'], obj.output['species'], obj.output['build']) for obj in data_objects}
        )
        annotations = tuple(
            InputAnnotation(source, species, build) for source, species, build in annotation_triples
        )

        return DataOutputOptions(expression=expressions, process=processes, input_annotation=annotations)

    def update_user_status(self):
        """Show who is signed in and which server is used.

        The user label shows the full name, falling back to the username,
        or 'Anonymous' when nobody is signed in. The sign in / sign out
        buttons are enabled accordingly. The server label shows the
        connection URL without its scheme.
        """
        user = self.res.get_currently_logged_user()
        signed_in = bool(user)

        if signed_in:
            profile = user[0]
            full_name = f"{profile.get('first_name', '')} {profile.get('last_name', '')}".strip()
            user_info = f"User: {full_name if full_name else profile.get('username', '')}"
        else:
            user_info = 'User: Anonymous'

        self.sign_in_btn.setEnabled(not signed_in)
        self.sign_out_btn.setEnabled(signed_in)

        self.user_info.setText(user_info)
        # Strip the scheme whatever its length; the former fixed `[8:]`
        # slice was only right for 'https://' URLs.
        server = self.res.url.split('://', 1)[-1]
        self.server_info.setText(f'Server: {server}')

    def sign_in(self, silent=False):
        """Sign in and install the resulting connection as ``self.res``.

        :param silent: when True the dialog is not shown: its ``sign_in`` is
            called directly, and a connection to ``DEFAULT_URL`` is used if
            that yields no instance. When False the dialog is executed and
            the connection is only replaced if it returns a truthy result.
        """
        dialog = SignInForm(self)

        if silent:
            dialog.sign_in()
            instance = dialog.resolwe_instance
            self.res = instance if instance is not None else connect(url=DEFAULT_URL)
        elif dialog.exec_():
            self.res = dialog.resolwe_instance

    def sign_out(self):
        """Switch to the public connection and forget the stored credentials."""
        # Use public credentials when user signs out
        self.res = connect(url=DEFAULT_URL)

        # Remove the stored username and password
        credentials = CredentialManager(CREDENTIAL_MANAGER_SERVICE)
        for field in ('username', 'password'):
            delattr(credentials, field)

    def on_filter_changed(self):
        """Reset the pagination and reload the collections for a new filter."""
        pagination = self.pagination_component
        pagination.reset_pagination()
        self.update_collections_view()

    def get_query_parameters(self) -> Dict[str, str]:
        """Build the query parameters for the collections request.

        Paging and ordering are always included. The text filters and the
        modification date filter are only added when they are set.
        """
        pagination = self.pagination_component
        filters = self.filter_component

        params = {
            'limit': ItemsPerPage.values()[pagination.items_per_page],
            'offset': pagination.offset,
            'ordering': SortBy.values()[filters.sort_by],
        }

        optional_filters = (
            ('text', filters.filter_by_full_text),
            ('name__icontains', filters.filter_by_name),
            ('contributor_name', filters.filter_by_contrib),
            ('owners_name', filters.filter_by_owner),
        )
        for key, value in optional_filters:
            if value:
                params[key] = value

        modified_since = FilterByDateModified.values()[filters.filter_by_modified]
        if modified_since:
            params['modified__gte'] = modified_since.isoformat()

        return params

    def get_collections(self) -> Tuple[Dict[str, str], Dict[str, str]]:
        """Fetch collections and the species belonging to them.

        :return: a pair ``(response, species)``: the result of
            ``self.res.get_collections`` called with the current query
            parameters, and the result of ``self.res.get_species`` for the
            ids found under the response's ``'results'`` key.
        """
        # Ask the server for the collections matching the current query.
        response = self.res.get_collections(**self.get_query_parameters())

        # Gather the id of every returned collection.
        ids = []
        for entry in response.get('results', []):
            ids.append(entry['id'])

        # Look up species for those ids.
        species_by_collection = self.res.get_species(ids)
        return response, species_by_collection

    def update_collections_view(self):
        """Reload the table model and announce which pagination steps exist."""
        response, species_by_collection = self.get_collections()

        # Hand the results over to the data model and configure the columns.
        self.model.set_data(response.get('results', []), species_by_collection)
        view = self.table_view
        view.setItemDelegateForColumn(TableHeader.id, gui.LinkStyledItemDelegate(view))
        for column in (TableHeader.slug, TableHeader.tags):
            view.setColumnHidden(column, True)

        # Emit pagination_availability with (next available, previous available).
        has_next = bool(response.get('next'))
        has_previous = bool(response.get('previous'))
        self.pagination_availability.emit(has_next, has_previous)

    def normalize(self, table: Table) -> Optional[Table]:
        """Apply the normalization steps enabled in ``self.norm_component``.

        Steps run in a fixed order: quantile normalization, logarithmic
        scale, z-score, quantile transform. Each one is applied only when
        its flag on the component is truthy.

        :param table: table to normalize.
        :return: the processed table, or ``None`` when ``table`` is falsy.
        """
        if not table:
            return None

        opts = self.norm_component

        if opts.quantile_norm:
            table = QuantileNormalization()(table)

        if opts.log_norm:
            table = LogarithmicScale()(table)

        if opts.z_score_norm:
            table = ZScore(axis=opts.z_score_axis)(table)

        if opts.quantile_transform:
            transform_axis = opts.quantile_transform_axis
            # At most 100 quantiles, bounded by the size of the opposite axis.
            n_quantiles = min(table.X.shape[int(not transform_axis)], 100)
            dist = QuantileTransformDist.values()[opts.quantile_transform_dist]
            transform = QuantileTransform(
                axis=transform_axis, n_quantiles=n_quantiles, output_distribution=dist
            )
            table = transform(table)

        return table

    def commit(self):
        """Send the normalized cached table, or start a task to fetch the data.

        A task is started only when data objects are selected and no table
        is cached; in every other case the (possibly ``None``) cached table
        is normalized and sent to the output.
        """
        self.Warning.no_data_objects.clear()
        self.cancel()

        if not self.data_objects or self.data_table:
            self.Outputs.table.send(self.normalize(self.data_table))
            return

        self.start(
            runner,
            self.res,
            self.data_objects,
            self.data_output_options,
            self.exp_type,
            self.proc_type,
            self.input_annotation,
        )

    def on_data_output_option_changed(self):
        """Drop the cached table and commit again if data objects are selected."""
        self.data_table = None

        if not self.data_objects:
            return
        self.commit()

    def on_normalization_changed(self):
        """Commit again when data objects are selected; otherwise do nothing."""
        if not self.data_objects:
            return
        self.commit()

    def on_selection_changed(self):
        """React to a new row selection in the collections table.

        Fetches the expression data objects of the selected collection,
        refreshes the option controls, and commits. A warning is raised and
        nothing is committed when the collection has no expression data
        objects, mixes several feature types, or has a single feature type
        other than ``'gene'``.
        """
        self.__invalidate()

        collection_id: str = self.get_selected_row_data(TableHeader.id)
        if not collection_id:
            return

        self.data_objects = self.res.get_expression_data_objects(collection_id)
        for refresh_options in (
            self.set_exp_type_options,
            self.set_proc_type_options,
            self.set_input_annotation_options,
        ):
            refresh_options()

        if not self.data_objects:
            self.Warning.no_expressions()
            return

        # Note: a collection may contain data objects with different
        #       'feature_type' values. For now this only raises a warning;
        #       proper handling of different feature types is still to be
        #       discussed.
        kinds = {obj.output['feature_type'] for obj in self.data_objects}

        if len(kinds) > 1:
            self.Warning.multiple_feature_type()
            self.data_objects = []
            return

        if len(kinds) == 1 and 'gene' not in kinds:
            self.Warning.unexpected_feature_type(kinds.pop())
            self.data_objects = []
            return

        self.commit()

    def get_selected_row_data(self, column: int) -> Optional[str]:
        """Return the data of the first selected row in ``column``.

        :param column: column passed to the selection model's ``selectedRows``.
        :return: ``data()`` of the first selected index, or ``None`` when no
            row is selected.
        """
        selected = self.table_view.selectionModel().selectedRows(column=column)
        return selected[0].data() if selected else None

    def on_done(self, table: Table):
        """Cache the received table, report its shape and send it normalized.

        A falsy ``table`` is ignored.
        """
        if not table:
            return

        n_samples, n_genes = table.X.shape
        self.info.set_output_summary(f'Samples: {n_samples} Genes: {n_genes}')
        self.data_table = table
        self.Outputs.table.send(self.normalize(table))

    def on_exception(self, ex):
        """Handle an exception passed in as ``ex``.

        ``ResolweDataObjectsNotFound`` is turned into a warning and an empty
        output; any other exception is re-raised.
        """
        if not isinstance(ex, ResolweDataObjectsNotFound):
            raise ex

        self.Warning.no_data_objects()
        self.Outputs.table.send(None)
        self.data_table = None
        self.info.set_output_summary(StateInfo.NoOutput)

    def on_partial_result(self, result: Any) -> None:
        """Ignore partial results; this method does nothing with them."""
        return None

    def onDeleteWidget(self):
        """Call ``self.shutdown()`` and then the parent's ``onDeleteWidget``."""
        self.shutdown()
        super().onDeleteWidget()

    def sizeHint(self):
        """Return the widget's size hint: 1280 wide by 620 high."""
        width, height = 1280, 620
        return QSize(width, height)
예제 #5
0
파일: owtable.py 프로젝트: rekonder/orange3
    def set_dataset(self, data, tid=None):
        """Set, replace or remove the input dataset stored under ``tid``.

        With ``data`` given, the table view of an existing slot is reused
        (after resetting its model and sort indicator) or a new view is
        created; the view is then added to the tabs and made current. With
        ``data`` set to ``None``, the slot for ``tid`` (if any) is removed
        together with its view and tab. In both cases the selection is
        reset, the context reopened and the output committed.
        """
        self.closeContext()
        if data is not None:
            if tid in self._inputs:
                # update existing input slot
                slot = self._inputs[tid]
                view = slot.view
                # reset the (header) view state.
                view.setModel(None)
                view.horizontalHeader().setSortIndicator(-1, Qt.AscendingOrder)
            else:
                # No slot for this id yet: build and configure a new view.
                view = QTableView()
                view.setSortingEnabled(True)
                view.setHorizontalScrollMode(QTableView.ScrollPerPixel)

                if self.select_rows:
                    view.setSelectionBehavior(QTableView.SelectRows)

                header = view.horizontalHeader()
                header.setSectionsMovable(True)
                header.setSectionsClickable(True)
                header.setSortIndicatorShown(True)
                header.setSortIndicator(-1, Qt.AscendingOrder)

                # QHeaderView does not 'reset' the model sort column,
                # because there is no guarantee (requirement) that the
                # models understand the -1 sort column.
                def sort_reset(index, order):
                    # Forward only the "no sort column" (-1) case to the model.
                    if view.model() is not None and index == -1:
                        view.model().sort(index, order)

                header.sortIndicatorChanged.connect(sort_reset)

            view.dataset = data
            # The tab label is the data's ``name`` attribute, or "Data".
            self.tabs.addTab(view, getattr(data, "name", "Data"))

            self._setup_table_view(view, data)
            slot = TableSlot(tid, data, table_summary(data), view)
            view._input_slot = slot
            self._inputs[tid] = slot

            self.tabs.setCurrentIndex(self.tabs.indexOf(view))

            self.set_info(slot.summary)

            # The summary length may still be pending as a Future; once it
            # completes, queue a call to ``_update_info`` on this widget.
            if isinstance(slot.summary.len, concurrent.futures.Future):
                def update(f):
                    QMetaObject.invokeMethod(
                        self, "_update_info", Qt.QueuedConnection)

                slot.summary.len.add_done_callback(update)

        elif tid in self._inputs:
            # Input removed: drop the slot, its view and its tab.
            slot = self._inputs.pop(tid)
            view = slot.view
            view.hide()
            view.deleteLater()
            self.tabs.removeTab(self.tabs.indexOf(view))

            # Show the summary of whichever tab is current now, if any.
            current = self.tabs.currentWidget()
            if current is not None:
                self.set_info(current._input_slot.summary)

        # The tab bar is shown only when there is more than one tab.
        self.tabs.tabBar().setVisible(self.tabs.count() > 1)
        self.selected_rows = []
        self.selected_cols = []
        self.openContext(data)
        self.set_selection()
        self.commit()
예제 #6
0
class OWConcordance(OWWidget):
    """Widget showing a query word together with its surrounding context.

    Inputs are a corpus and, optionally, a query word; outputs are the
    documents behind the selected concordance rows and the concordance
    table produced by the model.
    """

    name = "Concordance"
    description = "Display the context of the word."
    icon = "icons/Concordance.svg"
    priority = 520

    class Inputs:
        corpus = Input("Corpus", Corpus)
        query_word = Input("Query Word", Topic)

    class Outputs:
        selected_documents = Output("Selected Documents", Corpus)
        concordances = Output("Concordances", Corpus)

    settingsHandler = PerfectDomainContextHandler(
        match_values=PerfectDomainContextHandler.MATCH_VALUES_ALL)
    autocommit = Setting(True)
    context_width = Setting(5)
    word = ContextSetting("", exclude_metas=False)
    selected_rows = Setting([], schema_only=True)

    class Warning(OWWidget.Warning):
        multiple_words_on_input = Msg("Multiple query words on input. "
                                      "Only the first one is considered!")

    def __init__(self):
        """Build the info box, width spinner, query line edit and table view."""
        super().__init__()

        self.corpus = None  # Corpus
        self.n_matching = ''  # Info on docs matching the word
        self.n_tokens = ''  # Info on tokens
        self.n_types = ''  # Info on types (unique tokens)
        self.is_word_on_input = False

        # Info attributes
        info_box = gui.widgetBox(self.controlArea, 'Info')
        gui.label(info_box, self, 'Tokens: %(n_tokens)s')
        gui.label(info_box, self, 'Types: %(n_types)s')
        gui.label(info_box, self, 'Matching: %(n_matching)s')

        # Width parameter
        gui.spin(self.controlArea,
                 self,
                 'context_width',
                 3,
                 10,
                 box=True,
                 label="Number of words:",
                 callback=self.set_width)

        gui.rubber(self.controlArea)

        # Search
        c_box = gui.widgetBox(self.mainArea, orientation="vertical")
        self.input = gui.lineEdit(c_box,
                                  self,
                                  'word',
                                  orientation=Qt.Horizontal,
                                  sizePolicy=QSizePolicy(
                                      QSizePolicy.MinimumExpanding,
                                      QSizePolicy.Fixed),
                                  label='Query:',
                                  callback=self.set_word,
                                  callbackOnType=True)
        self.input.setFocus()

        # Concordances view
        self.conc_view = QTableView()
        self.model = ConcordanceModel()
        self.conc_view.setModel(self.model)
        self.conc_view.setWordWrap(False)
        self.conc_view.setSelectionBehavior(QTableView.SelectRows)
        self.conc_view.setSelectionModel(DocumentSelectionModel(self.model))
        self.conc_view.setItemDelegate(HorizontalGridDelegate())
        self.conc_view.selectionModel().selectionChanged.connect(
            self.selection_changed)
        self.conc_view.horizontalHeader().hide()
        self.conc_view.setShowGrid(False)
        self.mainArea.layout().addWidget(self.conc_view)
        self.set_width()

        # Auto-commit box
        gui.auto_commit(self.controlArea, self, 'autocommit', 'Commit',
                        'Auto commit is on')

    def sizeHint(self):  # pragma: no cover
        """Return the widget's size hint: 600 wide by 400 high."""
        return QSize(600, 400)

    def set_width(self):
        """Pass ``context_width`` to the model, then re-apply the selection."""
        # Remember the selection before the model is updated.
        sel = self.conc_view.selectionModel().selection()
        self.model.set_width(self.context_width)
        if sel:
            self.conc_view.selectionModel().select(
                sel,
                QItemSelectionModel.SelectCurrent | QItemSelectionModel.Rows)

    def selection_changed(self):
        """Store the sorted, unique selected row numbers and commit."""
        selection = self.conc_view.selectionModel().selection()
        self.selected_rows = sorted(
            set(cell.row() for cell in selection.indexes()))
        self.commit()

    def set_selection(self, selection):
        """Select the given rows in the view; a falsy ``selection`` is a no-op.

        :param selection: iterable of row numbers to select.
        """
        if selection:
            sel = QItemSelection()
            for row in selection:
                index = self.conc_view.model().index(row, 0)
                sel.select(index, index)
            self.conc_view.selectionModel().select(
                sel,
                QItemSelectionModel.SelectCurrent | QItemSelectionModel.Rows)

    @Inputs.corpus
    def set_corpus(self, data=None):
        """Handle the corpus input and refresh the model and the query."""
        self.closeContext()
        self.corpus = data
        if data is None:  # data removed, clear selection
            self.selected_rows = []

        # The stored query is restored from the context only when the word
        # is not supplied through the query-word input.
        if not self.is_word_on_input:
            self.word = ""
            self.openContext(self.corpus)

        self.model.set_corpus(self.corpus)
        self.set_word()

    @Inputs.query_word
    def set_word_from_input(self, topic):
        """Handle the query-word input.

        While a non-empty topic is on the input, the line edit is disabled
        and the query is read from ``topic.metas[0, 0]``; a warning is shown
        when the topic has more than one row.
        """
        self.Warning.multiple_words_on_input.clear()
        if self.is_word_on_input:  # word changed, clear selection
            self.selected_rows = []
        self.is_word_on_input = topic is not None and len(topic) > 0
        self.input.setEnabled(not self.is_word_on_input)
        if self.is_word_on_input:
            if len(topic) > 1:
                self.Warning.multiple_words_on_input()
            self.word = topic.metas[0, 0]
            self.set_word()

    def set_word(self):
        """Clear the selection, pass the query to the model and commit."""
        self.selected_rows = []
        self.model.set_word(self.word)
        self.update_widget()
        self.commit()

    def handleNewSignals(self):
        """Apply ``selected_rows`` to the view."""
        self.set_selection(self.selected_rows)

    def resize_columns(self):
        """Give columns 0 and 2 equal widths on either side of column 1."""
        # Integer division: ``setColumnWidth`` takes an int, and Python
        # 3.10+ no longer converts a float argument implicitly, so true
        # division here would raise TypeError.
        col_width = (self.conc_view.width() -
                     self.conc_view.columnWidth(1)) // 2 - 12
        self.conc_view.setColumnWidth(0, col_width)
        self.conc_view.setColumnWidth(2, col_width)

    def resizeEvent(self, event):  # pragma: no cover
        """Re-balance the column widths whenever the widget is resized."""
        super().resizeEvent(event)
        self.resize_columns()

    def update_widget(self):
        """Resize the view's rows and columns and refresh the info labels."""
        self.conc_view.resizeColumnToContents(1)
        self.resize_columns()
        self.conc_view.resizeRowsToContents()

        if self.corpus is not None:
            # Matching is reported as 0 when the query is empty.
            self.n_matching = '{}/{}'.format(
                self.model.matching_docs() if self.word else 0,
                len(self.corpus))
            self.n_tokens = self.model.n_tokens
            self.n_types = self.model.n_types
        else:
            self.n_matching = ''
            self.n_tokens = ''
            self.n_types = ''

    def commit(self):
        """Send the selected documents (or ``None``) and the concordances."""
        # Map the selected rows to document indices via the model's
        # ``word_index``, dropping duplicates.
        selected_docs = sorted(
            set(self.model.word_index[row][0] for row in self.selected_rows))
        concordance = self.model.get_data()
        if selected_docs:
            selected = self.corpus[selected_docs]
            self.Outputs.selected_documents.send(selected)
        else:
            self.Outputs.selected_documents.send(None)
        self.Outputs.concordances.send(concordance)

    def send_report(self):
        """Report the query, the token/type/matching counts and the table."""
        view = self.conc_view
        model = self.conc_view.model()
        self.report_items("Concordances", (
            ("Query", model.word),
            ("Tokens", model.n_tokens),
            ("Types", model.n_types),
            ("Matching", self.n_matching),
        ))
        self.report_table(view)
예제 #7
0
class ExtendedTableView(QWidget):
    """Two table views side by side in a horizontal splitter.

    The left view (``genes_view``) uses a ``GeneItemDelegate`` and the right
    view (``info_view``) an ``HTMLDelegate``; their models are kept in
    ``genes_model`` and ``info_model``.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Only a ``parent`` passed by keyword is stored; otherwise None.
        self.ow = kwargs.get('parent', None)

        # Margin-free vertical layout holding the splitter.
        box = QVBoxLayout()
        box.setContentsMargins(0, 0, 0, 0)
        self.setLayout(box)

        self.splitter = QSplitter()
        self.splitter.setOrientation(Qt.Horizontal)

        # Data models are created later by the set_*_model methods.
        self.genes_model = None
        self.info_model = None

        # Left (genes) and right (info) views share the same header setup:
        # hidden horizontal header with stretched sections.
        self.genes_view = QTableView()
        self.info_view = QTableView()
        view_setup = (
            (self.genes_view, GeneItemDelegate()),
            (self.info_view, HTMLDelegate()),
        )
        for view, delegate in view_setup:
            header = view.horizontalHeader()
            header.hide()
            view.setItemDelegate(delegate)
            header.setSectionResizeMode(QHeaderView.Stretch)
            self.splitter.addWidget(view)

        box.addWidget(self.splitter)

    def set_genes_model(self, rows):
        """Create a fresh ``GeneMatcherModel`` and fill it with ``rows``."""
        model = GeneMatcherModel()
        self.genes_model = model
        model.add_rows(rows)

    def get_selected_gens(self):
        """Return the selected rows of the genes view (list of QModelIndex)."""
        selection_model = self.genes_selection_model()
        return selection_model.selectedRows()

    def reset_genes_model(self):
        """Schedule the genes model for deletion and forget it, if one is set."""
        if not self.genes_model:
            return
        self.genes_model.deleteLater()
        self.genes_model = None

    def genes_selection_model(self):
        """Return the selection model of the genes view."""
        return self.genes_view.selectionModel()

    def reset_info_model(self):
        """Delete the info model, if one is set, and detach it from the view."""
        if not self.info_model:
            return
        self.info_model.deleteLater()
        self.info_model = None
        self.info_view.setModel(None)

    def set_info_model(self, rows):
        """Show ``rows`` in the info view when the first filter is selected.

        For any other selected filter the info model is reset instead.
        """
        # The unpacking requires exactly three filter labels.
        unique, _partial, _unknown = range(len(self.ow.filter_labels))

        if self.ow.selected_filter != unique:
            self.reset_info_model()
            return

        # Build the model without icons, fill it and attach it to the view.
        self.info_model = GeneMatcherModel(show_icon=False)
        self.info_model.add_rows(rows)
        self.info_view.setModel(self.info_model)
        # Gene info cards are not selectable.
        self.info_view.setSelectionMode(QAbstractItemView.NoSelection)
        # Let the rows take the height their contents ask for.
        self.info_view.resizeRowsToContents()
예제 #8
0
class OWExplainPred(OWWidget):
    """Widget that explains a model's prediction for a single sample.

    Takes a data table, a model and a one-row sample. The computation runs
    on a ``ThreadExecutor``; intermediate results are shown in the table
    view and sent to the ``Explanations`` output as they arrive.
    """

    name = "Explain Predictions"
    description = "Computes attribute contributions to the final prediction with an approximation algorithm for shapely value"
    icon = "icons/ExplainPredictions.svg"
    priority = 200
    gui_error = settings.Setting(0.05)
    gui_p_val = settings.Setting(0.05)

    class Inputs:
        data = Input("Data", Table, default=True)
        model = Input("Model", Model, multiple=False)
        sample = Input("Sample", Table)

    class Outputs:
        explanations = Output("Explanations", Table)

    class Error(OWWidget.Error):
        sample_too_big = widget.Msg("Can only explain one sample at the time.")

    class Warning(OWWidget.Warning):
        unknowns_increased = widget.Msg(
            "Number of unknown values increased, Data and Sample domains mismatch.")

    def __init__(self):
        """Create the widget state, the result view and the controls."""
        super().__init__()
        self.data = None
        self.model = None
        self.to_explain = None
        self.explanations = None
        self.stop = True
        self.e = None
        # Set by ``cancel``; initialised here so ``_task_finished`` can
        # always read it.
        self.was_canceled = False

        self._task = None
        self._executor = ThreadExecutor()

        self.dataview = QTableView(verticalScrollBarPolicy=Qt.ScrollBarAlwaysOn,
                                   sortingEnabled=True,
                                   selectionMode=QTableView.NoSelection,
                                   focusPolicy=Qt.StrongFocus)

        self.dataview.sortByColumn(2, Qt.DescendingOrder)
        # NOTE(review): ``setResizeMode`` is the Qt4-era name; Qt5 bindings
        # call it ``setSectionResizeMode`` - confirm which binding is used.
        self.dataview.horizontalHeader().setResizeMode(QHeaderView.Stretch)

        # Empty table with the result columns, shown while there are no results.
        domain = Domain([ContinuousVariable("Score"),
                         ContinuousVariable("Error")],
                        metas=[StringVariable(name="Feature"), StringVariable(name="Value")])
        self.placeholder_table_model = TableModel(
            Table.from_domain(domain), parent=None)

        self.dataview.setModel(self.placeholder_table_model)

        info_box = gui.vBox(self.controlArea, "Info")
        self.data_info = gui.widgetLabel(info_box, "Data: N/A")
        self.model_info = gui.widgetLabel(info_box, "Model: N/A")
        self.sample_info = gui.widgetLabel(info_box, "Sample: N/A")

        criteria_box = gui.vBox(self.controlArea, "Stopping criteria")
        self.error_spin = gui.spin(criteria_box,
                                   self,
                                   "gui_error",
                                   0.01,
                                   1,
                                   step=0.01,
                                   label="Error < ",
                                   spinType=float,
                                   callback=self._update_error_spin,
                                   controlWidth=80,
                                   keyboardTracking=False)

        self.p_val_spin = gui.spin(criteria_box,
                                   self,
                                   "gui_p_val",
                                   0.01,
                                   1,
                                   step=0.01,
                                   label="Error p-value < ",
                                   spinType=float,
                                   callback=self._update_p_val_spin,
                                   controlWidth=80, keyboardTracking=False)

        gui.rubber(self.controlArea)

        self.cancel_button = gui.button(self.controlArea,
                                        self,
                                        "Stop Computation",
                                        callback=self.toggle_button,
                                        autoDefault=True,
                                        tooltip="Stops and restarts computation")
        self.cancel_button.setDisabled(True)

        predictions_box = gui.vBox(self.mainArea, "Model prediction")
        self.predict_info = gui.widgetLabel(predictions_box, "")

        self.mainArea.layout().addWidget(self.dataview)

        self.resize(640, 480)

    @Inputs.data
    @check_sql_input
    def set_data(self, data):
        """Set input 'Data' and update the data info label."""
        self.data = data
        self.explanations = None
        self.data_info.setText("Data: N/A")
        # A new data table invalidates the cached explainer.
        self.e = None
        if data is not None:
            if data.X.shape[0] == 1:
                inst = "1 instance and "
            else:
                inst = str(data.X.shape[0]) + " instances and "
            if data.X.shape[1] == 1:
                feat = "1 feature "
            else:
                feat = str(data.X.shape[1]) + " features"
            self.data_info.setText("Data: " + inst + feat)

    @Inputs.model
    def set_predictor(self, model):
        """Set input 'Model' and update the model info label."""
        self.model = model
        self.model_info.setText("Model: N/A")
        self.explanations = None
        # A new model invalidates the cached explainer.
        self.e = None
        if model is not None:
            self.model_info.setText("Model: " + str(model.name))

    @Inputs.sample
    @check_sql_input
    def set_sample(self, sample):
        """Set input 'Sample', checks if size is appropriate"""
        self.to_explain = sample
        self.explanations = None
        self.Error.sample_too_big.clear()
        self.sample_info.setText("Sample: N/A")
        if sample is not None:
            if len(sample.X) != 1:
                # Only a single-row sample is accepted.
                self.to_explain = None
                self.Error.sample_too_big()
            else:
                if sample.X.shape[1] == 1:
                    feat = "1 feature"
                else:
                    feat = str(sample.X.shape[1]) + " features"
                self.sample_info.setText("Sample: " + feat)
                if self.e is not None:
                    self.e.saved = False

    def handleNewSignals(self):
        """Cancel any running task, reset the view and start over."""
        if self._task is not None:
            self.cancel()
        assert self._task is None

        self.dataview.setModel(self.placeholder_table_model)
        self.predict_info.setText("")
        self.Warning.unknowns_increased.clear()
        self.stop = True
        self.cancel_button.setText("Stop Computation")
        self.commit_calc_or_output()

    def commit_calc_or_output(self):
        """Start a computation when data and sample are set, else just output."""
        if self.data is not None and self.to_explain is not None:
            self.commit_calc()
        else:
            self.commit_output()

    def commit_calc(self):
        """Transform the sample to the data domain and submit the explanation task.

        A warning is shown when the transformation changes the number of
        unknown values in the sample. Nothing is submitted while no model
        is set.
        """
        num_nan = np.count_nonzero(np.isnan(self.to_explain.X[0]))

        self.to_explain = self.to_explain.transform(self.data.domain)
        if num_nan != np.count_nonzero(np.isnan(self.to_explain.X[0])):
            self.Warning.unknowns_increased()
        if self.model is not None:
            # calculate contributions
            if self.e is None:
                self.e = ExplainPredictions(self.data,
                                            self.model,
                                            batch_size=min(
                                                len(self.data.X), 500),
                                            p_val=self.gui_p_val,
                                            error=self.gui_error)
            self._task = task = Task()

            # The callbacks below are handed to the explainer, which runs on
            # the executor; they reach the widget through queued invocations
            # of its slots.
            def callback(progress):
                # update progress bar
                QMetaObject.invokeMethod(
                    self, "set_progress_value", Qt.QueuedConnection, Q_ARG(int, progress))
                # Returns True once the task has been cancelled.
                if task.canceled:
                    return True
                return False

            def callback_update(table):
                QMetaObject.invokeMethod(
                    self, "update_view", Qt.QueuedConnection, Q_ARG(Orange.data.Table, table))

            def callback_prediction(class_value):
                QMetaObject.invokeMethod(
                    self, "update_model_prediction", Qt.QueuedConnection, Q_ARG(float, class_value))

            self.was_canceled = False
            explain_func = partial(
                self.e.anytime_explain, self.to_explain[0], callback=callback, update_func=callback_update, update_prediction=callback_prediction)

            self.progressBarInit(processEvents=None)
            task.future = self._executor.submit(explain_func)
            task.watcher = FutureWatcher(task.future)
            task.watcher.done.connect(self._task_finished)
            self.cancel_button.setDisabled(False)

    @pyqtSlot(Orange.data.Table)
    def update_view(self, table):
        """Show ``table`` in the view, keeping the header's sort order, and output it."""
        self.explanations = table
        model = TableModel(table, parent=None)
        header = self.dataview.horizontalHeader()
        model.sort(
            header.sortIndicatorSection(),
            header.sortIndicatorOrder())
        self.dataview.setModel(model)
        self.commit_output()

    @pyqtSlot(float)
    def update_model_prediction(self, value):
        """Display the model's prediction ``value`` for the sample."""
        self._print_prediction(value)

    @pyqtSlot(int)
    def set_progress_value(self, value):
        """Set the progress bar to ``value``."""
        self.progressBarSet(value, processEvents=False)

    @pyqtSlot(concurrent.futures.Future)
    def _task_finished(self, f):
        """
        Handle a finished (or cancelled) explanation task.

        Parameters:
        ----------
        f: concurrent.futures.Future
            future instance holding the result of the explanation task
        """
        assert self.thread() is QThread.currentThread()
        assert self._task is not None
        assert self._task.future is f
        assert f.done()

        self._task = None

        if not self.was_canceled:
            self.cancel_button.setDisabled(True)

        try:
            results = f.result()
        except Exception as ex:
            # Log and show the failure. Nothing else is reset here: this
            # widget keeps no ``results`` attribute to clear.
            log = logging.getLogger()
            log.exception(__name__, exc_info=True)
            self.error("Exception occured during evaluation: {!r}".format(ex))
        else:
            # The second element of the result is the table to display.
            self.update_view(results[1])

        self.progressBarFinished(processEvents=False)

    def commit_output(self):
        """
        Sends best-so-far results forward
        """
        self.Outputs.explanations.send(self.explanations)

    def toggle_button(self):
        """Switch the button between stopping and restarting the computation."""
        if self.stop:
            self.stop = False
            self.cancel_button.setText("Restart Computation")
            self.cancel()
        else:
            self.stop = True
            self.cancel_button.setText("Stop Computation")
            self.commit_calc_or_output()

    def cancel(self):
        """
        Cancel the current task (if any).
        """
        if self._task is not None:
            self._task.cancel()
            assert self._task.future.done()
            # disconnect the `_task_finished` slot
            self._task.watcher.done.disconnect(self._task_finished)
            self.was_canceled = True
            # Run the finish handler directly, now that the signal is disconnected.
            self._task_finished(self._task.future)

    def _print_prediction(self, class_value):
        """
        Parameters
        ----------
        class_value: float
            Number representing either index of predicted class value, looked up in domain, or predicted value (regression)
        """
        name = self.data.domain.class_vars[0].name
        if isinstance(self.data.domain.class_vars[0], ContinuousVariable):
            self.predict_info.setText(name + ":      " + str(class_value))
        else:
            self.predict_info.setText(
                name + ":      " + self.data.domain.class_vars[0].values[int(class_value)])

    def _update_error_spin(self):
        """Apply the new error threshold to the explainer and recompute."""
        self.cancel()
        if self.e is not None:
            self.e.error = self.gui_error
        self.handleNewSignals()

    def _update_p_val_spin(self):
        """Apply the new p-value threshold to the explainer and recompute."""
        self.cancel()
        if self.e is not None:
            self.e.p_val = self.gui_p_val
        self.handleNewSignals()

    def onDeleteWidget(self):
        """Cancel any running task before the widget is deleted."""
        self.cancel()
        super().onDeleteWidget()
class OWFeatureStatistics(widget.OWWidget):
    """Widget showing a table of per-feature statistics for the input data.

    The main area holds a ``QTableView`` backed by a
    ``FeatureStatisticsTableModel``; a ``DistributionDelegate`` is installed
    as the view's item delegate.
    """

    # Width : height ratio kept for the distribution column, and the bounds
    # (in pixels) applied to the resulting row height.
    HISTOGRAM_ASPECT_RATIO = (7, 3)
    MINIMUM_HISTOGRAM_HEIGHT = 50
    MAXIMUM_HISTOGRAM_HEIGHT = 80

    name = 'Feature Statistics'
    description = 'Show basic statistics for data features.'
    icon = 'icons/FeatureStatistics.svg'

    class Inputs:
        data = Input('Data', Table, default=True)

    want_main_area = True
    buttons_area_orientation = Qt.Vertical

    settingsHandler = DomainContextHandler()

    auto_commit = Setting(True)
    color_var = ContextSetting(None)  # type: Optional[Variable]
    filter_string = ContextSetting('')

    def __init__(self):
        """Build the info panel, the colour selector and the table view."""
        super().__init__()

        self.data = None  # type: Optional[Table]
        self.model = None  # type: Optional[FeatureStatisticsTableModel]

        # Information panel
        info_box = gui.vBox(self.controlArea, 'Info')
        info_box.setMinimumWidth(200)
        self.info_summary = gui.widgetLabel(info_box, wordWrap=True)
        self.info_attr = gui.widgetLabel(info_box, wordWrap=True)
        self.info_class = gui.widgetLabel(info_box, wordWrap=True)
        self.info_meta = gui.widgetLabel(info_box, wordWrap=True)
        self.set_info()

        # TODO: Implement filtering on the model
        # filter_box = gui.vBox(self.controlArea, 'Filter')
        # self.filter_text = gui.lineEdit(
        #     filter_box, self, value='filter_string',
        #     placeholderText='Filter variables by name',
        #     callback=self._filter_table_variables, callbackOnType=True,
        # )
        # shortcut = QShortcut(QKeySequence('Ctrl+f'), self, self.filter_text.setFocus)
        # shortcut.setWhatsThis('Filter variables by name')

        self.color_var_model = DomainModel(
            valid_types=(ContinuousVariable, DiscreteVariable),
            placeholder='None',
        )
        box = gui.vBox(self.controlArea, 'Histogram')
        self.cb_color_var = gui.comboBox(
            box, master=self, value='color_var',
            model=self.color_var_model, label='Color:', orientation=Qt.Horizontal,
        )
        self.cb_color_var.currentIndexChanged.connect(self.__color_var_changed)

        gui.rubber(self.controlArea)
        gui.auto_commit(
            self.buttonsArea, self, 'auto_commit', 'Send Selected Rows',
            'Send Automatically'
        )

        # Main area
        self.view = QTableView(
            showGrid=False,
            cornerButtonEnabled=False,
            sortingEnabled=True,
            selectionBehavior=QTableView.SelectRows,
            selectionMode=QTableView.MultiSelection,
            horizontalScrollMode=QTableView.ScrollPerPixel,
            verticalScrollMode=QTableView.ScrollPerPixel,
        )

        hheader = self.view.horizontalHeader()
        hheader.setStretchLastSection(False)
        # Contents precision specifies how many rows should be taken into
        # account when computing the sizes, 0 being the visible rows. This is
        # crucial, since otherwise the `ResizeToContents` section resize mode
        # would call `sizeHint` on every single row in the data before first
        # render. However, 0 cannot be used here, since it only appears to
        # work properly when the widget is actually shown. When the widget is
        # not shown, `sizeHint` is called on every row.
        hheader.setResizeContentsPrecision(5)
        # Set a nice default size so that headers have some space around titles
        hheader.setDefaultSectionSize(120)
        # Set individual column behaviour in `set_data` since the logical
        # indices must be valid in the model, which requires data.
        hheader.setSectionResizeMode(QHeaderView.Interactive)

        vheader = self.view.verticalHeader()
        vheader.setVisible(False)
        vheader.setSectionResizeMode(QHeaderView.Fixed)

        def bind_histogram_aspect_ratio(logical_index, _, new_size):
            """Force the horizontal and vertical header to maintain the defined
            aspect ratio specified for the histogram."""
            # Without a model there is no distribution column to follow.
            if self.model is None:
                return
            # Prevent function being executed more than once per resize
            if logical_index != self.model.Columns.DISTRIBUTION.index:
                return
            ratio_width, ratio_height = self.HISTOGRAM_ASPECT_RATIO
            unit_width = new_size / ratio_width
            new_height = unit_width * ratio_height
            effective_height = max(new_height, self.MINIMUM_HISTOGRAM_HEIGHT)
            effective_height = min(effective_height, self.MAXIMUM_HISTOGRAM_HEIGHT)
            # The section size is an integer number of pixels; the division
            # above may have produced a float.
            vheader.setDefaultSectionSize(int(effective_height))

        def keep_row_centered(logical_index, old_size, new_size):
            """When resizing the widget when scrolled further down, the
            positions of rows changes. Obviously, the user resized in order to
            better see the row of interest. This keeps that row centered."""
            # TODO: This does not work properly
            if self.model is None:
                return
            # Prevent function being executed more than once per resize
            if logical_index != self.model.Columns.DISTRIBUTION.index:
                return
            top_row = self.view.indexAt(self.view.rect().topLeft()).row()
            bottom_row = self.view.indexAt(self.view.rect().bottomLeft()).row()
            middle_row = top_row + (bottom_row - top_row) // 2
            self.view.scrollTo(self.model.index(middle_row, 0), QTableView.PositionAtCenter)

        hheader.sectionResized.connect(bind_histogram_aspect_ratio)
        hheader.sectionResized.connect(keep_row_centered)

        self.distribution_delegate = DistributionDelegate()
        self.view.setItemDelegate(self.distribution_delegate)

        self.mainArea.layout().addWidget(self.view)

    def sizeHint(self):
        """Return the preferred initial size of the widget (900 x 500)."""
        return QSize(900, 500)

    def _filter_table_variables(self):
        """Prepare the regular expression for filtering variables by name.

        NOTE(review): the expression is built but not applied anywhere yet;
        see the filtering TODO in ``__init__``.
        """
        regex = QRegExp(self.filter_string)
        # If the user explicitly types different cases, we assume they know
        # what they are searching for and account for letter case in filter
        different_case = (
            any(c.islower() for c in self.filter_string) and
            any(c.isupper() for c in self.filter_string)
        )
        if not different_case:
            regex.setCaseSensitivity(Qt.CaseInsensitive)

    @Inputs.data
    def set_data(self, data):
        """Handle new input data (or ``None``) and rebuild model and view."""
        self.closeContext()
        self.data = data

        if data is not None:
            self.model = FeatureStatisticsTableModel(data, parent=self)
            self.color_var_model.set_domain(data.domain)
            # Default to the first class variable, if there is one; a stored
            # context may override it in `openContext`.
            if data.domain.class_vars:
                self.color_var = data.domain.class_vars[0]
            self.openContext(self.data)
        else:
            self.model = None
            self.color_var_model.set_domain(None)

        self.view.setModel(self.model)
        self._filter_table_variables()

        self.distribution_delegate.clear()
        self.set_info()

        # The resize modes for individual columns must be set here, because
        # the logical index must be valid in `setSectionResizeMode`. It is not
        # valid when there is no data in the model.
        if self.model:
            columns, hheader = self.model.Columns, self.view.horizontalHeader()
            hheader.setSectionResizeMode(columns.ICON.index, QHeaderView.ResizeToContents)
            hheader.setSectionResizeMode(columns.DISTRIBUTION.index, QHeaderView.Stretch)

    @pyqtSlot(int)
    def __color_var_changed(self, new_index):
        """Pass the chosen colour variable to the delegate and repaint.

        Index 0 (the placeholder) and negative indices select no variable.
        """
        attribute = None if new_index < 1 else self.cb_color_var.model()[new_index]
        self.distribution_delegate.set_color_attribute(attribute)

        if self.model:
            for row_idx in range(self.model.rowCount()):
                index = self.model.index(
                    row_idx,
                    self.model.Columns.DISTRIBUTION.index)
                self.view.update(index)

    @staticmethod
    def _format_variables_string(variables):
        """Summarise ``variables`` by type, e.g. '2 numeric and 1 time variables'.

        Returns 'No variables' when none of the variables is of a listed type.
        """
        def is_of_type(var, var_type):
            # TimeVariable derives from ContinuousVariable; count such
            # variables as 'time' only, not as 'numeric' as well.
            if var_type is ContinuousVariable and isinstance(var, TimeVariable):
                return False
            return isinstance(var, var_type)

        agg = []
        for var_type_name, var_type in [
            ('categorical', DiscreteVariable),
            ('numeric', ContinuousVariable),
            ('time', TimeVariable),
            ('string', StringVariable)
        ]:
            n_vars = sum(1 for v in variables if is_of_type(v, var_type))
            if n_vars:
                agg.append(('%d %s' % (n_vars, var_type_name), n_vars))

        if not agg:
            return 'No variables'

        attrs, counts = zip(*agg)
        if len(attrs) > 1:
            var_string = ', '.join(attrs[:-1]) + ' and ' + attrs[-1]
        else:
            var_string = attrs[0]
        return plural('%s variable{s}' % var_string, sum(counts))

    def set_info(self):
        """Refresh the labels of the info panel from the current data."""
        if self.data is not None:
            self.info_summary.setText('<b>%s</b> contains %s with %s' % (
                self.data.name,
                plural('{number} instance{s}', self.model.n_instances),
                plural('{number} feature{s}', self.model.n_attributes)
            ))

            self.info_attr.setText(
                '<b>Attributes:</b><br>%s' %
                self._format_variables_string(self.data.domain.attributes)
            )
            self.info_class.setText(
                '<b>Class variables:</b><br>%s' %
                self._format_variables_string(self.data.domain.class_vars)
            )
            self.info_meta.setText(
                '<b>Metas:</b><br>%s' %
                self._format_variables_string(self.data.domain.metas)
            )
        else:
            self.info_summary.setText('No data on input.')
            self.info_attr.setText('')
            self.info_class.setText('')
            self.info_meta.setText('')

    def commit(self):
        """Not implemented yet; nothing is sent."""
        pass

    def send_report(self):
        """Not implemented yet; nothing is reported."""
        pass
예제 #10
0
class OWCorpusViewer(OWWidget):
    """Widget listing corpus documents and showing the selected ones as HTML.

    Documents can be filtered with a regular expression; matching and
    non-matching documents are sent on separate outputs.
    """

    name = "Corpus Viewer"
    description = "Display corpus contents."
    icon = "icons/CorpusViewer.svg"
    priority = 70

    inputs = [(IO.DATA, Table, 'set_data')]
    outputs = [(IO.MATCHED, Corpus, widget.Default), (IO.UNMATCHED, Corpus)]

    search_indices = ContextSetting([0])   # features included in search
    display_indices = ContextSetting([0])  # features for display
    show_tokens = Setting(False)
    autocommit = Setting(True)

    class Warning(OWWidget.Warning):
        no_feats_search = Msg('No features included in search.')
        no_feats_display = Msg('No features selected for display.')

    def __init__(self):
        """Build the info box, feature lists, filter line and document views."""
        super().__init__()

        self.corpus = None              # Corpus
        self.corpus_docs = None         # Documents generated from Corpus
        self.output_mask = []           # Output corpus indices
        self.doc_webview = None         # WebView for showing content
        self.search_features = []       # two copies are needed since Display allows drag & drop
        self.display_features = []

        # Info attributes
        self.update_info()
        info_box = gui.widgetBox(self.controlArea, 'Info')
        gui.label(info_box, self, 'Documents: %(n_documents)s')
        gui.label(info_box, self, 'Preprocessed: %(is_preprocessed)s')
        gui.label(info_box, self, '  ◦ Tokens: %(n_tokens)s')
        gui.label(info_box, self, '  ◦ Types: %(n_types)s')
        gui.label(info_box, self, 'POS tagged: %(is_pos_tagged)s')
        gui.label(info_box, self, 'N-grams range: %(ngram_range)s')
        gui.label(info_box, self, 'Matching: %(n_matching)s')

        # Search features
        self.search_listbox = gui.listBox(
            self.controlArea, self, 'search_indices', 'search_features',
            selectionMode=QListView.ExtendedSelection,
            box='Search features', callback=self.regenerate_docs,)

        # Display features
        display_box = gui.widgetBox(self.controlArea, 'Display features')
        self.display_listbox = gui.listBox(
            display_box, self, 'display_indices', 'display_features',
            selectionMode=QListView.ExtendedSelection,
            callback=self.show_docs, enableDragDrop=True)
        self.show_tokens_checkbox = gui.checkBox(display_box, self, 'show_tokens',
                                                 'Show Tokens && Tags', callback=self.show_docs)

        # Auto-commit box
        gui.auto_commit(self.controlArea, self, 'autocommit', 'Send data', 'Auto send is on')

        # Search
        self.filter_input = gui.lineEdit(self.mainArea, self, '',
                                         orientation=Qt.Horizontal,
                                         sizePolicy=QSizePolicy(QSizePolicy.MinimumExpanding,
                                                                QSizePolicy.Fixed),
                                         label='RegExp Filter:')
        self.filter_input.textChanged.connect(self.refresh_search)

        # Main area
        self.splitter = QSplitter(
            orientation=Qt.Horizontal,
            childrenCollapsible=False,
        )

        # Document list
        self.doc_list = QTableView()
        self.doc_list.setSelectionBehavior(QTableView.SelectRows)
        self.doc_list.setSelectionMode(QTableView.ExtendedSelection)
        self.doc_list.setEditTriggers(QAbstractItemView.NoEditTriggers)
        self.doc_list.horizontalHeader().setSectionResizeMode(QHeaderView.Stretch)
        self.doc_list.horizontalHeader().setVisible(False)
        self.splitter.addWidget(self.doc_list)

        self.doc_list_model = QStandardItemModel(self)
        self.doc_list.setModel(self.doc_list_model)
        self.doc_list.selectionModel().selectionChanged.connect(self.show_docs)

        # Document contents
        # For PyQt5 WebEngine's setHtml grabs the focus and makes typing hard
        # More info: http://stackoverflow.com/questions/36609489
        # To bypass the annoying behaviour disable the widget for WebEngine
        self.doc_webview = gui.WebviewWidget(self.splitter, self,
                                             debug=True, enabled=HAVE_WEBKIT)

        self.mainArea.layout().addWidget(self.splitter)

    def copy_to_clipboard(self):
        """Copy the text selected in the document view to the clipboard."""
        text = self.doc_webview.selectedText()
        QApplication.clipboard().setText(text)

    def set_data(self, data=None):
        """Input handler: reset the widget and load ``data`` (may be None).

        Input that is not already a ``Corpus`` is converted with
        ``Corpus.from_table``.
        """
        self.reset_widget()
        self.corpus = data
        if data is not None:
            if not isinstance(data, Corpus):
                self.corpus = Corpus.from_table(data.domain, data)
            self.load_features()
            self.regenerate_docs()
        self.commit()

    def reset_widget(self):
        """Clear the corpus, list boxes, filter, warnings and document view."""
        # Corpus
        self.corpus = None
        self.corpus_docs = None
        self.output_mask = []
        # Widgets
        self.search_listbox.clear()
        self.display_listbox.clear()
        self.filter_input.clear()
        self.update_info()
        # Models/vars
        self.search_features.clear()
        self.display_features.clear()
        self.search_indices.clear()
        self.display_indices.clear()
        self.doc_list_model.clear()
        # Warnings
        self.Warning.clear()
        # WebView
        self.doc_webview.setHtml('')

    def load_features(self):
        """Fill both feature lists from the corpus domain and select all."""
        self.search_indices = []
        self.display_indices = []
        if self.corpus is not None:
            domain = self.corpus.domain
            self.search_features = list(filter_visible(chain(domain.variables, domain.metas)))
            self.display_features = list(filter_visible(chain(domain.variables, domain.metas)))
            # FIXME: Select features based on ContextSetting
            self.search_indices = list(range(len(self.search_features)))
            self.display_indices = list(range(len(self.display_features)))

            # Enable/disable tokens checkbox
            has_tokens = self.corpus.has_tokens()
            if not has_tokens:
                self.show_tokens_checkbox.setCheckState(Qt.Unchecked)
            self.show_tokens_checkbox.setEnabled(has_tokens)

    def list_docs(self):
        """ List documents into the left scrolling area """
        search_keyword = self.filter_input.text().strip('|')
        try:
            reg = re.compile(search_keyword, re.IGNORECASE)
        except re.error:
            # Incomplete or invalid pattern: keep the current list as it is.
            return

        def is_match(x):
            return not bool(search_keyword) or reg.search(x)

        self.output_mask.clear()
        self.doc_list_model.clear()

        for i, (doc, title, content) in enumerate(zip(self.corpus, self.corpus.titles,
                                                      self.corpus_docs)):
            if is_match(content):
                item = QStandardItem()
                item.setData(title, Qt.DisplayRole)
                item.setData(doc, Qt.UserRole)
                self.doc_list_model.appendRow(item)
                self.output_mask.append(i)

        if self.doc_list_model.rowCount() > 0:
            self.doc_list.selectRow(0)          # Select the first document
        else:
            self.doc_webview.setHtml('')
        self.commit()

    def show_docs(self):
        """ Show the selected documents in the right area """
        HTML = '''
        <!doctype html>
        <html>
        <head>
        <meta charset='utf-8'>
        <style>

        table {{ border-collapse: collapse; }}
        mark {{ background: #FFCD28; }}

        tr > td {{
            padding-bottom: 3px;
            padding-top: 3px;
        }}

        body {{
            font-family: Helvetica;
            font-size: 10pt;
        }}

        .line {{ border-bottom: 1px solid #000; }}
        .separator {{ height: 5px; }}

        .variables {{
            vertical-align: top;
            padding-right: 10px;
        }}

        .token {{
            padding: 3px;
            border: 1px #B0B0B0 solid;
            margin-right: 5px;
            margin-bottom: 5px;
            display: inline-block;
        }}

        </style>
        </head>
        <body>
        {}
        </body>
        </html>
        '''
        if self.corpus is None:
            return

        self.Warning.no_feats_display.clear()
        if len(self.display_indices) == 0:
            self.Warning.no_feats_display()

        if self.show_tokens:
            tokens = list(self.corpus.ngrams_iterator(include_postags=True))

        marked_search_features = [f for i, f in enumerate(self.search_features)
                                  if i in self.search_indices]

        # Collect the fragments and join once instead of repeated `+=`.
        # NOTE(review): values are inserted into the markup unescaped.
        parts = ['<table>']
        for doc_count, index in enumerate(self.doc_list.selectionModel().selectedRows()):
            if doc_count > 0:   # add split
                parts.append('<tr class="line separator"><td/><td/></tr>'
                             '<tr class="separator"><td/><td/></tr>')

            doc = index.data(Qt.UserRole)
            row_ind = doc.row_index
            for ind in self.display_indices:
                feature = self.display_features[ind]
                mark = 'class="mark-area"' if feature in marked_search_features else ''
                value = doc[feature.name]
                parts.append('<tr><td class="variables"><strong>{}:</strong></td>'
                             '<td {}>{}</td></tr>'.format(feature.name, mark, value))

            if self.show_tokens:
                token_spans = ''.join('<span class="token">{}</span>'.format(token)
                                      for token in tokens[row_ind])
                parts.append('<tr><td class="variables"><strong>Tokens & Tags:</strong></td>'
                             '<td>{}</td></tr>'.format(token_spans))

        parts.append('</table>')
        html = ''.join(parts)

        self.doc_webview.setHtml(HTML.format(html))
        self.load_js()
        self.highlight_docs()

    def load_js(self):
        """Evaluate the bundled JavaScript files in the document view."""
        resources = os.path.join(os.path.dirname(__file__), 'resources')
        for script in ('jquery-3.1.1.min.js', 'jquery.mark.min.js', 'highlighter.js', ):
            # Close each file as soon as it has been read.
            with open(os.path.join(resources, script), encoding='utf-8') as f:
                source = f.read()
            self.doc_webview.evalJS(source)

    def regenerate_docs(self):
        """Rebuild the searchable documents from the selected search features."""
        self.corpus_docs = None
        self.Warning.no_feats_search.clear()
        if self.corpus is not None:
            feats = [self.search_features[i] for i in self.search_indices]
            if len(feats) == 0:
                self.Warning.no_feats_search()
            self.corpus_docs = self.corpus.documents_from_features(feats)
            self.refresh_search()

    def refresh_search(self):
        """Re-run the filter and refresh the info box when a corpus is set."""
        # Test against None explicitly so that an empty corpus is refreshed too.
        if self.corpus is not None:
            self.list_docs()
            self.update_info()

    def highlight_docs(self):
        """Ask the page's ``mark`` function to highlight the filter text."""
        import json

        search_keyword = self.filter_input.text().strip('|')
        if search_keyword:
            # json.dumps yields a quoted JavaScript string literal with
            # backslashes and double quotes escaped.
            self.doc_webview.evalJS('mark({});'.format(json.dumps(search_keyword)))

    def update_info(self):
        """Recompute the attributes displayed by the labels of the info box."""
        if self.corpus is not None:
            self.n_documents = len(self.corpus)
            self.n_matching = '{}/{}'.format(self.doc_list_model.rowCount(), self.n_documents)
            self.n_tokens = sum(map(len, self.corpus.tokens)) if self.corpus.has_tokens() else 'n/a'
            self.n_types = len(self.corpus.dictionary) if self.corpus.has_tokens() else 'n/a'
            self.is_preprocessed = self.corpus.has_tokens()
            self.is_pos_tagged = self.corpus.pos_tags is not None
            self.ngram_range = '{}-{}'.format(*self.corpus.ngram_range)
        else:
            self.n_documents = ''
            self.n_matching = ''
            self.n_tokens = ''
            self.n_types = ''
            self.is_preprocessed = ''
            self.is_pos_tagged = ''
            self.ngram_range = ''

    def commit(self):
        """Send matched and unmatched documents (or None without a corpus)."""
        if self.corpus is not None:
            matched = self.corpus[self.output_mask]
            output_mask = set(self.output_mask)
            unmatched_mask = [i for i in range(len(self.corpus)) if i not in output_mask]
            unmatched = self.corpus[unmatched_mask]
            self.send(IO.MATCHED, matched)
            self.send(IO.UNMATCHED, unmatched)
        else:
            self.send(IO.MATCHED, None)
            self.send(IO.UNMATCHED, None)
예제 #11
0
class OWCorpusViewer(OWWidget):
    """Widget listing corpus documents and showing the selected ones as HTML.

    Documents can be filtered with a regular expression; matching and
    non-matching documents are sent on separate outputs. Features whose
    ``attributes['type']`` is ``'image'`` are rendered as ``<img>`` elements.
    """

    name = "Corpus Viewer"
    description = "Display corpus contents."
    icon = "icons/CorpusViewer.svg"
    priority = 70

    inputs = [(IO.DATA, Table, 'set_data')]
    outputs = [(IO.MATCHED, Corpus, widget.Default), (IO.UNMATCHED, Corpus)]

    search_indices = ContextSetting([0])  # features included in search
    display_indices = ContextSetting([0])  # features for display
    show_tokens = Setting(False)
    autocommit = Setting(True)

    class Warning(OWWidget.Warning):
        no_feats_search = Msg('No features included in search.')
        no_feats_display = Msg('No features selected for display.')

    def __init__(self):
        """Build the info box, feature lists, filter line and document views."""
        super().__init__()

        self.corpus = None  # Corpus
        self.corpus_docs = None  # Documents generated from Corpus
        self.output_mask = []  # Output corpus indices
        self.doc_webview = None  # WebView for showing content
        self.search_features = [
        ]  # two copies are needed since Display allows drag & drop
        self.display_features = []

        # Info attributes
        self.update_info()
        info_box = gui.widgetBox(self.controlArea, 'Info')
        gui.label(info_box, self, 'Documents: %(n_documents)s')
        gui.label(info_box, self, 'Preprocessed: %(is_preprocessed)s')
        gui.label(info_box, self, '  ◦ Tokens: %(n_tokens)s')
        gui.label(info_box, self, '  ◦ Types: %(n_types)s')
        gui.label(info_box, self, 'POS tagged: %(is_pos_tagged)s')
        gui.label(info_box, self, 'N-grams range: %(ngram_range)s')
        gui.label(info_box, self, 'Matching: %(n_matching)s')

        # Search features
        self.search_listbox = gui.listBox(
            self.controlArea,
            self,
            'search_indices',
            'search_features',
            selectionMode=QListView.ExtendedSelection,
            box='Search features',
            callback=self.regenerate_docs,
        )

        # Display features
        display_box = gui.widgetBox(self.controlArea, 'Display features')
        self.display_listbox = gui.listBox(
            display_box,
            self,
            'display_indices',
            'display_features',
            selectionMode=QListView.ExtendedSelection,
            callback=self.show_docs,
            enableDragDrop=True)
        self.show_tokens_checkbox = gui.checkBox(display_box,
                                                 self,
                                                 'show_tokens',
                                                 'Show Tokens && Tags',
                                                 callback=self.show_docs)

        # Auto-commit box
        gui.auto_commit(self.controlArea, self, 'autocommit', 'Send data',
                        'Auto send is on')

        # Search
        self.filter_input = gui.lineEdit(self.mainArea,
                                         self,
                                         '',
                                         orientation=Qt.Horizontal,
                                         sizePolicy=QSizePolicy(
                                             QSizePolicy.MinimumExpanding,
                                             QSizePolicy.Fixed),
                                         label='RegExp Filter:')
        self.filter_input.textChanged.connect(self.refresh_search)

        # Main area
        self.splitter = QSplitter(
            orientation=Qt.Horizontal,
            childrenCollapsible=False,
        )

        # Document list
        self.doc_list = QTableView()
        self.doc_list.setSelectionBehavior(QTableView.SelectRows)
        self.doc_list.setSelectionMode(QTableView.ExtendedSelection)
        self.doc_list.setEditTriggers(QAbstractItemView.NoEditTriggers)
        self.doc_list.horizontalHeader().setSectionResizeMode(
            QHeaderView.Stretch)
        self.doc_list.horizontalHeader().setVisible(False)
        self.splitter.addWidget(self.doc_list)

        self.doc_list_model = QStandardItemModel(self)
        self.doc_list.setModel(self.doc_list_model)
        self.doc_list.selectionModel().selectionChanged.connect(self.show_docs)

        # Document contents
        self.doc_webview = gui.WebviewWidget(self.splitter, debug=False)

        self.mainArea.layout().addWidget(self.splitter)

    def copy_to_clipboard(self):
        """Copy the text selected in the document view to the clipboard."""
        text = self.doc_webview.selectedText()
        QApplication.clipboard().setText(text)

    def set_data(self, data=None):
        """Input handler: reset the widget and load ``data`` (may be None).

        Input that is not already a ``Corpus`` is converted with
        ``Corpus.from_table``.
        """
        self.reset_widget()
        self.corpus = data
        if data is not None:
            if not isinstance(data, Corpus):
                self.corpus = Corpus.from_table(data.domain, data)
            self.load_features()
            self.regenerate_docs()
        self.commit()

    def reset_widget(self):
        """Clear the corpus, list boxes, filter, warnings and document view."""
        # Corpus
        self.corpus = None
        self.corpus_docs = None
        self.output_mask = []
        # Widgets
        self.search_listbox.clear()
        self.display_listbox.clear()
        self.filter_input.clear()
        self.update_info()
        # Models/vars
        self.search_features.clear()
        self.display_features.clear()
        self.search_indices.clear()
        self.display_indices.clear()
        self.doc_list_model.clear()
        # Warnings
        self.Warning.clear()
        # WebView
        self.doc_webview.setHtml('')

    def load_features(self):
        """Fill both feature lists from the corpus domain and select all."""
        self.search_indices = []
        self.display_indices = []
        if self.corpus is not None:
            domain = self.corpus.domain
            self.search_features = list(
                filter_visible(chain(domain.variables, domain.metas)))
            self.display_features = list(
                filter_visible(chain(domain.variables, domain.metas)))
            # FIXME: Select features based on ContextSetting
            self.search_indices = list(range(len(self.search_features)))
            self.display_indices = list(range(len(self.display_features)))

            # Enable/disable tokens checkbox
            has_tokens = self.corpus.has_tokens()
            if not has_tokens:
                self.show_tokens_checkbox.setCheckState(Qt.Unchecked)
            self.show_tokens_checkbox.setEnabled(has_tokens)

    def list_docs(self):
        """ List documents into the left scrolling area """
        search_keyword = self.filter_input.text().strip('|')
        try:
            reg = re.compile(search_keyword, re.IGNORECASE)
        except re.error:
            # Incomplete or invalid pattern: keep the current list as it is.
            return

        def is_match(x):
            return not bool(search_keyword) or reg.search(x)

        self.output_mask.clear()
        self.doc_list_model.clear()

        for i, (doc, title, content) in enumerate(
                zip(self.corpus, self.corpus.titles, self.corpus_docs)):
            if is_match(content):
                item = QStandardItem()
                item.setData(title, Qt.DisplayRole)
                item.setData(doc, Qt.UserRole)
                self.doc_list_model.appendRow(item)
                self.output_mask.append(i)

        if self.doc_list_model.rowCount() > 0:
            self.doc_list.selectRow(0)  # Select the first document
        else:
            self.doc_webview.setHtml('')
        self.commit()

    def show_docs(self):
        """ Show the selected documents in the right area """
        HTML = '''
        <!doctype html>
        <html>
        <head>
        <meta charset='utf-8'>
        <style>

        table {{ border-collapse: collapse; }}
        mark {{ background: #FFCD28; }}

        tr > td {{
            padding-bottom: 3px;
            padding-top: 3px;
        }}

        body {{
            font-family: Helvetica;
            font-size: 10pt;
        }}

        .line {{ border-bottom: 1px solid #000; }}
        .separator {{ height: 5px; }}

        .variables {{
            vertical-align: top;
            padding-right: 10px;
        }}

        .token {{
            padding: 3px;
            border: 1px #B0B0B0 solid;
            margin-right: 5px;
            margin-bottom: 5px;
            display: inline-block;
        }}

        img {{
            max-width: 100%;
        }}

        </style>
        </head>
        <body>
        {}
        </body>
        </html>
        '''
        if self.corpus is None:
            return

        self.Warning.no_feats_display.clear()
        if len(self.display_indices) == 0:
            self.Warning.no_feats_display()

        if self.show_tokens:
            tokens = list(self.corpus.ngrams_iterator(include_postags=True))

        marked_search_features = [
            f for i, f in enumerate(self.search_features)
            if i in self.search_indices
        ]

        # Collect the fragments and join once instead of repeated `+=`.
        # NOTE(review): values are inserted into the markup unescaped.
        parts = ['<table>']
        for doc_count, index in enumerate(
                self.doc_list.selectionModel().selectedRows()):
            if doc_count > 0:  # add split
                parts.append('<tr class="line separator"><td/><td/></tr>'
                             '<tr class="separator"><td/><td/></tr>')

            doc = index.data(Qt.UserRole)
            row_ind = doc.row_index
            for ind in self.display_indices:
                feature = self.display_features[ind]
                mark = 'class="mark-area"' if feature in marked_search_features else ''
                value = str(doc[feature.name])
                is_image = feature.attributes.get('type', '') == 'image'
                # '?' is skipped so that no <img> is built from it.
                if is_image and value != '?':
                    value = '<img src="{}"></img>'.format(value)
                parts.append('<tr><td class="variables"><strong>{}:</strong></td>'
                             '<td {}>{}</td></tr>'.format(feature.name, mark, value))

            if self.show_tokens:
                token_spans = ''.join('<span class="token">{}</span>'.format(token)
                                      for token in tokens[row_ind])
                parts.append('<tr><td class="variables"><strong>Tokens & Tags:</strong></td>'
                             '<td>{}</td></tr>'.format(token_spans))

        parts.append('</table>')
        html = ''.join(parts)

        # QUrl is a workaround to allow local resources
        # https://bugreports.qt.io/browse/QTBUG-55902?focusedCommentId=335945
        self.doc_webview.setHtml(HTML.format(html), QUrl("file://"))
        self.load_js()
        self.highlight_docs()

    def load_js(self):
        """Evaluate the bundled JavaScript files in the document view."""
        resources = os.path.join(os.path.dirname(__file__), 'resources')
        for script in (
                'jquery-3.1.1.min.js',
                'jquery.mark.min.js',
                'highlighter.js',
        ):
            # Close each file as soon as it has been read.
            with open(os.path.join(resources, script), encoding='utf-8') as f:
                source = f.read()
            self.doc_webview.evalJS(source)

    def regenerate_docs(self):
        """Rebuild the searchable documents from the selected search features."""
        self.corpus_docs = None
        self.Warning.no_feats_search.clear()
        if self.corpus is not None:
            feats = [self.search_features[i] for i in self.search_indices]
            if len(feats) == 0:
                self.Warning.no_feats_search()
            self.corpus_docs = self.corpus.documents_from_features(feats)
            self.refresh_search()

    def refresh_search(self):
        """Re-run the filter and refresh the info box when a corpus is set."""
        if self.corpus is not None:
            self.list_docs()
            self.update_info()

    def highlight_docs(self):
        """Ask the page's ``mark`` function to highlight the filter text."""
        import json

        search_keyword = self.filter_input.text().strip('|')
        if search_keyword:
            # json.dumps yields a quoted JavaScript string literal with
            # backslashes and double quotes escaped.
            self.doc_webview.evalJS(
                'mark({});'.format(json.dumps(search_keyword)))

    def update_info(self):
        """Recompute the attributes displayed by the labels of the info box."""
        if self.corpus is not None:
            self.n_documents = len(self.corpus)
            self.n_matching = '{}/{}'.format(self.doc_list_model.rowCount(),
                                             self.n_documents)
            self.n_tokens = sum(
                map(len,
                    self.corpus.tokens)) if self.corpus.has_tokens() else 'n/a'
            self.n_types = len(
                self.corpus.dictionary) if self.corpus.has_tokens() else 'n/a'
            self.is_preprocessed = self.corpus.has_tokens()
            self.is_pos_tagged = self.corpus.pos_tags is not None
            self.ngram_range = '{}-{}'.format(*self.corpus.ngram_range)
        else:
            self.n_documents = ''
            self.n_matching = ''
            self.n_tokens = ''
            self.n_types = ''
            self.is_preprocessed = ''
            self.is_pos_tagged = ''
            self.ngram_range = ''

    def commit(self):
        """Send matched and unmatched documents (or None without a corpus)."""
        if self.corpus is not None:
            matched = self.corpus[self.output_mask]
            output_mask = set(self.output_mask)
            unmatched_mask = [
                i for i in range(len(self.corpus)) if i not in output_mask
            ]
            unmatched = self.corpus[unmatched_mask]
            self.send(IO.MATCHED, matched)
            self.send(IO.UNMATCHED, unmatched)
        else:
            self.send(IO.MATCHED, None)
            self.send(IO.UNMATCHED, None)
예제 #12
0
class OWConcordance(OWWidget):
    """Widget showing the contexts in which a query word occurs in a corpus.

    The query word is either typed into the line edit or supplied through
    the "Query Word" input; in the latter case the line edit is disabled.
    Selected concordance rows determine the "Selected Documents" output.
    """
    name = "Concordance"
    description = "Display the context of the word."
    icon = "icons/Concordance.svg"
    priority = 520

    class Inputs:
        corpus = Input("Corpus", Corpus)
        query_word = Input("Query Word", Topic)

    class Outputs:
        selected_documents = Output("Selected Documents", Corpus)
        concordances = Output("Concordances", Corpus)

    settingsHandler = PerfectDomainContextHandler(
        match_values = PerfectDomainContextHandler.MATCH_VALUES_ALL
    )
    autocommit = Setting(True)
    # Value of the "Number of words" spin box (limited to 3..10 in the GUI).
    context_width = Setting(5)
    # Current query word, stored per context.
    word = ContextSetting("", exclude_metas=False)
    # Row numbers of the selected concordance lines.
    selected_rows = Setting([], schema_only=True)

    class Warning(OWWidget.Warning):
        multiple_words_on_input = Msg("Multiple query words on input. "
                                      "Only the first one is considered!")

    def __init__(self):
        super().__init__()

        self.corpus = None      # Corpus
        self.n_matching = ''    # Info on docs matching the word
        self.n_tokens = ''      # Info on tokens
        self.n_types = ''       # Info on types (unique tokens)
        self.is_word_on_input = False

        # Info attributes
        info_box = gui.widgetBox(self.controlArea, 'Info')
        gui.label(info_box, self, 'Tokens: %(n_tokens)s')
        gui.label(info_box, self, 'Types: %(n_types)s')
        gui.label(info_box, self, 'Matching: %(n_matching)s')

        # Width parameter
        gui.spin(self.controlArea, self, 'context_width', 3, 10, box=True,
                 label="Number of words:", callback=self.set_width)

        gui.rubber(self.controlArea)

        # Search
        c_box = gui.widgetBox(self.mainArea, orientation="vertical")
        self.input = gui.lineEdit(
            c_box, self, 'word', orientation=Qt.Horizontal,
            sizePolicy=QSizePolicy(QSizePolicy.MinimumExpanding,
                                   QSizePolicy.Fixed),
            label='Query:', callback=self.set_word, callbackOnType=True)
        self.input.setFocus()

        # Concordances view
        self.conc_view = QTableView()
        self.model = ConcordanceModel()
        self.conc_view.setModel(self.model)
        self.conc_view.setWordWrap(False)
        self.conc_view.setSelectionBehavior(QTableView.SelectRows)
        self.conc_view.setSelectionModel(DocumentSelectionModel(self.model))
        self.conc_view.setItemDelegate(HorizontalGridDelegate())
        self.conc_view.selectionModel().selectionChanged.connect(self.selection_changed)
        self.conc_view.horizontalHeader().hide()
        self.conc_view.setShowGrid(False)
        self.mainArea.layout().addWidget(self.conc_view)
        self.set_width()

        # Auto-commit box
        gui.auto_commit(self.controlArea, self, 'autocommit', 'Commit',
                        'Auto commit is on')

    def sizeHint(self): # pragma: no cover
        """Return the preferred initial size of the widget."""
        return QSize(600, 400)

    def set_width(self):
        """Pass the context width to the model, keeping the selection."""
        # Remember the selection first and re-apply it once the model has
        # been given the new width.
        sel = self.conc_view.selectionModel().selection()
        self.model.set_width(self.context_width)
        if sel:
            self.conc_view.selectionModel().select(sel,
                QItemSelectionModel.SelectCurrent | QItemSelectionModel.Rows)

    def selection_changed(self):
        """Store the sorted, de-duplicated selected rows and commit."""
        selection = self.conc_view.selectionModel().selection()
        self.selected_rows = sorted(set(cell.row() for cell in selection.indexes()))
        self.commit()

    def set_selection(self, selection):
        """Select the given row numbers in the concordance view.

        An empty ``selection`` leaves the view untouched.
        """
        if selection:
            sel = QItemSelection()
            for row in selection:
                index = self.conc_view.model().index(row, 0)
                sel.select(index, index)
            self.conc_view.selectionModel().select(sel,
                QItemSelectionModel.SelectCurrent | QItemSelectionModel.Rows)

    @Inputs.corpus
    def set_corpus(self, data=None):
        """Handle a new (or removed) corpus on the input."""
        self.closeContext()
        self.corpus = data
        if data is None:    # data removed, clear selection
            self.selected_rows = []

        # The word is only reset and restored from the context when it is
        # not dictated by the "Query Word" input.
        if not self.is_word_on_input:
            self.word = ""
            self.openContext(self.corpus)

        self.model.set_corpus(self.corpus)
        self.set_word()

    @Inputs.query_word
    def set_word_from_input(self, topic):
        """Take the query word from the "Query Word" input.

        The line edit is disabled while a non-empty topic is on the input.
        Only the first word of the topic is used; a warning is shown when
        the topic holds more than one.
        """
        self.Warning.multiple_words_on_input.clear()
        if self.is_word_on_input:   # word changed, clear selection
            self.selected_rows = []
        self.is_word_on_input = topic is not None and len(topic) > 0
        self.input.setEnabled(not self.is_word_on_input)
        if self.is_word_on_input:
            if len(topic) > 1:
                self.Warning.multiple_words_on_input()
            self.word = topic.metas[0, 0]
            self.set_word()

    def set_word(self):
        """Apply the current word to the model, then refresh and commit.

        The stored row selection is cleared first.
        """
        self.selected_rows = []
        self.model.set_word(self.word)
        self.update_widget()
        self.commit()

    def handleNewSignals(self):
        """Re-apply the stored row selection to the view."""
        self.set_selection(self.selected_rows)

    def resize_columns(self):
        """Give columns 0 and 2 equal widths around column 1."""
        # setColumnWidth takes an int, while true division yields a float;
        # recent Python/PyQt versions no longer convert it implicitly and
        # raise TypeError, so truncate explicitly.
        col_width = int((self.conc_view.width() -
                         self.conc_view.columnWidth(1)) / 2 - 12)
        self.conc_view.setColumnWidth(0, col_width)
        self.conc_view.setColumnWidth(2, col_width)

    def resizeEvent(self, event): # pragma: no cover
        """Keep the column widths in sync with the widget size."""
        super().resizeEvent(event)
        self.resize_columns()

    def update_widget(self):
        """Resize the view to its contents and refresh the info attributes."""
        self.conc_view.resizeColumnToContents(1)
        self.resize_columns()
        self.conc_view.resizeRowsToContents()

        if self.corpus is not None:
            # Without a query word the number of matching documents is
            # reported as 0 instead of asking the model.
            self.n_matching = '{}/{}'.format(
                self.model.matching_docs() if self.word else 0,
                len(self.corpus))
            self.n_tokens = self.model.n_tokens
            self.n_types = self.model.n_types
        else:
            self.n_matching = ''
            self.n_tokens = ''
            self.n_types = ''

    def commit(self):
        """Send the selected documents and the concordances.

        Selected rows are translated to document indices through the first
        element of the model's ``word_index`` entries.  ``None`` is sent as
        "Selected Documents" when no row is selected.
        """
        selected_docs = sorted(set(self.model.word_index[row][0]
                                   for row in self.selected_rows))
        concordance = self.model.get_data()
        if selected_docs:
            selected = self.corpus[selected_docs]
            self.Outputs.selected_documents.send(selected)
        else:
            self.Outputs.selected_documents.send(None)
        self.Outputs.concordances.send(concordance)

    def send_report(self):
        """Report the query, the info values and the concordance table."""
        view = self.conc_view
        model = self.conc_view.model()
        self.report_items("Concordances", (
            ("Query", model.word),
            ("Tokens", model.n_tokens),
            ("Types", model.n_types),
            ("Matching", self.n_matching),
        ))
        self.report_table(view)
예제 #13
0
class OWCorpusViewer(OWWidget):
    """Widget for browsing the documents of a corpus.

    Documents are listed on the left and can be narrowed down with a
    regular-expression filter over the selected search features; the
    selected documents are rendered as HTML on the right.  Documents that
    pass the filter go to "Matching Docs", the rest to "Other Docs".
    """
    name = "Corpus Viewer"
    description = "Display corpus contents."
    icon = "icons/CorpusViewer.svg"
    priority = 500

    class Inputs:
        corpus = Input("Corpus", Corpus, replaces=["Data"])

    class Outputs:
        matching_docs = Output("Matching Docs", Corpus, default=True)
        other_docs = Output("Other Docs", Corpus)

    settingsHandler = PerfectDomainContextHandler(
        match_values = PerfectDomainContextHandler.MATCH_VALUES_ALL
    )

    search_indices = ContextSetting([], exclude_metas=False)   # features included in search
    display_indices = ContextSetting([], exclude_metas=False)  # features for display
    display_features = ContextSetting([], exclude_metas=False)
    regexp_filter = ContextSetting("")

    selection = [0]  # TODO: DataHashContextHandler

    show_tokens = Setting(False)
    autocommit = Setting(True)

    class Warning(OWWidget.Warning):
        no_feats_search = Msg('No features included in search.')
        no_feats_display = Msg('No features selected for display.')

    def __init__(self):
        super().__init__()

        self.corpus = None              # Corpus
        self.corpus_docs = None         # Documents generated from Corpus
        self.output_mask = []           # Output corpus indices
        self.doc_webview = None         # WebView for showing content
        self.search_features = []       # two copies are needed since Display allows drag & drop
        self.display_list_indices = [0]

        # Info attributes
        self.update_info()
        info_box = gui.widgetBox(self.controlArea, 'Info')
        gui.label(info_box, self, 'Documents: %(n_documents)s')
        gui.label(info_box, self, 'Preprocessed: %(is_preprocessed)s')
        gui.label(info_box, self, '  ◦ Tokens: %(n_tokens)s')
        gui.label(info_box, self, '  ◦ Types: %(n_types)s')
        gui.label(info_box, self, 'POS tagged: %(is_pos_tagged)s')
        gui.label(info_box, self, 'N-grams range: %(ngram_range)s')
        gui.label(info_box, self, 'Matching: %(n_matching)s')

        # Search features
        self.search_listbox = gui.listBox(
            self.controlArea, self, 'search_indices', 'search_features',
            selectionMode=QListView.ExtendedSelection,
            box='Search features', callback=self.search_features_changed)

        # Display features
        display_box = gui.widgetBox(self.controlArea, 'Display features')
        self.display_listbox = gui.listBox(
            display_box, self, 'display_list_indices', 'display_features',
            selectionMode=QListView.ExtendedSelection,
            callback=self.show_docs, enableDragDrop=True)
        self.show_tokens_checkbox = gui.checkBox(display_box, self, 'show_tokens',
                                                 'Show Tokens && Tags', callback=self.show_docs)

        # Auto-commit box
        gui.auto_commit(self.controlArea, self, 'autocommit', 'Send data', 'Auto send is on')

        # Search
        self.filter_input = gui.lineEdit(self.mainArea, self, 'regexp_filter',
                                         orientation=Qt.Horizontal,
                                         sizePolicy=QSizePolicy(QSizePolicy.MinimumExpanding,
                                                                QSizePolicy.Fixed),
                                         label='RegExp Filter:')
        self.filter_input.textChanged.connect(self.refresh_search)

        # Main area
        self.splitter = QSplitter(
            orientation=Qt.Horizontal,
            childrenCollapsible=False,
        )

        # Document list
        self.doc_list = QTableView()
        self.doc_list.setSelectionBehavior(QTableView.SelectRows)
        self.doc_list.setSelectionMode(QTableView.ExtendedSelection)
        self.doc_list.setEditTriggers(QAbstractItemView.NoEditTriggers)
        self.doc_list.horizontalHeader().setSectionResizeMode(QHeaderView.Stretch)
        self.doc_list.horizontalHeader().setVisible(False)
        self.splitter.addWidget(self.doc_list)

        self.doc_list_model = QStandardItemModel(self)
        self.doc_list.setModel(self.doc_list_model)
        self.doc_list.selectionModel().selectionChanged.connect(self.show_docs)

        # Document contents
        self.doc_webview = gui.WebviewWidget(self.splitter, debug=False)

        self.mainArea.layout().addWidget(self.splitter)

    def copy_to_clipboard(self):
        """Copy the text selected in the document view to the clipboard."""
        text = self.doc_webview.selectedText()
        QApplication.clipboard().setText(text)

    @Inputs.corpus
    def set_data(self, corpus=None):
        """Handle a new (or removed) corpus on the input.

        The widget is reset first.  For a non-``None`` corpus the search and
        display feature lists are rebuilt from the visible variables and
        metas, the context is opened, and the document list, info
        attributes, selection and document view are refreshed.  The outputs
        are committed in either case.
        """
        self.closeContext()
        self.reset_widget()
        self.corpus = corpus
        self.search_features = []
        if corpus is not None:
            domain = self.corpus.domain
            # Enable/disable tokens checkbox
            if not self.corpus.has_tokens():
                # Use the proper enum member rather than a bool.
                self.show_tokens_checkbox.setCheckState(Qt.Unchecked)
            self.show_tokens_checkbox.setEnabled(self.corpus.has_tokens())

            self.search_features = list(filter_visible(chain(domain.variables, domain.metas)))
            self.display_features = list(filter_visible(chain(domain.variables, domain.metas)))
            self.search_indices = list(range(len(self.search_features)))
            self.display_indices = list(range(len(self.display_features)))
            self.selection = [0]
            self.openContext(self.corpus)
            self.display_list_indices = self.display_indices
            self.regenerate_docs()
            self.list_docs()
            self.update_info()
            self.set_selection()
            self.show_docs()
        self.commit()

    def reset_widget(self):
        """Return the data attributes, list boxes and views to an empty state."""
        # Corpus
        self.corpus = None
        self.corpus_docs = None
        self.output_mask = []
        self.display_features = []
        # Widgets
        self.search_listbox.clear()
        self.display_listbox.clear()
        self.filter_input.clear()
        self.update_info()
        # Models/vars
        self.search_features.clear()
        self.search_indices.clear()
        self.display_indices.clear()
        self.doc_list_model.clear()
        # Warnings
        self.Warning.clear()
        # WebView
        self.doc_webview.setHtml('')

    def list_docs(self):
        """ List documents into the left scrolling area """
        if self.corpus_docs is None:
            return
        search_keyword = self.regexp_filter.strip('|')
        try:
            reg = re.compile(search_keyword, re.IGNORECASE)
        except re.error:
            # An invalid (e.g. half-typed) pattern leaves the list as it is.
            # re.error is the same exception class as sre_constants.error,
            # without depending on the deprecated sre_constants module.
            return

        def is_match(x):
            return not bool(search_keyword) or reg.search(x)

        self.output_mask.clear()
        self.doc_list_model.clear()

        for i, (doc, title, content) in enumerate(zip(self.corpus, self.corpus.titles,
                                                      self.corpus_docs)):
            if is_match(content):
                item = QStandardItem()
                item.setData(title, Qt.DisplayRole)
                item.setData(doc, Qt.UserRole)
                self.doc_list_model.appendRow(item)
                # Remember the corpus row index of every listed document.
                self.output_mask.append(i)

    def reset_selection(self):
        """Select the first listed document, or blank the view if none."""
        if self.doc_list_model.rowCount() > 0:
            self.doc_list.selectRow(0)  # Select the first document
        else:
            self.doc_webview.setHtml('')

    def set_selection(self):
        """Select the rows stored in ``self.selection`` in the document list."""
        view = self.doc_list
        if len(self.selection):
            selection = QItemSelection()

            for row in self.selection:
                selection.append(
                    QItemSelectionRange(
                        view.model().index(row, 0),
                        view.model().index(row, 0)
                    )
                )
            view.selectionModel().select(
                selection, QItemSelectionModel.ClearAndSelect)

    def show_docs(self):
        """ Show the selected documents in the right area """
        # Page template; CSS braces are doubled because the template is
        # filled in with str.format below.
        HTML = '''
        <!doctype html>
        <html>
        <head>
        <script type="text/javascript" src="resources/jquery-3.1.1.min.js">
        </script>
        <script type="text/javascript" src="resources/jquery.mark.min.js">
        </script>
        <script type="text/javascript" src="resources/highlighter.js">
        </script>
        <meta charset='utf-8'>
        <style>

        table {{ border-collapse: collapse; }}
        mark {{ background: #FFCD28; }}

        tr > td {{
            padding-bottom: 3px;
            padding-top: 3px;
        }}

        body {{
            font-family: Helvetica;
            font-size: 10pt;
        }}

        .line {{ border-bottom: 1px solid #000; }}
        .separator {{ height: 5px; }}

        .variables {{
            vertical-align: top;
            padding-right: 10px;
        }}
        
        .content {{
            /* Adopted from https://css-tricks.com/snippets/css/prevent-long-urls-from-breaking-out-of-container/ */
        
            /* These are technically the same, but use both */
            overflow-wrap: break-word;
            word-wrap: break-word;
        
            -ms-word-break: break-all;
            /* This is the dangerous one in WebKit, as it breaks things wherever */
            word-break: break-all;
            /* Instead use this non-standard one: */
            word-break: break-word;
        
            /* Adds a hyphen where the word breaks, if supported (No Blink) */
            -ms-hyphens: auto;
            -moz-hyphens: auto;
            -webkit-hyphens: auto;
            hyphens: auto;
        }}

        .token {{
            padding: 3px;
            border: 1px #B0B0B0 solid;
            margin-right: 5px;
            margin-bottom: 5px;
            display: inline-block;
        }}

        img {{
            max-width: 100%;
        }}

        </style>
        </head>
        <body>
        {}
        </body>
        </html>
        '''
        self.display_indices = self.display_list_indices
        if self.corpus is None:
            return

        self.Warning.no_feats_display.clear()
        if len(self.display_indices) == 0:
            self.Warning.no_feats_display()

        if self.show_tokens:
            tokens = list(self.corpus.ngrams_iterator(include_postags=True))

        # Only features that take part in the search get their matches marked.
        marked_search_features = [f for i, f in enumerate(self.search_features)
                                  if i in self.search_indices]

        html = '<table>'
        selection = [i.row() for i in self.doc_list.selectionModel().selectedRows()]
        # An empty selection does not overwrite the remembered one.
        if selection != []:
            self.selection = selection
        for doc_count, index in enumerate(self.doc_list.selectionModel().selectedRows()):
            if doc_count > 0:   # add split
                html += '<tr class="line separator"><td/><td/></tr>' \
                        '<tr class="separator"><td/><td/></tr>'

            row_ind = index.data(Qt.UserRole).row_index
            for ind in self.display_indices:
                feature = self.display_features[ind]
                value = str(index.data(Qt.UserRole)[feature.name])
                if feature in marked_search_features:
                    value = self.__mark_text(value)
                value = value.replace('\n', '<br/>')
                # Features flagged with the 'image' type attribute are shown
                # as an <img> whose source is the value itself.
                is_image = feature.attributes.get('type', '') == 'image'
                if is_image and value != '?':
                    value = '<img src="{}"></img>'.format(value)
                html += '<tr><td class="variables"><strong>{}:</strong></td>' \
                        '<td class="content">{}</td></tr>'.format(
                    feature.name, value)

            if self.show_tokens:
                html += '<tr><td class="variables"><strong>Tokens & Tags:</strong></td>' \
                        '<td>{}</td></tr>'.format(''.join('<span class="token">{}</span>'.format(
                    token) for token in tokens[row_ind]))

        html += '</table>'
        # The base URL lets the relative "resources/..." script paths resolve
        # against this file's location.
        base = QUrl.fromLocalFile(__file__)
        self.doc_webview.setHtml(HTML.format(html), base)

    def __mark_text(self, text):
        """Wrap every match of the filter pattern in ``text`` in ``<mark>``.

        Returns ``text`` unchanged when the filter is empty, is not a valid
        regular expression, or matches nothing.
        """
        search_keyword = self.regexp_filter.strip('|')
        if not search_keyword:
            return text

        try:
            reg = re.compile(search_keyword, re.IGNORECASE | re.MULTILINE)
        except re.error:
            # Same exception class as sre_constants.error, via the public
            # (non-deprecated) name.
            return text

        matches = list(reg.finditer(text))
        if not matches:
            return text

        text = list(text)
        # Replace from the last match backwards so that the offsets of the
        # earlier matches stay valid.
        for m in matches[::-1]:
            text[m.start():m.end()] = list('<mark data-markjs="true">{}</mark>'\
                .format("".join(text[m.start():m.end()])))

        return "".join(text)

    def search_features_changed(self):
        """Rebuild the searchable documents and re-run the search."""
        self.regenerate_docs()
        self.refresh_search()

    def regenerate_docs(self):
        """Regenerate ``corpus_docs`` from the selected search features.

        A warning is shown when no search feature is selected.
        """
        self.corpus_docs = None
        self.Warning.no_feats_search.clear()
        if self.corpus is not None:
            feats = [self.search_features[i] for i in self.search_indices]
            if len(feats) == 0:
                self.Warning.no_feats_search()
            self.corpus_docs = self.corpus.documents_from_features(feats)

    def refresh_search(self):
        """Re-list the documents, reset the selection, update info, commit."""
        if self.corpus is not None:
            self.list_docs()
            self.reset_selection()
            self.update_info()
            self.commit()

    def update_info(self):
        """Recompute the attributes shown in the Info box.

        Every attribute is set to an empty string when there is no corpus.
        """
        if self.corpus is not None:
            self.n_documents = len(self.corpus)
            self.n_matching = '{}/{}'.format(self.doc_list_model.rowCount(), self.n_documents)
            self.n_tokens = sum(map(len, self.corpus.tokens)) if self.corpus.has_tokens() else 'n/a'
            self.n_types = len(self.corpus.dictionary) if self.corpus.has_tokens() else 'n/a'
            self.is_preprocessed = self.corpus.has_tokens()
            self.is_pos_tagged = self.corpus.pos_tags is not None
            self.ngram_range = '{}-{}'.format(*self.corpus.ngram_range)
        else:
            self.n_documents = ''
            self.n_matching = ''
            self.n_tokens = ''
            self.n_types = ''
            self.is_preprocessed = ''
            self.is_pos_tagged = ''
            self.ngram_range = ''

    def commit(self):
        """Send listed documents as matching and all others as other docs.

        Both outputs receive ``None`` when there is no corpus.
        """
        if self.corpus is not None:
            matched = self.corpus[self.output_mask]
            output_mask = set(self.output_mask)
            unmatched_mask = [i for i in range(len(self.corpus)) if i not in output_mask]
            unmatched = self.corpus[unmatched_mask]
            self.Outputs.matching_docs.send(matched)
            self.Outputs.other_docs.send(unmatched)
        else:
            self.Outputs.matching_docs.send(None)
            self.Outputs.other_docs.send(None)

    def send_report(self):
        """Report the filter query and the matching-documents count."""
        self.report_items((
            ("Query", self.regexp_filter),
            ("Matching documents", self.n_matching),
        ))
예제 #14
0
class OWGEODatasets(OWWidget, ConcurrentWidgetMixin):
    """Widget for browsing GEO data sets and outputting a selected one.

    The main area holds a filterable table of data sets above a description
    panel and a tree of checkable sample annotations.  Selecting a data set
    or changing the annotation check states starts a download task whose
    result is sent on the "Expression Data" output.
    """
    name = "GEO Data Sets"
    description = "Access to Gene Expression Omnibus data sets."
    icon = "icons/OWGEODatasets.svg"
    priority = 2

    class Outputs:
        gds_data = Output("Expression Data", Table)

    search_pattern = Setting('')
    auto_commit = Setting(True)
    genes_as_rows = Setting(False)
    mergeSpots = Setting(True)
    # Info record of the currently selected data set (None when nothing
    # has been selected).
    selected_gds = Setting(None)
    # Check states of the annotation tree items, keyed by the items' `key`.
    gdsSelectionStates = Setting({})
    # Saved states of the vertical and the horizontal splitter.
    splitter_settings = Setting((
        b'\x00\x00\x00\xff\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x01\xea\x00\x00\x00\xd7\x01\x00\x00\x00\x07\x01\x00\x00\x00\x02',
        b'\x00\x00\x00\xff\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x01\xb5\x00\x00\x02\x10\x01\x00\x00\x00\x07\x01\x00\x00\x00\x01',
    ))

    def __init__(self):
        OWWidget.__init__(self)
        ConcurrentWidgetMixin.__init__(self)

        self.gds_info: Optional[GDSInfo] = GDSInfo(
        )  # TODO: handle possible exceptions
        self.gds_data: Optional[Table] = None

        # Control area
        box = widgetBox(self.controlArea, 'Info', addSpace=True)
        self.infoBox = widgetLabel(box, 'Initializing\n\n')

        box = widgetBox(self.controlArea, 'Output', addSpace=True)
        radioButtonsInBox(box,
                          self,
                          'genes_as_rows',
                          ['Samples in rows', 'Genes in rows'],
                          callback=self._run)
        separator(box)

        rubber(self.controlArea)
        auto_commit(self.controlArea,
                    self,
                    'auto_commit',
                    '&Commit',
                    box=False)

        # Main Area

        # Filter widget
        self.filter = lineEdit(self.mainArea,
                               self,
                               'search_pattern',
                               'Filter:',
                               callbackOnType=True,
                               callback=self._apply_filter)
        self.mainArea.layout().addWidget(self.filter)

        splitter_vertical = QSplitter(Qt.Vertical, self.mainArea)

        self.mainArea.layout().addWidget(splitter_vertical)

        # set table view
        self.table_view = QTableView(splitter_vertical)
        self.table_view.setShowGrid(False)
        self.table_view.setSortingEnabled(True)
        self.table_view.sortByColumn(1, Qt.AscendingOrder)
        self.table_view.setAlternatingRowColors(True)
        self.table_view.verticalHeader().setVisible(False)
        self.table_view.setSelectionBehavior(QAbstractItemView.SelectRows)
        self.table_view.setSelectionMode(QAbstractItemView.SingleSelection)
        self.table_view.viewport().setMouseTracking(True)
        self.table_view.setSizeAdjustPolicy(
            QAbstractScrollArea.AdjustToContents)

        self.table_model = GEODatasetsModel()
        self.table_model.initialize(self.gds_info)
        self.table_view.setModel(self.table_model)
        self.table_model.show_table()

        self.table_view.horizontalHeader().setStretchLastSection(True)
        self.table_view.resizeColumnsToContents()

        # Derive the default row height from the style's size for a
        # 20x20 item.
        v_header = self.table_view.verticalHeader()
        option = self.table_view.viewOptions()
        size = self.table_view.style().sizeFromContents(
            QStyle.CT_ItemViewItem, option, QSize(20, 20), self.table_view)

        v_header.setDefaultSectionSize(size.height() + 2)
        v_header.setMinimumSectionSize(5)

        # set item delegates
        self.table_view.setItemDelegateForColumn(
            self.table_model.pubmedid_col,
            LinkStyledItemDelegate(self.table_view))
        self.table_view.setItemDelegateForColumn(
            self.table_model.gds_id_col,
            LinkStyledItemDelegate(self.table_view))
        self.table_view.setItemDelegateForColumn(
            self.table_model.indicator_col,
            IndicatorItemDelegate(self.table_view, role=Qt.DisplayRole),
        )

        splitter_horizontal = QSplitter(Qt.Horizontal, splitter_vertical)

        # Description Widget
        box = widgetBox(splitter_horizontal, 'Description')
        self.description_widget = widgetLabel(box, '')
        self.description_widget.setWordWrap(True)
        rubber(box)

        # Sample Annotations Widget
        box = widgetBox(splitter_horizontal, 'Sample Annotations')
        self.annotations_widget = QTreeWidget(box)
        self.annotations_widget.setHeaderLabels(
            ['Type (Sample annotations)', 'Sample count'])
        self.annotations_widget.setRootIsDecorated(True)
        box.layout().addWidget(self.annotations_widget)
        # Set while the tree is being rebuilt so that itemChanged signals
        # emitted during the rebuild are ignored.
        self._annotations_updating = False
        self.annotations_widget.itemChanged.connect(
            self.on_annotation_selection_changed)
        self.splitters = splitter_vertical, splitter_horizontal

        for sp, setting in zip(self.splitters, self.splitter_settings):
            sp.splitterMoved.connect(self._splitter_moved)
            sp.restoreState(setting)

        self.table_view.selectionModel().selectionChanged.connect(
            self.on_gds_selection_changed)
        self._apply_filter()

        self._run()

    def _splitter_moved(self, *args):
        """Store the current state of both splitters in the settings."""
        self.splitter_settings = [
            bytes(sp.saveState()) for sp in self.splitters
        ]

    def _set_description_widget(self):
        """Show the description of the selected data set."""
        self.description_widget.setText(
            self.selected_gds.get('description', 'Description not available.'))

    def _set_annotations_widget(self, gds):
        """Rebuild the annotation tree from the subsets of ``gds``.

        One top-level item is created per subset type, with one checkable
        child per subset description.  Check states are restored from
        ``gdsSelectionStates`` and default to checked.
        """
        self._annotations_updating = True
        self.annotations_widget.clear()

        annotations = defaultdict(set)
        subsets_count = {}

        for desc in gds['subsets']:
            annotations[desc['type']].add(desc['description'])
            subsets_count[desc['description']] = str(len(desc['sample_id']))

        for _type, subsets in annotations.items():
            key = (gds["name"], _type)
            parent = QTreeWidgetItem(self.annotations_widget, [_type])
            parent.key = key
            for subset in subsets:
                key = (gds['name'], _type, subset)
                item = QTreeWidgetItem(
                    parent, [subset, subsets_count.get(subset, '')])
                item.setFlags(item.flags() | Qt.ItemIsUserCheckable)
                item.setCheckState(
                    0, self.gdsSelectionStates.get(key, Qt.Checked))
                item.key = key

        self._annotations_updating = False
        self.annotations_widget.expandAll()
        for i in range(self.annotations_widget.columnCount()):
            self.annotations_widget.resizeColumnToContents(i)

    def _set_selection(self):
        """Re-select the stored data set in the table, if it is listed."""
        if self.selected_gds is not None:
            index = self.table_model.get_row_index(
                self.selected_gds.get('name'))
            if index is not None:
                # Signals are blocked so that selecting the row does not go
                # through on_gds_selection_changed (which would start a
                # download); the panels are refreshed explicitly instead.
                selection_model = self.table_view.selectionModel()
                selection_model.blockSignals(True)
                try:
                    self.table_view.selectRow(index)
                    self._handle_selection_changed()
                finally:
                    # Always unblock, even if refreshing the panels fails;
                    # otherwise the table would stop reacting to selection.
                    selection_model.blockSignals(False)

    def _handle_selection_changed(self):
        """Sync ``selected_gds`` and the detail panels with the table."""
        if self.table_model.table is not None:
            selection = self.table_view.selectionModel().selectedRows(
                self.table_model.gds_id_col)
            selected_gds_name = selection[0].data(
            ) if len(selection) > 0 else None

            if selected_gds_name:
                self.selected_gds = self.table_model.info.get(
                    selected_gds_name)
                self._set_annotations_widget(self.selected_gds)
                self._set_description_widget()
            else:
                # Nothing selected: the panels are cleared, while
                # selected_gds keeps its previous value.
                self.annotations_widget.clear()
                self.description_widget.clear()

            self.update_info()

    def _apply_filter(self):
        """Filter the table by the search pattern and restore the selection."""
        if self.table_model.table is not None:
            self.table_model.show_table(
                filter_pattern=str(self.search_pattern))
            self._set_selection()
            self.update_info()

    def _run(self):
        """Start the download task for the selected data set, if any."""
        if self.selected_gds is not None:
            self.gds_data = None
            self.start(run_download_task, self.selected_gds.get('name'),
                       self.get_selected_samples(), self.genes_as_rows)

    def on_gds_selection_changed(self):
        """React to a table selection change: refresh panels and download."""
        self._handle_selection_changed()
        self._run()

    def on_annotation_selection_changed(self):
        """Store the annotation check states and restart the download."""
        if self._annotations_updating:
            return
        for i in range(self.annotations_widget.topLevelItemCount()):
            item = self.annotations_widget.topLevelItem(i)
            # Only items created by _set_annotations_widget carry a `key`.
            if 'key' in item.__dict__:
                self.gdsSelectionStates[item.key] = item.checkState(0)
            for j in range(item.childCount()):
                child = item.child(j)
                if 'key' in child.__dict__:
                    self.gdsSelectionStates[child.key] = child.checkState(0)

        self._run()

    def update_info(self):
        """Update the info label with data set, cache and filter counts."""
        all_gds = len(self.table_model.info)
        text = "{} datasets\n{} datasets cached\n".format(
            all_gds, len(local_files.listfiles()))
        filtered = self.table_view.model().rowCount()
        if all_gds != filtered:
            text += "{} after filtering".format(filtered)
        self.infoBox.setText(text)

    def get_selected_samples(self):
        """
        Return the currently selected sample annotations.

        The return value is a ``defaultdict(list)`` mapping each sample
        annotation type (the text of a top-level tree item) to the list of
        its selected values (the texts of its child items).

        .. note:: if some Sample annotation type has no selected values.
                  this method will return all values for it.

        """
        def childiter(item):
            """ Iterate over the children of an QTreeWidgetItem instance.
            """
            for i in range(item.childCount()):
                yield item.child(i)

        samples = []

        for stype in childiter(self.annotations_widget.invisibleRootItem()):
            selected_values = []
            all_values = []
            for sval in childiter(stype):
                value = (str(stype.text(0)), str(sval.text(0)))
                # Values without a stored check state count as selected.
                if self.gdsSelectionStates.get(sval.key, True):
                    selected_values.append(value)
                all_values.append(value)
            if selected_values:
                samples.extend(selected_values)
            else:
                # If no sample of sample type is selected we don't filter on it.
                samples.extend(all_values)

        _samples = defaultdict(list)
        for sample_type, sample_value in samples:
            _samples[sample_type].append(sample_value)
        return _samples

    def commit(self):
        """Send the downloaded data set to the output."""
        self.Outputs.gds_data.send(self.gds_data)

    def on_done(self, result: Result):
        """Store and send the finished download, then refresh the table."""
        assert isinstance(result.gds_dataset, Table)
        self.gds_data = result.gds_dataset
        self.commit()

        if self.gds_info:
            # NOTE(review): presumably re-initialised so the table reflects
            # the newly cached data set - confirm against GEODatasetsModel.
            self.table_model.initialize(self.gds_info)
            self._apply_filter()

    def on_partial_result(self, result: Any) -> None:
        """Partial results are not used by this widget."""
        pass

    def onDeleteWidget(self):
        """Shut down the concurrent task machinery before deletion."""
        self.shutdown()
        super().onDeleteWidget()

    def send_report(self):
        """Report the selected data set and its sample annotations.

        Nothing is reported when no data set is selected.
        """
        if self.selected_gds is None:
            # Without a selection there is nothing to describe; subscripting
            # None below would raise a TypeError.
            return

        self.report_items(
            "GEO Dataset",
            [
                ("ID", self.selected_gds['name']),
                ("Title", self.selected_gds['title']),
                ("Organism", self.selected_gds['sample_organism']),
            ],
        )
        self.report_items(
            "Data",
            [
                ("Samples", self.selected_gds['sample_count']),
                ("Features", self.selected_gds['variables']),
                ("Genes", self.selected_gds['genes']),
            ],
        )
        self.report_name("Sample annotations")
        # Group (description, sample count) pairs by subset type.
        subsets = defaultdict(list)
        for subset in self.selected_gds['subsets']:
            subsets[subset['type']].append(
                (subset['description'], len(subset['sample_id'])))
        self.report_html += "<ul>"
        for _type in subsets:
            self.report_html += "<b>" + _type + ":</b></br>"
            for desc, count in subsets[_type]:
                self.report_html += 9 * "&nbsp" + "<b>{}:</b> {}</br>".format(
                    desc, count)
        self.report_html += "</ul>"
예제 #15
0
class OWGenes(OWWidget, ConcurrentWidgetMixin):
    name = "Genes"
    description = "Tool for working with genes"
    icon = "../widgets/icons/OWGeneInfo.svg"
    priority = 40
    want_main_area = True

    selected_organism: int = Setting(11)
    search_pattern: str = Setting('')
    exclude_unmatched = Setting(True)
    replace_id_with_symbol = Setting(True)
    auto_commit = Setting(True)

    settingsHandler = DomainContextHandler()
    selected_gene_col = ContextSetting(None)
    use_attr_names = ContextSetting(True)

    replaces = [
        'orangecontrib.bioinformatics.widgets.OWGeneNameMatcher.OWGeneNameMatcher'
    ]

    class Inputs:
        data_table = Input("Data", Table)

    class Outputs:
        data_table = Output("Data", Table)
        gene_matcher_results = Output("Genes", Table)

    class Information(OWWidget.Information):
        pass

    def sizeHint(self):
        """Return the preferred widget size of 1280 x 960."""
        width, height = 1280, 960
        return QSize(width, height)

    def __init__(self):
        """Initialise widget state and build the GUI.

        Sets up the matcher-related attributes, a single-shot timer that
        triggers ``_apply_filter``, the control-area boxes (info, organism,
        gene location, output options, commit button) and the main area
        (filter line edit plus a vertical splitter holding two table views).
        """
        OWWidget.__init__(self)
        ConcurrentWidgetMixin.__init__(self)

        # ATTRIBUTES #
        self.target_database = ENTREZ_ID

        # input data
        self.input_data = None
        self.input_genes = None
        self.tax_id = None
        self.column_candidates = []

        # input options
        self.organisms = []

        # gene matcher
        self.gene_matcher = None

        # progress bar
        self.progress_bar = None

        # Single-shot timer; handle_filter_callback restarts it on each
        # keystroke so that the filter is applied only after typing pauses.
        self._timer = QTimer()
        self._timer.timeout.connect(self._apply_filter)
        self._timer.setSingleShot(True)

        # GUI SECTION #

        # Control area
        self.info_box = widgetLabel(
            widgetBox(self.controlArea, "Info", addSpace=True),
            'No data on input.\n')

        organism_box = vBox(self.controlArea, 'Organism')
        self.organism_select_combobox = comboBox(
            organism_box,
            self,
            'selected_organism',
            callback=self.on_input_option_change)

        # Fill the combo box first, then restore the stored selection index.
        self.get_available_organisms()
        self.organism_select_combobox.setCurrentIndex(self.selected_organism)

        box = widgetBox(self.controlArea, 'Gene IDs in the input data')
        # Only string and discrete variables are offered as gene columns.
        self.gene_columns_model = itemmodels.DomainModel(
            valid_types=(StringVariable, DiscreteVariable))
        self.gene_column_combobox = comboBox(
            box,
            self,
            'selected_gene_col',
            label='Stored in data column',
            model=self.gene_columns_model,
            sendSelectedValue=True,
            callback=self.on_input_option_change,
        )

        self.attr_names_checkbox = checkBox(
            box,
            self,
            'use_attr_names',
            'Stored as feature (column) names',
            disables=[(-1, self.gene_column_combobox)],
            callback=self.on_input_option_change,
        )

        # Keep the initial enabled state of the column combo box in sync
        # with the stored 'use_attr_names' setting.
        self.gene_column_combobox.setDisabled(bool(self.use_attr_names))

        output_box = vBox(self.controlArea, 'Output')

        # separator(output_box)
        # output_box.layout().addWidget(horizontal_line())
        # separator(output_box)
        self.exclude_radio = checkBox(output_box,
                                      self,
                                      'exclude_unmatched',
                                      'Exclude unmatched genes',
                                      callback=self.commit)

        self.replace_radio = checkBox(output_box,
                                      self,
                                      'replace_id_with_symbol',
                                      'Replace feature IDs with gene names',
                                      callback=self.commit)

        auto_commit(self.controlArea,
                    self,
                    "auto_commit",
                    "&Commit",
                    box=False)

        rubber(self.controlArea)

        # Main area
        # The line edit calls handle_filter_callback on every keystroke
        # (callbackOnType=True).
        self.filter = lineEdit(self.mainArea,
                               self,
                               'search_pattern',
                               'Filter:',
                               callbackOnType=True,
                               callback=self.handle_filter_callback)
        # rubber(self.radio_group)
        self.mainArea.layout().addWidget(self.filter)

        # set splitter
        self.splitter = QSplitter()
        self.splitter.setOrientation(Qt.Vertical)

        # The model is attached to table_view later, in on_done.
        self.table_model = GeneInfoModel()
        self.table_view = QTableView()
        self.table_view.setAlternatingRowColors(True)
        self.table_view.viewport().setMouseTracking(True)
        self.table_view.setSortingEnabled(True)
        self.table_view.setShowGrid(False)
        self.table_view.verticalHeader().hide()
        # self.table_view.horizontalHeader().setSectionResizeMode(QHeaderView.Stretch)

        self.unknown_model = UnknownGeneInfoModel()

        # Second view, with selection disabled, backed by unknown_model.
        self.unknown_view = QTableView()
        self.unknown_view.setModel(self.unknown_model)
        self.unknown_view.verticalHeader().hide()
        self.unknown_view.setShowGrid(False)
        self.unknown_view.setSelectionMode(QAbstractItemView.NoSelection)
        self.unknown_view.horizontalHeader().setSectionResizeMode(
            QHeaderView.Stretch)

        self.splitter.addWidget(self.table_view)
        self.splitter.addWidget(self.unknown_view)

        # Stretch factors 90 : 10 in favour of the first (table_view) pane.
        self.splitter.setStretchFactor(0, 90)
        self.splitter.setStretchFactor(1, 10)

        self.mainArea.layout().addWidget(self.splitter)

    def handle_filter_callback(self):
        """Restart the 500 ms timer whose timeout applies the filter."""
        debounce = self._timer
        debounce.stop()
        debounce.start(500)

    def _apply_filter(self):
        # filter only if input data is present and model is populated
        if self.table_model.table is not None:
            self.table_model.update_model(
                filter_pattern=str(self.search_pattern))
            self.commit()

    def __reset_widget_state(self):
        """Detach the model from the gene view, clear both models, refresh info."""
        view = self.table_view
        view.clearSpans()
        view.setModel(None)
        for model in (self.table_model, self.unknown_model):
            model.clear()
        self._update_info_box()

    def _update_info_box(self):

        if self.input_genes and self.gene_matcher:
            num_genes = len(self.gene_matcher.genes)
            known_genes = len(self.gene_matcher.get_known_genes())

            info_text = ('{} genes in input data\n'
                         '{} genes match Entrez database\n'
                         '{} genes with match conflicts\n'.format(
                             num_genes, known_genes, num_genes - known_genes))

        else:
            info_text = 'No data on input.'

        self.info_box.setText(info_text)

    def on_done(self, _):
        """Populate both gene tables once the matching task has finished.

        The task result itself is ignored; everything is read from
        ``self.gene_matcher``.
        """
        self._update_info_box()
        self.toggle_radio_options()

        # Known genes: (re)attach the model and wire selection to commit.
        view = self.table_view
        model = self.table_model
        model.initialize(self.gene_matcher.genes)
        view.setModel(model)
        view.selectionModel().selectionChanged.connect(self.commit)
        view.setSelectionBehavior(QAbstractItemView.SelectRows)
        view.setItemDelegateForColumn(
            model.entrez_column_index, LinkStyledItemDelegate(view))

        # Row height is derived from the style's size for a 20x20 item.
        rows_header = view.verticalHeader()
        view_options = view.viewOptions()
        item_size = view.style().sizeFromContents(
            QStyle.CT_ItemViewItem, view_options, QSize(20, 20), view)
        rows_header.setDefaultSectionSize(item_size.height() + 2)
        rows_header.setMinimumSectionSize(5)
        view.horizontalHeader().setStretchLastSection(True)

        # Unknown genes.
        self.unknown_model.initialize(self.gene_matcher.genes)
        self.unknown_view.verticalHeader().setStretchLastSection(True)

        self._apply_filter()

    def get_available_organisms(self):
        """Load the common organisms, sorted by name, into the combo box.

        ``self.organisms`` receives the taxonomy ids in the same order as the
        names added to ``organism_select_combobox``.
        """
        named = [(tax_id, taxonomy.name(tax_id))
                 for tax_id in taxonomy.common_taxids()]
        named.sort(key=lambda pair: pair[1])

        self.organisms = [tax_id for tax_id, _ in named]
        self.organism_select_combobox.addItems([name for _, name in named])

    def gene_names_from_table(self):
        """ Extract gene names from the input table into ``self.input_genes``.

        With ``use_attr_names`` set, the stripped attribute names are used.
        Otherwise values of the selected gene column are used (the column is
        picked by ``gene_column_identifier`` when none is selected), skipping
        rows for which ``np.isnan`` reports the value as NaN.
        """
        self.input_genes = []
        if not self.input_data:
            return

        if self.use_attr_names:
            names = []
            for attr in self.input_data.domain.attributes:
                names.append(str(attr.name).strip())
            self.input_genes = names
            return

        if self.selected_gene_col is None:
            self.selected_gene_col = self.gene_column_identifier()

        column = self.selected_gene_col
        self.input_genes = [
            str(row[column]) for row in self.input_data
            if not np.isnan(row[column])
        ]

    def _update_gene_matcher(self):
        """Re-read the input genes and create a fresh matcher for them.

        The matcher is built for the selected organism with
        ``auto_start=False`` and then handed the input genes.
        """
        self.gene_names_from_table()

        organism = self.get_selected_organism()
        self.gene_matcher = GeneMatcher(organism, auto_start=False)
        self.gene_matcher.genes = self.input_genes

    def get_selected_organism(self):
        """Return the entry of ``self.organisms`` at the selected index."""
        index = self.selected_organism
        return self.organisms[index]

    def _run(self):
        if self.gene_matcher is not None:
            self.start(run_gene_matcher, self.gene_matcher)

    def on_input_option_change(self):
        """Reset the views, rebuild the gene matcher and start matching.

        Used as the callback of the organism and gene-location controls and
        called when new input data arrives.
        """
        # Order matters: clear the old state before the matcher is rebuilt
        # and the task is started.
        self.__reset_widget_state()
        self._update_gene_matcher()
        self._run()

    def gene_column_identifier(self):
        """
        Pick the column most likely to hold gene identifiers.

        Only discrete and string variables from ``gene_columns_model`` are
        considered; the one with the most unique values wins. Raises
        ``IndexError`` when there is no suitable column.
        """
        gene_var_types = (DiscreteVariable, StringVariable)

        # scored -> (variable, number of unique values)
        scored = []
        for col in self.gene_columns_model:
            if not isinstance(col, gene_var_types):
                continue
            values = self.input_data.get_column_view(col)[0]
            scored.append((col, np.unique(values).size))

        # Stable sort: among equally good columns the last listed one wins.
        scored.sort(key=lambda pair: pair[1])
        return scored[-1][0]

    def find_genes_location(self):
        """ Try to locate the genes when the data is first loaded.

            Rules:
                - if the domain has attributes, nothing is changed;
                - otherwise, when no gene column is selected yet, pick the
                  most suitable column (see ``gene_column_identifier``) and
                  switch to reading genes from that column.

        """
        if self.input_data.domain.attributes:
            return
        if self.selected_gene_col is not None:
            return

        self.selected_gene_col = self.gene_column_identifier()
        self.use_attr_names = False

    @Inputs.data_table
    def handle_input(self, data):
        self.closeContext()
        self.input_data = None
        self.input_genes = None
        self.__reset_widget_state()
        self.gene_columns_model.set_domain(None)
        self.selected_gene_col = None

        if data:
            self.input_data = data
            self.gene_columns_model.set_domain(self.input_data.domain)

            # check if input table has tax_id, human is used if tax_id is not found
            self.tax_id = str(self.input_data.attributes.get(TAX_ID, '9606'))
            # check for gene location. Default is that genes are attributes in the input table.
            self.use_attr_names = self.input_data.attributes.get(
                GENE_AS_ATTRIBUTE_NAME, self.use_attr_names)

            if self.tax_id in self.organisms and not self.selected_organism:
                self.selected_organism = self.organisms.index(self.tax_id)

            self.openContext(self.input_data.domain)
            self.find_genes_location()
            self.on_input_option_change()

    def commit(self):
        """Build and send the annotated data table and the gene table.

        Genes selected in the table view are used; with no selection, the
        currently filtered genes are used instead. If at least one input gene
        has a known id, the input data is annotated with the target-database
        ids (as a meta column when genes are in rows, as attribute
        annotations when genes are column names), optionally restricted to
        the selected and/or matched genes. Otherwise ``None`` is sent on both
        outputs.

        Each output is sent exactly once per call. The method is also safe to
        call (e.g. from the output check boxes) before any data or matcher
        exists, in which case both outputs are cleared.
        """
        table = None
        gm_table = None

        # Check-box callbacks can fire before any input was received.
        if self.input_data is None or self.gene_matcher is None:
            self.Outputs.data_table.send(table)
            self.Outputs.gene_matcher_results.send(gm_table)
            return

        # selectionModel() is None while the view has no model attached.
        selection_model = self.table_view.selectionModel()
        if selection_model is None:
            selected_genes = []
        else:
            selected_genes = [
                row.data() for row in selection_model.selectedRows(
                    self.table_model.entrez_column_index)
            ]
        if not selected_genes:
            selected_genes = self.table_model.get_filtered_genes()

        gene_ids = self.get_target_ids()
        known_genes = [gid for gid in gene_ids if gid != '?']

        if known_genes:
            # Genes are in rows (we have a column with genes).
            if not self.use_attr_names:

                if self.target_database in self.input_data.domain:
                    gene_var = self.input_data.domain[self.target_database]
                    metas = self.input_data.domain.metas
                else:
                    gene_var = StringVariable(self.target_database)
                    metas = self.input_data.domain.metas + (gene_var, )

                domain = Domain(self.input_data.domain.attributes,
                                self.input_data.domain.class_vars, metas)

                table = self.input_data.transform(domain)
                col, _ = table.get_column_view(gene_var)
                col[:] = gene_ids

                # filter selected rows
                selected_genes_set = set(selected_genes)
                selected_rows = [
                    row_index for row_index, row in enumerate(table)
                    if str(row[gene_var]) in selected_genes_set
                ]

                # handle table attributes
                table.attributes[TAX_ID] = self.get_selected_organism()
                table.attributes[GENE_AS_ATTRIBUTE_NAME] = False
                table.attributes[GENE_ID_COLUMN] = self.target_database
                table = table[selected_rows] if selected_rows else table

                if self.exclude_unmatched:
                    # create filter from selected column for genes
                    only_known = table_filter.FilterStringList(
                        gene_var, known_genes)
                    # apply filter to the data
                    table = table_filter.Values([only_known])(table)

            # Genes are in columns (genes are features).
            else:
                domain = self.input_data.domain.copy()
                table = self.input_data.transform(domain)

                for gene in self.gene_matcher.genes:
                    if gene.input_identifier in table.domain:

                        table.domain[gene.input_identifier].attributes[
                            self.target_database] = (str(gene.gene_id)
                                                     if gene.gene_id else '?')

                        if self.replace_id_with_symbol:
                            try:
                                table.domain[gene.input_identifier].name = str(
                                    gene.symbol)
                            except AttributeError:
                                # TODO: missing gene symbol, need to handle this?
                                pass

                # filter selected columns
                selected_genes_set = set(selected_genes)
                selected = [
                    column for column in table.domain.attributes
                    if self.target_database in column.attributes
                    and str(column.attributes[
                        self.target_database]) in selected_genes_set
                ]

                output_attrs = table.domain.attributes

                if selected:
                    output_attrs = selected

                if self.exclude_unmatched:
                    known_genes_set = set(known_genes)
                    # .get(): a column that never received an id annotation
                    # counts as unmatched instead of raising KeyError.
                    output_attrs = [
                        col for col in output_attrs if col.attributes.get(
                            self.target_database) in known_genes_set
                    ]

                domain = Domain(output_attrs, table.domain.class_vars,
                                table.domain.metas)

                table = table.from_table(domain, table)

                # handle table attributes
                table.attributes[TAX_ID] = self.get_selected_organism()
                table.attributes[GENE_AS_ATTRIBUTE_NAME] = True
                table.attributes[GENE_ID_ATTRIBUTE] = self.target_database

            gm_table = self.gene_matcher.to_data_table(
                selected_genes=selected_genes if selected_genes else None)

        self.Outputs.data_table.send(table)
        self.Outputs.gene_matcher_results.send(gm_table)

    def toggle_radio_options(self):
        """Enable the output check boxes that make sense for the current data.

        'Replace feature IDs' is enabled only when genes are stored as
        attribute names. When the matcher holds genes, 'Exclude unmatched' is
        enabled, and the ``exclude_unmatched`` setting is set, exactly when
        some genes are not known.
        """
        self.replace_radio.setEnabled(bool(self.use_attr_names))

        genes = self.gene_matcher.genes
        if genes:
            # Compute once; the known genes were previously fetched twice.
            has_unmatched = len(genes) != len(
                self.gene_matcher.get_known_genes())
            # enable checkbox if unknown genes are detected
            self.exclude_radio.setEnabled(has_unmatched)
            self.exclude_unmatched = has_unmatched

    def get_target_ids(self):
        """Return one id string per matcher gene, '?' where the id is falsy."""
        target_ids = []
        for gene in self.gene_matcher.genes:
            target_ids.append(str(gene.gene_id) if gene.gene_id else '?')
        return target_ids
예제 #16
0
class OWCorpusViewer(OWWidget):
    name = "Corpus Viewer"
    description = "Display corpus contents."
    icon = "icons/CorpusViewer.svg"
    priority = 500

    class Inputs:
        corpus = Input("Corpus", Corpus, replaces=["Data"])

    class Outputs:
        matching_docs = Output("Matching Docs", Corpus, default=True)
        other_docs = Output("Other Docs", Corpus)
        corpus = Output("Corpus", Corpus)

    settingsHandler = PerfectDomainContextHandler(
        match_values = PerfectDomainContextHandler.MATCH_VALUES_ALL
    )

    search_indices = ContextSetting([], exclude_metas=False)   # features included in search
    display_indices = ContextSetting([], exclude_metas=False)  # features for display
    display_features = ContextSetting([], exclude_metas=False)
    selected_documents = ContextSetting([])
    regexp_filter = ContextSetting("")

    show_tokens = Setting(False)
    autocommit = Setting(True)

    class Warning(OWWidget.Warning):
        no_feats_search = Msg('No features included in search.')
        no_feats_display = Msg('No features selected for display.')

    def __init__(self):
        """Initialise widget state and build the GUI.

        The control area gets the info labels, the search-feature and
        display-feature list boxes, the tokens check box and the auto-commit
        box. The main area gets the RegExp filter line edit and a horizontal
        splitter with the document list and the document web view.
        """
        super().__init__()

        self.corpus = None              # Corpus
        self.corpus_docs = None         # Documents generated from Corpus
        self.doc_webview = None         # WebView for showing content
        self.search_features = []       # two copies are needed since Display allows drag & drop
        self.display_list_indices = [0]
        self.matches = 0                # Matches of the query

        # Info attributes
        # update_info() sets the n_* attributes that the labels below
        # interpolate; with no corpus they are empty strings.
        self.update_info()
        info_box = gui.widgetBox(self.controlArea, 'Info')
        gui.label(info_box, self, 'Tokens: %(n_tokens)s')
        gui.label(info_box, self, 'Types: %(n_types)s')
        gui.label(info_box, self, 'Matching documents: %(n_matching)s')
        gui.label(info_box, self, 'Matches: %(n_matches)s')

        # Search features
        self.search_listbox = gui.listBox(
            self.controlArea, self, 'search_indices', 'search_features',
            selectionMode=QListView.ExtendedSelection,
            box='Search features', callback=self.search_features_changed)

        # Display features
        display_box = gui.widgetBox(self.controlArea, 'Display features')
        self.display_listbox = gui.listBox(
            display_box, self, 'display_list_indices', 'display_features',
            selectionMode=QListView.ExtendedSelection,
            callback=self.show_docs, enableDragDrop=True)
        self.show_tokens_checkbox = gui.checkBox(display_box, self, 'show_tokens',
                                                 'Show Tokens && Tags', callback=self.show_docs)

        # Auto-commit box
        gui.auto_commit(self.controlArea, self, 'autocommit', 'Send data', 'Auto send is on')

        # Search
        self.filter_input = gui.lineEdit(self.mainArea, self, 'regexp_filter',
                                         orientation=Qt.Horizontal,
                                         sizePolicy=QSizePolicy(QSizePolicy.MinimumExpanding,
                                                                QSizePolicy.Fixed),
                                         label='RegExp Filter:',
                                         callback=self.refresh_search)

        # Main area
        self.splitter = QSplitter(
            orientation=Qt.Horizontal,
            childrenCollapsible=False,
        )
        # Document list
        self.doc_list = QTableView()
        self.doc_list.setSelectionBehavior(QTableView.SelectRows)
        self.doc_list.setSelectionMode(QTableView.ExtendedSelection)
        self.doc_list.setEditTriggers(QAbstractItemView.NoEditTriggers)
        self.doc_list.horizontalHeader().setSectionResizeMode(QHeaderView.Stretch)
        self.doc_list.horizontalHeader().setVisible(False)
        self.splitter.addWidget(self.doc_list)

        # The selection model is available only after the model is set, so
        # the signal is connected afterwards.
        self.doc_list_model = QStandardItemModel(self)
        self.doc_list.setModel(self.doc_list_model)
        self.doc_list.selectionModel().selectionChanged.connect(
            self.selection_changed
        )
        # Document contents
        self.doc_webview = gui.WebviewWidget(self.splitter, debug=False)

        self.mainArea.layout().addWidget(self.splitter)

    def copy_to_clipboard(self):
        """Copy the text selected in the document web view to the clipboard."""
        selected = self.doc_webview.selectedText()
        clipboard = QApplication.clipboard()
        clipboard.setText(selected)

    @Inputs.corpus
    def set_data(self, corpus=None):
        self.closeContext()
        self.reset_widget()
        self.corpus = corpus
        self.search_features = []
        if corpus is not None:
            domain = self.corpus.domain
            # Enable/disable tokens checkbox
            if not self.corpus.has_tokens():
                self.show_tokens_checkbox.setCheckState(False)
            self.show_tokens_checkbox.setEnabled(self.corpus.has_tokens())

            self.search_features = list(filter_visible(chain(domain.variables, domain.metas)))
            self.display_features = list(filter_visible(chain(domain.variables, domain.metas)))
            self.search_indices = list(range(len(self.search_features)))
            self.display_indices = list(range(len(self.display_features)))
            self.selected_documents = [corpus.titles[0]] if \
                corpus.titles is not None and len(corpus.titles) else []
            self.openContext(self.corpus)
            self.display_list_indices = self.display_indices
            self.regenerate_docs()
            self.list_docs()
            self.update_info()
            self.set_selection()
            self.show_docs()
        self.commit()

    def reset_widget(self):
        """Return the widget to its empty, no-corpus state.

        Clears the corpus references, the list boxes and filter input, the
        feature/index lists, the document list model, all warnings and the
        document web view.
        """
        # Corpus
        self.corpus = None
        self.corpus_docs = None
        self.display_features = []
        # Widgets
        self.search_listbox.clear()
        self.display_listbox.clear()
        self.filter_input.clear()
        # corpus is None at this point, so update_info() blanks the counters.
        self.update_info()
        # Models/vars
        # These lists are cleared in place rather than rebound.
        self.search_features.clear()
        self.search_indices.clear()
        self.display_indices.clear()
        self.doc_list_model.clear()
        # Warnings
        self.Warning.clear()
        # WebView
        self.doc_webview.setHtml('')

    def list_docs(self):
        """ List documents into the left scrolling area.

        Without a filter every document is listed. With a filter only the
        documents whose generated text matches the (case-insensitive) regular
        expression are listed, and ``self.matches`` receives the total number
        of matches. If no documents were generated yet, or the pattern does
        not compile, the current list is left untouched.
        """
        if self.corpus_docs is None:
            return
        # TODO: remove search_keyword??
        search_keyword = self.regexp_filter.strip('|')
        try:
            reg = re.compile(search_keyword, re.IGNORECASE)
        except re.error:
            # Invalid (e.g. half-typed) pattern: keep the current list.
            return

        self.doc_list_model.clear()

        filtering = bool(self.regexp_filter)
        matches = 0
        for doc, title, content in zip(self.corpus, self.corpus.titles,
                                       self.corpus_docs):
            # Count matches without materialising them.
            res = sum(1 for _ in reg.finditer(content)) if filtering else 0
            if filtering and not res:
                continue
            matches += res
            item = QStandardItem()
            item.setData(str(title), Qt.DisplayRole)
            item.setData(doc, Qt.UserRole)
            self.doc_list_model.appendRow(item)
        self.matches = matches

    def get_selected_documents_from_view(self) -> Set[str]:
        """
        Collect the display-role data of the rows selected in the view.

        Returns
        -------
        Set with names of selected documents in the QTableView
        """
        selected_rows = self.doc_list.selectionModel().selectedRows()
        titles = set()
        for index in selected_rows:
            titles.add(index.data(Qt.DisplayRole))
        return titles

    def set_selection(self) -> None:
        """
        Select in the view every listed document whose title is stored in
        the ``selected_documents`` attribute.
        """
        view = self.doc_list
        model = view.model()

        remembered = self.selected_documents.copy()
        to_select = QItemSelection()
        for row in range(model.rowCount()):
            index = model.index(row, 0)
            title = model.data(index, Qt.DisplayRole)
            if title not in self.selected_documents:
                continue
            to_select.append(QItemSelectionRange(index, index))

        view.selectionModel().select(
            to_select, QItemSelectionModel.ClearAndSelect
        )
        if len(to_select) == 0:
            # With an empty selection Qt does not call selection_changed,
            # so show_docs has to be triggered manually.
            self.show_docs()
        # select() emits the selection-changed signal, which calls
        # selection_changed; while filtering this drops the documents that
        # are currently filtered out from self.selected_documents. They are
        # restored here so that they are selected again once the user
        # removes the filter.
        self.selected_documents = remembered

    def selection_changed(self) -> None:
        """
        Slot run on every selection change in the document list: stores the
        new selection, redraws the documents and commits the outputs.
        """
        currently_selected = self.get_selected_documents_from_view()
        self.selected_documents = currently_selected
        self.show_docs()
        self.commit()

    def show_docs(self):
        """ Show the selected documents in the right area.

        Builds an HTML table with one group of rows per selected document
        (one row per displayed feature, plus an optional tokens row) and
        loads it into the web view. Does nothing when there is no corpus.
        """
        # Page template; the generated table is substituted for the single
        # '{}' placeholder, which is why CSS braces are doubled.
        HTML = '''
        <!doctype html>
        <html>
        <head>
        <script type="text/javascript" src="resources/jquery-3.1.1.min.js">
        </script>
        <script type="text/javascript" src="resources/jquery.mark.min.js">
        </script>
        <script type="text/javascript" src="resources/highlighter.js">
        </script>
        <meta charset='utf-8'>
        <style>

        table {{ border-collapse: collapse; }}
        mark {{ background: #FFCD28; }}

        tr > td {{
            padding-bottom: 3px;
            padding-top: 3px;
        }}

        body {{
            font-family: Helvetica;
            font-size: 10pt;
        }}

        .line {{ border-bottom: 1px solid #000; }}
        .separator {{ height: 5px; }}

        .variables {{
            vertical-align: top;
            padding-right: 10px;
        }}
        
        .content {{
            /* Adopted from https://css-tricks.com/snippets/css/prevent-long-urls-from-breaking-out-of-container/ */
        
            /* These are technically the same, but use both */
            overflow-wrap: break-word;
            word-wrap: break-word;
        
            -ms-word-break: break-all;
            /* This is the dangerous one in WebKit, as it breaks things wherever */
            word-break: break-all;
            /* Instead use this non-standard one: */
            word-break: break-word;
        
            /* Adds a hyphen where the word breaks, if supported (No Blink) */
            -ms-hyphens: auto;
            -moz-hyphens: auto;
            -webkit-hyphens: auto;
            hyphens: auto;
        }}

        .token {{
            padding: 3px;
            border: 1px #B0B0B0 solid;
            margin-right: 5px;
            margin-bottom: 5px;
            display: inline-block;
        }}

        img {{
            max-width: 100%;
        }}

        </style>
        </head>
        <body>
        {}
        </body>
        </html>
        '''
        # Take over the list-box selection of display features.
        self.display_indices = self.display_list_indices
        if self.corpus is None:
            return

        self.Warning.no_feats_display.clear()
        if len(self.display_indices) == 0:
            self.Warning.no_feats_display()

        # 'tokens' is bound only when show_tokens is set; it is read below
        # under the same condition.
        if self.show_tokens:
            tokens = list(self.corpus.ngrams_iterator(include_postags=True))

        # Features currently selected in the search list; only their values
        # get the filter matches highlighted.
        marked_search_features = [f for i, f in enumerate(self.search_features)
                                  if i in self.search_indices]

        html = '<table>'
        for doc_count, index in enumerate(self.doc_list.selectionModel().selectedRows()):
            if doc_count > 0:   # add split
                html += '<tr class="line separator"><td/><td/></tr>' \
                        '<tr class="separator"><td/><td/></tr>'

            # The document instance was stored under Qt.UserRole by list_docs.
            row_ind = index.data(Qt.UserRole).row_index
            for ind in self.display_indices:
                feature = self.display_features[ind]
                value = str(index.data(Qt.UserRole)[feature.name])
                if feature in marked_search_features:
                    value = self.__mark_text(value)
                value = value.replace('\n', '<br/>')
                # Features annotated with type 'image' are rendered as <img>,
                # unless the value is '?'.
                is_image = feature.attributes.get('type', '') == 'image'
                if is_image and value != '?':
                    value = '<img src="{}"></img>'.format(value)
                html += '<tr><td class="variables"><strong>{}:</strong></td>' \
                        '<td class="content">{}</td></tr>'.format(
                    feature.name, value)

            if self.show_tokens:
                html += '<tr><td class="variables"><strong>Tokens & Tags:</strong></td>' \
                        '<td>{}</td></tr>'.format(''.join('<span class="token">{}</span>'.format(
                    token) for token in tokens[row_ind]))

        html += '</table>'
        # Base URL derived from this file's location; the template's
        # relative 'resources/...' script paths are given relative to it.
        base = QUrl.fromLocalFile(__file__)
        self.doc_webview.setHtml(HTML.format(html), base)

    def __mark_text(self, text):
        """Wrap every match of the current filter in ``text`` with ``<mark>``.

        The filter is compiled case-insensitively in multi-line mode. The
        text is returned unchanged when the filter is empty, does not compile
        or does not match.
        """
        search_keyword = self.regexp_filter.strip('|')
        if not search_keyword:
            return text

        try:
            reg = re.compile(search_keyword, re.IGNORECASE | re.MULTILINE)
        except re.error:
            # Invalid (e.g. half-typed) pattern: show the text unmarked.
            return text

        # re.sub replaces the same non-overlapping matches finditer yields.
        return reg.sub(
            lambda m: '<mark data-markjs="true">{}</mark>'.format(m.group(0)),
            text)

    def search_features_changed(self):
        """Callback of the search-features list box.

        Regenerates the document texts and then re-runs the search.
        """
        # Regenerate first: refresh_search reads the regenerated corpus_docs.
        self.regenerate_docs()
        self.refresh_search()

    def regenerate_docs(self):
        """Rebuild ``corpus_docs`` from the currently selected search features.

        Shows the 'no features included in search' warning when the
        selection is empty; without a corpus ``corpus_docs`` stays ``None``.
        """
        self.corpus_docs = None
        self.Warning.no_feats_search.clear()
        if self.corpus is None:
            return

        feats = []
        for i in self.search_indices:
            feats.append(self.search_features[i])
        if not feats:
            self.Warning.no_feats_search()
        self.corpus_docs = self.corpus.documents_from_features(feats)

    def refresh_search(self):
        """Re-run the filter: relist documents, reselect, update info, commit.

        Does nothing when there is no corpus.
        """
        if self.corpus is None:
            return
        self.list_docs()
        self.set_selection()
        self.update_info()
        self.commit()

    def update_info(self):
        """Refresh the attributes shown by the info labels.

        Without a corpus all four values are empty strings. Token and type
        counts are 'n/a' when the corpus has no tokens; the match count is
        'n/a' when it is zero.
        """
        corpus = self.corpus
        if corpus is None:
            self.n_matching = self.n_matches = self.n_tokens = self.n_types = ''
            return

        self.n_matching = '{}/{}'.format(
            self.doc_list_model.rowCount(), len(corpus))
        self.n_matches = self.matches or 'n/a'
        self.n_tokens = ('n/a' if not corpus.has_tokens()
                         else sum(len(doc) for doc in corpus.tokens))
        self.n_types = ('n/a' if not corpus.has_tokens()
                        else len(corpus.dictionary))

    def commit(self):
        """Send the selected documents, the remaining ones and the annotated corpus.

        A document counts as matching when its title is among the titles
        selected in the view. An empty subset is sent as ``None``; without a
        corpus all three outputs receive ``None``.
        """
        matched = unmatched = annotated_corpus = None
        corpus = self.corpus
        if corpus is not None:
            # Set of titles of the documents selected in the view.
            selected_docs = self.get_selected_documents_from_view()

            matched_mask, unmatched_mask = [], []
            for i, title in enumerate(corpus.titles):
                if title in selected_docs:
                    matched_mask.append(i)
                else:
                    unmatched_mask.append(i)

            if matched_mask:
                matched = corpus[matched_mask]
            if unmatched_mask:
                unmatched = corpus[unmatched_mask]
            annotated_corpus = create_annotated_table(corpus, matched_mask)

        self.Outputs.matching_docs.send(matched)
        self.Outputs.other_docs.send(unmatched)
        self.Outputs.corpus.send(annotated_corpus)

    def send_report(self):
        """Report the current query and the matching/match counts."""
        rows = (
            ("Query", self.regexp_filter),
            ("Matching documents", self.n_matching),
            ("Matches", self.n_matches),
        )
        self.report_items(rows)

    def showEvent(self, event):
        """Forward the show event to the parent class, then adjust the splitter."""
        super().showEvent(event)
        # Runs on every show event; update_splitter only changes the sizes
        # when the second pane holds less than 2/3 of the width.
        self.update_splitter()

    def update_splitter(self):
        """
        Update the splitter so that the document list on the left never
        takes more than 1/3 of the space. It is only set on showEvent. If
        the user later changes the sizes, they stay as they are.

        The sizes passed to ``setSizes`` are integers that add up to the
        current total width.
        """
        w1, w2 = self.splitter.sizes()
        ws = w1 + w2
        # Integer form of ``w2 < 2/3 * ws``.
        if 3 * w2 < 2 * ws:
            left = ws // 3
            # setSizes takes integers, so float sizes are not passed.
            self.splitter.setSizes([left, ws - left])
예제 #17
0
class OWCorpusViewer(OWWidget):
    """
    Browse the documents of a corpus and split it with a RegExp filter.

    Documents whose searchable text matches the filter are listed on the
    left; the documents selected in that list are rendered in a web view
    on the right. The listed (matching) documents and all remaining
    documents are sent to two separate outputs.
    """
    name = "Corpus Viewer"
    description = "Display corpus contents."
    icon = "icons/CorpusViewer.svg"
    priority = 500

    class Inputs:
        corpus = Input("Corpus", Corpus, replaces=["Data"])

    class Outputs:
        matching_docs = Output("Matching Docs", Corpus, default=True)
        other_docs = Output("Other Docs", Corpus)

    settingsHandler = PerfectDomainContextHandler(
        match_values=PerfectDomainContextHandler.MATCH_VALUES_ALL)

    search_indices = ContextSetting(
        [], exclude_metas=False)  # features included in search
    display_indices = ContextSetting(
        [], exclude_metas=False)  # features for display
    display_features = ContextSetting([], exclude_metas=False)
    regexp_filter = ContextSetting("")

    # Rows selected in the document list. Rebound per instance in
    # `set_data` / `show_docs`, never mutated in place.
    selection = [0]  # TODO: DataHashContextHandler

    show_tokens = Setting(False)
    autocommit = Setting(True)

    class Warning(OWWidget.Warning):
        no_feats_search = Msg('No features included in search.')
        no_feats_display = Msg('No features selected for display.')

    def __init__(self):
        """Build the info box, feature lists, filter line and main area."""
        super().__init__()

        self.corpus = None  # Corpus
        self.corpus_docs = None  # Documents generated from Corpus
        self.output_mask = []  # Output corpus indices
        self.doc_webview = None  # WebView for showing content
        self.search_features = [
        ]  # two copies are needed since Display allows drag & drop
        self.display_list_indices = [0]

        # Info attributes
        # update_info() must run first: the labels below interpolate the
        # attributes it sets.
        self.update_info()
        info_box = gui.widgetBox(self.controlArea, 'Info')
        gui.label(info_box, self, 'Documents: %(n_documents)s')
        gui.label(info_box, self, 'Preprocessed: %(is_preprocessed)s')
        gui.label(info_box, self, '  ◦ Tokens: %(n_tokens)s')
        gui.label(info_box, self, '  ◦ Types: %(n_types)s')
        gui.label(info_box, self, 'POS tagged: %(is_pos_tagged)s')
        gui.label(info_box, self, 'N-grams range: %(ngram_range)s')
        gui.label(info_box, self, 'Matching: %(n_matching)s')

        # Search features
        self.search_listbox = gui.listBox(
            self.controlArea,
            self,
            'search_indices',
            'search_features',
            selectionMode=QListView.ExtendedSelection,
            box='Search features',
            callback=self.search_features_changed)

        # Display features
        display_box = gui.widgetBox(self.controlArea, 'Display features')
        self.display_listbox = gui.listBox(
            display_box,
            self,
            'display_list_indices',
            'display_features',
            selectionMode=QListView.ExtendedSelection,
            callback=self.show_docs,
            enableDragDrop=True)
        self.show_tokens_checkbox = gui.checkBox(display_box,
                                                 self,
                                                 'show_tokens',
                                                 'Show Tokens && Tags',
                                                 callback=self.show_docs)

        # Auto-commit box
        gui.auto_commit(self.controlArea, self, 'autocommit', 'Send data',
                        'Auto send is on')

        # Search
        self.filter_input = gui.lineEdit(self.mainArea,
                                         self,
                                         'regexp_filter',
                                         orientation=Qt.Horizontal,
                                         sizePolicy=QSizePolicy(
                                             QSizePolicy.MinimumExpanding,
                                             QSizePolicy.Fixed),
                                         label='RegExp Filter:')
        self.filter_input.textChanged.connect(self.refresh_search)

        # Main area
        self.splitter = QSplitter(
            orientation=Qt.Horizontal,
            childrenCollapsible=False,
        )

        # Document list
        self.doc_list = QTableView()
        self.doc_list.setSelectionBehavior(QTableView.SelectRows)
        self.doc_list.setSelectionMode(QTableView.ExtendedSelection)
        self.doc_list.setEditTriggers(QAbstractItemView.NoEditTriggers)
        self.doc_list.horizontalHeader().setSectionResizeMode(
            QHeaderView.Stretch)
        self.doc_list.horizontalHeader().setVisible(False)
        self.splitter.addWidget(self.doc_list)

        self.doc_list_model = QStandardItemModel(self)
        self.doc_list.setModel(self.doc_list_model)
        self.doc_list.selectionModel().selectionChanged.connect(self.show_docs)

        # Document contents
        self.doc_webview = gui.WebviewWidget(self.splitter, debug=False)
        # Highlighting runs only after the page (and its scripts) loaded.
        self.doc_webview.loadFinished.connect(self.highlight_docs)

        self.mainArea.layout().addWidget(self.splitter)

    def copy_to_clipboard(self):
        """Copy the text currently selected in the web view to the clipboard."""
        text = self.doc_webview.selectedText()
        QApplication.clipboard().setText(text)

    @Inputs.corpus
    def set_data(self, corpus=None):
        """
        Handle a new input corpus (or ``None`` when the input is removed).

        The widget is reset, feature lists are rebuilt from the corpus
        domain, the saved context is restored and the views are refreshed.
        The outputs are always recommitted at the end.
        """
        self.closeContext()
        self.reset_widget()
        self.corpus = corpus
        self.search_features = []
        if corpus is not None:
            domain = self.corpus.domain
            # Enable/disable tokens checkbox
            has_tokens = self.corpus.has_tokens()
            if not has_tokens:
                # setChecked takes a bool; setCheckState expects a
                # Qt.CheckState and must not be given False.
                self.show_tokens_checkbox.setChecked(False)
            self.show_tokens_checkbox.setEnabled(has_tokens)

            self.search_features = list(
                filter_visible(chain(domain.variables, domain.metas)))
            self.display_features = list(
                filter_visible(chain(domain.variables, domain.metas)))
            self.search_indices = list(range(len(self.search_features)))
            self.display_indices = list(range(len(self.display_features)))
            self.selection = [0]
            # openContext may overwrite the defaults assigned above.
            self.openContext(self.corpus)
            self.display_list_indices = self.display_indices
            self.regenerate_docs()
            self.list_docs()
            self.update_info()
            self.set_selection()
            self.show_docs()
        self.commit()

    def reset_widget(self):
        """Drop the current corpus and clear every view, list and warning."""
        # Corpus
        self.corpus = None
        self.corpus_docs = None
        self.output_mask = []
        self.display_features = []
        # Widgets
        self.search_listbox.clear()
        self.display_listbox.clear()
        self.filter_input.clear()
        self.update_info()
        # Models/vars
        self.search_features.clear()
        self.search_indices.clear()
        self.display_indices.clear()
        self.doc_list_model.clear()
        # Warnings
        self.Warning.clear()
        # WebView
        self.doc_webview.setHtml('')

    def list_docs(self):
        """
        List documents into the left scrolling area.

        Only documents whose generated text matches the RegExp filter
        (case-insensitive) are listed; their corpus indices are stored in
        ``self.output_mask``. An empty filter matches everything. If the
        filter is not a valid regular expression the current list is left
        untouched.
        """
        if self.corpus_docs is None:
            return
        search_keyword = self.regexp_filter.strip('|')
        try:
            reg = re.compile(search_keyword, re.IGNORECASE)
        except re.error:
            # Pattern is incomplete/invalid (e.g. while typing): keep the
            # previous listing.
            return

        def is_match(x):
            return not bool(search_keyword) or reg.search(x)

        self.output_mask.clear()
        self.doc_list_model.clear()

        for i, (doc, title, content) in enumerate(
                zip(self.corpus, self.corpus.titles, self.corpus_docs)):
            if is_match(content):
                item = QStandardItem()
                item.setData(title, Qt.DisplayRole)
                item.setData(doc, Qt.UserRole)
                self.doc_list_model.appendRow(item)
                self.output_mask.append(i)

    def reset_selection(self):
        """Select the first listed document, or blank the view if none."""
        if self.doc_list_model.rowCount() > 0:
            self.doc_list.selectRow(0)  # Select the first document
        else:
            self.doc_webview.setHtml('')

    def set_selection(self):
        """Select the rows stored in ``self.selection`` in the document list."""
        view = self.doc_list
        if len(self.selection):
            selection = QItemSelection()

            for row in self.selection:
                selection.append(
                    QItemSelectionRange(view.model().index(row, 0),
                                        view.model().index(row, 0)))
            view.selectionModel().select(selection,
                                         QItemSelectionModel.ClearAndSelect)

    def show_docs(self):
        """
        Show the selected documents in the right area.

        Builds one HTML table with a row per displayed feature for every
        selected document (plus the tokens when "Show Tokens & Tags" is
        on) and loads it into the web view. A non-empty selection is also
        remembered in ``self.selection``.
        """
        HTML = '''
        <!doctype html>
        <html>
        <head>
        <script type="text/javascript" src="resources/jquery-3.1.1.min.js">
        </script>
        <script type="text/javascript" src="resources/jquery.mark.min.js">
        </script>
        <script type="text/javascript" src="resources/highlighter.js">
        </script>
        <meta charset='utf-8'>
        <style>

        table {{ border-collapse: collapse; }}
        mark {{ background: #FFCD28; }}

        tr > td {{
            padding-bottom: 3px;
            padding-top: 3px;
        }}

        body {{
            font-family: Helvetica;
            font-size: 10pt;
        }}

        .line {{ border-bottom: 1px solid #000; }}
        .separator {{ height: 5px; }}

        .variables {{
            vertical-align: top;
            padding-right: 10px;
        }}

        .token {{
            padding: 3px;
            border: 1px #B0B0B0 solid;
            margin-right: 5px;
            margin-bottom: 5px;
            display: inline-block;
        }}

        img {{
            max-width: 100%;
        }}

        </style>
        </head>
        <body>
        {}
        </body>
        </html>
        '''
        self.display_indices = self.display_list_indices
        if self.corpus is None:
            return

        self.Warning.no_feats_display.clear()
        if len(self.display_indices) == 0:
            self.Warning.no_feats_display()

        if self.show_tokens:
            tokens = list(self.corpus.ngrams_iterator(include_postags=True))

        marked_search_features = [
            f for i, f in enumerate(self.search_features)
            if i in self.search_indices
        ]

        # Query the selection once and reuse it for both the remembered
        # row numbers and the rendering loop.
        selected_rows = self.doc_list.selectionModel().selectedRows()
        selection = [i.row() for i in selected_rows]
        if selection:
            self.selection = selection

        # Collect fragments and join once instead of repeated `+=`.
        # NOTE(review): feature values and tokens are inserted into the
        # HTML without escaping; text containing markup will be rendered
        # as markup. Confirm whether that is intended before escaping.
        parts = ['<table>']
        for doc_count, index in enumerate(selected_rows):
            if doc_count > 0:  # add split
                parts.append('<tr class="line separator"><td/><td/></tr>'
                             '<tr class="separator"><td/><td/></tr>')

            doc = index.data(Qt.UserRole)
            row_ind = doc.row_index
            for ind in self.display_indices:
                feature = self.display_features[ind]
                mark = 'class="mark-area"' if feature in marked_search_features else ''
                value = str(doc[feature.name])
                is_image = feature.attributes.get('type', '') == 'image'
                if is_image and value != '?':
                    value = '<img src="{}"></img>'.format(value)
                parts.append(
                    '<tr><td class="variables"><strong>{}:</strong></td>'
                    '<td {}>{}</td></tr>'.format(feature.name, mark, value))

            if self.show_tokens:
                parts.append(
                    '<tr><td class="variables"><strong>Tokens & Tags:</strong></td>'
                    '<td>{}</td></tr>'.format(''.join(
                        '<span class="token">{}</span>'.format(token)
                        for token in tokens[row_ind])))

        parts.append('</table>')
        # Relative script URLs in the template resolve against this file.
        base = QUrl.fromLocalFile(__file__)
        self.doc_webview.setHtml(HTML.format(''.join(parts)), base)

    def search_features_changed(self):
        """Rebuild the searchable text and rerun the filter."""
        self.regenerate_docs()
        self.refresh_search()

    def regenerate_docs(self):
        """
        Regenerate the per-document text used for filtering from the
        features selected in the search list; warn when none is selected.
        """
        self.corpus_docs = None
        self.Warning.no_feats_search.clear()
        if self.corpus is not None:
            feats = [self.search_features[i] for i in self.search_indices]
            if len(feats) == 0:
                self.Warning.no_feats_search()
            self.corpus_docs = self.corpus.documents_from_features(feats)

    def refresh_search(self):
        """Re-filter the list, reset the selection, and update info/outputs."""
        if self.corpus is not None:
            self.list_docs()
            self.reset_selection()
            self.update_info()
            self.commit()

    @Slot()
    def highlight_docs(self):
        """Ask the loaded page to highlight matches of the current filter."""
        import json

        search_keyword = self.regexp_filter.strip('|')

        if search_keyword:
            # json.dumps yields a valid, fully escaped JavaScript string
            # literal (backslashes, quotes, newlines, ...), so characters
            # typed into the filter cannot break or inject into the script.
            js_literal = json.dumps(search_keyword)
            # mark is undefined when clearing the view (`setHtml('')`). Maybe
            # set and template html with all the scripts, ... but no contents?
            self.doc_webview.runJavaScript('''
                    if (typeof mark !== "undefined") {{
                        mark({});
                    }}
                '''.format(js_literal))

    def update_info(self):
        """
        Refresh the attributes shown in the Info box.

        With a corpus they hold document/token/type counts and
        preprocessing flags; without one they are all empty strings.
        """
        if self.corpus is not None:
            has_tokens = self.corpus.has_tokens()
            self.n_documents = len(self.corpus)
            self.n_matching = '{}/{}'.format(self.doc_list_model.rowCount(),
                                             self.n_documents)
            self.n_tokens = sum(
                map(len, self.corpus.tokens)) if has_tokens else 'n/a'
            self.n_types = len(
                self.corpus.dictionary) if has_tokens else 'n/a'
            self.is_preprocessed = has_tokens
            self.is_pos_tagged = self.corpus.pos_tags is not None
            self.ngram_range = '{}-{}'.format(*self.corpus.ngram_range)
        else:
            self.n_documents = ''
            self.n_matching = ''
            self.n_tokens = ''
            self.n_types = ''
            self.is_preprocessed = ''
            self.is_pos_tagged = ''
            self.ngram_range = ''

    def commit(self):
        """
        Send the listed (matching) documents and the remaining documents
        to the outputs; send ``None`` on both when there is no corpus.
        """
        if self.corpus is not None:
            matched = self.corpus[self.output_mask]
            output_mask = set(self.output_mask)
            unmatched_mask = [
                i for i in range(len(self.corpus)) if i not in output_mask
            ]
            unmatched = self.corpus[unmatched_mask]
            self.Outputs.matching_docs.send(matched)
            self.Outputs.other_docs.send(unmatched)
        else:
            self.Outputs.matching_docs.send(None)
            self.Outputs.other_docs.send(None)
예제 #18
0
class OWCreateInstance(OWWidget):
    """
    Interactively create a single data instance from the input data.

    Each variable of the input table is shown as an editable row; the
    resulting one-row table is sent to the output, optionally appended to
    the input data together with a "Source ID" meta column.
    """
    name = "Create Instance"
    description = "Interactively create a data instance from sample dataset."
    icon = "icons/CreateInstance.svg"
    category = "Data"
    keywords = ["simulator"]
    priority = 4000

    class Inputs:
        data = Input("Data", Table)
        reference = Input("Reference", Table)

    class Outputs:
        data = Output("Data", Table)

    class Information(OWWidget.Information):
        nans_removed = Msg("Variables with only missing values were "
                           "removed from the list.")

    want_main_area = False
    # Names of the initialisation actions; also used as button/menu captions.
    ACTIONS = ["median", "mean", "random", "input"]
    # (tag, column title) pairs; `Header.<tag>` gives the column index.
    HEADER = [["name", "Variable"], ["variable", "Value"]]
    Header = namedtuple("header",
                        [tag for tag, _ in HEADER])(*range(len(HEADER)))

    values: Dict[str, Union[float, str]] = Setting({}, schema_only=True)
    append_to_data = Setting(True)
    auto_commit = Setting(True)

    def __init__(self):
        """Build the filter line, the variable table and the action buttons."""
        super().__init__()
        self.data: Optional[Table] = None
        self.reference: Optional[Table] = None

        self.filter_edit = QLineEdit(textChanged=self.__filter_edit_changed,
                                     placeholderText="Filter...")
        self.view = QTableView(sortingEnabled=True,
                               contextMenuPolicy=Qt.CustomContextMenu,
                               selectionMode=QTableView.NoSelection)
        self.view.customContextMenuRequested.connect(self.__menu_requested)
        self.view.setItemDelegateForColumn(self.Header.variable,
                                           VariableDelegate(self))
        self.view.verticalHeader().hide()
        self.view.horizontalHeader().setStretchLastSection(True)
        self.view.horizontalHeader().setMaximumSectionSize(350)

        self.model = VariableItemModel(self)
        self.model.setHorizontalHeaderLabels([x for _, x in self.HEADER])
        self.model.dataChanged.connect(self.__table_data_changed)
        self.model.dataHasNanColumn.connect(self.Information.nans_removed)
        # The view shows the model through a proxy that does the text
        # filtering (and sorting); -1 makes the filter look at all columns.
        self.proxy_model = QSortFilterProxyModel()
        self.proxy_model.setFilterKeyColumn(-1)
        self.proxy_model.setFilterCaseSensitivity(Qt.CaseInsensitive)
        self.proxy_model.setSourceModel(self.model)
        self.view.setModel(self.proxy_model)

        vbox = gui.vBox(self.controlArea, box=True)
        vbox.layout().addWidget(self.filter_edit)
        vbox.layout().addWidget(self.view)

        box = gui.hBox(vbox)
        gui.rubber(box)
        for name in self.ACTIONS:
            # `fun=name` binds the current action name; a plain closure
            # would see only the last value of `name`.
            gui.button(box,
                       self,
                       name.capitalize(),
                       lambda *args, fun=name: self._initialize_values(fun),
                       autoDefault=False)
        gui.rubber(box)

        box = gui.auto_apply(self.controlArea, self, "auto_commit")
        box.button.setFixedWidth(180)
        box.layout().insertStretch(0)
        # pylint: disable=unnecessary-lambda
        append = gui.checkBox(None,
                              self,
                              "append_to_data",
                              "Append this instance to input data",
                              callback=lambda: self.commit())
        box.layout().insertWidget(0, append)

        self._set_input_summary()
        self._set_output_summary()
        self.settingsAboutToBePacked.connect(self.pack_settings)

    def __filter_edit_changed(self):
        """Apply the (stripped) filter text to the proxy model."""
        self.proxy_model.setFilterFixedString(self.filter_edit.text().strip())

    def __table_data_changed(self):
        """Commit whenever a value in the table is edited."""
        self.commit()

    def __menu_requested(self, point: QPoint):
        """Show the per-variable context menu for the row under `point`."""
        index = self.view.indexAt(point)
        if not index.isValid():
            # Right-click outside any row: an invalid index has no model,
            # so there is nothing to map or act upon.
            return
        model: QSortFilterProxyModel = index.model()
        source_index = model.mapToSource(index)
        menu = QMenu(self)
        for action in self._create_actions(source_index):
            menu.addAction(action)
        menu.popup(self.view.viewport().mapToGlobal(point))

    def _create_actions(self, index: QModelIndex) -> List[QAction]:
        """
        Create one action per entry of ``ACTIONS`` that initialises only
        the row of `index` (an index of the source model).
        """
        actions = []
        for name in self.ACTIONS:
            action = QAction(name.capitalize(), self)
            action.triggered.connect(
                lambda *args, fun=name: self._initialize_values(fun, [index]))
            actions.append(action)
        return actions

    def _initialize_values(self, fun: str, indices: List[QModelIndex] = None):
        """
        Set variable values according to `fun` and commit.

        Parameters
        ----------
        fun : str
            One of ``ACTIONS``. "input" computes the values from the
            reference table, the others from the values stored in the model.
        indices : list of QModelIndex, optional
            Source-model indices of the rows to set. When omitted, the rows
            currently shown by the (possibly filtered) proxy model are set.

        Raises
        ------
        NotImplementedError
            If a variable is neither continuous, discrete nor string.
        """
        cont_fun = {
            "median": np.nanmedian,
            "mean": np.nanmean,
            "random": cont_random,
            "input": np.nanmean
        }.get(fun, NotImplemented)
        disc_fun = {
            "median": majority,
            "mean": majority,
            "random": disc_random,
            "input": majority
        }.get(fun, NotImplemented)

        if not self.data or fun == "input" and not self.reference:
            return

        if indices is None:
            # Proxy rows differ from source rows while a filter is active,
            # so translate every visible row before indexing the source
            # model below.
            proxy = self.proxy_model
            rows = [
                proxy.mapToSource(
                    proxy.index(row, self.Header.variable)).row()
                for row in range(proxy.rowCount())
            ]
        else:
            rows = [index.row() for index in indices]

        # Suspend per-cell commits; a single commit is issued at the end.
        self.model.dataChanged.disconnect(self.__table_data_changed)
        try:
            for row in rows:
                index = self.model.index(row, self.Header.variable)
                variable = self.model.data(index, VariableRole)

                if fun == "input":
                    if variable not in self.reference.domain:
                        continue
                    values = self.reference.get_column_view(variable)[0]
                    if variable.is_primitive():
                        values = values.astype(float)
                        if np.isnan(values).all():
                            continue
                else:
                    values = self.model.data(index, ValuesRole)

                if variable.is_continuous:
                    value = cont_fun(values)
                    value = round(value, variable.number_of_decimals)
                elif variable.is_discrete:
                    value = disc_fun(values)
                elif variable.is_string:
                    value = ""
                else:
                    raise NotImplementedError

                self.model.setData(index, value, ValueRole)
        finally:
            # Reconnect even if a row raised, otherwise later edits would
            # silently stop triggering commits.
            self.model.dataChanged.connect(self.__table_data_changed)
        self.commit()

    @Inputs.data
    def set_data(self, data: Table):
        """Store the input data, rebuild the table and commit."""
        self.data = data
        self._set_input_summary()
        self._set_model_data()
        self.unconditional_commit()

    def _set_model_data(self):
        """Repopulate the model from ``self.data`` and the saved values."""
        self.Information.nans_removed.clear()
        self.model.removeRows(0, self.model.rowCount())
        if not self.data:
            return

        self.model.set_data(self.data, self.values)
        # The saved values are applied once and then dropped.
        self.values = {}
        # Stretching is switched off while columns are sized to contents
        # and switched back on afterwards.
        self.view.horizontalHeader().setStretchLastSection(False)
        self.view.resizeColumnsToContents()
        self.view.resizeRowsToContents()
        self.view.horizontalHeader().setStretchLastSection(True)

    @Inputs.reference
    def set_reference(self, data: Table):
        """Store the reference table used by the "input" action."""
        self.reference = data
        self._set_input_summary()

    def _set_input_summary(self):
        """Show the sizes of the data and reference inputs in the status bar."""
        n_data = len(self.data) if self.data else 0
        n_refs = len(self.reference) if self.reference else 0
        summary, details, kwargs = self.info.NoInput, "", {}

        if self.data or self.reference:
            summary = f"{self.info.format_number(n_data)}, " \
                      f"{self.info.format_number(n_refs)}"
            data_list = [("Data", self.data), ("Reference", self.reference)]
            details = format_multiple_summaries(data_list)
            kwargs = {"format": Qt.RichText}
        self.info.set_input_summary(summary, details, **kwargs)

    def _set_output_summary(self, data: Optional[Table] = None):
        """Show the size of the output table (or "no output") in the status bar."""
        if data:
            summary, details = len(data), format_summary_details(data)
        else:
            summary, details = self.info.NoOutput, ""
        self.info.set_output_summary(summary, details)

    def commit(self):
        """
        Build the instance from the table values and send it, appended to
        the input data when that option is checked. Sends ``None`` when
        there is no input data.
        """
        output_data = None
        if self.data:
            output_data = self._create_data_from_values()
            if self.append_to_data:
                output_data = self._append_to_data(output_data)
        self._set_output_summary(output_data)
        self.Outputs.data.send(output_data)

    def _create_data_from_values(self) -> Table:
        """Return a one-row table in the input domain holding the table values."""
        data = Table.from_domain(self.data.domain, 1)
        data.name = "created"
        # Start from "all missing" and then fill in the values present in
        # the table.
        data.X[:] = np.nan
        data.Y[:] = np.nan
        for i, m in enumerate(self.data.domain.metas):
            data.metas[:, i] = "" if m.is_string else np.nan

        values = self._get_values()
        for var_name, value in values.items():
            data[:, var_name] = value
        return data

    def _append_to_data(self, data: Table) -> Table:
        """
        Return the input data followed by the created row, with an extra
        "Source ID" meta telling the two sources apart.
        """
        assert self.data
        assert len(data) == 1

        var = DiscreteVariable("Source ID", values=(self.data.name, data.name))
        data = Table.concatenate([self.data, data], axis=0)
        domain = Domain(data.domain.attributes, data.domain.class_vars,
                        data.domain.metas + (var, ))
        data = data.transform(domain)
        # 0 -> rows of the input data, 1 -> the created instance.
        data.metas[:len(self.data), -1] = 0
        data.metas[len(self.data):, -1] = 1
        return data

    def _get_values(self) -> Dict[str, Union[str, float]]:
        """Return a mapping of variable name to the value set in the table."""
        values = {}
        for row in range(self.model.rowCount()):
            index = self.model.index(row, self.Header.variable)
            values[self.model.data(index, VariableRole).name] = \
                self.model.data(index, ValueRole)
        return values

    def send_report(self):
        """Report the domain and the value chosen for every variable."""
        if not self.data:
            return
        self.report_domain("Input", self.data.domain)
        self.report_domain("Output", self.data.domain)
        items = []
        values: Dict = self._get_values()
        for var in self.data.domain.variables + self.data.domain.metas:
            val = values.get(var.name, np.nan)
            if var.is_primitive():
                val = var.repr_val(val)
            items.append([f"{var.name}:", val])
        self.report_table("Values", items)

    @staticmethod
    def sizeHint():
        """Preferred initial size of the widget window."""
        return QSize(600, 500)

    def pack_settings(self):
        """Store the current table values in the ``values`` setting."""
        self.values: Dict[str, Union[str, float]] = self._get_values()
예제 #19
0
class OWRank(OWWidget):
    # Widget metadata.
    name = "Rank"
    description = "Rank and filter data features by their relevance."
    icon = "icons/Rank.svg"
    priority = 1102

    buttons_area_orientation = Qt.Vertical

    # Signal declarations: (name, type, handler method name[, flags]) for
    # inputs and (name, type[, flags]) for outputs.
    inputs = [("Data", Table, "setData"),
              ("Scorer", score.Scorer, "set_learner", widget.Multiple)]
    outputs = [("Reduced Data", Table, widget.Default), ("Scores", Table)]

    # Ids of the attribute-selection modes; they double as the button ids
    # of the radio buttons created in __init__.
    SelectNone, SelectAll, SelectManual, SelectNBest = range(4)

    cls_default_selected = Setting({"Gain Ratio", "Gini Decrease"})
    reg_default_selected = Setting({"Univariate Linear Regression", "RReliefF"})
    selectMethod = Setting(SelectNBest)
    nSelected = Setting(5)
    auto_apply = Setting(True)

    # Header state for discrete/continuous/no_class scores
    headerState = Setting([None, None, None])

    settings_version = 1
    settingsHandler = DomainContextHandler()
    selected_rows = ContextSetting([])

    # One boolean flag per scoring method, all enabled by default. The
    # names listed in `_score_vars` are the attributes the score check
    # boxes in __init__ are bound to, in the order they are zipped with
    # SCORES there.
    gain = inf_gain = gini = anova = chi2 = ulr = relief = rrelief = fcbc = True
    _score_vars = ["gain", "inf_gain", "gini", "anova", "chi2", "relief",
                   "fcbc", "ulr", "rrelief"]

    class Warning(OWWidget.Warning):
        no_target_var = Msg("Data does not have a target variable")

    class Error(OWWidget.Error):
        invalid_type = Msg("Cannot handle target variable type {}")
        inadequate_learner = Msg("{}")

    def __init__(self):
        """
        Build the widget: score check boxes, the attribute-selection
        controls and the three stacked rank tables (discrete target,
        continuous target, no target).
        """
        super().__init__()
        self.measure_scores = None
        self.update_scores = True
        self.usefulAttributes = []
        self.learners = {}
        self.labels = []
        self.out_domain_desc = None

        self.all_measures = SCORES

        self.selectedMeasures = {m.name: True for m in self.all_measures}
        # Discrete (0) or continuous (1) class mode
        self.rankMode = 0

        self.data = None

        # Split the measures by the target type their scorer supports.
        self.discMeasures = [
            m for m in self.all_measures
            if issubclass(DiscreteVariable, m.score.class_type)
        ]
        self.contMeasures = [
            m for m in self.all_measures
            if issubclass(ContinuousVariable, m.score.class_type)
        ]

        # One check box per score; the first seven go to the
        # classification box and the last two to the regression box.
        self.score_checks = []
        self.cls_scoring_box = gui.vBox(None, "Scoring for Classification")
        self.reg_scoring_box = gui.vBox(None, "Scoring for Regression")
        parent_boxes = [self.cls_scoring_box] * 7 + [self.reg_scoring_box] * 2
        for scorer, setting_name, parent in zip(
                SCORES, self._score_vars, parent_boxes):
            self.score_checks.append(
                gui.checkBox(
                    parent, self, setting_name, label=scorer.name,
                    callback=lambda val=scorer:
                    self.measuresSelectionChanged(val)))

        self.score_stack = QStackedWidget(self)
        for page in (self.cls_scoring_box, self.reg_scoring_box, QWidget()):
            self.score_stack.addWidget(page)
        self.controlArea.layout().addWidget(self.score_stack)

        gui.rubber(self.controlArea)

        selMethBox = gui.vBox(
            self.controlArea, "Select Attributes", addSpace=True)

        grid = QGridLayout()
        grid.setContentsMargins(6, 0, 6, 0)
        self.selectButtons = QButtonGroup()
        self.selectButtons.buttonClicked[int].connect(self.setSelectMethod)

        def make_radio(caption, group_id, tip=None):
            # Create a radio button and register it in the button group.
            radio = QRadioButton(caption)
            self.selectButtons.addButton(radio, group_id)
            if tip is not None:
                radio.setToolTip(tip)
            return radio

        radios = [
            make_radio(self.tr("None"), OWRank.SelectNone),
            make_radio(self.tr("All"), OWRank.SelectAll),
            make_radio(self.tr("Manual"), OWRank.SelectManual),
            make_radio(self.tr("Best ranked:"), OWRank.SelectNBest),
        ]

        n_best_spin = gui.spin(selMethBox, self, "nSelected", 1, 100,
                               callback=self.nSelectedChanged)

        for grid_row, radio in enumerate(radios):
            grid.addWidget(radio, grid_row, 0)
        # The spin box sits next to the "Best ranked:" button.
        grid.addWidget(n_best_spin, 3, 1)

        self.selectButtons.button(self.selectMethod).setChecked(True)

        selMethBox.layout().addLayout(grid)

        gui.auto_commit(selMethBox, self, "auto_apply", "Send", box=False)

        # Discrete, continuous and no_class table views are stacked
        self.ranksViewStack = QStackedLayout()
        self.mainArea.layout().addLayout(self.ranksViewStack)

        def build_ranks_view(labels, saved_header_state):
            # Create one rank table (view + item model + sorting proxy),
            # add it to the stack and wire up its signals.
            view = QTableView()
            self.ranksViewStack.addWidget(view)
            view.setSelectionBehavior(QTableView.SelectRows)
            view.setSelectionMode(QTableView.MultiSelection)
            view.setSortingEnabled(True)

            model = QStandardItemModel(self)
            model.setHorizontalHeaderLabels(labels)

            proxy = MySortProxyModel(self)
            proxy.setSourceModel(model)
            view.setModel(proxy)

            view.setColumnWidth(0, 20)
            view.selectionModel().selectionChanged.connect(self.commit)
            view.pressed.connect(self.onSelectItem)
            view.horizontalHeader().sectionClicked.connect(self.headerClick)
            view.verticalHeader().sectionClicked.connect(self.onSelectItem)

            if saved_header_state is not None:
                view.horizontalHeader().restoreState(saved_header_state)
            return view, model, proxy

        self.discRanksLabels = ["#"] + [m.shortname for m in self.discMeasures]
        (self.discRanksView,
         self.discRanksModel,
         self.discRanksProxyModel) = build_ranks_view(
             self.discRanksLabels, self.headerState[0])

        self.contRanksLabels = ["#"] + [m.shortname for m in self.contMeasures]
        (self.contRanksView,
         self.contRanksModel,
         self.contRanksProxyModel) = build_ranks_view(
             self.contRanksLabels, self.headerState[1])

        self.noClassRanksLabels = ["#"]
        (self.noClassRanksView,
         self.noClassRanksModel,
         self.noClassRanksProxyModel) = build_ranks_view(
             self.noClassRanksLabels, self.headerState[2])

        # Switch the current view to Discrete
        self.switchRanksMode(0)
        self.resetInternals()
        self.updateDelegates()
        self.updateVisibleScoreColumns()

        self.resize(690, 500)

        self.measure_scores = table((len(self.measures), 0), None)

    def switchRanksMode(self, index):
        """
        Switch between discrete/continuous/no_class mode.

        `index` 0 activates the discrete-class view, model, proxy model and
        measures, 1 the continuous-class ones, and any other value the
        no-class ones (with an empty measure list). Afterwards the score
        table is re-created with zero columns, the score check boxes are
        synchronised with the defaults of the chosen mode and the visible
        score columns are refreshed.
        """
        self.rankMode = index
        self.ranksViewStack.setCurrentIndex(index)

        collapsed = (QSizePolicy.Ignored, QSizePolicy.Ignored)
        stretched = (QSizePolicy.Expanding, QSizePolicy.Expanding)

        if index == 0:
            active = (self.discRanksView, self.discRanksModel,
                      self.discRanksProxyModel)
            self.measures = self.discMeasures
            self.selected_checks = self.cls_default_selected
            box_policies = [(self.reg_scoring_box, collapsed),
                            (self.cls_scoring_box, stretched)]
        elif index == 1:
            active = (self.contRanksView, self.contRanksModel,
                      self.contRanksProxyModel)
            self.measures = self.contMeasures
            self.selected_checks = self.reg_default_selected
            box_policies = [(self.cls_scoring_box, collapsed),
                            (self.reg_scoring_box, stretched)]
        else:
            active = (self.noClassRanksView, self.noClassRanksModel,
                      self.noClassRanksProxyModel)
            self.measures = []
            self.selected_checks = set()
            box_policies = [(self.reg_scoring_box, collapsed),
                            (self.cls_scoring_box, collapsed)]

        self.ranksView, self.ranksModel, self.ranksProxyModel = active
        for box, policy in box_policies:
            box.setSizePolicy(*policy)

        n_score_rows = len(self.measures) + len(self.learners)
        self.measure_scores = table((n_score_rows, 0), None)

        # `update_scores` is consulted by measuresSelectionChanged before it
        # recomputes scores; keep it off while the check boxes are synced.
        self.update_scores = False
        for check, score in zip(self.score_checks, SCORES):
            check.setChecked(score.name in self.selected_checks)
        self.update_scores = True

        self.score_stack.setCurrentIndex(index)
        self.updateVisibleScoreColumns()

    @check_sql_input
    def setData(self, data):
        """
        Set the input data and rebuild the ranking for it.

        The rank mode is chosen from the class variable: continuous class
        selects mode 1, a missing class variable raises the `no_target_var`
        warning and selects mode 2, and a class that is neither continuous
        nor discrete raises the `invalid_type` error (the discrete mode set
        at the start stays active). With `data` of None, None is sent on the
        "Scores" output instead.
        """
        self.closeContext()
        self.clear_messages()
        self.resetInternals()

        self.data = data
        self.switchRanksMode(0)
        if self.data is None:
            self.send("Scores", None)
        else:
            domain = self.data.domain
            attributes = domain.attributes
            self.usefulAttributes = [
                var for var in attributes
                if var.is_discrete or var.is_continuous
            ]

            if domain.has_continuous_class:
                self.switchRanksMode(1)
            elif not domain.class_var:
                self.Warning.no_target_var()
                self.switchRanksMode(2)
            elif not domain.has_discrete_class:
                self.Error.invalid_type(type(domain.class_var).__name__)

            if issparse(self.data.X):
                # keep only measures supporting sparse data
                self.measures = [measure for measure in self.measures
                                 if measure.score.supports_sparse_data]

            self.ranksModel.setRowCount(len(attributes))
            for row, var in enumerate(attributes):
                # Column 0 shows the number of values of a discrete
                # attribute, or "C" for any other attribute.
                count_item = ScoreValueItem()
                count_item.setData(
                    len(var.values) if var.is_discrete else "C",
                    Qt.DisplayRole)
                self.ranksModel.setItem(row, 0, count_item)

                header_item = QStandardItem(var.name)
                header_item.setData(gui.attributeIconDict[var],
                                    Qt.DecorationRole)
                self.ranksModel.setVerticalHeaderItem(row, header_item)

            n_score_rows = len(self.measures) + len(self.learners)
            self.measure_scores = table((n_score_rows, len(attributes)), None)
            self.updateScores()

        self.selected_rows = []
        self.openContext(data)
        self.selectMethodChanged()
        self.commit()

    def get_selection(self):
        """Return the distinct row numbers of the current view selection."""
        indexes = self.ranksView.selectionModel().selection().indexes()
        return list({index.row() for index in indexes})

    def set_learner(self, learner, lid=None):
        """
        Add, replace or remove the scorer connected under id `lid`.

        A non-None `learner` is stored in `self.learners[lid]`, wrapped in
        `score_meta` with its name used for both name fields. A None
        `learner` with a non-None `lid` removes the stored entry; a removal
        for an id that was never stored is ignored rather than raising
        KeyError. The learner rows of `measure_scores` are then rebuilt,
        the header labels reset to the measure-only labels, and only the
        learner scores are recomputed before committing.
        """
        if learner is None:
            if lid is not None:
                # pop() with a default tolerates ids that were never stored.
                self.learners.pop(lid, None)
        else:
            self.learners[lid] = score_meta(
                learner.name,
                learner.name,
                learner
            )

        attrs_len = 0 if not self.data else len(self.data.domain.attributes)
        shape = (len(self.learners), attrs_len)
        # Keep the rows of the built-in measures and start the learner rows
        # afresh.
        self.measure_scores = self.measure_scores[:len(self.measures)]
        self.measure_scores += table(shape, None)

        self.contRanksModel.setHorizontalHeaderLabels(self.contRanksLabels)
        self.discRanksModel.setHorizontalHeaderLabels(self.discRanksLabels)
        self.noClassRanksModel.setHorizontalHeaderLabels(
            self.noClassRanksLabels)

        # Recompute learner scores only; built-in measures stay untouched.
        measures_mask = [False] * len(self.measures)
        measures_mask += [True] * len(self.learners)
        self.updateScores(measures_mask)
        self.commit()

    def updateScores(self, measuresMask=None):
        """
        Update the current computed scores.

        If `measuresMask` is given it must be a list of bool values
        indicating what measures should be recomputed. Entries are matched
        positionally against `self.measures` followed by the values of
        `self.learners`.

        Does nothing when there is no data. Otherwise the horizontal header
        labels of all three rank models are refreshed, the active rank model
        is updated, the proxy model is invalidated, the selection is
        re-applied and the scores table is sent on the "Scores" output.
        """
        if not self.data:
            return
        if self.data.has_missing():
            self.information("Missing values have been imputed.")

        # Built-in measures first, then the connected learners.
        measures = self.measures + [v for k, v in self.learners.items()]
        if measuresMask is None:
            # Update all selected measures
            measuresMask = [self.selectedMeasures.get(m.name)
                            for m in self.measures]
            # Learner entries use the learner name as the mask value
            # (presumably a non-empty, hence truthy, string).
            measuresMask = measuresMask + [v.name for k, v in
                                           self.learners.items()]

        data = self.data
        # Row of `measure_scores` that receives the next learner score row.
        learner_col = len(self.measures)
        # Start the learner labels (and the learner error) afresh when the
        # mask has no learner entries or its first learner entry is truthy.
        if len(measuresMask) <= len(self.measures) or \
                measuresMask[len(self.measures)]:
            self.labels = []
            self.Error.inadequate_learner.clear()

        self.setStatusMessage("Running")
        with self.progressBar():
            # Everything except an explicit False counts towards progress.
            n_measure_update = len([x for x in measuresMask if x is not False])
            count = 0
            for index, (meas, mask) in enumerate(zip(measures, measuresMask)):
                if not mask:
                    continue
                self.progressBarSet(90 * count / n_measure_update)
                count += 1
                if index < len(self.measures):
                    # Built-in measure: instantiate and score the whole data.
                    estimator = meas.score()
                    try:
                        self.measure_scores[index] = estimator(data)
                    except ValueError:
                        # Scoring all attributes at once failed; score them
                        # one by one and store None for those that fail.
                        self.measure_scores[index] = []
                        for attr in data.domain.attributes:
                            try:
                                self.measure_scores[index].append(
                                    estimator(data, attr))
                            except ValueError:
                                self.measure_scores[index].append(None)
                else:
                    # Connected learner: `meas.score` is the learner itself.
                    learner = meas.score
                    if isinstance(learner, Learner) and \
                            not learner.check_learner_adequacy(self.data.domain):
                        self.Error.inadequate_learner(
                            learner.learner_adequacy_err_msg)
                        # Placeholder row for the inadequate learner
                        # (`table` is defined elsewhere; presumably its
                        # default fill is None -- TODO confirm).
                        scores = table((1, len(data.domain.attributes)))
                    else:
                        scores = meas.score.score_data(data)
                    # A learner may yield several score rows; each gets its
                    # own numbered label and its own row in `measure_scores`.
                    for i, row in enumerate(scores):
                        self.labels.append(meas.shortname + str(i + 1))
                        if len(self.measure_scores) > learner_col:
                            self.measure_scores[learner_col] = row
                        else:
                            self.measure_scores.append(row)
                        learner_col += 1
            self.progressBarSet(90)
        self.contRanksModel.setHorizontalHeaderLabels(
            self.contRanksLabels + self.labels
        )
        self.discRanksModel.setHorizontalHeaderLabels(
            self.discRanksLabels + self.labels
        )
        self.noClassRanksModel.setHorizontalHeaderLabels(
            self.noClassRanksLabels + self.labels
        )
        self.updateRankModel(measuresMask)
        self.ranksProxyModel.invalidate()
        self.selectMethodChanged()
        self.send("Scores", self.create_scores_table(self.labels))
        self.setStatusMessage("")

    def updateRankModel(self, measuresMask):
        """
        Update the rankModel.

        Writes the rows of `self.measure_scores` selected by `measuresMask`
        into columns 1.. of `self.ranksModel` (row i of the scores goes to
        model column i + 1) and sets the bar ratio of every non-None value
        relative to the minimum and maximum of its column.

        Note that `measuresMask` is extended in place when it is non-empty
        and shorter than `self.measure_scores`.
        """
        values = []
        # Pad the mask with its last entry so that it covers every score row
        # (a non-positive `diff` adds nothing).
        diff = len(self.measure_scores) - len(measuresMask)
        if len(measuresMask):
            measuresMask += [measuresMask[-1]] * diff
        # Drop model columns whose index exceeds the number of score rows.
        for i in range(self.ranksModel.columnCount() - 1,
                       len(self.measure_scores), -1):
            self.ranksModel.removeColumn(i)

        for i, (scores, m) in enumerate(zip(self.measure_scores, measuresMask)):
            # Skip unmasked rows, but only when the column already has an
            # item in row 0; otherwise the column is filled regardless.
            if not m and self.ranksModel.item(0, i + 1):
                values.append([])
                continue
            values_one = []
            for j, _score in enumerate(scores):
                values_one.append(_score)
                item = self.ranksModel.item(j, i + 1)
                if not item:
                    item = ScoreValueItem()
                    self.ranksModel.setItem(j, i + 1, item)
                item.setData(_score, Qt.DisplayRole)
            values.append(values_one)
        # Second pass: bar ratios for the masked columns only.
        for i, (vals, m) in enumerate(zip(values, measuresMask)):
            if not m:
                continue
            valid_vals = [v for v in vals if v is not None]
            if valid_vals:
                vmin, vmax = min(valid_vals), max(valid_vals)
                for j, v in enumerate(vals):
                    if v is not None:
                        # Set the bar ratio role for i-th measure.
                        # `or 1` avoids dividing by zero when all values
                        # in the column are equal.
                        ratio = float((v - vmin) / ((vmax - vmin) or 1))
                        item = self.ranksModel.item(j, i + 1)
                        item.setData(ratio, gui.BarRatioRole)

        self.ranksView.setColumnWidth(0, 20)
        self.ranksView.resizeRowsToContents()

    def resetInternals(self):
        """Forget the current data and empty the active rank model."""
        self.data, self.usefulAttributes = None, []
        self.ranksModel.setRowCount(0)

    def onSelectItem(self, index):
        """
        Called when the user selects/unselects an item in the table view.

        Switches the selection method to manual, checks the matching
        button and commits. `index` is not used.
        """
        manual = OWRank.SelectManual
        self.selectMethod = manual
        self.selectButtons.button(manual).setChecked(True)
        self.commit()

    def setSelectMethod(self, method):
        """Switch to selection `method`; do nothing if it is already active."""
        if self.selectMethod == method:
            return
        self.selectMethod = method
        button = self.selectButtons.button(method)
        button.setChecked(True)
        self.selectMethodChanged()

    def selectMethodChanged(self):
        """Re-apply the automatic selection and focus the active view."""
        self.autoSelection()
        view = self.ranksView
        view.setFocus()

    def nSelectedChanged(self):
        """Switch to the N-best selection method and re-apply the selection."""
        n_best = OWRank.SelectNBest
        self.selectMethod = n_best
        self.selectButtons.button(n_best).setChecked(True)
        self.selectMethodChanged()

    def autoSelection(self):
        """
        Select rows in the active view according to `self.selectMethod`.

        SelectNone clears the selection, SelectAll selects every row,
        SelectNBest selects the first `nSelected` rows of the proxy model
        (capped at the row count), and any other method re-selects the rows
        stored in `self.selected_rows`.
        """
        selection_model = self.ranksView.selectionModel()
        n_rows = self.ranksModel.rowCount()
        last_column = self.ranksModel.columnCount() - 1
        proxy = self.ranksProxyModel
        method = self.selectMethod

        def row_span(top, bottom):
            # Corner indexes of a block covering full rows top..bottom.
            return proxy.index(top, 0), proxy.index(bottom, last_column)

        if method == OWRank.SelectNone:
            selection = QItemSelection()
        elif method == OWRank.SelectAll:
            selection = QItemSelection(*row_span(0, n_rows - 1))
        elif method == OWRank.SelectNBest:
            n_best = min(self.nSelected, n_rows)
            selection = QItemSelection(*row_span(0, n_best - 1))
        else:
            selection = QItemSelection()
            for row in self.selected_rows:
                selection.append(QItemSelectionRange(*row_span(row, row)))

        selection_model.select(selection, QItemSelectionModel.ClearAndSelect)

    def headerClick(self, index):
        """
        Handle a click on horizontal header section `index`.

        For a score column (index >= 1) in N-best mode the selection is
        re-applied. The header states of all three views are then stored
        in `self.headerState`.
        """
        if self.selectMethod == OWRank.SelectNBest and index >= 1:
            # Reselect the top ranked attributes
            self.autoSelection()

        # Store the header states (discrete, continuous, no class)
        views = (self.discRanksView, self.contRanksView,
                 self.noClassRanksView)
        self.headerState = [bytes(view.horizontalHeader().saveState())
                            for view in views]

    def measuresSelectionChanged(self, measure):
        """
        Measure selection has changed. Update column visibility.

        Toggles the stored checked state of `measure`, mirrors it in
        `self.selected_checks`, and, when `self.update_scores` is set,
        recomputes the scores of the measure if none have been computed
        for it yet.
        """
        name = measure.name
        was_checked = self.selectedMeasures[name]
        self.selectedMeasures[name] = not was_checked
        if was_checked:
            self.selected_checks.discard(name)
        else:
            self.selected_checks.add(name)

        # One mask entry per measure followed by one per learner.
        measures_mask = [False] * (len(self.measures) + len(self.learners))
        # Update scores for shown column if they are not yet computed.
        if measure in self.measures and self.measure_scores:
            position = self.measures.index(measure)
            not_computed = all(score is None
                               for score in self.measure_scores[position])
            if not_computed:
                measures_mask[position] = True

        if self.update_scores:
            self.updateScores(measures_mask)
        self.updateVisibleScoreColumns()

    def updateVisibleScoreColumns(self):
        """
        Update the visible columns of the scores view.

        Each measure column is shown only if the measure is selected and
        is given a width of 100. If the current sort column is hidden, the
        stored header state of the active mode is discarded; with no stored
        header state the view is sorted (descending) by the first measure
        in `self.selected_checks`, or by the column after the last measure
        when there is none, and the selection is re-applied.
        """
        view = self.ranksView
        for column, measure in enumerate(self.measures, start=1):
            shown = self.selectedMeasures.get(measure.name)
            view.setColumnHidden(column, not shown)
            view.setColumnWidth(column, 100)

        sort_section = view.horizontalHeader().sortIndicatorSection()
        if view.isColumnHidden(sort_section):
            self.headerState[self.rankMode] = None

        if self.headerState[self.rankMode] is not None:
            return

        checked = self.selected_checks
        sort_column = next(
            (column
             for column, measure in enumerate(self.measures, start=1)
             if measure.name in checked),
            len(self.measures) + 1,
        )
        self.ranksView.sortByColumn(sort_column, Qt.DescendingOrder)
        self.autoSelection()

    def updateDelegates(self):
        """Give each of the three rank views a fresh colored-bar delegate."""
        views = (self.contRanksView, self.discRanksView,
                 self.noClassRanksView)
        for view in views:
            view.setItemDelegate(gui.ColoredBarItemDelegate(self))

    def send_report(self):
        """
        Report the input domain, the rank table and the output domain.

        Nothing is reported without data; the output section is added only
        when an output domain description is available.
        """
        data = self.data
        if not data:
            return
        self.report_domain("Input", data.domain)
        self.report_table("Ranks", self.ranksView, num_format="{:.3f}")
        output_description = self.out_domain_desc
        if output_description is not None:
            self.report_items("Output", output_description)

    def commit(self):
        """
        Send the data reduced to the selected attributes.

        The current selection is stored in `self.selected_rows`; when it
        covers every attribute the selection method switches to "all".
        None is sent on "Reduced Data" when there is no data or no selected
        attribute; otherwise a table restricted to the selected attributes
        (with the original class variable and metas) is sent.
        """
        self.selected_rows = self.get_selection()
        n_selected = len(self.selected_rows)
        if self.data and len(self.data.domain.attributes) == n_selected:
            self.selectMethod = OWRank.SelectAll
            self.selectButtons.button(self.selectMethod).setChecked(True)

        selected = self.selectedAttrs()
        if self.data and selected:
            source_domain = self.data.domain
            reduced_domain = Domain(selected, source_domain.class_var,
                                    source_domain.metas)
            reduced = Table(reduced_domain, self.data)
            self.send("Reduced Data", reduced)
            self.out_domain_desc = report.describe_domain(reduced.domain)
        else:
            self.send("Reduced Data", None)
            self.out_domain_desc = None

    def selectedAttrs(self):
        """
        Return the attributes of the selected rows, or [] without data.

        Selected proxy-model rows are mapped to source-model rows, which
        index into the data domain's attributes.
        """
        if not self.data:
            return []
        proxy_rows = self.ranksView.selectionModel().selectedRows(0)
        to_source = self.ranksProxyModel.mapToSource
        attributes = self.data.domain.attributes
        return [attributes[to_source(index).row()] for index in proxy_rows]

    def create_scores_table(self, labels):
        """
        Build the "Feature Scores" table from the current scores.

        Columns are the selected measures followed by `labels` (the learner
        score labels); rows are the attributes of the data, whose names are
        stored in the "Feature name" meta. Returns None when there is no
        column to output.
        """
        selected = [(position, measure.name)
                    for position, measure in enumerate(self.measures)
                    if self.selectedMeasures.get(measure.name, False)]
        indices = [position for position, _ in selected]
        column_names = [name for _, name in selected] + list(labels)
        if not column_names:
            return None

        domain = Domain(
            [ContinuousVariable(name) for name in column_names],
            metas=[StringVariable("Feature name")],
        )

        # Keep rows of selected measures plus every learner row, then
        # transpose so that attributes become rows.
        n_measures = len(self.measures)
        kept_rows = [row for position, row in enumerate(self.measure_scores)
                     if position in indices or position >= n_measures]
        scores = np.array(kept_rows).T

        names = np.array([attr.name for attr in self.data.domain.attributes])
        # Reshape to 2d array as Table does not like 1d arrays
        names = names[:, None]

        scores_table = Table(domain, scores, metas=names)
        scores_table.name = "Feature Scores"
        return scores_table

    @classmethod
    def migrate_settings(cls, settings, version):
        """
        Upgrade stored settings that were written without a version.

        Before fc5caa1e1d716607f1f5c4e0b0be265c23280fa0 `headerState` had
        length 2; it is padded with None to three entries (and stored as a
        list). Both tuples and lists are padded, since the widget itself
        assigns `headerState` as a list. `settings` is modified in place;
        settings with a truthy `version` are left untouched.
        """
        if version:
            return
        header_state = settings.get("headerState")
        # isinstance() also rejects a missing (None) header state.
        if isinstance(header_state, (tuple, list)) and len(header_state) < 3:
            padded = list(header_state) + [None] * 3
            settings["headerState"] = padded[:3]
예제 #20
0
class OWConcordance(OWWidget):
    """
    Widget showing the occurrences of a query word with surrounding context.

    The query word comes either from the line edit or from the
    'Query Word' input; the documents of the selected concordance rows are
    sent on the 'Selected Documents' output.
    """
    name = "Concordance"
    description = "Display the context of the word."
    icon = "icons/Concordance.svg"
    priority = 30000

    inputs = [
        ('Corpus', Table, 'set_corpus'),
        ('Query Word', Topic, 'set_word_from_input'),
    ]
    outputs = [('Selected Documents', Table, )]

    autocommit = Setting(True)
    context_width = Setting(5)
    word = Setting("")
    # TODO Set selection settings.

    class Warning(OWWidget.Warning):
        multiple_words_on_input = Msg("Multiple query words on input. "
                                      "Only the first one is considered!")

    def __init__(self):
        super().__init__()

        self.corpus = None      # Corpus
        self.n_documents = ''   # Info on docs
        self.n_matching = ''    # Info on docs matching the word
        self.n_tokens = ''      # Info on tokens
        self.n_types = ''       # Info on types (unique tokens)

        # Info attributes
        info_box = gui.widgetBox(self.controlArea, 'Info')
        gui.label(info_box, self, 'Documents: %(n_documents)s')
        gui.label(info_box, self, 'Tokens: %(n_tokens)s')
        gui.label(info_box, self, 'Types: %(n_types)s')
        gui.label(info_box, self, 'Matching: %(n_matching)s')

        # Width parameter
        gui.spin(self.controlArea, self, 'context_width', 3, 10, box=True,
                 label="Number of words:", callback=self.set_width)

        gui.rubber(self.controlArea)

        # Search
        c_box = gui.widgetBox(self.mainArea, orientation="vertical")
        self.input = gui.lineEdit(
            c_box, self, 'word', orientation=Qt.Horizontal,
            sizePolicy=QSizePolicy(QSizePolicy.MinimumExpanding,
                                   QSizePolicy.Fixed),
            label='Query:', callback=self.set_word, callbackOnType=True)
        self.input.setFocus()

        # Concordances view
        self.conc_view = QTableView()
        self.model = ConcordanceModel()
        self.conc_view.setModel(self.model)
        self.conc_view.setWordWrap(False)
        self.conc_view.setSelectionBehavior(QTableView.SelectRows)
        self.conc_view.setSelectionModel(DocumentSelectionModel(self.model))
        self.conc_view.setItemDelegate(HorizontalGridDelegate())
        # connect selectionChanged to self.commit(), which will be
        # updated by gui.auto_commit(); the lambda looks `commit` up at
        # call time so the updated attribute is the one that gets invoked.
        self.conc_view.selectionModel().selectionChanged.connect(lambda:
                                                                 self.commit())
        self.conc_view.horizontalHeader().hide()
        self.conc_view.setShowGrid(False)
        self.mainArea.layout().addWidget(self.conc_view)
        self.set_width()

        # Auto-commit box
        gui.auto_commit(self.controlArea, self, 'autocommit', 'Commit',
                        'Auto commit is on')

    def sizeHint(self): # pragma: no cover
        """Return the preferred initial size of the widget."""
        return QSize(600, 400)

    def set_width(self):
        """Apply `context_width` to the model, keeping the row selection."""
        sel = self.conc_view.selectionModel().selection()
        self.model.set_width(self.context_width)
        if sel:
            # Re-select the rows that were selected before the width change.
            self.conc_view.selectionModel().select(sel,
                QItemSelectionModel.SelectCurrent | QItemSelectionModel.Rows)

    def set_corpus(self, data=None):
        """
        Set the input corpus.

        A table that is not a `Corpus` is converted with
        `Corpus.from_table`; None clears the corpus.
        """
        self.corpus = data
        if data is not None and not isinstance(data, Corpus):
            self.corpus = Corpus.from_table(data.domain, data)
        self.model.set_corpus(self.corpus)
        self.update_widget()
        self.commit()

    def set_word_from_input(self, topic):
        """
        Take the query word from the 'Query Word' input.

        While a non-empty `topic` is present the line edit is disabled and
        the word is read from the first meta value of the first row; a
        warning is shown when `topic` has more than one row.
        """
        self.Warning.multiple_words_on_input.clear()
        have_word = topic is not None and len(topic) > 0
        self.input.setEnabled(not have_word)
        if have_word:
            if len(topic) > 1:
                self.Warning.multiple_words_on_input()
            self.word = topic.metas[0, 0]
            self.set_word()

    def set_word(self):
        """Pass the current query word to the model and refresh the view."""
        self.model.set_word(self.word)
        self.update_widget()

    def resize_columns(self):
        """Give columns 0 and 2 equal shares of the width left by column 1."""
        # setColumnWidth() expects an int. True division yields a float,
        # which is rejected with a TypeError on interpreters that no longer
        # convert floats to ints implicitly, so convert explicitly.
        col_width = int((self.conc_view.width() -
                         self.conc_view.columnWidth(1)) / 2 - 12)
        self.conc_view.setColumnWidth(0, col_width)
        self.conc_view.setColumnWidth(2, col_width)

    def resizeEvent(self, event): # pragma: no cover
        """Keep the outer columns balanced when the widget is resized."""
        super().resizeEvent(event)
        self.resize_columns()

    def update_widget(self):
        """Resize the view to its contents and refresh the info labels."""
        self.conc_view.resizeColumnToContents(1)
        self.resize_columns()
        self.conc_view.resizeRowsToContents()

        if self.corpus is not None:
            self.n_documents = len(self.corpus)
            self.n_matching = '{}/{}'.format(
                self.model.matching_docs() if self.word else 0,
                self.n_documents)
            self.n_tokens = sum(map(len, self.corpus.tokens)) \
                if self.corpus.has_tokens() else 'n/a'
            self.n_types = len(self.corpus.dictionary) \
                if self.corpus.has_tokens() else 'n/a'
        else:
            self.n_documents = ''
            self.n_matching = ''
            self.n_tokens = ''
            self.n_types = ''

    def commit(self):
        """
        Send the documents of the selected rows, or None if there are none.

        Each selection range contributes the document of its top row;
        document indices are de-duplicated and sorted before indexing the
        corpus.
        """
        if self.corpus is None:
            # Nothing can be selected from a missing corpus.
            self.send("Selected Documents", None)
            return
        rows = [sel_range.top() for sel_range
                in self.conc_view.selectionModel().selection()]
        selected_docs = sorted({self.model.word_index[row][0]
                                for row in rows})
        if selected_docs:
            selected = self.corpus[selected_docs]
            self.send("Selected Documents", selected)
        else:
            self.send("Selected Documents", None)