class OWResolwetSNE(OWWidget):
    """Widget that runs the 't-sne' Resolwe process and plots the result.

    The embedding is computed by submitting the process through
    ``ResolweHelper.run_process`` on a ``ThreadExecutor``; the finished
    future is delivered back to :meth:`task_finished`.  A second process
    ('t-sne-selection') is submitted from :meth:`commit` to produce the
    "Selected Data" output for the current plot selection.
    """

    name = "t-SNE"
    description = "Two-dimensional data projection with t-SNE."
    icon = "icons/OWResolwetSNE.svg"
    priority = 50

    class Inputs:
        data = Input("Data", resolwe.Data, default=True)

    class Outputs:
        selected_data = Output("Selected Data", resolwe.Data, default=True)

    settings_version = 2

    #: Runtime state
    Running, Finished, Waiting = 1, 2, 3

    settingsHandler = settings.DomainContextHandler()

    max_iter = settings.Setting(300)
    perplexity = settings.Setting(30)
    pca_components = settings.Setting(20)

    # output embedding role.
    NoRole, AttrRole, AddAttrRole, MetaRole = 0, 1, 2, 3

    auto_commit = settings.Setting(True)

    selection_indices = settings.Setting(None, schema_only=True)

    legend_anchor = settings.Setting(((1, 0), (1, 0)))
    graph = SettingProvider(OWMDSGraph)

    jitter_sizes = [0, 0.1, 0.5, 1, 2, 3, 4, 5, 7, 10]

    graph_name = "graph.plot_widget.plotItem"

    class Error(OWWidget.Error):
        not_enough_rows = Msg("Input data needs at least 2 rows")
        constant_data = Msg("Input data is constant")
        no_attributes = Msg("Data has no attributes")
        out_of_memory = Msg("Out of memory")
        optimization_error = Msg("Error during optimization\n{}")

    def __init__(self):
        """Initialise widget state and build the control and main areas."""
        super().__init__()
        #: Effective data used for plot styling/annotations.
        # NOTE(review): nothing in this class assigns a non-None value here.
        self.data = None  # type: Optional[Orange.data.Table]
        #: Input subset data table
        self.subset_data = None  # type: Optional[Orange.data.Table]
        #: Input data table
        self.signal_data = None

        # resolwe variables
        self.data_table_object = None  # type: Optional[resolwe.Data]
        self._tsne_slug = 't-sne'
        self._tsne_selection_slug = 't-sne-selection'
        self._embedding_data_object = None
        self._embedding = None
        self._embedding_clas_var = None

        # Axis variables of the plotted embedding.
        self.variable_x = ContinuousVariable("tsne-x")
        self.variable_y = ContinuousVariable("tsne-y")

        # threading
        self._task = None  # type: Optional[ResolweTask]
        self._executor = ThreadExecutor()
        self.res = ResolweHelper()

        self._subset_mask = None  # type: Optional[np.ndarray]
        self._invalidated = False
        self.pca_data = None
        self._curve = None
        self._data_metas = None

        self.__update_loop = None
        self.__in_next_step = False
        self.__draw_similar_pairs = False

        box = gui.vBox(self.controlArea, "t-SNE")
        form = QFormLayout(
            labelAlignment=Qt.AlignLeft,
            formAlignment=Qt.AlignLeft,
            fieldGrowthPolicy=QFormLayout.AllNonFixedFieldsGrow,
            verticalSpacing=10,
        )
        form.addRow("Max iterations:",
                    gui.spin(box, self, "max_iter", 250, 2000, step=50))
        form.addRow("Perplexity:",
                    gui.spin(box, self, "perplexity", 1, 100, step=1))
        box.layout().addLayout(form)

        gui.separator(box, 10)
        self.runbutton = gui.button(box, self, "Run",
                                    callback=self._run_embeding)

        box = gui.vBox(self.controlArea, "PCA Preprocessing")
        gui.hSlider(box, self, 'pca_components', label="Components: ",
                    minValue=2, maxValue=50, step=1)

        box = gui.vBox(self.mainArea, True, margin=0)
        self.graph = OWMDSGraph(self, box, "MDSGraph",
                                view_box=MDSInteractiveViewBox)
        box.layout().addWidget(self.graph.plot_widget)
        self.plot = self.graph.plot_widget
        g = self.graph.gui

        box = g.point_properties_box(self.controlArea)
        self.models = g.points_models
        # Because sc data frequently has many genes,
        # showing all attributes in combo boxes can cause problems
        # QUICKFIX: Remove a separator and attributes from order
        # (leaving just the class and metas)
        for model in self.models:
            model.order = model.order[:-2]

        g.add_widgets(ids=[g.JitterSizeSlider], widget=box)

        box = gui.vBox(self.controlArea, "Plot Properties")
        g.add_widgets(
            [
                g.ShowLegend,
                g.ToolTipShowsAll,
                g.ClassDensity,
                g.LabelOnlySelected,
            ],
            box,
        )

        self.controlArea.layout().addStretch(100)
        self.icons = gui.attributeIconDict

        palette = self.graph.plot_widget.palette()
        self.graph.set_palette(palette)

        gui.rubber(self.controlArea)

        self.graph.box_zoom_select(self.controlArea)

        gui.auto_commit(self.controlArea, self, "auto_commit",
                        "Send Selection", "Send Automatically")

        self.plot.getPlotItem().hideButtons()
        self.plot.setRenderHint(QPainter.Antialiasing)
        self.graph.jitter_continuous = True

    def update_colors(self):
        """No-op hook (presumably called by the graph; verify)."""
        pass

    def update_density(self):
        """Redraw the plot without resetting the view."""
        self.update_graph(reset_view=False)

    def update_regression_line(self):
        """Redraw the plot without resetting the view."""
        self.update_graph(reset_view=False)

    def prepare_data(self):
        """No-op hook (presumably called by the graph; verify)."""
        pass

    def update_graph(self, reset_view=True, **_):
        """Redraw the embedding on the graph.

        :param reset_view: forwarded to ``graph.update_data``; callers pass
            ``False`` to keep the current view.
        """
        self.graph.zoomStack = []
        if self.graph.data is None:
            return
        # Honour the caller's choice instead of always resetting the view.
        self.graph.update_data(self.variable_x, self.variable_y,
                               reset_view=reset_view)

    def reset_graph_data(self, *_):
        """Rescale the graph data and redraw, if ``self.data`` is set."""
        # NOTE(review): ``self.data`` is never assigned in this class, so
        # this is currently a no-op -- confirm the intended condition.
        if self.data is not None:
            self.graph.rescale_data()
            self.update_graph()

    def selection_changed(self):
        """Abandon any running task (keeping the plot) and commit."""
        if self._task is not None:
            self.cancel(clear_state=False)
        self.commit()

    def _clear_plot(self):
        """Remove all items from the plot widget."""
        self.graph.plot_widget.clear()

    def _clear_state(self):
        """Clear the plot and drop the embedding, task and executor."""
        self._clear_plot()
        self.graph.new_data(None)
        self._embedding_data_object = None
        self._embedding = None
        self._embedding_clas_var = None
        self._task = None
        self._executor = ThreadExecutor()

    def cancel(self, clear_state=True):
        """Cancel the current task (if any).

        The watcher is disconnected so a late result of the abandoned
        future is not delivered to :meth:`task_finished`, and the task and
        executor are replaced so a new task can be submitted right away.

        :param clear_state: when true, also clear the plot and the stored
            embedding via :meth:`_clear_state`.
        """
        if self._task is None:
            return
        task = self._task
        self._task = None
        task.watcher.finished.disconnect(self.task_finished)
        # Only prevents a not-yet-started future from running; a running
        # one keeps going in the background and its result is ignored.
        task.future.cancel()
        self._executor.shutdown(wait=False)
        self._executor = ThreadExecutor()
        self.runbutton.setText('Run')
        self.progressBarFinished()
        if clear_state:
            self._clear_state()

    def run_task(self, slug, func):
        """Submit ``func`` to the executor as the current task.

        Any task already in flight is cancelled first (which also clears
        the widget state, see :meth:`cancel`).

        :param slug: process slug stored on the ``ResolweTask``;
            :meth:`task_finished` dispatches on it.
        :param func: zero-argument callable executed on the executor.
        """
        if self._task is not None:
            try:
                self.cancel()
            except CancelledError:
                # The previous task was deliberately abandoned.
                pass
        assert self._task is None

        self.progressBarInit()
        self._task = ResolweTask(slug)
        self._task.future = self._executor.submit(func)
        self._task.watcher = FutureWatcher(self._task.future)
        self._task.watcher.finished.connect(self.task_finished)

    @Slot(Future, name='Finished')
    def task_finished(self, future):
        """Handle a finished future of the current task.

        For the t-SNE task the embedding and class variable description are
        read from the result object and plotted; for the selection task the
        result is sent on the "Selected Data" output.  Exceptions raised by
        the future propagate to the caller.
        """
        assert threading.current_thread() == threading.main_thread()
        if self._task is None or self._task.future is not future:
            # Result of a task that was cancelled/replaced in the meantime.
            return
        assert future.done()

        try:
            # TODO: report failures through ``self.Error`` instead of
            # letting the exception propagate.
            future_result = future.result()
            slug = self._task.slug
            if slug == self._tsne_slug:
                self._embedding_data_object = future_result
                self._embedding_clas_var = self.res.get_json(
                    self._embedding_data_object, 'class_var')
                self._embedding = np.array(
                    self.res.get_json(self._embedding_data_object,
                                      'embedding_json', 'embedding'))
                self._setup_plot()

            if slug == self._tsne_selection_slug:
                self.Outputs.selected_data.send(future_result)
        finally:
            self.progressBarFinished()
            # Same label the button is created with and `cancel` restores.
            self.runbutton.setText('Run')
            self._task = None

    @Inputs.data
    def set_data(self, data):
        # type: (Optional[resolwe.Data]) -> None
        """Store the input data object and start computing the embedding."""
        # NOTE(review): a falsy/None input is ignored, so the previously
        # received object stays in use -- confirm this is intended.
        if data:
            self.data_table_object = data
            self._run_embeding()

    def _run_embeding(self):
        """Start the t-SNE process, or stop it if one is already running.

        When an embedding from a previous run is available it is passed to
        the process as the 'init' input.
        """
        if self._task is not None:
            # Acts as the "Stop" button.
            self.cancel()
            return

        if self.data_table_object is None:
            # Nothing to embed yet (e.g. "Run" pressed without input).
            return

        inputs = {
            'data_table': self.data_table_object,
            'pca_components': self.pca_components,
            'perplexity': self.perplexity,
            'iterations': self.max_iter
        }
        if (self._embedding is not None
                and self._embedding_data_object is not None):
            inputs['init'] = self._embedding_data_object

        func = partial(self.res.run_process, self._tsne_slug, **inputs)

        # move filter process in thread
        self.run_task(self._tsne_slug, func)
        self.runbutton.setText('Stop')

    def _setup_plot(self):
        """Build a plot table from the stored embedding and show it.

        The table has the two embedding columns plus a discrete class
        variable built from the 'name', 'values' and 'y_data' entries of
        ``self._embedding_clas_var``; the class variable colours the points.
        """
        class_var = DiscreteVariable(
            self._embedding_clas_var['name'],
            values=self._embedding_clas_var['values'])
        y_data = self._embedding_clas_var['y_data']
        data = np.c_[self._embedding, y_data]

        plot_data = Table(
            Domain([self.variable_x, self.variable_y], class_vars=class_var),
            data)

        domain = plot_data.domain if len(plot_data) else None
        for model in self.models:
            model.set_domain(domain)
        self.graph.attr_color = plot_data.domain.class_var if domain else None
        self.graph.attr_shape = None
        self.graph.attr_size = None
        self.graph.attr_label = None

        self.graph.new_data(plot_data)
        self.graph.update_data(self.variable_x, self.variable_y, True)

    def commit(self):
        """Submit the selection process and clear the output meanwhile.

        ``None`` is sent immediately; the actual result is sent from
        :meth:`task_finished` once the selection process completes.
        """
        selection = self.graph.get_selection()
        if self._embedding_data_object is not None and selection is not None:
            inputs = {
                'data_table': self.data_table_object,
                'embedding': self._embedding_data_object,
                'selection': selection.tolist(),
                'x_tsne_var': self.variable_x.name,
                'y_tsne_var': self.variable_y.name
            }

            func = partial(self.res.run_process, self._tsne_selection_slug,
                           **inputs)
            self.run_task(self._tsne_selection_slug, func)

        self.Outputs.selected_data.send(None)

    def onDeleteWidget(self):
        """Release plot and task state when the widget is removed."""
        super().onDeleteWidget()
        self._clear_plot()
        self._clear_state()

    def send_report(self):
        """Add the plot and a caption of the plot settings to the report."""
        # NOTE(review): ``self.data`` is never assigned in this class, so
        # this returns early every time -- confirm the intended condition.
        if self.data is None:
            return

        def name(var):
            return var and var.name

        caption = report.render_items_vert((
            ("Color", name(self.graph.attr_color)),
            ("Label", name(self.graph.attr_label)),
            ("Shape", name(self.graph.attr_shape)),
            ("Size", name(self.graph.attr_size)),
            ("Jittering", self.graph.jitter_size != 0 and
             "{} %".format(self.graph.jitter_size))))
        self.report_plot()
        if caption:
            self.report_caption(caption)
class OWResolweFilter(widget.OWWidget):
    """Widget that filters cells or genes through Resolwe processes.

    A 'data-filter-counts' process computes the per-cell / per-gene measure
    shown in the violin plot; on commit a 'data-table-filter' process is
    run with the selected lower/upper limits and its result is sent on the
    "Data" output.  Both processes run on a ``ThreadExecutor`` and report
    back through :meth:`task_finished`.
    """

    name = "Resolwe Filter"
    icon = 'icons/OWResolweFilter.svg'
    description = "Filter cells/genes"
    priority = 40

    class Inputs:
        data = widget.Input("Data", resolwe.Data)

    class Outputs:
        data = widget.Output("Data", resolwe.Data)

    class Warning(widget.OWWidget.Warning):
        invalid_range = widget.Msg(
            "Negative values in input data.\n"
            "This filter only makes sense for non-negative measurements "
            "where 0 indicates a lack (of) and/or a neutral reading.")
        sampling_in_effect = widget.Msg("Too many data points to display.\n"
                                        "Sampling {} of {} data points.")

    #: Filter mode.
    #: Filter out rows/columns.
    Cells, Genes = Cells, Genes

    settings_version = 1

    #: The selected filter mode
    selected_filter_type = settings.Setting(Cells)  # type: int

    #: Selected filter statistics / QC measure indexed by filter_type
    selected_filter_metric = settings.Setting(TotalCounts)  # type: int

    #: Augment the violin plot with a dot plot (strip plot) of the (non-zero)
    #: measurement counts in Cells/Genes mode or data matrix values in Data
    #: mode.
    display_dotplot = settings.Setting(True)  # type: bool

    #: Is min/max range selection enable
    limit_lower_enabled = settings.Setting(True)  # type: bool
    limit_upper_enabled = settings.Setting(True)  # type: bool

    #: The lower and upper selection limit for each filter type
    thresholds = settings.Setting({
        (Cells, DetectionCount): (0, 2**31 - 1),
        (Cells, TotalCounts): (0, 2**31 - 1),
        (Genes, DetectionCount): (0, 2**31 - 1),
        (Genes, TotalCounts): (0, 2**31 - 1)
    })  # type: Dict[Tuple[int, int], Tuple[float, float]]

    auto_commit = settings.Setting(False)  # type: bool

    def __init__(self):
        """Initialise widget state and build the controls and the plot."""
        super().__init__()
        self.data_table_object = None  # type: Optional[resolwe.Data]
        self._counts = None  # type: Optional[np.ndarray]

        self._counts_data_obj = None  # type: Optional[resolwe.Data]
        self._counts_slug = 'data-filter-counts'  # type: str

        self._selection_data_obj = None  # type: Optional[resolwe.Data]
        self._selection_slug = 'data-table-filter'  # type: str

        # threading
        self._task = None  # type: Optional[ResolweTask]
        self._executor = ThreadExecutor()

        self.res = ResolweHelper()

        box = gui.widgetBox(self.controlArea, "Info")
        self._info = QLabel(box)
        self._info.setWordWrap(True)
        self._info.setText("No data in input\n")

        box.layout().addWidget(self._info)

        box = gui.widgetBox(self.controlArea, "Filter Type", spacing=-1)
        rbg = QButtonGroup(box, exclusive=True)
        layout = QHBoxLayout()
        layout.setContentsMargins(0, 0, 0, 0)
        for id_ in [Cells, Genes]:
            name, _, tip = FilterInfo[id_]
            b = QRadioButton(name, toolTip=tip,
                             checked=id_ == self.selected_filter_type)
            rbg.addButton(b, id_)
            layout.addWidget(b, stretch=10, alignment=Qt.AlignCenter)
        box.layout().addLayout(layout)

        rbg.buttonClicked[int].connect(self.set_filter_type)

        self.filter_metric_cb = gui.comboBox(box, self,
                                             "selected_filter_metric",
                                             callback=self._update_metric)
        for id_ in [DetectionCount, TotalCounts]:
            text, ttip = MeasureInfo[id_]
            self.filter_metric_cb.addItem(text)
            idx = self.filter_metric_cb.count() - 1
            self.filter_metric_cb.setItemData(idx, ttip, Qt.ToolTipRole)
        self.filter_metric_cb.setCurrentIndex(self.selected_filter_metric)

        form = QFormLayout(
            labelAlignment=Qt.AlignLeft,
            formAlignment=Qt.AlignLeft,
            fieldGrowthPolicy=QFormLayout.AllNonFixedFieldsGrow)
        self._filter_box = box = gui.widgetBox(
            self.controlArea, "Filter", orientation=form)  # type: QGroupBox

        # One (lower, upper) pair of spin boxes per filter type; the stacks
        # show the pair that belongs to the selected filter type.
        self.threshold_stacks = (
            QStackedWidget(enabled=self.limit_lower_enabled),
            QStackedWidget(enabled=self.limit_upper_enabled),
        )
        finfo = np.finfo(np.float64)
        metric = self.selected_filter_metric
        for filter_ in [Cells, Genes]:
            spinlower = QDoubleSpinBox(
                self, minimum=0.0, maximum=finfo.max, decimals=1,
                keyboardTracking=False,
            )
            spinupper = QDoubleSpinBox(
                self, minimum=0.0, maximum=finfo.max, decimals=1,
                keyboardTracking=False,
            )

            lower, upper = self.thresholds.get((filter_, metric), (0, 0))

            spinlower.setValue(lower)
            spinupper.setValue(upper)

            self.threshold_stacks[0].addWidget(spinlower)
            self.threshold_stacks[1].addWidget(spinupper)

            spinlower.valueChanged.connect(self._limitchanged)
            spinupper.valueChanged.connect(self._limitchanged)

        self.threshold_stacks[0].setCurrentIndex(self.selected_filter_type)
        self.threshold_stacks[1].setCurrentIndex(self.selected_filter_type)

        self.limit_lower_enabled_cb = cb = QCheckBox(
            "Min", checked=self.limit_lower_enabled)
        cb.toggled.connect(self.set_lower_limit_enabled)
        cb.setAttribute(Qt.WA_LayoutUsesWidgetRect, True)
        form.addRow(cb, self.threshold_stacks[0])

        self.limit_upper_enabled_cb = cb = QCheckBox(
            "Max", checked=self.limit_upper_enabled)
        cb.toggled.connect(self.set_upper_limit_enabled)
        cb.setAttribute(Qt.WA_LayoutUsesWidgetRect, True)
        form.addRow(cb, self.threshold_stacks[1])

        box = gui.widgetBox(self.controlArea, "View")
        self._showpoints = gui.checkBox(box, self, "display_dotplot",
                                        "Show data points",
                                        callback=self._update_dotplot)

        self.controlArea.layout().addStretch(10)

        gui.auto_commit(self.controlArea, self, "auto_commit", "Commit")

        self._view = pg.GraphicsView()
        self._view.enableMouse(False)
        self._view.setAntialiasing(True)
        self._plot = plot = ViolinPlot()
        self._plot.setDataPointsVisible(self.display_dotplot)
        self._plot.setSelectionMode(
            (ViolinPlot.Low if self.limit_lower_enabled else 0) |
            (ViolinPlot.High if self.limit_upper_enabled else 0))
        self._plot.selectionEdited.connect(self._limitchanged_plot)
        self._view.setCentralWidget(self._plot)
        # FilterInfo is keyed by filter type (see the radio buttons above),
        # not by the selected metric.
        self._plot.setTitle(FilterInfo[self.selected_filter_type][1])

        bottom = self._plot.getAxis("bottom")  # type: pg.AxisItem
        bottom.hide()
        plot.setMouseEnabled(False, False)
        plot.hideButtons()
        self.mainArea.layout().addWidget(self._view)

        self.addAction(
            QAction("Select All", self, shortcut=QKeySequence.SelectAll,
                    triggered=self._select_all))

    def cancel(self):
        """Cancel the current task (if any)."""
        if self._task is not None:
            self._task.cancel()
            # presumably ``ResolweTask.cancel`` leaves the future done --
            # verify against its definition
            assert self._task.future.done()
            # disconnect the `_task_finished` slot
            self._task.watcher.done.disconnect(self.task_finished)
            self._task = None

    def run_task(self, slug, func):
        """Submit ``func`` to the executor as the current task.

        :param slug: process slug stored on the ``ResolweTask``;
            :meth:`task_finished` dispatches on it.
        :param func: zero-argument callable executed on the executor.
        """
        if self._task is not None:
            self.cancel()
        assert self._task is None

        self.progressBarInit()
        self._task = ResolweTask(slug)
        self._task.future = self._executor.submit(func)
        self._task.watcher = FutureWatcher(self._task.future)
        self._task.watcher.done.connect(self.task_finished)

    @Slot(Future, name='Future')
    def task_finished(self, future):
        """Handle the completed future of the current task.

        A finished counts task updates the plot; a finished selection task
        is sent on the output and returned.  Exceptions raised by the
        future propagate to the caller.
        """
        assert threading.current_thread() == threading.main_thread()
        assert self._task is not None
        assert self._task.future is future
        assert future.done()

        try:
            # TODO: report failures to the user instead of letting the
            # exception propagate.
            future_result = future.result()
            if self._task.slug == self._counts_slug:
                self._counts_data_obj = future_result
                self._setup_plot(future_result)
            elif self._task.slug == self._selection_slug:
                self._selection_data_obj = future_result
                self.Outputs.data.send(self._selection_data_obj)
                self._update_info()
                return self._selection_data_obj
        finally:
            self.progressBarFinished()
            self._task = None

    @Inputs.data
    def set_data(self, data):
        # type: (Optional[resolwe.Data]) -> None
        """Store the new input and start computing its counts."""
        # Store the input before clearing so the info label refreshed by
        # `clear` describes the new input, not the previous one.
        self.data_table_object = data
        self.clear()

        if data is not None:
            self._setup(data, self.filter_type())

    def commit(self):
        """Submit the filter process and clear the output meanwhile.

        ``None`` is sent immediately; the filtered data is sent from
        :meth:`task_finished` once the process completes.
        """
        if self._counts_data_obj:
            inputs = {
                'data_table': self.data_table_object,
                'counts': self._counts_data_obj,
                'axis': self._counts_data_obj.input['axis']
            }
            if self.limit_upper_enabled:
                inputs['upper_limit'] = self.limit_upper

            if self.limit_lower_enabled:
                inputs['lower_limit'] = self.limit_lower

            func = partial(self.res.run_process, self._selection_slug,
                           **inputs)
            self.run_task(self._selection_slug, func)

        self.Outputs.data.send(None)

    def _setup(self, data, filter_type):
        """Clear the widget and submit the counts process for ``data``.

        The process is given ``axis=1`` for the Cells filter and ``axis=0``
        otherwise, together with the selected metric.
        """
        self.clear()
        axis = 1 if filter_type == Cells else 0
        func = partial(self.res.run_process, self._counts_slug,
                       data_table=data, axis=axis,
                       measure=self.selected_filter_metric)

        # move filter process in thread
        self.run_task(self._counts_slug, func)

    def _setup_plot(self, data_object):
        # type: (resolwe.Data) -> None
        """Show the counts stored in ``data_object`` in the violin plot."""
        filter_data = self.res.get_json(data_object, 'counts_json', 'counts')
        axis_on_input = data_object.input['axis']
        # Inverse of the mapping used in `_setup` (axis 1 <-> Cells); the
        # spin-box stacks are indexed by filter type, not by axis.
        filter_type = Cells if axis_on_input == 1 else Genes
        measure = self.selected_filter_metric

        if filter_type == Cells:
            title = "Cell Filter"
            if measure == TotalCounts:
                axis_label = "Total counts (library size)"
            else:
                axis_label = "Number of expressed genes"
        else:
            title = "Gene Filter"
            if measure == TotalCounts:
                axis_label = "Total counts"
            else:
                # TODO: Too long
                axis_label = "Number of cells a gene is expressed in"

        span = -1.0  # data span
        x = np.asarray(filter_data)
        if x.size:
            span = np.ptp(x)

        self._counts = x
        self.Warning.sampling_in_effect.clear()

        spinlow = self.threshold_stacks[0].widget(filter_type)
        spinhigh = self.threshold_stacks[1].widget(filter_type)
        if measure == TotalCounts and span > 0:
            ndecimals = max(4 - int(np.floor(np.log10(span))), 1)
        else:
            ndecimals = 1

        spinlow.setDecimals(ndecimals)
        spinhigh.setDecimals(ndecimals)

        if x.size:
            xmin, xmax = np.min(x), np.max(x)
            self.limit_lower = np.clip(self.limit_lower, xmin, xmax)
            self.limit_upper = np.clip(self.limit_upper, xmin, xmax)

            # TODO: Need correction for lower bounded distribution (counts)
            # Use reflection around 0, but gaussian_kde does not provide
            # sufficient flexibility w.r.t bandwidth selection.
            self._plot.setData(x, 1000)
            self._plot.setBoundary(self.limit_lower, self.limit_upper)

        ax = self._plot.getAxis("left")  # type: pg.AxisItem
        ax.setLabel(axis_label)
        self._plot.setTitle(title)
        self._update_info()

    def sizeHint(self):
        """Return the base size hint expanded to at least 800x600."""
        sh = super().sizeHint()  # type: QSize
        return sh.expandedTo(QSize(800, 600))

    def set_filter_type(self, type_):
        """Switch between the Cells and Genes filter and recompute counts."""
        if self.selected_filter_type != type_:
            assert type_ in (Cells, Genes), str(type_)
            self.selected_filter_type = type_
            self.threshold_stacks[0].setCurrentIndex(type_)
            self.threshold_stacks[1].setCurrentIndex(type_)
            if self.data_table_object is not None:
                self._setup(self.data_table_object, type_)

    def filter_type(self):
        """Return the selected filter type."""
        return self.selected_filter_type

    def _update_metric(self):
        """Recompute the counts after the metric combo box changed."""
        if self.data_table_object is not None:
            self._setup(
                self.data_table_object,
                self.selected_filter_type,
            )

    def set_upper_limit_enabled(self, enabled):
        """Enable or disable the upper limit and sync the controls."""
        if enabled != self.limit_upper_enabled:
            self.limit_upper_enabled = enabled
            self.threshold_stacks[1].setEnabled(enabled)
            self.limit_upper_enabled_cb.setChecked(enabled)
            self._update_filter()

    def set_lower_limit_enabled(self, enabled):
        """Enable or disable the lower limit and sync the controls."""
        if enabled != self.limit_lower_enabled:
            self.limit_lower_enabled = enabled
            self.threshold_stacks[0].setEnabled(enabled)
            self.limit_lower_enabled_cb.setChecked(enabled)
            self._update_filter()

    def _update_filter(self):
        """Push the enabled-limit flags to the plot's selection mode."""
        mode = 0
        if self.limit_lower_enabled:
            mode |= ViolinPlot.Low
        if self.limit_upper_enabled:
            mode |= ViolinPlot.High
        self._plot.setSelectionMode(mode)

    def _is_filter_enabled(self):
        """Return whether at least one of the limits is enabled."""
        return self.limit_lower_enabled or self.limit_upper_enabled

    def clear(self):
        """Clear the plot, computed results, info label and warnings."""
        self._plot.clear()
        self._selection_data_obj = None
        self._counts_data_obj = None
        self._counts = None
        self._update_info()
        self.Warning.clear()

    def _update_info(self):
        """Refresh the info label from the input and the last selection."""
        text = []
        if self.data_table_object:
            text.append('Input Data (object id): {}'.format(
                self.data_table_object.id))
            if self._selection_data_obj and self._counts_data_obj:
                num_selected = self._selection_data_obj.output.get(
                    'num_selected', None)
                axis = self._counts_data_obj.input.get('axis', None)
                if num_selected is not None and axis is not None:
                    text.append('Output data ({instance}{s}): {num} '.format(
                        instance='gene' if axis == 0 else 'cell',
                        # plural for every count except exactly one
                        s='s' if num_selected != 1 else '',
                        num=num_selected))

        # Without input show the same text the label is created with.
        self._info.setText('\n'.join(text) if text else "No data in input\n")

    def _select_all(self):
        """Reset the limits to the widest range (0 .. 2**31 - 1)."""
        self.limit_lower = 0
        self.limit_upper = 2**31 - 1
        self._limitchanged()

    def _update_dotplot(self):
        """Show or hide the individual data points in the plot."""
        self._plot.setDataPointsVisible(self.display_dotplot)

    def _threshold_key(self):
        """Return the ``thresholds`` key for the current type and metric."""
        if self.selected_filter_type in {Cells, Genes}:
            metric = self.selected_filter_metric
        else:
            metric = -1
        return self.selected_filter_type, metric

    def current_filter_thresholds(self):
        """Return the stored (lower, upper) limits for the current filter."""
        return self.thresholds[self._threshold_key()]

    def set_current_filter_thesholds(self, lower, upper):
        """Store the (lower, upper) limits for the current filter."""
        self.thresholds[self._threshold_key()] = (lower, upper)

    @property
    def limit_lower(self):
        """Lower limit of the current filter; setting updates its spin box."""
        return self.current_filter_thresholds()[0]

    @limit_lower.setter
    def limit_lower(self, value):
        _, upper = self.current_filter_thresholds()
        self.set_current_filter_thesholds(value, upper)
        stacklower, _ = self.threshold_stacks
        sb = stacklower.widget(self.selected_filter_type)
        # prevent changes due to spin box rounding
        sb.setValue(value)

    @property
    def limit_upper(self):
        """Upper limit of the current filter; setting updates its spin box."""
        return self.current_filter_thresholds()[1]

    @limit_upper.setter
    def limit_upper(self, value):
        lower, _ = self.current_filter_thresholds()
        self.set_current_filter_thesholds(lower, value)
        _, stackupper = self.threshold_stacks
        sb = stackupper.widget(self.selected_filter_type)
        sb.setValue(value)

    @Slot()
    def _limitchanged(self):
        """Store the spin-box limits and move the plot boundary to match."""
        # Low/high limit changed via the spin boxes
        stacklow, stackhigh = self.threshold_stacks
        filter_ = self.selected_filter_type

        lower = stacklow.widget(filter_).value()
        upper = stackhigh.widget(filter_).value()
        self.set_current_filter_thesholds(lower, upper)

        if self._counts is not None and self._counts.size:
            xmin = np.min(self._counts)
            xmax = np.max(self._counts)
            # The plot boundary is kept within the data range.
            self._plot.setBoundary(np.clip(lower, xmin, xmax),
                                   np.clip(upper, xmin, xmax))

    def _limitchanged_plot(self):
        """Store limits edited in the plot, rounded to spin-box precision."""
        # Low/high limit changed via the plot
        if self._counts is not None:
            newlower, newupper = self._plot.boundary()
            filter_ = self.selected_filter_type
            lower, upper = self.current_filter_thresholds()
            stacklow, stackhigh = self.threshold_stacks
            spin_lower = stacklow.widget(filter_)
            spin_upper = stackhigh.widget(filter_)
            # do rounding to match the spin box's precision
            if self.limit_lower_enabled:
                newlower = round(newlower, spin_lower.decimals())
            else:
                newlower = lower

            if self.limit_upper_enabled:
                newupper = round(newupper, spin_upper.decimals())
            else:
                newupper = upper

            if self.limit_lower_enabled and newlower != lower:
                self.limit_lower = newlower
            if self.limit_upper_enabled and newupper != upper:
                self.limit_upper = newupper

            self._plot.setBoundary(newlower, newupper)

    def onDeleteWidget(self):
        """Release data and close the plot when the widget is removed."""
        self.data_table_object = None
        self.clear()
        self._plot.close()
        super().onDeleteWidget()

    @classmethod
    def migrate_settings(cls, settings, version):
        """Convert stored settings of older versions to the current layout.

        :param settings: stored settings dict, modified in place.
        :param version: version the settings were stored with, or ``None``.
        """
        if (version is None or version < 2) and \
                ("limit_lower" in settings and "limit_upper" in settings):
            # v2 changed limit_lower, limit_upper to per filter limits stored
            # in a single dict
            lower = settings.pop("limit_lower")
            upper = settings.pop("limit_upper")
            settings["thresholds"] = {
                (Cells, TotalCounts): (lower, upper),
                (Cells, DetectionCount): (lower, upper),
                (Genes, TotalCounts): (lower, upper),
                (Genes, DetectionCount): (lower, upper),
            }
        if version == 2:
            # Expand limits keyed by filter type to (filter type, metric)
            # keys.
            thresholds = settings["thresholds"]
            c = thresholds.pop(Cells)
            g = thresholds.pop(Genes)
            thresholds = {
                (Cells, TotalCounts): c,
                (Cells, DetectionCount): c,
                (Genes, TotalCounts): g,
                (Genes, DetectionCount): g,
            }
            settings["thresholds"] = thresholds