def training_thread(self, data, filter_roi, force_reset=False):
    """
    Stop any running training loop, install the given data/ROI and train.

    Four situations are handled:

    - ``data`` is None (e.g. the sending widget sent no data): the other
      threads are stopped, the stored data, ROI and iterator are cleared
      and nothing else is done.
    - The data changed or ``force_reset`` is set: the other threads are
      stopped and training restarts with a new learner on the new data.
    - Only the ROI changed: the other threads are stopped, the ROI and
      the iterator are replaced and training continues.
    - Data and ROI are unchanged: training is restarted/continued.

    Training is only (re)started/continued while ``pause_training`` is
    not set.
    """
    data_changed = (
        self.data is None
        or not eq_lazyaware(self.data, data)
        or force_reset
    )
    roi_changed = not (self.filter_roi == filter_roi)

    print(
        "New training thread %s %s %s %s %s"
        % (data is None, filter_roi is None, data_changed, roi_changed,
           force_reset)
    )

    # Raising the pause flag makes the training loop at the bottom of this
    # method (running in another thread) exit, which releases the lock
    # that is acquired just below.
    self._pause_training = True

    with self.lock_on_training:
        if data is None:
            # The training lock is ours now, so the data can be unset.
            # TODO: Save the ROI?
            self.data = None
            self.filter_roi = None
            self.data_roi = None
            self.iterator_data = None
            return

        if data_changed or roi_changed:
            self.data = data
            self.filter_roi = filter_roi
            if filter_roi:
                self.data_roi = self.filter_roi(self.data)
            else:
                self.data_roi = self.data
            # TODO: Now it will restart from 0?
            self.iterator_data = iter(self.data_roi)

        if data_changed:
            # A new data set arrived: build a new learner that replaces
            # any existing one.
            instance_count = len_lazyaware(data)
            print("Setting new data with " + str(instance_count) + " instances.")
            self.get_statistics()
            fresh_learner = sgd.SGDLearner(
                self.all_classes, self.means, self.stds)
            fresh_learner.name = self.learner_name
            self.learner = fresh_learner

            # Forget everything that was trained on so far.
            self.instances_trained = Orange.data.Table.from_domain(
                self.data.domain)
            self.no_of_instances_trained = 0

        # Start training; the loop ends as soon as the pause flag is set
        # (by the user setting or by a newer call to this method).
        self._pause_training = self.pause_training
        while not self._pause_training:
            self.train()
def table_summary(table):
    """
    Build a summary object describing ``table``.

    For an ``SqlTable`` an ``ApproxSummary`` is returned. It carries the
    approximate length, a ``concurrent.futures.Future`` that a background
    thread resolves with ``len(table)``, the domain, and ``NotAvailable()``
    for each of the three remaining fields.

    For any other table a ``Summary`` is returned with the instance
    count, the domain, and one part object each for X, Y and the metas.
    """
    if isinstance(table, SqlTable):
        approx_len = table.approx_len()
        exact_len = concurrent.futures.Future()

        def resolve_length():
            exact_len.set_result(len(table))

        worker = threading.Thread(target=resolve_length)
        worker.start()  # KILL ME !!!

        return ApproxSummary(approx_len, exact_len, table.domain,
                             NotAvailable(), NotAvailable(), NotAvailable())

    domain = table.domain
    n_instances = len_lazyaware(table)

    # Per-column statistics are fetched through the data cache rather than
    # by calling basic_stats.DomainBasicStats directly (the positional True
    # is presumably include_metas -- confirm against DomainBasicStats).
    cached_stats = datacaching.getCached(
        table, basic_stats.DomainBasicStats, (table, True)
    )
    column_stats = cached_stats.stats

    # Cut the flat per-column sequence into attributes / class vars / metas.
    boundaries = numpy.cumsum(
        [len(domain.attributes), len(domain.class_vars)])
    X_dist, Y_dist, M_dist = numpy.split(column_stats, boundaries)

    def parts(array, density, col_dist):
        """Return the part object matching ``density`` for these columns."""
        # The 2-d view is computed as before but not used any further here.
        array = numpy.atleast_2d(array)
        nans = sum(column.nans for column in col_dist)
        non_nans = sum(column.non_nans for column in col_dist)

        if density == Storage.DENSE:
            return DenseArray(nans, non_nans, col_dist)
        if density == Storage.SPARSE:
            return SparseArray(nans, non_nans, col_dist)
        if density == Storage.SPARSE_BOOL:
            return SparseBoolArray(nans, non_nans, col_dist)
        if density == Storage.MISSING:
            return NotAvailable()
        assert False

    X_part = parts(table.X, table.X_density(), X_dist)
    Y_part = parts(table.Y, table.Y_density(), Y_dist)
    M_part = parts(table.metas, table.metas_density(), M_dist)
    return Summary(n_instances, domain, X_part, Y_part, M_part)
def commit(self):
    """
    Commit/send the current selected row/column selection.

    Two outputs are always sent:

    - "Selected Data": the selected rows restricted to the selected
      columns, or None when no rows are selected.
    - "Other Data": the remaining rows restricted to the selected
      columns, or None when every row is selected.

    When there is no current view, or the view has no model, None is
    sent on both outputs.
    """
    selected_data = other_data = None
    view = self.tabs.currentWidget()
    if view and view.model() is not None:
        # Unwrap any proxy models to reach the model that holds the table.
        model = view.model()
        while isinstance(model, QtGui.QAbstractProxyModel):
            model = model.sourceModel()
        table = model.source  # The input data table

        rowsel, colsel = self.get_selection(view)

        def select(data, rows, domain):
            """
            Select the data subset with specified rows and domain subsets.

            If either rows or domain is None they mean select all.
            """
            if rows is not None and domain is not None:
                return data.from_table(domain, data, rows)
            elif rows is not None:
                # The source table has to be passed explicitly; the row
                # indices are the third argument, as in the branch above.
                return data.from_table(data.domain, data, rows)
            elif domain is not None:
                return data.from_table(domain, data)
            else:
                return data

        domain = table.domain

        if len(colsel) < len(domain) + len(domain.metas):
            # only a subset of the columns is selected
            allvars = domain.variables + domain.metas
            columns = [(c, model.headerData(c, Qt.Horizontal,
                                            TableModel.DomainRole))
                       for c in colsel]
            assert all(role is not None for _, role in columns)

            def select_vars(role):
                """select variables for role (TableModel.DomainRole)"""
                return [allvars[c] for c, r in columns if r == role]

            attrs = select_vars(TableModel.Attribute)
            class_vars = select_vars(TableModel.ClassVar)
            metas = select_vars(TableModel.Meta)
            domain = Orange.data.Domain(attrs, class_vars, metas)

        # Avoid a copy if all/none rows are selected.
        if not rowsel:
            selected_data = None
            other_data = select(table, None, domain)
        elif len(rowsel) == len_lazyaware(table):
            selected_data = select(table, None, domain)
            other_data = None
        else:
            selected_data = select(table, rowsel, domain)
            # Mask out the selected rows; what stays True is "other".
            selmask = numpy.ones((len_lazyaware(table),), dtype=bool)
            selmask[rowsel] = False

            other_data = select(table, numpy.flatnonzero(selmask), domain)

    self.send("Selected Data", selected_data)
    self.send("Other Data", other_data)
def set_dataset(self, data, tid=None):
    """
    Set the input dataset.

    ``data`` is the table arriving on input slot ``tid``. A table that is
    not None is shown in the tab belonging to ``tid`` (a new tab view is
    created the first time). None removes the tab of ``tid`` if there is
    one. Finally the tab bar is shown only when more than one tab exists.
    """
    if data is None:
        if tid in self.inputs:
            # The input was removed: discard its view and its tab.
            removed_slot = self.inputs.pop(tid)
            stale_view = removed_slot.view
            stale_view.hide()
            stale_view.deleteLater()
            self.tabs.removeTab(self.tabs.indexOf(stale_view))

            shown = self.tabs.currentWidget()
            if shown is not None:
                self.set_info(shown._input_slot.summary)
    else:
        if tid in self.inputs:
            # TODO: Fix length hack.
            same_length = len_lazyaware(data) == self.old_lengths[tid]
            if same_length and data.domain == self.old_domains[tid]:
                # Same length and same domain are taken to mean the data
                # did not change. Need a better way to do this.
                return

            # update existing input slot
            view = self.inputs[tid].view
            # reset the (header) view state.
            view.setModel(None)
            view.horizontalHeader().setSortIndicator(-1, Qt.AscendingOrder)
        else:
            view = QTableView()
            view.setSortingEnabled(True)
            view.setHorizontalScrollMode(QTableView.ScrollPerPixel)

            if self.select_rows:
                view.setSelectionBehavior(QTableView.SelectRows)

            header = view.horizontalHeader()
            header.setMovable(True)
            header.setClickable(True)
            header.setSortIndicatorShown(True)
            header.setSortIndicator(-1, Qt.AscendingOrder)

            # QHeaderView does not 'reset' the model sort column,
            # because there is no guaranty (requirement) that the
            # models understand the -1 sort column.
            def sort_reset(index, order):
                if index == -1 and view.model() is not None:
                    view.model().sort(index, order)

            header.sortIndicatorChanged.connect(sort_reset)

        view.dataset = data
        self.tabs.addTab(view, getattr(data, "name", "Data"))

        self._setup_table_view(view, data)
        new_slot = TableSlot(tid, data, table_summary(data), view)
        view._input_slot = new_slot
        self.inputs[tid] = new_slot

        # TODO fix hack
        self.old_lengths[tid] = len_lazyaware(data)  # length hack
        self.old_domains[tid] = data.domain

        self.tabs.setCurrentIndex(self.tabs.indexOf(view))

        self.set_info(new_slot.summary)

        pending_len = new_slot.summary.len
        if isinstance(pending_len, concurrent.futures.Future):
            # When the length future completes, invoke _update_info
            # through a queued Qt call.
            def update(_future):
                QMetaObject.invokeMethod(
                    self, "_update_info", Qt.QueuedConnection)

            pending_len.add_done_callback(update)

    self.tabs.tabBar().setVisible(self.tabs.count() > 1)