Esempio n. 1
0
    def training_thread(self, data, filter_roi, force_reset=False):
        """
        Stop any running training loop and (re)start training on `data`.

        Four scenarios:
        - Data is None, e.g. when the sending widget sends no data.
          Stop the other threads but don't do anything else.
        - Data is changed or force_reset is set.
          Stop other threads and restart training with the new data.
        - ROI is changed.
          Stop other threads, change ROI and iterator and continue training.
        - Data and filter are the same.
          Restart/continue training.

        Training is only (re)started/continued when pause_training is not
        set. While training runs this call does not return: it keeps calling
        self.train() until self._pause_training becomes true.

        :param data: the new data set, or None to unset the current one.
        :param filter_roi: callable applied to the data to select the region
            of interest; a falsy value means the whole data set is used.
        :param force_reset: treat `data` as new even if it compares equal to
            the current data.
        """
        # Pause training to signal the other threads to stop and release the
        # lock.
        self._pause_training = True
        with self.lock_on_training:
            # Decide what changed only once we own the lock. Comparing before
            # acquiring it would use state that another waiting call may still
            # replace, so a "nothing changed" verdict could be stale and we
            # would continue training on somebody else's data.
            new_data = (
                (self.data is None)
                or (not eq_lazyaware(self.data, data))
                or force_reset
            )
            new_roi = not self.filter_roi == filter_roi

            print("New training thread %s %s %s %s %s" % (data is None, filter_roi is None, new_data, new_roi, force_reset))

            if data is None:
                # We got the training lock, now we unset data. The pause flag
                # is left raised on purpose: there is nothing to train on.
                self.data = None
                # TODO: Save the ROI?
                self.filter_roi = None
                self.data_roi = None
                self.iterator_data = None
                return

            if new_data or new_roi:
                self.data = data
                self.filter_roi = filter_roi
                self.data_roi = self.filter_roi(self.data) if filter_roi else self.data
                # TODO: Now it will restart from 0?
                self.iterator_data = iter(self.data_roi)

            if new_data:
                # We've received a new data set so create a new learner to
                # replace any existing one.
                print("Setting new data with " + str(len_lazyaware(data)) + " instances.")

                self.get_statistics()
                self.learner = sgd.SGDLearner(self.all_classes, self.means, self.stds)
                self.learner.name = self.learner_name

                # Reset the trained instances.
                self.instances_trained = Orange.data.Table.from_domain(self.data.domain)
                self.no_of_instances_trained = 0

            # Start training, unless the user-facing pause setting is on.
            self._pause_training = self.pause_training
            while not self._pause_training:
                self.train()
Esempio n. 2
0
def table_summary(table):
    """
    Build a summary of `table`.

    For an `SqlTable` an `ApproxSummary` is returned immediately. It carries
    the approximate length, a `concurrent.futures.Future` that is resolved
    with the exact length by a background thread, the domain, and
    `NotAvailable()` for the X, Y and metas parts.

    For any other table a `Summary` is returned with the instance count, the
    domain and one part per X, Y and metas. Each part holds the total number
    of missing and non-missing values and the per-column statistics.
    """
    if isinstance(table, SqlTable):
        approx_len = table.approx_len()
        len_future = concurrent.futures.Future()

        def _len():
            # Always resolve the future: without forwarding the error a
            # failing len() would leave every waiter pending forever.
            try:
                exact_len = len(table)
            except Exception as err:
                len_future.set_exception(err)
            else:
                len_future.set_result(exact_len)
        threading.Thread(target=_len).start()  # KILL ME !!!

        return ApproxSummary(approx_len, len_future, table.domain,
                             NotAvailable(), NotAvailable(), NotAvailable())
    else:
        domain = table.domain
        n_instances = len_lazyaware(table)
        # dist = basic_stats.DomainBasicStats(table, include_metas=True)
        bstats = datacaching.getCached(
            table, basic_stats.DomainBasicStats, (table, True)
        )

        # The statistics are split at the attribute and class-variable
        # counts, leaving the remainder for the metas.
        dist = bstats.stats
        X_dist, Y_dist, M_dist = numpy.split(
            dist, numpy.cumsum([len(domain.attributes),
                                len(domain.class_vars)]))

        def parts(density, col_dist):
            """Summarise one table part from its per-column statistics."""
            nans = sum(col.nans for col in col_dist)
            non_nans = sum(col.non_nans for col in col_dist)
            if density == Storage.DENSE:
                return DenseArray(nans, non_nans, col_dist)
            elif density == Storage.SPARSE:
                return SparseArray(nans, non_nans, col_dist)
            elif density == Storage.SPARSE_BOOL:
                return SparseBoolArray(nans, non_nans, col_dist)
            elif density == Storage.MISSING:
                return NotAvailable()
            else:
                # Raised explicitly so the check survives `python -O`,
                # which would strip a plain `assert False`.
                raise AssertionError(
                    "unexpected storage density: %r" % (density,))

        X_part = parts(table.X_density(), X_dist)
        Y_part = parts(table.Y_density(), Y_dist)
        M_part = parts(table.metas_density(), M_dist)
        return Summary(n_instances, domain, X_part, Y_part, M_part)
Esempio n. 3
0
    def commit(self):
        """
        Commit/send the current selected row/column selection.

        The selected rows are sent on "Selected Data" and the remaining rows
        on "Other Data", both restricted to the selected columns. When there
        is no current view, or the view has no model, None is sent on both
        outputs.
        """
        selected_data = other_data = None
        view = self.tabs.currentWidget()
        if view and view.model() is not None:
            model = view.model()
            # Unwrap any proxy models to reach the model holding the table.
            while isinstance(model, QtGui.QAbstractProxyModel):
                model = model.sourceModel()

            table = model.source  # The input data table
            rowsel, colsel = self.get_selection(view)

            def select(data, rows, domain):
                """
                Select the data subset with specified rows and domain subsets.

                If either rows or domain is None they mean select all.
                """
                if rows is not None and domain is not None:
                    return data.from_table(domain, data, rows)
                elif rows is not None:
                    # The source table has to be passed explicitly; the row
                    # indices are the third argument, as in the branch above.
                    return data.from_table(data.domain, data, rows)
                elif domain is not None:
                    return data.from_table(domain, data)
                else:
                    return data

            domain = table.domain

            if len(colsel) < len(domain) + len(domain.metas):
                # only a subset of the columns is selected
                allvars = domain.variables + domain.metas
                columns = [(c, model.headerData(c, Qt.Horizontal,
                                                TableModel.DomainRole))
                           for c in colsel]
                assert all(role is not None for _, role in columns)

                def select_vars(role):
                    """select variables for role (TableModel.DomainRole)"""
                    return [allvars[c] for c, r in columns if r == role]

                attrs = select_vars(TableModel.Attribute)
                class_vars = select_vars(TableModel.ClassVar)
                metas = select_vars(TableModel.Meta)
                domain = Orange.data.Domain(attrs, class_vars, metas)

            # No row indexing is needed if all/none rows are selected.
            if not rowsel:
                selected_data = None
                other_data = select(table, None, domain)
            elif len(rowsel) == len_lazyaware(table):
                selected_data = select(table, None, domain)
                other_data = None
            else:
                selected_data = select(table, rowsel, domain)
                # Everything that is not selected goes to the other output.
                selmask = numpy.ones((len_lazyaware(table),), dtype=bool)
                selmask[rowsel] = False

                other_data = select(table, numpy.flatnonzero(selmask), domain)

        self.send("Selected Data", selected_data)
        self.send("Other Data", other_data)
Esempio n. 4
0
    def set_dataset(self, data, tid=None):
        """
        Set, replace or remove the input dataset registered under `tid`.

        With `data` set, the table view for `tid` is created (or reused and
        reset), filled and made the current tab. With `data` None, the view
        for `tid`, if any, is removed. The tab bar is shown only while more
        than one tab exists.
        """
        has_slot = tid in self.inputs

        if data is None:
            if has_slot:
                # Input was removed: drop its slot and its tab.
                removed = self.inputs.pop(tid)
                stale_view = removed.view
                stale_view.hide()
                stale_view.deleteLater()
                self.tabs.removeTab(self.tabs.indexOf(stale_view))

                shown = self.tabs.currentWidget()
                if shown is not None:
                    self.set_info(shown._input_slot.summary)
        else:
            if has_slot:
                # TODO: Fix length hack.
                same_length = len_lazyaware(data) == self.old_lengths[tid]
                if same_length and data.domain == self.old_domains[tid]:
                    # Same length and same domain are taken to mean the same
                    # data. Need a better way to do this.
                    return

                # Reuse the view of the existing slot, resetting the
                # (header) view state.
                table_view = self.inputs[tid].view
                table_view.setModel(None)
                table_view.horizontalHeader().setSortIndicator(
                    -1, Qt.AscendingOrder)
            else:
                table_view = QTableView()
                table_view.setSortingEnabled(True)
                table_view.setHorizontalScrollMode(QTableView.ScrollPerPixel)

                if self.select_rows:
                    table_view.setSelectionBehavior(QTableView.SelectRows)

                hdr = table_view.horizontalHeader()
                hdr.setMovable(True)
                hdr.setClickable(True)
                hdr.setSortIndicatorShown(True)
                hdr.setSortIndicator(-1, Qt.AscendingOrder)

                # QHeaderView does not 'reset' the model sort column,
                # because there is no guarantee (requirement) that the
                # models understand the -1 sort column.
                def reset_sort(index, order):
                    current_model = table_view.model()
                    if current_model is not None and index == -1:
                        table_view.model().sort(index, order)

                hdr.sortIndicatorChanged.connect(reset_sort)

            table_view.dataset = data
            tab_title = getattr(data, "name", "Data")
            self.tabs.addTab(table_view, tab_title)

            self._setup_table_view(table_view, data)
            entry = TableSlot(tid, data, table_summary(data), table_view)
            table_view._input_slot = entry
            self.inputs[tid] = entry

            # TODO fix hack
            # Remember length and domain for the "same data" check above.
            self.old_lengths[tid] = len_lazyaware(data)
            self.old_domains[tid] = data.domain

            self.tabs.setCurrentIndex(self.tabs.indexOf(table_view))

            self.set_info(entry.summary)

            if isinstance(entry.summary.len, concurrent.futures.Future):
                # Refresh the info once the pending length is available.
                def notify(f):
                    QMetaObject.invokeMethod(
                        self, "_update_info", Qt.QueuedConnection)

                entry.summary.len.add_done_callback(notify)

        self.tabs.tabBar().setVisible(self.tabs.count() > 1)