Example #1
0
    def __new__(cls, data, address='localhost:9465', batch=100, max_iter=100):
        """Fit an incremental PCA model on samples of `data` and return it.

        `data` is first passed through
        ``Continuize(multinomial_treatment=Continuize.Remove)``.  The sampling
        percentage is ``batch / data.approx_len() * 100``.  When it is below
        100, the model is fitted on one sample and then refined with
        `partial_fit` on up to `max_iter` further samples; otherwise the
        whole table is used once and no further iterations run.

        `save_state(model)` is called after the initial fit and after every
        iteration; the loop ends early as soon as `aborted()` is true.
        `address` is accepted but not used here.

        Note that the fitted model is returned, not an instance of `cls`.
        """
        from orangecontrib.remote import aborted, save_state
        import Orange.data.sql.table

        def attributes_table(table):
            # Download the sample, then rebuild it as a table that carries
            # only the attribute columns.
            table.download_data(1000000)
            attribute_domain = Orange.data.Domain(table.domain.attributes)
            return Orange.data.Table.from_numpy(attribute_domain, table.X)

        data = Continuize(multinomial_treatment=Continuize.Remove)(data)
        learner = Orange.projection.IncrementalPCA()
        percent = batch / data.approx_len() * 100
        subsample = percent < 100

        if subsample:
            first = data.sample_percentage(percent, no_cache=True)
        else:
            first = data
        model = learner(attributes_table(first))
        save_state(model)

        # Extra passes only make sense when each sample is a strict subset.
        n_rounds = max_iter if subsample else 0
        for i in range(n_rounds):
            drawn = data.sample_percentage(percent, no_cache=True)
            model.partial_fit(attributes_table(drawn))
            model.iteration = i
            save_state(model)
            if aborted():
                break
        return model
Example #2
0
    def __new__(cls, data, address='localhost:9465', batch=100, max_iter=100):
        """Fit an incremental PCA model on samples of `data` and return it.

        `data` is first passed through
        ``Continuize(multinomial_treatment=Continuize.Remove,
        normalize_continuous=None)``.  Samples are drawn with a percentage of
        ``batch / data.approx_len() * 100``.  Below 100 percent the model is
        fitted on one sample and updated with `partial_fit` on up to
        `max_iter` more; at 100 percent or above the whole table is fitted
        once and the update loop is skipped.

        `save_state(model)` runs after the first fit and after each update,
        and `aborted()` is polled after each update to stop early.
        `address` is accepted but not used here.

        The return value is the fitted model rather than an instance of `cls`.
        """
        from orangecontrib.remote import aborted, save_state
        import Orange.data.sql.table

        def to_local_table(sample):
            # Download the sample and keep just its attribute columns.
            sample.download_data(1000000)
            return Orange.data.Table.from_numpy(
                Orange.data.Domain(sample.domain.attributes), sample.X)

        continuizer = Continuize(multinomial_treatment=Continuize.Remove,
                                 normalize_continuous=None)
        data = continuizer(data)
        learner = Orange.projection.IncrementalPCA()
        percent = batch / data.approx_len() * 100
        partial = percent < 100

        initial = (data.sample_percentage(percent, no_cache=True)
                   if partial else data)
        model = learner(to_local_table(initial))
        save_state(model)

        for i in range(max_iter if partial else 0):
            next_sample = data.sample_percentage(percent, no_cache=True)
            model.partial_fit(to_local_table(next_sample))
            model.iteration = i
            save_state(model)
            if aborted():
                break
        return model
Example #3
0
    def _setup_table_view(self, view, data):
        """Configure `view` (QTableView) so that it displays `data`
        (Orange.data.Table): item delegate, sorting, model, row heights,
        header labels and selection model.
        """
        model = RichTableModel(data)
        n_rows = data.approx_len()

        # Class colors are only looked up when coloring by a discrete class.
        color_schema = (
            [QColor(*rgb) for rgb in data.domain.class_var.colors]
            if self.color_by_class and data.domain.has_discrete_class
            else None)

        if self.show_distributions:
            delegate = TableBarItemDelegate(
                view, color=self.dist_color, color_schema=color_schema)
        else:
            delegate = TableDataDelegate(view)
        view.setItemDelegate(delegate)

        # Sorting is switched on or off depending on the type of `data`
        sortable = is_sortable(data)
        view.setSortingEnabled(sortable)
        hheader = view.horizontalHeader()
        hheader.setSectionsClickable(sortable)
        hheader.setSortIndicatorShown(sortable)
        hheader.sortIndicatorChanged.connect(self.update_selection)

        view.setModel(model)

        vheader = view.verticalHeader()
        view_options = view.viewOptions()
        item_size = view.style().sizeFromContents(
            QStyle.CT_ItemViewItem, view_options, QSize(20, 20), view)

        vheader.setDefaultSectionSize(item_size.height() + 2)
        vheader.setMinimumSectionSize(5)
        vheader.setSectionResizeMode(QHeaderView.Fixed)

        # Cap the number of rows shown in the QTableView
        # (workaround for QTBUG-18490 / QTBUG-28631)
        row_limit = (2**31 - 1) // (vheader.defaultSectionSize() + 2)
        if n_rows > row_limit:
            proxy = TableSliceProxy(parent=view, rowSlice=slice(0, row_limit))
            proxy.setSourceModel(model)
            # Reset the view first; otherwise the header view keeps its
            # (by now invalid) state.
            view.setModel(None)
            view.setModel(proxy)

        assert view.model().rowCount() <= row_limit
        assert vheader.sectionSize(0) > 1 or model.rowCount() == 0

        # refresh the header (attribute names)
        self._update_variable_labels(view)

        view.setSelectionModel(
            BlockSelectionModel(view.model(), parent=view,
                                selectBlocks=not self.select_rows))
        view.selectionFinished.connect(self.update_selection)
Example #4
0
    def set_data(self, data):
        """Set the input data and reopen the settings context for it.

        An `SqlTable` whose `approx_len()` exceeds `LARGE_TABLE` is replaced
        by `data.sample_time(DEFAULT_SAMPLE_TIME)`.  Data that evaluates as
        false (e.g. has no rows) or whose domain is empty is treated as
        `None`.  If the checksum of the new data equals the checksum of the
        current data, nothing is changed.

        `shown_attributes` is reset to `None` unless the old and new data
        share the same domain checksum.
        """
        if isinstance(data, SqlTable) and data.approx_len() > LARGE_TABLE:
            data = data.sample_time(DEFAULT_SAMPLE_TIME)

        # Compare against None explicitly: an empty table is itself falsy, so
        # a plain truthiness guard would skip exactly the case handled here.
        if data is not None and (not data or len(data.domain) == 0):
            data = None
        if checksum(data) == checksum(self.data):
            return  # the new data set is the same as the old one

        self.__ignore_updates = True
        try:
            self.closeContext()
            # preserve attribute choice if the domain is the same
            same_domain = (
                self.data and data and
                data.domain.checksum() == self.data.domain.checksum())
            self.data = data

            if not same_domain:
                self.shown_attributes = None

            self.openContext(self.data)
        finally:
            # Do not leave updates suppressed if a context call raises.
            self.__ignore_updates = False
    def set_data(self, data):
        """Set the input data and reopen the settings context for it.

        An `SqlTable` whose `approx_len()` exceeds `LARGE_TABLE` is replaced
        by `data.sample_time(DEFAULT_SAMPLE_TIME)`.  Data that evaluates as
        false (e.g. has no rows) or whose domain is empty is treated as
        `None`.  If the checksum of the new data equals the checksum of the
        current data, nothing is changed.

        `shown_attributes` is reset to `None` unless the old and new data
        share the same domain checksum.
        """
        if isinstance(data, SqlTable) and data.approx_len() > LARGE_TABLE:
            data = data.sample_time(DEFAULT_SAMPLE_TIME)

        # Compare against None explicitly: an empty table is itself falsy, so
        # a plain truthiness guard would skip exactly the case handled here.
        if data is not None and (not data or len(data.domain) == 0):
            data = None
        if checksum(data) == checksum(self.data):
            return  # the new data set is the same as the old one

        self.__ignore_updates = True
        try:
            self.closeContext()
            # preserve attribute choice if the domain is the same
            same_domain = (
                self.data and data and
                data.domain.checksum() == self.data.domain.checksum())
            self.data = data

            if not same_domain:
                self.shown_attributes = None

            self.openContext(self.data)
        finally:
            # Do not leave updates suppressed if a context call raises.
            self.__ignore_updates = False
Example #6
0
 def __new__(cls, data, batch=100, max_iter=100):
     """Incrementally fit a PCA model on samples of `data` and return it.

     `data` is passed through
     ``Continuize(multinomial_treatment=Continuize.Remove)``.  Up to
     `max_iter` samples are drawn with a percentage of
     ``batch / data.approx_len() * 100``; empty samples are skipped.  Each
     non-empty sample is downloaded, reduced to its attribute columns and
     fed to `partial_fit`.  `save_state(model)` is called after every
     update.

     The loop stops early when `aborted()` is true, or when the sample
     drawn was the complete table itself, since repeating the same fit on
     the whole data adds nothing.

     Returns the model (not an instance of `cls`).
     """
     cont = Continuize(multinomial_treatment=Continuize.Remove)
     data = cont(data)
     model = Orange.projection.IncrementalPCA()
     percent = batch / data.approx_len() * 100
     for i in range(max_iter):
         data_sample = data.sample_percentage(percent, no_cache=True)
         if not data_sample:
             continue
         # Record this before `data_sample` is rebound below: the rebuilt
         # table is always a new object, so testing identity against `data`
         # afterwards could never succeed.
         sampled_everything = data_sample is data
         data_sample.download_data(1000000)
         data_sample = Orange.data.Table.from_numpy(
             Orange.data.Domain(data_sample.domain.attributes),
             data_sample.X)
         model = model.partial_fit(data_sample)
         model.iteration = i
         save_state(model)
         if aborted() or sampled_everything:
             break
     return model
Example #7
0
 def __new__(cls, data, batch=100, max_iter=100):
     """Incrementally fit a PCA model on samples of `data` and return it.

     `data` is passed through
     ``Continuize(multinomial_treatment=Continuize.Remove)``.  Up to
     `max_iter` samples are drawn with a percentage of
     ``batch / data.approx_len() * 100``; empty samples are skipped.  Each
     non-empty sample is downloaded, reduced to its attribute columns and
     fed to `partial_fit`.  `save_state(model)` is called after every
     update.

     The loop stops early when `aborted()` is true, or when the sample
     drawn was the complete table itself, since repeating the same fit on
     the whole data adds nothing.

     Returns the model (not an instance of `cls`).
     """
     cont = Continuize(multinomial_treatment=Continuize.Remove)
     data = cont(data)
     model = Orange.projection.IncrementalPCA()
     percent = batch / data.approx_len() * 100
     for i in range(max_iter):
         data_sample = data.sample_percentage(percent, no_cache=True)
         if not data_sample:
             continue
         # Record this before `data_sample` is rebound below: the rebuilt
         # table is always a new object, so testing identity against `data`
         # afterwards could never succeed.
         sampled_everything = data_sample is data
         data_sample.download_data(1000000)
         data_sample = Orange.data.Table.from_numpy(
             Orange.data.Domain(data_sample.domain.attributes),
             data_sample.X)
         model = model.partial_fit(data_sample)
         model.iteration = i
         save_state(model)
         if aborted() or sampled_everything:
             break
     return model
Example #8
0
    def _setup_table_view(self, view, data):
        """Configure `view` (QTableView) so that it displays `data`
        (Orange.data.Table); with `data` set to None the view's model is
        cleared and nothing else is done.
        """
        if data is None:
            view.setModel(None)
            return

        model = RichTableDecorator(TableModel(data))
        n_rows = data.approx_len()

        # Class colors are only looked up when coloring by a discrete class.
        color_schema = (
            [QColor(*rgb) for rgb in data.domain.class_var.colors]
            if self.color_by_class and data.domain.has_discrete_class
            else None)

        if self.show_distributions:
            delegate = gui.TableBarItem(
                self, color=self.dist_color, color_schema=color_schema)
        else:
            delegate = QStyledItemDelegate(self)
        view.setItemDelegate(delegate)

        # Sorting is switched on or off depending on the type of `data`
        sortable = is_sortable(data)
        view.setSortingEnabled(sortable)
        hheader = view.horizontalHeader()
        hheader.setSectionsClickable(sortable)
        hheader.setSortIndicatorShown(sortable)

        view.setModel(model)

        vheader = view.verticalHeader()
        view_options = view.viewOptions()
        item_size = view.style().sizeFromContents(
            QStyle.CT_ItemViewItem, view_options, QSize(20, 20), view)

        vheader.setDefaultSectionSize(item_size.height() + 2)
        vheader.setMinimumSectionSize(5)
        vheader.setSectionResizeMode(QHeaderView.Fixed)

        # Cap the number of rows shown in the QTableView
        # (workaround for QTBUG-18490 / QTBUG-28631)
        row_limit = (2 ** 31 - 1) // (vheader.defaultSectionSize() + 2)
        if n_rows > row_limit:
            proxy = TableSliceProxy(parent=view, rowSlice=slice(0, row_limit))
            proxy.setSourceModel(model)
            # Reset the view first; otherwise the header view keeps its
            # (by now invalid) state.
            view.setModel(None)
            view.setModel(proxy)

        assert view.model().rowCount() <= row_limit
        assert vheader.sectionSize(0) > 1 or model.rowCount() == 0

        # refresh the header (attribute names)
        self._update_variable_labels(view)

        view.setSelectionModel(
            BlockSelectionModel(
                view.model(), parent=view,
                selectBlocks=not self.select_rows))
        view.selectionModel().selectionChanged.connect(self.update_selection)