예제 #1
0
class OWFeatureConstructor(OWWidget):
    name = "Feature Constructor"
    description = "Construct new features (data columns) from a set of " \
                  "existing features in the input data set."
    icon = "icons/FeatureConstructor.svg"
    inputs = [("Data", Orange.data.Table, "setData")]
    outputs = [("Data", Orange.data.Table)]

    want_main_area = False

    settingsHandler = FeatureConstructorSettingsHandler()
    descriptors = ContextSetting([])
    currentIndex = ContextSetting(-1)

    EDITORS = [
        (ContinuousDescriptor, ContinuousFeatureEditor),
        (DiscreteDescriptor, DiscreteFeatureEditor),
        (StringDescriptor, StringFeatureEditor)
    ]

    class Error(OWWidget.Error):
        more_values_needed = Msg("Discrete feature {} needs more values.")
        invalid_expressions = Msg("Invalid expressions: {}.")

    def __init__(self):
        super().__init__()
        self.data = None
        self.editors = {}

        box = gui.vBox(self.controlArea, "Variable Definitions")

        toplayout = QHBoxLayout()
        toplayout.setContentsMargins(0, 0, 0, 0)
        box.layout().addLayout(toplayout)

        self.editorstack = QStackedWidget(
            sizePolicy=QSizePolicy(QSizePolicy.MinimumExpanding,
                                   QSizePolicy.MinimumExpanding)
        )

        for descclass, editorclass in self.EDITORS:
            editor = editorclass()
            editor.featureChanged.connect(self._on_modified)
            self.editors[descclass] = editor
            self.editorstack.addWidget(editor)

        self.editorstack.setEnabled(False)

        buttonlayout = QVBoxLayout(spacing=10)
        buttonlayout.setContentsMargins(0, 0, 0, 0)

        self.addbutton = QPushButton(
            "New", toolTip="Create a new variable",
            minimumWidth=120,
            shortcut=QKeySequence.New
        )

        def unique_name(fmt, reserved):
            candidates = (fmt.format(i) for i in count(1))
            return next(c for c in candidates if c not in reserved)

        def reserved_names():
            varnames = []
            if self.data is not None:
                varnames = [var.name for var in
                            self.data.domain.variables + self.data.domain.metas]
            varnames += [desc.name for desc in self.featuremodel]
            return set(varnames)

        def generate_newname(fmt):
            return unique_name(fmt, reserved_names())

        menu = QMenu(self.addbutton)
        cont = menu.addAction("Continuous")
        cont.triggered.connect(
            lambda: self.addFeature(
                ContinuousDescriptor(generate_newname("X{}"), "", 3))
        )
        disc = menu.addAction("Discrete")
        disc.triggered.connect(
            lambda: self.addFeature(
                DiscreteDescriptor(generate_newname("D{}"), "",
                                   ("A", "B"), -1, False))
        )
        string = menu.addAction("String")
        string.triggered.connect(
            lambda: self.addFeature(
                StringDescriptor(generate_newname("S{}"), ""))
        )
        menu.addSeparator()
        self.duplicateaction = menu.addAction("Duplicate Selected Variable")
        self.duplicateaction.triggered.connect(self.duplicateFeature)
        self.duplicateaction.setEnabled(False)
        self.addbutton.setMenu(menu)

        self.removebutton = QPushButton(
            "Remove", toolTip="Remove selected variable",
            minimumWidth=120,
            shortcut=QKeySequence.Delete
        )
        self.removebutton.clicked.connect(self.removeSelectedFeature)

        buttonlayout.addWidget(self.addbutton)
        buttonlayout.addWidget(self.removebutton)
        buttonlayout.addStretch(10)

        toplayout.addLayout(buttonlayout, 0)
        toplayout.addWidget(self.editorstack, 10)

        # Layout for the list view
        layout = QVBoxLayout(spacing=1, margin=0)
        self.featuremodel = DescriptorModel(parent=self)

        self.featureview = QListView(
            minimumWidth=200,
            sizePolicy=QSizePolicy(QSizePolicy.Minimum,
                                   QSizePolicy.MinimumExpanding)
        )

        self.featureview.setItemDelegate(FeatureItemDelegate(self))
        self.featureview.setModel(self.featuremodel)
        self.featureview.selectionModel().selectionChanged.connect(
            self._on_selectedVariableChanged
        )

        layout.addWidget(self.featureview)

        box.layout().addLayout(layout, 1)

        box = gui.hBox(self.controlArea)
        box.layout().addWidget(self.report_button)
        self.report_button.setMinimumWidth(180)
        gui.rubber(box)
        commit = gui.button(box, self, "Send", callback=self.apply,
                            default=True)
        commit.setMinimumWidth(180)

    def setCurrentIndex(self, index):
        index = min(index, len(self.featuremodel) - 1)
        self.currentIndex = index
        if index >= 0:
            itemmodels.select_row(self.featureview, index)
            desc = self.featuremodel[min(index, len(self.featuremodel) - 1)]
            editor = self.editors[type(desc)]
            self.editorstack.setCurrentWidget(editor)
            editor.setEditorData(desc, self.data.domain if self.data else None)
        self.editorstack.setEnabled(index >= 0)
        self.duplicateaction.setEnabled(index >= 0)
        self.removebutton.setEnabled(index >= 0)

    def _on_selectedVariableChanged(self, selected, *_):
        index = selected_row(self.featureview)
        if index is not None:
            self.setCurrentIndex(index)
        else:
            self.setCurrentIndex(-1)

    def _on_modified(self):
        if self.currentIndex >= 0:
            editor = self.editorstack.currentWidget()
            self.featuremodel[self.currentIndex] = editor.editorData()
            self.descriptors = list(self.featuremodel)

    def setDescriptors(self, descriptors):
        """
        Set a list of variable descriptors to edit.
        """
        self.descriptors = descriptors
        self.featuremodel[:] = list(self.descriptors)

    @check_sql_input
    def setData(self, data=None):
        """Set the input dataset."""
        self.closeContext()

        self.data = data

        if self.data is not None:
            descriptors = list(self.descriptors)
            currindex = self.currentIndex
            self.descriptors = []
            self.currentIndex = -1
            self.openContext(data)

            if descriptors != self.descriptors or \
                    self.currentIndex != currindex:
                # disconnect from the selection model while reseting the model
                selmodel = self.featureview.selectionModel()
                selmodel.selectionChanged.disconnect(
                    self._on_selectedVariableChanged)

                self.featuremodel[:] = list(self.descriptors)
                self.setCurrentIndex(self.currentIndex)

                selmodel.selectionChanged.connect(
                    self._on_selectedVariableChanged)

        self.editorstack.setEnabled(self.currentIndex >= 0)

    def handleNewSignals(self):
        if self.data is not None:
            self.apply()
        else:
            self.send("Data", None)

    def addFeature(self, descriptor):
        self.featuremodel.append(descriptor)
        self.setCurrentIndex(len(self.featuremodel) - 1)
        editor = self.editorstack.currentWidget()
        editor.nameedit.setFocus()
        editor.nameedit.selectAll()

    def removeFeature(self, index):
        del self.featuremodel[index]
        index = selected_row(self.featureview)
        if index is not None:
            self.setCurrentIndex(index)
        elif index is None and len(self.featuremodel) > 0:
            # Deleting the last item clears selection
            self.setCurrentIndex(len(self.featuremodel) - 1)

    def removeSelectedFeature(self):
        if self.currentIndex >= 0:
            self.removeFeature(self.currentIndex)

    def duplicateFeature(self):
        desc = self.featuremodel[self.currentIndex]
        self.addFeature(copy.deepcopy(desc))

    def check_attrs_values(self, attr, data):
        for i in range(len(data)):
            for var in attr:
                if not math.isnan(data[i, var]) \
                        and int(data[i, var]) >= len(var.values):
                    return var.name
        return None

    def _validate_descriptors(self, desc):

        def validate(source):
            try:
                return validate_exp(ast.parse(source, mode="eval"))
            except Exception:
                return False

        final = []
        invalid = []
        for d in desc:
            if validate(d.expression):
                final.append(d)
            else:
                final.append(d._replace(expression=""))
                invalid.append(d)

        if invalid:
            self.Error.invalid_expressions(", ".join(s.name for s in invalid))

        return final

    def apply(self):
        self.Error.clear()

        if self.data is None:
            return

        desc = list(self.featuremodel)
        desc = self._validate_descriptors(desc)
        source_vars = tuple(self.data.domain) + self.data.domain.metas
        new_variables = construct_variables(desc, source_vars)

        attrs = [var for var in new_variables if var.is_primitive()]
        metas = [var for var in new_variables if not var.is_primitive()]
        new_domain = Orange.data.Domain(
            self.data.domain.attributes + tuple(attrs),
            self.data.domain.class_vars,
            metas=self.data.domain.metas + tuple(metas)
        )

        try:
            data = Orange.data.Table(new_domain, self.data)
        except Exception as err:
            log = logging.getLogger(__name__)
            log.error("", exc_info=True)
            self.error("".join(format_exception_only(type(err), err)).rstrip())
            return
        disc_attrs_not_ok = self.check_attrs_values(
            [var for var in attrs if var.is_discrete], data)
        if disc_attrs_not_ok:
            self.Error.more_values_needed(disc_attrs_not_ok)
            return

        self.send("Data", data)

    def send_report(self):
        items = OrderedDict()
        for feature in self.featuremodel:
            if isinstance(feature, DiscreteDescriptor):
                items[feature.name] = "{} (discrete with values {}{})".format(
                    feature.expression, feature.values,
                    "; ordered" * feature.ordered)
            elif isinstance(feature, ContinuousDescriptor):
                items[feature.name] = "{} (numeric)".format(feature.expression)
            else:
                items[feature.name] = "{} (text)".format(feature.expression)
        self.report_items(
            report.plural("Constructed feature{s}", len(items)), items)
예제 #2
0
class OWFeatureConstructor(OWWidget):
    name = "特征构造器(Feature Constructor)"
    description = "用输入数据集中的现有特征构造新特征。"
    icon = "icons/FeatureConstructor.svg"
    keywords = ['tezheng', 'gouzao', 'tezhenggouzao']
    category = "数据(Data)"
    icon = "icons/FeatureConstructor.svg"

    class Inputs:
        data = Input("数据(Data)", Orange.data.Table, replaces=['Data'])

    class Outputs:
        data = Output("数据(Data)", Orange.data.Table, replaces=['Data'])

    want_main_area = False

    settingsHandler = FeatureConstructorHandler()
    descriptors = ContextSetting([])
    currentIndex = ContextSetting(-1)
    expressions_with_values = ContextSetting(False)
    settings_version = 2

    EDITORS = [(ContinuousDescriptor, ContinuousFeatureEditor),
               (DateTimeDescriptor, DateTimeFeatureEditor),
               (DiscreteDescriptor, DiscreteFeatureEditor),
               (StringDescriptor, StringFeatureEditor)]

    class Error(OWWidget.Error):
        more_values_needed = Msg("Categorical feature {} needs more values.")
        invalid_expressions = Msg("Invalid expressions: {}.")

    class Warning(OWWidget.Warning):
        renamed_var = Msg("Recently added variable has been renamed, "
                          "to avoid duplicates.\n")

    def __init__(self):
        super().__init__()
        self.data = None
        self.editors = {}

        box = gui.vBox(self.controlArea, "变量定义")

        toplayout = QHBoxLayout()
        toplayout.setContentsMargins(0, 0, 0, 0)
        box.layout().addLayout(toplayout)

        self.editorstack = QStackedWidget(sizePolicy=QSizePolicy(
            QSizePolicy.MinimumExpanding, QSizePolicy.MinimumExpanding))

        for descclass, editorclass in self.EDITORS:
            editor = editorclass()
            editor.featureChanged.connect(self._on_modified)
            self.editors[descclass] = editor
            self.editorstack.addWidget(editor)

        self.editorstack.setEnabled(False)

        buttonlayout = QVBoxLayout(spacing=10)
        buttonlayout.setContentsMargins(0, 0, 0, 0)

        self.addbutton = QPushButton("新建",
                                     toolTip="Create a new variable",
                                     minimumWidth=120,
                                     shortcut=QKeySequence.New)

        def unique_name(fmt, reserved):
            candidates = (fmt.format(i) for i in count(1))
            return next(c for c in candidates if c not in reserved)

        def generate_newname(fmt):
            return unique_name(fmt, self.reserved_names())

        menu = QMenu(self.addbutton)
        cont = menu.addAction("数值数据")
        cont.triggered.connect(lambda: self.addFeature(
            ContinuousDescriptor(generate_newname("X{}"), "", 3)))
        disc = menu.addAction("分类数据")
        disc.triggered.connect(lambda: self.addFeature(
            DiscreteDescriptor(generate_newname("D{}"), "", (), False)))
        string = menu.addAction("文本")
        string.triggered.connect(lambda: self.addFeature(
            StringDescriptor(generate_newname("S{}"), "")))
        datetime = menu.addAction("日期/时间")
        datetime.triggered.connect(lambda: self.addFeature(
            DateTimeDescriptor(generate_newname("T{}"), "")))

        menu.addSeparator()
        self.duplicateaction = menu.addAction("复制选中变量")
        self.duplicateaction.triggered.connect(self.duplicateFeature)
        self.duplicateaction.setEnabled(False)
        self.addbutton.setMenu(menu)

        self.removebutton = QPushButton("删除",
                                        toolTip="删除选中变量",
                                        minimumWidth=120,
                                        shortcut=QKeySequence.Delete)
        self.removebutton.clicked.connect(self.removeSelectedFeature)

        buttonlayout.addWidget(self.addbutton)
        buttonlayout.addWidget(self.removebutton)
        buttonlayout.addStretch(10)

        toplayout.addLayout(buttonlayout, 0)
        toplayout.addWidget(self.editorstack, 10)

        # Layout for the list view
        layout = QVBoxLayout(spacing=1, margin=0)
        self.featuremodel = DescriptorModel(parent=self)

        self.featureview = QListView(minimumWidth=200,
                                     minimumHeight=50,
                                     sizePolicy=QSizePolicy(
                                         QSizePolicy.Minimum,
                                         QSizePolicy.MinimumExpanding))

        self.featureview.setItemDelegate(FeatureItemDelegate(self))
        self.featureview.setModel(self.featuremodel)
        self.featureview.selectionModel().selectionChanged.connect(
            self._on_selectedVariableChanged)

        layout.addWidget(self.featureview)

        box.layout().addLayout(layout, 1)

        self.fix_button = gui.button(self.buttonsArea,
                                     self,
                                     "Upgrade Expressions",
                                     callback=self.fix_expressions)
        self.fix_button.setHidden(True)
        gui.button(self.buttonsArea,
                   self,
                   "Send",
                   callback=self.apply,
                   default=True)

    def setCurrentIndex(self, index):
        index = min(index, len(self.featuremodel) - 1)
        self.currentIndex = index
        if index >= 0:
            itemmodels.select_row(self.featureview, index)
            desc = self.featuremodel[min(index, len(self.featuremodel) - 1)]
            editor = self.editors[type(desc)]
            self.editorstack.setCurrentWidget(editor)
            editor.setEditorData(desc, self.data.domain if self.data else None)
        self.editorstack.setEnabled(index >= 0)
        self.duplicateaction.setEnabled(index >= 0)
        self.removebutton.setEnabled(index >= 0)

    def _on_selectedVariableChanged(self, selected, *_):
        index = selected_row(self.featureview)
        if index is not None:
            self.setCurrentIndex(index)
        else:
            self.setCurrentIndex(-1)

    def _on_modified(self):
        if self.currentIndex >= 0:
            self.Warning.clear()
            editor = self.editorstack.currentWidget()
            proposed = editor.editorData().name
            uniq = get_unique_names(self.reserved_names(self.currentIndex),
                                    proposed)

            feature = editor.editorData()
            if editor.editorData().name != uniq:
                self.Warning.renamed_var()
                feature = feature.__class__(uniq, *feature[1:])

            self.featuremodel[self.currentIndex] = feature
            self.descriptors = list(self.featuremodel)

    def setDescriptors(self, descriptors):
        """
        Set a list of variable descriptors to edit.
        """
        self.descriptors = descriptors
        self.featuremodel[:] = list(self.descriptors)

    def reserved_names(self, idx_=None):
        varnames = []
        if self.data is not None:
            varnames = [
                var.name
                for var in self.data.domain.variables + self.data.domain.metas
            ]
        varnames += [
            desc.name for idx, desc in enumerate(self.featuremodel)
            if idx != idx_
        ]
        return set(varnames)

    @Inputs.data
    @check_sql_input
    def setData(self, data=None):
        """Set the input dataset."""
        self.closeContext()

        self.data = data
        self.expressions_with_values = False

        self.descriptors = []
        self.currentIndex = -1
        if self.data is not None:
            self.openContext(data)

        # disconnect from the selection model while reseting the model
        selmodel = self.featureview.selectionModel()
        selmodel.selectionChanged.disconnect(self._on_selectedVariableChanged)

        self.featuremodel[:] = list(self.descriptors)
        self.setCurrentIndex(self.currentIndex)

        selmodel.selectionChanged.connect(self._on_selectedVariableChanged)
        self.fix_button.setHidden(not self.expressions_with_values)
        self.editorstack.setEnabled(self.currentIndex >= 0)

    def handleNewSignals(self):
        if self.data is not None:
            self.apply()
        else:
            self.Outputs.data.send(None)
            self.fix_button.setHidden(True)

    def addFeature(self, descriptor):
        self.featuremodel.append(descriptor)
        self.setCurrentIndex(len(self.featuremodel) - 1)
        editor = self.editorstack.currentWidget()
        editor.nameedit.setFocus()
        editor.nameedit.selectAll()

    def removeFeature(self, index):
        del self.featuremodel[index]
        index = selected_row(self.featureview)
        if index is not None:
            self.setCurrentIndex(index)
        elif index is None and self.featuremodel.rowCount():
            # Deleting the last item clears selection
            self.setCurrentIndex(self.featuremodel.rowCount() - 1)

    def removeSelectedFeature(self):
        if self.currentIndex >= 0:
            self.removeFeature(self.currentIndex)

    def duplicateFeature(self):
        desc = self.featuremodel[self.currentIndex]
        self.addFeature(copy.deepcopy(desc))

    @staticmethod
    def check_attrs_values(attr, data):
        for i in range(len(data)):
            for var in attr:
                if not math.isnan(data[i, var]) \
                        and int(data[i, var]) >= len(var.values):
                    return var.name
        return None

    def _validate_descriptors(self, desc):
        def validate(source):
            try:
                return validate_exp(ast.parse(source, mode="eval"))
            # ast.parse can return arbitrary errors, not only SyntaxError
            # pylint: disable=broad-except
            except Exception:
                return False

        final = []
        invalid = []
        for d in desc:
            if validate(d.expression):
                final.append(d)
            else:
                final.append(d._replace(expression=""))
                invalid.append(d)

        if invalid:
            self.Error.invalid_expressions(", ".join(s.name for s in invalid))

        return final

    def apply(self):
        def report_error(err):
            log = logging.getLogger(__name__)
            log.error("", exc_info=True)
            self.error("".join(format_exception_only(type(err), err)).rstrip())

        self.Error.clear()

        if self.data is None:
            return

        desc = list(self.featuremodel)
        desc = self._validate_descriptors(desc)
        try:
            new_variables = construct_variables(desc, self.data,
                                                self.expressions_with_values)
        # user's expression can contain arbitrary errors
        except Exception as err:  # pylint: disable=broad-except
            report_error(err)
            return

        attrs = [var for var in new_variables if var.is_primitive()]
        metas = [var for var in new_variables if not var.is_primitive()]
        new_domain = Orange.data.Domain(
            self.data.domain.attributes + tuple(attrs),
            self.data.domain.class_vars,
            metas=self.data.domain.metas + tuple(metas))

        try:
            for variable in new_variables:
                variable.compute_value.mask_exceptions = False
            data = self.data.transform(new_domain)
        # user's expression can contain arbitrary errors
        # pylint: disable=broad-except
        except Exception as err:
            report_error(err)
            return
        finally:
            for variable in new_variables:
                variable.compute_value.mask_exceptions = True

        disc_attrs_not_ok = self.check_attrs_values(
            [var for var in attrs if var.is_discrete], data)
        if disc_attrs_not_ok:
            self.Error.more_values_needed(disc_attrs_not_ok)
            return

        self.Outputs.data.send(data)

    def send_report(self):
        items = OrderedDict()
        for feature in self.featuremodel:
            if isinstance(feature, DiscreteDescriptor):
                desc = "categorical"
                if feature.values:
                    desc += " with values " \
                            + ", ".join(f"'{val}'" for val in feature.values)
                if feature.ordered:
                    desc += "; ordered"
            elif isinstance(feature, ContinuousDescriptor):
                desc = "numeric"
            elif isinstance(feature, DateTimeDescriptor):
                desc = "date/time"
            else:
                desc = "text"
            items[feature.name] = f"{feature.expression} ({desc})"
        self.report_items(report.plural("Constructed feature{s}", len(items)),
                          items)

    def fix_expressions(self):
        dlg = QMessageBox(
            QMessageBox.Question, "Fix Expressions",
            "This widget's behaviour has changed. Values of categorical "
            "variables are now inserted as their textual representations "
            "(strings); previously they appeared as integer numbers, with an "
            "attribute '.value' that contained the text.\n\n"
            "The widget currently runs in compatibility mode. After "
            "expressions are updated, manually check for their correctness.")
        dlg.addButton("Update", QMessageBox.ApplyRole)
        dlg.addButton("Cancel", QMessageBox.RejectRole)
        if dlg.exec() == QMessageBox.RejectRole:
            return

        def fixer(mo):
            var = domain[mo.group(2)]
            if mo.group(3) == ".value":  # uses string; remove `.value`
                return "".join(mo.group(1, 2, 4))
            # Uses ints: get them by indexing
            return mo.group(1) + "{" + \
                ", ".join(f"'{val}': {i}"
                          for i, val in enumerate(var.values)) + \
                f"}}[{var.name}]" + mo.group(4)

        domain = self.data.domain
        disc_vars = "|".join(f"{var.name}"
                             for var in chain(domain.variables, domain.metas)
                             if var.is_discrete)
        expr = re.compile(r"(^|\W)(" + disc_vars + r")(\.value)?(\W|$)")
        self.descriptors[:] = [
            descriptor._replace(
                expression=expr.sub(fixer, descriptor.expression))
            for descriptor in self.descriptors
        ]

        self.expressions_with_values = False
        self.fix_button.hide()
        index = self.currentIndex
        self.featuremodel[:] = list(self.descriptors)
        self.setCurrentIndex(index)
        self.apply()

    @classmethod
    def migrate_context(cls, context, version):
        if version is None or version < 2:
            used_vars = set(
                chain(*(
                    freevars(ast.parse(descriptor.expression, mode="eval"), [])
                    for descriptor in context.values["descriptors"]
                    if descriptor.expression)))
            disc_vars = {
                name
                for (name, vtype) in chain(context.attributes.items(),
                                           context.metas.items()) if vtype == 1
            }
            if used_vars & disc_vars:
                context.values["expressions_with_values"] = True
예제 #3
0
class OWFeatureConstructor(OWWidget):
    name = "Feature Constructor"
    description = "Construct new features (data columns) from a set of " \
                  "existing features in the input data set."
    icon = "icons/FeatureConstructor.svg"

    class Inputs:
        data = Input("Data", Orange.data.Table)

    class Outputs:
        data = Output("Data", Orange.data.Table)

    want_main_area = False

    settingsHandler = FeatureConstructorHandler()
    descriptors = ContextSetting([])
    currentIndex = ContextSetting(-1)

    EDITORS = [
        (ContinuousDescriptor, ContinuousFeatureEditor),
        (DiscreteDescriptor, DiscreteFeatureEditor),
        (StringDescriptor, StringFeatureEditor)
    ]

    class Error(OWWidget.Error):
        more_values_needed = Msg("Categorical feature {} needs more values.")
        invalid_expressions = Msg("Invalid expressions: {}.")

    def __init__(self):
        super().__init__()
        self.data = None
        self.editors = {}

        box = gui.vBox(self.controlArea, "Variable Definitions")

        toplayout = QHBoxLayout()
        toplayout.setContentsMargins(0, 0, 0, 0)
        box.layout().addLayout(toplayout)

        self.editorstack = QStackedWidget(
            sizePolicy=QSizePolicy(QSizePolicy.MinimumExpanding,
                                   QSizePolicy.MinimumExpanding)
        )

        for descclass, editorclass in self.EDITORS:
            editor = editorclass()
            editor.featureChanged.connect(self._on_modified)
            self.editors[descclass] = editor
            self.editorstack.addWidget(editor)

        self.editorstack.setEnabled(False)

        buttonlayout = QVBoxLayout(spacing=10)
        buttonlayout.setContentsMargins(0, 0, 0, 0)

        self.addbutton = QPushButton(
            "New", toolTip="Create a new variable",
            minimumWidth=120,
            shortcut=QKeySequence.New
        )

        def unique_name(fmt, reserved):
            candidates = (fmt.format(i) for i in count(1))
            return next(c for c in candidates if c not in reserved)

        def reserved_names():
            varnames = []
            if self.data is not None:
                varnames = [var.name for var in
                            self.data.domain.variables + self.data.domain.metas]
            varnames += [desc.name for desc in self.featuremodel]
            return set(varnames)

        def generate_newname(fmt):
            return unique_name(fmt, reserved_names())

        menu = QMenu(self.addbutton)
        cont = menu.addAction("Numeric")
        cont.triggered.connect(
            lambda: self.addFeature(
                ContinuousDescriptor(generate_newname("X{}"), "", 3))
        )
        disc = menu.addAction("Categorical")
        disc.triggered.connect(
            lambda: self.addFeature(
                DiscreteDescriptor(generate_newname("D{}"), "",
                                   ("A", "B"), -1, False))
        )
        string = menu.addAction("Text")
        string.triggered.connect(
            lambda: self.addFeature(
                StringDescriptor(generate_newname("S{}"), ""))
        )
        menu.addSeparator()
        self.duplicateaction = menu.addAction("Duplicate Selected Variable")
        self.duplicateaction.triggered.connect(self.duplicateFeature)
        self.duplicateaction.setEnabled(False)
        self.addbutton.setMenu(menu)

        self.removebutton = QPushButton(
            "Remove", toolTip="Remove selected variable",
            minimumWidth=120,
            shortcut=QKeySequence.Delete
        )
        self.removebutton.clicked.connect(self.removeSelectedFeature)

        buttonlayout.addWidget(self.addbutton)
        buttonlayout.addWidget(self.removebutton)
        buttonlayout.addStretch(10)

        toplayout.addLayout(buttonlayout, 0)
        toplayout.addWidget(self.editorstack, 10)

        # Layout for the list view
        layout = QVBoxLayout(spacing=1, margin=0)
        self.featuremodel = DescriptorModel(parent=self)

        self.featureview = QListView(
            minimumWidth=200,
            sizePolicy=QSizePolicy(QSizePolicy.Minimum,
                                   QSizePolicy.MinimumExpanding)
        )

        self.featureview.setItemDelegate(FeatureItemDelegate(self))
        self.featureview.setModel(self.featuremodel)
        self.featureview.selectionModel().selectionChanged.connect(
            self._on_selectedVariableChanged
        )

        layout.addWidget(self.featureview)

        box.layout().addLayout(layout, 1)

        box = gui.hBox(self.controlArea)
        gui.rubber(box)
        commit = gui.button(box, self, "Send", callback=self.apply,
                            default=True)
        commit.setMinimumWidth(180)

    def setCurrentIndex(self, index):
        index = min(index, len(self.featuremodel) - 1)
        self.currentIndex = index
        if index >= 0:
            itemmodels.select_row(self.featureview, index)
            desc = self.featuremodel[min(index, len(self.featuremodel) - 1)]
            editor = self.editors[type(desc)]
            self.editorstack.setCurrentWidget(editor)
            editor.setEditorData(desc, self.data.domain if self.data else None)
        self.editorstack.setEnabled(index >= 0)
        self.duplicateaction.setEnabled(index >= 0)
        self.removebutton.setEnabled(index >= 0)

    def _on_selectedVariableChanged(self, selected, *_):
        index = selected_row(self.featureview)
        if index is not None:
            self.setCurrentIndex(index)
        else:
            self.setCurrentIndex(-1)

    def _on_modified(self):
        if self.currentIndex >= 0:
            editor = self.editorstack.currentWidget()
            self.featuremodel[self.currentIndex] = editor.editorData()
            self.descriptors = list(self.featuremodel)

    def setDescriptors(self, descriptors):
        """
        Set a list of variable descriptors to edit.
        """
        self.descriptors = descriptors
        self.featuremodel[:] = list(self.descriptors)

    @Inputs.data
    @check_sql_input
    def setData(self, data=None):
        """Set the input dataset."""
        self.closeContext()

        self.data = data

        if self.data is not None:
            descriptors = list(self.descriptors)
            currindex = self.currentIndex
            self.descriptors = []
            self.currentIndex = -1
            self.openContext(data)

            if descriptors != self.descriptors or \
                    self.currentIndex != currindex:
                # disconnect from the selection model while reseting the model
                selmodel = self.featureview.selectionModel()
                selmodel.selectionChanged.disconnect(
                    self._on_selectedVariableChanged)

                self.featuremodel[:] = list(self.descriptors)
                self.setCurrentIndex(self.currentIndex)

                selmodel.selectionChanged.connect(
                    self._on_selectedVariableChanged)

        self.editorstack.setEnabled(self.currentIndex >= 0)

    def handleNewSignals(self):
        if self.data is not None:
            self.apply()
        else:
            self.Outputs.data.send(None)

    def addFeature(self, descriptor):
        self.featuremodel.append(descriptor)
        self.setCurrentIndex(len(self.featuremodel) - 1)
        editor = self.editorstack.currentWidget()
        editor.nameedit.setFocus()
        editor.nameedit.selectAll()

    def removeFeature(self, index):
        del self.featuremodel[index]
        index = selected_row(self.featureview)
        if index is not None:
            self.setCurrentIndex(index)
        elif index is None and len(self.featuremodel) > 0:
            # Deleting the last item clears selection
            self.setCurrentIndex(len(self.featuremodel) - 1)

    def removeSelectedFeature(self):
        if self.currentIndex >= 0:
            self.removeFeature(self.currentIndex)

    def duplicateFeature(self):
        desc = self.featuremodel[self.currentIndex]
        self.addFeature(copy.deepcopy(desc))

    def check_attrs_values(self, attr, data):
        for i in range(len(data)):
            for var in attr:
                if not math.isnan(data[i, var]) \
                        and int(data[i, var]) >= len(var.values):
                    return var.name
        return None

    def _validate_descriptors(self, desc):

        def validate(source):
            try:
                return validate_exp(ast.parse(source, mode="eval"))
            except Exception:
                return False

        final = []
        invalid = []
        for d in desc:
            if validate(d.expression):
                final.append(d)
            else:
                final.append(d._replace(expression=""))
                invalid.append(d)

        if invalid:
            self.Error.invalid_expressions(", ".join(s.name for s in invalid))

        return final

    def apply(self):
        self.Error.clear()

        if self.data is None:
            return

        desc = list(self.featuremodel)
        desc = self._validate_descriptors(desc)
        source_vars = self.data.domain.variables + self.data.domain.metas
        new_variables = construct_variables(desc, source_vars)

        attrs = [var for var in new_variables if var.is_primitive()]
        metas = [var for var in new_variables if not var.is_primitive()]
        new_domain = Orange.data.Domain(
            self.data.domain.attributes + tuple(attrs),
            self.data.domain.class_vars,
            metas=self.data.domain.metas + tuple(metas)
        )

        try:
            data = self.data.transform(new_domain)
        except Exception as err:
            log = logging.getLogger(__name__)
            log.error("", exc_info=True)
            self.error("".join(format_exception_only(type(err), err)).rstrip())
            return
        disc_attrs_not_ok = self.check_attrs_values(
            [var for var in attrs if var.is_discrete], data)
        if disc_attrs_not_ok:
            self.Error.more_values_needed(disc_attrs_not_ok)
            return

        self.Outputs.data.send(data)

    def send_report(self):
        items = OrderedDict()
        for feature in self.featuremodel:
            if isinstance(feature, DiscreteDescriptor):
                items[feature.name] = "{} (categorical with values {}{})".format(
                    feature.expression, feature.values,
                    "; ordered" * feature.ordered)
            elif isinstance(feature, ContinuousDescriptor):
                items[feature.name] = "{} (numeric)".format(feature.expression)
            else:
                items[feature.name] = "{} (text)".format(feature.expression)
        self.report_items(
            report.plural("Constructed feature{s}", len(items)), items)
예제 #4
0
class OWFeatureConstructor(OWWidget):
    name = "Feature Constructor"
    description = "Construct new features (data columns) from a set of " \
                  "existing features in the input dataset."
    icon = "icons/FeatureConstructor.svg"
    keywords = ['function', 'lambda']

    class Inputs:
        data = Input("Data", Orange.data.Table)

    class Outputs:
        data = Output("Data", Orange.data.Table)

    want_main_area = False

    settingsHandler = FeatureConstructorHandler()
    descriptors = ContextSetting([])
    currentIndex = ContextSetting(-1)

    EDITORS = [(ContinuousDescriptor, ContinuousFeatureEditor),
               (DateTimeDescriptor, DateTimeFeatureEditor),
               (DiscreteDescriptor, DiscreteFeatureEditor),
               (StringDescriptor, StringFeatureEditor)]

    class Error(OWWidget.Error):
        more_values_needed = Msg("Categorical feature {} needs more values.")
        invalid_expressions = Msg("Invalid expressions: {}.")

    class Warning(OWWidget.Warning):
        renamed_var = Msg("Recently added variable has been renamed, "
                          "to avoid duplicates.\n")

    def __init__(self):
        super().__init__()
        self.data = None
        self.editors = {}

        box = gui.vBox(self.controlArea, "Variable Definitions")

        toplayout = QHBoxLayout()
        toplayout.setContentsMargins(0, 0, 0, 0)
        box.layout().addLayout(toplayout)

        self.editorstack = QStackedWidget(sizePolicy=QSizePolicy(
            QSizePolicy.MinimumExpanding, QSizePolicy.MinimumExpanding))

        for descclass, editorclass in self.EDITORS:
            editor = editorclass()
            editor.featureChanged.connect(self._on_modified)
            self.editors[descclass] = editor
            self.editorstack.addWidget(editor)

        self.editorstack.setEnabled(False)

        buttonlayout = QVBoxLayout(spacing=10)
        buttonlayout.setContentsMargins(0, 0, 0, 0)

        self.addbutton = QPushButton("New",
                                     toolTip="Create a new variable",
                                     minimumWidth=120,
                                     shortcut=QKeySequence.New)

        def unique_name(fmt, reserved):
            candidates = (fmt.format(i) for i in count(1))
            return next(c for c in candidates if c not in reserved)

        def generate_newname(fmt):
            return unique_name(fmt, self.reserved_names())

        menu = QMenu(self.addbutton)
        cont = menu.addAction("Numeric")
        cont.triggered.connect(lambda: self.addFeature(
            ContinuousDescriptor(generate_newname("X{}"), "", 3)))
        disc = menu.addAction("Categorical")
        disc.triggered.connect(lambda: self.addFeature(
            DiscreteDescriptor(generate_newname("D{}"), "", (), False)))
        string = menu.addAction("Text")
        string.triggered.connect(lambda: self.addFeature(
            StringDescriptor(generate_newname("S{}"), "")))
        datetime = menu.addAction("Date/Time")
        datetime.triggered.connect(lambda: self.addFeature(
            DateTimeDescriptor(generate_newname("T{}"), "")))

        menu.addSeparator()
        self.duplicateaction = menu.addAction("Duplicate Selected Variable")
        self.duplicateaction.triggered.connect(self.duplicateFeature)
        self.duplicateaction.setEnabled(False)
        self.addbutton.setMenu(menu)

        self.removebutton = QPushButton("Remove",
                                        toolTip="Remove selected variable",
                                        minimumWidth=120,
                                        shortcut=QKeySequence.Delete)
        self.removebutton.clicked.connect(self.removeSelectedFeature)

        buttonlayout.addWidget(self.addbutton)
        buttonlayout.addWidget(self.removebutton)
        buttonlayout.addStretch(10)

        toplayout.addLayout(buttonlayout, 0)
        toplayout.addWidget(self.editorstack, 10)

        # Layout for the list view
        layout = QVBoxLayout(spacing=1, margin=0)
        self.featuremodel = DescriptorModel(parent=self)

        self.featureview = QListView(minimumWidth=200,
                                     minimumHeight=50,
                                     sizePolicy=QSizePolicy(
                                         QSizePolicy.Minimum,
                                         QSizePolicy.MinimumExpanding))

        self.featureview.setItemDelegate(FeatureItemDelegate(self))
        self.featureview.setModel(self.featuremodel)
        self.featureview.selectionModel().selectionChanged.connect(
            self._on_selectedVariableChanged)

        self.info.set_input_summary(self.info.NoInput)
        self.info.set_output_summary(self.info.NoOutput)

        layout.addWidget(self.featureview)

        box.layout().addLayout(layout, 1)

        gui.button(self.buttonsArea,
                   self,
                   "Send",
                   callback=self.apply,
                   default=True)

    def setCurrentIndex(self, index):
        index = min(index, len(self.featuremodel) - 1)
        self.currentIndex = index
        if index >= 0:
            itemmodels.select_row(self.featureview, index)
            desc = self.featuremodel[min(index, len(self.featuremodel) - 1)]
            editor = self.editors[type(desc)]
            self.editorstack.setCurrentWidget(editor)
            editor.setEditorData(desc, self.data.domain if self.data else None)
        self.editorstack.setEnabled(index >= 0)
        self.duplicateaction.setEnabled(index >= 0)
        self.removebutton.setEnabled(index >= 0)

    def _on_selectedVariableChanged(self, selected, *_):
        index = selected_row(self.featureview)
        if index is not None:
            self.setCurrentIndex(index)
        else:
            self.setCurrentIndex(-1)

    def _on_modified(self):
        if self.currentIndex >= 0:
            self.Warning.clear()
            editor = self.editorstack.currentWidget()
            proposed = editor.editorData().name
            unique = get_unique_names(self.reserved_names(self.currentIndex),
                                      proposed)

            feature = editor.editorData()
            if editor.editorData().name != unique:
                self.Warning.renamed_var()
                feature = feature.__class__(unique, *feature[1:])

            self.featuremodel[self.currentIndex] = feature
            self.descriptors = list(self.featuremodel)

    def setDescriptors(self, descriptors):
        """
        Set a list of variable descriptors to edit.
        """
        self.descriptors = descriptors
        self.featuremodel[:] = list(self.descriptors)

    def reserved_names(self, idx_=None):
        varnames = []
        if self.data is not None:
            varnames = [
                var.name
                for var in self.data.domain.variables + self.data.domain.metas
            ]
        varnames += [
            desc.name for idx, desc in enumerate(self.featuremodel)
            if idx != idx_
        ]
        return set(varnames)

    @Inputs.data
    @check_sql_input
    def setData(self, data=None):
        """Set the input dataset."""
        self.closeContext()

        self.data = data

        self.info.set_input_summary(self.info.NoInput)
        if self.data is not None:
            descriptors = list(self.descriptors)
            currindex = self.currentIndex
            self.descriptors = []
            self.currentIndex = -1
            self.openContext(data)
            self.info.set_input_summary(len(data),
                                        format_summary_details(data))

            if descriptors != self.descriptors or \
                    self.currentIndex != currindex:
                # disconnect from the selection model while reseting the model
                selmodel = self.featureview.selectionModel()
                selmodel.selectionChanged.disconnect(
                    self._on_selectedVariableChanged)

                self.featuremodel[:] = list(self.descriptors)
                self.setCurrentIndex(self.currentIndex)

                selmodel.selectionChanged.connect(
                    self._on_selectedVariableChanged)

        self.editorstack.setEnabled(self.currentIndex >= 0)

    def handleNewSignals(self):
        if self.data is not None:
            self.apply()
        else:
            self.info.set_output_summary(self.info.NoOutput)
            self.Outputs.data.send(None)

    def addFeature(self, descriptor):
        self.featuremodel.append(descriptor)
        self.setCurrentIndex(len(self.featuremodel) - 1)
        editor = self.editorstack.currentWidget()
        editor.nameedit.setFocus()
        editor.nameedit.selectAll()

    def removeFeature(self, index):
        del self.featuremodel[index]
        index = selected_row(self.featureview)
        if index is not None:
            self.setCurrentIndex(index)
        elif index is None and self.featuremodel.rowCount():
            # Deleting the last item clears selection
            self.setCurrentIndex(self.featuremodel.rowCount() - 1)

    def removeSelectedFeature(self):
        if self.currentIndex >= 0:
            self.removeFeature(self.currentIndex)

    def duplicateFeature(self):
        desc = self.featuremodel[self.currentIndex]
        self.addFeature(copy.deepcopy(desc))

    @staticmethod
    def check_attrs_values(attr, data):
        for i in range(len(data)):
            for var in attr:
                if not math.isnan(data[i, var]) \
                        and int(data[i, var]) >= len(var.values):
                    return var.name
        return None

    def _validate_descriptors(self, desc):
        def validate(source):
            try:
                return validate_exp(ast.parse(source, mode="eval"))
            # ast.parse can return arbitrary errors, not only SyntaxError
            # pylint: disable=broad-except
            except Exception:
                return False

        final = []
        invalid = []
        for d in desc:
            if validate(d.expression):
                final.append(d)
            else:
                final.append(d._replace(expression=""))
                invalid.append(d)

        if invalid:
            self.Error.invalid_expressions(", ".join(s.name for s in invalid))

        return final

    def apply(self):
        def report_error(err):
            log = logging.getLogger(__name__)
            log.error("", exc_info=True)
            self.error("".join(format_exception_only(type(err), err)).rstrip())

        self.Error.clear()

        if self.data is None:
            return

        desc = list(self.featuremodel)
        desc = self._validate_descriptors(desc)
        try:
            new_variables = construct_variables(desc, self.data)
        # user's expression can contain arbitrary errors
        except Exception as err:  # pylint: disable=broad-except
            report_error(err)
            return

        attrs = [var for var in new_variables if var.is_primitive()]
        metas = [var for var in new_variables if not var.is_primitive()]
        new_domain = Orange.data.Domain(
            self.data.domain.attributes + tuple(attrs),
            self.data.domain.class_vars,
            metas=self.data.domain.metas + tuple(metas))

        try:
            for variable in new_variables:
                variable.compute_value.mask_exceptions = False
            data = self.data.transform(new_domain)
        # user's expression can contain arbitrary errors
        # pylint: disable=broad-except
        except Exception as err:
            report_error(err)
            return
        finally:
            for variable in new_variables:
                variable.compute_value.mask_exceptions = True

        disc_attrs_not_ok = self.check_attrs_values(
            [var for var in attrs if var.is_discrete], data)
        if disc_attrs_not_ok:
            self.Error.more_values_needed(disc_attrs_not_ok)
            return

        self.info.set_output_summary(len(data), format_summary_details(data))
        self.Outputs.data.send(data)

    def send_report(self):
        items = OrderedDict()
        for feature in self.featuremodel:
            if isinstance(feature, DiscreteDescriptor):
                items[
                    feature.name] = "{} (categorical with values {}{})".format(
                        feature.expression, feature.values,
                        "; ordered" * feature.ordered)
            elif isinstance(feature, ContinuousDescriptor):
                items[feature.name] = "{} (numeric)".format(feature.expression)
            elif isinstance(feature, DateTimeDescriptor):
                items[feature.name] = "{} (date/time)".format(
                    feature.expression)
            else:
                items[feature.name] = "{} (text)".format(feature.expression)
        self.report_items(report.plural("Constructed feature{s}", len(items)),
                          items)