예제 #1
0
파일: owsql.py 프로젝트: asenzh/orange3
class OWSql(OWWidget):
    """Widget that loads a data table from an SQL server.

    The widget exposes a single "Data" output carrying a ``Table`` and keeps
    the connection parameters, the selected table / custom query and the
    loading options as ``Setting`` attributes.
    """
    # Widget metadata.
    name = "SQL Table"
    id = "orange.widgets.data.sql"
    description = """
    Load dataset from SQL."""
    long_description = """
    Sql widget connects to server and opens data from there. """
    icon = "icons/SQLTable.svg"
    priority = 10
    category = "Data"
    keywords = ["data", "file", "load", "read"]
    # The only output: the table produced by `open_table`.
    outputs = [
        OutputSignal("Data",
                     Table,
                     doc="Attribute-valued data set read from the input file.")
    ]

    want_main_area = False
    resizing_enabled = False

    # Connection parameters; filled from the line edits in `connect`.
    # NOTE(review): presumably persisted by the widget settings machinery,
    # which would include the plain-text password -- confirm.
    host = Setting(None)
    port = Setting(None)
    database = Setting(None)
    schema = Setting(None)
    username = Setting(None)
    password = Setting(None)
    # Name of the selected table, or the query text when "Custom SQL" is used.
    table = Setting(None)
    # Text of the custom SQL query shown in the query editor.
    sql = Setting("")
    # Whether `get_table` should discover discrete variables from the values.
    guess_values = Setting(True)
    # Whether `get_table` should download the data into local memory.
    download = Setting(False)

    # Optionally store the result of a custom query in a database table.
    materialize = Setting(False)
    materialize_table_name = Setting("")

    class Information(OWWidget.Information):
        # Shown when the domain was guessed from a sample of the table.
        data_sampled = Msg("Data description was generated from a sample.")

    class Error(OWWidget.Error):
        # Generic message; the placeholder receives the error text.
        connection = Msg("{}")
        # Placeholders: plural suffix ('' or 's') and the extension names.
        missing_extension = Msg("Database is missing extension{}: {}")

    def __init__(self):
        """Build the control area and schedule the first connection attempt.

        The line edits are pre-filled from the stored settings; the actual
        connection is attempted through a zero-delay single-shot timer.
        """
        super().__init__()

        # Open psycopg2 connection (None until `connect` succeeds).
        self._connection = None
        # Table whose description is included in the report.
        self.data_desc_table = None
        # Ordered description of the current connection, used in the report.
        self.database_desc = None

        vbox = gui.vBox(self.controlArea, "Server", addSpace=True)
        box = gui.vBox(vbox)
        # Server field, shown as "host" or "host:port".
        self.servertext = QtGui.QLineEdit(box)
        self.servertext.setPlaceholderText('Server')
        self.servertext.setToolTip('Server')
        if self.host:
            self.servertext.setText(self.host if not self.port else '{}:{}'.
                                    format(self.host, self.port))
        box.layout().addWidget(self.servertext)
        # Database field, shown as "database" or "database/schema".
        self.databasetext = QtGui.QLineEdit(box)
        self.databasetext.setPlaceholderText('Database[/Schema]')
        self.databasetext.setToolTip('Database or optionally Database/Schema')
        if self.database:
            self.databasetext.setText(
                self.database if not self.schema else '{}/{}'.
                format(self.database, self.schema))
        box.layout().addWidget(self.databasetext)
        self.usernametext = QtGui.QLineEdit(box)
        self.usernametext.setPlaceholderText('Username')
        self.usernametext.setToolTip('Username')
        if self.username:
            self.usernametext.setText(self.username)
        box.layout().addWidget(self.usernametext)
        # Password field; the echo mode hides the typed characters.
        self.passwordtext = QtGui.QLineEdit(box)
        self.passwordtext.setPlaceholderText('Password')
        self.passwordtext.setToolTip('Password')
        self.passwordtext.setEchoMode(QtGui.QLineEdit.Password)
        if self.password:
            self.passwordtext.setText(self.password)
        box.layout().addWidget(self.passwordtext)

        # Row with the table selector and the reconnect button.
        tables = gui.hBox(box)
        self.tablecombo = QtGui.QComboBox(
            tables,
            minimumContentsLength=35,
            sizeAdjustPolicy=QtGui.QComboBox.AdjustToMinimumContentsLength)
        self.tablecombo.setToolTip('table')
        tables.layout().addWidget(self.tablecombo)
        self.tablecombo.activated[int].connect(self.select_table)
        self.connectbutton = gui.button(tables,
                                        self,
                                        '↻',
                                        callback=self.connect)
        self.connectbutton.setSizePolicy(QtGui.QSizePolicy.Fixed,
                                         QtGui.QSizePolicy.Fixed)
        tables.layout().addWidget(self.connectbutton)

        # Custom SQL editor; hidden until "Custom SQL" is chosen in the combo
        # (see `select_table`).
        self.custom_sql = gui.vBox(box)
        self.custom_sql.setVisible(False)
        self.sqltext = QtGui.QTextEdit(self.custom_sql)
        self.sqltext.setPlainText(self.sql)
        self.custom_sql.layout().addWidget(self.sqltext)

        # Controls bound to the `materialize` and `materialize_table_name`
        # settings.
        mt = gui.hBox(self.custom_sql)
        cb = gui.checkBox(mt, self, 'materialize', 'Materialize to table ')
        cb.setToolTip('Save results of the query in a table')
        le = gui.lineEdit(mt, self, 'materialize_table_name')
        le.setToolTip('Save results of the query in a table')

        self.executebtn = gui.button(self.custom_sql,
                                     self,
                                     'Execute',
                                     callback=self.open_table)

        box.layout().addWidget(self.custom_sql)

        # Toggling either option reopens the current table.
        gui.checkBox(box,
                     self,
                     "guess_values",
                     "Auto-discover discrete variables",
                     callback=self.open_table)

        gui.checkBox(box,
                     self,
                     "download",
                     "Download data to local memory",
                     callback=self.open_table)

        gui.rubber(self.buttonsArea)
        # Connect through a zero-delay timer instead of calling directly.
        QTimer.singleShot(0, self.connect)

    def error(self, id=0, text=""):
        """Report an error and outline the input field it refers to.

        The message is forwarded to the base class unchanged.  Each of the
        server, username and database fields then gets a red border when the
        text mentions the matching keyword, and has its style sheet cleared
        otherwise.
        """
        super().error(id, text)
        highlight = 'QLineEdit {border: 2px solid red;}'
        # (field, "does the message concern this field?") pairs.
        checks = (
            (self.servertext, 'server' in text or 'host' in text),
            (self.usernametext, 'role' in text),
            (self.databasetext, 'database' in text),
        )
        for line_edit, flagged in checks:
            line_edit.setStyleSheet(highlight if flagged else '')

    def connect(self):
        """Read the connection fields and (re)connect to the database.

        The server field is parsed as ``host[:port]`` and the database field
        as ``database[/schema]``; empty username / password become ``None``.
        On success the table list is refreshed and the current table is
        opened.  On a ``psycopg2.Error`` the first line of the message is
        shown as a connection error and the table list is cleared.
        """
        hostport = self.servertext.text().split(':')
        self.host = hostport[0]
        self.port = hostport[1] if len(hostport) == 2 else None
        self.database, _, self.schema = self.databasetext.text().partition('/')
        self.username = self.usernametext.text() or None
        self.password = self.passwordtext.text() or None
        try:
            connection = psycopg2.connect(host=self.host,
                                          port=self.port,
                                          database=self.database,
                                          user=self.username,
                                          password=self.password)
            # Swap in the new connection and release the one it replaces
            # explicitly instead of relying on garbage collection.
            stale, self._connection = self._connection, connection
            if stale is not None:
                try:
                    stale.close()
                except psycopg2.Error:
                    # Best effort: a broken old connection must not prevent
                    # the use of the new one.
                    pass
            self.Error.connection.clear()
            self.database_desc = OrderedDict(
                (("Host", self.host), ("Port", self.port),
                 ("Database", self.database), ("User name", self.username)))
            self.refresh_tables()
            self.select_table()
        except psycopg2.Error as err:
            self.Error.connection(str(err).split('\n')[0])
            self.database_desc = self.data_desc_table = None
            self.tablecombo.clear()

    def refresh_tables(self):
        """Repopulate the table combo box from the current connection.

        The combo always starts with "Select a table" and ends with
        "Custom SQL"; the relations found in the database are listed in
        between, and the one matching ``self.table`` is selected.  Without a
        connection the combo is left empty.

        User-supplied values (the schema name) are passed to the driver as
        query parameters rather than being formatted into the SQL text.
        """
        self.tablecombo.clear()
        self.Error.missing_extension.clear()
        if self._connection is None:
            self.data_desc_table = None
            return

        params = []
        if self.schema:
            schema_clause = "AND n.nspname = %s"
            params.append(self.schema)
        else:
            schema_clause = "AND pg_catalog.pg_table_is_visible(c.oid)"
        # The LIKE pattern is a parameter as well, so the query text holds no
        # literal '%' that would clash with the driver's placeholder syntax.
        # It excludes relations whose name starts with two underscores.
        params.append('\\_\\_%')
        query = """SELECT --n.nspname as "Schema",
                              c.relname AS "Name"
                       FROM pg_catalog.pg_class c
                  LEFT JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace
                      WHERE c.relkind IN ('r','v','m','S','f','')
                        AND n.nspname <> 'pg_catalog'
                        AND n.nspname <> 'information_schema'
                        AND n.nspname !~ '^pg_toast'
                        {}
                        AND NOT c.relname LIKE %s
                   ORDER BY 1;""".format(schema_clause)

        cur = self._connection.cursor()
        try:
            cur.execute(query, params)
            table_names = [table_name for (table_name, ) in cur.fetchall()]
        finally:
            cur.close()

        self.tablecombo.addItem("Select a table")
        for i, table_name in enumerate(table_names):
            self.tablecombo.addItem(table_name)
            if table_name == self.table:
                # +1 skips the leading "Select a table" entry.
                self.tablecombo.setCurrentIndex(i + 1)
        self.tablecombo.addItem("Custom SQL")

    def select_table(self):
        """React to the entry currently selected in the table combo box.

        For "Custom SQL" the query editor is shown and the current table is
        reset; for any other entry the editor is hidden and the table is
        opened.
        """
        chosen = self.tablecombo.itemText(self.tablecombo.currentIndex())
        is_custom = chosen == "Custom SQL"
        self.custom_sql.setVisible(is_custom)
        if not is_custom:
            return self.open_table()
        # Nothing is loaded until the user executes the custom query.
        self.data_desc_table = None
        self.database_desc["Table"] = "(None)"
        self.table = None

    def create_extensions(self):
        """Try to create every extension listed in ``EXTENSIONS``.

        Extensions whose creation raises ``psycopg2.OperationalError`` are
        collected and reported through ``Error.missing_extension``; the
        message is shown only when at least one extension is missing.

        Does nothing when there is no open connection.  ``open_table`` can be
        triggered from the option check boxes before any connection attempt
        has succeeded.
        """
        if self._connection is None:
            return

        missing = []
        for ext in EXTENSIONS:
            try:
                cur = self._connection.cursor()
                cur.execute("CREATE EXTENSION IF NOT EXISTS " + ext)
            except psycopg2.OperationalError:
                missing.append(ext)
            finally:
                # End the transaction whether or not the statement succeeded.
                self._connection.commit()
        self.Error.missing_extension('s' if len(missing) > 1 else '',
                                     ', '.join(missing),
                                     shown=missing)

    def open_table(self):
        """Load the selected table and send it on the "Data" output.

        The loaded table (or ``None``) is also kept in ``data_desc_table``
        for the report.
        """
        self.create_extensions()
        loaded = self.get_table()
        self.data_desc_table = loaded
        self.send("Data", loaded)

    def get_table(self):
        """Build the table for the current combo box selection.

        A regular entry selects a database table by name; the last entry
        ("Custom SQL") uses the text of the query editor, optionally
        materialized into a database table first.  Depending on the
        ``guess_values`` and ``download`` options the domain is discovered
        from the values (possibly from a sample) and the data is downloaded
        into a local ``Table``.

        Returns the resulting table, or ``None`` when nothing is selected or
        an error was reported through ``Error.connection``.
        """
        if self.tablecombo.currentIndex() <= 0:
            # Index 0 is the "Select a table" entry; -1 is an empty combo.
            if self.database_desc:
                self.database_desc["Table"] = "(None)"
            self.data_desc_table = None
            return None

        if self.tablecombo.currentIndex() < self.tablecombo.count() - 1:
            self.table = self.tablecombo.currentText()
            self.database_desc["Table"] = self.table
            if "Query" in self.database_desc:
                del self.database_desc["Query"]
        else:
            # Last entry: custom SQL typed by the user.
            self.sql = self.table = self.sqltext.toPlainText()
            if self.materialize:
                if not self.materialize_table_name:
                    self.Error.connection(
                        "Specify a table name to materialize the query")
                    return None
                try:
                    # NOTE(review): these statements are built by string
                    # concatenation from the user-entered table name and
                    # query.  The query is arbitrary SQL by design, but the
                    # table name is not quoted as an identifier.
                    cur = self._connection.cursor()
                    cur.execute("DROP TABLE IF EXISTS " +
                                self.materialize_table_name)
                    cur.execute("CREATE TABLE " + self.materialize_table_name +
                                " AS " + self.table)
                    cur.execute("ANALYZE " + self.materialize_table_name)
                    self.table = self.materialize_table_name
                except psycopg2.ProgrammingError as ex:
                    self.Error.connection(str(ex))
                    return None
                finally:
                    self._connection.commit()

        try:
            table = SqlTable(dict(host=self.host,
                                  port=self.port,
                                  database=self.database,
                                  user=self.username,
                                  password=self.password),
                             self.table,
                             inspect_values=False)
        except psycopg2.ProgrammingError as ex:
            self.Error.connection(str(ex))
            return None

        self.Error.connection.clear()

        sample = False
        if table.approx_len() > LARGE_TABLE and self.guess_values:
            confirm = QMessageBox(self)
            confirm.setIcon(QMessageBox.Warning)
            confirm.setText("Attribute discovery might take "
                            "a long time on large tables.\n"
                            "Do you want to auto discover attributes?")
            confirm.addButton("Yes", QMessageBox.YesRole)
            no_button = confirm.addButton("No", QMessageBox.NoRole)
            sample_button = confirm.addButton("Yes, on a sample",
                                              QMessageBox.YesRole)
            confirm.exec()
            if confirm.clickedButton() == no_button:
                self.guess_values = False
            elif confirm.clickedButton() == sample_button:
                sample = True

        self.Information.clear()
        if self.guess_values:
            QApplication.setOverrideCursor(QCursor(Qt.WaitCursor))
            try:
                if sample:
                    s = table.sample_time(1)
                    domain = s.get_domain(guess_values=True)
                    self.Information.data_sampled()
                else:
                    domain = table.get_domain(guess_values=True)
            finally:
                # Always pop the wait cursor, even when discovery fails;
                # otherwise the application is left with a busy cursor.
                QApplication.restoreOverrideCursor()
            table.domain = domain

        if self.download:
            if table.approx_len() > MAX_DL_LIMIT:
                QMessageBox.warning(
                    self, 'Warning', "Data is too big to download.\n"
                    "Consider using the Data Sampler widget to download "
                    "a sample instead.")
                self.download = False
            elif table.approx_len() > AUTO_DL_LIMIT:
                confirm = QMessageBox.question(
                    self, 'Question', "Data appears to be big. Do you really "
                    "want to download it to local memory?",
                    QMessageBox.Yes | QMessageBox.No, QMessageBox.No)
                if confirm == QMessageBox.No:
                    self.download = False
        # Checked again: the block above may have switched the option off.
        if self.download:
            table.download_data(MAX_DL_LIMIT)
            table = Table(table)

        return table

    def send_report(self):
        """Add the connection description and the loaded data to the report.

        Without a connection description only a short notice is reported.
        """
        if self.database_desc:
            self.report_items("Database", self.database_desc)
            if self.data_desc_table:
                described = report.describe_data(self.data_desc_table)
                self.report_items("Data", described)
        else:
            self.report_paragraph("No database connection.")
예제 #2
0
class OWDiscretize(widget.OWWidget):
    """Widget that discretizes the numeric features of a data table.

    A default discretization method applies to all numeric variables;
    individual variables can be given their own method.  The resulting table
    is sent on the "Data" output.
    """
    # Widget metadata.
    name = "Discretize"
    description = "Discretize the numeric data features."
    icon = "icons/Discretize.svg"
    # Incoming data is handled by `set_data`.
    inputs = [
        InputSignal("Data",
                    Orange.data.Table,
                    "set_data",
                    doc="Input data table")
    ]
    outputs = [
        OutputSignal("Data",
                     Orange.data.Table,
                     doc="Table with discretized features")
    ]

    settingsHandler = settings.DomainContextHandler()
    # Per-variable discretization settings, stored as a context setting.
    saved_var_states = settings.ContextSetting({})

    # Index of the selected button in the default-method radio group.  That
    # group omits the "Default" option, so it equals the method constant
    # below minus one (see `_current_default_method`).
    default_method = settings.Setting(2)
    # Number of intervals used by the default equal-frequency/width methods.
    default_k = settings.Setting(3)
    autosend = settings.Setting(True)

    #: Discretization methods
    Default, Leave, MDL, EqualFreq, EqualWidth, Remove, Custom = range(7)

    want_main_area = False
    resizing_enabled = False

    def __init__(self):
        """Build the default-method box, the per-variable box and the
        commit controls."""
        super().__init__()

        #: input data
        self.data = None
        #: Current variable discretization state
        self.var_state = {}
        #: Saved variable discretization settings (context setting)
        self.saved_var_states = {}

        # Method index and number of intervals edited in the per-variable
        # controls (see the `Default ... Custom` class constants).
        self.method = 0
        self.k = 5

        # --- Default discretization box -------------------------------
        box = gui.vBox(self.controlArea, self.tr("Default Discretization"))
        self.default_bbox = rbox = gui.radioButtons(
            box, self, "default_method", callback=self._default_disc_changed)
        rb = gui.hBox(rbox)
        # Two equally stretched columns for the radio buttons.
        self.left = gui.vBox(rb)
        right = gui.vBox(rb)
        rb.layout().setStretch(0, 1)
        rb.layout().setStretch(1, 1)
        options = self.options = [
            self.tr("Default"),
            self.tr("Leave numeric"),
            self.tr("Entropy-MDL discretization"),
            self.tr("Equal-frequency discretization"),
            self.tr("Equal-width discretization"),
            self.tr("Remove numeric variables")
        ]

        # "Default" itself is not offered as a default method, hence [1:].
        for opt in options[1:]:
            t = gui.appendRadioButton(rbox, opt)
            # This condition is ugly, but it keeps the same order of
            # options for backward compatibility of saved schemata
            [right, self.left][opt.startswith("Equal")].layout().addWidget(t)
        gui.separator(right, 18, 18)

        def _intbox(widget, attr, callback):
            # Indented spin box (2..10) bound to `attr` of this widget;
            # returns the `box.box` container so it can be moved between
            # layout positions later.
            box = gui.indentedBox(widget)
            s = gui.spin(box,
                         self,
                         attr,
                         minv=2,
                         maxv=10,
                         label="Num. of intervals:",
                         callback=callback)
            s.setMaximumWidth(60)
            s.setAlignment(Qt.AlignRight)
            gui.rubber(s.box)
            return box.box

        self.k_general = _intbox(self.left, "default_k",
                                 self._default_disc_changed)
        self.k_general.layout().setContentsMargins(0, 0, 0, 0)

        # --- Individual attribute settings box ------------------------
        # NOTE: despite its name, `vlayout` is a horizontal layout.
        vlayout = QHBoxLayout()
        box = gui.widgetBox(self.controlArea,
                            "Individual Attribute Settings",
                            orientation=vlayout,
                            spacing=8)

        # List view with all attributes
        self.varview = QListView(selectionMode=QListView.ExtendedSelection)
        self.varview.setItemDelegate(DiscDelegate())
        self.varmodel = itemmodels.VariableListModel()
        self.varview.setModel(self.varmodel)
        self.varview.selectionModel().selectionChanged.connect(
            self._var_selection_changed)

        vlayout.addWidget(self.varview)
        # Controls for individual attr settings
        self.bbox = controlbox = gui.radioButtons(
            box, self, "method", callback=self._disc_method_changed)
        vlayout.addWidget(controlbox)

        # The first five options (including "Default") come before the
        # interval spin box; the remove option is appended after it.
        for opt in options[:5]:
            gui.appendRadioButton(controlbox, opt)

        self.k_specific = _intbox(controlbox, "k", self._disc_method_changed)

        gui.appendRadioButton(controlbox, "Remove attribute")

        gui.rubber(controlbox)
        # Disabled until a variable is selected (see
        # `_var_selection_changed`).
        controlbox.setEnabled(False)

        self.controlbox = controlbox

        # --- Commit controls ------------------------------------------
        box = gui.auto_commit(self.controlArea,
                              self,
                              "autosend",
                              "Apply",
                              orientation=Qt.Horizontal,
                              checkbox_label="Apply automatically")
        # Put the report button first in the row, followed by a spacer.
        box.layout().insertSpacing(0, 20)
        box.layout().insertWidget(0, self.report_button)
        self._update_spin_positions()

    def set_data(self, data):
        """Input handler: accept a new data table (or ``None``) and commit.

        For a new table the variable states are initialised, the saved
        per-variable settings are restored from the context and the cut
        points are induced.  ``None`` clears the widget.
        """
        self.closeContext()
        self.data = data
        if self.data is None:
            self._clear()
        else:
            self._initialize(data)
            self.openContext(data)
            # Re-apply the per-variable settings stored in the context ...
            self._restore(self.saved_var_states)
            # ... and compute the cut points that are still missing.
            self._update_points()
        self.unconditional_commit()

    def _initialize(self, data):
        """Initialise the default variable states for new `data`.

        Fills the variable list with the continuous variables of the domain,
        enables the MDL options only when the class is discrete and resets
        every variable to the default state.
        """
        domain = data.domain
        self.class_var = domain.class_var
        self.varmodel[:] = [var for var in domain if var.is_continuous]

        has_disc_class = domain.has_discrete_class

        # The default-method button group has no "Default" entry, so its
        # indices are shifted by one relative to the method constants.
        self.default_bbox.buttons[self.MDL - 1].setEnabled(has_disc_class)
        self.bbox.buttons[self.MDL].setEnabled(has_disc_class)

        # If a newly disabled MDL button is checked, move the selection to
        # the first option of its group.
        if not has_disc_class:
            if self.default_method == self.MDL - 1:
                self.default_method = 0
            if self.method == self.MDL:
                self.method = 0

        # Reset (initialize) the variable discretization states.
        self._reset()

    def _restore(self, saved_state):
        """Apply the states stored in `saved_state` to the listed variables.

        `saved_state` maps ``variable_key(var)`` to a state.  Stored states
        that use the default method are rebuilt around the default method
        that is selected now.
        """
        current_default = self._current_default_method()
        for row, var in enumerate(self.varmodel):
            try:
                state = saved_state[variable_key(var)]
            except KeyError:
                # Nothing saved for this variable.
                continue
            if isinstance(state.method, Default):
                state = DState(Default(current_default), None, None)
            self._set_var_state(row, state)

    def _reset(self):
        """Put every listed variable back into the default state."""
        current_default = self._current_default_method()
        self.var_state = {}
        for row, _ in enumerate(self.varmodel):
            fresh = DState(Default(current_default), None, None)
            self._set_var_state(row, fresh)

    def _set_var_state(self, index, state):
        """Record `state` for the variable at `index` and store it in the
        list model under ``Qt.UserRole``."""
        self.var_state[index] = state
        model_index = self.varmodel.index(index)
        self.varmodel.setData(model_index, state, Qt.UserRole)

    def _clear(self):
        """Forget the data and all variable states and re-enable the MDL
        buttons."""
        self.data = None
        self.varmodel[:] = []
        self.var_state = {}
        self.saved_var_states = {}
        # The default-method group has no "Default" button, hence the shift.
        mdl_default_button = self.default_bbox.buttons[self.MDL - 1]
        mdl_default_button.setEnabled(True)
        mdl_button = self.bbox.buttons[self.MDL]
        mdl_button.setEnabled(True)

    def _update_points(self):
        """
        Induce the cut points for every variable that has none yet.

        Only states with neither points nor a discretized variable are
        recomputed.  Nothing happens without (non-empty) data.
        """
        if self.data is None or not len(self.data):
            return

        def induce_cuts(method, data, var):
            # Returns a (points, variable) pair for `var` under `method`.
            dvar = _dispatch[type(method)](method, data, var)
            if dvar is None:
                # removed
                return [], None
            if dvar is var:
                # no transformation took place
                return None, var
            if is_discretized(dvar):
                return dvar.compute_value.points, dvar
            assert False

        for row, var in enumerate(self.varmodel):
            state = self.var_state[row]
            if state.points is not None or state.disc_var is not None:
                # Already computed.
                continue
            points, dvar = induce_cuts(state.method, self.data, var)
            self._set_var_state(
                row, state._replace(points=points, disc_var=dvar))

    def _method_index(self, method):
        """Return the position of `method`'s type in ``METHODS``."""
        lookup_key = (type(method), )
        return METHODS.index(lookup_key)

    def _current_default_method(self):
        """Return a method object for the selected default discretization.

        ``default_method`` indexes a button group without the "Default"
        option, so it is shifted by one to match the method constants.
        """
        k = self.default_k
        # Lazily built method objects, keyed by method constant.
        factories = {
            OWDiscretize.Leave: lambda: Leave(),
            OWDiscretize.MDL: lambda: MDL(),
            OWDiscretize.EqualFreq: lambda: EqualFreq(k),
            OWDiscretize.EqualWidth: lambda: EqualWidth(k),
            OWDiscretize.Remove: lambda: Remove(),
        }
        factory = factories.get(self.default_method + 1)
        if factory is None:
            assert False
        return factory()

    def _current_method(self):
        """Return a method object for the per-variable method selected in
        the individual settings box."""
        # Lazily built method objects, keyed by method constant; only the
        # selected one is evaluated.
        factories = {
            OWDiscretize.Default:
                lambda: Default(self._current_default_method()),
            OWDiscretize.Leave: lambda: Leave(),
            OWDiscretize.MDL: lambda: MDL(),
            OWDiscretize.EqualFreq: lambda: EqualFreq(self.k),
            OWDiscretize.EqualWidth: lambda: EqualWidth(self.k),
            OWDiscretize.Remove: lambda: Remove(),
            OWDiscretize.Custom: lambda: Custom(self.cutpoints),
        }
        factory = factories.get(self.method)
        if factory is None:
            assert False
        return factory()

    def _update_spin_positions(self):
        """Enable each interval spin box only for the methods that use it
        and move it to the layout position tied to the selected method."""
        # selected default method index -> layout position in the left column
        general_slots = {2: 1, 3: 2}
        slot = general_slots.get(self.default_method)
        self.k_general.setDisabled(slot is None)
        if slot is not None:
            self.left.layout().insertWidget(slot, self.k_general)

        # selected per-variable method index -> layout position in the box
        specific_slots = {3: 4, 4: 5}
        slot = specific_slots.get(self.method)
        self.k_specific.setDisabled(slot is None)
        if slot is not None:
            self.bbox.layout().insertWidget(slot, self.k_specific)

    def _default_disc_changed(self):
        """Callback: the default method or its interval count changed.

        Every variable that follows the default gets a fresh state, the cut
        points are recomputed and the result is committed.
        """
        self._update_spin_positions()
        fresh = DState(Default(self._current_default_method()), None, None)
        following_default = [
            row for row, _ in enumerate(self.varmodel)
            if isinstance(self.var_state[row].method, Default)
        ]
        for row in following_default:
            self._set_var_state(row, fresh)
        self._update_points()
        self.commit()

    def _disc_method_changed(self):
        """Callback: the per-variable method or its interval count changed.

        The new method is applied to all selected variables, the cut points
        are recomputed and the result is committed.
        """
        self._update_spin_positions()
        selected = self.selected_indices()
        fresh = DState(self._current_method(), None, None)
        for row in selected:
            self._set_var_state(row, fresh)
        self._update_points()
        self.commit()

    def _var_selection_changed(self, *args):
        """Sync the per-variable controls with the selected variables.

        The controls are enabled only while something is selected.  When all
        selected variables share one method, that method and its parameters
        are shown; otherwise no radio button is checked.
        """
        selected = self.selected_indices()
        # Distinct methods used by the selected variables.
        distinct = {self.var_state[row].method for row in selected}
        self.controlbox.setEnabled(bool(distinct))
        if len(distinct) != 1:
            # deselect the current button
            self.method = -1
            button_group_reset(self.controlbox.group)
        else:
            (method, ) = distinct
            self.method = self._method_index(method)
            if isinstance(method, (EqualFreq, EqualWidth)):
                self.k = method.k
            elif isinstance(method, Custom):
                self.cutpoints = method.points
        self._update_spin_positions()

    def selected_indices(self):
        """Return the row numbers selected in the variable list view."""
        selection = self.varview.selectionModel()
        return [model_index.row() for model_index in selection.selectedRows()]

    def discretized_var(self, source):
        """Return the variable that replaces `source` in the output.

        ``None`` means the variable is dropped; `source` itself is returned
        when it is left unchanged.
        """
        row = list(self.varmodel).index(source)
        state = self.var_state[row]
        replacement = state.disc_var
        if replacement is None:
            return None
        if replacement is source:
            return source
        # An empty list of cut points also means the variable is dropped.
        return None if state.points == [] else replacement

    def discretized_domain(self):
        """
        Return the domain that results from the current settings.

        Continuous attributes and a continuous class are replaced by their
        discretized counterparts, dropped attributes are left out and the
        metas are kept.  Returns ``None`` without data.
        """
        if self.data is None:
            return None

        source_domain = self.data.domain

        def disc_var(source):
            # Only (non-empty) continuous variables are transformed.
            if not (source and source.is_continuous):
                return source
            return self.discretized_var(source)

        converted = (disc_var(var) for var in source_domain.attributes)
        attributes = [var for var in converted if var is not None]

        class_var = disc_var(source_domain.class_var)

        return Orange.data.Domain(attributes,
                                  class_var,
                                  metas=source_domain.metas)

    def commit(self):
        """Send the transformed data on the "Data" output, or ``None`` when
        there is no (non-empty) input data."""
        has_rows = self.data is not None and len(self.data)
        if has_rows:
            output = self.data.transform(self.discretized_domain())
        else:
            output = None
        self.send("Data", output)

    def storeSpecificSettings(self):
        """Save the per-variable states, stripped of their computed points
        and discretized variables, into ``saved_var_states``."""
        super().storeSpecificSettings()
        stripped = {}
        for row, var in enumerate(self.varmodel):
            state = self.var_state[row]
            stripped[variable_key(var)] = state._replace(points=None,
                                                         disc_var=None)
        self.saved_var_states = stripped

    def send_report(self):
        """Report the default method and the thresholds of each variable."""
        # `default_method` indexes a group without "Default", hence the +1.
        default_label = self.options[self.default_method + 1]
        self.report_items((("Default method", default_label), ))
        if self.varmodel:
            thresholds = []
            for row, var in enumerate(self.varmodel):
                cuts = DiscDelegate.cutsText(self.var_state[row])
                thresholds.append((var.name, cuts or "leave numeric"))
            self.report_items("Thresholds", thresholds)
예제 #3
0
class OWSql(widget.OWWidget):
    """Widget that loads a data table from an SQL server.

    The widget exposes a single "Data" output carrying a ``Table`` and keeps
    the connection parameters, the selected table / custom query and the
    loading options as ``Setting`` attributes.
    """
    # Widget metadata.
    name = "SQL Table"
    id = "orange.widgets.data.sql"
    description = """
    Load dataset from SQL."""
    long_description = """
    Sql widget connects to server and opens data from there. """
    icon = "icons/SQLTable.svg"
    author = "Anze Staric"
    maintainer_email = "*****@*****.**"
    priority = 10
    category = "Data"
    keywords = ["data", "file", "load", "read"]
    outputs = [
        OutputSignal("Data",
                     Table,
                     doc="Attribute-valued data set read from the input file.")
    ]

    want_main_area = False
    resizing_enabled = False

    # Connection parameters; filled from the line edits in `connect`.
    # NOTE(review): presumably persisted by the widget settings machinery,
    # which would include the plain-text password -- confirm.
    host = Setting(None)
    port = Setting(None)
    database = Setting(None)
    username = Setting(None)
    password = Setting(None)
    # Name of the selected table (None while "Custom SQL" is selected).
    table = Setting(None)
    # Text of the custom SQL query shown in the query editor.
    sql = Setting("")
    # Options bound to the two check boxes created in `__init__`.
    guess_values = Setting(True)
    download = Setting(False)

    def __init__(self):
        """Build the control area, connect to the server and, when a table
        name is stored in the settings, open that table."""
        super().__init__()

        # Open psycopg2 connection (None until `connect` succeeds).
        self._connection = None

        vbox = gui.widgetBox(self.controlArea, "Server", addSpace=True)
        box = gui.widgetBox(vbox)
        # Server field, shown as "host" or "host:port".
        self.servertext = QtGui.QLineEdit(box)
        self.servertext.setPlaceholderText('Server')
        if self.host:
            self.servertext.setText(self.host if not self.port else '{}:{}'.
                                    format(self.host, self.port))
        box.layout().addWidget(self.servertext)
        self.databasetext = QtGui.QLineEdit(box)
        self.databasetext.setPlaceholderText('Database')
        if self.database:
            self.databasetext.setText(self.database)
        box.layout().addWidget(self.databasetext)
        self.usernametext = QtGui.QLineEdit(box)
        self.usernametext.setPlaceholderText('Username')
        if self.username:
            self.usernametext.setText(self.username)
        box.layout().addWidget(self.usernametext)
        # Password field; the echo mode hides the typed characters.
        self.passwordtext = QtGui.QLineEdit(box)
        self.passwordtext.setPlaceholderText('Password')
        self.passwordtext.setEchoMode(QtGui.QLineEdit.Password)
        if self.password:
            self.passwordtext.setText(self.password)
        box.layout().addWidget(self.passwordtext)

        # Row with the table selector and the reconnect button.
        tables = gui.widgetBox(box, orientation='horizontal')
        self.tablecombo = QtGui.QComboBox(
            tables,
            minimumContentsLength=35,
            sizeAdjustPolicy=QtGui.QComboBox.AdjustToMinimumContentsLength)
        tables.layout().addWidget(self.tablecombo)
        self.tablecombo.activated[int].connect(self.select_table)
        self.connectbutton = gui.button(tables,
                                        self,
                                        '↻',
                                        callback=self.connect)
        self.connectbutton.setSizePolicy(QtGui.QSizePolicy.Fixed,
                                         QtGui.QSizePolicy.Fixed)
        tables.layout().addWidget(self.connectbutton)

        # Custom SQL editor; hidden until "Custom SQL" is chosen in the combo
        # (see `select_table`).
        self.custom_sql = gui.widgetBox(box, orientation='vertical')
        self.custom_sql.setVisible(False)
        self.sqltext = QtGui.QTextEdit(self.custom_sql)
        self.sqltext.setPlainText(self.sql)
        self.custom_sql.layout().addWidget(self.sqltext)

        self.executebtn = gui.button(self.custom_sql,
                                     self,
                                     'Execute',
                                     callback=self.open_table)

        box.layout().addWidget(self.custom_sql)

        # Toggling either option reopens the current table.
        gui.checkBox(box,
                     self,
                     "guess_values",
                     "Auto-discover discrete variables",
                     callback=self.open_table)

        gui.checkBox(box,
                     self,
                     "download",
                     "Download data to local memory",
                     callback=self.open_table)

        # Connect immediately, then reopen the stored table, if any.
        self.connect()
        if self.table:
            self.open_table()

    def error(self, id=0, text=""):
        """Report an error and outline the input field it refers to.

        The message is forwarded to the base class unchanged.  Each of the
        server, username and database fields then gets a red border when the
        text mentions the matching keyword, and has its style sheet cleared
        otherwise.
        """
        super().error(id, text)
        highlight = 'QLineEdit {border: 2px solid red;}'
        # (field, "does the message concern this field?") pairs.
        checks = (
            (self.servertext, 'server' in text or 'host' in text),
            (self.usernametext, 'role' in text),
            (self.databasetext, 'database' in text),
        )
        for line_edit, flagged in checks:
            line_edit.setStyleSheet(highlight if flagged else '')

    def connect(self):
        """Read the connection fields and (re)connect to the database.

        The server field is parsed as ``host[:port]``; empty username /
        password become ``None``.  On success the error is cleared and the
        table list is refreshed.  On a ``psycopg2.Error`` the first line of
        the message is reported and the table list is cleared.
        """
        hostport = self.servertext.text().split(':')
        self.host = hostport[0]
        self.port = hostport[1] if len(hostport) == 2 else None
        self.database = self.databasetext.text()
        self.username = self.usernametext.text() or None
        self.password = self.passwordtext.text() or None
        try:
            connection = psycopg2.connect(host=self.host,
                                          port=self.port,
                                          database=self.database,
                                          user=self.username,
                                          password=self.password)
            # Swap in the new connection and release the one it replaces
            # explicitly instead of relying on garbage collection.
            stale, self._connection = self._connection, connection
            if stale is not None:
                try:
                    stale.close()
                except psycopg2.Error:
                    # Best effort: a broken old connection must not prevent
                    # the use of the new one.
                    pass
            self.error(0)
            self.refresh_tables()
        except psycopg2.Error as err:
            self.error(0, str(err).split('\n')[0])
            self.tablecombo.clear()

    def refresh_tables(self):
        """Repopulate the table combo box from the current connection.

        The combo box is always emptied first. Without a connection it stays
        empty. Otherwise it is filled with a "Select a table" placeholder,
        the relation names returned by the catalog query and a final
        "Custom SQL" entry; the entry equal to ``self.table`` is made current.
        """
        self.tablecombo.clear()
        if self._connection is None:
            return

        query = """SELECT --n.nspname as "Schema",
                              c.relname AS "Name"
                       FROM pg_catalog.pg_class c
                  LEFT JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace
                      WHERE c.relkind IN ('r','v','m','S','f','')
                        AND n.nspname <> 'pg_catalog'
                        AND n.nspname <> 'information_schema'
                        AND n.nspname !~ '^pg_toast'
                        AND pg_catalog.pg_table_is_visible(c.oid)
                        AND NOT c.relname LIKE '\\_\\_%'
                   ORDER BY 1;"""

        cur = self._connection.cursor()
        try:
            cur.execute(query)
            rows = cur.fetchall()
        finally:
            # Close the cursor even when the query fails so that it does not
            # linger on the connection.
            cur.close()

        self.tablecombo.addItem("Select a table")
        # Positions start at 1 because index 0 is the placeholder entry.
        for position, (table_name, ) in enumerate(rows, start=1):
            self.tablecombo.addItem(table_name)
            if table_name == self.table:
                self.tablecombo.setCurrentIndex(position)
        self.tablecombo.addItem("Custom SQL")

    def select_table(self):
        """React to the entry currently selected in the table combo box.

        The "Custom SQL" entry reveals the query editor and forgets the
        stored table. Any other entry hides the editor and returns the result
        of ``open_table``.
        """
        selected_text = self.tablecombo.itemText(
            self.tablecombo.currentIndex())
        if selected_text == "Custom SQL":
            self.custom_sql.setVisible(True)
            self.table = None
            return None
        self.custom_sql.setVisible(False)
        return self.open_table()

    def open_table(self):
        """Open the selected table or custom query and send it as "Data".

        Nothing happens while the placeholder (index 0) or no entry is
        selected. The last combo box entry stands for the custom SQL editor,
        whose text is then used instead of a table name. Depending on the
        ``guess_values`` and ``download`` options the user may be asked to
        confirm expensive operations; declining switches the corresponding
        option off.
        """
        if self.tablecombo.currentIndex() <= 0:
            return

        if self.tablecombo.currentIndex() < self.tablecombo.count() - 1:
            self.table = self.tablecombo.currentText()
        else:
            self.table = self.sqltext.toPlainText()

        table = SqlTable(dict(host=self.host,
                              port=self.port,
                              database=self.database,
                              user=self.username,
                              password=self.password),
                         self.table,
                         inspect_values=False)
        sample = False
        if table.approx_len() > LARGE_TABLE and self.guess_values:
            # Value discovery on a large table: let the user choose between
            # the full table, a sample, or skipping discovery.
            confirm = QMessageBox(self)
            confirm.setIcon(QMessageBox.Warning)
            confirm.setText("Attribute discovery might take "
                            "a long time on large tables.\n"
                            "Do you want to auto discover attributes?")
            confirm.addButton("Yes", QMessageBox.YesRole)
            no_button = confirm.addButton("No", QMessageBox.NoRole)
            sample_button = confirm.addButton("Yes, on a sample",
                                              QMessageBox.YesRole)
            confirm.exec()
            if confirm.clickedButton() == no_button:
                self.guess_values = False
            elif confirm.clickedButton() == sample_button:
                sample = True

        self.information(1)
        if self.guess_values:
            QApplication.setOverrideCursor(QCursor(Qt.WaitCursor))
            try:
                if sample:
                    s = table.sample_time(1)
                    domain = s.get_domain(guess_values=True)
                    self.information(
                        1, "Domain was generated from a sample of the table.")
                else:
                    domain = table.get_domain(guess_values=True)
            finally:
                # Always pop the wait cursor; if discovery raised, the
                # application would otherwise keep showing it.
                QApplication.restoreOverrideCursor()
            table.domain = domain

        if self.download:
            if table.approx_len() > MAX_DL_LIMIT:
                QMessageBox.warning(
                    self, 'Warning', "Data is too big to download.\n"
                    "Consider using the Data Sampler widget to download "
                    "a sample instead.")
                self.download = False
            elif table.approx_len() > AUTO_DL_LIMIT:
                confirm = QMessageBox.question(
                    self, 'Question', "Data appears to be big. Do you really "
                    "want to download it to local memory?",
                    QMessageBox.Yes | QMessageBox.No, QMessageBox.No)
                if confirm == QMessageBox.No:
                    self.download = False
        # Checked again because the dialogs above may have cleared the flag.
        if self.download:
            table.download_data(MAX_DL_LIMIT)
            table = Table(table)

        self.send("Data", table)
예제 #4
0
class OWFile(widget.OWWidget):
    """Widget that reads a data table from a file and sends it on "Data"."""

    # Widget metadata.
    name = "File"
    id = "orange.widgets.data.file"
    description = "Read a data from an input file " \
                  "and send the data table to the output."
    icon = "icons/File.svg"
    author = "Janez Demsar"
    maintainer_email = "janez.demsar(@at@)fri.uni-lj.si"
    priority = 10
    category = "Data"
    keywords = ["data", "file", "load", "read"]
    # Single output channel carrying the loaded table.
    outputs = [
        OutputSignal("Data",
                     Table,
                     doc="Attribute-valued data set read from the input file.")
    ]

    want_main_area = False

    # Recently used file paths, most recent first; "(none)" is the
    # placeholder used when nothing has been loaded.
    recent_files = Setting(["(none)"])
    # Bound to the "different variables" check box created in __init__.
    new_variables = Setting(False)

    # File-dialog filter string: one "All readable files" line listing every
    # extension in FileFormats.readers, then one line per individual format.
    dlgFormats = ("All readable files ({})\n".format(" ".join(
        "*" + c for c in FileFormats.readers)) +
                  "\n".join("{} (*{})".format(FileFormats.names[ext], ext)
                            for ext in FileFormats.readers))

    def __init__(self):
        """Build the file selection and info boxes and load the latest file."""
        super().__init__()
        self.domain = None
        # Forget recent entries that no longer exist on disk; this also drops
        # the "(none)" placeholder, which is not a real path.
        self.recent_files = [
            fn for fn in self.recent_files if os.path.exists(fn)
        ]
        self.loaded_file = ""

        # "Data File" box: combo with recent files, browse and reload buttons.
        vbox = gui.widgetBox(self.controlArea, "Data File", addSpace=True)
        box = gui.widgetBox(vbox, orientation=0)
        self.file_combo = QtGui.QComboBox(box)
        self.file_combo.setMinimumWidth(300)
        box.layout().addWidget(self.file_combo)
        self.file_combo.activated[int].connect(self.select_file)

        button = gui.button(box, self, '...', callback=self.browse_file)
        button.setIcon(self.style().standardIcon(QtGui.QStyle.SP_DirOpenIcon))
        button.setSizePolicy(QtGui.QSizePolicy.Maximum,
                             QtGui.QSizePolicy.Fixed)

        button = gui.button(box,
                            self,
                            "Reload",
                            callback=self.reload,
                            default=True)
        button.setIcon(self.style().standardIcon(
            QtGui.QStyle.SP_BrowserReload))
        button.setSizePolicy(QtGui.QSizePolicy.Fixed, QtGui.QSizePolicy.Fixed)

        gui.checkBox(
            vbox, self, "new_variables",
            "Columns with same name in different files " +
            "represent different variables")

        # "Info" box: two summary lines plus a label for warning/error text.
        box = gui.widgetBox(self.controlArea, "Info", addSpace=True)
        self.infoa = gui.widgetLabel(box, 'No data loaded.')
        self.infob = gui.widgetLabel(box, ' ')
        self.warnings = gui.widgetLabel(box, ' ')
        # Set word wrap, so long warnings won't expand the widget
        self.warnings.setWordWrap(True)
        self.warnings.setSizePolicy(QtGui.QSizePolicy.Ignored,
                                    QtGui.QSizePolicy.MinimumExpanding)

        # Fill the combo and immediately open the most recent surviving file.
        self.set_file_list()
        if len(self.recent_files) > 0:
            self.open_file(self.recent_files[0])

    def set_file_list(self):
        """Rebuild the file combo box from ``recent_files``.

        Each recent file is shown by its base name, the "(none)" placeholder
        is shown as is, and an empty list is displayed as a single "(none)"
        entry. The last entry always offers the documentation data sets.
        """
        combo = self.file_combo
        combo.clear()
        labels = [
            path if path == "(none)" else os.path.split(path)[1]
            for path in self.recent_files
        ] or ["(none)"]
        for label in labels:
            combo.addItem(label)
        combo.addItem("Browse documentation data sets...")

    def reload(self):
        """Re-open the most recent file; do nothing if there is none."""
        if not self.recent_files:
            return None
        return self.open_file(self.recent_files[0])

    def select_file(self, n):
        """Handle activation of combo box entry ``n``.

        An index inside ``recent_files`` moves that file to the front of the
        list. Any other non-zero index is the trailing "browse" entry and
        opens the file dialog on the documentation data sets. Afterwards the
        combo box is rebuilt and the first recent file is opened.
        """
        recent = self.recent_files
        if n < len(recent):
            recent.insert(0, recent.pop(n))
        elif n:
            # NOTE(review): browse_file already rebuilds the list and opens
            # the chosen file, so the code below opens it a second time.
            self.browse_file(True)

        if len(self.recent_files) > 0:
            self.set_file_list()
            self.open_file(self.recent_files[0])

    def browse_file(self, in_demos=0):
        """Ask the user for a file and open it.

        With ``in_demos`` set, the dialog starts in the example data sets
        directory, which is searched for in three places; if none exists an
        information box is shown and nothing is opened. Otherwise the dialog
        starts at the most recent file, or in the home directory when there
        is no real recent file. The chosen file is moved to the front of
        ``recent_files``, the combo box is rebuilt and the file is opened.
        """
        def unusable(path):
            return not path or not os.path.exists(path)

        if in_demos:
            try:
                start_file = get_sample_datasets_dir()
            except AttributeError:
                start_file = ""
            if unusable(start_file):
                # Second guess: doc/datasets two directory levels above the
                # file of the gui module.
                orange_dir = os.path.dirname(os.path.dirname(gui.__file__))
                start_file = os.path.join(orange_dir, "doc", "datasets")
            if unusable(start_file):
                # Last guess: relative to the working directory, stepping out
                # of a "canvas" directory first.
                cwd = os.getcwd()
                if os.path.basename(cwd) == "canvas":
                    cwd = os.path.dirname(cwd)
                start_file = os.path.join(os.path.dirname(cwd), "doc",
                                          "datasets")
            if not os.path.exists(start_file):
                QtGui.QMessageBox.information(
                    None, "File",
                    "Cannot find the directory with example data sets")
                return
        elif self.recent_files and self.recent_files[0] != "(none)":
            start_file = self.recent_files[0]
        else:
            start_file = os.path.expanduser("~/")

        filename = QtGui.QFileDialog.getOpenFileName(
            self, 'Open Orange Data File', start_file, self.dlgFormats)
        if not filename:
            # Dialog was cancelled.
            return
        try:
            self.recent_files.remove(filename)
        except ValueError:
            pass
        self.recent_files.insert(0, filename)
        self.set_file_list()
        self.open_file(self.recent_files[0])

    # Open a file, create data from it and send it over the data channel
    def open_file(self, fn):
        """Load ``fn``, update the info labels and send the data on "Data".

        A path that does not exist is looked up by its base name in the
        current directory. The placeholder "(none)" sends None and resets the
        labels. If loading fails, the exception text is shown as the widget
        error. When that text contains "is being loaded as", loading is
        retried once; if the retry succeeds the text is reported as a warning
        only, because the data was in fact read.
        """
        self.error()
        self.warning()
        self.information()

        if not os.path.exists(fn):
            basename = os.path.basename(fn)
            if os.path.exists(os.path.join(".", basename)):
                fn = os.path.join(".", basename)
                self.information(
                    "Loading '{}' from the current directory.".format(
                        basename))
        if fn == "(none)":
            self.send("Data", None)
            self.infoa.setText("No data loaded")
            self.infob.setText("")
            self.warnings.setText("")
            return

        self.loaded_file = ""

        data = None
        err_value = None
        try:
            # TODO handle self.new_variables
            data = Table(fn)
            self.loaded_file = fn
        except Exception as exc:
            err_value = str(exc)
            if "is being loaded as" in err_value:
                try:
                    data = Table(fn)
                    self.loaded_file = fn
                except Exception:
                    # Retry failed as well; report the first error below.
                    data = None
                else:
                    # The retry worked, so this is a warning, not an error.
                    # Leaving err_value set would flag an error on a widget
                    # that loaded and sent its data.
                    self.warning(0, err_value)
                    err_value = None
        if err_value is not None:
            self.error(err_value)
            self.infoa.setText('Data was not loaded due to an error.')
            self.infob.setText('Error:')
            self.warnings.setText(err_value)

        if data is None:
            self.dataReport = None
        else:
            domain = data.domain
            self.infoa.setText(
                "{} instance(s), {} feature(s), {} meta attributes".format(
                    len(data), len(domain.attributes), len(domain.metas)))
            if domain.has_continuous_class:
                self.infob.setText("Regression; numerical class.")
            elif domain.has_discrete_class:
                self.infob.setText("Classification; " +
                                   "discrete class with {} values.".format(
                                       len(domain.class_var.values)))
            elif data.domain.class_vars:
                self.infob.setText("Multi-target; {} target variables.".format(
                    len(data.domain.class_vars)))
            else:
                self.infob.setText("Data has no target variable.")
            self.warnings.setText("")

            add_origin(data, fn)
            # Name the table after the file, without its last extension.
            file_name = os.path.split(fn)[1]
            if "." in file_name:
                data.name = file_name[:file_name.rfind('.')]
            else:
                data.name = file_name

            self.dataReport = self.prepareDataReport(data)
        self.send("Data", data)

    def sendReport(self):
        """Report the loaded file's name, format and data summary.

        Nothing is reported while no data report is available.
        NOTE(review): ``self.formats`` is not defined in this class; it
        presumably comes from the base class - confirm.
        """
        if not getattr(self, "dataReport", None):
            return
        extension = os.path.splitext(self.loaded_file)[1]
        settings_rows = [
            ("File name", self.loaded_file),
            ("Format", self.formats.get(extension, "unknown format")),
        ]
        self.reportSettings("File", settings_rows)
        self.reportData(self.dataReport)
예제 #5
0
class OWDiscretize(widget.OWWidget):
    """Widget that discretizes the continuous features of the input table."""

    # Widget metadata and signals; "set_data" is the input handler method.
    name = "Discretize"
    description = "Discretize the continuous data features."
    icon = "icons/Discretize.svg"
    inputs = [InputSignal("Data", Orange.data.Table, "set_data",
                          doc="Input data table")]
    outputs = [OutputSignal("Data", Orange.data.Table,
                            doc="Table with discretized features")]

    settingsHandler = settings.DomainContextHandler()
    # Per-variable discretization states, keyed by variable_key(var).
    saved_var_states = settings.ContextSetting({})

    # Index of the checked button in the default-method radio group;
    # _current_default_method adds 1 to map it onto the constants below.
    default_method = settings.Setting(2)
    # Number of intervals used by the default equal-frequency/width methods.
    default_k = settings.Setting(3)
    autosend = settings.Setting(True)

    #: Discretization methods
    Default, Leave, MDL, EqualFreq, EqualWidth, Remove, Custom = range(7)

    want_main_area = False

    def  __init__(self):
        """Build the default-method box, the per-variable box and Apply."""
        super().__init__()

        #: input data
        self.data = None
        #: Current variable discretization state
        self.var_state = {}
        #: Saved variable discretization settings (context setting)
        self.saved_var_states = {}

        # Method index and interval count shown in the per-variable controls.
        self.method = 0
        self.k = 5

        box = gui.widgetBox(
            self.controlArea, self.tr("Default Discretization"))
        self.default_bbox = rbox = gui.radioButtons(
            box, self, "default_method", callback=self._default_disc_changed)

        options = [
            self.tr("Default"),
            self.tr("Leave continuous"),
            self.tr("Entropy-MDL discretization"),
            self.tr("Equal-frequency discretization"),
            self.tr("Equal-width discretization"),
            self.tr("Remove continuous attributes")
        ]

        # The default group has no "Default" entry, hence the slice from 1;
        # "Remove continuous attributes" is appended after the slider below.
        for opt in options[1:5]:
            gui.appendRadioButton(rbox, opt)

        s = gui.hSlider(gui.indentedBox(rbox),
                        self, "default_k", minValue=2, maxValue=10,
                        label="Num. of intervals:",
                        callback=self._default_disc_changed)
        s.setTracking(False)

        gui.appendRadioButton(rbox, options[-1])

        # NOTE: despite its name this is a horizontal layout (list view on
        # the left, method controls on the right).
        vlayout = QHBoxLayout()
        box = gui.widgetBox(
            self.controlArea, "Individual Attribute Settings",
            orientation=vlayout, spacing=8
        )

        # List view with all attributes
        self.varview = QListView(selectionMode=QListView.ExtendedSelection)
        self.varview.setItemDelegate(DiscDelegate())
        self.varmodel = itemmodels.VariableListModel()
        self.varview.setModel(self.varmodel)
        self.varview.selectionModel().selectionChanged.connect(
            self._var_selection_changed
        )

        vlayout.addWidget(self.varview)
        # Controls for individual attr settings
        self.bbox = controlbox = gui.radioButtons(
            box, self, "method", callback=self._disc_method_changed
        )
        vlayout.addWidget(controlbox)

        # The per-variable group starts with "Default" and ends with its own
        # "Remove attribute" button, appended after the slider below.
        for opt in options[:5]:
            gui.appendRadioButton(controlbox, opt)

        s = gui.hSlider(gui.indentedBox(controlbox),
                        self, "k", minValue=2, maxValue=10,
                        label="Num. of intervals:",
                        callback=self._disc_method_changed)
        s.setTracking(False)

        gui.appendRadioButton(controlbox, "Remove attribute")

        gui.rubber(controlbox)
        # Disabled until a variable is selected (see _var_selection_changed).
        controlbox.setEnabled(False)

        self.controlbox = controlbox

        gui.auto_commit(self.controlArea, self, "autosend", "Apply",
                        orientation="horizontal",
                        checkbox_label="Send data after every change")

    def set_data(self, data):
        """Input handler: accept a new table (or None) and commit the result.

        For real data the variable states are initialized, the saved
        per-variable settings are restored from the opened context and the
        cut points are induced. For None the widget state is cleared.
        """
        self.closeContext()
        self.data = data
        if self.data is None:
            self._clear()
        else:
            self._initialize(data)
            self.openContext(data)
            # Re-apply the per-variable settings stored in the context, then
            # finish computing the cut points they imply.
            self._restore(self.saved_var_states)
            self._update_points()
        self.unconditional_commit()

    def _initialize(self, data):
        # Initialize the default variable states for new data.
        self.class_var = data.domain.class_var
        cvars = [var for var in data.domain if var.is_continuous]
        self.varmodel[:] = cvars

        class_var = data.domain.class_var
        has_disc_class = data.domain.has_discrete_class

        self.default_bbox.buttons[self.MDL - 1].setEnabled(has_disc_class)
        self.bbox.buttons[self.MDL].setEnabled(has_disc_class)

        # If the newly disabled MDL button is checked then change it
        if not has_disc_class and self.default_method == self.MDL - 1:
            self.default_method = 0
        if not has_disc_class and self.method == self.MDL:
            self.method = 0

        # Reset (initialize) the variable discretization states.
        self._reset()

    def _restore(self, saved_state):
        """Apply states from ``saved_state`` to the listed variables.

        ``saved_state`` maps ``variable_key(var)`` to a stored state.
        Variables without an entry keep their current state. A stored
        "Default" state is rebuilt around the current default method rather
        than the one that was saved.
        """
        fallback = self._current_default_method()
        for row, variable in enumerate(self.varmodel):
            key = variable_key(variable)
            if key not in saved_state:
                continue
            restored = saved_state[key]
            if isinstance(restored.method, Default):
                restored = DState(Default(fallback), None, None)
            self._set_var_state(row, restored)

    def _reset(self):
        """Put every listed variable back into the "Default" state.

        Each state wraps the current default method and has neither cut
        points nor a discretized variable yet.
        """
        default = self._current_default_method()
        self.var_state = {}
        for row, _ in enumerate(self.varmodel):
            self._set_var_state(row, DState(Default(default), None, None))

    def _set_var_state(self, index, state):
        """Record ``state`` for the variable in row ``index``.

        The state is stored in ``var_state`` and also written to the list
        model under ``Qt.UserRole``.
        """
        self.var_state[index] = state
        model_index = self.varmodel.index(index)
        self.varmodel.setData(model_index, state, Qt.UserRole)

    def _clear(self):
        self.data = None
        self.varmodel[:] = []
        self.var_state = {}
        self.saved_var_states = {}
        self.default_bbox.buttons[self.MDL - 1].setEnabled(True)
        self.bbox.buttons[self.MDL].setEnabled(True)

    def _update_points(self):
        """
        Induce cut points for every variable that does not have them yet,
        then commit.

        Only states whose ``points`` and ``disc_var`` are both None are
        (re)computed.
        """
        def cuts_for(method, data, var):
            # Returns (points, discretized variable) for one variable.
            transformed = _dispatch[type(method)](method, data, var)
            if transformed is None:
                # removed
                return [], None
            if transformed is var:
                # no transformation took place
                return None, var
            if is_discretized(transformed):
                return transformed.compute_value.points, transformed
            assert False

        for row, var in enumerate(self.varmodel):
            state = self.var_state[row]
            if state.points is not None or state.disc_var is not None:
                continue
            points, dvar = cuts_for(state.method, self.data, var)
            self._set_var_state(
                row, state._replace(points=points, disc_var=dvar))
        self.commit()

    def _method_index(self, method):
        """Return the position of ``method``'s type within ``METHODS``."""
        key = (type(method), )
        return METHODS.index(key)

    def _current_default_method(self):
        """Build the method object selected in the default-method group.

        ``default_method`` is shifted by one to line up with the method
        constants, because the default group has no "Default" entry.
        """
        choice = self.default_method + 1
        k = self.default_k
        if choice == OWDiscretize.Leave:
            return Leave()
        if choice == OWDiscretize.MDL:
            return MDL()
        if choice == OWDiscretize.EqualFreq:
            return EqualFreq(k)
        if choice == OWDiscretize.EqualWidth:
            return EqualWidth(k)
        if choice == OWDiscretize.Remove:
            return Remove()
        assert False

    def _current_method(self):
        """Build the method object selected in the per-variable controls.

        "Default" wraps the current default method; the equal-frequency and
        equal-width methods use ``self.k``; "Custom" uses ``self.cutpoints``.
        """
        choice = self.method
        if choice == OWDiscretize.Default:
            return Default(self._current_default_method())
        if choice == OWDiscretize.Leave:
            return Leave()
        if choice == OWDiscretize.MDL:
            return MDL()
        if choice == OWDiscretize.EqualFreq:
            return EqualFreq(self.k)
        if choice == OWDiscretize.EqualWidth:
            return EqualWidth(self.k)
        if choice == OWDiscretize.Remove:
            return Remove()
        if choice == OWDiscretize.Custom:
            return Custom(self.cutpoints)
        assert False

    def _default_disc_changed(self):
        """Propagate a changed default method to all "Default" variables.

        Their states are replaced by a fresh state without cut points, which
        are then induced again.
        """
        fresh = DState(Default(self._current_default_method()), None, None)
        for row in range(len(self.varmodel)):
            if isinstance(self.var_state[row].method, Default):
                self._set_var_state(row, fresh)
        self._update_points()

    def _disc_method_changed(self):
        """Apply the per-variable method choice to the selected variables.

        Every selected row gets a fresh state without cut points, which are
        then induced again.
        """
        rows = self.selected_indices()
        fresh = DState(self._current_method(), None, None)
        for row in rows:
            self._set_var_state(row, fresh)
        self._update_points()

    def _var_selection_changed(self, *args):
        """Sync the per-variable controls with the selected variables.

        The controls are enabled whenever something is selected. If all
        selected variables share one method, the controls show it (with its
        interval count or cut points); otherwise no radio button is checked.
        """
        rows = self.selected_indices()
        # distinct methods used by the current selection
        distinct = {self.var_state[row].method for row in rows}
        self.controlbox.setEnabled(bool(distinct))
        if len(distinct) != 1:
            # deselect the current button
            self.method = -1
            button_group_reset(self.controlbox.group)
            return

        (method,) = distinct
        self.method = self._method_index(method)
        if isinstance(method, (EqualFreq, EqualWidth)):
            self.k = method.k
        elif isinstance(method, Custom):
            self.cutpoints = method.points

    def selected_indices(self):
        """Return the row numbers currently selected in the variable view."""
        selection = self.varview.selectionModel()
        return [model_index.row() for model_index in selection.selectedRows()]

    def discretized_var(self, source):
        """Return the variable that replaces ``source`` in the output.

        The result is None when no discretized variable is recorded or when
        the recorded cut points are an empty list, ``source`` itself when it
        is kept as is, and the discretized variable otherwise.
        """
        state = self.var_state[list(self.varmodel).index(source)]
        replacement = state.disc_var
        if replacement is None or replacement is source:
            return replacement
        if state.points == []:
            return None
        return replacement

    def discretized_domain(self):
        """
        Return the current effective discretized domain, or None without data.

        Continuous attributes and a continuous class variable are replaced
        through ``discretized_var``; attributes mapped to None are dropped.
        Meta attributes are passed through unchanged.
        """
        if self.data is None:
            return None

        source_domain = self.data.domain

        def transform(var):
            if var and var.is_continuous:
                return self.discretized_var(var)
            return var

        attributes = [
            new_var
            for new_var in map(transform, source_domain.attributes)
            if new_var is not None
        ]
        class_var = transform(source_domain.class_var)

        return Orange.data.Domain(
            attributes, class_var,
            metas=source_domain.metas
        )

    def commit(self):
        """Send the data converted to the discretized domain (None if no data)."""
        if self.data is None:
            output = None
        else:
            domain = self.discretized_domain()
            output = self.data.from_table(domain, self.data)
        self.send("Data", output)

    def storeSpecificSettings(self):
        """Save the per-variable states into ``saved_var_states``.

        States are keyed by ``variable_key(var)`` and stored without their
        cut points and discretized variable.
        """
        super().storeSpecificSettings()
        stripped = {}
        for row, var in enumerate(self.varmodel):
            stripped[variable_key(var)] = self.var_state[row]._replace(
                points=None, disc_var=None)
        self.saved_var_states = stripped
예제 #6
0
class OWSql(OWWidget):
    """Widget that reads a table from an SQL database and sends it on "Data"."""

    # Widget metadata.
    name = "SQL Table"
    id = "orange.widgets.data.sql"
    description = "Load data set from SQL."
    icon = "icons/SQLTable.svg"
    priority = 10
    category = "Data"
    keywords = ["data", "file", "load", "read"]
    outputs = [
        OutputSignal("Data",
                     Table,
                     doc="Attribute-valued data set read from the input file.")
    ]

    want_main_area = False
    resizing_enabled = False

    # Connection parameters; connect() refills them from the line edits.
    host = Setting(None)
    port = Setting(None)
    database = Setting(None)
    schema = Setting(None)
    username = Setting(None)
    # NOTE(review): the password is kept in a Setting like the other fields;
    # confirm how settings are persisted before relying on this.
    password = Setting(None)
    # Selected table name (or the custom query text) and last custom query.
    table = Setting(None)
    sql = Setting("")
    # Options bound to the two check boxes at the bottom of the widget.
    guess_values = Setting(True)
    download = Setting(False)

    # Options for saving the result of a custom query into a table.
    materialize = Setting(False)
    materialize_table_name = Setting("")

    class Information(OWWidget.Information):
        data_sampled = Msg("Data description was generated from a sample.")

    class Error(OWWidget.Error):
        connection = Msg("{}")
        no_backends = Msg("Please install a backend to use this widget")
        missing_extension = Msg("Database is missing extension{}: {}")

    def __init__(self):
        """Build the connection form, table selector and query editor."""
        super().__init__()

        self.backend = None
        self.data_desc_table = None
        self.database_desc = None

        vbox = gui.vBox(self.controlArea, "Server", addSpace=True)
        box = gui.vBox(vbox)

        # Backend selector; without any available backend the whole form is
        # disabled and an error is shown.
        self.backendmodel = BackendModel(Backend.available_backends())
        self.backendcombo = QComboBox(box)
        if len(self.backendmodel):
            self.backendcombo.setModel(self.backendmodel)
        else:
            self.Error.no_backends()
            box.setEnabled(False)
        box.layout().addWidget(self.backendcombo)

        # Connection fields, pre-filled from the stored settings. Server is
        # shown as "host[:port]" and database as "database[/schema]".
        self.servertext = QLineEdit(box)
        self.servertext.setPlaceholderText('Server')
        self.servertext.setToolTip('Server')
        if self.host:
            self.servertext.setText(self.host if not self.port else '{}:{}'.
                                    format(self.host, self.port))
        box.layout().addWidget(self.servertext)
        self.databasetext = QLineEdit(box)
        self.databasetext.setPlaceholderText('Database[/Schema]')
        self.databasetext.setToolTip('Database or optionally Database/Schema')
        if self.database:
            self.databasetext.setText(
                self.database if not self.schema else '{}/{}'.
                format(self.database, self.schema))
        box.layout().addWidget(self.databasetext)
        self.usernametext = QLineEdit(box)
        self.usernametext.setPlaceholderText('Username')
        self.usernametext.setToolTip('Username')
        if self.username:
            self.usernametext.setText(self.username)
        box.layout().addWidget(self.usernametext)
        self.passwordtext = QLineEdit(box)
        self.passwordtext.setPlaceholderText('Password')
        self.passwordtext.setToolTip('Password')
        self.passwordtext.setEchoMode(QLineEdit.Password)
        if self.password:
            self.passwordtext.setText(self.password)
        box.layout().addWidget(self.passwordtext)

        # Table selector with a reconnect button next to it.
        tables = gui.hBox(box)
        self.tablemodel = TableModel()
        self.tablecombo = QComboBox(
            minimumContentsLength=35,
            sizeAdjustPolicy=QComboBox.AdjustToMinimumContentsLength)
        self.tablecombo.setModel(self.tablemodel)
        self.tablecombo.setToolTip('table')
        tables.layout().addWidget(self.tablecombo)
        self.tablecombo.activated[int].connect(self.select_table)
        self.connectbutton = gui.button(tables,
                                        self,
                                        '↻',
                                        callback=self.connect)
        self.connectbutton.setSizePolicy(QSizePolicy.Fixed, QSizePolicy.Fixed)
        tables.layout().addWidget(self.connectbutton)

        # Custom SQL editor; hidden until "Custom SQL" is selected
        # (see select_table).
        self.custom_sql = gui.vBox(box)
        self.custom_sql.setVisible(False)
        self.sqltext = QTextEdit(self.custom_sql)
        self.sqltext.setPlainText(self.sql)
        self.custom_sql.layout().addWidget(self.sqltext)

        mt = gui.hBox(self.custom_sql)
        cb = gui.checkBox(mt, self, 'materialize', 'Materialize to table ')
        cb.setToolTip('Save results of the query in a table')
        le = gui.lineEdit(mt, self, 'materialize_table_name')
        le.setToolTip('Save results of the query in a table')

        self.executebtn = gui.button(self.custom_sql,
                                     self,
                                     'Execute',
                                     callback=self.open_table)

        box.layout().addWidget(self.custom_sql)

        # Both options reopen the table when toggled.
        gui.checkBox(box,
                     self,
                     "guess_values",
                     "Auto-discover discrete variables",
                     callback=self.open_table)

        gui.checkBox(box,
                     self,
                     "download",
                     "Download data to local memory",
                     callback=self.open_table)

        gui.rubber(self.buttonsArea)
        # Schedule the first connection attempt through a zero-delay timer
        # instead of calling connect() directly from the constructor.
        QTimer.singleShot(0, self.connect)

    def error(self, id=0, text=""):
        """Show an error and outline the connection fields it mentions.

        After forwarding to the base class, a line edit gets a red border
        when one of its keywords occurs in ``text``; otherwise its style
        sheet is reset to the empty string.
        """
        super().error(id, text)

        def mark(line_edit, *keywords):
            hit = any(keyword in text for keyword in keywords)
            line_edit.setStyleSheet(
                'QLineEdit {border: 2px solid red;}' if hit else '')

        mark(self.servertext, 'server', 'host')
        mark(self.usernametext, 'role')
        mark(self.databasetext, 'database')

    def connect(self):
        """Read the connection form and connect through the chosen backend.

        The server field is split on ':' into host and optional port, the
        database field on the first '/' into database and schema. Without a
        selected backend only these fields are updated. On success the
        connection error is cleared, the database description is rebuilt and
        the table list is refreshed and re-selected. A BackendError is shown
        (first line only) and empties the table combo box.
        """
        server_parts = self.servertext.text().split(':')
        self.host = server_parts[0]
        self.port = server_parts[1] if len(server_parts) == 2 else None
        self.database, _, self.schema = self.databasetext.text().partition('/')
        self.username = self.usernametext.text() or None
        self.password = self.passwordtext.text() or None

        if self.backendcombo.currentIndex() < 0:
            # No backend selected; nothing to connect with.
            return

        try:
            backend_class = self.backendmodel[self.backendcombo.currentIndex()]
            params = {
                "host": self.host,
                "port": self.port,
                "database": self.database,
                "user": self.username,
                "password": self.password,
            }
            self.backend = backend_class(params)
            self.Error.connection.clear()
            self.database_desc = OrderedDict([
                ("Host", self.host),
                ("Port", self.port),
                ("Database", self.database),
                ("User name", self.username),
            ])
            self.refresh_tables()
            self.select_table()
        except BackendError as err:
            first_line = str(err).split('\n')[0]
            self.Error.connection(first_line)
            self.database_desc = self.data_desc_table = None
            self.tablecombo.clear()

    def refresh_tables(self):
        """Rebuild the table list from the connected backend.

        The list and the missing-extension error are always cleared first.
        With a backend the list becomes: "Select a table", the backend's
        tables for the current schema, then "Custom SQL". Without a backend
        the list stays empty and the described table is reset.
        """
        model = self.tablemodel
        model.clear()
        self.Error.missing_extension.clear()
        if self.backend is not None:
            model.append("Select a table")
            model.extend(self.backend.list_tables(self.schema))
            model.append("Custom SQL")
        else:
            self.data_desc_table = None

    def select_table(self):
        """Apply the current selection of the table combo box.

        For any entry other than "Custom SQL" the custom-SQL box is hidden
        and the table is opened; the result of ``open_table()`` is returned.
        For "Custom SQL" the box is shown and the current table state is
        reset (nothing is sent); the method then returns None.
        """
        selected = self.tablecombo.itemText(self.tablecombo.currentIndex())
        if selected != "Custom SQL":
            self.custom_sql.setVisible(False)
            return self.open_table()

        self.custom_sql.setVisible(True)
        self.data_desc_table = None
        # database_desc is None when there is no connection (connect() resets
        # it on failure); guard the write the same way get_table() does.
        if self.database_desc is not None:
            self.database_desc["Table"] = "(None)"
        self.table = None
        # NOTE(review): a commented-out self.Error.missing_extension(...) call
        # used to follow here; it referenced a `missing` value that is not
        # defined in this method.

    def open_table(self):
        """Fetch the selected table, remember it for the report, and send it
        on the "Data" output (the value may be None)."""
        result = self.get_table()
        self.data_desc_table = result
        self.send("Data", result)

    def get_table(self):
        """Build the table for the current combo-box selection.

        Index <= 0 (nothing / the placeholder) yields None. The last combo
        entry is treated as the custom query taken from the SQL editor,
        optionally materialized into ``materialize_table_name`` first; any
        other entry names a table from ``tablemodel``.

        For large tables (``approx_len() > LARGE_TABLE``) the user is asked
        whether attribute values should be discovered on the full table, on
        a sample, or not at all. When ``self.download`` is set, size limits
        are checked (possibly clearing the flag) before the data is
        downloaded into a local ``Table``.

        Returns:
            The created table, or None when nothing is selected or an error
            was reported through ``self.Error.connection``.
        """
        if self.tablecombo.currentIndex() <= 0:
            if self.database_desc:
                self.database_desc["Table"] = "(None)"
            self.data_desc_table = None
            return None

        if self.tablecombo.currentIndex() < self.tablecombo.count() - 1:
            self.table = self.tablemodel[self.tablecombo.currentIndex()]
            self.database_desc["Table"] = self.table
            if "Query" in self.database_desc:
                del self.database_desc["Query"]
        else:
            # Last entry: the text of the SQL editor is used as the query.
            self.sql = self.table = self.sqltext.toPlainText()
            if self.materialize:
                import psycopg2
                if not self.materialize_table_name:
                    self.Error.connection(
                        "Specify a table name to materialize the query")
                    return None
                try:
                    # NOTE(review): these statements are assembled by string
                    # concatenation from the widget's table-name setting and
                    # the user's query; the name is not quoted or validated.
                    with self.backend.execute_sql_query(
                            "DROP TABLE IF EXISTS " +
                            self.materialize_table_name):
                        pass
                    with self.backend.execute_sql_query(
                            "CREATE TABLE " + self.materialize_table_name +
                            " AS " + self.table):
                        pass
                    with self.backend.execute_sql_query(
                            "ANALYZE " + self.materialize_table_name):
                        pass
                    self.table = self.materialize_table_name
                except psycopg2.ProgrammingError as ex:
                    self.Error.connection(str(ex))
                    return None

        try:
            table = SqlTable(dict(host=self.host,
                                  port=self.port,
                                  database=self.database,
                                  user=self.username,
                                  password=self.password),
                             self.table,
                             backend=type(self.backend),
                             inspect_values=False)
        except BackendError as ex:
            self.Error.connection(str(ex))
            return None

        self.Error.connection.clear()

        sample = False
        if table.approx_len() > LARGE_TABLE and self.guess_values:
            confirm = QMessageBox(self)
            confirm.setIcon(QMessageBox.Warning)
            confirm.setText("Attribute discovery might take "
                            "a long time on large tables.\n"
                            "Do you want to auto discover attributes?")
            confirm.addButton("Yes", QMessageBox.YesRole)
            no_button = confirm.addButton("No", QMessageBox.NoRole)
            sample_button = confirm.addButton("Yes, on a sample",
                                              QMessageBox.YesRole)
            confirm.exec()
            if confirm.clickedButton() == no_button:
                self.guess_values = False
            elif confirm.clickedButton() == sample_button:
                sample = True

        self.Information.clear()
        if self.guess_values:
            QApplication.setOverrideCursor(QCursor(Qt.WaitCursor))
            try:
                if sample:
                    s = table.sample_time(1)
                    domain = s.get_domain(inspect_values=True)
                    self.Information.data_sampled()
                else:
                    domain = table.get_domain(inspect_values=True)
            finally:
                # Always undo the override cursor, even when domain discovery
                # raises; otherwise the wait cursor would stay on.
                QApplication.restoreOverrideCursor()
            table.domain = domain

        if self.download:
            if table.approx_len() > MAX_DL_LIMIT:
                QMessageBox.warning(
                    self, 'Warning', "Data is too big to download.\n"
                    "Consider using the Data Sampler widget to download "
                    "a sample instead.")
                self.download = False
            elif table.approx_len() > AUTO_DL_LIMIT:
                confirm = QMessageBox.question(
                    self, 'Question', "Data appears to be big. Do you really "
                    "want to download it to local memory?",
                    QMessageBox.Yes | QMessageBox.No, QMessageBox.No)
                if confirm == QMessageBox.No:
                    self.download = False
        if self.download:
            table.download_data(MAX_DL_LIMIT)
            table = Table(table)

        return table

    def send_report(self):
        """Add the connection description and, if a table is loaded, a
        description of its data to the report.

        Without a connection description only a "No database connection."
        paragraph is reported.
        """
        description = self.database_desc
        if description:
            self.report_items("Database", description)
            loaded = self.data_desc_table
            if loaded:
                self.report_items("Data", report.describe_data(loaded))
        else:
            self.report_paragraph("No database connection.")
예제 #7
0
class OWFile(widget.OWWidget):
    """Widget that loads a data table from a local file or a URL and sends
    it on the "Data" output.

    Recently used locations are kept in ``recent_paths`` (most recent first)
    and shown in an editable combo box.
    """
    name = "File"
    id = "orange.widgets.data.file"
    description = "Read a data from an input file " \
                  "and send the data table to the output."
    icon = "icons/File.svg"
    author = "Janez Demsar"
    maintainer_email = "janez.demsar(@at@)fri.uni-lj.si"
    priority = 10
    category = "Data"
    keywords = ["data", "file", "load", "read"]
    outputs = [
        OutputSignal("Data",
                     Table,
                     doc="Attribute-valued data set read from the input file.")
    ]

    want_main_area = False

    #: back-compatibility: List[str] saved files list
    recent_files = Setting([])
    #: List[RecentPath]
    recent_paths = Setting([])

    new_variables = Setting(False)

    # File-dialog filter string: an "All readable files" entry followed by one
    # entry per distinct reader, in the order the readers are registered.
    dlgFormats = ("All readable files ({});;".format(
        '*' + ' *'.join(FileFormat.readers.keys())) + ";;".join(
            "{} (*{})".format(f.DESCRIPTION, ' *'.join(f.EXTENSIONS))
            for f in sorted(set(FileFormat.readers.values()),
                            key=list(FileFormat.readers.values()).index)))

    def __init__(self):
        """Build the controls and open the most recent path, if there is one."""
        super().__init__()
        self.domain = None

        self.loaded_file = ""
        self._relocate_recent_files()

        vbox = gui.widgetBox(self.controlArea,
                             "Data File / URL",
                             addSpace=True)
        box = gui.widgetBox(vbox, orientation=0)
        self.file_combo = QtGui.QComboBox(box)
        self.file_combo.setMinimumWidth(300)
        self.file_combo.setEditable(True)
        self.file_combo.setItemDelegate(RecentPathDelegate())
        self.file_combo.lineEdit().setStyleSheet("padding-left: 1px;")
        box.layout().addWidget(self.file_combo)
        self.file_combo.activated[int].connect(self.select_file)

        button = gui.button(box,
                            self,
                            '...',
                            callback=self.browse_file,
                            autoDefault=False)
        button.setIcon(self.style().standardIcon(QtGui.QStyle.SP_DirOpenIcon))
        button.setSizePolicy(QtGui.QSizePolicy.Maximum,
                             QtGui.QSizePolicy.Fixed)

        button = gui.button(box,
                            self,
                            "Reload",
                            callback=self.reload,
                            autoDefault=False)
        button.setIcon(self.style().standardIcon(
            QtGui.QStyle.SP_BrowserReload))
        button.setSizePolicy(QtGui.QSizePolicy.Fixed, QtGui.QSizePolicy.Fixed)

        gui.checkBox(
            vbox, self, "new_variables",
            "Columns with same name in different files " +
            "represent different variables")

        box = gui.widgetBox(self.controlArea, "Info", addSpace=True)
        self.infoa = gui.widgetLabel(box, 'No data loaded.')
        self.infob = gui.widgetLabel(box, ' ')
        self.warnings = gui.widgetLabel(box, ' ')
        # Set word wrap, so long warnings won't expand the widget
        self.warnings.setWordWrap(True)
        self.warnings.setSizePolicy(QtGui.QSizePolicy.Ignored,
                                    QtGui.QSizePolicy.MinimumExpanding)

        self.set_file_list()
        if len(self.recent_paths) > 0:
            self.open_file(self.recent_paths[0].abspath)

    def _relocate_recent_files(self):
        """Migrate the legacy ``recent_files`` setting and re-resolve every
        recent path against the current search paths.

        Entries that can neither be resolved nor found, and URL entries that
        are no longer reachable, are dropped from ``recent_paths``.
        """
        if self.recent_files and not self.recent_paths:
            # backward compatibility settings restore
            existing = [
                path for path in self.recent_files if os.path.exists(path)
            ]
            existing = [RecentPath(path, None, None) for path in existing]
            self.recent_paths.extend(existing)
            self.recent_files = []

        paths = [("sample-datasets", get_sample_datasets_dir())]
        basedir = self.workflowEnv().get("basedir", None)
        if basedir is not None:
            paths.append(("basedir", basedir))

        rec = []
        for recent in self.recent_paths:
            resolved = recent.resolve(paths)
            if resolved is not None:
                rec.append(RecentPath.create(resolved.abspath, paths))
                continue
            # Search only once and reuse the result.
            found = recent.search(paths)
            if found is not None:
                rec.append(RecentPath.create(found, paths))
            elif recent.prefix == "url-datasets":
                valid, _ = self.is_url_valid(recent.abspath)
                if valid:
                    rec.append(recent)

        self.recent_paths = rec

    def set_file_list(self):
        """Repopulate the combo box from ``recent_paths``.

        A disabled "(none)" item is shown when there are no recent paths; the
        "Browse documentation data sets..." entry is always appended last.
        """
        self.file_combo.clear()

        if not self.recent_paths:
            self.file_combo.addItem("(none)")
            self.file_combo.model().item(0).setEnabled(False)
        else:
            for i, recent in enumerate(self.recent_paths):
                self.file_combo.addItem(recent.icon, recent.value)
                self.file_combo.model().item(i).setToolTip(recent.abspath)
        self.file_combo.addItem("Browse documentation data sets...")

    def reload(self):
        """Reopen the most recent path when the combo text still refers to it;
        otherwise handle the combo text as a newly entered location."""
        if self.recent_paths:
            basename = self.file_combo.currentText()
            if (basename == self.recent_paths[0].relpath or basename
                    == os.path.basename(self.recent_paths[0].abspath)):
                return self.open_file(self.recent_paths[0].abspath)
        # An index past the recent paths makes select_file() read the text.
        self.select_file(len(self.recent_paths) + 1)

    def select_file(self, n):
        """Handle activation of combo item ``n``.

        An index inside ``recent_paths`` moves that entry to the front. Any
        other non-zero index interprets the combo text: the documentation
        browser entry, an existing local path, or a URL. An invalid URL is
        reported as an error, the item is removed and the text kept for
        editing. Finally, if any recent path exists, the list is refreshed
        and the first path is opened.
        """
        if n < len(self.recent_paths):
            recent = self.recent_paths[n]
            del self.recent_paths[n]
            self.recent_paths.insert(0, recent)
        elif n:
            path = self.file_combo.currentText()
            if path == "Browse documentation data sets...":
                self.browse_file(True)
            elif os.path.exists(path):
                self._add_path(path)
            else:
                valid, err = self.is_url_valid(path)
                if valid:
                    _, filename = os.path.split(path)
                    recent = RecentPath(path, "url-datasets", filename)
                    if recent in self.recent_paths:
                        self.recent_paths.remove(recent)
                    self.recent_paths.insert(0, recent)
                else:
                    self.error(0, err)
                    self.file_combo.removeItem(n)
                    self.file_combo.lineEdit().setText(path)
                    return

        if len(self.recent_paths) > 0:
            self.set_file_list()
            self.open_file(self.recent_paths[0].abspath)

    def browse_file(self, in_demos=0):
        """Let the user pick a file in a dialog and open it.

        With ``in_demos`` set the dialog starts in the example data sets
        directory, trying several candidate locations; an information box is
        shown and nothing is opened if none exists. Otherwise it starts at
        the most recent path or the user's home directory.
        """
        if in_demos:
            try:
                start_file = get_sample_datasets_dir()
            except AttributeError:
                start_file = ""
            if not start_file or not os.path.exists(start_file):
                widgets_dir = os.path.dirname(gui.__file__)
                orange_dir = os.path.dirname(widgets_dir)
                start_file = os.path.join(orange_dir, "doc", "datasets")
            if not start_file or not os.path.exists(start_file):
                d = os.getcwd()
                if os.path.basename(d) == "canvas":
                    d = os.path.dirname(d)
                start_file = os.path.join(os.path.dirname(d), "doc",
                                          "datasets")
            if not os.path.exists(start_file):
                QtGui.QMessageBox.information(
                    None, "File",
                    "Cannot find the directory with example data sets")
                return
        else:
            if self.recent_paths:
                start_file = self.recent_paths[0].abspath
            else:
                start_file = os.path.expanduser("~/")

        filename = QtGui.QFileDialog.getOpenFileName(self,
                                                     'Open Orange Data File',
                                                     start_file,
                                                     self.dlgFormats)
        if not filename:
            return

        self._add_path(filename)
        self.set_file_list()
        self.open_file(self.recent_paths[0].abspath)

    def _add_path(self, filename):
        """Put ``filename`` at the front of ``recent_paths``, removing an
        equal entry that is already in the list."""
        searchpaths = [("sample-datasets", get_sample_datasets_dir())]
        basedir = self.workflowEnv().get("basedir", None)
        if basedir is not None:
            searchpaths.append(("basedir", basedir))

        recent = RecentPath.create(filename, searchpaths)

        if recent in self.recent_paths:
            self.recent_paths.remove(recent)

        self.recent_paths.insert(0, recent)

    @staticmethod
    def is_url_valid(url):
        """Check whether ``url`` can be opened.

        Returns:
            A ``(valid, message)`` tuple; ``message`` is empty on success
            and describes the failure otherwise.
        """
        try:
            with urllib.request.urlopen(url) as f:
                pass
            return bool(f), ""
        except urllib.error.HTTPError:
            return False, "File '{}' is unavailable".format(
                os.path.basename(url))
        except urllib.error.URLError:
            return False, "URL '{}' is unavailable".format(url)
        except ValueError:
            return False, "Unknown file/URL '{}' ".format(url)
        except Exception as e:
            # Any other failure (including plain OSError) is reported as text.
            return False, str(e)

    # Open a file, create data from it and send it over the data channel
    def open_file(self, fn):
        """Load ``fn`` into a table, update the info labels and send it.

        If ``fn`` does not exist but a file with the same base name exists in
        the current directory, that file is loaded instead. On a load error
        the current combo item is removed, the matching recent path is
        dropped and the error is shown. The data (or None) is always sent on
        the "Data" output, except for the "(none)" placeholder, which sends
        None and returns early.
        """
        self.error()
        self.warning()
        self.information()
        fn_original = fn
        if not os.path.exists(fn):
            basename = os.path.basename(fn)
            if os.path.exists(os.path.join(".", basename)):
                fn = os.path.join(".", basename)
                self.information(
                    "Loading '{}' from the current directory.".format(
                        basename))
        if fn == "(none)":
            self.send("Data", None)
            self.infoa.setText("No data loaded")
            self.infob.setText("")
            self.warnings.setText("")
            return

        self.loaded_file = ""

        data = None
        err_value = None
        try:
            # TODO handle self.new_variables
            data = Table(fn)
            self.loaded_file = fn
        except Exception as exc:
            err_value = str(exc)
            if "is being loaded as" in err_value:
                # The message looks like a warning; try loading once more.
                try:
                    data = Table(fn)
                    self.loaded_file = fn
                    self.warning(0, err_value)
                except Exception:
                    # Narrowed from a bare except so KeyboardInterrupt and
                    # SystemExit are no longer swallowed.
                    data = None
        if err_value is not None:
            # NOTE(review): this branch also runs when the retry above
            # succeeded, so an error is shown although data is sent; confirm
            # whether that is intended.
            if fn.startswith("http"):
                err_value = "File '{}' does not contain valid data".format(
                    os.path.basename(fn))
            ind = self.file_combo.currentIndex()
            text = self.file_combo.currentText()
            self.file_combo.removeItem(ind)
            self.file_combo.lineEdit().setText(text)
            if (ind < len(self.recent_paths)
                    and self.recent_paths[ind].abspath == fn_original):
                del self.recent_paths[ind]
            self.error(err_value)
            self.infoa.setText('Data was not loaded due to an error.')
            self.infob.setText('Error:')
            self.warnings.setText(err_value)

        if data is None:
            self.dataReport = None
        else:
            domain = data.domain
            self.infoa.setText(
                "{} instance(s), {} feature(s), {} meta attribute(s)".format(
                    len(data), len(domain.attributes), len(domain.metas)))
            if domain.has_continuous_class:
                self.infob.setText("Regression; numerical class.")
            elif domain.has_discrete_class:
                self.infob.setText("Classification; " +
                                   "discrete class with {} values.".format(
                                       len(domain.class_var.values)))
            elif data.domain.class_vars:
                self.infob.setText("Multi-target; {} target variables.".format(
                    len(data.domain.class_vars)))
            else:
                self.infob.setText("Data has no target variable.")
            self.warnings.setText("")

            add_origin(data, fn)
            # make new data and send it
            file_name = os.path.split(fn)[1]
            # The table is named after the file, without its last extension.
            if "." in file_name:
                data.name = file_name[:file_name.rfind('.')]
            else:
                data.name = file_name

            self.dataReport = self.prepareDataReport(data)
        self.send("Data", data)

    def sendReport(self):
        """Report the loaded file name, its format and the data summary, if a
        data report is available."""
        dataReport = getattr(self, "dataReport", None)
        if dataReport:
            # NOTE(review): self.formats is not defined anywhere in this
            # class; verify that a base class provides it.
            self.reportSettings("File", [
                ("File name", self.loaded_file),
                ("Format",
                 self.formats.get(
                     os.path.splitext(self.loaded_file)[1], "unknown format"))
            ])
            self.reportData(self.dataReport)

    def workflowEnvChanged(self, key, value, oldvalue):
        """Re-resolve the recent paths and refresh the combo box when the
        workflow's "basedir" changes."""
        if key == "basedir":
            self._relocate_recent_files()
            self.set_file_list()