class OWSql(OWWidget):
    """Widget that reads a table (or the result of a custom query) from SQL.

    The connection is opened with ``psycopg2`` from the server, database,
    user name and password entered in the control area. The chosen table is
    wrapped in an ``SqlTable`` and sent on the "Data" output.
    """
    name = "SQL Table"
    id = "orange.widgets.data.sql"
    description = """ Load dataset from SQL."""
    long_description = """ Sql widget connects to server and opens data from there. """
    icon = "icons/SQLTable.svg"
    priority = 10
    category = "Data"
    keywords = ["data", "file", "load", "read"]
    outputs = [
        OutputSignal("Data", Table,
                     doc="Attribute-valued data set read from the input file.")
    ]

    want_main_area = False
    resizing_enabled = False

    # Persisted connection parameters and options.
    host = Setting(None)
    port = Setting(None)
    database = Setting(None)
    schema = Setting(None)
    username = Setting(None)
    password = Setting(None)
    table = Setting(None)
    sql = Setting("")
    guess_values = Setting(True)
    download = Setting(False)

    materialize = Setting(False)
    materialize_table_name = Setting("")

    class Information(OWWidget.Information):
        data_sampled = Msg("Data description was generated from a sample.")

    class Error(OWWidget.Error):
        connection = Msg("{}")
        missing_extension = Msg("Database is missing extension{}: {}")

    def __init__(self):
        """Build the control area and schedule the first connection attempt."""
        super().__init__()

        self._connection = None
        self.data_desc_table = None
        self.database_desc = None

        vbox = gui.vBox(self.controlArea, "Server", addSpace=True)
        box = gui.vBox(vbox)

        self.servertext = QtGui.QLineEdit(box)
        self.servertext.setPlaceholderText('Server')
        self.servertext.setToolTip('Server')
        if self.host:
            # Shown as "host" or "host:port", the format connect() parses.
            self.servertext.setText(
                self.host if not self.port else
                '{}:{}'.format(self.host, self.port))
        box.layout().addWidget(self.servertext)

        self.databasetext = QtGui.QLineEdit(box)
        self.databasetext.setPlaceholderText('Database[/Schema]')
        self.databasetext.setToolTip('Database or optionally Database/Schema')
        if self.database:
            self.databasetext.setText(
                self.database if not self.schema else
                '{}/{}'.format(self.database, self.schema))
        box.layout().addWidget(self.databasetext)

        self.usernametext = QtGui.QLineEdit(box)
        self.usernametext.setPlaceholderText('Username')
        self.usernametext.setToolTip('Username')
        if self.username:
            self.usernametext.setText(self.username)
        box.layout().addWidget(self.usernametext)

        self.passwordtext = QtGui.QLineEdit(box)
        self.passwordtext.setPlaceholderText('Password')
        self.passwordtext.setToolTip('Password')
        self.passwordtext.setEchoMode(QtGui.QLineEdit.Password)
        if self.password:
            self.passwordtext.setText(self.password)
        box.layout().addWidget(self.passwordtext)

        tables = gui.hBox(box)
        self.tablecombo = QtGui.QComboBox(
            tables,
            minimumContentsLength=35,
            sizeAdjustPolicy=QtGui.QComboBox.AdjustToMinimumContentsLength)
        self.tablecombo.setToolTip('table')
        tables.layout().addWidget(self.tablecombo)
        self.tablecombo.activated[int].connect(self.select_table)
        self.connectbutton = gui.button(
            tables, self, '↻', callback=self.connect)
        self.connectbutton.setSizePolicy(
            QtGui.QSizePolicy.Fixed, QtGui.QSizePolicy.Fixed)
        tables.layout().addWidget(self.connectbutton)

        # Editor for the "Custom SQL" entry; hidden until that entry is chosen.
        self.custom_sql = gui.vBox(box)
        self.custom_sql.setVisible(False)
        self.sqltext = QtGui.QTextEdit(self.custom_sql)
        self.sqltext.setPlainText(self.sql)
        self.custom_sql.layout().addWidget(self.sqltext)

        mt = gui.hBox(self.custom_sql)
        cb = gui.checkBox(mt, self, 'materialize', 'Materialize to table ')
        cb.setToolTip('Save results of the query in a table')
        le = gui.lineEdit(mt, self, 'materialize_table_name')
        le.setToolTip('Save results of the query in a table')

        self.executebtn = gui.button(
            self.custom_sql, self, 'Execute', callback=self.open_table)

        box.layout().addWidget(self.custom_sql)

        gui.checkBox(box, self, "guess_values",
                     "Auto-discover discrete variables",
                     callback=self.open_table)

        gui.checkBox(box, self, "download",
                     "Download data to local memory",
                     callback=self.open_table)

        gui.rubber(self.buttonsArea)
        QTimer.singleShot(0, self.connect)

    def error(self, id=0, text=""):
        """Show an error and outline the input fields the message mentions.

        A field is outlined in red when `text` contains the matching keyword
        ('server'/'host', 'role', 'database'); otherwise its style is reset.
        """
        super().error(id, text)
        err_style = 'QLineEdit {border: 2px solid red;}'
        if 'server' in text or 'host' in text:
            self.servertext.setStyleSheet(err_style)
        else:
            self.servertext.setStyleSheet('')
        if 'role' in text:
            self.usernametext.setStyleSheet(err_style)
        else:
            self.usernametext.setStyleSheet('')
        if 'database' in text:
            self.databasetext.setStyleSheet(err_style)
        else:
            self.databasetext.setStyleSheet('')

    def connect(self):
        """Read the connection fields, connect and refresh the table list.

        On ``psycopg2.Error`` the first line of the message is shown as a
        connection error and the table list is cleared.
        """
        hostport = self.servertext.text().split(':')
        self.host = hostport[0]
        self.port = hostport[1] if len(hostport) == 2 else None
        self.database, _, self.schema = self.databasetext.text().partition('/')
        self.username = self.usernametext.text() or None
        self.password = self.passwordtext.text() or None
        try:
            self._connection = psycopg2.connect(
                host=self.host,
                port=self.port,
                database=self.database,
                user=self.username,
                password=self.password)
            self.Error.connection.clear()
            self.database_desc = OrderedDict((
                ("Host", self.host),
                ("Port", self.port),
                ("Database", self.database),
                ("User name", self.username)))
            self.refresh_tables()
            self.select_table()
        except psycopg2.Error as err:
            self.Error.connection(str(err).split('\n')[0])
            self.database_desc = self.data_desc_table = None
            self.tablecombo.clear()

    def refresh_tables(self):
        """Repopulate the table combo box from the database catalogue.

        The list is framed by the "Select a table" and "Custom SQL" entries;
        the previously selected table, if still present, is re-selected.
        """
        self.tablecombo.clear()
        self.Error.missing_extension.clear()
        if self._connection is None:
            self.data_desc_table = None
            return

        # Values are bound by the driver instead of being spliced into the
        # SQL text, so a schema name containing a quote cannot break the
        # query. The LIKE pattern is bound as well, which keeps the query
        # text free of literal '%' characters.
        like_pattern = "\\_\\_%"
        if self.schema:
            schema_clause = "AND n.nspname = %s"
            params = (self.schema, like_pattern)
        else:
            schema_clause = "AND pg_catalog.pg_table_is_visible(c.oid)"
            params = (like_pattern,)

        cur = self._connection.cursor()
        cur.execute("""SELECT --n.nspname as "Schema",
                              c.relname AS "Name"
                       FROM pg_catalog.pg_class c
                  LEFT JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace
                      WHERE c.relkind IN ('r','v','m','S','f','')
                        AND n.nspname <> 'pg_catalog'
                        AND n.nspname <> 'information_schema'
                        AND n.nspname !~ '^pg_toast'
                        {}
                        AND NOT c.relname LIKE %s
                   ORDER BY 1;""".format(schema_clause), params)

        self.tablecombo.addItem("Select a table")
        for i, (table_name, ) in enumerate(cur.fetchall()):
            self.tablecombo.addItem(table_name)
            if table_name == self.table:
                # +1 accounts for the leading "Select a table" entry.
                self.tablecombo.setCurrentIndex(i + 1)
        self.tablecombo.addItem("Custom SQL")

    def select_table(self):
        """React to the combo box: open the table or reveal the SQL editor."""
        curIdx = self.tablecombo.currentIndex()
        if self.tablecombo.itemText(curIdx) != "Custom SQL":
            self.custom_sql.setVisible(False)
            return self.open_table()
        else:
            self.custom_sql.setVisible(True)
            self.data_desc_table = None
            self.database_desc["Table"] = "(None)"
            self.table = None

    def create_extensions(self):
        """Try to create every extension in EXTENSIONS and report failures.

        Does nothing while there is no connection.
        """
        if self._connection is None:
            # open_table() is also a checkbox callback and can therefore run
            # before (or after a failed) connect().
            return
        missing = []
        for ext in EXTENSIONS:
            try:
                cur = self._connection.cursor()
                cur.execute("CREATE EXTENSION IF NOT EXISTS " + ext)
            except psycopg2.OperationalError:
                missing.append(ext)
            finally:
                self._connection.commit()
        self.Error.missing_extension(
            's' if len(missing) > 1 else '',
            ', '.join(missing),
            shown=missing)

    def open_table(self):
        """Load the selected table and send it on the "Data" output."""
        self.create_extensions()
        table = self.get_table()
        self.data_desc_table = table
        self.send("Data", table)

    def get_table(self):
        """Return the selected table, or None when nothing can be loaded.

        Handles the three kinds of combo entries (placeholder, table name,
        custom SQL with optional materialisation), optional discovery of
        discrete values and the optional download into local memory.
        """
        if self.tablecombo.currentIndex() <= 0:
            if self.database_desc:
                self.database_desc["Table"] = "(None)"
            self.data_desc_table = None
            return

        if self.tablecombo.currentIndex() < self.tablecombo.count() - 1:
            self.table = self.tablecombo.currentText()
            self.database_desc["Table"] = self.table
            if "Query" in self.database_desc:
                del self.database_desc["Query"]
        else:
            # Last entry is "Custom SQL": the query text acts as the table.
            self.sql = self.table = self.sqltext.toPlainText()
            if self.materialize:
                if not self.materialize_table_name:
                    self.Error.connection(
                        "Specify a table name to materialize the query")
                    return
                try:
                    # NOTE(review): the table name and the query are
                    # concatenated into SQL as typed by the user; identifiers
                    # cannot be bound as ordinary query parameters.
                    cur = self._connection.cursor()
                    cur.execute("DROP TABLE IF EXISTS " +
                                self.materialize_table_name)
                    cur.execute("CREATE TABLE " +
                                self.materialize_table_name +
                                " AS " + self.table)
                    cur.execute("ANALYZE " + self.materialize_table_name)
                    self.table = self.materialize_table_name
                except psycopg2.ProgrammingError as ex:
                    self.Error.connection(str(ex))
                    return
                finally:
                    self._connection.commit()

        try:
            table = SqlTable(
                dict(host=self.host,
                     port=self.port,
                     database=self.database,
                     user=self.username,
                     password=self.password),
                self.table,
                inspect_values=False)
        except psycopg2.ProgrammingError as ex:
            self.Error.connection(str(ex))
            return

        self.Error.connection.clear()

        sample = False
        if table.approx_len() > LARGE_TABLE and self.guess_values:
            confirm = QMessageBox(self)
            confirm.setIcon(QMessageBox.Warning)
            confirm.setText("Attribute discovery might take "
                            "a long time on large tables.\n"
                            "Do you want to auto discover attributes?")
            confirm.addButton("Yes", QMessageBox.YesRole)
            no_button = confirm.addButton("No", QMessageBox.NoRole)
            sample_button = confirm.addButton("Yes, on a sample",
                                              QMessageBox.YesRole)
            confirm.exec()
            if confirm.clickedButton() == no_button:
                self.guess_values = False
            elif confirm.clickedButton() == sample_button:
                sample = True

        self.Information.clear()
        if self.guess_values:
            QApplication.setOverrideCursor(QCursor(Qt.WaitCursor))
            try:
                if sample:
                    s = table.sample_time(1)
                    domain = s.get_domain(guess_values=True)
                    self.Information.data_sampled()
                else:
                    domain = table.get_domain(guess_values=True)
            finally:
                # Always pop the wait cursor, even if discovery fails.
                QApplication.restoreOverrideCursor()
            table.domain = domain

        if self.download:
            if table.approx_len() > MAX_DL_LIMIT:
                QMessageBox.warning(
                    self, 'Warning',
                    "Data is too big to download.\n"
                    "Consider using the Data Sampler widget to download "
                    "a sample instead.")
                self.download = False
            elif table.approx_len() > AUTO_DL_LIMIT:
                confirm = QMessageBox.question(
                    self, 'Question',
                    "Data appears to be big. Do you really "
                    "want to download it to local memory?",
                    QMessageBox.Yes | QMessageBox.No, QMessageBox.No)
                if confirm == QMessageBox.No:
                    self.download = False
        if self.download:
            table.download_data(MAX_DL_LIMIT)
            table = Table(table)

        return table

    def send_report(self):
        """Add the connection description and, if loaded, the data summary."""
        if not self.database_desc:
            self.report_paragraph("No database connection.")
            return
        self.report_items("Database", self.database_desc)
        if self.data_desc_table:
            self.report_items("Data",
                              report.describe_data(self.data_desc_table))
class OWDiscretize(widget.OWWidget):
    """Widget for choosing how each numeric variable is discretized.

    A default method applies to every variable; individual variables can be
    given their own method in the list view. The transformed table is sent
    on the "Data" output.
    """
    name = "Discretize"
    description = "Discretize the numeric data features."
    icon = "icons/Discretize.svg"
    inputs = [
        InputSignal("Data", Orange.data.Table, "set_data",
                    doc="Input data table")
    ]
    outputs = [
        OutputSignal("Data", Orange.data.Table,
                     doc="Table with discretized features")
    ]

    settingsHandler = settings.DomainContextHandler()
    saved_var_states = settings.ContextSetting({})

    default_method = settings.Setting(2)
    default_k = settings.Setting(3)
    autosend = settings.Setting(True)

    # Indices of the discretization methods (match the radio button order
    # of the per-variable control box).
    Default, Leave, MDL, EqualFreq, EqualWidth, Remove, Custom = range(7)

    want_main_area = False
    resizing_enabled = False

    def __init__(self):
        """Create the default-method box, the variable list and its controls."""
        super().__init__()

        # Input table
        self.data = None
        # Discretization state of each variable, keyed by row index
        self.var_state = {}
        # Per-variable settings stored with the context
        self.saved_var_states = {}

        self.method = 0
        self.k = 5

        default_box = gui.vBox(
            self.controlArea, self.tr("Default Discretization"))
        self.default_bbox = rbox = gui.radioButtons(
            default_box, self, "default_method",
            callback=self._default_disc_changed)
        columns = gui.hBox(rbox)
        self.left = gui.vBox(columns)
        right = gui.vBox(columns)
        columns.layout().setStretch(0, 1)
        columns.layout().setStretch(1, 1)
        options = self.options = [
            self.tr("Default"),
            self.tr("Leave numeric"),
            self.tr("Entropy-MDL discretization"),
            self.tr("Equal-frequency discretization"),
            self.tr("Equal-width discretization"),
            self.tr("Remove numeric variables")
        ]

        for label in options[1:]:
            button = gui.appendRadioButton(rbox, label)
            # Buttons are created in the saved-schema order; only their
            # column depends on the label ("Equal..." goes to the left).
            column = self.left if label.startswith("Equal") else right
            column.layout().addWidget(button)
        gui.separator(right, 18, 18)

        def _intbox(parent, attr, callback):
            # Indented spin box for the number of intervals.
            indented = gui.indentedBox(parent)
            spin = gui.spin(
                indented, self, attr, minv=2, maxv=10,
                label="Num. of intervals:", callback=callback)
            spin.setMaximumWidth(60)
            spin.setAlignment(Qt.AlignRight)
            gui.rubber(spin.box)
            return indented.box

        self.k_general = _intbox(
            self.left, "default_k", self._default_disc_changed)
        self.k_general.layout().setContentsMargins(0, 0, 0, 0)

        vlayout = QHBoxLayout()
        individual_box = gui.widgetBox(
            self.controlArea, "Individual Attribute Settings",
            orientation=vlayout, spacing=8)

        # List of all numeric variables
        self.varview = QListView(selectionMode=QListView.ExtendedSelection)
        self.varview.setItemDelegate(DiscDelegate())
        self.varmodel = itemmodels.VariableListModel()
        self.varview.setModel(self.varmodel)
        self.varview.selectionModel().selectionChanged.connect(
            self._var_selection_changed)

        vlayout.addWidget(self.varview)

        # Method selection for the selected variables
        self.bbox = controlbox = gui.radioButtons(
            individual_box, self, "method",
            callback=self._disc_method_changed)
        vlayout.addWidget(controlbox)

        for label in options[:5]:
            gui.appendRadioButton(controlbox, label)

        self.k_specific = _intbox(controlbox, "k", self._disc_method_changed)

        gui.appendRadioButton(controlbox, "Remove attribute")

        gui.rubber(controlbox)
        controlbox.setEnabled(False)

        self.controlbox = controlbox

        commit_box = gui.auto_commit(
            self.controlArea, self, "autosend", "Apply",
            orientation=Qt.Horizontal,
            checkbox_label="Apply automatically")
        commit_box.layout().insertSpacing(0, 20)
        commit_box.layout().insertWidget(0, self.report_button)
        self._update_spin_positions()

    def set_data(self, data):
        """Accept a new input table (or None) and recompute the output."""
        self.closeContext()
        self.data = data
        if self.data is None:
            self._clear()
        else:
            self._initialize(data)
            self.openContext(data)
            # Bring back the stored per-variable settings ...
            self._restore(self.saved_var_states)
            # ... and compute the cut points that are still missing.
            self._update_points()
        self.unconditional_commit()

    def _initialize(self, data):
        """Set up the variable list and default states for new data."""
        domain = data.domain
        self.class_var = domain.class_var
        self.varmodel[:] = [var for var in domain if var.is_continuous]

        has_disc_class = domain.has_discrete_class

        # MDL is only available with a discrete class.
        self.default_bbox.buttons[self.MDL - 1].setEnabled(has_disc_class)
        self.bbox.buttons[self.MDL].setEnabled(has_disc_class)

        # Move the selection away from a button that has just been disabled.
        if not has_disc_class:
            if self.default_method == self.MDL - 1:
                self.default_method = 0
            if self.method == self.MDL:
                self.method = 0

        # Start every variable from the default state.
        self._reset()

    def _restore(self, saved_state):
        """Apply the states found in `saved_state` to matching variables."""
        def_method = self._current_default_method()
        for row, var in enumerate(self.varmodel):
            key = variable_key(var)
            if key not in saved_state:
                continue
            state = saved_state[key]
            if isinstance(state.method, Default):
                # A stored default follows the currently chosen default.
                state = DState(Default(def_method), None, None)
            self._set_var_state(row, state)

    def _reset(self):
        """Put every variable back into the default state."""
        def_method = self._current_default_method()
        self.var_state = {}
        for row in range(len(self.varmodel)):
            self._set_var_state(
                row, DState(Default(def_method), None, None))

    def _set_var_state(self, index, state):
        """Store `state` for the variable at `index` and update the model."""
        self.var_state[index] = state
        model_index = self.varmodel.index(index)
        self.varmodel.setData(model_index, state, Qt.UserRole)

    def _clear(self):
        """Forget the data and all variable states; re-enable MDL buttons."""
        self.data = None
        self.varmodel[:] = []
        self.var_state = {}
        self.saved_var_states = {}
        self.default_bbox.buttons[self.MDL - 1].setEnabled(True)
        self.bbox.buttons[self.MDL].setEnabled(True)

    def _update_points(self):
        """
        Compute cut points for variables whose state does not have them yet.
        """
        if self.data is None or not len(self.data):
            return

        def induce_cuts(method, data, var):
            dvar = _dispatch[type(method)](method, data, var)
            if dvar is None:
                # the variable is removed
                return [], None
            if dvar is var:
                # the variable is left as it is
                return None, var
            if is_discretized(dvar):
                return dvar.compute_value.points, dvar
            assert False

        for row, var in enumerate(self.varmodel):
            state = self.var_state[row]
            if state.points is None and state.disc_var is None:
                points, dvar = induce_cuts(state.method, self.data, var)
                self._set_var_state(
                    row, state._replace(points=points, disc_var=dvar))

    def _method_index(self, method):
        """Return the position of the type of `method` in METHODS."""
        return METHODS.index((type(method), ))

    def _current_default_method(self):
        """Return a method instance for the selected default method."""
        # The default box has no "Default" button, hence the offset.
        method = self.default_method + 1
        k = self.default_k
        if method == OWDiscretize.Leave:
            return Leave()
        if method == OWDiscretize.MDL:
            return MDL()
        if method == OWDiscretize.EqualFreq:
            return EqualFreq(k)
        if method == OWDiscretize.EqualWidth:
            return EqualWidth(k)
        if method == OWDiscretize.Remove:
            return Remove()
        assert False

    def _current_method(self):
        """Return a method instance for the selected per-variable method."""
        selected = self.method
        if selected == OWDiscretize.Default:
            return Default(self._current_default_method())
        if selected == OWDiscretize.Leave:
            return Leave()
        if selected == OWDiscretize.MDL:
            return MDL()
        if selected == OWDiscretize.EqualFreq:
            return EqualFreq(self.k)
        if selected == OWDiscretize.EqualWidth:
            return EqualWidth(self.k)
        if selected == OWDiscretize.Remove:
            return Remove()
        if selected == OWDiscretize.Custom:
            return Custom(self.cutpoints)
        assert False

    def _update_spin_positions(self):
        """Enable the spin boxes and place them under the active button."""
        default_method = self.default_method
        self.k_general.setDisabled(default_method not in [2, 3])
        if default_method == 2:
            self.left.layout().insertWidget(1, self.k_general)
        elif default_method == 3:
            self.left.layout().insertWidget(2, self.k_general)

        method = self.method
        self.k_specific.setDisabled(method not in [3, 4])
        if method == 3:
            self.bbox.layout().insertWidget(4, self.k_specific)
        elif method == 4:
            self.bbox.layout().insertWidget(5, self.k_specific)

    def _default_disc_changed(self):
        """Propagate a new default method to variables that use the default."""
        self._update_spin_positions()
        default_state = DState(
            Default(self._current_default_method()), None, None)
        for row, _ in enumerate(self.varmodel):
            if isinstance(self.var_state[row].method, Default):
                self._set_var_state(row, default_state)
        self._update_points()
        self.commit()

    def _disc_method_changed(self):
        """Assign the chosen method to all selected variables."""
        self._update_spin_positions()
        selected = self.selected_indices()
        new_state = DState(self._current_method(), None, None)
        for row in selected:
            self._set_var_state(row, new_state)
        self._update_points()
        self.commit()

    def _var_selection_changed(self, *args):
        """Sync the per-variable controls with the selected variables."""
        # distinct methods used by the selected variables
        mset = set([self.var_state[row].method
                    for row in self.selected_indices()])
        self.controlbox.setEnabled(len(mset) > 0)
        if len(mset) != 1:
            # no common method: leave all buttons unchecked
            self.method = -1
            button_group_reset(self.controlbox.group)
        else:
            method = mset.pop()
            self.method = self._method_index(method)
            if isinstance(method, (EqualFreq, EqualWidth)):
                self.k = method.k
            elif isinstance(method, Custom):
                self.cutpoints = method.points
        self._update_spin_positions()

    def selected_indices(self):
        """Return the row numbers selected in the variable list."""
        return [index.row()
                for index in self.varview.selectionModel().selectedRows()]

    def discretized_var(self, source):
        """Return the variable replacing `source`, or None if it is removed."""
        row = list(self.varmodel).index(source)
        state = self.var_state[row]
        disc_var = state.disc_var
        if disc_var is None:
            return None
        if disc_var is source:
            return source
        if state.points == []:
            return None
        return disc_var

    def discretized_domain(self):
        """
        Return the domain that results from the current settings.
        """
        if self.data is None:
            return None

        def disc_var(source):
            if source and source.is_continuous:
                return self.discretized_var(source)
            return source

        source_domain = self.data.domain
        attributes = []
        for var in source_domain.attributes:
            new_var = disc_var(var)
            if new_var is not None:
                attributes.append(new_var)
        class_var = disc_var(source_domain.class_var)

        return Orange.data.Domain(
            attributes, class_var, metas=self.data.domain.metas)

    def commit(self):
        """Send the transformed table, or None when there is no data."""
        output = None
        if self.data is not None and len(self.data):
            output = self.data.transform(self.discretized_domain())
        self.send("Data", output)

    def storeSpecificSettings(self):
        """Save the variable states without the computed cut points."""
        super().storeSpecificSettings()
        stored = {}
        for row, var in enumerate(self.varmodel):
            stored[variable_key(var)] = self.var_state[row]._replace(
                points=None, disc_var=None)
        self.saved_var_states = stored

    def send_report(self):
        """Report the default method and the thresholds of each variable."""
        default_name = self.options[self.default_method + 1]
        self.report_items((("Default method", default_name), ))
        if self.varmodel:
            thresholds = [
                (var.name,
                 DiscDelegate.cutsText(self.var_state[row]) or "leave numeric")
                for row, var in enumerate(self.varmodel)]
            self.report_items("Thresholds", thresholds)
class OWSql(widget.OWWidget):
    """Widget that reads a table (or the result of a custom query) from SQL.

    The connection is opened with ``psycopg2`` from the values entered in the
    control area; the chosen table is wrapped in an ``SqlTable`` and sent on
    the "Data" output.
    """
    name = "SQL Table"
    id = "orange.widgets.data.sql"
    description = """ Load dataset from SQL."""
    long_description = """ Sql widget connects to server and opens data from there. """
    icon = "icons/SQLTable.svg"
    author = "Anze Staric"
    maintainer_email = "*****@*****.**"
    priority = 10
    category = "Data"
    keywords = ["data", "file", "load", "read"]
    outputs = [
        OutputSignal("Data", Table,
                     doc="Attribute-valued data set read from the input file.")
    ]

    want_main_area = False
    resizing_enabled = False

    # Persisted connection parameters and options.
    host = Setting(None)
    port = Setting(None)
    database = Setting(None)
    username = Setting(None)
    password = Setting(None)
    table = Setting(None)
    sql = Setting("")
    guess_values = Setting(True)
    download = Setting(False)

    def __init__(self):
        """Build the control area, connect and reopen the stored table."""
        super().__init__()

        self._connection = None

        vbox = gui.widgetBox(self.controlArea, "Server", addSpace=True)
        box = gui.widgetBox(vbox)

        self.servertext = QtGui.QLineEdit(box)
        self.servertext.setPlaceholderText('Server')
        if self.host:
            # Shown as "host" or "host:port", the format connect() parses.
            self.servertext.setText(
                self.host if not self.port else
                '{}:{}'.format(self.host, self.port))
        box.layout().addWidget(self.servertext)

        self.databasetext = QtGui.QLineEdit(box)
        self.databasetext.setPlaceholderText('Database')
        if self.database:
            self.databasetext.setText(self.database)
        box.layout().addWidget(self.databasetext)

        self.usernametext = QtGui.QLineEdit(box)
        self.usernametext.setPlaceholderText('Username')
        if self.username:
            self.usernametext.setText(self.username)
        box.layout().addWidget(self.usernametext)

        self.passwordtext = QtGui.QLineEdit(box)
        self.passwordtext.setPlaceholderText('Password')
        self.passwordtext.setEchoMode(QtGui.QLineEdit.Password)
        if self.password:
            self.passwordtext.setText(self.password)
        box.layout().addWidget(self.passwordtext)

        tables = gui.widgetBox(box, orientation='horizontal')
        self.tablecombo = QtGui.QComboBox(
            tables,
            minimumContentsLength=35,
            sizeAdjustPolicy=QtGui.QComboBox.AdjustToMinimumContentsLength)
        tables.layout().addWidget(self.tablecombo)
        self.tablecombo.activated[int].connect(self.select_table)
        self.connectbutton = gui.button(
            tables, self, '↻', callback=self.connect)
        self.connectbutton.setSizePolicy(
            QtGui.QSizePolicy.Fixed, QtGui.QSizePolicy.Fixed)
        tables.layout().addWidget(self.connectbutton)

        # Editor for the "Custom SQL" entry; hidden until that entry is chosen.
        self.custom_sql = gui.widgetBox(box, orientation='vertical')
        self.custom_sql.setVisible(False)
        self.sqltext = QtGui.QTextEdit(self.custom_sql)
        self.sqltext.setPlainText(self.sql)
        self.custom_sql.layout().addWidget(self.sqltext)

        self.executebtn = gui.button(
            self.custom_sql, self, 'Execute', callback=self.open_table)

        box.layout().addWidget(self.custom_sql)

        gui.checkBox(box, self, "guess_values",
                     "Auto-discover discrete variables",
                     callback=self.open_table)

        gui.checkBox(box, self, "download",
                     "Download data to local memory",
                     callback=self.open_table)

        self.connect()
        if self.table:
            self.open_table()

    def error(self, id=0, text=""):
        """Show an error and outline the input fields the message mentions.

        A field is outlined in red when `text` contains the matching keyword
        ('server'/'host', 'role', 'database'); otherwise its style is reset.
        """
        super().error(id, text)
        err_style = 'QLineEdit {border: 2px solid red;}'
        if 'server' in text or 'host' in text:
            self.servertext.setStyleSheet(err_style)
        else:
            self.servertext.setStyleSheet('')
        if 'role' in text:
            self.usernametext.setStyleSheet(err_style)
        else:
            self.usernametext.setStyleSheet('')
        if 'database' in text:
            self.databasetext.setStyleSheet(err_style)
        else:
            self.databasetext.setStyleSheet('')

    def connect(self):
        """Read the connection fields, connect and refresh the table list.

        On ``psycopg2.Error`` the first line of the message is shown as an
        error and the table list is cleared.
        """
        hostport = self.servertext.text().split(':')
        self.host = hostport[0]
        self.port = hostport[1] if len(hostport) == 2 else None
        self.database = self.databasetext.text()
        self.username = self.usernametext.text() or None
        self.password = self.passwordtext.text() or None
        try:
            self._connection = psycopg2.connect(
                host=self.host,
                port=self.port,
                database=self.database,
                user=self.username,
                password=self.password)
            self.error(0)
            self.refresh_tables()
        except psycopg2.Error as err:
            self.error(0, str(err).split('\n')[0])
            self.tablecombo.clear()

    def refresh_tables(self):
        """Repopulate the table combo box from the database catalogue.

        The list is framed by the "Select a table" and "Custom SQL" entries;
        the previously selected table, if still present, is re-selected.
        """
        self.tablecombo.clear()
        if self._connection is None:
            return

        cur = self._connection.cursor()
        cur.execute("""SELECT --n.nspname as "Schema",
                              c.relname AS "Name"
                       FROM pg_catalog.pg_class c
                  LEFT JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace
                      WHERE c.relkind IN ('r','v','m','S','f','')
                        AND n.nspname <> 'pg_catalog'
                        AND n.nspname <> 'information_schema'
                        AND n.nspname !~ '^pg_toast'
                        AND pg_catalog.pg_table_is_visible(c.oid)
                        AND NOT c.relname LIKE '\\_\\_%'
                   ORDER BY 1;""")

        self.tablecombo.addItem("Select a table")
        for i, (table_name, ) in enumerate(cur.fetchall()):
            self.tablecombo.addItem(table_name)
            if table_name == self.table:
                # +1 accounts for the leading "Select a table" entry.
                self.tablecombo.setCurrentIndex(i + 1)
        self.tablecombo.addItem("Custom SQL")

    def select_table(self):
        """React to the combo box: open the table or reveal the SQL editor."""
        curIdx = self.tablecombo.currentIndex()
        if self.tablecombo.itemText(curIdx) != "Custom SQL":
            self.custom_sql.setVisible(False)
            return self.open_table()
        else:
            self.custom_sql.setVisible(True)
            self.table = None

    def open_table(self):
        """Load the selected table or custom query and send it on "Data".

        Does nothing while the placeholder entry (or no entry) is selected.
        """
        if self.tablecombo.currentIndex() <= 0:
            return

        if self.tablecombo.currentIndex() < self.tablecombo.count() - 1:
            self.table = self.tablecombo.currentText()
        else:
            # Last entry is "Custom SQL". Store the query in the `sql`
            # setting as well, so the editor is refilled from it next time.
            self.sql = self.table = self.sqltext.toPlainText()

        table = SqlTable(
            dict(host=self.host,
                 port=self.port,
                 database=self.database,
                 user=self.username,
                 password=self.password),
            self.table,
            inspect_values=False)

        sample = False
        if table.approx_len() > LARGE_TABLE and self.guess_values:
            confirm = QMessageBox(self)
            confirm.setIcon(QMessageBox.Warning)
            confirm.setText("Attribute discovery might take "
                            "a long time on large tables.\n"
                            "Do you want to auto discover attributes?")
            confirm.addButton("Yes", QMessageBox.YesRole)
            no_button = confirm.addButton("No", QMessageBox.NoRole)
            sample_button = confirm.addButton("Yes, on a sample",
                                              QMessageBox.YesRole)
            confirm.exec()
            if confirm.clickedButton() == no_button:
                self.guess_values = False
            elif confirm.clickedButton() == sample_button:
                sample = True

        self.information(1)
        if self.guess_values:
            QApplication.setOverrideCursor(QCursor(Qt.WaitCursor))
            try:
                if sample:
                    s = table.sample_time(1)
                    domain = s.get_domain(guess_values=True)
                    self.information(
                        1, "Domain was generated from a sample of the table.")
                else:
                    domain = table.get_domain(guess_values=True)
            finally:
                # Always pop the wait cursor, even if discovery fails.
                QApplication.restoreOverrideCursor()
            table.domain = domain

        if self.download:
            if table.approx_len() > MAX_DL_LIMIT:
                QMessageBox.warning(
                    self, 'Warning',
                    "Data is too big to download.\n"
                    "Consider using the Data Sampler widget to download "
                    "a sample instead.")
                self.download = False
            elif table.approx_len() > AUTO_DL_LIMIT:
                confirm = QMessageBox.question(
                    self, 'Question',
                    "Data appears to be big. Do you really "
                    "want to download it to local memory?",
                    QMessageBox.Yes | QMessageBox.No, QMessageBox.No)
                if confirm == QMessageBox.No:
                    self.download = False
        if self.download:
            table.download_data(MAX_DL_LIMIT)
            table = Table(table)

        self.send("Data", table)
class OWFile(widget.OWWidget):
    """Widget that loads a data table from a file and sends it on "Data".

    Keeps a list of recently used files, most recent first, and shows a
    short summary of the loaded table in the "Info" box.
    """
    name = "File"
    id = "orange.widgets.data.file"
    description = "Read a data from an input file " \
                  "and send the data table to the output."
    icon = "icons/File.svg"
    author = "Janez Demsar"
    maintainer_email = "janez.demsar(@at@)fri.uni-lj.si"
    priority = 10
    category = "Data"
    keywords = ["data", "file", "load", "read"]
    outputs = [
        OutputSignal("Data", Table,
                     doc="Attribute-valued data set read from the input file.")
    ]

    want_main_area = False

    recent_files = Setting(["(none)"])
    new_variables = Setting(False)

    # Filter string for the file dialog: one entry matching every readable
    # extension, followed by one entry per format.
    dlgFormats = (
        "All readable files ({})\n".format(
            " ".join("*" + c for c in FileFormats.readers)) +
        "\n".join("{} (*{})".format(FileFormats.names[ext], ext)
                  for ext in FileFormats.readers))

    def __init__(self):
        """Build the GUI and open the most recent file that still exists."""
        super().__init__()
        self.domain = None
        # Drop entries that no longer exist (this also removes "(none)").
        self.recent_files = [
            fn for fn in self.recent_files if os.path.exists(fn)
        ]
        self.loaded_file = ""

        vbox = gui.widgetBox(self.controlArea, "Data File", addSpace=True)
        box = gui.widgetBox(vbox, orientation=0)
        self.file_combo = QtGui.QComboBox(box)
        self.file_combo.setMinimumWidth(300)
        box.layout().addWidget(self.file_combo)
        self.file_combo.activated[int].connect(self.select_file)

        button = gui.button(box, self, '...', callback=self.browse_file)
        button.setIcon(self.style().standardIcon(QtGui.QStyle.SP_DirOpenIcon))
        button.setSizePolicy(
            QtGui.QSizePolicy.Maximum, QtGui.QSizePolicy.Fixed)

        button = gui.button(
            box, self, "Reload", callback=self.reload, default=True)
        button.setIcon(
            self.style().standardIcon(QtGui.QStyle.SP_BrowserReload))
        button.setSizePolicy(
            QtGui.QSizePolicy.Fixed, QtGui.QSizePolicy.Fixed)

        gui.checkBox(
            vbox, self, "new_variables",
            "Columns with same name in different files " +
            "represent different variables")

        box = gui.widgetBox(self.controlArea, "Info", addSpace=True)
        self.infoa = gui.widgetLabel(box, 'No data loaded.')
        self.infob = gui.widgetLabel(box, ' ')
        self.warnings = gui.widgetLabel(box, ' ')
        # Set word wrap, so long warnings won't expand the widget
        self.warnings.setWordWrap(True)
        self.warnings.setSizePolicy(
            QtGui.QSizePolicy.Ignored, QtGui.QSizePolicy.MinimumExpanding)

        self.set_file_list()
        if len(self.recent_files) > 0:
            self.open_file(self.recent_files[0])

    def set_file_list(self):
        """Fill the combo box with recent file names and the browse entry."""
        self.file_combo.clear()
        if not self.recent_files:
            self.file_combo.addItem("(none)")
        for file in self.recent_files:
            if file == "(none)":
                self.file_combo.addItem("(none)")
            else:
                # Only the base name is shown.
                self.file_combo.addItem(os.path.split(file)[1])
        self.file_combo.addItem("Browse documentation data sets...")

    def reload(self):
        """Open the most recent file again, if there is one."""
        if self.recent_files:
            return self.open_file(self.recent_files[0])

    def select_file(self, n):
        """Handle activation of combo box entry `n`.

        A recent file is moved to the front of the list; any other non-zero
        index opens the browser for the documentation data sets.
        """
        if n < len(self.recent_files):
            name = self.recent_files[n]
            del self.recent_files[n]
            self.recent_files.insert(0, name)
        elif n:
            self.browse_file(True)

        if len(self.recent_files) > 0:
            self.set_file_list()
            self.open_file(self.recent_files[0])

    def browse_file(self, in_demos=0):
        """Let the user pick a file and open it.

        With `in_demos` set, the dialog starts in the directory with the
        example data sets; a message is shown and nothing is opened if that
        directory cannot be found.
        """
        if in_demos:
            try:
                start_file = get_sample_datasets_dir()
            except AttributeError:
                start_file = ""
            # Fall back to directories relative to the package ...
            if not start_file or not os.path.exists(start_file):
                widgets_dir = os.path.dirname(gui.__file__)
                orange_dir = os.path.dirname(widgets_dir)
                start_file = os.path.join(orange_dir, "doc", "datasets")
            # ... and then relative to the current working directory.
            if not start_file or not os.path.exists(start_file):
                d = os.getcwd()
                if os.path.basename(d) == "canvas":
                    d = os.path.dirname(d)
                start_file = os.path.join(
                    os.path.dirname(d), "doc", "datasets")
            if not os.path.exists(start_file):
                QtGui.QMessageBox.information(
                    None, "File",
                    "Cannot find the directory with example data sets")
                return
        else:
            if self.recent_files and self.recent_files[0] != "(none)":
                start_file = self.recent_files[0]
            else:
                start_file = os.path.expanduser("~/")

        filename = QtGui.QFileDialog.getOpenFileName(
            self, 'Open Orange Data File', start_file, self.dlgFormats)
        if not filename:
            return
        if filename in self.recent_files:
            self.recent_files.remove(filename)
        self.recent_files.insert(0, filename)
        self.set_file_list()
        self.open_file(self.recent_files[0])

    def open_file(self, fn):
        """Open file `fn`, create data from it and send it on "Data".

        If `fn` does not exist but a file with the same base name is in the
        current directory, that file is loaded instead. "(none)" sends None
        and resets the info labels. Loading errors are shown in the widget
        rather than raised.
        """
        self.error()
        self.warning()
        self.information()

        if not os.path.exists(fn):
            basename = os.path.basename(fn)
            if os.path.exists(os.path.join(".", basename)):
                fn = os.path.join(".", basename)
                self.information(
                    "Loading '{}' from the current directory.".format(
                        basename))
        if fn == "(none)":
            self.send("Data", None)
            self.infoa.setText("No data loaded")
            self.infob.setText("")
            self.warnings.setText("")
            return

        self.loaded_file = ""

        data = None
        err_value = None
        try:
            # TODO handle self.new_variables
            data = Table(fn)
            self.loaded_file = fn
        except Exception as exc:
            err_value = str(exc)
            if "is being loaded as" in str(err_value):
                try:
                    data = Table(fn)
                    self.loaded_file = fn
                    self.warning(0, err_value)
                except Exception:
                    # Best effort retry; KeyboardInterrupt and SystemExit
                    # are deliberately not swallowed here.
                    data = None
        if err_value is not None:
            self.error(err_value)
            self.infoa.setText('Data was not loaded due to an error.')
            self.infob.setText('Error:')
            self.warnings.setText(err_value)

        if data is None:
            self.dataReport = None
        else:
            domain = data.domain
            self.infoa.setText(
                "{} instance(s), {} feature(s), {} meta attributes".format(
                    len(data), len(domain.attributes), len(domain.metas)))
            if domain.has_continuous_class:
                self.infob.setText("Regression; numerical class.")
            elif domain.has_discrete_class:
                self.infob.setText(
                    "Classification; " +
                    "discrete class with {} values.".format(
                        len(domain.class_var.values)))
            elif data.domain.class_vars:
                self.infob.setText(
                    "Multi-target; {} target variables.".format(
                        len(data.domain.class_vars)))
            else:
                self.infob.setText("Data has no target variable.")
            self.warnings.setText("")

            add_origin(data, fn)
            # Name the table after the file, without the last extension.
            file_name = os.path.split(fn)[1]
            if "." in file_name:
                data.name = file_name[:file_name.rfind('.')]
            else:
                data.name = file_name

            self.dataReport = self.prepareDataReport(data)
        self.send("Data", data)

    def sendReport(self):
        """Report the loaded file name, its format and the data summary."""
        dataReport = getattr(self, "dataReport", None)
        if dataReport:
            # NOTE(review): `self.formats` is not defined in this class;
            # presumably it is provided by a base class -- confirm.
            self.reportSettings("File", [
                ("File name", self.loaded_file),
                ("Format", self.formats.get(
                    os.path.splitext(self.loaded_file)[1],
                    "unknown format"))
            ])
            self.reportData(self.dataReport)
class OWDiscretize(widget.OWWidget):
    """
    Widget that discretizes the continuous variables of the input table.

    A default discretization method applies to every continuous variable;
    individual variables selected in the list view can be given their own
    method. The transformed table is sent on the "Data" output.
    """
    name = "Discretize"
    description = "Discretize the continuous data features."
    icon = "icons/Discretize.svg"
    inputs = [InputSignal("Data", Orange.data.Table, "set_data",
                          doc="Input data table")]
    outputs = [OutputSignal("Data", Orange.data.Table,
                            doc="Table with discretized features")]

    settingsHandler = settings.DomainContextHandler()
    saved_var_states = settings.ContextSetting({})

    # Index into the *default* radio box. That box has no "Default" entry,
    # so the stored value is the method constant minus one.
    default_method = settings.Setting(2)
    default_k = settings.Setting(3)
    autosend = settings.Setting(True)

    #: Discretization methods
    Default, Leave, MDL, EqualFreq, EqualWidth, Remove, Custom = range(7)

    want_main_area = False

    def __init__(self):
        super().__init__()

        #: input data
        self.data = None
        #: Current variable discretization state
        self.var_state = {}
        #: Saved variable discretization settings (context setting)
        self.saved_var_states = {}

        self.method = 0
        self.k = 5

        box = gui.widgetBox(
            self.controlArea, self.tr("Default Discretization"))
        self.default_bbox = rbox = gui.radioButtons(
            box, self, "default_method", callback=self._default_disc_changed)

        options = [
            self.tr("Default"),
            self.tr("Leave continuous"),
            self.tr("Entropy-MDL discretization"),
            self.tr("Equal-frequency discretization"),
            self.tr("Equal-width discretization"),
            self.tr("Remove continuous attributes")
        ]

        # The default box skips the "Default" option itself.
        for opt in options[1:5]:
            gui.appendRadioButton(rbox, opt)

        s = gui.hSlider(gui.indentedBox(rbox),
                        self, "default_k", minValue=2, maxValue=10,
                        label="Num. of intervals:",
                        callback=self._default_disc_changed)
        s.setTracking(False)

        gui.appendRadioButton(rbox, options[-1])

        hlayout = QHBoxLayout()
        box = gui.widgetBox(
            self.controlArea, "Individual Attribute Settings",
            orientation=hlayout, spacing=8
        )

        # List view with all attributes
        self.varview = QListView(selectionMode=QListView.ExtendedSelection)
        self.varview.setItemDelegate(DiscDelegate())
        self.varmodel = itemmodels.VariableListModel()
        self.varview.setModel(self.varmodel)
        self.varview.selectionModel().selectionChanged.connect(
            self._var_selection_changed
        )

        hlayout.addWidget(self.varview)

        # Controls for individual attr settings
        self.bbox = controlbox = gui.radioButtons(
            box, self, "method", callback=self._disc_method_changed
        )
        hlayout.addWidget(controlbox)

        for opt in options[:5]:
            gui.appendRadioButton(controlbox, opt)

        s = gui.hSlider(gui.indentedBox(controlbox),
                        self, "k", minValue=2, maxValue=10,
                        label="Num. of intervals:",
                        callback=self._disc_method_changed)
        s.setTracking(False)

        gui.appendRadioButton(controlbox, "Remove attribute")

        gui.rubber(controlbox)
        # Stays disabled until at least one variable is selected.
        controlbox.setEnabled(False)

        self.controlbox = controlbox

        gui.auto_commit(self.controlArea, self, "autosend", "Apply",
                        orientation="horizontal",
                        checkbox_label="Send data after every change")

    def set_data(self, data):
        """Input handler: set the table to discretize (`None` clears it)."""
        self.closeContext()
        self.data = data
        if self.data is not None:
            self._initialize(data)
            self.openContext(data)
            # Restore the per variable discretization settings
            self._restore(self.saved_var_states)
            # Complete the induction of cut points
            self._update_points()
        else:
            self._clear()
        self.unconditional_commit()

    def _initialize(self, data):
        # Initialize the default variable states for new data.
        self.class_var = data.domain.class_var
        self.varmodel[:] = [var for var in data.domain if var.is_continuous]

        has_disc_class = data.domain.has_discrete_class

        # MDL is only offered when the data has a discrete class.
        self.default_bbox.buttons[self.MDL - 1].setEnabled(has_disc_class)
        self.bbox.buttons[self.MDL].setEnabled(has_disc_class)

        # If the newly disabled MDL button is checked then change it
        if not has_disc_class:
            if self.default_method == self.MDL - 1:
                self.default_method = 0
            if self.method == self.MDL:
                self.method = 0

        # Reset (initialize) the variable discretization states.
        self._reset()

    def _restore(self, saved_state):
        # Restore variable states from a saved_state dictionary.
        def_method = self._current_default_method()
        for i, var in enumerate(self.varmodel):
            key = variable_key(var)
            if key in saved_state:
                state = saved_state[key]
                if isinstance(state.method, Default):
                    # Re-bind "Default" entries to the current default method.
                    state = DState(Default(def_method), None, None)
                self._set_var_state(i, state)

    def _reset(self):
        # restore the individual variable settings back to defaults.
        def_method = self._current_default_method()
        self.var_state = {}
        for i, _ in enumerate(self.varmodel):
            state = DState(Default(def_method), None, None)
            self._set_var_state(i, state)

    def _set_var_state(self, index, state):
        # set the state of variable at `index` to `state`.
        self.var_state[index] = state
        self.varmodel.setData(self.varmodel.index(index), state, Qt.UserRole)

    def _clear(self):
        # Drop all data dependent state and re-enable the MDL buttons.
        self.data = None
        self.varmodel[:] = []
        self.var_state = {}
        self.saved_var_states = {}
        self.default_bbox.buttons[self.MDL - 1].setEnabled(True)
        self.bbox.buttons[self.MDL].setEnabled(True)

    def _update_points(self):
        """
        Update the induced cut points.

        Only variables whose state has neither points nor a discretized
        variable are (re)computed; the result is then committed.
        """
        def induce_cuts(method, data, var):
            dvar = _dispatch[type(method)](method, data, var)
            if dvar is None:
                # removed
                return [], None
            elif dvar is var:
                # no transformation took place
                return None, var
            elif is_discretized(dvar):
                return dvar.compute_value.points, dvar
            else:
                # Explicit raise: a bare `assert` disappears under -O.
                raise AssertionError(
                    "unexpected discretization result: {!r}".format(dvar))

        for i, var in enumerate(self.varmodel):
            state = self.var_state[i]
            if state.points is None and state.disc_var is None:
                points, dvar = induce_cuts(state.method, self.data, var)
                new_state = state._replace(points=points, disc_var=dvar)
                self._set_var_state(i, new_state)

        self.commit()

    def _method_index(self, method):
        return METHODS.index((type(method), ))

    def _current_default_method(self):
        """Return a method instance for the selected default method."""
        # `default_method` indexes the default radio box, which has no
        # "Default" entry; shift it onto the class level method constants.
        method = self.default_method + 1
        k = self.default_k
        if method == OWDiscretize.Leave:
            def_method = Leave()
        elif method == OWDiscretize.MDL:
            def_method = MDL()
        elif method == OWDiscretize.EqualFreq:
            def_method = EqualFreq(k)
        elif method == OWDiscretize.EqualWidth:
            def_method = EqualWidth(k)
        elif method == OWDiscretize.Remove:
            def_method = Remove()
        else:
            # Explicit raise: a bare `assert` disappears under -O.
            raise AssertionError(
                "invalid default method index: {!r}".format(method))
        return def_method

    def _current_method(self):
        """Return a method instance for the per-variable radio selection."""
        if self.method == OWDiscretize.Default:
            method = Default(self._current_default_method())
        elif self.method == OWDiscretize.Leave:
            method = Leave()
        elif self.method == OWDiscretize.MDL:
            method = MDL()
        elif self.method == OWDiscretize.EqualFreq:
            method = EqualFreq(self.k)
        elif self.method == OWDiscretize.EqualWidth:
            method = EqualWidth(self.k)
        elif self.method == OWDiscretize.Remove:
            method = Remove()
        elif self.method == OWDiscretize.Custom:
            method = Custom(self.cutpoints)
        else:
            # Explicit raise: a bare `assert` disappears under -O.
            raise AssertionError(
                "invalid method index: {!r}".format(self.method))
        return method

    def _default_disc_changed(self):
        # Re-assign every variable that follows the default, then recompute.
        method = self._current_default_method()
        state = DState(Default(method), None, None)
        for i, _ in enumerate(self.varmodel):
            if isinstance(self.var_state[i].method, Default):
                self._set_var_state(i, state)
        self._update_points()

    def _disc_method_changed(self):
        # Apply the chosen method to all selected variables, then recompute.
        indices = self.selected_indices()
        method = self._current_method()
        state = DState(method, None, None)
        for idx in indices:
            self._set_var_state(idx, state)
        self._update_points()

    def _var_selection_changed(self, *args):
        indices = self.selected_indices()
        # set of all methods for the current selection
        methods = [self.var_state[i].method for i in indices]
        mset = set(methods)
        self.controlbox.setEnabled(len(mset) > 0)
        if len(mset) == 1:
            method = mset.pop()
            self.method = self._method_index(method)
            if isinstance(method, (EqualFreq, EqualWidth)):
                self.k = method.k
            elif isinstance(method, Custom):
                self.cutpoints = method.points
        else:
            # deselect the current button
            self.method = -1
            bg = self.controlbox.group
            button_group_reset(bg)

    def selected_indices(self):
        """Return the row indices selected in the variable list view."""
        rows = self.varview.selectionModel().selectedRows()
        return [index.row() for index in rows]

    def discretized_var(self, source):
        """
        Return the variable that replaces `source` in the output domain.

        Returns `None` when the variable is removed, and `source` itself
        when it is left unchanged.
        """
        index = list(self.varmodel).index(source)
        state = self.var_state[index]
        if state.disc_var is None:
            return None
        elif state.disc_var is source:
            return source
        elif state.points == []:
            return None
        else:
            return state.disc_var

    def discretized_domain(self):
        """
        Return the current effective discretized domain.
        """
        if self.data is None:
            return None

        def disc_var(source):
            if source and source.is_continuous:
                return self.discretized_var(source)
            else:
                return source

        attributes = [disc_var(v) for v in self.data.domain.attributes]
        # Removed variables are reported as None; drop them.
        attributes = [v for v in attributes if v is not None]

        class_var = disc_var(self.data.domain.class_var)

        domain = Orange.data.Domain(
            attributes, class_var,
            metas=self.data.domain.metas
        )
        return domain

    def commit(self):
        """Send the input data converted to the discretized domain."""
        output = None
        if self.data is not None:
            domain = self.discretized_domain()
            output = self.data.from_table(domain, self.data)
        self.send("Data", output)

    def storeSpecificSettings(self):
        super().storeSpecificSettings()
        # Save only the chosen methods; induced points and variables are
        # stripped from each saved state.
        self.saved_var_states = {
            variable_key(var):
                self.var_state[i]._replace(points=None, disc_var=None)
            for i, var in enumerate(self.varmodel)
        }
class OWSql(OWWidget):
    """
    Widget that connects to a database through one of the available
    backends and sends the selected table, or the result of a custom SQL
    query, on the "Data" output.
    """
    name = "SQL Table"
    id = "orange.widgets.data.sql"
    description = "Load data set from SQL."
    icon = "icons/SQLTable.svg"
    priority = 10
    category = "Data"
    keywords = ["data", "file", "load", "read"]
    outputs = [
        OutputSignal("Data", Table,
                     doc="Attribute-valued data set read from the input file.")
    ]

    want_main_area = False
    resizing_enabled = False

    host = Setting(None)
    port = Setting(None)
    database = Setting(None)
    schema = Setting(None)
    username = Setting(None)
    password = Setting(None)
    table = Setting(None)
    sql = Setting("")
    guess_values = Setting(True)
    download = Setting(False)

    materialize = Setting(False)
    materialize_table_name = Setting("")

    class Information(OWWidget.Information):
        data_sampled = Msg("Data description was generated from a sample.")

    class Error(OWWidget.Error):
        connection = Msg("{}")
        no_backends = Msg("Please install a backend to use this widget")
        missing_extension = Msg("Database is missing extension{}: {}")

    def __init__(self):
        super().__init__()

        self.backend = None
        self.data_desc_table = None
        self.database_desc = None

        vbox = gui.vBox(self.controlArea, "Server", addSpace=True)
        box = gui.vBox(vbox)

        self.backendmodel = BackendModel(Backend.available_backends())
        self.backendcombo = QComboBox(box)
        if len(self.backendmodel):
            self.backendcombo.setModel(self.backendmodel)
        else:
            # Without any backend the whole connection box is unusable.
            self.Error.no_backends()
            box.setEnabled(False)
        box.layout().addWidget(self.backendcombo)

        self.servertext = QLineEdit(box)
        self.servertext.setPlaceholderText('Server')
        self.servertext.setToolTip('Server')
        if self.host:
            self.servertext.setText(
                self.host if not self.port
                else '{}:{}'.format(self.host, self.port))
        box.layout().addWidget(self.servertext)

        self.databasetext = QLineEdit(box)
        self.databasetext.setPlaceholderText('Database[/Schema]')
        self.databasetext.setToolTip('Database or optionally Database/Schema')
        if self.database:
            self.databasetext.setText(
                self.database if not self.schema
                else '{}/{}'.format(self.database, self.schema))
        box.layout().addWidget(self.databasetext)

        self.usernametext = QLineEdit(box)
        self.usernametext.setPlaceholderText('Username')
        self.usernametext.setToolTip('Username')
        if self.username:
            self.usernametext.setText(self.username)
        box.layout().addWidget(self.usernametext)

        self.passwordtext = QLineEdit(box)
        self.passwordtext.setPlaceholderText('Password')
        self.passwordtext.setToolTip('Password')
        self.passwordtext.setEchoMode(QLineEdit.Password)
        if self.password:
            self.passwordtext.setText(self.password)
        box.layout().addWidget(self.passwordtext)

        tables = gui.hBox(box)
        self.tablemodel = TableModel()
        self.tablecombo = QComboBox(
            minimumContentsLength=35,
            sizeAdjustPolicy=QComboBox.AdjustToMinimumContentsLength)
        self.tablecombo.setModel(self.tablemodel)
        self.tablecombo.setToolTip('table')
        tables.layout().addWidget(self.tablecombo)
        self.tablecombo.activated[int].connect(self.select_table)

        self.connectbutton = gui.button(
            tables, self, '↻', callback=self.connect)
        self.connectbutton.setSizePolicy(
            QSizePolicy.Fixed, QSizePolicy.Fixed)
        tables.layout().addWidget(self.connectbutton)

        # Custom SQL editor; shown only when "Custom SQL" is selected.
        self.custom_sql = gui.vBox(box)
        self.custom_sql.setVisible(False)
        self.sqltext = QTextEdit(self.custom_sql)
        self.sqltext.setPlainText(self.sql)
        self.custom_sql.layout().addWidget(self.sqltext)

        mt = gui.hBox(self.custom_sql)
        cb = gui.checkBox(mt, self, 'materialize', 'Materialize to table ')
        cb.setToolTip('Save results of the query in a table')
        le = gui.lineEdit(mt, self, 'materialize_table_name')
        le.setToolTip('Save results of the query in a table')

        self.executebtn = gui.button(
            self.custom_sql, self, 'Execute', callback=self.open_table)

        box.layout().addWidget(self.custom_sql)

        gui.checkBox(box, self, "guess_values",
                     "Auto-discover discrete variables",
                     callback=self.open_table)

        gui.checkBox(box, self, "download",
                     "Download data to local memory",
                     callback=self.open_table)

        gui.rubber(self.buttonsArea)
        # Defer the first connection attempt until the event loop runs.
        QTimer.singleShot(0, self.connect)

    def error(self, id=0, text=""):
        """
        Forward the error to the base class, then outline in red the line
        edits whose field is mentioned in `text` (server/host, role,
        database) and clear the outline on the others.
        """
        super().error(id, text)
        err_style = 'QLineEdit {border: 2px solid red;}'
        if 'server' in text or 'host' in text:
            self.servertext.setStyleSheet(err_style)
        else:
            self.servertext.setStyleSheet('')
        if 'role' in text:
            self.usernametext.setStyleSheet(err_style)
        else:
            self.usernametext.setStyleSheet('')
        if 'database' in text:
            self.databasetext.setStyleSheet(err_style)
        else:
            self.databasetext.setStyleSheet('')

    def connect(self):
        """
        Read the connection fields, instantiate the selected backend and
        refresh the table list. A `BackendError` is shown as a connection
        error and clears the table list.
        """
        hostport = self.servertext.text().split(':')
        self.host = hostport[0]
        self.port = hostport[1] if len(hostport) == 2 else None
        self.database, _, self.schema = self.databasetext.text().partition('/')
        self.username = self.usernametext.text() or None
        self.password = self.passwordtext.text() or None
        try:
            if self.backendcombo.currentIndex() < 0:
                return
            backend = self.backendmodel[self.backendcombo.currentIndex()]
            self.backend = backend(dict(
                host=self.host,
                port=self.port,
                database=self.database,
                user=self.username,
                password=self.password
            ))
            self.Error.connection.clear()
            self.database_desc = OrderedDict((
                ("Host", self.host), ("Port", self.port),
                ("Database", self.database), ("User name", self.username)
            ))
            self.refresh_tables()
            self.select_table()
        except BackendError as err:
            # Only the first line of the backend message is displayed.
            error = str(err).split('\n')[0]
            self.Error.connection(error)
            self.database_desc = self.data_desc_table = None
            self.tablecombo.clear()

    def refresh_tables(self):
        """Repopulate the table model from the current backend."""
        self.tablemodel.clear()
        self.Error.missing_extension.clear()
        if self.backend is None:
            self.data_desc_table = None
            return

        # The first and last entries are pseudo-items, not real tables.
        self.tablemodel.append("Select a table")
        self.tablemodel.extend(self.backend.list_tables(self.schema))
        self.tablemodel.append("Custom SQL")

    def select_table(self):
        """
        React to a change of the table combo box: open the chosen table, or
        reveal the custom SQL editor when "Custom SQL" is selected.
        """
        curIdx = self.tablecombo.currentIndex()
        if self.tablecombo.itemText(curIdx) != "Custom SQL":
            self.custom_sql.setVisible(False)
            return self.open_table()
        else:
            self.custom_sql.setVisible(True)
            self.data_desc_table = None
            # `database_desc` is None until a connection has succeeded;
            # guard it here the same way `get_table` does.
            if self.database_desc is not None:
                self.database_desc["Table"] = "(None)"
            self.table = None

        #self.Error.missing_extension(
        #    's' if len(missing) > 1 else '',
        #    ', '.join(missing),
        #    shown=missing)

    def open_table(self):
        """Fetch the selected table and send it on the "Data" output."""
        table = self.get_table()
        self.data_desc_table = table
        self.send("Data", table)

    def get_table(self):
        """
        Build the table for the current selection.

        Returns `None` when no table is selected or when an error was
        reported through `Error.connection`. Otherwise returns an
        `SqlTable`, or a `Table` when the data was downloaded.
        """
        if self.tablecombo.currentIndex() <= 0:
            if self.database_desc:
                self.database_desc["Table"] = "(None)"
            self.data_desc_table = None
            return

        if self.tablecombo.currentIndex() < self.tablecombo.count() - 1:
            self.table = self.tablemodel[self.tablecombo.currentIndex()]
            self.database_desc["Table"] = self.table
            if "Query" in self.database_desc:
                del self.database_desc["Query"]
        else:
            # Last entry ("Custom SQL"): the query text stands in for the
            # table name.
            self.sql = self.table = self.sqltext.toPlainText()
            if self.materialize:
                if not self.materialize_table_name:
                    self.Error.connection(
                        "Specify a table name to materialize the query")
                    return

                # psycopg2 is only needed for the exception type it may
                # raise; other backends must keep working without it.
                try:
                    import psycopg2
                except ImportError:
                    materialize_errors = (BackendError,)
                else:
                    materialize_errors = (BackendError,
                                          psycopg2.ProgrammingError)

                # NOTE(review): these statements are built by string
                # concatenation from user-entered text (table name and
                # query); the table name is not quoted or validated.
                try:
                    with self.backend.execute_sql_query(
                            "DROP TABLE IF EXISTS " +
                            self.materialize_table_name):
                        pass
                    with self.backend.execute_sql_query(
                            "CREATE TABLE " + self.materialize_table_name +
                            " AS " + self.table):
                        pass
                    with self.backend.execute_sql_query(
                            "ANALYZE " + self.materialize_table_name):
                        pass
                    self.table = self.materialize_table_name
                except materialize_errors as ex:
                    self.Error.connection(str(ex))
                    return

        try:
            table = SqlTable(dict(host=self.host,
                                  port=self.port,
                                  database=self.database,
                                  user=self.username,
                                  password=self.password),
                             self.table,
                             backend=type(self.backend),
                             inspect_values=False)
        except BackendError as ex:
            self.Error.connection(str(ex))
            return

        self.Error.connection.clear()

        sample = False
        if table.approx_len() > LARGE_TABLE and self.guess_values:
            confirm = QMessageBox(self)
            confirm.setIcon(QMessageBox.Warning)
            confirm.setText("Attribute discovery might take "
                            "a long time on large tables.\n"
                            "Do you want to auto discover attributes?")
            confirm.addButton("Yes", QMessageBox.YesRole)
            no_button = confirm.addButton("No", QMessageBox.NoRole)
            sample_button = confirm.addButton("Yes, on a sample",
                                              QMessageBox.YesRole)
            confirm.exec()
            if confirm.clickedButton() == no_button:
                self.guess_values = False
            elif confirm.clickedButton() == sample_button:
                sample = True

        self.Information.clear()
        if self.guess_values:
            QApplication.setOverrideCursor(QCursor(Qt.WaitCursor))
            try:
                if sample:
                    s = table.sample_time(1)
                    domain = s.get_domain(inspect_values=True)
                    self.Information.data_sampled()
                else:
                    domain = table.get_domain(inspect_values=True)
            finally:
                # Always pop the wait cursor, even if inspection fails.
                QApplication.restoreOverrideCursor()
            table.domain = domain

        if self.download:
            if table.approx_len() > MAX_DL_LIMIT:
                QMessageBox.warning(
                    self, 'Warning',
                    "Data is too big to download.\n"
                    "Consider using the Data Sampler widget to download "
                    "a sample instead.")
                self.download = False
            elif table.approx_len() > AUTO_DL_LIMIT:
                confirm = QMessageBox.question(
                    self, 'Question',
                    "Data appears to be big. Do you really "
                    "want to download it to local memory?",
                    QMessageBox.Yes | QMessageBox.No, QMessageBox.No)
                if confirm == QMessageBox.No:
                    self.download = False
        # Checked again: the dialogs above may have switched download off.
        if self.download:
            table.download_data(MAX_DL_LIMIT)
            table = Table(table)

        return table

    def send_report(self):
        """Report the connection description and, if loaded, the data."""
        if not self.database_desc:
            self.report_paragraph("No database connection.")
            return
        self.report_items("Database", self.database_desc)
        if self.data_desc_table:
            self.report_items("Data",
                              report.describe_data(self.data_desc_table))
class OWFile(widget.OWWidget):
    """
    Widget that reads a data table from a local file or a URL and sends it
    on the "Data" output. Recently used paths are kept in the settings and
    offered in an editable combo box.
    """
    name = "File"
    id = "orange.widgets.data.file"
    description = "Read a data from an input file " \
                  "and send the data table to the output."
    icon = "icons/File.svg"
    author = "Janez Demsar"
    maintainer_email = "janez.demsar(@at@)fri.uni-lj.si"
    priority = 10
    category = "Data"
    keywords = ["data", "file", "load", "read"]
    outputs = [
        OutputSignal("Data", Table,
                     doc="Attribute-valued data set read from the input file.")
    ]

    want_main_area = False

    #: back-compatibility: List[str] saved files list
    recent_files = Setting([])
    #: List[RecentPath]
    recent_paths = Setting([])

    new_variables = Setting(False)

    # File dialog filter: one "all readable files" entry followed by one
    # entry per distinct reader, in the order the readers are registered.
    dlgFormats = (
        "All readable files ({});;".format(
            '*' + ' *'.join(FileFormat.readers.keys())) +
        ";;".join("{} (*{})".format(f.DESCRIPTION, ' *'.join(f.EXTENSIONS))
                  for f in sorted(set(FileFormat.readers.values()),
                                  key=list(FileFormat.readers.values()).index)))

    def __init__(self):
        super().__init__()
        self.domain = None
        self.loaded_file = ""
        self._relocate_recent_files()

        vbox = gui.widgetBox(self.controlArea, "Data File / URL",
                             addSpace=True)
        box = gui.widgetBox(vbox, orientation=0)
        self.file_combo = QtGui.QComboBox(box)
        self.file_combo.setMinimumWidth(300)
        self.file_combo.setEditable(True)
        self.file_combo.setItemDelegate(RecentPathDelegate())
        self.file_combo.lineEdit().setStyleSheet("padding-left: 1px;")
        box.layout().addWidget(self.file_combo)
        self.file_combo.activated[int].connect(self.select_file)

        button = gui.button(box, self, '...', callback=self.browse_file,
                            autoDefault=False)
        button.setIcon(self.style().standardIcon(QtGui.QStyle.SP_DirOpenIcon))
        button.setSizePolicy(
            QtGui.QSizePolicy.Maximum, QtGui.QSizePolicy.Fixed)

        button = gui.button(box, self, "Reload",
                            callback=self.reload, autoDefault=False)
        button.setIcon(
            self.style().standardIcon(QtGui.QStyle.SP_BrowserReload))
        button.setSizePolicy(
            QtGui.QSizePolicy.Fixed, QtGui.QSizePolicy.Fixed)

        gui.checkBox(
            vbox, self, "new_variables",
            "Columns with same name in different files " +
            "represent different variables")

        box = gui.widgetBox(self.controlArea, "Info", addSpace=True)
        self.infoa = gui.widgetLabel(box, 'No data loaded.')
        self.infob = gui.widgetLabel(box, ' ')
        self.warnings = gui.widgetLabel(box, ' ')
        # Set word wrap, so long warnings won't expand the widget
        self.warnings.setWordWrap(True)
        self.warnings.setSizePolicy(
            QtGui.QSizePolicy.Ignored,
            QtGui.QSizePolicy.MinimumExpanding)

        self.set_file_list()
        if self.recent_paths:
            self.open_file(self.recent_paths[0].abspath)

    def _relocate_recent_files(self):
        """
        Migrate the legacy `recent_files` list and re-resolve every recent
        path against the current search paths. Entries that can be neither
        resolved, found, nor validated as a URL are dropped.
        """
        if self.recent_files and not self.recent_paths:
            # backward compatibility settings restore
            existing = [RecentPath(path, None, None)
                        for path in self.recent_files
                        if os.path.exists(path)]
            self.recent_paths.extend(existing)
            self.recent_files = []

        paths = [("sample-datasets", get_sample_datasets_dir())]
        basedir = self.workflowEnv().get("basedir", None)
        if basedir is not None:
            paths.append(("basedir", basedir))

        rec = []
        for recent in self.recent_paths:
            resolved = recent.resolve(paths)
            if resolved is not None:
                rec.append(RecentPath.create(resolved.abspath, paths))
                continue
            # Search once and reuse the result.
            found = recent.search(paths)
            if found is not None:
                rec.append(RecentPath.create(found, paths))
            elif recent.prefix == "url-datasets":
                valid, _ = self.is_url_valid(recent.abspath)
                if valid:
                    rec.append(recent)

        self.recent_paths = rec

    def set_file_list(self):
        """Fill the combo box from `recent_paths` plus the browse entry."""
        self.file_combo.clear()
        if not self.recent_paths:
            # Disabled placeholder entry shown when there is no history.
            self.file_combo.addItem("(none)")
            self.file_combo.model().item(0).setEnabled(False)
        else:
            for i, recent in enumerate(self.recent_paths):
                self.file_combo.addItem(recent.icon, recent.value)
                self.file_combo.model().item(i).setToolTip(recent.abspath)
        self.file_combo.addItem("Browse documentation data sets...")

    def reload(self):
        """
        Reload the most recent file when the combo box still shows it;
        otherwise treat the combo box text as a newly entered path.
        """
        if self.recent_paths:
            basename = self.file_combo.currentText()
            latest = self.recent_paths[0]
            if (basename == latest.relpath or
                    basename == os.path.basename(latest.abspath)):
                return self.open_file(latest.abspath)
        # An index past the recent entries makes `select_file` read the
        # combo box text instead of a stored path.
        self.select_file(len(self.recent_paths) + 1)

    def select_file(self, n):
        """
        Handle activation of combo box item `n`.

        A recent path is moved to the front of the list. Any other non-zero
        index is interpreted through the current combo box text: the browse
        entry, an existing local path, or a URL.
        """
        if n < len(self.recent_paths):
            recent = self.recent_paths[n]
            del self.recent_paths[n]
            self.recent_paths.insert(0, recent)
        elif n:
            path = self.file_combo.currentText()
            if path == "Browse documentation data sets...":
                self.browse_file(True)
            elif os.path.exists(path):
                self._add_path(path)
            else:
                valid, err = self.is_url_valid(path)
                if valid:
                    _, filename = os.path.split(path)
                    recent = RecentPath(path, "url-datasets", filename)
                    if recent in self.recent_paths:
                        self.recent_paths.remove(recent)
                    self.recent_paths.insert(0, recent)
                else:
                    # Keep the rejected text in the editor, but not as an
                    # item of the list.
                    self.error(0, err)
                    self.file_combo.removeItem(n)
                    self.file_combo.lineEdit().setText(path)
                    return

        if self.recent_paths:
            self.set_file_list()
            self.open_file(self.recent_paths[0].abspath)

    def browse_file(self, in_demos=0):
        """
        Let the user pick a file and load it.

        :param in_demos: when true, start in the sample data sets directory
            instead of the location of the most recent file
        """
        if in_demos:
            try:
                start_file = get_sample_datasets_dir()
            except AttributeError:
                start_file = ""
            # Fall back to a "doc/datasets" directory next to the package,
            # then to one derived from the working directory.
            if not start_file or not os.path.exists(start_file):
                widgets_dir = os.path.dirname(gui.__file__)
                orange_dir = os.path.dirname(widgets_dir)
                start_file = os.path.join(orange_dir, "doc", "datasets")
            if not start_file or not os.path.exists(start_file):
                d = os.getcwd()
                if os.path.basename(d) == "canvas":
                    d = os.path.dirname(d)
                start_file = os.path.join(os.path.dirname(d),
                                          "doc", "datasets")
            if not os.path.exists(start_file):
                QtGui.QMessageBox.information(
                    None, "File",
                    "Cannot find the directory with example data sets")
                return
        else:
            if self.recent_paths:
                start_file = self.recent_paths[0].abspath
            else:
                start_file = os.path.expanduser("~/")

        filename = QtGui.QFileDialog.getOpenFileName(
            self, 'Open Orange Data File', start_file, self.dlgFormats)
        if not filename:
            return

        self._add_path(filename)
        self.set_file_list()
        self.open_file(self.recent_paths[0].abspath)

    def _add_path(self, filename):
        """Put `filename` at the front of `recent_paths`, without duplicates."""
        searchpaths = [("sample-datasets", get_sample_datasets_dir())]
        basedir = self.workflowEnv().get("basedir", None)
        if basedir is not None:
            searchpaths.append(("basedir", basedir))

        recent = RecentPath.create(filename, searchpaths)
        if recent in self.recent_paths:
            self.recent_paths.remove(recent)
        self.recent_paths.insert(0, recent)

    @staticmethod
    def is_url_valid(url):
        """
        Check whether `url` can be opened.

        :param url: the URL to probe
        :return: a pair `(valid, message)`; `message` is empty on success
            and describes the failure otherwise
        """
        try:
            with urllib.request.urlopen(url) as f:
                pass
            return bool(f), ""
        except urllib.error.HTTPError:
            # Must precede URLError: HTTPError is a subclass of it.
            return False, "File '{}' is unavailable".format(
                os.path.basename(url))
        except urllib.error.URLError:
            return False, "URL '{}' is unavailable".format(url)
        except ValueError:
            return False, "Unknown file/URL '{}' ".format(url)
        except Exception as e:
            # OSError is already covered by Exception.
            return False, str(e)

    # Open a file, create data from it and send it over the data channel
    def open_file(self, fn):
        """
        Load the file or URL `fn` and send the resulting table on "Data".

        If `fn` does not exist but a file with the same base name exists in
        the current directory, that file is loaded instead. The sentinel
        value "(none)" sends `None`. On a load error the current combo box
        entry is removed, the message is shown and `None` is sent.

        :param fn: path or URL to load, or the sentinel "(none)"
        """
        # NOTE(review): the no-argument calls presumably reset previously
        # shown messages -- confirm against the base widget class.
        self.error()
        self.warning()
        self.information()

        fn_original = fn
        if not os.path.exists(fn):
            basename = os.path.basename(fn)
            local_path = os.path.join(".", basename)
            if os.path.exists(local_path):
                fn = local_path
                self.information(
                    "Loading '{}' from the current directory.".format(
                        basename))

        if fn == "(none)":
            self.send("Data", None)
            self.infoa.setText("No data loaded")
            self.infob.setText("")
            self.warnings.setText("")
            return

        self.loaded_file = ""

        data = None
        err_value = None
        try:
            # TODO handle self.new_variables
            data = Table(fn)
            self.loaded_file = fn
        except Exception as exc:
            err_value = str(exc)
            if "is being loaded as" in err_value:
                # This message is treated as recoverable: try once more and
                # report it as a warning if the second attempt succeeds.
                try:
                    data = Table(fn)
                    self.loaded_file = fn
                    self.warning(0, err_value)
                except Exception:
                    # Catch Exception rather than everything, so that
                    # KeyboardInterrupt/SystemExit are not swallowed here.
                    data = None

        if err_value is not None:
            if fn.startswith("http"):
                err_value = "File '{}' does not contain valid data".format(
                    os.path.basename(fn))

            # Drop the failing entry from the list but keep its text in
            # the editor.
            ind = self.file_combo.currentIndex()
            text = self.file_combo.currentText()
            self.file_combo.removeItem(ind)
            self.file_combo.lineEdit().setText(text)
            if ind < len(self.recent_paths) and \
                    self.recent_paths[ind].abspath == fn_original:
                del self.recent_paths[ind]

            self.error(err_value)
            self.infoa.setText('Data was not loaded due to an error.')
            self.infob.setText('Error:')
            self.warnings.setText(err_value)

        if data is None:
            self.dataReport = None
        else:
            domain = data.domain
            self.infoa.setText(
                "{} instance(s), {} feature(s), {} meta attribute(s)".format(
                    len(data), len(domain.attributes), len(domain.metas)))
            if domain.has_continuous_class:
                self.infob.setText("Regression; numerical class.")
            elif domain.has_discrete_class:
                self.infob.setText(
                    "Classification; " +
                    "discrete class with {} values.".format(
                        len(domain.class_var.values)))
            elif data.domain.class_vars:
                self.infob.setText(
                    "Multi-target; {} target variables.".format(
                        len(data.domain.class_vars)))
            else:
                self.infob.setText("Data has no target variable.")
            self.warnings.setText("")

            add_origin(data, fn)
            # Name the table after the file, without its last extension.
            file_name = os.path.split(fn)[1]
            if "." in file_name:
                data.name = file_name[:file_name.rfind('.')]
            else:
                data.name = file_name

            self.dataReport = self.prepareDataReport(data)

        self.send("Data", data)

    def sendReport(self):
        """Report the loaded file's name and format, then the data itself.

        Nothing is reported when no data report has been prepared.
        """
        dataReport = getattr(self, "dataReport", None)
        if dataReport:
            # NOTE(review): `self.formats` is not defined in this class
            # body -- confirm it is provided elsewhere.
            self.reportSettings("File", [
                ("File name", self.loaded_file),
                ("Format", self.formats.get(
                    os.path.splitext(self.loaded_file)[1],
                    "unknown format"))
            ])
            self.reportData(self.dataReport)

    def workflowEnvChanged(self, key, value, oldvalue):
        """Re-resolve the recent paths when the workflow "basedir" changes."""
        if key == "basedir":
            self._relocate_recent_files()
            self.set_file_list()