class OWFile(widget.OWWidget, RecentPathsWComboMixin):
    """Widget that reads a data table from a local file or a URL.

    The loaded table can be adjusted in the embedded domain editor and is
    sent on the ``Data`` output.
    """
    name = "File"
    id = "orange.widgets.data.file"
    description = "Read data from an input file or network " \
                  "and send a data table to the output."
    icon = "icons/File.svg"
    priority = 10
    category = "Data"
    keywords = ["file", "load", "read", "open"]

    class Outputs:
        data = Output("Data", Table,
                      doc="Attribute-valued dataset read from the input file.")

    want_main_area = False

    SEARCH_PATHS = [("sample-datasets", get_sample_datasets_dir())]
    # Local files larger than this (in bytes) are not loaded automatically
    # when the widget is constructed.
    SIZE_LIMIT = 1e7
    # Values of the ``source`` setting.
    LOCAL_FILE, URL = range(2)

    settingsHandler = PerfectDomainContextHandler(
        match_values=PerfectDomainContextHandler.MATCH_VALUES_ALL)

    # pylint seems to want declarations separated from definitions
    recent_paths: List[RecentPath]
    recent_urls: List[str]
    variables: list

    # Overload RecentPathsWidgetMixin.recent_paths to set defaults
    recent_paths = Setting([
        RecentPath("", "sample-datasets", "iris.tab"),
        RecentPath("", "sample-datasets", "titanic.tab"),
        RecentPath("", "sample-datasets", "housing.tab"),
        RecentPath("", "sample-datasets", "heart_disease.tab"),
        RecentPath("", "sample-datasets", "brown-selected.tab"),
        RecentPath("", "sample-datasets", "zoo.tab"),
    ])
    recent_urls = Setting([])
    source = Setting(LOCAL_FILE)
    xls_sheet = ContextSetting("")
    sheet_names = Setting({})
    url = Setting("")

    variables = ContextSetting([])

    domain_editor = SettingProvider(DomainEditor)

    class Warning(widget.OWWidget.Warning):
        file_too_big = widget.Msg(
            "The file is too large to load automatically."
            " Press Reload to load.")
        load_warning = widget.Msg("Read warning:\n{}")

    class Error(widget.OWWidget.Error):
        file_not_found = widget.Msg("File not found.")
        missing_reader = widget.Msg("Missing reader.")
        sheet_error = widget.Msg("Error listing available sheets.")
        unknown = widget.Msg("Read error:\n{}")

    class NoFileSelected:
        """Sentinel returned by ``_get_reader`` when nothing is selected."""

    UserAdviceMessages = [
        widget.Message(
            "Use CSV File Import widget for advanced options "
            "for comma-separated files",
            "use-csv-file-import"),
        widget.Message(
            "This widget loads only tabular data. Use other widgets to load "
            "other data types like models, distance matrices and networks.",
            "other-data-types")
    ]

    def __init__(self):
        """Build the GUI and schedule the initial load."""
        super().__init__()
        RecentPathsWComboMixin.__init__(self)
        self.domain = None
        self.data = None
        self.loaded_file = ""
        self.reader = None

        layout = QGridLayout()
        gui.widgetBox(self.controlArea, margin=0, orientation=layout)
        vbox = gui.radioButtons(None, self, "source", box=True, addSpace=True,
                                callback=self.load_data, addToLayout=False)

        # Row 0: local file selection
        rb_button = gui.appendRadioButton(vbox, "File:", addToLayout=False)
        layout.addWidget(rb_button, 0, 0, Qt.AlignVCenter)

        box = gui.hBox(None, addToLayout=False, margin=0)
        box.setSizePolicy(Policy.MinimumExpanding, Policy.Fixed)
        self.file_combo.setSizePolicy(Policy.MinimumExpanding, Policy.Fixed)
        self.file_combo.activated[int].connect(self.select_file)
        box.layout().addWidget(self.file_combo)
        layout.addWidget(box, 0, 1)

        file_button = gui.button(
            None, self, '...', callback=self.browse_file, autoDefault=False)
        file_button.setIcon(self.style().standardIcon(QStyle.SP_DirOpenIcon))
        file_button.setSizePolicy(Policy.Maximum, Policy.Fixed)
        layout.addWidget(file_button, 0, 2)

        reload_button = gui.button(
            None, self, "Reload", callback=self.load_data, autoDefault=False)
        reload_button.setIcon(self.style().standardIcon(
            QStyle.SP_BrowserReload))
        reload_button.setSizePolicy(Policy.Fixed, Policy.Fixed)
        layout.addWidget(reload_button, 0, 3)

        # Row 2: sheet selection; hidden until a multi-sheet file is loaded
        self.sheet_box = gui.hBox(None, addToLayout=False, margin=0)
        self.sheet_combo = gui.comboBox(
            None, self, "xls_sheet", callback=self.select_sheet,
            sendSelectedValue=True,
        )
        self.sheet_combo.setSizePolicy(Policy.MinimumExpanding, Policy.Fixed)
        self.sheet_label = QLabel()
        self.sheet_label.setText('Sheet')
        self.sheet_label.setSizePolicy(Policy.MinimumExpanding, Policy.Fixed)
        self.sheet_box.layout().addWidget(self.sheet_label, Qt.AlignLeft)
        self.sheet_box.layout().addWidget(self.sheet_combo, Qt.AlignVCenter)
        layout.addWidget(self.sheet_box, 2, 1)
        self.sheet_box.hide()

        # Row 3: URL selection
        rb_button = gui.appendRadioButton(vbox, "URL:", addToLayout=False)
        layout.addWidget(rb_button, 3, 0, Qt.AlignVCenter)

        self.url_combo = url_combo = QComboBox()
        url_model = NamedURLModel(self.sheet_names)
        url_model.wrap(self.recent_urls)
        url_combo.setLineEdit(LineEditSelectOnFocus())
        url_combo.setModel(url_model)
        url_combo.setSizePolicy(Policy.Ignored, Policy.Fixed)
        url_combo.setEditable(True)
        url_combo.setInsertPolicy(url_combo.InsertAtTop)
        url_edit = url_combo.lineEdit()
        l, t, r, b = url_edit.getTextMargins()
        url_edit.setTextMargins(l + 5, t, r, b)
        layout.addWidget(url_combo, 3, 1, 3, 3)
        url_combo.activated.connect(self._url_set)
        # with completer we set that combo box is case sensitive when
        # matching the history
        completer = QCompleter()
        completer.setCaseSensitivity(Qt.CaseSensitive)
        url_combo.setCompleter(completer)

        box = gui.vBox(self.controlArea, "Info")
        self.infolabel = gui.widgetLabel(box, 'No data loaded.')
        self.warnings = gui.widgetLabel(box, '')

        box = gui.widgetBox(self.controlArea, "Columns (Double click to edit)")
        self.domain_editor = DomainEditor(self)
        self.editor_model = self.domain_editor.model()
        box.layout().addWidget(self.domain_editor)

        box = gui.hBox(self.controlArea)
        gui.button(
            box, self, "Browse documentation datasets",
            callback=lambda: self.browse_file(True), autoDefault=False)
        gui.rubber(box)

        gui.button(box, self, "Reset", callback=self.reset_domain_edit)
        self.apply_button = gui.button(
            box, self, "Apply", callback=self.apply_domain_edit)
        self.apply_button.setEnabled(False)
        self.apply_button.setFixedWidth(170)
        self.editor_model.dataChanged.connect(
            lambda: self.apply_button.setEnabled(True))

        self.set_file_list()
        # Must not call open_file from within __init__. open_file
        # explicitly re-enters the event loop (by a progress bar)

        self.setAcceptDrops(True)

        if self.source == self.LOCAL_FILE:
            last_path = self.last_path()
            if last_path and os.path.exists(last_path) and \
                    os.path.getsize(last_path) > self.SIZE_LIMIT:
                self.Warning.file_too_big()
                return

        QTimer.singleShot(0, self.load_data)

    @staticmethod
    def sizeHint():
        """Return the preferred initial size of the widget."""
        return QSize(600, 550)

    def select_file(self, n):
        """Make the n-th recent path current and load it."""
        assert n < len(self.recent_paths)
        super().select_file(n)
        if self.recent_paths:
            self.source = self.LOCAL_FILE
            self.load_data()
            self.set_file_list()

    def select_sheet(self):
        """Store the sheet chosen in the combo and reload the data."""
        self.recent_paths[0].sheet = self.sheet_combo.currentText()
        self.load_data()

    def _url_set(self):
        """Switch to the URL source and load the URL entered in the combo."""
        url = self.url_combo.currentText()
        pos = self.recent_urls.index(url)
        url = url.strip()

        # URLs entered without a scheme are assumed to be http
        if not urlparse(url).scheme:
            url = 'http://' + url
            self.url_combo.setItemText(pos, url)
            self.recent_urls[pos] = url

        self.source = self.URL
        self.load_data()

    def browse_file(self, in_demos=False):
        """Let the user pick a file in a dialog and load it.

        If ``in_demos`` is true, the dialog starts in the directory with
        sample datasets; otherwise in the directory of the last path or in
        the user's home directory.
        """
        if in_demos:
            start_file = get_sample_datasets_dir()
            if not os.path.exists(start_file):
                QMessageBox.information(
                    None, "File",
                    "Cannot find the directory with documentation datasets")
                return
        else:
            start_file = self.last_path() or os.path.expanduser("~/")

        readers = [
            f for f in FileFormat.formats
            if getattr(f, 'read', None) and getattr(f, "EXTENSIONS", None)
        ]
        filename, reader, _ = open_filename_dialog(start_file, None, readers)
        if not filename:
            return
        self.add_path(filename)
        if reader is not None:
            self.recent_paths[0].file_format = reader.qualified_name()

        self.source = self.LOCAL_FILE
        self.load_data()

    # Open a file, create data from it and send it over the data channel
    def load_data(self):
        """Reset the widget state and (re)load data from the current source.

        On failure the error returned by ``_try_load`` is shown and ``None``
        is sent on the output.
        """
        # We need to catch any exception type since anything can happen in
        # file readers
        self.closeContext()
        self.domain_editor.set_domain(None)
        self.apply_button.setEnabled(False)
        self.clear_messages()
        self.set_file_list()

        error = self._try_load()
        if error:
            error()
            self.data = None
            self.sheet_box.hide()
            self.Outputs.data.send(None)
            self.infolabel.setText("No data.")

    def _try_load(self):
        """Try to read the data from the current source.

        Returns ``None`` on success (or when nothing is selected), otherwise
        a callable that shows the appropriate error message.
        """
        # pylint: disable=broad-except
        # The existence check concerns only local files; a missing local
        # file must not prevent loading from a URL.
        if self.source == self.LOCAL_FILE \
                and self.last_path() and not os.path.exists(self.last_path()):
            return self.Error.file_not_found

        try:
            self.reader = self._get_reader()
            assert self.reader is not None
        except Exception:
            return self.Error.missing_reader

        if self.reader is self.NoFileSelected:
            self.Outputs.data.send(None)
            return None

        try:
            self._update_sheet_combo()
        except Exception:
            return self.Error.sheet_error

        with catch_warnings(record=True) as warnings:
            try:
                data = self.reader.read()
            except Exception as ex:
                log.exception(ex)
                return lambda x=ex: self.Error.unknown(str(x))
            if warnings:
                self.Warning.load_warning(warnings[-1].message.args[0])

        self.infolabel.setText(self._describe(data))

        self.loaded_file = self.last_path()
        add_origin(data, self.loaded_file)
        self.data = data
        self.openContext(data.domain)
        self.apply_domain_edit()  # sends data
        return None

    def _get_reader(self) -> FileFormat:
        """Return a reader for the current source.

        Returns ``NoFileSelected`` if there is no local path or the URL is
        empty.
        """
        if self.source == self.LOCAL_FILE:
            path = self.last_path()
            if path is None:
                return self.NoFileSelected
            if self.recent_paths and self.recent_paths[0].file_format:
                # A reader was explicitly chosen for this path
                qname = self.recent_paths[0].file_format
                reader_class = class_from_qualified_name(qname)
                reader = reader_class(path)
            else:
                reader = FileFormat.get_reader(path)
            if self.recent_paths and self.recent_paths[0].sheet:
                reader.select_sheet(self.recent_paths[0].sheet)
            return reader
        else:
            url = self.url_combo.currentText().strip()
            if url:
                return UrlReader(url)
            else:
                return self.NoFileSelected

    def _update_sheet_combo(self):
        """Show the sheet combo if the reader offers two or more sheets."""
        if len(self.reader.sheets) < 2:
            self.sheet_box.hide()
            self.reader.select_sheet(None)
            return

        self.sheet_combo.clear()
        self.sheet_combo.addItems(self.reader.sheets)
        self._select_active_sheet()
        self.sheet_box.show()

    def _select_active_sheet(self):
        """Select the reader's current sheet in the sheet combo."""
        if self.reader.sheet:
            try:
                idx = self.reader.sheets.index(self.reader.sheet)
                self.sheet_combo.setCurrentIndex(idx)
            except ValueError:
                # Requested sheet does not exist in this file
                self.reader.select_sheet(None)
        else:
            self.sheet_combo.setCurrentIndex(0)

    @staticmethod
    def _describe(table):
        """Return an HTML summary of ``table`` for the info label."""
        def missing_prop(prop):
            if prop:
                return f"({prop * 100:.1f}% missing values)"
            else:
                return "(no missing values)"

        domain = table.domain
        text = ""

        attrs = getattr(table, "attributes", {})
        descs = [
            attrs[desc] for desc in ("Name", "Description") if desc in attrs
        ]
        if len(descs) == 2:
            descs[0] = f"<b>{descs[0]}</b>"
        if descs:
            text += f"<p>{'<br/>'.join(descs)}</p>"

        text += f"<p>{len(table)} instance(s)"

        missing_in_attr = missing_prop(table.has_missing_attribute()
                                       and table.get_nan_frequency_attribute())
        missing_in_class = missing_prop(table.has_missing_class()
                                        and table.get_nan_frequency_class())
        text += f"<br/>{len(domain.attributes)} feature(s) {missing_in_attr}"
        if domain.has_continuous_class:
            text += f"<br/>Regression; numerical class {missing_in_class}"
        elif domain.has_discrete_class:
            text += "<br/>Classification; categorical class " \
                f"with {len(domain.class_var.values)} values {missing_in_class}"
        elif table.domain.class_vars:
            text += "<br/>Multi-target; " \
                f"{len(table.domain.class_vars)} target variables " \
                f"{missing_in_class}"
        else:
            text += "<br/>Data has no target variable."
        text += f"<br/>{len(domain.metas)} meta attribute(s)"
        text += "</p>"

        # An empty table has no first/last row to show
        if 'Timestamp' in table.domain and len(table) > 0:
            # Google Forms uses this header to timestamp responses
            text += f"<p>First entry: {table[0, 'Timestamp']}<br/>" \
                f"Last entry: {table[-1, 'Timestamp']}</p>"
        return text

    def storeSpecificSettings(self):
        """Store a copy of the edited variables into the current context."""
        self.current_context.modified_variables = self.variables[:]

    def retrieveSpecificSettings(self):
        """Restore the edited variables from the current context, if any."""
        if hasattr(self.current_context, "modified_variables"):
            self.variables[:] = self.current_context.modified_variables

    def reset_domain_edit(self):
        """Discard changes made in the domain editor and resend the data."""
        self.domain_editor.reset_domain()
        self.apply_domain_edit()

    def apply_domain_edit(self):
        """Apply the domain editor's changes and send the resulting table.

        Sends ``None`` if no data is loaded or if the edited domain has no
        variables and no metas.
        """
        if self.data is None:
            table = None
        else:
            domain, cols = self.domain_editor.get_domain(
                self.data.domain, self.data)
            if not (domain.variables or domain.metas):
                table = None
            else:
                X, y, m = cols
                table = Table.from_numpy(domain, X, y, m, self.data.W)
                table.name = self.data.name
                table.ids = np.array(self.data.ids)
                table.attributes = getattr(self.data, 'attributes', {})

        self.Outputs.data.send(table)
        self.apply_button.setEnabled(False)

    def get_widget_name_extension(self):
        """Return the loaded file's base name without the extension."""
        _, name = os.path.split(self.loaded_file)
        return os.path.splitext(name)[0]

    def send_report(self):
        """Add the data source and a description of the data to the report."""
        def get_ext_name(filename):
            try:
                return FileFormat.names[os.path.splitext(filename)[1]]
            except KeyError:
                return "unknown"

        if self.data is None:
            self.report_paragraph("File", "No file.")
            return

        if self.source == self.LOCAL_FILE:
            home = os.path.expanduser("~")
            if self.loaded_file.startswith(home):
                # os.path.join does not like ~
                name = "~" + os.path.sep + \
                       self.loaded_file[len(home):].lstrip("/").lstrip("\\")
            else:
                name = self.loaded_file
            # Look up the format before the sheet name is appended;
            # otherwise the "extension" would include " (sheet)" and the
            # format would always be reported as unknown.
            format_name = get_ext_name(self.loaded_file)
            if self.sheet_combo.isVisible():
                name += f" ({self.sheet_combo.currentText()})"
            self.report_items("File", [("File name", name),
                                       ("Format", format_name)])
        else:
            # self.url is never assigned in this class; the URL that was
            # loaded is the one in the combo box.
            url = self.url_combo.currentText().strip() or self.url
            self.report_items("Data", [("Resource", url),
                                       ("Format", get_ext_name(url))])

        self.report_data("Data", self.data)

    @staticmethod
    def dragEnterEvent(event):
        """Accept drops of valid file urls"""
        urls = event.mimeData().urls()
        if urls:
            try:
                FileFormat.get_reader(urls[0].toLocalFile())
                event.acceptProposedAction()
            except IOError:
                pass

    def dropEvent(self, event):
        """Handle file drops"""
        urls = event.mimeData().urls()
        if urls:
            self.add_path(urls[0].toLocalFile())  # add first file
            self.source = self.LOCAL_FILE
            self.load_data()

    def workflowEnvChanged(self, key, value, oldvalue):
        """
        Function called when environment changes (e.g. while saving the scheme)
        It makes sure that all environment connected values are modified
        (e.g. relative file paths are changed)
        """
        self.update_file_list(key, value, oldvalue)
class OWFile(widget.OWWidget, RecentPathsWComboMixin):
    """Widget that reads a data table from a local file or a URL.

    The reader can be chosen explicitly, the loaded table can be adjusted
    in the embedded domain editor, and the result is sent on the data
    output.
    """
    name = "文件(File)"
    id = "orange.widgets.data.file"
    description = "从输入文件或网络读取数据并将数据表发送到输出。"
    icon = "icons/File.svg"
    priority = 10
    category = "数据(Data)"
    keywords = ["file", "load", "read", "open", "wenjian"]

    class Outputs:
        data = Output("数据(Data)", Table,
                      doc="Attribute-valued dataset read from the input file.",
                      replaces=['Data'])

    want_main_area = False
    buttons_area_orientation = None

    SEARCH_PATHS = [("sample-datasets", get_sample_datasets_dir())]
    # Local files larger than this (in bytes) are not loaded automatically
    # when the widget is constructed.
    SIZE_LIMIT = 1e7
    # Values of the ``source`` setting.
    LOCAL_FILE, URL = range(2)

    settingsHandler = PerfectDomainContextHandler(
        match_values=PerfectDomainContextHandler.MATCH_VALUES_ALL)

    # pylint seems to want declarations separated from definitions
    recent_paths: List[RecentPath]
    recent_urls: List[str]
    variables: list

    # Overload RecentPathsWidgetMixin.recent_paths to set defaults
    recent_paths = Setting([
        RecentPath("", "sample-datasets", "iris.tab"),
        RecentPath("", "sample-datasets", "titanic.tab"),
        RecentPath("", "sample-datasets", "housing.tab"),
        RecentPath("", "sample-datasets", "heart_disease.tab"),
        RecentPath("", "sample-datasets", "brown-selected.tab"),
        RecentPath("", "sample-datasets", "zoo.tab"),
    ])
    recent_urls = Setting([])
    source = Setting(LOCAL_FILE)
    sheet_names = Setting({})
    url = Setting("")

    variables = ContextSetting([])

    domain_editor = SettingProvider(DomainEditor)

    class Information(widget.OWWidget.Information):
        no_file_selected = Msg("No file selected.")

    class Warning(widget.OWWidget.Warning):
        file_too_big = Msg("The file is too large to load automatically."
                           " Press Reload to load.")
        load_warning = Msg("Read warning:\n{}")
        performance_warning = Msg(
            "Categorical variables with >100 values may decrease performance.")
        renamed_vars = Msg("Some variables have been renamed "
                           "to avoid duplicates.\n{}")
        multiple_targets = Msg("Most widgets do not support multiple targets")

    class Error(widget.OWWidget.Error):
        file_not_found = Msg("File not found.")
        missing_reader = Msg("Missing reader.")
        sheet_error = Msg("Error listing available sheets.")
        unknown = Msg("Read error:\n{}")

    UserAdviceMessages = [
        widget.Message(
            "Use CSV File Import widget for advanced options "
            "for comma-separated files",
            "use-csv-file-import"),
        widget.Message(
            "This widget loads only tabular data. Use other widgets to load "
            "other data types like models, distance matrices and networks.",
            "other-data-types")
    ]

    def __init__(self):
        """Collect the available readers, build the GUI and schedule the
        initial load."""
        super().__init__()
        RecentPathsWComboMixin.__init__(self)
        self.domain = None
        self.data = None
        self.loaded_file = ""
        self.reader = None

        readers = [
            f for f in FileFormat.formats
            if getattr(f, 'read', None) and getattr(f, "EXTENSIONS", None)
        ]

        def group_readers_per_addon_key(w):
            # readers from Orange.data.io should go first
            def package(w):
                package = w.qualified_name().split(".")[:-1]
                package = package[:2]
                if ".".join(package) == "Orange.data":
                    return ["0"]  # force "Orange" to come first
                return package
            return package(w), w.DESCRIPTION

        self.available_readers = sorted(set(readers),
                                        key=group_readers_per_addon_key)

        layout = QGridLayout()
        layout.setSpacing(4)
        gui.widgetBox(self.controlArea, orientation=layout, box='数据源')
        vbox = gui.radioButtons(None, self, "source", box=True,
                                callback=self.load_data, addToLayout=False)

        # Row 0: local file selection
        rb_button = gui.appendRadioButton(vbox, "文件:", addToLayout=False)
        layout.addWidget(rb_button, 0, 0, Qt.AlignVCenter)

        box = gui.hBox(None, addToLayout=False, margin=0)
        box.setSizePolicy(Policy.Expanding, Policy.Fixed)
        self.file_combo.setSizePolicy(Policy.Expanding, Policy.Fixed)
        self.file_combo.setMinimumSize(QSize(100, 1))
        self.file_combo.activated[int].connect(self.select_file)
        box.layout().addWidget(self.file_combo)
        layout.addWidget(box, 0, 1)

        file_button = gui.button(
            None, self, '...', callback=self.browse_file, autoDefault=False)
        file_button.setIcon(self.style().standardIcon(QStyle.SP_DirOpenIcon))
        file_button.setSizePolicy(Policy.Maximum, Policy.Fixed)
        layout.addWidget(file_button, 0, 2)

        reload_button = gui.button(
            None, self, "重新加载", callback=self.load_data, autoDefault=False)
        reload_button.setIcon(self.style().standardIcon(
            QStyle.SP_BrowserReload))
        reload_button.setSizePolicy(Policy.Fixed, Policy.Fixed)
        layout.addWidget(reload_button, 0, 3)

        # Row 2: sheet selection; hidden until a multi-sheet file is loaded
        self.sheet_box = gui.hBox(None, addToLayout=False, margin=0)
        self.sheet_combo = QComboBox()
        self.sheet_combo.activated[str].connect(self.select_sheet)
        self.sheet_combo.setSizePolicy(Policy.Expanding, Policy.Fixed)
        self.sheet_combo.setMinimumSize(QSize(50, 1))
        self.sheet_label = QLabel()
        self.sheet_label.setText('Sheet')
        self.sheet_label.setSizePolicy(Policy.MinimumExpanding, Policy.Fixed)
        self.sheet_box.layout().addWidget(self.sheet_label, Qt.AlignLeft)
        self.sheet_box.layout().addWidget(self.sheet_combo, Qt.AlignVCenter)
        layout.addWidget(self.sheet_box, 2, 1)
        self.sheet_box.hide()

        # Row 3: URL selection
        rb_button = gui.appendRadioButton(vbox, "URL:", addToLayout=False)
        layout.addWidget(rb_button, 3, 0, Qt.AlignVCenter)

        self.url_combo = url_combo = QComboBox()
        url_model = NamedURLModel(self.sheet_names)
        url_model.wrap(self.recent_urls)
        url_combo.setLineEdit(LineEditSelectOnFocus())
        url_combo.setModel(url_model)
        url_combo.setSizePolicy(Policy.Ignored, Policy.Fixed)
        url_combo.setEditable(True)
        url_combo.setInsertPolicy(url_combo.InsertAtTop)
        url_edit = url_combo.lineEdit()
        l, t, r, b = url_edit.getTextMargins()
        url_edit.setTextMargins(l + 5, t, r, b)
        layout.addWidget(url_combo, 3, 1, 1, 3)
        url_combo.activated.connect(self._url_set)
        # with completer we set that combo box is case sensitive when
        # matching the history
        completer = QCompleter()
        completer.setCaseSensitivity(Qt.CaseSensitive)
        url_combo.setCompleter(completer)

        # Reader (file type) selection
        layout = QGridLayout()
        layout.setSpacing(4)
        gui.widgetBox(self.controlArea, orientation=layout, box='文件类型')

        box = gui.hBox(None, addToLayout=False, margin=0)
        box.setSizePolicy(Policy.Expanding, Policy.Fixed)
        self.reader_combo = QComboBox(self)
        self.reader_combo.setSizePolicy(Policy.Expanding, Policy.Fixed)
        self.reader_combo.setMinimumSize(QSize(100, 1))
        self.reader_combo.activated[int].connect(self.select_reader)

        box.layout().addWidget(self.reader_combo)
        layout.addWidget(box, 0, 1)

        box = gui.vBox(self.controlArea, "信息")
        self.infolabel = gui.widgetLabel(box, '未加载数据.')

        box = gui.widgetBox(self.controlArea, "列(双击编辑)")
        self.domain_editor = DomainEditor(self)
        self.editor_model = self.domain_editor.model()
        box.layout().addWidget(self.domain_editor)

        box = gui.hBox(box)
        gui.button(
            box, self, "重置", callback=self.reset_domain_edit,
            autoDefault=False)
        gui.rubber(box)
        self.apply_button = gui.button(
            box, self, "应用", callback=self.apply_domain_edit)
        self.apply_button.setEnabled(False)
        self.apply_button.setFixedWidth(170)
        self.editor_model.dataChanged.connect(
            lambda: self.apply_button.setEnabled(True))

        hBox = gui.hBox(self.controlArea)
        gui.rubber(hBox)
        gui.button(
            hBox, self, "浏览文档数据集",
            callback=lambda: self.browse_file(True), autoDefault=False)
        gui.rubber(hBox)

        self.set_file_list()
        # Must not call open_file from within __init__. open_file
        # explicitly re-enters the event loop (by a progress bar)

        self.setAcceptDrops(True)

        if self.source == self.LOCAL_FILE:
            last_path = self.last_path()
            if last_path and os.path.exists(last_path) and \
                    os.path.getsize(last_path) > self.SIZE_LIMIT:
                self.Warning.file_too_big()
                return

        QTimer.singleShot(0, self.load_data)

    @staticmethod
    def sizeHint():
        """Return the preferred initial size of the widget."""
        return QSize(600, 550)

    def select_file(self, n):
        """Make the n-th recent path current and load it."""
        assert n < len(self.recent_paths)
        super().select_file(n)
        if self.recent_paths:
            self.source = self.LOCAL_FILE
            self.load_data()
            self.set_file_list()

    def select_sheet(self):
        """Store the sheet chosen in the combo and reload the data."""
        self.recent_paths[0].sheet = self.sheet_combo.currentText()
        self.load_data()

    def select_reader(self, n):
        """Set the reader for the current path from combo index ``n``.

        Index 0 stands for the default reader, indices up to
        ``len(self.available_readers)`` refer to the available readers, and
        any further items hold just the qualified name of a reader.
        """
        if self.source != self.LOCAL_FILE:
            return  # ignore for URL's

        if not self.recent_paths:
            return

        path = self.recent_paths[0]
        if n == 0:  # default
            path.file_format = None
        elif n <= len(self.available_readers):
            reader = self.available_readers[n - 1]
            path.file_format = reader.qualified_name()
        else:  # the rest include just qualified names
            path.file_format = self.reader_combo.itemText(n)
        self.load_data()

    def _url_set(self):
        """Switch to the URL source and load the URL entered in the combo."""
        url = self.url_combo.currentText()
        pos = self.recent_urls.index(url)
        url = url.strip()

        # URLs entered without a scheme are assumed to be http
        if not urlparse(url).scheme:
            url = 'http://' + url
            self.url_combo.setItemText(pos, url)
            self.recent_urls[pos] = url

        self.source = self.URL
        self.load_data()

    def browse_file(self, in_demos=False):
        """Let the user pick a file in a dialog and load it.

        If ``in_demos`` is true, the dialog starts in the directory with
        sample datasets; otherwise in the directory of the last path or in
        the user's home directory.
        """
        if in_demos:
            start_file = get_sample_datasets_dir()
            if not os.path.exists(start_file):
                QMessageBox.information(None, "文件", "无法找到文件")
                return
        else:
            start_file = self.last_path() or os.path.expanduser("~/")

        filename, reader, _ = open_filename_dialog(start_file, None,
                                                   self.available_readers)
        if not filename:
            return
        self.add_path(filename)
        if reader is not None:
            self.recent_paths[0].file_format = reader.qualified_name()

        self.source = self.LOCAL_FILE
        self.load_data()

    # Open a file, create data from it and send it over the data channel
    def load_data(self):
        """Reset the widget state and (re)load data from the current source.

        On failure the message returned by ``_try_load`` is shown and
        ``None`` is sent on the output.
        """
        # We need to catch any exception type since anything can happen in
        # file readers
        self.closeContext()
        self.domain_editor.set_domain(None)
        self.apply_button.setEnabled(False)
        self.clear_messages()
        self.set_file_list()

        error = self._try_load()
        if error:
            error()
            self.data = None
            self.sheet_box.hide()
            self.Outputs.data.send(None)
            self.infolabel.setText("无数据")

    def _try_load(self):
        """Try to read the data from the current source.

        Returns ``None`` on success, otherwise a callable that shows the
        appropriate message.
        """
        self._initialize_reader_combo()

        # pylint: disable=broad-except
        if self.source == self.LOCAL_FILE:
            if self.last_path() is None:
                return self.Information.no_file_selected
            elif not os.path.exists(self.last_path()):
                return self.Error.file_not_found
        else:
            url = self.url_combo.currentText().strip()
            if not url:
                return self.Information.no_file_selected

        def mark_problematic_reader():
            # Show the currently selected reader in red
            self.reader_combo.setItemData(self.reader_combo.currentIndex(),
                                          QBrush(Qt.red), Qt.ForegroundRole)

        try:
            self.reader = self._get_reader()  # also sets current reader index
            assert self.reader is not None
        except MissingReaderException:
            mark_problematic_reader()
            return self.Error.missing_reader
        except Exception as ex:
            mark_problematic_reader()
            log.exception(ex)
            return lambda x=ex: self.Error.unknown(str(x))

        try:
            self._update_sheet_combo()
        except Exception:
            return self.Error.sheet_error

        with log_warnings() as warnings:
            try:
                data = self.reader.read()
            except Exception as ex:
                mark_problematic_reader()
                log.exception(ex)
                return lambda x=ex: self.Error.unknown(str(x))
            if warnings:
                self.Warning.load_warning(warnings[-1].message.args[0])

        self.infolabel.setText(self._describe(data))

        self.loaded_file = self.last_path()
        add_origin(data, self.loaded_file)
        self.data = data
        self.openContext(data.domain)
        self.apply_domain_edit()  # sends data
        return None

    def _get_reader(self) -> FileFormat:
        """Return a reader for the current source.

        For local files this also enables the reader combo and selects the
        item that corresponds to the reader in use.

        Raises ``MissingReaderException`` if the reader class stored for the
        path cannot be resolved.
        """
        if self.source == self.LOCAL_FILE:
            path = self.last_path()
            self.reader_combo.setEnabled(True)
            if self.recent_paths and self.recent_paths[0].file_format:
                qname = self.recent_paths[0].file_format
                qname_index = {
                    r.qualified_name(): i
                    for i, r in enumerate(self.available_readers)
                }
                if qname in qname_index:
                    # +1 because item 0 is the default reader
                    self.reader_combo.setCurrentIndex(qname_index[qname] + 1)
                else:
                    # reader may be accessible, but not in
                    # self.available_readers (perhaps its code was moved)
                    self.reader_combo.addItem(qname)
                    self.reader_combo.setCurrentIndex(
                        len(self.reader_combo) - 1)
                try:
                    reader_class = class_from_qualified_name(qname)
                except Exception as ex:
                    raise MissingReaderException(
                        f'Can not find reader "{qname}"') from ex
                reader = reader_class(path)
            else:
                self.reader_combo.setCurrentIndex(0)
                reader = FileFormat.get_reader(path)
            if self.recent_paths and self.recent_paths[0].sheet:
                reader.select_sheet(self.recent_paths[0].sheet)
            return reader
        else:
            url = self.url_combo.currentText().strip()
            return UrlReader(url)

    def _update_sheet_combo(self):
        """Show the sheet combo if the reader offers two or more sheets."""
        if len(self.reader.sheets) < 2:
            self.sheet_box.hide()
            self.reader.select_sheet(None)
            return

        self.sheet_combo.clear()
        self.sheet_combo.addItems(self.reader.sheets)
        self._select_active_sheet()
        self.sheet_box.show()

    def _select_active_sheet(self):
        """Select the reader's current sheet in the sheet combo."""
        try:
            idx = self.reader.sheets.index(self.reader.sheet)
            self.sheet_combo.setCurrentIndex(idx)
        except ValueError:
            # Requested sheet does not exist in this file
            self.reader.select_sheet(None)
            self.sheet_combo.setCurrentIndex(0)

    def _initialize_reader_combo(self):
        """Reset the reader combo to the default item and disable it."""
        self.reader_combo.clear()
        filters = [format_filter(f) for f in self.available_readers]
        self.reader_combo.addItems([DEFAULT_READER_TEXT] + filters)
        self.reader_combo.setCurrentIndex(0)
        self.reader_combo.setDisabled(True)
        # additional readers may be added in self._get_reader()

    @staticmethod
    def _describe(table):
        """Return an HTML summary of ``table`` for the info label."""
        def missing_prop(prop):
            if prop:
                return f"({prop * 100:.1f}% 个缺失值)"
            else:
                return "(无缺失值)"

        domain = table.domain
        text = ""

        attrs = getattr(table, "attributes", {})
        descs = [
            attrs[desc] for desc in ("Name", "Description") if desc in attrs
        ]
        if len(descs) == 2:
            descs[0] = f"<b>{descs[0]}</b>"
        if descs:
            text += f"<p>{'<br/>'.join(descs)}</p>"

        text += f"<p>{len(table)} 条数据"

        missing_in_attr = missing_prop(table.has_missing_attribute()
                                       and table.get_nan_frequency_attribute())
        missing_in_class = missing_prop(table.has_missing_class()
                                        and table.get_nan_frequency_class())
        text += f"<br/>特征数目: {len(domain.attributes)} {missing_in_attr}"
        if domain.has_continuous_class:
            text += f"<br/>回归; 数值类 {missing_in_class}"
        elif domain.has_discrete_class:
            text += "<br/>分类: 分类种类共 " \
                f"{len(domain.class_var.values)} 个 {missing_in_class}"
        elif table.domain.class_vars:
            text += "<br/>Multi-target; " \
                f"{len(table.domain.class_vars)} target variables " \
                f"{missing_in_class}"
        else:
            text += "<br/>Data has no target variable."
        text += f"<br/>元属性: { len(domain.metas)}"
        text += "</p>"

        # An empty table has no first/last row to show
        if 'Timestamp' in table.domain and len(table) > 0:
            # Google Forms uses this header to timestamp responses
            text += f"<p>First entry: {table[0, 'Timestamp']}<br/>" \
                f"Last entry: {table[-1, 'Timestamp']}</p>"
        return text

    def storeSpecificSettings(self):
        """Store a copy of the edited variables into the current context."""
        self.current_context.modified_variables = self.variables[:]

    def retrieveSpecificSettings(self):
        """Restore the edited variables from the current context, if any."""
        if hasattr(self.current_context, "modified_variables"):
            self.variables[:] = self.current_context.modified_variables

    def reset_domain_edit(self):
        """Discard changes made in the domain editor and resend the data."""
        self.domain_editor.reset_domain()
        self.apply_domain_edit()

    def _inspect_discrete_variables(self, domain):
        """Warn if any discrete variable or meta has more than 100 values."""
        # The warning carries no arguments, so raising it once suffices
        if any(var.is_discrete and len(var.values) > 100
               for var in chain(domain.variables, domain.metas)):
            self.Warning.performance_warning()

    def apply_domain_edit(self):
        """Apply the domain editor's changes and send the resulting table.

        Sends ``None`` if no data is loaded or if the edited domain has no
        variables and no metas; sends the loaded table itself if the domain
        is unchanged.
        """
        self.Warning.performance_warning.clear()
        self.Warning.renamed_vars.clear()
        if self.data is None:
            table = None
        else:
            domain, cols, renamed = \
                self.domain_editor.get_domain(self.data.domain, self.data,
                                              deduplicate=True)
            if not (domain.variables or domain.metas):
                table = None
            elif domain is self.data.domain:
                table = self.data
            else:
                X, y, m = cols
                table = Table.from_numpy(domain, X, y, m, self.data.W)
                table.name = self.data.name
                table.ids = np.array(self.data.ids)
                table.attributes = getattr(self.data, 'attributes', {})
                self._inspect_discrete_variables(domain)
                if renamed:
                    self.Warning.renamed_vars(f"Renamed: {', '.join(renamed)}")

        self.Warning.multiple_targets(
            shown=table is not None and len(table.domain.class_vars) > 1)
        self.Outputs.data.send(table)
        self.apply_button.setEnabled(False)

    def get_widget_name_extension(self):
        """Return the loaded file's base name without the extension."""
        _, name = os.path.split(self.loaded_file)
        return os.path.splitext(name)[0]

    def send_report(self):
        """Add the data source and a description of the data to the report."""
        def get_ext_name(filename):
            try:
                return FileFormat.names[os.path.splitext(filename)[1]]
            except KeyError:
                return "unknown"

        if self.data is None:
            self.report_paragraph("File", "No file.")
            return

        if self.source == self.LOCAL_FILE:
            home = os.path.expanduser("~")
            if self.loaded_file.startswith(home):
                # os.path.join does not like ~
                name = "~" + os.path.sep + \
                       self.loaded_file[len(home):].lstrip("/").lstrip("\\")
            else:
                name = self.loaded_file
            # Look up the format before the sheet name is appended;
            # otherwise the "extension" would include " (sheet)" and the
            # format would always be reported as unknown.
            format_name = get_ext_name(self.loaded_file)
            if self.sheet_combo.isVisible():
                name += f" ({self.sheet_combo.currentText()})"
            self.report_items("File", [("File name", name),
                                       ("Format", format_name)])
        else:
            # self.url is never assigned in this class; the URL that was
            # loaded is the one in the combo box.
            url = self.url_combo.currentText().strip() or self.url
            self.report_items("Data", [("Resource", url),
                                       ("Format", get_ext_name(url))])

        self.report_data("Data", self.data)

    @staticmethod
    def dragEnterEvent(event):
        """Accept drops of valid file urls"""
        urls = event.mimeData().urls()
        if urls:
            try:
                FileFormat.get_reader(urls[0].toLocalFile())
                event.acceptProposedAction()
            except MissingReaderException:
                pass

    def dropEvent(self, event):
        """Handle file drops"""
        urls = event.mimeData().urls()
        if urls:
            self.add_path(urls[0].toLocalFile())  # add first file
            self.source = self.LOCAL_FILE
            self.load_data()

    def workflowEnvChanged(self, key, value, oldvalue):
        """
        Function called when environment changes (e.g. while saving the scheme)
        It makes sure that all environment connected values are modified
        (e.g. relative file paths are changed)
        """
        self.update_file_list(key, value, oldvalue)
class OWFile(widget.OWWidget, RecentPathsWComboMixin):
    """Widget that reads a data table from a local file or a URL.

    The loaded table can be adjusted in an embedded domain editor before it
    is sent to the ``Data`` output.
    """

    name = "File"
    id = "orange.widgets.data.file"
    description = "Read data from an input file or network " \
                  "and send a data table to the output."
    icon = "icons/File.svg"
    priority = 10
    category = "Data"
    keywords = ["file", "load", "read", "open"]

    class Outputs:
        data = Output("Data", Table,
                      doc="Attribute-valued dataset read from the input file.")

    want_main_area = False

    SEARCH_PATHS = [("sample-datasets", get_sample_datasets_dir())]
    # Local files larger than this many bytes are not loaded automatically
    # when the widget is created (see __init__).
    SIZE_LIMIT = 1e7
    # Possible values of the `source` setting.
    LOCAL_FILE, URL = range(2)

    settingsHandler = PerfectDomainContextHandler(
        match_values=PerfectDomainContextHandler.MATCH_VALUES_ALL
    )

    # pylint seems to want declarations separated from definitions
    recent_paths: List[RecentPath]
    recent_urls: List[str]
    variables: list

    # Overload RecentPathsWidgetMixin.recent_paths to set defaults
    recent_paths = Setting([
        RecentPath("", "sample-datasets", "iris.tab"),
        RecentPath("", "sample-datasets", "titanic.tab"),
        RecentPath("", "sample-datasets", "housing.tab"),
        RecentPath("", "sample-datasets", "heart_disease.tab"),
    ])
    recent_urls = Setting([])
    source = Setting(LOCAL_FILE)
    xls_sheet = ContextSetting("")
    sheet_names = Setting({})
    url = Setting("")

    variables = ContextSetting([])

    domain_editor = SettingProvider(DomainEditor)

    class Warning(widget.OWWidget.Warning):
        file_too_big = widget.Msg("The file is too large to load automatically."
                                  " Press Reload to load.")
        load_warning = widget.Msg("Read warning:\n{}")

    class Error(widget.OWWidget.Error):
        file_not_found = widget.Msg("File not found.")
        missing_reader = widget.Msg("Missing reader.")
        sheet_error = widget.Msg("Error listing available sheets.")
        unknown = widget.Msg("Read error:\n{}")

    class NoFileSelected:
        # Sentinel returned by _get_reader when there is nothing to read.
        pass

    def __init__(self):
        """Build the controls and schedule the initial load."""
        super().__init__()
        RecentPathsWComboMixin.__init__(self)
        self.domain = None
        self.data = None
        self.loaded_file = ""
        self.reader = None

        layout = QGridLayout()
        gui.widgetBox(self.controlArea, margin=0, orientation=layout)
        vbox = gui.radioButtons(None, self, "source", box=True, addSpace=True,
                                callback=self.load_data, addToLayout=False)

        # Row 0: local file selection.
        rb_button = gui.appendRadioButton(vbox, "File:", addToLayout=False)
        layout.addWidget(rb_button, 0, 0, Qt.AlignVCenter)

        box = gui.hBox(None, addToLayout=False, margin=0)
        box.setSizePolicy(Policy.MinimumExpanding, Policy.Fixed)
        self.file_combo.setSizePolicy(Policy.MinimumExpanding, Policy.Fixed)
        self.file_combo.activated[int].connect(self.select_file)
        box.layout().addWidget(self.file_combo)
        layout.addWidget(box, 0, 1)

        file_button = gui.button(
            None, self, '...', callback=self.browse_file, autoDefault=False)
        file_button.setIcon(self.style().standardIcon(QStyle.SP_DirOpenIcon))
        file_button.setSizePolicy(Policy.Maximum, Policy.Fixed)
        layout.addWidget(file_button, 0, 2)

        reload_button = gui.button(
            None, self, "Reload", callback=self.load_data, autoDefault=False)
        reload_button.setIcon(self.style().standardIcon(
            QStyle.SP_BrowserReload))
        reload_button.setSizePolicy(Policy.Fixed, Policy.Fixed)
        layout.addWidget(reload_button, 0, 3)

        # Row 2: sheet selection, hidden until a reader reports 2+ sheets.
        self.sheet_box = gui.hBox(None, addToLayout=False, margin=0)
        self.sheet_combo = gui.comboBox(None, self, "xls_sheet",
                                        callback=self.select_sheet,
                                        sendSelectedValue=True)
        self.sheet_combo.setSizePolicy(
            Policy.MinimumExpanding, Policy.Fixed)
        self.sheet_label = QLabel()
        self.sheet_label.setText('Sheet')
        self.sheet_label.setSizePolicy(
            Policy.MinimumExpanding, Policy.Fixed)
        self.sheet_box.layout().addWidget(
            self.sheet_label, Qt.AlignLeft)
        self.sheet_box.layout().addWidget(
            self.sheet_combo, Qt.AlignVCenter)
        layout.addWidget(self.sheet_box, 2, 1)
        self.sheet_box.hide()

        # Row 3: URL selection.
        rb_button = gui.appendRadioButton(vbox, "URL:", addToLayout=False)
        layout.addWidget(rb_button, 3, 0, Qt.AlignVCenter)

        self.url_combo = url_combo = QComboBox()
        url_model = NamedURLModel(self.sheet_names)
        url_model.wrap(self.recent_urls)
        url_combo.setLineEdit(LineEditSelectOnFocus())
        url_combo.setModel(url_model)
        url_combo.setSizePolicy(Policy.MinimumExpanding, Policy.Fixed)
        url_combo.setEditable(True)
        url_combo.setInsertPolicy(url_combo.InsertAtTop)
        url_edit = url_combo.lineEdit()
        left, top, right, bottom = url_edit.getTextMargins()
        url_edit.setTextMargins(left + 5, top, right, bottom)
        layout.addWidget(url_combo, 3, 1, 3, 3)
        url_combo.activated.connect(self._url_set)

        box = gui.vBox(self.controlArea, "Info")
        self.infolabel = gui.widgetLabel(box, 'No data loaded.')
        self.warnings = gui.widgetLabel(box, '')

        box = gui.widgetBox(self.controlArea, "Columns (Double click to edit)")
        self.domain_editor = DomainEditor(self)
        self.editor_model = self.domain_editor.model()
        box.layout().addWidget(self.domain_editor)

        box = gui.hBox(self.controlArea)
        gui.button(
            box, self, "Browse documentation datasets",
            callback=lambda: self.browse_file(True), autoDefault=False)
        gui.rubber(box)

        gui.button(
            box, self, "Reset", callback=self.reset_domain_edit)
        self.apply_button = gui.button(
            box, self, "Apply", callback=self.apply_domain_edit)
        self.apply_button.setEnabled(False)
        self.apply_button.setFixedWidth(170)
        # Any edit in the domain editor re-enables Apply.
        self.editor_model.dataChanged.connect(
            lambda: self.apply_button.setEnabled(True))

        self.set_file_list()
        # Must not call open_file from within __init__. open_file
        # explicitly re-enters the event loop (by a progress bar)

        self.setAcceptDrops(True)

        if self.source == self.LOCAL_FILE:
            last_path = self.last_path()
            if last_path and os.path.exists(last_path) and \
                    os.path.getsize(last_path) > self.SIZE_LIMIT:
                # Too big to load automatically; the user must press Reload.
                self.Warning.file_too_big()
                return

        QTimer.singleShot(0, self.load_data)

    @staticmethod
    def sizeHint():
        """Return the preferred widget size."""
        return QSize(600, 550)

    def select_file(self, n):
        """Select the n-th recent path and load it."""
        assert n < len(self.recent_paths)
        super().select_file(n)
        if self.recent_paths:
            self.source = self.LOCAL_FILE
            self.load_data()
            self.set_file_list()

    def select_sheet(self):
        """Store the chosen sheet on the current recent path and reload."""
        self.recent_paths[0].sheet = self.sheet_combo.currentText()
        self.load_data()

    def _url_set(self):
        """Normalize the URL chosen in the combo and load from it.

        A URL without a scheme gets ``http://`` prepended; the combo item
        and the matching ``recent_urls`` entry are updated accordingly.
        """
        url = self.url_combo.currentText()
        pos = self.recent_urls.index(url)
        url = url.strip()

        if not urlparse(url).scheme:
            url = 'http://' + url
            self.url_combo.setItemText(pos, url)
            self.recent_urls[pos] = url

        self.source = self.URL
        self.load_data()

    def browse_file(self, in_demos=False):
        """Let the user pick a file in a dialog and load it.

        When `in_demos` is true the dialog starts in the sample datasets
        directory (an information box is shown and nothing is loaded when
        that directory does not exist); otherwise it starts at the last
        used path or the user's home directory.
        """
        if in_demos:
            start_file = get_sample_datasets_dir()
            if not os.path.exists(start_file):
                QMessageBox.information(
                    None, "File",
                    "Cannot find the directory with documentation datasets")
                return
        else:
            start_file = self.last_path() or os.path.expanduser("~/")

        # Offer only formats that can read and that declare extensions.
        readers = [f for f in FileFormat.formats
                   if getattr(f, 'read', None)
                   and getattr(f, "EXTENSIONS", None)]
        filename, reader, _ = open_filename_dialog(start_file, None, readers)
        if not filename:
            return
        self.add_path(filename)
        if reader is not None:
            # Remember the reader explicitly chosen in the dialog.
            self.recent_paths[0].file_format = reader.qualified_name()

        self.source = self.LOCAL_FILE
        self.load_data()

    def load_data(self):
        """Open a file, create data from it and send it over the data channel.

        On failure the error returned by `_try_load` is shown, the sheet
        selector is hidden and ``None`` is sent.
        """
        # We need to catch any exception type since anything can happen in
        # file readers
        self.closeContext()
        self.domain_editor.set_domain(None)
        self.apply_button.setEnabled(False)
        self.clear_messages()
        self.set_file_list()

        error = self._try_load()
        if error:
            error()
            self.data = None
            self.sheet_box.hide()
            self.Outputs.data.send(None)
            self.infolabel.setText("No data.")

    def _try_load(self):
        """Read the data from the selected source.

        Returns ``None`` on success (or when nothing is selected), otherwise
        a callable that shows the appropriate error message when invoked.
        """
        # pylint: disable=broad-except
        # The local path is only relevant for the local-file source; a
        # missing recent file must not prevent loading from a URL.
        if self.source == self.LOCAL_FILE:
            path = self.last_path()
            if path and not os.path.exists(path):
                return self.Error.file_not_found

        try:
            self.reader = self._get_reader()
            assert self.reader is not None
        except Exception:
            return self.Error.missing_reader

        if self.reader is self.NoFileSelected:
            self.Outputs.data.send(None)
            return None

        try:
            self._update_sheet_combo()
        except Exception:
            return self.Error.sheet_error

        with catch_warnings(record=True) as warnings:
            try:
                data = self.reader.read()
            except Exception as ex:
                log.exception(ex)
                # Bind the exception as a default: `ex` is unbound once the
                # except clause ends.
                return lambda x=ex: self.Error.unknown(str(x))
            if warnings:
                # Only the most recent warning is displayed.
                self.Warning.load_warning(warnings[-1].message.args[0])

        self.infolabel.setText(self._describe(data))

        self.loaded_file = self.last_path()
        add_origin(data, self.loaded_file)
        self.data = data
        self.openContext(data.domain)
        self.apply_domain_edit()  # sends data
        return None

    def _get_reader(self) -> FileFormat:
        """Return a reader for the current source.

        Returns the `NoFileSelected` sentinel when there is no local path
        or the URL field is empty.
        """
        if self.source == self.LOCAL_FILE:
            path = self.last_path()
            if path is None:
                return self.NoFileSelected
            if self.recent_paths and self.recent_paths[0].file_format:
                # A reader was stored for this path; use it instead of
                # guessing from the file name.
                qname = self.recent_paths[0].file_format
                reader_class = class_from_qualified_name(qname)
                reader = reader_class(path)
            else:
                reader = FileFormat.get_reader(path)
            if self.recent_paths and self.recent_paths[0].sheet:
                reader.select_sheet(self.recent_paths[0].sheet)
            return reader
        else:
            url = self.url_combo.currentText().strip()
            if url:
                return UrlReader(url)
            else:
                return self.NoFileSelected

    def _update_sheet_combo(self):
        """Show the sheet selector when the reader offers 2+ sheets."""
        if len(self.reader.sheets) < 2:
            self.sheet_box.hide()
            self.reader.select_sheet(None)
            return

        self.sheet_combo.clear()
        self.sheet_combo.addItems(self.reader.sheets)
        self._select_active_sheet()
        self.sheet_box.show()

    def _select_active_sheet(self):
        """Point the sheet combo at the reader's sheet (first by default)."""
        if self.reader.sheet:
            try:
                idx = self.reader.sheets.index(self.reader.sheet)
                self.sheet_combo.setCurrentIndex(idx)
            except ValueError:
                # Requested sheet does not exist in this file
                self.reader.select_sheet(None)
        else:
            self.sheet_combo.setCurrentIndex(0)

    @staticmethod
    def _describe(table):
        """Return an HTML summary of `table` for the Info box.

        The summary lists the optional name/description attributes, the
        number of instances, features and metas, the kind of target and
        the share of missing values.
        """
        def missing_prop(prop):
            # `prop` is either a falsy value or a fraction of missing values.
            if prop:
                return f"({prop * 100:.1f}% missing values)"
            else:
                return "(no missing values)"

        domain = table.domain
        text = ""

        attrs = getattr(table, "attributes", {})
        descs = [attrs[desc]
                 for desc in ("Name", "Description") if desc in attrs]
        if len(descs) == 2:
            # With both present, the name is set in bold as a title.
            descs[0] = f"<b>{descs[0]}</b>"
        if descs:
            text += f"<p>{'<br/>'.join(descs)}</p>"

        text += f"<p>{len(table)} instance(s)"

        missing_in_attr = missing_prop(table.has_missing_attribute()
                                       and table.get_nan_frequency_attribute())
        missing_in_class = missing_prop(table.has_missing_class()
                                        and table.get_nan_frequency_class())
        text += f"<br/>{len(domain.attributes)} feature(s) {missing_in_attr}"
        if domain.has_continuous_class:
            text += f"<br/>Regression; numerical class {missing_in_class}"
        elif domain.has_discrete_class:
            text += "<br/>Classification; categorical class " \
                f"with {len(domain.class_var.values)} values {missing_in_class}"
        elif table.domain.class_vars:
            text += "<br/>Multi-target; " \
                f"{len(table.domain.class_vars)} target variables " \
                f"{missing_in_class}"
        else:
            text += "<br/>Data has no target variable."
        text += f"<br/>{len(domain.metas)} meta attribute(s)"
        text += "</p>"

        if 'Timestamp' in table.domain:
            # Google Forms uses this header to timestamp responses
            text += f"<p>First entry: {table[0, 'Timestamp']}<br/>" \
                f"Last entry: {table[-1, 'Timestamp']}</p>"
        return text

    def storeSpecificSettings(self):
        """Save a copy of the edited variable list on the current context."""
        self.current_context.modified_variables = self.variables[:]

    def retrieveSpecificSettings(self):
        """Restore the variable list saved on the current context, if any."""
        if hasattr(self.current_context, "modified_variables"):
            self.variables[:] = self.current_context.modified_variables

    def reset_domain_edit(self):
        """Reset the domain editor and re-send the data."""
        self.domain_editor.reset_domain()
        self.apply_domain_edit()

    def apply_domain_edit(self):
        """Build the output table from the edited domain and send it.

        Sends ``None`` when no data is loaded or when the edited domain has
        neither variables nor metas, and disables the Apply button afterwards.
        """
        if self.data is None:
            table = None
        else:
            domain, cols = self.domain_editor.get_domain(self.data.domain,
                                                         self.data)
            if not (domain.variables or domain.metas):
                table = None
            else:
                X, y, m = cols
                table = Table.from_numpy(domain, X, y, m, self.data.W)
                # Carry name, ids and attributes over from the loaded table.
                table.name = self.data.name
                table.ids = np.array(self.data.ids)
                table.attributes = getattr(self.data, 'attributes', {})

        self.Outputs.data.send(table)
        self.apply_button.setEnabled(False)

    def get_widget_name_extension(self):
        """Return the loaded file's base name without directory or extension."""
        _, name = os.path.split(self.loaded_file)
        return os.path.splitext(name)[0]

    def send_report(self):
        """Add the data source (file or URL), its format and the data itself
        to the report; report "No file." when nothing is loaded."""
        def get_ext_name(filename):
            # Map the file extension to a format name; "unknown" when the
            # extension is not a key of FileFormat.names.
            try:
                return FileFormat.names[os.path.splitext(filename)[1]]
            except KeyError:
                return "unknown"

        if self.data is None:
            self.report_paragraph("File", "No file.")
            return

        if self.source == self.LOCAL_FILE:
            home = os.path.expanduser("~")
            if self.loaded_file.startswith(home):
                # os.path.join does not like ~
                name = "~" + os.path.sep + \
                       self.loaded_file[len(home):].lstrip("/").lstrip("\\")
            else:
                name = self.loaded_file
            # Resolve the format from the bare path: once " (<sheet>)" is
            # appended, splitext() no longer yields a known extension.
            file_format = get_ext_name(self.loaded_file)
            if self.sheet_combo.isVisible():
                name += f" ({self.sheet_combo.currentText()})"
            self.report_items("File", [("File name", name),
                                       ("Format", file_format)])
        else:
            self.report_items("Data", [("Resource", self.url),
                                       ("Format", get_ext_name(self.url))])

        self.report_data("Data", self.data)

    @staticmethod
    def dragEnterEvent(event):
        """Accept drops of valid file urls"""
        urls = event.mimeData().urls()
        if urls:
            try:
                # Only the first URL is probed; the drop is accepted when a
                # reader can be obtained for it.
                FileFormat.get_reader(OSX_NSURL_toLocalFile(urls[0]) or
                                      urls[0].toLocalFile())
                event.acceptProposedAction()
            except IOError:
                pass

    def dropEvent(self, event):
        """Handle file drops"""
        urls = event.mimeData().urls()
        if urls:
            self.add_path(OSX_NSURL_toLocalFile(urls[0]) or
                          urls[0].toLocalFile())  # add first file
            self.source = self.LOCAL_FILE
            self.load_data()

    def workflowEnvChanged(self, key, value, oldvalue):
        """
        Function called when the environment changes (e.g. while saving the
        scheme). It makes sure that all environment-connected values are
        modified (e.g. relative file paths are changed).
        """
        self.update_file_list(key, value, oldvalue)
class OWMultifile(widget.OWWidget, RelocatablePathsWidgetMixin):
    """Widget that reads several files and outputs them as one table.

    Every listed file is read with its own reader; the results are combined
    with `concatenate_data` and can be adjusted in an embedded domain editor
    before being sent to the ``Data`` output.
    """

    name = "Multifile"
    id = "orangecontrib.spectroscopy.widgets.files"
    icon = "icons/multifile.svg"
    description = "Read data from input files " \
                  "and send a data table to the output."
    priority = 10000
    replaces = [
        "orangecontrib.infrared.widgets.owfiles.OWFiles",
        "orangecontrib.infrared.widgets.owmultifile.OWMultifile"
    ]

    class Outputs:
        data = Output("Data", Table,
                      doc="Concatenated input files.")

    want_main_area = False

    # Attribute bound to the file list box created in __init__.
    file_idx = []

    settingsHandler = PerfectDomainContextHandler(
        match_values=PerfectDomainContextHandler.MATCH_VALUES_ALL)

    recent_paths: List[RecentPath]
    variables: list

    sheet = Setting(None, schema_only=True)
    label = Setting("", schema_only=True)
    recent_paths = Setting([], schema_only=True)
    variables = ContextSetting([], schema_only=True)

    class Error(widget.OWWidget.Error):
        file_not_found = widget.Msg("File(s) not found.")
        missing_reader = widget.Msg("Missing reader(s).")
        read_error = widget.Msg("Read error(s).")

    domain_editor = SettingProvider(DomainEditor)

    def __init__(self):
        """Build the controls and load the files stored in the settings."""
        widget.OWWidget.__init__(self)
        RelocatablePathsWidgetMixin.__init__(self)
        self.domain = None
        self.data = None
        self.loaded_file = ""
        # (sheet name, combo label) pairs, filled by _update_sheet_combo.
        self.sheets = []

        self.lb = gui.listBox(self.controlArea, self, "file_idx",
                              selectionMode=QListWidget.MultiSelection)

        # Captured from the first list item in load_data so that error
        # colouring can be undone.
        self.default_foreground = None

        layout = QGridLayout()
        gui.widgetBox(self.controlArea, margin=0, orientation=layout)

        file_button = gui.button(
            None, self, ' ...', callback=self.browse_files, autoDefault=False)
        file_button.setIcon(self.style().standardIcon(QStyle.SP_DirOpenIcon))
        file_button.setSizePolicy(Policy.Maximum, Policy.Fixed)
        layout.addWidget(file_button, 0, 0)

        remove_button = gui.button(
            None, self, 'Remove', callback=self.remove_item)

        clear_button = gui.button(
            None, self, 'Clear', callback=self.clear)

        layout.addWidget(remove_button, 0, 1)
        layout.addWidget(clear_button, 0, 2)

        reload_button = gui.button(
            None, self, "Reload", callback=self.load_data, autoDefault=False)
        reload_button.setIcon(self.style().standardIcon(
            QStyle.SP_BrowserReload))
        reload_button.setSizePolicy(Policy.Fixed, Policy.Fixed)
        layout.addWidget(reload_button, 0, 7)

        self.sheet_box = gui.hBox(None, addToLayout=False, margin=0)
        self.sheet_index = 0
        self.sheet_combo = gui.comboBox(None, self, "sheet_index",
                                        callback=self.select_sheet)
        self.sheet_combo.setSizePolicy(
            Policy.MinimumExpanding, Policy.Fixed)
        self.sheet_label = QLabel()
        self.sheet_label.setText('Sheet')
        self.sheet_label.setSizePolicy(
            Policy.MinimumExpanding, Policy.Fixed)
        self.sheet_box.layout().addWidget(
            self.sheet_label, Qt.AlignLeft)
        self.sheet_box.layout().addWidget(
            self.sheet_combo, Qt.AlignVCenter)
        # NOTE(review): sheet_box is added to the grid twice (here and at
        # (0, 5) below); the first call looks like a leftover - confirm
        # before removing.
        layout.addWidget(self.sheet_box, 2, 1)
        self.sheet_box.hide()

        layout.addWidget(self.sheet_box, 0, 5)

        label_box = gui.hBox(None, addToLayout=False, margin=0)
        gui.lineEdit(label_box, self, "label", callback=self.set_label,
                     label="Label", orientation=Qt.Horizontal)
        layout.addWidget(label_box, 0, 6)

        layout.setColumnStretch(3, 2)

        box = gui.widgetBox(self.controlArea, "Columns (Double click to edit)")
        self.domain_editor = DomainEditor(self)
        self.editor_model = self.domain_editor.model()
        box.layout().addWidget(self.domain_editor)

        # Repopulate the list box from the stored paths.
        for rp in self.recent_paths:
            self.lb.addItem(rp.abspath)

        box = gui.hBox(self.controlArea)
        gui.rubber(box)
        if hasattr(DomainEditor, "reset_domain"):  # Orange>=3.21
            gui.button(box, self, "Reset", callback=self.reset_domain_edit)
        self.apply_button = gui.button(
            box, self, "Apply", callback=self.apply_domain_edit)
        self.apply_button.setEnabled(False)
        self.apply_button.setFixedWidth(170)
        # Any edit in the domain editor re-enables Apply.
        self.editor_model.dataChanged.connect(
            lambda: self.apply_button.setEnabled(True))

        self._update_sheet_combo()
        self.load_data()

    def set_label(self):
        """Reload the data after the label has been edited."""
        self.load_data()

    def _select_active_sheet(self):
        """Point the sheet combo at the stored sheet (first entry by default)."""
        if self.sheet:
            try:
                sheet_list = [s[0] for s in self.sheets]
                idx = sheet_list.index(self.sheet)
                self.sheet_combo.setCurrentIndex(idx)
            except ValueError:
                # Requested sheet does not exist in this file
                self.sheet = None
        else:
            self.sheet_combo.setCurrentIndex(0)

    def _update_sheet_combo(self):
        """Rebuild the sheet selector from the sheets of all listed files.

        Each entry shows the sheet name and the number of files containing
        it. The selector is hidden (and the sheet reset) when fewer than two
        distinct sheets are found.
        """
        sheets = Counter()

        for rp in self.recent_paths:
            try:
                reader = _get_reader(rp)
                sheets.update(reader.sheets)
            except Exception:  # pylint: disable=broad-except
                # Best effort: unreadable files are reported by load_data;
                # here they simply contribute no sheets.
                pass

        sheets = sorted(sheets.items(), key=lambda x: x[0])

        self.sheets = [(s, s + " (" + str(n) + ")") for s, n in sheets]

        if len(sheets) < 2:
            self.sheet_box.hide()
            self.sheet = None
        else:
            self.sheets.insert(0, (None, "(None)"))
            self.sheet_combo.clear()
            self.sheet_combo.addItems([s[1] for s in self.sheets])
            self._select_active_sheet()
            self.sheet_box.show()

    def select_sheet(self):
        """Store the sheet chosen in the combo and reload."""
        self.sheet = self.sheets[self.sheet_combo.currentIndex()][0]
        self.load_data()

    def remove_item(self):
        """Remove the selected files from the list and reload."""
        ri = [i.row() for i in self.lb.selectedIndexes()]
        # Remove from the end so that the remaining indices stay valid.
        for i in sorted(ri, reverse=True):
            self.recent_paths.pop(i)
            self.lb.takeItem(i)
        self._update_sheet_combo()
        self.load_data()

    def clear(self):
        """Remove all files from the list and reload."""
        self.lb.clear()
        while self.recent_paths:
            self.recent_paths.pop()
        self._update_sheet_combo()
        self.load_data()

    def browse_files(self, in_demos=False):
        """Let the user pick one or more files and add them to the list.

        `in_demos` is accepted but not used by this method.
        """
        start_file = self.last_path() or os.path.expanduser("~/")

        # Offer only formats that can read and that declare extensions.
        readers = [f for f in FileFormat.formats
                   if getattr(f, 'read', None)
                   and getattr(f, "EXTENSIONS", None)]
        filenames, reader, _ = open_filename_dialog(
            start_file, None, readers, dialog=QFileDialog.getOpenFileNames)

        self.load_files(filenames, reader)

    def load_files(self, filenames, reader):
        """Add `filenames` (to be read with `reader`) to the list and reload."""
        if not filenames:
            return

        for f in filenames:
            self.add_path(f, reader)
            self.lb.addItem(f)

        self._update_sheet_combo()
        self.load_data()

    def load_data(self):
        """Read all listed files, concatenate them and send the result.

        Files that are missing, have no reader or fail to read are marked
        red in the list with an explanatory tooltip. If any file fails, or
        nothing was read, no data is sent.
        """
        self.closeContext()

        self.Error.file_not_found.clear()
        self.Error.missing_reader.clear()
        self.Error.read_error.clear()

        data_list = []
        fnok_list = []

        def show_error(li, msg):
            li.setForeground(Qt.red)
            li.setToolTip(msg)

        empty_domain = Domain(attributes=[])
        for i, rp in enumerate(self.recent_paths):
            fn = rp.abspath

            # Reset the item's appearance before re-evaluating the file.
            li = self.lb.item(i)
            li.setToolTip("")
            if self.default_foreground is None:
                self.default_foreground = li.foreground()
            li.setForeground(self.default_foreground)

            if not os.path.exists(fn):
                show_error(li, "File not found.")
                self.Error.file_not_found()
                continue

            try:
                reader = _get_reader(rp)
                assert reader is not None
            except Exception:  # pylint: disable=broad-except
                show_error(li, "Reader not found.")
                self.Error.missing_reader()
                continue

            try:
                if self.sheet in reader.sheets:
                    reader.select_sheet(self.sheet)
                if isinstance(reader, SpectralFileFormat):
                    xs, vals, additional = reader.read_spectra()
                    if additional is None:
                        # Supply an empty table with one row per spectrum.
                        additional = Table.from_domain(empty_domain,
                                                       n_rows=len(vals))
                    data_list.append((xs, vals, additional))
                else:
                    data_list.append(reader.read())
                fnok_list.append(fn)
            except Exception as ex:  # pylint: disable=broad-except
                show_error(li, "Read error:\n" + str(ex))
                self.Error.read_error()

        if not data_list \
                or self.Error.file_not_found.is_shown() \
                or self.Error.missing_reader.is_shown() \
                or self.Error.read_error.is_shown():
            self.data = None
            self.domain_editor.set_domain(None)
        else:
            data = concatenate_data(data_list, fnok_list, self.label)
            self.data = data
            self.openContext(data.domain)

        self.apply_domain_edit()  # sends data

    def storeSpecificSettings(self):
        """Save a copy of the edited variable list on the current context."""
        self.current_context.modified_variables = self.variables[:]

    def retrieveSpecificSettings(self):
        """Restore the variable list saved on the current context, if any."""
        if hasattr(self.current_context, "modified_variables"):
            self.variables[:] = self.current_context.modified_variables

    def apply_domain_edit(self):
        """Build the output table from the edited domain and send it.

        Sends ``None`` when no data is loaded or when the edited domain has
        neither variables nor metas, and disables the Apply button afterwards.
        """
        if self.data is None:
            table = None
        else:
            domain, cols = self.domain_editor.get_domain(
                self.data.domain, self.data)
            if not (domain.variables or domain.metas):
                table = None
            else:
                X, y, m = cols
                table = Table.from_numpy(domain, X, y, m, self.data.W)
                # Carry name, ids and attributes over from the loaded table.
                table.name = self.data.name
                table.ids = np.array(self.data.ids)
                table.attributes = getattr(self.data, 'attributes', {})

        self.Outputs.data.send(table)
        self.apply_button.setEnabled(False)

    def reset_domain_edit(self):
        """Reset the domain editor and re-send the data."""
        self.domain_editor.reset_domain()
        self.apply_domain_edit()

    def send_report(self):
        """Add the list of files with their formats, and the data, to the
        report; report "No file." when nothing is loaded."""
        def get_format_name(reader):
            # Prefer the reader's DESCRIPTION; fall back to its class name.
            try:
                return reader.DESCRIPTION
            except AttributeError:
                return reader.__class__.__name__

        if self.data is None:
            self.report_paragraph("File", "No file.")
            return

        files = [[rp.abspath, get_format_name(_get_reader(rp))]
                 for rp in self.recent_paths]

        self.report_table("Files", table=files)

        self.report_data("Data", self.data)

    def workflowEnvChanged(self, key, value, oldvalue):
        """
        Function called when the environment changes (e.g. while saving the
        scheme). It makes sure that all environment-connected values are
        modified (e.g. relative file paths are changed).
        """
        self.update_file_list(key, value, oldvalue)

    def update_file_list(self, key, value, oldvalue):
        """Relocate the recent files when the workflow base directory changes."""
        if key == "basedir":
            self._relocate_recent_files()