def test_locate_sample_datasets(self): with self.assertRaises(OSError): FileFormat.locate("iris.tab", search_dirs=[os.path.dirname(__file__)]) iris = FileFormat.locate("iris.tab", search_dirs=[get_sample_datasets_dir()]) self.assertEqual(os.path.basename(iris), "iris.tab") # test extension adding iris = FileFormat.locate("iris", search_dirs=[get_sample_datasets_dir()]) self.assertEqual(os.path.basename(iris), "iris.tab")
def _relocate_recent_files(self): if self.recent_files and not self.recent_paths: # backward compatibility settings restore existing = [ path for path in self.recent_files if os.path.exists(path) ] existing = [RecentPath(path, None, None) for path in existing] self.recent_paths.extend(existing) self.recent_files = [] paths = [("sample-datasets", get_sample_datasets_dir())] basedir = self.workflowEnv().get("basedir", None) if basedir is not None: paths.append(("basedir", basedir)) rec = [] for recent in self.recent_paths: resolved = recent.resolve(paths) if resolved is not None: rec.append(RecentPath.create(resolved.abspath, paths)) elif recent.search(paths) is not None: rec.append(RecentPath.create(recent.search(paths), paths)) elif recent.prefix == "url-datasets": valid, _ = self.is_url_valid(recent.abspath) if valid: rec.append(recent) self.recent_paths = rec
def browse_file(self, in_demos=False): if in_demos: start_file = get_sample_datasets_dir() if not os.path.exists(start_file): QMessageBox.information( None, "File", "Cannot find the directory with documentation datasets", ) return else: start_file = self.last_path() or os.path.expanduser("~/") readers = [ f for f in FileFormat.formats if getattr(f, "read", None) and getattr(f, "EXTENSIONS", None) ] filename, reader, _ = open_filename_dialog(start_file, None, readers) if not filename: return self.add_path(filename) if reader is not None: self.recent_paths[0].file_format = reader.qualified_name() self.source = self.LOCAL_FILE self.load_data()
def browseFile(self, in_demos=0): if in_demos: try: startfile = get_sample_datasets_dir() except AttributeError: startfile = "" if not startfile or not os.path.exists(startfile): widgetsdir = os.path.dirname(gui.__file__) orangedir = os.path.dirname(widgetsdir) startfile = os.path.join(orangedir, "doc", "datasets") if not startfile or not os.path.exists(startfile): d = os.getcwd() if os.path.basename(d) == "canvas": d = os.path.dirname(d) startfile = os.path.join(os.path.dirname(d), "doc", "datasets") if not os.path.exists(startfile): QtGui.QMessageBox.information(None, "File", "Cannot find the directory with example data sets") return else: if self.recent_files and self.recent_files[0] != "(none)": startfile = self.recent_files[0] else: startfile = os.path.expanduser("~/") filename = QtGui.QFileDialog.getOpenFileName( self, 'Open Orange Data File', startfile, self.dlgFormats) if not filename: return if filename in self.recent_files: self.recent_files.remove(filename) self.recent_files.insert(0, filename) self.set_file_list() self.open_file(self.recent_files[0])
def browse_file(self, in_demos=False): if in_demos: try: start_file = get_sample_datasets_dir() except AttributeError: start_file = "" if not start_file or not os.path.exists(start_file): widgets_dir = os.path.dirname(gui.__file__) orange_dir = os.path.dirname(widgets_dir) start_file = os.path.join(orange_dir, "doc", "datasets") if not start_file or not os.path.exists(start_file): d = os.getcwd() if os.path.basename(d) == "canvas": d = os.path.dirname(d) start_file = os.path.join(os.path.dirname(d), "doc", "datasets") if not os.path.exists(start_file): QtGui.QMessageBox.information( None, "File", "Cannot find the directory with example data sets") return else: if self.recent_paths: start_file = self.recent_paths[0].abspath else: start_file = os.path.expanduser("~/") filename = QtGui.QFileDialog.getOpenFileName( self, 'Open Orange Data File', start_file, self.dlg_formats) if not filename: return self._add_path(filename) self.set_file_list() self.source = self.LOCAL_FILE self.load_data()
def _relocate_recent_files(self): if self.recent_files and not self.recent_paths: # backward compatibility settings restore existing = [path for path in self.recent_files if os.path.exists(path)] existing = [RecentPath(path, None, None) for path in existing] self.recent_paths.extend(existing) self.recent_files = [] paths = [("sample-datasets", get_sample_datasets_dir())] basedir = self.workflowEnv().get("basedir", None) if basedir is not None: paths.append(("basedir", basedir)) rec = [] for recent in self.recent_paths: resolved = recent.resolve(paths) if resolved is not None: rec.append(RecentPath.create(resolved.abspath, paths)) elif recent.search(paths) is not None: rec.append(RecentPath.create(recent.search(paths), paths)) elif recent.prefix == "url-datasets": valid, _ = self.is_url_valid(recent.abspath) if valid: rec.append(recent) self.recent_paths = rec
def browse_files(self, in_demos=False): if in_demos: start_file = get_sample_datasets_dir() if not os.path.exists(start_file): QMessageBox.information( None, "File", "Cannot find the directory with documentation data sets") return else: start_file = self.last_path() or os.path.expanduser("~/") filenames = QFileDialog.getOpenFileNames(self, 'Open Multiple Data Files', start_file, self.dlg_formats) if isinstance(filenames, tuple): # has a file description filenames = filenames[0] if not filenames: return for f in filenames: self.add_path(f) self.lb.addItem(f) self._update_sheet_combo() self.load_data()
def _add_path(self, filename): searchpaths = [("sample-datasets", get_sample_datasets_dir())] basedir = self.workflowEnv().get("basedir", None) if basedir is not None: searchpaths.append(("basedir", basedir)) recent = RecentPath.create(filename, searchpaths) if recent in self.recent_paths: self.recent_paths.remove(recent) self.recent_paths.insert(0, recent)
def browse_file(self, in_demos=0): if in_demos: try: start_file = get_sample_datasets_dir() except AttributeError: start_file = "" if not start_file or not os.path.exists(start_file): widgets_dir = os.path.dirname(gui.__file__) orange_dir = os.path.dirname(widgets_dir) start_file = os.path.join(orange_dir, "doc", "datasets") if not start_file or not os.path.exists(start_file): d = os.getcwd() if os.path.basename(d) == "canvas": d = os.path.dirname(d) start_file = os.path.join(os.path.dirname(d), "doc", "datasets") if not os.path.exists(start_file): QtGui.QMessageBox.information( None, "File", "Cannot find the directory with example data sets") return else: if self.recent_paths: start_file = self.recent_paths[0].abspath else: start_file = os.path.expanduser("~/") filename = QtGui.QFileDialog.getOpenFileName( self, 'Open Orange Data File', start_file, self.dlgFormats) if not filename: return searchpaths = [("sample-datasets", get_sample_datasets_dir())] basedir = self.workflowEnv().get("basedir", None) if basedir is not None: searchpaths.append(("basedir", basedir)) recent = RecentPath.create(filename, searchpaths) if recent in self.recent_paths: self.recent_paths.remove(recent) self.recent_paths.insert(0, recent) self.set_file_list() self.open_file(self.recent_paths[0].abspath)
def _relocate_recent_files(self): paths = [("sample-datasets", get_sample_datasets_dir())] basedir = self.workflowEnv().get("basedir", None) if basedir is not None: paths.append(("basedir", basedir)) rec = [] for recent in self.recent_paths: resolved = recent.resolve(paths) if resolved is not None: rec.append(RecentPath.create(resolved.abspath, paths)) elif recent.search(paths) is not None: rec.append(RecentPath.create(recent.search(paths), paths)) self.recent_paths = rec
def browse_file(self, in_demos=False): if in_demos: start_file = get_sample_datasets_dir() if not os.path.exists(start_file): QMessageBox.information( None, "File", "Cannot find the directory with documentation data sets") return else: start_file = self.last_path() or os.path.expanduser("~/") filename, _ = QFileDialog.getOpenFileName( self, 'Open Orange Data File', start_file, dialog_formats()) if not filename: return self.add_path(filename) self.source = self.LOCAL_FILE self.load_data()
def browse_file(self, in_demos=False): if in_demos: start_file = get_sample_datasets_dir() if not os.path.exists(start_file): QMessageBox.information(None, "文件", "无法找到文件") return else: start_file = self.last_path() or os.path.expanduser("~/") filename, reader, _ = open_filename_dialog(start_file, None, self.available_readers) if not filename: return self.add_path(filename) if reader is not None: self.recent_paths[0].file_format = reader.qualified_name() self.source = self.LOCAL_FILE self.load_data()
def browse_file(self, in_demos=False): if in_demos: start_file = get_sample_datasets_dir() if not os.path.exists(start_file): QMessageBox.information( None, "File", "Cannot find the directory with documentation data sets") return else: start_file = self.last_path() or os.path.expanduser("~/") filename, _ = QFileDialog.getOpenFileName(self, 'Open Orange Data File', start_file, self.dlg_formats) if not filename: return self.add_path(filename) self.source = self.LOCAL_FILE self.load_data()
def browse_file(self, in_demos=False): if in_demos: start_file = get_sample_datasets_dir() if not os.path.exists(start_file): QMessageBox.information( None, "File", "Cannot find the directory with documentation datasets") return else: start_file = self.last_path() or os.path.expanduser("~/") readers = [f for f in FileFormat.formats if getattr(f, 'read', None) and getattr(f, "EXTENSIONS", None)] filename, reader, _ = open_filename_dialog(start_file, None, readers) if not filename: return self.add_path(filename) if reader is not None: self.recent_paths[0].file_format = reader.qualified_name() self.source = self.LOCAL_FILE self.load_data()
def browse_file(self, in_demos=False): if in_demos: try: start_file = get_sample_datasets_dir() except AttributeError: start_file = "" if not start_file or not os.path.exists(start_file): widgets_dir = os.path.dirname(gui.__file__) orange_dir = os.path.dirname(widgets_dir) start_file = os.path.join(orange_dir, "doc", "datasets") if not start_file or not os.path.exists(start_file): d = os.getcwd() if os.path.basename(d) == "canvas": d = os.path.dirname(d) start_file = os.path.join(os.path.dirname(d), "doc", "datasets") if not os.path.exists(start_file): QtGui.QMessageBox.information( None, "File", "Cannot find the directory with example data sets") return else: if self.recent_paths: start_file = self.recent_paths[0].abspath else: start_file = os.path.expanduser("~/") filename = QtGui.QFileDialog.getOpenFileName(self, 'Open Orange Data File', start_file, self.dlg_formats) if not filename: return self._add_path(filename) self.set_file_list() self.source = self.LOCAL_FILE self.load_data()
def browse_file(self, in_demos=0): if in_demos: try: start_file = get_sample_datasets_dir() except AttributeError: start_file = "" if not start_file or not os.path.exists(start_file): widgets_dir = os.path.dirname(gui.__file__) orange_dir = os.path.dirname(widgets_dir) start_file = os.path.join(orange_dir, "doc", "datasets") if not start_file or not os.path.exists(start_file): d = os.getcwd() if os.path.basename(d) == "canvas": d = os.path.dirname(d) start_file = os.path.join(os.path.dirname(d), "doc", "datasets") if not os.path.exists(start_file): QtGui.QMessageBox.information( None, "File", "Cannot find the directory with example data sets") return else: if self.recent_files and self.recent_files[0] != "(none)": start_file = self.recent_files[0] else: start_file = os.path.expanduser("~/") filename = QtGui.QFileDialog.getOpenFileName(self, 'Open Orange Data File', start_file, self.dlgFormats) if not filename: return if filename in self.recent_files: self.recent_files.remove(filename) self.recent_files.insert(0, filename) self.set_file_list() self.open_file(self.recent_files[0])
class OWFile(widget.OWWidget, RecentPathsWComboMixin): name = "File" id = "orange.widgets.data.file" description = "Read a data from an input file or network" \ "and send the data table to the output." icon = "icons/File.svg" priority = 10 category = "Data" keywords = ["data", "file", "load", "read"] outputs = [ widget.OutputSignal( "Data", Table, doc="Attribute-valued data set read from the input file.") ] want_main_area = False resizing_enabled = False SEARCH_PATHS = [("sample-datasets", get_sample_datasets_dir())] LOCAL_FILE, URL = range(2) settingsHandler = XlsContextHandler() # Overload RecentPathsWidgetMixin.recent_paths to set defaults recent_paths = Setting([ RecentPath("", "sample-datasets", "iris.tab"), RecentPath("", "sample-datasets", "titanic.tab"), RecentPath("", "sample-datasets", "housing.tab"), ]) recent_urls = Setting([]) source = Setting(LOCAL_FILE) xls_sheet = ContextSetting("") sheet_names = Setting({}) url = Setting("") dlg_formats = ("All readable files ({});;".format( '*' + ' *'.join(FileFormat.readers.keys())) + ";;".join( "{} (*{})".format(f.DESCRIPTION, ' *'.join(f.EXTENSIONS)) for f in sorted(set(FileFormat.readers.values()), key=list(FileFormat.readers.values()).index))) def __init__(self): super().__init__() RecentPathsWComboMixin.__init__(self) self.domain = None self.data = None self.loaded_file = "" layout = QtGui.QGridLayout() gui.widgetBox(self.controlArea, margin=0, orientation=layout) vbox = gui.radioButtons(None, self, "source", box=True, addSpace=True, callback=self.load_data, addToLayout=False) rb_button = gui.appendRadioButton(vbox, "File", addToLayout=False) layout.addWidget(rb_button, 0, 0, QtCore.Qt.AlignVCenter) box = gui.hBox(None, addToLayout=False, margin=0) box.setSizePolicy(Policy.MinimumExpanding, Policy.Fixed) self.file_combo.setSizePolicy(Policy.MinimumExpanding, Policy.Fixed) self.file_combo.activated[int].connect(self.select_file) box.layout().addWidget(self.file_combo) layout.addWidget(box, 0, 1) file_button = gui.button(None, self, '...', callback=self.browse_file, autoDefault=False) file_button.setIcon(self.style().standardIcon( QtGui.QStyle.SP_DirOpenIcon)) file_button.setSizePolicy(Policy.Maximum, Policy.Fixed) layout.addWidget(file_button, 0, 2) reload_button = gui.button(None, self, "Reload", callback=self.reload, autoDefault=False) reload_button.setIcon(self.style().standardIcon( QtGui.QStyle.SP_BrowserReload)) reload_button.setSizePolicy(Policy.Fixed, Policy.Fixed) layout.addWidget(reload_button, 0, 3) self.sheet_box = gui.hBox(None, addToLayout=False, margin=0) self.sheet_combo = gui.comboBox(None, self, "xls_sheet", callback=self.load_data, sendSelectedValue=True) self.sheet_combo.setSizePolicy(Policy.MinimumExpanding, Policy.Fixed) self.sheet_label = QtGui.QLabel() self.sheet_label.setText('Sheet') self.sheet_label.setSizePolicy(Policy.MinimumExpanding, Policy.Fixed) self.sheet_box.layout().addWidget(self.sheet_label, QtCore.Qt.AlignLeft) self.sheet_box.layout().addWidget(self.sheet_combo, QtCore.Qt.AlignVCenter) layout.addWidget(self.sheet_box, 2, 1) self.sheet_box.hide() rb_button = gui.appendRadioButton(vbox, "URL", addToLayout=False) layout.addWidget(rb_button, 3, 0, QtCore.Qt.AlignVCenter) self.url_combo = url_combo = QtGui.QComboBox() url_model = NamedURLModel(self.sheet_names) url_model.wrap(self.recent_urls) url_combo.setModel(url_model) url_combo.setSizePolicy(Policy.MinimumExpanding, Policy.Fixed) url_combo.setMaximumWidth(500) url_combo.setEditable(True) url_combo.setInsertPolicy(url_combo.InsertAtTop) url_edit = url_combo.lineEdit() l, t, r, b = url_edit.getTextMargins() url_edit.setTextMargins(l + 5, t, r, b) layout.addWidget(url_combo, 3, 1, 3, 3) url_combo.activated.connect(self._url_set) box = gui.vBox(self.controlArea, "Info") self.info = gui.widgetLabel(box, 'No data loaded.') self.warnings = gui.widgetLabel(box, '') box = gui.hBox(self.controlArea) gui.button(box, self, "Browse documentation data sets", callback=lambda: self.browse_file(True), autoDefault=False) gui.rubber(box) box.layout().addWidget(self.report_button) self.report_button.setFixedWidth(170) # Set word wrap, so long warnings won't expand the widget self.warnings.setWordWrap(True) self.warnings.setSizePolicy(Policy.Ignored, Policy.MinimumExpanding) self.set_file_list() if self.last_path() is not None: self.fill_sheet_combo(self.last_path()) # Must not call open_file from within __init__. open_file # explicitly re-enters the event loop (by a progress bar) QtCore.QTimer.singleShot(0, self.load_data) def reload(self): if self.recent_paths: basename = self.file_combo.currentText() path = self.recent_paths[0] if basename in [path.relpath, path.basename]: self.source = self.LOCAL_FILE if self.is_multisheet_excel(path.abspath): self.fill_sheet_combo(path.abspath) return self.load_data() self.select_file(len(self.recent_paths) + 1) def select_file(self, n): if n < len(self.recent_paths): super().select_file(n) self.fill_sheet_combo(self.last_path()) # TODO: This is weird. Has it remained here from "Browse data sets" # or from when this combo was editable? A `n` this large can come from # `reload`, but ... how?! elif n: path = self.file_combo.currentText() if os.path.exists(path): self.add_path(path) else: self.info.setText('Data was not loaded:') self.warnings.setText("File {} does not exist".format(path)) self.file_combo.removeItem(n) self.file_combo.lineEdit().setText(path) return if self.recent_paths: self.source = self.LOCAL_FILE self.load_data() self.set_file_list() def _url_set(self): self.source = self.URL self.load_data() def browse_file(self, in_demos=False): if in_demos: start_file = get_sample_datasets_dir() if not os.path.exists(start_file): QtGui.QMessageBox.information( None, "File", "Cannot find the directory with documentation data sets") return else: start_file = self.last_path() or os.path.expanduser("~/") filename = QtGui.QFileDialog.getOpenFileName(self, 'Open Orange Data File', start_file, self.dlg_formats) if not filename: return self.add_path(filename) self.source = self.LOCAL_FILE self.fill_sheet_combo(filename) self.load_data() def fill_sheet_combo(self, path): if os.path.exists(path) and self.is_multisheet_excel(path): self.closeContext() self.sheet_combo.clear() self.sheet_box.show() book = open_workbook(path) sheet_names = [ str(book.sheet_by_index(i).name) for i in range(book.nsheets) ] self.sheet_combo.addItems(sheet_names) self.openContext(path, sheet_names) else: self.sheet_box.hide() @staticmethod def is_multisheet_excel(fn): try: return open_workbook(fn).nsheets > 1 except XLRDError: return False # Open a file, create data from it and send it over the data channel def load_data(self): def load(method, fn): with catch_warnings(record=True) as warnings: data = method(fn) self.warning(33, warnings[-1].message.args[0] if warnings else '') return data, fn def load_from_file(): fn = self.last_path() if not fn: return None, "" if not os.path.exists(fn): dir_name, basename = os.path.split(fn) if os.path.exists(os.path.join(".", basename)): fn = os.path.join(".", basename) self.information( "Loading '{}' from the current directory.".format( basename)) if self.is_multisheet_excel(fn): data = ExcelFormat.read_file(fn + ':' + self.xls_sheet) if data: return data, fn try: return load(Table.from_file, fn) except Exception as exc: self.warnings.setText(str(exc)) # Let us not remove from recent files: user may fix them raise def load_from_network(): combo = self.url_combo model = combo.model() # combo.currentText does not work when the widget is initialized url = model.data(model.index(combo.currentIndex()), QtCore.Qt.EditRole) if not url: return None, "" elif "://" not in url: url = "http://" + url try: data, url = load(Table.from_url, url) except: self.warnings.setText( "URL '{}' does not contain valid data".format(url)) # Don't remove from recent_urls: # resource may reappear, or the user mistyped it # and would like to retrieve it from history and fix it. raise combo.clearFocus() if "://docs.google.com/spreadsheets" in url: model.add_name(url, data.name) self.url = \ "{} from {}".format(data.name.replace("- Sheet1", ""), url) combo.lineEdit().setPlaceholderText(self.url) return data, data.name else: self.url = url return data, url self.warning() self.information() try: loader = [load_from_file, load_from_network][self.source] self.data, self.loaded_file = loader() except: self.info.setText("Data was not loaded:") self.data = None self.loaded_file = "" return else: self.warnings.setText("") data = self.data if data is None: self.send("Data", None) self.info.setText("No data loaded") return domain = data.domain text = "{} instance(s), {} feature(s), {} meta attribute(s)".format( len(data), len(domain.attributes), len(domain.metas)) if domain.has_continuous_class: text += "\nRegression; numerical class." elif domain.has_discrete_class: text += "\nClassification; discrete class with {} values.".format( len(domain.class_var.values)) elif data.domain.class_vars: text += "\nMulti-target; {} target variables.".format( len(data.domain.class_vars)) else: text += "\nData has no target variable." if 'Timestamp' in data.domain: # Google Forms uses this header to timestamp responses text += '\n\nFirst entry: {}\nLast entry: {}'.format( data[0, 'Timestamp'], data[-1, 'Timestamp']) self.info.setText(text) add_origin(data, self.loaded_file) self.send("Data", data) def get_widget_name_extension(self): _, name = os.path.split(self.loaded_file) return os.path.splitext(name)[0] def send_report(self): def get_ext_name(filename): try: return FileFormat.names[os.path.splitext(filename)[1]] except KeyError: return "unknown" if self.data is None: self.report_paragraph("File", "No file.") return if self.source == self.LOCAL_FILE: home = os.path.expanduser("~") if self.loaded_file.startswith(home): # os.path.join does not like ~ name = "~/" + \ self.loaded_file[len(home):].lstrip("/").lstrip("\\") else: name = self.loaded_file if self.sheet_combo.isVisible(): name += " ({})".format(self.sheet_combo.currentText()) self.report_items("File", [("File name", name), ("Format", get_ext_name(name))]) else: self.report_items("Data", [("Resource", self.url), ("Format", get_ext_name(self.url))]) self.report_data("Data", self.data)
class OWFile(widget.OWWidget, RecentPathsWComboMixin): name = "File" id = "orange.widgets.data.file" description = "Read data from an input file or network " \ "and send a data table to the output." icon = "icons/File.svg" priority = 10 category = "Data" keywords = ["file", "load", "read", "open"] class Outputs: data = Output("Data", Table, doc="Attribute-valued dataset read from the input file.") want_main_area = False SEARCH_PATHS = [("sample-datasets", get_sample_datasets_dir())] SIZE_LIMIT = 1e7 LOCAL_FILE, URL = range(2) settingsHandler = PerfectDomainContextHandler( match_values=PerfectDomainContextHandler.MATCH_VALUES_ALL) # pylint seems to want declarations separated from definitions recent_paths: List[RecentPath] recent_urls: List[str] variables: list # Overload RecentPathsWidgetMixin.recent_paths to set defaults recent_paths = Setting([ RecentPath("", "sample-datasets", "iris.tab"), RecentPath("", "sample-datasets", "titanic.tab"), RecentPath("", "sample-datasets", "housing.tab"), RecentPath("", "sample-datasets", "heart_disease.tab"), RecentPath("", "sample-datasets", "brown-selected.tab"), RecentPath("", "sample-datasets", "zoo.tab"), ]) recent_urls = Setting([]) source = Setting(LOCAL_FILE) xls_sheet = ContextSetting("") sheet_names = Setting({}) url = Setting("") variables = ContextSetting([]) domain_editor = SettingProvider(DomainEditor) class Warning(widget.OWWidget.Warning): file_too_big = widget.Msg( "The file is too large to load automatically." " Press Reload to load.") load_warning = widget.Msg("Read warning:\n{}") class Error(widget.OWWidget.Error): file_not_found = widget.Msg("File not found.") missing_reader = widget.Msg("Missing reader.") sheet_error = widget.Msg("Error listing available sheets.") unknown = widget.Msg("Read error:\n{}") class NoFileSelected: pass UserAdviceMessages = [ widget.Message( "Use CSV File Import widget for advanced options " "for comma-separated files", "use-csv-file-import"), widget.Message( "This widget loads only tabular data. Use other widgets to load " "other data types like models, distance matrices and networks.", "other-data-types") ] def __init__(self): super().__init__() RecentPathsWComboMixin.__init__(self) self.domain = None self.data = None self.loaded_file = "" self.reader = None layout = QGridLayout() gui.widgetBox(self.controlArea, margin=0, orientation=layout) vbox = gui.radioButtons(None, self, "source", box=True, addSpace=True, callback=self.load_data, addToLayout=False) rb_button = gui.appendRadioButton(vbox, "File:", addToLayout=False) layout.addWidget(rb_button, 0, 0, Qt.AlignVCenter) box = gui.hBox(None, addToLayout=False, margin=0) box.setSizePolicy(Policy.MinimumExpanding, Policy.Fixed) self.file_combo.setSizePolicy(Policy.MinimumExpanding, Policy.Fixed) self.file_combo.activated[int].connect(self.select_file) box.layout().addWidget(self.file_combo) layout.addWidget(box, 0, 1) file_button = gui.button(None, self, '...', callback=self.browse_file, autoDefault=False) file_button.setIcon(self.style().standardIcon(QStyle.SP_DirOpenIcon)) file_button.setSizePolicy(Policy.Maximum, Policy.Fixed) layout.addWidget(file_button, 0, 2) reload_button = gui.button(None, self, "Reload", callback=self.load_data, autoDefault=False) reload_button.setIcon(self.style().standardIcon( QStyle.SP_BrowserReload)) reload_button.setSizePolicy(Policy.Fixed, Policy.Fixed) layout.addWidget(reload_button, 0, 3) self.sheet_box = gui.hBox(None, addToLayout=False, margin=0) self.sheet_combo = gui.comboBox( None, self, "xls_sheet", callback=self.select_sheet, sendSelectedValue=True, ) self.sheet_combo.setSizePolicy(Policy.MinimumExpanding, Policy.Fixed) self.sheet_label = QLabel() self.sheet_label.setText('Sheet') self.sheet_label.setSizePolicy(Policy.MinimumExpanding, Policy.Fixed) self.sheet_box.layout().addWidget(self.sheet_label, Qt.AlignLeft) self.sheet_box.layout().addWidget(self.sheet_combo, Qt.AlignVCenter) layout.addWidget(self.sheet_box, 2, 1) self.sheet_box.hide() rb_button = gui.appendRadioButton(vbox, "URL:", addToLayout=False) layout.addWidget(rb_button, 3, 0, Qt.AlignVCenter) self.url_combo = url_combo = QComboBox() url_model = NamedURLModel(self.sheet_names) url_model.wrap(self.recent_urls) url_combo.setLineEdit(LineEditSelectOnFocus()) url_combo.setModel(url_model) url_combo.setSizePolicy(Policy.Ignored, Policy.Fixed) url_combo.setEditable(True) url_combo.setInsertPolicy(url_combo.InsertAtTop) url_edit = url_combo.lineEdit() l, t, r, b = url_edit.getTextMargins() url_edit.setTextMargins(l + 5, t, r, b) layout.addWidget(url_combo, 3, 1, 3, 3) url_combo.activated.connect(self._url_set) # whit completer we set that combo box is case sensitive when # matching the history completer = QCompleter() completer.setCaseSensitivity(Qt.CaseSensitive) url_combo.setCompleter(completer) box = gui.vBox(self.controlArea, "Info") self.infolabel = gui.widgetLabel(box, 'No data loaded.') self.warnings = gui.widgetLabel(box, '') box = gui.widgetBox(self.controlArea, "Columns (Double click to edit)") self.domain_editor = DomainEditor(self) self.editor_model = self.domain_editor.model() box.layout().addWidget(self.domain_editor) box = gui.hBox(self.controlArea) gui.button(box, self, "Browse documentation datasets", callback=lambda: self.browse_file(True), autoDefault=False) gui.rubber(box) gui.button(box, self, "Reset", callback=self.reset_domain_edit) self.apply_button = gui.button(box, self, "Apply", callback=self.apply_domain_edit) self.apply_button.setEnabled(False) self.apply_button.setFixedWidth(170) self.editor_model.dataChanged.connect( lambda: self.apply_button.setEnabled(True)) self.set_file_list() # Must not call open_file from within __init__. open_file # explicitly re-enters the event loop (by a progress bar) self.setAcceptDrops(True) if self.source == self.LOCAL_FILE: last_path = self.last_path() if last_path and os.path.exists(last_path) and \ os.path.getsize(last_path) > self.SIZE_LIMIT: self.Warning.file_too_big() return QTimer.singleShot(0, self.load_data) @staticmethod def sizeHint(): return QSize(600, 550) def select_file(self, n): assert n < len(self.recent_paths) super().select_file(n) if self.recent_paths: self.source = self.LOCAL_FILE self.load_data() self.set_file_list() def select_sheet(self): self.recent_paths[0].sheet = self.sheet_combo.currentText() self.load_data() def _url_set(self): url = self.url_combo.currentText() pos = self.recent_urls.index(url) url = url.strip() if not urlparse(url).scheme: url = 'http://' + url self.url_combo.setItemText(pos, url) self.recent_urls[pos] = url self.source = self.URL self.load_data() def browse_file(self, in_demos=False): if in_demos: start_file = get_sample_datasets_dir() if not os.path.exists(start_file): QMessageBox.information( None, "File", "Cannot find the directory with documentation datasets") return else: start_file = self.last_path() or os.path.expanduser("~/") readers = [ f for f in FileFormat.formats if getattr(f, 'read', None) and getattr(f, "EXTENSIONS", None) ] filename, reader, _ = open_filename_dialog(start_file, None, readers) if not filename: return self.add_path(filename) if reader is not None: self.recent_paths[0].file_format = reader.qualified_name() self.source = self.LOCAL_FILE self.load_data() # Open a file, create data from it and send it over the data channel def load_data(self): # We need to catch any exception type since anything can happen in # file readers self.closeContext() self.domain_editor.set_domain(None) self.apply_button.setEnabled(False) self.clear_messages() self.set_file_list() error = self._try_load() if error: error() self.data = None self.sheet_box.hide() self.Outputs.data.send(None) self.infolabel.setText("No data.") def _try_load(self): # pylint: disable=broad-except if self.last_path() and not os.path.exists(self.last_path()): return self.Error.file_not_found try: self.reader = self._get_reader() assert self.reader is not None except Exception: return self.Error.missing_reader if self.reader is self.NoFileSelected: self.Outputs.data.send(None) return None try: self._update_sheet_combo() except Exception: return self.Error.sheet_error with catch_warnings(record=True) as warnings: try: data = self.reader.read() except Exception as ex: log.exception(ex) return lambda x=ex: self.Error.unknown(str(x)) if warnings: self.Warning.load_warning(warnings[-1].message.args[0]) self.infolabel.setText(self._describe(data)) self.loaded_file = self.last_path() add_origin(data, self.loaded_file) self.data = data self.openContext(data.domain) self.apply_domain_edit() # sends data return None def _get_reader(self) -> FileFormat: if self.source == self.LOCAL_FILE: path = self.last_path() if path is None: return self.NoFileSelected if self.recent_paths and self.recent_paths[0].file_format: qname = self.recent_paths[0].file_format reader_class = class_from_qualified_name(qname) reader = reader_class(path) else: reader = FileFormat.get_reader(path) if self.recent_paths and self.recent_paths[0].sheet: reader.select_sheet(self.recent_paths[0].sheet) return reader else: url = self.url_combo.currentText().strip() if url: return UrlReader(url) else: return self.NoFileSelected def _update_sheet_combo(self): if len(self.reader.sheets) < 2: self.sheet_box.hide() self.reader.select_sheet(None) return self.sheet_combo.clear() self.sheet_combo.addItems(self.reader.sheets) self._select_active_sheet() self.sheet_box.show() def _select_active_sheet(self): if self.reader.sheet: try: idx = self.reader.sheets.index(self.reader.sheet) self.sheet_combo.setCurrentIndex(idx) except ValueError: # Requested sheet does not exist in this file self.reader.select_sheet(None) else: self.sheet_combo.setCurrentIndex(0) @staticmethod def _describe(table): def missing_prop(prop): if prop: return f"({prop * 100:.1f}% missing values)" else: return "(no missing values)" domain = table.domain text = "" attrs = getattr(table, "attributes", {}) descs = [ attrs[desc] for desc in ("Name", "Description") if desc in attrs ] if len(descs) == 2: descs[0] = f"<b>{descs[0]}</b>" if descs: text += f"<p>{'<br/>'.join(descs)}</p>" text += f"<p>{len(table)} instance(s)" missing_in_attr = missing_prop(table.has_missing_attribute() and table.get_nan_frequency_attribute()) missing_in_class = missing_prop(table.has_missing_class() and table.get_nan_frequency_class()) text += f"<br/>{len(domain.attributes)} feature(s) {missing_in_attr}" if domain.has_continuous_class: text += f"<br/>Regression; numerical class {missing_in_class}" elif domain.has_discrete_class: text += "<br/>Classification; categorical class " \ f"with {len(domain.class_var.values)} values {missing_in_class}" elif table.domain.class_vars: text += "<br/>Multi-target; " \ f"{len(table.domain.class_vars)} target variables " \ f"{missing_in_class}" else: text += "<br/>Data has no target variable." text += f"<br/>{len(domain.metas)} meta attribute(s)" text += "</p>" if 'Timestamp' in table.domain: # Google Forms uses this header to timestamp responses text += f"<p>First entry: {table[0, 'Timestamp']}<br/>" \ f"Last entry: {table[-1, 'Timestamp']}</p>" return text def storeSpecificSettings(self): self.current_context.modified_variables = self.variables[:] def retrieveSpecificSettings(self): if hasattr(self.current_context, "modified_variables"): self.variables[:] = self.current_context.modified_variables def reset_domain_edit(self): self.domain_editor.reset_domain() self.apply_domain_edit() def apply_domain_edit(self): if self.data is None: table = None else: domain, cols = self.domain_editor.get_domain( self.data.domain, self.data) if not (domain.variables or domain.metas): table = None else: X, y, m = cols table = Table.from_numpy(domain, X, y, m, self.data.W) table.name = self.data.name table.ids = np.array(self.data.ids) table.attributes = getattr(self.data, 'attributes', {}) self.Outputs.data.send(table) self.apply_button.setEnabled(False) def get_widget_name_extension(self): _, name = os.path.split(self.loaded_file) return os.path.splitext(name)[0] def send_report(self): def get_ext_name(filename): try: return FileFormat.names[os.path.splitext(filename)[1]] except KeyError: return "unknown" if self.data is None: self.report_paragraph("File", "No file.") return if self.source == self.LOCAL_FILE: home = os.path.expanduser("~") if self.loaded_file.startswith(home): # os.path.join does not like ~ name = "~" + os.path.sep + \ self.loaded_file[len(home):].lstrip("/").lstrip("\\") else: name = self.loaded_file if self.sheet_combo.isVisible(): name += f" ({self.sheet_combo.currentText()})" self.report_items("File", [("File name", name), ("Format", get_ext_name(name))]) else: self.report_items("Data", [("Resource", self.url), ("Format", get_ext_name(self.url))]) self.report_data("Data", self.data) @staticmethod def dragEnterEvent(event): """Accept drops of valid file urls""" urls = event.mimeData().urls() if urls: try: FileFormat.get_reader(urls[0].toLocalFile()) event.acceptProposedAction() except IOError: pass def dropEvent(self, event): """Handle file drops""" urls = event.mimeData().urls() if urls: self.add_path(urls[0].toLocalFile()) # add first file self.source = self.LOCAL_FILE self.load_data() def workflowEnvChanged(self, key, value, oldvalue): """ Function called when environment changes (e.g. while saving the scheme) It make sure that all environment connected values are modified (e.g. relative file paths are changed) """ self.update_file_list(key, value, oldvalue)
class OWFile(widget.OWWidget, RecentPathsWComboMixin): name = "File" id = "orange.widgets.data.file" description = "Read data from an input file or network " \ "and send a data table to the output." icon = "icons/File.svg" priority = 10 category = "Data" keywords = ["data", "file", "load", "read"] outputs = [ widget.OutputSignal( "Data", Table, doc="Attribute-valued data set read from the input file.") ] want_main_area = False SEARCH_PATHS = [("sample-datasets", get_sample_datasets_dir())] SIZE_LIMIT = 1e7 LOCAL_FILE, URL = range(2) settingsHandler = PerfectDomainContextHandler() # Overload RecentPathsWidgetMixin.recent_paths to set defaults recent_paths = Setting([ RecentPath("", "sample-datasets", "iris.tab"), RecentPath("", "sample-datasets", "titanic.tab"), RecentPath("", "sample-datasets", "housing.tab"), RecentPath("", "sample-datasets", "heart_disease.tab"), ]) recent_urls = Setting([]) source = Setting(LOCAL_FILE) xls_sheet = ContextSetting("") sheet_names = Setting({}) url = Setting("") variables = ContextSetting([]) dlg_formats = ("All readable files ({});;".format( '*' + ' *'.join(FileFormat.readers.keys())) + ";;".join( "{} (*{})".format(f.DESCRIPTION, ' *'.join(f.EXTENSIONS)) for f in sorted(set(FileFormat.readers.values()), key=list(FileFormat.readers.values()).index))) domain_editor = SettingProvider(DomainEditor) class Warning(widget.OWWidget.Warning): file_too_big = widget.Msg( "The file is too large to load automatically." " Press Reload to load.") class Error(widget.OWWidget.Error): file_not_found = widget.Msg("File not found.") def __init__(self): super().__init__() RecentPathsWComboMixin.__init__(self) self.domain = None self.data = None self.loaded_file = "" self.reader = None layout = QGridLayout() gui.widgetBox(self.controlArea, margin=0, orientation=layout) vbox = gui.radioButtons(None, self, "source", box=True, addSpace=True, callback=self.load_data, addToLayout=False) rb_button = gui.appendRadioButton(vbox, "File:", addToLayout=False) layout.addWidget(rb_button, 0, 0, Qt.AlignVCenter) box = gui.hBox(None, addToLayout=False, margin=0) box.setSizePolicy(Policy.MinimumExpanding, Policy.Fixed) self.file_combo.setSizePolicy(Policy.MinimumExpanding, Policy.Fixed) self.file_combo.activated[int].connect(self.select_file) box.layout().addWidget(self.file_combo) layout.addWidget(box, 0, 1) file_button = gui.button(None, self, '...', callback=self.browse_file, autoDefault=False) file_button.setIcon(self.style().standardIcon(QStyle.SP_DirOpenIcon)) file_button.setSizePolicy(Policy.Maximum, Policy.Fixed) layout.addWidget(file_button, 0, 2) reload_button = gui.button(None, self, "Reload", callback=self.load_data, autoDefault=False) reload_button.setIcon(self.style().standardIcon( QStyle.SP_BrowserReload)) reload_button.setSizePolicy(Policy.Fixed, Policy.Fixed) layout.addWidget(reload_button, 0, 3) self.sheet_box = gui.hBox(None, addToLayout=False, margin=0) self.sheet_combo = gui.comboBox( None, self, "xls_sheet", callback=self.select_sheet, sendSelectedValue=True, ) self.sheet_combo.setSizePolicy(Policy.MinimumExpanding, Policy.Fixed) self.sheet_label = QLabel() self.sheet_label.setText('Sheet') self.sheet_label.setSizePolicy(Policy.MinimumExpanding, Policy.Fixed) self.sheet_box.layout().addWidget(self.sheet_label, Qt.AlignLeft) self.sheet_box.layout().addWidget(self.sheet_combo, Qt.AlignVCenter) layout.addWidget(self.sheet_box, 2, 1) self.sheet_box.hide() rb_button = gui.appendRadioButton(vbox, "URL:", addToLayout=False) layout.addWidget(rb_button, 3, 0, Qt.AlignVCenter) self.url_combo = url_combo = QComboBox() url_model = NamedURLModel(self.sheet_names) url_model.wrap(self.recent_urls) url_combo.setLineEdit(LineEditSelectOnFocus()) url_combo.setModel(url_model) url_combo.setSizePolicy(Policy.MinimumExpanding, Policy.Fixed) url_combo.setEditable(True) url_combo.setInsertPolicy(url_combo.InsertAtTop) url_edit = url_combo.lineEdit() l, t, r, b = url_edit.getTextMargins() url_edit.setTextMargins(l + 5, t, r, b) layout.addWidget(url_combo, 3, 1, 3, 3) url_combo.activated.connect(self._url_set) box = gui.vBox(self.controlArea, "Info") self.info = gui.widgetLabel(box, 'No data loaded.') self.warnings = gui.widgetLabel(box, '') box = gui.widgetBox(self.controlArea, "Columns (Double click to edit)") self.domain_editor = DomainEditor(self) self.editor_model = self.domain_editor.model() box.layout().addWidget(self.domain_editor) box = gui.hBox(self.controlArea) gui.button(box, self, "Browse documentation data sets", callback=lambda: self.browse_file(True), autoDefault=False) gui.rubber(box) box.layout().addWidget(self.report_button) self.report_button.setFixedWidth(170) self.apply_button = gui.button(box, self, "Apply", callback=self.apply_domain_edit) self.apply_button.setEnabled(False) self.apply_button.setFixedWidth(170) self.editor_model.dataChanged.connect( lambda: self.apply_button.setEnabled(True)) self.set_file_list() # Must not call open_file from within __init__. open_file # explicitly re-enters the event loop (by a progress bar) self.setAcceptDrops(True) if self.source == self.LOCAL_FILE: last_path = self.last_path() if last_path and os.path.exists(last_path) and \ os.path.getsize(last_path) > self.SIZE_LIMIT: self.Warning.file_too_big() return QTimer.singleShot(0, self.load_data) def sizeHint(self): return QSize(600, 550) def select_file(self, n): assert n < len(self.recent_paths) super().select_file(n) if self.recent_paths: self.source = self.LOCAL_FILE self.load_data() self.set_file_list() def select_sheet(self): self.recent_paths[0].sheet = self.sheet_combo.currentText() self.load_data() def _url_set(self): self.source = self.URL self.load_data() def browse_file(self, in_demos=False): if in_demos: start_file = get_sample_datasets_dir() if not os.path.exists(start_file): QMessageBox.information( None, "File", "Cannot find the directory with documentation data sets") return else: start_file = self.last_path() or os.path.expanduser("~/") filename, _ = QFileDialog.getOpenFileName(self, 'Open Orange Data File', start_file, self.dlg_formats) if not filename: return self.add_path(filename) self.source = self.LOCAL_FILE self.load_data() # Open a file, create data from it and send it over the data channel def load_data(self): # We need to catch any exception type since anything can happen in # file readers # pylint: disable=broad-except self.closeContext() self.domain_editor.set_domain(None) self.apply_button.setEnabled(False) self.clear_messages() self.set_file_list() if self.last_path() and not os.path.exists(self.last_path()): self.Error.file_not_found() self.send("Data", None) self.info.setText("No data.") return error = None try: self.reader = self._get_reader() if self.reader is None: self.data = None self.send("Data", None) self.info.setText("No data.") self.sheet_box.hide() return except Exception as ex: error = ex if not error: self._update_sheet_combo() with catch_warnings(record=True) as warnings: try: data = self.reader.read() except Exception as ex: log.exception(ex) error = ex self.warning(warnings[-1].message.args[0] if warnings else '') if error: self.data = None self.send("Data", None) self.info.setText("An error occurred:\n{}".format(error)) self.sheet_box.hide() return self.info.setText(self._describe(data)) self.loaded_file = self.last_path() add_origin(data, self.loaded_file) self.data = data self.openContext(data.domain) self.apply_domain_edit() # sends data def _get_reader(self): """ Returns ------- FileFormat """ if self.source == self.LOCAL_FILE: reader = FileFormat.get_reader(self.last_path()) if self.recent_paths and self.recent_paths[0].sheet: reader.select_sheet(self.recent_paths[0].sheet) return reader elif self.source == self.URL: url = self.url_combo.currentText().strip() if url: return UrlReader(url) def _update_sheet_combo(self): if len(self.reader.sheets) < 2: self.sheet_box.hide() self.reader.select_sheet(None) return self.sheet_combo.clear() self.sheet_combo.addItems(self.reader.sheets) self._select_active_sheet() self.sheet_box.show() def _select_active_sheet(self): if self.reader.sheet: try: idx = self.reader.sheets.index(self.reader.sheet) self.sheet_combo.setCurrentIndex(idx) except ValueError: # Requested sheet does not exist in this file self.reader.select_sheet(None) else: self.sheet_combo.setCurrentIndex(0) def _describe(self, table): domain = table.domain text = "" attrs = getattr(table, "attributes", {}) descs = [ attrs[desc] for desc in ("Name", "Description") if desc in attrs ] if len(descs) == 2: descs[0] = "<b>{}</b>".format(descs[0]) if descs: text += "<p>{}</p>".format("<br/>".join(descs)) text += "<p>{} instance(s), {} feature(s), {} meta attribute(s)".\ format(len(table), len(domain.attributes), len(domain.metas)) if domain.has_continuous_class: text += "<br/>Regression; numerical class." elif domain.has_discrete_class: text += "<br/>Classification; discrete class with {} values.".\ format(len(domain.class_var.values)) elif table.domain.class_vars: text += "<br/>Multi-target; {} target variables.".format( len(table.domain.class_vars)) else: text += "<br/>Data has no target variable." text += "</p>" if 'Timestamp' in table.domain: # Google Forms uses this header to timestamp responses text += '<p>First entry: {}<br/>Last entry: {}</p>'.format( table[0, 'Timestamp'], table[-1, 'Timestamp']) return text def storeSpecificSettings(self): self.current_context.modified_variables = self.variables[:] def retrieveSpecificSettings(self): if hasattr(self.current_context, "modified_variables"): self.variables[:] = self.current_context.modified_variables def apply_domain_edit(self): if self.data is not None: domain, cols = self.domain_editor.get_domain( self.data.domain, self.data) X, y, m = cols X = np.array(X).T if len(X) else np.empty((len(self.data), 0)) y = np.array(y).T if len(y) else None dtpe = object if any( isinstance(m, StringVariable) for m in domain.metas) else float m = np.array(m, dtype=dtpe).T if len(m) else None table = Table.from_numpy(domain, X, y, m, self.data.W) table.name = self.data.name table.ids = np.array(self.data.ids) table.attributes = getattr(self.data, 'attributes', {}) else: table = self.data self.send("Data", table) self.apply_button.setEnabled(False) def get_widget_name_extension(self): _, name = os.path.split(self.loaded_file) return os.path.splitext(name)[0] def send_report(self): def get_ext_name(filename): try: return FileFormat.names[os.path.splitext(filename)[1]] except KeyError: return "unknown" if self.data is None: self.report_paragraph("File", "No file.") return if self.source == self.LOCAL_FILE: home = os.path.expanduser("~") if self.loaded_file.startswith(home): # os.path.join does not like ~ name = "~" + os.path.sep + \ self.loaded_file[len(home):].lstrip("/").lstrip("\\") else: name = self.loaded_file if self.sheet_combo.isVisible(): name += " ({})".format(self.sheet_combo.currentText()) self.report_items("File", [("File name", name), ("Format", get_ext_name(name))]) else: self.report_items("Data", [("Resource", self.url), ("Format", get_ext_name(self.url))]) self.report_data("Data", self.data) def dragEnterEvent(self, event): """Accept drops of valid file urls""" urls = event.mimeData().urls() if urls: try: FileFormat.get_reader( OSX_NSURL_toLocalFile(urls[0]) or urls[0].toLocalFile()) event.acceptProposedAction() except IOError: pass def dropEvent(self, event): """Handle file drops""" urls = event.mimeData().urls() if urls: self.add_path( OSX_NSURL_toLocalFile(urls[0]) or urls[0].toLocalFile()) # add first file self.source = self.LOCAL_FILE self.load_data()
class OWFile(widget.OWWidget, RecentPathsWComboMixin): name = "File" id = "orange.widgets.data.file" description = "Read a data from an input file or network " \ "and send the data table to the output." icon = "icons/File.svg" priority = 10 category = "Data" keywords = ["data", "file", "load", "read"] outputs = [widget.OutputSignal( "Data", Table, doc="Attribute-valued data set read from the input file.")] want_main_area = False SEARCH_PATHS = [("sample-datasets", get_sample_datasets_dir())] LOCAL_FILE, URL = range(2) settingsHandler = PerfectDomainContextHandler() # Overload RecentPathsWidgetMixin.recent_paths to set defaults recent_paths = Setting([ RecentPath("", "sample-datasets", "iris.tab"), RecentPath("", "sample-datasets", "titanic.tab"), RecentPath("", "sample-datasets", "housing.tab"), ]) recent_urls = Setting([]) source = Setting(LOCAL_FILE) xls_sheet = ContextSetting("") sheet_names = Setting({}) url = Setting("") variables = ContextSetting([]) dlg_formats = ( "All readable files ({});;".format( '*' + ' *'.join(FileFormat.readers.keys())) + ";;".join("{} (*{})".format(f.DESCRIPTION, ' *'.join(f.EXTENSIONS)) for f in sorted(set(FileFormat.readers.values()), key=list(FileFormat.readers.values()).index))) def __init__(self): super().__init__() RecentPathsWComboMixin.__init__(self) self.domain = None self.data = None self.loaded_file = "" self.reader = None layout = QtGui.QGridLayout() gui.widgetBox(self.controlArea, margin=0, orientation=layout) vbox = gui.radioButtons(None, self, "source", box=True, addSpace=True, callback=self.load_data, addToLayout=False) rb_button = gui.appendRadioButton(vbox, "File:", addToLayout=False) layout.addWidget(rb_button, 0, 0, QtCore.Qt.AlignVCenter) box = gui.hBox(None, addToLayout=False, margin=0) box.setSizePolicy(Policy.MinimumExpanding, Policy.Fixed) self.file_combo.setSizePolicy(Policy.MinimumExpanding, Policy.Fixed) self.file_combo.activated[int].connect(self.select_file) box.layout().addWidget(self.file_combo) layout.addWidget(box, 0, 1) file_button = gui.button( None, self, '...', callback=self.browse_file, autoDefault=False) file_button.setIcon(self.style().standardIcon( QtGui.QStyle.SP_DirOpenIcon)) file_button.setSizePolicy(Policy.Maximum, Policy.Fixed) layout.addWidget(file_button, 0, 2) reload_button = gui.button( None, self, "Reload", callback=self.load_data, autoDefault=False) reload_button.setIcon(self.style().standardIcon( QtGui.QStyle.SP_BrowserReload)) reload_button.setSizePolicy(Policy.Fixed, Policy.Fixed) layout.addWidget(reload_button, 0, 3) self.sheet_box = gui.hBox(None, addToLayout=False, margin=0) self.sheet_combo = gui.comboBox(None, self, "xls_sheet", callback=self.select_sheet, sendSelectedValue=True) self.sheet_combo.setSizePolicy( Policy.MinimumExpanding, Policy.Fixed) self.sheet_label = QtGui.QLabel() self.sheet_label.setText('Sheet') self.sheet_label.setSizePolicy( Policy.MinimumExpanding, Policy.Fixed) self.sheet_box.layout().addWidget( self.sheet_label, QtCore.Qt.AlignLeft) self.sheet_box.layout().addWidget( self.sheet_combo, QtCore.Qt.AlignVCenter) layout.addWidget(self.sheet_box, 2, 1) self.sheet_box.hide() rb_button = gui.appendRadioButton(vbox, "URL:", addToLayout=False) layout.addWidget(rb_button, 3, 0, QtCore.Qt.AlignVCenter) self.url_combo = url_combo = QtGui.QComboBox() url_model = NamedURLModel(self.sheet_names) url_model.wrap(self.recent_urls) url_combo.setModel(url_model) url_combo.setSizePolicy(Policy.MinimumExpanding, Policy.Fixed) url_combo.setEditable(True) url_combo.setInsertPolicy(url_combo.InsertAtTop) url_edit = url_combo.lineEdit() l, t, r, b = url_edit.getTextMargins() url_edit.setTextMargins(l + 5, t, r, b) layout.addWidget(url_combo, 3, 1, 3, 3) url_combo.activated.connect(self._url_set) box = gui.vBox(self.controlArea, "Info") self.info = gui.widgetLabel(box, 'No data loaded.') self.warnings = gui.widgetLabel(box, '') box = gui.widgetBox(self.controlArea, "Columns (Double click to edit)") domain_editor = DomainEditor(self.variables) self.editor_model = domain_editor.model() box.layout().addWidget(domain_editor) box = gui.hBox(self.controlArea) gui.button( box, self, "Browse documentation data sets", callback=lambda: self.browse_file(True), autoDefault=False) gui.rubber(box) box.layout().addWidget(self.report_button) self.report_button.setFixedWidth(170) self.apply_button = gui.button( box, self, "Apply", callback=self.apply_domain_edit) self.apply_button.hide() self.apply_button.setFixedWidth(170) self.editor_model.dataChanged.connect(self.apply_button.show) self.set_file_list() # Must not call open_file from within __init__. open_file # explicitly re-enters the event loop (by a progress bar) QtCore.QTimer.singleShot(0, self.load_data) self.setAcceptDrops(True) def sizeHint(self): return QtCore.QSize(600, 550) def select_file(self, n): assert n < len(self.recent_paths) super().select_file(n) if self.recent_paths: self.source = self.LOCAL_FILE self.load_data() self.set_file_list() def select_sheet(self): self.recent_paths[0].sheet = self.sheet_combo.currentText() self.load_data() def _url_set(self): self.source = self.URL self.load_data() def browse_file(self, in_demos=False): if in_demos: start_file = get_sample_datasets_dir() if not os.path.exists(start_file): QtGui.QMessageBox.information( None, "File", "Cannot find the directory with documentation data sets") return else: start_file = self.last_path() or os.path.expanduser("~/") filename = QtGui.QFileDialog.getOpenFileName( self, 'Open Orange Data File', start_file, self.dlg_formats) if not filename: return self.loaded_file = filename self.add_path(filename) self.source = self.LOCAL_FILE self.load_data() # Open a file, create data from it and send it over the data channel def load_data(self): self.reader = self._get_reader() self._update_sheet_combo() errors = [] with catch_warnings(record=True) as warnings: try: data = self.reader.read() except Exception as ex: errors.append("An error occurred:") errors.append(str(ex)) data = None self.editor_model.reset() self.warning(warnings[-1].message.args[0] if warnings else '') if data is None: self.send("Data", None) self.info.setText("\n".join(errors)) return self.info.setText(self._describe(data)) add_origin(data, self.loaded_file or self.last_path()) self.send("Data", data) self.editor_model.set_domain(data.domain) self.data = data def _get_reader(self): """ Returns ------- FileFormat """ if self.source == self.LOCAL_FILE: reader = FileFormat.get_reader(self.last_path()) if self.recent_paths and self.recent_paths[0].sheet: reader.select_sheet(self.recent_paths[0].sheet) return reader elif self.source == self.URL: return UrlReader(self.url_combo.currentText()) def _update_sheet_combo(self): if len(self.reader.sheets) < 2: self.sheet_box.hide() self.reader.select_sheet(None) return self.sheet_combo.clear() self.sheet_combo.addItems(self.reader.sheets) self._select_active_sheet() self.sheet_box.show() def _select_active_sheet(self): if self.reader.sheet: try: idx = self.reader.sheets.index(self.reader.sheet) self.sheet_combo.setCurrentIndex(idx) except ValueError: # Requested sheet does not exist in this file self.reader.select_sheet(None) else: self.sheet_combo.setCurrentIndex(0) def _describe(self, table): domain = table.domain text = "{} instance(s), {} feature(s), {} meta attribute(s)".format( len(table), len(domain.attributes), len(domain.metas)) if domain.has_continuous_class: text += "\nRegression; numerical class." elif domain.has_discrete_class: text += "\nClassification; discrete class with {} values.".format( len(domain.class_var.values)) elif table.domain.class_vars: text += "\nMulti-target; {} target variables.".format( len(table.domain.class_vars)) else: text += "\nData has no target variable." if 'Timestamp' in table.domain: # Google Forms uses this header to timestamp responses text += '\n\nFirst entry: {}\nLast entry: {}'.format( table[0, 'Timestamp'], table[-1, 'Timestamp']) return text def storeSpecificSettings(self): self.current_context.modified_variables = self.variables[:] def retrieveSpecificSettings(self): if hasattr(self.current_context, "modified_variables"): self.variables[:] = self.current_context.modified_variables def apply_domain_edit(self): attributes = [] class_vars = [] metas = [] places = [attributes, class_vars, metas] X, y, m = [], [], [] cols = [X, y, m] # Xcols, Ycols, Mcols def is_missing(x): return str(x) in ("nan", "") for column, (name, tpe, place, vals, is_con), (orig_var, orig_plc) in \ zip(count(), self.editor_model.variables, chain([(at, 0) for at in self.data.domain.attributes], [(cl, 1) for cl in self.data.domain.class_vars], [(mt, 2) for mt in self.data.domain.metas])): if place == 3: continue if orig_plc == 2: col_data = list(chain(*self.data[:, orig_var].metas)) else: col_data = list(chain(*self.data[:, orig_var])) if name == orig_var.name and tpe == type(orig_var): var = orig_var elif tpe == DiscreteVariable: values = list(str(i) for i in set(col_data) if not is_missing(i)) var = tpe(name, values) col_data = [np.nan if is_missing(x) else values.index(str(x)) for x in col_data] elif tpe == StringVariable and type(orig_var) == DiscreteVariable: var = tpe(name) col_data = [orig_var.repr_val(x) if not np.isnan(x) else "" for x in col_data] else: var = tpe(name) places[place].append(var) cols[place].append(col_data) domain = Domain(attributes, class_vars, metas) X = np.array(X).T if len(X) else np.empty((len(self.data), 0)) y = np.array(y).T if len(y) else None dtpe = object if any(isinstance(m, StringVariable) for m in domain.metas) else float m = np.array(m, dtype=dtpe).T if len(m) else None table = Table.from_numpy(domain, X, y, m, self.data.W) self.send("Data", table) self.apply_button.hide() def get_widget_name_extension(self): _, name = os.path.split(self.loaded_file) return os.path.splitext(name)[0] def send_report(self): def get_ext_name(filename): try: return FileFormat.names[os.path.splitext(filename)[1]] except KeyError: return "unknown" if self.data is None: self.report_paragraph("File", "No file.") return if self.source == self.LOCAL_FILE: home = os.path.expanduser("~") if self.loaded_file.startswith(home): # os.path.join does not like ~ name = "~/" + \ self.loaded_file[len(home):].lstrip("/").lstrip("\\") else: name = self.loaded_file if self.sheet_combo.isVisible(): name += " ({})".format(self.sheet_combo.currentText()) self.report_items("File", [("File name", name), ("Format", get_ext_name(name))]) else: self.report_items("Data", [("Resource", self.url), ("Format", get_ext_name(self.url))]) self.report_data("Data", self.data) def dragEnterEvent(self, event): """Accept drops of valid file urls""" urls = event.mimeData().urls() if urls: try: FileFormat.get_reader(OSX_NSURL_toLocalFile(urls[0]) or urls[0].toLocalFile()) event.acceptProposedAction() except IOError: pass def dropEvent(self, event): """Handle file drops""" urls = event.mimeData().urls() if urls: self.add_path(OSX_NSURL_toLocalFile(urls[0]) or urls[0].toLocalFile()) # add first file self.source = self.LOCAL_FILE self.load_data()
class OWFile(widget.OWWidget, RecentPathsWComboMixin): name = "文件(File)" id = "orange.widgets.data.file" description = "从输入文件或网络读取数据并将数据表发送到输出。" icon = "icons/File.svg" priority = 10 category = "数据(Data)" keywords = ["file", "load", "read", "open", "wenjian"] class Outputs: data = Output("数据(Data)", Table, doc="Attribute-valued dataset read from the input file.", replaces=['Data']) want_main_area = False buttons_area_orientation = None SEARCH_PATHS = [("sample-datasets", get_sample_datasets_dir())] SIZE_LIMIT = 1e7 LOCAL_FILE, URL = range(2) settingsHandler = PerfectDomainContextHandler( match_values=PerfectDomainContextHandler.MATCH_VALUES_ALL) # pylint seems to want declarations separated from definitions recent_paths: List[RecentPath] recent_urls: List[str] variables: list # Overload RecentPathsWidgetMixin.recent_paths to set defaults recent_paths = Setting([ RecentPath("", "sample-datasets", "iris.tab"), RecentPath("", "sample-datasets", "titanic.tab"), RecentPath("", "sample-datasets", "housing.tab"), RecentPath("", "sample-datasets", "heart_disease.tab"), RecentPath("", "sample-datasets", "brown-selected.tab"), RecentPath("", "sample-datasets", "zoo.tab"), ]) recent_urls = Setting([]) source = Setting(LOCAL_FILE) sheet_names = Setting({}) url = Setting("") variables = ContextSetting([]) domain_editor = SettingProvider(DomainEditor) class Information(widget.OWWidget.Information): no_file_selected = Msg("No file selected.") class Warning(widget.OWWidget.Warning): file_too_big = Msg("The file is too large to load automatically." " Press Reload to load.") load_warning = Msg("Read warning:\n{}") performance_warning = Msg( "Categorical variables with >100 values may decrease performance.") renamed_vars = Msg("Some variables have been renamed " "to avoid duplicates.\n{}") multiple_targets = Msg("Most widgets do not support multiple targets") class Error(widget.OWWidget.Error): file_not_found = Msg("File not found.") missing_reader = Msg("Missing reader.") sheet_error = Msg("Error listing available sheets.") unknown = Msg("Read error:\n{}") UserAdviceMessages = [ widget.Message( "Use CSV File Import widget for advanced options " "for comma-separated files", "use-csv-file-import"), widget.Message( "This widget loads only tabular data. Use other widgets to load " "other data types like models, distance matrices and networks.", "other-data-types") ] def __init__(self): super().__init__() RecentPathsWComboMixin.__init__(self) self.domain = None self.data = None self.loaded_file = "" self.reader = None readers = [ f for f in FileFormat.formats if getattr(f, 'read', None) and getattr(f, "EXTENSIONS", None) ] def group_readers_per_addon_key(w): # readers from Orange.data.io should go first def package(w): package = w.qualified_name().split(".")[:-1] package = package[:2] if ".".join(package) == "Orange.data": return ["0"] # force "Orange" to come first return package return package(w), w.DESCRIPTION self.available_readers = sorted(set(readers), key=group_readers_per_addon_key) layout = QGridLayout() layout.setSpacing(4) gui.widgetBox(self.controlArea, orientation=layout, box='数据源') vbox = gui.radioButtons(None, self, "source", box=True, callback=self.load_data, addToLayout=False) rb_button = gui.appendRadioButton(vbox, "文件:", addToLayout=False) layout.addWidget(rb_button, 0, 0, Qt.AlignVCenter) box = gui.hBox(None, addToLayout=False, margin=0) box.setSizePolicy(Policy.Expanding, Policy.Fixed) self.file_combo.setSizePolicy(Policy.Expanding, Policy.Fixed) self.file_combo.setMinimumSize(QSize(100, 1)) self.file_combo.activated[int].connect(self.select_file) box.layout().addWidget(self.file_combo) layout.addWidget(box, 0, 1) file_button = gui.button(None, self, '...', callback=self.browse_file, autoDefault=False) file_button.setIcon(self.style().standardIcon(QStyle.SP_DirOpenIcon)) file_button.setSizePolicy(Policy.Maximum, Policy.Fixed) layout.addWidget(file_button, 0, 2) reload_button = gui.button(None, self, "重新加载", callback=self.load_data, autoDefault=False) reload_button.setIcon(self.style().standardIcon( QStyle.SP_BrowserReload)) reload_button.setSizePolicy(Policy.Fixed, Policy.Fixed) layout.addWidget(reload_button, 0, 3) self.sheet_box = gui.hBox(None, addToLayout=False, margin=0) self.sheet_combo = QComboBox() self.sheet_combo.activated[str].connect(self.select_sheet) self.sheet_combo.setSizePolicy(Policy.Expanding, Policy.Fixed) self.sheet_combo.setMinimumSize(QSize(50, 1)) self.sheet_label = QLabel() self.sheet_label.setText('Sheet') self.sheet_label.setSizePolicy(Policy.MinimumExpanding, Policy.Fixed) self.sheet_box.layout().addWidget(self.sheet_label, Qt.AlignLeft) self.sheet_box.layout().addWidget(self.sheet_combo, Qt.AlignVCenter) layout.addWidget(self.sheet_box, 2, 1) self.sheet_box.hide() rb_button = gui.appendRadioButton(vbox, "URL:", addToLayout=False) layout.addWidget(rb_button, 3, 0, Qt.AlignVCenter) self.url_combo = url_combo = QComboBox() url_model = NamedURLModel(self.sheet_names) url_model.wrap(self.recent_urls) url_combo.setLineEdit(LineEditSelectOnFocus()) url_combo.setModel(url_model) url_combo.setSizePolicy(Policy.Ignored, Policy.Fixed) url_combo.setEditable(True) url_combo.setInsertPolicy(url_combo.InsertAtTop) url_edit = url_combo.lineEdit() l, t, r, b = url_edit.getTextMargins() url_edit.setTextMargins(l + 5, t, r, b) layout.addWidget(url_combo, 3, 1, 1, 3) url_combo.activated.connect(self._url_set) # whit completer we set that combo box is case sensitive when # matching the history completer = QCompleter() completer.setCaseSensitivity(Qt.CaseSensitive) url_combo.setCompleter(completer) layout = QGridLayout() layout.setSpacing(4) gui.widgetBox(self.controlArea, orientation=layout, box='文件类型') box = gui.hBox(None, addToLayout=False, margin=0) box.setSizePolicy(Policy.Expanding, Policy.Fixed) self.reader_combo = QComboBox(self) self.reader_combo.setSizePolicy(Policy.Expanding, Policy.Fixed) self.reader_combo.setMinimumSize(QSize(100, 1)) self.reader_combo.activated[int].connect(self.select_reader) box.layout().addWidget(self.reader_combo) layout.addWidget(box, 0, 1) box = gui.vBox(self.controlArea, "信息") self.infolabel = gui.widgetLabel(box, '未加载数据.') box = gui.widgetBox(self.controlArea, "列(双击编辑)") self.domain_editor = DomainEditor(self) self.editor_model = self.domain_editor.model() box.layout().addWidget(self.domain_editor) box = gui.hBox(box) gui.button(box, self, "重置", callback=self.reset_domain_edit, autoDefault=False) gui.rubber(box) self.apply_button = gui.button(box, self, "应用", callback=self.apply_domain_edit) self.apply_button.setEnabled(False) self.apply_button.setFixedWidth(170) self.editor_model.dataChanged.connect( lambda: self.apply_button.setEnabled(True)) hBox = gui.hBox(self.controlArea) gui.rubber(hBox) gui.button(hBox, self, "浏览文档数据集", callback=lambda: self.browse_file(True), autoDefault=False) gui.rubber(hBox) self.set_file_list() # Must not call open_file from within __init__. open_file # explicitly re-enters the event loop (by a progress bar) self.setAcceptDrops(True) if self.source == self.LOCAL_FILE: last_path = self.last_path() if last_path and os.path.exists(last_path) and \ os.path.getsize(last_path) > self.SIZE_LIMIT: self.Warning.file_too_big() return QTimer.singleShot(0, self.load_data) @staticmethod def sizeHint(): return QSize(600, 550) def select_file(self, n): assert n < len(self.recent_paths) super().select_file(n) if self.recent_paths: self.source = self.LOCAL_FILE self.load_data() self.set_file_list() def select_sheet(self): self.recent_paths[0].sheet = self.sheet_combo.currentText() self.load_data() def select_reader(self, n): if self.source != self.LOCAL_FILE: return # ignore for URL's if self.recent_paths: path = self.recent_paths[0] if n == 0: # default path.file_format = None self.load_data() elif n <= len(self.available_readers): reader = self.available_readers[n - 1] path.file_format = reader.qualified_name() self.load_data() else: # the rest include just qualified names path.file_format = self.reader_combo.itemText(n) self.load_data() def _url_set(self): url = self.url_combo.currentText() pos = self.recent_urls.index(url) url = url.strip() if not urlparse(url).scheme: url = 'http://' + url self.url_combo.setItemText(pos, url) self.recent_urls[pos] = url self.source = self.URL self.load_data() def browse_file(self, in_demos=False): if in_demos: start_file = get_sample_datasets_dir() if not os.path.exists(start_file): QMessageBox.information(None, "文件", "无法找到文件") return else: start_file = self.last_path() or os.path.expanduser("~/") filename, reader, _ = open_filename_dialog(start_file, None, self.available_readers) if not filename: return self.add_path(filename) if reader is not None: self.recent_paths[0].file_format = reader.qualified_name() self.source = self.LOCAL_FILE self.load_data() # Open a file, create data from it and send it over the data channel def load_data(self): # We need to catch any exception type since anything can happen in # file readers self.closeContext() self.domain_editor.set_domain(None) self.apply_button.setEnabled(False) self.clear_messages() self.set_file_list() error = self._try_load() if error: error() self.data = None self.sheet_box.hide() self.Outputs.data.send(None) self.infolabel.setText("无数据") def _try_load(self): self._initialize_reader_combo() # pylint: disable=broad-except if self.source == self.LOCAL_FILE: if self.last_path() is None: return self.Information.no_file_selected elif not os.path.exists(self.last_path()): return self.Error.file_not_found else: url = self.url_combo.currentText().strip() if not url: return self.Information.no_file_selected def mark_problematic_reader(): self.reader_combo.setItemData(self.reader_combo.currentIndex(), QBrush(Qt.red), Qt.ForegroundRole) try: self.reader = self._get_reader() # also sets current reader index assert self.reader is not None except MissingReaderException: mark_problematic_reader() return self.Error.missing_reader except Exception as ex: mark_problematic_reader() log.exception(ex) return lambda x=ex: self.Error.unknown(str(x)) try: self._update_sheet_combo() except Exception: return self.Error.sheet_error with log_warnings() as warnings: try: data = self.reader.read() except Exception as ex: mark_problematic_reader() log.exception(ex) return lambda x=ex: self.Error.unknown(str(x)) if warnings: self.Warning.load_warning(warnings[-1].message.args[0]) self.infolabel.setText(self._describe(data)) self.loaded_file = self.last_path() add_origin(data, self.loaded_file) self.data = data self.openContext(data.domain) self.apply_domain_edit() # sends data return None def _get_reader(self) -> FileFormat: if self.source == self.LOCAL_FILE: path = self.last_path() self.reader_combo.setEnabled(True) if self.recent_paths and self.recent_paths[0].file_format: qname = self.recent_paths[0].file_format qname_index = { r.qualified_name(): i for i, r in enumerate(self.available_readers) } if qname in qname_index: self.reader_combo.setCurrentIndex(qname_index[qname] + 1) else: # reader may be accessible, but not in self.available_readers # (perhaps its code was moved) self.reader_combo.addItem(qname) self.reader_combo.setCurrentIndex( len(self.reader_combo) - 1) try: reader_class = class_from_qualified_name(qname) except Exception as ex: raise MissingReaderException( f'Can not find reader "{qname}"') from ex reader = reader_class(path) else: self.reader_combo.setCurrentIndex(0) reader = FileFormat.get_reader(path) if self.recent_paths and self.recent_paths[0].sheet: reader.select_sheet(self.recent_paths[0].sheet) return reader else: url = self.url_combo.currentText().strip() return UrlReader(url) def _update_sheet_combo(self): if len(self.reader.sheets) < 2: self.sheet_box.hide() self.reader.select_sheet(None) return self.sheet_combo.clear() self.sheet_combo.addItems(self.reader.sheets) self._select_active_sheet() self.sheet_box.show() def _select_active_sheet(self): try: idx = self.reader.sheets.index(self.reader.sheet) self.sheet_combo.setCurrentIndex(idx) except ValueError: # Requested sheet does not exist in this file self.reader.select_sheet(None) self.sheet_combo.setCurrentIndex(0) def _initialize_reader_combo(self): self.reader_combo.clear() filters = [format_filter(f) for f in self.available_readers] self.reader_combo.addItems([DEFAULT_READER_TEXT] + filters) self.reader_combo.setCurrentIndex(0) self.reader_combo.setDisabled(True) # additional readers may be added in self._get_reader() @staticmethod def _describe(table): def missing_prop(prop): if prop: return f"({prop * 100:.1f}% 个缺失值)" else: return "(无缺失值)" domain = table.domain text = "" attrs = getattr(table, "attributes", {}) descs = [ attrs[desc] for desc in ("Name", "Description") if desc in attrs ] if len(descs) == 2: descs[0] = f"<b>{descs[0]}</b>" if descs: text += f"<p>{'<br/>'.join(descs)}</p>" text += f"<p>{len(table)} 条数据" missing_in_attr = missing_prop(table.has_missing_attribute() and table.get_nan_frequency_attribute()) missing_in_class = missing_prop(table.has_missing_class() and table.get_nan_frequency_class()) text += f"<br/>特征数目: {len(domain.attributes)} {missing_in_attr}" if domain.has_continuous_class: text += f"<br/>回归; 数值类 {missing_in_class}" elif domain.has_discrete_class: text += "<br/>分类: 分类种类共 " \ f"{len(domain.class_var.values)} 个 {missing_in_class}" elif table.domain.class_vars: text += "<br/>Multi-target; " \ f"{len(table.domain.class_vars)} target variables " \ f"{missing_in_class}" else: text += "<br/>Data has no target variable." text += f"<br/>元属性: { len(domain.metas)}" text += "</p>" if 'Timestamp' in table.domain: # Google Forms uses this header to timestamp responses text += f"<p>First entry: {table[0, 'Timestamp']}<br/>" \ f"Last entry: {table[-1, 'Timestamp']}</p>" return text def storeSpecificSettings(self): self.current_context.modified_variables = self.variables[:] def retrieveSpecificSettings(self): if hasattr(self.current_context, "modified_variables"): self.variables[:] = self.current_context.modified_variables def reset_domain_edit(self): self.domain_editor.reset_domain() self.apply_domain_edit() def _inspect_discrete_variables(self, domain): for var in chain(domain.variables, domain.metas): if var.is_discrete and len(var.values) > 100: self.Warning.performance_warning() def apply_domain_edit(self): self.Warning.performance_warning.clear() self.Warning.renamed_vars.clear() if self.data is None: table = None else: domain, cols, renamed = \ self.domain_editor.get_domain(self.data.domain, self.data, deduplicate=True) if not (domain.variables or domain.metas): table = None elif domain is self.data.domain: table = self.data else: X, y, m = cols table = Table.from_numpy(domain, X, y, m, self.data.W) table.name = self.data.name table.ids = np.array(self.data.ids) table.attributes = getattr(self.data, 'attributes', {}) self._inspect_discrete_variables(domain) if renamed: self.Warning.renamed_vars(f"Renamed: {', '.join(renamed)}") self.Warning.multiple_targets( shown=table is not None and len(table.domain.class_vars) > 1) self.Outputs.data.send(table) self.apply_button.setEnabled(False) def get_widget_name_extension(self): _, name = os.path.split(self.loaded_file) return os.path.splitext(name)[0] def send_report(self): def get_ext_name(filename): try: return FileFormat.names[os.path.splitext(filename)[1]] except KeyError: return "unknown" if self.data is None: self.report_paragraph("File", "No file.") return if self.source == self.LOCAL_FILE: home = os.path.expanduser("~") if self.loaded_file.startswith(home): # os.path.join does not like ~ name = "~" + os.path.sep + \ self.loaded_file[len(home):].lstrip("/").lstrip("\\") else: name = self.loaded_file if self.sheet_combo.isVisible(): name += f" ({self.sheet_combo.currentText()})" self.report_items("File", [("File name", name), ("Format", get_ext_name(name))]) else: self.report_items("Data", [("Resource", self.url), ("Format", get_ext_name(self.url))]) self.report_data("Data", self.data) @staticmethod def dragEnterEvent(event): """Accept drops of valid file urls""" urls = event.mimeData().urls() if urls: try: FileFormat.get_reader(urls[0].toLocalFile()) event.acceptProposedAction() except MissingReaderException: pass def dropEvent(self, event): """Handle file drops""" urls = event.mimeData().urls() if urls: self.add_path(urls[0].toLocalFile()) # add first file self.source = self.LOCAL_FILE self.load_data() def workflowEnvChanged(self, key, value, oldvalue): """ Function called when environment changes (e.g. while saving the scheme) It make sure that all environment connected values are modified (e.g. relative file paths are changed) """ self.update_file_list(key, value, oldvalue)
class OWFile(widget.OWWidget, RecentPathsWComboMixin): name = "领域编辑器2" icon = "icons/gear.svg" id = "orange.widgets.data.file" description = "Read data from an input file or network " \ "and send a data table to the output." priority = 10 category = "Data" keywords = ["file", "load", "read", "open"] class Outputs: data = Output("领域背景", Table, doc="专业领域背景的介绍") want_main_area = False SEARCH_PATHS = [("sample-datasets", get_sample_datasets_dir())] SIZE_LIMIT = 1e7 LOCAL_FILE, URL = range(2) settingsHandler = PerfectDomainContextHandler( match_values=PerfectDomainContextHandler.MATCH_VALUES_ALL) # Overload RecentPathsWidgetMixin.recent_paths to set defaults recent_paths = Setting([ RecentPath("", "sample-datasets", "iris.tab"), RecentPath("", "sample-datasets", "titanic.tab"), RecentPath("", "sample-datasets", "housing.tab"), RecentPath("", "sample-datasets", "heart_disease.tab"), ]) recent_urls = Setting([]) source = Setting(LOCAL_FILE) xls_sheet = ContextSetting("") sheet_names = Setting({}) url = Setting("") variables = ContextSetting([]) domain_editor = SettingProvider(DomainEditor) ##用于警告代码可以无视 class Warning(widget.OWWidget.Warning): file_too_big = widget.Msg( "The file is too large to load automatically." " Press Reload to load.") load_warning = widget.Msg("Read warning:\n{}") ##用于报错代码可以无视 class Error(widget.OWWidget.Error): file_not_found = widget.Msg("File not found.") missing_reader = widget.Msg("Missing reader.") sheet_error = widget.Msg("Error listing available sheets.") unknown = widget.Msg("Read error:\n{}") def __init__(self): super().__init__() RecentPathsWComboMixin.__init__(self) self.domain = None self.data = None self.loaded_file = "" self.reader = None layout = QGridLayout() ##画布的布局,使用网格划分的方式 gui.widgetBox(self.controlArea, margin=20, orientation=layout) vbox = gui.radioButtons(None, self, "source", box=True, addSpace=True, callback=self.load_data, addToLayout=False) rb_button = gui.appendRadioButton(vbox, "File:", addToLayout=False) layout.addWidget(rb_button, 0, 0, Qt.AlignVCenter) #确定位置0,0 box = gui.hBox(None, addToLayout=False, margin=0) #水平box box.setSizePolicy(Policy.MinimumExpanding, Policy.Fixed) #设置size self.file_combo.setSizePolicy(Policy.MinimumExpanding, Policy.Fixed) # 按钮和下拉菜单的联合体 self.file_combo.activated[int].connect( self.select_file) ##使用.connect(功能函数)来实现与功能函数的连接 box.layout().addWidget(self.file_combo) layout.addWidget(box, 0, 1) #确定位置0,1 file_button = gui.button(None, self, '...', callback=self.browse_file, autoDefault=False) file_button.setIcon(self.style().standardIcon(QStyle.SP_DirOpenIcon)) file_button.setSizePolicy(Policy.Maximum, Policy.Fixed) layout.addWidget(file_button, 0, 2) reload_button = gui.button(None, self, "Reload", callback=self.load_data, autoDefault=False) reload_button.setIcon(self.style().standardIcon( QStyle.SP_BrowserReload)) reload_button.setSizePolicy(Policy.Fixed, Policy.Fixed) layout.addWidget(reload_button, 0, 3) ## 含Information的box设置 box = gui.widgetBox(self.controlArea, "Info") self.info = gui.widgetLabel(box, '请设置领域特征') # self.warnings = gui.widgetLabel(box, '') ##下面几句控制含有table的box box = gui.widgetBox(self.controlArea, "双击进行编辑") self.domain_editor = DomainEditor(self) ##对table操作的事情在DomainEditor内部定义 self.editor_model = self.domain_editor.model() ##设置与Apply激活状态有关 box.layout().addWidget(self.domain_editor) ## Apply 按钮 box = gui.hBox(self.controlArea) # gui.button( # box, self, "Browse documentation datasets", # callback=lambda: self.browse_file(True), autoDefault=False) # gui.rubber(box) self.apply_button = gui.button(box, self, "应用", callback=self.apply_domain_edit) self.apply_button.setEnabled(False) self.apply_button.setFixedWidth(170) # print('editor_model',self.editor_model) ## 如果数据改变就激活apply按钮.dataChange表示是否改变数据 self.editor_model.dataChanged.connect( lambda: self.apply_button.setEnabled(True)) self.set_file_list() ##设置文件列表中的项 # Must not call open_file from within __init__. open_file # explicitly re-enters the event loop (by a progress bar) self.setAcceptDrops(True) ##表示接受响应释放操作 if self.source == self.LOCAL_FILE: last_path = self.last_path() if last_path and os.path.exists(last_path) and \ os.path.getsize(last_path) > self.SIZE_LIMIT: self.Warning.file_too_big() return ##QTimer.singleShot()表示在s秒后调用一个槽函数(self.load_data) QTimer.singleShot(0, self.load_data) def sizeHint(self): return QSize(600, 550) def select_file(self, n): assert n < len(self.recent_paths) super().select_file(n) if self.recent_paths: self.source = self.LOCAL_FILE self.load_data() self.set_file_list() ## 读取文件 def browse_file(self, in_demos=False): if in_demos: start_file = get_sample_datasets_dir() if not os.path.exists(start_file): QMessageBox.information( None, "File", "Cannot find the directory with documentation datasets") return else: start_file = self.last_path() or os.path.expanduser("~/") readers = [ f for f in FileFormat.formats if getattr(f, 'read', None) and getattr(f, "EXTENSIONS", None) ] filename, reader, _ = open_filename_dialog(start_file, None, readers) if not filename: return self.add_path(filename) if reader is not None: self.recent_paths[0].file_format = reader.qualified_name() self.source = self.LOCAL_FILE self.load_data() ## 获取数据self.data,方式是调用了_try_load函数,并且将数据send到Output的channel中 # Open a file, create data from it and send it over the data channel def load_data(self): # We need to catch any exception type since anything can happen in # file readers self.closeContext() ##重新设置widget 的context self.domain_editor.set_domain(None) #把domain设置为None self.apply_button.setEnabled(False) #把apply button设置为不可见 self.clear_messages() self.set_file_list() ##这句话判断数据导入是否有错误 error = self._try_load() if error: error() self.data = None # self.sheet_box.hide() self.Outputs.data.send(None) self.info.setText("无数据.") ## 导入数据的核心方法:获取self.data数据,同时判断这个出错可能性 def _try_load(self): # pylint: disable=broad-except if self.last_path() and not os.path.exists(self.last_path()): return self.Error.file_not_found try: self.reader = self._get_reader() ##这里获取reader assert self.reader is not None except Exception: return self.Error.missing_reader try: self._update_sheet_combo() except Exception: return self.Error.sheet_error with catch_warnings(record=True) as warnings: try: data = self.reader.read() ##通过这句话读取数据,这是的data已经是table型数据了 print('jia', type(data)) except Exception as ex: log.exception(ex) return lambda x=ex: self.Error.unknown(str(x)) if warnings: self.Warning.load_warning(warnings[-1].message.args[0]) self.info.setText(self._describe(data)) #描述info的text self.loaded_file = self.last_path() ##描述文档地址 add_origin(data, self.loaded_file) self.data = data # print('liangyue',dir(self.data)) self.openContext(data.domain) # print('data',data) self.apply_domain_edit() # sends data ## 获取导入文件的格式 def _get_reader(self): """ Returns ------- FileFormat """ if self.source == self.LOCAL_FILE: path = self.last_path() if self.recent_paths and self.recent_paths[0].file_format: qname = self.recent_paths[0].file_format reader_class = class_from_qualified_name(qname) reader = reader_class(path) print('reader_class', reader_class) else: reader = FileFormat.get_reader(path) # Return reader instance that can be used to read the file if self.recent_paths and self.recent_paths[0].sheet: reader.select_sheet(self.recent_paths[0].sheet) return reader elif self.source == self.URL: url = self.url_combo.currentText().strip() if url: return UrlReader(url) ## 更新file的下拉列表中的内容 def _update_sheet_combo(self): if len(self.reader.sheets) < 2: # self.sheet_box.hide() self.reader.select_sheet(None) return self.sheet_combo.clear() self.sheet_combo.addItems(self.reader.sheets) self._select_active_sheet() # self.sheet_box.show() def _select_active_sheet(self): if self.reader.sheet: try: idx = self.reader.sheets.index(self.reader.sheet) self.sheet_combo.setCurrentIndex(idx) except ValueError: # Requested sheet does not exist in this file self.reader.select_sheet(None) else: self.sheet_combo.setCurrentIndex(0) ## 下面是info的描述语句 def _describe(self, table): domain = table.domain text = "" attrs = getattr(table, "attributes", {}) descs = [ attrs[desc] for desc in ("Name", "Description") if desc in attrs ] if len(descs) == 2: descs[0] = "<b>{}</b>".format(descs[0]) if descs: text += "<p>{}</p>".format("<br/>".join(descs)) text += "<p>{} 个实例数据(s), {} 个输入特征(s), {} 个元特征(s)".\ format(len(table), len(domain.attributes), len(domain.metas)) if domain.has_continuous_class: text += "<br/>回归模型 ." elif domain.has_discrete_class: text += "<br/>分类模型; 共分为 {} 类.".\ format(len(domain.class_var.values)) elif table.domain.class_vars: text += "<br/>多目标模型; {} 个目标".format(len(table.domain.class_vars)) else: text += "<br/>无目标值." text += "</p>" if 'Timestamp' in table.domain: # Google Forms uses this header to timestamp responses text += '<p>First entry: {}<br/>Last entry: {}</p>'.format( table[0, 'Timestamp'], table[-1, 'Timestamp']) return text def storeSpecificSettings(self): self.current_context.modified_variables = self.variables[:] def retrieveSpecificSettings(self): if hasattr(self.current_context, "modified_variables"): self.variables[:] = self.current_context.modified_variables ## 对Ourputs的data赋值为table def apply_domain_edit(self): if self.data is None: table = None else: domain, cols = self.domain_editor.get_domain( self.data.domain, self.data) printData = self.data printDomain = self.data.domain if not (domain.variables or domain.metas): table = None else: X, y, m = cols #X是输入,domain.attributes;y是输出class_var;m是元特征 ## 下面解决将self.data的数据付给了table。 # 1data's name; 2数据编号ids;3数据属性attributes table = Table.from_numpy(domain, X, y, m, self.data.W) table.name = self.data.name index = self.data.ids table.ids = np.array(self.data.ids) # print('ids',table.ids) data = self.data table.attributes = getattr(self.data, 'attributes', {}) ## 将table的属性定义为{} ''' 对Ourputs的data赋值为table''' # print('table is :',table) # print('table domain',table.domain) # print('table name',table.name) # print('table class_var name',table.domain.class_vars[0].name) self.Outputs.data.send(table) self.apply_button.setEnabled(False) def get_widget_name_extension(self): _, name = os.path.split(self.loaded_file) return os.path.splitext(name)[0] def send_report(self): def get_ext_name(filename): try: return FileFormat.names[os.path.splitext(filename)[1]] except KeyError: return "unknown" if self.data is None: self.report_paragraph("File", "No file.") return if self.source == self.LOCAL_FILE: home = os.path.expanduser("~") if self.loaded_file.startswith(home): # os.path.join does not like ~ name = "~" + os.path.sep + \ self.loaded_file[len(home):].lstrip("/").lstrip("\\") else: name = self.loaded_file if self.sheet_combo.isVisible(): name += " ({})".format(self.sheet_combo.currentText()) self.report_items("File", [("File name", name), ("Format", get_ext_name(name))]) else: self.report_items("Data", [("Resource", self.url), ("Format", get_ext_name(self.url))]) self.report_data("Data", self.data) def dragEnterEvent(self, event): """Accept drops of valid file urls""" urls = event.mimeData().urls() if urls: try: FileFormat.get_reader( OSX_NSURL_toLocalFile(urls[0]) or urls[0].toLocalFile()) event.acceptProposedAction() except IOError: pass def dropEvent(self, event): """Handle file drops""" urls = event.mimeData().urls() if urls: self.add_path( OSX_NSURL_toLocalFile(urls[0]) or urls[0].toLocalFile()) # add first file self.source = self.LOCAL_FILE self.load_data() def workflowEnvChanged(self, key, value, oldvalue): """ Function called when environment changes (e.g. while saving the scheme) It make sure that all environment connected values are modified (e.g. relative file paths are changed) """ self.update_file_list(key, value, oldvalue)
class OWVcfFile(widget.OWWidget, RecentPathsWComboMixin): name = "VCF File" id = "orangecontrib.variants.widgets.vcf" description = "Read data from a VCF file." icon = "icons/VCFFile.svg" priority = 10 category = "Variants" keywords = ["data", "vcf", "file", "load", "read"] class Outputs: data = Output( "Data", Table, doc="Attribute-valued data set read from the input file.") want_main_area = False SEARCH_PATHS = [("sample-datasets", get_sample_datasets_dir())] SIZE_LIMIT = 1e7 settingsHandler = PerfectDomainContextHandler( match_values=PerfectDomainContextHandler.MATCH_VALUES_ALL) # Overload RecentPathsWidgetMixin.recent_paths to set defaults recent_paths = Setting([ RecentPath("", "sample-datasets", "small.vcf"), ]) quality = Setting(1) cb_qual = Setting(True) frequency = Setting(1) cb_freq = Setting(True) class Warning(widget.OWWidget.Warning): file_too_big = widget.Msg( "The file is too large to load automatically." " Press Reload to load.") class Error(widget.OWWidget.Error): file_not_found = widget.Msg("File not found.") def __init__(self): super().__init__() RecentPathsWComboMixin.__init__(self) self.domain = None self.variants = None self.table = None self.loaded_file = "" layout = QGridLayout() gui.widgetBox(self.controlArea, margin=0, orientation=layout) label = gui.widgetLabel(self, " File: ") layout.addWidget(label, 0, 0, Qt.AlignVCenter) box = gui.hBox(None, addToLayout=False, margin=0) box.setSizePolicy(Policy.MinimumExpanding, Policy.Fixed) self.file_combo.setSizePolicy(Policy.MinimumExpanding, Policy.Fixed) self.file_combo.activated[int].connect(self.select_file) box.layout().addWidget(self.file_combo) layout.addWidget(box, 0, 1) file_button = gui.button(None, self, '...', callback=self.browse_file, autoDefault=False) file_button.setIcon(self.style().standardIcon(QStyle.SP_DirOpenIcon)) file_button.setSizePolicy(Policy.Maximum, Policy.Fixed) layout.addWidget(file_button, 0, 2) reload_button = gui.button(None, self, "Reload", callback=self.load_data, autoDefault=False) reload_button.setIcon(self.style().standardIcon( QStyle.SP_BrowserReload)) reload_button.setSizePolicy(Policy.Fixed, Policy.Fixed) layout.addWidget(reload_button, 0, 3) box = gui.vBox(self.controlArea, "Info") self.info = gui.widgetLabel(box, 'No data loaded.') self.warnings = gui.widgetLabel(box, '') def enable_apply(): self.apply_button.setEnabled(True) box = gui.vBox(self.controlArea, "Filtering") _, qspin = gui.spin(box, self, 'quality', 0, 999, step=1, label='Quality threshold (QT)', callback=enable_apply, checked='cb_qual', checkCallback=enable_apply) qspin.setToolTip("Minimum quality to use reads.") _, fspin = gui.spin(box, self, 'frequency', 0, 999, step=1, label='Frequency threshold (FT)', callback=enable_apply, checked='cb_freq', checkCallback=enable_apply) fspin.setToolTip("Keep only variants with at least this many " "occurrences of alternative alleles.") gui.rubber(self.controlArea) box = gui.hBox(self.controlArea) box.layout().addWidget(self.report_button) self.report_button.setFixedWidth(170) gui.rubber(box) self.apply_button = gui.button(box, self, "Apply", callback=self.apply) self.apply_button.setEnabled(False) self.apply_button.setFixedWidth(170) self.set_file_list() # Must not call open_file from within __init__. open_file # explicitly re-enters the event loop (by a progress bar) self.setAcceptDrops(True) last_path = self.last_path() if last_path and os.path.exists(last_path) and \ os.path.getsize(last_path) > self.SIZE_LIMIT: self.Warning.file_too_big() return QTimer.singleShot(0, self.load_data) def sizeHint(self): return QSize(500, 200) def select_file(self, n): assert n < len(self.recent_paths) super().select_file(n) if self.recent_paths: self.load_data() self.set_file_list() def browse_file(self): start_file = self.last_path() or os.path.expanduser("~/") dialog_formats = "VCF files (*.vcf);;All files (*)" filename, _ = QFileDialog.getOpenFileName(self, 'Open Orange Data File', start_file, dialog_formats) if not filename: return self.add_path(filename) self.load_data() # Open a file, create data from it and send it over the data channel def load_data(self): # We need to catch any exception type since anything can happen in # file readers # pylint: disable=broad-except self.apply_button.setEnabled(False) self.clear_messages() self.set_file_list() if not self.last_path() or not os.path.exists(self.last_path()): if self.last_path(): self.Error.file_not_found() self.Outputs.data.send(None) self.info.setText("No data.") return error = None if not error: with catch_warnings(record=True) as warnings: try: variants = VariantData(self.last_path()) except Exception as ex: log.exception(ex) error = ex self.warning(warnings[-1].message.args[0] if warnings else '') if error: self.variants = self.table = None self.Outputs.data.send(None) self.info.setText("An error occurred:\n{}".format(error)) return self.loaded_file = self.last_path() self.variants = variants self.apply() # sends data def update_info(self): pl = lambda x: '' if x == 1 else 's' text = "" if self.variants is not None: nsamples, nvariants = self.variants.gt.T.shape text += ("<p>Before filtering:<br/>" + " {} sample{}, {} variant{}</p>").\ format(nsamples, pl(nsamples), nvariants, pl(nvariants), ) if self.table is not None: nsamples, nvariants = self.table.X.shape below = np.isnan(self.table.X).sum() / self.table.X.size * 100 text += ("<p>After filtering:<br/>" + " {} sample{}, {} variant{}<br/>" + " {:.2f}% reads below QT</p>").\ format(nsamples, pl(nsamples), nvariants, pl(nvariants), below) self.info.setText(text) def apply(self): if self.variants is None: self.table = None else: q = self.quality if self.cb_qual else None f = self.frequency if self.cb_freq else None self.table = self.variants.get_data(q, f) self.update_info() self.Outputs.data.send(self.table) self.apply_button.setEnabled(False) def get_widget_name_extension(self): _, name = os.path.split(self.loaded_file) return os.path.splitext(name)[0] def send_report(self): if self.table is None: self.report_paragraph("VCF File", "No file.") return home = os.path.expanduser("~") if self.loaded_file.startswith(home): # os.path.join does not like ~ name = "~" + os.path.sep + \ self.loaded_file[len(home):].lstrip("/").lstrip("\\") else: name = self.loaded_file self.report_items("VCF File", [ ("File name", name), ]) parameters = [("Quality", self.quality, self.cb_qual), ("Frequency", self.frequency, self.cb_freq)] self.report_items("Filtering parameters", [(name, value) for name, value, enabled in parameters if enabled]) self.report_data("Data", self.table) def dragEnterEvent(self, event): """Accept drops of valid file urls""" urls = event.mimeData().urls() if urls: try: FileFormat.get_reader( OSX_NSURL_toLocalFile(urls[0]) or urls[0].toLocalFile()) event.acceptProposedAction() except IOError: pass def dropEvent(self, event): """Handle file drops""" urls = event.mimeData().urls() if urls: self.add_path( OSX_NSURL_toLocalFile(urls[0]) or urls[0].toLocalFile()) # add first file self.load_data()