def __init__(self):
    """Build the widget's controls and open the first remembered file, if any."""
    super().__init__()
    self.corpus = None

    # Keep only remembered paths that still exist on disk.
    self.recent_files = [
        path for path in self.recent_files if os.path.exists(path)
    ]

    # --- "Corpus file" row: recent-files combo + Browse + Reload ---------
    file_box = gui.widgetBox(self.controlArea, "Corpus file", orientation=0)

    # Drop-down for recent files
    self.file_combo = QtGui.QComboBox(file_box)
    self.file_combo.setMinimumWidth(300)
    file_box.layout().addWidget(self.file_combo)
    self.file_combo.activated[int].connect(self.select_file)

    # Browse button
    browse_button = gui.button(
        file_box, self, 'Browse', callback=self.browse_file)
    browse_button.setIcon(
        self.style().standardIcon(QtGui.QStyle.SP_DirOpenIcon))
    browse_button.setSizePolicy(
        QtGui.QSizePolicy.Fixed, QtGui.QSizePolicy.Fixed)

    # Reload button
    reload_button = gui.button(
        file_box, self, "Reload", callback=self.reload, default=True)
    reload_button.setIcon(
        self.style().standardIcon(QtGui.QStyle.SP_BrowserReload))
    reload_button.setSizePolicy(
        QtGui.QSizePolicy.Fixed, QtGui.QSizePolicy.Fixed)

    # --- "Corpus info" box -----------------------------------------------
    info_box = gui.widgetBox(self.controlArea, "Corpus info", addSpace=True)
    self.info_label = gui.label(info_box, self, "Corpus of 0 documents.")

    # --- Side-by-side lists of used / ignored text features --------------
    lists_box = gui.widgetBox(self.controlArea, orientation=0)

    # Used Text Features
    used_box = gui.widgetBox(lists_box, "Used text features", addSpace=True)
    self.used_attrs = VariablesListItemModel()
    self.used_attrs_view = VariablesListItemView()
    self.used_attrs_view.setModel(self.used_attrs)
    used_box.layout().addWidget(self.used_attrs_view)

    # Any change to the "used" model triggers update_feature_selection.
    used_model = self.used_attrs
    used_model.dataChanged.connect(self.update_feature_selection)
    used_model.rowsInserted.connect(self.update_feature_selection)
    used_model.rowsRemoved.connect(self.update_feature_selection)

    # Ignored Text Features
    ignored_box = gui.widgetBox(
        lists_box, "Ignored text features", addSpace=True)
    self.unused_attrs = VariablesListItemModel()
    self.unused_attrs_view = VariablesListItemView()
    self.unused_attrs_view.setModel(self.unused_attrs)
    ignored_box.layout().addWidget(self.unused_attrs_view)

    # Load the most recent file
    self.set_file_list()
    if self.recent_files:
        self.open_file(self.recent_files[0])
def test_drop_mime(self):
    """Check that MIME data produced by the model can be dropped back into it."""
    iris = Table("iris")
    m = VariablesListItemModel(iris.domain.variables)
    mime = m.mimeData([m.index(1, 0)])
    self.assertTrue(mime.hasFormat(VariablesListItemModel.MIME_TYPE))

    # dropMimeData() mutates the model, so it must not be wrapped in a bare
    # ``assert``: those are stripped under ``python -O`` and the drop would
    # then never run.
    # Drop at explicit row 5: the dropped item must be the one at row 1.
    self.assertTrue(
        m.dropMimeData(mime, Qt.MoveAction, 5, 0, m.index(-1, -1)))
    self.assertIs(m[5], m[1])

    # Drop with row/column -1: the test expects the item at index 6.
    self.assertTrue(
        m.dropMimeData(mime, Qt.MoveAction, -1, -1, m.index(-1, -1)))
    self.assertIs(m[6], m[1])
def test_flags(self):
    """Check drag/drop item flags for a valid and an invalid model index."""
    m = VariablesListItemModel([ContinuousVariable("X")])

    # A real item can be dragged but is not itself a drop target.
    index = m.index(0)
    flags = m.flags(index)
    self.assertTrue(flags & Qt.ItemIsDragEnabled)
    self.assertFalse(flags & Qt.ItemIsDropEnabled)

    # 'invalid' index is drop enabled -> indicates insertion capability
    flags = m.flags(m.index(-1, -1))
    self.assertTrue(flags & Qt.ItemIsDropEnabled)
def __init__(self):
    """Build the widget's controls and select the first entry of the file widget."""
    super().__init__()
    self.corpus = None

    # --- "Corpus file" row: a single FileWidget handles the file choice ---
    file_box = gui.widgetBox(self.controlArea, "Corpus file", orientation=0)
    file_widget = widgets.FileWidget(
        recent_files=self.recent_files,
        icon_size=(16, 16),
        on_open=self.open_file,
        directory_aliases={
            "Browse documentation corpora ...": get_sample_corpora_dir(),
        },
        dialog_format=self.dlgFormats,
        dialog_title='Open Orange Document Corpus',
        allow_empty=False,
        reload_label='Reload',
        browse_label='Browse',
    )
    file_box.layout().addWidget(file_widget)

    # --- "Corpus info" box -----------------------------------------------
    info_box = gui.widgetBox(self.controlArea, "Corpus info", addSpace=True)
    self.info_label = gui.label(info_box, self, "Corpus of 0 documents.")

    # --- Side-by-side lists of used / ignored text features --------------
    lists_box = gui.widgetBox(self.controlArea, orientation=0)

    # Used Text Features
    used_box = gui.widgetBox(lists_box, "Used text features", addSpace=True)
    self.used_attrs = VariablesListItemModel()
    self.used_attrs_view = VariablesListItemView()
    self.used_attrs_view.setModel(self.used_attrs)
    used_box.layout().addWidget(self.used_attrs_view)

    # Any change to the "used" model triggers update_feature_selection.
    used_model = self.used_attrs
    used_model.dataChanged.connect(self.update_feature_selection)
    used_model.rowsInserted.connect(self.update_feature_selection)
    used_model.rowsRemoved.connect(self.update_feature_selection)

    # Ignored Text Features
    ignored_box = gui.widgetBox(
        lists_box, "Ignored text features", addSpace=True)
    self.unused_attrs = VariablesListItemModel()
    self.unused_attrs_view = VariablesListItemView()
    self.unused_attrs_view.setModel(self.unused_attrs)
    ignored_box.layout().addWidget(self.unused_attrs_view)

    # load first file
    file_widget.select(0)
class OWLoadCorpus(OWWidget):
    """Widget that loads a corpus from a file and sends it on its output.

    The control area holds a recent-files combo box with Browse/Reload
    buttons, an info label with the document count, and two lists that
    split the corpus' meta attributes into used and ignored text features.
    """

    name = "Corpus"
    description = "Load a corpus of text documents, (optionally) tagged with categories."
    icon = "icons/TextFile.svg"
    priority = 10

    outputs = [(Output.CORPUS, Corpus)]
    want_main_area = False
    resizing_enabled = False

    # Filter string passed to the file-open dialog.
    dlgFormats = "Only tab files (*.tab)"

    # Persisted list of recently opened paths, most recent first.
    recent_files = Setting(["(none)"])

    def __init__(self):
        """Build the controls and open the first remembered file, if any."""
        super().__init__()
        self.corpus = None

        # Refresh recent files: forget paths that no longer exist.
        self.recent_files = [
            fn for fn in self.recent_files if os.path.exists(fn)
        ]

        # Browse file box
        fbox = gui.widgetBox(self.controlArea, "Corpus file", orientation=0)

        # Drop-down for recent files
        self.file_combo = QtGui.QComboBox(fbox)
        self.file_combo.setMinimumWidth(300)
        fbox.layout().addWidget(self.file_combo)
        self.file_combo.activated[int].connect(self.select_file)

        # Browse button
        browse = gui.button(fbox, self, 'Browse', callback=self.browse_file)
        browse.setIcon(self.style().standardIcon(QtGui.QStyle.SP_DirOpenIcon))
        browse.setSizePolicy(QtGui.QSizePolicy.Fixed, QtGui.QSizePolicy.Fixed)

        # Reload button
        reload = gui.button(fbox, self, "Reload", callback=self.reload,
                            default=True)
        reload.setIcon(
            self.style().standardIcon(QtGui.QStyle.SP_BrowserReload))
        reload.setSizePolicy(QtGui.QSizePolicy.Fixed, QtGui.QSizePolicy.Fixed)

        # Corpus info
        ibox = gui.widgetBox(self.controlArea, "Corpus info", addSpace=True)
        corp_info = "Corpus of 0 documents."
        self.info_label = gui.label(ibox, self, corp_info)

        # Used Text Features
        fbox = gui.widgetBox(self.controlArea, orientation=0)
        ubox = gui.widgetBox(fbox, "Used text features", addSpace=True)
        self.used_attrs = VariablesListItemModel()
        self.used_attrs_view = VariablesListItemView()
        self.used_attrs_view.setModel(self.used_attrs)
        ubox.layout().addWidget(self.used_attrs_view)

        # Any change to the "used" model triggers update_feature_selection.
        aa = self.used_attrs
        aa.dataChanged.connect(self.update_feature_selection)
        aa.rowsInserted.connect(self.update_feature_selection)
        aa.rowsRemoved.connect(self.update_feature_selection)

        # Ignored Text Features
        ibox = gui.widgetBox(fbox, "Ignored text features", addSpace=True)
        self.unused_attrs = VariablesListItemModel()
        self.unused_attrs_view = VariablesListItemView()
        self.unused_attrs_view.setModel(self.unused_attrs)
        ibox.layout().addWidget(self.unused_attrs_view)

        # Load the most recent file
        self.set_file_list()
        if self.recent_files:
            self.open_file(self.recent_files[0])

    def set_file_list(self):
        """Repopulate the combo box from ``recent_files``.

        Entries show only the file name; a "(none)" placeholder is shown
        when the list is empty, and a trailing entry offers browsing the
        documentation corpora.
        """
        self.file_combo.clear()
        if not self.recent_files:
            self.file_combo.addItem("(none)")
        for path in self.recent_files:
            if path == "(none)":
                self.file_combo.addItem("(none)")
            else:
                self.file_combo.addItem(os.path.split(path)[1])
        self.file_combo.addItem("Browse documentation corpora ...")

    def reload(self):
        """Re-open the first entry of ``recent_files``, if there is one."""
        if self.recent_files:
            return self.open_file(self.recent_files[0])

    def select_file(self, n):
        """Handle activation of combo-box entry ``n``.

        An index inside ``recent_files`` moves that path to the front and
        opens it. Any other non-zero index is the trailing "Browse ..."
        entry and opens the file dialog in the sample corpora directory.
        """
        if n < len(self.recent_files):
            name = self.recent_files.pop(n)
            self.recent_files.insert(0, name)
        elif n:
            # browse_file() already records and opens the chosen file, so
            # opening recent_files[0] again here would load it twice (or
            # needlessly reload the old file when the dialog is cancelled).
            self.browse_file(True)
            # Reset the combo so it does not stay on the "Browse ..." entry
            # after a cancelled dialog.
            self.set_file_list()
            return

        if self.recent_files:
            self.set_file_list()
            self.open_file(self.recent_files[0])

    def browse_file(self, demos_loc=False):
        """Ask the user for a corpus file and open it.

        :param demos_loc: when true, the dialog starts in the directory
            returned by ``get_sample_corpora_dir()`` instead of the user's
            home directory.
        """
        start_file = os.path.expanduser("~/")
        if demos_loc:
            start_file = get_sample_corpora_dir()
        filename = QtGui.QFileDialog.getOpenFileName(
            self, 'Open Orange Document Corpus', start_file, self.dlgFormats)
        if not filename:
            return  # dialog cancelled

        # Move (or add) the chosen path to the front of the recent list.
        if filename in self.recent_files:
            self.recent_files.remove(filename)
        self.recent_files.insert(0, filename)
        self.set_file_list()
        self.open_file(filename)

    def open_file(self, path):
        """Load the corpus at ``path``, fill the feature lists and send it.

        Any error raised while loading is shown as widget error 1 instead
        of propagating.
        """
        self.error(1, '')
        self.used_attrs[:] = []
        self.unused_attrs[:] = []

        try:
            self.corpus = Corpus.from_file(path)
            # Split meta attributes by whether the corpus lists them as
            # text features.
            for f in self.corpus.domain.metas:
                if f in self.corpus.text_features:
                    self.used_attrs.append(f)
                else:
                    self.unused_attrs.append(f)
            self.info_label.setText(
                "Corpus of {} documents.".format(len(self.corpus)))
            self.send(Output.CORPUS, self.corpus)
        except Exception as err:
            # Exception rather than BaseException, so KeyboardInterrupt and
            # SystemExit are not swallowed and reported as load errors.
            self.error(1, str(err))

    def update_feature_selection(self):
        """Apply the current "used" list to the corpus and resend it."""
        if self.corpus is not None:
            self.corpus.set_text_features(self.used_attrs)
            self.send(Output.CORPUS, self.corpus)
class OWLoadCorpus(OWWidget):
    """Widget that loads a corpus from a file and sends it on its output.

    File selection is delegated to ``widgets.FileWidget``; the rest of the
    control area shows the document count and two lists of used and
    ignored text features.
    """

    name = "Corpus"
    description = "Load a corpus of text documents, (optionally) tagged with categories."
    icon = "icons/TextFile.svg"
    priority = 10

    outputs = [(Output.CORPUS, Corpus)]
    want_main_area = False
    resizing_enabled = False

    # File-dialog filter: one "All readable files" entry covering every key
    # of FileFormat.readers, followed by one entry per distinct reader in
    # order of first appearance.
    dlgFormats = (
        "All readable files ({});;".format(
            '*' + ' *'.join(FileFormat.readers.keys())) +
        ";;".join("{} (*{})".format(f.DESCRIPTION, ' *'.join(f.EXTENSIONS))
                  for f in sorted(set(FileFormat.readers.values()),
                                  key=list(FileFormat.readers.values()).index)))

    # Persisted list of recently opened paths.
    recent_files = Setting([])

    class Error(OWWidget.Error):
        read_file = Msg("Can't read file {} ({})")

    def __init__(self):
        """Build the controls and select the first entry of the file widget."""
        super().__init__()
        self.corpus = None

        # Browse file box
        fbox = gui.widgetBox(self.controlArea, "Corpus file", orientation=0)
        widget = widgets.FileWidget(
            recent_files=self.recent_files,
            icon_size=(16, 16),
            on_open=self.open_file,
            directory_aliases={
                "Browse documentation corpora ...": get_sample_corpora_dir()},
            dialog_format=self.dlgFormats,
            dialog_title='Open Orange Document Corpus',
            allow_empty=False,
            reload_label='Reload',
            browse_label='Browse')
        fbox.layout().addWidget(widget)

        # Corpus info
        ibox = gui.widgetBox(self.controlArea, "Corpus info", addSpace=True)
        corp_info = "Corpus of 0 documents."
        self.info_label = gui.label(ibox, self, corp_info)

        # Used Text Features
        fbox = gui.widgetBox(self.controlArea, orientation=0)
        ubox = gui.widgetBox(fbox, "Used text features", addSpace=True)
        self.used_attrs = VariablesListItemModel()
        self.used_attrs_view = VariablesListItemView()
        self.used_attrs_view.setModel(self.used_attrs)
        ubox.layout().addWidget(self.used_attrs_view)

        # Any change to the "used" model triggers update_feature_selection.
        aa = self.used_attrs
        aa.dataChanged.connect(self.update_feature_selection)
        aa.rowsInserted.connect(self.update_feature_selection)
        aa.rowsRemoved.connect(self.update_feature_selection)

        # Ignored Text Features
        ibox = gui.widgetBox(fbox, "Ignored text features", addSpace=True)
        self.unused_attrs = VariablesListItemModel()
        self.unused_attrs_view = VariablesListItemView()
        self.unused_attrs_view.setModel(self.unused_attrs)
        ibox.layout().addWidget(self.unused_attrs_view)

        # load first file
        widget.select(0)

    def open_file(self, path):
        """Load the corpus at ``path`` and fill the two feature lists.

        Both lists are always cleared first. With a falsy ``path`` nothing
        else happens. Any error raised while loading is reported through
        ``Error.read_file`` instead of propagating.
        """
        self.Error.read_file.clear()
        self.used_attrs[:] = []
        self.unused_attrs[:] = []
        if path:
            try:
                self.corpus = Corpus.from_file(path)
                # Name the corpus after the file, without directory or
                # extension.
                self.corpus.name = os.path.splitext(os.path.basename(path))[0]
                self.info_label.setText(
                    "Corpus of {} documents.".format(len(self.corpus)))
                self.used_attrs.extend(self.corpus.text_features)
                # Only string metas that are not already text features are
                # offered as ignored features.
                self.unused_attrs.extend(
                    [f for f in self.corpus.domain.metas
                     if f.is_string and f not in self.corpus.text_features])
            except Exception as err:
                # Exception rather than BaseException, so KeyboardInterrupt
                # and SystemExit are not swallowed and reported as read
                # errors.
                self.Error.read_file(path, str(err))

    def update_feature_selection(self):
        """Apply the de-duplicated "used" list to the corpus and resend it."""
        # TODO fix VariablesListItemView so it does not emit
        # duplicated data when reordering inside a single window
        def remove_duplicates(l):
            """Return the items of ``l`` in order, keeping first occurrences."""
            unique = []
            for i in l:
                if i not in unique:
                    unique.append(i)
            return unique

        if self.corpus is not None:
            self.corpus.set_text_features(remove_duplicates(self.used_attrs))
            self.send(Output.CORPUS, self.corpus)
class OWLoadCorpus(OWWidget):
    """Widget that loads a corpus from a file and sends it on its output.

    The control area holds a recent-files combo box with Browse/Reload
    buttons, an info label with the document count, and two lists that
    split the corpus' meta attributes into used and ignored text features.
    """

    name = "Corpus"
    description = "Load a corpus of text documents, (optionally) tagged with categories."
    icon = "icons/TextFile.svg"
    priority = 10

    outputs = [(Output.CORPUS, Corpus)]
    want_main_area = False

    # Filter string passed to the file-open dialog.
    dlgFormats = "Only tab files (*.tab)"

    # Persisted list of recently opened paths, most recent first.
    recent_files = Setting(["(none)"])

    def __init__(self):
        """Build the controls and open the first remembered file, if any."""
        super().__init__()
        self.corpus = None

        # Refresh recent files: forget paths that no longer exist.
        self.recent_files = [
            fn for fn in self.recent_files if os.path.exists(fn)
        ]

        # Browse file box
        fbox = gui.widgetBox(self.controlArea, "Corpus file", orientation=0)

        # Drop-down for recent files
        self.file_combo = QtGui.QComboBox(fbox)
        self.file_combo.setMinimumWidth(300)
        fbox.layout().addWidget(self.file_combo)
        self.file_combo.activated[int].connect(self.select_file)

        # Browse button
        browse = gui.button(fbox, self, 'Browse', callback=self.browse_file)
        browse.setIcon(self.style().standardIcon(QtGui.QStyle.SP_DirOpenIcon))
        browse.setSizePolicy(QtGui.QSizePolicy.Fixed, QtGui.QSizePolicy.Fixed)

        # Reload button
        reload = gui.button(fbox, self, "Reload", callback=self.reload,
                            default=True)
        reload.setIcon(self.style().standardIcon(
            QtGui.QStyle.SP_BrowserReload))
        reload.setSizePolicy(QtGui.QSizePolicy.Fixed, QtGui.QSizePolicy.Fixed)

        # Corpus info
        ibox = gui.widgetBox(self.controlArea, "Corpus info", addSpace=True)
        corp_info = "Corpus of 0 documents."
        self.info_label = gui.label(ibox, self, corp_info)

        # Used Text Features
        fbox = gui.widgetBox(self.controlArea, orientation=0)
        ubox = gui.widgetBox(fbox, "Used text features", addSpace=True)
        self.used_attrs = VariablesListItemModel()
        self.used_attrs_view = VariablesListItemView()
        self.used_attrs_view.setModel(self.used_attrs)
        ubox.layout().addWidget(self.used_attrs_view)

        # Any change to the "used" model triggers update_feature_selection.
        aa = self.used_attrs
        aa.dataChanged.connect(self.update_feature_selection)
        aa.rowsInserted.connect(self.update_feature_selection)
        aa.rowsRemoved.connect(self.update_feature_selection)

        # Ignored Text Features
        ibox = gui.widgetBox(fbox, "Ignored text features", addSpace=True)
        self.unused_attrs = VariablesListItemModel()
        self.unused_attrs_view = VariablesListItemView()
        self.unused_attrs_view.setModel(self.unused_attrs)
        ibox.layout().addWidget(self.unused_attrs_view)

        # Load the most recent file
        self.set_file_list()
        if self.recent_files:
            self.open_file(self.recent_files[0])

    def set_file_list(self):
        """Repopulate the combo box from ``recent_files``.

        Entries show only the file name; a "(none)" placeholder is shown
        when the list is empty, and a trailing entry offers browsing the
        documentation corpora.
        """
        self.file_combo.clear()
        if not self.recent_files:
            self.file_combo.addItem("(none)")
        for path in self.recent_files:
            if path == "(none)":
                self.file_combo.addItem("(none)")
            else:
                self.file_combo.addItem(os.path.split(path)[1])
        self.file_combo.addItem("Browse documentation corpora ...")

    def reload(self):
        """Re-open the first entry of ``recent_files``, if there is one."""
        if self.recent_files:
            return self.open_file(self.recent_files[0])

    def select_file(self, n):
        """Handle activation of combo-box entry ``n``.

        An index inside ``recent_files`` moves that path to the front and
        opens it. Any other non-zero index is the trailing "Browse ..."
        entry and opens the file dialog in the sample corpora directory.
        """
        if n < len(self.recent_files):
            name = self.recent_files.pop(n)
            self.recent_files.insert(0, name)
        elif n:
            # browse_file() already records and opens the chosen file, so
            # opening recent_files[0] again here would load it twice (or
            # needlessly reload the old file when the dialog is cancelled).
            self.browse_file(True)
            # Reset the combo so it does not stay on the "Browse ..." entry
            # after a cancelled dialog.
            self.set_file_list()
            return

        if self.recent_files:
            self.set_file_list()
            self.open_file(self.recent_files[0])

    def browse_file(self, demos_loc=False):
        """Ask the user for a corpus file and open it.

        :param demos_loc: when true, the dialog starts in the directory
            returned by ``get_sample_corpora_dir()`` instead of the user's
            home directory.
        """
        start_file = os.path.expanduser("~/")
        if demos_loc:
            start_file = get_sample_corpora_dir()
        filename = QtGui.QFileDialog.getOpenFileName(
            self, 'Open Orange Document Corpus', start_file, self.dlgFormats)
        if not filename:
            return  # dialog cancelled

        # Move (or add) the chosen path to the front of the recent list.
        if filename in self.recent_files:
            self.recent_files.remove(filename)
        self.recent_files.insert(0, filename)
        self.set_file_list()
        self.open_file(filename)

    def open_file(self, path):
        """Load the corpus at ``path``, fill the feature lists and send it.

        Any error raised while loading is shown as widget error 1 instead
        of propagating.
        """
        self.error(1, '')
        self.used_attrs[:] = []
        self.unused_attrs[:] = []

        try:
            self.corpus = Corpus.from_file(path)
            # The corpus' used features go to the "used" list; the
            # remaining meta attributes go to the "ignored" list.
            for feature in self.corpus.used_features:
                self.used_attrs.append(feature)
            for feature in self.corpus.domain.metas:
                if feature not in self.corpus.used_features:
                    self.unused_attrs.append(feature)
            self.info_label.setText("Corpus of {} documents.".format(
                len(self.corpus)))
            self.send(Output.CORPUS, self.corpus)
        except Exception as err:
            # Exception rather than BaseException, so KeyboardInterrupt and
            # SystemExit are not swallowed and reported as load errors.
            self.error(1, str(err))

    def update_feature_selection(self):
        """Regenerate the corpus documents from the "used" list and resend it."""
        if self.corpus is not None:
            self.corpus.regenerate_documents(self.used_attrs)
            self.send(Output.CORPUS, self.corpus)