Ejemplo n.º 1
0
    def __init__(self):
        """Build the widget controls and open the first remembered file.

        Lays out, in order: the "Corpus file" row (recent-files combo box,
        Browse and Reload buttons), the "Corpus info" label, and the two
        list views for used and ignored text features. Finally the combo
        box is filled and ``recent_files[0]`` is opened when at least one
        remembered path still exists.
        """
        super().__init__()

        self.corpus = None

        # Keep only the remembered paths that are still present on disk.
        self.recent_files = list(filter(os.path.exists, self.recent_files))

        # --- "Corpus file" row -------------------------------------------
        file_box = gui.widgetBox(self.controlArea, "Corpus file", orientation=0)

        # Combo box listing recent files; activating an entry selects it.
        self.file_combo = QtGui.QComboBox(file_box)
        self.file_combo.setMinimumWidth(300)
        file_box.layout().addWidget(self.file_combo)
        self.file_combo.activated[int].connect(self.select_file)

        browse_button = gui.button(file_box, self, 'Browse',
                                   callback=self.browse_file)
        browse_button.setIcon(
            self.style().standardIcon(QtGui.QStyle.SP_DirOpenIcon))
        browse_button.setSizePolicy(QtGui.QSizePolicy.Fixed,
                                    QtGui.QSizePolicy.Fixed)

        reload_button = gui.button(file_box, self, "Reload",
                                   callback=self.reload, default=True)
        reload_button.setIcon(
            self.style().standardIcon(QtGui.QStyle.SP_BrowserReload))
        reload_button.setSizePolicy(QtGui.QSizePolicy.Fixed,
                                    QtGui.QSizePolicy.Fixed)

        # --- "Corpus info" label -----------------------------------------
        info_box = gui.widgetBox(self.controlArea, "Corpus info", addSpace=True)
        self.info_label = gui.label(info_box, self, "Corpus of 0 documents.")

        # --- Used / ignored text feature lists, side by side -------------
        features_box = gui.widgetBox(self.controlArea, orientation=0)

        used_box = gui.widgetBox(features_box, "Used text features",
                                 addSpace=True)
        self.used_attrs = VariablesListItemModel()
        self.used_attrs_view = VariablesListItemView()
        self.used_attrs_view.setModel(self.used_attrs)
        used_box.layout().addWidget(self.used_attrs_view)

        # Any change to the "used" model is forwarded to the corpus.
        used_model = self.used_attrs
        for signal in (used_model.dataChanged,
                       used_model.rowsInserted,
                       used_model.rowsRemoved):
            signal.connect(self.update_feature_selection)

        ignored_box = gui.widgetBox(features_box, "Ignored text features",
                                    addSpace=True)
        self.unused_attrs = VariablesListItemModel()
        self.unused_attrs_view = VariablesListItemView()
        self.unused_attrs_view.setModel(self.unused_attrs)
        ignored_box.layout().addWidget(self.unused_attrs_view)

        # Fill the combo box and open the most recent file, if there is one.
        self.set_file_list()
        if self.recent_files:
            self.open_file(self.recent_files[0])
Ejemplo n.º 2
0
    def __init__(self):
        """Create the GUI and load the most recently used corpus file.

        The control area receives a file-selection row (combo box of recent
        files plus Browse/Reload buttons), an info label, and two list views
        showing the used and the ignored text features. Once the controls
        exist, the combo box is populated and the first remembered file that
        still exists on disk is opened.
        """
        super().__init__()

        self.corpus = None

        # Drop remembered paths whose files have disappeared.
        self.recent_files = list(filter(os.path.exists, self.recent_files))

        # File-selection row.
        corpus_file_box = gui.widgetBox(
            self.controlArea, "Corpus file", orientation=0)

        # Drop-down with the recent files.
        self.file_combo = QtGui.QComboBox(corpus_file_box)
        self.file_combo.setMinimumWidth(300)
        corpus_file_box.layout().addWidget(self.file_combo)
        self.file_combo.activated[int].connect(self.select_file)

        # Browse button.
        browse_btn = gui.button(
            corpus_file_box, self, 'Browse', callback=self.browse_file)
        browse_btn.setIcon(
            self.style().standardIcon(QtGui.QStyle.SP_DirOpenIcon))
        browse_btn.setSizePolicy(
            QtGui.QSizePolicy.Fixed, QtGui.QSizePolicy.Fixed)

        # Reload button.
        reload_btn = gui.button(
            corpus_file_box, self, "Reload", callback=self.reload, default=True)
        reload_btn.setIcon(
            self.style().standardIcon(QtGui.QStyle.SP_BrowserReload))
        reload_btn.setSizePolicy(
            QtGui.QSizePolicy.Fixed, QtGui.QSizePolicy.Fixed)

        # Corpus summary label.
        summary_box = gui.widgetBox(
            self.controlArea, "Corpus info", addSpace=True)
        self.info_label = gui.label(
            summary_box, self, "Corpus of 0 documents.")

        # Horizontal container holding both feature lists.
        lists_box = gui.widgetBox(self.controlArea, orientation=0)

        # Used text features.
        used_box = gui.widgetBox(
            lists_box, "Used text features", addSpace=True)
        self.used_attrs = VariablesListItemModel()
        self.used_attrs_view = VariablesListItemView()
        self.used_attrs_view.setModel(self.used_attrs)
        used_box.layout().addWidget(self.used_attrs_view)

        # Every kind of change to the used list triggers a corpus update.
        on_change = self.update_feature_selection
        self.used_attrs.dataChanged.connect(on_change)
        self.used_attrs.rowsInserted.connect(on_change)
        self.used_attrs.rowsRemoved.connect(on_change)

        # Ignored text features.
        unused_box = gui.widgetBox(
            lists_box, "Ignored text features", addSpace=True)
        self.unused_attrs = VariablesListItemModel()
        self.unused_attrs_view = VariablesListItemView()
        self.unused_attrs_view.setModel(self.unused_attrs)
        unused_box.layout().addWidget(self.unused_attrs_view)

        # Populate the combo box, then open the most recent file (if any).
        self.set_file_list()
        if self.recent_files:
            self.open_file(self.recent_files[0])
Ejemplo n.º 3
0
 def test_drop_mime(self):
     """Dropping the model's own MIME payload re-inserts the same variable.

     The payload is built from row 1 and dropped twice: once at explicit
     row 5, once with row/column -1. After each drop the newly occupied
     position must hold the very same object as row 1.
     """
     iris = Table("iris")
     m = VariablesListItemModel(iris.domain.variables)
     mime = m.mimeData([m.index(1, 0)])
     self.assertTrue(mime.hasFormat(VariablesListItemModel.MIME_TYPE))
     # unittest assertions instead of bare ``assert``: they are not stripped
     # under ``python -O`` and match the other checks in this test.
     self.assertTrue(
         m.dropMimeData(mime, Qt.MoveAction, 5, 0, m.index(-1, -1)))
     self.assertIs(m[5], m[1])
     # Row/column -1: the item is expected at index 6 (checked below).
     self.assertTrue(
         m.dropMimeData(mime, Qt.MoveAction, -1, -1, m.index(-1, -1)))
     self.assertIs(m[6], m[1])
Ejemplo n.º 4
0
 def test_flags(self):
     """Items are draggable but not drop targets; the invalid index is."""
     m = VariablesListItemModel([ContinuousVariable("X")])
     # Reuse the index instead of building it twice (the original assigned
     # ``index`` and then never used it).
     index = m.index(0)
     flags = m.flags(index)
     self.assertTrue(flags & Qt.ItemIsDragEnabled)
     self.assertFalse(flags & Qt.ItemIsDropEnabled)
     # 'invalid' index is drop enabled -> indicates insertion capability
     flags = m.flags(m.index(-1, -1))
     self.assertTrue(flags & Qt.ItemIsDropEnabled)
Ejemplo n.º 5
0
 def test_drop_mime(self):
     """Check that ``dropMimeData`` accepts the model's own MIME data.

     A payload created from row 1 is dropped at row 5 and then with
     row/column -1; each time the resulting new entry must be identical
     (``is``) to the entry at row 1.
     """
     iris = Table("iris")
     m = VariablesListItemModel(iris.domain.variables)
     mime = m.mimeData([m.index(1, 0)])
     self.assertTrue(mime.hasFormat(VariablesListItemModel.MIME_TYPE))

     # Bare ``assert`` statements are removed under ``python -O``; use the
     # TestCase assertion so the drop result is always verified.
     dropped = m.dropMimeData(mime, Qt.MoveAction, 5, 0, m.index(-1, -1))
     self.assertTrue(dropped)
     self.assertIs(m[5], m[1])

     # Row/column -1: the item is expected at index 6 (checked below).
     dropped = m.dropMimeData(mime, Qt.MoveAction, -1, -1, m.index(-1, -1))
     self.assertTrue(dropped)
     self.assertIs(m[6], m[1])
Ejemplo n.º 6
0
    def __init__(self):
        """Build the widget controls and select the first file entry.

        The control area receives a ``FileWidget`` for choosing the corpus
        file, a "Corpus info" label, and two list views for the used and
        the ignored text features. After construction the file widget's
        entry 0 is selected.
        """
        super().__init__()

        self.corpus = None

        # --- "Corpus file" row -------------------------------------------
        file_box = gui.widgetBox(self.controlArea, "Corpus file", orientation=0)
        file_widget = widgets.FileWidget(
            recent_files=self.recent_files,
            icon_size=(16, 16),
            on_open=self.open_file,
            directory_aliases={
                "Browse documentation corpora ...": get_sample_corpora_dir(),
            },
            dialog_format=self.dlgFormats,
            dialog_title='Open Orange Document Corpus',
            allow_empty=False,
            reload_label='Reload',
            browse_label='Browse',
        )
        file_box.layout().addWidget(file_widget)

        # --- "Corpus info" label -----------------------------------------
        info_box = gui.widgetBox(self.controlArea, "Corpus info", addSpace=True)
        self.info_label = gui.label(info_box, self, "Corpus of 0 documents.")

        # --- Used / ignored text feature lists, side by side -------------
        features_box = gui.widgetBox(self.controlArea, orientation=0)

        used_box = gui.widgetBox(features_box, "Used text features",
                                 addSpace=True)
        self.used_attrs = VariablesListItemModel()
        self.used_attrs_view = VariablesListItemView()
        self.used_attrs_view.setModel(self.used_attrs)
        used_box.layout().addWidget(self.used_attrs_view)

        # Any change to the "used" model is forwarded to the corpus.
        used_model = self.used_attrs
        for signal in (used_model.dataChanged,
                       used_model.rowsInserted,
                       used_model.rowsRemoved):
            signal.connect(self.update_feature_selection)

        ignored_box = gui.widgetBox(features_box, "Ignored text features",
                                    addSpace=True)
        self.unused_attrs = VariablesListItemModel()
        self.unused_attrs_view = VariablesListItemView()
        self.unused_attrs_view.setModel(self.unused_attrs)
        ignored_box.layout().addWidget(self.unused_attrs_view)

        # Select the first entry of the file widget.
        file_widget.select(0)
Ejemplo n.º 7
0
 def test_flags(self):
     """Verify drag/drop flags for a valid item and for the invalid index."""
     m = VariablesListItemModel([ContinuousVariable("X")])
     # The original stored this index in an unused local and recomputed it
     # for the ``flags`` call; compute it once and pass it along.
     index = m.index(0)
     flags = m.flags(index)
     self.assertTrue(flags & Qt.ItemIsDragEnabled)
     self.assertFalse(flags & Qt.ItemIsDropEnabled)
     # 'invalid' index is drop enabled -> indicates insertion capability
     flags = m.flags(m.index(-1, -1))
     self.assertTrue(flags & Qt.ItemIsDropEnabled)
Ejemplo n.º 8
0
class OWLoadCorpus(OWWidget):
    """Widget that loads a corpus from a file and sends it on its output.

    The file is chosen from a combo box of recently used files or through a
    file dialog. The corpus' meta features are shown in two lists ("used"
    and "ignored" text features); every change of the "used" list is pushed
    to the corpus, which is then sent again.
    """
    name = "Corpus"
    description = "Load a corpus of text documents, (optionally) tagged with categories."
    icon = "icons/TextFile.svg"
    priority = 10

    outputs = [(Output.CORPUS, Corpus)]
    want_main_area = False
    resizing_enabled = False

    # Filter string handed to QFileDialog.getOpenFileName.
    dlgFormats = "Only tab files (*.tab)"

    # Widget setting with the recently opened paths, most recent first.
    recent_files = Setting(["(none)"])

    def __init__(self):
        """Build the controls and open the first remembered file, if any."""
        super().__init__()

        self.corpus = None

        # Refresh recent files: forget paths that no longer exist.
        self.recent_files = [fn for fn in self.recent_files
                             if os.path.exists(fn)]

        # Browse file box
        fbox = gui.widgetBox(self.controlArea, "Corpus file", orientation=0)

        # Drop-down for recent files
        self.file_combo = QtGui.QComboBox(fbox)
        self.file_combo.setMinimumWidth(300)
        fbox.layout().addWidget(self.file_combo)
        self.file_combo.activated[int].connect(self.select_file)

        # Browse button
        browse = gui.button(fbox, self, 'Browse', callback=self.browse_file)
        browse.setIcon(self.style().standardIcon(QtGui.QStyle.SP_DirOpenIcon))
        browse.setSizePolicy(QtGui.QSizePolicy.Fixed, QtGui.QSizePolicy.Fixed)

        # Reload button
        reload = gui.button(fbox, self, "Reload", callback=self.reload, default=True)
        reload.setIcon(self.style().standardIcon(QtGui.QStyle.SP_BrowserReload))
        reload.setSizePolicy(QtGui.QSizePolicy.Fixed, QtGui.QSizePolicy.Fixed)

        # Corpus info
        ibox = gui.widgetBox(self.controlArea, "Corpus info", addSpace=True)
        corp_info = "Corpus of 0 documents."
        self.info_label = gui.label(ibox, self, corp_info)

        # Used Text Features
        fbox = gui.widgetBox(self.controlArea, orientation=0)
        ubox = gui.widgetBox(fbox, "Used text features", addSpace=True)
        self.used_attrs = VariablesListItemModel()
        self.used_attrs_view = VariablesListItemView()
        self.used_attrs_view.setModel(self.used_attrs)
        ubox.layout().addWidget(self.used_attrs_view)

        # Any change of the "used" list updates the corpus and resends it.
        aa = self.used_attrs
        aa.dataChanged.connect(self.update_feature_selection)
        aa.rowsInserted.connect(self.update_feature_selection)
        aa.rowsRemoved.connect(self.update_feature_selection)

        # Ignored Text Features
        ibox = gui.widgetBox(fbox, "Ignored text features", addSpace=True)
        self.unused_attrs = VariablesListItemModel()
        self.unused_attrs_view = VariablesListItemView()
        self.unused_attrs_view.setModel(self.unused_attrs)
        ibox.layout().addWidget(self.unused_attrs_view)

        # Load the most recent file
        self.set_file_list()
        if self.recent_files:
            self.open_file(self.recent_files[0])

    def set_file_list(self):
        """Rebuild the combo box from ``recent_files``.

        Shows "(none)" when there are no recent files, otherwise the base
        name of each path, and always ends with the
        "Browse documentation corpora ..." entry.
        """
        self.file_combo.clear()
        if not self.recent_files:
            self.file_combo.addItem("(none)")
        for path in self.recent_files:
            # basename("(none)") is "(none)", so the placeholder entry needs
            # no special case.
            self.file_combo.addItem(os.path.basename(path))
        self.file_combo.addItem("Browse documentation corpora ...")

    def reload(self):
        """Re-open the most recent file; do nothing when there is none."""
        if self.recent_files:
            return self.open_file(self.recent_files[0])

    def select_file(self, n):
        """React to the user activating entry ``n`` of the file combo box.

        An index inside ``recent_files`` moves that path to the front and
        loads it. Any other non-zero index is the trailing
        "Browse documentation corpora ..." entry and opens the file dialog
        in the sample corpora directory.
        """
        if n < len(self.recent_files):
            self.recent_files.insert(0, self.recent_files.pop(n))
            self.set_file_list()
            self.open_file(self.recent_files[0])
        elif n:
            # browse_file() updates the list and loads the chosen file
            # itself, so the file must not be opened a second time here.
            self.browse_file(True)
            # Rebuild the entries so the combo box is not left on the
            # "Browse ..." item when the dialog was cancelled.
            self.set_file_list()

    def browse_file(self, demos_loc=False):
        """Ask the user for a corpus file and open it.

        Args:
            demos_loc: when true the dialog starts in the directory returned
                by ``get_sample_corpora_dir()``, otherwise in the user's
                home directory.

        Nothing happens if the dialog returns an empty file name. Otherwise
        the chosen path becomes the first entry of ``recent_files``.
        """
        start_file = os.path.expanduser("~/")
        if demos_loc:
            start_file = get_sample_corpora_dir()
        filename = QtGui.QFileDialog.getOpenFileName(
            self, 'Open Orange Document Corpus', start_file, self.dlgFormats)
        if not filename:
            return
        if filename in self.recent_files:
            self.recent_files.remove(filename)
        self.recent_files.insert(0, filename)
        self.set_file_list()
        self.open_file(filename)

    def open_file(self, path):
        """Load the corpus at ``path``, fill the feature lists and send it.

        Widget error 1 is cleared and both feature lists are emptied first.
        If loading fails, the exception text is reported as widget error 1.
        """
        self.error(1, '')
        self.used_attrs[:] = []
        self.unused_attrs[:] = []

        try:
            self.corpus = Corpus.from_file(path)
            # Snapshot before filling the models: used_attrs' rowsInserted
            # signal is connected to update_feature_selection, which calls
            # set_text_features() with the partially filled list. Testing
            # against the live attribute could then misfile later features
            # as ignored (presumably set_text_features updates it).
            text_features = list(self.corpus.text_features)
            for f in self.corpus.domain.metas:
                if f in text_features:
                    self.used_attrs.append(f)
                else:
                    self.unused_attrs.append(f)
            self.info_label.setText("Corpus of {} documents.".format(len(self.corpus)))
            self.send(Output.CORPUS, self.corpus)
        except Exception as err:
            # Exception, not BaseException: KeyboardInterrupt and SystemExit
            # must propagate instead of being shown as a load error.
            self.error(1, str(err))

    def update_feature_selection(self):
        """Apply the current "used" list to the corpus and resend it."""
        if self.corpus is not None:
            self.corpus.set_text_features(self.used_attrs)
            self.send(Output.CORPUS, self.corpus)
Ejemplo n.º 9
0
class OWLoadCorpus(OWWidget):
    """Widget that loads a corpus from a file and sends it on its output.

    File selection is delegated to a ``FileWidget``. The corpus' text
    features and its remaining string metas are shown in two lists ("used"
    and "ignored"); every change of the "used" list is applied to the
    corpus, which is then sent.
    """
    name = "Corpus"
    description = "Load a corpus of text documents, (optionally) tagged with categories."
    icon = "icons/TextFile.svg"
    priority = 10

    outputs = [(Output.CORPUS, Corpus)]
    want_main_area = False
    resizing_enabled = False

    # File-dialog filter: one "All readable files" entry followed by one
    # entry per distinct reader registered in FileFormat.readers.
    dlgFormats = (
        "All readable files ({});;".format(
            '*' + ' *'.join(FileFormat.readers.keys())) +
        ";;".join("{} (*{})".format(f.DESCRIPTION, ' *'.join(f.EXTENSIONS))
                  for f in sorted(set(FileFormat.readers.values()),
                                  key=list(FileFormat.readers.values()).index)))

    # Widget setting with the recently opened paths.
    recent_files = Setting([])

    class Error(OWWidget.Error):
        read_file = Msg("Can't read file {} ({})")

    def __init__(self):
        """Build the controls and select the file widget's first entry."""
        super().__init__()

        self.corpus = None

        # Browse file box
        fbox = gui.widgetBox(self.controlArea, "Corpus file", orientation=0)
        widget = widgets.FileWidget(recent_files=self.recent_files, icon_size=(16, 16), on_open=self.open_file,
                                    directory_aliases={"Browse documentation corpora ...": get_sample_corpora_dir()},
                                    dialog_format=self.dlgFormats, dialog_title='Open Orange Document Corpus',
                                    allow_empty=False, reload_label='Reload', browse_label='Browse')
        fbox.layout().addWidget(widget)

        # Corpus info
        ibox = gui.widgetBox(self.controlArea, "Corpus info", addSpace=True)
        corp_info = "Corpus of 0 documents."
        self.info_label = gui.label(ibox, self, corp_info)

        # Used Text Features
        fbox = gui.widgetBox(self.controlArea, orientation=0)
        ubox = gui.widgetBox(fbox, "Used text features", addSpace=True)
        self.used_attrs = VariablesListItemModel()
        self.used_attrs_view = VariablesListItemView()
        self.used_attrs_view.setModel(self.used_attrs)
        ubox.layout().addWidget(self.used_attrs_view)

        # Any change of the "used" list updates the corpus and resends it.
        aa = self.used_attrs
        aa.dataChanged.connect(self.update_feature_selection)
        aa.rowsInserted.connect(self.update_feature_selection)
        aa.rowsRemoved.connect(self.update_feature_selection)

        # Ignored Text Features
        ibox = gui.widgetBox(fbox, "Ignored text features", addSpace=True)
        self.unused_attrs = VariablesListItemModel()
        self.unused_attrs_view = VariablesListItemView()
        self.unused_attrs_view.setModel(self.unused_attrs)
        ibox.layout().addWidget(self.unused_attrs_view)

        # load first file
        widget.select(0)

    def open_file(self, path):
        """Load the corpus at ``path`` and fill the two feature lists.

        The read-file error is cleared and both lists are emptied first.
        Nothing else happens for an empty ``path``. On success the corpus
        is named after the file (base name without extension), the info
        label is updated, the corpus' text features go to the "used" list
        and its remaining string metas to the "ignored" list. If anything
        in that sequence raises, the ``read_file`` error is shown with the
        path and the exception text.
        """
        self.Error.read_file.clear()
        self.used_attrs[:] = []
        self.unused_attrs[:] = []
        if path:
            try:
                self.corpus = Corpus.from_file(path)
                self.corpus.name = os.path.splitext(os.path.basename(path))[0]
                self.info_label.setText("Corpus of {} documents.".format(len(self.corpus)))
                # Snapshot first: filling used_attrs can fire
                # update_feature_selection (connected to rowsInserted),
                # which calls set_text_features() on the corpus. The
                # "ignored" list must be computed from the features as
                # loaded.
                text_features = list(self.corpus.text_features)
                self.used_attrs.extend(text_features)
                self.unused_attrs.extend([f for f in self.corpus.domain.metas
                                          if f.is_string and f not in text_features])
            except Exception as err:
                # Exception, not BaseException: KeyboardInterrupt and
                # SystemExit must propagate instead of being reported as a
                # read error.
                self.Error.read_file(path, str(err))

    def update_feature_selection(self):
        """Apply the de-duplicated "used" list to the corpus and send it."""
        # TODO fix VariablesListItemView so it does not emit
        # duplicated data when reordering inside a single window
        def remove_duplicates(l):
            """Return the items of ``l`` in order, keeping first occurrences."""
            unique = []
            for i in l:
                if i not in unique:
                    unique.append(i)
            return unique

        if self.corpus is not None:
            self.corpus.set_text_features(remove_duplicates(self.used_attrs))
            self.send(Output.CORPUS, self.corpus)
Ejemplo n.º 10
0
class OWLoadCorpus(OWWidget):
    """Widget that loads a corpus from a file and sends it on its output.

    The file is chosen from a combo box of recently used files or through a
    file dialog. The corpus' used features and its remaining metas are shown
    in two lists; every change of the "used" list regenerates the corpus'
    documents from that list and sends the corpus again.
    """
    name = "Corpus"
    description = "Load a corpus of text documents, (optionally) tagged with categories."
    icon = "icons/TextFile.svg"
    priority = 10

    outputs = [(Output.CORPUS, Corpus)]
    want_main_area = False

    # Filter string handed to QFileDialog.getOpenFileName.
    dlgFormats = "Only tab files (*.tab)"

    # Widget setting with the recently opened paths, most recent first.
    recent_files = Setting(["(none)"])

    def __init__(self):
        """Build the controls and open the first remembered file, if any."""
        super().__init__()

        self.corpus = None

        # Refresh recent files: forget paths that no longer exist.
        self.recent_files = [
            fn for fn in self.recent_files if os.path.exists(fn)
        ]

        # Browse file box
        fbox = gui.widgetBox(self.controlArea, "Corpus file", orientation=0)

        # Drop-down for recent files
        self.file_combo = QtGui.QComboBox(fbox)
        self.file_combo.setMinimumWidth(300)
        fbox.layout().addWidget(self.file_combo)
        self.file_combo.activated[int].connect(self.select_file)

        # Browse button
        browse = gui.button(fbox, self, 'Browse', callback=self.browse_file)
        browse.setIcon(self.style().standardIcon(QtGui.QStyle.SP_DirOpenIcon))
        browse.setSizePolicy(QtGui.QSizePolicy.Fixed, QtGui.QSizePolicy.Fixed)

        # Reload button
        reload = gui.button(fbox,
                            self,
                            "Reload",
                            callback=self.reload,
                            default=True)
        reload.setIcon(self.style().standardIcon(
            QtGui.QStyle.SP_BrowserReload))
        reload.setSizePolicy(QtGui.QSizePolicy.Fixed, QtGui.QSizePolicy.Fixed)

        # Corpus info
        ibox = gui.widgetBox(self.controlArea, "Corpus info", addSpace=True)
        corp_info = "Corpus of 0 documents."
        self.info_label = gui.label(ibox, self, corp_info)

        # Used Text Features
        fbox = gui.widgetBox(self.controlArea, orientation=0)
        ubox = gui.widgetBox(fbox, "Used text features", addSpace=True)
        self.used_attrs = VariablesListItemModel()
        self.used_attrs_view = VariablesListItemView()
        self.used_attrs_view.setModel(self.used_attrs)
        ubox.layout().addWidget(self.used_attrs_view)

        # Any change of the "used" list updates the corpus and resends it.
        aa = self.used_attrs
        aa.dataChanged.connect(self.update_feature_selection)
        aa.rowsInserted.connect(self.update_feature_selection)
        aa.rowsRemoved.connect(self.update_feature_selection)

        # Ignored Text Features
        ibox = gui.widgetBox(fbox, "Ignored text features", addSpace=True)
        self.unused_attrs = VariablesListItemModel()
        self.unused_attrs_view = VariablesListItemView()
        self.unused_attrs_view.setModel(self.unused_attrs)
        ibox.layout().addWidget(self.unused_attrs_view)

        # Load the most recent file
        self.set_file_list()
        if self.recent_files:
            self.open_file(self.recent_files[0])

    def set_file_list(self):
        """Rebuild the combo box from ``recent_files``.

        Shows "(none)" when there are no recent files, otherwise the base
        name of each path, and always ends with the
        "Browse documentation corpora ..." entry.
        """
        self.file_combo.clear()
        if not self.recent_files:
            self.file_combo.addItem("(none)")
        for path in self.recent_files:
            # basename("(none)") is "(none)", so the placeholder entry needs
            # no special case.
            self.file_combo.addItem(os.path.basename(path))
        self.file_combo.addItem("Browse documentation corpora ...")

    def reload(self):
        """Re-open the most recent file; do nothing when there is none."""
        if self.recent_files:
            return self.open_file(self.recent_files[0])

    def select_file(self, n):
        """React to the user activating entry ``n`` of the file combo box.

        An index inside ``recent_files`` moves that path to the front and
        loads it. Any other non-zero index is the trailing
        "Browse documentation corpora ..." entry and opens the file dialog
        in the sample corpora directory.
        """
        if n < len(self.recent_files):
            self.recent_files.insert(0, self.recent_files.pop(n))
            self.set_file_list()
            self.open_file(self.recent_files[0])
        elif n:
            # browse_file() updates the list and loads the chosen file
            # itself, so the file must not be opened a second time here.
            self.browse_file(True)
            # Rebuild the entries so the combo box is not left on the
            # "Browse ..." item when the dialog was cancelled.
            self.set_file_list()

    def browse_file(self, demos_loc=False):
        """Ask the user for a corpus file and open it.

        Args:
            demos_loc: when true the dialog starts in the directory returned
                by ``get_sample_corpora_dir()``, otherwise in the user's
                home directory.

        Nothing happens if the dialog returns an empty file name. Otherwise
        the chosen path becomes the first entry of ``recent_files``.
        """
        start_file = os.path.expanduser("~/")
        if demos_loc:
            start_file = get_sample_corpora_dir()
        filename = QtGui.QFileDialog.getOpenFileName(
            self, 'Open Orange Document Corpus', start_file, self.dlgFormats)
        if not filename:
            return
        if filename in self.recent_files:
            self.recent_files.remove(filename)
        self.recent_files.insert(0, filename)
        self.set_file_list()
        self.open_file(filename)

    def open_file(self, path):
        """Load the corpus at ``path``, fill the feature lists and send it.

        Widget error 1 is cleared and both feature lists are emptied first.
        If loading fails, the exception text is reported as widget error 1.
        """
        self.error(1, '')
        self.used_attrs[:] = []
        self.unused_attrs[:] = []

        try:
            self.corpus = Corpus.from_file(path)
            # Snapshot before filling the models: used_attrs' rowsInserted
            # signal is connected to update_feature_selection, which calls
            # regenerate_documents() with the partially filled list. Looping
            # over the live attribute would then risk iterating a sequence
            # that changes underneath us (presumably regenerate_documents
            # updates used_features).
            used_features = list(self.corpus.used_features)
            for i in used_features:
                self.used_attrs.append(i)
            for i in self.corpus.domain.metas:
                if i not in used_features:
                    self.unused_attrs.append(i)

            self.info_label.setText("Corpus of {} documents.".format(
                len(self.corpus)))
            self.send(Output.CORPUS, self.corpus)
        except Exception as err:
            # Exception, not BaseException: KeyboardInterrupt and SystemExit
            # must propagate instead of being shown as a load error.
            self.error(1, str(err))

    def update_feature_selection(self):
        """Regenerate the corpus from the "used" list and resend it."""
        if self.corpus is not None:
            self.corpus.regenerate_documents(self.used_attrs)
            self.send(Output.CORPUS, self.corpus)