def test_emits_column_changes_on_row_insert(self):
    """Row-level edits must emit column insert/remove signals.

    Each spy record is ``[parent, first, last]``; only ``first`` and
    ``last`` are compared.
    """
    model = PyTableModel()
    # The spies record every emission; no extra list slots are needed.
    inserted = QSignalSpy(model.columnsInserted)
    removed = QSignalSpy(model.columnsRemoved)

    # First row with one value -> column 0 appears.
    model.append([2])
    self.assertEqual(list(inserted)[-1][1:], [0, 0])

    # A wider row adds column 1.
    model.append([2, 3])
    self.assertEqual(list(inserted)[-1][1:], [1, 1])

    # Deleting every row removes both columns.
    del model[:]
    self.assertEqual(list(removed)[0][1:], [0, 1])

    # Re-populating an empty model inserts both columns at once.
    model.extend([[0, 1], [0, 2]])
    self.assertEqual(list(inserted)[-1][1:], [0, 1])

    # NOTE(review): this inspects the first recorded removal, not the
    # latest one -- confirm whether clear() is expected to emit
    # columnsRemoved itself.
    model.clear()
    self.assertEqual(list(removed)[0][1:], [0, 1])

    # Slice assignment into an empty model inserts a single column.
    model[:] = [[1], [2]]
    self.assertEqual(list(inserted)[-1][1:], [0, 0])
class TestPyTableModel(TestCase):
    """Tests for ``PyTableModel`` used through its Qt and list-like APIs."""

    def setUp(self):
        """Give every test a fresh model holding two rows of two values."""
        rows = [[1, 4], [2, 3]]
        self.model = PyTableModel(rows)

    def test_init(self):
        """A model built without arguments has no rows."""
        empty = PyTableModel()
        self.model = empty
        self.assertEqual(empty.rowCount(), 0)

    def test_rowCount(self):
        """``rowCount()`` and ``len()`` both report two rows."""
        model = self.model
        self.assertEqual(model.rowCount(), 2)
        self.assertEqual(len(model), 2)

    def test_columnCount(self):
        """The fixture has two columns."""
        n_columns = self.model.columnCount()
        self.assertEqual(n_columns, 2)

    def test_data(self):
        """Default role yields the text '1'; the edit role yields the int 1."""
        model = self.model
        top_left = model.index(0, 0)
        self.assertEqual(model.data(top_left), '1')
        self.assertEqual(model.data(top_left, Qt.EditRole), 1)

    def test_editable(self):
        """Only a model created with ``editable=True`` flags cells editable."""
        editable_model = PyTableModel([[0]], editable=True)

        default_flags = self.model.flags(self.model.index(0, 0))
        self.assertFalse(int(default_flags & Qt.ItemIsEditable))

        editable_flags = editable_model.flags(editable_model.index(0, 0))
        self.assertTrue(int(editable_flags & Qt.ItemIsEditable))

    def test_sort(self):
        """After sorting on column 1 the first row starts with 2."""
        self.model.sort(1)
        first_row = self.model[0]
        self.assertEqual(first_row[0], 2)

    def test_setHeaderLabels(self):
        """Horizontal labels are returned as set; vertical header is '1'."""
        labels = ['Col 1', 'Col 2']
        self.model.setHorizontalHeaderLabels(labels)
        horizontal = self.model.headerData(1, Qt.Horizontal)
        self.assertEqual(horizontal, 'Col 2')
        vertical = self.model.headerData(1, Qt.Vertical)
        self.assertEqual(vertical, '1')

    def test_removeRows(self):
        """Removing the first row leaves only ``[2, 3]``."""
        model = self.model
        model.removeRows(0, 1)
        self.assertEqual(len(model), 1)
        remaining = model[0]
        self.assertEqual(remaining[1], 3)

    def test_removeColumns(self):
        """Removing the first column leaves the second one behind."""
        model = self.model
        model.removeColumns(0, 1)
        self.assertEqual(model.columnCount(), 1)
        last_row = model[1]
        self.assertEqual(last_row[0], 3)

    def test_insertRows(self):
        """A row inserted at position 0 pushes the old first row down."""
        self.model.insertRows(0, 1)
        shifted_row = self.model[1]
        self.assertEqual(shifted_row[0], 1)

    def test_insertColumns(self):
        """A column inserted at position 0 is filled with empty strings."""
        self.model.insertColumns(0, 1)
        first_row = self.model[0]
        self.assertEqual(first_row, ['', 1, 4])

    def test_wrap(self):
        """Wrapping a 1x1 table replaces the model contents."""
        model = self.model
        model.wrap([[0]])
        self.assertEqual(model.rowCount(), 1)
        self.assertEqual(model.columnCount(), 1)

    def test_clear(self):
        """``clear()`` drops all rows."""
        self.model.clear()
        n_rows = self.model.rowCount()
        self.assertEqual(n_rows, 0)

    def test_append(self):
        """An appended row lands at the end."""
        model = self.model
        model.append([5, 6])
        new_row = model[2]
        self.assertEqual(new_row[1], 6)
        self.assertEqual(model.rowCount(), 3)

    def test_extend(self):
        """Rows added through ``extend`` land at the end."""
        model = self.model
        model.extend([[5, 6]])
        new_row = model[2]
        self.assertEqual(new_row[1], 6)
        self.assertEqual(model.rowCount(), 3)

    def test_insert(self):
        """A row inserted at index 0 becomes the first row."""
        model = self.model
        model.insert(0, [5, 6])
        new_row = model[0]
        self.assertEqual(new_row[1], 6)
        self.assertEqual(model.rowCount(), 3)

    def test_remove(self):
        """Removing a row by value shrinks the model by one."""
        self.model.remove([2, 3])
        n_rows = self.model.rowCount()
        self.assertEqual(n_rows, 1)

    def test_other_roles(self):
        """Role data set on a row is still found after a row above it is deleted."""
        model = self.model
        model.append([2, 3])
        appended = model.index(2, 0)
        model.setData(appended, Qt.AlignCenter, Qt.TextAlignmentRole)

        # Deleting row 1 moves the appended row from position 2 to 1.
        del model[1]
        shifted = model.index(1, 0)
        alignment = model.data(shifted, Qt.TextAlignmentRole)
        self.assertTrue(Qt.AlignCenter & alignment)
class TestPyTableModel(unittest.TestCase):
    """Tests for ``PyTableModel`` used through its Qt and list-like APIs."""

    def setUp(self):
        """Give every test a fresh model holding two rows of two values."""
        self.model = PyTableModel([[1, 4], [2, 3]])

    def test_init(self):
        """A model built without arguments has no rows."""
        self.model = PyTableModel()
        self.assertEqual(self.model.rowCount(), 0)

    def test_rowCount(self):
        """``rowCount()`` and ``len()`` both report two rows."""
        self.assertEqual(self.model.rowCount(), 2)
        self.assertEqual(len(self.model), 2)

    def test_columnCount(self):
        """The fixture has two columns."""
        self.assertEqual(self.model.columnCount(), 2)

    def test_data(self):
        """Default role yields the text '1'; the edit role yields the int 1."""
        mi = self.model.index(0, 0)
        self.assertEqual(self.model.data(mi), '1')
        self.assertEqual(self.model.data(mi, Qt.EditRole), 1)

    def test_editable(self):
        """Only a model created with ``editable=True`` flags cells editable."""
        editable_model = PyTableModel([[0]], editable=True)
        self.assertFalse(
            int(self.model.flags(self.model.index(0, 0)) & Qt.ItemIsEditable))
        self.assertTrue(
            int(editable_model.flags(editable_model.index(0, 0))
                & Qt.ItemIsEditable))

    def test_sort(self):
        """After sorting on column 1 the top-left cell holds 2."""
        self.model.sort(1)
        self.assertEqual(self.model.index(0, 0).data(Qt.EditRole), 2)

    def test_setHeaderLabels(self):
        """Horizontal labels are returned as set; vertical header is 2."""
        self.model.setHorizontalHeaderLabels(['Col 1', 'Col 2'])
        self.assertEqual(self.model.headerData(1, Qt.Horizontal), 'Col 2')
        self.assertEqual(self.model.headerData(1, Qt.Vertical), 2)

    def test_removeRows(self):
        """Removing the first row leaves only ``[2, 3]``."""
        self.model.removeRows(0, 1)
        self.assertEqual(len(self.model), 1)
        self.assertEqual(self.model[0][1], 3)

    def test_removeColumns(self):
        """Removing the first column leaves the second one behind."""
        self.model.removeColumns(0, 1)
        self.assertEqual(self.model.columnCount(), 1)
        self.assertEqual(self.model[1][0], 3)

    def test_insertRows(self):
        """A row inserted at position 0 pushes the old first row down."""
        self.model.insertRows(0, 1)
        self.assertEqual(self.model[1][0], 1)

    def test_insertColumns(self):
        """A column inserted at position 0 is filled with empty strings."""
        self.model.insertColumns(0, 1)
        self.assertEqual(self.model[0], ['', 1, 4])

    def test_wrap(self):
        """Wrapping a 1x1 table replaces the model contents."""
        self.model.wrap([[0]])
        self.assertEqual(self.model.rowCount(), 1)
        self.assertEqual(self.model.columnCount(), 1)

    def test_clear(self):
        """``clear()`` drops all rows."""
        self.model.clear()
        self.assertEqual(self.model.rowCount(), 0)

    def test_append(self):
        """An appended row lands at the end."""
        self.model.append([5, 6])
        self.assertEqual(self.model[2][1], 6)
        self.assertEqual(self.model.rowCount(), 3)

    def test_extend(self):
        """Rows added through ``extend`` land at the end."""
        self.model.extend([[5, 6]])
        self.assertEqual(self.model[2][1], 6)
        self.assertEqual(self.model.rowCount(), 3)

    def test_insert(self):
        """A row inserted at index 0 becomes the first row."""
        self.model.insert(0, [5, 6])
        self.assertEqual(self.model[0][1], 6)
        self.assertEqual(self.model.rowCount(), 3)

    def test_remove(self):
        """Removing a row by value shrinks the model by one."""
        self.model.remove([2, 3])
        self.assertEqual(self.model.rowCount(), 1)

    def test_other_roles(self):
        """Role data set on a row is still found after a row above it is deleted."""
        self.model.append([2, 3])
        self.model.setData(self.model.index(2, 0),
                           Qt.AlignCenter,
                           Qt.TextAlignmentRole)
        # Deleting row 1 moves the appended row from position 2 to 1.
        del self.model[1]
        self.assertTrue(Qt.AlignCenter &
                        self.model.data(self.model.index(1, 0),
                                        Qt.TextAlignmentRole))

    def test_set_iten_slice(self):
        """Slice assignment can grow, shrink, replace and extend the model."""
        # Replace the first row with three rows.
        self.model[:1] = [[10, 11], [12, 13], [14, 15]]
        self.assertEqual(list(self.model),
                         [[10, 11], [12, 13], [14, 15], [2, 3]])

        # Assigning an empty list removes the slice.
        self.model[1:3] = []
        self.assertEqual(list(self.model), [[10, 11], [2, 3]])

        # Replace everything.
        self.model[:] = [[20, 21]]
        self.assertEqual(list(self.model), [[20, 21]])

        # Assigning past the end appends.
        self.model[1:] = [[10, 11], [2, 3]]
        self.assertEqual(list(self.model), [[20, 21], [10, 11], [2, 3]])

    def test_emits_column_changes_on_row_insert(self):
        """Row-level edits must emit column insert/remove signals.

        Each spy record is ``[parent, first, last]``; only ``first`` and
        ``last`` are compared.
        """
        model = PyTableModel()
        # The spies record every emission; no extra list slots are needed.
        inserted = QSignalSpy(model.columnsInserted)
        removed = QSignalSpy(model.columnsRemoved)

        # First row with one value -> column 0 appears.
        model.append([2])
        self.assertEqual(list(inserted)[-1][1:], [0, 0])

        # A wider row adds column 1.
        model.append([2, 3])
        self.assertEqual(list(inserted)[-1][1:], [1, 1])

        # Deleting every row removes both columns.
        del model[:]
        self.assertEqual(list(removed)[0][1:], [0, 1])

        # Re-populating an empty model inserts both columns at once.
        model.extend([[0, 1], [0, 2]])
        self.assertEqual(list(inserted)[-1][1:], [0, 1])

        # NOTE(review): this inspects the first recorded removal, not the
        # latest one -- confirm whether clear() is expected to emit
        # columnsRemoved itself.
        model.clear()
        self.assertEqual(list(removed)[0][1:], [0, 1])

        # Slice assignment into an empty model inserts a single column.
        model[:] = [[1], [2]]
        self.assertEqual(list(inserted)[-1][1:], [0, 0])
class OWDuplicates(widget.OWWidget):
    """Widget that groups documents into clusters of duplicates.

    The input distance matrix is turned into a hierarchical-clustering
    linkage; every merge whose distance does not exceed the chosen
    threshold joins documents into one cluster.  Three outputs are sent:
    the corpus with a cluster-id column appended, the corpus reduced to the
    first document of each cluster, and the documents of the cluster
    currently selected in the table.
    """
    name = 'Duplicate Detection'
    description = 'Detect & remove duplicates from a corpus.'
    icon = 'icons/Duplicates.svg'
    priority = 45

    inputs = [(IO.distances, DistMatrix, 'set_distances')]
    outputs = [
        (IO.corpus_without_duplicates, Corpus),
        (IO.duplicates, Corpus),
        (IO.corpus, Corpus),
    ]

    resizing_enabled = True

    class Error(OWWidget.Error):
        dist_matrix_invalid_shape = Msg('Duplicate detection only supports '
                                        'distances calculated between rows.')
        too_little_documents = Msg('More than one document is required.')

    LINKAGE = ['Single', 'Average', 'Complete', 'Weighted', 'Ward']
    linkage_method = settings.Setting(1)

    # Cluster variable domain role
    AttributeRole, ClassRole, MetaRole = 0, 1, 2
    CLUSTER_ROLES = ["Attributes", "Class", "Metas"]
    cluster_role = settings.Setting(2)

    def __init__(self):
        """Initialise state and build the info box, histogram, table and controls."""
        super().__init__()
        self.corpus = None              # corpus taken from distances
        self.linkage = None             # hierarchical clustering linkage as returned by Orange
        self.distances = None           # DistMatrix on input
        self.clustering_mask = None     # 1D array of clusters for self.corpus
        self.threshold = 0              # hierarchical clustering distance threshold
        self.threshold_spin = None

        # Info
        self.n_documents = ''
        self.n_unique = ''
        self.n_duplicates = ''
        info_box = gui.widgetBox(self.controlArea, box='Info')
        gui.label(info_box, self, 'Documents: %(n_documents)s')
        gui.label(info_box, self, ' ◦ unique: %(n_unique)s')
        gui.label(info_box, self, ' ◦ duplicates: %(n_duplicates)s')

        # Threshold Histogram & Cluster View
        self.histogram = Histogram(self)
        self.table_view = gui.TableView(selectionMode=QListView.SingleSelection)
        self.table_model = PyTableModel()
        self.table_model.setHorizontalHeaderLabels(['Cluster', 'Size'])
        self.table_view.setModel(self.table_model)
        self.table_view.selectionModel().selectionChanged.connect(self.send_duplicates)

        # Add to main area
        height = 300
        main_area = gui.hBox(self.mainArea)
        self.histogram.setMinimumWidth(500)
        self.histogram.setMinimumHeight(height)
        self.table_view.setFixedWidth(140)
        main_area.layout().addWidget(self.histogram)
        main_area.layout().addWidget(self.table_view)

        # Controls
        gui.comboBox(self.controlArea, self, 'linkage_method', items=self.LINKAGE, box='Linkage',
                     callback=self.recalculate_linkage, orientation=Qt.Horizontal)
        self.threshold_spin = gui.doubleSpin(self.controlArea, self, 'threshold',
                                             0, float('inf'), 0.01, decimals=2,
                                             label='Distance threshold', box='Distances',
                                             callback=self.threshold_changed,
                                             keyboardTracking=False, controlWidth=60)
        self.histogram.region.sigRegionChangeFinished.connect(self.threshold_from_histogram_region)
        self.threshold_spin.setEnabled(False)
        gui.rubber(self.controlArea)

        # Output
        gui.comboBox(self.controlArea, self, "cluster_role", box='Output',
                     label='Append Cluster IDs to:', callback=self.send_corpus,
                     items=self.CLUSTER_ROLES)

    def reset(self):
        """Return the widget to its no-input state and empty the views."""
        self.corpus = None
        self.linkage = None
        self.distances = None
        self.clustering_mask = None
        self.n_documents = ''
        self.n_unique = ''
        self.n_duplicates = ''
        self.threshold = 0
        self.threshold_spin.setEnabled(False)
        self.table_model.clear()
        self.histogram.setValues([])

    def set_distances(self, distances):
        """Input handler: validate the distance matrix and recompute clusters.

        The widget is reset when the input is ``None``, when fewer than two
        documents are present, or when the matrix shape does not match the
        number of documents; the latter two cases also raise a widget error.
        """
        self.Error.clear()
        self.distances = distances
        if distances is None:
            self.reset()
            return

        self.corpus = self.distances.row_items
        self.n_documents = len(self.corpus)
        if self.n_documents < 2:
            self.Error.too_little_documents()
            self.reset()
            return
        if distances.shape != (self.n_documents, self.n_documents):
            self.Error.dist_matrix_invalid_shape()
            self.reset()
            return
        self.threshold_spin.setEnabled(True)
        self.recalculate_linkage()

    def threshold_from_histogram_region(self):
        """Adopt the upper bound of the histogram region as the threshold."""
        _, self.threshold = self.histogram.getRegion()
        self.threshold_changed()

    def threshold_changed(self):
        """Clip the threshold to the histogram boundary and re-cluster."""
        self.threshold = np.clip(self.threshold, *self.histogram.boundary())
        self.histogram.setRegion(0, self.threshold)
        self.detect_duplicates()

    def recalculate_linkage(self):
        """Recompute the linkage for the selected method and refresh the UI."""
        if self.distances is not None:
            self.linkage = dist_matrix_linkage(self.distances,
                                               self.LINKAGE[self.linkage_method].lower())

            # Magnitude of the spinbox's step is data-dependent
            vals = sorted(self.linkage[:, 2])
            low, up = vals[0], vals[-1]
            step = (up - low) / 20
            self.threshold_spin.setSingleStep(step)
            self.threshold = np.clip(self.threshold, low, up)
            self.histogram.setValues([])    # without this range breaks when changing linkages
            self.histogram.setValues(vals)
            self.histogram.setRegion(0, self.threshold)

            self.detect_duplicates()

    def detect_duplicates(self):
        """Re-cluster, resend the corpus outputs and refill the cluster table."""
        if self.distances is not None:
            self.cluster_linkage()
            self.send_corpus()
            self.send_corpus_without_duplicates()
            self.fill_cluster_view()

    def cluster_linkage(self):
        """Cut the linkage at ``self.threshold`` and build ``clustering_mask``.

        Linkage rows are read as ``(c1, c2, dist, size)``; merge ``i``
        creates cluster ``n + i`` out of clusters ``c1`` and ``c2``.
        """
        # cluster documents
        n = int(self.n_documents)
        clusters = {j: [j] for j in range(n)}
        for i, (c1, c2, dist, _size) in enumerate(self.linkage):
            # Stops at the first merge above the threshold.
            # NOTE(review): assumes linkage rows are ordered by distance -- confirm.
            if dist > self.threshold:
                break
            clusters[n + i] = clusters[c1] + clusters[c2]
            del clusters[c1]
            del clusters[c2]

        self.n_unique = len(clusters)
        self.n_duplicates = n - self.n_unique

        # create mask
        self.clustering_mask = np.empty(n, dtype=int)
        for i, c in enumerate(clusters.values()):
            self.clustering_mask[c] = i

    def fill_cluster_view(self):
        """Show one table row per cluster (id, size) and select the first row."""
        self.table_model.clear()
        c = Counter(self.clustering_mask)
        for id_, count in c.items():
            self.table_model.append([Cluster(id_), count])
        self.table_view.sortByColumn(1, Qt.DescendingOrder)
        self.table_view.selectRow(0)

    def send_corpus(self):
        """Send the corpus with a 'Duplicates Cluster' column appended.

        The column goes to attributes, class variables or metas according
        to ``cluster_role``.  ``None`` is sent when no clustering exists.
        """
        if self.clustering_mask is None:
            self.send(IO.corpus, None)
            return

        # np.unique returns the ids sorted, so the position of each value
        # label matches the integer id written into the column below
        # (set() iteration order is not guaranteed to do that).
        cluster_ids = np.unique(self.clustering_mask)
        cluster_var = DiscreteVariable(
            'Duplicates Cluster',
            values=[str(Cluster(v)) for v in cluster_ids]
        )
        corpus, domain = self.corpus, self.corpus.domain
        attrs = domain.attributes
        class_ = domain.class_vars
        metas = domain.metas

        if self.cluster_role == self.AttributeRole:
            attrs = attrs + (cluster_var,)
        elif self.cluster_role == self.ClassRole:
            class_ = class_ + (cluster_var,)
        elif self.cluster_role == self.MetaRole:
            metas = metas + (cluster_var,)

        domain = Domain(attrs, class_, metas)
        corpus = corpus.from_table(domain, corpus)
        corpus.get_column_view(cluster_var)[0][:] = self.clustering_mask
        self.send(IO.corpus, corpus)

    def send_corpus_without_duplicates(self):
        """Send the corpus reduced to the first document of every cluster.

        ``None`` is sent when no clustering exists.
        """
        if self.clustering_mask is None:
            self.send(IO.corpus_without_duplicates, None)
            return

        # TODO make this more general, currently we just take the first document
        # return_index gives the first occurrence of each (sorted) cluster id.
        _, first_rows = np.unique(self.clustering_mask, return_index=True)
        mask = list(first_rows)
        c = self.corpus[mask]
        c.name = '{} (Without Duplicates)'.format(self.corpus.name)
        self.send(IO.corpus_without_duplicates, c)

    def send_duplicates(self):
        """Send the documents of the cluster selected in the table.

        ``None`` is sent when there is no clustering or no valid current row.
        """
        index = self.table_view.selectionModel().currentIndex().row()
        # Without a current index the row is -1; indexing with it would
        # silently pick the last cluster (or raise on an empty model).
        if self.clustering_mask is None or not 0 <= index < len(self.table_model):
            self.send(IO.duplicates, None)
            return

        # NOTE(review): the view row is used directly as a model row --
        # confirm this holds when the view is sorted.
        cluster = self.table_model[index][0]
        mask = np.flatnonzero(self.clustering_mask == cluster.id)
        c = self.corpus[mask]
        c.name = '{} {}'.format(IO.duplicates, cluster)
        self.send(IO.duplicates, c)

    def send_report(self):
        """Report the linkage, threshold and document counts."""
        self.report_items([
            ('Linkage', self.LINKAGE[self.linkage_method]),
            ('Distance threshold', '{:.2f}'.format(self.threshold)),
            ('Documents', self.n_documents),
            ('Unique', self.n_unique),
            ('Duplicates', self.n_duplicates),
        ])
class TestPyTableModel(unittest.TestCase):
    """Tests for ``PyTableModel`` used through its Qt and list-like APIs."""

    def setUp(self):
        """Give every test a fresh model holding two rows of two values."""
        rows = [[1, 4], [2, 3]]
        self.model = PyTableModel(rows)

    def test_init(self):
        """A model built without arguments has no rows."""
        empty = PyTableModel()
        self.model = empty
        self.assertEqual(empty.rowCount(), 0)

    def test_rowCount(self):
        """``rowCount()`` and ``len()`` both report two rows."""
        model = self.model
        self.assertEqual(model.rowCount(), 2)
        self.assertEqual(len(model), 2)

    def test_columnCount(self):
        """The fixture has two columns."""
        n_columns = self.model.columnCount()
        self.assertEqual(n_columns, 2)

    def test_data(self):
        """Default role yields the text '1'; the edit role yields the int 1."""
        model = self.model
        top_left = model.index(0, 0)
        self.assertEqual(model.data(top_left), '1')
        self.assertEqual(model.data(top_left, Qt.EditRole), 1)

    def test_editable(self):
        """Only a model created with ``editable=True`` flags cells editable."""
        editable_model = PyTableModel([[0]], editable=True)

        default_flags = self.model.flags(self.model.index(0, 0))
        self.assertFalse(int(default_flags & Qt.ItemIsEditable))

        editable_flags = editable_model.flags(editable_model.index(0, 0))
        self.assertTrue(int(editable_flags & Qt.ItemIsEditable))

    def test_sort(self):
        """After sorting on column 1 the top-left cell holds 2."""
        self.model.sort(1)
        top_left = self.model.index(0, 0)
        self.assertEqual(top_left.data(Qt.EditRole), 2)

    def test_setHeaderLabels(self):
        """Horizontal labels are returned as set; vertical header is 2."""
        labels = ['Col 1', 'Col 2']
        self.model.setHorizontalHeaderLabels(labels)
        horizontal = self.model.headerData(1, Qt.Horizontal)
        self.assertEqual(horizontal, 'Col 2')
        vertical = self.model.headerData(1, Qt.Vertical)
        self.assertEqual(vertical, 2)

    def test_removeRows(self):
        """Removing the first row leaves only ``[2, 3]``."""
        model = self.model
        model.removeRows(0, 1)
        self.assertEqual(len(model), 1)
        remaining = model[0]
        self.assertEqual(remaining[1], 3)

    def test_removeColumns(self):
        """Removing the first column leaves the second one behind."""
        model = self.model
        model.removeColumns(0, 1)
        self.assertEqual(model.columnCount(), 1)
        last_row = model[1]
        self.assertEqual(last_row[0], 3)

    def test_insertRows(self):
        """A row inserted at position 0 pushes the old first row down."""
        self.model.insertRows(0, 1)
        shifted_row = self.model[1]
        self.assertEqual(shifted_row[0], 1)

    def test_insertColumns(self):
        """A column inserted at position 0 is filled with empty strings."""
        self.model.insertColumns(0, 1)
        first_row = self.model[0]
        self.assertEqual(first_row, ['', 1, 4])

    def test_wrap(self):
        """Wrapping a 1x1 table replaces the model contents."""
        model = self.model
        model.wrap([[0]])
        self.assertEqual(model.rowCount(), 1)
        self.assertEqual(model.columnCount(), 1)

    def test_clear(self):
        """``clear()`` drops all rows."""
        self.model.clear()
        n_rows = self.model.rowCount()
        self.assertEqual(n_rows, 0)

    def test_append(self):
        """An appended row lands at the end."""
        model = self.model
        model.append([5, 6])
        new_row = model[2]
        self.assertEqual(new_row[1], 6)
        self.assertEqual(model.rowCount(), 3)

    def test_extend(self):
        """Rows added through ``extend`` land at the end."""
        model = self.model
        model.extend([[5, 6]])
        new_row = model[2]
        self.assertEqual(new_row[1], 6)
        self.assertEqual(model.rowCount(), 3)

    def test_insert(self):
        """A row inserted at index 0 becomes the first row."""
        model = self.model
        model.insert(0, [5, 6])
        new_row = model[0]
        self.assertEqual(new_row[1], 6)
        self.assertEqual(model.rowCount(), 3)

    def test_remove(self):
        """Removing a row by value shrinks the model by one."""
        self.model.remove([2, 3])
        n_rows = self.model.rowCount()
        self.assertEqual(n_rows, 1)

    def test_other_roles(self):
        """Role data set on a row is still found after a row above it is deleted."""
        model = self.model
        model.append([2, 3])
        appended = model.index(2, 0)
        model.setData(appended, Qt.AlignCenter, Qt.TextAlignmentRole)

        # Deleting row 1 moves the appended row from position 2 to 1.
        del model[1]
        shifted = model.index(1, 0)
        alignment = model.data(shifted, Qt.TextAlignmentRole)
        self.assertTrue(Qt.AlignCenter & alignment)
class TestPyTableModel(TestCase):
    """Tests for ``PyTableModel`` used through its Qt and list-like APIs."""

    def setUp(self):
        """Give every test a fresh model holding two rows of two values."""
        rows = [[1, 4], [2, 3]]
        self.model = PyTableModel(rows)

    def test_init(self):
        """A model built without arguments has no rows."""
        empty = PyTableModel()
        self.model = empty
        self.assertEqual(empty.rowCount(), 0)

    def test_rowCount(self):
        """``rowCount()`` and ``len()`` both report two rows."""
        model = self.model
        self.assertEqual(model.rowCount(), 2)
        self.assertEqual(len(model), 2)

    def test_columnCount(self):
        """The fixture has two columns."""
        n_columns = self.model.columnCount()
        self.assertEqual(n_columns, 2)

    def test_data(self):
        """The top-left cell's data stringifies to '1'."""
        top_left = self.model.index(0, 0)
        value = self.model.data(top_left)
        self.assertEqual(str(value), '1')

    def test_sort(self):
        """After sorting on column 1 the first row starts with 2."""
        self.model.sort(1)
        first_row = self.model[0]
        self.assertEqual(first_row[0], 2)

    def test_setHeaderLabels(self):
        """Horizontal labels are returned as set; vertical header is '1'."""
        labels = ['Col 1', 'Col 2']
        self.model.setHorizontalHeaderLabels(labels)
        horizontal = self.model.headerData(1, Qt.Horizontal)
        self.assertEqual(horizontal, 'Col 2')
        vertical = self.model.headerData(1, Qt.Vertical)
        self.assertEqual(vertical, '1')

    def test_removeRows(self):
        """Removing the first row leaves only ``[2, 3]``."""
        model = self.model
        model.removeRows(0, 1)
        self.assertEqual(len(model), 1)
        remaining = model[0]
        self.assertEqual(remaining[1], 3)

    def test_removeColumns(self):
        """Removing the first column leaves the second one behind."""
        model = self.model
        model.removeColumns(0, 1)
        self.assertEqual(model.columnCount(), 1)
        last_row = model[1]
        self.assertEqual(last_row[0], 3)

    def test_insertRows(self):
        """A row inserted at position 0 pushes the old first row down."""
        self.model.insertRows(0, 1)
        shifted_row = self.model[1]
        self.assertEqual(shifted_row[0], 1)

    def test_insertColumns(self):
        """A column inserted at position 0 is filled with empty strings."""
        self.model.insertColumns(0, 1)
        first_row = self.model[0]
        self.assertEqual(first_row, ['', 1, 4])

    def test_wrap(self):
        """Wrapping a 1x1 table replaces the model contents."""
        model = self.model
        model.wrap([[0]])
        self.assertEqual(model.rowCount(), 1)
        self.assertEqual(model.columnCount(), 1)

    def test_clear(self):
        """``clear()`` drops all rows."""
        self.model.clear()
        n_rows = self.model.rowCount()
        self.assertEqual(n_rows, 0)

    def test_append(self):
        """An appended row lands at the end."""
        model = self.model
        model.append([5, 6])
        new_row = model[2]
        self.assertEqual(new_row[1], 6)
        self.assertEqual(model.rowCount(), 3)

    def test_extend(self):
        """Rows added through ``extend`` land at the end."""
        model = self.model
        model.extend([[5, 6]])
        new_row = model[2]
        self.assertEqual(new_row[1], 6)
        self.assertEqual(model.rowCount(), 3)

    def test_insert(self):
        """A row inserted at index 0 becomes the first row."""
        model = self.model
        model.insert(0, [5, 6])
        new_row = model[0]
        self.assertEqual(new_row[1], 6)
        self.assertEqual(model.rowCount(), 3)

    def test_remove(self):
        """Removing a row by value shrinks the model by one."""
        self.model.remove([2, 3])
        n_rows = self.model.rowCount()
        self.assertEqual(n_rows, 1)