def __call__(self, e1, e2=None, axis=None, impute=False):
    """
    Compute Mahalanobis distances using the fitted inverse covariance.

    :param e1: input data; converted with ``_orange_to_numpy``
    :param e2: optional second data set; when given, distances are
        computed between items of e1 and items of e2
    :param axis: if given, must equal the axis stored on the instance
    :param impute: if true, NaNs in the result are replaced via
        ``np.nan_to_num``
    :return: the distances wrapped in a ``DistMatrix``
    :raises ValueError: if the number of features does not match ``self.VI``
    """
    assert self.VI is not None, \
        "Mahalanobis distance must be initialized with the fit() method."

    x1 = _orange_to_numpy(e1)
    x2 = _orange_to_numpy(e2)

    if axis is not None:
        assert axis == self.axis, \
            "Axis must match its value at initialization."

    # Column-wise distances are computed on the transposed data.
    if self.axis == 0:
        x1 = x1.T
        if x2 is not None:
            x2 = x2.T

    operands = [x1] if x2 is None else [x1, x2]
    if any(arr.shape[1] != self.VI.shape[0] for arr in operands):
        raise ValueError('Incorrect number of features.')

    dist = skl_metrics.pairwise.pairwise_distances(
        x1, x2, metric='mahalanobis', VI=self.VI)
    if np.isnan(dist).any():
        if impute:
            dist = np.nan_to_num(dist)

    if isinstance(e1, (data.Table, data.RowInstance)):
        return DistMatrix(dist, e1, e2, self.axis)
    return DistMatrix(dist)
def __call__(self, e1, e2=None, axis=1, **kwargs):
    """
    Calculate pairwise distances with the metric stored in ``self.metric``.

    :param e1: input data instances
    :type e1: :class:`Orange.data.Table` or
        :class:`Orange.data.RowInstance` or :class:`numpy.ndarray`
    :param e2: optional second set of instances; if provided, distances
        are calculated between each pair with the first item from e1 and
        the second from e2
    :type e2: :class:`Orange.data.Table` or
        :class:`Orange.data.RowInstance` or :class:`numpy.ndarray`
    :param axis: axis=1 compares rows, axis=0 compares columns
    :type axis: int
    :param kwargs: forwarded unchanged to ``pairwise_distances`` (used for
        Mahalanobis to pass the inverse of the covariance matrix)
    :return: the matrix with distances between given examples
    :rtype: :class:`Orange.misc.DistMatrix`
    """
    first = _orange_to_numpy(e1)
    second = _orange_to_numpy(e2)

    if axis == 0:
        first = first.T
        if second is not None:
            second = second.T

    # Dense inputs are promoted to 2-D; sparse matrices are left untouched.
    if not sparse.issparse(first):
        first = np.atleast_2d(first)
    if e2 is not None and not sparse.issparse(second):
        second = np.atleast_2d(second)

    result = metrics.pairwise.pairwise_distances(
        first, second, metric=self.metric, **kwargs)

    if isinstance(e1, (data.Table, data.RowInstance)):
        return DistMatrix(result, e1, e2)
    return DistMatrix(result)
def __call__(self, e1, e2=None, axis=1, impute=False):
    """
    Return Spearman dissimilarity, ``(1 - rho) / 2``, between e1 and e2.

    If ``self.absolute`` is true, ``(1 - |rho|) / 2`` is used instead.

    :param e1: input data; converted with ``_orange_to_numpy``
    :param e2: optional second data set; defaults to e1
    :param axis: axis passed to ``stats.spearmanr``; forced to 0 when
        either input is one-dimensional
    :param impute: if true, NaN correlations are replaced via
        ``np.nan_to_num``
    :return: the distances wrapped in a ``DistMatrix``
    """
    x1 = _orange_to_numpy(e1)
    x2 = _orange_to_numpy(e2)
    if x2 is None:
        x2 = x1

    # slc is the split point used below to cut the cross-correlation
    # quadrant out of the matrix returned by spearmanr.
    if x1.ndim == 1 or x2.ndim == 1:
        axis = 0
        slc = len(x1) if x1.ndim > 1 else 1
    else:
        slc = len(x1) if axis == 1 else x1.shape[1]

    # stats.spearmanr does not work when e1=Table and e2=RowInstance
    # so we replace e1 and e2 and then transpose the result
    transpose = False
    if x1.ndim == 2 and x2.ndim == 1:
        x1, x2 = x2, x1
        slc = len(e1) if x1.ndim > 1 else 1
        transpose = True

    rho, _ = stats.spearmanr(x1, x2, axis=axis)
    if np.isnan(rho).any() and impute:
        rho = np.nan_to_num(rho)

    if self.absolute:
        dist = (1. - np.abs(rho)) / 2.
    else:
        dist = (1. - rho) / 2.

    # `np.float` was only an alias of the builtin `float` and no longer
    # exists in current NumPy; NumPy float64 scalars subclass `float`,
    # so this check matches the same objects.
    if isinstance(dist, float):
        dist = np.array([[dist]])
    elif isinstance(dist, np.ndarray):
        dist = dist[:slc, slc:]
    if transpose:
        dist = dist.T

    if isinstance(e1, (data.Table, data.RowInstance)):
        dist = DistMatrix(dist, e1, e2, axis)
    else:
        dist = DistMatrix(dist)
    return dist
def test_numpy_type(self):
    """Reductions on a DistMatrix keep the NumPy scalar type (GH-3658)."""
    first_values = np.array([1, 2], dtype=np.int64)
    second_values = np.array([2, 3], dtype=np.int64)
    first = DistMatrix(first_values)
    second = DistMatrix(second_values)

    self.assertIsInstance(first.max(), np.int64)
    self.assertNotIsInstance(first.max(), int)

    # The two matrices differ, so the equality helper has to fail.
    with self.assertRaises(AssertionError):
        np.testing.assert_array_equal(first, second)
def __call__(self, e1, e2=None, axis=1):
    """
    :param e1: input data instances
    :type e1: :class:`Orange.data.Table` or
        :class:`Orange.data.RowInstance` or :class:`numpy.ndarray`
    :param e2: optional second argument for data instances; if provided,
        distances between each pair, where first item is from e1 and
        second is from e2, are calculated
    :type e2: :class:`Orange.data.Table` or
        :class:`Orange.data.RowInstance` or :class:`numpy.ndarray`
    :param axis: if axis=1 we calculate distances between rows,
        if axis=0 we calculate distances between columns
    :type axis: int
    :return: the matrix with distances between given examples
    :rtype: :class:`Orange.misc.DistMatrix`

    Returns Spearman's dissimilarity between e1 and e2, i.e.

    .. math:: (1-r)/2

    where r is Spearman's rank coefficient. If ``self.absolute`` is true,
    the absolute value of r is used.
    """
    x1 = _orange_to_numpy(e1)
    x2 = _orange_to_numpy(e2)
    if x2 is None:
        x2 = x1

    # slc is the split point used below to cut the cross-correlation
    # quadrant out of the matrix returned by spearmanr.
    if x1.ndim == 1 or x2.ndim == 1:
        axis = 0
        slc = len(x1) if x1.ndim > 1 else 1
    else:
        slc = len(x1) if axis == 1 else x1.shape[1]

    # stats.spearmanr does not work when e1=Table and e2=RowInstance
    # so we replace e1 and e2 and then transpose the result
    transpose = False
    if x1.ndim == 2 and x2.ndim == 1:
        x1, x2 = x2, x1
        slc = len(e1) if x1.ndim > 1 else 1
        transpose = True

    rho, _ = stats.spearmanr(x1, x2, axis=axis)
    if self.absolute:
        dist = (1. - np.abs(rho)) / 2.
    else:
        dist = (1. - rho) / 2.

    # `np.float` was only an alias of the builtin `float` and no longer
    # exists in current NumPy; NumPy float64 scalars subclass `float`,
    # so this check matches the same objects.
    if isinstance(dist, float):
        dist = np.array([[dist]])
    elif isinstance(dist, np.ndarray):
        dist = dist[:slc, slc:]
    if transpose:
        dist = dist.T

    if isinstance(e1, (data.Table, data.RowInstance)):
        dist = DistMatrix(dist, e1, e2)
    else:
        dist = DistMatrix(dist)
    return dist
def __call__(self, e1, e2=None, axis=1, impute=False):
    """
    Compute pairwise distances with the metric stored in ``self.metric``.

    :param e1: input data; converted with ``_orange_to_numpy``
    :param e2: optional second data set; when given, distances are
        computed between items of e1 and items of e2
    :param axis: axis=1 compares rows, axis=0 compares columns
    :param impute: if true, NaNs in the computed distances are replaced
        via ``np.nan_to_num``
    :return: the distances wrapped in a ``DistMatrix``
    """
    # Local import keeps this block self-contained; the flag below needs it.
    import numpy as np

    x1 = _orange_to_numpy(e1)
    x2 = _orange_to_numpy(e2)
    if axis == 0:
        x1 = x1.T
        if x2 is not None:
            x2 = x2.T

    dist = skl_metrics.pairwise.pairwise_distances(
        x1, x2, metric=self.metric)

    # Previously `impute` was accepted but silently ignored.
    if impute and np.isnan(dist).any():
        dist = np.nan_to_num(dist)

    if isinstance(e1, (data.Table, data.RowInstance)):
        dist = DistMatrix(dist, e1, e2, axis)
    else:
        dist = DistMatrix(dist)
    return dist
def __call__(self, e1, e2=None, axis=1, impute=False):
    """
    Compute pairwise distances with the metric stored in ``self.metric``.

    :param e1: input data; converted with ``_orange_to_numpy``
    :param e2: optional second data set
    :param axis: axis=1 compares rows, axis=0 compares columns
    :param impute: if true, NaNs in the result are replaced via
        ``np.nan_to_num``
    :return: the distances wrapped in a ``DistMatrix``
    """
    first = _orange_to_numpy(e1)
    second = _orange_to_numpy(e2)

    # Column-wise distances are computed on the transposed data.
    if axis == 0:
        first = first.T
        if second is not None:
            second = second.T

    distances = skl_metrics.pairwise.pairwise_distances(
        first, second, metric=self.metric)
    if impute and np.isnan(distances).any():
        distances = np.nan_to_num(distances)

    if not isinstance(e1, (Table, RowInstance)):
        return DistMatrix(distances)
    return DistMatrix(distances, e1, e2, axis)
def test_deprecated(self):
    """Accessing `dim` and `X` must emit OrangeDeprecationWarning."""
    square = np.arange(9).reshape(3, 3)
    matrix = DistMatrix(square)

    with self.assertWarns(OrangeDeprecationWarning):
        self.assertEqual(matrix.dim, 3)

    with self.assertWarns(OrangeDeprecationWarning):
        np.testing.assert_almost_equal(matrix.X, square)
def setUp(self):
    """Create the OWMDS widget under test and load the towns matrix."""
    settings = {
        "__version__": 2,
        "max_iter": 10,
        "initialization": OWMDS.PCA,
    }
    self.widget = self.create_widget(
        OWMDS, stored_settings=settings)  # type: OWMDS

    towns_path = os.path.join(self.datasets_dir, "slovenian-towns.dst")
    self.towns = DistMatrix.from_file(towns_path)
def test_input_distance_no_data(self):
    """A matrix without row items shows a validation error; None clears it."""
    w = self.widget
    values = np.array([[0, 1, 1],
                       [1, 0, 1],
                       [1, 1, 0]])
    unlabelled = DistMatrix(values, row_items=None)

    self.send_signal(w.Inputs.data, unlabelled, widget=w)
    self.assertTrue(w.Error.input_validation_error.is_shown())
    self.assertIsNone(w.data)
    self.assertIsNone(w.distances)

    # Removing the input must also remove the error.
    self.send_signal(w.Inputs.data, None, widget=w)
    self.assertFalse(w.Error.input_validation_error.is_shown())
def setUp(self):
    """Create the OWMDS widget under test and load the towns matrix."""
    stored = dict([
        ("__version__", 2),
        ("max_iter", 10),
        ("initialization", OWMDS.PCA),
    ])
    self.widget = self.create_widget(
        OWMDS, stored_settings=stored)  # type: OWMDS

    dataset = os.path.join(self.datasets_dir, "slovenian-towns.dst")
    self.towns = DistMatrix.from_file(dataset)
def __call__(self, e1, e2=None, axis=1):
    """
    Compute pairwise distances with the metric stored in ``self.metric``.

    :param e1: input data; converted with ``_orange_to_numpy``
    :param e2: optional second data set
    :param axis: axis=1 compares rows, axis=0 compares columns
    :return: the distances wrapped in a ``DistMatrix``
    """
    first = _orange_to_numpy(e1)
    second = _orange_to_numpy(e2)

    if axis == 0:
        first = first.T
        if second is not None:
            second = second.T

    # Dense inputs are promoted to 2-D; sparse matrices are left untouched.
    if not sparse.issparse(first):
        first = np.atleast_2d(first)
    if e2 is not None and not sparse.issparse(second):
        second = np.atleast_2d(second)

    result = skl_metrics.pairwise.pairwise_distances(
        first, second, metric=self.metric)

    if isinstance(e1, (data.Table, data.RowInstance)):
        return DistMatrix(result, e1, e2, axis)
    return DistMatrix(result)
def _save_and_load(self):
    """Save ``self.distances`` through the widget and read the file back."""
    w = self.widget
    w.auto_save = False
    with named_file("", suffix=".dst") as filename:
        # Bypass the file dialog: the widget is handed the temp file name.
        chosen = (filename, w.filters[0])
        w.get_save_filename = Mock(return_value=chosen)
        self.send_signal(w.Inputs.distances, self.distances)
        w.save_file_as()
        reloaded = DistMatrix.from_file(filename)
        return reloaded
def __call__(self, e1, e2=None, axis=1, impute=False):
    """
    Return Pearson dissimilarity, ``(1 - r) / 2``, between e1 and e2.

    If ``self.absolute`` is true, ``(1 - |r|) / 2`` is used instead.

    :param e1: input data; converted with ``_orange_to_numpy``
    :param e2: optional second data set; defaults to e1
    :param axis: axis=0 transposes the inputs before correlating
    :param impute: if true, NaN correlations are replaced via
        ``np.nan_to_num``
    :return: the distances wrapped in a ``DistMatrix``
    """
    first = _orange_to_numpy(e1)
    second = _orange_to_numpy(e2)
    if second is None:
        second = first
    if axis == 0:
        first, second = first.T, second.T

    # One correlation per (item of first, item of second) pair.
    rho = np.array([[stats.pearsonr(a, b)[0] for b in second]
                    for a in first])
    if np.isnan(rho).any():
        if impute:
            rho = np.nan_to_num(rho)

    strength = np.abs(rho) if self.absolute else rho
    dist = (1. - strength) / 2.

    if isinstance(e1, (data.Table, data.RowInstance)):
        return DistMatrix(dist, e1, e2, axis)
    return DistMatrix(dist)
def __call__(self, e1, e2=None, axis=1):
    """
    Return Pearson's dissimilarity between e1 and e2, i.e.

    .. math:: (1-r)/2

    where r is Pearson's correlation coefficient. If ``self.absolute``
    is true, the absolute value of r is used.

    :param e1: input data instances
    :type e1: :class:`Orange.data.Table` or
        :class:`Orange.data.RowInstance` or :class:`numpy.ndarray`
    :param e2: optional second set of instances; if provided, distances
        are calculated between each pair with the first item from e1 and
        the second from e2
    :type e2: :class:`Orange.data.Table` or
        :class:`Orange.data.RowInstance` or :class:`numpy.ndarray`
    :param axis: axis=1 compares rows, axis=0 compares columns
    :type axis: int
    :return: the matrix with distances between given examples
    :rtype: :class:`Orange.misc.DistMatrix`
    """
    first = _orange_to_numpy(e1)
    second = _orange_to_numpy(e2)
    if second is None:
        second = first
    if axis == 0:
        first, second = first.T, second.T

    # A single 1-D vector is wrapped so the loops below see one item.
    if first.ndim == 1:
        first = [first]
    if second.ndim == 1:
        second = [second]

    rho = np.array([[stats.pearsonr(a, b)[0] for b in second]
                    for a in first])
    strength = np.abs(rho) if self.absolute else rho
    dist = (1. - strength) / 2.

    if isinstance(e1, (data.Table, data.RowInstance)):
        return DistMatrix(dist, e1, e2)
    return DistMatrix(dist)
def __call__(self, e1, e2=None, axis=1):
    """
    Return Pearson dissimilarity, ``(1 - r) / 2``, between e1 and e2.

    If ``self.absolute`` is true, ``(1 - |r|) / 2`` is used instead.

    :param e1: input data; converted with ``_orange_to_numpy``
    :param e2: optional second data set; defaults to e1
    :param axis: axis=0 transposes the inputs before correlating
    :return: the distances wrapped in a ``DistMatrix``
    """
    left = _orange_to_numpy(e1)
    right = _orange_to_numpy(e2)
    if right is None:
        right = left
    if axis == 0:
        left, right = left.T, right.T

    # A single 1-D vector is wrapped so the loops below see one item.
    if left.ndim == 1:
        left = [left]
    if right.ndim == 1:
        right = [right]

    rho = np.array([[stats.pearsonr(u, v)[0] for v in right]
                    for u in left])
    magnitude = np.abs(rho) if self.absolute else rho
    dist = (1. - magnitude) / 2.

    if isinstance(e1, (data.Table, data.RowInstance)):
        return DistMatrix(dist, e1, e2, axis)
    return DistMatrix(dist)
def __call__(self, e1, e2=None, axis=1, impute=False):
    """
    Return Spearman dissimilarity, ``(1 - rho) / 2``, between e1 and e2.

    If ``self.absolute`` is true, ``(1 - |rho|) / 2`` is used instead.

    :param e1: input data; converted with ``_orange_to_numpy``
    :param e2: optional second data set; defaults to e1
    :param axis: axis passed to ``stats.spearmanr``
    :param impute: if true, NaN correlations are replaced via
        ``np.nan_to_num``
    :return: the distances wrapped in a ``DistMatrix``
    """
    x1 = _orange_to_numpy(e1)
    x2 = _orange_to_numpy(e2)
    if x2 is None:
        x2 = x1

    # Split point used below to cut the cross-correlation quadrant out of
    # the matrix returned by spearmanr.
    slc = len(x1) if axis == 1 else x1.shape[1]

    rho, _ = stats.spearmanr(x1, x2, axis=axis)
    if np.isnan(rho).any() and impute:
        rho = np.nan_to_num(rho)

    if self.absolute:
        dist = (1. - np.abs(rho)) / 2.
    else:
        dist = (1. - rho) / 2.

    # `np.float` was only an alias of the builtin `float` and no longer
    # exists in current NumPy; NumPy float64 scalars subclass `float`,
    # so this check matches the same objects.
    if isinstance(dist, float):
        dist = np.array([[dist]])
    elif isinstance(dist, np.ndarray):
        dist = dist[:slc, slc:]

    if isinstance(e1, (data.Table, data.RowInstance)):
        dist = DistMatrix(dist, e1, e2, axis)
    else:
        dist = DistMatrix(dist)
    return dist
def test_save(self):
    """Load a matrix, save it over the same file and check the reload."""
    # NOTE(review): the fixture literals are reproduced exactly as they
    # appear in this view; confirm their row/column separators in the
    # real file.
    symmetric_expected = np.array([[0.12, 9.01, 8.90],
                                   [9.01, 2.34, 0],
                                   [8.90, 0, 0]])
    asymmetric_expected = np.array([[0.12, 3.45, 6.78],
                                    [9.01, 2.34, 5.67],
                                    [8.90, 1.23, 4.56]])
    row_names = ["danny", "eve", "frank"]
    col_names = ["ann", "bert", "chad"]

    with named_file(
            """3 axis=1 row_labels danny 0.12 3.45 6.78 eve 9.01 2.34 5.67 frank 8.90""") as name:
        loaded = DistMatrix.from_file(name)
        loaded.save(name)
        loaded = DistMatrix.from_file(name)
        np.testing.assert_almost_equal(loaded, symmetric_expected)
        self.assertIsInstance(loaded.row_items, Table)
        self.assertIsNone(loaded.col_items)
        self.assertEqual([row.metas[0] for row in loaded.row_items],
                         row_names)
        self.assertEqual(loaded.axis, 1)

    with named_file(
            """3 axis=0 asymmetric col_labels row_labels ann bert chad danny 0.12 3.45 6.78 eve 9.01 2.34 5.67 frank 8.90 1.23 4.56""") as name:
        loaded = DistMatrix.from_file(name)
        loaded.save(name)
        loaded = DistMatrix.from_file(name)
        np.testing.assert_almost_equal(loaded, asymmetric_expected)
        self.assertIsInstance(loaded.row_items, Table)
        self.assertIsInstance(loaded.col_items, Table)
        self.assertEqual([col.metas[0] for col in loaded.col_items],
                         col_names)
        self.assertEqual([row.metas[0] for row in loaded.row_items],
                         row_names)
        self.assertEqual(loaded.axis, 0)
def __call__(self, e1, e2=None):
    """
    Compute distances and pack them into a `DistMatrix`.

    If e2 is omitted, distances are computed within e1, along the axis
    stored in `self.axis`. If e2 is given, distances are computed between
    e1 and e2; this is rejected when `self.axis` is 0, except when e2 is
    the very same object as e1, in which case e2 is ignored.

    The inputs are converted with `_orange_to_numpy` and handed to
    `compute_distances`. Subclasses are expected to define
    `compute_distances` rather than override `__call__`.

    Args:
        e1 (Orange.data.Table or Orange.data.Instance or numpy.ndarray):
            input data
        e2 (Orange.data.Table or Orange.data.Instance or numpy.ndarray):
            secondary data

    Returns:
        A distance matrix (Orange.misc.distmatrix.DistMatrix)

    Raises:
        ValueError: if `self.axis` is 0 and e2 is a different object
            than e1.
    """
    if self.axis == 0 and e2 is not None:
        # Backward compatibility fix
        if e2 is not e1:
            raise ValueError("Two tables cannot be compared by columns")
        e2 = None

    first = _orange_to_numpy(e1)
    second = _orange_to_numpy(e2)
    with np.errstate(invalid="ignore"):  # nans are handled below
        distances = self.compute_distances(first, second)
    if self.impute and np.isnan(distances).any():
        distances = np.nan_to_num(distances)

    if not isinstance(e1, (Table, RowInstance)):
        return DistMatrix(distances)
    return DistMatrix(distances, e1, e2, self.axis)
def compute_distances(self, data):
    """
    Compute a symmetric mixed-type distance matrix between the rows of data.

    For every pair of rows, each continuous variable contributes the
    squared difference scaled by that variable's value range (variables
    with zero range contribute nothing), and every other variable
    contributes 1 when the two values differ. The distance is
    ``(sqrt(sum of squares) + number of mismatches) / number of variables``.

    :param data: table indexable as ``data[row][variable]`` that exposes
        ``data.domain``
    :return: the pairwise distances wrapped in a ``DistMatrix``
    """
    n_rows = len(data)
    variables = [data.domain[k] for k in range(len(data.domain))]
    n_vars = len(variables)
    continuous = [isinstance(var, ContinuousVariable) for var in variables]

    # Read every cell once instead of re-indexing the table in the loops.
    rows = [[data[i][var] for var in variables] for i in range(n_rows)]

    # Value range per variable, indexed by the variable's position in the
    # domain so the lookup below stays aligned when discrete variables
    # precede continuous ones. Every row, including the first, takes part
    # in both the minimum and the maximum.
    spans = [0] * n_vars
    if n_rows:
        for k, is_continuous in enumerate(continuous):
            if is_continuous:
                column = [row[k] for row in rows]
                spans[k] = max(column) - min(column)

    distances = np.zeros((n_rows, n_rows))
    for i in range(n_rows):
        for j in range(i, n_rows):
            squared = 0      # scaled squared differences, continuous vars
            mismatches = 0   # number of differing non-continuous values
            for k, (a, b) in enumerate(zip(rows[i], rows[j])):
                if continuous[k]:
                    if spans[k] != 0:
                        squared += ((a - b) / spans[k]) ** 2
                elif a != b:
                    mismatches += 1
            value = (sqrt(squared) + mismatches) / n_vars
            distances[i, j] = distances[j, i] = value

    return DistMatrix(distances)
def open_file(self):
    """
    Load the most recent distance file and send it on "Distances".

    Messages are cleared first. A path that does not exist is retried by
    base name in the current directory. The placeholder "(none)" sends
    None. A load failure is reported in the info labels and also results
    in None being sent.
    """
    self.error()
    self.warning()
    self.information()

    fn = self.last_path()
    if not fn:
        return

    if not os.path.exists(fn):
        basename = os.path.basename(fn)
        local_candidate = os.path.join(".", basename)
        if os.path.exists(local_candidate):
            fn = local_candidate
            self.information("Loading '{}' from the current directory."
                             .format(basename))

    if fn == "(none)":
        self.send("Distances", None)
        self.infoa.setText("No data loaded")
        self.infob.setText("")
        self.warnings.setText("")
        return

    self.loaded_file = ""
    try:
        distances = DistMatrix.from_file(fn)
        self.loaded_file = fn
    except Exception as exc:
        reason = str(exc)
        self.error("Invalid file format")
        self.infoa.setText('Data was not loaded due to an error.')
        self.warnings.setText(reason)
        distances = None

    if distances is not None:
        count_text = "{} points(s), ".format(len(distances))
        if distances.row_items is not None:
            label_text = "labelled"
        else:
            label_text = "unlabelled"
        self.infoa.setText(count_text + label_text)
        self.warnings.setText("")

        # The matrix is named after the file, minus its last extension.
        file_name = os.path.split(fn)[1]
        stem, dot, _ = file_name.rpartition(".")
        distances.name = stem if dot else file_name

    self.send("Distances", distances)
def open_file(self):
    """
    Load the most recent distance file and send it on the distances output.

    Messages are cleared first. A path that does not exist is retried by
    base name in the current directory. The placeholder "(none)" sends
    None. A load failure is reported in the info labels and also results
    in None being sent.
    """
    self.clear_messages()

    fn = self.last_path()
    if not fn:
        return

    if not os.path.exists(fn):
        basename = os.path.basename(fn)
        local_candidate = os.path.join(".", basename)
        if os.path.exists(local_candidate):
            fn = local_candidate
            self.information(
                "Loading '{}' from the current directory.".format(basename))

    if fn == "(none)":
        self.Outputs.distances.send(None)
        self.infoa.setText("No data loaded")
        self.infob.setText("")
        self.warnings.setText("")
        return

    self.loaded_file = ""
    try:
        distances = DistMatrix.from_file(fn)
        self.loaded_file = fn
    except Exception as exc:
        reason = str(exc)
        self.error("Invalid file format")
        self.infoa.setText('错误,数据未加载。')
        self.warnings.setText(reason)
        distances = None

    if distances is not None:
        count_text = "{} 个点, ".format(len(distances))
        if distances.row_items is not None:
            label_text = "已标记"
        else:
            label_text = "未标记"
        self.infoa.setText(count_text + label_text)
        self.warnings.setText("")

        # The matrix is named after the file, minus its last extension.
        file_name = os.path.split(fn)[1]
        stem, dot, _ = file_name.rpartition(".")
        distances.name = stem if dot else file_name

    self.Outputs.distances.send(distances)
def setUp(self):
    """Create the widget plus an iris-based and a hand-made distance matrix."""
    self.widget = self.create_widget(
        OWNxFromDistances)  # type: OWNxFromDistances
    self.data = Table("iris")
    self.distances = Euclidean(self.data)

    # When converted to a graph, this has the following components:
    # At threshold 0.5: {1, 6} and disconnected {0}, {2}, {3}, {4}, {5}
    # At threshold 1 {0, 1, 2, 6}, {3, 5}, {4}
    # At threshold 2 {0, 1, 2, 3, 5, 6}, {4}
    grid = np.full((7, 7), 10.0)
    grid[1, 6] = grid[6, 1] = 0.5
    for a, b in ((0, 1), (1, 2), (2, 6), (0, 6), (3, 5)):
        grid[a, b] = grid[b, a] = 1
    grid[2, 3] = grid[3, 2] = 2
    self.distances1 = DistMatrix(grid)
def test_from_file(self):
    """Check DistMatrix.from_file on valid fixtures and on malformed input.

    The first part loads four fixtures and checks the values, the label
    tables and the axis. The second part feeds malformed content and
    checks the exact ValueError message.
    """
    # NOTE(review): the triple-quoted fixtures are shown here without
    # row/column separators; confirm the real file keeps its newlines/tabs.

    # Asymmetric matrix with both column and row labels, axis=0.
    with named_file("""3 axis=0 asymmetric col_labels row_labels ann bert chad danny 0.12 3.45 6.78 eve 9.01 2.34 5.67 frank 8.90 1.23 4.56""") as name:
        m = DistMatrix.from_file(name)
        np.testing.assert_almost_equal(
            m, np.array([[0.12, 3.45, 6.78],
                         [9.01, 2.34, 5.67],
                         [8.90, 1.23, 4.56]]))
        self.assertIsInstance(m.row_items, Table)
        self.assertIsInstance(m.col_items, Table)
        self.assertEqual([e.metas[0] for e in m.col_items],
                         ["ann", "bert", "chad"])
        self.assertEqual([e.metas[0] for e in m.row_items],
                         ["danny", "eve", "frank"])
        self.assertEqual(m.axis, 0)

    # Row labels only; the expected matrix below is symmetric.
    with named_file("""3 axis=1 row_labels danny 0.12 3.45 6.78 eve 9.01 2.34 5.67 frank 8.90""") as name:
        m = DistMatrix.from_file(name)
        np.testing.assert_almost_equal(
            m, np.array([[0.12, 9.01, 8.90],
                         [9.01, 2.34, 0],
                         [8.90, 0, 0]]))
        self.assertIsInstance(m.row_items, Table)
        self.assertIsNone(m.col_items)
        self.assertEqual([e.metas[0] for e in m.row_items],
                         ["danny", "eve", "frank"])
        self.assertEqual(m.axis, 1)

    # Explicit "symmetric" flag, no labels.
    with named_file("""3 axis=1 symmetric 0.12 3.45 6.78 9.01 2.34 5.67 8.90""") as name:
        m = DistMatrix.from_file(name)
        np.testing.assert_almost_equal(
            m, np.array([[0.12, 9.01, 8.90],
                         [9.01, 2.34, 0],
                         [8.90, 0, 0]]))

    # Non-ASCII row labels written with an explicit utf-8 encoding.
    with named_file("""3 row_labels starič 0.12 3.45 6.78 aleš 9.01 2.34 5.67 anže 8.90""", encoding="utf-8" "") as name:
        m = DistMatrix.from_file(name)
        np.testing.assert_almost_equal(
            m, np.array([[0.12, 9.01, 8.90],
                         [9.01, 2.34, 0],
                         [8.90, 0, 0]]))
        self.assertIsInstance(m.row_items, Table)
        self.assertIsNone(m.col_items)
        self.assertEqual([e.metas[0] for e in m.row_items],
                         ["starič", "aleš", "anže"])
        self.assertEqual(m.axis, 1)

    def assertErrorMsg(content, msg):
        # Loading `content` must raise ValueError whose text equals `msg`.
        with named_file(content) as name:
            with self.assertRaises(ValueError) as cm:
                DistMatrix.from_file(name)
            self.assertEqual(str(cm.exception), msg)

    assertErrorMsg("", "empty file")
    assertErrorMsg("axis=1\n1\t3\n4",
                   "distance file must begin with dimension")
    # NOTE(review): "\2" in the next two inputs is an octal escape
    # (chr(2)), possibly a typo for "\n2" - confirm.
    assertErrorMsg("3 col_labels\na\tb\n1\n\2\n3",
                   "mismatching number of column labels")
    assertErrorMsg("3 col_labels\na\tb\tc\td\n1\n\2\n3",
                   "mismatching number of column labels")
    assertErrorMsg("2\n  1\t2\t3\n  5",
                   "too many columns in matrix row 1")
    assertErrorMsg("2 row_labels\na\t1\t2\t3\nb\t5",
                   "too many columns in matrix row 'a'")
    assertErrorMsg("2 noflag\n  1\t2\t3\n  5",
                   "invalid flag 'noflag'")
    assertErrorMsg("2 noflag=5\n  1\t2\t3\n  5",
                   "invalid flag 'noflag=5'")
    assertErrorMsg("2\n1\n2\n3",
                   "too many rows")
    assertErrorMsg("2\n1\nasd",
                   "invalid element at row 2, column 1")
    assertErrorMsg("2 row_labels\na\t1\nb\tasd",
                   "invalid element at row 'b', column 1")
    assertErrorMsg("2 col_labels row_labels\nd\te\na\t1\nb\tasd",
                   "invalid element at row 'b', column 'd'")
    assertErrorMsg("2 col_labels\nd\te\n1\nasd",
                   "invalid element at row 2, column 'd'")
def assertErrorMsg(content, msg):
    """Assert that loading `content` raises ValueError with text `msg`."""
    with named_file(content) as path:
        with self.assertRaises(ValueError) as raised:
            DistMatrix.from_file(path)
        actual = str(raised.exception)
        self.assertEqual(actual, msg)
def test_from_file(self):
    """Check DistMatrix.from_file on valid fixtures and on malformed input.

    The first part loads four fixtures and checks the values, the label
    tables and the axis. The second part feeds malformed content and
    checks the exact ValueError message.
    """
    # NOTE(review): the triple-quoted fixtures are shown here without
    # row/column separators; confirm the real file keeps its newlines/tabs.

    # Asymmetric matrix with both column and row labels, axis=0.
    with named_file(
            """3 axis=0 asymmetric col_labels row_labels ann bert chad danny 0.12 3.45 6.78 eve 9.01 2.34 5.67 frank 8.90 1.23 4.56""") as name:
        m = DistMatrix.from_file(name)
        np.testing.assert_almost_equal(m, np.array([[0.12, 3.45, 6.78],
                                                    [9.01, 2.34, 5.67],
                                                    [8.90, 1.23, 4.56]]))
        self.assertIsInstance(m.row_items, Table)
        self.assertIsInstance(m.col_items, Table)
        self.assertEqual([e.metas[0] for e in m.col_items],
                         ["ann", "bert", "chad"])
        self.assertEqual([e.metas[0] for e in m.row_items],
                         ["danny", "eve", "frank"])
        self.assertEqual(m.axis, 0)

    # Row labels only; the expected matrix below is symmetric.
    with named_file(
            """3 axis=1 row_labels danny 0.12 3.45 6.78 eve 9.01 2.34 5.67 frank 8.90""") as name:
        m = DistMatrix.from_file(name)
        np.testing.assert_almost_equal(m, np.array([[0.12, 9.01, 8.90],
                                                    [9.01, 2.34, 0],
                                                    [8.90, 0, 0]]))
        self.assertIsInstance(m.row_items, Table)
        self.assertIsNone(m.col_items)
        self.assertEqual([e.metas[0] for e in m.row_items],
                         ["danny", "eve", "frank"])
        self.assertEqual(m.axis, 1)

    # Explicit "symmetric" flag, no labels.
    with named_file(
            """3 axis=1 symmetric 0.12 3.45 6.78 9.01 2.34 5.67 8.90""") as name:
        m = DistMatrix.from_file(name)
        np.testing.assert_almost_equal(m, np.array([[0.12, 9.01, 8.90],
                                                    [9.01, 2.34, 0],
                                                    [8.90, 0, 0]]))

    # Non-ASCII row labels written with an explicit utf-8 encoding.
    with named_file(
            """3 row_labels starič 0.12 3.45 6.78 aleš 9.01 2.34 5.67 anže 8.90""", encoding="utf-8""") as name:
        m = DistMatrix.from_file(name)
        np.testing.assert_almost_equal(m, np.array([[0.12, 9.01, 8.90],
                                                    [9.01, 2.34, 0],
                                                    [8.90, 0, 0]]))
        self.assertIsInstance(m.row_items, Table)
        self.assertIsNone(m.col_items)
        self.assertEqual([e.metas[0] for e in m.row_items],
                         ["starič", "aleš", "anže"])
        self.assertEqual(m.axis, 1)

    def assertErrorMsg(content, msg):
        # Loading `content` must raise ValueError whose text equals `msg`.
        with named_file(content) as name:
            with self.assertRaises(ValueError) as cm:
                DistMatrix.from_file(name)
            self.assertEqual(str(cm.exception), msg)

    assertErrorMsg("", "empty file")
    assertErrorMsg("axis=1\n1\t3\n4",
                   "distance file must begin with dimension")
    # NOTE(review): "\2" in the next two inputs is an octal escape
    # (chr(2)), possibly a typo for "\n2" - confirm.
    assertErrorMsg("3 col_labels\na\tb\n1\n\2\n3",
                   "mismatching number of column labels")
    assertErrorMsg("3 col_labels\na\tb\tc\td\n1\n\2\n3",
                   "mismatching number of column labels")
    assertErrorMsg("2\n  1\t2\t3\n  5",
                   "too many columns in matrix row 1")
    assertErrorMsg("2 row_labels\na\t1\t2\t3\nb\t5",
                   "too many columns in matrix row 'a'")
    assertErrorMsg("2 noflag\n  1\t2\t3\n  5",
                   "invalid flag 'noflag'")
    assertErrorMsg("2 noflag=5\n  1\t2\t3\n  5",
                   "invalid flag 'noflag=5'")
    assertErrorMsg("2\n1\n2\n3",
                   "too many rows")
    assertErrorMsg("2\n1\nasd",
                   "invalid element at row 2, column 1")
    assertErrorMsg("2 row_labels\na\t1\nb\tasd",
                   "invalid element at row 'b', column 1")
    assertErrorMsg("2 col_labels row_labels\nd\te\na\t1\nb\tasd",
                   "invalid element at row 'b', column 'd'")
    assertErrorMsg("2 col_labels\nd\te\n1\nasd",
                   "invalid element at row 2, column 'd'")