def test_eq(self):
    """Instances compare equal only while all of their values match."""
    features = ["x", DiscreteVariable("g", values="MF")]
    targets = [DiscreteVariable("y", values="ABC")]
    domain = self.create_domain(features, targets, self.metas)
    vals = [42, "M", "B", "X", 43, "Foo"]
    reference = Instance(domain, vals)

    # Two instances built from the same values are equal in both directions.
    other = Instance(domain, vals)
    self.assertTrue(reference == other)
    self.assertTrue(other == reference)

    # Overwriting the first value (with a number or with Unknown) breaks
    # equality; the same instance is modified twice in a row.
    for replacement in (43, Unknown):
        other[0] = replacement
        self.assertFalse(reference == other)

    # Each remaining case starts from a fresh copy and changes one position.
    for position, replacement in ((2, "C"), (-1, "Y"), (-2, "33"), (-3, "Bar")):
        other = Instance(domain, vals)
        other[position] = replacement
        self.assertFalse(reference == other)
def test_init_xy_no_data(self):
    """An Instance created without data holds only NaNs and has no metas."""
    domain = self.mock_domain(with_classes=True)
    inst = Instance(domain)
    self.assertIsInstance(inst, Instance)
    self.assertIs(inst.domain, domain)

    n_attrs = len(self.attributes)
    n_classes = len(self.class_vars)
    expected_shapes = (
        ("_values", (n_attrs + n_classes,)),
        ("_x", (n_attrs,)),
        ("_y", (n_classes,)),
        ("_metas", (0,)),
    )
    for array_name, shape in expected_shapes:
        self.assertEqual(getattr(inst, array_name).shape, shape)

    # Every numeric slot must be NaN.
    for array_name in ("_values", "_x", "_y"):
        self.assertTrue(all(isnan(x) for x in getattr(inst, array_name)))
def test_repr(self):
    """repr() follows each variable's number_of_decimals and ends with '...'."""
    domain = self.create_domain(self.attributes)
    inst = Instance(domain, range(len(self.attributes)))

    without_decimals = "[0, 1, 2, 3, 4, ...]"
    self.assertEqual(repr(inst), without_decimals)

    # Switch all variables to three decimals, then back to none.
    expectations = (
        (3, "[0.000, 1.000, 2.000, 3.000, 4.000, ...]"),
        (0, without_decimals),
    )
    for decimals, expected in expectations:
        for var in domain.variables:
            var.number_of_decimals = decimals
        self.assertEqual(repr(inst), expected)
def test_init_inst(self):
    """Build an Instance from another Instance, in the same and in a new domain."""
    features = ["x", DiscreteVariable("g", values="MF")]
    targets = [DiscreteVariable("y", values="ABC")]
    domain = self.create_domain(features, targets, self.metas)
    source = Instance(domain, [42, "M", "B", "X", 43, "Foo"])

    # Same domain: all three value arrays are carried over.
    same_domain_copy = Instance(domain, source)
    assert_array_equal(same_domain_copy._x, np.array([42, 0]))
    self.assertEqual(same_domain_copy._y[0], 1)
    assert_array_equal(same_domain_copy._metas,
                       np.array([0, 43, "Foo"], dtype=object))

    # Second domain: existing variables are moved between attributes and
    # metas, and two variables ("z", "w") are not present in the source.
    new_attrs = ["z", domain[1], self.metas[1]]
    new_metas = [self.metas[0], "w", domain[0]]
    domain2 = self.create_domain(new_attrs, domain.class_vars, new_metas)
    converted = Instance(domain2, source)
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", FutureWarning)
        assert_array_equal(converted._x, np.array([Unknown, 0, 43]))
        self.assertEqual(converted._y[0], 1)
        assert_array_equal(converted._metas,
                           np.array([0, Unknown, 42], dtype=object))
def test_eq(self):
    """Instances are equal only while all of their values match."""
    features = ["x", DiscreteVariable("g", values="MF")]
    targets = [DiscreteVariable("y", values="ABC")]
    domain = self.create_domain(features, targets, self.metas)
    vals = [42, "M", "B", "X", 43, "Foo"]
    reference = Instance(domain, vals)

    other = Instance(domain, vals)
    self.assertEqual(reference, other)
    self.assertEqual(other, reference)

    # The same instance is modified twice: a different number, then Unknown.
    for replacement in (43, Unknown):
        other[0] = replacement
        self.assertNotEqual(reference, other)

    # Every further case changes one position of a fresh copy.
    single_changes = ((2, "C"), (-1, "Y"), (-2, "33"), (-3, "Bar"))
    for position, replacement in single_changes:
        other = Instance(domain, vals)
        other[position] = replacement
        self.assertNotEqual(reference, other)
def test_init_xym_no_data(self):
    """An Instance without data in a domain with classes and metas is all-unknown."""
    domain = self.mock_domain(with_classes=True, with_metas=True)
    inst = Instance(domain)
    self.assertIsInstance(inst, Instance)
    self.assertIs(inst.domain, domain)

    expected_shapes = (
        ("_x", (len(self.attributes),)),
        ("_y", (len(self.class_vars),)),
        ("_metas", (3,)),
    )
    for array_name, shape in expected_shapes:
        self.assertEqual(getattr(inst, array_name).shape, shape)

    for array_name in ("_x", "_y"):
        self.assertTrue(all(isnan(x) for x in getattr(inst, array_name)))

    # The comparison below runs with FutureWarning silenced.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", FutureWarning)
        expected_metas = np.array([Unknown, Unknown, None])
        assert_array_equal(inst._metas, expected_metas)
def error(self, inst, neighbours):
    """Return the weighted average probability error over the neighbours.

    Each neighbour's actual class is predicted by a model built either from
    the other given neighbours (``self.neighbourhood == 'fixed'``) or from
    ``self.get_neighbourhood(neighbour)``. When ``self.include`` is true,
    ``inst`` is appended to a copy of that training set. A neighbour's error
    is one minus the probability assigned to its actual class, weighted by
    the inverse of its distance to ``inst``.
    """
    weighted_error = 0
    weights = []
    for i in range(len(neighbours)):
        neigh = Instance(neighbours.domain, neighbours[i])
        weight = 1 / self.distance(inst, neigh)
        weights.append(weight)

        if self.neighbourhood == 'fixed':
            # Leave the current neighbour out of its own training set.
            keep = np.arange(len(neighbours)) != i
            training = neighbours[keep]
        else:
            training = self.get_neighbourhood(neigh)

        if self.include:
            training = training.copy()
            training.append(inst)

        model = self.classifier(training)
        class_probs = model(neigh, ret=Model.Probs)[0]
        actual_class = int(neigh.get_class())
        weighted_error += weight * (1 - class_probs[actual_class])
    return float(weighted_error / sum(weights))
def test_filter_string_greater_equal_case_insensitive_data(self):
    """Case-insensitive GreaterEqual keeps rows whose lowered string is >= 'donec'."""
    condition = filter.FilterString(
        -1, filter.FilterString.GreaterEqual, 'donec', case_sensitive=False)
    filtered_data = filter.Values(conditions=[condition])(self.table)

    # Build the expected rows by applying the same rule to the raw data.
    correct_data = []
    for row in self.data:
        text = row[0]
        if text is not None and text.lower() >= 'donec':
            correct_data.append(Instance(filtered_data.domain, row))

    self.assertEqual(len(filtered_data), len(correct_data))
    self.assertSequenceEqual(filtered_data, correct_data)
def test_init_xym_list(self):
    """A list of values is split into attribute, class and meta arrays."""
    features = ["x", DiscreteVariable("g", values="MF")]
    targets = [DiscreteVariable("y", values="ABC")]
    domain = self.create_domain(features, targets, self.metas)
    vals = [42, "M", "B", "X", 43, "Foo"]
    inst = Instance(domain, vals)

    self.assertIsInstance(inst, Instance)
    self.assertIs(inst.domain, domain)
    for array_name, shape in (("_x", (2,)), ("_y", (1,)), ("_metas", (3,))):
        self.assertEqual(getattr(inst, array_name).shape, shape)

    # Discrete values are stored as indices into the variable's values.
    assert_array_equal(inst._x, np.array([42, 0]))
    self.assertEqual(inst._y[0], 1)
    expected_metas = np.array([0, 43, "Foo"], dtype=object)
    assert_array_equal(inst._metas, expected_metas)
def test_list(self):
    """Instance.list returns all values, metas included, as a plain list."""
    features = ["x", DiscreteVariable("g", values="MF")]
    targets = [DiscreteVariable("y", values="ABC")]
    domain = self.create_domain(features, targets, self.metas)
    vals = [42, "M", "B", "X", 43, "Foo"]
    inst = Instance(domain, vals)

    as_list = inst.list
    self.assertIsInstance(as_list, list)
    self.assertEqual(as_list, [42, "M", "B", "X", 43, "Foo"])
    # The list is longer than len(inst) and has all six values.
    self.assertGreater(len(as_list), len(inst))
    self.assertEqual(len(as_list), 6)
def test_init_xym_no_data(self):
    """An Instance without data holds NaNs and each meta variable's Unknown."""
    domain = self.mock_domain(with_classes=True, with_metas=True)
    inst = Instance(domain)
    self.assertIsInstance(inst, Instance)
    self.assertIs(inst.domain, domain)

    expected_shapes = (
        ("_x", (len(self.attributes),)),
        ("_y", (len(self.class_vars),)),
        ("_metas", (3,)),
    )
    for array_name, shape in expected_shapes:
        self.assertEqual(getattr(inst, array_name).shape, shape)

    for array_name in ("_x", "_y"):
        self.assertTrue(all(isnan(x) for x in getattr(inst, array_name)))

    # Metas are compared against the Unknown of each meta variable.
    unknown_metas = np.array([var.Unknown for var in domain.metas],
                             dtype=object)
    assert_array_nanequal(inst._metas, unknown_metas)
def test_filter_string_outside_case_insensitive(self):
    """Case-insensitive Outside keeps rows not strictly between 'd' and 'k'."""
    condition = filter.FilterString(
        -1, filter.FilterString.Outside, 'd', 'k', case_sensitive=False)
    filtered_data = filter.Values(conditions=[condition])(self.table)

    # Build the expected rows by applying the same rule to the raw data.
    correct_data = []
    for row in self.data:
        text = row[0]
        if text is not None and not 'd' < text.lower() < 'k':
            correct_data.append(Instance(filtered_data.domain, row))

    self.assertEqual(len(filtered_data), len(correct_data))
    self.assertSequenceEqual(filtered_data, correct_data)
def test_filter_string_between_case_insensitive_data(self):
    """Case-insensitive Between keeps rows whose lowered string is in ['i', 'o']."""
    condition = filter.FilterString(
        -1, filter.FilterString.Between, 'i', 'O', case_sensitive=False)
    filtered_data = filter.Values(conditions=[condition])(self.table)

    # Build the expected rows by applying the same rule to the raw data.
    correct_data = []
    for row in self.data:
        text = row[0]
        if text is not None and 'i' <= text.lower() <= 'o':
            correct_data.append(Instance(filtered_data.domain, row))

    self.assertEqual(len(filtered_data), len(correct_data))
    self.assertSequenceEqual(filtered_data, correct_data)
def create_data_from_states(example_states, example_traces):
    """Build a Table with one row per state.

    The attributes are taken from the domain of the first state. The class
    is a continuous "complexity" variable, which this function leaves unset.
    Two metas are filled in: "id" from ``state.get_id()`` and "trace" from
    the entry of ``example_traces`` at the same position as the state.
    """
    attributes = example_states[0].domain.get_attributes()
    meta_vars = [StringVariable.make("id"), ContinuousVariable("trace")]
    domain = Domain(
        attributes,
        ContinuousVariable.make("complexity"),
        metas=meta_vars)
    data = Table.from_domain(domain)

    for position, state in enumerate(example_states):
        row = Instance(domain)
        for feature in attributes:
            row[feature] = state.get_attribute(feature)
        row["id"] = state.get_id()
        row["trace"] = example_traces[position]
        data.append(row)
    return data
def __call__(self, data):
    """Predict a cluster id for every row of ``data``.

    Parameters
    ----------
    data : Table, Instance or array-like
        Tables and instances whose domain is not ``self.pre_domain`` are
        first converted to it. Anything else is passed to
        ``self.proj.predict`` unchanged.

    Returns
    -------
    Table or array
        For a Table or an Instance, a Table with a single discrete
        'Cluster id' column (one row per input row). For other input, the
        raw predictions reshaped to a column of ``len(data)`` rows.
    """
    if isinstance(data, Table):
        if data.domain is not self.pre_domain:
            data = Table(self.pre_domain, data)
        # NOTE(review): values are passed as a range of ints, not strings —
        # confirm that DiscreteVariable accepts this in the targeted version.
        c = DiscreteVariable(name='Cluster id', values=range(self.k))
        domain = Domain([c])
        return Table(
            domain,
            self.proj.predict(data.X).astype(int).reshape((len(data), 1)))
    elif isinstance(data, Instance):
        if data.domain is not self.pre_domain:
            data = Instance(self.pre_domain, data)
        c = DiscreteVariable(name='Cluster id', values=range(self.k))
        domain = Domain([c])
        # Present the single instance as a one-row 2-D array, the same
        # layout the Table branch passes via data.X, instead of a 1-D vector.
        single_row = data._x.reshape(1, -1)
        return Table(
            domain,
            np.atleast_2d(self.proj.predict(single_row)).astype(int))
    else:
        return self.proj.predict(data).reshape((len(data), 1))
def group_by(table, group_attrs, aggregate_disc="first", aggregate_cont="mean",
             aggregate_string="join", attr_aggregate=None):
    """Collapse ``table`` to one instance per distinct ``group_attrs`` key.

    Rows are grouped by the tuple of ``str`` values of ``group_attrs``. The
    first row of each group is copied, and every variable that is not a
    grouping attribute is replaced by an aggregate of that variable over all
    rows of the group. Groups keep the order of first appearance.

    :param table: data to group.
    :param group_attrs: variables whose values define the groups.
    :param aggregate_disc: aggregation used for discrete variables that have
        no entry in ``attr_aggregate``.
    :param aggregate_cont: same, for continuous variables.
    :param aggregate_string: same, for string variables.
    :param attr_aggregate: optional mapping from variable to its aggregation;
        it is copied, so the caller's mapping is not modified.
    :raises TypeError: for a non-grouping variable that has no explicit
        aggregation and is not continuous, discrete or string.
    :return: a new table built from the aggregated instances.
    """
    if attr_aggregate is None:
        attr_aggregate = {}
    else:
        attr_aggregate = dict(attr_aggregate)  # It is modified later
    # NOTE(review): the concatenation presumes getmetas().values() returns a
    # list (Python 2 semantics) — confirm for the targeted interpreter.
    all_vars = table.domain.variables + table.domain.getmetas().values()
    aggregate_vars = []
    for v in all_vars:
        if v not in group_attrs:
            # Pick the aggregation: an explicit entry wins, otherwise the
            # default for the variable's type.
            if v in attr_aggregate:
                pass
            elif isinstance(v, variable.Continuous):
                attr_aggregate[v] = aggregate_cont
            elif isinstance(v, variable.Discrete):
                attr_aggregate[v] = aggregate_disc
            elif isinstance(v, variable.String):
                attr_aggregate[v] = aggregate_string
            else:
                raise TypeError(v)
            aggregate_vars.append(v)
            # Replace the specification by the object that is later called
            # with the list of values to aggregate.
            attr_aggregate[v] = _aggregate_func(attr_aggregate[v])
    # Looked up below by the tuple-of-strings key to get a group's row indices.
    indices_map = table_map(table, group_attrs, exclude_special=False)
    new_instances = []
    key_set = set()
    for inst in table:  # Iterate over the table instead of the indices_map to preserve order
        key = tuple([str(inst[v]) for v in group_attrs])
        if key in key_set:
            continue  # Already seen this group
        indices = indices_map[key]
        new_instance = Instance(inst)  # Copy
        for v in aggregate_vars:
            values = [table[i][v] for i in indices]  # Values to aggregate
            new_instance[v] = attr_aggregate[v](values)
        new_instances.append(new_instance)
        key_set.add(key)
    return Table(new_instances)
def test_set_item(self):
    """Values can be set by index, name or variable; bad input raises ValueError."""
    features = ["x", DiscreteVariable("g", values="MF")]
    targets = [DiscreteVariable("y", values="ABC")]
    domain = self.create_domain(features, targets, self.metas)
    inst = Instance(domain, [42, "M", "B", "X", 43, "Foo"])

    # (key used for writing, index used for reading back, value)
    assignments = (
        (0, 0, 42),
        ("x", 0, 44),
        (1, 1, "F"),
        ("g", 1, "M"),
        (2, 2, "C"),
        ("y", 2, "A"),
        (domain.class_var, 2, "B"),
        (-1, -1, "Y"),
        ("Meta 1", -1, "Z"),
        (domain.metas[0], -1, "X"),
    )
    for write_key, read_index, new_value in assignments:
        inst[write_key] = new_value
        self.assertEqual(inst[read_index], new_value)

    # A value outside "MF" and an unknown variable name are both rejected.
    for bad_key, bad_value in ((1, "N"), ("asdf", 42)):
        with self.assertRaises(ValueError):
            inst[bad_key] = bad_value
def __call__(self, data):
    """Run ``self.proj.fit_predict`` on ``data`` and return the labels.

    An ndarray gives the labels as a column array. A Table (converted to
    ``self.pre_domain`` when needed) gives a one-column Table whose discrete
    'Core sample index' variable has -1 followed by the fitted
    ``core_sample_indices_`` as its values. A single Instance is not
    supported and raises ``Exception``. Any other input returns ``None``.
    """
    if isinstance(data, ndarray):
        labels = self.proj.fit_predict(data)
        return labels.reshape((len(data), 1))

    if isinstance(data, Table):
        if data.domain is not self.pre_domain:
            data = data.transform(self.pre_domain)
        labels = self.proj.fit_predict(data.X)
        core_values = [-1]
        core_values.extend(self.proj.core_sample_indices_)
        label_var = DiscreteVariable(name='Core sample index',
                                     values=core_values)
        return Table(Domain([label_var]), labels.reshape(len(labels), 1))

    if isinstance(data, Instance):
        if data.domain is not self.pre_domain:
            data = Instance(self.pre_domain, data)
        # Instance-by-instance classification is not defined.
        raise Exception("Core sample assignment is not supported "
                        "for single instances.")

    return None
def test_get_item(self):
    """A value can be read by integer index, by name and by variable."""
    features = ["x", DiscreteVariable("g", values="MF")]
    targets = [DiscreteVariable("y", values="ABC")]
    domain = self.create_domain(features, targets, self.metas)
    inst = Instance(domain, [42, "M", "B", "X", 43, "Foo"])

    # (integer index, name, variable, expected value)
    lookups = (
        (0, "x", domain[0], 42),
        (1, "g", domain[1], "M"),
        (2, "y", domain.class_var, "B"),
        (-2, "Meta 2", self.metas[1], 43),
    )
    for position, name, var, expected in lookups:
        self.assertIsInstance(inst[position], Value)
        for key in (position, name, var):
            self.assertEqual(inst[key], expected)

    # Note that these two checks assign rather than read. The keys are
    # created lazily so each one is built inside its own assertRaises block.
    for make_key in (lambda: "asdf", lambda: ContinuousVariable("asdf")):
        with self.assertRaises(ValueError):
            inst[make_key()] = 42
def predict(self, example, eps):
    """Compute a regression prediction object for an example.

    A nonconformity score is picked from ``self.alpha`` at the position
    ``int(eps * (len(self.alpha) + 1)) - 1``, clamped to the valid index
    range. The nonconformity measure's
    :py:func:`cp.nonconformity.RegrNC.predict` then turns that score into
    the range of values returned in the prediction.

    Args:
        example (ndarray): Attributes array.
        eps (float): Significance level (error rate).

    Returns:
        PredictionRegr: Regression prediction object.
    """
    n_scores = len(self.alpha)
    position = int(eps * (n_scores + 1)) - 1
    # Clamp from below first, then from above.
    position = max(position, 0)
    position = min(position, n_scores - 1)
    threshold = self.alpha[position]

    # The class value is appended as Unknown to complete the instance.
    values = np.concatenate((example, [Unknown]))
    inst = Instance(self.domain, values)
    lo, hi = self.nc_measure.predict(inst, threshold)
    return PredictionRegr(lo, hi)
def _grid_indices_to_image_list(self, images):
    """
    Return the images as a Table, in the order given by ``order_to_grid``.

    Every item yielded by ``self.order_to_grid(images)`` is wrapped in an
    Instance of the images' domain.
    NOTE(review): the previous docstring said empty grid cells are
    represented by None — confirm what ``order_to_grid`` yields for them.

    Parameters
    ----------
    images: Orange.data.Table
        The images to order.

    Returns
    -------
    Orange.data.Table
        A Table of images in the grid, ordered by rows.

    """
    domain = images.domain
    rows = []
    for image in self.order_to_grid(images):
        rows.append(Instance(domain, image))
    return Table.from_list(domain, rows)
def __call__(self, data):
    """Predict a cluster id for every row of ``data``.

    A Table or Instance is converted to ``self.pre_domain`` when its domain
    differs, and the result is a Table with a single discrete "Cluster id"
    column. Any other input is passed to ``self.proj.predict`` unchanged and
    the predictions come back as a column array.
    """
    if not isinstance(data, (Table, Instance)):
        raw = self.proj.predict(data)
        return raw.reshape((len(data), 1))

    if isinstance(data, Table):
        if data.domain is not self.pre_domain:
            data = data.transform(self.pre_domain)
        cluster_var = DiscreteVariable(
            name="Cluster id", values=list(map(str, range(self.k))))
        domain = Domain([cluster_var])
        labels = self.proj.predict(data.X).astype(int)
        return Table(domain, labels.reshape((len(data), 1)))

    # Single instance: present it to the predictor as a one-row 2-D array.
    if data.domain is not self.pre_domain:
        data = Instance(self.pre_domain, data)
    cluster_var = DiscreteVariable(
        name="Cluster id", values=list(map(str, range(self.k))))
    domain = Domain([cluster_var])
    single_row = data._x.reshape(1, -1)
    labels = np.atleast_2d(self.proj.predict(single_row)).astype(int)
    return Table(domain, labels)
def test_str(self):
    """str() shows attributes, ' | ' classes and '{...}' metas."""

    def check(domain_parts, values, expected):
        # Build the domain and the instance, then compare the string form.
        inst = Instance(self.create_domain(*domain_parts), values)
        self.assertEqual(str(inst), expected)

    check((["x", DiscreteVariable("g", values="MF")],),
          [42, 0],
          "[42.000, M]")
    check((["x", DiscreteVariable("g", values="MF")],
           [DiscreteVariable("y", values="ABC")]),
          [42, "M", "B"],
          "[42.000, M | B]")
    check((["x", DiscreteVariable("g", values="MF")],
           [DiscreteVariable("y", values="ABC")],
           self.metas),
          [42, "M", "B", "X", 43, "Foo"],
          "[42.000, M | B] {X, 43.000, Foo}")
    check(([], [DiscreteVariable("y", values="ABC")], self.metas),
          ["B", "X", 43, "Foo"],
          "[ | B] {X, 43.000, Foo}")
    check(([], [], self.metas),
          ["X", 43, "Foo"],
          "[] {X, 43.000, Foo}")

    # All values are printed by str(): first with three decimals, then,
    # after number_of_decimals is set to 0, with none.
    domain = self.create_domain(self.attributes)
    numbers = range(len(self.attributes))
    inst = Instance(domain, numbers)
    three_decimals = ", ".join("{:.3f}".format(x) for x in numbers)
    self.assertEqual(str(inst), "[{}]".format(three_decimals))

    for var in domain.variables:
        var.number_of_decimals = 0
    no_decimals = ", ".join("{}".format(x) for x in numbers)
    self.assertEqual(str(inst), "[{}]".format(no_decimals))
def test_set_item(self):
    """Values can be set by index, name or variable; bad input raises ValueError."""
    features = ["x", DiscreteVariable("g", values="MF")]
    targets = [DiscreteVariable("y", values="ABC")]
    domain = self.create_domain(features, targets, self.metas)
    inst = Instance(domain, [42, "M", "B", "X", 43, "Foo"])

    def assign_and_check(cases):
        # Each case is (key used for writing, index used for reading, value).
        for write_key, read_index, new_value in cases:
            inst[write_key] = new_value
            self.assertEqual(inst[read_index], new_value)

    # Attributes: the continuous "x" and the discrete "g".
    assign_and_check((
        (0, 0, 43),
        ("x", 0, 44),
        (domain[0], 0, 45),
        (1, 1, "F"),
        ("g", 1, "M"),
    ))

    # A value outside "MF" and an unknown variable name are both rejected.
    with self.assertRaises(ValueError):
        inst[1] = "N"
    with self.assertRaises(ValueError):
        inst["asdf"] = 42

    # Class variable and the last meta.
    assign_and_check((
        (2, 2, "C"),
        ("y", 2, "A"),
        (domain.class_var, 2, "B"),
        (-1, -1, "Y"),
        ("Meta 1", -1, "Z"),
        (domain.metas[0], -1, "X"),
    ))
def __call__(self, data, ret=Value):
    """Predict values and/or class probabilities for ``data``.

    Parameters
    ----------
    data : np.ndarray, scipy.sparse.csr_matrix, Instance, Table, list or tuple
        Data to predict on. Arrays and sparse matrices go to
        ``self.predict``; instances, tables and (nested) lists are converted
        to ``self.domain`` and go to ``self.predict_storage``. A flat list
        or tuple is treated as a single row.
    ret : int
        One of ``Model.Value``, ``Model.Probs`` or ``Model.ValueProbs``;
        must lie between 0 and 2.

    Returns
    -------
    The predicted values, the probabilities, or a ``(values, probs)`` tuple,
    depending on ``ret``. Whichever of the two the predictor did not return
    is derived from the other (argmax of the probabilities, or a bincount
    of the values).

    Raises
    ------
    ValueError
        If ``ret`` is out of range, or if probabilities are requested while
        any class variable is continuous.
    TypeError
        If ``data`` is of an unsupported type, or the predictor returns an
        array with an unexpected number of dimensions.
    """
    if not 0 <= ret <= 2:
        raise ValueError("invalid value of argument 'ret'")
    if (ret > 0
            and any(v.is_continuous for v in self.domain.class_vars)):
        raise ValueError("cannot predict continuous distributions")

    # Call the predictor
    if isinstance(data, np.ndarray):
        prediction = self.predict(np.atleast_2d(data))
    elif isinstance(data, scipy.sparse.csr_matrix):
        # csr_matrix is referenced through its public location; the
        # scipy.sparse.csr submodule path is private and deprecated.
        prediction = self.predict(data)
    elif isinstance(data, Instance):
        if data.domain != self.domain:
            data = Instance(self.domain, data)
        data = Table(data.domain, [data])
        prediction = self.predict_storage(data)
    elif isinstance(data, Table):
        if data.domain != self.domain:
            data = data.from_table(self.domain, data)
        prediction = self.predict_storage(data)
    elif isinstance(data, (list, tuple)):
        if not isinstance(data[0], (list, tuple)):
            data = [data]
        # Build the table in the original domain first, then convert it.
        data = Table(self.original_domain, data)
        data = Table(self.domain, data)
        prediction = self.predict_storage(data)
    else:
        raise TypeError("Unrecognized argument (instance of '{}')".format(
            type(data).__name__))

    # Parse the result into value and probs
    multitarget = len(self.domain.class_vars) > 1
    if isinstance(prediction, tuple):
        value, probs = prediction
    elif prediction.ndim == 1 + multitarget:
        value, probs = prediction, None
    elif prediction.ndim == 2 + multitarget:
        value, probs = None, prediction
    else:
        # The dimension is interpolated into the message; previously it was
        # passed as a second exception argument and never formatted.
        raise TypeError("model returned a %i-dimensional array"
                        % prediction.ndim)

    # Ensure that we have what we need to return
    if ret != Model.Probs and value is None:
        value = np.argmax(probs, axis=-1)
    if ret != Model.Value and probs is None:
        if multitarget:
            max_card = max(len(c.values)
                           for c in self.domain.class_vars)
            probs = np.zeros(value.shape + (max_card,), float)
            for i in range(len(self.domain.class_vars)):
                probs[:, i, :], _ = bn.bincount(
                    np.atleast_2d(value[:, i]), max_card - 1)
        else:
            probs, _ = bn.bincount(np.atleast_2d(value),
                                   len(self.domain.class_var.values) - 1)
        if ret == Model.ValueProbs:
            return value, probs
        else:
            return probs

    # Return what we need to
    if ret == Model.Probs:
        return probs
    # NOTE(review): an Instance argument has been rebound to a one-row Table
    # above, so this check looks like it can no longer hold for Instance
    # input — confirm whether wrapping the result in a Value was intended.
    if isinstance(data, Instance) and not multitarget:
        value = Value(self.domain.class_var, value[0])
    if ret == Model.Value:
        return value
    else:  # ret == Model.ValueProbs
        return value, probs
def test_no_hash(self):
    """Instances are unhashable: putting one into a set raises TypeError."""
    inst = Instance(self.mock_domain())
    self.assertRaises(TypeError, lambda: {inst})
def test_single_instance(self):
    """A single instance can be converted into the discretized domain."""
    iris = Table("iris")
    values = [5.2, 3.8, 1.4, 0.5, "Iris-virginica"]
    inst = Instance(iris.domain, values)
    discretized = Discretize(iris)
    # Passes as long as the conversion does not raise.
    Instance(discretized.domain, inst)
def test_distance_to_instance(self):
    """The distance between a table row and an identical Instance is zero."""
    iris = Table('iris')
    row = iris[1]
    # Rebuild the row as a stand-alone Instance from its x and y arrays.
    row_values = np.concatenate((row.x, row.y))
    inst = Instance(iris.domain, row_values)
    self.assertEqual(Euclidean(row, inst), 0)
def __call__(self, data, ret=Value):
    """Predict values and/or class probabilities for ``data``.

    Parameters
    ----------
    data : np.ndarray, scipy.sparse.csr_matrix, Instance or Table
        Data to predict on. Arrays and sparse matrices go to
        ``self.predict``; instances and tables are converted to
        ``self.domain`` and go to ``self.predict_storage``.
    ret : int
        One of ``Model.Value``, ``Model.Probs`` or ``Model.ValueProbs``;
        must lie between 0 and 2.

    Returns
    -------
    The predicted values, the probabilities, or a ``(values, probs)`` tuple,
    depending on ``ret``. Whichever of the two the predictor did not return
    is derived from the other (argmax of the probabilities, or a bincount
    of the values).

    Raises
    ------
    ValueError
        If ``ret`` is out of range, or if probabilities are requested while
        any class variable is continuous.
    TypeError
        If ``data`` is of an unsupported type, or the predictor returns an
        array with an unexpected number of dimensions.
    """
    if not 0 <= ret <= 2:
        raise ValueError("invalid value of argument 'ret'")
    if (ret > 0
            and any(v.is_continuous for v in self.domain.class_vars)):
        raise ValueError("cannot predict continuous distributions")

    # Call the predictor
    if isinstance(data, np.ndarray):
        prediction = self.predict(np.atleast_2d(data))
    elif isinstance(data, scipy.sparse.csr_matrix):
        # csr_matrix is referenced through its public location; the
        # scipy.sparse.csr submodule path is private and deprecated.
        prediction = self.predict(data)
    elif isinstance(data, Instance):
        if data.domain != self.domain:
            data = Instance(self.domain, data)
        data = Table(data.domain, [data])
        prediction = self.predict_storage(data)
    elif isinstance(data, Table):
        if data.domain != self.domain:
            data = data.from_table(self.domain, data)
        prediction = self.predict_storage(data)
    else:
        raise TypeError("Unrecognized argument (instance of '{}')".format(
            type(data).__name__))

    # Parse the result into value and probs
    multitarget = len(self.domain.class_vars) > 1
    if isinstance(prediction, tuple):
        value, probs = prediction
    elif prediction.ndim == 1 + multitarget:
        value, probs = prediction, None
    elif prediction.ndim == 2 + multitarget:
        value, probs = None, prediction
    else:
        # The dimension is interpolated into the message; previously it was
        # passed as a second exception argument and never formatted.
        raise TypeError("model returned a %i-dimensional array"
                        % prediction.ndim)

    # Ensure that we have what we need to return
    if ret != Model.Probs and value is None:
        value = np.argmax(probs, axis=-1)
    if ret != Model.Value and probs is None:
        if multitarget:
            max_card = max(len(c.values)
                           for c in self.domain.class_vars)
            probs = np.zeros(value.shape + (max_card,), float)
            for i in range(len(self.domain.class_vars)):
                probs[:, i, :], _ = bn.bincount(
                    np.atleast_2d(value[:, i]), max_card - 1)
        else:
            probs, _ = bn.bincount(np.atleast_2d(value),
                                   len(self.domain.class_var.values) - 1)
        if ret == Model.ValueProbs:
            return value, probs
        else:
            return probs

    # Return what we need to
    if ret == Model.Probs:
        return probs
    # NOTE(review): an Instance argument has been rebound to a one-row Table
    # above, so this check looks like it can no longer hold for Instance
    # input — confirm whether wrapping the result in a Value was intended.
    if isinstance(data, Instance) and not multitarget:
        value = Value(self.domain.class_var, value[0])
    if ret == Model.Value:
        return value
    else:  # ret == Model.ValueProbs
        return value, probs