def test_make(self):
    """make() must return the cached instance; the constructor must not."""
    ContinuousVariable._clear_cache()
    first = ContinuousVariable.make("age")
    second = ContinuousVariable.make("age")
    fresh = ContinuousVariable("age")
    self.assertEqual(first, second)
    self.assertNotEqual(first, fresh)
def test_invalid_input_colors(self):
    """Sending data whose variable has a malformed 'colors' attribute must not crash."""
    var = ContinuousVariable("a")
    var.attributes["colors"] = "invalid"
    _ = var.colors  # force parsing of the malformed attribute
    table = Table(Domain([var]))
    self.send_signal(self.widget.Inputs.data, table)
def freeviz_variable(i):
    """Build the i-th FreeViz component variable (1-based display name)."""
    var = ContinuousVariable(
        "FreeViz Component {}".format(i + 1),
        compute_value=FreeVizProjector(self, i, freeviz_transform))
    # mirror the same linear projection on the SQL side
    var.to_sql = LinearCombinationSql(
        domain.attributes,
        self.components_[i, :],
        getattr(self, 'mean_', None))
    return var
def test_make_proxy_cont(self):
    """Chained proxies all point at the same master and compare equal."""
    base = ContinuousVariable("abc")
    proxy1 = base.make_proxy()
    proxy2 = proxy1.make_proxy()
    for var in (base, proxy1, proxy2):
        self.assertIs(var.master, base)
    self.assertEqual(base, proxy1)
    self.assertEqual(base, proxy2)
    self.assertEqual(proxy1, proxy2)
def test_to_val(self):
    """to_val: strings stringify, '?' maps to NaN, base Variable passes through."""
    svar = StringVariable("x")
    self.assertEqual(svar.to_val("foo"), "foo")
    self.assertEqual(svar.to_val(42), "42")

    cvar = ContinuousVariable("x")
    self.assertTrue(math.isnan(cvar.to_val("?")))
    self.assertTrue(math.isnan(Unknown))

    base = Variable("x")
    self.assertEqual(base.to_val("x"), "x")
def test_colors(self):
    """Default palette, 'colors'-attribute parsing, and direct assignment."""
    var = ContinuousVariable("a")
    self.assertEqual(var.colors, ((0, 0, 255), (255, 255, 0), False))
    self.assertIs(var.colors, var._colors)  # cached after first access

    var = ContinuousVariable("a")
    var.attributes["colors"] = ['#010203', '#040506', True]
    self.assertEqual(var.colors, ((1, 2, 3), (4, 5, 6), True))

    var.colors = ((3, 2, 1), (6, 5, 4), True)
    self.assertEqual(var.colors, ((3, 2, 1), (6, 5, 4), True))
def test_copy_descriptor_continuous(self):
    """copy_descriptor keeps type, name, decimals and deep-copies attributes;
    an explicit name overrides the original."""
    source = ContinuousVariable("foo", number_of_decimals=42)
    source.attributes = {"bar": 42, "baz": 13}
    clone = copy_descriptor(source)
    self.assertIsInstance(clone, ContinuousVariable)
    self.assertEqual(clone.name, "foo")
    self.assertEqual(clone.number_of_decimals, 42)
    self.assertEqual(clone.attributes, source.attributes)
    self.assertIsNot(clone.attributes, source.attributes)

    source = ContinuousVariable("foo", number_of_decimals=42)
    renamed = copy_descriptor(source, "cux")
    self.assertEqual(renamed.name, "cux")
def test_construct_numeric_names(self):
    """Numeric column names must be sanitized into usable expression names."""
    data = Table("iris")
    data.domain.attributes[0].name = "0.1"
    data.domain.attributes[1].name = "1"
    descriptors = PyListModel([
        ContinuousDescriptor(name="S", expression="_0_1 + _1",
                             number_of_decimals=3)])
    new_vars = construct_variables(descriptors, data.domain.variables)
    new_data = Table(Domain(new_vars, None), data)
    np.testing.assert_array_equal(new_data.X[:, 0],
                                  data.X[:, :2].sum(axis=1))
    ContinuousVariable._clear_all_caches()
def test_proxy_has_separate_colors(self):
    """Setting colors on a proxy must not leak to the master or to siblings."""
    master = ContinuousVariable("abc")
    proxy_a = master.make_proxy()
    proxy_b = proxy_a.make_proxy()

    default_colors = master.colors
    red_green = (255, 0, 0), (0, 255, 0), False
    blue_red = (0, 0, 255), (255, 0, 0), False

    proxy_a.colors = red_green
    proxy_b.colors = blue_red
    self.assertEqual(master.colors, default_colors)
    self.assertEqual(proxy_a.colors, red_green)
    self.assertEqual(proxy_b.colors, blue_red)
def transform_continuous(var):
    """Span-normalize var; returns var unchanged when normalization is off."""
    if not self.normalize_continuous:
        return var
    normalized = ContinuousVariable(var.name)
    hi, lo = dists[var_ptr].max(), dists[var_ptr].min()
    span = hi - lo
    if span < 1e-15:
        span = 1  # constant column: avoid division by zero
    if self.zero_based:
        normalized.get_value_from = Normalizer(var, lo, 1 / span)
    else:
        normalized.get_value_from = Normalizer(var, (hi + lo) / 2, 2 / span)
    return normalized
def read(self):
    """Read a tab-separated map file into a Table.

    Layout: a header row whose first two cells are empty and the rest are
    wavenumbers, followed by rows of (map_x, map_y, intensities...).
    The coordinates end up as 'map_x'/'map_y' metas.
    """
    with open(self.filename, "rb") as f:
        # read first row separately because of two empty columns
        header = f.readline().decode("ascii").rstrip().split("\t")
        header = [a.strip() for a in header]
        # first two columns hold coordinates, not spectral data
        assert header[0] == header[1] == ""
        dom_vals = [float(v) for v in header[2:]]
        # note: the comprehension's `f` (a float) does not shadow the file
        # handle outside the comprehension scope
        domain = Orange.data.Domain(
            [ContinuousVariable.make("%f" % f) for f in dom_vals], None)
        tbl = np.loadtxt(f, ndmin=2)
        data = Orange.data.Table(domain, tbl[:, 2:])
        metas = [ContinuousVariable.make('map_x'),
                 ContinuousVariable.make('map_y')]
        domain = Orange.data.Domain(domain.attributes, None, metas=metas)
        data = data.transform(domain)
        # copy the coordinate columns into the meta columns
        data[:, metas[0]] = tbl[:, 0].reshape(-1, 1)
        data[:, metas[1]] = tbl[:, 1].reshape(-1, 1)
        return data
def concatenate_data(tables, filenames, label):
    """Concatenate spectra tables over their union domain, adding
    "Filename" and "Label" string metas.

    Args:
        tables: list of Table objects, or (xs, data, Table) triples for
            spectra whose values are stored outside the table's X.
        filenames: one source name per input table.
        label: a single label value written into every output row.
    """
    domain, xs = domain_union_for_spectra(tables)
    ntables = [(table if isinstance(table, Table) else table[2]).transform(domain)
               for table in tables]
    data = type(ntables[0]).concatenate(ntables, axis=0)
    source_var = StringVariable.make("Filename")
    label_var = StringVariable.make("Label")
    # add other variables
    xs_atts = tuple([ContinuousVariable.make("%f" % f) for f in xs])
    domain = Domain(xs_atts + domain.attributes, domain.class_vars,
                    domain.metas + (source_var, label_var))
    data = data.transform(domain)
    # fill in spectral data
    xs_sind = np.argsort(xs)
    xs_sorted = xs[xs_sind]
    pos = 0
    for table in tables:
        t = table if isinstance(table, Table) else table[2]
        if not isinstance(table, Table):
            # map this triple's x values onto column positions in the union
            indices = xs_sind[np.searchsorted(xs_sorted, table[0])]
            data.X[pos:pos + len(t), indices] = table[1]
        pos += len(t)
    # repeat each filename for the length of its table
    data[:, source_var] = np.array(list(
        chain(*(repeat(fn, len(table))
                for fn, table in zip(filenames, ntables)))
    )).reshape(-1, 1)
    # the same label for every row
    data[:, label_var] = np.array(list(
        chain(*(repeat(label, len(table))
                for fn, table in zip(filenames, ntables)))
    )).reshape(-1, 1)
    return data
def single_x_reader(self, spc_file):
    """Build a Table from an SPC file whose subfiles share one x axis."""
    # one attribute per x value, named by the value itself
    domain = Domain(
        [ContinuousVariable.make("%f" % v) for v in spc_file.x], None)
    rows = np.array([sub.y for sub in spc_file.sub])
    return Orange.data.Table.from_numpy(
        domain, rows.astype(float, order='C'))
def extend_attributes(self, X, feature_names, var_attrs=None):
    """
    Append features to corpus.

    Args:
        X (numpy.ndarray): Features to append
        feature_names (list): List of string containing feature names
        var_attrs (dict): Additional attributes appended to variable.attributes.
    """
    self.X = np.hstack((self.X, X))

    added = []
    for name in feature_names:
        var = ContinuousVariable.make(name)
        if isinstance(var_attrs, dict):
            var.attributes.update(var_attrs)
        added.append(var)

    self.domain = Domain(
        attributes=self.domain.attributes + tuple(added),
        class_vars=self.domain.class_vars,
        metas=self.domain.metas
    )
def test_decimals(self):
    """str_val honors number_of_decimals; Unknown renders as '?'."""
    cases = [
        (4, 4.654321, "4.6543"),
        (4, 4.654321654321, "4.6543"),
        (4, Unknown, "?"),
        (5, 0.000000000001, "0.00000"),
        (10, 0.000000000001, "1e-12"),
    ]
    for decimals, value, expected in cases:
        var = ContinuousVariable("a", decimals)
        self.assertEqual(var.str_val(value), expected)
def _predict_as_table(self, prediction, confidence):
    """Pack forecasts into a Timeseries with, per variable, a mean column
    plus low/high confidence-interval columns.

    Args:
        prediction: forecast array; if 3-dimensional, the third axis
            enumerates variables (assumed — TODO confirm against caller).
        confidence: CI level in percent (int), stored on the CI variables.
    """
    from Orange.data import Domain, ContinuousVariable
    means, lows, highs = [], [], []
    n_vars = prediction.shape[2] if len(prediction.shape) > 2 else 1
    # NOTE(review): zip truncates if _table_var_names is shorter than
    # n_vars — confirm the two always match.
    for i, name in zip(range(n_vars), self._table_var_names or range(n_vars)):
        mean = ContinuousVariable('{} (forecast)'.format(name))
        low = ContinuousVariable('{} ({:d}%CI low)'.format(name, confidence))
        high = ContinuousVariable('{} ({:d}%CI high)'.format(name, confidence))
        # ad-hoc attributes; presumably read by downstream widgets — verify
        low.ci_percent = high.ci_percent = confidence
        mean.ci_attrs = (low, high)
        means.append(mean)
        lows.append(low)
        highs.append(high)
    domain = Domain(means + lows + highs)
    X = np.column_stack(prediction)
    table = Timeseries.from_numpy(domain, X)
    table.name = (self._table_name or '') + '({} forecast)'.format(self)
    return table
def transform_continuous(var):
    """Return a normalized copy of `var` according to the selected mode,
    or `var` itself when normalization is off.

    Modes: Leave (no-op), NormalizeBySpan (rescale by min/max range),
    NormalizeBySD (center on the mean, scale by standard deviation).
    """
    if self.normalize_continuous == self.Leave:
        return var
    elif self.normalize_continuous == self.NormalizeBySpan:
        new_var = ContinuousVariable(var.name)
        dma, dmi = dists[var_ptr].max(), dists[var_ptr].min()
        diff = dma - dmi
        if diff < 1e-15:
            diff = 1  # constant column: avoid division by zero
        if self.zero_based:
            new_var.get_value_from = Normalizer(var, dmi, 1 / diff)
        else:
            new_var.get_value_from = Normalizer(var, (dma + dmi) / 2, 2 / diff)
        return new_var
    elif self.normalize_continuous == self.NormalizeBySD:
        new_var = ContinuousVariable(var.name)
        avg = dists[var_ptr].mean()
        sd = dists[var_ptr].standard_deviation()
        # Fix: guard against a constant column, mirroring the span branch;
        # previously this divided by zero when sd == 0.
        if sd == 0:
            sd = 1
        new_var.get_value_from = Normalizer(var, avg, 1 / sd)
        return new_var
def test_list_attributes_remain_lists(self):
    """Editing a list-valued attribute through the editor must keep it a list."""
    var = ContinuousVariable("a")
    var.attributes["list"] = [1, 2, 3]
    table = Table(Domain([var]))
    self.send_signal(self.widget.Inputs.data, table)

    assert isinstance(self.widget, OWEditDomain)
    # select the first (only) variable
    index = self.widget.domain_view.model().index(0)
    self.widget.domain_view.setCurrentIndex(index)

    # rewrite the label value through the editor's labels model
    editor = self.widget.editor_stack.findChild(ContinuousVariableEditor)
    assert isinstance(editor, ContinuousVariableEditor)
    label_index = editor.labels_model.index(0, 1)
    editor.labels_model.setData(label_index, "[1, 2, 4]", Qt.EditRole)

    self.widget.commit()
    output = self.get_output(self.widget.Outputs.data)
    self.assertEqual(output.domain["a"].attributes["list"], [1, 2, 4])
def _guess_variable(self, field_name, field_metadata, inspect_table):
    """Map a PostgreSQL column's type OID to an Orange variable.

    Integer and char columns are optionally inspected for their distinct
    values and turned into DiscreteVariable when any are found; everything
    unrecognized falls through to StringVariable.
    """
    type_code = field_metadata[0]

    FLOATISH_TYPES = (700, 701, 1700)  # real, float8, numeric
    INT_TYPES = (20, 21, 23)  # bigint, int, smallint
    CHAR_TYPES = (25, 1042, 1043,)  # text, char, varchar
    BOOLEAN_TYPES = (16,)  # bool
    DATE_TYPES = (1082, 1114, 1184, )  # date, timestamp, timestamptz
    # time, timestamp, timestamptz, timetz
    TIME_TYPES = (1083, 1114, 1184, 1266,)

    if type_code in FLOATISH_TYPES:
        return ContinuousVariable.make(field_name)

    if type_code in TIME_TYPES + DATE_TYPES:
        tv = TimeVariable.make(field_name)
        # timestamp OIDs appear in both tuples, so both flags can be set
        tv.have_date |= type_code in DATE_TYPES
        tv.have_time |= type_code in TIME_TYPES
        return tv

    if type_code in INT_TYPES:  # bigint, int, smallint
        if inspect_table:
            values = self.get_distinct_values(field_name, inspect_table)
            if values:
                return DiscreteVariable.make(field_name, values)
        return ContinuousVariable.make(field_name)

    if type_code in BOOLEAN_TYPES:
        return DiscreteVariable.make(field_name, ['false', 'true'])

    if type_code in CHAR_TYPES:
        if inspect_table:
            values = self.get_distinct_values(field_name, inspect_table)
            # remove trailing spaces
            values = [v.rstrip() for v in values]
            if values:
                return DiscreteVariable.make(field_name, values)

    return StringVariable.make(field_name)
def etc_to_table(self, etc_json, time_var=False, callback=lambda: None):
    """ Converts data from Json to :obj:`Orange.data.table`

    Args:
        etc_json (dict): Data in json like format
        time_var (bool): Create column of time points. Default is set to False.

    Returns:
        :obj:`Orange.data.Table`
    """
    cbc = CallBack(2, callback, callbacks=30)

    # one continuous variable per time point, labelled TP 1, TP 2, ...
    variables = []
    time_point = 1
    for time in etc_json['etc']['timePoints']:
        var = ContinuousVariable('TP ' + str(time_point))
        var.attributes['Time'] = str(time)
        variables.append(var)
        time_point += 1

    meta_attr = StringVariable.make('Gene')
    domain = Domain(variables, metas=[meta_attr])
    cbc()

    # one row per gene: expression values followed by the gene name meta
    table = []
    for row in etc_json['etc']['genes']:
        gene_expression = [exp for exp in etc_json['etc']['genes'][row]]
        gene_expression.append(row)
        table.append(gene_expression)

    orange_table = Table(domain, table)

    if time_var:
        orange_table = transpose_table(orange_table)
    cbc()

    cbc.end()
    return orange_table
def construct_output_data_table(embedded_images, embeddings):
    """Append embedding columns (n0..n{k-1}) to the input table's attributes."""
    embed_vars = [ContinuousVariable.make('n{:d}'.format(i))
                  for i in range(embeddings.shape[1])]
    extended_domain = Domain(
        list(embedded_images.domain.attributes) + embed_vars,
        embedded_images.domain.class_vars,
        embedded_images.domain.metas)
    out = embedded_images.transform(extended_domain)
    out[:, embed_vars] = embeddings
    return out
def read(self):
    """Read a Gwyddion Simple Field (.gsf) file into a one-column Table
    ('value') with pixel x/y indices as metas; header fields are stored
    in the table's `.attributes` dict."""
    with open(self.filename, "rb") as f:
        if not (f.readline() == b'Gwyddion Simple Field 1.0\n'):
            raise ValueError('Not a correct file')

        meta = {}

        # header is "name = value" lines terminated by a NUL byte;
        # XRes and YRes are the mandatory fields
        term = False
        while term != b'\x00':
            l = f.readline().decode('utf-8')
            name, value = l.split("=")
            name = name.strip()
            value = value.strip()
            meta[name] = value
            term = f.read(1)
            f.seek(-1, 1)  # un-read the probe byte

        # skip NUL padding up to the next 4-byte boundary
        f.read(4 - f.tell() % 4)

        meta["XRes"] = XR = int(meta["XRes"])
        meta["YRes"] = YR = int(meta["YRes"])
        meta["XReal"] = float(meta.get("XReal", 1))
        meta["YReal"] = float(meta.get("YReal", 1))
        meta["XOffset"] = float(meta.get("XOffset", 0))
        meta["YOffset"] = float(meta.get("YOffset", 0))
        meta["Title"] = meta.get("Title", None)
        meta["XYUnits"] = meta.get("XYUnits", None)
        meta["ZUnits"] = meta.get("ZUnits", None)

        # NOTE(review): reshape(XR, YR) only matches row-major storage when
        # XRes == YRes — confirm behavior on non-square images.
        X = np.fromfile(f, dtype='float32', count=XR*YR).reshape(XR, YR)

        metas = [Orange.data.ContinuousVariable.make("x"),
                 Orange.data.ContinuousVariable.make("y")]
        XRr = np.arange(XR)
        YRr = np.arange(YR)
        # all (x, y) index pairs, y varying slowest
        indices = np.transpose([np.tile(XRr, len(YRr)),
                                np.repeat(YRr, len(XRr))])

        domain = Domain([ContinuousVariable.make("value")], None, metas=metas)
        data = Orange.data.Table(domain,
                                 X.reshape(meta["XRes"]*meta["YRes"], 1),
                                 metas=np.array(indices, dtype="object"))
        data.attributes = meta
        return data
def test_domaineditor_makes_variables(self):
    """Variables created with the domain editor must be interchangeable
    with variables read from file."""
    content = """V0\tV1\nc\td\n\n1.0\t2"""
    expected_v0 = StringVariable.make("V0")
    expected_v1 = ContinuousVariable.make("V1")

    with named_file(content, suffix=".tab") as filename:
        self.open_dataset(filename)
        editor_model = self.widget.domain_editor.model()
        # retype the columns: V0 -> text, V1 -> numeric
        editor_model.setData(editor_model.createIndex(0, 1), "text",
                             Qt.EditRole)
        editor_model.setData(editor_model.createIndex(1, 1), "numeric",
                             Qt.EditRole)
        self.widget.apply_button.click()
        data = self.get_output(self.widget.Outputs.data)
        self.assertEqual(data.domain["V0"], expected_v0)
        self.assertEqual(data.domain["V1"], expected_v1)
def multi_x_reader(self, spc_file):
    """Build a Table from an SPC file whose subfiles have differing x axes."""
    # the domain is the sorted union of all x values across subfiles
    all_x = []
    for sub in spc_file.sub:
        # assume values in x do not repeat within a subfile
        all_x = np.union1d(all_x, sub.x)
    domain = Domain(
        [ContinuousVariable.make("%f" % v) for v in all_x], None)

    rows = []
    for sub in spc_file.sub:
        row = np.ones(len(all_x)) * np.nan
        positions = np.searchsorted(all_x, sub.x)  # columns to fill
        row[positions] = sub.y
        rows.append(row)

    return Orange.data.Table.from_numpy(
        domain, np.array(rows).astype(float, order='C'))
def transpose_table(table):
    """
    Transpose the rows and columns of the table.

    Args:
        table: Data in :obj:`Orange.data.Table`

    Returns:
        Transposed :obj:`Orange.data.Table`. (Genes as columns)
    """
    attrs = table.domain.attributes
    attr = [ContinuousVariable.make(ex['Gene'].value) for ex in table]

    # Set metas. Fix: compare names with `!=` — the original used
    # `name is not 'Time'`, an identity comparison with a string literal,
    # which is implementation-dependent and never guaranteed to match.
    new_metas = [StringVariable.make(name) if name != 'Time'
                 else TimeVariable.make(name)
                 for name in sorted(table.domain.variables[0].attributes.keys())]
    domain = Domain(attr, metas=new_metas)

    meta_values = [[exp.attributes[var.name] for var in domain.metas]
                   for exp in attrs]

    return Table(domain, table.X.transpose(), metas=meta_values)
def read_spectra(self):
    """Read an Agilent mosaic interferogram; returns (features, data, table)
    where the table carries pixel positions plus selected header params."""
    am = agilentMosaicIFG(self.filename)
    info = am.info
    X = am.data

    features = np.arange(X.shape[-1])

    try:
        px_size = info['FPA Pixel Size'] * info['PixelAggregationSize']
    except KeyError:
        # Use pixel units if FPA Pixel Size is not known
        px_size = 1
    x_locs = np.linspace(0, X.shape[1]*px_size, num=X.shape[1], endpoint=False)
    y_locs = np.linspace(0, X.shape[0]*px_size, num=X.shape[0], endpoint=False)

    features, data, additional_table = _spectra_from_image(X, features,
                                                          x_locs, y_locs)

    import_params = ['Effective Laser Wavenumber',
                     'Under Sampling Ratio',
                     ]
    new_attributes = []
    new_columns = []
    for param_key in import_params:
        try:
            param = info[param_key]
        except KeyError:
            pass  # optional header field; skip silently when absent
        else:
            new_attributes.append(ContinuousVariable.make(param_key))
            # constant column: one value repeated for every row
            new_columns.append(np.full((len(data),), param))

    domain = Domain(additional_table.domain.attributes,
                    additional_table.domain.class_vars,
                    additional_table.domain.metas + tuple(new_attributes))
    table = additional_table.transform(domain)
    table[:, new_attributes] = np.asarray(new_columns).T
    return (features, data, table)
def test_properties(self):
    """is_continuous / is_discrete / is_string flags and primitiveness."""
    def check(var, cont, disc, string, primitive):
        for value, expected in ((var.is_continuous, cont),
                                (var.is_discrete, disc),
                                (var.is_string, string),
                                (var.is_primitive(), primitive)):
            if expected:
                self.assertTrue(value)
            else:
                self.assertFalse(value)

    check(ContinuousVariable(), True, False, False, True)
    check(DiscreteVariable(), False, True, False, True)
    check(StringVariable(), False, False, True, False)
def transform_discrete(var):
    """Continuize one discrete variable according to `treat`; returns the
    list of replacement continuous variables (an empty list drops it)."""
    # drop: fewer than two values, or explicitly removed, or multinomial
    # removal and more than two values
    if (len(var.values) < 2 or treat == Continuize.Remove or
            treat == Continuize.RemoveMultinomial and len(var.values) > 2):
        return []
    if treat == Continuize.AsOrdinal:
        # keep the ordinal codes as-is
        new_var = ContinuousVariable(var.name)
        new_var.compute_value = Identity(var)
        return [new_var]
    if treat == Continuize.AsNormalizedOrdinal:
        new_var = ContinuousVariable(var.name)
        n_values = max(1, len(var.values))
        if self.zero_based:
            # map codes onto [0, 1]
            new_var.compute_value = \
                Normalizer(var, 0, 1 / (n_values - 1))
        else:
            # map codes onto [-1, 1]
            new_var.compute_value = \
                Normalizer(var, (n_values - 1) / 2, 2 / (n_values - 1))
        return [new_var]
    # indicator (one-hot style) treatments: pick a base value to skip
    new_vars = []
    if treat == Continuize.Indicators:
        base = -1  # no base: one indicator per value
    elif treat in (Continuize.FirstAsBase, Continuize.RemoveMultinomial):
        base = max(var.base_value, 0)
    else:
        base = dists[var_ptr].modus()  # most frequent value as base
    ind_class = [Indicator1, Indicator][self.zero_based]
    for i, val in enumerate(var.values):
        if i == base:
            continue
        new_var = ContinuousVariable(
            "{}={}".format(var.name, val))
        new_var.compute_value = ind_class(var, i)
        new_vars.append(new_var)
    return new_vars
def transform_discrete(var):
    """Continuize one discrete variable according to `treat`; returns the
    list of replacement continuous variables (an empty list drops it)."""
    # drop: fewer than two values, or ignored, or multi-value ignore
    # with more than two values
    if (len(var.values) < 2 or treat == self.Ignore or
            treat == self.IgnoreMulti and len(var.values) > 2):
        return []
    if treat == self.AsOrdinal:
        # keep the ordinal codes as-is
        new_var = ContinuousVariable(var.name)
        new_var.get_value_from = Identity(var)
        return [new_var]
    if treat == self.AsNormalizedOrdinal:
        new_var = ContinuousVariable(var.name)
        n_values = max(1, len(var.values))
        if self.zero_based:
            # map codes onto [0, 1]
            new_var.get_value_from = \
                Normalizer(var, 0, 1 / (n_values - 1))
        else:
            # map codes onto [-1, 1]
            new_var.get_value_from = \
                Normalizer(var, (n_values - 1) / 2, 2 / (n_values - 1))
        return [new_var]
    # indicator treatments: pick a base value to skip
    new_vars = []
    if treat == self.NValues:
        base = -1  # no base: one indicator per value
    elif treat == self.LowestIsBase or treat == self.IgnoreMulti:
        base = max(var.base_value, 0)
    else:
        base = dists[var_ptr].modus()  # most frequent value as base
    IndClass = [Indicator_1, Indicator][self.zero_based]
    for i, val in enumerate(var.values):
        if i == base:
            continue
        new_var = ContinuousVariable(
            "{}={}".format(var.name, val))
        new_var.get_value_from = IndClass(var, i)
        new_vars.append(new_var)
    return new_vars
def anytime_explain(self, instance, callback=None, update_func=None,
                    update_prediction=None):
    """Iteratively estimate per-attribute explanation scores for `instance`
    by sampling, until each attribute's error bound is satisfied or
    `max_iter` is reached.

    Args:
        instance: the data instance to explain.
        callback: progress callback taking an int percentage; a truthy
            return value aborts the loop.
        update_func: called with an intermediate result Table roughly once
            per second.
        update_prediction: called once with the model's predicted class.

    Returns:
        (class_value, Table): the prediction and the final explanation
        table with Score/Error columns and Feature/Value metas.
    """
    data_rows, no_atr = self.data.X.shape
    class_value = self.model(instance)[0]
    prng = RandomState(self.seed)
    self.init_arrays(no_atr)
    attr_values = self.get_atr_column(instance)
    batch_mx_size = self.batch_size * no_atr
    # squared z-score for the requested two-sided p-value
    z_sq = abs(st.norm.ppf(self.p_val/2))**2
    tiled_inst = self.tile_instance(instance)
    inst1 = copy.deepcopy(tiled_inst)
    inst2 = copy.deepcopy(tiled_inst)
    worst_case = self.max_iter*no_atr
    time_point = time.time()
    update_table = False
    domain = Domain([ContinuousVariable("Score"),
                     ContinuousVariable("Error")],
                    metas=[StringVariable(name="Feature"),
                           StringVariable(name="Value")])
    if update_prediction is not None:
        update_prediction(class_value)

    def create_res_table():
        # Build the result table from attributes that were sampled at
        # least once (steps != 0).
        nonzero = self.steps != 0
        expl_scaled = (self.expl[nonzero]/self.steps[nonzero]).reshape(1, -1)
        # creating return array
        ips = np.hstack((expl_scaled.T, np.sqrt(
            z_sq * self.var[nonzero] / self.steps[nonzero]).reshape(-1, 1)))
        table = Table.from_numpy(
            domain, ips,
            metas=np.hstack((
                np.asarray(self.atr_names)[nonzero[0]].reshape(-1, 1),
                attr_values[nonzero[0]].reshape(-1, 1))))
        return table

    while not(all(self.iterations_reached[0, :] > self.max_iter)):
        prog = 1 - np.sum(self.max_iter - self.iterations_reached)/worst_case
        if (callback(int(prog*100))):
            break  # caller requested abort
        # pick the attribute to refine: proportionally to its variance
        # while any attribute is unfinished, otherwise the least sampled
        if not(any(self.iterations_reached[0, :] > self.max_iter)):
            a = np.argmax(prng.multinomial(
                1, pvals=(self.var[0, :]/(np.sum(self.var[0, :])))))
        else:
            a = np.argmin(self.iterations_reached[0, :])

        # random 50/50 mask choosing which attributes come from random rows
        perm = (prng.random_sample(batch_mx_size).reshape(
            self.batch_size, no_atr)) > 0.5
        rand_data = self.data.X[prng.randint(0, data_rows,
                                             size=self.batch_size), :]
        # inst1 keeps attribute `a` from the instance, inst2 randomizes it;
        # their prediction difference isolates the contribution of `a`
        inst1.X = np.copy(tiled_inst.X)
        inst1.X[perm] = rand_data[perm]
        inst2.X = np.copy(inst1.X)
        inst1.X[:, a] = tiled_inst.X[:, a]
        inst2.X[:, a] = rand_data[:, a]

        f1 = self._get_predictions(inst1, class_value)
        f2 = self._get_predictions(inst2, class_value)
        diff = np.sum(f1 - f2)
        self.expl[0, a] += diff

        # update variance (Welford-style running mean/variance)
        self.steps[0, a] += self.batch_size
        self.iterations_reached[0, a] += self.batch_size
        d = diff - self.mu[0, a]
        self.mu[0, a] += d / self.steps[0, a]
        self.M2[0, a] += d * (diff - self.mu[0, a])
        self.var[0, a] = self.M2[0, a] / (self.steps[0, a] - 1)

        # throttle UI updates to roughly once per second
        if time.time() - time_point > 1:
            update_table = True
            time_point = time.time()
        if update_table:
            update_table = False
            update_func(create_res_table())

        # exclude from sampling if necessary
        needed_iter = z_sq * self.var[0, a] / (self.error**2)
        if (needed_iter <= self.steps[0, a]) and \
                (self.steps[0, a] >= self.min_iter) or \
                (self.steps[0, a] > self.max_iter):
            self.iterations_reached[0, a] = self.max_iter + 1

    return class_value, create_res_table()
def __init__(self):
    """Set up the widget: result table view, info/criteria controls and a
    placeholder (empty) explanation table model."""
    super().__init__()
    self.data = None
    self.model = None
    self.to_explain = None
    self.explanations = None
    self.stop = True
    self.e = None

    self._task = None
    self._executor = ThreadExecutor()

    self.dataview = QTableView(verticalScrollBarPolicy=Qt.ScrollBarAlwaysOn,
                               sortingEnabled=True,
                               selectionMode=QTableView.NoSelection,
                               focusPolicy=Qt.StrongFocus)
    # sort by the Score column by default
    self.dataview.sortByColumn(2, Qt.DescendingOrder)
    # NOTE(review): setResizeMode is the Qt4 API name; Qt5 renamed it to
    # setSectionResizeMode — confirm which binding is in use.
    self.dataview.horizontalHeader().setResizeMode(QHeaderView.Stretch)

    domain = Domain([ContinuousVariable("Score"),
                     ContinuousVariable("Error")],
                    metas=[StringVariable(name="Feature"),
                           StringVariable(name="Value")])
    self.placeholder_table_model = TableModel(
        Table.from_domain(domain), parent=None)

    self.dataview.setModel(self.placeholder_table_model)

    info_box = gui.vBox(self.controlArea, "Info")
    self.data_info = gui.widgetLabel(info_box, "Data: N/A")
    self.model_info = gui.widgetLabel(info_box, "Model: N/A")
    self.sample_info = gui.widgetLabel(info_box, "Sample: N/A")

    criteria_box = gui.vBox(self.controlArea, "Stopping criteria")
    self.error_spin = gui.spin(criteria_box, self, "gui_error", 0.01, 1,
                               step=0.01, label="Error < ", spinType=float,
                               callback=self._update_error_spin,
                               controlWidth=80, keyboardTracking=False)
    self.p_val_spin = gui.spin(criteria_box, self, "gui_p_val", 0.01, 1,
                               step=0.01, label="Error p-value < ",
                               spinType=float,
                               callback=self._update_p_val_spin,
                               controlWidth=80, keyboardTracking=False)
    gui.rubber(self.controlArea)

    self.cancel_button = gui.button(self.controlArea, self,
                                    "Stop Computation",
                                    callback=self.toggle_button,
                                    autoDefault=True,
                                    tooltip="Stops and restarts computation")
    self.cancel_button.setDisabled(True)

    predictions_box = gui.vBox(self.mainArea, "Model prediction")
    self.predict_info = gui.widgetLabel(predictions_box, "")

    self.mainArea.layout().addWidget(self.dataview)
    self.resize(640, 480)
def get_lsp_results(self, in_data):
    """Run MPPY's LSP 2D projection on the input table and return a new
    table with LSP-x / LSP-y appended to the metas."""
    attr = in_data.domain.attributes  # attributes coming from the workflow
    target = in_data.domain.class_vars  # classes coming from the workflow
    meta = in_data.domain.metas  # meta data coming from the workflow
    data = np.array(in_data)  # LSP in MPPY works on numpy data
    # compute the 2-D coordinates with LSP
    coordinates_2d = mppy.lsp_2d(data, n_neighbors=self.neighborhood)

    """ Obtem os Atributos provenientes do Workflow e adiciona os demais"""
    # copy attribute values row by row
    X = []
    for j in range(len(in_data)):
        aux = []
        for i in range(len(in_data.domain.attributes)):
            attr_name = in_data.domain.attributes[i].name
            attr_index = in_data.domain.index(attr_name)
            attr_data = in_data[j, attr_index]
            aux.append(attr_data)
        X.append(aux)

    """ Obtem as Classes provenientes do Workflow e adiciona os demais"""
    # copy class values row by row
    Y = []
    for j in range(len(in_data)):
        aux = []
        for i in range(len(in_data.domain.class_vars)):
            values_name = in_data.domain.class_vars[i].name
            values_index = in_data.domain.index(values_name)
            values_data = in_data[j, values_index]
            aux.append(values_data)
        Y.append(aux)

    """ Obtem os Meta dados provenientes do Workflow e adiciona os demais"""
    # copy metas row by row and append the projected coordinates
    M = []
    for j in range(len(in_data)):
        aux = []
        for i in range(len(in_data.domain.metas)):
            meta_name = in_data.domain.metas[i].name
            meta_index = in_data.domain.index(meta_name)
            meta_data = in_data[j, meta_index]
            aux.append(meta_data)
        aux.append(coordinates_2d[j][0])  # LSP x coordinate
        aux.append(coordinates_2d[j][1])  # LSP y coordinate
        M.append(aux)

    """Adiciona as informacoes do LSP nos Meta dados, para passar a frente no workflow"""
    # extend the meta variables with the two LSP coordinate columns
    meta = meta + (ContinuousVariable("LSP-x"),)
    meta = meta + (ContinuousVariable("LSP-y"),)

    """ Domain(...)
        Attributes: attributes (list of Variable) – a list of attributes
        Classes: class_vars (Variable or list of Variable) – target variable or a list of target variables
        Metas: metas (list of Variable) – a list of meta attributes
        Source: source (Orange.data.Domain) – the source domain for attributes
    """
    domain = Domain(
        attr,
        target,
        meta,
        None
    )

    """ Table.from_numpy(...)
        Domain: domain (Orange.data.Domain) – the domain for the new table
        Values: X (np.array) – array with attribute values
        Classes: Y (np.array) – array with class values
        #Metas: metas (np.array) – array with meta attributes
        Weights: W (np.array) – array with weights
    """
    out_data = Table.from_numpy(
        domain,
        X,
        Y,
        M,
        None
    )
    return out_data
def test_missing_values(self):
    """Constructing an editor over NaN bounds must raise ValueError."""
    variable = ContinuousVariable("var")
    with self.assertRaises(ValueError):
        ContinuousVariableEditor(self.parent, variable, np.nan, np.nan,
                                 Mock())
def __init__(self):
    """Build the FreeViz widget: projection graph, optimization controls,
    anchor/plot property boxes, and the async optimization loop."""
    super().__init__()

    self.data = None
    self.subset_data = None
    self._subset_mask = None
    self._validmask = None
    self._X = None
    self._Y = None
    self._selection = None
    self.__replot_requested = False

    # output variables holding the projected coordinates
    self.variable_x = ContinuousVariable("freeviz-x")
    self.variable_y = ContinuousVariable("freeviz-y")

    box0 = gui.vBox(self.mainArea, True, margin=0)
    self.graph = OWFreeVizGraph(self, box0, "Plot",
                                view_box=FreeVizInteractiveViewBox)
    box0.layout().addWidget(self.graph.plot_widget)
    plot = self.graph.plot_widget

    box = gui.widgetBox(self.controlArea, "Optimization", spacing=10)
    form = QFormLayout(
        labelAlignment=Qt.AlignLeft,
        formAlignment=Qt.AlignLeft,
        fieldGrowthPolicy=QFormLayout.AllNonFixedFieldsGrow,
        verticalSpacing=10)

    form.addRow(
        "Initialization",
        gui.comboBox(box, self, "initialization",
                     items=["Circular", "Random"],
                     callback=self.reset_initialization))

    box.layout().addLayout(form)

    self.btn_start = gui.button(widget=box, master=self, label="Optimize",
                                callback=self.toogle_start, enabled=False)

    self.viewbox = plot.getViewBox()
    self.replot = None

    g = self.graph.gui
    g.point_properties_box(self.controlArea)
    self.models = g.points_models

    box = gui.widgetBox(self.controlArea, "Show anchors")
    self.rslider = gui.hSlider(
        box, self, "radius", minValue=0, maxValue=100, step=5,
        label="Radius", createLabel=False, ticks=True,
        callback=self.update_radius)
    self.rslider.setTickInterval(0)
    self.rslider.setPageStep(10)

    box = gui.vBox(self.controlArea, "Plot Properties")
    g.add_widgets([g.JitterSizeSlider], box)
    g.add_widgets([g.ShowLegend, g.ClassDensity, g.LabelOnlySelected], box)

    self.graph.box_zoom_select(self.controlArea)

    self.controlArea.layout().addStretch(100)

    self.icons = gui.attributeIconDict

    p = self.graph.plot_widget.palette()
    self.graph.set_palette(p)

    gui.auto_commit(self.controlArea, self, "auto_commit", "Send Selection",
                    "Send Automatically")
    self.graph.zoom_actions(self)

    # FreeViz: optimization runs asynchronously and feeds projections back
    self._loop = AsyncUpdateLoop(parent=self)
    self._loop.yielded.connect(self.__set_projection)
    self._loop.finished.connect(self.__freeviz_finished)
    self._loop.raised.connect(self.__on_error)

    self._new_plotdata()
def __get_pivot_tab_domain(self, val_var, X, X_h, X_v, X_t, agg_funs):
    """Build the four pivot-table domains (main, horizontal-total,
    vertical-total, grand-total), renaming duplicate variable names."""
    def map_values(index, _X):
        # Replace distinct string values in column `index` with their
        # ordinal codes in place; return the value list for the variable.
        values = np.unique(_X[:, index])
        values = np.delete(values, np.where(values == "nan")[0])
        for j, value in enumerate(values):
            _X[:, index][_X[:, index] == value] = j
        return values

    # keep time typing only when every aggregate preserves it
    create_time_var = \
        isinstance(val_var, TimeVariable) and \
        all(fun in self.TimeVarFunctions for fun in agg_funs)
    create_cont_var = \
        not val_var or val_var.is_continuous and \
        (not isinstance(val_var, TimeVariable) or
         all(fun in self.FloatFunctions for fun in agg_funs))

    vals = np.array(self._col_var.values)[self._col_var_groups.astype(int)]
    if create_time_var:
        kwargs = {"have_date": val_var.have_date,
                  "have_time": val_var.have_time}
        attrs = [[TimeVariable(f"{v}", **kwargs) for v in vals]] * 2
        attrs.extend([[TimeVariable("Total", **kwargs)]] * 2)
    elif create_cont_var:
        attrs = [[ContinuousVariable(f"{v}", 1) for v in vals]] * 2
        attrs.extend([[ContinuousVariable("Total", 1)]] * 2)
    else:
        # discrete fallback: encode the string cells in place
        attrs = []
        for x in (X, X_h):
            attrs.append([DiscreteVariable(f"{v}", map_values(i, x))
                          for i, v in enumerate(vals, 2)])
        for x in (X_v, X_t):
            attrs.append([DiscreteVariable("Total", map_values(0, x))])
    row_var_h = DiscreteVariable(self._row_var.name, values=["Total"])
    aggr_attr = DiscreteVariable('Aggregate', [str(f) for f in agg_funs])

    same_row_col = self._col_var is self._row_var

    # rename any variables whose names collide
    extra_vars = [self._row_var, aggr_attr]
    uniq_a = get_unique_names_duplicates([v.name for v in extra_vars]
                                         + [atr.name for atr in attrs[0]])
    for (idx, var), u in zip(enumerate(chain(extra_vars, attrs[0])), uniq_a):
        if var.name == u:
            continue
        if idx == 0:
            self.renamed.append(self._row_var.name)
            self._row_var = self._row_var.copy(name=u)
            if same_row_col:
                self._col_var = self._row_var
            row_var_h = row_var_h.copy(name=u)
        elif idx == 1:
            self.renamed.append(aggr_attr.name)
            aggr_attr = aggr_attr.copy(name=u)
        else:
            self.renamed.append(var.name)
            attrs[0][idx - 2] = var.copy(name=u)
            attrs[1][idx - 2] = var.copy(name=u)

    if same_row_col:
        vals = tuple(v.name for v in attrs[0])
        # NOTE(review): the make() return values are discarded here —
        # presumably relied on for its caching side effect; confirm this
        # is intentional.
        self._row_var.make(self._row_var.name, values=vals)
        vals = tuple(v.name for v in attrs[2])
        row_var_h.make(row_var_h.name, vals)

    return (Domain([self._row_var, aggr_attr] + attrs[0]),
            Domain([row_var_h, aggr_attr] + attrs[1]),
            Domain(attrs[2]),
            Domain(attrs[3]))
def test_label_changes_yields_modified_labels(self):
    """A changed attribute value must be reported as a label change."""
    before = ContinuousVariable("a")
    before.attributes["a"] = "b"
    after = before.copy(None)
    after.attributes["a"] = "c"
    self.assertNotEmpty(self.report._label_changes(before, after))
def test_invalid_input_colors(self):
    """A malformed 'colors' attribute on input data must not crash the widget."""
    var = ContinuousVariable("a")
    var.attributes["colors"] = "invalid"
    data = Table.from_domain(Domain([var]))
    self.send_signal(self.widget.Inputs.data, data)
def setUp(self):
    """Create descriptors for three continuous variables and a fresh model."""
    variables = [ContinuousVariable(name) for name in ("z", "w", "u")]
    self.descs = [owcolor.ContAttrDesc(var) for var in variables]
    self.model = owcolor.ContColorTableModel()
class TestInstance(unittest.TestCase):
    """Tests for Orange.data.Instance: construction, indexing, formatting, equality."""

    # Shared fixture descriptors; strings are turned into variables by
    # create_domain / mock_domain below.
    attributes = ["Feature %i" % i for i in range(10)]
    class_vars = ["Class %i" % i for i in range(1)]
    metas = [DiscreteVariable("Meta 1", values="XYZ"),
             ContinuousVariable("Meta 2"),
             StringVariable("Meta 3")]

    def mock_domain(self, with_classes=False, with_metas=False):
        """Return a MagicMock standing in for a Domain with the fixture parts."""
        attributes = self.attributes
        class_vars = self.class_vars if with_classes else []
        metas = self.metas if with_metas else []
        variables = attributes + class_vars
        return MagicMock(Domain, attributes=attributes,
                         class_vars=class_vars, metas=metas,
                         variables=variables)

    def create_domain(self, attributes=(), classes=(), metas=()):
        """Build a real Domain; plain strings become variables of a default type."""
        attr_vars = [ContinuousVariable(name=a) if isinstance(a, str) else a
                     for a in attributes]
        class_vars = [ContinuousVariable(name=c) if isinstance(c, str) else c
                      for c in classes]
        meta_vars = [DiscreteVariable(name=m, values=map(str, range(5)))
                     if isinstance(m, str) else m
                     for m in metas]
        domain = Domain(attr_vars, class_vars, meta_vars)
        return domain

    def test_init_x_no_data(self):
        # Attributes only, no data: _x is all-NaN, _y and _metas are empty.
        domain = self.mock_domain()
        inst = Instance(domain)
        self.assertIsInstance(inst, Instance)
        self.assertIs(inst.domain, domain)
        self.assertEqual(inst._x.shape, (len(self.attributes), ))
        self.assertEqual(inst._y.shape, (0, ))
        self.assertEqual(inst._metas.shape, (0, ))
        self.assertTrue(all(isnan(x) for x in inst._x))

    def test_init_xy_no_data(self):
        # Attributes + classes, no data: _x and _y are all-NaN.
        domain = self.mock_domain(with_classes=True)
        inst = Instance(domain)
        self.assertIsInstance(inst, Instance)
        self.assertIs(inst.domain, domain)
        self.assertEqual(inst._x.shape, (len(self.attributes), ))
        self.assertEqual(inst._y.shape, (len(self.class_vars), ))
        self.assertEqual(inst._metas.shape, (0, ))
        self.assertTrue(all(isnan(x) for x in inst._x))
        self.assertTrue(all(isnan(x) for x in inst._y))

    def test_init_xym_no_data(self):
        # Full domain, no data: metas default to Unknown/None per meta type.
        domain = self.mock_domain(with_classes=True, with_metas=True)
        inst = Instance(domain)
        self.assertIsInstance(inst, Instance)
        self.assertIs(inst.domain, domain)
        self.assertEqual(inst._x.shape, (len(self.attributes), ))
        self.assertEqual(inst._y.shape, (len(self.class_vars), ))
        self.assertEqual(inst._metas.shape, (3, ))
        self.assertTrue(all(isnan(x) for x in inst._x))
        self.assertTrue(all(isnan(x) for x in inst._y))
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", FutureWarning)
            assert_array_equal(inst._metas,
                               np.array([Unknown, Unknown, None]))

    def test_init_x_arr(self):
        # Construction from a numpy array of attribute values.
        domain = self.create_domain(["x", DiscreteVariable("g", values="MF")])
        vals = np.array([42, 0])
        inst = Instance(domain, vals)
        assert_array_equal(inst._x, vals)
        self.assertEqual(inst._y.shape, (0, ))
        self.assertEqual(inst._metas.shape, (0, ))

        # Empty domain + empty array is allowed.
        domain = self.create_domain()
        inst = Instance(domain, np.empty((0,)))
        self.assertEqual(inst._x.shape, (0, ))
        self.assertEqual(inst._y.shape, (0, ))
        self.assertEqual(inst._metas.shape, (0, ))

    def test_init_x_list(self):
        # Construction from a plain Python list of attribute values.
        domain = self.create_domain(["x", DiscreteVariable("g", values="MF")])
        lst = [42, 0]
        vals = np.array(lst)
        inst = Instance(domain, vals)
        assert_array_equal(inst._x, vals)
        self.assertEqual(inst._y.shape, (0, ))
        self.assertEqual(inst._metas.shape, (0, ))

        domain = self.create_domain()
        inst = Instance(domain, [])
        self.assertEqual(inst._x.shape, (0, ))
        self.assertEqual(inst._y.shape, (0, ))
        self.assertEqual(inst._metas.shape, (0, ))

    def test_init_xy_arr(self):
        # Array input with a class value: last element goes to _y.
        domain = self.create_domain(["x", DiscreteVariable("g", values="MF")],
                                    [DiscreteVariable("y", values="ABC")])
        vals = np.array([42, 0, 1])
        inst = Instance(domain, vals)
        assert_array_equal(inst._x, vals[:2])
        self.assertEqual(inst._y.shape, (1, ))
        self.assertEqual(inst._y[0], 1)
        self.assertEqual(inst._metas.shape, (0, ))

    def test_init_xy_list(self):
        # List input with symbolic discrete values ("M", "C") maps to indices.
        domain = self.create_domain(["x", DiscreteVariable("g", values="MF")],
                                    [DiscreteVariable("y", values="ABC")])
        lst = [42, "M", "C"]
        vals = np.array([42, 0, 2])
        inst = Instance(domain, vals)
        assert_array_equal(inst._x, vals[:2])
        self.assertEqual(inst._y.shape, (1, ))
        self.assertEqual(inst._y[0], 2)
        self.assertEqual(inst._metas.shape, (0, ))

    def test_init_xym_arr(self):
        # Object array input covering attributes, class and metas.
        domain = self.create_domain(["x", DiscreteVariable("g", values="MF")],
                                    [DiscreteVariable("y", values="ABC")],
                                    self.metas)
        vals = np.array([42, "M", "B", "X", 43, "Foo"], dtype=object)
        inst = Instance(domain, vals)
        self.assertIsInstance(inst, Instance)
        self.assertIs(inst.domain, domain)
        self.assertEqual(inst._x.shape, (2, ))
        self.assertEqual(inst._y.shape, (1, ))
        self.assertEqual(inst._metas.shape, (3, ))
        assert_array_equal(inst._x, np.array([42, 0]))
        self.assertEqual(inst._y[0], 1)
        assert_array_equal(inst._metas, np.array([0, 43, "Foo"], dtype=object))

    def test_init_xym_list(self):
        # Same as test_init_xym_arr, but from a plain list.
        domain = self.create_domain(["x", DiscreteVariable("g", values="MF")],
                                    [DiscreteVariable("y", values="ABC")],
                                    self.metas)
        vals = [42, "M", "B", "X", 43, "Foo"]
        inst = Instance(domain, vals)
        self.assertIsInstance(inst, Instance)
        self.assertIs(inst.domain, domain)
        self.assertEqual(inst._x.shape, (2, ))
        self.assertEqual(inst._y.shape, (1, ))
        self.assertEqual(inst._metas.shape, (3, ))
        assert_array_equal(inst._x, np.array([42, 0]))
        self.assertEqual(inst._y[0], 1)
        assert_array_equal(inst._metas, np.array([0, 43, "Foo"], dtype=object))

    def test_init_inst(self):
        # Construction from another Instance copies, and converts when the
        # target domain differs (missing variables become Unknown).
        domain = self.create_domain(["x", DiscreteVariable("g", values="MF")],
                                    [DiscreteVariable("y", values="ABC")],
                                    self.metas)
        vals = [42, "M", "B", "X", 43, "Foo"]
        inst = Instance(domain, vals)

        inst2 = Instance(domain, inst)
        assert_array_equal(inst2._x, np.array([42, 0]))
        self.assertEqual(inst2._y[0], 1)
        assert_array_equal(inst2._metas, np.array([0, 43, "Foo"], dtype=object))

        # Rearranged domain: attributes/metas swap places; unseen variables
        # ("z", "w") come out Unknown.
        domain2 = self.create_domain(["z", domain[1], self.metas[1]],
                                     domain.class_vars,
                                     [self.metas[0], "w", domain[0]])
        inst2 = Instance(domain2, inst)
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", FutureWarning)
            assert_array_equal(inst2._x, np.array([Unknown, 0, 43]))
            self.assertEqual(inst2._y[0], 1)
            assert_array_equal(inst2._metas,
                               np.array([0, Unknown, 42], dtype=object))

    def test_get_item(self):
        # Items are addressable by position, by name and by variable object;
        # negative indices address metas.
        domain = self.create_domain(["x", DiscreteVariable("g", values="MF")],
                                    [DiscreteVariable("y", values="ABC")],
                                    self.metas)
        vals = [42, "M", "B", "X", 43, "Foo"]
        inst = Instance(domain, vals)
        val = inst[0]
        self.assertIsInstance(val, Value)
        self.assertEqual(inst[0], 42)
        self.assertEqual(inst["x"], 42)
        self.assertEqual(inst[domain[0]], 42)

        val = inst[1]
        self.assertIsInstance(val, Value)
        self.assertEqual(inst[1], "M")
        self.assertEqual(inst["g"], "M")
        self.assertEqual(inst[domain[1]], "M")

        val = inst[2]
        self.assertIsInstance(val, Value)
        self.assertEqual(inst[2], "B")
        self.assertEqual(inst["y"], "B")
        self.assertEqual(inst[domain.class_var], "B")

        val = inst[-2]
        self.assertIsInstance(val, Value)
        self.assertEqual(inst[-2], 43)
        self.assertEqual(inst["Meta 2"], 43)
        self.assertEqual(inst[self.metas[1]], 43)

        # Unknown names / foreign variables raise ValueError.
        with self.assertRaises(ValueError):
            inst["asdf"] = 42
        with self.assertRaises(ValueError):
            inst[ContinuousVariable("asdf")] = 42

    def test_set_item(self):
        # Assignment mirrors the lookup forms; invalid discrete values and
        # unknown names raise ValueError.
        domain = self.create_domain(["x", DiscreteVariable("g", values="MF")],
                                    [DiscreteVariable("y", values="ABC")],
                                    self.metas)
        vals = [42, "M", "B", "X", 43, "Foo"]
        inst = Instance(domain, vals)
        inst[0] = 43
        self.assertEqual(inst[0], 43)
        inst["x"] = 44
        self.assertEqual(inst[0], 44)
        inst[domain[0]] = 45
        self.assertEqual(inst[0], 45)

        inst[1] = "F"
        self.assertEqual(inst[1], "F")
        inst["g"] = "M"
        self.assertEqual(inst[1], "M")
        with self.assertRaises(ValueError):
            inst[1] = "N"
        with self.assertRaises(ValueError):
            inst["asdf"] = 42

        inst[2] = "C"
        self.assertEqual(inst[2], "C")
        inst["y"] = "A"
        self.assertEqual(inst[2], "A")
        inst[domain.class_var] = "B"
        self.assertEqual(inst[2], "B")

        inst[-1] = "Y"
        self.assertEqual(inst[-1], "Y")
        inst["Meta 1"] = "Z"
        self.assertEqual(inst[-1], "Z")
        inst[domain.metas[0]] = "X"
        self.assertEqual(inst[-1], "X")

    def test_str(self):
        # String form: "[attrs | class] {metas}", continuous values with
        # number_of_decimals precision.
        domain = self.create_domain(["x", DiscreteVariable("g", values="MF")])
        inst = Instance(domain, [42, 0])
        self.assertEqual(str(inst), "[42.000, M]")

        domain = self.create_domain(["x", DiscreteVariable("g", values="MF")],
                                    [DiscreteVariable("y", values="ABC")])
        inst = Instance(domain, [42, "M", "B"])
        self.assertEqual(str(inst), "[42.000, M | B]")

        domain = self.create_domain(["x", DiscreteVariable("g", values="MF")],
                                    [DiscreteVariable("y", values="ABC")],
                                    self.metas)
        inst = Instance(domain, [42, "M", "B", "X", 43, "Foo"])
        self.assertEqual(str(inst), "[42.000, M | B] {X, 43.000, Foo}")

        domain = self.create_domain([],
                                    [DiscreteVariable("y", values="ABC")],
                                    self.metas)
        inst = Instance(domain, ["B", "X", 43, "Foo"])
        self.assertEqual(str(inst), "[ | B] {X, 43.000, Foo}")

        domain = self.create_domain([], [], self.metas)
        inst = Instance(domain, ["X", 43, "Foo"])
        self.assertEqual(str(inst), "[] {X, 43.000, Foo}")

        domain = self.create_domain(self.attributes)
        inst = Instance(domain, range(len(self.attributes)))
        self.assertEqual(
            str(inst),
            "[{}]".format(", ".join("{:.3f}".format(x)
                                    for x in range(len(self.attributes)))))

        # Setting number_of_decimals to 0 switches to integer formatting.
        for attr in domain:
            attr.number_of_decimals = 0
        self.assertEqual(
            str(inst),
            "[{}]".format(", ".join("{}".format(x)
                                    for x in range(len(self.attributes)))))

    def test_repr(self):
        # repr truncates long instances with "...".
        domain = self.create_domain(self.attributes)
        inst = Instance(domain, range(len(self.attributes)))
        self.assertEqual(repr(inst), "[0.000, 1.000, 2.000, 3.000, 4.000, ...]")

        for attr in domain:
            attr.number_of_decimals = 0
        self.assertEqual(repr(inst), "[0, 1, 2, 3, 4, ...]")

    def test_eq(self):
        # Equality compares x, y and metas; any single difference (including
        # Unknown) breaks it.
        domain = self.create_domain(["x", DiscreteVariable("g", values="MF")],
                                    [DiscreteVariable("y", values="ABC")],
                                    self.metas)
        vals = [42, "M", "B", "X", 43, "Foo"]
        inst = Instance(domain, vals)
        inst2 = Instance(domain, vals)
        self.assertTrue(inst == inst2)
        self.assertTrue(inst2 == inst)

        inst2[0] = 43
        self.assertFalse(inst == inst2)

        inst2[0] = Unknown
        self.assertFalse(inst == inst2)

        inst2 = Instance(domain, vals)
        inst2[2] = "C"
        self.assertFalse(inst == inst2)

        inst2 = Instance(domain, vals)
        inst2[-1] = "Y"
        self.assertFalse(inst == inst2)

        inst2 = Instance(domain, vals)
        inst2[-2] = "33"
        self.assertFalse(inst == inst2)

        inst2 = Instance(domain, vals)
        inst2[-3] = "Bar"
        self.assertFalse(inst == inst2)
class TestSparseTablePandas(TestTablePandas):
    """Re-run the TestTablePandas suite with sparse (CSR) X and Y matrices."""

    # Fixture domain: two continuous + two discrete features,
    # one continuous + one discrete class.
    features = (
        ContinuousVariable(name="c2"),
        ContinuousVariable(name="Continuous Feature 2"),
        DiscreteVariable(name="d1", values=("0", "1")),
        DiscreteVariable(name="Discrete Feature 2",
                         values=("value1", "value2")),
    )
    class_vars = (ContinuousVariable(name="Continuous Class"),
                  DiscreteVariable(name="Discrete Class", values=("m", "f")))
    feature_data = (
        (1, 0, 0, 0),
        (0, 1, 0, 0),
        (0, 1, 1, 0),
        (0, 0, 0, 0),
        (0, 1, 1, 0),
        (0, 0, 0, 0),
        (0, 1, 1, 0),
    )
    class_data = (
        (1, 0),
        (0, 1),
        (1, 0),
        (0, 1),
        (1, 0),
        (0, 1),
        (1, 0),
    )

    def setUp(self):
        """Build a dense table, then re-wrap X/Y as CSR for the actual fixture."""
        self.domain = Domain(attributes=self.features,
                             class_vars=self.class_vars)
        table = Table.from_numpy(
            self.domain,
            np.array(self.feature_data),
            np.array(self.class_data),
        )
        self.table = Table.from_numpy(self.domain,
                                      csr_matrix(table.X),
                                      csr_matrix(table.Y),
                                      W=np.array([1, 0, 1, 0, 1, 1, 1]))

        def arreq(t1, t2):
            # Sparse-aware array comparison: sparse inputs compare by
            # non-zero count of the elementwise difference.
            if all(sp.issparse(t) for t in (t1, t2)):
                return self.assertEqual((t1 != t2).nnz, 0)
            else:
                return np.array_equal(t1, t2)

        self.__arreq__ = arreq

    def test_to_dense(self):
        # Round trip: sparse table -> DataFrame -> dense DataFrame -> table
        # must preserve index, Orange metadata, X, ids and weights.
        df = self.table.X_df
        self.assertIsInstance(df, OrangeDataFrame)
        ddf = df.sparse.to_dense()
        np.testing.assert_array_equal(df.index, ddf.index)
        np.testing.assert_array_equal(df.orange_variables,
                                      ddf.orange_variables)
        np.testing.assert_array_equal(df.orange_attributes,
                                      ddf.orange_attributes)
        np.testing.assert_array_equal(df.orange_role, ddf.orange_role)
        np.testing.assert_array_equal(df.orange_weights, ddf.orange_weights)

        table = self.table.to_dense()
        table2 = ddf.to_orange_table()
        np.testing.assert_array_equal(table2.X, table.X)
        np.testing.assert_array_equal(table2.ids, table.ids)
        np.testing.assert_array_equal(table2.W, table.W)
        np.testing.assert_array_equal(table2.attributes, table.attributes)
def moving_transform(data, spec, fixed_wlen=0):
    """
    Return data transformed according to spec.

    Parameters
    ----------
    data : Timeseries
        A table with features to transform.
    spec : list of lists
        A list of lists [feature:Variable, window_length:int, function:callable].
    fixed_wlen : int
        If not 0, then window_length in spec is disregarded and this length
        is used. Also the windows don't shift by one but instead align
        themselves side by side.

    Returns
    -------
    transformed : Timeseries
        A table of original data and its transformations.
    """
    from itertools import chain
    from Orange.data import ContinuousVariable, Domain
    from orangecontrib.timeseries import Timeseries
    from orangecontrib.timeseries.widgets.utils import available_name
    from orangecontrib.timeseries.agg_funcs import Cumulative_sum, Cumulative_product

    X = []          # one transformed column per spec entry
    attrs = []      # matching ContinuousVariable per column

    for var, wlen, func in spec:
        col = np.ravel(data[:, var])

        if fixed_wlen:
            wlen = fixed_wlen

        if func in (Cumulative_sum, Cumulative_product):
            # Cumulative functions are applied per non-overlapping chunk and
            # the chunk results concatenated.
            out = list(chain.from_iterable(
                func(col[i:i + wlen])
                for i in range(0, len(col), wlen)))
        else:
            # In reverse cause lazy brain. Also prefer informative ends,
            # not beginnings as much.
            # With fixed_wlen the windows tile side by side; otherwise they
            # slide by one sample.
            col = col[::-1]
            out = [func(col[i:i + wlen])
                   for i in range(0, len(col),
                                  wlen if bool(fixed_wlen) else 1)]
            out = out[::-1]

        X.append(out)

        # Name like "temp (5; moving sum)", made unique within the domain.
        template = '{} ({}; {})'.format(
            var.name, wlen, func.__name__.lower().replace('_', ' '))
        name = available_name(data.domain, template)
        attrs.append(ContinuousVariable(name))

    dataX, dataY, dataM = data.X, data.Y, data.metas
    if fixed_wlen:
        # Tiled windows shrink the output; keep every fixed_wlen-th row,
        # aligned to the end of the series.
        n = len(X[0])
        dataX = dataX[::-1][::fixed_wlen][:n][::-1]
        dataY = dataY[::-1][::fixed_wlen][:n][::-1]
        dataM = dataM[::-1][::fixed_wlen][:n][::-1]

    ts = Timeseries(
        Domain(data.domain.attributes + tuple(attrs),
               data.domain.class_vars,
               data.domain.metas),
        np.column_stack((dataX, np.column_stack(X))) if X else dataX,
        dataY, dataM)
    ts.time_variable = data.time_variable
    return ts
def new_table():
    """Return a one-row table over a single continuous attribute 'a'."""
    domain = Domain([ContinuousVariable("a")])
    return Table.from_list(domain, [[1]])
import warnings
from distutils.version import LooseVersion

from unittest import TestCase
from unittest.mock import Mock

import Orange
from Orange.data import Domain, DiscreteVariable
from Orange.data import ContinuousVariable
from Orange.util import OrangeDeprecationWarning
from Orange.widgets.settings import DomainContextHandler, ContextSetting
from Orange.widgets.utils import vartype

# Encoded variable-type markers used in stored context settings
# (100 + vartype distinguishes "variable" entries from plain values).
Continuous = 100 + vartype(ContinuousVariable())
Discrete = 100 + vartype(DiscreteVariable())


class TestDomainContextHandler(TestCase):
    """Tests for DomainContextHandler matching/encoding against a fixed domain."""

    def setUp(self):
        # Fixture domain: 1 continuous + 2 discrete attributes,
        # 1 discrete class, 1 continuous + 1 discrete meta.
        self.domain = Domain(
            attributes=[ContinuousVariable('c1'),
                        DiscreteVariable('d1', values='abc'),
                        DiscreteVariable('d2', values='def')],
            class_vars=[DiscreteVariable('d3', values='ghi')],
            metas=[ContinuousVariable('c2'),
                   DiscreteVariable('d4', values='jkl')]
        )
        # (domain, attribute-encoding, meta-encoding) triple as passed to
        # handler methods; encodings map names to raw vartype codes.
        self.args = (self.domain,
                     {'c1': Continuous - 100, 'd1': Discrete - 100,
                      'd2': Discrete - 100, 'd3': Discrete - 100},
                     {'c2': Continuous - 100, 'd4': Discrete - 100, })
        self.handler = DomainContextHandler()
def setUp(self):
    """Create a descriptor for a single continuous attribute."""
    var = ContinuousVariable("x")
    self.desc = owcolor.ContAttrDesc(var)
def _add_regression_out_columns(slot, newmetas, newcolumns):
    """Append a meta variable named after the predictor and its prediction column."""
    predictions = slot.results.unmapped_predicted
    newmetas.append(ContinuousVariable(name=slot.predictor.name))
    newcolumns.append(predictions.reshape((-1, 1)))
import numpy as np

from AnyQt.QtCore import QMimeData, QPoint, Qt
from AnyQt.QtGui import QDragEnterEvent

from Orange.data import Table, ContinuousVariable, DiscreteVariable, Domain
from Orange.widgets.settings import ContextSetting
from Orange.widgets.utils import vartype
from Orange.widgets.tests.base import WidgetTest
from Orange.widgets.data.owselectcolumns \
    import OWSelectAttributes, VariablesListItemModel, \
    SelectAttributesDomainContextHandler
from Orange.widgets.data.owrank import OWRank
from Orange.widgets.widget import AttributeList

# Raw vartype codes for the two variable kinds used throughout these tests.
Continuous = vartype(ContinuousVariable("c"))
Discrete = vartype(DiscreteVariable("d"))


class TestSelectAttributesDomainContextHandler(TestCase):
    """Tests for the Select Columns widget's domain context handler."""

    def setUp(self):
        # Fixture domain: 1 continuous + 2 discrete attributes,
        # 1 discrete class, 1 continuous + 1 discrete meta.
        self.domain = Domain(
            attributes=[ContinuousVariable('c1'),
                        DiscreteVariable('d1', values='abc'),
                        DiscreteVariable('d2', values='def')],
            class_vars=[DiscreteVariable('d3', values='ghi')],
            metas=[ContinuousVariable('c2'),
                   DiscreteVariable('d4', values='jkl')])
def seasonal_decompose(data, model='multiplicative', period=12, *,
                       callback=None):
    """
    Return table of decomposition components of original features and
    original features seasonally adjusted.

    Parameters
    ----------
    data : Timeseries
        A table of features to decompose/adjust.
    model : str {'additive', 'multiplicative'}
        A decomposition model. See:
        https://en.wikipedia.org/wiki/Decomposition_of_time_series
    period : int
        The period length of season.
    callback : callable
        Optional callback to call (with no parameters) after each iteration.

    Returns
    -------
    table : Timeseries
        Table with columns: original series seasonally adjusted, original
        series' seasonal components, trend components, and residual components.
    """
    from operator import sub, truediv
    from Orange.data import Domain, ContinuousVariable
    from orangecontrib.timeseries import Timeseries
    from orangecontrib.timeseries.widgets.utils import available_name
    import statsmodels.api as sm

    def _interp_trend(trend):
        # The trend from seasonal_decompose is NaN at both ends; extend it
        # by fitting a straight line to the first/last few valid points.
        # (val == val) is a NaN test.
        first = next(i for i, val in enumerate(trend) if val == val)
        last = trend.size - 1 - next(
            i for i, val in enumerate(trend[::-1]) if val == val)
        d = 3  # number of points used for the edge line fits
        first_last = min(first + d, last)
        last_first = max(first, last - d)

        # Least-squares line over the leading valid points, extrapolated
        # backwards over the NaN head.
        k, n = np.linalg.lstsq(
            np.column_stack((np.arange(first, first_last),
                             np.ones(first_last - first))),
            trend[first:first_last])[0]
        trend[:first] = np.arange(0, first) * k + n

        # Same for the trailing NaN tail.
        k, n = np.linalg.lstsq(
            np.column_stack((np.arange(last_first, last),
                             np.ones(last - last_first))),
            trend[last_first:last])[0]
        trend[last + 1:] = np.arange(last + 1, trend.size) * k + n
        return trend

    attrs = []
    X = []
    # Additive model removes components by subtraction, multiplicative by
    # division.
    recomposition = sub if model == 'additive' else truediv
    interp_data = data.interp()
    for var in data.domain.variables:
        # NOTE(review): the `freq` keyword was renamed to `period` in newer
        # statsmodels releases — confirm against the pinned version.
        decomposed = sm.tsa.seasonal_decompose(np.ravel(interp_data[:, var]),
                                               model=model, freq=period)
        adjusted = recomposition(decomposed.observed, decomposed.seasonal)

        season = decomposed.seasonal
        trend = _interp_trend(decomposed.trend)
        resid = recomposition(adjusted, trend)

        # Re-apply nans
        isnan = np.isnan(data[:, var]).ravel()
        adjusted[isnan] = np.nan
        trend[isnan] = np.nan
        resid[isnan] = np.nan

        # Four output columns per input variable, uniquely named.
        attrs.extend(
            ContinuousVariable(
                available_name(data.domain,
                               var.name + ' ({})'.format(transform)))
            for transform in ('season. adj.', 'seasonal',
                              'trend', 'residual'))
        X.extend((adjusted, season, trend, resid))

        if callback:
            callback()

    ts = Timeseries(Domain(attrs), np.column_stack(X))
    return ts
def constr_vars(inds):
    """Build continuous variables from byte-string names, ordered by index.

    Returns None (implicitly, as the original did) when *inds* is empty.
    """
    if not inds:
        return None
    ordered = sorted((ind, name) for name, ind in inds.items())
    return [ContinuousVariable(name.decode("utf-8")) for _, name in ordered]
def test_overflow(self):
    """The editor must clamp an excessive number of decimals."""
    variable = ContinuousVariable("var", number_of_decimals=10)
    editor = ContinuousVariableEditor(
        self.parent, variable, -100000, 1, self.callback)
    self.assertLess(editor._n_decimals, 10)
def test_continous(self):
    # NOTE: the "continous" typo is kept — the method name is the test id.
    variable = ContinuousVariable("X")
    self._test_common(variable)
def _get_projection_variables(self):
    """Return two continuous variables named uniquely w.r.t. the data domain."""
    unique = get_unique_names(
        self.data.domain, self.embedding_variables_names)
    first, second = unique[0], unique[1]
    return ContinuousVariable(first), ContinuousVariable(second)
def read(self):
    """Read a MATLAB .mat file into an Orange Table.

    The largest numeric array becomes X; a shape-compatible array (if any)
    supplies attribute names; remaining arrays of matching length become
    meta columns (string arrays -> StringVariable, 2-D numeric -> one
    ContinuousVariable per column).
    """
    who = matlab.whosmat(self.filename)
    if not who:
        raise IOError("Couldn't load matlab file " + self.filename)
    else:
        ml = matlab.loadmat(self.filename, chars_as_strings=True)
        # Keep only ndarray entries (drops __header__ etc.).
        ml = {a: b for a, b in ml.items() if isinstance(b, np.ndarray)}

        def num_elements(array):
            return reduce(lambda x, y: x * y, array.shape, 1)

        def find_biggest(arrays):
            # Name of the array with the most elements.
            sizes = []
            for n, c in arrays.items():
                sizes.append((num_elements(c), n))
            return max(sizes)[1]

        def is_string_array(array):
            return issubclass(array.dtype.type, np.str_)

        def is_number_array(array):
            return issubclass(array.dtype.type, numbers.Number)

        numeric = {n: a for n, a in ml.items() if is_number_array(a)}

        # X is the biggest numeric array
        X = ml.pop(find_biggest(numeric)) if numeric else None

        # find an array with compatible shapes
        attributes = []
        if X is not None:
            name_array = None
            for name in sorted(ml):
                con = ml[name]
                # An array with one entry per X column supplies the names.
                if con.shape in [(X.shape[1], ), (1, X.shape[1])]:
                    name_array = name
                    break
            names = ml.pop(name_array).ravel() if name_array \
                else range(X.shape[1])
            names = [str(a).rstrip() for a in names]  # remove matlab char padding
            attributes = [ContinuousVariable.make(a) for a in names]

        meta_names = []
        metas = []
        meta_size = None
        if X is None:
            # No X: the most common array length defines the meta row count.
            counts = defaultdict(list)
            for name, con in ml.items():
                counts[len(con)].append(name)
            if counts:
                meta_size = max(counts.keys(), key=lambda x: len(counts[x]))
        else:
            meta_size = len(X)
        if meta_size:
            for name, con in ml.items():
                if len(con) == meta_size:
                    meta_names.append(name)

        meta_data = []
        for m in sorted(meta_names):
            f = ml[m]
            if is_string_array(f) and len(f.shape) == 1:  # 1D string arrays
                metas.append(StringVariable.make(m))
                f = np.array([a.rstrip() for a in f])  # remove matlab char padding
                f.resize(meta_size, 1)
                meta_data.append(f)
            elif is_number_array(f) and len(f.shape) == 2:
                # Multi-column numeric metas get suffixed names.
                if f.shape[1] == 1:
                    names = [m]
                else:
                    names = [m + "_" + str(i + 1)
                             for i in range(f.shape[1])]
                for n in names:
                    metas.append(ContinuousVariable.make(n))
                meta_data.append(f)

        meta_data = np.hstack(tuple(meta_data)) if meta_data else None
        domain = Domain(attributes, metas=metas)
        if X is None:
            # Metas-only file: X is an empty (meta_size, 0) block.
            X = np.zeros((meta_size, 0))
        return Orange.data.Table.from_numpy(domain, X, Y=None,
                                            metas=meta_data)
def _add_similarity(data, dist):
    """Return *data* extended with a 'similarity' meta column.

    Similarity is 100 * (1 - d / max(d)), where d is each row's distance
    to its nearest neighbour (row-wise minimum of *dist*).
    """
    nearest = np.min(dist, axis=1)[:, None]
    similarity = 100 * (1 - nearest / np.max(nearest))
    metas = data.domain.metas + (ContinuousVariable("similarity"),)
    domain = Domain(data.domain.attributes, data.domain.class_vars, metas)
    return Table(domain, data.X, data.Y,
                 np.hstack((data.metas, similarity)))
def read(self):
    """Read a Bruker OPUS file via opusFC into an Orange Table.

    Dispatches on the opusFC DataReturn subtype to assemble spectra (X),
    attribute names (wavenumbers or labels) and map/time metas; selected
    OPUS parameters (SRT, SNM) are appended as extra meta columns.
    """
    try:
        import opusFC
    except ImportError:
        raise RuntimeError(self._OPUS_WARNING)

    if self.sheet:
        db = self.sheet
    else:
        db = self.sheets[0]

    db = tuple(db.split(" "))
    # NOTE(review): `dim` is computed but never used below — TODO confirm
    # whether it can be dropped.
    dim = db[1]

    try:
        data = opusFC.getOpusData(self.filename, db)
    except Exception:
        raise IOError("Couldn't load spectrum from " + self.filename)

    attrs, clses, metas = [], [], []

    # Default attributes: one per x-axis point (wavenumber), named by repr.
    attrs = [ContinuousVariable.make(repr(data.x[i]))
             for i in range(data.x.shape[0])]

    y_data = None
    meta_data = None

    if type(data) == opusFC.MultiRegionDataReturn:
        # Several map regions, each with coordinates, a title and a start time.
        y_data = []
        meta_data = []
        metas.extend([ContinuousVariable.make('map_x'),
                      ContinuousVariable.make('map_y'),
                      StringVariable.make('map_region'),
                      TimeVariable.make('start_time')])
        for region in data.regions:
            y_data.append(region.spectra)
            mapX = region.mapX
            mapY = region.mapY
            map_region = np.full_like(mapX, region.title, dtype=object)
            start_time = region.start_time
            meta_region = np.column_stack((mapX, mapY,
                                           map_region, start_time))
            meta_data.append(meta_region.astype(object))
        y_data = np.vstack(y_data)
        meta_data = np.vstack(meta_data)
    elif type(data) == opusFC.MultiRegionTRCDataReturn:
        # Multi-region time-resolved traces: labels replace wavenumbers.
        y_data = []
        meta_data = []
        metas.extend([ContinuousVariable.make('map_x'),
                      ContinuousVariable.make('map_y'),
                      StringVariable.make('map_region')])
        attrs = [ContinuousVariable.make(repr(data.labels[i]))
                 for i in range(len(data.labels))]
        for region in data.regions:
            y_data.append(region.spectra)
            mapX = region.mapX
            mapY = region.mapY
            map_region = np.full_like(mapX, region.title, dtype=object)
            meta_region = np.column_stack((mapX, mapY, map_region))
            meta_data.append(meta_region.astype(object))
        y_data = np.vstack(y_data)
        meta_data = np.vstack(meta_data)
    elif type(data) == opusFC.ImageDataReturn:
        # Hyperspectral image: one spectrum per (map_x, map_y) pixel.
        metas.extend([ContinuousVariable.make('map_x'),
                      ContinuousVariable.make('map_y')])
        data_3D = data.spectra
        for i in np.ndindex(data_3D.shape[:1]):
            map_y = np.full_like(data.mapX, data.mapY[i])
            coord = np.column_stack((data.mapX, map_y))
            if y_data is None:
                y_data = data_3D[i]
                meta_data = coord.astype(object)
            else:
                y_data = np.vstack((y_data, data_3D[i]))
                meta_data = np.vstack((meta_data, coord))
    elif type(data) == opusFC.ImageTRCDataReturn:
        # Image of time-resolved traces: labels replace wavenumbers.
        metas.extend([ContinuousVariable.make('map_x'),
                      ContinuousVariable.make('map_y')])
        attrs = [ContinuousVariable.make(repr(data.labels[i]))
                 for i in range(len(data.labels))]
        data_3D = data.traces
        for i in np.ndindex(data_3D.shape[:1]):
            map_y = np.full_like(data.mapX, data.mapY[i])
            coord = np.column_stack((data.mapX, map_y))
            if y_data is None:
                y_data = data_3D[i]
                meta_data = coord.astype(object)
            else:
                y_data = np.vstack((y_data, data_3D[i]))
                meta_data = np.vstack((meta_data, coord))
    elif type(data) == opusFC.TimeResolvedTRCDataReturn:
        y_data = data.traces
    elif type(data) == opusFC.TimeResolvedDataReturn:
        metas.extend([ContinuousVariable.make('z')])
        y_data = data.spectra
        meta_data = data.z
    elif type(data) == opusFC.SingleDataReturn:
        # Single spectrum: promote to a 1-row 2-D array.
        y_data = data.y[None, :]
    else:
        raise ValueError(
            "Empty or unsupported opusFC DataReturn object: " + type(data))

    # Selected OPUS parameters appended as constant meta columns.
    import_params = ['SRT', 'SNM']

    for param_key in import_params:
        try:
            param = data.parameters[param_key]
        except KeyError:
            pass  # TODO should notify user?
        else:
            try:
                param_name = opusFC.paramDict[param_key]
            except KeyError:
                param_name = param_key
            if param_key == 'SRT':
                var = TimeVariable.make(param_name)
            elif type(param) is float:
                var = ContinuousVariable.make(param_name)
            elif type(param) is str:
                var = StringVariable.make(param_name)
            else:
                raise ValueError #Found a type to handle
            metas.extend([var])
            params = np.full((y_data.shape[0], ), param,
                             np.array(param).dtype)
            if meta_data is not None:
                # NB dtype default will be np.array(fill_value).dtype in future
                meta_data = np.column_stack((meta_data,
                                             params.astype(object)))
            else:
                meta_data = params

    domain = Orange.data.Domain(attrs, clses, metas)

    meta_data = np.atleast_2d(meta_data)

    table = Orange.data.Table.from_numpy(domain,
                                         y_data.astype(float, order='C'),
                                         metas=meta_data)

    return table
def test_adjust_decimals(self):
    """val_from_str_add adapts the display precision to the values it has seen."""
    a = ContinuousVariable("a")
    # No values observed yet: full repr is shown.
    self.assertEqual(a.str_val(4.65432), "4.65432")
    a.val_from_str_add("5")
    # Only an integer observed: zero decimals.
    self.assertEqual(a.str_val(4.65432), "5")
    a.val_from_str_add(" 5.12 ")
    # Two decimals observed (whitespace is stripped): precision grows to 2.
    self.assertEqual(a.str_val(4.65432), "4.65")
    a.val_from_str_add("5.1234")
    # Four decimals observed: precision grows again.
    self.assertEqual(a.str_val(4.65432), "4.6543")
import unittest
import datetime
from collections import namedtuple

import numpy as np

from Orange.widgets.utils.state_summary import format_summary_details
from Orange.data import Table, Domain, StringVariable, ContinuousVariable, \
    DiscreteVariable, TimeVariable

# A variable together with its column of data, as used to assemble test tables.
VarDataPair = namedtuple('VarDataPair', ['variable', 'data'])

# Continuous variable variations
continuous_full = VarDataPair(
    ContinuousVariable('continuous_full'),
    np.array([0, 1, 2, 3, 4], dtype=float),
)
continuous_missing = VarDataPair(
    ContinuousVariable('continuous_missing'),
    # Same column with one missing (NaN) entry.
    np.array([0, 1, 2, np.nan, 4], dtype=float),
)

# Unordered discrete variable variations
rgb_full = VarDataPair(
    DiscreteVariable('rgb_full', values=['r', 'g', 'b']),
    np.array([0, 1, 1, 1, 2], dtype=float),
)
rgb_missing = VarDataPair(
    DiscreteVariable('rgb_missing', values=['r', 'g', 'b']),
    np.array([0, 1, 1, np.nan, 2], dtype=float),
)
def test_value_changes_yields_nothing_for_continuous_variables(self):
    """Continuous variables have no value lists, hence no value changes."""
    first = ContinuousVariable("a")
    second = ContinuousVariable("b")
    self.assertEmpty(self.report._value_changes(first, second))
def test_label_changes_yields_nothing_for_no_change(self):
    """An unchanged variable must produce no label changes.

    Fix: the original body called ``_value_changes``, which is always
    empty for continuous variables (see the sibling value-changes test),
    so this test passed vacuously without exercising ``_label_changes``
    at all. Call the method the test name promises to check.
    """
    v1 = ContinuousVariable("a")
    v1.attributes["a"] = "b"
    self.assertEmpty(self.report._label_changes(v1, v1))
def calculateFFT(self):
    """
    Calculate FFT from input interferogram(s).

    This is a handler method for
      - bad data / data shape
      - splitting the array in the case of two interferogram sweeps per dataset.
      - multiple input interferograms

    Based on mertz module by Eric Peach, 2014
    """
    wavenumbers = None
    spectra = []
    phases = []
    zpd_fwd = []    # zero-path-difference positions, forward sweeps
    zpd_back = []   # zero-path-difference positions, backward sweeps

    # Reset info, error and warning dialogs
    self.Error.clear()
    self.Warning.clear()

    # Default transform: one interferogram row at a time.
    fft_single = irfft.IRFFT(
        dx=self.dx,
        apod_func=self.apod_func,
        zff=2**self.zff,
        phase_res=self.phase_resolution if self.phase_res_limit else None,
        phase_corr=self.phase_corr,
        peak_search=self.peak_search,
        )

    ifg_data = self.data.X
    stored_phase = self.stored_phase
    stored_zpd_fwd, stored_zpd_back = None, None
    # Only use first row stored phase for now
    if stored_phase is not None:
        stored_phase = stored_phase[0]
        try:
            stored_zpd_fwd = int(stored_phase["zpd_fwd"].value)
        except ValueError:
            stored_zpd_fwd = None
        try:
            stored_zpd_back = int(stored_phase["zpd_back"].value)
        except ValueError:
            stored_zpd_back = None
        stored_phase = stored_phase.x  # lowercase x for RowInstance
    # Use manual zpd value(s) if specified and enable batch processing
    elif not self.peak_search_enable:
        stored_zpd_fwd = self.zpd1
        stored_zpd_back = self.zpd2
        # Fixed zpd allows chunked batch processing via MultiIRFFT.
        chunks = max(1, len(self.data) // CHUNK_SIZE)
        ifg_data = np.array_split(self.data.X, chunks, axis=0)
        fft_single = irfft.MultiIRFFT(
            dx=self.dx,
            apod_func=self.apod_func,
            zff=2**self.zff,
            phase_res=self.phase_resolution if self.phase_res_limit else None,
            phase_corr=self.phase_corr,
            peak_search=self.peak_search,
        )

    if self.reader == 'NeaReaderGSF':
        # Complex-valued input: rows alternate amplitude / phase; this
        # branch emits interleaved spectrum and phase rows, sends only the
        # spectra output, and returns early.
        fft_single = irfft.ComplexFFT(
            dx=self.dx,
            apod_func=self.apod_func,
            zff=2**self.zff,
            phase_res=self.phase_resolution if self.phase_res_limit else None,
            phase_corr=self.phase_corr,
            peak_search=self.peak_search,
        )
        full_data = self.data.X[::2] * np.exp(self.data.X[1::2] * 1j)
        for row in full_data:
            spectrum_out, phase_out, wavenumbers = fft_single(
                row, zpd=stored_zpd_fwd)
            spectra.append(spectrum_out)
            spectra.append(phase_out)
        spectra = np.vstack(spectra)
        if self.limit_output is True:
            wavenumbers, spectra = self.limit_range(wavenumbers, spectra)
        self.spectra_table = build_spec_table(wavenumbers, spectra,
                                              additional_table=self.data)
        self.Outputs.spectra.send(self.spectra_table)
        return

    for row in ifg_data:
        if self.sweeps in [2, 3]:
            # split double-sweep for forward/backward
            # forward: 2-2 = 0 , backward: 3-2 = 1
            try:
                row = np.hsplit(row, 2)[self.sweeps - 2]
            except ValueError as e:
                self.Error.ifg_split_error(e)
                return

        if self.sweeps in [0, 2, 3]:
            # Single sweep (or the selected half of a double sweep).
            try:
                spectrum_out, phase_out, wavenumbers = fft_single(
                    row, zpd=stored_zpd_fwd, phase=stored_phase)
                zpd_fwd.append(fft_single.zpd)
            except ValueError as e:
                self.Error.fft_error(e)
                return
        elif self.sweeps == 1:
            # Double sweep interferogram is split, solved independently and the
            # two results are averaged.
            try:
                data = np.hsplit(row, 2)
            except ValueError as e:
                self.Error.ifg_split_error(e)
                return

            fwd = data[0]
            # Reverse backward sweep to match fwd sweep
            back = data[1][::-1]

            # Calculate spectrum for both forward and backward sweeps
            try:
                spectrum_fwd, phase_fwd, wavenumbers = fft_single(
                    fwd, zpd=stored_zpd_fwd, phase=stored_phase)
                zpd_fwd.append(fft_single.zpd)
                spectrum_back, phase_back, wavenumbers = fft_single(
                    back, zpd=stored_zpd_back, phase=stored_phase)
                zpd_back.append(fft_single.zpd)
            except ValueError as e:
                self.Error.fft_error(e)
                return

            # Calculate the average of the forward and backward sweeps
            spectrum_out = np.mean(np.array([spectrum_fwd, spectrum_back]),
                                   axis=0)
            phase_out = np.mean(np.array([phase_fwd, phase_back]), axis=0)
        else:
            # Unknown sweep mode: nothing to compute.
            return

        spectra.append(spectrum_out)
        phases.append(phase_out)

    spectra = np.vstack(spectra)
    phases = np.vstack(phases)

    self.phases_table = build_spec_table(wavenumbers, phases,
                                         additional_table=self.data)
    if not self.peak_search_enable:
        # All zpd values are equal by definition
        zpd_fwd = zpd_fwd[:1]
    self.phases_table = add_meta_to_table(
        self.phases_table, ContinuousVariable.make("zpd_fwd"), zpd_fwd)
    if zpd_back:
        if not self.peak_search_enable:
            zpd_back = zpd_back[:1]
        self.phases_table = add_meta_to_table(
            self.phases_table, ContinuousVariable.make("zpd_back"), zpd_back)

    if self.limit_output is True:
        wavenumbers, spectra = self.limit_range(wavenumbers, spectra)
    self.spectra_table = build_spec_table(wavenumbers, spectra,
                                          additional_table=self.data)
    self.Outputs.spectra.send(self.spectra_table)
    self.Outputs.phases.send(self.phases_table)
def test_palette(self):
    """Palette resolution: explicit > 'palette' attribute > colors > default."""
    rainbow = ContinuousPalettes["rainbow_bgyr_35_85_c73"]

    # A directly assigned palette is returned as-is.
    var = ContinuousVariable("a")
    var.palette = rainbow
    self.assertIs(var.palette, rainbow)

    # A palette can be selected by name through the 'palette' attribute.
    var = ContinuousVariable("a")
    var.attributes["palette"] = rainbow.name
    self.assertIs(var.palette, rainbow)

    # Without any hints the default continuous palette is used.
    var = ContinuousVariable("a")
    self.assertIs(var.palette, DefaultContinuousPalette)

    # A 'colors' attribute (hex strings) is converted via from_colors.
    with patch.object(ContinuousPalette, "from_colors") as from_colors:
        var = ContinuousVariable("a")
        var.attributes["colors"] = ('#0a0b0c', '#0d0e0f', False)
        result = var.palette
        from_colors.assert_called_with((10, 11, 12), (13, 14, 15), False)
        self.assertIs(result, from_colors.return_value)

    # Directly assigned colors also go through from_colors.
    with patch.object(ContinuousPalette, "from_colors") as from_colors:
        var = ContinuousVariable("a")
        var.colors = (10, 11, 12), (13, 14, 15), False
        result = var.palette
        from_colors.assert_called_with((10, 11, 12), (13, 14, 15), False)
        self.assertIs(result, from_colors.return_value)