def capture_image(self):
    """Grab a frame from the webcam, save snapshots and emit output tables.

    Side effects: writes two PNG files (full frame and aspect-clipped),
    sends a one-row Table on each snapshot output channel, arms the UI
    flash counter, and consumes (resets) the user-entered image title.
    """
    cap = self.cap
    for i in range(3):
        # Need some warmup time; use the last frame
        success, frame = cap.read()
    if success:
        self.Error.no_webcam.clear()
    else:
        self.Error.no_webcam()
        return

    def normalize(name):
        # Make the title filename-safe: spaces -> underscores, then keep
        # only letters and connector/dash punctuation after NFD decomposition
        # (which also strips accents from their base characters).
        return ''.join(ch for ch in unicodedata.normalize('NFD', name.replace(' ', '_'))
                       if unicodedata.category(ch) in 'LuLlPcPd')

    timestamp = datetime.now().strftime('%Y%m%d%H%M%S.%f')
    # Consume the user-entered title (and clear the field), falling back to
    # the default title when empty.
    image_title, self.image_title = self.image_title or self.DEFAULT_TITLE, ''
    normed_name = normalize(image_title)
    for image, suffix, output in (
            (frame, '', self.Output.SNAPSHOT),
            (self.clip_aspect_frame(frame), '_aspect', self.Output.SNAPSHOT_ASPECT)):
        path = os.path.join(
            self.IMAGE_DIR,
            # NOTE: format(**locals()) picks up normed_name/timestamp/suffix
            '{normed_name}_{timestamp}{suffix}.png'.format(**locals()))
        cv2.imwrite(path,
                    # imwrite expects original bgr image, so this is reversed
                    self.bgr2rgb(image) if self.avatar_filter else image)
        image_var = StringVariable('image')
        image_var.attributes['type'] = 'image'
        table = Table.from_numpy(
            Domain([], metas=[StringVariable('name'), image_var]),
            np.empty((1, 0)),
            metas=np.array([[image_title, path]]))
        self.send(output, table)
    self.snapshot_flash = 80
def test_nyt_corpus_domain_generation(self):
    """Querying NYT should yield a corpus with the expected meta layout."""
    result = self.nyt.run_query('slovenia')
    expected_metas = [StringVariable.make(name)
                      for name in list(NYT_TEXT_FIELDS) + ['pub_date', 'country']]
    self.assertEqual(len(expected_metas), len(result.domain.metas))
    self.assertEqual(len(result.Y), 10)
def concatenate_data(tables, filenames, label):
    """Concatenate spectra tables into one table on a unified x-axis domain.

    `tables` items are either Orange Tables or (xs, X, table) triples for
    raw spectral blocks; `filenames` pairs with `tables`; `label` is a
    single label applied to all rows.
    """
    domain, xs = domain_union_for_spectra(tables)
    ntables = [(table if isinstance(table, Table) else table[2]).transform(domain)
               for table in tables]
    data = type(ntables[0]).concatenate(ntables, axis=0)
    source_var = StringVariable.make("Filename")
    label_var = StringVariable.make("Label")

    # add other variables
    xs_atts = tuple([ContinuousVariable.make("%f" % f) for f in xs])
    domain = Domain(xs_atts + domain.attributes, domain.class_vars,
                    domain.metas + (source_var, label_var))
    data = data.transform(domain)

    # fill in spectral data
    xs_sind = np.argsort(xs)
    xs_sorted = xs[xs_sind]
    pos = 0
    for table in tables:
        t = table if isinstance(table, Table) else table[2]
        if not isinstance(table, Table):
            # map the block's own wavelengths (table[0]) onto the union axis
            indices = xs_sind[np.searchsorted(xs_sorted, table[0])]
            data.X[pos:pos + len(t), indices] = table[1]
        pos += len(t)

    # one filename per source table, repeated for each of its rows
    data[:, source_var] = np.array(list(
        chain(*(repeat(fn, len(table)) for fn, table in zip(filenames, ntables)))
    )).reshape(-1, 1)
    # the same label for every row
    data[:, label_var] = np.array(list(
        chain(*(repeat(label, len(table)) for fn, table in zip(filenames, ntables)))
    )).reshape(-1, 1)
    return data
def test_val(self):
    """String-variable conversions of missing and present values."""
    var = StringVariable("a")
    self.assertEqual(var.to_val(None), "")
    self.assertEqual(var.str_val(Unknown), "?")
    self.assertEqual(var.str_val(Value(var, None)), "None")
    self.assertEqual(var.repr_val(Value(var, "foo")), '"foo"')
def test_to_val(self):
    """to_val coerces values according to the variable's type."""
    svar = StringVariable("x")
    self.assertEqual(svar.to_val("foo"), "foo")
    self.assertEqual(svar.to_val(42), "42")

    cvar = ContinuousVariable("x")
    self.assertTrue(math.isnan(cvar.to_val("?")))
    self.assertTrue(math.isnan(Unknown))

    base_var = Variable("x")
    self.assertEqual(base_var.to_val("x"), "x")
def create_domain(at, cl, metas):
    """Create a Domain compatibly across Orange 3 and Orange 2."""
    if OR3:
        # Orange3: Domain accepts metas directly.
        return Orange.data.Domain(at, cl, metas=metas)
    else:
        domain = Orange.data.Domain(at, cl)
        if metas:
            if isinstance(metas, dict):
                # normalize dict to (key, variable) pairs in sorted-key order
                metas = sorted(metas.items())
            else:
                metas = zip([StringVariable.new_meta_id() for _ in metas], metas)
            # NOTE(review): the ids produced above (mi / the zip's first
            # column) are discarded and fresh ids are generated here, so
            # new_meta_id() is called twice per meta — presumably harmless
            # since only uniqueness matters, but confirm before simplifying.
            domain.add_metas(dict((StringVariable.new_meta_id(), ma)
                                  for mi, ma in metas))
        return domain
def test_proxy_has_separate_attributes(self):
    """Each proxy in a chain keeps its own attributes dict."""
    base = StringVariable("image")
    proxy1 = base.make_proxy()
    proxy2 = proxy1.make_proxy()

    for var, origin in ((base, "a"), (proxy1, "b"), (proxy2, "c")):
        var.attributes["origin"] = origin

    self.assertEqual(base.attributes["origin"], "a")
    self.assertEqual(proxy1.attributes["origin"], "b")
    self.assertEqual(proxy2.attributes["origin"], "c")
def parse_record_json(record, includes_metadata):
    """
    Parses the JSON representation of the record returned by the New York
    Times Article API.

    :param record: The JSON representation of the query's results.
    :param includes_metadata: The flags that determine which fields to include.
    :return: A list of articles parsed into documents and a list of the
        corresponding metadata, joined in a tuple.
    """
    text_fields = ["headline", "lead_paragraph", "snippet", "abstract", "keywords"]
    documents = []
    class_values = []
    meta_vars = [StringVariable.make(field)
                 for field, flag in zip(text_fields, includes_metadata) if flag]
    # Also add pub_date and glocation.
    meta_vars += [StringVariable.make("pub_date"), StringVariable.make("country")]

    # Collect rows in a list and stack once at the end: np.vstack inside
    # the loop reallocates the whole array per document (O(n^2)).
    meta_rows = []
    for doc in record["response"]["docs"]:
        string_document = ""
        metas_row = []
        for field, flag in zip(text_fields, includes_metadata):
            if flag and field in doc:
                field_value = ""
                if isinstance(doc[field], dict):
                    field_value = " ".join(val for val in doc[field].values() if val)
                elif isinstance(doc[field], list):
                    field_value = " ".join(kw["value"] for kw in doc[field] if kw)
                elif doc[field]:
                    field_value = doc[field]
                string_document += field_value
                metas_row.append(field_value)
        # Add the pub_date.
        metas_row.append(doc["pub_date"] if doc.get("pub_date") else "")
        # Add the glocation.
        metas_row.append(",".join(kw["value"] for kw in doc["keywords"]
                                  if kw["name"] == "glocations"))
        # Add the section_name.
        class_values.append(doc["section_name"] if doc.get("section_name") else "")
        documents.append(string_document)
        meta_rows.append(metas_row)

    if meta_rows:
        metadata = np.array(meta_rows, dtype=object)
    else:
        metadata = np.empty((0, len(meta_vars)), dtype=object)
    return documents, metadata, meta_vars, class_values
def _create_corpus(self):
    """Build a Corpus from self._text_data (name/path/content per document).

    The document category becomes the class variable, unless all documents
    share a single category, in which case no class variable is created.
    Returns None when there are no documents.
    """
    corpus = None
    names = ["name", "path", "content"]
    data = []
    category_data = []
    text_categories = list(set(t.category for t in self._text_data))
    values = list(set(text_categories))
    category_var = DiscreteVariable.make("category", values=values)
    for textdata in self._text_data:
        data.append(
            [textdata.name,
             textdata.path,
             textdata.content]
        )
        category_data.append(category_var.to_val(textdata.category))
    if len(text_categories) > 1:
        category_data = np.array(category_data)
    else:
        # a single category carries no information -> drop the class column
        category_var = []
        category_data = np.empty((len(data), 0))
    domain = Domain(
        [], category_var, [StringVariable.make(name) for name in names]
    )
    # mark the "name" meta as the row title for display purposes
    domain["name"].attributes["title"] = True
    data = np.array(data, dtype=object)
    if len(data):
        # the "content" meta (index 2) is the text feature
        corpus = Corpus(domain,
                        Y=category_data,
                        metas=data,
                        text_features=[domain.metas[2]])
    return corpus
def _corpus_from_records(records, includes_metadata):
    """Receives PubMed records and transforms them into a corpus.

    Args:
        records (list): A list of PubMed entries.
        includes_metadata (list): A list of text fields to include.

    Returns:
        corpus: The output Corpus.
    """
    meta_values, class_values = _records_to_corpus_entries(
        records,
        includes_metadata=includes_metadata
    )
    meta_vars = []
    for field_name, _ in includes_metadata:
        if field_name == 'pub_date':
            # publication dates get a proper time variable instead of a string
            meta_vars.append(TimeVariable(field_name))
        else:
            meta_vars.append(StringVariable.make(field_name))
    class_vars = [
        DiscreteVariable('section_name', values=list(set(class_values)))
    ]
    domain = Domain([], class_vars=class_vars, metas=meta_vars)
    # map class strings to their discrete indices, as a column vector
    Y = np.array([class_vars[0].to_val(cv) for cv in class_values])[:, None]
    return Corpus(domain=domain, Y=Y, metas=meta_values)
def generate_corpus(self, url_list):
    """
    Build a corpus holding, for each URL, the text fields selected
    in the widget.

    :param url_list: iterable of article URLs
    :return: None; the corpus is stored on self.output_corpus and sent
        to the "Corpus" output channel
    """
    include_flags = [self.includes_article, self.includes_author,
                     self.includes_date, self.includes_title,
                     self.includes_web_url]
    if not any(include_flags):
        self.warning(1, "You must select at least one text field.")
        return

    wanted_fields = [field for selected, field
                     in zip(include_flags, ARTICLE_TEXT_FIELDS) if selected]
    meta_vars = [StringVariable.make(field) for field in wanted_fields]

    rows = []
    for url in url_list:
        info, is_cached = _get_info(url)
        rows.append([value for selected, value
                     in zip(include_flags, info) if selected])
    metadata = np.array(rows, dtype=object)

    domain = Domain([], class_vars=None, metas=meta_vars)
    self.output_corpus = Corpus(None, None, metadata, domain, meta_vars)
    self.send("Corpus", self.output_corpus)
def create_domain(at, cl, metas):
    """Create a Domain compatibly across Orange 3 and Orange 2."""
    if OR3:
        # Orange3: Domain accepts metas directly.
        return Orange.data.Domain(at, cl, metas=metas)
    else:
        domain = Orange.data.Domain(at, cl)
        if metas:
            # NOTE(review): Orange2 meta ids are decreasing, so the metas end
            # up keyed in reverse generation order here; confirm whether the
            # caller relies on meta ordering before changing this.
            domain.add_metas(dict((StringVariable.new_meta_id(), ma)
                                  for ma in metas))
        return domain
def create_domain(at, cl, metas):
    """Create a Domain compatibly across Orange 3 and Orange 2."""
    if OR3:
        # Orange3: Domain accepts metas directly.
        return Orange.data.Domain(at, cl, metas=metas)
    else:
        domain = Orange.data.Domain(at, cl)
        if metas:
            # add metas in the reverse order (because meta ids are always
            # decreasing); this allows us to pass metas in the same order
            # to create_table
            metas = zip([StringVariable.new_meta_id() for _ in metas],
                        reversed(metas))
            domain.add_metas(dict(metas))
        return domain
def _generate_corpus(records, required_text_fields):
    """
    Generates a corpus from the input NYT records.

    :param records: The input NYT records.
    :type records: list
    :param required_text_fields: A list of the available NYT text fields.
    :type required_text_fields: list
    :return: :class: `orangecontrib.text.corpus.Corpus`
    """
    metas, class_values = _parse_record_json(records, required_text_fields)

    # One document per record: its non-missing text fields joined by spaces.
    documents = [" ".join(d for d in doc if d is not None).strip()
                 for doc in metas]

    # Create domain.
    meta_names = list(required_text_fields) + ["pub_date", "country"]
    meta_vars = [StringVariable.make(name) for name in meta_names]
    class_vars = [DiscreteVariable("section_name",
                                   values=list(set(class_values)))]
    domain = Domain([], class_vars=class_vars, metas=meta_vars)

    Y = np.array([class_vars[0].to_val(cv) for cv in class_values])[:, None]
    return Corpus(documents, None, Y, metas, domain)
def test_asserting_errors(self):
    """Corpus construction and set_text_features validate their inputs."""
    corpus = Corpus.from_file('book-excerpts')

    with self.assertRaises(TypeError):
        Corpus(1.0, corpus.Y, corpus.metas, corpus.domain,
               corpus.text_features)

    oversized_x = np.vstack((corpus.X, corpus.X))
    with self.assertRaises(ValueError):
        Corpus(corpus.domain, oversized_x, corpus.Y, corpus.metas,
               corpus.W, corpus.text_features)

    with self.assertRaises(ValueError):
        corpus.set_text_features([StringVariable('foobar')])
    with self.assertRaises(ValueError):
        corpus.set_text_features(
            [corpus.domain.metas[0], corpus.domain.metas[0]])
def commit(self):
    """Send a Pearson/Spearman correlations table (or None) downstream."""
    table = None
    wants_target_corr = (
        self.correlations_type == 2 and self.target_variable
        and self.target_variable.is_continuous)
    if self.data is not None and wants_target_corr:
        pearson = ContinuousVariable.make("Pearson")
        spearman = ContinuousVariable.make("Spearman")
        row_name = StringVariable.make("Variable")
        domain = Orange.data.Domain([pearson, spearman], metas=[row_name])
        table = Orange.data.Table(domain, self.target_correlations)
        # label each row with the corresponding variable name
        for inst, name in zip(table, self.var_names):
            inst[row_name] = name
    self.send("Correlations", table)
def send_coefficients(self):
    """
    Send the fitted logistic-regression coefficients (theta) to the
    output, or None when no fitted model is available.
    """
    learner = self.learner
    if learner is None or learner.theta is None:
        self.Outputs.coefficients.send(None)
        return

    coef_var = ContinuousVariable("Coefficients", number_of_decimals=7)
    domain = Domain([coef_var], metas=[StringVariable("Name")])
    rows = list(zip(list(learner.theta), ["theta 0", "theta 1"]))
    self.Outputs.coefficients.send(Table(domain, rows))
def graph_to_table(G):
    """Builds a Data Table from node values.

    Serializes all node attributes to a temporary .tab file and reads it
    back, letting the Orange reader infer column types. An empty graph
    yields an empty table with a single "id" string meta.
    """
    if G.number_of_nodes() > 0:
        # union of attribute keys over all nodes
        features = list(set(itertools.chain.from_iterable(node.keys()
                            for node in G.node.values())))
        # Tabs would break the .tab format, so strip them from string values;
        # missing values become '?'. The `1` default in the isinstance probe
        # just routes absent keys to the str() branch.
        data = [[node.get(f).replace('\t', ' ')
                 if isinstance(node.get(f, 1), str)
                 else str(node.get(f, '?'))
                 for f in features]
                for node in G.node.values()]
        fp = tempfile.NamedTemporaryFile('wt', suffix='.tab', delete=False)
        fp.write('\n'.join('\t'.join(line) for line in [features] + data))
        fp.close()
        table = Table(fp.name)
        os.unlink(fp.name)
    else:
        table = Table(Domain([], [], [StringVariable("id")]), [])
    return table
def test_duplicate_names(self):
    """Clashing meta names across inputs get suffixed and a warning shown."""
    disc_domain = Domain(
        [ContinuousVariable("C1")],
        metas=[DiscreteVariable("Feature", values=("A", "B"))])
    data = Table(disc_domain,
                 np.array([[1.], [0.]]), metas=np.array([[1.], [0.]]))

    str_domain = Domain([ContinuousVariable("C1")],
                        metas=[StringVariable("Feature")])
    extra_data = Table(str_domain,
                       np.array([[1.], [0.]]), metas=np.array([["A"], ["B"]]))

    self.send_signal(self.widget.Inputs.data, data)
    self.send_signal(self.widget.Inputs.extra_data, extra_data)
    self.assertTrue(self.widget.Warning.renamed_vars.is_shown())

    merged = self.get_output(self.widget.Outputs.data)
    self.assertListEqual([m.name for m in merged.domain.metas],
                         ["Feature (1)", "Feature (2)"])
def update_model(self):
    """Refresh the model and publish its coefficient table (regression only)."""
    super().update_model()
    coeffs = None
    model = self.model
    if model is not None:
        if model.domain.class_var.is_discrete:
            coeffs = create_coef_table(model)
        else:
            domain = Domain([ContinuousVariable("coef")],
                            metas=[StringVariable("name")])
            values = list(model.intercept) + list(model.coefficients)
            labels = ["intercept"] + [attr.name
                                      for attr in model.domain.attributes]
            coeffs = Table.from_list(domain, list(zip(values, labels)))
            coeffs.name = "coefficients"
    self.Outputs.coefficients.send(coeffs)
def create_domain(*ss):
    """Build a Domain whose sections pick names from a fixed variable pool."""
    Variable._clear_all_caches()
    # `catalog` avoids shadowing the builtin `vars`
    catalog = dict(
        age=ContinuousVariable(name="AGE"),
        gender=DiscreteVariable(name="Gender", values=["M", "F"]),
        incomeA=ContinuousVariable(name="incomeA"),
        income=ContinuousVariable(name="income"),
        education=DiscreteVariable(name="education", values=["GS", "HS", "C"]),
        ssn=StringVariable(name="SSN"),
        race=DiscreteVariable(name="race",
                              values=["White", "Hypsanic", "African", "Other"]))

    return Domain(*[[catalog[name] for name in s] for s in ss])
def __getitem__(self, key):
    """Return the pixmap for a vartype, lazily building the cache on first
    access and falling back to the 'unknown' icon for unmapped keys."""
    if not self:
        icon_specs = (
            (vartype(ContinuousVariable("c")), "N", (202, 0, 32)),
            (vartype(DiscreteVariable("d")), "C", (26, 150, 65)),
            (vartype(StringVariable("s")), "S", (0, 0, 0)),
            (vartype(TimeVariable("t")), "T", (68, 170, 255)),
            (-1, "?", (128, 128, 128)),
        )
        for tpe, char, col in icon_specs:
            self[tpe] = createAttributePixmap(char, QtGui.QColor(*col))
    if key not in self:
        key = vartype(key) if isinstance(key, Variable) else -1
    return super().__getitem__(key)
def test_labels(self):
    """Choosing a string meta for annotation clears the label colors."""
    x, y = ContinuousVariable("x"), ContinuousVariable("y")
    name_var = StringVariable("s")
    grades = Table.from_list(
        Domain([x, y], [], [name_var]),
        [[91.0, 89.0, "Bill"],
         [51.0, 100.0, "Cynthia"],
         [9.0, 61.0, "Demi"],
         [49.0, 92.0, "Fred"],
         [91.0, 49.0, "George"]])
    self.widget.set_distances(Euclidean(grades))

    combo = self.widget.annot_combo
    idx = combo.model().indexOf(grades.domain.metas[0])
    combo.setCurrentIndex(idx)
    combo.activated.emit(idx)

    self.assertIsNone(self.widget.tablemodel.label_colors)
def commit(self):
    """Send ranked feature interactions and the selected features downstream."""
    if self.data is None or self.disc_data is None:
        self.Outputs.features.send(None)
        self.Outputs.interactions.send(None)
        return

    domain = Domain(
        [ContinuousVariable("Interaction"),
         ContinuousVariable("Entropy Removed")],
        metas=[StringVariable("Feature 1"), StringVariable("Feature 2")])
    model = self.vizrank.rank_model
    n_rows = model.rowCount()
    scores = np.array(
        [[float(model.data(model.index(row, 0), role))
          for role in (InteractionRank.IntRole, InteractionRank.RemovedRole)]
         for row in range(n_rows)])
    pair_names = np.array(
        [[a.name for a in model.data(model.index(row, 0),
                                     InteractionRank._AttrRole)]
         for row in range(n_rows)], dtype=object)
    int_table = Table(domain, scores, metas=pair_names)
    int_table.name = "Interactions"

    # data has been imputed; send original attributes
    self.Outputs.features.send(AttributeList(
        [self.data.domain[var.name] for var in self.selection]))
    self.Outputs.interactions.send(int_table)
def test_preprocess(self):
    """_preprocess keeps only continuous attributes; class/metas unchanged."""
    domain = Domain(
        [ContinuousVariable("c"), DiscreteVariable("d", values=['a', 'b'])],
        [DiscreteVariable("cls", values=['e', 'f'])],
        [StringVariable("m")])
    table = Table(domain, [[1, 'a', 'e', 'm1'], [2, 'b', 'f', 'm2']])

    processed = _preprocess(table)

    np.testing.assert_equal(processed.X, table.X[:, 0].reshape(2, 1))
    np.testing.assert_equal(processed.Y, table.Y)
    np.testing.assert_equal(processed.metas, table.metas)
    self.assertEqual(
        [a.name for a in processed.domain.attributes],
        [a.name for a in table.domain.attributes if a.is_continuous])
    self.assertEqual(processed.domain.class_vars, table.domain.class_vars)
    self.assertEqual(processed.domain.metas, table.domain.metas)
def create_domain(*ss):
    """Build a Domain whose sections pick names from a fixed variable pool."""
    pool = dict(
        age=ContinuousVariable(name="AGE"),
        gender=DiscreteVariable(name="Gender", values=("M", "F")),
        incomeA=ContinuousVariable(name="incomeA"),
        income=ContinuousVariable(name="income"),
        education=DiscreteVariable(name="education", values=("GS", "HS", "C")),
        ssn=StringVariable(name="SSN"),
        race=DiscreteVariable(name="race",
                              values=("White", "Hypsanic", "African", "Other")),
        arrival=TimeVariable("arrival"))

    return Domain(*[[pool[name] for name in s] for s in ss])
def generate(self):
    """Build the selected graph type and send it (with an 'id' node table)."""
    name, func, args, *_ = self.GRAPH_TYPES[self.graph_type]
    arg_values = tuple(self.arguments[_ctrl_name(name, arg)] for arg in args)
    self.Error.generation_error.clear()
    try:
        network = func(*arg_values)
    except ValueError as exc:
        self.Error.generation_error(exc)
        network = None
    else:
        n = len(network.nodes)
        # attach a minimal node table: a single string meta holding the index
        network.nodes = Table(Domain([], [], [StringVariable("id")]),
                              np.zeros((n, 0)), np.zeros((n, 0)),
                              np.arange(n).reshape((n, 1)))
    self.Outputs.network.send(network)
def test_value_string_substring_flags(self):
    """Flag attributes are forwarded to map_by_substring on transform."""
    trans = ValueFromStringSubstring(StringVariable("x"), self.patterns)
    with patch('Orange.widgets.data.owcreateclass.map_by_substring') as mbs:
        trans.case_sensitive = True
        trans.transform(self.arr)
        sensitive, beginning = mbs.call_args[0][-2:]
        self.assertTrue(sensitive)
        self.assertFalse(beginning)

        trans.case_sensitive = False
        trans.match_beginning = True
        trans.transform(self.arr)
        sensitive, beginning = mbs.call_args[0][-2:]
        self.assertFalse(sensitive)
        self.assertTrue(beginning)
def setUp(self):
    """Create the widget and two small fixture tables."""
    #: OWAggregateColumns
    self.widget = self.create_widget(OWAggregateColumns)

    c1, c2, c3 = (ContinuousVariable(n) for n in ("c1", "c2", "c3"))
    t1, t2 = (TimeVariable(n) for n in ("t1", "t2"))
    d1, d2, d3 = (DiscreteVariable(n, values=("a", "b", "c"))
                  for n in ("d1", "d2", "d3"))
    s1 = StringVariable("s1")

    domain1 = Domain([c1, c2, d1, d2, t1], [d3], [s1, c3, t2])
    self.data1 = Table.from_list(
        domain1,
        [[0, 1, 0, 1, 2, 0, "foo", 0, 3],
         [3, 1, 0, 1, 42, 0, "bar", 0, 4]])

    domain2 = Domain([ContinuousVariable("c4")])
    self.data2 = Table.from_list(domain2, [[4], [5]])
def commit(self):
    """Send data, the selected features and the correlations table."""
    if self.data is None or self.cont_data is None:
        self.Outputs.data.send(self.data)
        self.Outputs.features.send(None)
        self.Outputs.correlations.send(None)
        return

    domain = Domain([ContinuousVariable("Correlation")],
                    metas=[StringVariable("Feature 1"),
                           StringVariable("Feature 2")])
    model = self.vizrank.rank_model
    n_rows = model.rowCount()
    scores = np.array([[float(model.data(model.index(row, 0)))]
                       for row in range(n_rows)])
    # pylint: disable=protected-access
    pair_names = np.array(
        [[a.name for a in model.data(model.index(row, 0),
                                     CorrelationRank._AttrRole)]
         for row in range(n_rows)], dtype=object)
    corr_table = Table(domain, scores, metas=pair_names)
    corr_table.name = "Correlations"

    self.Outputs.data.send(self.data)
    # data has been imputed; send original attributes
    self.Outputs.features.send(AttributeList(
        [attr.compute_value.variable for attr in self.selection]))
    self.Outputs.correlations.send(corr_table)
def test_num_meta_labels(self):
    """Numeric meta labels render their value; missing values render '?'."""
    x, y = ContinuousVariable("x"), ContinuousVariable("y")
    s = StringVariable("s")
    data = Table.from_list(Domain([x], [], [y, s]),
                           [[0, 1, "a"], [1, np.nan, "b"]])
    self.widget.set_distances(Euclidean(data))

    combo = self.widget.annot_combo
    idx = combo.model().indexOf(y)
    combo.setCurrentIndex(idx)
    combo.activated.emit(idx)

    self.assertEqual(self.widget.tablemodel.labels, ["1", "?"])
def setUp(self):
    """Prepare iris fixtures with genes both as attributes and as rows."""
    self.widget = self.create_widget(OWClusterAnalysis)

    self.data_table = Table("iris")
    self.data_table.attributes[GENE_AS_ATTRIBUTE_NAME] = True
    self.data_table.attributes[GENE_ID_ATTRIBUTE] = NCBI_ID
    for i, var in enumerate(self.data_table.domain.attributes):
        var.attributes[NCBI_ID] = str(i)

    attr_domain = Domain(self.data_table.domain.attributes[0:2])
    self.genes_as_attributes = self.data_table.transform(attr_domain)

    row_domain = Domain([], metas=[StringVariable("Gene ID")])
    self.genes_as_rows = Table.from_list(row_domain, [["1"], ["2"]])
    self.genes_as_rows.attributes[GENE_AS_ATTRIBUTE_NAME] = False
    self.genes_as_rows.attributes[GENE_ID_COLUMN] = "Gene ID"
def test_remove_discrete(self):
    """remove_discrete_features drops discrete attrs, or demotes to metas."""
    d1, d2, d3 = (DiscreteVariable(name, values=tuple("123"))
                  for name in "abc")
    c1, c2 = (ContinuousVariable(name) for name in "xy")
    t = StringVariable("t")
    data = Table.from_domain(Domain([d1, c1], d2, [c2, d3, t]), 5)

    dropped = distance.remove_discrete_features(data)
    self.assertEqual(dropped.domain.attributes, (c1, ))
    self.assertEqual(dropped.domain.class_var, d2)
    self.assertEqual(dropped.domain.metas, (c2, d3, t))

    demoted = distance.remove_discrete_features(data, to_metas=True)
    self.assertEqual(demoted.domain.attributes, (c1, ))
    self.assertEqual(demoted.domain.class_var, d2)
    self.assertEqual(demoted.domain.metas, (c2, d3, t, d1))
def test_variable_editor(self):
    """VariableEditor round-trips a variable and resets on None."""
    editor = VariableEditor()
    self.assertIs(editor.get_data(), None)

    var = StringVariable(name="S")
    var.attributes.update({"A": 1, "B": "b"}, )
    editor.set_data(var)
    self.assertEqual(editor.name_edit.text(), var.name)
    self.assertEqual(editor.labels_model.get_dict(), var.attributes)
    self.assertTrue(editor.is_same())

    editor.set_data(None)
    self.assertEqual(editor.name_edit.text(), "")
    self.assertEqual(editor.labels_model.get_dict(), {})
    self.assertIs(editor.get_data(), None)
def _to_addendum(self, df, keep):
    """Turn geocoding results into (values, meta variables); (None, None)
    when the frame has no columns.

    NOTE: mutates `df` in place (drops internal join columns).
    """
    if not df.shape[1]:
        return None, None

    # internal join keys are never exposed downstream
    df.drop(['_id', 'adm0_a3'], axis=1, inplace=True)
    addendum = df if self.append_features else df[keep]

    metas = []
    for col in addendum:
        unique_name = get_unique_names(self.data.domain, col)
        var_cls = (ContinuousVariable if col in ('latitude', 'longitude')
                   else StringVariable)
        metas.append(var_cls(unique_name))
    return addendum.values, tuple(metas)
def create_coef_table(classifier):
    """Build a coefficients Table for a logistic-regression classifier:
    one continuous column per (used) class value, rows = intercept followed
    by one row per attribute, with a "name" string meta."""
    i = classifier.intercept
    c = classifier.coefficients
    if c.shape[0] > 2:
        # multinomial: one column per class value actually used
        # (the comprehension's `i` is scoped to it and does not clobber
        # the intercept `i` above)
        values = [classifier.domain.class_var.values[int(i)]
                  for i in classifier.used_vals[0]]
    else:
        # binary: a single column named after the second used class value
        values = [classifier.domain.class_var.values[
            int(classifier.used_vals[0][1])]]
    domain = Domain([ContinuousVariable(value, number_of_decimals=7)
                     for value in values],
                    metas=[StringVariable("name")])
    # stack intercept row on top of the transposed coefficient matrix
    coefs = np.vstack((i.reshape(1, len(i)), c.T))
    names = [[attr.name] for attr in classifier.domain.attributes]
    names = [["intercept"]] + names
    names = np.array(names, dtype=object)
    coef_table = Table.from_numpy(domain, X=coefs, metas=names)
    coef_table.name = "coefficients"
    return coef_table
def commit(self):
    """Send documents containing any selected word, then the words topic."""
    matching = None
    if self.corpus is not None:
        selected_rows = [i for i, doc in enumerate(self.corpus.ngrams)
                         if any(word in doc for word in self.selected_words)]
        matching = self.corpus[selected_rows]
    self.Outputs.corpus.send(matching)

    topic = None
    words = list(self.selected_words)
    if words:
        topic = Topic.from_numpy(
            Domain([], metas=[StringVariable('Words')]),
            X=np.empty((len(words), 0)),
            metas=np.c_[words].astype(object))
        topic.name = 'Selected Words'
    self.Outputs.selected_words.send(topic)
def test_get_column(self):
    """get_column: raw column extraction, validity masking and label lookup."""
    widget = self.widget
    get_column = widget.get_column
    cont = ContinuousVariable("cont")
    disc = DiscreteVariable("disc", list("abcdefghijklmno"))
    disc2 = DiscreteVariable("disc2", list("abc"))
    disc3 = DiscreteVariable("disc3", list("abc"))
    string = StringVariable("string")
    domain = Domain([cont, disc], disc2, [disc3, string])
    widget.data = Table.from_numpy(
        domain,
        np.array([[1, 4], [2, 15], [6, 7]], dtype=float),
        np.array([2, 1, 0], dtype=float),
        np.array([[0, "foo"], [2, "bar"], [1, "baz"]])
    )

    # no variable -> no column; otherwise raw values per variable role
    self.assertIsNone(get_column(None))
    np.testing.assert_almost_equal(get_column(cont), [1, 2, 6])
    np.testing.assert_almost_equal(get_column(disc), [4, 15, 7])
    np.testing.assert_almost_equal(get_column(disc2), [2, 1, 0])
    np.testing.assert_almost_equal(get_column(disc3), [0, 2, 1])
    self.assertEqual(list(get_column(string)), ["foo", "bar", "baz"])

    # with a validity mask set, rows flagged invalid are dropped by default
    widget.valid_data = np.array([True, False, True])
    self.assertIsNone(get_column(None))
    np.testing.assert_almost_equal(get_column(cont), [1, 6])
    self.assertEqual(list(get_column(string)), ["foo", "baz"])

    # a falsy second positional argument disables the masking
    self.assertIsNone(get_column(None, False))
    np.testing.assert_almost_equal(get_column(cont, False), [1, 2, 6])
    self.assertEqual(list(get_column(string, False)), ["foo", "bar", "baz"])

    # return_labels yields the value names, and asserts on non-discrete vars
    self.assertIsNone(get_column(None, return_labels=True))
    self.assertEqual(get_column(disc, return_labels=True), disc.values)
    self.assertEqual(get_column(disc2, return_labels=True), disc2.values)
    self.assertEqual(get_column(disc3, return_labels=True), disc3.values)
    with self.assertRaises(AssertionError):
        get_column(cont, return_labels=True)
    with self.assertRaises(AssertionError):
        get_column(cont, return_labels=True, max_categories=4)
    with self.assertRaises(AssertionError):
        get_column(string, return_labels=True)
    with self.assertRaises(AssertionError):
        get_column(string, return_labels=True, max_categories=4)
def handle_languages(self):
    """Add or remove the per-document language meta column, then send.

    When detection is enabled, appends a string meta (uniquely named after
    "Language" and tagged with the 'language-feature' attribute so it can
    be located later); when disabled, strips any meta so tagged. The corpus
    is rebuilt rather than modified in place.
    """
    if self.corpus is not None:
        domain = self.corpus.domain
        if self.detect_languages:
            if self.corpus.languages is None:
                self.corpus.detect_languages()
            # choose a meta name that does not clash with any existing variable
            curr_attributes = list(domain.attributes)
            curr_class_var = [domain.class_var] if domain.class_var else []
            curr_metas = list(domain.metas)
            curr_variables = curr_attributes + curr_class_var + curr_metas
            curr_names = [var.name for var in curr_variables]
            new_name = get_unique_names(curr_names, "Language")
            # tag the column so the removal branch below can find it
            variable_attrs = {'language-feature': True}
            new_variable = StringVariable(new_name)
            new_variable.attributes.update(variable_attrs)
            new_domain = Domain(attributes=domain.attributes,
                                class_vars=domain.class_var,
                                metas=list(domain.metas) + [new_variable])
            metas = np.hstack([
                self.corpus.metas,
                np.array(self.corpus.languages).reshape(-1, 1)
            ])
            self.corpus = Corpus(new_domain, self.corpus.X.copy(),
                                 self.corpus.Y.copy(), metas,
                                 self.corpus.W.copy(),
                                 copy(self.corpus.text_features))
        else:
            # locate a previously added language column, if any
            lang_feat_idx = None
            for i, f in enumerate(domain.metas):
                if ('language-feature' in f.attributes
                        and f.attributes['language-feature']):
                    lang_feat_idx = i
                    break
            if lang_feat_idx is not None:
                new_domain = Domain(
                    attributes=domain.attributes,
                    class_vars=domain.class_var,
                    metas=list(np.delete(list(domain.metas), lang_feat_idx)))
                self.corpus = Corpus(
                    new_domain, self.corpus.X.copy(), self.corpus.Y.copy(),
                    np.delete(self.corpus.metas, lang_feat_idx, axis=1),
                    self.corpus.W.copy(), copy(self.corpus.text_features))
    self.Outputs.corpus.send(self.corpus)
def test_value_from_string_substring(self):
    """transform forwards the column and patterns to map_by_substring,
    stringifying None entries, and maps values to pattern indices."""
    trans = ValueFromStringSubstring(StringVariable(), self.patterns)
    with_none = np.hstack((self.arr.astype(object), [None]))

    with patch("Orange.widgets.data.owcreateclass.map_by_substring") as mbs:
        trans.transform(self.arr)
        column, patterns, case_sensitive, match_beginning = mbs.call_args[0]
        np.testing.assert_equal(column, self.arr)
        self.assertEqual(patterns, self.patterns)
        self.assertFalse(case_sensitive)
        self.assertFalse(match_beginning)

        trans.transform(with_none)
        column, patterns, *_ = mbs.call_args[0]
        np.testing.assert_equal(column,
                                np.hstack((self.arr.astype(str), "")))

    np.testing.assert_equal(trans.transform(with_none),
                            [0, 1, 2, 0, 3, np.nan])
def create_data_from_states(example_states, example_traces):
    """Build a Table with one instance per state: the state's attribute
    values plus "id" and "trace" metas.

    NOTE(review): a "complexity" class variable is declared but never
    assigned here — presumably filled in by the caller; confirm.
    """
    data_desc = example_states[0].domain
    attributes = data_desc.get_attributes()
    domain = Domain(
        attributes,
        ContinuousVariable.make("complexity"),
        metas=[StringVariable.make("id"), ContinuousVariable("trace")])
    data = Table.from_domain(domain)
    for si, s in enumerate(example_states):
        e = Instance(domain)
        for f in attributes:
            e[f] = s.get_attribute(f)
        e["id"] = s.get_id()
        e["trace"] = example_traces[si]
        data.append(e)
    return data
def setUp(self):
    """Create the graph, a mocked dropout-results object and fixtures."""
    self.parent = DummyWidget()
    self.graph = DropoutGraph(self.parent)

    self.results = results = Mock()
    results.decay = 1
    results.x_offset = 0.1
    results.y_offset = 0.1
    results.mean_expr = np.array([0.1, 0.2])
    results.zero_rate = np.array([0.1, 0.2])
    results.threshold = 0

    expr_domain = Domain([ContinuousVariable("A"), ContinuousVariable("B")])
    self.data = Table(expr_domain, np.array([[1, 0], [0, 0], [2, 0]]))

    gene_domain = Domain([], metas=[StringVariable("Entrez ID")])
    self.genes = Table(gene_domain, np.empty((1, 0)),
                       metas=np.array([["1"]]))
def test_from_documents(self):
    """Corpus.from_documents builds domain columns from extractor pairs."""
    documents = [
        {'wheels': 4, 'engine': 'w4', 'type': 'car', 'desc': 'A new car.'},
        {'wheels': 8., 'engine': 'w8', 'type': 'truck',
         'desc': 'An old truck.'},
        {'wheels': 12., 'engine': 'w12', 'type': 'truck',
         'desc': 'An new truck.'},
    ]
    attrs = [(DiscreteVariable('Engine'), lambda doc: doc.get('engine')),
             (ContinuousVariable('Wheels'), lambda doc: doc.get('wheels'))]
    class_vars = [(DiscreteVariable('Type'), lambda doc: doc.get('type'))]
    metas = [(StringVariable('Description'), lambda doc: doc.get('desc'))]
    dataset_name = 'TruckData'

    corpus = Corpus.from_documents(documents, dataset_name,
                                   attrs, class_vars, metas)

    self.assertEqual(len(corpus), len(documents))
    self.assertEqual(corpus.name, dataset_name)
    self.assertEqual(len(corpus.domain.attributes), len(attrs))
    self.assertEqual(len(corpus.domain.class_vars), len(class_vars))
    self.assertEqual(len(corpus.domain.metas), len(metas))

    engine_dv = corpus.domain.attributes[0]
    self.assertEqual(sorted(engine_dv.values),
                     sorted(d['engine'] for d in documents))
    self.assertEqual([engine_dv.repr_val(v) for v in corpus.X[:, 0]],
                     [d['engine'] for d in documents])
def test_varying_between_combined(self):
    """varying_between flags columns that vary within an id group, for
    both dense and sparse X."""
    X = np.array([
        [0, 0, 0, 0, 0, 1],
        [0, 0, 1, 1, 0, 1],
        [0, 0, 0, 2, np.nan, np.nan],
        [0, 1, 0, 0, 0, 0],
        [0, 1, 0, 2, 0, 0],
        [0, 1, 0, 0, np.nan, 0],
    ])
    # Meta block: group id in column 0, then a string copy of X's values
    # (dtype=str renders np.nan as the literal "nan").
    M = np.array(
        [
            ["A", 0, 0, 0, 0, 0, 1],
            ["A", 0, 0, 1, 1, 0, 1],
            ["A", 0, 0, 0, 2, np.nan, np.nan],
            ["B", 0, 1, 0, 0, 0, 0],
            ["B", 0, 1, 0, 2, 0, 0],
            ["B", 0, 1, 0, 0, np.nan, 0],
        ],
        dtype=str,
    )
    variables = [
        ContinuousVariable(name="F%d" % j) for j in range(X.shape[1])
    ]
    metas = [StringVariable(name="M%d" % j) for j in range(M.shape[1])]
    domain = Domain(attributes=variables, metas=metas)
    data = Table.from_numpy(X=X, domain=domain, metas=M)
    self.assertEqual(
        varying_between(data, idvar=data.domain.metas[0]),
        [
            variables[2], variables[3], metas[3], metas[4], metas[5],
            metas[6]
        ],
    )
    # identical expectation with a sparse X matrix
    data = Table.from_numpy(X=sp.csr_matrix(X), domain=domain, metas=M)
    self.assertEqual(
        varying_between(data, idvar=data.domain.metas[0]),
        [
            variables[2], variables[3], metas[3], metas[4], metas[5],
            metas[6]
        ],
    )
def test_domaineditor_makes_variables(self):
    # Variables created with domain editor should be interchangeable
    # with variables read from file.
    content = """V0\tV1\nc\td\n\n1.0\t2"""
    expected_v0 = StringVariable.make("V0")
    expected_v1 = ContinuousVariable.make("V1")
    with named_file(content, suffix=".tab") as filename:
        self.open_dataset(filename)

        editor_model = self.widget.domain_editor.model()
        editor_model.setData(editor_model.createIndex(0, 1), "text",
                             Qt.EditRole)
        editor_model.setData(editor_model.createIndex(1, 1), "numeric",
                             Qt.EditRole)
        self.widget.apply_button.click()

        data = self.get_output(self.widget.Outputs.data)
        self.assertEqual(data.domain["V0"], expected_v0)
        self.assertEqual(data.domain["V1"], expected_v1)
def transpose_table(table):
    """
    Transpose the rows and columns of the table.

    Args:
        table: Data in :obj:`Orange.data.Table`

    Returns: Transposed :obj:`Orange.data.Table`. (Genes as columns)
    """
    attrs = table.domain.attributes
    attr = [ContinuousVariable.make(ex['Gene'].value) for ex in table]
    # Set metas.
    # Fix: compare with `!=` instead of `is not` — identity comparison with
    # a string literal only worked via CPython interning and raises a
    # SyntaxWarning on Python 3.8+.
    new_metas = [StringVariable.make(name) if name != 'Time'
                 else TimeVariable.make(name)
                 for name in sorted(table.domain.variables[0].attributes.keys())]
    domain = Domain(attr, metas=new_metas)
    meta_values = [[exp.attributes[var.name] for var in domain.metas]
                   for exp in attrs]
    return Table(domain, table.X.transpose(), metas=meta_values)
def transpose_labels_to_class(data, class_label=None, gene_label="gene"): """Converts data with genes in rows to data with genes as attributes.""" # if no class_label (attribute type) given, guess it from the data if not class_label: l = [] for a in data.domain.attributes: l.extend(list(a.attributes.keys())) l = list(set(l)) class_label = l[0] if len(set(l)) > 1: import warnings warnings.warn("More than single attribute label types (%s), took %s" % (", ".join(l), class_label)) if gene_label in [v.name for v in data.domain.getmetas().values()]: atts = [ContinuousVariable(str(d[gene_label])) for d in data] else: atts = [ContinuousVariable("A%d" % i) for i in range(len(data))] classvalues = list(set([a.attributes[class_label] for a in data.domain.attributes])) if all([isinstance(x, (int, float, complex)) for x in classvalues]): classvar = ContinuousVariable(class_label) else: classvar = DiscreteVariable(class_label, values=classvalues) domain = Orange.data.Domain(atts, classvar) newdata = [] for a in data.domain.attributes: newdata.append([_float_or_na(d[a]) for d in data] + [a.attributes[class_label]]) sample = StringVariable("sample") id = StringVariable.new_meta_id() new = Orange.data.Table(domain, newdata) new.domain.addmeta(id, sample) for i, d in enumerate(new): d[sample] = data.domain.attributes[i].name return new
def transpose_class_to_labels(data, attcol="sample"):
    """Converts data with genes as attributes to data with genes in rows.

    Args:
        data: input table (legacy Orange API: ``getmetas``/``addmeta``).
        attcol: name of the meta column that names the new attributes.

    Returns:
        A new table with one attribute per input row; each instance's
        original class is preserved as a "class" attribute label, and the
        original attribute names are stored in a "gene" string meta.
    """
    if attcol in [v.name for v in data.domain.getmetas().values()]:
        atts = [ContinuousVariable(str(d[attcol])) for d in data]
    else:
        atts = [ContinuousVariable("S%d" % i) for i in range(len(data))]

    # Keep each instance's class as an attribute label on its new column.
    for i, d in enumerate(data):
        atts[i].setattr("class", str(d.getclass()))

    domain = Orange.data.Domain(atts, None)
    newdata = []
    for a in data.domain.attributes:
        newdata.append([_float_or_na(d[a]) for d in data])

    gene = StringVariable("gene")
    meta_id = StringVariable.new_meta_id()  # renamed: ``id`` shadows a builtin
    new = Orange.data.Table(domain, newdata)
    new.domain.addmeta(meta_id, gene)
    for i, d in enumerate(new):
        d[gene] = data.domain.attributes[i].name
    return new
def _guess_variable(self, field_name, field_metadata, inspect_table):
    """Map a PostgreSQL type OID to an appropriate Orange variable.

    ``inspect_table``, when given, allows querying distinct values to
    turn integer and character columns into discrete variables.
    """
    type_code = field_metadata[0]

    floatish = (700, 701, 1700)        # real, float8, numeric
    ints = (20, 21, 23)                # bigint, int, smallint
    chars = (25, 1042, 1043)           # text, char, varchar
    booleans = (16,)                   # bool
    dates = (1082, 1114, 1184)         # date, timestamp, timestamptz
    times = (1083, 1114, 1184, 1266)   # time, timestamp, timestamptz, timetz

    if type_code in floatish:
        return ContinuousVariable.make(field_name)

    if type_code in times + dates:
        tv = TimeVariable.make(field_name)
        # Timestamp OIDs appear in both tuples, so both flags may be set.
        tv.have_date |= type_code in dates
        tv.have_time |= type_code in times
        return tv

    if type_code in ints:
        # Integers become discrete when the inspected table yields a
        # value set; otherwise they stay continuous.
        if inspect_table:
            values = self.get_distinct_values(field_name, inspect_table)
            if values:
                return DiscreteVariable.make(field_name, values)
        return ContinuousVariable.make(field_name)

    if type_code in booleans:
        return DiscreteVariable.make(field_name, ['false', 'true'])

    if type_code in chars:
        if inspect_table:
            values = self.get_distinct_values(field_name, inspect_table)
            # char(n) values are space-padded; remove trailing spaces.
            values = [v.rstrip() for v in values]
            if values:
                return DiscreteVariable.make(field_name, values)

    # Fallback (unknown OIDs and non-discretized char columns).
    return StringVariable.make(field_name)
def etc_to_table(self, etc_json, time_var=False, callback=lambda: None):
    """
    Converts data from Json to :obj:`Orange.data.table`

    Args:
        etc_json (dict): Data in json like format
        time_var (bool): Create column of time points. Default is set to False.

    Returns:
        :obj:`Orange.data.Table`
    """
    cbc = CallBack(2, callback, callbacks=30)

    # One continuous column per time point, tagged with its time stamp.
    variables = []
    for index, time in enumerate(etc_json['etc']['timePoints'], start=1):
        var = ContinuousVariable('TP ' + str(index))
        var.attributes['Time'] = str(time)
        variables.append(var)

    meta_attr = StringVariable.make('Gene')
    domain = Domain(variables, metas=[meta_attr])
    cbc()

    # Each row: the gene's expression values followed by its name (meta).
    table = [[exp for exp in etc_json['etc']['genes'][gene]] + [gene]
             for gene in etc_json['etc']['genes']]

    orange_table = Table(domain, table)
    if time_var:
        orange_table = transpose_table(orange_table)
    cbc()

    cbc.end()
    return orange_table
def _corpus_from_records(records, includes_metadata):
    """Receives PubMed records and transforms them into a corpus.

    Args:
        records (list): A list of PubMed entries.
        includes_metadata (list): A list of text fields to include.

    Returns:
        corpus: The output Corpus.
    """
    meta_vars = []
    time_var = None
    for field_name, _ in includes_metadata:
        if field_name == PUBMED_FIELD_DATE:
            time_var = TimeVariable(field_name)
            meta_vars.append(time_var)
        else:
            meta_vars.append(StringVariable.make(field_name))
            if field_name == PUBMED_FIELD_TITLE:
                meta_vars[-1].attributes["title"] = True

    meta_values, class_values = _records_to_corpus_entries(
        records,
        includes_metadata=includes_metadata,
        time_var=time_var,
    )

    # FIX: sort the value set so the variable's value order (and thus the
    # numeric class encoding in Y) is deterministic across runs; iterating
    # a bare ``set`` yields an arbitrary order.
    class_vars = [
        DiscreteVariable(
            'section',
            values=sorted(map(str, set(filter(None, class_values)))))
    ]
    domain = Domain([], class_vars=class_vars, metas=meta_vars)

    Y = np.array([class_vars[0].to_val(cv) for cv in class_values])[:, None]

    return Corpus(domain=domain, Y=Y, metas=meta_values)
def read(self):
    """Read an OPUS data block (selected via ``self.sheet``) into an
    Orange Table.

    Dispatches on the opusFC DataReturn subtype to assemble spectra
    (``y_data``) and per-row metas (map coordinates, region, time, z),
    then appends selected OPUS parameters (SRT, SNM) as extra metas.

    Raises:
        RuntimeError: if opusFC is not importable.
        IOError: if the file cannot be loaded.
        ValueError: on an empty or unsupported DataReturn object.
    """
    try:
        import opusFC
    except ImportError:
        raise RuntimeError(self._OPUS_WARNING)

    if self.sheet:
        db = self.sheet
    else:
        db = self.sheets[0]

    db = tuple(db.split(" "))

    try:
        data = opusFC.getOpusData(self.filename, db)
    except Exception:
        raise IOError("Couldn't load spectrum from " + self.filename)

    attrs, clses, metas = [], [], []

    # Default attributes: one column per x value of the spectrum.
    attrs = [ContinuousVariable.make(repr(data.x[i]))
             for i in range(data.x.shape[0])]

    y_data = None
    meta_data = None

    if type(data) == opusFC.MultiRegionDataReturn:
        y_data = []
        meta_data = []
        metas.extend([ContinuousVariable.make('map_x'),
                      ContinuousVariable.make('map_y'),
                      StringVariable.make('map_region'),
                      TimeVariable.make('start_time')])
        for region in data.regions:
            y_data.append(region.spectra)
            mapX = region.mapX
            mapY = region.mapY
            map_region = np.full_like(mapX, region.title, dtype=object)
            start_time = region.start_time
            meta_region = np.column_stack((mapX, mapY,
                                           map_region, start_time))
            meta_data.append(meta_region.astype(object))
        y_data = np.vstack(y_data)
        meta_data = np.vstack(meta_data)
    elif type(data) == opusFC.MultiRegionTRCDataReturn:
        y_data = []
        meta_data = []
        metas.extend([ContinuousVariable.make('map_x'),
                      ContinuousVariable.make('map_y'),
                      StringVariable.make('map_region')])
        # TRC data: columns are trace labels, not x values.
        attrs = [ContinuousVariable.make(repr(data.labels[i]))
                 for i in range(len(data.labels))]
        for region in data.regions:
            y_data.append(region.spectra)
            mapX = region.mapX
            mapY = region.mapY
            map_region = np.full_like(mapX, region.title, dtype=object)
            meta_region = np.column_stack((mapX, mapY, map_region))
            meta_data.append(meta_region.astype(object))
        y_data = np.vstack(y_data)
        meta_data = np.vstack(meta_data)
    elif type(data) == opusFC.ImageDataReturn:
        metas.extend([ContinuousVariable.make('map_x'),
                      ContinuousVariable.make('map_y')])
        data_3D = data.spectra
        # Flatten the image row by row, tracking (x, y) coordinates.
        for i in np.ndindex(data_3D.shape[:1]):
            map_y = np.full_like(data.mapX, data.mapY[i])
            coord = np.column_stack((data.mapX, map_y))
            if y_data is None:
                y_data = data_3D[i]
                meta_data = coord.astype(object)
            else:
                y_data = np.vstack((y_data, data_3D[i]))
                meta_data = np.vstack((meta_data, coord))
    elif type(data) == opusFC.ImageTRCDataReturn:
        metas.extend([ContinuousVariable.make('map_x'),
                      ContinuousVariable.make('map_y')])
        attrs = [ContinuousVariable.make(repr(data.labels[i]))
                 for i in range(len(data.labels))]
        data_3D = data.traces
        for i in np.ndindex(data_3D.shape[:1]):
            map_y = np.full_like(data.mapX, data.mapY[i])
            coord = np.column_stack((data.mapX, map_y))
            if y_data is None:
                y_data = data_3D[i]
                meta_data = coord.astype(object)
            else:
                y_data = np.vstack((y_data, data_3D[i]))
                meta_data = np.vstack((meta_data, coord))
    elif type(data) == opusFC.TimeResolvedTRCDataReturn:
        y_data = data.traces
    elif type(data) == opusFC.TimeResolvedDataReturn:
        metas.extend([ContinuousVariable.make('z')])
        y_data = data.spectra
        meta_data = data.z
    elif type(data) == opusFC.SingleDataReturn:
        y_data = data.y[None, :]
    else:
        # FIX: the original concatenated ``str + type``, which raises
        # TypeError instead of the intended ValueError message.
        raise ValueError("Empty or unsupported opusFC DataReturn object: "
                         + str(type(data)))

    import_params = ['SRT', 'SNM']

    for param_key in import_params:
        try:
            param = data.parameters[param_key]
        except KeyError:
            pass  # TODO should notify user?
        else:
            try:
                param_name = opusFC.paramDict[param_key]
            except KeyError:
                param_name = param_key
            if param_key == 'SRT':
                var = TimeVariable.make(param_name)
            elif type(param) is float:
                var = ContinuousVariable.make(param_name)
            elif type(param) is str:
                var = StringVariable.make(param_name)
            else:
                raise ValueError  # Found a type to handle
            metas.extend([var])
            # Broadcast the single parameter value to every row.
            params = np.full((y_data.shape[0],), param, np.array(param).dtype)
            if meta_data is not None:
                # NB dtype default will be np.array(fill_value).dtype in future
                meta_data = np.column_stack((meta_data, params.astype(object)))
            else:
                meta_data = params

    domain = Orange.data.Domain(attrs, clses, metas)
    meta_data = np.atleast_2d(meta_data)
    table = Orange.data.Table.from_numpy(domain,
                                         y_data.astype(float, order='C'),
                                         metas=meta_data)
    return table
def read(self):
    """Read a MATLAB ``.mat`` file into an Orange Table.

    The biggest numeric array becomes X; an array whose shape matches
    X's column count supplies attribute names; remaining arrays of the
    right length become string metas.

    Raises:
        IOError: if the file contains no variables.
    """
    who = matlab.whosmat(self.filename)
    if not who:
        raise IOError("Couldn't load matlab file " + self.filename)
    else:
        ml = matlab.loadmat(self.filename, chars_as_strings=True)
        ml = {a: b for a, b in ml.items() if isinstance(b, np.ndarray)}

        # X is the biggest numeric array
        numarrays = []
        for name, con in ml.items():
            if issubclass(con.dtype.type, numbers.Number):
                numarrays.append((name, reduce(lambda x, y: x*y, con.shape, 1)))
        X = None
        if numarrays:
            nameX = max(numarrays, key=lambda x: x[1])[0]
            X = ml.pop(nameX)

        # find an array with compatible shapes to supply attribute names
        attributes = []
        if X is not None:
            nameattributes = None
            for name, con in ml.items():
                if con.shape in [(X.shape[1],), (1, X.shape[1])]:
                    nameattributes = name
                    break
            attributenames = ml.pop(nameattributes).ravel() \
                if nameattributes else range(X.shape[1])
            # strip because of numpy char array padding
            attributenames = [str(a).strip() for a in attributenames]
            attributes = [ContinuousVariable.make(a) for a in attributenames]

        metas = []
        metaattributes = []

        # Meta row count: X's length, or the most common array length
        # when there is no X.
        sizemetas = None
        if X is None:
            counts = defaultdict(list)
            for name, con in ml.items():
                counts[len(con)].append(name)
            if counts:
                sizemetas = max(counts.keys(), key=lambda x: len(counts[x]))
        else:
            sizemetas = len(X)
        if sizemetas:
            for name, con in ml.items():
                if len(con) == sizemetas:
                    metas.append(name)

        metadata = []
        for m in sorted(metas):
            f = ml[m]
            metaattributes.append(StringVariable.make(m))
            f.resize(sizemetas, 1)
            metadata.append(f)
        # FIX: guard against an empty list — ``np.hstack(())`` raises
        # ValueError (and ``metadata`` must exist even when ``sizemetas``
        # is falsy).  Mirrors the newer variant of this reader.
        metadata = np.hstack(tuple(metadata)) if metadata else None

        domain = Domain(attributes, metas=metaattributes)
        if X is None:
            X = np.zeros((sizemetas, 0))
        return Orange.data.Table.from_numpy(domain, X, Y=None, metas=metadata)
def read(self):
    # Read a MATLAB ``.mat`` file into an Orange Table: the biggest
    # numeric array becomes X; a shape-compatible array supplies attribute
    # names; remaining arrays of the right length become metas (string or
    # continuous).  Raises IOError when the file contains no variables.
    who = matlab.whosmat(self.filename)
    if not who:
        raise IOError("Couldn't load matlab file " + self.filename)
    else:
        ml = matlab.loadmat(self.filename, chars_as_strings=True)
        # Keep only ndarray entries (drops loadmat's header fields).
        ml = {a: b for a, b in ml.items() if isinstance(b, np.ndarray)}

        def num_elements(array):
            # Total element count (product of the shape).
            return reduce(lambda x, y: x * y, array.shape, 1)

        def find_biggest(arrays):
            # Name of the array with the most elements.
            sizes = []
            for n, c in arrays.items():
                sizes.append((num_elements(c), n))
            return max(sizes)[1]

        def is_string_array(array):
            return issubclass(array.dtype.type, np.str_)

        def is_number_array(array):
            return issubclass(array.dtype.type, numbers.Number)

        numeric = {n: a for n, a in ml.items() if is_number_array(a)}

        # X is the biggest numeric array
        X = ml.pop(find_biggest(numeric)) if numeric else None

        # find an array with compatible shapes to supply attribute names;
        # sorted() makes the choice deterministic when several match
        attributes = []
        if X is not None:
            name_array = None
            for name in sorted(ml):
                con = ml[name]
                if con.shape in [(X.shape[1],), (1, X.shape[1])]:
                    name_array = name
                    break
            names = ml.pop(name_array).ravel() if name_array \
                else range(X.shape[1])
            names = [str(a).rstrip() for a in names]  # remove matlab char padding
            attributes = [ContinuousVariable.make(a) for a in names]

        meta_names = []
        metas = []

        # Meta row count: X's length, or the most common array length
        # when there is no X.
        meta_size = None
        if X is None:
            counts = defaultdict(list)
            for name, con in ml.items():
                counts[len(con)].append(name)
            if counts:
                meta_size = max(counts.keys(), key=lambda x: len(counts[x]))
        else:
            meta_size = len(X)
        if meta_size:
            for name, con in ml.items():
                if len(con) == meta_size:
                    meta_names.append(name)

        meta_data = []
        for m in sorted(meta_names):
            f = ml[m]
            if is_string_array(f) and len(f.shape) == 1:  # 1D string arrays
                metas.append(StringVariable.make(m))
                f = np.array([a.rstrip() for a in f])  # remove matlab char padding
                f.resize(meta_size, 1)
                meta_data.append(f)
            elif is_number_array(f) and len(f.shape) == 2:
                # 2D numeric arrays: one continuous meta per column,
                # suffixed "_1", "_2", ... when there are several.
                if f.shape[1] == 1:
                    names = [m]
                else:
                    names = [m + "_" + str(i+1) for i in range(f.shape[1])]
                for n in names:
                    metas.append(ContinuousVariable.make(n))
                meta_data.append(f)

        meta_data = np.hstack(tuple(meta_data)) if meta_data else None

        domain = Domain(attributes, metas=metas)
        if X is None:
            X = np.zeros((meta_size, 0))
        return Orange.data.Table.from_numpy(domain, X, Y=None, metas=meta_data)