def setUp(self):
    """Create a small numeric/string table with NULLs and open it as a SqlTable."""
    self.data = [
        [1, 2, 3, None, 'm'],
        [2, 3, 1, 4, 'f'],
        [None, None, None, None, None],
        [7, None, 3, None, 'f'],
    ]
    connection, self.table_name = self.create_sql_table(self.data)
    # inspect_values=True lets the backend guess variable types from the data.
    self.table = SqlTable(connection, self.table_name, inspect_values=True)
def test_other(self):
    """A 'uuid' column should map to a StringVariable meta and accept string filters."""
    uuids = [
        'bcd4d9c0-361e-bad4-7ceb-0d171cdec981',
        '544b7ddc-d861-0201-81c8-9f7ad0bbf531',
        'b35a10f7-7901-f313-ec16-5ad9778040a6',
        'b267c4be-4a26-60b5-e664-737a90a40e93',
    ]
    column = np.array(uuids).reshape(-1, 1)
    conn, table_name = self.create_sql_table(column, ['uuid'])

    # The mapping to a string meta must not depend on value inspection.
    for inspect in (False, True):
        sql_table = SqlTable(conn, table_name, inspect_values=inspect)
        self.assertFirstMetaIsInstance(sql_table, StringVariable)

    # String filters apply to the meta column; no uuid equals 'foo'.
    filters = filter.Values(
        [filter.FilterString(-1, filter.FilterString.Equal, 'foo')])
    self.assertEqual(len(filters(sql_table)), 0)
def setUp(self):
    """Build a mixed-type fixture table and load it with value inspection."""
    self.data = [
        [1, 2, 3, 'a', 'm'],
        [2, None, 1, 'a', 'f'],
        [None, 3, 1, 'b', None],
        [2, 2, 3, 'b', 'f'],
    ]
    self.conn, self.table_name = self.create_sql_table(self.data)
    self.table = SqlTable(self.conn, self.table_name, inspect_values=True)
def setUp(self):
    """Build a mixed-type fixture table and open it via its URI."""
    self.data = [
        [1, 2, 3, 'a', 'm'],
        [2, None, 1, 'a', 'f'],
        [None, 3, 1, 'b', None],
        [2, 2, 3, 'b', 'f'],
    ]
    self.table_uri = self.create_sql_table(self.data)
    # Single-argument form: connection parameters are parsed from the URI.
    self.table = SqlTable(self.table_uri)
def test_distributions(self):
    """Distributions over the SQL-backed iris table match the known stats."""
    iris = SqlTable(self.conn, self.iris, inspect_values=True)
    dists = get_distributions(iris)
    # One distribution per column (4 features + class).
    self.assertEqual(len(dists), 5)

    sepal_length = dists[0]
    self.assertAlmostEqual(sepal_length.min(), 4.3)
    self.assertAlmostEqual(sepal_length.max(), 7.9)
    self.assertAlmostEqual(sepal_length.mean(), 5.8, 1)
def sql_table_from_data(self, data, guess_values=True):
    """Yield a SqlTable backed by a freshly created table; drop it afterwards.

    Intended for use as a context-manager body: the table is removed once
    the caller resumes the generator.
    """
    from Orange.data.sql.backend.postgres import Psycopg2Backend

    # A connection pool must already exist before tables can be served.
    assert Psycopg2Backend.connection_pool is not None

    table_name = self._create_sql_table(data)
    yield SqlTable(connection_params(), table_name,
                   inspect_values=guess_values)
    self.drop_sql_table(table_name)
def test_PCA(self):
    """RemotePCA on iris yields a 4x4 component matrix for any batch size."""
    class_values = ['Iris-setosa', 'Iris-virginica', 'Iris-versicolor']
    hints = Domain([], DiscreteVariable("iris", values=class_values))
    table = SqlTable(self.conn, self.iris, type_hints=hints)
    for batch_size in (50, 500):
        rpca = RemotePCA(table, batch_size, 20)
        self.assertEqual(rpca.components_.shape, (4, 4))
def test_download_data(self):
    """Every standard Table member is populated on a fresh SqlTable."""
    mat = np.random.randint(0, 2, (20, 3))
    conn, table_name = self.create_sql_table(mat)
    for member in ('X', 'Y', 'metas', 'W', 'ids'):
        sql_table = SqlTable(
            conn, table_name,
            type_hints=Domain([], DiscreteVariable(name='col2',
                                                   values=['0', '1', '2'])))
        self.assertIsNotNone(getattr(sql_table, member))
    # has all necessary class members to create a standard Table
    Table(sql_table.domain, sql_table)
def test_contingencies(self):
    """Contingencies are computed for continuous and discretized columns."""
    iris = SqlTable(self.conn, self.iris, inspect_values=True)
    # Replace the third attribute with a discretized 'sepal width'.
    binned_width = EqualWidth()(iris, iris.domain['sepal width'])
    iris.domain = Domain(iris.domain[:2] + (binned_width,),
                         iris.domain['iris'])
    conts = get_contingencies(iris)
    self.assertEqual(len(conts), 3)
    self.assertIsInstance(conts[0], Continuous)
    self.assertIsInstance(conts[1], Continuous)
    self.assertIsInstance(conts[2], Discrete)
def test_XY_small(self):
    """Small tables are auto-downloaded; X and Y mirror the source matrix."""
    mat = np.random.randint(0, 2, (20, 3))
    conn, table_name = self.create_sql_table(mat)
    class_var = DiscreteVariable(name='col2', values=['0', '1', '2'])
    sql_table = SqlTable(conn, table_name,
                         type_hints=Domain([], class_var))
    assert_almost_equal(sql_table.X, mat[:, :2])
    assert_almost_equal(sql_table.Y.flatten(), mat[:, 2])
def test_meta_type_hints(self):
    """A type hint placing the iris variable in metas is honored."""
    iris = SqlTable(
        self.conn, self.iris,
        type_hints=Domain([], metas=[self.IRIS_VARIABLE]),
    )
    self.assertEqual(len(iris.domain.metas), 1)
    self.assertEqual(iris.domain.metas[0].name, "iris")
    # 150 iris rows: three species blocks of 50 in this value order.
    expected = [0] * 50 + [2] * 50 + [1] * 50
    np.testing.assert_array_equal(iris.metas.flatten(), expected)
def open_table(self):
    """Open the table selected in the combo box and send it downstream."""
    # Index 0 is the placeholder entry — nothing selected yet.
    if self.tablecombo.currentIndex() == 0:
        return
    self.table = self.tablecombo.currentText()
    table = SqlTable(host=self.host,
                     database=self.database,
                     user=self.username,
                     password=self.password,
                     table=self.table)
    self.send("Data", table)
def setUpDB(self):
    """Create the fixture table and promote its last variable to class."""
    self.data = [
        [1, 2, 3, None, 'm'],
        [2, 3, 1, 4, 'f'],
        [None, None, None, None, None],
        [7, None, 3, None, 'f'],
    ]
    self.conn, self.table_name = self.create_sql_table(self.data)
    source = SqlTable(self.conn, self.table_name, inspect_values=True)
    variables = source.domain.variables
    # Work on a copy so the inspected table itself stays untouched.
    self.table = source.copy()
    self.table.domain = domain.Domain(variables[:-1], variables[-1:])
def setUp(self):
    """Create the fixture table via URI and promote its last variable to class."""
    self.data = [
        [1, 2, 3, None, 'm'],
        [2, 3, 1, 4, 'f'],
        [None, None, None, None, None],
        [7, None, 3, None, 'f'],
    ]
    self.table_uri = self.create_sql_table(self.data)
    source = SqlTable(self.table_uri)
    variables = source.domain.variables
    # Work on a copy so the originally opened table stays untouched.
    self.table = source.copy()
    self.table.domain = domain.Domain(variables[:-1], variables[-1:])
def test_query_subset_of_attributes(self):
    """_query returns only the requested columns, including computed ones."""
    table = SqlTable(self.conn, self.iris)
    attributes = [
        self._mock_attribute("sepal length"),
        self._mock_attribute("sepal width"),
        # A derived column expressed directly in SQL.
        self._mock_attribute("double width", '2 * "sepal width"'),
    ]
    results = list(table._query(attributes))
    expected_head = [
        (5.1, 3.5, 7.0),
        (4.9, 3.0, 6.0),
        (4.7, 3.2, 6.4),
        (4.6, 3.1, 6.2),
        (5.0, 3.6, 7.2),
    ]
    self.assertSequenceEqual(results[:5], expected_head)
def test_PCA(self):
    """RemotePCA over the server-side iris table yields 4x4 components."""
    species = ['Iris-setosa', 'Iris-virginica', 'Iris-versicolor']
    hints = Domain([], DiscreteVariable("iris", values=species))
    table = SqlTable(connection_params(), 'iris', type_hints=hints)
    for batch_size in (50, 500):
        rpca = RemotePCA(table, batch_size, 10)
        self.assertEqual(rpca.components_.shape, (4, 4))
def test_download_data(self):
    """Every standard Table member is populated on a fresh SqlTable."""
    mat = np.random.randint(0, 2, (20, 3))
    conn, table_name = self.create_sql_table(mat)
    for member in ("X", "Y", "metas", "W", "ids"):
        sql_table = SqlTable(
            conn, table_name,
            type_hints=Domain([], DiscreteVariable(name="col2",
                                                   values=["0", "1", "2"])),
        )
        self.assertIsNotNone(getattr(sql_table, member))
    # has all necessary class members to create a standard Table
    Table(sql_table.domain, sql_table)
def test_meta_char(self):
    """char(1) columns map to StringVariable; NULL reads back as ''."""
    letters = list('ABCDEFGHIJKLMNOPQRSTUVW')

    # Plain char column: a string meta regardless of value inspection.
    column = np.array(letters).reshape(-1, 1)
    conn, table_name = self.create_sql_table(column, ['char(1)'])
    for inspect in (False, True):
        sql_table = SqlTable(conn, table_name, inspect_values=inspect)
        self.assertFirstMetaIsInstance(sql_table, StringVariable)
    self.drop_sql_table(table_name)

    # A trailing NULL must be transformed into an empty string.
    column = np.array(letters + [None]).reshape(-1, 1)
    conn, table_name = self.create_sql_table(column, ["char(1)"])
    for inspect in (False, True):
        sql_table = SqlTable(conn, table_name, inspect_values=inspect)
        self.assertFirstMetaIsInstance(sql_table, StringVariable)
        self.assertEqual("", sql_table.metas[-1, 0])
    self.drop_sql_table(table_name)
def setUpDB(self):
    """Create a one-column table of words plus two NULL rows and open it."""
    # NOTE(review): adjacent literals concatenate without a separating
    # space ("adipiscing" + "elit." -> "adipiscingelit."), so some "words"
    # are fused; presumably irrelevant to the test — confirm before fixing.
    text = (
        "Lorem ipsum dolor sit amet, consectetur adipiscing"
        "elit. Vestibulum vel dolor nulla. Etiam elit lectus, mollis nec"
        "mattis sed, pellentesque in turpis. Vivamus non nisi dolor. Etiam"
        "lacinia dictum purus, in ullamcorper ante vulputate sed. Nullam"
        "congue blandit elementum. Donec blandit laoreet posuere. Proin"
        "quis augue eget tortor posuere mollis. Fusce vestibulum bibendum"
        "neque at convallis. Donec iaculis risus volutpat malesuada"
        "vehicula. Ut cursus tempor massa vulputate lacinia. Pellentesque"
        "eu tortor sed diam placerat porttitor et volutpat risus. In"
        "vulputate rutrum lacus ac sagittis. Suspendisse interdum luctus"
        "sem auctor commodo."
    )
    self.data = [[word] for word in text.split(' ')] + [[None], [None]]
    self.conn, self.table_name = self.create_sql_table(self.data)
    self.table = SqlTable(self.conn, self.table_name)
def test_XY_large(self):
    """Oversized tables refuse implicit X/Y access until fully downloaded."""
    mat = np.random.randint(0, 2, (1020, 3))
    conn, table_name = self.create_sql_table(mat)
    class_var = DiscreteVariable(name='col2', values=['0', '1', '2'])
    sql_table = SqlTable(conn, table_name,
                         type_hints=Domain([], class_var))

    # Implicit access and an insufficient download limit must raise.
    with self.assertRaises(ValueError):
        sql_table.X
    with self.assertRaises(ValueError):
        sql_table.Y
    with self.assertRaises(ValueError):
        sql_table.download_data(1019)

    # After an explicit full download the arrays mirror the source matrix.
    sql_table.download_data()
    assert_almost_equal(sql_table.X, mat[:, :2])
    assert_almost_equal(sql_table.Y.flatten(), mat[:, 2])
def test_list_tables_with_schema(self):
    """Tables inside a non-default schema are listed and can be opened."""
    with self.backend.execute_sql_query(
            "DROP SCHEMA IF EXISTS orange_tests CASCADE") as cur:
        cur.execute("CREATE SCHEMA orange_tests")
        cur.execute("CREATE TABLE orange_tests.efgh (id int)")
        cur.execute("INSERT INTO orange_tests.efgh (id) VALUES (1)")
        cur.execute("INSERT INTO orange_tests.efgh (id) VALUES (2)")
    try:
        tables = self.backend.list_tables("orange_tests")
        self.assertTrue(any(t.name == "efgh" for t in tables))
        # The listed table reference must be directly usable.
        SqlTable(self.conn, tables[0], inspect_values=True)
    finally:
        # Always clean up the schema, even when assertions fail.
        with self.backend.execute_sql_query(
                "DROP SCHEMA IF EXISTS orange_tests CASCADE"):
            pass
def open_table(self):
    """Open the selected table or custom query, optionally auto-discovering
    attribute types (with confirmation on large tables), and send the result.
    """
    # Index 0 is the placeholder entry; nothing to open.
    if self.tablecombo.currentIndex() <= 0:
        return
    if self.tablecombo.currentIndex() < self.tablecombo.count() - 1:
        self.table = self.tablecombo.currentText()
    else:
        # The last combo entry stands for the custom-SQL editor contents.
        self.table = self.sqltext.toPlainText()
    table = SqlTable(dict(host=self.host,
                          port=self.port,
                          database=self.database,
                          user=self.username,
                          password=self.password),
                     self.table,
                     inspect_values=False)
    sample = False
    # Large tables: ask before running the expensive type discovery.
    if table.approx_len() > LARGE_TABLE and self.guess_values:
        confirm = QMessageBox(self)
        confirm.setIcon(QMessageBox.Warning)
        confirm.setText("Attribute discovery might take "
                        "a long time on large tables.\n"
                        "Do you want to auto discover attributes?")
        confirm.addButton("Yes", QMessageBox.YesRole)
        no_button = confirm.addButton("No", QMessageBox.NoRole)
        sample_button = confirm.addButton("Yes, on a sample",
                                          QMessageBox.YesRole)
        confirm.exec()
        if confirm.clickedButton() == no_button:
            # Remember the choice for subsequent opens.
            self.guess_values = False
        elif confirm.clickedButton() == sample_button:
            sample = True

    # Clear any earlier informational message before (re)discovery.
    self.information(1)
    if self.guess_values:
        QApplication.setOverrideCursor(QCursor(Qt.WaitCursor))
        if sample:
            # Discover types from a time-limited sample instead of all rows.
            s = table.sample_time(1)
            domain = s.get_domain(guess_values=True)
            self.information(
                1, "Domain was generated from a sample of the table.")
        else:
            domain = table.get_domain(guess_values=True)
        QApplication.restoreOverrideCursor()
        table.domain = domain

    self.send("Data", table)
def test_NaiveBayes(self):
    """Naive Bayes on the discretized SQL iris table scores between 0.95 and 1."""
    species = ['Iris-setosa', 'Iris-virginica', 'Iris-versicolor']
    table = SqlTable(dict(host='localhost', database='test'), 'iris',
                     type_hints=Domain([],
                                       DiscreteVariable("iris",
                                                        values=species)))
    table = preprocess.Discretize(table)
    clf = nb.NaiveBayesLearner()(table)

    # Single instance prediction
    self.assertEqual(clf(table[0]), table[0].get_class())

    # Table prediction
    pred = clf(table)
    actual = array([ins.get_class() for ins in table])
    hits = pred == actual
    accuracy = hits.sum() / len(hits)
    self.assertGreater(accuracy, 0.95)
    self.assertLess(accuracy, 1.)
def test_recovers_connection_after_sql_error(self):
    """A failing query must not poison the connection for later queries."""
    conn, table_name = self.create_sql_table(
        np.arange(25).reshape((-1, 1)))
    sql_table = SqlTable(conn, table_name)

    # 1/value fails on the row holding 0 (np.arange starts at 0).
    try:
        broken_query = "SELECT 1/%s FROM %s" % (
            sql_table.domain.attributes[0].to_sql(),
            sql_table.table_name)
        with sql_table.backend.execute_sql_query(broken_query) as cur:
            cur.fetchall()
    except BackendError:
        pass

    # The same backend must still serve a valid query afterwards.
    working_query = "SELECT %s FROM %s" % (
        sql_table.domain.attributes[0].to_sql(),
        sql_table.table_name)
    with sql_table.backend.execute_sql_query(working_query) as cur:
        cur.fetchall()
def test_basic_stats(self):
    """BasicStats and DomainBasicStats agree on iris 'sepal length'."""
    iris = SqlTable(self.conn, self.iris, inspect_values=True)

    def verify(stats):
        # Shared expectations for sepal length, however the stats were built.
        self.assertAlmostEqual(stats.min, 4.3)
        self.assertAlmostEqual(stats.max, 7.9)
        self.assertAlmostEqual(stats.mean, 5.8, 1)
        self.assertEqual(stats.nans, 0)
        self.assertEqual(stats.non_nans, 150)

    verify(BasicStats(iris, iris.domain['sepal length']))

    domain_stats = DomainBasicStats(iris, include_metas=True)
    self.assertEqual(len(domain_stats.stats),
                     len(iris.domain) + len(iris.domain.metas))
    verify(domain_stats['sepal length'])
def test_joins(self):
    """A SqlTable can be constructed from an arbitrary join query."""
    query = """
        SELECT a."sepal length",
               b."petal length",
               CASE WHEN b."petal length" < 3 THEN '<' ELSE '>' END
                   AS "qualitative petal length"
          FROM iris a
         INNER JOIN iris b ON a."sepal width" = b."sepal width"
         WHERE a."petal width" < 1
         ORDER BY a."sepal length", b."petal length" ASC"""
    hints = Domain([DiscreteVariable(name="qualitative petal length",
                                     values=['<', '>'])], [])
    table = SqlTable(self.conn, query, type_hints=hints)
    self.assertEqual(len(table), 498)
    self.assertAlmostEqual(list(table[497]), [5.8, 1.2, 0.])
def test_query_subset_of_rows(self):
    """_query(rows=...) honors range and slice selections."""
    table = SqlTable(self.conn, self.iris)
    all_results = list(table._query())

    cases = (
        (range(10), 10, all_results[:10]),
        (range(10), 10, all_results[:10]),
        (slice(None, 10), 10, all_results[:10]),
        (slice(10, None), 140, all_results[10:]),
    )
    for rows, expected_len, expected in cases:
        results = list(table._query(rows=rows))
        self.assertEqual(len(results), expected_len)
        self.assertSequenceEqual(results, expected)
def test_XY_large(self):
    """Above AUTO_DL_LIMIT, X/Y access raises until data is downloaded."""
    from Orange.data.sql.table import AUTO_DL_LIMIT as DLL

    mat = np.random.randint(0, 2, (DLL + 100, 3))
    conn, table_name = self.create_sql_table(mat)
    class_var = DiscreteVariable(name='col2', values=['0', '1', '2'])
    sql_table = SqlTable(conn, table_name,
                         type_hints=Domain([], class_var))

    # Implicit access and an insufficient explicit limit both raise.
    self.assertRaises(ValueError, lambda: sql_table.X)
    self.assertRaises(ValueError, lambda: sql_table.Y)
    with self.assertRaises(ValueError):
        sql_table.download_data(DLL + 10)

    # Download partial data
    sql_table.download_data(DLL + 10, partial=True)
    assert_almost_equal(sql_table.X, mat[:DLL + 10, :2])
    assert_almost_equal(sql_table.Y.flatten()[:DLL + 10],
                        mat[:DLL + 10, 2])

    # Download all data
    sql_table.download_data()
    assert_almost_equal(sql_table.X, mat[:, :2])
    assert_almost_equal(sql_table.Y.flatten(), mat[:, 2])
def test_NaiveBayes(self):
    """Naive Bayes on the discretized SQL iris table scores between 0.95 and 1."""
    species = ['Iris-setosa', 'Iris-virginica', 'Iris-versicolor']
    table = SqlTable(self.conn, self.iris,
                     type_hints=Domain([],
                                       DiscreteVariable("iris",
                                                        values=species)))
    disc = preprocess.Discretize()
    table = disc(table)
    clf = nb.NaiveBayesLearner()(table)

    # Single instance prediction
    self.assertEqual(clf(table[0]), table[0].get_class())

    # Table prediction
    pred = clf(table)
    actual = array([ins.get_class() for ins in table])
    hits = pred == actual
    accuracy = hits.sum() / len(hits)
    self.assertGreater(accuracy, 0.95)
    self.assertLess(accuracy, 1.)
def test_universal_table(self):
    """A five-way self-join pivot over the universal table opens cleanly."""
    _, table_name = self.construct_universal_table()
    quoted_name = '"%s"' % table_name
    query = """
        SELECT v1.col2 as v1, v2.col2 as v2, v3.col2 as v3,
               v4.col2 as v4, v5.col2 as v5
          FROM %(table_name)s v1
         INNER JOIN %(table_name)s v2 ON v2.col0 = v1.col0 AND v2.col1 = 2
         INNER JOIN %(table_name)s v3 ON v3.col0 = v2.col0 AND v3.col1 = 3
         INNER JOIN %(table_name)s v4 ON v4.col0 = v1.col0 AND v4.col1 = 4
         INNER JOIN %(table_name)s v5 ON v5.col0 = v1.col0 AND v5.col1 = 5
         WHERE v1.col1 = 1
         ORDER BY v1.col0
    """ % dict(table_name=quoted_name)
    # Construction alone is the assertion: it must not raise.
    SqlTable(self.conn, query)
    self.drop_sql_table(table_name)