def test_spark(self):
    engine = "spark"
    reader = SqlReader.from_connection(None, engine)
    assert(isinstance(reader, SqlReader))
    assert(isinstance(reader, SparkReader))
    assert(isinstance(reader.compare, NameCompare))
    assert(isinstance(reader.compare, SparkNameCompare))
    assert(isinstance(reader.serializer, Serializer))
    assert(isinstance(reader.serializer, SparkSerializer))

def test_presto(self):
    engine = "presto"
    reader = SqlReader.from_connection(None, engine)
    assert(isinstance(reader, SqlReader))
    assert(isinstance(reader, PrestoReader))
    assert(isinstance(reader.compare, NameCompare))
    assert(isinstance(reader.compare, PrestoNameCompare))
    assert(isinstance(reader.serializer, Serializer))
    assert(isinstance(reader.serializer, PrestoSerializer))

def test_sqlserver(self):
    engine = "sqlserver"
    reader = SqlReader.from_connection(None, engine)
    assert(isinstance(reader, SqlReader))
    assert(isinstance(reader, SqlServerReader))
    assert(isinstance(reader.compare, NameCompare))
    assert(isinstance(reader.compare, SqlServerNameCompare))
    assert(isinstance(reader.serializer, Serializer))
    assert(isinstance(reader.serializer, SqlServerSerializer))

def test_postgres(self):
    engine = "postgres"
    reader = SqlReader.from_connection(None, engine)
    assert(isinstance(reader, SqlReader))
    assert(isinstance(reader, PostgresReader))
    assert(isinstance(reader.compare, NameCompare))
    assert(isinstance(reader.compare, PostgresNameCompare))
    assert(isinstance(reader.serializer, Serializer))
    assert(isinstance(reader.serializer, PostgresSerializer))

def test_pandas(self):
    engine = "pandas"
    meta = Metadata.from_file(meta_path)
    df = pd.read_csv(csv_path)
    reader = SqlReader.from_connection(df, engine, metadata=meta)
    assert(isinstance(reader, SqlReader))
    assert(isinstance(reader, PandasReader))
    assert(isinstance(reader.compare, NameCompare))
    assert(isinstance(reader.compare, PandasNameCompare))
    assert(isinstance(reader.serializer, Serializer))
    assert(isinstance(reader.serializer, PandasSerializer))
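These tests only verify that SqlReader.from_connection dispatches on the engine string to the matching reader, name comparer, and serializer. Below is a minimal sketch (not part of the original tests) of actually querying through the pandas-backed reader; it assumes meta_path and csv_path point to the PUMS sample files set up for these tests and that the imports match the surrounding module.

# Sketch only: meta_path and csv_path are assumed to reference the PUMS
# sample metadata and CSV used elsewhere in these examples.
meta = Metadata.from_file(meta_path)
df = pd.read_csv(csv_path)
reader = SqlReader.from_connection(df, "pandas", metadata=meta)
# execute() runs the query with no privacy mechanism applied and returns
# the result rows.
rows = reader.execute('SELECT COUNT(age) FROM PUMS.PUMS')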
Example #6
    def from_connection(cls, conn, *ignore, privacy, metadata, engine=None, **kwargs):
        """Create a private reader over an established SQL connection.  If `engine` is not
        passed in, the engine will be automatically detected.

        :param conn: An established database connection.  Can be pyodbc, psycopg2, SparkSession, Pandas DataFrame, or Presto.
        :param privacy:  A Privacy object with epsilon, delta, and other privacy properties.  Keyword-only.
        :param metadata: The metadata describing the database.  `Metadata documentation is here <https://github.com/opendp/smartnoise-sdk/blob/new_opendp/sdk/Metadata.md>`_.  Keyword-only.
        :param engine: Optional keyword-only argument that can be used to specify engine-specific rules if automatic detection fails.  This should only be necessary when using an uncommon database or middleware.
        :returns: A `PrivateReader` object initialized to process queries against the supplied connection, using the supplied `Privacy` properties.

        .. code-block:: python
        
            privacy = Privacy(epsilon=1.0, delta=1/1000)
            metadata = 'datasets/PUMS.yaml'
            pums = pd.read_csv('datasets/PUMS.csv')
            reader = PrivateReader.from_connection(pums, privacy=privacy, metadata=metadata)
        """
        _reader = SqlReader.from_connection(conn, engine=engine, metadata=metadata, **kwargs)
        return PrivateReader(_reader, metadata, privacy=privacy)
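The docstring example stops after constructing the reader. The following continuation sketch (not from the original source) shows how the returned PrivateReader would typically be used.

# Continuation sketch, assuming the datasets/PUMS.csv and datasets/PUMS.yaml
# files from the docstring example are available locally.
privacy = Privacy(epsilon=1.0, delta=1/1000)
metadata = 'datasets/PUMS.yaml'
pums = pd.read_csv('datasets/PUMS.csv')
reader = PrivateReader.from_connection(pums, privacy=privacy, metadata=metadata)
# execute() applies the differential privacy mechanisms, so repeated runs of
# the same aggregate return noisy answers.
result = reader.execute('SELECT COUNT(age) FROM PUMS.PUMS')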
Example #7
import os
import subprocess

import pandas as pd

from snsql.xpath.parse import XPath
# Metadata, QueryParser, SqlReader, PrivateReader, and Privacy are imported
# from snsql in the original test module.

git_root_dir = subprocess.check_output(
    "git rev-parse --show-toplevel".split(" ")).decode("utf-8").strip()

meta_path = os.path.join(git_root_dir, "datasets", "PUMS_pid.yaml")
csv_path = os.path.join(git_root_dir, "datasets", "PUMS_pid.csv")

p = XPath()

meta = Metadata.from_file(meta_path)
pums = pd.read_csv(csv_path)
query = 'SELECT AVG(age) + 3, STD(age), VAR(age), SUM(age) / 10, COUNT(age) + 2 FROM PUMS.PUMS'
q = QueryParser(meta).query(query)
reader = SqlReader.from_connection(pums, "pandas", metadata=meta)
priv = PrivateReader(reader, meta, privacy=Privacy(epsilon=1.0))
subquery, root = priv._rewrite(query)


class TestXPathExecutionNoRewrite:
    def test_all_root_descend(self):
        path = '//*'  # returns value
        xx = p.parse(path)
        res = xx.evaluate(q)
        assert (len(res) > 40)
        assert (str(xx) == path)

    def test_all_with_condition(self):
        path = '//*[@left]'  # returns value
        xx = p.parse(path)
Example #8
def test_reserved(self):
    if connection_case is not None and has_postgres:
        reader = SqlReader.from_connection(connection_case, "postgres")
        res = reader.execute('SELECT "select" FROM nametests')
Example #9
def test_reserved(self):
    if connection is not None and has_sqlserver:
        reader = SqlReader.from_connection(connection, "sqlserver")
        res = reader.execute('SELECT "SELECT" FROM nametests')