def test_partial_pii_table(self):
        """Scan a two-column table and expect PII on the first column only.

        After ``scan`` with ``self.data_generator`` the table must report
        PII, column "a" must be flagged, column "b" must not, and
        ``get_dict()`` must list ``PiiTypes.PHONE`` for "a" alone.
        """
        partial_table = Table(Schema("public"), "partial_pii")
        for column_name in ("a", "b"):
            partial_table.add_child(Column(column_name))

        partial_table.scan(self.data_generator)
        self.assertTrue(partial_table.has_pii())

        children = partial_table.get_children()
        self.assertTrue(children[0].has_pii())
        self.assertFalse(children[1].has_pii())

        expected = {
            "columns": [
                {"name": "a", "pii_types": [PiiTypes.PHONE]},
                {"name": "b", "pii_types": []},
            ],
            "has_pii": True,
            "name": "partial_pii",
        }
        self.assertEqual(expected, partial_table.get_dict())
    def test_full_pii_table(self):
        """Scan a table whose two columns are both expected to carry PII.

        Checks the table-level flag, both column-level flags, and the
        ``get_dict()`` output (PERSON for "name", LOCATION for "location").
        """
        full_table = Table(Schema("public"), "full_pii")
        for column_name in ("name", "location"):
            full_table.add_child(Column(column_name))

        full_table.scan(self.data_generator)
        self.assertTrue(full_table.has_pii())

        children = full_table.get_children()
        self.assertTrue(children[0].has_pii())
        self.assertTrue(children[1].has_pii())

        expected = {
            "columns": [
                {"name": "name", "pii_types": [PiiTypes.PERSON]},
                {"name": "location", "pii_types": [PiiTypes.LOCATION]},
            ],
            "has_pii": True,
            "name": "full_pii",
        }
        self.assertEqual(expected, full_table.get_dict())
    def test_partial_pii_table(self):
        """Scan table 'partial_pii' and expect PII on column 'a' but not 'b'.

        Also verifies that ``get_dict()`` reports ``PiiTypes.PHONE`` for the
        first column and an empty type list for the second.
        """
        partial_table = Table(Schema('public'), 'partial_pii')
        for column_name in ('a', 'b'):
            partial_table.add_child(Column(column_name))

        partial_table.scan(self.data_generator)
        self.assertTrue(partial_table.has_pii())

        children = partial_table.get_children()
        self.assertTrue(children[0].has_pii())
        self.assertFalse(children[1].has_pii())

        expected = {
            'columns': [
                {'name': 'a', 'pii_types': [PiiTypes.PHONE]},
                {'name': 'b', 'pii_types': []},
            ],
            'has_pii': True,
            'name': 'partial_pii',
        }
        self.assertEqual(expected, partial_table.get_dict())
    def setUp(self):
        """Create a MockExplorer holding one database/schema/table fixture.

        The fixture table "t1" has two columns: "c1" with no PII assigned and
        "c2" whose ``_pii`` is set directly to ``[PiiTypes.LOCATION]``.
        """
        explorer_args = Namespace(
            host="mock_connection",
            include_schema=(),
            exclude_schema=(),
            include_table=(),
            exclude_table=(),
            catalog=None,
        )
        self.explorer = MockExplorer(explorer_args)

        plain_col = Column("c1")
        location_col = Column("c2")
        location_col._pii = [PiiTypes.LOCATION]

        # The table is constructed against schema "s1" ...
        fixture_table = Table(Schema("s1"), "t1")
        for column in (plain_col, location_col):
            fixture_table.add_child(column)

        # ... but is attached as a child of a separate "testSchema" schema.
        test_schema = Schema("testSchema")
        test_schema.add_child(fixture_table)

        self.explorer._database = Database("database")
        self.explorer._database.add_child(test_schema)
    def test_no_pii_table(self):
        """Scan a two-column table and expect no PII anywhere.

        The table-level flag must be false and ``get_dict()`` must list an
        empty ``pii_types`` for both columns.
        """
        clean_table = Table(Schema("public"), "no_pii")
        for column_name in ("a", "b"):
            clean_table.add_child(Column(column_name))

        clean_table.scan(self.data_generator)
        self.assertFalse(clean_table.has_pii())

        expected = {
            "columns": [
                {"name": "a", "pii_types": []},
                {"name": "b", "pii_types": []},
            ],
            "has_pii": False,
            "name": "no_pii",
        }
        self.assertEqual(expected, clean_table.get_dict())
    def test_full_pii_table(self):
        """Scan table 'full_pii' and expect PII on both of its columns.

        ``get_dict()`` must report PERSON for 'name' and LOCATION for
        'location', with the table-level flag set.
        """
        full_table = Table(Schema('public'), 'full_pii')
        for column_name in ('name', 'location'):
            full_table.add_child(Column(column_name))

        full_table.scan(self.data_generator)
        self.assertTrue(full_table.has_pii())

        children = full_table.get_children()
        self.assertTrue(children[0].has_pii())
        self.assertTrue(children[1].has_pii())

        expected = {
            'columns': [
                {'name': 'name', 'pii_types': [PiiTypes.PERSON]},
                {'name': 'location', 'pii_types': [PiiTypes.LOCATION]},
            ],
            'has_pii': True,
            'name': 'full_pii',
        }
        self.assertEqual(expected, full_table.get_dict())
    def test_no_pii_table(self):
        """A shallow scan of columns "a" and "b" must not flag the table."""
        clean_table = Table(Schema("public"), "no_pii")
        for column_name in ("a", "b"):
            clean_table.add_child(Column(column_name))

        clean_table.shallow_scan()
        self.assertFalse(clean_table.has_pii())
    def test_no_pii_table(self):
        """A shallow scan of columns 'a' and 'b' must not flag the table."""
        clean_table = Table(Schema('public'), 'no_pii')
        for column_name in ('a', 'b'):
            clean_table.add_child(Column(column_name))

        clean_table.shallow_scan()
        self.assertFalse(clean_table.has_pii())
    def get_no_pii_table():
        """Return a "no_pii" table under "test_store" with columns "a" and "b".

        Neither column has any PII type added to it.
        """
        table = Table("test_store", "no_pii")
        columns = [Column(column_name) for column_name in ("a", "b")]

        for column in columns:
            table.add_child(column)

        return table
    def get_partial_pii_table():
        """Return a "partial_pii" table where only column "a" carries PII.

        Column "a" is tagged with ``PiiTypes.PHONE``; column "b" is left
        untagged.
        """
        table = Table("test_store", "partial_pii")

        phone_col = Column("a")
        phone_col.add_pii_type(PiiTypes.PHONE)
        untagged_col = Column("b")

        for column in (phone_col, untagged_col):
            table.add_child(column)

        return table
    def test_partial_pii_table(self):
        """Shallow-scan columns 'fname' and 'b'; only 'fname' must be flagged."""
        partial_table = Table(Schema('public'), 'partial_pii')
        for column_name in ('fname', 'b'):
            partial_table.add_child(Column(column_name))

        partial_table.shallow_scan()
        self.assertTrue(partial_table.has_pii())

        children = partial_table.get_children()
        self.assertTrue(children[0].has_pii())
        self.assertFalse(children[1].has_pii())
    def test_partial_pii_table(self):
        """Shallow-scan columns "fname" and "b"; only "fname" must be flagged."""
        partial_table = Table(Schema("public"), "partial_pii")
        for column_name in ("fname", "b"):
            partial_table.add_child(Column(column_name))

        partial_table.shallow_scan()
        self.assertTrue(partial_table.has_pii())

        children = partial_table.get_children()
        self.assertTrue(children[0].has_pii())
        self.assertFalse(children[1].has_pii())
    def setUp(self):
        """Build ``self.schema`` ('testSchema') holding a single table 't1'.

        The table's ``_columns`` and the schema's ``tables`` are assigned
        directly; column 'c2' has ``_pii`` set to ``[PiiTypes.LOCATION]``.
        """
        plain_col = Column('c1')
        location_col = Column('c2')
        location_col._pii = [PiiTypes.LOCATION]

        self.schema = Schema('testSchema')

        fixture_table = Table(self.schema, 't1')
        fixture_table._columns = [plain_col, location_col]

        self.schema.tables = [fixture_table]
Beispiel #14
0
    def setUp(self):
        """Build ``self.schema`` ('testSchema') with one child table 't1'.

        The table gets columns 'c1' (no PII assigned) and 'c2' (``_pii`` set
        to ``[PiiTypes.LOCATION]``) via ``add_child``.
        """
        plain_col = Column('c1')
        location_col = Column('c2')
        location_col._pii = [PiiTypes.LOCATION]

        self.schema = Schema('testSchema')

        fixture_table = Table(self.schema, 't1')
        for column in (plain_col, location_col):
            fixture_table.add_child(column)

        self.schema.add_child(fixture_table)
    def get_full_pii_table():
        """Return a "full_pii" table under "test_store" with two tagged columns.

        Column "a" is tagged PHONE; column "b" is tagged ADDRESS and then
        LOCATION.
        """
        table = Table("test_store", "full_pii")

        phone_col = Column("a")
        phone_col.add_pii_type(PiiTypes.PHONE)

        place_col = Column("b")
        for pii_type in (PiiTypes.ADDRESS, PiiTypes.LOCATION):
            place_col.add_pii_type(pii_type)

        for column in (phone_col, place_col):
            table.add_child(column)

        return table
    def test_full_pii_table(self):
        """Shallow-scan columns 'name' and 'dob'; both must be flagged as PII."""
        full_table = Table(Schema('public'), 'full_pii')
        for column_name in ('name', 'dob'):
            full_table.add(Column(column_name))

        full_table.shallow_scan()
        self.assertTrue(full_table.has_pii())

        columns = full_table.get_columns()
        self.assertTrue(columns[0].has_pii())
        self.assertTrue(columns[1].has_pii())
Beispiel #17
0
    def setUp(self):
        """Build ``self.schema`` ("testSchema") with one child table "t1".

        The table gets columns "c1" (no PII assigned) and "c2" (``_pii`` set
        to ``[PiiTypes.LOCATION]``) via ``add_child``.
        """
        plain_col = Column("c1")
        location_col = Column("c2")
        location_col._pii = [PiiTypes.LOCATION]

        self.schema = Schema("testSchema")

        fixture_table = Table(self.schema, "t1")
        for column in (plain_col, location_col):
            fixture_table.add_child(column)

        self.schema.add_child(fixture_table)
Beispiel #18
0
    def _load_catalog(self):
        """Rebuild ``self._schemas`` from the catalog query if the cache is stale.

        Nothing happens when ``self._cache_ts`` is set and is not older than
        ten minutes. Otherwise the catalog query is executed and each row is
        read as (schema name, table name, column name). A new ``Schema`` or
        ``Table`` is started whenever the corresponding name differs from the
        one currently being built, so rows are grouped by consecutive runs.
        """
        cache_is_fresh = (
            self._cache_ts is not None
            and not self._cache_ts < datetime.now() - timedelta(minutes=10)
        )
        if cache_is_fresh:
            return

        with self._get_context_manager() as cursor:
            logging.debug("Catalog Query: %s", self._get_catalog_query())
            cursor.execute(self._get_catalog_query())
            self._schemas = []

            record = cursor.fetchone()

            open_schema = None
            open_table = None

            # Seed the accumulators from the first row, if there is one.
            if record is not None:
                open_schema = Schema(record[0])
                open_table = Table(open_schema, record[1])

            while record is not None:
                if open_schema.get_name() != record[0]:
                    # Schema changed: flush the open table and schema.
                    open_schema.tables.append(open_table)
                    self._schemas.append(open_schema)
                    open_schema = Schema(record[0])
                    open_table = Table(open_schema, record[1])
                elif open_table.get_name() != record[1]:
                    # Same schema, new table: flush only the open table.
                    open_schema.tables.append(open_table)
                    open_table = Table(open_schema, record[1])
                open_table.add(Column(record[2]))

                record = cursor.fetchone()

            # Flush whatever is still open after the last row.
            if not (open_schema is None or open_table is None):
                open_schema.tables.append(open_table)
                self._schemas.append(open_schema)

        self._cache_ts = datetime.now()
    def setUp(self):
        """Create a MockExplorer whose ``_schemas`` holds one fixture schema.

        The fixture table 't1' has its ``_columns`` assigned directly:
        'c1' with no PII assigned and 'c2' with ``_pii`` set to
        ``[PiiTypes.LOCATION]``.
        """
        explorer_args = Namespace(host="mock_connection", catalog=None)
        self.explorer = MockExplorer(explorer_args)

        plain_col = Column('c1')
        location_col = Column('c2')
        location_col._pii = [PiiTypes.LOCATION]

        # The table is constructed against schema 's1' ...
        fixture_table = Table(Schema('s1'), 't1')
        fixture_table._columns = [plain_col, location_col]

        # ... but is listed under a separate 'testSchema' schema.
        test_schema = Schema('testSchema')
        test_schema.tables = [fixture_table]

        self.explorer._schemas = [test_schema]
    def test_no_pii_table(self):
        """Scan table 'no_pii' and expect no PII on the table or its columns.

        ``get_dict()`` must list an empty ``pii_types`` for both 'a' and 'b'.
        """
        clean_table = Table(Schema('public'), 'no_pii')
        for column_name in ('a', 'b'):
            clean_table.add_child(Column(column_name))

        clean_table.scan(self.data_generator)
        self.assertFalse(clean_table.has_pii())

        expected = {
            'columns': [
                {'name': 'a', 'pii_types': []},
                {'name': 'b', 'pii_types': []},
            ],
            'has_pii': False,
            'name': 'no_pii',
        }
        self.assertEqual(expected, clean_table.get_dict())
 def test_positive_scan_column(self):
     """Scanning "Jonathan Smith" must tag the column as PiiTypes.PERSON."""
     person_col = Column("col")
     scanners = [RegexScanner(), NERScanner()]
     person_col.scan("Jonathan Smith", scanners)

     self.assertTrue(person_col.has_pii())

     expected = {"pii_types": [PiiTypes.PERSON], "name": "col"}
     self.assertEqual(expected, person_col.get_dict())
 def test_positive_scan_column(self):
     """Scanning 'Jonathan Smith' must tag the column as PiiTypes.PERSON."""
     person_col = Column('col')
     scanners = [RegexScanner(), NERScanner()]
     person_col.scan('Jonathan Smith', scanners)

     self.assertTrue(person_col.has_pii())

     expected = {'pii_types': [PiiTypes.PERSON], 'name': 'col'}
     self.assertEqual(expected, person_col.get_dict())
Beispiel #23
0
    def setUp(self):
        """Create a MockExplorer holding one database/schema/table fixture.

        The fixture table 't1' has two columns: 'c1' with no PII assigned and
        'c2' whose ``_pii`` is set directly to ``[PiiTypes.LOCATION]``.
        """
        explorer_args = Namespace(
            host="mock_connection",
            include_schema=(),
            exclude_schema=(),
            include_table=(),
            exclude_table=(),
            catalog=None,
        )
        self.explorer = MockExplorer(explorer_args)

        plain_col = Column('c1')
        location_col = Column('c2')
        location_col._pii = [PiiTypes.LOCATION]

        # The table is constructed against schema 's1' ...
        fixture_table = Table(Schema('s1'), 't1')
        for column in (plain_col, location_col):
            fixture_table.add_child(column)

        # ... but is attached as a child of a separate 'testSchema' schema.
        test_schema = Schema('testSchema')
        test_schema.add_child(fixture_table)

        self.explorer._database = Database('database')
        self.explorer._database.add_child(test_schema)
Beispiel #24
0
    def _load_catalog(self):
        """Rebuild ``self._database`` from the catalog query if the cache is stale.

        Nothing happens when ``self._cache_ts`` is set and is not older than
        ten minutes. Otherwise a fresh ``Database`` is created (with the
        schema include/exclude filters), the catalog query is executed, and
        each row is read as (schema name, table name, column name). A new
        ``Schema`` or ``Table`` is started whenever the corresponding name
        differs from the one currently being built, so rows are grouped by
        consecutive runs.
        """
        if (
            self._cache_ts is not None
            and self._cache_ts >= datetime.now() - timedelta(minutes=10)
        ):
            return

        with self._get_context_manager() as cursor:
            # Build the query once so the logged statement is exactly the one
            # that gets executed; pass it as a lazy logging argument so no
            # formatting work is done when DEBUG is disabled.
            catalog_query = self._get_catalog_query()
            logging.debug("Catalog Query: %s", catalog_query)
            cursor.execute(catalog_query)
            self._database = Database(
                "database",
                include=self._include_schema,
                exclude=self._exclude_schema,
            )

            row = cursor.fetchone()

            current_schema = None
            current_table = None

            # Seed the accumulators from the first row, if there is one.
            if row is not None:
                current_schema = Schema(
                    row[0], include=self._include_table, exclude=self._exclude_table
                )
                current_table = Table(current_schema, row[1])

            while row is not None:
                if current_schema.get_name() != row[0]:
                    # Schema changed: flush the open table and schema.
                    current_schema.add_child(current_table)
                    self._database.add_child(current_schema)
                    current_schema = Schema(
                        row[0],
                        include=self._include_table,
                        exclude=self._exclude_table,
                    )
                    current_table = Table(current_schema, row[1])
                elif current_table.get_name() != row[1]:
                    # Same schema, new table: flush only the open table.
                    current_schema.add_child(current_table)
                    current_table = Table(current_schema, row[1])
                current_table.add_child(Column(row[2]))

                row = cursor.fetchone()

            # Flush whatever is still open after the last row.
            if current_schema is not None and current_table is not None:
                current_schema.add_child(current_table)
                self._database.add_child(current_schema)

        self._cache_ts = datetime.now()
 def test_null_scan_column(self):
     """Scanning ``None`` must leave the column without any PII types."""
     null_col = Column('col')
     scanners = [RegexScanner(), NERScanner()]
     null_col.scan(None, scanners)

     self.assertFalse(null_col.has_pii())

     expected = {'pii_types': [], 'name': 'col'}
     self.assertEqual(expected, null_col.get_dict())
 def test_null_scan_column(self):
     """Scanning ``None`` must leave the column without any PII types."""
     null_col = Column("col")
     scanners = [RegexScanner(), NERScanner()]
     null_col.scan(None, scanners)

     self.assertFalse(null_col.has_pii())

     expected = {"pii_types": [], "name": "col"}
     self.assertEqual(expected, null_col.get_dict())