def testComputeRowsWithTableEmptyResult(self):
    """Check that _ComputeRows() yields no rows when every column is empty."""
    # Models a query of the form
    #   SELECT
    #     column AS HP1122,
    #     COUNT(column) AS cnt_column
    #   FROM table
    #   GROUP BY HP1122
    #   HAVING (cnt_column > 1)
    prefix = util.PSEUDONYM_PREFIX
    pseudonym_column = util.PseudonymToken('column').SetAlias('HP1122')
    count_column = util.AggregationQueryToken('COUNT(column)').SetAlias(
        'cnt_column')
    stack = [[pseudonym_column], [count_column]]

    # Every queried column comes back with zero values; each key gets its
    # own (distinct) empty list.
    column_names = (
        'cnt_column',
        '%scolumn' % prefix,
        'COUNT(%scolumn)' % prefix,
        'HP1122',
    )
    queried_values = dict((name, []) for name in column_names)

    self.assertEqual(
        encrypted_bigquery_client._ComputeRows(stack, queried_values), [])
def testComputeRowsWithTableNoManifest(self):
    """Test _ComputeRows() with a query that returns simple row values."""
    # Query is
    #   SELECT
    #     column AS HP11223344,
    #     COUNT(column) AS cnt_column
    #   FROM table
    #   GROUP BY HP11223344
    #   HAVING (cnt_column > 1)
    prefix = util.PSEUDONYM_PREFIX
    stack = [
        [util.PseudonymToken('column').SetAlias('HP1122')],
        [
            util.AggregationQueryToken('COUNT(column)').SetAlias(
                'cnt_column')
        ],
    ]
    # This is just example data. The values have no meaning to the test.
    # The counts are plain int literals: the Python 2 `L` (long) suffix is a
    # syntax error on Python 3, and on Python 2 ints and longs of the same
    # value compare equal.
    queried_values = {
        'cnt_column': [
            2, 6, 6, 4, 2, 5, 2, 7, 6, 2, 3, 2, 2, 4, 4, 4, 7, 5, 4, 3, 2,
            5, 2, 4, 3
        ],
        '%scolumn' % prefix: [
            '"90283714"', '"17007482"', '"40222489"', '"81558712"',
            '"13579536"', '"50946068"', '"82310121"', '"25566093"',
            '"27641327"', '"74384378"', '"58169219"', '"42514380"',
            '"53429045"', '"23220692"', '"28206670"', '"85131464"',
            '"52975787"', '"39194916"', '"96266483"', '"88770661"',
            '"16849538"', '"90717726"', '"49895676"', '"23519752"',
            '"19332978"'
        ],
        'COUNT(%scolumn)' % prefix: [],
        'HP1122': [],
    }
    # NOTE(review): the visible body stops after building the fixtures; no
    # call to _ComputeRows() or assertion is present here -- confirm whether
    # the remainder of this test was lost.
def testFailOperationsOnEncryptions(self):
    """Check that three unsupported uses of encrypted fields are rejected.

    Each stack below must make RewriteSelectionCriteria() raise
    BigqueryInvalidQueryError.
    """
    schema = test_util.GetCarsSchema()
    key = test_util.GetMasterKey()

    def expect_invalid(tokens):
        # Callable form of assertRaises: the rewrite is invoked by the
        # assertion itself with exactly these positional arguments.
        self.assertRaises(
            bigquery_client.BigqueryInvalidQueryError,
            interpreter.RewriteSelectionCriteria,
            tokens, schema, key, _TABLE_ID)

    # Arithmetic followed by a comparison on a pseudonym field.
    expect_invalid([
        util.PseudonymToken('Year'),
        1,
        util.OperatorToken('+', 2),
        2000,
        util.OperatorToken('>=', 2),
    ])

    # The 'left' built-in applied to a probabilistic field.
    expect_invalid([
        util.ProbabilisticToken('Model'),
        2,
        util.BuiltInFunctionToken('left'),
    ])

    # The 'is_nan' built-in applied to a homomorphic integer field.
    expect_invalid([
        util.HomomorphicIntToken('Invoice_Price'),
        util.BuiltInFunctionToken('is_nan'),
    ])
def RewriteField(field):
    """Map a plain field token to the token type its schema entry calls for.

    `schema` is not a parameter; it is read from the enclosing scope.

    Args:
      field: any stack element. Only util.FieldToken instances are examined.

    Returns:
      A token of the encryption-specific type named by the schema entry's
      'encrypt' value (and 'type', for homomorphic entries), or `field`
      itself when it is not a FieldToken, has no schema entry, or its entry
      matches none of the handled encryption modes.
    """
    if not isinstance(field, util.FieldToken):
        return field

    entry = util.GetEntryFromSchema(field, schema)
    if not entry:
        return field

    mode = entry['encrypt']
    if mode.startswith('probabilistic'):
        return util.ProbabilisticToken(str(field))

    if mode == 'pseudonym':
        # Carry the schema's 'related' value over to the token when set.
        if entry.get('related', None) is None:
            return util.PseudonymToken(str(field))
        return util.PseudonymToken(str(field), related=entry['related'])

    if mode == 'homomorphic':
        # 'type' is only consulted for homomorphic entries.
        value_type = entry['type']
        if value_type == 'integer':
            return util.HomomorphicIntToken(str(field))
        if value_type == 'float':
            return util.HomomorphicFloatToken(str(field))
    elif mode == 'searchwords':
        return util.SearchwordsToken(str(field))

    return field
def testRewriteEncryptedFields(self):
    """Check _RewriteEncryptedFields() against the cars test schema."""
    # (field name, token type expected after the rewrite)
    expectations = [
        ('Year', util.FieldToken),
        ('Model', util.ProbabilisticToken),
        ('Make', util.PseudonymToken),
        ('Invoice_Price', util.HomomorphicIntToken),
        ('Price', util.ProbabilisticToken),
        ('Website', util.SearchwordsToken),
        ('Description', util.SearchwordsToken),
    ]
    # All inputs start out as plain field tokens.
    queries = [util.FieldToken(name) for name, _ in expectations]
    rewritten_queries = [
        token_type(name) for name, token_type in expectations
    ]

    test_schema = test_util.GetCarsSchema()
    new_queries = query_lib._RewriteEncryptedFields([queries], test_schema)
    self.assertEqual(new_queries, [rewritten_queries])
def testEncryptedContains(self):
    """Exercise RewriteSelectionCriteria() on 'contains' expressions."""
    schema = test_util.GetCarsSchema()
    key = test_util.GetMasterKey()

    def rewrite(tokens, table_schema):
        return interpreter.RewriteSelectionCriteria(
            tokens, table_schema, key, _TABLE_ID)

    def expect_invalid(tokens, table_schema):
        self.assertRaises(
            bigquery_client.BigqueryInvalidQueryError,
            interpreter.RewriteSelectionCriteria,
            tokens, table_schema, key, _TABLE_ID)

    # Expected rewrite of `Model contains "A"` on the searchwords column.
    model_column = util.SEARCHWORDS_PREFIX + 'Model'
    model_contains = (
        '(' + model_column + ' contains '
        'to_base64(left(bytes(sha1(concat(left(' + model_column +
        ", 24), 'yB9HY2qv+DI='))), 8)))")

    # Plain field wrapped in string(): only the operator case changes.
    self.assertEqual(
        rewrite([
            util.FieldToken('Year'),
            util.BuiltInFunctionToken('string'),
            util.StringLiteralToken('"1"'),
            util.OperatorToken('CONTAINS', 2),
        ], schema),
        '(string(Year) contains "1")')

    # Searchwords field against a string literal.
    self.assertEqual(
        rewrite([
            util.SearchwordsToken('Model'),
            util.StringLiteralToken('"A"'),
            util.OperatorToken('contains', 2),
        ], schema),
        model_contains)

    # Searchwords field against another field.
    expect_invalid([
        util.SearchwordsToken('Model'),
        util.FieldToken('Year'),
        util.OperatorToken('contains', 2),
    ], schema)

    # Pseudonym field on the left-hand side.
    expect_invalid([
        util.PseudonymToken('Make'),
        'A',
        util.OperatorToken('contains', 2),
    ], schema)

    # Searchwords field on both sides.
    expect_invalid([
        util.SearchwordsToken('Model'),
        util.SearchwordsToken('Model'),
        util.OperatorToken('contains', 2),
    ], schema)

    # Searchwords field on the right-hand side.
    expect_invalid([
        'Hello',
        util.SearchwordsToken('Model'),
        util.OperatorToken('contains', 2),
    ], schema)

    # Negated form of the valid searchwords query.
    self.assertEqual(
        rewrite([
            util.SearchwordsToken('Model'),
            util.StringLiteralToken('"A"'),
            util.OperatorToken('contains', 2),
            util.OperatorToken('not', 1),
        ], schema),
        'not ' + model_contains)

    # Dotted (nested) field name, checked against the places schema.
    places_schema = test_util.GetPlacesSchema()
    place_column = 'citiesLived.' + util.SEARCHWORDS_PREFIX + 'place'
    self.assertEqual(
        rewrite([
            util.SearchwordsToken('citiesLived.place'),
            util.StringLiteralToken('"A"'),
            util.OperatorToken('contains', 2),
        ], places_schema),
        '(' + place_column + ' contains '
        'to_base64(left(bytes(sha1(concat(left(' + place_column +
        ", 24), 'cBKPKGiY2cg='))), 8)))")
def testEncryptedEquality(self):
    """Exercise RewriteSelectionCriteria() on equality comparisons."""
    # NOTE(review): a second definition named testEncryptedEquality appears
    # further down in this view; if both live in the same class the later
    # one replaces this one -- confirm.
    schema = test_util.GetCarsSchema()
    key = test_util.GetMasterKey()

    def rewrite(tokens, table_schema):
        return interpreter.RewriteSelectionCriteria(
            tokens, table_schema, key, _TABLE_ID)

    def expect_invalid(tokens, table_schema):
        self.assertRaises(
            bigquery_client.BigqueryInvalidQueryError,
            interpreter.RewriteSelectionCriteria,
            tokens, table_schema, key, _TABLE_ID)

    make_column = util.PSEUDONYM_PREFIX + 'Make'
    # Value the tests expect in place of the literal "Hello".
    hello_cipher = '"HS57DHbh2KlkqNJREmu1wQ=="'

    # Plain field with arithmetic.
    self.assertEqual(
        rewrite([
            util.FieldToken('Year'),
            1,
            util.OperatorToken('+', 2),
            2000,
            util.OperatorToken('=', 2),
        ], schema),
        '((Year + 1) = 2000)')

    # Plain field compared with a pseudonym field.
    self.assertEqual(
        rewrite([
            util.FieldToken('Year'),
            util.PseudonymToken('Make'),
            util.OperatorToken('=', 2),
        ], schema),
        '(Year = ' + make_column + ')')

    # Pseudonym field == literal.
    self.assertEqual(
        rewrite([
            util.PseudonymToken('Make'),
            util.StringLiteralToken('"Hello"'),
            util.OperatorToken('==', 2),
        ], schema),
        '(' + make_column + ' == ' + hello_cipher + ')')

    # Literal == pseudonym field.
    self.assertEqual(
        rewrite([
            util.StringLiteralToken('"Hello"'),
            util.PseudonymToken('Make'),
            util.OperatorToken('==', 2),
        ], schema),
        '(' + hello_cipher + ' == ' + make_column + ')')

    # Literal != pseudonym field.
    self.assertEqual(
        rewrite([
            util.StringLiteralToken('"Hello"'),
            util.PseudonymToken('Make'),
            util.OperatorToken('!=', 2),
        ], schema),
        '(' + hello_cipher + ' != ' + make_column + ')')

    # Two pseudonym fields.
    self.assertEqual(
        rewrite([
            util.PseudonymToken('Make'),
            util.PseudonymToken('Make2'),
            util.OperatorToken('==', 2),
        ], schema),
        '(' + make_column + ' == ' + util.PSEUDONYM_PREFIX + 'Make2)')

    # Homomorphic integer field in an equality.
    expect_invalid([
        util.HomomorphicIntToken('Invoice_Price'),
        2,
        util.OperatorToken('==', 2),
    ], schema)

    # Pseudonym field compared with a probabilistic field.
    expect_invalid([
        util.PseudonymToken('Make'),
        util.ProbabilisticToken('Price'),
        util.OperatorToken('=', 2),
    ], schema)

    # Dotted (nested) pseudonym field, checked against the places schema.
    places_schema = test_util.GetPlacesSchema()
    self.assertEqual(
        rewrite([
            util.PseudonymToken('spouse.spouseName'),
            util.StringLiteralToken('"Hello"'),
            util.OperatorToken('=', 2),
        ], places_schema),
        '(spouse.' + util.PSEUDONYM_PREFIX + 'spouseName = ' +
        hello_cipher + ')')
def testEncryptedEquality(self):
    """Exercise RewriteSelectionCriteria() on equality comparisons.

    Includes cases for pseudonym tokens that carry a `related` attribute.
    """
    schema = test_util.GetCarsSchema()
    key = test_util.GetMasterKey()

    def rewrite(tokens, table_schema):
        return interpreter.RewriteSelectionCriteria(
            tokens, table_schema, key, _TABLE_ID)

    def expect_invalid(tokens, table_schema):
        self.assertRaises(
            bigquery_client.BigqueryInvalidQueryError,
            interpreter.RewriteSelectionCriteria,
            tokens, table_schema, key, _TABLE_ID)

    make_column = util.PSEUDONYM_PREFIX + 'Make'
    # Value the tests expect in place of the literal "Hello" when no
    # `related` option is involved.
    hello_cipher = '"HS57DHbh2KlkqNJREmu1wQ=="'

    # Plain field with arithmetic.
    self.assertEqual(
        rewrite([
            util.FieldToken('Year'),
            1,
            util.OperatorToken('+', 2),
            2000,
            util.OperatorToken('=', 2),
        ], schema),
        '((Year + 1) = 2000)')

    # Plain field compared with a pseudonym field.
    self.assertEqual(
        rewrite([
            util.FieldToken('Year'),
            util.PseudonymToken('Make'),
            util.OperatorToken('=', 2),
        ], schema),
        '(Year = ' + make_column + ')')

    # Pseudonym field == literal.
    self.assertEqual(
        rewrite([
            util.PseudonymToken('Make'),
            util.StringLiteralToken('"Hello"'),
            util.OperatorToken('==', 2),
        ], schema),
        '(' + make_column + ' == ' + hello_cipher + ')')

    # --- 'related' schema option -------------------------------------
    # A second copy of the cars schema whose 'Make' entry has 'related'.
    related_schema = test_util.GetCarsSchema()
    for entry in related_schema:
        if field_is_make(entry):
            entry['related'] = 'cars_name'

    # With `related` on both the token and the schema entry, the expected
    # value differs from the plain case (computed from the related name
    # instead of _TABLE_ID).
    self.assertEqual(
        rewrite([
            util.PseudonymToken('Make', related=_RELATED),
            util.StringLiteralToken('"Hello"'),
            util.OperatorToken('==', 2),
        ], related_schema),
        '(' + make_column + ' == "sspWKAH/NKuUyX8ji1mmSw==")')

    # A token carrying `related` is rejected when the schema entry has none.
    expect_invalid([
        util.StringLiteralToken('"Hello"'),
        util.PseudonymToken('Make', related=_RELATED),
        util.OperatorToken('==', 2),
    ], schema)
    # --- end of 'related' cases --------------------------------------

    # Literal == pseudonym field.
    self.assertEqual(
        rewrite([
            util.StringLiteralToken('"Hello"'),
            util.PseudonymToken('Make'),
            util.OperatorToken('==', 2),
        ], schema),
        '(' + hello_cipher + ' == ' + make_column + ')')

    # Literal != pseudonym field.
    self.assertEqual(
        rewrite([
            util.StringLiteralToken('"Hello"'),
            util.PseudonymToken('Make'),
            util.OperatorToken('!=', 2),
        ], schema),
        '(' + hello_cipher + ' != ' + make_column + ')')

    # Two pseudonym fields.
    self.assertEqual(
        rewrite([
            util.PseudonymToken('Make'),
            util.PseudonymToken('Make2'),
            util.OperatorToken('==', 2),
        ], schema),
        '(' + make_column + ' == ' + util.PSEUDONYM_PREFIX + 'Make2)')

    # Homomorphic integer field in an equality.
    expect_invalid([
        util.HomomorphicIntToken('Invoice_Price'),
        2,
        util.OperatorToken('==', 2),
    ], schema)

    # Pseudonym field compared with a probabilistic field.
    expect_invalid([
        util.PseudonymToken('Make'),
        util.ProbabilisticToken('Price'),
        util.OperatorToken('=', 2),
    ], schema)

    # Dotted (nested) pseudonym field, checked against the places schema.
    places_schema = test_util.GetPlacesSchema()
    self.assertEqual(
        rewrite([
            util.PseudonymToken('spouse.spouseName'),
            util.StringLiteralToken('"Hello"'),
            util.OperatorToken('=', 2),
        ], places_schema),
        '(spouse.' + util.PSEUDONYM_PREFIX + 'spouseName = ' +
        hello_cipher + ')')


def field_is_make(entry):
    """Return True when a schema entry's name is 'Make'."""
    return entry['name'] == 'Make'
def testRewriteAggregations(self):
    """Exercise _RewriteAggregations() on plain and encrypted operands.

    Each case passes a single postfix stack (wrapped in a list) and checks
    either the rewritten stack or that BigqueryInvalidQueryError is raised.
    """

    def expect_rewrite(tokens, expected):
        self.assertEqual(
            query_lib._RewriteAggregations([tokens], _TEST_NSQUARE),
            [expected])

    def expect_invalid(tokens):
        self.assertRaises(
            bigquery_client.BigqueryInvalidQueryError,
            query_lib._RewriteAggregations, [tokens], _TEST_NSQUARE)

    def paillier_sum(column):
        # Expected server-side expression for a homomorphic column's sum.
        return ('TO_BASE64(BYTES(PAILLIER_SUM(FROM_BASE64(' + column +
                "), '0')))")

    invoice_column = util.HOMOMORPHIC_INT_PREFIX + 'Invoice_Price'
    holdback_column = util.HOMOMORPHIC_FLOAT_PREFIX + 'Holdback_Percentage'
    count_invoice = 'COUNT(' + invoice_column + ')'
    count_holdback = 'COUNT(' + holdback_column + ')'

    # --- COUNT -------------------------------------------------------
    expect_rewrite(
        [util.CountStarToken(), util.AggregationFunctionToken('COUNT', 1)],
        ['COUNT(*)'])

    expect_rewrite(
        [
            util.ProbabilisticToken('Price'),
            util.AggregationFunctionToken('COUNT', 1),
        ],
        ['COUNT(' + util.PROBABILISTIC_PREFIX + 'Price)'])

    expect_rewrite(
        [
            util.ProbabilisticToken('Price'),
            4,
            util.AggregationFunctionToken('COUNT', 2),
        ],
        ['COUNT(' + util.PROBABILISTIC_PREFIX + 'Price, 4)'])

    # Two aggregations combined with an operator stay separate entries.
    expect_rewrite(
        [
            util.FieldToken('Year'),
            5,
            util.AggregationFunctionToken('DISTINCTCOUNT', 2),
            util.FieldToken('Year'),
            util.AggregationFunctionToken('COUNT', 1),
            util.OperatorToken('+', 2),
        ],
        ['COUNT(DISTINCT Year, 5)', 'COUNT(Year)', '+'])

    # Constant arguments are expected already evaluated: cos(0) -> 1.0.
    expect_rewrite(
        [
            0,
            util.BuiltInFunctionToken('cos'),
            util.AggregationFunctionToken('COUNT', 1),
        ],
        ['COUNT(1.0)'])

    # concat(left("Hello", 2), "y") is expected as "Hey".
    expect_rewrite(
        [
            util.StringLiteralToken('"Hello"'),
            2,
            util.BuiltInFunctionToken('left'),
            util.StringLiteralToken('"y"'),
            util.BuiltInFunctionToken('concat'),
            util.AggregationFunctionToken('GROUP_CONCAT', 1),
        ],
        ['GROUP_CONCAT("Hey")'])

    # --- SUM / AVG ---------------------------------------------------
    expect_rewrite(
        [
            util.FieldToken('Year'),
            util.FieldToken('Year'),
            util.OperatorToken('*', 2),
            util.AggregationFunctionToken('SUM', 1),
        ],
        ['SUM((Year * Year))'])

    expect_rewrite(
        [
            util.HomomorphicIntToken('Invoice_Price'),
            util.AggregationFunctionToken('SUM', 1),
        ],
        [
            0.0, count_invoice, '*',
            1.0, paillier_sum(invoice_column), '*',
            '+',
        ])

    expect_rewrite(
        [
            util.HomomorphicFloatToken('Holdback_Percentage'),
            util.AggregationFunctionToken('AVG', 1),
        ],
        [
            0.0, count_holdback, '*',
            1.0, paillier_sum(holdback_column), '*',
            '+',
            count_holdback, '/',
        ])

    # SUM((Invoice_Price + 2) * 5)
    expect_rewrite(
        [
            util.HomomorphicIntToken('Invoice_Price'),
            2,
            util.OperatorToken('+', 2),
            5,
            util.OperatorToken('*', 2),
            util.AggregationFunctionToken('SUM', 1),
        ],
        [
            0.0, count_invoice, '*',
            5.0, paillier_sum(invoice_column), '*',
            '+',
            0.0, count_invoice, '*',
            1.0, 'SUM((2 * 5))', '*',
            '+',
            '+',
        ])

    # --- DISTINCTCOUNT / TOP on plain and pseudonym fields -----------
    expect_rewrite(
        [
            util.PseudonymToken('Make'),
            2,
            util.AggregationFunctionToken('DISTINCTCOUNT', 2),
        ],
        ['COUNT(DISTINCT ' + util.PSEUDONYM_PREFIX + 'Make, 2)'])

    expect_rewrite(
        [util.FieldToken('Year'), util.AggregationFunctionToken('TOP', 1)],
        ['TOP(Year)'])

    expect_rewrite(
        [
            util.PseudonymToken('Make'),
            5,
            1,
            util.AggregationFunctionToken('TOP', 3),
        ],
        ['TOP(' + util.PSEUDONYM_PREFIX + 'Make, 5, 1)'])

    # SUM(cos(Year) + Invoice_Price)
    expect_rewrite(
        [
            util.FieldToken('Year'),
            util.BuiltInFunctionToken('cos'),
            util.HomomorphicIntToken('Invoice_Price'),
            util.OperatorToken('+', 2),
            util.AggregationFunctionToken('SUM', 1),
        ],
        [
            0.0, count_invoice, '*',
            1.0, 'SUM(cos(Year))', '*',
            '+',
            0.0, count_invoice, '*',
            1.0, paillier_sum(invoice_column), '*',
            '+',
            '+',
        ])

    # --- Combinations that must be rejected --------------------------
    expect_invalid([
        util.ProbabilisticToken('Model'),
        util.AggregationFunctionToken('DISTINCTCOUNT', 1),
    ])

    expect_invalid([
        util.ProbabilisticToken('Price'),
        util.AggregationFunctionToken('SUM', 1),
    ])

    expect_invalid([
        util.HomomorphicIntToken('Invoice_Price'),
        util.HomomorphicFloatToken('Holdback_Percentage'),
        util.OperatorToken('*', 2),
        util.AggregationFunctionToken('SUM', 1),
    ])

    expect_invalid([
        util.HomomorphicFloatToken('Holdback_Percentage'),
        util.BuiltInFunctionToken('cos'),
        util.AggregationFunctionToken('SUM', 1),
    ])

    expect_invalid([
        util.HomomorphicIntToken('Invoice_Price'),
        util.AggregationFunctionToken('TOP', 1),
    ])

    # --- Nested aggregations: accepted on plain fields, rejected on
    # encrypted ones ---------------------------------------------------
    expect_rewrite(
        [
            util.FieldToken('Year'),
            util.AggregationFunctionToken('SUM', 1),
            util.AggregationFunctionToken('SUM', 1),
        ],
        ['SUM(SUM(Year))'])

    expect_invalid([
        util.HomomorphicIntToken('Invoice_Price'),
        util.AggregationFunctionToken('SUM', 1),
        util.AggregationFunctionToken('SUM', 1),
    ])

    expect_rewrite(
        [
            util.FieldToken('Year'),
            util.AggregationFunctionToken('GROUP_CONCAT', 1),
            util.AggregationFunctionToken('GROUP_CONCAT', 1),
        ],
        ['GROUP_CONCAT(GROUP_CONCAT(Year))'])

    expect_invalid([
        util.PseudonymToken('Make'),
        util.AggregationFunctionToken('GROUP_CONCAT', 1),
        util.AggregationFunctionToken('GROUP_CONCAT', 1),
    ])