def testDecryptValues(self):
    """Exercises _DecryptValues() on an integer column, a string column and
    a field that makes the call raise ValueError."""
    cars_schema = test_util.GetCarsSchema()
    jobs_schema = test_util.GetJobsSchema()
    master_key = test_util.GetMasterKey()

    # Integer column: three encrypted values followed by a null row.
    int_prefix = util.HOMOMORPHIC_INT_PREFIX
    int_field = '%sInvoice_Price' % int_prefix
    int_cipher = ecrypto.HomomorphicIntCipher(master_key)
    int_ciphers = {int_prefix: int_cipher}
    int_rows = self._EncryptTable(int_cipher, [[1], [2], [3]], 0)
    int_rows.append([None])
    decrypted_ints = encrypted_bigquery_client._DecryptValues(
        int_field, int_rows, 0, int_ciphers, cars_schema, int_prefix)
    self.assertEqual(
        decrypted_ints, [1, 2, 3, util.LiteralToken('null', None)])

    # String column (index 1) of a nested field, with a null row inserted
    # at position 1 after encryption.
    str_prefix = util.PSEUDONYM_PREFIX
    str_field = 'citiesLived.job.%sposition' % str_prefix
    plain_rows = [
        [0, unicode('Hello')],
        [1, unicode('My')],
        [-1, unicode('job')],
    ]
    str_cipher = ecrypto.PseudonymCipher(master_key)
    str_ciphers = {str_prefix: str_cipher}
    str_rows = self._EncryptTable(str_cipher, plain_rows, 1)
    str_rows.insert(1, [100, None])
    decrypted_strings = encrypted_bigquery_client._DecryptValues(
        str_field, str_rows, 1, str_ciphers, jobs_schema, str_prefix)
    expected_strings = [
        util.StringLiteralToken('"Hello"'),
        util.LiteralToken('null', None),
        util.StringLiteralToken('"My"'),
        util.StringLiteralToken('"job"'),
    ]
    self.assertEqual(decrypted_strings, expected_strings)

    # A field name that is expected to be rejected with ValueError.
    missing_field = '%snonexistent_field' % util.HOMOMORPHIC_FLOAT_PREFIX
    self.assertRaises(
        ValueError,
        encrypted_bigquery_client._DecryptValues,
        missing_field, str_rows, 1, str_ciphers, cars_schema,
        util.HOMOMORPHIC_FLOAT_PREFIX)
def testBooleanLiterals(self):
    """Checks infix rendering and evaluation of a postfix stack that mixes
    boolean literals, integers and operators."""
    postfix = []
    # (True or False)
    postfix.append(util.LiteralToken('True', True))
    postfix.append(util.LiteralToken('False', False))
    postfix.append(util.OperatorToken('or', 2))
    # (1 = 2)
    postfix.append(1)
    postfix.append(2)
    postfix.append(util.OperatorToken('=', 2))
    # Join both sub-expressions with a second 'or'.
    postfix.append(util.OperatorToken('or', 2))

    # ToInfix receives a copy; Evaluate receives the stack itself.
    infix = interpreter.ToInfix(list(postfix))
    self.assertEqual(infix, '((True or False) or (1 = 2))')
    self.assertEqual(interpreter.Evaluate(postfix), True)
def _GetTimestampValues(table, column_index): """Returns new rows with timestamp values converted from float to string.""" values = [] for i in range(len(table)): if table[i][column_index] is None: value = util.LiteralToken('null', None) else: f = float(table[i][column_index]) # this handles sci-notation too s = util.SecToTimestamp(f) value = util.LiteralToken('"%s"' % s, s) values.append(value) return values
def _DecryptGroupConcatValues(field, table, column_index, ciphers, schema,
                              prefix):
    """Decrypts a column holding comma-joined encrypted strings.

    Arguments:
      field: Aggregation text; must start with util.GROUP_CONCAT_PREFIX.
      table: Sequence of rows; each row is indexable by column_index.
      column_index: Position of the GROUP_CONCAT cell within every row.
      ciphers: Mapping from prefix to a cipher object exposing Decrypt().
      schema: Schema handed to util.GetFieldType() to look up the field type.
      prefix: Encryption prefix found in the last component of the field
        name; also the key used to pick the cipher.

    Raises:
      ValueError: If field does not start with util.GROUP_CONCAT_PREFIX, or
        the field type is not 'string', 'integer' or 'float'.
      bigquery_client.BigqueryInvalidQueryError: If the field type is
        'integer' or 'float' rather than 'string'.

    Returns:
      A list with one token per row: util.LiteralToken('null', None) for a
      None cell, otherwise a util.StringLiteralToken holding the decrypted,
      stripped words joined by commas inside double quotes.
    """
    if not field.startswith(util.GROUP_CONCAT_PREFIX):
        raise ValueError('Not a GROUP_CONCAT aggregation.')

    # With three or more space-separated words, the last two are dropped
    # (e.g. a trailing "within <record>" clause).
    words_in_field = field.split(' ')
    if len(words_in_field) >= 3:
        field = ' '.join(words_in_field[:-2])

    # Keep what follows the GROUP_CONCAT prefix, minus the final character.
    inner_name = field.split(util.GROUP_CONCAT_PREFIX)[1][:-1]
    # Remove the encryption prefix from the last dotted component.
    path = inner_name.split('.')
    path[-1] = path[-1].split(prefix)[1]
    field = '.'.join(path)

    value_type = util.GetFieldType(field, schema)
    if value_type not in ['string', 'integer', 'float']:
        raise ValueError('Not an known type.')
    if value_type != 'string':
        raise bigquery_client.BigqueryInvalidQueryError(
            'Cannot GROUP_CONCAT non-string type.', None, None, None)

    # The cipher is looked up only after the type checks have passed.
    cipher = ciphers[prefix]
    decrypted_column = []
    for row in table:
        cell = row[column_index]
        if cell is None:
            decrypted_column.append(util.LiteralToken('null', None))
        else:
            plain_words = [
                unicode(cipher.Decrypt(word.encode('utf-8'))).strip()
                for word in cell.split(',')
            ]
            decrypted_column.append(
                util.StringLiteralToken('"%s"' % ','.join(plain_words)))
    return decrypted_column
def _DecryptValues(field, table, column_index, ciphers, schema, prefix):
    """Decrypts one column of a table and converts it to the schema type.

    Arguments:
      field: Dotted field name whose last component contains prefix.
      table: Sequence of rows; each row is indexable by column_index.
      column_index: Position of the encrypted cell within every row.
      ciphers: Mapping from prefix to a cipher object exposing Decrypt().
      schema: Schema handed to util.GetFieldType() to look up the field type.
      prefix: Encryption prefix to strip from the last component of field;
        also the key used to pick the cipher.

    Raises:
      ValueError: If the field type is not 'string', 'integer' or 'float'.

    Returns:
      A list with one entry per row: util.LiteralToken('null', None) for a
      None cell, a util.StringLiteralToken for string fields, a long for
      integer fields and a float otherwise.
    """
    # Remove the encryption prefix from the last dotted component.
    path = field.split('.')
    path[-1] = path[-1].split(prefix)[1]
    plain_field = '.'.join(path)

    value_type = util.GetFieldType(plain_field, schema)
    if value_type not in ['string', 'integer', 'float']:
        raise ValueError('Not an known type.')

    cipher = ciphers[prefix]
    decrypted_column = []
    for row in table:
        cell = row[column_index]
        if cell is None:
            decrypted_column.append(util.LiteralToken('null', None))
            continue
        plain_text = unicode(cipher.Decrypt(cell.encode('utf-8'))).strip()
        if value_type == 'string':
            converted = util.StringLiteralToken('"%s"' % plain_text)
        elif value_type == 'integer':
            converted = long(plain_text)
        else:
            converted = float(plain_text)
        decrypted_column.append(converted)
    return decrypted_column
def testGetUnencryptedValues(self):
    """Checks _GetUnencryptedValuesWithType() for an integer column, a
    string column, and a None type that must raise ValueError."""
    get_values = encrypted_bigquery_client._GetUnencryptedValuesWithType

    # Integer column with a trailing null.
    int_rows = [[1], [2], [3], [None]]
    int_column = get_values(int_rows, 0, 'integer')
    self.assertEqual(
        int_column, [1, 2, 3, util.LiteralToken('null', None)])

    # String column (index 1) with a null in the middle.
    mixed_rows = [[1, 'Hello'], [2, None], [None, 'Bye']]
    str_column = get_values(mixed_rows, 1, 'string')
    expected_strings = [
        util.StringLiteralToken('"Hello"'),
        util.LiteralToken('null', None),
        util.StringLiteralToken('"Bye"'),
    ]
    self.assertEqual(str_column, expected_strings)

    # A missing value type is rejected.
    self.assertRaises(ValueError, get_values, mixed_rows, 1, None)
def _CollapseFunctions(stack):
    """Collapses the first built-in function found in a postfix stack.

    The postfix sub-expression ending at the first util.BuiltInFunctionToken
    is replaced, in place, by a single token. When interpreter.Evaluate()
    succeeds the token carries the evaluated value; when it raises
    bigquery_client.BigqueryInvalidQueryError the token is a util.FieldToken
    wrapping the infix text of the sub-expression.

    Arguments:
      stack: The postfix stack to scan; it is modified in place.

    Raises:
      bigquery_client.BigqueryInvalidQueryError: If the function's
        sub-expression is reported as encrypted by
        util.IsEncryptedExpression().

    Returns:
      True if a function was found and collapsed (so more may remain),
      False if the stack holds no built-in function token.
    """
    for end, token in enumerate(stack):
        if not isinstance(token, util.BuiltInFunctionToken):
            continue

        start_idx, postfix_expr = interpreter.GetSingleValue(stack[:end + 1])
        if util.IsEncryptedExpression(postfix_expr):
            raise bigquery_client.BigqueryInvalidQueryError(
                'Invalid aggregation function argument: Cannot put an encrypted '
                'field as an argument to a built-in function.', None, None, None)

        # Prefer the real value; fall back to the infix string when the
        # expression cannot be evaluated.
        try:
            value = interpreter.Evaluate(list(postfix_expr))
            if isinstance(value, basestring):
                value = util.StringLiteralToken('"%s"' % value)
            elif value is None:
                value = util.LiteralToken('NULL', None)
            else:
                lowered = str(value).lower()
                if lowered in ['true', 'false']:
                    value = util.LiteralToken(lowered, value)
            stack[start_idx:end + 1] = [value]
        except bigquery_client.BigqueryInvalidQueryError:
            infix_text = interpreter.ToInfix(list(postfix_expr))
            stack[start_idx:end + 1] = [util.FieldToken(infix_text)]
        # Only one function is collapsed per call.
        return True
    return False
def testGetTimestampValues(self):
    """Checks that integer, float and scientific-notation inputs all map to
    the same timestamp text, and that None maps to a null token."""
    expected_text = '"2014-04-01 16:00:00 UTC"'
    # Three spellings of the same number, followed by a null row.
    rows = [['1396368000'], ['1396368000.0'], ['1.396368E9'], [None]]

    column = encrypted_bigquery_client._GetTimestampValues(rows, 0)

    expected = [expected_text] * 3
    expected.append(util.LiteralToken('null', None))
    self.assertEqual(column, expected)
def _GetTimestampValues(table, column_index): values = [] for i in range(len(table)): if table[i][column_index] is None: value = util.LiteralToken('null', None) else: value = util.StringLiteralToken( '"%s"' % util.SecToTimestamp(float(table[i][column_index]))) values.append(value) return value
def testDecryptGroupConcatValues(self):
    """Exercises _DecryptGroupConcatValues() on a flat field, a nested field
    with a 'within' clause, and two inputs that must raise."""
    cars_schema = test_util.GetCarsSchema()
    jobs_schema = test_util.GetJobsSchema()
    master_key = test_util.GetMasterKey()
    decrypt = encrypted_bigquery_client._DecryptGroupConcatValues
    plain_groups = [
        ['A', 'B', 'C', 'D'],
        ['1', '2', '3', '4'],
        ['Hello', 'Bye'],
    ]

    # Flat field: a null row first, then rows with an unrelated second column.
    query = 'GROUP_CONCAT(%sModel)' % util.PROBABILISTIC_PREFIX
    cipher = ecrypto.ProbabilisticCipher(master_key)
    ciphers = {util.PROBABILISTIC_PREFIX: cipher}
    table = [[None, None]]
    for group in plain_groups:
        encrypted = [cipher.Encrypt(unicode(word)) for word in group]
        table.append([','.join(encrypted), random.random()])
    column = decrypt(
        query, table, 0, ciphers, cars_schema, util.PROBABILISTIC_PREFIX)
    self.assertEqual(column, [
        util.LiteralToken('null', None),
        util.StringLiteralToken('"A,B,C,D"'),
        util.StringLiteralToken('"1,2,3,4"'),
        util.StringLiteralToken('"Hello,Bye"'),
    ])

    # Nested field followed by a 'within' clause; single-column rows.
    query = (
        'GROUP_CONCAT(citiesLived.job.%sposition) within citiesLived.job'
        % util.PSEUDONYM_PREFIX)
    cipher = ecrypto.PseudonymCipher(master_key)
    ciphers = {util.PSEUDONYM_PREFIX: cipher}
    table = []
    for group in plain_groups:
        encrypted = [cipher.Encrypt(unicode(word)) for word in group]
        table.append([','.join(encrypted)])
    column = decrypt(
        query, table, 0, ciphers, jobs_schema, util.PSEUDONYM_PREFIX)
    self.assertEqual(column, [
        util.StringLiteralToken('"A,B,C,D"'),
        util.StringLiteralToken('"1,2,3,4"'),
        util.StringLiteralToken('"Hello,Bye"'),
    ])

    # A field without the GROUP_CONCAT prefix is rejected.
    query = '%sModel' % util.PROBABILISTIC_PREFIX
    self.assertRaises(
        ValueError, decrypt,
        query, table, 0, ciphers, cars_schema, util.PROBABILISTIC_PREFIX)

    # A GROUP_CONCAT over this field is expected to be an invalid query.
    query = (
        'GROUP_CONCAT(citiesLived.%snumberOfYears) within citiesLived'
        % util.HOMOMORPHIC_FLOAT_PREFIX)
    self.assertRaises(
        bigquery_client.BigqueryInvalidQueryError, decrypt,
        query, table, 0, ciphers, jobs_schema,
        util.HOMOMORPHIC_FLOAT_PREFIX)
def PushSingleToken(tokens):
    """Converts tokens[0] into a typed token and appends it to math_stack.

    The first element becomes, in order of precedence: an int (or a float
    when int() rejects it) if util.IsFloat() accepts it; a
    util.StringLiteralToken if it starts with a single or double quote; a
    util.LiteralToken if its lowercase form is a key of
    util.BIGQUERY_CONSTANTS; otherwise a util.FieldToken.

    NOTE(review): math_stack is not defined in this block; it is presumably
    provided by an enclosing scope -- confirm against the surrounding code.
    """
    head = tokens[0]
    if util.IsFloat(head):
        # Prefer an exact integer; fall back to float for e.g. '1.5'.
        try:
            token = int(head)
        except ValueError:
            token = float(head)
    elif head.startswith(('\'', '"')):
        token = util.StringLiteralToken(head)
    else:
        lowered = head.lower()
        if lowered in util.BIGQUERY_CONSTANTS:
            token = util.LiteralToken(
                lowered, util.BIGQUERY_CONSTANTS[lowered])
        else:
            token = util.FieldToken(head)
    math_stack.append(token)
def _GetUnencryptedValuesWithType(table, column_index, value_type): if (value_type is None or value_type.lower() not in ['string', 'integer', 'float']): raise ValueError('Not an known type.') value_type = value_type.lower() value_column = [] for i in range(len(table)): if table[i][column_index] is None: value = util.LiteralToken('null', None) else: value = table[i][column_index] if value_type == 'string': value = util.StringLiteralToken('"%s"' % str(value).strip()) elif value_type == 'integer': value = long(value) else: value = float(value) value_column.append(value) return value_column
def testNull(self):
    """Checks that a lone null literal renders as 'null' and evaluates to
    None."""
    null_only = [util.LiteralToken('null', None)]
    # ToInfix receives a copy; Evaluate receives the stack itself.
    infix = interpreter.ToInfix(list(null_only))
    self.assertEqual(infix, 'null')
    self.assertEqual(interpreter.Evaluate(null_only), None)