Esempio n. 1
0
 def testConvertCsvDataFile(self):
     """Round-trips a cars CSV file through ConvertCsvDataFile.

     Encrypts the temp cars CSV file, validates the output against the
     rewritten schema, and spot-checks one row: the plaintext year, the
     pseudonym-encrypted make, and the searchwords hash for the model.
     """
     self._SetupTestFlags()
     schema = json.loads(test_util.GetCarsSchemaString())
     infile = self._WriteTempCarsCsvFile()
     outfile = os.path.join(self.dirname, 'cars.enc_data')
     master_key = base64.b64decode(_MASTER_KEY)
     string_hasher = ecrypto.StringHash(
         ecrypto.GenerateStringHashKey(master_key, _TABLE_ID))
     pseudonym_cipher = ecrypto.PseudonymCipher(
         ecrypto.GeneratePseudonymCipherKey(master_key, _TABLE_ID))
     load_lib.ConvertCsvDataFile(schema, master_key, _TABLE_ID, infile,
                                 outfile)
     # validate new data file against new rewritten schema.
     new_schema = json.loads(_CARS_REWRITTEN_SCHEMA)
     load_lib._ValidateCsvDataFile(new_schema, outfile)
     # Sanity check one row entries. Entries for semantic encrypted fields
     # cannot be checked because the values are randomized.
     # Use a context manager so the file is closed even on assertion failure.
     with open(outfile, 'rt') as fout:
         row0 = fout.readline()
     self.assertTrue('1997' in row0)
     self.assertTrue(pseudonym_cipher.Encrypt(unicode('Ford')) in row0)
     # Get iv and hash for Model searchwords field whose value is 'E350'
     (model_iv, model_hash) = row0.split(',')[2].split(' ')
     # Calculate expected key hash value for 'E350'
     expected_model_key_hash = string_hasher.GetStringKeyHash(
         util.SEARCHWORDS_PREFIX + u'Model', u'E350'.lower())
     # Calculate outer sha1 using model_iv and expected key hash.
     expected_model_hash = base64.b64encode(
         hashlib.sha1(model_iv + expected_model_key_hash).digest()[:8])
     # assertEqual: assertEquals is a deprecated alias.
     self.assertEqual(expected_model_hash, model_hash)
Esempio n. 2
0
 def testDecryptValues(self):
     """Test _DecryptValues()."""
     cars_schema = test_util.GetCarsSchema()
     jobs_schema = test_util.GetJobsSchema()
     master_key = test_util.GetMasterKey()

     # Homomorphic-int column: encrypted ints decrypt back; a trailing
     # None row becomes a null literal token.
     int_cipher = ecrypto.HomomorphicIntCipher(master_key)
     ciphers = {util.HOMOMORPHIC_INT_PREFIX: int_cipher}
     rows = self._EncryptTable(int_cipher, [[1], [2], [3]], 0)
     rows.append([None])
     decrypted = encrypted_bigquery_client._DecryptValues(
         '%sInvoice_Price' % util.HOMOMORPHIC_INT_PREFIX, rows, 0, ciphers,
         cars_schema, util.HOMOMORPHIC_INT_PREFIX)
     self.assertEqual(decrypted, [1, 2, 3, util.LiteralToken('null', None)])

     # Pseudonym column in a nested record; a None value in the middle of
     # the table also becomes a null literal token.
     pseudonym_cipher = ecrypto.PseudonymCipher(master_key)
     ciphers = {util.PSEUDONYM_PREFIX: pseudonym_cipher}
     rows = self._EncryptTable(
         pseudonym_cipher,
         [[0, unicode('Hello')], [1, unicode('My')], [-1, unicode('job')]],
         1)
     rows.insert(1, [100, None])
     decrypted = encrypted_bigquery_client._DecryptValues(
         'citiesLived.job.%sposition' % util.PSEUDONYM_PREFIX, rows, 1,
         ciphers, jobs_schema, util.PSEUDONYM_PREFIX)
     self.assertEqual(decrypted, [
         util.StringLiteralToken('"Hello"'),
         util.LiteralToken('null', None),
         util.StringLiteralToken('"My"'),
         util.StringLiteralToken('"job"')
     ])

     # A field missing from the schema raises ValueError.
     self.assertRaises(
         ValueError, encrypted_bigquery_client._DecryptValues,
         '%snonexistent_field' % util.HOMOMORPHIC_FLOAT_PREFIX, rows, 1,
         ciphers, cars_schema, util.HOMOMORPHIC_FLOAT_PREFIX)
Esempio n. 3
0
def ConvertJsonDataFile(schema, master_key, table_id, infile, outfile):
  """Encrypts newline-delimited JSON records according to a schema.

  Arguments:
    schema: User-defined field names, types and encryption modes.
    master_key: Master key from which per-table cipher keys are derived.
    table_id: Table identifier used to derive unique per-table keys.
    infile: Path of the plaintext JSON file to encrypt.
    outfile: Path where the encrypted JSON file is written.
  """
  # Derive one cipher/hasher per encryption mode from the master key.
  # TODO(user): ciphers and hash should not use the same key.
  prob_cipher = ecrypto.ProbabilisticCipher(
      ecrypto.GenerateProbabilisticCipherKey(master_key, table_id))
  pseudonym_cipher = ecrypto.PseudonymCipher(
      ecrypto.GeneratePseudonymCipherKey(master_key, table_id))
  string_hasher = ecrypto.StringHash(
      ecrypto.GenerateStringHashKey(master_key, table_id))
  homomorphic_int_cipher = ecrypto.HomomorphicIntCipher(
      ecrypto.GenerateHomomorphicCipherKey(master_key, table_id))
  homomorphic_float_cipher = ecrypto.HomomorphicFloatCipher(
      ecrypto.GenerateHomomorphicCipherKey(master_key, table_id))

  _ValidateJsonDataFile(schema, infile)
  with open(infile, 'rb') as in_file:
    with open(outfile, 'wb') as out_file:
      # One JSON record per input line; encrypt and re-serialize each.
      for line in in_file:
        record = _StrToUnicode(json.loads(line))
        encrypted_record = _ConvertJsonField(
            record, schema, prob_cipher, pseudonym_cipher, string_hasher,
            homomorphic_int_cipher, homomorphic_float_cipher)
        out_file.write(json.dumps(encrypted_record) + '\n')
Esempio n. 4
0
 def testDecryptGroupConcatValues(self):
     """Tests _DecryptGroupConcatValues over encrypted concatenated columns."""
     cars_schema = test_util.GetCarsSchema()
     jobs_schema = test_util.GetJobsSchema()
     master_key = test_util.GetMasterKey()
     unencrypted_values = [['A', 'B', 'C', 'D'], ['1', '2', '3', '4'],
                           ['Hello', 'Bye']]

     def _BuildTable(cipher, with_random_column):
         # Each row holds one comma-joined string of encrypted tokens,
         # optionally followed by an unrelated random column.
         rows = []
         for values in unencrypted_values:
             joined = ','.join(
                 cipher.Encrypt(unicode(token)) for token in values)
             if with_random_column:
                 rows.append([joined, random.random()])
             else:
                 rows.append([joined])
         return rows

     # Probabilistic column: a leading None row decrypts to a null token.
     query = 'GROUP_CONCAT(%sModel)' % util.PROBABILISTIC_PREFIX
     cipher = ecrypto.ProbabilisticCipher(master_key)
     ciphers = {util.PROBABILISTIC_PREFIX: cipher}
     table = _BuildTable(cipher, True)
     table.insert(0, [None, None])
     column = encrypted_bigquery_client._DecryptGroupConcatValues(
         query, table, 0, ciphers, cars_schema, util.PROBABILISTIC_PREFIX)
     self.assertEqual(column, [
         util.LiteralToken('null', None),
         util.StringLiteralToken('"A,B,C,D"'),
         util.StringLiteralToken('"1,2,3,4"'),
         util.StringLiteralToken('"Hello,Bye"')
     ])

     # Pseudonym column addressed through a nested record with "within".
     query = (
         'GROUP_CONCAT(citiesLived.job.%sposition) within citiesLived.job' %
         util.PSEUDONYM_PREFIX)
     cipher = ecrypto.PseudonymCipher(master_key)
     ciphers = {util.PSEUDONYM_PREFIX: cipher}
     table = _BuildTable(cipher, False)
     column = encrypted_bigquery_client._DecryptGroupConcatValues(
         query, table, 0, ciphers, jobs_schema, util.PSEUDONYM_PREFIX)
     self.assertEqual(column, [
         util.StringLiteralToken('"A,B,C,D"'),
         util.StringLiteralToken('"1,2,3,4"'),
         util.StringLiteralToken('"Hello,Bye"')
     ])

     # A bare field (no GROUP_CONCAT wrapper) is rejected.
     query = '%sModel' % util.PROBABILISTIC_PREFIX
     self.assertRaises(ValueError,
                       encrypted_bigquery_client._DecryptGroupConcatValues,
                       query, table, 0, ciphers, cars_schema,
                       util.PROBABILISTIC_PREFIX)
     # GROUP_CONCAT over a homomorphic (numeric) field is an invalid query.
     query = (
         'GROUP_CONCAT(citiesLived.%snumberOfYears) within citiesLived' %
         util.HOMOMORPHIC_FLOAT_PREFIX)
     self.assertRaises(bigquery_client.BigqueryInvalidQueryError,
                       encrypted_bigquery_client._DecryptGroupConcatValues,
                       query, table, 0, ciphers, jobs_schema,
                       util.HOMOMORPHIC_FLOAT_PREFIX)
def ConvertJsonDataFile(schema, master_key, table_id, infile, outfile):
    """Encrypts data in a json file based on schema provided.

    Arguments:
      schema: User defined values and types.
      master_key: Key to provide ciphers.
      table_id: Used to derive a unique key for each table.
      infile: File to be encrypted.
      outfile: Location the encrypted file is written to.
    """
    prob_cipher = ecrypto.ProbabilisticCipher(
        ecrypto.GenerateProbabilisticCipherKey(master_key, table_id))
    pseudonym_cipher = ecrypto.PseudonymCipher(
        ecrypto.GeneratePseudonymCipherKey(master_key, table_id))
    # TODO(user): ciphers and hash should not use the same key.
    string_hasher = ecrypto.StringHash(
        ecrypto.GenerateStringHashKey(master_key, table_id))
    homomorphic_int_cipher = ecrypto.HomomorphicIntCipher(
        ecrypto.GenerateHomomorphicCipherKey(master_key, table_id))
    homomorphic_float_cipher = ecrypto.HomomorphicFloatCipher(
        ecrypto.GenerateHomomorphicCipherKey(master_key, table_id))

    _ValidateJsonDataFile(schema, infile)
    with open(infile, 'rb') as in_file:
        with open(outfile, 'wb') as out_file:
            for line in in_file:
                data = json.loads(line)
                data = _StrToUnicode(data)
                rewritten_data = _ConvertJsonField(data, schema, prob_cipher,
                                                   pseudonym_cipher,
                                                   string_hasher,
                                                   homomorphic_int_cipher,
                                                   homomorphic_float_cipher)
                # Serialize with the json module instead of str() plus quote
                # substitution: repr-based rewriting (u'...' -> "...") breaks
                # on values containing quotes or non-ASCII characters and is
                # not valid JSON in general. json.dumps also emits the plain
                # double-quoted strings BigQuery expects.
                out_file.write(json.dumps(rewritten_data) + '\n')
Esempio n. 6
0
def _GenerateRelatedCiphers(schema, master_key, default_cipher):
  """Reads schema for pseudonym encrypt types and adds generating ciphers.

  Args:
    schema: list of dict, the db schema. modified by
    master_key: str, the master key
    default_cipher: obj, cipher that encrypt() can be called on.
  Returns:
    dict, mapping field names to index in schema.
  """
  map_name_to_index = {}
  for i in xrange(len(schema)):
    logging.warning(schema[i])
    map_name_to_index[schema[i]['name']] = i
    if schema[i].get('encrypt', None) == 'pseudonym':
      related = schema[i].get('related', None)
      if related is not None:
        pseudonym_cipher_related = ecrypto.PseudonymCipher(
            ecrypto.GeneratePseudonymCipherKey(
                master_key, str(related).encode('utf-8')))
        schema[i]['cipher'] = pseudonym_cipher_related
      else:
        schema[i]['cipher'] = default_cipher
  return map_name_to_index
Esempio n. 7
0
 def setUp(self):
     """Run once for each test in the class."""
     # Fresh pseudonym cipher per test, keyed with the fixed test key _KEY1.
     self.cipher = ecrypto.PseudonymCipher(_KEY1)
Esempio n. 8
0
def RewriteSelectionCriteria(stack, schema, master_key, table_id):
    """Rewrites selection criteria (arguments of WHERE and HAVING clause).

    Arguments:
      stack: The postfix expression that is the where/having expression.
      schema: The user defined values and encryption.
      master_key: Used to get ciphers for encryption.
      table_id: Used to generate a proper key.

    Returns:
      An infix version of the <stack>. The expression is rewritten so that it
      can be sent to the BigQuery server.

    Raises:
      bigquery_client.BigqueryInvalidQueryError: If the expression is invalid
      (such as searching non-searchable encrypted fields, etc).
    """

    # Ciphers used to rewrite literals compared against encrypted fields.
    pseudonym_cipher = ecrypto.PseudonymCipher(
        ecrypto.GeneratePseudonymCipherKey(master_key, table_id))
    string_hasher = ecrypto.StringHash(
        ecrypto.GenerateStringHashKey(master_key, table_id))

    def FailIfEncrypted(tokens):
        # Encrypted fields may not appear in arbitrary expressions.
        if util.IsEncryptedExpression(tokens):
            raise bigquery_client.BigqueryInvalidQueryError(
                'Invalid where/having expression.', None, None, None)

    def FailIfDeterministic(tokens):
        # Equality comparisons only work for deterministic (pseudonym)
        # encryption; probabilistic ciphertexts never compare equal.
        if util.IsDeterministicExpression(tokens):
            raise bigquery_client.BigqueryInvalidQueryError(
                'Cannot do equality on probabilistic encryption, '
                'only pseudonym encryption.', None, None, None)

    def RewritePseudonymEncryption(token):
        # Encrypt string literals so they compare equal to stored pseudonym
        # values; non-literal tokens pass through unchanged.
        if isinstance(token, util.StringLiteralToken):
            return '"%s"' % pseudonym_cipher.Encrypt(unicode(token[1:-1]))
        else:
            return token

    def RewriteSearchwordsEncryption(field, literal):
        """Rewrites the literal such that it can be checked for containment.

        Arguments:
          field: The field which is being checked if literal is contained
              within.
          literal: Substring being searched for.

        Returns:
          A tuple containing both field and literal rewritten.

        Raises:
          ValueError: Try to rewrite non-searchwords encryption.
        """
        if (not isinstance(field, util.SearchwordsToken)
                and not isinstance(field, util.ProbabilisticToken)):
            raise ValueError('Invalid encryption to check containment.')
        # Replace the last path component with its SEARCHWORDS_-prefixed name.
        field = field.original_name
        row = util.GetEntryFromSchema(field, schema)
        modified_field = util.SEARCHWORDS_PREFIX + row['name']
        field = field.split('.')
        field[-1] = modified_field
        modified_field = '.'.join(field)
        if 'searchwords_separator' in row:
            searchwords_separator = row['searchwords_separator']
        else:
            searchwords_separator = None
        # Normalize the literal into the same word sequence that was hashed
        # at encrypt time, then compute its keyed hash.
        word_list = ecrypto.CleanUnicodeString(unicode(literal.value),
                                               separator=searchwords_separator)
        if searchwords_separator is None:
            word_seq = ' '.join(word_list)
        else:
            word_seq = searchwords_separator.join(word_list)
        keyed_hash = (u'\'%s\'' % string_hasher.GetStringKeyHash(
            modified_field.split('.')[-1], word_seq))
        # Server-side expression: sha1 over the stored IV (first 24 chars of
        # the field) concatenated with the keyed hash, truncated to 8 bytes
        # and base64-encoded — matching the stored searchwords format.
        modified_string = (
            u'to_base64(left(bytes(sha1(concat(left(%s, 24), %s))), 8))' %
            (modified_field, keyed_hash))
        return (modified_field, modified_string)

    def CheckSearchableField(op1):
        """Checks if the operand is a searchable encrypted field.

        Arguments:
          op1: The operand that is being checked if it is searchable.

        Returns:
          True iff op1 is searchable.
        """
        if isinstance(op1, util.SearchwordsToken):
            return True
        elif not isinstance(op1, util.ProbabilisticToken):
            return False
        # A probabilistic field is searchable only when the schema marks it
        # as (probabilistic_)searchwords.
        op1 = op1.original_name
        row = util.GetEntryFromSchema(op1, schema)
        if row['encrypt'] in ['probabilistic_searchwords', 'searchwords']:
            return True
        else:
            return False
        # NOTE(review): unreachable — both branches above already return.
        return False

    def RewriteContainsOrFail(op1, op2):
        """Tries to rewrite a contains expression.

        Arguments:
          op1: The first operand of the contains binary operator.
          op2: The second operand of the contians binary operator.

        Returns:
          The rewritten versions of both operands.

        Raises:
          bigquery_client.BigqueryInvalidQueryError: If the contains
          expressions is invalid.
        """
        # Unencrypted operands need no rewriting.
        if not isinstance(op1, util.EncryptedToken):
            return (op1, op2)
        if not CheckSearchableField(op1):
            raise bigquery_client.BigqueryInvalidQueryError(
                'Cannot do contains on an encrypted field that is not searchable.',
                None, None, None)
        elif not isinstance(op2, util.StringLiteralToken):
            raise bigquery_client.BigqueryInvalidQueryError(
                'The substring to be checked must be a literal.', None, None,
                None)
        return RewriteSearchwordsEncryption(op1, op2)

    def CheckAndRewriteStack(postfix):
        # Recursively pops one operator/function plus its arguments off the
        # postfix stack and returns the rewritten infix string.
        if not postfix:
            raise bigquery_client.BigqueryInvalidQueryError(
                'Not enough arguments.', None, None, None)
        top = postfix.pop()
        if isinstance(top, util.OperatorToken):
            args = []
            for unused_i in range(top.num_args):
                args.append(CheckAndRewriteStack(postfix))
            # Arguments come off the stack in reverse order.
            args.reverse()
            if top.num_args == 1:
                return '%s %s' % (str(top), args[0])
            elif str(top) in ['=', '==', '!=']:
                FailIfDeterministic(args)
                if (isinstance(args[0], util.PseudonymToken)
                        or isinstance(args[1], util.PseudonymToken)):
                    args[0] = RewritePseudonymEncryption(args[0])
                    args[1] = RewritePseudonymEncryption(args[1])
            elif str(top) == 'contains':
                FailIfEncrypted([args[1]])
                args[0], args[1] = RewriteContainsOrFail(args[0], args[1])
            else:
                # Any other binary operator must not touch encrypted fields.
                FailIfEncrypted(args)
            return '(%s %s %s)' % (args[0], str(top), args[1])
        elif isinstance(top, util.BuiltInFunctionToken):
            func_name = str(top)
            if func_name in _ZERO_ARGUMENT_FUNCTIONS:
                return '%s()' % func_name
            elif func_name in _ONE_ARGUMENT_FUNCTIONS:
                op = CheckAndRewriteStack(postfix)
                FailIfEncrypted([op])
                return '%s(%s)' % (func_name, op)
            elif func_name in _TWO_ARGUMENT_FUNCTIONS:
                # Operands pop in reverse (op2 first).
                op2 = CheckAndRewriteStack(postfix)
                op1 = CheckAndRewriteStack(postfix)
                FailIfEncrypted([op1, op2])
                return '%s(%s, %s)' % (func_name, op1, op2)
            elif func_name in _THREE_ARGUMENT_FUNCTIONS:
                op3 = CheckAndRewriteStack(postfix)
                op2 = CheckAndRewriteStack(postfix)
                op1 = CheckAndRewriteStack(postfix)
                FailIfEncrypted([op1, op2, op3])
                return '%s(%s, %s, %s)' % (func_name, op1, op2, op3)
            else:
                raise bigquery_client.BigqueryInvalidQueryError(
                    '%s function does not exist.' % func_name, None, None,
                    None)
        elif not isinstance(top, basestring):
            # Non-string operand tokens are stringified as-is.
            return str(top)
        else:
            return top

    # Work on a copy so the caller's stack is not consumed.
    temp_stack = list(stack)
    new_expression = CheckAndRewriteStack(temp_stack)
    if temp_stack:
        # Leftover tokens mean the expression supplied extra operands.
        raise bigquery_client.BigqueryInvalidQueryError(
            'Too many arguments.', None, None, None)
    return new_expression
def ConvertCsvDataFile(schema, master_key, table_id, infile, outfile):
    """Reads utf8 csv data, encrypts and stores into a new csv utf8 data file.

    Arguments:
      schema: List of per-column field dicts (name, type, encrypt mode, and
          optional searchwords settings).
      master_key: Master key from which per-table cipher keys are derived.
      table_id: Table identifier used to derive the per-table keys.
      infile: Path of the plaintext utf8 csv file to read.
      outfile: Path where the encrypted csv file is written.

    Raises:
      EncryptConvertError: If a row's field count does not match the schema.
    """
    # One cipher/hasher per encrypt mode, all derived from the master key.
    prob_cipher = ecrypto.ProbabilisticCipher(
        ecrypto.GenerateProbabilisticCipherKey(master_key, table_id))
    pseudonym_cipher = ecrypto.PseudonymCipher(
        ecrypto.GeneratePseudonymCipherKey(master_key, table_id))
    # TODO(user): ciphers and hash should not use the same key.
    string_hasher = ecrypto.StringHash(
        ecrypto.GenerateStringHashKey(master_key, table_id))
    homomorphic_int_cipher = ecrypto.HomomorphicIntCipher(
        ecrypto.GenerateHomomorphicCipherKey(master_key, table_id))
    homomorphic_float_cipher = ecrypto.HomomorphicFloatCipher(
        ecrypto.GenerateHomomorphicCipherKey(master_key, table_id))

    with open(infile, 'rb') as in_file:
        with open(outfile, 'wb') as out_file:
            num_columns = len(schema)
            csv_writer = csv.writer(out_file)
            _ValidateCsvDataFile(schema, infile)
            csv_reader = _Utf8CsvReader(in_file, csv_writer)
            for row in csv_reader:
                new_row = []
                if len(row) != num_columns:
                    raise EncryptConvertError(
                        'Number of fields in schema do not match '
                        'in row: %s' % row)
                # Encrypt each cell according to its column's encrypt mode.
                # NOTE(review): a mode outside the branches below (or
                # 'homomorphic' on a non-integer/float type) appends nothing,
                # silently shifting later columns — presumably prevented by
                # _ValidateCsvDataFile; confirm.
                for i in xrange(num_columns):
                    encrypt_mode = schema[i]['encrypt']
                    if encrypt_mode == 'none':
                        new_row.append(row[i].encode('utf-8'))
                    elif encrypt_mode == 'probabilistic':
                        new_row.append(
                            prob_cipher.Encrypt(row[i]).encode('utf-8'))
                    elif encrypt_mode == 'pseudonym':
                        new_row.append(
                            pseudonym_cipher.Encrypt(row[i]).encode('utf-8'))
                    elif encrypt_mode == 'homomorphic' and schema[i][
                            'type'] == 'integer':
                        new_row.append(
                            homomorphic_int_cipher.Encrypt(long(
                                row[i])).encode('utf-8'))
                    elif encrypt_mode == 'homomorphic' and schema[i][
                            'type'] == 'float':
                        new_row.append(
                            homomorphic_float_cipher.Encrypt(float(
                                row[i])).encode('utf-8'))
                    elif encrypt_mode == 'searchwords':
                        # Optional per-column separator and maximum word
                        # sequence length (defaults: whitespace split, 5).
                        if 'searchwords_separator' in schema[i]:
                            searchwords_separator = schema[i][
                                'searchwords_separator']
                        else:
                            searchwords_separator = None
                        if 'max_word_sequence' in schema[i]:
                            max_word_sequence = schema[i]['max_word_sequence']
                        else:
                            max_word_sequence = 5
                        new_row.append(
                            string_hasher.GetHashesForWordSubsequencesWithIv(
                                util.SEARCHWORDS_PREFIX + schema[i]['name'],
                                row[i],
                                separator=searchwords_separator,
                                max_sequence_len=max_word_sequence).encode(
                                    'utf-8'))
                    elif encrypt_mode == 'probabilistic_searchwords':
                        # Same searchwords hashing as above, but the original
                        # value is additionally stored probabilistically
                        # encrypted (two output columns for this field).
                        if 'searchwords_separator' in schema[i]:
                            searchwords_separator = schema[i][
                                'searchwords_separator']
                        else:
                            searchwords_separator = None
                        if 'max_word_sequence' in schema[i]:
                            max_word_sequence = schema[i]['max_word_sequence']
                        else:
                            max_word_sequence = 5
                        new_row.append(
                            string_hasher.GetHashesForWordSubsequencesWithIv(
                                util.SEARCHWORDS_PREFIX + schema[i]['name'],
                                row[i],
                                separator=searchwords_separator,
                                max_sequence_len=max_word_sequence).encode(
                                    'utf-8'))
                        new_row.append(
                            prob_cipher.Encrypt(row[i]).encode('utf-8'))
                csv_writer.writerow(new_row)
def _DecryptRows(fields,
                 rows,
                 master_key,
                 table_id,
                 schema,
                 query_list,
                 aggregation_query_list,
                 unencrypted_query_list,
                 manifest=None):
    """Decrypts all values in rows.

    Arguments:
      fields: Column names.
      rows: Table values.
      master_key: Key to get ciphers.
      table_id: Used to generate keys.
      schema: Represents information about fields.
      query_list: List of fields that were queried.
      aggregation_query_list: List of aggregations of fields that were
          queried.
      unencrypted_query_list: List of unencrypted expressions.
      manifest: optional, query_lib.QueryManifest instance.

    Returns:
      A dictionary that returns for each query, a list of decrypted values.

    Raises:
      bigquery_client.BigqueryInvalidQueryError: User trying to query for a
      SEARCHWORD encrypted field. SEARCHWORD encrypted fields cannot be
      decrypted.
    """
    # create ciphers for decryption
    prob_cipher = ecrypto.ProbabilisticCipher(
        ecrypto.GenerateProbabilisticCipherKey(master_key, table_id))
    pseudonym_cipher = ecrypto.PseudonymCipher(
        ecrypto.GeneratePseudonymCipherKey(master_key, table_id))
    homomorphic_int_cipher = ecrypto.HomomorphicIntCipher(
        ecrypto.GenerateHomomorphicCipherKey(master_key, table_id))
    homomorphic_float_cipher = ecrypto.HomomorphicFloatCipher(
        ecrypto.GenerateHomomorphicCipherKey(master_key, table_id))

    # Lookup table: encryption-mode prefix -> cipher for that mode.
    ciphers = {
        util.PROBABILISTIC_PREFIX: prob_cipher,
        util.PSEUDONYM_PREFIX: pseudonym_cipher,
        util.HOMOMORPHIC_INT_PREFIX: homomorphic_int_cipher,
        util.HOMOMORPHIC_FLOAT_PREFIX: homomorphic_float_cipher,
    }

    # Seed the result dict with an empty value list per requested output.
    queried_values = {}
    for query in query_list:
        # Strip a trailing 'AS alias' so results are keyed by the real
        # expression.
        if len(query.split(' ')) >= 3 and query.split(' ')[-2] == 'AS':
            queried_values[' '.join(query.split(' ')[:-2])] = []
        else:
            queried_values[query] = []
    for query in aggregation_query_list:
        queried_values[query] = []
    for i in xrange(len(unencrypted_query_list)):
        queried_values['%s%d_' % (util.UNENCRYPTED_ALIAS_PREFIX, i)] = []

    # If a manifest is supplied rewrite the column names according to any
    # computed aliases that were used. Otherwise, resort to the old scheme
    # of substituting the '.' in multidimensional schemas in/out.
    if manifest is not None:
        for i in xrange(len(fields)):
            # TODO(user): This is a hash lookup on every column name.
            # The lookup is efficient and the column names are sufficiently random
            # as compared to likely human language column names such that false
            # hits should not be possible. However this may need future revision.
            n = manifest.GetColumnNameForAlias(fields[i]['name'])
            if n is not None:
                fields[i]['name'] = n
    else:
        for i in xrange(len(fields)):
            fields[i]['name'] = fields[i]['name'].replace(
                util.PERIOD_REPLACEMENT, '.')

    # Dispatch each returned column on the encryption prefix of its leaf
    # name (the last dot-separated component).
    for i in xrange(len(fields)):
        encrypted_name = fields[i]['name'].split('.')[-1]
        if fields[i]['type'] == 'TIMESTAMP':
            queried_values[fields[i]['name']] = _GetTimestampValues(rows, i)
        elif encrypted_name.startswith(util.PROBABILISTIC_PREFIX):
            queried_values[fields[i]['name']] = (_DecryptValues(
                fields[i]['name'], rows, i, ciphers, schema,
                util.PROBABILISTIC_PREFIX))
        elif encrypted_name.startswith(util.PSEUDONYM_PREFIX):
            queried_values[fields[i]['name']] = (_DecryptValues(
                fields[i]['name'], rows, i, ciphers, schema,
                util.PSEUDONYM_PREFIX))
        elif encrypted_name.startswith(util.SEARCHWORDS_PREFIX):
            raise bigquery_client.BigqueryInvalidQueryError(
                'Cannot decrypt searchwords encryption. Decryption of SEARCHWORDS '
                'is limited to PROBABILISTIC_SEARCHWORDS encryption.', None,
                None, None)
        elif encrypted_name.startswith(util.HOMOMORPHIC_INT_PREFIX):
            queried_values[fields[i]['name']] = (_DecryptValues(
                fields[i]['name'], rows, i, ciphers, schema,
                util.HOMOMORPHIC_INT_PREFIX))
        elif encrypted_name.startswith(util.HOMOMORPHIC_FLOAT_PREFIX):
            queried_values[fields[i]['name']] = (_DecryptValues(
                fields[i]['name'], rows, i, ciphers, schema,
                util.HOMOMORPHIC_FLOAT_PREFIX))
        elif (encrypted_name.startswith(util.UNENCRYPTED_ALIAS_PREFIX)
              and encrypted_name.endswith('_')):
            queried_values[fields[i]['name']] = (_GetUnencryptedValuesWithType(
                rows, i, fields[i]['type']))
        elif encrypted_name.startswith('f') and encrypted_name.endswith('_'):
            # Columns named f<N>_ are server-generated aliases for the N-th
            # entry in aggregation_query_list.
            index = int(fields[i]['name'][1:-1])
            original_fieldname = aggregation_query_list[index]
            original_fieldname = original_fieldname.strip()
            # Strip a trailing 'within <record>' clause to get the actual
            # aggregated expression.
            if (len(original_fieldname.split(' ')) >= 3
                    and original_fieldname.split(' ')[-2].lower() == 'within'):
                actual_field = original_fieldname.split(' ')[:-2]
                actual_field = ' '.join(actual_field)
            else:
                actual_field = original_fieldname
            if original_fieldname.startswith(util.GROUP_CONCAT_PREFIX):
                # Decrypt each comma-joined ciphertext according to the
                # concatenated field's encryption prefix.
                concat_field = actual_field.split(
                    util.GROUP_CONCAT_PREFIX)[1][:-1].strip()
                encrypted_name = concat_field.split('.')[-1]
                if encrypted_name.startswith(util.PROBABILISTIC_PREFIX):
                    queried_values[original_fieldname] = (
                        _DecryptGroupConcatValues(original_fieldname, rows, i,
                                                  ciphers, schema,
                                                  util.PROBABILISTIC_PREFIX))
                elif encrypted_name.startswith(util.PSEUDONYM_PREFIX):
                    queried_values[original_fieldname] = (
                        _DecryptGroupConcatValues(original_fieldname, rows, i,
                                                  ciphers, schema,
                                                  util.PSEUDONYM_PREFIX))
                elif (encrypted_name.startswith(util.HOMOMORPHIC_INT_PREFIX)
                      or encrypted_name.startswith(
                          util.HOMOMORPHIC_FLOAT_PREFIX)):
                    raise bigquery_client.BigqueryInvalidQueryError(
                        'GROUP_CONCAT only accepts string type.', None, None,
                        None)
                elif encrypted_name.startswith(util.SEARCHWORDS_PREFIX):
                    raise bigquery_client.BigqueryInvalidQueryError(
                        'Invalid query, cannot recover searchwords encryption.',
                        None, None, None)
                else:
                    # Unencrypted GROUP_CONCAT: pass values through as-is.
                    for j in xrange(len(rows)):
                        queried_values[original_fieldname].append(rows[j][i])
            elif (original_fieldname.startswith('COUNT(')
                  or original_fieldname.startswith('AVG(')
                  or original_fieldname.startswith('SUM(')):
                queried_values[original_fieldname] = (
                    _GetUnencryptedValuesWithType(rows, i, fields[i]['type']))
            elif original_fieldname.startswith('TOP('):
                # TOP(field, ...): only pseudonym-encrypted fields need
                # decryption; everything else is returned as-is.
                fieldname = actual_field.split('TOP(')[1][:-1].strip()
                fieldname = fieldname.split(',')[0].strip()
                if fieldname.split('.')[-1].startswith(util.PSEUDONYM_PREFIX):
                    queried_values[original_fieldname] = (_DecryptValues(
                        fieldname, rows, i, ciphers, schema,
                        util.PSEUDONYM_PREFIX))
                else:
                    queried_values[original_fieldname] = (
                        _GetUnencryptedValues(original_fieldname, rows, i,
                                              schema))
            elif original_fieldname.startswith(util.PAILLIER_SUM_PREFIX):
                # Paillier sums decrypt with the matching homomorphic cipher.
                # NOTE(review): if the summed argument carries neither
                # homomorphic prefix, no values are recorded here — confirm
                # upstream query rewriting guarantees one of the prefixes.
                sum_argument = original_fieldname.split(
                    util.PAILLIER_SUM_PREFIX)[1]
                sum_argument = sum_argument.split(',')[0][:-1]
                sum_argument = sum_argument.split('.')[-1]
                real_fieldname = original_fieldname.split(
                    util.PAILLIER_SUM_PREFIX)[1]
                real_fieldname = real_fieldname.split(',')[0][:-1]
                if sum_argument.startswith(util.HOMOMORPHIC_INT_PREFIX):
                    queried_values[original_fieldname] = (_DecryptValues(
                        real_fieldname, rows, i, ciphers, schema,
                        util.HOMOMORPHIC_INT_PREFIX))
                elif sum_argument.startswith(util.HOMOMORPHIC_FLOAT_PREFIX):
                    queried_values[original_fieldname] = (_DecryptValues(
                        real_fieldname, rows, i, ciphers, schema,
                        util.HOMOMORPHIC_FLOAT_PREFIX))
            else:
                queried_values[fields[i]['name']] = (
                    _GetUnencryptedValuesWithType(rows, i, fields[i]['type']))
        else:
            # Plain unencrypted column.
            queried_values[fields[i]['name']] = (_GetUnencryptedValuesWithType(
                rows, i, fields[i]['type']))

    return queried_values
def _DecryptRows(fields, rows, master_key, table_id, schema, query_list,
                 aggregation_query_list, unencrypted_query_list):
    """Decrypts all values in rows.

  Maps each queried expression to the list of its column values, decrypting
  columns whose innermost field-name component carries a known encryption
  prefix (probabilistic, pseudonym, homomorphic int/float).  Columns without
  a recognized prefix are passed through unencrypted.

  Arguments:
    fields: Column names.
    rows: Table values.
    master_key: Key to get ciphers.
    table_id: Used to generate keys.
    schema: Represents information about fields.
    query_list: List of fields that were queried.
    aggregation_query_list: List of aggregations of fields that were queried.
    unencrypted_query_list: List of unencrypted expressions.

  Returns:
    A dictionary that returns for each query, a list of decrypted values.

  Raises:
    bigquery_client.BigqueryInvalidQueryError: User trying to query for a
    SEARCHWORD encrypted field. SEARCHWORD encrypted fields cannot be decrypted.
  """
    # create ciphers for decryption
    prob_cipher = ecrypto.ProbabilisticCipher(
        ecrypto.GenerateProbabilisticCipherKey(master_key, table_id))
    pseudonym_cipher = ecrypto.PseudonymCipher(
        ecrypto.GeneratePseudonymCipherKey(master_key, table_id))
    # The int and float homomorphic ciphers are both derived from the same
    # generated homomorphic key.
    homomorphic_int_cipher = ecrypto.HomomorphicIntCipher(
        ecrypto.GenerateHomomorphicCipherKey(master_key, table_id))
    homomorphic_float_cipher = ecrypto.HomomorphicFloatCipher(
        ecrypto.GenerateHomomorphicCipherKey(master_key, table_id))

    # Field-name prefix -> cipher used to decrypt values of such fields.
    ciphers = {
        util.PROBABILISTIC_PREFIX: prob_cipher,
        util.PSEUDONYM_PREFIX: pseudonym_cipher,
        util.HOMOMORPHIC_INT_PREFIX: homomorphic_int_cipher,
        util.HOMOMORPHIC_FLOAT_PREFIX: homomorphic_float_cipher,
    }

    # Seed an empty result list for every expression we expect to fill, so
    # every requested query has a key even if no column matches it below.
    queried_values = {}
    for query in query_list:
        # Strip a trailing ' AS <alias>' so results are keyed by the bare
        # expression.  NOTE(review): this 'AS' comparison is case-sensitive,
        # unlike the lower-cased 'within' check further down -- presumably
        # queries are normalized upstream; confirm.
        if len(query.split(' ')) >= 3 and query.split(' ')[-2] == 'AS':
            queried_values[' '.join(query.split(' ')[:-2])] = []
        else:
            queried_values[query] = []
    for query in aggregation_query_list:
        queried_values[query] = []
    # Unencrypted expressions were aliased as '<UNENCRYPTED_ALIAS_PREFIX><i>_'
    # (one alias per entry, by position).
    for i in xrange(len(unencrypted_query_list)):
        queried_values['%s%d_' % (util.UNENCRYPTED_ALIAS_PREFIX, i)] = []

    for i in xrange(len(fields)):
        # Periods in nested field names were substituted before querying;
        # restore them so names match the original schema again.
        fields[i]['name'] = fields[i]['name'].replace(util.PERIOD_REPLACEMENT,
                                                      '.')
        # The encryption prefix is carried on the innermost name component.
        encrypted_name = fields[i]['name'].split('.')[-1]
        if fields[i]['type'] == 'TIMESTAMP':
            queried_values[fields[i]['name']] = _GetTimestampValues(rows, i)
        elif encrypted_name.startswith(util.PROBABILISTIC_PREFIX):
            queried_values[fields[i]['name']] = (_DecryptValues(
                fields[i]['name'], rows, i, ciphers, schema,
                util.PROBABILISTIC_PREFIX))
        elif encrypted_name.startswith(util.PSEUDONYM_PREFIX):
            queried_values[fields[i]['name']] = (_DecryptValues(
                fields[i]['name'], rows, i, ciphers, schema,
                util.PSEUDONYM_PREFIX))
        elif encrypted_name.startswith(util.SEARCHWORDS_PREFIX):
            # Searchwords encryption is one-way (keyed hashes), so a plain
            # SELECT of such a field can never be decrypted.
            raise bigquery_client.BigqueryInvalidQueryError(
                'Cannot decrypt searchwords encryption. Decryption of SEARCHWORDS '
                'is limited to PROBABILISTIC_SEARCHWORDS encryption.', None,
                None, None)
        elif encrypted_name.startswith(util.HOMOMORPHIC_INT_PREFIX):
            queried_values[fields[i]['name']] = (_DecryptValues(
                fields[i]['name'], rows, i, ciphers, schema,
                util.HOMOMORPHIC_INT_PREFIX))
        elif encrypted_name.startswith(util.HOMOMORPHIC_FLOAT_PREFIX):
            queried_values[fields[i]['name']] = (_DecryptValues(
                fields[i]['name'], rows, i, ciphers, schema,
                util.HOMOMORPHIC_FLOAT_PREFIX))
        elif (encrypted_name.startswith(util.UNENCRYPTED_ALIAS_PREFIX)
              and encrypted_name.endswith('_')):
            # One of the unencrypted-expression aliases seeded above; values
            # pass through with only type conversion.
            queried_values[fields[i]['name']] = (_GetUnencryptedValuesWithType(
                rows, i, fields[i]['type']))
        elif encrypted_name.startswith('f') and encrypted_name.endswith('_'):
            # Anonymous aggregation result column of the form 'f<N>_'; N is
            # the position of the originating expression in
            # aggregation_query_list.
            index = int(fields[i]['name'][1:-1])
            original_fieldname = aggregation_query_list[index]
            original_fieldname = original_fieldname.strip()
            # Drop a trailing '... WITHIN <scope>' modifier (case-insensitive)
            # to recover the bare aggregation expression.
            if (len(original_fieldname.split(' ')) >= 3
                    and original_fieldname.split(' ')[-2].lower() == 'within'):
                actual_field = original_fieldname.split(' ')[:-2]
                actual_field = ' '.join(actual_field)
            else:
                actual_field = original_fieldname
            if original_fieldname.startswith(util.GROUP_CONCAT_PREFIX):
                # GROUP_CONCAT(field): extract the field name between the
                # prefix and the closing parenthesis, then decrypt each
                # concatenated element individually.
                concat_field = actual_field.split(
                    util.GROUP_CONCAT_PREFIX)[1][:-1].strip()
                encrypted_name = concat_field.split('.')[-1]
                if encrypted_name.startswith(util.PROBABILISTIC_PREFIX):
                    queried_values[original_fieldname] = (
                        _DecryptGroupConcatValues(original_fieldname, rows, i,
                                                  ciphers, schema,
                                                  util.PROBABILISTIC_PREFIX))
                elif encrypted_name.startswith(util.PSEUDONYM_PREFIX):
                    queried_values[original_fieldname] = (
                        _DecryptGroupConcatValues(original_fieldname, rows, i,
                                                  ciphers, schema,
                                                  util.PSEUDONYM_PREFIX))
                elif (encrypted_name.startswith(util.HOMOMORPHIC_INT_PREFIX)
                      or encrypted_name.startswith(
                          util.HOMOMORPHIC_FLOAT_PREFIX)):
                    raise bigquery_client.BigqueryInvalidQueryError(
                        'GROUP_CONCAT only accepts string type.', None, None,
                        None)
                elif encrypted_name.startswith(util.SEARCHWORDS_PREFIX):
                    raise bigquery_client.BigqueryInvalidQueryError(
                        'Invalid query, cannot recover searchwords encryption.',
                        None, None, None)
                else:
                    # Unencrypted GROUP_CONCAT: copy raw values as-is.
                    for j in xrange(len(rows)):
                        queried_values[original_fieldname].append(rows[j][i])
            elif (original_fieldname.startswith('COUNT(')
                  or original_fieldname.startswith('AVG(')
                  or original_fieldname.startswith('SUM(')):
                # Plain numeric aggregates need no decryption, only typing.
                queried_values[original_fieldname] = (
                    _GetUnencryptedValuesWithType(rows, i, fields[i]['type']))
            elif original_fieldname.startswith('TOP('):
                # TOP(field, ...): take the first argument inside the parens.
                fieldname = actual_field.split('TOP(')[1][:-1].strip()
                fieldname = fieldname.split(',')[0].strip()
                # Only pseudonym-encrypted fields can be recovered here; any
                # other field is returned unencrypted.
                if fieldname.split('.')[-1].startswith(util.PSEUDONYM_PREFIX):
                    queried_values[original_fieldname] = (_DecryptValues(
                        fieldname, rows, i, ciphers, schema,
                        util.PSEUDONYM_PREFIX))
                else:
                    queried_values[original_fieldname] = (
                        _GetUnencryptedValues(original_fieldname, rows, i,
                                              schema))
            elif original_fieldname.startswith(util.PAILLIER_SUM_PREFIX):
                # Paillier sum: pull the first argument out of
                # '<PAILLIER_SUM_PREFIX><field>, ...)'.  sum_argument is the
                # innermost name component (used to pick the cipher);
                # real_fieldname keeps any record path (used for lookup).
                sum_argument = original_fieldname.split(
                    util.PAILLIER_SUM_PREFIX)[1]
                sum_argument = sum_argument.split(',')[0][:-1]
                sum_argument = sum_argument.split('.')[-1]
                real_fieldname = original_fieldname.split(
                    util.PAILLIER_SUM_PREFIX)[1]
                real_fieldname = real_fieldname.split(',')[0][:-1]
                if sum_argument.startswith(util.HOMOMORPHIC_INT_PREFIX):
                    queried_values[original_fieldname] = (_DecryptValues(
                        real_fieldname, rows, i, ciphers, schema,
                        util.HOMOMORPHIC_INT_PREFIX))
                elif sum_argument.startswith(util.HOMOMORPHIC_FLOAT_PREFIX):
                    queried_values[original_fieldname] = (_DecryptValues(
                        real_fieldname, rows, i, ciphers, schema,
                        util.HOMOMORPHIC_FLOAT_PREFIX))
                # NOTE(review): if the argument carries neither homomorphic
                # prefix, the result list seeded above stays empty --
                # presumably query rewriting guarantees the prefix; confirm.
            else:
                queried_values[fields[i]['name']] = (
                    _GetUnencryptedValuesWithType(rows, i, fields[i]['type']))
        else:
            # Ordinary unencrypted column: convert raw strings to the
            # declared type and pass through.
            queried_values[fields[i]['name']] = (_GetUnencryptedValuesWithType(
                rows, i, fields[i]['type']))

    return queried_values