def testConvertCsvDataFile(self):
  """Converts the cars CSV file and sanity checks the first output row.

  Runs load_lib.ConvertCsvDataFile on the temporary cars CSV file, validates
  the output file against the rewritten cars schema, and then checks the
  first output row: it must contain the plaintext '1997', the pseudonym
  encryption of 'Ford', and the expected searchwords hash for the Model
  value 'E350'.
  """
  self._SetupTestFlags()
  schema = json.loads(test_util.GetCarsSchemaString())
  infile = self._WriteTempCarsCsvFile()
  outfile = os.path.join(self.dirname, 'cars.enc_data')
  master_key = base64.b64decode(_MASTER_KEY)
  string_hasher = ecrypto.StringHash(
      ecrypto.GenerateStringHashKey(master_key, _TABLE_ID))
  pseudonym_cipher = ecrypto.PseudonymCipher(
      ecrypto.GeneratePseudonymCipherKey(master_key, _TABLE_ID))
  load_lib.ConvertCsvDataFile(schema, master_key, _TABLE_ID, infile, outfile)

  # Validate new data file against new rewritten schema.
  new_schema = json.loads(_CARS_REWRITTEN_SCHEMA)
  load_lib._ValidateCsvDataFile(new_schema, outfile)

  # Sanity check one row entries. Entries for semantic encrypted fields cannot
  # be checked because the values are randomized.
  # Only the first row is needed; reading it inside a with block guarantees
  # the file is closed even when one of the assertions below fails.
  with open(outfile, 'rt') as fout:
    row0 = fout.readline()
  self.assertTrue('1997' in row0)
  self.assertTrue(pseudonym_cipher.Encrypt(unicode('Ford')) in row0)

  # Get iv and hash for Model searchwords field whose value is 'E350'. The
  # field is the third comma-separated column, stored as '<iv> <hash>'.
  (model_iv, model_hash) = row0.split(',')[2].split(' ')

  # Calculate expected key hash value for 'E350'.
  expected_model_key_hash = string_hasher.GetStringKeyHash(
      util.SEARCHWORDS_PREFIX + u'Model', u'E350'.lower())

  # Calculate outer sha1 using model_iv and expected key hash; only the first
  # 8 bytes of the digest are base64 encoded and compared.
  expected_model_hash = base64.b64encode(
      hashlib.sha1(model_iv + expected_model_key_hash).digest()[:8])
  self.assertEqual(expected_model_hash, model_hash)
def testConvertCsvDataFile(self):
  """Converts the cars CSV file and sanity checks the first output row.

  Runs load_lib.ConvertCsvDataFile on the temporary cars CSV file, validates
  the output file against the rewritten cars schema, and then checks the
  first output row: it must contain the plaintext '1997', the pseudonym
  encryption of 'Ford', and the expected searchwords hash for the Model
  value 'E350'.
  """
  self._SetupTestFlags()
  schema = json.loads(test_util.GetCarsSchemaString())
  infile = self._WriteTempCarsCsvFile()
  outfile = os.path.join(self.dirname, 'cars.enc_data')
  master_key = base64.b64decode(_MASTER_KEY)
  string_hasher = ecrypto.StringHash(
      ecrypto.GenerateStringHashKey(master_key, _TABLE_ID))
  pseudonym_cipher = ecrypto.PseudonymCipher(
      ecrypto.GeneratePseudonymCipherKey(master_key, _TABLE_ID))
  load_lib.ConvertCsvDataFile(schema, master_key, _TABLE_ID, infile, outfile)

  # Validate new data file against new rewritten schema.
  new_schema = json.loads(_CARS_REWRITTEN_SCHEMA)
  load_lib._ValidateCsvDataFile(new_schema, outfile)

  # Sanity check one row entries. Entries for semantic encrypted fields cannot
  # be checked because the values are randomized.
  # Only the first row is needed; reading it inside a with block guarantees
  # the file is closed even when one of the assertions below fails.
  with open(outfile, 'rt') as fout:
    row0 = fout.readline()
  self.assertTrue('1997' in row0)
  self.assertTrue(pseudonym_cipher.Encrypt(unicode('Ford')) in row0)

  # Get iv and hash for Model searchwords field whose value is 'E350'. The
  # field is the third comma-separated column, stored as '<iv> <hash>'.
  (model_iv, model_hash) = row0.split(',')[2].split(' ')

  # Calculate expected key hash value for 'E350'.
  expected_model_key_hash = string_hasher.GetStringKeyHash(
      util.SEARCHWORDS_PREFIX + u'Model', u'E350'.lower())

  # Calculate outer sha1 using model_iv and expected key hash; only the first
  # 8 bytes of the digest are base64 encoded and compared.
  expected_model_hash = base64.b64encode(
      hashlib.sha1(model_iv + expected_model_key_hash).digest()[:8])
  self.assertEqual(expected_model_hash, model_hash)
def testValidateCsvDataFile(self):
  """Runs the CSV validator on the temporary cars CSV file.

  Parses the cars schema string into JSON, writes the temporary cars CSV
  file, and passes both to load_lib._ValidateCsvDataFile. There are no
  explicit assertions; the test passes when the call does not raise.
  """
  cars_schema = json.loads(test_util.GetCarsSchemaString())
  csv_path = self._WriteTempCarsCsvFile()
  load_lib._ValidateCsvDataFile(cars_schema, csv_path)