def api_hide():
    """Read the hide-request parameters and forward them to ``self.hide_data``.

    Every parameter is fetched through ``self.get_param`` using the HTTP
    method of the current request. The return value is whatever
    ``self.hide_data`` returns.
    """
    http_method = request.method
    # "records" could be a string (GET) or a dict (POST)
    wanted = (
        'records',
        'col_to_hide',
        'is_number_only',
        'case_sensitive',
        'process_phone_country',
        'encrypt_key_b64',
        'nonce_b64',
    )
    # Dict comprehension keeps the lookup order of the names above
    params = {
        name: self.get_param(param_name=name, method=http_method)
        for name in wanted
    }

    source_line = getframeinfo(currentframe()).lineno
    Log.info(''.join([
        str(self.__class__), ' ', str(source_line),
        ': Received parameters: hide colname "', str(params['col_to_hide']),
        '", nonce base64 "', str(params['nonce_b64']), '"',
    ]))

    return self.hide_data(
        records_json=params['records'],
        hide_colname=params['col_to_hide'],
        is_number_only=params['is_number_only'],
        case_sensitive=params['case_sensitive'],
        process_phone_country=params['process_phone_country'],
        encrypt_key_b64=params['encrypt_key_b64'],
        nonce_b64=params['nonce_b64'],
    )
def hide_data(
        self,
        # In string JSON (GET), or dict (POST)
        records_json,
        # Column names to hide
        hide_colname,
        encrypt_key_b64,
        nonce_b64,
        is_number_only=False,
        case_sensitive=False,
        process_phone_country=None,
        hash_encode_lang='zh',
):
    """Run ``Hide().hide_data`` on the request parameters.

    The request flags ``is_number_only`` and ``case_sensitive`` are turned
    into booleans: they count as true only when equal to one of
    ``1``, ``'1'``, ``'y'``, ``'yes'``.

    :param records_json: records as a JSON string (GET) or parsed object (POST)
    :param hide_colname: name of the column to hide
    :param encrypt_key_b64: base64 encoded encryption key
    :param nonce_b64: base64 encoded nonce
    :param is_number_only: request flag, see above
    :param case_sensitive: request flag, see above
    :param process_phone_country: forwarded unchanged
    :param hash_encode_lang: forwarded unchanged (previously dropped silently)
    :return: the result of ``Hide().hide_data``, or the error message string
             when an exception occurred
    :raises Exception: only when ``Log.DEBUG_PRINT_ALL_TO_SCREEN`` is set
    """
    # Keep this a list: membership by equality also works for unhashable input
    truthy_flags = [1, '1', 'y', 'yes']
    try:
        return Hide().hide_data(
            records_json=records_json,
            hide_colname=hide_colname,
            is_number_only=(is_number_only in truthy_flags),
            case_sensitive=(case_sensitive in truthy_flags),
            process_phone_country=process_phone_country,
            encrypt_key_b64=encrypt_key_b64,
            nonce_b64=nonce_b64,
            hash_encode_lang=hash_encode_lang,
        )
    except Exception as ex:
        errmsg = str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno) \
                 + ' Exception occurred IP ' + str(flask.request.remote_addr) \
                 + ', exception ' + str(ex) + '.'
        Log.error(errmsg)
        if Log.DEBUG_PRINT_ALL_TO_SCREEN:
            # Chain the original exception so its traceback is not lost
            raise Exception(errmsg) from ex
        return errmsg
def __init__(
        self,
        # 16 or 32 byte key
        key,
        nonce=None,
        mode=AES_MODE_EAX,
        text_encoding='utf-8'):
    """Store the key, cipher mode, nonce and text encoding.

    :param key: key bytes (16 or 32 bytes)
    :param nonce: nonce / IV bytes; when None one is generated with
                  ``AES_Encrypt.generate_random_bytes``
    :param mode: ``AES_Encrypt.AES_MODE_EAX`` or ``AES_Encrypt.AES_MODE_CBC``
    :param text_encoding: encoding used for the base64 text conversions
    :raises Exception: when ``mode`` is not one of the two supported modes
    """
    self.key = key
    # Never write the key material itself to the logs, only its size
    Log.debug('Using key of size = ' + str(len(self.key)) + '.')

    self.cipher_mode_str = mode
    if self.cipher_mode_str == AES_Encrypt.AES_MODE_EAX:
        self.cipher_mode = AES.MODE_EAX
    elif self.cipher_mode_str == AES_Encrypt.AES_MODE_CBC:
        self.cipher_mode = AES.MODE_CBC
    else:
        raise Exception(
            str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno)
            + ': Unsupported AES mode "' + str(self.cipher_mode_str) + '"')

    if nonce is None:
        # Must be 16 bytes
        nonce = AES_Encrypt.generate_random_bytes(
            size=AES_Encrypt.SIZE_NONCE, printable=True)
    self.nonce = nonce
    Log.debug(
        str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno)
        + ': Using nonce "' + str(self.nonce) + '". Size = ' + str(len(self.nonce)))

    self.text_encoding = text_encoding
def decode(self, ciphertext):
    """Decrypt a base64 ciphertext string and return the plaintext string.

    In CBC mode the value of the last decrypted byte is taken as the number
    of padding bytes and that many bytes are stripped from the end.

    NOTE(review): in EAX mode the data is decrypted without verifying an
    authentication tag (no tag is passed in) - confirm this is acceptable.

    :param ciphertext: base64 text of the encrypted bytes
    :return: decrypted text, decoded with ``STR_ENCODING``
    :raises Exception: on any failure (unsupported mode, bad base64,
                       invalid padding, undecodable plaintext)
    """
    try:
        if self.cipher_mode == AES.MODE_EAX:
            cipher = AES.new(key=self.key, mode=self.cipher_mode, nonce=self.nonce)
            cipherbytes = b64decode(ciphertext.encode(self.text_encoding))
            data = cipher.decrypt(cipherbytes)
        elif self.cipher_mode == AES.MODE_CBC:
            cipher = AES.new(key=self.key, mode=self.cipher_mode, iv=self.nonce)
            cipherbytes = b64decode(ciphertext.encode(self.text_encoding))
            data = cipher.decrypt(cipherbytes)
            block_size = AES_Encrypt.DEFAULT_BLOCK_SIZE_AES_CBC
            Log.debugdebug(
                str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno)
                + ': Decrypted data length = ' + str(len(data))
                + ', modulo 16 = ' + str(len(data) % 16))
            if not data:
                raise Exception('Decrypted data is empty, no padding byte to read.')
            # The last byte holds the number of padding bytes appended by encode()
            pad_len = data[-1]
            if not 1 <= pad_len <= block_size:
                # A pad length of 0 would otherwise silently yield an empty result
                raise Exception(
                    'Invalid padding length ' + str(pad_len)
                    + ', expected 1 to ' + str(block_size) + '.')
            data = data[:-pad_len]
        else:
            raise Exception(
                str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno)
                + ': Unsupported mode "' + str(self.cipher_mode) + '".')

        return str(data, encoding=STR_ENCODING)
    except Exception as ex:
        errmsg = str(__name__) + ' ' + str(getframeinfo(currentframe()).lineno) \
                 + ': Error decoding data "' + str(ciphertext) + '" using AES ". Exception: ' + str(ex)
        Log.error(errmsg)
        raise Exception(errmsg) from ex
def encrypt(x, encryptor):
    """Encrypt the string ``x`` with ``encryptor`` and describe the result.

    :param x: text to encrypt (encoded as UTF-8 before encryption)
    :param encryptor: object whose ``encode(bytes)`` returns an object with
                      ``cipher_mode``, ``ciphertext_b64``, ``tag_b64`` and
                      ``nonce_b64`` attributes
    :return: dict with keys ``ciphermode``, ``ciphertext_b64``, ``tag_b64``
             and ``iv_b64``; None when anything fails (the error is logged)
    """
    try:
        outcome = encryptor.encode(bytes(x.encode(encoding='utf-8')))
        return dict(
            ciphermode=outcome.cipher_mode,
            ciphertext_b64=outcome.ciphertext_b64,
            tag_b64=outcome.tag_b64,
            iv_b64=outcome.nonce_b64,
        )
    except Exception as ex:
        where = str(__name__) + ' ' + str(getframeinfo(currentframe()).lineno)
        Log.error(where + ': Error encrypting "' + str(x) + '": ' + str(ex))
        return None
def filter_phone_china(x):
    """Normalise a Chinese phone number to ``+<country code><number>``.

    See https://en.wikipedia.org/wiki/Telephone_numbers_in_China

    Formats handled (after every non-digit is removed):
      - 0yyy xxx xxxx (11 digits) | 0yyy xxxx xxxx (12 digits): landline
        dialled from another area; the leading 0 is dropped
      - 1nn xxxx xxxx (11 digits): mobile
      - country code followed by the number, 12 or 13 digits in total
      - anything else with 10 or 11 digits: landline with area code but
        without the leading 0

    :param x: the raw phone value, converted with ``str()``
    :return: the normalised number, or None when there are fewer than 10
             digits or the number fits none of the formats (which is logged)
    """
    try:
        digits = re.sub(pattern='[^0-9]', repl='', string=str(x))
        n_digits = len(digits)

        # At least 10 digits
        if n_digits < 10:
            return None

        leading = digits[0]
        if leading == '0':
            if n_digits in (11, 12):
                return '+' + PhoneNumber.CHINA_COUNTRY_CODE + digits[1:]
        elif leading == '1':
            if n_digits == 11:
                return '+' + PhoneNumber.CHINA_COUNTRY_CODE + digits
        elif digits[0:2] == PhoneNumber.CHINA_COUNTRY_CODE:
            if n_digits in (12, 13):
                return '+' + digits
        elif n_digits in (10, 11):
            return '+' + PhoneNumber.CHINA_COUNTRY_CODE + digits

        # TODO Should we just return whatever we have then? Instead of throwing exception.
        raise Exception('Invalid ' + str(n_digits) + ' digit phone number ' + str(digits) + '')
    except Exception as ex:
        where = str(__name__) + ' ' + str(getframeinfo(currentframe()).lineno)
        Log.error(where + ': Error phone number: ' + str(ex))
        return None
def hash_compression(
        self,
        s,
        # By default we return the original hash
        desired_byte_length=32
):
    """Hash ``s`` with SHA-256 and fold the digest to a shorter length.

    The digest is cut into blocks of ``desired_byte_length`` bytes and the
    blocks are XOR-ed together with ``self.xor_bytes``.

    :param s: string to hash, encoded with ``Obfuscate.STRING_ENCODING``
    :param desired_byte_length: length of the result; must be a positive
           multiple of 4 that divides the digest length (4, 8, 16 or 32 for
           the 32-byte SHA-256 digest)
    :return: the digest itself (``bytes``) when only one block exists,
             otherwise the value returned by ``self.xor_bytes``
    :raises Exception: when ``desired_byte_length`` is not acceptable
    """
    if desired_byte_length % 4 != 0:
        raise Exception(
            str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno)
            + ': Desired byte length must be 0 modulo-4, given = ' + str(desired_byte_length)
        )

    m = hashlib.sha256()
    m.update(bytes(s, encoding=Obfuscate.STRING_ENCODING))
    # This will return a bytes list of length 32
    h = m.digest()
    if len(h) % 4 != 0:
        raise Exception(
            str(__name__) + ' ' + str(getframeinfo(currentframe()).lineno)
            + ': Hash bytes length must be 0 modulo-4, got = ' + str(h)
        )

    # Being a multiple of 4 is not enough: 0 or a value larger than the digest
    # used to end in a division by zero, and a value that does not divide the
    # digest length produced a result of a different length than requested.
    if desired_byte_length <= 0 or len(h) % desired_byte_length != 0:
        raise Exception(
            str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno)
            + ': Desired byte length must be positive and divide the hash length '
            + str(len(h)) + ', given = ' + str(desired_byte_length)
        )

    n_blocks = int(len(h) // desired_byte_length)
    block_len = int(len(h) // n_blocks)
    Log.debugdebug(
        str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno)
        + ': Number of blocks = ' + str(n_blocks) + ', block length = ' + str(block_len)
    )

    # Start with the first block and XOR every following block into it
    bytes_xor = h[0:block_len]
    for i in range(1, n_blocks):
        cur_block = h[i * block_len:(i + 1) * block_len]
        if len(bytes_xor) != len(cur_block):
            raise Exception(
                str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno)
                + ': Different block lengths "' + str(bytes_xor)
                + '", and "' + str(cur_block) + '"'
            )
        bytes_xor = self.xor_bytes(
            b1=bytes_xor,
            b2=cur_block
        )

    return bytes_xor
def encode(
        self,
        # bytes format
        data):
    """Encrypt ``data`` with the configured key, mode and nonce.

    In CBC mode the data is padded to a multiple of
    ``AES_Encrypt.DEFAULT_BLOCK_SIZE_AES_CBC`` bytes; every padding byte
    holds the padding length (1 to block size), which ``decode`` reads from
    the last byte. The caller's ``data`` object is never modified.

    NOTE(review): every call on this object uses the same ``self.nonce`` -
    confirm that this reuse is acceptable for the chosen mode.

    :param data: plaintext bytes
    :return: ``AES_Encrypt.EncryptRetClass`` holding the cipher mode string,
             base64 ciphertext, base64 tag (EAX only) and base64 nonce
    :raises Exception: on any failure, including an unsupported mode
    """
    try:
        if self.cipher_mode == AES.MODE_EAX:
            cipher = AES.new(key=self.key, mode=self.cipher_mode, nonce=self.nonce)
            cipherbytes, tag = cipher.encrypt_and_digest(data)
            return AES_Encrypt.EncryptRetClass(
                cipher_mode=self.cipher_mode_str,
                ciphertext_b64=b64encode(cipherbytes).decode(self.text_encoding),
                plaintext_b64=None,
                tag_b64=b64encode(tag).decode(self.text_encoding),
                nonce_b64=b64encode(self.nonce).decode(self.text_encoding))
        elif self.cipher_mode == AES.MODE_CBC:
            block_size = AES_Encrypt.DEFAULT_BLOCK_SIZE_AES_CBC
            # 1-16, make sure not 0, otherwise last byte will not be block length
            length = block_size - (len(data) % block_size)
            # Build a new object instead of "data += ...", which would modify
            # a bytearray passed in by the caller
            padded = data + bytes([length]) * length
            Log.debugdebug(
                str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno)
                + ': Padded length = ' + str(length))
            cipher = AES.new(key=self.key, mode=self.cipher_mode, iv=self.nonce)
            cipherbytes = cipher.encrypt(padded)
            return AES_Encrypt.EncryptRetClass(
                cipher_mode=self.cipher_mode_str,
                ciphertext_b64=b64encode(cipherbytes).decode(self.text_encoding),
                plaintext_b64=None,
                tag_b64=None,
                nonce_b64=b64encode(self.nonce).decode(self.text_encoding))
        else:
            raise Exception(
                str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno)
                + ': Unsupported mode "' + str(self.cipher_mode) + '".')
    except Exception as ex:
        errmsg = str(__name__) + ' ' + str(getframeinfo(currentframe()).lineno) \
                 + ': Error encoding data "' + str(data) + '" using AES ". Exception: ' + str(ex)
        Log.error(errmsg)
        raise Exception(errmsg) from ex
def get_param(self, param_name, method='GET'):
    """Fetch one request parameter.

    :param param_name: name of the parameter
    :param method: for 'GET' the query string is read, for anything else
                   the JSON body of the request
    :return: for 'GET' the value as a string, otherwise the JSON value as
             is; None when the parameter is missing
    """
    if method != 'GET':
        try:
            return flask.request.json[param_name]
        except Exception:
            Log.critical(
                str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno)
                + ': No param name [' + param_name + '] in request.')
            return None

    if param_name not in flask.request.args:
        return None
    return str(flask.request.args[param_name])
def run_unit_test(self):
    """Check hashing and alphabet conversion against fixed expected strings.

    :return: ``ut.ResultObj`` updated with one result per test case
    """
    res_final = ut.ResultObj(count_ok=0, count_fail=0)

    s = '니는 먹고 싶어'
    # Pairs of (hash algorithm, expected converted hash of s)
    hash_cases = [
        [Hash.ALGO_SHA1, '蔮膫圈嫩慁覕邜蹋妡狿'],
        [Hash.ALGO_SHA256, '葶杊閹翔綐僤徼戻髯鼚胦嘭藃诠灑浽'],
        [Hash.ALGO_SHA512, '詐鏙仟墍例嵝烐檦蝡溲薑珇鸦東燢爻纷欜陲囚劚攠菜槑茹輀濯偑袁蓣质簨'],
        [Hash.ALGO_SHA3_256, '厥驹踸鸨揱澯鑢擠鳰僸覑儽悃徵絨控'],
        [Hash.ALGO_SHA3_512, '醜怅僒础衺菼惓隔鮚腋釔晞鏙屜咖龩檵因伖蘦惌灱騾凊纅弪鮾蕏解铦欪臓'],
    ]
    for algo, expected in hash_cases:
        # In Linux command line, echo -n "$s" | shasum -a 1 (or 256,512)
        Log.debug('Using algo "' + str(algo) + '":')
        hstr = Hash.hash(string=s, algo=algo)
        Log.debug('Hash: ' + str(hstr))
        observed = Hash.convert_ascii_string_to_other_alphabet(
            ascii_char_string=hstr,
            group_n_char=4)
        comment = 'test string "' + str(hstr) + '" got "' + str(observed) + '"'
        outcome = ut.UnitTest.assert_true(
            observed=observed, expected=expected, test_comment=comment)
        res_final.update_bool(res_bool=outcome)

    # Pairs of (arbitrary ascii string, expected converted string)
    ascii_cases = [
        ['abc/ii{}.!&%[][\\+=', '嵢弯敩睽簡琥坝坜礽縰'],
        ['8829amsf)(*&^%^*./', '蘸耹嵭潦眨砦娥娪簯縰'],
    ]
    for ascii_string, expected in ascii_cases:
        observed = Hash.convert_ascii_string_to_other_alphabet(
            ascii_char_string=ascii_string)
        comment = 'test string "' + str(ascii_string) + '" got "' + str(observed) + '"'
        outcome = ut.UnitTest.assert_true(
            observed=observed, expected=expected, test_comment=comment)
        res_final.update_bool(res_bool=outcome)

    return res_final
def filter_col(x, is_number_only=False, case_sensitive=False):
    """Clean one value: trim, optionally lower-case, optionally keep digits.

    :param x: value to clean, converted with ``str()``
    :param is_number_only: when true every non-digit character is removed
    :param case_sensitive: when false the value is lower-cased
    :return: the cleaned string; if a step fails the error is logged and the
             value as it was before the failing step is returned
    """
    value = x
    try:
        # We always trim no matter what
        value = StringUtils.trim(str(value))
        if not case_sensitive:
            value = value.lower()
        if is_number_only:
            value = re.sub(pattern='[^0-9]', repl='', string=value)
        return value
    except Exception as ex_clean:
        where = str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno)
        Log.error(where + ': Error cleaning "' + str(value) + '". ' + str(ex_clean))
        return value
def process_phone(x, country):
    """Normalise a phone number for the given country.

    :param x: the phone value
    :param country: only 'china' is supported
    :return: result of ``PhoneNumber.filter_phone_china`` for 'china';
             ``x`` unchanged (with an error logged) for any other country or
             when an exception occurs
    """
    try:
        if country != 'china':
            Log.error(
                str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno)
                + ': Unsupported country "' + str(country) + '"'
            )
            return x
        return PhoneNumber.filter_phone_china(x)
    except Exception as ex:
        where = str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno)
        Log.error(
            where + ': Exception processing phone "' + str(x) + '". Exception ' + str(ex)
        )
        return x
def hash(string, algo=ALGO_SHA1):
    """Return the hex digest of ``string`` for the given algorithm.

    :param string: text to hash, encoded with ``Hash.STR_ENCODING``
    :param algo: one of ``Hash.ALGO_SHA1``, ``Hash.ALGO_SHA256``,
                 ``Hash.ALGO_SHA512``, ``Hash.ALGO_SHA3_256``,
                 ``Hash.ALGO_SHA3_512``
    :return: hex digest string; None when hashing fails for any reason
             (unsupported algorithm, value that cannot be encoded such as
             None); the error is logged
    """
    try:
        # Encode inside the try block so that a bad input (e.g. None) is
        # logged and answered with None like every other failure
        str_encode = string.encode(encoding=Hash.STR_ENCODING)
        if algo == Hash.ALGO_SHA1:
            h = hashlib.sha1(str_encode)
        elif algo == Hash.ALGO_SHA256:
            h = hashlib.sha256(str_encode)
        elif algo == Hash.ALGO_SHA512:
            h = hashlib.sha512(str_encode)
        elif algo == Hash.ALGO_SHA3_256:
            h = hashlib.sha3_256(str_encode)
        elif algo == Hash.ALGO_SHA3_512:
            h = hashlib.sha3_512(str_encode)
        else:
            raise Exception('Unsupported hash algo "' + str(algo) + '".')
        return h.hexdigest()
    except Exception as ex:
        # Local import: only needed on the error path
        from inspect import currentframe, getframeinfo
        errmsg = str(__name__) + ' ' + str(getframeinfo(currentframe()).lineno) \
                 + ': Error hashing string "' + str(string) + '" using algo "' + str(algo) \
                 + '". Exception: ' + str(ex)
        Log.error(errmsg)
        return None
def xor_bytes(self, b1, b2):
    """XOR two byte sequences element by element.

    The sequences are paired with ``zip``, so the result is as long as the
    shorter of the two inputs.

    :param b1: first sequence of integer byte values
    :param b2: second sequence of integer byte values
    :return: list of the XOR-ed integer values
    """
    xored = []
    for left, right in zip(b1, b2):
        value = left ^ right
        Log.debugdebug(
            str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno)
            + 'XOR "' + str(hex(left)) + '" and "' + str(hex(right))
            + '" = ' + str(hex(value))
        )
        xored.append(value)

    Log.debug(
        str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno)
        + ': XOR between "' + str(self.hexdigest(b1))
        + '" and "' + str(self.hexdigest(b2))
        + '" = "' + str(self.hexdigest(xored)) + '"'
    )
    return xored
def run_unit_test(self):
    """Encrypt and decrypt sample sentences in CBC and EAX mode.

    Every sentence must decrypt back to itself.

    :return: ``ut.ResultObj`` updated with one result per sentence and mode
    """
    res_final = ut.ResultObj(count_ok=0, count_fail=0)

    # 10000 random characters to exercise a long input
    long_str = ''.join(
        random.choice(AES_Encrypt.CHARS_STR) for _ in range(10000)
    )

    sentences = [
        '니는 먹고 싶어',
        'Дворянское ГНЕЗДО',
        '没问题 大陆 经济',
        '存款方式***2019-12-11 11:38:46***',
        '1234567890123456',
        long_str,
    ]

    key = b'Sixteen byte key'
    nonce = b'0123456789xxyyzz'
    # 32 byte key built from the 16 byte key
    double_key = key + key

    for mode in [AES_Encrypt.AES_MODE_CBC, AES_Encrypt.AES_MODE_EAX]:
        aes_obj = AES_Encrypt(key=double_key, mode=mode, nonce=nonce)
        for s in sentences:
            Log.debug('Encrypting "' + str(s) + '"')
            data_bytes = bytes(s.encode(encoding=STR_ENCODING))
            Log.debug('Data length in bytes = ' + str(len(data_bytes)))

            ciphertext = aes_obj.encode(data=data_bytes).ciphertext_b64
            Log.debug('Encrypted as "' + str(ciphertext) + '"')

            plaintext = aes_obj.decode(ciphertext=ciphertext)
            Log.debug('Decrypted as "' + plaintext + '"')

            comment = 'mode "' + str(mode) + '" s=' + str(s) \
                      + '" encrypted to "' + str(ciphertext) \
                      + '", decrypted back to "' + str(plaintext)
            outcome = ut.UnitTest.assert_true(
                observed=plaintext, expected=s, test_comment=comment)
            res_final.update_bool(res_bool=outcome)

    return res_final
def convert_ascii_string_to_other_alphabet(
        ascii_char_string,
        # Default to CJK Unicode Block
        unicode_range=BLOCK_CHINESE,
        # If the characters come from a hexdigest from a hash, we can compress 4 times,
        # otherwise for a random ascii string, we can only compress 2 characters to 1 chinese.
        group_n_char=2):
    """Map groups of characters to characters of another Unicode block.

    The input is first padded with '0' to a length that is a multiple of 4.
    Each group of ``group_n_char`` characters becomes one number, which is
    reduced modulo the size of ``unicode_range`` and offset by the start of
    the range to give one output character.

    :param ascii_char_string: input string; for ``group_n_char=4`` every
           group must be valid hexadecimal
    :param unicode_range: pair (first code point, last code point)
    :param group_n_char: 2 (characters are combined through their ordinals)
           or 4 (the group is read as a hexadecimal number)
    :return: the converted string
    :raises Exception: when ``group_n_char`` is neither 2 nor 4 (this used
            to return an empty string silently)
    """
    if group_n_char not in (2, 4):
        raise Exception(
            'Unsupported group_n_char "' + str(group_n_char) + '", must be 2 or 4.')

    uni_len = unicode_range[1] - unicode_range[0] + 1

    r = len(ascii_char_string) % 4
    if r != 0:
        # Append 0's
        ascii_char_string = ascii_char_string + '0' * (4 - r)

    converted = []
    len_block = int(len(ascii_char_string) / group_n_char)
    for i in range(len_block):
        idx_start = group_n_char * i
        idx_end = idx_start + group_n_char
        s = ascii_char_string[idx_start:idx_end]

        # Convert to Chinese, Korean, etc
        if group_n_char == 2:
            ord_arr = np.array([ord(x) for x in s])
            # Weight the ordinals as base-256 digits, most significant first
            val = ord_arr * np.array(
                [2**(8 * (x - 1)) for x in range(len(ord_arr), 0, -1)])
            val = np.sum(val)
            Log.debug('Index start=' + str(idx_start) + ', end=' + str(idx_end)
                      + ', s=' + str(s) + ', ordinal=' + str(ord_arr)
                      + ', val=' + str(hex(val)))
            cjk_unicode = (val % uni_len) + unicode_range[0]
            converted.append(chr(cjk_unicode))
        else:
            Log.debug('Index start=' + str(idx_start) + ', end=' + str(idx_end)
                      + ', s=' + str(s))
            n = int('0x' + str(s), 16)
            cjk_unicode = (n % uni_len) + unicode_range[0]
            converted.append(chr(cjk_unicode))
            # "n" only exists in this branch, so the log stays here
            Log.debugdebug('From ' + str(idx_start) + ': ' + str(s)
                           + ', n=' + str(n) + ', char=' + str(chr(cjk_unicode)))

    return ''.join(converted)
def xor_string(self, s1, s2):
    """XOR two strings after extending the shorter one by repetition.

    The shorter string is extended, character by character, by repeating
    itself until both strings have the same number of characters. Both are
    then encoded with ``Obfuscate.STRING_ENCODING`` and passed to
    ``self.xor_bytes``.

    :param s1: first string
    :param s2: second string
    :return: the value returned by ``self.xor_bytes``
    """
    Log.debug(
        str(__name__) + ' ' + str(getframeinfo(currentframe()).lineno)
        + ': XOR between "' + str(s1) + '" and "' + str(s2) + '".'
    )

    orig_len_1 = len(s1)
    orig_len_2 = len(s2)
    target_len = max(orig_len_1, orig_len_2)

    # Append to the shorter one, in a repeat manner
    while len(s1) < target_len:
        s1 += s1[len(s1) - orig_len_1]
    while len(s2) < target_len:
        s2 += s2[len(s2) - orig_len_2]

    Log.debug(
        str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno)
        + ': After appending, XOR between "' + str(s1) + '" and "' + str(s2) + '".'
    )
    Log.debugdebug(
        str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno)
        + ': s1 "' + str(s1) + '", s2 "' + str(s2) + '"'
    )

    return self.xor_bytes(
        b1=bytes(s1, encoding=Obfuscate.STRING_ENCODING),
        b2=bytes(s2, encoding=Obfuscate.STRING_ENCODING)
    )
def page_not_found(e):
    """Log the requested URL and answer with a 404 page.

    :param e: the error passed to the handler (not used)
    :return: tuple of the HTML body and the status code 404
    """
    where = str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno)
    Log.error(where + ': Resource [' + str(flask.request.url) + '] is not valid!')

    body = "<h1>404</h1><p>The resource could not be found.</p>"
    return body, 404
def hide_data(
        self,
        # In string JSON
        records_json,
        # Column names to hide
        hide_colname,
        encrypt_key_b64,
        nonce_b64=None,
        is_number_only=False,
        case_sensitive=False,
        # We support processing only China for now
        process_phone_country=None,
        hash_encode_lang='zh',
):
    """Clean, mask, hash and encrypt one column of the given records.

    For the column ``hide_colname`` these columns are added:
      - ``<col>_clean``: trimmed value, optionally lower-cased / digits only,
        optionally normalised as a phone number
      - ``<col>_last4char``: '***' plus the last characters of the clean value
      - ``<col>_sha256``: SHA-256 hex digest of the clean value
      - ``<col>_sha256_readable``: the digest converted to another alphabet
      - ``<col>_encrypt``: dict describing the AES (CBC mode) encryption

    A clean value of None (phone cleaning returns None for numbers it cannot
    normalise) gives None in all derived columns instead of aborting the
    whole batch.

    :param records_json: records as a JSON string or an already parsed object
    :param hide_colname: name of the column to hide
    :param encrypt_key_b64: base64 encoded AES key
    :param nonce_b64: base64 encoded nonce; when None or not decodable,
                      ``AES_Encrypt`` is created with ``nonce=None``
    :param is_number_only: keep only digits when cleaning
    :param case_sensitive: when false the clean value is lower-cased
    :param process_phone_country: only 'china' triggers phone normalisation
    :param hash_encode_lang: 'ko' selects ``Hash.BLOCK_KOREAN_SYL``, anything
                             else ``Hash.BLOCK_CHINESE``
    :return: the records as a JSON string (orient 'records'); if
             ``records_json`` is a string that cannot be parsed, the error
             message string is returned instead
    :raises Exception: when the key cannot be base64-decoded
    """
    step = 0

    if isinstance(records_json, str):
        try:
            records_json = json.loads(records_json)
        except Exception as ex_json:
            errmsg = \
                str(__name__) + ' ' + str(getframeinfo(currentframe()).lineno) \
                + ': Exception loading json: ' + str(records_json) \
                + '. Got exception: ' + str(ex_json)
            Log.error(errmsg)
            return errmsg

    colname_clean = str(hide_colname) + '_clean'
    colname_last4char = str(hide_colname) + '_last4char'
    colname_hash = str(hide_colname) + '_sha256'
    colname_hash_readable = str(hide_colname) + '_sha256_readable'
    colname_encrypt = str(hide_colname) + '_encrypt'

    df = pd.DataFrame(records_json)
    # NOTE(review): the log lines below write raw sample records to the log
    # - confirm this is acceptable for the data being hidden.
    Log.debug(
        str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno)
        + ': Converted json object (first 20 records): '
        + str(records_json[0:20])
        + ' to data frame: ' + str(df)
    )
    Log.important(
        str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno)
        + ': Start processing records, hide column "' + str(hide_colname)
        + '". Records of sample rows' + str(records_json[0:10])
    )

    #
    # Step 1
    #  - Clean phone numbers, bank accounts
    #  - Extract last 4 digits of phone/bank-account numbers to separate columns
    #  - Obfuscate the phone numbers, bank accounts for storage in cube
    #
    step += 1
    start_filter_time = Profiling.start()

    def filter_col(x, is_number_only=False, case_sensitive=False):
        try:
            # We always trim no matter what
            x = StringUtils.trim(str(x))
            if not case_sensitive:
                x = x.lower()
            if is_number_only:
                x = re.sub(pattern='[^0-9]', repl='', string=x)
            return x
        except Exception as ex_clean:
            Log.error(
                str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno)
                + ': Error cleaning "' + str(x) + '". ' + str(ex_clean)
            )
            return x

    df[colname_clean] = df[hide_colname].apply(
        filter_col, args=(is_number_only, case_sensitive))
    Log.important(
        str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno)
        + ': Step ' + str(step) + ': BASIC CLEANING Took '
        + str(Profiling.get_time_dif_secs(
            start=start_filter_time, stop=Profiling.stop(), decimals=2))
        + ' secs. Successfully cleaned column "' + str(hide_colname)
        + '", case sensitive "' + str(case_sensitive)
        + '", is number "' + str(is_number_only)
        + '", sample rows: ' + str(df[0:2])
    )

    #
    # Process Phone Number by Country
    #
    step += 1
    start_phone_time = Profiling.start()

    def process_phone(x, country):
        try:
            if country == 'china':
                return PhoneNumber.filter_phone_china(x)
            else:
                Log.error(
                    str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno)
                    + ': Unsupported country "' + str(country) + '"'
                )
                return x
        except Exception as ex:
            Log.error(
                str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno)
                + ': Exception processing phone "' + str(x) + '". Exception ' + str(ex)
            )
            return x

    if process_phone_country == 'china':
        df[colname_clean] = df[colname_clean].apply(
            process_phone, args=[process_phone_country])
        Log.important(
            str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno)
            + ': Step ' + str(step) + ': PHONE CLEANING Took '
            + str(Profiling.get_time_dif_secs(
                start=start_phone_time, stop=Profiling.stop(), decimals=2))
            + ' secs. Successfully processed phone for column "' + str(hide_colname)
            + '", sample rows: ' + str(df[0:2])
        )

    #
    # Extract last 4 characters
    #
    step += 1
    start_last4_time = Profiling.start()

    def last4char(x):
        if x is None:
            # Nothing to mask for a value that could not be cleaned
            return None
        len_x = len(str(x))
        if len_x >= 8:
            start = max(0, len_x - 4)
        else:
            # Short values reveal only their last character
            start = len_x - 1
        return '***' + str(x)[start:len_x]

    df[colname_last4char] = df[colname_clean].apply(last4char)
    Log.important(
        str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno)
        + ': Step ' + str(step) + ': EXTRACT LAST 4 CHAR Took '
        + str(Profiling.get_time_dif_secs(
            start=start_last4_time, stop=Profiling.stop(), decimals=2))
        + ' secs. Successfully extracted last 4 chars from column "'
        + str(hide_colname) + '"'
    )

    #
    # Hash the column
    #
    step += 1
    start_hash_time = Profiling.start()

    def hash_value(x):
        if x is None:
            return None
        return Hash.hash(
            string=x,
            algo=Hash.ALGO_SHA256
        )

    df[colname_hash] = df[colname_clean].apply(hash_value)
    stop_hash_time = Profiling.stop()
    Log.important(
        str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno)
        + ': Step ' + str(step) + ': HASH Took '
        + str(Profiling.get_time_dif_secs(
            start=start_hash_time, stop=stop_hash_time, decimals=2))
        + ' secs. Successfully obfuscated column "' + str(hide_colname)
        + '", sample rows: ' + str(df[0:2])
    )

    #
    # Obfuscate Hash hexdigest to Chinese/etc characters
    #
    step += 1
    start_obflang_time = Profiling.start()

    def obfuscate_hash_to_lang(x, lang):
        if x is None:
            # No hash available for this row
            return None
        unicode_range = Hash.BLOCK_CHINESE
        if lang == 'ko':
            unicode_range = Hash.BLOCK_KOREAN_SYL
        return Hash.convert_ascii_string_to_other_alphabet(
            ascii_char_string=x,
            unicode_range=unicode_range,
            group_n_char=4
        )

    df[colname_hash_readable] = df[colname_hash].apply(
        obfuscate_hash_to_lang, args=[hash_encode_lang])
    Log.important(
        str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno)
        + ': Step ' + str(step) + ': HASH TO CHAR Took '
        + str(Profiling.get_time_dif_secs(
            start=start_obflang_time, stop=Profiling.stop(), decimals=2))
        + ' secs. Successfully converted obfuscation to language for column "'
        + str(hide_colname) + '"'
    )

    #
    # Encryption
    #
    step += 1
    start_enc_time = Profiling.start()
    try:
        key_bytes = b64decode(encrypt_key_b64.encode('utf-8'))
    except Exception as ex_key_conversion:
        raise Exception(
            str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno)
            + ': Error converting base64 key "' + str(encrypt_key_b64)
            + '" to bytes. Exception: ' + str(ex_key_conversion)
        ) from ex_key_conversion

    nonce_bytes = None
    if nonce_b64 is not None:
        try:
            nonce_bytes = b64decode(nonce_b64.encode(encoding='utf-8'))
        except Exception as ex_nonce:
            Log.warning(
                str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno)
                + ': Error converting base64 nonce "' + str(nonce_b64)
                + '" to bytes. Exception: ' + str(ex_nonce)
            )
            nonce_bytes = None

    # Only the key length is logged, never the key bytes themselves
    Log.important(
        str(__name__) + ' ' + str(getframeinfo(currentframe()).lineno)
        + ': Step ' + str(step) + ': ENCRYPTION using key of length = '
        + str(len(key_bytes))
    )
    # NOTE(review): one encryptor, and therefore one nonce/IV, is used for
    # every row - confirm that this is intended.
    encryptor = AES_Encrypt(
        key=key_bytes,
        mode=AES_Encrypt.AES_MODE_CBC,
        nonce=nonce_bytes
    )

    def encrypt(x, encryptor):
        try:
            x_bytes = bytes(x.encode(encoding='utf-8'))
            res = encryptor.encode(x_bytes)
            return {
                'ciphermode': res.cipher_mode,
                'ciphertext_b64': res.ciphertext_b64,
                'tag_b64': res.tag_b64,
                'iv_b64': res.nonce_b64
            }
        except Exception as ex:
            Log.error(
                str(__name__) + ' ' + str(getframeinfo(currentframe()).lineno)
                + ': Error encrypting "' + str(x) + '": ' + str(ex)
            )
            return None

    df[colname_encrypt] = df[colname_clean].apply(encrypt, args=[encryptor])
    Log.important(
        str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno)
        + ': Step ' + str(step) + ': ENCRYPTION Took '
        + str(Profiling.get_time_dif_secs(
            start=start_enc_time, stop=Profiling.stop(), decimals=2))
        + ' secs. Successfully encrypted column "' + str(hide_colname)
        + '", for records (first 20 rows): ' + str(df.values[0:20])
    )

    return df.to_json(
        # Make sure not ASCII
        force_ascii=False,
        orient='records',
    )
from hide.utils.CmdLine import CmdLine

#
# Decide whether to run multi-threaded in gunicorn or not
#
pv = cl.CmdLine.get_cmdline_params(pv_default={'gunicorn': '0'})
cmdline_params = CmdLine.get_cmdline_params(pv_default=pv)
print('Command line params: ' + str(cmdline_params))

cwd = os.getcwd()
# Logs go to stdout only when debug is switched on explicitly. In every other
# case (no "debug" parameter, or a value that is not 1/y/yes) they go to the
# log file, so the log destination is always configured.
if pv.get('debug') in ['1', 'y', 'yes']:
    Log.DEBUG_PRINT_ALL_TO_SCREEN = True
    print('Logs will be directed to stdout')
else:
    print('Current working directory "' + str(cwd) + '"')
    # Cut the path back to the "hide" folder. The folder also matches when it
    # is the last path component (os.getcwd() has no trailing separator).
    cwd = re.sub(pattern='[/\\\\]hide(?:[/\\\\].*)?$', repl='/hide/', string=cwd)
    if not cwd.endswith(('/', '\\')):
        # No "hide" folder in the path: still separate folder and file name
        cwd += '/'
    Log.LOGFILE = cwd + 'logs/hide.log'
    print('Logs will be directed to log file (with date) "' + str(Log.LOGFILE) + '"')

rest_api = HideApi()
if pv['gunicorn'] == '1':
    Log.important('Starting Hide API with gunicorn from folder "' + str(cwd) + '"')
    # Port and Host specified on command line already for gunicorn
else:
    Log.important('Starting Hide API without gunicorn from folder "' + str(cwd) + '"')
    rest_api.run_hide_api()