Exemple #1
0
 def api_hide():
     """Handle a hide request: gather its parameters and run ``hide_data``.

     Every parameter is read through ``self.get_param`` using the HTTP method
     of the current request. The column name and the nonce are logged before
     the call. Returns whatever ``self.hide_data`` returns.
     """
     http_method = request.method
     # 'records' could be string (GET) or dict (POST)
     wanted = (
         'records',
         'col_to_hide',
         'is_number_only',
         'case_sensitive',
         'process_phone_country',
         'encrypt_key_b64',
         'nonce_b64',
     )
     params = {}
     for name in wanted:
         params[name] = self.get_param(param_name=name, method=http_method)

     where = str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno)
     Log.info(
         where
         + ': Received parameters: hide colname "' + str(params['col_to_hide'])
         + '", nonce base64 "' + str(params['nonce_b64']) + '"')

     return self.hide_data(
         records_json=params['records'],
         hide_colname=params['col_to_hide'],
         is_number_only=params['is_number_only'],
         case_sensitive=params['case_sensitive'],
         process_phone_country=params['process_phone_country'],
         encrypt_key_b64=params['encrypt_key_b64'],
         nonce_b64=params['nonce_b64'])
Exemple #2
0
 def hide_data(
     self,
     # In string JSON (GET), or dict (POST)
     records_json,
     # Column names to hide
     hide_colname,
     encrypt_key_b64,
     nonce_b64,
     is_number_only=False,
     case_sensitive=False,
     process_phone_country=None,
     hash_encode_lang='zh',
 ):
     """Run ``Hide().hide_data`` on the request data and return its result.

     ``is_number_only`` and ``case_sensitive`` arrive as raw request values
     and count as true only when they are one of 1, '1', 'y' or 'yes'.
     ``hash_encode_lang`` is forwarded to ``Hide().hide_data`` so that a
     caller-supplied value is honoured instead of being silently dropped.

     Returns the result of ``Hide().hide_data``. On any exception the error
     is logged and the error message string is returned instead, unless
     ``Log.DEBUG_PRINT_ALL_TO_SCREEN`` is set, in which case an ``Exception``
     carrying that message is raised (chained to the original error).
     """
     truthy_values = [1, '1', 'y', 'yes']
     try:
         return Hide().hide_data(
             records_json=records_json,
             hide_colname=hide_colname,
             is_number_only=(is_number_only in truthy_values),
             case_sensitive=(case_sensitive in truthy_values),
             process_phone_country=process_phone_country,
             encrypt_key_b64=encrypt_key_b64,
             nonce_b64=nonce_b64,
             hash_encode_lang=hash_encode_lang)
     except Exception as ex:
         errmsg = str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno) \
                  + ' Exception occurred IP ' + str(flask.request.remote_addr) \
                  + ', exception ' + str(ex) + '.'
         Log.error(errmsg)
         if Log.DEBUG_PRINT_ALL_TO_SCREEN:
             # Keep the original traceback attached for debugging
             raise Exception(errmsg) from ex
         return errmsg
Exemple #3
0
    def __init__(
            self,
            # 16 or 32 byte key
            key,
            nonce=None,
            mode=AES_MODE_EAX,
            text_encoding='utf-8'):
        """Set up the key, cipher mode, nonce and text encoding.

        :param key: the AES key as bytes (16 or 32 bytes)
        :param nonce: nonce / IV bytes; when None one is generated with
            ``AES_Encrypt.generate_random_bytes`` of size
            ``AES_Encrypt.SIZE_NONCE``
        :param mode: ``AES_Encrypt.AES_MODE_EAX`` or
            ``AES_Encrypt.AES_MODE_CBC``
        :param text_encoding: encoding stored on the instance as
            ``self.text_encoding``
        :raises Exception: when ``mode`` is not one of the supported modes
        """
        self.key = key
        # Never write key material to the log; the size is enough to diagnose
        # a wrong-length key.
        Log.debug('Using key of size = ' + str(len(self.key)) + '.')
        self.cipher_mode_str = mode
        if self.cipher_mode_str == AES_Encrypt.AES_MODE_EAX:
            self.cipher_mode = AES.MODE_EAX
        elif self.cipher_mode_str == AES_Encrypt.AES_MODE_CBC:
            self.cipher_mode = AES.MODE_CBC
        else:
            raise Exception(
                str(self.__class__) + ' ' +
                str(getframeinfo(currentframe()).lineno) +
                ': Unsupported AES mode "' + str(self.cipher_mode_str) + '"')
        if nonce is None:
            # Must be 16 bytes
            nonce = AES_Encrypt.generate_random_bytes(
                size=AES_Encrypt.SIZE_NONCE, printable=True)

        self.nonce = nonce
        Log.debug(
            str(self.__class__) + ' ' +
            str(getframeinfo(currentframe()).lineno) + ': Using nonce "' +
            str(self.nonce) + '". Size = ' + str(len(self.nonce)))

        self.text_encoding = text_encoding
        return
Exemple #4
0
    def decode(self, ciphertext):
        """Decrypt a base64-encoded ciphertext string and return the plaintext.

        The ciphertext is base64-decoded and decrypted with ``self.key`` and
        ``self.nonce`` in the configured cipher mode. In CBC mode the last
        byte is read as the number of padding bytes, validated, and the
        padding is stripped.

        :param ciphertext: base64 text of the encrypted bytes
        :return: the plaintext decoded with ``STR_ENCODING``
        :raises Exception: after logging, on any failure, including an
            unsupported mode or an invalid padding length
        """
        try:
            if self.cipher_mode == AES.MODE_EAX:
                cipher = AES.new(key=self.key,
                                 mode=self.cipher_mode,
                                 nonce=self.nonce)
                cipherbytes = b64decode(ciphertext.encode(self.text_encoding))
                # NOTE(review): only decrypt() is called, so the EAX tag is
                # not verified here; this method receives no tag to check.
                data = cipher.decrypt(cipherbytes)
            elif self.cipher_mode == AES.MODE_CBC:
                cipher = AES.new(key=self.key,
                                 mode=self.cipher_mode,
                                 iv=self.nonce)
                cipherbytes = b64decode(ciphertext.encode(self.text_encoding))
                data = cipher.decrypt(cipherbytes)
                block_size = AES_Encrypt.DEFAULT_BLOCK_SIZE_AES_CBC
                Log.debugdebug(
                    str(self.__class__) + ' ' +
                    str(getframeinfo(currentframe()).lineno) +
                    ': Decrypted data length = ' + str(len(data)) +
                    ', modulo ' + str(block_size) + ' = ' +
                    str(len(data) % block_size))
                # The last byte holds the number of padding bytes. Reject
                # impossible values instead of silently returning an empty or
                # truncated result (typical symptom of a wrong key or IV).
                pad_len = data[-1] if len(data) > 0 else 0
                if pad_len < 1 or pad_len > block_size or pad_len > len(data):
                    raise Exception(
                        str(self.__class__) + ' ' +
                        str(getframeinfo(currentframe()).lineno) +
                        ': Invalid padding length ' + str(pad_len) +
                        ' for decrypted data of length ' + str(len(data)) + '.')
                data = data[:-pad_len]
            else:
                raise Exception(
                    str(self.__class__) + ' ' +
                    str(getframeinfo(currentframe()).lineno) +
                    ': Unsupported mode "' + str(self.cipher_mode) + '".')

            return str(data, encoding=STR_ENCODING)
        except Exception as ex:
            errmsg = str(__name__) + ' ' + str(getframeinfo(currentframe()).lineno) \
                    + ': Error decoding data "' + str(ciphertext) + '" using AES ". Exception: ' + str(ex)
            Log.error(errmsg)
            raise Exception(errmsg) from ex
Exemple #5
0
 def encrypt(
         x,
         encryptor
 ):
     """Encrypt the string ``x`` with ``encryptor`` and return the parts as a dict.

     ``x`` is UTF-8 encoded and handed to ``encryptor.encode``. The returned
     dict has the keys 'ciphermode', 'ciphertext_b64', 'tag_b64' and 'iv_b64'
     (the latter filled from the result's ``nonce_b64``). On any error the
     failure is logged and None is returned.
     """
     try:
         encoded = encryptor.encode(x.encode(encoding='utf-8'))
         return dict(
             ciphermode=encoded.cipher_mode,
             ciphertext_b64=encoded.ciphertext_b64,
             tag_b64=encoded.tag_b64,
             iv_b64=encoded.nonce_b64,
         )
     except Exception as ex:
         where = str(__name__) + ' ' + str(getframeinfo(currentframe()).lineno)
         Log.error(where + ': Error encrypting "' + str(x) + '": ' + str(ex))
         return None
Exemple #6
0
    def filter_phone_china(x):
        """Normalize a Chinese phone number to '+<country code>...' form.

        All non-digit characters are removed first. Inputs with fewer than
        10 digits return None without logging; any other unrecognized shape is
        logged as an error and also returns None.

        See https://en.wikipedia.org/wiki/Telephone_numbers_in_China
        Accepted shapes (digits only, after stripping):

        Fixed phones:
          0yyy xxx xxxx (11 digits) | 0yyy xxxx xxxx (12 digits)
              calls from other areas within China (leading 0 is dropped)
          86 yyy xxx xxxx (12 digits) | 86 yyy xxxx xxxx (13 digits)
              calls from outside China
          yyy xxx xxxx (10 digits) | yyy xxxx xxxx (11 digits)
              area code without the leading 0

        Mobile phones:
          1nn xxxx xxxx (11 digits)     calls within China
          86 1nn xxxx xxxx (13 digits)  calls from outside China
        """
        try:
            digits = re.sub('[^0-9]', '', str(x))
            n_digits = len(digits)
            # At least 10 digits
            if n_digits < 10:
                return None

            country_code = PhoneNumber.CHINA_COUNTRY_CODE
            first_digit = digits[0]
            normalized = None

            if first_digit == '0':
                # Landline with trunk prefix 0: drop the 0, add country code
                if n_digits in (11, 12):
                    normalized = '+' + country_code + digits[1:]
            elif first_digit == '1':
                # Mobile number, domestic format
                if n_digits == 11:
                    normalized = '+' + country_code + digits
            elif digits[0:2] == country_code:
                # Already carries the country code (landline or mobile)
                if n_digits in (12, 13):
                    normalized = '+' + digits
            elif n_digits in (10, 11):
                # Landline area code written without the leading 0
                normalized = '+' + country_code + digits
            # TODO Should we just return whatever we have then? Instead of throwing exception.

            if normalized is None:
                raise Exception('Invalid ' + str(n_digits) + ' digit phone number ' + str(digits) + '')
            return normalized
        except Exception as ex:
            where = str(__name__) + ' ' + str(getframeinfo(currentframe()).lineno)
            Log.error(where + ': Error phone number: ' + str(ex))
            return None
Exemple #7
0
    def hash_compression(
            self,
            s,
            # By default we return the original hash
            desired_byte_length = 32
    ):
        """Hash ``s`` with SHA-256 and XOR-fold the digest to a shorter length.

        The digest is cut into equal blocks of ``desired_byte_length`` bytes
        and the blocks are XOR-ed together with ``self.xor_bytes``.

        :param s: string to hash, encoded with ``Obfuscate.STRING_ENCODING``
        :param desired_byte_length: output length in bytes; must be a multiple
            of 4 and a positive divisor of the SHA-256 digest size, i.e. one
            of 4, 8, 16 or 32
        :return: the digest itself (``bytes``) when a single block results,
            otherwise whatever ``self.xor_bytes`` returns for the folded blocks
        :raises Exception: when ``desired_byte_length`` is not valid, or when
            the blocks being XOR-ed have different lengths
        """
        digest_size = hashlib.sha256().digest_size
        if desired_byte_length % 4 != 0:
            raise Exception(
                str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno)
                + ': Desired byte length must be 0 modulo-4, given = ' + str(desired_byte_length)
            )
        # Zero or oversized lengths used to crash with ZeroDivisionError, and
        # negative or non-dividing lengths silently produced a result of the
        # wrong size, so reject them up front.
        if desired_byte_length <= 0 or digest_size % desired_byte_length != 0:
            raise Exception(
                str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno)
                + ': Desired byte length must be a positive divisor of ' + str(digest_size)
                + ', given = ' + str(desired_byte_length)
            )

        m = hashlib.sha256()
        m.update(bytes(s, encoding=Obfuscate.STRING_ENCODING))
        # This will return a bytes object of length 32
        h = m.digest()

        # E.g. 32 bytes folded to 8 bytes gives 4 blocks of 8 bytes each
        n_blocks = int(len(h) // desired_byte_length)
        block_len = len(h) // n_blocks
        Log.debugdebug(
            str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno)
            + ': Number of blocks = ' + str(n_blocks) + ', block length = ' + str(block_len)
        )

        # Start from the first block and fold every following block into it
        bytes_xor = h[0:block_len]
        for i in range(1, n_blocks):
            cur_block = h[i * block_len:(i + 1) * block_len]
            if len(bytes_xor) != len(cur_block):
                raise Exception(
                    str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno)
                    + ': Different block lengths "' + str(bytes_xor)
                    + '", and "' + str(cur_block) + '"'
                )
            bytes_xor = self.xor_bytes(
                b1 = bytes_xor,
                b2 = cur_block
            )

        return bytes_xor
Exemple #8
0
 def encode(
         self,
         # bytes format
         data):
     """Encrypt ``data`` and return an ``AES_Encrypt.EncryptRetClass``.

     In EAX mode the result carries the base64 ciphertext, tag and nonce.
     In CBC mode ``data`` is first padded to a multiple of
     ``AES_Encrypt.DEFAULT_BLOCK_SIZE_AES_CBC``: 1 to block-size bytes are
     appended, each holding the pad count, so the decoder can read the last
     byte to know how many bytes to strip. The CBC result has no tag.

     NOTE(review): every call uses ``self.key`` together with the same
     ``self.nonce``; confirm that reusing the pair across messages is
     acceptable for the intended use.

     :param data: plaintext as bytes; the caller's object is not modified
     :raises Exception: after logging, on any failure, including an
         unsupported mode
     """
     try:
         if self.cipher_mode == AES.MODE_EAX:
             cipher = AES.new(key=self.key,
                              mode=self.cipher_mode,
                              nonce=self.nonce)
             cipherbytes, tag = cipher.encrypt_and_digest(data)
             return AES_Encrypt.EncryptRetClass(
                 cipher_mode=self.cipher_mode_str,
                 ciphertext_b64=b64encode(cipherbytes).decode(
                     self.text_encoding),
                 plaintext_b64=None,
                 tag_b64=b64encode(tag).decode(self.text_encoding),
                 nonce_b64=b64encode(self.nonce).decode(self.text_encoding))
         elif self.cipher_mode == AES.MODE_CBC:
             # 1-16, make sure not 0, otherwise last byte will not be the pad length
             length = AES_Encrypt.DEFAULT_BLOCK_SIZE_AES_CBC - (
                 len(data) % AES_Encrypt.DEFAULT_BLOCK_SIZE_AES_CBC)
             # Build the pad byte directly from its integer value so it never
             # depends on a text encoding, and build a new object instead of
             # using += so a caller-supplied bytearray is not mutated.
             padded = data + bytes([length]) * length
             Log.debugdebug(
                 str(self.__class__) + ' ' +
                 str(getframeinfo(currentframe()).lineno) +
                 ': Padded length = ' + str(length))
             cipher = AES.new(key=self.key,
                              mode=self.cipher_mode,
                              iv=self.nonce)
             cipherbytes = cipher.encrypt(padded)
             return AES_Encrypt.EncryptRetClass(
                 cipher_mode=self.cipher_mode_str,
                 ciphertext_b64=b64encode(cipherbytes).decode(
                     self.text_encoding),
                 plaintext_b64=None,
                 tag_b64=None,
                 nonce_b64=b64encode(self.nonce).decode(self.text_encoding))
         else:
             raise Exception(
                 str(self.__class__) + ' ' +
                 str(getframeinfo(currentframe()).lineno) +
                 ': Unsupported mode "' + str(self.cipher_mode) + '".')
     except Exception as ex:
         errmsg = str(__name__) + ' ' + str(getframeinfo(currentframe()).lineno) \
                 + ': Error encoding data "' + str(data) + '" using AES ". Exception: ' + str(ex)
         Log.error(errmsg)
         raise Exception(errmsg) from ex
Exemple #9
0
 def get_param(self, param_name, method='GET'):
     """Fetch one request parameter, or None when it is absent.

     For 'GET' the value is looked up in ``flask.request.args`` and returned
     as a string. For any other method it is looked up in
     ``flask.request.json`` and returned as-is; a failed lookup is logged as
     critical and yields None.
     """
     if method != 'GET':
         try:
             return flask.request.json[param_name]
         except Exception:
             where = (str(self.__class__) + ' ' +
                      str(getframeinfo(currentframe()).lineno))
             Log.critical(
                 where + ': No param name [' + param_name + '] in request.')
             return None

     if param_name not in flask.request.args:
         return None
     return str(flask.request.args[param_name])
Exemple #10
0
    def run_unit_test(self):
        """Check hashing plus conversion to another alphabet against known values.

        The first set hashes a fixed sentence with each supported algorithm
        and converts the hex digest 4 characters at a time. The second set
        converts arbitrary ASCII strings with the default grouping. Returns
        the accumulated ``ut.ResultObj``.
        """
        res_final = ut.ResultObj(count_ok=0, count_fail=0)

        s = '니는 먹고 싶어'
        # In Linux command line, echo -n "$s" | shasum -a 1 (or 256,512)
        hashed_cases = [
            (Hash.ALGO_SHA1, '蔮膫圈嫩慁覕邜蹋妡狿'),
            (Hash.ALGO_SHA256, '葶杊閹翔綐僤徼戻髯鼚胦嘭藃诠灑浽'),
            (Hash.ALGO_SHA512, '詐鏙仟墍例嵝烐檦蝡溲薑珇鸦東燢爻纷欜陲囚劚攠菜槑茹輀濯偑袁蓣质簨'),
            (Hash.ALGO_SHA3_256, '厥驹踸鸨揱澯鑢擠鳰僸覑儽悃徵絨控'),
            (Hash.ALGO_SHA3_512, '醜怅僒础衺菼惓隔鮚腋釔晞鏙屜咖龩檵因伖蘦惌灱騾凊纅弪鮾蕏解铦欪臓'),
        ]
        for algo, expected in hashed_cases:
            Log.debug('Using algo "' + str(algo) + '":')
            hstr = Hash.hash(string=s, algo=algo)
            Log.debug('Hash: ' + str(hstr))
            observed = Hash.convert_ascii_string_to_other_alphabet(
                ascii_char_string=hstr,
                group_n_char=4)
            comment = 'test string "' + str(hstr) + '" got "' + str(observed) + '"'
            outcome = ut.UnitTest.assert_true(
                observed=observed, expected=expected, test_comment=comment)
            res_final.update_bool(res_bool=outcome)

        ascii_cases = [
            ('abc/ii{}.!&%[][\\+=', '嵢弯敩睽簡琥坝坜礽縰'),
            ('8829amsf)(*&^%^*./', '蘸耹嵭潦眨砦娥娪簯縰'),
        ]
        for ascii_string, expected in ascii_cases:
            observed = Hash.convert_ascii_string_to_other_alphabet(
                ascii_char_string=ascii_string)
            comment = ('test string "' + str(ascii_string)
                       + '" got "' + str(observed) + '"')
            outcome = ut.UnitTest.assert_true(
                observed=observed, expected=expected, test_comment=comment)
            res_final.update_bool(res_bool=outcome)

        return res_final
Exemple #11
0
 def filter_col(
         x,
         is_number_only = False,
         case_sensitive = False
 ):
     """Clean one cell value: trim, optionally lowercase, optionally keep digits.

     The value is converted to a string and always trimmed. It is lowercased
     unless ``case_sensitive`` is true, and reduced to its digits when
     ``is_number_only`` is true. On error the failure is logged and the value
     as cleaned so far is returned.
     """
     try:
         # We always trim no matter what
         x = StringUtils.trim(str(x))
         x = x if case_sensitive else x.lower()
         return re.sub('[^0-9]', '', x) if is_number_only else x
     except Exception as cleaning_error:
         where = str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno)
         Log.error(where + ': Error cleaning "' + str(x) + '". ' + str(cleaning_error))
         return x
Exemple #12
0
 def process_phone(
         x,
         country
 ):
     """Normalize phone number ``x`` for ``country``; only 'china' is handled.

     For 'china' the result of ``PhoneNumber.filter_phone_china`` is returned.
     Any other country is logged as an error and ``x`` is returned unchanged,
     as it is when an exception occurs.
     """
     try:
         if country != 'china':
             Log.error(
                 str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno)
                 + ': Unsupported country "' + str(country) + '"'
             )
             return x
         return PhoneNumber.filter_phone_china(x)
     except Exception as ex:
         where = str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno)
         Log.error(
             where + ': Exception processing phone "' + str(x) + '". Exception ' + str(ex)
         )
         return x
Exemple #13
0
 def hash(string, algo=ALGO_SHA1):
     """Return the hex digest of ``string`` under the chosen hash algorithm.

     :param string: text to hash; encoded with ``Hash.STR_ENCODING``
     :param algo: one of ``Hash.ALGO_SHA1``, ``Hash.ALGO_SHA256``,
         ``Hash.ALGO_SHA512``, ``Hash.ALGO_SHA3_256``, ``Hash.ALGO_SHA3_512``
     :return: the hexadecimal digest string, or None when hashing fails
         (unsupported algorithm, or an input that cannot be encoded); the
         failure is logged
     """
     try:
         hash_constructors = {
             Hash.ALGO_SHA1: hashlib.sha1,
             Hash.ALGO_SHA256: hashlib.sha256,
             Hash.ALGO_SHA512: hashlib.sha512,
             Hash.ALGO_SHA3_256: hashlib.sha3_256,
             Hash.ALGO_SHA3_512: hashlib.sha3_512,
         }
         if algo not in hash_constructors:
             raise Exception('Unsupported hash algo "' + str(algo) + '".')
         # Encoding happens inside the try so that a bad input follows the
         # same log-and-return-None path as every other failure.
         str_encode = string.encode(encoding=Hash.STR_ENCODING)
         return hash_constructors[algo](str_encode).hexdigest()
     except Exception as ex:
         # Local import: this block must not rely on a module-level import
         # of inspect being present.
         from inspect import currentframe, getframeinfo
         errmsg = str(__name__) + ' ' + str(getframeinfo(currentframe()).lineno) \
                  + ': Error hashing string "' + str(string) + '" using algo "' + str(algo)\
                  + '". Exception: ' + str(ex)
         Log.error(errmsg)
         return None
Exemple #14
0
    def xor_bytes(
            self,
            b1,
            b2
    ):
        """XOR two byte sequences element by element.

        The sequences are paired with ``zip``, so the result is as long as the
        shorter input. Returns a list of the XOR-ed integer values.
        """
        pairs = zip(b1, b2)

        xored = []
        for left, right in pairs:
            value = left ^ right
            Log.debugdebug(
                str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno)
                + 'XOR "' + str(hex(left)) + '" and "' + str(hex(right)) + '" = ' + str(hex(value))
            )
            xored.append(value)

        where = str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno)
        Log.debug(
            where
            + ': XOR between "' + str(self.hexdigest(b1))
            + '" and "' + str(self.hexdigest(b2))
            + '" = "' + str(self.hexdigest(xored)) + '"'
        )

        return xored
Exemple #15
0
    def run_unit_test(self):
        """Round-trip several sentences through encode/decode in each mode.

        For CBC and EAX, every sentence is encrypted and decrypted again with
        a fixed 32-byte key and fixed nonce, and the decrypted text is
        compared with the original. Returns the accumulated ``ut.ResultObj``.
        """
        res_final = ut.ResultObj(count_ok=0, count_fail=0)

        # A long random string on top of the fixed multilingual samples
        long_str = ''.join(
            random.choice(AES_Encrypt.CHARS_STR) for _ in range(10000))
        sentences = [
            '니는 먹고 싶어',
            'Дворянское ГНЕЗДО',
            '没问题 大陆 经济',
            '存款方式***2019-12-11 11:38:46***',
            '1234567890123456',
            long_str,
        ]

        key = b'Sixteen byte key'
        nonce = b'0123456789xxyyzz'

        for mode in [AES_Encrypt.AES_MODE_CBC, AES_Encrypt.AES_MODE_EAX]:
            # The 16-byte key is doubled to get a 32-byte key
            aes_obj = AES_Encrypt(key=key + key, mode=mode, nonce=nonce)
            for s in sentences:
                Log.debug('Encrypting "' + str(s) + '"')
                data_bytes = s.encode(encoding=STR_ENCODING)
                Log.debug('Data length in bytes = ' + str(len(data_bytes)))
                ciphertext = aes_obj.encode(data=data_bytes).ciphertext_b64
                Log.debug('Encrypted as "' + str(ciphertext) + '"')

                plaintext = aes_obj.decode(ciphertext=ciphertext)
                Log.debug('Decrypted as "' + plaintext + '"')

                comment = (
                    'mode "' + str(mode) + '" s=' + str(s)
                    + '" encrypted to "' + str(ciphertext)
                    + '", decrypted back to "' + str(plaintext)
                )
                outcome = ut.UnitTest.assert_true(
                    observed=plaintext, expected=s, test_comment=comment)
                res_final.update_bool(res_bool=outcome)

        return res_final
Exemple #16
0
    def convert_ascii_string_to_other_alphabet(
        ascii_char_string,
        # Default to CJK Unicode Block
        unicode_range=BLOCK_CHINESE,
        # If the characters come from a hexdigest from a hash, we can compress 4 times,
        # otherwise for a random ascii string, we can only compress 2 characters to 1 chinese.
        group_n_char=2):
        """Map groups of input characters onto characters of a Unicode range.

        The input is right-padded with '0' to a multiple of 4 characters and
        cut into groups of ``group_n_char``. Each group becomes a number that
        is reduced modulo the size of ``unicode_range`` and offset by the
        range start to pick one output character:

        - ``group_n_char == 2``: the two character ordinals are combined as
          ``ord(c0) * 256 + ord(c1)``
        - ``group_n_char == 4``: the four characters are parsed as a
          hexadecimal number

        Any other ``group_n_char`` produces no output characters.
        Returns the concatenated output characters.
        """
        range_start = unicode_range[0]
        uni_len = unicode_range[1] - range_start + 1

        # Append 0's until the length is 0 modulo-4
        remainder = len(ascii_char_string) % 4
        if remainder != 0:
            ascii_char_string = ascii_char_string + '0' * (4 - remainder)

        converted = []

        n_groups = int(len(ascii_char_string) / group_n_char)
        for group_idx in range(n_groups):
            idx_start = group_n_char * group_idx
            idx_end = idx_start + group_n_char
            chunk = ascii_char_string[idx_start:idx_end]

            # Convert to Chinese, Korean, etc
            if group_n_char == 2:
                ord_arr = np.array([ord(ch) for ch in chunk])
                # Positional weights, most significant character first
                weights = np.array(
                    [2**(8 * (pos - 1)) for pos in range(len(ord_arr), 0, -1)])
                val = np.sum(ord_arr * weights)
                Log.debug('Index start=' + str(idx_start) + ', end=' +
                          str(idx_end) + ', s=' + str(chunk) + ', ordinal=' +
                          str(ord_arr) + ', val=' + str(hex(val)))
                converted.append(chr((val % uni_len) + range_start))
            elif group_n_char == 4:
                Log.debug('Index start=' + str(idx_start) + ', end=' +
                          str(idx_end) + ', s=' + str(chunk))
                n = int('0x' + str(chunk), 16)
                target_char = chr((n % uni_len) + range_start)
                converted.append(target_char)
                Log.debugdebug('From ' + str(idx_start) + ': ' + str(chunk) +
                               ', n=' + str(n) + ', char=' +
                               str(target_char))

        return ''.join(converted)
Exemple #17
0
    def xor_string(
            self,
            s1,
            s2
    ):
        """XOR two strings after cycling the shorter one up to the longer length.

        The shorter string is extended by repeating its own characters in
        order until both have the same number of characters. Both are then
        encoded with ``Obfuscate.STRING_ENCODING`` and passed to
        ``self.xor_bytes``, whose result is returned.
        """
        def _cycle_to(text, target_len):
            # Extend text one character at a time, repeating from its start
            base_len = len(text)
            for pos in range(base_len, target_len):
                text += text[pos - base_len]
            return text

        Log.debug(
            str(__name__) + ' ' + str(getframeinfo(currentframe()).lineno)
            + ': XOR between "' + str(s1) + '" and "' + str(s2) + '".'
        )

        target_len = max(len(s1), len(s2))
        s1 = _cycle_to(s1, target_len)
        s2 = _cycle_to(s2, target_len)

        Log.debug(
            str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno)
            + ': After appending, XOR between "' + str(s1) + '" and "' + str(s2) + '".'
        )
        Log.debugdebug(
            str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno)
            + ': s1 "' + str(s1) + '", s2 "' + str(s2) + '"'
        )

        return self.xor_bytes(
            b1 = bytes(s1, encoding=Obfuscate.STRING_ENCODING),
            b2 = bytes(s2, encoding=Obfuscate.STRING_ENCODING)
        )
Exemple #18
0
 def page_not_found(e):
     """Log the requested URL as invalid and answer with a 404 page.

     Returns a ``(body, status)`` tuple with a short HTML message and 404.
     """
     where = str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno)
     Log.error(where + ': Resource [' + str(flask.request.url) + '] is not valid!')
     body = "<h1>404</h1><p>The resource could not be found.</p>"
     return body, 404
Exemple #19
0
    def hide_data(
            self,
            # In string JSON
            records_json,
            # Column names to hide
            hide_colname,
            encrypt_key_b64,
            nonce_b64        = None,
            is_number_only   = False,
            case_sensitive   = False,
            # We support processing only China for now
            process_phone_country = None,
            hash_encode_lang = 'zh',
    ):
        step = 0

        if type(records_json) is str:
            try:
                records_json = json.loads(
                    records_json
                )
            except Exception as ex_json:
                errmsg = \
                    str(__name__) + ' ' + str(getframeinfo(currentframe()).lineno) \
                    + ': Exception loading json: ' + str(records_json)\
                    + '. Got exception: ' + str(ex_json)
                Log.error(errmsg)
                return errmsg

        colname_clean            = str(hide_colname) + '_clean'
        colname_last4char        = str(hide_colname) + '_last4char'
        colname_hash             = str(hide_colname) + '_sha256'
        colname_hash_readable    = str(hide_colname) + '_sha256_readable'
        colname_encrypt          = str(hide_colname) + '_encrypt'
        colname_encrypt_readable = str(hide_colname) + '_encrypt_readable'

        df = pd.DataFrame(records_json)
        Log.debug(
            str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno)
            + ': Converted json object (first 20 records): '
            + str(records_json[0:min(20,len(records_json))])
            + ' to data frame: ' + str(df)
        )

        Log.important(
            str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno)
            + ': Start processing records, hide column "' + str(hide_colname)
            + '". Records of sample rows' +  str(records_json[0:min(10,len(records_json))])
        )

        #
        # Step 1
        #  - Clean phone numbers, bank accounts
        #  - Extract last 4 digits of phone/bank-account numbers to separate columns
        #  - Obfuscate the phone numbers, bank accounts for storage in cube
        #
        step += 1
        start_filter_time = Profiling.start()
        def filter_col(
                x,
                is_number_only = False,
                case_sensitive = False
        ):
            """Clean one cell value: trim, optionally lowercase, optionally keep digits.

            On error the failure is logged and the value as cleaned so far is
            returned.
            """
            try:
                # We always trim no matter what
                x = StringUtils.trim(str(x))
                x = x if case_sensitive else x.lower()
                return re.sub('[^0-9]', '', x) if is_number_only else x
            except Exception as cleaning_error:
                where = str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno)
                Log.error(where + ': Error cleaning "' + str(x) + '". ' + str(cleaning_error))
                return x
        df[colname_clean] = df[hide_colname].apply(filter_col, args=(is_number_only, case_sensitive))
        Log.important(
            str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno)
            + ': Step ' + str(step) + ': BASIC CLEANING Took '
            + str(Profiling.get_time_dif_secs(start=start_filter_time, stop=Profiling.stop(), decimals=2))
            + ' secs. Successfully cleaned column "' + str(hide_colname)+
            '", case sensitive "' + str(case_sensitive)
            + '", is number "' + str(is_number_only)
            + '", sample rows: ' + str(df[0:2])
        )

        #
        # Process Phone Number by Country
        #
        step += 2
        start_phone_time = Profiling.start()
        def process_phone(
                x,
                country
        ):
            """Normalize phone number ``x`` for ``country``; only 'china' is handled.

            Unsupported countries and exceptions are logged, and ``x`` is
            returned unchanged in both cases.
            """
            try:
                if country != 'china':
                    Log.error(
                        str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno)
                        + ': Unsupported country "' + str(country) + '"'
                    )
                    return x
                return PhoneNumber.filter_phone_china(x)
            except Exception as ex:
                where = str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno)
                Log.error(
                    where + ': Exception processing phone "' + str(x) + '". Exception ' + str(ex)
                )
                return x

        if process_phone_country == 'china':
            df[colname_clean] = df[colname_clean].apply(process_phone, args=[process_phone_country])
            Log.important(
                str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno)
                + ': Step ' + str(step) + ': PHONE CLEANING Took '
                + str(Profiling.get_time_dif_secs(start=start_phone_time, stop=Profiling.stop(), decimals=2))
                + ' secs. Successfully processed phone for column "' + str(hide_colname)
                + '", sample rows: ' + str(df[0:2])
            )

        #
        # Extract last 4 characters
        #
        step += 1
        start_last4_time = Profiling.start()
        def last4char(
                x
        ):
            """Return '***' followed by the tail of ``str(x)``.

            Values of 8 or more characters keep their last 4 characters;
            shorter values keep only their last character, so that less of a
            short value is exposed.
            """
            text = str(x)
            keep = 4 if len(text) >= 8 else 1
            return '***' + text[len(text) - keep:]
        df[colname_last4char] = df[colname_clean].apply(last4char)
        Log.important(
            str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno)
            + ': Step ' + str(step) + ': EXTRACT LAST 4 CHAR Took '
            + str(Profiling.get_time_dif_secs(start=start_last4_time, stop=Profiling.stop(), decimals=2))
            + ' secs. Successfully extracted last 4 chars from column "' + str(hide_colname)
            + '"'
        )

        #
        # Hash the column
        #
        step += 1
        start_hash_time = Profiling.start()
        def hash(
                x,
                desired_byte_len = 32
        ):
            """Return ``Hash.hash`` of ``x`` using ``Hash.ALGO_SHA256``.

            ``desired_byte_len`` is accepted but currently unused: folding the
            hash to a shorter length through ``Obfuscate`` is disabled.
            """
            return Hash.hash(string=x, algo=Hash.ALGO_SHA256)

        df[colname_hash] = df[colname_clean].apply(hash, args=[32])
        stop_hash_time = Profiling.start()
        Log.important(
            str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno)
            + ': Step ' + str(step) + ': HASH Took '
            + str(Profiling.get_time_dif_secs(start=start_hash_time, stop=stop_hash_time, decimals=2))
            + ' secs. Successfully obfuscated column "' + str(hide_colname)
            + '", sample rows: ' + str(df[0:2])
        )

        #
        # Obfuscate Hash hexdigest to Chinese/etc characters
        #
        step += 1
        start_obflang_time = Profiling.start()
        def obfuscate_hash_to_lang(
                x,
                lang
        ):
            """Convert the ASCII string x into characters of another alphabet.

            The Korean syllable block is used when lang is 'ko'; every other
            value of lang selects the Chinese block. Characters are grouped
            4 at a time for the conversion (group_n_char = 4).
            """
            target_block = Hash.BLOCK_KOREAN_SYL if lang == 'ko' else Hash.BLOCK_CHINESE
            return Hash.convert_ascii_string_to_other_alphabet(
                ascii_char_string = x,
                unicode_range     = target_block,
                group_n_char      = 4
            )

        df[colname_hash_readable] = df[colname_hash].apply(obfuscate_hash_to_lang, args=[hash_encode_lang])
        Log.important(
            str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno)
            + ': Step ' + str(step) + ': HASH TO CHAR Took '
            + str(Profiling.get_time_dif_secs(start=start_obflang_time, stop=Profiling.stop(), decimals=2))
            + ' secs. Successfully converted obfuscation to language for column "' + str(hide_colname)
            + '"'
        )

        #
        # Encryption
        #
        step += 1
        start_enc_time = Profiling.start()
        try:
            key_bytes = b64decode(encrypt_key_b64.encode('utf-8'))
        except Exception as ex_key_conversion:
            raise Exception(
                str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno)
                + ': Error converting base64 key "' + str(encrypt_key_b64)
                + '" to bytes. Exception: ' + str(ex_key_conversion)
            )
        try:
            nonce_bytes = b64decode(nonce_b64.encode(encoding='utf-8'))
        except Exception as ex_nonce:
            Log.warning(
                str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno)
                + ': Error converting base64 nonce "' + str(nonce_b64)
                + '" to bytes. Exception: ' + str(ex_nonce)
            )
            nonce_bytes = None
        Log.important(
            str(__name__) + ' ' + str(getframeinfo(currentframe()).lineno)
            + ': Step ' + str(step) + ': HASH Took '
            + ': Key bytes "' + str(key_bytes) + '", len = ' + str(len(key_bytes))
        )
        encryptor = AES_Encrypt(
            key   = key_bytes,
            mode  = AES_Encrypt.AES_MODE_CBC,
            nonce = nonce_bytes
        )
        def encrypt(
                x,
                encryptor
        ):
            """Encrypt one value and return its cipher details as a dict.

            x is UTF-8 encoded and handed to encryptor.encode(). The returned
            dict carries the keys 'ciphermode', 'ciphertext_b64', 'tag_b64'
            and 'iv_b64' (the latter taken from the result's nonce_b64).

            Best effort: any exception is logged and None is returned instead,
            so one bad value does not abort the whole column.
            """
            try:
                encoded = encryptor.encode(x.encode(encoding='utf-8'))
                return {
                    'ciphermode': encoded.cipher_mode,
                    'ciphertext_b64': encoded.ciphertext_b64,
                    'tag_b64': encoded.tag_b64,
                    'iv_b64': encoded.nonce_b64
                }
            except Exception as ex:
                Log.error(
                    str(__name__) + ' ' + str(getframeinfo(currentframe()).lineno)
                    + ': Error encrypting "' + str(x) + '": ' + str(ex)
                )
                return None

        df[colname_encrypt] = df[colname_clean].apply(encrypt, args=[encryptor])
        Log.important(
            str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno)
            + ': Step ' + str(step) + ': ENCRYPTION Took '
            + str(Profiling.get_time_dif_secs(start=start_enc_time, stop=Profiling.stop(), decimals=2))
            + ' secs. Successfully encrypted column "' + str(hide_colname)
            + '", for records (first 20 rows): ' + str(df.values[0:min(20,df.shape[0])])
        )

        # def obfuscate_cipher_to_lang(
        #         x,
        #         lang
        # ):
        #     unicode_range = Hash.BLOCK_CHINESE
        #     if lang == 'ko':
        #         unicode_range = Hash.BLOCK_KOREAN_SYL
        #     s = Hash.convert_ascii_string_to_other_alphabet(
        #         ascii_char_string = x['ciphertext_b64'],
        #         unicode_range     = unicode_range,
        #         group_n_char      = 2
        #     )
        #     return s
        #
        # df[colname_encrypt_readable] = df[colname_encrypt].apply(obfuscate_cipher_to_lang, args=[hash_encode_lang])

        df_json_str = df.to_json(
            # Make sure not ASCII
            force_ascii = False,
            orient      = 'records',
            # Don't need indexing
            # index       = False
        )

        return df_json_str
Exemple #20
0

from hide.utils.CmdLine import CmdLine
#
# Decide whether to run multi-threaded in gunicorn or not
#
# NOTE(review): `cl` is not imported in the visible part of this script, while
# `CmdLine` is imported directly above -- confirm both names resolve.
pv = cl.CmdLine.get_cmdline_params(pv_default={'gunicorn': '0'})
cmdline_params = CmdLine.get_cmdline_params(pv_default=pv)
print('Command line params: ' + str(cmdline_params))
cwd = os.getcwd()

#
# Choose the log destination: stdout when debug is switched on, otherwise the
# log file below the "hide" folder.
# The file branch used to be the "else" of the "'debug' in pv" test, so an
# explicit debug=0 (or any other non-enabled value) configured neither
# destination. Any value that does not enable debug now falls back to the file.
#
debug_enabled = ('debug' in pv) and (pv['debug'] in ['1', 'y', 'yes'])
if debug_enabled:
    Log.DEBUG_PRINT_ALL_TO_SCREEN = True
    print('Logs will be directed to stdout')
else:
    print('Current working directory "' + str(cwd) + '"')
    # Cut the path right after its first "/hide/" (or "\hide\") component, so
    # the log folder does not depend on which subfolder the script was started
    # from. The path is left unchanged if it has no such component.
    cwd = re.sub(pattern='([/\\\\]hide[/\\\\]).*', repl='/hide/', string=cwd)
    Log.LOGFILE = cwd + 'logs/hide.log'
    print('Logs will be directed to log file (with date) "' +
          str(Log.LOGFILE) + '"')

rest_api = HideApi()
if pv['gunicorn'] == '1':
    # Under gunicorn only the HideApi instance is created here
    Log.important('Starting Hide API with gunicorn from folder "' + str(cwd))
    # Port and Host specified on command line already for gunicorn
else:
    Log.important('Starting Hide API without gunicorn from folder "' +
                  str(cwd))
    rest_api.run_hide_api()