Ejemplo n.º 1
0
 def hide_data(
     self,
     # In string JSON (GET), or dict (POST)
     records_json,
     # Column names to hide
     hide_colname,
     encrypt_key_b64,
     nonce_b64,
     is_number_only=False,
     case_sensitive=False,
     process_phone_country=None,
     hash_encode_lang='zh',
 ):
     """
     Request-facing wrapper that delegates to ``Hide().hide_data``.

     The two flag parameters are normalised to booleans: only the values
     ``1``, ``'1'``, ``'y'`` and ``'yes'`` (and therefore also ``True``,
     which equals ``1``) count as true; everything else is false.

     :param records_json: records as a JSON string or already-parsed object
     :param hide_colname: name of the column to hide
     :param encrypt_key_b64: base64 encoded encryption key
     :param nonce_b64: base64 encoded nonce / IV
     :param is_number_only: flag, see normalisation above
     :param case_sensitive: flag, see normalisation above
     :param process_phone_country: forwarded unchanged
     :param hash_encode_lang: forwarded unchanged
     :return: whatever ``Hide().hide_data`` returns, or the error message
         string if it raised and ``Log.DEBUG_PRINT_ALL_TO_SCREEN`` is falsy
     :raises Exception: if the call failed and
         ``Log.DEBUG_PRINT_ALL_TO_SCREEN`` is truthy
     """
     truthy_flags = [1, '1', 'y', 'yes']
     try:
         return Hide().hide_data(
             records_json=records_json,
             hide_colname=hide_colname,
             encrypt_key_b64=encrypt_key_b64,
             nonce_b64=nonce_b64,
             is_number_only=(is_number_only in truthy_flags),
             case_sensitive=(case_sensitive in truthy_flags),
             process_phone_country=process_phone_country,
             # Must be forwarded explicitly, otherwise the callee silently
             # falls back to its own default language
             hash_encode_lang=hash_encode_lang)
     except Exception as ex:
         errmsg = str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno) \
                  + ' Exception occurred IP ' + str(flask.request.remote_addr) \
                  + ', exception ' + str(ex) + '.'
         Log.error(errmsg)
         if Log.DEBUG_PRINT_ALL_TO_SCREEN:
             # Keep the original traceback attached for debugging
             raise Exception(errmsg) from ex
         return errmsg
Ejemplo n.º 2
0
    def decode(self, ciphertext):
        """
        Decrypt a base64 encoded ciphertext string and return the plaintext string.

        The cipher is built from ``self.key``, ``self.cipher_mode`` and
        ``self.nonce`` (used as the nonce in EAX mode and as the IV in CBC mode).
        In CBC mode the trailing padding is removed: the value of the last
        byte is taken as the number of padding bytes.

        :param ciphertext: base64 text, encoded with ``self.text_encoding``
        :return: decrypted data decoded to ``str`` using ``STR_ENCODING``
        :raises Exception: on any failure (unsupported mode, bad base64,
            invalid padding, undecodable plaintext); the message wraps the
            original error, which is also attached as ``__cause__``
        """
        try:
            if self.cipher_mode == AES.MODE_EAX:
                cipher = AES.new(key=self.key,
                                 mode=self.cipher_mode,
                                 nonce=self.nonce)
                cipherbytes = b64decode(ciphertext.encode(self.text_encoding))
                # NOTE(review): only decrypt() is called here, no tag is
                # passed in, so nothing authenticates the ciphertext - confirm
                # this is intended
                data = cipher.decrypt(cipherbytes)
            elif self.cipher_mode == AES.MODE_CBC:
                cipher = AES.new(key=self.key,
                                 mode=self.cipher_mode,
                                 iv=self.nonce)
                cipherbytes = b64decode(ciphertext.encode(self.text_encoding))
                data = cipher.decrypt(cipherbytes)
                # len(data) is a byte count, so the block remainder is "% 16"
                Log.debugdebug(
                    str(self.__class__) + ' ' +
                    str(getframeinfo(currentframe()).lineno) +
                    ': Decrypted data length = ' + str(len(data)) +
                    ', modulo 16 = ' + str(len(data) % 16))
                # The last byte holds the padding length. A value of 0 or one
                # larger than the data would make the slice below silently
                # return empty output, so reject it explicitly.
                if not data or not 1 <= data[-1] <= len(data):
                    raise Exception(
                        str(self.__class__) + ' ' +
                        str(getframeinfo(currentframe()).lineno) +
                        ': Invalid padding in decrypted data of length ' +
                        str(len(data)) + '.')
                # Remove last x bytes encoded in the padded bytes
                data = data[:-data[-1]]
            else:
                raise Exception(
                    str(self.__class__) + ' ' +
                    str(getframeinfo(currentframe()).lineno) +
                    ': Unsupported mode "' + str(self.cipher_mode) + '".')

            return str(data, encoding=STR_ENCODING)
        except Exception as ex:
            errmsg = str(__name__) + ' ' + str(getframeinfo(currentframe()).lineno) \
                    + ': Error decoding data "' + str(ciphertext) + '" using AES ". Exception: ' + str(ex)
            Log.error(errmsg)
            raise Exception(errmsg) from ex
Ejemplo n.º 3
0
Archivo: Hide.py Proyecto: mapktah/hide
 def encrypt(
         x,
         encryptor
 ):
     """
     Encrypt a single value and package the outcome as a plain dict.

     :param x: value exposing ``encode(encoding=...)`` (a ``str`` in practice)
     :param encryptor: object whose ``encode(bytes)`` returns a result with the
         attributes ``cipher_mode``, ``ciphertext_b64``, ``tag_b64`` and
         ``nonce_b64``
     :return: dict with the keys ``ciphermode``, ``ciphertext_b64``,
         ``tag_b64`` and ``iv_b64`` (the latter carries the result's
         ``nonce_b64``); ``None`` if anything raised, in which case the error
         is logged
     """
     try:
         payload = bytes(x.encode(encoding='utf-8'))
         outcome = encryptor.encode(payload)
         return {
             'ciphermode': outcome.cipher_mode,
             'ciphertext_b64': outcome.ciphertext_b64,
             'tag_b64': outcome.tag_b64,
             'iv_b64': outcome.nonce_b64
         }
     except Exception as ex:
         location = str(__name__) + ' ' + str(getframeinfo(currentframe()).lineno)
         Log.error(location + ': Error encrypting "' + str(x) + '": ' + str(ex))
         return None
Ejemplo n.º 4
0
    def filter_phone_china(x):
        """
        Normalise a Chinese phone number to ``+<country code><number>`` form.

        All non-digit characters are stripped first. Accepted shapes
        (see https://en.wikipedia.org/wiki/Telephone_numbers_in_China):

          - leading ``0``, 11 or 12 digits: landline dialled from another area
            within China (``0yyy xxx xxxx`` / ``0yyy xxxx xxxx``); the leading
            ``0`` is replaced by the country code
          - leading ``1``, 11 digits: mobile number (``1nn xxxx xxxx``)
          - leading country code, 12 or 13 digits: number already in
            international form (landline or mobile)
          - anything else, 10 or 11 digits: landline with area code but
            without the leading ``0``

        :param x: anything convertible with ``str()``
        :return: the normalised number, or ``None`` when fewer than 10 digits
            remain or the digits match none of the shapes above (the latter
            case is logged as an error)
        """
        try:
            digits = re.sub(pattern='[^0-9]', repl='', string=str(x))
            n_digits = len(digits)
            # Too short to be any supported format; rejected without logging
            if n_digits < 10:
                return None

            lead = digits[0]
            normalized = None
            if lead == '0':
                # Landline with trunk prefix: drop the 0, prepend country code
                if n_digits in (11, 12):
                    normalized = '+' + PhoneNumber.CHINA_COUNTRY_CODE + digits[1:]
            elif lead == '1':
                # Mobile number
                if n_digits == 11:
                    normalized = '+' + PhoneNumber.CHINA_COUNTRY_CODE + digits
            elif digits[0:2] == PhoneNumber.CHINA_COUNTRY_CODE:
                # Already carries the country code
                if n_digits in (12, 13):
                    normalized = '+' + digits
            else:
                # Landline with area code but no trunk prefix
                # TODO Should we just return whatever we have for other lengths? Instead of throwing exception.
                if n_digits in (10, 11):
                    normalized = '+' + PhoneNumber.CHINA_COUNTRY_CODE + digits

            if normalized is not None:
                return normalized

            # Raised on purpose so the handler below logs the rejection
            raise Exception('Invalid ' + str(n_digits) + ' digit phone number ' + str(digits))
        except Exception as ex:
            location = str(__name__) + ' ' + str(getframeinfo(currentframe()).lineno)
            Log.error(location + ': Error phone number: ' + str(ex))
            return None
Ejemplo n.º 5
0
 def encode(
         self,
         # bytes format
         data):
     """
     Encrypt ``data`` with AES in the configured mode.

     EAX mode: encrypts and computes a tag via ``encrypt_and_digest``.
     CBC mode: pads ``data`` up to a multiple of
     ``AES_Encrypt.DEFAULT_BLOCK_SIZE_AES_CBC`` before encrypting. Every
     padding byte holds the padding length (1 up to the block size, never 0),
     so the decoder can read the last byte to know how much to strip.

     :param data: plaintext bytes
     :return: ``AES_Encrypt.EncryptRetClass`` carrying the mode string, the
         base64 ciphertext, the base64 tag (``None`` in CBC mode) and the
         base64 nonce; ``plaintext_b64`` is always ``None``
     :raises Exception: on any failure, including an unsupported mode; the
         message wraps the original error
     """
     try:
         if self.cipher_mode == AES.MODE_EAX:
             eax_cipher = AES.new(key=self.key,
                                  mode=self.cipher_mode,
                                  nonce=self.nonce)
             encrypted, digest = eax_cipher.encrypt_and_digest(data)
             tag_text = b64encode(digest).decode(self.text_encoding)
         elif self.cipher_mode == AES.MODE_CBC:
             block_size = AES_Encrypt.DEFAULT_BLOCK_SIZE_AES_CBC
             # Always between 1 and block_size: an already aligned input
             # receives one full block of padding
             pad_len = block_size - (len(data) % block_size)
             data += bytes(chr(pad_len), encoding=STR_ENCODING) * pad_len
             Log.debugdebug(
                 str(self.__class__) + ' ' +
                 str(getframeinfo(currentframe()).lineno) +
                 ': Padded length = ' + str(pad_len))
             cbc_cipher = AES.new(key=self.key,
                                  mode=self.cipher_mode,
                                  iv=self.nonce)
             encrypted = cbc_cipher.encrypt(data)
             # CBC produces no authentication tag
             tag_text = None
         else:
             raise Exception(
                 str(self.__class__) + ' ' +
                 str(getframeinfo(currentframe()).lineno) +
                 ': Unsupported mode "' + str(self.cipher_mode) + '".')

         return AES_Encrypt.EncryptRetClass(
             cipher_mode=self.cipher_mode_str,
             ciphertext_b64=b64encode(encrypted).decode(self.text_encoding),
             plaintext_b64=None,
             tag_b64=tag_text,
             nonce_b64=b64encode(self.nonce).decode(self.text_encoding))
     except Exception as ex:
         errmsg = str(__name__) + ' ' + str(getframeinfo(currentframe()).lineno) \
                 + ': Error encoding data "' + str(data) + '" using AES ". Exception: ' + str(ex)
         Log.error(errmsg)
         raise Exception(errmsg)
Ejemplo n.º 6
0
Archivo: Hide.py Proyecto: mapktah/hide
 def filter_col(
         x,
         is_number_only = False,
         case_sensitive = False
 ):
     """
     Clean one cell value.

     The value is converted to ``str`` and trimmed with ``StringUtils.trim``,
     lower-cased unless ``case_sensitive`` is set, and reduced to its digits
     when ``is_number_only`` is set.

     :return: the cleaned value; if a step raises, the error is logged and the
         value as it stood before the failing step is returned
     """
     try:
         # Trimming is unconditional
         x = StringUtils.trim(str(x))
         x = x if case_sensitive else x.lower()
         if is_number_only:
             x = re.sub(pattern='[^0-9]', repl='', string=x)
     except Exception as ex_clean:
         # NOTE(review): `self` is not a parameter of this function; presumably
         # it is captured from an enclosing method - confirm
         location = str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno)
         Log.error(location + ': Error cleaning "' + str(x) + '". ' + str(ex_clean))
     return x
Ejemplo n.º 7
0
Archivo: Hide.py Proyecto: mapktah/hide
 def process_phone(
         x,
         country
 ):
     """
     Normalise a phone number according to the conventions of ``country``.

     Only ``'china'`` is handled, by delegating to
     ``PhoneNumber.filter_phone_china``. For any other country an error is
     logged and ``x`` is returned unchanged; the same happens if processing
     raises.
     """
     # NOTE(review): `self` is not a parameter of this function; presumably it
     # is captured from an enclosing method - confirm
     try:
         if country != 'china':
             location = str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno)
             Log.error(location + ': Unsupported country "' + str(country) + '"')
             return x
         return PhoneNumber.filter_phone_china(x)
     except Exception as ex:
         location = str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno)
         Log.error(
             location + ': Exception processing phone "' + str(x) + '". Exception ' + str(ex)
         )
         return x
Ejemplo n.º 8
0
 def hash(string, algo=ALGO_SHA1):
     """
     Return the hexadecimal digest of ``string`` under the chosen algorithm.

     :param string: text to hash; encoded with ``Hash.STR_ENCODING`` first
     :param algo: one of ``Hash.ALGO_SHA1``, ``Hash.ALGO_SHA256``,
         ``Hash.ALGO_SHA512``, ``Hash.ALGO_SHA3_256``, ``Hash.ALGO_SHA3_512``
     :return: hex digest string, or ``None`` on any failure (unsupported
         algorithm, input that cannot be encoded); the failure is logged
     """
     try:
         # Encoding is done inside the try so that a non-string input (for
         # example None) is reported like every other failure instead of
         # escaping as an uncaught AttributeError
         str_encode = string.encode(encoding=Hash.STR_ENCODING)
         if algo == Hash.ALGO_SHA1:
             h = hashlib.sha1(str_encode)
         elif algo == Hash.ALGO_SHA256:
             h = hashlib.sha256(str_encode)
         elif algo == Hash.ALGO_SHA512:
             h = hashlib.sha512(str_encode)
         elif algo == Hash.ALGO_SHA3_256:
             h = hashlib.sha3_256(str_encode)
         elif algo == Hash.ALGO_SHA3_512:
             h = hashlib.sha3_512(str_encode)
         else:
             raise Exception('Unsupported hash algo "' + str(algo) + '".')
         return h.hexdigest()
     except Exception as ex:
         errmsg = str(__name__) \
                  + ': Error hashing string "' + str(string) + '" using algo "' + str(algo)\
                  + '". Exception: ' + str(ex)
         Log.error(errmsg)
         return None
Ejemplo n.º 9
0
 def page_not_found(e):
     """
     Log the requested URL as invalid and answer with a 404 page.

     :param e: the error passed in by the caller (unused)
     :return: ``(html_body, 404)`` tuple
     """
     # NOTE(review): `self` is not a parameter of this function; presumably it
     # is captured from an enclosing method - confirm
     origin = str(self.__class__)
     line_no = str(getframeinfo(currentframe()).lineno)
     requested = str(flask.request.url)
     Log.error(origin + ' ' + line_no + ': Resource [' + requested + '] is not valid!')
     body = "<h1>404</h1><p>The resource could not be found.</p>"
     return body, 404
Ejemplo n.º 10
0
Archivo: Hide.py Proyecto: mapktah/hide
    def hide_data(
            self,
            # In string JSON
            records_json,
            # Column names to hide
            hide_colname,
            encrypt_key_b64,
            nonce_b64        = None,
            is_number_only   = False,
            case_sensitive   = False,
            # We support processing only China for now
            process_phone_country = None,
            hash_encode_lang = 'zh',
    ):
        """
        Derive cleaned, masked, hashed and encrypted columns for one column
        of a set of records.

        For the column ``hide_colname`` the following columns are added:

          - ``<col>_clean``: trimmed value, lower-cased unless
            ``case_sensitive``, digits only when ``is_number_only``, and
            phone-normalised when ``process_phone_country == 'china'``
          - ``<col>_last4char``: ``'***'`` followed by the last 4 characters
            (only the last character when the clean value is shorter than 8)
          - ``<col>_sha256``: SHA-256 hex digest of the clean value
          - ``<col>_sha256_readable``: the digest converted to another
            alphabet (Korean syllables for ``hash_encode_lang == 'ko'``,
            Chinese otherwise)
          - ``<col>_encrypt``: dict holding the AES (CBC mode) encryption
            result of the clean value

        A clean value that is missing (phone normalisation returns ``None``
        for numbers it rejects) yields ``None`` in the derived columns
        instead of aborting the whole batch.

        NOTE(review): samples of the raw records and of the data frame are
        written to the log by this method - confirm that is acceptable for the
        data being hidden.

        :param records_json: records as a JSON string, or an already parsed
            object accepted by ``pd.DataFrame``
        :param hide_colname: name of the column to process
        :param encrypt_key_b64: base64 encoded AES key
        :param nonce_b64: base64 encoded nonce / IV; ``None`` (or an
            undecodable value) passes ``None`` on to ``AES_Encrypt``
        :param is_number_only: strip every non-digit from the clean value
        :param case_sensitive: keep letter case in the clean value
        :param process_phone_country: only ``'china'`` triggers phone processing
        :param hash_encode_lang: target alphabet for the readable hash
        :return: the resulting records as a JSON string (``orient='records'``),
            or an error message string if ``records_json`` could not be parsed
        :raises Exception: if ``encrypt_key_b64`` cannot be base64 decoded
        """
        step = 0

        if isinstance(records_json, str):
            try:
                records_json = json.loads(records_json)
            except Exception as ex_json:
                errmsg = \
                    str(__name__) + ' ' + str(getframeinfo(currentframe()).lineno) \
                    + ': Exception loading json: ' + str(records_json)\
                    + '. Got exception: ' + str(ex_json)
                Log.error(errmsg)
                return errmsg

        colname_clean         = str(hide_colname) + '_clean'
        colname_last4char     = str(hide_colname) + '_last4char'
        colname_hash          = str(hide_colname) + '_sha256'
        colname_hash_readable = str(hide_colname) + '_sha256_readable'
        colname_encrypt       = str(hide_colname) + '_encrypt'

        df = pd.DataFrame(records_json)
        Log.debug(
            str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno)
            + ': Converted json object (first 20 records): '
            + str(records_json[0:20])
            + ' to data frame: ' + str(df)
        )

        Log.important(
            str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno)
            + ': Start processing records, hide column "' + str(hide_colname)
            + '". Records of sample rows' + str(records_json[0:10])
        )

        #
        # Basic cleaning: trim, optional lower-casing, optional digits-only
        #
        step += 1
        start_filter_time = Profiling.start()
        def filter_col(
                x,
                is_number_only = False,
                case_sensitive = False
        ):
            try:
                # We always trim no matter what
                x = StringUtils.trim(str(x))
                if not case_sensitive:
                    x = x.lower()
                if is_number_only:
                    x = re.sub(pattern='[^0-9]', repl='', string=x)
                return x
            except Exception as ex_clean:
                Log.error(
                    str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno)
                    + ': Error cleaning "' + str(x) + '". ' + str(ex_clean)
                )
                return x
        df[colname_clean] = df[hide_colname].apply(filter_col, args=(is_number_only, case_sensitive))
        Log.important(
            str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno)
            + ': Step ' + str(step) + ': BASIC CLEANING Took '
            + str(Profiling.get_time_dif_secs(start=start_filter_time, stop=Profiling.stop(), decimals=2))
            + ' secs. Successfully cleaned column "' + str(hide_colname)+
            '", case sensitive "' + str(case_sensitive)
            + '", is number "' + str(is_number_only)
            + '", sample rows: ' + str(df[0:2])
        )

        #
        # Process Phone Number by Country
        #
        # Incremented by 1 like every other step so the step numbers in the
        # log stay sequential
        step += 1
        start_phone_time = Profiling.start()
        def process_phone(
                x,
                country
        ):
            try:
                if country == 'china':
                    return PhoneNumber.filter_phone_china(x)
                else:
                    Log.error(
                        str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno)
                        + ': Unsupported country "' + str(country) + '"'
                    )
                    return x
            except Exception as ex:
                Log.error(
                    str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno)
                    + ': Exception processing phone "' + str(x) + '". Exception ' + str(ex)
                )
                return x

        if process_phone_country == 'china':
            df[colname_clean] = df[colname_clean].apply(process_phone, args=[process_phone_country])
            Log.important(
                str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno)
                + ': Step ' + str(step) + ': PHONE CLEANING Took '
                + str(Profiling.get_time_dif_secs(start=start_phone_time, stop=Profiling.stop(), decimals=2))
                + ' secs. Successfully processed phone for column "' + str(hide_colname)
                + '", sample rows: ' + str(df[0:2])
            )

        #
        # Extract last 4 characters
        #
        step += 1
        start_last4_time = Profiling.start()
        def last4char(
                x
        ):
            # A rejected phone number leaves a missing value behind; masking
            # str(None) would produce the misleading '***e'
            if pd.isna(x):
                return None
            text = str(x)
            len_x = len(text)
            # Values shorter than 8 characters reveal only their last character
            start = len_x - 4 if len_x >= 8 else len_x - 1
            return '***' + text[start:len_x]
        df[colname_last4char] = df[colname_clean].apply(last4char)
        Log.important(
            str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno)
            + ': Step ' + str(step) + ': EXTRACT LAST 4 CHAR Took '
            + str(Profiling.get_time_dif_secs(start=start_last4_time, stop=Profiling.stop(), decimals=2))
            + ' secs. Successfully extracted last 4 chars from column "' + str(hide_colname)
            + '"'
        )

        #
        # Hash the column
        #
        step += 1
        start_hash_time = Profiling.start()
        def sha256_hex(
                x
        ):
            # Nothing to hash for a missing value; passing it on would make
            # the string encoding fail for the whole column
            if pd.isna(x):
                return None
            return Hash.hash(
                string = x,
                algo   = Hash.ALGO_SHA256
            )

        df[colname_hash] = df[colname_clean].apply(sha256_hex)
        stop_hash_time = Profiling.stop()
        Log.important(
            str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno)
            + ': Step ' + str(step) + ': HASH Took '
            + str(Profiling.get_time_dif_secs(start=start_hash_time, stop=stop_hash_time, decimals=2))
            + ' secs. Successfully obfuscated column "' + str(hide_colname)
            + '", sample rows: ' + str(df[0:2])
        )

        #
        # Obfuscate Hash hexdigest to Chinese/etc characters
        #
        step += 1
        start_obflang_time = Profiling.start()
        def obfuscate_hash_to_lang(
                x,
                lang
        ):
            # The hash is None when hashing failed or the value was missing
            if pd.isna(x):
                return None
            unicode_range = Hash.BLOCK_CHINESE
            if lang == 'ko':
                unicode_range = Hash.BLOCK_KOREAN_SYL
            return Hash.convert_ascii_string_to_other_alphabet(
                ascii_char_string = x,
                unicode_range     = unicode_range,
                group_n_char      = 4
            )

        df[colname_hash_readable] = df[colname_hash].apply(obfuscate_hash_to_lang, args=[hash_encode_lang])
        Log.important(
            str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno)
            + ': Step ' + str(step) + ': HASH TO CHAR Took '
            + str(Profiling.get_time_dif_secs(start=start_obflang_time, stop=Profiling.stop(), decimals=2))
            + ' secs. Successfully converted obfuscation to language for column "' + str(hide_colname)
            + '"'
        )

        #
        # Encryption
        #
        step += 1
        start_enc_time = Profiling.start()
        try:
            key_bytes = b64decode(encrypt_key_b64.encode('utf-8'))
        except Exception as ex_key_conversion:
            # The key itself is deliberately kept out of the message: this
            # text ends up in logs and may be returned to the caller
            raise Exception(
                str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno)
                + ': Error converting base64 key to bytes. Exception: '
                + str(ex_key_conversion)
            ) from ex_key_conversion

        # A nonce is optional; only warn when one was supplied but is unusable
        nonce_bytes = None
        if nonce_b64 is not None:
            try:
                nonce_bytes = b64decode(nonce_b64.encode(encoding='utf-8'))
            except Exception as ex_nonce:
                Log.warning(
                    str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno)
                    + ': Error converting base64 nonce "' + str(nonce_b64)
                    + '" to bytes. Exception: ' + str(ex_nonce)
                )
                nonce_bytes = None

        # Only the key length is logged, never the key material
        Log.important(
            str(__name__) + ' ' + str(getframeinfo(currentframe()).lineno)
            + ': Step ' + str(step) + ': ENCRYPTION key decoded, len = '
            + str(len(key_bytes)) + ' bytes'
        )
        encryptor = AES_Encrypt(
            key   = key_bytes,
            mode  = AES_Encrypt.AES_MODE_CBC,
            nonce = nonce_bytes
        )
        def encrypt(
                x,
                encryptor
        ):
            try:
                x_bytes = bytes(x.encode(encoding='utf-8'))
                res = encryptor.encode(x_bytes)
                return {
                    'ciphermode': res.cipher_mode,
                    'ciphertext_b64': res.ciphertext_b64,
                    'tag_b64': res.tag_b64,
                    'iv_b64': res.nonce_b64
                }
            except Exception as ex:
                Log.error(
                    str(__name__) + ' ' + str(getframeinfo(currentframe()).lineno)
                    + ': Error encrypting "' + str(x) + '": ' + str(ex)
                )
                return None

        df[colname_encrypt] = df[colname_clean].apply(encrypt, args=[encryptor])
        Log.important(
            str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno)
            + ': Step ' + str(step) + ': ENCRYPTION Took '
            + str(Profiling.get_time_dif_secs(start=start_enc_time, stop=Profiling.stop(), decimals=2))
            + ' secs. Successfully encrypted column "' + str(hide_colname)
            + '", for records (first 20 rows): ' + str(df.values[0:20])
        )

        # NOTE: a "<col>_encrypt_readable" column (ciphertext converted to
        # another alphabet, like the readable hash) is not produced.

        df_json_str = df.to_json(
            # Make sure not ASCII
            force_ascii = False,
            orient      = 'records',
        )

        return df_json_str