def decrypt(cipher):
    """Brute-force a single-byte XOR cipher supplied as a hex string.

    Each key byte 0..255 is tried in turn. The candidate plaintext whose
    character frequencies are closest to the module-level ``freqs`` table
    wins. Relies on the Python 2 ``'hex'`` string codec.

    :param cipher: hex-encoded ciphertext
    :return: ``(key, message)``; both are ``''`` when no candidate scores
        below the initial error threshold of 10
    """
    best_error = 10
    key = ''
    message = ''
    for candidate in range(256):
        # XOR every hex-encoded byte with the candidate key, re-encoding the
        # result as two zero-padded hex digits.
        hex_plain = ''.join(
            '%02x' % (int(cipher[pos:pos + 2], 16) ^ candidate)
            for pos in range(0, len(cipher), 2)
        )
        try:
            letters = list(hex_plain.decode('hex').lower())
            error = 0
            for char in freqs.keys():
                observed = round(letters.count(char) * 2.0 / len(cipher), 7)
                error += abs(observed - freqs[char])
            if error < best_error:
                best_error = error
                key = chr(candidate)
                message = hex_plain.decode('hex')
        except:
            # Any failure while scoring simply disqualifies this candidate.
            pass
    return key, message
def encode(self, string):
    """Collapse unwanted characters and force the text down to ASCII.

    Runs of double quotes, tabs, newlines and spaces are replaced by a single
    space; every non-ASCII character is replaced by ``?``.

    The original chained ``.encode('ascii', 'replace').encode('utf-8')``,
    which only works on Python 2 (``bytes`` has no ``encode`` on Python 3).
    Decoding the ASCII bytes directly gives the same result on both.

    :param string: text to clean
    :return: cleaned ASCII-only native ``str``
    """
    clean_sentence_unwantedchars = '["\t\n ]+'
    collapsed = re.sub(clean_sentence_unwantedchars, ' ', string)
    ascii_bytes = collapsed.encode('ascii', 'replace')
    return str(ascii_bytes.decode('ascii'))
def ensure_unicode(string):
    """Decode a ``str`` value to unicode; return anything else unchanged.

    UTF-8 is tried first. On ``UnicodeDecodeError`` the value is decoded with
    the ``unicode-escape`` codec instead.

    :param string: value to convert
    :return: decoded text, or the input itself when it is not a ``str``
    """
    if not isinstance(string, str):
        return string
    try:
        return string.decode("utf-8")
    except UnicodeDecodeError:
        return string.decode("unicode-escape")
def scrubstring(string):
    """Sanitise a byte string with ``scrubber.Scrubber`` and return UTF-8 bytes.

    The input is decoded as ASCII when possible and as UTF-8 otherwise, passed
    through ``Scrubber(autolink=True).scrub`` and re-encoded as UTF-8.

    :param string: encoded input text
    :return: scrubbed text encoded as UTF-8
    """
    from scrubber import Scrubber

    cleaner = Scrubber(autolink=True)
    try:
        text = string.decode('ascii')
    except UnicodeDecodeError:
        text = string.decode('utf-8')
    return cleaner.scrub(text).encode('utf-8')
def normalize_unicode(string, encoding="utf-8"):
    """Return an ASCII-only approximation of ``string``.

    Byte input is decoded with ``encoding`` first. The ligatures ae/oe are
    spelled out explicitly because NFKD normalisation followed by an ASCII
    encode with ``ignore`` would otherwise drop them.

    :param string: text or bytes to normalise
    :param encoding: codec used when ``string`` is bytes
    :return: text containing only ASCII characters
    """
    if isinstance(string, bytes):
        string = string.decode(encoding)
    ligatures = ((u"æ", u"ae"), (u"Æ", u"AE"), (u"œ", u"oe"), (u"Œ", u"OE"))
    for ligature, spelled_out in ligatures:
        string = string.replace(ligature, spelled_out)
    decomposed = unicodedata.normalize('NFKD', string)
    return decomposed.encode('ascii', 'ignore').decode()
def ensure_unicode(stuff, encoding='utf8', encoding2='latin-1'):
    """Decode byte strings to text; return every other value untouched.

    Only exact ``bytes`` / ``numpy.bytes_`` instances are decoded (on
    Python 2 ``bytes`` is ``str``, so the original behaviour is kept there).
    ``np.bytes_`` replaces ``np.string_``, which was an alias for it and was
    removed in NumPy 2.0. Python 3 text is now passed through instead of
    failing on a missing ``.decode``.

    :param stuff: value to convert
    :param encoding: codec tried first
    :param encoding2: fallback codec, used with ``errors='ignore'``
    :return: decoded text, or ``stuff`` itself when it is not a byte string
    """
    if type(stuff) not in (bytes, np.bytes_):
        return stuff
    try:
        return stuff.decode(encoding)
    except Exception:
        # Best effort: any failure with the primary codec falls back to the
        # lenient secondary codec.
        return stuff.decode(encoding2, errors='ignore')
def unicode_cleaner(string):
    """Return ``string`` as unicode, guessing the encoding of byte strings.

    Unicode input is returned as is. Otherwise UTF-8 and then Latin-1 are
    tried strictly; if both raise ``UnicodeError`` the value is decoded as
    UTF-8 with undecodable bytes ignored.

    :param string: unicode or encoded byte string (Python 2)
    :return: unicode text
    """
    if isinstance(string, unicode):
        return string
    for codec in ('utf-8', 'latin-1'):
        try:
            return string.decode(codec)
        except UnicodeError:
            continue
    return string.decode('utf-8', 'ignore')
def decode_guess(self, string, encoding):
    """Re-encode ``string`` as UTF-8, guessing its source encoding.

    Used for text that is not valid UTF-8. The hinted ``encoding`` (when
    truthy) is tried first, then Latin-1; as a last resort the bytes are
    decoded as ASCII with replacement characters.

    :param string: encoded byte string
    :param encoding: optional codec hint; falsy to skip
    :return: UTF-8 encoded bytes
    """
    candidates = [encoding] if encoding else []
    candidates.append('latin-1')
    for codec in candidates:
        try:
            return string.decode(codec).encode('utf-8')
        except UnicodeDecodeError:
            continue
    return string.decode('ascii', 'replace').encode('utf-8')
def tryDecode(string):
    """Decode ``string`` and report which codec succeeded.

    Codecs are tried in order: strict UTF-8, strict ``config.encoding``, then
    ISO-8859-1 (strict, and finally with ``replace``).

    :param string: encoded byte string
    :return: two-element list ``[decoded_text, codec_name]``
    """
    try:
        return [string.decode('utf-8', 'strict'), 'utf-8']
    except:
        pass
    try:
        return [string.decode(config.encoding, 'strict'), config.encoding]
    except:
        pass
    try:
        decoded = string.decode('iso-8859-1', 'strict')
    except:
        decoded = string.decode('iso-8859-1', 'replace')
    return [decoded, 'iso-8859-1']
def utf8(string):
    """Coerce ``string`` to the text type.

    Text is returned unchanged, binary data is decoded as UTF-8, and any
    other object is converted with ``six.text_type``.

    :param string: text, bytes or arbitrary object
    :return: ``six.text_type`` instance
    """
    if isinstance(string, six.text_type):
        return string
    if isinstance(string, six.binary_type):
        return string.decode('utf8')
    return six.text_type(string)
def _decode_string(self, string): for encoding in ['ascii', 'UTF8', 'latin-1']: try: return string.decode(encoding) except: pass return 'INVALID ENCODING'
def fireDecode(string):
    """A function for handling Unicode. This is the decode method.

    Base64-decodes ``string`` and then decodes the resulting bytes as UTF-16.
    On any failure an error message is printed and ``setErrorFlag(True)`` is
    called; the function still returns whatever ``string`` holds at that
    point (the original input, or the raw base64-decoded bytes if only the
    UTF-16 step failed).

    :Sequence Character:: ? (This function's character when calling in a custom sequence)
    :Parameters:: string -- The string to edit
    :Date:: 11/11/2017
    :Author:: Allison Smith
    """
    try:
        # ``string`` is rebound after each step, so a failure in the second
        # step leaves the base64-decoded bytes in ``string``.
        string = base64.b64decode(string)
        string = str(string.decode('utf-16'))
    except:
        print( '''Error: could not decode source as base64. Please check your Password, Salt, and Sequence, and try again. If this problem persists, please file an issue: https://github.com/TheCyaniteProject/firecoder/issues''' )
        #sys.exit(2) # Exit with major error
        # Signal the failure to the caller instead of terminating the process.
        setErrorFlag(True)
    return string
def decrypt(self, string, decode=False):
    """Decrypt data produced by the matching hybrid RSA/AES ``encrypt``.

    The payload is unpacked into an RSA-encrypted AES passphrase and the
    AES-encrypted body. The passphrase is recovered with ``self.rsa`` and
    then used to decrypt the body.

    :param string: packed encrypted payload (text or bytes)
    :param decode: when true, ``.decode()`` the decrypted result
    :return: a response object; on success it carries the ``"decrypted"``
        value, otherwise the failing intermediate response is returned
    """
    payload = Formats.denitialize(string)
    if isinstance(payload, bytes):
        payload = payload.decode()

    # Split the payload into the encrypted AES key and the encrypted body.
    # NOTE(review): only KeyboardInterrupt is caught here (a bare ``except``
    # was commented out in the original), so genuine unpack errors propagate
    # to the caller - confirm this is intended.
    try:
        key, encrypted = unpack(payload)
    except KeyboardInterrupt:
        return _response_.error("Unable to unpack the encrypted data.")

    # Recover the AES passphrase with RSA.
    rsa_response = self.rsa.decrypt_string(key, decode=False)
    if not rsa_response.success:
        return rsa_response
    passphrase = rsa_response["decrypted"].decode()

    # Decrypt the body with AES.
    aes_response = AES(passphrase=passphrase).decrypt(encrypted)
    if not aes_response.success:
        return aes_response
    decrypted = aes_response["decrypted"]

    if decode:
        decrypted = decrypted.decode()
    return _response_.success("Successfully decrypted the specified data.", {
        "decrypted": decrypted,
    })
def htmlenties2txt(string, encoding="latin-1"):
    """Resolve the HTML entities in ``string``.

    Named entities (``&uuml;``) and numeric entities in decimal (``&#39;``)
    or hexadecimal (``&#x27;``) form are replaced by the character they
    denote. Anything that is not a well-formed, known entity is left as is.

    Fixes over the previous version: a trailing ``&``, an ``&`` with no
    closing ``;`` and malformed numeric entities no longer raise, and
    hexadecimal numeric entities are supported.

    :param string: text or encoded bytes (Python 2 code: relies on
        ``unichr`` and ``htmlentitydefs``)
    :param encoding: codec used to decode byte input
    :return: unicode string with the entities resolved
    """
    try:
        string = string.decode(encoding)
    except Exception:
        # Already text (or not decodable): work on the value as given.
        pass

    i = 0
    while i < len(string):
        amp = string.find("&", i)  # start of a potential entity
        if amp == -1:
            break
        i = amp + 1
        semicolon = string.find(";", amp)  # end of the potential entity
        if semicolon == -1:
            # No terminator after this point, so nothing is left to resolve.
            break
        entity = string[amp:semicolon + 1]
        name = entity[1:-1]
        if name.startswith("#"):
            # Numeric character reference, decimal or hexadecimal.
            try:
                if name[1:2] in ("x", "X"):
                    codepoint = int(name[2:], 16)
                else:
                    codepoint = int(name[1:])
                replacement = unichr(codepoint)
            except (ValueError, OverflowError):
                continue
        else:
            # Named entities are short; a distant ';' belongs to other text.
            if semicolon - amp > 7:
                continue
            try:
                replacement = unichr(htmlentitydefs.name2codepoint[name])
            except KeyError:
                continue
        string = string.replace(entity, replacement)
    return string
def convert_bstr(string, type=0):
    """Convert between text and UTF-8 bytes according to ``type``.

    :param string: value to convert
    :param type: ``""`` returns ``string`` unchanged, ``"encoder"`` encodes
        it to UTF-8 bytes, ``"decoder"`` decodes it from UTF-8
    :return: the converted value, or ``None`` for any other ``type``
        (including the default ``0``)
    """
    if type == "":
        result = string
    elif type == "encoder":
        result = string.encode("utf-8")
    elif type == "decoder":
        result = string.decode("utf-8")
    else:
        result = None
    return result
def do_POST(self):
    """Handle a JSON-RPC POST request.

    Reads the request body, dispatches it through
    ``self.server._marshaled_dispatch`` and writes the response with
    permissive CORS headers. Any exception is reported to the client as a
    JSON-RPC internal error (-32603) with HTTP status 500.

    Fixes over the previous version:

    * the body is accumulated as bytes and decoded once, so a multi-byte
      UTF-8 sequence split across two reads no longer raises;
    * an empty read (client closed early) or a negative Content-Length ends
      the read loop instead of spinning forever.
    """
    if self.path not in getattr(self, 'rpc_paths', [self.path]):
        return self.report_404()  # returns None
    try:
        max_chunk_size = 10 * 1024 * 1024
        size_remaining = int(self.headers['content-length'])
        chunks = []
        while size_remaining > 0:
            chunk = self.rfile.read(min(size_remaining, max_chunk_size))
            if not chunk:
                # Peer closed the connection before sending the full body.
                break
            size_remaining -= len(chunk)
            chunks.append(chunk)
        data = b''.join(chunks).decode('utf8')
        response = self.server._marshaled_dispatch(data)
        self.send_response(200)
    except Exception as e:
        response = ext.dumps(Payload.error(-32603, ext.format_exc(e)))
        ext.logger_rpc.error(response)
        self.send_response(500)
    if not isinstance(response, bytes):  # py 2 & 3
        response = response.encode('utf8')
    self.send_header('Access-Control-Allow-Origin', '*')
    self.send_header('Access-Control-Allow-Headers', 'Content-Type')
    self.send_header('Content-type', 'application/json-rpc')
    self.send_header('Content-length', str(len(response)))
    self.end_headers()
    self.wfile.write(response)
def clean_str(string):
    """
    Tokenization/string cleaning for all datasets except for SST.

    Decodes the input, removes ``www...`` tokens and unsupported characters,
    separates punctuation and contractions from words, then expands the
    contractions and lower-cases the result.

    Original taken from
    https://github.com/yoonkim/CNN_sentence/blob/master/process_data.py

    :param string: encoded byte string
    :return: cleaned, stripped, lower-cased text
    """
    string = string.decode()

    # Regex passes, applied in order. The replacement strings for "(", ")"
    # and "?" intentionally keep a literal backslash; it is removed by the
    # plain replacements further down.
    regex_passes = (
        (r"(www\S*)\s", " "),
        (r"[^A-Za-z(),!?\'\`]", " "),
        (r"\'s", " 's"),
        (r"\'ve", " 've"),
        (r"n\'t", " n't"),
        (r"\'re", " 're"),
        (r"\'d", " 'd"),
        (r"\'ll", " 'll"),
        (r",", " , "),
        (r"!", " ! "),
        (r"\(", r" \( "),
        (r"\)", r" \) "),
        (r"\?", r" \? "),
        (r"\s{2,}", " "),
        (r'"', ' '),
    )
    for pattern, replacement in regex_passes:
        string = re.sub(pattern, replacement, string)

    # Literal replacements: expand contractions, then blank out newlines and
    # the escaped punctuation markers introduced above.
    literal_passes = (
        ("'ve", "have"),
        ("'d", "had"),
        ("'s", "is"),
        ("n't", "not"),
        ("'re", "are"),
        ("'ll", "will"),
        ("\n", " "),
        ("\\(", " "),
        ("\\)", " "),
        ("\\?", " "),
    )
    for old, new in literal_passes:
        string = string.replace(old, new)
    return string.strip().lower()
def force_decode(string, codecs=['utf-8', 'iso-8859-1']):
    """Decode ``string`` with the first codec in ``codecs`` that works.

    :param string: encoded byte string
    :param codecs: codec names to try, in order (never modified)
    :return: decoded text, or ``string`` unchanged when every attempt fails
    """
    for codec in codecs:
        try:
            decoded = string.decode(codec)
        except:
            continue
        else:
            return decoded
    return string
def encrypt(self, string, decode=False):
    """Encrypt data with a one-time AES passphrase protected by RSA.

    A random 64-character passphrase encrypts the data with AES; the
    passphrase itself is encrypted with ``self.rsa``. Both parts are packed
    as ``<rsa part> + b" " + <aes part>``.

    :param string: data to encrypt (text or bytes)
    :param decode: when true, ``.decode()`` the packed result
    :return: a response object; on success it carries the ``"encrypted"``
        value, otherwise the failing intermediate response is returned
    """
    plaintext = Formats.denitialize(string)
    if isinstance(plaintext, bytes):
        plaintext = plaintext.decode()

    # Encrypt the data with AES under a freshly generated passphrase.
    passphrase = String().generate(length=64, digits=True, capitalize=True)
    aes_response = AES(passphrase=passphrase).encrypt(plaintext)
    if not aes_response.success:
        return aes_response
    aes_encrypted = aes_response["encrypted"]
    # A space is the separator of the packed format, so it must not occur in
    # the AES part.
    if b" " in aes_encrypted:
        return _response_.error("AES encrypt data contains invalid ' ' character(s).")

    # Encrypt the AES passphrase with RSA.
    rsa_response = self.rsa.encrypt_string(passphrase, decode=False)
    if not rsa_response.success:
        return rsa_response

    encrypted = rsa_response["encrypted"] + b" " + aes_encrypted
    if decode:
        encrypted = encrypted.decode()
    return _response_.success("Successfully encrypted the specified data.", {
        "encrypted": encrypted,
    })
def strip_escape(string='', encoding="utf-8"):  # pylint: disable=redefined-outer-name
    """
    Remove every match of the module-level ``ansi_eng`` pattern from string.

    :param string: string (or decodable bytes) to work on
    :param encoding: name of the encoding used to decode byte input
    :return: string with all matches cut out
    :raises TypeError: if the pattern cannot be applied to the data
    """
    try:
        if hasattr(string, "decode"):
            string = string.decode(encoding)
    except Exception:  # pylint: disable=broad-except
        # Not decodable in the requested encoding; carry on with the raw value.
        pass

    try:
        matches = list(ansi_eng.finditer(string))
    except TypeError as error:
        raise TypeError(
            "Unable to strip escape characters from data {}: {}".format(
                string, error))

    # Cut from the end so earlier match offsets stay valid.
    for match in reversed(matches):
        string = string[:match.start()] + string[match.end():]
    return string
def string_decode(string):
    '''
    Interpret backslash escape sequences in ``string``.

    For cross compatibility between Python 2 and Python 3 strings.

    On Python 3 the ``unicode_escape`` codec decodes its input as Latin-1, so
    the previous UTF-8 round trip turned every non-ASCII character into
    mojibake. Encoding with Latin-1 plus ``backslashreplace`` keeps
    characters below U+0100 intact and writes the rest as escapes that the
    decoder restores.

    :param string: text containing escape sequences such as ``\\n``
    :return: text with the escape sequences resolved
    '''
    if sys.version_info.major > 2:
        escaped = string.encode('latin-1', 'backslashreplace')
        return escaped.decode('unicode_escape')
    return string.decode('string_escape')
def tryDecode(string):
    """Decode ``string`` with the first codec that accepts it.

    Codecs are tried from strictest to most permissive: ASCII, UTF-8,
    Latin-1. Latin-1 used to be tried before UTF-8; because it accepts every
    byte sequence, UTF-8 was never reached and UTF-8 input came back as
    mojibake.

    :param string: encoded byte string
    :return: decoded text
    :raises UnicodeDecodeError: if no codec can decode the data (previously
        this path failed with ``TypeError`` because the exception was
        constructed with a single argument)
    """
    for codec in ('ascii', 'utf-8', 'latin-1'):
        try:
            return string.decode(codec)
        except UnicodeDecodeError:
            continue
    raise UnicodeDecodeError(
        'latin-1', string, 0, len(string),
        'unable to decode with ascii, utf-8 or latin-1')
def force_unicode(string, encoding='utf-8'):
    """Best-effort conversion of ``string`` to unicode (Python 2).

    Exact ``str`` instances are decoded with ``encoding``; any other
    non-unicode value is passed through ``unicode()``. If a step fails, the
    value as converted so far is returned.

    The original used ``return`` inside ``finally``, which also swallowed
    ``KeyboardInterrupt`` and ``SystemExit``. Only ordinary exceptions are
    suppressed now.

    :param string: value to convert
    :param encoding: codec used for byte strings
    :return: unicode text, or the partially converted value on failure
    """
    try:
        if type(string) is str:
            string = string.decode(encoding)
        if type(string) is not unicode:
            string = unicode(string)
    except Exception:
        # Deliberate best effort: return what has been converted so far.
        pass
    return string
def string_decode(string):
    """
    Interpret backslash escape sequences in ``string``.

    For cross compatibility between Python 2 and Python 3 strings.

    On Python 3 the ``unicode_escape`` codec decodes its input as Latin-1, so
    the previous UTF-8 round trip turned every non-ASCII character into
    mojibake. Encoding with Latin-1 plus ``backslashreplace`` keeps
    characters below U+0100 intact and writes the rest as escapes that the
    decoder restores.

    :param string: text containing escape sequences such as ``\\n``
    :return: text with the escape sequences resolved
    """
    if PY_MAJOR_VERSION > 2:
        escaped = string.encode("latin-1", "backslashreplace")
        return escaped.decode("unicode_escape")
    return string.decode("string_escape")
def force_encoding(string):
    """Return the first successful UTF-8 or Latin-1 conversion of ``string``.

    Attempts, in order: encode as UTF-8, decode as UTF-8, encode as Latin-1.
    An attempt that raises ``UnicodeDecodeError`` is skipped. The final
    fallback, decoding as Latin-1, is not guarded.

    :param string: text or byte string
    :return: result of the first conversion that succeeds
    """
    attempts = (
        lambda: string.encode("Utf-8"),
        lambda: string.decode("Utf-8"),
        lambda: string.encode("latin"),
    )
    for attempt in attempts:
        try:
            return attempt()
        except UnicodeDecodeError:
            continue
    return string.decode("latin")
def string_decode(string):
    '''
    Interpret backslash escape sequences in ``string``.

    For cross compatibility between Python 2 and Python 3 strings.

    On Python 3 the ``unicode_escape`` codec decodes its input as Latin-1, so
    the previous UTF-8 round trip turned every non-ASCII character into
    mojibake. Encoding with Latin-1 plus ``backslashreplace`` keeps
    characters below U+0100 intact and writes the rest as escapes that the
    decoder restores.

    :param string: text containing escape sequences such as ``\\n``
    :return: text with the escape sequences resolved
    '''
    if PY_MAJOR_VERSION > 2:
        escaped = string.encode('latin-1', 'backslashreplace')
        return escaped.decode('unicode_escape')
    return string.decode('string_escape')
def encoded(string, encoding='utf8'):
    """Cast string to binary_type.

    Text is encoded with ``encoding``. Binary input is returned unchanged
    when it is valid in ``encoding``; otherwise it is assumed to be latin1
    and transcoded.

    :param string: six.binary_type or six.text_type
    :param encoding: encoding which the object is forced to
    :return: six.binary_type
    """
    assert isinstance(string, (string_types, binary_type))
    if isinstance(string, text_type):
        return string.encode(encoding)
    try:
        # Validation only: the decoded value itself is not needed.
        string.decode(encoding)
    except UnicodeDecodeError:
        # Best guess for undecodable input is latin1.
        return string.decode('latin1').encode(encoding)
    return string
def b64_to_hex(string):
    """Convert a (possibly unpadded) base64 string to its hex representation.

    The padding is repaired before decoding. Relies on the Python 2
    ``'base64'`` and ``'hex'`` string codecs.

    Empty input now yields ``''`` instead of raising ``IndexError`` on
    ``string[-1]``.

    :param string: base64 text
    :return: hex string of the decoded bytes
    """
    if not string:
        return ''
    pad = len(string) % 4
    if string[-1] == '=':
        pad = (pad + 1) % 4
    if pad == 3:
        string += 'A=='
    elif pad in (1, 2):
        string += b'=' * pad
    return string.decode('base64').encode('hex')
def mb_code(string, coding="utf-8"):
    """Re-encode ``string`` as ``coding``, guessing its source encoding.

    Unicode input is encoded directly. Byte strings are decoded with the
    first of UTF-8, GB2312, GBK, GB18030 and Big5 that works; any exception
    moves on to the next codec.

    :param string: unicode or encoded byte string (Python 2)
    :param coding: target encoding
    :return: byte string in ``coding``, or ``string`` unchanged if no codec
        fits
    """
    if isinstance(string, unicode):
        return string.encode(coding)
    for codec in ('utf-8', 'gb2312', 'gbk', 'gb18030', 'big5'):
        try:
            converted = string.decode(codec).encode(coding)
        except:
            continue
        return converted
    return string
def encoded(string, encoding='utf-8'):
    """Cast string to binary_type.

    Text is encoded with ``encoding``. Binary input is returned unchanged
    when it is valid in ``encoding``; otherwise it is assumed to be latin1
    and transcoded.

    :param string: six.binary_type or six.text_type
    :param encoding: encoding which the object is forced to
    :return: six.binary_type
    """
    assert isinstance(string, (string_types, binary_type))
    if isinstance(string, text_type):
        return string.encode(encoding)
    try:
        # Validation only: the decoded value itself is not needed.
        string.decode(encoding)
    except UnicodeDecodeError:
        # Best guess for undecodable input is latin1.
        return string.decode('latin1').encode(encoding)
    return string
def tokenize(string):
    """Tokenise English text and drop stop words.

    The stop-word list is turned into a set once, so each token is checked in
    constant time instead of scanning the whole list per token.

    :param string: UTF-8 encoded byte string
    :return: list of tokens that are not English stop words, or ``None`` for
        empty input
    """
    if not string:
        return None
    stop = set(stopwords.words('english'))
    return [
        token
        for token in word_tokenize(string.decode('utf-8'), 'english')
        if token not in stop
    ]
def _sync_string_to(bin_or_str, string): """ Ensure two unicode or binary strings are the same type """ if isinstance(string, type(bin_or_str)): return string elif isinstance(string, bytes): return string.decode(DEFAULT_ENCODING) else: return string.encode(DEFAULT_ENCODING)
def correct_string_decoding(self, string):
    """Undo UTF-8 text that was mistakenly decoded as ISO-8859-1.

    :param string: text (or bytes) to repair
    :return: the repaired text when the ISO-8859-1 to UTF-8 round trip
        works; the text encoded as UTF-8 bytes when the round trip fails;
        UTF-8 decoded text when ``string`` has no ``encode`` (bytes)
    """
    try:
        return string.encode("iso-8859-1").decode("utf-8")
    except (UnicodeEncodeError, UnicodeDecodeError):
        return string.encode("utf-8")
    except AttributeError:
        return string.decode("utf-8")
def decodeString(string):
    """Transcode a Windows-1252 byte string to UTF-8.

    On failure the error and a warning are printed and the input is returned
    unchanged. In the original the two prints sat after the ``return`` and
    could never run. The syntax is now valid on both Python 2.6+ and
    Python 3.

    :param string: Cp1252 encoded byte string
    :return: UTF-8 encoded bytes, or ``string`` itself if conversion fails
    """
    try:
        return string.decode('Cp1252').encode("utf-8")
    except Exception as errorInfo:
        print(errorInfo)
        print("Unable to convert a character in this item to utf-8, this may cause the character to display incorrectly")
        return string
def zip(cls, string: Union[bytes, str], append_semicolon: bool = True) -> str:
    """Encode ``string`` with ``cls.encode_save`` (no XOR step).

    Bytes are decoded first, ignoring undecodable data. When
    ``append_semicolon`` is set, a trailing ``;`` is added unless already
    present.

    :param string: text or bytes to encode
    :param append_semicolon: whether to guarantee a trailing ``;``
    :return: whatever ``cls.encode_save(text, needs_xor=False)`` returns
    """
    text = string.decode(errors="ignore") if isinstance(string, bytes) else string
    if append_semicolon and not text.endswith(";"):
        text = text + ";"
    return cls.encode_save(text, needs_xor=False)
def decode(string, key):
    """Decode a URL-safe base64 payload obfuscated with a repeating key.

    After base64 decoding (extra ``=`` padding is appended), the code point
    of the matching key character is subtracted from each character,
    modulo 256.

    :param string: URL-safe base64 encoded bytes
    :param key: key string, repeated cyclically over the payload
    :return: decoded text
    """
    raw = base64.urlsafe_b64decode(string + b'===')
    # On Python 3 map each byte to the character with the same code point.
    raw = raw.decode('latin') if six.PY3 else raw
    return ''.join(
        chr((ord(char) - ord(key[index % len(key)]) + 256) % 256)
        for index, char in enumerate(raw)
    )
def romanizeHangul(self, string):
    """Romanise Hangul text with ``Transliter(academic)``.

    The input is decoded as UTF-8 before transliteration, and every ``-`` is
    removed from the transliterated output.

    :param string: UTF-8 encoded byte string
    :return: romanised text without hyphens
    """
    romanizer = Transliter(academic)
    romanized = romanizer.translit(string.decode("utf-8"))
    return romanized.replace("-", "")

# Usage examples:
# t = Translator()
# print t.translateIDKR("aku orang indonesia")
# print t.translateKRID("나는 인도네시아")
# print t.romanizeHangul("aa 나는 인도네시아 어이다.")
# print t.preprocessKR("나는 인도네시아")
def mapUTF8toXML(string):
    """Convert UTF-8 bytes to ASCII text with XML character references.

    Characters with code points from 0x20 to 0x7F are copied as is; every
    other character becomes a hexadecimal reference such as ``&#xe9;``.

    :param string: UTF-8 encoded byte string
    :return: ASCII-only text
    """
    return "".join(
        chr(code) if 0x1F < code < 0x80 else "&#x" + hex(code)[2:] + ";"
        for code in map(ord, string.decode('utf8'))
    )
def assertContained(self, values, string, additional_info=''):
    """Assert that at least one of ``values`` occurs in ``string``.

    :param values: a single expected value, or a list/tuple of alternatives
    :param string: text to search; if it is neither ``str`` nor ``unicode``
        it is called and its return value is searched instead
    :param additional_info: extra text appended to the failure message
    :raises Exception: when none of the values is found; the message
        includes a unified diff between the first expected value and the
        actual text
    """
    # Accept a single value as well as a collection of alternatives.
    if type(values) not in [list, tuple]: values = [values]
    for value in values:
        # NOTE: ``string`` is rebound inside the loop, so a conversion made
        # for one value carries over to the values checked after it.
        if type(value) is unicode: string = string.decode('UTF-8') # If we have any non-ASCII chars in the expected string, treat the test string from ASCII as UTF8 as well.
        if type(string) is not str and type(string) is not unicode: string = string()
        if value in string: return # success
    raise Exception("Expected to find '%s' in '%s', diff:\n\n%s\n%s" % (
        limit_size(values[0]), limit_size(string),
        limit_size(''.join([a.rstrip()+'\n' for a in difflib.unified_diff(values[0].split('\n'), string.split('\n'), fromfile='expected', tofile='actual')])),
        additional_info
    ))
def fsdecode(string):
    u"""Decode byte strings to unicode with the file system encoding.

    This function is modelled after its namesake in the Python 3 os module.

    :param string: ``str`` (bytes on Python 2) or ``unicode``
    :return: unicode text
    :raises TypeError: if ``string`` is neither ``str`` nor ``unicode``
    """
    if isinstance(string, str):
        encoding = sys.getfilesystemencoding()
        return string.decode(encoding)
    if isinstance(string, unicode):
        return string
    raise TypeError("argument is not of string type: {!r}".format(string))
def to_unicode(string):
    """Convert ``string`` to unicode (Python 2).

    Unicode input is returned unchanged; ``str`` input is decoded as UTF-8.

    The original had no ``return`` statement, so it always returned
    ``None``. The ``except`` clause now uses the ``as`` form, which is valid
    on Python 2.6+ and Python 3.

    :param string: ``unicode`` or UTF-8 encoded ``str``
    :return: unicode text
    :raises TypeError: if ``string`` is neither ``unicode`` nor ``str``, or
        is not valid UTF-8
    """
    if not isinstance(string, unicode):
        if not isinstance(string, str):
            raise TypeError('You are required to pass either unicode or string here, not: %r (%s)' % (type(string), string))
        try:
            string = string.decode("UTF-8")
        except UnicodeDecodeError as exc:
            raise TypeError(str(exc))
    return string
def dewindows(string):
    """Decode a byte string with Windows-style codecs and unescape HTML.

    The codecs windows-1252, windows-1251, ISO-8859-1 and UTF-8 are tried in
    that order and the first that works is used. HTML entities are then
    unescaped with ``HTMLParser``. Every failure is ignored.

    :param string: encoded byte string
    :return: decoded, unescaped text, or the input as far as it could be
        converted
    """
    h = HTMLParser.HTMLParser()
    for codec in ("windows-1252", "windows-1251", "ISO-8859-1", "utf-8"):
        try:
            string = string.decode(codec)
        except:
            continue
        break
    try:
        string = h.unescape(string)
    except:
        pass
    return string
def parse_string(string, parser):
    """Parse every non-blank line of ``string`` with ``parser.raw_parse``.

    The input is decoded with ``unicode_escape`` and reduced to ASCII.
    Unencodable characters become ``?``, and every ``?`` (including genuine
    ones) is then replaced by a space.

    :param string: byte string, possibly containing escape sequences
    :param parser: object providing ``raw_parse(text)``
    :return: list of parse results, one per non-blank line
    """
    ascii_text = string.decode('unicode_escape').encode('ascii', 'replace')
    ascii_text = ascii_text.replace('?', ' ')
    return [
        parser.raw_parse(para)
        for para in ascii_text.split('\n')
        if para.strip()
    ]
def extractString(io, decode=True):
    """Read a NUL-terminated string from the stream ``io``.

    Reads one item at a time until a ``"\\0"`` or end of stream; the
    terminator is consumed but not returned.

    :param io: stream providing ``tell()`` and ``read(1)``
    :param decode: when true, decode the collected string as UTF-8
    :return: the string read, decoded if requested
    """
    _startpos = io.tell()
    pieces = []
    while True:
        char = io.read(1)
        if not char or char == "\0":
            break
        pieces.append(char)
    text = "".join(pieces)
    return text.decode("utf8") if decode else text
def check_utf8(string):
    """
    Validate that a string is valid UTF-8 str or unicode and that it does
    not contain any null character.

    :param string: string to be validated
    :returns: True if the string is valid utf-8 str or unicode and
              contains no null characters, False otherwise
    """
    if not string:
        return False
    try:
        if isinstance(string, unicode):
            string.encode('utf-8')
        else:
            string.decode('UTF-8')
    # Calling decode() on unicode raises UnicodeEncodeError, so both it and
    # UnicodeDecodeError are caught through their common base class.
    except UnicodeError:
        return False
    else:
        return '\x00' not in string
def readString(self, min_string_length = 4, max_string_length = 1000):
    """Read a length-prefixed string from ``self.f``.

    The length comes from ``self.readUint32()``. The last byte of the
    payload is dropped as the trailing terminator, without checking its
    value.

    :param min_string_length: smallest accepted length prefix
    :param max_string_length: largest accepted length prefix
    :return: ``None`` if the length is out of range; otherwise the payload
        decoded as UTF-8, or the raw bytes if decoding fails
    """
    length = self.readUint32()
    if length < min_string_length or length > max_string_length:
        return None
    raw = self.f.read(length)
    payload = raw[:-1]  # omit trailing null terminator
    try:
        return payload.decode('utf-8')
    except:
        return payload
def normalize(string):
    r"""
    Return a new string without non-ASCII characters, replacing them with
    their closest ASCII counterparts when possible.

    Characters are decomposed with NFKD normalisation; whatever is still
    non-ASCII afterwards is dropped.

    :Example:

    >>> normalize(u"H\xe9ll\xf8 W\xc3\xb6rld")
    'Hell WArld'

    This version uses unicodedata and provides limited yet useful results.
    """
    decomposed = unicodedata.normalize('NFKD', string)
    ascii_bytes = decomposed.encode('ascii', 'ignore')
    return ascii_bytes.decode('ascii')
def send_raw(self, string):
    """Send raw string to the server.

    The string is terminated with a single CR LF and sent as UTF-8. A byte
    string that cannot be encoded directly (Python 2, non-ASCII content) is
    interpreted as ISO-8859-1 first.

    Fixes over the previous version: the fallback path appended CR LF up to
    three times, and in SSL mode it also wrote the line to the plain socket.
    The line is now encoded once and written to exactly one transport.

    :param string: line to send, without the trailing CR LF
    :raises ServerNotConnectedError: if there is no socket
    """
    if self.socket is None:
        raise ServerNotConnectedError("Not connected.")
    line = string + "\r\n"
    payload = None
    try:
        payload = line.encode('utf-8')
    except (UnicodeEncodeError, UnicodeDecodeError):
        # Python 2 byte string with non-ASCII content: treat as ISO-8859-1.
        try:
            payload = line.decode('ISO-8859-1').encode('utf-8')
        except (UnicodeEncodeError, UnicodeDecodeError) as e:
            # Nothing sendable; report it and carry on, as before.
            print(e)
    try:
        if payload is not None:
            if self.ssl:
                self.ssl.write(payload)
            else:
                self.socket.send(payload)
        if DEBUG:
            print("TO SERVER: %s" % (line if payload is None else payload,))
    except socket.error:
        # Ouch!
        self.disconnect("Connection reset by peer.")
def reply(self, string, notice=False):
    """Reply to the triggering user, prefixing the message with their nick.

    :param string: message text; a ``str`` is decoded as UTF-8 when not
        running on Python 3
    :param notice: send via ``self.notice`` instead of ``self._bot.msg``
    """
    if isinstance(string, str) and not py3:
        string = string.decode('utf8')
    message = '%s: %s' % (self._trigger.nick, string)
    if notice:
        self.notice(message, self._trigger.sender)
    else:
        self._bot.msg(self._trigger.sender, message)
def unicodise(string, encoding = None, errors = "replace"):
    """
    Convert 'string' to Unicode or raise an exception.

    The failure path used to build ``UnicodeDecodeError`` with a single
    message argument, which itself raises ``TypeError`` because the
    constructor takes five arguments. The error is now re-raised with the
    original codec details and the descriptive reason.

    :param string: unicode (returned as is) or encoded byte string
    :param encoding: source codec; defaults to ``Config.Config().encoding``
    :param errors: codec error handler
    :return: unicode text
    :raises UnicodeDecodeError: if the data cannot be decoded
    """
    if not encoding:
        encoding = Config.Config().encoding
    if type(string) == unicode:
        return string
    debug("Unicodising %r using %s" % (string, encoding))
    try:
        return string.decode(encoding, errors)
    except UnicodeDecodeError as exc:
        raise UnicodeDecodeError(
            exc.encoding, exc.object, exc.start, exc.end,
            "Conversion to unicode failed: %r" % string)
def TagFont(string, fonttype, fonts):
    """Wrap ``string`` in HTML font tags built from a font setting.

    :param string: text to wrap; non-unicode input is decoded as UTF-8
    :param fonttype: key of the font in ``fonts``; the colour is looked up
        under ``'<fonttype>fc'``
    :param fonts: mapping of font settings
    :return: unicode HTML fragment with face, size and colour set, plus
        bold, italic and underline tags as the font requires
    """
    font = fonts[fonttype]
    color = fonts['%sfc' % fonttype]

    bold = font.Weight == wx.BOLD
    italic = font.Style == wx.ITALIC
    underlined = font.Underlined

    size = str(Point2HTMLSize(fonts[fonttype].PointSize))
    text = string if isinstance(string, unicode) else string.decode('utf-8')

    pieces = [
        '<font face="', font.FaceName,
        '" size="', size,
        '" color="', color, '">',
        '<b>' if bold else '',
        '<i>' if italic else '',
        '<u>' if underlined else '',
        text,
        '</u>' if underlined else '',
        '</i>' if italic else '',
        '</b>' if bold else '',
        '</font>',
    ]
    return u''.join(pieces)
def to_native_string(string, encoding='ascii'):
    """
    Given a string object, regardless of type, return a representation of
    that string in the native string type, encoding and decoding where
    necessary. This assumes ASCII unless told otherwise.

    :param string: native or non-native string
    :param encoding: codec used for the conversion
    :return: ``string`` as the native string type
    """
    if isinstance(string, builtin_str):
        return string
    if is_py2:
        return string.encode(encoding)
    return string.decode(encoding)