def is_valid_encoding(encoding):
    """Return ``True`` if `encoding` is a supported encoding."""
    try:
        codecs.lookup(encoding)
        return True
    except LookupError:
        return False
def _detect_encoding(lines):
    """Get the encoding of a Python source file from a list of lines as bytes.

    This function does less than tokenize.detect_encoding added in Python 3
    as it does not attempt to raise a SyntaxError when the interpreter would,
    it just wants the encoding of a source file Python has already compiled
    and determined is valid.
    """
    if not lines:
        return _default_source_encoding
    if lines[0].startswith("\xef\xbb\xbf"):
        # Source starting with UTF-8 BOM is either UTF-8 or a SyntaxError
        return "utf-8"
    # Only the first two lines of the source file are examined
    magic = _cookie_search("".join(lines[:2]))
    if magic is None:
        return _default_source_encoding
    encoding = magic.group(1)
    try:
        codecs.lookup(encoding)
    except LookupError:
        # Some codecs raise something other than LookupError if they don't
        # support the given error handler, but not the text ones that could
        # actually be used for Python source code
        return _default_source_encoding
    return encoding
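# A sketch of the module-level helpers the function above relies on but that
# are not shown in the snippet; the "ascii" default and the PEP 263 cookie
# pattern are assumptions, not confirmed by the source:
import codecs
import re

_default_source_encoding = "ascii"
_cookie_search = re.compile(r"coding[:=]\s*([-\w.]+)").search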
def Translate(expression):
    try:
        codecs.lookup(expression)
        command = '.decode("%s")' % expression
    except LookupError:
        command = expression
    return lambda x: eval('x' + command)
def __init__(self, environ):
    script_name = base.get_script_name(environ)
    path_info = force_text(environ.get('PATH_INFO', '/'))
    if not path_info or path_info == script_name:
        # Sometimes PATH_INFO exists, but is empty (e.g. accessing
        # the SCRIPT_NAME URL without a trailing slash). We really need to
        # operate as if they'd requested '/'. Not amazingly nice to force
        # the path like this, but should be harmless.
        #
        # (The comparison of path_info to script_name is to work around an
        # apparent bug in flup 1.0.1. See Django ticket #8490).
        path_info = '/'
    self.environ = environ
    self.path_info = path_info
    self.path = '%s%s' % (script_name, path_info)
    self.META = environ
    self.META['PATH_INFO'] = path_info
    self.META['SCRIPT_NAME'] = script_name
    self.method = environ['REQUEST_METHOD'].upper()
    _, content_params = self._parse_content_type(self.META.get('CONTENT_TYPE', ''))
    if 'charset' in content_params:
        try:
            codecs.lookup(content_params['charset'])
        except LookupError:
            pass
        else:
            self.encoding = content_params['charset']
    self._post_parse_error = False
    try:
        content_length = int(self.environ.get('CONTENT_LENGTH'))
    except (ValueError, TypeError):
        content_length = 0
    self._stream = LimitedStream(self.environ['wsgi.input'], content_length)
    self._read_started = False
def write_vmx_file(path, pairs):
    """
    Write a VMware VMX file.

    :param path: path to the VMX file
    :param pairs: settings to write
    """
    encoding = "utf-8"
    if ".encoding" in pairs:
        file_encoding = pairs[".encoding"]
        try:
            codecs.lookup(file_encoding)
            encoding = file_encoding
        except LookupError:
            log.warning("Invalid file encoding detected in '{}': {}".format(path, file_encoding))
    with open(path, "w", encoding=encoding, errors="ignore") as f:
        if sys.platform.startswith("linux"):
            # write the shebang on the first line on Linux
            vmware_path = VMware._get_linux_vmware_binary()
            if vmware_path:
                f.write("#!{}\n".format(vmware_path))
        for key, value in pairs.items():
            entry = '{} = "{}"\n'.format(key, value)
            f.write(entry)
def __init__(self, environ):
    script_name = get_script_name(environ)
    path_info = get_path_info(environ)
    if not path_info:
        # Sometimes PATH_INFO exists, but is empty (e.g. accessing
        # the SCRIPT_NAME URL without a trailing slash). We really need to
        # operate as if they'd requested '/'. Not amazingly nice to force
        # the path like this, but should be harmless.
        path_info = '/'
    self.environ = environ
    self.path_info = path_info
    self.path = '%s/%s' % (script_name.rstrip('/'), path_info.lstrip('/'))
    self.META = environ
    self.META['PATH_INFO'] = path_info
    self.META['SCRIPT_NAME'] = script_name
    self.method = environ['REQUEST_METHOD'].upper()
    _, content_params = self._parse_content_type(environ.get('CONTENT_TYPE', ''))
    if 'charset' in content_params:
        try:
            codecs.lookup(content_params['charset'])
        except LookupError:
            pass
        else:
            self.encoding = content_params['charset']
    self._post_parse_error = False
    try:
        content_length = int(environ.get('CONTENT_LENGTH'))
    except (ValueError, TypeError):
        content_length = 0
    self._stream = LimitedStream(self.environ['wsgi.input'], content_length)
    self._read_started = False
    self.resolver_match = None
def determine_console_encoding():
    try:
        # try for the preferred encoding
        encoding = locale.getpreferredencoding()
        # see if locale.getdefaultlocale returns null;
        # some versions of python/platforms return US-ASCII
        # when it cannot determine an encoding
        if not encoding or encoding == "US-ASCII":
            encoding = locale.getdefaultlocale()[1]
        if encoding:
            codecs.lookup(encoding)  # make sure a lookup error is not made
    except (locale.Error, LookupError):
        encoding = None
    is_osx = sys.platform == "darwin"
    if not encoding:
        return ["US-ASCII", "utf-8"][is_osx]
    elif encoding.startswith("mac-") and is_osx:
        # certain versions of python would return mac-roman as the default
        # on OS X, as a leftover of earlier Mac OS versions.
        return "utf-8"
    else:
        return encoding
def __init__(self, stream, in_enc='utf-8', out_enc='utf-8', errors="replace"):
    """
    Initialization

    :Parameters:
     - `stream`: The stream to wrap
     - `in_enc`: The input encoding that should be assumed if a pure
       string is written
     - `out_enc`: The output encoding
     - `errors`: The error handling indicator, when a unicode error occurs.
       (The default is quite lenient and writes replace characters on
       errors)

    :Types:
     - `stream`: ``file``
     - `in_enc`: ``str``
     - `out_enc`: ``str``
     - `errors`: ``str``
    """
    import codecs
    writer = codecs.lookup(out_enc)[3]
    super(UnicodeStream, self).__init__(writer(stream, errors))
    self.decode = codecs.lookup(in_enc)[1]
    self.err = errors
def _worker_start(self, source):
    self.log.info("starting worker: {0}".format(source))
    if not self._worker_cfg_ok(source):
        self.log.error("skipping worker {0} because of broken configuration".format(source))
        return
    file = source.rstrip(':source')
    conf = self._config_dict[source]
    encoding = conf.get('encoding', 'utf-8')
    try:
        codecs.lookup(encoding)
    except LookupError:
        self.log.fatal("encoding {0} not found for source {1}: not starting worker".format(encoding, source))
        return
    _worker = Worker(
        file=file,
        msgqueue=self._deque,
        tags=conf['tags'],
        template=conf['template'],
        syslog_facility=conf['syslog_facility'],
        syslog_severity=conf['syslog_severity'],
        syslog_tag=conf['syslog_tag'],
        regex=conf['regex'],
        encoding=encoding,
    )
    _worker.start()
    self._worker[source] = _worker
    self.log.info("worker: {0} running".format(source))
def _check_text_encoding(arg):
    """Check if text-encoding argument is acceptable."""
    try:
        codecs.lookup(arg)
    except LookupError:
        return False
    return True
def has_encoding(name):
    try:
        import codecs
        codecs.lookup(name)
        return True
    except LookupError:
        return False
def set_encoding(self, encoding):
    # verify encoding is valid; codecs.lookup raises the builtin LookupError
    # (the original caught "codecs.LookupError", which does not exist)
    import codecs
    try:
        codecs.lookup(encoding)
    except LookupError as err:
        raise ValueError(err.args[0])
def test_set_locale(self):
    if len(self.locales) == 1:
        pytest.skip("Only a single locale found, no point in "
                    "trying to test setting another locale")
    if all(x is None for x in CURRENT_LOCALE):
        # Not sure why, but on some travis runs with pytest,
        # getlocale() returned (None, None).
        pytest.skip("CURRENT_LOCALE is not set.")
    if LOCALE_OVERRIDE is None:
        lang, enc = 'it_CH', 'UTF-8'
    elif LOCALE_OVERRIDE == 'C':
        lang, enc = 'en_US', 'ascii'
    else:
        lang, enc = LOCALE_OVERRIDE.split('.')
    enc = codecs.lookup(enc).name
    new_locale = lang, enc
    if not tm._can_set_locale(new_locale):
        with pytest.raises(locale.Error):
            with tm.set_locale(new_locale):
                pass
    else:
        with tm.set_locale(new_locale) as normalized_locale:
            new_lang, new_enc = normalized_locale.split('.')
            new_enc = codecs.lookup(enc).name
            normalized_locale = new_lang, new_enc
            assert normalized_locale == new_locale
    current_locale = locale.getlocale()
    assert current_locale == CURRENT_LOCALE
def is_valid_code(code):
    """Return ``True`` if encoding `code` is valid."""
    try:
        codecs.lookup(code)
    except LookupError:
        return False
    return True
def process(self, chain, processing, request: Request, requestCnt: RequestContent,
            response: ErrorResponse, responseCnt: ResponseContent,
            Target: TargetContent, **keyargs):
    '''
    Parse the request content.
    '''
    assert isinstance(chain, Chain), 'Invalid processors chain %s' % chain
    assert isinstance(processing, Processing), 'Invalid processing %s' % processing
    assert isinstance(request, Request), 'Invalid request %s' % request
    assert isinstance(requestCnt, RequestContent), 'Invalid request content %s' % requestCnt
    assert isinstance(responseCnt, ResponseContent), 'Invalid response content %s' % responseCnt
    assert isinstance(response, ErrorResponse), 'Invalid response %s' % response

    if response.isSuccess is False:
        return  # Skip in case the response is in error
    if not request.invoker:
        return
    assert isinstance(request.invoker, Invoker), 'Invalid invoker %s' % request.invoker
    if not request.invoker.decodingContent:
        return

    target = Target(arg=chain.arg, converter=request.converterContent)
    assert isinstance(target, TargetContent), 'Invalid target %s' % target

    # Resolving the character set
    if requestCnt.charSet:
        try:
            codecs.lookup(requestCnt.charSet)
        except LookupError:
            requestCnt.charSet = self.charSetDefault
    else:
        requestCnt.charSet = self.charSetDefault
    if not requestCnt.type:
        requestCnt.type = responseCnt.type

    if not processing.wingIn(chain, True, decoding=request.invoker.decodingContent,
                             target=target).execute(CONSUMED):
        if RequestContent.doFetchNextContent in requestCnt and requestCnt.doFetchNextContent:
            chain.arg.requestCnt = requestCnt.doFetchNextContent()
        else:
            chain.arg.requestCnt = None
        # We process the chain with the next content or no content.
    elif response.isSuccess is not False:
        ENCODING_UNKNOWN.set(response)
        addError(response, 'Content type \'%(type)s\' not supported for parsing',
                 type=requestCnt.type)
def _open(self):
    # Open the bagit.txt file, and load any tags from it, including
    # the required version and encoding.
    bagit_file_path = os.path.join(self.path, "bagit.txt")
    if not isfile(bagit_file_path):
        raise BagError("No bagit.txt found: %s" % bagit_file_path)
    self.tags = tags = _load_tag_file(bagit_file_path)
    try:
        self.version = tags["BagIt-Version"]
        self.encoding = tags["Tag-File-Character-Encoding"]
    except KeyError as e:
        raise BagError("Missing required tag in bagit.txt: %s" % e)
    if self.version in ["0.93", "0.94", "0.95"]:
        self.tag_file_name = "package-info.txt"
    elif self.version in ["0.96", "0.97"]:
        self.tag_file_name = "bag-info.txt"
    else:
        raise BagError("Unsupported bag version: %s" % self.version)
    try:
        codecs.lookup(self.encoding)
    except LookupError:
        # codecs.lookup raises the builtin LookupError; the original caught
        # "codecs.LookupError", which is not an attribute of the module
        raise BagValidationError("Unsupported encoding: %s" % self.encoding)
    info_file_path = os.path.join(self.path, self.tag_file_name)
    if os.path.exists(info_file_path):
        self.info = _load_tag_file(info_file_path, encoding=self.encoding)
    self._load_manifests()
def clean(self, content, encoding=None):
    """ Takes the HTML content given, parses it, and converts stray markup.

    The content can be either:
     - A unicode string, in which case the encoding parameter is not required
     - An ordinary string, in which case the encoding will be used
     - A file-like object, in which case the encoding will be used if present

    The method returns a unicode string which is suitable for addition to a
    simpleTALES.Context object.
    """
    if isinstance(content, types.StringType):
        # Not unicode, convert
        converter = codecs.lookup(encoding)[1]
        file = StringIO.StringIO(converter(content)[0])
    elif isinstance(content, types.UnicodeType):
        file = StringIO.StringIO(content)
    else:
        # Treat it as a file type object - and convert it if we have an encoding
        if encoding is not None:
            converterStream = codecs.lookup(encoding)[2]
            file = converterStream(content)
        else:
            file = content
    self.outputFile = StringIO.StringIO(u"")
    self.feed(file.read())
    self.close()
    return self.outputFile.getvalue()
def encoding_exists(encoding, _aliases=_ENCODING_ALIASES):
    """Returns ``True`` if encoding is valid, otherwise returns ``False``"""
    try:
        codecs.lookup(resolve_encoding(encoding, _aliases))
    except LookupError:
        return False
    return True
def add_cp65001_codec():
    try:
        codecs.lookup('cp65001')
    except LookupError:
        codecs.register(
            lambda name: name == 'cp65001' and codecs.lookup('utf-8') or None)
    return
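# A quick illustrative check of the helper above (a sketch, not part of the
# original source): once registered, 'cp65001' resolves to UTF-8; on
# interpreters that already ship a native cp65001 codec the registration is
# simply skipped.
import codecs

add_cp65001_codec()
print(codecs.lookup('cp65001').name)  # 'utf-8', or 'cp65001' where it exists natively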
def is_known_charset(charset):
    """Checks if the given charset is known to Python."""
    try:
        codecs.lookup(charset)
    except LookupError:
        return False
    return True
def detect_xml_encoding(raw, verbose=False, assume_utf8=False):
    if not raw or isinstance(raw, unicode):
        return raw, None
    for x in ('utf8', 'utf-16-le', 'utf-16-be'):
        bom = getattr(codecs, 'BOM_' + x.upper().replace('-16', '16').replace('-', '_'))
        if raw.startswith(bom):
            return raw[len(bom):], x
    encoding = None
    for pat in ENCODING_PATS:
        match = pat.search(raw)
        if match:
            encoding = match.group(1)
            break
    if encoding is None:
        encoding = force_encoding(raw, verbose, assume_utf8=assume_utf8)
    if encoding.lower().strip() == 'macintosh':
        encoding = 'mac-roman'
    if encoding.lower().replace('_', '-').strip() in (
            'gb2312', 'chinese', 'csiso58gb231280', 'euc-cn', 'euccn',
            'eucgb2312-cn', 'gb2312-1980', 'gb2312-80', 'iso-ir-58'):
        # Microsoft Word exports to HTML with encoding incorrectly set to
        # gb2312 instead of gbk. gbk is a superset of gb2312, anyway.
        encoding = 'gbk'
    try:
        codecs.lookup(encoding)
    except LookupError:
        encoding = 'utf-8'
    return raw, encoding
def test_set_locale():
    if all(x is None for x in _current_locale):
        # Not sure why, but on some Travis runs with pytest,
        # getlocale() returned (None, None).
        pytest.skip("Current locale is not set.")
    locale_override = os.environ.get("LOCALE_OVERRIDE", None)
    if locale_override is None:
        lang, enc = "it_CH", "UTF-8"
    elif locale_override == "C":
        lang, enc = "en_US", "ascii"
    else:
        lang, enc = locale_override.split(".")
    enc = codecs.lookup(enc).name
    new_locale = lang, enc
    if not can_set_locale(new_locale):
        msg = "unsupported locale setting"
        with pytest.raises(locale.Error, match=msg):
            with set_locale(new_locale):
                pass
    else:
        with set_locale(new_locale) as normalized_locale:
            new_lang, new_enc = normalized_locale.split(".")
            new_enc = codecs.lookup(enc).name
            normalized_locale = new_lang, new_enc
            assert normalized_locale == new_locale
    # Once we exit the "with" statement, locale should be back to what it was.
    current_locale = locale.getlocale()
    assert current_locale == _current_locale
def _setEncoding(self, encoding):
    """
    :param encoding: a valid encoding to be used. Currently only valid
        Python encodings are allowed.
    :exceptions:
        - :exc:`~xml.dom.NoModificationAllowedErr`:
          Raised if this encoding rule is readonly.
        - :exc:`~xml.dom.SyntaxErr`:
          Raised if the specified encoding value has a syntax error and
          is unparsable.
    """
    self._checkReadonly()
    tokenizer = self._tokenize2(encoding)
    encodingtoken = self._nexttoken(tokenizer)
    unexpected = self._nexttoken(tokenizer)
    if not encodingtoken or unexpected or \
            self._prods.IDENT != self._type(encodingtoken):
        self._log.error('CSSCharsetRule: Syntax Error in encoding value '
                        '%r.' % encoding)
    else:
        try:
            codecs.lookup(encoding)
        except LookupError:
            self._log.error('CSSCharsetRule: Unknown (Python) encoding %r.'
                            % encoding)
        else:
            self._encoding = encoding.lower()
def get_metadata_from_reader(rdr):
    raw = rdr.GetFile(rdr.home)
    home = BeautifulSoup(xml_to_unicode(raw, strip_encoding_pats=True,
                                        resolve_entities=True)[0])
    title = rdr.title
    try:
        x = rdr.GetEncoding()
        codecs.lookup(x)
        enc = x
    except:
        enc = 'cp1252'
    title = force_unicode(title, enc)
    authors = _get_authors(home)
    mi = MetaInformation(title, authors)
    publisher = _get_publisher(home)
    if publisher:
        mi.publisher = publisher
    isbn = _get_isbn(home)
    if isbn:
        mi.isbn = isbn
    comments = _get_comments(home)
    if comments:
        mi.comments = comments
    cdata = _get_cover(home, rdr)
    if cdata is not None:
        mi.cover_data = ('jpg', cdata)
    return mi
def VerifyUcs2Data(self, FileIn, FileName, Encoding):
    Ucs2Info = codecs.lookup('ucs-2')
    #
    # Convert to unicode
    #
    try:
        FileDecoded = codecs.decode(FileIn, Encoding)
        Ucs2Info.encode(FileDecoded)
    except:
        UniFile = StringIO.StringIO(FileIn)
        Info = codecs.lookup(Encoding)
        (Reader, Writer) = (Info.streamreader, Info.streamwriter)
        File = codecs.StreamReaderWriter(UniFile, Reader, Writer)
        LineNumber = 0
        ErrMsg = lambda Encoding, LineNumber: \
            '%s contains invalid %s characters on line %d.' % \
            (FileName, Encoding, LineNumber)
        while True:
            LineNumber = LineNumber + 1
            try:
                Line = File.readline()
                if Line == '':
                    EdkLogger.error('Unicode File Parser', PARSER_ERROR,
                                    ErrMsg(Encoding, LineNumber))
                Ucs2Info.encode(Line)
            except:
                EdkLogger.error('Unicode File Parser', PARSER_ERROR,
                                ErrMsg('UCS-2', LineNumber))
def coding_spec(data):
    """Return the encoding declaration according to PEP 263.

    When checking encoded data, only the first two lines should be passed
    in to avoid a UnicodeDecodeError if the rest of the data is not unicode.
    The first two lines would contain the encoding specification.

    Raise a LookupError if the encoding is declared but unknown.
    """
    if isinstance(data, bytes):
        try:
            lines = data.decode('utf-8')
        except UnicodeDecodeError:
            return None
    else:
        lines = data
    # consider only the first two lines
    if '\n' in lines:
        lst = lines.split('\n')[:2]
    elif '\r' in lines:
        lst = lines.split('\r')[:2]
    else:
        # keep the single line whole (list(lines) would split it into
        # individual characters and break the cookie match)
        lst = [lines]
    str = '\n'.join(lst)
    match = coding_re.search(str)
    if not match:
        return None
    name = match.group(1)
    try:
        codecs.lookup(name)
    except LookupError:
        # The standard encoding error does not indicate the encoding
        raise LookupError("Unknown encoding: " + name)
    return name
def decoding(text):
    import sys
    import codecs
    import locale
    if isinstance(text, unicode):
        return text
    elif isinstance(text, (basestring, str)):
        pass
    else:
        # no decoding needed; return the original object if its type is not
        # a string type
        return text
        # raise RuntimeError("expected type is str, but got {type} type".format(type=type(text)))
    mswindows = (sys.platform == "win32")
    try:
        encoding = locale.getdefaultlocale()[1] or ('ascii' if not mswindows else 'gbk')
        codecs.lookup(encoding)  # codecs.lookup('cp936').name == 'gbk'
    except Exception as _:
        del _
        # 'gbk' is the Windows default encoding for the Chinese locale 'zh-CN'
        encoding = 'ascii' if not mswindows else 'gbk'
    msg = text
    if mswindows:
        try:
            msg = text.decode(encoding)
            return msg
        except (UnicodeDecodeError, UnicodeEncodeError):
            pass
    return msg
def coding_spec(data):
    """Return the encoding declaration according to PEP 263.

    When checking encoded data, only the first two lines should be passed
    in to avoid a UnicodeDecodeError if the rest of the data is not unicode.
    The first two lines would contain the encoding specification.

    Raise a LookupError if the encoding is declared but unknown.
    """
    if isinstance(data, bytes):
        # This encoding might be wrong. However, the coding
        # spec must be ASCII-only, so any non-ASCII characters
        # around here will be ignored. Decoding to Latin-1 should
        # never fail (except for memory outage)
        lines = data.decode('iso-8859-1')
    else:
        lines = data
    # consider only the first two lines
    if '\n' in lines:
        lst = lines.split('\n')[:2]
    elif '\r' in lines:
        lst = lines.split('\r')[:2]
    else:
        # keep the single line whole (list(lines) would split it into
        # individual characters and break the cookie match)
        lst = [lines]
    str = '\n'.join(lst)
    match = coding_re.search(str)
    if not match:
        return None
    name = match.group(1)
    try:
        codecs.lookup(name)
    except LookupError:
        # The standard encoding error does not indicate the encoding
        raise LookupError("Unknown encoding: " + name)
    return name
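# A short usage sketch for the function above. The module-level `coding_re`
# is not shown in the snippet; the PEP 263 pattern below is an assumption
# modelled on IDLE's regex:
import codecs
import re

coding_re = re.compile(r'^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)',
                       re.ASCII | re.MULTILINE)

print(coding_spec(b'# -*- coding: latin-1 -*-\nx = 1\n'))  # 'latin-1'
print(coding_spec(b'x = 1\n'))                             # None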
def get_charset(self):
    charset = getattr(self, 'charset', 'windows-1252')
    try:
        codecs.lookup(charset)
    except LookupError:
        raise ImproperlyConfigured("Invalid output characterset (%s) provided" % charset)
    return charset
def search_function(name):
    name = encodings.normalize_encoding(name)  # Rather undocumented...
    if name in _extended_encodings:
        if name not in _cache:
            base_encoding, mapping = _extended_encodings[name]
            assert name[-4:] == "_ttx"
            # Python 2 didn't have any of the encodings that we are implementing
            # in this file. Python 3 added aliases for the East Asian ones, mapping
            # them "temporarily" to the same base encoding as us, with a comment
            # suggesting that full implementation will appear some time later.
            # As such, try the Python version of the x_mac_... first, if that is found,
            # use *that* as our base encoding. This would make our encoding upgrade
            # to the full encoding when and if Python finally implements that.
            # http://bugs.python.org/issue24041
            base_encodings = [name[:-4], base_encoding]
            for base_encoding in base_encodings:
                try:
                    codecs.lookup(base_encoding)
                except LookupError:
                    continue
                _cache[name] = ExtendCodec(name, base_encoding, mapping)
                break
        return _cache[name].info
    return None
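# For a custom search function like the one above to take effect, it must be
# registered once, typically at module import time. A minimal sketch of the
# usual pattern (the surrounding module presumably does the equivalent):
import codecs

codecs.register(search_function)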
def test_getpreferredencoding(self):
    # Invoke getpreferredencoding to make sure it does not cause exceptions.
    enc = locale.getpreferredencoding()
    if enc:
        # If encoding non-empty, make sure it is valid
        codecs.lookup(enc)
def __init__(self, git_dir, log, git_bin='git', git_fs_encoding=None,
             rev_cache=None):
    """Initialize PyGit.Storage instance

    `git_dir`: path to .git folder;
            this setting is not affected by the `git_fs_encoding` setting

    `log`: logger instance

    `git_bin`: path to executable
            this setting is not affected by the `git_fs_encoding` setting

    `git_fs_encoding`: encoding used for paths stored in git repository;
            if `None`, no implicit decoding/encoding to/from unicode objects
            is performed, and bytestrings are returned instead
    """
    self.logger = log
    self.commit_encoding = None
    # caches
    self.__rev_cache = rev_cache or self.RevCache.empty()
    self.__rev_cache_refresh = True
    self.__rev_cache_lock = Lock()
    # cache the last 200 commit messages
    self.__commit_msg_cache = SizedDict(200)
    self.__commit_msg_lock = Lock()
    self.__cat_file_pipe = None
    self.__cat_file_pipe_lock = Lock()
    if git_fs_encoding is not None:
        # validate encoding name
        codecs.lookup(git_fs_encoding)
        # setup conversion functions
        self._fs_to_unicode = lambda s: s.decode(git_fs_encoding, 'replace')
        self._fs_from_unicode = lambda s: s.encode(git_fs_encoding)
    else:
        # pass bytestrings as-is w/o any conversion
        self._fs_to_unicode = self._fs_from_unicode = lambda s: s
    # simple sanity checking
    __git_file_path = partial(os.path.join, git_dir)
    control_files = ['HEAD', 'objects', 'refs']
    control_files_exist = \
        lambda p: all(map(os.path.exists, map(p, control_files)))
    if not control_files_exist(__git_file_path):
        __git_file_path = partial(os.path.join, git_dir, '.git')
        if os.path.exists(__git_file_path()) and \
                control_files_exist(__git_file_path):
            git_dir = __git_file_path()
        else:
            self.logger.error("GIT control files missing in '%s'" % git_dir)
            raise GitError("GIT control files not found, maybe wrong "
                           "directory?")
    # at least, check that the HEAD file is readable
    head_file = os.path.join(git_dir, 'HEAD')
    try:
        with open(head_file, 'rb'):
            pass
    except IOError as e:
        raise GitError(
            "Make sure the Git repository '%s' is readable: %s"
            % (git_dir, to_unicode(e)))
    self.repo = GitCore(git_dir, git_bin, log, git_fs_encoding)
    self.repo_path = git_dir
    self.logger.debug("PyGIT.Storage instance for '%s' is constructed",
                      git_dir)
def __init__(self, file, encoding='UTF-8'):
    self.path = []
    self.encoding = encoding
    encoder, decoder, streamReader, streamWriter = codecs.lookup(encoding)
    self.file = streamWriter(file)
    self.margin = 78
def is_ascii_encoding(encoding):
    """Checks if a given encoding is ascii."""
    try:
        return codecs.lookup(encoding).name == "ascii"
    except LookupError:
        return False
def encoding(self, encoding):
    if encoding is None:
        encoding = locale.getpreferredencoding()
    codecs.lookup(encoding)  # raise early
    self._encoding = encoding
# See the License for the specific language governing permissions and
# limitations under the License.

"""Functionality for working with symbolic grapheme representations.

This is intended to make it easier to work with scripts that developers may
not be able to read fluently, by providing a one-to-one transliteration
between Unicode characters and short symbolic names.
"""

from __future__ import unicode_literals

import codecs
import re
import sys

STDOUT = codecs.lookup('utf-8').streamwriter(sys.stdout)
STDERR = codecs.lookup('utf-8').streamwriter(sys.stderr)


def WriteSymbolTable(writer, int2sym, epsilon='<epsilon>'):
    """Write a symbol table in OpenFst text format."""
    if epsilon:
        writer.write('%s\t0\n' % epsilon)
    keys = int2sym.keys()
    keys.sort()
    for k in keys:
        writer.write('%s\t%d\n' % (int2sym[k], k))
    return


def MakeSymbolToCharDict(codepoint_to_symbol):
def getcodec(encoding):
    if encoding is None:
        encoding = locale.getpreferredencoding()
    codec = codecs.lookup(encoding)
    return codec
def TextIOWrapper(stream, encoding):
    return codecs.lookup(encoding or 'ascii')[2](stream)
def __init__(self, model, data, mapping, layer=0, source_srs=None,
             encoding='utf-8', transaction_mode='commit_on_success',
             transform=True, unique=None, using=None):
    """
    A LayerMapping object is initialized using the given Model (not an
    instance), a DataSource (or string path to an OGR-supported data file),
    and a mapping dictionary. See the module level docstring for more
    details and keyword argument usage.
    """
    # Getting the DataSource and the associated Layer.
    if isinstance(data, str):
        self.ds = DataSource(data, encoding=encoding)
    else:
        self.ds = data
    self.layer = self.ds[layer]
    self.using = using if using is not None else router.db_for_write(model)
    self.spatial_backend = connections[self.using].ops
    # Setting the mapping & model attributes.
    self.mapping = mapping
    self.model = model
    # Checking the layer -- initialization of the object will fail if
    # things don't check out before hand.
    self.check_layer()
    # Getting the geometry column associated with the model (an
    # exception will be raised if there is no geometry column).
    if connections[self.using].features.supports_transform:
        self.geo_field = self.geometry_field()
    else:
        transform = False
    # Checking the source spatial reference system, and getting
    # the coordinate transformation object (unless the `transform`
    # keyword is set to False)
    if transform:
        self.source_srs = self.check_srs(source_srs)
        self.transform = self.coord_transform()
    else:
        self.transform = transform
    # Setting the encoding for OFTString fields, if specified.
    if encoding:
        # Making sure the encoding exists, if not a LookupError
        # exception will be thrown.
        from codecs import lookup
        lookup(encoding)
        self.encoding = encoding
    else:
        self.encoding = None
    if unique:
        self.check_unique(unique)
        transaction_mode = 'autocommit'  # Has to be set to autocommit.
        self.unique = unique
    else:
        self.unique = None
    # Setting the transaction decorator with the function in the
    # transaction modes dictionary.
    self.transaction_mode = transaction_mode
    if transaction_mode == 'autocommit':
        self.transaction_decorator = None
    elif transaction_mode == 'commit_on_success':
        self.transaction_decorator = transaction.atomic
    else:
        raise LayerMapError('Unrecognized transaction mode: %s' % transaction_mode)
isxp = isoldvista = False
if iswindows:
    wver = sys.getwindowsversion()
    isxp = wver.major < 6
    isoldvista = wver.build < 6002
is64bit = sys.maxsize > (1 << 32)
isworker = 'CALIBRE_WORKER' in os.environ or 'CALIBRE_SIMPLE_WORKER' in os.environ
if isworker:
    os.environ.pop('CALIBRE_FORCE_ANSI', None)
FAKE_PROTOCOL, FAKE_HOST = 'https', 'calibre-internal.invalid'
VIEWER_APP_UID = 'com.calibre-ebook.viewer'
EDITOR_APP_UID = 'com.calibre-ebook.edit-book'
MAIN_APP_UID = 'com.calibre-ebook.main-gui'
try:
    preferred_encoding = locale.getpreferredencoding()
    codecs.lookup(preferred_encoding)
except:
    preferred_encoding = 'utf-8'
win32event = importlib.import_module('win32event') if iswindows else None
winerror = importlib.import_module('winerror') if iswindows else None
win32api = importlib.import_module('win32api') if iswindows else None
fcntl = None if iswindows else importlib.import_module('fcntl')

_osx_ver = None


def get_osx_version():
    global _osx_ver
    if _osx_ver is None:
        import platform
UNKNOWN_DBMS = "Unknown"

# String used for representation of unknown DBMS version
UNKNOWN_DBMS_VERSION = "Unknown"

# Dynamicity boundary length used in dynamicity removal engine
DYNAMICITY_BOUNDARY_LENGTH = 20

# Dummy user prefix used in dictionary attack
DUMMY_USER_PREFIX = "__dummy__"

# Reference: http://en.wikipedia.org/wiki/ISO/IEC_8859-1
DEFAULT_PAGE_ENCODING = "iso-8859-1"

try:
    codecs.lookup(DEFAULT_PAGE_ENCODING)
except LookupError:
    DEFAULT_PAGE_ENCODING = "utf8"

# Marker for program piped input
STDIN_PIPE_DASH = '-'

# URL used in dummy runs
DUMMY_URL = "http://foo/bar?id=1"

# Timeout used during initial websocket (pull) testing
WEBSOCKET_INITIAL_TIMEOUT = 3

# The name of the operating system dependent module imported. The following
# names have currently been registered: 'posix', 'nt', 'mac', 'os2', 'ce',
# 'java', 'riscos'
PLATFORM = os.name
PYVERSION = sys.version.split()[0]
def __init__(self, message):
    self.message = message
    self.reply_channel = self.message.reply_channel
    self._content_length = 0
    self._post_parse_error = False
    self._read_started = False
    self.resolver_match = None
    # Path info
    self.path = self.message['path']
    self.script_name = self.message.get('root_path', '')
    if self.script_name:
        # TODO: Better is-prefix checking, slash handling?
        self.path_info = self.path[len(self.script_name):]
    else:
        self.path_info = self.path
    # HTTP basics
    self.method = self.message['method'].upper()
    self.META = {
        "REQUEST_METHOD": self.method,
        "QUERY_STRING": self.message.get('query_string', ''),
        "SCRIPT_NAME": self.script_name,
        "PATH_INFO": self.path_info,
        # Old code will need these for a while
        "wsgi.multithread": True,
        "wsgi.multiprocess": True,
    }
    if self.message.get('client', None):
        self.META['REMOTE_ADDR'] = self.message['client'][0]
        self.META['REMOTE_HOST'] = self.META['REMOTE_ADDR']
        self.META['REMOTE_PORT'] = self.message['client'][1]
    if self.message.get('server', None):
        self.META['SERVER_NAME'] = self.message['server'][0]
        self.META['SERVER_PORT'] = six.text_type(self.message['server'][1])
    else:
        self.META['SERVER_NAME'] = "unknown"
        self.META['SERVER_PORT'] = "0"
    # Handle old-style headers for a transition period
    if "headers" in self.message and isinstance(self.message['headers'], dict):
        self.message['headers'] = [
            (x.encode("latin1"), y)
            for x, y in self.message['headers'].items()
        ]
    # Headers go into META
    for name, value in self.message.get('headers', []):
        name = name.decode("latin1")
        if name == "content-length":
            corrected_name = "CONTENT_LENGTH"
        elif name == "content-type":
            corrected_name = "CONTENT_TYPE"
        else:
            corrected_name = 'HTTP_%s' % name.upper().replace("-", "_")
        # HTTPbis says only ASCII chars are allowed in headers, but we
        # decode latin1 just in case
        value = value.decode("latin1")
        if corrected_name in self.META:
            value = self.META[corrected_name] + "," + value
        self.META[corrected_name] = value
    # Pull out request encoding if we find it
    if "CONTENT_TYPE" in self.META:
        self.content_type, self.content_params = cgi.parse_header(
            self.META["CONTENT_TYPE"])
        if 'charset' in self.content_params:
            try:
                codecs.lookup(self.content_params['charset'])
            except LookupError:
                pass
            else:
                self.encoding = self.content_params['charset']
    else:
        self.content_type, self.content_params = "", {}
    # Pull out content length info
    if self.META.get('CONTENT_LENGTH', None):
        try:
            self._content_length = int(self.META['CONTENT_LENGTH'])
        except (ValueError, TypeError):
            pass
    # Body handling
    self._body = message.get("body", b"")
    if message.get("body_channel", None):
        body_handle_start = time.time()
        while True:
            # Get the next chunk from the request body channel
            chunk = None
            while chunk is None:
                # If they take too long, raise request timeout and the handler
                # will turn it into a response
                if time.time() - body_handle_start > self.body_receive_timeout:
                    raise RequestTimeout()
                _, chunk = message.channel_layer.receive_many(
                    [message['body_channel']],
                    block=True,
                )
            # If chunk contains close, abort.
            if chunk.get("closed", False):
                raise RequestAborted()
            # Add content to body (default to b"" since the body must stay
            # bytes; the original defaulted to a str, which would break the
            # assert below)
            self._body += chunk.get("content", b"")
            # Exit loop if this was the last
            if not chunk.get("more_content", False):
                break
    assert isinstance(self._body, six.binary_type), "Body is not bytes"
    # Add a stream-a-like for the body
    self._stream = BytesIO(self._body)
    # Other bits
    self.resolver_match = None
# -*- coding: utf-8 -*-
"""Packaging logic for beem."""
import codecs
import io
import os
import sys

from setuptools import setup

# Work around mbcs bug in distutils.
# http://bugs.python.org/issue10945
try:
    codecs.lookup('mbcs')
except LookupError:
    ascii = codecs.lookup('ascii')
    codecs.register(lambda name, enc=ascii: {True: enc}.get(name == 'mbcs'))

VERSION = '0.24.27'

tests_require = ['mock >= 2.0.0', 'pytest', 'pytest-mock', 'parameterized']

requires = [
    "ecdsa",
    "requests",
    "websocket-client",
    "appdirs",
    "scrypt",
    "pycryptodomex",
    "pytz",
    "Click",
# along with this program; if not, write to the Free Software       #
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,        #
# MA  02110-1301, USA.                                              #
#####################################################################

"""Main game executable."""

# Register the latin-1 encoding
import codecs
import encodings.iso8859_1
import encodings.utf_8
codecs.register(lambda encoding: encodings.iso8859_1.getregentry())
codecs.register(lambda encoding: encodings.utf_8.getregentry())
assert codecs.lookup("iso-8859-1")
assert codecs.lookup("utf-8")

import Config
from GameEngine import GameEngine
from MainMenu import MainMenu
import Log
import Version

import getopt
import sys
import os
import codecs
import Resource
import pygame
# SickRage is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with SickRage. If not, see <http://www.gnu.org/licenses/>.

# Check needed software dependencies to nudge users to fix their setup

# pylint: disable=broad-except
# Catching too general exception

import codecs
codecs.register(lambda name: codecs.lookup('utf-8') if name == 'cp65001' else None)

import time
import signal
import sys
import subprocess
import traceback
import os

sys.path.insert(1, os.path.abspath(os.path.join(os.path.dirname(__file__), 'lib')))

import shutil
import shutil_custom
def _verify_python3_env():
    """Ensures that the environment is good for unicode on Python 3."""
    if PY2:
        return
    try:
        import locale
        fs_enc = codecs.lookup(locale.getpreferredencoding()).name
    except Exception:
        fs_enc = 'ascii'
    if fs_enc != 'ascii':
        return

    extra = ''
    if os.name == 'posix':
        import subprocess
        rv = subprocess.Popen(['locale', '-a'], stdout=subprocess.PIPE,
                              stderr=subprocess.PIPE).communicate()[0]
        good_locales = set()
        has_c_utf8 = False

        # Make sure we're operating on text here.
        if isinstance(rv, bytes):
            rv = rv.decode('ascii', 'replace')

        for line in rv.splitlines():
            locale = line.strip()
            if locale.lower().endswith(('.utf-8', '.utf8')):
                good_locales.add(locale)
                if locale.lower() in ('c.utf8', 'c.utf-8'):
                    has_c_utf8 = True

        extra += '\n\n'
        if not good_locales:
            extra += (
                'Additional information: on this system no suitable UTF-8\n'
                'locales were discovered. This most likely requires resolving\n'
                'by reconfiguring the locale system.'
            )
        elif has_c_utf8:
            extra += (
                'This system supports the C.UTF-8 locale which is recommended.\n'
                'You might be able to resolve your issue by exporting the\n'
                'following environment variables:\n\n'
                '    export LC_ALL=C.UTF-8\n'
                '    export LANG=C.UTF-8'
            )
        else:
            extra += (
                'This system lists a couple of UTF-8 supporting locales that\n'
                'you can pick from. The following suitable locales were\n'
                'discovered: %s'
            ) % ', '.join(sorted(good_locales))

        bad_locale = None
        for locale in os.environ.get('LC_ALL'), os.environ.get('LANG'):
            if locale and locale.lower().endswith(('.utf-8', '.utf8')):
                bad_locale = locale
            if locale is not None:
                break
        if bad_locale is not None:
            extra += (
                '\n\nClick discovered that you exported a UTF-8 locale\n'
                'but the locale system could not pick up from it because\n'
                'it does not exist. The exported locale is "%s" but it\n'
                'is not supported'
            ) % bad_locale

    raise RuntimeError('Click will abort further execution because Python 3 '
                       'was configured to use ASCII as encoding for the '
                       'environment. Either run this under Python 2 or '
                       'consult http://click.pocoo.org/python3/ for '
                       'mitigation steps.' + extra)
from __future__ import division
from __future__ import print_function

import codecs
import functools

from absl import app as absl_app
from absl import flags

# This codifies help string conventions and makes it easy to update them if
# necessary. Currently the only major effect is that help bodies start on the
# line after flags are listed. All flag definitions should wrap the text bodies
# with help wrap when calling DEFINE_*.
_help_wrap = functools.partial(flags.text_wrap, length=80, indent="",
                               firstline_indent="\n")

# Pretty formatting causes issues when utf-8 is not installed on a system.
try:
    codecs.lookup("utf-8")
    help_wrap = _help_wrap
except LookupError:
    def help_wrap(text, *args, **kwargs):
        return _help_wrap(text, *args, **kwargs).replace("\ufeff", "")

# Replace None with h to also allow -h
absl_app.HelpshortFlag.SHORT_NAME = "h"
import codecs  # needed for codecs.lookup below (missing from the original excerpt)
import sys
import tempfile

from pytest import mark, raises

from wand.image import ClosedImageError, Image
from wand.color import Color
from wand.compat import PY3, text, text_type

try:
    filesystem_encoding = sys.getfilesystemencoding()
except RuntimeError:
    unicode_filesystem_encoding = False
else:
    try:
        codec_info = codecs.lookup(filesystem_encoding)
    except LookupError:
        unicode_filesystem_encoding = False
    else:
        unicode_filesystem_encoding = codec_info.name in (
            'utf-8', 'utf-16', 'utf-16-be', 'utf-16-le',
            'utf-32', 'utf-32-be', 'utf-32-le',
            'mbcs'  # for Windows
        )

try:
def str_to_display(data, desc=None):
    # type: (Union[bytes, Text], Optional[str]) -> Text
    """
    For display or logging purposes, convert a bytes object (or text) to
    text (e.g. unicode in Python 2) safe for output.

    :param desc: An optional phrase describing the input data, for use in
        the log message if a warning is logged. Defaults to "Bytes object".

    This function should never error out and so can take a best effort
    approach. It is okay to be lossy if needed since the return value is
    just for display.

    We assume the data is in the locale preferred encoding. If it won't
    decode properly, we warn the user but decode as best we can.

    We also ensure that the output can be safely written to standard output
    without encoding errors.
    """
    if isinstance(data, text_type):
        return data

    # Otherwise, data is a bytes object (str in Python 2).
    # First, get the encoding we assume. This is the preferred
    # encoding for the locale, unless that is not found, or
    # it is ASCII, in which case assume UTF-8
    encoding = locale.getpreferredencoding()
    if (not encoding) or codecs.lookup(encoding).name == "ascii":
        encoding = "utf-8"

    # Now try to decode the data - if we fail, warn the user and
    # decode with replacement.
    try:
        decoded_data = data.decode(encoding)
    except UnicodeDecodeError:
        logger.warning(
            '%s does not appear to be encoded as %s',
            desc or 'Bytes object',
            encoding,
        )
        decoded_data = data.decode(encoding, errors=backslashreplace_decode)

    # Make sure we can print the output, by encoding it to the output
    # encoding with replacement of unencodable characters, and then
    # decoding again.
    # We use stderr's encoding because it's less likely to be
    # redirected and if we don't find an encoding we skip this
    # step (on the assumption that output is wrapped by something
    # that won't fail).
    # The double getattr is to deal with the possibility that we're
    # being called in a situation where sys.__stderr__ doesn't exist,
    # or doesn't have an encoding attribute. Neither of these cases
    # should occur in normal pip use, but there's no harm in checking
    # in case people use pip in (unsupported) unusual situations.
    output_encoding = getattr(getattr(sys, "__stderr__", None),
                              "encoding", None)

    if output_encoding:
        output_encoded = decoded_data.encode(output_encoding,
                                             errors="backslashreplace")
        decoded_data = output_encoded.decode(output_encoding)

    return decoded_data
# coding=utf-8
import locale, codecs, os

if codecs.lookup(locale.getpreferredencoding()).name == 'ascii':
    os.environ['LANG'] = 'en_US.utf-8'

from dots.interpreter import AsciiDotsInterpreter
from dots.callbacks import IOCallbacksStorage
from dots import terminalsize

import curses
import click
import sys
import os
import time
import signal

from dots.states import DeadState

interpreter = None

debug_ = True
autostep_debug_ = False


class Default_IO_Callbacks(IOCallbacksStorage):
    def __init__(self, ticks, silent, debug, compat_debug, debug_lines,
def handle_menu_key(self, c):
    """Implement a simple menu / settings"""
    if c == self.menu_character or c == self.exit_character:
        # Menu/exit character again -> send itself
        self.serial.write(self.tx_encoder.encode(c))
        if self.echo:
            self.console.write(c)
    elif c == '\x15':  # CTRL+U -> upload file
        sys.stderr.write('\n--- File to upload: ')
        sys.stderr.flush()
        with self.console:
            filename = sys.stdin.readline().rstrip('\r\n')
            if filename:
                try:
                    with open(filename, 'rb') as f:
                        sys.stderr.write('--- Sending file {} ---\n'.format(filename))
                        while True:
                            block = f.read(1024)
                            if not block:
                                break
                            self.serial.write(block)
                            # Wait for output buffer to drain.
                            self.serial.flush()
                            sys.stderr.write('.')  # Progress indicator.
                    sys.stderr.write('\n--- File {} sent ---\n'.format(filename))
                except IOError as e:
                    sys.stderr.write('--- ERROR opening file {}: {} ---\n'.format(filename, e))
    elif c in '\x08hH?':  # CTRL+H, h, H, ? -> Show help
        sys.stderr.write(self.get_help_text())
    elif c == '\x12':  # CTRL+R -> Toggle RTS
        self.serial.rts = not self.serial.rts
        sys.stderr.write('--- RTS {} ---\n'.format('active' if self.serial.rts else 'inactive'))
    elif c == '\x04':  # CTRL+D -> Toggle DTR
        self.serial.dtr = not self.serial.dtr
        sys.stderr.write('--- DTR {} ---\n'.format('active' if self.serial.dtr else 'inactive'))
    elif c == '\x02':  # CTRL+B -> toggle BREAK condition
        self.serial.break_condition = not self.serial.break_condition
        sys.stderr.write('--- BREAK {} ---\n'.format('active' if self.serial.break_condition else 'inactive'))
    elif c == '\x05':  # CTRL+E -> toggle local echo
        self.echo = not self.echo
        sys.stderr.write('--- local echo {} ---\n'.format('active' if self.echo else 'inactive'))
    elif c == '\x06':  # CTRL+F -> edit filters
        sys.stderr.write('\n--- Available Filters:\n')
        sys.stderr.write('\n'.join(
            '---   {:<10} = {.__doc__}'.format(k, v)
            for k, v in sorted(TRANSFORMATIONS.items())))
        sys.stderr.write('\n--- Enter new filter name(s) [{}]: '.format(' '.join(self.filters)))
        with self.console:
            new_filters = sys.stdin.readline().lower().split()
        if new_filters:
            for f in new_filters:
                if f not in TRANSFORMATIONS:
                    sys.stderr.write('--- unknown filter: {}\n'.format(repr(f)))
                    break
            else:
                self.filters = new_filters
                self.update_transformations()
        sys.stderr.write('--- filters: {}\n'.format(' '.join(self.filters)))
    elif c == '\x0c':  # CTRL+L -> EOL mode
        modes = list(EOL_TRANSFORMATIONS)  # keys
        eol = modes.index(self.eol) + 1
        if eol >= len(modes):
            eol = 0
        self.eol = modes[eol]
        sys.stderr.write('--- EOL: {} ---\n'.format(self.eol.upper()))
        self.update_transformations()
    elif c == '\x01':  # CTRL+A -> set encoding
        sys.stderr.write('\n--- Enter new encoding name [{}]: '.format(self.input_encoding))
        with self.console:
            new_encoding = sys.stdin.readline().strip()
        if new_encoding:
            try:
                codecs.lookup(new_encoding)
            except LookupError:
                sys.stderr.write('--- invalid encoding name: {}\n'.format(new_encoding))
            else:
                self.set_rx_encoding(new_encoding)
                self.set_tx_encoding(new_encoding)
        sys.stderr.write('--- serial input encoding: {}\n'.format(self.input_encoding))
        sys.stderr.write('--- serial output encoding: {}\n'.format(self.output_encoding))
    elif c == '\x09':  # CTRL+I -> info
        self.dump_port_settings()
    #~ elif c == '\x01':  # CTRL+A -> cycle escape mode
    #~ elif c == '\x0c':  # CTRL+L -> cycle linefeed mode
    elif c in 'pP':  # P -> change port
        with self.console:
            try:
                port = ask_for_port()
            except KeyboardInterrupt:
                port = None
        if port and port != self.serial.port:
            # reader thread needs to be shut down
            self._stop_reader()
            # save settings
            settings = self.serial.getSettingsDict()
            try:
                new_serial = serial.serial_for_url(port, do_not_open=True)
                # restore settings and open
                new_serial.applySettingsDict(settings)
                new_serial.rts = self.serial.rts
                new_serial.dtr = self.serial.dtr
                new_serial.open()
                new_serial.break_condition = self.serial.break_condition
            except Exception as e:
                sys.stderr.write('--- ERROR opening new port: {} ---\n'.format(e))
                new_serial.close()
            else:
                self.serial.close()
                self.serial = new_serial
                sys.stderr.write('--- Port changed to: {} ---\n'.format(self.serial.port))
            # and restart the reader thread
            self._start_reader()
    elif c in 'bB':  # B -> change baudrate
        sys.stderr.write('\n--- Baudrate: ')
        sys.stderr.flush()
        with self.console:
            backup = self.serial.baudrate
            try:
                self.serial.baudrate = int(sys.stdin.readline().strip())
            except ValueError as e:
                sys.stderr.write('--- ERROR setting baudrate: {} ---\n'.format(e))
                self.serial.baudrate = backup
            else:
                self.dump_port_settings()
    elif c == '8':  # 8 -> change to 8 bits
        self.serial.bytesize = serial.EIGHTBITS
        self.dump_port_settings()
    elif c == '7':  # 7 -> change to 7 bits
        self.serial.bytesize = serial.SEVENBITS
        self.dump_port_settings()
    elif c in 'eE':  # E -> change to even parity
        self.serial.parity = serial.PARITY_EVEN
        self.dump_port_settings()
    elif c in 'oO':  # O -> change to odd parity
        self.serial.parity = serial.PARITY_ODD
        self.dump_port_settings()
    elif c in 'mM':  # M -> change to mark parity
        self.serial.parity = serial.PARITY_MARK
        self.dump_port_settings()
    elif c in 'sS':  # S -> change to space parity
        self.serial.parity = serial.PARITY_SPACE
        self.dump_port_settings()
    elif c in 'nN':  # N -> change to no parity
        self.serial.parity = serial.PARITY_NONE
        self.dump_port_settings()
    elif c == '1':  # 1 -> change to 1 stop bit
        self.serial.stopbits = serial.STOPBITS_ONE
        self.dump_port_settings()
    elif c == '2':  # 2 -> change to 2 stop bits
        self.serial.stopbits = serial.STOPBITS_TWO
        self.dump_port_settings()
    elif c == '3':  # 3 -> change to 1.5 stop bits
        self.serial.stopbits = serial.STOPBITS_ONE_POINT_FIVE
        self.dump_port_settings()
    elif c in 'xX':  # X -> change software flow control
        self.serial.xonxoff = (c == 'X')
        self.dump_port_settings()
    elif c in 'rR':  # R -> change hardware flow control
        self.serial.rtscts = (c == 'R')
        self.dump_port_settings()
    else:
        sys.stderr.write('--- unknown menu character {} --\n'.format(key_description(c)))
def norm_encoding(enc):
    return codecs.lookup(enc).name
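# A quick illustration: codecs.lookup normalizes aliases to the codec's
# canonical name, so spelling variants collapse onto one form.
print(norm_encoding('UTF8'))     # 'utf-8'
print(norm_encoding('latin_1'))  # 'iso8859-1'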
For the full list of settings and their values, see
https://docs.djangoproject.com/en/2.1/ref/settings/
"""
import codecs
import os

import sentry_sdk
from sentry_sdk.integrations.django import DjangoIntegration
from sentry_sdk.integrations.celery import CeleryIntegration
from sentry_sdk.integrations.redis import RedisIntegration

from celery.schedules import crontab
from django.utils import timezone

# Hack to handle emojis under MySQL
codecs.register(lambda name: codecs.lookup('utf8') if name == 'utf8mb4' else None)

## Build paths inside the project like this: os.path.join(BASE_DIR, ...)
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

## SECURITY WARNING: keep the secret key used in production secret!
SECRET_KEY = 'secret!'

## Clash Royale API configuration
CLASHROYALE_API_KEY = os.environ.get('CLASHROYALE_API_KEY') or "YOUR_API_KEY"
REFRESH_RATE = timezone.timedelta(minutes=5)
MAIN_CLAN = "2GJU9Y2G"  # omit the '#'

## SECURITY WARNING: don't run with debug turned on in production!
DEBUG = False
W = DX * 12 + 2 * inch
H = DY * 12 + 5 * inch
# can.circle(XS[-1] + 1 * DX + 1 * inch, YS[0], DX * 1.3)
# can.circle(XS[0] - 1 * inch, YS[0], DX * 1.3)
# can.circle(W / 2, H, 1 * inch)
# can.circle(W / 2, H - .5 * inch, 1 * inch)
# can.circle(W / 2, -.5 * inch, 4 * inch)
bbox = (XS[0] - 1 * inch, 0 * inch, W, H)
edge = MyPath()
edge.rect(bbox)
edge.drawOn(can, 1)

################################################################################

encName = 'winansi'
decoder = codecs.lookup(encName)[1]


def decodeFunc(txt):
    if txt is None:
        return ' '
    else:
        return case(decoder(txt, errors='replace')[0])


data = [[decodeFunc(case(char)) for char in line] for line in data]

################################################################################

for y, l in zip(YS + DY * .27, data[::-1]):
    for x, c in zip(XS + DX / 2., l):
        can.drawCentredString(x, y, c)

can.drawCentredString(XS[-2] + DX / 2, YS[1] + DY / 4,
#
# Copyright (c) 2010 Doug Hellmann.  All rights reserved.
#
"""Registering your own codec search function.
"""

#end_pymotw_header
import codecs
import encodings


def search1(encoding):
    print('search1: Searching for:', encoding)
    return None


def search2(encoding):
    print('search2: Searching for:', encoding)
    return None


codecs.register(search1)
codecs.register(search2)

utf8 = codecs.lookup('utf-8')
print('UTF-8:', utf8)

try:
    unknown = codecs.lookup('no-such-encoding')
except LookupError as err:
    print('ERROR:', err)
def set_property(self, name, value, location=None):
    r"""
    Set data format property ``name`` to ``value`` possibly translating
    ``value`` from a human readable representation to an internal one.

    :param str name: any of the ``KEY_*`` constants
    :param value: the value to set the property to as it would show up in a CID. \
        In some cases, the value will be translated to an internal representation. \
        For example ``set_property(KEY_LINE_DELIMITER, 'lf')`` results in \
        :py:attr:`cutplace.data.line_delimiter` being ``'\n'``.
    :type value: str or None
    :raises cutplace.errors.InterfaceError: if ``name`` is not a valid property name for this data format
    :raises cutplace.errors.InterfaceError: if ``value`` is invalid for the specified property
    """
    assert not self.is_valid, 'after validate() has been called property %r cannot be set anymore' % name
    assert name is not None
    assert name == name.lower(), 'property name must be lower case: %r' % name
    assert (value is not None) or (name in (KEY_ALLOWED_CHARACTERS, KEY_LINE_DELIMITER))

    name = name.replace(' ', '_')
    property_attribute_name = '_' + name
    if property_attribute_name not in self.__dict__:
        valid_property_names = _tools.human_readable_list(list(self.__dict__.keys()))
        raise errors.InterfaceError(
            'data format property %s for format %s is %s but must be one of %s'
            % (_compat.text_repr(name), self.format, _compat.text_repr(value), valid_property_names),
            location)
    if name == KEY_ENCODING:
        try:
            codecs.lookup(value)
        except LookupError:
            raise errors.InterfaceError(
                'value for data format property %s is %s but must be a valid encoding'
                % (_compat.text_repr(KEY_ENCODING), _compat.text_repr(self.encoding)),
                location)
        self.encoding = value
    elif name == KEY_HEADER:
        self.header = DataFormat._validated_int_at_least_0(name, value, location)
    elif name == KEY_ALLOWED_CHARACTERS:
        try:
            self._allowed_characters = ranges.Range(value)
        except errors.InterfaceError as error:
            raise errors.InterfaceError(
                'data format property %s must be a valid range: %s'
                % (_compat.text_repr(KEY_ALLOWED_CHARACTERS), error),
                location)
    elif name == KEY_DECIMAL_SEPARATOR:
        self.decimal_separator = DataFormat._validated_choice(
            KEY_DECIMAL_SEPARATOR, value, _VALID_DECIMAL_SEPARATORS, location)
    elif name == KEY_ESCAPE_CHARACTER:
        self.escape_character = DataFormat._validated_choice(
            KEY_ESCAPE_CHARACTER, value, _VALID_ESCAPE_CHARACTERS, location)
    elif name == KEY_ITEM_DELIMITER:
        item_delimiter = DataFormat._validated_character(KEY_ITEM_DELIMITER, value, location)
        if item_delimiter == '\x00':
            raise errors.InterfaceError(
                "data format property %s must not be 0 "
                "(to avoid zero terminated strings in Python's C based CSV reader)"
                % _compat.text_repr(KEY_ITEM_DELIMITER),
                location)
        self.item_delimiter = item_delimiter
    elif name == KEY_LINE_DELIMITER:
        try:
            self.line_delimiter = _TEXT_TO_LINE_DELIMITER_MAP[value.lower()]
        except KeyError:
            raise errors.InterfaceError(
                'line delimiter %s must be changed to one of: %s'
                % (_compat.text_repr(value),
                   _tools.human_readable_list(self._VALID_LINE_DELIMITER_TEXTS)),
                location)
    elif name == KEY_QUOTE_CHARACTER:
        self.quote_character = DataFormat._validated_choice(
            KEY_QUOTE_CHARACTER, value, _VALID_QUOTE_CHARACTERS, location)
    elif name == KEY_SHEET:
        self.sheet = DataFormat._validated_int_at_least_0(KEY_SHEET, value, location)
    elif name == KEY_SKIP_INITIAL_SPACE:
        self.skip_initial_space = DataFormat._validated_bool(KEY_SKIP_INITIAL_SPACE, value, location)
    elif name == KEY_THOUSANDS_SEPARATOR:
        self.thousands_separator = DataFormat._validated_choice(
            KEY_THOUSANDS_SEPARATOR, value, _VALID_THOUSANDS_SEPARATORS, location)
    else:
        assert False, 'name=%r' % name
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.support import expected_conditions
from selenium.common.exceptions import WebDriverException
from bs4 import BeautifulSoup
from bs4.element import Tag
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.remote.errorhandler import ErrorHandler
from unidecode import unidecode
import unicodecsv
import codecs

reload(sys)
streamWriter = codecs.lookup('utf-8')[-1]
sys.stdout = streamWriter(sys.stdout)
sys.setdefaultencoding("utf-8")


class MyTestCase():

    def setUp(self):
        self.driver = webdriver.Chrome()
        # self.driver.error_handler = MyHandler()

    def main(self):
        REGION = []
        INSEE = []  # the list of departments
        CITIES = []
        with open('3000Commun_France.csv') as csvfile:
            csv_reader = csv.reader(csvfile)
def handle_common_args(args):
    """
    handle the global config specified by `install_common_args`,
    such as configuring logging/coloring/etc.

    the following fields will be overwritten when present:
      - rules: file system path to rule files.
      - signatures: file system path to signature files.

    args:
      args (argparse.Namespace): parsed arguments that included at least `install_common_args` args.
    """
    if args.quiet:
        logging.basicConfig(level=logging.WARNING)
        logging.getLogger().setLevel(logging.WARNING)
    elif args.debug:
        logging.basicConfig(level=logging.DEBUG)
        logging.getLogger().setLevel(logging.DEBUG)
    else:
        logging.basicConfig(level=logging.INFO)
        logging.getLogger().setLevel(logging.INFO)

    # disable vivisect-related logging, it's verbose and not relevant for capa users
    set_vivisect_log_level(logging.CRITICAL)

    # Since Python 3.8 cp65001 is an alias to utf_8, but not for Python < 3.8
    # TODO: remove this code when only supporting Python 3.8+
    # https://stackoverflow.com/a/3259271/87207
    import codecs

    codecs.register(lambda name: codecs.lookup("utf-8") if name == "cp65001" else None)

    if args.color == "always":
        colorama.init(strip=False)
    elif args.color == "auto":
        # colorama will detect:
        #  - when on Windows console, and fixup coloring, and
        #  - when not an interactive session, and disable coloring
        # renderers should use coloring and assume it will be stripped out if necessary.
        colorama.init()
    elif args.color == "never":
        colorama.init(strip=True)
    else:
        raise RuntimeError("unexpected --color value: " + args.color)

    if hasattr(args, "rules"):
        if args.rules == RULES_PATH_DEFAULT_STRING:
            logger.debug("-" * 80)
            logger.debug(" Using default embedded rules.")
            logger.debug(" To provide your own rules, use the form `capa.exe -r ./path/to/rules/ /path/to/mal.exe`.")
            logger.debug(" You can see the current default rule set here:")
            logger.debug(" https://github.com/mandiant/capa-rules")
            logger.debug("-" * 80)

            rules_path = os.path.join(get_default_root(), "rules")

            if not os.path.exists(rules_path):
                # when a user installs capa via pip,
                # this pulls down just the source code - not the default rules.
                # i'm not sure the default rules should even be written to the library directory,
                # so in this case, we require the user to use -r to specify the rule directory.
                logger.error("default embedded rules not found! (maybe you installed capa as a library?)")
                logger.error("provide your own rule set via the `-r` option.")
                return E_MISSING_RULES
        else:
            rules_path = args.rules
            logger.debug("using rules path: %s", rules_path)

        args.rules = rules_path

    if hasattr(args, "signatures"):
        if args.signatures == SIGNATURES_PATH_DEFAULT_STRING:
            logger.debug("-" * 80)
            logger.debug(" Using default embedded signatures.")
            logger.debug(
                " To provide your own signatures, use the form "
                "`capa.exe --signature ./path/to/signatures/ /path/to/mal.exe`."
            )
            logger.debug("-" * 80)

            sigs_path = os.path.join(get_default_root(), "sigs")
        else:
            sigs_path = args.signatures
            logger.debug("using signatures path: %s", sigs_path)

        args.signatures = sigs_path
#coding:utf-8
import codecs, sys

# codecs is generally only needed like this on Python 2

# create a gb2312 codec
look = codecs.lookup("gb2312")
# create a utf-8 codec
look2 = codecs.lookup("utf-8")

a = "我爱北京"
print(len(a), a, type(a))

# decode a into internal unicode -- but why is the method named decode?
# my understanding is that it decodes the gb2312 byte string into unicode
b = look.decode(a)
# the returned b[0] is the data and b[1] is its length; at this point the
# type is unicode
print(b[1], b[0], type(b[0]))
def get_default_encoding():
    import locale
    import codecs
    return codecs.lookup(locale.getpreferredencoding()).name