def _guessEncoding(self, path):
    """Detect the encoding of a csv file and select it in the dialog.

    Nothing happens unless `path` exists on the file system and its name
    ends with ``csv`` (case-insensitive). Detection itself is delegated to
    ``self._detector.detect(path)``.

    On a successful identification the normalised encoding name is stored
    in ``self._encodingKey`` and the encoding combo box is moved to the
    matching entry. If the detected encoding has no entry in `_encodings`,
    the dialog state is left untouched.

    Args:
        path (string): Path to a csv file on the file system.

    """
    if not (os.path.exists(path) and path.lower().endswith('csv')):
        return

    encoding = self._detector.detect(path)
    if encoding is None:
        return

    # Normalise the detector's spelling to the key format used for the
    # `_encodings` lookup: 'utf-8' -> 'utf8', 'iso-8859-1' -> 'iso_8859_1'.
    if encoding.startswith('utf'):
        encoding = encoding.replace('-', '')
    else:
        encoding = encoding.replace('-', '_')

    viewValue = _encodings.get(encoding)
    if viewValue is None:
        # The detector reported an encoding the dialog does not know about.
        # Previously this fell through to `None.upper()` and raised after
        # `_encodingKey` had already been overwritten.
        return

    self._encodingKey = encoding
    index = self._encodingComboBox.findText(viewValue.upper())
    self._encodingComboBox.setCurrentIndex(index)
def _guessEncoding(self, path):
    """Check a csv file's encoding and reflect it in the dialog widgets.

    The path has to exist on the file system and its name has to end with
    ``csv`` (case-insensitive); otherwise nothing is done.

    Note:
        Automatic detection is currently switched off: the detector call
        is commented out and the encoding is fixed to ``None``, so the
        widgets are never updated by this method at the moment.

    Args:
        path (string): Path to a csv file on the file system.

    """
    isCsvFile = os.path.exists(path) and path.lower().endswith('csv')
    if not isCsvFile:
        return

    # encoding = self._detector.detect(path)
    encoding = None
    if encoding is None:
        return

    # Only reached once detection is enabled again.
    # utf names lose their hyphens ('utf-8' -> 'utf8'); every other name
    # gets its hyphens turned into underscores.
    normalized = encoding
    if normalized.startswith('utf'):
        normalized = normalized.replace('-', '')
    normalized = normalized.replace('-', '_')

    displayName = _encodings.get(normalized)
    self._encodingKey = normalized
    position = self._encodingComboBox.findText(displayName.upper())
    self._encodingComboBox.setCurrentIndex(position)
def get_encoding(stream):
    """Return the name of the encoding used by `stream`.

    For the standard streams (``sys.stdin``, ``sys.stdout``, ``sys.stderr``)
    platform specifics are taken into account first:

    * ``darwin``: always ``"UTF-8"``.
    * ``win32``: the encoding of the current locale (Python 2.6+); if that
      is not set, the console code page (input code page for stdin, output
      code page otherwise) looked up in `aliases`.

    If none of this yields a result, or for any other stream, the first
    non-empty value of ``stream.encoding``,
    ``locale.getpreferredencoding()`` and ``sys.getdefaultencoding()`` is
    returned.

    """
    enc = None
    if stream in (sys.stdin, sys.stdout, sys.stderr):
        if sys.platform == "darwin":
            # There is no way to determine it reliably under OS X 10.4?
            return "UTF-8"
        if sys.platform == "win32":
            if sys.version_info >= (2, 6):
                # Windows/Python 2.6+: If a locale is set, the actual
                # encoding of stdio changes, but the encoding attribute
                # isn't updated
                enc = locale.getlocale()[1]
            if not enc:
                try:
                    if stream is sys.stdin:
                        codepage = GetConsoleCP()
                    else:
                        codepage = GetConsoleOutputCP()
                    enc = aliases.get(str(codepage))
                except Exception:
                    # Best effort only: fall through to the generic
                    # lookups below. Catching Exception (instead of a bare
                    # except) lets KeyboardInterrupt/SystemExit propagate.
                    pass
    return (enc
            or getattr(stream, "encoding", None)
            or locale.getpreferredencoding()
            or sys.getdefaultencoding())
def __init__(self):
    """Set up an instance with empty state and its lookup tables."""
    # State attributes start out unset.
    self.content_type = None
    self.status = None
    self.response = None
    self.charset = None

    # Charset table: each key of `aliases` with its underscores removed,
    # mapped to the aliased name with underscores turned into hyphens.
    charsets = {}
    for alias, target in aliases.items():
        charsets[alias.replace('_', '')] = target.replace('_', '-')
    self.charsets = charsets

    # Short type name -> MIME content type.
    self.types = {
        'json': 'application/json',
        'xml': 'application/xml',
        'soap': 'application/soap+xml',
    }

    self.transactions = None
def analyze_charset_data(iana_data):
    """Extract data from the IANA character sets definitions.

    A definition begins with a ``Name:`` line and ends with a blank line or
    with the end of the data. For every definition a `CharacterSet` is
    created from the name, the MIB enum, the aliases, the preferred MIME
    name and the first Python encoding found via `encoding_aliases` for the
    name or one of its aliases. Lines outside a definition are ignored.

    @param iana_data: definitions of characters.
    @type iana_data: line-iterable (file, list of string...)
    """
    from itertools import chain

    NAMEKEYWORD = "Name: "
    MIBKEYWORD = "MIBenum: "
    ALIASKEYWORD = "Alias: "
    MIMEMARKER = "preferred MIME name"

    in_setdef = False
    name = None
    mib = None
    aliases = []
    mime = None

    # A blank line is appended to the input so that a last definition which
    # is not followed by a blank line is still closed and registered.
    for line in chain(iana_data, ("",)):
        line = line.strip()

        if not in_setdef:
            if line.startswith(NAMEKEYWORD):
                # Start of an entry.
                # The name line can have extra characters:
                #   Name: xxxxx [ref1,ref2]
                #   Name: xxxxx (preferred MIME name) [ref1,ref2]
                name = line.split()[1]
                if MIMEMARKER in line:
                    mime = name
                in_setdef = True
                if DEBUG:
                    print("Found %s" % name)
            continue

        if not line:
            # End of an entry.
            # Search a Python correspondence from one of name/alias.
            pyenc = None
            for n in [name] + aliases:
                pyenc = encoding_aliases.get(normenc(n), None)
                if pyenc:
                    break
            if DEBUG:
                print("End %s" % name)
            # Make object (it auto-registers in dicts/lists).
            CharacterSet(name, mib, aliases, mime, pyenc)
            # Reset for the next definition.
            name = None
            mib = None
            aliases = []
            mime = None
            in_setdef = False
            continue

        # Keep data in set definition.
        if line.startswith(MIBKEYWORD):
            mib = int(line[len(MIBKEYWORD):])
        elif line.startswith(ALIASKEYWORD):
            alias = line[len(ALIASKEYWORD):]
            if alias == "None":
                # ... yes, there are lines with "Alias: None" ...
                continue
            if MIMEMARKER in alias:
                alias = alias.split()[0]
                mime = alias
            aliases.append(alias.strip())
def analyze_charset_data(iana_data):
    """Extract data from the IANA character sets definitions.

    Every definition starts with a ``Name:`` line and is terminated by a
    blank line or by the end of the data. Each one is turned into a
    `CharacterSet` built from its name, MIB enum, aliases, preferred MIME
    name and the first Python encoding that `encoding_aliases` knows for
    the name or one of the aliases. Anything outside a definition is
    skipped.

    @param iana_data: definitions of characters.
    @type iana_data: line-iterable (file, list of string...)
    """
    from itertools import chain

    NAMEKEYWORD = "Name: "
    MIBKEYWORD = "MIBenum: "
    ALIASKEYWORD = "Alias: "
    MIMEMARKER = "preferred MIME name"

    in_setdef = False
    name = None
    mib = None
    aliases = []
    mime = None

    # The extra blank line at the end closes a final definition that the
    # input did not terminate itself; without it that entry would be lost.
    for line in chain(iana_data, ("",)):
        line = line.strip()

        if not in_setdef:
            if line.startswith(NAMEKEYWORD):
                # Start of an entry.
                # The name line can have extra characters:
                #   Name: xxxxx [ref1,ref2]
                #   Name: xxxxx (preferred MIME name) [ref1,ref2]
                name = line.split()[1]
                if MIMEMARKER in line:
                    mime = name
                in_setdef = True
                if DEBUG:
                    print("Found %s" % name)
            continue

        if not line:
            # End of an entry.
            # Search a Python correspondence from one of name/alias.
            pyenc = None
            for n in [name] + aliases:
                pyenc = encoding_aliases.get(normenc(n), None)
                if pyenc:
                    break
            if DEBUG:
                print("End %s" % name)
            # Make object (it auto-registers in dicts/lists).
            CharacterSet(name, mib, aliases, mime, pyenc)
            # Reset for the next definition.
            name = None
            mib = None
            aliases = []
            mime = None
            in_setdef = False
            continue

        # Keep data in set definition.
        if line.startswith(MIBKEYWORD):
            mib = int(line[len(MIBKEYWORD):])
        elif line.startswith(ALIASKEYWORD):
            alias = line[len(ALIASKEYWORD):]
            if alias == "None":
                # ... yes, there are lines with "Alias: None" ...
                continue
            if MIMEMARKER in alias:
                alias = alias.split()[0]
                mime = alias
            aliases.append(alias.strip())