def get_env(base_vars=None, lib_dir=None):
    """
    Build and return a dictionary of environment variables suitable for
    running a command, including the library search path variables.

    The optional `base_vars` mapping is used as the starting point when
    provided. Note: when `lib_dir` is provided on POSIX, any LD variables
    present in `base_vars` are overwritten with values computed from the
    current process environment.

    If `lib_dir` is provided, add it as a proper "LD_LIBRARY_PATH"-like path.
    """
    environ = dict(base_vars) if base_vars else {}

    if lib_dir and on_posix:
        extra_path = '%(lib_dir)s' % {'lib_dir': lib_dir}
        # LD_LIBRARY_PATH is for Linux/posix. DYLD_LIBRARY_PATH is for Mac,
        # though LD_LIBRARY_PATH should work there too.
        for var_name in (LD_LIBRARY_PATH, DYLD_LIBRARY_PATH):
            current_value = os.environ.get(var_name)
            environ[var_name] = update_path_var(current_value, extra_path)

    if py2:
        # ensure that we use bytes on py2 and unicode on py3
        def to_bytes(s):
            if isinstance(s, bytes):
                return s
            return s.encode('utf-8')

        convert = to_bytes
    else:
        convert = text.as_unicode

    return {convert(name): convert(value) for name, value in environ.items()}
def get_env(base_vars=None, lib_dir=None):
    """
    Return a dictionary of environment variables for command execution with
    appropriate DY/LD_LIBRARY_PATH path variables.

    Start from the optional `base_vars` environment variables dictionary if
    provided. Note: on POSIX and when `lib_dir` is provided, any
    DY/LD_LIBRARY_PATH variables found in `base_vars` are overwritten with
    values derived from the current process environment.

    All the returned keys and values are converted with `text.as_unicode`.
    """
    environ = {}
    if base_vars:
        environ.update(base_vars)

    if lib_dir and on_posix:
        extra_path = f'{lib_dir}'
        # LD_LIBRARY_PATH first (Linux/posix), then DYLD_LIBRARY_PATH (Mac,
        # though LD_LIBRARY_PATH should work there too)
        for var_name in (LD_LIBRARY_PATH, DYLD_LIBRARY_PATH):
            environ[var_name] = update_path_var(
                os.environ.get(var_name),
                extra_path,
            )

    as_text = {}
    for name, value in environ.items():
        as_text[text.as_unicode(name)] = text.as_unicode(value)
    return as_text
def test_as_unicode():
    """
    Check that `text.as_unicode` returns an empty string unchanged, returns a
    str for bytes input, passes None through, and raises an AssertionError
    when given a list.
    """
    assert text.as_unicode('') == ''
    assert isinstance(text.as_unicode(b'some bytes'), str)
    # None is a singleton: compare by identity, not by equality
    assert text.as_unicode(None) is None

    try:
        text.as_unicode(['foo'])
    except AssertionError:
        pass
    else:
        # only reached when the call above did not raise
        raise Exception('Exception should have been raised')
def test_as_unicode():
    """
    Check that `text.as_unicode` returns an empty string unchanged, returns a
    `compat.unicode` for bytes input, passes None through, and raises an
    AssertionError when given a list.
    """
    assert text.as_unicode('') == ''
    assert isinstance(text.as_unicode(b'some bytes'), compat.unicode)
    # None is a singleton: compare by identity, not by equality
    assert text.as_unicode(None) is None

    try:
        text.as_unicode(['foo'])
    except AssertionError:
        pass
    else:
        # only reached when the call above did not raise
        raise Exception('Exception should have been raised')
def safe_path(path, posix=False):
    """
    Return a safe and portable POSIX path built from `path`, usable on
    multiple OSes.

    The `path` is converted to unicode if needed, stripped, resolved for
    relative segments and each of its segments is made portable with
    `portable_filename`. Return '_' if no segment is left.

    The `posix` flag is passed to the path resolution. It is forced to False
    when `path` is not detected as a POSIX path, in which case `path` is
    first converted to a Windows path with backslash separators.
    """
    # if the path is UTF, try to use unicode instead
    text_path = path if isinstance(path, unicode) else as_unicode(path)
    text_path = text_path.strip()

    if not is_posixpath(text_path):
        text_path = as_winpath(text_path)
        posix = False

    resolved = resolve(text_path, posix)
    _pathmod, separator = path_handlers(resolved, posix)

    # drop empty or whitespace-only segments, then make each one portable
    stripped_parts = (part.strip() for part in resolved.split(separator))
    names = [portable_filename(part) for part in stripped_parts if part]
    if not names:
        return '_'

    # always return posix
    if isinstance(resolved, unicode):
        joined = u'/'.join(names)
    else:
        joined = b'/'.join(names)
    return as_posixpath(joined)
def safe_path(path, posix=False):
    """
    Return a safe and portable POSIX path built from `path`, usable on
    multiple OSes.

    The `path` is converted to unicode if needed, stripped, resolved for
    relative segments and each of its segments is made portable with
    `portable_filename`. Return '_' if no segment is left.

    The `posix` flag is passed to the path resolution. It is forced to False
    when `path` is not detected as a POSIX path, in which case `path` is
    first converted to a Windows path with backslash separators.
    """
    # if the path is UTF, try to use unicode instead
    text_path = path if isinstance(path, unicode) else as_unicode(path)
    text_path = text_path.strip()

    if not is_posixpath(text_path):
        text_path = as_winpath(text_path)
        posix = False

    resolved = resolve(text_path, posix)
    _pathmod, separator = path_handlers(resolved, posix)

    # drop empty or whitespace-only segments, then make each one portable
    stripped_parts = (part.strip() for part in resolved.split(separator))
    names = [portable_filename(part) for part in stripped_parts if part]
    if not names:
        return '_'

    # always return posix
    if isinstance(resolved, unicode):
        joined = u'/'.join(names)
    else:
        joined = b'/'.join(names)
    return as_posixpath(joined)
def hunk_data(hnk):
    """
    Return a mapping of the `hnk` hunk object attributes: the `startsrc`,
    `linessrc`, `starttgt` and `linestgt` values copied as-is, and its `desc`
    converted with `text.as_unicode`.
    """
    copied_fields = ('startsrc', 'linessrc', 'starttgt', 'linestgt')
    data = {name: getattr(hnk, name) for name in copied_fields}
    data['desc'] = text.as_unicode(hnk.desc)
    return data
def path_as_unicode(path):
    """
    Return `path` as unicode.

    A unicode `path` is returned unchanged. Otherwise it is decoded using the
    encoding returned by `get_fs_encoding()`, falling back to `as_unicode`
    if this decoding fails with a UnicodeDecodeError.
    """
    if not isinstance(path, unicode):
        try:
            path = path.decode(get_fs_encoding())
        except UnicodeDecodeError:
            path = as_unicode(path)
    return path
def get_text_file_start(location, length=4096):
    """
    Return a unicode string with up to the first `length` characters from the
    text file at `location`, or None if the file could not be read.

    The file is first read in text mode. If this fails for any reason, it is
    read again as bytes (up to `length` bytes) and converted with
    `text.as_unicode`.

    This is a best-effort function: ordinary errors are not propagated. Only
    Exception subclasses are handled, so that KeyboardInterrupt and
    SystemExit are never swallowed.
    """
    try:
        with io.open(location, 'r') as f:
            return f.read(length)
    except Exception:
        # fall through to try again as bytes and force unicode
        pass

    try:
        with open(location, 'rb') as f:
            return text.as_unicode(f.read(length))
    except Exception:
        # best effort: report unreadable content as None
        return None
def is_markup(location):
    """
    Return True if the file at `location` is some kind of markup, such as
    HTML, XML, PHP, etc.

    Files smaller than 64 bytes or that are not text are never markup. Files
    with a known markup extension, or whose content starts with '<', are
    markup. Otherwise the decision is based on the proportion and balance of
    '<' and '>' characters in the first 1024 bytes.
    """
    file_type = get_type(location)

    # do not care for small files, nor for non-text files
    if file_type.size < 64 or not file_type.is_text:
        return False

    if location.endswith(extensions):
        return True

    with open(location, 'rb') as markup_file:
        head = as_unicode(markup_file.read(1024))

    if head.startswith('<'):
        return True

    # remove all whitespaces
    compact = ''.join(head.split())

    # count opening and closing tag markers
    brackets = Counter(char for char in compact if char in '<>')
    opened = brackets['<']
    closed = brackets['>']

    # we need at least one of each marker
    if not opened or not closed:
        return False

    # ~ 5 percent of the non-whitespace characters are tag <> markers
    has_tags = (opened + closed) / len(compact) > 0.05

    # ratio of open to close tags should approach 1: accept a 20% drift
    balanced = abs(1 - closed / opened) < .2

    return has_tags and balanced
def __init__(self, rc=None, archive_struct=None, archive_func=None, root_ex=None):
    """
    Initialize the `rc`, `errno`, `msg` and `func` attributes.

    When `root_ex` is an ArchiveException, these are copied from it, with
    `msg` built by joining its args converted to unicode, one per line.
    Otherwise `rc` is used as-is, `errno` and `msg` are obtained from
    `archive_struct` when provided, and `func` is the name of `archive_func`
    when provided.
    """
    self.root_ex = root_ex

    if root_ex and isinstance(root_ex, ArchiveException):
        self.rc = root_ex.rc
        self.errno = root_ex.errno
        lines = (text.as_unicode(arg) for arg in (root_ex.args or []))
        # an empty message is stored as None
        self.msg = u'\n'.join(lines) or None
        self.func = root_ex.func
        return

    self.rc = rc

    if archive_struct:
        # a falsy errno or message is treated as missing
        self.errno = errno(archive_struct) or None
        raw_msg = err_msg(archive_struct) or ''
    else:
        self.errno = None
        raw_msg = ''

    if raw_msg:
        self.msg = text.as_unicode(raw_msg) or 'Unknown error'
    else:
        self.msg = 'Unknown error'

    if archive_func:
        self.func = archive_func.__name__ or None
    else:
        self.func = None
def get_path(self, func, func_w):
    """
    Return a path obtained by calling the path function `func` on this entry
    structure, or by calling the wide char equivalent `func_w` if `func` did
    not provide a path.

    The returned path is either a byte string (on Python 2) or a unicode
    string (on Python 3). On Python 2, a unicode path is encoded to UTF-8
    bytes.
    """
    path = func(self.entry_struct) or func_w(self.entry_struct)

    if py2:
        if isinstance(path, compat.unicode):
            # FIXME: encoding MAY fail if the encoding is NOT UTF-8!
            # .... should we transliterate there?
            path = path.encode('utf-8')

    if py3:
        if not isinstance(path, compat.unicode):
            path = text.as_unicode(path)

    return path
def get_pygments_lexer(location):
    """
    Given an input file `location`, return a Pygments lexer appropriate for
    lexing this file content.

    Return None if the file is binary or if no lexer could be found, either
    from the file name or by guessing from the first 4K of its content.
    """
    try:
        T = _registry[location]
        if T.is_binary:
            return
    except KeyError:
        # no registered type for this location: check the file directly
        if is_binary(location):
            return

    try:
        # FIXME: Latest Pygments versions should work fine
        # win32_bug_on_s_files = dejacode.on_windows and location.endswith('.s')

        # NOTE: we use only the location for its file name here, we could use
        # lowercase location may be
        lexer = get_lexer_for_filename(location, stripnl=False, stripall=False)
        return lexer

    except LexerClassNotFound:
        try:
            # if Pygments does not guess we should not carry forward
            # read the first 4K of the file
            try:
                with io.open(location, 'r') as f:
                    content = f.read(4096)
            except Exception:
                # Try again as bytes and force unicode. Only Exception is
                # handled so that KeyboardInterrupt and SystemExit propagate.
                with open(location, 'rb') as f:
                    content = text.as_unicode(f.read(4096))

            guessed = guess_lexer(content)
            return guessed
        except LexerClassNotFound:
            return
def pe_info(location):
    """
    Return a mapping of common data available for a Windows dll or exe PE
    (portable executable) at `location`.

    Return an empty mapping if `location` is empty or is not a Windows
    executable. Otherwise return a mapping with all the PE_INFO_KEYS keys,
    where a value is None when no data was collected for this key.

    Also collect extra data found if any, returned as a dictionary under the
    'extra_data' key in the returned mapping.
    """
    if not location:
        return {}

    T = contenttype.get_type(location)

    if not T.is_winexe:
        return {}

    # initialize with empty values so that every known key is always present
    result = dict.fromkeys(PE_INFO_KEYS)
    extra_data = result['extra_data'] = {}

    with closing(pefile.PE(location)) as pe:
        if not hasattr(pe, 'FileInfo'):
            # No fileinfo section: we return just empties
            return result

        # >>> pe.FileInfo: this is a list of list of Structure objects:
        # [[<Structure: [VarFileInfo] >, <Structure: [StringFileInfo]>]]
        file_info = pe.FileInfo
        if not file_info or not isinstance(file_info, list):
            if TRACE:
                logger.debug('pe_info: not file_info')
            return result

        # here we have a non-empty list
        file_info = file_info[0]
        if TRACE:
            # use a format placeholder: extra logging arguments without one
            # trigger a formatting error when the record is emitted
            logger.debug('pe_info: file_info: %r', file_info)

        string_file_info = [
            x for x in file_info
            if type(x) == pefile.Structure
            and hasattr(x, 'name')
            and x.name == 'StringFileInfo'
        ]

        if not string_file_info:
            # No stringfileinfo section: we return just empties
            if TRACE:
                logger.debug('pe_info: not string_file_info')
            return result

        string_file_info = string_file_info[0]

        if not hasattr(string_file_info, 'StringTable'):
            # No fileinfo.StringTable section: we return just empties
            if TRACE:
                logger.debug('pe_info: not StringTable')
            return result

        string_table = string_file_info.StringTable
        if not string_table or not isinstance(string_table, list):
            return result

        # only the first string table is used
        string_table = string_table[0]

        if TRACE:
            logger.debug(
                'pe_info: Entries keys: '
                + str(set(k for k in string_table.entries))
            )
            logger.debug('pe_info: Entry values:')
            for k, v in string_table.entries.items():
                logger.debug('  ' + str(k) + ': ' + repr(type(v)) + repr(v))

        for k, v in string_table.entries.items():
            # convert keys and values to stripped unicode text
            key = text.as_unicode(k).strip()
            value = text.as_unicode(v).strip()
            value = fix_text(value)
            if key in PE_INFO_KEYSET:
                result[key] = value
            else:
                # anything that is not a known key is kept as extra data
                extra_data[key] = value

    return result
def pe_info(location, include_extra_data=False):
    """
    Return a mapping of common data available for a Windows dll or exe PE
    (portable executable) at `location`.

    Return an empty mapping if `location` is empty or is not a Windows
    executable. Otherwise return an ordered mapping with all the PE_INFO_KEYS
    keys, where a value is None when no data was collected for this key.

    Extra data found, if any, is returned as an ordered dictionary sorted by
    key under the 'extra_data' key in the returned mapping.

    NOTE: the `include_extra_data` argument is currently not used: extra data
    is always collected. It is kept for backward compatibility.

    Any exception raised while collecting data is re-raised.
    """
    if not location:
        return {}

    T = contenttype.get_type(location)

    if not T.is_winexe:
        return {}

    # FIXME: WTF: we initialize with empty values, as we must always
    # return something for all values
    result = OrderedDict([(k, None,) for k in PE_INFO_KEYS])
    result['extra_data'] = OrderedDict()

    try:
        with closing(pefile.PE(location)) as pe:
            if not hasattr(pe, 'FileInfo'):
                # No fileinfo section: we return just empties
                return result

            # >>> pe.FileInfo: this is a list of list of Structure objects:
            # [[<Structure: [VarFileInfo] >, <Structure: [StringFileInfo]>]]
            pefi = pe.FileInfo
            if not pefi or not isinstance(pefi, list):
                if TRACE:
                    logger.debug('pe_info: not pefi')
                return result

            # here we have a non-empty list
            pefi = pefi[0]
            if TRACE:
                # use a format placeholder: extra logging arguments without
                # one trigger a formatting error when the record is emitted
                logger.debug('pe_info: pefi: %r', pefi)

            sfi = [
                x for x in pefi
                if type(x) == pefile.Structure
                and hasattr(x, 'name')
                and x.name == 'StringFileInfo'
            ]

            if not sfi:
                # No stringfileinfo section: we return just empties
                if TRACE:
                    logger.debug('pe_info: not sfi')
                return result

            sfi = sfi[0]

            if not hasattr(sfi, 'StringTable'):
                # No fileinfo.StringTable section: we return just empties
                if TRACE:
                    logger.debug('pe_info: not StringTable')
                return result

            strtab = sfi.StringTable
            if not strtab or not isinstance(strtab, list):
                return result

            # only the first string table is used
            strtab = strtab[0]

            if TRACE:
                logger.debug(
                    'pe_info: Entries keys: '
                    + str(set(k for k in strtab.entries))
                )
                logger.debug('pe_info: Entry values:')
                for k, v in strtab.entries.items():
                    logger.debug('  ' + str(k) + ': ' + repr(type(v)) + repr(v))

            for k, v in strtab.entries.items():
                # convert keys and values to stripped unicode text
                key = text.as_unicode(k).strip()
                value = text.as_unicode(v).strip()
                value = fix_text(value)
                if key in PE_INFO_KEYSET:
                    result[key] = value
                else:
                    # collect extra_data if any:
                    result['extra_data'][key] = value

    except Exception as e:
        # trace the failure before re-raising: anything placed after the
        # raise statement would never run
        if TRACE:
            logger.debug('pe_info: Failed to collect infos: ' + repr(e))
        raise

    # the ordering of extra_data is not guaranteed on Python 2 because the
    # dict is not ordered
    result['extra_data'] = OrderedDict(sorted(result['extra_data'].items()))
    return result