def try_to_extract(location, target_dir, extractor):
    """
    Extract archive at `location` to `target_dir` trying the `extractor`
    function. If extract fails, just return without raising exceptions.
    Note: there are a few cases where we want to attempt extracting something
    but do not care if this fails.

    :param location: path to the archive to extract.
    :param target_dir: path to the directory receiving the extracted files.
    :param extractor: callable(abs_location, temp_dir) returning a list of
        warning messages.
    :return: list of warning messages (empty if extraction failed early).
    """
    abs_location = os.path.abspath(os.path.expanduser(location))
    abs_target_dir = compat.unicode(
        os.path.abspath(os.path.expanduser(target_dir)))
    # extract to a private temp dir first, then copy to the final target, so a
    # failed extraction never leaves partial results in `target_dir`
    temp_target = compat.unicode(
        fileutils.get_temp_dir(prefix='extractcode-extract1-'))
    warnings = []
    try:
        warnings = extractor(abs_location, temp_target)
        if TRACE:
            logger.debug(
                'try_to_extract: temp_target: %(temp_target)r' % locals())
        fileutils.copytree(temp_target, abs_target_dir)
    except Exception:
        # Deliberate best-effort: swallow extraction errors. This is no longer
        # a bare `except:` so KeyboardInterrupt/SystemExit still propagate.
        return warnings
    finally:
        fileutils.delete(temp_target)
    return warnings
def extract_with_fallback(location, target_dir, extractor1, extractor2):
    """
    Extract archive at `location` to `target_dir` trying first the primary
    `extractor1` function. If extract fails with this function, attempt
    extraction again with the fallback `extractor2` function.
    Return a list of warning messages. Raise exceptions on errors.

    Note: there are a few cases where the primary extractor for a type may
    fail and a fallback extractor will succeed.
    """
    abs_location = os.path.abspath(os.path.expanduser(location))
    abs_target_dir = compat.unicode(
        os.path.abspath(os.path.expanduser(target_dir)))
    # attempt extract first to a temp dir
    temp_target1 = compat.unicode(
        fileutils.get_temp_dir(prefix='scancode-extract1-'))
    try:
        warnings = extractor1(abs_location, temp_target1)
        if TRACE:
            logger.debug(
                'extract_with_fallback: temp_target1: %(temp_target1)r' % locals())
        fileutils.copytree(temp_target1, abs_target_dir)
    except Exception:
        # Fallback to the second extractor.
        # Create the temp dir OUTSIDE the try block below: if get_temp_dir
        # itself failed inside the try, the `finally` would raise a NameError
        # on `temp_target2`, masking the real error.
        temp_target2 = compat.unicode(
            fileutils.get_temp_dir(prefix='scancode-extract2-'))
        try:
            warnings = extractor2(abs_location, temp_target2)
            if TRACE:
                logger.debug(
                    'extract_with_fallback: temp_target2: %(temp_target2)r' % locals())
            fileutils.copytree(temp_target2, abs_target_dir)
        finally:
            fileutils.delete(temp_target2)
    finally:
        fileutils.delete(temp_target1)
    return warnings
def test_execute2_non_ascii_output_py2(self):
    # Under Python 2, Popen returns a *binary* string and non-ascii
    # characters are skipped/transliterated in the captured output.
    code = "print b'non ascii: \\xe4 just passed it !'"
    returncode, out, err = command.execute2(sys.executable, ['-c', code])
    assert err == b''
    assert out == b'non ascii: a just passed it !'
    assert returncode == 0
    # decoding the captured output must not raise
    compat.unicode(out)
def as_unicode(line):
    """
    Return a unicode text line from a text line.
    Try to decode line as Unicode. Try first some default encodings,
    then attempt Unicode trans-literation and finally fall-back to ASCII
    strings extraction.

    TODO: Add file/magic detection, unicodedmanit/BS3/4
    """
    if isinstance(line, compat.unicode):
        return remove_null_bytes(line)
    try:
        s = line.decode('UTF-8')
    except UnicodeDecodeError:
        try:
            # FIXME: latin-1 may never fail
            s = line.decode('LATIN-1')
        except UnicodeDecodeError:
            try:
                # Convert some byte string to ASCII characters as Unicode
                # including replacing accented characters with their non-
                # accented NFKD equivalent. Non ISO-Latin and non ASCII
                # characters are stripped from the output. Does not preserve
                # the original length offsets.
                # For Unicode NFKD equivalence, see:
                # http://en.wikipedia.org/wiki/Unicode_equivalence
                s = unicodedata.normalize('NFKD', line).encode('ASCII')
            except UnicodeDecodeError:
                try:
                    enc = chardet.detect(line)['encoding']
                    s = compat.unicode(line, enc)
                except UnicodeDecodeError:
                    # fall-back to strings extraction if all else fails.
                    # Bug fix: this used to pass `s`, which is unbound when
                    # every decode attempt above has failed (UnboundLocalError);
                    # the raw input `line` is what must be processed here.
                    s = strings.string_from_string(line)
    return remove_null_bytes(s)
def extract_twice(location, target_dir, extractor1, extractor2):
    """
    Extract a nested compressed archive at `location` to `target_dir` using
    the `extractor1` function to a temporary directory then the `extractor2`
    function on the extracted payload of `extractor1`.
    Return a list of warning messages. Raise exceptions on errors.

    Typical nested archives include compressed tarballs and RPMs (containing
    a compressed cpio).

    Note: it would be easy to support deeper extractor chains, but this gets
    hard to trace and debug very quickly. A depth of two is simple and sane
    and covers most common cases.
    """
    if on_linux and py2:
        # filesystem APIs are bytes-based on py2/Linux
        location = fileutils.fsencode(location)
        target_dir = fileutils.fsencode(target_dir)

    abs_location = os.path.abspath(os.path.expanduser(location))
    abs_target_dir = compat.unicode(
        os.path.abspath(os.path.expanduser(target_dir)))

    # step 1: extract the intermediate payload to a temp dir
    temp_target = compat.unicode(
        fileutils.get_temp_dir(prefix='extractcode-extract-'))
    warnings = extractor1(abs_location, temp_target)
    if TRACE:
        logger.debug('extract_twice: temp_target: %(temp_target)r' % locals())

    # step 2: extract each file of that payload to the final target_dir
    try:
        inner_archives = list(
            fileutils.resource_iter(temp_target, with_dirs=False))
        if inner_archives:
            for extracted1_loc in inner_archives:
                if TRACE:
                    logger.debug(
                        'extract_twice: extractor2: %(extracted1_loc)r' % locals())
                warnings.extend(extractor2(extracted1_loc, abs_target_dir))
        else:
            warnings.append(location + ': No files found in archive.')
    finally:
        # cleanup the temporary output from extractor1
        fileutils.delete(temp_target)
    return warnings
def get_ip(s):
    """
    Return an `ipaddress.ip_address` object built from string `s` if `s` is a
    valid IP address, or False otherwise.

    Note: docstring fixed — this function never returns True; callers get the
    parsed address object (truthy) or False.
    """
    if not is_ip(s):
        return False
    try:
        ip = ipaddress.ip_address(compat.unicode(s))
        return ip
    except ValueError:
        return False
def path_handlers(path, posix=True):
    """
    Return a (path module, path separator) tuple to use for handling (e.g.
    split and join) `path` using either POSIX or Windows conventions
    depending on the `path` content. Force usage of POSIX conventions if
    `posix` is True.
    """
    # determine if we use posix or windows path handling
    is_posix = is_posixpath(path)
    use_posix = posix or is_posix
    # conditional expressions instead of the fragile `cond and a or b` idiom
    # (behavior is identical: posixpath/ntpath and the separators are truthy)
    pathmod = posixpath if use_posix else ntpath
    path_sep = POSIX_PATH_SEP if use_posix else WIN_PATH_SEP
    # match the separator text type (unicode vs bytes) to the type of `path`
    if isinstance(path, compat.unicode):
        path_sep = compat.unicode(path_sep)
    return pathmod, path_sep
def check_error(result, func, args):  # NOQA
    """
    ctypes error handler/checker: raise a MagicException when `result`
    signals an error, otherwise return `result` unchanged.
    """
    # a result signals an error when it is None, a negative int, or a
    # bytes/text message starting with 'cannot open'
    failed = False
    if result is None:
        failed = True
    elif isinstance(result, int) and result < 0:
        failed = True
    elif isinstance(result, bytes):
        failed = compat.unicode(
            result, encoding='utf-8').startswith('cannot open')
    elif isinstance(result, compat.unicode):
        failed = result.startswith('cannot open')

    if failed:
        raise MagicException(_magic_error(args[0]))
    return result
def path_progress_message(item, verbose=False, prefix='Scanned: '):
    """
    Return a styled message suitable for progress display when processing a
    path for an `item` tuple of (location, rid, scan_errors, *other items).
    Return an empty string for an empty `item` or when there is no display
    room for a file name.
    """
    if not item:
        return ''
    errors = item[2]
    display = compat.unicode(toascii(item[0]))
    if not verbose:
        width = file_name_max_len()
        # do not display a file name in progress bar if there is no space
        # available
        if width <= 10:
            return ''
        display = fixed_width_file_name(display, width)
    fg = 'green' if not errors else 'red'
    return style(prefix) + style(display, fg=fg)
def find_urls(location, unique=True):
    """
    Yield (url, line number) tuples for urls found in file at `location`,
    where url is a unicode string. Only return unique items if unique is
    True. `location` can be a list of strings for testing.
    """
    patterns = [('urls', urls_regex(),)]
    matches = find(location, patterns)
    if TRACE:
        # materialize the lazy matches so they can be logged and still
        # consumed by the filters below
        matches = list(matches)
        for m in matches:
            logger_debug('url match:', m)

    # the order of filters IS important
    filters = (
        verbatim_crlf_url_cleaner,
        end_of_url_cleaner,
        empty_urls_filter,
        scheme_adder,
        user_pass_cleaning_filter,
        build_regex_filter(INVALID_URLS_PATTERN),
        canonical_url_cleaner,
        junk_url_hosts_filter,
        junk_urls_filter,
    )
    if unique:
        # uniqueness filtering must run last, after all cleaning
        filters += (unique_filter,)
    matches = apply_filters(matches, *filters)
    for _key, url, _line, lineno in matches:
        if TRACE_URL:
            logger_debug(
                'find_urls: lineno:', lineno,
                '_line:', repr(_line),
                'type(url):', type(url),
                'url:', repr(url))
        yield compat.unicode(url), lineno
# Use byte-string path constants on Linux under Python 2 where the
# filesystem APIs are bytes-based; use unicode constants everywhere else.
if on_linux and py2:
    PATH_TYPE = bytes
    POSIX_PATH_SEP = b'/'
    WIN_PATH_SEP = b'\\'
    EMPTY_STRING = b''
    DOT = b'.'
    PATH_SEP = bytes(os.sep)
    PATH_ENV_VAR = b'PATH'
    PATH_ENV_SEP = bytes(os.pathsep)
else:
    PATH_TYPE = compat.unicode
    POSIX_PATH_SEP = '/'
    WIN_PATH_SEP = '\\'
    EMPTY_STRING = ''
    DOT = '.'
    PATH_SEP = compat.unicode(os.sep)
    PATH_ENV_VAR = 'PATH'
    PATH_ENV_SEP = compat.unicode(os.pathsep)

# every separator we may encounter, regardless of platform conventions
ALL_SEPS = POSIX_PATH_SEP + WIN_PATH_SEP

"""
File, paths and directory utility functions.
"""

#
# DIRECTORIES
#


def create_dir(location):
    """