def feed(self, pkgset, reporter):
    """Check the on-disk layout of one package directory.

    The directory is taken from the ebuild path of ``pkgset[0]``; all
    findings are attached to that package and handed to
    ``reporter.add_report``.

    Top-level entries are checked for characters outside
    ``allowed_filename_chars_set``, for execute bits on ebuilds, ``Manifest``
    and ``metadata.xml``, and (ebuilds only) for UTF-8 problems via
    ``utf8_check`` and for a package name that does not parse or does not
    match the directory name.  The ``files`` subtree, when present, is then
    walked for executable, oversized and badly named regular files.

    :param pkgset: sequence of packages; only the first element is used.
    :param reporter: object exposing ``add_report(result)``.
    """
    pkg = pkgset[0]
    base = os.path.dirname(pkg.ebuild.path)
    category = os.path.basename(
        os.path.dirname(os.path.dirname(pkg.ebuild.path)))
    ebuild_ext = '.ebuild'
    mismatched = []
    invalid = []
    # note we don't use os.walk, we need size info also
    for filename in listdir(base):
        # while this may seem odd, written this way such that the
        # filtering happens all in the genexp.  if the result was being
        # handed to any, it's a frame switch each
        # char, which adds up.
        if any(True for x in filename if x not in allowed_filename_chars_set):
            reporter.add_report(Glep31Violation(pkg, filename))
        if filename.endswith(ebuild_ext) or filename in \
                ("Manifest", "metadata.xml"):
            # 0o111 masks the user/group/other execute bits
            if os.stat(pjoin(base, filename)).st_mode & 0o111:
                reporter.add_report(ExecutableFile(pkg, filename))
        if filename.endswith(ebuild_ext):
            utf8_check(pkg, base, filename, reporter)
            pkg_name = os.path.basename(filename[:-len(ebuild_ext)])
            try:
                pkg_atom = atom('=%s/%s' % (category, pkg_name))
                if pkg_atom.package != os.path.basename(base):
                    mismatched.append(pkg_name)
            except MalformedAtom:
                invalid.append(pkg_name)
    if mismatched:
        reporter.add_report(MismatchedPN(pkg, mismatched))
    if invalid:
        reporter.add_report(InvalidPN(pkg, invalid))
    if not os.path.exists(pjoin(base, 'files')):
        return
    # directories are tracked relative to ``base`` so reports carry
    # package-relative paths
    unprocessed_dirs = deque(["files"])
    while unprocessed_dirs:
        cwd = unprocessed_dirs.pop()
        for fn in listdir(pjoin(base, cwd)):
            afn = pjoin(base, cwd, fn)
            # lstat: symlinks are neither followed nor treated as regular files
            st = os.lstat(afn)
            if stat.S_ISDIR(st.st_mode):
                if fn not in self.ignore_dirs:
                    unprocessed_dirs.append(pjoin(cwd, fn))
            elif stat.S_ISREG(st.st_mode):
                if st.st_mode & 0o111:
                    reporter.add_report(ExecutableFile(pkg, pjoin(cwd, fn)))
                if not fn.startswith("digest-"):
                    # 20480 bytes == 20 KiB
                    if st.st_size > 20480:
                        # report the package-relative path like the other
                        # checks, so nested files are identifiable
                        reporter.add_report(
                            SizeViolation(pkg, pjoin(cwd, fn), st.st_size))
                    if any(True for x in fn
                           if x not in allowed_filename_chars_set):
                        reporter.add_report(
                            Glep31Violation(pkg, pjoin(cwd, fn)))
def _internal_offset_iter_scan(path, chksum_handlers, offset,
                               stat_func=os.lstat, hidden=True, backup=True):
    """Walk ``path`` breadth-first, yielding objects addressed without ``offset``.

    The leading ``offset`` is sliced off ``path`` to form the reported
    location, while the real filesystem location (``offset`` joined with the
    relative path) is passed to ``gen_obj`` as ``real_location``.

    :param path: directory to scan; expected to live under ``offset``.
    :param chksum_handlers: forwarded unchanged to ``gen_obj``.
    :param offset: prefix removed from every reported path.
    :param stat_func: stat callable forwarded to ``gen_obj`` for every entry.
    :param hidden: when false, names starting with ``.`` are skipped.
    :param backup: when false, names ending with ``~`` are skipped.
    :return: generator of whatever ``gen_obj`` produces.
    """
    offset = normpath(offset)
    path = normpath(path)
    dirs = collections.deque([path[len(offset):]])
    # an empty remainder means path == offset; no object is emitted for the
    # root itself in that case
    if dirs[0]:
        yield gen_obj(dirs[0], chksum_handlers=chksum_handlers,
                      stat_func=stat_func)

    sep = os.path.sep
    while dirs:
        base = dirs.popleft()
        real_base = pjoin(offset, base.lstrip(sep))
        base = base.rstrip(sep) + sep
        for x in listdir(real_base):
            if not hidden and x.startswith('.'):
                continue
            if not backup and x.endswith('~'):
                continue
            path = pjoin(base, x)
            # honour the caller's stat_func instead of always using os.lstat
            obj = gen_obj(path, chksum_handlers=chksum_handlers,
                          real_location=pjoin(real_base, x),
                          stat_func=stat_func)
            yield obj
            if obj.is_dir:
                dirs.append(path)
def feed(self, pkgset, reporter):
    """Check the top-level files of one package directory.

    The directory is the one holding the ebuild of ``pkgset[0]``.  Every
    entry is checked for characters outside ``allowed_filename_chars_set``;
    ebuilds, ``Manifest``, ``ChangeLog`` and ``metadata.xml`` are checked for
    execute bits; ebuilds and the ``ChangeLog`` are passed to ``utf8_check``.
    A ``ChangeLog`` that cannot be opened because it does not exist is
    reported as ``MissingFile``.

    :param pkgset: sequence of packages; only the first element is used.
    :param reporter: object exposing ``add_report(result)``.
    :raises IOError: any error other than ENOENT raised while checking the
        ``ChangeLog``.
    """
    pkg = pkgset[0]
    base = os.path.dirname(pkg.ebuild.path)
    # note we don't use os.walk, we need size info also
    for filename in listdir(base):
        # while this may seem odd, written this way such that the
        # filtering happens all in the genexp.  if the result was being
        # handed to any, it's a frame switch each
        # char, which adds up.
        if any(True for x in filename if x not in allowed_filename_chars_set):
            reporter.add_report(Glep31Violation(pkg, filename))
        if filename.endswith(".ebuild") or filename in \
                ("Manifest", "ChangeLog", "metadata.xml"):
            # 0o111 masks the user/group/other execute bits
            if os.stat(pjoin(base, filename)).st_mode & 0o111:
                reporter.add_report(ExecutableFile(pkg, filename))
        if filename.endswith(".ebuild"):
            utf8_check(pkg, base, filename, reporter)
    try:
        utf8_check(pkg, base, "ChangeLog", reporter)
    except IOError as e:
        # only "no such file" is turned into a report
        if e.errno != errno.ENOENT:
            raise
        reporter.add_report(MissingFile(pkg, "ChangeLog"))
def _internal_iter_scan(path, chksum_handlers, stat_func=os.lstat,
                        hidden=True, backup=True):
    """Yield an object for ``path`` and, if it is a directory, everything below.

    The root object is always yielded first.  Directories are then expanded
    in FIFO order, so the walk is breadth-first.

    :param path: location to scan; normalised with ``normpath`` first.
    :param chksum_handlers: forwarded unchanged to ``gen_obj``.
    :param stat_func: stat callable forwarded to ``gen_obj``.
    :param hidden: when false, names starting with ``.`` are skipped.
    :param backup: when false, names ending with ``~`` are skipped.
    :return: generator of whatever ``gen_obj`` produces.
    """
    root = normpath(path)
    root_obj = gen_obj(root, chksum_handlers=chksum_handlers,
                       stat_func=stat_func)
    yield root_obj
    if not root_obj.is_dir:
        return

    pending = collections.deque([root])
    while pending:
        current = pending.popleft()
        for name in listdir(current):
            if name.startswith('.') and not hidden:
                continue
            if name.endswith('~') and not backup:
                continue
            child = pjoin(current, name)
            child_obj = gen_obj(child, chksum_handlers=chksum_handlers,
                                real_location=child, stat_func=stat_func)
            yield child_obj
            if child_obj.is_dir:
                # queue the subdirectory for a later pass
                pending.append(child)
def is_empty(self):
    """Return True when ``self.location`` holds no entries or does not exist.

    :raises EnvironmentError: for any listing failure other than ENOENT.
    """
    result = True
    try:
        # any files existing means it's not empty
        result = not listdir(self.location)
    except EnvironmentError as e:
        # a missing directory counts as empty; anything else is a real error
        if e.errno != errno.ENOENT:
            raise
    # the computed value has to be handed back; without this the method
    # would always evaluate to None
    return result
def _get_mtimes(loc): d = {} sdir = stat.S_ISDIR # yes, listdir here makes sense due to the stating, and the # potential for listdir_dirs to do it's own statting if the # underlying FS doesn't support dt_type... for x in listdir(loc): st = os.stat(pjoin(loc, x)) if sdir(st.st_mode): d[x] = st.st_mtime return d
def is_empty(self):
    """Tell whether ``self.location`` has no entries (or is missing).

    A debug message is logged whenever the answer is True.

    :raises EnvironmentError: for any listing failure other than ENOENT.
    """
    try:
        entries = listdir(self.location)
    except EnvironmentError as exc:
        if exc.errno != errno.ENOENT:
            raise
        # nonexistent location: treated the same as an empty one
        entries = ()
    if entries:
        # any files existing means it's not empty
        return False
    logger.debug("repository at %r is empty" % (self.location,))
    return True
def is_empty(self):
    """Report whether the directory at ``self.location`` is empty or absent.

    Emits a debug log line when it is.

    :raises EnvironmentError: for any listing failure other than ENOENT.
    """
    location_missing = False
    contents = None
    try:
        contents = listdir(self.location)
    except EnvironmentError as err:
        if err.errno != errno.ENOENT:
            raise
        location_missing = True

    # any files existing means it's not empty
    empty = True if location_missing else not contents
    if empty:
        logger.debug("repository at %r is empty" % (self.location, ))
    return empty
def is_empty(self):
    """Return True if the repo location has no entries or does not exist.

    The debug message is logged only when the location exists and is empty;
    a missing location returns True silently.
    """
    try:
        if listdir(self.location):
            # any files existing means it's not empty
            return False
        logger.debug(f"repo is empty: {self.location!r}")
    except FileNotFoundError:
        pass
    return True
def is_empty(self):
    """Return True if the repo location has no entries or does not exist.

    A debug message is logged whenever the result is True.

    :raises EnvironmentError: for any listing failure other than ENOENT.
    """
    try:
        found = listdir(self.location)
    except EnvironmentError as exc:
        if exc.errno != errno.ENOENT:
            raise
        # missing location is reported as empty
        found = []
    # any files existing means it's not empty
    if found:
        return False
    logger.debug("repo is empty: %r", self.location)
    return True
def _get_mtimes(loc): d = {} sdir = stat.S_ISDIR # yes, listdir here makes sense due to the stating, and the # potential for listdir_dirs to do it's own statting if the # underlying FS doesn't support dt_type... try: for x in listdir(loc): st = os.stat(pjoin(loc, x)) if sdir(st.st_mode): d[x] = st.st_mtime except OSError as e: if e.errno != errno.ENOENT: raise return d
def _internal_iter_scan(path, chksum_handlers, stat_func=os.lstat):
    """Yield an object for ``path`` and, if it is a directory, everything below.

    The root object is always yielded first.  Directories are then expanded
    in FIFO order, so the walk is breadth-first.

    :param path: location to scan; normalised with ``normpath`` first.
    :param chksum_handlers: forwarded unchanged to ``gen_obj``.
    :param stat_func: stat callable forwarded to ``gen_obj``.
    :return: generator of whatever ``gen_obj`` produces.
    """
    start = normpath(path)
    start_obj = gen_obj(start, chksum_handlers=chksum_handlers,
                        stat_func=stat_func)
    yield start_obj
    if not start_obj.is_dir:
        return

    queue = collections.deque([start])
    while queue:
        parent = queue.popleft()
        for entry in listdir(parent):
            location = pjoin(parent, entry)
            entry_obj = gen_obj(location, chksum_handlers=chksum_handlers,
                                real_location=location, stat_func=stat_func)
            yield entry_obj
            if entry_obj.is_dir:
                # descend into it once the current level is exhausted
                queue.append(location)
def feed(self, pkgset):
    """Scan one package directory and yield results for layout problems.

    The directory is ``<target_repo.location>/<category>/<package>`` of
    ``pkgset[0]``.  Paths for which ``self.gitignored`` is true are skipped.

    Top-level entries are checked for execute bits and for characters outside
    ``allowed_filename_chars_set``; ebuilds additionally get a UTF-8 check of
    their first 8192 bytes and a package-name check.  When
    ``self.options.gentoo_repo`` is set, entries other than ebuilds,
    ``Manifest``, ``metadata.xml`` and ``files`` are reported as unknown.
    The ``files`` subtree is then walked for executable, empty, oversized,
    badly named and duplicated regular files.

    :param pkgset: sequence of packages; only the first element is used.
    :return: generator of result objects.
    """
    # function-scope import keeps this block self-contained
    import codecs

    pkg = pkgset[0]
    pkg_path = pjoin(self.options.target_repo.location, pkg.category, pkg.package)
    ebuild_ext = '.ebuild'
    mismatched = []
    invalid = []
    unknown = []
    for filename in listdir(pkg_path):
        path = pjoin(pkg_path, filename)
        if self.gitignored(path):
            continue

        # 0o111 masks the user/group/other execute bits
        if os.path.isfile(path) and os.stat(path).st_mode & 0o111:
            yield ExecutableFile(filename, pkg=pkg)

        # a single set difference finds every disallowed character at once
        banned_chars = set(filename) - allowed_filename_chars_set
        if banned_chars:
            yield BannedCharacter(filename, sorted(banned_chars), pkg=pkg)

        if filename.endswith(ebuild_ext):
            try:
                with open(path, mode='rb') as f:
                    head = f.read(8192)
                    at_eof = not f.read(1)
                # Only the first 8192 bytes are inspected.  Decode them
                # incrementally so a multi-byte character cut by that limit
                # is not misreported; a truncated sequence is only an error
                # when the file really ends there.
                codecs.getincrementaldecoder('utf-8')().decode(head, final=at_eof)
            except UnicodeDecodeError as e:
                yield InvalidUTF8(filename, str(e), pkg=pkg)

            pkg_name = os.path.basename(filename[:-len(ebuild_ext)])
            try:
                pkg_atom = atom_cls(f'={pkg.category}/{pkg_name}')
                if pkg_atom.package != os.path.basename(pkg_path):
                    mismatched.append(pkg_name)
            except MalformedAtom:
                invalid.append(pkg_name)
        elif (self.options.gentoo_repo and
                filename not in ('Manifest', 'metadata.xml', 'files')):
            unknown.append(filename)

    if mismatched:
        yield MismatchedPN(sorted(mismatched), pkg=pkg)
    if invalid:
        yield InvalidPN(sorted(invalid), pkg=pkg)
    if unknown:
        yield UnknownPkgDirEntry(sorted(unknown), pkg=pkg)

    files_by_size = defaultdict(list)
    # +1 drops the path separator, so base_dir is relative to pkg_path
    pkg_path_len = len(pkg_path) + 1
    for root, dirs, files in os.walk(pjoin(pkg_path, 'files')):
        # don't visit any ignored directories; editing ``dirs`` in place
        # prunes os.walk's descent
        for d in self.ignore_dirs.intersection(dirs):
            dirs.remove(d)
        base_dir = root[pkg_path_len:]
        for filename in files:
            path = pjoin(root, filename)
            if self.gitignored(path):
                continue
            # lstat: symlinks are not followed, so they are never "regular"
            file_stat = os.lstat(path)
            if stat.S_ISREG(file_stat.st_mode):
                if file_stat.st_mode & 0o111:
                    yield ExecutableFile(pjoin(base_dir, filename), pkg=pkg)
                if file_stat.st_size == 0:
                    yield EmptyFile(pjoin(base_dir, filename), pkg=pkg)
                else:
                    files_by_size[file_stat.st_size].append(pjoin(base_dir, filename))
                    # 20480 bytes == 20 KiB
                    if file_stat.st_size > 20480:
                        yield SizeViolation(
                            pjoin(base_dir, filename), file_stat.st_size, pkg=pkg)
                banned_chars = set(filename) - allowed_filename_chars_set
                if banned_chars:
                    yield BannedCharacter(
                        pjoin(base_dir, filename), sorted(banned_chars), pkg=pkg)

    # only files sharing a size can be identical, so checksums are limited
    # to those groups
    files_by_digest = defaultdict(list)
    for same_size in files_by_size.values():
        if len(same_size) > 1:
            for f in same_size:
                digest = get_chksums(pjoin(pkg_path, f), self.digest_algo)[0]
                files_by_digest[digest].append(f)

    for same_digest in files_by_digest.values():
        if len(same_digest) > 1:
            yield DuplicateFiles(sorted(same_digest), pkg=pkg)
def feed(self, pkgset, reporter):
    """Check the on-disk layout of one package directory.

    The directory is the one containing ``pkgset[0].path``; findings are
    attached to that package and handed to ``reporter.add_report``.

    Top-level entries are checked for disallowed filename characters, for
    execute bits on ebuilds, ``Manifest`` and ``metadata.xml``, and (ebuilds
    only) via ``utf8_check`` and a package-name check.  The ``files``
    subtree, when present, is walked for executable, empty, oversized, badly
    named and duplicated regular files.

    :param pkgset: sequence of packages; only the first element is used.
    :param reporter: object exposing ``add_report(result)``.
    """
    def has_banned_chars(name):
        # true as soon as one character falls outside the allowed set
        return any(ch not in allowed_filename_chars_set for ch in name)

    pkg = pkgset[0]
    base = os.path.dirname(pkg.path)
    category = os.path.basename(os.path.dirname(base))
    expected_pn = os.path.basename(base)
    ebuild_ext = '.ebuild'
    mismatched = []
    invalid = []

    # note we don't use os.walk, we need size info also
    for entry in listdir(base):
        if has_banned_chars(entry):
            reporter.add_report(Glep31Violation(pkg, entry))

        is_ebuild = entry.endswith(ebuild_ext)
        if is_ebuild or entry in ("Manifest", "metadata.xml"):
            # 0o111 masks the user/group/other execute bits
            if os.stat(pjoin(base, entry)).st_mode & 0o111:
                reporter.add_report(ExecutableFile(pkg, entry))

        if not is_ebuild:
            continue
        utf8_check(pkg, base, entry, reporter)
        pkg_name = os.path.basename(entry[:-len(ebuild_ext)])
        try:
            if atom(f'={category}/{pkg_name}').package != expected_pn:
                mismatched.append(pkg_name)
        except MalformedAtom:
            invalid.append(pkg_name)

    if mismatched:
        reporter.add_report(MismatchedPN(pkg, mismatched))
    if invalid:
        reporter.add_report(InvalidPN(pkg, invalid))

    if not os.path.exists(pjoin(base, 'files')):
        return

    files_by_size = defaultdict(list)
    # directories are tracked relative to ``base``
    pending = deque(["files"])
    while pending:
        rel_dir = pending.pop()
        for name in listdir(pjoin(base, rel_dir)):
            rel_path = pjoin(rel_dir, name)
            st = os.lstat(pjoin(base, rel_dir, name))
            mode = st.st_mode
            if stat.S_ISDIR(mode):
                if name not in self.ignore_dirs:
                    pending.append(rel_path)
                continue
            if not stat.S_ISREG(mode):
                continue

            if mode & 0o111:
                reporter.add_report(ExecutableFile(pkg, rel_path))
            if name.startswith("digest-"):
                continue

            size = st.st_size
            if size == 0:
                reporter.add_report(EmptyFile(pkg, rel_path))
            else:
                files_by_size[size].append(rel_path)
                # 20480 bytes == 20 KiB
                if size > 20480:
                    reporter.add_report(SizeViolation(pkg, rel_path, size))
            if has_banned_chars(name):
                reporter.add_report(Glep31Violation(pkg, rel_path))

    # checksum only files that share a size with at least one other file
    files_by_digest = defaultdict(list)
    for same_size in files_by_size.values():
        if len(same_size) < 2:
            continue
        for rel_path in same_size:
            digest = get_chksums(pjoin(base, rel_path), self.digest_algo)[0]
            files_by_digest[digest].append(rel_path)

    for same_digest in files_by_digest.values():
        if len(same_digest) > 1:
            reporter.add_report(DuplicateFiles(pkg, same_digest))
def feed(self, pkgset, reporter):
    """Check the on-disk layout of one package directory.

    The directory is taken from the ebuild path of ``pkgset[0]``; all
    findings are attached to that package and handed to
    ``reporter.add_report``.

    Top-level entries are checked for characters outside
    ``allowed_filename_chars_set``, for execute bits on ebuilds, ``Manifest``
    and ``metadata.xml``, and (ebuilds only) for UTF-8 problems via
    ``utf8_check`` and for a package name that does not parse or does not
    match the directory name.  The ``files`` subtree, when present, is then
    walked for executable, oversized and badly named regular files.

    :param pkgset: sequence of packages; only the first element is used.
    :param reporter: object exposing ``add_report(result)``.
    """
    pkg = pkgset[0]
    base = os.path.dirname(pkg.ebuild.path)
    category = os.path.basename(
        os.path.dirname(os.path.dirname(pkg.ebuild.path)))
    ebuild_ext = '.ebuild'
    mismatched = []
    invalid = []
    # note we don't use os.walk, we need size info also
    for filename in listdir(base):
        # while this may seem odd, written this way such that the
        # filtering happens all in the genexp.  if the result was being
        # handed to any, it's a frame switch each
        # char, which adds up.
        if any(True for x in filename if x not in allowed_filename_chars_set):
            reporter.add_report(Glep31Violation(pkg, filename))
        if filename.endswith(ebuild_ext) or filename in \
                ("Manifest", "metadata.xml"):
            # 0o111 masks the user/group/other execute bits
            if os.stat(pjoin(base, filename)).st_mode & 0o111:
                reporter.add_report(ExecutableFile(pkg, filename))
        if filename.endswith(ebuild_ext):
            utf8_check(pkg, base, filename, reporter)
            pkg_name = os.path.basename(filename[:-len(ebuild_ext)])
            try:
                pkg_atom = atom('=%s/%s' % (category, pkg_name))
                if pkg_atom.package != os.path.basename(base):
                    mismatched.append(pkg_name)
            except MalformedAtom:
                invalid.append(pkg_name)
    if mismatched:
        reporter.add_report(MismatchedPN(pkg, mismatched))
    if invalid:
        reporter.add_report(InvalidPN(pkg, invalid))
    if not os.path.exists(pjoin(base, 'files')):
        return
    # directories are tracked relative to ``base`` so reports carry
    # package-relative paths
    unprocessed_dirs = deque(["files"])
    while unprocessed_dirs:
        cwd = unprocessed_dirs.pop()
        for fn in listdir(pjoin(base, cwd)):
            afn = pjoin(base, cwd, fn)
            # lstat: symlinks are neither followed nor treated as regular files
            st = os.lstat(afn)
            if stat.S_ISDIR(st.st_mode):
                if fn not in self.ignore_dirs:
                    unprocessed_dirs.append(pjoin(cwd, fn))
            elif stat.S_ISREG(st.st_mode):
                if st.st_mode & 0o111:
                    reporter.add_report(
                        ExecutableFile(pkg, pjoin(cwd, fn)))
                if not fn.startswith("digest-"):
                    # 20480 bytes == 20 KiB
                    if st.st_size > 20480:
                        # report the package-relative path like the other
                        # checks, so nested files are identifiable
                        reporter.add_report(
                            SizeViolation(pkg, pjoin(cwd, fn), st.st_size))
                    if any(True for x in fn
                           if x not in allowed_filename_chars_set):
                        reporter.add_report(
                            Glep31Violation(pkg, pjoin(cwd, fn)))