def run(self, file_callback=None, directory_callback=None):
    """Walk the tree below this walker's root directory.

    Both callbacks are optional and are handed unchanged to the recursive
    walk, which starts with an empty list of ignore globs.  The root must
    pass the directory-exists barrier before the walk begins.
    """
    start = self.__root
    logger.debug("Start walking directory '%s'" % start)
    liche.utils.directory_exists_barrier(start)

    # A fresh list per run: globs accumulate while descending the tree.
    initial_globs = []
    self.__walk(start, file_callback, directory_callback, initial_globs)
def match(self, fileobject):
    """Return how closely the start of a file matches this license text.

    The license tokens are compared position by position with the first
    tokens of the file.

    Returns:
        0 when the file has fewer tokens than the license, or when the
        license has no tokens at all; otherwise the fraction of license
        tokens that compare equal, as a float between 0.0 and 1.0.
    """
    logger.debug("Looking for file '%s' confidence against license '%s'" %
                 (fileobject.path(), self.name()))

    tl = self.__text
    tf = fileobject.tokens()
    logger.debug("Tokens: file %d, license %d" % (len(tf), len(tl)))

    if len(tf) < len(tl):
        logger.debug("File is shorter than license (%d < %d))" %
                     (len(tf), len(tl)))
        return 0

    if len(tl) == 0:
        # Nothing to compare against; avoids dividing by zero below.
        return 0

    hits = 0
    misses = 0
    # zip() stops at the shorter sequence, i.e. after len(tl) tokens.
    for i, (file_token, license_token) in enumerate(zip(tf, tl)):
        logger.debug("tf[%d] = '%s', tl[%d] = '%s'" %
                     (i, file_token, i, license_token))
        # Keep the file token on the left-hand side of the comparison.
        if file_token == license_token:
            hits = hits + 1
        else:
            misses = misses + 1

    # float() keeps the ratio fractional under Python 2 integer division.
    confidence = float(hits) / (misses + hits)
    logger.debug("Hits %d / Missees %d -> Confidence %f" %
                 (hits, misses, confidence))
    return confidence
def __init__(self, path):
    """Load the glob patterns listed in the file at ``path``.

    Lines that are blank or whose first non-blank character is ``#`` are
    skipped.  Every other line, with only its trailing newline removed,
    becomes a ``Glob`` built with the absolute directory of ``path`` as
    its base.
    """
    self.__globs = list()

    logger.debug("Got globs file '%s' to handle" % path)
    liche.utils.file_exists_barrier(path)

    base = os.path.abspath(os.path.dirname(path))

    # The with-block closes the file even if Glob() raises.
    with open(path, 'r') as fh:
        for line in fh:
            line = line.rstrip('\n')
            if (re.match(r'^[ \t]*#.*$', line) or
                    re.match(r'^[ \t]*$', line)):
                continue
            self.__globs.append(Glob(base, line))

    logger.debug("Got %d globs: %s" %
                 (len(self.__globs), [str(g) for g in self.__globs]))
def __init__(self, filename, tags):
    """Parse the license text stored in ``filename``.

    Starts from an empty token list and a zero line count, then lets the
    parser fill both in.  ``tags`` is passed to the parser unchanged.
    """
    self.__text = []
    self.__lines = 0

    self.__parse(filename, tags)

    line_total = self.__lines
    logger.debug("License text is %d lines long" % line_total)
def __show_file(self, filename):
    """Write the path of ``filename`` to the job's output stream.

    The file object is still looked up through the file factory when no
    stream is set, but nothing is written in that case.
    """
    logger.debug("Showing file '%s'" % filename)
    f = self.__filefactory.get(filename)

    # NOTE(review): main() passes stream=None for --quiet; assuming it
    # reaches here unchanged, the former assert aborted quiet dry runs.
    if self.__stream is None:
        return

    self.__stream.write("%s\n" % f.path())
def __init__(self, filename):
    """Parse the license meta information stored in ``filename``.

    Starts from empty tag and compatibility sets, lets the parser fill
    them in, then logs what was collected.
    """
    self.__tags = set()
    self.__compatibles = set()

    self.__parse(filename)

    tag_names = map(str, self.__tags)
    logger.debug("License uses tags: %s" % tag_names)

    compatible_names = map(str, self.__compatibles)
    logger.debug("License is compatible with: %s" % compatible_names)
def __init__(self, name, basepath):
    """Build a license from the two files that share ``basepath``.

    The meta part is read from ``basepath`` plus the "meta" extension and
    the text part from ``basepath`` plus the "txt" extension.  The meta
    part is initialised first because its tags are passed to the text
    parser.
    """
    self.__name = name
    logger.debug("License '%s' base path is '%s'" % (self.__name, basepath))

    separator = os.path.extsep
    meta_filename = separator.join((basepath, "meta"))
    text_filename = separator.join((basepath, "txt"))

    LicenseMeta.__init__(self, meta_filename)
    known_tags = LicenseMeta.tags(self)
    LicenseText.__init__(self, text_filename, known_tags)
def __init__(self, path, licenses, ignore_filename=None):
    """Prepare a checking job for the directory ``path``.

    ``path`` must pass the directory-exists barrier.  ``licenses`` is
    stored as given, and ``ignore_filename`` is passed to the directory
    walker.  No output stream is attached until the job runs.
    """
    logger.debug("Initializing job for path '%s'" % path)
    liche.utils.directory_exists_barrier(path)

    factory = liche.file.FileFactory()
    walker = liche.directory.Walker(path, ignore_filename)

    self.__filefactory = factory
    self.__licenses = licenses
    self.__walker = walker
    self.__stream = None

    logger.debug("Job ready")
def __parse(self, filename, tags):
    """Tokenize the license text in ``filename`` into ``self.__text``.

    Each whitespace-separated word that appears in ``tags`` becomes a
    ``LicenseTag``.  Any other word has its punctuation padded with
    spaces, is split again, and yields one ``LicenseToken`` per piece.
    ``self.__lines`` is set to the number of lines read from the file.
    """
    logger.debug("Parsing license text from '%s'" % filename)
    liche.utils.file_exists_barrier(filename)

    # Pad punctuation with spaces so each mark becomes its own token.
    punct_mappings = {
        "`": " ` ",
        "'": " ' ",
        ";": " ; ",
        ".": " . ",
        ",": " , ",
        ":": " : ",
        "[": " [ ",
        "]": " ] ",
        "(": " ( ",
        ")": " ) ",
        "{": " { ",  # was " } ", which turned every "{" into "}"
        "}": " } ",
        "<": " < ",
        ">": " > ",
        "*": " * ",
        "/": " / ",
        "-": " - ",
    }

    line_count = 0
    parsed = list()

    # The with-block closes the file even if tokenizing raises.
    with open(filename, "r") as fh:
        for line in fh:
            # Count only real lines; the old loop also counted the EOF read.
            line_count = line_count + 1
            for word in line.split():
                if word in tags:
                    parsed.append(LicenseTag(word))
                    continue
                spaced = liche.utils.string_replace_all(word, punct_mappings)
                for token in spaced.split():
                    parsed.append(LicenseToken(token))

    self.__text = self.__text + parsed
    self.__lines = line_count
def __parse_pattern(self, base, pattern):
    """Turn a raw pattern into the glob expression to match against.

    A pattern starting with ``/`` has the slash removed and is joined
    onto ``base``.  Any other pattern, including the empty string, is
    returned unchanged.  ``pattern`` must carry no surrounding whitespace.
    """
    assert(pattern == pattern.strip())

    # startswith() is safe on an empty pattern, unlike pattern[0].
    if pattern.startswith('/'):
        # Absolute glob
        glob = os.path.join(base, pattern[1:].strip())
    else:
        # Relative glob
        glob = pattern

    logger.debug("Demangled glob '%s' is '%s'" % (pattern, glob))
    return glob
def __parse(self, filename):
    """Read license meta information from ``filename``.

    After stripping, each line must be one of:

    * empty, or a comment starting with ``#`` (skipped);
    * ``isCompatible: NAME ...``, whose names are added to the
      compatibility set;
    * ``tags: <tag> ...``, whose tags are added to the tag set and must
      each start with ``<``, one or more of ``[a-zA-Z0-9_-]``, then ``>``.

    Raises:
        Exception: on an invalid tag or any other line, with the file
            name and 1-based line number in the message.
    """
    logger.debug("Parsing license meta from '%s'" % filename)
    liche.utils.file_exists_barrier(filename)

    # The with-block closes the file on every raise below.
    with open(filename, "r") as fh:
        for ln, line in enumerate(fh, 1):
            line = line.strip()
            if re.match(r"^#.*$", line) or re.match(r"^$", line):
                continue

            r = re.match(r"^isCompatible[ \t]*:[ \t]*(.*)$", line)
            if r is not None:
                licenses = r.group(1).split()
                self.__compatibles = self.__compatibles.union(licenses)
                continue

            r = re.match(r"^tags[ \t]*:[ \t]*(.*)$", line)
            if r is not None:
                # A real list, so it can be iterated twice (a Python 3
                # map() would be exhausted before the validation loop).
                tags = r.group(1).split()
                self.__tags = self.__tags.union(tags)
                logger.debug("Checking tags")
                for tag in tags:
                    # NOTE(review): not anchored at the end, so "<a>x"
                    # passes; left as is to keep accepting current files.
                    if not re.match(r"<[a-zA-Z0-9_\-]+>", tag):
                        raise Exception("Tag '%s' is invalid (%s:%d)" %
                                        (tag, filename, ln))
                continue

            raise Exception("Malformed line (%s:%d)" % (filename, ln))
def get_by_extension(self, path):
    """Create a file object for ``path`` chosen by its extension.

    The extension is the text after the last dot, lower-cased.  Returns
    None when there is no extension or it is not in the table below,
    otherwise an instance of the mapped class built from ``path``.
    """
    suffix = os.path.splitext(path)[1]
    extension = suffix[1:].lower()
    if not extension:
        return None

    logger.debug("File '%s' extension is '%s'" % (path, extension))

    file_class = {
        "txt": TextFile,
        "log": TextFile,
        "lua": LuaFile,
        "pl": PerlFile,
        "py": PythonFile,
        "rb": RubyFile,
        "sh": ShellFile,
        "m4": M4File,
    }.get(extension)

    if file_class is None:
        return None
    return file_class(path)
def slurp(self, max_lines, line_cleaner=None):
    """Append up to ``max_lines`` lines of this file to ``self.__lines``.

    Reading stops at end of file or after ``max_lines`` lines, whichever
    comes first.  When ``line_cleaner`` is given, each line is passed
    through it before being stored.  ``max_lines`` must be a positive
    number.
    """
    assert(max_lines is not None)
    assert(max_lines > 0)

    logger.debug("Slurping max %d lines from '%s'" %
                 (max_lines, self.__path))

    stored = 0
    # The with-block closes the file even if line_cleaner raises.
    with open(self.__path, 'r') as fh:
        while stored < max_lines:
            line = fh.readline()
            if len(line) == 0:
                # End of file
                break
            if line_cleaner is not None:
                line = line_cleaner(line)
            self.__lines.append(line)
            stored = stored + 1

    # Reports lines actually stored; the EOF read is no longer counted.
    logger.debug("Slurped %d lines from '%s'" % (stored, self.__path))
def __run(self, stream, dry):
    """Walk the job's directory, reporting each file to ``stream``.

    With ``dry`` set, every file is only shown.  Otherwise every file is
    checked against the licenses, after the per-file read limit is set to
    twice the line count of the longest license.  ``stream`` is attached
    to the job only for the duration of the walk.
    """
    if dry:
        logger.info("Dry-running job for path '%s'" % self.__walker.root())
        callback = self.__show_file
    else:
        logger.info("Running job for path '%s'" % self.__walker.root())

        max_lines = 0
        for l in self.__licenses:
            max_lines = max(max_lines, l.lines())
        self.__slurp_lines = 2 * max_lines
        logger.debug("Slurping %d maximum lines for each file" %
                     self.__slurp_lines)

        callback = self.__check_file

    self.__stream = stream
    try:
        self.__walker.run(file_callback=callback)
    finally:
        # Detach the stream even if the walk raises.
        self.__stream = None
def __check_file(self, filename):
    """Match ``filename`` against every license and report the result.

    The file is slurped up to the job's line limit and matched against
    each license.  One line is written to the output stream: the file
    path, then each license with a non-zero confidence as
    ``NAME (confidence)``, sorted by name, or ``?`` when none matched.
    Nothing is written when no stream is set.
    """
    logger.debug("Checking file '%s'" % filename)

    f = self.__filefactory.get(filename)
    f.slurp(self.__slurp_lines)

    confidences = dict()
    for l in self.__licenses:
        confidences[l.name()] = l.match(f)

    # Sorted so the report does not depend on dict ordering.
    matches = list()
    for name in sorted(confidences):
        confidence = confidences[name]
        assert(confidence >= 0)
        if confidence == 0:
            continue
        matches.append("%s (%02.02f)" % (name, confidence))
    if len(matches) == 0:
        matches.append("?")

    # NOTE(review): main() passes stream=None for --quiet; assuming it
    # reaches here unchanged, the former assert aborted quiet runs.
    if self.__stream is None:
        return

    # str.join replaces string.join(), which only exists in Python 2.
    self.__stream.write("%s: %s\n" % (f.path(), ", ".join(matches)))
def __init__(self, filename):
    """Load every license named in the licenses index ``filename``.

    Blank lines and lines starting with ``#`` are skipped.  Every other
    line must hold one license name made of letters, digits and
    ``_ - + .``.  Each name becomes a ``License`` whose base path is the
    name joined to the directory of ``filename``.  The licenses are then
    cross-checked.

    Raises:
        Exception: on a line that is not a comment, blank, or a single
            valid name.
    """
    logger.debug("Reading licenses from '%s'" % filename)
    liche.utils.file_exists_barrier(filename)

    names = set()
    c = 0
    # The with-block closes the file when a malformed line raises.
    with open(filename, "r") as fh:
        for line in fh:
            c = c + 1
            line = line.strip()
            if re.match(r"^#.*$", line) or re.match(r"^$", line):
                logger.debug("Skipping comment/empty line")
                continue

            tmp = re.match(r"^[ \t]*([0-9A-Za-z_\-\+\.]+)[ \t]*$", line)
            if tmp is None:
                raise Exception("Malformed line %d in licenses file '%s'" %
                                (c, filename))
            names.add(tmp.group(1))

    logger.debug("Read %d lines, got %d licenses: '%s'" %
                 (c, len(names), names))

    self.__licenses = set()
    directory = os.path.dirname(filename)
    for name in names:
        basepath = os.path.join(directory, name)
        self.__licenses.add(License(name, basepath))

    self.__cross_check_barrier()

    logger.debug("Created %d licenses" % len(self.__licenses))
def __cross_check_barrier(self):
    """Check that every referenced compatible license is a known one.

    Raises:
        Exception: for the first license found whose compatibility set
            names licenses that are not loaded; the message lists the
            missing names in sorted order.
    """
    logger.debug("Cross-checking all licenses")
    assert self.__licenses is not None

    names = set([l.name() for l in self.__licenses])
    logger.debug("Known licenses: %s" % names)

    for l in self.__licenses:
        missing = l.compatibles().difference(names)
        if len(missing) > 0:
            logger.debug("Got problems with '%s' license" % l)
            # str.join replaces string.join(), which only exists in
            # Python 2; sorting keeps the message stable.
            raise Exception(
                "There are %d missing license(s) referenced "
                "by '%s' (%s)" %
                (len(missing), l, ", ".join(sorted(missing)))
            )
def __walk(self, root, file_callback, directory_callback, globs):
    """Recursively visit ``root``, honouring ignore files on the way down.

    If the directory contains the configured ignore file, its globs are
    added to ``globs`` for this directory and everything below it, and
    the ignore file itself is not visited.  Each remaining entry is
    tested against every glob by relative name first, then by absolute
    path; the last matching glob decides whether the entry is skipped.
    Directories are reported to ``directory_callback`` and descended
    into; regular files are reported to ``file_callback``.
    """
    assert(globs is not None)

    current_dir = os.path.abspath(root)
    logger.debug("Walking directory '%s'" % current_dir)

    entries = os.listdir(current_dir)
    logger.debug("Directory entries are: '%s'" % entries)
    assert("." not in entries)
    assert(".." not in entries)

    ignore_name = self.__ignore
    if (ignore_name is not None) and (ignore_name in entries):
        ignore_path = os.path.join(current_dir, ignore_name)
        logger.debug("Found ignore file '%s'" % ignore_path)

        globs_file = GlobsFile(ignore_path)
        logger.debug("Globs file parsed successfully")

        extra = globs_file.globs()
        logger.debug("Ignore file '%s' produced %d globs" %
                     (ignore_path, len(extra)))

        # Build a new list: the caller's globs must not change for
        # sibling directories.
        globs = globs + extra
        logger.debug("Globs are now %d" % len(globs))

        entries.remove(ignore_name)
    assert(ignore_name not in entries)
    assert(globs is not None)

    logger.debug("We have %d globs for directory '%s': %s" %
                 (len(globs), current_dir, map(str, globs)))

    for rel_path in entries:
        abs_path = os.path.abspath(os.path.join(current_dir, rel_path))
        assert(not os.path.isabs(rel_path))
        assert(os.path.isabs(abs_path))

        # Later globs override earlier ones.
        skip = False
        for g in globs:
            if g.match(rel_path):
                logger.debug("Relative path '%s' got a match with '%s'" %
                             (rel_path, g.pattern()))
                skip = False if g.is_inclusive() else True
            elif g.match(abs_path):
                logger.debug("Absolute path '%s' got a match with '%s'" %
                             (abs_path, g.pattern()))
                skip = False if g.is_inclusive() else True

        if skip:
            logger.info("Skipping '%s'" % abs_path)
            continue

        logger.debug("Handling path '%s'" % abs_path)
        liche.utils.path_exists_barrier(abs_path)

        if os.path.isdir(abs_path):
            liche.utils.directory_exists_barrier(abs_path)
            if directory_callback is not None:
                directory_callback(abs_path)
            self.__walk(abs_path, file_callback, directory_callback, globs)
            continue

        if os.path.isfile(abs_path):
            liche.utils.file_exists_barrier(abs_path)
            if file_callback is not None:
                file_callback(abs_path)
            continue

        if os.path.ismount(abs_path):
            logger.warning("Skipping '%s' (mount point)" % abs_path)
        else:
            logger.warning("Skipping '%s' (not a file or directory)" %
                           abs_path)

    logger.debug("Completed handling directory '%s' (%d globs)" %
                 (current_dir, len(globs)))
def main(argv):
    """Command-line entry point of the license checker.

    Parses the command line, loads the known licenses, and runs one job
    per distinct input directory (the current directory when none is
    given).

    Returns:
        0 on success, or after printing the version or license list;
        1 when any job returned False.
    """
    parser = argparse.ArgumentParser(
        description="A source-file LIcense CHEcker.",
        epilog="Report bugs to " + "<" + program_bugreport + ">",
        add_help=True,
        prog=program_name)

    parser.add_argument('-V', '--version',
                        action='store_true',
                        dest='show_version',
                        help="print version number, then exit")
    parser.add_argument('-v', '--verbose',
                        action='store_true',
                        dest='want_verbose',
                        help='produce verbose output')
    parser.add_argument('-d', '--debug',
                        action='store_true',
                        dest='want_debug',
                        help='produce debugging output')
    parser.add_argument('--quiet',
                        action='store_true',
                        dest='quiet',
                        help='perform actions quietly')
    parser.add_argument('--dry-run',
                        action='store_true',
                        dest='dry_run',
                        help='do not perform checks')
    parser.add_argument('--licenses',
                        action='store_true',
                        dest='show_licenses',
                        help='show licenses')
    # The default must be a list: with nargs='*' a plain "." would be
    # iterated character by character below.
    parser.add_argument('DIRECTORY',
                        nargs='*',
                        action='store',
                        default=["."],
                        help='input directory')

    # NOTE(review): argv is not used; parse_args() reads sys.argv itself.
    args = parser.parse_args()

    if args.show_version:
        print("%s (%s) %s" %
              (program_name,
               liche.settings.package_name,
               liche.settings.package_version))
        return 0

    if not args.want_debug:
        liche.log.debug_off()
    if not args.want_verbose:
        liche.log.info_off()

    logger.debug("%s %s" % (program_name, liche.settings.package_version))

    licenses_filename = os.path.join(liche.settings.pkgdatadir,
                                     "licenses.txt")
    licenses_factory = liche.license.LicensesFactory(licenses_filename)
    logger.debug("License factory contains %d licenses: %s" %
                 (len(licenses_factory.licenses()),
                  [str(l) for l in licenses_factory.licenses()]))

    if args.show_licenses:
        for l in licenses_factory.licenses():
            print("%s" % str(l))
        return 0

    tags = set()
    licenses = licenses_factory.licenses()
    for l in licenses:
        tags = set.union(tags, l.tags())
    logger.debug("There are %d known tags: %s" %
                 (len(tags), [str(t) for t in tags]))

    # A set, so a directory given twice is only checked once.
    paths = set()
    for path in args.DIRECTORY:
        logger.debug("Adding path '%s'" % path)
        paths.add(os.path.abspath(path))
    logger.debug("Paths are: '%s'" % str(paths))

    # NOTE(review): ignore_filename is not defined in this function;
    # presumably a module-level name -- confirm.
    jobs = set()
    for path in paths:
        jobs.add(liche.job.Job(path, licenses, ignore_filename))
    logger.debug("Jobs are: '%s'" % str(jobs))

    if args.quiet:
        s = None
    else:
        s = sys.stdout

    # A list, so len() also works where map() returns an iterator.
    rets = [job.run(stream=s, dry=args.dry_run) for job in jobs]
    logger.debug("%d jobs completed" % len(rets))

    if False in rets:
        logger.warning("Got problems")
        return 1

    logger.debug("Everything seems ok")
    return 0