Example #1
0
 def run(self, file_callback = None, directory_callback = None):
     """Walk the directory tree below this walker's root.

     Both callbacks are handed over unchanged to self.__walk, which is
     started on the root directory with an empty list of globs.
     """
     start = self.__root

     logger.debug("Start walking directory '%s'" % start)
     liche.utils.directory_exists_barrier(start)

     self.__walk(start, file_callback, directory_callback, [])
Example #2
0
    def match(self, fileobject):
        """Return how confidently fileobject starts with this license.

        The license tokens are compared, position by position, with the
        leading tokens of the file.  The result is the fraction of
        positions that compare equal, a float between 0.0 and 1.0.

        0 is returned when the file has fewer tokens than the license or
        when the license has no tokens at all.
        """
        logger.debug("Looking for file '%s' confidence against license '%s'" % (fileobject.path(), self.name()))

        tl = self.__text
        tf = fileobject.tokens()

        logger.debug("Tokens: file %d, license %d" % (len(tf), len(tl)))

        if len(tf) < len(tl):
            logger.debug("File is shorter than license (%d < %d))" % (len(tf), len(tl)))
            return 0

        if len(tl) == 0:
            # Nothing to compare: bail out instead of dividing by zero below
            logger.debug("License has no tokens, nothing to match")
            return 0

        hits = 0
        for i in range(0, len(tl)):
            logger.debug("tf[%d] = '%s', tl[%d] = '%s'" % (i, tf[i], i, tl[i]))
            if tf[i] == tl[i]:
                hits = hits + 1
        misses = len(tl) - hits

        # float() forces a true division: with two integers Python 2 would
        # truncate the ratio to 0 for anything but a perfect match
        confidence = float(hits) / (misses + hits)

        logger.debug("Hits %d / Missees %d -> Confidence %f" % (hits, misses, confidence))

        return confidence
Example #3
0
    def __init__(self, path):
        """Load the globs listed in the file at path.

        Blank lines and lines whose first non-blank character is '#' are
        skipped.  Every other line, minus its trailing newline, is handed
        to Glob together with the absolute directory holding the file.
        """
        self.__globs = list()

        logger.debug("Got globs file '%s' to handle" % path)

        liche.utils.file_exists_barrier(path)

        base = os.path.abspath(os.path.dirname(path))

        # The context manager closes the file even when Glob() raises
        with open(path, 'r') as fh:
            for line in fh:
                line = line.rstrip('\n')

                if re.match(r'^[ \t]*#.*$', line) or re.match(r'^[ \t]*$', line):
                    continue

                self.__globs.append(Glob(base, line))

        # A real list (not a lazy map object) keeps the message readable
        logger.debug("Got %d globs: %s" %
                     (len(self.__globs), [str(g) for g in self.__globs]))
Example #4
0
    def __init__(self, filename, tags):
        """Start with an empty text, then fill it through self.__parse.

        filename and tags are passed to self.__parse as they are; the
        resulting line count is reported on the debug log.
        """
        self.__lines = 0
        self.__text = []

        self.__parse(filename, tags)

        logger.debug("License text is %d lines long" % (self.__lines,))
Example #5
0
    def __show_file(self, filename):
        """Write the path of the file object for filename to the stream.

        The file object comes from self.__filefactory; its path is written
        to self.__stream followed by a newline.
        """
        logger.debug("Showing file '%s'" % filename)

        entry = self.__filefactory.get(filename)

        out = self.__stream
        assert out is not None

        out.write("%s\n" % entry.path())
Example #6
0
    def __init__(self, filename):
        """Start with no tags and no compatibles, then parse filename.

        What self.__parse collected is reported on the debug log.
        """
        self.__compatibles = set()
        self.__tags = set()

        self.__parse(filename)

        tag_names = map(str, self.__tags)
        compatible_names = map(str, self.__compatibles)

        logger.debug("License uses tags:          %s" % tag_names)
        logger.debug("License is compatible with: %s" % compatible_names)
Example #7
0
    def __init__(self, name, basepath):
        """Set up the license called name from the files next to basepath.

        The meta part is initialized from basepath plus the "meta"
        extension, the text part from basepath plus the "txt" extension
        and the tags reported by the meta part.
        """
        self.__name = name

        logger.debug("License '%s' base path is '%s'" % (self.__name, basepath))

        sep = os.path.extsep

        # Meta first: the text initializer needs the tags it provides
        LicenseMeta.__init__(self, basepath + sep + "meta")
        LicenseText.__init__(self, basepath + sep + "txt", LicenseMeta.tags(self))
Example #8
0
    def __init__(self, path, licenses, ignore_filename = None):
        """Prepare a job over the directory path.

        path goes through liche.utils.directory_exists_barrier first.  The
        job then keeps the given licenses, a fresh file factory and a
        directory walker built from path and ignore_filename; the output
        stream starts out unset.
        """
        logger.debug("Initializing job for path '%s'" % path)

        liche.utils.directory_exists_barrier(path)

        factory = liche.file.FileFactory()
        walker  = liche.directory.Walker(path, ignore_filename)

        self.__stream      = None
        self.__licenses    = licenses
        self.__filefactory = factory
        self.__walker      = walker

        logger.debug("Job ready")
Example #9
0
    def __parse(self, filename, tags):
        """Tokenize the license text stored in filename.

        Every line is split on whitespace.  A word found in tags becomes a
        LicenseTag.  Any other word has spaces put around its punctuation
        (through liche.utils.string_replace_all) and each resulting piece
        becomes a LicenseToken.  The tokens are appended to self.__text
        and the number of lines read is stored in self.__lines.
        """
        logger.debug("Parsing license text from '%s'" % filename)

        liche.utils.file_exists_barrier(filename)

        # Each punctuation mark is surrounded by blanks so that the
        # split() below isolates it as a token of its own
        punct_mappings = {
            "`": " ` ",
            "'": " ' ",
            ";": " ; ",
            ".": " . ",
            ",": " , ",
            ":": " : ",
            "[": " [ ",
            "]": " ] ",
            "(": " ( ",
            ")": " ) ",
            "{": " { ",
            "}": " } ",
            "<": " < ",
            ">": " > ",
            "*": " * ",
            "/": " / ",
            "-": " - ",
        }

        ln = 0
        # The context manager closes the file even if tokenizing raises
        with open(filename, "r") as fh:
            for line in fh:
                # Only lines really read are counted
                ln = ln + 1

                tmp = list()
                for word in line.split():
                    if word in tags:
                        tmp.append(LicenseTag(word))
                        continue

                    tokens = liche.utils.string_replace_all(word, punct_mappings)
                    for token in tokens.split():
                        tmp.append(LicenseToken(token))

                # A line is added only once it has been tokenized completely
                self.__text.extend(tmp)

        self.__lines = ln
Example #10
0
    def __parse_pattern(self, base, pattern):
        """Turn a pattern into the glob expression to use.

        A pattern with a leading '/' has the slash dropped and the
        stripped remainder joined onto base.  Any other pattern is
        returned unchanged.  pattern must carry no surrounding whitespace.
        """
        assert pattern.strip() == pattern

        if pattern[0] != '/':
            # Relative glob
            result = pattern
        else:
            # Absolute glob
            result = os.path.join(base, pattern[1:].strip())

        logger.debug("Demangled glob '%s' is '%s'" % (pattern, result))

        return result
Example #11
0
    def __parse(self, filename):
        """Read the license meta information stored in filename.

        Blank lines and lines starting with '#' are ignored.  Every other
        line must look like one of:

            isCompatible: NAME NAME ...
            tags: <TAG> <TAG> ...

        The whitespace separated values are merged into self.__compatibles
        and self.__tags respectively.

        Raises Exception, quoting file name and line number, when a tag is
        invalid or when a line has any other shape.
        """
        logger.debug("Parsing license meta from '%s'" % filename)

        liche.utils.file_exists_barrier(filename)

        ln = 0
        # The context manager closes the file on the error paths as well
        with open(filename, "r") as fh:
            for line in fh:
                ln = ln + 1
                line = line.strip()

                if re.match(r"^#.*$", line) or re.match(r"^$", line):
                    continue

                r = re.match(r"^isCompatible[ \t]*:[ \t]*(.*)$", line)
                if r is not None:
                    # split() already yields stripped, non-empty words
                    licenses = r.group(1).split()
                    self.__compatibles = self.__compatibles.union(licenses)
                    continue

                r = re.match(r"^tags[ \t]*:[ \t]*(.*)$", line)
                if r is not None:
                    # A real list: it is walked by the check below and then
                    # once more by union()
                    tags = r.group(1).split()

                    logger.debug("Checking tags")
                    # NOTE(review): the pattern is only anchored at the start
                    # (re.match), so text following '>' is accepted; confirm
                    # whether that is intended
                    for tag in tags:
                        if not re.match(r"<[a-zA-Z0-9_\-]+>", tag):
                            raise Exception("Tag '%s' is invalid (%s:%d)" % (tag, filename, ln))

                    # Stored only after every tag of the line passed the check
                    self.__tags = self.__tags.union(tags)
                    continue

                raise Exception("Malformed line (%s:%d)" % (filename, ln))
Example #12
0
    def get_by_extension(self, path):
        """Return a file object for path chosen by its extension.

        The extension is compared in lower case.  None is returned when
        path has no extension or when the extension is not a known one.
        """
        _, dotted = os.path.splitext(path)
        extension = dotted[1:].lower()

        if not extension:
            return None

        logger.debug("File '%s' extension is '%s'" % (path, extension))

        mappings = {
            "txt" : TextFile,
            "log" : TextFile,
            "lua" : LuaFile,
            "pl"  : PerlFile,
            "py"  : PythonFile,
            "rb"  : RubyFile,
            "sh"  : ShellFile,
            "m4"  : M4File,
            }

        factory = mappings.get(extension)
        if factory is None:
            return None
        return factory(path)
Example #13
0
    def slurp(self, max_lines, line_cleaner = None):
        """Append up to max_lines lines of the file to self.__lines.

        Reading stops early at end of file.  When line_cleaner is given,
        every line goes through it and the value it returns is what gets
        stored.  max_lines must be a positive number.
        """
        assert(max_lines is not None)
        assert(max_lines > 0)

        logger.debug("Slurping max %d lines from '%s'" %
                     (max_lines, self.__path))

        ln = 0
        # The context manager closes the file even if line_cleaner raises
        with open(self.__path, 'r') as fh:
            while ln < max_lines:
                line = fh.readline()
                if len(line) == 0:
                    break
                # Counted after the end-of-file test so that the figure
                # logged below is the number of lines really stored
                ln = ln + 1
                if line_cleaner is not None:
                    line = line_cleaner(line)
                self.__lines.append(line)

        logger.debug("Slurped %d lines from '%s'" % (ln, self.__path))
Example #14
0
    def __run(self, stream, dry):
        """Walk the job's directory tree, handling every file found.

        When dry is true each file is handed to self.__show_file,
        otherwise to self.__check_file.  stream is published as
        self.__stream for the duration of the walk only.
        """
        if dry:
            logger.info("Dry-running job for path '%s'" % self.__walker.root())

            callback = self.__show_file
        else:
            logger.info("Running job for path '%s'" % self.__walker.root())

            max_lines = 0
            for l in self.__licenses:
                max_lines = max(max_lines, l.lines())

            # Twice the longest license.  Assigned outside the loop so the
            # attribute exists even when there are no licenses at all
            self.__slurp_lines = 2 * max_lines

            logger.debug("Slurping %d maximum lines for each file" %
                         self.__slurp_lines)

            callback = self.__check_file

        self.__stream = stream
        try:
            self.__walker.run(file_callback = callback)
        finally:
            # Never keep the caller's stream around, not even on errors
            self.__stream = None
Example #15
0
    def __check_file(self, filename):
        """Match filename against every license and report the outcome.

        One line is written to self.__stream: the path of the file,
        followed by each license whose confidence is not zero (sorted by
        license name) or by "?" when there is none.
        """
        logger.debug("Checking file '%s'" % filename)

        f = self.__filefactory.get(filename)
        f.slurp(self.__slurp_lines)

        confidencies = dict()
        for l in self.__licenses:
            confidencies[l.name()] = l.match(f)

        # NOTE(review): a caller passing no stream ends up here; confirm
        # whether that case should skip the report rather than assert
        assert(self.__stream is not None)

        # Sorted so that the report does not depend on dictionary ordering
        matches = list()
        for name in sorted(confidencies):
            d = confidencies[name]
            assert(d >= 0)
            if d == 0:
                continue
            matches.append("%s (%02.02f)" % (name, d))
        if len(matches) == 0:
            matches.append("?")

        # str.join replaces the deprecated string.join() function
        self.__stream.write("%s: %s\n" %
                            (f.path(), ", ".join(matches)))
Example #16
0
    def __init__(self, filename):
        """Create a License for every name listed in filename.

        The file holds one license name per line; blank lines and lines
        starting with '#' are ignored.  The files of each license are
        looked up next to filename, using the license name as base name.

        Raises Exception when a line is not a valid license name.
        """
        logger.debug("Reading licenses from '%s'" % filename)

        liche.utils.file_exists_barrier(filename)

        names = set()
        c = 0
        # The context manager closes the file on the error path as well
        with open(filename, "r") as fh:
            for line in fh:
                c = c + 1

                line = line.strip()

                if re.match(r"^#.*$", line) or re.match(r"^$", line):
                    logger.debug("Skipping comment/empty line")
                    continue

                tmp = re.match(r"^[ \t]*([0-9A-Za-z_\-\+\.]+)[ \t]*$", line)
                if tmp is None:
                    raise Exception("Malformed line %d in licenses file '%s'" % (c, filename))

                names.add(tmp.group(1))

        logger.debug("Read %d lines, got %d licenses: '%s'" % (c, len(names), names))

        self.__licenses = set()
        directory = os.path.dirname(filename)
        for name in names:
            basepath = os.path.join(directory, name)
            self.__licenses.add(License(name, basepath))

        self.__cross_check_barrier()

        logger.debug("Created %d licenses" % len(self.__licenses))
Example #17
0
    def __cross_check_barrier(self):
        """Check that licenses only reference licenses that are known.

        Raises Exception for the first license whose compatibles contain
        a name that does not belong to any license in self.__licenses.
        """
        logger.debug("Cross-checking all licenses")

        assert self.__licenses is not None

        names = set(l.name() for l in self.__licenses)
        logger.debug("Known licenses: %s" % names)

        for l in self.__licenses:
            missing = l.compatibles().difference(names)
            if len(missing) > 0:
                logger.debug("Got problems with '%s' license" % l)
                # str.join replaces the deprecated string.join() function;
                # the names are sorted to get a reproducible message
                raise Exception(
                    "There are %d missing license(s) referenced "
                    "by '%s' (%s)" % (len(missing), l, ", ".join(sorted(missing)))
                )
Example #18
0
    def __walk(self,
               root,
               file_callback,
               directory_callback,
               globs):
        """Visit the entries of root, descending into its directories.

        When the directory holds a file named like self.__ignore, the globs
        it lists are added to globs for this directory and everything
        below it, and the file itself is not visited.  For every entry the
        last glob matching its relative or absolute path decides: the entry
        is skipped unless that glob is inclusive.  Surviving directories
        are passed to directory_callback and walked recursively; surviving
        files are passed to file_callback.  Anything else is logged and
        left alone.
        """
        assert globs is not None

        current_dir = os.path.abspath(root)

        logger.debug("Walking directory '%s'" % current_dir)

        entries = os.listdir(current_dir)
        logger.debug("Directory entries are: '%s'" % entries)

        assert "."  not in entries
        assert ".." not in entries

        ignore_name = self.__ignore
        if ignore_name is not None and ignore_name in entries:
            ignore_path = os.path.join(current_dir, ignore_name)
            logger.debug("Found ignore file '%s'" % ignore_path)

            globs_file = GlobsFile(ignore_path)
            logger.debug("Globs file parsed successfully")

            extra = globs_file.globs()
            logger.debug("Ignore file '%s' produced %d globs" %
                         (ignore_path, len(extra)))

            # A new list: the caller's globs must not see our additions
            globs = globs + extra
            logger.debug("Globs are now %d" % len(globs))
            entries.remove(ignore_name)

        assert ignore_name not in entries
        assert globs is not None

        logger.debug("We have %d globs for directory '%s': %s" %
                     (len(globs), current_dir, map(str, globs)))

        for entry in entries:
            abs_path = os.path.abspath(os.path.join(current_dir, entry))

            assert not os.path.isabs(entry)
            assert os.path.isabs(abs_path)

            # The relative form is tried first; the absolute one only
            # when the relative form did not match
            candidates = (
                (entry,    "Relative path '%s' got a match with '%s'"),
                (abs_path, "Absolute path '%s' got a match with '%s'"),
            )

            skip = False
            for g in globs:
                for candidate, message in candidates:
                    if g.match(candidate):
                        logger.debug(message % (candidate, g.pattern()))
                        # Later globs override the verdict of earlier ones
                        skip = not g.is_inclusive()
                        break

            if skip:
                logger.info("Skipping '%s'" % abs_path)
                continue

            logger.debug("Handling path '%s'" % abs_path)
            liche.utils.path_exists_barrier(abs_path)

            if os.path.isdir(abs_path):
                liche.utils.directory_exists_barrier(abs_path)

                if directory_callback is not None:
                    directory_callback(abs_path)

                self.__walk(abs_path, file_callback, directory_callback, globs)
                continue

            if os.path.isfile(abs_path):
                liche.utils.file_exists_barrier(abs_path)

                if file_callback is not None:
                    file_callback(abs_path)
                continue

            if os.path.ismount(abs_path):
                logger.warning("Skipping '%s' (mount point)" %
                               abs_path)
            else:
                logger.warning("Skipping '%s' (not a file or directory)" %
                               abs_path)

        logger.debug("Completed handling directory '%s' (%d globs)" %
                     (current_dir, len(globs)))
Example #19
0
def main(argv):
    """Run the license checker and return the process exit status.

    One job is created for every DIRECTORY given on the command line (the
    current directory when none is given) and each job is run in turn.

    Returns 0 on success and 1 when the run() of any job returned False.
    """
    parser = argparse.ArgumentParser(description =
                                     "A source-file LIcense CHEcker.",
                                     epilog      = "Report bugs to " +
                                     "<" + program_bugreport + ">",
                                     add_help    = True,
                                     prog        = program_name)

    #parser.add_argument('-V', '--version',
    #                    action = 'version',
    #                    version = '%(prog)s ' +
    #                    liche.settings.package_version,
    #                    help    = "print version number, then exit")
    parser.add_argument('-V', '--version',
                        action = 'store_true',
                        dest   = 'show_version',
                        help   = "print version number, then exit")
    parser.add_argument('-v', '--verbose',
                        action = 'store_true',
                        dest   = 'want_verbose',
                        help   = 'produce verbose output')
    parser.add_argument('-d', '--debug',
                        action = 'store_true',
                        dest   = 'want_debug',
                        help   = 'produce debugging output')

    parser.add_argument('--quiet',
                        action = 'store_true',
                        dest   = 'quiet',
                        help   = 'perform actions quietly')
    parser.add_argument('--dry-run',
                        action = 'store_true',
                        dest   = 'dry_run',
                        help   = 'do not perform checks')
    parser.add_argument('--licenses',
                        action = 'store_true',
                        dest   = 'show_licenses',
                        help   = 'show licenses')
    #parser.add_argument('--exclude',
    #                    nargs   = 1,
    #                    action  = 'append',
    #                    dest    = 'exclude',
    #                    metavar = 'PATH',
    #                    help    = 'exclude paths, given as PATTERN')

    # The default is a list, like the value produced by nargs = '*': a
    # bare string would be walked character by character further down
    parser.add_argument('DIRECTORY',
                        nargs   = '*',
                        action  = 'store',
                        default = ["."],
                        help    = 'input directory')

    # NOTE(review): argv is not used, parse_args() reads sys.argv on its
    # own; confirm what callers pass before wiring it in here
    args = parser.parse_args()

    if args.show_version:
        print("%s (%s) %s" %
              (program_name,
               liche.settings.package_name,
               liche.settings.package_version))
        return 0

    if not args.want_debug:
        liche.log.debug_off()

    if not args.want_verbose:
        liche.log.info_off()

    logger.debug("%s %s" % (program_name, liche.settings.package_version))

    licenses_filename = os.path.join(liche.settings.pkgdatadir, "licenses.txt")
    licenses_factory  = liche.license.LicensesFactory(licenses_filename)

    logger.debug("License factory contains %d licenses: %s" %
                 (len(licenses_factory.licenses()),
                  [str(l) for l in licenses_factory.licenses()]))

    if args.show_licenses:
        for l in licenses_factory.licenses():
            print("%s" % str(l))
        return 0

    # The tags are gathered for the debug log only
    tags = set()
    licenses = licenses_factory.licenses()
    for l in licenses:
        tags = tags.union(l.tags())

    logger.debug("There are %d known tags: %s" %
                 (len(tags), [str(t) for t in tags]))

    paths = set()
    for path in args.DIRECTORY:
        logger.debug("Adding path '%s'" % path)
        paths.add(os.path.abspath(path))
    logger.debug("Paths are: '%s'" % str(paths))

    jobs = set()
    for path in paths:
        jobs.add(liche.job.Job(path, licenses, ignore_filename))
    logger.debug("Jobs are: '%s'" % str(jobs))

    # NOTE(review): in quiet mode the jobs get no stream at all; confirm
    # that Job.run() copes with a None stream
    if args.quiet:
        s = None
    else:
        s = sys.stdout

    # A list rather than map(): its length is needed right below
    rets = [job.run(stream = s, dry = args.dry_run) for job in jobs]
    logger.debug("%d jobs completed" % len(rets))

    if False in rets:
        logger.warning("Got problems")
        return 1

    logger.debug("Everything seems ok")

    return 0