Example #1
    def save(self, latest_version, current_time):
        # Attempt to write out our version check file
        with lockfile.FileLock(self.lockfile_path).locked(timeout=10):
            state = {
                'last_check': current_time.strftime(SELFCHECK_DATE_FMT),
                'latest_version': latest_version,
            }
            with open(self.statefile_path, 'w') as statefile:
                json.dump(state, statefile, sort_keys=True,
                          separators=(',', ':'))
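The pattern above — wait up to a timeout for a file lock, then write a small, compact JSON state file while holding it — can be sketched as a standalone function. This is a minimal sketch, not the original module: it assumes the same `FileLock` wrapper that the later examples import as `from scancode import lockfile`, and the paths, version string and `SELFCHECK_DATE_FMT` value are illustrative stand-ins.

import datetime
import json

from scancode import lockfile  # assumed: same FileLock wrapper as in Examples #2 and #3

SELFCHECK_DATE_FMT = '%Y-%m-%dT%H:%M:%SZ'  # assumed ISO-8601-style format; the real constant is not shown above


def save_state(statefile_path, lockfile_path, latest_version, current_time):
    # Serialize concurrent writers: wait up to 10 seconds for the lock, or raise on timeout
    with lockfile.FileLock(lockfile_path).locked(timeout=10):
        state = {
            'last_check': current_time.strftime(SELFCHECK_DATE_FMT),
            'latest_version': latest_version,
        }
        # sort_keys and compact separators keep the state file small and diff-friendly
        with open(statefile_path, 'w') as statefile:
            json.dump(state, statefile, sort_keys=True, separators=(',', ':'))


save_state('selfcheck.json', 'selfcheck.json.lock', '1.2.3', datetime.datetime.utcnow())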
Example #2
    def load_or_build(
        licensedcode_cache_dir=licensedcode_cache_dir,
        scancode_cache_dir=scancode_cache_dir,
        check_consistency=SCANCODE_DEV_MODE,
        # used for testing only
        timeout=LICENSE_INDEX_LOCK_TIMEOUT,
        tree_base_dir=scancode_src_dir,
        licenses_data_dir=None,
        rules_data_dir=None,
    ):
        """
        Load, or build and save, and return a LicenseCache object.

        We either load a cached LicenseIndex or build and cache the index.
        Alongside it, we load from cache (or build) the license db, SPDX
        symbols and other license-related data structures.

        - If the cache does not exist, a new index is built and cached.
        - If `check_consistency` is True, the cache is checked for consistency and
          rebuilt if inconsistent or stale.
        - If `check_consistency` is False, the cache is NOT checked for consistency;
          if the cache files exist but ARE stale, the cache WILL NOT be rebuilt.
        """
        idx_cache_dir = os.path.join(licensedcode_cache_dir, LICENSE_INDEX_DIR)
        create_dir(idx_cache_dir)
        cache_file = os.path.join(idx_cache_dir, LICENSE_INDEX_FILENAME)

        has_cache = os.path.exists(cache_file) and os.path.getsize(cache_file)

        # bypass check if no consistency check is needed
        if has_cache and not check_consistency:
            try:
                return load_cache_file(cache_file)
            except Exception as e:
                # work around some rare Windows quirks
                import traceback
                print(
                    'Inconsistent License cache: checking and rebuilding index.'
                )
                print(str(e))
                print(traceback.format_exc())

        from licensedcode.models import licenses_data_dir as ldd
        from licensedcode.models import rules_data_dir as rdd
        from licensedcode.models import load_licenses
        from scancode import lockfile

        licenses_data_dir = licenses_data_dir or ldd
        rules_data_dir = rules_data_dir or rdd

        lock_file = os.path.join(scancode_cache_dir, LICENSE_LOCKFILE_NAME)
        checksum_file = os.path.join(scancode_cache_dir, LICENSE_CHECKSUM_FILE)

        has_tree_checksum = os.path.exists(checksum_file)

        # here, we have no cache or we want a validity check: lock, check
        # and build or rebuild as needed
        try:
            # acquire lock and wait until timeout to get a lock or die
            with lockfile.FileLock(lock_file).locked(timeout=timeout):
                current_checksum = None
                # is the current cache consistent or stale?
                if has_cache and has_tree_checksum:
                    # if we have a saved cached index
                    # load saved tree_checksum and compare with current tree_checksum
                    with open(checksum_file) as etcs:
                        existing_checksum = etcs.read()

                    current_checksum = tree_checksum(
                        tree_base_dir=tree_base_dir)
                    if current_checksum == existing_checksum:
                        # The cache is consistent with the latest code and data
                        # load and return
                        return load_cache_file(cache_file)

                # Here, the cache is not consistent with the latest code and
                # data: It is either stale or non-existing: we need to
                # rebuild all cached data (e.g. mostly the index) and cache it

                licenses_db = load_licenses(
                    licenses_data_dir=licenses_data_dir)
                index = build_index(
                    licenses_db=licenses_db,
                    licenses_data_dir=licenses_data_dir,
                    rules_data_dir=rules_data_dir,
                )
                spdx_symbols = build_spdx_symbols(licenses_db=licenses_db)
                unknown_spdx_symbol = build_unknown_spdx_symbol(
                    licenses_db=licenses_db)
                licensing = build_licensing(licenses_db=licenses_db)

                license_cache = LicenseCache(
                    db=licenses_db,
                    index=index,
                    licensing=licensing,
                    spdx_symbols=spdx_symbols,
                    unknown_spdx_symbol=unknown_spdx_symbol,
                )

                # save the cache as a pickle, then record the new tree checksum
                with open(cache_file, 'wb') as fn:
                    pickle.dump(license_cache, fn, protocol=PICKLE_PROTOCOL)

                current_checksum = tree_checksum(tree_base_dir=tree_base_dir)
                with open(checksum_file, 'w') as ctcs:
                    ctcs.write(current_checksum)

                return license_cache

        except lockfile.LockTimeout:
            # TODO: handle unable to lock in a nicer way
            raise
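For orientation, a short usage sketch of `load_or_build()`. In the snippet it is defined as a class-level helper (its enclosing `LicenseCache` class and any decorator are not shown), so the module path `licensedcode.cache` and the class-level call below are assumptions; the attributes read at the end are exactly the fields passed to the `LicenseCache(...)` constructor above.

# Hypothetical usage; the module path and class-level exposure are assumptions.
from licensedcode.cache import LicenseCache

# Load the pickled cache if present, otherwise build it under the file lock,
# pickle it and write the new tree checksum (as in the body above).
cache = LicenseCache.load_or_build()

licenses_db = cache.db                    # result of load_licenses()
idx = cache.index                         # LicenseIndex from build_index()
licensing = cache.licensing               # Licensing object from build_licensing()
spdx_symbols = cache.spdx_symbols         # SPDX symbol table from build_spdx_symbols()
unknown_spdx = cache.unknown_spdx_symbol  # fallback symbol from build_unknown_spdx_symbol()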
Example #3
def get_cached_index(
    cache_dir=licensedcode_cache_dir,
    check_consistency=SCANCODE_DEV_MODE,
    # used for testing only
    timeout=LICENSE_INDEX_LOCK_TIMEOUT,
    tree_base_dir=scancode_src_dir,
    licenses_data_dir=None,
    rules_data_dir=None,
    use_dumps=True,
):
    """
    Return a LicenseIndex: either load a cached index or build and cache the
    index.
    - If the cache does not exist, a new index is built and cached.
    - If `check_consistency` is True, the cache is checked for consistency and
      rebuilt if inconsistent or stale.
    - If `check_consistency` is False, the cache is NOT checked for consistency;
      if the cache files exist but ARE stale, the cache WILL NOT be rebuilt.
    """
    from licensedcode.index import LicenseIndex
    from licensedcode.models import get_rules
    from licensedcode.models import get_all_spdx_key_tokens
    from licensedcode.models import licenses_data_dir as ldd
    from licensedcode.models import rules_data_dir as rdd

    from scancode import lockfile

    licenses_data_dir = licenses_data_dir or ldd
    rules_data_dir = rules_data_dir or rdd

    lock_file, checksum_file, cache_file = get_license_cache_paths(cache_dir)

    has_cache = os.path.exists(cache_file)
    # bypass check if no consistency check is needed
    if has_cache and not check_consistency:
        return load_index(cache_file)

    has_tree_checksum = os.path.exists(checksum_file)

    # here, we have no cache or we want a validity check: lock, check
    # and build or rebuild as needed
    try:
        # acquire lock and wait until timeout to get a lock or die
        with lockfile.FileLock(lock_file).locked(timeout=timeout):
            current_checksum = None
            # is the current cache consistent or stale?
            if has_cache and has_tree_checksum:
                # if we have a saved cached index
                # load saved tree_checksum and compare with current tree_checksum
                with open(checksum_file, 'r') as etcs:
                    existing_checksum = etcs.read()
                current_checksum = tree_checksum(tree_base_dir=tree_base_dir)
                if current_checksum == existing_checksum:
                    # The cache is consistent with the latest code and data
                    # load and return
                    return load_index(cache_file)

            # Here, the cache is not consistent with the latest code and
            # data: It is either stale or non-existing: we need to
            # rebuild the index and cache it

            # FIXME: caching a pickle of this would be 10x times faster
            license_db = get_licenses_db(licenses_data_dir=licenses_data_dir)

            rules = get_rules(licenses_data_dir=licenses_data_dir,
                              rules_data_dir=rules_data_dir)

            spdx_tokens = set(get_all_spdx_key_tokens(license_db))

            idx = LicenseIndex(rules, _spdx_tokens=spdx_tokens)

            with open(cache_file, 'wb') as ifc:
                if use_dumps:
                    ifc.write(idx.dumps())
                else:
                    idx.dump(ifc)

            # save the new tree checksum
            current_checksum = tree_checksum(tree_base_dir=tree_base_dir)
            with open(checksum_file, 'w') as ctcs:
                ctcs.write(current_checksum)

            return idx

    except lockfile.LockTimeout:
        # TODO: handle unable to lock in a nicer way
        raise
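Finally, a usage sketch of `get_cached_index()`; the `licensedcode.cache` module path is an assumption, while the keyword arguments are the ones declared in the signature above.

# Hypothetical usage; the module path is an assumption.
from licensedcode.cache import get_cached_index

# Fast path: if a cache file exists and no consistency check is requested,
# the cached LicenseIndex is loaded as-is.
idx = get_cached_index()

# Checked path: compare the saved tree checksum with the current one and
# rebuild the index if it is stale or missing.
idx = get_cached_index(check_consistency=True)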