def save(self, latest_version, current_time):
    """
    Persist the version self-check state (last check time and latest known
    version) to the statefile as compact JSON, guarded by a file lock.
    """
    # Attempt to write out our version check file; the lock prevents
    # concurrent processes from clobbering each other's writes.
    with lockfile.FileLock(self.lockfile_path).locked(timeout=10):
        payload = {
            'last_check': current_time.strftime(SELFCHECK_DATE_FMT),
            'latest_version': latest_version,
        }
        # compact separators keep the statefile small; sort_keys makes
        # the output deterministic
        with open(self.statefile_path, 'w') as fh:
            json.dump(payload, fh, sort_keys=True, separators=(',', ':'))
def load_or_build(
    licensedcode_cache_dir=licensedcode_cache_dir,
    scancode_cache_dir=scancode_cache_dir,
    check_consistency=SCANCODE_DEV_MODE,
    # used for testing only
    timeout=LICENSE_INDEX_LOCK_TIMEOUT,
    tree_base_dir=scancode_src_dir,
    licenses_data_dir=None,
    rules_data_dir=None,
):
    """
    Load or build and save and return a LicenseCache object.

    We either load a cached LicenseIndex or build and cache the index.
    On the side, we load cached or build license db, SPDX symbols and other
    license-related data structures.

    - If the cache does not exist, a new index is built and cached.
    - If `check_consistency` is True, the cache is checked for consistency
      and rebuilt if inconsistent or stale.
    - If `check_consistency` is False, the cache is NOT checked for
      consistency and if the cache files exist but ARE stale, the cache
      WILL NOT be rebuilt.

    `licenses_data_dir` and `rules_data_dir` default to the directories
    declared in `licensedcode.models` when not provided.
    """
    idx_cache_dir = os.path.join(licensedcode_cache_dir, LICENSE_INDEX_DIR)
    create_dir(idx_cache_dir)
    cache_file = os.path.join(idx_cache_dir, LICENSE_INDEX_FILENAME)

    # an empty cache file counts as "no cache": getsize() must be non-zero
    has_cache = os.path.exists(cache_file) and os.path.getsize(cache_file)

    # bypass check if no consistency check is needed
    if has_cache and not check_consistency:
        try:
            return load_cache_file(cache_file)
        except Exception as e:
            # work around some rare Windows quirks
            # NOTE: the exception is deliberately swallowed here so that
            # execution falls through to the full rebuild path below
            import traceback
            print(
                'Inconsistent License cache: checking and rebuilding index.'
            )
            print(str(e))
            print(traceback.format_exc())

    # local imports to avoid import cycles at module load time
    from licensedcode.models import licenses_data_dir as ldd
    from licensedcode.models import rules_data_dir as rdd
    from licensedcode.models import load_licenses
    from scancode import lockfile

    licenses_data_dir = licenses_data_dir or ldd
    rules_data_dir = rules_data_dir or rdd

    lock_file = os.path.join(scancode_cache_dir, LICENSE_LOCKFILE_NAME)
    checksum_file = os.path.join(scancode_cache_dir, LICENSE_CHECKSUM_FILE)
    has_tree_checksum = os.path.exists(checksum_file)

    # here, we have no cache or we want a validity check: lock, check
    # and build or rebuild as needed
    try:
        # acquire lock and wait until timeout to get a lock or die
        with lockfile.FileLock(lock_file).locked(timeout=timeout):
            current_checksum = None
            # is the current cache consistent or stale?
            if has_cache and has_tree_checksum:
                # if we have a saved cached index
                # load saved tree_checksum and compare with current tree_checksum
                with open(checksum_file) as etcs:
                    existing_checksum = etcs.read()
                current_checksum = tree_checksum(
                    tree_base_dir=tree_base_dir)
                if current_checksum == existing_checksum:
                    # The cache is consistent with the latest code and data
                    # load and return
                    return load_cache_file(cache_file)

            # Here, the cache is not consistent with the latest code and
            # data: It is either stale or non-existing: we need to
            # rebuild all cached data (e.g. mostly the index) and cache it
            licenses_db = load_licenses(
                licenses_data_dir=licenses_data_dir)
            index = build_index(
                licenses_db=licenses_db,
                licenses_data_dir=licenses_data_dir,
                rules_data_dir=rules_data_dir,
            )
            spdx_symbols = build_spdx_symbols(licenses_db=licenses_db)
            unknown_spdx_symbol = build_unknown_spdx_symbol(
                licenses_db=licenses_db)
            licensing = build_licensing(licenses_db=licenses_db)
            license_cache = LicenseCache(
                db=licenses_db,
                index=index,
                licensing=licensing,
                spdx_symbols=spdx_symbols,
                unknown_spdx_symbol=unknown_spdx_symbol,
            )

            # save the cache as pickle then save the new tree checksum;
            # the checksum is written last so a crash mid-write leaves a
            # stale checksum rather than a checksum pointing at a bad cache
            with open(cache_file, 'wb') as fn:
                pickle.dump(license_cache, fn, protocol=PICKLE_PROTOCOL)

            current_checksum = tree_checksum(tree_base_dir=tree_base_dir)
            with open(checksum_file, 'w') as ctcs:
                ctcs.write(current_checksum)

            return license_cache
    except lockfile.LockTimeout:
        # TODO: handle unable to lock in a nicer way
        raise
def get_cached_index(
    cache_dir=licensedcode_cache_dir,
    check_consistency=SCANCODE_DEV_MODE,
    # used for testing only
    timeout=LICENSE_INDEX_LOCK_TIMEOUT,
    tree_base_dir=scancode_src_dir,
    licenses_data_dir=None,
    rules_data_dir=None,
    use_dumps=True,
):
    """
    Return a LicenseIndex: either load a cached index or build and cache
    the index.

    - If the cache does not exist, a new index is built and cached.
    - If `check_consistency` is True, the cache is checked for consistency
      and rebuilt if inconsistent or stale.
    - If `check_consistency` is False, the cache is NOT checked for
      consistency. If the cache files exist but ARE stale, the cache
      WILL NOT be rebuilt.

    If `use_dumps` is True, serialize the index with `idx.dumps()` and
    write the bytes in one shot; otherwise stream with `idx.dump(file)`.
    """
    # local imports to avoid import cycles at module load time
    from licensedcode.index import LicenseIndex
    from licensedcode.models import get_rules
    from licensedcode.models import get_all_spdx_key_tokens
    from licensedcode.models import licenses_data_dir as ldd
    from licensedcode.models import rules_data_dir as rdd
    from scancode import lockfile

    licenses_data_dir = licenses_data_dir or ldd
    rules_data_dir = rules_data_dir or rdd

    lock_file, checksum_file, cache_file = get_license_cache_paths(cache_dir)

    has_cache = os.path.exists(cache_file)
    # bypass check if no consistency check is needed
    if has_cache and not check_consistency:
        return load_index(cache_file)

    has_tree_checksum = os.path.exists(checksum_file)

    # here, we have no cache or we want a validity check: lock, check
    # and build or rebuild as needed
    try:
        # acquire lock and wait until timeout to get a lock or die
        with lockfile.FileLock(lock_file).locked(timeout=timeout):
            current_checksum = None
            # is the current cache consistent or stale?
            if has_cache and has_tree_checksum:
                # if we have a saved cached index
                # load saved tree_checksum and compare with current tree_checksum
                with open(checksum_file, 'r') as etcs:
                    existing_checksum = etcs.read()
                current_checksum = tree_checksum(tree_base_dir=tree_base_dir)
                if current_checksum == existing_checksum:
                    # The cache is consistent with the latest code and data
                    # load and return
                    return load_index(cache_file)

            # Here, the cache is not consistent with the latest code and
            # data: It is either stale or non-existing: we need to
            # rebuild the index and cache it
            # FIXME: caching a pickle of this would be 10x times faster
            license_db = get_licenses_db(licenses_data_dir=licenses_data_dir)
            rules = get_rules(licenses_data_dir=licenses_data_dir, rules_data_dir=rules_data_dir)
            spdx_tokens = set(get_all_spdx_key_tokens(license_db))
            idx = LicenseIndex(rules, _spdx_tokens=spdx_tokens)

            with open(cache_file, 'wb') as ifc:
                if use_dumps:
                    # one-shot serialization to bytes, then a single write
                    ifc.write(idx.dumps())
                else:
                    # streaming serialization directly to the open file
                    idx.dump(ifc)

            # save the new tree checksum; written after the cache so a
            # crash mid-write cannot leave a checksum that validates a
            # bad cache file
            current_checksum = tree_checksum(tree_base_dir=tree_base_dir)
            with open(checksum_file, 'w') as ctcs:
                ctcs.write(current_checksum)

            return idx
    except lockfile.LockTimeout:
        # TODO: handle unable to lock in a nicer way
        raise