def wait_for_package_toplevel_file(context, package, ecosystem, bucket):
    """Wait for the package analysis to finish.

    Poll the 'LastModified' attribute of the {ecosystem}/{package}.json
    object in the given bucket (bayesian-core-package-data). When the object
    was modified within the last ``sleep_amount * 2`` seconds, the analysis
    is considered done and the package toplevel data is loaded into
    ``context.s3_data`` via read_core_package_data_from_bucket().

    Raises Exception when the object metadata does not appear, or never
    becomes fresh enough, before the timeout elapses.
    """
    timeout = 300 * 60  # overall deadline in seconds (5 hours)
    sleep_amount = 10   # seconds between polls

    key = S3Interface.package_key(ecosystem, package)

    start_time = datetime.datetime.now(datetime.timezone.utc)

    for _ in range(timeout // sleep_amount):
        current_date = datetime.datetime.now(datetime.timezone.utc)
        try:
            last_modified = context.s3interface.read_object_metadata(
                bucket, key, "LastModified")
            delta = current_date - last_modified
            # A recently-touched object means the analysis just finished.
            # total_seconds() also accepts a slightly-future LastModified
            # (negative delta, e.g. from clock skew between this host and
            # S3), which the previous `delta.days == 0` check rejected.
            if delta.total_seconds() < sleep_amount * 2:
                read_core_package_data_from_bucket(
                    context, "package toplevel", package, ecosystem, bucket)
                return
        except ClientError:
            # Object is not in the bucket yet; keep waiting.
            print("No analyses yet (waiting for {t})".format(
                t=current_date - start_time))
        time.sleep(sleep_amount)

    raise Exception('Timeout waiting for the job metadata in S3!')
def read_core_package_data_from_bucket(context, selector, package, ecosystem, bucket):
    """Read the selected metadata for the package.

    The S3 object selected by `selector` is read from `bucket` and stored
    into ``context.s3_data``. On any failure ``context.s3_data`` is reset to
    None and an Exception describing the key/ecosystem/package/bucket is
    raised, chained to the original error.
    """
    # At this moment, the following selectors can be used:
    #   package toplevel
    #   GitHub details
    #   keywords tagging
    #   libraries io
    if selector == "package toplevel":
        key = S3Interface.package_key(ecosystem, package)
    else:
        metadata = S3Interface.selector_to_key(selector)
        key = S3Interface.package_analysis_key(ecosystem, package, metadata)

    try:
        s3_data = context.s3interface.read_object(bucket, key)
        # Explicit check instead of `assert`: asserts are stripped under
        # `python -O`, which would silently let a None payload through.
        # The raise is caught below and re-reported as Exception(m), so
        # callers observe the same failure mode as before.
        if s3_data is None:
            raise ValueError("No data read for key {key}".format(key=key))
        context.s3_data = s3_data
    except Exception as e:
        m = "Can not read {key} for the E/P {ecosystem} {package} from bucket {bucket}"\
            .format(key=key, ecosystem=ecosystem, package=package, bucket=bucket)
        context.s3_data = None
        raise Exception(m) from e