import datetime
import os
import time

import docker
from botocore.exceptions import ClientError

# Note: S3Interface, retrieve_access_token, and the _-prefixed helpers used
# below are defined elsewhere in this test suite.


def wait_for_component_toplevel_file(context, package, version, ecosystem, bucket):
    """Wait for the component analysis to finish.

    This function tries to wait for the component (package) analysis to finish
    by repeatedly reading the 'LastModified' attribute of the
    {ecosystem}/{package}/{version}.json object in the bayesian-core-data
    bucket. If this attribute is newer than the remembered timestamp, the
    analysis is perceived as done.
    """
    timeout = 300 * 60
    sleep_amount = 10

    key = S3Interface.component_key(ecosystem, package, version)

    start_time = datetime.datetime.now(datetime.timezone.utc)

    for _ in range(timeout // sleep_amount):
        current_date = datetime.datetime.now(datetime.timezone.utc)
        try:
            last_modified = context.s3interface.read_object_metadata(
                bucket, key, "LastModified")
            delta = current_date - last_modified
            # print(current_date, " ", last_modified, " ", delta)
            if delta.days == 0 and delta.seconds < sleep_amount * 2:
                # print("done!")
                read_core_data_from_bucket(context, "component toplevel", package,
                                           version, ecosystem, bucket)
                return
        except ClientError:
            print("No analyses yet (waiting for {t})".format(t=current_date - start_time))
        time.sleep(sleep_amount)
    raise Exception('Timeout waiting for the job metadata in S3!')
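
# A hedged sketch of how the read_object_metadata() call used above could be
# implemented with boto3. This is illustrative only; the real S3Interface
# class lives elsewhere in this repository and may differ.
def _read_object_metadata_sketch(s3_client, bucket, key, attribute):
    """Read one metadata attribute of an S3 object (illustrative sketch).

    Assumes ``s3_client`` is a boto3 S3 client; this is not the actual
    S3Interface implementation.
    """
    # head_object() fetches object metadata without downloading the body;
    # the response carries 'LastModified' as a timezone-aware datetime,
    # which is what the polling loop above subtracts from the current time
    response = s3_client.head_object(Bucket=bucket, Key=key)
    return response[attribute]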
def read_core_data_from_bucket(context, selector, package, version, ecosystem, bucket):
    """Read the selected metadata for the component."""
    if selector == "component toplevel":
        key = S3Interface.component_key(ecosystem, package, version)
    else:
        metadata = S3Interface.selector_to_key(selector)
        key = S3Interface.component_analysis_key(ecosystem, package, version, metadata)

    try:
        s3_data = context.s3interface.read_object(bucket, key)
        assert s3_data is not None
        context.s3_data = s3_data
    except Exception as e:
        m = "Cannot read {key} for the E/P/V {ecosystem} {package} {version} " \
            "from bucket {bucket}".format(key=key, ecosystem=ecosystem, package=package,
                                          version=version, bucket=bucket)
        context.s3_data = None
        raise Exception(m) from e
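
# A hedged sketch of what S3Interface.selector_to_key() presumably does:
# map a human-readable selector such as "keywords tagging" onto the key
# fragment "keywords_tagging". Illustrative only; the real implementation
# lives in the S3Interface class.
def _selector_to_key_sketch(selector):
    """Convert a selector name into an S3 key fragment (illustrative sketch)."""
    return selector.lower().replace(" ", "_")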
def check_weight_for_word_in_keywords_tagging(context, word, where):
    """Check that the given word and its weight can be found in the tagging report."""
    selector = S3Interface.selector_to_key(where)
    assert selector in ["package_name", "repository_description", "description"]

    details = get_details_node(context)
    word_dict = check_and_get_attribute(details, selector)

    check_attribute_presence(word_dict, word)
    assert float(word_dict[word]) > 0.0
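
# A hedged illustration of the data shape the checks above expect: a details
# node keyed by selector, each mapping words onto positive weights. The
# attribute names follow the assertions above; the words and weights are
# made up for the example.
_EXAMPLE_KEYWORDS_TAGGING_DETAILS = {
    "package_name": {"client": 1.0},
    "description": {"analytics": 3.1, "machine": 1.0},
}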
def read_core_package_data_from_bucket(context, selector, package, ecosystem, bucket):
    """Read the selected metadata for the package."""
    # At this moment, the following selectors can be used:
    #   package toplevel
    #   GitHub details
    #   keywords tagging
    #   libraries io
    if selector == "package toplevel":
        key = S3Interface.package_key(ecosystem, package)
    else:
        metadata = S3Interface.selector_to_key(selector)
        key = S3Interface.package_analysis_key(ecosystem, package, metadata)

    try:
        s3_data = context.s3interface.read_object(bucket, key)
        assert s3_data is not None
        context.s3_data = s3_data
    except Exception as e:
        m = "Cannot read {key} for the E/P {ecosystem} {package} from bucket {bucket}" \
            .format(key=key, ecosystem=ecosystem, package=package, bucket=bucket)
        context.s3_data = None
        raise Exception(m) from e
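
# A hedged usage sketch: a behave step definition might call the reader above
# like this. The step text and the bucket name are assumptions made for the
# example, not taken from the real feature files:
#
#     @when('S3 "GitHub details" for the package {package} in ecosystem '
#           '{ecosystem} is read')
#     def read_github_details(context, package, ecosystem):
#         read_core_package_data_from_bucket(context, "GitHub details", package,
#                                            ecosystem, "bayesian-core-package-data")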
def before_all(context):
    """Perform the setup before the first event."""
    context.config.setup_logging()
    context.start_system = _start_system
    context.teardown_system = _teardown_system
    context.restart_system = _restart_system
    context.run_command_in_service = _run_command_in_service
    context.exec_command_in_container = _exec_command_in_container
    context.is_running = _is_running
    context.is_jobs_debug_api_running = _is_jobs_debug_api_running
    context.is_component_search_service_running = _is_component_search_service_running
    context.is_master_tag_list_service_running = _is_master_tag_list_service_running
    context.wait_for_master_tag_list_service = _wait_for_master_tag_list_service
    context.is_get_untagged_component_service_running = \
        _is_get_untagged_component_service_running
    context.wait_for_get_untagged_component_service = \
        _wait_for_get_untagged_component_service
    context.send_json_file = _send_json_file
    context.wait_for_jobs_debug_api_service = _wait_for_jobs_debug_api_service
    context.wait_for_component_search_service = _wait_for_component_search_service
    context.is_3scale_staging_running = _is_3scale_staging_running
    context.is_backbone_api_running = _is_backbone_api_running
    context.is_gemini_api_running = _is_gemini_api_running

    # Configure container logging
    context.dump_logs = _read_boolean_setting(context, 'dump_logs')
    tail_logs = int(context.config.userdata.get('tail_logs', 0))
    dump_errors = _read_boolean_setting(context, 'dump_errors')
    if tail_logs:
        dump_errors = True
    else:
        tail_logs = 50
    context.dump_errors = dump_errors
    context.tail_logs = tail_logs

    # Configure the system under test
    context.kubernetes_dir_path = context.config.userdata.get('kubernetes_dir', None)
    if context.kubernetes_dir_path is not None:
        context.docker_compose_path = None
    else:
        # if we're not running Kubernetes, use the local Docker Compose setup
        _set_default_compose_path(context)

    # for now we just assume we know what the compose file looks like
    # (which services need which images)
    context.images = {}
    context.images['bayesian/bayesian-api'] = context.config.userdata.get(
        'coreapi_server_image',
        'registry.devshift.net/bayesian/bayesian-api')
    context.images['bayesian/cucos-worker'] = context.config.userdata.get(
        'coreapi_worker_image',
        'registry.devshift.net/bayesian/cucos-worker')

    coreapi_url = _read_url_from_env_var('F8A_API_URL')
    jobs_api_url = _read_url_from_env_var('F8A_JOB_API_URL')
    gremlin_url = _read_url_from_env_var('F8A_GREMLIN_URL')
    threescale_url = _read_url_from_env_var('F8A_3SCALE_URL')
    backbone_api_url = _read_url_from_env_var('F8A_BACKBONE_API_URL')
    service_id = _read_url_from_env_var('F8A_SERVICE_ID')
    gemini_api_url = _read_url_from_env_var('F8A_GEMINI_API_URL')
    license_service_url = _read_url_from_env_var('F8A_LICENSE_SERVICE_URL')

    context.running_locally = _running_locally(coreapi_url, jobs_api_url)
    check_test_environment(context, coreapi_url)

    context.coreapi_url = _get_url(context, coreapi_url, 'coreapi_url',
                                   _FABRIC8_ANALYTICS_SERVER)
    context.jobs_api_url = _get_url(context, jobs_api_url, 'jobs_api_url',
                                    _FABRIC8_ANALYTICS_JOBS)
    context.gremlin_url = _get_url(context, gremlin_url, 'gremlin_url',
                                   _FABRIC8_GREMLIN_SERVICE)
    context.license_service_url = _get_url(context, license_service_url,
                                           'license_service_url',
                                           _FABRIC8_LICENSE_SERVICE)
    context.threescale_url = threescale_url
    context.backbone_api_url = backbone_api_url
    context.service_id = service_id
    context.gemini_api_url = gemini_api_url

    # we can retrieve the access token by using the refresh/offline token
    context.access_token = retrieve_access_token(os.environ.get("RECOMMENDER_REFRESH_TOKEN"),
                                                 os.environ.get("OSIO_AUTH_SERVICE"))
    # information needed to access the S3 database from tests
    _check_env_var_presence_s3_db('AWS_ACCESS_KEY_ID')
    _check_env_var_presence_s3_db('AWS_SECRET_ACCESS_KEY')
    _check_env_var_presence_s3_db('S3_REGION_NAME')

    aws_access_key_id = os.environ.get('AWS_ACCESS_KEY_ID')
    aws_secret_access_key = os.environ.get('AWS_SECRET_ACCESS_KEY')
    s3_region_name = os.environ.get('S3_REGION_NAME')
    deployment_prefix = os.environ.get('DEPLOYMENT_PREFIX', 'STAGE')

    context.s3interface = S3Interface(aws_access_key_id, aws_secret_access_key,
                                      s3_region_name, deployment_prefix)

    context.client = None

    # timeout values can be overwritten by environment variables
    stack_analysis_timeout = _parse_int_env_var('F8A_STACK_ANALYSIS_TIMEOUT')
    component_analysis_timeout = _parse_int_env_var('F8A_COMPONENT_ANALYSIS_TIMEOUT')

    context.stack_analysis_timeout = stack_analysis_timeout or _DEFAULT_STACK_ANALYSIS_TIMEOUT
    context.component_analysis_timeout = component_analysis_timeout \
        or _DEFAULT_COMPONENT_ANALYSIS_TIMEOUT

    if context.running_locally:
        context.client = docker.AutoVersionClient()

        for desired, actual in context.images.items():
            desired = 'registry.devshift.net/' + desired
            if desired != actual:
                context.client.tag(actual, desired, force=True)

    # Specify the analyses checked for when looking for "complete" results
    def _get_expected_component_analyses(ecosystem):
        common = context.EXPECTED_COMPONENT_ANALYSES
        specific = context.ECOSYSTEM_DEPENDENT_ANALYSES.get(ecosystem, set())
        return common | specific
    context.get_expected_component_analyses = _get_expected_component_analyses

    def _compare_analysis_sets(actual, expected):
        unreliable = context.UNRELIABLE_ANALYSES
        missing = expected - actual - unreliable
        unexpected = actual - expected - unreliable
        return missing, unexpected
    context.compare_analysis_sets = _compare_analysis_sets

    context.EXPECTED_COMPONENT_ANALYSES = {
        'metadata', 'source_licenses', 'digests', 'dependency_snapshot', 'code_metrics'
        # The following workers are currently disabled by default:
        # 'static_analysis', 'binary_data', 'languages', 'crypto_algorithms'
    }
    # Analyses that are only executed for particular language ecosystems
    context.ECOSYSTEM_DEPENDENT_ANALYSES = dict()
    # Results that use a nonstandard format, so we don't check for the
    # standard "status", "summary", and "details" keys
    context.NONSTANDARD_ANALYSIS_FORMATS = set()
    # Analyses that are just plain unreliable and so need to be excluded from
    # consideration when determining whether or not an analysis is complete
    context.UNRELIABLE_ANALYSES = {
        'github_details',  # if no GitHub API token is provided
        'security_issues'  # needs the Snyk vulndb in S3
    }
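
# A hedged worked example of the two helpers stored on the context above.
# With the default sets, a result that lacks 'code_metrics' and carries an
# extra 'github_details' entry compares as follows ('github_details' is in
# UNRELIABLE_ANALYSES, so it is ignored in both directions):
#
#     expected = context.get_expected_component_analyses('npm')
#     actual = {'metadata', 'source_licenses', 'digests',
#               'dependency_snapshot', 'github_details'}
#     missing, unexpected = context.compare_analysis_sets(actual, expected)
#     assert missing == {'code_metrics'}
#     assert unexpected == set()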