def fetch_desired_state(
    gabi_instances: Iterable[Mapping], ri: ResourceInventory
) -> None:
    """Populate the desired gabi ConfigMaps in the resource inventory.

    For each gabi instance, builds a ConfigMap listing the authorized
    github usernames. An expired instance yields an empty user list;
    an expiration date further than EXPIRATION_MAX days away is rejected.

    :param gabi_instances: gabi instance mappings from app-interface
    :param ri: ResourceInventory to register desired state in
    :raises RunnerException: if the expiration date is too far in the
        future, or a referenced rds terraform resource cannot be found
        in its namespace
    """
    for g in gabi_instances:
        exp_date = datetime.strptime(g["expirationDate"], "%Y-%m-%d").date()
        users = [u["github_username"] for u in g["users"]]
        if exp_date < date.today():
            # expired: keep the ConfigMap but revoke all access
            users = []
        elif (exp_date - date.today()).days > EXPIRATION_MAX:
            # fixed typo: "form today" -> "from today"
            raise RunnerException(
                f'The maximum expiration date of {g["name"]} '
                f"shall not exceed {EXPIRATION_MAX} days from today"
            )
        resource = construct_gabi_oc_resource(g["name"], users)
        for i in g["instances"]:
            namespace = i["namespace"]
            account = i["account"]
            identifier = i["identifier"]
            tf_resources = namespace["terraformResources"]
            # the referenced rds instance must be managed in the namespace
            found = False
            for t in tf_resources:
                if t["provider"] != "rds":
                    continue
                if (t["account"], t["identifier"]) == (account, identifier):
                    found = True
                    break
            if not found:
                raise RunnerException(
                    f"Could not find rds identifier {identifier} "
                    f'for account {account} in namespace {namespace["name"]}'
                )
            cluster = namespace["cluster"]["name"]
            ri.add_desired(cluster, namespace["name"], "ConfigMap", g["name"], resource)
def fetch_desired_state(gabi_instances: Iterable[Mapping],
                        ri: ResourceInventory) -> None:
    """Populate the desired gabi ConfigMaps in the resource inventory.

    Expired instances get an empty user list; an expiration date more
    than EXPIRATION_MAX days away raises. Each referenced rds instance
    must exist among the namespace's terraform resources.

    :param gabi_instances: gabi instance mappings from app-interface
    :param ri: ResourceInventory to register desired state in
    :raises RunnerException: on invalid expiration or missing rds resource
    """
    for g in gabi_instances:
        exp_date = datetime.strptime(g['expirationDate'], '%Y-%m-%d').date()
        users = [u['github_username'] for u in g['users']]
        if exp_date < date.today():
            # expired: keep the ConfigMap but revoke all access
            users = []
        elif (exp_date - date.today()).days > EXPIRATION_MAX:
            # fixed typo: "form today" -> "from today"
            raise RunnerException(
                f'The maximum expiration date of {g["name"]} '
                f'shall not exceed {EXPIRATION_MAX} days from today')
        resource = construct_gabi_oc_resource(g['name'], users)
        for i in g['instances']:
            namespace = i['namespace']
            account = i['account']
            identifier = i['identifier']
            tf_resources = namespace['terraformResources']
            found = False
            for t in tf_resources:
                if t['provider'] != 'rds':
                    continue
                if (t['account'], t['identifier']) == \
                        (account, identifier):
                    found = True
                    break
            if not found:
                raise RunnerException(
                    f'Could not find rds identifier {identifier} '
                    f'for account {account} in namespace {namespace["name"]}')
            cluster = namespace['cluster']['name']
            ri.add_desired(cluster, namespace['name'], 'ConfigMap',
                           g['name'], resource)
def fill_desired_state(provider: EndpointMonitoringProvider,
                       endpoints: list[Endpoint],
                       ri: ResourceInventory) -> None:
    """Build a Probe for the provider's endpoints and register it as
    desired state in the blackbox-exporter's namespace.

    Does nothing when no probe can be built or the provider has no
    blackboxExporter configured.
    """
    probe = build_probe(provider, endpoints)
    if not probe or not provider.blackboxExporter:
        return
    ns = provider.blackboxExporter.namespace
    ri.add_desired(
        cluster=ns["cluster"]["name"],
        namespace=ns["name"],
        resource_type=probe.kind,
        name=probe.name,
        value=probe,
    )
def setUp(self) -> None:
    """Build a SaasHerder with all external collaborators patched out and
    populate a ResourceInventory with its desired state.

    Patches State (no real state store), _initiate_github,
    _initiate_image_auth and _get_file_contents (returns the class-level
    template fixture) before constructing the herder.
    """
    self.all_saas_files = \
        [self.fxt.get_anymarkup('saas.gql.yml')]
    # State is autospec-patched; keep the mock instance for per-test
    # control of .get() return values
    self.state_patcher = \
        patch("reconcile.utils.saasherder.State", autospec=True)
    self.state_mock = self.state_patcher.start().return_value
    self.ig_patcher = \
        patch.object(SaasHerder, "_initiate_github", autospec=True)
    self.ig_patcher.start()
    self.image_auth_patcher = \
        patch.object(SaasHerder, "_initiate_image_auth")
    self.image_auth_patcher.start()
    self.gfc_patcher = \
        patch.object(SaasHerder, "_get_file_contents", autospec=True)
    gfc_mock = self.gfc_patcher.start()
    self.saas_file = \
        self.fxt.get_anymarkup('saas.gql.yml')
    # ApiVersion is set in the saas gql query method in queries module
    self.saas_file["apiVersion"] = "v2"
    # _get_file_contents returns (content, url, hash)
    gfc_mock.return_value = (self.template, "url", "ahash")
    self.deploy_current_state_fxt = \
        self.fxt.get_anymarkup('saas_deploy.state.json')
    self.post_deploy_current_state_fxt = \
        self.fxt.get_anymarkup('saas_post_deploy.state.json')
    self.saasherder = SaasHerder(
        [self.saas_file],
        thread_pool_size=1,
        gitlab=None,
        integration='',
        integration_version='',
        accounts={"name": "test-account"},  # Initiates State in SaasHerder
        settings={"hashLength": 24})
    # IMPORTANT: Populating desired state modify self.saas_files within
    # saasherder object.
    self.ri = ResourceInventory()
    for ns in ["test-ns-publisher", "test-ns-subscriber"]:
        for kind in ["Service", "Deployment"]:
            self.ri.initialize_resource_type(self.cluster, ns, kind)
    self.saasherder.populate_desired_state(self.ri)
    # fail fast: tests below assume a clean desired-state population
    if self.ri.has_error_registered():
        raise Exception("Errors registered in Resourceinventory")
def fetch_current_state(namespaces=None, clusters=None, thread_pool_size=None,
                        integration=None, integration_version=None,
                        override_managed_types=None, internal=None,
                        use_jump_host=True, init_api_resources=False):
    """Fetch the current state of managed resources from the clusters.

    Builds an OC_Map for the given namespaces or clusters, derives the
    state specs to fetch, and populates a ResourceInventory concurrently.

    Returns a (ResourceInventory, OC_Map) tuple.
    """
    ri = ResourceInventory()
    settings = queries.get_app_interface_settings()
    oc_map = OC_Map(
        namespaces=namespaces,
        clusters=clusters,
        integration=integration,
        settings=settings,
        internal=internal,
        use_jump_host=use_jump_host,
        thread_pool_size=thread_pool_size,
        init_api_resources=init_api_resources,
    )
    specs = init_specs_to_fetch(
        ri,
        oc_map,
        namespaces=namespaces,
        clusters=clusters,
        override_managed_types=override_managed_types,
    )
    threaded.run(
        populate_current_state,
        specs,
        thread_pool_size,
        ri=ri,
        integration=integration,
        integration_version=integration_version,
    )
    return ri, oc_map
def fetch_current_state(dry_run, namespaces, thread_pool_size,
                        internal, use_jump_host, account_name):
    """Fetch current Secret resources from the given namespaces.

    On dry runs no clusters are contacted and (ri, None) is returned.
    Otherwise returns a (ResourceInventory, OC_Map) tuple populated via
    populate_oc_resources.
    """
    ri = ResourceInventory()
    if dry_run:
        # no cluster access needed for a dry run
        return ri, None
    settings = queries.get_app_interface_settings()
    oc_map = OC_Map(
        namespaces=namespaces,
        integration=QONTRACT_INTEGRATION,
        settings=settings,
        internal=internal,
        use_jump_host=use_jump_host,
        thread_pool_size=thread_pool_size,
    )
    specs = ob.init_specs_to_fetch(
        ri,
        oc_map,
        namespaces=namespaces,
        override_managed_types=['Secret'],
    )
    threaded.run(
        populate_oc_resources,
        specs,
        thread_pool_size,
        ri=ri,
        account_name=account_name,
    )
    return ri, oc_map
def fetch_data(
    namespaces,
    thread_pool_size,
    internal,
    use_jump_host,
    init_api_resources=False,
    overrides=None,
):
    """Fetch resource state for the given namespaces.

    Optionally overrides the managed resource types via *overrides*.
    Returns an (OC_Map, ResourceInventory) tuple.
    """
    ri = ResourceInventory()
    settings = queries.get_app_interface_settings()
    logging.debug(f"Overriding keys {overrides}")
    oc_map = OC_Map(
        namespaces=namespaces,
        integration=QONTRACT_INTEGRATION,
        settings=settings,
        internal=internal,
        use_jump_host=use_jump_host,
        thread_pool_size=thread_pool_size,
        init_api_resources=init_api_resources,
    )
    specs = ob.init_specs_to_fetch(
        ri,
        oc_map,
        namespaces=namespaces,
        override_managed_types=overrides,
    )
    threaded.run(fetch_states, specs, thread_pool_size, ri=ri)
    return oc_map, ri
def get_desired_state(internal, use_jump_host, thread_pool_size):
    """Compute the desired (cluster, namespace) pairs for this shard.

    Queries all namespaces, keeps those belonging to the current shard,
    initializes Namespace state specs, and returns an (OC_Map, list of
    {"cluster": ..., "namespace": ...} dicts) tuple limited to reachable
    clusters.
    """
    gqlapi = gql.get_api()
    all_namespaces = gqlapi.query(QUERY)['namespaces']
    # shard by "<cluster>/<namespace>"
    namespaces = [
        ns for ns in all_namespaces
        if is_in_shard(f'{ns["cluster"]["name"]}/{ns["name"]}')
    ]
    ri = ResourceInventory()
    settings = queries.get_app_interface_settings()
    oc_map = OC_Map(namespaces=namespaces,
                    integration=QONTRACT_INTEGRATION,
                    settings=settings,
                    internal=internal,
                    use_jump_host=use_jump_host,
                    thread_pool_size=thread_pool_size,
                    init_projects=True)
    ob.init_specs_to_fetch(ri, oc_map,
                           namespaces=namespaces,
                           override_managed_types=['Namespace'])
    # unreachable clusters are skipped
    desired_state = [
        {"cluster": cluster, "namespace": namespace}
        for cluster, namespace, _, _ in ri
        if cluster in oc_map.clusters()
    ]
    return oc_map, desired_state
def test_filling_desired_state(mocker):
    """fill_desired_state registers exactly one Probe for the fixture
    provider, in the blackbox-exporter's namespace."""
    endpoint_query = mocker.patch.object(
        queries, 'get_service_monitoring_endpoints')
    endpoint_query.return_value = get_endpoint_fixtures("test_endpoint.yaml")
    desired_mock = mocker.patch.object(ResourceInventory, 'add_desired')

    endpoints_by_provider = get_endpoints()
    first_provider = next(iter(endpoints_by_provider))
    fill_desired_state(
        first_provider,
        endpoints_by_provider[first_provider],
        ResourceInventory(),
    )

    assert desired_mock.call_count == 1
    desired_mock.assert_called_with(
        cluster="app-sre-stage-01",
        namespace="openshift-customer-monitoring",
        resource_type="Probe",
        name="blackbox-exporter-http-2xx",
        value=ANY,
    )
def fetch_data(namespaces, thread_pool_size, internal, use_jump_host,
               init_api_resources=False):
    """Fetch resource state for the given namespaces.

    Returns an (OC_Map, ResourceInventory) tuple populated via
    fetch_states.
    """
    ri = ResourceInventory()
    settings = queries.get_app_interface_settings()
    oc_map = OC_Map(
        namespaces=namespaces,
        integration=QONTRACT_INTEGRATION,
        settings=settings,
        internal=internal,
        use_jump_host=use_jump_host,
        thread_pool_size=thread_pool_size,
        init_api_resources=init_api_resources,
    )
    specs = ob.init_specs_to_fetch(ri, oc_map, namespaces=namespaces)
    threaded.run(fetch_states, specs, thread_pool_size, ri=ri)
    return oc_map, ri
def test_populate_desired_state_cases(self):
    """populate_desired_state produces exactly the five expected
    resources, each matching its per-cluster/namespace/type fixture."""
    inventory = ResourceInventory()
    for kind in ("Deployment", "Service", "ConfigMap"):
        inventory.initialize_resource_type("stage-1", "yolo-stage", kind)
        inventory.initialize_resource_type("prod-1", "yolo", kind)
    self.saasherder.populate_desired_state(inventory)

    seen = 0
    for cluster, namespace, resource_type, data in inventory:
        for d_item in data['desired'].values():
            fixture_name = \
                f"expected_{cluster}_{namespace}_{resource_type}.json"
            expected = yaml.safe_load(self.fxts.get(fixture_name))
            self.assertEqual(expected, d_item.body)
            seen += 1
    self.assertEqual(5, seen, "expected 5 resources, found less")
class TestConfigHashPromotionsValidation(TestCase):
    """TestCase to test SaasHerder promotions validation. SaasHerder is
    initialized with ResourceInventory population, like is done in
    openshift-saas-deploy."""

    cluster: str
    namespace: str
    fxt: Any
    template: Any

    @classmethod
    def setUpClass(cls):
        cls.fxt = Fixtures('saasherder')
        cls.cluster = "test-cluster"
        cls.template = cls.fxt.get_anymarkup('template_1.yml')

    def setUp(self) -> None:
        """Build a SaasHerder with external collaborators patched out and
        populate a ResourceInventory with its desired state."""
        self.all_saas_files = \
            [self.fxt.get_anymarkup('saas.gql.yml')]
        # keep the State mock instance for per-test control of .get()
        self.state_patcher = \
            patch("reconcile.utils.saasherder.State", autospec=True)
        self.state_mock = self.state_patcher.start().return_value
        self.ig_patcher = \
            patch.object(SaasHerder, "_initiate_github", autospec=True)
        self.ig_patcher.start()
        self.image_auth_patcher = \
            patch.object(SaasHerder, "_initiate_image_auth")
        self.image_auth_patcher.start()
        self.gfc_patcher = \
            patch.object(SaasHerder, "_get_file_contents", autospec=True)
        gfc_mock = self.gfc_patcher.start()
        self.saas_file = \
            self.fxt.get_anymarkup('saas.gql.yml')
        # ApiVersion is set in the saas gql query method in queries module
        self.saas_file["apiVersion"] = "v2"
        gfc_mock.return_value = (self.template, "url", "ahash")
        self.deploy_current_state_fxt = \
            self.fxt.get_anymarkup('saas_deploy.state.json')
        self.post_deploy_current_state_fxt = \
            self.fxt.get_anymarkup('saas_post_deploy.state.json')
        self.saasherder = SaasHerder(
            [self.saas_file],
            thread_pool_size=1,
            gitlab=None,
            integration='',
            integration_version='',
            accounts={"name": "test-account"},  # Initiates State in SaasHerder
            settings={"hashLength": 24})
        # IMPORTANT: Populating desired state modify self.saas_files within
        # saasherder object.
        self.ri = ResourceInventory()
        for ns in ["test-ns-publisher", "test-ns-subscriber"]:
            for kind in ["Service", "Deployment"]:
                self.ri.initialize_resource_type(self.cluster, ns, kind)
        self.saasherder.populate_desired_state(self.ri)
        if self.ri.has_error_registered():
            raise Exception("Errors registered in Resourceinventory")

    def tearDown(self):
        self.state_patcher.stop()
        self.ig_patcher.stop()
        # FIX: image_auth_patcher was never stopped, leaking the
        # _initiate_image_auth patch into subsequent tests
        self.image_auth_patcher.stop()
        self.gfc_patcher.stop()

    def test_config_hash_is_filled(self):
        """Ensures get_configs_diff_saas_file fills the promotion data
        on the publisher target. This data is used in the
        publish_promotions method to add the hash to subscribed targets.
        IMPORTANT: This is not the promotion_data within promotion; these
        fields are set by the _process_template method in saasherder.
        """
        job_spec = \
            self.saasherder.get_configs_diff_saas_file(self.saas_file)[0]
        promotion = job_spec["target_config"]["promotion"]
        self.assertIsNotNone(promotion[TARGET_CONFIG_HASH])

    def test_promotion_state_config_hash_match_validates(self):
        """A promotion is valid if the publisher state fetched from the
        state store is equal to the one set in the subscriber target
        promotion data. This is the happy path: the publisher job state
        target config hash is the same as the one set in the subscriber
        job.
        """
        configs = \
            self.saasherder.get_saas_targets_config(self.saas_file)
        tcs = list(configs.values())
        publisher_config_hash = tcs[0]['promotion'][TARGET_CONFIG_HASH]
        publisher_state = {
            "success": True,
            "saas_file": self.saas_file["name"],
            TARGET_CONFIG_HASH: publisher_config_hash
        }
        self.state_mock.get.return_value = publisher_state
        result = self.saasherder.validate_promotions(self.all_saas_files)
        self.assertTrue(result)

    def test_promotion_state_config_hash_not_match_no_validates(self):
        """A promotion is not valid if the parent target config hash set
        in the promotion data differs from the one in the publisher job
        state. This could happen if a new publisher job has run before
        the subscriber job.
        """
        publisher_state = {
            "success": True,
            "saas_file": self.saas_file["name"],
            TARGET_CONFIG_HASH: "will_not_match"
        }
        self.state_mock.get.return_value = publisher_state
        result = self.saasherder.validate_promotions(self.all_saas_files)
        self.assertFalse(result)

    def test_promotion_without_state_config_hash_validates(self):
        """Existing states won't have promotion data. If there is an
        ongoing promotion, this ensures it will still happen.
        """
        promotion_result = {
            "success": True,
        }
        self.state_mock.get.return_value = promotion_result
        result = self.saasherder.validate_promotions(self.all_saas_files)
        self.assertTrue(result)
def run(
    dry_run,
    thread_pool_size=10,
    io_dir="throughput/",
    saas_file_name=None,
    env_name=None,
    gitlab_project_id=None,
    defer=None,
):
    """Deploy saas-file targets to their clusters.

    Fetches saas files (optionally filtered by saas_file_name/env_name),
    fetches current cluster state, populates desired state via SaasHerder,
    realizes the diff, and — for single-saas-file non-dry runs — publishes
    promotions and sends slack notifications.

    :param dry_run: when True, no changes are applied to clusters
    :param thread_pool_size: concurrency for state fetching/realization
    :param io_dir: directory for job log output (publish_job_logs)
    :param saas_file_name: restrict the run to a single saas file
    :param env_name: restrict the run to a single environment
    :param gitlab_project_id: project used for promotion MRs
    :param defer: cleanup-registration callable (decorator-provided)
    """
    all_saas_files = queries.get_saas_files(v1=True, v2=True)
    saas_files = queries.get_saas_files(saas_file_name, env_name, v1=True, v2=True)
    app_interface_settings = queries.get_app_interface_settings()
    if not saas_files:
        logging.error("no saas files found")
        sys.exit(ExitCodes.ERROR)

    # notify different outputs (publish results, slack notifications)
    # we only do this if:
    # - this is not a dry run
    # - there is a single saas file deployed
    notify = not dry_run and len(saas_files) == 1
    if notify:
        saas_file = saas_files[0]
        slack_info = saas_file.get("slack")
        if slack_info:
            slack = slackapi_from_slack_workspace(
                slack_info,
                app_interface_settings,
                QONTRACT_INTEGRATION,
                init_usergroups=False,
            )
            # support built-in start and end slack notifications
            # only in v2 saas files
            if saas_file["apiVersion"] == "v2":
                ri = ResourceInventory()
                console_url = compose_console_url(saas_file, saas_file_name, env_name)
                # deployment result notification (deferred so it fires on
                # any exit path; ri is captured and filled in below)
                defer(
                    lambda: slack_notify(
                        saas_file_name,
                        env_name,
                        slack,
                        ri,
                        console_url,
                        in_progress=False,
                    )
                )
                # deployment start notification
                slack_notifications = slack_info.get("notifications")
                if slack_notifications and slack_notifications.get("start"):
                    slack_notify(
                        saas_file_name,
                        env_name,
                        slack,
                        ri,
                        console_url,
                        in_progress=True,
                    )
        else:
            slack = None

    instance = queries.get_gitlab_instance()
    # instance exists in v1 saas files only
    desired_jenkins_instances = [
        s["instance"]["name"] for s in saas_files if s.get("instance")
    ]
    jenkins_map = jenkins_base.get_jenkins_map(
        desired_instances=desired_jenkins_instances
    )
    settings = queries.get_app_interface_settings()
    accounts = queries.get_aws_accounts()
    try:
        gl = GitLabApi(instance, settings=settings)
    except Exception:
        # allow execution without access to gitlab
        # as long as there are no access attempts.
        gl = None

    saasherder = SaasHerder(
        saas_files,
        thread_pool_size=thread_pool_size,
        gitlab=gl,
        integration=QONTRACT_INTEGRATION,
        integration_version=QONTRACT_INTEGRATION_VERSION,
        settings=settings,
        jenkins_map=jenkins_map,
        accounts=accounts,
    )
    if len(saasherder.namespaces) == 0:
        logging.warning("no targets found")
        sys.exit(ExitCodes.SUCCESS)

    # NOTE: this rebinds the slack-notification `ri` created above — the
    # deferred slack_notify closure reads the original binding, which is
    # populated below via populate_desired_state
    ri, oc_map = ob.fetch_current_state(
        namespaces=saasherder.namespaces,
        thread_pool_size=thread_pool_size,
        integration=QONTRACT_INTEGRATION,
        integration_version=QONTRACT_INTEGRATION_VERSION,
        init_api_resources=True,
        cluster_admin=saasherder.cluster_admin,
    )
    defer(oc_map.cleanup)
    saasherder.populate_desired_state(ri)

    # validate that this deployment is valid
    # based on promotion information in targets
    if not saasherder.validate_promotions():
        logging.error("invalid promotions")
        ri.register_error()
        sys.exit(ExitCodes.ERROR)

    # if saas_file_name is defined, the integration
    # is being called from multiple running instances
    actions = ob.realize_data(
        dry_run,
        oc_map,
        ri,
        thread_pool_size,
        caller=saas_file_name,
        wait_for_namespace=True,
        no_dry_run_skip_compare=(not saasherder.compare),
        take_over=saasherder.take_over,
    )

    if not dry_run:
        if saasherder.publish_job_logs:
            try:
                ob.follow_logs(oc_map, actions, io_dir)
            except Exception as e:
                logging.error(str(e))
                ri.register_error()
        try:
            ob.validate_data(oc_map, actions)
        except Exception as e:
            logging.error(str(e))
            ri.register_error()

    # publish results of this deployment
    # based on promotion information in targets
    success = not ri.has_error_registered()
    # only publish promotions for deployment jobs (a single saas file)
    if notify:
        # Auto-promote next stages only if there are changes in the
        # promoting stage. This prevents triggering promotions on job
        # re-runs
        auto_promote = len(actions) > 0
        mr_cli = mr_client_gateway.init(gitlab_project_id=gitlab_project_id)
        saasherder.publish_promotions(success, all_saas_files, mr_cli,
                                      auto_promote)

    if not success:
        sys.exit(ExitCodes.ERROR)

    # send human readable notifications to slack
    # we only do this if:
    # - this is not a dry run
    # - there is a single saas file deployed
    # - output is 'events'
    # - no errors were registered
    if notify and slack and actions and slack_info.get("output") == "events":
        for action in actions:
            message = (
                f"[{action['cluster']}] "
                + f"{action['kind']} {action['name']} {action['action']}"
            )
            slack.chat_post_message(message)
def init_specs_to_fetch(
        ri: ResourceInventory,
        oc_map: OC_Map,
        namespaces: Optional[Iterable[Mapping]] = None,
        clusters: Optional[Iterable[Mapping]] = None,
        override_managed_types: Optional[Iterable[str]] = None,
        managed_types_key: str = 'managedResourceTypes') -> list[StateSpec]:
    """Build the list of StateSpec's ("current" and "desired") to fetch.

    Exactly one of *namespaces* or *clusters* must be provided. For
    namespaces, the managed resource types come from the namespace's
    *managed_types_key* field unless *override_managed_types* is given;
    per-type resource-name filters and type overrides are honored. For
    clusters, only *override_managed_types* is used. Resource types are
    also initialized in *ri* as a side effect.

    :raises KeyError: if both or neither of namespaces/clusters are
        given, or a managedResourceName/override references a
        non-managed type
    """
    state_specs = []
    if clusters and namespaces:
        raise KeyError('expected only one of clusters or namespaces.')
    elif namespaces:
        for namespace_info in namespaces:
            if override_managed_types is None:
                managed_types = set(
                    namespace_info.get(managed_types_key) or [])
            else:
                managed_types = set(override_managed_types)
            if not managed_types:
                continue
            cluster = namespace_info['cluster']['name']
            privileged = namespace_info.get("clusterAdmin", False) is True
            oc = oc_map.get(cluster, privileged)
            if not oc:
                # a falsy oc still carries log_level/message describing
                # why the cluster is unavailable
                if oc.log_level >= logging.ERROR:
                    ri.register_error()
                logging.log(level=oc.log_level, msg=oc.message)
                continue
            namespace = namespace_info['name']
            # These may exist but have a value of None
            managed_resource_names = \
                namespace_info.get('managedResourceNames') or []
            managed_resource_type_overrides = \
                namespace_info.get('managedResourceTypeOverrides') or []
            # Initialize current state specs
            for resource_type in managed_types:
                ri.initialize_resource_type(cluster, namespace,
                                            resource_type)
            resource_names = {}
            resource_type_overrides = {}
            for mrn in managed_resource_names:
                # Current implementation guarantees only one
                # managed_resource_name of each managed type
                if mrn['resource'] in managed_types:
                    resource_names[mrn['resource']] = mrn['resourceNames']
                elif override_managed_types:
                    logging.debug(
                        f"Skipping resource {mrn['resource']} in {cluster}/"
                        f"{namespace} because the integration explicitly "
                        "dismisses it")
                else:
                    raise KeyError(
                        f"Non-managed resource name {mrn} listed on "
                        f"{cluster}/{namespace} (valid kinds: {managed_types})"
                    )
            for o in managed_resource_type_overrides:
                # Current implementation guarantees only one
                # override of each managed type
                if o['resource'] in managed_types:
                    resource_type_overrides[o['resource']] = o['override']
                elif override_managed_types:
                    logging.debug(
                        f"Skipping resource type override {o} listed on"
                        f"{cluster}/{namespace} because the integration "
                        "dismisses it explicitly")
                else:
                    raise KeyError(
                        f"Non-managed override {o} listed on "
                        f"{cluster}/{namespace} (valid kinds: {managed_types})"
                    )
            for kind, names in resource_names.items():
                c_spec = StateSpec(
                    "current", oc, cluster, namespace, kind,
                    resource_type_override=resource_type_overrides.get(kind),
                    resource_names=names)
                state_specs.append(c_spec)
                # consumed: remaining types get "empty" specs below
                managed_types.remove(kind)
            # Produce "empty" StateSpec's for any resource type that
            # doesn't have an explicit managedResourceName listed in
            # the namespace
            state_specs.extend(
                StateSpec("current", oc, cluster, namespace, t,
                          resource_type_override=resource_type_overrides.get(
                              t),
                          resource_names=None)
                for t in managed_types)
            # Initialize desired state specs
            openshift_resources = namespace_info.get('openshiftResources')
            for openshift_resource in openshift_resources or []:
                d_spec = StateSpec("desired", oc, cluster, namespace,
                                   openshift_resource, namespace_info,
                                   privileged=privileged)
                state_specs.append(d_spec)
    elif clusters:
        # set namespace to something indicative
        namespace = 'cluster'
        for cluster_info in clusters:
            cluster = cluster_info['name']
            oc = oc_map.get(cluster)
            if not oc:
                if oc.log_level >= logging.ERROR:
                    ri.register_error()
                logging.log(level=oc.log_level, msg=oc.message)
                continue
            # we currently only use override_managed_types,
            # and not allow a `managedResourcesTypes` field in a cluster file
            for resource_type in override_managed_types or []:
                ri.initialize_resource_type(cluster, namespace,
                                            resource_type)
                # Initialize current state specs
                c_spec = StateSpec("current", oc, cluster, namespace,
                                   resource_type)
                state_specs.append(c_spec)
                # Initialize desired state specs
                d_spec = StateSpec("desired", oc, cluster, namespace,
                                   resource_type)
                state_specs.append(d_spec)
    else:
        raise KeyError('expected one of clusters or namespaces.')
    return state_specs
def _realize_resource_data(unpacked_ri_item,
                           dry_run, oc_map: OC_Map,
                           ri: ResourceInventory,
                           take_over, caller,
                           wait_for_namespace,
                           no_dry_run_skip_compare,
                           override_enable_deletion,
                           recycle_pods):
    """Reconcile one (cluster, namespace, kind) inventory item.

    Applies desired resources that differ from current state and deletes
    unmanaged-but-annotated current resources not present in desired
    state. Errors are registered on *ri*; the item is skipped entirely
    when its cluster already has errors registered.

    :param unpacked_ri_item: (cluster, namespace, resource_type, data)
        tuple from iterating a ResourceInventory
    :returns: list of action dicts describing what was applied/deleted
    """
    cluster, namespace, resource_type, data = unpacked_ri_item
    actions: list[dict] = []
    if ri.has_error_registered(cluster=cluster):
        msg = ("[{}] skipping realize_data for "
               "cluster with errors").format(cluster)
        logging.error(msg)
        return actions

    # deletions are disabled when any error was registered anywhere
    enable_deletion = False if ri.has_error_registered() else True
    # only allow to override enable_deletion if no errors were found
    if enable_deletion is True and override_enable_deletion is False:
        enable_deletion = False

    # desired items
    for name, d_item in data['desired'].items():
        c_item = data['current'].get(name)

        if c_item is not None:
            if not dry_run and no_dry_run_skip_compare:
                msg = (
                    "[{}/{}] skipping compare of resource '{}/{}'."
                ).format(cluster, namespace, resource_type, name)
                logging.debug(msg)
            else:
                # If resource doesn't have annotations, annotate and apply
                if not c_item.has_qontract_annotations():
                    msg = (
                        "[{}/{}] resource '{}/{}' present "
                        "w/o annotations, annotating and applying"
                    ).format(cluster, namespace, resource_type, name)
                    logging.info(msg)
                # don't apply if resources match
                # if there is a caller (saas file) and this is a take over
                # we skip the equal compare as it's not covering
                # cases of a removed label (for example)
                # d_item == c_item is uncommutative
                elif not (caller and take_over) and d_item == c_item:
                    msg = (
                        "[{}/{}] resource '{}/{}' present "
                        "and matches desired, skipping."
                    ).format(cluster, namespace, resource_type, name)
                    logging.debug(msg)
                    continue
                # don't apply if sha256sum hashes match
                elif c_item.sha256sum() == d_item.sha256sum():
                    if c_item.has_valid_sha256sum():
                        msg = (
                            "[{}/{}] resource '{}/{}' present "
                            "and hashes match, skipping."
                        ).format(cluster, namespace, resource_type, name)
                        logging.debug(msg)
                        continue
                    else:
                        # hash annotation no longer matches the body:
                        # resource was edited by hand, re-apply it
                        msg = (
                            "[{}/{}] resource '{}/{}' present and "
                            "has stale sha256sum due to manual changes."
                        ).format(cluster, namespace, resource_type, name)
                        logging.info(msg)
                logging.debug("CURRENT: " +
                              OR.serialize(OR.canonicalize(c_item.body)))
        else:
            logging.debug("CURRENT: None")

        logging.debug("DESIRED: " +
                      OR.serialize(OR.canonicalize(d_item.body)))

        try:
            privileged = data['use_admin_token'].get(name, False)
            apply(dry_run, oc_map, cluster, namespace, resource_type,
                  d_item, wait_for_namespace, recycle_pods, privileged)
            action = {
                'action': ACTION_APPLIED,
                'cluster': cluster,
                'namespace': namespace,
                'kind': resource_type,
                'name': d_item.name,
                'privileged': privileged
            }
            actions.append(action)
        except StatusCodeError as e:
            ri.register_error()
            # never leak Secret contents into logs
            err = str(e) if resource_type != 'Secret' \
                else f'error applying Secret {d_item.name}: REDACTED'
            msg = f"[{cluster}/{namespace}] {err} " + \
                f"(error details: {d_item.error_details})"
            logging.error(msg)

    # current items
    for name, c_item in data['current'].items():
        d_item = data['desired'].get(name)
        if d_item is not None:
            # still desired — handled in the loop above
            continue

        if c_item.has_qontract_annotations():
            # managed by a different caller (saas file): leave it alone
            if caller and c_item.caller != caller:
                continue
        elif not take_over:
            # this is reached when the current resources:
            # - does not have qontract annotations (not managed)
            # - not taking over all resources of the current kind
            msg = f"[{cluster}/{namespace}] skipping " + \
                f"{resource_type}/{c_item.name}"
            logging.debug(msg)
            continue

        if c_item.has_owner_reference():
            # owned by another object; its owner manages its lifecycle
            continue

        try:
            privileged = data['use_admin_token'].get(name, False)
            delete(dry_run, oc_map, cluster, namespace, resource_type,
                   name, enable_deletion, privileged)
            action = {
                'action': ACTION_DELETED,
                'cluster': cluster,
                'namespace': namespace,
                'kind': resource_type,
                'name': name,
                'privileged': privileged
            }
            actions.append(action)
        except StatusCodeError as e:
            ri.register_error()
            msg = "[{}/{}] {}".format(cluster, namespace, str(e))
            logging.error(msg)
    return actions