def purge_release(self, chart, release_id, status, manifest_name,
                  chart_name, result):
    '''Purge an existing release unless the chart marks it `protected`.

    :param chart: Chart data mapping. Its optional `protected` entry is
        read here and the whole mapping is handed to ``ChartDelete``.
    :param release_id: Release identifier used in log messages, passed to
        ``ChartDelete`` and recorded in ``result``.
    :param status: Release status, used for logging and passed to the
        raised exception.
    :param manifest_name: First argument of the chart-delete metrics
        context.
    :param chart_name: Second argument of the chart-delete metrics
        context.
    :param result: Dict updated in place; ``result['protected']`` or
        ``result['purge']`` is set to ``release_id``.
    :returns: ``result`` when the release is protected and
        ``continue_processing`` is truthy; ``None`` after a purge.
    :raises armada_exceptions.ProtectedReleaseException: when the release
        is protected and ``continue_processing`` is falsy.
    '''
    protected = chart.get('protected', {})
    if protected:
        if protected.get('continue_processing', False):
            # Leave the release untouched; record it for the operator.
            LOG.warning(
                'Release %s is `protected`, '
                'continue_processing=True. Operator must '
                'handle %s release manually.', release_id, status)
            result['protected'] = release_id
            return result

        LOG.error(
            'Release %s is `protected`, '
            'continue_processing=False.', release_id)
        raise armada_exceptions.ProtectedReleaseException(
            release_id, status)

    # Purge the release
    with metrics.CHART_DELETE.get_context(manifest_name, chart_name):
        LOG.info('Purging release %s with status %s', release_id, status)
        chart_delete = ChartDelete(chart, release_id, self.helm)
        chart_delete.delete()
        result['purge'] = release_id

    # NOTE: the purge path has always returned None (only the protected
    # path returns `result`); kept explicit so callers relying on that
    # distinction keep working.
    return None
def execute(self, chart, cg_test_all_charts, prefix, known_releases):
    '''Install or upgrade one chart release, then wait on and test it.

    :param chart: Chart data mapping. Keys read here: `namespace`,
        `release`, `values`, `protected`, `upgrade` and `test`.
    :param cg_test_all_charts: Passed to ``Test`` as ``cg_test_charts``.
    :param prefix: Prefix combined with the chart's `release` by
        ``r.release_prefixer`` to build the release name.
    :param known_releases: Releases searched by
        ``self.find_chart_release`` for an existing release.
    :returns: Dict that may contain the keys `protected`, `purge`,
        `diff`, `upgrade` and `install`.
    :raises armada_exceptions.UnexpectedReleaseStatusException: when an
        existing release is neither FAILED nor DEPLOYED.
    :raises armada_exceptions.ProtectedReleaseException: when a FAILED
        release is protected and ``continue_processing`` is falsy.
    :raises armada_exceptions.InvalidOverrideValuesYamlException: when
        the values of the deployed release cannot be parsed as YAML.
    '''
    namespace = chart.get('namespace')
    release = chart.get('release')
    release_name = r.release_prefixer(prefix, release)
    LOG.info('Processing Chart, release=%s', release_name)

    values = chart.get('values', {})
    pre_actions = {}
    post_actions = {}

    result = {}

    protected = chart.get('protected', {})

    old_release = self.find_chart_release(known_releases, release_name)

    status = None
    if old_release:
        status = r.get_release_status(old_release)

        if status not in (const.STATUS_FAILED, const.STATUS_DEPLOYED):
            raise armada_exceptions.UnexpectedReleaseStatusException(
                release_name, status)

    chart_wait = ChartWait(
        self.tiller.k8s,
        release_name,
        chart,
        namespace,
        k8s_wait_attempts=self.k8s_wait_attempts,
        k8s_wait_attempt_sleep=self.k8s_wait_attempt_sleep,
        timeout=self.timeout)

    native_wait_enabled = chart_wait.is_native_enabled()

    # Begin Chart timeout deadline
    deadline = time.time() + chart_wait.get_timeout()

    chartbuilder = ChartBuilder(chart)
    new_chart = chartbuilder.get_helm_chart()

    # Check for existing FAILED release, and purge
    if status == const.STATUS_FAILED:
        LOG.info('Purging FAILED release %s before deployment.',
                 release_name)
        if protected:
            # Only consult `continue_processing` when it is needed.
            if protected.get('continue_processing', False):
                LOG.warning(
                    'Release %s is `protected`, '
                    'continue_processing=True. Operator must '
                    'handle FAILED release manually.', release_name)
                result['protected'] = release_name
                return result

            LOG.error(
                'Release %s is `protected`, '
                'continue_processing=False.', release_name)
            raise armada_exceptions.ProtectedReleaseException(
                release_name)

        # Purge the release
        self.tiller.uninstall_release(release_name)
        result['purge'] = release_name

    # TODO(mark-burnett): It may be more robust to directly call
    # tiller status to decide whether to install/upgrade rather
    # than checking for list membership.
    if status == const.STATUS_DEPLOYED:
        # indicate to the end user what path we are taking
        LOG.info("Existing release %s found in namespace %s",
                 release_name, namespace)

        # extract the installed chart and installed values from the
        # latest release so we can compare to the intended state
        old_chart = old_release.chart
        old_values_string = old_release.config.raw

        upgrade = chart.get('upgrade', {})
        disable_hooks = upgrade.get('no_hooks', False)
        options = upgrade.get('options', {})
        force = options.get('force', False)
        recreate_pods = options.get('recreate_pods', False)

        if upgrade:
            upgrade_pre = upgrade.get('pre', {})
            upgrade_post = upgrade.get('post', {})

            if not self.disable_update_pre and upgrade_pre:
                pre_actions = upgrade_pre

            if not self.disable_update_post and upgrade_post:
                # The trailing space keeps the two literals from
                # running together ("Armadaand").
                LOG.warning('Post upgrade actions are ignored by Armada '
                            'and will not affect deployment.')
                post_actions = upgrade_post

        try:
            old_values = yaml.safe_load(old_values_string)
        except yaml.YAMLError:
            chart_desc = '{} (previously deployed)'.format(
                old_chart.metadata.name)
            raise armada_exceptions.InvalidOverrideValuesYamlException(
                chart_desc)

        LOG.info('Checking for updates to chart release inputs.')
        diff = self.get_diff(old_chart, old_values, new_chart, values)

        if not diff:
            LOG.info("Found no updates to chart release inputs")
        else:
            LOG.info("Found updates to chart release inputs")
            LOG.debug("%s", diff)
            result['diff'] = {chart['release']: str(diff)}

            # TODO(MarshM): Add tiller dry-run before upgrade and
            # consider deadline impacts

            # do actual update
            timer = int(round(deadline - time.time()))
            LOG.info(
                "Upgrading release %s in namespace %s, wait=%s, "
                "timeout=%ss", release_name, namespace,
                native_wait_enabled, timer)
            tiller_result = self.tiller.update_release(
                new_chart,
                release_name,
                namespace,
                pre_actions=pre_actions,
                post_actions=post_actions,
                disable_hooks=disable_hooks,
                values=yaml.safe_dump(values),
                wait=native_wait_enabled,
                timeout=timer,
                force=force,
                recreate_pods=recreate_pods)

            LOG.info('Upgrade completed with results from Tiller: %s',
                     tiller_result.__dict__)
            result['upgrade'] = release_name
    else:
        # No release, or a FAILED release that was purged above.
        timer = int(round(deadline - time.time()))
        LOG.info(
            "Installing release %s in namespace %s, wait=%s, "
            "timeout=%ss", release_name, namespace, native_wait_enabled,
            timer)
        tiller_result = self.tiller.install_release(
            new_chart,
            release_name,
            namespace,
            values=yaml.safe_dump(values),
            wait=native_wait_enabled,
            timeout=timer)

        LOG.info('Install completed with results from Tiller: %s',
                 tiller_result.__dict__)
        result['install'] = release_name

    # Wait (runs even when nothing was deployed) with whatever time is
    # left before the deadline.
    timer = int(round(deadline - time.time()))
    chart_wait.wait(timer)

    # Test
    just_deployed = ('install' in result) or ('upgrade' in result)
    last_test_passed = old_release and r.get_last_test_result(old_release)

    test_values = chart.get('test')
    test_handler = Test(
        release_name,
        self.tiller,
        cg_test_charts=cg_test_all_charts,
        test_values=test_values)

    # Re-run tests for an unchanged release only if the last run did not
    # pass.
    run_test = test_handler.test_enabled and (just_deployed
                                              or not last_test_passed)
    if run_test:
        timer = int(round(deadline - time.time()))
        self._test_chart(release_name, timer, test_handler)

    return result
def _execute(self, ch, cg_test_all_charts, prefix, known_releases):
    '''Resolve and run the deploy action for one chart document.

    The action (no-op, upgrade or install) is chosen first and wrapped in
    a closure; it is then run inside the chart-deploy metrics context,
    followed by the chart wait and, when applicable, the chart test.

    :param ch: Chart document. ``ch[const.KEYWORD_DATA]`` holds the chart
        data, ``ch['metadata']['name']`` the chart name and
        ``ch['schema']`` the schema string.
    :param cg_test_all_charts: Passed to ``Test`` as ``cg_test_charts``.
    :param prefix: Prefix combined with the chart's `release` by
        ``r.release_prefixer`` to build the release name.
    :param known_releases: Releases searched by
        ``self.find_chart_release`` for an existing release.
    :returns: Dict that may contain the keys `protected`, `purge`,
        `diff`, `upgrade` and `install`.
    :raises armada_exceptions.DeploymentLikelyPendingException: when an
        existing release has an unexpected status and its last
        deployment is no older than the chart wait timeout.
    :raises armada_exceptions.ProtectedReleaseException: when a release
        that would be purged is protected and ``continue_processing`` is
        falsy.
    :raises armada_exceptions.InvalidOverrideValuesYamlException: when
        the values of the deployed release cannot be parsed as YAML.
    '''
    manifest_name = self.manifest['metadata']['name']
    chart = ch[const.KEYWORD_DATA]
    chart_name = ch['metadata']['name']
    namespace = chart.get('namespace')
    release = chart.get('release')
    release_name = r.release_prefixer(prefix, release)
    LOG.info('Processing Chart, release=%s', release_name)

    result = {}

    chart_wait = ChartWait(
        self.tiller.k8s,
        release_name,
        ch,
        namespace,
        k8s_wait_attempts=self.k8s_wait_attempts,
        k8s_wait_attempt_sleep=self.k8s_wait_attempt_sleep,
        timeout=self.timeout)
    wait_timeout = chart_wait.get_timeout()

    # Begin Chart timeout deadline
    deadline = time.time() + wait_timeout
    old_release = self.find_chart_release(known_releases, release_name)
    action = metrics.ChartDeployAction.NOOP

    def noop():
        pass

    deploy = noop

    # Resolve action
    values = chart.get('values', {})
    pre_actions = {}
    post_actions = {}

    status = None
    if old_release:
        status = r.get_release_status(old_release)

    native_wait_enabled = chart_wait.is_native_enabled()

    chartbuilder = ChartBuilder.from_chart_doc(ch)
    new_chart = chartbuilder.get_helm_chart()

    if status == const.STATUS_DEPLOYED:
        # indicate to the end user what path we are taking
        LOG.info("Existing release %s found in namespace %s",
                 release_name, namespace)

        # extract the installed chart and installed values from the
        # latest release so we can compare to the intended state
        old_chart = old_release.chart
        old_values_string = old_release.config.raw

        # Named `upgrade_cfg` so the nested `upgrade()` closure below
        # does not rebind the same local name.
        upgrade_cfg = chart.get('upgrade', {})
        options = upgrade_cfg.get('options', {})

        # TODO: Remove when v1 doc support is removed.
        schema_info = get_schema_info(ch['schema'])
        if schema_info.version < 2:
            no_hooks_location = upgrade_cfg
        else:
            no_hooks_location = options

        disable_hooks = no_hooks_location.get('no_hooks', False)
        force = options.get('force', False)
        recreate_pods = options.get('recreate_pods', False)

        if upgrade_cfg:
            upgrade_pre = upgrade_cfg.get('pre', {})
            upgrade_post = upgrade_cfg.get('post', {})

            if not self.disable_update_pre and upgrade_pre:
                pre_actions = upgrade_pre

            if not self.disable_update_post and upgrade_post:
                # The trailing space keeps the two literals from
                # running together ("Armadaand").
                LOG.warning('Post upgrade actions are ignored by Armada '
                            'and will not affect deployment.')
                post_actions = upgrade_post

        try:
            old_values = yaml.safe_load(old_values_string)
        except yaml.YAMLError:
            chart_desc = '{} (previously deployed)'.format(
                old_chart.metadata.name)
            raise armada_exceptions.InvalidOverrideValuesYamlException(
                chart_desc)

        LOG.info('Checking for updates to chart release inputs.')
        diff = self.get_diff(old_chart, old_values, new_chart, values)

        if not diff:
            LOG.info("Found no updates to chart release inputs")
        else:
            action = metrics.ChartDeployAction.UPGRADE
            LOG.info("Found updates to chart release inputs")
            LOG.debug("%s", diff)
            result['diff'] = {chart['release']: str(diff)}

            def upgrade():
                # do actual update
                timer = int(round(deadline - time.time()))
                LOG.info(
                    "Upgrading release %s in namespace %s, wait=%s, "
                    "timeout=%ss", release_name, namespace,
                    native_wait_enabled, timer)
                tiller_result = self.tiller.update_release(
                    new_chart,
                    release_name,
                    namespace,
                    pre_actions=pre_actions,
                    post_actions=post_actions,
                    disable_hooks=disable_hooks,
                    values=yaml.safe_dump(values),
                    wait=native_wait_enabled,
                    timeout=timer,
                    force=force,
                    recreate_pods=recreate_pods)

                LOG.info('Upgrade completed with results from Tiller: %s',
                         tiller_result.__dict__)
                result['upgrade'] = release_name

            deploy = upgrade
    else:
        # Check for release with status other than DEPLOYED
        if status:
            if status != const.STATUS_FAILED:
                LOG.warning(
                    'Unexpected release status encountered '
                    'release=%s, status=%s', release_name, status)

                # Make best effort to determine whether a deployment is
                # likely pending, by checking if the last deployment
                # was started within the timeout window of the chart.
                last_deployment_age = r.get_last_deployment_age(
                    old_release)
                likely_pending = last_deployment_age <= wait_timeout
                if likely_pending:
                    # Give up if a deployment is likely pending, we do
                    # not want to have multiple operations going on for
                    # the same release at the same time.
                    raise armada_exceptions.\
                        DeploymentLikelyPendingException(
                            release_name, status, last_deployment_age,
                            wait_timeout)

                # Release is likely stuck in an unintended (by tiller)
                # state. Log and continue on with remediation steps
                # below.
                LOG.info(
                    'Old release %s likely stuck in status %s, '
                    '(last deployment age=%ss) >= '
                    '(chart wait timeout=%ss)', release_name, status,
                    last_deployment_age, wait_timeout)

            protected = chart.get('protected', {})
            if protected:
                p_continue = protected.get('continue_processing', False)
                if p_continue:
                    LOG.warning(
                        'Release %s is `protected`, '
                        'continue_processing=True. Operator must '
                        'handle %s release manually.', release_name,
                        status)
                    result['protected'] = release_name
                    return result

                LOG.error(
                    'Release %s is `protected`, '
                    'continue_processing=False.', release_name)
                raise armada_exceptions.ProtectedReleaseException(
                    release_name, status)

            # Purge the release
            with metrics.CHART_DELETE.get_context(manifest_name,
                                                  chart_name):
                LOG.info('Purging release %s with status %s',
                         release_name, status)
                chart_delete = ChartDelete(chart, release_name,
                                           self.tiller)
                chart_delete.delete()
                result['purge'] = release_name

        action = metrics.ChartDeployAction.INSTALL

        def install():
            timer = int(round(deadline - time.time()))
            LOG.info(
                "Installing release %s in namespace %s, wait=%s, "
                "timeout=%ss", release_name, namespace,
                native_wait_enabled, timer)
            tiller_result = self.tiller.install_release(
                new_chart,
                release_name,
                namespace,
                values=yaml.safe_dump(values),
                wait=native_wait_enabled,
                timeout=timer)

            LOG.info('Install completed with results from Tiller: %s',
                     tiller_result.__dict__)
            result['install'] = release_name

        deploy = install

    # Deploy
    with metrics.CHART_DEPLOY.get_context(wait_timeout, manifest_name,
                                          chart_name,
                                          action.get_label_value()):
        deploy()

    # Wait (runs even for the no-op action) with whatever time is left
    # before the deadline.
    timer = int(round(deadline - time.time()))
    chart_wait.wait(timer)

    # Test
    just_deployed = ('install' in result) or ('upgrade' in result)
    last_test_passed = old_release and r.get_last_test_result(old_release)

    test_handler = Test(
        chart,
        release_name,
        self.tiller,
        cg_test_charts=cg_test_all_charts)

    # Re-run tests for an unchanged release only if the last run did not
    # pass.
    run_test = test_handler.test_enabled and (just_deployed
                                              or not last_test_passed)
    if run_test:
        with metrics.CHART_TEST.get_context(test_handler.timeout,
                                            manifest_name, chart_name):
            self._test_chart(release_name, test_handler)

    return result
def sync(self):
    '''
    Synchronize Helm with the Armada Config(s)

    Walks every ChartGroup of the manifest and, for each chart, purges a
    FAILED release (unless protected), then upgrades the release when its
    inputs changed or installs it when it is not deployed. After each
    ChartGroup it waits on every namespace/label pair that was touched
    and runs the tests that were deferred.

    :returns: Dict with the list-valued keys `install`, `upgrade`,
        `diff`, `purge` and `protected`.
    :raises armada_exceptions.ProtectedReleaseException: when a FAILED
        release is protected and ``continue_processing`` is falsy.
    :raises armada_exceptions.InvalidOverrideValuesYamlException: when
        the values of a deployed release cannot be parsed as YAML.
    :raises armada_exceptions.ArmadaTimeoutException: when the ChartGroup
        deadline expires before all namespaces have been waited on.
    '''
    if self.dry_run:
        LOG.info('Armada is in DRY RUN mode, no changes being made.')

    msg = {
        'install': [],
        'upgrade': [],
        'diff': [],
        'purge': [],
        'protected': []
    }

    # TODO: (gardlt) we need to break up this func into
    # a more cleaner format
    self.pre_flight_ops()

    # extract known charts on tiller right now
    deployed_releases, failed_releases = self._get_releases_by_status()

    # Build the name lookups once instead of once per chart. Neither
    # list is modified below, so the membership results are unchanged.
    failed_names = {rel[0] for rel in failed_releases}
    deployed_names = {rel[0] for rel in deployed_releases}

    manifest_data = self.manifest.get(const.KEYWORD_ARMADA, {})
    prefix = manifest_data.get(const.KEYWORD_PREFIX)

    for chartgroup in manifest_data.get(const.KEYWORD_GROUPS, []):
        cg_name = chartgroup.get('name', '<missing name>')
        cg_desc = chartgroup.get('description', '<missing description>')
        cg_sequenced = chartgroup.get('sequenced', False)
        LOG.info('Processing ChartGroup: %s (%s), sequenced=%s', cg_name,
                 cg_desc, cg_sequenced)

        # TODO(MarshM): Deprecate the `test_charts` key
        cg_test_all_charts = chartgroup.get('test_charts')
        if isinstance(cg_test_all_charts, bool):
            LOG.warning('The ChartGroup `test_charts` key is deprecated, '
                        'and support for this will be removed. See the '
                        'Chart `test` key for more information.')
        else:
            # This key defaults to True. Individual charts must
            # explicitly disable helm tests if they choose
            cg_test_all_charts = True

        ns_label_set = set()
        tests_to_run = []

        cg_charts = chartgroup.get(const.KEYWORD_CHARTS, [])

        # Track largest Chart timeout to stop the ChartGroup at the end
        cg_max_timeout = 0

        for chart_entry in cg_charts:
            chart = chart_entry.get('chart', {})
            namespace = chart.get('namespace')
            release = chart.get('release')
            release_name = release_prefixer(prefix, release)
            LOG.info('Processing Chart, release=%s', release_name)

            values = chart.get('values', {})
            pre_actions = {}
            post_actions = {}

            protected = chart.get('protected', {})
            p_continue = protected.get('continue_processing', False)

            # Check for existing FAILED release, and purge
            if release_name in failed_names:
                LOG.info('Purging FAILED release %s before deployment.',
                         release_name)
                if protected:
                    if p_continue:
                        LOG.warning(
                            'Release %s is `protected`, '
                            'continue_processing=True. Operator must '
                            'handle FAILED release manually.',
                            release_name)
                        msg['protected'].append(release_name)
                        continue
                    else:
                        LOG.error(
                            'Release %s is `protected`, '
                            'continue_processing=False.', release_name)
                        raise armada_exceptions.\
                            ProtectedReleaseException(release_name)
                else:
                    # Purge the release
                    self.tiller.uninstall_release(release_name)
                    msg['purge'].append(release_name)

            # NOTE(MarshM): Calculating `wait_timeout` is unfortunately
            #   overly complex. The order of precedence is currently:
            #   1) User provided override via API/CLI (default 0 if not
            #      provided by client/user).
            #   2) Chart's `data.wait.timeout`, or...
            #   3) Chart's `data.timeout` (deprecated).
            #   4) const.DEFAULT_CHART_TIMEOUT, if nothing is ever
            #      specified, for use in waiting for final ChartGroup
            #      health and helm tests, but ignored for the actual
            #      install/upgrade of the Chart.
            # NOTE(MarshM): Not defining a timeout has a side effect of
            #   allowing Armada to install charts with a circular
            #   dependency defined between components.

            # TODO(MarshM): Deprecated, remove the following block
            deprecated_timeout = chart.get('timeout', None)
            if isinstance(deprecated_timeout, int):
                LOG.warning('The `timeout` key is deprecated and support '
                            'for this will be removed soon. Use '
                            '`wait.timeout` instead.')

            wait_values = chart.get('wait', {})
            wait_labels = wait_values.get('labels', {})

            wait_timeout = self.timeout
            if wait_timeout <= 0:
                wait_timeout = wait_values.get('timeout', wait_timeout)
                # TODO(MarshM): Deprecated, remove the following check
                if wait_timeout <= 0:
                    wait_timeout = deprecated_timeout or wait_timeout

            # Determine wait logic
            # NOTE(Dan Kim): The conditions to wait are:
            #   1) sequenced=True is set in the chart group
            #   2) the force_wait param is set
            #   3) the Chart has `data.wait.timeout`
            # The --timeout param does not set wait=True; it only
            # changes the max timeout of the chart's deployment
            # (default: 900).
            this_chart_should_wait = (cg_sequenced or self.force_wait
                                      or (bool(wait_values)
                                          and (wait_timeout > 0)))

            # If there is still no timeout, we need to use a default
            # (item 4 in note above)
            if wait_timeout <= 0:
                LOG.warning(
                    'No Chart timeout specified, using default: %ss',
                    const.DEFAULT_CHART_TIMEOUT)
                wait_timeout = const.DEFAULT_CHART_TIMEOUT

            # Naively take largest timeout to apply at end
            # TODO(MarshM) better handling of timeout/timer
            cg_max_timeout = max(wait_timeout, cg_max_timeout)

            test_chart_override = chart.get('test')
            # Use old default value when not using newer `test` key
            test_cleanup = True
            if test_chart_override is None:
                test_this_chart = cg_test_all_charts
            elif isinstance(test_chart_override, bool):
                LOG.warning('Boolean value for chart `test` key is'
                            ' deprecated and support for this will'
                            ' be removed. Use `test.enabled` '
                            'instead.')
                test_this_chart = test_chart_override
            else:
                # NOTE: helm tests are enabled by default
                test_this_chart = test_chart_override.get('enabled', True)
                test_cleanup = test_chart_override.get('options', {}).get(
                    'cleanup', False)

            chartbuilder = ChartBuilder(chart)
            new_chart = chartbuilder.get_helm_chart()

            # Begin Chart timeout deadline
            deadline = time.time() + wait_timeout

            # TODO(mark-burnett): It may be more robust to directly call
            # tiller status to decide whether to install/upgrade rather
            # than checking for list membership.
            if release_name in deployed_names:
                # indicate to the end user what path we are taking
                LOG.info("Upgrading release %s in namespace %s",
                         release_name, namespace)
                # extract the installed chart and installed values from
                # the latest release so we can compare to the intended
                # state
                old_chart, old_values_string = self.find_release_chart(
                    deployed_releases, release_name)

                upgrade = chart.get('upgrade', {})
                disable_hooks = upgrade.get('no_hooks', False)
                force = upgrade.get('force', False)
                recreate_pods = upgrade.get('recreate_pods', False)

                LOG.info("Checking Pre/Post Actions")
                if upgrade:
                    upgrade_pre = upgrade.get('pre', {})
                    upgrade_post = upgrade.get('post', {})

                    if not self.disable_update_pre and upgrade_pre:
                        pre_actions = upgrade_pre

                    if not self.disable_update_post and upgrade_post:
                        post_actions = upgrade_post

                try:
                    old_values = yaml.safe_load(old_values_string)
                except yaml.YAMLError:
                    chart_desc = '{} (previously deployed)'.format(
                        old_chart.metadata.name)
                    raise armada_exceptions.\
                        InvalidOverrideValuesYamlException(chart_desc)

                LOG.info('Checking for updates to chart release inputs.')
                diff = self.get_diff(old_chart, old_values, new_chart,
                                     values)

                if not diff:
                    # Nothing to upgrade; this also skips the test step
                    # for this chart.
                    LOG.info("Found no updates to chart release inputs")
                    continue

                LOG.info("Found updates to chart release inputs")
                LOG.debug("%s", diff)
                msg['diff'].append({chart['release']: str(diff)})

                # TODO(MarshM): Add tiller dry-run before upgrade and
                # consider deadline impacts

                # do actual update
                timer = int(round(deadline - time.time()))
                LOG.info('Beginning Upgrade, wait=%s, timeout=%ss',
                         this_chart_should_wait, timer)
                tiller_result = self.tiller.update_release(
                    new_chart,
                    release_name,
                    namespace,
                    pre_actions=pre_actions,
                    post_actions=post_actions,
                    disable_hooks=disable_hooks,
                    values=yaml.safe_dump(values),
                    wait=this_chart_should_wait,
                    timeout=timer,
                    force=force,
                    recreate_pods=recreate_pods)

                if this_chart_should_wait:
                    self._wait_until_ready(release_name, wait_labels,
                                           namespace, timer)

                # Track namespace+labels touched by upgrade
                ns_label_set.add((namespace, tuple(wait_labels.items())))

                LOG.info('Upgrade completed with results from Tiller: %s',
                         tiller_result.__dict__)
                msg['upgrade'].append(release_name)

            # process install
            else:
                LOG.info("Installing release %s in namespace %s",
                         release_name, namespace)

                timer = int(round(deadline - time.time()))
                LOG.info('Beginning Install, wait=%s, timeout=%ss',
                         this_chart_should_wait, timer)
                tiller_result = self.tiller.install_release(
                    new_chart,
                    release_name,
                    namespace,
                    values=yaml.safe_dump(values),
                    wait=this_chart_should_wait,
                    timeout=timer)

                if this_chart_should_wait:
                    self._wait_until_ready(release_name, wait_labels,
                                           namespace, timer)

                # Track namespace+labels touched by install
                ns_label_set.add((namespace, tuple(wait_labels.items())))

                LOG.info('Install completed with results from Tiller: %s',
                         tiller_result.__dict__)
                msg['install'].append(release_name)

            # Keeping track of time remaining
            timer = int(round(deadline - time.time()))
            test_chart_args = (release_name, timer, test_cleanup)
            if test_this_chart:
                # Sequenced ChartGroup should run tests after each Chart
                if cg_sequenced:
                    LOG.info(
                        'Running sequenced test, timeout remaining: '
                        '%ss.', timer)
                    self._test_chart(*test_chart_args)

                # Un-sequenced ChartGroup should run tests at the end
                else:
                    tests_to_run.append(
                        functools.partial(self._test_chart,
                                          *test_chart_args))

        # End of Charts in ChartGroup
        LOG.info('All Charts applied in ChartGroup %s.', cg_name)

        # After all Charts are applied, we should wait for the entire
        # ChartGroup to become healthy by looking at the namespaces seen
        # TODO(MarshM): Need to determine a better timeout
        #               (not cg_max_timeout)
        if cg_max_timeout <= 0:
            cg_max_timeout = const.DEFAULT_CHART_TIMEOUT
        deadline = time.time() + cg_max_timeout
        for (ns, labels) in ns_label_set:
            # Labels were stored as a tuple of items so the pair is
            # hashable; turn them back into a dict for the wait call.
            labels_dict = dict(labels)
            timer = int(round(deadline - time.time()))
            LOG.info(
                'Final ChartGroup wait for healthy namespace=%s, '
                'labels=(%s), timeout remaining: %ss.', ns, labels_dict,
                timer)
            if timer <= 0:
                reason = ('Timeout expired waiting on namespace: %s, '
                          'labels: (%s)' % (ns, labels_dict))
                LOG.error(reason)
                raise armada_exceptions.ArmadaTimeoutException(reason)

            self._wait_until_ready(
                release_name=None,
                wait_labels=labels_dict,
                namespace=ns,
                timeout=timer)

        # After entire ChartGroup is healthy, run any pending tests
        for callback in tests_to_run:
            callback()

    self.post_flight_ops()

    if self.enable_chart_cleanup:
        self._chart_cleanup(
            prefix,
            self.manifest[const.KEYWORD_ARMADA][const.KEYWORD_GROUPS], msg)

    LOG.info('Done applying manifest.')
    return msg