def from_config_file(config_path, job_template_path, deployments_dir,
                     local_dir=None):
    """Construct a reconciler from the config path.

    Args:
        config_path: Path to the YAML configuration file.
        job_template_path: Path to the YAML file containing a K8s job to
            launch to do the deployments.
        deployments_dir: Path where YAML should be dumped describing
            deployments.
        local_dir: (Optional) Path where repositories should be checked out.

    Returns:
        A configured Reconciler with its deployments directory and K8s
        client set.

    Raises:
        ValueError: If the KFDefs listed in the config do not all come
            from the same repository.
    """
    with open(config_path) as f:
        # safe_load avoids arbitrary Python object construction from the
        # config file (yaml.load without an explicit Loader is unsafe and
        # deprecated in PyYAML >= 5.1).
        config = yaml.safe_load(f)

    kfdef_url = _parse_kfdef_url(config[VERSIONS_KEY][0][KFDEF_KEY])

    # Ensure there is a single repository; currently the code only handles
    # the case where all deployments are from a single URL.
    for d in config[VERSIONS_KEY][1:]:
        new_url = _parse_kfdef_url(d[KFDEF_KEY])
        if (new_url.host != kfdef_url.host or
                new_url.owner != kfdef_url.owner or
                new_url.repo != kfdef_url.repo):
            raise ValueError(
                "All deployments must use the same repo for the KFDef")

    url = _kfdef_url_to_clone_url(kfdef_url)
    manifests_repo = git_repo_manager.GitRepoManager(url=url,
                                                     local_dir=local_dir)
    reconciler = Reconciler(config=config,
                            job_template_path=job_template_path,
                            manifests_repo=manifests_repo)
    reconciler._deployments_dir = deployments_dir  # pylint: disable=protected-access
    logging.info(
        f"Using deployments directory={reconciler._deployments_dir}")  # pylint: disable=protected-access

    # Presence of the service-account mount indicates we are running inside
    # a cluster; otherwise fall back to the local kube config file.
    service_account_path = "/var/run/secrets/kubernetes.io"
    if os.path.exists(service_account_path):
        logging.info(
            f"{service_account_path} exists; loading in cluster config")
        k8s_config.load_incluster_config()
    else:
        logging.info(f"{service_account_path} doesn't exist; "
                     "loading kube config file")
        k8s_config.load_kube_config(persist_config=False)

    reconciler._k8s_client = k8s_client.ApiClient()  # pylint: disable=protected-access
    return reconciler
def _reconcile(self):
    """Run one reconcile pass over all configured pipeline runs.

    For each pipeline run, compare the newest commit on the run's branch
    against the most recent deployment for that group and launch a new
    deployment pipeline when a sync is needed, subject to rate limits
    (minimum age of the previous deployment and a cap on the number of
    active deployments). Finishes by garbage collecting old deployments.
    """
    # Refresh our view of the existing deployments.
    self._get_deployments()

    # Compute the current number of active deployments across all groups.
    active_deployments = sum(len(v) for v in self._deployments.values())

    # Matches clone URLs like https://host/owner/repo.git; hoisted out of
    # the loop because the pattern is loop-invariant.
    url_pattern = re.compile("https://[^/]*/([^/]*)/([^#]*).git")

    for run in self._pipeline_runs:
        self._log_context = {
            GROUP_LABEL: run.group,
        }
        logging.info(f"Reconciling pipeline group: {run.group}",
                     extra=self._log_context)

        branch = run.get_resource_param(BLUEPRINTS_REPO, "revision")
        git_url = run.get_resource_param(BLUEPRINTS_REPO, "url")

        # Lazily create one GitRepoManager per unique repository URL.
        if git_url not in self._manifests_repo:
            match = url_pattern.match(git_url)
            if not match:
                raise ValueError(
                    f"Repo url {git_url} did not match regex: "
                    f" {url_pattern.pattern}")
            local_dir = os.path.join(self._local_dir, match.group(1),
                                     match.group(2))
            self._manifests_repo[git_url] = git_repo_manager.GitRepoManager(
                url=git_url, local_dir=local_dir)

        repo = self._manifests_repo[git_url]
        full_branch = f"{repo.remote_name}/{branch}"

        # Sync the repository because we use this to find the latest
        # changes.
        repo.fetch()
        last_commit = repo.last_commit(full_branch, "")
        logging.info(f"Last commit to group={run.group} "
                     f"commit={last_commit}", extra=self._log_context)

        # Compare against the most recent deployment for this group, if
        # any exists.
        if self._deployments[run.group]:
            last_deployed = self._deployments[run.group][-1]
            # Use the deployment's own tzinfo so the subtraction never
            # mixes naive and aware datetimes.
            now = datetime.datetime.now(tz=last_deployed.create_time.tzinfo)
            time_since_last_deploy = now - last_deployed.create_time
            logging.info(f"group={run.group} "
                         f"last_commit={last_commit} most recent "
                         f"deployment is {last_deployed.name} "
                         f"at commit={last_deployed.commit} "
                         f"age={time_since_last_deploy}",
                         extra=self._log_context)

            # Skip when already at the latest commit, unless the periodic
            # redeploy interval has elapsed.
            if (last_deployed.commit == last_commit and
                    time_since_last_deploy < PERIODIC_REDEPLOY):
                logging.info(f"group={run.group} no sync needed",
                             extra=self._log_context)
                continue

            logging.info(f"group={run.group} sync needed",
                         extra=self._log_context)

            # Rate limit: don't redeploy too soon after the previous
            # deployment.
            if time_since_last_deploy < MIN_TIME_BETWEEN_DEPLOYMENTS:
                minutes = time_since_last_deploy.total_seconds() / 60.0
                logging.info(
                    f"group={run.group} can't start a new deployment "
                    f"because deployment for {last_deployed.deployment_name} "
                    f"is only {minutes} minutes old",
                    extra=self._log_context)
                continue
        else:
            logging.info(f"group={run.group} has no active deployments",
                         extra=self._log_context)

        # Global cap on the number of concurrently active deployments.
        if active_deployments >= MAX_ACTIVE_DEPLOYMENTS:
            logging.info(
                f"group={run.group} can't start a new deployment "
                f"there are currently {active_deployments} active "
                f"deployments already.",
                extra=self._log_context)
            continue

        self._launch_pipeline(run, last_commit)

    # TODO(jlewi): We should GC the older deployments. We should have
    # some min TTL so we don't delete clusters from underneath people.
    # We should then GC any clusters as long as there as a newer cluster
    # already available. We should require that the new cluster is at least
    # 30 minutes old so that we know its ready.
    self._gc_deployments()