def delete_state_storage(cls, layer: "Layer") -> None:
    """Idempotently remove remote storage for tf state"""
    # After the layer is completely deleted, remove the opta config from the state bucket.
    if layer.cloud == "aws":
        cloud_client: CloudClient = AWS(layer)
    elif layer.cloud == "google":
        cloud_client = GCP(layer)
    elif layer.cloud == "azurerm":
        cloud_client = Azure(layer)
    elif layer.cloud == "local":
        cloud_client = Local(layer)
    elif layer.cloud == "helm":
        # There is no opta managed storage to delete
        return
    else:
        raise Exception(f"Cannot handle opta config deletion for cloud {layer.cloud}")
    cloud_client.delete_opta_config()
    cloud_client.delete_remote_state()

    # If this is the env layer, delete the state bucket & dynamo table as well.
    if layer.name == layer.root().name:
        logger.info(f"Deleting the state storage for {layer.name}...")
        if layer.cloud == "aws":
            cls._aws_delete_state_storage(layer)
        elif layer.cloud == "google":
            cls._gcp_delete_state_storage(layer)
        elif layer.cloud == "local":
            cls._local_delete_state_storage(layer)
def _azure_verify_storage(cls, layer: "Layer") -> bool:
    providers = layer.gen_providers(0)
    storage_account_name = providers["terraform"]["backend"]["azurerm"][
        "storage_account_name"
    ]
    container_name = providers["terraform"]["backend"]["azurerm"]["container_name"]
    return Azure(layer).bucket_exists(container_name, storage_account_name)
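# For reference, a sketch of the providers dict shape consumed here and in the
# other azure helpers below. Only the keys actually accessed in this file are
# shown; the values are hypothetical, and the real gen_providers(0) output
# carries more fields:
EXAMPLE_AZURE_PROVIDERS = {
    "terraform": {
        "backend": {
            "azurerm": {
                "resource_group_name": "opta-rg",  # hypothetical value
                "storage_account_name": "optastate12345",  # hypothetical value
                "container_name": "tfstate",  # hypothetical value
                "key": "opta.tfstate",  # hypothetical value
            }
        }
    },
    "provider": {"azurerm": {"location": "eastus", "subscription_id": "<sub-id>"}},
}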
def get_cloud_client(self) -> CloudClient:
    if self.cloud == "aws":
        return AWS(self)
    elif self.cloud == "google":
        return GCP(self)
    elif self.cloud == "azurerm":
        return Azure(self)
    elif self.cloud == "local":
        return Local(self)
    elif self.cloud == "helm":
        return HelmCloudClient(self)
    else:
        raise Exception(
            f"Unknown cloud {self.cloud}. Cannot handle getting the cloud client"
        )
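# A minimal sketch of the CloudClient interface implied by the call sites in
# this file. This is an assumption for illustration only; the real base class
# in opta likely declares more methods, and the name "CloudClientSketch" is
# hypothetical:
from abc import ABC, abstractmethod


class CloudClientSketch(ABC):
    def __init__(self, layer: "Layer") -> None:
        self.layer = layer

    @abstractmethod
    def get_remote_config(self) -> Optional["StructuredConfig"]:
        """Fetch the previously uploaded opta config, or None if absent."""

    @abstractmethod
    def upload_opta_config(self) -> None:
        """Persist the current opta config next to the tf state."""

    @abstractmethod
    def delete_opta_config(self) -> None:
        """Remove the stored opta config."""

    @abstractmethod
    def delete_remote_state(self) -> None:
        """Remove the stored terraform state."""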
def _azure_get_configs(layer: "Layer") -> List[str]:
    providers = layer.gen_providers(0)
    credentials = Azure.get_credentials()
    storage_account_name = providers["terraform"]["backend"]["azurerm"][
        "storage_account_name"
    ]
    container_name = providers["terraform"]["backend"]["azurerm"]["container_name"]
    storage_client = ContainerClient(
        account_url=f"https://{storage_account_name}.blob.core.windows.net",
        container_name=container_name,
        credential=credentials,
    )
    prefix = "opta_config/"
    blobs = storage_client.list_blobs(name_starts_with=prefix)
    configs = [blob.name[len(prefix):] for blob in blobs]
    if layer.name in configs:
        configs.remove(layer.name)
    return configs
def _apply(
    config: str,
    env: Optional[str],
    refresh: bool,
    local: bool,
    image_tag: Optional[str],
    test: bool,
    auto_approve: bool,
    input_variables: Dict[str, str],
    image_digest: Optional[str] = None,
    stdout_logs: bool = True,
    detailed_plan: bool = False,
) -> None:
    pre_check()
    _clean_tf_folder()
    if local and not test:
        config = local_setup(config, input_variables, image_tag, refresh_local_env=True)

    layer = Layer.load_from_yaml(config, env, input_variables=input_variables)
    layer.verify_cloud_credentials()
    layer.validate_required_path_dependencies()

    if Terraform.download_state(layer):
        tf_lock_exists, _ = Terraform.tf_lock_details(layer)
        if tf_lock_exists:
            raise UserErrors(USER_ERROR_TF_LOCK)
    _verify_parent_layer(layer, auto_approve)

    event_properties: Dict = layer.get_event_properties()
    amplitude_client.send_event(
        amplitude_client.START_GEN_EVENT,
        event_properties=event_properties,
    )

    # We need a region with at least 3 AZs for leader election during failover.
    # Also EKS historically had problems with regions that have fewer than 3 AZs.
    if layer.cloud == "aws":
        providers = layer.gen_providers(0)["provider"]
        aws_region = providers["aws"]["region"]
        azs = _fetch_availability_zones(aws_region)
        if len(azs) < 3:
            raise UserErrors(
                fmt_msg(
                    f"""
                    Opta requires a region with at least *3* availability zones like us-east-1 or us-west-2.
                    ~You configured {aws_region}, which only has the availability zones: {azs}.
                    ~Please choose a different region.
                    """
                )
            )

    Terraform.create_state_storage(layer)
    gen_opta_resource_tags(layer)

    cloud_client: CloudClient
    if layer.cloud == "aws":
        cloud_client = AWS(layer)
    elif layer.cloud == "google":
        cloud_client = GCP(layer)
    elif layer.cloud == "azurerm":
        cloud_client = Azure(layer)
    elif layer.cloud == "local":
        if local:  # boolean passed via cli
            pass
        cloud_client = Local(layer)
    elif layer.cloud == "helm":
        cloud_client = HelmCloudClient(layer)
    else:
        raise Exception(f"Cannot handle upload config for cloud {layer.cloud}")

    existing_config: Optional[StructuredConfig] = cloud_client.get_remote_config()
    old_semver_string = (
        ""
        if existing_config is None
        else existing_config.get("opta_version", "").strip("v")
    )
    current_semver_string = VERSION.strip("v")
    _verify_semver(old_semver_string, current_semver_string, layer, auto_approve)

    try:
        existing_modules: Set[str] = set()
        first_loop = True
        for module_idx, current_modules, total_block_count in gen(
            layer, existing_config, image_tag, image_digest, test, True, auto_approve
        ):
            if first_loop:
                # This is set during the first iteration, since the tf file must exist.
                existing_modules = Terraform.get_existing_modules(layer)
                first_loop = False
            configured_modules = {x.name for x in current_modules}
            is_last_module = module_idx == total_block_count - 1
            has_new_modules = not configured_modules.issubset(existing_modules)
            if not is_last_module and not has_new_modules and not refresh:
                continue
            if is_last_module:
                untouched_modules = existing_modules - configured_modules
                configured_modules = configured_modules.union(untouched_modules)

            layer.pre_hook(module_idx)
            if layer.cloud == "local":
                if is_last_module:
                    targets = []
            else:
                targets = list(
                    map(lambda x: f"-target=module.{x}", sorted(configured_modules))
                )

            if test:
                Terraform.plan("-lock=false", *targets, layer=layer)
                print("Plan ran successfully, not applying since this is a test.")
            else:
                current_properties = event_properties.copy()
                current_properties["module_idx"] = module_idx
                amplitude_client.send_event(
                    amplitude_client.APPLY_EVENT,
                    event_properties=current_properties,
                )
                logger.info("Planning your changes (might take a minute)")
                try:
                    Terraform.plan(
                        "-lock=false",
                        "-input=false",
                        f"-out={TF_PLAN_PATH}",
                        layer=layer,
                        *targets,
                        quiet=True,
                    )
                except CalledProcessError as e:
                    logger.error(e.stderr or "")
                    raise e

                PlanDisplayer.display(detailed_plan=detailed_plan)

                if not auto_approve:
                    click.confirm(
                        "The above are the planned changes for your opta run. Do you approve?",
                        abort=True,
                    )
                logger.info("Applying your changes (might take a minute)")
                service_modules = (
                    layer.get_module_by_type("k8s-service", module_idx)
                    if layer.cloud == "aws"
                    else layer.get_module_by_type("gcp-k8s-service", module_idx)
                )
                if (
                    len(service_modules) != 0
                    and cluster_exist(layer.root())
                    and stdout_logs
                ):
                    service_module = service_modules[0]
                    # Tailing logs
                    logger.info(
                        f"Identified deployment for kubernetes service module {service_module.name}, tailing logs now."
                    )
                    new_thread = Thread(
                        target=tail_module_log,
                        args=(
                            layer,
                            service_module.name,
                            10,
                            datetime.datetime.utcnow().replace(tzinfo=pytz.UTC),
                            2,
                        ),
                        daemon=True,
                    )
                    new_thread.start()
                    # Tailing events
                    new_thread = Thread(
                        target=tail_namespace_events,
                        args=(
                            layer,
                            datetime.datetime.utcnow().replace(tzinfo=pytz.UTC),
                            3,
                        ),
                        daemon=True,
                    )
                    new_thread.start()

                tf_flags: List[str] = []
                if auto_approve:
                    tf_flags.append("-auto-approve")
                try:
                    Terraform.apply(
                        layer, *tf_flags, TF_PLAN_PATH, no_init=True, quiet=False
                    )
                except Exception as e:
                    layer.post_hook(module_idx, e)
                    raise e
                else:
                    layer.post_hook(module_idx, None)
                cloud_client.upload_opta_config()
        logger.info("Opta updates complete!")
    except Exception as e:
        event_properties["success"] = False
        event_properties["error_name"] = e.__class__.__name__
        raise e
    else:
        event_properties["success"] = True
    finally:
        amplitude_client.send_event(
            amplitude_client.FINISH_GEN_EVENT,
            event_properties=event_properties,
        )
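# Hedged usage sketch: _apply is normally invoked through a click command
# wrapper; calling it directly looks roughly like this. All argument values
# here are hypothetical, grounded only in the signature above:
if __name__ == "__main__":
    _apply(
        config="opta.yaml",
        env=None,
        refresh=False,
        local=False,
        image_tag=None,
        test=False,
        auto_approve=True,
        input_variables={},
    )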
def _get_azure_lock_id(cls, layer: "Layer") -> str:
    azure = Azure(layer)
    return azure.get_terraform_lock_id()
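# Hedged sketch of how a lock id like the one above is typically consumed:
# terraform's own `force-unlock` command takes it to clear a stale state lock.
# The helper below is hypothetical, for illustration only; it mirrors the real
# terraform CLI rather than opta's actual unlock path:
import subprocess


def _force_unlock_sketch(lock_id: str) -> None:
    # "terraform force-unlock -force <LOCK_ID>" releases a stuck state lock
    # without an interactive confirmation prompt.
    subprocess.run(["terraform", "force-unlock", "-force", lock_id], check=True)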
def _create_azure_state_storage(cls, providers: dict) -> None:
    resource_group_name = providers["terraform"]["backend"]["azurerm"][
        "resource_group_name"
    ]
    region = providers["provider"]["azurerm"]["location"]
    subscription_id = providers["provider"]["azurerm"]["subscription_id"]
    storage_account_name = providers["terraform"]["backend"]["azurerm"][
        "storage_account_name"
    ]
    container_name = providers["terraform"]["backend"]["azurerm"]["container_name"]

    # Create RG
    credential = Azure.get_credentials()
    resource_client = ResourceManagementClient(credential, subscription_id)
    try:
        rg_result = resource_client.resource_groups.create_or_update(
            resource_group_name, {"location": region}
        )
    except ResourceNotFoundError as e:
        if "SubscriptionNotFound" in e.message:
            raise UserErrors(
                f"SubscriptionId {subscription_id} does not exist. Please check and use the correct Subscription Id. "
                "This is used for accessing the resources in the Resource Group."
            )
        raise  # Re-raise anything we don't recognize instead of swallowing it.
    except HttpResponseError as e:
        if "InvalidSubscriptionId" in e.message:
            raise UserErrors(
                f"Malformed or invalid SubscriptionId {subscription_id} used. Please check and use the correct Subscription Id. "
                "This is used for accessing the resources in the Resource Group."
            )
        raise
    logger.debug(
        f"Provisioned resource group {rg_result.name} in the {rg_result.location} region"
    )

    authorization_client = AuthorizationManagementClient(
        credential, subscription_id, api_version="2018-01-01-preview"
    )
    owner_role_name = "Owner"
    owner_role = list(
        authorization_client.role_definitions.list(
            rg_result.id, filter="roleName eq '{}'".format(owner_role_name)
        )
    )[0]
    storage_role_name = "Storage Blob Data Owner"
    storage_role = list(
        authorization_client.role_definitions.list(
            rg_result.id, filter="roleName eq '{}'".format(storage_role_name)
        )
    )[0]
    key_vault_role_name = "Key Vault Administrator"
    key_vault_role = list(
        authorization_client.role_definitions.list(
            rg_result.id, filter="roleName eq '{}'".format(key_vault_role_name)
        )
    )[0]
    role_assignments = authorization_client.role_assignments.list_for_resource_group(
        rg_result.name
    )
    # Grant the storage and key vault roles to every principal that owns the
    # resource group, so it can manage the state container and secrets.
    for role_assignment in role_assignments:
        if role_assignment.role_definition_id == owner_role.id:
            try:
                authorization_client.role_assignments.create(
                    scope=f"/subscriptions/{subscription_id}/resourceGroups/{rg_result.name}",
                    role_assignment_name=uuid4(),
                    parameters={
                        "role_definition_id": storage_role.id,
                        "principal_id": role_assignment.principal_id,
                    },
                )
            except ResourceExistsError:
                pass
            try:
                authorization_client.role_assignments.create(
                    scope=f"/subscriptions/{subscription_id}/resourceGroups/{rg_result.name}",
                    role_assignment_name=uuid4(),
                    parameters={
                        "role_definition_id": key_vault_role.id,
                        "principal_id": role_assignment.principal_id,
                    },
                )
            except ResourceExistsError:
                pass

    # Create SA
    storage_client = StorageManagementClient(credential, subscription_id)
    try:
        storage_client.storage_accounts.get_properties(
            resource_group_name, storage_account_name
        )
        logger.debug(f"Storage account {storage_account_name} already exists!")
    except ResourceNotFoundError:
        logger.debug("Need to create storage account")
        # create sa
        try:
            poller = storage_client.storage_accounts.begin_create(
                resource_group_name,
                storage_account_name,
                {
                    "location": region,
                    "kind": "StorageV2",
                    "sku": {"name": "Standard_LRS"},
                },
            )
        except ResourceExistsError:
            raise UserErrors(
                "The storage account name already exists in another subscription. "
                "Please change the name or org name in the config."
            )
        account_result = poller.result()
        logger.debug(f"Provisioned storage account {account_result.name}")
        # TODO(ankur): assign Storage Blob Data Contributor to this SA,
        # otherwise it doesn't work

    # create container
    try:
        container = storage_client.blob_containers.get(
            resource_group_name, storage_account_name, container_name
        )
        logger.debug(f"container {container.name} exists")
    except ResourceNotFoundError:
        logger.debug("Need to create container")
        container = storage_client.blob_containers.create(
            resource_group_name, storage_account_name, container_name, {}
        )
        logger.debug(f"Provisioned container {container.name}")
def download_state(cls, layer: "Layer") -> bool:
    if layer.is_stateless_mode() is True:
        # no remote state for stateless mode
        return False

    if not cls.verify_storage(layer):
        logger.debug(
            fmt_msg(
                """
                We store state in S3/GCP buckets/Azure Storage. Since the state bucket was not found,
                ~this probably means that you either haven't created your opta resources yet,
                ~or you previously successfully destroyed your opta resources.
                """
            )
        )
        return False

    state_file: str = "./tmp.tfstate"
    providers = layer.gen_providers(0)
    terraform_backends = providers.get("terraform", {}).get("backend", {})
    if "s3" in terraform_backends:
        bucket = providers["terraform"]["backend"]["s3"]["bucket"]
        region = providers["terraform"]["backend"]["s3"]["region"]
        key = providers["terraform"]["backend"]["s3"]["key"]
        logger.debug(
            f"Found an s3 backend in bucket {bucket} and key {key}, "
            "gonna try to download the statefile from there"
        )
        s3 = boto3.client("s3", config=Config(region_name=region))
        try:
            s3.download_file(Bucket=bucket, Key=key, Filename=state_file)
        except ClientError as e:
            if e.response["Error"]["Code"] == "404":
                # The object does not exist.
                logger.debug("Did not find terraform state file")
                return False
            raise
    elif "gcs" in terraform_backends:
        bucket = providers["terraform"]["backend"]["gcs"]["bucket"]
        prefix = providers["terraform"]["backend"]["gcs"]["prefix"]
        credentials, project_id = GCP.get_credentials()
        gcs_client = storage.Client(project=project_id, credentials=credentials)
        bucket_object = gcs_client.get_bucket(bucket)
        blob = storage.Blob(f"{prefix}/default.tfstate", bucket_object)
        try:
            with open(state_file, "wb") as file_obj:
                gcs_client.download_blob_to_file(blob, file_obj)
        except GoogleClientError as e:
            if e.code == 404:
                # The object does not exist.
                os.remove(state_file)
                return False
            raise
    elif "azurerm" in terraform_backends:
        storage_account_name = providers["terraform"]["backend"]["azurerm"][
            "storage_account_name"
        ]
        container_name = providers["terraform"]["backend"]["azurerm"]["container_name"]
        key = providers["terraform"]["backend"]["azurerm"]["key"]
        credentials = Azure.get_credentials()
        try:
            blob = (
                BlobServiceClient(
                    f"https://{storage_account_name}.blob.core.windows.net/",
                    credential=credentials,
                )
                .get_container_client(container_name)
                .get_blob_client(key)
            )
            with open(state_file, "wb") as file_obj:
                blob_data = blob.download_blob()
                blob_data.readinto(file_obj)
        except ResourceNotFoundError:
            return False
    elif layer.cloud == "local":
        try:
            tf_file = os.path.join(cls.get_local_opta_dir(), "tfstate", layer.name)
            if os.path.exists(tf_file):
                copyfile(tf_file, state_file)
            else:
                return False
        except Exception:
            raise UserErrors(f"Could not copy local state file to {state_file}")
    elif layer.cloud == "helm":
        set_kube_config(layer)
        load_opta_kube_config()
        v1 = CoreV1Api()
        secret_name = f"tfstate-default-{layer.state_storage()}"
        secrets: V1SecretList = v1.list_namespaced_secret(
            "default", field_selector=f"metadata.name={secret_name}"
        )
        if len(secrets.items) == 0:
            return False
        secret: V1Secret = secrets.items[0]
        decoded_secret = gzip.decompress(base64.b64decode(secret.data["tfstate"]))
        with open(state_file, "wb") as file_obj:
            file_obj.write(decoded_secret)
    else:
        raise UserErrors("Need to get state from S3 or GCS or Azure storage")

    with open(state_file, "r") as file:
        raw_state = file.read().strip()
    os.remove(state_file)
    if raw_state != "":
        cls.downloaded_state[layer.name] = json.loads(raw_state)
        return True
    return False
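# Hedged usage sketch: download_state caches the parsed tfstate in
# cls.downloaded_state, keyed by layer name (see the final lines above).
# "terraform_version" is a standard top-level key in tfstate files; the helper
# below is hypothetical, for illustration only:
def _log_remote_terraform_version(layer: "Layer") -> None:
    if Terraform.download_state(layer):
        state = Terraform.downloaded_state[layer.name]
        logger.debug(
            f"Remote state written by terraform {state.get('terraform_version')}"
        )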