Example #1
    def delete_state_storage(cls, layer: "Layer") -> None:
        """
        Idempotently remove remote storage for tf state
        """
        # After the layer is completely deleted, remove the opta config from the state bucket.
        if layer.cloud == "aws":
            cloud_client: CloudClient = AWS(layer)
        elif layer.cloud == "google":
            cloud_client = GCP(layer)
        elif layer.cloud == "azurerm":
            cloud_client = Azure(layer)
        elif layer.cloud == "local":
            cloud_client = Local(layer)
        elif layer.cloud == "helm":
            # There is no opta managed storage to delete
            return
        else:
            raise Exception(
                f"Cannot handle opta config deletion for cloud {layer.cloud}")
        cloud_client.delete_opta_config()
        cloud_client.delete_remote_state()

        # If this is the env layer, delete the state bucket & dynamo table as well.
        if layer.name == layer.root().name:
            logger.info(f"Deleting the state storage for {layer.name}...")
            if layer.cloud == "aws":
                cls._aws_delete_state_storage(layer)
            elif layer.cloud == "google":
                cls._gcp_delete_state_storage(layer)
            elif layer.cloud == "local":
                cls._local_delete_state_storage(layer)
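Note: the cloud dispatch above duplicates the ladder in Example #3, so a condensed sketch of the same method could delegate to layer.get_cloud_client(); this refactor is an illustration, not opta's actual code, and the isinstance check stands in for the early return on "helm":

    def delete_state_storage(cls, layer: "Layer") -> None:
        """
        Idempotently remove remote storage for tf state
        """
        cloud_client = layer.get_cloud_client()
        if isinstance(cloud_client, HelmCloudClient):
            # There is no opta managed storage to delete
            return
        cloud_client.delete_opta_config()
        cloud_client.delete_remote_state()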
Example #2
    def _azure_verify_storage(cls, layer: "Layer") -> bool:
        providers = layer.gen_providers(0)
        storage_account_name = providers["terraform"]["backend"]["azurerm"][
            "storage_account_name"]
        container_name = providers["terraform"]["backend"]["azurerm"][
            "container_name"]
        return Azure(layer).bucket_exists(container_name, storage_account_name)
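The nested azurerm backend lookups here reappear in Examples #4, #7, and #8. A small helper could centralize them; _azure_backend is a hypothetical name invented for this sketch, not something the project defines:

    from typing import Tuple

    def _azure_backend(providers: dict) -> Tuple[str, str]:
        # Read the azurerm backend block once instead of repeating the
        # nested dictionary lookups at every call site.
        backend = providers["terraform"]["backend"]["azurerm"]
        return backend["storage_account_name"], backend["container_name"]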
Example #3
File: layer.py Project: run-x/opta
    def get_cloud_client(self) -> CloudClient:
        if self.cloud == "aws":
            return AWS(self)
        elif self.cloud == "google":
            return GCP(self)
        elif self.cloud == "azurerm":
            return Azure(self)
        elif self.cloud == "local":
            return Local(self)
        elif self.cloud == "helm":
            return HelmCloudClient(self)
        else:
            raise Exception(
                f"Unknown cloud {self.cloud}. Cannot handle getting the cloud client"
            )
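A minimal usage sketch, assuming an opta config file named opta.yaml and reusing Layer.load_from_yaml as it is called in Example #5:

    layer = Layer.load_from_yaml("opta.yaml", env=None, input_variables={})
    cloud_client = layer.get_cloud_client()
    existing_config = cloud_client.get_remote_config()  # None if nothing is stored yet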
Example #4
def _azure_get_configs(layer: "Layer") -> List[str]:
    providers = layer.gen_providers(0)

    credentials = Azure.get_credentials()
    storage_account_name = providers["terraform"]["backend"]["azurerm"][
        "storage_account_name"
    ]
    container_name = providers["terraform"]["backend"]["azurerm"]["container_name"]
    storage_client = ContainerClient(
        account_url=f"https://{storage_account_name}.blob.core.windows.net",
        container_name=container_name,
        credential=credentials,
    )
    prefix = "opta_config/"
    blobs = storage_client.list_blobs(name_starts_with=prefix)
    configs = [blob.name[len(prefix) :] for blob in blobs]
    if layer.name in configs:
        configs.remove(layer.name)
    return configs
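A companion sketch for reading one of those configs back. It assumes the blob payload is JSON, which this listing does not confirm, and _azure_get_config is an invented name; ContainerClient.download_blob is the standard azure-storage-blob API:

    import json

    def _azure_get_config(storage_client: ContainerClient, name: str) -> dict:
        # Fetch opta_config/<name> from the same container listed above.
        downloader = storage_client.download_blob(f"opta_config/{name}")
        return json.loads(downloader.readall())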
Example #5
def _apply(
    config: str,
    env: Optional[str],
    refresh: bool,
    local: bool,
    image_tag: Optional[str],
    test: bool,
    auto_approve: bool,
    input_variables: Dict[str, str],
    image_digest: Optional[str] = None,
    stdout_logs: bool = True,
    detailed_plan: bool = False,
) -> None:
    pre_check()
    _clean_tf_folder()
    if local and not test:
        config = local_setup(config,
                             input_variables,
                             image_tag,
                             refresh_local_env=True)

    layer = Layer.load_from_yaml(config, env, input_variables=input_variables)
    layer.verify_cloud_credentials()
    layer.validate_required_path_dependencies()

    if Terraform.download_state(layer):
        tf_lock_exists, _ = Terraform.tf_lock_details(layer)
        if tf_lock_exists:
            raise UserErrors(USER_ERROR_TF_LOCK)
    _verify_parent_layer(layer, auto_approve)

    event_properties: Dict = layer.get_event_properties()
    amplitude_client.send_event(
        amplitude_client.START_GEN_EVENT,
        event_properties=event_properties,
    )

    # We need a region with at least 3 AZs for leader election during failover.
    # Also EKS historically had problems with regions that have fewer than 3 AZs.
    if layer.cloud == "aws":
        providers = layer.gen_providers(0)["provider"]
        aws_region = providers["aws"]["region"]
        azs = _fetch_availability_zones(aws_region)
        if len(azs) < 3:
            raise UserErrors(
                fmt_msg(f"""
                    Opta requires a region with at least *3* availability zones like us-east-1 or us-west-2.
                    ~You configured {aws_region}, which only has the availability zones: {azs}.
                    ~Please choose a different region.
                    """))

    Terraform.create_state_storage(layer)
    gen_opta_resource_tags(layer)
    cloud_client: CloudClient
    if layer.cloud == "aws":
        cloud_client = AWS(layer)
    elif layer.cloud == "google":
        cloud_client = GCP(layer)
    elif layer.cloud == "azurerm":
        cloud_client = Azure(layer)
    elif layer.cloud == "local":
        if local:  # boolean passed via cli
            pass
        cloud_client = Local(layer)
    elif layer.cloud == "helm":
        cloud_client = HelmCloudClient(layer)
    else:
        raise Exception(f"Cannot handle upload config for cloud {layer.cloud}")

    existing_config: Optional[
        StructuredConfig] = cloud_client.get_remote_config()
    old_semver_string = ("" if existing_config is None else
                         existing_config.get("opta_version", "").strip("v"))
    current_semver_string = VERSION.strip("v")
    _verify_semver(old_semver_string, current_semver_string, layer,
                   auto_approve)

    try:
        existing_modules: Set[str] = set()
        first_loop = True
        for module_idx, current_modules, total_block_count in gen(
                layer, existing_config, image_tag, image_digest, test, True,
                auto_approve):
            if first_loop:
                # This is set during the first iteration, since the tf file must exist.
                existing_modules = Terraform.get_existing_modules(layer)
                first_loop = False
            configured_modules = set([x.name for x in current_modules])
            is_last_module = module_idx == total_block_count - 1
            has_new_modules = not configured_modules.issubset(existing_modules)
            if not is_last_module and not has_new_modules and not refresh:
                continue
            if is_last_module:
                untouched_modules = existing_modules - configured_modules
                configured_modules = configured_modules.union(
                    untouched_modules)

            layer.pre_hook(module_idx)
            if layer.cloud == "local":
                if is_last_module:
                    targets = []
            else:
                targets = list(
                    map(lambda x: f"-target=module.{x}",
                        sorted(configured_modules)))
            if test:
                Terraform.plan("-lock=false", *targets, layer=layer)
                print(
                    "Plan ran successfully, not applying since this is a test."
                )
            else:
                current_properties = event_properties.copy()
                current_properties["module_idx"] = module_idx
                amplitude_client.send_event(
                    amplitude_client.APPLY_EVENT,
                    event_properties=current_properties,
                )
                logger.info("Planning your changes (might take a minute)")

                try:
                    Terraform.plan(
                        "-lock=false",
                        "-input=false",
                        f"-out={TF_PLAN_PATH}",
                        layer=layer,
                        *targets,
                        quiet=True,
                    )
                except CalledProcessError as e:
                    logger.error(e.stderr or "")
                    raise e
                PlanDisplayer.display(detailed_plan=detailed_plan)

                if not auto_approve:
                    click.confirm(
                        "The above are the planned changes for your opta run. Do you approve?",
                        abort=True,
                    )
                logger.info("Applying your changes (might take a minute)")
                service_modules = (layer.get_module_by_type(
                    "k8s-service", module_idx) if layer.cloud == "aws" else
                                   layer.get_module_by_type(
                                       "gcp-k8s-service", module_idx))
                if (len(service_modules) != 0 and cluster_exist(layer.root())
                        and stdout_logs):
                    service_module = service_modules[0]
                    # Tailing logs
                    logger.info(
                        f"Identified deployment for kubernetes service module {service_module.name}, tailing logs now."
                    )
                    new_thread = Thread(
                        target=tail_module_log,
                        args=(
                            layer,
                            service_module.name,
                            10,
                            datetime.datetime.utcnow().replace(
                                tzinfo=pytz.UTC),
                            2,
                        ),
                        daemon=True,
                    )
                    new_thread.start()
                    # Tailing events
                    new_thread = Thread(
                        target=tail_namespace_events,
                        args=(
                            layer,
                            datetime.datetime.utcnow().replace(
                                tzinfo=pytz.UTC),
                            3,
                        ),
                        daemon=True,
                    )
                    new_thread.start()

                tf_flags: List[str] = []
                if auto_approve:
                    tf_flags.append("-auto-approve")
                try:
                    Terraform.apply(layer,
                                    *tf_flags,
                                    TF_PLAN_PATH,
                                    no_init=True,
                                    quiet=False)
                except Exception as e:
                    layer.post_hook(module_idx, e)
                    raise e
                else:
                    layer.post_hook(module_idx, None)
                cloud_client.upload_opta_config()
                logger.info("Opta updates complete!")
    except Exception as e:
        event_properties["success"] = False
        event_properties["error_name"] = e.__class__.__name__
        raise e
    else:
        event_properties["success"] = True
    finally:
        amplitude_client.send_event(
            amplitude_client.FINISH_GEN_EVENT,
            event_properties=event_properties,
        )
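A minimal invocation sketch for the entry point above, assuming a config file named opta.yaml; every keyword mirrors the signature, and the values describe a plain non-test, non-local run:

    _apply(
        config="opta.yaml",
        env=None,
        refresh=False,
        local=False,
        image_tag=None,
        test=False,
        auto_approve=False,
        input_variables={},
    )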
Example #6
    def _get_azure_lock_id(cls, layer: "Layer") -> str:
        azure = Azure(layer)
        return azure.get_terraform_lock_id()
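One plausible consumer of this lock ID, sketched as an assumption rather than opta's actual flow (and assuming the enclosing class is Terraform), is Terraform's own force-unlock command:

    import subprocess

    lock_id = Terraform._get_azure_lock_id(layer)
    if lock_id:
        # force-unlock releases a stale state lock by ID; -force skips
        # the interactive confirmation prompt.
        subprocess.run(["terraform", "force-unlock", "-force", lock_id],
                       check=True)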
Example #7
    def _create_azure_state_storage(cls, providers: dict) -> None:
        resource_group_name = providers["terraform"]["backend"]["azurerm"][
            "resource_group_name"]

        region = providers["provider"]["azurerm"]["location"]
        subscription_id = providers["provider"]["azurerm"]["subscription_id"]
        storage_account_name = providers["terraform"]["backend"]["azurerm"][
            "storage_account_name"]
        container_name = providers["terraform"]["backend"]["azurerm"][
            "container_name"]

        # Create RG
        credential = Azure.get_credentials()
        resource_client = ResourceManagementClient(credential, subscription_id)
        try:
            rg_result = resource_client.resource_groups.create_or_update(
                resource_group_name, {"location": region})
        except ResourceNotFoundError as e:
            if "SubscriptionNotFound" in e.message:
                raise UserErrors(
                    f"SubscriptionId {subscription_id} does not exist. Please check and use the correct Subscription Id. "
                    "This is used for accessing the resources in the Resource Group."
                )
            # Re-raise anything unrecognized so rg_result is never left unbound.
            raise
        except HttpResponseError as e:
            if "InvalidSubscriptionId" in e.message:
                raise UserErrors(
                    f"Malformed or Invalid SubscriptionId {subscription_id} used. Please check and use the correct Subscription Id. "
                    "This is used for accessing the resources in the Resource Group."
                )
            raise

        logger.debug(
            f"Provisioned resource group {rg_result.name} in the {rg_result.location} region"
        )
        authorization_client = AuthorizationManagementClient(
            credential, subscription_id, api_version="2018-01-01-preview")

        owner_role_name = "Owner"
        owner_role = list(
            authorization_client.role_definitions.list(
                rg_result.id,
                filter="roleName eq '{}'".format(owner_role_name)))[0]

        storage_role_name = "Storage Blob Data Owner"
        storage_role = list(
            authorization_client.role_definitions.list(
                rg_result.id,
                filter="roleName eq '{}'".format(storage_role_name)))[0]

        key_vault_role_name = "Key Vault Administrator"
        key_vault_role = list(
            authorization_client.role_definitions.list(
                rg_result.id,
                filter="roleName eq '{}'".format(key_vault_role_name)))[0]

        role_assignments = authorization_client.role_assignments.list_for_resource_group(
            rg_result.name)
        for role_assignment in role_assignments:
            if role_assignment.role_definition_id == owner_role.id:
                try:
                    authorization_client.role_assignments.create(
                        scope=
                        f"/subscriptions/{subscription_id}/resourceGroups/{rg_result.name}",
                        role_assignment_name=uuid4(),
                        parameters={
                            "role_definition_id": storage_role.id,
                            "principal_id": role_assignment.principal_id,
                        },
                    )
                except ResourceExistsError:
                    pass
                try:
                    authorization_client.role_assignments.create(
                        scope=
                        f"/subscriptions/{subscription_id}/resourceGroups/{rg_result.name}",
                        role_assignment_name=uuid4(),
                        parameters={
                            "role_definition_id": key_vault_role.id,
                            "principal_id": role_assignment.principal_id,
                        },
                    )
                except ResourceExistsError:
                    pass

        # Create SA
        storage_client = StorageManagementClient(credential, subscription_id)
        try:
            storage_client.storage_accounts.get_properties(
                resource_group_name, storage_account_name)
            logger.debug(
                f"Storage account {storage_account_name} already exists!")
        except ResourceNotFoundError:
            logger.debug("Need to create storage account")
            # create sa
            try:
                poller = storage_client.storage_accounts.begin_create(
                    resource_group_name,
                    storage_account_name,
                    {
                        "location": region,
                        "kind": "StorageV2",
                        "sku": {
                            "name": "Standard_LRS"
                        },
                    },
                )
            except ResourceExistsError:
                raise UserErrors(
                    "The Storage Account name already exists in Another Subscription. "
                    "Please change the Name or Org Name in Config.")

            account_result = poller.result()
            logger.debug(f"Provisioned storage account {account_result.name}")
            # TODO(ankur): assign Storage Blob Data Contributor to this SA,
            # otherwise it doesn't work

        # create container
        try:
            container = storage_client.blob_containers.get(
                resource_group_name, storage_account_name, container_name)
            logger.debug(f"container {container.name} exists")
        except ResourceNotFoundError:
            logger.debug("Need to create container")
            container = storage_client.blob_containers.create(
                resource_group_name, storage_account_name, container_name, {})
            logger.debug(f"Provisioned container {container.name}")
Example #8
    def download_state(cls, layer: "Layer") -> bool:
        if layer.is_stateless_mode() is True:
            # no remote state for stateless mode
            return False

        if not cls.verify_storage(layer):
            logger.debug(
                fmt_msg("""
                    We store state in S3/GCP buckets/Azure Storage. Since the state bucket was not found,
                    ~this probably means that you either haven't created your opta resources yet,
                    ~or you previously successfully destroyed your opta resources.
                    """))
            return False

        state_file: str = "./tmp.tfstate"
        providers = layer.gen_providers(0)
        terraform_backends = providers.get("terraform", {}).get("backend", {})
        if "s3" in terraform_backends:
            bucket = providers["terraform"]["backend"]["s3"]["bucket"]
            region = providers["terraform"]["backend"]["s3"]["region"]
            key = providers["terraform"]["backend"]["s3"]["key"]
            logger.debug(
                f"Found an s3 backend in bucket {bucket} and key {key}, "
                "gonna try to download the statefile from there")
            s3 = boto3.client("s3", config=Config(region_name=region))
            try:
                s3.download_file(Bucket=bucket, Key=key, Filename=state_file)
            except ClientError as e:
                if e.response["Error"]["Code"] == "404":
                    # The object does not exist.
                    logger.debug("Did not find terraform state file")
                    return False
                raise
        elif "gcs" in terraform_backends:
            bucket = providers["terraform"]["backend"]["gcs"]["bucket"]
            prefix = providers["terraform"]["backend"]["gcs"]["prefix"]
            credentials, project_id = GCP.get_credentials()
            gcs_client = storage.Client(project=project_id,
                                        credentials=credentials)
            bucket_object = gcs_client.get_bucket(bucket)
            blob = storage.Blob(f"{prefix}/default.tfstate", bucket_object)
            try:
                with open(state_file, "wb") as file_obj:
                    gcs_client.download_blob_to_file(blob, file_obj)
            except GoogleClientError as e:
                if e.code == 404:
                    # The object does not exist.
                    os.remove(state_file)
                    return False
                raise
        elif "azurerm" in terraform_backends:
            storage_account_name = providers["terraform"]["backend"][
                "azurerm"]["storage_account_name"]
            container_name = providers["terraform"]["backend"]["azurerm"][
                "container_name"]
            key = providers["terraform"]["backend"]["azurerm"]["key"]

            credentials = Azure.get_credentials()
            try:
                blob = (BlobServiceClient(
                    f"https://{storage_account_name}.blob.core.windows.net/",
                    credential=credentials,
                ).get_container_client(container_name).get_blob_client(key))
                with open(state_file, "wb") as file_obj:
                    blob_data = blob.download_blob()
                    blob_data.readinto(file_obj)
            except ResourceNotFoundError:
                return False
        elif layer.cloud == "local":
            try:
                tf_file = os.path.join(
                    cls.get_local_opta_dir(),
                    "tfstate",
                    f"{layer.name}",
                )
                if os.path.exists(tf_file):
                    copyfile(tf_file, state_file)
                else:
                    return False
            except Exception:
                raise UserErrors(
                    f"Could not copy local state file to {state_file}")

        elif layer.cloud == "helm":
            set_kube_config(layer)
            load_opta_kube_config()
            v1 = CoreV1Api()
            secret_name = f"tfstate-default-{layer.state_storage()}"
            secrets: V1SecretList = v1.list_namespaced_secret(
                "default", field_selector=f"metadata.name={secret_name}")
            if len(secrets.items) == 0:
                return False
            secret: V1Secret = secrets.items[0]
            decoded_secret = gzip.decompress(
                base64.b64decode(secret.data["tfstate"]))
            with open(state_file, "wb") as file_obj:
                file_obj.write(decoded_secret)
        else:
            raise UserErrors(
                "Need to get state from S3 or GCS or Azure storage")

        with open(state_file, "r") as file:
            raw_state = file.read().strip()
        os.remove(state_file)
        if raw_state != "":
            cls.downloaded_state[layer.name] = json.loads(raw_state)
            return True
        return False
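A short sketch of consuming the cached result, relying only on the class-level downloaded_state dict that the last lines populate and on the standard terraform state layout (a JSON document with a top-level "resources" list):

    if Terraform.download_state(layer):
        state = Terraform.downloaded_state[layer.name]
        print(f"{len(state.get('resources', []))} resources tracked in remote state")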