Exemple #1
0
 def get_credentials(cls) -> DefaultAzureCredential:
     if cls.credentials is None:
         cls.credentials = DefaultAzureCredential()
         f = StringIO()
         try:
             with redirect_stderr(f):
                 cls.credentials.get_token("https://storage.azure.com/")
         except ClientAuthenticationError:
             pass
         except Exception as e:
             logger.error(f.getvalue())
             raise e
     return cls.credentials
Exemple #2
0
def _upgrade() -> None:
    try:
        upgrade_present = check_version_upgrade(is_upgrade_call=True)
        if upgrade_present:
            _make_installation_file()
            nice_run([f"./{TEMP_INSTALLATION_FILENAME}"], input=b"y")
            _upgrade_successful()
    except Exception:
        logger.error(
            "\nUnable to install latest version of Opta."
            "\nPlease follow the instructions on https://docs.opta.dev/installation"
        )
    finally:
        _cleanup_installation_file()
Exemple #3
0
def tail_pod_log(namespace: str, pod: V1Pod, color_idx: int,
                 seconds: Optional[int]) -> None:
    v1 = CoreV1Api()
    watch = Watch()
    print(
        f"{fg(color_idx)}Showing the logs for server {pod.metadata.name} of your service{attr(0)}"
    )
    retry_count = 0
    while True:
        try:
            for logline in watch.stream(
                    v1.read_namespaced_pod_log,
                    name=pod.metadata.name,
                    namespace=namespace,
                    container="k8s-service",
                    since_seconds=seconds,
            ):
                print(f"{fg(color_idx)}{pod.metadata.name} {logline}{attr(0)}")
        except Exception as e:
            if type(e) == ApiException:
                if e.status == 404:  # type: ignore
                    print(
                        f"{fg(color_idx)}Server {pod.metadata.name} has been terminated{attr(0)}"
                    )
                    return

            if retry_count < 15:
                print(
                    f"{fg(color_idx)}Couldn't get logs, waiting a bit and retrying{attr(0)}"
                )
                time.sleep(retry_count)
                retry_count += 1
            else:
                logger.error(
                    f"Got the following error while trying to fetch the logs for pod {pod.metadata.name} in namespace {namespace}: {e}"
                )
                return
Exemple #4
0
def _apply(
    config: str,
    env: Optional[str],
    refresh: bool,
    local: bool,
    image_tag: Optional[str],
    test: bool,
    auto_approve: bool,
    input_variables: Dict[str, str],
    image_digest: Optional[str] = None,
    stdout_logs: bool = True,
    detailed_plan: bool = False,
) -> None:
    pre_check()
    _clean_tf_folder()
    if local and not test:
        config = local_setup(config,
                             input_variables,
                             image_tag,
                             refresh_local_env=True)

    layer = Layer.load_from_yaml(config, env, input_variables=input_variables)
    layer.verify_cloud_credentials()
    layer.validate_required_path_dependencies()

    if Terraform.download_state(layer):
        tf_lock_exists, _ = Terraform.tf_lock_details(layer)
        if tf_lock_exists:
            raise UserErrors(USER_ERROR_TF_LOCK)
    _verify_parent_layer(layer, auto_approve)

    event_properties: Dict = layer.get_event_properties()
    amplitude_client.send_event(
        amplitude_client.START_GEN_EVENT,
        event_properties=event_properties,
    )

    # We need a region with at least 3 AZs for leader election during failover.
    # Also EKS historically had problems with regions that have fewer than 3 AZs.
    if layer.cloud == "aws":
        providers = layer.gen_providers(0)["provider"]
        aws_region = providers["aws"]["region"]
        azs = _fetch_availability_zones(aws_region)
        if len(azs) < 3:
            raise UserErrors(
                fmt_msg(f"""
                    Opta requires a region with at least *3* availability zones like us-east-1 or us-west-2.
                    ~You configured {aws_region}, which only has the availability zones: {azs}.
                    ~Please choose a different region.
                    """))

    Terraform.create_state_storage(layer)
    gen_opta_resource_tags(layer)
    cloud_client: CloudClient
    if layer.cloud == "aws":
        cloud_client = AWS(layer)
    elif layer.cloud == "google":
        cloud_client = GCP(layer)
    elif layer.cloud == "azurerm":
        cloud_client = Azure(layer)
    elif layer.cloud == "local":
        if local:  # boolean passed via cli
            pass
        cloud_client = Local(layer)
    elif layer.cloud == "helm":
        cloud_client = HelmCloudClient(layer)
    else:
        raise Exception(f"Cannot handle upload config for cloud {layer.cloud}")

    existing_config: Optional[
        StructuredConfig] = cloud_client.get_remote_config()
    old_semver_string = ("" if existing_config is None else
                         existing_config.get("opta_version", "").strip("v"))
    current_semver_string = VERSION.strip("v")
    _verify_semver(old_semver_string, current_semver_string, layer,
                   auto_approve)

    try:
        existing_modules: Set[str] = set()
        first_loop = True
        for module_idx, current_modules, total_block_count in gen(
                layer, existing_config, image_tag, image_digest, test, True,
                auto_approve):
            if first_loop:
                # This is set during the first iteration, since the tf file must exist.
                existing_modules = Terraform.get_existing_modules(layer)
                first_loop = False
            configured_modules = set([x.name for x in current_modules])
            is_last_module = module_idx == total_block_count - 1
            has_new_modules = not configured_modules.issubset(existing_modules)
            if not is_last_module and not has_new_modules and not refresh:
                continue
            if is_last_module:
                untouched_modules = existing_modules - configured_modules
                configured_modules = configured_modules.union(
                    untouched_modules)

            layer.pre_hook(module_idx)
            if layer.cloud == "local":
                if is_last_module:
                    targets = []
            else:
                targets = list(
                    map(lambda x: f"-target=module.{x}",
                        sorted(configured_modules)))
            if test:
                Terraform.plan("-lock=false", *targets, layer=layer)
                print(
                    "Plan ran successfully, not applying since this is a test."
                )
            else:
                current_properties = event_properties.copy()
                current_properties["module_idx"] = module_idx
                amplitude_client.send_event(
                    amplitude_client.APPLY_EVENT,
                    event_properties=current_properties,
                )
                logger.info("Planning your changes (might take a minute)")

                try:
                    Terraform.plan(
                        "-lock=false",
                        "-input=false",
                        f"-out={TF_PLAN_PATH}",
                        layer=layer,
                        *targets,
                        quiet=True,
                    )
                except CalledProcessError as e:
                    logger.error(e.stderr or "")
                    raise e
                PlanDisplayer.display(detailed_plan=detailed_plan)

                if not auto_approve:
                    click.confirm(
                        "The above are the planned changes for your opta run. Do you approve?",
                        abort=True,
                    )
                logger.info("Applying your changes (might take a minute)")
                service_modules = (layer.get_module_by_type(
                    "k8s-service", module_idx) if layer.cloud == "aws" else
                                   layer.get_module_by_type(
                                       "gcp-k8s-service", module_idx))
                if (len(service_modules) != 0 and cluster_exist(layer.root())
                        and stdout_logs):
                    service_module = service_modules[0]
                    # Tailing logs
                    logger.info(
                        f"Identified deployment for kubernetes service module {service_module.name}, tailing logs now."
                    )
                    new_thread = Thread(
                        target=tail_module_log,
                        args=(
                            layer,
                            service_module.name,
                            10,
                            datetime.datetime.utcnow().replace(
                                tzinfo=pytz.UTC),
                            2,
                        ),
                        daemon=True,
                    )
                    # Tailing events
                    new_thread.start()
                    new_thread = Thread(
                        target=tail_namespace_events,
                        args=(
                            layer,
                            datetime.datetime.utcnow().replace(
                                tzinfo=pytz.UTC),
                            3,
                        ),
                        daemon=True,
                    )
                    new_thread.start()

                tf_flags: List[str] = []
                if auto_approve:
                    tf_flags.append("-auto-approve")
                try:
                    Terraform.apply(layer,
                                    *tf_flags,
                                    TF_PLAN_PATH,
                                    no_init=True,
                                    quiet=False)
                except Exception as e:
                    layer.post_hook(module_idx, e)
                    raise e
                else:
                    layer.post_hook(module_idx, None)
                cloud_client.upload_opta_config()
                logger.info("Opta updates complete!")
    except Exception as e:
        event_properties["success"] = False
        event_properties["error_name"] = e.__class__.__name__
        raise e
    else:
        event_properties["success"] = True
    finally:
        amplitude_client.send_event(
            amplitude_client.FINISH_GEN_EVENT,
            event_properties=event_properties,
        )
Exemple #5
0
def tail_namespace_events(
        layer: "Layer",
        earliest_event_start_time: Optional[datetime.datetime] = None,
        color_idx: int = 15,  # White Color
) -> None:
    load_opta_kube_config()
    v1 = EventsV1Api()
    watch = Watch()
    print(f"{fg(color_idx)}Showing events for namespace {layer.name}{attr(0)}")
    retry_count = 0
    old_events: List[EventsV1Event] = v1.list_namespaced_event(
        namespace=layer.name).items
    # Filter by time
    if earliest_event_start_time is not None:
        # Redefine so mypy doesn't complain about earliest_event_start_time being Optional during lambda call
        filter_start_time = earliest_event_start_time

        old_events = list(
            filter(
                lambda x: _event_last_observed(x) > filter_start_time,
                old_events,
            ))
    # Sort by timestamp
    old_events = sorted(old_events, key=lambda x: _event_last_observed(x))
    event: EventsV1Event
    for event in old_events:
        if do_not_show_event(event):
            continue
        earliest_event_start_time = _event_last_observed(event)
        print(
            f"{fg(color_idx)}{earliest_event_start_time} Namespace {layer.name} event: {event.note}{attr(0)}"
        )
    deleted_pods = set()
    while True:
        try:
            for stream_obj in watch.stream(
                    v1.list_namespaced_event,
                    namespace=layer.name,
            ):
                event = stream_obj["object"]
                event_time = _event_last_observed(event)
                if (earliest_event_start_time is None
                        or event_time > earliest_event_start_time):
                    if "Deleted pod:" in event.note:
                        deleted_pods.add(event.note.split(" ")[-1])
                    involved_object: Optional[
                        V1ObjectReference] = event.regarding
                    if (involved_object is not None
                            and involved_object.kind == "Pod"
                            and involved_object.name in deleted_pods):
                        continue
                    if do_not_show_event(event):
                        continue
                    print(
                        f"{fg(color_idx)}{event_time} Namespace {layer.name} event: {event.note}{attr(0)}"
                    )
        except ApiException as e:
            if retry_count < 5:
                print(
                    f"{fg(color_idx)}Couldn't get logs, waiting a bit and retrying{attr(0)}"
                )
                time.sleep(1 << retry_count)
                retry_count += 1
            else:
                logger.error(
                    f"{fg(color_idx)}Got the following error while trying to fetch the events in namespace {layer.name}: {e}"
                )
                return
        except Exception as e:
            # print(sys.exc_info()[2])
            logger.error(
                f"{fg(color_idx)}Got the following error while trying to fetch the events in namespace {layer.name}: {e}{attr(0)}"
            )
            logger.debug("Event watch exception", exc_info=True)
            return
Exemple #6
0
cli.add_command(generate_terraform)
cli.add_command(help)
cli.add_command(show)

if __name__ == "__main__":
    try:
        # In case OPTA_DEBUG is set, local state files may not be cleaned up
        # after the command.
        # However, we should still clean them up before the next command, or
        # else it may interfere with it.
        one_time()
        cleanup_files()
        cli()
    except UserErrors as e:
        if os.environ.get("OPTA_DEBUG") is None:
            logger.error(str(e))
        else:
            logger.exception(str(e))
        logger.info(
            f"{fg('magenta')}If you need more help please reach out to the contributors in our slack channel at: https://slack.opta.dev{attr(0)}"
        )
        sys.exit(1)
    except Exception as e:
        logger.exception(str(e))
        logger.info(
            f"{fg('red')}Unhandled error encountered -- a crash report zipfile has been created for you. "
            "If you need more help please reach out (passing the crash report) to the contributors in our "
            f"slack channel at: https://slack.opta.dev{attr(0)}"
            "\nHint: As a first step in debugging, try rerunning the command and seeing if it still fails."
        )
        CURRENT_CRASH_REPORTER.generate_report()
Exemple #7
0
def destroy(
    config: str,
    env: Optional[str],
    auto_approve: bool,
    detailed_plan: bool,
    local: Optional[bool],
    var: Dict[str, str],
) -> None:
    """Destroy all opta resources from the current config

    To destroy an environment, you have to first destroy all the services first.

    Examples:

    opta destroy -c my-service.yaml --auto-approve

    opta destroy -c my-env.yaml --auto-approve
    """
    try:
        opta_acquire_lock()
        pre_check()
        logger.warning(
            "You are destroying your cloud infra state. DO NOT, I REPEAT, DO NOT do this as "
            "an attempt to debug a weird/errored apply. What you have created is not some ephemeral object that can be "
            "tossed arbitrarily (perhaps some day) and destroying unnecessarily just to reapply typically makes it "
            "worse. If you're doing this cause you are really trying to destroy the environment entirely, then that's"
            "perfectly fine-- if not then please reach out to the opta team in the slack workspace "
            "(https://slack.opta.dev) and I promise that they'll be happy to help debug."
        )

        config = check_opta_file_exists(config)
        if local:
            config, _ = _handle_local_flag(config, False)
            _clean_tf_folder()
        layer = Layer.load_from_yaml(config, env, input_variables=var)
        event_properties: Dict = layer.get_event_properties()
        amplitude_client.send_event(
            amplitude_client.DESTROY_EVENT, event_properties=event_properties,
        )
        layer.verify_cloud_credentials()
        layer.validate_required_path_dependencies()
        if not Terraform.download_state(layer):
            logger.info(
                "The opta state could not be found. This may happen if destroy ran successfully before."
            )
            return

        tf_lock_exists, _ = Terraform.tf_lock_details(layer)
        if tf_lock_exists:
            raise UserErrors(USER_ERROR_TF_LOCK)

        # Any child layers should be destroyed first before the current layer.
        children_layers = _fetch_children_layers(layer)
        if children_layers:
            # TODO: ideally we can just automatically destroy them but it's
            # complicated...
            logger.error(
                "Found the following services that depend on this environment. Please run `opta destroy` on them first!\n"
                + "\n".join(children_layers)
            )
            raise UserErrors("Dependant services found!")

        tf_flags: List[str] = []
        if auto_approve:
            sleep_time = 5
            logger.info(
                f"{attr('bold')}Opta will now destroy the {attr('underlined')}{layer.name}{attr(0)}"
                f"{attr('bold')} layer.{attr(0)}\n"
                f"{attr('bold')}Sleeping for {attr('underlined')}{sleep_time} secs{attr(0)}"
                f"{attr('bold')}, press Ctrl+C to Abort.{attr(0)}"
            )
            time.sleep(sleep_time)
            tf_flags.append("-auto-approve")
        modules = Terraform.get_existing_modules(layer)
        layer.modules = [x for x in layer.modules if x.name in modules]
        gen_all(layer)
        Terraform.init(False, "-reconfigure", layer=layer)
        Terraform.refresh(layer)

        idx = len(layer.modules) - 1
        for module in reversed(layer.modules):
            try:
                module_address_prefix = f"-target=module.{module.name}"
                logger.info("Planning your changes (might take a minute)")
                Terraform.plan(
                    "-lock=false",
                    "-input=false",
                    "-destroy",
                    f"-out={TF_PLAN_PATH}",
                    layer=layer,
                    *list([module_address_prefix]),
                )
                PlanDisplayer.display(detailed_plan=detailed_plan)
                tf_flags = []
                if not auto_approve:
                    click.confirm(
                        "The above are the planned changes for your opta run. Do you approve?",
                        abort=True,
                    )
                else:
                    tf_flags.append("-auto-approve")
                Terraform.apply(layer, *tf_flags, TF_PLAN_PATH, no_init=True, quiet=False)
                layer.post_delete(idx)
                idx -= 1
            except Exception as e:
                raise e

        Terraform.delete_state_storage(layer)
    finally:
        opta_release_lock()