Ejemplo n.º 1
0
    def build(
            scope: core.Construct,
            id: str,
            *,
            default_fail_if_cluster_running: bool,
            cluster_configuration_path: str = '$.ClusterConfiguration.Cluster',
            output_path: str = '$',
            result_path: str = '$.ClusterConfiguration.Cluster') -> sfn.Task:
        """Create the 'Fail If Cluster Running' task inside a nested Construct.

        Args:
            scope: Parent construct under which a child named ``id`` is created.
            id: Id of the nested Construct that holds the Task.
            default_fail_if_cluster_running: Value sent to the Lambda as
                ``DefaultFailIfClusterRunning``.
            cluster_configuration_path: JSON path in the state input whose
                value is sent to the Lambda as ``ClusterConfiguration``.
            output_path: Forwarded to the Task as ``output_path``.
            result_path: Forwarded to the Task as ``result_path``.

        Returns:
            The ``sfn.Task`` that invokes the Lambda obtained from
            ``FailIfClusterRunningBuilder.get_or_build``.
        """
        # Nesting avoids id collisions between the Lambda and Task and
        # whatever else already lives in ``scope``.
        nested_scope = core.Construct(scope, id)
        handler = emr_lambdas.FailIfClusterRunningBuilder.get_or_build(
            nested_scope)

        invoke_payload = {
            'ExecutionInput':
            sfn.TaskInput.from_context_at('$$.Execution.Input').value,
            'DefaultFailIfClusterRunning':
            default_fail_if_cluster_running,
            'ClusterConfiguration':
            sfn.TaskInput.from_data_at(cluster_configuration_path).value,
        }
        invocation = sfn_tasks.InvokeFunction(handler, payload=invoke_payload)

        return sfn.Task(
            nested_scope,
            'Fail If Cluster Running',
            output_path=output_path,
            result_path=result_path,
            task=invocation,
        )
Ejemplo n.º 2
0
    def build(scope: core.Construct,
              id: str,
              *,
              input_path: str = '$',
              output_path: Optional[str] = None,
              result_path: Optional[str] = None) -> sfn.Task:
        """Create the 'Update Cluster Tags' Lambda-invoke state.

        Args:
            scope: Parent construct under which a child named ``id`` is created.
            id: Id of the nested Construct that holds the state.
            input_path: JSON path in the state input whose value is sent to
                the Lambda as ``Input``.
            output_path: Forwarded to the state as ``output_path``.
            result_path: Forwarded to the state as ``result_path``.

        Returns:
            The ``sfn_tasks.LambdaInvoke`` state, configured with
            ``payload_response_only=True``.
        """
        # Nesting avoids id collisions between the Lambda and Task ids.
        nested_scope = core.Construct(scope, id)
        tags_lambda = emr_lambdas.UpdateClusterTagsBuilder.get_or_build(
            nested_scope)

        payload_fields = {
            'ExecutionInput':
            sfn.TaskInput.from_context_at('$$.Execution.Input').value,
            'Input':
            sfn.TaskInput.from_data_at(input_path).value,
        }

        return sfn_tasks.LambdaInvoke(
            nested_scope,
            'Update Cluster Tags',
            output_path=output_path,
            result_path=result_path,
            lambda_function=tags_lambda,
            payload_response_only=True,
            payload=sfn.TaskInput.from_object(payload_fields),
        )
Ejemplo n.º 3
0
    def build(
            scope: core.Construct,
            id: str,
            *,
            cluster_configuration_path: str = '$.ClusterConfiguration.Cluster',
            output_path: str = '$',
            result_path: str = '$.ClusterConfiguration.Cluster') -> sfn.Task:
        """Create the 'Update Cluster Tags' task inside a nested Construct.

        Args:
            scope: Parent construct under which a child named ``id`` is created.
            id: Id of the nested Construct that holds the Task.
            cluster_configuration_path: JSON path in the state input whose
                value is sent to the Lambda as ``ClusterConfiguration``.
            output_path: Forwarded to the Task as ``output_path``.
            result_path: Forwarded to the Task as ``result_path``.

        Returns:
            The ``sfn.Task`` that invokes the Lambda obtained from
            ``UpdateClusterTagsBuilder.get_or_build``.
        """
        # Nesting avoids id collisions between the Lambda and Task ids.
        nested_scope = core.Construct(scope, id)
        tags_lambda = emr_lambdas.UpdateClusterTagsBuilder.get_or_build(
            nested_scope)

        invoke_payload = {
            'ExecutionInput':
            sfn.TaskInput.from_context_at('$$.Execution.Input').value,
            'ClusterConfiguration':
            sfn.TaskInput.from_data_at(cluster_configuration_path).value,
        }
        invocation = sfn_tasks.InvokeFunction(tags_lambda,
                                              payload=invoke_payload)

        return sfn.Task(
            nested_scope,
            'Update Cluster Tags',
            output_path=output_path,
            result_path=result_path,
            task=invocation,
        )
Ejemplo n.º 4
0
    def build(scope: core.Construct,
              id: str,
              *,
              default_fail_if_cluster_running: bool,
              input_path: str = '$',
              output_path: Optional[str] = None,
              result_path: Optional[str] = None) -> sfn.Task:
        """Create the 'Fail If Cluster Running' Lambda-invoke state.

        Args:
            scope: Parent construct under which a child named ``id`` is created.
            id: Id of the nested Construct that holds the state.
            default_fail_if_cluster_running: Value sent to the Lambda as
                ``DefaultFailIfClusterRunning``.
            input_path: JSON path in the state input whose value is sent to
                the Lambda as ``Input``.
            output_path: Forwarded to the state as ``output_path``.
            result_path: Forwarded to the state as ``result_path``.

        Returns:
            The ``sfn_tasks.LambdaInvoke`` state, configured with
            ``payload_response_only=True``.
        """
        # Nesting avoids id collisions between the Lambda and Task ids.
        nested_scope = core.Construct(scope, id)
        handler = emr_lambdas.FailIfClusterRunningBuilder.get_or_build(
            nested_scope)

        payload_fields = {
            'ExecutionInput':
            sfn.TaskInput.from_context_at('$$.Execution.Input').value,
            'DefaultFailIfClusterRunning':
            default_fail_if_cluster_running,
            'Input':
            sfn.TaskInput.from_data_at(input_path).value,
        }

        return sfn_tasks.LambdaInvoke(
            nested_scope,
            'Fail If Cluster Running',
            output_path=output_path,
            result_path=result_path,
            lambda_function=handler,
            payload_response_only=True,
            payload=sfn.TaskInput.from_object(payload_fields),
        )
Ejemplo n.º 5
0
    def build(scope: core.Construct,
              id: str,
              *,
              override_cluster_configs_lambda: Optional[
                  aws_lambda.Function] = None,
              allowed_cluster_config_overrides: Optional[Dict[str,
                                                              str]] = None,
              input_path: str = '$',
              output_path: Optional[str] = None,
              result_path: Optional[str] = None) -> sfn.Task:
        """Create the 'Override Cluster Configs' Lambda-invoke state.

        Args:
            scope: Parent construct under which a child named ``id`` is created.
            id: Id of the nested Construct that holds the state.
            override_cluster_configs_lambda: Lambda to invoke. When ``None``,
                the one from ``OverrideClusterConfigsBuilder.get_or_build``
                is used instead.
            allowed_cluster_config_overrides: Sent unchanged to the Lambda as
                ``AllowedClusterConfigOverrides``.
            input_path: JSON path in the state input whose value is sent to
                the Lambda as ``Input``.
            output_path: Forwarded to the state as ``output_path``.
            result_path: Forwarded to the state as ``result_path``.

        Returns:
            The ``sfn_tasks.LambdaInvoke`` state, configured with
            ``payload_response_only=True``.
        """
        # Nesting avoids id collisions between the Lambda and Task ids.
        nested_scope = core.Construct(scope, id)

        # Fall back to the shared builder only when no Lambda was supplied.
        handler = override_cluster_configs_lambda
        if handler is None:
            handler = emr_lambdas.OverrideClusterConfigsBuilder.get_or_build(
                nested_scope)

        payload_fields = {
            'ExecutionInput':
            sfn.TaskInput.from_context_at('$$.Execution.Input').value,
            'Input':
            sfn.TaskInput.from_data_at(input_path).value,
            'AllowedClusterConfigOverrides':
            allowed_cluster_config_overrides,
        }

        return sfn_tasks.LambdaInvoke(
            nested_scope,
            'Override Cluster Configs',
            output_path=output_path,
            result_path=result_path,
            lambda_function=handler,
            payload_response_only=True,
            payload=sfn.TaskInput.from_object(payload_fields),
        )
Ejemplo n.º 6
0
    def build(
            scope: core.Construct,
            id: str,
            *,
            override_cluster_configs_lambda: Optional[
                aws_lambda.Function] = None,
            allowed_cluster_config_overrides: Optional[Dict[str, str]] = None,
            cluster_configuration_path: str = '$.ClusterConfiguration.Cluster',
            output_path: str = '$',
            result_path: str = '$.ClusterConfiguration.Cluster') -> sfn.Task:
        """Create the 'Override Cluster Configs' task inside a nested Construct.

        Args:
            scope: Parent construct under which a child named ``id`` is created.
            id: Id of the nested Construct that holds the Task.
            override_cluster_configs_lambda: Lambda to invoke. When ``None``,
                the one from ``OverrideClusterConfigsBuilder.get_or_build``
                is used instead.
            allowed_cluster_config_overrides: Sent unchanged to the Lambda as
                ``AllowedClusterConfigOverrides``.
            cluster_configuration_path: JSON path in the state input whose
                value is sent to the Lambda as ``ClusterConfiguration``.
            output_path: Forwarded to the Task as ``output_path``.
            result_path: Forwarded to the Task as ``result_path``.

        Returns:
            The ``sfn.Task`` wrapping an ``InvokeFunction`` of the Lambda.
        """
        # Nesting avoids id collisions between the Lambda and Task ids.
        nested_scope = core.Construct(scope, id)

        # Fall back to the shared builder only when no Lambda was supplied.
        handler = override_cluster_configs_lambda
        if handler is None:
            handler = emr_lambdas.OverrideClusterConfigsBuilder.get_or_build(
                nested_scope)

        invoke_payload = {
            'ExecutionInput':
            sfn.TaskInput.from_context_at('$$.Execution.Input').value,
            'ClusterConfiguration':
            sfn.TaskInput.from_data_at(cluster_configuration_path).value,
            'AllowedClusterConfigOverrides':
            allowed_cluster_config_overrides,
        }
        invocation = sfn_tasks.InvokeFunction(handler, payload=invoke_payload)

        return sfn.Task(
            nested_scope,
            'Override Cluster Configs',
            output_path=output_path,
            result_path=result_path,
            task=invocation,
        )
Ejemplo n.º 7
0
    def build(scope: core.Construct,
              id: str,
              *,
              cluster_name: str,
              cluster_tags: List[core.Tag],
              profile_namespace: str,
              profile_name: str,
              configuration_namespace: str,
              configuration_name: str,
              output_path: Optional[str] = None,
              result_path: Optional[str] = None) -> sfn.Task:
        """Create the 'Load Cluster Configuration' Lambda-invoke state.

        Args:
            scope: Parent construct under which a child named ``id`` is created.
            id: Id of the nested Construct that holds the Lambda and state.
            cluster_name: Sent to the Lambda as ``ClusterName``.
            cluster_tags: Tags sent to the Lambda as ``ClusterTags``; each is
                serialized to a ``{'Key': ..., 'Value': ...}`` dict.
            profile_namespace: Passed to the Lambda builder and sent as
                ``ProfileNamespace``.
            profile_name: Passed to the Lambda builder and sent as
                ``ProfileName``.
            configuration_namespace: Passed to the Lambda builder and sent as
                ``ConfigurationNamespace``.
            configuration_name: Passed to the Lambda builder and sent as
                ``ConfigurationName``.
            output_path: Forwarded to the state as ``output_path``.
            result_path: Forwarded to the state as ``result_path``.

        Returns:
            The ``sfn_tasks.LambdaInvoke`` state, configured with
            ``payload_response_only=True``.
        """
        # Nesting avoids id collisions between the Lambda and Task ids.
        nested_scope = core.Construct(scope, id)

        loader_lambda = emr_lambdas.LoadClusterConfigurationBuilder.build(
            nested_scope,
            profile_namespace=profile_namespace,
            profile_name=profile_name,
            configuration_namespace=configuration_namespace,
            configuration_name=configuration_name)

        serialized_tags = [{
            'Key': tag.key,
            'Value': tag.value
        } for tag in cluster_tags]

        payload_fields = {
            'ClusterName': cluster_name,
            'ClusterTags': serialized_tags,
            'ProfileNamespace': profile_namespace,
            'ProfileName': profile_name,
            'ConfigurationNamespace': configuration_namespace,
            'ConfigurationName': configuration_name,
        }

        return sfn_tasks.LambdaInvoke(
            nested_scope,
            'Load Cluster Configuration',
            output_path=output_path,
            result_path=result_path,
            lambda_function=loader_lambda,
            payload_response_only=True,
            payload=sfn.TaskInput.from_object(payload_fields),
        )
Ejemplo n.º 8
0
    def build(scope: core.Construct,
              id: str,
              *,
              name: str,
              cluster_id: str,
              result_path: Optional[str] = None,
              output_path: Optional[str] = None) -> sfn.Task:
        """Create a task that terminates an EMR cluster.

        Args:
            scope: Parent construct under which a child named ``id`` is created.
            id: Id of the nested Construct that holds the Task.
            name: Id given to the Task itself.
            cluster_id: Passed to ``EmrTerminateCluster`` as ``cluster_id``.
            result_path: Forwarded to the Task as ``result_path``.
            output_path: Forwarded to the Task as ``output_path``.

        Returns:
            The ``sfn.Task`` wrapping ``EmrTerminateCluster`` with the
            ``SYNC`` service integration pattern.
        """
        # Nesting avoids collisions with other Task ids in ``scope``.
        nested_scope = core.Construct(scope, id)

        terminate_cluster = sfn_tasks.EmrTerminateCluster(
            cluster_id=cluster_id,
            integration_pattern=sfn.ServiceIntegrationPattern.SYNC)

        return sfn.Task(
            nested_scope,
            name,
            output_path=output_path,
            result_path=result_path,
            task=terminate_cluster,
        )
Ejemplo n.º 9
0
    def build(scope: core.Construct,
              id: str,
              *,
              cluster_name: str,
              cluster_tags: List[core.Tag],
              profile_namespace: str,
              profile_name: str,
              configuration_namespace: str,
              configuration_name: str,
              output_path: str = '$',
              result_path: str = '$.ClusterConfiguration') -> sfn.Task:
        """Create the 'Load Cluster Configuration' task in a nested Construct.

        Args:
            scope: Parent construct under which a child named ``id`` is created.
            id: Id of the nested Construct that holds the Lambda and Task.
            cluster_name: Sent to the Lambda as ``ClusterName``.
            cluster_tags: Tags sent to the Lambda as ``ClusterTags``; each is
                serialized to a ``{'Key': ..., 'Value': ...}`` dict.
            profile_namespace: Passed to the Lambda builder and sent as
                ``ProfileNamespace``.
            profile_name: Passed to the Lambda builder and sent as
                ``ProfileName``.
            configuration_namespace: Passed to the Lambda builder and sent as
                ``ConfigurationNamespace``.
            configuration_name: Passed to the Lambda builder and sent as
                ``ConfigurationName``.
            output_path: Forwarded to the Task as ``output_path``.
            result_path: Forwarded to the Task as ``result_path``.

        Returns:
            The ``sfn.Task`` wrapping an ``InvokeFunction`` of the Lambda.
        """
        # Nesting avoids id collisions between the Lambda and Task ids.
        nested_scope = core.Construct(scope, id)

        loader_lambda = emr_lambdas.LoadClusterConfigurationBuilder.build(
            nested_scope,
            profile_namespace=profile_namespace,
            profile_name=profile_name,
            configuration_namespace=configuration_namespace,
            configuration_name=configuration_name)

        serialized_tags = [{
            'Key': tag.key,
            'Value': tag.value
        } for tag in cluster_tags]

        invoke_payload = {
            'ClusterName': cluster_name,
            'ClusterTags': serialized_tags,
            'ProfileNamespace': profile_namespace,
            'ProfileName': profile_name,
            'ConfigurationNamespace': configuration_namespace,
            'ConfigurationName': configuration_name,
        }
        invocation = sfn_tasks.InvokeFunction(loader_lambda,
                                              payload=invoke_payload)

        return sfn.Task(
            nested_scope,
            'Load Cluster Configuration',
            output_path=output_path,
            result_path=result_path,
            task=invocation,
        )
Ejemplo n.º 10
0
  def __add_peers(self)->None:
    """Add transit-gateway routes from the landing zone to every other peer.

    For each entry of ``self.peers`` other than ``self.landing_zone`` a
    grouping Construct is created, and one ``CfnRoute`` per isolated and per
    private subnet of the landing zone VPC is added to it, targeting the
    peer's ``cidr_block`` through ``self.gateway``.
    """
    for peer in self.peers:
      if peer == self.landing_zone:
        continue

      isolated_count = len(peer.vpc.isolated_subnets)
      private_count = len(peer.vpc.private_subnets)
      routes = core.Construct(
        self, f'{peer.zone_name}-I.{isolated_count}/Pr.{private_count}')

      # One running number is shared by both subnet groups, so the 'Pr.'
      # ids continue counting where the 'I.' ids stopped.
      next_number = 1
      for prefix, group in (('I', 'isolated_subnets'),
                            ('Pr', 'private_subnets')):
        for subnet in getattr(self.landing_zone.vpc, group):
          ec2.CfnRoute(
            routes,
            f'{prefix}.{next_number}',
            route_table_id=subnet.route_table.route_table_id,
            destination_cidr_block=peer.cidr_block,
            transit_gateway_id=self.gateway.ref)
          next_number += 1
Ejemplo n.º 11
0
    def build(scope: core.Construct,
              id: str,
              *,
              roles: emr_roles.EMRRoles,
              input_path: str = '$',
              result_path: Optional[str] = None,
              output_path: Optional[str] = None,
              wait_for_cluster_start: bool = True) -> sfn.Task:
        """Create the 'Start EMR Cluster' task inside a nested Construct.

        Args:
            scope: Parent construct under which a child named ``id`` is created.
            id: Id of the nested Construct that holds the task.
            roles: Forwarded to ``EmrCreateClusterTask`` as ``roles``.
            input_path: Forwarded to the task as ``input_path``.
            result_path: Forwarded to the task as ``result_path``.
            output_path: Forwarded to the task as ``output_path``.
            wait_for_cluster_start: Selects ``IntegrationPattern.RUN_JOB``
                when true, ``IntegrationPattern.REQUEST_RESPONSE`` otherwise.

        Returns:
            The ``EmrCreateClusterTask`` instance.
        """
        # Nesting avoids id collisions between the Lambda and Task ids.
        nested_scope = core.Construct(scope, id)

        if wait_for_cluster_start:
            pattern = sfn.IntegrationPattern.RUN_JOB
        else:
            pattern = sfn.IntegrationPattern.REQUEST_RESPONSE

        return EmrCreateClusterTask(
            nested_scope,
            'Start EMR Cluster',
            output_path=output_path,
            result_path=result_path,
            roles=roles,
            input_path=input_path,
            integration_pattern=pattern,
        )
Ejemplo n.º 12
0
    def build(scope: core.Construct,
              id: str,
              *,
              emr_step: emr_code.EMRStep,
              cluster_id: str,
              result_path: Optional[str] = None,
              output_path: Optional[str] = None,
              wait_for_step_completion: bool = True) -> sfn.Task:
        """Create a task that adds ``emr_step`` to an EMR cluster.

        Args:
            scope: Parent construct under which a child named ``id`` is created.
            id: Id of the nested Construct that holds the Task.
            emr_step: Step definition; it is resolved against the nested
                Construct and its ``name`` becomes the Task id.
            cluster_id: Forwarded to ``EmrAddStepTask`` as ``cluster_id``.
            result_path: Forwarded to the Task as ``result_path``.
            output_path: Forwarded to the Task as ``output_path``.
            wait_for_step_completion: Selects
                ``ServiceIntegrationPattern.SYNC`` when true,
                ``ServiceIntegrationPattern.FIRE_AND_FORGET`` otherwise.

        Returns:
            The ``sfn.Task`` wrapping the ``EmrAddStepTask``.
        """
        # Nesting avoids collisions with other Task ids in ``scope``.
        nested_scope = core.Construct(scope, id)
        step_definition = emr_step.resolve(nested_scope)

        if wait_for_step_completion:
            pattern = sfn.ServiceIntegrationPattern.SYNC
        else:
            pattern = sfn.ServiceIntegrationPattern.FIRE_AND_FORGET

        return sfn.Task(
            nested_scope,
            emr_step.name,
            output_path=output_path,
            result_path=result_path,
            task=EmrAddStepTask(cluster_id=cluster_id,
                                step=step_definition,
                                integration_pattern=pattern),
        )
Ejemplo n.º 13
0
    def add_spark_jars(self, code: emr_code.EMRCode, jars_in_code: List[str]):
        """Register jars from ``code`` in the ``spark.jars`` setting.

        Records the deployment location of ``code`` as a configuration
        artifact, resolves ``code`` against a (possibly reused) nested
        Construct, appends one path per entry of ``jars_in_code`` to the
        accumulated jar list and rewrites the ``spark-defaults``
        classification with the comma-joined result.

        Args:
            code: Deployed code whose bucket, prefix and resolved ``S3Path``
                locate the jars.
            jars_in_code: Jar paths relative to the resolved ``S3Path``.

        Returns:
            ``self``, so calls can be chained.

        Raises:
            ReadOnlyClusterConfigurationError: If ``self._rehydrated`` is set.
        """
        # S3 keys and URIs always use '/'; os.path.join would produce
        # backslashes when the app is synthesized on a Windows host.
        import posixpath

        if self._rehydrated:
            raise ReadOnlyClusterConfigurationError()

        bucket_name = code.deployment_bucket.bucket_name
        self._configuration_artifacts.append({
            'Bucket': bucket_name,
            'Path': posixpath.join(code.deployment_prefix, '*'),
        })

        # Derive the nested Construct id from a hash of bucket and prefix;
        # hashing avoids potential problems with Tokens in the bucket name.
        # The seed is only hashed, never used as a path, so it keeps
        # os.path.join to leave previously generated ids unchanged.
        id_seed = os.path.join(bucket_name, code.deployment_prefix)
        digest = hashlib.md5(id_seed.encode('utf-8')).digest()
        token = base64.urlsafe_b64encode(digest).decode()
        construct_id = f'EmrCode_SparkJar_{token}'

        # Reuse the Construct from an earlier call for the same location,
        # creating it only the first time, to avoid id collisions.
        construct = self.node.try_find_child(construct_id)
        if construct is None:
            construct = core.Construct(self, construct_id)

        bucket_path = code.resolve(construct)['S3Path']
        self._spark_jars.extend(
            posixpath.join(bucket_path, jar) for jar in jars_in_code)

        config = self.config
        config['Configurations'] = self.update_configurations(
            config['Configurations'], 'spark-defaults',
            {'spark.jars': ','.join(self._spark_jars)})
        self.update_config(config)
        return self
Ejemplo n.º 14
0
    def build(
            scope: core.Construct,
            id: str,
            *,
            roles: emr_roles.EMRRoles,
            cluster_configuration_path: str = '$.ClusterConfiguration.Cluster',
            result_path: Optional[str] = None,
            output_path: Optional[str] = None,
            wait_for_cluster_start: bool = True) -> sfn.Task:
        """Create the 'Start EMR Cluster' task inside a nested Construct.

        Args:
            scope: Parent construct under which a child named ``id`` is created.
            id: Id of the nested Construct that holds the Task.
            roles: Forwarded to ``EmrCreateClusterTask`` as ``roles``.
            cluster_configuration_path: Forwarded to ``EmrCreateClusterTask``
                as ``cluster_configuration_path``.
            result_path: Forwarded to the Task as ``result_path``.
            output_path: Forwarded to the Task as ``output_path``.
            wait_for_cluster_start: Selects
                ``ServiceIntegrationPattern.SYNC`` when true,
                ``ServiceIntegrationPattern.FIRE_AND_FORGET`` otherwise.

        Returns:
            The ``sfn.Task`` wrapping the ``EmrCreateClusterTask``.
        """
        # Nesting avoids id collisions between the Lambda and Task ids.
        nested_scope = core.Construct(scope, id)

        if wait_for_cluster_start:
            pattern = sfn.ServiceIntegrationPattern.SYNC
        else:
            pattern = sfn.ServiceIntegrationPattern.FIRE_AND_FORGET

        create_cluster = EmrCreateClusterTask(
            roles=roles,
            cluster_configuration_path=cluster_configuration_path,
            integration_pattern=pattern)

        return sfn.Task(
            nested_scope,
            'Start EMR Cluster',
            output_path=output_path,
            result_path=result_path,
            task=create_cluster,
        )
Ejemplo n.º 15
0
    def build(scope: core.Construct,
              id: str,
              *,
              emr_step: emr_code.EMRStep,
              cluster_id: str,
              result_path: Optional[str] = None,
              output_path: Optional[str] = None,
              wait_for_step_completion: bool = True) -> sfn.Task:
        """Create a task that adds ``emr_step`` to an EMR cluster.

        Args:
            scope: Parent construct under which a child named ``id`` is created.
            id: Id of the nested Construct that holds the task.
            emr_step: Step definition; it is resolved against the nested
                Construct and its ``name`` becomes the task id.
            cluster_id: Forwarded to ``EmrAddStepTask`` as ``cluster_id``.
            result_path: Forwarded to the task as ``result_path``.
            output_path: Forwarded to the task as ``output_path``.
            wait_for_step_completion: Selects ``IntegrationPattern.RUN_JOB``
                when true, ``IntegrationPattern.REQUEST_RESPONSE`` otherwise.

        Returns:
            The ``EmrAddStepTask`` instance.
        """
        # Nesting avoids collisions with other Task ids in ``scope``.
        nested_scope = core.Construct(scope, id)
        step_definition = emr_step.resolve(nested_scope)

        if wait_for_step_completion:
            pattern = sfn.IntegrationPattern.RUN_JOB
        else:
            pattern = sfn.IntegrationPattern.REQUEST_RESPONSE

        return EmrAddStepTask(
            nested_scope,
            emr_step.name,
            output_path=output_path,
            result_path=result_path,
            cluster_id=cluster_id,
            step=step_definition,
            integration_pattern=pattern,
        )
Ejemplo n.º 16
0
    def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
        """Define two VPCs joined through a Transit Gateway.

        Creates a "Default" VPC (public + private subnets, NAT gateways), an
        "EKSvpc" VPC with isolated subnets only, a Transit Gateway with an
        attachment per VPC, subnet routes pointing at the gateway, and a
        Transit Gateway route table with associations and propagations for
        both attachments.

        Args:
            scope: Parent construct.
            id: Construct id.
            **kwargs: Forwarded unchanged to the base class constructor.
        """
        super().__init__(scope, id, **kwargs)

        # VPC with internet access: public and private /27 subnet groups
        # and two NAT gateways.
        self.defaultVpc = aws_ec2.Vpc(
            self,
            "Default",
            cidr='10.0.0.0/24',
            nat_gateways=2,
            subnet_configuration=[
                aws_ec2.SubnetConfiguration(
                    name="public",
                    cidr_mask=27,
                    reserved=False,
                    subnet_type=aws_ec2.SubnetType.PUBLIC),
                aws_ec2.SubnetConfiguration(
                    name="private",
                    cidr_mask=27,
                    reserved=False,
                    subnet_type=aws_ec2.SubnetType.PRIVATE)
            ])

        # Second VPC with a single subnet group; it is named "private" but
        # its type is ISOLATED.
        self.eksVpc = aws_ec2.Vpc(
            self,
            "EKSvpc",
            cidr='192.168.0.0/24',
            subnet_configuration=[
                aws_ec2.SubnetConfiguration(
                    name="private",
                    cidr_mask=26,
                    reserved=False,
                    subnet_type=aws_ec2.SubnetType.ISOLATED)
            ])

        # Default route table association is disabled; a route table and
        # explicit associations are declared further down.
        self.transitGw = aws_ec2.CfnTransitGateway(
            self,
            "DefaultTransitGw",
            default_route_table_association="disable",
        )

        # Attach the default VPC through its first two private subnets
        # (indexing [1] requires at least two private subnets to exist).
        self.tgAttachmentPrivate = aws_ec2.CfnTransitGatewayAttachment(
            self,
            "DefaultTGAttachment",
            transit_gateway_id=self.transitGw.ref,
            vpc_id=self.defaultVpc.vpc_id,
            subnet_ids=[
                self.defaultVpc.private_subnets[0].subnet_id,
                self.defaultVpc.private_subnets[1].subnet_id
            ],
            tags=None)
        self.tgAttachmentPrivate.add_depends_on(self.transitGw)

        # Attach the EKS VPC through its first two isolated subnets
        # (indexing [1] requires at least two isolated subnets to exist).
        self.tgAttachmentEks = aws_ec2.CfnTransitGatewayAttachment(
            self,
            "EksTGAttachment",
            transit_gateway_id=self.transitGw.ref,
            vpc_id=self.eksVpc.vpc_id,
            subnet_ids=[
                self.eksVpc.isolated_subnets[0].subnet_id,
                self.eksVpc.isolated_subnets[1].subnet_id
            ],
            tags=None)
        self.tgAttachmentEks.add_depends_on(self.transitGw)

        # Default route (0.0.0.0/0) of every isolated EKS subnet goes to the
        # Transit Gateway. Routes are grouped under their own Construct.
        isolatedSubnetRoutes = core.Construct(self, 'Isolated Subnet Routes')
        for (i, subnet) in enumerate(self.eksVpc.isolated_subnets):
            aws_ec2.CfnRoute(
                isolatedSubnetRoutes,
                id=f"Default Route EKS {i}",
                route_table_id=subnet.route_table.route_table_id,
                destination_cidr_block="0.0.0.0/0",
                transit_gateway_id=self.transitGw.ref).add_depends_on(
                    self.tgAttachmentEks)

        # Private subnets of the default VPC reach the EKS VPC CIDR through
        # the Transit Gateway.
        # NOTE(review): "defalt" in the id looks like a typo of "default";
        # the id is a runtime string, so renaming it would change the
        # construct id.
        privateSubnetRoutes = core.Construct(self, 'Private Subnet Routes')
        for (i, subnet) in enumerate(self.defaultVpc.private_subnets):
            aws_ec2.CfnRoute(
                privateSubnetRoutes,
                id=f"Eks route defalt {i}",
                route_table_id=subnet.route_table.route_table_id,
                destination_cidr_block=self.eksVpc.vpc_cidr_block,
                transit_gateway_id=self.transitGw.ref).add_depends_on(
                    self.tgAttachmentEks)

        # Same route for the public subnets of the default VPC; the route
        # ids repeat the ones above but live under a different parent
        # Construct.
        publicSubnetRoutes = core.Construct(self, 'Public Subnet Routes')
        for (i, subnet) in enumerate(self.defaultVpc.public_subnets):
            aws_ec2.CfnRoute(
                publicSubnetRoutes,
                id=f"Eks route defalt {i}",
                route_table_id=subnet.route_table.route_table_id,
                destination_cidr_block=self.eksVpc.vpc_cidr_block,
                transit_gateway_id=self.transitGw.ref).add_depends_on(
                    self.tgAttachmentEks)

        # Route table on the Transit Gateway itself.
        self.transitGwRT = aws_ec2.CfnTransitGatewayRouteTable(
            self,
            'transitGw Route Table',
            transit_gateway_id=self.transitGw.ref,
            tags=None)

        # Inside the gateway, 0.0.0.0/0 is sent to the default VPC
        # attachment.
        self.transitGwRoute = aws_ec2.CfnTransitGatewayRoute(
            self,
            'transitGW Route',
            transit_gateway_route_table_id=self.transitGwRT.ref,
            destination_cidr_block='0.0.0.0/0',
            transit_gateway_attachment_id=self.tgAttachmentPrivate.ref)

        # The associations and propagations below read the route table id
        # back from self.transitGwRoute rather than from self.transitGwRT.
        self.TGRouteTableAssociationDefaultVPC = aws_ec2.CfnTransitGatewayRouteTableAssociation(
            self,
            'DefaultVPC Association',
            transit_gateway_attachment_id=self.tgAttachmentPrivate.ref,
            transit_gateway_route_table_id=self.transitGwRoute.
            transit_gateway_route_table_id)

        self.TGRouteTablePropagationDefaultVPC = aws_ec2.CfnTransitGatewayRouteTablePropagation(
            self,
            'DefaultVPC Propagation',
            transit_gateway_attachment_id=self.tgAttachmentPrivate.ref,
            transit_gateway_route_table_id=self.transitGwRoute.
            transit_gateway_route_table_id)

        self.TGRouteTableAssociationEksVPC = aws_ec2.CfnTransitGatewayRouteTableAssociation(
            self,
            'EksVPC Association',
            transit_gateway_attachment_id=self.tgAttachmentEks.ref,
            transit_gateway_route_table_id=self.transitGwRoute.
            transit_gateway_route_table_id)

        self.TGRouteTablePropagationEksVPC = aws_ec2.CfnTransitGatewayRouteTablePropagation(
            self,
            'EksVPC Propagation',
            transit_gateway_attachment_id=self.tgAttachmentEks.ref,
            transit_gateway_route_table_id=self.transitGwRoute.
            transit_gateway_route_table_id)
Ejemplo n.º 17
0
    def __init__(
        self,
        scope: core.Construct,
        id: str,
        *,
        configuration_name: str,
        namespace: str = 'default',
        release_label: Optional[str] = 'emr-5.29.0',
        applications: Optional[List[str]] = None,
        bootstrap_actions: Optional[List[emr_code.EMRBootstrapAction]] = None,
        configurations: Optional[List[dict]] = None,
        use_glue_catalog: Optional[bool] = True,
        step_concurrency_level: Optional[int] = 1,
        description: Optional[str] = None,
        secret_configurations: Optional[Dict[str,
                                             secretsmanager.Secret]] = None):
        """Build a cluster configuration and store it in an SSM parameter.

        Args:
            scope: Parent construct.
            id: Construct id.
            configuration_name: Cluster ``Name`` and last segment of the SSM
                parameter name. When ``None``, the constructor returns right
                after the base initialization, leaving every other attribute
                unset (presumably for instances populated elsewhere; confirm
                against callers).
            namespace: Middle segment of the SSM parameter name.
            release_label: Stored as ``ReleaseLabel``.
            applications: Passed to ``self._get_applications``.
            bootstrap_actions: Actions resolved against a nested
                'BootstrapActions' Construct; the deployment location of each
                action that has ``code`` is recorded as a configuration
                artifact.
            configurations: Passed to ``self._get_configurations``.
            use_glue_catalog: Passed to ``self._get_configurations``.
            step_concurrency_level: Stored as ``StepConcurrencyLevel``.
            description: Stored on the instance.
            secret_configurations: Stored on the instance.
        """
        # S3 key patterns always use '/'; os.path.join would produce
        # backslashes when the app is synthesized on a Windows host.
        import posixpath

        super().__init__(scope, id)

        self._override_interfaces = {}

        if configuration_name is None:
            return

        self._configuration_name = configuration_name
        self._namespace = namespace
        self._description = description
        self._bootstrap_actions = bootstrap_actions
        self._secret_configurations = secret_configurations
        self._spark_packages = []
        self._spark_jars = []

        if bootstrap_actions:
            # Create a nested Construct to avoid Construct id collisions
            construct = core.Construct(self, 'BootstrapActions')
            resolved_bootstrap_actions = [
                b.resolve(construct) for b in bootstrap_actions
            ]
        else:
            resolved_bootstrap_actions = []

        # Key order is kept stable because this dict is serialized into the
        # SSM parameter value below.
        self._config = {
            'AdditionalInfo': None,
            'AmiVersion': None,
            'Applications': self._get_applications(applications),
            'AutoScalingRole': None,
            'BootstrapActions': resolved_bootstrap_actions,
            'Configurations':
            self._get_configurations(configurations, use_glue_catalog),
            'CustomAmiId': None,
            'EbsRootVolumeSize': None,
            'Instances': {
                'AdditionalMasterSecurityGroups': None,
                'AdditionalSlaveSecurityGroups': None,
                'Ec2KeyName': None,
                'Ec2SubnetId': None,
                'Ec2SubnetIds': None,
                'EmrManagedMasterSecurityGroup': None,
                'EmrManagedSlaveSecurityGroup': None,
                'HadoopVersion': None,
                'InstanceCount': None,
                'InstanceFleets': None,
                'InstanceGroups': None,
                'KeepJobFlowAliveWhenNoSteps': True,
                'MasterInstanceType': None,
                'Placement': None,
                'ServiceAccessSecurityGroup': None,
                'SlaveInstanceType': None,
                'TerminationProtected': False,
            },
            'JobFlowRole': None,
            'KerberosAttributes': None,
            'LogUri': None,
            'Name': configuration_name,
            'NewSupportedProducts': None,
            'ReleaseLabel': release_label,
            'RepoUpgradeOnBoot': None,
            'ScaleDownBehavior': None,
            'SecurityConfiguration': None,
            'ServiceRole': None,
            'StepConcurrencyLevel': step_concurrency_level,
            'SupportedProducts': None,
            'Tags': [],
            'VisibleToAllUsers': True,
        }

        # Record where the code of each bootstrap action is deployed.
        self._configuration_artifacts = []
        if bootstrap_actions is not None:
            for bootstrap_action in bootstrap_actions:
                if bootstrap_action.code is not None:
                    self._configuration_artifacts.append({
                        'Bucket':
                        bootstrap_action.code.deployment_bucket.bucket_name,
                        'Path':
                        posixpath.join(
                            bootstrap_action.code.deployment_prefix, '*'),
                    })

        # to_json() is evaluated here, so everything it serializes must
        # already be set on the instance at this point.
        self._ssm_parameter = ssm.CfnParameter(
            self,
            'SSMParameter',
            type='String',
            value=json.dumps(self.to_json()),
            tier='Intelligent-Tiering',
            name=f'{SSM_PARAMETER_PREFIX}/{namespace}/{configuration_name}')

        self.override_interfaces['default'] = {
            'ClusterName': {
                'JsonPath': 'Name',
                'Default': configuration_name
            },
            'ReleaseLabel': {
                'JsonPath': 'ReleaseLabel',
                'Default': release_label
            },
            'StepConcurrencyLevel': {
                'JsonPath': 'StepConcurrencyLevel',
                'Default': step_concurrency_level
            }
        }

        self._rehydrated = False
Ejemplo n.º 18
0
    def build(scope: core.Construct,
              id: str,
              *,
              roles: emr_roles.EMRRoles,
              kerberos_attributes_secret: Optional[
                  secretsmanager.Secret] = None,
              secret_configurations: Optional[Dict[
                  str, secretsmanager.Secret]] = None,
              cluster_configuration_path: str = '$.ClusterConfiguration',
              result_path: Optional[str] = None,
              output_path: Optional[str] = None,
              wait_for_cluster_start: bool = True) -> sfn.Task:
        """Create the 'Start EMR Cluster (with Secrets)' task.

        The task runs the Lambda from ``RunJobFlowBuilder.get_or_build`` with
        the ``WAIT_FOR_TASK_TOKEN`` pattern and grants that Lambda
        ``secretsmanager:GetSecretValue`` on every supplied secret.

        Args:
            scope: Parent construct under which a child named ``id`` is created.
            id: Id of the nested Construct that holds the Task.
            roles: Passed to ``RunJobFlowBuilder.get_or_build``.
            kerberos_attributes_secret: Optional secret the Lambda is allowed
                to read.
            secret_configurations: Optional mapping whose secret values the
                Lambda is allowed to read.
            cluster_configuration_path: JSON path in the state input whose
                value is sent to the Lambda as ``ClusterConfiguration``.
            result_path: Forwarded to the Task as ``result_path``.
            output_path: Forwarded to the Task as ``output_path``.
            wait_for_cluster_start: Its negation is sent to the Lambda as
                ``FireAndForget``.

        Returns:
            The ``sfn.Task`` wrapping the ``RunLambdaTask``.
        """
        # Nesting avoids id collisions between the Lambda and Task ids.
        nested_scope = core.Construct(scope, id)

        # NOTE(review): the lookup inspects direct children of the Stack,
        # while a new rule is created under the nested Construct; confirm
        # the lookup can actually find a rule created here.
        event_rule = core.Stack.of(scope).node.try_find_child('EventRule')
        if event_rule is None:
            every_minute = events.Schedule.rate(core.Duration.minutes(1))
            event_rule = events.Rule(nested_scope,
                                     'EventRule',
                                     enabled=False,
                                     schedule=every_minute)
            BaseBuilder.tag_construct(event_rule)

        run_job_flow_lambda = emr_lambdas.RunJobFlowBuilder.get_or_build(
            nested_scope, roles, event_rule)
        check_status_lambda = emr_lambdas.CheckClusterStatusBuilder.get_or_build(
            nested_scope, event_rule)

        # Collect every secret the Lambda must read: the Kerberos secret
        # first, then the configuration secrets in mapping order.
        readable_secrets = []
        if kerberos_attributes_secret:
            readable_secrets.append(kerberos_attributes_secret)
        if secret_configurations is not None:
            readable_secrets.extend(secret_configurations.values())

        for secret in readable_secrets:
            run_job_flow_lambda.add_to_role_policy(
                iam.PolicyStatement(
                    effect=iam.Effect.ALLOW,
                    actions=['secretsmanager:GetSecretValue'],
                    resources=[f'{secret.secret_arn}*']))

        payload_fields = {
            'ExecutionInput':
            sfn.TaskInput.from_context_at('$$.Execution.Input').value,
            'ClusterConfiguration':
            sfn.TaskInput.from_data_at(cluster_configuration_path).value,
            'TaskToken':
            sfn.Context.task_token,
            'CheckStatusLambda':
            check_status_lambda.function_arn,
            'RuleName':
            event_rule.rule_name,
            'FireAndForget':
            not wait_for_cluster_start,
        }
        run_lambda = sfn_tasks.RunLambdaTask(
            run_job_flow_lambda,
            integration_pattern=sfn.ServiceIntegrationPattern.
            WAIT_FOR_TASK_TOKEN,
            payload=sfn.TaskInput.from_object(payload_fields))

        return sfn.Task(
            nested_scope,
            'Start EMR Cluster (with Secrets)',
            output_path=output_path,
            result_path=result_path,
            task=run_lambda,
        )
Ejemplo n.º 19
0
    def provision_unmanaged_nodegroup(self, name: str, ng: Type[EKS.NodegroupBase], max_nodegroup_azs: int) -> None:
        ami_id, user_data = self._get_machine_image(name, ng.machine_image)

        machine_image = (
            ec2.MachineImage.generic_linux({self.scope.region: ami_id})
            if ami_id
            else eks.EksOptimizedImage(
                cpu_arch=eks.CpuArch.X86_64,
                kubernetes_version=self.eks_version.version,
                node_type=eks.NodeType.GPU if ng.gpu else eks.NodeType.STANDARD,
            )
        )

        if not hasattr(self, "unmanaged_sg"):
            self.unmanaged_sg = ec2.SecurityGroup(
                self.scope,
                "UnmanagedSG",
                vpc=self.vpc,
                security_group_name=f"{self.name}-sharedNodeSG",
                allow_all_outbound=False,
            )

        if self.bastion_sg:
            self.unmanaged_sg.add_ingress_rule(
                peer=self.bastion_sg,
                connection=ec2.Port(
                    protocol=ec2.Protocol("TCP"),
                    string_representation="ssh",
                    from_port=22,
                    to_port=22,
                ),
            )

        scope = cdk.Construct(self.scope, f"UnmanagedNodeGroup{name}")
        cfn_lt = None
        for i, az in enumerate(self.vpc.availability_zones[:max_nodegroup_azs]):
            indexed_name = f"{self.name}-{name}-{az}"
            asg = aws_autoscaling.AutoScalingGroup(
                scope,
                f"{self.name}-{name}-{i}",
                auto_scaling_group_name=indexed_name,
                instance_type=ec2.InstanceType(ng.instance_types[0]),
                machine_image=machine_image,
                vpc=self.cluster.vpc,
                min_capacity=ng.min_size,
                max_capacity=ng.max_size,
                vpc_subnets=ec2.SubnetSelection(
                    subnet_group_name=self.private_subnet_name,
                    availability_zones=[az],
                ),
                role=self.ng_role,
                security_group=self.unmanaged_sg,
            )
            for k, v in (
                {
                    **ng.tags,
                    **{
                        f"k8s.io/cluster-autoscaler/{self.cluster.cluster_name}": "owned",
                        "k8s.io/cluster-autoscaler/enabled": "true",
                        "eks:cluster-name": self.cluster.cluster_name,
                        "Name": indexed_name,
                    },
                }
            ).items():
                cdk.Tags.of(asg).add(str(k), str(v), apply_to_launched_instances=True)

            mime_user_data = self._handle_user_data(name, ami_id, ng.ssm_agent, [asg.user_data, user_data])

            if not cfn_lt:
                lt = ec2.LaunchTemplate(
                    scope,
                    f"LaunchTemplate{i}",
                    launch_template_name=indexed_name,
                    block_devices=[
                        ec2.BlockDevice(
                            device_name="/dev/xvda",
                            volume=ec2.BlockDeviceVolume.ebs(
                                ng.disk_size,
                                volume_type=ec2.EbsDeviceVolumeType.GP2,
                            ),
                        )
                    ],
                    role=self.ng_role,
                    instance_type=ec2.InstanceType(ng.instance_types[0]),
                    key_name=ng.key_name,
                    machine_image=machine_image,
                    user_data=mime_user_data,
                    security_group=self.unmanaged_sg,
                )
                # mimic adding the security group via the ASG during connect_auto_scaling_group_capacity
                lt.connections.add_security_group(self.cluster.cluster_security_group)
                cfn_lt: ec2.CfnLaunchTemplate = lt.node.default_child
                lt_data = ec2.CfnLaunchTemplate.LaunchTemplateDataProperty(
                    **cfn_lt.launch_template_data._values,
                    metadata_options=ec2.CfnLaunchTemplate.MetadataOptionsProperty(
                        http_endpoint="enabled", http_tokens="required", http_put_response_hop_limit=2
                    ),
                )
                cfn_lt.launch_template_data = lt_data

            # https://github.com/aws/aws-cdk/issues/6734
            cfn_asg: aws_autoscaling.CfnAutoScalingGroup = asg.node.default_child
            # Remove the launch config from our stack
            asg.node.try_remove_child("LaunchConfig")
            cfn_asg.launch_configuration_name = None
            # Attach the launch template to the auto scaling group
            cfn_asg.mixed_instances_policy = cfn_asg.MixedInstancesPolicyProperty(
                launch_template=cfn_asg.LaunchTemplateProperty(
                    launch_template_specification=cfn_asg.LaunchTemplateSpecificationProperty(
                        launch_template_id=cfn_lt.ref,
                        version=lt.version_number,
                    ),
                    overrides=[cfn_asg.LaunchTemplateOverridesProperty(instance_type=it) for it in ng.instance_types],
                ),
            )

            options: dict[str, Any] = {
                "bootstrap_enabled": ami_id is None,
            }
            if not ami_id:
                extra_args: list[str] = []
                if labels := ng.labels:
                    extra_args.append(
                        "--node-labels={}".format(",".join(["{}={}".format(k, v) for k, v in labels.items()]))
                    )

                if taints := ng.taints:
                    extra_args.append(
                        "--register-with-taints={}".format(",".join(["{}={}".format(k, v) for k, v in taints.items()]))
                    )
                options["bootstrap_options"] = eks.BootstrapOptions(kubelet_extra_args=" ".join(extra_args))