Example #1
0
def create_state_machines(client, role_arn, names):
    """Provision the step functions resources for running tasks on instances.

    Creates the worker activity, the task state machine that feeds it, and
    the application state machine that drives the task state machine.

    Args:
        client (SFN.client): boto3 step functions client
        role_arn (str): The Amazon Resource Name (ARN) of the IAM
            role to use for this state machine.
        names (namespace): the names used to label provisioned aws resources

    Returns:
        namespace: context object containing AWS state machine identifying
            information
    """
    activity_arn = _create_worker_activity(client=client, names=names)
    task_arn = _create_task_state_machine(
        client=client,
        worker_activity_resource_arn=activity_arn,
        role_arn=role_arn,
        names=names)
    app_arn = _create_application_state_machine(
        client=client,
        task_state_machine_arn=task_arn,
        role_arn=role_arn,
        names=names)
    return Namespace(
        activity_arn=activity_arn,
        task_state_machine_arn=task_arn,
        app_state_machine_arn=app_arn)
Example #2
0
def main():
    """Command line entry point: start an execution on a cbm3_aws cluster.

    Reads the resource description and tasks files, starts the step
    functions execution, and saves the start-execution response to
    ``--response_output_path``.  The output path must not already exist.
    """
    parser = ArgumentParser(
        description="Starts an execution on a cbm3_aws cluster")

    parser.add_argument(
        "--resource_description_path",
        required=True,
        type=os.path.abspath,
        help="Path to a json formatted file containing the allocated AWS "
        "cbm3_aws cluster")
    parser.add_argument(
        "--execution_name",
        required=True,
        help="The name of the execution. This name must be unique for your "
        "AWS account, region, and state machine for 90 days. For more "
        "information, see Limits Related to State Machine Executions in "
        "the AWS Step Functions Developer Guide.")
    parser.add_argument(
        "--tasks_file_path",
        required=True,
        type=os.path.abspath,
        help="Path to json formatted tasks who has at a minimum a "
        "'task_list' key whose value is the list of tasks to "
        "pass to each instance as they call get_activity_task")
    parser.add_argument(
        "--response_output_path",
        required=True,
        type=os.path.abspath,
        help="Path to which the start execution response is written as a "
        "json formatted file. The path must not already exist.")

    log_helper.start_logging("start_execution", level="INFO")
    logger = log_helper.get_logger("start_execution")
    try:
        args = parser.parse_args()
        logger.info("start_execution")
        logger.info(vars(args))

        if os.path.exists(args.response_output_path):
            # do not overwrite an existing file, which could potentially
            # contain useful information.
            raise ValueError("specified response_output_path already exists: "
                             f"'{args.response_output_path}'")

        with open(args.resource_description_path, 'r') as resources_fp:
            rd = Namespace(**json.load(resources_fp))
        with open(args.tasks_file_path, 'r') as tasks_fp:
            tasks = json.load(tasks_fp)
        state_machine_arn = rd.state_machine_context.app_state_machine_arn
        start_execution_response = execution.start_execution(
            execution_name=args.execution_name,
            state_machine_arn=state_machine_arn,
            region_name=rd.region_name,
            tasks=tasks)
        logger.info(json.dumps(start_execution_response, indent=4))

        # Only create the output file once the execution has started: opening
        # it earlier would leave behind an empty file on failure, and the
        # existence check above would then block all subsequent attempts.
        with open(args.response_output_path, 'w') as out_file:
            json.dump(start_execution_response, out_file)

    except Exception:
        logger.exception("")
Example #3
0
    def test_namespace_dictionary_round_trip(self):
        """Namespace(**dict) exposes nested dicts as attributes, and
        to_dict() reproduces the original dictionary exactly.
        """
        data = {
            "a": 1,
            "b": [1, 2],
            "c": {"d": 1, "e": 2},
            "f": {"g": [1, 2], "h": {"i": 1, "j": [2]}}
        }
        ns = Namespace(**data)

        # assertEqual (rather than assertTrue(x == y)) reports both values
        # on failure instead of just "False is not true".
        self.assertEqual(ns.a, 1)
        self.assertEqual(ns.b, [1, 2])
        self.assertEqual(ns.c.d, 1)
        self.assertEqual(ns.c.e, 2)
        self.assertEqual(ns.f.g, [1, 2])
        self.assertEqual(ns.f.h.i, 1)
        self.assertEqual(ns.f.h.j, [2])

        data_result = ns.to_dict()
        self.assertEqual(data_result, data)
Example #4
0
def create_instance_iam_role(client, policy_context_list, names):
    """Create an instance IAM role and matching instance profile.

    Args:
        client (IAM.client): boto3 IAM client
        policy_context_list (list): list of objects containing the
            policy ARN to assign to the instance IAM role.
        names (namespace): the names used to label provisioned aws resources

    Returns:
        namespace: context object containing the identifying
            information belonging to the IAM role and Instance profile
    """

    # trust policy permitting EC2 instances to assume this role
    ec2_assume_role_policy = {
        "Version": "2012-10-17",
        "Statement": [{
            "Effect": "Allow",
            "Principal": {"Service": "ec2.amazonaws.com"},
            "Action": "sts:AssumeRole"
        }]
    }

    role_response = client.create_role(
        Path='/',
        RoleName=names.instance_iam_role,
        AssumeRolePolicyDocument=json.dumps(ec2_assume_role_policy),
        Description='grants ec2 instances read and write permission to '
        'specific bucket, state machine access, and autoscale '
        'group access')

    for policy_context in policy_context_list:
        client.attach_role_policy(
            RoleName=names.instance_iam_role,
            PolicyArn=policy_context.policy_arn)

    profile_response = client.create_instance_profile(
        InstanceProfileName=names.instance_iam_role)

    client.add_role_to_instance_profile(
        InstanceProfileName=names.instance_iam_role,
        RoleName=names.instance_iam_role)

    role = role_response["Role"]
    profile = profile_response["InstanceProfile"]
    return Namespace(
        role_arn=role["Arn"],
        role_name=role["RoleName"],
        instance_profile_name=profile["InstanceProfileName"],
        instance_profile_id=profile["InstanceProfileId"],
        instance_profile_arn=profile["Arn"])
Example #5
0
def get_names(uuid):
    """Return a namespace of uuid-suffixed labels used to name the
    provisioned AWS resources.
    """
    labels = {
        "run_activity": f"cbm3_run_activity_{uuid}",
        "autoscale_launch_template": f"cbm3_run_launch_template_{uuid}",
        "autoscale_group": f"cbm3_autoscale_group_{uuid}",
        "run_task_state_machine": f"cbm3_run_task_state_machine_{uuid}",
        "run_state_machine": f"cbm3_run_state_machine_{uuid}",
        "state_machine_policy": f"cbm3_state_machine_policy_{uuid}",
        "state_machine_role": f"cbm3_state_machine_role_{uuid}",
        "instance_s3_policy": f"cbm3_s3_instance_policy_{uuid}",
        "instance_iam_role": f"cbm3_iam_instance_role_{uuid}",
    }
    return Namespace(**labels)
Example #6
0
def create_state_machine_policy(client, account_number, names):
    """Create a state machine policy to allow state machine function

    Args:
        client (IAM.client): boto3 IAM client
        account_number (str): the AWS account number for filtering permitted
            resources
        names (namespace): the names used to label provisioned aws resources

    Returns:
        namespace: object containing the policy ARN
    """

    # see:
    # https://docs.aws.amazon.com/step-functions/latest/dg/stepfunctions-iam.html
    policy = {
        "Version": "2012-10-17",
        "Statement": [
            {
                "Effect": "Allow",
                "Action": ["states:StartExecution"],
                "Resource": [
                    f"arn:aws:states:*:{account_number}:stateMachine:*"
                ]
            },
            {
                "Effect": "Allow",
                "Action": [
                    "states:DescribeExecution", "states:StopExecution"
                ],
                "Resource": "*"
            },
            {
                "Effect": "Allow",
                "Action": [
                    "events:PutTargets", "events:PutRule",
                    "events:DescribeRule"
                ],
                "Resource": [
                    f"arn:aws:events:*:{account_number}:"
                    "rule/StepFunctionsGetEventsForStepFunctionsExecutionRule"
                ]
            },
        ]
    }
    response = client.create_policy(
        PolicyName=names.state_machine_policy,
        Path='/',
        PolicyDocument=json.dumps(policy),
        Description='grants access for state machine execution')
    return Namespace(policy_arn=response["Policy"]["Arn"])
Example #7
0
def main():
    """Command line entry point: de-allocate a cbm3_aws cluster described
    by a json resource description file.
    """
    parser = ArgumentParser(
        description="Deallocates AWS resources that were allocated for "
        "cbm3_aws runs")

    parser.add_argument(
        "--resource_description_path",
        required=True,
        help="Path to a json formatted file containing the allocated AWS "
        "resources to de-allocate with this script.")

    log_helper.start_logging("aws_cleanup", level="INFO")
    logger = log_helper.get_logger("aws_cleanup")
    try:
        args = parser.parse_args()
        logger.info("aws_cleanup start up")
        logger.info(vars(args))

        resource_path = os.path.abspath(args.resource_description_path)
        with open(resource_path, 'r') as resource_fp:
            resource_description = Namespace(**json.load(resource_fp))
        resources.cleanup(resource_description=resource_description)
    except Exception:
        logger.exception("")
Example #8
0
def iterate_tasks(task_message, local_projects, local_results_dir):
    """Yield one task context per (project_code, simulation_id) pair.

    Each yielded namespace carries the project path plus the per-simulation
    results database, temp files dir and stdout paths rooted under
    ``local_results_dir/<project_code>/<simulation_id>``.
    """
    for task in task_message:
        project_code = task["project_code"]
        for simulation_id in task["simulation_ids"]:
            # all per-simulation outputs live under the same directory
            sim_dir = os.path.join(
                local_results_dir, project_code, str(simulation_id))
            yield Namespace(
                project_code=project_code,
                project_path=local_projects[project_code],
                simulation_id=simulation_id,
                results_database_path=os.path.join(
                    sim_dir, f"{simulation_id}.mdb"),
                tempfiles_output_dir=os.path.join(
                    sim_dir, f"temp_files_{simulation_id}"),
                stdout_path=os.path.join(
                    sim_dir, f"stdout_{simulation_id}.txt"))
Example #9
0
def deploy(region_name,
           s3_bucket_name,
           min_virtual_cpu,
           max_virtual_cpu,
           image_ami_id,
           resource_description_path,
           vpc_zone_identifier=None):
    """Allocate the AWS resources for cbm3_aws runs.

    Provisions, in order: the s3 bucket (if it does not already exist),
    IAM policies and roles, the state machines, the instance userdata, an
    EC2 launch template, and an autoscaling group.  The accumulated
    resource description is written to ``resource_description_path`` both
    on success and, via the ``finally`` clause, on failure so that
    partially allocated resources can still be cleaned up.

    Args:
        region_name (str): AWS region name used for all boto3 clients
        s3_bucket_name (str): name of the s3 bucket to use, created if it
            does not already exist
        min_virtual_cpu (int): minimum autoscaling group size (passed as
            the group's MinSize; presumably measured in virtual CPUs)
        max_virtual_cpu (int): maximum autoscaling group size (MaxSize)
        image_ami_id (str): the ami id for launched instances
        resource_description_path (str): path to which the resource
            description file is written; must not already exist
        vpc_zone_identifier (str, optional): comma separated subnet ids
            for the autoscaling group; when omitted the region's
            availability zones are used instead. Defaults to None.

    Raises:
        ValueError: raised if resource_description_path already exists

    Returns:
        Namespace: the resource description of everything allocated, or
            None when a generic AWS "InternalError" was caught and logged.
    """
    if os.path.exists(resource_description_path):
        raise ValueError("specified resource_description_path already exists: "
                         f"'{resource_description_path}'")

    # resource description
    rd = Namespace()
    rd.uuid = get_uuid()
    # write the (so far nearly empty) file immediately so the path is
    # claimed and a crash mid-deploy still leaves a record behind
    __write_resources_file(rd, resource_description_path)
    rd.names = get_names(rd.uuid)
    rd.region_name = region_name
    rd.s3_bucket_name = s3_bucket_name
    rd.min_virtual_cpu = int(min_virtual_cpu)
    rd.max_virtual_cpu = int(max_virtual_cpu)
    rd.image_ami_id = image_ami_id

    try:
        logger.info("connecting")
        s3_client = boto3.client("s3", region_name=rd.region_name)
        ec2_client = boto3.client("ec2", region_name=rd.region_name)
        auto_scale_client = boto3.client('autoscaling',
                                         region_name=rd.region_name)
        iam_client = boto3.client("iam", region_name=rd.region_name)
        sts_client = boto3.client("sts", region_name=rd.region_name)
        sfn_client = boto3.client('stepfunctions', region_name=rd.region_name)

        logger.info("check if bucket exists")
        if not __s3_bucket_exists(s3_client, rd.s3_bucket_name):
            logger.info(f"creating s3 bucket {rd.s3_bucket_name}")
            s3_bucket.create_bucket(client=s3_client,
                                    bucket_name=rd.s3_bucket_name,
                                    region=rd.region_name)

        account_number = __get_account_number(sts_client)
        logger.info("creating policies")
        rd.ec2_worker_policy = roles.create_ec2_worker_policy(
            client=iam_client,
            s3_bucket_name=rd.s3_bucket_name,
            account_number=account_number,
            names=rd.names)
        rd.state_machine_policy_context = roles.create_state_machine_policy(
            client=iam_client, account_number=account_number, names=rd.names)

        logger.info("creating iam roles")
        rd.instance_iam_role_context = roles.create_instance_iam_role(
            client=iam_client,
            policy_context_list=[rd.ec2_worker_policy],
            names=rd.names)

        logger.info("creating state machine role")
        rd.state_machine_role_context = roles.create_state_machine_role(
            client=iam_client,
            policy_context_list=[rd.state_machine_policy_context],
            names=rd.names)

        # https://github.com/hashicorp/terraform/issues/15341
        # need to add a delay for the iam changes above to be processed
        # internally by AWS
        wait_time = 20
        logger.info(
            f"waiting {wait_time} seconds for changes to take effect on AWS")
        time.sleep(wait_time)

        logger.info("creating state machine")
        rd.state_machine_context = step_functions.create_state_machines(
            client=sfn_client,
            role_arn=rd.state_machine_role_context.role_arn,
            names=rd.names)

        logger.info("creating userdata")
        rd.user_data = create_userdata(
            s3_bucket_name=rd.s3_bucket_name,
            activity_arn=rd.state_machine_context.activity_arn,
            region_name=rd.region_name)

        iam_instance_profile_arn = \
            rd.instance_iam_role_context.instance_profile_arn

        logger.info("creating launch template")
        rd.launch_template_context = autoscale_group.create_launch_template(
            client=ec2_client,
            name=rd.names.autoscale_launch_template,
            image_ami_id=rd.image_ami_id,
            iam_instance_profile_arn=iam_instance_profile_arn,
            user_data=rd.user_data)

        availability_zones = None
        if not vpc_zone_identifier:
            logger.info("getting availability zones")
            availability_zones = autoscale_group.get_availability_zones(
                client=ec2_client)
        logger.info(f"using zones: {availability_zones}")
        # plain string: no placeholders to interpolate (was an f-string)
        logger.info("create autoscaling group")
        rd.autoscale_group_context = autoscale_group.create_autoscaling_group(
            client=auto_scale_client,
            name=rd.names.autoscale_group,
            launch_template_context=rd.launch_template_context,
            min_size=rd.min_virtual_cpu,
            max_size=rd.max_virtual_cpu,
            availability_zones=availability_zones,
            vpc_zone_identifier=vpc_zone_identifier)

        return rd

    except ClientError as err:
        # from:
        # https://boto3.amazonaws.com/v1/documentation/api/latest/guide/error-handling.html
        if err.response['Error']['Code'] == 'InternalError':  # Generic error
            logger.error('Error Message: {}'.format(
                err.response['Error']['Message']))
            logger.error('Request ID: {}'.format(
                err.response['ResponseMetadata']['RequestId']))
            logger.error('Http code: {}'.format(
                err.response['ResponseMetadata']['HTTPStatusCode']))
        else:
            raise err
    finally:
        # always persist whatever was allocated so cleanup can find it
        __write_resources_file(rd, resource_description_path)
Example #10
0
def create_launch_template(client, name, image_ami_id,
                           iam_instance_profile_arn, user_data):
    """Create a launch template for provisioning instances

    Args:
        client (EC2.Client): boto3 ec2 client
        name (str): the name of the launch template
        image_ami_id (str): the ami id for the launched instances
        iam_instance_profile_arn (str): ARN for for the Iam instance profile
            to attach to launched instances
        user_data (str): line break seperated commands to run on instance start

    Returns:
        object: launch template context object
    """
    # idempotency token so a retried request does not create a duplicate
    client_token = str(uuid.uuid4())

    launch_template_data = {
        'EbsOptimized': False,
        'IamInstanceProfile': {
            'Arn': iam_instance_profile_arn,
        },
        'ImageId': image_ami_id,
        'Monitoring': {'Enabled': True},
        'InstanceInitiatedShutdownBehavior': 'terminate',
        'UserData': user_data,
        'TagSpecifications': [
            {
                'ResourceType': 'instance',
                'Tags': [
                    {'Key': 'Name', 'Value': 'CBM3 Worker Instance'},
                ]
            },
            {
                'ResourceType': 'volume',
                'Tags': [
                    {'Key': 'Name', 'Value': 'CBM3 Worker volume'},
                ]
            },
        ]
    }

    response = client.create_launch_template(
        DryRun=False,
        ClientToken=client_token,
        LaunchTemplateName=name,
        LaunchTemplateData=launch_template_data,
        TagSpecifications=[
            {
                'ResourceType': 'launch-template',
                'Tags': [
                    {'Key': 'name', 'Value': 'CBM3 launch template'},
                ]
            },
        ])

    launch_template = response["LaunchTemplate"]
    return Namespace(
        launch_template_name=launch_template["LaunchTemplateName"],
        launch_template_id=launch_template["LaunchTemplateId"])
Example #11
0
def create_autoscaling_group(client,
                             name,
                             launch_template_context,
                             min_size,
                             max_size,
                             availability_zones=None,
                             vpc_zone_identifier=None):
    """Create an autoscaling group to manage spot instances.

    Args:
        client (AutoScaling.Client): boto3 autoscaling client
        name (str): the name of the autoscaling group
        launch_template_context (object): Return value of:
            :py:func:`create_launch_template`
        min_size (int): minimum number of threads to run in auto scaling
            group.
        max_size (int): maximum number of threads to run in auto scaling
            group.
        availability_zones (list): the list of availability zones for the
            autoscaling group.
        vpc_zone_identifier (str): A comma-separated list of subnet IDs
            for your virtual private cloud (VPC).

    Returns:
        object: autoscaling group context
    """

    # (instance type, weighted capacity) pairs; on-demand priority follows
    # this order
    instance_type_weights = [
        ('m5.2xlarge', '8'),
        ('m5.4xlarge', '16'),
        ('m5.8xlarge', '32'),
        ('m5a.2xlarge', '8'),
        ('m5a.4xlarge', '16'),
        ('m5a.8xlarge', '32'),
        ('m4.2xlarge', '8'),
        ('m4.4xlarge', '16'),
        ('m4.10xlarge', '40'),
        ('c5.2xlarge', '8'),
        ('c5.4xlarge', '16'),
        ('c4.2xlarge', '8'),
        ('c4.4xlarge', '16'),
    ]
    overrides = [
        {'InstanceType': instance_type, 'WeightedCapacity': weight}
        for instance_type, weight in instance_type_weights
    ]

    kwargs = dict(
        AutoScalingGroupName=name,
        MixedInstancesPolicy={
            'LaunchTemplate': {
                'LaunchTemplateSpecification': {
                    'LaunchTemplateId':
                    launch_template_context.launch_template_id
                },
                'Overrides': overrides,
            },
            'InstancesDistribution': {
                # prioritized by the order of the above overrides list
                # for on-demand only
                'OnDemandAllocationStrategy': 'prioritized',
                # minimum number of On demand instances
                'OnDemandBaseCapacity': 0,
                # percent of on demand versus spot instances
                'OnDemandPercentageAboveBaseCapacity': 0,
                'SpotAllocationStrategy': 'capacity-optimized',
            }
        },
        MinSize=min_size,
        MaxSize=max_size,
        TerminationPolicies=["NewestInstance"],
        NewInstancesProtectedFromScaleIn=False)

    if availability_zones:
        kwargs["AvailabilityZones"] = availability_zones
    if vpc_zone_identifier:
        kwargs["VPCZoneIdentifier"] = vpc_zone_identifier

    client.create_auto_scaling_group(**kwargs)

    return Namespace(auto_scaling_group_name=name)
Example #12
0
def create_ec2_worker_policy(client, s3_bucket_name, account_number, names):
    """Create a policy object for:
        1. permitting put/get/delete operations on the
           specified named bucket
        2. interact with activity tasks for the cbm3_aws state machine

    Args:
        client (IAM.client): boto3 IAM client
        s3_bucket_name (str): the name of the bucket for which to assign the
            policy
        account_number (str): the AWS account number used to scope the
            permitted activity and log-group resources
        names (namespace): the names used to label provisioned aws resources

    Returns:
        namespace: a namespace containing the policy ARN
    """
    policy = {
        "Version": "2012-10-17",
        "Statement": [
            {
                "Sid": "0",
                "Effect": "Allow",
                "Action": [
                    "s3:PutObject", "s3:GetObject", "s3:DeleteObject"
                ],
                "Resource": f"arn:aws:s3:::{s3_bucket_name}/*"
            },
            {
                "Sid": "1",
                "Effect": "Allow",
                "Action": "states:GetActivityTask",
                "Resource":
                    f"arn:aws:states:*:{account_number}:activity:"
                    f"{names.run_activity}"
            },
            {
                "Sid": "2",
                "Effect": "Allow",
                "Action": [
                    "states:SendTaskSuccess", "states:SendTaskFailure",
                    "states:ListActivities", "states:SendTaskHeartbeat"
                ],
                "Resource": "*"
            },
            {
                "Sid": "3",
                "Effect": "Allow",
                "Action": [
                    "logs:CreateLogStream", "logs:CreateLogGroup",
                    "logs:PutLogEvents"
                ],
                "Resource": [
                    f"arn:aws:logs:*:{account_number}:log-group:cbm3_aws",
                    f"arn:aws:logs:*:{account_number}:log-group:cbm3_aws:log-stream:*"
                ]
            },
        ]
    }

    response = client.create_policy(
        PolicyName=names.instance_s3_policy,
        Path='/',
        PolicyDocument=json.dumps(policy),
        Description='grants read/write/delete access to a particular s3 '
        'bucket and step function activity tasks access for '
        'IAM instance role')

    return Namespace(policy_arn=response["Policy"]["Arn"])
Example #13
0
def get_step_functions_executions_name(uuid):
    """Return a namespace holding the uuid-suffixed label for the step
    functions execution.
    """
    execution_label = f"cbm3_aws_step_functions_execution_{uuid}"
    return Namespace(step_functions_execution=execution_label)