Esempio n. 1
0
    def _block_until_ssh_ready(host: str) -> None:
        """Block until a TCP connection to port 22 on ``host`` succeeds.

        Each attempt uses a 1-second socket timeout and is followed by a
        1-second sleep when it fails. Attempts stop once the attempt counter
        reaches ``CONFIG.ssh_timeout``.

        Args:
            host: Host name or address of the instance to probe.

        Raises:
            RuntimeError: If no attempt managed to connect.
        """
        nprint_header(f"Waiting for instance to be ready for ssh at {host}. "
                      "This can take up to 2 minutes... ")

        start = time.monotonic()

        attempts = 0
        while attempts < CONFIG.ssh_timeout:
            # A socket whose connect() failed is left in an unspecified state,
            # so every attempt gets a fresh one; ``with`` guarantees it is
            # closed again whether the attempt succeeds or not.
            with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
                sock.settimeout(1)
                if sock.connect_ex((host, 22)) == 0:
                    break

            time.sleep(1)
            attempts += 1
        else:
            # Only reached when the loop ran out of attempts without a break.
            raise RuntimeError(
                "Something went wrong while connecting to the instance.\n"
                "Please verify your security groups, instance key and "
                "instance profile, and try again.\n"
                "More info at docs.nimbo.sh/common-issues#cant-ssh.\n")

        nprint_header(f"Ready. ({time.monotonic() - start:.3f} s)")
Esempio n. 2
0
 def delete_instance(instance_id: str, dry_run=False) -> None:
     """Request termination of one EC2 instance and print its new state.

     Args:
         instance_id: Id of the instance to terminate.
         dry_run: Forwarded to the EC2 call as ``DryRun``.

     Raises:
         botocore.exceptions.ClientError: Re-raised unless its text contains
             ``DryRunOperation``.
     """
     client = CONFIG.get_session().client("ec2")
     try:
         result = client.terminate_instances(InstanceIds=[instance_id],
                                             DryRun=dry_run)
         terminating = result["TerminatingInstances"][0]
         status = terminating["CurrentState"]["Name"]
         nprint_header(f"Instance [green]{instance_id}[/green]: {status}")
     except botocore.exceptions.ClientError as err:
         # A DryRunOperation error is tolerated; anything else propagates.
         if "DryRunOperation" in str(err):
             return
         raise
Esempio n. 3
0
 def delete_all_instances(dry_run=False) -> None:
     """Terminate every running instance matched by the instance filters.

     Instances are looked up with a ``running`` state filter combined with
     ``AwsInstance._make_instance_filters()``; each one found is terminated
     individually and its resulting state is printed.

     Args:
         dry_run: Forwarded to the describe call as ``DryRun``.

     Raises:
         botocore.exceptions.ClientError: Re-raised unless its text contains
             ``DryRunOperation``.
     """
     client = CONFIG.get_session().client("ec2")
     try:
         running_only = {
             "Name": "instance-state-name",
             "Values": ["running"],
         }
         listing = client.describe_instances(
             Filters=[running_only] + AwsInstance._make_instance_filters(),
             DryRun=dry_run,
         )
         # Lazy generator: ids are pulled one at a time, so each instance is
         # terminated before the next entry of the listing is read.
         found_ids = (entry["InstanceId"]
                      for group in listing["Reservations"]
                      for entry in group["Instances"])
         for instance_id in found_ids:
             outcome = client.terminate_instances(InstanceIds=[instance_id])
             terminating = outcome["TerminatingInstances"][0]
             status = terminating["CurrentState"]["Name"]
             nprint_header(
                 f"Instance [green]{instance_id}[/green]: {status}")
     except botocore.exceptions.ClientError as err:
         # A DryRunOperation error is tolerated; anything else propagates.
         if "DryRunOperation" in str(err):
             return
         raise
Esempio n. 4
0
    def setup(profile: str, full_s3_access=False) -> None:
        """Create the Nimbo user group and attach its IAM policies.

        Always creates the user group and the EC2 policy and attaches that
        policy to the group. With ``full_s3_access`` it additionally creates
        the S3 access role and instance profile, a pass-role policy for that
        role, and attaches AmazonS3FullAccess to both the role and the group;
        otherwise it prints a recommendation to create a suitable role by
        hand.

        Args:
            profile: Name of the AWS profile used to build the boto3 session.
            full_s3_access: Whether to set up full S3 access as described.
        """
        aws_session = boto3.Session(profile_name=profile)
        account_id = aws_session.client("sts").get_caller_identity()["Account"]
        iam_client = aws_session.client("iam")

        s3_full_access_arn = "arn:aws:iam::aws:policy/AmazonS3FullAccess"

        def attach_to_user_group(policy_arn: str) -> None:
            # Every group attachment below targets the same Nimbo user group.
            iam_client.attach_group_policy(
                GroupName=NIMBO_USER_GROUP,
                PolicyArn=policy_arn,
            )

        nprint_header(f"Creating user group {NIMBO_USER_GROUP}...")
        AwsPermissions._create_group(iam_client, NIMBO_USER_GROUP)

        nprint_header(f"Creating policy {EC2_POLICY_NAME}...")
        AwsPermissions._create_policy(iam_client, EC2_POLICY_NAME,
                                      EC2_POLICY_JSON)

        nprint_header(
            f"Attaching policy {EC2_POLICY_NAME} to user group {NIMBO_USER_GROUP}..."
        )
        attach_to_user_group(
            f"arn:aws:iam::{account_id}:policy/{EC2_POLICY_NAME}")

        if full_s3_access:
            nprint_header(f"Creating role {S3_ACCESS_ROLE_NAME}...")
            AwsPermissions._create_role_and_instance_profile(
                iam_client, S3_ACCESS_ROLE_NAME)

            nprint_header(
                f"Attaching AmazonS3FullAccess policy to role {S3_ACCESS_ROLE_NAME}..."
            )
            iam_client.attach_role_policy(
                PolicyArn=s3_full_access_arn,
                RoleName=S3_ACCESS_ROLE_NAME,
            )

            nprint_header(f"Creating policy {PASS_ROLE_POLICY_NAME}...")
            pass_role_statement = {
                "Sid": "NimboPassRolePolicy",
                "Effect": "Allow",
                "Action": "iam:PassRole",
                "Resource": f"arn:aws:iam::*:role/{S3_ACCESS_ROLE_NAME}",
            }
            pass_role_document = {
                "Version": "2012-10-17",
                "Statement": [pass_role_statement],
            }
            AwsPermissions._create_policy(iam_client, PASS_ROLE_POLICY_NAME,
                                          pass_role_document)

            nprint_header(f"Attaching policy {PASS_ROLE_POLICY_NAME}"
                          f" to user group {NIMBO_USER_GROUP}...")
            attach_to_user_group(
                f"arn:aws:iam::{account_id}:policy/{PASS_ROLE_POLICY_NAME}")

            nprint_header(f"Attaching policy AmazonS3FullAccess"
                          f" to user group {NIMBO_USER_GROUP}...")
            attach_to_user_group(s3_full_access_arn)
        else:
            nprint(
                "\nSince you chose not to give full S3 access to the Nimbo user group"
                " and instance role,\nwe recommend that you create a role with the"
                " necessary S3 permissions in the AWS console.\nOnce you do this, give"
                " the role name to the people using Nimbo so that they can set\n"
                "the 'role' field in the nimbo-config.yml to this value.",
                style="warning",
            )

        print()
        nprint_header("Done.")
        nprint_header("To add users to the NimboUserGroup, simply"
                      " run 'nimbo add-user USERNAME YOUR_AWS_PROFILE'.\n"
                      "For more info use 'nimbo add-user --help'")
Esempio n. 5
0
    def _start_instance() -> str:
        """Launch one EC2 instance from ``CONFIG`` and return its instance id.

        When ``CONFIG.spot`` is falsy a regular instance is started with
        ``run_instances``. Otherwise a spot request is submitted and polled
        every 2 seconds until its status code is ``fulfilled``; the resulting
        instance is then tagged.

        Returns:
            The ``InstanceId`` of the launched instance.

        Raises:
            Exception: If the spot request reports a status code other than
                ``fulfilled``, ``pending-evaluation`` or
                ``pending-fulfillment``.
            SystemExit: After cancelling the spot request when the wait is
                interrupted with Ctrl-C.
        """
        AwsPermissions.allow_ingress_current_ip(CONFIG.security_group)

        client = CONFIG.get_session().client("ec2")
        tags = AwsInstance._make_instance_tags()
        filters = AwsInstance._make_instance_filters()

        image_id = AwsInstance._get_image_id()
        nprint_header(f"Launching instance with image {image_id}... ")

        volume = {
            "VolumeSize": CONFIG.disk_size,
            "VolumeType": CONFIG.disk_type,
        }
        if CONFIG.disk_iops:
            volume["Iops"] = CONFIG.disk_iops

        launch_spec = {
            "BlockDeviceMappings": [{"DeviceName": "/dev/sda1", "Ebs": volume}],
            "ImageId": image_id,
            "InstanceType": CONFIG.instance_type,
            "KeyName": Path(CONFIG.instance_key).stem,
            "Placement": {"Tenancy": "default"},
            "SecurityGroups": [CONFIG.security_group],
            "IamInstanceProfile": {"Name": CONFIG.role},
        }

        if not CONFIG.spot:
            # On-demand path: the launch spec doubles as the keyword
            # arguments of run_instances once these entries are added.
            launch_spec.update(
                MinCount=1,
                MaxCount=1,
                InstanceInitiatedShutdownBehavior="terminate",
                TagSpecifications=[{"ResourceType": "instance", "Tags": tags}],
            )
            launched = client.run_instances(**launch_spec)
            return launched["Instances"][0]["InstanceId"]

        spot_kwargs = {}
        if CONFIG.spot_duration:
            spot_kwargs["BlockDurationMinutes"] = CONFIG.spot_duration

        submitted = client.request_spot_instances(
            LaunchSpecification=launch_spec,
            TagSpecifications=[{
                "ResourceType": "spot-instances-request",
                "Tags": tags,
            }],
            **spot_kwargs,
        )
        spot_request = submitted["SpotInstanceRequests"][0]
        request_id = spot_request["SpotInstanceRequestId"]

        # Status codes that mean "keep waiting"; any other non-fulfilled code
        # aborts the wait.
        still_pending = ("pending-evaluation", "pending-fulfillment")

        try:
            nprint_header("Spot instance request submitted.")
            nprint_header(
                "Waiting for the spot instance request to be fulfilled... ")

            while True:
                time.sleep(2)
                described = client.describe_spot_instance_requests(
                    SpotInstanceRequestIds=[request_id],
                    Filters=filters,
                )
                spot_request = described["SpotInstanceRequests"][0]
                code = spot_request["Status"]["Code"]
                if code == "fulfilled":
                    break
                if code not in still_pending:
                    raise Exception(spot_request["Status"])
        except KeyboardInterrupt:
            # Ctrl-C while waiting: withdraw the request before exiting.
            client.cancel_spot_instance_requests(
                SpotInstanceRequestIds=[request_id])
            nprint_header("Cancelled spot instance request.")
            sys.exit(1)

        nprint_header("Done.")
        spot_instance_id = spot_request["InstanceId"]
        client.create_tags(Resources=[spot_instance_id], Tags=tags)
        return spot_instance_id
Esempio n. 6
0
    def run(job_cmd: str, dry_run=False) -> Dict[str, str]:
        """Start an instance, set it up over ssh and launch ``job_cmd`` on it.

        Args:
            job_cmd: Command to run remotely. ``"_nimbo_launch"`` stops after
                the instance is reachable over ssh; ``"_nimbo_notebook"``
                additionally forwards local port 57467 to the instance.
            dry_run: When true, nothing is launched and only a message dict
                is returned.

        Returns:
            A dict whose ``"message"`` is ``job_cmd`` suffixed with
            ``"_dry_run"``, ``"_success"`` or ``"_error"``. Except for the dry
            run it also carries the ``"instance_id"``.
        """
        if dry_run:
            return {"message": job_cmd + "_dry_run"}

        telemetry.record_event("run")

        start_t = time.monotonic()

        instance_id = AwsInstance._start_instance()

        try:
            # Wait for the instance to be running
            AwsInstance._block_until_instance_running(instance_id)
            end_t = time.monotonic()
            nprint_header(
                f"Instance running. ({round((end_t - start_t), 2)} s)")
            nprint_header(f"InstanceId: [green]{instance_id}[/green]")
            print()

            time.sleep(5)
            host = AwsInstance._get_host_from_instance_id(instance_id)

            AwsInstance._block_until_ssh_ready(host)

            if job_cmd == "_nimbo_launch":
                nprint_header(
                    f"Run [cyan]nimbo ssh {instance_id}[/cyan] to log onto the instance"
                )
                return {
                    "message": job_cmd + "_success",
                    "instance_id": instance_id
                }

            ssh = (
                f"ssh -i {CONFIG.instance_key} -o 'StrictHostKeyChecking no'"
                " -o ServerAliveInterval=5 ")
            scp = f"scp -i {CONFIG.instance_key} -o 'StrictHostKeyChecking no'"

            local_env = "/tmp/local_env.yml"
            user_conda_yml = CONFIG.conda_env
            # TODO: Replace this with shutil
            subprocess.check_output(f"cp {user_conda_yml} {local_env}",
                                    shell=True)

            # Send conda env yaml and setup scripts to instance
            print()
            nprint_header("Syncing conda, config, and setup files...")
            AwsInstance._write_nimbo_vars()

            # Create project folder and send env and config files there
            subprocess.check_output(f"{ssh} ubuntu@{host} mkdir project",
                                    shell=True)
            subprocess.check_output(
                f"{scp} {local_env} {CONFIG.nimbo_config_file} {NIMBO_VARS}"
                f" ubuntu@{host}:/home/ubuntu/project/",
                shell=True,
            )

            # Sync code with instance
            print()
            nprint_header("Syncing code...")
            AwsInstance._sync_code(host)

            nprint_header("Running setup code on the instance from here on.")
            # Run remote_setup script on instance
            AwsInstance._run_remote_script(ssh, scp, host, instance_id,
                                           job_cmd, "remote_setup.sh")

            if job_cmd == "_nimbo_notebook":
                # Forward local port 57467 to the same port on the instance.
                subprocess.Popen(
                    f"{ssh} -o 'ExitOnForwardFailure yes' "
                    f"ubuntu@{host} -NfL 57467:localhost:57467 >/dev/null 2>&1",
                    shell=True,
                ).communicate()
                nprint_header(
                    "Make sure to run 'nimbo sync-notebooks <instance_id>' frequently "
                    "to sync the notebook to your local folder, as the remote notebooks"
                    " will be lost once the instance is terminated.")

            return {
                "message": job_cmd + "_success",
                "instance_id": instance_id
            }

        except BaseException as e:
            # BaseException (not Exception) so that Ctrl-C also reaches the
            # cleanup below. Interrupts and failed subprocesses are not
            # printed here; isinstance also covers subclasses of either type.
            if not isinstance(
                    e, (KeyboardInterrupt, subprocess.CalledProcessError)):
                nprint(e, style="error")

            if not CONFIG.persist:
                nprint_header(
                    f"Deleting instance {instance_id} (from local)... ")
                AwsInstance.delete_instance(instance_id)

            return {"message": job_cmd + "_error", "instance_id": instance_id}
0
    def run_access_test(dry_run=False) -> None:
        """Check S3 access, instance launch/deletion and ssh access end to end.

        Overrides ``CONFIG.instance_type``, ``CONFIG.run_in_background`` and
        ``CONFIG.persist`` for the duration of the test. First copies, lists
        and removes a small test file under ``CONFIG.s3_results_path`` from
        the local machine. Then launches an instance and deletes it, launches
        a second one, waits for ssh, and runs ``remote_s3_test.sh`` on it.

        Args:
            dry_run: When true, return immediately without doing anything.

        Raises:
            SystemExit: With status 1 if the local S3 commands fail or if
                anything goes wrong while testing the instances.
        """
        if dry_run:
            return

        CONFIG.instance_type = "t3.medium"
        CONFIG.run_in_background = False
        CONFIG.persist = False

        try:
            # Send test file to s3 results path and delete it
            profile = CONFIG.aws_profile
            region = CONFIG.region_name
            results_path = CONFIG.s3_results_path

            subprocess.check_output(
                "echo 'Hello World' > nimbo-access-test.txt", shell=True)
            command = AwsStorage.mk_s3_command("cp", "nimbo-access-test.txt",
                                               results_path)
            subprocess.check_output(command, shell=True)

            command = f"aws s3 ls {results_path} --profile {profile} --region {region}"
            subprocess.check_output(command, shell=True)
            command = (f"aws s3 rm {results_path}/nimbo-access-test.txt "
                       f"--profile {profile} --region {region}")
            subprocess.check_output(command, shell=True)

            print("You have the necessary S3 read/write "
                  "permissions from your computer \u2713")

        except subprocess.CalledProcessError as e:
            nprint(e, style="error")
            sys.exit(1)

        print("Launching test instance... ")

        instance_id = AwsInstance._start_instance()

        try:
            # Wait for the instance to be running
            AwsInstance._block_until_instance_running(instance_id)
            print("Instance running. Instance creation allowed \u2713")
            print(f"InstanceId: {instance_id}")
            print()

            print("Trying to delete this instance...")
            AwsInstance.delete_instance(instance_id)

            print("Instance deletion allowed \u2713")
            print("\nLaunching another instance...")
            instance_id = AwsInstance._start_instance()
            print(f"Instance running. InstanceId: {instance_id}")

            time.sleep(5)
            host = AwsInstance._get_host_from_instance_id(instance_id)
            ssh = (
                f"ssh -i {CONFIG.instance_key} -o 'StrictHostKeyChecking no' "
                "-o ServerAliveInterval=20")
            scp = f"scp -i {CONFIG.instance_key} -o 'StrictHostKeyChecking no'"

            AwsInstance._block_until_ssh_ready(host)

            print("Instance key allows ssh access to remote instance \u2713")
            print("Security group allows ssh access to remote instance \u2713")

            AwsInstance._write_nimbo_vars()

            subprocess.check_output(
                f"{scp} {CONFIG.nimbo_config_file} {NIMBO_VARS} " +
                f"ubuntu@{host}:/home/ubuntu/",
                shell=True,
            )
            AwsInstance._run_remote_script(ssh, scp, host, instance_id, "",
                                           "remote_s3_test.sh")

        except BaseException as e:
            # BaseException (not Exception) so that Ctrl-C also reaches the
            # cleanup below. Interrupts and failed subprocesses are not
            # printed here; isinstance also covers subclasses of either type.
            if not isinstance(
                    e, (KeyboardInterrupt, subprocess.CalledProcessError)):
                nprint(e, style="error")

            if not CONFIG.persist:
                nprint_header(
                    f"Deleting instance {instance_id} (from local)...")
                AwsInstance.delete_instance(instance_id)

            sys.exit(1)