Beispiel #1
0
 def _sync_code(host: str) -> None:
     """Rsync the code in the current working directory to the instance.

     If the current directory contains a ``.git`` entry, exactly the files
     tracked at ``HEAD`` are sent. Otherwise a warning is printed and every
     ``*.py``, ``*.ipynb`` and ``*.sh`` file below the current directory is
     sent as a fallback. The destination is ``/home/ubuntu/project`` on
     ``host``, reached as user ``ubuntu`` with ``CONFIG.instance_key``.

     :param host: Address of the remote instance
     """
     import shlex  # function-scope import keeps this block self-contained

     if ".git" not in os.listdir():
         nprint(
             "No git repo found. Syncing all python and bash files as a fallback.",
             style="warning",
         )
         nprint("Please consider using git to track the files to sync.",
                style="warning")
         subprocess.Popen(
             f"rsync -avm -e 'ssh -i {CONFIG.instance_key}' "
             f"--include '*/' --include '*.py' --include '*.ipynb' --include '*.sh' "
             f"--exclude '*' "
             f". ubuntu@{host}:/home/ubuntu/project",
             shell=True,
         ).communicate()
         return

     # -z makes git emit NUL-separated, unquoted paths. Without it, paths with
     # unusual characters are C-quoted and would never match an include rule.
     output, _ = subprocess.Popen("git ls-tree -r -z --name-only HEAD",
                                  stdout=subprocess.PIPE,
                                  shell=True).communicate()
     git_tracked_files = [
         path for path in output.decode("utf-8").split("\0") if path
     ]

     # shlex.quote keeps names containing quotes, spaces or other shell
     # metacharacters from breaking (or injecting into) the command line.
     include_string = " ".join(f"--include {shlex.quote(file_name)}"
                               for file_name in git_tracked_files)
     subprocess.Popen(
         f"rsync -amr -e 'ssh -i {CONFIG.instance_key}' "
         f"--include '*/' {include_string} --exclude '*' "
         f". ubuntu@{host}:/home/ubuntu/project",
         shell=True,
     ).communicate()
Beispiel #2
0
    def ls_spot_gpu_prices(dry_run=False) -> None:
        """Print a table of spot prices for the configured instance types.

        For each type returned by ``AwsUtils._instance_types()`` the price is
        taken from the first entry that ``describe_spot_price_history``
        returns for the "Linux/UNIX" product description. GPU, CPU and memory
        columns come from ``INSTANCE_GPU_MAP``. A type with no spot price
        history is listed with "N/A" as its price instead of aborting the
        whole listing.

        :param dry_run: When true, return immediately without calling AWS
        """
        if dry_run:
            return

        ec2 = CONFIG.get_session().client("ec2")

        header = AwsUtils._format_price_string("InstanceType",
                                               "Price ($/hour)", "GPUs",
                                               "CPUs", "Mem (Gb)")
        print()
        nprint(header, style="bold")

        for instance_type in AwsUtils._instance_types():
            response = ec2.describe_spot_price_history(
                InstanceTypes=[instance_type],
                Filters=[{
                    "Name": "product-description",
                    "Values": ["Linux/UNIX"]
                }],
            )

            history = response["SpotPriceHistory"]
            if history:
                price = round(float(history[0]["SpotPrice"]), 2)
            else:
                # The history can be empty for a type; indexing [0] would
                # raise IndexError and cut the table short.
                price = "N/A"

            num_gpus, gpu_type, mem, cpus = INSTANCE_GPU_MAP[instance_type]
            row = AwsUtils._format_price_string(instance_type, price,
                                                f"{num_gpus} x {gpu_type}",
                                                cpus, mem)
            print(row)
        print()
Beispiel #3
0
 def _create_policy(client, policy_name, policy_json):
     try:
         client.create_policy(PolicyName=policy_name,
                              PolicyDocument=json.dumps(policy_json))
     except botocore.exceptions.ClientError as e:
         if e.response["Error"]["Code"] == "EntityAlreadyExists":
             nprint(f"Policy {policy_name} already exists. Skipping.",
                    style="warning")
         else:
             raise
Beispiel #4
0
 def _create_group(client, group_name):
     try:
         client.create_group(GroupName=group_name)
     except botocore.exceptions.ClientError as e:
         if e.response["Error"]["Code"] == "EntityAlreadyExists":
             nprint(
                 f"User group {group_name} already exists. Skipping.",
                 style="warning",
             )
         else:
             raise
Beispiel #5
0
 def decorated(*args, **kwargs):
     """Check the required config for ``cases``, then call ``func``.

     An AssertionError or FileNotFoundError raised by the config check or by
     ``func`` itself is printed in the "error" style and the process exits
     with status 1. Otherwise the return value of ``func`` is passed through.
     """
     try:
         CONFIG.assert_required_config_exists(*cases)
         return func(*args, **kwargs)
     # FileNotFoundError happens when the nimbo config file is not found
     except (AssertionError, FileNotFoundError) as err:
         nprint(err, style="error")
         sys.exit(1)
Beispiel #6
0
 def decorated(*args, **kwargs):
     """Call ``func``, turning selected errors into a clean exit.

     When ``IS_TEST_ENV`` is truthy the call is made directly and every
     exception propagates. Otherwise a ClientError or ValueError is printed
     in the "error" style, a KeyboardInterrupt prints "Aborting...", and in
     all three cases the process exits with status 1.
     """
     if IS_TEST_ENV:
         # Under test nothing is intercepted.
         return func(*args, **kwargs)

     try:
         return func(*args, **kwargs)
     except (botocore.errorfactory.ClientError, ValueError) as err:
         nprint(err, style="error")
         sys.exit(1)
     except KeyboardInterrupt:
         print("Aborting...")
         sys.exit(1)
Beispiel #7
0
    def allow_ingress_current_ip(target: str, dry_run=False) -> None:
        """Authorize TCP port 22 ingress into security group ``target``.

        The caller's public IP is fetched from https://checkip.amazonaws.com
        and the rule is created with ``CidrIp`` set to ``<public ip>/16``.
        NOTE(review): a /16 prefix covers far more than the caller's own
        address; confirm this breadth is intended.

        The function returns silently when the caller is not authorized for
        either EC2 call, or when the ingress rule already exists. It exits
        with status 1 if the security group does not exist.

        :param target: Name of the security group to modify
        :param dry_run: Forwarded as ``DryRun`` to ``describe_security_groups``
        :raises botocore.exceptions.ClientError: For any other AWS error code
        """
        ec2 = CONFIG.get_session().client("ec2")

        try:
            response = ec2.describe_security_groups(GroupNames=[target],
                                                    DryRun=dry_run)
            security_group_id = response["SecurityGroups"][0]["GroupId"]
        except botocore.exceptions.ClientError as e:
            error_code = e.response["Error"]["Code"]
            if error_code == "InvalidGroup.NotFound":
                nprint(
                    f"Security group {target} not found. Please use an existing"
                    " security group or create a new one in the AWS console.",
                    style="error",
                )
                sys.exit(1)
            if error_code == "UnauthorizedOperation":
                return
            raise

        # Without a timeout a stalled connection would block the CLI forever.
        my_public_ip = requests.get("https://checkip.amazonaws.com",
                                    timeout=10).text.strip()

        try:
            ec2.authorize_security_group_ingress(
                GroupId=security_group_id,
                IpPermissions=[{
                    "IpProtocol": "tcp",
                    "FromPort": 22,
                    "ToPort": 22,
                    "IpRanges": [{
                        "CidrIp": f"{my_public_ip}/16"
                    }],
                }],
            )
        except botocore.exceptions.ClientError as e:
            # A rule that already exists, or missing permission to add one,
            # is deliberately not treated as an error.
            if e.response["Error"]["Code"] not in (
                    "InvalidPermission.Duplicate", "UnauthorizedOperation"):
                raise
Beispiel #8
0
    def ls_active_instances(dry_run=False) -> None:
        """Print id, state, launch time, type and IP of active instances.

        Instances are selected by state "running" or "pending", combined with
        the filters from ``AwsInstance._make_instance_filters()``. An instance
        with no ``PublicIpAddress`` in the response is shown with "N/A"
        rather than raising KeyError.

        :param dry_run: Forwarded as ``DryRun`` to ``describe_instances``
        :raises botocore.exceptions.ClientError: Unless the error text
            contains "DryRunOperation"
        """
        ec2 = CONFIG.get_session().client("ec2")
        try:
            response = ec2.describe_instances(
                Filters=[{
                    "Name": "instance-state-name",
                    "Values": ["running", "pending"]
                }] + AwsInstance._make_instance_filters(),
                DryRun=dry_run,
            )
            for reservation in response["Reservations"]:
                for inst in reservation["Instances"]:
                    # The key is absent when no public IP is assigned, e.g.
                    # for an instance that is still pending.
                    ip_address = inst.get("PublicIpAddress", "N/A")
                    nprint(
                        f"Id: [bright_green]{inst['InstanceId']}[/bright_green]\n"
                        f"Status: {inst['State']['Name']}\n"
                        f"Launch Time: {inst['LaunchTime']}\n"
                        f"InstanceType: {inst['InstanceType']}\n"
                        f"IP Address: {ip_address}\n")

        except botocore.exceptions.ClientError as e:
            if "DryRunOperation" not in str(e):
                raise
Beispiel #9
0
    def mk_bucket(bucket_name: str, dry_run=False) -> None:
        """Create an S3 bucket in the region of the configured session.

        Nothing is returned. The outcome is reported on the console: a
        success line, a warning if the caller already owns the bucket, or
        the AWS error for any other failure.

        :param bucket_name: Bucket to create
        :param dry_run: Accepted for interface compatibility; not used here
        """

        try:
            session = CONFIG.get_session()
            s3 = session.client("s3")
            create_kwargs = {"Bucket": bucket_name}
            # us-east-1 is the default region and S3 rejects it as an
            # explicit LocationConstraint, so only send it for other regions.
            if session.region_name != "us-east-1":
                create_kwargs["CreateBucketConfiguration"] = {
                    "LocationConstraint": session.region_name
                }
            s3.create_bucket(**create_kwargs)
        except botocore.exceptions.ClientError as e:
            if e.response["Error"]["Code"] == "BucketAlreadyOwnedByYou":
                # Report the bucket actually requested, not a fixed name.
                nprint(f"Bucket {bucket_name} already exists.",
                       style="warning")
            else:
                nprint(e, style="error")
            return

        print(f"Bucket {bucket_name} created.")
Beispiel #10
0
    def _create_role_and_instance_profile(client, role_name):
        """Create a role and a same-named instance profile, then link them.

        Three calls are made on ``client`` in order: ``create_role`` (with
        ``ASSUME_ROLE_POLICY`` serialised as ``AssumeRolePolicyDocument``),
        ``create_instance_profile`` and ``add_role_to_instance_profile``.
        Each call tolerates exactly one error code, which is reported as a
        warning so that the next step still runs.

        :param client: Object exposing the three calls above
        :param role_name: Name used for both the role and the instance profile
        :raises botocore.exceptions.ClientError: For any non-tolerated code
        """

        def attempt(api_call, tolerated_code, skip_notice, **api_kwargs):
            # Run one API call; downgrade the tolerated error to a warning.
            try:
                api_call(**api_kwargs)
            except botocore.exceptions.ClientError as err:
                if err.response["Error"]["Code"] != tolerated_code:
                    raise
                nprint(skip_notice, style="warning")

        attempt(
            client.create_role,
            "EntityAlreadyExists",
            f"Role {role_name} already exists. Skipping.",
            RoleName=role_name,
            AssumeRolePolicyDocument=json.dumps(ASSUME_ROLE_POLICY),
        )
        attempt(
            client.create_instance_profile,
            "EntityAlreadyExists",
            f"Instance profile for role {role_name} already exists. Skipping.",
            InstanceProfileName=role_name,
            Path="/",
        )
        attempt(
            client.add_role_to_instance_profile,
            "LimitExceeded",
            f"Instance profile {role_name} already has a role. Skipping.",
            InstanceProfileName=role_name,
            RoleName=role_name,
        )
Beispiel #11
0
    def setup(profile: str, full_s3_access=False) -> None:
        """Create the Nimbo user group and policies for an AWS account.

        The group ``NIMBO_USER_GROUP`` and the policy ``EC2_POLICY_NAME`` are
        always created and attached to each other. With ``full_s3_access``
        the S3 access role and its instance profile are created as well,
        AmazonS3FullAccess is attached to both the role and the group, and a
        pass-role policy for that role is created and attached to the group.
        Without it, a warning explains how to supply a role manually.

        :param profile: AWS profile name used to open the boto3 session
        :param full_s3_access: Whether to grant AmazonS3FullAccess
        """
        aws_session = boto3.Session(profile_name=profile)
        account_id = aws_session.client("sts").get_caller_identity()["Account"]
        iam_client = aws_session.client("iam")

        def attach_to_group(policy_arn):
            # Attach one policy to the Nimbo user group.
            iam_client.attach_group_policy(GroupName=NIMBO_USER_GROUP,
                                           PolicyArn=policy_arn)

        nprint_header(f"Creating user group {NIMBO_USER_GROUP}...")
        AwsPermissions._create_group(iam_client, NIMBO_USER_GROUP)

        nprint_header(f"Creating policy {EC2_POLICY_NAME}...")
        AwsPermissions._create_policy(iam_client, EC2_POLICY_NAME,
                                      EC2_POLICY_JSON)

        nprint_header(
            f"Attaching policy {EC2_POLICY_NAME} to user group {NIMBO_USER_GROUP}..."
        )
        attach_to_group(f"arn:aws:iam::{account_id}:policy/{EC2_POLICY_NAME}")

        if not full_s3_access:
            nprint(
                "\nSince you chose not to give full S3 access to the Nimbo user group"
                " and instance role,\nwe recommend that you create a role with the"
                " necessary S3 permissions in the AWS console.\nOnce you do this, give"
                " the role name to the people using Nimbo so that they can set\n"
                "the 'role' field in the nimbo-config.yml to this value.",
                style="warning",
            )
        else:
            s3_full_access_arn = "arn:aws:iam::aws:policy/AmazonS3FullAccess"

            nprint_header(f"Creating role {S3_ACCESS_ROLE_NAME}...")
            AwsPermissions._create_role_and_instance_profile(
                iam_client, S3_ACCESS_ROLE_NAME)

            nprint_header(
                f"Attaching AmazonS3FullAccess policy to role {S3_ACCESS_ROLE_NAME}..."
            )
            iam_client.attach_role_policy(PolicyArn=s3_full_access_arn,
                                          RoleName=S3_ACCESS_ROLE_NAME)

            nprint_header(f"Creating policy {PASS_ROLE_POLICY_NAME}...")
            pass_role_statement = {
                "Sid": "NimboPassRolePolicy",
                "Effect": "Allow",
                "Action": "iam:PassRole",
                "Resource": f"arn:aws:iam::*:role/{S3_ACCESS_ROLE_NAME}",
            }
            AwsPermissions._create_policy(
                iam_client,
                PASS_ROLE_POLICY_NAME,
                {
                    "Version": "2012-10-17",
                    "Statement": [pass_role_statement]
                },
            )

            nprint_header(f"Attaching policy {PASS_ROLE_POLICY_NAME}"
                          f" to user group {NIMBO_USER_GROUP}...")
            attach_to_group(
                f"arn:aws:iam::{account_id}:policy/{PASS_ROLE_POLICY_NAME}")

            nprint_header("Attaching policy AmazonS3FullAccess"
                          f" to user group {NIMBO_USER_GROUP}...")
            attach_to_group(s3_full_access_arn)

        print()
        nprint_header("Done.")
        nprint_header("To add users to the NimboUserGroup, simply"
                      " run 'nimbo add-user USERNAME YOUR_AWS_PROFILE'.\n"
                      "For more info use 'nimbo add-user --help'")
Beispiel #12
0
    def run(job_cmd: str, dry_run=False) -> Dict[str, str]:
        """Start an instance, push config and code to it, and run the setup.

        The special command "_nimbo_launch" stops once SSH is reachable.
        "_nimbo_notebook" additionally forwards local port 57467 to the
        instance after the remote setup script has run.

        If anything fails after the instance has been started, the instance
        is deleted (unless ``CONFIG.persist`` is set) and an error result is
        returned instead of raising.

        :param job_cmd: Command to run remotely, or one of the special values
        :param dry_run: When true, only return the dry-run message
        :return: Dict with "message" (``job_cmd`` plus "_dry_run",
            "_success" or "_error") and, except for a dry run, "instance_id"
        """
        if dry_run:
            return {"message": job_cmd + "_dry_run"}

        telemetry.record_event("run")

        launch_started = time.monotonic()
        instance_id = AwsInstance._start_instance()

        def outcome(suffix):
            # Result payload shared by the success and error exits.
            return {"message": job_cmd + suffix, "instance_id": instance_id}

        try:
            # Wait for the instance to be running
            AwsInstance._block_until_instance_running(instance_id)
            elapsed = round(time.monotonic() - launch_started, 2)
            nprint_header(f"Instance running. ({elapsed} s)")
            nprint_header(f"InstanceId: [green]{instance_id}[/green]")
            print()

            time.sleep(5)
            host = AwsInstance._get_host_from_instance_id(instance_id)
            AwsInstance._block_until_ssh_ready(host)

            if job_cmd == "_nimbo_launch":
                nprint_header(
                    f"Run [cyan]nimbo ssh {instance_id}[/cyan] to log onto the instance"
                )
                return outcome("_success")

            ssh = (
                f"ssh -i {CONFIG.instance_key} -o 'StrictHostKeyChecking no'"
                " -o ServerAliveInterval=5 ")
            scp = f"scp -i {CONFIG.instance_key} -o 'StrictHostKeyChecking no'"
            remote = f"ubuntu@{host}"

            # TODO: Replace this with shutil
            local_env = "/tmp/local_env.yml"
            subprocess.check_output(f"cp {CONFIG.conda_env} {local_env}",
                                    shell=True)

            # Send conda env yaml and setup scripts to instance
            print()
            nprint_header("Syncing conda, config, and setup files...")
            AwsInstance._write_nimbo_vars()

            # Create project folder and send env and config files there
            subprocess.check_output(f"{ssh} {remote} mkdir project",
                                    shell=True)
            subprocess.check_output(
                f"{scp} {local_env} {CONFIG.nimbo_config_file} {NIMBO_VARS}"
                f" {remote}:/home/ubuntu/project/",
                shell=True,
            )

            # Sync code with instance
            print()
            nprint_header("Syncing code...")
            AwsInstance._sync_code(host)

            # Run remote_setup script on instance
            nprint_header("Running setup code on the instance from here on.")
            AwsInstance._run_remote_script(ssh, scp, host, instance_id,
                                           job_cmd, "remote_setup.sh")

            if job_cmd == "_nimbo_notebook":
                port_forward = (
                    f"{ssh} -o 'ExitOnForwardFailure yes' "
                    f"{remote} -NfL 57467:localhost:57467 >/dev/null 2>&1")
                subprocess.Popen(port_forward, shell=True).communicate()
                nprint_header(
                    "Make sure to run 'nimbo sync-notebooks <instance_id>' frequently "
                    "to sync the notebook to your local folder, as the remote notebooks"
                    " will be lost once the instance is terminated.")

            return outcome("_success")

        except BaseException as err:
            # Exact-type match: only these two are left unprinted.
            if type(err) not in (KeyboardInterrupt,
                                 subprocess.CalledProcessError):
                nprint(err, style="error")

            if not CONFIG.persist:
                nprint_header(
                    f"Deleting instance {instance_id} (from local)... ")
                AwsInstance.delete_instance(instance_id)

            return outcome("_error")
Beispiel #13
0
    def run_access_test(dry_run=False) -> None:
        """Check that the user's setup has the access needed to run jobs.

        Steps, in order:

        1. Override ``CONFIG.instance_type`` ("t3.medium"),
           ``CONFIG.run_in_background`` (False) and ``CONFIG.persist``
           (False) for the duration of the process.
        2. Copy a small test file to ``CONFIG.s3_results_path``, list the
           path and remove the file again, all from the local machine.
        3. Start an instance, wait for it to run, and delete it.
        4. Start a second instance, wait for it to run and for SSH to be
           ready, copy the config files over and run ``remote_s3_test.sh``.

        A failure in step 2 prints the error and exits with status 1. A
        failure in steps 3-4 deletes the current instance (``CONFIG.persist``
        is False here) and exits with status 1.

        :param dry_run: When true, return immediately without doing anything
        """
        if dry_run:
            return

        CONFIG.instance_type = "t3.medium"
        CONFIG.run_in_background = False
        CONFIG.persist = False

        try:
            # Send test file to s3 results path and delete it
            profile = CONFIG.aws_profile
            region = CONFIG.region_name
            results_path = CONFIG.s3_results_path

            subprocess.check_output(
                "echo 'Hello World' > nimbo-access-test.txt", shell=True)
            command = AwsStorage.mk_s3_command("cp", "nimbo-access-test.txt",
                                               results_path)
            subprocess.check_output(command, shell=True)

            command = f"aws s3 ls {results_path} --profile {profile} --region {region}"
            subprocess.check_output(command, shell=True)
            command = (f"aws s3 rm {results_path}/nimbo-access-test.txt "
                       f"--profile {profile} --region {region}")
            subprocess.check_output(command, shell=True)

            print("You have the necessary S3 read/write "
                  "permissions from your computer \u2713")

        except subprocess.CalledProcessError as e:
            nprint(e, style="error")
            sys.exit(1)

        print("Launching test instance... ")

        instance_id = AwsInstance._start_instance()

        try:
            # Wait for the instance to be running
            AwsInstance._block_until_instance_running(instance_id)
            print("Instance running. Instance creation allowed \u2713")
            print(f"InstanceId: {instance_id}")
            print()

            print("Trying to delete this instance...")
            AwsInstance.delete_instance(instance_id)

            print("Instance deletion allowed \u2713")
            print("\nLaunching another instance...")
            instance_id = AwsInstance._start_instance()
            # Wait here as well: the original announced "Instance running"
            # and looked up the host without waiting for the running state.
            AwsInstance._block_until_instance_running(instance_id)
            print(f"Instance running. InstanceId: {instance_id}")

            time.sleep(5)
            host = AwsInstance._get_host_from_instance_id(instance_id)
            ssh = (
                f"ssh -i {CONFIG.instance_key} -o 'StrictHostKeyChecking no' "
                "-o ServerAliveInterval=20")
            scp = f"scp -i {CONFIG.instance_key} -o 'StrictHostKeyChecking no'"

            AwsInstance._block_until_ssh_ready(host)

            print("Instance key allows ssh access to remote instance \u2713")
            print("Security group allows ssh access to remote instance \u2713")

            AwsInstance._write_nimbo_vars()

            subprocess.check_output(
                f"{scp} {CONFIG.nimbo_config_file} {NIMBO_VARS} "
                f"ubuntu@{host}:/home/ubuntu/",
                shell=True,
            )
            AwsInstance._run_remote_script(ssh, scp, host, instance_id, "",
                                           "remote_s3_test.sh")

        except BaseException as e:
            # Interrupts and failed subprocesses are not echoed again.
            if not isinstance(
                    e, (KeyboardInterrupt, subprocess.CalledProcessError)):
                nprint(e, style="error")

            if not CONFIG.persist:
                nprint_header(
                    f"Deleting instance {instance_id} (from local)...")
                AwsInstance.delete_instance(instance_id)

            sys.exit(1)
Beispiel #14
0
    def ls_gpu_prices(dry_run=False) -> None:
        """Print a table of on-demand prices for the configured instance types.

        For each type returned by ``AwsUtils._instance_types()`` the pricing
        API is queried for a Linux, shared-tenancy product with no
        pre-installed software in the region ``CONFIG.region_name``. The
        price shown is the first price dimension of the first on-demand term
        of the first product returned. A type with no matching product is
        listed with "N/A" as its price instead of aborting the whole listing.

        :param dry_run: When true, return immediately without calling AWS
        :raises KeyError: If ``CONFIG.region_name`` is not in
            ``FULL_REGION_NAMES``
        """
        if dry_run:
            return

        full_region_name = FULL_REGION_NAMES[CONFIG.region_name]

        # The pricing client is created against us-east-1 regardless of the
        # region being priced.
        pricing = CONFIG.get_session().client("pricing",
                                              region_name="us-east-1")

        header = AwsUtils._format_price_string("InstanceType",
                                               "Price ($/hour)", "GPUs",
                                               "CPUs", "Mem (Gb)")
        print()
        nprint(header, style="bold")

        term_filters = [
            ("instanceType", None),  # filled in per instance type below
            ("location", full_region_name),
            ("operatingSystem", "Linux"),
            ("capacitystatus", "Used"),
            ("preInstalledSw", "NA"),
            ("tenancy", "shared"),
        ]

        for instance_type in AwsUtils._instance_types():
            response = pricing.get_products(
                ServiceCode="AmazonEC2",
                MaxResults=100,
                FormatVersion="aws_v1",
                Filters=[{
                    "Type": "TERM_MATCH",
                    "Field": field,
                    "Value": instance_type if value is None else value,
                } for field, value in term_filters],
            )

            price_list = response["PriceList"]
            if price_list:
                on_demand = json.loads(price_list[0])["terms"]["OnDemand"]
                offer = next(iter(on_demand.values()))
                dimension = next(iter(offer["priceDimensions"].values()))
                # pricePerUnit maps a currency code to the price string.
                price = round(
                    float(next(iter(dimension["pricePerUnit"].values()))), 2)
            else:
                # No matching product: indexing [0] would raise IndexError
                # and cut the table short.
                price = "N/A"

            num_gpus, gpu_type, mem, cpus = INSTANCE_GPU_MAP[instance_type]
            row = AwsUtils._format_price_string(instance_type, price,
                                                f"{num_gpus} x {gpu_type}",
                                                cpus, mem)
            print(row)
        print()