Esempio n. 1
0
    def deploy(self,
               project_config: ProjectConfig,
               output: AbstractOutputWriter,
               dry_run=False):
        """Deploys the instance: syncs the project, creates the disks and the stack.

        The GPU configuration is checked first, so an invalid configuration is
        reported before the existing stack is deleted or anything is uploaded.

        Args:
            project_config: project configuration
            output: output writer
            dry_run: forwarded to the bucket, sync and disk helpers; when it's
                     set, the stack is not created
        """
        # check GPU configuration before changing anything
        check_gpu_configuration(self._ce, self.instance_config.gpu)

        # remove the stack if it exists to make all the disks available
        # NOTE(review): the stack is deleted even in the dry-run mode - confirm that it's intended
        stack = self.stack
        stack.delete_stack(output=output)

        # create or get existing bucket for the project
        bucket_name = self.bucket.get_or_create_bucket(output, dry_run)

        # sync the project with the bucket
        output.write('Syncing the project with the bucket...')
        sync_local_to_bucket(project_config.project_dir, bucket_name,
                             project_config.sync_filters, dry_run)

        # get volumes
        volumes = self._get_volumes()
        if volumes:
            # create disks
            output.write('\nCreating disks...')
            with output.prefix('  '):
                self._create_disks(volumes, output=output, dry_run=dry_run)
            output.write('')

        # prepare Deployment Manager template
        output.write('Preparing the deployment template...')
        with output.prefix('  '):
            # get an image
            image = self._get_image()

            # get or create an SSH key
            public_key_value = self.ssh_key.get_public_key_value()

            container = ContainerDeployment(project_config.project_name,
                                            volumes, project_config.container)
            template = prepare_instance_template(
                self.instance_config, container, project_config.sync_filters,
                volumes, self.machine_name, image.self_link, bucket_name,
                public_key_value, self._credentials.service_account_email,
                output)
        output.write('')

        # print information about the volumes
        output.write(
            'Volumes:\n%s\n' %
            render_volumes_info_table(container.volume_mounts, volumes))

        # create stack
        if not dry_run:
            stack.create_stack(template, output=output)
Esempio n. 2
0
    def delete_stack(self, output: AbstractOutputWriter, stack_id=None):
        """Deletes an AMI stack and waits for the deletion to be completed.

        Args:
            output: output writer
            stack_id: ID of the stack to delete (for older versions of Spotty);
                      if it's not specified, the stack returned by "get_stack()" is deleted

        Raises:
            ValueError: the stack was not found or was not deleted
        """
        stack = Stack.get_by_name(self._cf,
                                  stack_id) if stack_id else self.get_stack()

        # the stack ID is optional, so it must not be rendered as "None" in the error messages
        stack_label = ('Stack "%s"' % stack_id) if stack_id else 'Stack'

        # fail with a clear message instead of an AttributeError on a missing stack
        if not stack:
            raise ValueError('%s not found.' % stack_label)

        # delete the image
        stack.delete()

        output.write('Waiting for the AMI to be deleted...')

        # wait for the deletion to be completed
        with output.prefix('  '):
            stack = stack.wait_status_changed(
                waiting_status='DELETE_IN_PROGRESS',
                resource_messages=[],
                resource_success_status='DELETE_COMPLETE',
                output=output)

        if stack.status == 'DELETE_COMPLETE':
            output.write('\n'
                         '-----------------------------\n'
                         'AMI was successfully deleted.\n'
                         '-----------------------------')
        else:
            raise ValueError(
                '%s not deleted.\n'
                'See CloudFormation and CloudWatch logs for details.' %
                stack_label)
    def create_stack(self, template: str, output: AbstractOutputWriter):
        """Deploys a Deployment Manager template and waits for its resources.

        Args:
            template: Deployment Manager template
            output: output writer
        """
        # create a stack
        Stack.create(self._dm, self._stack_name, template)

        output.write('Waiting for the stack to be created...')

        # ordered mapping: resource name -> progress message
        resource_messages = OrderedDict([
            (self._INSTANCE_RESOURCE_NAME, 'launching the instance'),
            (self._DOCKER_WAITER_RESOURCE_NAME,
             'running the Docker container'),
        ])

        # wait for the stack to be created
        with output.prefix('  '):
            wait_resources(self._dm,
                           self._ce,
                           self._stack_name,
                           resource_messages,
                           instance_resource_name=self._INSTANCE_RESOURCE_NAME,
                           machine_name=self._machine_name,
                           output=output)
Esempio n. 4
0
    def _get_instance_id(instances: List[dict], instance_name: str, output: AbstractOutputWriter):
        """Returns the index of the instance to work with.

        Args:
            instances: instance configurations, each one has the "name" key
            instance_name: name of the instance to look for; if it's empty and
                           several instances are configured, the user is asked to choose one
            output: output writer

        Returns:
            Zero-based index in the "instances" list.

        Raises:
            ValueError: the name was not found or the entered number is out of range
        """
        if instance_name:
            # get instance ID by name (the first match wins)
            matches = [idx for idx, candidate in enumerate(instances) if candidate['name'] == instance_name]
            if not matches:
                raise ValueError('Instance "%s" not found in the configuration file' % instance_name)

            return matches[0]

        # nothing to choose from
        if len(instances) <= 1:
            return 0

        # ask user to choose the instance
        output.write('Select the instance:\n')
        with output.prefix('  '):
            for position, candidate in enumerate(instances, start=1):
                output.write('[%d] %s' % (position, candidate['name']))
        output.write()

        try:
            choice = int(input('Enter number: '))
            output.write()
        except ValueError:
            # not a number: use a value that fails the range check below
            choice = 0

        if not 1 <= choice <= len(instances):
            raise ValueError('The value from 1 to %d was expected.' % len(instances))

        return choice - 1
Esempio n. 5
0
    def _run(self, instance_manager: AbstractInstanceManager, args: Namespace, output: AbstractOutputWriter):
        """Starts the instance or, with the "container" argument, only the container.

        Args:
            instance_manager: instance manager
            args: command arguments ("container" and "dry_run" are read)
            output: output writer

        Raises:
            InstanceNotRunningError: the "container" argument is set, but the instance is not running
        """
        dry_run = args.dry_run
        container_only = bool(args.container)

        if container_only:
            # a container can only be started on a running instance
            if not instance_manager.is_running():
                raise InstanceNotRunningError(instance_manager.instance_config.name)

            instance_manager.start_container(output, dry_run=dry_run)
        else:
            # start the instance
            with output.prefix('[dry-run] ' if dry_run else ''):
                instance_manager.start(output, dry_run)

        # no summary in the dry-run mode
        if dry_run:
            return

        # the instance name is added to the hint only if the project has several instances
        name_suffix = ''
        if len(instance_manager.project_config.instances) > 1:
            name_suffix = ' ' + instance_manager.instance_config.name

        if container_only:
            output.write('\nContainer was successfully started.\n'
                         'Use the "spotty sh%s" command to connect to the container.\n'
                         % name_suffix)
        else:
            output.write('\n%s\n'
                         '\nUse the "spotty sh%s" command to connect to the container.\n'
                         % (instance_manager.get_status_text(), name_suffix))
Esempio n. 6
0
    def _run(self, instance_manager: AbstractInstanceManager, args: Namespace,
             output: AbstractOutputWriter):
        """Downloads the files matching the "filters" argument using the instance manager.

        Args:
            instance_manager: instance manager
            args: command arguments ("filters" and "dry_run" are read)
            output: output writer
        """
        # an exclude-all rule followed by an include rule for the requested patterns
        download_filters = [
            {'exclude': ['*']},
            {'include': args.filters},
        ]

        dry_run = args.dry_run
        prefix = '[dry-run] ' if dry_run else ''
        with output.prefix(prefix):
            instance_manager.download(download_filters, output, dry_run)

        output.write('Done')
Esempio n. 7
0
    def deploy(self, project_config: ProjectConfig, output: AbstractOutputWriter, dry_run=False):
        """Deploys the instance using a CloudFormation stack.

        Args:
            project_config: project configuration
            output: output writer
            dry_run: forwarded to the bucket, sync and template parameter helpers;
                     when it's set, no instance profile and no stack are created

        Raises:
            ValueError: the instance type is Nitro-based
        """
        # Nitro-based instances are rejected up front
        if is_nitro_instance(self.instance_config.instance_type):
            raise ValueError('Currently Nitro-based instances are not supported.')

        # check availability zone and subnet configuration
        check_az_and_subnet(
            self._ec2,
            self.instance_config.region,
            self.instance_config.availability_zone,
            self.instance_config.subnet_id,
        )

        # the deployment availability zone is derived from the volumes
        volumes = self._get_volumes()
        deployment_az = self._get_availability_zone(volumes)

        # check the maximum price for a spot instance
        check_max_price(
            self._ec2,
            self.instance_config.instance_type,
            self.instance_config.on_demand,
            self.instance_config.max_price,
            deployment_az,
        )

        # create or get existing bucket for the project
        bucket_name = self.bucket.get_or_create_bucket(output, project_config.tags, dry_run)

        # sync the project with the bucket
        output.write('Syncing the project with S3 bucket...')
        sync_project_with_s3(
            project_config.project_dir,
            bucket_name,
            self.instance_config.region,
            project_config.sync_filters,
            dry_run,
        )

        # create or update instance profile (skipped in the dry-run mode)
        instance_profile_arn = None
        if not dry_run:
            profile_stack = InstanceProfileStackResource(
                self._project_name, self.instance_config.name, self.instance_config.region)
            instance_profile_arn = profile_stack.create_or_update_stack(
                self.instance_config.managed_policy_arns, output=output, tags=project_config.tags)

        output.write('Preparing CloudFormation template...')

        # prepare CloudFormation template and its parameters
        container = ContainerDeployment(project_config.project_name, volumes, project_config.container)
        with output.prefix('  '):
            template = prepare_instance_template(
                self.instance_config, volumes, deployment_az, container, output)

            parameters = self._get_template_parameters(
                instance_profile_arn, self.instance_config.name, bucket_name,
                project_config.sync_filters, volumes, container, output,
                dry_run=dry_run)

        # print information about the volumes
        volumes_table = render_volumes_info_table(container.volume_mounts, volumes)
        output.write('\nVolumes:\n%s\n' % volumes_table)

        # the stack is not created in the dry-run mode
        if dry_run:
            return

        self.stack.create_or_update_stack(template, parameters, output, project_config.tags)
Esempio n. 8
0
    def _run(self, instance_manager: AbstractInstanceManager, args: Namespace,
             output: AbstractOutputWriter):
        """Syncs the project with the running instance.

        Args:
            instance_manager: instance manager
            args: command arguments ("dry_run" is read)
            output: output writer

        Raises:
            InstanceNotRunningError: the instance is not running
        """
        # the instance must be started
        if not instance_manager.is_running():
            raise InstanceNotRunningError(
                instance_manager.instance_config.name)

        dry_run = args.dry_run
        prefix = '[dry-run] ' if dry_run else ''
        with output.prefix(prefix):
            try:
                instance_manager.sync(output, dry_run)
            except NothingToDoError as error:
                # report the reason (still prefixed) and skip the final message
                output.write(str(error))
                synced = False
            else:
                synced = True

        if synced:
            output.write('Done')
Esempio n. 9
0
    def _run(self, instance_manager: AbstractInstanceManager, args: Namespace,
             output: AbstractOutputWriter):
        """Starts the instance and prints its status with a connection hint.

        Args:
            instance_manager: instance manager
            args: command arguments ("dry_run" is read)
            output: output writer
        """
        dry_run = args.dry_run

        # start the instance
        prefix = '[dry-run] ' if dry_run else ''
        with output.prefix(prefix):
            instance_manager.start(output, dry_run)

        # no summary in the dry-run mode
        if dry_run:
            return

        # the instance name is added to the hint only if the project has several instances
        name_suffix = ''
        if len(instance_manager.project_config.instances) > 1:
            name_suffix = ' ' + instance_manager.instance_config.name

        summary = (
            '\nThe instance was successfully started.\n'
            '\n%s\n'
            '\nUse the "spotty ssh%s" command to connect to the Docker container.\n'
            % (instance_manager.get_status_text(), name_suffix))
        output.write(summary)
Esempio n. 10
0
    def delete(self, output: AbstractOutputWriter):
        """Terminates the instance, deletes its stack and applies volume deletion policies.

        Args:
            output: output writer
        """
        instance = self.get_instance()
        if not instance:
            output.write('The instance was already terminated.')
        else:
            # terminate the instance; "DONE" is printed on the same line
            output.write('Terminating the instance... ', newline=False)
            instance.terminate()
            output.write('DONE')

        # delete the stack without waiting for the deletion ("no_wait")
        self.stack_manager.delete_stack(output, no_wait=True)

        # apply deletion policies for the volumes
        output.write('Applying deletion policies for the volumes...')
        with output.prefix('  '):
            apply_deletion_policies(self._ec2, self.instance_config.volumes, output)
Esempio n. 11
0
    def delete(self, output: AbstractOutputWriter):
        """Terminates the instance, deletes its stack and applies volume deletion policies.

        Args:
            output: output writer
        """
        instance = self.get_instance()
        if not instance:
            output.write('The instance is already terminated.')
        else:
            # terminate the instance and wait until it's terminated
            output.write('Terminating the instance...')
            instance.terminate()
            instance.wait_instance_terminated()

        # delete the stack without waiting for the deletion ("no_wait")
        self.stack.delete_stack(output, no_wait=True)

        # apply deletion policies for the volumes
        output.write('Applying deletion policies for the volumes...')
        with output.prefix('  '):
            self._apply_deletion_policies(output)
Esempio n. 12
0
    def create_stack(self, template: str, machine_name: str, debug_mode: bool,
                     output: AbstractOutputWriter):
        """Creates an image stack and waits for the image to be created.

        Args:
            template: deployment template
            machine_name: machine name; the waited resource names are built from it
            debug_mode: if "True", the image waiter resource is not waited for
            output: output writer

        Raises:
            ValueError: the deployment already exists
        """
        # the stack must not exist yet
        if self.get_stack():
            raise ValueError('Deployment "%s" already exists.' %
                             self._stack_name)

        # create stack
        Stack.create(self._dm, self._stack_name, template)

        output.write('Waiting for the image to be created...')

        # ordered mapping: resource name -> progress message
        resource_messages = OrderedDict()
        resource_messages[machine_name] = 'launching the instance'
        resource_messages['%s-docker-waiter' % machine_name] = 'installing NVIDIA Docker'
        if not debug_mode:
            image_waiter = '%s-image-waiter' % machine_name
            resource_messages[image_waiter] = 'creating an image and terminating the instance'

        # wait for the stack to be created
        with output.prefix('  '):
            wait_resources(
                self._dm,
                self._ce,
                self._stack_name,
                resource_messages,
                instance_resource_name=machine_name,
                machine_name=machine_name,
                output=output,
            )

        if debug_mode:
            output.write('Stack "%s" was created in debug mode.' %
                         self._stack_name)
            return

        output.write('\n'
                     '--------------------------------------------------\n'
                     'Image "%s" was successfully created.\n'
                     'Use the "spotty start" command to run an instance.\n'
                     '--------------------------------------------------' %
                     self._image_name)
Esempio n. 13
0
    def _get_instance_config(project_config: ProjectConfig, instance_name: str,
                             output: AbstractOutputWriter):
        """Returns the configuration of the instance to work with.

        Args:
            project_config: project configuration (its "instances" list is read)
            instance_name: name of the instance to look for; if it's empty and
                           several instances are configured, the user is asked to choose one
            output: output writer

        Returns:
            An item of the "project_config.instances" list.

        Raises:
            ValueError: the name was not found or the entered number is out of range
        """
        if instance_name:
            # get the instance by name (the first match wins)
            matches = filter_list(project_config.instances, 'name',
                                  instance_name)
            if not matches:
                raise ValueError(
                    'Instance "%s" not found in the configuration file' %
                    instance_name)

            return matches[0]

        if len(project_config.instances) > 1:
            # ask user to choose the instance
            output.write('Select the instance:\n')
            with output.prefix('  '):
                for position, candidate in enumerate(project_config.instances,
                                                     start=1):
                    output.write('[%d] %s' % (position, candidate['name']))
            output.write()

            try:
                choice = int(input('Enter number: '))
                output.write()
            except ValueError:
                # not a number: use a value that fails the range check below
                choice = 0

            if not 1 <= choice <= len(project_config.instances):
                raise ValueError('The value from 1 to %d was expected.' %
                                 len(project_config.instances))

            index = choice - 1
        else:
            # nothing to choose from: take the first instance
            index = 0

        return project_config.instances[index]
Esempio n. 14
0
    def create_stack(self, template: str, parameters: dict, debug_mode: bool,
                     output: AbstractOutputWriter):
        """Creates an AMI stack and waits for the AMI to be created.

        Args:
            template: CloudFormation template
            parameters: parameters for the template
            debug_mode: if "True", NVIDIA Docker will be installed, but an AMI will not be created and the instance
                        will not be terminated, so the user can connect to the instance for debugging.
            output: output writer

        Raises:
            ValueError: the stack didn't reach the "CREATE_COMPLETE" status
        """
        # convert the parameters to the list of "ParameterKey"/"ParameterValue" pairs
        cf_parameters = [
            {'ParameterKey': name, 'ParameterValue': value}
            for name, value in parameters.items()
        ]

        # in the debug mode the stack is kept on failure
        on_failure = 'DO_NOTHING' if debug_mode else 'DELETE'

        stack = Stack.create_stack(
            cf=self._cf,
            StackName=self._stack_name,
            TemplateBody=template,
            Parameters=cf_parameters,
            Capabilities=['CAPABILITY_IAM'],
            OnFailure=on_failure,
        )

        output.write('Waiting for the AMI to be created...')

        # (resource name, progress message) pairs
        resource_messages = [
            ('InstanceProfile', 'creating IAM role for the instance'),
            ('Instance', 'launching the instance'),
            ('InstanceReadyWaitCondition', 'installing NVIDIA Docker'),
            ('AMICreatedWaitCondition',
             'creating AMI and terminating the instance'),
        ]

        # wait for the stack to be created
        with output.prefix('  '):
            stack = stack.wait_status_changed(
                waiting_status='CREATE_IN_PROGRESS',
                resource_messages=resource_messages,
                resource_success_status='CREATE_COMPLETE',
                output=output)

        if stack.status != 'CREATE_COMPLETE':
            raise ValueError(
                'Stack "%s" was not created.\n'
                'Please, see CloudFormation logs for the details.' %
                self._stack_name)

        if debug_mode:
            output.write('Stack "%s" was created in debug mode.' %
                         self._stack_name)
            return

        # the AMI ID is taken from the "NewAMI" stack output
        new_ami_ids = [
            row['OutputValue'] for row in stack.outputs
            if row['OutputKey'] == 'NewAMI'
        ]
        ami_id = new_ami_ids[0]

        output.write('\n'
                     '--------------------------------------------------\n'
                     'AMI "%s" (ID=%s) was successfully created.\n'
                     'Use the "spotty start" command to run an instance.\n'
                     '--------------------------------------------------' %
                     (parameters['ImageName'], ami_id))
Esempio n. 15
0
    def _run(self, instance_manager: AbstractInstanceManager, args: Namespace, output: AbstractOutputWriter):
        """Syncs the project with the instance.

        Args:
            instance_manager: instance manager
            args: command arguments ("dry_run" is read)
            output: output writer
        """
        dry_run = args.dry_run

        # everything written during the sync is prefixed in the dry-run mode
        prefix = '[dry-run] ' if dry_run else ''
        with output.prefix(prefix):
            instance_manager.sync(output, dry_run)

        output.write('Done')
Esempio n. 16
0
    def create_or_update_stack(self, template: str, parameters: dict,
                               instance_config: InstanceConfig,
                               output: AbstractOutputWriter):
        """Runs CloudFormation template.

        An existing stack with the same name is deleted first, then a new stack
        is created and its tasks are waited for.

        Args:
            template: CloudFormation template
            parameters: parameters for the template
            instance_config: instance configuration; it defines which tasks are enabled
            output: output writer

        Returns:
            The stack returned by "wait_status_changed()".
        """
        # delete the stack if it exists
        if Stack.get_by_name(self._cf, self._stack_name):
            self.delete_stack(output)

        # create new stack
        cf_parameters = [
            {'ParameterKey': name, 'ParameterValue': value}
            for name, value in parameters.items()
        ]
        stack = Stack.create_stack(
            cf=self._cf,
            StackName=self._stack_name,
            TemplateBody=template,
            Parameters=cf_parameters,
            Capabilities=['CAPABILITY_IAM'],
            OnFailure='DO_NOTHING',
        )

        output.write('Waiting for the stack to be created...')

        # (message, finish resource, enabled) for each task; the start resource
        # of a task is the finish resource of the previous one ("None" for the first task)
        steps = [
            ('launching the instance', 'Instance', True),
            ('preparing the instance', 'MountingVolumesSignal', True),
            ('mounting volumes', 'SettingDockerRootSignal',
             bool(instance_config.volumes)),
            ('setting Docker data root', 'SyncingProjectSignal',
             bool(instance_config.docker_data_root)),
            ('syncing project files', 'RunningInstanceStartupCommandsSignal', True),
            ('running instance startup commands', 'BuildingDockerImageSignal',
             bool(instance_config.commands)),
            ('building Docker image', 'StartingContainerSignal',
             bool(instance_config.dockerfile_path)),
            ('starting container', 'RunningContainerStartupCommandsSignal', True),
            ('running container startup commands', 'DockerReadyWaitCondition',
             bool(instance_config.container_config.commands)),
        ]

        tasks = []
        start_resource = None
        for message, finish_resource, enabled in steps:
            tasks.append(Task(
                message=message,
                start_resource=start_resource,
                finish_resource=finish_resource,
                enabled=enabled,
            ))
            start_resource = finish_resource

        # wait for the stack to be created
        with output.prefix('  '):
            stack.wait_tasks(tasks,
                             resource_success_status='CREATE_COMPLETE',
                             resource_fail_status='CREATE_FAILED',
                             output=output)
            created_stack = stack.wait_status_changed(
                stack_waiting_status='CREATE_IN_PROGRESS', output=output)

        return created_stack
Esempio n. 17
0
    def deploy(self, container_commands: DockerCommands, bucket_name: str,
               data_transfer: DataTransfer, output: AbstractOutputWriter, dry_run: bool = False):
        """Deploys the instance using a CloudFormation stack.

        Args:
            container_commands: Docker commands used to prepare the template
            bucket_name: bucket name; the project is uploaded only if it's not "None"
            data_transfer: data transfer object used to upload the project and to build the sync command
            output: output writer
            dry_run: if "True", no instance profile, key pair or stack is created

        Raises:
            ValueError: the stack didn't reach the "CREATE_COMPLETE" status
        """
        # get deployment availability zone
        deployment_az = update_availability_zone(
            self._ec2, self.instance_config.availability_zone, self.instance_config.volumes)

        # check availability zone and subnet configuration
        check_az_and_subnet(self._ec2, self.instance_config.region, deployment_az, self.instance_config.subnet_id)

        # check the maximum price for a spot instance
        check_max_spot_price(
            self._ec2, self.instance_config.instance_type, self.instance_config.is_spot_instance,
            self.instance_config.max_price, deployment_az)

        # sync the project with the S3 bucket
        if bucket_name is not None:
            output.write('Syncing the project with the S3 bucket...')
            data_transfer.upload_local_to_bucket(bucket_name, dry_run=dry_run)

        instance_profile_arn = None
        if not dry_run:
            # use the configured instance profile or create/update a stack for it
            profile_stack_manager = InstanceProfileStackManager(
                self._project_name, self.instance_config.name, self.instance_config.region)
            instance_profile_arn = self.instance_config.instance_profile_arn
            if not instance_profile_arn:
                instance_profile_arn = profile_stack_manager.create_or_update_stack(
                    self.instance_config.managed_policy_arns, output=output)

            # create a key pair if it doesn't exist
            self.key_pair_manager.maybe_create_key()

        output.write('Preparing CloudFormation template...')

        # prepare CloudFormation template and its parameters
        with output.prefix('  '):
            sync_project_cmd = data_transfer.get_download_bucket_to_instance_command(bucket_name=bucket_name)
            template = prepare_instance_template(
                ec2=self._ec2,
                instance_config=self.instance_config,
                docker_commands=container_commands,
                availability_zone=deployment_az,
                sync_project_cmd=sync_project_cmd,
                output=output,
            )

            parameters = get_template_parameters(
                ec2=self._ec2,
                instance_config=self.instance_config,
                instance_profile_arn=instance_profile_arn,
                bucket_name=bucket_name,
                key_pair_name=self.key_pair_manager.key_name,
                output=output,
            )

        # print information about the volumes
        volumes_table = render_volumes_info_table(self.instance_config.volume_mounts, self.instance_config.volumes)
        output.write('\nVolumes:\n%s\n'
                     % volumes_table)

        # the stack is not created in the dry-run mode
        if dry_run:
            return

        stack = self.stack_manager.create_or_update_stack(template, parameters, self.instance_config, output)
        if stack.status == 'CREATE_COMPLETE':
            return

        logs_str = 'Please, see CloudFormation logs for the details.'

        # download CloudFormation logs from the instance if it was created
        if self.get_instance():
            log_paths = download_logs(
                bucket_name=bucket_name,
                instance_name=self.instance_config.name,
                stack_uuid=stack.stack_uuid,
                region=self.instance_config.region,
            )

            logs_str = 'Please, see the logs for the details:\n  ' + '\n  '.join(log_paths)

        raise ValueError('Stack "%s" was not created.\n%s' % (stack.name, logs_str))
Esempio n. 18
0
    def deploy(self,
               container_commands: DockerCommands,
               bucket_name: str,
               data_transfer: DataTransfer,
               output: AbstractOutputWriter,
               dry_run: bool = False):
        """Deploys the instance using a Deployment Manager stack.

        Args:
            container_commands: Docker commands used to prepare the template
            bucket_name: bucket name; the project is uploaded only if it's not "None"
            data_transfer: data transfer object used to upload the project and to build the sync command
            output: output writer
            dry_run: forwarded to the upload and disk helpers; when it's set, the stack is not created

        Raises:
            ValueError: the machine type is not available in the zone
        """
        # check machine type
        machine_types = self._ce.get_machine_types(self.instance_config.machine_type)
        if not machine_types:
            raise ValueError(
                '"%s" machine type is not available in the "%s" zone.' %
                (self.instance_config.machine_type, self.instance_config.zone))

        # check GPU configuration
        check_gpu_configuration(self._ce, self.instance_config.gpu)

        # remove the stack if it exists to make all the disks available
        # NOTE(review): the stack is deleted even in the dry-run mode - confirm that it's intended
        manager = self.stack_manager
        manager.delete_stack(output=output)

        # sync the project with the bucket
        if bucket_name is not None:
            output.write('Syncing the project with the bucket...')
            data_transfer.upload_local_to_bucket(bucket_name, dry_run=dry_run)

        # create disks for the volumes
        if self.instance_config.volumes:
            output.write('\nCreating disks...')
            with output.prefix('  '):
                create_disks(
                    self._ce,
                    self.instance_config.volumes,
                    output=output,
                    dry_run=dry_run,
                )
            output.write('')

        # prepare Deployment Manager template
        output.write('Preparing the deployment template...')
        with output.prefix('  '):
            # get an image
            image = get_image(self._ce, self.instance_config.image_uri,
                              self.instance_config.image_name)
            image_link = image.self_link

            # get or create an SSH key
            public_key_value = self.ssh_key_manager.get_public_key_value()

            # prepare the deployment template
            sync_project_cmd = data_transfer.get_download_bucket_to_instance_command(
                bucket_name=bucket_name)
            template = prepare_instance_template(
                instance_config=self.instance_config,
                docker_commands=container_commands,
                image_link=image_link,
                bucket_name=bucket_name,
                sync_project_cmd=sync_project_cmd,
                public_key_value=public_key_value,
                service_account_email=self._credentials.service_account_email,
                output=output,
            )

        output.write('')

        # print information about the volumes
        volumes_table = render_volumes_info_table(
            self.instance_config.volume_mounts, self.instance_config.volumes)
        output.write('Volumes:\n%s\n' % volumes_table)

        # the stack is not created in the dry-run mode
        if dry_run:
            return

        manager.create_stack(template, output=output)