예제 #1
0
 def _get_block_device_mappings(self, resources_needed):
     """Build the EBS block-device mapping for the root volume.

     Args:
         resources_needed: mapping with an 'hdd' entry holding a size
             string that ``memstr2int`` can parse.

     Returns:
         A one-element list describing ``/dev/sda1``: a 'standard' EBS
         volume that is deleted on termination and whose 'VolumeSize' is
         the requested size expressed in whole multiples of
         ``memstr2int('1g')``, rounded up.

     Raises:
         KeyError: if ``resources_needed`` has no 'hdd' entry.
     """
     requested = memstr2int(resources_needed['hdd'])
     unit = memstr2int('1g')
     # `/` produces a float on Python 3, but the volume size has to be a
     # whole number. Ceiling division (negate, floor, negate) guarantees
     # the volume is never smaller than what was asked for.
     volume_size = int(-(-requested // unit))

     return [{
         'DeviceName': '/dev/sda1',
         'Ebs': {
             'DeleteOnTermination': True,
             'VolumeSize': volume_size,
             'VolumeType': 'standard'
         }
     }]
예제 #2
0
    def _select_instance_type(self, resources_needed):
        """Pick the cheapest instance type that covers the requested resources.

        Entries of ``_instance_specs`` are visited in ascending order of
        ``self.prices``; the first one whose 'cpus', 'ram' and 'gpus' are
        all at least the requested amounts is selected.

        Args:
            resources_needed: mapping with 'cpus', 'ram' and 'gpus' entries.
                'ram' is parsed with ``memstr2int``; the other two with
                ``int``.

        Returns:
            The ``_instance_specs`` key of the selected instance type.

        Raises:
            ValueError: if no entry satisfies all three requirements.
        """
        by_price = sorted(_instance_specs.items(),
                          key=lambda entry: self.prices[entry[0]])

        for name, spec in by_price:
            # Checks run in the order cpus -> ram -> gpus and stop at the
            # first one that fails.
            if int(spec['cpus']) < int(resources_needed['cpus']):
                continue
            if memstr2int(spec['ram']) < memstr2int(resources_needed['ram']):
                continue
            if int(spec['gpus']) < int(resources_needed['gpus']):
                continue
            return name

        message = ('No instances that satisfy requirements {} '
                   'can be found')
        raise ValueError(message.format(resources_needed))
예제 #3
0
    def _generate_machine_type(self, resources_needed={}):
        if not any(resources_needed):
            machine_type = "zones/{}/machineTypes/n1-standard-1".format(
                self.zone)
        else:
            cpus = int(resources_needed['cpus'])
            default_ram_per_cpu = 4096
            ram = default_ram_per_cpu * cpus

            if 'ram' in resources_needed.keys():
                ram = memstr2int(resources_needed['ram']) / memstr2int('1Mb')
                ram = int(math.ceil(ram / 256.0) * 256)

            ram_per_cpu = ram / cpus
            assert 1024 <= ram_per_cpu and ram_per_cpu <= 6192, \
                "RAM per cpu should be between 0.9 and 6.5 Gb"

            machine_type = "zones/{}/machineTypes/custom-{}-{}".format(
                self.zone, cpus, ram)

        return machine_type
예제 #4
0
    def _get_instance_config(self, resources_needed, queue_name, timeout=300):
        """Assemble the request body describing a new worker instance.

        Fetches the image returned by ``getFromFamily`` for the 'debian-9'
        family, renders the startup script, reads the credentials file
        named by the GOOGLE_APPLICATION_CREDENTIALS environment variable
        and the optional auth cookie, and packs everything into the
        instance metadata.

        Args:
            resources_needed: mapping of requested resources. It is passed
                to ``_generate_machine_type``; an optional 'hdd' entry
                (size string parsed by ``memstr2int``) sets the boot disk
                size.
            queue_name: value stored under the 'queue_name' metadata key.
            timeout: value stored, as a string, under the 'timeout'
                metadata key.

        Returns:
            A dict with 'machineType', 'disks', 'networkInterfaces',
            'serviceAccounts', 'metadata' and 'scheduling' entries.

        Raises:
            KeyError: if GOOGLE_APPLICATION_CREDENTIALS is not set.
        """
        image_response = self.compute.images().getFromFamily(
            project='debian-cloud', family='debian-9').execute()
        source_disk_image = image_response['selfLink']

        # Configure the machine
        machine_type = self._generate_machine_type(resources_needed)
        self.logger.debug('Machine type = {}'.format(machine_type))

        with open(self.startup_script_file, 'r') as f:
            startup_script = f.read()
        if self.runner_args is not None:
            startup_script = startup_script.replace("{studioml_branch}",
                                                    self.runner_args.branch)
            startup_script = insert_user_startup_script(
                self.runner_args.user_startup_script, startup_script,
                self.logger)
        else:
            # Without runner arguments fall back to the master branch.
            startup_script = startup_script.replace("{studioml_branch}",
                                                    "master")

        self.logger.info('Startup script:')
        self.logger.info(startup_script)

        with open(os.environ['GOOGLE_APPLICATION_CREDENTIALS'], 'r') as f:
            credentials = f.read()

        if self.auth_cookie is not None:
            auth_key = os.path.basename(self.auth_cookie)
            with open(self.auth_cookie, 'r') as f:
                auth_data = f.read()
        else:
            auth_key = None
            auth_data = None

        # Metadata is readable from the instance and allows you to
        # pass configuration from deployment scripts to instances.
        metadata_items = [
            {'key': key, 'value': value}
            for key, value in (
                ('startup-script', startup_script),
                ('credentials', credentials),
                ('queue_name', queue_name),
                ('auth_key', auth_key),
                ('auth_data', auth_data),
                ('timeout', str(timeout)),
            )
        ]

        config = {
            'machineType': machine_type,

            # Specify the boot disk and the image to use as a source.
            'disks': [{
                'boot': True,
                'autoDelete': True,
                'initializeParams': {
                    'sourceImage': source_disk_image,
                }
            }],

            # Specify a network interface with NAT to access the public
            # internet.
            'networkInterfaces': [{
                'network': 'global/networks/default',
                'accessConfigs': [{
                    'type': 'ONE_TO_ONE_NAT',
                    'name': 'External NAT'
                }]
            }],

            # Allow the instance to access cloud storage and logging.
            'serviceAccounts': [{
                'email': 'default',
                'scopes': [
                    'https://www.googleapis.com/auth/cloud-platform',
                ]
            }],

            'metadata': {
                'items': metadata_items
            },

            # The key used to be misspelled "preemptilble", so the flag
            # never ended up in the field it was meant for.
            "scheduling": {
                "preemptible": False
            }
        }

        if 'hdd' in resources_needed:
            # `/` yields a float on Python 3; the disk size must be a whole
            # number of Gb, so round up with ceiling integer division.
            disk_size_gb = int(
                -(-memstr2int(resources_needed['hdd']) // memstr2int('1Gb')))
            config['disks'][0]['initializeParams']['diskSizeGb'] = \
                disk_size_gb

        return config
예제 #5
0
    def _get_instance_config(self, resources_needed, queue_name, timeout=300):
        """Assemble the request body describing a new worker instance.

        Fetches the image returned by ``getFromFamily`` for the
        'ubuntu-1604-lts' family, renders the startup script (user script,
        studio install script and format placeholders), reads the
        credentials file named by the GOOGLE_APPLICATION_CREDENTIALS
        environment variable and the optional auth cookie, and packs
        everything into the instance metadata. When GPUs are requested a
        'guestAccelerators' section is added.

        Args:
            resources_needed: mapping of requested resources. It is passed
                to ``_generate_machine_type``; optional 'hdd' (size string
                parsed by ``memstr2int``) sets the boot disk size and
                optional 'gpus' sets the accelerator count.
            queue_name: value stored under the 'queue_name' metadata key.
            timeout: value stored, as a string, under the 'timeout'
                metadata key.

        Returns:
            A dict with 'machineType', 'disks', 'networkInterfaces',
            'serviceAccounts', 'metadata' and 'scheduling' entries, plus
            'guestAccelerators' when at least one GPU is requested.

        Raises:
            KeyError: if GOOGLE_APPLICATION_CREDENTIALS is not set.
        """
        # NOTE: a lookup of a project-specific image (project
        # 'studio-ed756', family 'studioml') used to be attempted here but
        # was disabled; the stock Ubuntu image is always used.
        image_response = self.compute.images().getFromFamily(
            project='ubuntu-os-cloud', family='ubuntu-1604-lts').execute()
        source_disk_image = image_response['selfLink']

        # Configure the machine
        machine_type = self._generate_machine_type(resources_needed)
        self.logger.debug('Machine type = {}'.format(machine_type))

        with open(self.startup_script_file, 'r') as f:
            startup_script = f.read()

        with open(self.install_studio_script) as f:
            install_studio_script = f.read()

        startup_script = insert_user_startup_script(self.user_startup_script,
                                                    startup_script,
                                                    self.logger)

        # The install script is spliced in before .format() runs, so any
        # format placeholders it contains are filled in as well.
        startup_script = startup_script.replace('{install_studio}',
                                                install_studio_script)
        startup_script = startup_script.format(studioml_branch=self.branch,
                                               repo_url=self.repo_url,
                                               log_bucket=self.log_bucket,
                                               use_gpus=resources_needed.get(
                                                   'gpus', 0))

        self.logger.info('Startup script:')
        self.logger.info(startup_script)

        with open(os.environ['GOOGLE_APPLICATION_CREDENTIALS'], 'r') as f:
            credentials = f.read()

        if self.auth_cookie is not None:
            auth_key = os.path.basename(self.auth_cookie)
            with open(self.auth_cookie, 'r') as f:
                auth_data = f.read()
        else:
            auth_key = None
            auth_data = None

        # Metadata is readable from the instance and allows you to
        # pass configuration from deployment scripts to instances.
        metadata_items = [
            {'key': key, 'value': value}
            for key, value in (
                ('startup-script', startup_script),
                ('credentials', credentials),
                ('queue_name', queue_name),
                ('auth_key', auth_key),
                ('auth_data', auth_data),
                ('timeout', str(timeout)),
            )
        ]

        config = {
            'machineType': machine_type,

            # Specify the boot disk and the image to use as a source.
            'disks': [{
                'boot': True,
                'autoDelete': True,
                'initializeParams': {
                    'sourceImage': source_disk_image,
                }
            }],

            # Specify a network interface with NAT to access the public
            # internet.
            'networkInterfaces': [{
                'network': 'global/networks/default',
                'accessConfigs': [{
                    'type': 'ONE_TO_ONE_NAT',
                    'name': 'External NAT'
                }]
            }],

            # Allow the instance to access cloud storage and logs.
            'serviceAccounts': [{
                'email': 'default',
                'scopes': [
                    'https://www.googleapis.com/auth/cloud-platform',
                ]
            }],

            'metadata': {
                'items': metadata_items
            },

            # The key used to be misspelled "preemptilble", so the flag
            # never ended up in the field it was meant for.
            "scheduling": {
                "preemptible": False
            }
        }

        if 'hdd' in resources_needed:
            # `/` yields a float on Python 3; the disk size must be a whole
            # number of Gb, so round up with ceiling integer division.
            disk_size_gb = int(
                -(-memstr2int(resources_needed['hdd']) // memstr2int('1Gb')))
            config['disks'][0]['initializeParams']['diskSizeGb'] = \
                disk_size_gb

        # 'gpus' is optional (the startup script above already treats it
        # that way) and may arrive as a string, so read it defensively.
        gpu_count = int(resources_needed.get('gpus') or 0)
        if gpu_count > 0:
            gpu_type = "nvidia-tesla-k80"
            config['guestAccelerators'] = [{
                "acceleratorType":
                "projects/{}/zones/{}/acceleratorTypes/{}".format(
                    self.projectid, self.zone, gpu_type),
                "acceleratorCount": gpu_count
            }]

            config["scheduling"]['onHostMaintenance'] = "TERMINATE"
            # automaticRestart belongs to the scheduling section; it was
            # previously written to the top level of the config.
            config["scheduling"]['automaticRestart'] = True

        return config