def validate(self, metadata_specs):
    '''
    Check that this metadata has the correct metadata keys and that it has
    metadata values of the correct types.
    '''
    expected_keys = set(spec.key for spec in metadata_specs)
    for key in self._metadata_keys:
        if key not in expected_keys:
            raise UsageError('Unexpected metadata key: %s' % (key,))
    for spec in metadata_specs:
        if spec.key in self._metadata_keys:
            value = getattr(self, spec.key)
            if spec.type is float and isinstance(value, int):
                # cast int to float
                value = float(value)
            # Validate formatted string fields
            if issubclass(spec.type, str) and spec.formatting is not None and value:
                try:
                    if spec.formatting == 'duration':
                        formatting.parse_duration(value)
                    elif spec.formatting == 'size':
                        formatting.parse_size(value)
                    elif spec.formatting == 'date':
                        formatting.parse_datetime(value)
                except ValueError as e:
                    raise UsageError(str(e))
            if value is not None and not isinstance(value, spec.type):
                raise UsageError(
                    'Metadata value for %s should be of type %s, was %s (type %s)'
                    % (spec.key, spec.type.__name__, value, type(value).__name__)
                )
        elif not spec.generated and not spec.optional:
            raise UsageError('Missing metadata key: %s' % (spec.key,))
def validate(self, metadata_specs):
    '''
    Check that this metadata has the correct metadata keys and that it has
    metadata values of the correct types.
    '''
    expected_keys = set(spec.key for spec in metadata_specs)
    for key in self._metadata_keys:
        if key not in expected_keys:
            raise UsageError('Unexpected metadata key: %s' % (key,))
    for spec in metadata_specs:
        if spec.key in self._metadata_keys:
            value = getattr(self, spec.key)
            if spec.type is float and isinstance(value, int):
                # cast int to float
                value = float(value)
            # Validate formatted string fields
            if issubclass(spec.type, basestring) and spec.formatting is not None and value:
                try:
                    if spec.formatting == 'duration':
                        formatting.parse_duration(value)
                    elif spec.formatting == 'size':
                        formatting.parse_size(value)
                    elif spec.formatting == 'date':
                        formatting.parse_datetime(value)
                except ValueError as e:
                    raise UsageError(e.message)
            if value is not None and not isinstance(value, spec.type):
                raise UsageError(
                    'Metadata value for %s should be of type %s, was %s (type %s)'
                    % (spec.key, spec.type.__name__, value, type(value).__name__)
                )
        elif not spec.generated:
            raise UsageError('Missing metadata key: %s' % (spec.key,))
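To make the formatting check concrete, here is a minimal sketch of the spec shape that validate() reads. The MetadataSpec constructor and the example field values are assumptions inferred from the attributes accessed above (spec.key, spec.type, spec.formatting, spec.generated, spec.optional), not the project's actual definition.

from collections import namedtuple

# Hypothetical spec shape, inferred from what validate() accesses.
MetadataSpec = namedtuple('MetadataSpec', 'key type formatting generated optional')

specs = [
    MetadataSpec('request_time', str, 'duration', False, True),
    MetadataSpec('request_memory', str, 'size', False, True),
]

# With specs like these, a request_time of '2d' passes the duration check,
# while a value like 'two days' makes formatting.parse_duration raise
# ValueError, which validate() converts into a UsageError.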
def _compute_request_time(self, bundle):
    """
    Compute the time limit used for scheduling the run.
    """
    # TODO: Remove this once we want to deprecate old versions
    if not bundle.metadata.request_time:
        return formatting.parse_duration('1d')
    return formatting.parse_duration(bundle.metadata.request_time)
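formatting.parse_duration itself is not shown in this section. Based on its use above (turning strings like '1d' into a numeric time limit), it presumably maps a number plus a unit suffix to seconds. A minimal stand-in sketch; the suffix table and float return type are assumptions, not the project's actual unit list:

def parse_duration_sketch(s):
    # Stand-in for formatting.parse_duration: '10' -> 10.0, '1d' -> 86400.0.
    units = {'s': 1, 'm': 60, 'h': 3600, 'd': 86400, 'y': 365 * 86400}
    try:
        if s[-1].isdigit():
            return float(s)
        return float(s[:-1]) * units[s[-1]]
    except (IndexError, KeyError, ValueError):
        raise ValueError('Invalid duration: %s' % s)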
def default_user_info(self):
    info = self.config['server'].get(
        'default_user_info', {'time_quota': '1y', 'disk_quota': '1t'}
    )
    info['time_quota'] = formatting.parse_duration(info['time_quota'])
    info['disk_quota'] = formatting.parse_size(info['disk_quota'])
    return info
def _compute_request_time(self, bundle):
    """
    Compute the time limit used for scheduling the run.
    The default is min(time quota the user has left, global max)
    """
    if not bundle.metadata.request_time:
        return self._max_request_time
    return formatting.parse_duration(bundle.metadata.request_time)
def default_user_info(self):
    info = self.config['server'].get(
        'default_user_info', {'time_quota': '1y', 'disk_quota': '1t'}
    )
    return {
        'time_quota': formatting.parse_duration(info['time_quota']),
        'disk_quota': formatting.parse_size(info['disk_quota']),
    }
def default_user_info(self):
    info = self.config['server'].get(
        'default_user_info', {'time_quota': '1y', 'disk_quota': '1t', 'parallel_run_quota': 3}
    )
    return {
        'time_quota': formatting.parse_duration(info['time_quota']),
        'disk_quota': formatting.parse_size(info['disk_quota']),
        'parallel_run_quota': info['parallel_run_quota'],
    }
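The conversion step matters because quotas are stored as raw numbers. Assuming parse_duration returns seconds and parse_size returns bytes (the unit bases below are assumptions, not confirmed by this code), the defaults come out roughly as follows:

seconds_per_year = 365 * 24 * 60 * 60  # 31536000, if '1y' means a 365-day year
bytes_per_tb = 2 ** 40                 # 1099511627776 if '1t' is binary; 10 ** 12 if decimal
# parallel_run_quota (3) is already numeric and passes through unchanged.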
def _compute_request_time(self, bundle):
    """
    Compute the time limit used for scheduling the run.
    The default is min(time quota the user has left, global max)
    """
    if not bundle.metadata.request_time:
        return min(
            self._model.get_user_time_quota_left(bundle.owner_id) - 1, self._max_request_time
        )
    return formatting.parse_duration(bundle.metadata.request_time)
def _compute_request_time(self, bundle, user_info=None):
    """
    Compute the time limit used for scheduling the run.
    The default is min(time quota the user has left, global max)
    """
    if not bundle.metadata.request_time:
        return min(
            self._model.get_user_time_quota_left(bundle.owner_id, user_info) - 1,
            self._max_request_time,
        )
    return formatting.parse_duration(bundle.metadata.request_time)
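A quick worked example of the default branch: the `- 1` keeps the scheduled limit strictly below the user's remaining quota.

# Illustrative values only:
quota_left = 7200    # seconds the user has left (from get_user_time_quota_left)
global_max = 86400   # self._max_request_time
assert min(quota_left - 1, global_max) == 7199  # the quota is the binding constraint here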
def _compute_request_time(self, bundle):
    """
    Compute the time limit used for scheduling the run.
    """
    return formatting.parse_duration(bundle.metadata.request_time)
def start_bundle(self, bundle, bundle_store, parent_dict, username):
    '''
    Sets up all the temporary files and then dispatches the job.
    username: the username of the owner of the bundle
    Returns the bundle information.
    '''
    # Create a temporary directory
    temp_dir = canonicalize.get_current_location(bundle_store, bundle.uuid)
    temp_dir = os.path.realpath(temp_dir)  # Follow symlinks
    path_util.make_directory(temp_dir)

    # Copy all the dependencies to that temporary directory.
    pairs = bundle.get_dependency_paths(bundle_store, parent_dict, temp_dir)
    print >>sys.stderr, 'RemoteMachine.start_bundle: copying dependencies of %s to %s' % (bundle.uuid, temp_dir)
    for (source, target) in pairs:
        path_util.copy(source, target, follow_symlinks=False)

    # Set defaults for the dispatcher.
    docker_image = self.default_docker_image
    if bundle.metadata.request_docker_image:
        docker_image = bundle.metadata.request_docker_image
    request_time = self.default_request_time
    if bundle.metadata.request_time:
        request_time = bundle.metadata.request_time
    request_memory = self.default_request_memory
    if bundle.metadata.request_memory:
        request_memory = bundle.metadata.request_memory
    request_cpus = self.default_request_cpus
    if bundle.metadata.request_cpus:
        request_cpus = bundle.metadata.request_cpus
    request_gpus = self.default_request_gpus
    if bundle.metadata.request_gpus:
        request_gpus = bundle.metadata.request_gpus
    request_queue = self.default_request_queue
    if bundle.metadata.request_queue:
        request_queue = bundle.metadata.request_queue
    request_priority = self.default_request_priority
    if bundle.metadata.request_priority:
        request_priority = bundle.metadata.request_priority

    script_file = temp_dir + '.sh'  # main entry point
    ptr_temp_dir = '$temp_dir'
    # 1) If no argument to script_file, use the temp_dir (e.g., Torque, master/worker share file system).
    # 2) If argument is 'use_script_for_temp_dir', use the script to determine temp_dir (e.g., qsub, master/worker do not share file system).
    set_temp_dir_header = 'if [ -z "$1" ]; then temp_dir=' + temp_dir + '; else temp_dir=`readlink -f $0 | sed -e \'s/\\.sh$//\'`; fi\n'

    # Write the command to be executed to a script.
    if docker_image:
        internal_script_file = temp_dir + '-internal.sh'  # run inside the docker container
        # These paths depend on $temp_dir, an environment variable which will be set (referenced inside script_file)
        ptr_container_file = ptr_temp_dir + '.cid'  # contains the docker container id
        ptr_action_file = ptr_temp_dir + '.action'  # send actions to the container (e.g., kill)
        ptr_status_dir = ptr_temp_dir + '.status'  # receive information from the container (e.g., memory)
        ptr_script_file = ptr_temp_dir + '.sh'  # main entry point
        ptr_internal_script_file = ptr_temp_dir + '-internal.sh'  # run inside the docker container
        # Names of files inside the docker container
        docker_temp_dir = bundle.uuid
        docker_internal_script_file = bundle.uuid + '-internal.sh'

        # 1) script_file starts the docker container and runs internal_script_file in docker.
        # --rm removes the docker container once the job terminates (note that this makes things slow)
        # -v mounts the internal and user scripts and the temp directory
        # Trap SIGTERM and forward it to docker.
        with open(script_file, 'w') as f:
            f.write(set_temp_dir_header)

            # Monitor CPU/memory/disk
            def copy_if_exists(source_template, arg, target):
                source = source_template % arg
                # -f because target might be read-only
                return 'if [ -e %s ] && [ -e %s ]; then cp -f %s %s; fi' % (arg, source, source, target)

            monitor_commands = [
                # Report on status (memory, cpu, etc.)
                'mkdir -p %s' % ptr_status_dir,
                'if [ -e /cgroup ]; then cgroup=/cgroup; else cgroup=/sys/fs/cgroup; fi',  # find where cgroup is
                copy_if_exists('$cgroup/cpuacct/docker/$(cat %s)/cpuacct.stat', ptr_container_file, ptr_status_dir),
                copy_if_exists('$cgroup/memory/docker/$(cat %s)/memory.usage_in_bytes', ptr_container_file, ptr_status_dir),
                copy_if_exists('$cgroup/blkio/docker/$(cat %s)/blkio.throttle.io_service_bytes', ptr_container_file, ptr_status_dir),
                # Respond to kill action
                '[ -e %s ] && [ "$(cat %s)" == "kill" ] && docker kill $(cat %s) && rm %s' % (ptr_action_file, ptr_action_file, ptr_container_file, ptr_action_file),
                # Sleep
                'sleep 1',
            ]
            f.write('while [ -e %s ]; do\n %s\ndone &\n' % (ptr_temp_dir, '\n '.join(monitor_commands)))

            # Tell docker to constrain resources (memory).
            # Note: limiting memory is not always supported. See:
            # http://programster.blogspot.com/2014/09/docker-implementing-container-memory.html
            resource_args = ''
            if bundle.metadata.request_memory:
                resource_args += ' -m %s' % int(formatting.parse_size(bundle.metadata.request_memory))
            # TODO: would constrain --cpuset=0, but difficult because don't know the CPU ids
            f.write("docker run%s --rm --cidfile %s -u %s -v %s:/%s -v %s:/%s %s bash %s & wait $!\n" % (
                resource_args, ptr_container_file, os.geteuid(),
                ptr_temp_dir, docker_temp_dir,
                ptr_internal_script_file, docker_internal_script_file,
                docker_image, docker_internal_script_file))

        # 2) internal_script_file runs the actual command inside the docker container
        with open(internal_script_file, 'w') as f:
            # Make sure I have a username
            username = pwd.getpwuid(os.getuid())[0]  # do this because os.getlogin() doesn't always work
            f.write("echo %s::%s:%s::/:/bin/bash >> /etc/passwd\n" % (username, os.geteuid(), os.getgid()))
            # Do this because .bashrc isn't sourced automatically (even with --login, though it works with docker -t -i, strange...)
            f.write(". .bashrc || exit 1\n")
            # Go into the temp directory
            f.write("cd %s &&\n" % docker_temp_dir)
            # Run the actual command
            f.write('(%s) > stdout 2>stderr\n' % bundle.command)
    else:
        # Just run the command regularly without docker
        with open(script_file, 'w') as f:
            f.write(set_temp_dir_header)
            f.write("cd %s &&\n" % ptr_temp_dir)
            f.write('(%s) > stdout 2>stderr\n' % bundle.command)

    # Determine resources to request
    resource_args = []
    if request_time:
        resource_args.extend(['--request_time', formatting.parse_duration(request_time)])
    if request_memory:
        resource_args.extend(['--request_memory', formatting.parse_size(request_memory)])
    if request_cpus:
        resource_args.extend(['--request_cpus', request_cpus])
    if request_gpus:
        resource_args.extend(['--request_gpus', request_gpus])
    if request_queue:
        resource_args.extend(['--request_queue', request_queue])
    if request_priority:
        resource_args.extend(['--request_priority', request_priority])
    if username:
        resource_args.extend(['--username', username])

    # Start the command
    args = self.dispatch_command.split() + ['start'] + map(str, resource_args) + [script_file]
    if self.verbose >= 1:
        print '=== start_bundle(): running %s' % args
    result = json.loads(self.run_command_get_stdout(args))
    if self.verbose >= 1:
        print '=== start_bundle(): got %s' % result

    # Return the information about the job.
    return {
        'bundle': bundle,
        'temp_dir': temp_dir,
        'job_handle': result['handle'],
        'docker_image': docker_image,
    }
def start_bundle(self, bundle, bundle_store, parent_dict, username):
    '''
    Sets up all the temporary files and then dispatches the job.
    username: the username of the owner of the bundle
    Returns the bundle information.
    '''
    # Create a temporary directory
    temp_dir = canonicalize.get_current_location(bundle_store, bundle.uuid)
    temp_dir = os.path.realpath(temp_dir)  # Follow symlinks
    path_util.make_directory(temp_dir)

    # Copy all the dependencies to that temporary directory.
    pairs = bundle.get_dependency_paths(bundle_store, parent_dict, temp_dir)
    print >>sys.stderr, 'RemoteMachine.start_bundle: copying dependencies of %s to %s' % (bundle.uuid, temp_dir)
    for (source, target) in pairs:
        path_util.copy(source, target, follow_symlinks=False)

    # Set docker image
    docker_image = self.default_docker_image
    if bundle.metadata.request_docker_image:
        docker_image = bundle.metadata.request_docker_image

    # Write the command to be executed to a script.
    if docker_image:
        container_file = temp_dir + '.cid'  # contains the docker container id
        action_file = temp_dir + '.action'  # send actions to the container (e.g., kill)
        status_dir = temp_dir + '.status'  # receive information from the container (e.g., memory)
        script_file = temp_dir + '.sh'  # main entry point
        internal_script_file = temp_dir + '-internal.sh'  # run inside the docker container
        # Names of files inside the docker container
        docker_temp_dir = bundle.uuid
        docker_internal_script_file = bundle.uuid + '-internal.sh'

        # 1) script_file starts the docker container and runs internal_script_file in docker.
        # --rm removes the docker container once the job terminates (note that this makes things slow)
        # -v mounts the internal and user scripts and the temp directory
        # Trap SIGTERM and forward it to docker.
        with open(script_file, 'w') as f:
            # trap doesn't quite work reliably with Torque, so don't use it
            #f.write('trap \'echo Killing docker container $(cat %s); docker kill $(cat %s); echo Killed: $?; exit 143\' TERM\n' % (container_file, container_file))
            # Inspect doesn't tell us a lot, so don't use it
            #f.write('while [ -e %s ]; do docker inspect $(cat %s) > %s; sleep 1; done &\n' % (temp_dir, container_file, status_dir))

            # Monitor CPU/memory/disk
            monitor_commands = [
                # Report on status
                'mkdir -p %s' % status_dir,
                'if [ -e /cgroup ]; then cgroup=/cgroup; else cgroup=/sys/fs/cgroup; fi',  # find where cgroup is
                'cp -f $cgroup/cpuacct/docker/$(cat %s)/cpuacct.stat %s' % (container_file, status_dir),
                'cp -f $cgroup/memory/docker/$(cat %s)/memory.usage_in_bytes %s' % (container_file, status_dir),
                'cp -f $cgroup/blkio/docker/$(cat %s)/blkio.throttle.io_service_bytes %s' % (container_file, status_dir),
                # Respond to actions
                '[ -e %s ] && [ "$(cat %s)" == "kill" ] && docker kill $(cat %s) && rm %s' % (action_file, action_file, container_file, action_file),
            ]
            f.write('while [ -e %s ]; do %s; sleep 1; done &\n' % (temp_dir, '; '.join(monitor_commands)))

            # Constrain resources
            resource_args = ''
            if bundle.metadata.request_memory:
                resource_args += ' -m %s' % int(formatting.parse_size(bundle.metadata.request_memory))
            # TODO: would constrain --cpuset=0, but difficult because don't know the CPU ids
            f.write("docker run%s --rm --cidfile %s -u %s -v %s:/%s -v %s:/%s %s bash %s & wait $!\n" % (
                resource_args, container_file, os.geteuid(),
                temp_dir, docker_temp_dir,
                internal_script_file, docker_internal_script_file,
                docker_image, docker_internal_script_file))

        # 2) internal_script_file runs the actual command inside the docker container
        with open(internal_script_file, 'w') as f:
            # Make sure I have a username
            f.write("echo %s::%s:%s::/:/bin/bash >> /etc/passwd\n" % (os.getlogin(), os.geteuid(), os.getgid()))
            # Do this because .bashrc isn't sourced automatically (even with --login, though it works with docker -t -i, strange...)
            f.write(". .bashrc || exit 1\n")
            # Go into the temp directory
            f.write("cd %s &&\n" % docker_temp_dir)
            # Run the actual command
            f.write('(%s) > stdout 2>stderr\n' % bundle.command)
    else:
        # Just run the command regularly without docker
        script_file = temp_dir + '.sh'
        with open(script_file, 'w') as f:
            f.write("cd %s &&\n" % temp_dir)
            f.write('(%s) > stdout 2>stderr\n' % bundle.command)

    # Determine resources to request
    resource_args = []
    if bundle.metadata.request_time:
        resource_args.extend(['--request_time', formatting.parse_duration(bundle.metadata.request_time)])
    if bundle.metadata.request_memory:
        resource_args.extend(['--request_memory', formatting.parse_size(bundle.metadata.request_memory)])
    if bundle.metadata.request_cpus:
        resource_args.extend(['--request_cpus', bundle.metadata.request_cpus])
    if bundle.metadata.request_gpus:
        resource_args.extend(['--request_gpus', bundle.metadata.request_gpus])
    if bundle.metadata.request_queue:
        resource_args.extend(['--request_queue', bundle.metadata.request_queue])
    if username:
        resource_args.extend(['--username', username])

    # Start the command
    args = self.dispatch_command.split() + ['start'] + map(str, resource_args) + [script_file]
    if self.verbose >= 1:
        print '=== start_bundle(): running %s' % args
    result = json.loads(self.run_command_get_stdout(args))
    if self.verbose >= 1:
        print '=== start_bundle(): got %s' % result

    # Return the information about the job.
    return {
        'bundle': bundle,
        'temp_dir': temp_dir,
        'job_handle': result['handle'],
        'docker_image': docker_image,
    }
def _deserialize(self, value, attr, data):
    return formatting.parse_duration(value)
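The (self, value, attr, data) signature matches a custom field hook in marshmallow 2.x, so this presumably sits on a fields.Field subclass. A minimal sketch under that assumption; the class name is hypothetical and the serialize direction is only a placeholder:

from marshmallow import fields

class DurationField(fields.Field):  # hypothetical name
    def _serialize(self, value, attr, obj):
        # Placeholder inverse; the real schema may format seconds back
        # into a human-readable string instead.
        return value

    def _deserialize(self, value, attr, data):
        # Parse human-readable durations like '1d' into a number of seconds.
        return formatting.parse_duration(value)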
def start_bundle(self, bundle, bundle_store, parent_dict, username):
    '''
    Sets up all the temporary files and then dispatches the job.
    username: the username of the owner of the bundle
    Returns the bundle information.
    '''
    # Create a temporary directory
    temp_dir = canonicalize.get_current_location(bundle_store, bundle.uuid)
    temp_dir = os.path.realpath(temp_dir)  # Follow symlinks
    path_util.make_directory(temp_dir)

    # Copy all the dependencies to that temporary directory.
    pairs = bundle.get_dependency_paths(bundle_store, parent_dict, temp_dir)
    print >>sys.stderr, 'RemoteMachine.start_bundle: copying dependencies of %s to %s' % (bundle.uuid, temp_dir)
    for (source, target) in pairs:
        path_util.copy(source, target, follow_symlinks=False)

    # Set defaults for the dispatcher.
    docker_image = self.default_docker_image
    if bundle.metadata.request_docker_image:
        docker_image = bundle.metadata.request_docker_image
    request_time = self.default_request_time
    if bundle.metadata.request_time:
        request_time = bundle.metadata.request_time
    request_memory = self.default_request_memory
    if bundle.metadata.request_memory:
        request_memory = bundle.metadata.request_memory
    request_cpus = self.default_request_cpus
    if bundle.metadata.request_cpus:
        request_cpus = bundle.metadata.request_cpus
    request_gpus = self.default_request_gpus
    if bundle.metadata.request_gpus:
        request_gpus = bundle.metadata.request_gpus
    request_queue = self.default_request_queue
    if bundle.metadata.request_queue:
        request_queue = bundle.metadata.request_queue
    request_priority = self.default_request_priority
    if bundle.metadata.request_priority:
        request_priority = bundle.metadata.request_priority

    script_file = temp_dir + '.sh'  # main entry point
    ptr_temp_dir = '$temp_dir'
    # 1) If no argument to script_file, use the temp_dir (e.g., Torque, master/worker share file system).
    # 2) If argument is 'use_script_for_temp_dir', use the script to determine temp_dir (e.g., qsub, master/worker do not share file system).
    set_temp_dir_header = 'if [ -z "$1" ]; then temp_dir=' + temp_dir + '; else temp_dir=`readlink -f $0 | sed -e \'s/\\.sh$//\'`; fi\n'

    # Write the command to be executed to a script.
    if docker_image:
        internal_script_file = temp_dir + '-internal.sh'  # run inside the docker container
        # These paths depend on $temp_dir, an environment variable which will be set (referenced inside script_file)
        ptr_container_file = ptr_temp_dir + '.cid'  # contains the docker container id
        ptr_action_file = ptr_temp_dir + '.action'  # send actions to the container (e.g., kill)
        ptr_status_dir = ptr_temp_dir + '.status'  # receive information from the container (e.g., memory)
        ptr_script_file = ptr_temp_dir + '.sh'  # main entry point
        ptr_internal_script_file = ptr_temp_dir + '-internal.sh'  # run inside the docker container
        # Names of files inside the docker container
        docker_temp_dir = bundle.uuid
        docker_internal_script_file = bundle.uuid + '-internal.sh'

        # 1) script_file starts the docker container and runs internal_script_file in docker.
        # --rm removes the docker container once the job terminates (note that this makes things slow)
        # -v mounts the internal and user scripts and the temp directory
        # Trap SIGTERM and forward it to docker.
        with open(script_file, 'w') as f:
            f.write(set_temp_dir_header)

            # Monitor CPU/memory/disk
            def copy_if_exists(source_template, arg, target):
                source = source_template % arg
                # -f because target might be read-only
                return 'if [ -e %s ] && [ -e %s ]; then cp -f %s %s; fi' % (arg, source, source, target)

            monitor_commands = [
                # Report on status (memory, cpu, etc.)
                'mkdir -p %s' % ptr_status_dir,
                'if [ -e /cgroup ]; then cgroup=/cgroup; else cgroup=/sys/fs/cgroup; fi',  # find where cgroup is
                copy_if_exists('$cgroup/cpuacct/docker/$(cat %s)/cpuacct.stat', ptr_container_file, ptr_status_dir),
                copy_if_exists('$cgroup/memory/docker/$(cat %s)/memory.usage_in_bytes', ptr_container_file, ptr_status_dir),
                copy_if_exists('$cgroup/blkio/docker/$(cat %s)/blkio.throttle.io_service_bytes', ptr_container_file, ptr_status_dir),
                # Respond to kill action
                '[ -e %s ] && [ "$(cat %s)" == "kill" ] && docker kill $(cat %s) && rm %s' % (ptr_action_file, ptr_action_file, ptr_container_file, ptr_action_file),
                # Sleep
                'sleep 1',
            ]
            f.write('while [ -e %s ]; do\n %s\ndone &\n' % (ptr_temp_dir, '\n '.join(monitor_commands)))

            # Tell docker to constrain resources (memory).
            # Note: limiting memory is not always supported. See:
            # http://programster.blogspot.com/2014/09/docker-implementing-container-memory.html
            resource_args = ''
            if bundle.metadata.request_memory:
                resource_args += ' -m %s' % int(formatting.parse_size(bundle.metadata.request_memory))
            # TODO: would constrain --cpuset=0, but difficult because don't know the CPU ids
            f.write("docker run%s --rm --cidfile %s -u %s -v %s:/%s -v %s:/%s %s bash %s >%s/stdout 2>%s/stderr & wait $!\n" % (
                resource_args, ptr_container_file, os.geteuid(),
                ptr_temp_dir, docker_temp_dir,
                ptr_internal_script_file, docker_internal_script_file,
                docker_image, docker_internal_script_file,
                ptr_temp_dir, ptr_temp_dir))

        # 2) internal_script_file runs the actual command inside the docker container
        with open(internal_script_file, 'w') as f:
            # Make sure I have a username
            username = pwd.getpwuid(os.getuid())[0]  # do this because os.getlogin() doesn't always work
            f.write("echo %s::%s:%s::/:/bin/bash >> /etc/passwd\n" % (username, os.geteuid(), os.getgid()))
            # Do this because .bashrc isn't sourced automatically (even with --login, though it works with docker -t -i, strange...)
            f.write(". .bashrc || exit 1\n")
            # Go into the temp directory
            f.write("cd %s &&\n" % docker_temp_dir)
            # Run the actual command
            f.write('(%s) >>stdout 2>>stderr\n' % bundle.command)
    else:
        # Just run the command regularly without docker
        with open(script_file, 'w') as f:
            f.write(set_temp_dir_header)
            f.write("cd %s &&\n" % ptr_temp_dir)
            f.write('(%s) >stdout 2>stderr\n' % bundle.command)

    # Determine resources to request
    resource_args = []
    if request_time:
        resource_args.extend(['--request_time', formatting.parse_duration(request_time)])
    if request_memory:
        resource_args.extend(['--request_memory', formatting.parse_size(request_memory)])
    if request_cpus:
        resource_args.extend(['--request_cpus', request_cpus])
    if request_gpus:
        resource_args.extend(['--request_gpus', request_gpus])
    if request_queue:
        resource_args.extend(['--request_queue', request_queue])
    if request_priority:
        resource_args.extend(['--request_priority', request_priority])
    if username:
        resource_args.extend(['--username', username])

    # Start the command
    args = self.dispatch_command.split() + ['start'] + map(str, resource_args) + [script_file]
    if self.verbose >= 1:
        print '=== start_bundle(): running %s' % args
    result = json.loads(self.run_command_get_stdout(args))
    if self.verbose >= 1:
        print '=== start_bundle(): got %s' % result

    # Return the information about the job.
    return {
        'bundle': bundle,
        'temp_dir': temp_dir,
        'job_handle': result['handle'],
        'docker_image': docker_image,
    }
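For orientation, here is roughly what the docker-branch script_file above ends up containing, reconstructed by hand from the f.write calls rather than captured from a run. The bundle uuid, bundle path, and uid are placeholders, the -m flag appears only when request_memory is set, and the two cp lines for memory and blkio (same pattern as the cpuacct line) are elided:

if [ -z "$1" ]; then temp_dir=/path/to/0xUUID; else temp_dir=`readlink -f $0 | sed -e 's/\.sh$//'`; fi
while [ -e $temp_dir ]; do
 mkdir -p $temp_dir.status
 if [ -e /cgroup ]; then cgroup=/cgroup; else cgroup=/sys/fs/cgroup; fi
 if [ -e $temp_dir.cid ] && [ -e $cgroup/cpuacct/docker/$(cat $temp_dir.cid)/cpuacct.stat ]; then cp -f $cgroup/cpuacct/docker/$(cat $temp_dir.cid)/cpuacct.stat $temp_dir.status; fi
 ...
 [ -e $temp_dir.action ] && [ "$(cat $temp_dir.action)" == "kill" ] && docker kill $(cat $temp_dir.cid) && rm $temp_dir.action
 sleep 1
done &
docker run -m 2147483648 --rm --cidfile $temp_dir.cid -u 1000 -v $temp_dir:/0xUUID -v $temp_dir-internal.sh:/0xUUID-internal.sh some/image bash 0xUUID-internal.sh >$temp_dir/stdout 2>$temp_dir/stderr & wait $!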