def check_and_report_finished(run_state):
    """
    Ask Docker whether the run's container has finished and record the answer.

    The ``finished``, ``exitcode`` and ``failure_message`` entries of
    ``run_state.info`` are overwritten in place with the values returned by
    ``docker_utils.check_finished`` for ``run_state.container``. If that call
    raises ``docker_utils.DockerException`` the traceback is printed and the
    run is recorded as not finished, with no exit code and no failure message.

    :param run_state: run state exposing ``container``, ``info`` and ``_replace``
    :return: the result of ``run_state._replace`` carrying the updated ``info``
    """
    try:
        finished, exitcode, failure_msg = docker_utils.check_finished(run_state.container)
    except docker_utils.DockerException:
        # A failed status query is reported as "still running" so the caller
        # simply polls again on its next pass.
        traceback.print_exc()
        finished = False
        exitcode = failure_msg = None

    info = run_state.info
    info.update({'finished': finished, 'exitcode': exitcode, 'failure_message': failure_msg})
    return run_state._replace(info=info)
def check_and_report_finished(run_state):
    """
    Ask Docker whether the run's container has finished and record the answer.

    The ``finished``, ``exitcode`` and ``failure_message`` entries of
    ``run_state.info`` are overwritten in place with the values returned by
    ``docker_utils.check_finished`` for ``run_state.container``. If that call
    raises ``docker_utils.DockerException`` the traceback is printed and the
    run is recorded as not finished, with no exit code and no failure message.

    :param run_state: run state exposing ``container``, ``info`` and ``_replace``
    :return: the result of ``run_state._replace`` carrying the updated ``info``
    """
    try:
        finished, exitcode, failure_msg = docker_utils.check_finished(run_state.container)
    except docker_utils.DockerException:
        # A failed status query is reported as "still running" so the caller
        # simply polls again on its next pass.
        traceback.print_exc()
        finished = False
        exitcode = failure_msg = None

    info = run_state.info
    info.update({'finished': finished, 'exitcode': exitcode, 'failure_message': failure_msg})
    return run_state._replace(info=info)
def _transition_from_CLEANING_UP(self, run_state):
    """
    Release everything the run was holding, then hand it to the next stage.

    1- If the run has a container id, wait until Docker reports the container
       finished and force-remove it
    2- Release each dependency in the dependency manager
    3- Remove each dependency's path from the bundle folder
    4- If bundle has contents to upload (i.e. was RUNNING at some point),
       move to UPLOADING_RESULTS state
       Otherwise finalize the run through self.finalize_run

    :param run_state: state of the run being cleaned up
    :return: the run state moved to UPLOADING_RESULTS with its container
        cleared, or whatever self.finalize_run returns for this run state
    """
    bundle_uuid = run_state.bundle['uuid']

    if run_state.container_id is not None:
        while True:
            try:
                finished, _, _ = docker_utils.check_finished(run_state.container)
                if finished:
                    run_state.container.remove(force=True)
                    break
            except docker.errors.APIError:
                traceback.print_exc()
            # Back off before every retry -- both after an API error and while
            # the container is simply not finished yet -- so this loop never
            # spins against the Docker daemon.
            time.sleep(1)

    for dep in run_state.bundle['dependencies']:
        self.dependency_manager.release(bundle_uuid, (dep['parent_uuid'], dep['parent_path']))

        child_path = os.path.join(run_state.bundle_path, dep['child_path'])
        try:
            remove_path(child_path)
        except Exception:
            # Best effort: a dependency path that cannot be removed must not
            # stop the remaining dependencies from being released.
            traceback.print_exc()

    if run_state.has_contents:
        return run_state._replace(
            stage=LocalRunStage.UPLOADING_RESULTS,
            run_status='Uploading results',
            container=None,
        )
    return self.finalize_run(run_state)
def _transition_from_CLEANING_UP(self, run_state):
    """
    Release everything the run was holding, then hand it to the next stage.

    1- If the run has a container id, wait until Docker reports the container
       finished and force-remove it
    2- Release each dependency in the dependency manager
    3- Remove each dependency's path from the bundle folder
    4- If bundle has contents to upload (i.e. was RUNNING at some point),
       move to UPLOADING_RESULTS state
       Otherwise finalize the run through self.finalize_run

    :param run_state: state of the run being cleaned up
    :return: the run state moved to UPLOADING_RESULTS with its container
        cleared, or whatever self.finalize_run returns for this run state
    """
    bundle_uuid = run_state.bundle['uuid']

    if run_state.container_id is not None:
        while True:
            try:
                finished, _, _ = docker_utils.check_finished(run_state.container)
                if finished:
                    run_state.container.remove(force=True)
                    break
            except docker.errors.APIError:
                traceback.print_exc()
            # Back off before every retry -- both after an API error and while
            # the container is simply not finished yet -- so this loop never
            # spins against the Docker daemon.
            time.sleep(1)

    for dep in run_state.bundle['dependencies']:
        self.dependency_manager.release(bundle_uuid, (dep['parent_uuid'], dep['parent_path']))

        child_path = os.path.join(run_state.bundle_path, dep['child_path'])
        try:
            remove_path(child_path)
        except Exception:
            # Best effort: a dependency path that cannot be removed must not
            # stop the remaining dependencies from being released.
            traceback.print_exc()

    if run_state.has_contents:
        return run_state._replace(
            stage=LocalRunStage.UPLOADING_RESULTS,
            run_status='Uploading results',
            container=None,
        )
    return self.finalize_run(run_state)
def _transition_from_RUNNING(self, run_state):
    """
    1- Check run status of the docker container
    2- If run is killed, kill the container
    3- If run is finished, move to CLEANING_UP state

    Every call also refreshes time_used, max_memory and disk_utilization on
    the run state, and marks the run as killed (with a kill_message stored in
    run_state.info) when a requested time, memory or disk limit is exceeded.

    :param run_state: state of the run currently in the RUNNING stage
    :return: the updated run state; its stage becomes CLEANING_UP when the
        run was killed (and has a container id) or has finished, otherwise
        the stage is left as it was
    """
    bundle_uuid = run_state.bundle['uuid']

    def check_and_report_finished(run_state):
        # Record the container's completion status in run_state.info; a
        # failed Docker query is reported as "not finished".
        try:
            finished, exitcode, failure_msg = docker_utils.check_finished(run_state.container)
        except docker_utils.DockerException:
            traceback.print_exc()
            finished, exitcode, failure_msg = False, None, None
        new_info = dict(finished=finished, exitcode=exitcode, failure_message=failure_msg)
        run_state.info.update(new_info)
        run_state = run_state._replace(info=run_state.info)
        return run_state

    def check_resource_utilization(run_state):
        # Refresh the usage figures and flag the run as killed when any
        # requested limit is exceeded.
        kill_messages = []

        run_stats = docker_utils.get_container_stats(run_state.container)
        time_used = time.time() - run_state.start_time
        run_state = run_state._replace(time_used=time_used)
        run_state = run_state._replace(
            max_memory=max(run_state.max_memory, run_stats.get('memory', 0))
        )
        run_state = run_state._replace(
            disk_utilization=self.disk_utilization[bundle_uuid]['disk_utilization']
        )

        if (
            run_state.resources['request_time']
            and run_state.time_used > run_state.resources['request_time']
        ):
            kill_messages.append(
                'Time limit %s exceeded.' % duration_str(run_state.resources['request_time'])
            )

        if run_state.max_memory > run_state.resources['request_memory']:
            # Memory is a size, not a duration: format it with size_str, the
            # same helper the disk limit message uses.
            kill_messages.append(
                'Memory limit %s exceeded.' % size_str(run_state.resources['request_memory'])
            )

        if (
            run_state.resources['request_disk']
            and run_state.disk_utilization > run_state.resources['request_disk']
        ):
            kill_messages.append(
                'Disk limit %sb exceeded.' % size_str(run_state.resources['request_disk'])
            )

        if kill_messages:
            new_info = run_state.info
            new_info['kill_message'] = ' '.join(kill_messages)
            run_state = run_state._replace(info=new_info, is_killed=True)
        return run_state

    def check_disk_utilization():
        # Thread target: keep measuring the bundle folder until the shared
        # 'running' flag for this bundle is cleared.
        running = True
        while running:
            start_time = time.time()
            try:
                disk_utilization = get_path_size(run_state.bundle_path)
                self.disk_utilization[bundle_uuid]['disk_utilization'] = disk_utilization
                running = self.disk_utilization[bundle_uuid]['running']
            except Exception:
                traceback.print_exc()
            end_time = time.time()

            # To ensure that we don't hammer the disk for this computation when
            # there are lots of files, we run it at most 10% of the time.
            time.sleep(max((end_time - start_time) * 10, 1.0))

    def stop_disk_monitor():
        # Clear the flag polled by check_disk_utilization, then drop the
        # bundle's entry from self.disk_utilization.
        self.disk_utilization[bundle_uuid]['running'] = False
        self.disk_utilization.remove(bundle_uuid)

    # presumably registers (and starts) the monitor thread only when this
    # bundle has none yet -- confirm against add_if_new
    self.disk_utilization.add_if_new(
        bundle_uuid, threading.Thread(target=check_disk_utilization, args=[])
    )

    run_state = check_and_report_finished(run_state)
    run_state = check_resource_utilization(run_state)

    if run_state.is_killed and run_state.container_id is not None:
        try:
            run_state.container.kill()
        except docker.errors.APIError:
            finished, _, _ = docker_utils.check_finished(run_state.container)
            if not finished:
                # If we can't kill a Running container, something is wrong
                # Otherwise all well
                traceback.print_exc()
        stop_disk_monitor()
        # NOTE(review): _transition_from_CLEANING_UP only removes the
        # container when container_id is not None, so clearing it here looks
        # like it makes cleanup skip that removal -- confirm this is intended.
        return run_state._replace(stage=LocalRunStage.CLEANING_UP, container_id=None)

    if run_state.info['finished']:
        logger.debug(
            'Finished run with UUID %s, exitcode %s, failure_message %s',
            bundle_uuid,
            run_state.info['exitcode'],
            run_state.info['failure_message'],
        )
        stop_disk_monitor()
        return run_state._replace(stage=LocalRunStage.CLEANING_UP, run_status='Uploading results')

    return run_state
def _transition_from_RUNNING(self, run_state):
    """
    1- Check run status of the docker container
    2- If run is killed, kill the container
    3- If run is finished, move to CLEANING_UP state

    Every call also refreshes time_used, max_memory and disk_utilization on
    the run state, and marks the run as killed (with a kill_message stored in
    run_state.info) when a requested time, memory or disk limit is exceeded
    or the container exited with code 137.

    :param run_state: state of the run currently in the RUNNING stage
    :return: the updated run state; its stage becomes CLEANING_UP when the
        run was killed or has finished, otherwise the stage is left as it was
    """
    bundle_uuid = run_state.bundle['uuid']

    def check_and_report_finished(run_state):
        # Record the container's completion status in run_state.info; a
        # failed Docker query is reported as "not finished".
        try:
            finished, exitcode, failure_msg = docker_utils.check_finished(run_state.container)
        except docker_utils.DockerException:
            traceback.print_exc()
            finished, exitcode, failure_msg = False, None, None
        new_info = dict(finished=finished, exitcode=exitcode, failure_message=failure_msg)
        run_state.info.update(new_info)
        run_state = run_state._replace(info=run_state.info)
        return run_state

    def check_resource_utilization(run_state):
        # Refresh the usage figures and flag the run as killed when any
        # requested limit is exceeded.
        kill_messages = []

        run_stats = docker_utils.get_container_stats(run_state.container)
        time_used = time.time() - run_state.start_time
        run_state = run_state._replace(time_used=time_used)
        run_state = run_state._replace(
            max_memory=max(run_state.max_memory, run_stats.get('memory', 0))
        )
        run_state = run_state._replace(
            disk_utilization=self.disk_utilization[bundle_uuid]['disk_utilization']
        )

        if (
            run_state.resources['request_time']
            and run_state.time_used > run_state.resources['request_time']
        ):
            kill_messages.append(
                'Time limit %s exceeded.' % duration_str(run_state.resources['request_time'])
            )

        # Exit code 137 is treated here as "killed for exceeding memory".
        # Compare through str() so the check matches whether the recorded
        # exit code is the integer 137 or the string '137'; Docker's inspect
        # API documents ExitCode as an integer, which a bare == '137' would
        # never match. NOTE(review): confirm the type check_finished returns.
        exited_with_137 = str(run_state.info.get('exitcode', '0')) == '137'
        if run_state.max_memory > run_state.resources['request_memory'] or exited_with_137:
            kill_messages.append(
                'Memory limit %s exceeded.' % size_str(run_state.resources['request_memory'])
            )

        if (
            run_state.resources['request_disk']
            and run_state.disk_utilization > run_state.resources['request_disk']
        ):
            kill_messages.append(
                'Disk limit %sb exceeded.' % size_str(run_state.resources['request_disk'])
            )

        if kill_messages:
            new_info = run_state.info
            new_info['kill_message'] = ' '.join(kill_messages)
            run_state = run_state._replace(info=new_info, is_killed=True)
        return run_state

    def check_disk_utilization():
        # Thread target: keep measuring the bundle folder until the shared
        # 'running' flag for this bundle is cleared.
        running = True
        while running:
            start_time = time.time()
            try:
                disk_utilization = get_path_size(run_state.bundle_path)
                self.disk_utilization[bundle_uuid]['disk_utilization'] = disk_utilization
                running = self.disk_utilization[bundle_uuid]['running']
            except Exception:
                traceback.print_exc()
            end_time = time.time()

            # To ensure that we don't hammer the disk for this computation when
            # there are lots of files, we run it at most 10% of the time.
            time.sleep(max((end_time - start_time) * 10, 1.0))

    def stop_disk_monitor():
        # Clear the flag polled by check_disk_utilization, then drop the
        # bundle's entry from self.disk_utilization.
        self.disk_utilization[bundle_uuid]['running'] = False
        self.disk_utilization.remove(bundle_uuid)

    # presumably registers (and starts) the monitor thread only when this
    # bundle has none yet -- confirm against add_if_new
    self.disk_utilization.add_if_new(
        bundle_uuid, threading.Thread(target=check_disk_utilization, args=[])
    )

    run_state = check_and_report_finished(run_state)
    run_state = check_resource_utilization(run_state)

    if run_state.is_killed:
        try:
            run_state.container.kill()
        except docker.errors.APIError:
            finished, _, _ = docker_utils.check_finished(run_state.container)
            if not finished:
                # If we can't kill a Running container, something is wrong
                # Otherwise all well
                traceback.print_exc()
        stop_disk_monitor()
        return run_state._replace(stage=LocalRunStage.CLEANING_UP)

    if run_state.info['finished']:
        logger.debug(
            'Finished run with UUID %s, exitcode %s, failure_message %s',
            bundle_uuid,
            run_state.info['exitcode'],
            run_state.info['failure_message'],
        )
        stop_disk_monitor()
        return run_state._replace(
            stage=LocalRunStage.CLEANING_UP, run_status='Uploading results'
        )

    return run_state