def find(self, name: str):
    if name not in self._environments:
        if name == 'default':
            raise UserError(
                'No default env set in environments. Set a default environment or provide one.'
            )
        raise UserError(f'Could not find {name} in available environments.')
    return self._environments[name]
@staticmethod  # called via self in _load_requests, but needs no instance state
def _validate_request_object(d):
    for field in ('name', 'endpoint', 'type', 'body'):
        if field not in d:
            raise UserError(f"Request json object missing '{field}' field: {d}")
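# A minimal sketch of a request object that passes _validate_request_object.
# The four required keys come from the checks above; the values shown are
# hypothetical examples:
#
#     {
#         "name": "create_user",
#         "endpoint": "/users",
#         "type": "POST",
#         "body": {"username": "alice"}
#     }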
def get_remote_config(url, config, cli_args):
    r = requests.get(url)
    r.raise_for_status()
    data = r.json()
    if 'error' in data:
        raise UserError('get_remote_config: server returned error: %s' % (data['error'],))
    if 'result' not in data:
        raise UserError('get_remote_config: missing "result" in response')
    return data['result']
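# get_remote_config expects a JSON envelope from the server with either a
# "result" or an "error" member. A hypothetical sketch of the two shapes it
# handles:
#
#     {"result": ...}                      # success: the "result" value is returned
#     {"error": "invalid access token"}    # failure: raises UserError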
def get_google_bucket_name(fi, PROJECT_ACL):
    try:
        project_info = PROJECT_ACL[fi.get("project_id").split("-")[0]]
    except KeyError:
        raise UserError(
            "PROJECT_ACL does not have {} key".format(fi.get("project_id"))
        )
    return project_info["gs_bucket_prefix"] + (
        "-open"
        if fi.get("acl") in {"[u'open']", "['open']", "*"}
        else "-controlled"
    )
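# A hypothetical sketch of the inputs get_google_bucket_name works with: the
# part of project_id before the first "-" indexes PROJECT_ACL, and the acl
# value selects the "-open" or "-controlled" suffix:
#
#     fi = {"project_id": "TCGA-BRCA", "acl": "['open']"}
#     PROJECT_ACL = {"TCGA": {"gs_bucket_prefix": "example-tcga-bucket"}}
#     # -> "example-tcga-bucket-open"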
def _load_requests(self, filename):
    try:
        with open(filename) as json_file:
            data = json.load(json_file)
        for d in data:
            self._validate_request_object(d)
            self._requests[d['name']] = d
    except Exception as e:
        raise UserError(str(e))
def update_job(jid):
    """Update the status of a job.

    Accepted keys in json payload:

    log: a log message that will be appended to job.log
    running_state: cwl workflow running step
    output: output of a job
    """
    try:
        payload = json.loads(request.data)
    except ValueError as e:
        raise UserError("Invalid json: {}".format(e))
    allowed_keys = {"log", "running_state", "output"}
    not_allowed = set(payload.keys()).difference(allowed_keys)
    if not_allowed:
        raise UserError("Keys: {} not allowed, only allow update "
                        "on: {}".format(not_allowed, allowed_keys))
    return jsonify(slurm.update_job(jid, payload))
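# A hypothetical example payload for update_job (the route and HTTP method are
# registered elsewhere and not shown here); any subset of the three allowed
# keys is accepted, anything else raises UserError:
#
#     {
#         "log": "step 2 finished",
#         "running_state": "scatter/align",
#         "output": {"bam": "out.bam"}
#     }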
def _load_environments(self, filename):
    try:
        with open(filename) as json_file:
            data = json.load(json_file)
        for d in data:
            self._validate_environment_object(d)
            for name in d['names']:
                self._environments[name] = d
    except Exception as e:
        raise UserError(str(e))
def make_call(self, request_name, env_name):
    request = self.requests.find(request_name)
    environment = self.environments.find(env_name)
    parsed_req = self._parse_request(request, env_name)
    url = f"{environment['base_url']}{parsed_req['endpoint']}"
    headers = {'content-type': 'application/json'}
    if 'headers' in environment:
        headers = {**headers, **environment['headers']}
    if parsed_req['type'] == 'POST':
        response = requests.post(url, data=json.dumps(parsed_req['body']),
                                 headers=headers)
    elif parsed_req['type'] == 'PUT':
        response = requests.put(url, data=json.dumps(parsed_req['body']),
                                headers=headers)
    elif parsed_req['type'] == 'GET':
        response = requests.get(url, params=parsed_req['body'], headers=headers)
    else:
        raise UserError(f'Unknown HTTP method {parsed_req["type"]}')
    response_json = response.json()
    if response.status_code != 200:
        raise RequestError(
            f'{response.status_code} returned when calling {request_name} with response '
            f'{response_json}. Expected status code 200.')
    if self.print_all_responses:
        print(f'Response for call to {request_name}:')
        print(response_json)
    return response_json
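# A sketch of how a request and an environment combine in make_call, assuming
# hypothetical entries in the loaded JSON files:
#
#     request:      {"name": "create_user", "endpoint": "/users",
#                    "type": "POST", "body": {"username": "alice"}}
#     environment:  {"names": ["staging"], "base_url": "https://staging.example.com",
#                    "headers": {"Authorization": "Bearer <token>"}}
#
#     client.make_call("create_user", "staging")
#     # -> POST https://staging.example.com/users with the JSON body above and
#     #    the default content-type merged with the environment's headers.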
def create_job():
    """
    Create a job.

    :query type: type of the job, can be bash or cwl, defaults to bash

    **Example of a bash job:**

    .. code-block:: http

        POST /job/ HTTP/1.1
        Authorization: Basic QWxhZGRpbjpPcGVuU2VzYW1l

    .. code-block:: Javascript

        {
            "command": "echo 'test'"
        }

    **Example output:**

    .. code-block:: http

        HTTP/1.1 200
        Content-Type: application/json

    .. code-block:: Javascript

        {
            "job": "445"
        }

    **Example of a cwl job:**

    A cwl job accepts a json document of your workflow and a json input;
    if there are inputs of type File, you should provide another json with
    the content of your files.

    Example workflow:

    .. code-block:: yaml

        class: CommandLineTool
        requirements:
          # DockerRequirement is required for scheduler API
          - class: DockerRequirement
            dockerPull: quay.io/cdis/cwlutils:s3cwl
        inputs:
          - id: "#echo-in"
            type: File
            label: "Message"
            description: "The message to print"
            inputBinding: {}
        outputs:
          - id: "#echo-out"
            type: File
            label: "Printed Message"
            description: "The file containing the message"
            outputBinding:
              glob: messageout.txt
        baseCommand: echo
        stdout: messageout.txt

    Example inputs:

    .. code-block:: yaml

        "echo-in":
          class: File
          # need to be a relative path
          path: filea

    Example input files:

    .. code-block:: yaml

        filea: content of the file

    .. code-block:: http

        POST /job/ HTTP/1.1
        Authorization: Basic QWxhZGRpbjpPcGVuU2VzYW1l

    .. code-block:: Javascript

        {
            "document": cwl json,
            "inputs": input json,
            "input_files": json which specify actual content of input files
        }

    **Example output:**

    .. code-block:: http

        HTTP/1.1 200
        Content-Type: application/json

    .. code-block:: Javascript

        {
            "job": "445"
        }
    """
    req_type = request.args.get('type', 'bash')
    payload = request.get_json()
    env = os.environ
    if req_type == 'cwl':
        command, env = capp.cwl.construct_script(payload)
        script = resource_filename('scheduler', 'resources/slurm/scripts/cwl.py')
    elif req_type == 'bash':
        assert_admin()
        command = [payload.get('command')]
        script = resource_filename('scheduler', 'resources/slurm/scripts/command.sh')
    else:
        raise UserError("{} type not supported".format(req_type))
    return jsonify(
        slurm.submit_job(script, command, payload.get("args", []), env=env))
def find(self, name: str):
    if name not in self._requests:
        raise UserError(f'Could not find {name} in available requests.')
    return self._requests[name]
@staticmethod  # called via self in _load_environments, but needs no instance state
def _validate_environment_object(d):
    for field in ('names', 'base_url'):
        if field not in d:
            raise UserError(f"Environment json object missing '{field}' field: {d}")
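# A minimal sketch of an environment object that passes the check above; the
# same object is registered once per alias in "names" by _load_environments.
# "headers" is optional (see make_call). Values are hypothetical:
#
#     {
#         "names": ["default", "staging"],
#         "base_url": "https://staging.example.com",
#         "headers": {"Authorization": "Bearer <token>"}
#     }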
def start_single_stream(port, config, stream_data, cli_args):
    port = int(port)
    required_keys = [
        'cache_size',
        'extended_logging',
        'engine_version',
    ]
    for k in required_keys:
        if k not in stream_data:
            raise UserError("Missing %r in stream data" % (k,))
    try:
        cache_size = int(stream_data['cache_size'])
    except (TypeError, ValueError):
        raise UserError("Malformed cache_size: %r" % (stream_data['cache_size'],))
    if cache_size < MIN_CACHE_SIZE:
        raise UserError("Too low: cache_size=%r min=%r" % (cache_size, MIN_CACHE_SIZE))
    logger.info(
        "%s: port=%d engine=%s debug=%r cache=%r source=%s",
        "DRY-RUN" if cli_args.dry_run else "start",
        port,
        stream_data['engine_version'],
        stream_data['extended_logging'],
        stream_data['cache_size'],
        stream_data['download_from'],
    )
    stream_data['extended_logging'] = int(stream_data['extended_logging'])
    token = config.get("node_access_token", required=False)
    log_path = os.path.join(config.get_dir("log"), "support_node_%d.log" % (port,))
    cache_path = os.path.join(config.get_dir("cache"), "support_node_%d_cache" % (port,))
    state_path = os.path.join(config.get_dir("state"), "support_node_%d_state" % (port,))
    engine_path = config.get_engine_path(stream_data['engine_version'])
    if cli_args.clear_logs_on_start and os.path.isfile(log_path):
        os.remove(log_path)
    if token:
        transport_file_url = "http://%s/app/%s/get_transport_file?format=raw" % (
            stream_data['download_from'], token)
    else:
        transport_file_url = "http://%s/app/get_transport_file?format=raw" % (
            stream_data['download_from'],)
    args = [
        engine_path,
        '--log-file', str(log_path),
        '--port', str(port),
        '--url', transport_file_url,
        '--download-from', stream_data['download_from'],
        '--cache-dir', str(cache_path),
        '--state-dir', str(state_path),
        '--pid-file-dir', str(config.get_dir("pid")),
        '--live-mem-cache-size', str(cache_size),
    ]
    if token:
        args.extend(['--service-access-token', str(token)])
    args.extend(COMMON_ARGS)
    if stream_data['extended_logging'] == 1:
        args.extend(COMMON_DEBUG_ARGS)
    if cli_args.dry_run:
        logger.info('DRY-RUN: start engine: args=%r', args)
    else:
        if sys.platform == "win32":
            subprocess.Popen(args, creationflags=DETACHED_PROCESS, stdin=None,
                             stderr=subprocess.STDOUT, stdout=open(log_path, "a"))
        else:
            # need "nohup" otherwise engine process will stop on shell logout
            args.insert(0, 'nohup')
            subprocess.Popen(args, close_fds=True, stdin=None,
                             stderr=subprocess.STDOUT, stdout=open(log_path, "a"))
    return True
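# A hypothetical sketch of the engine command line assembled above (wrapped
# here for readability; paths, port, token, and cache size depend on the
# Config directories and stream data, and COMMON_ARGS / COMMON_DEBUG_ARGS are
# appended at the end):
#
#     nohup <engine_path> --log-file .../support_node_8621.log --port 8621 \
#         --url "http://1.2.3.4:8621/app/<token>/get_transport_file?format=raw" \
#         --download-from 1.2.3.4:8621 --cache-dir .../support_node_8621_cache \
#         --state-dir .../support_node_8621_state --pid-file-dir ... \
#         --live-mem-cache-size 268435456 --service-access-token <token>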
def main():
    args = parse_args()
    try:
        config = Config()
        streams = get_config(config, args)
        instances = get_running_instances(with_data=True)
        logger.info("%d running instances, loaded %d sources", len(instances), len(streams))
        instances_by_port = {}
        busy_ports = set()
        running_sources = {}
        for instance in instances:
            if instance.get('data') is not None:
                busy_ports.add(instance['port'])
                download_from_list = instance['data'].get('download_from')
                if not download_from_list:
                    logger.error("instance with no 'download_from' info: pid=%r port=%r",
                                 instance['pid'], instance['port'])
                elif len(download_from_list) != 1:
                    logger.error("only one 'download_from' is supported: pid=%r port=%r count=%r",
                                 instance['pid'], instance['port'], len(download_from_list))
                else:
                    download_from = download_from_list[0]
                    # |download_from| is a tuple (ip, port)
                    source_addr = "%s:%s" % (download_from[0], download_from[1])
                    running_sources[source_addr] = instance['port']
                instances_by_port[instance['port']] = instance
            else:
                logger.error("instance with no data: pid=%r port=%r",
                             instance['pid'], instance['port'])
        if len(streams) > 0:
            mem = psutil.virtual_memory()
            cache_size = int(mem.total * USE_PERCENT_FROM_TOTAL_MEMORY / len(streams))
            cache_size -= RESERVE_PER_PROCESS_MEMORY
            if cache_size < MIN_CACHE_SIZE:
                cache_size = MIN_CACHE_SIZE
            elif cache_size > MAX_CACHE_SIZE:
                cache_size = MAX_CACHE_SIZE
            for node_config in streams:
                node_config['port'] = int(node_config['port'])
                node_config['cache_size'] = cache_size
                if args.port and int(node_config['port']) != int(args.port):
                    continue
                download_from = node_config['download_from']
                if download_from in running_sources:
                    port = running_sources[download_from]
                    logger.debug("already running: source_addr=%s port=%r", download_from, port)
                    instances_by_port[port]['_exists'] = True
                else:
                    port = node_config['port']
                    if port in busy_ports:
                        raise UserError("port is busy: %r" % (port,))
                    try:
                        if start_single_stream(port, config, node_config, args):
                            busy_ports.add(port)
                            running_sources[download_from] = port
                    except UserError as e:
                        logger.error("failed to start node: port=%r err=%s", port, str(e))
        # check running instances which are not in the playlist
        if not args.port:
            for instance in instances:
                exists = instance.get('_exists')
                if not exists:
                    download_from_list = None
                    if instance.get('data') is not None:
                        download_from_list = instance['data'].get('download_from')
                    if args.dry_run:
                        logger.info("DRY-RUN: instance does not exist in the playlist, stop: "
                                    "pid=%r port=%r download_from_list=%s",
                                    instance['pid'], instance['port'], download_from_list)
                    else:
                        logger.info("instance does not exist in the playlist, stop: "
                                    "pid=%r port=%r download_from_list=%s",
                                    instance['pid'], instance['port'], download_from_list)
                        try:
                            os.kill(instance['pid'], signal.SIGTERM)
                        except OSError:
                            logger.info("failed to stop instance: pid=%r port=%r",
                                        instance['pid'], instance['port'])
    except UserError as e:
        print(str(e))