Example #1
 def find(self, name: str):
     if name not in self._environments:
         if name == 'default':
             raise UserError(
                 'No default env set in environments. Set a default environment or provide one.'
             )
         raise UserError(f'Could not find {name} in available environments.')
     return self._environments[name]
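
These examples raise UserError without ever defining it. A minimal sketch of the exception type they all assume (an assumption; the real class may carry an HTTP status code or extra context):

class UserError(Exception):
    """Raised to report invalid user input or configuration to the caller."""
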
Example #2
 def _validate_request_object(d):
     # Every request object must provide these fields before it can be used.
     for field in ('name', 'endpoint', 'type', 'body'):
         if field not in d:
             raise UserError(
                 f"Request json object missing '{field}' field: {d}")
Example #3
def get_remote_config(url, config, cli_args):
    r = requests.get(url)
    r.raise_for_status()
    data = r.json()

    if 'error' in data:
        raise UserError('get_remote_config: server returned error: %s' % (data['error'],))
    elif 'result' not in data:
        raise UserError('get_remote_config: missing "result" in response')

    return data['result']
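
The server is expected to reply with a JSON body shaped like one of these (illustrative values, not from the source):

{"error": "bad token"}            -> raises UserError
{"result": {"streams": ["..."]}}  -> the "result" value is returned to the caller
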
Example #4
def get_google_bucket_name(fi, PROJECT_ACL):
    try:
        project_info = PROJECT_ACL[fi.get("project_id").split("-")[0]]
    except KeyError:
        raise UserError("PROJECT_ACL does not have {} key".format(
            fi.get("project_id")))
    suffix = "-open" if fi.get("acl") in {"[u'open']", "['open']", "*"} else "-controlled"
    return project_info["gs_bucket_prefix"] + suffix
Example #5
 def _load_requests(self, filename):
     try:
         with open(filename) as json_file:
             data = json.load(json_file)
             for d in data:
                 self._validate_request_object(d)
                 self._requests[d['name']] = d
     except Exception as e:
         # Surface file and JSON parse errors to the caller as a UserError.
         raise UserError(str(e)) from e
Example #6
def update_job(jid):
    """Update the status of a job.

    Accepted keys in json payload:
    log:            a log message that will be appended to job.log
    running_state:  cwl workflow running step
    output:         output of a job

    """
    try:
        payload = json.loads(request.data)
    except ValueError as e:
        raise UserError("Invalid json: {}".format(e))
    allowed_keys = {"log", "running_state", "output"}
    not_allowed = set(payload.keys()).difference(allowed_keys)
    if not_allowed:
        raise UserError("Keys: {} not allowed; only allowed to update: "
                        "{}".format(not_allowed, allowed_keys))
    return jsonify(slurm.update_job(jid, payload))
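
A payload using only the allowed keys passes the check above; any other key raises UserError. For instance (hypothetical values):

payload = {
    "log": "alignment step finished",
    "running_state": "upload_results",
    "output": {"exit_code": 0},
}
# set(payload) - {"log", "running_state", "output"} is empty, so no UserError
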
Example #7
 def _load_environments(self, filename):
     try:
         with open(filename) as json_file:
             data = json.load(json_file)
             for d in data:
                 self._validate_environment_object(d)
                 for name in d['names']:
                     self._environments[name] = d
     except Exception as e:
         # Surface file and JSON parse errors to the caller as a UserError.
         raise UserError(str(e)) from e
Example #8
    def make_call(self, request_name, env_name):
        request = self.requests.find(request_name)
        environment = self.environments.find(env_name)

        parsed_req = self._parse_request(request, env_name)

        url = f"{environment['base_url']}{parsed_req['endpoint']}"

        headers = {'content-type': 'application/json'}
        if 'headers' in environment:
            headers = {**headers, **environment['headers']}

        method = parsed_req['type']
        if method == 'GET':
            response = requests.get(url,
                                    params=parsed_req['body'],
                                    headers=headers)
        elif method in ('POST', 'PUT'):
            # POST and PUT differ only in the verb; requests.request covers both.
            response = requests.request(method, url,
                                        data=json.dumps(parsed_req['body']),
                                        headers=headers)
        else:
            raise UserError(f'Unknown HTTP method {method}')

        response_json = response.json()
        if response.status_code != 200:
            raise RequestError(
                f'{response.status_code} returned when calling {request_name} with response '
                f'{response_json}. Expected status code 200.')

        if self.print_all_responses:
            print(f'Response for call to {request_name}:')
            print(response_json)

        return response_json
Example #9
def create_job():
    """
    Create a job.
    :query type: type of the job; can be bash or cwl, defaults to bash
    ** Example of a bash job: **
    .. code-block:: http
        POST /job/ HTTP/1.1
        Authorization: Basic QWxhZGRpbjpPcGVuU2VzYW1l
    .. code-block:: Javascript
        {
            "command": "echo 'test'"
        }
    ** Example output: **
    .. code-block:: http
        HTTP/1.1 200
        Content-Type: application/json

    .. code-block:: Javascript
        {
            "job": "445"
        }

    ** Example of a cwl job: **
    A cwl job accepts a json document describing your workflow and a json
    input; if there are inputs of type File, you should provide another json
    object containing the contents of those files.

    Example workflow:
    .. code-block:: yaml
      class: CommandLineTool
      requirements:
        # DockerRequirement is required for scheduler API
        - class: DockerRequirement
          dockerPull: quay.io/cdis/cwlutils:s3cwl
      inputs:
        - id: "#echo-in"
          type: File
          label: "Message"
          description: "The message to print"
          inputBinding: {}
      outputs:
        - id: "#echo-out"
          type: File
          label: "Printed Message"
          description: "The file containing the message"
          outputBinding:
            glob: messageout.txt

      baseCommand: echo
      stdout: messageout.txt

    Example inputs:
    .. code-block:: yaml
        "echo-in":
            class: File
            # need to be a relative path
            path: filea

    Example input files:
    .. code-block:: yaml
        filea: content of the file

    .. code-block:: http
        POST /job/ HTTP/1.1
        Authorization: Basic QWxhZGRpbjpPcGVuU2VzYW1l

    .. code-block:: Javascript
        {
            "document": cwl json,
            "inputs": input json,
            "input_files": json which specify actual content of input files
        }

    ** Example output: **
    .. code-block:: http
        HTTP/1.1 200
        Content-Type: application/json

    .. code-block:: Javascript
        {
            "job": "445"
        }

    """
    req_type = request.args.get('type', 'bash')
    payload = request.get_json()
    env = os.environ
    if req_type == 'cwl':
        command, env = capp.cwl.construct_script(payload)
        script = resource_filename('scheduler',
                                   'resources/slurm/scripts/cwl.py')

    elif req_type == 'bash':
        assert_admin()
        command = [payload.get('command')]
        script = resource_filename('scheduler',
                                   'resources/slurm/scripts/command.sh')

    else:
        raise UserError("{} type not supported".format(req_type))
    return jsonify(
        slurm.submit_job(script, command, payload.get("args", []), env=env))
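
Following the bash-job example in the docstring, a minimal client call might look like this (host, port, and route are assumptions; the Basic token in the docstring decodes to Aladdin:OpenSesame):

import requests

resp = requests.post(
    "http://localhost:5000/job/?type=bash",   # host/port are placeholders
    json={"command": "echo 'test'"},
    auth=("Aladdin", "OpenSesame"),           # Basic auth, as in the docstring
)
print(resp.json())                            # e.g. {"job": "445"}
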
Example #10
 def find(self, name: str):
     if name not in self._requests:
         raise UserError(f'Could not find {name} in available requests.')
     return self._requests[name]
Example #11
 def _validate_environment_object(d):
     # Every environment object must provide these fields before it can be used.
     for field in ('names', 'base_url'):
         if field not in d:
             raise UserError(
                 f"Environment json object missing '{field}' field: {d}")
Example #12
def start_single_stream(port, config, stream_data, cli_args):
    port = int(port)

    required_keys = [
        'cache_size',
        'extended_logging',
        'engine_version',
    ]
    for k in required_keys:
        if k not in stream_data:
            raise UserError("Missing %r in stream data" % (k,))

    try:
        cache_size = int(stream_data['cache_size'])
    except (TypeError, ValueError):
        raise UserError("Malformed cache_size: %r" % (stream_data['cache_size'],))

    if cache_size < MIN_CACHE_SIZE:
        raise UserError("Too low: cache_size=%r min=%r" % (cache_size, MIN_CACHE_SIZE))

    logger.info(
        "%s: port=%d engine=%s debug=%r cache=%r source=%s",
        "DRY-RUN" if cli_args.dry_run else "start",
        port,
        stream_data['engine_version'],
        stream_data['extended_logging'],
        stream_data['cache_size'],
        stream_data['download_from']
    )

    stream_data['extended_logging'] = int(stream_data['extended_logging'])

    token = config.get("node_access_token", required=False)
    log_path = os.path.join(config.get_dir("log"), "support_node_%d.log" % (port,))
    cache_path = os.path.join(config.get_dir("cache"), "support_node_%d_cache" % (port,))
    state_path = os.path.join(config.get_dir("state"), "support_node_%d_state" % (port,))
    engine_path = config.get_engine_path(stream_data['engine_version'])

    if cli_args.clear_logs_on_start and os.path.isfile(log_path):
        os.remove(log_path)

    if token:
        transport_file_url = "http://%s/app/%s/get_transport_file?format=raw" % (stream_data['download_from'], token)
    else:
        transport_file_url = "http://%s/app/get_transport_file?format=raw" % (stream_data['download_from'],)

    args = [
        engine_path,
        '--log-file', str(log_path),
        '--port', str(port),
        '--url', transport_file_url,
        '--download-from', stream_data['download_from'],
        '--cache-dir', str(cache_path),
        '--state-dir', str(state_path),
        '--pid-file-dir', str(config.get_dir("pid")),
        '--live-mem-cache-size', str(cache_size),
    ]

    if token:
        args.extend(['--service-access-token', str(token)])

    args.extend(COMMON_ARGS)

    if stream_data['extended_logging'] == 1:
        args.extend(COMMON_DEBUG_ARGS)

    if cli_args.dry_run:
        logger.info('DRY-RUN: start engine: args=%r', args)
    else:
        if sys.platform == "win32":
            subprocess.Popen(args, creationflags=DETACHED_PROCESS, stdin=None, stderr=subprocess.STDOUT, stdout=open(log_path, "a"))
        else:
            # need "nohup" otherwise engine process will stop on shell logout
            args.insert(0, 'nohup')
            subprocess.Popen(args, close_fds=True, stdin=None, stderr=subprocess.STDOUT, stdout=open(log_path, "a"))

    return True
Example #13
def main():
    args = parse_args()

    try:
        config = Config()

        streams = get_config(config, args)
        instances = get_running_instances(with_data=True)

        logger.info("%d running instances, loaded %d sources", len(instances), len(streams))

        instances_by_port = {}
        busy_ports = set()
        running_sources = {}

        for instance in instances:
            if instance.get('data') is not None:
                busy_ports.add(instance['port'])
                download_from_list = instance['data'].get('download_from')
                if not download_from_list:
                    logger.error("instance with no 'download_from' info: pid=%r port=%r", instance['pid'], instance['port'])
                elif len(download_from_list) != 1:
                    logger.error("only one 'download_from' is supported: pid=%r port=%r count=%r", instance['pid'], instance['port'], len(download_from_list))
                else:
                    download_from = download_from_list[0]
                    # |download_from| is a tuple (ip, port)
                    source_addr = "%s:%s" % (download_from[0], download_from[1])
                    running_sources[source_addr] = instance['port']
                    instances_by_port[instance['port']] = instance
            else:
                logger.error("instance with no data: pid=%r port=%r", instance['pid'], instance['port'])

        if len(streams) > 0:
            mem = psutil.virtual_memory()
            cache_size = int(mem.total * USE_PERCENT_FROM_TOTAL_MEMORY / len(streams))
            cache_size -= RESERVE_PER_PROCESS_MEMORY

            if cache_size < MIN_CACHE_SIZE:
                cache_size = MIN_CACHE_SIZE
            elif cache_size > MAX_CACHE_SIZE:
                cache_size = MAX_CACHE_SIZE

        for node_config in streams:
            node_config['port'] = int(node_config['port'])
            node_config['cache_size'] = cache_size

        if args.port and node_config['port'] != int(args.port):
                continue

            download_from = node_config['download_from']

            if download_from in running_sources:
                port = running_sources[download_from]
                logger.debug("already running: source_addr=%s port=%r", download_from, port)
                instances_by_port[port]['_exists'] = True
            else:
                port = node_config['port']
                if port in busy_ports:
                    raise UserError("port is busy: %r" % (port,))

                try:
                    if start_single_stream(port, config, node_config, args):
                        busy_ports.add(port)
                        running_sources[download_from] = port
                except UserError as e:
                    logger.error("failed to start node: port=%r err=%s", port, str(e))

        # check running instances which are not in the playlist
        if not args.port:
            for instance in instances:
                if not instance.get('_exists'):
                    download_from_list = None
                    if instance.get('data') is not None:
                        download_from_list = instance['data'].get('download_from')

                    if args.dry_run:
                        logger.info("DRY-RUN: instance does not exist in the playlist, stop: pid=%r port=%r download_from_list=%s", instance['pid'], instance['port'], download_from_list)
                    else:
                        logger.info("instance does not exist in the playlist, stop: pid=%r port=%r download_from_list=%s", instance['pid'], instance['port'], download_from_list)
                        try:
                            os.kill(instance['pid'], signal.SIGTERM)
                        except OSError:
                            logger.info("failed to stop instance: pid=%r port=%r", instance['pid'], instance['port'])

    except UserError as e:
        print(str(e))