Example #1
0
def job_api_read(configuration,
                 workflow_session,
                 job_type=JOB,
                 **job_attributes):
    """
    Serve a 'read' request made through the job API.
    :param configuration: The MiG configuration object. Its logger is used
    for reporting.
    :param workflow_session: The MiG workflow session. Its 'owner' entry is
    used as the id of the requesting client.
    :param job_type: [optional] What to read. QUEUE reads every workflow job
    of the vgrid, any other value is handled as a single job read. Default is
    JOB.
    :param job_attributes: keyword arguments selecting what to read. 'vgrid'
    is always required and 'job_id' is required for single job reads.
    :return: Tuple (boolean, dict or string)
    For queue reads the tuple holds True and a dictionary mapping each job's
    'JOB_ID' to the job itself. For single job reads the result of
    'get_job_with_id' is returned unchanged. If a required attribute is
    missing or the vgrid check fails the tuple holds False and an explanatory
    error message.
    """
    log = configuration.logger

    try:
        vgrid_name = job_attributes['vgrid']
    except KeyError:
        return (False, "Can't read jobs without 'vgrid' attribute")

    # Check that the session owner is allowed to use the vgrid
    owner = workflow_session['owner']
    allowed, reason, _ = init_vgrid_script_list(vgrid_name, owner,
                                                configuration)
    if not allowed:
        return (False, reason)

    if job_type != QUEUE:
        # Single job read: delegate the lookup, searching all vgrid users
        if 'job_id' not in job_attributes:
            return (False, "Can't read single job without 'job_id' attribute")
        return get_job_with_id(configuration,
                               job_attributes['job_id'],
                               vgrid_name,
                               owner,
                               only_user_jobs=False)

    # Queue read: index every workflow job of the vgrid by its id
    queued_jobs = get_vgrid_workflow_jobs(configuration,
                                          vgrid_name,
                                          json_serializable=True)
    log.info("Found %d jobs" % len(queued_jobs))
    return (True, {entry['JOB_ID']: entry for entry in queued_jobs})
Example #2
0
def job_api_create(configuration,
                   workflow_session,
                   job_type=JOB,
                   **job_attributes):
    """
    Handler for 'create' calls to job API.
    :param configuration: The MiG configuration object.
    :param workflow_session: The MiG job session. This must contain the
    key 'owner'
    :param job_type: [optional] A MiG job type. Default is 'job'. It is not
    read by the visible part of this function.
    :param job_attributes: dictionary of arguments used to create the job.
    It must contain 'vgrid'; the remaining entries are translated to mRSL.
    :return: Tuple (boolean, string)
    If the 'vgrid' attribute is missing, the vgrid check fails or the
    temporary mRSL file cannot be written, a tuple is returned with a first
    value of False, and an explanatory error message as the second value.
    NOTE(review): the visible body ends once the temporary mRSL file has been
    written and returns nothing on that path - the job submission and the
    (True, job id) result described by the API are presumably implemented
    past this point; confirm against the complete file.
    """
    _logger = configuration.logger

    client_id = workflow_session['owner']
    external_dict = get_keywords_dict(configuration)

    if 'vgrid' not in job_attributes:
        msg = "Cannot create new job without specifying a %s (vgrid) for it " \
              "to be attached to. " % configuration.site_vgrid_label
        return (False, msg)
    vgrid = job_attributes['vgrid']

    # User is vgrid owner or member
    success, msg, _ = init_vgrid_script_list(vgrid, client_id, configuration)
    if not success:
        return (False, msg)

    # 'vgrid' only selects where the job belongs, so it is left out of the
    # fields that get translated to mRSL
    job_attributes.pop('vgrid')

    mrsl = fields_to_mrsl(configuration, job_attributes, external_dict)

    # Not used in the visible part - presumably needed further down
    tmpfile = None

    # save to temporary file
    try:
        (filehandle, real_path) = tempfile.mkstemp(text=True)
        try:
            os.write(filehandle, mrsl)
        finally:
            # Always release the descriptor, also when the write fails
            os.close(filehandle)
    except Exception as err:
        msg = 'Failed to write temporary mRSL file: %s' % err
        _logger.error(msg)
        return (False, msg)
Example #3
0
def main(client_id, user_arguments_dict):
    """Front end entry point returning the triggers list of a vgrid.

    The user input is validated first, then access to the requested vgrid is
    checked and finally the 'triggers' list of that vgrid is looked up with
    missing lists allowed. The result is a tuple of output objects and a
    returnvalues status: CLIENT_ERROR for invalid input or a failed vgrid
    check, SYSTEM_ERROR if the list lookup fails and OK otherwise.
    """

    (configuration, _logger, output_objects, _op_name) = \
        initialize_main_variables(client_id)
    (input_ok, accepted) = validate_input_and_cert(
        user_arguments_dict,
        signature()[1],
        output_objects,
        client_id,
        configuration,
        allow_rejects=False,
    )
    if not input_ok:
        return (accepted, returnvalues.CLIENT_ERROR)

    # Only the last supplied vgrid_name value is used
    vgrid_name = accepted['vgrid_name'][-1]

    # Validity of user and vgrid names is checked in this init function so
    # no need to worry about illegal directory traversal through variables

    (access_ok, access_msg, _) = init_vgrid_script_list(vgrid_name,
                                                        client_id,
                                                        configuration)
    if not access_ok:
        output_objects.append({'object_type': 'error_text',
                               'text': access_msg})
        return (output_objects, returnvalues.CLIENT_ERROR)

    # On success the second value is the list, otherwise an error message
    (listed, triggers) = vgrid_list(vgrid_name,
                                    'triggers',
                                    configuration,
                                    allow_missing=True)
    if listed:
        output_objects.append({'object_type': 'list', 'list': triggers})
        return (output_objects, returnvalues.OK)

    output_objects.append({
        'object_type': 'error_text',
        'text': '%s' % triggers
    })
    return (output_objects, returnvalues.SYSTEM_ERROR)
Example #4
0
def main(client_id, user_arguments_dict):
    """Front end entry point returning the triggers list of a vgrid.

    Validates the user input, checks access to the requested vgrid and then
    looks up the 'triggers' list of that vgrid. Returns a tuple of output
    objects and a returnvalues status: CLIENT_ERROR for invalid input or a
    failed vgrid check, SYSTEM_ERROR if the list lookup fails, else OK.
    """

    (configuration, _logger, output_objects, _op_name) = \
        initialize_main_variables(client_id)
    (input_ok, accepted) = validate_input_and_cert(
        user_arguments_dict, signature()[1], output_objects, client_id,
        configuration, allow_rejects=False)
    if not input_ok:
        return (accepted, returnvalues.CLIENT_ERROR)

    # Only the last supplied vgrid_name value is used
    vgrid_name = accepted['vgrid_name'][-1]

    # Validity of user and vgrid names is checked in this init function so
    # no need to worry about illegal directory traversal through variables

    (access_ok, access_msg, _) = init_vgrid_script_list(
        vgrid_name, client_id, configuration)
    if not access_ok:
        error_entry = {'object_type': 'error_text', 'text': access_msg}
        output_objects.append(error_entry)
        return (output_objects, returnvalues.CLIENT_ERROR)

    # On success the second value is the list, otherwise an error message
    (listed, triggers) = vgrid_list(vgrid_name, 'triggers', configuration)
    if listed:
        output_objects.append({'object_type': 'list', 'list': triggers})
        return (output_objects, returnvalues.OK)

    error_entry = {'object_type': 'error_text', 'text': '%s' % triggers}
    output_objects.append(error_entry)
    return (output_objects, returnvalues.SYSTEM_ERROR)
Example #5
0
def get_job_with_id(configuration, job_id, vgrid, client_id,
                    only_user_jobs=True):
    """Retrieves the job mrsl definition of a given job id. If only_user_jobs
    is true then only the clients job files are searched, otherwise the jobs
    of other users in the given vgrid are also used, provided the client is a
    user in that vgrid.

    The job_id may be given with or without the '.mRSL' file suffix.

    Returns a tuple (boolean, dict or string): True and the unpickled job
    dictionary if a job file was found and loaded, otherwise False and an
    explanatory error message.
    """

    # This will try and retrieve a jobs mrsl file as efficiently as possible
    # depending on what help the function caller can provide. If a caller
    # already knows the client_id then we can attempt to go straight to the
    # job file, otherwise we will need to search through all users in a vgrid.
    # This should only be used as a last resort as this will be an extremely
    # costly action.

    if only_user_jobs and not client_id:
        return (False, "Cannot retrieve a job without 'client_id' being set. ")

    success, msg, _ = init_vgrid_script_list(vgrid, client_id,
                                             configuration)
    if not success:
        return (False, msg)

    job_file = job_id
    if not job_file.endswith('.mRSL'):
        job_file += '.mRSL'

    # First search given users directory.
    path = os.path.abspath(
        os.path.join(configuration.mrsl_files_dir,
                     client_id_dir(client_id), job_file)
    )

    if os.path.exists(path):
        job_dict = unpickle(path, configuration.logger)

        if job_dict:
            return (True, job_dict)

    if only_user_jobs:
        return (False, "Could not locate job file '%s' for user '%s'"
                % (job_file, client_id))

    users = []
    # If vgrid is known we can just search through the users on that vgrid.
    if vgrid:
        vgrid_map = get_vgrid_map(configuration)[VGRIDS][vgrid]

        users += vgrid_map[OWNERS]
        users += vgrid_map[MEMBERS]
        for user in users:
            # NOTE(review): unlike the lookup above, user is joined as-is
            # instead of going through client_id_dir - confirm that the vgrid
            # map entries are usable as directory names/patterns here.
            path = os.path.abspath(
                os.path.join(configuration.mrsl_files_dir, user, job_file)
            )

            matches = glob(path)
            if not matches:
                # This user does not hold the job file, but one of the
                # remaining owners or members still might, so keep looking
                continue
            if len(matches) > 1:
                # More than one candidate file: the search is given up
                break

            job_dict = unpickle(matches[0], configuration.logger)

            if job_dict:
                return (True, job_dict)

    return (False, "Could not locate job file for job '%s'." % job_file)
Example #6
0
def main(client_id, user_arguments_dict):
    """
    Main function used by front end.
    :param client_id: A MiG user. Once the workflow session has been looked
    up this is replaced by the 'owner' of that session.
    :param user_arguments_dict: The request arguments. The output format is
    forced to JSON here, while the actual request is a JSON message read from
    stdin. The message is parsed and if valid, the relevant API handler
    function is called to generate meaningful output.
    :return: (Tuple (list, Tuple(integer,string))) Returns a tuple with the
    first value being a list of output objects generated by the call. The
    second value is also a tuple used for error code reporting, with the first
    value being an error code and the second being a brief explanation.
    """
    # Ensure that the output format is in JSON
    user_arguments_dict['output_format'] = ['json']
    user_arguments_dict.pop('__DELAYED_INPUT__', None)
    (configuration, logger, output_objects, op_name) = \
        initialize_main_variables(client_id, op_title=False, op_header=False,
                                  op_menu=False)

    if not correct_handler('POST'):
        msg = "Interaction from %s not POST request" % client_id
        logger.error(msg)
        output_objects.append({'object_type': 'error_text', 'text': msg})
        return (output_objects, returnvalues.SYSTEM_ERROR)

    if not configuration.site_enable_workflows:
        output_objects.append({
            'object_type':
            'error_text',
            'text':
            'Workflows are not enabled on this system'
        })
        return (output_objects, returnvalues.SYSTEM_ERROR)

    # Add allow Access-Control-Allow-Origin to headers
    # Required to allow Jupyter Widget from localhost to request against the
    # API
    # TODO, possibly restrict allowed origins
    output_objects[0]['headers'].append(('Access-Control-Allow-Origin', '*'))
    output_objects[0]['headers'].append(
        ('Access-Control-Allow-Headers', 'Content-Type'))
    output_objects[0]['headers'].append(('Access-Control-Max-Age', 600))
    output_objects[0]['headers'].append(
        ('Access-Control-Allow-Methods', 'POST, OPTIONS'))
    output_objects[0]['headers'].append(('Content-Type', 'application/json'))

    # Input data
    data = sys.stdin.read()
    try:
        json_data = json.loads(data, object_hook=force_utf8_rec)
    except ValueError:
        msg = "An invalid format was supplied to: '%s', requires a JSON " \
              "compatible format" % op_name
        logger.error(msg)
        output_objects.append({'object_type': 'error_text', 'text': msg})
        return (output_objects, returnvalues.CLIENT_ERROR)

    # TODO: consider additional CSRF protection here?
    # attacker needs to intercept jupyter session_id from running session
    # and work around security restrictions in Jupyter API to abuse anything
    # https://github.com/jupyter/jupyter/wiki/Jupyter-Notebook-Server-API

    # IMPORTANT!! Do not access the json_data input before it has been
    # validated by validated_input.
    accepted, rejected = validated_input(json_data,
                                         JOB_SIGNATURE,
                                         type_override=JOB_TYPE_MAP,
                                         value_override=JOB_VALUE_MAP,
                                         list_wrap=True)

    if not accepted or rejected:
        logger.error("A validation error occurred: '%s'" % rejected)
        msg = "Invalid input was supplied to the job API: %s" % rejected
        # TODO, Transform error messages to something more readable
        output_objects.append({'object_type': 'error_text', 'text': msg})
        return (output_objects, returnvalues.CLIENT_ERROR)

    # Should use 'accepted' here, but all data jumbled together into one big
    # dict, easier to access json data by known keys
    job_attributes = json_data.get('attributes', None)
    if job_attributes is None:
        # A request without attributes must end in the missing vgrid error
        # below rather than in a TypeError from testing membership on None
        job_attributes = {}
    job_type = json_data.get('type', None)
    operation = json_data.get('operation', None)
    workflow_session_id = json_data.get('workflowsessionid', None)

    if not valid_session_id(configuration, workflow_session_id):
        output_objects.append({
            'object_type': 'error_text',
            'text': 'Invalid workflowsessionid'
        })
        return (output_objects, returnvalues.CLIENT_ERROR)

    # workflow_session_id symlink points to the vGrid it gives access to
    workflow_sessions_db = []
    try:
        workflow_sessions_db = load_workflow_sessions_db(configuration)
    except IOError:
        logger.info("Workflow sessions db didn't load, creating new db")
        if not touch_workflow_sessions_db(configuration, force=True):
            output_objects.append({
                'object_type':
                'error_text',
                'text':
                "Internal sessions db failure, please contact "
                "an admin at '%s' to resolve this issue." %
                configuration.admin_email
            })
            return (output_objects, returnvalues.SYSTEM_ERROR)
        else:
            # Try reload
            workflow_sessions_db = load_workflow_sessions_db(configuration)

    if workflow_session_id not in workflow_sessions_db:
        logger.error("Workflow session '%s' from user '%s' not found in "
                     "database" % (workflow_session_id, client_id))
        configuration.auth_logger.error(
            "Workflow session '%s' provided by user '%s' but not present in "
            "database" % (workflow_session_id, client_id))
        # TODO Also track multiple attempts from the same IP
        output_objects.append({
            'object_type': 'error_text',
            'text': 'Invalid workflowsessionid'
        })
        return (output_objects, returnvalues.CLIENT_ERROR)

    # From here on the request is handled on behalf of the session owner
    workflow_session = workflow_sessions_db.get(workflow_session_id)
    client_id = workflow_session['owner']

    if 'vgrid' not in job_attributes:
        logger.info("Invalid json job interaction. user '%s' does not specify "
                    "a vgrid in '%s'" % (client_id, job_attributes.keys()))
        msg = "Cannot create new job without specifying a %s (vgrid) for it " \
              "to be attached to. " % configuration.site_vgrid_label
        output_objects.append({'object_type': 'error_text', 'text': msg})
        return (output_objects, returnvalues.CLIENT_ERROR)
    vgrid = job_attributes['vgrid']

    # User is vgrid owner or member
    success, msg, _ = init_vgrid_script_list(vgrid, client_id, configuration)

    if not success:
        logger.error("Illegal access attempt by user '%s' to vgrid '%s'. %s" %
                     (client_id, vgrid, msg))
        output_objects.append({'object_type': 'error_text', 'text': msg})
        return (output_objects, returnvalues.CLIENT_ERROR)

    # Create
    if operation == JOB_API_CREATE:
        created, msg = job_api_create(configuration, workflow_session,
                                      job_type, **job_attributes)
        if not created:
            output_objects.append({'object_type': 'error_text', 'text': msg})
            logger.error("Returning error msg '%s'" % msg)
            return (output_objects, returnvalues.CLIENT_ERROR)
        output_objects.append({'object_type': 'text', 'text': msg})
        return (output_objects, returnvalues.OK)
    # Read
    if operation == JOB_API_READ:
        status, jobs = job_api_read(configuration, workflow_session, job_type,
                                    **job_attributes)
        if not status:
            # NOTE(review): a failed read is reported with returnvalues.OK
            # whereas a failed create uses CLIENT_ERROR - confirm intended
            output_objects.append({'object_type': 'error_text', 'text': jobs})
            return (output_objects, returnvalues.OK)

        output_objects.append({'object_type': 'job_dict', 'jobs': jobs})
        return (output_objects, returnvalues.OK)

    # Update
    if operation == JOB_API_UPDATE:
        updated, msg = job_api_update(configuration, workflow_session,
                                      job_type, **job_attributes)
        if not updated:
            # NOTE(review): same OK-on-failure reporting as for reads above
            output_objects.append({'object_type': 'error_text', 'text': msg})
            return (output_objects, returnvalues.OK)
        output_objects.append({'object_type': 'text', 'text': msg})
        return (output_objects, returnvalues.OK)

    # Delete has not been implemented, and probably shouldn't be. Jobs may be
    # canceled remotely but not entirely deleted.

    # Any other operation value ends up here
    output_objects.append({
        'object_type': 'error_text',
        'text': 'You are out of bounds here'
    })
    return (output_objects, returnvalues.CLIENT_ERROR)
Example #7
0
def job_api_update(configuration,
                   workflow_session,
                   job_type=JOB,
                   **job_attributes):
    """
    Handler for 'update' calls to job API.
    :param configuration: The MiG configuration object.
    :param workflow_session: The MiG workflow session. This must contain the
    key 'owner'
    :param job_type: [optional] A MiG job type. Default is 'job'. It is not
    read by this function.
    :param job_attributes: dictionary of arguments used to update the
    specified job. 'JOB_ID' and 'vgrid' are required. 'JOB_ID' may be given
    with or without the '.mRSL' file suffix. Currently the only supported
    update is setting 'STATUS' to 'CANCELED'.
    :return: Tuple (boolean, string)
    If the job was canceled a tuple is returned with True in the first value
    and a feedback message in the second. Else, a tuple is returned with a
    first value of False, and an explanatory error message as the second
    value.
    """
    _logger = configuration.logger

    client_id = workflow_session['owner']

    job_id = job_attributes.get('JOB_ID', None)
    if not job_id:
        msg = "No job id provided in update"
        _logger.error(msg)
        return (False, msg)

    # get_job_with_id accepts ids with or without the mRSL suffix, so use the
    # bare id here to avoid building a '<id>.mRSL.mRSL' file path below
    mrsl_suffix = '.mRSL'
    if job_id.endswith(mrsl_suffix):
        job_id = job_id[:-len(mrsl_suffix)]

    if 'vgrid' not in job_attributes:
        return (False, "Can't update job without 'vgrid' attribute")
    vgrid = job_attributes['vgrid']

    # User is vgrid owner or member
    success, msg, _ = init_vgrid_script_list(vgrid, client_id, configuration)
    if not success:
        return (False, msg)

    status, job = get_job_with_id(configuration,
                                  job_id,
                                  vgrid,
                                  client_id,
                                  only_user_jobs=False)

    if not status:
        msg = "Could not open job file for job '%s'" % job_id
        _logger.error(msg)
        return (False, msg)

    # Canceling is the only update available
    new_state = 'CANCELED'
    if job_attributes.get('STATUS') != new_state:
        return (False,
                "No updated applied from attributes '%s'" % job_attributes)

    possible_cancel_states = [
        'PARSE', 'QUEUED', 'RETRY', 'EXECUTING', 'FROZEN'
    ]
    if job['STATUS'] not in possible_cancel_states:
        msg = 'Could not cancel job with status ' + job['STATUS']
        _logger.error(msg)
        return (False, msg)

    # The job file is located through the job's own 'USER_CERT' entry, which
    # need not be the requesting client
    job_user_dir = client_id_dir(job['USER_CERT'])
    file_path = os.path.join(configuration.mrsl_files_dir,
                             job_user_dir, job_id + mrsl_suffix)
    if not unpickle_and_change_status(file_path, new_state, _logger):
        _logger.error('%s could not cancel job: %s' %
                      (client_id, job_id))
        msg = 'Could not change status of job ' + job_id
        _logger.error(msg)
        return (False, msg)

    # Fill in placeholders so the grid script message can always be built
    job.setdefault('UNIQUE_RESOURCE_NAME', 'UNIQUE_RESOURCE_NAME_NOT_FOUND')
    job.setdefault('EXE', 'EXE_NAME_NOT_FOUND')

    # job['STATUS'] is still the status loaded before the change above
    message = ' '.join(['JOBACTION', job_id, job['STATUS'], new_state,
                        job['UNIQUE_RESOURCE_NAME'], job['EXE']]) + '\n'
    if not send_message_to_grid_script(message, _logger,
                                       configuration):
        msg = '%s failed to send message to grid script: %s' \
              % (client_id, message)
        _logger.error(msg)
        return (False, msg)
    return (True, 'Job %s has been succesfully canceled' % job_id)