Example 1
    def handle_submission_response(self, response, executed_op):
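        '''
        Descriptive note (added): inspects the HTTP response returned by
        Cromwell after a job submission and updates the ExecutedOperation
        accordingly, alerting admins if the submission failed.
        '''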
        response_json = json.loads(response.text)
        if response.status_code == 201:
            try:
                status = response_json['status']
            except KeyError as ex:
                status = 'Unknown'
            if status == self.SUBMITTED_STATUS:
                logger.info('Job was successfully submitted to Cromwell.')
                # Cromwell assigns its own UUID to the job
                cromwell_job_id = response_json['id']
                executed_op.job_id = cromwell_job_id
                executed_op.execution_start_datetime = datetime.datetime.now()
            else:
                logger.info('Received an unexpected status'
                            ' from Cromwell following a 201'
                            ' response code: {status}'.format(status=status))
                executed_op.status = status

        else:
            error_msg = ('Received a response code of {rc} when submitting job'
                         ' to the remote Cromwell runner.'.format(
                             rc=response.status_code))
            logger.info(error_msg)
            alert_admins(error_msg)
            executed_op.status = 'Not submitted. Try again later. Admins have been notified.'
        executed_op.save()
Example 2
 def handle_other_job_outcome(self, executed_op):
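     '''
     Descriptive note (added): marks the ExecutedOperation with a generic
     error status and notifies the admins when the job status query returns
     an unexpected response.
     '''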
     executed_op.status = (
         'Experienced an unexpected response'
         ' when querying for the job status. Admins have been notified.')
     alert_admins(
         'Experienced an unexpected response when querying for '
         'the job status of op: {op_id}.'.format(op_id=executed_op.job_id))
Example 3
 def post(self, request, format=None):
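     '''
     Descriptive note (added): saves user-submitted feedback and forwards
     the message to the admins.
     '''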
     serializer = FeedbackSerializer(data=request.data)
     if serializer.is_valid():
         serializer.save(user=request.user)
         data = serializer.data
         alert_admins(data['message'])
         return Response(data, status=status.HTTP_201_CREATED)
     return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST)
Example 4
def validate_resource(resource_pk, requested_resource_type):
    '''
    This function only performs validation of the resource.
    Note that it calls the `resource_utilities.validate_resource` 
    function which does NOT perform a save on the passed Resource
    instance
    '''
    resource = resource_utilities.get_resource_by_pk(resource_pk)

    try:
        resource_utilities.validate_resource(resource, requested_resource_type)
    except Exception as ex:
        logger.info(
            'Caught an exception raised by the validate_resource function.')
        alert_admins(str(ex))
        resource.status = str(ex)
    resource.is_active = True
    resource.save()
Example 5
    def handle_job_success(self, executed_op):
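        '''
        Descriptive note (added): handles a Cromwell job that completed
        successfully by recording the end time, converting the outputs and
        updating the ExecutedOperation.
        '''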

        job_id = executed_op.job_id
        job_metadata = self.query_for_metadata(job_id)
        try:
            end_time_str = job_metadata['end']
        except KeyError as ex:
            end_time = datetime.datetime.now()
        else:
            end_time = datetime.datetime.strptime(
                end_time_str, self.CROMWELL_DATETIME_STR_FORMAT)

        # get the job outputs
        # This is a mapping of the Cromwell output ID (e.g. Workflow.Variable)
        # to either a primitive (String, Number) or a filepath (in a bucket)
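        # A hypothetical example of that payload shape (illustrative only,
        # not taken from an actual Cromwell response):
        # {'Workflow.results_file': 'gs://some-bucket/outputs/results.tsv',
        #  'Workflow.num_genes': 100}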
        try:
            outputs_dict = job_metadata['outputs']
        except KeyError as ex:
            outputs_dict = {}
            error_msg = (
                'The job metadata payload received from executed op ({op_id})'
                ' with Cromwell ID {cromwell_id} did not contain the "outputs"'
                ' key in the payload'.format(cromwell_id=job_id,
                                             op_id=executed_op.id))
            logger.info(error_msg)
            alert_admins(error_msg)

        # instantiate the output converter class which will take the job outputs
        # and create MEV-compatible data structures or resources:
        converter = RemoteCromwellOutputConverter()
        try:
            converted_outputs = self.convert_outputs(executed_op, converter,
                                                     outputs_dict)
            executed_op.outputs = converted_outputs
            executed_op.execution_stop_datetime = end_time
            executed_op.job_failed = False
            executed_op.status = ExecutedOperation.COMPLETION_SUCCESS
        except OutputConversionException as ex:
            executed_op.execution_stop_datetime = end_time
            executed_op.job_failed = True
            executed_op.status = ExecutedOperation.FINALIZING_ERROR
            alert_admins(str(ex))
Example 6
def validate_resource_and_store(resource_pk, requested_resource_type):
    '''
    This function handles the background validation of uploaded
    files.

    Prior to calling this function, we set the `is_active` flag
    to False so that the `Resource` is disabled for use.

    Note that the `resource_utilities.validate_and_store_resource`
    function performs a save on the passed Resource
    '''
    resource = resource_utilities.get_resource_by_pk(resource_pk)
    try:
        resource_utilities.validate_and_store_resource(
            resource, requested_resource_type)
    except Exception as ex:
        logger.info(
            'Caught an exception raised by the validate_and_store_resource function.'
        )
        alert_admins(str(ex))
Example 7
def validate_and_store_resource(resource, requested_resource_type):
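    '''
    Descriptive note (added): moves the file backing `resource` to its final
    storage location, validates it, and saves the updated Resource instance.
    '''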

    # move the file backing this Resource.
    # Note that we do this BEFORE validating so that the validation functions don't
    # have to contain different steps for handling new uploads or requests to
    # change the type of a Resource.  By immediately moving the file to its
    # final storage backend, we can handle all the variations in the same manner.
    # If the `move_resource_to_final_location` function does not succeed, it will
    # raise an exception which we allow to percolate. The appropriate attributes
    # are already set on `resource` to denote that failure, so nothing else needs to happen here.
    resource.path = move_resource_to_final_location(resource)

    try:
        validate_resource(resource, requested_resource_type)
        # save the filesize as well
        resource.size = get_resource_size(resource)
    except Exception as ex:
        resource.status = str(ex)
        alert_admins(
            'Encountered an issue during resource validation and storage. See logs.'
        )
    resource.is_active = True
    resource.save()
Example 8
    def filter_against_query_params(self, query_params):
        '''
        Looks through the query params to subset the table
        '''
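        # As a hypothetical example (column names and operator strings are
        # illustrative), query_params such as
        #   {'pval': '[lte]:0.01', 'log2FoldChange': '[gte]:1'}
        # would keep only the rows where pval <= 0.01 and log2FoldChange >= 1.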
        table_cols = self.table.columns

        # guard against some edge case where the table we are filtering happens to have
        # columns that conflict with the pagination parameters. We simply inform the admins
        # and ignore that conflict by not using that filter

        if any([x in self.IGNORED_QUERY_PARAMS for x in table_cols]):
            logger.warning(
                'One of the column names conflicted with the pagination query params.'
            )
            alert_admins(
                'Edge-case error: when filtering on a column, one of the column names'
                ' conflicted with the pagination query params. Query params was: {p} and column'
                ' names were: {c}'.format(p=query_params,
                                          c=','.join(table_cols)))
        filters = []

        # used to map the pandas native type to a MEV-type so we can do type casting consistently
        type_dict = self.get_type_dict()
        for k, v in query_params.items():
            split_v = v.split(settings.QUERY_PARAM_DELIMITER)
            if (k not in self.IGNORED_QUERY_PARAMS) and (k in table_cols):
                # v is either a value (in the case of strict equality)
                # or a delimited string which will dictate the comparison.
                # For example, to filter on the 'pval' column for values less than or equal to 0.01,
                # v would be "[lte]:0.01". The "[lte]" string is set in our general settings file.
                # gets a type name (as a string, e.g. "Float")
                column_type = type_dict[k]
                if len(split_v) == 1:
                    # strict equality
                    val = self.do_type_cast(v, column_type)
                    try:
                        filters.append(self.table[k] == val)
                    except Exception as ex:
                        logger.error('Encountered an exception while applying'
                                     ' the filter: {ex}'.format(ex=ex))
                elif len(split_v) == 2:
                    val = self.do_type_cast(split_v[1], column_type)
                    try:
                        op = settings.OPERATOR_MAPPING[split_v[0]]
                    except KeyError as ex:
                        raise ParseException(
                            'The operator string ("{s}") was not understood. Choose'
                            ' from among: {vals}'.format(
                                s=split_v[0],
                                vals=','.join(
                                    settings.OPERATOR_MAPPING.keys())))
                    filters.append(self.table[k].apply(lambda x: op(x, val)))
                else:
                    raise ParseException(
                        'The query param string ({v}) for filtering on'
                        ' the {col} column was not formatted properly.'.format(
                            v=v, col=k))
            elif k in self.IGNORED_QUERY_PARAMS:
                pass
            elif k == settings.ROWNAME_FILTER:
                if len(split_v) != 2:
                    raise ParseException(
                        'The query for filtering on the rows'
                        ' was not properly formatted. It should be [<op>]:<value>'
                    )
                # we don't allow indexes that are all numbers, so don't worry about casting
                # the filter value from a string
                val = split_v[1]
                try:
                    op = settings.OPERATOR_MAPPING[split_v[0]]
                except KeyError as ex:
                    raise ParseException(
                        'The operator string ("{s}") was not understood. Choose'
                        ' from among: {vals}'.format(
                            s=split_v[0],
                            vals=','.join(settings.OPERATOR_MAPPING.keys())))
                try:
                    rowname_filter = self.table.index.to_series().apply(
                        lambda x: op(x, val))
                    filters.append(rowname_filter)
                except Exception as ex:
                    alert_admins(
                        'Error when attempting to perform a row filter. Exception was: {x}'
                        .format(x=ex))
                    raise ParseException(
                        'Error encountered with filter on rows.'
                        ' Admin has been notified.')
            else:
                raise ParseException(
                    'The column "{c}" is not available for filtering.'.format(
                        c=k))
        if len(filters) > 1:
            combined_filter = reduce(lambda x, y: x & y, filters)
            self.table = self.table.loc[combined_filter]
        elif len(filters) == 1:
            self.table = self.table.loc[filters[0]]
Example 9
    def finalize(self, executed_op):
        '''
        Finishes up an ExecutedOperation. Does things like registering files 
        with a user, cleanup, etc.
        '''
        job_id = str(executed_op.job_id)
        exit_code = check_container_exit_code(job_id)
        finish_datetime = get_finish_datetime(job_id)
        executed_op.execution_stop_datetime = finish_datetime

        if exit_code != 0:
            logger.info('Received a non-zero exit code ({n}) from container'
                        ' executing job: {op_id}'.format(
                            op_id=executed_op.job_id, n=exit_code))
            executed_op.job_failed = True
            executed_op.status = ExecutedOperation.COMPLETION_ERROR

            # collect the errors that are reported in the logs
            log_msg = get_logs(job_id)
            message_list = [
                log_msg,
            ]

            # handle the out of memory error-- we can't do it all!
            if exit_code == 137:
                logger.info('Executed job {op_id} exhausted the available'
                            ' memory.'.format(op_id=executed_op.job_id))
                message_list.append(
                    'The process ran out of memory and exited.'
                    ' Sometimes the job parameters can result in analyses exceeding'
                    ' the processing capabilities of WebMeV.')

            executed_op.error_messages = message_list
            alert_admins(','.join(message_list))

        else:
            logger.info('Container exit code was zero. Fetch outputs.')
            # read the outputs json file and convert to mev-compatible outputs:
            try:
                outputs_dict = self.load_outputs_file(job_id)

                # instantiate the output converter class:
                converter = LocalDockerOutputConverter()
                converted_outputs = self.convert_outputs(
                    executed_op, converter, outputs_dict)

                executed_op.outputs = converted_outputs

                executed_op.job_failed = False
                executed_op.status = ExecutedOperation.COMPLETION_SUCCESS

            except Exception as ex:
                # if the outputs file was not found or if some other exception was
                # raised, mark the job failed.
                executed_op.job_failed = True
                executed_op.status = str(ex)
                alert_admins(str(ex))

        # finally, we cleanup the docker container
        remove_container(job_id)

        executed_op.is_finalizing = False  # so future requests don't think it is still finalizing
        executed_op.save()
        return
Example 10
    def run(self, executed_op, op_data, validated_inputs):
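        '''
        Descriptive note (added): runs the operation locally via Docker by
        staging the inputs into a per-execution sandbox directory, building
        the `docker run` command and launching it.
        '''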
        logger.info('Running in local Docker mode.')
        logger.info('Executed op type: %s' % type(executed_op))
        logger.info('Executed op ID: %s' % str(executed_op.id))
        logger.info('Op data: %s' % op_data)
        logger.info(validated_inputs)

        # the UUID identifying the execution of this operation:
        execution_uuid = str(executed_op.id)

        # get the operation dir so we can look at which converters and command to use:
        op_dir = os.path.join(settings.OPERATION_LIBRARY_DIR,
                              str(op_data['id']))

        # To avoid conflicts or corruption of user data, we run each operation in its
        # own sandbox. We must first copy over their files to that sandbox dir.
        execution_dir = os.path.join(settings.OPERATION_EXECUTION_DIR,
                                     execution_uuid)
        make_local_directory(execution_dir)

        # convert the user inputs into args compatible with commandline usage:
        # For instance, a differential gene expression analysis requires one to
        # specify the samples that are in each group-- to do this, the Operation
        # requires that two ObservationSet instances be submitted as arguments.
        # The "translator" will take the ObservationSet data structures and turn
        # them into something that the call will use, e.g. making a CSV list to
        # submit as one of the args like:
        # docker run <image> run_something.R -a sampleA,sampleB -b sampleC,sampleD
        arg_dict = self._map_inputs(op_dir, validated_inputs, execution_dir)

        logger.info('After mapping the user inputs, we have the'
                    ' following structure: {d}'.format(d=arg_dict))

        # Construct the command that will be run in the container:
        entrypoint_file_path = os.path.join(op_dir, self.ENTRYPOINT_FILE)
        if not os.path.exists(entrypoint_file_path):
            logger.error(
                'Could not find the required entrypoint file at {p}.'
                ' Something must have corrupted the operation directory.'.
                format(p=entrypoint_file_path))
            raise Exception('The repository must have been corrupted.'
                            ' Failed to find the entrypoint file.'
                            ' Check dir at: {d}'.format(d=op_dir))
        entrypoint_cmd = self._get_entrypoint_command(entrypoint_file_path,
                                                      arg_dict)

        image_str = get_image_name_and_tag(op_data['repo_name'],
                                           op_data['git_hash'])

        cmd = self.DOCKER_RUN_CMD.format(
            container_name=execution_uuid,
            execution_mount=settings.OPERATION_EXECUTION_DIR,
            work_dir=settings.OPERATION_EXECUTION_DIR,
            job_dir=execution_dir,
            docker_image=image_str,
            cmd=entrypoint_cmd)
        try:
            run_shell_command(cmd)
            executed_op.job_id = execution_uuid
            executed_op.save()
        except Exception as ex:
            logger.info('Failed when running shell command: {c}'.format(c=cmd))
            logger.info('Exception was: {ex}'.format(ex=ex))
            # if an exception is raised when issuing the Docker run
            # command, then the job has failed. This error is likely
            # not due to user error, but something with the issuing
            # command or allocating appropriate Docker resources.
            executed_op.job_failed = True
            executed_op.execution_stop_datetime = datetime.datetime.now()
            executed_op.status = ExecutedOperation.ADMIN_NOTIFIED
            executed_op.save()
            alert_admins(str(ex))
Example 11
    def convert_outputs(self, executed_op, converter, outputs_dict):
        '''
        Handles the mapping from outputs (as provided by the runner)
        to MEV-compatible data structures or resources.
        '''

        # the workspace so we know which workspace to associate outputs with:
        user_workspace = getattr(executed_op, 'workspace', None)

        # get the operation spec so we know which types correspond to each output
        op_data = get_operation_instance_data(executed_op.operation)
        op_spec_outputs = op_data['outputs']

        converted_outputs_dict = {}
        try:
            # note that the sort is not necessary, but it incurs little penalty.
            # However, it does make unit testing easier.
            for k in sorted(op_spec_outputs.keys()):
                current_output = op_spec_outputs[k]
                try:
                    v = outputs_dict[k]
                except KeyError as ex:
                    error_msg = (
                        'Could not locate the output with key={k} in'
                        ' the outputs of operation with ID: {id}'.format(
                            k=k, id=str(executed_op.operation.id)))
                    logger.info(error_msg)
                    alert_admins(error_msg)
                    raise OutputConversionException(error_msg)

                else:
                    if v is not None:
                        logger.info(
                            'Executed operation output was not None. Convert.')
                        converted_outputs_dict[k] = converter.convert_output(
                            executed_op, user_workspace, current_output, v)
                    else:
                        logger.info('Executed operation output was null/None.')
                        converted_outputs_dict[k] = None

            # If here, we had all the required output keys and they converted properly.
            # However, the analysis might have specified EXTRA outputs. This isn't necessarily
            # an error, but we treat it as such since it's clear there is a discrepancy between
            # the "spec" and the actual output.
            # We don't fail the job, but we alert the admins.
            extra_keys = set(outputs_dict.keys()).difference(
                op_spec_outputs.keys())
            if len(extra_keys) > 0:
                error_msg = ('There were extra keys ({keys}) in the output of'
                             ' the operation. Check this.'.format(
                                 keys=','.join(extra_keys)))
                logger.info(error_msg)
                alert_admins(error_msg)

            return converted_outputs_dict
        except OutputConversionException as ex:
            logger.info(
                'Requesting cleanup of an ExecutedOperation due to failure'
                ' while converting outputs.')
            self.cleanup_on_error(op_spec_outputs, converted_outputs_dict)
            raise ex
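
Note: each of the examples above calls a project-level `alert_admins` helper that is not shown in these snippets. A minimal sketch of what such a helper might look like, assuming a Django project; the use of `mail_admins`, the subject line and the logger name are illustrative assumptions, not the project's actual implementation:

import logging

from django.core.mail import mail_admins

logger = logging.getLogger(__name__)


def alert_admins(message):
    # Hypothetical sketch: forward the message to the addresses listed in
    # settings.ADMINS. fail_silently=True so that a mail failure does not
    # mask the original error being reported.
    logger.info('Notifying admins: %s', message)
    mail_admins(subject='WebMeV alert', message=message, fail_silently=True)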