Example #1
0
def create_regional_bucket(bucketname, region):
    '''
    Creates a storage bucket named `bucketname` with its location set to `region`.

    Nothing is returned.  If building or creating the bucket raises, the
    exception is NOT propagated to the caller; instead the admins are notified
    (via `notify_admins`) with a message describing which kind of failure
    occurred.
    '''
    client = storage.Client()
    try:
        # The first positional argument of storage.Bucket is the client, so the
        # bucket name has to be passed as `name`.  The object is built inside
        # the try so that a failure here is reported like any other failure.
        bucket = storage.Bucket(client, name=bucketname)
        bucket.location = region
        client.create_bucket(bucket)
        return
    except google.api_core.exceptions.Conflict as ex:
        message = '''
            An attempt was made to create a bucket at %s.  However, the storage API indicated
            that this was an existing bucket.  Exception reported: %s  
        ''' % (bucketname, ex)
    except google.api_core.exceptions.BadRequest as ex:
        message = '''
            An attempt was made to create a bucket at %s.  However, the storage API indicated
            that there was an error during creation.  Exception reported: %s  
        ''' % (bucketname, ex)
    except Exception as ex:
        message = '''
            An attempt was made to create a bucket at %s.  However, there was an unexpected exception
            raised.  Exception reported: %s  
        ''' % (bucketname, ex)

    # only reached when one of the handlers above ran
    subject = 'Error with bucket creation'
    notify_admins(message, subject)
Example #2
0
def log_client_errors(job, stderr_file_list):
    '''
    This handles pulling the stderr files (which indicate what went wrong)
    from the cloud-based storage and extracting their contents.

    `job` supplies `job_staging_dir`, `project` and `job_id`.
    `stderr_file_list` is an iterable of storage paths, each starting with the
    configured `google_storage_gs_prefix`.

    Returns a list of the JobClientError objects that were saved: one generic
    error for each stderr file that could not be found, plus one per
    delimiter-separated section of each non-empty stderr file that was
    downloaded.
    '''
    errors = []

    # make a folder where we can dump these stderr files temporarily:
    foldername = 'tmp_stderr_%s' % datetime.datetime.now().strftime(
        '%H%M%S_%m%d%Y')
    stderr_folder = os.path.join(job.job_staging_dir, foldername)
    os.mkdir(stderr_folder)

    storage_client = storage.Client()
    bucket_prefix = settings.CONFIG_PARAMS['google_storage_gs_prefix']
    local_file_list = []
    for i, stderr_path in enumerate(stderr_file_list):
        # "<prefix><bucket>/<object path>" -> bucket name and object name
        path_without_prefix = stderr_path[len(bucket_prefix):]
        bucket_name, _, object_name = path_without_prefix.partition('/')
        bucket = storage_client.get_bucket(bucket_name)
        blob = bucket.blob(object_name)
        file_location = os.path.join(stderr_folder, 'stderr_%d' % i)
        try:
            blob.download_to_filename(file_location)
        except google.api_core.exceptions.NotFound:
            # if the stderr file was not found, it means some other issue
            # occurred that prevented Cromwell from creating it.
            error_text = 'An unexpected error has occurred.  Please contact the administrator.'
            jc = JobClientError(project=job.project, error_text=error_text)
            jc.save()
            errors.append(jc)
            message = '''Job (%s) for project %s experienced failure on Cromwell.  
            The expected stderr file (%s) was not found, however. 
            Staging dir was %s.
            ''' % (job.job_id, job.project, stderr_path, job.job_staging_dir)
            subject = 'Cromwell runtime job failure'
            notify_admins(message, subject)
        else:
            # record the file exactly once, and only when it was downloaded,
            # so the read loop below never sees a missing or duplicate path
            local_file_list.append(file_location)

    # now have all files-- read content and create database objects to track:
    for f in local_file_list:
        with open(f) as fh:
            file_contents = fh.read()
        if file_contents:
            stderr_sections = file_contents.split(
                settings.CROMWELL_STDERR_DELIM)
            for section in stderr_sections:
                jc = JobClientError(project=job.project, error_text=section)
                jc.save()
                errors.append(jc)

    shutil.rmtree(stderr_folder)
    return errors
Example #3
0
def handle_exception(ex, message=''):
    '''
    This function handles situations where there is an error when submitting
    to the cromwell server (submission or execution).

    `ex` is the exception that was raised.  `message` is the text that is
    saved as an Issue in the database and sent to the admins; when it is
    empty, the string form of `ex` is used in its place.
    '''
    subject = 'Error encountered with asynchronous task.'

    # fall back to the exception text so that neither the stored Issue nor the
    # admin notification is blank when the caller gives no message
    if not message:
        message = str(ex)

    # save this problem in the database:
    issue = Issue(message=message)
    issue.save()

    notify_admins(message, subject)
Example #4
0
def handle_exception(ex, message=''):
    '''
    Stores the string form of `ex` as an Issue in the database, then sends
    `message` to the admins under the download-error subject line.
    '''
    # persist the exception text first, then alert the admins
    Issue(message=str(ex)).save()

    notify_admins(message, 'Unexpected error encountered with download')
Example #5
0
    def post(self, request, *args, **kwargs):
        '''
        With a POST request, the form is being submitted.  We parse the contents
        of that request, prepare a pending analysis, and prepare a summary.

        Responses:
        - 400 if the workflow lookup fails, the analysis was already started,
          the `data` payload is missing / not a JSON object, or starting the
          job raises (in which case admins are notified and an Issue is saved).
        - 403 if the requesting user does not own the project.
        - JSON message otherwise (either "no action taken" when there is no
          project, or confirmation that the analysis was submitted).
        '''

        try:
            workflow_obj, analysis_project = AnalysisView.get_workflow(kwargs, request.user.is_staff)
        except AnalysisQueryException as ex:
            return HttpResponseBadRequest(str(ex))
        except Exception:
            return HttpResponseBadRequest('Some unexpected error has occurred.')

        if analysis_project is None:
            return JsonResponse({'message': 'No action taken since workflow was not assigned to a project.'})

        if analysis_project.started:
            return HttpResponseBadRequest('Analysis was already started/run.')

        if request.user != analysis_project.owner:
            return HttpResponseForbidden('You do not own this project, so you may not initiate an analysis')

        # parse the payload from the POST request and make a dictionary.
        # A missing field gives None (TypeError) and malformed JSON gives a
        # ValueError; both are client errors rather than server failures.
        data = request.POST.get('data')
        try:
            j = json.loads(data)
        except (TypeError, ValueError):
            return HttpResponseBadRequest('The request payload was missing or was not valid JSON.')
        if not isinstance(j, dict):
            return HttpResponseBadRequest('The request payload must be a JSON object.')
        j['analysis_uuid'] = analysis_project.analysis_uuid

        try:
            # the project is flagged as started before the job is launched
            analysis_project.started = True
            analysis_project.status = 'Preparing workflow'
            analysis_project.save()
            start_job_on_gcp(request, j, workflow_obj)
            return JsonResponse({'message': '''
            Your analysis has been submitted.  You may return to this page to check on the status of the job.  
            If it has been enabled, an email will be sent upon completion'''})
        except Exception:
            message = 'There was a problem instantiating an analysis.  Project was %s.\n' % str(analysis_project.analysis_uuid)
            # default=str: the injected analysis_uuid may not be a JSON-native
            # type, and a serialization error here would mask the real failure
            message += 'Payload sent to backend was: %s' % json.dumps(j, default=str)
            subject = 'Error instantiating workflow'
            notify_admins(message, subject)

            issue = Issue(message=message)
            issue.save()

            return HttpResponseBadRequest('Error when instantiating workflow.')
Example #6
0
def handle_failure(job):
    '''
    This is executed when a WDL job has completed and Cromwell has indicated a failure has occurred
    `job` is an instance of SubmittedJob

    The job is archived as an unsuccessful CompletedJob and deleted, the
    associated project is marked as errored (with restarts disallowed), the
    project owner is emailed unless settings.SILENT_CLIENTSIDE_FAILURE is set,
    and the admins are notified.
    '''
    project = job.project
    cj = CompletedJob(project=project,
                      job_id=job.job_id,
                      job_status=job.job_status,
                      success=False,
                      job_staging_dir=job.job_staging_dir)
    cj.save()
    job.delete()

    # update the AnalysisProject instance to reflect the failure:
    project.completed = False
    project.success = False
    project.error = True
    project.status = 'The job submission has failed.  An administrator has been notified.'
    project.finish_time = datetime.datetime.now()
    project.restart_allowed = False  # do not allow restarts for runtime failures
    project.save()

    # inform client (if desired):
    if not settings.SILENT_CLIENTSIDE_FAILURE:
        recipient = project.owner.email
        # context managers so the template file handles are closed promptly
        with open('email_templates/analysis_fail.html') as fh:
            email_html = fh.read()
        with open('email_templates/analysis_fail.txt') as fh:
            email_plaintext = fh.read()
        with open('email_templates/analysis_fail_subject.txt') as fh:
            email_subject = fh.readline().strip()
        send_email(email_plaintext, email_html, recipient, email_subject)

    # notify admins:
    message = 'Job (%s) experienced failure.' % cj.job_id
    subject = 'Cromwell job failure'
    notify_admins(message, subject)
Example #7
0
def handle_precheck_failure(job):
    '''
    If a pre-check job failed, something was wrong with the inputs.
    We query the cromwell metadata to get the error so the user can correct it

    On success: the stderr contents are logged via `log_client_errors`, the
    project is marked as errored, the owner is emailed, the admins are notified
    when no restart is allowed, and the job is deleted.

    On any exception: the problem is reported through `handle_exception`
    (at most once per job, tracked with a Warning record) and the original
    exception is re-raised.
    '''
    config_path = os.path.join(THIS_DIR, 'wdl_job_config.cfg')
    config_dict = utils.load_config(config_path)

    # pull together the components of the request to the Cromwell server
    metadata_endpoint = config_dict['metadata_endpoint']
    metadata_url_template = Template(settings.CROMWELL_SERVER_URL +
                                     metadata_endpoint)
    metadata_url = metadata_url_template.render({'job_id': job.job_id})
    try:
        response = requests.get(metadata_url)
        response_json = response.json()
        stderr_file_list = walk_response('', response_json, 'stderr')
        # called for its side effect of saving the client-facing errors
        log_client_errors(job, stderr_file_list)

        # update the AnalysisProject instance:
        project = job.project
        project.completed = False
        project.success = False
        project.error = True
        project.status = 'Issue encountered with inputs.'
        project.message = ''
        project.finish_time = datetime.datetime.now()
        project.save()

        # inform the client of this problem so they can fix it (if allowed):
        email_address = project.owner.email
        current_site = Site.objects.get_current()
        domain = current_site.domain
        project_url = reverse('analysis-project-execute',
                              args=[
                                  project.analysis_uuid,
                              ])
        url = 'https://%s%s' % (domain, project_url)
        context = {'site': url, 'user_email': email_address}
        if project.restart_allowed:
            email_template_path = 'email_templates/analysis_fail_with_recovery.html'
            email_plaintxt_path = 'email_templates/analysis_fail_with_recovery.txt'
        else:
            email_template_path = 'email_templates/analysis_fail.html'
            email_plaintxt_path = 'email_templates/analysis_fail.txt'
        # the subject line is the same whether or not a restart is allowed
        email_subject_path = 'email_templates/analysis_fail_subject.txt'

        email_template = get_jinja_template(email_template_path)
        email_html = email_template.render(context)
        email_plaintxt_template = get_jinja_template(email_plaintxt_path)
        email_plaintxt = email_plaintxt_template.render(context)
        with open(email_subject_path) as fh:
            email_subject = fh.readline().strip()
        send_email(email_plaintxt, email_html, email_address, email_subject)

        if not project.restart_allowed:
            # a project that had a pre-check failed, but a restart was NOT allowed.
            # need to inform admins:
            message = 'Job (%s) experienced failure during pre-check.  No restart was allowed.  Staging dir was %s' % (
                job.job_id, job.job_staging_dir)
            subject = 'Cromwell job failure on pre-check'
            notify_admins(message, subject)

        # delete the failed job:
        job.delete()

    except Exception as ex:
        print('An exception was raised when requesting metadata '
              'from cromwell server following a pre-check failure')
        print(ex)
        # each detail goes on its own line so the report stays readable
        message = 'An exception occurred when trying to query metadata. \n'
        message += 'Job ID was: %s\n' % job.job_id
        message += 'Project ID was: %s\n' % job.project.analysis_uuid
        message += str(ex)
        try:
            # an existing Warning for this job means admins were already told
            Warning.objects.get(job=job)
        except analysis.models.Warning.DoesNotExist:
            handle_exception(ex, message=message)

            # add a 'Warning' object in the database so that we don't
            # overwhelm the admin email boxes.
            warn = Warning(message=message, job=job)
            warn.save()
        else:
            print(
                'Error when querying cromwell for metadata.  Notification suppressed'
            )
        # re-raise the exception caught above
        raise