Example #1
def register_outputs(job):
    '''
    Add outputs from the workflow to the list of Resources owned by the client,
    so that they can download the files produced by the workflow.
    '''
    config_path = os.path.join(THIS_DIR, 'wdl_job_config.cfg')
    config_dict = utils.load_config(config_path)

    # pull together the components of the request to the Cromwell server
    outputs_endpoint = config_dict['outputs_endpoint']
    outputs_url_template = Template(settings.CROMWELL_SERVER_URL +
                                    outputs_endpoint)
    outputs_url = outputs_url_template.render({'job_id': job.job_id})

    try:
        response = requests.get(outputs_url)
        response_json = response.json()
        if response.status_code in (400, 404, 500):
            job.project.status = 'Analysis completed.  Error encountered when collecting final outputs.'
            job.project.error = True
            job.project.save()
            handle_exception(
                None, 'Query for job failed with message: %s' %
                response_json['message'])
        else:  # the request itself was OK
            outputs = response_json['outputs']
            output_filepath_list = parse_outputs(outputs)
            environment = settings.CONFIG_PARAMS['cloud_environment']
            for p in output_filepath_list:
                size_in_bytes = get_resource_size(p)
                full_destination_with_prefix = move_resource_to_user_bucket(
                    job, p)

                # add the Resource to the database:
                r = Resource(source=environment,
                             path=full_destination_with_prefix,
                             name=os.path.basename(p),
                             owner=job.project.owner,
                             size=size_in_bytes)
                r.save()

                # add a ProjectResource to the database, so we can tie the Resource created above with the analysis project:
                apr = AnalysisProjectResource(analysis_project=job.project,
                                              resource=r)
                apr.save()

    except Exception as ex:
        print(
            'An exception was raised when requesting job outputs from cromwell server'
        )
        print(ex)
        message = 'An exception occurred when trying to query outputs from Cromwell.\n'
        message += 'Job ID was: %s\n' % job.job_id
        message += 'Project ID was: %s\n' % job.project.analysis_uuid
        message += str(ex)
        raise JobOutputsException(message)
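The outputs URL above is built by rendering a template string from the config against the job ID. A minimal sketch of that mechanism, assuming the Template class is jinja2's and using an illustrative Cromwell-style endpoint (the real value of outputs_endpoint lives in wdl_job_config.cfg):

from jinja2 import Template

CROMWELL_SERVER_URL = 'http://localhost:8000'               # hypothetical
outputs_endpoint = '/api/workflows/v1/{{job_id}}/outputs'   # hypothetical config value

outputs_url = Template(CROMWELL_SERVER_URL + outputs_endpoint).render(
    {'job_id': 'abc123'})
# -> 'http://localhost:8000/api/workflows/v1/abc123/outputs'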
Example #2
    def __init__(self, upload_data):
        #instantiate the wrapped classes:
        self.uploader = self.uploader_cls(upload_data)
        self.launcher = self.launcher_cls()

        # get the config params for the uploader:
        uploader_cfg = self.uploader_cls.get_config(self.config_file)
        additional_cfg = utils.load_config(self.config_file, self.config_keys)
        uploader_cfg.update(additional_cfg)
        self.config_params = uploader_cfg
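This constructor expects the concrete class to supply uploader_cls, launcher_cls, config_file, and config_keys. A hypothetical concrete subclass (all names below are illustrative, not taken from the source) could wire them up like this:

class DropboxUploader:                      # illustrative stub
    def __init__(self, upload_data):
        self.upload_data = upload_data

class DropboxLauncher:                      # illustrative stub
    pass

class DropboxUploadHandler(BaseUploadHandler):  # BaseUploadHandler = the class above (name assumed)
    uploader_cls = DropboxUploader
    launcher_cls = DropboxLauncher
    config_file = 'uploaders.cfg'           # hypothetical path
    config_keys = ['dropbox']               # extra config sections merged into uploader_cfg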
Example #3
def main():
    models = {"baseline": BaselineModel, "second_order": SecondOrderModel}
    config = load_config()
    for part in ["nouns", "verbs"]:
        for phase in ["public", "private"]:

            with open(config[f"{phase}_test_{part}_path"], 'r', encoding='utf-8') as f:
                test_data = f.read().split("\n")[:-1]
            model = models[config["model"]](params=config, part=part, phase=phase)
            print("Model loaded")
            results = model.predict_hypernyms(list(test_data))
            save_to_file(results, config[f"{phase}_output_{part}_path"], model.ruwordnet)
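save_to_file is not shown in this example; one plausible shape for it, assuming results maps each query word to a ranked list of hypernym synset ids and that ruwordnet can resolve an id to a readable name (both assumptions are illustrative, not taken from the source):

def save_to_file(results, output_path, ruwordnet):
    with open(output_path, 'w', encoding='utf-8') as f:
        for word, synset_ids in results.items():
            for synset_id in synset_ids:
                name = ruwordnet.get_name_by_id(synset_id)  # assumed helper
                f.write(f"{word}\t{synset_id}\t{name}\n")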
Example #4
    def post(self, request, *args, **kwargs):

        if not request.user.is_staff:
            return HttpResponseForbidden()
        try:
            payload = request.POST
            analysis_uuid = payload['cnap_uuid']
            analysis_project = AnalysisProject.objects.get(
                analysis_uuid=analysis_uuid)
        except KeyError:
            return HttpResponseBadRequest('Missing the cnap_uuid parameter')
        except analysis.models.AnalysisProject.DoesNotExist:
            return HttpResponseBadRequest(
                'Could not find a project with that UUID')

        # now have a project, but to kill the job, we need a SubmittedJob
        try:
            sj = SubmittedJob.objects.get(project=analysis_project)
            cromwell_id = sj.job_id

            # send Cromwell a message to abort the job:
            # read config to get the names/locations/parameters for job submission
            config_path = os.path.join(THIS_DIR, 'wdl_job_config.cfg')
            config_dict = utils.load_config(config_path)

            # pull together the components of the POST request to the Cromwell server
            abort_endpoint_str = config_dict['abort_endpoint']
            abort_url_template = Template(settings.CROMWELL_SERVER_URL +
                                          abort_endpoint_str)
            abort_url = abort_url_template.render({'job_id': cromwell_id})
            r = requests.post(abort_url)
            if r.status_code != 200:
                return HttpResponseBadRequest(
                    'Cromwell did not return a proper response code.  Reason was: %s'
                    % r.text)
            else:
                # reset the project attributes
                analysis_project.error = False
                analysis_project.completed = False
                analysis_project.started = False
                analysis_project.message = ''
                analysis_project.status = ''
                analysis_project.save()

                # finally, delete the submitted job
                sj.delete()

                return JsonResponse({'message': 'Job has been aborted.'})

        except analysis.models.SubmittedJob.DoesNotExist:
            return HttpResponseBadRequest(
                'Could not find a running job for project %s' %
                analysis_project.analysis_uuid)
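A hedged usage sketch for this view with Django's test client; the URL path and the staff_user fixture are hypothetical:

from django.test import Client

client = Client()
client.force_login(staff_user)                    # staff_user: an existing is_staff User
response = client.post('/analysis/kill-job/',     # hypothetical route
                       {'cnap_uuid': 'some-project-uuid'})
print(response.status_code, response.content)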
Example #5
    @classmethod
    def get_config(cls, config_filepath):
        return utils.load_config(config_filepath, cls.config_keys)
Example #6
        # method body fragment: writes word vectors in the word2vec text
        # format (a "count dim" header line, then one vector per line)
        with open(output_path, 'w', encoding='utf-8') as w:
            w.write(f"{vectors.shape[0]} {vectors.shape[1]}\n")
            for word, vector in zip(words, vectors):
                vector_line = " ".join(map(str, vector))
                w.write(f"{word.upper()} {vector_line}\n")


def process_data(input_file, output_file):
    with open(input_file, 'r', encoding='utf-8') as f:
        dataset = f.read().lower().split("\n")[:-1]
    w2v_vec.vectorize_data(dataset, output_file)


if __name__ == '__main__':
    from helpers.utils import load_config
    config = load_config()
    w2v_vec = wiki2vecVectorizer(config["vectorizer_path"])
    ruwordnet = RuWordnet(db_path=config["db_path"], ruwordnet_path=config["ruwordnet_path"], with_lemmas=False)
    noun_synsets = defaultdict(list)
    verb_synsets = defaultdict(list)
    for sense_id, synset_id, text in ruwordnet.get_all_senses():
        if synset_id.endswith("N"):
            noun_synsets[synset_id].append(text.lower())
        elif synset_id.endswith("V"):
            verb_synsets[synset_id].append(text.lower())

    w2v_vec.vectorize_ruwordnet(noun_synsets, "models/vectors/ruwordnet_nouns.txt")
    w2v_vec.vectorize_ruwordnet(verb_synsets, "models/vectors/ruwordnet_verbs.txt")

    process_data("../data/public_test/verbs_public.tsv", "models/vectors/verbs_public.txt")
    process_data("../data/public_test/nouns_public.tsv", "models/vectors/nouns_public.txt")
Example #7
def execute_wdl(analysis_project, staging_dir, run_precheck=False):
    '''
    This function performs the actual work of submitting the job
    '''

    # read config to get the names/locations/parameters for job submission
    config_path = os.path.join(THIS_DIR, 'wdl_job_config.cfg')
    config_dict = utils.load_config(config_path)

    # the path of the input json file:
    wdl_input_path = os.path.join(staging_dir, WDL_INPUTS)

    # pull together the components of the POST request to the Cromwell server
    submission_endpoint = config_dict['submit_endpoint']
    submission_url = settings.CROMWELL_SERVER_URL + submission_endpoint
    payload = {
        'workflowType': config_dict['workflow_type'],
        'workflowTypeVersion': config_dict['workflow_type_version']
    }

    # load the options file so we can fill-in the zones:
    options_json = {}
    current_zone = get_zone_as_string()
    if current_zone:
        options_json['default_runtime_attributes'] = {'zones': current_zone}

    options_json_str = json.dumps(options_json)
    options_io = io.BytesIO(options_json_str.encode('utf-8'))

    files = {
        'workflowOptions': options_io,
        'workflowInputs': open(wdl_input_path, 'rb')
    }

    wdl_filename = settings.PRECHECK_WDL if run_precheck else settings.MAIN_WDL
    files['workflowSource'] = open(os.path.join(staging_dir, wdl_filename), 'rb')

    zip_archive = os.path.join(staging_dir, ZIPNAME)
    if os.path.exists(zip_archive):
        files['workflowDependencies'] = open(zip_archive, 'rb')

    # start the job:
    try:
        response = requests.post(submission_url, data=payload, files=files)
    except Exception as ex:
        print('An exception was raised when requesting cromwell server:')
        print(ex)
        message = 'An exception occurred when trying to submit a job to Cromwell. \n'
        message += 'Project ID was: %s' % str(analysis_project.analysis_uuid)
        message += str(ex)

        analysis_project.status = '''
            Error on job submission.  An administrator has been automatically notified of this error.
            Thank you for your patience.
            '''
        analysis_project.error = True
        analysis_project.save()
        handle_exception(ex, message=message)
        raise ex
    response_json = response.json()
    if response.status_code == 201:
        if response_json['status'] == 'Submitted':
            job_id = response_json['id']

            if run_precheck:
                job_status = 'Checking input data...'
            else:
                job_status = 'Job submitted...'

            job = SubmittedJob(project=analysis_project,
                               job_id=job_id,
                               job_status=job_status,
                               job_staging_dir=staging_dir,
                               is_precheck=run_precheck)
            job.save()

            # update the project also:
            analysis_project.started = True  # should already be set
            analysis_project.start_time = datetime.datetime.now()
            analysis_project.status = job_status
            analysis_project.save()
        else:
            # In case we get other types of responses, inform the admins:
            message = 'Job was submitted, but received an unexpected response from Cromwell:\n'
            message += response.text
            handle_exception(None, message=message)
    else:
        message = 'Did not submit job-- status code was %d, and response text was: %s' % (
            response.status_code, response.text)
        analysis_project.status = '''
            Error on job submission.  An administrator has been automatically notified of this error.
            Thank you for your patience.
            '''
        analysis_project.error = True
        analysis_project.save()
        handle_exception(None, message=message)
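The submission above opens several file handles that are never explicitly closed. A minimal sketch of the same multipart POST using contextlib.ExitStack, so every handle is released even if the request raises (a refactoring suggestion, not the source's code):

import contextlib
import os
import requests

def submit(submission_url, payload, staging_dir, wdl_name, inputs_path,
           options_io, zip_path=None):
    with contextlib.ExitStack() as stack:
        files = {
            'workflowOptions': options_io,
            'workflowInputs': stack.enter_context(open(inputs_path, 'rb')),
            'workflowSource': stack.enter_context(
                open(os.path.join(staging_dir, wdl_name), 'rb')),
        }
        if zip_path and os.path.exists(zip_path):
            files['workflowDependencies'] = stack.enter_context(
                open(zip_path, 'rb'))
        return requests.post(submission_url, data=payload, files=files)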
Example #8
def check_job():
    '''
    Used for pinging the cromwell server to check job status
    '''
    terminal_actions = {'Succeeded': handle_success, 'Failed': handle_failure}

    precheck_terminal_actions = {
        'Succeeded': handle_precheck_success,
        'Failed': handle_precheck_failure
    }

    other_states = ['Submitted', 'Running']

    config_path = os.path.join(THIS_DIR, 'wdl_job_config.cfg')
    config_dict = utils.load_config(config_path)

    # pull together the components of the request to the Cromwell server
    query_endpoint = config_dict['query_status_endpoint']
    query_url_template = Template(settings.CROMWELL_SERVER_URL +
                                  query_endpoint)

    # get the job IDs for active jobs:
    active_job_set = SubmittedJob.objects.all()
    print('%d active jobs found.' % len(active_job_set))
    for job in active_job_set:
        query_url = query_url_template.render({'job_id': job.job_id})
        try:
            response = requests.get(query_url)
            response_json = json.loads(response.text)
            if response.status_code in (400, 404, 500):
                handle_exception(
                    None, 'Query for job failed with message: %s' %
                    response_json['message'])
            else:  # the request itself was OK
                status = response_json['status']

                # if the job was in one of the finished states, execute some specific logic
                if status in terminal_actions:
                    # call the function implementing the logic for this end-state
                    if job.is_precheck:
                        precheck_terminal_actions[status](job)
                    else:
                        terminal_actions[status](job)
                elif status in other_states:
                    # any custom behavior for unfinished tasks
                    # can be handled here if desired

                    # update the job status in the database
                    job.job_status = status
                    job.save()

                    project = job.project
                    project.status = status
                    project.save()
                else:
                    # has some status we do not recognize
                    message = 'When querying for status of job ID: %s, ' % job.job_id
                    message += 'received an unrecognized response: %s' % response.text
                    job.job_status = 'Unknown'
                    job.save()

                    try:
                        Warning.objects.get(job=job)
                        print('When querying Cromwell for job status, received an '
                              'unrecognized status.  Notification suppressed')
                    except analysis.models.Warning.DoesNotExist:
                        handle_exception(None, message=message)

                        # add a 'Warning' object in the database so that we don't
                        # overwhelm the admin email boxes.
                        warn = Warning(message=message, job=job)
                        warn.save()
        except Exception as ex:
            print(
                'An exception was raised when requesting job status from cromwell server'
            )
            print(ex)
            message = 'An exception occurred when trying to query a job.\n'
            message += 'Job ID was: %s\n' % job.job_id
            message += 'Project ID was: %s\n' % job.project.analysis_uuid
            message += str(ex)
            try:
                Warning.objects.get(job=job)
                print('Error when querying Cromwell for job status.  '
                      'Notification suppressed')
            except analysis.models.Warning.DoesNotExist:
                handle_exception(ex, message=message)

                # add a 'Warning' object in the database so that we don't
                # overwhelm the admin email boxes.
                warn = Warning(message=message, job=job)
                warn.save()
            raise ex
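The "warn only once per job" pattern used twice above can be compressed with Django's get_or_create; a hedged sketch assuming the same Warning model and helpers as in this example:

warn, created = Warning.objects.get_or_create(
    job=job, defaults={'message': message})
if created:
    # first occurrence for this job: notify the admins
    handle_exception(None, message=message)
else:
    print('Notification suppressed')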
Example #9
def handle_precheck_failure(job):
    '''
    If a pre-check job failed, something was wrong with the inputs.
    We query the Cromwell metadata to get the error so the user can correct it.
    '''
    config_path = os.path.join(THIS_DIR, 'wdl_job_config.cfg')
    config_dict = utils.load_config(config_path)

    # pull together the components of the request to the Cromwell server
    metadata_endpoint = config_dict['metadata_endpoint']
    metadata_url_template = Template(settings.CROMWELL_SERVER_URL +
                                     metadata_endpoint)
    metadata_url = metadata_url_template.render({'job_id': job.job_id})
    try:
        response = requests.get(metadata_url)
        response_json = response.json()
        stderr_file_list = walk_response('', response_json, 'stderr')
        error_obj_list = log_client_errors(job, stderr_file_list)

        # update the AnalysisProject instance:
        project = job.project
        project.completed = False
        project.success = False
        project.error = True
        project.status = 'Issue encountered with inputs.'
        project.message = ''
        project.finish_time = datetime.datetime.now()
        project.save()

        # inform the client of this problem so they can fix it (if allowed):
        email_address = project.owner.email
        current_site = Site.objects.get_current()
        domain = current_site.domain
        project_url = reverse('analysis-project-execute',
                              args=[project.analysis_uuid])
        url = 'https://%s%s' % (domain, project_url)
        context = {'site': url, 'user_email': email_address}
        email_subject_path = 'email_templates/analysis_fail_subject.txt'
        if project.restart_allowed:
            email_template_path = 'email_templates/analysis_fail_with_recovery.html'
            email_plaintxt_path = 'email_templates/analysis_fail_with_recovery.txt'
        else:
            email_template_path = 'email_templates/analysis_fail.html'
            email_plaintxt_path = 'email_templates/analysis_fail.txt'

        email_template = get_jinja_template(email_template_path)
        email_html = email_template.render(context)
        email_plaintxt_template = get_jinja_template(email_plaintxt_path)
        email_plaintxt = email_plaintxt_template.render(context)
        with open(email_subject_path) as subject_file:
            email_subject = subject_file.readline().strip()
        send_email(email_plaintxt, email_html, email_address, email_subject)

        if not project.restart_allowed:
            # a project that had a pre-check failed, but a restart was NOT allowed.
            # need to inform admins:
            message = 'Job (%s) experienced failure during pre-check.  No restart was allowed.  Staging dir was %s' % (
                job.job_id, job.job_staging_dir)
            subject = 'Cromwell job failure on pre-check'
            notify_admins(message, subject)

        # delete the failed job:
        job.delete()

    except Exception as ex:
        print('An exception was raised when requesting metadata '
              'from cromwell server following a pre-check failure')
        print(ex)
        message = 'An exception occurred when trying to query metadata.\n'
        message += 'Job ID was: %s\n' % job.job_id
        message += 'Project ID was: %s\n' % job.project.analysis_uuid
        message += str(ex)
        try:
            Warning.objects.get(job=job)
            print('Error when querying Cromwell for metadata.  '
                  'Notification suppressed')
        except analysis.models.Warning.DoesNotExist:
            handle_exception(ex, message=message)

            # add a 'Warning' object in the database so that we don't
            # overwhelm the admin email boxes.
            warn = Warning(message=message, job=job)
            warn.save()
        raise ex
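get_jinja_template is not shown in this example; a plausible implementation, assuming the email templates live on disk relative to the working directory (illustrative, not the source's code):

from jinja2 import Environment, FileSystemLoader

def get_jinja_template(relative_path):
    env = Environment(loader=FileSystemLoader('.'))
    return env.get_template(relative_path)

# usage mirroring the example above:
html = get_jinja_template('email_templates/analysis_fail.html').render(
    {'site': 'https://example.org/projects/some-uuid/',
     'user_email': 'user@example.org'})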
Example #10
    elif val == 'False' or val == 'false':
        CONFIG_PARAMS[key] = False

# using the value of EXPIRATION_PERIOD_DAYS from the config, set a timedelta:
# This logic could be altered as desired:
EXPIRATION_PERIOD = datetime.timedelta(
    days=int(CONFIG_PARAMS['expiration_period_days']))

# These are the days on which clients are reminded of pending deletion of Resources:
EXPIRATION_REMINDER_DAYS = [
    int(x.strip())
    for x in CONFIG_PARAMS['expiration_reminder_days'].split(',')
]
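For concreteness, the two settings above imply config values shaped like these (an illustrative snippet, not the actual file):

#   expiration_period_days = 30
#   expiration_reminder_days = 7, 3, 1
#
# which the parsing above turns into:
#   EXPIRATION_PERIOD = datetime.timedelta(days=30)
#   EXPIRATION_REMINDER_DAYS = [7, 3, 1]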

additional_sections = [GOOGLE_DRIVE, DROPBOX, GOOGLE]
LIVE_TEST_CONFIG_PARAMS = utils.load_config(
    os.path.join(CONFIG_DIR, 'live_tests.cfg'), additional_sections)

# Configuration for upload providers and compute environments:

UPLOADER_CONFIG = {
    'CONFIG_PATH': os.path.join(CONFIG_DIR, 'uploaders.cfg'),

    # for each item in the following list, there needs to be a section
    # header in the config file located at UPLOADER_CONFIG.CONFIG_PATH
    'UPLOAD_SOURCES': [DROPBOX, GOOGLE_DRIVE]
}

DOWNLOADER_CONFIG = {
    'CONFIG_PATH': os.path.join(CONFIG_DIR, 'downloaders.cfg'),

    # for each item in the following list, there needs to be a section
    # header in the config file located at DOWNLOADER_CONFIG.CONFIG_PATH