def register_outputs(job):
    '''
    Adds the outputs produced by a completed workflow to the list of
    Resources owned by the client, so they are able to download the files
    produced by the workflow.

    Queries the Cromwell server's outputs endpoint for `job`, moves each
    output file to the user's bucket, and records a Resource plus a linking
    AnalysisProjectResource for each file.

    Raises:
        JobOutputsException: if the request to Cromwell (or any follow-up
        processing) fails.
    '''
    # read config to get the names/locations/parameters for the Cromwell API
    config_path = os.path.join(THIS_DIR, 'wdl_job_config.cfg')
    config_dict = utils.load_config(config_path)

    # pull together the components of the request to the Cromwell server
    outputs_endpoint = config_dict['outputs_endpoint']
    outputs_url_template = Template(settings.CROMWELL_SERVER_URL +
                                    outputs_endpoint)
    outputs_url = outputs_url_template.render({'job_id': job.job_id})

    try:
        response = requests.get(outputs_url)
        response_json = json.loads(response.text)
        if response.status_code in (400, 404, 500):
            # Cromwell rejected or could not find the job -- mark the project
            # as errored and notify the admins.
            job.project.status = 'Analysis completed. Error encountered when collecting final outputs.'
            job.project.error = True
            job.project.save()
            handle_exception(
                None, 'Query for job failed with message: %s' %
                response_json['message'])
        else:
            # the request itself was OK
            outputs = response_json['outputs']
            output_filepath_list = parse_outputs(outputs)
            environment = settings.CONFIG_PARAMS['cloud_environment']
            for p in output_filepath_list:
                size_in_bytes = get_resource_size(p)
                full_destination_with_prefix = move_resource_to_user_bucket(
                    job, p)

                # add the Resource to the database:
                r = Resource(source=environment,
                             path=full_destination_with_prefix,
                             name=os.path.basename(p),
                             owner=job.project.owner,
                             size=size_in_bytes)
                r.save()

                # add a ProjectResource to the database, so we can tie the
                # Resource created above with the analysis project:
                apr = AnalysisProjectResource(analysis_project=job.project,
                                              resource=r)
                apr.save()
    except Exception as ex:
        print(
            'An exception was raised when requesting job outputs from cromwell server'
        )
        print(ex)
        # Bug fix: the original concatenated these fragments without any
        # separator, producing one unreadable run-on line in the notification.
        message = 'An exception occurred when trying to query outputs from Cromwell. \n'
        message += 'Job ID was: %s\n' % job.job_id
        message += 'Project ID was: %s\n' % job.project.analysis_uuid
        message += str(ex)
        raise JobOutputsException(message)
def __init__(self, upload_data):
    '''
    Builds the wrapped uploader and launcher instances and assembles the
    merged configuration parameters for the uploader.
    '''
    # Instantiate the wrapped classes:
    self.uploader = self.uploader_cls(upload_data)
    self.launcher = self.launcher_cls()

    # Merge the uploader's own config with the extra sections named by
    # self.config_keys; the additional sections win on key collisions.
    merged_cfg = self.uploader_cls.get_config(self.config_file)
    merged_cfg.update(utils.load_config(self.config_file, self.config_keys))
    self.config_params = merged_cfg
def main():
    '''
    Runs hypernym prediction for every (part, phase) combination and writes
    the results to the output paths named in the config.
    '''
    models = {"baseline": BaselineModel, "second_order": SecondOrderModel}
    config = load_config()
    model_cls = models[config["model"]]
    for part in ["nouns", "verbs"]:
        for phase in ["public", "private"]:
            # Input files are newline-terminated; drop the trailing empty entry.
            with open(config[f"{phase}_test_{part}_path"], 'r',
                      encoding='utf-8') as fh:
                test_words = fh.read().split("\n")[:-1]
            predictor = model_cls(params=config, part=part, phase=phase)
            print("Model loaded")
            hypernyms = predictor.predict_hypernyms(list(test_words))
            save_to_file(hypernyms, config[f"{phase}_output_{part}_path"],
                         predictor.ruwordnet)
def post(self, request, *args, **kwargs):
    '''
    Staff-only endpoint that aborts a running Cromwell job for a project.

    Expects a POST parameter `cnap_uuid` identifying the AnalysisProject.
    On success, resets the project's state flags, deletes the SubmittedJob
    record, and returns a JSON confirmation.  Returns 400 when the project
    or job cannot be found, or when Cromwell refuses the abort.
    '''
    if not request.user.is_staff:
        return HttpResponseForbidden()
    try:
        payload = request.POST
        analysis_uuid = payload['cnap_uuid']
        analysis_project = AnalysisProject.objects.get(
            analysis_uuid=analysis_uuid)
    except analysis.models.AnalysisProject.DoesNotExist as ex:
        return HttpResponseBadRequest(
            'Could not find a project with that UUID')

    # now have a project, but to kill the job, we need a SubmittedJob
    try:
        sj = SubmittedJob.objects.get(project=analysis_project)
        cromwell_id = sj.job_id
        # send Cromwell a message to abort the job:
        # read config to get the names/locations/parameters for job submission
        config_path = os.path.join(THIS_DIR, 'wdl_job_config.cfg')
        config_dict = utils.load_config(config_path)

        # pull together the components of the POST request to the Cromwell server
        abort_endpoint_str = config_dict['abort_endpoint']
        abort_url_template = Template(settings.CROMWELL_SERVER_URL +
                                      abort_endpoint_str)
        abort_url = abort_url_template.render({'job_id': cromwell_id})
        r = requests.post(abort_url)
        if r.status_code != 200:
            return HttpResponseBadRequest(
                'Did not return a proper response code from Cromwell. Reason was: %s'
                % r.text)
        else:
            # reset the project attributes so the client may restart later
            analysis_project.error = False
            analysis_project.completed = False
            analysis_project.started = False
            analysis_project.message = ''
            analysis_project.status = ''
            analysis_project.save()

            # finally, delete the submitted job
            sj.delete()
            return JsonResponse({'message': 'Job has been aborted.'})
    except analysis.models.SubmittedJob.DoesNotExist:
        return HttpResponseBadRequest(
            'Could not find a running job for project %s' %
            analysis_project.analysis_uuid)
def get_config(cls, config_filepath):
    '''
    Loads and returns this class's configuration sections from the file at
    `config_filepath`.
    '''
    sections = cls.config_keys
    return utils.load_config(config_filepath, sections)
# NOTE(review): this `with` block is the tail of a definition whose start lies
# outside this excerpt (presumably a vectorize method writing word2vec-style
# text output) -- confirm against the enclosing file.
with open(output_path, 'w', encoding='utf-8') as w:
    # word2vec text format: header line is "<row count> <dimension>"
    w.write(f"{vectors.shape[0]} {vectors.shape[1]}\n")
    for word, vector in zip(words, vectors):
        vector_line = " ".join(map(str, vector))
        w.write(f"{word.upper()} {vector_line}\n")


def process_data(input_file, output_file):
    # Read the newline-terminated input file, lowercase it, and drop the
    # trailing empty entry, then vectorize into output_file.
    with open(input_file, 'r', encoding='utf-8') as f:
        dataset = f.read().lower().split("\n")[:-1]
    w2v_vec.vectorize_data(dataset, output_file)


if __name__ == '__main__':
    from helpers.utils import load_config
    config = load_config()
    w2v_vec = wiki2vecVectorizer(config["vectorizer_path"])
    ruwordnet = RuWordnet(db_path=config["db_path"],
                          ruwordnet_path=config["ruwordnet_path"],
                          with_lemmas=False)
    # Bucket every sense's text by synset, split into noun/verb synsets by
    # the trailing letter of the synset id ("N" / "V").
    noun_synsets = defaultdict(list)
    verb_synsets = defaultdict(list)
    for sense_id, synset_id, text in ruwordnet.get_all_senses():
        if synset_id.endswith("N"):
            noun_synsets[synset_id].append(text.lower())
        elif synset_id.endswith("V"):
            verb_synsets[synset_id].append(text.lower())
    w2v_vec.vectorize_ruwordnet(noun_synsets,
                                "models/vectors/ruwordnet_nouns.txt")
    w2v_vec.vectorize_ruwordnet(verb_synsets,
                                "models/vectors/ruwordnet_verbs.txt")
    process_data("../data/public_test/verbs_public.tsv",
                 "models/vectors/verbs_public.txt")
    process_data("../data/public_test/nouns_public.tsv",
                 "models/vectors/nouns_public.txt")
def execute_wdl(analysis_project, staging_dir, run_precheck=False):
    '''
    This function performs the actual work of submitting the job.

    Builds the multipart POST for the Cromwell submission endpoint (workflow
    source, inputs, options, optional dependency zip) and, on a successful
    201/Submitted response, records a SubmittedJob and updates the project
    state.  On any other outcome the admins are notified via
    handle_exception.

    Args:
        analysis_project: the AnalysisProject being run.
        staging_dir: directory holding the WDL files and inputs JSON.
        run_precheck: when True, submit the pre-check WDL instead of the
            main workflow.

    Raises:
        Exception: re-raises whatever the POST to Cromwell raised.
    '''
    # read config to get the names/locations/parameters for job submission
    config_path = os.path.join(THIS_DIR, 'wdl_job_config.cfg')
    config_dict = utils.load_config(config_path)

    # the path of the input json file:
    wdl_input_path = os.path.join(staging_dir, WDL_INPUTS)

    # pull together the components of the POST request to the Cromwell server
    submission_endpoint = config_dict['submit_endpoint']
    submission_url = settings.CROMWELL_SERVER_URL + submission_endpoint
    payload = {
        'workflowType': config_dict['workflow_type'],
        'workflowTypeVersion': config_dict['workflow_type_version']
    }

    # build the options JSON so we can fill-in the zones:
    options_json = {}
    current_zone = get_zone_as_string()
    if current_zone:
        options_json['default_runtime_attributes'] = {'zones': current_zone}
    options_json_str = json.dumps(options_json)
    options_io = io.BytesIO(options_json_str.encode('utf-8'))

    files = {
        'workflowOptions': options_io,
        'workflowInputs': open(wdl_input_path, 'rb')
    }
    if run_precheck:
        files['workflowSource'] = open(
            os.path.join(staging_dir, settings.PRECHECK_WDL), 'rb')
    else:
        files['workflowSource'] = open(
            os.path.join(staging_dir, settings.MAIN_WDL), 'rb')
    zip_archive = os.path.join(staging_dir, ZIPNAME)
    if os.path.exists(zip_archive):
        files['workflowDependencies'] = open(zip_archive, 'rb')

    # start the job:
    try:
        response = requests.post(submission_url, data=payload, files=files)
    except Exception as ex:
        print('An exception was raised when requesting cromwell server:')
        print(ex)
        message = 'An exception occurred when trying to submit a job to Cromwell. \n'
        message += 'Project ID was: %s' % str(analysis_project.analysis_uuid)
        message += str(ex)
        analysis_project.status = ('Error on job submission. An administrator '
                                   'has been automatically notified of this '
                                   'error. Thank you for your patience.')
        analysis_project.error = True
        analysis_project.save()
        handle_exception(ex, message=message)
        raise ex
    finally:
        # Bug fix: the original leaked every opened file handle.  Close them
        # all once the request has completed (or failed).
        for fh in files.values():
            fh.close()

    response_json = json.loads(response.text)
    if response.status_code == 201:
        if response_json['status'] == 'Submitted':
            job_id = response_json['id']
            if run_precheck:
                job_status = 'Checking input data...'
            else:
                job_status = 'Job submitted...'
            job = SubmittedJob(project=analysis_project,
                               job_id=job_id,
                               job_status=job_status,
                               job_staging_dir=staging_dir,
                               is_precheck=run_precheck)
            job.save()

            # update the project also:
            analysis_project.started = True  # should already be set
            analysis_project.start_time = datetime.datetime.now()
            analysis_project.status = job_status
            analysis_project.save()
        else:
            # In case we get other types of responses, inform the admins:
            message = 'Job was submitted, but received an unexpected response from Cromwell:\n'
            message += response.text
            handle_exception(None, message=message)
    else:
        message = 'Did not submit job-- status code was %d, and response text was: %s' % (
            response.status_code, response.text)
        analysis_project.status = ('Error on job submission. An administrator '
                                   'has been automatically notified of this '
                                   'error. Thank you for your patience.')
        analysis_project.error = True
        analysis_project.save()
        handle_exception(None, message=message)
def check_job():
    '''
    Used for pinging the cromwell server to check job status.

    For every SubmittedJob: queries Cromwell's status endpoint; dispatches
    terminal states (Succeeded/Failed) to the appropriate handler (pre-check
    jobs get their own handlers); updates job/project status for in-flight
    states; and for unrecognized statuses or request failures notifies the
    admins at most once per job, using a Warning record to suppress repeats.
    '''
    # dispatch tables: terminal Cromwell states -> handler functions
    terminal_actions = {'Succeeded': handle_success, 'Failed': handle_failure}
    precheck_terminal_actions = {
        'Succeeded': handle_precheck_success,
        'Failed': handle_precheck_failure
    }
    other_states = ['Submitted', 'Running']

    config_path = os.path.join(THIS_DIR, 'wdl_job_config.cfg')
    config_dict = utils.load_config(config_path)

    # pull together the components of the request to the Cromwell server
    query_endpoint = config_dict['query_status_endpoint']
    query_url_template = Template(settings.CROMWELL_SERVER_URL +
                                  query_endpoint)

    # get the job IDs for active jobs:
    active_job_set = SubmittedJob.objects.all()
    print('%d active jobs found.' % len(active_job_set))
    for job in active_job_set:
        query_url = query_url_template.render({'job_id': job.job_id})
        try:
            response = requests.get(query_url)
            response_json = json.loads(response.text)
            if (response.status_code == 404) or (response.status_code
                                                 == 400) or (response.status_code
                                                             == 500):
                handle_exception(
                    None, 'Query for job failed with message: %s' %
                    response_json['message'])
            else:  # the request itself was OK
                status = response_json['status']
                # if the job was in one of the finished states, execute some specific logic
                if status in terminal_actions.keys():
                    if job.is_precheck:
                        precheck_terminal_actions[status](
                            job
                        )  # call the function to execute the logic for this end-state
                    else:
                        terminal_actions[status](
                            job
                        )  # call the function to execute the logic for this end-state
                elif status in other_states:
                    # any custom behavior for unfinished tasks
                    # can be handled here if desired

                    # update the job status in the database
                    job.job_status = status
                    job.save()
                    project = job.project
                    project.status = status
                    project.save()
                else:  # has some status we do not recognize
                    message = 'When querying for status of job ID: %s, ' % job.job_id
                    message += 'received an unrecognized response: %s' % response.text
                    job.job_status = 'Unknown'
                    job.save()
                    try:
                        # if a Warning already exists, we have notified before
                        warnings_sent = Warning.objects.get(job=job)
                        print(
                            'When querying cromwell for job status, received unrecognized status. Notification suppressed'
                        )
                    except analysis.models.Warning.DoesNotExist:
                        handle_exception(None, message=message)
                        # add a 'Warning' object in the database so that we don't
                        # overwhelm the admin email boxes.
                        warn = Warning(message=message, job=job)
                        warn.save()
        except Exception as ex:
            print(
                'An exception was raised when requesting job status from cromwell server'
            )
            print(ex)
            message = 'An exception occurred when trying to query a job. \n'
            message += 'Job ID was: %s' % job.job_id
            message += 'Project ID was: %s' % job.project.analysis_uuid
            message += str(ex)
            try:
                # same suppression pattern as above: only notify once per job
                warnings_sent = Warning.objects.get(job=job)
                print(
                    'Error when querying cromwell for job status. Notification suppressed'
                )
            except analysis.models.Warning.DoesNotExist:
                handle_exception(ex, message=message)
                # add a 'Warning' object in the database so that we don't
                # overwhelm the admin email boxes.
                warn = Warning(message=message, job=job)
                warn.save()
            raise ex
def handle_precheck_failure(job):
    '''
    If a pre-check job failed, something was wrong with the inputs.  We query
    the cromwell metadata to get the error so the user can correct it.

    Marks the AnalysisProject as errored, emails the client (a recovery-style
    email if restarts are allowed), notifies admins when no restart is
    allowed, and deletes the failed SubmittedJob.  Request failures are
    reported to admins at most once per job via a Warning record.
    '''
    config_path = os.path.join(THIS_DIR, 'wdl_job_config.cfg')
    config_dict = utils.load_config(config_path)

    # pull together the components of the request to the Cromwell server
    metadata_endpoint = config_dict['metadata_endpoint']
    metadata_url_template = Template(settings.CROMWELL_SERVER_URL +
                                     metadata_endpoint)
    metadata_url = metadata_url_template.render({'job_id': job.job_id})
    try:
        response = requests.get(metadata_url)
        response_json = response.json()
        # collect every stderr file referenced anywhere in the metadata tree:
        stderr_file_list = walk_response('', response_json, 'stderr')
        error_obj_list = log_client_errors(job, stderr_file_list)

        # update the AnalysisProject instance:
        project = job.project
        project.completed = False
        project.success = False
        project.error = True
        project.status = 'Issue encountered with inputs.'
        project.message = ''
        project.finish_time = datetime.datetime.now()
        project.save()

        # inform the client of this problem so they can fix it (if allowed):
        email_address = project.owner.email
        current_site = Site.objects.get_current()
        domain = current_site.domain
        project_url = reverse('analysis-project-execute',
                              args=[
                                  project.analysis_uuid,
                              ])
        url = 'https://%s%s' % (domain, project_url)
        context = {'site': url, 'user_email': email_address}
        if project.restart_allowed:
            email_template_path = 'email_templates/analysis_fail_with_recovery.html'
            email_plaintxt_path = 'email_templates/analysis_fail_with_recovery.txt'
            email_subject = 'email_templates/analysis_fail_subject.txt'
        else:
            email_template_path = 'email_templates/analysis_fail.html'
            email_plaintxt_path = 'email_templates/analysis_fail.txt'
            email_subject = 'email_templates/analysis_fail_subject.txt'

        email_template = get_jinja_template(email_template_path)
        email_html = email_template.render(context)
        email_plaintxt_template = get_jinja_template(email_plaintxt_path)
        email_plaintxt = email_plaintxt_template.render(context)
        # the subject template's first line is the subject text itself
        email_subject = open(email_subject).readline().strip()
        send_email(email_plaintxt, email_html, email_address, email_subject)

        if not project.restart_allowed:
            # a project that had a pre-check failed, but a restart was NOT allowed.
            # need to inform admins:
            message = 'Job (%s) experienced failure during pre-check. No restart was allowed. Staging dir was %s' % (
                job.job_id, job.job_staging_dir)
            subject = 'Cromwell job failure on pre-check'
            notify_admins(message, subject)

        # delete the failed job:
        job.delete()
    except Exception as ex:
        print('An exception was raised when requesting metadata '
              'from cromwell server following a pre-check failure')
        print(ex)
        message = 'An exception occurred when trying to query metadata. \n'
        message += 'Job ID was: %s' % job.job_id
        message += 'Project ID was: %s' % job.project.analysis_uuid
        message += str(ex)
        try:
            # only notify admins once per job; a Warning means we already did
            warnings_sent = Warning.objects.get(job=job)
            print(
                'Error when querying cromwell for metadata. Notification suppressed'
            )
        except analysis.models.Warning.DoesNotExist:
            handle_exception(ex, message=message)
            # add a 'Warning' object in the database so that we don't
            # overwhelm the admin email boxes.
            warn = Warning(message=message, job=job)
            warn.save()
        raise ex
# NOTE(review): this `elif` is the tail of an if/elif chain whose start lies
# outside this excerpt -- it appears to coerce string config values to
# booleans; confirm against the preceding lines of the file.
elif val == 'False' or val == 'false':
    CONFIG_PARAMS[key] = False

# using the value of EXPIRATION_PERIOD_DAYS from the config, set a timedelta:
# This logic could be altered as desired:
EXPIRATION_PERIOD = datetime.timedelta(
    days=int(CONFIG_PARAMS['expiration_period_days']))

# These are the days on which clients are reminded of pending deletion of Resources:
EXPIRATION_REMINDER_DAYS = [
    int(x.strip())
    for x in CONFIG_PARAMS['expiration_reminder_days'].split(',')
]

additional_sections = [GOOGLE_DRIVE, DROPBOX, GOOGLE]
LIVE_TEST_CONFIG_PARAMS = utils.load_config(
    os.path.join(CONFIG_DIR, 'live_tests.cfg'), additional_sections)

# Configuration for upload providers and compute environments:
UPLOADER_CONFIG = {
    'CONFIG_PATH': os.path.join(CONFIG_DIR, 'uploaders.cfg'),
    # for each item in the following dictionary, there needs to be a section
    # header in the config file located at UPLOADER_CONFIG.CONFIG_PATH
    'UPLOAD_SOURCES': [DROPBOX, GOOGLE_DRIVE]
}

DOWNLOADER_CONFIG = {
    'CONFIG_PATH': os.path.join(CONFIG_DIR, 'downloaders.cfg'),
    # for each item in the following dictionary, there needs to be a section