def process_datachecked_db(self, dc_job_id, spec):
    """Poll a submitted datacheck job and react to its outcome.

    Retries itself indefinitely while the datachecks are still running.
    If the datachecks reported problems, the handover contact is emailed
    a link to the output; otherwise the database copy is submitted.
    """
    # Poll forever -- datacheck jobs can run for a long time.
    self.max_retries = None
    source = spec['src_uri']
    log_and_publish(make_report(
        'INFO',
        'Datachecks in progress, please see: %sjobs/%s' % (cfg.dc_uri, dc_job_id),
        spec, source))
    try:
        job = dc_client.retrieve_job(dc_job_id)
    except Exception as exc:
        log_and_publish(make_report(
            'ERROR', 'Handover failed, cannot retrieve datacheck job',
            spec, source))
        raise ValueError(
            'Handover failed, cannot retrieve datacheck job %s' % exc) from exc
    if job['status'] in ('incomplete', 'running', 'submitted'):
        log_and_publish(make_report(
            'DEBUG', 'Datacheck Job incomplete, checking again later',
            spec, source))
        raise self.retry()
    # check results
    if job['status'] == 'failed':
        # Datachecks ran but flagged problems: point the submitter at the output.
        log_and_publish(make_report(
            'INFO',
            'Datachecks found problems, you can download the output here: %sdownload_datacheck_outputs/%s' % (cfg.dc_uri, dc_job_id),
            spec, source))
        output_url = cfg.dc_uri + "download_datacheck_outputs/" + str(dc_job_id)
        mail_body = """ Running datachecks on %s completed but found problems. You can download the output here %s """ % (source, output_url)
        send_email(to_address=spec['contact'],
                   subject='Datacheck found problems',
                   body=mail_body,
                   smtp_server=cfg.smtp_server)
    else:
        log_and_publish(make_report(
            'INFO', 'Datachecks successful, starting copy', spec, source))
        spec['progress_complete'] = 1
        submit_copy(spec)
def process_copied_db(self, copy_job_id, spec):
    """Poll a database copy job and act on its final state.

    Retries itself indefinitely while the copy is running. On failure the
    handover contact is emailed; on success the metadata update is
    submitted, unless the spec carries a 'GRCh37' key, in which case the
    handover ends here.
    """
    # Poll forever -- copies can take a long time.
    self.max_retries = None
    source = spec['src_uri']
    log_and_publish(make_report(
        'INFO',
        'Copying in progress, please see: %s%s' % (cfg.copy_web_uri, copy_job_id),
        spec, source))
    try:
        job = db_copy_client.retrieve_job(copy_job_id)
    except Exception as exc:
        log_and_publish(make_report(
            'ERROR', 'Handover failed, cannot retrieve copy job', spec, source))
        raise ValueError(
            'Handover failed, cannot retrieve copy job %s' % exc) from exc
    if job['status'] in ('incomplete', 'running', 'submitted'):
        log_and_publish(make_report(
            'DEBUG', 'Database copy job incomplete, checking again later',
            spec, source))
        raise self.retry()
    if job['status'] == 'failed':
        log_and_publish(make_report(
            'INFO',
            'Copy failed, please see: %s%s' % (cfg.copy_web_uri, copy_job_id),
            spec, source))
        mail_body = """ Copying %s to %s failed. Please see %s """ % (
            source, spec['tgt_uri'], cfg.copy_web_uri + str(copy_job_id))
        send_email(to_address=spec['contact'],
                   subject='Database copy failed',
                   body=mail_body,
                   smtp_server=cfg.smtp_server)
        return
    if 'GRCh37' in spec:
        # NOTE(review): a 'GRCh37' key in the spec ends the handover here;
        # the metadata update step is deliberately skipped.
        log_and_publish(make_report(
            'INFO', 'Copying complete, Handover successful', spec, source))
        spec['progress_complete'] = 2
    else:
        log_and_publish(make_report(
            'INFO', 'Copying complete, submitting metadata job', spec, source))
        spec['progress_complete'] = 2
        submit_metadata_update(spec)
def process_db_metadata(self, metadata_job_id, spec):
    """Wait for metadata update to complete and then respond accordingly:
    * if success, submit event to event handler for further processing
    * if failure, flag error using email

    Arguments:
      metadata_job_id - identifier of the metadata load job to poll
      spec - handover specification dict (tgt_uri, contact, staging_uri, ...)
    """
    reporting.set_logger_context(get_logger(), spec['tgt_uri'], spec)
    # allow infinite retries
    self.max_retries = None
    get_logger().info("Loading into metadata database, please see: " +
                      cfg.meta_uri + "jobs/" + str(metadata_job_id))
    try:
        result = metadata_client.retrieve_job(metadata_job_id)
    except Exception as e:
        get_logger().error("Handover failed, Cannot retrieve metadata job")
        # chain the original exception so the root cause stays in the traceback
        raise ValueError(
            "Handover failed, Cannot retrieve metadata job {}".format(e)) from e
    if result['status'] in ['incomplete', 'running', 'submitted']:
        get_logger().debug(
            "Metadata load Job incomplete, checking again later")
        raise self.retry()
    if result['status'] == 'failed':
        get_logger().info("Metadata load failed, please see " + cfg.meta_uri +
                          'jobs/' + str(metadata_job_id) + '?format=failures')
        msg = """ Metadata load of %s failed. 
Please see %s """ % (spec['tgt_uri'],
                     cfg.meta_uri + 'jobs/' + str(metadata_job_id) + '?format=failures')
        send_email(to_address=spec['contact'],
                   subject='Metadata load failed, please see: ' + cfg.meta_uri +
                   'jobs/' + str(metadata_job_id) + '?format=failures',
                   body=msg,
                   smtp_server=cfg.smtp_server)
        return
    else:
        # Cleaning up old assembly or old genebuild databases for Wormbase
        # when database suffix has changed
        if 'events' in result['output'] and result['output']['events']:
            for event in result['output']['events']:
                details = json.loads(event['details'])
                if 'current_database_list' in details:
                    drop_current_databases(details['current_database_list'],
                                           spec['staging_uri'],
                                           spec['tgt_uri'])
                # New assemblies of BLAT species need a manual FTP-dump
                # config update, so alert the production team.
                if event['genome'] in blat_species and event[
                        'type'] == 'new_assembly':
                    send_email(
                        to_address=cfg.production_email,
                        subject='BLAT species list needs updating in FTP Dumps config',
                        body='The following species ' + event['genome'] +
                        ' has a new assembly, please update the port number for this species here and communicate to Web: https://github.com/Ensembl/ensembl-production/blob/master/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/DumpCore_conf.pm#L107'
                    )
        get_logger().info("Metadata load complete, Handover successful")
        spec['progress_complete'] = 3
        # get_logger().info("Metadata load complete, submitting event")
        # submit_event(spec,result)
        # qrp job submit
        submit_qrp_event(spec, result)
        return
def email(self, address, subject, body):
    """Simple task to send an email as specified.

    Uses the module-level ``smtp_server`` and ``from_email_address``.

    Arguments:
      address - recipient email address
      subject - email subject line
      body - email body text
    """
    # Every other send_email call in this module passes the recipient as
    # to_address=; the previous address= keyword did not match that usage.
    send_email(smtp_server=smtp_server,
               from_email_address=from_email_address,
               to_address=address,
               subject=subject,
               body=body)
def process_checked_db(self, hc_job_id, spec):
    """ Task to wait until HCs finish and then respond e.g. * submit copy if HCs succeed * send error email if not

    Arguments:
      hc_job_id - identifier of the healthcheck job to poll
      spec - handover specification dict (src_uri, contact, ...)
    """
    reporting.set_logger_context(get_logger(), spec['src_uri'], spec)
    # allow infinite retries
    self.max_retries = None
    get_logger().info("HCs in progress, please see: " + cfg.hc_web_uri +
                      str(hc_job_id))
    try:
        result = hc_client.retrieve_job(hc_job_id)
    except Exception as e:
        get_logger().error("Handover failed, cannot retrieve hc job")
        # chain the original exception so the root cause stays in the traceback
        raise ValueError(
            "Handover failed, cannot retrieve hc job {}".format(e)) from e
    if result['status'] in ['incomplete', 'running', 'submitted']:
        get_logger().debug("HC Job incomplete, checking again later")
        raise self.retry()
    # check results
    if result['status'] == 'failed':
        # the HC pipeline itself failed to execute
        get_logger().info("HCs failed to run, please see: " +
                          cfg.hc_web_uri + str(hc_job_id))
        msg = """ Running healthchecks on %s failed to execute. Please see %s """ % (
            spec['src_uri'], cfg.hc_web_uri + str(hc_job_id))
        send_email(to_address=spec['contact'],
                   subject='HC failed to run',
                   body=msg,
                   smtp_server=cfg.smtp_server)
        return
    elif result['output']['status'] == 'failed':
        # HCs ran to completion but reported failures
        get_logger().info("HCs found problems, please see: " +
                          cfg.hc_web_uri + str(hc_job_id))
        msg = """ Running healthchecks on %s completed but found failures. Please see %s """ % (
            spec['src_uri'], cfg.hc_web_uri + str(hc_job_id))
        send_email(to_address=spec['contact'],
                   subject='HC ran but failed',
                   body=msg,
                   smtp_server=cfg.smtp_server)
        return
    else:
        get_logger().info("HCs fine, starting copy")
        spec['progress_complete'] = 1
        submit_copy(spec)
def process_db_metadata(self, metadata_job_id, spec):
    """Poll a metadata load job and act on its final state.

    Retries itself indefinitely while the load is running. On failure the
    target database is dropped and the handover contact is emailed; on
    success any clean-up events returned by the job are processed and the
    handover is marked complete.
    """
    # Poll forever -- metadata loads can take a while.
    self.max_retries = None
    target = spec['tgt_uri']
    log_and_publish(make_report(
        'INFO',
        'Loading into metadata database, please see: %sjobs/%s' % (cfg.meta_uri, metadata_job_id),
        spec, target))
    try:
        job = metadata_client.retrieve_job(metadata_job_id)
    except Exception as exc:
        log_and_publish(make_report(
            'ERROR', 'Handover failed, Cannot retrieve metadata job',
            spec, target))
        raise ValueError(
            'Handover failed, Cannot retrieve metadata job %s' % exc) from exc
    if job['status'] in ('incomplete', 'running', 'submitted'):
        log_and_publish(make_report(
            'DEBUG', 'Metadata load Job incomplete, checking again later',
            spec, target))
        raise self.retry()
    if job['status'] == 'failed':
        # Roll back: remove the target database whose load failed.
        log_and_publish(make_report('INFO', 'Dropping %s' % target, spec, target))
        drop_database(spec['tgt_uri'])
        log_and_publish(make_report(
            'INFO',
            'Metadata load failed, please see %sjobs/%s?format=failures' % (cfg.meta_uri, metadata_job_id),
            spec, target))
        failures_url = cfg.meta_uri + 'jobs/' + str(metadata_job_id) + '?format=failures'
        mail_body = """ Metadata load of %s failed. 
Please see %s """ % (target, failures_url)
        send_email(to_address=spec['contact'],
                   subject='Metadata load failed, please see: ' + failures_url,
                   body=mail_body,
                   smtp_server=cfg.smtp_server)
    else:
        # Cleaning up old assembly or old genebuild databases for Wormbase
        # when database suffix has changed
        for ev in (job['output'].get('events') or []):
            ev_details = json.loads(ev['details'])
            if 'current_database_list' in ev_details:
                drop_current_databases(ev_details['current_database_list'], spec)
            if ev['genome'] in blat_species and ev['type'] == 'new_assembly':
                blat_msg = 'The following species %s has a new assembly, please update the port number for this species here and communicate to Web: https://github.com/Ensembl/ensembl-production/blob/master/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/DumpCore_conf.pm#L107' % ev['genome']
                send_email(to_address=cfg.production_email,
                           subject='BLAT species list needs updating in FTP Dumps config',
                           body=blat_msg)
        log_and_publish(make_report(
            'INFO', 'Metadata load complete, Handover successful', spec, target))
        spec['progress_complete'] = 3
def email_when_complete(self, url, address):
    """Poll *url* and send an email once the reported job has finished.

    Used for periodically checking whether a hive job has finished. The
    endpoint must return JSON with ``status``, ``subject`` and ``body``
    fields; while the status is still in progress the task retries itself.
    Network and JSON-decoding failures are retried a bounded number of
    times; a response missing required fields is rejected outright.

    Arguments:
      url - URL to check for job completion
      address - address to send email
    """
    # allow infinite retries of the task itself
    self.max_retries = None
    try:
        with http_session() as session:
            response = session.get(url)
            payload = response.json()
    except requests.RequestException as exc:
        logger.error('RequestsException: %s', exc)
        raise self.retry(countdown=retry_wait, max_retries=120)
    except json.JSONDecodeError:
        err = 'Invalid response. Status: {} URL: {}'.format(
            response.status_code, response.url)
        logger.error('%s Body: %s', err, response.text)
        raise self.retry(countdown=retry_wait, max_retries=120)
    try:
        # status is checked first: an in-progress response need not carry
        # subject/body yet, so it must retry rather than be rejected
        if payload['status'] in ('incomplete', 'running', 'submitted'):
            raise self.retry(countdown=retry_wait)
        subject = payload['subject']
        body = payload['body']
    except KeyError as exc:
        err = 'Invalid response. Missing parameter "{}". URL: {}'.format(
            str(exc), response.url)
        logger.error('%s Body: %s', err, response.text)
        raise Reject(err, requeue=False)
    # job complete so send email and complete task
    send_email(smtp_server=smtp_server,
               from_email_address=from_email_address,
               to_address=address,
               subject=subject,
               body=body)
    return payload
def process_copied_db(self, copy_job_id, spec):
    """Wait for copy to complete and then respond accordingly:
    * if success, submit to metadata database
    * if failure, flag error using email

    Arguments:
      copy_job_id - identifier of the database copy job to poll
      spec - handover specification dict (src_uri, tgt_uri, contact, ...)
    """
    reporting.set_logger_context(get_logger(), spec['src_uri'], spec)
    # allow infinite retries
    self.max_retries = None
    get_logger().info("Copying in progress, please see: " +
                      cfg.copy_web_uri + str(copy_job_id))
    try:
        result = db_copy_client.retrieve_job(copy_job_id)
    except Exception as e:
        get_logger().error("Handover failed, cannot retrieve copy job")
        # chain the original exception so the root cause stays in the traceback
        raise ValueError(
            "Handover failed, cannot retrieve copy job {}".format(e)) from e
    if result['status'] in ['incomplete', 'running', 'submitted']:
        get_logger().debug(
            "Database copy job incomplete, checking again later")
        raise self.retry()
    if result['status'] == 'failed':
        get_logger().info("Copy failed, please see: " + cfg.copy_web_uri +
                          str(copy_job_id))
        msg = """ Copying %s to %s failed. Please see %s """ % (
            spec['src_uri'], spec['tgt_uri'],
            cfg.copy_web_uri + str(copy_job_id))
        send_email(to_address=spec['contact'],
                   subject='Database copy failed',
                   body=msg,
                   smtp_server=cfg.smtp_server)
        return
    elif 'GRCh37' in spec:
        # NOTE(review): a 'GRCh37' key in the spec ends the handover here;
        # the metadata update step is deliberately skipped.
        get_logger().info("Copying complete, Handover successful")
        spec['progress_complete'] = 2
    else:
        get_logger().info("Copying complete, submitting metadata job")
        spec['progress_complete'] = 2
        submit_metadata_update(spec)
def process_datachecked_db(self, dc_job_id, spec):
    """ Task to wait until DCs finish and then respond e.g. * submit copy if DC succeed * send error email if not

    Arguments:
      dc_job_id - identifier of the datacheck job to poll
      spec - handover specification dict (src_uri, contact, ...)
    """
    reporting.set_logger_context(get_logger(), spec['src_uri'], spec)
    # allow infinite retries
    self.max_retries = None
    get_logger().info("Datachecks in progress, please see: " + cfg.dc_uri +
                      "jobs/" + str(dc_job_id))
    try:
        result = dc_client.retrieve_job(dc_job_id)
    except Exception as e:
        get_logger().error("Handover failed, cannot retrieve datacheck job")
        # chain the original exception so the root cause stays in the traceback
        raise ValueError(
            "Handover failed, cannot retrieve datacheck job {}".format(e)) from e
    if result['status'] in ['incomplete', 'running', 'submitted']:
        get_logger().debug("Datacheck Job incomplete, checking again later")
        raise self.retry()
    # check results
    if result['status'] == 'failed':
        # datachecks ran but reported problems
        get_logger().info(
            "Datachecks found problems, you can download the output here: " +
            cfg.dc_uri + "download_datacheck_outputs/" + str(dc_job_id))
        msg = """ Running datachecks on %s completed but found problems. You can download the output here %s """ % (
            spec['src_uri'],
            cfg.dc_uri + "download_datacheck_outputs/" + str(dc_job_id))
        send_email(to_address=spec['contact'],
                   subject='Datachecks found problems',
                   body=msg,
                   smtp_server=cfg.smtp_server)
        return
    else:
        get_logger().info("Datachecks successful, starting copy")
        spec['progress_complete'] = 1
        submit_copy(spec)
def email_when_complete(self, url, address):
    """ Task to check a URL and send an email once the result has a non-incomplete status

    Used for periodically checking whether a hive job has finished. If
    status is not complete, the task is retried after ``retry_wait``.

    Arguments:
      url - URL to check for job completion. Must return JSON containing status, subject and body fields
      address - address to send email
    """
    # allow infinite retries
    self.max_retries = None
    result = requests.get(url).json()
    # membership test replaces the original chain of == comparisons
    if result['status'] in ('incomplete', 'running', 'submitted'):
        # job incomplete so retry task after waiting
        raise self.retry(countdown=retry_wait)
    # job complete so send email and complete task
    send_email(smtp_server=smtp_server,
               from_email_address=from_email_address,
               to_address=address,
               subject=result['subject'],
               body=result['body'])
    return result