Code Example #1
def process_datachecked_db(self, dc_job_id, spec):
    """ Task to wait until DCs finish and then respond e.g.
    * submit copy if DC succeed
    * send error email if not
    """
    # allow infinite retries
    self.max_retries = None
    src_uri = spec['src_uri']
    progress_msg = 'Datachecks in progress, please see: %sjobs/%s' % (cfg.dc_uri, dc_job_id)
    log_and_publish(make_report('INFO', progress_msg, spec, src_uri))
    try:
        result = dc_client.retrieve_job(dc_job_id)
    except Exception as e:
        err_msg = 'Handover failed, cannot retrieve datacheck job'
        log_and_publish(make_report('ERROR', err_msg, spec, src_uri))
        raise ValueError('Handover failed, cannot retrieve datacheck job %s' % e) from e
    if result['status'] in ['incomplete', 'running', 'submitted']:
        log_and_publish(make_report('DEBUG', 'Datacheck Job incomplete, checking again later', spec, src_uri))
        raise self.retry()
    # check results
    elif result['status'] == 'failed':
        prob_msg = 'Datachecks found problems, you can download the output here: %sdownload_datacheck_outputs/%s' % (cfg.dc_uri, dc_job_id)
        log_and_publish(make_report('INFO', prob_msg, spec, src_uri))
        msg = """
Running datachecks on %s completed but found problems.
You can download the output here %s
""" % (src_uri, cfg.dc_uri + "download_datacheck_outputs/" + str(dc_job_id))
        send_email(to_address=spec['contact'], subject='Datacheck found problems', body=msg, smtp_server=cfg.smtp_server)
    else:
        log_and_publish(make_report('INFO', 'Datachecks successful, starting copy', spec, src_uri))
        spec['progress_complete'] = 1
        submit_copy(spec)
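The tasks in this listing call self.retry() and set self.max_retries = None, which is Celery's bound-task pattern. A minimal sketch of the declaration these tasks appear to assume is shown below; the app name, broker URL and retry delay are illustrative assumptions, not taken from the examples.

# Minimal sketch of the assumed Celery binding; app name, broker and delay are assumptions.
from celery import Celery

app = Celery('handover', broker='pyamqp://guest@localhost//')

@app.task(bind=True, default_retry_delay=60)  # bind=True exposes the task instance as `self`
def process_datachecked_db(self, dc_job_id, spec):
    self.max_retries = None  # retry indefinitely until the remote job finishes
    ...

With bind=True, raise self.retry() re-queues the task after default_retry_delay seconds, and setting max_retries to None removes the cap on retry attempts.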
Code Example #2
def process_result(self, event, process, job_id):
    """
    Wait for the completion of the job and then process any output further
    """

    # allow infinite retries
    self.max_retries = None
    genome = event['genome']
    checking_msg = 'Checking %s event %s' % (process, job_id)
    log_and_publish(make_report('INFO', checking_msg, event, genome))
    result = event_client.retrieve_job(process, job_id)
    if result['status'] in ['incomplete', 'running', 'submitted']:
        log_and_publish(
            make_report('INFO', 'Job incomplete, retrying', event, genome))
        raise self.retry()
    result_msg = 'Handling result for %s' % json.dumps(event)
    log_and_publish(
        make_report('DEBUG', result_msg, event, genome))
    result_dump = json.dumps(result)
    if result['status'] == 'failure':
        log_and_publish(
            make_report('FATAL', 'Event failed: %s' % result_dump, event,
                        genome))
    else:
        log_and_publish(
            make_report('INFO', 'Event succeeded: %s' % result_dump, event,
                        genome))
        # TODO
        # 1. update metadata
        # 2. schedule new events as required

    return event['event_id']
Code Example #3
def submit_metadata_update(spec):
    """Submit the source database for copying to the target. Returns a celery job identifier."""
    src_uri = spec['src_uri']
    try:
        metadata_job_id = metadata_client.submit_job(spec['tgt_uri'], None, None, None,
                None, spec['contact'], spec['comment'], 'Handover', None)
    except Exception as e:
        log_and_publish(make_report('ERROR', 'Handover failed, cannot submit metadata job', spec, src_uri))
        raise ValueError('Handover failed, cannot submit metadata job %s' % e) from e
    spec['metadata_job_id'] = metadata_job_id
    task_id = process_db_metadata.delay(metadata_job_id, spec)
    dbg_msg = 'Submitted DB for metadata loading %s' % task_id
    log_and_publish(make_report('DEBUG', dbg_msg, spec, src_uri))
    return task_id
Code Example #4
def submit_copy(spec):
    """Submit the source database for copying to the target. Returns a celery job identifier"""
    src_uri = spec['src_uri']
    try:
        copy_job_id = db_copy_client.submit_job(src_uri, spec['tgt_uri'], None, None,
                                                False, True, True, None, None)
    except Exception as e:
        log_and_publish(make_report('ERROR', 'Handover failed, cannot submit copy job', spec, src_uri))
        raise ValueError('Handover failed, cannot submit copy job %s' % e) from e
    spec['copy_job_id'] = copy_job_id
    task_id = process_copied_db.delay(copy_job_id, spec)
    dbg_msg = 'Submitted DB for copying as %s' % task_id
    log_and_publish(make_report('DEBUG', dbg_msg, spec, src_uri))
    return task_id
Code Example #5
def jobs(process):
    """
    Endpoint to retrieve all the jobs results from the database
    ---
    tags:
      - jobs
    parameters:
      - name: process
        in: path
        type: string
        required: true
        default: 1
        description: process name
    operationId: jobs
    consumes:
      - application/json
    produces:
      - application/json
    security:
      jobs_auth:
        - 'write:jobs'
        - 'read:jobs'
    schemes: ['http', 'https']
    deprecated: false
    externalDocs:
      description: Project repository
      url: http://github.com/rochacbruno/flasgger
    responses:
      200:
        description: Retrieve all the jobs results from the database
        schema:
          $ref: '#/definitions/job_id'
    """
    log_and_publish(make_report('INFO', 'Retrieving jobs'))
    return jsonify(get_hive(process).get_all_results(get_analysis(process)))
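A hedged client-side sketch of calling this endpoint with requests; the base URL is an assumption and 'handover' is a hypothetical process name.

# Illustrative GET against the jobs endpoint; base URL and process name are assumptions.
import requests

response = requests.get('http://localhost:5000/jobs/handover')
response.raise_for_status()
print(response.json())  # all job results recorded for the given process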
Code Example #6
def results(process, job_id):
    log_and_publish(make_report('INFO', 'Retrieving job from %s with ID %s' % (process, job_id)))
    try:
        job_result = get_hive(process).get_result_for_job_id(job_id)
    except ValueError as e:
        raise HTTPRequestError(str(e), 404)
    return jsonify(job_result)
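Several of the endpoint examples in this listing convert a ValueError into an HTTPRequestError carrying a message and status code. The class itself is not shown here; a minimal sketch of what it and a matching Flask error handler might look like (the names and the default status code are assumptions):

# Minimal sketch of the assumed exception type and a matching Flask error handler.
from flask import jsonify

class HTTPRequestError(Exception):
    def __init__(self, msg, status_code=400):  # the default code is an assumption
        super().__init__(msg)
        self.msg = msg
        self.status_code = status_code

def handle_http_request_error(e):
    # would be registered with @app.errorhandler(HTTPRequestError) on the Flask app
    return jsonify(error=e.msg), e.status_code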
Code Example #7
def drop_current_databases(current_db_list, spec):
    """Drop databases on a previous assembly or previous genebuild (e.g: Wormbase) from the staging MySQL server"""
    tgt_uri = spec['tgt_uri']
    staging_uri = spec['staging_uri']
    tgt_url = make_url(tgt_uri)
    # Check if the new database has the same name as the one on staging; in that case DO NOT drop it.
    # This can happen if the assembly gets renamed or the genebuild version changes for Wormbase.
    if tgt_url.database in current_db_list:
        msg = 'The assembly or genebuild has been updated but the new database %s is the same as the old one' % tgt_url.database
        log_and_publish(make_report('DEBUG', msg, spec, tgt_uri))
    else:
        for database in current_db_list:
            db_uri = staging_uri + database
            if database_exists(db_uri):
                msg = 'Dropping %s' % db_uri
                log_and_publish(make_report('INFO', msg, spec, tgt_uri))
                drop_database(db_uri)
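make_url, database_exists and drop_database match the public APIs of SQLAlchemy and sqlalchemy-utils; assuming that is where they come from, the helpers can be exercised like this (the URI is hypothetical):

# Assumed imports; these names match SQLAlchemy / sqlalchemy-utils public APIs.
from sqlalchemy.engine.url import make_url
from sqlalchemy_utils import database_exists, drop_database

url = make_url('mysql://user@staging-host:3306/homo_sapiens_core_99_38')  # hypothetical URI
print(url.database)          # 'homo_sapiens_core_99_38'
print(database_exists(url))  # True if the server is reachable and the database exists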
Code Example #8
def submit_event(spec, result):
    """Submit an event"""
    tgt_uri = spec['tgt_uri']
    logger.debug(result['output']['events'])
    for event in result['output']['events']:
        logger.debug(event)
        event_client.submit_job({'type': event['type'], 'genome': event['genome']})
        log_and_publish(make_report('DEBUG', 'Submitted event to event handler endpoint', spec, tgt_uri))
Code Example #9
def test_make_report(self):
    expected = {
        'params': {'test_param': 'test'},
        'resource': 'test_resource',
        'report_type': 'TEST',
        'msg': 'test_message'
    }
    report = make_report('TEST', 'test_message', {'test_param': 'test'}, 'test_resource')
    self.assertEqual(expected, report)
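This test pins down the dict shape make_report must return. A minimal implementation consistent with the assertion (a sketch; the parameter names and defaults are assumptions, since some call sites pass only two arguments):

def make_report(report_type, msg, params=None, resource=None):
    # Sketch consistent with test_make_report; not the project's actual code.
    return {
        'report_type': report_type,
        'msg': msg,
        'params': params if params is not None else {},
        'resource': resource,
    }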
Code Example #10
def results_email(email, process, job_id):
    log_and_publish(make_report('INFO', 'Retrieving job with ID %s for %s' % (job_id, email)))
    hive = get_hive(process)
    try:
        # look up the job itself as well as its result; either call may raise ValueError for an unknown ID
        job = hive.get_job_by_id(job_id)
        results = hive.get_result_for_job_id(job_id)
    except ValueError as e:
        raise HTTPRequestError(str(e), 404)
    # TODO
    results['email'] = email
    return jsonify(results)
Code Example #11
def process_copied_db(self, copy_job_id, spec):
    """Wait for copy to complete and then respond accordingly:
    * if success, submit to metadata database
    * if failure, flag error using email"""
    # allow infinite retries
    self.max_retries = None
    src_uri = spec['src_uri']
    copy_in_progress_msg = 'Copying in progress, please see: %s%s' % (cfg.copy_web_uri, copy_job_id)
    log_and_publish(make_report('INFO', copy_in_progress_msg, spec, src_uri))
    try:
        result = db_copy_client.retrieve_job(copy_job_id)
    except Exception as e:
        log_and_publish(make_report('ERROR', 'Handover failed, cannot retrieve copy job', spec, src_uri))
        raise ValueError('Handover failed, cannot retrieve copy job %s' % e) from e
    if result['status'] in ['incomplete', 'running', 'submitted']:
        log_and_publish(make_report('DEBUG', 'Database copy job incomplete, checking again later', spec, src_uri))
        raise self.retry()
    if result['status'] == 'failed':
        copy_failed_msg = 'Copy failed, please see: %s%s' % (cfg.copy_web_uri, copy_job_id)
        log_and_publish(make_report('INFO', copy_failed_msg, spec, src_uri))
        msg = """
Copying %s to %s failed.
Please see %s
""" % (src_uri, spec['tgt_uri'], cfg.copy_web_uri + str(copy_job_id))
        send_email(to_address=spec['contact'], subject='Database copy failed', body=msg, smtp_server=cfg.smtp_server)
        return
    elif 'GRCh37' in spec:
        log_and_publish(make_report('INFO', 'Copying complete, Handover successful', spec, src_uri))
        spec['progress_complete'] = 2
    else:
        log_and_publish(make_report('INFO', 'Copying complete, submitting metadata job', spec, src_uri))
        spec['progress_complete'] = 2
        submit_metadata_update(spec)
Code Example #12
def submit_job():
    """
    Endpoint to submit an event to process
    ---
    tags:
      - jobs
    parameters:
      - in: body
        name: body
        description: event
        required: false
        schema:
          $ref: '#/definitions/submit'
    operationId: submit_job
    consumes:
      - application/json
    produces:
      - application/json
    security:
      submit_auth:
        - 'write:submit'
        - 'read:submit'
    schemes: ['http', 'https']
    deprecated: false
    externalDocs:
      description: Project repository
      url: http://github.com/rochacbruno/flasgger
    """
    if json_pattern.match(request.headers['Content-Type']):
        event = request.json
        results = {"processes": [], "event": event}
        # convert event to processes
        processes = get_processes_for_event(event)
        for process in processes:
            log_and_publish(make_report('DEBUG', 'Submitting process %s' % process))
            hive = get_hive(process)
            analysis = get_analysis(process)
            try:
                job = hive.create_job(analysis, {'event': event})
            except ValueError as e:
                raise HTTPRequestError(str(e), 404)
            event_task = process_result.delay(event, process, job.job_id)
            results['processes'].append({
                "process": process,
                "job": job.job_id,
                "task": event_task.id
            })
        return jsonify(results)
    else:
        raise HTTPRequestError('Could not handle input of type %s' % request.headers['Content-Type'])
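A hedged client-side sketch of posting an event to this endpoint; the route, base URL and event fields are assumptions (the fields mirror what submit_event and process_result read from an event):

# Illustrative POST; base URL, route and payload values are assumptions.
import requests

event = {'type': 'new_assembly', 'genome': 'homo_sapiens'}  # hypothetical event
response = requests.post('http://localhost:5000/jobs', json=event)  # json= sets Content-Type: application/json
print(response.json())  # {'event': ..., 'processes': [{'process': ..., 'job': ..., 'task': ...}]}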
Code Example #13
def process_db_metadata(self, metadata_job_id, spec):
    """Wait for metadata update to complete and then respond accordingly:
    * if success, submit event to event handler for further processing
    * if failure, flag error using email"""
    # allow infinite retries
    self.max_retries = None
    tgt_uri = spec['tgt_uri']
    loading_msg = 'Loading into metadata database, please see: %sjobs/%s' % (cfg.meta_uri, metadata_job_id)
    log_and_publish(make_report('INFO', loading_msg, spec, tgt_uri))
    try:
        result = metadata_client.retrieve_job(metadata_job_id)
    except Exception as e:
        err_msg = 'Handover failed, cannot retrieve metadata job'
        log_and_publish(make_report('ERROR', err_msg, spec, tgt_uri))
        raise ValueError('Handover failed, cannot retrieve metadata job %s' % e) from e
    if result['status'] in ['incomplete', 'running', 'submitted']:
        incomplete_msg = 'Metadata load Job incomplete, checking again later'
        log_and_publish(make_report('DEBUG', incomplete_msg, spec, tgt_uri))
        raise self.retry()
    if result['status'] == 'failed':
        drop_msg = 'Dropping %s' % tgt_uri
        log_and_publish(make_report('INFO', drop_msg, spec, tgt_uri))
        drop_database(spec['tgt_uri'])
        failed_msg = 'Metadata load failed, please see %sjobs/%s?format=failures' % (cfg.meta_uri, metadata_job_id)
        log_and_publish(make_report('INFO', failed_msg, spec, tgt_uri))
        msg = """
Metadata load of %s failed.
Please see %s
""" % (tgt_uri, cfg.meta_uri + 'jobs/' + str(metadata_job_id) + '?format=failures')
        send_email(to_address=spec['contact'], subject='Metadata load failed, please see: '+cfg.meta_uri+ 'jobs/' + str(metadata_job_id) + '?format=failures', body=msg, smtp_server=cfg.smtp_server)
    else:
        # Clean up old assembly or old genebuild databases for Wormbase when the database suffix has changed
        if 'events' in result['output'] and result['output']['events']:
            for event in result['output']['events']:
                details = json.loads(event['details'])
                if 'current_database_list' in details:
                    drop_current_databases(details['current_database_list'], spec)
                if event['genome'] in blat_species and event['type'] == 'new_assembly':
                    msg = 'The following species %s has a new assembly, please update the port number for this species here and communicate to Web: https://github.com/Ensembl/ensembl-production/blob/master/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/DumpCore_conf.pm#L107' % event['genome']
                    send_email(to_address=cfg.production_email,
                               subject='BLAT species list needs updating in FTP Dumps config',
                               body=msg)
        log_and_publish(make_report('INFO', 'Metadata load complete, Handover successful', spec, tgt_uri))
        spec['progress_complete'] = 3
Code Example #14
def submit_dc(spec, src_url, db_type):
    """Submit the source database for checking. Returns a celery job identifier"""
    try:
        src_uri = spec['src_uri']
        tgt_uri = spec['tgt_uri']
        handover_token = spec['handover_token']
        server_url = 'mysql://%s@%s:%s/' % (src_url.username, src_url.host, src_url.port)
        submitting_dc_msg = 'Submitting DC for %s on server: %s' % (src_url.database, server_url)
        submitting_dc_report = make_report('DEBUG', submitting_dc_msg, spec, src_uri)
        if db_type == 'compara':
            log_and_publish(submitting_dc_report)
            dc_job_id = dc_client.submit_job(server_url, src_url.database, None, None,
                    db_type, None, db_type, 'critical', None, handover_token)
        elif db_type == 'ancestral':
            log_and_publish(submitting_dc_report)
            dc_job_id = dc_client.submit_job(server_url, src_url.database, None, None,
                    'core', None, 'ancestral', 'critical', None, handover_token)
        elif db_type in ['rnaseq', 'cdna', 'otherfeatures']:
            division_msg = 'division: %s' % get_division(src_uri, tgt_uri, db_type)
            log_and_publish(make_report('DEBUG', division_msg, spec, src_uri))
            log_and_publish(submitting_dc_report)
            dc_job_id = dc_client.submit_job(server_url, src_url.database, None, None,
                    db_type, None, 'corelike', 'critical', None, handover_token)
        else:
            db_msg = 'src_uri: %s dbtype %s server_url %s' % (src_uri, db_type, server_url)
            log_and_publish(make_report('DEBUG', db_msg, spec, src_uri))
            division_msg = 'division: %s' % get_division(src_uri, tgt_uri, db_type)
            log_and_publish(make_report('DEBUG', division_msg, spec, src_uri))
            log_and_publish(submitting_dc_report)
            dc_job_id = dc_client.submit_job(server_url, src_url.database, None, None,
                    db_type, None, db_type, 'critical', None, handover_token)
    except Exception as e:
        err_msg = 'Handover failed, cannot submit dc job'
        log_and_publish(make_report('ERROR', err_msg, spec, src_uri))
        raise ValueError('Handover failed, cannot submit dc job %s' % e) from e
    spec['dc_job_id'] = dc_job_id
    task_id = process_datachecked_db.delay(dc_job_id, spec)
    submitted_dc_msg = 'Submitted DB for checking as %s' % task_id
    log_and_publish(make_report('DEBUG', submitted_dc_msg, spec, src_uri))
    return task_id
Code Example #15
def handover_database(spec):
    """ Method to accept a new database for incorporation into the system
    Argument is a dict with the following keys:
    * src_uri - URI to database to handover (required)
    * tgt_uri - URI to copy database to (optional - generated from staging and src_uri if not set)
    * contact - email address of submitter (required)
    * comment - additional information about submission (required)
    The following keys are added during the handover process:
    * handover_token - unique identifier for this particular handover invocation
    * dc_job_id - job ID for datacheck process
    * db_job_id - job ID for database copy process
    * metadata_job_id - job ID for the metadata loading process
    * progress_total - Total number of task to do
    * progress_complete - Total number of task completed
    """
    # TODO verify dict
    src_uri = spec['src_uri']
    # create unique identifier
    spec['handover_token'] = str(uuid.uuid1())
    spec['progress_total'] = 3
    if not database_exists(src_uri):
        msg = "Handover failed, %s does not exist" % src_uri
        log_and_publish(make_report('ERROR', msg, spec, src_uri))
        raise ValueError("%s does not exist" % src_uri)
    src_url = make_url(src_uri)
    # Scan the database name and retrieve the species or compara name, database type, release number and assembly version
    db_prefix, db_type, assembly = parse_db_infos(src_url.database)
    # Check if the given database can be handed over
    if db_type not in db_types_list:
        msg = "Handover failed, %s has been handed over after deadline. Please contact the Production team" % src_uri
        log_and_publish(make_report('ERROR', msg, spec, src_uri))
        raise ValueError(msg)
    # Check that the database release matches the handover service release
    if db_type == 'compara':
        compara_release = get_release_compara(src_uri)
        if release != compara_release:
            msg = "Handover failed, %s database release version %s does not match handover service release version %s" % (src_uri, compara_release, release)
            log_and_publish(make_report('ERROR', msg, spec, src_uri))
            raise ValueError(msg)
    else:
        db_release = get_release(src_uri)
        if release != db_release:
            msg = "Handover failed, %s database release version %s does not match handover service release version %s" % (src_uri, db_release, release)
            log_and_publish(make_report('ERROR', msg, spec, src_uri))
            raise ValueError(msg)
    # Check which staging server the database needs to be copied to
    spec, staging_uri, live_uri = check_staging_server(spec, db_type, db_prefix, assembly)
    if 'tgt_uri' not in spec:
        spec['tgt_uri'] = get_tgt_uri(src_url, staging_uri)
    # Check that the database division matches the target staging server
    if db_type in ['compara', 'ancestral']:
        db_division = db_prefix
    else:
        db_division = get_division(src_uri, spec['tgt_uri'], db_type)
    if db_division not in allowed_divisions_list:
        raise ValueError('Database division %s does not match server division list %s' % (db_division, allowed_divisions_list))
    spec['staging_uri'] = staging_uri
    spec['progress_complete'] = 0
    msg = "Handling %s" % spec
    log_and_publish(make_report('INFO', msg, spec, src_uri))
    submit_dc(spec, src_url, db_type)
    return spec['handover_token']
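A hedged end-to-end usage sketch of handover_database, with an illustrative spec built from the keys its docstring requires; all URIs and contact details are assumptions.

# Illustrative invocation; URIs and contact details are assumptions.
spec = {
    'src_uri': 'mysql://user@src-host:3306/homo_sapiens_core_99_38',  # hypothetical source database
    'contact': 'submitter@example.org',
    'comment': 'Handover of human core database',
    # 'tgt_uri' is optional and is derived from the staging server when omitted
}
token = handover_database(spec)
print('Track this handover with token %s' % token)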