Example #1
def main():

    r = Reactor()
    m = r.context.get('message_dict')  # shortcut to message_dict
    p = r.pemagent

    r.logger.debug("Message: {}".format(m))
    # Use JSONschema-based message validator
    # - In theory, this obviates some get() boilerplate
    if not r.validate_message(m):
        r.on_failure("Invalid message: {}".format(m))

    (syst, abspath, fname) = agaveutils.from_agave_uri(m.get('uri'))
    try:
        fullpath = os.path.join(abspath, fname)
        r.logger.debug('fullpath: {}'.format(fullpath))
        p.grant(syst, fullpath, m.get('username'), m.get('permission'))
    except Exception as e:
        r.on_failure(e)
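
A message of the following shape would satisfy the grant() call above. The
field names are taken from the get() calls in the code; the storage system,
path, and values are purely illustrative.

example_message = {
    'uri': 'agave://data-example-system/path/to/file.txt',  # Agave file URI
    'username': 'someuser',  # account to grant access to
    'permission': 'READ'     # e.g. READ, WRITE, or ALL
}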
Example #2
def main():
    """
    Download and validate a JSON manifest at a given TACC S3 path

    Input message(s):
        {"uri": "s3://storage-system-bucket-alias/path/to/manifest.json"}

    Output message(s):
        {"uri": "s3://storage-system-bucket-alias/path/to/manifest.json"}

    Linked actors:
      - 'copy_dir_s3'
    """

    r = Reactor()
    ag = r.client  # Agave client for grabbing the file
    db = AgaveKeyValStore(ag)
    m = AttrDict(r.context.message_dict)

    r.logger.debug("Config: {}".format(Reactor.settings))
    r.logger.info("Message: {}".format(m))

    s3_uri = m.get('uri')
    agaveStorageSystem, agaveAbsDir, agaveFileName = \
        agaveutils.uri.from_tacc_s3_uri(s3_uri)
    manifestPath = os.path.join('/', agaveAbsDir, agaveFileName)
    sourceAgaveStorageSystem = \
        r.settings.system_maps.get('source').get(agaveStorageSystem)

    r.logger.debug("source-uri: {}".format(
        r.settings.system_maps.get('source').get(agaveStorageSystem)))
    r.logger.debug("storage-system: {}".format(sourceAgaveStorageSystem))
    r.logger.info("validating: {}".format(s3_uri))

    try:
        result = agaveutils.files.agave_download_file(
            agaveClient=ag,
            agaveAbsolutePath=manifestPath,
            systemId=sourceAgaveStorageSystem,
            localFilename=DOWNLOAD_FILE)
    except Exception as e:
        r.on_failure("download failed: {} ({})".format(
            agaveutils.uri.to_agave_uri(sourceAgaveStorageSystem,
                                        manifestPath), e))

    r.logger.info("validating {}".format(result))
    try:
        validate_file_schema(result)
        r.logger.info("validation succeeded")
    except Exception as e:
        r.on_failure("validation failed: {}".format(e))

    # Downstream actions
    #   Trigger S3->POSIX copy via manifest_dir_copy
    #   {"uri": "s3://storage-system-bucket-alias/path/to/manifest.json"}
    if not r.local:
        try:
            r.logger.info("message: copy_dir_s3")
            mani_dir_copy_id = r.aliases.id_from_alias('copy_dir_s3', db)
            r.logger.info(" copy_dir_s3 id: {}".format(mani_dir_copy_id))
            # Forward original message to the next actor
            mani_dir_copy_msg = m
            r.logger.debug("  message: {}".format(m))
            mani_id = agaveutils.reactors.message_reactor(
                ag, mani_dir_copy_id, mani_dir_copy_msg)
            r.logger.info(" execution: {}".format(mani_id))
        except Exception as e:
            r.logger.error("error initiating copy_dir_s3: {}".format(e))
            if not r.settings.linked_reactors.get('copy_dir_s3', {}).get(
                    'ignore_err', False):
                r.on_failure(e)
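
Example #2 resolves its source storage system and its error-handling policy
through chained get() calls on r.settings. A settings tree along the
following lines (shown as the dict Reactor would load from config.yml; the
alias and system names are hypothetical) would satisfy those lookups.

settings_sketch = {
    'system_maps': {
        'source': {
            # maps an S3 storage-system alias to its source system
            'storage-system-bucket-alias': 'data-example-project'
        }
    },
    'linked_reactors': {
        # with ignore_err set, a failed hand-off to copy_dir_s3 is
        # logged but does not fail this reactor
        'copy_dir_s3': {'ignore_err': True}
    }
}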
Example #3
def main():

    r = Reactor()
    m = AttrDict(r.context.message_dict)
    # Look up my own name
    actor_name = r.get_attr('name')
    # example:
    # 'bob' 'was unable to call' 'karen' (actor/exec ABCDEX BCDEG)
    template = "{} {} {} (actor/exec {} {})"
    # override on_failure and on_success
    funcType = type(r.on_failure)
    r.on_failure = funcType(on_failure, r, Reactor)
    funcType = type(r.on_success)
    r.on_success = funcType(on_success, r, Reactor)
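    # Note: funcType is the bound-method type, so under Python 2 calling
    # funcType(function, instance, class) binds the module-level
    # on_failure/on_success handlers as methods of this Reactor instance,
    # which they receive as `self`.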

    r.logger.debug("message: {}".format(m))
    # Use JSONschema-based message validator
    # - In theory, this obviates some get() boilerplate
    if not r.validate_message(m):
        r.on_failure(
            template.format(actor_name, 'got an invalid message', m, r.uid,
                            r.execid), None)

    ag = r.client  # Agave client
    # db = AgaveKeyValStore(ag)  # AgaveDB client

    agave_uri = m.get('uri')
    (agave_storage_sys, agave_abs_dir, agave_filename) =\
        agaveutils.from_agave_uri(agave_uri)
    manifest_path = os.path.join('/', agave_abs_dir, agave_filename)

    r.logger.debug("fetching manifest {}".format(agave_uri))

    try:
        mani_file = agaveutils.agave_download_file(
            agaveClient=r.client,
            agaveAbsolutePath=manifest_path,
            systemId=agave_storage_sys,
            localFilename='manifest.json')

        if mani_file is None:
            raise Exception("download did not raise an error but returned no file")

    except Exception as e:
        r.on_failure(
            template.format(actor_name, 'failed to download', manifest_path,
                            r.uid, r.execid), e)

    # Load manifest so we can read the plan and config
    # - Use AttrDict so we can use dot.notation
    r.logger.debug("loading manifest into a dict and getting values")
    manifest_dict = {}
    try:
        with open('manifest.json') as json_data:
            manifest_dict = AttrDict(json.load(json_data))
            plan_uri = manifest_dict.plan
            instrument_config_uri = manifest_dict.instrument_configuration
    except Exception as e:
        r.on_failure(
            template.format(actor_name, 'was unable to properly parse the',
                            'manifest file', r.uid, r.execid), e)

    r.logger.debug("fetching plan {}".format(plan_uri))
    plan_abs_path = None
    try:
        (plan_system, plan_dirpath, plan_filename) =\
            agaveutils.from_agave_uri(plan_uri)
        plan_abs_path = os.path.join(plan_dirpath, plan_filename)
        plan_file = agaveutils.agave_download_file(
            agaveClient=r.client,
            agaveAbsolutePath=plan_abs_path,
            systemId=plan_system,
            localFilename='plan.json')
    except Exception as e:
        r.on_failure(
            template.format(actor_name, 'failed to download', plan_abs_path,
                            r.uid, r.execid), e)

    r.logger.debug(
        "fetching instrument config {}".format(instrument_config_uri))
    try:
        (ic_system, ic_dirpath, ic_filename) = \
            agaveutils.from_agave_uri(instrument_config_uri)
        ic_abs_path = os.path.join(ic_dirpath, ic_filename)
        ic_file = agaveutils.agave_download_file(
            agaveClient=r.client,
            agaveAbsolutePath=ic_abs_path,
            systemId=ic_system,
            localFilename='cytometer_configuration.json')
    except Exception as e:
        r.on_failure(
            template.format(actor_name, 'failed to download', ic_abs_path,
                            r.uid, r.execid), e)

    r.logger.debug(
        "loading dict from instrument config file {}".format(ic_file))
    try:
        with open(ic_file, 'rb') as json_data:
            cytometer_configuration = json.load(json_data)
    except Exception as e:
        r.on_failure(
            template.format(actor_name,
                            'could not load dict from JSON document', ic_file,
                            r.uid, r.execid), e)

    r.logger.debug("loading tasbe_cytometer_configuration.channels")
    try:
        channels = cytometer_configuration['tasbe_cytometer_configuration'][
            'channels']
    except Exception as e:
        r.on_failure(
            template.format(
                actor_name, 'was unable to load',
                'tasbe_cytometer_configuration.channels from settings', r.uid,
                r.execid), e)

    r.logger.debug("loading dict from plan JSON file {}".format(plan_file))
    try:
        with open(plan_file, 'rb') as json_data:
            plan = json.load(json_data)
    except Exception as e:
        r.on_failure(
            template.format(actor_name,
                            'could not load dict from JSON document',
                            plan_file, r.uid, r.execid), e)

    r.logger.debug("writing experimental data to local storage")
    experimental_data = extract_experimental_data(manifest_dict, plan)
    with open('experimental_data.json', 'wb') as outfile:
        json.dump(experimental_data,
                  outfile,
                  sort_keys=True,
                  indent=4,
                  separators=(',', ': '))

    r.logger.debug("writing intermediary JSON files to local storage")
    try:
        with open('process_control_data.json', 'wb') as outfile:
            json.dump(build_process_control_data(plan, channels,
                                                 experimental_data,
                                                 instrument_config_uri,
                                                 manifest_dict),
                      outfile,
                      sort_keys=True,
                      indent=4,
                      separators=(',', ': '))
        with open('color_model_parameters.json', 'wb') as outfile:
            json.dump(build_color_model(channels),
                      outfile,
                      sort_keys=True,
                      indent=4,
                      separators=(',', ': '))
        with open('analysis_parameters.json', 'wb') as outfile:
            json.dump(build_analysis_parameters(),
                      outfile,
                      sort_keys=True,
                      indent=4,
                      separators=(',', ': '))
    except Exception as e:
        r.on_failure(
            template.format(actor_name, 'could not write intermediary',
                            'JSON file(s)', r.uid, r.execid), e)

    # We will now upload the completed files to:
    # agave://data-sd2e-community/temp/flow_etl/REACTOR_NAME/PLAN_ID
    # - /temp/flow_etl/REACTOR_NAME is set by config.yml/destination.base_path
    #
    # Expectation: these files have been written to the current working
    # directory (PWD) somewhere above
    datafiles = {
        'analysisParameters': 'analysis_parameters.json',
        'colorModelParameters': 'color_model_parameters.json',
        'cytometerConfiguration': 'cytometer_configuration.json',
        'experimentalData': 'experimental_data.json',
        'processControl': 'process_control_data.json'
    }

    # Figure out the plan_id from plan_uri
    # - Get the JSON file
    plan_uri_file = os.path.basename(plan_uri)
    # - Get JSON filename root
    plan_id = os.path.splitext(plan_uri_file)[0]
    # Default upload destination set in config.yml
    # - may want to add override but not essential now
    dest_dir = os.path.join(r.settings.destination.base_path, plan_id)
    dest_sys = r.settings.destination.system_id

    r.logger.debug("ensuring destination {} exists".format(
        agaveutils.to_agave_uri(dest_sys, dest_dir)))
    try:
        agaveutils.agave_mkdir(r.client, plan_id, dest_sys,
                               r.settings.destination.base_path)
    except Exception as e:
        r.on_failure(
            template.format(actor_name,
                            'could not access or create destination', dest_dir,
                            r.uid, r.execid), e)

    job_def_inputs = {}
    for agaveparam, fname in datafiles.items():
        r.logger.info("uploading {} to {}".format(fname, dest_dir))
        fpath = os.path.join(PWD, fname)

        # rename the remote if it exists
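        # Versioning scheme: the existing remote file is renamed with a
        # millisecond epoch suffix, e.g. foo.json -> foo.json.1520000000000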
        try:
            r.logger.debug("renaming remote {}".format(fname))
            remote_abs_path = os.path.join(dest_dir, fname)
            new_name = os.path.basename(remote_abs_path) + \
                '.' + str(int(datetime.datetime.now().strftime("%s")) * 1000)
            r.client.files.manage(systemId=dest_sys,
                                  body={
                                      'action': 'rename',
                                      'path': new_name
                                  },
                                  filePath=remote_abs_path)
        except Exception:
            r.logger.debug(
                "{} does not exist or is inaccessible (ignoring error)".format(
                    remote_abs_path))

        # upload the newly-generated file
        try:
            r.logger.debug("now uploading {}".format(fname))
            agaveutils.agave_upload_file(r.client, dest_dir, dest_sys, fpath)
        except Exception as e:
            prefix = '{} failed to upload {}'.format(actor_name, fname)
            r.on_failure(
                template.format(prefix, 'to', dest_dir, r.uid, r.execid), e)

        # Entries in this dict are needed to submit the FCS-ETL job later
        job_def_inputs[agaveparam] = agaveutils.to_agave_uri(
            dest_sys, os.path.join(dest_dir, fname))

    # Base inputPath off path of manifest
    # Cowboy coding - Take grandparent directory sans sanity checking!
    manifest_pathGrandparent = os.path.dirname(os.path.dirname(manifest_path))

    # Build the inputData path from settings (instead of hard-coding vals)
    #
    #   Our settings.job_params.data_subdir could be an array
    #   should there be a need to pull in other top-level dirs.
    #   In such a case inputPath would be constructed as a list
    #   of agave URIs. This is challenging to process in the
    #   job's runner script but possible and documented.

    inputDataPath = os.path.join(manifest_pathGrandparent,
                                 r.settings.job_params.data_subdir)
    job_def_inputs['inputData'] = agaveutils.to_agave_uri(
        agave_storage_sys, inputDataPath)

    # Submit a job request to the FCS-ETL app based on template + vars
    #
    # The job configuration is templated from settings.job_definition:
    # name, inputs, and notifications are left empty (notifications are
    # not being implemented for the time being). Use the inputs we built
    # above from the uploaded files plus the path derived from the
    # manifest, and synthesize a job name from app/actor/execution.
    #
    # By convention, slots we wish to template are left empty. Slots we
    # want to have a default value (one not defined by the app itself)
    # are included in the template, but can be overridden
    # programmatically with Python dict operations.

    job_def = r.settings.job_definition
    app_record = r.settings.linked_reactors.get(AGAVE_APP_ALIAS, {})

    # this allows the appId to be set in the job_definition, but overridden
    # by configuration provided in settings.
    job_def_orig_appId = job_def.get('appId', None)
    job_def.appId = app_record.get('id', job_def_orig_appId)

    # add dynamically-generated callback to log aggregator
    # sends gross amounts of JSON in each POST
    if r.settings.logs.get('token', None) is not None:

        proto = r.settings.get('logger', {}).get('proto', 'http')
        hostname = r.settings.get('logger', {}).get('host', 'localhost')
        port = str(r.settings.get('logger', {}).get('port', 8080))
        client_key = r.settings.get('logger', {}).get('client_key', 'KEY')
        client_secret = r.settings.logs.get('token', 'SECRET')
        # read logger path from default -> reactor settings -> app settings
        path = r.settings.get('logger', {}).get('path', '/logger')
        path = app_record.get('opts', {}).get('logger', {}).get('path', path)
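        # The resulting callback URL embeds client_key:client_secret as
        # HTTP basic-auth userinfo: proto://KEY:SECRET@host:port/path/<appId>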

        logger_uri = proto + '://' + client_key + ':' + client_secret + '@' +\
            hostname + ':' + port + path + '/' + job_def.appId

        logger_callback = {'persistent': True, 'event': '*', 'url': logger_uri}

        nlist = list(job_def.notifications)
        nlist.append(logger_callback)
        job_def.notifications = tuple(nlist)

    job_def.inputs = job_def_inputs
    job_def.name = "{}-{}".format(r.uid, r.execid)
    # set archivePath and archiveSystem based on manifest
    job_def.archiveSystem = agave_storage_sys
    job_def.archivePath = os.path.join(manifest_pathGrandparent,
                                       r.settings.job_params.output_subdir,
                                       job_def.appId,
                                       "{}-{}".format(r.uid, r.execid))

    # Expected outcome:
    #
    # An experimental data collection 'ABCDEF'
    # has (at present) directories of measurements and one or more
    # manifests (allowing for versioning). ETL apps can deposit results
    # under ABCDEF/processed/appid/<unique-directory-name>.
    r.logger.info('submitting FCS-ETL agave compute job')
    job_id = 'mockup'
    try:
        job_id = r.client.jobs.submit(body=job_def)['id']
        r.logger.info("compute job id is {}".format(job_id))
    except Exception as e:
        # Use a print here so we can more easily snag the job def
        # TODO - come back and take this out if we ever add a nonce to
        #        the callback notifications because that should not
        #        show up in the logs. One alternative would be to
        #        register a plaintext log formatter with redaction
        #        support, but that requires extending our logger module
        print(json.dumps(job_def, indent=4))
        r.on_failure(
            template.format(actor_name,
                            'failed when submitting an agave compute job for',
                            job_def.appId, r.uid, r.execid), e)

    # Make a nice human-readable success message for the Slack log
    suffix = '{} and will deposit outputs in {}'.format(
        job_id, job_def.archivePath)
    r.on_success(
        template.format(actor_name, 'submitted job', suffix, r.uid, r.execid))
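
For orientation, the settings.job_definition template consumed near the top
of the job-submission block might look like the sketch below. The slot names
follow the attributes the code fills in; the appId value is hypothetical,
and by convention the empty slots are populated programmatically before
submission.

job_definition_sketch = {
    'appId': 'fcs-etl-0.1.0',  # hypothetical; linked_reactors may override
    'name': '',                # filled with '{uid}-{execid}'
    'inputs': {},              # filled from job_def_inputs
    'notifications': (),       # logger callback appended when a token is set
    'archiveSystem': '',       # set from the manifest's storage system
    'archivePath': ''          # .../output_subdir/<appId>/<uid>-<execid>
}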