Example #1
def create_and_post_processed_file(ff_keys,
                                   file_format,
                                   secondary_file_formats,
                                   source_experiments=None,
                                   other_fields=None):
    printlog(file_format)
    if not file_format:
        raise Exception("file format for processed file must be provided")
    if secondary_file_formats:
        extra_files = [{
            "file_format": parse_formatstr(v)
        } for v in secondary_file_formats]
    else:
        extra_files = None
    pf = ProcessedFileMetadata(file_format=file_format,
                               extra_files=extra_files,
                               source_experiments=source_experiments,
                               other_fields=other_fields)
    # actually post processed file metadata here
    resp = pf.post(key=ff_keys)
    if resp and '@graph' in resp:
        resp = resp.get('@graph')[0]
    else:
        raise Exception("Failed to post Processed file metadata.\n")
    return pf, resp
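
A minimal usage sketch of the helper above; the authorization keys, format names, experiment uuid, and other_fields below are hypothetical placeholders, not values from the source.

# Hypothetical call; ff_keys contents, format names, uuid, and
# other_fields are all placeholders.
ff_keys = {'key': 'XXXXXXXX', 'secret': 'XXXXXXXX',
           'server': 'https://data.example.org'}
pf, resp = create_and_post_processed_file(
    ff_keys,
    file_format='pairs',
    secondary_file_formats=['pairs_px2'],
    source_experiments=['431106bc-8535-4448-903e-854af460b544'],
    other_fields={'genome_assembly': 'GRCh38'})
print(resp['uuid'])  # uuid of the newly posted processed file item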
Example #2
def test_create_ProcessedFileMetadata_from_get_error_if_no_at_type(
        ff_keys, proc_file_in_webdev):
    # can use acc, uuid, @id, any valid url
    with mock.patch('core.pony_utils.get_metadata',
                    return_value=proc_file_in_webdev):
        with pytest.raises(Exception) as expinfo:
            ProcessedFileMetadata.get(proc_file_in_webdev['accession'],
                                      ff_keys)
        assert "only load ProcessedFiles" in str(expinfo.value)
Example #3
def test_add_md5_filesize_to_pf_extra():
    wff = AwsemFile(bucket='somebucket', key='somekey.pairs.gz.px2', runner=None,
                    md5='somemd5', filesize=1234,
                    argument_type='Output processed file', format_if_extra='pairs_px2')
    pf = ProcessedFileMetadata(extra_files=[{'file_format': 'lalala'}, {'file_format': 'pairs_px2'}])
    add_md5_filesize_to_pf_extra(pf, wff)
    assert 'md5sum' in pf.extra_files[1]
    assert 'file_size' in pf.extra_files[1]
    assert pf.extra_files[1]['md5sum'] == 'somemd5'
    assert pf.extra_files[1]['file_size'] == 1234
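
Based on the assertions above, a minimal sketch of what add_md5_filesize_to_pf_extra presumably does: find the extra file whose file_format matches the AwsemFile's format_if_extra and copy the md5 and file size onto it. This is inferred from the test, not the source implementation.

def add_md5_filesize_to_pf_extra(pf, wff):
    # Inferred behavior: attach md5sum and file_size to the extra file
    # whose format matches the AwsemFile's format_if_extra.
    for extra in pf.extra_files:
        if extra.get('file_format') == wff.format_if_extra:
            extra['md5sum'] = wff.md5
            extra['file_size'] = wff.filesize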
Example #4
def test_create_ProcessedFileMetadata_from_get(ff_keys, proc_file_in_webdev):
    # can use acc, uuid, @id, any valid url
    file_with_type = proc_file_in_webdev.copy()
    file_with_type['@type'] = ['FileProcessed', 'Item', 'whatever']
    with mock.patch('core.pony_utils.get_metadata',
                    return_value=file_with_type) as ff:
        pf = ProcessedFileMetadata.get(proc_file_in_webdev['accession'],
                                       ff_keys)
        assert pf.__dict__ == proc_file_in_webdev
        assert type(pf) is ProcessedFileMetadata
        # use the real Mock assertion; ff.was_called_once() would be
        # silently auto-created by the mock and never actually assert
        ff.assert_called_once()
Example #5
def user_supplied_proc_file(user_supplied_output_files, arg_name, tibanna):
    if not user_supplied_output_files:
        raise Exception("user supplied processed files missing\n")
    of = [
        output for output in user_supplied_output_files
        if output.get('workflow_argument_name') == arg_name
    ]
    if of:
        if len(of) > 1:
            raise Exception(
                "multiple output files supplied with same workflow_argument_name"
            )
        of = of[0]
        return ProcessedFileMetadata.get(of.get('uuid'),
                                         tibanna.ff_keys,
                                         tibanna.env,
                                         return_data=True)
    else:
        printlog("no output_files found in input_json matching arg_name")
        printlog("user_supplied_output_files: %s" %
                 str(user_supplied_output_files))
        printlog("arg_name: %s" % str(arg_name))
        printlog("tibanna is %s" % str(tibanna))
        raise Exception("user supplied processed files missing\n")
Example #6
def proc_file_for_arg_name(output_files, arg_name, tibanna):
    if not output_files:
        LOG.info("proc_file_for_arg_name no ouput_files specified")
        return None, None
    of = [
        output for output in output_files
        if output.get('workflow_argument_name') == arg_name
    ]
    if of:
        if len(of) > 1:
            raise Exception(
                "multiple output files supplied with same workflow_argument_name"
            )
        of = of[0]
        return ProcessedFileMetadata.get(of.get('uuid'),
                                         tibanna.ff_keys,
                                         tibanna.env,
                                         return_data=True)
    else:
        LOG.info("no output_files found in input_json matching arg_name")
        LOG.info("output_files: %s" % str(output_files))
        LOG.info("arg_name: %s" % str(arg_name))
        LOG.info("tibanna is %s" % str(tibanna))
        return None, None
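
A hedged sketch of the inputs this helper consumes; the argument name and uuid are placeholders, and tibanna is assumed to be an already-constructed Tibanna object.

# Hypothetical output_files entry matching the fields read above.
output_files = [
    {'workflow_argument_name': 'out_pairs',
     'uuid': '431106bc-8535-4448-903e-854af460b544'},
]
pf, resp = proc_file_for_arg_name(output_files, 'out_pairs', tibanna)
# returns (None, None) when output_files is empty or nothing matches arg_name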
Example #7
def real_handler(event, context):
    # check the status and other details of import
    '''
    this is to check if the task run is done:
    http://docs.sevenbridges.com/reference#get-task-execution-details
    '''
    # get data
    # used to automatically determine the environment
    tibanna_settings = event.get('_tibanna', {})
    try:
        tibanna = Tibanna(tibanna_settings['env'], settings=tibanna_settings)
    except Exception as e:
        raise TibannaStartException("%s" % e)
    ff_meta = create_ffmeta_awsem(
        app_name=event.get('ff_meta').get('awsem_app_name'),
        **event.get('ff_meta'))

    if event.get('error', False):
        ff_meta.run_status = 'error'
        ff_meta.description = event.get('error')
        patch_res = ff_meta.patch(key=tibanna.ff_keys)
        printlog("patch response: " + str(patch_res))
        # sending a notification email before throwing error
        if 'email' in event['config'] and event['config']['email']:
            try:
                send_notification_email(
                    event['_tibanna']['settings']['run_name'], event['jobid'],
                    ff_meta.run_status, event['_tibanna']['settings']['url'])
            except Exception as e:
                printlog("Cannot send email: %s" % e)
        raise Exception(event.get('error'))

    metadata_only = event.get('metadata_only', False)

    # default to an empty list so a missing 'pf_meta' key doesn't raise TypeError
    pf_meta = [ProcessedFileMetadata(**pf) for pf in event.get('pf_meta', [])]
    custom_qc_fields = event.get('custom_qc_fields', None)

    # ensure this bad boy is always initialized
    awsem = Awsem(event)
    # go through this and replace awsemfile_report with awsf format
    # actually interface should be look through ff_meta files and call
    # give me the status of this thing from the runner, and runner.output_files.length
    # so we just build a runner with interface to sbg and awsem
    # runner.output_files.length()
    # runner.output_files.file.status
    # runner.output_files.file.loc
    # runner.output_files.file.get

    awsem_output = awsem.output_files()
    awsem_output_extra = awsem.secondary_output_files()
    ff_output = len(ff_meta.output_files)
    if len(awsem_output) != ff_output:
        ff_meta.run_status = 'error'
        ff_meta.description = "%d output files expected, %d found" % (
            ff_output, len(awsem_output))
        ff_meta.patch(key=tibanna.ff_keys)
        # len() is needed here: interpolating the list itself into %d
        # would raise a TypeError
        raise Exception(
            "Failing the workflow because output files = %d and ffmeta = %d"
            % (len(awsem_output), ff_output))

    def update_metadata_from_awsemfile_list(awsemfile_list):
        patch_meta = False
        for awsemfile in awsemfile_list:
            patch_meta = update_ffmeta_from_awsemfile(awsemfile, ff_meta,
                                                      tibanna,
                                                      custom_qc_fields)
            if not metadata_only:
                update_pfmeta_from_awsemfile(awsemfile, pf_meta, tibanna)
        # allow for a simple way for updater to add appropriate meta_data
        if patch_meta:
            ff_meta.__dict__.update(patch_meta)

    update_metadata_from_awsemfile_list(awsem_output)
    update_metadata_from_awsemfile_list(awsem_output_extra)

    # if we got all the awsemfiles let's go ahead and update our ff_metadata object
    ff_meta.run_status = "complete"

    # add postrunjson log file to ff_meta as a url
    ff_meta.awsem_postrun_json = get_postrunjson_url(event)

    # make all the file awsemfile meta-data stuff here
    # TODO: fix bugs with ff_meta mapping for output and input file
    try:
        ff_meta.patch(key=tibanna.ff_keys)
    except Exception as e:
        raise Exception("Failed to update run_status %s" % str(e))
    # patch processed files - update only status, extra_files, md5sum and file_size
    if pf_meta:
        patch_fields = [
            'uuid', 'status', 'extra_files', 'md5sum', 'file_size',
            'higlass_uid'
        ]
        try:
            for pf in pf_meta:
                printlog(pf.as_dict())
                pf.patch(key=tibanna.ff_keys, fields=patch_fields)
        except Exception as e:
            raise Exception("Failed to update processed metadata %s" % str(e))

    event['ff_meta'] = ff_meta.as_dict()
    event['pf_meta'] = [_.as_dict() for _ in pf_meta]

    # sending a notification email after the job finishes
    if 'email' in event['config'] and event['config']['email']:
        try:
            send_notification_email(event['_tibanna']['settings']['run_name'],
                                    event['jobid'],
                                    event['ff_meta']['run_status'],
                                    event['_tibanna']['settings']['url'])
        except Exception as e:
            printlog("Cannot send email: %s" % e)

    return event
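
A minimal sketch of the event shape real_handler reads, assembled from the key accesses above; every concrete value is a placeholder, and optional keys ('error', 'metadata_only', 'custom_qc_fields') are omitted.

# Hypothetical event; values are placeholders.
event = {
    '_tibanna': {
        'env': 'fourfront-webdev',
        'settings': {'run_name': 'my_test_run',
                     'url': 'https://example.org/run'},
    },
    'jobid': 'a1b2c3d4',
    'config': {'email': False},
    'ff_meta': {'awsem_app_name': 'hi-c-processing-pairs'},  # plus the other ffmeta fields
    'pf_meta': [],
}
result = real_handler(event, None)  # context is not used in the code above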
Example #8
def handle_processed_files(workflow_info,
                           tibanna,
                           pf_source_experiments=None,
                           custom_fields=None,
                           user_supplied_output_files=None):
    output_files = []
    pf_meta = []
    fe_map = None
    try:
        print("Inside handle_processed_files")
        LOG.info("Inside handle_processed_files")
        for arg in workflow_info.get('arguments', []):
            print("processing arguments %s" % str(arg))
            LOG.info("processing arguments %s" % str(arg))
            if (arg.get('argument_type') in [
                    'Output processed file', 'Output report file',
                    'Output QC file'
            ]):
                of = dict()
                argname = of['workflow_argument_name'] = arg.get(
                    'workflow_argument_name')
                of['type'] = arg.get('argument_type')

                # see if user supplied the output file already
                # this is often the case for pseudo workflow runs (run externally)
                # TODO move this down next to post of pf
                pf, resp = proc_file_for_arg_name(
                    user_supplied_output_files,
                    arg.get('workflow_argument_name'), tibanna)
                if pf:
                    print(
                        "proc_file_for_arg_name returned %s \nfrom ff result of\n %s"
                        % (str(pf.__dict__), str(resp)))
                    LOG.info(
                        "proc_file_for_arg_name returned %s \nfrom ff result of\n %s"
                        % (str(pf.__dict__), str(resp)))
                    pf_meta.append(pf)
                else:
                    print(
                        "proc_file_for_arg_name returned %s \nfrom ff result of\n %s"
                        % (str(pf), str(resp)))
                    LOG.info(
                        "proc_file_for_arg_name returned %s \nfrom ff result of\n %s"
                        % (str(pf), str(resp)))
                if not resp:  # if it wasn't supplied as output we have to create a new one
                    assert user_supplied_output_files is None
                    if of['type'] == 'Output processed file':
                        print("creating new processedfile")
                        LOG.info("creating new processedfile")
                        if 'argument_format' not in arg:
                            raise Exception(
                                "argument format for processed file must be provided"
                            )
                        if not fe_map:
                            fe_map = get_format_extension_map(tibanna.ff_keys)
                        # record the file format and matching extension
                        # for the processed file
                        of['format'] = arg.get('argument_format')
                        of['extension'] = fe_map.get(
                            arg.get('argument_format'))
                        if 'secondary_file_formats' in arg:
                            of['secondary_file_formats'] = arg.get(
                                'secondary_file_formats')
                            of['secondary_file_extensions'] = [
                                fe_map.get(v)
                                for v in arg.get('secondary_file_formats')
                            ]
                            extra_files = [{
                                "file_format": v
                            } for v in of['secondary_file_formats']]
                        else:
                            extra_files = None
                        pf_other_fields = dict()
                        if custom_fields:
                            if argname in custom_fields:
                                pf_other_fields.update(custom_fields[argname])
                            if 'ALL' in custom_fields:
                                pf_other_fields.update(custom_fields['ALL'])
                        pf = ProcessedFileMetadata(
                            file_format=arg.get('argument_format'),
                            extra_files=extra_files,
                            source_experiments=pf_source_experiments,
                            other_fields=pf_other_fields)
                        try:
                            # actually post processed file metadata here
                            resp = pf.post(key=tibanna.ff_keys)
                            resp = resp.get('@graph')[0]
                        except Exception as e:
                            LOG.error(
                                "Failed to post Processed file metadata. %s\n"
                                % e)
                            LOG.error("resp" + str(resp) + "\n")
                            raise e
                        pf_meta.append(pf)
                if resp:
                    of['upload_key'] = resp.get('upload_key')
                    of['value'] = resp.get('uuid')
                    of['extra_files'] = resp.get('extra_files')
                output_files.append(of)

    except Exception as e:
        LOG.error("output_files = " + str(output_files) + "\n")
        LOG.error("Can't prepare output_files information. %s\n" % e)
        raise e
    return output_files, pf_meta
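
A hedged sketch of the workflow_info structure this function iterates over; argument names and formats are placeholders, and tibanna is assumed to be an already-constructed Tibanna object.

# Hypothetical workflow_info built from the fields read above.
workflow_info = {
    'arguments': [
        {'argument_type': 'Output processed file',
         'workflow_argument_name': 'out_pairs',
         'argument_format': 'pairs',
         'secondary_file_formats': ['pairs_px2']},
        {'argument_type': 'Output QC file',
         'workflow_argument_name': 'qc_report'},
    ]
}
output_files, pf_meta = handle_processed_files(workflow_info, tibanna)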