Example 1
def output_target_for_input_extra(target_inf,
                                  of,
                                  tibanna,
                                  overwrite_input_extra=False):
    extrafileexists = False
    printlog("target_inf = %s" % str(target_inf))  # debugging
    target_inf_meta = ff_utils.get_metadata(target_inf.get('value'),
                                            key=tibanna.ff_keys,
                                            ff_env=tibanna.env,
                                            add_on='frame=object',
                                            check_queue=True)
    target_format = parse_formatstr(of.get('format'))
    # make sure extra_files exists, then look for an entry of the target format
    if not target_inf_meta.get('extra_files'):
        target_inf_meta['extra_files'] = []
    for exf in target_inf_meta['extra_files']:
        if parse_formatstr(exf.get('file_format')) == target_format:
            extrafileexists = True
            if overwrite_input_extra:
                exf['status'] = 'to be uploaded by workflow'
            break
    if not extrafileexists:
        target_inf_meta['extra_files'].append({
            'file_format': target_format,
            'status': 'to be uploaded by workflow'
        })
    if overwrite_input_extra or not extrafileexists:
        # first patch metadata
        printlog("extra_files_to_patch: %s" %
                 str(target_inf_meta.get('extra_files')))  # debugging
        ff_utils.patch_metadata(
            {'extra_files': target_inf_meta.get('extra_files')},
            target_inf.get('value'),
            key=tibanna.ff_keys,
            ff_env=tibanna.env)
        # target key
        # NOTE: the target bucket is assumed to be the same as the output
        # bucket, i.e. the bucket for the input file should be the same as
        # the output bucket, which is true if both input and output are
        # processed files.
        orgfile_key = target_inf_meta.get('upload_key')
        orgfile_format = parse_formatstr(target_inf_meta.get('file_format'))
        fe_map = FormatExtensionMap(tibanna.ff_keys)
        printlog("orgfile_key = %s" % orgfile_key)
        printlog("orgfile_format = %s" % orgfile_format)
        printlog("target_format = %s" % target_format)
        target_key = get_extra_file_key(orgfile_format, orgfile_key,
                                        target_format, fe_map)
        return target_key
    else:
        raise Exception(
            "input already has an extra file of the target format: " +
            "use 'overwrite_input_extra': true to overwrite it")
Example 2
def get_fileformats_for_accession(accession, key, env):
    meta = get_metadata(accession,
                        key=key,
                        ff_env=env,
                        add_on='frame=object',
                        check_queue=True)
    if meta:
        file_format = parse_formatstr(meta.get('file_format'))
        extra_formats = [
            parse_formatstr(v.get('file_format'))
            for v in meta.get('extra_files', [])
        ]
        return file_format, extra_formats
    else:
        raise Exception("Can't get file format for accession %s" % accession)
Example 3
def create_and_post_processed_file(ff_keys,
                                   file_format,
                                   secondary_file_formats,
                                   source_experiments=None,
                                   other_fields=None):
    printlog(file_format)
    if not file_format:
        raise Exception("file format for processed file must be provided")
    if secondary_file_formats:
        extra_files = [{
            "file_format": parse_formatstr(v)
        } for v in secondary_file_formats]
    else:
        extra_files = None
    pf = ProcessedFileMetadata(file_format=file_format,
                               extra_files=extra_files,
                               source_experiments=source_experiments,
                               other_fields=other_fields)
    # actually post processed file metadata here
    resp = pf.post(key=ff_keys)
    if resp and '@graph' in resp:
        resp = resp.get('@graph')[0]
    else:
        raise Exception("Failed to post Processed file metadata.\n")
    return pf, resp
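A sketch of how this might be called when pre-creating output metadata; the format names and the extra field below are illustrative assumptions, not values taken from the example.
# Hypothetical usage sketch -- format names and other_fields are
# placeholders for values registered on the portal.
pf, resp = create_and_post_processed_file(
    ff_keys,
    file_format='bam',
    secondary_file_formats=['bai'],
    other_fields={'genome_assembly': 'GRCh38'})
print(resp.get('uuid'))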
Example 4
def get_status_for_extra_file(event, extra_format):
    if not extra_format:
        return None
    upload_key = event['Records'][0]['s3']['object']['key']
    if upload_key.endswith('html'):
        return False

    uuid, object_key = upload_key.split('/')
    accession = object_key.split('.')[0]

    # guess env from bucket name
    bucket = event['Records'][0]['s3']['bucket']['name']
    env = '-'.join(bucket.split('-')[1:3])

    try:
        tibanna = Tibanna(env=env)
    except Exception as e:
        raise TibannaStartException("%s" % e)
    meta = get_metadata(accession,
                        key=tibanna.ff_keys,
                        ff_env=env,
                        add_on='frame=object',
                        check_queue=True)
    if meta and 'extra_files' in meta:
        for exf in meta['extra_files']:
            if parse_formatstr(exf['file_format']) == extra_format:
                return exf.get('status', None)
    return None
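The S3 event shape the function expects can be sketched as below; the bucket and key are made up, and a real call would still need portal credentials via the Tibanna object it constructs internally.
# Hypothetical S3 event sketch -- bucket and key are made up.
# '-'.join(bucket.split('-')[1:3]) on this name yields 'fourfront-webdev'.
event = {'Records': [{'s3': {
    'bucket': {'name': 'elasticbeanstalk-fourfront-webdev-files'},
    'object': {'key': 'some-uuid/4DNFIXXXXXXX.pairs.gz.px2'}}}]}
status = get_status_for_extra_file(event, extra_format='pairs_px2')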
Example 5
def which_extra(original_file, format_if_extra=None):
    if format_if_extra:
        if 'extra_files' not in original_file:
            raise Exception(
                "input file has no extra_files, " +
                "yet the tag 'format_if_extra' is found in the input json")
        for extra in original_file.get('extra_files'):
            if parse_formatstr(extra.get('file_format')) == format_if_extra:
                return extra
    return None
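Since this helper only inspects the dictionary it is given, a self-contained sketch is possible, assuming parse_formatstr returns the bare format name for a plain string.
# Self-contained sketch, assuming parse_formatstr('pairs_px2') == 'pairs_px2'.
original_file = {'extra_files': [{'file_format': 'pairs_px2',
                                  'status': 'uploaded'}]}
extra = which_extra(original_file, format_if_extra='pairs_px2')
# extra == {'file_format': 'pairs_px2', 'status': 'uploaded'}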
Example 6
def get_extra_file_key_given_input_uuid_and_key(inf_uuid, inf_key, ff_keys,
                                                ff_env, fe_map):
    extra_file_keys = []
    not_ready_list = [
        'uploading', 'to be uploaded by workflow', 'upload failed', 'deleted'
    ]
    infile_meta = ff_utils.get_metadata(inf_uuid,
                                        key=ff_keys,
                                        ff_env=ff_env,
                                        add_on='frame=object')
    if infile_meta.get('extra_files'):
        infile_format = parse_formatstr(infile_meta.get('file_format'))
        for extra_file in infile_meta.get('extra_files'):
            # a missing status counts as ready: get() returns None,
            # which is not in not_ready_list
            if extra_file.get('status') not in not_ready_list:
                extra_file_format = parse_formatstr(
                    extra_file.get('file_format'))
                extra_file_key = get_extra_file_key(infile_format, inf_key,
                                                    extra_file_format, fe_map)
                extra_file_keys.append(extra_file_key)
    if len(extra_file_keys) == 0:
        extra_file_keys = None
    return extra_file_keys
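A hedged caller sketch; the uuid, key and environment are placeholders, and fe_map is built the same way as in Example 1.
# Hypothetical usage sketch -- uuid, key and env are placeholders.
fe_map = FormatExtensionMap(ff_keys)
extra_keys = get_extra_file_key_given_input_uuid_and_key(
    'some-input-file-uuid', 'some-input-file-uuid/4DNFIXXXXXXX.pairs.gz',
    ff_keys, 'fourfront-webdev', fe_map)
# extra_keys is a list of S3 keys, or None if no ready extra file exists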
Example 7
def _input_extra_updater(status,
                         tibanna,
                         accession,
                         extra_file_format,
                         md5=None,
                         filesize=None,
                         higlass_uid=None):
    try:
        original_file = ff_utils.get_metadata(accession,
                                              key=tibanna.ff_keys,
                                              ff_env=tibanna.env,
                                              add_on='frame=object',
                                              check_queue=True)
    except Exception as e:
        raise Exception("Can't get metadata for input file %s: %s"
                        % (accession, e))
    if 'extra_files' not in original_file:
        raise Exception(
            "inconsistency - extra file metadata deleted during workflow run?")
    matching_exf_found = False
    for exf in original_file['extra_files']:
        if parse_formatstr(exf['file_format']) == extra_file_format:
            matching_exf_found = True
            exf['status'] = status
            if status == 'uploaded':
                if md5:
                    exf['md5sum'] = md5
                if filesize:
                    exf['file_size'] = filesize
    if not matching_exf_found:
        raise Exception(
            "inconsistency - no extra file of format %s found on %s"
            % (extra_file_format, accession))
    try:
        patch_file = {'extra_files': original_file['extra_files']}
        if higlass_uid:
            patch_file['higlass_uid'] = higlass_uid
        ff_utils.patch_metadata(patch_file,
                                original_file['uuid'],
                                key=tibanna.ff_keys)
    except Exception as e:
        raise Exception("patch_metadata failed in extra_updater: " + str(e) +
                        " original_file = {}\n".format(str(original_file)))