Example #1
def finalize_user_content(spawner):
    """
    This function is called after the singleuser notebook stops.
    Responsible for:
    - adding date_culled to the TrackingItem given by FF_TRACKING_ID
    """
    # grab this info fresh every time
    err_output = []
    ff_keys = recompute_ff_keys(err_output)

    if not os.environ.get('FF_TRACKING_ID'):
        return
    # get current item
    track_id = os.environ['FF_TRACKING_ID']
    try:
        track_res = ff_utils.get_metadata(track_id, key=ff_keys)
    except Exception:
        pass  # Nothing to do here
    else:
        session = track_res.get('jupyterhub_session')
        if session and isinstance(session, dict):
            session['date_culled'] = datetime.datetime.utcnow().isoformat() + '+00:00'
            try:
                ff_utils.patch_metadata({'jupyterhub_session': session},
                                        track_id,
                                        key=ff_keys)
            except Exception:
                pass
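Many of the examples on this page follow the same read-modify-write pattern seen above: fetch the current item with ff_utils.get_metadata, adjust a field locally, then send only the changed fields back with ff_utils.patch_metadata. Below is a minimal standalone sketch of that pattern, assuming dcicutils is installed; the item id, field name, and access key are placeholders.

from dcicutils import ff_utils

# placeholder credentials and item identifier -- replace with real values
auth = {'key': '<ACCESS_KEY_ID>', 'secret': '<ACCESS_KEY_SECRET>', 'server': '<FOURFRONT_SERVER_URL>'}
item_id = '<item-uuid-or-accession>'

item = ff_utils.get_metadata(item_id, key=auth)        # read the current metadata
notes = item.get('notes', '')                          # 'notes' is just an illustrative field name
ff_utils.patch_metadata({'notes': notes + ' (updated)'},
                        item_id, key=auth)             # patch only the changed field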
Example #2
def test_fastqc():
    key = dev_key()
    data = get_test_json('fastqc.json')
    fq_uuid = post_new_fastqfile(key=key,
                                 upload_file=os.path.join(
                                     FILE_DIR, 'fastq/A.R2.fastq.gz'))
    data['input_files'][0]['uuid'] = fq_uuid
    api = API()
    res = api.run_workflow(data, sfn=DEV_SFN)
    assert 'jobid' in res
    assert 'exec_arn' in res['_tibanna']
    time.sleep(420)
    assert api.check_status(res['_tibanna']['exec_arn']) == 'SUCCEEDED'
    outjson = api.check_output(res['_tibanna']['exec_arn'])
    postrunjson = json.loads(api.log(job_id=res['jobid'], postrunjson=True))
    assert 'status' in postrunjson['Job']
    assert postrunjson['Job']['status'] == '0'
    res = ff_utils.get_metadata(fq_uuid,
                                key=key,
                                ff_env=DEV_ENV,
                                check_queue=True)
    ff_utils.patch_metadata({'status': 'deleted'}, fq_uuid, key=key)
    assert 'quality_metric' in res
    assert 'ff_meta' in outjson
    assert 'uuid' in outjson['ff_meta']
    wfr_uuid = outjson['ff_meta']['uuid']
    res = ff_utils.get_metadata(wfr_uuid,
                                key=key,
                                ff_env=DEV_ENV,
                                check_queue=True)
    assert res['run_status'] == 'complete'
    assert 'quality_metric' in res
Example #3
def patch_jsons(auth, to_patch):
    for item in to_patch:
        uid = item.get('uuid')
        try:
            patch_metadata(item, uid, auth)
        except Exception as e:
            print(e)
Example #4
def test_bed2beddb():
    key = dev_key()
    # prep new File
    data = get_test_json('bedtobeddb.json')
    bed_content = b'chr1\t1000000\t2000000\tregion1'
    gzipped_content = gzip.compress(bed_content)
    bed_uuid = post_new_processedfile(file_format='bed',
                                      key=key,
                                      upload_content=gzipped_content,
                                      extension='bed.gz')
    data['input_files'][0]['uuid'] = bed_uuid
    api = API()
    res = api.run_workflow(data, sfn=DEV_SFN)
    assert 'jobid' in res
    assert 'exec_arn' in res['_tibanna']
    time.sleep(420)
    assert api.check_status(res['_tibanna']['exec_arn']) == 'SUCCEEDED'
    outjson = api.check_output(res['_tibanna']['exec_arn'])
    postrunjson = json.loads(api.log(job_id=res['jobid'], postrunjson=True))
    assert 'status' in postrunjson['Job']
    assert postrunjson['Job']['status'] == '0'
    assert 'ff_meta' in outjson
    assert 'uuid' in outjson['ff_meta']
    wfr_uuid = outjson['ff_meta']['uuid']
    res = ff_utils.get_metadata(bed_uuid,
                                key=key,
                                ff_env=DEV_ENV,
                                check_queue=True)
    assert res['extra_files']
    assert len(res['extra_files']) == 1
    extra = res['extra_files'][0]
    assert extra['file_format']['display_title'] == 'beddb'
    ff_utils.patch_metadata({'status': 'deleted'}, bed_uuid, key=key)
    ff_utils.patch_metadata({'status': 'deleted'}, wfr_uuid, key=key)
Example #5
def testrun_md5(workflow_name='tibanna_pony', env='webdev'):
    """Creates a random file object with no md5sum/content_md5sum and run md5 workflow.
    It waits for 6 mintues till the workflow run finishes and checks the input file object
    has been updated.
    """
    bucket = "elasticbeanstalk-fourfront-" + env + "-wfoutput"
    ff_key = get_authentication_with_server(ff_env='fourfront-' + env)
    newfile = post_random_file(bucket, ff_key)
    uuid = newfile['uuid']
    accession = newfile['accession']
    input_json = {
        "config": {
            "ebs_type": "io1",
            "ebs_iops": 500,
            "s3_access_arn":
            "arn:aws:iam::643366669028:instance-profile/S3_access",
            "ami_id": "ami-cfb14bb5",
            "json_bucket": "4dn-aws-pipeline-run-json",
            "shutdown_min": 30,
            "copy_to_s3": True,
            "launch_instance": True,
            "log_bucket": "tibanna-output",
            "script_url":
            "https://raw.githubusercontent.com/4dn-dcic/tibanna/master/awsf/",
            "key_name": "4dn-encode",
            "password": ""
        },
        "_tibanna": {
            "env": "fourfront-webdev",
            "run_type": "md5"
        },
        "parameters": {},
        "app_name":
        "md5",
        "workflow_uuid":
        "c77a117b-9a58-477e-aaa5-291a109a99f6",
        "input_files": [{
            "workflow_argument_name": "input_file",
            "bucket_name": bucket,
            "uuid": uuid,
            "object_key": accession + '.pairs.gz'
        }],
        "output_bucket":
        bucket
    }
    resp = run_workflow(input_json, workflow=workflow_name)
    print(resp)

    # check result
    time.sleep(6 * 60)  # wait for 6 minutes
    filemeta = get_metadata(uuid, key=ff_key, add_on='?datastore=database')
    content_md5sum = filemeta.get('content_md5sum')
    md5sum = filemeta.get('md5sum')
    if content_md5sum and md5sum:
        print(content_md5sum)
        print(md5sum)
        patch_metadata({'status': 'deleted'}, uuid, key=ff_key)
    else:
        raise Exception('md5 step function run failed')
Example #6
def output_target_for_input_extra(target_inf,
                                  of,
                                  tibanna,
                                  overwrite_input_extra=False):
    extrafileexists = False
    printlog("target_inf = %s" % str(target_inf))  # debugging
    target_inf_meta = ff_utils.get_metadata(target_inf.get('value'),
                                            key=tibanna.ff_keys,
                                            ff_env=tibanna.env,
                                            add_on='frame=object',
                                            check_queue=True)
    target_format = parse_formatstr(of.get('format'))
    if target_inf_meta.get('extra_files'):
        for exf in target_inf_meta.get('extra_files'):
            if parse_formatstr(exf.get('file_format')) == target_format:
                extrafileexists = True
                if overwrite_input_extra:
                    exf['status'] = 'to be uploaded by workflow'
                break
        if not extrafileexists:
            new_extra = {
                'file_format': target_format,
                'status': 'to be uploaded by workflow'
            }
            target_inf_meta['extra_files'].append(new_extra)
    else:
        new_extra = {
            'file_format': target_format,
            'status': 'to be uploaded by workflow'
        }
        target_inf_meta['extra_files'] = [new_extra]
    if overwrite_input_extra or not extrafileexists:
        # first patch metadata
        printlog("extra_files_to_patch: %s" %
                 str(target_inf_meta.get('extra_files')))  # debugging
        ff_utils.patch_metadata(
            {'extra_files': target_inf_meta.get('extra_files')},
            target_inf.get('value'),
            key=tibanna.ff_keys,
            ff_env=tibanna.env)
        # target key
        # NOTE: The target bucket is assumed to be the same as the output bucket,
        # i.e. the bucket for the input file should be the same as the output bucket,
        # which is true if both input and output are processed files.
        orgfile_key = target_inf_meta.get('upload_key')
        orgfile_format = parse_formatstr(target_inf_meta.get('file_format'))
        fe_map = FormatExtensionMap(tibanna.ff_keys)
        printlog("orgfile_key = %s" % orgfile_key)
        printlog("orgfile_format = %s" % orgfile_format)
        printlog("target_format = %s" % target_format)
        target_key = get_extra_file_key(orgfile_format, orgfile_key,
                                        target_format, fe_map)
        return target_key
    else:
        raise Exception(
            "input already has extra: 'User overwrite_input_extra': true")
Example #7
def test_pseudo_run(testapp, input_json):
    # this test can be problematic; uncomment the following line to disable it
    # assert False

    res = testapp.post_json('/WorkflowRun/pseudo-run', input_json)
    assert res

    # cleanup
    output = json.loads(res.json['output'])
    patch_metadata({'status':'deleted'}, output['ff_meta']['uuid'], ff_env='fourfront-webdev')
Example #8
def add_preliminary_processed_files(item_id, list_pc, auth, run_type="hic"):
    titles = {
        "hic": "HiC Processing Pipeline - Preliminary Files",
        "repliseq": "Repli-Seq Pipeline - Preliminary Files",
        'chip': "ENCODE ChIP-Seq Pipeline - Preliminary Files",
        'atac': "ENCODE ATAC-Seq Pipeline - Preliminary Files"
    }
    if run_type in titles:
        pc_set_title = titles[run_type]
    else:
        pc_set_title = run_type
    resp = ff_utils.get_metadata(item_id, key=auth)

    # check if these items are already in the processed_files field
    # extract ids for comparison, flatten all possible ids into a list, and compare list_pc against it
    ex_pc = resp.get('processed_files')
    if ex_pc:
        ex_pc_ids = [[a['@id'], a['uuid'], a['@id'].split('/')[2]]
                     for a in ex_pc]
        ex_pc_ids = [a for i in ex_pc_ids for a in i]
        for i in list_pc:
            if i in ex_pc_ids:
                print('Error - Cannot add files to pc')
                print(i, 'is already in processed files')
                return

    # extract ids for comparison, flatten all possible ids into a list, and compare list_pc against it
    ex_opc = resp.get('other_processed_files')
    if ex_opc:
        # check the titles
        all_existing_titles = [a['title'] for a in ex_opc]
        if pc_set_title in all_existing_titles:
            print('Error - Cannot add files to opc')
            print('The same title already in other processed files')
            return
        # check the individual files
        ex_opc_ids = [[a['@id'], a['uuid'], a['@id'].split('/')[2]]
                      for i in ex_opc for a in i['files']]
        ex_opc_ids = [a for i in ex_opc_ids for a in i]
        for i in list_pc:
            if i in ex_opc_ids:
                print('Error - Cannot add files to opc')
                print(i, 'is already in other processed files')
                return

    # we need raw to get the existing piece, to patch back with the new ones
    patch_data = ff_utils.get_metadata(
        item_id, key=auth, add_on='frame=raw').get('other_processed_files')
    if not patch_data:
        patch_data = []

    new_data = {'title': pc_set_title, 'type': 'preliminary', 'files': list_pc}
    patch_data.append(new_data)
    patch = {'other_processed_files': patch_data}
    ff_utils.patch_metadata(patch, obj_id=item_id, key=auth)
Example #9
def patch_and_report(auth, patch_d, skipped, uuid2patch, dryrun):
    # report and patch
    if dryrun:
        print('DRY RUN - nothing will be patched to database')
    if skipped:
        print('WARNING! - SKIPPING for', uuid2patch)
        for f, v in skipped.items():
            print('Field: %s\tHAS: %s\tNOT ADDED: %s' %
                  (f, v['new'], v['old']))

    if not patch_d:
        print('NOTHING TO PATCH - ALL DONE!')
    else:
        print('PATCHING -', uuid2patch)
        for f, v in patch_d.items():
            print(f, '\t', v)

        if not dryrun:
            # do the patch
            res = patch_metadata(patch_d, uuid2patch, auth)
            if res['status'] == 'success':
                print("SUCCESS!")
                return True
            else:
                print("FAILED TO PATCH", uuid2patch, "RESPONSE STATUS",
                      res['status'], res['description'])
                return False
    return True
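A usage sketch for patch_and_report, with placeholder values; auth is an access-key dictionary as used elsewhere on this page, and dryrun=True only reports what would be patched.

patch_d = {'status': 'released'}   # fields that would be patched
ok = patch_and_report(auth, patch_d, skipped={}, uuid2patch='<item-uuid>', dryrun=True)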
Example #10
def main():  # pragma: no cover
    start = datetime.now()
    print(str(start))
    args = get_args()
    try:
        auth = get_authentication_with_server(args.key, args.env)
    except Exception:
        print("Authentication failed")
        sys.exit(1)

    # assumes a single line corresponds to json for single term
    if not args.dbupdate:
        print("DRY RUN - use --dbupdate to update the database")
    with open(args.infile) as items:
        for i in items:
            [iid, payload] = [t.strip() for t in i.split('\t')]
            payload = json.loads(payload)
            if args.dbupdate:
                e = patch_metadata(payload, iid, auth)
            else:
                print("DRY RUN\n\tPATCH: ", iid, " TO\n", payload)
                e = {'status': 'success'}

            status = e.get('status')
            if status and status == 'success':
                print(status)
            else:
                print('FAILED', e)

    end = datetime.now()
    print("FINISHED - START: ", str(start), "\tEND: ", str(end))
Example #11
def main():
    args = get_args()
    try:
        auth = ff.get_authentication_with_server(args.key, args.env)
    except Exception:
        print("Authentication failed")
        sys.exit(1)
    print("Working on {}".format(auth.get('server')))
    itemids = scu.get_item_ids_from_args(args.input, auth, args.search)
    seen = []
    failed = []
    for itemid in itemids:
        print("Touching ", itemid)
        if args.dbupdate:
            try:
                res = ff.patch_metadata({}, itemid, auth)
                print(res.get('status'))
                if res.get('status') == 'success':
                    seen.append(itemid)
            except Exception:
                print(itemid, ' failed to patch')
                failed.append(itemid)
                continue
        else:
            print('dry run!')
    for i in seen:
        print(i)
    print("Failures")
    for f in failed:
        print(f)
Example #12
def release_files(set_id, list_items, auth, status=None):
    if status:
        item_status = status
    else:
        item_status = ff_utils.get_metadata(set_id, key=auth)['status']
    # bring files to same status as experiments and sets
    if item_status in ['released', 'released to project', 'pre-release']:
        for a_file in list_items:
            it_resp = ff_utils.get_metadata(a_file, key=auth)
            workflow = it_resp.get('workflow_run_outputs')
            # release the wfr that produced the file
            if workflow:
                ff_utils.patch_metadata({"status": item_status},
                                        obj_id=workflow[0]['uuid'],
                                        key=auth)
            ff_utils.patch_metadata({"status": item_status},
                                    obj_id=a_file,
                                    key=auth)
Example #13
def patch(self, key, fields=None):
    if fields:
        patch_json = {k: v for k, v in self.as_dict().items() if k in fields}
    else:
        patch_json = self.as_dict()
    print(patch_json)
    return patch_metadata(patch_json, key=key, add_on='force_md5')
Example #14
def test_output_target_for_input_extra():
    tibanna = Tibanna('fourfront-webdev',
                      settings={"run_type": "bedGraphToBigWig", "env": "fourfront-webdev"})
    target_inf = {'workflow_argument_name': 'bgfile', 'value': '83a80cf8-ca2c-421a-bee9-118bd0572424'}
    of = {'format': 'bw'}

    ff_utils.patch_metadata({'extra_files': []},
                            '83a80cf8-ca2c-421a-bee9-118bd0572424',
                            key=tibanna.ff_keys)
    time.sleep(10)
    target_key = output_target_for_input_extra(target_inf, of, tibanna)
    assert target_key == '83a80cf8-ca2c-421a-bee9-118bd0572424/4DNFIF14KRAK.bw'

    with pytest.raises(Exception) as expinfo:
        target_key = output_target_for_input_extra(target_inf, of, tibanna)
    assert "input already has extra: 'User overwrite_input_extra'" in str(expinfo.value)

    target_key = output_target_for_input_extra(target_inf, of, tibanna, True)
    assert target_key == '83a80cf8-ca2c-421a-bee9-118bd0572424/4DNFIF14KRAK.bw'
Example #15
def test_bed2beddb_opendata():
    key = dev_key()
    # prep new File
    data = get_test_json('bedtobeddb_opendata.json')
    #bed_content = b'chr1\t1000000\t2000000\tregion1'
    #gzipped_content = gzip.compress(bed_content)
    #bed_uuid = post_new_processedfile(file_format='bed', key=key, upload_content=gzipped_content, extension='bed.gz')
    #data['input_files'][0]['uuid'] = bed_uuid
    bed_uuid = data['input_files'][0]['uuid']
    # first delete extra file from s3 so that we can check it's newly created.
    boto3.client('s3').delete_object(
        Bucket='elasticbeanstalk-fourfront-webdev-wfoutput',
        Key='614d119e-9330-41a3-a7c9-d149d0456c8e/4DNFI1664939.beddb')
    api = API()
    res = api.run_workflow(data, sfn=DEV_SFN)
    assert 'jobid' in res
    assert 'exec_arn' in res['_tibanna']
    time.sleep(420)
    assert api.check_status(res['_tibanna']['exec_arn']) == 'SUCCEEDED'
    outjson = api.check_output(res['_tibanna']['exec_arn'])
    postrunjson = json.loads(api.log(job_id=res['jobid'], postrunjson=True))
    assert 'status' in postrunjson['Job']
    assert postrunjson['Job']['status'] == '0'
    assert 'ff_meta' in outjson
    assert 'uuid' in outjson['ff_meta']
    wfr_uuid = outjson['ff_meta']['uuid']
    res = ff_utils.get_metadata(bed_uuid,
                                key=key,
                                ff_env=DEV_ENV,
                                check_queue=True)
    assert res['extra_files']
    assert len(res['extra_files']) == 1
    extra = res['extra_files'][0]
    assert extra['upload_key'] == '614d119e-9330-41a3-a7c9-d149d0456c8e/4DNFI1664939.beddb'
    # check the extra file is created in the right bucket.
    head = boto3.client('s3').head_object(
        Bucket='elasticbeanstalk-fourfront-webdev-wfoutput',
        Key='614d119e-9330-41a3-a7c9-d149d0456c8e/4DNFI1664939.beddb')
    assert head
    assert extra['file_format']['display_title'] == 'beddb'
    #ff_utils.patch_metadata({'status': 'deleted'}, bed_uuid, key=key)
    ff_utils.patch_metadata({'status': 'deleted'}, wfr_uuid, key=key)
Example #16
def test_md5():
    key = dev_key()
    # prep new File
    data = get_test_json('md5.json')
    content = bytes(str(uuid.uuid4()), 'utf-8')
    gzipped_content = gzip.compress(content)
    fq_uuid = post_new_fastqfile(
        key=key, upload_content=gzipped_content
    )  # upload random content to avoid md5 conflict
    # prep input json
    data['input_files'][0]['uuid'] = fq_uuid
    # run workflow
    api = API()
    res = api.run_workflow(data, sfn=DEV_SFN)
    assert 'jobid' in res
    assert 'exec_arn' in res['_tibanna']
    time.sleep(420)
    # check step function status
    assert api.check_status(res['_tibanna']['exec_arn']) == 'SUCCEEDED'
    outjson = api.check_output(res['_tibanna']['exec_arn'])
    # check postrun json
    postrunjson = json.loads(api.log(job_id=res['jobid'], postrunjson=True))
    assert 'status' in postrunjson['Job']
    assert postrunjson['Job']['status'] == '0'
    # check metadata update
    res = ff_utils.get_metadata(fq_uuid,
                                key=key,
                                ff_env=DEV_ENV,
                                check_queue=True)
    ff_utils.patch_metadata({'status': 'deleted'}, fq_uuid, key=key)
    assert res['md5sum'] == hashlib.md5(gzipped_content).hexdigest()
    assert res['content_md5sum'] == hashlib.md5(content).hexdigest()
    assert res['file_size'] == len(gzipped_content)
    assert 'ff_meta' in outjson
    assert 'uuid' in outjson['ff_meta']
    wfr_uuid = outjson['ff_meta']['uuid']
    res = ff_utils.get_metadata(wfr_uuid,
                                key=key,
                                ff_env=DEV_ENV,
                                check_queue=True)
    assert res['run_status'] == 'complete'
    assert 'quality_metric' in res
Example #17
def _input_extra_updater(status,
                         tibanna,
                         accession,
                         extra_file_format,
                         md5=None,
                         filesize=None,
                         higlass_uid=None):
    try:
        original_file = ff_utils.get_metadata(accession,
                                              key=tibanna.ff_keys,
                                              ff_env=tibanna.env,
                                              add_on='frame=object',
                                              check_queue=True)
    except Exception as e:
        raise Exception("Can't get metadata for input file %s" % e)
    if 'extra_files' not in original_file:
        raise Exception(
            "inconsistency - extra file metadata deleted during workflow run?")
    matching_exf_found = False
    for exf in original_file['extra_files']:
        if parse_formatstr(exf['file_format']) == extra_file_format:
            matching_exf_found = True
            exf['status'] = status
            if status == 'uploaded':
                if md5:
                    exf['md5sum'] = md5
                if filesize:
                    exf['file_size'] = filesize
    if not matching_exf_found:
        raise Exception(
            "inconsistency - extra file metadata deleted during workflow run?")
    try:
        patch_file = {'extra_files': original_file['extra_files']}
        if higlass_uid:
            patch_file['higlass_uid'] = higlass_uid
        ff_utils.patch_metadata(patch_file,
                                original_file['uuid'],
                                key=tibanna.ff_keys)
    except Exception as e:
        raise Exception("patch_metadata failed in extra_updater." + str(e) +
                        "original_file ={}\n".format(str(original_file)))
Example #18
def patch_items_with_headers(connection, action, kwargs):
    """
    Arguments are:
    - the connection (FS connection)
    - the action (from ActionResult)
    - kwargs (from the action function)
    Takes care of patching info on Fourfront and also populating fields on the
    action
    """
    action_logs = {'patch_failure': [], 'patch_success': []}
    # get latest results from prepare_static_headers
    headers_check_result = action.get_associated_check_result(kwargs)
    # the dictionaries can be combined
    total_patches = headers_check_result['full_output']['to_add']
    total_patches.update(headers_check_result['full_output']['to_remove'])
    for item, headers in total_patches.items():
        # if all headers are deleted, use ff_utils.delete_field
        if headers == []:
            try:
                ff_utils.delete_field(item,
                                      'static_headers',
                                      key=connection.ff_keys)
            except Exception as e:
                patch_error = '\n'.join([item, str(e)])
                action_logs['patch_failure'].append(patch_error)
            else:
                action_logs['patch_success'].append(item)
        else:
            patch_data = {'static_headers': headers}
            try:
                ff_utils.patch_metadata(patch_data,
                                        obj_id=item,
                                        key=connection.ff_keys)
            except Exception as e:
                patch_error = '\n'.join([item, str(e)])
                action_logs['patch_failure'].append(patch_error)
            else:
                action_logs['patch_success'].append(item)
    action.status = 'DONE'
    action.output = action_logs
Example #19
def main():  # pragma: no cover
    args = get_args()
    try:
        auth = get_authentication_with_server(args.key, args.env)
    except Exception:
        print("Authentication failed")
        sys.exit(1)
    itemids = scu.get_item_ids_from_args(args.input, auth, args.search)
    taggable = scu.get_types_that_can_have_field(auth, 'tags')
    if args.types2exclude is not None:
        # remove explicitly provided types that should not be tagged
        taggable = [t for t in taggable if t not in args.types2exclude]

    seen = []  # only need to add tag once so this keeps track of what's been seen
    to_patch = {}  # keep track of those to patch
    # main loop through the top level item ids
    for itemid in itemids:
        items2tag = {}
        if args.taglinked:
            # need to get linked items and tag them
            linked = scu.get_linked_items(auth, itemid, {})
            items2tag = scu.filter_dict_by_value(linked,
                                                 taggable,
                                                 include=True)
        else:
            # only want to tag provided items
            itype = scu.get_item_type(auth, itemid)
            if itype in taggable:
                items2tag = {itemid: itype}
        for i, t in items2tag.items():
            if i not in seen:
                seen.append(i)
                item = get_metadata(i, auth)
                if not scu.has_field_value(item, 'tags', args.tag):
                    # not already tagged with this tag so make a patch and add to dict
                    to_patch[i] = make_tag_patch(item, args.tag)

    # now do the patching or reporting
    for pid, patch in to_patch.items():
        if args.dbupdate:
            pres = patch_metadata(patch, pid, auth)
            print(pres['status'])
        else:
            print("DRY RUN: patch ", pid, " with ", patch)
Example #20
def do_patch(uid, type, patch, auth, dbupdate, cnts):
    if not dbupdate:
        print('DRY RUN - will update %s of type %s with %s' %
              (uid, type, patch))
        cnts['not_patched'] += 1
        return
    # import pdb; pdb.set_trace()
    res = patch_metadata(patch, uid, auth)
    # res = {'status': 'testing'}
    print('UPDATING - %s of type %s with %s' % (uid, type, patch))
    rs = res['status']
    print(rs)
    if rs == 'success':
        cnts['patched'] += 1
    else:
        cnts['errors'] += 1
        print(res)
    return
Example #21
def add_notes_to_tsv(file_meta, auth):
    """ adds a notes to tsv with the canned value below to the processed file
        returns success, error or skip if the value already exists
    """
    note_txt = "This file contains processed results performed outside of the 4DN-DCIC standardized pipelines. The file and the information about its provenance, i.e. which files were used as input to generate this output was provided by or done in collaboration with the lab that did the experiments to generate the raw data. For more information about the specific analysis performed, please contact the submitting lab or refer to the relevant publication if available."
    n2tsv = file_meta.get('notes_to_tsv', [])
    for note in n2tsv:
        if note_txt in note:
            return "SKIP"
    n2tsv.append(note_txt)
    patch = {'notes_to_tsv': n2tsv}
    try:
        pres = patch_metadata(patch, file_meta.get('uuid'), auth)
    except Exception as e:
        print(e)
        return "ERROR"
    if pres.get('status') == 'success':
        return "SUCCESS"
    return "ERROR"
Example #22
def main():  # pragma: no cover
    args = get_args(sys.argv[1:])
    try:
        auth = get_authentication_with_server(args.key, args.env)
    except Exception:
        print("Authentication failed")
        sys.exit(1)

    print('#', auth.get('server'))
    id_list = scu.get_item_ids_from_args(args.input, auth, args.search)

    for itemid in id_list:
        # get the existing data in other_processed_files
        item_data = get_metadata(itemid, auth, add_on='frame=raw')
        pfiles = item_data.get('processed_files')
        if not pfiles:
            continue
        patch_data = item_data.get('other_processed_files', [])
        if patch_data:
            # does the same title exist
            if args.title in [i['title'] for i in patch_data]:
                print(itemid, 'already has preliminary results')
                continue

        patch_data.append({
            'title': args.title,
            'type': 'preliminary',
            'files': pfiles
        })
        if patch_data:
            patch = {'other_processed_files': patch_data}
            if args.dbupdate:
                res = patch_metadata(patch,
                                     obj_id=itemid,
                                     key=auth,
                                     add_on='delete_fields=processed_files')
                print(res.get('status'))
            else:
                print("DRY RUN -- will patch")
                print(patch)
                print('and delete processed_files field value')
Example #23
def main(ff_env='fourfront-cgapwolf',
         skip_software=False,
         skip_file_format=False,
         skip_workflow=False):
    """post / patch contents from portal_objects to the portal"""
    keycgap = ff_utils.get_authentication_with_server(ff_env=ff_env)

    # software
    if not skip_software:
        print("Processing software...")
        with open('portal_objects/software.json') as f:
            d = json.load(f)

        for dd in d:
            print("  processing uuid %s" % dd['uuid'])
            try:
                ff_utils.post_metadata(dd, 'Software', key=keycgap)
            except Exception:
                # if the POST fails (e.g. the item already exists), fall back to a PATCH
                ff_utils.patch_metadata(dd, dd['uuid'], key=keycgap)

    # file formats
    if not skip_file_format:
        print("Processing file format...")
        with open('portal_objects/file_format.json') as f:
            d = json.load(f)

        for dd in d:
            print("  processing uuid %s" % dd['uuid'])
            try:
                ff_utils.post_metadata(dd, 'FileFormat', key=keycgap)
            except Exception:
                ff_utils.patch_metadata(dd, dd['uuid'], key=keycgap)

    # workflows
    if not skip_workflow:
        print("Processing workflow...")
        wf_dir = "portal_objects/workflows"
        files = os.listdir(wf_dir)

        for fn in files:
            if fn.endswith('.json'):
                print("  processing file %s" % fn)
                with open(os.path.join(wf_dir, fn), 'r') as f:
                    d = json.load(f)
                try:
                    ff_utils.post_metadata(d, 'Workflow', key=keycgap)
                except Exception:
                    ff_utils.patch_metadata(d, d['uuid'], key=keycgap)
Example #24
def md5_updater(status, wf_file, ff_meta, tibanna):
    # get key
    ff_key = tibanna.ff_keys
    # get metadata about original input file
    accession = wf_file.runner.inputfile_accessions['input_file']
    original_file = ff_utils.get_metadata(accession,
                                          key=ff_key,
                                          ff_env=tibanna.env,
                                          add_on='frame=object',
                                          check_queue=True)

    if status.lower() == 'uploaded':
        md5_array = wf_file.read().split('\n')
        if not md5_array:
            print("report has no content")
            return md5_updater("upload failed", wf_file, ff_meta, tibanna)
        if len(md5_array) == 1:
            md5 = None
            content_md5 = md5_array[0]
        elif len(md5_array) > 1:
            md5 = md5_array[0]
            content_md5 = md5_array[1]
        new_file = _md5_updater(original_file, md5, content_md5)
        if new_file and new_file != "Failed":
            try:
                ff_utils.patch_metadata(new_file, accession, key=ff_key)
            except Exception as e:
                # TODO specific exception
                # if patch fails try to patch workflow status as failed
                new_file = {}
                new_file['status'] = 'upload failed'
                new_file['description'] = str(e)
                ff_utils.patch_metadata(new_file,
                                        original_file['uuid'],
                                        key=ff_key)
        elif new_file == "Failed":
            # we may not have to update the file, because it already has the correct info
            # so we return Failed when we know the upload failed
            md5_updater("upload failed", wf_file, ff_meta, tibanna)
    elif status == 'upload failed':
        new_file = {}
        new_file['status'] = 'upload failed'
        ff_utils.patch_metadata(new_file, original_file['uuid'], key=ff_key)

    # nothing to patch to ff_meta
    return None
Example #25
def md5_updater(status, awsemfile, ff_meta, tibanna, **kwargs):
    # get key
    ff_key = tibanna.ff_keys
    # get metadata about original input file
    accession = awsemfile.runner.get_file_accessions('input_file')[0]
    format_if_extras = awsemfile.runner.get_format_if_extras('input_file')
    original_file = ff_utils.get_metadata(accession,
                                          key=ff_key,
                                          ff_env=tibanna.env,
                                          add_on='frame=object',
                                          check_queue=True)
    if status.lower() == 'uploaded':  # md5 report file is uploaded
        md5, content_md5 = parse_md5_report(awsemfile.read())
        # add file size to input file metadata
        input_file = awsemfile.runner.input_files()[0]
        file_size = boto3.client('s3').head_object(Bucket=input_file.bucket,
                                                   Key=input_file.key).get(
                                                       'ContentLength', '')
        for format_if_extra in format_if_extras:
            printlog("format_if_extra : %s" % format_if_extra)
            new_file = _md5_updater(original_file, md5, content_md5,
                                    format_if_extra, file_size)
            if new_file:
                break
        printlog("new_file = %s" % str(new_file))
        if new_file:
            try:
                resp = ff_utils.patch_metadata(new_file, accession, key=ff_key)
                printlog(resp)
            except Exception as e:
                # TODO specific exception
                # if patch fails try to patch workflow status as failed
                raise e
    else:
        pass
    # nothing to patch to ff_meta
    return None
Example #26
def main():
    args = get_args(sys.argv[1:])
    try:
        auth = get_authentication_with_server(args.key, args.env)
    except Exception:
        print("Authentication failed")
        sys.exit(1)
    print("Working on {}".format(auth.get('server')))
    itemids = scu.get_item_ids_from_args(args.input, auth, args.search)
    field = args.field
    val = args.value
    if val == 'True':
        val = True
    elif val == 'False':
        val = False
    if args.isarray:
        val = [v for v in val.split("'") if v]
    ntype = args.numtype
    if ntype:
        if ntype == 'i':
            val = int(val)
        elif ntype == 'f':
            val = float(val)
    for iid in itemids:
        print("PATCHING", iid, "to", field, "=", val)
        if args.dbupdate:
            # do the patch
            if val == '*delete*':
                res = delete_field(iid, field, auth)
            else:
                res = patch_metadata({args.field: val}, iid, auth)
            if res['status'] == 'success':
                print("SUCCESS!")
            else:
                print("FAILED TO PATCH", iid, "RESPONSE STATUS", res['status'],
                      res['description'])
Example #27
def md5_updater(status, awsemfile, ff_meta, tibanna):
    # get key
    ff_key = tibanna.ff_keys
    # get metadata about original input file
    accession = awsemfile.runner.inputfile_accessions['input_file']
    format_if_extra = awsemfile.runner.inputfile_format_if_extra['input_file']
    original_file = ff_utils.get_metadata(accession,
                                          key=ff_key,
                                          ff_env=tibanna.env,
                                          add_on='frame=object',
                                          check_queue=True)
    if status.lower() == 'uploaded':  # md5 report file is uploaded
        md5_array = awsemfile.read().split('\n')
        if not md5_array:
            raise Exception("md5 report has no content")
        if len(md5_array) == 1:
            md5 = None
            content_md5 = md5_array[0]
        elif len(md5_array) > 1:
            md5 = md5_array[0]
            content_md5 = md5_array[1]
        new_file = _md5_updater(original_file, md5, content_md5,
                                format_if_extra)
        print("new_file = %s" % str(new_file))
        if new_file:
            try:
                resp = ff_utils.patch_metadata(new_file, accession, key=ff_key)
                print(resp)
            except Exception as e:
                # TODO specific exception
                # if patch fails try to patch workflow status as failed
                raise e
    else:
        pass
    # nothing to patch to ff_meta
    return None
Example #28
def upload_file_to_uuid(filename, uuid, auth):
    """
    Upload file to a target environment.

    :param filename: the name of a file to upload.
    :param uuid: the item into which the filename is to be uploaded.
    :param auth: auth info in the form of a dictionary containing 'key', 'secret', and 'server'.
    """

    # filename here should not include path
    patch_data = {'filename': os.path.basename(filename)}

    response = ff_utils.patch_metadata(patch_data, uuid, key=auth)

    try:
        [metadata] = response['@graph']
        upload_credentials = metadata['upload_credentials']
    except Exception:
        raise RuntimeError("Unable to obtain upload credentials for file %s." %
                           filename)

    execute_prearranged_upload(filename,
                               upload_credentials=upload_credentials,
                               auth=auth)
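A usage sketch for upload_file_to_uuid, with placeholder values; the auth dictionary shape ('key', 'secret', 'server') matches the docstring above.

auth = {'key': '<ACCESS_KEY_ID>', 'secret': '<ACCESS_KEY_SECRET>', 'server': '<FOURFRONT_SERVER_URL>'}
upload_file_to_uuid('/path/to/my_file.fastq.gz', '<file-item-uuid>', auth)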
Example #29
def clean_for_reupload(file_acc, key, clean_release_dates=False, delete_runs=True):
    """In rare cases we want to re-upload a file, and this needs some cleanup.
    If you want to delete release dates too, set 'clean_release_dates' to True."""
    resp = ff_utils.get_metadata(file_acc, key=key)
    clean_fields = ['extra_files', 'md5sum', 'content_md5sum', 'file_size', 'filename', 'quality_metric']
    if clean_release_dates:
        clean_fields.extend(['public_release', 'project_release'])
    if delete_runs:
        runs = resp.get('workflow_run_inputs', [])
        if runs:
            for a_run in runs:
                ff_utils.patch_metadata({'status': 'deleted'}, obj_id=a_run['uuid'], key=key)
    if resp.get('quality_metric'):
        ff_utils.patch_metadata({'status': 'deleted'}, obj_id=resp['quality_metric']['uuid'], key=key)
    del_f = []
    for field in clean_fields:
        if field in resp:
            del_f.append(field)
    del_add_on = 'delete_fields=' + ','.join(del_f)
    ff_utils.patch_metadata({'status': 'uploading'}, obj_id=resp['uuid'], key=key, add_on=del_add_on)
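clean_for_reupload leans on the delete_fields add_on of patch_metadata to clear fields in the same request that resets the status. A minimal sketch of that add_on on its own, assuming dcicutils.ff_utils, an auth key dictionary as above, and placeholder identifiers:

from dcicutils import ff_utils

# reset status and clear md5sum/file_size in a single PATCH (field names illustrative)
ff_utils.patch_metadata({'status': 'uploading'},
                        obj_id='<file-uuid>',
                        key=auth,
                        add_on='delete_fields=md5sum,file_size')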
Example #30
def main():
    """
    Load a given JSON file with ontology terms inserts to a server using
    the `load_data` endpoint defined in loadxl.
    """
    logging.basicConfig()
    # Loading app will have configured from config file. Reconfigure here:
    logging.getLogger('encoded').setLevel(logging.INFO)

    parser = argparse.ArgumentParser(
        description="Load Ontology Term Data", epilog=EPILOG,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument('json_file', help="File containing terms to load")
    parser.add_argument('--env', default='local',
                        help='FF environment to update from. Defaults to local')
    parser.add_argument('--local-key', help='Access key ID if using local')
    parser.add_argument('--local-secret', help='Access key secret if using local')
    args = parser.parse_args()

    # authentication with Fourfront
    if args.env == 'local':
        # prompt access key ID and secret from user
        config_uri = 'development.ini'
        local_id = args.local_key if args.local_key else input('[local access key ID] ')
        local_secret = args.local_secret if args.local_secret else input('[local access key secret] ')
        auth = {'key': local_id, 'secret': local_secret, 'server': 'http://localhost:8000'}
    else:
        config_uri = 'production.ini'
        auth = ff_utils.get_authentication_with_server(None, args.env)

    load_endpoint = '/'.join([auth['server'], 'load_data'])
    logger.info('load_ontology_terms: Starting POST to %s' % load_endpoint)
    json_data = {'config_uri': config_uri, 'itype': 'ontology_term',
                 'overwrite': True, 'iter_response': True}
    with open(args.json_file) as infile:
        json_data['store'] = {'ontology_term': json.load(infile)}
    num_to_load = len(json_data['store']['ontology_term'])
    logger.info('Will attempt to load %s ontology terms to %s'
                % (num_to_load, auth['server']))
    start = datetime.now()
    try:
        # sustained by returning Response.app_iter from loadxl.load_data
        res =  ff_utils.authorized_request(load_endpoint, auth=auth, verb='POST',
                                           timeout=None, json=json_data)
    except Exception as exc:
        logger.error('Error on POST: %s' % str(exc))
    else:
        # process the individual item responses from the generator.
        # each item should be "POST: <uuid>,", "PATCH: <uuid>,", or "SKIP: <uuid>"
        load_res = {'POST': [], 'PATCH': [], 'SKIP': [], 'ERROR': []}
        for val in res.text.split('\n'):
            if val.startswith('POST') or val.startswith('SKIP'):
                prefix_len = 4  # 'POST' or 'SKIP'
            else:
                prefix_len = 5  # 'PATCH' or 'ERROR'
            # this is a bit weird, but we want to split out the POST/PATCH...
            # and also remove ': ' from the value for each message
            cat, msg = val[:prefix_len], val[prefix_len + 2:]
            if not msg:
                continue
            if cat in load_res:
                load_res[cat].append(msg)
        logger.info("Success! Attempted to load %s items. Result: POSTed %s, PATCHed %s, skipped %s"
                    % (num_to_load, len(load_res['POST']), len(load_res['PATCH']), len(load_res['SKIP'])))
        if load_res['ERROR']:
            logger.error("ERROR encountered during load_data! Error: %s" % load_res['ERROR'])
        if (len(load_res['POST']) + len(load_res['SKIP'])) > len(load_res['PATCH']):
            logger.error("The following items passed round I (POST/skip) but not round II (PATCH): %s"
                         % (set(load_res['POST'] + load_res['SKIP']) - set(load_res['PATCH'])))
    logger.info("Finished request in %s" % str(datetime.now() - start))

    # update sysinfo. Don't worry about doing this on local
    if args.env != 'local':
        data = {"name": "ffsysinfo", "ontology_updated": datetime.today().isoformat()}
        try:
            found_info = ff_utils.get_metadata('/sysinfos/' + data['name'], key=auth)
        except Exception:
            found_info = None

        if found_info:
            ff_utils.patch_metadata(data, found_info['uuid'], key=auth)
        else:
            ff_utils.post_metadata(data, 'sysinfos', key=auth)
        logger.info("Updated sysinfo with name %s" % data['name'])
    logger.info("DONE!")
Example #31
def patch(self, key, type_name=None):
    return patch_metadata(self.as_dict(), key=key)
Example #32
def add_processed_files(item_id, list_pc, auth):
    # patch the exp or set
    patch_data = {'processed_files': list_pc}
    ff_utils.patch_metadata(patch_data, obj_id=item_id, key=auth)
    return
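A usage sketch for add_processed_files, with placeholder identifiers; auth is the same access-key dictionary used by the other examples.

list_pc = ['<processed-file-uuid-1>', '<processed-file-uuid-2>']
add_processed_files('<experiment-set-or-experiment-uuid>', list_pc, auth)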