Example 1
def main():
    args = get_args(sys.argv[1:])
    try:
        auth = get_authentication_with_server(args.key, args.env)
    except Exception:
        print("Authentication failed")
        sys.exit(1)
    dryrun = not args.dbupdate

    file_list = scu.get_item_ids_from_args(args.input, auth, args.search)
    wf_data = get_metadata(args.workflow, auth)
    for f in file_list:
        file_info = get_metadata(f, auth)
        parents = file_info.get('produced_from')
        if parents:
            inputs = []
            for p in parents:
                inputs.append(get_metadata(p, auth))
            wfr_json = create_wfr_meta_only_json(auth, wf_data, inputs,
                                                 [file_info])
            if dryrun:
                print('DRY RUN -- will post')
                print(wfr_json)
            else:
                res = post_metadata(wfr_json, 'workflow_run_awsem', auth)
                # and add a notes_to_tsv to the file
                patchstatus = add_notes_to_tsv(file_info, auth)
                print(res)
                print(patchstatus)
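Most of the command-line entry points in these examples rely on a get_args helper that is not shown. As a rough sketch only, an argparse-based parser consistent with how args is used above might look like this (all flag names are inferred from usage, not taken from the original source):

import argparse


def get_args(args=None):
    # hypothetical sketch of the parser these scripts assume; flag names inferred from usage
    parser = argparse.ArgumentParser(description="post meta-only workflow runs for files")
    parser.add_argument('input', nargs='+',
                        help="item ids, a file of ids, or a search query string")
    parser.add_argument('--workflow', help="identifier of the workflow to attach runs to")
    parser.add_argument('--search', action='store_true', default=False,
                        help="treat the input as a search query")
    parser.add_argument('--key', default=None, help="access key id or keypairs.json entry name")
    parser.add_argument('--env', default=None, help="fourfront environment name")
    parser.add_argument('--dbupdate', action='store_true', default=False,
                        help="actually post/patch; omit for a dry run")
    return parser.parse_args(args)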
def main():  # pragma: no cover
    # initial set up
    args = get_args(sys.argv[1:])
    try:
        auth = get_authentication_with_server(args.key, args.env)
    except Exception:
        print("Authentication failed")
        sys.exit(1)

    # bucket addresses
    ff_health = get_metadata('/health', auth)
    source_bucket = ff_health['file_upload_bucket']
    target_bucket = ff_health['processed_file_bucket']
    s3 = boto3.resource('s3')

    # get the uuids for the files
    query = 'type=FileVistrack'
    uids = scu.get_item_ids_from_args([query], auth, True)
    files2copy = [get_metadata(uid, auth).get('upload_key') for uid in uids]

    for file_key in files2copy:
        copy_source = {'Bucket': source_bucket, 'Key': file_key}
        try:
            # print(file_key + ' cp from ' + source_bucket + ' to ' + target_bucket)
            s3.meta.client.copy(copy_source, target_bucket, file_key)
        except Exception:
            print('Can not find file on source', file_key)
            continue
        print('{} file copied'.format(file_key))
Example 3
def main():  # pragma: no cover
    start = datetime.now()
    print(str(start))
    args = get_args()
    try:
        auth = get_authentication_with_server(args.key, args.env)
    except Exception:
        print("Authentication failed")
        sys.exit(1)

    # assumes each line holds a tab-separated item id and json payload
    if not args.dbupdate:
        print("DRY RUN - use --dbupdate to update the database")
    with open(args.infile) as items:
        for i in items:
            [iid, payload] = [t.strip() for t in i.split('\t')]
            payload = json.loads(payload)
            if args.dbupdate:
                e = patch_metadata(payload, iid, auth)
            else:
                print("DRY RUN\n\tPATCH: ", iid, " TO\n", payload)
                e = {'status': 'success'}

            status = e.get('status')
            if status and status == 'success':
                print(status)
            else:
                print('FAILED', e)

    end = datetime.now()
    print("FINISHED - START: ", str(start), "\tEND: ", str(end))
def main():
    args = get_args()
    try:
        auth = ff.get_authentication_with_server(args.key, args.env)
    except Exception:
        print("Authentication failed")
        sys.exit(1)
    print("Working on {}".format(auth.get('server')))
    itemids = scu.get_item_ids_from_args(args.input, auth, args.search)
    seen = []
    failed = []
    for itemid in itemids:
        print("Touching ", itemid)
        if args.dbupdate:
            try:
                res = ff.patch_metadata({}, itemid, auth)
                print(res.get('status'))
                if res.get('status') == 'success':
                    seen.append(itemid)
            except Exception:
                print(itemid, ' failed to patch')
                failed.append(itemid)
                continue
        else:
            print('dry run!')
    for i in seen:
        print(i)
    print("Failures")
    for f in failed:
        print(f)
Example 5
def testrun_md5(workflow_name='tibanna_pony', env='webdev'):
    """Creates a random file object with no md5sum/content_md5sum and run md5 workflow.
    It waits for 6 mintues till the workflow run finishes and checks the input file object
    has been updated.
    """
    bucket = "elasticbeanstalk-fourfront-" + env + "-wfoutput"
    ff_key = get_authentication_with_server(ff_env='fourfront-' + env)
    newfile = post_random_file(bucket, ff_key)
    uuid = newfile['uuid']
    accession = newfile['accession']
    input_json = {
        "config": {
            "ebs_type": "io1",
            "ebs_iops": 500,
            "s3_access_arn":
            "arn:aws:iam::643366669028:instance-profile/S3_access",
            "ami_id": "ami-cfb14bb5",
            "json_bucket": "4dn-aws-pipeline-run-json",
            "shutdown_min": 30,
            "copy_to_s3": True,
            "launch_instance": True,
            "log_bucket": "tibanna-output",
            "script_url":
            "https://raw.githubusercontent.com/4dn-dcic/tibanna/master/awsf/",
            "key_name": "4dn-encode",
            "password": ""
        },
        "_tibanna": {
            "env": "fourfront-webdev",
            "run_type": "md5"
        },
        "parameters": {},
        "app_name":
        "md5",
        "workflow_uuid":
        "c77a117b-9a58-477e-aaa5-291a109a99f6",
        "input_files": [{
            "workflow_argument_name": "input_file",
            "bucket_name": bucket,
            "uuid": uuid,
            "object_key": accession + '.pairs.gz'
        }],
        "output_bucket":
        bucket
    }
    resp = run_workflow(input_json, workflow=workflow_name)
    print(resp)

    # check result
    time.sleep(6 * 60)  # wait for 6 minutes
    filemeta = get_metadata(uuid, key=ff_key, add_on='?datastore=database')
    content_md5sum = filemeta.get('content_md5sum')
    md5sum = filemeta.get('md5sum')
    if content_md5sum and md5sum:
        print(content_md5sum)
        print(md5sum)
        patch_metadata({'status': 'deleted'}, uuid, key=ff_key)
    else:
        raise Exception('md5 step function run failed')
Example 6
def extract_file_info(obj_id, arg_name, env, rename=[]):
    """Creates the formatted dictionary for files."""
    auth = ff_utils.get_authentication_with_server({}, ff_env=env)
    my_s3_util = s3Utils(env=env)

    raw_bucket = my_s3_util.raw_file_bucket
    out_bucket = my_s3_util.outfile_bucket
    # start a dictionary
    template = {"workflow_argument_name": arg_name}
    if rename:
        change_from = rename[0]
        change_to = rename[1]
    # if it is list of items, change the structure
    if isinstance(obj_id, list):
        object_key = []
        uuid = []
        buckets = []
        for obj in obj_id:
            metadata = ff_utils.get_metadata(obj, key=auth)
            object_key.append(metadata['display_title'])
            uuid.append(metadata['uuid'])
            # get the bucket
            if 'FileProcessed' in metadata['@type']:
                my_bucket = out_bucket
            else:  # covers cases of FileFastq, FileReference, FileMicroscopy
                my_bucket = raw_bucket
            buckets.append(my_bucket)
        # check bucket consistency
        try:
            assert len(list(set(buckets))) == 1
        except AssertionError:
            print('Files from different buckets', obj_id)
            return
        template['object_key'] = object_key
        template['uuid'] = uuid
        template['bucket_name'] = buckets[0]
        if rename:
            template['rename'] = [
                i.replace(change_from, change_to)
                for i in template['object_key']
            ]

    # if obj_id is a string
    else:
        metadata = ff_utils.get_metadata(obj_id, key=auth)
        template['object_key'] = metadata['display_title']
        template['uuid'] = metadata['uuid']
        # get the bucket
        if 'FileProcessed' in metadata['@type']:
            my_bucket = out_bucket
        else:  # covers cases of FileFastq, FileReference, FileMicroscopy
            my_bucket = raw_bucket
        template['bucket_name'] = my_bucket
        if rename:
            template['rename'] = template['object_key'].replace(
                change_from, change_to)
    return template
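A hypothetical call showing the two input shapes extract_file_info accepts; the file identifiers, argument names, environment, and rename pair below are invented for illustration and assume valid credentials for the named environment:

# single file id -> scalar object_key / uuid / bucket_name in the template
single = extract_file_info('4DNFIEXAMPLE1', 'chromsize', 'fourfront-webdev')

# list of ids -> parallel lists, with an optional [change_from, change_to] rename
grouped = extract_file_info(['4DNFIEXAMPLE1', '4DNFIEXAMPLE2'], 'input_fastqs',
                            'fourfront-webdev', rename=['.fastq.gz', '.fq.gz'])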
def main():
    args = get_args()
    try:
        auth = ff.get_authentication_with_server(args.key, args.env)
    except Exception:
        print("Authentication failed")
        sys.exit(1)
    itemids = scu.get_item_ids_from_args([args.query], auth, True)
    for itemid in itemids:
        print(itemid)
def main(ff_env='fourfront-cgapwolf',
         skip_software=False,
         skip_file_format=False,
         skip_workflow=False):
    """post / patch contents from portal_objects to the portal"""
    keycgap = ff_utils.get_authentication_with_server(ff_env=ff_env)

    # software
    if not skip_software:
        print("Processing software...")
        with open('portal_objects/software.json') as f:
            d = json.load(f)

        for dd in d:
            print("  processing uuid %s" % dd['uuid'])
            try:
                ff_utils.post_metadata(dd, 'Software', key=keycgap)
            except Exception:
                ff_utils.patch_metadata(dd, dd['uuid'], key=keycgap)

    # file formats
    if not skip_file_format:
        print("Processing file format...")
        with open('portal_objects/file_format.json') as f:
            d = json.load(f)

        for dd in d:
            print("  processing uuid %s" % dd['uuid'])
            try:
                ff_utils.post_metadata(dd, 'FileFormat', key=keycgap)
            except Exception:
                ff_utils.patch_metadata(dd, dd['uuid'], key=keycgap)

    # workflows
    if not skip_workflow:
        print("Processing workflow...")
        wf_dir = "portal_objects/workflows"
        files = os.listdir(wf_dir)

        for fn in files:
            if fn.endswith('.json'):
                print("  processing file %s" % fn)
                with open(os.path.join(wf_dir, fn), 'r') as f:
                    d = json.load(f)
                try:
                    ff_utils.post_metadata(d, 'Workflow', key=keycgap)
                except Exception:
                    ff_utils.patch_metadata(d, d['uuid'], key=keycgap)
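All three blocks above use the same idiom: try to post, and fall back to a patch by uuid if the item already exists. A small helper capturing that pattern could look like this (a sketch, not part of the original script):

def post_or_patch(item, item_type, key):
    # try to create the item; if the post fails (e.g. it already exists), patch it by uuid
    try:
        return ff_utils.post_metadata(item, item_type, key=key)
    except Exception:
        return ff_utils.patch_metadata(item, item['uuid'], key=key)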
Example 9
def main():  # pragma: no cover
    args = get_args()
    try:
        auth = get_authentication_with_server(args.key, args.env)
    except Exception:
        print("Authentication failed")
        sys.exit(1)
    itemids = scu.get_item_ids_from_args(args.input, auth, args.search)
    taggable = scu.get_types_that_can_have_field(auth, 'tags')
    if args.types2exclude is not None:
        # remove explicitly provided types that should not be tagged
        taggable = [t for t in taggable if t not in args.types2exclude]

    seen = []  # only need to add tag once so this keeps track of what's been seen
    to_patch = {}  # keep track of those to patch
    # main loop through the top level item ids
    for itemid in itemids:
        items2tag = {}
        if args.taglinked:
            # need to get linked items and tag them
            linked = scu.get_linked_items(auth, itemid, {})
            items2tag = scu.filter_dict_by_value(linked,
                                                 taggable,
                                                 include=True)
        else:
            # only want to tag provided items
            itype = scu.get_item_type(auth, itemid)
            if itype in taggable:
                items2tag = {itemid: itype}
        for i, t in items2tag.items():
            if i not in seen:
                seen.append(i)
                item = get_metadata(i, auth)
                if not scu.has_field_value(item, 'tags', args.tag):
                    # not already tagged with this tag so make a patch and add it to the dict
                    to_patch[i] = make_tag_patch(item, args.tag)

    # now do the patching or reporting
    for pid, patch in to_patch.items():
        if args.dbupdate:
            pres = patch_metadata(patch, pid, auth)
            print(pres['status'])
        else:
            print("DRY RUN: patch ", pid, " with ", patch)
def connect2server(env=None, key=None, keyfile=None, logger=None):
    """Sets up credentials for accessing the server.  Generates a key using info
       from the named keyname in the keyfile and checks that the server can be
       reached with that key.
       Also handles keyfiles stored in s3 using the env param"""
    if key and keyfile:
        keys = None
        if os.path.isfile(keyfile):
            with io.open(keyfile, 'r') as kf:
                keys_json_string = kf.read()
                keys = json.loads(keys_json_string)
        if keys:
            key = keys.get(key)
    try:
        auth = get_authentication_with_server(key, env)
    except Exception:
        logger.error("Authentication failed")
        sys.exit(1)
    return auth
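connect2server expects the keyfile to be a JSON document keyed by key name, with each entry holding the key, secret, and server used by get_authentication_with_server. An illustrative keyfile, shown here as the equivalent Python dict (all values are placeholders):

# hypothetical keypairs file content; pass its path as keyfile and "default" as key
example_keyfile = {
    "default": {
        "key": "ABCDEFG",
        "secret": "not-a-real-secret",
        "server": "https://data.4dnucleome.org"
    }
}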
Example 11
def main():  # pragma: no cover
    args = get_args()
    try:
        auth = get_authentication_with_server(args.key, args.env)
    except Exception:
        print("Authentication failed")
        sys.exit(1)
    dryrun = not args.dbupdate

    biorxiv = get_metadata(args.old, auth)
    jarticle = get_metadata(args.new, auth)

    if biorxiv.get('status') == 'error':
        print('Biorxiv record %s cannot be found' % args.old)
        sys.exit(1)
    if jarticle.get('status') == 'error':
        print('Journal Article record %s cannot be found' % args.new)
        sys.exit(1)
    # make sure we can get the uuid to patch
    juuid = jarticle.get('uuid')
    # build the patch dictionary
    fields2transfer = [
        'categories', 'exp_sets_prod_in_pub', 'exp_sets_used_in_pub',
        'published_by'
    ]
    patch_dict, skipped = create_patch_for_new_from_old(
        biorxiv, jarticle, fields2transfer, args.vals2skip)
    patch_dict, skipped = move_old_url_to_new_aka(biorxiv, jarticle,
                                                  patch_dict, skipped)

    # do the patch
    ok = patch_and_report(auth, patch_dict, skipped, juuid, dryrun)

    if not ok:
        sys.exit(1)  # bail out if initial transfer doesn't work

    # find items with reference to old paper
    buuid = biorxiv.get('uuid')
    complete = find_and_patch_item_references(auth, buuid, juuid, dryrun)
    if not complete:
        print("ALL REFERENCES POINTING TO %s NOT UPDATED - CHECK AND FIX!" %
              buuid)
Example 12
def set_load_params(auth, env):
    # authentication with Fourfront
    # auth is dict: key, secret, server - set config appropriately
    if not (auth or env):
        return
    if auth:
        if auth.get('server') == 'http://localhost:8000':
            config_uri = 'development.ini'
        else:
            config_uri = 'production.ini'
    elif env == 'local':
        # prompt access key ID and secret from user
        local_id = input('enter local access key ID: ')
        local_secret = input('enter local access key secret: ')
        auth = {'key': local_id, 'secret': local_secret, 'server': 'http://localhost:8000'}
        config_uri = 'development.ini'
    else:
        auth = ff_utils.get_authentication_with_server(None, env)
        config_uri = 'production.ini'
    return auth, config_uri
Example 13
def main():  # pragma: no cover
    args = get_args(sys.argv[1:])
    try:
        auth = get_authentication_with_server(args.key, args.env)
    except Exception:
        print("Authentication failed")
        sys.exit(1)

    print('#', auth.get('server'))
    id_list = scu.get_item_ids_from_args(args.input, auth, args.search)

    for itemid in id_list:
        # get the existing data in other_processed_files
        item_data = get_metadata(itemid, auth, add_on='frame=raw')
        pfiles = item_data.get('processed_files')
        if not pfiles:
            continue
        patch_data = item_data.get('other_processed_files', [])
        if patch_data:
            # does the same title exist
            if args.title in [i['title'] for i in patch_data]:
                print(itemid, 'already has preliminary results')
                continue

        patch_data.append({
            'title': args.title,
            'type': 'preliminary',
            'files': pfiles
        })
        if patch_data:
            patch = {'other_processed_files': patch_data}
            if args.dbupdate:
                res = patch_metadata(patch,
                                     obj_id=itemid,
                                     key=auth,
                                     add_on='delete_fields=processed_files')
                print(res.get('status'))
            else:
                print("DRY RUN -- will patch")
                print(patch)
                print('and delete processed_files field value')
Example 14
def connect2server(env=None, key=None):
    '''Sets up credentials for accessing the server.  Generates a key using info
       from the named keyname in the keyfile and checks that the server can be
       reached with that key.
       Also handles keyfiles stored in s3'''
    if key == 's3':
        assert env
        key = unified_authentication(None, env)

    if all([v in key for v in ['key', 'secret', 'server']]):
        import ast
        key = ast.literal_eval(key)

    try:
        auth = get_authentication_with_server(key, env)
    except Exception:
        print("Authentication failed")
        sys.exit(1)

    print("Running on: {server}".format(server=auth.get('server')))
    return auth
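In the branch above, a key passed as a string that mentions 'key', 'secret' and 'server' is converted to a dict with ast.literal_eval. An example of the kind of string it expects (placeholder credentials only):

# a key string that the literal_eval branch above would turn into a dict
key_string = ("{'key': 'ABCDEFG', 'secret': 'not-a-real-secret', "
              "'server': 'https://data.4dnucleome.org'}")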
def main():
    args = get_args(sys.argv[1:])
    try:
        auth = get_authentication_with_server(args.key, args.env)
    except Exception:
        print("Authentication failed")
        sys.exit(1)
    itemids = scu.get_item_ids_from_args(args.input, auth, args.search)
    excluded_types = get_excluded(args.types2exclude, args.types2include)
    no_child = ['Publication', 'Lab', 'User', 'Award']  # default no_childs
    if args.no_children:
        no_child.extend(args.no_children)
        no_child = list(set(no_child))

    all_linked_ids = []
    # main loop through the top level item ids
    for itemid in itemids:
        linked = scu.get_linked_items(auth, itemid, {})
        if excluded_types is not None:
            linked = scu.filter_dict_by_value(linked,
                                              excluded_types,
                                              include=False)
        ll = [(k, linked[k]) for k in sorted(linked, key=linked.get)]
        for i, t in ll:
            suff = ''
            if i == itemid:
                suff = '\tINPUT'
            if is_released(i, auth):
                suff = '\tRELEASED' + suff
                if not args.include_released:
                    print(i, '\t', t, '\tSKIPPING', suff)
                    continue
            if i not in all_linked_ids:
                all_linked_ids.append(i)
            else:
                suff = suff + '\tSEEN'
            print(i, '\t', t, suff)
    for a in all_linked_ids:
        print(a)
Example 16
def main():
    args = get_args(sys.argv[1:])
    try:
        auth = get_authentication_with_server(args.key, args.env)
    except Exception:
        print("Authentication failed")
        sys.exit(1)
    print("Working on {}".format(auth.get('server')))
    itemids = scu.get_item_ids_from_args(args.input, auth, args.search)
    field = args.field
    val = args.value
    if val == 'True':
        val = True
    elif val == 'False':
        val = False
    if args.isarray:
        val = [v for v in val.split("'") if v]
    ntype = args.numtype
    if ntype:
        if ntype == 'i':
            val = int(val)
        elif ntype == 'f':
            val = float(val)
    for iid in itemids:
        print("PATCHING", iid, "to", field, "=", val)
        if args.dbupdate:
            # do the patch
            if val == '*delete*':
                res = delete_field(iid, field, auth)
            else:
                res = patch_metadata({args.field: val}, iid, auth)
            if res['status'] == 'success':
                print("SUCCESS!")
            else:
                print("FAILED TO PATCH", iid, "RESPONSE STATUS", res['status'],
                      res['description'])
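With --isarray the value string is split on single quotes and empty pieces are dropped, so each quoted token becomes one list element. A quick illustration with an invented value:

# how the --isarray branch above parses the value string
val = "'dna''rna'"
parsed = [v for v in val.split("'") if v]  # -> ['dna', 'rna']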
Example 17
def find_pairs(my_rep_set, my_env, lookfor='pairs', exclude_miseq=True):
    """Find fastq files from experiment set, exclude miseq."""
    auth = ff_utils.get_authentication_with_server({}, ff_env=my_env)
    my_s3_util = s3Utils(env=my_env)
    report = {}
    rep_resp = my_rep_set['experiments_in_set']
    lab = [my_rep_set['lab']['@id']]
    enzymes = []
    organisms = []
    total_f_size = 0
    for exp in rep_resp:
        exp_resp = exp
        report[exp['accession']] = []
        if not organisms:
            biosample = exp['biosample']
            organisms = list(
                set([
                    bs['individual']['organism']['name']
                    for bs in biosample['biosource']
                ]))
            if len(organisms) != 1:
                print('multiple organisms in set', my_rep_set['accession'])
                break
        exp_files = exp['files']
        enzyme = exp.get('digestion_enzyme')
        if enzyme:
            enzymes.append(enzyme['display_title'])

        for fastq_file in exp_files:
            file_resp = ff_utils.get_metadata(fastq_file['uuid'], key=auth)
            if not file_resp.get('file_size'):
                print("WARNING!", file_resp['accession'],
                      'does not have filesize')
            else:
                total_f_size += file_resp['file_size']
            # skip pair no 2
            if file_resp.get('paired_end') == '2':
                continue
            # exclude miseq
            if exclude_miseq:
                if file_resp.get('instrument') == 'Illumina MiSeq':
                    # print 'skipping miseq files', exp
                    continue
            # Some checks before running
            # check if status is deleted
            if file_resp['status'] == 'deleted':
                print('deleted file', file_resp['accession'], 'in',
                      my_rep_set['accession'])
                continue
            # if there is no uploaded file in the file item, report and skip
            if not file_resp.get('filename'):
                print(file_resp['accession'], "does not have a file")
                continue
            # check if file is in s3

            head_info = my_s3_util.does_key_exist(file_resp['upload_key'],
                                                  my_s3_util.raw_file_bucket)

            if not head_info:
                print(file_resp['accession'], "does not have a file in S3")
                continue
            # check that file has a pair
            f1 = file_resp['@id']
            f2 = ""
            paired = ""
            # is there a pair?
            try:
                relations = file_resp['related_files']
                paired_files = [
                    relation['file']['@id'] for relation in relations
                    if relation['relationship_type'] == 'paired with'
                ]
                assert len(paired_files) == 1
                f2 = paired_files[0]
                paired = "Yes"
            except Exception:
                paired = "No"

            # for experiments with unpaired fastq files
            if lookfor == 'single':
                if paired == 'No':
                    report[exp_resp['accession']].append(f1)
                else:
                    print('expected single files, found paired end')
                    return
            # for experiments with paired files
            else:
                if paired != 'Yes':
                    print('expected paired files, found single end')
                    return
                f2 = ''
                relations = file_resp.get('related_files')

                if not relations:
                    print(f1, 'does not have a pair')
                    return
                for relation in relations:
                    if relation['relationship_type'] == 'paired with':
                        f2 = relation['file']['@id']
                if not f2:
                    print(f1, 'does not have a pair')
                    return

                report[exp_resp['accession']].append((f1, f2))
    # get the organism
    if len(list(set(organisms))) == 1:
        organism = organisms[0]
    else:
        organism = None

    # get the enzyme
    if len(list(set(enzymes))) == 1:
        enz = enzymes[0]
    else:
        enz = None

    bwa = bwa_index.get(organism)
    chrsize = chr_size.get(organism)
    if re_nz.get(organism):
        enz_file = re_nz[organism].get(enz)
    else:
        print('no enzyme information for the organism {}'.format(organism))
        enz_file = None

    total_size_gb = int(total_f_size / (1024 * 1024 * 1024))
    return report, organism, enz, bwa, chrsize, enz_file, total_size_gb, lab
Example 18
def main():  # pragma: no cover
    start = datetime.now()
    print(str(start))
    args = get_args()
    try:
        auth = get_authentication_with_server(args.key, args.env)
    except Exception:
        print("Authentication failed")
        sys.exit(1)
    print('working on ', auth.get('server'))
    if args.as_file:
        if not args.dbupdate:
            print("DRY RUN - use --dbupdate to update the database")
        else:
            try:
                load_file(auth, args.itype, args.infile)
            except Exception as e:
                print(e)
    else:
        with open(args.infile) as ifile:
            item_store = json.loads(ifile.read())
            if not args.itype:
                if not isinstance(item_store, dict):
                    print("File is not in correct format")
                    sys.exit(1)
            else:
                if not isinstance(item_store, list):
                    print("File is not in correct format")
                    sys.exit(1)
                item_store = {args.itype: item_store}
            for itype, items in sorted(item_store.items(),
                                       key=lambda x: ORDER.index(x[0])):
                if not args.dbupdate:
                    print('DRY RUN - would try to load {} {} items'.format(
                        len(items), itype))
                    continue
                if args.id_field:
                    identifiers = [args.id_field]
                else:
                    schema_path = 'profiles/' + itype + '.json'
                    schema_info = get_metadata(schema_path, auth)
                    identifiers = schema_info.get('identifyingProperties')
                # checking to see if an item exists
                # if no can use load_data endpoint
                # if yes do it the old fashioned way
                to_patch = []
                to_post = []
                for item in items:
                    uid = item.get('uuid')
                    if uid:
                        exists = get_item(uid, auth)
                        if exists:  # try a patch
                            to_patch.append(item)
                        else:
                            to_post.append(item)
                    else:
                        uid = check_for_existing(item, itype, identifiers,
                                                 auth)
                        if uid:  # try a patch
                            item['uuid'] = uid
                            to_patch.append(item)
                        else:
                            uid = str(uuid4())
                            item['uuid'] = uid
                            to_post.append(item)
                if to_post:
                    load_json(auth, itype, to_post, chunk_size=1000)
                if to_patch:
                    patch_jsons(auth, to_patch)
    stop = datetime.now()
    print(str(stop))
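The file read above is accepted in two shapes: a dict keyed by item type, or, when --itype is supplied, a bare list of items of that single type. Illustrative examples (uuids and fields are invented):

# without --itype: a dict of item type -> list of items
store_as_dict = {
    "FileFormat": [{"uuid": "uuid-1", "file_format": "pairs"}],
    "Software": [{"uuid": "uuid-2", "name": "bwa", "version": "0.7.17"}],
}

# with --itype Software: a plain list of items of that one type
store_as_list = [{"uuid": "uuid-2", "name": "bwa", "version": "0.7.17"}]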
Example 19
def main():

    # getting authentication keys
    args = get_args()
    try:
        auth = ff_utils.get_authentication_with_server(args.key)
    except Exception as e:
        print("Authentication failed", e)
        sys.exit(1)

    dryrun = args.dryrun
    if dryrun:
        print("\nThis is a dry run\n")

    # collecting publication and expset search results
    hic_types = [
        'in+situ+Hi-C', 'Dilution+Hi-C', 'DNase+Hi-C', 'Micro-C', 'TCC'
    ]
    query_pub = '/search/?type=Publication'
    query_exp = '/search/?type=ExperimentSetReplicate&status=released'
    for hic_type in hic_types:
        query_pub += '&exp_sets_prod_in_pub.experiments_in_set.experiment_type.display_title=' + hic_type
        query_exp += '&experiments_in_set.experiment_type.display_title=' + hic_type
    pubs_search = ff_utils.search_metadata(query_pub, key=auth)
    expsets_search = ff_utils.search_metadata(query_exp, key=auth)

    # building publications dictionary
    pubs_dict = convert_pubs_list_to_lookup(pubs_search)

    # loading dataset groups from json file
    repo_path = Path(__file__).resolve().parents[1]
    dsg_filename = repo_path.joinpath('files', 'dsg.json')
    if dsg_filename.exists():
        with open(dsg_filename) as dsg_fn:
            dsgs = json.load(dsg_fn)
    else:
        sys.exit("ERROR: Dataset grouping file not found")

    # making dataset list and mapping to dataset group
    dataset_list = []
    datasets_of_dsg = {}
    for k, v in dsgs.items():
        if v.get("datasets"):
            dataset_list.extend(v["datasets"])
            datasets_of_dsg[k] = v["datasets"]
        else:
            # if a dsg does not have datasets, then the dsg itself is the dataset
            dataset_list.append(k)

    # building the output table
    table = {}
    new_datasets = set()
    study_groups = set()

    for expset in expsets_search:
        dataset = expset.get("dataset_label")
        if dataset not in dataset_list:
            new_datasets.add(dataset)
            continue

        dsg = dataset
        dsg_link = "dataset_label=" + dataset
        for group, elements in datasets_of_dsg.items():
            if dataset in elements:
                dsg_link = ("dataset_label=" +
                            "&dataset_label=".join(elements))
                dsg = group
                break
        dsg_link = "/browse/?" + dsg_link.replace("+", "%2B").replace(
            "/", "%2F").replace(" ", "+")

        study_groups.add(dsgs[dsg].get("study_group"))

        row = table.get(dsg, {})
        table[dsg] = assemble_data_for_the_row(row, expset, dsg, dsg_link,
                                               pubs_dict, dsgs[dsg])

    # summarize number of experiment sets of each experiment type in a string
    for dsg, row in table.items():
        exp_type_summary = ""
        for exp_type, count in row["Replicate Sets"].items():
            if count > 0:
                exp_type_summary += str(count) + " " + exp_type + "<br>"
        if len(exp_type_summary) > 0:
            row['Replicate Sets'] = exp_type_summary[:-4]  # remove <br> at the end
        else:
            row['Replicate Sets'] = ""

    # if new datasets are not in the json, ask what to do
    if new_datasets:
        print("New datasets found (not present in the json file):")
        for ds in new_datasets:
            print(ds)
        print("(i)gnore datasets or (e)xit to manually add them? [i/e]")
        response = None
        while response not in ['i', 'e']:
            response = input()
        if response == 'e':
            sys.exit("Add new dataset to dsg.json before generating table")

    # patch the static section for each study group
    skipped = []
    posted = []
    patched = []
    for studygroup in list(study_groups):

        # prepare static section
        table_dsg = {}
        for dsg in dsgs:
            if table.get(dsg):
                if table[dsg].get("Class") != studygroup:
                    continue
                else:
                    table_dsg[dsg] = table.get(dsg)

        keys = [
            'Data Set', 'Project', 'Replicate Sets', 'Species', 'Biosources',
            'Publication', 'Study', 'Lab'
        ]
        if studygroup == "Single Time Point and Condition":
            keys.remove('Study')

        name = alias = output = filetype = None
        if args.format == 'markdown':
            name = "data-highlights.hic." + studygroup + ".md"
            name = name.lower().replace(" ", "-")
            alias = "4dn-dcic-lab:" + name
            filetype = 'jsx'
            default_col_widths = "[-1,100,-1,100,-1,-1,-1,-1]"
            if "Study" not in keys:
                default_col_widths = "[-1,100,-1,120,250,-1,170]"
            output = md_table_maker(table_dsg, keys, name, default_col_widths)
        else:
            name = "data-highlights.hic." + studygroup
            name = name.lower().replace(" ", "-")
            alias = "4dn-dcic-lab:" + name
            filetype = 'html'
            styles = {
                'Data Set': ";width:20%;min-width:120px",
                'Replicate Sets': ";width:150px",
                'Publication': ";width:200px"
            }
            output = html_table_maker(table_dsg, keys, styles)

        # check if static section exists
        post = False
        try:
            ff_utils.get_metadata(alias, auth)
        except Exception:
            print(
                "'{}' static section cannot be patched because it does not exist"
                .format(studygroup))
            print("Do you want to (p)ost or (s)kip this static section? [p/s]")
            response = None
            while response not in ['p', 's']:
                response = input()
            if response == 's':
                skipped.append(alias)
                continue
            else:
                post = True

        # post or patch static section
        if post:
            post_body = {
                "name": name,
                "aliases": [alias],
                "body": output,
                "section_type": "Page Section",
                "title": studygroup,
                "options": {
                    "collapsible": True,
                    "default_open": True,
                    "filetype": filetype
                }
            }
            if not dryrun:
                res = ff_utils.post_metadata(post_body,
                                             "StaticSection",
                                             key=auth)
            posted.append(alias)
        else:
            patch_body = {"body": output}
            if not dryrun:
                res = ff_utils.patch_metadata(patch_body, alias, key=auth)
            patched.append(alias)
        if not dryrun:
            print("{}: {}".format(alias, res['status']))

    # summarize results
    print("Static sections summary: {} patched, {} posted, {} skipped".format(
        len(patched), len(posted), len(skipped)))
    if posted:
        print(
            "Remember to add the new static section(s) to the hic-data-overview page:"
        )
        for item in posted:
            print(item)
    if skipped:
        print("Skipped sections:")
        for item in skipped:
            print(item)
Example 20
def main():  # pragma: no cover
    args = get_args()
    dbupdate = args.dbupdate
    try:
        auth = get_authentication_with_server(args.key, args.env)
    except Exception:
        print("Authentication failed")
        sys.exit(1)

    cnts = Counter()
    reltag = args.reltag
    # build the search query string
    query = 'type=DataReleaseUpdate&update_tag=' + reltag
    relupdates = scu.get_item_ids_from_args([query], auth, True)
    update_items = []
    for u in relupdates:
        res = get_metadata(u, auth)
        for ui in res.get('update_items'):
            if ui.get('primary_id'):
                update_items.append(ui['primary_id'])
    seen = []
    # update_items = ['experiment-set-replicates/4DNESOI2ALTL']
    for item in update_items:
        res = get_metadata(item, auth)
        uid = res.get('uuid')
        type = get_attype(res)
        cnts[type] += 1
        if (not uid) or (uid in seen) or ('ExperimentSet' not in type):
            # case for first freeze (no processed files included)
            print("SKIPPING ", uid)
            cnts['skipped'] += 1
            continue
        add_tag2item(auth, uid, reltag, seen, cnts, type, dbupdate)

        if 'ExperimentSet' in type:
            # get the experiments and files
            exps = res.get('experiments_in_set')
            if exps is not None:
                cnts['Experiment'] += len(exps)
                for exp in exps:
                    # import pdb; pdb.set_trace()
                    add_tag2item(auth, exp, reltag, seen, cnts, 'Experiment',
                                 dbupdate)
                    files = exp.get('files')
                    if files is not None:
                        cnts['FileFastq'] += len(files)
                        for file in files:
                            file = add_tag2item(auth, file, reltag, seen, cnts,
                                                'FileFastq', dbupdate)
                    epfiles = exp.get('processed_files')
                    # epfiles = None  # case for first freeze (no processed files included)
                    if epfiles is not None:
                        cnts['FileProcessed'] += len(epfiles)
                        for epf in epfiles:
                            add_tag2item(auth, epf, reltag, seen, cnts,
                                         'FileProcessed', dbupdate)

            # check the processed files directly associated to the eset
            # pfiles = res.get('procesed_files')
            pfiles = None  # case for first freeze (no processed files included)
            if pfiles is not None:
                cnts['FileProcessed'] += len(pfiles)
                for pf in pfiles:
                    add_tag2item(auth, pf, reltag, seen, cnts, 'FileProcessed',
                                 dbupdate)
    print(cnts)
Example 21
def testrun_md5_input_json_w_extra_file_object_name(env='webdev'):
    """Creates a random file object with no md5sum/content_md5sum and run md5 workflow.
    It waits for 6 mintues till the workflow run finishes and checks the input file object
    has been updated.
    """
    bucket = "elasticbeanstalk-fourfront-" + env + "-wfoutput"
    ff_key = get_authentication_with_server(ff_env='fourfront-' + env)
    newfile = post_random_file(bucket, ff_key)
    uuid = newfile['uuid']
    accession = newfile['accession']
    wf_uuid = "c77a117b-9a58-477e-aaa5-291a109a99f6"
    input_json = {
        "config": {
            "ebs_type": "io1",
            "ebs_iops": 500,
            "json_bucket": "4dn-aws-pipeline-run-json",
            "shutdown_min": 30,
            "log_bucket": "tibanna-output",
            "key_name": "4dn-encode",
            "password": ""
        },
        "_tibanna": {
            "env": "fourfront-webdev",
            "run_type": "md5_test_extra"
        },
        "parameters": {},
        "app_name":
        "md5",
        "workflow_uuid":
        wf_uuid,
        "input_files": [{
            "workflow_argument_name": "input_file",
            "bucket_name": bucket,
            "uuid": uuid,
            "object_key": accession + '.pairs.gz.px2',
            "format_if_extra": "pairs_px2"
        }],
        "output_bucket":
        bucket,
        "wfr_meta": {
            "notes": "extra file md5 trigger test from test_webdev.py"
        }
    }
    resp = run_workflow(input_json)
    print(resp)

    # check result
    time.sleep(6 * 60)  # wait for 6 minutes
    filemeta = get_metadata(uuid, key=ff_key, add_on='?datastore=database')
    content_md5sum = filemeta.get('extra_files')[0].get('content_md5sum')
    md5sum = filemeta.get('extra_files')[0].get('md5sum')
    file_size = filemeta.get('extra_files')[0].get('file_size')
    wfr_uuid = get_wfr_uuid(resp['_tibanna']['exec_arn'])
    wfr_meta = get_metadata(wfr_uuid, key=ff_key, add_on='?datastore=database')
    assert 'input_files' in wfr_meta
    assert 'format_if_extra' in wfr_meta['input_files'][0]
    assert md5sum
    assert content_md5sum
    assert file_size
    print(content_md5sum)
    print(md5sum)
    print(file_size)
    patch_metadata({'status': 'deleted'}, uuid, key=ff_key)
    patch_metadata({'status': 'deleted'}, wfr_uuid, key=ff_key)
Example 22
def main():  # pragma: no cover
    args = get_args()
    try:
        auth = get_authentication_with_server(args.key, args.env)
    except Exception:
        print("Authentication failed")
        sys.exit(1)

    print('#', auth.get('server'))
    id_list = scu.get_item_ids_from_args(args.input, auth, args.search)
    if args.fields:
        fields = args.fields

        header = '#id\t' + '\t'.join(fields)
        if args.noid is True:
            header = header.replace('#id\t', '#')
        print(header)
    problems = []
    for iid in id_list:
        try:
            res = get_metadata(iid, auth, add_on='frame=object')
        except Exception:
            problems.append(iid)
            continue

        if args.fields:
            line = ''
            # counts = {}
            for f in fields:
                val = res.get(f)
                # if val is not None:  # added in for specific use case
                if isinstance(val, dict):
                    val = val.get('uuid')
                elif isinstance(val, list):
                    # counts[f] = len(val)  # added in for specific use case
                    # if len(counts) > 1:
                    #     print(iid, '\t', counts)
                    # else:
                    #     cnt = list(counts.values())[0]
                    #     if cnt > 1:
                    #         print(iid, '\t', cnt)
                    vs = ''
                    for v in val:
                        if isinstance(v, dict):
                            v = v.get('uuid')
                        else:
                            v = str(v)
                        vs = vs + v + ', '
                    val = vs
                    if val.endswith(', '):
                        val = val[:-2]
                line = line + str(val) + '\t'
            if not args.noid:
                line = iid + '\t' + line
            print(line)
        else:
            if args.noid is True:
                print(res)
            else:
                print(iid, '\t', res)
    if problems:
        print('THERE WAS A PROBLEM GETTING METADATA FOR THE FOLLOWING:')
        for p in problems:
            print(p)
Example 23
def main():
    """
    Load a given JSON file with ontology terms inserts to a server using
    the `load_data` endpoint defined in loadxl.
    """
    logging.basicConfig()
    # Loading app will have configured from config file. Reconfigure here:
    logging.getLogger('encoded').setLevel(logging.INFO)

    parser = argparse.ArgumentParser(
        description="Load Ontology Term Data", epilog=EPILOG,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument('json_file', help="File containing terms to load")
    parser.add_argument('--env', default='local',
                        help='FF environment to update from. Defaults to local')
    parser.add_argument('--local-key', help='Access key ID if using local')
    parser.add_argument('--local-secret', help='Access key secret if using local')
    args = parser.parse_args()

    # authentication with Fourfront
    if args.env == 'local':
        # prompt access key ID and secret from user
        config_uri = 'development.ini'
        local_id = args.local_key if args.local_key else input('[local access key ID] ')
        local_secret = args.local_secret if args.local_secret else input('[local access key secret] ')
        auth = {'key': local_id, 'secret': local_secret, 'server': 'http://localhost:8000'}
    else:
        config_uri = 'production.ini'
        auth = ff_utils.get_authentication_with_server(None, args.env)

    load_endpoint = '/'.join([auth['server'], 'load_data'])
    logger.info('load_ontology_terms: Starting POST to %s' % load_endpoint)
    json_data = {'config_uri': config_uri, 'itype': 'ontology_term',
                 'overwrite': True, 'iter_response': True}
    with open(args.json_file) as infile:
        json_data['store'] = {'ontology_term': json.load(infile)}
    num_to_load = len(json_data['store']['ontology_term'])
    logger.info('Will attempt to load %s ontology terms to %s'
                % (num_to_load, auth['server']))
    start = datetime.now()
    try:
        # sustained by returning Response.app_iter from loadxl.load_data
        res = ff_utils.authorized_request(load_endpoint, auth=auth, verb='POST',
                                          timeout=None, json=json_data)
    except Exception as exc:
        logger.error('Error on POST: %s' % str(exc))
    else:
        # process the individual item responses from the generator.
        # each item should be "POST: <uuid>,", "PATCH: <uuid>,", or "SKIP: <uuid>"
        load_res = {'POST': [], 'PATCH': [], 'SKIP': [], 'ERROR': []}
        for val in res.text.split('\n'):
            if val.startswith('POST') or val.startswith('SKIP'):
                prefix_len = 4  # 'POST' or 'SKIP'
            else:
                prefix_len = 5  # 'PATCH' or 'ERROR'
            # this is a bit weird, but we want to split out the POST/PATCH...
            # and also remove ': ' from the value for each message
            cat, msg = val[:prefix_len], val[prefix_len + 2:]
            if not msg:
                continue
            if cat in load_res:
                load_res[cat].append(msg)
        logger.info("Success! Attempted to load %s items. Result: POSTed %s, PATCHed %s, skipped %s"
                    % (num_to_load, len(load_res['POST']), len(load_res['PATCH']), len(load_res['SKIP'])))
        if load_res['ERROR']:
            logger.error("ERROR encountered during load_data! Error: %s" % load_res['ERROR'])
        if (len(load_res['POST']) + len(load_res['SKIP'])) > len(load_res['PATCH']):
            logger.error("The following items passed round I (POST/skip) but not round II (PATCH): %s"
                         % (set(load_res['POST'] + load_res['SKIP']) - set(load_res['PATCH'])))
    logger.info("Finished request in %s" % str(datetime.now() - start))

    # update sysinfo. Don't worry about doing this on local
    if args.env != 'local':
        data = {"name": "ffsysinfo", "ontology_updated": datetime.today().isoformat()}
        try:
            found_info = ff_utils.get_metadata('/sysinfos/' + data['name'], key=auth)
        except Exception:
            found_info = None

        if found_info:
            ff_utils.patch_metadata(data, found_info['uuid'], key=auth)
        else:
            ff_utils.post_metadata(data, 'sysinfos', key=auth)
        logger.info("Updated sysinfo with name %s" % data['name'])
    logger.info("DONE!")
def main():  # pragma: no cover
    start = datetime.now()
    print(str(start))
    args = get_args()
    try:
        auth = get_authentication_with_server(args.key, args.env)
    except Exception:
        print("Authentication failed")
        sys.exit(1)

    phase2 = {}
    # assumes a single line corresponds to json for single term
    if not args.dbupdate:
        print("DRY RUN - use --dbupdate to update the database")
    with open(args.infile) as terms:
        for t in terms:
            phase2json = {}
            term = json.loads(t)
            id_tag = get_id(term)
            if id_tag is None:
                print("No Identifier for ", term)
            else:
                tid = '/ontology-terms/' + id_tag
                # look for parents and remove for phase 2 loading if they are there
                if 'parents' in term:
                    phase2json['parents'] = term['parents']
                    del term['parents']
                if 'slim_terms' in term:
                    phase2json['slim_terms'] = term['slim_terms']
                    del term['slim_terms']

                try:
                    dbterm = get_metadata(tid, auth)
                except:  # noqa
                    dbterm = None
                op = ''
                if dbterm and 'OntologyTerm' in dbterm.get('@type', []):
                    if args.dbupdate:
                        e = patch_metadata(term, dbterm["uuid"], auth)
                    else:
                        e = {'status': 'dry run'}
                    op = 'PATCH'
                else:
                    if args.dbupdate:
                        e = post_metadata(term, 'OntologyTerm', auth)
                    else:
                        e = {'status': 'dry run'}
                    op = 'POST'
                status = e.get('status')
                if status and status == 'dry run':
                    print(op, status)
                elif status and status == 'success':
                    print(op, status, e['@graph'][0]['uuid'])
                    if phase2json:
                        phase2[e['@graph'][0]['uuid']] = phase2json
                else:
                    print('FAILED', tid, e)

    print("START LOADING PHASE2 at ", str(datetime.now()))
    for tid, data in phase2.items():
        if args.dbupdate:
            e = patch_metadata(data, tid, auth)
        else:
            e = {'status': 'dry run'}
        status = e.get('status')
        if status and status == 'dry run':
            print('PATCH', status)
        elif status and status == 'success':
            print('PATCH', status, e['@graph'][0]['uuid'])
        else:
            print('FAILED', tid, e)
    end = datetime.now()
    print("FINISHED - START: ", str(start), "\tEND: ", str(end))