def main():
    args = get_args(sys.argv[1:])
    try:
        auth = get_authentication_with_server(args.key, args.env)
    except Exception:
        print("Authentication failed")
        sys.exit(1)
    dryrun = not args.dbupdate
    file_list = scu.get_item_ids_from_args(args.input, auth, args.search)
    wf_data = get_metadata(args.workflow, auth)
    for f in file_list:
        file_info = get_metadata(f, auth)
        parents = file_info.get('produced_from')
        if parents:
            inputs = []
            for p in parents:
                inputs.append(get_metadata(p, auth))
            wfr_json = create_wfr_meta_only_json(auth, wf_data, inputs, [file_info])
            if dryrun:
                print('DRY RUN -- will post')
                print(wfr_json)
            else:
                res = post_metadata(wfr_json, 'workflow_run_awsem', auth)
                # and add a notes_to_tsv to the file
                patchstatus = add_notes_to_tsv(file_info, auth)
                print(res)
                print(patchstatus)
def main():  # pragma: no cover
    # initial set up
    args = get_args(sys.argv[1:])
    try:
        auth = get_authentication_with_server(args.key, args.env)
    except Exception:
        print("Authentication failed")
        sys.exit(1)
    # bucket addresses
    ff_health = get_metadata('/health', auth)
    source_bucket = ff_health['file_upload_bucket']
    target_bucket = ff_health['processed_file_bucket']
    s3 = boto3.resource('s3')
    # get the uuids for the files
    query = 'type=FileVistrack'
    uids = scu.get_item_ids_from_args([query], auth, True)
    files2copy = [get_metadata(uid, auth).get('upload_key') for uid in uids]
    for file_key in files2copy:
        copy_source = {'Bucket': source_bucket, 'Key': file_key}
        try:
            # print(file_key + ' cp from ' + source_bucket + ' to ' + target_bucket)
            s3.meta.client.copy(copy_source, target_bucket, file_key)
        except Exception:
            print('Cannot find file on source', file_key)
            continue
        print('{} file copied'.format(file_key))
def main():  # pragma: no cover
    start = datetime.now()
    print(str(start))
    args = get_args()
    try:
        auth = get_authentication_with_server(args.key, args.env)
    except Exception:
        print("Authentication failed")
        sys.exit(1)
    # assumes each line of the input file holds the json for a single item
    if not args.dbupdate:
        print("DRY RUN - use --dbupdate to update the database")
    with open(args.infile) as items:
        for i in items:
            [iid, payload] = [t.strip() for t in i.split('\t')]
            payload = json.loads(payload)
            if args.dbupdate:
                e = patch_metadata(payload, iid, auth)
            else:
                print("DRY RUN\n\tPATCH: ", iid, " TO\n", payload)
                e = {'status': 'success'}
            status = e.get('status')
            if status and status == 'success':
                print(status)
            else:
                print('FAILED', e)
    end = datetime.now()
    print("FINISHED - START: ", str(start), "\tEND: ", str(end))
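# A hedged sketch of the input file the loader above expects: each line is an item
# identifier and a JSON payload separated by a single tab (shown as <TAB>). The
# identifiers and fields below are invented for illustration.
#
#   4DNEXAMPLE111<TAB>{"status": "released"}
#   4DNEXAMPLE222<TAB>{"description": "updated description"}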
def main():
    args = get_args()
    try:
        auth = ff.get_authentication_with_server(args.key, args.env)
    except Exception:
        print("Authentication failed")
        sys.exit(1)
    print("Working on {}".format(auth.get('server')))
    itemids = scu.get_item_ids_from_args(args.input, auth, args.search)
    seen = []
    failed = []
    for itemid in itemids:
        print("Touching ", itemid)
        if args.dbupdate:
            try:
                res = ff.patch_metadata({}, itemid, auth)
                print(res.get('status'))
                if res.get('status') == 'success':
                    seen.append(itemid)
            except Exception:
                print(itemid, ' failed to patch')
                failed.append(itemid)
                continue
        else:
            print('dry run!')
    for i in seen:
        print(i)
    print("Failures")
    for f in failed:
        print(f)
def testrun_md5(workflow_name='tibanna_pony', env='webdev'):
    """Creates a random file object with no md5sum/content_md5sum and runs the md5 workflow.
    It waits 6 minutes for the workflow run to finish and then checks that the
    input file object has been updated.
    """
    bucket = "elasticbeanstalk-fourfront-" + env + "-wfoutput"
    ff_key = get_authentication_with_server(ff_env='fourfront-' + env)
    newfile = post_random_file(bucket, ff_key)
    uuid = newfile['uuid']
    accession = newfile['accession']
    input_json = {
        "config": {
            "ebs_type": "io1",
            "ebs_iops": 500,
            "s3_access_arn": "arn:aws:iam::643366669028:instance-profile/S3_access",
            "ami_id": "ami-cfb14bb5",
            "json_bucket": "4dn-aws-pipeline-run-json",
            "shutdown_min": 30,
            "copy_to_s3": True,
            "launch_instance": True,
            "log_bucket": "tibanna-output",
            "script_url": "https://raw.githubusercontent.com/4dn-dcic/tibanna/master/awsf/",
            "key_name": "4dn-encode",
            "password": ""
        },
        "_tibanna": {
            "env": "fourfront-webdev",
            "run_type": "md5"
        },
        "parameters": {},
        "app_name": "md5",
        "workflow_uuid": "c77a117b-9a58-477e-aaa5-291a109a99f6",
        "input_files": [{
            "workflow_argument_name": "input_file",
            "bucket_name": bucket,
            "uuid": uuid,
            "object_key": accession + '.pairs.gz'
        }],
        "output_bucket": bucket
    }
    resp = run_workflow(input_json, workflow=workflow_name)
    print(resp)
    # check result
    time.sleep(6 * 60)  # wait for 6 minutes
    filemeta = get_metadata(uuid, key=ff_key, add_on='?datastore=database')
    content_md5sum = filemeta.get('content_md5sum')
    md5sum = filemeta.get('md5sum')
    if content_md5sum and md5sum:
        print(content_md5sum)
        print(md5sum)
        patch_metadata({'status': 'deleted'}, uuid, key=ff_key)
    else:
        raise Exception('md5 step function run failed')
def extract_file_info(obj_id, arg_name, env, rename=[]):
    """Creates the formatted dictionary for files."""
    auth = ff_utils.get_authentication_with_server({}, ff_env=env)
    my_s3_util = s3Utils(env=env)
    raw_bucket = my_s3_util.raw_file_bucket
    out_bucket = my_s3_util.outfile_bucket
    # start a dictionary
    template = {"workflow_argument_name": arg_name}
    if rename:
        change_from = rename[0]
        change_to = rename[1]
    # if it is a list of items, change the structure
    if isinstance(obj_id, list):
        object_key = []
        uuid = []
        buckets = []
        for obj in obj_id:
            metadata = ff_utils.get_metadata(obj, key=auth)
            object_key.append(metadata['display_title'])
            uuid.append(metadata['uuid'])
            # get the bucket
            if 'FileProcessed' in metadata['@type']:
                my_bucket = out_bucket
            else:  # covers cases of FileFastq, FileReference, FileMicroscopy
                my_bucket = raw_bucket
            buckets.append(my_bucket)
        # check bucket consistency
        try:
            assert len(list(set(buckets))) == 1
        except AssertionError:
            print('Files from different buckets', obj_id)
            return
        template['object_key'] = object_key
        template['uuid'] = uuid
        template['bucket_name'] = buckets[0]
        if rename:
            template['rename'] = [i.replace(change_from, change_to)
                                  for i in template['object_key']]
    # if obj_id is a string
    else:
        metadata = ff_utils.get_metadata(obj_id, key=auth)
        template['object_key'] = metadata['display_title']
        template['uuid'] = metadata['uuid']
        # get the bucket
        if 'FileProcessed' in metadata['@type']:
            my_bucket = out_bucket
        else:  # covers cases of FileFastq, FileReference, FileMicroscopy
            my_bucket = raw_bucket
        template['bucket_name'] = my_bucket
        if rename:
            template['rename'] = template['object_key'].replace(change_from, change_to)
    return template
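# A minimal usage sketch for extract_file_info (defined above), with an invented
# environment name, accessions, and rename pair; only the call shape is meaningful.
#
#   single_arg = extract_file_info('4DNFIEXAMPLE1', 'input_pairs', 'data')
#   paired_arg = extract_file_info(['4DNFIEXAMPLE1', '4DNFIEXAMPLE2'], 'input_fastqs',
#                                  'data', rename=['.fastq.gz', '.fq.gz'])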
def main():
    args = get_args()
    try:
        auth = ff.get_authentication_with_server(args.key, args.env)
    except Exception:
        print("Authentication failed")
        sys.exit(1)
    itemids = scu.get_item_ids_from_args([args.query], auth, True)
    for itemid in itemids:
        print(itemid)
def main(ff_env='fourfront-cgapwolf', skip_software=False, skip_file_format=False,
         skip_workflow=False):
    """post / patch contents from portal_objects to the portal"""
    keycgap = ff_utils.get_authentication_with_server(ff_env=ff_env)

    # software
    if not skip_software:
        print("Processing software...")
        with open('portal_objects/software.json') as f:
            d = json.load(f)
        for dd in d:
            print("  processing uuid %s" % dd['uuid'])
            try:
                ff_utils.post_metadata(dd, 'Software', key=keycgap)
            except Exception:
                ff_utils.patch_metadata(dd, dd['uuid'], key=keycgap)

    # file formats
    if not skip_file_format:
        print("Processing file format...")
        with open('portal_objects/file_format.json') as f:
            d = json.load(f)
        for dd in d:
            print("  processing uuid %s" % dd['uuid'])
            try:
                ff_utils.post_metadata(dd, 'FileFormat', key=keycgap)
            except Exception:
                ff_utils.patch_metadata(dd, dd['uuid'], key=keycgap)

    # workflows
    if not skip_workflow:
        print("Processing workflow...")
        wf_dir = "portal_objects/workflows"
        files = os.listdir(wf_dir)
        for fn in files:
            if fn.endswith('.json'):
                print("  processing file %s" % fn)
                with open(os.path.join(wf_dir, fn), 'r') as f:
                    d = json.load(f)
                try:
                    ff_utils.post_metadata(d, 'Workflow', key=keycgap)
                except Exception:
                    ff_utils.patch_metadata(d, d['uuid'], key=keycgap)
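# The loader above assumes roughly this directory layout (a sketch; the filename
# under workflows/ is illustrative):
#
#   portal_objects/
#     software.json        # JSON list of Software items, each carrying a 'uuid'
#     file_format.json     # JSON list of FileFormat items, each carrying a 'uuid'
#     workflows/
#       example_workflow.json   # one Workflow item per file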
def main():  # pragma: no cover
    args = get_args()
    try:
        auth = get_authentication_with_server(args.key, args.env)
    except Exception:
        print("Authentication failed")
        sys.exit(1)
    itemids = scu.get_item_ids_from_args(args.input, auth, args.search)
    taggable = scu.get_types_that_can_have_field(auth, 'tags')
    if args.types2exclude is not None:
        # remove explicitly provided types not to tag
        taggable = [t for t in taggable if t not in args.types2exclude]

    seen = []  # only need to add the tag once, so this keeps track of what's been seen
    to_patch = {}  # keep track of those to patch
    # main loop through the top level item ids
    for itemid in itemids:
        items2tag = {}
        if args.taglinked:
            # need to get linked items and tag them
            linked = scu.get_linked_items(auth, itemid, {})
            items2tag = scu.filter_dict_by_value(linked, taggable, include=True)
        else:
            # only want to tag provided items
            itype = scu.get_item_type(auth, itemid)
            if itype in taggable:
                items2tag = {itemid: itype}
        for i, t in items2tag.items():
            if i not in seen:
                seen.append(i)
                item = get_metadata(i, auth)
                if not scu.has_field_value(item, 'tags', args.tag):
                    # not already tagged with this tag, so make a patch and add to dict
                    to_patch[i] = make_tag_patch(item, args.tag)

    # now do the patching or reporting
    for pid, patch in to_patch.items():
        if args.dbupdate:
            pres = patch_metadata(patch, pid, auth)
            print(pres['status'])
        else:
            print("DRY RUN: patch ", pid, " with ", patch)
def connect2server(env=None, key=None, keyfile=None, logger=None):
    """Sets up credentials for accessing the server.  Generates a key using info
       from the named keyname in the keyfile and checks that the server can be
       reached with that key.  Also handles keyfiles stored in s3 using the env param"""
    if key and keyfile:
        keys = None
        if os.path.isfile(keyfile):
            with io.open(keyfile, 'r') as kf:
                keys_json_string = kf.read()
                keys = json.loads(keys_json_string)
        if keys:
            key = keys.get(key)
    try:
        auth = get_authentication_with_server(key, env)
    except Exception:
        logger.error("Authentication failed")
        sys.exit(1)
    return auth
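# A usage sketch for this connect2server variant, assuming a hypothetical local
# keyfile that maps key names to credential dicts; the filename, key name, and
# credential values below are invented.
#
#   keypairs.json:
#     {"default": {"key": "XXXXXXXX", "secret": "not-a-real-secret",
#                  "server": "https://data.4dnucleome.org"}}
#
#   auth = connect2server(key='default', keyfile='keypairs.json',
#                         logger=logging.getLogger(__name__))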
def main():  # pragma: no cover
    args = get_args()
    try:
        auth = get_authentication_with_server(args.key, args.env)
    except Exception:
        print("Authentication failed")
        sys.exit(1)
    dryrun = not args.dbupdate
    biorxiv = get_metadata(args.old, auth)
    jarticle = get_metadata(args.new, auth)

    if biorxiv.get('status') == 'error':
        print('Biorxiv record %s cannot be found' % args.old)
        sys.exit(1)
    if jarticle.get('status') == 'error':
        print('Journal Article record %s cannot be found' % args.new)
        sys.exit(1)
    # make sure we can get the uuid to patch
    juuid = jarticle.get('uuid')

    # build the patch dictionary
    fields2transfer = ['categories', 'exp_sets_prod_in_pub', 'exp_sets_used_in_pub', 'published_by']
    patch_dict, skipped = create_patch_for_new_from_old(biorxiv, jarticle, fields2transfer, args.vals2skip)
    patch_dict, skipped = move_old_url_to_new_aka(biorxiv, jarticle, patch_dict, skipped)

    # do the patch
    ok = patch_and_report(auth, patch_dict, skipped, juuid, dryrun)
    if not ok:
        sys.exit(1)  # bail out if initial transfer doesn't work

    # find items with reference to old paper
    buuid = biorxiv.get('uuid')
    complete = find_and_patch_item_references(auth, buuid, juuid, dryrun)
    if not complete:
        print("ALL REFERENCES POINTING TO %s NOT UPDATED - CHECK AND FIX!" % buuid)
def set_load_params(auth, env):
    # authentication with Fourfront
    # auth is dict: key, secret, server - set config appropriately
    if not (auth or env):
        return
    if auth:
        if auth.get('server') == 'http://localhost:8000':
            config_uri = 'development.ini'
        else:
            config_uri = 'production.ini'
    elif env == 'local':
        # prompt access key ID and secret from user
        local_id = input('enter local access key ID: ')
        local_secret = input('enter local access key secret: ')
        auth = {'key': local_id, 'secret': local_secret, 'server': 'http://localhost:8000'}
        config_uri = 'development.ini'
    else:
        auth = ff_utils.get_authentication_with_server(None, env)
        config_uri = 'production.ini'
    return auth, config_uri
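# A hedged example of calling set_load_params; the environment name is illustrative.
# With auth=None and a non-local env it falls through to
# ff_utils.get_authentication_with_server and 'production.ini'.
#
#   auth, config_uri = set_load_params(auth=None, env='fourfront-mastertest')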
def main():  # pragma: no cover
    args = get_args(sys.argv[1:])
    try:
        auth = get_authentication_with_server(args.key, args.env)
    except Exception:
        print("Authentication failed")
        sys.exit(1)
    print('#', auth.get('server'))
    id_list = scu.get_item_ids_from_args(args.input, auth, args.search)
    for itemid in id_list:
        # get the existing raw data for the item, including other_processed_files
        item_data = get_metadata(itemid, auth, add_on='frame=raw')
        pfiles = item_data.get('processed_files')
        if not pfiles:
            continue
        patch_data = item_data.get('other_processed_files', [])
        if patch_data:
            # does the same title already exist?
            if args.title in [i['title'] for i in patch_data]:
                print(itemid, 'already has preliminary results')
                continue
        patch_data.append({'title': args.title, 'type': 'preliminary', 'files': pfiles})
        if patch_data:
            patch = {'other_processed_files': patch_data}
            if args.dbupdate:
                res = patch_metadata(patch, obj_id=itemid, key=auth,
                                     add_on='delete_fields=processed_files')
                print(res.get('status'))
            else:
                print("DRY RUN -- will patch")
                print(patch)
                print('and delete processed_files field value')
def connect2server(env=None, key=None):
    '''Sets up credentials for accessing the server.  Generates a key using info
       from the named keyname in the keyfile and checks that the server can be
       reached with that key.  Also handles keyfiles stored in s3'''
    if key == 's3':
        assert env
        key = unified_authentication(None, env)
    if all([v in key for v in ['key', 'secret', 'server']]):
        import ast
        key = ast.literal_eval(key)
    try:
        auth = get_authentication_with_server(key, env)
    except Exception:
        print("Authentication failed")
        sys.exit(1)
    print("Running on: {server}".format(server=auth.get('server')))
    return auth
def main():
    args = get_args(sys.argv[1:])
    try:
        auth = get_authentication_with_server(args.key, args.env)
    except Exception:
        print("Authentication failed")
        sys.exit(1)
    itemids = scu.get_item_ids_from_args(args.input, auth, args.search)
    excluded_types = get_excluded(args.types2exclude, args.types2include)
    no_child = ['Publication', 'Lab', 'User', 'Award']  # default no_childs
    if args.no_children:
        no_child.extend(args.no_children)
        no_child = list(set(no_child))

    all_linked_ids = []
    # main loop through the top level item ids
    for itemid in itemids:
        linked = scu.get_linked_items(auth, itemid, {})
        if excluded_types is not None:
            linked = scu.filter_dict_by_value(linked, excluded_types, include=False)
        ll = [(k, linked[k]) for k in sorted(linked, key=linked.get)]
        for i, t in ll:
            suff = ''
            if i == itemid:
                suff = '\tINPUT'
            if is_released(i, auth):
                suff = '\tRELEASED' + suff
                if not args.include_released:
                    print(i, '\t', t, '\tSKIPPING', suff)
                    continue
            if i not in all_linked_ids:
                all_linked_ids.append(i)
            else:
                suff = suff + '\tSEEN'
            print(i, '\t', t, suff)
    for a in all_linked_ids:
        print(a)
def main():
    args = get_args(sys.argv[1:])
    try:
        auth = get_authentication_with_server(args.key, args.env)
    except Exception:
        print("Authentication failed")
        sys.exit(1)
    print("Working on {}".format(auth.get('server')))
    itemids = scu.get_item_ids_from_args(args.input, auth, args.search)
    field = args.field
    val = args.value
    if val == 'True':
        val = True
    elif val == 'False':
        val = False
    if args.isarray:
        val = [v for v in val.split("'") if v]
    ntype = args.numtype
    if ntype:
        if ntype == 'i':
            val = int(val)
        elif ntype == 'f':
            val = float(val)
    for iid in itemids:
        print("PATCHING", iid, "to", field, "=", val)
        if args.dbupdate:
            # do the patch
            if val == '*delete*':
                res = delete_field(iid, field, auth)
            else:
                res = patch_metadata({args.field: val}, iid, auth)
            if res['status'] == 'success':
                print("SUCCESS!")
            else:
                print("FAILED TO PATCH", iid, "RESPONSE STATUS", res['status'], res['description'])
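# A sketch of how the value coercion above behaves. The option spellings come from
# get_args, which is not shown here, so treat the flag names as assumptions:
#
#   --value True                       ->  boolean True
#   --value 5 --numtype i              ->  int 5
#   --value "'val1''val2'" --isarray   ->  ['val1', 'val2']
#   --value '*delete*' --dbupdate      ->  the field is removed via delete_field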
def find_pairs(my_rep_set, my_env, lookfor='pairs', exclude_miseq=True):
    """Find fastq files from experiment set, exclude miseq."""
    auth = ff_utils.get_authentication_with_server({}, ff_env=my_env)
    my_s3_util = s3Utils(env=my_env)
    report = {}
    rep_resp = my_rep_set['experiments_in_set']
    lab = [my_rep_set['lab']['@id']]
    enzymes = []
    organisms = []
    total_f_size = 0
    for exp in rep_resp:
        exp_resp = exp
        report[exp['accession']] = []
        if not organisms:
            biosample = exp['biosample']
            organisms = list(set([bs['individual']['organism']['name']
                                  for bs in biosample['biosource']]))
            if len(organisms) != 1:
                print('multiple organisms in set', my_rep_set['accession'])
                break
        exp_files = exp['files']
        enzyme = exp.get('digestion_enzyme')
        if enzyme:
            enzymes.append(enzyme['display_title'])
        for fastq_file in exp_files:
            file_resp = ff_utils.get_metadata(fastq_file['uuid'], key=auth)
            if not file_resp.get('file_size'):
                print("WARNING!", file_resp['accession'], 'does not have filesize')
            else:
                total_f_size += file_resp['file_size']
            # skip pair no 2
            if file_resp.get('paired_end') == '2':
                continue
            # exclude miseq
            if exclude_miseq:
                if file_resp.get('instrument') == 'Illumina MiSeq':
                    # print 'skipping miseq files', exp
                    continue
            # Some checks before running
            # check if status is deleted
            if file_resp['status'] == 'deleted':
                print('deleted file', file_resp['accession'], 'in', my_rep_set['accession'])
                continue
            # if there is no uploaded file in the file item, report and skip
            if not file_resp.get('filename'):
                print(file_resp['accession'], "does not have a file")
                continue
            # check if file is in s3
            head_info = my_s3_util.does_key_exist(file_resp['upload_key'], my_s3_util.raw_file_bucket)
            if not head_info:
                print(file_resp['accession'], "does not have a file in S3")
                continue
            # check that file has a pair
            f1 = file_resp['@id']
            f2 = ""
            paired = ""
            # is there a pair?
            try:
                relations = file_resp['related_files']
                paired_files = [relation['file']['@id'] for relation in relations
                                if relation['relationship_type'] == 'paired with']
                assert len(paired_files) == 1
                f2 = paired_files[0]
                paired = "Yes"
            except Exception:
                paired = "No"
            # for experiments with unpaired fastq files
            if lookfor == 'single':
                if paired == 'No':
                    report[exp_resp['accession']].append(f1)
                else:
                    print('expected single files, found paired end')
                    return
            # for experiments with paired files
            else:
                if paired != 'Yes':
                    print('expected paired files, found single end')
                    return
                f2 = ''
                relations = file_resp.get('related_files')
                if not relations:
                    print(f1, 'does not have a pair')
                    return
                for relation in relations:
                    if relation['relationship_type'] == 'paired with':
                        f2 = relation['file']['@id']
                if not f2:
                    print(f1, 'does not have a pair')
                    return
                report[exp_resp['accession']].append((f1, f2))
    # get the organism
    if len(list(set(organisms))) == 1:
        organism = organisms[0]
    else:
        organism = None
    # get the enzyme
    if len(list(set(enzymes))) == 1:
        enz = enzymes[0]
    else:
        enz = None
    bwa = bwa_index.get(organism)
    chrsize = chr_size.get(organism)
    if re_nz.get(organism):
        enz_file = re_nz[organism].get(enz)
    else:
        print('no enzyme information for the organism {}'.format(organism))
        enz_file = None
    return (report, organism, enz, bwa, chrsize, enz_file,
            int(total_f_size / (1024 * 1024 * 1024)), lab)
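# A minimal usage sketch for find_pairs, assuming my_rep_set is an embedded
# ExperimentSetReplicate metadata dict and the environment name is illustrative:
#
#   (report, organism, enz, bwa, chrsize, enz_file,
#    total_size_gb, lab) = find_pairs(my_rep_set, 'data', lookfor='pairs')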
def main():  # pragma: no cover
    start = datetime.now()
    print(str(start))
    args = get_args()
    try:
        auth = get_authentication_with_server(args.key, args.env)
    except Exception:
        print("Authentication failed")
        sys.exit(1)
    print('working on ', auth.get('server'))
    if args.as_file:
        if not args.dbupdate:
            print("DRY RUN - use --dbupdate to update the database")
        else:
            try:
                load_file(auth, args.itype, args.infile)
            except Exception as e:
                print(e)
    else:
        with open(args.infile) as ifile:
            item_store = json.loads(ifile.read())
            if not args.itype:
                if not isinstance(item_store, dict):
                    print("File is not in correct format")
                    sys.exit(1)
            else:
                if not isinstance(item_store, list):
                    print("File is not in correct format")
                    sys.exit(1)
                item_store = {args.itype: item_store}
            for itype, items in sorted(item_store.items(), key=lambda x: ORDER.index(x[0])):
                if not args.dbupdate:
                    print('DRY RUN - would try to load {} {} items'.format(len(items), itype))
                    continue
                if args.id_field:
                    identifiers = [args.id_field]
                else:
                    schema_path = 'profiles/' + itype + '.json'
                    schema_info = get_metadata(schema_path, auth)
                    identifiers = schema_info.get('identifyingProperties')
                # check whether each item already exists
                # if not, we can use the load_data endpoint
                # if it does, patch it the old fashioned way
                to_patch = []
                to_post = []
                for item in items:
                    uid = item.get('uuid')
                    if uid:
                        exists = get_item(uid, auth)
                        if exists:
                            # try a patch
                            to_patch.append(item)
                        else:
                            to_post.append(item)
                    else:
                        uid = check_for_existing(item, itype, identifiers, auth)
                        if uid:
                            # try a patch
                            item['uuid'] = uid
                            to_patch.append(item)
                        else:
                            uid = str(uuid4())
                            item['uuid'] = uid
                            to_post.append(item)
                if to_post:
                    load_json(auth, itype, to_post, chunk_size=1000)
                if to_patch:
                    patch_jsons(auth, to_patch)
    stop = datetime.now()
    print(str(stop))
def main():
    # getting authentication keys
    args = get_args()
    try:
        auth = ff_utils.get_authentication_with_server(args.key)
    except Exception as e:
        print("Authentication failed", e)
        sys.exit(1)
    dryrun = args.dryrun
    if dryrun:
        print("\nThis is a dry run\n")

    # collecting publication and expset search results
    hic_types = ['in+situ+Hi-C', 'Dilution+Hi-C', 'DNase+Hi-C', 'Micro-C', 'TCC']
    query_pub = '/search/?type=Publication'
    query_exp = '/search/?type=ExperimentSetReplicate&status=released'
    for type in hic_types:
        query_pub += '&exp_sets_prod_in_pub.experiments_in_set.experiment_type.display_title=' + type
        query_exp += '&experiments_in_set.experiment_type.display_title=' + type
    pubs_search = ff_utils.search_metadata(query_pub, key=auth)
    expsets_search = ff_utils.search_metadata(query_exp, key=auth)

    # building publications dictionary
    pubs_dict = convert_pubs_list_to_lookup(pubs_search)

    # loading dataset groups from json file
    repo_path = Path(__file__).resolve().parents[1]
    dsg_filename = repo_path.joinpath('files', 'dsg.json')
    if dsg_filename.exists():
        with open(dsg_filename) as dsg_fn:
            dsgs = json.load(dsg_fn)
    else:
        sys.exit("ERROR: Dataset grouping file not found")

    # making dataset list and mapping to dataset group
    dataset_list = []
    datasets_of_dsg = {}
    for k, v in dsgs.items():
        if v.get("datasets"):
            dataset_list.extend(v["datasets"])
            datasets_of_dsg[k] = v["datasets"]
        else:
            # if a dsg does not have datasets, then the dsg itself is the dataset
            dataset_list.append(k)

    # building the output table
    table = {}
    new_datasets = set()
    study_groups = set()
    for expset in expsets_search:
        dataset = expset.get("dataset_label")
        if dataset not in dataset_list:
            new_datasets.add(dataset)
            continue
        dsg = dataset
        dsg_link = "dataset_label=" + dataset
        for group, elements in datasets_of_dsg.items():
            if dataset in elements:
                dsg_link = ("dataset_label=" + "&dataset_label=".join(elements))
                dsg = group
                break
        dsg_link = "/browse/?" + dsg_link.replace("+", "%2B").replace("/", "%2F").replace(" ", "+")
        study_groups.add(dsgs[dsg].get("study_group"))
        row = table.get(dsg, {})
        table[dsg] = assemble_data_for_the_row(row, expset, dsg, dsg_link, pubs_dict, dsgs[dsg])

    # summarize number of experiment sets of each experiment type in a string
    for dsg, row in table.items():
        exp_type_summary = ""
        for exp_type, count in row["Replicate Sets"].items():
            if count > 0:
                exp_type_summary += str(count) + " " + exp_type + "<br>"
        if len(exp_type_summary) > 0:
            row['Replicate Sets'] = exp_type_summary[:-4]  # remove <br> at the end
        else:
            row['Replicate Sets'] = ""

    # if new datasets are not in the json, ask what to do
    if new_datasets:
        print("New datasets found (not present in the json file):")
        for ds in new_datasets:
            print(ds)
        print("(i)gnore datasets or (e)xit to manually add them? [i/e]")
        response = None
        while response not in ['i', 'e']:
            response = input()
        if response == 'e':
            sys.exit("Add new dataset to dsg.json before generating table")

    # patch the static section for each study group
    skipped = []
    posted = []
    patched = []
    for studygroup in list(study_groups):
        # prepare static section
        table_dsg = {}
        for dsg in dsgs:
            if table.get(dsg):
                if table[dsg].get("Class") != studygroup:
                    continue
                else:
                    table_dsg[dsg] = table.get(dsg)

        keys = ['Data Set', 'Project', 'Replicate Sets', 'Species', 'Biosources',
                'Publication', 'Study', 'Lab']
        if studygroup == "Single Time Point and Condition":
            keys.remove('Study')

        name = alias = output = filetype = None
        if args.format == 'markdown':
            name = "data-highlights.hic." + studygroup + ".md"
            name = name.lower().replace(" ", "-")
            alias = "4dn-dcic-lab:" + name
            filetype = 'jsx'
            default_col_widths = "[-1,100,-1,100,-1,-1,-1,-1]"
            if "Study" not in keys:
                default_col_widths = "[-1,100,-1,120,250,-1,170]"
            output = md_table_maker(table_dsg, keys, name, default_col_widths)
        else:
            name = "data-highlights.hic." + studygroup
            name = name.lower().replace(" ", "-")
            alias = "4dn-dcic-lab:" + name
            filetype = 'html'
            styles = {
                'Data Set': ";width:20%;min-width:120px",
                'Replicate Sets': ";width:150px",
                'Publication': ";width:200px"
            }
            output = html_table_maker(table_dsg, keys, styles)

        # check if static section exists
        post = False
        try:
            ff_utils.get_metadata(alias, auth)
        except Exception:
            print("'{}' static section cannot be patched because it does not exist".format(studygroup))
            print("Do you want to (p)ost or (s)kip this static section? [p/s]")
            response = None
            while response not in ['p', 's']:
                response = input()
            if response == 's':
                skipped.append(alias)
                continue
            else:
                post = True

        # post or patch static section
        if post:
            post_body = {
                "name": name,
                "aliases": [alias],
                "body": output,
                "section_type": "Page Section",
                "title": studygroup,
                "options": {
                    "collapsible": True,
                    "default_open": True,
                    "filetype": filetype
                }
            }
            if not dryrun:
                res = ff_utils.post_metadata(post_body, "StaticSection", key=auth)
                posted.append(alias)
        else:
            patch_body = {"body": output}
            if not dryrun:
                res = ff_utils.patch_metadata(patch_body, alias, key=auth)
                patched.append(alias)
        if not dryrun:
            print("{}: {}".format(alias, res['status']))

    # summarize results
    print("Static sections summary: {} patched, {} posted, {} skipped".format(
        len(patched), len(posted), len(skipped)))
    if posted:
        print("Remember to add the new static section(s) to the hic-data-overview page:")
        for item in posted:
            print(item)
    if skipped:
        print("Skipped sections:")
        for item in skipped:
            print(item)
def main():  # pragma: no cover
    args = get_args()
    dbupdate = args.dbupdate
    try:
        auth = get_authentication_with_server(args.key, args.env)
    except Exception:
        print("Authentication failed")
        sys.exit(1)
    cnts = Counter()
    reltag = args.reltag
    # build the search query string
    query = 'type=DataReleaseUpdate&update_tag=' + reltag
    relupdates = scu.get_item_ids_from_args([query], auth, True)
    update_items = []
    for u in relupdates:
        res = get_metadata(u, auth)
        for ui in res.get('update_items'):
            if ui.get('primary_id'):
                update_items.append(ui['primary_id'])
    seen = []
    # update_items = ['experiment-set-replicates/4DNESOI2ALTL']
    for item in update_items:
        res = get_metadata(item, auth)
        uid = res.get('uuid')
        type = get_attype(res)
        cnts[type] += 1
        if (not uid) or (uid in seen) or ('ExperimentSet' not in type):
            # case for first freeze (no processed files included)
            print("SKIPPING ", uid)
            cnts['skipped'] += 1
            continue
        add_tag2item(auth, uid, reltag, seen, cnts, type, dbupdate)
        if 'ExperimentSet' in type:
            # get the experiments and files
            exps = res.get('experiments_in_set')
            if exps is not None:
                cnts['Experiment'] += len(exps)
                for exp in exps:
                    # import pdb; pdb.set_trace()
                    add_tag2item(auth, exp, reltag, seen, cnts, 'Experiment', dbupdate)
                    files = exp.get('files')
                    if files is not None:
                        cnts['FileFastq'] += len(files)
                        for file in files:
                            file = add_tag2item(auth, file, reltag, seen, cnts, 'FileFastq', dbupdate)
                    epfiles = exp.get('processed_files')
                    # epfiles = None  # case for first freeze (no processed files included)
                    if epfiles is not None:
                        cnts['FileProcessed'] += len(epfiles)
                        for epf in epfiles:
                            add_tag2item(auth, epf, reltag, seen, cnts, 'FileProcessed', dbupdate)
            # check the processed files directly associated to the eset
            # pfiles = res.get('procesed_files')
            pfiles = None  # case for first freeze (no processed files included)
            if pfiles is not None:
                cnts['FileProcessed'] += len(pfiles)
                for pf in pfiles:
                    add_tag2item(auth, pf, reltag, seen, cnts, 'FileProcessed', dbupdate)
    print(cnts)
def testrun_md5_input_json_w_extra_file_object_name(env='webdev'):
    """Creates a random file object with no md5sum/content_md5sum and runs the md5 workflow.
    It waits 6 minutes for the workflow run to finish and then checks that the
    input file object has been updated.
    """
    bucket = "elasticbeanstalk-fourfront-" + env + "-wfoutput"
    ff_key = get_authentication_with_server(ff_env='fourfront-' + env)
    newfile = post_random_file(bucket, ff_key)
    uuid = newfile['uuid']
    accession = newfile['accession']
    wf_uuid = "c77a117b-9a58-477e-aaa5-291a109a99f6"
    input_json = {
        "config": {
            "ebs_type": "io1",
            "ebs_iops": 500,
            "json_bucket": "4dn-aws-pipeline-run-json",
            "shutdown_min": 30,
            "log_bucket": "tibanna-output",
            "key_name": "4dn-encode",
            "password": ""
        },
        "_tibanna": {
            "env": "fourfront-webdev",
            "run_type": "md5_test_extra"
        },
        "parameters": {},
        "app_name": "md5",
        "workflow_uuid": wf_uuid,
        "input_files": [{
            "workflow_argument_name": "input_file",
            "bucket_name": bucket,
            "uuid": uuid,
            "object_key": accession + '.pairs.gz.px2',
            "format_if_extra": "pairs_px2"
        }],
        "output_bucket": bucket,
        "wfr_meta": {"notes": "extra file md5 trigger test from test_webdev.py"}
    }
    resp = run_workflow(input_json)
    print(resp)
    # check result
    time.sleep(6 * 60)  # wait for 6 minutes
    filemeta = get_metadata(uuid, key=ff_key, add_on='?datastore=database')
    content_md5sum = filemeta.get('extra_files')[0].get('content_md5sum')
    md5sum = filemeta.get('extra_files')[0].get('md5sum')
    file_size = filemeta.get('extra_files')[0].get('file_size')
    wfr_uuid = get_wfr_uuid(resp['_tibanna']['exec_arn'])
    wfr_meta = get_metadata(wfr_uuid, key=ff_key, add_on='?datastore=database')
    assert 'input_files' in wfr_meta
    assert 'format_if_extra' in wfr_meta['input_files'][0]
    assert md5sum
    assert content_md5sum
    assert file_size
    print(content_md5sum)
    print(md5sum)
    print(file_size)
    patch_metadata({'status': 'deleted'}, uuid, key=ff_key)
    patch_metadata({'status': 'deleted'}, wfr_uuid, key=ff_key)
def main():  # pragma: no cover
    args = get_args()
    try:
        auth = get_authentication_with_server(args.key, args.env)
    except Exception:
        print("Authentication failed")
        sys.exit(1)
    print('#', auth.get('server'))
    id_list = scu.get_item_ids_from_args(args.input, auth, args.search)
    if args.fields:
        fields = args.fields
        header = '#id\t' + '\t'.join(fields)
        if args.noid is True:
            header = header.replace('#id\t', '#')
        print(header)
    problems = []
    for iid in id_list:
        try:
            res = get_metadata(iid, auth, add_on='frame=object')
        except Exception:
            problems.append(iid)
            continue
        if args.fields:
            line = ''
            # counts = {}
            for f in fields:
                val = res.get(f)
                # if val is not None:  # added in for specific use case
                if isinstance(val, dict):
                    val = val.get('uuid')
                elif isinstance(val, list):
                    # counts[f] = len(val)  # added in for specific use case
                    # if len(counts) > 1:
                    #     print(iid, '\t', counts)
                    # else:
                    #     cnt = list(counts.values())[0]
                    #     if cnt > 1:
                    #         print(iid, '\t', cnt)
                    vs = ''
                    for v in val:
                        if isinstance(v, dict):
                            v = v.get('uuid')
                        else:
                            v = str(v)
                        vs = vs + v + ', '
                    val = vs
                    if val.endswith(', '):
                        val = val[:-2]
                line = line + str(val) + '\t'
            if args.noid == 'False':
                line = iid + '\t' + line
            print(line)
        else:
            if args.noid is True:
                print(res)
            else:
                print(iid, '\t', res)
    if problems:
        print('THERE WAS A PROBLEM GETTING METADATA FOR THE FOLLOWING:')
        for p in problems:
            print(p)
def main(): """ Load a given JSON file with ontology terms inserts to a server using the `load_data` endpoint defined in loadxl. """ logging.basicConfig() # Loading app will have configured from config file. Reconfigure here: logging.getLogger('encoded').setLevel(logging.INFO) parser = argparse.ArgumentParser( description="Load Ontology Term Data", epilog=EPILOG, formatter_class=argparse.RawDescriptionHelpFormatter, ) parser.add_argument('json_file', help="File containing terms to load") parser.add_argument('--env', default='local', help='FF environment to update from. Defaults to local') parser.add_argument('--local-key', help='Access key ID if using local') parser.add_argument('--local-secret', help='Access key secret if using local') args = parser.parse_args() # authentication with Fourfront if args.env == 'local': # prompt access key ID and secret from user config_uri = 'development.ini' local_id = args.local_key if args.local_key else input('[local access key ID] ') local_secret = args.local_secret if args.local_secret else input('[local access key secret] ') auth = {'key': local_id, 'secret': local_secret, 'server': 'http://localhost:8000'} else: config_uri = 'production.ini' auth = ff_utils.get_authentication_with_server(None, args.env) load_endpoint = '/'.join([auth['server'], 'load_data']) logger.info('load_ontology_terms: Starting POST to %s' % load_endpoint) json_data = {'config_uri': config_uri, 'itype': 'ontology_term', 'overwrite': True, 'iter_response': True} with open(args.json_file) as infile: json_data['store'] = {'ontology_term': json.load(infile)} num_to_load = len(json_data['store']['ontology_term']) logger.info('Will attempt to load %s ontology terms to %s' % (num_to_load, auth['server'])) start = datetime.now() try: # sustained by returning Response.app_iter from loadxl.load_data res = ff_utils.authorized_request(load_endpoint, auth=auth, verb='POST', timeout=None, json=json_data) except Exception as exc: logger.error('Error on POST: %s' % str(exc)) else: # process the individual item responses from the generator. # each item should be "POST: <uuid>,", "PATCH: <uuid>,", or "SKIP: <uuid>" load_res = {'POST': [], 'PATCH': [], 'SKIP': [], 'ERROR': []} for val in res.text.split('\n'): if val.startswith('POST') or val.startswith('SKIP'): prefix_len = 4 # 'POST' or 'SKIP' else: prefix_len = 5 # 'PATCH' or 'ERROR' # this is a bit weird, but we want to split out the POST/PATCH... # and also remove ': ' from the value for each message cat, msg = val[:prefix_len], val[prefix_len + 2:] if not msg: continue if cat in load_res: load_res[cat].append(msg) logger.info("Success! Attempted to load %s items. Result: POSTed %s, PATCHed %s, skipped %s" % (num_to_load, len(load_res['POST']), len(load_res['PATCH']), len(load_res['SKIP']))) if load_res['ERROR']: logger.error("ERROR encountered during load_data! Error: %s" % load_res['ERROR']) if (len(load_res['POST']) + len(load_res['SKIP'])) > len(load_res['PATCH']): logger.error("The following items passed round I (POST/skip) but not round II (PATCH): %s" % (set(load_res['POST'] + load_res['SKIP']) - set(load_res['PATCH']))) logger.info("Finished request in %s" % str(datetime.now() - start)) # update sysinfo. 
Don't worry about doing this on local if args.env != 'local': data = {"name": "ffsysinfo", "ontology_updated": datetime.today().isoformat()} try: found_info = ff_utils.get_metadata('/sysinfos/' + data['name'], key=auth) except Exception: found_info = None if found_info: ff_utils.patch_metadata(data, found_info['uuid'], key=auth) else: ff_utils.post_metadata(data, 'sysinfos', key=auth) logger.info("Updated sysinfo with name %s" % data['name']) logger.info("DONE!")
def main():  # pragma: no cover
    start = datetime.now()
    print(str(start))
    args = get_args()
    try:
        auth = get_authentication_with_server(args.key, args.env)
    except Exception:
        print("Authentication failed")
        sys.exit(1)
    phase2 = {}
    # assumes a single line corresponds to the json for a single term
    if not args.dbupdate:
        print("DRY RUN - use --dbupdate to update the database")
    with open(args.infile) as terms:
        for t in terms:
            phase2json = {}
            term = json.loads(t)
            id_tag = get_id(term)
            if id_tag is None:
                print("No Identifier for ", term)
            else:
                tid = '/ontology-terms/' + id_tag
                # look for parents and slim_terms and remove them for phase 2 loading if they are there
                if 'parents' in term:
                    phase2json['parents'] = term['parents']
                    del term['parents']
                if 'slim_terms' in term:
                    phase2json['slim_terms'] = term['slim_terms']
                    del term['slim_terms']
                try:
                    dbterm = get_metadata(tid, auth)
                except:  # noqa
                    dbterm = None
                op = ''
                if dbterm and 'OntologyTerm' in dbterm.get('@type', []):
                    if args.dbupdate:
                        e = patch_metadata(term, dbterm["uuid"], auth)
                    else:
                        e = {'status': 'dry run'}
                    op = 'PATCH'
                else:
                    if args.dbupdate:
                        e = post_metadata(term, 'OntologyTerm', auth)
                    else:
                        e = {'status': 'dry run'}
                    op = 'POST'
                status = e.get('status')
                if status and status == 'dry run':
                    print(op, status)
                elif status and status == 'success':
                    print(op, status, e['@graph'][0]['uuid'])
                    if phase2json:
                        phase2[e['@graph'][0]['uuid']] = phase2json
                else:
                    print('FAILED', tid, e)
    print("START LOADING PHASE2 at ", str(datetime.now()))
    for tid, data in phase2.items():
        if args.dbupdate:
            e = patch_metadata(data, tid, auth)
        else:
            e = {'status': 'dry run'}
        status = e.get('status')
        if status and status == 'dry run':
            print('PATCH', status)
        elif status and status == 'success':
            print('PATCH', status, e['@graph'][0]['uuid'])
        else:
            print('FAILED', tid, e)
    end = datetime.now()
    print("FINISHED - START: ", str(start), "\tEND: ", str(end))
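# A hedged example of one input line for the phase-1/phase-2 loader above: each line
# holds the JSON for a single ontology term; 'parents' and 'slim_terms' are stripped
# out and patched in phase 2. The other field names are invented here, since the
# real identifier is whatever get_id() extracts.
#
#   {"term_id": "EFO:0000001", "term_name": "example term", "parents": ["uuid-of-parent"]}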