def main():  # pragma: no cover
    """Print items, or selected fields of items, for a set of item ids.

    Ids come from ``scu.get_item_ids_from_args`` using ``args.input`` and
    ``args.search``.  When ``args.fields`` is given, a tab-separated table
    is printed: a ``#``-prefixed header followed by one row per item.
    Otherwise the whole record returned by ``get_FDN`` is printed.
    ``args.noid`` suppresses the leading id column / id prefix.

    Exits with status 1 if the connection cannot be created.
    """
    args = get_args()
    try:
        connection = fdn_connection(args.keyfile, keyname=args.key)
    except Exception:
        print("Connection failed")
        sys.exit(1)

    id_list = scu.get_item_ids_from_args(args.input, connection, args.search)
    fields = args.fields
    # The original compared args.noid to the *string* 'False', which never
    # matched, so rows never carried the id that the '#id' header promised.
    # Use the same "is True" test everywhere so header and rows agree.
    show_id = args.noid is not True

    if fields:
        header = '\t'.join(fields)
        print(('#id\t' if show_id else '#') + header)

    for iid in id_list:
        res = get_FDN(iid, connection)
        if fields:
            cells = [iid] if show_id else []
            for f in fields:
                val = res.get(f)
                if isinstance(val, list):
                    # str() each member so lists of non-strings do not crash
                    val = ', '.join(str(v) for v in val)
                    if val.endswith(', '):
                        val = val[:-2]
                cells.append(str(val))
            # join (rather than append '\t' per cell) so rows have the same
            # number of columns as the header, with no trailing tab
            print('\t'.join(cells))
        elif show_id:
            print(iid, '\t', res)
        else:
            print(res)
def get_item_type(connection, item):
    """Return the first entry of an item's ``@type`` list, or None.

    ``item`` may be an already-fetched record (a dict carrying ``@type``)
    or an identifier.  If ``@type`` cannot be read directly from ``item``,
    the item is looked up with ``submit_utils.get_FDN`` and the type is read
    from the response.

    Returns None (after printing a message) when no type can be found.
    """
    try:
        # index rather than pop(0): reading the type must not strip it from
        # the caller's record
        return item['@type'][0]
    except (KeyError, TypeError, IndexError):
        pass

    res = submit_utils.get_FDN(item, connection)
    try:
        return res['@type'][0]
    except (AttributeError, KeyError, TypeError, IndexError):
        # a missing/empty '@type' raises KeyError/IndexError/TypeError, not
        # only AttributeError, so catch all of them
        print("Can't find a type for item %s" % item)
    return None
def main():  # pragma: no cover
    """Transfer selected fields from an old (biorxiv) record to a new one.

    Fetches the records named by ``args.old`` and ``args.new``, builds a
    patch for the new record from the old one, applies (or dry-runs) it via
    ``patch_and_report`` and then calls ``find_and_patch_item_references``
    to repoint references from the old uuid to the new uuid.

    Exits with status 1 if the connection fails, if either record has
    status 'error', or if the initial patch is not ok.
    """
    args = get_args()
    try:
        connection = fdn_connection(args.keyfile, keyname=args.key)
    except Exception:
        print("Connection failed")
        sys.exit(1)

    dryrun = not args.dbupdate
    old_pub = get_FDN(args.old, connection)
    new_pub = get_FDN(args.new, connection)

    # both records are fetched first, then checked in order
    lookups = (
        (old_pub, 'Biorxiv record %s cannot be found', args.old),
        (new_pub, 'Journal Article record %s cannot be found', args.new),
    )
    for record, message, ident in lookups:
        if record.get('status') == 'error':
            print(message % ident)
            sys.exit(1)

    # uuid of the record that receives the patch
    new_uuid = new_pub.get('uuid')

    # assemble the patch from the old record
    transfer_fields = [
        'categories',
        'exp_sets_prod_in_pub',
        'exp_sets_used_in_pub',
        'published_by',
    ]
    patch, not_transferred = create_patch_for_new_from_old(
        old_pub, new_pub, transfer_fields, args.vals2skip)
    patch, not_transferred = move_old_url_to_new_aka(
        old_pub, new_pub, patch, not_transferred)

    # apply it; stop here if the initial transfer does not work
    if not patch_and_report(connection, patch, not_transferred, new_uuid, dryrun):
        sys.exit(1)

    # update items that still reference the old paper
    old_uuid = old_pub.get('uuid')
    if not find_and_patch_item_references(connection, old_uuid, new_uuid, dryrun):
        print("ALL REFERENCES POINTING TO %s NOT UPDATED - CHECK AND FIX!" % old_uuid)
def get_types_that_can_have_field(connection, field):
    """Return the names of item types whose profile lists ``field``.

    The profiles are fetched from '/profiles/' (frame='raw'); a type is
    included when its 'properties' mapping has a truthy entry for ``field``,
    regardless of whether any item currently holds a value for it.
    """
    schemas = submit_utils.get_FDN('/profiles/', connection=connection, frame='raw')
    return [
        type_name
        for type_name, schema in schemas.items()
        if schema['properties'].get(field)
    ]
def get_linked_items(connection, itemid, found_items=None,
                     no_children=('Publication', 'Lab', 'User', 'Award')):
    """Recursively collect the items linked from ``itemid``.

    Every id found by ``find_uuids`` in the raw-frame record of an item is
    followed recursively, except that items whose type is listed in
    ``no_children`` are recorded but not descended into.  Relationships
    between the collected items are not preserved - the result is flat.

    Args:
        connection: connection passed through to ``submit_utils.get_FDN``.
        itemid: id of the item to start from.
        found_items: dict of already-collected ``{id: type}``; it is updated
            in place.  A fresh dict is created when omitted (the previous
            ``{}`` default was shared between calls).
        no_children: item types whose linked items are not followed.

    Returns:
        The ``found_items`` dict mapping each collected id to its type.
    """
    if found_items is None:
        found_items = {}
    if found_items.get(itemid):
        return found_items

    res = submit_utils.get_FDN(itemid, connection=connection, frame='raw')
    if 'error' in (res.get('status') or ''):
        return found_items

    try:
        obj_type = submit_utils.get_FDN(
            itemid, connection=connection)['@type'][0]
    except (AttributeError, KeyError, TypeError, IndexError):
        # without a type we can neither record the item nor decide whether
        # to descend; previously this fell through to an unbound obj_type
        print("Can't find a type for item %s" % itemid)
        return found_items
    found_items[itemid] = obj_type

    if obj_type in no_children:
        return found_items

    # NOTE(review): deepcopy kept from the original in case find_uuids
    # modifies what it is given - confirm and drop if it does not
    id_list = []
    for val in copy.deepcopy(res).values():
        # could be more than one item in a value
        foundids = find_uuids(val)
        if foundids:
            id_list.extend(foundids)

    for uid in set(id_list):
        if uid not in found_items:
            # the recursive call updates found_items in place; no_children is
            # forwarded so a caller-supplied list applies at every depth
            get_linked_items(connection, uid, found_items, no_children)
    return found_items
def main():  # pragma: no cover
    """Add ``args.tag`` to the given items (and optionally their linked items).

    Only item types that can carry a 'tags' field (minus any in
    ``args.types2exclude``) are considered.  Items that already have the tag
    are left alone.  With ``args.dbupdate`` the patches are applied and each
    response status printed; otherwise a DRY RUN line is printed per patch.

    Exits with status 1 if the connection cannot be created.
    """
    args = get_args()
    try:
        connection = fdn_connection(args.keyfile, keyname=args.key)
    except Exception:
        print("Connection failed")
        sys.exit(1)

    itemids = scu.get_item_ids_from_args(args.input, connection, args.search)
    taggable = scu.get_types_that_can_have_field(connection, 'tags')
    if args.types2exclude is not None:
        # drop the types the user explicitly asked not to tag
        taggable = [name for name in taggable if name not in args.types2exclude]

    already_checked = set()  # each item only needs to be examined once
    patches = {}  # item id -> patch body

    for itemid in itemids:
        if args.taglinked:
            # tag the item's linked items as well
            linked = scu.get_linked_items(connection, itemid, {})
            candidates = scu.filter_dict_by_value(linked, taggable, include=True)
        else:
            # tag just the item that was provided
            itype = scu.get_item_type(connection, itemid)
            candidates = {itemid: itype} if itype in taggable else {}

        for candidate in candidates:
            if candidate in already_checked:
                continue
            already_checked.add(candidate)
            record = get_FDN(candidate, connection)
            if scu.has_field_value(record, 'tags', args.tag):
                continue  # tag is already present
            patches[candidate] = make_tag_patch(record, args.tag)

    # apply or report
    for pid, patch in patches.items():
        if not args.dbupdate:
            print("DRY RUN: patch ", pid, " with ", patch)
            continue
        response = patch_FDN(pid, connection, patch)
        print(response['status'])
def add_tag2item(connection, iid, tag, seen, cnts, itype=None, dbupdate=False):
    """Fetch an item and patch ``tag`` onto it when appropriate.

    The item is skipped (and ``cnts['skipped']`` incremented) when it has no
    uuid, its uuid is already in ``seen``, ``has_released`` rejects its
    status, or ``make_tag_patch`` produces nothing.  Otherwise ``do_patch``
    is called with the item's type (falling back to ``itype``).  The uuid is
    appended to ``seen`` once the item passes the first check.

    Returns None.
    """
    # a fetch is needed because tags are not embedded
    item = get_FDN(iid, connection)
    status = item.get('status')
    uid = item.get('uuid')

    if not uid or uid in seen:
        print("SEEN OR IDLESS ITEM - SKIPPING")
        cnts['skipped'] += 1
        return
    seen.append(uid)

    if not has_released(status):
        print("STATUS %s doesn't get tagged - skipping %s" % (status, uid))
        cnts['skipped'] += 1
        return

    attype = get_attype(item) or itype
    patch = make_tag_patch(item, tag)
    if not patch:
        print('NOTHING TO PATCH - skipping %s' % uid)
        cnts['skipped'] += 1
        return

    do_patch(uid, attype, patch, connection, dbupdate, cnts)
    return
def main():  # pragma: no cover
    """Tag the experiment sets of a data release, plus their experiments and files.

    Searches for DataReleaseUpdate items whose ``update_tag`` equals
    ``args.reltag``, collects the ``primary_id`` of each of their
    ``update_items`` and, for every one that is an ExperimentSet, calls
    ``add_tag2item`` on the set, its experiments, and each experiment's
    files and processed files.  Counts are accumulated in a ``Counter`` and
    printed at the end.

    Exits with status 1 if the connection cannot be created.
    """
    args = get_args()
    dbupdate = args.dbupdate
    try:
        connection = fdn_connection(args.keyfile, keyname=args.key)
    except Exception:
        print("Connection failed")
        sys.exit(1)

    cnts = Counter()
    reltag = args.reltag
    # build the search query string
    query = 'type=DataReleaseUpdate&update_tag=' + reltag
    relupdates = scu.get_item_ids_from_args([query], connection, True)

    update_items = []
    for u in relupdates:
        res = get_FDN(u, connection)
        # 'or []' so an update without update_items is skipped, not a crash
        for ui in res.get('update_items') or []:
            if ui.get('primary_id'):
                update_items.append(ui['primary_id'])

    seen = []  # add_tag2item appends to this, so it must stay a list
    for item in update_items:
        res = get_FDN(item, connection)
        uid = res.get('uuid')
        item_type = get_attype(res)  # renamed: do not shadow builtin type()
        cnts[item_type] += 1
        # 'or ""' guards the membership test when no type was returned
        if (not uid) or (uid in seen) or ('ExperimentSet' not in (item_type or '')):
            # case for first freeze (no processed files included)
            print("SKIPPING ", uid)
            cnts['skipped'] += 1
            continue

        # everything past this point is an ExperimentSet (guaranteed by the
        # skip test above), so no second type check is needed
        add_tag2item(connection, uid, reltag, seen, cnts, item_type, dbupdate)

        # get the experiments and files
        exps = res.get('experiments_in_set')
        if exps is not None:
            cnts['Experiment'] += len(exps)
            for exp in exps:
                # NOTE(review): exp is passed as the item id and also read
                # with .get(), so it is presumably an embedded record -
                # confirm get_FDN accepts that
                add_tag2item(connection, exp, reltag, seen, cnts,
                             'Experiment', dbupdate)
                files = exp.get('files')
                if files is not None:
                    cnts['FileFastq'] += len(files)
                    for fastq in files:
                        add_tag2item(connection, fastq, reltag, seen, cnts,
                                     'FileFastq', dbupdate)
                epfiles = exp.get('processed_files')
                # epfiles = None  # case for first freeze (no processed files included)
                if epfiles is not None:
                    cnts['FileProcessed'] += len(epfiles)
                    for epf in epfiles:
                        add_tag2item(connection, epf, reltag, seen, cnts,
                                     'FileProcessed', dbupdate)

        # check the processed files directly associated to the eset
        # (deliberately disabled; the original commented-out lookup spelled
        # the key 'procesed_files' - use the line below when re-enabling)
        # pfiles = res.get('processed_files')
        pfiles = None  # case for first freeze (no processed files included)
        if pfiles is not None:
            cnts['FileProcessed'] += len(pfiles)
            for pf in pfiles:
                add_tag2item(connection, pf, reltag, seen, cnts,
                             'FileProcessed', dbupdate)

    print(cnts)
def get_item_uuid(iid, connection):
    """Return the uuid for ``iid``.

    If ``is_uuid`` accepts ``iid`` it is returned unchanged; otherwise the
    item is fetched with ``submit_utils.get_FDN`` and the 'uuid' entry of the
    response is returned (None when absent).
    """
    if not is_uuid(iid):
        return submit_utils.get_FDN(iid, connection).get('uuid')
    return iid
def is_released(itemid, connection):
    """Return True when the fetched item's status is 'released', else False."""
    record = get_FDN(itemid, connection)
    # a missing or empty status compares unequal, giving False
    return record.get('status') == 'released'
def _report_load_result(op, tid, result):
    """Print the outcome of one load operation; return the uuid on success, else None."""
    status = result.get('status')
    if status == 'dry run':
        print(op, status)
        return None
    if status == 'success':
        uuid = result['@graph'][0]['uuid']
        print(op, status, uuid)
        return uuid
    print('FAILED', tid, result)
    return None


def main():  # pragma: no cover
    """Load ontology terms from a file of one-JSON-object-per-line.

    Phase 1: each term is PATCHed if '/ontology-terms/<id>' already resolves
    to an OntologyTerm, otherwise POSTed.  Its 'parents' and 'slim_terms'
    are held back.  Phase 2: the held-back fields are PATCHed onto every
    term that loaded successfully.  Without ``args.dbupdate`` nothing is
    written and each operation is reported as a dry run.

    Exits with status 1 if the connection cannot be created.
    """
    start = datetime.now()
    print(str(start))
    args = get_args()
    try:
        connection = fdn_connection(args.keyfile, keyname=args.key)
    except Exception:
        print("Connection failed")
        sys.exit(1)

    dry_run_result = {'status': 'dry run'}
    phase2 = {}  # uuid -> fields deferred to the second pass
    if not args.dbupdate:
        print("DRY RUN - use --dbupdate to update the database")

    # assumes a single line corresponds to json for single term
    with open(args.infile) as terms:
        for line in terms:
            if not line.strip():
                continue  # a blank line would make json.loads raise
            term = json.loads(line)
            id_tag = get_id(term)
            if id_tag is None:
                print("No Identifier for ", term)
                continue
            tid = '/ontology-terms/' + id_tag

            # look for parents/slim_terms and remove for phase 2 loading
            phase2json = {}
            for deferred in ('parents', 'slim_terms'):
                if deferred in term:
                    phase2json[deferred] = term.pop(deferred)

            dbterm = get_FDN(tid, connection)
            # .get so a response lacking '@type' is treated as "not in db"
            if 'OntologyTerm' in dbterm.get('@type', []):
                op = 'PATCH'
                if args.dbupdate:
                    result = patch_FDN(dbterm["uuid"], connection, term)
                else:
                    result = dry_run_result
            else:
                op = 'POST'
                if args.dbupdate:
                    result = new_FDN(connection, 'OntologyTerm', term)
                else:
                    result = dry_run_result

            uuid = _report_load_result(op, tid, result)
            if uuid is not None and phase2json:
                phase2[uuid] = phase2json

    print("START LOADING PHASE2 at ", str(datetime.now()))
    for tid, data in phase2.items():
        if args.dbupdate:
            result = patch_FDN(tid, connection, data)
        else:
            result = dry_run_result
        _report_load_result('PATCH', tid, result)

    end = datetime.now()
    print("FINISHED - START: ", str(start), "\tEND: ", str(end))