Example #1
def main():  # pragma: no cover
    args = get_args()
    try:
        connection = fdn_connection(args.keyfile, keyname=args.key)
    except Exception as e:
        print("Connection failed")
        sys.exit(1)

    id_list = scu.get_item_ids_from_args(args.input, connection, args.search)
    if args.fields:
        fields = args.fields

        header = '#id\t' + '\t'.join(fields)
        if args.noid is True:
            header = header.replace('#id\t', '#')
        print(header)
    for iid in id_list:
        res = get_FDN(iid, connection)
        if args.fields:
            line = ''
            for f in fields:
                val = res.get(f)
                if isinstance(val, list):
                    # join list values into a single comma-separated string
                    val = ', '.join(str(v) for v in val)
                line = line + str(val) + '\t'
            if not args.noid:
                line = iid + '\t' + line
            print(line)
        else:
            if args.noid is True:
                print(res)
            else:
                print(iid, '\t', res)
Example #2
def get_item_type(connection, item):
    try:
        return item['@type'].pop(0)
    except (KeyError, TypeError):
        res = submit_utils.get_FDN(item, connection)
        try:
            return res['@type'][0]
        except (KeyError, TypeError):
            print("Can't find a type for item %s" % item)
    return None
Example #3
def main():  # pragma: no cover
    args = get_args()
    try:
        connection = fdn_connection(args.keyfile, keyname=args.key)
    except Exception as e:
        print("Connection failed")
        sys.exit(1)

    dryrun = not args.dbupdate

    biorxiv = get_FDN(args.old, connection)
    jarticle = get_FDN(args.new, connection)

    if biorxiv.get('status') == 'error':
        print('Biorxiv record %s cannot be found' % args.old)
        sys.exit(1)
    if jarticle.get('status') == 'error':
        print('Journal Article record %s cannot be found' % args.new)
        sys.exit(1)
    # make sure we can get the uuid to patch
    juuid = jarticle.get('uuid')
    if not juuid:
        print('Cannot get a uuid for %s' % args.new)
        sys.exit(1)
    # build the patch dictionary
    fields2transfer = [
        'categories', 'exp_sets_prod_in_pub', 'exp_sets_used_in_pub',
        'published_by'
    ]
    patch_dict, skipped = create_patch_for_new_from_old(
        biorxiv, jarticle, fields2transfer, args.vals2skip)
    patch_dict, skipped = move_old_url_to_new_aka(biorxiv, jarticle,
                                                  patch_dict, skipped)

    # do the patch
    ok = patch_and_report(connection, patch_dict, skipped, juuid, dryrun)

    if not ok:
        sys.exit(1)  # bail out if initial transfer doesn't work

    # find items with reference to old paper
    buuid = biorxiv.get('uuid')
    complete = find_and_patch_item_references(connection, buuid, juuid, dryrun)
    if not complete:
        print("ALL REFERENCES POINTING TO %s NOT UPDATED - CHECK AND FIX!" %
              buuid)
Example #4
def get_types_that_can_have_field(connection, field):
    """find items that have the passed in fieldname in their properties
        even if there is currently no value for that field"""
    profiles = submit_utils.get_FDN('/profiles/',
                                    connection=connection,
                                    frame='raw')
    types_w_field = []
    for t, j in profiles.items():
        if j['properties'].get(field):
            types_w_field.append(t)
    return types_w_field
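
A brief usage sketch for the helper above. It assumes a connection object obtained via fdn_connection as in the other examples; the variable names are illustrative and not part of the original listing.

# Hypothetical usage sketch (assumes a working connection as in the other examples):
# list every item type whose schema allows a 'tags' field.
taggable = get_types_that_can_have_field(connection, 'tags')
for item_type in sorted(taggable):
    print(item_type)
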
Example #5
def get_linked_items(connection,
                     itemid,
                     found_items=None,
                     no_children=['Publication', 'Lab', 'User', 'Award']):
    """Given an ID for an item, all descendant linked item uuids (as given in 'frame=raw')
        are stored in a dict keyed by uuid with each item's type as the value.
        All descendants are retrieved recursively except the children of the types indicated
        in the no_children argument.
        The relationships between descendant linked items are not preserved - i.e. you don't
        know who are children, grandchildren, great grandchildren ... """
    # avoid the mutable default argument trap; a fresh dict is created per top-level call
    if found_items is None:
        found_items = {}
    if not found_items.get(itemid):
        res = submit_utils.get_FDN(itemid, connection=connection, frame='raw')
        if 'error' not in res['status']:
            # create an entry for this item in found_items
            try:
                obj_type = submit_utils.get_FDN(
                    itemid, connection=connection)['@type'][0]
                found_items[itemid] = obj_type
            except (KeyError, TypeError):
                print("Can't find a type for item %s" % itemid)
                return found_items
            if obj_type not in no_children:
                fields_to_check = copy.deepcopy(res)
                id_list = []
                for key, val in fields_to_check.items():
                    # could be more than one item in a value
                    foundids = find_uuids(val)
                    if foundids:
                        id_list.extend(foundids)
                if id_list:
                    id_list = [
                        i for i in list(set(id_list)) if i not in found_items
                    ]
                    for uid in id_list:
                        found_items.update(
                            get_linked_items(connection, uid, found_items))
    return found_items
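
A short usage sketch for the recursive helper above, assuming the same connection and submit_utils context as the surrounding examples; the item id and the grouping step are purely illustrative.

from collections import defaultdict

# Hypothetical usage (placeholder id; assumes a working connection):
# collect everything linked from one item and bucket the uuids by item type.
linked = get_linked_items(connection, 'placeholder-item-id-or-alias', {})
by_type = defaultdict(list)
for uuid, item_type in linked.items():
    by_type[item_type].append(uuid)
for item_type, uuids in sorted(by_type.items()):
    print(item_type, len(uuids))
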
Example #6
def main():  # pragma: no cover
    args = get_args()
    try:
        connection = fdn_connection(args.keyfile, keyname=args.key)
    except Exception as e:
        print("Connection failed")
        sys.exit(1)
    itemids = scu.get_item_ids_from_args(args.input, connection, args.search)
    taggable = scu.get_types_that_can_have_field(connection, 'tags')
    if args.types2exclude is not None:
        # remove explicitly provided types that should not be tagged
        taggable = [t for t in taggable if t not in args.types2exclude]

    seen = []   # only need to add tag once so this keeps track of what's been seen
    to_patch = {}   # keep track of those to patch
    # main loop through the top level item ids
    for itemid in itemids:
        items2tag = {}
        if args.taglinked:
            # need to get linked items and tag them
            linked = scu.get_linked_items(connection, itemid, {})
            items2tag = scu.filter_dict_by_value(linked, taggable, include=True)
        else:
            # only want to tag provided items
            itype = scu.get_item_type(connection, itemid)
            if itype in taggable:
                items2tag = {itemid: itype}
        for i, t in items2tag.items():
            if i not in seen:
                seen.append(i)
                item = get_FDN(i, connection)
                if not scu.has_field_value(item, 'tags', args.tag):
                    # not already tagged with this tag, so build a patch and add it to the dict
                    to_patch[i] = make_tag_patch(item, args.tag)

    # now do the patching or reporting
    for pid, patch in to_patch.items():
        if args.dbupdate:
            pres = patch_FDN(pid, connection, patch)
            print(pres['status'])
        else:
            print("DRY RUN: patch ", pid, " with ", patch)
Example #7
def add_tag2item(connection, iid, tag, seen, cnts, itype=None, dbupdate=False):
    # tags aren't embedded, so we need to GET the full item here
    item = get_FDN(iid, connection)
    status = item.get('status')
    uid = item.get('uuid')
    if (not uid) or (uid in seen):
        print("SEEN OR IDLESS ITEM - SKIPPING")
        cnts['skipped'] += 1
        return
    seen.append(uid)
    if has_released(status):
        attype = get_attype(item)
        if not attype:
            attype = itype
        patch = make_tag_patch(item, tag)
        if patch:
            do_patch(uid, attype, patch, connection, dbupdate, cnts)
        else:
            print('NOTHING TO PATCH - skipping %s' % uid)
            cnts['skipped'] += 1
    else:
        print("STATUS %s doesn't get tagged - skipping %s" % (status, uid))
        cnts['skipped'] += 1
    return
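
make_tag_patch is used in the two examples above but not shown in this listing. Based on how it is called (an item dict plus a tag, returning a patch body or a falsy value when there is nothing to add), a plausible sketch might look like the following; this is an assumption, not the original implementation.

def make_tag_patch(item, tag):
    # Hypothetical sketch of the helper used above (not the original code):
    # build a patch body that appends the tag to the item's existing tags,
    # or return None when the item already carries the tag.
    tags = item.get('tags', []) or []
    if tag in tags:
        return None
    return {'tags': tags + [tag]}
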
Example #8
def main():  # pragma: no cover
    args = get_args()
    dbupdate = args.dbupdate
    try:
        connection = fdn_connection(args.keyfile, keyname=args.key)
    except Exception as e:
        print("Connection failed")
        sys.exit(1)

    cnts = Counter()
    reltag = args.reltag
    # build the search query string
    query = 'type=DataReleaseUpdate&update_tag=' + reltag
    relupdates = scu.get_item_ids_from_args([query], connection, True)
    update_items = []
    for u in relupdates:
        res = get_FDN(u, connection)
        for ui in res.get('update_items', []):
            if ui.get('primary_id'):
                update_items.append(ui['primary_id'])
    seen = []
    # update_items = ['experiment-set-replicates/4DNESOI2ALTL']
    for item in update_items:
        res = get_FDN(item, connection)
        uid = res.get('uuid')
        itype = get_attype(res)
        cnts[itype] += 1
        if (not uid) or (uid in seen) or ('ExperimentSet' not in itype):
            # only experiment sets that haven't been seen yet get tagged here
            print("SKIPPING ", uid)
            cnts['skipped'] += 1
            continue
        add_tag2item(connection, uid, reltag, seen, cnts, itype, dbupdate)

        if 'ExperimentSet' in itype:
            # get the experiments and files
            exps = res.get('experiments_in_set')
            if exps is not None:
                cnts['Experiment'] += len(exps)
                for exp in exps:
                    add_tag2item(connection, exp, reltag, seen, cnts, 'Experiment', dbupdate)
                    files = exp.get('files')
                    if files is not None:
                        cnts['FileFastq'] += len(files)
                        for afile in files:
                            add_tag2item(connection, afile, reltag, seen, cnts, 'FileFastq', dbupdate)
                    epfiles = exp.get('processed_files')
                    # epfiles = None  # case for first freeze (no processed files included)
                    if epfiles is not None:
                        cnts['FileProcessed'] += len(epfiles)
                        for epf in epfiles:
                            add_tag2item(connection, epf, reltag, seen, cnts, 'FileProcessed', dbupdate)

            # check the processed files directly associated to the eset
            # pfiles = res.get('processed_files')
            pfiles = None  # case for first freeze (no processed files included)
            if pfiles is not None:
                cnts['FileProcessed'] += len(pfiles)
                for pf in pfiles:
                    add_tag2item(connection, pf, reltag, seen, cnts, 'FileProcessed', dbupdate)
    print(cnts)
Example #9
def get_item_uuid(iid, connection):
    """return a uuid for an item passed another id type"""
    if is_uuid(iid):
        return iid
    res = submit_utils.get_FDN(iid, connection)
    return res.get('uuid')
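
is_uuid is referenced above but not included in this listing. A minimal stand-in, offered only as an assumption about what such a check could look like, can rely on the standard uuid module.

import uuid


def is_uuid(value):
    # Hypothetical stand-in for the helper referenced above (not the original code):
    # True if the string parses as a UUID, False otherwise.
    try:
        uuid.UUID(str(value))
        return True
    except ValueError:
        return False
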
Example #10
def is_released(itemid, connection):
    item = get_FDN(itemid, connection)
    return item.get('status') == 'released'
Example #11
def main():  # pragma: no cover
    start = datetime.now()
    print(str(start))
    args = get_args()
    try:
        connection = fdn_connection(args.keyfile, keyname=args.key)
    except Exception as e:
        print("Connection failed")
        sys.exit(1)

    phase2 = {}
    # assumes each input line is the json for a single term
    if not args.dbupdate:
        print("DRY RUN - use --dbupdate to update the database")
    with open(args.infile) as terms:
        for t in terms:
            phase2json = {}
            term = json.loads(t)
            id_tag = get_id(term)
            if id_tag is None:
                print("No Identifier for ", term)
            else:
                tid = '/ontology-terms/' + id_tag
                # look for parents and remove for phase 2 loading if they are there
                if 'parents' in term:
                    phase2json['parents'] = term['parents']
                    del term['parents']
                if 'slim_terms' in term:
                    phase2json['slim_terms'] = term['slim_terms']
                    del term['slim_terms']

                dbterm = get_FDN(tid, connection)
                op = ''
                if 'OntologyTerm' in dbterm.get('@type', []):
                    if args.dbupdate:
                        e = patch_FDN(dbterm["uuid"], connection, term)
                    else:
                        e = {'status': 'dry run'}
                    op = 'PATCH'
                else:
                    if args.dbupdate:
                        e = new_FDN(connection, 'OntologyTerm', term)
                    else:
                        e = {'status': 'dry run'}
                    op = 'POST'
                status = e.get('status')
                if status and status == 'dry run':
                    print(op, status)
                elif status and status == 'success':
                    print(op, status, e['@graph'][0]['uuid'])
                    if phase2json:
                        phase2[e['@graph'][0]['uuid']] = phase2json
                else:
                    print('FAILED', tid, e)

    print("START LOADING PHASE2 at ", str(datetime.now()))
    for tid, data in phase2.items():
        if args.dbupdate:
            e = patch_FDN(tid, connection, data)
        else:
            e = {'status': 'dry run'}
        status = e.get('status')
        if status and status == 'dry run':
            print('PATCH', status)
        elif status and status == 'success':
            print('PATCH', status, e['@graph'][0]['uuid'])
        else:
            print('FAILED', tid, e)
    end = datetime.now()
    print("FINISHED - START: ", str(start), "\tEND: ", str(end))