Example #1
def main():  # pragma: no cover
    start = datetime.now()
    print(str(start))
    args = get_args()
    try:
        connection = fdn_connection(args.keyfile, keyname=args.key)
    except Exception as e:
        print("Connection failed")
        sys.exit(1)

    # assumes each line holds an item id and the json payload for that single item
    if not args.dbupdate:
        print("DRY RUN - use --dbupdate to update the database")
    with open(args.infile) as items:
        for i in items:
            iid, payload = [t.strip() for t in i.split('\t', 1)]
            payload = json.loads(payload)
            if args.dbupdate:
                e = patch_FDN(iid, connection, payload)
            else:
                print("DRY RUN\n\tPATCH: ", iid, " TO\n", payload)
                e = {'status': 'success'}

            status = e.get('status')
            if status and status == 'success':
                print(status)
            else:
                print('FAILED', e)

    end = datetime.now()
    print("FINISHED - START: ", str(start), "\tEND: ", str(end))
Example #2
def main():  # pragma: no cover
    args = get_args()
    try:
        connection = fdn_connection(args.keyfile, keyname=args.key)
    except Exception as e:
        print("Connection failed")
        sys.exit(1)

    id_list = scu.get_item_ids_from_args(args.input, connection, args.search)
    if args.fields:
        fields = args.fields

        header = '#id\t' + '\t'.join(fields)
        if args.noid is True:
            header = header.replace('#id\t', '#')
        print(header)
    for iid in id_list:
        res = get_FDN(iid, connection)
        if args.fields:
            line = ''
            for f in fields:
                val = res.get(f)
                if isinstance(val, list):
                    val = ', '.join(val)
                    if val.endswith(', '):
                        val = val[:-2]
                line = line + str(val) + '\t'
            if args.noid is False:
                line = iid + '\t' + line
            print(line)
        else:
            if args.noid is True:
                print(res)
            else:
                print(iid, '\t', res)
Example #3
def main():
    args = get_args()
    try:
        connection = ff.fdn_connection(args.keyfile, keyname=args.key)
    except Exception as e:
        print("Connection failed")
        sys.exit(1)
    itemids = scu.get_item_ids_from_args([args.query], connection, True)
    for itemid in itemids:
        print(itemid)
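
The heavy lifting here is scu.get_item_ids_from_args, which is not shown. Judging from how it is called across these examples, passing True as the third argument appears to make it treat the input strings as search query strings rather than item identifiers; that reading is an inference, not documented behavior. A hypothetical call:

# Hypothetical query string; the helper would return the ids of the matching items.
itemids = scu.get_item_ids_from_args(['type=Biosample&status=released'], connection, True)
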
Example #4
def main():  # pragma: no cover
    args = get_args()
    try:
        connection = fdn_connection(args.keyfile, keyname=args.key)
    except Exception as e:
        print("Connection failed")
        sys.exit(1)

    dryrun = not args.dbupdate

    biorxiv = get_FDN(args.old, connection)
    jarticle = get_FDN(args.new, connection)

    if biorxiv.get('status') == 'error':
        print('Biorxiv record %s cannot be found' % args.old)
        sys.exit(1)
    if jarticle.get('status') == 'error':
        print('Journal Article record %s cannot be found' % args.new)
        sys.exit(1)
    # make sure we can get the uuid to patch
    juuid = jarticle.get('uuid')
    if not juuid:
        print('Cannot get a uuid for %s' % args.new)
        sys.exit(1)
    # build the patch dictionary
    fields2transfer = [
        'categories', 'exp_sets_prod_in_pub', 'exp_sets_used_in_pub',
        'published_by'
    ]
    patch_dict, skipped = create_patch_for_new_from_old(
        biorxiv, jarticle, fields2transfer, args.vals2skip)
    patch_dict, skipped = move_old_url_to_new_aka(biorxiv, jarticle,
                                                  patch_dict, skipped)

    # do the patch
    ok = patch_and_report(connection, patch_dict, skipped, juuid, dryrun)

    if not ok:
        sys.exit(1)  # bail out if initial transfer doesn't work

    # find items with reference to old paper
    buuid = biorxiv.get('uuid')
    complete = find_and_patch_item_references(connection, buuid, juuid, dryrun)
    if not complete:
        print("ALL REFERENCES POINTING TO %s NOT UPDATED - CHECK AND FIX!" %
              buuid)
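
create_patch_for_new_from_old and move_old_url_to_new_aka are not shown here. As a rough sketch only, a field-transfer helper along these lines might copy each listed field from the old record into a patch for the new one while setting aside any values the caller asked to skip; this is an assumption about its behavior, not the actual implementation:

def transfer_fields_sketch(old, new, fields, vals2skip=None):
    # Assumed behavior: build a patch dict for the new item from the old one.
    # 'new' mirrors the real call signature but is unused in this sketch.
    patch, skipped = {}, {}
    vals2skip = vals2skip or []
    for f in fields:
        val = old.get(f)
        if not val:
            continue
        if isinstance(val, list):
            keep = [v for v in val if v not in vals2skip]
            skip = [v for v in val if v in vals2skip]
        else:
            keep = val if val not in vals2skip else None
            skip = [] if keep else [val]
        if keep:
            patch[f] = keep
        if skip:
            skipped[f] = skip
    return patch, skipped
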
Example #5
def main():  # pragma: no cover
    args = get_args()
    try:
        connection = fdn_connection(args.keyfile, keyname=args.key)
    except Exception as e:
        print("Connection failed")
        sys.exit(1)
    itemids = scu.get_item_ids_from_args(args.input, connection, args.search)
    taggable = scu.get_types_that_can_have_field(connection, 'tags')
    if args.types2exclude is not None:
        # remove explicitly provided types that should not be tagged
        taggable = [t for t in taggable if t not in args.types2exclude]

    seen = []   # only need to add tag once so this keeps track of what's been seen
    to_patch = {}   # keep track of those to patch
    # main loop through the top level item ids
    for itemid in itemids:
        items2tag = {}
        if args.taglinked:
            # need to get linked items and tag them
            linked = scu.get_linked_items(connection, itemid, {})
            items2tag = scu.filter_dict_by_value(linked, taggable, include=True)
        else:
            # only want to tag provided items
            itype = scu.get_item_type(connection, itemid)
            if itype in taggable:
                items2tag = {itemid: itype}
        for i, t in items2tag.items():
            if i not in seen:
                seen.append(i)
                item = get_FDN(i, connection)
                if not scu.has_field_value(item, 'tags', args.tag):
                    # not already tagged with this tag, so make a patch and add it to the dict
                    to_patch[i] = make_tag_patch(item, args.tag)

    # now do the patching or reporting
    for pid, patch in to_patch.items():
        if args.dbupdate:
            pres = patch_FDN(pid, connection, patch)
            print(pres['status'])
        else:
            print("DRY RUN: patch ", pid, " with ", patch)
Example #6
def main():
    args = get_args()
    try:
        connection = fdn_connection(args.keyfile, keyname=args.key)
    except Exception as e:
        print("Connection failed")
        sys.exit(1)

    id_list = scu.get_item_ids_from_args(args.input, connection, args.search)
    val = args.value
    if args.isarray:
        val = val.split("'")[1::2]
    for iid in id_list:
        print("PATCHING", iid, "to", args.field, "=", val)
        if args.dbupdate:
            # do the patch
            res = patch_FDN(iid, connection, {args.field: val})
            if res['status'] == 'success':
                print("SUCCESS!")
            else:
                print("FAILED TO PATCH", iid, "RESPONSE STATUS", res['status'],
                      res['description'])
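
The --isarray handling above relies on a small slicing trick: the value is passed as one string of single-quoted tokens, split("'") alternates between quoted and unquoted chunks, and [1::2] keeps only the quoted ones. For example:

val = "'released' 'in review'"
print(val.split("'")[1::2])  # ['released', 'in review']
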
Example #7
def main():  # pragma: no cover
    args = get_args()
    try:
        connection = fdn_connection(args.keyfile, keyname=args.key)
    except Exception as e:
        print("Connection failed")
        sys.exit(1)
    itemids = scu.get_item_ids_from_args(args.input, connection, args.search)
    excluded_types = get_excluded(args.types2exclude, args.types2include)
    no_child = ['Publication', 'Lab', 'User', 'Award']  # default types whose children are not followed
    if args.no_children:
        no_child = list(set(no_child + args.no_children))

    all_linked_ids = []
    # main loop through the top level item ids
    for itemid in itemids:
        linked = scu.get_linked_items(connection, itemid, {})
        if excluded_types is not None:
            linked = scu.filter_dict_by_value(linked, excluded_types, include=False)
        ll = [(k, linked[k]) for k in sorted(linked, key=linked.get)]
        for i, t in ll:
            suff = ''
            if i == itemid:
                suff = '\tINPUT'
            if is_released(i, connection):
                suff = '\tRELEASED' + suff
                if not args.include_released:
                    print(i, '\t', t, '\tSKIPPING', suff)
                    continue
            if i not in all_linked_ids:
                all_linked_ids.append(i)
            else:
                suff = suff + '\tSEEN'
            print(i, '\t', t, suff)
    for a in all_linked_ids:
        print(a)
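
scu.filter_dict_by_value is called here with include=False and in Example #5 with include=True. A plausible sketch of that contract (an assumption about a helper that is not shown): keep entries whose value is in the given collection when include is True, and drop them when it is False:

def filter_dict_by_value_sketch(d, values, include=True):
    # Assumed behavior, inferred from the two call sites in these examples.
    if include:
        return {k: v for k, v in d.items() if v in values}
    return {k: v for k, v in d.items() if v not in values}
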
Example #8
def main():  # pragma: no cover
    args = get_args()
    dbupdate = args.dbupdate
    try:
        connection = fdn_connection(args.keyfile, keyname=args.key)
    except Exception as e:
        print("Connection failed")
        sys.exit(1)

    cnts = Counter()
    reltag = args.reltag
    # build the search query string
    query = 'type=DataReleaseUpdate&update_tag=' + reltag
    relupdates = scu.get_item_ids_from_args([query], connection, True)
    update_items = []
    for u in relupdates:
        res = get_FDN(u, connection)
        for ui in res.get('update_items', []):
            if ui.get('primary_id'):
                update_items.append(ui['primary_id'])
    seen = []
    # update_items = ['experiment-set-replicates/4DNESOI2ALTL']
    for item in update_items:
        res = get_FDN(item, connection)
        uid = res.get('uuid')
        itype = get_attype(res)
        cnts[itype] += 1
        if (not uid) or (uid in seen) or ('ExperimentSet' not in itype):
            # case for first freeze (no processed files included)
            print("SKIPPING ", uid)
            cnts['skipped'] += 1
            continue
        add_tag2item(connection, uid, reltag, seen, cnts, itype, dbupdate)

        if 'ExperimentSet' in itype:
            # get the experiments and files
            exps = res.get('experiments_in_set')
            if exps is not None:
                cnts['Experiment'] += len(exps)
                for exp in exps:
                    add_tag2item(connection, exp, reltag, seen, cnts, 'Experiment', dbupdate)
                    files = exp.get('files')
                    if files is not None:
                        cnts['FileFastq'] += len(files)
                        for file in files:
                            add_tag2item(connection, file, reltag, seen, cnts, 'FileFastq', dbupdate)
                    epfiles = exp.get('processed_files')
                    # epfiles = None  # case for first freeze (no processed files included)
                    if epfiles is not None:
                        cnts['FileProcessed'] += len(epfiles)
                        for epf in epfiles:
                            add_tag2item(connection, epf, reltag, seen, cnts, 'FileProcessed', dbupdate)

            # check the processed files directly associated with the eset
            # pfiles = res.get('processed_files')
            pfiles = None  # case for first freeze (no processed files included)
            if pfiles is not None:
                cnts['FileProcessed'] += len(pfiles)
                for pf in pfiles:
                    add_tag2item(connection, pf, reltag, seen, cnts, 'FileProcessed', dbupdate)
    print(cnts)
Example #9
def main():  # pragma: no cover
    start = datetime.now()
    print(str(start))
    args = get_args()
    try:
        connection = fdn_connection(args.keyfile, keyname=args.key)
    except Exception as e:
        print("Connection failed")
        sys.exit(1)

    phase2 = {}
    # assumes a single line corresponds to json for single term
    if not args.dbupdate:
        print("DRY RUN - use --dbupdate to update the database")
    with open(args.infile) as terms:
        for t in terms:
            phase2json = {}
            term = json.loads(t)
            id_tag = get_id(term)
            if id_tag is None:
                print("No Identifier for ", term)
            else:
                tid = '/ontology-terms/' + id_tag
                # look for parents and remove for phase 2 loading if they are there
                if 'parents' in term:
                    phase2json['parents'] = term['parents']
                    del term['parents']
                if 'slim_terms' in term:
                    phase2json['slim_terms'] = term['slim_terms']
                    del term['slim_terms']

                dbterm = get_FDN(tid, connection)
                op = ''
                if 'OntologyTerm' in dbterm['@type']:
                    if args.dbupdate:
                        e = patch_FDN(dbterm["uuid"], connection, term)
                    else:
                        e = {'status': 'dry run'}
                    op = 'PATCH'
                else:
                    if args.dbupdate:
                        e = new_FDN(connection, 'OntologyTerm', term)
                    else:
                        e = {'status': 'dry run'}
                    op = 'POST'
                status = e.get('status')
                if status and status == 'dry run':
                    print(op, status)
                elif status and status == 'success':
                    print(op, status, e['@graph'][0]['uuid'])
                    if phase2json:
                        phase2[e['@graph'][0]['uuid']] = phase2json
                else:
                    print('FAILED', tid, e)

    print("START LOADING PHASE2 at ", str(datetime.now()))
    for tid, data in phase2.items():
        if args.dbupdate:
            e = patch_FDN(tid, connection, data)
        else:
            e = {'status': 'dry run'}
        status = e.get('status')
        if status and status == 'dry run':
            print('PATCH', status)
        elif status and status == 'success':
            print('PATCH', status, e['@graph'][0]['uuid'])
        else:
            print('FAILED', tid, e)
    end = datetime.now()
    print("FINISHED - START: ", str(start), "\tEND: ", str(end))