def load_file(auth, itype, filename):
    payload = {'in_file': filename, 'overwrite': True, 'itype': itype}
    if 'localhost' in auth.get('server', ''):
        payload['config_uri'] = 'development.ini'
    post_metadata(payload, 'load_data', auth)
Example #2
def main():
    args = get_args(sys.argv[1:])
    try:
        auth = get_authentication_with_server(args.key, args.env)
    except Exception:
        print("Authentication failed")
        sys.exit(1)
    dryrun = not args.dbupdate

    file_list = scu.get_item_ids_from_args(args.input, auth, args.search)
    wf_data = get_metadata(args.workflow, auth)
    for f in file_list:
        file_info = get_metadata(f, auth)
        parents = file_info.get('produced_from')
        if parents:
            inputs = []
            for p in parents:
                inputs.append(get_metadata(p, auth))
            wfr_json = create_wfr_meta_only_json(auth, wf_data, inputs,
                                                 [file_info])
            if dryrun:
                print('DRY RUN -- will post')
                print(wfr_json)
            else:
                res = post_metadata(wfr_json, 'workflow_run_awsem', auth)
                # and add a notes_to_tsv to the file
                patchstatus = add_notes_to_tsv(file_info, auth)
                print(res)
                print(patchstatus)
Example #3
def run(keypairs_file, post_json_file, schema_name):

    assert os.path.isfile(keypairs_file)

    try:
        key = FDN_Key(keypairs_file, "default")
    except Exception as e:
        print(e)
        print("key error")
        raise e

    try:
        connection = FDN_Connection(key)
    except Exception as e:
        print(e)
        print("connection error")
        raise e

    try:
        with open(post_json_file, 'r') as f:
            post_item = json.load(f)
            response = ff_utils.post_metadata(post_item, schema_name, key=connection.key)
    except Exception as e:
        print(e)
        print("post error")
        raise e

    print(json.dumps(response))
def post_report_document_to_portal(connection, itype, logfile):
    """ Read the log file and encode it for upload as an attachment (blob) and
        post a Document for the log file

        TODO: the institution and project are hard-coded; should this info
        come from the user running the script?
    """
    inst = '828cd4fe-ebb0-4b36-a94a-d2e3a36cc989'
    proj = '12a92962-8265-4fc0-b2f8-cf14f05db58b'
    meta = {'institution': inst, 'project': proj}
    mimetype = "text/plain"
    rtype = 'document'
    date = datetime.utcnow().strftime("%Y-%m-%dT%H-%M-%SZ")
    if os.path.isfile(logfile):
        attach_fn = '{}_update_report_{}.txt'.format(itype, date)
        with io.open(logfile, 'rb') as at:
            data = at.read()
            data_href = 'data:%s;base64,%s' % (mimetype,
                                               b64encode(data).decode('ascii'))
            attach = {
                'download': attach_fn,
                'type': mimetype,
                'href': data_href
            }
            meta['attachment'] = attach
    if 'attachment' in meta:
        try:
            res = post_metadata(meta, rtype, connection)
            assert res.get('status') == 'success'
        except Exception as e:
            print("Problem posting report", e)
    return
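
A minimal usage sketch for the helper above; the connection dict, item type, and log path are placeholders, not values from the source:
# Hypothetical call: attach and post the update log for 'gene' items.
connection = {'key': '<id>', 'secret': '<secret>', 'server': 'https://example.org'}  # placeholder auth
post_report_document_to_portal(connection, 'gene', 'gene_update_report.log')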
Example #5
        def create_tracking_item(self, report_data=None, do_post_request=False, **kwargs):
            '''
            Wraps `report_data` in a TrackingItem Item.

            If `do_post_request` is True, will also POST the Item into fourfront database, according to the access_keys
            that the class was instantiated with.

            If `report_data` is not supplied or is None, will run query_reports() to get all reports defined in instance methods.
            '''
            if report_data is None:
                report_data = self.query_reports(**kwargs)

            # First make sure _all_ reporting methods defined on this class are included. Otherwise we might have tracking items in DB with missing data.
            for method_name in self.get_report_provider_method_names():
                if report_data['reports'].get(method_name) is None:
                    raise Exception("Not all potentially available data is included in report_data. Exiting.")
                if not isinstance(report_data['reports'][method_name], list):
                    raise Exception("Can only make tracking_item for report_data which does not contain extra raw report and request data, per the schema.")

            tracking_item = {
                "status"            : "released",
                "tracking_type"     : "google_analytics",
                "google_analytics"  : report_data
            }
            if do_post_request:
                response = ff_utils.post_metadata(tracking_item, 'tracking-items', key=dict(self.owner.access_key, server=self.owner.server))
                return response['@graph'][0]
            else:
                return tracking_item
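
A hedged usage sketch for the method above; `tracker` stands in for an instance of the surrounding class, and passing `do_post_request=False` only builds the TrackingItem dict without POSTing:
item = tracker.create_tracking_item(do_post_request=False)   # build locally, no POST
assert item['tracking_type'] == 'google_analytics'
posted = tracker.create_tracking_item(report_data=item['google_analytics'],
                                      do_post_request=True)  # POST to fourfront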
Example #6
def post_new_processedfile(file_format,
                           key,
                           extra_file_formats=None,
                           upload_file=None,
                           upload_content=None,
                           extension=None,
                           **kwargs):
    """upload_content must be in bytes"""
    if extra_file_formats:
        extra_files = [{'file_format': ef} for ef in extra_file_formats]
    else:
        extra_files = None
    other_fields = kwargs
    other_fields.update({"description": "tibanna test"})
    new_pf = ProcessedFileMetadata(file_format=file_format,
                                   extra_files=extra_files,
                                   other_fields=other_fields).as_dict()
    res = ff_utils.post_metadata(new_pf, 'FileProcessed', key=key)
    if upload_file:
        f_uuid = res['@graph'][0]['uuid']
        accession = res['@graph'][0]['accession']
        upload_key = f_uuid + '/' + accession + '.' + extension
        boto3.client('s3').upload_file(upload_file,
                                       BUCKET_NAME(DEV_ENV, 'FileProcessed'),
                                       upload_key)
    if upload_content:
        f_uuid = res['@graph'][0]['uuid']
        accession = res['@graph'][0]['accession']
        upload_key = f_uuid + '/' + accession + '.' + extension
        boto3.client('s3').put_object(Body=upload_content,
                                      Bucket=BUCKET_NAME(
                                          DEV_ENV, 'FileProcessed'),
                                      Key=upload_key)
    return res['@graph'][0]['uuid']
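
As the docstring above notes, `upload_content` must be bytes. A small sketch, where `ff_key`, the file format, and the content are illustrative placeholders:
pf_uuid = post_new_processedfile(file_format='pairs',
                                 key=ff_key,                            # placeholder key dict
                                 upload_content=b'chr1\t1\tchr1\t2\n',  # bytes, per the docstring
                                 extension='pairs.gz')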
Example #7
def post_new_fastqfile(key, upload_file=None, upload_content=None):
    """upload_content must be in bytes"""
    ffobject = {
        "uuid": str(uuid.uuid4()),
        "file_format": "fastq",
        "description": "tibanna test",
        "lab": DEFAULT_LAB,
        "award": DEFAULT_AWARD
    }
    res = ff_utils.post_metadata(ffobject, 'FileFastq', key=key)
    if upload_file:
        f_uuid = res['@graph'][0]['uuid']
        accession = res['@graph'][0]['accession']
        upload_key = f_uuid + '/' + accession + '.fastq.gz'
        boto3.client('s3').upload_file(upload_file,
                                       BUCKET_NAME(DEV_ENV, 'FileFastq'),
                                       upload_key)
    if upload_content:
        f_uuid = res['@graph'][0]['uuid']
        accession = res['@graph'][0]['accession']
        upload_key = f_uuid + '/' + accession + '.fastq.gz'
        boto3.client('s3').put_object(Body=upload_content,
                                      Bucket=BUCKET_NAME(DEV_ENV, 'FileFastq'),
                                      Key=upload_key)
    return res['@graph'][0]['uuid']
Example #8
def post(self, key, type_name=None):
    if not type_name:
        if self.run_platform == 'AWSEM':
            type_name = 'workflow_run_awsem'
        else:
            raise Exception(
                "cannot determine workflow schema type from the run platform: should be AWSEM."
            )
    return post_metadata(self.as_dict(), type_name, key=key)
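
A hedged sketch of calling this method, assuming `wfr` is an instance whose `run_platform` is 'AWSEM' and `ff_key` is a valid key dict:
resp = wfr.post(key=ff_key)   # posts the dict as 'workflow_run_awsem'
print(resp['status'])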
def load_json(auth, itype, item_list, chunk_size=50):
    list_length = len(item_list)
    curr_pos = 0
    while curr_pos < list_length:
        slice_for = chunk_size if (
            chunk_size and chunk_size <=
            (list_length - curr_pos)) else list_length - curr_pos
        new_end = curr_pos + slice_for
        chunk = item_list[curr_pos:new_end]
        store = {itype: chunk}
        payload = {'store': store, 'overwrite': True}
        if 'localhost' in auth.get('server', ''):
            payload['config_uri'] = 'development.ini'
        try:
            post_metadata(payload, 'load_data', auth)
        except Exception as e:
            print("PROBLEM WITH POST")
            print(e)
        curr_pos = new_end
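
A minimal sketch of the chunked loader above; the item type and generated items are placeholders, and `auth` is assumed to be a key dict with a 'server' entry:
items = [{'term_id': 'TST:%04d' % i} for i in range(120)]   # placeholder items
load_json(auth, 'ontology_term', items, chunk_size=50)      # posts 3 chunks: 50, 50, 20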
Example #10
def post_new_qc(qctype, key, **kwargs):
    if not qctype.startswith('QualityMetric'):
        raise Exception("qctype must begin with QualityMetric")
    qc_object = {
        "uuid": str(uuid.uuid4()),
        "lab": DEFAULT_LAB,
        "award": DEFAULT_AWARD
    }
    for k, v in kwargs.items():
        qc_object[k] = v
    res = ff_utils.post_metadata(qc_object, qctype, key=key)
    return res['@graph'][0]['uuid']
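
A brief sketch for the QC helper above; extra keyword arguments become fields on the new item. The item type and field values are illustrative only:
qc_uuid = post_new_qc('QualityMetricFastqc', key=ff_key,             # placeholder type / key
                      overall_quality_status='PASS',                 # hypothetical field
                      url='https://example.org/fastqc_report.html')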
Example #11
def publish_data_release_updates(connection, **kwargs):
    """ TODO: This action probably needs rewriting as well as it based on the OLD data_release_updates check. """
    action = ActionResult(connection, 'publish_data_release_updates')
    report_result = action.get_associated_check_result(kwargs)
    action.description = "Publish data release updates to Fourfront."
    updates_to_post = report_result.get('brief_output', {}).get('release_updates', [])
    section_to_post = report_result.get('brief_output', {}).get('static_section')
    # post items to FF
    posted_updates = []
    for update in updates_to_post:
        # should be in good shape to post as-is
        resp = ff_utils.post_metadata(update, 'data-release-updates', key=connection.ff_keys)
        posted_updates.append({'update': update, 'response': resp})
    if section_to_post:
        resp = ff_utils.post_metadata(section_to_post, 'static-sections', key=connection.ff_keys)
        posted_section = {'static_section': section_to_post, 'response': resp}
    else:
        posted_section = None
    action.output = {
        'updates_posted': posted_updates,
        'section_posted': posted_section
    }
    action.status = 'DONE'
    return action
Example #12
def run_missing_wfr(wf_info, input_files, run_name, auth, env):
    all_inputs = []
    for arg, files in input_files.items():
        inp = extract_file_info(files, arg, env)
        all_inputs.append(inp)
    # small tweak to get bg2bw working
    all_inputs = sorted(all_inputs, key=itemgetter('workflow_argument_name'))

    input_json = run_json(all_inputs, env, wf_info, run_name)
    e = ff_utils.post_metadata(input_json, 'WorkflowRun/run', key=auth)

    url = json.loads(e['input'])['_tibanna']['url']
    display(
        HTML("<a href='{}' target='_blank'>{}</a>".format(url, e['status'])))
    return
Example #13
def post_random_file(bucket,
                     ff_key,
                     file_format='pairs',
                     extra_file_format='pairs_px2',
                     file_extension='pairs.gz',
                     extra_file_extension='pairs.gz.px2',
                     schema='file_processed',
                     extra_status=None):
    """Generates a fake file with random uuid and accession
    and posts it to fourfront. The content is unique since it contains
    its own uuid. The file metadata does not contain md5sum or
    content_md5sum.
    Uses the given fourfront keys
    """
    uuid = str(uuid4())
    accession = generate_rand_accession()
    newfile = {
        "accession": accession,
        "file_format": file_format,
        "award": "b0b9c607-f8b4-4f02-93f4-9895b461334b",
        "lab": "828cd4fe-ebb0-4b36-a94a-d2e3a36cc989",
        "uuid": uuid
    }
    upload_key = uuid + '/' + accession + '.' + file_extension
    tmpfilename = 'alsjekvjf'
    with gzip.open(tmpfilename, 'wb') as f:
        f.write(uuid.encode('utf-8'))  # gzip in 'wb' mode needs bytes
    s3 = boto3.resource('s3')
    s3.meta.client.upload_file(tmpfilename, bucket, upload_key)

    # extra file
    if extra_file_format:
        newfile["extra_files"] = [{
            "file_format": extra_file_format,
            "accession": accession,
            "uuid": uuid
        }]
        if extra_status:
            newfile["extra_files"][0]['status'] = extra_status
        extra_upload_key = uuid + '/' + accession + '.' + extra_file_extension
        extra_tmpfilename = 'alsjekvjf-extra'
        with open(extra_tmpfilename, 'w') as f:
            f.write(uuid + extra_file_extension)
        s3.meta.client.upload_file(extra_tmpfilename, bucket, extra_upload_key)
    response = post_metadata(newfile, schema, key=ff_key)
    print(response)
    return newfile
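
A usage sketch for the fake-file generator above; the bucket and key are placeholders, and the defaults post a `file_processed` item backed by a `pairs.gz` object in S3:
fake = post_random_file('my-test-bucket', ff_key)   # hypothetical bucket / key dict
print(fake['uuid'], fake['accession'])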
Example #14
def post_random_file(bucket,
                     ff_key,
                     file_format='pairs',
                     extra_file_format='pairs_px2',
                     file_extension='pairs.gz',
                     extra_file_extension='pairs.gz.px2',
                     schema='file_processed',
                     extra_status=None):
    """Generates a fake file with random uuid and accession
    and posts it to fourfront. The content is unique since it contains
    its own uuid. The file metadata does not contain md5sum or
    content_md5sum.
    Uses the given fourfront keys
    """
    uuid = str(uuid4())
    accession = generate_rand_accession(ACCESSION_PREFIX, 'FI')
    newfile = {
        "accession": accession,
        "file_format": file_format,
        "institution": DEFAULT_INSTITUTION,
        "project": DEFAULT_PROJECT,
        "uuid": uuid
    }
    upload_key = uuid + '/' + accession + '.' + file_extension
    tmpfilename = 'alsjekvjf'
    with gzip.open(tmpfilename, 'wb') as f:
        f.write(uuid.encode('utf-8'))
    s3 = boto3.resource('s3')
    s3.meta.client.upload_file(tmpfilename, bucket, upload_key)

    # extra file
    if extra_file_format:
        newfile["extra_files"] = [{
            "file_format": extra_file_format,
            "accession": accession,
            "uuid": uuid
        }]
        if extra_status:
            newfile["extra_files"][0]['status'] = extra_status
        extra_upload_key = uuid + '/' + accession + '.' + extra_file_extension
        extra_tmpfilename = 'alsjekvjf-extra'
        with open(extra_tmpfilename, 'w') as f:
            f.write(uuid + extra_file_extension)
        s3.meta.client.upload_file(extra_tmpfilename, bucket, extra_upload_key)
    response = post_metadata(newfile, schema, key=ff_key)
    print(response)
    return newfile
def main(ff_env='fourfront-cgapwolf',
         skip_software=False,
         skip_file_format=False,
         skip_workflow=False):
    """post / patch contents from portal_objects to the portal"""
    keycgap = ff_utils.get_authentication_with_server(ff_env=ff_env)

    # software
    if not skip_software:
        print("Processing software...")
        with open('portal_objects/software.json') as f:
            d = json.load(f)

        for dd in d:
            print("  processing uuid %s" % dd['uuid'])
            try:
                ff_utils.post_metadata(dd, 'Software', key=keycgap)
            except Exception:
                ff_utils.patch_metadata(dd, dd['uuid'], key=keycgap)

    # file formats
    if not skip_file_format:
        print("Processing file format...")
        with open('portal_objects/file_format.json') as f:
            d = json.load(f)

        for dd in d:
            print("  processing uuid %s" % dd['uuid'])
            try:
                ff_utils.post_metadata(dd, 'FileFormat', key=keycgap)
            except Exception:
                ff_utils.patch_metadata(dd, dd['uuid'], key=keycgap)

    # workflows
    if not skip_workflow:
        print("Processing workflow...")
        wf_dir = "portal_objects/workflows"
        files = os.listdir(wf_dir)

        for fn in files:
            if fn.endswith('.json'):
                print("  processing file %s" % fn)
                with open(os.path.join(wf_dir, fn), 'r') as f:
                    d = json.load(f)
                try:
                    ff_utils.post_metadata(d, 'Workflow', key=keycgap)
                except Exception:
                    ff_utils.patch_metadata(d, d['uuid'], key=keycgap)
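
The three blocks above repeat the same post-then-patch fallback; a small hedged sketch of that pattern as a helper (the name and structure are illustrative, not from the source):
def upsert_item(item, item_type, key):
    """Try to POST an item; if that fails (e.g. it already exists), PATCH it by uuid."""
    try:
        return ff_utils.post_metadata(item, item_type, key=key)
    except Exception:
        return ff_utils.patch_metadata(item, item['uuid'], key=key)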
Example #16
def post_random_file(bucket, ff_key):
    """Generates a fake pairs.gz file with random uuid and accession
    and posts it to fourfront. The content is unique since it contains
    its own uuid. The file metadata does not contain md5sum or
    content_md5sum.
    Uses the given fourfront keys
    """
    uuid = str(uuid4())
    accession = generate_rand_accession()
    newfile = {
        "accession": accession,
        "file_format": "pairs",
        "award": "b0b9c607-f8b4-4f02-93f4-9895b461334b",
        "lab": "828cd4fe-ebb0-4b36-a94a-d2e3a36cc989",
        "uuid": uuid,
        "extra_files": [{
            "file_format": "pairs_px2",
            "accession": accession,
            "uuid": uuid
        }]
    }
    upload_key = uuid + '/' + accession + '.pairs.gz'
    tmpfilename = 'alsjekvjf.gz'
    with gzip.open(tmpfilename, 'wb') as f:
        f.write(uuid.encode('utf-8'))  # gzip in 'wb' mode needs bytes
    extra_upload_key = uuid + '/' + accession + '.pairs.gz.px2'
    extra_tmpfilename = 'alsjekvjf-extra.gz'
    with gzip.open(extra_tmpfilename, 'wb') as f:
        f.write((uuid + '.px2').encode('utf-8'))
    response = post_metadata(newfile, 'file_processed', key=ff_key)
    print(response)
    s3 = boto3.resource('s3')
    s3.meta.client.upload_file(tmpfilename, bucket, upload_key)
    s3.meta.client.upload_file(extra_tmpfilename, bucket, extra_upload_key)
    return newfile
Example #17
def post(self, key):
    return post_metadata(self.as_dict(),
                         "file_processed",
                         key=key,
                         add_on='force_md5')
Example #18
def main():

    # getting authentication keys
    args = get_args()
    try:
        auth = ff_utils.get_authentication_with_server(args.key)
    except Exception as e:
        print("Authentication failed", e)
        sys.exit(1)

    dryrun = args.dryrun
    if dryrun:
        print("\nThis is a dry run\n")

    # collecting publication and expset search results
    hic_types = [
        'in+situ+Hi-C', 'Dilution+Hi-C', 'DNase+Hi-C', 'Micro-C', 'TCC'
    ]
    query_pub = '/search/?type=Publication'
    query_exp = '/search/?type=ExperimentSetReplicate&status=released'
    for hic_type in hic_types:
        query_pub += '&exp_sets_prod_in_pub.experiments_in_set.experiment_type.display_title=' + hic_type
        query_exp += '&experiments_in_set.experiment_type.display_title=' + hic_type
    pubs_search = ff_utils.search_metadata(query_pub, key=auth)
    expsets_search = ff_utils.search_metadata(query_exp, key=auth)

    # building publications dictionary
    pubs_dict = convert_pubs_list_to_lookup(pubs_search)

    # loading dataset groups from json file
    repo_path = Path(__file__).resolve().parents[1]
    dsg_filename = repo_path.joinpath('files', 'dsg.json')
    if dsg_filename.exists():
        with open(dsg_filename) as dsg_fn:
            dsgs = json.load(dsg_fn)
    else:
        sys.exit("ERROR: Dataset grouping file not found")

    # making dataset list and mapping to dataset group
    dataset_list = []
    datasets_of_dsg = {}
    for k, v in dsgs.items():
        if v.get("datasets"):
            dataset_list.extend(v["datasets"])
            datasets_of_dsg[k] = v["datasets"]
        else:
            # if a dsg does not have datasets, then the dsg itself is the dataset
            dataset_list.append(k)

    # building the output table
    table = {}
    new_datasets = set()
    study_groups = set()

    for expset in expsets_search:
        dataset = expset.get("dataset_label")
        if dataset not in dataset_list:
            new_datasets.add(dataset)
            continue

        dsg = dataset
        dsg_link = "dataset_label=" + dataset
        for group, elements in datasets_of_dsg.items():
            if dataset in elements:
                dsg_link = ("dataset_label=" +
                            "&dataset_label=".join(elements))
                dsg = group
                break
        dsg_link = "/browse/?" + dsg_link.replace("+", "%2B").replace(
            "/", "%2F").replace(" ", "+")

        study_groups.add(dsgs[dsg].get("study_group"))

        row = table.get(dsg, {})
        table[dsg] = assemble_data_for_the_row(row, expset, dsg, dsg_link,
                                               pubs_dict, dsgs[dsg])

    # summarize number of experiment sets of each experiment type in a string
    for dsg, row in table.items():
        exp_type_summary = ""
        for exp_type, count in row["Replicate Sets"].items():
            if count > 0:
                exp_type_summary += str(count) + " " + exp_type + "<br>"
        if len(exp_type_summary) > 0:
            row['Replicate Sets'] = exp_type_summary[:-4]  # remove the trailing <br>
        else:
            row['Replicate Sets'] = ""

    # if new datasets are not in the json, ask what to do
    if new_datasets:
        print("New datasets found (not present in the json file):")
        for ds in new_datasets:
            print(ds)
        print("(i)gnore datasets or (e)xit to manually add them? [i/e]")
        response = None
        while response not in ['i', 'e']:
            response = input()
        if response == 'e':
            sys.exit("Add new dataset to dsg.json before generating table")

    # patch the static section for each study group
    skipped = []
    posted = []
    patched = []
    for studygroup in list(study_groups):

        # prepare static section
        table_dsg = {}
        for dsg in dsgs:
            if table.get(dsg) and table[dsg].get("Class") == studygroup:
                table_dsg[dsg] = table[dsg]

        keys = [
            'Data Set', 'Project', 'Replicate Sets', 'Species', 'Biosources',
            'Publication', 'Study', 'Lab'
        ]
        if studygroup == "Single Time Point and Condition":
            keys.remove('Study')

        name = alias = output = filetype = None
        if args.format == 'markdown':
            name = "data-highlights.hic." + studygroup + ".md"
            name = name.lower().replace(" ", "-")
            alias = "4dn-dcic-lab:" + name
            filetype = 'jsx'
            default_col_widths = "[-1,100,-1,100,-1,-1,-1,-1]"
            if "Study" not in keys:
                default_col_widths = "[-1,100,-1,120,250,-1,170]"
            output = md_table_maker(table_dsg, keys, name, default_col_widths)
        else:
            name = "data-highlights.hic." + studygroup
            name = name.lower().replace(" ", "-")
            alias = "4dn-dcic-lab:" + name
            filetype = 'html'
            styles = {
                'Data Set': ";width:20%;min-width:120px",
                'Replicate Sets': ";width:150px",
                'Publication': ";width:200px"
            }
            output = html_table_maker(table_dsg, keys, styles)

        # check if static section exists
        post = False
        try:
            ff_utils.get_metadata(alias, auth)
        except Exception:
            print("'{}' static section cannot be patched because it does not exist"
                  .format(studygroup))
            print("Do you want to (p)ost or (s)kip this static section? [p/s]")
            response = None
            while response not in ['p', 's']:
                response = input()
            if response == 's':
                skipped.append(alias)
                continue
            else:
                post = True

        # post or patch static section
        if post:
            post_body = {
                "name": name,
                "aliases": [alias],
                "body": output,
                "section_type": "Page Section",
                "title": studygroup,
                "options": {
                    "collapsible": True,
                    "default_open": True,
                    "filetype": filetype
                }
            }
            if not dryrun:
                res = ff_utils.post_metadata(post_body,
                                             "StaticSection",
                                             key=auth)
            posted.append(alias)
        else:
            patch_body = {"body": output}
            if not dryrun:
                res = ff_utils.patch_metadata(patch_body, alias, key=auth)
            patched.append(alias)
        if not dryrun:
            print("{}: {}".format(alias, res['status']))

    # summarize results
    print("Static sections summary: {} patched, {} posted, {} skipped".format(
        len(patched), len(posted), len(skipped)))
    if posted:
        print(
            "Remember to add the new static section(s) to the hic-data-overview page:"
        )
        for item in posted:
            print(item)
    if skipped:
        print("Skipped sections:")
        for item in skipped:
            print(item)
Example #19
def run_missing_atac1(wf_info, organism, paired, files, obj_keys, my_env,
                      my_key, run_name):
    my_s3_util = s3Utils(env=my_env)
    raw_bucket = my_s3_util.raw_file_bucket
    out_bucket = my_s3_util.outfile_bucket

    if organism == "human":
        org = 'hs'
        input_files = [{
            "object_key": "4DNFIMQPTYDY.bowtie2Index.tar",
            "rename": "GRCh38_no_alt_analysis_set_GCA_000001405.15.fasta.tar",
            "bucket_name": raw_bucket,
            "workflow_argument_name": "atac.bowtie2_idx_tar",
            "uuid": "28ab6265-f426-4a23-bb8a-f28467ad505b"
        }, {
            "object_key": "4DNFIZ1TGJZR.bed.gz",
            "bucket_name": raw_bucket,
            "workflow_argument_name": "atac.blacklist",
            "uuid": "9562ffbd-9f7a-4bd7-9c10-c335137d8966"
        }, {
            "object_key": "4DNFIZJB62D1.chrom.sizes",
            "bucket_name": raw_bucket,
            "workflow_argument_name": "atac.chrsz",
            "uuid": "9866d158-da3c-4d9b-96a9-1d59632eabeb"
        }]

    elif organism == "mouse":
        org = 'mm'
        input_files = [{
            "object_key": "4DNFI2493SDN.bowtie2Index.tar",
            "rename": "mm10_no_alt_analysis_set_ENCODE.fasta.tar",
            "bucket_name": raw_bucket,
            "workflow_argument_name": "atac.bowtie2_idx_tar",
            "uuid": "63e22058-79c6-4e24-8231-ca4afac29dda"
        }, {
            "object_key": "4DNFIZ3FBPK8.bed.gz",
            "bucket_name": raw_bucket,
            "workflow_argument_name": "atac.blacklist",
            "uuid": "a32747a3-8a9e-4a9e-a7a1-4db0e8b65925"
        }, {
            "object_key": "4DNFIBP173GC.chrom.sizes",
            "bucket_name": raw_bucket,
            "workflow_argument_name": "atac.chrsz",
            "uuid": "be0a9819-d2ce-4422-be4b-234fb1677dd9"
        }]

    input_files.append({
        "object_key": obj_keys,
        "bucket_name": raw_bucket,
        "workflow_argument_name": "atac.fastqs",
        "uuid": files
    })

    if paired == 'single':
        chip_p = False
    elif paired == 'paired':
        chip_p = True
    parameters = {
        "atac.pipeline_type": 'atac',
        "atac.paired_end": chip_p,
        "atac.gensz": org,
        "atac.bam2ta.regex_grep_v_ta": "chr[MUE]|random|alt",
        "atac.disable_ataqc": True,
        "atac.enable_xcor": False,
        "atac.trim_adapter.auto_detect_adapter": True,
        "atac.bowtie2.cpu": 4,
        "atac.filter.cpu": 4,
        "atac.bam2ta.cpu": 4,
        "atac.trim_adapter.cpu": 4,
        "atac.align_only": True
    }

    if paired == 'single':
        frag_temp = [300]
        fraglist = frag_temp * len(files)
        parameters['atac.fraglen'] = fraglist

    tag = '1.1.1'
    # create the trigger json that is consumed by the fourfront endpoint
    input_json = {
        'input_files': input_files,
        'output_bucket': out_bucket,
        'workflow_uuid': wf_info['wf_uuid'],
        "app_name": wf_info['wf_name'],
        "wfr_meta": wf_info['wfr_meta'],
        "parameters": parameters,
        "config": wf_info['config'],
        "custom_pf_fields": wf_info['custom_pf_fields'],
        "_tibanna": {
            "env": my_env,
            "run_type": wf_info['wf_name'],
            "run_id": run_name
        },
        "tag": tag
    }
    # r = json.dumps(input_json)
    # print(r)
    e = ff_utils.post_metadata(input_json, 'WorkflowRun/run', key=my_key)
    url = json.loads(e['input'])['_tibanna']['url']
    display(
        HTML("<a href='{}' target='_blank'>{}</a>".format(url, e['status'])))
Example #20
def run_missing_chip2(control_set, wf_info, organism, target_type, paired, ta,
                      ta_xcor, ta_cnt, my_env, my_key, run_ids):
    my_s3_util = s3Utils(env=my_env)
    raw_bucket = my_s3_util.raw_file_bucket
    out_bucket = my_s3_util.outfile_bucket

    if organism == "human":
        org = 'hs'
        input_files = [{
            "object_key": "4DNFIZ1TGJZR.bed.gz",
            "bucket_name": raw_bucket,
            "workflow_argument_name": "chip.blacklist",
            "uuid": "9562ffbd-9f7a-4bd7-9c10-c335137d8966"
        }, {
            "object_key": "4DNFIZJB62D1.chrom.sizes",
            "bucket_name": raw_bucket,
            "workflow_argument_name": "chip.chrsz",
            "uuid": "9866d158-da3c-4d9b-96a9-1d59632eabeb"
        }]

    elif organism == "mouse":
        org = 'mm'
        input_files = [{
            "object_key": "4DNFIZ3FBPK8.bed.gz",
            "bucket_name": raw_bucket,
            "workflow_argument_name": "chip.blacklist",
            "uuid": "a32747a3-8a9e-4a9e-a7a1-4db0e8b65925"
        }, {
            "object_key": "4DNFIBP173GC.chrom.sizes",
            "bucket_name": raw_bucket,
            "workflow_argument_name": "chip.chrsz",
            "uuid": "be0a9819-d2ce-4422-be4b-234fb1677dd9"
        }]

    ta_f = extract_file_info(ta,
                             'chip.tas',
                             my_env,
                             rename=['bed', 'tagAlign'])
    input_files.append(ta_f)
    ta_xcor_f = extract_file_info(ta_xcor,
                                  'chip.bam2ta_no_filt_R1.ta',
                                  my_env,
                                  rename=['bed', 'tagAlign'])
    input_files.append(ta_xcor_f)
    if control_set:
        ta_cnt = extract_file_info(ta_cnt,
                                   'chip.ctl_tas',
                                   my_env,
                                   rename=['bed', 'tagAlign'])
        input_files.append(ta_cnt)

    if paired == 'single':
        chip_p = False
    elif paired == 'paired':
        chip_p = True
    if not control_set:
        if target_type == 'histone':
            print(
                'HISTONE WITHOUT CONTROL NEEDS ATTENTION (change to tf), skipping for now'
            )
            return

    parameters = {
        "chip.pipeline_type": target_type,
        "chip.paired_end": chip_p,
        "chip.choose_ctl.always_use_pooled_ctl": True,
        "chip.qc_report.name": run_ids['run_name'],
        "chip.qc_report.desc": run_ids['desc'],
        "chip.gensz": org,
        "chip.xcor.cpu": 4,
        "chip.spp_cpu": 4
    }

    if paired == 'single':
        frag_temp = [300]
        fraglist = frag_temp * len(ta)
        parameters['chip.fraglen'] = fraglist

    tag = '1.1.1'
    # create the trigger json that is consumed by the fourfront endpoint
    input_json = {
        'input_files': input_files,
        'output_bucket': out_bucket,
        'workflow_uuid': wf_info['wf_uuid'],
        "app_name": wf_info['wf_name'],
        "wfr_meta": wf_info['wfr_meta'],
        "parameters": parameters,
        "config": wf_info['config'],
        "custom_pf_fields": wf_info['custom_pf_fields'],
        "_tibanna": {
            "env": my_env,
            "run_type": wf_info['wf_name'],
            "run_id": run_ids['run_name']
        },
        "tag": tag
    }
    # r = json.dumps(input_json)
    # print(r)
    e = ff_utils.post_metadata(input_json, 'WorkflowRun/run', key=my_key)
    url = json.loads(e['input'])['_tibanna']['url']
    display(
        HTML("<a href='{}' target='_blank'>{}</a>".format(url, e['status'])))
Example #21
def run_missing_chip1(control, wf_info, organism, target_type, paired, files,
                      obj_keys, my_env, my_key, run_name):
    my_s3_util = s3Utils(env=my_env)
    raw_bucket = my_s3_util.raw_file_bucket
    out_bucket = my_s3_util.outfile_bucket

    if organism == "human":
        org = 'hs'
        input_files = [{
            "object_key": "4DNFIZQB369V.bwaIndex.tar",
            "rename": "GRCh38_no_alt_analysis_set_GCA_000001405.15.fasta.tar",
            "bucket_name": raw_bucket,
            "workflow_argument_name": "chip.bwa_idx_tar",
            "uuid": "38077b98-3862-45cd-b4be-8e28e9494549"
        }, {
            "object_key": "4DNFIZ1TGJZR.bed.gz",
            "bucket_name": raw_bucket,
            "workflow_argument_name": "chip.blacklist",
            "uuid": "9562ffbd-9f7a-4bd7-9c10-c335137d8966"
        }, {
            "object_key": "4DNFIZJB62D1.chrom.sizes",
            "bucket_name": raw_bucket,
            "workflow_argument_name": "chip.chrsz",
            "uuid": "9866d158-da3c-4d9b-96a9-1d59632eabeb"
        }]

    elif organism == "mouse":
        org = 'mm'
        input_files = [{
            "object_key": "4DNFIZ2PWCC2.bwaIndex.tar",
            "rename": "mm10_no_alt_analysis_set_ENCODE.fasta.tar",
            "bucket_name": raw_bucket,
            "workflow_argument_name": "chip.bwa_idx_tar",
            "uuid": "f4b63d31-65d8-437f-a76a-6bedbb52ae6f"
        }, {
            "object_key": "4DNFIZ3FBPK8.bed.gz",
            "bucket_name": raw_bucket,
            "workflow_argument_name": "chip.blacklist",
            "uuid": "a32747a3-8a9e-4a9e-a7a1-4db0e8b65925"
        }, {
            "object_key": "4DNFIBP173GC.chrom.sizes",
            "bucket_name": raw_bucket,
            "workflow_argument_name": "chip.chrsz",
            "uuid": "be0a9819-d2ce-4422-be4b-234fb1677dd9"
        }]
    if control:
        input_files.append({
            "object_key": obj_keys,
            "bucket_name": raw_bucket,
            "workflow_argument_name": "chip.ctl_fastqs",
            "uuid": files
        })
    else:
        input_files.append({
            "object_key": obj_keys,
            "bucket_name": raw_bucket,
            "workflow_argument_name": "chip.fastqs",
            "uuid": files
        })

    if paired == 'single':
        chip_p = False
    elif paired == 'paired':
        chip_p = True
    if control:
        parameters = {
            "chip.pipeline_type": target_type,
            "chip.paired_end": chip_p,
            "chip.choose_ctl.always_use_pooled_ctl": True,
            "chip.gensz": org,
            "chip.bam2ta_ctl.regex_grep_v_ta": "chr[MUE]|random|alt",
            "chip.bwa_ctl.cpu": 8,
            "chip.merge_fastq_ctl.cpu": 8,
            "chip.filter_ctl.cpu": 8,
            "chip.bam2ta_ctl.cpu": 8,
            "chip.align_only": True
        }
    else:
        parameters = {
            "chip.pipeline_type": target_type,
            "chip.paired_end": chip_p,
            "chip.choose_ctl.always_use_pooled_ctl": True,
            "chip.gensz": org,
            "chip.bam2ta.regex_grep_v_ta": "chr[MUE]|random|alt",
            "chip.bwa.cpu": 8,
            "chip.merge_fastq.cpu": 8,
            "chip.filter.cpu": 8,
            "chip.bam2ta.cpu": 8,
            "chip.xcor.cpu": 8,
            "chip.align_only": True
        }
    if paired == 'single':
        frag_temp = [300]
        fraglist = frag_temp * len(files)
        parameters['chip.fraglen'] = fraglist

    tag = '1.1.1'
    # create the trigger json that is consumed by the fourfront endpoint
    input_json = {
        'input_files': input_files,
        'output_bucket': out_bucket,
        'workflow_uuid': wf_info['wf_uuid'],
        "app_name": wf_info['wf_name'],
        "wfr_meta": wf_info['wfr_meta'],
        "parameters": parameters,
        "config": wf_info['config'],
        "custom_pf_fields": wf_info['custom_pf_fields'],
        "_tibanna": {
            "env": my_env,
            "run_type": wf_info['wf_name'],
            "run_id": run_name
        },
        "tag": tag
    }
    # r = json.dumps(input_json)
    # print(r)
    e = ff_utils.post_metadata(input_json, 'WorkflowRun/run', key=my_key)
    url = json.loads(e['input'])['_tibanna']['url']
    display(
        HTML("<a href='{}' target='_blank'>{}</a>".format(url, e['status'])))
Example #22
def run_missing_atac2(wf_info, organism, paired, ta, my_env, my_key, run_name):
    my_s3_util = s3Utils(env=my_env)
    raw_bucket = my_s3_util.raw_file_bucket
    out_bucket = my_s3_util.outfile_bucket

    if organism == "human":
        org = 'hs'
        input_files = [{
            "object_key": "4DNFIZ1TGJZR.bed.gz",
            "bucket_name": raw_bucket,
            "workflow_argument_name": "atac.blacklist",
            "uuid": "9562ffbd-9f7a-4bd7-9c10-c335137d8966"
        }, {
            "object_key": "4DNFIZJB62D1.chrom.sizes",
            "bucket_name": raw_bucket,
            "workflow_argument_name": "atac.chrsz",
            "uuid": "9866d158-da3c-4d9b-96a9-1d59632eabeb"
        }]

    elif organism == "mouse":
        org = 'mm'
        input_files = [{
            "object_key": "4DNFIZ3FBPK8.bed.gz",
            "bucket_name": raw_bucket,
            "workflow_argument_name": "atac.blacklist",
            "uuid": "a32747a3-8a9e-4a9e-a7a1-4db0e8b65925"
        }, {
            "object_key": "4DNFIBP173GC.chrom.sizes",
            "bucket_name": raw_bucket,
            "workflow_argument_name": "atac.chrsz",
            "uuid": "be0a9819-d2ce-4422-be4b-234fb1677dd9"
        }]

    ta_f = extract_file_info(ta,
                             'atac.tas',
                             my_env,
                             rename=['bed', 'tagAlign'])
    input_files.append(ta_f)

    if paired == 'single':
        chip_p = False
    elif paired == 'paired':
        chip_p = True

    parameters = {
        "atac.pipeline_type": 'atac',
        "atac.paired_end": chip_p,
        "atac.gensz": org,
        "atac.disable_ataqc": True,
        "atac.enable_xcor": False,
    }

    if paired == 'single':
        frag_temp = [300]
        fraglist = frag_temp * len(ta)
        parameters['atac.fraglen'] = fraglist

    tag = '1.1.1'
    # create the trigger json that is consumed by the fourfront endpoint
    input_json = {
        'input_files': input_files,
        'output_bucket': out_bucket,
        'workflow_uuid': wf_info['wf_uuid'],
        "app_name": wf_info['wf_name'],
        "wfr_meta": wf_info['wfr_meta'],
        "parameters": parameters,
        "config": wf_info['config'],
        "custom_pf_fields": wf_info['custom_pf_fields'],
        "_tibanna": {
            "env": my_env,
            "run_type": wf_info['wf_name'],
            "run_id": run_name
        },
        "tag": tag
    }
    # r = json.dumps(input_json)
    # print(r)
    e = ff_utils.post_metadata(input_json, 'WorkflowRun/run', key=my_key)
    url = json.loads(e['input'])['_tibanna']['url']
    display(
        HTML("<a href='{}' target='_blank'>{}</a>".format(url, e['status'])))
def main():  # pragma: no cover
    start = datetime.now()
    print(str(start))
    args = get_args()
    try:
        auth = get_authentication_with_server(args.key, args.env)
    except Exception:
        print("Authentication failed")
        sys.exit(1)

    phase2 = {}
    # assumes each line of the file contains the JSON for a single term
    if not args.dbupdate:
        print("DRY RUN - use --dbupdate to update the database")
    with open(args.infile) as terms:
        for t in terms:
            phase2json = {}
            term = json.loads(t)
            id_tag = get_id(term)
            if id_tag is None:
                print("No Identifier for ", term)
            else:
                tid = '/ontology-terms/' + id_tag
                # look for parents and remove for phase 2 loading if they are there
                if 'parents' in term:
                    phase2json['parents'] = term['parents']
                    del term['parents']
                if 'slim_terms' in term:
                    phase2json['slim_terms'] = term['slim_terms']
                    del term['slim_terms']

                try:
                    dbterm = get_metadata(tid, auth)
                except Exception:
                    dbterm = None
                op = ''
                if dbterm and 'OntologyTerm' in dbterm.get('@type', []):
                    if args.dbupdate:
                        e = patch_metadata(term, dbterm["uuid"], auth)
                    else:
                        e = {'status': 'dry run'}
                    op = 'PATCH'
                else:
                    if args.dbupdate:
                        e = post_metadata(term, 'OntologyTerm', auth)
                    else:
                        e = {'status': 'dry run'}
                    op = 'POST'
                status = e.get('status')
                if status and status == 'dry run':
                    print(op, status)
                elif status and status == 'success':
                    print(op, status, e['@graph'][0]['uuid'])
                    if phase2json:
                        phase2[e['@graph'][0]['uuid']] = phase2json
                else:
                    print('FAILED', tid, e)

    print("START LOADING PHASE2 at ", str(datetime.now()))
    for tid, data in phase2.items():
        if args.dbupdate:
            e = patch_metadata(data, tid, auth)
        else:
            e = {'status': 'dry run'}
        status = e.get('status')
        if status and status == 'dry run':
            print('PATCH', status)
        elif status and status == 'success':
            print('PATCH', status, e['@graph'][0]['uuid'])
        else:
            print('FAILED', tid, e)
    end = datetime.now()
    print("FINISHED - START: ", str(start), "\tEND: ", str(end))
Example #24
def _qc_updater(status,
                awsemfile,
                ff_meta,
                tibanna,
                quality_metric='quality_metric_fastqc',
                file_argument='input_fastq',
                report_html=None,
                datafiles=None):
    # avoid using [] as default argument
    if datafiles is None:
        datafiles = ['summary.txt', 'fastqc_data.txt']
    if status == 'uploading':
        # wait until the upload is finished
        return
    # keys
    ff_key = tibanna.ff_keys
    # move files to proper s3 location
    # need to remove sbg from this line
    accession = awsemfile.runner.all_file_accessions[file_argument]
    zipped_report = awsemfile.key
    files_to_parse = datafiles
    if report_html:
        files_to_parse.append(report_html)
    LOG.info("accession is %s" % accession)
    try:
        files = awsemfile.s3.unzip_s3_to_s3(zipped_report,
                                            accession,
                                            files_to_parse,
                                            acl='public-read')
    except Exception as e:
        LOG.info(tibanna.s3.__dict__)
        raise Exception("%s (key={})\n".format(zipped_report) % e)
    # schema. do not need to check_queue
    qc_schema = ff_utils.get_metadata("profiles/" + quality_metric + ".json",
                                      key=ff_key,
                                      ff_env=tibanna.env)
    # parse fastqc metadata
    LOG.info("files : %s" % str(files))
    filedata = [files[_]['data'] for _ in datafiles]
    if report_html in files:
        qc_url = files[report_html]['s3key']
    else:
        qc_url = None
    meta = parse_qc_table(filedata,
                          qc_schema=qc_schema.get('properties'),
                          url=qc_url)
    LOG.info("qc meta is %s" % meta)
    # post fastq metadata
    qc_meta = ff_utils.post_metadata(meta, quality_metric, key=ff_key)
    if qc_meta.get('@graph'):
        qc_meta = qc_meta['@graph'][0]
    LOG.info("qc_meta is %s" % qc_meta)
    # update original file as well
    try:
        original_file = ff_utils.get_metadata(accession,
                                              key=ff_key,
                                              ff_env=tibanna.env,
                                              add_on='frame=object',
                                              check_queue=True)
        LOG.info("original_file is %s" % original_file)
    except Exception as e:
        raise Exception(
            "Couldn't get metadata for accession {} : ".format(accession) +
            str(e))
    patch_file = {'quality_metric': qc_meta['@id']}
    try:
        ff_utils.patch_metadata(patch_file, original_file['uuid'], key=ff_key)
    except Exception as e:
        raise Exception("patch_metadata failed in fastqc_updater." + str(e) +
                        "original_file ={}\n".format(str(original_file)))
    # patch the workflow run, value_qc is used to make drawing graphs easier.
    output_files = ff_meta.output_files
    output_files[0]['value_qc'] = qc_meta['@id']
    retval = {
        "output_quality_metrics": [{
            "name": quality_metric,
            "value": qc_meta['@id']
        }],
        'output_files': output_files
    }
    LOG.info("retval is %s" % retval)
    return retval
Example #25
def _qc_updater(status,
                awsemfile,
                ff_meta,
                tibanna,
                quality_metric='quality_metric_fastqc',
                file_argument='input_fastq',
                report_html=None,
                datafiles=None,
                zipped=True,
                datajson_argument=None,
                other_fields=None):
    if datajson_argument == awsemfile.argument_name:
        return
    # avoid using [] as default argument
    if datafiles is None:
        datafiles = ['summary.txt', 'fastqc_data.txt']
    if status == 'uploading':
        # wait until the upload is finished
        return
    # keys
    ff_key = tibanna.ff_keys
    # move files to proper s3 location
    # need to remove sbg from this line
    accession = awsemfile.runner.get_file_accessions(file_argument)[0]
    zipped_report = awsemfile.key
    files_to_parse = datafiles
    if report_html:
        files_to_parse.append(report_html)
    printlog("accession is %s" % accession)
    jsondata = dict()
    if zipped:
        try:
            files = awsemfile.s3.unzip_s3_to_s3(zipped_report,
                                                accession,
                                                files_to_parse,
                                                acl='public-read')
        except Exception as e:
            printlog(tibanna.s3.__dict__)
            raise Exception("%s (key={})\n".format(zipped_report) % e)
        printlog("files : %s" % str(files))
        filedata = [files[_]['data'] for _ in datafiles]
    else:
        if datajson_argument:
            datajson_key = awsemfile.runner.get_file_key(datajson_argument)
            jsondata0 = [
                json.loads(awsemfile.s3.read_s3(_)) for _ in datajson_key
            ]
            for d in jsondata0:
                jsondata.update(d)
        filedata = [awsemfile.s3.read_s3(_) for _ in datafiles]
        reportdata = awsemfile.s3.read_s3(report_html)
        report_html = accession + 'qc_report.html'
        awsemfile.s3.s3_put(reportdata, report_html, acl='public-read')
        qc_url = 'https://s3.amazonaws.com/' + awsemfile.bucket + '/' + report_html
        files = {report_html: {'data': reportdata, 's3key': qc_url}}
    # schema. do not need to check_queue
    qc_schema = ff_utils.get_metadata("profiles/" + quality_metric + ".json",
                                      key=ff_key,
                                      ff_env=tibanna.env)
    # parse fastqc metadata
    if report_html in files:
        qc_url = files[report_html]['s3key']
    else:
        qc_url = None
    meta = parse_qc_table(filedata,
                          qc_schema=qc_schema.get('properties'),
                          url=qc_url)
    if jsondata:
        meta.update(jsondata)
    # custom fields
    if other_fields:
        meta.update(other_fields)
    printlog("qc meta is %s" % meta)
    # post fastq metadata
    qc_meta = ff_utils.post_metadata(meta, quality_metric, key=ff_key)
    if qc_meta.get('@graph'):
        qc_meta = qc_meta['@graph'][0]
    printlog("qc_meta is %s" % qc_meta)
    # update original file as well
    try:
        original_file = ff_utils.get_metadata(accession,
                                              key=ff_key,
                                              ff_env=tibanna.env,
                                              add_on='frame=object',
                                              check_queue=True)
        printlog("original_file is %s" % original_file)
    except Exception as e:
        raise Exception(
            "Couldn't get metadata for accession {} : ".format(accession) +
            str(e))
    patch_file = {'quality_metric': qc_meta['@id']}
    try:
        ff_utils.patch_metadata(patch_file, original_file['uuid'], key=ff_key)
    except Exception as e:
        raise Exception("patch_metadata failed in fastqc_updater." + str(e) +
                        "original_file ={}\n".format(str(original_file)))
    # patch the workflow run, value_qc is used to make drawing graphs easier.
    output_files = ff_meta.output_files
    output_files[0]['value_qc'] = qc_meta['@id']
    retval = {'output_files': output_files}
    printlog("retval is %s" % retval)
    return retval
Example #26
def initialize_user_content(spawner):
    """
    Used to initialize the users s3-backed notebook storage.
    For initialization, ensure all notebook templates are copied
    (check every time)
    In addition, load access keys from Fourfront and add them to the
    environment variables of the notebook. Also delete previously created
    access keys used for Jupyterhub for the user
    Also initialized a TrackingItem of type jupyterhub_session to track some
    basic information on the JH session
    """
    err_output = []  # keep track of errors for debugging

    # grab this info fresh every time
    ff_keys = recompute_ff_keys(err_output)

    username = spawner.user.name  # get the username
    list_res = s3_client.list_objects_v2(
        Bucket=os.environ['AWS_TEMPLATE_BUCKET'])

    # check each template individually
    for template_res in list_res.get('Contents', []):
        template_key = template_res['Key']
        user_subdir = 'user-' + escape_string(username)
        notebook_temp_key = '/'.join([user_subdir, template_key])
        source_info = {
            "Bucket": os.environ['AWS_TEMPLATE_BUCKET'],
            "Key": template_key
        }
        try:  # always replace templates
            s3_client.copy_object(Bucket=os.environ["AWS_NOTEBOOK_BUCKET"],
                                  Key=notebook_temp_key,
                                  CopySource=source_info)
        except Exception as copy_exc:
            err_output.append({'copying_templates': str(copy_exc)})

    # get the access keys and set them as environment variables for the user
    try:
        ff_user = ff_utils.get_metadata('/users/' + username, key=ff_keys)
    except Exception as user_exc:
        err_output.append({'getting_user': str(user_exc)})
        clear_old_access_keys()  # if we get here, old access key state must be cleared
    else:
        key_descrip = 'jupyterhub_key'
        search_q = ''.join([
            '/search/?type=AccessKey&status=current&description=', key_descrip,
            '&user.uuid=', ff_user['uuid']
        ])
        try:
            user_keys = ff_utils.search_metadata(search_q, key=ff_keys)
        except Exception as search_exc:
            err_output.append({'searching_keys': str(search_exc)})
        else:
            for ukey in user_keys:
                try:
                    ff_utils.patch_metadata({'status': 'deleted'},
                                            ukey['uuid'],
                                            key=ff_keys)
                except Exception as patch_exc:
                    err_output.append({'deleting_keys': str(patch_exc)})
        # access key will be submitted by 4dn-dcic admin but belong to user
        key_body = {'user': ff_user['uuid'], 'description': key_descrip}
        try:
            key_res = ff_utils.post_metadata(key_body,
                                             'access-keys',
                                             key=ff_keys)
        except Exception as key_exc:
            err_output.append({'post_key': str(key_exc)})
            clear_old_access_keys()  # if we get here, old access key state must be cleared
        else:
            os.environ['FF_ACCESS_KEY'] = key_res['access_key_id']
            os.environ['FF_ACCESS_SECRET'] = key_res['secret_access_key']

        # initialize a tracking item for the session and store its uuid in env
        # set `submitted_by` manually to allow user to edit
        tracking_body = {
            'jupyterhub_session': {
                'date_initialized': datetime.datetime.utcnow().isoformat() + '+00:00',
                'user_uuid': ff_user['uuid']
            },
            'tracking_type': 'jupyterhub_session',
            'submitted_by': ff_user['uuid']
        }
        try:
            track_res = ff_utils.post_metadata(tracking_body,
                                               'tracking-items',
                                               key=ff_keys)
        except Exception as track_exc:
            err_output.append({'tracking_item': str(track_exc)})
        else:
            os.environ['FF_TRACKING_ID'] = track_res['@graph'][0]['uuid']

    os.environ['INIT_ERR_OUTPUT'] = json.dumps(err_output)
Example #27
def main():
    """
    Load a given JSON file with ontology terms inserts to a server using
    the `load_data` endpoint defined in loadxl.
    """
    logging.basicConfig()
    # Loading app will have configured from config file. Reconfigure here:
    logging.getLogger('encoded').setLevel(logging.INFO)

    parser = argparse.ArgumentParser(
        description="Load Ontology Term Data", epilog=EPILOG,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument('json_file', help="File containing terms to load")
    parser.add_argument('--env', default='local',
                        help='FF environment to update from. Defaults to local')
    parser.add_argument('--local-key', help='Access key ID if using local')
    parser.add_argument('--local-secret', help='Access key secret if using local')
    args = parser.parse_args()

    # authentication with Fourfront
    if args.env == 'local':
        # prompt access key ID and secret from user
        config_uri = 'development.ini'
        local_id = args.local_key if args.local_key else input('[local access key ID] ')
        local_secret = args.local_secret if args.local_secret else input('[local access key secret] ')
        auth = {'key': local_id, 'secret': local_secret, 'server': 'http://localhost:8000'}
    else:
        config_uri = 'production.ini'
        auth = ff_utils.get_authentication_with_server(None, args.env)

    load_endpoint = '/'.join([auth['server'], 'load_data'])
    logger.info('load_ontology_terms: Starting POST to %s' % load_endpoint)
    json_data = {'config_uri': config_uri, 'itype': 'ontology_term',
                 'overwrite': True, 'iter_response': True}
    with open(args.json_file) as infile:
        json_data['store'] = {'ontology_term': json.load(infile)}
    num_to_load = len(json_data['store']['ontology_term'])
    logger.info('Will attempt to load %s ontology terms to %s'
                % (num_to_load, auth['server']))
    start = datetime.now()
    try:
        # sustained by returning Response.app_iter from loadxl.load_data
        res = ff_utils.authorized_request(load_endpoint, auth=auth, verb='POST',
                                          timeout=None, json=json_data)
    except Exception as exc:
        logger.error('Error on POST: %s' % str(exc))
    else:
        # process the individual item responses from the generator.
        # each item should be "POST: <uuid>,", "PATCH: <uuid>,", or "SKIP: <uuid>"
        load_res = {'POST': [], 'PATCH': [], 'SKIP': [], 'ERROR': []}
        for val in res.text.split('\n'):
            if val.startswith('POST') or val.startswith('SKIP'):
                prefix_len = 4  # 'POST' or 'SKIP'
            else:
                prefix_len = 5  # 'PATCH' or 'ERROR'
            # this is a bit weird, but we want to split out the POST/PATCH...
            # and also remove ': ' from the value for each message
            cat, msg = val[:prefix_len], val[prefix_len + 2:]
            if not msg:
                continue
            if cat in load_res:
                load_res[cat].append(msg)
        logger.info("Success! Attempted to load %s items. Result: POSTed %s, PATCHed %s, skipped %s"
                    % (num_to_load, len(load_res['POST']), len(load_res['PATCH']), len(load_res['SKIP'])))
        if load_res['ERROR']:
            logger.error("ERROR encountered during load_data! Error: %s" % load_res['ERROR'])
        if (len(load_res['POST']) + len(load_res['SKIP'])) > len(load_res['PATCH']):
            logger.error("The following items passed round I (POST/skip) but not round II (PATCH): %s"
                         % (set(load_res['POST'] + load_res['SKIP']) - set(load_res['PATCH'])))
    logger.info("Finished request in %s" % str(datetime.now() - start))

    # update sysinfo. Don't worry about doing this on local
    if args.env != 'local':
        data = {"name": "ffsysinfo", "ontology_updated": datetime.today().isoformat()}
        try:
            found_info = ff_utils.get_metadata('/sysinfos/' + data['name'], key=auth)
        except Exception:
            found_info = None

        if found_info:
            ff_utils.patch_metadata(data, found_info['uuid'], key=auth)
        else:
            ff_utils.post_metadata(data, 'sysinfos', key=auth)
        logger.info("Updated sysinfo with name %s" % data['name'])
    logger.info("DONE!")