Example #1
from firecloud import api as firecloud_api


def download_remote_pairs(namespace, workspace):
    """Download remote pairs from FireCloud."""
    res = firecloud_api.get_entities_tsv(namespace, workspace, "pair")
    with open('./Pairs/remote_pairs.txt', 'w') as outfile:
        outfile.write(res.text)
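A minimal usage sketch, assuming placeholder project/workspace names and that the `./Pairs` output directory exists (the names below are hypothetical, for illustration only):

import os

os.makedirs('./Pairs', exist_ok=True)  # open() will not create the directory
download_remote_pairs("my-billing-project", "my-workspace")  # hypothetical names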
Example #2
    def test_get_entities_tsv(self):
        """Test get_entities_tsv()."""
        r = fapi.get_entities_tsv(self.project,
                                  self.workspace,
                                  "participant")
        print(r.status_code, r.content)
        self.assertEqual(r.status_code, 200)
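This method is a fragment of a larger test class; a minimal self-contained wrapper might look like the following sketch (the class name and the project/workspace values are assumptions):

import unittest

from firecloud import api as fapi


class TestEntitiesTSV(unittest.TestCase):
    # NOTE: hypothetical values, for illustration only
    project = "my-billing-project"
    workspace = "my-test-workspace"

    def test_get_entities_tsv(self):
        """Test get_entities_tsv()."""
        r = fapi.get_entities_tsv(self.project, self.workspace, "participant")
        self.assertEqual(r.status_code, 200)


if __name__ == '__main__':
    unittest.main()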
Example #3

def download_remote_samples(namespace, workspace):
    """Download remote samples from FireCloud.

    Writes:
        - remote_files/remote_samples.txt: samples in FC
    """
    res = firecloud_api.get_entities_tsv(namespace, workspace, "sample")
    with open('remote_files/remote_samples.txt', 'w') as outfile:
        outfile.write(res.text)
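The download helpers above differ only in entity type and output path; a generalized sketch (the helper name is an assumption, not part of the original module):

def download_entities_tsv(namespace, workspace, etype, out_path):
    """Download any FireCloud entity table as a TSV file (hypothetical helper)."""
    res = firecloud_api.get_entities_tsv(namespace, workspace, etype)
    res.raise_for_status()  # the client returns a requests.Response
    with open(out_path, 'w') as outfile:
        outfile.write(res.text)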
Example #4

def get_data_from_workspace(billing_project, workspace_name,
                            participant_table_name, output_table_name):
    r = fapi.get_entities_tsv(billing_project, workspace_name,
                              participant_table_name)
    # Check the response before writing the output file
    if r.status_code != 200:
        print("ERROR: " + r.content.decode())
        sys.exit(1)

    with open(output_table_name, 'w') as fout:
        fout.write(r.content.decode())
    print("Downloaded the table successfully")
def main():
    global billing_project
    global template_workspace_name

    parser = argparse.ArgumentParser(prog="python " + sys.argv[0],
                                     add_help=False)
    subparser = parser.add_subparsers(dest="cmd")

    delete_workspace = subparser.add_parser('delete_workspace',
                                            help='delete workspace')
    delete_workspace.add_argument('--workspace-name',
                                  dest="workspace_name",
                                  help="name of the workspace")

    clone_workspace = subparser.add_parser(
        'clone_workspace', help='clone from existing workspace')
    clone_workspace.add_argument('--source-work-space',
                                 dest='src_work_space',
                                 help="name of source workspace")
    clone_workspace.add_argument('--destination-work-space',
                                 dest='dest_work_space',
                                 help="name of destination workspace")

    get_data_info = subparser.add_parser('get_participant_table',
                                         help='get participant.tsv')
    get_data_info.add_argument('--workspace-name',
                               dest="workspace_name",
                               help="name of the workspace")
    get_data_info.add_argument('--participant-table-name',
                               dest="participant_table_name",
                               help="name of the participant table")
    get_data_info.add_argument('--output-name',
                               dest="output_table_name",
                               required=False,
                               default="participant.tsv",
                               help="name of output tsv")

    create_participant_lane = subparser.add_parser(
        'create_participant_lane',
        help='create participant_lane/lane_set_id tables')
    create_participant_lane.add_argument('--input-name',
                                         dest="input_participant_table_name",
                                         required=False,
                                         default="participant.tsv",
                                         help="input participant table  name")

    create_participant_lane.add_argument(
        '--output-prefix',
        dest="output_prefix",
        required=False,
        help="name of output prefix for the lanes")

    upload_participant_lane = subparser.add_parser(
        'upload_participant',
        help='uploads the participant_lane_set, _lane_membership '
             'and _lane_entity files')
    upload_participant_lane.add_argument('--workspace-name',
                                         dest="workspace_name",
                                         help="name of the workspace")
    upload_participant_lane.add_argument('--input-prefix',
                                         dest="input_prefix",
                                         help="name of the input prefix")

    upload_workflow = subparser.add_parser(
        'upload_workflow', help='uploads wdl to --workspace-name')
    upload_workflow.add_argument('--workspace-name',
                                 dest="workspace_name",
                                 help="name of the workspace")
    upload_workflow.add_argument('--method',
                                 dest="method",
                                 help="name of the workflow method")
    upload_workflow.add_argument('--wdl',
                                 dest="wdl",
                                 help="path to the WDL file")

    upload_config = subparser.add_parser('upload_config',
                                         help='upload config information')
    upload_config.add_argument('--workspace-name',
                               dest="workspace_name",
                               help="name of the workspace")
    upload_config.add_argument('--chemistry',
                               dest="chemistry",
                               choices=["V2", "V3"],
                               help="chemistry")
    upload_config.add_argument(
        '--counting-mode',
        dest="counting_mode",
        choices=["sc_rna", "sn_rna"],
        help="counting mode: whether to count intronic alignments")
    upload_config.add_argument('--species',
                               dest="species",
                               choices=["human", "mouse"],
                               help="species")

    submit_workflow = subparser.add_parser('submit_workflow',
                                           help='submit a workflow run')
    submit_workflow.add_argument('--workspace-name',
                                 dest="workspace_name",
                                 help="name of the workspace")
    submit_workflow.add_argument('--workflow-repo',
                                 dest="workflow_repo",
                                 help="workflow repo name")
    submit_workflow.add_argument('--workflow-name',
                                 dest="workflow_name",
                                 help="workflow name")
    submit_workflow.add_argument('--entity-id',
                                 dest="entity_id",
                                 help="entity id")

    get_status = subparser.add_parser('get_status',
                                      help='get status of a submission')
    get_status.add_argument('--workspace-name',
                            dest="workspace_name",
                            help="name of the workspace")
    get_status.add_argument('--submission-id',
                            dest="submission_id",
                            help="submission_id")

    # show help when no arguments supplied
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(0)

    args = parser.parse_args()

    # new_workspace_name = "DCP2_Optimus_template_KMK_v1"
    if args.cmd == 'delete_workspace':
        print("Delete existing workspace ", args.workspace_name)
        delete_status = fapi.delete_workspace(billing_project,
                                              args.workspace_name)

    elif args.cmd == 'clone_workspace':
        print("Cloning a new workspace from template", args.src_work_space)
        status = create_newworkspace(billing_project, args.src_work_space,
                                     args.dest_work_space)

    elif args.cmd == 'get_participant_table':
        print("Get information from workspace", args.workspace_name)
        r = fapi.get_entities_tsv(billing_project, args.workspace_name,
                                  args.participant_table_name)
        with open(args.output_table_name, 'w') as fout:
            fout.write(r.content.decode())

    elif args.cmd == 'create_participant_lane':
        parse_terra.create_output_files(args.input_participant_table_name,
                                        args.output_prefix)

    elif args.cmd == 'upload_participant':
        upload_tables(args.input_prefix + ".tsv", billing_project,
                      args.workspace_name)
        upload_tables(args.input_prefix + "_membership.tsv", billing_project,
                      args.workspace_name)
        upload_tables(args.input_prefix + "_entity.tsv", billing_project,
                      args.workspace_name)
    elif args.cmd == 'upload_workflow':
        # NOTE: the synopsis and comment arguments are placeholder literals
        r = fapi.update_repository_method(args.workspace_name, args.method,
                                          "args.synopsis", args.wdl,
                                          "comment.txt", "args.comment")
        with open("response.txt", 'w') as fout:
            fout.write(r.content.decode())

    elif args.cmd == 'upload_config':
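        # Fetch the workspace's current "Optimus" method config, overwrite its
        # inputs/outputs with the JSON templates, then patch per-run values
        # (chemistry, counting mode, species) before pushing it back.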

        work_space_config = fapi.get_workspace_config(billing_project,
                                                      args.workspace_name,
                                                      args.workspace_name,
                                                      "Optimus")
        work_space_json = work_space_config.json()
        work_space_json['inputs'] = json_templates.optimus_inputs
        work_space_json['outputs'] = json_templates.optimus_outputs

        if args.chemistry == "V2":
            work_space_json['inputs']['Optimus.chemistry'] = '\"tenX_v2\"'
            work_space_json['inputs'][
                'Optimus.whitelist'] = 'workspace.whitelist_v2'
        if args.chemistry == "V3":
            work_space_json['inputs']['Optimus.chemistry'] = '\"tenX_v3\"'
            work_space_json['inputs'][
                'Optimus.whitelist'] = 'workspace.whitelist_v3'

        if args.chemistry == "sn_rna":
            work_space_json['inputs']['Optimus.counting_mode'] = "\"sn_rna\""
        if args.chemistry == "sc_rna":
            work_space_json['inputs']['Optimus.counting_mode'] = "\"sc_rna\""

        if args.species == "human":
            work_space_json['inputs'][
                'Optimus.annotations_gtf'] = 'workspace.human_annotations_gtf'
            work_space_json['inputs'][
                'Optimus.ref_genome_fasta'] = 'workspace.human_ref_genome_fasta'
            work_space_json['inputs'][
                'Optimus.tar_star_reference'] = 'workspace.human_tar_star_reference'
        if args.species == "mouse":
            work_space_json['inputs'][
                'Optimus.annotations_gtf'] = 'workspace.mouse_annotations_gtf'
            work_space_json['inputs'][
                'Optimus.ref_genome_fasta'] = 'workspace.mouse_ref_genome_fasta'
            work_space_json['inputs'][
                'Optimus.tar_star_reference'] = 'workspace.mouse_tar_star_reference'

        updated_workflow = fapi.update_workspace_config(
            billing_project, args.workspace_name, args.workspace_name,
            "Optimus", work_space_json)

        if updated_workflow.status_code != 200:
            print("ERROR :" + updated_workflow.content)
            sys.exit(1)
        else:
            print("updated successfully")

    elif args.cmd == 'submit_workflow':
        # Launching the Updated Monitor Submission Workflow
        response = fapi.get_entities_with_type(billing_project,
                                               args.workspace_name)
        entities = response.json()

        # inspect the entities present in the workspace (these values are not
        # used below; the submission targets args.entity_id directly)
        for ent in entities:
            ent_name = ent['name']
            ent_type = ent['entityType']
            ent_attrs = ent['attributes']

        submission_response = fapi.create_submission(
            billing_project,
            args.workspace_name,
            args.workflow_repo,
            args.workflow_name,
            entity=args.entity_id,
            etype="participant_lane_set",
            expression=None,
            use_callcache=True)

        if submission_response.status_code != 201:
            print(submission_response.content)
            sys.exit(1)
        else:
            print("Successfully created submission")
            with open('response.txt', 'w') as fout:
                # json.dump(submission_response.json(), fout)
                fout.write(submission_response.json()['submissionId'] + '\n')
        # r = create_workspace_config("broadgdac", args.workspace_name, body):
        # print(r.content.decode())
    elif args.cmd == 'get_status':
        res = fapi.get_submission(billing_project, args.workspace_name,
                                  args.submission_id)
        print(res.content.decode())
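Taken together, the subcommands form a small CLI. Example invocations (a sketch; the script name, workspace, and entity names are placeholders):

# python pipeline.py clone_workspace --source-work-space template-ws \
#     --destination-work-space my-workspace
# python pipeline.py get_participant_table --workspace-name my-workspace \
#     --participant-table-name participant --output-name participant.tsv
# python pipeline.py upload_config --workspace-name my-workspace \
#     --chemistry V3 --counting-mode sc_rna --species human
# python pipeline.py submit_workflow --workspace-name my-workspace \
#     --workflow-repo my-repo --workflow-name Optimus --entity-id my-lane-set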
Example #5

def update_entities_to_compact_identifier(workspace, project,
                                          single_etypes_list, dry_run):
    """Update Data Model entity attributes to DRS 1.1 Compact Identifiers."""

    for etype in single_etypes_list:
        print(f'Starting TCGA DRS updates for entity: {etype}')

        # get entity table response for API call
        res_etype = fapi.get_entities_tsv(project,
                                          workspace,
                                          etype,
                                          model="flexible")

        # save current/original data model tsv files for provenance
        print(f'Saving original {etype} TSV.')
        original_tsv_name = f"original_{etype}_{project}-{workspace}_table.tsv"
        with open(original_tsv_name, "w") as f:
            f.write(res_etype.text)

        # read entity table response into dictionary to perform DRS URL updates
        dict_etype = list(
            csv.DictReader(StringIO(res_etype.text), delimiter='\t'))

        # list to collect updated rows, and a set to record which columns
        # were modified
        drs_dict_table = []
        modified_cols = set()
        # each "row" is a dict of column:value pairs
        for row in dict_etype:
            drs_row = row.copy()
            # for each column in row
            for col in row:
                # check if the col values are dataguids.org URLs and parse out guid
                if row[col].startswith("drs://dataguids.org"):
                    guid = row[col].split("/")[3]
                    # only modify col if guid is valid and exists
                    if guid and GUID_PATTERN.match(guid):
                        drs_url = "drs://dg.4DFC:" + guid
                        drs_row[col] = drs_url
                        modified_cols.add(col)

            # append new "row" with updated drs values to new list
            drs_dict_table.append(drs_row)

        # save new/drs updated data model tsv files for provenance
        print(f'Saving updated {etype} TSV.')
        updated_tsv_name = f"updated_{etype}_{project}-{workspace}_table.tsv"
        tsv_headers = drs_dict_table[0].keys()

        with open(updated_tsv_name, 'w') as outfile:
            # get keys from OrderedDictionary and write rows, separate with tabs
            writer = csv.DictWriter(outfile, tsv_headers, delimiter="\t")
            writer.writeheader()
            writer.writerows(drs_dict_table)

        # list of the columns that are scoped to be updated if re-run without --dry_run flag
        modified_cols = list(modified_cols)
        if dry_run:
            print(
                f"Columns in the {etype} table that *will be* updated when the script is re-run without the `--dry_run` flag:"
            )
            if not modified_cols:
                print("\t" * 4 +
                      f"No columns to update in the {etype} table." + "\n\n")
            else:
                print('\n'.join(['\t' * 4 + c for c in modified_cols]))
                print(
                    f"To view in detail what will be updated, inspect the {updated_tsv_name} file."
                    + "\n\n")
        else:
            # upload newly created tsv file containing drs urls
            print(
                f"Starting update of the {etype} table with compact DRS identifiers (drs://dg.4DFC:GUID)."
            )

            res_update = fapi.upload_entities_tsv(project,
                                                  workspace,
                                                  updated_tsv_name,
                                                  model="flexible")
            if res_update.status_code != 200:
                print(
                    f"Could not update existing {etype} table. Error message: {res_update.text}"
                )
                continue  # skip the success message for this entity type

            print(
                f"Finished uploading TCGA DRS updated .tsv for entity: {etype}"
                + "\n")