Code Example #1
from firecloud import api as fapi  # FISS FireCloud API bindings
import json

def copy_workspace_entities_sushma(destination_workspace_namespace, destination_workspace_name, source_workspace_namespace, source_workspace_name, destination_workspace_bucket):
    """Copy workspace data tables to destination workspace."""

    source_etypes = fapi.list_entity_types(source_workspace_namespace, source_workspace_name)
    if source_etypes.status_code != 200:  # getting list of data tables fails
        message = f"Failed to retrieve list of data tables (entity types) from: {source_workspace_namespace}/{source_workspace_name}. API error: {source_etypes.text}."
        print(message)
        return False, message
    source_set_etypes = [s for s in list(source_etypes.json().keys()) if s.endswith("_set")]
    source_single_etypes = [s for s in list(source_etypes.json().keys()) if not s.endswith("_set")]

    # for each table that is not a set
    for etype in source_single_etypes:
        # get entity names for etype
        entities = fapi.get_entities(source_workspace_namespace, source_workspace_name, etype)
        if entities.status_code != 200:  # getting an etype's entities fails
            message = f"Failed to retrieve entities (row names) for {etype}. API error: {entities.text}"
            print(message)
            return False, message

        entity_names = [ent["name"] for ent in entities.json()]
        # copy single etype (with entities) to destination workspace
        copy_response = fapi.copy_entities(source_workspace_namespace, source_workspace_name, destination_workspace_namespace, destination_workspace_name, etype, entity_names, link_existing_entities=True)
        if copy_response.status_code not in [201, 409]:  # if copying table with entities fails
            message = f"Failed to copy {etype} with entities({entity_names}) to {destination_workspace_namespace}/{destination_workspace_name}. API error: {copy_response.text}."
            print(message)
            return False, message

    for set_etype in source_set_etypes:
        # get entity names for etype
        set_entities = fapi.get_entities(source_workspace_namespace, source_workspace_name, set_etype)
        if set_entities.status_code != 200:  # getting a set etype's entities fails
            message = f"Failed to retrieve entities (row names) for {set_etype}. API error: {set_entities.text}"
            print(message)
            return False, message

        set_entity_names = [ent["name"] for ent in set_entities.json()]
        # copy set etype (with entities) to destination workspace
        set_copy_response = fapi.copy_entities(source_workspace_namespace, source_workspace_name, destination_workspace_namespace, destination_workspace_name, set_etype, set_entity_names, link_existing_entities=True)
        if set_copy_response.status_code not in [201, 409]:  # if copying set table with entities fails
            message = f"Failed to copy {set_etype} with entities({set_entity_names}) to {destination_workspace_namespace}/{destination_workspace_name}. API error: {set_copy_response.text}."
            print(message)
            return False, message

    print(f"Successfully copied data tables to {destination_workspace_namespace}/{destination_workspace_name}: {list(source_etypes.json().keys())}")
    # get original workspace bucket id
    get_bucket_success, get_bucket_message = get_workspace_bucket(source_workspace_name, source_workspace_namespace)
    # TODO: handle if getting workspace bucket fails
    source_bucket = json.loads(get_bucket_message)["workspace"]["bucketName"]
    destination_bucket = destination_workspace_bucket.replace("gs://", "")

    # update bucket links in the destination workspace so that it matches the path structure of what the WDL generates when it migrates data
    # gs://new_bucket_id/original_bucket_id/[original data structure]
    update_entities(destination_workspace_name, destination_workspace_namespace, replace_this=source_bucket, with_this=f"{destination_bucket}/{source_bucket}")

    print(f"Successfully updated data tables with new bucket paths data tables in {destination_workspace_namespace}/{destination_workspace_name}.")
    return True, list(source_etypes.json().keys())
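A minimal call sketch for the function above. The project, workspace, and bucket values here are hypothetical placeholders, and the function additionally relies on the get_workspace_bucket and update_entities helpers defined elsewhere in its source project.

# Hypothetical invocation; every namespace/workspace/bucket value is a placeholder.
success, details = copy_workspace_entities_sushma(
    destination_workspace_namespace="my-dest-project",
    destination_workspace_name="my-dest-workspace",
    source_workspace_namespace="my-src-project",
    source_workspace_name="my-src-workspace",
    destination_workspace_bucket="gs://fc-destination-bucket")
if success:
    print(f"Copied tables: {details}")
else:
    print(f"Copy failed: {details}")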
Code Example #2
File: workspace.py Project: MicahR-Y/TAG-fiss
def __get_entities(self, etype):
    """Helper to get entities for a given type."""
    r = fapi.get_entities(self.namespace, self.name,
                          etype, self.api_url)
    fapi._check_response_code(r, 200)
    return [Entity(e['entityType'], e['name'], e['attributes'])
            for e in r.json()]
Code Example #3
File: lowlevel_tests.py Project: agraubert/fiss
def test_get_entities(self):
    """Test get_entities()."""
    r = fapi.get_entities(self.project,
                          self.workspace,
                          "participant")
    print(r.status_code, r.content)
    self.assertEqual(r.status_code, 200)
Code Example #4
def get_entities(namespace='anvil-datastorage',
                 workspace=None,
                 entity_name=None):
    """Return all entities in a workspace."""
    entities = [
        AttrDict(e)
        for e in FAPI.get_entities(namespace, workspace, entity_name).json()
    ]
    return entities
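A short usage sketch for the wrapper above. The workspace and entity type names are hypothetical, and AttrDict is assumed to come from the attrdict package, so the entity JSON keys (name, entityType, attributes) become readable as attributes.

# Hypothetical usage; field names follow the entity JSON seen in the other examples.
samples = get_entities(workspace="my-anvil-workspace", entity_name="sample")
for sample in samples:
    print(sample.name, sample.attributes)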
Code Example #5
from firecloud import api as fapi
import pandas as pd

def load_table(namespace, workspace, table_name, store_membership=False):
    """Load an entity table into a DataFrame, optionally capturing sample membership sets."""
    ent_old = fapi.get_entities(namespace, workspace, table_name).json()
    tbl_old = None

    membership = None
    if len(ent_old) > 0:
        tbl_old = pd.DataFrame(list(map(lambda e: e['attributes'], ent_old)))
        tbl_old[f"entity:{table_name}_id"] = list(
            map(lambda f: f['name'], ent_old))

        if store_membership:
            membership = list(
                map(lambda g: set(map(lambda h: h['entityName'], g['items'])),
                    tbl_old['samples']))
            del tbl_old['samples']

        c = list(tbl_old.columns)
        c.remove(f"entity:{table_name}_id")
        c = [f"entity:{table_name}_id"] + c
        tbl_old = tbl_old[c]
        tbl_old = tbl_old.astype(str)

    return tbl_old, membership
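A hedged usage sketch for load_table. The namespace and workspace are placeholders, and store_membership=True assumes the table is a set table (for example sample_set) whose rows carry a 'samples' attribute of item references, which is what the code above expects.

# Hypothetical call; relies on the fapi/pandas imports added above.
tbl, membership = load_table("my-project", "my-workspace", "sample_set",
                             store_membership=True)
if tbl is not None:
    print(tbl.head())
    print(f"{len(membership)} sample sets loaded")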
Code Example #6
def get_sample_sets(namespace, workspace, batches):
    response = fapi.get_entities(namespace, workspace, 'sample_set')
    fapi._check_response_code(response, 200)

    return [entity for entity in response.json() if entity['name'] in batches]
Code Example #7
def gather_and_concatenate_data_model_tsvs(input_file, entity_name):
    """Get data table tsv files from list of workspaces and concatenate results into a single excel report."""

    # read full excel sheet into dataframe - all rows of workspace project and workspace names
    workspace_info = pd.read_excel(input_file,
                                   sheet_name="Sheet1",
                                   index_col=None)

    # instantiate empty list to hold all entity information from all workspaces
    all_workspace_entities = []
    failed_workspaces = []
    # for each workspace_name, workspace_project pair
    for index, workspace in workspace_info.iterrows():
        # get workspace details
        workspace_name = workspace["workspace_name"]
        workspace_project = workspace["workspace_project"]

        # get a response with all attributes for each row in entity table
        entities = fapi.get_entities(workspace_project, workspace_name,
                                     entity_name)

        # if get entities call fails, add workspace details to dictionary
        # skip to next workspace
        if entities.status_code != 200:
            print(
                f"{entity_name} table in {workspace_project}/{workspace_name} does not exist or user does not have workspace access."
            )
            failed_workspaces.append({
                "workspace_project": workspace_project,
                "workspace_name": workspace_name
            })
            continue

        # for each row in entity table, re-format nested response json
        for entity in entities.json():
            entity_attributes = entity[
                "attributes"]  # [{attr name: attr value}] for each row
            entity_id = entity["name"]  # name of entity

            # insert entity_id, workspace_project, and workspace_name into list of dictionaries
            entity_attributes[f"entity:{entity_name}_id"] = entity_id
            entity_attributes["workspace_project"] = workspace_project
            entity_attributes["workspace_name"] = workspace_name

            # add entity information (dictionary) to list
            all_workspace_entities.append(entity_attributes)

        print(
            f"{entity_name} table in {workspace_project}/{workspace_name} successfully gathered."
        )

    # successful entity dictionaries -> df - dict per row (entity) for each entity table in all workspaces
    succeeded_data = pd.DataFrame(all_workspace_entities)
    # failed workspaces -> df
    failed_data = pd.DataFrame(failed_workspaces)

    # reorder dataframe so the entity:{entity_name}_id column is first
    if not succeeded_data.empty:
        ent_id_col = succeeded_data.pop(f"entity:{entity_name}_id")
        succeeded_data.insert(0, ent_id_col.name, ent_id_col)

    # write final dataframes to tsv files - separate files for success and failed data
    succeeded_output_filename = input_file.split("/")[-1].split(
        ".")[0] + "_succeeded.tsv"
    failed_output_filename = input_file.split("/")[-1].split(
        ".")[0] + "_failed.tsv"

    succeeded_data.to_csv(succeeded_output_filename, sep="\t", index=None)
    failed_data.to_csv(failed_output_filename, sep="\t", index=None)

    # if any failures, print warning message.
    if len(failed_workspaces) > 0:
        print(
            f"Warning: Completed gather and concatenate with the exception of one or more workspaces. Please examine details in {failed_output_filename}."
        )
        return
    # else print success message
    print(
        f"Successfully completed gather and concatenate for all workspaces. Results can be found in {succeeded_output_filename}."
    )
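A minimal sketch of calling the function above. The Excel path is hypothetical; the file is expected to contain a "Sheet1" with workspace_name and workspace_project columns, and the snippet assumes the firecloud fapi and pandas imports are in scope.

# Hypothetical invocation; writes <input>_succeeded.tsv and <input>_failed.tsv
# into the current working directory.
gather_and_concatenate_data_model_tsvs("workspaces_to_gather.xlsx", "sample")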
Code Example #8
from collections import defaultdict
from firecloud import api
import hail as hl
import os
import pandas as pd
import re
import tqdm

hl.init(log="/dev/null")

#%%

entities = api.get_entities("cmg-exomes-gcnv", "cmg_gcnv", "sample_set").json()
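The call above returns the parsed entity JSON for every sample_set row; a quick sketch of inspecting it, using the standard entity fields (name, entityType, attributes) seen in the earlier examples.

# Peek at the fetched sample sets before processing them below.
print(f"{len(entities)} sample sets fetched")
if entities:
    print(entities[0]["name"], sorted(entities[0]["attributes"].keys()))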

#%%


# copied from https://stackabuse.com/python-how-to-flatten-list-of-lists/
def flatten(list_of_lists):
    if len(list_of_lists) == 0:
        return list_of_lists
    if isinstance(list_of_lists[0], list):
        return flatten(list_of_lists[0]) + flatten(list_of_lists[1:])
    return list_of_lists[:1] + flatten(list_of_lists[1:])
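A quick check of the recursive helper above: it flattens arbitrarily nested lists into a single flat list.

# Example: nested input is flattened completely.
print(flatten([[1, 2], [3, [4, 5]], 6]))  # [1, 2, 3, 4, 5, 6]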


#%%

samples_counter = 0
gcnv_cluster_to_sample_bed_paths = {}