Exemple #1
0
def get_all_bound_file_paths(ws_namespace, ws_name):
    """Group workspace entities by the attributes that hold ``gs://`` URLs.

    Lists every entity type in the workspace, pages through all entities of
    each type, and records an entity under each attribute name whose value,
    rendered with ``str()``, starts with ``gs://``.

    Args:
        ws_namespace: Namespace passed through to the FireCloud API calls.
        ws_name: Workspace name passed through to the FireCloud API calls.

    Returns:
        A ``defaultdict(list)`` mapping attribute name to the list of entity
        JSON objects that carry a ``gs://`` value in that attribute. An
        entity appears once for every matching attribute it has.
    """
    request = firecloud_api.list_entity_types(ws_namespace, ws_name)
    if request.status_code != 200:
        # `fail` is defined elsewhere; presumably it aborts - TODO confirm.
        fail(request.text)

    entity_types_json = request.json()
    attribute_name_for_url_to_entity_json = defaultdict(list)

    # Entities are fetched page by page, page numbers starting at 1.
    page_size = 1000
    for entity_type, type_info in entity_types_json.items():
        num_pages = int(math.ceil(float(type_info["count"]) / page_size))
        for page in range(1, num_pages + 1):
            page_json = get_entity_by_page(ws_namespace, ws_name,
                                           entity_type, page, page_size)
            for entity_json in page_json["results"]:
                attributes = entity_json["attributes"]
                for attribute_name, attribute_value in attributes.items():
                    # str() because attribute values are not always strings.
                    if str(attribute_value).startswith("gs://"):
                        attribute_name_for_url_to_entity_json[
                            attribute_name].append(entity_json)
    return attribute_name_for_url_to_entity_json
def copy_workspace_entities_sushma(destination_workspace_namespace, destination_workspace_name, source_workspace_namespace, source_workspace_name, destination_workspace_bucket):
    """Copy workspace data tables to destination workspace and repoint bucket paths.

    Every entity type (data table) of the source workspace is copied together
    with all of its entities. Tables whose name does not end in ``_set`` are
    copied first, ``_set`` tables afterwards. Once all tables are copied, the
    source bucket name inside the destination tables is replaced with
    ``<destination bucket>/<source bucket>`` via ``update_entities``.

    Args:
        destination_workspace_namespace: Namespace of the workspace to copy into.
        destination_workspace_name: Name of the workspace to copy into.
        source_workspace_namespace: Namespace of the workspace to copy from.
        source_workspace_name: Name of the workspace to copy from.
        destination_workspace_bucket: Destination bucket; a ``gs://`` prefix,
            if present, is stripped before use.

    Returns:
        ``(True, [entity type names])`` on success, or ``(False, message)``
        as soon as any step fails. Failure messages are also printed.
    """
    source = f"{source_workspace_namespace}/{source_workspace_name}"
    destination = f"{destination_workspace_namespace}/{destination_workspace_name}"

    source_etypes = fapi.list_entity_types(source_workspace_namespace, source_workspace_name)
    if source_etypes.status_code != 200:  # getting list of data tables fails
        message = f"Failed to retrieve list of data tables (entity types) from: {source}. API error: {source_etypes.text}."
        print(message)
        return False, message

    # Parse the response once; the names are reused for ordering and reporting.
    etype_names = list(source_etypes.json().keys())

    # Non-set tables first, then set tables (same order as the two original
    # loops). Presumably sets must follow the entities they reference - confirm.
    single_etypes = [name for name in etype_names if not name.endswith("_set")]
    set_etypes = [name for name in etype_names if name.endswith("_set")]

    for etype in single_etypes + set_etypes:
        # get entity names for etype
        entities = fapi.get_entities(source_workspace_namespace, source_workspace_name, etype)
        if entities.status_code != 200:  # getting an etype's entities fails
            message = f"Failed to retrieve entities (row names) for {etype}. API error: {entities.text}"
            print(message)
            return False, message

        entity_names = [ent["name"] for ent in entities.json()]
        # copy etype (with entities) to destination workspace
        copy_response = fapi.copy_entities(source_workspace_namespace, source_workspace_name, destination_workspace_namespace, destination_workspace_name, etype, entity_names, link_existing_entities=True)
        # 409 is tolerated alongside 201, as in the original code.
        if copy_response.status_code not in [201, 409]:  # if copying table with entities fails
            message = f"Failed to copy {etype} with entities({entity_names}) to {destination}. API error: {copy_response.text}."
            print(message)
            return False, message

    print(f"Successfully copied data tables to {destination}: {etype_names}")

    # get original workspace bucket id
    get_bucket_success, get_bucket_message = get_workspace_bucket(source_workspace_name, source_workspace_namespace)
    if not get_bucket_success:
        # Assumes get_bucket_message holds the error text on failure - confirm
        # against get_workspace_bucket. Without this guard the json.loads /
        # key lookup below would raise instead of reporting the failure.
        message = f"Failed to retrieve workspace bucket for {source}. Error: {get_bucket_message}"
        print(message)
        return False, message
    source_bucket = json.loads(get_bucket_message)["workspace"]["bucketName"]
    destination_bucket = destination_workspace_bucket.replace("gs://", "")

    # update bucket links in the destination workspace so that it matches the path structure of what the WDL generates when it migrates data
    # gs://new_bucket_id/original_bucket_id/[original data structure]
    update_entities(destination_workspace_name, destination_workspace_namespace, replace_this=source_bucket, with_this=f"{destination_bucket}/{source_bucket}")

    print(f"Successfully updated data tables with new bucket paths data tables in {destination}.")
    return True, etype_names
Exemple #3
0
def callFirecloud():
    """Check that FireCloud can see the workspace and return the bucket directory.

    Relies on the module-level names ``BILLING_PROJECT_ID``, ``WORKSPACE``,
    ``BUCKET`` and ``SUBDIRECTORY`` being defined before the call.

    Returns:
        ``BUCKET + SUBDIRECTORY`` when the entity-type listing returns HTTP 200.

    Raises:
        RuntimeError: If the listing does not return HTTP 200.
        NameError: If one of the required names is undefined; a hint is
            printed and the exception is re-raised.
    """
    try:
        response = fapi.list_entity_types(BILLING_PROJECT_ID, WORKSPACE)
        if response.status_code != 200:
            message = "Error in Firecloud, check your billing project ID and the name of your workspace."
            print(message)
            # A bare `raise` here has no active exception and would surface as
            # "RuntimeError: No active exception to reraise"; raise the same
            # exception type explicitly so the cause is visible.
            raise RuntimeError(message)
        print("Firecloud has found your workspace!")
        return BUCKET + SUBDIRECTORY
    except NameError:
        print("Caught a NameError exception. This may mean the kernal was restarted or you didn't run ",
              "the cells above. Try running the cells above again.")
        raise
def get_single_entity_types(workspace, project):
    """Return the names of all entity types in the workspace that are not sets.

    The selected names are also printed, one per line, each indented with
    seven tab characters.
    """
    # A single API call lists every entity type, set and non-set alike.
    etypes_response = fapi.list_entity_types(project, workspace)
    etypes_by_name = json.loads(etypes_response.text)

    # Set tables are left out: they only reference the unique IDs of single
    # entities, and those IDs are not modified, so sets stay valid as they are.
    non_set_etypes = []
    for etype_name in etypes_by_name.keys():
        if etype_name.endswith("_set"):
            continue
        non_set_etypes.append(etype_name)

    print("List of entity types that will be updated, if applicable:")
    indent = '\t' * 7
    print('\n'.join(indent + etype_name for etype_name in non_set_etypes))

    return non_set_etypes
Exemple #5
0
 def test_list_entity_types(self):
     """Check that list_entity_types() answers with HTTP 200 for the test workspace."""
     response = fapi.list_entity_types(self.project, self.workspace)
     status = response.status_code
     # Print status and raw body so a failing run shows what the API returned.
     print(status, response.content)
     self.assertEqual(status, 200)
Exemple #6
0
def download_tsv_from_workspace(project,
                                workspace,
                                entity_type,
                                tsv_name,
                                page_size=DEFAULT_PAGE_SIZE,
                                attr_list=None):
    """Download large TSV file from Terra workspace by designated number of rows.

    Looks up the entity type's row count, id column name and attribute names,
    fetches all entities page by page, then writes them to ``tsv_name`` as
    tab-separated text with the id column first.

    Args:
        project: Project passed through to the FireCloud API calls.
        workspace: Workspace name passed through to the FireCloud API calls.
        entity_type: Entity type (data table) to export.
        tsv_name: Path of the TSV file to write (opened in ``"w"`` mode).
        page_size: Number of entities requested per page.
        attr_list: Optional collection of attribute names to keep; when falsy
            every attribute of the entity type is exported.

    Raises:
        SystemExit: With code 1 (after printing the response text) when the
            entity-type listing does not return HTTP 200.
        KeyError: If ``entity_type`` is not present in the listing.
    """
    # Function-scope import: `sys.exit` does not depend on the `site` module,
    # unlike the interactive `exit` builtin.
    import sys

    # get all entity types in workspace using API call
    # API = https://api.firecloud.org/#!/Entities/getEntityTypes
    response = fapi.list_entity_types(project, workspace)
    if response.status_code != 200:
        print(response.text)
        sys.exit(1)

    # get/report # of entities + associated attributes(column names) of input entity type
    type_info = response.json()[entity_type]
    entity_count = type_info["count"]
    entity_id = type_info["idName"]
    all_attribute_names = type_info["attributeNames"]

    # keep only the requested attributes if the user provided a list, else keep all
    if attr_list:
        selected_names = [
            attr for attr in all_attribute_names if attr in attr_list
        ]
    else:
        selected_names = all_attribute_names

    # The id column is not among the attribute names of the API response, so
    # put it first. A new list is built so the parsed response is not mutated.
    attribute_names = [entity_id, *selected_names]

    print(f'{entity_count} {entity_type}(s) to export.')

    with open(tsv_name, "w") as tsvout:
        # add header with attribute values to tsv
        tsvout.write("\t".join(attribute_names) + "\n")
        num_pages = int(math.ceil(float(entity_count) / page_size))

        # get entities by page where each page has page_size # of rows using API call
        print(f'Getting all {num_pages} pages of entity data.')
        all_page_responses = [
            get_entity_by_page(project, workspace, entity_type, page,
                               page_size)
            for page in tqdm(range(1, num_pages + 1))
        ]

        # each page response wraps its entities under "results"
        print(f'Writing {entity_count} attributes to tsv file.')
        for page_response in tqdm(all_page_responses):
            for entity_json in page_response["results"]:
                attributes = entity_json["attributes"]
                # expose the entity name under the id column's name
                attributes[entity_id] = entity_json["name"]

                # attributes missing on this entity become empty cells
                values = [
                    str(attributes.get(attribute_name, ""))
                    for attribute_name in attribute_names
                ]
                tsvout.write("\t".join(values) + "\n")

    print(f'Finished exporting {entity_type}(s) to tsv with name {tsv_name}.')
Exemple #7
0
# Make sure to include slashes in your SUBDIRECTORY variable.

# ## Environmental variables

# In[ ]:

# Workspace coordinates are read from the notebook environment.
BILLING_PROJECT_ID = os.environ['GOOGLE_PROJECT']
# The workspace name is taken from the parent directory of the current
# working directory.
WORKSPACE = os.path.basename(os.path.dirname(os.getcwd()))
BUCKET = os.environ["WORKSPACE_BUCKET"]

# ## Call FireCloud

# In[ ]:

# Confirm FireCloud can list the workspace's entity types, then build the
# target directory from BUCKET and SUBDIRECTORY (set in an earlier cell).
try:
    response = fapi.list_entity_types(BILLING_PROJECT_ID, WORKSPACE)
    if response.status_code != 200:
        message = "Error in Firecloud, check your billing project ID and the name of your workspace."
        print(message)
        # A bare `raise` here has no active exception and would surface as
        # "RuntimeError: No active exception to reraise"; raise the same
        # exception type explicitly so the cause is visible.
        raise RuntimeError(message)
    else:
        print("Firecloud has found your workspace!")
        directory = BUCKET + SUBDIRECTORY
except NameError:
    print(
        "Caught a NameError exception. This may mean the kernal was restarted or you didn't run ",
        "the cells above. Try running the cells above again.")
    raise

# ## Display the contents of your workspace bucket
Exemple #8
0
def get_schema(namespace, workspace):
    """Return the parsed JSON of the workspace's entity-type listing."""
    response = FAPI.list_entity_types(namespace=namespace, workspace=workspace)
    return response.json()