def update_metadata(namespace, workspace, vcf_lists):
    """Attach per-batch filtered PESR/depth VCF list paths to sample_set entities.

    Args:
        namespace: Terra/FireCloud billing project (workspace namespace).
        workspace: workspace name.
        vcf_lists: iterable of (batch, pesr_vcflist, depth_vcflist) tuples; the
            list filenames are resolved under gs://<bucket>/other_vcf_lists/
            in the workspace bucket.

    Raises:
        FireCloudServerError: if any update_entity call does not return 200
            (via fapi._check_response_code).
    """
    fpath = 'gs://{0}/other_vcf_lists/{1}'
    bucket_name = get_bucket_name(namespace, workspace)
    for batch, pesr_vcflist, depth_vcflist in vcf_lists:
        # Both attributes are updated identically; loop over (attribute,
        # filename) pairs instead of duplicating the request/check logic.
        for attr, fname in (('other_batches_filtered_pesr_vcf_list', pesr_vcflist),
                            ('other_batches_filtered_depth_vcf_list', depth_vcflist)):
            list_path = fpath.format(bucket_name, fname)
            update = fapi._attr_set(attr, list_path)
            r = fapi.update_entity(namespace, workspace, 'sample_set', batch, [update])
            fapi._check_response_code(r, 200)
def update_metadata(namespace, workspace, cohort_batch_id, pesr_vcflist, depth_vcflist):
    """Attach cohort-level filtered PESR/depth VCF list paths to a sample_set.

    Args:
        namespace: Terra/FireCloud billing project (workspace namespace).
        workspace: workspace name.
        cohort_batch_id: name of the sample_set entity to update.
        pesr_vcflist: filename of the PESR VCF list under
            gs://<bucket>/cohort_vcf_lists/ in the workspace bucket.
        depth_vcflist: filename of the depth VCF list, same location.

    Raises:
        FireCloudServerError: if any update_entity call does not return 200
            (via fapi._check_response_code).
    """
    fpath = 'gs://{0}/cohort_vcf_lists/{1}'
    bucket_name = get_bucket_name(namespace, workspace)
    # Both attributes are updated identically; loop over (attribute,
    # filename) pairs instead of duplicating the request/check logic.
    for attr, fname in (('cohort_filtered_pesr_vcf_list', pesr_vcflist),
                        ('cohort_filtered_depth_vcf_list', depth_vcflist)):
        list_path = fpath.format(bucket_name, fname)
        update = fapi._attr_set(attr, list_path)
        r = fapi.update_entity(namespace, workspace, 'sample_set', cohort_batch_id, [update])
        fapi._check_response_code(r, 200)
def update_entities(workspace_name, workspace_project, replace_this, with_this):
    """Rewrite matching attribute values across all workspace data entities.

    Fetches every entity in the workspace, runs find_and_replace over each
    attribute, and pushes any resulting updates back to FireCloud, printing
    the attributes that were changed.
    """
    # update workspace entities
    print("Updating DATA ENTITIES for " + workspace_name)
    # get data attributes
    entities = call_fiss(fapi.get_entities_with_type, 200, workspace_project,
                         workspace_name)
    for entity in entities:
        updates = []
        for attr_name, attr_value in entity['attributes'].items():
            replacement = find_and_replace(attr_name, attr_value, replace_this, with_this)
            if replacement:
                updates.append(replacement)
        if not updates:
            continue  # nothing to push for this entity
        result = fapi.update_entity(workspace_project, workspace_name,
                                    entity['entityType'], entity['name'], updates)
        if result.status_code == 200:
            print('Updated entities:')
            for update in updates:
                print(' ' + update['attributeName'] + ' : ' + update['addUpdateAttribute'])
def update_entities(workspace_name, workspace_project, replace_this, with_this):
    """Update data model tables with new destination workspace bucket file paths."""
    # update workspace entities
    print(f"Starting update of data tables in workspace: {workspace_name}")
    # get data attributes
    entities = fapi.get_entities_with_type(workspace_project, workspace_name).json()
    for entity in entities:
        updates = []
        for attr_name, attr_value in entity['attributes'].items():
            replacement = find_and_replace(attr_name, attr_value, replace_this, with_this)
            if replacement:
                updates.append(replacement)
        if not updates:
            continue  # nothing to push for this entity
        result = fapi.update_entity(workspace_project, workspace_name,
                                    entity['entityType'], entity['name'], updates)
        if result.status_code == 200:
            print('Updated entities:')
            for update in updates:
                # str() kept: attribute values are not guaranteed to be strings
                print(' ' + str(update['attributeName']) + ' : ' + str(update['addUpdateAttribute']))
def update_entity_data_paths(workspace_name, workspace_project, bucket_list):
    """Find and update gs:// paths in all data entities of a workspace.

    Every attribute value that looks like a gs:// path inside one of the
    buckets in bucket_list is looked up for a replacement; replacements are
    pushed to FireCloud, and paths with no known replacement are reported
    at the end.
    """
    print("Listing all gs:// paths in DATA ENTITIES for " + workspace_name)
    # get data attributes
    entities = call_fiss(fapi.get_entities_with_type, 200, workspace_project,
                         workspace_name)
    unmatched = {}  # paths for which we don't have a replacement
    n_replacements = 0
    for entity in entities:
        name = entity['name']
        etype = entity['entityType']
        found = {}    # all in-scope gs:// paths seen on this entity
        updates = []  # formatted attribute updates to push
        for attr_name, attr_value in entity['attributes'].items():
            if not is_gs_path(attr_name, attr_value):
                continue  # not a gs:// path
            if not is_in_bucket_list(attr_value, bucket_list):
                continue  # not a path we want to update
            replacement = get_replacement_path(attr_value)
            found[attr_name] = attr_value
            if replacement:
                # format the update; this is a path we have a replacement for
                updates.append(fapi._attr_set(attr_name, replacement))
                n_replacements += 1
            else:
                unmatched[attr_name] = attr_value
        if found:
            print(f'Found the following paths to update in {name}:')
            for attr_name, path in found.items():
                print(' ' + attr_name + ' : ' + path)
        if updates:
            result = fapi.update_entity(workspace_project, workspace_name,
                                        etype, name, updates)
            if result.status_code == 200:
                print(f'\nUpdated entities in {name}:')
                for update in updates:
                    print(' ' + update['attributeName'] + ' : ' + update['addUpdateAttribute'])
    if n_replacements == 0:
        print('\nNo paths were updated!')
    if unmatched:
        print('\nWe could not find replacements for the following paths: ')
        for attr_name, path in unmatched.items():
            print(' ' + attr_name + ' : ' + path)
def update_metadata(namespace, workspace, sample_set, data):
    """Attach cohort-list file paths to a sample_set entity, best-effort.

    Args:
        namespace: Terra/FireCloud billing project (workspace namespace).
        workspace: workspace name.
        sample_set: name of the sample_set entity to update.
        data: iterable of (attribute_name, filename) pairs; filenames are
            resolved under gs://<bucket>/cohort_lists/ in the workspace bucket.

    Server errors on individual updates are reported but do not abort the
    remaining updates.
    """
    fpath = 'gs://{0}/cohort_lists/{1}'
    bucket_name = get_bucket_name(namespace, workspace)
    for attr, fname in data:
        list_path = fpath.format(bucket_name, fname)
        update = fapi._attr_set(attr, list_path)
        try:
            r = fapi.update_entity(namespace, workspace, 'sample_set', sample_set, [update])
            fapi._check_response_code(r, 200)
        except ferrors.FireCloudServerError as err:
            # Best-effort: keep updating the remaining attributes, but surface
            # the failure instead of swallowing it silently.
            print(f'Warning: failed to set {attr} on sample_set {sample_set}: {err}')
def delete_entity_attributes(namespace, workspace, entity_type, entity_name, attrs):
    """Delete entity attributes

    Args:
    - attrs: list of attributes to delete
    """
    removals = []
    for attr in attrs:
        removals.append({"op": "RemoveAttribute", "attributeName": attr})
    return firecloud_api.update_entity(namespace, workspace, entity_type,
                                       entity_name, removals)
def update_pair_attrs(namespace, workspace, pairs, attrs):
    """Update the FC (remote) pair attributes, listed in @attrs, present in
    the @pairs dataframe. The purpose of this function is to "push" to
    Firecloud changes to pairs made locally.

    Args:
    - pairs: df of pairs to update
    - attrs: list of attributes to update
    """
    for _, pair_row in pairs.iterrows():
        payload = []
        for attr in attrs:
            payload.append({"op": "AddUpdateAttribute",
                            "attributeName": attr,
                            "addUpdateAttribute": pair_row[attr]})
        firecloud_api.update_entity(namespace, workspace, "pair",
                                    pair_row['entity:pair_id'], payload)
    return
def update_entity_data_paths_deprecated(workspace_name, workspace_project, mapping_tsv, do_replacement=True):
    """Update (or just list) gs:// data paths in workspace entities using a TSV mapping.

    Args:
        workspace_name: workspace name.
        workspace_project: Terra/FireCloud billing project.
        mapping_tsv: TSV file mapping original paths to replacement paths
            (loaded via load_mapping).
        do_replacement: if True, push updates to FireCloud; if False, only
            record what would be updated (update_status is set to 0).

    Returns:
        pandas.DataFrame with one row per candidate path and columns
        entity_name, entity_type, attribute, original_path, new_path,
        map_key, fail_reason, file_type, update_status.
    """
    if do_replacement:
        print(
            f'Updating paths in {workspace_name}\n\nNOTE: THIS STEP MAY TAKE A FEW MINUTES. As long as you see `In [*]:` to the left of this cell, it\'s still working!'
        )
    else:
        print(f'Listing paths to update in {workspace_name}')

    # load path mapping
    mapping = load_mapping(mapping_tsv)

    # columns of the results dataframe returned to the caller
    columns = [
        'entity_name', 'entity_type', 'attribute', 'original_path', 'new_path',
        'map_key', 'fail_reason', 'file_type', 'update_status'
    ]
    # Accumulate rows as plain dicts and build the DataFrame once at the end:
    # DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, and
    # appending row-by-row is quadratic anyway.
    rows = []

    # get data attributes
    entities = call_fiss(fapi.get_entities_with_type, 200, workspace_project,
                         workspace_name)
    for ent in entities:
        ent_name = ent['name']
        ent_type = ent['entityType']
        ent_attrs = ent['attributes']
        attrs_list = []  # formatted updates to push for this entity
        inds = []  # row indices to stamp with the API call status
        for attr in ent_attrs.keys():
            if is_gs_path(attr, ent_attrs[attr]) and is_migratable_extension(
                    attr, ent_attrs[attr]):  # this is a gs:// path
                original_path = ent_attrs[attr]
                if is_in_bucket_list(
                        original_path, bucket_list=None
                ):  # this is a path we think we want to update
                    new_path, map_key, fail_reason = get_replacement_path(
                        original_path, mapping)
                    if new_path:
                        # format the update; remember which row it maps to
                        attrs_list.append(fapi._attr_set(attr, new_path))
                        inds.append(len(rows))
                    rows.append({
                        'entity_name': ent_name,
                        'entity_type': ent_type,
                        'attribute': attr,
                        'original_path': original_path,
                        'new_path': new_path,
                        'map_key': map_key,
                        'fail_reason': fail_reason,
                        'file_type': original_path.split('.')[-1]
                    })
        if attrs_list:
            if do_replacement:
                # DO THE REPLACEMENT
                response = fapi.update_entity(workspace_project, workspace_name,
                                              ent_type, ent_name, attrs_list)
                status_code = response.status_code
                if status_code != 200:
                    print(f'ERROR {status_code} updating {ent_name} with {str(attrs_list)} - {response.text}')
            else:
                status_code = 0  # sentinel: dry run, nothing was pushed
            # stamp only the rows that had a replacement formatted for them;
            # rows without a new_path keep update_status = NaN
            for i in inds:
                rows[i]['update_status'] = status_code

    df_paths = pd.DataFrame(rows, columns=columns)
    summarize_results(df_paths)
    return df_paths