def share_workspace(self, email_to_add):
    """Share the workspace with the provided email address (VIEWER, canShare, no compute)."""
    acl_updates = [{
        "email": email_to_add,
        "accessLevel": "READER",
        "canShare": True,
        "canCompute": False
    }]
    call_fiss(fapi.update_workspace_acl, 200, self.project, self.workspace,
              acl_updates, False)  # set invite_users_not_found=False
def cleanup_workspaces(project, match_str=None, age_days=None, verbose=True):
    if verbose:
        print('searching for workspaces to clean up')

    # hard-code any cloned workspaces we do NOT want to delete
    exceptions = []

    # get a list of all workspaces in the project
    ws_json = call_fiss(fapi.list_workspaces, 200)

    ws_all = []
    for ws in ws_json:
        ws_project = ws['workspace']['namespace']
        ws_name = ws['workspace']['name']
        if ws_project == project:
            ws_all.append(ws_name)

    if verbose:
        print(str(len(ws_all)) + ' workspaces found in project ' + project)

    FMT = '%Y-%m-%d-%H-%M-%S'  # datetime format used in workspace_test_report.py > clone_workspace()

    # select the cloned workspaces that are more than [age_days] old
    ws_to_delete = set()  # collect workspace names to delete in a set (to prevent duplicates)
    for ws in ws_all:
        if age_days is not None:
            # pull out the clone date and determine how many days ago the clone was made
            clone_date = ws.split('_')[-1]
            try:
                tdelta = datetime.now() - datetime.strptime(clone_date, FMT)
                tdelta_days = tdelta.days
            except ValueError:
                # the workspace name doesn't end in a datetime string, i.e. it wasn't cloned by us
                tdelta_days = -1

            # add the workspace to the delete list if it's more than [age_days] old and not in our list of exceptions
            if tdelta_days > age_days:
                if ws not in exceptions:
                    ws_to_delete.add(ws)
                    if verbose:
                        print(ws + ' is ' + str(tdelta_days) + ' days old')

        if match_str is not None:
            # add the workspace to the delete list if it contains the target string (match_str) and is not in our list of exceptions
            if match_str in ws:
                if ws not in exceptions:
                    ws_to_delete.add(ws)

    # delete those old workspaces
    for ws in ws_to_delete:
        call_fiss(fapi.delete_workspace, 202, project, ws)
        if verbose:
            print(ws + ' deleted')
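# A minimal usage sketch for cleanup_workspaces (the billing project and match string below
# are hypothetical placeholders, not values from this repo):
#
#   # delete cloned test workspaces in a project that are more than 7 days old
#   cleanup_workspaces('my-test-billing-project', age_days=7, verbose=True)
#
#   # or delete any workspace in the project whose name contains a marker string
#   cleanup_workspaces('my-test-billing-project', match_str='_scratch_', verbose=True)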
def update_entities(workspace_name, workspace_project, replace_this, with_this):
    ## update workspace entities
    print("Updating DATA ENTITIES for " + workspace_name)

    # get data entities and their attributes
    response = call_fiss(fapi.get_entities_with_type, 200, workspace_project, workspace_name)
    entities = response

    for ent in entities:
        ent_name = ent['name']
        ent_type = ent['entityType']
        ent_attrs = ent['attributes']
        attrs_list = []
        for attr in ent_attrs.keys():
            value = ent_attrs[attr]
            updated_attr = find_and_replace(attr, value, replace_this, with_this)
            if updated_attr:
                attrs_list.append(updated_attr)

        if len(attrs_list) > 0:
            response = fapi.update_entity(workspace_project, workspace_name, ent_type, ent_name, attrs_list)
            if response.status_code == 200:
                print('Updated entities:')
                for attr in attrs_list:
                    print(' ' + attr['attributeName'] + ' : ' + attr['addUpdateAttribute'])
def get_ws_bucket(project, name):
    ''' get the google bucket path name for the given workspace '''
    # call the api, check for errors, pull out the bucket name
    workspace = call_fiss(fapi.get_workspace, 200, project, name)
    bucket = workspace['workspace']['bucketName']
    return bucket
def update_entity_data_paths(workspace_name, workspace_project, bucket_list):
    print("Listing all gs:// paths in DATA ENTITIES for " + workspace_name)

    # get data entities and their attributes
    response = call_fiss(fapi.get_entities_with_type, 200, workspace_project, workspace_name)
    entities = response

    paths_without_replacements = {}  # where we store paths for which we don't have a replacement
    replacements_made = 0

    for ent in entities:
        ent_name = ent['name']
        ent_type = ent['entityType']
        ent_attrs = ent['attributes']
        gs_paths = {}
        attrs_list = []
        for attr in ent_attrs.keys():
            if is_gs_path(attr, ent_attrs[attr]):  # this is a gs:// path
                original_path = ent_attrs[attr]
                if is_in_bucket_list(original_path, bucket_list):  # this is a path we think we want to update
                    new_path = get_replacement_path(original_path)
                    gs_paths[attr] = original_path
                    if new_path:
                        # format the update
                        updated_attr = fapi._attr_set(attr, new_path)
                        attrs_list.append(updated_attr)  # what we have replacements for
                        replacements_made += 1
                    else:
                        paths_without_replacements[attr] = original_path  # what we don't have replacements for

        if len(gs_paths) > 0:
            print(f'Found the following paths to update in {ent_name}:')
            for item in gs_paths.keys():
                print(' ' + item + ' : ' + gs_paths[item])

        if len(attrs_list) > 0:
            response = fapi.update_entity(workspace_project, workspace_name, ent_type, ent_name, attrs_list)
            if response.status_code == 200:
                print(f'\nUpdated entities in {ent_name}:')
                for attr in attrs_list:
                    print(' ' + attr['attributeName'] + ' : ' + attr['addUpdateAttribute'])

    if replacements_made == 0:
        print('\nNo paths were updated!')

    if len(paths_without_replacements) > 0:
        print('\nWe could not find replacements for the following paths: ')
        for item in paths_without_replacements.keys():
            print(' ' + item + ' : ' + paths_without_replacements[item])
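# A minimal usage sketch for update_entity_data_paths (the workspace, project, and bucket ID
# below are hypothetical placeholders; only gs:// paths that fall inside a bucket in
# bucket_list and for which get_replacement_path() returns a new location are rewritten):
#
#   update_entity_data_paths(workspace_name='My Workspace',
#                            workspace_project='my-billing-project',
#                            bucket_list=['fc-00000000-aaaa-bbbb-cccc-000000000000'])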
def abort_submission(self):
    ''' abort submission '''
    res = call_fiss(fapi.abort_submission, 204, self.project, self.workspace,
                    self.sub_id, specialcodes=[404])
def get_final_status(self):
    ''' once a submission is done: update submission with finished status and error messages '''
    # 3 cases: 1) has wf_id & sub_id; 2) has sub_id only (submission happened but wf failed); 3) has neither (submission failed)
    if self.wf_id is not None:  # has wf_id and sub_id
        # get info about the workflow submission - but exclude info about calls & inputs
        # (which can make the json too big and cause an error)
        res = call_fiss(get_workflow_metadata_withExclude, 200, self.project,
                        self.workspace, self.sub_id, self.wf_id, 'calls', 'inputs')

        self.final_status = res['status']  # overwrite status from submission tracking
        start_time = res['start']
        end_time = res['end']

        terra_time_fmt = '%Y-%m-%dT%H:%M:%S.%fZ'
        elapsed = datetime.strptime(end_time, terra_time_fmt) - datetime.strptime(start_time, terra_time_fmt)
        self.runtime = format_timedelta(elapsed, 2)  # 2 hours threshold for marking in red

        # in case of failure, pull out the error message
        if self.final_status == 'Failed':
            self.message = ''
            for failed in res['failures']:
                for message in failed['causedBy']:
                    self.message += str(message['message'])

    elif self.sub_id is not None:  # no wf_id but has sub_id
        res = call_fiss(fapi.get_submission, 200, self.project, self.workspace, self.sub_id)
        for i in res['workflows']:
            self.final_status = i['status']

        if self.final_status == 'Failed':
            # get the error message(s) for why it failed
            self.message = ''
            for i in res['workflows']:
                self.message += str(i['messages'])[1:-1]
        else:  # should probably never get here, but just in case
            self.message = 'unrecognized status'

    else:  # no wf_id or sub_id
        self.final_status = self.status
def monitor_submission(terra_workspace, terra_project, submission_id,
                       sleep_time=300, write_outputs_to_disk=False):
    # set up monitoring of the status of the submission
    break_out = False
    while not break_out:
        # check status of submission
        res = call_fiss(fapi.get_submission, 200, terra_project, terra_workspace, submission_id)

        # submission status
        submission_status = res['status']
        if submission_status in ['Done', 'Aborted']:
            break_out = True
        else:
            sleep(sleep_time)

    submission_metadata = res

    # check workflow status for all workflows (failed or succeeded)
    submission_succeeded = True
    for i in submission_metadata['workflows']:
        # check workflow outcome
        if i['status'] != 'Succeeded':
            submission_succeeded = False

    # if using WDL, this flag should be set to true so these outputs can be parsed
    if write_outputs_to_disk:
        # save submission_succeeded
        save_name = 'SUBMISSION_STATUS'
        with open(save_name, 'w') as f:
            f.write('true' if submission_succeeded else 'false')
        print(f'submission status (boolean) saved to {save_name}')

        # save metadata
        save_name = 'monitor_submission_metadata.json'
        # terra details to be appended to the json
        terra_details = {
            "terra_workspace": terra_workspace,
            "terra_project": terra_project
        }
        # update/append the terra details to the current json
        submission_metadata.update(terra_details)
        with open(save_name, 'w') as f:
            # write the final result to file
            f.write(json.dumps(submission_metadata))
        print(f'submission metadata saved to {save_name}')

    # upon success or failure (final status), capture into variables and return as output
    return submission_succeeded, submission_metadata
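# A minimal usage sketch of calling monitor_submission directly from Python (the workspace,
# billing project, and submission ID below are hypothetical placeholders):
#
#   succeeded, metadata = monitor_submission(terra_workspace='my-workspace',
#                                            terra_project='my-billing-project',
#                                            submission_id='00000000-0000-0000-0000-000000000000',
#                                            sleep_time=60)
#   if succeeded:
#       print([wf['status'] for wf in metadata['workflows']])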
def format_fws(get_info=False, verbose=True):
    ''' format json file of featured workspaces into dictionary of workspace classes '''
    # call api
    fws_json = get_fw_json()

    fws = {}
    for ws in fws_json:
        ws_project = ws['namespace']
        ws_name = ws['name']
        if verbose:
            print(ws_name + '\t (billing project: ' + ws_project + ')')

        ### load into Wspace class object
        fw = Wspace(workspace=ws_name, project=ws_project)

        if get_info:
            ### Extract workflows
            res_wf = call_fiss(fapi.list_workspace_configs, 200, ws_project, ws_name, allRepos=True)
            wfs = []
            for wf in res_wf:
                wf_name = wf['name']
                wfs.append(wf_name)
            if len(wfs) > 0:
                if verbose:
                    print('\tWorkflows:')
                    print('\t\t' + '\n\t\t'.join(wfs))
            else:
                wfs = None

            ### Extract notebooks
            nbs = list_notebooks(ws_project, ws_name, ipynb_only=True)
            if len(nbs) > 0:
                if verbose:
                    print('\tNotebooks: ')
                    print('\t\t' + '\n\t\t'.join(nbs))
            else:
                nbs = None

            # save workflows and notebooks to Wspace
            fw.workflows = wfs
            fw.notebooks = nbs

        fws[fw.key] = fw

    return fws
def get_cost(self, verbose=True):
    cost = None  # default if the submission is not found
    sub_json = call_fiss(fapi.get_submission, 200, self.project, self.workspace,
                         self.sub_id, specialcodes=[404])
    if sub_json.status_code != 404:  # 404 means submission not found
        sub_json = sub_json.json()
        cost = sub_json['cost']
        self.cost = cost
        if verbose:
            print(f'  cost ${cost:.2f}')
    return cost
def check_status(self, verbose=False):
    ''' check the status of a workflow submission using fiss '''
    res = call_fiss(fapi.get_submission, 200, self.project, self.workspace, self.sub_id)

    # try to get the wf_id
    if self.wf_id is None:
        for i in res['workflows']:
            if 'workflowId' in i:
                self.wf_id = i['workflowId']

    self.status = res['status']

    if verbose:
        print(' ' + datetime.today().strftime('%H:%M') + ' ' + self.status + ' - ' + self.wf_name)
def create_submission(self, verbose=False):
    ''' create a workflow submission using fiss '''
    # only run if status is None
    if self.status is None:
        # create a submission to run for this workflow;
        # include a list of specialcodes to handle the 400/404 errors with output
        res = call_fiss(fapi.create_submission, 201, self.project, self.workspace,
                        self.wf_project, self.wf_name, self.entity_name, self.entity_type,
                        specialcodes=[400, 404],
                        use_callcache=self.call_cache,
                        expression=self.expression)

        # because we included the specialcodes input, call_fiss returns the un-parsed response
        if res.status_code in [400, 404]:
            self.status = 'Submission Failed'
            try:
                self.message = res.json()['message']
            except Exception:
                self.message = 'UNKNOWN FAILURE!'
            if verbose:
                print('SUBMISSION FAILED (error ' + str(res.status_code) +
                      ', status marked Submission Failed) - ' + self.wf_name)
                print(self.message)
        else:
            # fapi._check_response_code(res, 201)
            # no need to check, since we only get here if the response code was 201
            res = res.json()
            self.sub_id = res['submissionId']
            # this will be set to the Terra status when check_status() is called
            self.status = 'submission initialized in Python'
            if verbose:
                print('NEW SUBMISSION: ' + self.wf_name)
def update_attributes(workspace_name, workspace_project, replace_this, with_this):
    ## update workspace data attributes
    print("Updating ATTRIBUTES for " + workspace_name)

    # get workspace attributes
    response = call_fiss(fapi.get_workspace, 200, workspace_project, workspace_name)
    attributes = response['workspace']['attributes']

    attrs_list = []
    for attr in attributes.keys():
        value = attributes[attr]
        updated_attr = find_and_replace(attr, value, replace_this, with_this)
        if updated_attr:
            attrs_list.append(updated_attr)

    if len(attrs_list) > 0:
        response = fapi.update_workspace_attributes(workspace_project, workspace_name, attrs_list)
        if response.status_code == 200:
            print('Updated attributes:')
            for attr in attrs_list:
                print(attr)
def create_submissions(self, verbose=False):
    project = self.project
    workspace = self.workspace

    if verbose:
        print('\nRunning workflow submissions on ' + workspace)

    # Get a list of workflows in the project
    res = call_fiss(fapi.list_workspace_configs, 200, project, workspace, allRepos=True)

    # set up submission classes and structure them as lists
    if len(res) > 0:  # only proceed if there are workflows
        # get list of workflows to submit
        workflow_names = []
        submissions_unordered = {}

        for item in res:  # for each item (workflow)
            wf_name = item['name']  # the name of the workflow
            workflow_names.append(wf_name)

            entityType = None
            expression = None

            # identify the type of data (entity) being used by this workflow, if any
            if 'rootEntityType' in item:
                entityType = item['rootEntityType']
                # if it's a workflow that requires multiple entities, use an entity set
                if wf_name in WORKFLOWS_THAT_REQUIRE_MULTIPLE_ENTITIES:
                    expression = f'this.{entityType}s'
                    entityType = f'{entityType}_set'

            project_orig = item['namespace']  # workflow billing project

            # get and store the name of the data (entity) being used, if any
            entities = call_fiss(fapi.get_entities, 200, project, workspace, entityType)
            entityName = None
            if len(entities) != 0:
                allEntities = []
                for ent in entities:
                    allEntities.append(ent['name'])
                # if there's a _test entity, use it
                for ent in allEntities:
                    if '_test' in ent:
                        entityName = ent
                # otherwise, if there's a _small entity, use it
                if entityName is None:
                    for ent in allEntities:
                        if '_small' in ent:
                            entityName = ent
                # otherwise just use the first entity
                if entityName is None:
                    entityName = allEntities[0]  # use the first one

            # if there is no entityName, make sure entityType is also None
            if entityName is None:
                entityType = None

            # populate dictionary of inputs for fapi.create_submission
            submissions_unordered[wf_name] = Submission(workspace=workspace,
                                                        project=project,
                                                        wf_project=project_orig,
                                                        wf_name=wf_name,
                                                        entity_name=entityName,
                                                        entity_type=entityType,
                                                        call_cache=self.call_cache,
                                                        expression=expression)
            print(submissions_unordered[wf_name])

            # if the workflow is 'optional', do not run a test
            if 'optional' in wf_name.lower():
                submissions_unordered[wf_name].status = 'Done'
                submissions_unordered[wf_name].final_status = 'Not tested'
                submissions_unordered[wf_name].message = 'Optional workflow not tested'

        # check whether workflows are ordered, and structure the list of submissions accordingly
        first_char = list(wf[0] for wf in workflow_names)
        submissions_list = []
        if ('1' in first_char) and ('2' in first_char):
            do_order = True
            workflow_names.sort()
            for wf_name in workflow_names:
                submissions_list.append([submissions_unordered[wf_name]])
            if verbose:
                print('[submitting workflows sequentially]')
        else:
            do_order = False
            sub_list = []
            for wf_name in workflow_names:
                sub_list.append(submissions_unordered[wf_name])
            submissions_list = [sub_list]
            if verbose:
                print('[submitting workflows in parallel]')

        self.active_submissions = submissions_list
def clone_workspace(original_project, original_name, clone_project,
                    clone_name=None, clone_time=None, share_with=None,
                    call_cache=True, verbose=False,
                    copy_notebooks=False, copy_bucket=False):
    ''' clone a workspace, though not bucket files or notebooks unless indicated;
    this also shares the workspace with emails/groups listed in share_with
    '''
    # define the name of the cloned workspace
    if clone_name is None:
        if clone_time is None:
            clone_time = datetime.today().strftime('%Y-%m-%d-%H-%M-%S')  # time of clone
        clone_name = original_name + '_' + clone_time  # cloned name is the original name + current date/time

    if verbose:
        print('\nCloning ' + original_name + ' to ' + clone_name)

    # get email address(es) of owner(s) of the original workspace
    response = call_fiss(fapi.get_workspace, 200, original_project, original_name)
    original_owners = response['owners']

    # clone the Featured Workspace & check for errors
    call_fiss(fapi.clone_workspace, 201, original_project, original_name,
              clone_project, clone_name,
              specialcodes=[409])  # 409 = workspace already exists

    # optionally copy the entire bucket, including notebooks
    # get gs addresses of the original & cloned workspace buckets
    original_bucket = get_ws_bucket(original_project, original_name)
    clone_bucket = get_ws_bucket(clone_project, clone_name)

    if copy_bucket:  # copy everything in the bucket
        bucket_files = run_subprocess(['gsutil', 'ls', 'gs://' + original_bucket + '/'],
                                      'Error listing bucket contents')
        if len(bucket_files) > 0:
            gsutil_args = ['gsutil', '-m', 'rsync', '-r',
                           'gs://' + original_bucket, 'gs://' + clone_bucket]
            bucket_files = run_subprocess(gsutil_args,
                                          'Error copying over original bucket to clone bucket')
    elif copy_notebooks:  # only copy notebooks
        if len(list_notebooks(original_project, original_name,
                              ipynb_only=False, verbose=False)) > 0:  # if the notebooks folder isn't empty
            gsutil_args = ['gsutil', '-m', 'rsync', '-r',
                           'gs://' + original_bucket + '/notebooks',
                           'gs://' + clone_bucket + '/notebooks']
            bucket_files = run_subprocess(gsutil_args,
                                          'Error copying over original bucket to clone bucket')
            if verbose:
                print('Notebook files copied:')
                # TODO: note that if we ever do want to enable notebooks in these tests, there is
                # an eventual consistency issue with the SA having permissions to this bucket
                # immediately after creating the workspace - so this sometimes throws an error,
                # but only because the SA hasn't gatorcounted enough.
                list_notebooks(clone_project, clone_name, ipynb_only=False, verbose=True)

    clone_ws = Wspace(workspace=clone_name,
                      project=clone_project,
                      workspace_orig=original_name,
                      project_orig=original_project,
                      owner_orig=original_owners,
                      call_cache=call_cache)

    # share the cloned workspace with anyone listed in share_with
    if share_with is not None:
        if isinstance(share_with, str):
            share_with = [share_with]
        # need to add each email address separately
        for email_to_add in share_with:
            clone_ws.share_workspace(email_to_add)

    return clone_ws
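# A minimal usage sketch for clone_workspace (the projects, workspace name, and email below
# are hypothetical placeholders, not values from this repo):
#
#   clone_ws = clone_workspace(original_project='featured-workspace-project',
#                              original_name='Example-Featured-Workspace',
#                              clone_project='my-test-billing-project',
#                              share_with=['someone@example.org'],
#                              copy_notebooks=True,
#                              verbose=True)
#   print(clone_ws.workspace)  # e.g. 'Example-Featured-Workspace_2024-01-01-00-00-00'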
def get_cost_of_test(gcs_path, report_name, clone_project, verbose=True):
    clone_time = report_name.replace('master_report_', '').replace('.html', '')
    if verbose:
        print('generating cost report for ' + report_name)

    # get the folder where the individual reports live
    report_folder = gcs_path + clone_time

    # get a list of the individual reports for this master report
    system_command = "gsutil ls " + report_folder
    all_paths = os.popen(system_command).read()

    # get a dict of all workspaces
    ws_to_check = get_fws_dict_from_folder(gcs_path, clone_time, clone_project, False)

    # get a list of all workspaces & projects
    ws_json = call_fiss(fapi.list_workspaces, 200)
    project_dict = {}
    for ws in ws_json:
        project_dict[ws['workspace']['name']] = ws['workspace']['namespace']
    names_with_spaces = [key for key in project_dict.keys() if ' ' in key]

    total_cost = 0
    abort = False

    # for each workspace, get a list of submissions run
    for ws in ws_to_check.values():
        # find the unformatted workspace name (where spaces are really spaces)
        for key in names_with_spaces:
            if key.replace(' ', '_') == ws.workspace:
                ws.workspace = key
        if verbose:
            print(ws.workspace)

        # get & store the cost of each submission, and track the total cost for the workspace in ws_cost
        ws_cost = 0
        submissions_json = call_fiss(fapi.list_submissions, 200, ws.project, ws.workspace,
                                     specialcodes=[404])
        if submissions_json.status_code == 404:  # error 404 means the workspace does not exist
            abort = True
            break
        else:
            submissions_json = submissions_json.json()

        submissions_dict = {}
        for sub in submissions_json:
            wf_name = sub['methodConfigurationName']
            if verbose:
                print(' ' + wf_name)
            subID = sub['submissionId']
            sub_json = call_fiss(fapi.get_submission, 200, ws.project, ws.workspace, subID,
                                 specialcodes=[404])
            if sub_json.status_code != 404:  # 404 means submission not found
                sub_json = sub_json.json()
                cost = sub_json['cost']
                submissions_dict[wf_name] = '${:.2f}'.format(cost)
                total_cost += cost
                ws_cost += cost
                if verbose:
                    print('  cost ' + '${:.2f}'.format(cost))

        ws.submissions_cost = submissions_dict
        ws.total_cost = ws_cost

    if abort:
        print('At least one workspace did not exist. Cost reporting aborted.')
        report_path = None
    else:
        if verbose:
            print(str(len(ws_to_check)) + ' workspaces in report')
            print('${:.2f}'.format(total_cost))
        # format a report
        report_path = generate_cost_report(gcs_path, report_name, total_cost, ws_to_check, verbose)

    return report_path, total_cost
parser.add_argument('--write_outputs_to_disk', action='store_true',
                    help='whether to save function outputs to disk (useful in WDL)')

args = parser.parse_args()

[submission_succeeded, submission_metadata] = monitor_submission(args.terra_workspace,
                                                                 args.terra_project,
                                                                 args.submission_id,
                                                                 args.sleep_time,
                                                                 args.write_outputs_to_disk)

# demo of pulling out workflow output metadata
if submission_succeeded:
    print('\nWorkflow succeeded!')
    # pull out metadata for all workflows in the submission
    for i in submission_metadata['workflows']:
        if 'workflowId' in i:
            workflow_id = i['workflowId']
            res_workflow = call_fiss(fapi.get_workflow_metadata, 200, args.terra_project,
                                     args.terra_workspace, args.submission_id, workflow_id)
            workflow_outputs = res_workflow['outputs']
            print(f'workflow_outputs for {workflow_id}')
            pprint.pprint(workflow_outputs)
else:
    print('\nWorkflow failed. retrieved submission metadata:')
    pprint.pprint(submission_metadata)