Example #1
    def share_workspace(self, email_to_add):
        """Share the workspace with the provided email address (VIEWER, canShare, no compute)."""
        acl_updates = [{
            "email": email_to_add,
            "accessLevel": "READER",
            "canShare": True,
            "canCompute": False
        }]
        call_fiss(fapi.update_workspace_acl, 200, self.project, self.workspace,
                  acl_updates, False)  # set invite_users_not_found=False
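All of these examples route API calls through a call_fiss helper that isn't shown on this page. A minimal sketch of what such a wrapper might look like, assuming firecloud.api (fapi) functions return requests.Response objects and that fapi._check_response_code raises on an unexpected status (both true in FISS); the real implementation may differ:

from firecloud import api as fapi

def call_fiss(fapi_func, expected_code, *args, specialcodes=None, **kwargs):
    ''' hypothetical sketch: call a fapi function, check the response code,
    and return the parsed JSON; if specialcodes is given, return the raw
    response instead so the caller can branch on status_code itself
    '''
    res = fapi_func(*args, **kwargs)  # fapi functions return a requests.Response
    if specialcodes is not None:
        if res.status_code not in specialcodes:
            fapi._check_response_code(res, expected_code)
        return res  # un-parsed response; caller handles the listed codes
    fapi._check_response_code(res, expected_code)  # raises on unexpected status
    return res.json() if res.content else None  # some calls (202/204) have no body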
Example #2
def cleanup_workspaces(project, match_str=None, age_days=None, verbose=True):
    if verbose:
        print('searching for workspaces to clean up')

    # hard code any cloned workspaces we do NOT want to delete
    exceptions = []

    # get a list of all workspaces in the project
    ws_json = call_fiss(fapi.list_workspaces, 200)
    ws_all = []
    for ws in ws_json:
        ws_project = ws['workspace']['namespace']
        ws_name = ws['workspace']['name']
        if ws_project == project:
            ws_all.append(ws_name)

    if verbose:
        print(str(len(ws_all)) + ' workspaces found in project ' + project)

    FMT = '%Y-%m-%d-%H-%M-%S'  # datetime format used in workspace_test_report.py > clone_workspace()

    # select the cloned workspaces older than [age_days] ago
    ws_to_delete = set()  # collect workspace names to delete in a set (to prevent duplicates)
    for ws in ws_all:
        if age_days is not None:
            # pull out the clone date and determine how many days ago the clone was made
            clone_date = ws.split('_')[-1]
            try:
                tdelta = datetime.now() - datetime.strptime(clone_date, FMT)
                tdelta_days = tdelta.days
            except ValueError:  # the workspace name has no datetime suffix, i.e. it wasn't cloned by us
                tdelta_days = -1

            # add workspace to the delete list if it's more than [age_days] old and not in our list of exceptions
            if tdelta_days > age_days:
                if ws not in exceptions:
                    ws_to_delete.add(ws)
                    if verbose:
                        print(ws + ' is ' + str(tdelta_days) + ' days old')

        if match_str is not None:
            # add workspace to the delete list if it contains the target string (match_str) and not in our list of exceptions
            if match_str in ws:
                if ws not in exceptions:
                    ws_to_delete.add(ws)

    # delete those old workspaces
    for ws in ws_to_delete:
        call_fiss(fapi.delete_workspace, 202, project, ws)
        if verbose:
            print(ws + ' deleted')
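A minimal usage sketch (the billing project name and match string here are hypothetical):

# delete cloned workspaces older than a week, plus anything whose name contains 'scratch'
cleanup_workspaces('my-billing-project', match_str='scratch', age_days=7)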
Example #3
def update_entities(workspace_name, workspace_project, replace_this,
                    with_this):
    ## update workspace entities
    print("Updating DATA ENTITIES for " + workspace_name)

    # get data attributes
    entities = call_fiss(fapi.get_entities_with_type, 200, workspace_project,
                         workspace_name)

    for ent in entities:
        ent_name = ent['name']
        ent_type = ent['entityType']
        ent_attrs = ent['attributes']
        attrs_list = []
        for attr in ent_attrs.keys():
            value = ent_attrs[attr]
            updated_attr = find_and_replace(attr, value, replace_this,
                                            with_this)
            if updated_attr:
                attrs_list.append(updated_attr)

        if len(attrs_list) > 0:
            response = fapi.update_entity(workspace_project, workspace_name,
                                          ent_type, ent_name, attrs_list)
            if response.status_code == 200:
                print('Updated entities:')
                for attr in attrs_list:
                    print('   ' + attr['attributeName'] + ' : ' +
                          attr['addUpdateAttribute'])
Example #4
def get_ws_bucket(project, name):
    ''' get the google bucket path name for the given workspace
    '''
    # call the api, check for errors, pull out the bucket name
    workspace = call_fiss(fapi.get_workspace, 200, project, name)
    bucket = workspace['workspace']['bucketName']
    return bucket
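Note that the returned bucket name has no gs:// prefix; callers prepend it themselves, as clone_workspace does below. For example (names are hypothetical):

bucket = get_ws_bucket('my-billing-project', 'my-workspace')  # hypothetical names
print('gs://' + bucket + '/')  # full bucket path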
Example #5
def update_entity_data_paths(workspace_name, workspace_project, bucket_list):
    print("Listing all gs:// paths in DATA ENTITIES for " + workspace_name)

    # get data attributes
    entities = call_fiss(fapi.get_entities_with_type, 200, workspace_project,
                         workspace_name)

    paths_without_replacements = {}  # paths for which we don't have a replacement

    replacements_made = 0

    for ent in entities:
        ent_name = ent['name']
        ent_type = ent['entityType']
        ent_attrs = ent['attributes']
        gs_paths = {}
        attrs_list = []
        for attr in ent_attrs.keys():
            if is_gs_path(attr, ent_attrs[attr]):  # this is a gs:// path
                original_path = ent_attrs[attr]
                # check whether this is a path we think we want to update
                if is_in_bucket_list(original_path, bucket_list):
                    new_path = get_replacement_path(original_path)
                    gs_paths[attr] = original_path
                    if new_path:
                        # format the update
                        updated_attr = fapi._attr_set(attr, new_path)
                        attrs_list.append(updated_attr)  # what we have replacements for
                        replacements_made += 1
                    else:
                        # what we don't have replacements for
                        paths_without_replacements[attr] = original_path

        if len(gs_paths) > 0:
            print(f'Found the following paths to update in {ent_name}:')
            for item in gs_paths.keys():
                print('   ' + item + ' : ' + gs_paths[item])

        if len(attrs_list) > 0:
            response = fapi.update_entity(workspace_project, workspace_name,
                                          ent_type, ent_name, attrs_list)
            if response.status_code == 200:
                print(f'\nUpdated entities in {ent_name}:')
                for attr in attrs_list:
                    print('   ' + attr['attributeName'] + ' : ' +
                          attr['addUpdateAttribute'])

    if replacements_made == 0:
        print('\nNo paths were updated!')

    if len(paths_without_replacements) > 0:
        print('\nWe could not find replacements for the following paths: ')
        for item in paths_without_replacements.keys():
            print('   ' + item + ' : ' + paths_without_replacements[item])
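update_entity_data_paths depends on three helpers that aren't shown on this page: is_gs_path, is_in_bucket_list, and get_replacement_path. A hedged sketch of the first two, assuming bucket_list holds bare bucket names (get_replacement_path encodes project-specific path mappings, so it's omitted):

def is_gs_path(attr, value):
    # only string attributes can be gs:// paths
    return isinstance(value, str) and value.startswith('gs://')

def is_in_bucket_list(path, bucket_list):
    # compare the bucket portion of gs://<bucket>/<object> against the list
    bucket = path.replace('gs://', '', 1).split('/')[0]
    return bucket in bucket_list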
Example #6
    def abort_submission(self):
        ''' abort submission
        '''
        res = call_fiss(fapi.abort_submission,
                        204,
                        self.project,
                        self.workspace,
                        self.sub_id,
                        specialcodes=[404])
Example #7
    def get_final_status(self):
        ''' once a submission is done: update submission with finished status and error messages
        '''

        # 3 cases: 1) has wfID & subID; 2) has subID (submission happened but wf failed); 3) has neither (submission failed)
        if self.wf_id is not None:  # has wf_id and sub_id
            # get info about workflow submission - but exclude info about calls & inputs (which can make the json too big and cause an error)
            res = call_fiss(get_workflow_metadata_withExclude, 200,
                            self.project, self.workspace, self.sub_id,
                            self.wf_id, 'calls', 'inputs')
            self.final_status = res['status']

            # overwrite times from submission tracking with workflow-level values
            start_time = res['start']
            end_time = res['end']
            terra_time_fmt = '%Y-%m-%dT%H:%M:%S.%fZ'
            elapsed = (datetime.strptime(end_time, terra_time_fmt) -
                       datetime.strptime(start_time, terra_time_fmt))
            self.runtime = format_timedelta(elapsed, 2)  # 2 hour threshold for marking in red

            # in case of failure, pull out the error message
            if self.final_status == 'Failed':
                self.message = ''
                for failed in res['failures']:
                    for message in failed['causedBy']:
                        self.message += str(message['message'])

        elif self.sub_id is not None:  # no wf_id but has sub_id
            res = call_fiss(fapi.get_submission, 200, self.project,
                            self.workspace, self.sub_id)
            for i in res['workflows']:
                self.final_status = i['status']
            if self.final_status == 'Failed':
                # get the error message(s) for why it failed
                self.message = ''
                for i in res['workflows']:
                    self.message += str(i['messages'])[1:-1]
            else:  # should probably never get here, but just in case
                self.message = 'unrecognized status'

        else:  # no wf_id or sub_id
            self.final_status = self.status
Example #8
def monitor_submission(terra_workspace,
                       terra_project,
                       submission_id,
                       sleep_time=300,
                       write_outputs_to_disk=False):
    # set up monitoring of status of submission
    break_out = False
    while not break_out:
        # check status of submission
        res = call_fiss(fapi.get_submission, 200, terra_project,
                        terra_workspace, submission_id)

        # submission status
        submission_status = res['status']
        if submission_status in ['Done', 'Aborted']:
            break_out = True
        else:
            sleep(sleep_time)

    submission_metadata = res

    # check workflow status for all workflows (failed or succeeded)
    submission_succeeded = True

    for i in submission_metadata['workflows']:
        # check workflow outcome
        if i['status'] != 'Succeeded':
            submission_succeeded = False

    # if using WDL, this flag should be set to true so these outputs can be parsed
    if write_outputs_to_disk:
        # save submission_succeeded
        save_name = 'SUBMISSION_STATUS'
        with open(save_name, 'w') as f:
            f.write('true' if submission_succeeded else 'false')
            print(f'submission status (boolean) saved to {save_name}')

        # save metadata
        save_name = 'monitor_submission_metadata.json'

        # terra_details to be appended to json
        terra_details = {
            "terra_workspace": terra_workspace,
            "terra_project": terra_project
        }

        # updating/appending the terra_details to current json
        submission_metadata.update(terra_details)

        with open(save_name, 'w') as f:
            # writes final result to file:
            f.write(json.dumps(submission_metadata))
            print(f'submission metadata saved to {save_name}')

    # upon success or failure (final status), capture into variable and return as output
    return submission_succeeded, submission_metadata
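Calling this directly might look like the following (all identifiers are hypothetical):

succeeded, metadata = monitor_submission(
    'my-workspace',                          # hypothetical workspace
    'my-billing-project',                    # hypothetical project
    '00000000-0000-0000-0000-000000000000',  # hypothetical submission UUID
    sleep_time=60)
print('submission succeeded' if succeeded else 'submission failed')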
Example #9
def format_fws(get_info=False, verbose=True):
    ''' format json file of featured workspaces into dictionary of workspace classes 
    '''
    # call api
    fws_json = get_fw_json()

    fws = {}

    for ws in fws_json:
        ws_project = ws['namespace']
        ws_name = ws['name']

        if verbose:
            print(ws_name + '\t (billing project: ' + ws_project + ')')

        ### load into Wspace class object
        fw = Wspace(workspace=ws_name, project=ws_project)

        if get_info:
            ### Extract workflows
            res_wf = call_fiss(fapi.list_workspace_configs,
                               200,
                               ws_project,
                               ws_name,
                               allRepos=True)

            wfs = []
            for wf in res_wf:
                wf_name = wf['name']
                wfs.append(wf_name)

            if len(wfs) > 0:
                if verbose:
                    print('\tWorkflows:')
                    print('\t\t' + '\n\t\t'.join(wfs))
            else:
                wfs = None

            ### Extract notebooks
            nbs = list_notebooks(ws_project, ws_name, ipynb_only=True)

            if len(nbs) > 0:
                if verbose:
                    print('\tNotebooks: ')
                    print('\t\t' + '\n\t\t'.join(nbs))
            else:
                nbs = None

            # save workflows and notebooks to Wspace
            fw.workflows = wfs
            fw.notebooks = nbs

        fws[fw.key] = fw

    return fws
Example #10
    def get_cost(self, verbose=True):
        sub_json = call_fiss(fapi.get_submission,
                             200,
                             self.project,
                             self.workspace,
                             self.sub_id,
                             specialcodes=[404])
        cost = None  # stays None if the submission wasn't found
        if sub_json.status_code != 404:  # 404 means submission not found
            sub_json = sub_json.json()
            cost = sub_json['cost']
            self.cost = cost
            if verbose:
                print(f'    cost ${cost:.2f}')
        return cost
Example #11
    def check_status(self, verbose=False):
        ''' check the status of a workflow submission using fiss
        '''
        res = call_fiss(fapi.get_submission, 200, self.project, self.workspace,
                        self.sub_id)

        # try to get the wf_id
        if self.wf_id is None:
            for i in res['workflows']:
                if 'workflowId' in i:
                    self.wf_id = i['workflowId']

        self.status = res['status']
        if verbose:
            print('    ' + datetime.today().strftime('%H:%M') + ' ' +
                  self.status + ' - ' + self.wf_name)
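check_status is designed to be polled until the submission reaches a terminal state; a sketch of the loop a caller might run, assuming sub is a Submission with a live sub_id ('Done' and 'Aborted' are the terminal submission statuses, as in monitor_submission above):

from time import sleep

while sub.status not in ('Done', 'Aborted'):
    sub.check_status(verbose=True)
    sleep(60)
sub.get_final_status()  # fills final_status, runtime, and any error message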
Example #12
    def create_submission(self, verbose=False):
        ''' create a workflow submission using fiss
        '''
        # only run if status is None
        # create a submission to run for this workflow
        if self.status is None:
            # include list of specialcodes to handle the 400/404 errors with output
            res = call_fiss(fapi.create_submission,
                            201,
                            self.project,
                            self.workspace,
                            self.wf_project,
                            self.wf_name,
                            self.entity_name,
                            self.entity_type,
                            specialcodes=[400, 404],
                            use_callcache=self.call_cache,
                            expression=self.expression)

            # because we included specialcodes input, call_fiss returns the un-parsed json
            if res.status_code in [400, 404]:
                self.status = 'Submission Failed'
                try:
                    self.message = res.json()['message']
                except Exception:
                    self.message = 'UNKNOWN FAILURE!'
                if verbose:
                    print('SUBMISSION FAILED (error ' + str(res.status_code) +
                          ', status marked Submission Failed) - ' +
                          self.wf_name)
                    print(self.message)
            else:
                # no need for fapi._check_response_code(res, 201): call_fiss only
                # reaches this branch when the response code was 201
                res = res.json()

                self.sub_id = res['submissionId']
                self.status = 'submission initialized in Python'  # this will be set to the Terra status when check_status() is called
                if verbose:
                    print('NEW SUBMISSION: ' + self.wf_name)
Example #13
def update_attributes(workspace_name, workspace_project, replace_this,
                      with_this):
    ## update workspace data attributes
    print("Updating ATTRIBUTES for " + workspace_name)

    # get data attributes
    response = call_fiss(fapi.get_workspace, 200, workspace_project,
                         workspace_name)
    attributes = response['workspace']['attributes']

    attrs_list = []
    for attr in attributes.keys():
        value = attributes[attr]
        updated_attr = find_and_replace(attr, value, replace_this, with_this)
        if updated_attr:
            attrs_list.append(updated_attr)

    if len(attrs_list) > 0:
        response = fapi.update_workspace_attributes(workspace_project,
                                                    workspace_name, attrs_list)
        if response.status_code == 200:
            print('Updated attributes:')
            for attr in attrs_list:
                print(attr)
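find_and_replace (also used by update_entities above) isn't shown on this page. A plausible sketch consistent with how its output is consumed, i.e. fapi._attr_set-style operations carrying attributeName and addUpdateAttribute keys; restricting it to string values is an assumption:

def find_and_replace(attr, value, replace_this, with_this):
    ''' hypothetical sketch: return an AddUpdateAttribute operation if the
    value contains the target string, else None
    '''
    if isinstance(value, str) and replace_this in value:
        return fapi._attr_set(attr, value.replace(replace_this, with_this))
    return None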
Example #14
    def create_submissions(self, verbose=False):
        project = self.project
        workspace = self.workspace
        if verbose:
            print('\nRunning workflow submissions on ' + workspace)

        # Get a list of workflows in the project
        res = call_fiss(fapi.list_workspace_configs,
                        200,
                        project,
                        workspace,
                        allRepos=True)

        # set up submission classes and structure them as lists
        if len(res) > 0:  # only proceed if there are workflows
            # get list of workflows to submit
            workflow_names = []
            submissions_unordered = {}

            for item in res:  # for each item (workflow)
                wf_name = item['name']  # the name of the workflow
                workflow_names.append(wf_name)

                entityType = None
                expression = None
                # identify the type of data (entity) being used by this workflow, if any
                if 'rootEntityType' in item:
                    entityType = item['rootEntityType']

                    # if it's a workflow that requires multiple entities, do it
                    if wf_name in WORKFLOWS_THAT_REQUIRE_MULTIPLE_ENTITIES:
                        expression = f'this.{entityType}s'
                        entityType = f'{entityType}_set'

                project_orig = item['namespace']  # workflow billing project

                # get and store the name of the data (entity) being used, if any
                entities = []
                if entityType is not None:
                    entities = call_fiss(fapi.get_entities, 200, project,
                                         workspace, entityType)
                entityName = None
                if len(entities) != 0:
                    allEntities = []
                    for ent in entities:
                        allEntities.append(ent['name'])

                    # if there's a _test entity, use it
                    for ent in allEntities:
                        if '_test' in ent:
                            entityName = ent
                    # otherwise if there's a _small entity, use it
                    if entityName is None:
                        for ent in allEntities:
                            if '_small' in ent:
                                entityName = ent

                    # otherwise just use the first entity
                    if entityName is None:
                        entityName = allEntities[0]  # use the first one

                # if there is no entityName, make sure entityType is also None
                if entityName is None:
                    entityType = None

                # populate dictionary of inputs for fapi.create_submission
                submissions_unordered[wf_name] = Submission(
                    workspace=workspace,
                    project=project,
                    wf_project=project_orig,
                    wf_name=wf_name,
                    entity_name=entityName,
                    entity_type=entityType,
                    call_cache=self.call_cache,
                    expression=expression)

                print(submissions_unordered[wf_name])

                # if workflow is 'optional', do not run a test
                if 'optional' in wf_name.lower():
                    submissions_unordered[wf_name].status = 'Done'
                    submissions_unordered[wf_name].final_status = 'Not tested'
                    submissions_unordered[wf_name].message = 'Optional workflow not tested'

            # check whether workflows are ordered, and structure list of submissions accordingly
            first_char = [wf[0] for wf in workflow_names]
            submissions_list = []
            if ('1' in first_char) and ('2' in first_char):
                do_order = True
                workflow_names.sort()
                for wf_name in workflow_names:
                    submissions_list.append([submissions_unordered[wf_name]])
                if verbose:
                    print('[submitting workflows sequentially]')
            else:
                do_order = False
                sub_list = []
                for wf_name in workflow_names:
                    sub_list.append(submissions_unordered[wf_name])
                submissions_list = [sub_list]
                if verbose:
                    print('[submitting workflows in parallel]')

            self.active_submissions = submissions_list
Example #15
def clone_workspace(original_project,
                    original_name,
                    clone_project,
                    clone_name=None,
                    clone_time=None,
                    share_with=None,
                    call_cache=True,
                    verbose=False,
                    copy_notebooks=False,
                    copy_bucket=False):
    ''' clone a workspace, though not bucket files or notebooks unless indicated;
    this also shares the workspace with emails/groups listed in share_with
    '''

    # define the name of the cloned workspace
    if clone_name is None:
        if clone_time is None:
            clone_time = datetime.today().strftime('%Y-%m-%d-%H-%M-%S')  # time of clone
        clone_name = original_name + '_' + clone_time  # cloned name is the original name + current date/time

    if verbose:
        print('\nCloning ' + original_name + ' to ' + clone_name)

    # get email address(es) of owner(s) of original workspace
    response = call_fiss(fapi.get_workspace, 200, original_project,
                         original_name)
    original_owners = response['owners']

    # clone the Featured Workspace & check for errors
    call_fiss(fapi.clone_workspace,
              201,
              original_project,
              original_name,
              clone_project,
              clone_name,
              specialcodes=[409])  # 409 = workspace already exists

    # optionally copy entire bucket, including notebooks
    # get gs addresses of original & cloned workspace buckets
    original_bucket = get_ws_bucket(original_project, original_name)
    clone_bucket = get_ws_bucket(clone_project, clone_name)

    if copy_bucket:  # copy everything in the bucket
        bucket_files = run_subprocess(
            ['gsutil', 'ls', 'gs://' + original_bucket + '/'],
            'Error listing bucket contents')
        if len(bucket_files) > 0:
            gsutil_args = [
                'gsutil', '-m', 'rsync', '-r', 'gs://' + original_bucket,
                'gs://' + clone_bucket
            ]
            bucket_files = run_subprocess(
                gsutil_args,
                'Error copying over original bucket to clone bucket')
    elif copy_notebooks:  # only copy notebooks
        # proceed only if the notebooks folder isn't empty
        if len(list_notebooks(original_project, original_name,
                              ipynb_only=False, verbose=False)) > 0:
            gsutil_args = [
                'gsutil', '-m', 'rsync', '-r',
                'gs://' + original_bucket + '/notebooks',
                'gs://' + clone_bucket + '/notebooks'
            ]
            bucket_files = run_subprocess(
                gsutil_args,
                'Error copying over original bucket to clone bucket')

        if verbose:
            print('Notebook files copied:')
            # TODO: note that if we ever do want to enable notebooks in these tests, there is
            # an eventual consistency issue with the SA having permissions to this bucket
            # immediately after creating the workspace - so this sometimes throws an error,
            # but only because the SA's permissions haven't propagated yet.
            list_notebooks(clone_project,
                           clone_name,
                           ipynb_only=False,
                           verbose=True)

    clone_ws = Wspace(workspace=clone_name,
                      project=clone_project,
                      workspace_orig=original_name,
                      project_orig=original_project,
                      owner_orig=original_owners,
                      call_cache=call_cache)

    # share cloned workspace with anyone listed in share_with
    if share_with is not None:
        if isinstance(share_with, str):
            share_with = [share_with]

        # need to add each email address separately
        for email_to_add in share_with:
            clone_ws.share_workspace(email_to_add)

    return clone_ws
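A minimal usage sketch (projects, workspace name, and email are hypothetical):

clone = clone_workspace('featured-project', 'Featured-Workspace',
                        'my-test-project',
                        share_with='my-group@firecloud.org',
                        copy_notebooks=True,
                        verbose=True)
print(clone.workspace)  # e.g. Featured-Workspace_2024-01-01-12-00-00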
Example #16
def get_cost_of_test(gcs_path, report_name, clone_project, verbose=True):
    clone_time = report_name.replace('master_report_', '').replace('.html', '')
    if verbose:
        print('generating cost report for ' + report_name)

    # get the folder where the individual reports live
    report_folder = gcs_path + clone_time
    # get a list of the individual reports for this master report
    system_command = "gsutil ls " + report_folder
    all_paths = os.popen(system_command).read()

    # get a dict of all workspaces
    ws_to_check = get_fws_dict_from_folder(gcs_path, clone_time, clone_project, False)

    # get a list of all workspaces & projects
    ws_json = call_fiss(fapi.list_workspaces, 200)
    project_dict = {}
    for ws in ws_json:
        project_dict[ws['workspace']['name']] = ws['workspace']['namespace']
    names_with_spaces = [key for key in project_dict.keys() if ' ' in key]
    
    total_cost = 0
    abort = False
    # for each workspace, get a list of submissions run
    for ws in ws_to_check.values():
        # find unformatted workspace name (where spaces are really spaces)
        for key in names_with_spaces:
            if key.replace(' ', '_') == ws.workspace:
                ws.workspace = key
        if verbose:
            print(ws.workspace)

        # get & store cost of each submission, and track total cost for the workspace in ws_cost
        ws_cost = 0
        submissions_json = call_fiss(fapi.list_submissions, 200, ws.project, ws.workspace, specialcodes=[404])
        if submissions_json.status_code == 404:  # error 404 means workspace does not exist
            abort = True
            break
        else:
            submissions_json = submissions_json.json()
        submissions_dict = {}
        for sub in submissions_json:
            wf_name = sub['methodConfigurationName']
            if verbose:
                print('  ' + wf_name)
            subID = sub['submissionId']
            sub_json = call_fiss(fapi.get_submission, 200, ws.project, ws.workspace, subID, specialcodes=[404])
            if sub_json.status_code != 404:  # 404 means submission not found
                sub_json = sub_json.json()
                cost = sub_json['cost']
                submissions_dict[wf_name] = '${:.2f}'.format(cost)
                total_cost += cost
                ws_cost += cost
                if verbose:
                    print('    cost ' + '${:.2f}'.format(cost))

        ws.submissions_cost = submissions_dict
        ws.total_cost = ws_cost

    if abort:
        print('At least one workspace did not exist. Cost reporting aborted.')
        report_path = None
    else:
        if verbose:
            print(str(len(ws_to_check)) + ' workspaces in report')
            print('${:.2f}'.format(total_cost))

        # format a report
        report_path = generate_cost_report(gcs_path, report_name, total_cost, ws_to_check, verbose)
    
    return report_path, total_cost
Example #17
    parser.add_argument(
        '--write_outputs_to_disk',
        action='store_true',
        help='whether to save function outputs to disk (useful in WDL)')

    args = parser.parse_args()

    submission_succeeded, submission_metadata = monitor_submission(
        args.terra_workspace, args.terra_project, args.submission_id,
        args.sleep_time, args.write_outputs_to_disk)

    # demo of pulling out workflow output metadata
    if submission_succeeded:
        print('\nWorkflow succeeded!')
        # pull out metadata for all workflows in the submission
        for i in submission_metadata['workflows']:
            if 'workflowId' in i:
                workflow_id = i['workflowId']
                res_workflow = call_fiss(fapi.get_workflow_metadata, 200,
                                         args.terra_project,
                                         args.terra_workspace,
                                         args.submission_id, workflow_id)

                workflow_outputs = res_workflow['outputs']
                print(f'workflow_outputs for {workflow_id}')
                pprint.pprint(workflow_outputs)
    else:
        print('\nWorkflow failed. Retrieved submission metadata:')
        pprint.pprint(submission_metadata)